fst

package
v0.0.0-...-d0be9ee Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 10, 2015 License: Apache-2.0 Imports: 8 Imported by: 0

Documentation

Index

Constants

View Source
const (
	INPUT_TYPE_BYTE1 = 1
	INPUT_TYPE_BYTE2 = 2
	INPUT_TYPE_BYTE4 = 3
)
View Source
const (
	FST_BIT_FINAL_ARC            = byte(1 << 0)
	FST_BIT_LAST_ARC             = byte(1 << 1)
	FST_BIT_TARGET_NEXT          = byte(1 << 2)
	FST_BIT_STOP_NODE            = byte(1 << 3)
	FST_BIT_ARC_HAS_OUTPUT       = byte(1 << 4)
	FST_BIT_ARC_HAS_FINAL_OUTPUT = byte(1 << 5)
	FST_BIT_TARGET_DELTA         = byte(1 << 6)
	FST_ARCS_AS_FIXED_ARRAY      = FST_BIT_ARC_HAS_FINAL_OUTPUT

	FIXED_ARRAY_SHALLOW_DISTANCE = 3 // 0 => only root node
	FIXED_ARRAY_NUM_ARCS_SHALLOW = 5
	FIXED_ARRAY_NUM_ARCS_DEEP    = 10

	FST_FILE_FORMAT_NAME    = "FST"
	FST_VERSION_PACKED      = 3
	FST_VERSION_VINT_TARGET = 4

	VERSION_CURRENT = FST_VERSION_VINT_TARGET

	FST_FINAL_END_NODE     = -1
	FST_NON_FINAL_END_NODE = 0

	/** If arc has this label then that arc is final/accepted */
	FST_END_LABEL = -1

	FST_DEFAULT_MAX_BLOCK_BITS = 28 // 30 for 64 bit int
)
View Source
const PRIME = 31

Variables

View Source
var ARC_SHALLOW_RAM_BYTES_USED = util.ShallowSizeOfInstance(reflect.TypeOf(Arc{}))
View Source
var BASE_NUM_BYTES = util.ShallowSizeOf(NO_OUTPUT)
View Source
var NO_OUTPUT = newNoOutputs()

Functions

func AsInt

func AsInt(n int32, err error) (n2 int, err2 error)

func AsInt64

func AsInt64(n int32, err error) (n2 int64, err2 error)

func CompareFSTValue

func CompareFSTValue(a, b interface{}) bool

func GetFSTOutput

func GetFSTOutput(fst *FST, input []byte) (output interface{}, err error)

* Looks up the output for this input, or null if the

  • input is not accepted

func ToIntsRef

func ToIntsRef(input []byte, scratch *util.IntsRefBuilder) *util.IntsRef

Just takes unsigned byte values from the BytesRef and converts into an IntsRef.

Types

type Arc

type Arc struct {
	Label  int
	Output interface{}

	NextFinalOutput interface{}
	// contains filtered or unexported fields
}

Represents a single arc

func (*Arc) IsFinal

func (arc *Arc) IsFinal() bool

func (*Arc) String

func (arc *Arc) String() string

type Builder

type Builder struct {
	NO_OUTPUT interface{}
	// contains filtered or unexported fields
}

Builds a minimal FST (maps an []int term to an arbitrary output) from pre-sorted terms with outputs. The FST becomes an FSA if you use NoOutputs. The FST is written on-the-fly into a compact serialized format byte array, which can be saved to / loaded from a Directory or used directly for traversal. The FST is always finite (no cycles).

NOTE: the algorithm is described at http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.3698

FSTs larger than 2.1GB are now possible (as of Lucene 4.2). FSTs containing more than 2.1B nodes are also now possible, however they cannot be packed.

func NewBuilder

func NewBuilder(inputType InputType, minSuffixCount1, minSuffixCount2 int,
	doShareSuffix, doShareNonSingletonNodes bool, shareMaxTailLength int,
	outputs Outputs, doPackFST bool,
	acceptableOverheadRatio float32, allowArrayArcs bool, bytesPageBits int) *Builder

Instantiates an FST/FSA builder with all the possible tuning and construction tweaks. Read parameter documentation carefully.

...

func (*Builder) Add

func (b *Builder) Add(input *util.IntsRef, output interface{}) error

It's OK to add the same input twice in a row with different outputs, as long as outputs impls the merge method. Note that input is fully consumed after this method is returned (so caller is free to reuse), but output is not. So if your outputs are changeable (eg ByteSequenceOutputs or IntSequenceOutputs) then you cannot reuse across calls.

func (*Builder) Finish

func (b *Builder) Finish() (*FST, error)

Returns final FST. NOTE: this will return nil if nothing is accepted by the FST.

type ByteSequenceOutputs

type ByteSequenceOutputs struct {
	// contains filtered or unexported fields
}

*

  • An FST {@link Outputs} implementation where each output
  • is a sequence of bytes.

func ByteSequenceOutputsSingleton

func ByteSequenceOutputsSingleton() *ByteSequenceOutputs

func (*ByteSequenceOutputs) Add

func (out *ByteSequenceOutputs) Add(_prefix interface{}, _output interface{}) interface{}

func (*ByteSequenceOutputs) Common

func (out *ByteSequenceOutputs) Common(_output1, _output2 interface{}) interface{}

func (*ByteSequenceOutputs) NoOutput

func (out *ByteSequenceOutputs) NoOutput() interface{}

func (*ByteSequenceOutputs) Read

func (out *ByteSequenceOutputs) Read(in util.DataInput) (e interface{}, err error)

func (ByteSequenceOutputs) ReadFinalOutput

func (out ByteSequenceOutputs) ReadFinalOutput(in util.DataInput) (e interface{}, err error)

Decode an output value previously written with writeFinalOutput(). By default this just calls read().

func (ByteSequenceOutputs) SkipFinalOutput

func (out ByteSequenceOutputs) SkipFinalOutput(in util.DataInput) error

func (ByteSequenceOutputs) SkipOutput

func (out ByteSequenceOutputs) SkipOutput(in util.DataInput) error

func (*ByteSequenceOutputs) String

func (out *ByteSequenceOutputs) String() string

func (*ByteSequenceOutputs) Subtract

func (out *ByteSequenceOutputs) Subtract(_output, _inc interface{}) interface{}

func (*ByteSequenceOutputs) Write

func (o *ByteSequenceOutputs) Write(obj interface{}, out util.DataOutput) error

type BytesReader

type BytesReader interface {
	// *util.DataInputImpl
	util.DataInput
	RandomAccess
}

type BytesRefFSTEnum

type BytesRefFSTEnum struct {
	*FSTEnum
	// contains filtered or unexported fields
}

func NewBytesRefFSTEnum

func NewBytesRefFSTEnum(fst *FST) *BytesRefFSTEnum

func (*BytesRefFSTEnum) Next

func (e *BytesRefFSTEnum) Next() (*BytesRefFSTEnumIO, error)

type BytesRefFSTEnumIO

type BytesRefFSTEnumIO struct {
	Input  *util.BytesRef
	Output interface{}
}

Holds a single input ([]byte) + output pair.

type BytesStore

type BytesStore struct {
	*util.DataOutputImpl
	// contains filtered or unexported fields
}

func (*BytesStore) String

func (s *BytesStore) String() string

func (*BytesStore) WriteByte

func (bs *BytesStore) WriteByte(b byte) error

func (*BytesStore) WriteBytes

func (bs *BytesStore) WriteBytes(buf []byte) error

type BytesStoreForwardReader

type BytesStoreForwardReader struct {
	*util.DataInputImpl
	// contains filtered or unexported fields
}

func (*BytesStoreForwardReader) ReadByte

func (r *BytesStoreForwardReader) ReadByte() (b byte, err error)

func (*BytesStoreForwardReader) ReadBytes

func (r *BytesStoreForwardReader) ReadBytes(buf []byte) error

type BytesStoreReverseReader

type BytesStoreReverseReader struct {
	*util.DataInputImpl
	// contains filtered or unexported fields
}

func (*BytesStoreReverseReader) ReadByte

func (r *BytesStoreReverseReader) ReadByte() (b byte, err error)

func (*BytesStoreReverseReader) ReadBytes

func (r *BytesStoreReverseReader) ReadBytes(buf []byte) error

type CompiledNode

type CompiledNode struct {
	// contains filtered or unexported fields
}

type FST

type FST struct {
	NO_OUTPUT interface{}
	// contains filtered or unexported fields
}

func LoadFST

func LoadFST(in util.DataInput, outputs Outputs) (fst *FST, err error)

func (*FST) BytesReader

func (t *FST) BytesReader() BytesReader

func (*FST) EmptyOutput

func (t *FST) EmptyOutput() interface{}

func (*FST) FindTargetArc

func (t *FST) FindTargetArc(labelToMatch int, follow *Arc, arc *Arc, in BytesReader) (target *Arc, err error)

* Finds an arc leaving the incoming arc, replacing the arc in place.

  • This returns null if the arc was not found, else the incoming arc.

func (*FST) FirstArc

func (t *FST) FirstArc(arc *Arc) *Arc

func (*FST) NodeCount

func (t *FST) NodeCount() int64

func (*FST) Save

func (t *FST) Save(out util.DataOutput) error

type FSTEnum

type FSTEnum struct {
	NO_OUTPUT interface{}
	// contains filtered or unexported fields
}

func (*FSTEnum) Arc

func (e *FSTEnum) Arc(idx int) *Arc

type FSTEnumSPI

type FSTEnumSPI interface {
	// contains filtered or unexported methods
}

type ForwardBytesReader

type ForwardBytesReader struct {
	*util.DataInputImpl
	// contains filtered or unexported fields
}

func (*ForwardBytesReader) ReadByte

func (r *ForwardBytesReader) ReadByte() (b byte, err error)

func (*ForwardBytesReader) ReadBytes

func (r *ForwardBytesReader) ReadBytes(buf []byte) error

type InputType

type InputType int

type NoOutputs

type NoOutputs struct {
	// contains filtered or unexported fields
}

A nil FST Outputs implementation; use this if you just want to build an FSA.

func (*NoOutputs) Add

func (o *NoOutputs) Add(prefix, output interface{}) interface{}

func (*NoOutputs) Common

func (o *NoOutputs) Common(output1, output2 interface{}) interface{}

func (*NoOutputs) NoOutput

func (o *NoOutputs) NoOutput() interface{}

func (*NoOutputs) Read

func (o *NoOutputs) Read(in util.DataInput) (interface{}, error)

func (NoOutputs) ReadFinalOutput

func (out NoOutputs) ReadFinalOutput(in util.DataInput) (e interface{}, err error)

Decode an output value previously written with writeFinalOutput(). By default this just calls read().

func (NoOutputs) SkipFinalOutput

func (out NoOutputs) SkipFinalOutput(in util.DataInput) error

func (NoOutputs) SkipOutput

func (out NoOutputs) SkipOutput(in util.DataInput) error

func (*NoOutputs) Subtract

func (o *NoOutputs) Subtract(output1, output2 interface{}) interface{}

func (*NoOutputs) Write

func (o *NoOutputs) Write(prefix interface{}, out util.DataOutput) error

type Node

type Node interface {
	// contains filtered or unexported methods
}

NOTE: not many instances of Node or CompiledNode are in memory while the FST is being built; it's only the current "frontier":

type NodeHash

type NodeHash struct {
	// contains filtered or unexported fields
}

Used to dedup states (lookup already-frozen states)

type Outputs

type Outputs interface {
	// Eg common("foobar", "food") -> "foo"
	Common(output1, output2 interface{}) interface{}
	// Eg subtract("foobar", "foo") -> "bar"
	Subtract(output1, output2 interface{}) interface{}
	/** Eg add("foo", "bar") -> "foobar" */
	Add(prefix interface{}, output interface{}) interface{}
	// Encode an output value into a DataOutput.
	Write(interface{}, util.DataOutput) error

	/** Decode an output value previously written with {@link
	 *  #write(Object, DataOutput)}. */
	Read(in util.DataInput) (e interface{}, err error)
	// Skip the output; defaults to just calling Read() and discarding the result
	SkipOutput(util.DataInput) error
	/** Decode an output value previously written with {@link
	 *  #writeFinalOutput(Object, DataOutput)}.  By default this
	 *  just calls {@link #read(DataInput)}. */
	ReadFinalOutput(in util.DataInput) (e interface{}, err error)
	// Skip the output previously written with WriteFinalOutput;
	// defaults to just calling ReadFinalOutput and discarding the
	// result.
	SkipFinalOutput(util.DataInput) error
	/** NOTE: this output is compared with == so you must
	 *  ensure that all methods return the single object if
	 *  it's really no output */
	NoOutput() interface{}
	// contains filtered or unexported methods
}

*

  • Represents the outputs for an FST, providing the basic
  • algebra required for building and traversing the FST. *
  • <p>Note that any operation that returns NO_OUTPUT must
  • return the same singleton object from {@link
  • #getNoOutput}.</p>

type RandomAccess

type RandomAccess interface {
	// contains filtered or unexported methods
}

type ReverseBytesReader

type ReverseBytesReader struct {
	*util.DataInputImpl
	// contains filtered or unexported fields
}

func (*ReverseBytesReader) ReadByte

func (r *ReverseBytesReader) ReadByte() (b byte, err error)

func (*ReverseBytesReader) ReadBytes

func (r *ReverseBytesReader) ReadBytes(buf []byte) error

func (*ReverseBytesReader) String

func (r *ReverseBytesReader) String() string

type UnCompiledNode

type UnCompiledNode struct {
	NumArcs int
	Arcs    []*builderArc

	IsFinal    bool
	InputCount int64
	// contains filtered or unexported fields
}

Expert: holds a pending (seen but not yet serialized) Node.

func NewUnCompiledNode

func NewUnCompiledNode(owner *Builder, depth int) *UnCompiledNode

func (*UnCompiledNode) Clear

func (n *UnCompiledNode) Clear()

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL