fst

package
v0.0.0-...-53ff736 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 27, 2024 License: Apache-2.0 Imports: 16 Imported by: 4

Documentation

Index

Constants

View Source
const (
	BYTE_SIZE    = 8
	INTEGER_SIZE = 32
	LONG_SIZE    = 64
	LONG_BYTES   = 8
)
View Source
const (
	BitFinalArc          = 1 << 0
	BitLastArc           = 1 << 1
	BitTargetNext        = 1 << 2
	BitStopNode          = 1 << 3
	BitArcHasOutput      = 1 << 4 // This flag is set if the arc has an output.
	BitArcHasFinalOutput = 1 << 5

	// ArcsForBinarySearch
	// value of the arc flags to declare a node with fixed length arcs designed for binary search.
	// We use this as a marker because this one flag is illegal by itself.
	ArcsForBinarySearch = BitArcHasFinalOutput

	// ArcsForDirectAddressing
	// value of the arc flags to declare a node with fixed length arcs and bit table designed for direct addressing.
	ArcsForDirectAddressing = 1 << 6
)
View Source
const (
	DEFAULT_MAX_BLOCK_BITS = 30
	INTEGER_BYTES          = 4

	BYTE1 = InputType(iota)
	BYTE2
	BYTE4

	// FIXED_LENGTH_ARC_SHALLOW_DEPTH
	// See Also: shouldExpandNodeWithFixedLengthArcs
	// 0 => only root node.
	FIXED_LENGTH_ARC_SHALLOW_DEPTH = 3

	// FIXED_LENGTH_ARC_SHALLOW_NUM_ARCS
	// See Also: shouldExpandNodeWithFixedLengthArcs
	FIXED_LENGTH_ARC_SHALLOW_NUM_ARCS = 5

	// FIXED_LENGTH_ARC_DEEP_NUM_ARCS
	// See Also: shouldExpandNodeWithFixedLengthArcs
	FIXED_LENGTH_ARC_DEEP_NUM_ARCS = 10

	// DIRECT_ADDRESSING_MAX_OVERSIZE_WITH_CREDIT_FACTOR
	// Maximum oversizing factor allowed for direct addressing compared to binary search
	// when expansion credits allow the oversizing. This factor prevents expansions
	// that are obviously too costly even if there are sufficient credits.
	// See Also: shouldExpandNodeWithDirectAddressing
	DIRECT_ADDRESSING_MAX_OVERSIZE_WITH_CREDIT_FACTOR = 1.66

	FILE_FORMAT_NAME = "FST"
	VERSION_START    = 6
	VERSION_CURRENT  = 7

	// FINAL_END_NODE
	// Never serialized; just used to represent the virtual
	// final node w/ no arcs:
	FINAL_END_NODE = -1

	// NON_FINAL_END_NODE
	// Never serialized; just used to represent the virtual
	// non-final node w/ no arcs:
	NON_FINAL_END_NODE = 0

	// END_LABEL
	// If arc has this label then that arc is final/accepted
	END_LABEL = -1
)
View Source
const DIRECT_ADDRESSING_MAX_OVERSIZING_FACTOR = 1.0

DIRECT_ADDRESSING_MAX_OVERSIZING_FACTOR Default oversizing factor used to decide whether to encode a node with direct addressing or binary search. Default is 1: ensure no oversizing on average. This factor does not determine whether to encode a node with a list of variable length arcs or with fixed length arcs. It only determines the effective encoding of a node that is already known to be encoded with fixed length arcs. See Fst.shouldExpandNodeWithFixedLengthArcs() and Fst.shouldExpandNodeWithDirectAddressing(). For English words we measured 217K nodes, only 3.27% of nodes are encoded with fixed length arcs, and 99.99% of them with direct addressing. Overall FST memory reduced by 1.67%. For the worst case we measured 168K nodes, 50% of them are encoded with fixed length arcs, and 14% of them with direct encoding. Overall FST memory reduced by 0.8%. Use TestFstDirectAddressing.main() and TestFstDirectAddressing.testWorstCaseForDirectAddressing() to evaluate a change. See also: Builder.SetDirectAddressingMaxOversizingFactor.

View Source
const (
	PRIME = int64(32)
)

Variables

View Source
var (
	ErrByteStoreBasic = errors.New("bytestore basic error")
	ErrItemNotFound   = errors.Wrap(ErrByteStoreBasic, "item not found")
)

Functions

func CountBits

func CountBits(arc *Arc, in BytesReader) (int, error)

CountBits See BitTableUtil.countBits(int, Fst.BytesReader). The count of set bits is the number of arcs of a direct addressing node.

func CountBitsUpTo

func CountBitsUpTo(bitIndex int, arc *Arc, in BytesReader) (int, error)

CountBitsUpTo See BitTableUtil.countBitsUpTo(int, Fst.BytesReader).

func IsBitSet

func IsBitSet(ctx context.Context, bitIndex int, arc *Arc, in BytesReader) (bool, error)

IsBitSet See BitTableUtil.IsBitSet(int, Fst.BytesReader).

func NextBitSet

func NextBitSet(ctx context.Context, bitIndex int, arc *Arc, in BytesReader) (int, error)

NextBitSet See BitTableUtil.NextBitSet(int, int, Fst.BytesReader).

func PreviousBitSet

func PreviousBitSet(bitIndex int, arc *Arc, in BytesReader) (int, error)

PreviousBitSet See BitTableUtil.previousBitSet(int, Fst.BytesReader).

func TargetHasArcs

func TargetHasArcs(arc *Arc) bool

TargetHasArcs returns true if the node at this address has any outgoing arcs

Types

type AbsEnum

type AbsEnum interface {
	GetUpTo() int
	GetOutput(idx int) Output
	SetTargetLength(size int)
	DoNext(ctx context.Context, lm LabelManager) error
	DoSeekCeil(ctx context.Context, lm LabelManager) error
	DoSeekFloor(ctx context.Context, lm LabelManager) error
	DoSeekExact(ctx context.Context, lm LabelManager) (bool, error)
}

type Arc

type Arc struct {
	// contains filtered or unexported fields
}

Arc Represents a single arc.

func (*Arc) ArcIdx

func (r *Arc) ArcIdx() int

ArcIdx Where we are in the array; only valid if bytesPerArc != 0.

func (*Arc) BytesPerArc

func (r *Arc) BytesPerArc() int

BytesPerArc Non-zero if this arc is part of a node with fixed length arcs, which means all arcs for the node are encoded with a fixed number of bytes so that we can binary search or direct address. We do this when there are enough arcs leaving one node. It wastes some bytes but gives faster lookups.

func (*Arc) FirstLabel

func (r *Arc) FirstLabel() int

FirstLabel First label of a direct addressing node. Only valid if nodeFlags == ArcsForDirectAddressing.

func (*Arc) Flags

func (r *Arc) Flags() byte

func (*Arc) IsFinal

func (r *Arc) IsFinal() bool

func (*Arc) IsLast

func (r *Arc) IsLast() bool

func (*Arc) Label

func (r *Arc) Label() int

func (*Arc) NextArc

func (r *Arc) NextArc() int64

NextArc Address (into the byte[]) of the next arc - only for a list of variable length arcs. Or ord/address to the next node if label == END_LABEL.

func (*Arc) NextFinalOutput

func (r *Arc) NextFinalOutput() Output

func (*Arc) NodeFlags

func (r *Arc) NodeFlags() byte

NodeFlags Node header flags. Only meaningful to check if the value is either ArcsForBinarySearch or ArcsForDirectAddressing (other value when bytesPerArc == 0).

func (*Arc) NumArcs

func (r *Arc) NumArcs() int

NumArcs How many arcs; only valid if bytesPerArc != 0 (fixed length arcs). For a node designed for binary search this is the array size. For a node designed for direct addressing, this is the label range.

func (*Arc) Output

func (r *Arc) Output() Output

func (*Arc) PosArcsStart

func (r *Arc) PosArcsStart() int64

PosArcsStart Where the first arc in the array starts; only valid if bytesPerArc != 0

func (*Arc) Target

func (r *Arc) Target() int64

Target Ord/address to target node.

type BoxManager

type BoxManager[T Int] struct {
	// contains filtered or unexported fields
}

func NewBoxManager

func NewBoxManager[T Int]() *BoxManager[T]

func (*BoxManager[T]) EmptyOutput

func (b *BoxManager[T]) EmptyOutput() Output

func (*BoxManager[T]) New

func (b *BoxManager[T]) New() Output

func (*BoxManager[T]) Read

func (b *BoxManager[T]) Read(ctx context.Context, in store.DataInput, v any) error

func (*BoxManager[T]) ReadFinalOutput

func (b *BoxManager[T]) ReadFinalOutput(ctx context.Context, in store.DataInput, v any) error

func (*BoxManager[T]) SkipFinalOutput

func (b *BoxManager[T]) SkipFinalOutput(ctx context.Context, in store.DataInput) error

func (*BoxManager[T]) SkipOutput

func (b *BoxManager[T]) SkipOutput(ctx context.Context, in store.DataInput) error

func (*BoxManager[T]) Write

func (b *BoxManager[T]) Write(ctx context.Context, out store.DataOutput, v any) error

func (*BoxManager[T]) WriteFinalOutput

func (b *BoxManager[T]) WriteFinalOutput(ctx context.Context, out store.DataOutput, v any) error

type Buffer

type Buffer struct {
	bytes.Buffer
	// contains filtered or unexported fields
}

func NewBuffer

func NewBuffer() *Buffer

func (*Buffer) WriteUvarint

func (b *Buffer) WriteUvarint(i uint64) error

type Builder

type Builder struct {
	// contains filtered or unexported fields
}

Builder Builds a minimal FST (maps a term([]int) to an arbitrary output) from pre-sorted terms with outputs. The FST becomes an FSA if you use NoOutputs. The FST is written on-the-fly into a compact serialized format byte array, which can be saved to / loaded from a Directory or used directly for traversal. The FST is always finite (no cycles).

NOTE: The algorithm is described at http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.3698

The output type is described by the Output interface. See the implementations of Output.

FSTs larger than 2.1GB are now possible (as of Lucene 4.2). FSTs containing more than 2.1B nodes are also now possible, however they cannot be packed.

lucene.experimental

func NewBuilder

func NewBuilder(inputType InputType, manager OutputManager, options ...BuilderOption) (*Builder, error)

NewBuilder Instantiates an FST/FSA builder without any pruning. A shortcut to Builder(Fst.INPUT_TYPE, int, int, boolean, boolean, int, Output, boolean, int) with pruning options turned off.

func (*Builder) Add

func (b *Builder) Add(ctx context.Context, input []rune, output Output) error

func (*Builder) AddInts

func (b *Builder) AddInts(ctx context.Context, input []int, output Output) error

func (*Builder) AddStr

func (b *Builder) AddStr(ctx context.Context, input string, output Output) error

func (*Builder) Finish

func (b *Builder) Finish(ctx context.Context) (*FST, error)

Finish Returns the final FST. NOTE: this will return nil if nothing is accepted by the FST.

func (*Builder) GetArcCount

func (b *Builder) GetArcCount() int

func (*Builder) GetDirectAddressingMaxOversizingFactor

func (b *Builder) GetDirectAddressingMaxOversizingFactor() float64

func (*Builder) GetNodeCount

func (b *Builder) GetNodeCount() int

func (*Builder) GetTermCount

func (b *Builder) GetTermCount() int

func (*Builder) SetDirectAddressingMaxOversizingFactor

func (b *Builder) SetDirectAddressingMaxOversizingFactor(factor float64) *Builder

SetDirectAddressingMaxOversizingFactor Overrides the default maximum oversizing of the fixed array allowed to enable direct addressing of arcs instead of binary search. Setting this factor to a negative value (e.g. -1) effectively disables direct addressing, only binary search nodes will be created. See DIRECT_ADDRESSING_MAX_OVERSIZING_FACTOR.

type BuilderOption

type BuilderOption func(option *builderOption)

func WithAllowFixedLengthArcs

func WithAllowFixedLengthArcs(allowFixedLengthArcs bool) BuilderOption

func WithBytesPageBits

func WithBytesPageBits(bytesPageBits int) BuilderOption

func WithDoShareNonSingletonNodes

func WithDoShareNonSingletonNodes(doShareNonSingletonNodes bool) BuilderOption

func WithDoShareSuffix

func WithDoShareSuffix(doShareSuffix bool) BuilderOption

func WithMinSuffixCount1

func WithMinSuffixCount1(minSuffixCount1 int) BuilderOption

func WithMinSuffixCount2

func WithMinSuffixCount2(minSuffixCount2 int) BuilderOption

func WithShareMaxTailLength

func WithShareMaxTailLength(shareMaxTailLength int) BuilderOption

type ByteStore

type ByteStore struct {
	*store.BaseDataOutput
	// contains filtered or unexported fields
}

func NewByteStore

func NewByteStore(blockBits int) *ByteStore

func NewBytesStoreByDataInput

func NewBytesStoreByDataInput(in io.Reader, numBytes, maxBlockSize int64) (*ByteStore, error)

func (*ByteStore) CopyTo

func (r *ByteStore) CopyTo(ctx context.Context, src int64, size int64, w io.Writer) error

CopyTo Copies bytes from this store to a target io.Writer

func (*ByteStore) Finish

func (r *ByteStore) Finish() error

func (*ByteStore) GetBlockBits

func (r *ByteStore) GetBlockBits() int64

func (*ByteStore) GetPosition

func (r *ByteStore) GetPosition() int64

func (*ByteStore) GetReverseReader

func (r *ByteStore) GetReverseReader() (BytesReader, error)

func (*ByteStore) MoveBytes

func (r *ByteStore) MoveBytes(ctx context.Context, src, dest, size int64) error

MoveBytes Absolute copy of bytes from this store to itself, without changing the position. Note: this cannot "grow" the bytes, so you must only call it on already written parts.

func (*ByteStore) Reverse

func (r *ByteStore) Reverse(srcPos, destPos int64) error

Reverse from srcPos, inclusive, to destPos, inclusive.

func (*ByteStore) SkipBytes

func (r *ByteStore) SkipBytes(size int64) error

func (*ByteStore) Truncate

func (r *ByteStore) Truncate(newLen int64) error

Truncate newLen must be less than the max position written so far! I.e., you cannot "grow" the file with this!

func (*ByteStore) Write

func (r *ByteStore) Write(bs []byte) (int, error)

func (*ByteStore) WriteByte

func (r *ByteStore) WriteByte(b byte) error

func (*ByteStore) WriteByteAt

func (r *ByteStore) WriteByteAt(dest int64, b byte) error

WriteByteAt Absolute write byte; you must ensure dest is < max position written so far.

func (*ByteStore) WriteBytesAt

func (r *ByteStore) WriteBytesAt(ctx context.Context, dest int64, bs []byte) error

WriteBytesAt Absolute writeBytes without changing the current position. Note: this cannot "grow" the bytes, so you must only call it on already written parts.

func (*ByteStore) WriteInt32

func (r *ByteStore) WriteInt32(pos int64, value int32) error

WriteInt32 Writes an int at the absolute position without changing the current pointer.

func (*ByteStore) WriteToDataOutput

func (r *ByteStore) WriteToDataOutput(out store.DataOutput) error

WriteToDataOutput Writes all of our bytes to the target DataOutput.

type BytesReader

type BytesReader interface {
	store.DataInput

	// GetPosition Get current read position.
	GetPosition() int64

	// SetPosition Set current read position.
	SetPosition(pos int64) error

	// Reversed Returns true if this reader uses reversed bytes under-the-hood.
	Reversed() bool
}

BytesReader Reads bytes stored in an FST.

type CompiledNode

type CompiledNode struct {
	// contains filtered or unexported fields
}

func NewCompiledNode

func NewCompiledNode() *CompiledNode

func (*CompiledNode) Code

func (r *CompiledNode) Code() int64

func (*CompiledNode) IsCompiled

func (*CompiledNode) IsCompiled() bool

type Enum

type Enum[T byte | int] struct {
	// contains filtered or unexported fields
}

Enum Enumerates all input (BytesRef) + output pairs in an FST. lucene.experimental

func NewEnum

func NewEnum[T int | byte](fst *FST) (*Enum[T], error)

func (*Enum[T]) Current

func (b *Enum[T]) Current() *KV[T]

func (*Enum[T]) GetCurrentLabel

func (b *Enum[T]) GetCurrentLabel(upto int) int

func (*Enum[T]) GetTargetLabel

func (b *Enum[T]) GetTargetLabel(upto int) int

func (*Enum[T]) Grow

func (b *Enum[T]) Grow()

func (*Enum[T]) Next

func (b *Enum[T]) Next(ctx context.Context) (*KV[T], error)

func (*Enum[T]) SeekCeil

func (b *Enum[T]) SeekCeil(ctx context.Context, target []T) (*KV[T], bool, error)

SeekCeil Seeks to smallest term that's >= target.

func (*Enum[T]) SeekExact

func (b *Enum[T]) SeekExact(ctx context.Context, target []T) (*KV[T], bool, error)

SeekExact Seeks to exactly this term, returning nil if the term doesn't exist. This is faster than using seekFloor or seekCeil because it short-circuits as soon as a match is not found.

func (*Enum[T]) SeekFloor

func (b *Enum[T]) SeekFloor(ctx context.Context, target []T) (*KV[T], bool, error)

SeekFloor Seeks to biggest term that's <= target.

func (*Enum[T]) SetCurrentLabel

func (b *Enum[T]) SetCurrentLabel(label int) error

type FST

type FST struct {
	// contains filtered or unexported fields
}

func NewFST

func NewFST(inputType InputType, outputM OutputManager, bytesPageBits int) *FST

func NewFSTFromFile

func NewFSTFromFile(ctx context.Context, path string, outputs OutputManager) (*FST, error)

NewFSTFromFile Reads an automaton from a file.

func NewFstV1

func NewFstV1(ctx context.Context, manager OutputManager, metaIn, in store.DataInput) (*FST, error)

NewFstV1 Load a previously saved FST.

func NewFstV2

func NewFstV2(ctx context.Context, manager OutputManager, fstStore Store, metaIn, in store.DataInput) (*FST, error)

NewFstV2 Load a previously saved FST; maxBlockBits allows you to control the size of the byte[] pages used to hold the FST bytes.

func (*FST) AddNode

func (f *FST) AddNode(ctx context.Context, builder *Builder, nodeIn *UnCompiledNode) (int64, error)

AddNode serializes new node by appending its bytes to the end of the current byte[]

func (*FST) FindTarget

func (f *FST) FindTarget(ctx context.Context, labelToMatch int, current *Arc, in BytesReader) (*Arc, bool, error)

func (*FST) FindTargetArc

func (f *FST) FindTargetArc(ctx context.Context, labelToMatch int, in BytesReader, follow, arc *Arc) (*Arc, bool, error)

FindTargetArc Finds an arc leaving the incoming arc, replacing the arc in place. This returns nil if the arc was not found, else the incoming arc. In other words, it looks up the arc after follow whose label equals labelToMatch.

func (*FST) Finish

func (f *FST) Finish(newStartNode int64) error

func (*FST) GetBytesReader

func (f *FST) GetBytesReader() (BytesReader, error)

GetBytesReader Returns a Fst.BytesReader for this FST, positioned at position 0.

func (*FST) GetFirstArc

func (f *FST) GetFirstArc(arc *Arc) (*Arc, error)

GetFirstArc Fills the virtual 'start' arc, i.e., an empty incoming arc to the FST's start node.

func (*FST) ReadArcByDirectAddressing

func (f *FST) ReadArcByDirectAddressing(ctx context.Context, in BytesReader, rangeIndex int, arc *Arc) (*Arc, error)

ReadArcByDirectAddressing Reads a present direct addressing node arc, with the provided index in the label range. rangeIndex: The index of the arc in the label range. It must be present. The real arc offset is computed based on the presence bits of the direct addressing node.

func (*FST) ReadArcByIndex

func (f *FST) ReadArcByIndex(ctx context.Context, in BytesReader, idx int, arc *Arc) (*Arc, error)

func (*FST) ReadFirstRealTargetArc

func (f *FST) ReadFirstRealTargetArc(ctx context.Context, nodeAddress int64, in BytesReader, arc *Arc) (*Arc, error)

func (*FST) ReadFirstTargetArc

func (f *FST) ReadFirstTargetArc(ctx context.Context, in BytesReader, follow *Arc, arc *Arc) (*Arc, error)

ReadFirstTargetArc Follow the follow arc and read the first arc of its target; this changes the provided arc (the last argument) in place and returns it. Returns: the arc argument.

func (*FST) ReadLabel

func (f *FST) ReadLabel(ctx context.Context, in store.DataInput) (int, error)

ReadLabel Reads one BYTE1/2/4 label from the provided DataInput.

func (*FST) ReadLastArcByDirectAddressing

func (f *FST) ReadLastArcByDirectAddressing(ctx context.Context, arc *Arc, in BytesReader) (*Arc, error)

ReadLastArcByDirectAddressing Reads the last arc of a direct addressing node. This method is equivalent to calling ReadArcByDirectAddressing with rangeIndex equal to arc.NumArcs() - 1, but it is faster.

func (*FST) ReadNextArc

func (f *FST) ReadNextArc(ctx context.Context, arc *Arc, in BytesReader) (*Arc, error)

ReadNextArc In-place read; returns the arc.

func (*FST) ReadNextRealArc

func (f *FST) ReadNextRealArc(ctx context.Context, in BytesReader, arc *Arc) (*Arc, error)

ReadNextRealArc Never returns nil, but you should never call this if arc.IsLast() is true.

func (*FST) Save

func (f *FST) Save(ctx context.Context, metaOut store.DataOutput, out store.DataOutput) error

func (*FST) SaveToFile

func (f *FST) SaveToFile(ctx context.Context, path string) error

func (*FST) SetEmptyOutput

func (f *FST) SetEmptyOutput(output Output) error

type InputType

type InputType int

type Int

type Int interface {
	~int | ~int8 | ~int16 | ~int32 | ~int64 | ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64
}

type IntBox

type IntBox[T Int] struct {
	// contains filtered or unexported fields
}

func NewIntBox

func NewIntBox[T Int](v T) *IntBox[T]

func (*IntBox[T]) Add

func (b *IntBox[T]) Add(v Output) (Output, error)

func (*IntBox[T]) Common

func (b *IntBox[T]) Common(v Output) (Output, error)

func (*IntBox[T]) Equal

func (b *IntBox[T]) Equal(v Output) bool

func (*IntBox[T]) Hash

func (b *IntBox[T]) Hash() int64

func (*IntBox[T]) IsNoOutput

func (b *IntBox[T]) IsNoOutput() bool

func (*IntBox[T]) Merge

func (b *IntBox[T]) Merge(v Output) (Output, error)

func (*IntBox[T]) Sub

func (b *IntBox[T]) Sub(v Output) (Output, error)

func (*IntBox[T]) Value

func (b *IntBox[T]) Value() T

type Ints

type Ints[T Int] []T

func (Ints[T]) Add

func (r Ints[T]) Add(v Output) (Output, error)

func (Ints[T]) Common

func (r Ints[T]) Common(v Output) (Output, error)

func (Ints[T]) Equal

func (r Ints[T]) Equal(v Output) bool

func (Ints[T]) Hash

func (r Ints[T]) Hash() int64

func (Ints[T]) IsNoOutput

func (r Ints[T]) IsNoOutput() bool

func (Ints[T]) Merge

func (r Ints[T]) Merge(_ Output) (Output, error)

func (Ints[T]) Sub

func (r Ints[T]) Sub(v Output) (Output, error)

type IntsManager

type IntsManager[T Int] struct {
	// contains filtered or unexported fields
}

func NewIntsManager

func NewIntsManager[T Int](size int) *IntsManager[T]

func (*IntsManager[T]) EmptyOutput

func (r *IntsManager[T]) EmptyOutput() Output

func (*IntsManager[T]) New

func (r *IntsManager[T]) New() Output

func (*IntsManager[T]) Read

func (r *IntsManager[T]) Read(ctx context.Context, in store.DataInput, v any) error

func (*IntsManager[T]) ReadFinalOutput

func (r *IntsManager[T]) ReadFinalOutput(ctx context.Context, in store.DataInput, v any) error

func (*IntsManager[T]) SkipFinalOutput

func (r *IntsManager[T]) SkipFinalOutput(ctx context.Context, in store.DataInput) error

func (*IntsManager[T]) SkipOutput

func (r *IntsManager[T]) SkipOutput(ctx context.Context, in store.DataInput) error

func (*IntsManager[T]) Write

func (r *IntsManager[T]) Write(ctx context.Context, out store.DataOutput, v any) error

func (*IntsManager[T]) WriteFinalOutput

func (r *IntsManager[T]) WriteFinalOutput(ctx context.Context, out store.DataOutput, v any) error

type KV

type KV[T byte | int] struct {
	// contains filtered or unexported fields
}

KV Holds a single input (BytesRef) + output pair.

func (*KV[T]) GetInput

func (i *KV[T]) GetInput() []T

func (*KV[T]) GetOutput

func (i *KV[T]) GetOutput() Output

type LabelManager

type LabelManager interface {
	GetTargetLabel(upto int) int
	GetCurrentLabel(upto int) int
	SetCurrentLabel(label int) error
	Grow()
}

type Node

type Node interface {
	IsCompiled() bool
}

type NodeHash

type NodeHash struct {
	// contains filtered or unexported fields
}

NodeHash Used to dedup states (lookup already-frozen states)

func NewNodeHash

func NewNodeHash(fst *FST, in BytesReader) *NodeHash

func (*NodeHash) Add

func (n *NodeHash) Add(ctx context.Context, builder *Builder, nodeIn *UnCompiledNode) (int64, error)

type OffHeapStore

type OffHeapStore struct {
	// contains filtered or unexported fields
}

OffHeapStore Provides off heap storage of finite state machine (FST), using underlying index input instead of byte store on heap

func (*OffHeapStore) GetReverseBytesReader

func (o *OffHeapStore) GetReverseBytesReader() (BytesReader, error)

func (*OffHeapStore) Init

func (o *OffHeapStore) Init(r io.Reader, numBytes int64) error

func (*OffHeapStore) Size

func (o *OffHeapStore) Size() int64

func (*OffHeapStore) WriteTo

func (o *OffHeapStore) WriteTo(ctx context.Context, out store.DataOutput) error

type OnHeapStore

type OnHeapStore struct {
	// contains filtered or unexported fields
}

OnHeapStore Provides storage of finite state machine (FST), using byte array or byte store allocated on heap.

func NewOnHeapStore

func NewOnHeapStore(maxBlockBits int) (*OnHeapStore, error)

func (*OnHeapStore) GetReverseBytesReader

func (o *OnHeapStore) GetReverseBytesReader() (BytesReader, error)

func (*OnHeapStore) Init

func (o *OnHeapStore) Init(in io.Reader, numBytes int64) error

func (*OnHeapStore) Size

func (o *OnHeapStore) Size() int64

func (*OnHeapStore) WriteTo

func (o *OnHeapStore) WriteTo(ctx context.Context, out store.DataOutput) error

type Output

type Output interface {
	Common(v Output) (Output, error)
	Sub(v Output) (Output, error)
	Add(v Output) (Output, error)
	Merge(v Output) (Output, error)
	IsNoOutput() bool
	Equal(v Output) bool // 检查output是否一致
	Hash() int64
}

type OutputBuilder

type OutputBuilder interface {
	EmptyOutput() Output
	New() Output
}

type OutputManager

type OutputManager interface {
	OutputBuilder
	OutputReader
	OutputWriter
}

type OutputReader

type OutputReader interface {
	// Read Decode an output value previously written with write(Object, DataOutput).
	Read(ctx context.Context, in store.DataInput, v any) error

	// SkipOutput Skip the output; defaults to just calling read and discarding the result.
	SkipOutput(ctx context.Context, in store.DataInput) error

	// ReadFinalOutput Decode an output value previously written with writeFinalOutput(Object, DataOutput).
	// By default this just calls read(DataInput).
	ReadFinalOutput(ctx context.Context, in store.DataInput, v any) error

	// SkipFinalOutput Skip the output previously written with writeFinalOutput;
	// defaults to just calling readFinalOutput and discarding the result.
	SkipFinalOutput(ctx context.Context, in store.DataInput) error
}

type OutputWriter

type OutputWriter interface {
	// Write Encode an output value into a DataOutput.
	Write(ctx context.Context, out store.DataOutput, v any) error

	// WriteFinalOutput Encode a final node output value into a DataOutput.
	// By default this just calls write(Object, DataOutput).
	WriteFinalOutput(ctx context.Context, out store.DataOutput, v any) error
}

type Outputs

type Outputs[T any] interface {

	// Common Eg common("foobar", "food") -> "foo"
	Common(output1, output2 T) (T, error)

	// Subtract Eg sub("foobar", "foo") -> "bar"
	Subtract(output1, inc T) (T, error)

	// Add Eg add("foo", "bar") -> "foobar"
	Add(prefix, output T) (T, error)

	// Write Encode an output value into a DataOutput.
	Write(output T, out store.DataOutput) error

	// WriteFinalOutput Encode a final node output value into a DataOutput.
	// By default this just calls write(Object, DataOutput).
	WriteFinalOutput(output T, out store.DataOutput) error

	// Read Decode an output value previously written with write(Object, DataOutput).
	Read(in store.DataInput) (T, error)

	// SkipOutput Skip the output; defaults to just calling read and discarding the result.
	SkipOutput(in store.DataInput) error

	// ReadFinalOutput Decode an output value previously written with writeFinalOutput(Object, DataOutput).
	// By default this just calls read(DataInput).
	ReadFinalOutput(in store.DataInput) (T, error)

	// SkipFinalOutput Skip the output previously written with writeFinalOutput;
	// defaults to just calling readFinalOutput and discarding the result.
	SkipFinalOutput(in store.DataInput) error

	IsNoOutput(v T) bool

	GetNoOutput() T

	Merge(first, second T) (T, error)
}

Outputs Represents the output for an FST, providing the basic algebra required for building and traversing the FST. Note that any operation that returns noOutput must return the same singleton object from getNoOutput. lucene.experimental

type PendingArc

type PendingArc struct {
	Label           int
	Target          Node
	IsFinal         bool
	Output          Output
	NextFinalOutput Output
}

PendingArc Expert: holds a pending (seen but not yet serialized) arc.

type PostingOutput

type PostingOutput struct {
	LastDocsStart int64
	SkipPointer   int64
	DocFreq       int64
	TotalTermFreq int64
}

func NewPostingOutput

func NewPostingOutput(lastDocsStart, skipPointer, docFreq, totalTermFreq int64) *PostingOutput

func (*PostingOutput) Add

func (r *PostingOutput) Add(v Output) (Output, error)

func (*PostingOutput) Common

func (r *PostingOutput) Common(v Output) (Output, error)

func (*PostingOutput) Equal

func (r *PostingOutput) Equal(v Output) bool

func (*PostingOutput) Hash

func (r *PostingOutput) Hash() int64

func (*PostingOutput) IsNoOutput

func (r *PostingOutput) IsNoOutput() bool

func (*PostingOutput) Merge

func (r *PostingOutput) Merge(v Output) (Output, error)

func (*PostingOutput) Sub

func (r *PostingOutput) Sub(v Output) (Output, error)

type PostingOutputManager

type PostingOutputManager struct {
	// contains filtered or unexported fields
}

func NewPostingOutputManager

func NewPostingOutputManager() *PostingOutputManager

func (*PostingOutputManager) EmptyOutput

func (p *PostingOutputManager) EmptyOutput() Output

func (*PostingOutputManager) New

func (p *PostingOutputManager) New() Output

func (*PostingOutputManager) Read

func (*PostingOutputManager) ReadFinalOutput

func (p *PostingOutputManager) ReadFinalOutput(ctx context.Context, in store.DataInput, v any) error

func (*PostingOutputManager) SkipFinalOutput

func (p *PostingOutputManager) SkipFinalOutput(ctx context.Context, in store.DataInput) error

func (*PostingOutputManager) SkipOutput

func (p *PostingOutputManager) SkipOutput(ctx context.Context, in store.DataInput) error

func (*PostingOutputManager) Write

func (*PostingOutputManager) WriteFinalOutput

func (p *PostingOutputManager) WriteFinalOutput(ctx context.Context, out store.DataOutput, v any) error

type ReverseBytesReader

type ReverseBytesReader struct {
	*store.BaseDataInput
	// contains filtered or unexported fields
}

func (*ReverseBytesReader) Clone

func (*ReverseBytesReader) GetPosition

func (r *ReverseBytesReader) GetPosition() int64

func (*ReverseBytesReader) Read

func (r *ReverseBytesReader) Read(b []byte) (int, error)

func (*ReverseBytesReader) ReadByte

func (r *ReverseBytesReader) ReadByte() (byte, error)

func (*ReverseBytesReader) Reversed

func (r *ReverseBytesReader) Reversed() bool

func (*ReverseBytesReader) SetPosition

func (r *ReverseBytesReader) SetPosition(pos int64) error

func (*ReverseBytesReader) SkipBytes

func (r *ReverseBytesReader) SkipBytes(ctx context.Context, numBytes int) error

type ReverseRandomAccessReader

type ReverseRandomAccessReader struct {
	*store.BaseDataInput
	// contains filtered or unexported fields
}

func (*ReverseRandomAccessReader) Clone

func (*ReverseRandomAccessReader) GetPosition

func (r *ReverseRandomAccessReader) GetPosition() int64

func (*ReverseRandomAccessReader) Read

func (r *ReverseRandomAccessReader) Read(b []byte) (int, error)

func (*ReverseRandomAccessReader) ReadByte

func (r *ReverseRandomAccessReader) ReadByte() (byte, error)

func (*ReverseRandomAccessReader) Reversed

func (r *ReverseRandomAccessReader) Reversed() bool

func (*ReverseRandomAccessReader) SetPosition

func (r *ReverseRandomAccessReader) SetPosition(pos int64) error

func (*ReverseRandomAccessReader) SkipBytes

func (r *ReverseRandomAccessReader) SkipBytes(ctx context.Context, numBytes int) error

type Store

type Store interface {
	Init(in io.Reader, numBytes int64) error

	Size() int64

	GetReverseBytesReader() (BytesReader, error)

	WriteTo(ctx context.Context, out store.DataOutput) error
}

Store Abstraction for reading/writing bytes necessary for FST.

type UnCompiledNode

type UnCompiledNode struct {
	Arcs       []*PendingArc
	Output     Output
	IsFinal    bool
	InputCount int
	Depth      int // This node's depth, starting from the automaton root.
	// contains filtered or unexported fields
}

UnCompiledNode TODO: instead of recording isFinal/output on the node, maybe we should use -1 arc to mean "end" (like we do when reading the FST). Would simplify much code here...

func NewUnCompiledNode

func NewUnCompiledNode(builder *Builder, depth int) *UnCompiledNode

func (*UnCompiledNode) AddArc

func (u *UnCompiledNode) AddArc(label int, target Node)

func (*UnCompiledNode) Clear

func (u *UnCompiledNode) Clear()

func (*UnCompiledNode) Code

func (u *UnCompiledNode) Code() int64

func (*UnCompiledNode) DeleteLast

func (u *UnCompiledNode) DeleteLast(ctx context.Context, label int, target Node) error

DeleteLast removes the target arc.

func (*UnCompiledNode) GetLastOutput

func (u *UnCompiledNode) GetLastOutput() Output

func (*UnCompiledNode) IsCompiled

func (u *UnCompiledNode) IsCompiled() bool

func (*UnCompiledNode) NumArcs

func (u *UnCompiledNode) NumArcs() int

func (*UnCompiledNode) PrependOutput

func (u *UnCompiledNode) PrependOutput(outputPrefix Output) error

PrependOutput pushes an output prefix forward onto all arcs, i.e., every arc gets the output prefix added.

func (*UnCompiledNode) ReplaceLast

func (u *UnCompiledNode) ReplaceLast(labelToMatch int, target Node, nextFinalOutput Output, isFinal bool) error

ReplaceLast replaces the internal data of the last arc.

func (*UnCompiledNode) SetLastOutput

func (u *UnCompiledNode) SetLastOutput(ctx context.Context, label int, newOutput Output) error

SetLastOutput sets the output object of the last arc.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL