fst

package
v1.0.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 12, 2020 License: Apache-2.0 Imports: 28 Imported by: 10

README

FS Segment

  • Version 1.1: Adds support for a metadata proto object per Field. This is used to store an additional postings offset per Field, pointing to a PostingsList comprising the union of all known PostingsLists across all known Terms for that Field.
┌───────────────────────────────┐            ┌──────────────────────────────────────┐
│ FST Fields File               │            │ FST Terms File                       │
│-------------------------------│            │--------------------------------------│
│- Vellum V1 Format             │            │`n` records, each:                    │
│- []byte -> FST Terms Offset   ├─────┐      │  - metadata proto (`md-size` bytes)  │
└───────────────────────────────┘     │      │  - md-size (int64)                   │
                                      │      │  - fst payload (`fst size` bytes)    │
                                      │      │  - fst size (int64)                  │
                                      └─────▶│  - magic number (int64)              │
                                             │                                      │
                                             │Payload:                              │
                                             │(1) Vellum V1 FST                     ├─┐
                                             │[]byte -> Postings Offset             │ │
                                             │                                      │ │
                                             │(2) Metadata Proto Bytes              │ │
                                             │Field Postings Offset                 │ │
                                             └──────────────────────────────────────┘ │
                                                   ┌───────────────────────────────┐  │
                                                   │ Postings Data File            │  │
                                                   │-------------------------------│  │
                                                   │`n` records, each:             │  │
                                                   │  - payload (`size` bytes)     │  │
                                                   │  - size (int64)               │  │
                                                   │  - magic number (int64)       │◀─┘
                                                   │                               │
                                                   │Payload:                       │
                                                   │- Pilosa Bitset                ├──┐
            ┌───────────────────────────┐          │- List of doc.ID               │  │
            │ Documents Data File       │          └───────────────────────────────┘  │
            │-------------------------  │                                             │
            │'n' records, each:         │                ┌─────────────────────────┐  │
            │  - Magic Number (int64)   │                │ Documents Index File    │  │
            │  - Valid (1 byte)         │                │-------------------------│  │
            │  - Size (int64)           │                │- Magic Number (int64)   │  │
            │  - Payload (`size` bytes) │                │- Num docs (int64)       │  │
            └───────────────────────────┘        ┌───────│- Base Doc.ID `b` (int64)│◀─┘
                          ▲                      │       │- Doc `b` offset (int64) │
                          │                      │       │- Doc `b+1` offset       │
                          └──────────────────────┘       │...                      │
                                                         │- Doc `b+n-1` offset     │
                                                         └─────────────────────────┘

  • Version 1.0: Initial Release.

┌───────────────────────────────┐           ┌───────────────────────────────┐
│ FST Fields File               │           │ FST Terms File                │
│-------------------------------│           │-------------------------------│
│- Vellum V1 FST                │           │`n` records, each:             │
│- []byte -> FST Terms Offset   │─────┐     │  - payload (`size` bytes)     │
└───────────────────────────────┘     │     │  - size (int64)               │
                                      └────▶│  - magic number (int64)       │
                                            │                               │
                                            │Payload:                       │
                                            │- Vellum V1 FST                │
                                            │- []byte -> Postings Offset    │
                                            └───────────────────────────────┘
        ┌───────────────────────────────┐                   │
        │ Postings Data File            │                   │
        │-------------------------------│                   │
        │`n` records, each:             │                   │
        │  - payload (`size` bytes)     │                   │
        │  - size (int64)               │                   │
        │  - magic number (int64)       │◀──────────────────┘
        │                               │
        │Payload:                       │
        │- Pilosa Bitset                │
        │- List of doc.ID               │
        └──────────┬────────────────────┘
                   │
                   │
                   │
                   │       ┌──────────────────────────┐           ┌───────────────────────────┐
                   │       │ Documents Index File     │           │ Documents Data File       │
                   │       │--------------------------│           │-------------------------  │
                   │       │- Base Doc.ID `b` (uint64)│           │'n' records, each:         │
                   │       │- Doc `b` offset (uint64) │    ┌─────▶│  - ID (bytes)             │
                   │       │- Doc `b+1` offset        │    │      │  - Fields (bytes)         │
                   └──────▶│...                       ├────┘      └───────────────────────────┘
                           │- Doc `b+n-1` offset      │
                           └──────────────────────────┘

Documentation

Overview

Package fst is a generated GoMock package.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func ToTestSegment added in v0.4.3

func ToTestSegment(t *testing.T, s sgmt.MutableSegment, opts Options) sgmt.Segment

ToTestSegment returns an FST segment equivalent to the provided mutable segment.

Types

type DocumentsWriter added in v0.15.0

type DocumentsWriter struct {
	// contains filtered or unexported fields
}

DocumentsWriter writes out documents data given a doc iterator.

func NewDocumentsWriter added in v0.15.0

func NewDocumentsWriter() (*DocumentsWriter, error)

NewDocumentsWriter creates a new documents writer.

func (*DocumentsWriter) Reset added in v0.15.0

func (w *DocumentsWriter) Reset(opts DocumentsWriterOptions)

Reset resets the documents writer for writing out.

func (*DocumentsWriter) WriteDocumentsData added in v0.15.0

func (w *DocumentsWriter) WriteDocumentsData(iow io.Writer) error

WriteDocumentsData writes out the documents data.

func (*DocumentsWriter) WriteDocumentsIndex added in v0.15.0

func (w *DocumentsWriter) WriteDocumentsIndex(iow io.Writer) error

WriteDocumentsIndex writes out the documents index data.

type DocumentsWriterOptions added in v0.15.0

type DocumentsWriterOptions struct {
	// Iter is the ID and document iterator, required.
	Iter index.IDDocIterator
	// SizeHint is the size hint, optional.
	SizeHint int
}

DocumentsWriterOptions is a set of options to pass to the documents writer.

type MockSegment

type MockSegment struct {
	// contains filtered or unexported fields
}

MockSegment is a mock of Segment interface

func NewMockSegment

func NewMockSegment(ctrl *gomock.Controller) *MockSegment

NewMockSegment creates a new mock instance

func (*MockSegment) Close

func (m *MockSegment) Close() error

Close mocks base method

func (*MockSegment) ContainsField added in v0.9.1

func (m *MockSegment) ContainsField(arg0 []byte) (bool, error)

ContainsField mocks base method

func (*MockSegment) ContainsID

func (m *MockSegment) ContainsID(arg0 []byte) (bool, error)

ContainsID mocks base method

func (*MockSegment) EXPECT

func (m *MockSegment) EXPECT() *MockSegmentMockRecorder

EXPECT returns an object that allows the caller to indicate expected use

func (*MockSegment) FieldsIterable added in v0.5.0

func (m *MockSegment) FieldsIterable() segment.FieldsIterable

FieldsIterable mocks base method

func (*MockSegment) FreeMmap added in v0.15.0

func (m *MockSegment) FreeMmap() error

FreeMmap mocks base method

func (*MockSegment) Reader

func (m *MockSegment) Reader() (segment.Reader, error)

Reader mocks base method

func (*MockSegment) SegmentData added in v0.15.0

func (m *MockSegment) SegmentData(arg0 context.Context) (SegmentData, error)

SegmentData mocks base method

func (*MockSegment) Size

func (m *MockSegment) Size() int64

Size mocks base method

func (*MockSegment) TermsIterable added in v0.5.0

func (m *MockSegment) TermsIterable() segment.TermsIterable

TermsIterable mocks base method

type MockSegmentMockRecorder

type MockSegmentMockRecorder struct {
	// contains filtered or unexported fields
}

MockSegmentMockRecorder is the mock recorder for MockSegment

func (*MockSegmentMockRecorder) Close

func (mr *MockSegmentMockRecorder) Close() *gomock.Call

Close indicates an expected call of Close

func (*MockSegmentMockRecorder) ContainsField added in v0.9.1

func (mr *MockSegmentMockRecorder) ContainsField(arg0 interface{}) *gomock.Call

ContainsField indicates an expected call of ContainsField

func (*MockSegmentMockRecorder) ContainsID

func (mr *MockSegmentMockRecorder) ContainsID(arg0 interface{}) *gomock.Call

ContainsID indicates an expected call of ContainsID

func (*MockSegmentMockRecorder) FieldsIterable added in v0.5.0

func (mr *MockSegmentMockRecorder) FieldsIterable() *gomock.Call

FieldsIterable indicates an expected call of FieldsIterable

func (*MockSegmentMockRecorder) FreeMmap added in v0.15.0

func (mr *MockSegmentMockRecorder) FreeMmap() *gomock.Call

FreeMmap indicates an expected call of FreeMmap

func (*MockSegmentMockRecorder) Reader

func (mr *MockSegmentMockRecorder) Reader() *gomock.Call

Reader indicates an expected call of Reader

func (*MockSegmentMockRecorder) SegmentData added in v0.15.0

func (mr *MockSegmentMockRecorder) SegmentData(arg0 interface{}) *gomock.Call

SegmentData indicates an expected call of SegmentData

func (*MockSegmentMockRecorder) Size

func (mr *MockSegmentMockRecorder) Size() *gomock.Call

Size indicates an expected call of Size

func (*MockSegmentMockRecorder) TermsIterable added in v0.5.0

func (mr *MockSegmentMockRecorder) TermsIterable() *gomock.Call

TermsIterable indicates an expected call of TermsIterable

type MockWriter

type MockWriter struct {
	// contains filtered or unexported fields
}

MockWriter is a mock of Writer interface

func NewMockWriter

func NewMockWriter(ctrl *gomock.Controller) *MockWriter

NewMockWriter creates a new mock instance

func (*MockWriter) EXPECT

func (m *MockWriter) EXPECT() *MockWriterMockRecorder

EXPECT returns an object that allows the caller to indicate expected use

func (*MockWriter) MajorVersion

func (m *MockWriter) MajorVersion() int

MajorVersion mocks base method

func (*MockWriter) Metadata

func (m *MockWriter) Metadata() []byte

Metadata mocks base method

func (*MockWriter) MinorVersion

func (m *MockWriter) MinorVersion() int

MinorVersion mocks base method

func (*MockWriter) Reset

func (m *MockWriter) Reset(arg0 segment.Builder) error

Reset mocks base method

func (*MockWriter) WriteDocumentsData

func (m *MockWriter) WriteDocumentsData(arg0 io.Writer) error

WriteDocumentsData mocks base method

func (*MockWriter) WriteDocumentsIndex

func (m *MockWriter) WriteDocumentsIndex(arg0 io.Writer) error

WriteDocumentsIndex mocks base method

func (*MockWriter) WriteFSTFields

func (m *MockWriter) WriteFSTFields(arg0 io.Writer) error

WriteFSTFields mocks base method

func (*MockWriter) WriteFSTTerms

func (m *MockWriter) WriteFSTTerms(arg0 io.Writer) error

WriteFSTTerms mocks base method

func (*MockWriter) WritePostingsOffsets

func (m *MockWriter) WritePostingsOffsets(arg0 io.Writer) error

WritePostingsOffsets mocks base method

type MockWriterMockRecorder

type MockWriterMockRecorder struct {
	// contains filtered or unexported fields
}

MockWriterMockRecorder is the mock recorder for MockWriter

func (*MockWriterMockRecorder) MajorVersion

func (mr *MockWriterMockRecorder) MajorVersion() *gomock.Call

MajorVersion indicates an expected call of MajorVersion

func (*MockWriterMockRecorder) Metadata

func (mr *MockWriterMockRecorder) Metadata() *gomock.Call

Metadata indicates an expected call of Metadata

func (*MockWriterMockRecorder) MinorVersion

func (mr *MockWriterMockRecorder) MinorVersion() *gomock.Call

MinorVersion indicates an expected call of MinorVersion

func (*MockWriterMockRecorder) Reset

func (mr *MockWriterMockRecorder) Reset(arg0 interface{}) *gomock.Call

Reset indicates an expected call of Reset

func (*MockWriterMockRecorder) WriteDocumentsData

func (mr *MockWriterMockRecorder) WriteDocumentsData(arg0 interface{}) *gomock.Call

WriteDocumentsData indicates an expected call of WriteDocumentsData

func (*MockWriterMockRecorder) WriteDocumentsIndex

func (mr *MockWriterMockRecorder) WriteDocumentsIndex(arg0 interface{}) *gomock.Call

WriteDocumentsIndex indicates an expected call of WriteDocumentsIndex

func (*MockWriterMockRecorder) WriteFSTFields

func (mr *MockWriterMockRecorder) WriteFSTFields(arg0 interface{}) *gomock.Call

WriteFSTFields indicates an expected call of WriteFSTFields

func (*MockWriterMockRecorder) WriteFSTTerms

func (mr *MockWriterMockRecorder) WriteFSTTerms(arg0 interface{}) *gomock.Call

WriteFSTTerms indicates an expected call of WriteFSTTerms

func (*MockWriterMockRecorder) WritePostingsOffsets

func (mr *MockWriterMockRecorder) WritePostingsOffsets(arg0 interface{}) *gomock.Call

WritePostingsOffsets indicates an expected call of WritePostingsOffsets

type Options

type Options interface {
	// SetInstrumentOptions sets the instrument options.
	SetInstrumentOptions(value instrument.Options) Options

	// InstrumentOptions returns the instrument options.
	InstrumentOptions() instrument.Options

	// SetBytesPool sets the bytes pool.
	SetBytesPool(value pool.BytesPool) Options

	// BytesPool returns the bytes pool.
	BytesPool() pool.BytesPool

	// SetPostingsListPool sets the postings list pool.
	SetPostingsListPool(value postings.Pool) Options

	// PostingsListPool returns the postings list pool.
	PostingsListPool() postings.Pool

	// SetContextPool sets the contextPool.
	SetContextPool(value context.Pool) Options

	// ContextPool returns the contextPool.
	ContextPool() context.Pool
}

Options is a collection of knobs for a fs segment.

func NewOptions

func NewOptions() Options

NewOptions returns new options.

type Segment

type Segment interface {
	sgmt.ImmutableSegment

	// SegmentData returns the segment data used to create the segment.
	// Note: Must close context when done with the data
	// so that resources can be freed safely.
	SegmentData(ctx context.Context) (SegmentData, error)
}

Segment is an FST segment.

func NewSegment

func NewSegment(data SegmentData, opts Options) (Segment, error)

NewSegment returns a new Segment backed by the provided data and options. NB(prateek): this method only assumes ownership of the data if it returns a nil error; otherwise, the user is expected to handle the lifecycle of the input.

type SegmentData

type SegmentData struct {
	Version  Version
	Metadata []byte

	DocsData      mmap.Descriptor
	DocsIdxData   mmap.Descriptor
	PostingsData  mmap.Descriptor
	FSTTermsData  mmap.Descriptor
	FSTFieldsData mmap.Descriptor

	// DocsReader is an alternative to specifying
	// the docs data and docs idx data if the documents
	// already reside in memory and we want to use the
	// in memory references instead.
	DocsReader docs.Reader

	Closer io.Closer
}

SegmentData represents the collection of required parameters to construct a Segment.

func (SegmentData) Validate

func (sd SegmentData) Validate() error

Validate validates the provided segment data, returning an error if it is not valid.

type Version added in v0.8.2

type Version struct {
	Major int
	Minor int
}

Version controls internal behaviour of the fst package.

var (
	// CurrentVersion describes the default current Version.
	CurrentVersion Version = Version{Major: 1, Minor: 1}

	// SupportedVersions lists all supported versions of the FST package.
	SupportedVersions = []Version{

		Version{Major: 1, Minor: 1},

		Version{Major: 1, Minor: 0},
	}
)

func (Version) Supported added in v0.8.2

func (v Version) Supported() error

Supported returns an error if the version is not supported.

type Writer

type Writer interface {
	// Reset sets the Writer to persist the provided segment.
	// NB(prateek): if provided segment is a mutable segment it must be sealed.
	Reset(s sgmt.Builder) error

	// MajorVersion is the major version for the writer.
	MajorVersion() int

	// MinorVersion is the minor version for the writer.
	MinorVersion() int

	// Metadata returns metadata about the writer.
	Metadata() []byte

	// WriteDocumentsData writes out the documents data to the provided writer.
	WriteDocumentsData(w io.Writer) error

	// WriteDocumentsIndex writes out the documents index to the provided writer.
	// NB(prateek): this must be called after WriteDocumentsData().
	WriteDocumentsIndex(w io.Writer) error

	// WritePostingsOffsets writes out the postings offset file to the provided
	// writer.
	WritePostingsOffsets(w io.Writer) error

	// WriteFSTTerms writes out the FSTTerms file using the provided writer.
	// NB(prateek): this must be called after WritePostingsOffsets().
	WriteFSTTerms(w io.Writer) error

	// WriteFSTFields writes out the FSTFields file using the provided writer.
	// NB(prateek): this must be called after WriteFSTTerms().
	WriteFSTFields(w io.Writer) error
}

Writer writes out an FST segment from the provided elements.

func NewWriter

func NewWriter(opts WriterOptions) (Writer, error)

NewWriter returns a new writer.

type WriterOptions added in v0.5.0

type WriterOptions struct {
	// DisableRegistry disables the FST builder node registry cache, which can
	// de-duplicate transitions that are an exact match of each other during
	// a final compilation phase. This helps compress the FST by a significant
	// amount (e.g. 2x). You can, however, disable this to speed up high fixed
	// cost lookups during building of the FST.
	DisableRegistry bool
}

WriterOptions is a set of options used when writing an FST.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL