Documentation ¶
Index ¶
- Variables
- func DedupeByFileMetadata(ctx context.Context, d *Datastore, datasetID string, content []byte, ...) (bool, error)
- func DummyDedupe(ctx context.Context, d *Datastore, datasetID string, content []byte, ...) (bool, error)
- func GetDatastorePaths(dsn, vectordbPath string) (string, string, error)
- func GetDocuments(ctx context.Context, filename, filetype string, reader io.Reader, ...) ([]vs.Document, error)
- func NewLcgoMarkdownSplitter(opts TextSplitterOpts) *lcgosplitter.MarkdownTextSplitter
- func NewLcgoTextSplitter(opts TextSplitterOpts) lcgosplitter.TokenSplitter
- type Datastore
- func (s *Datastore) DeleteDataset(ctx context.Context, datasetID string) error
- func (s *Datastore) DeleteDocument(ctx context.Context, documentID, datasetID string) error
- func (s *Datastore) DeleteFile(ctx context.Context, datasetID, fileID string) error
- func (s *Datastore) GetDataset(ctx context.Context, datasetID string) (*index.Dataset, error)
- func (s *Datastore) Ingest(ctx context.Context, datasetID string, content []byte, opts IngestOpts) ([]string, error)
- func (s *Datastore) ListDatasets(ctx context.Context) ([]index.Dataset, error)
- func (s *Datastore) NewDataset(ctx context.Context, dataset index.Dataset) error
- func (s *Datastore) Retrieve(ctx context.Context, datasetID string, query string, topk int) ([]vectorstore.Document, error)
- type IngestOpts
- type IsDuplicateFunc
- type TextSplitterOpts
Constants ¶
This section is empty.
Variables ¶
var ErrDBDocumentNotFound = errors.New("document not found in database")
ErrDBDocumentNotFound is returned when a document is not found in the database.
var ErrDBFileNotFound = errors.New("file not found in database")
ErrDBFileNotFound is returned when a file is not found in the database.
var IsDuplicateFuncs = map[string]IsDuplicateFunc{ "file_metadata": DedupeByFileMetadata, "dummy": DummyDedupe, "none": DummyDedupe, "ignore": DummyDedupe, }
IsDuplicateFuncs is a map of deduplication functions by name.
Functions ¶
func DedupeByFileMetadata ¶
func DedupeByFileMetadata(ctx context.Context, d *Datastore, datasetID string, content []byte, opts IngestOpts) (bool, error)
DedupeByFileMetadata is a deduplication function that checks if the document is a duplicate based on the file metadata.
func DummyDedupe ¶
func DummyDedupe(ctx context.Context, d *Datastore, datasetID string, content []byte, opts IngestOpts) (bool, error)
DummyDedupe is a dummy deduplication function that always returns false (i.e. "No Duplicate").
func GetDocuments ¶
func NewLcgoMarkdownSplitter ¶ added in v0.1.5
func NewLcgoMarkdownSplitter(opts TextSplitterOpts) *lcgosplitter.MarkdownTextSplitter
func NewLcgoTextSplitter ¶ added in v0.1.4
func NewLcgoTextSplitter(opts TextSplitterOpts) lcgosplitter.TokenSplitter
NewLcgoTextSplitter returns a new langchain-go text splitter.
Types ¶
type Datastore ¶
type Datastore struct { Index *index.DB Vectorstore vectorstore.VectorStore }
func NewDatastore ¶
func (*Datastore) DeleteDataset ¶
func (*Datastore) DeleteDocument ¶
func (*Datastore) DeleteFile ¶
func (*Datastore) GetDataset ¶
func (*Datastore) Ingest ¶
func (s *Datastore) Ingest(ctx context.Context, datasetID string, content []byte, opts IngestOpts) ([]string, error)
Ingest loads a document from the given content bytes and adds it to the dataset.
func (*Datastore) ListDatasets ¶
func (*Datastore) NewDataset ¶
type IngestOpts ¶
type IngestOpts struct { Filename *string FileMetadata *index.FileMetadata IsDuplicateFuncName string IsDuplicateFunc IsDuplicateFunc TextSplitterOpts *TextSplitterOpts }
type IsDuplicateFunc ¶
type IsDuplicateFunc func(ctx context.Context, d *Datastore, datasetID string, content []byte, opts IngestOpts) (bool, error)
IsDuplicateFunc is a function that determines whether a document is a duplicate or should be ingested. It should return true if the document is a duplicate (and thus should not be ingested) and false otherwise.
type TextSplitterOpts ¶ added in v0.1.4
type TextSplitterOpts struct { ChunkSize int `usage:"Textsplitter Chunk Size" default:"1024" env:"KNOW_TEXTSPLITTER_CHUNK_SIZE" name:"textsplitter-chunk-size"` ChunkOverlap int `usage:"Textsplitter Chunk Overlap" default:"256" env:"KNOW_TEXTSPLITTER_CHUNK_OVERLAP" name:"textsplitter-chunk-overlap"` ModelName string `usage:"Textsplitter Model Name" default:"gpt-4" env:"KNOW_TEXTSPLITTER_MODEL_NAME" name:"textsplitter-model-name"` EncodingName string `` /* 128-byte string literal not displayed */ }
func NewTextSplitterOpts ¶ added in v0.1.4
func NewTextSplitterOpts() TextSplitterOpts
NewTextSplitterOpts returns the default options for a text splitter.