Documentation ¶
Index ¶
- Variables
- func DedupeByFileMetadata(ctx context.Context, d *Datastore, datasetID string, content []byte, ...) (bool, error)
- func DummyDedupe(ctx context.Context, d *Datastore, datasetID string, content []byte, ...) (bool, error)
- func GetDatastorePaths(dsn, vectordbPath string) (string, string, error)
- func GetDocuments(ctx context.Context, filename, filetype string, reader io.Reader) ([]vs.Document, error)
- type Datastore
- func (s *Datastore) DeleteDataset(ctx context.Context, datasetID string) error
- func (s *Datastore) DeleteDocument(ctx context.Context, documentID, datasetID string) error
- func (s *Datastore) DeleteFile(ctx context.Context, datasetID, fileID string) error
- func (s *Datastore) GetDataset(ctx context.Context, datasetID string) (*index.Dataset, error)
- func (s *Datastore) Ingest(ctx context.Context, datasetID string, content []byte, opts IngestOpts) ([]string, error)
- func (s *Datastore) ListDatasets(ctx context.Context) ([]types.Dataset, error)
- func (s *Datastore) NewDataset(ctx context.Context, dataset types.Dataset) error
- func (s *Datastore) Retrieve(ctx context.Context, datasetID string, query types.Query) ([]vectorstore.Document, error)
- type IngestOpts
- type IsDuplicateFunc
Constants ¶
This section is empty.
Variables ¶
var ErrDBDocumentNotFound = errors.New("document not found in database")
ErrDBDocumentNotFound is returned when a document is not found in the database.
var ErrDBFileNotFound = errors.New("file not found in database")
ErrDBFileNotFound is returned when a file is not found in the database.
var IsDuplicateFuncs = map[string]IsDuplicateFunc{ "file_metadata": DedupeByFileMetadata, "dummy": DummyDedupe, "none": DummyDedupe, "ignore": DummyDedupe, }
IsDuplicateFuncs is a map of deduplication functions by name.
Functions ¶
func DedupeByFileMetadata ¶
func DedupeByFileMetadata(ctx context.Context, d *Datastore, datasetID string, content []byte, opts IngestOpts) (bool, error)
DedupeByFileMetadata is a deduplication function that checks if the document is a duplicate based on the file metadata.
func DummyDedupe ¶
func DummyDedupe(ctx context.Context, d *Datastore, datasetID string, content []byte, opts IngestOpts) (bool, error)
DummyDedupe is a dummy deduplication function that always returns false (i.e. "No Duplicate").
Types ¶
type Datastore ¶
type Datastore struct { Index *index.DB Vectorstore vectorstore.VectorStore }
func NewDatastore ¶
func (*Datastore) DeleteDataset ¶
func (*Datastore) DeleteDocument ¶
func (*Datastore) DeleteFile ¶
func (*Datastore) GetDataset ¶
func (*Datastore) Ingest ¶
func (s *Datastore) Ingest(ctx context.Context, datasetID string, content []byte, opts IngestOpts) ([]string, error)
Ingest loads a document from the given content bytes and adds it to the dataset.
func (*Datastore) ListDatasets ¶
func (*Datastore) NewDataset ¶
type IngestOpts ¶
type IngestOpts struct { Filename *string FileMetadata *index.FileMetadata IsDuplicateFuncName string IsDuplicateFunc IsDuplicateFunc }
type IsDuplicateFunc ¶
type IsDuplicateFunc func(ctx context.Context, d *Datastore, datasetID string, content []byte, opts IngestOpts) (bool, error)
IsDuplicateFunc is a function that determines whether a document is a duplicate or if it should be ingested. The function should return true if the document is a duplicate (and thus should not be ingested) and false otherwise.