datastore

package
v0.1.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 8, 2024 | License: Apache-2.0 | Imports: 26 | Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var ErrDBDocumentNotFound = errors.New("document not found in database")

ErrDBDocumentNotFound is returned when a document is not found in the database.

View Source
var ErrDBFileNotFound = errors.New("file not found in database")

ErrDBFileNotFound is returned when a file is not found in the database.

View Source
var IsDuplicateFuncs = map[string]IsDuplicateFunc{
	"file_metadata": DedupeByFileMetadata,
	"dummy":         DummyDedupe,
	"none":          DummyDedupe,
	"ignore":        DummyDedupe,
}

IsDuplicateFuncs is a map of deduplication functions by name.

Functions

func DedupeByFileMetadata

func DedupeByFileMetadata(ctx context.Context, d *Datastore, datasetID string, content []byte, opts IngestOpts) (bool, error)

DedupeByFileMetadata is a deduplication function that checks if the document is a duplicate based on the file metadata.

func DummyDedupe

func DummyDedupe(ctx context.Context, d *Datastore, datasetID string, content []byte, opts IngestOpts) (bool, error)

DummyDedupe is a dummy deduplication function that always returns false (i.e. "No Duplicate").

func GetDatastorePaths

func GetDatastorePaths(dsn, vectordbPath string) (string, string, error)

func GetDocuments

func GetDocuments(ctx context.Context, filename, filetype string, reader io.Reader) ([]vs.Document, error)

Types

type Datastore

type Datastore struct {
	Index       *index.DB
	Vectorstore vectorstore.VectorStore
}

func NewDatastore

func NewDatastore(dsn string, automigrate bool, vectorDBPath string, openAIConfig types.OpenAIConfig) (*Datastore, error)

func (*Datastore) DeleteDataset

func (s *Datastore) DeleteDataset(ctx context.Context, datasetID string) error

func (*Datastore) DeleteDocument

func (s *Datastore) DeleteDocument(ctx context.Context, documentID, datasetID string) error

func (*Datastore) DeleteFile

func (s *Datastore) DeleteFile(ctx context.Context, datasetID, fileID string) error

func (*Datastore) GetDataset

func (s *Datastore) GetDataset(ctx context.Context, datasetID string) (*index.Dataset, error)

func (*Datastore) Ingest

func (s *Datastore) Ingest(ctx context.Context, datasetID string, content []byte, opts IngestOpts) ([]string, error)

Ingest loads a document from the provided content bytes and adds it to the dataset identified by datasetID.

func (*Datastore) ListDatasets

func (s *Datastore) ListDatasets(ctx context.Context) ([]types.Dataset, error)

func (*Datastore) NewDataset

func (s *Datastore) NewDataset(ctx context.Context, dataset types.Dataset) error

func (*Datastore) Retrieve

func (s *Datastore) Retrieve(ctx context.Context, datasetID string, query types.Query) ([]vectorstore.Document, error)

type IngestOpts

type IngestOpts struct {
	Filename            *string
	FileMetadata        *index.FileMetadata
	IsDuplicateFuncName string
	IsDuplicateFunc     IsDuplicateFunc
}

type IsDuplicateFunc

type IsDuplicateFunc func(ctx context.Context, d *Datastore, datasetID string, content []byte, opts IngestOpts) (bool, error)

IsDuplicateFunc is a function that determines whether a document is a duplicate or should be ingested. It should return true if the document is a duplicate (and thus should not be ingested) and false otherwise.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL