Documentation ¶
Index ¶
- Variables
- func DedupeByFileMetadata(ctx context.Context, d *Datastore, datasetID string, content []byte, ...) (bool, error)
- func DedupeUpsert(ctx context.Context, d *Datastore, datasetID string, content []byte, ...) (bool, error)
- func DummyDedupe(ctx context.Context, d *Datastore, datasetID string, content []byte, ...) (bool, error)
- func GetDefaultDSNs(indexDSN, vectorDSN string) (string, string, bool, error)
- func LogEmbeddingFunc(embeddingFunc cg.EmbeddingFunc) cg.EmbeddingFunc
- type Datastore
- func (s *Datastore) DeleteDataset(ctx context.Context, datasetID string) error
- func (s *Datastore) DeleteDocument(ctx context.Context, documentID, datasetID string) error
- func (s *Datastore) DeleteFile(ctx context.Context, datasetID, fileID string) error
- func (s *Datastore) ExportDatasetsToFile(ctx context.Context, path string, datasets ...string) error
- func (s *Datastore) FindFile(ctx context.Context, searchFile index.File) (*index.File, error)
- func (s *Datastore) GetDataset(ctx context.Context, datasetID string) (*index.Dataset, error)
- func (s *Datastore) GetDocuments(ctx context.Context, datasetID string, where map[string]string, ...) ([]types.Document, error)
- func (s *Datastore) ImportDatasetsFromFile(ctx context.Context, path string, datasets ...string) error
- func (s *Datastore) Ingest(ctx context.Context, datasetID string, name string, content []byte, ...) ([]string, error)
- func (s *Datastore) ListDatasets(ctx context.Context) ([]index.Dataset, error)
- func (s *Datastore) NewDataset(ctx context.Context, dataset index.Dataset) error
- func (s *Datastore) PruneFiles(ctx context.Context, datasetID string, pathPrefix string, keep []string) ([]index.File, error)
- func (s *Datastore) Retrieve(ctx context.Context, datasetIDs []string, query string, opts RetrieveOpts) (*types.RetrievalResponse, error)
- func (s *Datastore) SimilaritySearch(ctx context.Context, query string, numDocuments int, datasetID string, ...) ([]types2.Document, error)
- func (s *Datastore) UpdateDataset(ctx context.Context, updatedDataset index.Dataset, opts *UpdateDatasetOpts) (*index.Dataset, error)
- type IngestOpts
- type IsDuplicateFunc
- type RetrieveOpts
- type UpdateDatasetOpts
Constants ¶
This section is empty.
Variables ¶
var ErrDBDatasetExists = errors.New("dataset already exists in database")
var ErrDBDocumentNotFound = errors.New("document not found in database")
ErrDBDocumentNotFound is returned when a document is not found in the database.
var ErrDBFileNotFound = errors.New("file not found in database")
ErrDBFileNotFound is returned when a file is not found in the database.
var IsDuplicateFuncs = map[string]IsDuplicateFunc{ "file_metadata": DedupeByFileMetadata, "dummy": DummyDedupe, "none": DummyDedupe, "ignore": DummyDedupe, "upsert": DedupeUpsert, }
IsDuplicateFuncs is a map of deduplication functions by name.
Functions ¶
func DedupeByFileMetadata ¶
func DedupeByFileMetadata(ctx context.Context, d *Datastore, datasetID string, content []byte, opts IngestOpts) (bool, error)
DedupeByFileMetadata is a deduplication function that checks if the document is a duplicate based on the file metadata.
func DedupeUpsert ¶ added in v0.4.3
func DummyDedupe ¶
func DummyDedupe(ctx context.Context, d *Datastore, datasetID string, content []byte, opts IngestOpts) (bool, error)
DummyDedupe is a dummy deduplication function that always returns false (i.e. "No Duplicate").
func GetDefaultDSNs ¶ added in v0.4.14
GetDefaultDSNs returns the paths for the datastore and vectorstore databases. In addition, it returns a boolean indicating whether the datastore is an archive.
func LogEmbeddingFunc ¶ added in v0.4.14
func LogEmbeddingFunc(embeddingFunc cg.EmbeddingFunc) cg.EmbeddingFunc
Types ¶
type Datastore ¶
type Datastore struct { LLM llm.LLM Index *index.DB Vectorstore vectorstore.VectorStore EmbeddingConfig config.EmbeddingsConfig EmbeddingModelProvider etypes.EmbeddingModelProvider }
func NewDatastore ¶
func (*Datastore) DeleteDataset ¶
func (*Datastore) DeleteDocument ¶
func (*Datastore) DeleteFile ¶
func (*Datastore) ExportDatasetsToFile ¶ added in v0.1.7
func (*Datastore) GetDataset ¶
func (*Datastore) GetDocuments ¶ added in v0.4.11
func (*Datastore) ImportDatasetsFromFile ¶ added in v0.1.7
func (*Datastore) Ingest ¶
func (s *Datastore) Ingest(ctx context.Context, datasetID string, name string, content []byte, opts IngestOpts) ([]string, error)
Ingest loads a document from the given content bytes and adds it to the dataset.
func (*Datastore) ListDatasets ¶
func (*Datastore) NewDataset ¶
func (*Datastore) PruneFiles ¶ added in v0.4.3
func (*Datastore) Retrieve ¶
func (s *Datastore) Retrieve(ctx context.Context, datasetIDs []string, query string, opts RetrieveOpts) (*types.RetrievalResponse, error)
func (*Datastore) SimilaritySearch ¶ added in v0.1.8
type IngestOpts ¶
type IngestOpts struct { FileMetadata *index.FileMetadata IsDuplicateFuncName string IsDuplicateFunc IsDuplicateFunc TextSplitterOpts *textsplitter.TextSplitterOpts IngestionFlows []flows.IngestionFlow ExtraMetadata map[string]any }
type IsDuplicateFunc ¶
type IsDuplicateFunc func(ctx context.Context, d *Datastore, datasetID string, content []byte, opts IngestOpts) (bool, error)
IsDuplicateFunc is a function that determines whether a document is a duplicate or should be ingested. It should return true if the document is a duplicate (and thus should not be ingested) and false otherwise.
type RetrieveOpts ¶ added in v0.1.6
type RetrieveOpts struct { TopK int Keywords []string RetrievalFlow *flows.RetrievalFlow }
type UpdateDatasetOpts ¶ added in v0.1.8
type UpdateDatasetOpts struct {
ReplaceMedata bool
}
Source Files ¶
Directories ¶
Path | Synopsis |
---|---|
lib
|
|
Package postprocessors is basically the same as package transformers, but used at a different stage of the RAG pipeline
|
Package postprocessors is basically the same as package transformers, but used at a different stage of the RAG pipeline |