transformers

package
v0.6.1-rc1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 21, 2024 | License: Apache-2.0 | Imports: 7 | Imported by: 0

Documentation

Index

Constants

View Source
const ExtraMetadataName = "extra_metadata"
View Source
const FilterMarkdownDocsNoContentName = "filter_markdown_docs_no_content"
View Source
const KeywordExtractorName = "keywords"
View Source
const MetadataManipulatorName = "metadata"

Variables

View Source
var TransformerMap = map[string]dstypes.DocumentTransformer{
	ExtraMetadataName:               &ExtraMetadata{},
	FilterMarkdownDocsNoContentName: &FilterMarkdownDocsNoContent{},
	KeywordExtractorName:            &KeywordExtractor{},
	MetadataManipulatorName:         &MetadataManipulator{},
}

Functions

func DefaultDocumentTransformers

func DefaultDocumentTransformers(filetype string) (transformers []types.DocumentTransformer)

func GetTransformer

func GetTransformer(name string) (dstypes.DocumentTransformer, error)

Types

type ExtraMetadata

type ExtraMetadata struct {
	Metadata map[string]any
}

func (*ExtraMetadata) Name added in v0.1.9

func (e *ExtraMetadata) Name() string

func (*ExtraMetadata) Transform

func (e *ExtraMetadata) Transform(_ context.Context, docs []vs.Document) ([]vs.Document, error)

type FilterMarkdownDocsNoContent

type FilterMarkdownDocsNoContent struct{}

FilterMarkdownDocsNoContent filters out Markdown documents that have no content or that contain only headings.

TODO: this may be moved into the MarkdownTextSplitter

func (*FilterMarkdownDocsNoContent) Name added in v0.1.9

func (*FilterMarkdownDocsNoContent) Transform

func (f *FilterMarkdownDocsNoContent) Transform(_ context.Context, docs []vs.Document) ([]vs.Document, error)

type GenericTransformer

type GenericTransformer struct {
	TransformationFunc func(context.Context, []vs.Document) ([]vs.Document, error)
}

func (*GenericTransformer) Transform

func (g *GenericTransformer) Transform(ctx context.Context, docs []vs.Document) ([]vs.Document, error)

type KeywordExtractor

type KeywordExtractor struct {
	NumKeywords int
	LLM         llm.LLM
}

func NewKeyWordExtractor

func NewKeyWordExtractor(numKeywords int, llm llm.LLM) *KeywordExtractor

func (*KeywordExtractor) Name added in v0.1.9

func (k *KeywordExtractor) Name() string

func (*KeywordExtractor) Transform

func (k *KeywordExtractor) Transform(ctx context.Context, docs []vs.Document) ([]vs.Document, error)

type MetadataManipulation added in v0.4.12

type MetadataManipulation struct {
	Operator MetadataManipulationOperator `json:"operator,omitempty" mapstructure:"operator"`
	Key      string                       `json:"key,omitempty" mapstructure:"key"`
	Value    any                          `json:"value,omitempty" mapstructure:"value"`
}

type MetadataManipulationOperator added in v0.4.12

type MetadataManipulationOperator string
const (
	MetadataManipulationOperatorAdd    MetadataManipulationOperator = "add"
	MetadataManipulationOperatorUpdate MetadataManipulationOperator = "upsert"
	MetadataManipulationOperatorRemove MetadataManipulationOperator = "remove"
)

type MetadataManipulator added in v0.4.12

type MetadataManipulator struct {
	Manipulations []MetadataManipulation
}

func (*MetadataManipulator) Name added in v0.4.12

func (m *MetadataManipulator) Name() string

func (*MetadataManipulator) Transform added in v0.4.12

func (m *MetadataManipulator) Transform(_ context.Context, docs []vs.Document) ([]vs.Document, error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL