transformers

package
v0.1.6 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 12, 2024 License: Apache-2.0 Imports: 7 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var TransformerMap = map[string]types.DocumentTransformer{
	"extra_metadata":                  &ExtraMetadata{},
	"filter_markdown_docs_no_content": &FilterMarkdownDocsNoContent{},
	"keywords":                        &KeywordExtractor{},
}

Functions

func DefaultDocumentTransformers

func DefaultDocumentTransformers(filetype string) (transformers []types.DocumentTransformer)

func GetTransformer

func GetTransformer(name string) (types.DocumentTransformer, error)

Types

type ExtraMetadata

type ExtraMetadata struct {
	Metadata map[string]any
}

func (*ExtraMetadata) Transform

func (e *ExtraMetadata) Transform(_ context.Context, docs []vs.Document) ([]vs.Document, error)

type FilterMarkdownDocsNoContent

type FilterMarkdownDocsNoContent struct{}

FilterMarkdownDocsNoContent filters out Markdown documents that have no content or contain only headings.

TODO: This may be moved into the MarkdownTextSplitter.

func (*FilterMarkdownDocsNoContent) Transform

func (f *FilterMarkdownDocsNoContent) Transform(_ context.Context, docs []vs.Document) ([]vs.Document, error)

type GenericTransformer

type GenericTransformer struct {
	TransformationFunc func(context.Context, []vs.Document) ([]vs.Document, error)
}

func (*GenericTransformer) Transform

func (g *GenericTransformer) Transform(ctx context.Context, docs []vs.Document) ([]vs.Document, error)

type KeywordExtractor

type KeywordExtractor struct {
	NumKeywords int
	LLM         llm.LLM
}

func NewKeyWordExtractor

func NewKeyWordExtractor(numKeywords int, llm llm.LLM) *KeywordExtractor

func (*KeywordExtractor) Transform

func (k *KeywordExtractor) Transform(ctx context.Context, docs []vs.Document) ([]vs.Document, error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL