loader

package
v0.3.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 2, 2024 | License: MIT | Imports: 18 | Imported by: 1

Documentation

Index

Constants

View Source
const (
	SourceMetadataKey = "source"
)

Variables

View Source
var (
	ErrInternal = fmt.Errorf("internal error")
)
View Source
var (
	ErrLibreOfficeNotFound = fmt.Errorf("pdftotext not found")
)
View Source
var (
	ErrPdfToTextNotFound = fmt.Errorf("pdftotext not found")
)
View Source
var (
	ErrTesseractNotFound = fmt.Errorf("pdftotext not found")
)
View Source
var (
	ErrYoutubeDLNotFound = fmt.Errorf("youtube-dl not found")
)

Functions

This section is empty.

Types

type CSVLoader added in v0.0.9

type CSVLoader struct {
	// contains filtered or unexported fields
}

func NewCSV added in v0.1.0

func NewCSV() *CSVLoader

func NewCSVLoader added in v0.0.6

func NewCSVLoader(filename string) *CSVLoader

func (*CSVLoader) Load added in v0.0.9

func (c *CSVLoader) Load(ctx context.Context) ([]document.Document, error)

func (*CSVLoader) LoadFromSource added in v0.1.0

func (c *CSVLoader) LoadFromSource(ctx context.Context, source string) ([]document.Document, error)

func (*CSVLoader) WithLazyQuotes added in v0.0.9

func (c *CSVLoader) WithLazyQuotes() *CSVLoader

func (*CSVLoader) WithSeparator added in v0.0.9

func (c *CSVLoader) WithSeparator(separator rune) *CSVLoader

func (*CSVLoader) WithTextSplitter added in v0.0.9

func (c *CSVLoader) WithTextSplitter(textSplitter TextSplitter) *CSVLoader

type DirectoryLoader

type DirectoryLoader struct {
	// contains filtered or unexported fields
}

func NewDirectoryLoader

func NewDirectoryLoader(dirname string, regExPathMatch string) *DirectoryLoader

func (*DirectoryLoader) Load

func (*DirectoryLoader) WithTextSplitter added in v0.0.9

func (d *DirectoryLoader) WithTextSplitter(textSplitter TextSplitter) *DirectoryLoader

type HFImageToText added in v0.0.9

type HFImageToText struct {
	// contains filtered or unexported fields
}

func NewHFImageToText added in v0.1.0

func NewHFImageToText() *HFImageToText

func NewHFImageToTextLoader added in v0.0.7

func NewHFImageToTextLoader(mediaFile string) *HFImageToText

func (*HFImageToText) Load added in v0.0.9

func (h *HFImageToText) Load(ctx context.Context) ([]document.Document, error)

func (*HFImageToText) LoadFromSource added in v0.1.0

func (h *HFImageToText) LoadFromSource(ctx context.Context, source string) ([]document.Document, error)

func (*HFImageToText) WithModel added in v0.0.9

func (h *HFImageToText) WithModel(model string) *HFImageToText

func (*HFImageToText) WithTextSplitter added in v0.0.9

func (h *HFImageToText) WithTextSplitter(textSplitter TextSplitter) *HFImageToText

func (*HFImageToText) WithToken added in v0.0.9

func (h *HFImageToText) WithToken(token string) *HFImageToText

type HFSpeechRecognition added in v0.0.9

type HFSpeechRecognition struct {
	// contains filtered or unexported fields
}

func NewHFSpeechRecognition added in v0.1.0

func NewHFSpeechRecognition() *HFSpeechRecognition

func NewHFSpeechRecognitionLoader added in v0.0.7

func NewHFSpeechRecognitionLoader(mediaFile string) *HFSpeechRecognition

func (*HFSpeechRecognition) Load added in v0.0.9

func (*HFSpeechRecognition) LoadFromSource added in v0.1.0

func (h *HFSpeechRecognition) LoadFromSource(ctx context.Context, source string) ([]document.Document, error)

func (*HFSpeechRecognition) WithModel added in v0.0.9

func (h *HFSpeechRecognition) WithModel(model string) *HFSpeechRecognition

func (*HFSpeechRecognition) WithTextSplitter added in v0.0.9

func (h *HFSpeechRecognition) WithTextSplitter(textSplitter TextSplitter) *HFSpeechRecognition

func (*HFSpeechRecognition) WithToken added in v0.0.9

func (h *HFSpeechRecognition) WithToken(token string) *HFSpeechRecognition

type LibreOfficeLoader added in v0.0.9

type LibreOfficeLoader struct {
	// contains filtered or unexported fields
}

func NewLibreOffice added in v0.1.0

func NewLibreOffice() *LibreOfficeLoader

func NewLibreOfficeLoader added in v0.0.6

func NewLibreOfficeLoader(filename string) *LibreOfficeLoader

func (*LibreOfficeLoader) Load added in v0.0.9

func (*LibreOfficeLoader) LoadFromSource added in v0.1.0

func (l *LibreOfficeLoader) LoadFromSource(ctx context.Context, source string) ([]document.Document, error)

func (*LibreOfficeLoader) WithArgs added in v0.0.9

func (l *LibreOfficeLoader) WithArgs(libreOfficeArgs []string) *LibreOfficeLoader

func (*LibreOfficeLoader) WithLibreOfficePath added in v0.0.9

func (l *LibreOfficeLoader) WithLibreOfficePath(libreOfficePath string) *LibreOfficeLoader

func (*LibreOfficeLoader) WithTextSplitter added in v0.0.9

func (l *LibreOfficeLoader) WithTextSplitter(textSplitter TextSplitter) *LibreOfficeLoader

type Loader added in v0.0.9

type Loader struct {
	// contains filtered or unexported fields
}

type PDFLoader added in v0.0.9

type PDFLoader struct {
	// contains filtered or unexported fields
}

func NewPDFToText added in v0.1.0

func NewPDFToText() *PDFLoader

func NewPDFToTextLoader added in v0.0.4

func NewPDFToTextLoader(path string) *PDFLoader

func (*PDFLoader) Load added in v0.0.9

func (p *PDFLoader) Load(ctx context.Context) ([]document.Document, error)

func (*PDFLoader) LoadFromSource added in v0.1.0

func (p *PDFLoader) LoadFromSource(ctx context.Context, source string) ([]document.Document, error)

func (*PDFLoader) WithPDFToTextPath added in v0.0.9

func (p *PDFLoader) WithPDFToTextPath(pdfToTextPath string) *PDFLoader

func (*PDFLoader) WithTextSplitter added in v0.0.9

func (p *PDFLoader) WithTextSplitter(textSplitter TextSplitter) *PDFLoader

type PubMedLoader added in v0.0.9

type PubMedLoader struct {
	// contains filtered or unexported fields
}

func NewPubmed added in v0.1.0

func NewPubmed() *PubMedLoader

func NewPubmedLoader added in v0.0.4

func NewPubmedLoader(pubMedIDs []string) *PubMedLoader

func (*PubMedLoader) Load added in v0.0.9

func (p *PubMedLoader) Load(ctx context.Context) ([]document.Document, error)

func (*PubMedLoader) LoadFromSource added in v0.1.0

func (p *PubMedLoader) LoadFromSource(ctx context.Context, source string) ([]document.Document, error)

func (*PubMedLoader) WithTextSplitter added in v0.0.9

func (p *PubMedLoader) WithTextSplitter(textSplitter TextSplitter) *PubMedLoader

type TesseractLoader added in v0.0.9

type TesseractLoader struct {
	// contains filtered or unexported fields
}

func NewTesseract added in v0.1.0

func NewTesseract() *TesseractLoader

func NewTesseractLoader added in v0.0.7

func NewTesseractLoader(filename string) *TesseractLoader

func (*TesseractLoader) Load added in v0.0.9

func (*TesseractLoader) LoadFromSource added in v0.1.0

func (l *TesseractLoader) LoadFromSource(ctx context.Context, source string) ([]document.Document, error)

func (*TesseractLoader) WithArgs added in v0.0.9

func (l *TesseractLoader) WithArgs(tesseractArgs []string) *TesseractLoader

func (*TesseractLoader) WithTesseractPath added in v0.0.9

func (l *TesseractLoader) WithTesseractPath(tesseractPath string) *TesseractLoader

func (*TesseractLoader) WithTextSplitter added in v0.0.9

func (l *TesseractLoader) WithTextSplitter(textSplitter TextSplitter) *TesseractLoader

type TextLoader

type TextLoader struct {
	// contains filtered or unexported fields
}

func NewText added in v0.1.0

func NewText() *TextLoader

func NewTextLoader

func NewTextLoader(filename string, metadata types.Meta) *TextLoader

func (*TextLoader) Load

func (t *TextLoader) Load(ctx context.Context) ([]document.Document, error)

func (*TextLoader) LoadFromSource added in v0.1.0

func (t *TextLoader) LoadFromSource(ctx context.Context, source string) ([]document.Document, error)

func (*TextLoader) WithMetadata added in v0.1.0

func (t *TextLoader) WithMetadata(metadata types.Meta) *TextLoader

func (*TextLoader) WithTextSplitter added in v0.0.9

func (t *TextLoader) WithTextSplitter(textSplitter TextSplitter) *TextLoader

type TextSplitter added in v0.0.5

type TextSplitter interface {
	SplitDocuments(documents []document.Document) []document.Document
}

type WhisperCppLoader added in v0.0.11

type WhisperCppLoader struct {
	// contains filtered or unexported fields
}

func NewWhisperCpp added in v0.1.0

func NewWhisperCpp() *WhisperCppLoader

func NewWhisperCppLoader added in v0.0.6

func NewWhisperCppLoader(filename string) *WhisperCppLoader

func (*WhisperCppLoader) Load added in v0.0.11

func (*WhisperCppLoader) LoadFromSource added in v0.1.0

func (w *WhisperCppLoader) LoadFromSource(ctx context.Context, source string) ([]document.Document, error)

func (*WhisperCppLoader) WithArgs added in v0.0.11

func (w *WhisperCppLoader) WithArgs(whisperCppArgs []string) *WhisperCppLoader

func (*WhisperCppLoader) WithFfmpegPath added in v0.0.11

func (w *WhisperCppLoader) WithFfmpegPath(ffmpegPath string) *WhisperCppLoader

func (*WhisperCppLoader) WithModel added in v0.0.11

func (w *WhisperCppLoader) WithModel(whisperCppModelPath string) *WhisperCppLoader

func (*WhisperCppLoader) WithTextSplitter added in v0.0.11

func (w *WhisperCppLoader) WithTextSplitter(textSplitter TextSplitter) *WhisperCppLoader

func (*WhisperCppLoader) WithWhisperCppPath added in v0.0.11

func (w *WhisperCppLoader) WithWhisperCppPath(whisperCppPath string) *WhisperCppLoader

type WhisperLoader added in v0.0.9

type WhisperLoader struct {
	// contains filtered or unexported fields
}

func NewWhisper added in v0.1.0

func NewWhisper() *WhisperLoader

func NewWhisperLoader added in v0.0.6

func NewWhisperLoader(filename string) *WhisperLoader

func (*WhisperLoader) Load added in v0.0.9

func (w *WhisperLoader) Load(ctx context.Context) ([]document.Document, error)

func (*WhisperLoader) LoadFromSource added in v0.1.0

func (w *WhisperLoader) LoadFromSource(ctx context.Context, source string) ([]document.Document, error)

func (*WhisperLoader) WithClient added in v0.0.9

func (w *WhisperLoader) WithClient(client *openai.Client) *WhisperLoader

type YoutubeDLLoader added in v0.0.9

type YoutubeDLLoader struct {
	// contains filtered or unexported fields
}

func NewYoutubeDL added in v0.1.0

func NewYoutubeDL() *YoutubeDLLoader

func NewYoutubeDLLoader added in v0.0.9

func NewYoutubeDLLoader(url string) *YoutubeDLLoader

func (*YoutubeDLLoader) Load added in v0.0.9

func (*YoutubeDLLoader) LoadFromSource added in v0.1.0

func (y *YoutubeDLLoader) LoadFromSource(ctx context.Context, source string) ([]document.Document, error)

func (*YoutubeDLLoader) WithAutoSubtitlesMode added in v0.0.9

func (y *YoutubeDLLoader) WithAutoSubtitlesMode() *YoutubeDLLoader

func (*YoutubeDLLoader) WithLanguage added in v0.0.9

func (y *YoutubeDLLoader) WithLanguage(language string) *YoutubeDLLoader

func (*YoutubeDLLoader) WithTextSplitter added in v0.0.9

func (y *YoutubeDLLoader) WithTextSplitter(textSplitter TextSplitter) *YoutubeDLLoader

func (*YoutubeDLLoader) WithYoutubeDLPath added in v0.0.9

func (y *YoutubeDLLoader) WithYoutubeDLPath(youtubeDLPath string) *YoutubeDLLoader

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL