input

package
v0.0.0-...-cd37aad Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 23, 2024 License: MIT Imports: 13 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Common file extensions, grouped by category. Every entry is lowercase and
// carries its leading dot.
var (
	// TextExtensions lists the extensions of plain-text, markup, and
	// structured-data files.
	TextExtensions = []string{".txt", ".md", ".yml", ".yaml", ".html", ".json", ".csv", ".xml"}

	// ImageExtensions lists the extensions of image files.
	ImageExtensions = []string{".png", ".jpg", ".jpeg", ".gif", ".bmp"}

	// DocumentExtensions lists the extensions of PDF and Word documents.
	DocumentExtensions = []string{".pdf", ".doc", ".docx"}
)

Common file extensions, grouped by category: text, image, and document.

Functions

This section is empty.

Types

type Handler

// Handler processes input files and directories. All of its fields are
// unexported; obtain an instance through NewHandler.
type Handler struct {
	// contains filtered or unexported fields
}

Handler processes input files and directories

func NewHandler

func NewHandler() *Handler

NewHandler creates a new input handler

func (*Handler) Clear

func (h *Handler) Clear()

Clear removes all processed inputs

func (*Handler) GetAllContents

func (h *Handler) GetAllContents() []byte

GetAllContents returns all file contents concatenated

func (*Handler) GetFileContents

func (h *Handler) GetFileContents(path string) ([]byte, error)

GetFileContents returns the contents of a specific file

func (*Handler) GetInputs

func (h *Handler) GetInputs() []*Input

GetInputs returns all processed inputs

func (*Handler) ProcessPath

func (h *Handler) ProcessPath(path string) error

ProcessPath handles both file and directory inputs

func (*Handler) ProcessScrape

func (h *Handler) ProcessScrape(url string, config map[string]interface{}) error

ProcessScrape handles web scraping input

type Input

// Input describes a single item to be processed. Type records which kind of
// input it is (see InputType).
//
// NOTE(review): Path presumably holds a filesystem path for file and
// directory inputs and the URL for web-scrape inputs — confirm against the
// Handler implementation.
type Input struct {
	Path         string
	Type         InputType
	Contents     []byte
	Metadata     map[string]interface{} // For additional data like scraping config
	ScrapeConfig *ScrapeConfig          // Specific configuration for web scraping
	MimeType     string                 // MIME type of the input; NOTE(review): how it is detected is not visible here
}

Input represents a single item to be processed, such as a file, directory, screenshot, image, or web-scrape target (see InputType).

type InputType

// InputType represents the type of input being processed. It is an integer
// enumeration; the valid values are the InputType constants declared in this
// package.
type InputType int

InputType represents the type of input being processed

// Kinds of input. Values are assigned sequentially from 0 by iota, so the
// declaration order is significant: inserting or reordering entries changes
// the numeric value of every constant that follows.
const (
	FileInput InputType = iota
	DirectoryInput
	ScreenshotInput
	ImageInput
	WebScrapeInput // Named WebScrapeInput rather than ScrapeInput to avoid a naming conflict
)

type ScrapeConfig

// ScrapeConfig represents the configuration for web scraping. Each field is
// bound to a YAML key through its struct tag.
//
// NOTE(review): the field descriptions below are inferred from the field
// names only — confirm against the scraping implementation.
type ScrapeConfig struct {
	// URL is bound to the "url" key; presumably the page to scrape.
	URL            string            `yaml:"url"`
	// AllowedDomains is bound to "allowed_domains"; presumably limits which
	// domains may be visited.
	AllowedDomains []string          `yaml:"allowed_domains"`
	// Headers is bound to "headers"; presumably HTTP request headers.
	Headers        map[string]string `yaml:"headers"`
	// Extract is bound to "extract"; presumably names what to pull from the page.
	Extract        []string          `yaml:"extract"`
}

ScrapeConfig represents the configuration for web scraping

type Validator

// Validator validates input paths. All of its fields are unexported; obtain
// an instance through NewValidator.
type Validator struct {
	// contains filtered or unexported fields
}

Validator validates input paths

func NewValidator

func NewValidator(additionalExtensions []string) *Validator

NewValidator creates a new input validator with default text extensions

func (*Validator) IsDocumentFile

func (v *Validator) IsDocumentFile(path string) bool

IsDocumentFile checks if the file has a document extension

func (*Validator) IsImageFile

func (v *Validator) IsImageFile(path string) bool

IsImageFile checks if the file has an image extension

func (*Validator) ValidateFileExtension

func (v *Validator) ValidateFileExtension(path string) error

ValidateFileExtension checks if the file has an allowed extension

func (*Validator) ValidatePath

func (v *Validator) ValidatePath(path string) error

ValidatePath checks if the path is valid

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL