embedding

package
v0.0.108 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 28, 2024 License: MIT Imports: 26 Imported by: 0

Documentation

Overview

Package embedding contains the implementation to create vector embeddings from text using different APIs.

Index

Constants

This section is empty.

Variables

View Source
// DefaultOpenAIConfig is a package-level OpenAIOptions value holding the
// defaults: the "text-embedding-3-small" model, an embedding context length of
// 8191, a chunk size of 1000 and at most 3 retries. BaseURL and OrgID are left
// at their zero values.
// NOTE(review): presumably the starting point that NewOpenAI applies its
// option functions to — confirm against the constructor.
var DefaultOpenAIConfig = OpenAIOptions{
	ModelName:              "text-embedding-3-small",
	EmbeddingContextLength: 8191,
	ChunkSize:              1000,
	MaxRetries:             3,
}

Functions

This section is empty.

Types

type AzureOpenAIOptions added in v0.0.26

// AzureOpenAIOptions embeds OpenAIOptions and adds the extra settings accepted
// by the option functions of NewAzureOpenAI.
type AzureOpenAIOptions struct {
	OpenAIOptions
	// APIVersion is presumably the Azure OpenAI REST API version — TODO confirm.
	APIVersion string
	// Deployment is presumably the name of the Azure deployment to call — TODO confirm.
	Deployment string
}

type Bedrock added in v0.0.73

type Bedrock struct {
	// contains filtered or unexported fields
}

Bedrock is a struct representing the Bedrock model embedding functionality.

func NewBedrock added in v0.0.73

func NewBedrock(client BedrockRuntimeClient, modelID string, optFns ...func(o *BedrockOptions)) *Bedrock

NewBedrock creates a new instance of Bedrock with the provided BedrockRuntimeClient and optional configuration.

func NewBedrockAmazon added in v0.0.108

func NewBedrockAmazon(client BedrockRuntimeClient, optFns ...func(o *BedrockAmazonOptions)) *Bedrock

NewBedrockAmazon creates a new instance of Bedrock with the Amazon provider.

func NewBedrockCohere added in v0.0.108

func NewBedrockCohere(client BedrockRuntimeClient, optFns ...func(o *BedrockCohereOptions)) *Bedrock

NewBedrockCohere creates a new instance of Bedrock with the Cohere provider.

func (*Bedrock) BatchEmbedText added in v0.0.93

func (e *Bedrock) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*Bedrock) EmbedText added in v0.0.93

func (e *Bedrock) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type BedrockAmazonOptions added in v0.0.108

// BedrockAmazonOptions holds the settings that the option functions passed to
// NewBedrockAmazon can modify.
type BedrockAmazonOptions struct {
	// ModelID is the id of the model to use.
	ModelID string `map:"model_id,omitempty"`
}

BedrockAmazonOptions is a struct containing options for configuring the Amazon Bedrock model.

type BedrockCohereOptions added in v0.0.108

// BedrockCohereOptions holds the settings that the option functions passed to
// NewBedrockCohere can modify.
type BedrockCohereOptions struct {
	// ModelID is the id of the model to use.
	ModelID string `map:"model_id,omitempty"`

	// InputType carries the "input_type" map tag.
	// NOTE(review): presumably forwarded as Cohere's input_type request field — confirm.
	InputType string `map:"input_type"`

	// Truncate carries the "truncate" map tag.
	// NOTE(review): presumably accepts the same "NONE"|"START"|"END" values as
	// CohereOptions.Truncate — confirm.
	Truncate string `map:"truncate"`
}

BedrockCohereOptions is a struct containing options for configuring the Cohere Bedrock model.

type BedrockInputOutputAdapter added in v0.0.108

type BedrockInputOutputAdapter struct {
	// contains filtered or unexported fields
}

BedrockInputOutputAdapter is a helper struct for preparing input and handling output for Bedrock model.

func NewBedrockInputOutputAdapter added in v0.0.108

func NewBedrockInputOutputAdapter(provider string) *BedrockInputOutputAdapter

NewBedrockInputOutputAdapter creates a new instance of BedrockInputOutputAdapter.

func (*BedrockInputOutputAdapter) PrepareInput added in v0.0.108

func (bioa *BedrockInputOutputAdapter) PrepareInput(text string, modelParams map[string]any) ([]byte, error)

PrepareInput prepares the input for the Bedrock model based on the specified provider.

func (*BedrockInputOutputAdapter) PrepareOutput added in v0.0.108

func (bioa *BedrockInputOutputAdapter) PrepareOutput(response []byte) ([]float32, error)

PrepareOutput prepares the output for the Bedrock model based on the specified provider.

type BedrockOptions added in v0.0.73

// BedrockOptions holds the settings that the option functions passed to
// NewBedrock can modify.
type BedrockOptions struct {
	// MaxConcurrency is presumably the upper bound on concurrent model
	// invocations made by BatchEmbedText — TODO confirm.
	MaxConcurrency int

	// ModelParams are the model parameters to use, keyed by parameter name.
	ModelParams map[string]any `map:"model_params,omitempty"`
}

BedrockOptions contains options for configuring the Bedrock model.

type BedrockRuntimeClient added in v0.0.73

// BedrockRuntimeClient is the single-method client interface accepted by
// NewBedrock, NewBedrockAmazon and NewBedrockCohere.
type BedrockRuntimeClient interface {
	// InvokeModel invokes a model with the given input and returns its output or an error.
	// NOTE(review): presumably satisfied by the AWS SDK's *bedrockruntime.Client — confirm.
	InvokeModel(ctx context.Context, params *bedrockruntime.InvokeModelInput, optFns ...func(*bedrockruntime.Options)) (*bedrockruntime.InvokeModelOutput, error)
}

BedrockRuntimeClient is an interface for the Bedrock model runtime client.

type Cohere added in v0.0.39

type Cohere struct {
	// contains filtered or unexported fields
}

Cohere is a client for the Cohere API.

func NewCohere added in v0.0.39

func NewCohere(apiKey string, optFns ...func(o *CohereOptions)) *Cohere

NewCohere creates a new Cohere instance with the provided API key and options. It returns the initialized Cohere instance.

func NewCohereFromClient added in v0.0.39

func NewCohereFromClient(client CohereClient, optFns ...func(o *CohereOptions)) *Cohere

NewCohereFromClient creates a new Cohere instance from an existing Cohere client and options. It returns the initialized Cohere instance.

func (*Cohere) BatchEmbedText added in v0.0.93

func (e *Cohere) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*Cohere) EmbedText added in v0.0.93

func (e *Cohere) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type CohereClient added in v0.0.39

// CohereClient is the single-method client interface accepted by
// NewCohereFromClient.
type CohereClient interface {
	// Embed submits an embed request and returns the embed response or an error.
	Embed(ctx context.Context, request *cohere.EmbedRequest, opts ...core.RequestOption) (*cohere.EmbedResponse, error)
}

CohereClient is an interface for the Cohere client.

type CohereOptions added in v0.0.39

// CohereOptions holds the settings that the option functions passed to
// NewCohere and NewCohereFromClient can modify.
type CohereOptions struct {
	// Model is the name of the model to use.
	Model string
	// Truncate selects the truncation mode: "NONE", "START" or "END".
	// NOTE(review): presumably applies to over-long input texts rather than to
	// the returned embeddings — confirm against the Cohere API.
	Truncate string
	// MaxRetries represents the maximum number of retries to make when embedding.
	MaxRetries uint `map:"max_retries,omitempty"`
}

CohereOptions contains options for configuring the Cohere instance.

type Cybertron added in v0.0.103

type Cybertron struct {
	// contains filtered or unexported fields
}

Cybertron represents an embedder powered by Cybertron.

func NewCybertron added in v0.0.103

func NewCybertron(optFns ...func(o *CybertronOptions)) (*Cybertron, error)

NewCybertron creates a new instance of the Cybertron embedder.

func NewCybertronFromEncoder added in v0.0.103

func NewCybertronFromEncoder(encoder textencoding.Interface, optFns ...func(o *CybertronFromEncoderOptions)) (*Cybertron, error)

NewCybertronFromEncoder creates a new Cybertron embedder from an existing encoder.

func (*Cybertron) BatchEmbedText added in v0.0.103

func (e *Cybertron) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*Cybertron) EmbedText added in v0.0.103

func (e *Cybertron) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type CybertronFromEncoderOptions added in v0.0.103

// CybertronFromEncoderOptions holds the settings that the option functions
// passed to NewCybertronFromEncoder can modify. It is also embedded in
// CybertronOptions.
type CybertronFromEncoderOptions struct {
	// PoolingStrategy specifies the pooling strategy for embedding calculation.
	// NOTE(review): the meaning of each int value is not visible here —
	// presumably defined by the underlying encoder library; confirm.
	PoolingStrategy int
}

CybertronFromEncoderOptions represents options for creating a Cybertron embedder from an existing encoder.

type CybertronOptions added in v0.0.103

// CybertronOptions holds the settings that the option functions passed to
// NewCybertron can modify. It embeds CybertronFromEncoderOptions.
type CybertronOptions struct {
	CybertronFromEncoderOptions
	// Model is the name of the model (format: <org>/<model>).
	Model string
	// ModelsDir is the directory where the models are stored.
	ModelsDir string
	// HubAccessToken is the access token for the Hugging Face Hub.
	HubAccessToken string
}

CybertronOptions represents options for the Cybertron embedder.

type Ernie added in v0.0.67

type Ernie struct {
	// contains filtered or unexported fields
}

Ernie represents the text embedding component powered by Ernie.

func NewErnie added in v0.0.67

func NewErnie(clientID, clientSecret string, optFns ...func(o *ErnieOptions)) *Ernie

NewErnie creates a new instance of the Ernie text embedding component from the given client ID and client secret, with optional configuration.

func NewErnieFromClient added in v0.0.67

func NewErnieFromClient(client ErnieClient, optFns ...func(o *ErnieOptions)) *Ernie

NewErnieFromClient creates a new instance of the Ernie text embedding component with a custom ErnieClient and optional configuration.

func (*Ernie) BatchEmbedText added in v0.0.93

func (e *Ernie) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*Ernie) EmbedText added in v0.0.93

func (e *Ernie) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type ErnieClient added in v0.0.67

// ErnieClient is the single-method client interface accepted by
// NewErnieFromClient.
type ErnieClient interface {
	// CreateEmbedding generates text embeddings using the specified model and request.
	CreateEmbedding(ctx context.Context, model string, request ernie.EmbeddingRequest) (*ernie.EmbeddingResponse, error)
}

ErnieClient is an interface for interacting with the Ernie API for text embedding.

type ErnieOptions added in v0.0.67

// ErnieOptions holds the settings that the option functions passed to NewErnie
// and NewErnieFromClient can modify.
type ErnieOptions struct {
	// Model is presumably the model name passed to ErnieClient.CreateEmbedding — TODO confirm.
	Model string
}

ErnieOptions represents configuration options for the Ernie text embedding component.

type Fake

// Fake is an embedder created by NewFake that exposes the same EmbedText and
// BatchEmbedText methods as the other embedders in this package.
// NOTE(review): presumably a test double that calls no external service — confirm.
type Fake struct {
	// Size is presumably the length of each generated embedding vector — TODO confirm.
	Size int
}

func NewFake

func NewFake(size int) *Fake

func (*Fake) BatchEmbedText added in v0.0.93

func (e *Fake) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*Fake) EmbedText added in v0.0.93

func (e *Fake) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type GoogleGenAI added in v0.0.92

type GoogleGenAI struct {
	// contains filtered or unexported fields
}

GoogleGenAI is a client for the GoogleGenAI embedding service.

func NewGoogleGenAI added in v0.0.92

func NewGoogleGenAI(client GoogleGenAIClient, optFns ...func(o *GoogleGenAIOptions)) *GoogleGenAI

NewGoogleGenAI creates a new instance of the GoogleGenAI client.

func (*GoogleGenAI) BatchEmbedText added in v0.0.93

func (e *GoogleGenAI) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*GoogleGenAI) EmbedText added in v0.0.93

func (e *GoogleGenAI) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type GoogleGenAIClient added in v0.0.92

GoogleGenAIClient is an interface for the GoogleGenAI client.

type GoogleGenAIOptions added in v0.0.92

// GoogleGenAIOptions holds the settings that the option functions passed to
// NewGoogleGenAI can modify.
type GoogleGenAIOptions struct {
	// ModelName is presumably the name of the embedding model to use — TODO confirm.
	ModelName string
}

GoogleGenAIOptions contains options for configuring the GoogleGenAI client.

type HuggingFaceHub added in v0.0.66

type HuggingFaceHub struct {
	// contains filtered or unexported fields
}

HuggingFaceHub represents an embedder for Hugging Face Hub models.

func NewHuggingFaceHub added in v0.0.66

func NewHuggingFaceHub(token string, optFns ...func(o *HuggingFaceHubOptions)) *HuggingFaceHub

NewHuggingFaceHub creates a new instance of the HuggingFaceHub embedder.

func NewHuggingFaceHubFromClient added in v0.0.66

func NewHuggingFaceHubFromClient(client HuggingFaceHubClient, optFns ...func(o *HuggingFaceHubOptions)) *HuggingFaceHub

NewHuggingFaceHubFromClient creates a new instance of the HuggingFaceHub embedder from a custom client.

func (*HuggingFaceHub) BatchEmbedText added in v0.0.93

func (e *HuggingFaceHub) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*HuggingFaceHub) EmbedText added in v0.0.93

func (e *HuggingFaceHub) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type HuggingFaceHubClient added in v0.0.66

// HuggingFaceHubClient is the single-method client interface accepted by
// NewHuggingFaceHubFromClient.
type HuggingFaceHubClient interface {
	// FeatureExtractionWithAutomaticReduction performs feature extraction with automatic reduction.
	// It returns the extraction response or an error if the operation fails.
	FeatureExtractionWithAutomaticReduction(ctx context.Context, req *huggingface.FeatureExtractionRequest) (huggingface.FeatureExtractionWithAutomaticReductionResponse, error)
}

HuggingFaceHubClient represents a client for interacting with Hugging Face Hub.

type HuggingFaceHubOptions added in v0.0.66

// HuggingFaceHubOptions holds the settings that the option functions passed to
// NewHuggingFaceHub and NewHuggingFaceHubFromClient can modify.
type HuggingFaceHubOptions struct {
	// Model to use for embedding.
	Model string
	// Options represents optional settings for the feature extraction.
	Options huggingface.Options
}

type Ollama added in v0.0.96

type Ollama struct {
	// contains filtered or unexported fields
}

Ollama is a struct representing the Ollama embedding model.

func NewOllama added in v0.0.96

func NewOllama(client OllamaClient, optFns ...func(o *OllamaOptions)) *Ollama

NewOllama creates a new instance of the Ollama embedding model.

func (*Ollama) BatchEmbedText added in v0.0.96

func (e *Ollama) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*Ollama) EmbedText added in v0.0.96

func (e *Ollama) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type OllamaClient added in v0.0.96

// OllamaClient is the single-method client interface accepted by NewOllama.
type OllamaClient interface {
	// CreateEmbedding submits an embedding request and returns the embedding response or an error.
	CreateEmbedding(ctx context.Context, req *ollama.EmbeddingRequest) (*ollama.EmbeddingResponse, error)
}

OllamaClient is an interface for interacting with the Ollama model's embedding functionality.

type OllamaOptions added in v0.0.96

// OllamaOptions holds the settings that the option functions passed to
// NewOllama can modify.
type OllamaOptions struct {
	// MaxConcurrency is presumably the upper bound on concurrent embedding
	// requests made by BatchEmbedText — TODO confirm.
	MaxConcurrency int
	// ModelName is the name of the Ollama model to use.
	ModelName string `map:"model_name,omitempty"`
}

OllamaOptions contains options for configuring the Ollama model.

type OpenAI added in v0.0.6

type OpenAI struct {
	// contains filtered or unexported fields
}

func NewAzureOpenAI added in v0.0.26

func NewAzureOpenAI(apiKey, baseURL string, optFns ...func(o *AzureOpenAIOptions)) *OpenAI

func NewOpenAI added in v0.0.6

func NewOpenAI(apiKey string, optFns ...func(o *OpenAIOptions)) *OpenAI

func NewOpenAIFromClient added in v0.0.38

func NewOpenAIFromClient(client OpenAIClient, optFns ...func(o *OpenAIOptions)) *OpenAI

func (*OpenAI) BatchEmbedText added in v0.0.93

func (e *OpenAI) BatchEmbedText(ctx context.Context, texts []string) ([][]float32, error)

BatchEmbedText embeds a list of texts and returns their embeddings.

func (*OpenAI) EmbedText added in v0.0.93

func (e *OpenAI) EmbedText(ctx context.Context, text string) ([]float32, error)

EmbedText embeds a single text and returns its embedding.

type OpenAIClient added in v0.0.68

// OpenAIClient is the single-method client interface accepted by
// NewOpenAIFromClient.
type OpenAIClient interface {
	// CreateEmbeddings submits an embedding request and returns the embedding response or an error.
	CreateEmbeddings(ctx context.Context, conv openai.EmbeddingRequestConverter) (res openai.EmbeddingResponse, err error)
}

type OpenAIOptions added in v0.0.6

// OpenAIOptions holds the settings that the option functions passed to
// NewOpenAI and NewOpenAIFromClient can modify. It is also embedded in
// AzureOpenAIOptions, and DefaultOpenAIConfig is a value of this type.
//
// NOTE(review): EmbeddingContextLength is undocumented in the original;
// presumably the maximum number of tokens the model accepts per input —
// confirm.
type OpenAIOptions struct {
	// ModelName is the name of the model to use.
	ModelName              string
	EmbeddingContextLength int
	// ChunkSize is the maximum number of texts to embed in each batch.
	ChunkSize int
	// BaseURL is the base URL of the OpenAI service.
	BaseURL string
	// OrgID is the organization ID for accessing the OpenAI service.
	OrgID string
	// MaxRetries represents the maximum number of retries to make when embedding.
	MaxRetries uint `map:"max_retries,omitempty"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL