botMaker

package module
v0.0.0-...-77c214f Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 25, 2023 License: MIT Imports: 23 Imported by: 0

README

botMaker

A Go library to help create and train AI bots quickly with OpenAI and Pinecone. It is heavily based on code from the excellent vault-ai project.

Sample Usage

A simple chatbot:
// Chat demonstrates a simple two-turn chatbot: it sends a prompt, appends
// both sides of the exchange to the prompt's rendered context, then asks a
// follow-up question that can refer back to the first answer.
func Chat() {
	cfg := Config{
		LLMAPIKey:        "xxxx",
		PineconeKey:      "xxxx",
		PineconeEndpoint: "xxxx",
	}

	// Client
	cl := NewOAIClient(cfg.LLMAPIKey)

	// Settings for the AI
	bs := NewBotSettings()
	bs.ID = "a-UUID-here"

	// Build a prompt using the default template
	pr := NewBotPrompt("", cl)
	pr.Instructions = "You are an AI assistant that provides answers that are helpful in a friendly and cheerful way."
	pr.Body = "What is the best way to scale a redis database?"

	// Create some storage
	pc := &Pinecone{
		APIEndpoint: cfg.PineconeEndpoint,
		APIKey:      cfg.PineconeKey,
		UUID:        bs.ID,
	}

	// attach memory
	bs.Memory = pc

	oaiResponse, _, err := cl.CallCompletionAPI(bs, pr)
	if err != nil {
		fatal("query send fail: %v", err)
	}

	fmt.Println("FIRST PROMPT:")
	fmt.Println(pr.RenderedPrompt)

	fmt.Println("GOT FIRST RESPONSE: ")
	fmt.Println(oaiResponse)
	time.Sleep(5 * time.Second)

	// we do some string shenanigans to make a chatbot
	// First: update the context with the human side of the exchange
	oldBody := "Human: " + pr.Body
	pr.ContextToRender = append(pr.ContextToRender, oldBody)

	// Next make sure the AI's response is added too
	pr.ContextToRender = append(pr.ContextToRender, oaiResponse)

	// Replace the main query with a new one
	pr.Body = "How is a cluster different from sentinel?"

	// Make the call!
	secondResponse, _, err := cl.CallCompletionAPI(bs, pr)
	if err != nil {
		fatal("prompt2 fail: %v", err)
	}

	fmt.Println("SECOND RESPONSE")
	fmt.Println(secondResponse)
}
Learning from a PDF
// TestLearning demonstrates ingesting a PDF into a Pinecone-backed memory
// store: chunks the file, creates embeddings via the OpenAI client, and
// uploads them under the given UUID namespace.
func TestLearning() {
	cfg := Config{
		LLMAPIKey:        "xxx",
		PineconeKey:      "xxx",
		PineconeEndpoint: "xxx",
	}

	// Client
	cl := NewOAIClient(cfg.LLMAPIKey)

	// Create some storage
	pc := &Pinecone{
		APIEndpoint: cfg.PineconeEndpoint,
		APIKey:      cfg.PineconeKey,
		UUID:        "a45dbe63-4207-419c-bca7-5d940bf3d908",
	}

	l := Learn{
		Model:      openai.GPT3TextDavinci003,
		TokenLimit: 8191,
		ChunkSize:  20,
		Memory:     pc,
		Client:     cl,
	}

	// FromFile returns the number of embeddings created; not needed here.
	_, err := l.FromFile("/data/socrates.pdf")
	if err != nil {
		fatal("learn fail: %v", err)
	}
}

Documentation

Index

Constants

This section is empty.

Variables

View Source
var DEFAULT_TEMPLATE = `` /* 266-byte string literal not displayed */

Functions

func CountTokens

func CountTokens(text, model string) (int, error)

func GetContexts

func GetContexts(b *BotPrompt, s *BotSettings, m Storage, c LLMAPIClient) ([]string, error)

GetContexts will use OpenAI to get vectors for the prompt, then use Memory to retrieve relevant contexts to include in the query prompt

func HashFileName

func HashFileName(filename string) string

func PathTitleGetter

func PathTitleGetter(path string) (string, error)

Types

type BotPrompt

type BotPrompt struct {
	OAIClient       LLMAPIClient
	Instructions    string   // You are an AI assistant that is happy, helpful and tries to offer insightful answers
	Body            string   // The actual prompt
	DesiredFormat   string   // Provide your answer using the following output
	ContextToRender []string // Rendered context (within token limit)
	ContextTitles   []string // titles and references to the content
	Stop            []string // Human: AI:
	History         []*RenderContext
	Template        string
	RenderedPrompt  string
	PromptLength    int
	// contains filtered or unexported fields
}

BotPrompt has the components to make a call to the OpenAI API

func NewBotPrompt

func NewBotPrompt(promptTemplate string, withClient LLMAPIClient) *BotPrompt

func (*BotPrompt) AsChatCompletionRequest

func (b *BotPrompt) AsChatCompletionRequest(s *BotSettings) (*openai.ChatCompletionRequest, error)

func (*BotPrompt) AsCompletionRequest

func (b *BotPrompt) AsCompletionRequest(s *BotSettings) (*openai.CompletionRequest, error)

func (*BotPrompt) GetContextsForLastPrompt

func (b *BotPrompt) GetContextsForLastPrompt() []string

func (*BotPrompt) Prompt

func (b *BotPrompt) Prompt(settings *BotSettings) (string, error)

Prompt renders the prompt to the prompt template

type BotSettings

type BotSettings struct {
	ID                string // Used when retrieving contexts
	Model             string
	Temp              float32
	TopP              float32
	FrequencyPenalty  float32
	PresencePenalty   float32
	MaxTokens         int // Max to receive
	TokenLimit        int // Max to send
	EmbeddingModel    openai.EmbeddingModel
	Memory            Storage
	MemoryAcceptScore float32
}

BotSettings holds configs for OpenAI APIs

func NewBotSettings

func NewBotSettings() *BotSettings

NewBotSettings returns settings for OpenAI with sane defaults

type Chunk

type Chunk struct {
	Start int
	End   int
	Title string
	Text  string
}

type Config

type Config struct {
	LLMAPIKey string `env:"LLM_API_KEY,required"`

	PineconeKey      string `env:"PINECONE_KEY"`
	PineconeEndpoint string `env:"PINECONE_URL"`
}

func NewConfig

func NewConfig() *Config

func NewConfigFromEnv

func NewConfigFromEnv() *Config

type ContentSplitter

type ContentSplitter func(string, string) []Chunk

type Context

type Context struct {
	Text  string `json:"text"`
	Title string `json:"title"`
}

type LLMAPIClient

type LLMAPIClient interface {
	CallCompletionAPI(settings *BotSettings, prompt *BotPrompt) (string, int, error)
	CallEmbeddingAPIWithRetry(texts []string, embedModel openai.EmbeddingModel, maxRetries int) (*openai.EmbeddingResponse, error)
	GetEmbeddingsForData(chunks []Chunk, batchSize int, embedModel openai.EmbeddingModel) ([][]float32, error)
	GetEmbeddingsForPrompt(text string, embedModel openai.EmbeddingModel) ([]float32, error)
	GetEmbeddingModel() openai.EmbeddingModel
	CheckTokenLimit(text, model string, tokenLimit int) bool
}

func NewOAIClient

func NewOAIClient(key string) LLMAPIClient

type Learn

type Learn struct {
	Model           string
	TokenLimit      int
	ChunkSize       int
	Overlap         int
	Memory          Storage
	Client          LLMAPIClient
	GetTitle        TitleGetter
	PreProcessBody  PreProcessor
	PreProcessChunk PreProcessor
	ContentSplitter ContentSplitter
}

func (*Learn) CreateChunks

func (l *Learn) CreateChunks(fileContent, title string) []Chunk

CreateChunks generates uploadable chunks to send to a memory store

func (*Learn) CreateChunksCharacterBased

func (l *Learn) CreateChunksCharacterBased(fileContent, title string) []Chunk

func (*Learn) ExtensionSupported

func (l *Learn) ExtensionSupported(path string) (string, bool)

ExtensionSupported checks if the extension for a given file path is supported by the library, it returns the file extension and a bool whether it is supported or not, supported file types are txt, pdf, and md.

func (*Learn) FromFile

func (l *Learn) FromFile(path string) (int, error)

FromFile processes a file to learn into an OpenAI memory store, returns number of embeddings created and an error if failed

func (*Learn) Learn

func (l *Learn) Learn(contents, title string) (int, error)

func (*Learn) ProcessMarkdown

func (l *Learn) ProcessMarkdown(path string) (string, string, error)

func (*Learn) ProcessPDFFile

func (l *Learn) ProcessPDFFile(path string) (string, string, error)

ProcessPDFFile reads a PDF file from the path and extracts the human-readable text, it will also attempt to extract the title of the PDF. It returns the title, the human-readable content, and an optional error if there is a problem reading or parsing the file.

func (*Learn) ProcessTextFile

func (l *Learn) ProcessTextFile(path string) (string, string, error)

ProcessTextFile opens and fully reads the file in 'path', it treats the first line that contains text as a title, and reads the remaining file into another variable, it then returns the title and the file contents. It returns an error if there is a problem opening or reading the file.

type OAIClient

type OAIClient struct {
	Client *openai.Client
}

func (*OAIClient) CallCompletionAPI

func (c *OAIClient) CallCompletionAPI(settings *BotSettings, prompt *BotPrompt) (string, int, error)

func (*OAIClient) CallEmbeddingAPIWithRetry

func (c *OAIClient) CallEmbeddingAPIWithRetry(texts []string, embedModel openai.EmbeddingModel,
	maxRetries int) (*openai.EmbeddingResponse, error)

func (*OAIClient) CheckTokenLimit

func (c *OAIClient) CheckTokenLimit(text, model string, tokenLimit int) bool

func (*OAIClient) GetEmbeddingModel

func (c *OAIClient) GetEmbeddingModel() openai.EmbeddingModel

func (*OAIClient) GetEmbeddingsForData

func (c *OAIClient) GetEmbeddingsForData(chunks []Chunk, batchSize int,
	embedModel openai.EmbeddingModel) ([][]float32, error)

GetEmbeddingsForData gets embedding vectors for data to be ingested and used for context in queries

func (*OAIClient) GetEmbeddingsForPrompt

func (c *OAIClient) GetEmbeddingsForPrompt(text string, embedModel openai.EmbeddingModel) ([]float32, error)

GetEmbeddingsForPrompt will return embedding vectors for the prompt

type OpenAIResponse

type OpenAIResponse struct {
	Response string `json:"response"`
	Tokens   int    `json:"tokens"`
}

type Pinecone

type Pinecone struct {
	APIEndpoint string
	APIKey      string
	UUID        string // Used when ingesting data
}

func (*Pinecone) Retrieve

func (p *Pinecone) Retrieve(questionEmbedding []float32, topK int, uuid string) ([]QueryMatch, error)

func (*Pinecone) UploadEmbeddings

func (p *Pinecone) UploadEmbeddings(embeddings [][]float32, chunks []Chunk) error

type PineconeQueryItem

type PineconeQueryItem struct {
	Values []float32 `json:"values"`
}

type PineconeQueryRequest

type PineconeQueryRequest struct {
	TopK            int                 `json:"topK"`
	IncludeMetadata bool                `json:"includeMetadata"`
	Namespace       string              `json:"namespace"`
	Queries         []PineconeQueryItem `json:"queries"`
}

type PineconeQueryResponse

type PineconeQueryResponse struct {
	Results []PineconeQueryResponseResult `json:"results"`
}

type PineconeQueryResponseResult

type PineconeQueryResponseResult struct {
	Matches []QueryMatch `json:"matches"`
}

type PineconeVector

type PineconeVector struct {
	ID       string            `json:"id"`
	Values   []float32         `json:"values"`
	Metadata map[string]string `json:"metadata,omitempty"`
}

type PreProcessor

type PreProcessor func(string) (string, error)

type QueryMatch

type QueryMatch struct {
	ID       string            `json:"id"`
	Score    float32           `json:"score"` // Use "score" instead of "distance"
	Metadata map[string]string `json:"metadata"`
}

type RenderContext

type RenderContext struct {
	Role    string
	Content string
}

type Storage

type Storage interface {
	Retrieve(questionEmbedding []float32, topK int, uuid string) ([]QueryMatch, error)
	UploadEmbeddings(embeddings [][]float32, chunks []Chunk) error
}

type TitleGetter

type TitleGetter func(string) (string, error)

Directories

Path Synopsis
examples

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL