botMaker

package module
v0.0.0-...-77c214f Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 25, 2023 License: MIT Imports: 23 Imported by: 0

README

botMaker

A Go library to help create and train AI bots quickly with OpenAI and Pinecone. It is heavily based on code from the excellent vault-ai project.

Sample Usage

A simple chatbot:
// Chat demonstrates a simple two-turn chatbot: it sends a prompt, appends
// both sides of the exchange to the prompt's rendered context, then asks a
// follow-up question that can refer back to the first answer.
func Chat() {
	cfg := Config{
		LLMAPIKey:        "xxxx",
		PineconeKey:      "xxxx",
		PineconeEndpoint: "xxxx",
	}

	// Client
	cl := NewOAIClient(cfg.LLMAPIKey)

	// Settings for the AI
	bs := NewBotSettings()
	bs.ID = "a-UUID-here"

	// Build a prompt using the default template
	pr := NewBotPrompt("", cl)
	pr.Instructions = "You are an AI assistant that provides answers that are helpful in a friendly and cheerful way."
	pr.Body = "What is the best way to scale a redis database?"

	// Create some storage
	pc := &Pinecone{
		APIEndpoint: cfg.PineconeEndpoint,
		APIKey:      cfg.PineconeKey,
		UUID:        bs.ID,
	}

	// attach memory
	bs.Memory = pc

	oaiResponse, _, err := cl.CallCompletionAPI(bs, pr)
	if err != nil {
		fatal("query send fail: %v", err)
	}

	fmt.Println("FIRST PROMPT:")
	fmt.Println(pr.RenderedPrompt)

	fmt.Println("GOT FIRST RESPONSE: ")
	fmt.Println(oaiResponse)
	time.Sleep(5 * time.Second)

	// we do some string shenanigans to make a chatbot
	// First: update the context with the human side of the exchange
	oldBody := "Human: " + pr.Body
	pr.ContextToRender = append(pr.ContextToRender, oldBody)

	// Next make sure the AI's response is added too
	pr.ContextToRender = append(pr.ContextToRender, oaiResponse)

	// Replace the main query with a new one
	pr.Body = "How is a cluster different from sentinel?"

	// Make the call!
	secondResponse, _, err := cl.CallCompletionAPI(bs, pr)
	if err != nil {
		fatal("prompt2 fail: %v", err)
	}

	fmt.Println("SECOND RESPONSE")
	fmt.Println(secondResponse)
}
Learning from a PDF
// TestLearning demonstrates ingesting a PDF into a Pinecone-backed memory
// store: chunks the file, creates embeddings via the OpenAI client, and
// uploads them under the given UUID namespace.
func TestLearning() {
	cfg := Config{
		LLMAPIKey:        "xxx",
		PineconeKey:      "xxx",
		PineconeEndpoint: "xxx",
	}

	// Client
	cl := NewOAIClient(cfg.LLMAPIKey)

	// Create some storage
	pc := &Pinecone{
		APIEndpoint: cfg.PineconeEndpoint,
		APIKey:      cfg.PineconeKey,
		UUID:        "a45dbe63-4207-419c-bca7-5d940bf3d908",
	}

	l := Learn{
		Model:      openai.GPT3TextDavinci003,
		TokenLimit: 8191,
		ChunkSize:  20,
		Memory:     pc,
		Client:     cl,
	}

	// FromFile returns the number of embeddings created; not needed here.
	_, err := l.FromFile("/data/socrates.pdf")
	if err != nil {
		fatal("learn fail: %v", err)
	}
}

Documentation

Index

Constants

This section is empty.

Variables

View Source
var DEFAULT_TEMPLATE = `` /* 266-byte string literal not displayed */

Functions

func CountTokens

func CountTokens(text, model string) (int, error)

func GetContexts

func GetContexts(b *BotPrompt, s *BotSettings, m Storage, c LLMAPIClient) ([]string, error)

GetContexts will use OpenAI to get vectors for the prompt, then use Memory to retrieve relevant contexts to include in the query prompt

func HashFileName

func HashFileName(filename string) string

func PathTitleGetter

func PathTitleGetter(path string) (string, error)

Types

type BotPrompt

type BotPrompt struct {
	OAIClient       LLMAPIClient
	Instructions    string   // You are an AI assistant that is happy, helpful and tries to offer insightful answers
	Body            string   // The actual prompt
	DesiredFormat   string   // Provide your answer using the following output
	ContextToRender []string // Rendered context (within token limit)
	ContextTitles   []string // titles and references to the content
	Stop            []string // Human: AI:
	History         []*RenderContext
	Template        string
	RenderedPrompt  string
	PromptLength    int
	// contains filtered or unexported fields
}

BotPrompt has the components to make a call to the OpenAI API

func NewBotPrompt

func NewBotPrompt(promptTemplate string, withClient LLMAPIClient) *BotPrompt

func (*BotPrompt) AsChatCompletionRequest

func (b *BotPrompt) AsChatCompletionRequest(s *BotSettings) (*openai.ChatCompletionRequest, error)

func (*BotPrompt) AsCompletionRequest

func (b *BotPrompt) AsCompletionRequest(s *BotSettings) (*openai.CompletionRequest, error)

func (*BotPrompt) GetContextsForLastPrompt

func (b *BotPrompt) GetContextsForLastPrompt() []string

func (*BotPrompt) Prompt

func (b *BotPrompt) Prompt(settings *BotSettings) (string, error)

Prompt renders the prompt to the prompt template

type BotSettings

type BotSettings struct {
	ID                string // Used when retrieving contexts
	Model             string
	Temp              float32
	TopP              float32
	FrequencyPenalty  float32
	PresencePenalty   float32
	MaxTokens         int // Max to receive
	TokenLimit        int // Max to send
	EmbeddingModel    openai.EmbeddingModel
	Memory            Storage
	MemoryAcceptScore float32
}

BotSettings holds configs for OpenAI APIs

func NewBotSettings

func NewBotSettings() *BotSettings

NewBotSettings returns settings for OpenAI with sane defaults

type Chunk

type Chunk struct {
	Start int
	End   int
	Title string
	Text  string
}

type Config

type Config struct {
	LLMAPIKey string `env:"LLM_API_KEY,required"`

	PineconeKey      string `env:"PINECONE_KEY"`
	PineconeEndpoint string `env:"PINECONE_URL"`
}

func NewConfig

func NewConfig() *Config

func NewConfigFromEnv

func NewConfigFromEnv() *Config

type ContentSplitter

type ContentSplitter func(string, string) []Chunk

type Context

type Context struct {
	Text  string `json:"text"`
	Title string `json:"title"`
}

type LLMAPIClient

type LLMAPIClient interface {
	CallCompletionAPI(settings *BotSettings, prompt *BotPrompt) (string, int, error)
	CallEmbeddingAPIWithRetry(texts []string, embedModel openai.EmbeddingModel, maxRetries int) (*openai.EmbeddingResponse, error)
	GetEmbeddingsForData(chunks []Chunk, batchSize int, embedModel openai.EmbeddingModel) ([][]float32, error)
	GetEmbeddingsForPrompt(text string, embedModel openai.EmbeddingModel) ([]float32, error)
	GetEmbeddingModel() openai.EmbeddingModel
	CheckTokenLimit(text, model string, tokenLimit int) bool
}

func NewOAIClient

func NewOAIClient(key string) LLMAPIClient

type Learn

type Learn struct {
	Model           string
	TokenLimit      int
	ChunkSize       int
	Overlap         int
	Memory          Storage
	Client          LLMAPIClient
	GetTitle        TitleGetter
	PreProcessBody  PreProcessor
	PreProcessChunk PreProcessor
	ContentSplitter ContentSplitter
}

func (*Learn) CreateChunks

func (l *Learn) CreateChunks(fileContent, title string) []Chunk

CreateChunks generates uploadable chunks to send to a memory store

func (*Learn) CreateChunksCharacterBased

func (l *Learn) CreateChunksCharacterBased(fileContent, title string) []Chunk

func (*Learn) ExtensionSupported

func (l *Learn) ExtensionSupported(path string) (string, bool)

ExtensionSupported checks if the extension for a given file path is supported by the library, it returns the file extension and a bool whether it is supported or not, supported file types are txt, pdf, and md.

func (*Learn) FromFile

func (l *Learn) FromFile(path string) (int, error)

FromFile processes a file to learn into an OpenAI memory store, returns number of embeddings created and an error if failed

func (*Learn) Learn

func (l *Learn) Learn(contents, title string) (int, error)

func (*Learn) ProcessMarkdown

func (l *Learn) ProcessMarkdown(path string) (string, string, error)

func (*Learn) ProcessPDFFile

func (l *Learn) ProcessPDFFile(path string) (string, string, error)

ProcessPDFFile reads a PDF file from the path and extracts the human-readable text, it will also attempt to extract the title of the PDF. It returns the title, the human-readable content, and an optional error if there is a problem reading or parsing the file.

func (*Learn) ProcessTextFile

func (l *Learn) ProcessTextFile(path string) (string, string, error)

ProcessTextFile opens and fully reads the file in 'path', it treats the first line that contains text as a title, and reads the remaining file into another variable, it then returns the title and the file contents. It returns an error if there is a problem opening or reading the file.

type OAIClient

type OAIClient struct {
	Client *openai.Client
}

func (*OAIClient) CallCompletionAPI

func (c *OAIClient) CallCompletionAPI(settings *BotSettings, prompt *BotPrompt) (string, int, error)

func (*OAIClient) CallEmbeddingAPIWithRetry

func (c *OAIClient) CallEmbeddingAPIWithRetry(texts []string, embedModel openai.EmbeddingModel,
	maxRetries int) (*openai.EmbeddingResponse, error)

func (*OAIClient) CheckTokenLimit

func (c *OAIClient) CheckTokenLimit(text, model string, tokenLimit int) bool

func (*OAIClient) GetEmbeddingModel

func (c *OAIClient) GetEmbeddingModel() openai.EmbeddingModel

func (*OAIClient) GetEmbeddingsForData

func (c *OAIClient) GetEmbeddingsForData(chunks []Chunk, batchSize int,
	embedModel openai.EmbeddingModel) ([][]float32, error)

GetEmbeddingsForData gets embedding vectors for data to be ingested and used for context in queries

func (*OAIClient) GetEmbeddingsForPrompt

func (c *OAIClient) GetEmbeddingsForPrompt(text string, embedModel openai.EmbeddingModel) ([]float32, error)

GetEmbeddingsForPrompt will return embedding vectors for the prompt

type OpenAIResponse

type OpenAIResponse struct {
	Response string `json:"response"`
	Tokens   int    `json:"tokens"`
}

type Pinecone

type Pinecone struct {
	APIEndpoint string
	APIKey      string
	UUID        string // Used when ingesting data
}

func (*Pinecone) Retrieve

func (p *Pinecone) Retrieve(questionEmbedding []float32, topK int, uuid string) ([]QueryMatch, error)

func (*Pinecone) UploadEmbeddings

func (p *Pinecone) UploadEmbeddings(embeddings [][]float32, chunks []Chunk) error

type PineconeQueryItem

type PineconeQueryItem struct {
	Values []float32 `json:"values"`
}

type PineconeQueryRequest

type PineconeQueryRequest struct {
	TopK            int                 `json:"topK"`
	IncludeMetadata bool                `json:"includeMetadata"`
	Namespace       string              `json:"namespace"`
	Queries         []PineconeQueryItem `json:"queries"`
}

type PineconeQueryResponse

type PineconeQueryResponse struct {
	Results []PineconeQueryResponseResult `json:"results"`
}

type PineconeQueryResponseResult

type PineconeQueryResponseResult struct {
	Matches []QueryMatch `json:"matches"`
}

type PineconeVector

type PineconeVector struct {
	ID       string            `json:"id"`
	Values   []float32         `json:"values"`
	Metadata map[string]string `json:"metadata,omitempty"`
}

type PreProcessor

type PreProcessor func(string) (string, error)

type QueryMatch

type QueryMatch struct {
	ID       string            `json:"id"`
	Score    float32           `json:"score"` // Use "score" instead of "distance"
	Metadata map[string]string `json:"metadata"`
}

type RenderContext

type RenderContext struct {
	Role    string
	Content string
}

type Storage

type Storage interface {
	Retrieve(questionEmbedding []float32, topK int, uuid string) ([]QueryMatch, error)
	UploadEmbeddings(embeddings [][]float32, chunks []Chunk) error
}

type TitleGetter

type TitleGetter func(string) (string, error)

Directories

Path Synopsis
examples

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL