README ¶
botMaker
A Go library to help create and train AI bots quickly with OpenAI and Pinecone. It is heavily based on code from the excellent vault-ai project.
Sample Usage
A simple chatbot:
// Chat demonstrates a minimal two-turn chatbot: it sends a first prompt,
// prints the rendered prompt and the response, then feeds both the question
// and the answer back in as context for a follow-up question.
func Chat() {
	// Config exposes the LLM key as LLMAPIKey.
	cfg := Config{
		LLMAPIKey:        "xxxx",
		PineconeKey:      "xxxx",
		PineconeEndpoint: "xxxx",
	}

	// Client
	cl := NewOAIClient(cfg.LLMAPIKey)

	// Settings for the AI
	bs := NewBotSettings()
	bs.ID = "a-UUID-here"

	// Build a prompt using the default template
	pr := NewBotPrompt("", cl)
	pr.Instructions = "You are an AI assistant that provides answers that are helpful in a friendly and cheerful way."
	pr.Body = "What is the best way to scale a redis database?"

	// Create some storage
	pc := &Pinecone{
		APIEndpoint: cfg.PineconeEndpoint,
		APIKey:      cfg.PineconeKey,
		UUID:        bs.ID,
	}

	// attach memory
	bs.Memory = pc

	// NewOAIClient returns an LLMAPIClient, whose completion entry point is
	// CallCompletionAPI (response text, an int, error).
	oaiResponse, _, err := cl.CallCompletionAPI(bs, pr)
	if err != nil {
		fatal("query send fail: %v", err)
	}
	fmt.Println("FIRST PROMPT:")
	fmt.Println(pr.RenderedPrompt)
	fmt.Println("GOT FIRST RESPONSE: ")
	fmt.Println(oaiResponse)

	time.Sleep(5 * time.Second)

	// we do some string shenanigans to make a chatbot
	// First: Update the context
	oldBody := "Human: " + pr.Body
	pr.ContextToRender = append(pr.ContextToRender, oldBody)

	// Next make sure the AI's response is added too
	pr.ContextToRender = append(pr.ContextToRender, oaiResponse)

	// Replace the main query with a new one
	pr.Body = "How is a cluster different from sentinel?"

	// Make the call!
	secondResponse, _, err := cl.CallCompletionAPI(bs, pr)
	if err != nil {
		fatal("prompt2 fail: %v", err)
	}
	fmt.Println("SECOND RESPONSE")
	fmt.Println(secondResponse)
}
Learning from a PDF
// TestLearning demonstrates ingesting a PDF: it wires an LLM client and a
// Pinecone store into a Learn value and calls FromFile on the document.
func TestLearning() {
	// Config exposes the LLM key as LLMAPIKey.
	cfg := Config{
		LLMAPIKey:        "xxx",
		PineconeKey:      "xxx",
		PineconeEndpoint: "xxx",
	}

	// Client
	cl := NewOAIClient(cfg.LLMAPIKey)

	// Create some storage
	pc := &Pinecone{
		APIEndpoint: cfg.PineconeEndpoint,
		APIKey:      cfg.PineconeKey,
		UUID:        "a45dbe63-4207-419c-bca7-5d940bf3d908",
	}

	l := Learn{
		Model:      openai.GPT3TextDavinci003,
		TokenLimit: 8191,
		ChunkSize:  20,
		Memory:     pc,
		Client:     cl,
	}

	// FromFile's first result is an int count, which this sample ignores.
	_, err := l.FromFile("/data/socrates.pdf")
	if err != nil {
		// Use the same format-plus-args form as the chatbot sample.
		fatal("learn from file fail: %v", err)
	}
}
Documentation ¶
Index ¶
- Variables
- func CountTokens(text, model string) (int, error)
- func GetContexts(b *BotPrompt, s *BotSettings, m Storage, c LLMAPIClient) ([]string, error)
- func HashFileName(filename string) string
- func PathTitleGetter(path string) (string, error)
- type BotPrompt
- func (b *BotPrompt) AsChatCompletionRequest(s *BotSettings) (*openai.ChatCompletionRequest, error)
- func (b *BotPrompt) AsCompletionRequest(s *BotSettings) (*openai.CompletionRequest, error)
- func (b *BotPrompt) GetContextsForLastPrompt() []string
- func (b *BotPrompt) Prompt(settings *BotSettings) (string, error)
- type BotSettings
- type Chunk
- type Config
- type ContentSplitter
- type Context
- type LLMAPIClient
- type Learn
- func (l *Learn) CreateChunks(fileContent, title string) []Chunk
- func (l *Learn) CreateChunksCharacterBased(fileContent, title string) []Chunk
- func (l *Learn) ExtensionSupported(path string) (string, bool)
- func (l *Learn) FromFile(path string) (int, error)
- func (l *Learn) Learn(contents, title string) (int, error)
- func (l *Learn) ProcessMarkdown(path string) (string, string, error)
- func (l *Learn) ProcessPDFFile(path string) (string, string, error)
- func (l *Learn) ProcessTextFile(path string) (string, string, error)
- type OAIClient
- func (c *OAIClient) CallCompletionAPI(settings *BotSettings, prompt *BotPrompt) (string, int, error)
- func (c *OAIClient) CallEmbeddingAPIWithRetry(texts []string, embedModel openai.EmbeddingModel, maxRetries int) (*openai.EmbeddingResponse, error)
- func (c *OAIClient) CheckTokenLimit(text, model string, tokenLimit int) bool
- func (c *OAIClient) GetEmbeddingModel() openai.EmbeddingModel
- func (c *OAIClient) GetEmbeddingsForData(chunks []Chunk, batchSize int, embedModel openai.EmbeddingModel) ([][]float32, error)
- func (c *OAIClient) GetEmbeddingsForPrompt(text string, embedModel openai.EmbeddingModel) ([]float32, error)
- type OpenAIResponse
- type Pinecone
- type PineconeQueryItem
- type PineconeQueryRequest
- type PineconeQueryResponse
- type PineconeQueryResponseResult
- type PineconeVector
- type PreProcessor
- type QueryMatch
- type RenderContext
- type Storage
- type TitleGetter
Constants ¶
This section is empty.
Variables ¶
var DEFAULT_TEMPLATE = `` /* 266-byte string literal not displayed */
Functions ¶
func CountTokens ¶
func GetContexts ¶
func GetContexts(b *BotPrompt, s *BotSettings, m Storage, c LLMAPIClient) ([]string, error)
GetContexts will use OpenAI to get vectors for the prompt, then use Memory to retrieve relevant contexts to include in the query prompt
func HashFileName ¶
func PathTitleGetter ¶
Types ¶
type BotPrompt ¶
type BotPrompt struct { OAIClient LLMAPIClient Instructions string // You are an AI assistant that is happy, helpful and tries to offer insightful answers Body string // The actual prompt DesiredFormat string // Provide your answer using the following output ContextToRender []string // Rendered context (within token limit) ContextTitles []string // titles and references to the content Stop []string // Human: AI: History []*RenderContext Template string RenderedPrompt string PromptLength int // contains filtered or unexported fields }
BotPrompt has the components to make a call to OpenAI
func NewBotPrompt ¶
func NewBotPrompt(promptTemplate string, withClient LLMAPIClient) *BotPrompt
func (*BotPrompt) AsChatCompletionRequest ¶
func (b *BotPrompt) AsChatCompletionRequest(s *BotSettings) (*openai.ChatCompletionRequest, error)
func (*BotPrompt) AsCompletionRequest ¶
func (b *BotPrompt) AsCompletionRequest(s *BotSettings) (*openai.CompletionRequest, error)
func (*BotPrompt) GetContextsForLastPrompt ¶
type BotSettings ¶
type BotSettings struct { ID string // Used when retrieving contexts Model string Temp float32 TopP float32 FrequencyPenalty float32 PresencePenalty float32 MaxTokens int // Max to receive TokenLimit int // Max to send EmbeddingModel openai.EmbeddingModel Memory Storage MemoryAcceptScore float32 }
BotSettings holds configs for OpenAI APIs
func NewBotSettings ¶
func NewBotSettings() *BotSettings
NewBotSettings Returns settings for OpenAI with sane defaults
type Config ¶
// Config carries the credentials and endpoint used by the library. Each
// field's env tag names the environment variable it is bound to; only
// LLM_API_KEY is tagged as required.
type Config struct {
	LLMAPIKey        string `env:"LLM_API_KEY,required"`
	PineconeKey      string `env:"PINECONE_KEY"`
	PineconeEndpoint string `env:"PINECONE_URL"`
}
func NewConfigFromEnv ¶
func NewConfigFromEnv() *Config
type ContentSplitter ¶
type LLMAPIClient ¶
type LLMAPIClient interface { CallCompletionAPI(settings *BotSettings, prompt *BotPrompt) (string, int, error) CallEmbeddingAPIWithRetry(texts []string, embedModel openai.EmbeddingModel, maxRetries int) (*openai.EmbeddingResponse, error) GetEmbeddingsForData(chunks []Chunk, batchSize int, embedModel openai.EmbeddingModel) ([][]float32, error) GetEmbeddingsForPrompt(text string, embedModel openai.EmbeddingModel) ([]float32, error) GetEmbeddingModel() openai.EmbeddingModel CheckTokenLimit(text, model string, tokenLimit int) bool }
func NewOAIClient ¶
func NewOAIClient(key string) LLMAPIClient
type Learn ¶
type Learn struct { Model string TokenLimit int ChunkSize int Overlap int Memory Storage Client LLMAPIClient GetTitle TitleGetter PreProcessBody PreProcessor PreProcessChunk PreProcessor ContentSplitter ContentSplitter }
func (*Learn) CreateChunks ¶
CreateChunks generates uploadable chunks to send to a memory store
func (*Learn) CreateChunksCharacterBased ¶
func (*Learn) ExtensionSupported ¶
ExtensionSupported checks whether the extension of a given file path is supported by the library. It returns the file extension and a bool indicating whether it is supported. Supported file types are txt, pdf, and md.
func (*Learn) FromFile ¶
FromFile processes a file to learn into an OpenAI memory store, returns number of embeddings created and an error if failed
func (*Learn) ProcessMarkdown ¶
func (*Learn) ProcessPDFFile ¶
ProcessPDFFile reads a PDF file from the path and extracts the human-readable text, it will also attempt to extract the title of the PDF. It returns the title, the human-readable content, and an optional error if there is a problem reading or parsing the file.
func (*Learn) ProcessTextFile ¶
ProcessTextFile opens and fully reads the file at 'path'. It treats the first line that contains text as the title and reads the remainder of the file as the contents, then returns the title and the file contents. It returns an error if there is a problem opening or reading the file.
type OAIClient ¶
func (*OAIClient) CallCompletionAPI ¶
func (*OAIClient) CallEmbeddingAPIWithRetry ¶
func (c *OAIClient) CallEmbeddingAPIWithRetry(texts []string, embedModel openai.EmbeddingModel, maxRetries int) (*openai.EmbeddingResponse, error)
func (*OAIClient) CheckTokenLimit ¶
func (*OAIClient) GetEmbeddingModel ¶
func (c *OAIClient) GetEmbeddingModel() openai.EmbeddingModel
func (*OAIClient) GetEmbeddingsForData ¶
func (c *OAIClient) GetEmbeddingsForData(chunks []Chunk, batchSize int, embedModel openai.EmbeddingModel) ([][]float32, error)
GetEmbeddingsForData gets embedding vectors for data to be ingested and used for context in queries
func (*OAIClient) GetEmbeddingsForPrompt ¶
func (c *OAIClient) GetEmbeddingsForPrompt(text string, embedModel openai.EmbeddingModel) ([]float32, error)
GetEmbeddingsForPrompt will return embedding vectors for the prompt
type OpenAIResponse ¶
type PineconeQueryItem ¶
// PineconeQueryItem is the element type of PineconeQueryRequest.Queries.
// Values is serialized under the JSON key "values".
// NOTE(review): Values is presumably the query embedding vector — confirm.
type PineconeQueryItem struct {
Values []float32 `json:"values"`
}
type PineconeQueryRequest ¶
type PineconeQueryRequest struct { TopK int `json:"topK"` IncludeMetadata bool `json:"includeMetadata"` Namespace string `json:"namespace"` Queries []PineconeQueryItem `json:"queries"` }
type PineconeQueryResponse ¶
// PineconeQueryResponse is the decoded JSON body of a Pinecone query reply.
// Results is read from the JSON key "results".
// NOTE(review): presumably one result per submitted query item — confirm.
type PineconeQueryResponse struct {
Results []PineconeQueryResponseResult `json:"results"`
}
type PineconeQueryResponseResult ¶
// PineconeQueryResponseResult is the element type of
// PineconeQueryResponse.Results. Matches is read from the JSON key "matches".
type PineconeQueryResponseResult struct {
Matches []QueryMatch `json:"matches"`
}