Documentation ¶
Index ¶
- Constants
- Variables
- func Colorize(format string, opts ...interface{}) (n int, err error)
- func Eval(lctx *Context, tokens []uint32, tokensCount uint32, pastCount uint32, ...) error
- func ExtractTokens(r *ring.Ring, count int) []uint32
- func Resize(slice []float32, size int) []float32
- func ResizeInplace(slice *[]float32, size int)
- func SampleTopPTopK(lctx *Context, lastNTokens *ring.Ring, lastNTokensSize uint32, topK uint32, ...) uint32
- type Context
- type ContextParams
- type HParams
- type KVCache
- type Layer
- type Model
- type ModelParams
- type ModelType
Constants ¶
const (
	LLAMA_FILE_VERSION           = 1
	LLAMA_FILE_MAGIC             = 0x67676a74 // 'ggjt' in hex
	LLAMA_FILE_MAGIC_OLD         = 0x67676d66 // 'ggmf' in hex
	LLAMA_FILE_MAGIC_UNVERSIONED = 0x67676d6c // 'ggml' pre-versioned files

	SPLIT_NONE       = 0
	SPLIT_BY_COLUMNS = 1
	SPLIT_BY_ROWS    = 2
)
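For illustration only, the magic values above can be used to recognize a model file's on-disk flavor. The sketch below is not the package's loader; it assumes the file begins with a little-endian uint32 magic word (followed by a uint32 version in 'ggjt'/'ggmf' files):

package main

import (
	"encoding/binary"
	"fmt"
	"os"
)

func main() {
	file, err := os.Open("model.bin") // placeholder path
	if err != nil {
		fmt.Println(err)
		return
	}
	defer file.Close()

	// Read the leading magic word as a little-endian uint32.
	var magic uint32
	if err := binary.Read(file, binary.LittleEndian, &magic); err != nil {
		fmt.Println(err)
		return
	}

	switch magic {
	case 0x67676a74: // LLAMA_FILE_MAGIC
		fmt.Println("versioned 'ggjt' model file")
	case 0x67676d66: // LLAMA_FILE_MAGIC_OLD
		fmt.Println("older versioned 'ggmf' model file")
	case 0x67676d6c: // LLAMA_FILE_MAGIC_UNVERSIONED
		fmt.Println("pre-versioned 'ggml' model file")
	default:
		fmt.Println("not a recognized model file")
	}
}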
Variables ¶
var (
	// determine number of model parts based on the dimension
	LLAMA_N_PARTS = map[uint32]int{
		4096: 1,
		5120: 2,
		6656: 4,
		8192: 8,
	}
)
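A sketch of how this table might be consulted; partsForEmbedding is a hypothetical helper (not part of the package), and the single-part fallback for unknown dimensions is an assumption:

// partsForEmbedding resolves an explicit parts count, or consults
// LLAMA_N_PARTS when count == -1 (hypothetical helper, not in the package).
func partsForEmbedding(count int, embdSize uint32) int {
	if count != -1 {
		return count // caller fixed the number of parts explicitly
	}
	if parts, ok := LLAMA_N_PARTS[embdSize]; ok {
		return parts // e.g. 4096 -> 1 part, 8192 -> 8 parts
	}
	return 1 // assumption: unknown dimensions fall back to a single part
}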
Functions ¶
func ExtractTokens ¶
ExtractTokens extracts a slice of tokens from the ring buffer.
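A usage sketch, assuming the package is imported as llama (the import path is a placeholder) and that the ring holds uint32 token IDs, as the lastNTokens parameter of SampleTopPTopK suggests:

package main

import (
	"container/ring"
	"fmt"

	llama "github.com/gotzmann/llama.go/pkg/llama" // placeholder import path
)

func main() {
	// Fixed-size window of the last 8 tokens, filled with sample IDs.
	lastNTokens := ring.New(8)
	for t := uint32(1); t <= 8; t++ {
		lastNTokens.Value = t
		lastNTokens = lastNTokens.Next()
	}

	// Pull 4 tokens back out of the ring as a plain slice.
	tokens := llama.ExtractTokens(lastNTokens, 4)
	fmt.Println(tokens)
}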
func Resize ¶
Resize (safe) can be used in place of C++ std::vector::resize(): https://go.dev/play/p/VlQ7N75E5AD
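A usage sketch, with the package imported as llama as in the sketch above, assuming Resize follows the std::vector::resize() contract: the result has exactly size elements, existing values survive up to the shorter length, and growth is zero-filled:

logits := []float32{0.1, 0.2, 0.3}

// Grow to 5 elements: existing values are kept, new ones are zeroed.
logits = llama.Resize(logits, 5) // expected: [0.1 0.2 0.3 0 0]

// Shrink back to 2 elements.
logits = llama.Resize(logits, 2) // expected: [0.1 0.2]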
func ResizeInplace ¶
NB! This does not clear the underlying array when resizing: https://go.dev/play/p/DbK4dFqwrZn
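A companion sketch for the in-place variant. Because the backing array is not cleared, shrinking and then growing again within capacity may resurface stale values rather than zeros (an assumed consequence of the note above):

buf := []float32{1, 2, 3, 4}

llama.ResizeInplace(&buf, 2) // buf: [1 2]

// The old backing array still holds 3 and 4, so growing again
// within capacity may expose them instead of zeros.
llama.ResizeInplace(&buf, 4) // buf: possibly [1 2 3 4], not [1 2 0 0]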
Types ¶
type Context ¶
type Context struct {
	Model *Model
	Vocab *ml.Vocab

	// decode output (2-dimensional array: [n_tokens][n_vocab])
	Logits    []float32
	LogitsAll bool

	// input embedding (1-dimensional array: [n_embd])
	Embedding []float32
}
func NewContext ¶
func NewContext() *Context
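A minimal sketch of constructing a context and reading its output buffers; how Logits and Embedding are actually populated is up to the evaluation functions (Eval and friends):

ctx := llama.NewContext()

// Ask evaluation to keep logits for every token, not just the last one.
ctx.LogitsAll = true

// After evaluation, per-token logits land in ctx.Logits
// ([n_tokens][n_vocab], flattened into a []float32) and, in
// embedding mode, the sentence embedding in ctx.Embedding ([n_embd]).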
type ContextParams ¶
type ContextParams struct {
	CtxSize    uint32 // text context
	PartsCount int    // -1 for default
	Seed       int    // RNG seed, 0 for random
	LogitsAll  bool   // the llama_eval() call computes all logits, not just the last one
	VocabOnly  bool   // only load the vocabulary, no weights
	UseLock    bool   // force system to keep model in RAM
	Embedding  bool   // embedding mode only
}
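A sketch of filling ContextParams; the values are illustrative and simply restate the field comments above:

params := llama.ContextParams{
	CtxSize:    2048,  // text context window
	PartsCount: -1,    // -1 = use the default part count
	Seed:       0,     // 0 = random RNG seed
	LogitsAll:  false, // only compute logits for the last token
	VocabOnly:  false, // load weights, not just the vocabulary
	UseLock:    true,  // force the system to keep the model in RAM
	Embedding:  false, // full generation, not embedding-only mode
}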
type HParams ¶
type HParams struct {
// contains filtered or unexported fields
}
default hparams (LLaMA 7B)
type ModelParams ¶ added in v1.2.0
type ModelParams struct {
	Model  string // model path
	Prompt string

	MaxThreads int

	UseAVX  bool
	UseNEON bool

	Seed         int
	PredictCount uint32 // new tokens to predict
	RepeatLastN  uint32 // last n tokens to penalize
	PartsCount   int    // amount of model parts (-1 = determine from model dimensions)
	CtxSize      uint32 // context size
	BatchSize    uint32 // batch size for prompt processing
	KeepCount    uint32

	TopK          uint32  // 40
	TopP          float32 // 0.95
	Temp          float32 // 0.80
	RepeatPenalty float32 // 1.10

	InputPrefix string   // string to prefix user inputs with
	Antiprompt  []string // string upon seeing which more user input is prompted

	MemoryFP16   bool // use f16 instead of f32 for memory kv
	RandomPrompt bool // do not randomize prompt if none provided
	UseColor     bool // use color to distinguish generations and inputs
	Interactive  bool // interactive mode

	Embedding        bool // get only sentence embedding
	InteractiveStart bool // wait for user input immediately

	Instruct   bool // instruction mode (used for Alpaca models)
	IgnoreEOS  bool // do not stop generating after eos
	Perplexity bool // compute perplexity over the prompt
	UseMLock   bool // use mlock to keep model in memory
	MemTest    bool // compute maximum memory usage

	VerbosePrompt bool
}
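A sketch of a typical ModelParams for one-shot generation. The sampling values mirror the defaults noted in the field comments (TopK 40, TopP 0.95, Temp 0.80, RepeatPenalty 1.10); the model path and thread count are placeholders:

params := llama.ModelParams{
	Model:         "./models/7B/model.bin", // placeholder path
	Prompt:        "Why is the sky blue?",
	MaxThreads:    4,
	PartsCount:    -1,   // determine from the model dimensions
	CtxSize:       2048, // context size
	BatchSize:     8,    // batch size for prompt processing
	PredictCount:  128,  // new tokens to predict
	RepeatLastN:   64,   // last n tokens to penalize
	TopK:          40,
	TopP:          0.95,
	Temp:          0.80,
	RepeatPenalty: 1.10,
}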