llama

package
v1.2.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 20, 2023 License: MIT Imports: 17 Imported by: 1

Documentation

Index

Constants

View Source
const (
	LLAMA_FILE_VERSION           = 1
	LLAMA_FILE_MAGIC             = 0x67676a74 // 'ggjt' in hex
	LLAMA_FILE_MAGIC_OLD         = 0x67676d66 // 'ggmf' in hex
	LLAMA_FILE_MAGIC_UNVERSIONED = 0x67676d6c // 'ggml' pre-versioned files

	SPLIT_NONE       = 0
	SPLIT_BY_COLUMNS = 1
	SPLIT_BY_ROWS    = 2
)

Variables

View Source
var (
	// determine number of model parts based on the dimension
	LLAMA_N_PARTS = map[uint32]int{
		4096: 1,
		5120: 2,
		6656: 4,
		8192: 8,
	}
)

Functions

func Colorize

func Colorize(format string, opts ...interface{}) (n int, err error)

func Eval

func Eval(

	lctx *Context,
	tokens []uint32,
	tokensCount uint32,
	pastCount uint32,
	params ModelParams) error

func ExtractTokens

func ExtractTokens(r *ring.Ring, count int) []uint32

ExtractTokens is a function to extract a slice of tokens from the ring buffer

func Resize

func Resize(slice []float32, size int) []float32

Resize is a safe Go replacement for C++ std::vector::resize() https://go.dev/play/p/VlQ7N75E5AD

func ResizeInplace

func ResizeInplace(slice *[]float32, size int)

NB! This does not clear the underlying array when resizing https://go.dev/play/p/DbK4dFqwrZn

func SampleTopPTopK

func SampleTopPTopK(
	lctx *Context,

	lastNTokens *ring.Ring,
	lastNTokensSize uint32,
	topK uint32,
	topP float32,
	temp float32,
	repeatPenalty float32,
) uint32

std::mt19937 = A Mersenne Twister pseudo-random generator of 32-bit numbers with a state size of 19937 bits.

Types

type Context

type Context struct {
	Model *Model
	Vocab *ml.Vocab

	// decode output (2-dimensional array: [n_tokens][n_vocab])
	Logits    []float32
	LogitsAll bool

	// input embedding (1-dimensional array: [n_embd])
	Embedding []float32
}

func LoadModel

func LoadModel(
	fileName string,

	silent bool,
) (*Context, error)

func NewContext

func NewContext() *Context

type ContextParams

type ContextParams struct {
	CtxSize    uint32 // text context
	PartsCount int    // -1 for default
	Seed       int    // RNG seed, 0 for random
	LogitsAll  bool   // the llama_eval() call computes all logits, not just the last one
	VocabOnly  bool   // only load the vocabulary, no weights
	UseLock    bool   // force system to keep model in RAM
	Embedding  bool   // embedding mode only
}

ContextParams corresponds to the C struct llama_context_params.

type HParams

type HParams struct {
	// contains filtered or unexported fields
}

HParams holds the default hyperparameters (LLaMA 7B).

type KVCache

type KVCache struct {
	K *ml.Tensor
	V *ml.Tensor

	N uint32 // number of tokens currently in the cache
}

type Layer

type Layer struct {
	// contains filtered or unexported fields
}

type Model

type Model struct {
	Type ModelType
	// contains filtered or unexported fields
}

func NewModel

func NewModel() *Model

type ModelParams added in v1.2.0

type ModelParams struct {
	Model  string // model path
	Prompt string

	MaxThreads int

	UseAVX  bool
	UseNEON bool

	Seed         int
	PredictCount uint32 // new tokens to predict
	RepeatLastN  uint32 // last n tokens to penalize
	PartsCount   int    // amount of model parts (-1 = determine from model dimensions)
	CtxSize      uint32 // context size
	BatchSize    uint32 // batch size for prompt processing
	KeepCount    uint32

	TopK          uint32  // 40
	TopP          float32 // 0.95
	Temp          float32 // 0.80
	RepeatPenalty float32 // 1.10

	InputPrefix string   // string to prefix user inputs with
	Antiprompt  []string // string upon seeing which more user input is prompted

	MemoryFP16   bool // use f16 instead of f32 for memory kv
	RandomPrompt bool // do not randomize prompt if none provided
	UseColor     bool // use color to distinguish generations and inputs
	Interactive  bool // interactive mode

	Embedding        bool // get only sentence embedding
	InteractiveStart bool // wait for user input immediately

	Instruct   bool // instruction mode (used for Alpaca models)
	IgnoreEOS  bool // do not stop generating after eos
	Perplexity bool // compute perplexity over the prompt
	UseMLock   bool // use mlock to keep model in memory
	MemTest    bool // compute maximum memory usage

	VerbosePrompt bool
}

type ModelType

type ModelType uint8
const (
	MODEL_UNKNOWN ModelType = iota
	MODEL_7B
	MODEL_13B
	MODEL_30B
	MODEL_65B
)

ModelType enumerates the available LLaMA model sizes.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL