llama

package
v1.2.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 20, 2023 License: MIT Imports: 17 Imported by: 1

Documentation

Index

Constants

View Source
const (
	LLAMA_FILE_VERSION           = 1
	LLAMA_FILE_MAGIC             = 0x67676a74 // 'ggjt' in hex
	LLAMA_FILE_MAGIC_OLD         = 0x67676d66 // 'ggmf' in hex
	LLAMA_FILE_MAGIC_UNVERSIONED = 0x67676d6c // 'ggml' pre-versioned files

	SPLIT_NONE       = 0
	SPLIT_BY_COLUMNS = 1
	SPLIT_BY_ROWS    = 2
)

Variables

View Source
var (
	// determine number of model parts based on the dimension
	LLAMA_N_PARTS = map[uint32]int{
		4096: 1,
		5120: 2,
		6656: 4,
		8192: 8,
	}
)

Functions

func Colorize

func Colorize(format string, opts ...interface{}) (n int, err error)

func Eval

func Eval(

	lctx *Context,
	tokens []uint32,
	tokensCount uint32,
	pastCount uint32,
	params ModelParams) error

func ExtractTokens

func ExtractTokens(r *ring.Ring, count int) []uint32

ExtractTokens is a function to extract a slice of tokens from the ring buffer

func Resize

func Resize(slice []float32, size int) []float32

Resize is a safe Go replacement for C++ std::vector::resize() https://go.dev/play/p/VlQ7N75E5AD

func ResizeInplace

func ResizeInplace(slice *[]float32, size int)

NB! This does not clear the underlying array when resizing https://go.dev/play/p/DbK4dFqwrZn

func SampleTopPTopK

func SampleTopPTopK(
	lctx *Context,

	lastNTokens *ring.Ring,
	lastNTokensSize uint32,
	topK uint32,
	topP float32,
	temp float32,
	repeatPenalty float32,
) uint32

std::mt19937 = A Mersenne Twister pseudo-random generator of 32-bit numbers with a state size of 19937 bits.

Types

type Context

type Context struct {
	Model *Model
	Vocab *ml.Vocab

	// decode output (2-dimensional array: [n_tokens][n_vocab])
	Logits    []float32
	LogitsAll bool

	// input embedding (1-dimensional array: [n_embd])
	Embedding []float32
}

func LoadModel

func LoadModel(
	fileName string,

	silent bool,
) (*Context, error)

func NewContext

func NewContext() *Context

type ContextParams

type ContextParams struct {
	CtxSize    uint32 // text context
	PartsCount int    // -1 for default
	Seed       int    // RNG seed, 0 for random
	LogitsAll  bool   // the llama_eval() call computes all logits, not just the last one
	VocabOnly  bool   // only load the vocabulary, no weights
	UseLock    bool   // force system to keep model in RAM
	Embedding  bool   // embedding mode only
}

ContextParams corresponds to the C struct llama_context_params.

type HParams

type HParams struct {
	// contains filtered or unexported fields
}

HParams holds the default hyperparameters (LLaMA 7B).

type KVCache

type KVCache struct {
	K *ml.Tensor
	V *ml.Tensor

	N uint32 // number of tokens currently in the cache
}

type Layer

type Layer struct {
	// contains filtered or unexported fields
}

type Model

type Model struct {
	Type ModelType
	// contains filtered or unexported fields
}

func NewModel

func NewModel() *Model

type ModelParams added in v1.2.0

type ModelParams struct {
	Model  string // model path
	Prompt string

	MaxThreads int

	UseAVX  bool
	UseNEON bool

	Seed         int
	PredictCount uint32 // new tokens to predict
	RepeatLastN  uint32 // last n tokens to penalize
	PartsCount   int    // amount of model parts (-1 = determine from model dimensions)
	CtxSize      uint32 // context size
	BatchSize    uint32 // batch size for prompt processing
	KeepCount    uint32

	TopK          uint32  // 40
	TopP          float32 // 0.95
	Temp          float32 // 0.80
	RepeatPenalty float32 // 1.10

	InputPrefix string   // string to prefix user inputs with
	Antiprompt  []string // string upon seeing which more user input is prompted

	MemoryFP16   bool // use f16 instead of f32 for memory kv
	RandomPrompt bool // do not randomize prompt if none provided
	UseColor     bool // use color to distinguish generations and inputs
	Interactive  bool // interactive mode

	Embedding        bool // get only sentence embedding
	InteractiveStart bool // wait for user input immediately

	Instruct   bool // instruction mode (used for Alpaca models)
	IgnoreEOS  bool // do not stop generating after eos
	Perplexity bool // compute perplexity over the prompt
	UseMLock   bool // use mlock to keep model in memory
	MemTest    bool // compute maximum memory usage

	VerbosePrompt bool
}

type ModelType

type ModelType uint8
const (
	MODEL_UNKNOWN ModelType = iota
	MODEL_7B
	MODEL_13B
	MODEL_30B
	MODEL_65B
)

ModelType enumerates the available LLaMA model sizes.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL