Documentation ¶
Index ¶
- Constants
- Variables
- func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts api.Options) (int, uint64)
- func Init() error
- func NewGGUFV3(bo binary.ByteOrder) *gguf
- func PredictServerFit(allGpus gpu.GpuInfoList, ggml *GGML, adapters, projectors []string, ...) (bool, uint64)
- func Quantize(infile, outfile, filetype string) error
- func SystemInfo() string
- type CompletionRequest
- type CompletionResponse
- type DetokenizeRequest
- type DetokenizeResponse
- type EmbeddingRequest
- type EmbeddingResponse
- type GGML
- type ImageData
- type KV
- func (kv KV) Architecture() string
- func (kv KV) BlockCount() uint64
- func (kv KV) ContextLength() uint64
- func (kv KV) EmbeddingLength() uint64
- func (kv KV) FileType() string
- func (kv KV) GQA() uint64
- func (kv KV) HeadCount() uint64
- func (kv KV) HeadCountKV() uint64
- func (kv KV) ParameterCount() uint64
- type Layer
- type LlamaServer
- type ServerStatus
- type ServerStatusResp
- type StatusWriter
- type Tensor
- type Tensors
- type TokenizeRequest
- type TokenizeResponse
Constants ¶
View Source
const ( // Magic constant for `ggml` files (unversioned). FILE_MAGIC_GGML = 0x67676d6c // Magic constant for `ggml` files (versioned, ggmf). FILE_MAGIC_GGMF = 0x67676d66 // Magic constant for `ggml` files (versioned, ggjt). FILE_MAGIC_GGJT = 0x67676a74 // Magic constant for `ggla` files (LoRA adapter). FILE_MAGIC_GGLA = 0x67676C61 // Magic constant for `gguf` files (versioned, gguf) FILE_MAGIC_GGUF_LE = 0x46554747 FILE_MAGIC_GGUF_BE = 0x47475546 )
View Source
const ( GGUFTokenNormal uint32 GGUFTokenUnknown GGUFTokenControl GGUFTokenUserDefined GGUFTokenUnused GGUFTokenByte )
Variables ¶
View Source
var ErrUnsupportedFormat = errors.New("unsupported model format")
Functions ¶
func EstimateGPULayers ¶ added in v0.1.33
func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts api.Options) (int, uint64)
Given a model and one or more GPU targets, predict how many layers and bytes we can load. The GPUs provided must all be the same Library.
func PredictServerFit ¶ added in v0.1.33
func PredictServerFit(allGpus gpu.GpuInfoList, ggml *GGML, adapters, projectors []string, opts api.Options) (bool, uint64)
This algorithm looks for a complete fit to determine if we need to unload other models.
func SystemInfo ¶ added in v0.1.32
func SystemInfo() string
SystemInfo is an unused example of calling llama.cpp functions using CGo
Types ¶
type CompletionRequest ¶ added in v0.1.32
type CompletionResponse ¶ added in v0.1.32
type DetokenizeRequest ¶
type DetokenizeRequest struct {
Tokens []int `json:"tokens"`
}
type DetokenizeResponse ¶
type DetokenizeResponse struct {
Content string `json:"content"`
}
type EmbeddingRequest ¶
type EmbeddingRequest struct {
Content string `json:"content"`
}
type EmbeddingResponse ¶
type EmbeddingResponse struct {
Embedding []float64 `json:"embedding"`
}
type GGML ¶
type GGML struct {
// contains filtered or unexported fields
}
func DecodeGGML ¶
func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error)
type KV ¶
func (KV) Architecture ¶ added in v0.1.32
func (KV) BlockCount ¶ added in v0.1.32
func (KV) ContextLength ¶ added in v0.1.32
func (KV) EmbeddingLength ¶ added in v0.1.32
func (KV) HeadCountKV ¶ added in v0.1.32
func (KV) ParameterCount ¶ added in v0.1.32
type LlamaServer ¶ added in v0.1.32
type LlamaServer interface { Ping(ctx context.Context) error WaitUntilRunning(ctx context.Context) error Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error Embedding(ctx context.Context, prompt string) ([]float64, error) Tokenize(ctx context.Context, content string) ([]int, error) Detokenize(ctx context.Context, tokens []int) (string, error) Close() error EstimatedVRAM() uint64 }
func NewLlamaServer ¶ added in v0.1.32
func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, projectors []string, opts api.Options) (LlamaServer, error)
NewLlamaServer will run a server for the given GPUs. The GPU list must be a single family.
type ServerStatus ¶ added in v0.1.32
type ServerStatus int
const ( ServerStatusReady ServerStatus = iota ServerStatusNoSlotsAvaialble ServerStatusLoadingModel ServerStatusNotResponding ServerStatusError )
func (ServerStatus) ToString ¶ added in v0.1.33
func (s ServerStatus) ToString() string
type ServerStatusResp ¶ added in v0.1.32
type StatusWriter ¶ added in v0.1.32
type StatusWriter struct { LastErrMsg string // contains filtered or unexported fields }
StatusWriter is a writer that captures error messages from the llama runner process
func NewStatusWriter ¶ added in v0.1.32
func NewStatusWriter(out *os.File) *StatusWriter
type TokenizeRequest ¶
type TokenizeRequest struct {
Content string `json:"content"`
}
type TokenizeResponse ¶
type TokenizeResponse struct {
Tokens []int `json:"tokens"`
}
Source Files ¶
Click to show internal directories.
Click to hide internal directories.