llamarunner

package
v0.0.0-...-e65ccaf Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 20, 2025 License: MIT Imports: 25 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func Execute

func Execute(args []string) error

Types

type CompletionRequest

// CompletionRequest holds the JSON-tagged parameters of a completion request.
//
// Prompt is encoded as "prompt", Images as "image_data", Grammar as "grammar"
// and CachePrompt as "cache_prompt". Options is embedded without a tag, so
// encoding/json promotes its fields into the same JSON object as the fields
// declared here.
type CompletionRequest struct {
	Prompt      string      `json:"prompt"`
	Images      []ImageData `json:"image_data"`
	Grammar     string      `json:"grammar"`
	CachePrompt bool        `json:"cache_prompt"`

	Options
}

type CompletionResponse

// CompletionResponse holds the JSON-tagged result of a completion.
//
// Content ("content"), Stop ("stop") and Timings ("timings") have no
// omitempty option and are therefore always present in the encoded JSON.
// Model, Prompt, StoppedLimit, PredictedN, PredictedMS, PromptN and PromptMS
// carry omitempty and are left out of the encoded JSON when they hold their
// zero value.
//
// NOTE(review): the four Predicted*/Prompt* counters appear both at the top
// level and inside Timings; which of the two is authoritative is not visible
// here. The MS suffix presumably means milliseconds; confirm.
type CompletionResponse struct {
	Content string `json:"content"`
	Stop    bool   `json:"stop"`

	Model        string  `json:"model,omitempty"`
	Prompt       string  `json:"prompt,omitempty"`
	StoppedLimit bool    `json:"stopped_limit,omitempty"`
	PredictedN   int     `json:"predicted_n,omitempty"`
	PredictedMS  float64 `json:"predicted_ms,omitempty"`
	PromptN      int     `json:"prompt_n,omitempty"`
	PromptMS     float64 `json:"prompt_ms,omitempty"`

	Timings Timings `json:"timings"`
}

type EmbeddingRequest

// EmbeddingRequest holds the JSON-tagged parameters of an embedding request.
//
// Content is encoded as "content" and CachePrompt as "cache_prompt".
type EmbeddingRequest struct {
	Content     string `json:"content"`
	CachePrompt bool   `json:"cache_prompt"`
}

type EmbeddingResponse

// EmbeddingResponse holds the JSON-tagged result of an embedding request.
//
// Embedding is a slice of float32 values encoded as a JSON array under
// "embedding"; a nil slice is encoded as null.
type EmbeddingResponse struct {
	Embedding []float32 `json:"embedding"`
}

type HealthResponse

// HealthResponse holds a JSON-tagged health report.
//
// Status is encoded as "status" and Progress as "progress".
//
// NOTE(review): Status presumably carries the text produced by
// ServerStatus.ToString, and Progress presumably reports model loading
// progress; neither the producer nor the value range is visible here, so
// confirm against the handler that fills this struct.
type HealthResponse struct {
	Status   string  `json:"status"`
	Progress float32 `json:"progress"`
}

type ImageContext

// ImageContext is an opaque type: all of its fields are unexported, so it can
// only be used through its constructor and methods.
type ImageContext struct {
	// contains filtered or unexported fields
}

func NewImageContext

func NewImageContext(llamaContext *llama.Context, modelPath string) (*ImageContext, error)

func (*ImageContext) BatchSize

func (c *ImageContext) BatchSize(configuredBatchSize int) int

func (*ImageContext) EmbedSize

func (c *ImageContext) EmbedSize(llamaContext *llama.Context) int

func (*ImageContext) Free

func (c *ImageContext) Free(modelPath string)

func (*ImageContext) NeedCrossAttention

func (c *ImageContext) NeedCrossAttention(inputs ...input) bool

func (*ImageContext) NewEmbed

func (c *ImageContext) NewEmbed(llamaContext *llama.Context, data []byte, aspectRatioId int) ([][]float32, error)

type ImageData

// ImageData is one image attached to a request.
//
// Data is encoded as "data"; because it is a []byte, encoding/json represents
// it as a base64-encoded string rather than a JSON array. ID is encoded as
// "id" and AspectRatioID as "aspect_ratio_id".
//
// NOTE(review): how ID is matched to the prompt and which values
// AspectRatioID may take are not visible here; confirm against the callers.
type ImageData struct {
	Data          []byte `json:"data"`
	ID            int    `json:"id"`
	AspectRatioID int    `json:"aspect_ratio_id"`
}

type InputCache

// InputCache is an opaque type: all of its fields are unexported, so it can
// only be used through its constructor and methods.
type InputCache struct {
	// contains filtered or unexported fields
}

func NewInputCache

func NewInputCache(lc *llama.Context, kvSize int, numSlots int, multiUserCache bool) (*InputCache, error)

func (*InputCache) LoadCacheSlot

func (c *InputCache) LoadCacheSlot(prompt []input, cachePrompt bool) (*InputCacheSlot, []input, error)

func (*InputCache) ShiftCacheSlot

func (c *InputCache) ShiftCacheSlot(slot *InputCacheSlot, numKeep int) error

Frees up space in the KV cache by deleting the oldest half of history and shifting the newest half into that space (saving numKeep inputs at the beginning).

Assumes that at least 1 entry can be freed up by shifting (i.e. numKeep < numCtx)

func (*InputCache) ShiftDiscard

func (c *InputCache) ShiftDiscard(inputLen int, numKeep int) int

type InputCacheSlot

// InputCacheSlot describes one slot of the KV cache: its index, the inputs
// currently stored in it, and whether a sequence is actively using it. The
// type also has unexported fields that are not shown here.
type InputCacheSlot struct {
	// Index in the KV cache
	Id int

	// Inputs that are stored in the KV cache
	Inputs []input

	// is this cache actively being processed as part of a sequence?
	InUse bool
	// contains filtered or unexported fields
}

type NewSequenceParams

// NewSequenceParams is the parameter struct accepted by Server.NewSequence.
// All of its fields are unexported, so code outside this package can only
// pass its zero value.
type NewSequenceParams struct {
	// contains filtered or unexported fields
}

type Options

// Options groups the per-request settings that are embedded into
// CompletionRequest.
//
// api.Runner is embedded without a tag; assuming it is a struct type (its
// definition lives in the api package and is not shown here), encoding/json
// promotes its exported fields into the same JSON object. Every other field
// is encoded under the snake_case key given in its tag, e.g. NumKeep as
// "n_keep" and NumPredict as "n_predict".
//
// NOTE(review): the fields are named after common sampling controls
// (top-k/top-p/min-p, repeat, presence and frequency penalties, mirostat,
// stop strings). Their exact semantics and defaults are not visible here;
// confirm against the code that consumes them.
type Options struct {
	api.Runner

	NumKeep          int      `json:"n_keep"`
	Seed             int      `json:"seed"`
	NumPredict       int      `json:"n_predict"`
	TopK             int      `json:"top_k"`
	TopP             float32  `json:"top_p"`
	MinP             float32  `json:"min_p"`
	TypicalP         float32  `json:"typical_p"`
	RepeatLastN      int      `json:"repeat_last_n"`
	Temperature      float32  `json:"temperature"`
	RepeatPenalty    float32  `json:"repeat_penalty"`
	PresencePenalty  float32  `json:"presence_penalty"`
	FrequencyPenalty float32  `json:"frequency_penalty"`
	Mirostat         int      `json:"mirostat"`
	MirostatTau      float32  `json:"mirostat_tau"`
	MirostatEta      float32  `json:"mirostat_eta"`
	Stop             []string `json:"stop"`
}

TODO (jmorganca): use structs from the api package to avoid duplication; this way the api acts as a proxy instead of using a different api for the runner.

type Sequence

// Sequence is an opaque type: all of its fields are unexported. Values are
// returned by Server.NewSequence.
type Sequence struct {
	// contains filtered or unexported fields
}

type Server

// Server is an opaque type: all of its fields are unexported, so it can only
// be used through its methods.
type Server struct {
	// contains filtered or unexported fields
}

func (*Server) NewSequence

func (s *Server) NewSequence(prompt string, images []ImageData, params NewSequenceParams) (*Sequence, error)

type ServerStatus

// ServerStatus is an integer enumeration of server states.
type ServerStatus int
// The values are assigned by iota in declaration order:
// ServerStatusReady is 0, ServerStatusLoadingModel is 1 and
// ServerStatusError is 2. The zero value of ServerStatus is therefore
// ServerStatusReady.
const (
	ServerStatusReady ServerStatus = iota
	ServerStatusLoadingModel
	ServerStatusError
)

func (ServerStatus) ToString

func (s ServerStatus) ToString() string

type Timings

// Timings is the nested object encoded under "timings" in
// CompletionResponse.
//
// PredictedN and PromptN are integer counts; PredictedMS and PromptMS are
// float64 values. No field carries omitempty, so all four keys are always
// present in the encoded JSON.
//
// NOTE(review): the N fields presumably count tokens and the MS fields
// presumably hold durations in milliseconds; confirm against the code that
// fills this struct.
type Timings struct {
	PredictedN  int     `json:"predicted_n"`
	PredictedMS float64 `json:"predicted_ms"`
	PromptN     int     `json:"prompt_n"`
	PromptMS    float64 `json:"prompt_ms"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL