Documentation
¶
Index ¶
- func Execute(args []string) error
- type CompletionRequest
- type CompletionResponse
- type EmbeddingRequest
- type EmbeddingResponse
- type HealthResponse
- type ImageContext
- func (c *ImageContext) BatchSize(configuredBatchSize int) int
- func (c *ImageContext) EmbedSize(llamaContext *llama.Context) int
- func (c *ImageContext) Free(modelPath string)
- func (c *ImageContext) NeedCrossAttention(inputs ...input) bool
- func (c *ImageContext) NewEmbed(llamaContext *llama.Context, data []byte, aspectRatioId int) ([][]float32, error)
- type ImageData
- type InputCache
- type InputCacheSlot
- type NewSequenceParams
- type Options
- type Sequence
- type Server
- type ServerStatus
- type Timings
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
Types ¶
type CompletionRequest ¶
type CompletionResponse ¶
type CompletionResponse struct { Content string `json:"content"` Stop bool `json:"stop"` Model string `json:"model,omitempty"` Prompt string `json:"prompt,omitempty"` StoppedLimit bool `json:"stopped_limit,omitempty"` PredictedN int `json:"predicted_n,omitempty"` PredictedMS float64 `json:"predicted_ms,omitempty"` PromptN int `json:"prompt_n,omitempty"` PromptMS float64 `json:"prompt_ms,omitempty"` Timings Timings `json:"timings"` }
type EmbeddingRequest ¶
type EmbeddingResponse ¶
type EmbeddingResponse struct {
Embedding []float32 `json:"embedding"`
}
type HealthResponse ¶
type ImageContext ¶
type ImageContext struct {
// contains filtered or unexported fields
}
func NewImageContext ¶
func NewImageContext(llamaContext *llama.Context, modelPath string) (*ImageContext, error)
func (*ImageContext) BatchSize ¶
func (c *ImageContext) BatchSize(configuredBatchSize int) int
func (*ImageContext) Free ¶
func (c *ImageContext) Free(modelPath string)
func (*ImageContext) NeedCrossAttention ¶
func (c *ImageContext) NeedCrossAttention(inputs ...input) bool
type InputCache ¶
type InputCache struct {
// contains filtered or unexported fields
}
func NewInputCache ¶
func (*InputCache) LoadCacheSlot ¶
func (c *InputCache) LoadCacheSlot(prompt []input, cachePrompt bool) (*InputCacheSlot, []input, error)
func (*InputCache) ShiftCacheSlot ¶
func (c *InputCache) ShiftCacheSlot(slot *InputCacheSlot, numKeep int) error
Frees up space in the KV cache by deleting the oldest half of history and shifting the newest half into that space (saving numKeep inputs at the beginning).
Assumes that at least 1 entry can be freed up by shifting (i.e. numKeep < numCtx).
func (*InputCache) ShiftDiscard ¶
func (c *InputCache) ShiftDiscard(inputLen int, numKeep int) int
type InputCacheSlot ¶
type NewSequenceParams ¶
type NewSequenceParams struct {
// contains filtered or unexported fields
}
type Options ¶
type Options struct { api.Runner NumKeep int `json:"n_keep"` Seed int `json:"seed"` NumPredict int `json:"n_predict"` TopK int `json:"top_k"` TopP float32 `json:"top_p"` MinP float32 `json:"min_p"` TypicalP float32 `json:"typical_p"` RepeatLastN int `json:"repeat_last_n"` Temperature float32 `json:"temperature"` RepeatPenalty float32 `json:"repeat_penalty"` PresencePenalty float32 `json:"presence_penalty"` FrequencyPenalty float32 `json:"frequency_penalty"` Mirostat int `json:"mirostat"` MirostatTau float32 `json:"mirostat_tau"` MirostatEta float32 `json:"mirostat_eta"` Stop []string `json:"stop"` }
TODO (jmorganca): use structs from the api package to avoid duplication; this way the api acts as a proxy instead of using a different api for the runner.
type Server ¶
type Server struct {
// contains filtered or unexported fields
}
func (*Server) NewSequence ¶
type ServerStatus ¶
type ServerStatus int
const ( ServerStatusReady ServerStatus = iota ServerStatusLoadingModel ServerStatusError )
func (ServerStatus) ToString ¶
func (s ServerStatus) ToString() string
Click to show internal directories.
Click to hide internal directories.