envconfig

package
v0.0.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 1, 2025 License: MIT Imports: 11 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var (
	// Debug enables additional debug information.
	Debug = Bool("OLLAMA_DEBUG")
	// FlashAttention enables the experimental flash attention feature.
	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
	// KvCacheType is the quantization type for the K/V cache.
	KvCacheType = String("OLLAMA_KV_CACHE_TYPE")
	// NoHistory disables readline history.
	NoHistory = Bool("OLLAMA_NOHISTORY")
	// NoPrune disables pruning of model blobs on startup.
	NoPrune = Bool("OLLAMA_NOPRUNE")
	// SchedSpread allows scheduling models across all GPUs.
	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
	// IntelGPU enables experimental Intel GPU detection.
	IntelGPU = Bool("OLLAMA_INTEL_GPU")
	// MultiUserCache optimizes prompt caching for multi-user scenarios.
	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
)
View Source
var (
	LLMLibrary = String("OLLAMA_LLM_LIBRARY")

	CudaVisibleDevices    = String("CUDA_VISIBLE_DEVICES")
	HipVisibleDevices     = String("HIP_VISIBLE_DEVICES")
	RocrVisibleDevices    = String("ROCR_VISIBLE_DEVICES")
	GpuDeviceOrdinal      = String("GPU_DEVICE_ORDINAL")
	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
)
View Source
var (
	// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
	NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
	// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
	// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
	// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
	MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
)
View Source
var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)

Set aside VRAM per GPU

Functions

func AsMap

func AsMap() map[string]EnvVar

func Bool

func Bool(k string) func() bool

func Host

func Host() *url.URL

Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable. Default is scheme "http" and host "127.0.0.1:11434"

func KeepAlive

func KeepAlive() (keepAlive time.Duration)

KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable. Negative values are treated as infinite. Zero is treated as no keep alive. Default is 5 minutes.

func LoadTimeout

func LoadTimeout() (loadTimeout time.Duration)

LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable. Zero or negative values are treated as infinite. Default is 5 minutes.

func Models

func Models() string

Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable. Default is $HOME/.ollama/models

func Origins

func Origins() (origins []string)

Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable.

func String

func String(s string) func() string

func Uint

func Uint(key string, defaultValue uint) func() uint

func Uint64

func Uint64(key string, defaultValue uint64) func() uint64

func Values

func Values() map[string]string

func Var

func Var(key string) string

Var returns an environment variable stripped of leading and trailing quotes or spaces

Types

type EnvVar

type EnvVar struct {
	Name        string
	Value       any
	Description string
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL