Documentation
¶
Index ¶
- Variables
- func AsMap() map[string]EnvVar
- func Bool(k string) func() bool
- func Host() *url.URL
- func KeepAlive() (keepAlive time.Duration)
- func LoadTimeout() (loadTimeout time.Duration)
- func Models() string
- func Origins() (origins []string)
- func String(s string) func() string
- func Uint(key string, defaultValue uint) func() uint
- func Uint64(key string, defaultValue uint64) func() uint64
- func Values() map[string]string
- func Var(key string) string
- type EnvVar
Constants ¶
This section is empty.
Variables ¶
var (
	// Debug enables additional debug information.
	Debug = Bool("OLLAMA_DEBUG")
	// FlashAttention enables the experimental flash attention feature.
	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
	// KvCacheType is the quantization type for the K/V cache.
	KvCacheType = String("OLLAMA_KV_CACHE_TYPE")
	// NoHistory disables readline history.
	NoHistory = Bool("OLLAMA_NOHISTORY")
	// NoPrune disables pruning of model blobs on startup.
	NoPrune = Bool("OLLAMA_NOPRUNE")
	// SchedSpread allows scheduling models across all GPUs.
	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
	// IntelGPU enables experimental Intel GPU detection.
	IntelGPU = Bool("OLLAMA_INTEL_GPU")
	// MultiUserCache optimizes prompt caching for multi-user scenarios.
	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
)
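Each of these variables holds a func() bool, so callers invoke it to read the current setting. A minimal usage sketch, assuming the package import path github.com/ollama/ollama/envconfig; the Setenv call is illustrative only:

package main

import (
	"fmt"
	"os"

	"github.com/ollama/ollama/envconfig"
)

func main() {
	// Illustrative: enable debug output for this process.
	os.Setenv("OLLAMA_DEBUG", "1")

	// Each variable above is a func() bool, so it is invoked to read the setting.
	if envconfig.Debug() {
		fmt.Println("debug output enabled")
	}
	if envconfig.FlashAttention() {
		fmt.Println("flash attention requested")
	}
}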
var (
	LLMLibrary            = String("OLLAMA_LLM_LIBRARY")
	CudaVisibleDevices    = String("CUDA_VISIBLE_DEVICES")
	HipVisibleDevices     = String("HIP_VISIBLE_DEVICES")
	RocrVisibleDevices    = String("ROCR_VISIBLE_DEVICES")
	GpuDeviceOrdinal      = String("GPU_DEVICE_ORDINAL")
	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
)
var (
	// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
	NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
	// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
	// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
	// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
	MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
)
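These settings follow the same pattern as the boolean variables: each is a func() uint, and the second argument passed to Uint above is the default returned when the environment variable is unset or invalid. A short sketch of reading them, again assuming the import path github.com/ollama/ollama/envconfig:

package main

import (
	"fmt"

	"github.com/ollama/ollama/envconfig"
)

func main() {
	// Each setting is a func() uint; the defaults noted in the comments are
	// the second arguments shown in the declarations above.
	fmt.Println("parallel requests:  ", envconfig.NumParallel()) // default 0
	fmt.Println("max loaded models:  ", envconfig.MaxRunners())  // default 0
	fmt.Println("max queued requests:", envconfig.MaxQueue())    // default 512
	fmt.Println("max VRAM override:  ", envconfig.MaxVRAM())     // bytes, default 0
}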
var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
GpuOverhead sets aside additional VRAM per GPU, in bytes. GpuOverhead can be configured via the OLLAMA_GPU_OVERHEAD environment variable.
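GpuOverhead is read the same way as the Uint settings, but as a uint64. A hedged sketch of how a caller might apply the per-GPU reservation when estimating free memory; the helper is illustrative and not part of this package:

package main

import (
	"fmt"

	"github.com/ollama/ollama/envconfig"
)

// freeAfterOverhead is an illustrative helper (not part of this package): it
// subtracts the configured per-GPU reservation, in bytes, from a free-VRAM figure.
func freeAfterOverhead(freeVRAM uint64) uint64 {
	overhead := envconfig.GpuOverhead()
	if overhead >= freeVRAM {
		return 0
	}
	return freeVRAM - overhead
}

func main() {
	fmt.Println(freeAfterOverhead(8 << 30)) // e.g. a GPU reporting 8 GiB free
}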
Functions ¶
func Host ¶
func Host() *url.URL
Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable. Default is scheme "http" and host "127.0.0.1:11434".
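Because Host returns a *url.URL, the scheme, host, and port are already split out for callers. A short sketch, assuming the import path github.com/ollama/ollama/envconfig:

package main

import (
	"fmt"

	"github.com/ollama/ollama/envconfig"
)

func main() {
	u := envconfig.Host()
	// With OLLAMA_HOST unset this prints "http://127.0.0.1:11434".
	fmt.Println(u.String())

	// The usual url.URL methods are available for building request URLs.
	fmt.Println(u.Scheme, u.Hostname(), u.Port())
}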
func KeepAlive ¶
func KeepAlive() (keepAlive time.Duration)
KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable. Negative values are treated as infinite. Zero is treated as no keep alive. Default is 5 minutes.
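A short sketch of reading the setting, assuming the import path github.com/ollama/ollama/envconfig and that the environment is consulted at call time; the "30m" duration syntax is an assumption about accepted formats, which are not spelled out above:

package main

import (
	"fmt"
	"os"

	"github.com/ollama/ollama/envconfig"
)

func main() {
	// With OLLAMA_KEEP_ALIVE unset the documented default of 5 minutes applies.
	fmt.Println(envconfig.KeepAlive()) // 5m0s

	// Illustrative: override the keep-alive for this process.
	os.Setenv("OLLAMA_KEEP_ALIVE", "30m")
	fmt.Println(envconfig.KeepAlive())
}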
func LoadTimeout ¶
func LoadTimeout() (loadTimeout time.Duration)
LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable. Zero or negative values are treated as infinite. Default is 5 minutes.
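LoadTimeout feeds a stall detector, so a caller typically turns it into a deadline. A hedged sketch under the same import-path assumption; the context usage is illustrative, not taken from the package:

package main

import (
	"context"
	"fmt"

	"github.com/ollama/ollama/envconfig"
)

func main() {
	// Illustrative: use the configured stall timeout as a per-load deadline.
	// With OLLAMA_LOAD_TIMEOUT unset this is 5 minutes.
	d := envconfig.LoadTimeout()
	ctx, cancel := context.WithTimeout(context.Background(), d)
	defer cancel()

	fmt.Println("waiting up to", d, "for the model load;", ctx.Err())
}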
func Models ¶
func Models() string
Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable. Default is $HOME/.ollama/models
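Since Models returns a plain directory path, callers can join model-relative paths onto it. A short sketch, assuming the import path github.com/ollama/ollama/envconfig; the subdirectory names are illustrative:

package main

import (
	"fmt"
	"path/filepath"

	"github.com/ollama/ollama/envconfig"
)

func main() {
	dir := envconfig.Models() // $HOME/.ollama/models unless OLLAMA_MODELS is set
	fmt.Println(dir)

	// Illustrative: building paths beneath the models root.
	fmt.Println(filepath.Join(dir, "manifests"))
	fmt.Println(filepath.Join(dir, "blobs"))
}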