Documentation ¶
Overview ¶
Package v1 contains API Schema definitions for the kubeai v1 API group.
+kubebuilder:object:generate=true
+groupName=kubeai.org
Index ¶
Constants ¶
// Well-known label, annotation, and finalizer keys declared by this package.
const (
	// PodModelLabel is the "model" key.
	// NOTE(review): presumably a Pod label naming the Model the Pod serves — confirm against the controller.
	PodModelLabel = "model"

	// PodHashLabel is a label key used to store the hash of the Pod spec
	// that was used to create the Pod. This is used to determine if a Pod
	// needs to be recreated.
	PodHashLabel = "pod-hash"

	// ModelFeatureLabelDomain is the "features.kubeai.org" domain.
	// NOTE(review): presumably the domain under which per-feature labels are formed — confirm.
	ModelFeatureLabelDomain = "features.kubeai.org"

	// ModelPodIPAnnotation is the annotation key used to specify an IP
	// to use for the model Pod instead of the IP address in the status of the Pod.
	// Use in conjunction with --allow-pod-address-override for development purposes.
	ModelPodIPAnnotation = "model-pod-ip"

	// ModelPodPortAnnotation is the "model-pod-port" annotation key.
	// NOTE(review): presumably the port counterpart of ModelPodIPAnnotation — confirm.
	ModelPodPortAnnotation = "model-pod-port"

	// ModelCacheEvictionFinalizer is the "kubeai.org/cache-eviction" finalizer name.
	// NOTE(review): presumably set on Models whose cache must be evicted before deletion — confirm.
	ModelCacheEvictionFinalizer = "kubeai.org/cache-eviction"
)
// Feature names; these match the values allowed by the ModelFeature
// kubebuilder enum (TextGeneration, TextEmbedding, SpeechToText).
const (
	ModelFeatureTextGeneration = "TextGeneration"
	ModelFeatureTextEmbedding  = "TextEmbedding"
	// TODO (samos123): Add validation that Speech to Text only supports Faster Whisper.
	ModelFeatureSpeechToText = "SpeechToText"
)
// Engine names; these match the values allowed by the kubebuilder enum on
// ModelSpec.Engine (OLlama, VLLM, FasterWhisper, Infinity).
const (
	OLlamaEngine        = "OLlama"
	VLLMEngine          = "VLLM"
	FasterWhisperEngine = "FasterWhisper"
	InfinityEngine      = "Infinity"
)
// PodAdapterLabelPrefix is the "adapter.kubeai.org/" key prefix.
// NOTE(review): presumably prepended to an adapter name to form a Pod label key
// (see PodAdapterLabel) — confirm.
const PodAdapterLabelPrefix = "adapter.kubeai.org/"
Variables ¶
var ( // GroupVersion is group version used to register these objects GroupVersion = schema.GroupVersion{Group: "kubeai.org", Version: "v1"} // SchemeBuilder is used to add go types to the GroupVersionKind scheme SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} // AddToScheme adds the types in this group-version to the given scheme. AddToScheme = SchemeBuilder.AddToScheme )
Functions ¶
func PVCModelAnnotation ¶ added in v0.9.0
func PodAdapterLabel ¶ added in v0.11.0
Types ¶
type Adapter ¶ added in v0.11.0
// Adapter pairs a name with the URL of an adapter. Adapters are listed in
// ModelSpec.Adapters, where validation only permits them with the VLLM engine.
type Adapter struct {
	// Name must be a lowercase string with no spaces.
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:Pattern=^[a-z0-9-]+$
	// +kubebuilder:validation:MaxLength=63
	Name string `json:"name"`

	// URL of the adapter. Must start with "hf://", "s3://", "gs://", or "oss://".
	// +kubebuilder:validation:XValidation:rule="self.startsWith(\"hf://\") || self.startsWith(\"s3://\") || self.startsWith(\"gs://\") || self.startsWith(\"oss://\")", message="adapter url must start with \"hf://\", \"s3://\", \"gs://\", or \"oss://\"."
	URL string `json:"url"`
}
func (*Adapter) DeepCopy ¶ added in v0.11.0
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Adapter.
func (*Adapter) DeepCopyInto ¶ added in v0.11.0
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type Model ¶
type Model struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec ModelSpec `json:"spec,omitempty"` Status ModelStatus `json:"status,omitempty"` }
Model resources define the ML models that will be served by KubeAI.
+kubebuilder:object:root=true
+kubebuilder:subresource:status
+kubebuilder:subresource:scale:specpath=.spec.replicas,statuspath=.status.replicas.all
+kubebuilder:validation:XValidation:rule="size(self.metadata.name) <= 40", message="name must not exceed 40 characters."
func (*Model) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Model.
func (*Model) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*Model) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type ModelFeature ¶
// ModelFeature names a feature that a model supports (see ModelSpec.Features).
type ModelFeature string
+kubebuilder:validation:Enum=TextGeneration;TextEmbedding;SpeechToText
type ModelList ¶
type ModelList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []Model `json:"items"` }
ModelList contains a list of Models.
func (*ModelList) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelList.
func (*ModelList) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ModelList) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type ModelSpec ¶
type ModelSpec struct { // URL of the model to be served. // Currently the following formats are supported: // // For VLLM, FasterWhisper, Infinity engines: // // "hf://<repo>/<model>" // "pvc://<pvcName>" // "pvc://<pvcName>/<pvcSubpath>" // "gs://<bucket>/<path>" (only with cacheProfile) // "oss://<bucket>/<path>" (only with cacheProfile) // "s3://<bucket>/<path>" (only with cacheProfile) // // For OLlama engine: // // "ollama://<model>" // // +kubebuilder:validation:Required // +kubebuilder:validation:XValidation:rule="self == oldSelf", message="url is immutable." // +kubebuilder:validation:XValidation:rule="self.startsWith(\"hf://\") || self.startsWith(\"pvc://\") || self.startsWith(\"ollama://\") || self.startsWith(\"s3://\") || self.startsWith(\"gs://\") || self.startsWith(\"oss://\")", message="url must start with \"hf://\", \"pvc://\", \"ollama://\", \"s3://\", \"gs://\", or \"oss://\" and not be empty." URL string `json:"url"` Adapters []Adapter `json:"adapters,omitempty"` // Features that the model supports. // Dictates the APIs that are available for the model. Features []ModelFeature `json:"features"` // Engine to be used for the server process. // +kubebuilder:validation:Enum=OLlama;VLLM;FasterWhisper;Infinity // +kubebuilder:validation:Required Engine string `json:"engine"` // ResourceProfile required to serve the model. // Use the format "<resource-profile-name>:<count>". // Example: "nvidia-gpu-l4:2" - 2x NVIDIA L4 GPUs. // Must be a valid ResourceProfile defined in the system config. ResourceProfile string `json:"resourceProfile,omitempty"` // CacheProfile to be used for caching model artifacts. // Must be a valid CacheProfile defined in the system config. // +kubebuilder:validation:XValidation:rule="self == oldSelf", message="cacheProfile is immutable." CacheProfile string `json:"cacheProfile,omitempty"` // Image to be used for the server process. // Will be set from ResourceProfile + Engine if not specified. 
Image string `json:"image,omitempty"` // Args to be added to the server process. Args []string `json:"args,omitempty"` // Env variables to be added to the server process. Env map[string]string `json:"env,omitempty"` // Replicas is the number of Pod replicas that should be actively // serving the model. KubeAI will manage this field unless AutoscalingDisabled // is set to true. Replicas *int32 `json:"replicas,omitempty"` // MinReplicas is the minimum number of Pod replicas that the model can scale down to. // Note: 0 is a valid value. // +kubebuilder:validation:Minimum=0 // +kubebuilder:validation:Optional MinReplicas int32 `json:"minReplicas"` // MaxReplicas is the maximum number of Pod replicas that the model can scale up to. // Empty value means no limit. // +kubebuilder:validation:Minimum=1 MaxReplicas *int32 `json:"maxReplicas,omitempty"` // AutoscalingDisabled will stop the controller from managing the replicas // for the Model. When disabled, metrics will not be collected on server Pods. AutoscalingDisabled bool `json:"autoscalingDisabled,omitempty"` // TargetRequests is average number of active requests that the autoscaler // will try to maintain on model server Pods. // +kubebuilder:validation:Minimum=1 // +kubebuilder:default=100 TargetRequests *int32 `json:"targetRequests"` // ScaleDownDelay is the minimum time before a deployment is scaled down after // the autoscaling algorithm determines that it should be scaled down. // +kubebuilder:default=30 ScaleDownDelaySeconds *int64 `json:"scaleDownDelaySeconds"` // Owner of the model. Used solely to populate the owner field in the // OpenAI /v1/models endpoint. // DEPRECATED. // +kubebuilder:validation:Optional Owner string `json:"owner"` }
ModelSpec defines the desired state of Model.
+kubebuilder:validation:XValidation:rule="!has(self.cacheProfile) || self.url.startsWith(\"hf://\") || self.url.startsWith(\"s3://\") || self.url.startsWith(\"gs://\") || self.url.startsWith(\"oss://\")", message="cacheProfile is only supported with urls of format \"hf://...\", \"s3://...\", \"gs://...\", or \"oss://...\" at the moment."
+kubebuilder:validation:XValidation:rule="!self.url.startsWith(\"s3://\") || has(self.cacheProfile)", message="urls of format \"s3://...\" only supported when using a cacheProfile"
+kubebuilder:validation:XValidation:rule="!self.url.startsWith(\"gs://\") || has(self.cacheProfile)", message="urls of format \"gs://...\" only supported when using a cacheProfile"
+kubebuilder:validation:XValidation:rule="!self.url.startsWith(\"oss://\") || has(self.cacheProfile)", message="urls of format \"oss://...\" only supported when using a cacheProfile"
+kubebuilder:validation:XValidation:rule="!has(self.maxReplicas) || self.minReplicas <= self.maxReplicas", message="minReplicas should be less than or equal to maxReplicas."
+kubebuilder:validation:XValidation:rule="!has(self.adapters) || self.engine == \"VLLM\"", message="adapters only supported with VLLM engine."
func (*ModelSpec) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelSpec.
func (*ModelSpec) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ModelStatus ¶
type ModelStatus struct { Replicas ModelStatusReplicas `json:"replicas,omitempty"` Cache *ModelStatusCache `json:"cache,omitempty"` }
ModelStatus defines the observed state of Model.
func (*ModelStatus) DeepCopy ¶
func (in *ModelStatus) DeepCopy() *ModelStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelStatus.
func (*ModelStatus) DeepCopyInto ¶
func (in *ModelStatus) DeepCopyInto(out *ModelStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ModelStatusCache ¶ added in v0.9.0
// ModelStatusCache is the cache portion of a Model's status.
type ModelStatusCache struct {
	// Loaded is serialized as "loaded".
	// NOTE(review): presumably true once the model artifacts are present in the cache — confirm.
	Loaded bool `json:"loaded"`
}
func (*ModelStatusCache) DeepCopy ¶ added in v0.9.0
func (in *ModelStatusCache) DeepCopy() *ModelStatusCache
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelStatusCache.
func (*ModelStatusCache) DeepCopyInto ¶ added in v0.9.0
func (in *ModelStatusCache) DeepCopyInto(out *ModelStatusCache)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ModelStatusReplicas ¶
func (*ModelStatusReplicas) DeepCopy ¶
func (in *ModelStatusReplicas) DeepCopy() *ModelStatusReplicas
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelStatusReplicas.
func (*ModelStatusReplicas) DeepCopyInto ¶
func (in *ModelStatusReplicas) DeepCopyInto(out *ModelStatusReplicas)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.