Documentation ¶
Overview ¶
Package v1alpha1 contains API Schema definitions for the serving v1alpha1 API group
Package v1alpha1 contains API Schema definitions for the serving v1alpha1 API group +k8s:openapi-gen=true +kubebuilder:object:generate=true +k8s:defaulter-gen=TypeMeta +groupName=serving.kserve.io
Index ¶
- Constants
- Variables
- func RegisterDefaults(scheme *runtime.Scheme) error
- func Resource(resource string) schema.GroupResource
- type BuiltInAdapter
- type ClusterLocalModel
- type ClusterLocalModelList
- type ClusterLocalModelSpec
- type ClusterLocalModelStatus
- type ClusterServingRuntime
- type ClusterServingRuntimeList
- type ClusterStorageContainer
- type ClusterStorageContainerList
- type InferenceGraph
- type InferenceGraphList
- type InferenceGraphSpec
- type InferenceGraphStatus
- type InferenceGraphValidator
- func (v *InferenceGraphValidator) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error)
- func (v *InferenceGraphValidator) ValidateDelete(ctx context.Context, obj runtime.Object) (admission.Warnings, error)
- func (v *InferenceGraphValidator) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error)
- type InferenceRouter
- type InferenceRouterType
- type InferenceStep
- type InferenceStepDependencyType
- type InferenceTarget
- type LocalModelNodeGroup
- type LocalModelNodeGroupList
- type LocalModelNodeGroupSpec
- type LocalModelNodeGroupStatus
- type ModelCopies
- type ModelSpec
- type NamespacedName
- type NodeStatus
- type ScaleMetric
- type ServerType
- type ServingRuntime
- type ServingRuntimeList
- type ServingRuntimePodSpec
- type ServingRuntimeSpec
- func (in *ServingRuntimeSpec) DeepCopy() *ServingRuntimeSpec
- func (in *ServingRuntimeSpec) DeepCopyInto(out *ServingRuntimeSpec)
- func (srSpec *ServingRuntimeSpec) GetPriority(modelName string) *int32
- func (srSpec *ServingRuntimeSpec) IsDisabled() bool
- func (srSpec *ServingRuntimeSpec) IsMultiModelRuntime() bool
- func (srSpec *ServingRuntimeSpec) IsProtocolVersionSupported(modelProtocolVersion constants.InferenceServiceProtocol) bool
- type ServingRuntimeStatus
- type StorageContainerSpec
- type StorageHelper
- type SupportedModelFormat
- type SupportedRuntime
- type SupportedUriFormat
- type TrainedModel
- type TrainedModelList
- type TrainedModelSpec
- type TrainedModelStatus
- func (in *TrainedModelStatus) DeepCopy() *TrainedModelStatus
- func (in *TrainedModelStatus) DeepCopyInto(out *TrainedModelStatus)
- func (ss *TrainedModelStatus) GetCondition(t apis.ConditionType) *apis.Condition
- func (ss *TrainedModelStatus) InitializeConditions()
- func (ss *TrainedModelStatus) IsConditionReady(t apis.ConditionType) bool
- func (ss *TrainedModelStatus) IsReady() bool
- func (ss *TrainedModelStatus) SetCondition(conditionType apis.ConditionType, condition *apis.Condition)
- type TrainedModelValidator
- func (v *TrainedModelValidator) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error)
- func (v *TrainedModelValidator) ValidateDelete(ctx context.Context, obj runtime.Object) (admission.Warnings, error)
- func (v *TrainedModelValidator) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error)
- type WorkloadType
Constants ¶
const ( // InvalidGraphNameFormatError defines the error message for invalid inference graph name InvalidGraphNameFormatError = "" /* 231-byte string literal not displayed */ // RootNodeNotFoundError defines the error message for root node not found RootNodeNotFoundError = "root node not found, InferenceGraph needs a node with name 'root' as the root node of the graph" // WeightNotProvidedError defines the error message for traffic weight is nil for inference step WeightNotProvidedError = "InferenceGraph[%s] Node[%s] Route[%s] missing the 'Weight'" // InvalidWeightError defines the error message for sum of traffic weight is not 100 InvalidWeightError = "InferenceGraph[%s] Node[%s] splitter node: the sum of traffic weights for all routing targets should be 100" // DuplicateStepNameError defines the error message for more than one step contains same name DuplicateStepNameError = "Node \"%s\" of InferenceGraph \"%s\" contains more than one step with name \"%s\"" // TargetNotProvidedError defines the error message for inference graph target not specified TargetNotProvidedError = "Step %d (\"%s\") in node \"%s\" of InferenceGraph \"%s\" does not specify an inference target" // InvalidTargetError defines the error message for inference graph target specifies more than one of nodeName, serviceName, serviceUrl InvalidTargetError = "Step %d (\"%s\") in node \"%s\" of InferenceGraph \"%s\" specifies more than one of nodeName, serviceName, serviceUrl" )
const ( // InferenceServiceReady is set when inference service reported readiness InferenceServiceReady apis.ConditionType = "InferenceServiceReady" // MemoryResourceAvailable is set when inference service reported resources availability MemoryResourceAvailable apis.ConditionType = "MemoryResourceAvailable" // IsMMSPredictor is set when inference service predictor is set to multi-model serving IsMMSPredictor apis.ConditionType = "IsMMSPredictor" )
ConditionType represents a Service condition value
const ( CommaSpaceSeparator = ", " TmNameFmt string = "[a-zA-Z0-9_-]+" InvalidTmNameFormatError = "" /* 180-byte string literal not displayed */ InvalidStorageUriFormatError = "" /* 144-byte string literal not displayed */ InvalidTmMemoryModification = "the Trained Model \"%s\" memory field is immutable. The memory was \"%s\" but it is updated to \"%s\"" )
Regular expression format and error messages for validation of TrainedModel (tm) names, storage URIs, and memory updates
const ( // GraphNameFmt is the regular expression format for validation of the InferenceGraph name GraphNameFmt string = "[a-z]([-a-z0-9]*[a-z0-9])?" )
const ( // GraphRootNodeName is the root node name. GraphRootNodeName string = "root" )
Variables ¶
var ( // regular expressions for validation of tm name TmRegexp = regexp.MustCompile("^" + TmNameFmt + "$") // protocols that are accepted by storage uri StorageUriProtocols = strings.Join(storage.GetAllProtocol(), CommaSpaceSeparator) )
var ( // APIVersion is the current API version used to register these objects APIVersion = "v1alpha1" // SchemeGroupVersion is group version used to register these objects SchemeGroupVersion = schema.GroupVersion{Group: constants.KServeAPIGroupName, Version: APIVersion} // SchemeBuilder is used to add go types to the GroupVersionKind scheme SchemeBuilder = &scheme.Builder{GroupVersion: SchemeGroupVersion} // AddToScheme is required by pkg/client/... AddToScheme = SchemeBuilder.AddToScheme )
var ( // GraphRegexp regular expressions for validation of graph name GraphRegexp = regexp.MustCompile("^" + GraphNameFmt + "$") )
Functions ¶
func RegisterDefaults ¶ added in v0.14.0
RegisterDefaults adds defaulters functions to the given scheme. Public to allow building arbitrary schemes. All generated defaulters are covering - they call all nested defaulters.
func Resource ¶
func Resource(resource string) schema.GroupResource
Resource is required by pkg/client/listers/...
Types ¶
type BuiltInAdapter ¶ added in v0.8.0
type BuiltInAdapter struct { // ServerType must be one of the supported built-in types such as "triton" or "mlserver", // and the runtime's container must have the same name ServerType ServerType `json:"serverType,omitempty"` // Port which the runtime server listens for model management requests RuntimeManagementPort int `json:"runtimeManagementPort,omitempty"` // Fixed memory overhead to subtract from runtime container's memory allocation to determine model capacity MemBufferBytes int `json:"memBufferBytes,omitempty"` // Timeout for model loading operations in milliseconds ModelLoadingTimeoutMillis int `json:"modelLoadingTimeoutMillis,omitempty"` // Environment variables used to control other aspects of the built-in adapter's behaviour (uncommon) Env []corev1.EnvVar `json:"env,omitempty"` }
+k8s:openapi-gen=true
func (*BuiltInAdapter) DeepCopy ¶ added in v0.8.0
func (in *BuiltInAdapter) DeepCopy() *BuiltInAdapter
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BuiltInAdapter.
func (*BuiltInAdapter) DeepCopyInto ¶ added in v0.8.0
func (in *BuiltInAdapter) DeepCopyInto(out *BuiltInAdapter)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ClusterLocalModel ¶ added in v0.14.0
type ClusterLocalModel struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec ClusterLocalModelSpec `json:"spec,omitempty"` Status ClusterLocalModelStatus `json:"status,omitempty"` }
+k8s:openapi-gen=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +genclient +kubebuilder:object:root=true +kubebuilder:subresource:status +kubebuilder:resource:scope="Cluster"
func (*ClusterLocalModel) DeepCopy ¶ added in v0.14.0
func (in *ClusterLocalModel) DeepCopy() *ClusterLocalModel
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterLocalModel.
func (*ClusterLocalModel) DeepCopyInto ¶ added in v0.14.0
func (in *ClusterLocalModel) DeepCopyInto(out *ClusterLocalModel)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ClusterLocalModel) DeepCopyObject ¶ added in v0.14.0
func (in *ClusterLocalModel) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type ClusterLocalModelList ¶ added in v0.14.0
type ClusterLocalModelList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []ClusterLocalModel `json:"items" validate:"required"` }
+k8s:openapi-gen=true +kubebuilder:object:root=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
func (*ClusterLocalModelList) DeepCopy ¶ added in v0.14.0
func (in *ClusterLocalModelList) DeepCopy() *ClusterLocalModelList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterLocalModelList.
func (*ClusterLocalModelList) DeepCopyInto ¶ added in v0.14.0
func (in *ClusterLocalModelList) DeepCopyInto(out *ClusterLocalModelList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ClusterLocalModelList) DeepCopyObject ¶ added in v0.14.0
func (in *ClusterLocalModelList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type ClusterLocalModelSpec ¶ added in v0.14.0
type ClusterLocalModelSpec struct { // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="StorageUri is immutable" // Original StorageUri SourceModelUri string `json:"sourceModelUri" validate:"required"` // Model size to make sure it does not exceed the disk space reserved for local models. The limit is defined on the NodeGroup. ModelSize resource.Quantity `json:"modelSize" validate:"required"` // group of nodes to cache the model on. NodeGroup string `json:"nodeGroup" validate:"required"` }
+k8s:openapi-gen=true
func (*ClusterLocalModelSpec) DeepCopy ¶ added in v0.14.0
func (in *ClusterLocalModelSpec) DeepCopy() *ClusterLocalModelSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterLocalModelSpec.
func (*ClusterLocalModelSpec) DeepCopyInto ¶ added in v0.14.0
func (in *ClusterLocalModelSpec) DeepCopyInto(out *ClusterLocalModelSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ClusterLocalModelStatus ¶ added in v0.14.0
type ClusterLocalModelStatus struct { // Status of the model on a node, like NodeDownloaded or NodeNotReady NodeStatus map[string]NodeStatus `json:"nodeStatus,omitempty"` // How many nodes have the model available locally // +optional ModelCopies *ModelCopies `json:"copies,omitempty"` // Inference services using this local model InferenceServices []NamespacedName `json:"inferenceServices,omitempty"` }
func (*ClusterLocalModelStatus) DeepCopy ¶ added in v0.14.0
func (in *ClusterLocalModelStatus) DeepCopy() *ClusterLocalModelStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterLocalModelStatus.
func (*ClusterLocalModelStatus) DeepCopyInto ¶ added in v0.14.0
func (in *ClusterLocalModelStatus) DeepCopyInto(out *ClusterLocalModelStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ClusterServingRuntime ¶ added in v0.8.0
type ClusterServingRuntime struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec ServingRuntimeSpec `json:"spec,omitempty"` Status ServingRuntimeStatus `json:"status,omitempty"` }
ClusterServingRuntime is the Schema for the clusterservingruntimes API +k8s:openapi-gen=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +genclient +kubebuilder:object:root=true +kubebuilder:resource:scope="Cluster" +kubebuilder:printcolumn:name="Disabled",type="boolean",JSONPath=".spec.disabled" +kubebuilder:printcolumn:name="ModelType",type="string",JSONPath=".spec.supportedModelFormats[*].name" +kubebuilder:printcolumn:name="Containers",type="string",JSONPath=".spec.containers[*].name" +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"
func (*ClusterServingRuntime) DeepCopy ¶ added in v0.8.0
func (in *ClusterServingRuntime) DeepCopy() *ClusterServingRuntime
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterServingRuntime.
func (*ClusterServingRuntime) DeepCopyInto ¶ added in v0.8.0
func (in *ClusterServingRuntime) DeepCopyInto(out *ClusterServingRuntime)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ClusterServingRuntime) DeepCopyObject ¶ added in v0.8.0
func (in *ClusterServingRuntime) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type ClusterServingRuntimeList ¶ added in v0.8.0
type ClusterServingRuntimeList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []ClusterServingRuntime `json:"items"` }
ClusterServingRuntimeList contains a list of ClusterServingRuntime +k8s:openapi-gen=true +kubebuilder:object:root=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
func (*ClusterServingRuntimeList) DeepCopy ¶ added in v0.8.0
func (in *ClusterServingRuntimeList) DeepCopy() *ClusterServingRuntimeList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterServingRuntimeList.
func (*ClusterServingRuntimeList) DeepCopyInto ¶ added in v0.8.0
func (in *ClusterServingRuntimeList) DeepCopyInto(out *ClusterServingRuntimeList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ClusterServingRuntimeList) DeepCopyObject ¶ added in v0.8.0
func (in *ClusterServingRuntimeList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type ClusterStorageContainer ¶ added in v0.11.1
type ClusterStorageContainer struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec StorageContainerSpec `json:"spec,omitempty"` // +optional Disabled *bool `json:"disabled,omitempty"` }
+k8s:openapi-gen=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +genclient +kubebuilder:object:root=true +kubebuilder:resource:scope="Cluster"
func (*ClusterStorageContainer) DeepCopy ¶ added in v0.11.1
func (in *ClusterStorageContainer) DeepCopy() *ClusterStorageContainer
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterStorageContainer.
func (*ClusterStorageContainer) DeepCopyInto ¶ added in v0.11.1
func (in *ClusterStorageContainer) DeepCopyInto(out *ClusterStorageContainer)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ClusterStorageContainer) DeepCopyObject ¶ added in v0.11.1
func (in *ClusterStorageContainer) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (*ClusterStorageContainer) IsDisabled ¶ added in v0.11.1
func (sc *ClusterStorageContainer) IsDisabled() bool
type ClusterStorageContainerList ¶ added in v0.11.1
type ClusterStorageContainerList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []ClusterStorageContainer `json:"items" validate:"required"` }
+k8s:openapi-gen=true +kubebuilder:object:root=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
func (*ClusterStorageContainerList) DeepCopy ¶ added in v0.11.1
func (in *ClusterStorageContainerList) DeepCopy() *ClusterStorageContainerList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterStorageContainerList.
func (*ClusterStorageContainerList) DeepCopyInto ¶ added in v0.11.1
func (in *ClusterStorageContainerList) DeepCopyInto(out *ClusterStorageContainerList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ClusterStorageContainerList) DeepCopyObject ¶ added in v0.11.1
func (in *ClusterStorageContainerList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type InferenceGraph ¶ added in v0.9.0
type InferenceGraph struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec InferenceGraphSpec `json:"spec,omitempty"` Status InferenceGraphStatus `json:"status,omitempty"` }
InferenceGraph is the Schema for the InferenceGraph API for multiple models +k8s:openapi-gen=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +genclient +kubebuilder:object:root=true +kubebuilder:subresource:status +kubebuilder:printcolumn:name="URL",type="string",JSONPath=".status.url" +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status" +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" +kubebuilder:resource:path=inferencegraphs,shortName=ig,singular=inferencegraph
func (*InferenceGraph) DeepCopy ¶ added in v0.9.0
func (in *InferenceGraph) DeepCopy() *InferenceGraph
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceGraph.
func (*InferenceGraph) DeepCopyInto ¶ added in v0.9.0
func (in *InferenceGraph) DeepCopyInto(out *InferenceGraph)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*InferenceGraph) DeepCopyObject ¶ added in v0.9.0
func (in *InferenceGraph) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type InferenceGraphList ¶ added in v0.9.0
type InferenceGraphList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` // +listType=set Items []InferenceGraph `json:"items"` }
InferenceGraphList contains a list of InferenceGraph +k8s:openapi-gen=true +kubebuilder:object:root=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
func (*InferenceGraphList) DeepCopy ¶ added in v0.9.0
func (in *InferenceGraphList) DeepCopy() *InferenceGraphList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceGraphList.
func (*InferenceGraphList) DeepCopyInto ¶ added in v0.9.0
func (in *InferenceGraphList) DeepCopyInto(out *InferenceGraphList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*InferenceGraphList) DeepCopyObject ¶ added in v0.9.0
func (in *InferenceGraphList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type InferenceGraphSpec ¶ added in v0.9.0
type InferenceGraphSpec struct { // Map of InferenceGraph router nodes // Each node defines the router which can be different routing types Nodes map[string]InferenceRouter `json:"nodes"` // +optional Resources corev1.ResourceRequirements `json:"resources,omitempty"` // +optional Affinity *corev1.Affinity `json:"affinity,omitempty" protobuf:"bytes,18,opt,name=affinity"` // TimeoutSeconds specifies the number of seconds to wait before timing out a request to the component. // +optional TimeoutSeconds *int64 `json:"timeout,omitempty"` // Minimum number of replicas, defaults to 1 but can be set to 0 to enable scale-to-zero. // +optional MinReplicas *int `json:"minReplicas,omitempty"` // Maximum number of replicas for autoscaling. // +optional MaxReplicas int `json:"maxReplicas,omitempty"` // ScaleTarget specifies the integer target value of the metric type the Autoscaler watches for. // concurrency and rps targets are supported by Knative Pod Autoscaler // (https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). // +optional ScaleTarget *int `json:"scaleTarget,omitempty"` // ScaleMetric defines the scaling metric type watched by autoscaler // possible values are concurrency, rps, cpu, memory. concurrency, rps are supported via // Knative Pod Autoscaler(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics). // +optional ScaleMetric *ScaleMetric `json:"scaleMetric,omitempty"` }
InferenceGraphSpec defines the InferenceGraph spec +k8s:openapi-gen=true
func (*InferenceGraphSpec) DeepCopy ¶ added in v0.9.0
func (in *InferenceGraphSpec) DeepCopy() *InferenceGraphSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceGraphSpec.
func (*InferenceGraphSpec) DeepCopyInto ¶ added in v0.9.0
func (in *InferenceGraphSpec) DeepCopyInto(out *InferenceGraphSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type InferenceGraphStatus ¶ added in v0.9.0
type InferenceGraphStatus struct { // Conditions for InferenceGraph duckv1.Status `json:",inline"` // Url for the InferenceGraph // +optional URL *apis.URL `json:"url,omitempty"` }
InferenceGraphStatus defines the InferenceGraph conditions and status +k8s:openapi-gen=true
func (*InferenceGraphStatus) DeepCopy ¶ added in v0.9.0
func (in *InferenceGraphStatus) DeepCopy() *InferenceGraphStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceGraphStatus.
func (*InferenceGraphStatus) DeepCopyInto ¶ added in v0.9.0
func (in *InferenceGraphStatus) DeepCopyInto(out *InferenceGraphStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type InferenceGraphValidator ¶ added in v0.14.0
type InferenceGraphValidator struct{}
+kubebuilder:object:generate=false +k8s:deepcopy-gen=false +k8s:openapi-gen=false InferenceGraphValidator is responsible for validating InferenceGraph resources when they are created, updated, or deleted.
NOTE: The +kubebuilder:object:generate=false and +k8s:deepcopy-gen=false markers prevent controller-gen from generating DeepCopy methods, as the validator is used only for temporary operations and does not need to be deeply copied.
func (*InferenceGraphValidator) ValidateCreate ¶ added in v0.14.0
func (v *InferenceGraphValidator) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error)
ValidateCreate implements webhook.Validator so a webhook will be registered for the type
func (*InferenceGraphValidator) ValidateDelete ¶ added in v0.14.0
func (v *InferenceGraphValidator) ValidateDelete(ctx context.Context, obj runtime.Object) (admission.Warnings, error)
ValidateDelete implements webhook.Validator so a webhook will be registered for the type
func (*InferenceGraphValidator) ValidateUpdate ¶ added in v0.14.0
func (v *InferenceGraphValidator) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error)
ValidateUpdate implements webhook.Validator so a webhook will be registered for the type
type InferenceRouter ¶ added in v0.9.0
type InferenceRouter struct { // RouterType // // - `Sequence:` chain multiple inference steps with input/output from previous step // // - `Splitter:` randomly routes to the target service according to the weight // // - `Ensemble:` routes the request to multiple models and then merge the responses // // - `Switch:` routes the request to one of the steps based on condition // RouterType InferenceRouterType `json:"routerType"` // Steps defines destinations for the current router node // +optional Steps []InferenceStep `json:"steps,omitempty"` }
+k8s:openapi-gen=true InferenceRouter defines the router for each InferenceGraph node with one or multiple steps
```yaml
kind: InferenceGraph
metadata:
  name: canary-route
spec:
  nodes:
    root:
      routerType: Splitter
      steps:
      - serviceName: mymodel1
        weight: 20
      - serviceName: mymodel2
        weight: 80
```
```yaml
kind: InferenceGraph
metadata:
  name: abtest
spec:
  nodes:
    mymodel:
      routerType: Switch
      steps:
      - serviceName: mymodel1
        condition: "{ .input.userId == 1 }"
      - serviceName: mymodel2
        condition: "{ .input.userId == 2 }"
```
Scoring a case using a model ensemble consists of scoring it using each model separately, then combining the results into a single scoring result using one of the pre-defined combination methods.
Tree Ensemble constitutes a case where simple algorithms for combining results of either classification or regression trees are well known. Multiple classification trees, for example, are commonly combined using a "majority-vote" method. Multiple regression trees are often combined using various averaging techniques. For example, models can be tagged with segment identifiers and weights to be used for their combination in these ways.
```yaml
kind: InferenceGraph
metadata:
  name: ensemble
spec:
  nodes:
    root:
      routerType: Sequence
      steps:
      - serviceName: feast
      - nodeName: ensembleModel
        data: $response
    ensembleModel:
      routerType: Ensemble
      steps:
      - serviceName: sklearn-model
      - serviceName: xgboost-model
```
Scoring a case using a sequence, or chain, of models allows the output of one model to be passed in as input to the subsequent models.
```yaml
kind: InferenceGraph
metadata:
  name: model-chainer
spec:
  nodes:
    root:
      routerType: Sequence
      steps:
      - serviceName: mymodel-s1
      - serviceName: mymodel-s2
        data: $response
      - serviceName: mymodel-s3
        data: $response
```
In the flow described below, the pre_processing node base64 encodes the image and passes it to two model nodes in the flow. The encoded data is available to both these nodes for classification. The second node, i.e. dog-breed-classification, takes the original input from the pre_processing node along with the response from the cat-dog-classification node to do further classification of the dog breed if required.
```yaml
kind: InferenceGraph
metadata:
  name: dog-breed-classification
spec:
  nodes:
    root:
      routerType: Sequence
      steps:
      - serviceName: cat-dog-classifier
      - nodeName: breed-classifier
        data: $request
    breed-classifier:
      routerType: Switch
      steps:
      - serviceName: dog-breed-classifier
        condition: '{ .predictions.class == "dog" }'
      - serviceName: cat-breed-classifier
        condition: '{ .predictions.class == "cat" }'
```
func (*InferenceRouter) DeepCopy ¶ added in v0.9.0
func (in *InferenceRouter) DeepCopy() *InferenceRouter
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceRouter.
func (*InferenceRouter) DeepCopyInto ¶ added in v0.9.0
func (in *InferenceRouter) DeepCopyInto(out *InferenceRouter)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type InferenceRouterType ¶ added in v0.9.0
type InferenceRouterType string
InferenceRouterType constant for inference routing types +k8s:openapi-gen=true +kubebuilder:validation:Enum=Sequence;Splitter;Ensemble;Switch
const ( // Sequence Default type only route to one destination Sequence InferenceRouterType = "Sequence" // Splitter router randomly routes the requests to the named service according to the weight Splitter InferenceRouterType = "Splitter" // Ensemble router routes the requests to multiple models and then merge the responses Ensemble InferenceRouterType = "Ensemble" // Switch routes the request to the model based on certain condition Switch InferenceRouterType = "Switch" )
InferenceRouterType Enum
type InferenceStep ¶ added in v0.9.0
type InferenceStep struct { // Unique name for the step within this node // +optional StepName string `json:"name,omitempty"` // Node or service used to process this step InferenceTarget `json:",inline"` // request data sent to the next route with input/output from the previous step // $request // $response.predictions // +optional Data string `json:"data,omitempty"` // the weight for split of the traffic, only used for the Splitter router // when weight is specified, the weights of all the routing targets should sum to 100 // +optional Weight *int64 `json:"weight,omitempty"` // routing based on the condition // +optional Condition string `json:"condition,omitempty"` // to decide whether a step is a hard or a soft dependency in the Inference Graph // +optional Dependency InferenceStepDependencyType `json:"dependency,omitempty"` }
InferenceStep defines the inference target of the current step with condition, weights and data. +k8s:openapi-gen=true
func (*InferenceStep) DeepCopy ¶ added in v0.9.0
func (in *InferenceStep) DeepCopy() *InferenceStep
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceStep.
func (*InferenceStep) DeepCopyInto ¶ added in v0.9.0
func (in *InferenceStep) DeepCopyInto(out *InferenceStep)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type InferenceStepDependencyType ¶ added in v0.11.1
type InferenceStepDependencyType string
InferenceStepDependencyType constant for inference step dependency +k8s:openapi-gen=true +kubebuilder:validation:Enum=Soft;Hard
const ( // Soft Soft InferenceStepDependencyType = "Soft" // Hard Hard InferenceStepDependencyType = "Hard" )
InferenceStepDependencyType Enum
type InferenceTarget ¶ added in v0.9.0
type InferenceTarget struct { // The node name for routing as next step // +optional NodeName string `json:"nodeName,omitempty"` // named reference for InferenceService ServiceName string `json:"serviceName,omitempty"` // InferenceService URL, mutually exclusive with ServiceName // +optional ServiceURL string `json:"serviceUrl,omitempty"` }
+k8s:openapi-gen=true Exactly one InferenceTarget field must be specified
func (*InferenceTarget) DeepCopy ¶ added in v0.9.0
func (in *InferenceTarget) DeepCopy() *InferenceTarget
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceTarget.
func (*InferenceTarget) DeepCopyInto ¶ added in v0.9.0
func (in *InferenceTarget) DeepCopyInto(out *InferenceTarget)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type LocalModelNodeGroup ¶ added in v0.14.0
type LocalModelNodeGroup struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec LocalModelNodeGroupSpec `json:"spec,omitempty"` Status LocalModelNodeGroupStatus `json:"status,omitempty"` }
+k8s:openapi-gen=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +genclient +kubebuilder:object:root=true +kubebuilder:resource:scope="Cluster"
func (*LocalModelNodeGroup) DeepCopy ¶ added in v0.14.0
func (in *LocalModelNodeGroup) DeepCopy() *LocalModelNodeGroup
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LocalModelNodeGroup.
func (*LocalModelNodeGroup) DeepCopyInto ¶ added in v0.14.0
func (in *LocalModelNodeGroup) DeepCopyInto(out *LocalModelNodeGroup)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*LocalModelNodeGroup) DeepCopyObject ¶ added in v0.14.0
func (in *LocalModelNodeGroup) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type LocalModelNodeGroupList ¶ added in v0.14.0
type LocalModelNodeGroupList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []LocalModelNodeGroup `json:"items"` }
+k8s:openapi-gen=true +kubebuilder:object:root=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
func (*LocalModelNodeGroupList) DeepCopy ¶ added in v0.14.0
func (in *LocalModelNodeGroupList) DeepCopy() *LocalModelNodeGroupList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LocalModelNodeGroupList.
func (*LocalModelNodeGroupList) DeepCopyInto ¶ added in v0.14.0
func (in *LocalModelNodeGroupList) DeepCopyInto(out *LocalModelNodeGroupList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*LocalModelNodeGroupList) DeepCopyObject ¶ added in v0.14.0
func (in *LocalModelNodeGroupList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type LocalModelNodeGroupSpec ¶ added in v0.14.0
type LocalModelNodeGroupSpec struct { // Max storage size per node in this node group StorageLimit resource.Quantity `json:"storageLimit"` // Used to create PersistentVolumes for downloading models and in inference service namespaces PersistentVolumeSpec corev1.PersistentVolumeSpec `json:"persistentVolumeSpec"` // Used to create PersistentVolumeClaims for download and in inference service namespaces PersistentVolumeClaimSpec corev1.PersistentVolumeClaimSpec `json:"persistentVolumeClaimSpec"` }
LocalModelNodeGroupSpec defines a group of nodes to download the model to. +k8s:openapi-gen=true
func (*LocalModelNodeGroupSpec) DeepCopy ¶ added in v0.14.0
func (in *LocalModelNodeGroupSpec) DeepCopy() *LocalModelNodeGroupSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LocalModelNodeGroupSpec.
func (*LocalModelNodeGroupSpec) DeepCopyInto ¶ added in v0.14.0
func (in *LocalModelNodeGroupSpec) DeepCopyInto(out *LocalModelNodeGroupSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type LocalModelNodeGroupStatus ¶ added in v0.14.0
type LocalModelNodeGroupStatus struct { // Used storage space on any node for this node group Used resource.Quantity `json:"used,omitempty"` // Available storage space on any node for this node group Available resource.Quantity `json:"available,omitempty"` }
func (*LocalModelNodeGroupStatus) DeepCopy ¶ added in v0.14.0
func (in *LocalModelNodeGroupStatus) DeepCopy() *LocalModelNodeGroupStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LocalModelNodeGroupStatus.
func (*LocalModelNodeGroupStatus) DeepCopyInto ¶ added in v0.14.0
func (in *LocalModelNodeGroupStatus) DeepCopyInto(out *LocalModelNodeGroupStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ModelCopies ¶ added in v0.14.0
type ModelCopies struct { Available int `json:"available,omitempty"` // Total number of nodes that we expect the model to be downloaded. Including nodes that are not ready Total int `json:"total,omitempty"` // Download Failed Failed int `json:"failed,omitempty"` }
func (*ModelCopies) DeepCopy ¶ added in v0.14.0
func (in *ModelCopies) DeepCopy() *ModelCopies
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelCopies.
func (*ModelCopies) DeepCopyInto ¶ added in v0.14.0
func (in *ModelCopies) DeepCopyInto(out *ModelCopies)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ModelSpec ¶
type ModelSpec struct { // Storage URI for the model repository StorageURI string `json:"storageUri"` // Machine Learning <framework name> // The values could be: "tensorflow","pytorch","sklearn","onnx","xgboost", "myawesomeinternalframework" etc. Framework string `json:"framework"` // Maximum memory this model will consume, this field is used to decide if a model server has enough memory to load this model. Memory resource.Quantity `json:"memory"` }
ModelSpec describes a TrainedModel +k8s:openapi-gen=true
func (*ModelSpec) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelSpec.
func (*ModelSpec) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NamespacedName ¶ added in v0.14.0
type NamespacedName struct { Namespace string `json:"namespace,omitempty"` Name string `json:"name,omitempty"` }
func (*NamespacedName) DeepCopy ¶ added in v0.14.0
func (in *NamespacedName) DeepCopy() *NamespacedName
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NamespacedName.
func (*NamespacedName) DeepCopyInto ¶ added in v0.14.0
func (in *NamespacedName) DeepCopyInto(out *NamespacedName)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeStatus ¶ added in v0.14.0
type NodeStatus string
NodeStatus enum +kubebuilder:validation:Enum="";NodeNotReady;NodeDownloadPending;NodeDownloading;NodeDownloaded;NodeDownloadError;NodeDeleting;NodeDeletionError;NodeDeleted
const ( NodeNotReady NodeStatus = "NodeNotReady" NodeDownloadPending NodeStatus = "NodeDownloadPending" NodeDownloading NodeStatus = "NodeDownloading" NodeDownloaded NodeStatus = "NodeDownloaded" NodeDownloadError NodeStatus = "NodeDownloadError" NodeDeleting NodeStatus = "NodeDeleting" NodeDeletionError NodeStatus = "NodeDeletionError" NodeDeleted NodeStatus = "NodeDeleted" )
NodeStatus Enum values
type ScaleMetric ¶ added in v0.12.0
type ScaleMetric string
ScaleMetric enum +kubebuilder:validation:Enum=cpu;memory;concurrency;rps
type ServerType ¶ added in v0.8.0
type ServerType string
ServerType constant for specifying the runtime name +k8s:openapi-gen=true
const ( // Model server is Triton Triton ServerType = "triton" // Model server is MLServer MLServer ServerType = "mlserver" // Model server is OpenVino Model Server OVMS ServerType = "ovms" )
Built-in ServerTypes (others may be supported)
type ServingRuntime ¶ added in v0.8.0
type ServingRuntime struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec ServingRuntimeSpec `json:"spec,omitempty"` Status ServingRuntimeStatus `json:"status,omitempty"` }
ServingRuntime is the Schema for the servingruntimes API +k8s:openapi-gen=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +genclient +kubebuilder:object:root=true +kubebuilder:printcolumn:name="Disabled",type="boolean",JSONPath=".spec.disabled" +kubebuilder:printcolumn:name="ModelType",type="string",JSONPath=".spec.supportedModelFormats[*].name" +kubebuilder:printcolumn:name="Containers",type="string",JSONPath=".spec.containers[*].name" +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"
func (*ServingRuntime) DeepCopy ¶ added in v0.8.0
func (in *ServingRuntime) DeepCopy() *ServingRuntime
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServingRuntime.
func (*ServingRuntime) DeepCopyInto ¶ added in v0.8.0
func (in *ServingRuntime) DeepCopyInto(out *ServingRuntime)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ServingRuntime) DeepCopyObject ¶ added in v0.8.0
func (in *ServingRuntime) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type ServingRuntimeList ¶ added in v0.8.0
type ServingRuntimeList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []ServingRuntime `json:"items"` }
ServingRuntimeList contains a list of ServingRuntime +k8s:openapi-gen=true +kubebuilder:object:root=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
func (*ServingRuntimeList) DeepCopy ¶ added in v0.8.0
func (in *ServingRuntimeList) DeepCopy() *ServingRuntimeList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServingRuntimeList.
func (*ServingRuntimeList) DeepCopyInto ¶ added in v0.8.0
func (in *ServingRuntimeList) DeepCopyInto(out *ServingRuntimeList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ServingRuntimeList) DeepCopyObject ¶ added in v0.8.0
func (in *ServingRuntimeList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type ServingRuntimePodSpec ¶ added in v0.8.0
type ServingRuntimePodSpec struct { // List of containers belonging to the pod. // Containers cannot currently be added or removed. // There must be at least one container in a Pod. // Cannot be updated. // +patchMergeKey=name // +patchStrategy=merge Containers []corev1.Container `json:"containers" patchStrategy:"merge" patchMergeKey:"name" validate:"required"` // List of volumes that can be mounted by containers belonging to the pod. // More info: https://kubernetes.io/docs/concepts/storage/volumes // +optional // +patchMergeKey=name // +patchStrategy=merge,retainKeys Volumes []corev1.Volume `json:"volumes,omitempty" patchStrategy:"merge,retainKeys" patchMergeKey:"name" protobuf:"bytes,1,rep,name=volumes"` // NodeSelector is a selector which must be true for the pod to fit on a node. // Selector which must match a node's labels for the pod to be scheduled on that node. // More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ // +optional NodeSelector map[string]string `json:"nodeSelector,omitempty"` // If specified, the pod's scheduling constraints // +optional Affinity *corev1.Affinity `json:"affinity,omitempty"` // If specified, the pod's tolerations. // +optional Tolerations []corev1.Toleration `json:"tolerations,omitempty"` // Labels that will be add to the pod. // More info: http://kubernetes.io/docs/user-guide/labels // +optional Labels map[string]string `json:"labels,omitempty"` // Annotations that will be add to the pod. // More info: http://kubernetes.io/docs/user-guide/annotations // +optional Annotations map[string]string `json:"annotations,omitempty"` // ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec. // If specified, these secrets will be passed to individual puller implementations for them to use. For example, // in the case of docker, only DockerConfig type secrets are honored. 
// More info: https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod // +optional // +patchMergeKey=name // +patchStrategy=merge ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,15,rep,name=imagePullSecrets"` // Use the host's ipc namespace. // Optional: Default to false. // +k8s:conversion-gen=false // +optional HostIPC bool `json:"hostIPC,omitempty" protobuf:"varint,13,opt,name=hostIPC"` }
+k8s:openapi-gen=true
func (*ServingRuntimePodSpec) DeepCopy ¶ added in v0.8.0
func (in *ServingRuntimePodSpec) DeepCopy() *ServingRuntimePodSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServingRuntimePodSpec.
func (*ServingRuntimePodSpec) DeepCopyInto ¶ added in v0.8.0
func (in *ServingRuntimePodSpec) DeepCopyInto(out *ServingRuntimePodSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ServingRuntimeSpec ¶ added in v0.8.0
type ServingRuntimeSpec struct { // Model formats and version supported by this runtime SupportedModelFormats []SupportedModelFormat `json:"supportedModelFormats,omitempty"` // Whether this ServingRuntime is intended for multi-model usage or not. // +optional MultiModel *bool `json:"multiModel,omitempty"` // Set to true to disable use of this runtime // +optional Disabled *bool `json:"disabled,omitempty"` // Supported protocol versions (i.e. v1 or v2 or grpc-v1 or grpc-v2) // +optional ProtocolVersions []constants.InferenceServiceProtocol `json:"protocolVersions,omitempty"` ServingRuntimePodSpec `json:",inline"` // Grpc endpoint for internal model-management (implementing mmesh.ModelRuntime gRPC service) // Assumed to be single-model runtime if omitted // +optional GrpcMultiModelManagementEndpoint *string `json:"grpcEndpoint,omitempty"` // Grpc endpoint for inferencing // +optional GrpcDataEndpoint *string `json:"grpcDataEndpoint,omitempty"` // HTTP endpoint for inferencing // +optional HTTPDataEndpoint *string `json:"httpDataEndpoint,omitempty"` // Configure the number of replicas in the Deployment generated by this ServingRuntime // If specified, this overrides the podsPerRuntime configuration value // +optional Replicas *uint16 `json:"replicas,omitempty"` // Configuration for this runtime's use of the storage helper (model puller) // It is enabled unless explicitly disabled // +optional StorageHelper *StorageHelper `json:"storageHelper,omitempty"` // Provide the details about built-in runtime adapter // +optional BuiltInAdapter *BuiltInAdapter `json:"builtInAdapter,omitempty"` }
ServingRuntimeSpec defines the desired state of ServingRuntime. This spec is currently provisional and is subject to change as details regarding single-model serving and multi-model serving are hammered out. +k8s:openapi-gen=true
func (*ServingRuntimeSpec) DeepCopy ¶ added in v0.8.0
func (in *ServingRuntimeSpec) DeepCopy() *ServingRuntimeSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServingRuntimeSpec.
func (*ServingRuntimeSpec) DeepCopyInto ¶ added in v0.8.0
func (in *ServingRuntimeSpec) DeepCopyInto(out *ServingRuntimeSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ServingRuntimeSpec) GetPriority ¶ added in v0.11.1
func (srSpec *ServingRuntimeSpec) GetPriority(modelName string) *int32
GetPriority returns the priority of the specified model. It returns nil if priority is not set or the model is not found.
func (*ServingRuntimeSpec) IsDisabled ¶ added in v0.8.0
func (srSpec *ServingRuntimeSpec) IsDisabled() bool
func (*ServingRuntimeSpec) IsMultiModelRuntime ¶ added in v0.8.0
func (srSpec *ServingRuntimeSpec) IsMultiModelRuntime() bool
func (*ServingRuntimeSpec) IsProtocolVersionSupported ¶ added in v0.9.0
func (srSpec *ServingRuntimeSpec) IsProtocolVersionSupported(modelProtocolVersion constants.InferenceServiceProtocol) bool
type ServingRuntimeStatus ¶ added in v0.8.0
type ServingRuntimeStatus struct { }
ServingRuntimeStatus defines the observed state of ServingRuntime +k8s:openapi-gen=true
func (*ServingRuntimeStatus) DeepCopy ¶ added in v0.8.0
func (in *ServingRuntimeStatus) DeepCopy() *ServingRuntimeStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServingRuntimeStatus.
func (*ServingRuntimeStatus) DeepCopyInto ¶ added in v0.8.0
func (in *ServingRuntimeStatus) DeepCopyInto(out *ServingRuntimeStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type StorageContainerSpec ¶ added in v0.11.1
type StorageContainerSpec struct { // Container spec for the storage initializer init container Container corev1.Container `json:"container" validate:"required"` // List of URI formats that this container supports SupportedUriFormats []SupportedUriFormat `json:"supportedUriFormats" validate:"required"` // +kubebuilder:default="initContainer" WorkloadType WorkloadType `json:"workloadType,omitempty"` }
StorageContainerSpec defines the container spec for the storage initializer init container, and the protocols it supports. +k8s:openapi-gen=true
func (*StorageContainerSpec) DeepCopy ¶ added in v0.11.1
func (in *StorageContainerSpec) DeepCopy() *StorageContainerSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StorageContainerSpec.
func (*StorageContainerSpec) DeepCopyInto ¶ added in v0.11.1
func (in *StorageContainerSpec) DeepCopyInto(out *StorageContainerSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*StorageContainerSpec) IsStorageUriSupported ¶ added in v0.11.1
func (spec *StorageContainerSpec) IsStorageUriSupported(storageUri string) (bool, error)
type StorageHelper ¶ added in v0.8.0
type StorageHelper struct { // +optional Disabled bool `json:"disabled,omitempty"` }
+k8s:openapi-gen=true
func (*StorageHelper) DeepCopy ¶ added in v0.8.0
func (in *StorageHelper) DeepCopy() *StorageHelper
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StorageHelper.
func (*StorageHelper) DeepCopyInto ¶ added in v0.8.0
func (in *StorageHelper) DeepCopyInto(out *StorageHelper)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type SupportedModelFormat ¶ added in v0.8.0
type SupportedModelFormat struct { // Name of the model format. // +required Name string `json:"name"` // Version of the model format. // Used in validating that a predictor is supported by a runtime. // Can be "major", "major.minor" or "major.minor.patch". // +optional Version *string `json:"version,omitempty"` // Set to true to allow the ServingRuntime to be used for automatic model placement if // this model format is specified with no explicit runtime. // +optional AutoSelect *bool `json:"autoSelect,omitempty"` // Priority of this serving runtime for auto selection. // This is used to select the serving runtime if more than one serving runtime supports the same model format. // The value should be greater than zero. The higher the value, the higher the priority. // Priority is not considered if AutoSelect is either false or not specified. // Priority can be overridden by specifying the runtime in the InferenceService. // +optional Priority *int32 `json:"priority,omitempty"` }
+k8s:openapi-gen=true
func (*SupportedModelFormat) DeepCopy ¶ added in v0.8.0
func (in *SupportedModelFormat) DeepCopy() *SupportedModelFormat
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SupportedModelFormat.
func (*SupportedModelFormat) DeepCopyInto ¶ added in v0.8.0
func (in *SupportedModelFormat) DeepCopyInto(out *SupportedModelFormat)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*SupportedModelFormat) IsAutoSelectEnabled ¶ added in v0.11.1
func (m *SupportedModelFormat) IsAutoSelectEnabled() bool
type SupportedRuntime ¶ added in v0.9.0
type SupportedRuntime struct { Name string Spec ServingRuntimeSpec }
SupportedRuntime is the schema for supported runtime result of automatic selection
func (*SupportedRuntime) DeepCopy ¶ added in v0.9.0
func (in *SupportedRuntime) DeepCopy() *SupportedRuntime
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SupportedRuntime.
func (*SupportedRuntime) DeepCopyInto ¶ added in v0.9.0
func (in *SupportedRuntime) DeepCopyInto(out *SupportedRuntime)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type SupportedUriFormat ¶ added in v0.11.1
type SupportedUriFormat struct { Prefix string `json:"prefix,omitempty"` Regex string `json:"regex,omitempty"` }
SupportedUriFormat can be either prefix or regex. Todo: Add validation that only one of them is set. +k8s:openapi-gen=true
func (*SupportedUriFormat) DeepCopy ¶ added in v0.11.1
func (in *SupportedUriFormat) DeepCopy() *SupportedUriFormat
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SupportedUriFormat.
func (*SupportedUriFormat) DeepCopyInto ¶ added in v0.11.1
func (in *SupportedUriFormat) DeepCopyInto(out *SupportedUriFormat)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type TrainedModel ¶ added in v0.7.0
type TrainedModel struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec TrainedModelSpec `json:"spec,omitempty"` Status TrainedModelStatus `json:"status,omitempty"` }
TrainedModel is the Schema for the TrainedModel API +k8s:openapi-gen=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +genclient +kubebuilder:object:root=true +kubebuilder:subresource:status +kubebuilder:printcolumn:name="URL",type="string",JSONPath=".status.url" +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status" +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" +kubebuilder:resource:path=trainedmodels,shortName=tm,singular=trainedmodel
func (*TrainedModel) DeepCopy ¶ added in v0.7.0
func (in *TrainedModel) DeepCopy() *TrainedModel
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TrainedModel.
func (*TrainedModel) DeepCopyInto ¶ added in v0.7.0
func (in *TrainedModel) DeepCopyInto(out *TrainedModel)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TrainedModel) DeepCopyObject ¶ added in v0.7.0
func (in *TrainedModel) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type TrainedModelList ¶ added in v0.7.0
type TrainedModelList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` // +listType=set Items []TrainedModel `json:"items"` }
TrainedModelList contains a list of TrainedModel +k8s:openapi-gen=true +kubebuilder:object:root=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
func (*TrainedModelList) DeepCopy ¶ added in v0.7.0
func (in *TrainedModelList) DeepCopy() *TrainedModelList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TrainedModelList.
func (*TrainedModelList) DeepCopyInto ¶ added in v0.7.0
func (in *TrainedModelList) DeepCopyInto(out *TrainedModelList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TrainedModelList) DeepCopyObject ¶ added in v0.7.0
func (in *TrainedModelList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (*TrainedModelList) TotalRequestedMemory ¶ added in v0.7.0
func (tms *TrainedModelList) TotalRequestedMemory() resource.Quantity
type TrainedModelSpec ¶ added in v0.7.0
type TrainedModelSpec struct { // parent inference service to deploy to // +required InferenceService string `json:"inferenceService"` // Predictor model spec // +required Model ModelSpec `json:"model"` }
TrainedModelSpec defines the TrainedModel spec +k8s:openapi-gen=true
func (*TrainedModelSpec) DeepCopy ¶ added in v0.7.0
func (in *TrainedModelSpec) DeepCopy() *TrainedModelSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TrainedModelSpec.
func (*TrainedModelSpec) DeepCopyInto ¶ added in v0.7.0
func (in *TrainedModelSpec) DeepCopyInto(out *TrainedModelSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type TrainedModelStatus ¶ added in v0.7.0
type TrainedModelStatus struct { // Conditions for trained model duckv1.Status `json:",inline"` // URL holds the url that will distribute traffic over the provided traffic targets. // For v1: http[s]://{route-name}.{route-namespace}.{cluster-level-suffix}/v1/models/<trainedmodel>:predict // For v2: http[s]://{route-name}.{route-namespace}.{cluster-level-suffix}/v2/models/<trainedmodel>/infer URL *apis.URL `json:"url,omitempty"` // Addressable endpoint for the deployed trained model // http://<inferenceservice.metadata.name>/v1/models/<trainedmodel>.metadata.name Address *duckv1.Addressable `json:"address,omitempty"` }
TrainedModelStatus defines the observed state of TrainedModel
func (*TrainedModelStatus) DeepCopy ¶ added in v0.7.0
func (in *TrainedModelStatus) DeepCopy() *TrainedModelStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TrainedModelStatus.
func (*TrainedModelStatus) DeepCopyInto ¶ added in v0.7.0
func (in *TrainedModelStatus) DeepCopyInto(out *TrainedModelStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TrainedModelStatus) GetCondition ¶ added in v0.7.0
func (ss *TrainedModelStatus) GetCondition(t apis.ConditionType) *apis.Condition
GetCondition returns the condition by name.
func (*TrainedModelStatus) InitializeConditions ¶ added in v0.7.0
func (ss *TrainedModelStatus) InitializeConditions()
func (*TrainedModelStatus) IsConditionReady ¶ added in v0.7.0
func (ss *TrainedModelStatus) IsConditionReady(t apis.ConditionType) bool
IsConditionReady returns the readiness for a given condition
func (*TrainedModelStatus) IsReady ¶ added in v0.7.0
func (ss *TrainedModelStatus) IsReady() bool
IsReady returns if the service is ready to serve the requested configuration.
func (*TrainedModelStatus) SetCondition ¶ added in v0.7.0
func (ss *TrainedModelStatus) SetCondition(conditionType apis.ConditionType, condition *apis.Condition)
type TrainedModelValidator ¶ added in v0.14.0
type TrainedModelValidator struct{}
+kubebuilder:object:generate=false +k8s:deepcopy-gen=false +k8s:openapi-gen=false TrainedModelValidator is responsible for validating TrainedModel resources when they are created, updated, or deleted.
NOTE: The +kubebuilder:object:generate=false and +k8s:deepcopy-gen=false markers prevent controller-gen from generating DeepCopy methods, as this type is used only for temporary operations and does not need to be deeply copied.
func (*TrainedModelValidator) ValidateCreate ¶ added in v0.14.0
func (v *TrainedModelValidator) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error)
ValidateCreate implements webhook.Validator so a webhook will be registered for the type
func (*TrainedModelValidator) ValidateDelete ¶ added in v0.14.0
func (v *TrainedModelValidator) ValidateDelete(ctx context.Context, obj runtime.Object) (admission.Warnings, error)
ValidateDelete implements webhook.Validator so a webhook will be registered for the type
func (*TrainedModelValidator) ValidateUpdate ¶ added in v0.14.0
func (v *TrainedModelValidator) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error)
ValidateUpdate implements webhook.Validator so a webhook will be registered for the type
type WorkloadType ¶ added in v0.14.0
type WorkloadType string
+k8s:openapi-gen=true
const ( InitContainer WorkloadType = "initContainer" LocalModelDownloadJob WorkloadType = "localModelDownloadJob" )
Source Files ¶
- cluster_local_model_status.go
- cluster_local_model_types.go
- doc.go
- inference_graph.go
- inference_graph_validation.go
- local_model_node_group_status.go
- local_model_node_group_types.go
- servingruntime_types.go
- storage_container_types.go
- trained_model.go
- trained_model_status.go
- trainedmodel_webhook.go
- v1alpha1.go
- zz_generated.deepcopy.go
- zz_generated.defaults.go