Documentation ¶
Overview ¶
Package v1beta1 contains API Schema definitions for the serving v1beta1 API group +k8s:openapi-gen=true +k8s:deepcopy-gen=package,register +k8s:conversion-gen=kserve/pkg/apis/serving +k8s:defaulter-gen=TypeMeta +groupName=serving.kserve.io
Package v1beta1 contains API Schema definitions for the serving v1beta1 API group +k8s:openapi-gen=true +k8s:deepcopy-gen=package,register +k8s:conversion-gen=kserve/pkg/apis/serving +k8s:defaulter-gen=TypeMeta +groupName=serving.kserve.io
Index ¶
- Constants
- Variables
- func ExactlyOneErrorFor(component Component) error
- func GetIntReference(number int) *int
- func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenAPIDefinition
- func GetProtocolVersionPriority(protocols []constants.InferenceServiceProtocol) int
- func Resource(resource string) schema.GroupResource
- type ARTExplainerSpec
- func (in *ARTExplainerSpec) DeepCopy() *ARTExplainerSpec
- func (in *ARTExplainerSpec) DeepCopyInto(out *ARTExplainerSpec)
- func (s *ARTExplainerSpec) Default(config *InferenceServicesConfig)
- func (s *ARTExplainerSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, ...) *v1.Container
- func (s *ARTExplainerSpec) GetProtocol() constants.InferenceServiceProtocol
- func (s *ARTExplainerSpec) GetResourceRequirements() *v1.ResourceRequirements
- func (s *ARTExplainerSpec) IsMMS(config *InferenceServicesConfig) bool
- type ARTExplainerType
- type Batcher
- type Component
- type ComponentExtensionSpec
- type ComponentImplementation
- type ComponentStatusSpec
- type ComponentType
- type CustomExplainer
- func (in *CustomExplainer) DeepCopy() *CustomExplainer
- func (in *CustomExplainer) DeepCopyInto(out *CustomExplainer)
- func (c *CustomExplainer) Default(config *InferenceServicesConfig)
- func (c *CustomExplainer) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, ...) *v1.Container
- func (c *CustomExplainer) GetProtocol() constants.InferenceServiceProtocol
- func (c *CustomExplainer) GetStorageSpec() *StorageSpec
- func (c *CustomExplainer) GetStorageUri() *string
- func (c *CustomExplainer) IsMMS(config *InferenceServicesConfig) bool
- func (s *CustomExplainer) Validate() error
- type CustomPredictor
- func (in *CustomPredictor) DeepCopy() *CustomPredictor
- func (in *CustomPredictor) DeepCopyInto(out *CustomPredictor)
- func (c *CustomPredictor) Default(config *InferenceServicesConfig)
- func (c *CustomPredictor) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, ...) *v1.Container
- func (c *CustomPredictor) GetProtocol() constants.InferenceServiceProtocol
- func (c *CustomPredictor) GetStorageSpec() *StorageSpec
- func (c *CustomPredictor) GetStorageUri() *string
- func (c *CustomPredictor) Validate() error
- type CustomTransformer
- func (in *CustomTransformer) DeepCopy() *CustomTransformer
- func (in *CustomTransformer) DeepCopyInto(out *CustomTransformer)
- func (c *CustomTransformer) Default(config *InferenceServicesConfig)
- func (c *CustomTransformer) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, ...) *v1.Container
- func (c *CustomTransformer) GetProtocol() constants.InferenceServiceProtocol
- func (c *CustomTransformer) GetStorageSpec() *StorageSpec
- func (c *CustomTransformer) GetStorageUri() *string
- func (c *CustomTransformer) IsMMS(config *InferenceServicesConfig) bool
- func (c *CustomTransformer) Validate() error
- type DeployConfig
- type ExplainerConfig
- type ExplainerExtensionSpec
- func (in *ExplainerExtensionSpec) DeepCopy() *ExplainerExtensionSpec
- func (in *ExplainerExtensionSpec) DeepCopyInto(out *ExplainerExtensionSpec)
- func (e *ExplainerExtensionSpec) GetStorageSpec() *StorageSpec
- func (e *ExplainerExtensionSpec) GetStorageUri() *string
- func (e *ExplainerExtensionSpec) Validate() error
- type ExplainerSpec
- func (in *ExplainerSpec) DeepCopy() *ExplainerSpec
- func (in *ExplainerSpec) DeepCopyInto(out *ExplainerSpec)
- func (s *ExplainerSpec) GetExtensions() *ComponentExtensionSpec
- func (s *ExplainerSpec) GetImplementation() ComponentImplementation
- func (s *ExplainerSpec) GetImplementations() []ComponentImplementation
- type ExplainersConfig
- type FailureInfo
- type FailureReason
- type HuggingFaceRuntimeSpec
- func (in *HuggingFaceRuntimeSpec) DeepCopy() *HuggingFaceRuntimeSpec
- func (in *HuggingFaceRuntimeSpec) DeepCopyInto(out *HuggingFaceRuntimeSpec)
- func (o *HuggingFaceRuntimeSpec) Default(config *InferenceServicesConfig)
- func (o *HuggingFaceRuntimeSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, ...) *v1.Container
- func (o *HuggingFaceRuntimeSpec) GetProtocol() constants.InferenceServiceProtocol
- func (o *HuggingFaceRuntimeSpec) Validate() error
- type InferenceService
- func (in *InferenceService) DeepCopy() *InferenceService
- func (in *InferenceService) DeepCopyInto(out *InferenceService)
- func (in *InferenceService) DeepCopyObject() runtime.Object
- func (isvc *InferenceService) Default()
- func (isvc *InferenceService) DefaultInferenceService(config *InferenceServicesConfig, deployConfig *DeployConfig)
- func (*InferenceService) Hub()
- func (isvc *InferenceService) SetMlServerDefaults()
- func (isvc *InferenceService) SetRuntimeDefaults()
- func (isvc *InferenceService) SetTorchServeDefaults()
- func (isvc *InferenceService) SetTritonDefaults()
- func (isvc *InferenceService) ValidateCreate() (admission.Warnings, error)
- func (isvc *InferenceService) ValidateDelete() (admission.Warnings, error)
- func (isvc *InferenceService) ValidateUpdate(old runtime.Object) (admission.Warnings, error)
- type InferenceServiceList
- type InferenceServiceSpec
- type InferenceServiceStatus
- func (ss *InferenceServiceStatus) ClearCondition(conditionType apis.ConditionType)
- func (in *InferenceServiceStatus) DeepCopy() *InferenceServiceStatus
- func (in *InferenceServiceStatus) DeepCopyInto(out *InferenceServiceStatus)
- func (ss *InferenceServiceStatus) GetCondition(t apis.ConditionType) *apis.Condition
- func (ss *InferenceServiceStatus) InitializeConditions()
- func (ss *InferenceServiceStatus) IsConditionFalse(t apis.ConditionType) bool
- func (ss *InferenceServiceStatus) IsConditionReady(t apis.ConditionType) bool
- func (ss *InferenceServiceStatus) IsConditionUnknown(t apis.ConditionType) bool
- func (ss *InferenceServiceStatus) IsReady() bool
- func (ss *InferenceServiceStatus) PropagateCrossComponentStatus(componentList []ComponentType, conditionType apis.ConditionType)
- func (ss *InferenceServiceStatus) PropagateModelStatus(statusSpec ComponentStatusSpec, podList *v1.PodList, rawDeployment bool)
- func (ss *InferenceServiceStatus) PropagateRawStatus(component ComponentType, deployment *appsv1.Deployment, url *apis.URL)
- func (ss *InferenceServiceStatus) PropagateStatus(component ComponentType, serviceStatus *knservingv1.ServiceStatus)
- func (ss *InferenceServiceStatus) SetCondition(conditionType apis.ConditionType, condition *apis.Condition)
- func (ss *InferenceServiceStatus) SetModelFailureInfo(info *FailureInfo) bool
- func (ss *InferenceServiceStatus) UpdateModelRevisionStates(modelState ModelState, totalCopies int, info *FailureInfo)
- func (ss *InferenceServiceStatus) UpdateModelTransitionStatus(status TransitionStatus, info *FailureInfo)
- type InferenceServicesConfig
- type IngressConfig
- type LightGBMSpec
- func (in *LightGBMSpec) DeepCopy() *LightGBMSpec
- func (in *LightGBMSpec) DeepCopyInto(out *LightGBMSpec)
- func (x *LightGBMSpec) Default(config *InferenceServicesConfig)
- func (x *LightGBMSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, ...) *v1.Container
- func (x *LightGBMSpec) GetProtocol() constants.InferenceServiceProtocol
- type LoggerSpec
- type LoggerType
- type ModelCopies
- type ModelFormat
- type ModelRevisionStates
- type ModelSpec
- func (in *ModelSpec) DeepCopy() *ModelSpec
- func (in *ModelSpec) DeepCopyInto(out *ModelSpec)
- func (m *ModelSpec) Default(config *InferenceServicesConfig)
- func (m *ModelSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, ...) *v1.Container
- func (m *ModelSpec) GetProtocol() constants.InferenceServiceProtocol
- func (m *ModelSpec) GetSupportingRuntimes(cl client.Client, namespace string, isMMS bool) ([]v1alpha1.SupportedRuntime, error)
- func (m *ModelSpec) RuntimeSupportsModel(srSpec *v1alpha1.ServingRuntimeSpec) bool
- type ModelState
- type ModelStatus
- type ONNXRuntimeSpec
- func (in *ONNXRuntimeSpec) DeepCopy() *ONNXRuntimeSpec
- func (in *ONNXRuntimeSpec) DeepCopyInto(out *ONNXRuntimeSpec)
- func (o *ONNXRuntimeSpec) Default(config *InferenceServicesConfig)
- func (o *ONNXRuntimeSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, ...) *v1.Container
- func (o *ONNXRuntimeSpec) GetProtocol() constants.InferenceServiceProtocol
- func (o *ONNXRuntimeSpec) Validate() error
- type PMMLSpec
- func (in *PMMLSpec) DeepCopy() *PMMLSpec
- func (in *PMMLSpec) DeepCopyInto(out *PMMLSpec)
- func (p *PMMLSpec) Default(config *InferenceServicesConfig)
- func (p *PMMLSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, ...) *v1.Container
- func (p *PMMLSpec) GetProtocol() constants.InferenceServiceProtocol
- func (p *PMMLSpec) Validate() error
- type PaddleServerSpec
- func (in *PaddleServerSpec) DeepCopy() *PaddleServerSpec
- func (in *PaddleServerSpec) DeepCopyInto(out *PaddleServerSpec)
- func (p *PaddleServerSpec) Default(config *InferenceServicesConfig)
- func (p *PaddleServerSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, ...) *v1.Container
- func (p *PaddleServerSpec) GetProtocol() constants.InferenceServiceProtocol
- type PodSpec
- type PredictorExtensionSpec
- func (in *PredictorExtensionSpec) DeepCopy() *PredictorExtensionSpec
- func (in *PredictorExtensionSpec) DeepCopyInto(out *PredictorExtensionSpec)
- func (p *PredictorExtensionSpec) GetStorageSpec() *StorageSpec
- func (p *PredictorExtensionSpec) GetStorageUri() *string
- func (p *PredictorExtensionSpec) Validate() error
- type PredictorImplementation
- type PredictorSpec
- func (in *PredictorSpec) DeepCopy() *PredictorSpec
- func (in *PredictorSpec) DeepCopyInto(out *PredictorSpec)
- func (s *PredictorSpec) GetExtensions() *ComponentExtensionSpec
- func (s *PredictorSpec) GetImplementation() ComponentImplementation
- func (s *PredictorSpec) GetImplementations() []ComponentImplementation
- func (s *PredictorSpec) GetPredictorImplementation() *ComponentImplementation
- func (s *PredictorSpec) GetPredictorImplementations() []ComponentImplementation
- type SKLearnSpec
- func (in *SKLearnSpec) DeepCopy() *SKLearnSpec
- func (in *SKLearnSpec) DeepCopyInto(out *SKLearnSpec)
- func (k *SKLearnSpec) Default(config *InferenceServicesConfig)
- func (k *SKLearnSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, ...) *v1.Container
- func (k *SKLearnSpec) GetProtocol() constants.InferenceServiceProtocol
- type ScaleMetric
- type StorageSpec
- type TFServingSpec
- func (in *TFServingSpec) DeepCopy() *TFServingSpec
- func (in *TFServingSpec) DeepCopyInto(out *TFServingSpec)
- func (t *TFServingSpec) Default(config *InferenceServicesConfig)
- func (t *TFServingSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, ...) *v1.Container
- func (t *TFServingSpec) GetProtocol() constants.InferenceServiceProtocol
- func (t *TFServingSpec) Validate() error
- type TorchServeSpec
- func (in *TorchServeSpec) DeepCopy() *TorchServeSpec
- func (in *TorchServeSpec) DeepCopyInto(out *TorchServeSpec)
- func (t *TorchServeSpec) Default(config *InferenceServicesConfig)
- func (t *TorchServeSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, ...) *v1.Container
- func (t *TorchServeSpec) GetProtocol() constants.InferenceServiceProtocol
- func (t *TorchServeSpec) Validate() error
- type TransformerSpec
- func (in *TransformerSpec) DeepCopy() *TransformerSpec
- func (in *TransformerSpec) DeepCopyInto(out *TransformerSpec)
- func (s *TransformerSpec) GetExtensions() *ComponentExtensionSpec
- func (s *TransformerSpec) GetImplementation() ComponentImplementation
- func (s *TransformerSpec) GetImplementations() []ComponentImplementation
- type TransitionStatus
- type TritonSpec
- func (in *TritonSpec) DeepCopy() *TritonSpec
- func (in *TritonSpec) DeepCopyInto(out *TritonSpec)
- func (t *TritonSpec) Default(config *InferenceServicesConfig)
- func (t *TritonSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, ...) *v1.Container
- func (t *TritonSpec) GetProtocol() constants.InferenceServiceProtocol
- type XGBoostSpec
- func (in *XGBoostSpec) DeepCopy() *XGBoostSpec
- func (in *XGBoostSpec) DeepCopyInto(out *XGBoostSpec)
- func (x *XGBoostSpec) Default(config *InferenceServicesConfig)
- func (x *XGBoostSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, ...) *v1.Container
- func (x *XGBoostSpec) GetProtocol() constants.InferenceServiceProtocol
Constants ¶
const ( MinReplicasShouldBeLessThanMaxError = "MinReplicas cannot be greater than MaxReplicas." MinReplicasLowerBoundExceededError = "MinReplicas cannot be less than 0." MaxReplicasLowerBoundExceededError = "MaxReplicas cannot be less than 0." ParallelismLowerBoundExceededError = "Parallelism cannot be less than 0." UnsupportedStorageURIFormatError = "" /* 156-byte string literal not displayed */ UnsupportedStorageSpecFormatError = "storage.spec.type, must be one of: [%s]. storage.spec.type [%s] is not supported." InvalidLoggerType = "Invalid logger type" InvalidISVCNameFormatError = "" /* 235-byte string literal not displayed */ InvalidProtocol = "Invalid protocol %s. Must be one of [%s]" )
Known error messages
const ( IngressConfigKeyName = "ingress" DeployConfigName = "deploy" DefaultDomainTemplate = "{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}" DefaultIngressDomain = "example.com" DefaultUrlScheme = "http" )
const ( // PredictorRouteReady is set when network configuration has completed. PredictorRouteReady apis.ConditionType = "PredictorRouteReady" // TransformerRouteReady is set when network configuration has completed. TransformerRouteReady apis.ConditionType = "TransformerRouteReady" // ExplainerRoutesReady is set when network configuration has completed. ExplainerRoutesReady apis.ConditionType = "ExplainerRoutesReady" // PredictorConfigurationReady is set when predictor pods are ready. PredictorConfigurationReady apis.ConditionType = "PredictorConfigurationReady" // TransformerConfigurationReady is set when transformer pods are ready. TransformerConfigurationReady apis.ConditionType = "TransformerConfigurationReady" // ExplainerConfigurationReady is set when explainer pods are ready. ExplainerConfigurationReady apis.ConditionType = "ExplainerConfigurationReady" // PredictorReady is set when predictor has reported readiness. PredictorReady apis.ConditionType = "PredictorReady" // TransformerReady is set when transformer has reported readiness. TransformerReady apis.ConditionType = "TransformerReady" // ExplainerReady is set when explainer has reported readiness. ExplainerReady apis.ConditionType = "ExplainerReady" // IngressReady is set when Ingress is created IngressReady apis.ConditionType = "IngressReady" // RoutesReady is set when underlying routes for all components have reported readiness. RoutesReady apis.ConditionType = "RoutesReady" // LatestDeploymentReady is set when underlying configurations for all components have reported readiness. LatestDeploymentReady apis.ConditionType = "LatestDeploymentReady" )
ConditionType represents a Service condition value
const ( IsvcNameFmt string = "[a-z]([-a-z0-9]*[a-z0-9])?" StorageUriPresentInTransformerError string = "storage uri should not be specified in transformer container" )
Regular expression for validation of the isvc name
const ( PyTorchServingGPUSuffix = "-gpu" InvalidPyTorchRuntimeIncludesGPU = "PyTorch RuntimeVersion is not GPU enabled but GPU resources are requested. " InvalidPyTorchRuntimeExcludesGPU = "PyTorch RuntimeVersion is GPU enabled but GPU resources are not requested. " V1ServiceEnvelope = "kserve" V2ServiceEnvelope = "kservev2" )
const (
ExplainerConfigKeyName = "explainers"
)
ConfigMap Keys
Variables ¶
var ( TensorflowEntrypointCommand = "/usr/bin/tensorflow_model_server" TensorflowServingGRPCPort = "9000" TensorflowServingRestPort = "8080" TensorflowServingGPUSuffix = "-gpu" InvalidTensorflowRuntimeVersionError = "Tensorflow RuntimeVersion must be one of %s" InvalidTensorflowRuntimeIncludesGPU = "Tensorflow RuntimeVersion is not GPU enabled but GPU resources are requested. " + InvalidTensorflowRuntimeVersionError InvalidTensorflowRuntimeExcludesGPU = "Tensorflow RuntimeVersion is GPU enabled but GPU resources are not requested. " + InvalidTensorflowRuntimeVersionError )
var ( // APIVersion is the current API version used to register these objects APIVersion = "v1beta1" // SchemeGroupVersion is group version used to register these objects SchemeGroupVersion = schema.GroupVersion{Group: constants.KServeAPIGroupName, Version: APIVersion} // SchemeBuilder is used to add go types to the GroupVersionKind scheme SchemeBuilder = &scheme.Builder{GroupVersion: SchemeGroupVersion} // AddToScheme is required by pkg/client/... AddToScheme = SchemeBuilder.AddToScheme )
var ( // regular expressions for validation of isvc name IsvcRegexp = regexp.MustCompile("^" + IsvcNameFmt + "$") )
var (
ONNXFileExt = ".onnx"
)
var (
SupportedStorageSpecURIPrefixList = []string{"s3://", "hdfs://", "webhdfs://"}
)
Constants
Functions ¶
func ExactlyOneErrorFor ¶
ExactlyOneErrorFor creates an error for the component's one-of semantic.
func GetIntReference ¶
GetIntReference returns the pointer for the integer input
func GetOpenAPIDefinitions ¶
func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenAPIDefinition
func GetProtocolVersionPriority ¶ added in v0.9.0
func GetProtocolVersionPriority(protocols []constants.InferenceServiceProtocol) int
func Resource ¶
func Resource(resource string) schema.GroupResource
Resource is required by pkg/client/listers/...
Types ¶
type ARTExplainerSpec ¶
type ARTExplainerSpec struct { // The type of ART explainer Type ARTExplainerType `json:"type"` // Contains fields shared across all explainers ExplainerExtensionSpec `json:",inline"` }
ARTExplainerSpec defines the arguments for configuring an ART Explanation Server
func (*ARTExplainerSpec) DeepCopy ¶
func (in *ARTExplainerSpec) DeepCopy() *ARTExplainerSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ARTExplainerSpec.
func (*ARTExplainerSpec) DeepCopyInto ¶
func (in *ARTExplainerSpec) DeepCopyInto(out *ARTExplainerSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ARTExplainerSpec) Default ¶
func (s *ARTExplainerSpec) Default(config *InferenceServicesConfig)
func (*ARTExplainerSpec) GetContainer ¶
func (s *ARTExplainerSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container
func (*ARTExplainerSpec) GetProtocol ¶
func (s *ARTExplainerSpec) GetProtocol() constants.InferenceServiceProtocol
func (*ARTExplainerSpec) GetResourceRequirements ¶
func (s *ARTExplainerSpec) GetResourceRequirements() *v1.ResourceRequirements
func (*ARTExplainerSpec) IsMMS ¶
func (s *ARTExplainerSpec) IsMMS(config *InferenceServicesConfig) bool
type ARTExplainerType ¶
type ARTExplainerType string
const (
ARTSquareAttackExplainer ARTExplainerType = "SquareAttack"
)
type Batcher ¶
type Batcher struct { // Specifies the max number of requests to trigger a batch // +optional MaxBatchSize *int `json:"maxBatchSize,omitempty"` // Specifies the max latency to trigger a batch // +optional MaxLatency *int `json:"maxLatency,omitempty"` // Specifies the timeout of a batch // +optional Timeout *int `json:"timeout,omitempty"` }
Batcher specifies optional payload batching available for all components
func (*Batcher) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Batcher.
func (*Batcher) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type Component ¶
type Component interface { GetImplementation() ComponentImplementation GetImplementations() []ComponentImplementation GetExtensions() *ComponentExtensionSpec }
Component interface is implemented by all specs that contain component implementations, e.g. PredictorSpec, ExplainerSpec, TransformerSpec. +kubebuilder:object:generate=false
type ComponentExtensionSpec ¶
type ComponentExtensionSpec struct { // Minimum number of replicas, defaults to 1 but can be set to 0 to enable scale-to-zero. // +optional MinReplicas *int `json:"minReplicas,omitempty"` // Maximum number of replicas for autoscaling. // +optional MaxReplicas int `json:"maxReplicas,omitempty"` // ScaleTarget specifies the integer target value of the metric type the Autoscaler watches for. // concurrency and rps targets are supported by Knative Pod Autoscaler // (https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). // +optional ScaleTarget *int `json:"scaleTarget,omitempty"` // ScaleMetric defines the scaling metric type watched by autoscaler // possible values are concurrency, rps, cpu, memory. concurrency, rps are supported via // Knative Pod Autoscaler(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics). // +optional ScaleMetric *ScaleMetric `json:"scaleMetric,omitempty"` // ContainerConcurrency specifies how many requests can be processed concurrently, this sets the hard limit of the container // concurrency(https://knative.dev/docs/serving/autoscaling/concurrency). // +optional ContainerConcurrency *int64 `json:"containerConcurrency,omitempty"` // TimeoutSeconds specifies the number of seconds to wait before timing out a request to the component. // +optional TimeoutSeconds *int64 `json:"timeout,omitempty"` // CanaryTrafficPercent defines the traffic split percentage between the candidate revision and the last ready revision // +optional CanaryTrafficPercent *int64 `json:"canaryTrafficPercent,omitempty"` // Activate request/response logging and logger configurations // +optional Logger *LoggerSpec `json:"logger,omitempty"` // Activate request batching and batching configurations // +optional Batcher *Batcher `json:"batcher,omitempty"` // Labels that will be added to the component pod. 
// More info: http://kubernetes.io/docs/user-guide/labels // +optional Labels map[string]string `json:"labels,omitempty"` // Annotations that will be added to the component pod. // More info: http://kubernetes.io/docs/user-guide/annotations // +optional Annotations map[string]string `json:"annotations,omitempty"` }
ComponentExtensionSpec defines the deployment configuration for a given InferenceService component
func (*ComponentExtensionSpec) DeepCopy ¶
func (in *ComponentExtensionSpec) DeepCopy() *ComponentExtensionSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComponentExtensionSpec.
func (*ComponentExtensionSpec) DeepCopyInto ¶
func (in *ComponentExtensionSpec) DeepCopyInto(out *ComponentExtensionSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ComponentExtensionSpec) Default ¶
func (s *ComponentExtensionSpec) Default(config *InferenceServicesConfig)
Default the ComponentExtensionSpec
func (*ComponentExtensionSpec) Validate ¶
func (s *ComponentExtensionSpec) Validate() error
Validate the ComponentExtensionSpec
type ComponentImplementation ¶
type ComponentImplementation interface { Default(config *InferenceServicesConfig) Validate() error GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container GetStorageUri() *string GetStorageSpec() *StorageSpec GetProtocol() constants.InferenceServiceProtocol }
ComponentImplementation interface is implemented by predictor, transformer, and explainer implementations +kubebuilder:object:generate=false
func FirstNonNilComponent ¶
func FirstNonNilComponent(objects []ComponentImplementation) ComponentImplementation
FirstNonNilComponent returns the first non-nil component implementation, or nil if all are nil
func NonNilComponents ¶
func NonNilComponents(objects []ComponentImplementation) (results []ComponentImplementation)
NonNilComponents returns components that are not nil
func NonNilPredictors ¶
func NonNilPredictors(objects []ComponentImplementation) (results []ComponentImplementation)
type ComponentStatusSpec ¶
type ComponentStatusSpec struct { // Latest revision name that is in ready state // +optional LatestReadyRevision string `json:"latestReadyRevision,omitempty"` // Latest revision name that is created // +optional LatestCreatedRevision string `json:"latestCreatedRevision,omitempty"` // Previous revision name that is rolled out with 100 percent traffic // +optional PreviousRolledoutRevision string `json:"previousRolledoutRevision,omitempty"` // Latest revision name that is rolled out with 100 percent traffic // +optional LatestRolledoutRevision string `json:"latestRolledoutRevision,omitempty"` // Traffic holds the configured traffic distribution for latest ready revision and previous rolled out revision. // +optional Traffic []knservingv1.TrafficTarget `json:"traffic,omitempty"` // URL holds the primary url that will distribute traffic over the provided traffic targets. // This will be one the REST or gRPC endpoints that are available. // It generally has the form http[s]://{route-name}.{route-namespace}.{cluster-level-suffix} // +optional URL *apis.URL `json:"url,omitempty"` // REST endpoint of the component if available. // +optional RestURL *apis.URL `json:"restUrl,omitempty"` // gRPC endpoint of the component if available. // +optional GrpcURL *apis.URL `json:"grpcUrl,omitempty"` // Addressable endpoint for the InferenceService // +optional Address *duckv1.Addressable `json:"address,omitempty"` }
ComponentStatusSpec describes the state of the component
func (*ComponentStatusSpec) DeepCopy ¶
func (in *ComponentStatusSpec) DeepCopy() *ComponentStatusSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComponentStatusSpec.
func (*ComponentStatusSpec) DeepCopyInto ¶
func (in *ComponentStatusSpec) DeepCopyInto(out *ComponentStatusSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ComponentType ¶
type ComponentType string
ComponentType contains the different types of components of the service
const ( PredictorComponent ComponentType = "predictor" ExplainerComponent ComponentType = "explainer" TransformerComponent ComponentType = "transformer" )
ComponentType Enum
type CustomExplainer ¶
CustomExplainer defines arguments for configuring a custom explainer.
func NewCustomExplainer ¶
func NewCustomExplainer(podSpec *PodSpec) *CustomExplainer
func (*CustomExplainer) DeepCopy ¶
func (in *CustomExplainer) DeepCopy() *CustomExplainer
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CustomExplainer.
func (*CustomExplainer) DeepCopyInto ¶
func (in *CustomExplainer) DeepCopyInto(out *CustomExplainer)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*CustomExplainer) Default ¶
func (c *CustomExplainer) Default(config *InferenceServicesConfig)
Default sets defaults on the resource
func (*CustomExplainer) GetContainer ¶
func (c *CustomExplainer) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container
GetContainer transforms the resource into a container spec
func (*CustomExplainer) GetProtocol ¶
func (c *CustomExplainer) GetProtocol() constants.InferenceServiceProtocol
func (*CustomExplainer) GetStorageSpec ¶ added in v0.9.0
func (c *CustomExplainer) GetStorageSpec() *StorageSpec
func (*CustomExplainer) GetStorageUri ¶
func (c *CustomExplainer) GetStorageUri() *string
func (*CustomExplainer) IsMMS ¶
func (c *CustomExplainer) IsMMS(config *InferenceServicesConfig) bool
type CustomPredictor ¶
CustomPredictor defines arguments for configuring a custom server.
func NewCustomPredictor ¶
func NewCustomPredictor(podSpec *PodSpec) *CustomPredictor
func (*CustomPredictor) DeepCopy ¶
func (in *CustomPredictor) DeepCopy() *CustomPredictor
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CustomPredictor.
func (*CustomPredictor) DeepCopyInto ¶
func (in *CustomPredictor) DeepCopyInto(out *CustomPredictor)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*CustomPredictor) Default ¶
func (c *CustomPredictor) Default(config *InferenceServicesConfig)
Default sets defaults on the resource
func (*CustomPredictor) GetContainer ¶
func (c *CustomPredictor) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container
GetContainer transforms the resource into a container spec
func (*CustomPredictor) GetProtocol ¶
func (c *CustomPredictor) GetProtocol() constants.InferenceServiceProtocol
func (*CustomPredictor) GetStorageSpec ¶ added in v0.9.0
func (c *CustomPredictor) GetStorageSpec() *StorageSpec
func (*CustomPredictor) GetStorageUri ¶
func (c *CustomPredictor) GetStorageUri() *string
func (*CustomPredictor) Validate ¶
func (c *CustomPredictor) Validate() error
Validate returns an error if invalid
type CustomTransformer ¶
CustomTransformer defines arguments for configuring a custom transformer.
func NewCustomTransformer ¶
func NewCustomTransformer(podSpec *PodSpec) *CustomTransformer
func (*CustomTransformer) DeepCopy ¶
func (in *CustomTransformer) DeepCopy() *CustomTransformer
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CustomTransformer.
func (*CustomTransformer) DeepCopyInto ¶
func (in *CustomTransformer) DeepCopyInto(out *CustomTransformer)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*CustomTransformer) Default ¶
func (c *CustomTransformer) Default(config *InferenceServicesConfig)
Default sets defaults on the resource
func (*CustomTransformer) GetContainer ¶
func (c *CustomTransformer) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container
GetContainer transforms the resource into a container spec
func (*CustomTransformer) GetProtocol ¶
func (c *CustomTransformer) GetProtocol() constants.InferenceServiceProtocol
func (*CustomTransformer) GetStorageSpec ¶ added in v0.9.0
func (c *CustomTransformer) GetStorageSpec() *StorageSpec
func (*CustomTransformer) GetStorageUri ¶
func (c *CustomTransformer) GetStorageUri() *string
func (*CustomTransformer) IsMMS ¶
func (c *CustomTransformer) IsMMS(config *InferenceServicesConfig) bool
func (*CustomTransformer) Validate ¶
func (c *CustomTransformer) Validate() error
Validate returns an error if invalid
type DeployConfig ¶
type DeployConfig struct {
DefaultDeploymentMode string `json:"defaultDeploymentMode,omitempty"`
}
+kubebuilder:object:generate=false
func NewDeployConfig ¶
func NewDeployConfig(clientset kubernetes.Interface) (*DeployConfig, error)
type ExplainerConfig ¶
type ExplainerConfig struct { // explainer docker image name ContainerImage string `json:"image"` // default explainer docker image version DefaultImageVersion string `json:"defaultImageVersion"` }
+kubebuilder:object:generate=false
type ExplainerExtensionSpec ¶
type ExplainerExtensionSpec struct { // The location of a trained explanation model StorageURI string `json:"storageUri,omitempty"` // Defaults to latest Explainer Version RuntimeVersion *string `json:"runtimeVersion,omitempty"` // Inline custom parameter settings for explainer Config map[string]string `json:"config,omitempty"` // Container enables overrides for the predictor. // Each framework will have different defaults that are populated in the underlying container spec. // +optional v1.Container `json:",inline"` // Storage Spec for model location // +optional Storage *StorageSpec `json:"storage,omitempty"` }
ExplainerExtensionSpec defines configuration shared across all explainer frameworks
func (*ExplainerExtensionSpec) DeepCopy ¶
func (in *ExplainerExtensionSpec) DeepCopy() *ExplainerExtensionSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExplainerExtensionSpec.
func (*ExplainerExtensionSpec) DeepCopyInto ¶
func (in *ExplainerExtensionSpec) DeepCopyInto(out *ExplainerExtensionSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ExplainerExtensionSpec) GetStorageSpec ¶ added in v0.9.0
func (e *ExplainerExtensionSpec) GetStorageSpec() *StorageSpec
GetStorageSpec returns the predictor storage spec object
func (*ExplainerExtensionSpec) GetStorageUri ¶ added in v0.9.0
func (e *ExplainerExtensionSpec) GetStorageUri() *string
GetStorageUri returns the predictor storage Uri
func (*ExplainerExtensionSpec) Validate ¶ added in v0.9.0
func (e *ExplainerExtensionSpec) Validate() error
Validate returns an error if invalid
type ExplainerSpec ¶
type ExplainerSpec struct { // Spec for ART explainer ART *ARTExplainerSpec `json:"art,omitempty"` // This spec is dual purpose. // 1) Users may choose to provide a full PodSpec for their custom explainer. // The field PodSpec.Containers is mutually exclusive with other explainers. // 2) Users may choose to provide a Explainer and specify PodSpec // overrides in the PodSpec. They must not provide PodSpec.Containers in this case. PodSpec `json:",inline"` // Component extension defines the deployment configurations for explainer ComponentExtensionSpec `json:",inline"` }
ExplainerSpec defines the container spec for a model explanation server. The following fields follow a "1-of" semantic. Users must specify exactly one spec.
func (*ExplainerSpec) DeepCopy ¶
func (in *ExplainerSpec) DeepCopy() *ExplainerSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExplainerSpec.
func (*ExplainerSpec) DeepCopyInto ¶
func (in *ExplainerSpec) DeepCopyInto(out *ExplainerSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ExplainerSpec) GetExtensions ¶
func (s *ExplainerSpec) GetExtensions() *ComponentExtensionSpec
GetExtensions returns the extensions for the component
func (*ExplainerSpec) GetImplementation ¶
func (s *ExplainerSpec) GetImplementation() ComponentImplementation
GetImplementation returns the implementation for the component
func (*ExplainerSpec) GetImplementations ¶
func (s *ExplainerSpec) GetImplementations() []ComponentImplementation
GetImplementations returns the implementations for the component
type ExplainersConfig ¶
type ExplainersConfig struct {
ARTExplainer ExplainerConfig `json:"art,omitempty"`
}
+kubebuilder:object:generate=false
type FailureInfo ¶ added in v0.9.0
type FailureInfo struct { // Name of component to which the failure relates (usually Pod name) //+optional Location string `json:"location,omitempty"` // High level class of failure //+optional Reason FailureReason `json:"reason,omitempty"` // Detailed error message //+optional Message string `json:"message,omitempty"` // Internal Revision/ID of model, tied to specific Spec contents //+optional ModelRevisionName string `json:"modelRevisionName,omitempty"` // Time failure occurred or was discovered //+optional Time *metav1.Time `json:"time,omitempty"` // Exit status from the last termination of the container //+optional ExitCode int32 `json:"exitCode,omitempty"` }
func (*FailureInfo) DeepCopy ¶ added in v0.9.0
func (in *FailureInfo) DeepCopy() *FailureInfo
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailureInfo.
func (*FailureInfo) DeepCopyInto ¶ added in v0.9.0
func (in *FailureInfo) DeepCopyInto(out *FailureInfo)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type FailureReason ¶ added in v0.9.0
type FailureReason string
FailureReason enum +kubebuilder:validation:Enum=ModelLoadFailed;RuntimeUnhealthy;RuntimeDisabled;NoSupportingRuntime;RuntimeNotRecognized;InvalidPredictorSpec
const ( // The model failed to load within a ServingRuntime container ModelLoadFailed FailureReason = "ModelLoadFailed" // Corresponding ServingRuntime containers failed to start or are unhealthy RuntimeUnhealthy FailureReason = "RuntimeUnhealthy" // The ServingRuntime is disabled RuntimeDisabled FailureReason = "RuntimeDisabled" // There are no ServingRuntime which support the specified model type NoSupportingRuntime FailureReason = "NoSupportingRuntime" // There is no ServingRuntime defined with the specified runtime name RuntimeNotRecognized FailureReason = "RuntimeNotRecognized" // The current Predictor Spec is invalid or unsupported InvalidPredictorSpec FailureReason = "InvalidPredictorSpec" )
FailureReason enum values
type HuggingFaceRuntimeSpec ¶ added in v0.12.0
type HuggingFaceRuntimeSpec struct { // Contains fields shared across all predictors PredictorExtensionSpec `json:",inline"` }
HuggingFaceRuntimeSpec defines arguments for configuring HuggingFace model serving.
func (*HuggingFaceRuntimeSpec) DeepCopy ¶ added in v0.12.0
func (in *HuggingFaceRuntimeSpec) DeepCopy() *HuggingFaceRuntimeSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HuggingFaceRuntimeSpec.
func (*HuggingFaceRuntimeSpec) DeepCopyInto ¶ added in v0.12.0
func (in *HuggingFaceRuntimeSpec) DeepCopyInto(out *HuggingFaceRuntimeSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*HuggingFaceRuntimeSpec) Default ¶ added in v0.12.0
func (o *HuggingFaceRuntimeSpec) Default(config *InferenceServicesConfig)
Default sets defaults on the resource
func (*HuggingFaceRuntimeSpec) GetContainer ¶ added in v0.12.0
func (o *HuggingFaceRuntimeSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container
GetContainer transforms the resource into a container spec
func (*HuggingFaceRuntimeSpec) GetProtocol ¶ added in v0.12.0
func (o *HuggingFaceRuntimeSpec) GetProtocol() constants.InferenceServiceProtocol
func (*HuggingFaceRuntimeSpec) Validate ¶ added in v0.12.0
func (o *HuggingFaceRuntimeSpec) Validate() error
Validate returns an error if invalid
type InferenceService ¶
type InferenceService struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec InferenceServiceSpec `json:"spec,omitempty"` // +kubebuilder:pruning:PreserveUnknownFields Status InferenceServiceStatus `json:"status,omitempty"` }
InferenceService is the Schema for the InferenceServices API +k8s:openapi-gen=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +genclient +kubebuilder:object:root=true +kubebuilder:subresource:status +kubebuilder:printcolumn:name="URL",type="string",JSONPath=".status.url" +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status" +kubebuilder:printcolumn:name="Prev",type="integer",JSONPath=".status.components.predictor.traffic[?(@.tag=='prev')].percent" +kubebuilder:printcolumn:name="Latest",type="integer",JSONPath=".status.components.predictor.traffic[?(@.latestRevision==true)].percent" +kubebuilder:printcolumn:name="PrevRolledoutRevision",type="string",JSONPath=".status.components.predictor.traffic[?(@.tag=='prev')].revisionName" +kubebuilder:printcolumn:name="LatestReadyRevision",type="string",JSONPath=".status.components.predictor.traffic[?(@.latestRevision==true)].revisionName" +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" +kubebuilder:resource:path=inferenceservices,shortName=isvc +kubebuilder:storageversion
func (*InferenceService) DeepCopy ¶
func (in *InferenceService) DeepCopy() *InferenceService
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceService.
func (*InferenceService) DeepCopyInto ¶
func (in *InferenceService) DeepCopyInto(out *InferenceService)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*InferenceService) DeepCopyObject ¶
func (in *InferenceService) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (*InferenceService) Default ¶
func (isvc *InferenceService) Default()
func (*InferenceService) DefaultInferenceService ¶
func (isvc *InferenceService) DefaultInferenceService(config *InferenceServicesConfig, deployConfig *DeployConfig)
func (*InferenceService) Hub ¶
func (*InferenceService) Hub()
func (*InferenceService) SetMlServerDefaults ¶ added in v0.9.0
func (isvc *InferenceService) SetMlServerDefaults()
func (*InferenceService) SetRuntimeDefaults ¶ added in v0.9.0
func (isvc *InferenceService) SetRuntimeDefaults()
func (*InferenceService) SetTorchServeDefaults ¶ added in v0.9.0
func (isvc *InferenceService) SetTorchServeDefaults()
func (*InferenceService) SetTritonDefaults ¶ added in v0.9.0
func (isvc *InferenceService) SetTritonDefaults()
func (*InferenceService) ValidateCreate ¶
func (isvc *InferenceService) ValidateCreate() (admission.Warnings, error)
ValidateCreate implements webhook.Validator so a webhook will be registered for the type
func (*InferenceService) ValidateDelete ¶
func (isvc *InferenceService) ValidateDelete() (admission.Warnings, error)
ValidateDelete implements webhook.Validator so a webhook will be registered for the type
func (*InferenceService) ValidateUpdate ¶
ValidateUpdate implements webhook.Validator so a webhook will be registered for the type
type InferenceServiceList ¶
type InferenceServiceList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` // +listType=set Items []InferenceService `json:"items"` }
InferenceServiceList contains a list of InferenceService +k8s:openapi-gen=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +kubebuilder:object:root=true
func (*InferenceServiceList) DeepCopy ¶
func (in *InferenceServiceList) DeepCopy() *InferenceServiceList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceServiceList.
func (*InferenceServiceList) DeepCopyInto ¶
func (in *InferenceServiceList) DeepCopyInto(out *InferenceServiceList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*InferenceServiceList) DeepCopyObject ¶
func (in *InferenceServiceList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type InferenceServiceSpec ¶
type InferenceServiceSpec struct { // Predictor defines the model serving spec // +required Predictor PredictorSpec `json:"predictor"` // Explainer defines the model explanation service spec, // explainer service calls to predictor or transformer if it is specified. // +optional Explainer *ExplainerSpec `json:"explainer,omitempty"` // Transformer defines the pre/post processing before and after the predictor call, // transformer service calls to predictor service. // +optional Transformer *TransformerSpec `json:"transformer,omitempty"` }
InferenceServiceSpec is the top level type for this resource
func (*InferenceServiceSpec) DeepCopy ¶
func (in *InferenceServiceSpec) DeepCopy() *InferenceServiceSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceServiceSpec.
func (*InferenceServiceSpec) DeepCopyInto ¶
func (in *InferenceServiceSpec) DeepCopyInto(out *InferenceServiceSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type InferenceServiceStatus ¶
type InferenceServiceStatus struct { // Conditions for the InferenceService <br/> // - PredictorReady: predictor readiness condition; <br/> // - TransformerReady: transformer readiness condition; <br/> // - ExplainerReady: explainer readiness condition; <br/> // - RoutesReady (serverless mode only): aggregated routing condition, i.e. endpoint readiness condition; <br/> // - LatestDeploymentReady (serverless mode only): aggregated configuration condition, i.e. latest deployment readiness condition; <br/> // - Ready: aggregated condition; <br/> duckv1.Status `json:",inline"` // Addressable endpoint for the InferenceService // +optional Address *duckv1.Addressable `json:"address,omitempty"` // URL holds the url that will distribute traffic over the provided traffic targets. // It generally has the form http[s]://{route-name}.{route-namespace}.{cluster-level-suffix} // +optional URL *apis.URL `json:"url,omitempty"` // Statuses for the components of the InferenceService Components map[ComponentType]ComponentStatusSpec `json:"components,omitempty"` // Model related statuses ModelStatus ModelStatus `json:"modelStatus,omitempty"` }
InferenceServiceStatus defines the observed state of InferenceService
func (*InferenceServiceStatus) ClearCondition ¶ added in v0.8.0
func (ss *InferenceServiceStatus) ClearCondition(conditionType apis.ConditionType)
func (*InferenceServiceStatus) DeepCopy ¶
func (in *InferenceServiceStatus) DeepCopy() *InferenceServiceStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceServiceStatus.
func (*InferenceServiceStatus) DeepCopyInto ¶
func (in *InferenceServiceStatus) DeepCopyInto(out *InferenceServiceStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*InferenceServiceStatus) GetCondition ¶
func (ss *InferenceServiceStatus) GetCondition(t apis.ConditionType) *apis.Condition
GetCondition returns the condition by name.
func (*InferenceServiceStatus) InitializeConditions ¶
func (ss *InferenceServiceStatus) InitializeConditions()
func (*InferenceServiceStatus) IsConditionFalse ¶ added in v0.11.0
func (ss *InferenceServiceStatus) IsConditionFalse(t apis.ConditionType) bool
IsConditionFalse returns if a given condition is False
func (*InferenceServiceStatus) IsConditionReady ¶
func (ss *InferenceServiceStatus) IsConditionReady(t apis.ConditionType) bool
IsConditionReady returns the readiness for a given condition
func (*InferenceServiceStatus) IsConditionUnknown ¶ added in v0.11.0
func (ss *InferenceServiceStatus) IsConditionUnknown(t apis.ConditionType) bool
IsConditionUnknown returns if a given condition is Unknown
func (*InferenceServiceStatus) IsReady ¶
func (ss *InferenceServiceStatus) IsReady() bool
IsReady returns the overall readiness for the inference service.
func (*InferenceServiceStatus) PropagateCrossComponentStatus ¶ added in v0.11.0
func (ss *InferenceServiceStatus) PropagateCrossComponentStatus(componentList []ComponentType, conditionType apis.ConditionType)
PropagateCrossComponentStatus aggregates the RoutesReady or ConfigurationsReady condition across all available components and propagates the RoutesReady or LatestDeploymentReady status accordingly.
func (*InferenceServiceStatus) PropagateModelStatus ¶ added in v0.9.0
func (ss *InferenceServiceStatus) PropagateModelStatus(statusSpec ComponentStatusSpec, podList *v1.PodList, rawDeployment bool)
func (*InferenceServiceStatus) PropagateRawStatus ¶
func (ss *InferenceServiceStatus) PropagateRawStatus( component ComponentType, deployment *appsv1.Deployment, url *apis.URL)
func (*InferenceServiceStatus) PropagateStatus ¶
func (ss *InferenceServiceStatus) PropagateStatus(component ComponentType, serviceStatus *knservingv1.ServiceStatus)
func (*InferenceServiceStatus) SetCondition ¶
func (ss *InferenceServiceStatus) SetCondition(conditionType apis.ConditionType, condition *apis.Condition)
func (*InferenceServiceStatus) SetModelFailureInfo ¶ added in v0.9.0
func (ss *InferenceServiceStatus) SetModelFailureInfo(info *FailureInfo) bool
func (*InferenceServiceStatus) UpdateModelRevisionStates ¶ added in v0.9.0
func (ss *InferenceServiceStatus) UpdateModelRevisionStates(modelState ModelState, totalCopies int, info *FailureInfo)
func (*InferenceServiceStatus) UpdateModelTransitionStatus ¶ added in v0.9.0
func (ss *InferenceServiceStatus) UpdateModelTransitionStatus(status TransitionStatus, info *FailureInfo)
type InferenceServicesConfig ¶
type InferenceServicesConfig struct { // Explainer configurations Explainers ExplainersConfig `json:"explainers"` }
+kubebuilder:object:generate=false
func NewInferenceServicesConfig ¶
func NewInferenceServicesConfig(clientset kubernetes.Interface) (*InferenceServicesConfig, error)
type IngressConfig ¶
type IngressConfig struct { IngressGateway string `json:"ingressGateway,omitempty"` IngressServiceName string `json:"ingressService,omitempty"` LocalGateway string `json:"localGateway,omitempty"` LocalGatewayServiceName string `json:"localGatewayService,omitempty"` IngressDomain string `json:"ingressDomain,omitempty"` IngressClassName *string `json:"ingressClassName,omitempty"` AdditionalIngressDomains *[]string `json:"additionalIngressDomains,omitempty"` DomainTemplate string `json:"domainTemplate,omitempty"` UrlScheme string `json:"urlScheme,omitempty"` DisableIstioVirtualHost bool `json:"disableIstioVirtualHost,omitempty"` PathTemplate string `json:"pathTemplate,omitempty"` DisableIngressCreation bool `json:"disableIngressCreation,omitempty"` }
+kubebuilder:object:generate=false
func NewIngressConfig ¶
func NewIngressConfig(clientset kubernetes.Interface) (*IngressConfig, error)
type LightGBMSpec ¶
type LightGBMSpec struct { // Contains fields shared across all predictors PredictorExtensionSpec `json:",inline"` }
LightGBMSpec defines arguments for configuring LightGBM model serving.
func (*LightGBMSpec) DeepCopy ¶
func (in *LightGBMSpec) DeepCopy() *LightGBMSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LightGBMSpec.
func (*LightGBMSpec) DeepCopyInto ¶
func (in *LightGBMSpec) DeepCopyInto(out *LightGBMSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*LightGBMSpec) Default ¶
func (x *LightGBMSpec) Default(config *InferenceServicesConfig)
Default sets defaults on the resource
func (*LightGBMSpec) GetContainer ¶
func (x *LightGBMSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container
func (*LightGBMSpec) GetProtocol ¶
func (x *LightGBMSpec) GetProtocol() constants.InferenceServiceProtocol
type LoggerSpec ¶
type LoggerSpec struct { // URL to send logging events // +optional URL *string `json:"url,omitempty"` // Specifies the scope of the loggers. <br /> // Valid values are: <br /> // - "all" (default): log both request and response; <br /> // - "request": log only request; <br /> // - "response": log only response <br /> // +optional Mode LoggerType `json:"mode,omitempty"` }
LoggerSpec specifies optional payload logging available for all components
func (*LoggerSpec) DeepCopy ¶
func (in *LoggerSpec) DeepCopy() *LoggerSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LoggerSpec.
func (*LoggerSpec) DeepCopyInto ¶
func (in *LoggerSpec) DeepCopyInto(out *LoggerSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type LoggerType ¶
type LoggerType string
LoggerType controls the scope of log publishing +kubebuilder:validation:Enum=all;request;response
const ( // Logger mode to log both request and response LogAll LoggerType = "all" // Logger mode to log only request LogRequest LoggerType = "request" // Logger mode to log only response LogResponse LoggerType = "response" )
LoggerType Enum
type ModelCopies ¶ added in v0.9.0
type ModelCopies struct { // How many copies of this predictor's models failed to load recently // +kubebuilder:default=0 FailedCopies int `json:"failedCopies"` // Total number copies of this predictor's models that are currently loaded // +optional TotalCopies int `json:"totalCopies,omitempty"` }
func (*ModelCopies) DeepCopy ¶ added in v0.9.0
func (in *ModelCopies) DeepCopy() *ModelCopies
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelCopies.
func (*ModelCopies) DeepCopyInto ¶ added in v0.9.0
func (in *ModelCopies) DeepCopyInto(out *ModelCopies)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ModelFormat ¶ added in v0.8.0
type ModelFormat struct { // Name of the model format. // +required Name string `json:"name"` // Version of the model format. // Used in validating that a predictor is supported by a runtime. // Can be "major", "major.minor" or "major.minor.patch". // +optional Version *string `json:"version,omitempty"` }
func (*ModelFormat) DeepCopy ¶ added in v0.8.0
func (in *ModelFormat) DeepCopy() *ModelFormat
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelFormat.
func (*ModelFormat) DeepCopyInto ¶ added in v0.8.0
func (in *ModelFormat) DeepCopyInto(out *ModelFormat)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ModelRevisionStates ¶ added in v0.9.0
type ModelRevisionStates struct { // High level state string: Pending, Standby, Loading, Loaded, FailedToLoad // +kubebuilder:default=Pending ActiveModelState ModelState `json:"activeModelState"` // +kubebuilder:default="" TargetModelState ModelState `json:"targetModelState,omitempty"` }
func (*ModelRevisionStates) DeepCopy ¶ added in v0.9.0
func (in *ModelRevisionStates) DeepCopy() *ModelRevisionStates
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelRevisionStates.
func (*ModelRevisionStates) DeepCopyInto ¶ added in v0.9.0
func (in *ModelRevisionStates) DeepCopyInto(out *ModelRevisionStates)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ModelSpec ¶ added in v0.8.0
type ModelSpec struct { // ModelFormat being served. // +required ModelFormat ModelFormat `json:"modelFormat"` // Specific ClusterServingRuntime/ServingRuntime name to use for deployment. // +optional Runtime *string `json:"runtime,omitempty"` PredictorExtensionSpec `json:",inline"` }
func (*ModelSpec) DeepCopy ¶ added in v0.8.0
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelSpec.
func (*ModelSpec) DeepCopyInto ¶ added in v0.8.0
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ModelSpec) Default ¶ added in v0.8.0
func (m *ModelSpec) Default(config *InferenceServicesConfig)
func (*ModelSpec) GetContainer ¶ added in v0.8.0
func (m *ModelSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container
func (*ModelSpec) GetProtocol ¶ added in v0.8.0
func (m *ModelSpec) GetProtocol() constants.InferenceServiceProtocol
func (*ModelSpec) GetSupportingRuntimes ¶ added in v0.8.0
func (m *ModelSpec) GetSupportingRuntimes(cl client.Client, namespace string, isMMS bool) ([]v1alpha1.SupportedRuntime, error)
GetSupportingRuntimes Get a list of ServingRuntimeSpecs that correspond to ServingRuntimes and ClusterServingRuntimes that support the given model. If the `isMMS` argument is true, this function will only return ServingRuntimes that are ModelMesh compatible, otherwise only single-model serving compatible runtimes will be returned.
func (*ModelSpec) RuntimeSupportsModel ¶ added in v0.8.0
func (m *ModelSpec) RuntimeSupportsModel(srSpec *v1alpha1.ServingRuntimeSpec) bool
RuntimeSupportsModel Check if the given runtime supports the specified model.
type ModelState ¶ added in v0.9.0
type ModelState string
ModelState enum +kubebuilder:validation:Enum="";Pending;Standby;Loading;Loaded;FailedToLoad
const ( // Model is not yet registered Pending ModelState = "Pending" // Model is available but not loaded (will load when used) Standby ModelState = "Standby" // Model is loading Loading ModelState = "Loading" // At least one copy of the model is loaded Loaded ModelState = "Loaded" // All copies of the model failed to load FailedToLoad ModelState = "FailedToLoad" )
ModelState Enum values
type ModelStatus ¶ added in v0.9.0
type ModelStatus struct { // Whether the available predictor endpoints reflect the current Spec or is in transition // +kubebuilder:default=UpToDate TransitionStatus TransitionStatus `json:"transitionStatus"` // State information of the predictor's model. // +optional ModelRevisionStates *ModelRevisionStates `json:"states,omitempty"` // Details of last failure, when load of target model is failed or blocked. // +optional LastFailureInfo *FailureInfo `json:"lastFailureInfo,omitempty"` // Model copy information of the predictor's model. // +optional ModelCopies *ModelCopies `json:"copies,omitempty"` }
func (*ModelStatus) DeepCopy ¶ added in v0.9.0
func (in *ModelStatus) DeepCopy() *ModelStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelStatus.
func (*ModelStatus) DeepCopyInto ¶ added in v0.9.0
func (in *ModelStatus) DeepCopyInto(out *ModelStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ONNXRuntimeSpec ¶
type ONNXRuntimeSpec struct { // Contains fields shared across all predictors PredictorExtensionSpec `json:",inline"` }
ONNXRuntimeSpec defines arguments for configuring ONNX model serving.
func (*ONNXRuntimeSpec) DeepCopy ¶
func (in *ONNXRuntimeSpec) DeepCopy() *ONNXRuntimeSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ONNXRuntimeSpec.
func (*ONNXRuntimeSpec) DeepCopyInto ¶
func (in *ONNXRuntimeSpec) DeepCopyInto(out *ONNXRuntimeSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ONNXRuntimeSpec) Default ¶
func (o *ONNXRuntimeSpec) Default(config *InferenceServicesConfig)
Default sets defaults on the resource
func (*ONNXRuntimeSpec) GetContainer ¶
func (o *ONNXRuntimeSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container
GetContainer transforms the resource into a container spec
func (*ONNXRuntimeSpec) GetProtocol ¶
func (o *ONNXRuntimeSpec) GetProtocol() constants.InferenceServiceProtocol
func (*ONNXRuntimeSpec) Validate ¶
func (o *ONNXRuntimeSpec) Validate() error
Validate returns an error if invalid
type PMMLSpec ¶
type PMMLSpec struct { // Contains fields shared across all predictors PredictorExtensionSpec `json:",inline"` }
PMMLSpec defines arguments for configuring PMML model serving.
func (*PMMLSpec) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PMMLSpec.
func (*PMMLSpec) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*PMMLSpec) Default ¶
func (p *PMMLSpec) Default(config *InferenceServicesConfig)
Default sets defaults on the resource
func (*PMMLSpec) GetContainer ¶
func (p *PMMLSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container
func (*PMMLSpec) GetProtocol ¶
func (p *PMMLSpec) GetProtocol() constants.InferenceServiceProtocol
type PaddleServerSpec ¶
type PaddleServerSpec struct {
PredictorExtensionSpec `json:",inline"`
}
func (*PaddleServerSpec) DeepCopy ¶
func (in *PaddleServerSpec) DeepCopy() *PaddleServerSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PaddleServerSpec.
func (*PaddleServerSpec) DeepCopyInto ¶
func (in *PaddleServerSpec) DeepCopyInto(out *PaddleServerSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*PaddleServerSpec) Default ¶
func (p *PaddleServerSpec) Default(config *InferenceServicesConfig)
func (*PaddleServerSpec) GetContainer ¶
func (p *PaddleServerSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container
func (*PaddleServerSpec) GetProtocol ¶
func (p *PaddleServerSpec) GetProtocol() constants.InferenceServiceProtocol
type PodSpec ¶
type PodSpec struct { // List of volumes that can be mounted by containers belonging to the pod. // More info: https://kubernetes.io/docs/concepts/storage/volumes // +optional // +patchMergeKey=name // +patchStrategy=merge,retainKeys Volumes []v1.Volume `json:"volumes,omitempty" patchStrategy:"merge,retainKeys" patchMergeKey:"name" protobuf:"bytes,1,rep,name=volumes"` // List of initialization containers belonging to the pod. // Init containers are executed in order prior to containers being started. If any // init container fails, the pod is considered to have failed and is handled according // to its restartPolicy. The name for an init container or normal container must be // unique among all containers. // Init containers may not have Lifecycle actions, Readiness probes, Liveness probes, or Startup probes. // The resourceRequirements of an init container are taken into account during scheduling // by finding the highest request/limit for each resource type, and then using the max of // of that value or the sum of the normal containers. Limits are applied to init containers // in a similar fashion. // Init containers cannot currently be added or removed. // Cannot be updated. // More info: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/ // +patchMergeKey=name // +patchStrategy=merge InitContainers []v1.Container `json:"initContainers,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,20,rep,name=initContainers"` // List of containers belonging to the pod. // Containers cannot currently be added or removed. // There must be at least one container in a Pod. // Cannot be updated. // +patchMergeKey=name // +patchStrategy=merge Containers []v1.Container `json:"containers,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,2,rep,name=containers"` // List of ephemeral containers run in this pod. Ephemeral containers may be run in an existing // pod to perform user-initiated actions such as debugging. 
This list cannot be specified when // creating a pod, and it cannot be modified by updating the pod spec. In order to add an // ephemeral container to an existing pod, use the pod's ephemeralcontainers subresource. // This field is beta-level and available on clusters that haven't disabled the EphemeralContainers feature gate. // +optional // +patchMergeKey=name // +patchStrategy=merge EphemeralContainers []v1.EphemeralContainer `` /* 128-byte string literal not displayed */ // Restart policy for all containers within the pod. // One of Always, OnFailure, Never. // Default to Always. // More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy // +optional RestartPolicy v1.RestartPolicy `json:"restartPolicy,omitempty" protobuf:"bytes,3,opt,name=restartPolicy,casttype=RestartPolicy"` // Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request. // Value must be non-negative integer. The value zero indicates stop immediately via // the kill signal (no opportunity to shut down). // If this value is nil, the default grace period will be used instead. // The grace period is the duration in seconds after the processes running in the pod are sent // a termination signal and the time when the processes are forcibly halted with a kill signal. // Set this value longer than the expected cleanup time for your process. // Defaults to 30 seconds. // +optional TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" protobuf:"varint,4,opt,name=terminationGracePeriodSeconds"` // Optional duration in seconds the pod may be active on the node relative to // StartTime before the system will actively try to mark it failed and kill associated containers. // Value must be a positive integer. // +optional ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty" protobuf:"varint,5,opt,name=activeDeadlineSeconds"` // Set DNS policy for the pod. 
// Defaults to "ClusterFirst". // Valid values are 'ClusterFirstWithHostNet', 'ClusterFirst', 'Default' or 'None'. // DNS parameters given in DNSConfig will be merged with the policy selected with DNSPolicy. // To have DNS options set along with hostNetwork, you have to specify DNS policy // explicitly to 'ClusterFirstWithHostNet'. // +optional DNSPolicy v1.DNSPolicy `json:"dnsPolicy,omitempty" protobuf:"bytes,6,opt,name=dnsPolicy,casttype=DNSPolicy"` // NodeSelector is a selector which must be true for the pod to fit on a node. // Selector which must match a node's labels for the pod to be scheduled on that node. // More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ // +optional // +mapType=atomic NodeSelector map[string]string `json:"nodeSelector,omitempty" protobuf:"bytes,7,rep,name=nodeSelector"` // ServiceAccountName is the name of the ServiceAccount to use to run this pod. // More info: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ // +optional ServiceAccountName string `json:"serviceAccountName,omitempty" protobuf:"bytes,8,opt,name=serviceAccountName"` // DeprecatedServiceAccount is a deprecated alias for ServiceAccountName. // Deprecated: Use serviceAccountName instead. // +k8s:conversion-gen=false // +optional DeprecatedServiceAccount string `json:"serviceAccount,omitempty" protobuf:"bytes,9,opt,name=serviceAccount"` // AutomountServiceAccountToken indicates whether a service account token should be automatically mounted. // +optional AutomountServiceAccountToken *bool `json:"automountServiceAccountToken,omitempty" protobuf:"varint,21,opt,name=automountServiceAccountToken"` // NodeName is a request to schedule this pod onto a specific node. If it is non-empty, // the scheduler simply schedules this pod onto that node, assuming that it fits resource // requirements. 
// +optional NodeName string `json:"nodeName,omitempty" protobuf:"bytes,10,opt,name=nodeName"` // Host networking requested for this pod. Use the host's network namespace. // If this option is set, the ports that will be used must be specified. // Default to false. // +k8s:conversion-gen=false // +optional HostNetwork bool `json:"hostNetwork,omitempty" protobuf:"varint,11,opt,name=hostNetwork"` // Use the host's pid namespace. // Optional: Default to false. // +k8s:conversion-gen=false // +optional HostPID bool `json:"hostPID,omitempty" protobuf:"varint,12,opt,name=hostPID"` // Use the host's ipc namespace. // Optional: Default to false. // +k8s:conversion-gen=false // +optional HostIPC bool `json:"hostIPC,omitempty" protobuf:"varint,13,opt,name=hostIPC"` // When this is set containers will be able to view and signal processes from other containers // in the same pod, and the first process in each container will not be assigned PID 1. // HostPID and ShareProcessNamespace cannot both be set. // Optional: Default to false. // +k8s:conversion-gen=false // +optional ShareProcessNamespace *bool `json:"shareProcessNamespace,omitempty" protobuf:"varint,27,opt,name=shareProcessNamespace"` // SecurityContext holds pod-level security attributes and common container settings. // Optional: Defaults to empty. See type description for default values of each field. // +optional SecurityContext *v1.PodSecurityContext `json:"securityContext,omitempty" protobuf:"bytes,14,opt,name=securityContext"` // ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec. // If specified, these secrets will be passed to individual puller implementations for them to use. For example, // in the case of docker, only DockerConfig type secrets are honored. 
// More info: https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod // +optional // +patchMergeKey=name // +patchStrategy=merge ImagePullSecrets []v1.LocalObjectReference `json:"imagePullSecrets,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,15,rep,name=imagePullSecrets"` // Specifies the hostname of the Pod // If not specified, the pod's hostname will be set to a system-defined value. // +optional Hostname string `json:"hostname,omitempty" protobuf:"bytes,16,opt,name=hostname"` // If specified, the fully qualified Pod hostname will be "<hostname>.<subdomain>.<pod namespace>.svc.<cluster domain>". // If not specified, the pod will not have a domainname at all. // +optional Subdomain string `json:"subdomain,omitempty" protobuf:"bytes,17,opt,name=subdomain"` // If specified, the pod's scheduling constraints // +optional Affinity *v1.Affinity `json:"affinity,omitempty" protobuf:"bytes,18,opt,name=affinity"` // If specified, the pod will be dispatched by specified scheduler. // If not specified, the pod will be dispatched by default scheduler. // +optional SchedulerName string `json:"schedulerName,omitempty" protobuf:"bytes,19,opt,name=schedulerName"` // If specified, the pod's tolerations. // +optional Tolerations []v1.Toleration `json:"tolerations,omitempty" protobuf:"bytes,22,opt,name=tolerations"` // HostAliases is an optional list of hosts and IPs that will be injected into the pod's hosts // file if specified. This is only valid for non-hostNetwork pods. // +optional // +patchMergeKey=ip // +patchStrategy=merge HostAliases []v1.HostAlias `json:"hostAliases,omitempty" patchStrategy:"merge" patchMergeKey:"ip" protobuf:"bytes,23,rep,name=hostAliases"` // If specified, indicates the pod's priority. "system-node-critical" and // "system-cluster-critical" are two special keywords which indicate the // highest priorities with the former being the highest priority. 
Any other // name must be defined by creating a PriorityClass object with that name. // If not specified, the pod priority will be default or zero if there is no // default. // +optional PriorityClassName string `json:"priorityClassName,omitempty" protobuf:"bytes,24,opt,name=priorityClassName"` // The priority value. Various system components use this field to find the // priority of the pod. When Priority Admission Controller is enabled, it // prevents users from setting this field. The admission controller populates // this field from PriorityClassName. // The higher the value, the higher the priority. // +optional Priority *int32 `json:"priority,omitempty" protobuf:"bytes,25,opt,name=priority"` // Specifies the DNS parameters of a pod. // Parameters specified here will be merged to the generated DNS // configuration based on DNSPolicy. // +optional DNSConfig *v1.PodDNSConfig `json:"dnsConfig,omitempty" protobuf:"bytes,26,opt,name=dnsConfig"` // If specified, all readiness gates will be evaluated for pod readiness. // A pod is ready when all its containers are ready AND // all conditions specified in the readiness gates have status equal to "True" // More info: https://git.k8s.io/enhancements/keps/sig-network/580-pod-readiness-gates // +optional ReadinessGates []v1.PodReadinessGate `json:"readinessGates,omitempty" protobuf:"bytes,28,opt,name=readinessGates"` // RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used // to run this pod. If no RuntimeClass resource matches the named class, the pod will not be run. // If unset or empty, the "legacy" RuntimeClass will be used, which is an implicit class with an // empty definition that uses the default runtime handler. // More info: https://git.k8s.io/enhancements/keps/sig-node/585-runtime-class // This is a beta feature as of Kubernetes v1.14. 
// +optional RuntimeClassName *string `json:"runtimeClassName,omitempty" protobuf:"bytes,29,opt,name=runtimeClassName"` // EnableServiceLinks indicates whether information about services should be injected into pod's // environment variables, matching the syntax of Docker links. // Optional: Defaults to true. // +optional EnableServiceLinks *bool `json:"enableServiceLinks,omitempty" protobuf:"varint,30,opt,name=enableServiceLinks"` // PreemptionPolicy is the Policy for preempting pods with lower priority. // One of Never, PreemptLowerPriority. // Defaults to PreemptLowerPriority if unset. // This field is beta-level, gated by the NonPreemptingPriority feature-gate. // +optional PreemptionPolicy *v1.PreemptionPolicy `json:"preemptionPolicy,omitempty" protobuf:"bytes,31,opt,name=preemptionPolicy"` // Overhead represents the resource overhead associated with running a pod for a given RuntimeClass. // This field will be autopopulated at admission time by the RuntimeClass admission controller. If // the RuntimeClass admission controller is enabled, overhead must not be set in Pod create requests. // The RuntimeClass admission controller will reject Pod create requests which have the overhead already // set. If RuntimeClass is configured and selected in the PodSpec, Overhead will be set to the value // defined in the corresponding RuntimeClass, otherwise it will remain unset and treated as zero. // More info: https://git.k8s.io/enhancements/keps/sig-node/688-pod-overhead/README.md // This field is beta-level as of Kubernetes v1.18, and is only honored by servers that enable the PodOverhead feature. // +optional Overhead v1.ResourceList `json:"overhead,omitempty" protobuf:"bytes,32,opt,name=overhead"` // TopologySpreadConstraints describes how a group of pods ought to spread across topology // domains. Scheduler will schedule pods in a way which abides by the constraints. // All topologySpreadConstraints are ANDed. 
// +optional // +patchMergeKey=topologyKey // +patchStrategy=merge // +listType=map // +listMapKey=topologyKey // +listMapKey=whenUnsatisfiable TopologySpreadConstraints []v1.TopologySpreadConstraint `` /* 147-byte string literal not displayed */ // If true the pod's hostname will be configured as the pod's FQDN, rather than the leaf name (the default). // In Linux containers, this means setting the FQDN in the hostname field of the kernel (the nodename field of struct utsname). // In Windows containers, this means setting the registry value of hostname for the registry key HKEY_LOCAL_MACHINE\\SYSTEM\\CurrentControlSet\\Services\\Tcpip\\Parameters to FQDN. // If a pod does not have FQDN, this has no effect. // Default to false. // +optional SetHostnameAsFQDN *bool `json:"setHostnameAsFQDN,omitempty" protobuf:"varint,35,opt,name=setHostnameAsFQDN"` // Specifies the OS of the containers in the pod. // Some pod and container fields are restricted if this is set. // // If the OS field is set to linux, the following fields must be unset: // -securityContext.windowsOptions // // If the OS field is set to windows, following fields must be unset: // - spec.hostPID // - spec.hostIPC // - spec.securityContext.seLinuxOptions // - spec.securityContext.seccompProfile // - spec.securityContext.fsGroup // - spec.securityContext.fsGroupChangePolicy // - spec.securityContext.sysctls // - spec.shareProcessNamespace // - spec.securityContext.runAsUser // - spec.securityContext.runAsGroup // - spec.securityContext.supplementalGroups // - spec.containers[*].securityContext.seLinuxOptions // - spec.containers[*].securityContext.seccompProfile // - spec.containers[*].securityContext.capabilities // - spec.containers[*].securityContext.readOnlyRootFilesystem // - spec.containers[*].securityContext.privileged // - spec.containers[*].securityContext.allowPrivilegeEscalation // - spec.containers[*].securityContext.procMount // - spec.containers[*].securityContext.runAsUser // - 
spec.containers[*].securityContext.runAsGroup // +optional // This is an alpha field and requires the IdentifyPodOS feature OS *v1.PodOS `json:"os,omitempty" protobuf:"bytes,36,opt,name=os"` // Use the host's user namespace. // Optional: Default to true. // If set to true or not present, the pod will be run in the host user namespace, useful // for when the pod needs a feature only available to the host user namespace, such as // loading a kernel module with CAP_SYS_MODULE. // When set to false, a new userns is created for the pod. Setting false is useful for // mitigating container breakout vulnerabilities even allowing users to run their // containers as root without actually having root privileges on the host. // This field is alpha-level and is only honored by servers that enable the UserNamespacesSupport feature. // +k8s:conversion-gen=false // +optional HostUsers *bool `json:"hostUsers,omitempty" protobuf:"bytes,37,opt,name=hostUsers"` // SchedulingGates is an opaque list of values that if specified will block scheduling the pod. // If schedulingGates is not empty, the pod will stay in the SchedulingGated state and the // scheduler will not attempt to schedule the pod. // // SchedulingGates can only be set at pod creation time, and be removed only afterwards. // // This is a beta feature enabled by the PodSchedulingReadiness feature gate. // // +patchMergeKey=name // +patchStrategy=merge // +listType=map // +listMapKey=name // +featureGate=PodSchedulingReadiness // +optional SchedulingGates []v1.PodSchedulingGate `json:"schedulingGates,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,38,opt,name=schedulingGates"` // ResourceClaims defines which ResourceClaims must be allocated // and reserved before the Pod is allowed to start. The resources // will be made available to those containers which consume them // by name. // // This is an alpha field and requires enabling the // DynamicResourceAllocation feature gate. 
// // This field is immutable. // // +patchMergeKey=name // +patchStrategy=merge,retainKeys // +listType=map // +listMapKey=name // +featureGate=DynamicResourceAllocation // +optional ResourceClaims []v1.PodResourceClaim `` /* 129-byte string literal not displayed */ }
PodSpec is a description of a pod.
func (*PodSpec) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodSpec.
func (*PodSpec) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type PredictorExtensionSpec ¶
type PredictorExtensionSpec struct { // This field points to the location of the trained model which is mounted onto the pod. // +optional StorageURI *string `json:"storageUri,omitempty"` // Runtime version of the predictor docker image // +optional RuntimeVersion *string `json:"runtimeVersion,omitempty"` // Protocol version to use by the predictor (i.e. v1 or v2 or grpc-v1 or grpc-v2) // +optional ProtocolVersion *constants.InferenceServiceProtocol `json:"protocolVersion,omitempty"` // Container enables overrides for the predictor. // Each framework will have different defaults that are populated in the underlying container spec. // +optional v1.Container `json:",inline"` // Storage Spec for model location // +optional Storage *StorageSpec `json:"storage,omitempty"` }
PredictorExtensionSpec defines configuration shared across all predictor frameworks
func (*PredictorExtensionSpec) DeepCopy ¶
func (in *PredictorExtensionSpec) DeepCopy() *PredictorExtensionSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PredictorExtensionSpec.
func (*PredictorExtensionSpec) DeepCopyInto ¶
func (in *PredictorExtensionSpec) DeepCopyInto(out *PredictorExtensionSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*PredictorExtensionSpec) GetStorageSpec ¶ added in v0.9.0
func (p *PredictorExtensionSpec) GetStorageSpec() *StorageSpec
GetStorageSpec returns the predictor storage spec object
func (*PredictorExtensionSpec) GetStorageUri ¶ added in v0.9.0
func (p *PredictorExtensionSpec) GetStorageUri() *string
GetStorageUri returns the predictor storage Uri
func (*PredictorExtensionSpec) Validate ¶ added in v0.9.0
func (p *PredictorExtensionSpec) Validate() error
Validate returns an error if invalid
type PredictorImplementation ¶
type PredictorImplementation interface { }
PredictorImplementation defines common functions for all predictors e.g. Tensorflow, Triton, etc +kubebuilder:object:generate=false
type PredictorSpec ¶
type PredictorSpec struct { // Spec for SKLearn model server SKLearn *SKLearnSpec `json:"sklearn,omitempty"` // Spec for XGBoost model server XGBoost *XGBoostSpec `json:"xgboost,omitempty"` // Spec for TFServing (https://github.com/tensorflow/serving) Tensorflow *TFServingSpec `json:"tensorflow,omitempty"` // Spec for TorchServe (https://pytorch.org/serve) PyTorch *TorchServeSpec `json:"pytorch,omitempty"` // Spec for Triton Inference Server (https://github.com/triton-inference-server/server) Triton *TritonSpec `json:"triton,omitempty"` // Spec for ONNX runtime (https://github.com/microsoft/onnxruntime) ONNX *ONNXRuntimeSpec `json:"onnx,omitempty"` // Spec for HuggingFace runtime (https://github.com/huggingface) HuggingFace *HuggingFaceRuntimeSpec `json:"huggingface,omitempty"` // Spec for PMML (http://dmg.org/pmml/v4-1/GeneralStructure.html) PMML *PMMLSpec `json:"pmml,omitempty"` // Spec for LightGBM model server LightGBM *LightGBMSpec `json:"lightgbm,omitempty"` // Spec for Paddle model server (https://github.com/PaddlePaddle/Serving) Paddle *PaddleServerSpec `json:"paddle,omitempty"` // Model spec for any arbitrary framework. Model *ModelSpec `json:"model,omitempty"` // This spec is dual purpose. <br /> // 1) Provide a full PodSpec for custom predictor. // The field PodSpec.Containers is mutually exclusive with other predictors (i.e. TFServing). <br /> // 2) Provide a predictor (i.e. TFServing) and specify PodSpec // overrides, you must not provide PodSpec.Containers in this case. <br /> PodSpec `json:",inline"` // Component extension defines the deployment configurations for a predictor ComponentExtensionSpec `json:",inline"` }
PredictorSpec defines the configuration for a predictor. The following fields follow a "1-of" semantic. Users must specify exactly one spec.
func (*PredictorSpec) DeepCopy ¶
func (in *PredictorSpec) DeepCopy() *PredictorSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PredictorSpec.
func (*PredictorSpec) DeepCopyInto ¶
func (in *PredictorSpec) DeepCopyInto(out *PredictorSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*PredictorSpec) GetExtensions ¶
func (s *PredictorSpec) GetExtensions() *ComponentExtensionSpec
GetExtensions returns the extensions for the component
func (*PredictorSpec) GetImplementation ¶
func (s *PredictorSpec) GetImplementation() ComponentImplementation
GetImplementation returns the implementation for the component
func (*PredictorSpec) GetImplementations ¶
func (s *PredictorSpec) GetImplementations() []ComponentImplementation
GetImplementations returns the implementations for the component
func (*PredictorSpec) GetPredictorImplementation ¶
func (s *PredictorSpec) GetPredictorImplementation() *ComponentImplementation
func (*PredictorSpec) GetPredictorImplementations ¶
func (s *PredictorSpec) GetPredictorImplementations() []ComponentImplementation
GetPredictorImplementations returns the implementations for the predictor
type SKLearnSpec ¶
type SKLearnSpec struct { // Contains fields shared across all predictors PredictorExtensionSpec `json:",inline"` }
SKLearnSpec defines arguments for configuring SKLearn model serving.
func (*SKLearnSpec) DeepCopy ¶
func (in *SKLearnSpec) DeepCopy() *SKLearnSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SKLearnSpec.
func (*SKLearnSpec) DeepCopyInto ¶
func (in *SKLearnSpec) DeepCopyInto(out *SKLearnSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*SKLearnSpec) Default ¶
func (k *SKLearnSpec) Default(config *InferenceServicesConfig)
Default sets defaults on the resource
func (*SKLearnSpec) GetContainer ¶
func (k *SKLearnSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container
func (*SKLearnSpec) GetProtocol ¶
func (k *SKLearnSpec) GetProtocol() constants.InferenceServiceProtocol
type ScaleMetric ¶ added in v0.9.0
type ScaleMetric string
ScaleMetric enum +kubebuilder:validation:Enum=cpu;memory;concurrency;rps
const ( MetricCPU ScaleMetric = "cpu" MetricMemory ScaleMetric = "memory" MetricConcurrency ScaleMetric = "concurrency" MetricRPS ScaleMetric = "rps" )
type StorageSpec ¶ added in v0.9.0
type StorageSpec struct { // The path to the model object in the storage. It cannot co-exist // with the storageURI. // +optional Path *string `json:"path,omitempty"` // The path to the model schema file in the storage. // +optional SchemaPath *string `json:"schemaPath,omitempty"` // Parameters to override the default storage credentials and config. // +optional Parameters *map[string]string `json:"parameters,omitempty"` // The Storage Key in the secret for this model. // +optional StorageKey *string `json:"key,omitempty"` }
func (*StorageSpec) DeepCopy ¶ added in v0.9.0
func (in *StorageSpec) DeepCopy() *StorageSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StorageSpec.
func (*StorageSpec) DeepCopyInto ¶ added in v0.9.0
func (in *StorageSpec) DeepCopyInto(out *StorageSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type TFServingSpec ¶
type TFServingSpec struct { // Contains fields shared across all predictors PredictorExtensionSpec `json:",inline"` }
TFServingSpec defines arguments for configuring Tensorflow model serving.
func (*TFServingSpec) DeepCopy ¶
func (in *TFServingSpec) DeepCopy() *TFServingSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TFServingSpec.
func (*TFServingSpec) DeepCopyInto ¶
func (in *TFServingSpec) DeepCopyInto(out *TFServingSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TFServingSpec) Default ¶
func (t *TFServingSpec) Default(config *InferenceServicesConfig)
Default sets defaults on the resource
func (*TFServingSpec) GetContainer ¶
func (t *TFServingSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container
func (*TFServingSpec) GetProtocol ¶
func (t *TFServingSpec) GetProtocol() constants.InferenceServiceProtocol
func (*TFServingSpec) Validate ¶
func (t *TFServingSpec) Validate() error
Validate returns an error if invalid
type TorchServeSpec ¶
type TorchServeSpec struct { // Contains fields shared across all predictors PredictorExtensionSpec `json:",inline"` }
TorchServeSpec defines arguments for configuring PyTorch model serving.
func (*TorchServeSpec) DeepCopy ¶
func (in *TorchServeSpec) DeepCopy() *TorchServeSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TorchServeSpec.
func (*TorchServeSpec) DeepCopyInto ¶
func (in *TorchServeSpec) DeepCopyInto(out *TorchServeSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TorchServeSpec) Default ¶
func (t *TorchServeSpec) Default(config *InferenceServicesConfig)
Default sets defaults on the resource
func (*TorchServeSpec) GetContainer ¶
func (t *TorchServeSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container
func (*TorchServeSpec) GetProtocol ¶
func (t *TorchServeSpec) GetProtocol() constants.InferenceServiceProtocol
func (*TorchServeSpec) Validate ¶
func (t *TorchServeSpec) Validate() error
Validate returns an error if invalid
type TransformerSpec ¶
type TransformerSpec struct { // This spec is dual purpose. <br /> // 1) Provide a full PodSpec for custom transformer. // The field PodSpec.Containers is mutually exclusive with other transformers. <br /> // 2) Provide a transformer and specify PodSpec // overrides, you must not provide PodSpec.Containers in this case. <br /> PodSpec `json:",inline"` // Component extension defines the deployment configurations for a transformer ComponentExtensionSpec `json:",inline"` }
TransformerSpec defines transformer service for pre/post processing
func (*TransformerSpec) DeepCopy ¶
func (in *TransformerSpec) DeepCopy() *TransformerSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TransformerSpec.
func (*TransformerSpec) DeepCopyInto ¶
func (in *TransformerSpec) DeepCopyInto(out *TransformerSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TransformerSpec) GetExtensions ¶
func (s *TransformerSpec) GetExtensions() *ComponentExtensionSpec
GetExtensions returns the extensions for the component
func (*TransformerSpec) GetImplementation ¶
func (s *TransformerSpec) GetImplementation() ComponentImplementation
GetImplementation returns the implementation for the component
func (*TransformerSpec) GetImplementations ¶
func (s *TransformerSpec) GetImplementations() []ComponentImplementation
GetImplementations returns the implementations for the component
type TransitionStatus ¶ added in v0.9.0
type TransitionStatus string
TransitionStatus enum +kubebuilder:validation:Enum="";UpToDate;InProgress;BlockedByFailedLoad;InvalidSpec
const ( // Predictor is up-to-date (reflects current spec) UpToDate TransitionStatus = "UpToDate" // Waiting for target model to reach state of active model InProgress TransitionStatus = "InProgress" // Target model failed to load BlockedByFailedLoad TransitionStatus = "BlockedByFailedLoad" // Target predictor spec failed validation InvalidSpec TransitionStatus = "InvalidSpec" )
TransitionStatus Enum values
type TritonSpec ¶
type TritonSpec struct { // Contains fields shared across all predictors PredictorExtensionSpec `json:",inline"` }
TritonSpec defines arguments for configuring Triton model serving.
func (*TritonSpec) DeepCopy ¶
func (in *TritonSpec) DeepCopy() *TritonSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TritonSpec.
func (*TritonSpec) DeepCopyInto ¶
func (in *TritonSpec) DeepCopyInto(out *TritonSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TritonSpec) Default ¶
func (t *TritonSpec) Default(config *InferenceServicesConfig)
Default sets defaults on the resource
func (*TritonSpec) GetContainer ¶
func (t *TritonSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container
func (*TritonSpec) GetProtocol ¶
func (t *TritonSpec) GetProtocol() constants.InferenceServiceProtocol
type XGBoostSpec ¶
type XGBoostSpec struct { // Contains fields shared across all predictors PredictorExtensionSpec `json:",inline"` }
XGBoostSpec defines arguments for configuring XGBoost model serving.
func (*XGBoostSpec) DeepCopy ¶
func (in *XGBoostSpec) DeepCopy() *XGBoostSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XGBoostSpec.
func (*XGBoostSpec) DeepCopyInto ¶
func (in *XGBoostSpec) DeepCopyInto(out *XGBoostSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*XGBoostSpec) Default ¶
func (x *XGBoostSpec) Default(config *InferenceServicesConfig)
Default sets defaults on the resource
func (*XGBoostSpec) GetContainer ¶
func (x *XGBoostSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container
func (*XGBoostSpec) GetProtocol ¶
func (x *XGBoostSpec) GetProtocol() constants.InferenceServiceProtocol
Source Files ¶
- component.go
- configmap.go
- doc.go
- explainer.go
- explainer_art.go
- explainer_custom.go
- inference_service.go
- inference_service_conversion.go
- inference_service_defaults.go
- inference_service_status.go
- inference_service_validation.go
- openapi_generated.go
- podspec.go
- predictor.go
- predictor_custom.go
- predictor_huggingfaceruntime.go
- predictor_lightgbm.go
- predictor_model.go
- predictor_onnxruntime.go
- predictor_paddle.go
- predictor_pmml.go
- predictor_sklearn.go
- predictor_tfserving.go
- predictor_torchserve.go
- predictor_triton.go
- predictor_xgboost.go
- transformer.go
- transformer_custom.go
- v1beta1.go
- zz_generated.deepcopy.go