package v1beta1

v0.11.0-rc1
Published: Jul 10, 2023 License: Apache-2.0 Imports: 32 Imported by: 21

Documentation

Overview

Package v1beta1 contains API Schema definitions for the serving v1beta1 API group +k8s:openapi-gen=true +k8s:deepcopy-gen=package,register +k8s:conversion-gen=kserve/pkg/apis/serving +k8s:defaulter-gen=TypeMeta +groupName=serving.kserve.io

Index

Constants

const (
	MinReplicasShouldBeLessThanMaxError = "MinReplicas cannot be greater than MaxReplicas."
	MinReplicasLowerBoundExceededError  = "MinReplicas cannot be less than 0."
	MaxReplicasLowerBoundExceededError  = "MaxReplicas cannot be less than 0."
	ParallelismLowerBoundExceededError  = "Parallelism cannot be less than 0."
	UnsupportedStorageURIFormatError    = "" /* 156-byte string literal not displayed */
	UnsupportedStorageSpecFormatError   = "storage.spec.type, must be one of: [%s]. storage.spec.type [%s] is not supported."
	InvalidLoggerType                   = "Invalid logger type"
	InvalidISVCNameFormatError          = "" /* 235-byte string literal not displayed */
	MaxWorkersShouldBeLessThanMaxError  = "Workers cannot be greater than %d"
	InvalidWorkerArgument               = "Invalid workers argument"
	InvalidProtocol                     = "Invalid protocol %s. Must be one of [%s]"
)

Known error messages
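
Several of these messages are printf-style format strings that the package's validation code fills in before returning an error. A minimal sketch of rendering them with fmt.Errorf, assuming the usual github.com/kserve/kserve/pkg/apis/serving/v1beta1 import path (the path is not stated on this page):

package main

import (
	"fmt"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

func main() {
	// MaxWorkersShouldBeLessThanMaxError carries a %d placeholder for the allowed maximum.
	err := fmt.Errorf(v1beta1.MaxWorkersShouldBeLessThanMaxError, 5)
	fmt.Println(err) // Workers cannot be greater than 5

	// InvalidProtocol takes the rejected protocol and the supported list.
	err = fmt.Errorf(v1beta1.InvalidProtocol, "v3", "v1, v2")
	fmt.Println(err)
}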

const (
	IngressConfigKeyName = "ingress"
	DeployConfigName     = "deploy"

	DefaultDomainTemplate = "{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}"
	DefaultIngressDomain  = "example.com"

	DefaultUrlScheme = "http"
)
const (
	// PredictorRouteReady is set when network configuration has completed.
	PredictorRouteReady apis.ConditionType = "PredictorRouteReady"
	// TransformerRouteReady is set when network configuration has completed.
	TransformerRouteReady apis.ConditionType = "TransformerRouteReady"
	// ExplainerRoutesReady is set when network configuration has completed.
	ExplainerRoutesReady apis.ConditionType = "ExplainerRoutesReady"
	// PredictorConfigurationReady is set when predictor pods are ready.
	PredictorConfigurationReady apis.ConditionType = "PredictorConfigurationReady"
	// TransformerConfigurationReady is set when transformer pods are ready.
	TransformerConfigurationReady apis.ConditionType = "TransformerConfigurationReady"
	// ExplainerConfigurationReady is set when explainer pods are ready.
	ExplainerConfigurationReady apis.ConditionType = "ExplainerConfigurationReady"
	// PredictorReady is set when predictor has reported readiness.
	PredictorReady apis.ConditionType = "PredictorReady"
	// TransformerReady is set when transformer has reported readiness.
	TransformerReady apis.ConditionType = "TransformerReady"
	// ExplainerReady is set when explainer has reported readiness.
	ExplainerReady apis.ConditionType = "ExplainerReady"
	// IngressReady is set when the Ingress is created.
	IngressReady apis.ConditionType = "IngressReady"
	// RoutesReady is set when underlying routes for all components have reported readiness.
	RoutesReady apis.ConditionType = "RoutesReady"
	// LatestDeploymentReady is set when underlying configurations for all components have reported readiness.
	LatestDeploymentReady apis.ConditionType = "LatestDeploymentReady"
)

ConditionType represents a Service condition value

const (
	PyTorchServingGPUSuffix          = "-gpu"
	InvalidPyTorchRuntimeIncludesGPU = "PyTorch RuntimeVersion is not GPU enabled but GPU resources are requested. "
	InvalidPyTorchRuntimeExcludesGPU = "PyTorch RuntimeVersion is GPU enabled but GPU resources are not requested. "
	V1ServiceEnvelope                = "kserve"
	V2ServiceEnvelope                = "kservev2"
)
const (
	ExplainerConfigKeyName = "explainers"
)

ConfigMap Keys

const (
	IsvcNameFmt string = "[a-z]([-a-z0-9]*[a-z0-9])?"
)

regular expressions for validation of isvc name

Variables

var (
	SupportedStorageURIPrefixList     = []string{"gs://", "s3://", "pvc://", "file://", "https://", "http://", "hdfs://", "webhdfs://"}
	SupportedStorageSpecURIPrefixList = []string{"s3://", "hdfs://", "webhdfs://"}
	AzureBlobURL                      = "blob.core.windows.net"
	AzureBlobURIRegEx                 = "https://(.+?).blob.core.windows.net/(.+)"
)

Constants

var (
	TensorflowEntrypointCommand          = "/usr/bin/tensorflow_model_server"
	TensorflowServingGRPCPort            = "9000"
	TensorflowServingRestPort            = "8080"
	TensorflowServingGPUSuffix           = "-gpu"
	InvalidTensorflowRuntimeVersionError = "Tensorflow RuntimeVersion must be one of %s"
	InvalidTensorflowRuntimeIncludesGPU  = "Tensorflow RuntimeVersion is not GPU enabled but GPU resources are requested. " + InvalidTensorflowRuntimeVersionError
	InvalidTensorflowRuntimeExcludesGPU  = "Tensorflow RuntimeVersion is GPU enabled but GPU resources are not requested. " + InvalidTensorflowRuntimeVersionError
)
var (
	// APIVersion is the current API version used to register these objects
	APIVersion = "v1beta1"

	// SchemeGroupVersion is group version used to register these objects
	SchemeGroupVersion = schema.GroupVersion{Group: constants.KServeAPIGroupName, Version: APIVersion}

	// SchemeBuilder is used to add go types to the GroupVersionKind scheme
	SchemeBuilder = &scheme.Builder{GroupVersion: SchemeGroupVersion}

	// AddToScheme is required by pkg/client/...
	AddToScheme = SchemeBuilder.AddToScheme
)
var (

	// regular expressions for validation of isvc name
	IsvcRegexp = regexp.MustCompile("^" + IsvcNameFmt + "$")
)
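
IsvcRegexp anchors IsvcNameFmt with "^...$", so it can be used to pre-check InferenceService names before submission. A minimal sketch, using the same assumed import path as above:

package main

import (
	"fmt"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

func main() {
	for _, name := range []string{"sklearn-iris", "My_Service"} {
		// The whole name must match the lowercase DNS-label-style pattern.
		fmt.Printf("%s valid: %v\n", name, v1beta1.IsvcRegexp.MatchString(name))
	}
	// sklearn-iris valid: true
	// My_Service valid: false
}
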
var (
	ONNXFileExt = ".onnx"
)

Functions

func ExactlyOneErrorFor

func ExactlyOneErrorFor(component Component) error

ExactlyOneErrorFor creates an error for the component's one-of semantic.

func GetIntReference

func GetIntReference(number int) *int

GetIntReference returns a pointer to the given integer.

func GetProtocolVersionPriority added in v0.9.0

func GetProtocolVersionPriority(protocols []constants.InferenceServiceProtocol) int

func Resource

func Resource(resource string) schema.GroupResource

Resource is required by pkg/client/listers/...

func ValidateMaxArgumentWorkers

func ValidateMaxArgumentWorkers(slice []string, maxWorkers int64) error

ValidateMaxArgumentWorkers validates that the requested workers count does not exceed the allowed maximum.

Types

type ARTExplainerSpec

type ARTExplainerSpec struct {
	// The type of ART explainer
	Type ARTExplainerType `json:"type"`
	// Contains fields shared across all explainers
	ExplainerExtensionSpec `json:",inline"`
}

ARTExplainerSpec defines the arguments for configuring an ART Explanation Server

func (*ARTExplainerSpec) DeepCopy

func (in *ARTExplainerSpec) DeepCopy() *ARTExplainerSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ARTExplainerSpec.

func (*ARTExplainerSpec) DeepCopyInto

func (in *ARTExplainerSpec) DeepCopyInto(out *ARTExplainerSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*ARTExplainerSpec) Default

func (s *ARTExplainerSpec) Default(config *InferenceServicesConfig)

func (*ARTExplainerSpec) GetContainer

func (s *ARTExplainerSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig,
	predictorHost ...string) *v1.Container

func (*ARTExplainerSpec) GetProtocol

func (*ARTExplainerSpec) GetResourceRequirements

func (s *ARTExplainerSpec) GetResourceRequirements() *v1.ResourceRequirements

func (*ARTExplainerSpec) IsMMS

func (s *ARTExplainerSpec) IsMMS(config *InferenceServicesConfig) bool

type ARTExplainerType

type ARTExplainerType string
const (
	ARTSquareAttackExplainer ARTExplainerType = "SquareAttack"
)

type AlibiExplainerSpec

type AlibiExplainerSpec struct {
	// The type of Alibi explainer <br />
	// Valid values are: <br />
	// - "AnchorTabular"; <br />
	// - "AnchorImages"; <br />
	// - "AnchorText"; <br />
	// - "Counterfactuals"; <br />
	// - "Contrastive"; <br />
	Type AlibiExplainerType `json:"type"`
	// Contains fields shared across all explainers
	ExplainerExtensionSpec `json:",inline"`
}

AlibiExplainerSpec defines the arguments for configuring an Alibi Explanation Server

func (*AlibiExplainerSpec) DeepCopy

func (in *AlibiExplainerSpec) DeepCopy() *AlibiExplainerSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AlibiExplainerSpec.

func (*AlibiExplainerSpec) DeepCopyInto

func (in *AlibiExplainerSpec) DeepCopyInto(out *AlibiExplainerSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*AlibiExplainerSpec) Default

func (s *AlibiExplainerSpec) Default(config *InferenceServicesConfig)

func (*AlibiExplainerSpec) GetContainer

func (s *AlibiExplainerSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig,
	predictorHost ...string) *v1.Container

func (*AlibiExplainerSpec) GetProtocol

func (*AlibiExplainerSpec) GetResourceRequirements

func (s *AlibiExplainerSpec) GetResourceRequirements() *v1.ResourceRequirements

func (*AlibiExplainerSpec) IsMMS

type AlibiExplainerType

type AlibiExplainerType string

AlibiExplainerType is the explanation method

const (
	AlibiAnchorsTabularExplainer  AlibiExplainerType = "AnchorTabular"
	AlibiAnchorsImageExplainer    AlibiExplainerType = "AnchorImages"
	AlibiAnchorsTextExplainer     AlibiExplainerType = "AnchorText"
	AlibiCounterfactualsExplainer AlibiExplainerType = "Counterfactuals"
	AlibiContrastiveExplainer     AlibiExplainerType = "Contrastive"
)

AlibiExplainerType Enum

type Batcher

type Batcher struct {
	// Specifies the max number of requests to trigger a batch
	// +optional
	MaxBatchSize *int `json:"maxBatchSize,omitempty"`
	// Specifies the max latency to trigger a batch
	// +optional
	MaxLatency *int `json:"maxLatency,omitempty"`
	// Specifies the timeout of a batch
	// +optional
	Timeout *int `json:"timeout,omitempty"`
}

Batcher specifies optional payload batching available for all components
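
A minimal sketch of enabling batching through a component's extension spec (Batcher is attached via ComponentExtensionSpec.Batcher, documented below); the values are illustrative and the latency unit is not stated in this listing:

package main

import (
	"fmt"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

func main() {
	maxBatchSize := 32
	maxLatency := 500 // latency threshold to trigger a batch; unit not stated in this listing
	ext := v1beta1.ComponentExtensionSpec{
		Batcher: &v1beta1.Batcher{
			MaxBatchSize: &maxBatchSize,
			MaxLatency:   &maxLatency,
		},
	}
	fmt.Printf("batch up to %d requests, flush after %d\n", *ext.Batcher.MaxBatchSize, *ext.Batcher.MaxLatency)
}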

func (*Batcher) DeepCopy

func (in *Batcher) DeepCopy() *Batcher

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Batcher.

func (*Batcher) DeepCopyInto

func (in *Batcher) DeepCopyInto(out *Batcher)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type Component

type Component interface {
	GetImplementation() ComponentImplementation
	GetImplementations() []ComponentImplementation
	GetExtensions() *ComponentExtensionSpec
}

Component interface is implemented by all specs that contain component implementations, e.g. PredictorSpec, ExplainerSpec, TransformerSpec. +kubebuilder:object:generate=false
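
Any of these specs can therefore be handled uniformly through the Component interface. A minimal sketch using ExplainerSpec (documented below) and the assumed import path:

package main

import (
	"fmt"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

// Compile-time check that ExplainerSpec satisfies Component.
var _ v1beta1.Component = &v1beta1.ExplainerSpec{}

func main() {
	var c v1beta1.Component = &v1beta1.ExplainerSpec{}
	// GetExtensions exposes the shared ComponentExtensionSpec of the component.
	fmt.Println(c.GetExtensions() != nil) // true
}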

type ComponentExtensionSpec

type ComponentExtensionSpec struct {
	// Minimum number of replicas, defaults to 1 but can be set to 0 to enable scale-to-zero.
	// +optional
	MinReplicas *int `json:"minReplicas,omitempty"`
	// Maximum number of replicas for autoscaling.
	// +optional
	MaxReplicas int `json:"maxReplicas,omitempty"`
	// ScaleTarget specifies the integer target value of the metric type the Autoscaler watches for.
	// concurrency and rps targets are supported by Knative Pod Autoscaler
	//(https://knative.dev/docs/serving/autoscaling/autoscaling-targets/).
	// +optional
	ScaleTarget *int `json:"scaleTarget,omitempty"`
	// ScaleMetric defines the scaling metric type watched by autoscaler
	// possible values are concurrency, rps, cpu, memory. concurrency, rps are supported via
	// Knative Pod Autoscaler(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics).
	// +optional
	ScaleMetric *ScaleMetric `json:"scaleMetric,omitempty"`
	// ContainerConcurrency specifies how many requests can be processed concurrently, this sets the hard limit of the container
	// concurrency(https://knative.dev/docs/serving/autoscaling/concurrency).
	// +optional
	ContainerConcurrency *int64 `json:"containerConcurrency,omitempty"`
	// TimeoutSeconds specifies the number of seconds to wait before timing out a request to the component.
	// +optional
	TimeoutSeconds *int64 `json:"timeout,omitempty"`
	// CanaryTrafficPercent defines the traffic split percentage between the candidate revision and the last ready revision
	// +optional
	CanaryTrafficPercent *int64 `json:"canaryTrafficPercent,omitempty"`
	// Activate request/response logging and logger configurations
	// +optional
	Logger *LoggerSpec `json:"logger,omitempty"`
	// Activate request batching and batching configurations
	// +optional
	Batcher *Batcher `json:"batcher,omitempty"`
	// Labels that will be added to the component pod.
	// More info: http://kubernetes.io/docs/user-guide/labels
	// +optional
	Labels map[string]string `json:"labels,omitempty"`
	// Annotations that will be added to the component pod.
	// More info: http://kubernetes.io/docs/user-guide/annotations
	// +optional
	Annotations map[string]string `json:"annotations,omitempty"`
}

ComponentExtensionSpec defines the deployment configuration for a given InferenceService component

func (*ComponentExtensionSpec) DeepCopy

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComponentExtensionSpec.

func (*ComponentExtensionSpec) DeepCopyInto

func (in *ComponentExtensionSpec) DeepCopyInto(out *ComponentExtensionSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*ComponentExtensionSpec) Default

func (s *ComponentExtensionSpec) Default(config *InferenceServicesConfig)

Default the ComponentExtensionSpec

func (*ComponentExtensionSpec) Validate

func (s *ComponentExtensionSpec) Validate() error

Validate the ComponentExtensionSpec
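
A minimal sketch of constructing and validating a ComponentExtensionSpec; it deliberately sets MinReplicas above MaxReplicas, which is expected to trip the MinReplicasShouldBeLessThanMaxError message listed under Constants:

package main

import (
	"fmt"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

func main() {
	spec := v1beta1.ComponentExtensionSpec{
		MinReplicas: v1beta1.GetIntReference(2), // package helper returning *int
		MaxReplicas: 1,                          // lower than MinReplicas on purpose
	}
	if err := spec.Validate(); err != nil {
		fmt.Println(err)
	}
}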

type ComponentImplementation

type ComponentImplementation interface {
	Default(config *InferenceServicesConfig)
	Validate() error
	GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container
	GetStorageUri() *string
	GetStorageSpec() *StorageSpec
	GetProtocol() constants.InferenceServiceProtocol
}

ComponentImplementation interface is implemented by predictor, transformer, and explainer implementations +kubebuilder:object:generate=false

func FirstNonNilComponent

func FirstNonNilComponent(objects []ComponentImplementation) ComponentImplementation

FirstNonNilComponent returns the first non-nil component implementation, or nil if none is set.

func NonNilComponents

func NonNilComponents(objects []ComponentImplementation) (results []ComponentImplementation)

NonNilComponents returns components that are not nil

func NonNilPredictors

func NonNilPredictors(objects []ComponentImplementation) (results []ComponentImplementation)

type ComponentStatusSpec

type ComponentStatusSpec struct {
	// Latest revision name that is in ready state
	// +optional
	LatestReadyRevision string `json:"latestReadyRevision,omitempty"`
	// Latest revision name that is created
	// +optional
	LatestCreatedRevision string `json:"latestCreatedRevision,omitempty"`
	// Previous revision name that is rolled out with 100 percent traffic
	// +optional
	PreviousRolledoutRevision string `json:"previousRolledoutRevision,omitempty"`
	// Latest revision name that is rolled out with 100 percent traffic
	// +optional
	LatestRolledoutRevision string `json:"latestRolledoutRevision,omitempty"`
	// Traffic holds the configured traffic distribution for latest ready revision and previous rolled out revision.
	// +optional
	Traffic []knservingv1.TrafficTarget `json:"traffic,omitempty"`
	// URL holds the primary url that will distribute traffic over the provided traffic targets.
	// This will be one of the REST or gRPC endpoints that are available.
	// It generally has the form http[s]://{route-name}.{route-namespace}.{cluster-level-suffix}
	// +optional
	URL *apis.URL `json:"url,omitempty"`
	// REST endpoint of the component if available.
	// +optional
	RestURL *apis.URL `json:"restUrl,omitempty"`
	// gRPC endpoint of the component if available.
	// +optional
	GrpcURL *apis.URL `json:"grpcUrl,omitempty"`
	// Addressable endpoint for the InferenceService
	// +optional
	Address *duckv1.Addressable `json:"address,omitempty"`
}

ComponentStatusSpec describes the state of the component

func (*ComponentStatusSpec) DeepCopy

func (in *ComponentStatusSpec) DeepCopy() *ComponentStatusSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComponentStatusSpec.

func (*ComponentStatusSpec) DeepCopyInto

func (in *ComponentStatusSpec) DeepCopyInto(out *ComponentStatusSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type ComponentType

type ComponentType string

ComponentType contains the different types of components of the service

const (
	PredictorComponent   ComponentType = "predictor"
	ExplainerComponent   ComponentType = "explainer"
	TransformerComponent ComponentType = "transformer"
)

ComponentType Enum

type CustomExplainer

type CustomExplainer struct {
	v1.PodSpec `json:",inline"`
}

CustomExplainer defines arguments for configuring a custom explainer.

func NewCustomExplainer

func NewCustomExplainer(podSpec *PodSpec) *CustomExplainer

func (*CustomExplainer) DeepCopy

func (in *CustomExplainer) DeepCopy() *CustomExplainer

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CustomExplainer.

func (*CustomExplainer) DeepCopyInto

func (in *CustomExplainer) DeepCopyInto(out *CustomExplainer)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*CustomExplainer) Default

func (c *CustomExplainer) Default(config *InferenceServicesConfig)

Default sets defaults on the resource

func (*CustomExplainer) GetContainer

func (c *CustomExplainer) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig,
	predictorHost ...string) *v1.Container

GetContainer transforms the resource into a container spec

func (*CustomExplainer) GetProtocol

func (*CustomExplainer) GetStorageSpec added in v0.9.0

func (c *CustomExplainer) GetStorageSpec() *StorageSpec

func (*CustomExplainer) GetStorageUri

func (c *CustomExplainer) GetStorageUri() *string

func (*CustomExplainer) IsMMS

func (c *CustomExplainer) IsMMS(config *InferenceServicesConfig) bool

func (*CustomExplainer) Validate

func (s *CustomExplainer) Validate() error

Validate the spec

type CustomPredictor

type CustomPredictor struct {
	v1.PodSpec `json:",inline"`
}

CustomPredictor defines arguments for configuring a custom server.

func NewCustomPredictor

func NewCustomPredictor(podSpec *PodSpec) *CustomPredictor

func (*CustomPredictor) DeepCopy

func (in *CustomPredictor) DeepCopy() *CustomPredictor

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CustomPredictor.

func (*CustomPredictor) DeepCopyInto

func (in *CustomPredictor) DeepCopyInto(out *CustomPredictor)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*CustomPredictor) Default

func (c *CustomPredictor) Default(config *InferenceServicesConfig)

Default sets defaults on the resource

func (*CustomPredictor) GetContainer

func (c *CustomPredictor) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig,
	predictorHost ...string) *v1.Container

GetContainer transforms the resource into a container spec

func (*CustomPredictor) GetProtocol

func (*CustomPredictor) GetStorageSpec added in v0.9.0

func (c *CustomPredictor) GetStorageSpec() *StorageSpec

func (*CustomPredictor) GetStorageUri

func (c *CustomPredictor) GetStorageUri() *string

func (*CustomPredictor) Validate

func (c *CustomPredictor) Validate() error

Validate returns an error if invalid

type CustomTransformer

type CustomTransformer struct {
	v1.PodSpec `json:",inline"`
}

CustomTransformer defines arguments for configuring a custom transformer.

func NewCustomTransformer

func NewCustomTransformer(podSpec *PodSpec) *CustomTransformer

func (*CustomTransformer) DeepCopy

func (in *CustomTransformer) DeepCopy() *CustomTransformer

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CustomTransformer.

func (*CustomTransformer) DeepCopyInto

func (in *CustomTransformer) DeepCopyInto(out *CustomTransformer)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*CustomTransformer) Default

func (c *CustomTransformer) Default(config *InferenceServicesConfig)

Default sets defaults on the resource

func (*CustomTransformer) GetContainer

func (c *CustomTransformer) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig,
	predictorHost ...string) *v1.Container

GetContainer transforms the resource into a container spec

func (*CustomTransformer) GetProtocol

func (*CustomTransformer) GetStorageSpec added in v0.9.0

func (c *CustomTransformer) GetStorageSpec() *StorageSpec

func (*CustomTransformer) GetStorageUri

func (c *CustomTransformer) GetStorageUri() *string

func (*CustomTransformer) IsMMS

func (*CustomTransformer) Validate

func (c *CustomTransformer) Validate() error

Validate returns an error if invalid

type DeployConfig

type DeployConfig struct {
	DefaultDeploymentMode string `json:"defaultDeploymentMode,omitempty"`
}

+kubebuilder:object:generate=false

func NewDeployConfig

func NewDeployConfig(cli client.Client) (*DeployConfig, error)

func (*DeployConfig) DeepCopy added in v0.10.0

func (in *DeployConfig) DeepCopy() *DeployConfig

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeployConfig.

func (*DeployConfig) DeepCopyInto added in v0.10.0

func (in *DeployConfig) DeepCopyInto(out *DeployConfig)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type ExplainerConfig

type ExplainerConfig struct {
	// explainer docker image name
	ContainerImage string `json:"image"`
	// default explainer docker image version
	DefaultImageVersion string `json:"defaultImageVersion"`
}

+kubebuilder:object:generate=false

func (*ExplainerConfig) DeepCopy added in v0.10.0

func (in *ExplainerConfig) DeepCopy() *ExplainerConfig

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExplainerConfig.

func (*ExplainerConfig) DeepCopyInto added in v0.10.0

func (in *ExplainerConfig) DeepCopyInto(out *ExplainerConfig)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type ExplainerExtensionSpec

type ExplainerExtensionSpec struct {
	// The location of a trained explanation model
	StorageURI string `json:"storageUri,omitempty"`
	// Defaults to latest Explainer Version
	RuntimeVersion *string `json:"runtimeVersion,omitempty"`
	// Inline custom parameter settings for explainer
	Config map[string]string `json:"config,omitempty"`
	// Container enables overrides for the explainer container.
	// Each framework will have different defaults that are populated in the underlying container spec.
	// +optional
	v1.Container `json:",inline"`
	// Storage Spec for model location
	// +optional
	Storage *StorageSpec `json:"storage,omitempty"`
}

ExplainerExtensionSpec defines configuration shared across all explainer frameworks

func (*ExplainerExtensionSpec) DeepCopy

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExplainerExtensionSpec.

func (*ExplainerExtensionSpec) DeepCopyInto

func (in *ExplainerExtensionSpec) DeepCopyInto(out *ExplainerExtensionSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*ExplainerExtensionSpec) GetStorageSpec added in v0.9.0

func (e *ExplainerExtensionSpec) GetStorageSpec() *StorageSpec

GetStorageSpec returns the explainer storage spec object

func (*ExplainerExtensionSpec) GetStorageUri added in v0.9.0

func (e *ExplainerExtensionSpec) GetStorageUri() *string

GetStorageUri returns the explainer storage URI

func (*ExplainerExtensionSpec) Validate added in v0.9.0

func (e *ExplainerExtensionSpec) Validate() error

Validate returns an error if invalid

type ExplainerSpec

type ExplainerSpec struct {
	// Spec for alibi explainer
	Alibi *AlibiExplainerSpec `json:"alibi,omitempty"`
	// Spec for ART explainer
	ART *ARTExplainerSpec `json:"art,omitempty"`
	// This spec is dual purpose.
	// 1) Users may choose to provide a full PodSpec for their custom explainer.
	// The field PodSpec.Containers is mutually exclusive with other explainers (e.g. Alibi).
	// 2) Users may choose to provide an Explainer (e.g. Alibi) and specify PodSpec
	// overrides in the PodSpec. They must not provide PodSpec.Containers in this case.
	PodSpec `json:",inline"`
	// Component extension defines the deployment configurations for explainer
	ComponentExtensionSpec `json:",inline"`
}

ExplainerSpec defines the container spec for a model explanation server, The following fields follow a "1-of" semantic. Users must specify exactly one spec.
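
A minimal sketch of an ExplainerSpec that respects the 1-of semantic by setting only the Alibi implementation; the storage location is illustrative:

package main

import (
	"fmt"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

func main() {
	explainer := &v1beta1.ExplainerSpec{
		// Setting ART (or PodSpec.Containers) in addition to Alibi would violate
		// the 1-of semantic; ExactlyOneErrorFor builds the corresponding error.
		Alibi: &v1beta1.AlibiExplainerSpec{
			Type: v1beta1.AlibiAnchorsTabularExplainer,
			ExplainerExtensionSpec: v1beta1.ExplainerExtensionSpec{
				StorageURI: "gs://example-bucket/explainer", // illustrative URI
			},
		},
	}
	fmt.Println(explainer.Alibi.StorageURI)
}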

func (*ExplainerSpec) DeepCopy

func (in *ExplainerSpec) DeepCopy() *ExplainerSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExplainerSpec.

func (*ExplainerSpec) DeepCopyInto

func (in *ExplainerSpec) DeepCopyInto(out *ExplainerSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*ExplainerSpec) GetExtensions

func (s *ExplainerSpec) GetExtensions() *ComponentExtensionSpec

GetExtensions returns the extensions for the component

func (*ExplainerSpec) GetImplementation

func (s *ExplainerSpec) GetImplementation() ComponentImplementation

GetImplementation returns the implementation for the component

func (*ExplainerSpec) GetImplementations

func (s *ExplainerSpec) GetImplementations() []ComponentImplementation

GetImplementations returns the implementations for the component

type ExplainersConfig

type ExplainersConfig struct {
	AlibiExplainer ExplainerConfig `json:"alibi,omitempty"`
	ARTExplainer   ExplainerConfig `json:"art,omitempty"`
}

+kubebuilder:object:generate=false

func (*ExplainersConfig) DeepCopy added in v0.10.0

func (in *ExplainersConfig) DeepCopy() *ExplainersConfig

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExplainersConfig.

func (*ExplainersConfig) DeepCopyInto added in v0.10.0

func (in *ExplainersConfig) DeepCopyInto(out *ExplainersConfig)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type FailureInfo added in v0.9.0

type FailureInfo struct {
	// Name of component to which the failure relates (usually Pod name)
	//+optional
	Location string `json:"location,omitempty"`
	// High level class of failure
	//+optional
	Reason FailureReason `json:"reason,omitempty"`
	// Detailed error message
	//+optional
	Message string `json:"message,omitempty"`
	// Internal Revision/ID of model, tied to specific Spec contents
	//+optional
	ModelRevisionName string `json:"modelRevisionName,omitempty"`
	// Time failure occurred or was discovered
	//+optional
	Time *metav1.Time `json:"time,omitempty"`
	// Exit status from the last termination of the container
	//+optional
	ExitCode int32 `json:"exitCode,omitempty"`
}

func (*FailureInfo) DeepCopy added in v0.9.0

func (in *FailureInfo) DeepCopy() *FailureInfo

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailureInfo.

func (*FailureInfo) DeepCopyInto added in v0.9.0

func (in *FailureInfo) DeepCopyInto(out *FailureInfo)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type FailureReason added in v0.9.0

type FailureReason string

FailureReason enum +kubebuilder:validation:Enum=ModelLoadFailed;RuntimeUnhealthy;RuntimeDisabled;NoSupportingRuntime;RuntimeNotRecognized;InvalidPredictorSpec

const (
	// The model failed to load within a ServingRuntime container
	ModelLoadFailed FailureReason = "ModelLoadFailed"
	// Corresponding ServingRuntime containers failed to start or are unhealthy
	RuntimeUnhealthy FailureReason = "RuntimeUnhealthy"
	// The ServingRuntime is disabled
	RuntimeDisabled FailureReason = "RuntimeDisabled"
	// There are no ServingRuntime which support the specified model type
	NoSupportingRuntime FailureReason = "NoSupportingRuntime"
	// There is no ServingRuntime defined with the specified runtime name
	RuntimeNotRecognized FailureReason = "RuntimeNotRecognized"
	// The current Predictor Spec is invalid or unsupported
	InvalidPredictorSpec FailureReason = "InvalidPredictorSpec"
)

FailureReason enum values

type InferenceService

type InferenceService struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	Spec InferenceServiceSpec `json:"spec,omitempty"`

	// +kubebuilder:pruning:PreserveUnknownFields
	Status InferenceServiceStatus `json:"status,omitempty"`
}

InferenceService is the Schema for the InferenceServices API +k8s:openapi-gen=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +genclient +kubebuilder:object:root=true +kubebuilder:subresource:status +kubebuilder:printcolumn:name="URL",type="string",JSONPath=".status.url" +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status" +kubebuilder:printcolumn:name="Prev",type="integer",JSONPath=".status.components.predictor.traffic[?(@.tag=='prev')].percent" +kubebuilder:printcolumn:name="Latest",type="integer",JSONPath=".status.components.predictor.traffic[?(@.latestRevision==true)].percent" +kubebuilder:printcolumn:name="PrevRolledoutRevision",type="string",JSONPath=".status.components.predictor.traffic[?(@.tag=='prev')].revisionName" +kubebuilder:printcolumn:name="LatestReadyRevision",type="string",JSONPath=".status.components.predictor.traffic[?(@.latestRevision==true)].revisionName" +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" +kubebuilder:resource:path=inferenceservices,shortName=isvc +kubebuilder:storageversion
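
A minimal sketch of building an InferenceService value in Go. It assumes that PredictorSpec exposes a Model field and that PredictorExtensionSpec has a StorageURI *string field; neither type is expanded in this listing, so treat those names as assumptions, and the model URI is illustrative:

package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

func main() {
	storageURI := "gs://example-bucket/models/sklearn/model" // illustrative URI
	isvc := &v1beta1.InferenceService{
		ObjectMeta: metav1.ObjectMeta{Name: "sklearn-iris", Namespace: "default"},
		Spec: v1beta1.InferenceServiceSpec{
			Predictor: v1beta1.PredictorSpec{
				// Model and StorageURI are assumed fields; see the note above.
				Model: &v1beta1.ModelSpec{
					ModelFormat:            v1beta1.ModelFormat{Name: "sklearn"},
					PredictorExtensionSpec: v1beta1.PredictorExtensionSpec{StorageURI: &storageURI},
				},
			},
		},
	}
	fmt.Println(isvc.Namespace + "/" + isvc.Name)
}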

func (*InferenceService) DeepCopy

func (in *InferenceService) DeepCopy() *InferenceService

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceService.

func (*InferenceService) DeepCopyInto

func (in *InferenceService) DeepCopyInto(out *InferenceService)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*InferenceService) DeepCopyObject

func (in *InferenceService) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

func (*InferenceService) Default

func (isvc *InferenceService) Default()

func (*InferenceService) DefaultInferenceService

func (isvc *InferenceService) DefaultInferenceService(config *InferenceServicesConfig, deployConfig *DeployConfig)

func (*InferenceService) Hub

func (*InferenceService) Hub()

func (*InferenceService) SetMlServerDefaults added in v0.9.0

func (isvc *InferenceService) SetMlServerDefaults()

func (*InferenceService) SetRuntimeDefaults added in v0.9.0

func (isvc *InferenceService) SetRuntimeDefaults()

func (*InferenceService) SetTorchServeDefaults added in v0.9.0

func (isvc *InferenceService) SetTorchServeDefaults()

func (*InferenceService) SetTritonDefaults added in v0.9.0

func (isvc *InferenceService) SetTritonDefaults()

func (*InferenceService) ValidateCreate

func (isvc *InferenceService) ValidateCreate() error

ValidateCreate implements webhook.Validator so a webhook will be registered for the type

func (*InferenceService) ValidateDelete

func (isvc *InferenceService) ValidateDelete() error

ValidateDelete implements webhook.Validator so a webhook will be registered for the type

func (*InferenceService) ValidateUpdate

func (isvc *InferenceService) ValidateUpdate(old runtime.Object) error

ValidateUpdate implements webhook.Validator so a webhook will be registered for the type

type InferenceServiceList

type InferenceServiceList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	// +listType=set
	Items []InferenceService `json:"items"`
}

InferenceServiceList contains a list of InferenceService +k8s:openapi-gen=true +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +kubebuilder:object:root=true

func (*InferenceServiceList) DeepCopy

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceServiceList.

func (*InferenceServiceList) DeepCopyInto

func (in *InferenceServiceList) DeepCopyInto(out *InferenceServiceList)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*InferenceServiceList) DeepCopyObject

func (in *InferenceServiceList) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type InferenceServiceSpec

type InferenceServiceSpec struct {
	// Predictor defines the model serving spec
	// +required
	Predictor PredictorSpec `json:"predictor"`
	// Explainer defines the model explanation service spec,
	// explainer service calls to predictor or transformer if it is specified.
	// +optional
	Explainer *ExplainerSpec `json:"explainer,omitempty"`
	// Transformer defines the pre/post processing before and after the predictor call,
	// transformer service calls to predictor service.
	// +optional
	Transformer *TransformerSpec `json:"transformer,omitempty"`
}

InferenceServiceSpec is the top level type for this resource

func (*InferenceServiceSpec) DeepCopy

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceServiceSpec.

func (*InferenceServiceSpec) DeepCopyInto

func (in *InferenceServiceSpec) DeepCopyInto(out *InferenceServiceSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type InferenceServiceStatus

type InferenceServiceStatus struct {
	// Conditions for the InferenceService <br/>
	// - PredictorReady: predictor readiness condition; <br/>
	// - TransformerReady: transformer readiness condition; <br/>
	// - ExplainerReady: explainer readiness condition; <br/>
	// - RoutesReady: aggregated routing condition; <br/>
	// - Ready: aggregated condition; <br/>
	duckv1.Status `json:",inline"`
	// Addressable endpoint for the InferenceService
	// +optional
	Address *duckv1.Addressable `json:"address,omitempty"`
	// URL holds the url that will distribute traffic over the provided traffic targets.
	// It generally has the form http[s]://{route-name}.{route-namespace}.{cluster-level-suffix}
	// +optional
	URL *apis.URL `json:"url,omitempty"`
	// Statuses for the components of the InferenceService
	Components map[ComponentType]ComponentStatusSpec `json:"components,omitempty"`
	// Model related statuses
	ModelStatus ModelStatus `json:"modelStatus,omitempty"`
}

InferenceServiceStatus defines the observed state of InferenceService

func (*InferenceServiceStatus) ClearCondition added in v0.8.0

func (ss *InferenceServiceStatus) ClearCondition(conditionType apis.ConditionType)

func (*InferenceServiceStatus) DeepCopy

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceServiceStatus.

func (*InferenceServiceStatus) DeepCopyInto

func (in *InferenceServiceStatus) DeepCopyInto(out *InferenceServiceStatus)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*InferenceServiceStatus) GetCondition

GetCondition returns the condition by name.

func (*InferenceServiceStatus) InitializeConditions

func (ss *InferenceServiceStatus) InitializeConditions()

func (*InferenceServiceStatus) IsConditionFalse added in v0.11.0

func (ss *InferenceServiceStatus) IsConditionFalse(t apis.ConditionType) bool

IsConditionFalse reports whether the given condition is False

func (*InferenceServiceStatus) IsConditionReady

func (ss *InferenceServiceStatus) IsConditionReady(t apis.ConditionType) bool

IsConditionReady returns the readiness for a given condition

func (*InferenceServiceStatus) IsConditionUnknown added in v0.11.0

func (ss *InferenceServiceStatus) IsConditionUnknown(t apis.ConditionType) bool

IsConditionUnknown reports whether the given condition is Unknown

func (*InferenceServiceStatus) IsReady

func (ss *InferenceServiceStatus) IsReady() bool

IsReady returns the overall readiness for the inference service.
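
A minimal sketch of inspecting readiness on an InferenceService obtained elsewhere (for example through a controller-runtime client); PredictorReady is the condition type listed under Constants:

package main

import (
	"fmt"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

func report(isvc *v1beta1.InferenceService) {
	if isvc.Status.IsReady() {
		fmt.Println("ready at", isvc.Status.URL)
		return
	}
	// Drill into individual conditions when the aggregate is not ready.
	if isvc.Status.IsConditionFalse(v1beta1.PredictorReady) {
		fmt.Println("predictor is not ready")
	}
}

func main() {
	report(&v1beta1.InferenceService{}) // zero value: not ready
}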

func (*InferenceServiceStatus) PropagateCrossComponentStatus added in v0.11.0

func (ss *InferenceServiceStatus) PropagateCrossComponentStatus(componentList []ComponentType, conditionType apis.ConditionType)

PropagateCrossComponentStatus aggregates the RoutesReady or ConfigurationsReady condition across all available components and propagates the RoutesReady or LatestDeploymentReady status accordingly.

func (*InferenceServiceStatus) PropagateModelStatus added in v0.9.0

func (ss *InferenceServiceStatus) PropagateModelStatus(statusSpec ComponentStatusSpec, podList *v1.PodList, rawDeployment bool)

func (*InferenceServiceStatus) PropagateRawStatus

func (ss *InferenceServiceStatus) PropagateRawStatus(
	component ComponentType,
	deployment *appsv1.Deployment,
	url *apis.URL)

func (*InferenceServiceStatus) PropagateStatus

func (ss *InferenceServiceStatus) PropagateStatus(component ComponentType, serviceStatus *knservingv1.ServiceStatus)

func (*InferenceServiceStatus) SetCondition

func (ss *InferenceServiceStatus) SetCondition(conditionType apis.ConditionType, condition *apis.Condition)

func (*InferenceServiceStatus) SetModelFailureInfo added in v0.9.0

func (ss *InferenceServiceStatus) SetModelFailureInfo(info *FailureInfo) bool

func (*InferenceServiceStatus) UpdateModelRevisionStates added in v0.9.0

func (ss *InferenceServiceStatus) UpdateModelRevisionStates(modelState ModelState, totalCopies int, info *FailureInfo)

func (*InferenceServiceStatus) UpdateModelTransitionStatus added in v0.9.0

func (ss *InferenceServiceStatus) UpdateModelTransitionStatus(status TransitionStatus, info *FailureInfo)

type InferenceServicesConfig

type InferenceServicesConfig struct {
	// Explainer configurations
	Explainers ExplainersConfig `json:"explainers"`
}

+kubebuilder:object:generate=false

func NewInferenceServicesConfig

func NewInferenceServicesConfig(cli client.Client) (*InferenceServicesConfig, error)

func (*InferenceServicesConfig) DeepCopy added in v0.10.0

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceServicesConfig.

func (*InferenceServicesConfig) DeepCopyInto added in v0.10.0

func (in *InferenceServicesConfig) DeepCopyInto(out *InferenceServicesConfig)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type IngressConfig

type IngressConfig struct {
	IngressGateway          string  `json:"ingressGateway,omitempty"`
	IngressServiceName      string  `json:"ingressService,omitempty"`
	LocalGateway            string  `json:"localGateway,omitempty"`
	LocalGatewayServiceName string  `json:"localGatewayService,omitempty"`
	IngressDomain           string  `json:"ingressDomain,omitempty"`
	IngressClassName        *string `json:"ingressClassName,omitempty"`
	DomainTemplate          string  `json:"domainTemplate,omitempty"`
	UrlScheme               string  `json:"urlScheme,omitempty"`
	DisableIstioVirtualHost bool    `json:"disableIstioVirtualHost,omitempty"`
	PathTemplate            string  `json:"pathTemplate,omitempty"`
}

+kubebuilder:object:generate=false

func NewIngressConfig

func NewIngressConfig(cli client.Client) (*IngressConfig, error)

func (*IngressConfig) DeepCopy added in v0.10.0

func (in *IngressConfig) DeepCopy() *IngressConfig

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IngressConfig.

func (*IngressConfig) DeepCopyInto added in v0.10.0

func (in *IngressConfig) DeepCopyInto(out *IngressConfig)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type LightGBMSpec

type LightGBMSpec struct {
	// Contains fields shared across all predictors
	PredictorExtensionSpec `json:",inline"`
}

LightGBMSpec defines arguments for configuring LightGBMSpec model serving.

func (*LightGBMSpec) DeepCopy

func (in *LightGBMSpec) DeepCopy() *LightGBMSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LightGBMSpec.

func (*LightGBMSpec) DeepCopyInto

func (in *LightGBMSpec) DeepCopyInto(out *LightGBMSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*LightGBMSpec) Default

func (x *LightGBMSpec) Default(config *InferenceServicesConfig)

Default sets defaults on the resource

func (*LightGBMSpec) GetContainer

func (x *LightGBMSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container

func (*LightGBMSpec) GetProtocol

type LoggerSpec

type LoggerSpec struct {
	// URL to send logging events
	// +optional
	URL *string `json:"url,omitempty"`
	// Specifies the scope of the loggers. <br />
	// Valid values are: <br />
	// - "all" (default): log both request and response; <br />
	// - "request": log only request; <br />
	// - "response": log only response <br />
	// +optional
	Mode LoggerType `json:"mode,omitempty"`
}

LoggerSpec specifies optional payload logging available for all components
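
A minimal sketch of enabling payload logging on a component via ComponentExtensionSpec.Logger; the sink URL is illustrative:

package main

import (
	"fmt"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

func main() {
	sink := "http://message-dumper.default.svc.cluster.local" // hypothetical logging sink
	ext := v1beta1.ComponentExtensionSpec{
		Logger: &v1beta1.LoggerSpec{
			URL:  &sink,
			Mode: v1beta1.LogAll, // log both request and response payloads
		},
	}
	fmt.Println(*ext.Logger.URL, ext.Logger.Mode)
}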

func (*LoggerSpec) DeepCopy

func (in *LoggerSpec) DeepCopy() *LoggerSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LoggerSpec.

func (*LoggerSpec) DeepCopyInto

func (in *LoggerSpec) DeepCopyInto(out *LoggerSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type LoggerType

type LoggerType string

LoggerType controls the scope of log publishing +kubebuilder:validation:Enum=all;request;response

const (
	// Logger mode to log both request and response
	LogAll LoggerType = "all"
	// Logger mode to log only request
	LogRequest LoggerType = "request"
	// Logger mode to log only response
	LogResponse LoggerType = "response"
)

LoggerType Enum

type ModelCopies added in v0.9.0

type ModelCopies struct {
	// How many copies of this predictor's models failed to load recently
	// +kubebuilder:default=0
	FailedCopies int `json:"failedCopies"`
	// Total number of copies of this predictor's models that are currently loaded
	// +optional
	TotalCopies int `json:"totalCopies,omitempty"`
}

func (*ModelCopies) DeepCopy added in v0.9.0

func (in *ModelCopies) DeepCopy() *ModelCopies

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelCopies.

func (*ModelCopies) DeepCopyInto added in v0.9.0

func (in *ModelCopies) DeepCopyInto(out *ModelCopies)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type ModelFormat added in v0.8.0

type ModelFormat struct {
	// Name of the model format.
	// +required
	Name string `json:"name"`
	// Version of the model format.
	// Used in validating that a predictor is supported by a runtime.
	// Can be "major", "major.minor" or "major.minor.patch".
	// +optional
	Version *string `json:"version,omitempty"`
}

func (*ModelFormat) DeepCopy added in v0.8.0

func (in *ModelFormat) DeepCopy() *ModelFormat

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelFormat.

func (*ModelFormat) DeepCopyInto added in v0.8.0

func (in *ModelFormat) DeepCopyInto(out *ModelFormat)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type ModelRevisionStates added in v0.9.0

type ModelRevisionStates struct {
	// High level state string: Pending, Standby, Loading, Loaded, FailedToLoad
	// +kubebuilder:default=Pending
	ActiveModelState ModelState `json:"activeModelState"`
	// +kubebuilder:default=""
	TargetModelState ModelState `json:"targetModelState,omitempty"`
}

func (*ModelRevisionStates) DeepCopy added in v0.9.0

func (in *ModelRevisionStates) DeepCopy() *ModelRevisionStates

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelRevisionStates.

func (*ModelRevisionStates) DeepCopyInto added in v0.9.0

func (in *ModelRevisionStates) DeepCopyInto(out *ModelRevisionStates)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type ModelSpec added in v0.8.0

type ModelSpec struct {
	// ModelFormat being served.
	// +required
	ModelFormat ModelFormat `json:"modelFormat"`

	// Specific ClusterServingRuntime/ServingRuntime name to use for deployment.
	// +optional
	Runtime *string `json:"runtime,omitempty"`

	PredictorExtensionSpec `json:",inline"`
}

func (*ModelSpec) DeepCopy added in v0.8.0

func (in *ModelSpec) DeepCopy() *ModelSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelSpec.

func (*ModelSpec) DeepCopyInto added in v0.8.0

func (in *ModelSpec) DeepCopyInto(out *ModelSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*ModelSpec) Default added in v0.8.0

func (m *ModelSpec) Default(config *InferenceServicesConfig)

func (*ModelSpec) GetContainer added in v0.8.0

func (m *ModelSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container

func (*ModelSpec) GetProtocol added in v0.8.0

func (m *ModelSpec) GetProtocol() constants.InferenceServiceProtocol

func (*ModelSpec) GetSupportingRuntimes added in v0.8.0

func (m *ModelSpec) GetSupportingRuntimes(cl client.Client, namespace string, isMMS bool) ([]v1alpha1.SupportedRuntime, error)

GetSupportingRuntimes returns a list of ServingRuntimeSpecs that correspond to ServingRuntimes and ClusterServingRuntimes that support the given model. If the `isMMS` argument is true, this function will only return ServingRuntimes that are ModelMesh compatible; otherwise only single-model serving compatible runtimes are returned.
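
A sketch of how a caller might use this, assuming cl is a controller-runtime client configured elsewhere and the namespace is illustrative:

package main

import (
	"fmt"

	"sigs.k8s.io/controller-runtime/pkg/client"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

// listRuntimes prints how many runtimes can serve the given model.
func listRuntimes(cl client.Client, m *v1beta1.ModelSpec) error {
	// false: only single-model serving runtimes, per the documentation above.
	runtimes, err := m.GetSupportingRuntimes(cl, "default", false)
	if err != nil {
		return err
	}
	fmt.Printf("found %d supporting runtime(s)\n", len(runtimes))
	return nil
}

func main() {}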

func (*ModelSpec) RuntimeSupportsModel added in v0.8.0

func (m *ModelSpec) RuntimeSupportsModel(srSpec *v1alpha1.ServingRuntimeSpec) bool

RuntimeSupportsModel checks whether the given runtime supports the specified model.

type ModelState added in v0.9.0

type ModelState string

ModelState enum +kubebuilder:validation:Enum="";Pending;Standby;Loading;Loaded;FailedToLoad

const (
	// Model is not yet registered
	Pending ModelState = "Pending"
	// Model is available but not loaded (will load when used)
	Standby ModelState = "Standby"
	// Model is loading
	Loading ModelState = "Loading"
	// At least one copy of the model is loaded
	Loaded ModelState = "Loaded"
	// All copies of the model failed to load
	FailedToLoad ModelState = "FailedToLoad"
)

ModelState Enum values

type ModelStatus added in v0.9.0

type ModelStatus struct {
	// Whether the available predictor endpoints reflect the current Spec or are in transition
	// +kubebuilder:default=UpToDate
	TransitionStatus TransitionStatus `json:"transitionStatus"`

	// State information of the predictor's model.
	// +optional
	ModelRevisionStates *ModelRevisionStates `json:"states,omitempty"`

	// Details of last failure, when load of target model is failed or blocked.
	// +optional
	LastFailureInfo *FailureInfo `json:"lastFailureInfo,omitempty"`

	// Model copy information of the predictor's model.
	// +optional
	ModelCopies *ModelCopies `json:"copies,omitempty"`
}

func (*ModelStatus) DeepCopy added in v0.9.0

func (in *ModelStatus) DeepCopy() *ModelStatus

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelStatus.

func (*ModelStatus) DeepCopyInto added in v0.9.0

func (in *ModelStatus) DeepCopyInto(out *ModelStatus)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type ONNXRuntimeSpec

type ONNXRuntimeSpec struct {
	// Contains fields shared across all predictors
	PredictorExtensionSpec `json:",inline"`
}

ONNXRuntimeSpec defines arguments for configuring ONNX model serving.

func (*ONNXRuntimeSpec) DeepCopy

func (in *ONNXRuntimeSpec) DeepCopy() *ONNXRuntimeSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ONNXRuntimeSpec.

func (*ONNXRuntimeSpec) DeepCopyInto

func (in *ONNXRuntimeSpec) DeepCopyInto(out *ONNXRuntimeSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*ONNXRuntimeSpec) Default

func (o *ONNXRuntimeSpec) Default(config *InferenceServicesConfig)

Default sets defaults on the resource

func (*ONNXRuntimeSpec) GetContainer

func (o *ONNXRuntimeSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container

GetContainer transforms the resource into a container spec

func (*ONNXRuntimeSpec) GetProtocol

func (*ONNXRuntimeSpec) Validate

func (o *ONNXRuntimeSpec) Validate() error

Validate returns an error if invalid

type PMMLSpec

type PMMLSpec struct {
	// Contains fields shared across all predictors
	PredictorExtensionSpec `json:",inline"`
}

PMMLSpec defines arguments for configuring PMML model serving.

func (*PMMLSpec) DeepCopy

func (in *PMMLSpec) DeepCopy() *PMMLSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PMMLSpec.

func (*PMMLSpec) DeepCopyInto

func (in *PMMLSpec) DeepCopyInto(out *PMMLSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*PMMLSpec) Default

func (p *PMMLSpec) Default(config *InferenceServicesConfig)

Default sets defaults on the resource

func (*PMMLSpec) GetContainer

func (p *PMMLSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container

func (*PMMLSpec) GetProtocol

func (p *PMMLSpec) GetProtocol() constants.InferenceServiceProtocol

func (*PMMLSpec) Validate

func (p *PMMLSpec) Validate() error

Validate returns an error if invalid

type PaddleServerSpec

type PaddleServerSpec struct {
	PredictorExtensionSpec `json:",inline"`
}

func (*PaddleServerSpec) DeepCopy

func (in *PaddleServerSpec) DeepCopy() *PaddleServerSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PaddleServerSpec.

func (*PaddleServerSpec) DeepCopyInto

func (in *PaddleServerSpec) DeepCopyInto(out *PaddleServerSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*PaddleServerSpec) Default

func (p *PaddleServerSpec) Default(config *InferenceServicesConfig)

func (*PaddleServerSpec) GetContainer

func (p *PaddleServerSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container

func (*PaddleServerSpec) GetProtocol

type PodSpec

type PodSpec struct {
	// List of volumes that can be mounted by containers belonging to the pod.
	// More info: https://kubernetes.io/docs/concepts/storage/volumes
	// +optional
	// +patchMergeKey=name
	// +patchStrategy=merge,retainKeys
	Volumes []v1.Volume `json:"volumes,omitempty" patchStrategy:"merge,retainKeys" patchMergeKey:"name" protobuf:"bytes,1,rep,name=volumes"`
	// List of initialization containers belonging to the pod.
	// Init containers are executed in order prior to containers being started. If any
	// init container fails, the pod is considered to have failed and is handled according
	// to its restartPolicy. The name for an init container or normal container must be
	// unique among all containers.
	// Init containers may not have Lifecycle actions, Readiness probes, Liveness probes, or Startup probes.
	// The resourceRequirements of an init container are taken into account during scheduling
	// by finding the highest request/limit for each resource type, and then using the max of
	// that value or the sum of the normal containers. Limits are applied to init containers
	// in a similar fashion.
	// Init containers cannot currently be added or removed.
	// Cannot be updated.
	// More info: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/
	// +patchMergeKey=name
	// +patchStrategy=merge
	InitContainers []v1.Container `json:"initContainers,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,20,rep,name=initContainers"`
	// List of containers belonging to the pod.
	// Containers cannot currently be added or removed.
	// There must be at least one container in a Pod.
	// Cannot be updated.
	// +patchMergeKey=name
	// +patchStrategy=merge
	Containers []v1.Container `json:"containers,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,2,rep,name=containers"`
	// List of ephemeral containers run in this pod. Ephemeral containers may be run in an existing
	// pod to perform user-initiated actions such as debugging. This list cannot be specified when
	// creating a pod, and it cannot be modified by updating the pod spec. In order to add an
	// ephemeral container to an existing pod, use the pod's ephemeralcontainers subresource.
	// This field is beta-level and available on clusters that haven't disabled the EphemeralContainers feature gate.
	// +optional
	// +patchMergeKey=name
	// +patchStrategy=merge
	EphemeralContainers []v1.EphemeralContainer `` /* 128-byte string literal not displayed */
	// Restart policy for all containers within the pod.
	// One of Always, OnFailure, Never.
	// Default to Always.
	// More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy
	// +optional
	RestartPolicy v1.RestartPolicy `json:"restartPolicy,omitempty" protobuf:"bytes,3,opt,name=restartPolicy,casttype=RestartPolicy"`
	// Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request.
	// Value must be non-negative integer. The value zero indicates stop immediately via
	// the kill signal (no opportunity to shut down).
	// If this value is nil, the default grace period will be used instead.
	// The grace period is the duration in seconds after the processes running in the pod are sent
	// a termination signal and the time when the processes are forcibly halted with a kill signal.
	// Set this value longer than the expected cleanup time for your process.
	// Defaults to 30 seconds.
	// +optional
	TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" protobuf:"varint,4,opt,name=terminationGracePeriodSeconds"`
	// Optional duration in seconds the pod may be active on the node relative to
	// StartTime before the system will actively try to mark it failed and kill associated containers.
	// Value must be a positive integer.
	// +optional
	ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty" protobuf:"varint,5,opt,name=activeDeadlineSeconds"`
	// Set DNS policy for the pod.
	// Defaults to "ClusterFirst".
	// Valid values are 'ClusterFirstWithHostNet', 'ClusterFirst', 'Default' or 'None'.
	// DNS parameters given in DNSConfig will be merged with the policy selected with DNSPolicy.
	// To have DNS options set along with hostNetwork, you have to specify DNS policy
	// explicitly to 'ClusterFirstWithHostNet'.
	// +optional
	DNSPolicy v1.DNSPolicy `json:"dnsPolicy,omitempty" protobuf:"bytes,6,opt,name=dnsPolicy,casttype=DNSPolicy"`
	// NodeSelector is a selector which must be true for the pod to fit on a node.
	// Selector which must match a node's labels for the pod to be scheduled on that node.
	// More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
	// +optional
	// +mapType=atomic
	NodeSelector map[string]string `json:"nodeSelector,omitempty" protobuf:"bytes,7,rep,name=nodeSelector"`

	// ServiceAccountName is the name of the ServiceAccount to use to run this pod.
	// More info: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
	// +optional
	ServiceAccountName string `json:"serviceAccountName,omitempty" protobuf:"bytes,8,opt,name=serviceAccountName"`
	// DeprecatedServiceAccount is a deprecated alias for ServiceAccountName.
	// Deprecated: Use serviceAccountName instead.
	// +k8s:conversion-gen=false
	// +optional
	DeprecatedServiceAccount string `json:"serviceAccount,omitempty" protobuf:"bytes,9,opt,name=serviceAccount"`
	// AutomountServiceAccountToken indicates whether a service account token should be automatically mounted.
	// +optional
	AutomountServiceAccountToken *bool `json:"automountServiceAccountToken,omitempty" protobuf:"varint,21,opt,name=automountServiceAccountToken"`

	// NodeName is a request to schedule this pod onto a specific node. If it is non-empty,
	// the scheduler simply schedules this pod onto that node, assuming that it fits resource
	// requirements.
	// +optional
	NodeName string `json:"nodeName,omitempty" protobuf:"bytes,10,opt,name=nodeName"`
	// Host networking requested for this pod. Use the host's network namespace.
	// If this option is set, the ports that will be used must be specified.
	// Default to false.
	// +k8s:conversion-gen=false
	// +optional
	HostNetwork bool `json:"hostNetwork,omitempty" protobuf:"varint,11,opt,name=hostNetwork"`
	// Use the host's pid namespace.
	// Optional: Default to false.
	// +k8s:conversion-gen=false
	// +optional
	HostPID bool `json:"hostPID,omitempty" protobuf:"varint,12,opt,name=hostPID"`
	// Use the host's ipc namespace.
	// Optional: Default to false.
	// +k8s:conversion-gen=false
	// +optional
	HostIPC bool `json:"hostIPC,omitempty" protobuf:"varint,13,opt,name=hostIPC"`
	// Share a single process namespace between all of the containers in a pod.
	// When this is set containers will be able to view and signal processes from other containers
	// in the same pod, and the first process in each container will not be assigned PID 1.
	// HostPID and ShareProcessNamespace cannot both be set.
	// Optional: Default to false.
	// +k8s:conversion-gen=false
	// +optional
	ShareProcessNamespace *bool `json:"shareProcessNamespace,omitempty" protobuf:"varint,27,opt,name=shareProcessNamespace"`
	// SecurityContext holds pod-level security attributes and common container settings.
	// Optional: Defaults to empty.  See type description for default values of each field.
	// +optional
	SecurityContext *v1.PodSecurityContext `json:"securityContext,omitempty" protobuf:"bytes,14,opt,name=securityContext"`
	// ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec.
	// If specified, these secrets will be passed to individual puller implementations for them to use. For example,
	// in the case of docker, only DockerConfig type secrets are honored.
	// More info: https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod
	// +optional
	// +patchMergeKey=name
	// +patchStrategy=merge
	ImagePullSecrets []v1.LocalObjectReference `json:"imagePullSecrets,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,15,rep,name=imagePullSecrets"`
	// Specifies the hostname of the Pod
	// If not specified, the pod's hostname will be set to a system-defined value.
	// +optional
	Hostname string `json:"hostname,omitempty" protobuf:"bytes,16,opt,name=hostname"`
	// If specified, the fully qualified Pod hostname will be "<hostname>.<subdomain>.<pod namespace>.svc.<cluster domain>".
	// If not specified, the pod will not have a domainname at all.
	// +optional
	Subdomain string `json:"subdomain,omitempty" protobuf:"bytes,17,opt,name=subdomain"`
	// If specified, the pod's scheduling constraints
	// +optional
	Affinity *v1.Affinity `json:"affinity,omitempty" protobuf:"bytes,18,opt,name=affinity"`
	// If specified, the pod will be dispatched by specified scheduler.
	// If not specified, the pod will be dispatched by default scheduler.
	// +optional
	SchedulerName string `json:"schedulerName,omitempty" protobuf:"bytes,19,opt,name=schedulerName"`
	// If specified, the pod's tolerations.
	// +optional
	Tolerations []v1.Toleration `json:"tolerations,omitempty" protobuf:"bytes,22,opt,name=tolerations"`
	// HostAliases is an optional list of hosts and IPs that will be injected into the pod's hosts
	// file if specified. This is only valid for non-hostNetwork pods.
	// +optional
	// +patchMergeKey=ip
	// +patchStrategy=merge
	HostAliases []v1.HostAlias `json:"hostAliases,omitempty" patchStrategy:"merge" patchMergeKey:"ip" protobuf:"bytes,23,rep,name=hostAliases"`
	// If specified, indicates the pod's priority. "system-node-critical" and
	// "system-cluster-critical" are two special keywords which indicate the
	// highest priorities with the former being the highest priority. Any other
	// name must be defined by creating a PriorityClass object with that name.
	// If not specified, the pod priority will be default or zero if there is no
	// default.
	// +optional
	PriorityClassName string `json:"priorityClassName,omitempty" protobuf:"bytes,24,opt,name=priorityClassName"`
	// The priority value. Various system components use this field to find the
	// priority of the pod. When Priority Admission Controller is enabled, it
	// prevents users from setting this field. The admission controller populates
	// this field from PriorityClassName.
	// The higher the value, the higher the priority.
	// +optional
	Priority *int32 `json:"priority,omitempty" protobuf:"bytes,25,opt,name=priority"`
	// Specifies the DNS parameters of a pod.
	// Parameters specified here will be merged to the generated DNS
	// configuration based on DNSPolicy.
	// +optional
	DNSConfig *v1.PodDNSConfig `json:"dnsConfig,omitempty" protobuf:"bytes,26,opt,name=dnsConfig"`
	// If specified, all readiness gates will be evaluated for pod readiness.
	// A pod is ready when all its containers are ready AND
	// all conditions specified in the readiness gates have status equal to "True"
	// More info: https://git.k8s.io/enhancements/keps/sig-network/580-pod-readiness-gates
	// +optional
	ReadinessGates []v1.PodReadinessGate `json:"readinessGates,omitempty" protobuf:"bytes,28,opt,name=readinessGates"`
	// RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used
	// to run this pod.  If no RuntimeClass resource matches the named class, the pod will not be run.
	// If unset or empty, the "legacy" RuntimeClass will be used, which is an implicit class with an
	// empty definition that uses the default runtime handler.
	// More info: https://git.k8s.io/enhancements/keps/sig-node/585-runtime-class
	// This is a beta feature as of Kubernetes v1.14.
	// +optional
	RuntimeClassName *string `json:"runtimeClassName,omitempty" protobuf:"bytes,29,opt,name=runtimeClassName"`
	// EnableServiceLinks indicates whether information about services should be injected into pod's
	// environment variables, matching the syntax of Docker links.
	// Optional: Defaults to true.
	// +optional
	EnableServiceLinks *bool `json:"enableServiceLinks,omitempty" protobuf:"varint,30,opt,name=enableServiceLinks"`
	// PreemptionPolicy is the Policy for preempting pods with lower priority.
	// One of Never, PreemptLowerPriority.
	// Defaults to PreemptLowerPriority if unset.
	// This field is beta-level, gated by the NonPreemptingPriority feature-gate.
	// +optional
	PreemptionPolicy *v1.PreemptionPolicy `json:"preemptionPolicy,omitempty" protobuf:"bytes,31,opt,name=preemptionPolicy"`
	// Overhead represents the resource overhead associated with running a pod for a given RuntimeClass.
	// This field will be autopopulated at admission time by the RuntimeClass admission controller. If
	// the RuntimeClass admission controller is enabled, overhead must not be set in Pod create requests.
	// The RuntimeClass admission controller will reject Pod create requests which have the overhead already
	// set. If RuntimeClass is configured and selected in the PodSpec, Overhead will be set to the value
	// defined in the corresponding RuntimeClass, otherwise it will remain unset and treated as zero.
	// More info: https://git.k8s.io/enhancements/keps/sig-node/688-pod-overhead/README.md
	// This field is beta-level as of Kubernetes v1.18, and is only honored by servers that enable the PodOverhead feature.
	// +optional
	Overhead v1.ResourceList `json:"overhead,omitempty" protobuf:"bytes,32,opt,name=overhead"`
	// TopologySpreadConstraints describes how a group of pods ought to spread across topology
	// domains. Scheduler will schedule pods in a way which abides by the constraints.
	// All topologySpreadConstraints are ANDed.
	// +optional
	// +patchMergeKey=topologyKey
	// +patchStrategy=merge
	// +listType=map
	// +listMapKey=topologyKey
	// +listMapKey=whenUnsatisfiable
	TopologySpreadConstraints []v1.TopologySpreadConstraint `` /* 147-byte string literal not displayed */
	// If true the pod's hostname will be configured as the pod's FQDN, rather than the leaf name (the default).
	// In Linux containers, this means setting the FQDN in the hostname field of the kernel (the nodename field of struct utsname).
	// In Windows containers, this means setting the registry value of hostname for the registry key HKEY_LOCAL_MACHINE\\SYSTEM\\CurrentControlSet\\Services\\Tcpip\\Parameters to FQDN.
	// If a pod does not have FQDN, this has no effect.
	// Default to false.
	// +optional
	SetHostnameAsFQDN *bool `json:"setHostnameAsFQDN,omitempty" protobuf:"varint,35,opt,name=setHostnameAsFQDN"`
	// Specifies the OS of the containers in the pod.
	// Some pod and container fields are restricted if this is set.
	//
	// If the OS field is set to linux, the following fields must be unset:
	// - securityContext.windowsOptions
	//
	// If the OS field is set to windows, following fields must be unset:
	// - spec.hostPID
	// - spec.hostIPC
	// - spec.securityContext.seLinuxOptions
	// - spec.securityContext.seccompProfile
	// - spec.securityContext.fsGroup
	// - spec.securityContext.fsGroupChangePolicy
	// - spec.securityContext.sysctls
	// - spec.shareProcessNamespace
	// - spec.securityContext.runAsUser
	// - spec.securityContext.runAsGroup
	// - spec.securityContext.supplementalGroups
	// - spec.containers[*].securityContext.seLinuxOptions
	// - spec.containers[*].securityContext.seccompProfile
	// - spec.containers[*].securityContext.capabilities
	// - spec.containers[*].securityContext.readOnlyRootFilesystem
	// - spec.containers[*].securityContext.privileged
	// - spec.containers[*].securityContext.allowPrivilegeEscalation
	// - spec.containers[*].securityContext.procMount
	// - spec.containers[*].securityContext.runAsUser
	// - spec.containers[*].securityContext.runAsGroup
	// +optional
	// This is an alpha field and requires the IdentifyPodOS feature
	OS *v1.PodOS `json:"os,omitempty" protobuf:"bytes,36,opt,name=os"`
	// Use the host's user namespace.
	// Optional: Default to true.
	// If set to true or not present, the pod will be run in the host user namespace. This is useful
	// when the pod needs a feature only available to the host user namespace, such as
	// loading a kernel module with CAP_SYS_MODULE.
	// When set to false, a new userns is created for the pod. Setting false is useful for
	// mitigating container breakout vulnerabilities, while still allowing users to run their
	// containers as root without actually having root privileges on the host.
	// This field is alpha-level and is only honored by servers that enable the UserNamespacesSupport feature.
	// +k8s:conversion-gen=false
	// +optional
	HostUsers *bool `json:"hostUsers,omitempty" protobuf:"bytes,37,opt,name=hostUsers"`
	// SchedulingGates is an opaque list of values that, if specified, will block scheduling of the pod.
	// If schedulingGates is not empty, the pod will stay in the SchedulingGated state and the
	// scheduler will not attempt to schedule the pod.
	//
	// SchedulingGates can only be set at pod creation time; they can only be removed afterwards.
	//
	// This is a beta feature enabled by the PodSchedulingReadiness feature gate.
	//
	// +patchMergeKey=name
	// +patchStrategy=merge
	// +listType=map
	// +listMapKey=name
	// +featureGate=PodSchedulingReadiness
	// +optional
	SchedulingGates []v1.PodSchedulingGate `json:"schedulingGates,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,38,opt,name=schedulingGates"`
	// ResourceClaims defines which ResourceClaims must be allocated
	// and reserved before the Pod is allowed to start. The resources
	// will be made available to those containers which consume them
	// by name.
	//
	// This is an alpha field and requires enabling the
	// DynamicResourceAllocation feature gate.
	//
	// This field is immutable.
	//
	// +patchMergeKey=name
	// +patchStrategy=merge,retainKeys
	// +listType=map
	// +listMapKey=name
	// +featureGate=DynamicResourceAllocation
	// +optional
	ResourceClaims []v1.PodResourceClaim `` /* 129-byte string literal not displayed */
}

PodSpec is a description of a pod.
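As a minimal sketch (assuming the package is imported from github.com/kserve/kserve/pkg/apis/serving/v1beta1 and core types from k8s.io/api/core/v1; all values are illustrative), the inlined PodSpec lets a component carry ordinary pod-level settings such as a service account, a node selector and tolerations:

package main

import (
	corev1 "k8s.io/api/core/v1"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

func main() {
	// Pod-level overrides carried by the inlined PodSpec of a component
	// (service account, node selector and toleration values are illustrative).
	podSpec := v1beta1.PodSpec{
		ServiceAccountName: "models-sa",
		NodeSelector:       map[string]string{"disktype": "ssd"},
		Tolerations: []corev1.Toleration{{
			Key:      "nvidia.com/gpu",
			Operator: corev1.TolerationOpExists,
			Effect:   corev1.TaintEffectNoSchedule,
		}},
	}
	_ = podSpec // typically embedded in a PredictorSpec or TransformerSpec
}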

func (*PodSpec) DeepCopy

func (in *PodSpec) DeepCopy() *PodSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodSpec.

func (*PodSpec) DeepCopyInto

func (in *PodSpec) DeepCopyInto(out *PodSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type PredictorExtensionSpec

type PredictorExtensionSpec struct {
	// This field points to the location of the trained model which is mounted onto the pod.
	// +optional
	StorageURI *string `json:"storageUri,omitempty"`
	// Runtime version of the predictor docker image
	// +optional
	RuntimeVersion *string `json:"runtimeVersion,omitempty"`
	// Protocol version used by the predictor (i.e. v1, v2, grpc-v1 or grpc-v2)
	// +optional
	ProtocolVersion *constants.InferenceServiceProtocol `json:"protocolVersion,omitempty"`
	// Container enables overrides for the predictor.
	// Each framework will have different defaults that are populated in the underlying container spec.
	// +optional
	v1.Container `json:",inline"`
	// Storage Spec for model location
	// +optional
	Storage *StorageSpec `json:"storage,omitempty"`
}

PredictorExtensionSpec defines configuration shared across all predictor frameworks
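A minimal sketch of populating the shared fields, assuming the standard KServe import path; the storage URI and runtime version are illustrative, and the inlined v1.Container is used here only to supply resource requests:

package main

import (
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

func strPtr(s string) *string { return &s }

func main() {
	// Shared predictor configuration: model location, runtime version and
	// container-level resource requests via the inlined v1.Container.
	ext := v1beta1.PredictorExtensionSpec{
		StorageURI:     strPtr("gs://example-bucket/models/sklearn/model"), // illustrative URI
		RuntimeVersion: strPtr("1.3.0"),                                    // illustrative version
		Container: corev1.Container{
			Resources: corev1.ResourceRequirements{
				Requests: corev1.ResourceList{
					corev1.ResourceCPU:    resource.MustParse("100m"),
					corev1.ResourceMemory: resource.MustParse("1Gi"),
				},
			},
		},
	}
	_ = ext
}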

func (*PredictorExtensionSpec) DeepCopy

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PredictorExtensionSpec.

func (*PredictorExtensionSpec) DeepCopyInto

func (in *PredictorExtensionSpec) DeepCopyInto(out *PredictorExtensionSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*PredictorExtensionSpec) GetStorageSpec added in v0.9.0

func (p *PredictorExtensionSpec) GetStorageSpec() *StorageSpec

GetStorageSpec returns the predictor storage spec object

func (*PredictorExtensionSpec) GetStorageUri added in v0.9.0

func (p *PredictorExtensionSpec) GetStorageUri() *string

GetStorageUri returns the predictor storage Uri

func (*PredictorExtensionSpec) Validate added in v0.9.0

func (p *PredictorExtensionSpec) Validate() error

Validate returns an error if invalid
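A short usage sketch for the accessors and Validate; the URI below is deliberately made up, and Validate is expected to reject a storage URI whose scheme is not in the supported list:

package main

import (
	"fmt"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

func main() {
	uri := "unsupported://bucket/model" // deliberately invalid scheme
	ext := v1beta1.PredictorExtensionSpec{StorageURI: &uri}

	// GetStorageUri returns the configured pointer; Validate checks the format.
	fmt.Println("storage uri:", *ext.GetStorageUri())
	if err := ext.Validate(); err != nil {
		fmt.Println("validation failed:", err)
	}
}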

type PredictorImplementation

type PredictorImplementation interface {
}

PredictorImplementation defines common functions for all predictors, e.g. Tensorflow, Triton, etc. +kubebuilder:object:generate=false

type PredictorSpec

type PredictorSpec struct {
	// Spec for SKLearn model server
	SKLearn *SKLearnSpec `json:"sklearn,omitempty"`
	// Spec for XGBoost model server
	XGBoost *XGBoostSpec `json:"xgboost,omitempty"`
	// Spec for TFServing (https://github.com/tensorflow/serving)
	Tensorflow *TFServingSpec `json:"tensorflow,omitempty"`
	// Spec for TorchServe (https://pytorch.org/serve)
	PyTorch *TorchServeSpec `json:"pytorch,omitempty"`
	// Spec for Triton Inference Server (https://github.com/triton-inference-server/server)
	Triton *TritonSpec `json:"triton,omitempty"`
	// Spec for ONNX runtime (https://github.com/microsoft/onnxruntime)
	ONNX *ONNXRuntimeSpec `json:"onnx,omitempty"`
	// Spec for PMML (http://dmg.org/pmml/v4-1/GeneralStructure.html)
	PMML *PMMLSpec `json:"pmml,omitempty"`
	// Spec for LightGBM model server
	LightGBM *LightGBMSpec `json:"lightgbm,omitempty"`
	// Spec for Paddle model server (https://github.com/PaddlePaddle/Serving)
	Paddle *PaddleServerSpec `json:"paddle,omitempty"`

	// Model spec for any arbitrary framework.
	Model *ModelSpec `json:"model,omitempty"`

	// This spec is dual purpose. <br />
	// 1) Provide a full PodSpec for a custom predictor.
	// The field PodSpec.Containers is mutually exclusive with other predictors (i.e. TFServing). <br />
	// 2) Provide a predictor (i.e. TFServing) and specify PodSpec
	// overrides; in this case you must not provide PodSpec.Containers. <br />
	PodSpec `json:",inline"`
	// Component extension defines the deployment configurations for a predictor
	ComponentExtensionSpec `json:",inline"`
}

PredictorSpec defines the configuration for a predictor. The following fields follow a "1-of" semantic: users must specify exactly one spec.
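For instance, a sketch of the "1-of" semantic (assuming the usual import path and an illustrative storage URI): exactly one framework spec is set and, because a built-in predictor is chosen, PodSpec.Containers is left empty:

package main

import (
	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

func main() {
	uri := "gs://example-bucket/models/sklearn/model" // illustrative URI

	// Exactly one framework spec (SKLearn here); all other framework fields stay nil.
	predictor := v1beta1.PredictorSpec{
		SKLearn: &v1beta1.SKLearnSpec{
			PredictorExtensionSpec: v1beta1.PredictorExtensionSpec{
				StorageURI: &uri,
			},
		},
	}
	_ = predictor
}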

func (*PredictorSpec) DeepCopy

func (in *PredictorSpec) DeepCopy() *PredictorSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PredictorSpec.

func (*PredictorSpec) DeepCopyInto

func (in *PredictorSpec) DeepCopyInto(out *PredictorSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*PredictorSpec) GetExtensions

func (s *PredictorSpec) GetExtensions() *ComponentExtensionSpec

GetExtensions returns the extensions for the component

func (*PredictorSpec) GetImplementation

func (s *PredictorSpec) GetImplementation() ComponentImplementation

GetImplementation returns the implementation for the component

func (*PredictorSpec) GetImplementations

func (s *PredictorSpec) GetImplementations() []ComponentImplementation

GetImplementations returns the implementations for the component

func (*PredictorSpec) GetPredictorImplementation

func (s *PredictorSpec) GetPredictorImplementation() *ComponentImplementation

func (*PredictorSpec) GetPredictorImplementations

func (s *PredictorSpec) GetPredictorImplementations() []ComponentImplementation

GetPredictorImplementations returns the implementations for the predictor

type SKLearnSpec

type SKLearnSpec struct {
	// Contains fields shared across all predictors
	PredictorExtensionSpec `json:",inline"`
}

SKLearnSpec defines arguments for configuring SKLearn model serving.

func (*SKLearnSpec) DeepCopy

func (in *SKLearnSpec) DeepCopy() *SKLearnSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SKLearnSpec.

func (*SKLearnSpec) DeepCopyInto

func (in *SKLearnSpec) DeepCopyInto(out *SKLearnSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*SKLearnSpec) Default

func (k *SKLearnSpec) Default(config *InferenceServicesConfig)

Default sets defaults on the resource

func (*SKLearnSpec) GetContainer

func (k *SKLearnSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container

func (*SKLearnSpec) GetProtocol

type ScaleMetric added in v0.9.0

type ScaleMetric string

ScaleMetric enum +kubebuilder:validation:Enum=cpu;memory;concurrency;rps

const (
	MetricCPU         ScaleMetric = "cpu"
	MetricMemory      ScaleMetric = "memory"
	MetricConcurrency ScaleMetric = "concurrency"
	MetricRPS         ScaleMetric = "rps"
)
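A small helper sketch that checks a value against the declared ScaleMetric constants; the helper name is hypothetical and not part of the package:

package main

import (
	"fmt"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

// isValidScaleMetric reports whether m is one of the declared ScaleMetric values.
func isValidScaleMetric(m v1beta1.ScaleMetric) bool {
	switch m {
	case v1beta1.MetricCPU, v1beta1.MetricMemory, v1beta1.MetricConcurrency, v1beta1.MetricRPS:
		return true
	}
	return false
}

func main() {
	fmt.Println(isValidScaleMetric(v1beta1.MetricConcurrency)) // true
	fmt.Println(isValidScaleMetric(v1beta1.ScaleMetric("qps"))) // false
}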

type StorageSpec added in v0.9.0

type StorageSpec struct {
	// The path to the model object in the storage. It cannot co-exist
	// with the storageURI.
	// +optional
	Path *string `json:"path,omitempty"`
	// The path to the model schema file in the storage.
	// +optional
	SchemaPath *string `json:"schemaPath,omitempty"`
	// Parameters to override the default storage credentials and config.
	// +optional
	Parameters *map[string]string `json:"parameters,omitempty"`
	// The Storage Key in the secret for this model.
	// +optional
	StorageKey *string `json:"key,omitempty"`
}
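A brief sketch of the StorageSpec alternative to storageUri; the path, storage key and parameter values are illustrative, and Path must not be combined with the predictor's storageUri:

package main

import (
	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

func strPtr(s string) *string { return &s }

func main() {
	params := map[string]string{"bucket": "example-models"} // illustrative credential/config override

	// Storage-based model location; used instead of a storageUri.
	storage := v1beta1.StorageSpec{
		Path:       strPtr("sklearn/iris/model.joblib"),
		StorageKey: strPtr("localMinIO"),
		Parameters: &params,
	}
	_ = storage
}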

func (*StorageSpec) DeepCopy added in v0.9.0

func (in *StorageSpec) DeepCopy() *StorageSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StorageSpec.

func (*StorageSpec) DeepCopyInto added in v0.9.0

func (in *StorageSpec) DeepCopyInto(out *StorageSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type TFServingSpec

type TFServingSpec struct {
	// Contains fields shared across all predictors
	PredictorExtensionSpec `json:",inline"`
}

TFServingSpec defines arguments for configuring Tensorflow model serving.

func (*TFServingSpec) DeepCopy

func (in *TFServingSpec) DeepCopy() *TFServingSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TFServingSpec.

func (*TFServingSpec) DeepCopyInto

func (in *TFServingSpec) DeepCopyInto(out *TFServingSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*TFServingSpec) Default

func (t *TFServingSpec) Default(config *InferenceServicesConfig)

Default sets defaults on the resource

func (*TFServingSpec) GetContainer

func (t *TFServingSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container

func (*TFServingSpec) GetProtocol

func (*TFServingSpec) Validate

func (t *TFServingSpec) Validate() error

Validate returns an error if invalid

type TorchServeSpec

type TorchServeSpec struct {
	// Contains fields shared across all predictors
	PredictorExtensionSpec `json:",inline"`
}

TorchServeSpec defines arguments for configuring PyTorch model serving.

func (*TorchServeSpec) DeepCopy

func (in *TorchServeSpec) DeepCopy() *TorchServeSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TorchServeSpec.

func (*TorchServeSpec) DeepCopyInto

func (in *TorchServeSpec) DeepCopyInto(out *TorchServeSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*TorchServeSpec) Default

func (t *TorchServeSpec) Default(config *InferenceServicesConfig)

Default sets defaults on the resource

func (*TorchServeSpec) GetContainer

func (t *TorchServeSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container

func (*TorchServeSpec) GetProtocol

func (*TorchServeSpec) Validate

func (t *TorchServeSpec) Validate() error

Validate returns an error if invalid

type TransformerSpec

type TransformerSpec struct {
	// This spec is dual purpose. <br />
	// 1) Provide a full PodSpec for a custom transformer.
	// The field PodSpec.Containers is mutually exclusive with other transformers. <br />
	// 2) Provide a transformer and specify PodSpec
	// overrides; in this case you must not provide PodSpec.Containers. <br />
	PodSpec `json:",inline"`
	// Component extension defines the deployment configurations for a transformer
	ComponentExtensionSpec `json:",inline"`
}

TransformerSpec defines the transformer service for pre/post-processing
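A sketch of the custom-transformer form, where the user image is provided through PodSpec.Containers; the container name, image and arguments are hypothetical:

package main

import (
	corev1 "k8s.io/api/core/v1"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

func main() {
	// Custom transformer: the full PodSpec form, so Containers carries the
	// user image that performs pre/post-processing.
	transformer := v1beta1.TransformerSpec{
		PodSpec: v1beta1.PodSpec{
			Containers: []corev1.Container{{
				Name:  "transformer",                             // hypothetical container name
				Image: "example.com/my-org/image-transformer:v1", // hypothetical image
				Args:  []string{"--model_name", "my-model"},
			}},
		},
	}
	_ = transformer
}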

func (*TransformerSpec) DeepCopy

func (in *TransformerSpec) DeepCopy() *TransformerSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TransformerSpec.

func (*TransformerSpec) DeepCopyInto

func (in *TransformerSpec) DeepCopyInto(out *TransformerSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*TransformerSpec) GetExtensions

func (s *TransformerSpec) GetExtensions() *ComponentExtensionSpec

GetExtensions returns the extensions for the component

func (*TransformerSpec) GetImplementation

func (s *TransformerSpec) GetImplementation() ComponentImplementation

GetImplementation returns the implementation for the component

func (*TransformerSpec) GetImplementations

func (s *TransformerSpec) GetImplementations() []ComponentImplementation

GetImplementations returns the implementations for the component

type TransitionStatus added in v0.9.0

type TransitionStatus string

TransitionStatus enum +kubebuilder:validation:Enum="";UpToDate;InProgress;BlockedByFailedLoad;InvalidSpec

const (
	// Predictor is up-to-date (reflects current spec)
	UpToDate TransitionStatus = "UpToDate"
	// Waiting for target model to reach state of active model
	InProgress TransitionStatus = "InProgress"
	// Target model failed to load
	BlockedByFailedLoad TransitionStatus = "BlockedByFailedLoad"
	// Target predictor spec failed validation
	InvalidSpec TransitionStatus = "InvalidSpec"
)

TransitionStatus Enum values
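A hedged sketch of consuming the enum, e.g. turning a TransitionStatus read from a predictor's status into a short message; the describe helper is not part of the package:

package main

import (
	"fmt"

	"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
)

// describe maps a TransitionStatus to a short human-readable message.
func describe(ts v1beta1.TransitionStatus) string {
	switch ts {
	case v1beta1.UpToDate:
		return "predictor reflects the current spec"
	case v1beta1.InProgress:
		return "waiting for the target model to reach the state of the active model"
	case v1beta1.BlockedByFailedLoad:
		return "target model failed to load"
	case v1beta1.InvalidSpec:
		return "target predictor spec failed validation"
	default:
		return "unknown transition status"
	}
}

func main() {
	fmt.Println(describe(v1beta1.InProgress))
}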

type TritonSpec

type TritonSpec struct {
	// Contains fields shared across all predictors
	PredictorExtensionSpec `json:",inline"`
}

TritonSpec defines arguments for configuring Triton model serving.

func (*TritonSpec) DeepCopy

func (in *TritonSpec) DeepCopy() *TritonSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TritonSpec.

func (*TritonSpec) DeepCopyInto

func (in *TritonSpec) DeepCopyInto(out *TritonSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*TritonSpec) Default

func (t *TritonSpec) Default(config *InferenceServicesConfig)

Default sets defaults on the resource

func (*TritonSpec) GetContainer

func (t *TritonSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container

func (*TritonSpec) GetProtocol

type XGBoostSpec

type XGBoostSpec struct {
	// Contains fields shared across all predictors
	PredictorExtensionSpec `json:",inline"`
}

XGBoostSpec defines arguments for configuring XGBoost model serving.

func (*XGBoostSpec) DeepCopy

func (in *XGBoostSpec) DeepCopy() *XGBoostSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XGBoostSpec.

func (*XGBoostSpec) DeepCopyInto

func (in *XGBoostSpec) DeepCopyInto(out *XGBoostSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*XGBoostSpec) Default

func (x *XGBoostSpec) Default(config *InferenceServicesConfig)

Default sets defaults on the resource

func (*XGBoostSpec) GetContainer

func (x *XGBoostSpec) GetContainer(metadata metav1.ObjectMeta, extensions *ComponentExtensionSpec, config *InferenceServicesConfig, predictorHost ...string) *v1.Container

func (*XGBoostSpec) GetProtocol
