Documentation ¶
Index ¶
- Constants
- Variables
- func CanaryExplainerServiceName(name string) string
- func CanaryPredictorServiceName(name string) string
- func CanaryServiceName(name string, component InferenceServiceComponent) string
- func CanaryTransformerServiceName(name string) string
- func DefaultExplainerServiceName(name string) string
- func DefaultPredictorServiceName(name string) string
- func DefaultServiceName(name string, component InferenceServiceComponent) string
- func DefaultTransformerServiceName(name string) string
- func ExplainPath(name string) string
- func ExplainPrefix() string
- func GetRawServiceLabel(service string) string
- func HostRegExp(host string) string
- func InferenceServiceHostName(name string, namespace string, domain string) string
- func InferenceServicePrefix(name string) string
- func InferenceServiceURL(scheme, name, namespace, domain string) string
- func ModelConfigName(inferenceserviceName string, shardId int) string
- func PredictPath(name string, protocol InferenceServiceProtocol) string
- func PredictPrefix() string
- func PredictorURL(metadata metav1.ObjectMeta, isCanary bool) string
- func TransformerURL(metadata metav1.ObjectMeta, isCanary bool) string
- func VirtualServiceHostname(name string, predictorHostName string) string
- type AutoScalerKPAMetricsType
- type AutoscalerClassType
- type AutoscalerMetricsType
- type CheckResultType
- type DeploymentModeType
- type InferenceServiceComponent
- type InferenceServiceProtocol
- type InferenceServiceVerb
- type ProtocolVersion
Constants ¶
const (
    RouterHeadersPropagateEnvVar = "PROPAGATE_HEADERS"
    InferenceGraphLabel = "serving.kserve.io/inferencegraph"
)
InferenceGraph Constants
const (
    AgentContainerName = "agent"
    AgentConfigMapKeyName = "agent"
    AgentEnableFlag = "--enable-puller"
    AgentConfigDirArgName = "--config-dir"
    AgentModelDirArgName = "--model-dir"
)
Model agent Constants
const (
    CustomSpecStorageUriEnvVarKey = "STORAGE_URI"
    CustomSpecProtocolEnvVarKey = "PROTOCOL"
    CustomSpecMultiModelServerEnvVarKey = "MULTI_MODEL_SERVER"
    KServeContainerPrometheusMetricsPortEnvVarKey = "KSERVE_CONTAINER_PROMETHEUS_METRICS_PORT"
    KServeContainerPrometheusMetricsPathEnvVarKey = "KSERVE_CONTAINER_PROMETHEUS_METRICS_PATH"
    QueueProxyAggregatePrometheusMetricsPortEnvVarKey = "AGGREGATE_PROMETHEUS_METRICS_PORT"
)
InferenceService Environment Variables
const (
    KnativeLocalGateway = "knative-serving/knative-local-gateway"
    KnativeIngressGateway = "knative-serving/knative-ingress-gateway"
    VisibilityLabel = "networking.knative.dev/visibility"
)
Knative constants
const (
    InferenceServiceDefaultHttpPort = "8080"
    InferenceServiceDefaultAgentPortStr = "9081"
    InferenceServiceDefaultAgentPort = 9081
    CommonDefaultHttpPort = 80
)
InferenceService Endpoint Ports
const (
    KServiceComponentLabel = "component"
    KServiceModelLabel = "model"
    KServiceEndpointLabel = "endpoint"
)
Labels to put on the Knative Service (kservice)
const (
    ParentInferenceServiceLabel = "inferenceservice"
    InferenceServiceLabel = "serving.kserve.io/inferenceservice"
)
Labels for TrainedModel
const (
    InferenceServiceDefault = "default"
    InferenceServiceCanary = "canary"
)
InferenceService default/canary constants
const (
    ArgumentModelName = "--model_name"
    ArgumentModelDir = "--model_dir"
    ArgumentModelClassName = "--model_class_name"
    ArgumentPredictorHost = "--predictor_host"
    ArgumentHttpPort = "--http_port"
    ArgumentWorkers = "--workers"
)
InferenceService model server args
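As a non-authoritative sketch, these argument names can be assembled into a model server command line. The model name is an example, and the sketch assumes the package imports as github.com/kserve/kserve/pkg/constants (the later sketches on this page assume the same):

package main

import (
    "fmt"

    "github.com/kserve/kserve/pkg/constants"
)

func main() {
    // Illustrative model server arguments built from the exported
    // constants; "my-model" is an example name, not a package value.
    args := []string{
        constants.ArgumentModelName, "my-model",
        constants.ArgumentModelDir, constants.DefaultModelLocalMountPath,
        constants.ArgumentHttpPort, constants.InferenceServiceDefaultHttpPort,
    }
    fmt.Println(args)
}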
const (
    InferenceServiceContainerName = "kserve-container"
    StorageInitializerContainerName = "storage-initializer"
)
InferenceService container name
const (
    ModelConfigVolumeName = "model-config"
    ModelDirVolumeName = "model-dir"
    ModelConfigDir = "/mnt/configs"
    ModelDir = DefaultModelLocalMountPath
)
Multi-model InferenceService
const (
    SKLearnServer = "kserve-sklearnserver"
    MLServer = "kserve-mlserver"
    TFServing = "kserve-tensorflow-serving"
    XGBServer = "kserve-xgbserver"
    TorchServe = "kserve-torchserve"
    TritonServer = "kserve-tritonserver"
    PMMLServer = "kserve-pmmlserver"
    LGBServer = "kserve-lgbserver"
    PaddleServer = "kserve-paddleserver"
)
Built-in runtime servers
const (
    ModelClassLabel = "modelClass"
    ServiceEnvelope = "serviceEnvelope"
)
const (
    MLServerModelClassSKLearn = "mlserver_sklearn.SKLearnModel"
    MLServerModelClassXGBoost = "mlserver_xgboost.XGBoostModel"
    MLServerModelClassLightGBM = "mlserver_lightgbm.LightGBMModel"
    MLServerModelClassMLFlow = "mlserver_mlflow.MLflowRuntime"
)
Allowed model class implementations in MLServer
const (
    ServiceEnvelopeKServe = "kserve"
    ServiceEnvelopeKServeV2 = "kservev2"
)
Allowed values for the TorchServe service envelope label
const (
    SupportedModelSKLearn = "sklearn"
    SupportedModelTensorflow = "tensorflow"
    SupportedModelXGBoost = "xgboost"
    SupportedModelPyTorch = "pytorch"
    SupportedModelONNX = "onnx"
    SupportedModelPMML = "pmml"
    SupportedModelLightGBM = "lightgbm"
    SupportedModelPaddle = "paddle"
    SupportedModelTriton = "triton"
    SupportedModelMLFlow = "mlflow"
)
Supported model types
const (
    RevisionLabel = "serving.knative.dev/revision"
    RawDeploymentAppLabel = "app"
)
Revision and raw deployment app labels
const (
    StateReasonRunning = "Running"
    StateReasonCompleted = "Completed"
    StateReasonError = "Error"
    StateReasonCrashLoopBackOff = "CrashLoopBackOff"
)
Container state reasons
const (
    MLServerHTTPPortEnv = "MLSERVER_HTTP_PORT"
    MLServerGRPCPortEnv = "MLSERVER_GRPC_PORT"
    MLServerLoadModelsStartupEnv = "MLSERVER_LOAD_MODELS_AT_STARTUP"
    MLServerModelsDirEnv = "MODELS_DIR"
    MLServerModelImplementationEnv = "MLSERVER_MODEL_IMPLEMENTATION"
    MLServerModelNameEnv = "MLSERVER_MODEL_NAME"
    MLServerModelURIEnv = "MLSERVER_MODEL_URI"
    MLServerSKLearnImplementation = "mlserver_sklearn.SKLearnModel"
    MLServerXGBoostImplementation = "mlserver_xgboost.XGBoostModel"
)
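A minimal sketch of the environment a controller might inject into an MLServer container using these keys; the port value and the choice of the SKLearn implementation are illustrative assumptions:

package main

import (
    "fmt"

    "github.com/kserve/kserve/pkg/constants"
)

func main() {
    // Example environment for an MLServer container; values are
    // illustrative, not defaults mandated by this package.
    env := map[string]string{
        constants.MLServerHTTPPortEnv:            "8080",
        constants.MLServerModelImplementationEnv: constants.MLServerSKLearnImplementation,
        constants.MLServerModelsDirEnv:           constants.DefaultModelLocalMountPath,
    }
    fmt.Println(env)
}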
const DefaultModelLocalMountPath = "/mnt/models"
DefaultModelLocalMountPath is where models will be mounted by the storage-initializer
const (
    NvidiaGPUResourceType = "nvidia.com/gpu"
)
GPU Constants
Variables ¶
var (
    KServeName = "kserve"
    KServeAPIGroupName = "serving.kserve.io"
    KnativeAutoscalingAPIGroupName = "autoscaling.knative.dev"
    KnativeServingAPIGroupName = "serving.knative.dev"
    KServeNamespace = getEnvOrDefault("POD_NAMESPACE", "kserve")
    KServeDefaultVersion = "v0.5.0"
)
KServe Constants
var (
    InferenceServiceName = "inferenceservice"
    InferenceServiceAPIName = "inferenceservices"
    InferenceServicePodLabelKey = KServeAPIGroupName + "/" + InferenceServiceName
    InferenceServiceConfigMapName = "inferenceservice-config"
)
InferenceService Constants
var (
    InferenceServiceGKEAcceleratorAnnotationKey = KServeAPIGroupName + "/gke-accelerator"
    DeploymentMode = KServeAPIGroupName + "/deploymentMode"
    EnableRoutingTagAnnotationKey = KServeAPIGroupName + "/enable-tag-routing"
    AutoscalerClass = KServeAPIGroupName + "/autoscalerClass"
    AutoscalerMetrics = KServeAPIGroupName + "/metrics"
    TargetUtilizationPercentage = KServeAPIGroupName + "/targetUtilizationPercentage"
    MinScaleAnnotationKey = KnativeAutoscalingAPIGroupName + "/min-scale"
    MaxScaleAnnotationKey = KnativeAutoscalingAPIGroupName + "/max-scale"
    RollOutDurationAnnotationKey = KnativeServingAPIGroupName + "/rollout-duration"
    EnableMetricAggregation = KServeAPIGroupName + "/enable-metric-aggregation"
    SetPrometheusAnnotation = KServeAPIGroupName + "/enable-prometheus-scraping"
    KserveContainerPrometheusPortKey = "prometheus.kserve.io/port"
    KServeContainerPrometheusPathKey = "prometheus.kserve.io/path"
    PrometheusPortAnnotationKey = "prometheus.io/port"
    PrometheusPathAnnotationKey = "prometheus.io/path"
    DefaultPrometheusPath = "/metrics"
    QueueProxyAggregatePrometheusMetricsPort = "9088"
    DefaultPodPrometheusPort = "9090"
)
InferenceService Annotations
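For example, a manifest builder might bound Knative autoscaling with the min/max-scale annotation keys; a hedged sketch with example replica values:

package main

import (
    "fmt"

    "github.com/kserve/kserve/pkg/constants"
)

func main() {
    // Annotations a user might set on an InferenceService to bound
    // Knative autoscaling; "1" and "5" are example values.
    annotations := map[string]string{
        constants.MinScaleAnnotationKey: "1",
        constants.MaxScaleAnnotationKey: "5",
    }
    fmt.Println(annotations)
}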
var (
    InferenceServiceInternalAnnotationsPrefix = "internal." + KServeAPIGroupName
    StorageInitializerSourceUriInternalAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/storage-initializer-sourceuri"
    StorageSpecAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/storage-spec"
    StorageSpecParamAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/storage-spec-param"
    StorageSpecKeyAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/storage-spec-key"
    LoggerInternalAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/logger"
    LoggerSinkUrlInternalAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/logger-sink-url"
    LoggerModeInternalAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/logger-mode"
    BatcherInternalAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/batcher"
    BatcherMaxBatchSizeInternalAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/batcher-max-batchsize"
    BatcherMaxLatencyInternalAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/batcher-max-latency"
    BatcherTimeoutInternalAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/batcher-timeout"
    AgentShouldInjectAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/agent"
    AgentModelConfigVolumeNameAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/configVolumeName"
    AgentModelConfigMountPathAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/configMountPath"
    AgentModelDirAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/modelDir"
    PredictorHostAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/predictor-host"
    PredictorProtocolAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/predictor-protocol"
)
InferenceService Internal Annotations
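A hedged sketch of how a controller could read one of these internal annotations back off an object's metadata; the helper name sourceURIFor is ours, not part of the package:

package controller

import (
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

    "github.com/kserve/kserve/pkg/constants"
)

// sourceURIFor is a hypothetical helper that returns the recorded
// storage-initializer source URI, or false if the internal annotation
// is absent.
func sourceURIFor(meta metav1.ObjectMeta) (string, bool) {
    uri, ok := meta.Annotations[constants.StorageInitializerSourceUriInternalAnnotationKey]
    return uri, ok
}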
var (
    DefaultStorageSpecSecret = "storage-config"
    DefaultStorageSpecSecretPath = "/mnt/storage-secret"
)
StorageSpec Constants
var (
    ControllerLabelName = KServeName + "-controller-manager"
    DefaultMinReplicas = 1
)
Controller Constants
var (
    ServiceAnnotationDisallowedList = []string{
        autoscaling.MinScaleAnnotationKey,
        autoscaling.MaxScaleAnnotationKey,
        StorageInitializerSourceUriInternalAnnotationKey,
        "kubectl.kubernetes.io/last-applied-configuration",
    }

    RevisionTemplateLabelDisallowedList = []string{
        VisibilityLabel,
    }
)
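These lists identify metadata that must not be copied onto the underlying Knative objects. A sketch of filtering against the service list (the helper name is ours):

package reconcile

import "github.com/kserve/kserve/pkg/constants"

// filterServiceAnnotations is a hypothetical helper that drops every
// annotation named in ServiceAnnotationDisallowedList before the
// remainder is copied onto a Knative Service.
func filterServiceAnnotations(annotations map[string]string) map[string]string {
    filtered := make(map[string]string, len(annotations))
    for key, value := range annotations {
        filtered[key] = value
    }
    for _, disallowed := range constants.ServiceAnnotationDisallowedList {
        delete(filtered, disallowed)
    }
    return filtered
}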
var (
    MLServerISGRPCPort = int32(9000)
    MLServerISRestPort = int32(8080)
)
var AutoScalerKPAMetricsAllowedList = []AutoScalerKPAMetricsType{
    AutoScalerKPAMetricsConcurrency,
    AutoScalerKPAMetricsRPS,
}
Autoscaler KPA Metrics Allowed List
var AutoscalerAllowedClassList = []AutoscalerClassType{
    AutoscalerClassHPA,
}
Autoscaler Class Allowed List
var AutoscalerAllowedMetricsList = []AutoscalerMetricsType{
    AutoScalerMetricsCPU,
    AutoScalerMetricsMemory,
}
Autoscaler Metrics Allowed List
var (
    DefaultAutoscalerClass = AutoscalerClassHPA
)
Autoscaler Default Class
var (
    DefaultCPUUtilization int32 = 80
)
Autoscaler Default Metrics Value
var (
    LocalGatewayHost = "knative-local-gateway.istio-system.svc." + network.GetClusterDomainName()
)
var (
    ModelConfigFileName = "models.json"
)
InferenceService MultiModel Constants
var (
    PodMutatorWebhookName = KServeName + "-pod-mutator-webhook"
)
Webhook Constants
var (
    TrainedModelAllocated = KServeAPIGroupName + "/" + "trainedmodel-allocated"
)
TrainedModel Constants
Functions ¶
func CanaryExplainerServiceName ¶ added in v0.1.3
func CanaryExplainerServiceName(name string) string
func CanaryPredictorServiceName ¶ added in v0.1.3
func CanaryPredictorServiceName(name string) string
func CanaryServiceName ¶ added in v0.1.3
func CanaryServiceName(name string, component InferenceServiceComponent) string
func CanaryTransformerServiceName ¶ added in v0.1.3
func CanaryTransformerServiceName(name string) string
func DefaultExplainerServiceName ¶ added in v0.1.3
func DefaultExplainerServiceName(name string) string
func DefaultPredictorServiceName ¶ added in v0.1.3
func DefaultPredictorServiceName(name string) string
func DefaultServiceName ¶ added in v0.1.3
func DefaultServiceName(name string, component InferenceServiceComponent) string
func DefaultTransformerServiceName ¶ added in v0.1.3
func DefaultTransformerServiceName(name string) string
func ExplainPath ¶ added in v0.7.0
func ExplainPath(name string) string
func ExplainPrefix ¶ added in v0.3.0
func ExplainPrefix() string
func GetRawServiceLabel ¶ added in v0.7.0
func GetRawServiceLabel(service string) string
GetRawServiceLabel generates the native service label.
func HostRegExp ¶ added in v0.3.0
func HostRegExp(host string) string
HostRegExp returns an ECMAScript regular expression that matches either host or host:<any port>; for clusterLocalHost, the prefixes are matched as well.
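A usage sketch; the host value is an example. Note that the returned pattern targets ECMAScript (e.g. for Istio match rules), so it is not guaranteed to be valid RE2 for Go's regexp package:

package main

import (
    "fmt"

    "github.com/kserve/kserve/pkg/constants"
)

func main() {
    // Print a pattern matching the host with or without a port suffix.
    fmt.Println(constants.HostRegExp("my-model.default.example.com"))
}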
func InferenceServiceHostName ¶ added in v0.3.0
func InferenceServiceHostName(name string, namespace string, domain string) string
func InferenceServicePrefix ¶ added in v0.3.0
func InferenceServicePrefix(name string) string
func InferenceServiceURL ¶ added in v0.3.0
func InferenceServiceURL(scheme, name, namespace, domain string) string
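A sketch composing the hostname and URL helpers; the scheme, name, namespace, and domain are example values:

package main

import (
    "fmt"

    "github.com/kserve/kserve/pkg/constants"
)

func main() {
    // Externally visible hostname and URL of an InferenceService.
    fmt.Println(constants.InferenceServiceHostName("my-model", "default", "example.com"))
    fmt.Println(constants.InferenceServiceURL("http", "my-model", "default", "example.com"))
}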
func ModelConfigName ¶ added in v0.7.0
func ModelConfigName(inferenceserviceName string, shardId int) string
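A usage sketch; the InferenceService name and shard id are examples:

package main

import (
    "fmt"

    "github.com/kserve/kserve/pkg/constants"
)

func main() {
    // Name of the model-config resource for shard 0 of "my-model".
    fmt.Println(constants.ModelConfigName("my-model", 0))
}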
func PredictPath ¶ added in v0.7.0
func PredictPath(name string, protocol InferenceServiceProtocol) string
func PredictPrefix ¶ added in v0.3.0
func PredictPrefix() string
func PredictorURL ¶ added in v0.3.0
func PredictorURL(metadata metav1.ObjectMeta, isCanary bool) string
func TransformerURL ¶ added in v0.3.0
func TransformerURL(metadata metav1.ObjectMeta, isCanary bool) string
func VirtualServiceHostname ¶ added in v0.7.0
func VirtualServiceHostname(name string, predictorHostName string) string
Types ¶
type AutoScalerKPAMetricsType ¶ added in v0.9.0
type AutoScalerKPAMetricsType string
var (
    AutoScalerKPAMetricsRPS AutoScalerKPAMetricsType = "rps"
    AutoScalerKPAMetricsConcurrency AutoScalerKPAMetricsType = "concurrency"
)
type AutoscalerClassType ¶ added in v0.7.0
type AutoscalerClassType string
var (
    AutoscalerClassHPA AutoscalerClassType = "hpa"
)
Autoscaler Class
type AutoscalerMetricsType ¶ added in v0.7.0
type AutoscalerMetricsType string
var (
    AutoScalerMetricsCPU AutoscalerMetricsType = "cpu"
)
Autoscaler Metrics
var (
    AutoScalerMetricsMemory AutoscalerMetricsType = "memory"
)
Autoscaler Memory metrics
type CheckResultType ¶ added in v0.7.0
type CheckResultType int
CheckResultType is the result of checking whether a resource for a raw Kubernetes deployment already exists
const (
    CheckResultCreate CheckResultType = 0
    CheckResultUpdate CheckResultType = 1
    CheckResultExisted CheckResultType = 2
    CheckResultUnknown CheckResultType = 3
)
type DeploymentModeType ¶ added in v0.7.0
type DeploymentModeType string
const (
    Serverless DeploymentModeType = "Serverless"
    RawDeployment DeploymentModeType = "RawDeployment"
    ModelMeshDeployment DeploymentModeType = "ModelMesh"
)
type InferenceServiceComponent ¶ added in v0.3.0
type InferenceServiceComponent string
const (
    Predictor InferenceServiceComponent = "predictor"
    Explainer InferenceServiceComponent = "explainer"
    Transformer InferenceServiceComponent = "transformer"
)
InferenceService Component enums
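A sketch combining these enums with the service-name helpers listed above; the InferenceService name is an example:

package main

import (
    "fmt"

    "github.com/kserve/kserve/pkg/constants"
)

func main() {
    // Derive the default and canary service names for each component.
    for _, component := range []constants.InferenceServiceComponent{
        constants.Predictor, constants.Explainer, constants.Transformer,
    } {
        fmt.Println(constants.DefaultServiceName("my-model", component))
        fmt.Println(constants.CanaryServiceName("my-model", component))
    }
}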
func (InferenceServiceComponent) String ¶ added in v0.3.0
func (e InferenceServiceComponent) String() string
type InferenceServiceProtocol ¶ added in v0.7.0
type InferenceServiceProtocol string
const (
    ProtocolV1 InferenceServiceProtocol = "v1"
    ProtocolV2 InferenceServiceProtocol = "v2"
    ProtocolGRPCV1 InferenceServiceProtocol = "grpc-v1"
    ProtocolGRPCV2 InferenceServiceProtocol = "grpc-v2"
    ProtocolUnknown InferenceServiceProtocol = ""
)
InferenceService protocol enums
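The predict path depends on the protocol in use; a sketch with PredictPath from above, where the model name is an example:

package main

import (
    "fmt"

    "github.com/kserve/kserve/pkg/constants"
)

func main() {
    // Print the predict paths for the v1 and v2 protocols.
    fmt.Println(constants.PredictPath("my-model", constants.ProtocolV1))
    fmt.Println(constants.PredictPath("my-model", constants.ProtocolV2))
}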
func GetProtocolVersionString ¶ added in v0.9.0
func GetProtocolVersionString(protocol ProtocolVersion) InferenceServiceProtocol
type InferenceServiceVerb ¶ added in v0.3.0
type InferenceServiceVerb string
const (
    Predict InferenceServiceVerb = "predict"
    Explain InferenceServiceVerb = "explain"
)
InferenceService verb enums
func (InferenceServiceVerb) String ¶ added in v0.3.0
func (v InferenceServiceVerb) String() string
type ProtocolVersion ¶ added in v0.9.0
type ProtocolVersion int
const (
    V1 ProtocolVersion = iota
    V2
    GRPCV1
    GRPCV2
    Unknown
)
func GetProtocolVersionInt ¶ added in v0.9.0
func GetProtocolVersionInt(protocol InferenceServiceProtocol) ProtocolVersion
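A round-trip sketch between the two protocol representations, assuming GetProtocolVersionInt and GetProtocolVersionString are inverses for known protocols:

package main

import (
    "fmt"

    "github.com/kserve/kserve/pkg/constants"
)

func main() {
    // Convert the string protocol to its integer form and back.
    version := constants.GetProtocolVersionInt(constants.ProtocolV2)
    fmt.Println(constants.GetProtocolVersionString(version)) // expected to print "v2"
}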