Documentation ¶
Index ¶
- Constants
- Variables
- func CanaryExplainerServiceName(name string) string
- func CanaryPredictorServiceName(name string) string
- func CanaryServiceName(name string, component InferenceServiceComponent) string
- func CanaryTransformerServiceName(name string) string
- func DefaultExplainerServiceName(name string) string
- func DefaultPredictorServiceName(name string) string
- func DefaultServiceName(name string, component InferenceServiceComponent) string
- func DefaultTransformerServiceName(name string) string
- func ExplainPath(name string) string
- func ExplainPrefix() string
- func GetRawServiceLabel(service string) string
- func HostRegExp(host string) string
- func InferenceServiceHostName(name string, namespace string, domain string) string
- func InferenceServicePrefix(name string) string
- func InferenceServiceURL(scheme, name, namespace, domain string) string
- func ModelConfigName(inferenceserviceName string, shardId int) string
- func PredictPath(name string, protocol InferenceServiceProtocol) string
- func PredictPrefix() string
- func PredictorURL(metadata v1.ObjectMeta, isCanary bool) string
- func TransformerURL(metadata v1.ObjectMeta, isCanary bool) string
- func VirtualServiceHostname(name string, predictorHostName string) string
- type AutoscalerClassType
- type AutoscalerMetricsType
- type CheckResultType
- type DeploymentModeType
- type InferenceServiceComponent
- type InferenceServiceProtocol
- type InferenceServiceVerb
Constants ¶
const ( AgentContainerName = "agent" AgentConfigMapKeyName = "agent" AgentEnableFlag = "--enable-puller" AgentConfigDirArgName = "--config-dir" AgentModelDirArgName = "--model-dir" )
Model agent Constants
const ( CustomSpecStorageUriEnvVarKey = "STORAGE_URI" CustomSpecProtocolEnvVarKey = "PROTOCOL" CustomSpecMultiModelServerEnvVarKey = "MULTI_MODEL_SERVER" )
InferenceService Environment Variables
const ( KnativeLocalGateway = "knative-serving/knative-local-gateway" KnativeIngressGateway = "knative-serving/knative-ingress-gateway" VisibilityLabel = "serving.knative.dev/visibility" )
Knative constants
const ( InferenceServiceDefaultHttpPort = "8080" InferenceServiceDefaultAgentPortStr = "9081" InferenceServiceDefaultAgentPort = 9081 CommonDefaultHttpPort = 80 )
InferenceService Endpoint Ports
const ( KServiceComponentLabel = "component" KServiceModelLabel = "model" KServiceEndpointLabel = "endpoint" )
Labels to put on kservice
const ( ParentInferenceServiceLabel = "inferenceservice" InferenceServiceLabel = "serving.kubeflow.org/inferenceservice" )
Labels for TrainedModel
const ( InferenceServiceDefault = "default" InferenceServiceCanary = "canary" )
InferenceService default/canary constants
const ( ArgumentModelName = "--model_name" ArgumentModelDir = "--model_dir" ArgumentModelClassName = "--model_class_name" ArgumentPredictorHost = "--predictor_host" ArgumentHttpPort = "--http_port" ArgumentWorkers = "--workers" )
InferenceService model server args
const ( ModelConfigVolumeName = "model-config" ModelDirVolumeName = "model-dir" ModelConfigDir = "/mnt/configs" ModelDir = DefaultModelLocalMountPath )
Multi-model InferenceService
const ( SKLearnServer = "kserve-sklearnserver" MLServer = "kserve-mlserver" TFServing = "kserve-tensorflow-serving" XGBServer = "kserve-xgbserver" TorchServe = "kserve-torchserve" TritonServer = "kserve-tritonserver" PMMLServer = "kserve-pmmlserver" LGBServer = "kserve-lgbserver" PaddleServer = "kserve-paddleserver" )
built-in runtime servers
const ( ModelClassLabel = "modelClass" ServiceEnvelope = "serviceEnvelope" )
const ( MLServerModelClassSKLearn = "mlserver_sklearn.SKLearnModel" MLServerModelClassXGBoost = "mlserver_xgboost.XGBoostModel" MLServerModelClassLightGBM = "mlserver_lightgbm.LightGBMModel" )
Allowed model class implementations in mlserver
const ( ServiceEnvelopeKServe = "kserve" ServiceEnvelopeKServeV2 = "kservev2" )
Allowed values for the torchserve service envelope label
const ( SupportedModelSKLearn = "sklearn" SupportedModelTensorflow = "tensorflow" SupportedModelXGBoost = "xgboost" SupportedModelPyTorch = "pytorch" SupportedModelONNX = "onnx" SupportedModelPMML = "pmml" SupportedModelLightGBM = "lightgbm" SupportedModelPaddle = "paddle" SupportedModelTriton = "triton" )
Supported model types
const ( MLServerHTTPPortEnv = "MLSERVER_HTTP_PORT" MLServerGRPCPortEnv = "MLSERVER_GRPC_PORT" MLServerLoadModelsStartupEnv = "MLSERVER_LOAD_MODELS_AT_STARTUP" MLServerModelsDirEnv = "MODELS_DIR" MLServerModelImplementationEnv = "MLSERVER_MODEL_IMPLEMENTATION" MLServerModelNameEnv = "MLSERVER_MODEL_NAME" MLServerModelURIEnv = "MLSERVER_MODEL_URI" MLServerSKLearnImplementation = "mlserver_sklearn.SKLearnModel" MLServerXGBoostImplementation = "mlserver_xgboost.XGBoostModel" )
const DefaultModelLocalMountPath = "/mnt/models"
DefaultModelLocalMountPath is where models will be mounted by the storage-initializer.
const (
InferenceServiceContainerName = "kserve-container"
)
InferenceService container name
const (
NvidiaGPUResourceType = "nvidia.com/gpu"
)
GPU Constants
Variables ¶
var ( KServeName = "kserve" KServeAPIGroupName = "serving.kubeflow.org" KnativeAutoscalingAPIGroupName = "autoscaling.knative.dev" KServeNamespace = getEnvOrDefault("POD_NAMESPACE", "kserve") KServeDefaultVersion = "v0.5.0" )
KServe Constants
var ( InferenceServiceName = "inferenceservice" InferenceServiceAPIName = "inferenceservices" InferenceServicePodLabelKey = KServeAPIGroupName + "/" + InferenceServiceName InferenceServiceConfigMapName = "inferenceservice-config" )
InferenceService Constants
var ( InferenceServiceGKEAcceleratorAnnotationKey = KServeAPIGroupName + "/gke-accelerator" DeploymentMode = KServeAPIGroupName + "/deploymentMode" EnableRoutingTagAnnotationKey = KServeAPIGroupName + "/enable-tag-routing" AutoscalerClass = KServeAPIGroupName + "/autoscalerClass" AutoscalerMetrics = KServeAPIGroupName + "/metrics" TargetUtilizationPercentage = KServeAPIGroupName + "/targetUtilizationPercentage" MinScaleAnnotationKey = KnativeAutoscalingAPIGroupName + "/minScale" MaxScaleAnnotationKey = KnativeAutoscalingAPIGroupName + "/maxScale" )
InferenceService Annotations
var ( InferenceServiceInternalAnnotationsPrefix = "internal." + KServeAPIGroupName StorageInitializerSourceUriInternalAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/storage-initializer-sourceuri" LoggerInternalAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/logger" LoggerSinkUrlInternalAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/logger-sink-url" LoggerModeInternalAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/logger-mode" BatcherInternalAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/batcher" BatcherMaxBatchSizeInternalAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/batcher-max-batchsize" BatcherMaxLatencyInternalAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/batcher-max-latency" BatcherTimeoutInternalAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/batcher-timeout" AgentShouldInjectAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/agent" AgentModelConfigVolumeNameAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/configVolumeName" AgentModelConfigMountPathAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/configMountPath" AgentModelDirAnnotationKey = InferenceServiceInternalAnnotationsPrefix + "/modelDir" )
InferenceService Internal Annotations
var ( ControllerLabelName = KServeName + "-controller-manager" DefaultMinReplicas = 1 )
Controller Constants
var ( ServiceAnnotationDisallowedList = []string{ autoscaling.MinScaleAnnotationKey, autoscaling.MaxScaleAnnotationKey, StorageInitializerSourceUriInternalAnnotationKey, "kubectl.kubernetes.io/last-applied-configuration", } RevisionTemplateLabelDisallowedList = []string{ VisibilityLabel, } )
var ( MLServerISGRPCPort = int32(9000) MLServerISRestPort = int32(8080) )
var AutoscalerAllowedClassList = []AutoscalerClassType{ AutoscalerClassHPA, }
Autoscaler Class Allowed List
var AutoscalerAllowedMetricsList = []AutoscalerMetricsType{ AutoScalerMetricsCPU, }
Autoscaler Metrics Allowed List
var (
DefaultAutoscalerClass = AutoscalerClassHPA
)
Autoscaler Default Class
var (
DefaultCPUUtilization int32 = 80
)
Autoscaler Default Metrics Value
var (
LocalGatewayHost = "knative-local-gateway.istio-system.svc." + network.GetClusterDomainName()
)
var (
ModelConfigFileName = "models.json"
)
InferenceService MultiModel Constants
var (
PodMutatorWebhookName = KServeName + "-pod-mutator-webhook"
)
Webhook Constants
var (
TrainedModelAllocated = KServeAPIGroupName + "/" + "trainedmodel-allocated"
)
TrainedModel Constants
Functions ¶
func CanaryExplainerServiceName ¶ added in v0.1.3
func CanaryPredictorServiceName ¶ added in v0.1.3
func CanaryServiceName ¶ added in v0.1.3
func CanaryServiceName(name string, component InferenceServiceComponent) string
func CanaryTransformerServiceName ¶ added in v0.1.3
func DefaultExplainerServiceName ¶ added in v0.1.3
func DefaultPredictorServiceName ¶ added in v0.1.3
func DefaultServiceName ¶ added in v0.1.3
func DefaultServiceName(name string, component InferenceServiceComponent) string
func DefaultTransformerServiceName ¶ added in v0.1.3
func ExplainPath ¶ added in v0.8.100
func ExplainPrefix ¶ added in v0.3.0
func ExplainPrefix() string
func GetRawServiceLabel ¶ added in v0.8.100
GetRawServiceLabel generates the native service label.
func HostRegExp ¶ added in v0.3.0
HostRegExp returns an ECMAScript regular expression to match either host or host:<any port>. For clusterLocalHost, we will also match the prefixes.
func InferenceServiceHostName ¶ added in v0.3.0
func InferenceServicePrefix ¶ added in v0.3.0
func InferenceServiceURL ¶ added in v0.3.0
func ModelConfigName ¶ added in v0.8.100
func PredictPath ¶ added in v0.8.100
func PredictPath(name string, protocol InferenceServiceProtocol) string
func PredictPrefix ¶ added in v0.3.0
func PredictPrefix() string
func PredictorURL ¶ added in v0.3.0
func PredictorURL(metadata v1.ObjectMeta, isCanary bool) string
func TransformerURL ¶ added in v0.3.0
func TransformerURL(metadata v1.ObjectMeta, isCanary bool) string
func VirtualServiceHostname ¶ added in v0.8.100
Types ¶
type AutoscalerClassType ¶ added in v0.8.100
type AutoscalerClassType string
var (
AutoscalerClassHPA AutoscalerClassType = "hpa"
)
Autoscaler Class
type AutoscalerMetricsType ¶ added in v0.8.100
type AutoscalerMetricsType string
var (
AutoScalerMetricsCPU AutoscalerMetricsType = "cpu"
)
Autoscaler Metrics
type CheckResultType ¶ added in v0.8.100
type CheckResultType int
CheckResultType is the result of a resource existence check for a raw k8s deployment.
const ( CheckResultCreate CheckResultType = 0 CheckResultUpdate CheckResultType = 1 CheckResultExisted CheckResultType = 2 CheckResultUnknown CheckResultType = 3 )
type DeploymentModeType ¶ added in v0.8.100
type DeploymentModeType string
const ( Serverless DeploymentModeType = "Serverless" RawDeployment DeploymentModeType = "RawDeployment" ModelMeshDeployment DeploymentModeType = "ModelMesh" )
type InferenceServiceComponent ¶ added in v0.3.0
type InferenceServiceComponent string
const ( Predictor InferenceServiceComponent = "predictor" Explainer InferenceServiceComponent = "explainer" Transformer InferenceServiceComponent = "transformer" )
InferenceService Component enums
func (InferenceServiceComponent) String ¶ added in v0.3.0
func (e InferenceServiceComponent) String() string
type InferenceServiceProtocol ¶ added in v0.8.100
type InferenceServiceProtocol string
const ( ProtocolV1 InferenceServiceProtocol = "v1" ProtocolV2 InferenceServiceProtocol = "v2" )
InferenceService protocol enums
type InferenceServiceVerb ¶ added in v0.3.0
type InferenceServiceVerb string
const ( Predict InferenceServiceVerb = "predict" Explain InferenceServiceVerb = "explain" )
InferenceService verb enums
func (InferenceServiceVerb) String ¶ added in v0.3.0
func (v InferenceServiceVerb) String() string