Documentation ¶
Index ¶
- Constants
- type Build
- type BuildPhase
- type BuildSource
- type BuildSpec
- type BuildStatus
- type BuilderType
- type DeleteFunctionRequest
- type ErrorResponse
- type Framework
- type GitRepositorySource
- type InferenceDeployment
- type InferenceDeploymentInstance
- type InferenceDeploymentInstanceSpec
- type InferenceDeploymentInstanceStatus
- type InferenceDeploymentSpec
- type InferenceDeploymentStatus
- type InferenceUsage
- type InstancePhase
- type LogRequest
- type Message
- type NamespaceRequest
- type NodeSystemInfo
- type Phase
- type ProviderInfo
- type Quantity
- type QueueRequest
- type RequestQueuer
- type ResourceList
- type ResourceName
- type ResourceRequirements
- type ScaleServiceRequest
- type ScalingConfig
- type ScalingType
- type Secret
- type Server
- type ServerSpec
- type ServerStatus
- type VersionInfo
Constants ¶
const ( DeploymentCreateEvent = "deployment-create" DeploymentUpdateEvent = "deployment-update" DeploymentDeleteEvent = "deployment-delete" DeploymentScaleUpEvent = "deployment-scale-up" DeploymentScaleDownEvent = "deployment-scale-down" )
const ( ScalingTypeCapacity = "capacity" ScalingTypeRPS = "rps" )
const (
LabelNamespace = "modelz.tensorchord.ai/namespace"
)
const (
RuntimeClassNvidia string = "nvidia"
)
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Build ¶
type Build struct { Spec BuildSpec `json:"spec"` Status BuildStatus `json:"status,omitempty"` }
type BuildPhase ¶
type BuildPhase string
const ( BuildPhasePending BuildPhase = "Pending" BuildPhaseRunning BuildPhase = "Running" BuildPhaseSucceeded BuildPhase = "Succeeded" BuildPhaseFailed BuildPhase = "Failed" )
type BuildSource ¶
type BuildSource struct { // directory is the target directory name. // Must not contain or start with '..'. If '.' is supplied, the volume directory will be the // git repository. Otherwise, if specified, the volume will contain the git repository in // the subdirectory with the given name. // +optional Directory string `json:"directory,omitempty"` Builder BuilderType `json:"builder,omitempty"` ArtifactImage string `json:"image,omitempty"` ArtifactImageTag string `json:"image_tag,omitempty"` Duration string `json:"duration,omitempty"` }
type BuildSpec ¶
type BuildSpec struct { Name string `json:"name,omitempty"` ProjectID string `json:"project_id,omitempty"` Namespace string `json:"namespace,omitempty"` GitRepositorySource `json:",inline,omitempty"` BuildSource `json:",inline,omitempty"` }
type BuildStatus ¶
type BuildStatus struct { Image string `json:"image,omitempty"` Phase BuildPhase `json:"phase,omitempty"` }
type BuilderType ¶
type BuilderType string
const ( BuilderTypeDockerfile BuilderType = "Dockerfile" BuilderTypeENVD BuilderType = "envd" )
type DeleteFunctionRequest ¶
type DeleteFunctionRequest struct {
FunctionName string `json:"functionName"`
}
DeleteFunctionRequest deletes a deployed function.
type ErrorResponse ¶
type ErrorResponse struct {
Message string `json:"message"`
}
type Framework ¶
type Framework string
Framework is the inference framework. It is only used to set the default port and command. For example, if the framework is "gradio", the default port is 7860 and the default command is "python app.py". You could override these defaults by setting the port and command fields and framework to `other`.
type GitRepositorySource ¶
type InferenceDeployment ¶
type InferenceDeployment struct { Spec InferenceDeploymentSpec `json:"spec"` Status InferenceDeploymentStatus `json:"status,omitempty"` }
InferenceDeployment represents a request to create or update a Model.
type InferenceDeploymentInstance ¶
type InferenceDeploymentInstance struct { Spec InferenceDeploymentInstanceSpec `json:"spec,omitempty"` Status InferenceDeploymentInstanceStatus `json:"status,omitempty"` }
type InferenceDeploymentInstanceStatus ¶
type InferenceDeploymentInstanceStatus struct { Phase InstancePhase `json:"phase,omitempty"` StartTime time.Time `json:"createdAt,omitempty"` Reason string `json:"reason,omitempty"` Message string `json:"message,omitempty"` }
type InferenceDeploymentSpec ¶
type InferenceDeploymentSpec struct { // Name is the name of the inference. Name string `json:"name"` // Namespace for the inference. Namespace string `json:"namespace,omitempty"` // Scaling is the scaling configuration for the inference. Scaling *ScalingConfig `json:"scaling,omitempty"` // Framework is the inference framework. Framework Framework `json:"framework,omitempty"` // Image is a fully-qualified container image. Image string `json:"image"` // Port is the port exposed by the inference. Port *int32 `json:"port,omitempty"` // HTTPProbePath is the path of the http probe. HTTPProbePath *string `json:"http_probe_path,omitempty"` // Command to run when starting the inference. Command *string `json:"command,omitempty"` // EnvVars can be provided to set environment variables for the inference runtime. EnvVars map[string]string `json:"envVars,omitempty"` // Constraints are the constraints for the inference. Constraints []string `json:"constraints,omitempty"` // Secrets list of secrets to be made available to inference. Secrets []string `json:"secrets,omitempty"` // Labels are key-value pairs that may be attached to the inference. Labels map[string]string `json:"labels,omitempty"` // Annotations are key-value pairs that may be attached to the inference. Annotations map[string]string `json:"annotations,omitempty"` // Resources are the compute resource requirements. Resources *ResourceRequirements `json:"resources,omitempty"` }
type InferenceDeploymentStatus ¶
type InferenceDeploymentStatus struct { Phase Phase `json:"phase,omitempty"` // InvocationCount count of invocations InvocationCount int32 `json:"invocationCount,omitempty"` // Replicas desired within the cluster Replicas int32 `json:"replicas,omitempty"` // AvailableReplicas is the count of replicas ready to receive // invocations as reported by the faas-provider AvailableReplicas int32 `json:"availableReplicas,omitempty"` // CreatedAt is the time read back from the faas backend's // data store for when the function or its container was created. CreatedAt *time.Time `json:"createdAt,omitempty"` // Usage represents CPU and RAM used by all of the // functions' replicas. Divide by AvailableReplicas for an // average value per replica. Usage *InferenceUsage `json:"usage,omitempty"` // EventMessage record human readable message indicating details about the event of deployment. EventMessage string `json:"eventMessage,omitempty"` }
InferenceDeploymentStatus exported for system/inferences endpoint
type InferenceUsage ¶
type InferenceUsage struct { // CPU is the increase in CPU usage since the last measurement // equivalent to Kubernetes' concept of millicores. CPU float64 `json:"cpu,omitempty"` // TotalMemoryBytes is the total memory usage in bytes. TotalMemoryBytes float64 `json:"totalMemoryBytes,omitempty"` GPU float64 `json:"gpu,omitempty"` }
InferenceUsage represents CPU and RAM used by all of the functions' replicas.
CPU is measured in seconds consumed since the last measurement. RAM is measured in total bytes consumed.
type InstancePhase ¶
type InstancePhase string
const ( InstancePhaseScheduling InstancePhase = "Scheduling" InstancePhasePending InstancePhase = "Pending" InstancePhaseRunning InstancePhase = "Running" InstancePhaseFailed InstancePhase = "Failed" InstancePhaseSucceeded InstancePhase = "Succeeded" InstancePhaseUnknown InstancePhase = "Unknown" InstancePhaseCreating InstancePhase = "Creating" InstancePhaseInitializing InstancePhase = "Initializing" )
type LogRequest ¶
type LogRequest struct { Namespace string `form:"namespace" json:"namespace,omitempty"` Name string `form:"name" json:"name,omitempty"` // Instance is the optional pod name that allows you to request logs from a specific instance Instance string `form:"instance" json:"instance,omitempty"` // Follow allows the user to request a stream of logs until the timeout Follow bool `form:"follow" json:"follow,omitempty"` // Tail sets the maximum number of log messages to return, <=0 means unlimited Tail int `form:"tail" json:"tail,omitempty"` Since string `form:"since" json:"since,omitempty"` // End is the end time of the log stream End string `form:"end" json:"end,omitempty"` }
type Message ¶
type Message struct { // Name is the function name Name string `json:"name"` Namespace string `json:"namespace"` // instance is the name/id of the specific function instance Instance string `json:"instance"` // Timestamp is the timestamp of when the log message was recorded Timestamp time.Time `json:"timestamp"` // Text is the raw log message content Text string `json:"text"` }
Message is a specific log message from a function container log stream
type NamespaceRequest ¶
type NamespaceRequest struct {
Name string `json:"name,omitempty"`
}
type NodeSystemInfo ¶
type NodeSystemInfo struct { // MachineID reported by the node. For unique machine identification // in the cluster this field is preferred. Learn more from man(5) // machine-id: http://man7.org/linux/man-pages/man5/machine-id.5.html MachineID string `json:"machineID" protobuf:"bytes,1,opt,name=machineID"` // Kernel Version reported by the node from 'uname -r' (e.g. 3.16.0-0.bpo.4-amd64). KernelVersion string `json:"kernelVersion" protobuf:"bytes,4,opt,name=kernelVersion"` // OS Image reported by the node from /etc/os-release (e.g. Debian GNU/Linux 7 (wheezy)). OSImage string `json:"osImage" protobuf:"bytes,5,opt,name=osImage"` // The Operating System reported by the node OperatingSystem string `json:"operatingSystem" protobuf:"bytes,9,opt,name=operatingSystem"` // The Architecture reported by the node Architecture string `json:"architecture" protobuf:"bytes,10,opt,name=architecture"` }
NodeSystemInfo is a set of ids/uuids to uniquely identify the node.
type Phase ¶
type Phase string
const ( // PhaseReady is the state of an inference when it is ready to // receive invocations. PhaseReady Phase = "Ready" // PhaseScaling is the state of an inference when scales. PhaseScaling Phase = "Scaling" PhaseTerminating Phase = "Terminating" PhaseNoReplicas Phase = "NoReplicas" PhaseNotReady Phase = "NotReady" PhaseBuilding Phase = "Building" )
type ProviderInfo ¶
type ProviderInfo struct { Name string `json:"provider"` Version *VersionInfo `json:"version"` Orchestration string `json:"orchestration"` }
ProviderInfo provides information about the configured provider
type QueueRequest ¶
type QueueRequest struct { // Header from HTTP request Header http.Header // Host from HTTP request Host string // Body from HTTP request to use for invocation Body []byte // Method from HTTP request Method string // Path from HTTP request Path string // QueryString from HTTP request QueryString string // Function name to invoke Function string // QueueName to publish the request to, leave blank // for default. QueueName string // Used by queue worker to submit a result CallbackURL *url.URL `json:"CallbackUrl"` }
Request for asynchronous processing
type RequestQueuer ¶
type RequestQueuer interface {
Queue(req *QueueRequest) error
}
RequestQueuer can publish a request to be executed asynchronously.
type ResourceList ¶
type ResourceList map[ResourceName]Quantity
ResourceList is a set of (resource name, quantity) pairs.
type ResourceName ¶
type ResourceName string
const ( ResourceCPU ResourceName = "cpu" ResourceMemory ResourceName = "memory" ResourceGPU ResourceName = "gpu" )
type ResourceRequirements ¶
type ResourceRequirements struct { // Limits describes the maximum amount of compute resources allowed. // More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ // +optional Limits ResourceList `json:"limits,omitempty" protobuf:"bytes,1,rep,name=limits,casttype=ResourceList,castkey=ResourceName"` // Requests describes the minimum amount of compute resources required. // If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, // otherwise to an implementation-defined value. // More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ // +optional Requests ResourceList `json:"requests,omitempty" protobuf:"bytes,2,rep,name=requests,casttype=ResourceList,castkey=ResourceName"` }
ResourceRequirements describes the compute resource requirements.
type ScaleServiceRequest ¶
type ScaleServiceRequest struct { ServiceName string `json:"serviceName"` Replicas uint64 `json:"replicas"` EventMessage string `json:"eventMessage"` }
ScaleServiceRequest scales the service to the requested replica count.
type ScalingConfig ¶
type ScalingConfig struct { // MinReplicas is the lower limit for the number of replicas to which the // autoscaler can scale down. It defaults to 0. MinReplicas *int32 `json:"min_replicas,omitempty"` // MaxReplicas is the upper limit for the number of replicas to which the // autoscaler can scale up. It cannot be less than minReplicas. It defaults // to 1. MaxReplicas *int32 `json:"max_replicas,omitempty"` // TargetLoad is the target load. In capacity mode, it is the expected number of the inflight requests per replica. TargetLoad *int32 `json:"target_load,omitempty"` // Type is the scaling type. It can be either "capacity" or "rps". Default is "capacity". Type *ScalingType `json:"type,omitempty"` // ZeroDuration is the duration (in seconds) of zero load before scaling down to zero. Default is 5 minutes. ZeroDuration *int32 `json:"zero_duration,omitempty"` // StartupDuration is the duration (in seconds) of startup time. StartupDuration *int32 `json:"startup_duration,omitempty"` }
type ScalingType ¶
type ScalingType string
type Secret ¶
type Secret struct { // Name of the secret Name string `json:"name"` // Namespace if applicable for the secret Namespace string `json:"namespace,omitempty"` // Value is a string representing the string's value Value string `json:"value,omitempty"` // RawValue can be used to provide binary data when // Value is not set RawValue []byte `json:"rawValue,omitempty"` }
Secret for underlying orchestrator
type Server ¶
type Server struct { Spec ServerSpec `json:"spec,omitempty"` Status ServerStatus `json:"status,omitempty"` }
type ServerSpec ¶
type ServerStatus ¶
type ServerStatus struct { Allocatable ResourceList `json:"allocatable,omitempty"` Capacity ResourceList `json:"capacity,omitempty"` Phase string `json:"phase,omitempty"` System NodeSystemInfo `json:"system,omitempty"` }
type VersionInfo ¶
type VersionInfo struct { Version string `json:"version,omitempty"` BuildDate string `json:"build_date,omitempty"` GitCommit string `json:"git_commit,omitempty"` GitTag string `json:"git_tag,omitempty"` GitTreeState string `json:"git_tree_state,omitempty"` GoVersion string `json:"go_version,omitempty"` Compiler string `json:"compiler,omitempty"` Platform string `json:"platform,omitempty"` }
VersionInfo provides the commit message, sha and release version number