Documentation ¶
Index ¶
- Constants
- type Build
- type BuildPhase
- type BuildSource
- type BuildSpec
- type BuildStatus
- type BuilderType
- type DeleteFunctionRequest
- type ErrorResponse
- type Framework
- type GitRepositorySource
- type InferenceDeployment
- type InferenceDeploymentInstance
- type InferenceDeploymentInstanceSpec
- type InferenceDeploymentInstanceStatus
- type InferenceDeploymentSpec
- type InferenceDeploymentStatus
- type InferenceUsage
- type InstancePhase
- type LogRequest
- type Message
- type NamespaceRequest
- type NodeSystemInfo
- type Phase
- type ProviderInfo
- type Quantity
- type QueueRequest
- type RequestQueuer
- type ResourceList
- type ResourceName
- type ResourceRequirements
- type ScaleServiceRequest
- type ScalingConfig
- type ScalingType
- type Secret
- type Server
- type ServerSpec
- type ServerStatus
- type VersionInfo
Constants ¶
const ( DeploymentCreateEvent = "deployment-create" DeploymentUpdateEvent = "deployment-update" DeploymentDeleteEvent = "deployment-delete" DeploymentScaleUpEvent = "deployment-scale-up" DeploymentScaleDownEvent = "deployment-scale-down" )
const ( ScalingTypeCapacity = "capacity" ScalingTypeRPS = "rps" )
const (
LabelNamespace = "modelz.tensorchord.ai/namespace"
)
const (
RuntimeClassNvidia string = "nvidia"
)
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Build ¶
type Build struct { Spec BuildSpec `json:"spec"` Status BuildStatus `json:"status,omitempty"` }
type BuildPhase ¶
type BuildPhase string
const ( BuildPhasePending BuildPhase = "Pending" BuildPhaseRunning BuildPhase = "Running" BuildPhaseSucceeded BuildPhase = "Succeeded" BuildPhaseFailed BuildPhase = "Failed" )
type BuildSource ¶
type BuildSource struct { // directory is the target directory name. // Must not contain or start with '..'. If '.' is supplied, the volume directory will be the // git repository. Otherwise, if specified, the volume will contain the git repository in // the subdirectory with the given name. // +optional Directory string `json:"directory,omitempty"` Builder BuilderType `json:"builder,omitempty"` ArtifactImage string `json:"image,omitempty"` ArtifactImageTag string `json:"image_tag,omitempty"` Duration string `json:"duration,omitempty"` }
type BuildSpec ¶
type BuildSpec struct { Name string `json:"name,omitempty"` ProjectID string `json:"project_id,omitempty"` Namespace string `json:"namespace,omitempty"` GitRepositorySource `json:",inline,omitempty"` BuildSource `json:",inline,omitempty"` }
type BuildStatus ¶
type BuildStatus struct { Image string `json:"image,omitempty"` Phase BuildPhase `json:"phase,omitempty"` }
type BuilderType ¶
type BuilderType string
const ( BuilderTypeDockerfile BuilderType = "Dockerfile" BuilderTypeENVD BuilderType = "envd" )
type DeleteFunctionRequest ¶
type DeleteFunctionRequest struct {
FunctionName string `json:"functionName"`
}
DeleteFunctionRequest deletes a deployed function.
type ErrorResponse ¶
type ErrorResponse struct {
Message string `json:"message"`
}
type Framework ¶
type Framework string
Framework is the inference framework. It is only used to set the default port and command. For example, if the framework is "gradio", the default port is 7860 and the default command is "python app.py". You could override these defaults by setting the port and command fields and framework to `other`.
type GitRepositorySource ¶
type InferenceDeployment ¶
type InferenceDeployment struct { Spec InferenceDeploymentSpec `json:"spec"` Status InferenceDeploymentStatus `json:"status,omitempty"` }
InferenceDeployment represents a request to create or update a Model.
type InferenceDeploymentInstance ¶
type InferenceDeploymentInstance struct { Spec InferenceDeploymentInstanceSpec `json:"spec,omitempty"` Status InferenceDeploymentInstanceStatus `json:"status,omitempty"` }
type InferenceDeploymentInstanceStatus ¶
type InferenceDeploymentInstanceStatus struct { Phase InstancePhase `json:"phase,omitempty"` StartTime time.Time `json:"createdAt,omitempty"` Reason string `json:"reason,omitempty"` Message string `json:"message,omitempty"` }
type InferenceDeploymentSpec ¶
type InferenceDeploymentSpec struct { // Name is the name of the inference. Name string `json:"name"` // Namespace for the inference. Namespace string `json:"namespace,omitempty"` // Scaling is the scaling configuration for the inference. Scaling *ScalingConfig `json:"scaling,omitempty"` // Framework is the inference framework. Framework Framework `json:"framework,omitempty"` // Image is a fully-qualified container image. Image string `json:"image"` // Port is the port exposed by the inference. Port *int32 `json:"port,omitempty"` // HTTPProbePath is the path of the http probe. HTTPProbePath *string `json:"http_probe_path,omitempty"` // Command to run when starting the inference. Command *string `json:"command,omitempty"` // EnvVars can be provided to set environment variables for the inference runtime. EnvVars map[string]string `json:"envVars,omitempty"` // Constraints are the constraints for the inference. Constraints []string `json:"constraints,omitempty"` // Secrets list of secrets to be made available to inference. Secrets []string `json:"secrets,omitempty"` // Labels are key-value pairs that may be attached to the inference. Labels map[string]string `json:"labels,omitempty"` // Annotations are key-value pairs that may be attached to the inference. Annotations map[string]string `json:"annotations,omitempty"` // Resources are the compute resource requirements. Resources *ResourceRequirements `json:"resources,omitempty"` }
type InferenceDeploymentStatus ¶
type InferenceDeploymentStatus struct { Phase Phase `json:"phase,omitempty"` // InvocationCount count of invocations InvocationCount int32 `json:"invocationCount,omitempty"` // Replicas desired within the cluster Replicas int32 `json:"replicas,omitempty"` // AvailableReplicas is the count of replicas ready to receive // invocations as reported by the faas-provider AvailableReplicas int32 `json:"availableReplicas,omitempty"` // CreatedAt is the time read back from the faas backend's // data store for when the function or its container was created. CreatedAt *time.Time `json:"createdAt,omitempty"` // Usage represents CPU and RAM used by all of the // functions' replicas. Divide by AvailableReplicas for an // average value per replica. Usage *InferenceUsage `json:"usage,omitempty"` // EventMessage record human readable message indicating details about the event of deployment. EventMessage string `json:"eventMessage,omitempty"` }
InferenceDeploymentStatus exported for system/inferences endpoint
type InferenceUsage ¶
type InferenceUsage struct { // CPU is the increase in CPU usage since the last measurement // equivalent to Kubernetes' concept of millicores. CPU float64 `json:"cpu,omitempty"` // TotalMemoryBytes is the total memory usage in bytes. TotalMemoryBytes float64 `json:"totalMemoryBytes,omitempty"` GPU float64 `json:"gpu,omitempty"` }
InferenceUsage represents CPU and RAM used by all of the functions' replicas.
CPU is measured in seconds consumed since the last measurement. RAM is measured in total bytes consumed.
type InstancePhase ¶
type InstancePhase string
const ( InstancePhaseScheduling InstancePhase = "Scheduling" InstancePhasePending InstancePhase = "Pending" InstancePhaseRunning InstancePhase = "Running" InstancePhaseFailed InstancePhase = "Failed" InstancePhaseSucceeded InstancePhase = "Succeeded" InstancePhaseUnknown InstancePhase = "Unknown" InstancePhaseCreating InstancePhase = "Creating" InstancePhaseInitializing InstancePhase = "Initializing" )
type LogRequest ¶
type LogRequest struct { Namespace string `form:"namespace" json:"namespace,omitempty"` Name string `form:"name" json:"name,omitempty"` // Instance is the optional pod name that allows you to request logs from a specific instance Instance string `form:"instance" json:"instance,omitempty"` // Follow allows the user to request a stream of logs until the timeout Follow bool `form:"follow" json:"follow,omitempty"` // Tail sets the maximum number of log messages to return, <=0 means unlimited Tail int `form:"tail" json:"tail,omitempty"` Since string `form:"since" json:"since,omitempty"` // End is the end time of the log stream End string `form:"end" json:"end,omitempty"` }
type Message ¶
type Message struct { // Name is the function name Name string `json:"name"` Namespace string `json:"namespace"` // instance is the name/id of the specific function instance Instance string `json:"instance"` // Timestamp is the timestamp of when the log message was recorded Timestamp time.Time `json:"timestamp"` // Text is the raw log message content Text string `json:"text"` }
Message is a specific log message from a function container log stream
type NamespaceRequest ¶
type NamespaceRequest struct {
Name string `json:"name,omitempty"`
}
type NodeSystemInfo ¶
type NodeSystemInfo struct { // MachineID reported by the node. For unique machine identification // in the cluster this field is preferred. Learn more from man(5) // machine-id: http://man7.org/linux/man-pages/man5/machine-id.5.html MachineID string `json:"machineID" protobuf:"bytes,1,opt,name=machineID"` // Kernel Version reported by the node from 'uname -r' (e.g. 3.16.0-0.bpo.4-amd64). KernelVersion string `json:"kernelVersion" protobuf:"bytes,4,opt,name=kernelVersion"` // OS Image reported by the node from /etc/os-release (e.g. Debian GNU/Linux 7 (wheezy)). OSImage string `json:"osImage" protobuf:"bytes,5,opt,name=osImage"` // The Operating System reported by the node OperatingSystem string `json:"operatingSystem" protobuf:"bytes,9,opt,name=operatingSystem"` // The Architecture reported by the node Architecture string `json:"architecture" protobuf:"bytes,10,opt,name=architecture"` }
NodeSystemInfo is a set of ids/uuids to uniquely identify the node.
type Phase ¶
type Phase string
const ( // PhaseReady is the state of an inference when it is ready to // receive invocations. PhaseReady Phase = "Ready" // PhaseScaling is the state of an inference when scales. PhaseScaling Phase = "Scaling" PhaseTerminating Phase = "Terminating" PhaseNoReplicas Phase = "NoReplicas" PhaseNotReady Phase = "NotReady" PhaseBuilding Phase = "Building" )
type ProviderInfo ¶
type ProviderInfo struct { Name string `json:"provider"` Version *VersionInfo `json:"version"` Orchestration string `json:"orchestration"` }
ProviderInfo provides information about the configured provider
type QueueRequest ¶
type QueueRequest struct { // Header from HTTP request Header http.Header // Host from HTTP request Host string // Body from HTTP request to use for invocation Body []byte // Method from HTTP request Method string // Path from HTTP request Path string // QueryString from HTTP request QueryString string // Function name to invoke Function string // QueueName to publish the request to, leave blank // for default. QueueName string // Used by queue worker to submit a result CallbackURL *url.URL `json:"CallbackUrl"` }
Request for asynchronous processing
type RequestQueuer ¶
type RequestQueuer interface {
Queue(req *QueueRequest) error
}
RequestQueuer can publish a request to be executed asynchronously.
type ResourceList ¶
type ResourceList map[ResourceName]Quantity
ResourceList is a set of (resource name, quantity) pairs.
type ResourceName ¶
type ResourceName string
const ( ResourceCPU ResourceName = "cpu" ResourceMemory ResourceName = "memory" ResourceGPU ResourceName = "gpu" )
type ResourceRequirements ¶
type ResourceRequirements struct { // Limits describes the maximum amount of compute resources allowed. // More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ // +optional Limits ResourceList `json:"limits,omitempty" protobuf:"bytes,1,rep,name=limits,casttype=ResourceList,castkey=ResourceName"` // Requests describes the minimum amount of compute resources required. // If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, // otherwise to an implementation-defined value. // More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ // +optional Requests ResourceList `json:"requests,omitempty" protobuf:"bytes,2,rep,name=requests,casttype=ResourceList,castkey=ResourceName"` }
ResourceRequirements describes the compute resource requirements.
type ScaleServiceRequest ¶
type ScaleServiceRequest struct { ServiceName string `json:"serviceName"` Replicas uint64 `json:"replicas"` EventMessage string `json:"eventMessage"` }
ScaleServiceRequest scales the service to the requested replica count.
type ScalingConfig ¶
type ScalingConfig struct { // MinReplicas is the lower limit for the number of replicas to which the // autoscaler can scale down. It defaults to 0. MinReplicas *int32 `json:"min_replicas,omitempty"` // MaxReplicas is the upper limit for the number of replicas to which the // autoscaler can scale up. It cannot be less than minReplicas. It defaults // to 1. MaxReplicas *int32 `json:"max_replicas,omitempty"` // TargetLoad is the target load. In capacity mode, it is the expected number of the inflight requests per replica. TargetLoad *int32 `json:"target_load,omitempty"` // Type is the scaling type. It can be either "capacity" or "rps". Default is "capacity". Type *ScalingType `json:"type,omitempty"` // ZeroDuration is the duration (in seconds) of zero load before scaling down to zero. Default is 5 minutes. ZeroDuration *int32 `json:"zero_duration,omitempty"` // StartupDuration is the duration (in seconds) of startup time. StartupDuration *int32 `json:"startup_duration,omitempty"` }
type ScalingType ¶
type ScalingType string
type Secret ¶
type Secret struct { // Name of the secret Name string `json:"name"` // Namespace if applicable for the secret Namespace string `json:"namespace,omitempty"` // Value is a string representing the string's value Value string `json:"value,omitempty"` // RawValue can be used to provide binary data when // Value is not set RawValue []byte `json:"rawValue,omitempty"` }
Secret for underlying orchestrator
type Server ¶
type Server struct { Spec ServerSpec `json:"spec,omitempty"` Status ServerStatus `json:"status,omitempty"` }
type ServerSpec ¶
type ServerStatus ¶
type ServerStatus struct { Allocatable ResourceList `json:"allocatable,omitempty"` Capacity ResourceList `json:"capacity,omitempty"` Phase string `json:"phase,omitempty"` System NodeSystemInfo `json:"system,omitempty"` }
type VersionInfo ¶
type VersionInfo struct { Version string `json:"version,omitempty"` BuildDate string `json:"build_date,omitempty"` GitCommit string `json:"git_commit,omitempty"` GitTag string `json:"git_tag,omitempty"` GitTreeState string `json:"git_tree_state,omitempty"` GoVersion string `json:"go_version,omitempty"` Compiler string `json:"compiler,omitempty"` Platform string `json:"platform,omitempty"` }
VersionInfo provides the commit message, sha and release version number