Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type MessageStream ¶
type Messaging ¶ added in v0.5.2
// Messaging holds the messaging settings: the cap on error backoff and the
// list of message streams.
type Messaging struct {
	// ErrorMaxBackoff is the maximum backoff time that will be applied when
	// consecutive errors are encountered.
	ErrorMaxBackoff Duration `json:"errorMaxBackoff"`
	// Streams is the list of message streams, read from the "streams" JSON key.
	Streams []MessageStream `json:"streams"`
}
type ModelAutoscaling ¶ added in v0.6.0
// ModelAutoscaling holds the timing settings used by model autoscaling.
// Both fields carry a `validate:"required"` tag.
type ModelAutoscaling struct {
	// Interval is the time between each autoscaling check.
	// Defaults to 10 seconds.
	Interval Duration `json:"interval" validate:"required"`
	// TimeWindow is the period that the autoscaling algorithm will consider
	// when calculating the average number of requests.
	// Defaults to 10 minutes.
	TimeWindow Duration `json:"timeWindow" validate:"required"`
}
func (*ModelAutoscaling) AverageWindowCount ¶ added in v0.6.0
func (a *ModelAutoscaling) AverageWindowCount() int
AverageWindowCount returns the number of intervals that will be considered when calculating the average value.
func (*ModelAutoscaling) RequiredConsecutiveScaleDowns ¶ added in v0.6.0
func (a *ModelAutoscaling) RequiredConsecutiveScaleDowns(scaleDownDelaySeconds int64) int
RequiredConsecutiveScaleDowns returns the number of consecutive scale-down operations required before the deployment is scaled down. This is calculated by dividing the scale-down delay (the scaleDownDelaySeconds argument) by the Interval.
type ModelRollouts ¶ added in v0.7.0
// ModelRollouts holds the settings applied when rolling out an update.
type ModelRollouts struct {
	// Surge is the number of additional Pods to create when rolling out an update.
	Surge int32 `json:"surge"`
}
type ModelServer ¶ added in v0.4.3
type ModelServerPods ¶ added in v0.7.0
// ModelServerPods holds settings applied to model pods: the service account
// and the pod-level and container-level security contexts.
type ModelServerPods struct {
	// ModelServiceAccountName is the service account to use for all model pods.
	ModelServiceAccountName string `json:"serviceAccountName,omitempty"`
	// ModelPodSecurityContext is the security context for the model pods.
	ModelPodSecurityContext *corev1.PodSecurityContext `json:"podSecurityContext,omitempty"`
	// ModelContainerSecurityContext is the security context for the model pod
	// containers. Note that its JSON key is "securityContext".
	ModelContainerSecurityContext *corev1.SecurityContext `json:"securityContext,omitempty"`
}
type ModelServers ¶
// ModelServers holds one ModelServer entry for each of OLlama, VLLM,
// FasterWhisper and Infinity.
//
// Note that the JSON keys here are spelled exactly like the Go field names
// (capitalised), unlike the lower camel case keys used by the other config
// structs in this package.
type ModelServers struct {
	// OLlama is the ModelServer entry stored under the "OLlama" JSON key.
	OLlama ModelServer `json:"OLlama"`
	// VLLM is the ModelServer entry stored under the "VLLM" JSON key.
	VLLM ModelServer `json:"VLLM"`
	// FasterWhisper is the ModelServer entry stored under the "FasterWhisper" JSON key.
	FasterWhisper ModelServer `json:"FasterWhisper"`
	// Infinity is the ModelServer entry stored under the "Infinity" JSON key.
	Infinity ModelServer `json:"Infinity"`
}
type ResourceProfile ¶
// ResourceProfile groups an image name with resource requests/limits and
// scheduling-related settings. System.ResourceProfiles stores these in a map
// keyed by string.
type ResourceProfile struct {
	// ImageName is read from the "imageName" JSON key.
	// NOTE(review): presumably selects which image of a ModelServer to use — confirm.
	ImageName string `json:"imageName"`
	// Requests is the resource list for requests; omitted from JSON when empty.
	Requests corev1.ResourceList `json:"requests,omitempty"`
	// Limits is the resource list for limits; omitted from JSON when empty.
	Limits corev1.ResourceList `json:"limits,omitempty"`
	// NodeSelector is a string-to-string map; omitted from JSON when empty.
	// NOTE(review): presumably copied onto the model pod spec — confirm.
	NodeSelector map[string]string `json:"nodeSelector,omitempty"`
	// Affinity is an optional affinity setting; nil when unset.
	Affinity *corev1.Affinity `json:"affinity,omitempty"`
	// Tolerations is an optional list of tolerations.
	Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
	// RuntimeClassName is an optional runtime class name; nil when unset.
	RuntimeClassName *string `json:"runtimeClassName,omitempty"`
}
type SecretNames ¶ added in v0.5.2
// SecretNames holds secret names; it currently has a single Huggingface entry.
type SecretNames struct {
	// Huggingface is read from the "huggingface" JSON key and carries a
	// `validate:"required"` tag.
	// NOTE(review): presumably the name of the secret holding Hugging Face
	// credentials — confirm.
	Huggingface string `json:"huggingface" validate:"required"`
}
type System ¶
// System aggregates the secret names, model server, resource profile,
// messaging, autoscaling, model pod and rollout settings, together with the
// metrics and health probe bind addresses.
type System struct {
	// SecretNames holds the secret name settings.
	SecretNames SecretNames `json:"secretNames" validate:"required"`
	// ModelServers holds the per-server ModelServer settings.
	ModelServers ModelServers `json:"modelServers" validate:"required"`
	// ResourceProfiles maps a string key to its ResourceProfile.
	ResourceProfiles map[string]ResourceProfile `json:"resourceProfiles" validate:"required"`
	// Messaging holds the messaging settings.
	Messaging Messaging `json:"messaging"`
	// MetricsAddr is the address the metric endpoint binds to.
	// Defaults to ":8080"
	MetricsAddr string `json:"metricsAddr" validate:"required"`
	// HealthAddress is the address the health probe endpoint binds to.
	// Defaults to ":8081"
	HealthAddress string `json:"healthAddress" validate:"required"`
	// AllowPodAddressOverride will allow the pod address to be overridden by
	// the Model objects. This is useful for development purposes.
	AllowPodAddressOverride bool `json:"allowPodAddressOverride"`
	// ModelAutoscaling holds the autoscaling timing settings.
	ModelAutoscaling ModelAutoscaling `json:"modelAutoscaling" validate:"required"`
	// ModelServerPods holds the service account and security context settings
	// for model pods.
	ModelServerPods ModelServerPods `json:"modelServerPods,omitempty"`
	// ModelRollouts holds the rollout settings.
	ModelRollouts ModelRollouts `json:"modelRollouts"`
}
func (*System) DefaultAndValidate ¶ added in v0.5.2
Click to show internal directories.
Click to hide internal directories.