Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type CacheProfile ¶ added in v0.9.0
type CacheProfile struct {
}
type CacheSharedFilesystem ¶ added in v0.9.0
// CacheSharedFilesystem identifies the filesystem backing a cache, either by
// storage class or by an existing persistent volume. The validate tags make
// each field required when the other one is not set, so at least one of the
// two must be provided.
type CacheSharedFilesystem struct {
	// StorageClassName is required when PersistentVolumeName is not set.
	// NOTE(review): presumably the name of a Kubernetes StorageClass — confirm.
	StorageClassName string `json:"storageClassName,omitempty" validate:"required_without=PersistentVolumeName"`
	// PersistentVolumeName is required when StorageClassName is not set.
	// This is usually used if you have an existing filesystem that you want to use.
	PersistentVolumeName string `json:"persistentVolumeName,omitempty" validate:"required_without=StorageClassName"`
}
type LeaderElection ¶ added in v0.9.0
type LeaderElection struct { // LeaseDuration is the duration that non-leader candidates will // wait to force acquire leadership. This is measured against time of // last observed ack. // // A client needs to wait a full LeaseDuration without observing a change to // the record before it can attempt to take over. When all clients are // shutdown and a new set of clients are started with different names against // the same leader record, they must wait the full LeaseDuration before // attempting to acquire the lease. Thus LeaseDuration should be as short as // possible (within your tolerance for clock skew rate) to avoid a possible // long waits in the scenario. // // Defaults to 15 seconds. LeaseDuration Duration `json:"leaseDuration"` // RenewDeadline is the duration that the acting master will retry // refreshing leadership before giving up. // // Defaults to 10 seconds. RenewDeadline Duration `json:"renewDeadline"` // RetryPeriod is the duration the LeaderElector clients should wait // between tries of actions. // // Defaults to 2 seconds. RetryPeriod Duration `json:"retryPeriod"` }
type MessageStream ¶
type Messaging ¶ added in v0.5.2
type Messaging struct { // ErrorMaxBackoff is the maximum backoff time that will be applied when // consecutive errors are encountered. ErrorMaxBackoff Duration `json:"errorMaxBackoff"` Streams []MessageStream `json:"streams"` }
type ModelAutoscaling ¶ added in v0.6.0
type ModelAutoscaling struct { // Interval is the time between each autoscaling check. // Defaults to 10 seconds. Interval Duration `json:"interval" validate:"required"` // TimeWindow that the autoscaling algorithm will consider when // calculating the average number of requests. // Defaults to 10 minutes. TimeWindow Duration `json:"timeWindow" validate:"required"` // StateConfigMapName is the name of the ConfigMap that will be used // to store the state of the autoscaler. This ConfigMap ensures that // the autoscaler can recover from crashes and restarts without losing // its state. // Required. StateConfigMapName string `json:"stateConfigMapName" validate:"required"` }
func (*ModelAutoscaling) AverageWindowCount ¶ added in v0.6.0
func (a *ModelAutoscaling) AverageWindowCount() int
AverageWindowCount returns the number of intervals that will be considered when calculating the average value.
func (*ModelAutoscaling) RequiredConsecutiveScaleDowns ¶ added in v0.6.0
func (a *ModelAutoscaling) RequiredConsecutiveScaleDowns(scaleDownDelaySeconds int64) int
RequiredConsecutiveScaleDowns returns the number of consecutive scale down operations required before the deployment is scaled down. This is calculated by dividing the scale-down delay (scaleDownDelaySeconds) by the Interval.
type ModelLoading ¶ added in v0.11.0
// ModelLoading carries the settings for model loading.
type ModelLoading struct {
	// Image is serialized under the JSON key "image" and is marked
	// required by its validate tag.
	// NOTE(review): presumably the container image that performs model
	// loading — confirm against the code that consumes this field.
	Image string `json:"image" validate:"required"`
}
type ModelRollouts ¶ added in v0.7.0
// ModelRollouts configures how updates are rolled out.
type ModelRollouts struct {
	// Surge is the number of additional Pods to create when rolling out an update.
	Surge int32 `json:"surge"`
}
type ModelServer ¶ added in v0.4.3
type ModelServerPods ¶ added in v0.7.0
type ModelServerPods struct { // The service account to use for all model pods ModelServiceAccountName string `json:"serviceAccountName,omitempty"` // Security Context for the model pods ModelPodSecurityContext *corev1.PodSecurityContext `json:"podSecurityContext,omitempty"` // Security Context for the model pod containers ModelContainerSecurityContext *corev1.SecurityContext `json:"securityContext,omitempty"` }
type ModelServers ¶
type ModelServers struct { OLlama ModelServer `json:"OLlama"` VLLM ModelServer `json:"VLLM"` FasterWhisper ModelServer `json:"FasterWhisper"` Infinity ModelServer `json:"Infinity"` }
type ResourceProfile ¶
type ResourceProfile struct { ImageName string `json:"imageName"` Requests corev1.ResourceList `json:"requests,omitempty"` Limits corev1.ResourceList `json:"limits,omitempty"` NodeSelector map[string]string `json:"nodeSelector,omitempty"` Affinity *corev1.Affinity `json:"affinity,omitempty"` Tolerations []corev1.Toleration `json:"tolerations,omitempty"` RuntimeClassName *string `json:"runtimeClassName,omitempty"` }
type SecretNames ¶ added in v0.5.2
type System ¶
type System struct { SecretNames SecretNames `json:"secretNames" validate:"required"` ModelServers ModelServers `json:"modelServers" validate:"required"` ModelLoading ModelLoading `json:"modelLoading" validate:"required"` ResourceProfiles map[string]ResourceProfile `json:"resourceProfiles" validate:"required"` CacheProfiles map[string]CacheProfile `json:"cacheProfiles"` Messaging Messaging `json:"messaging"` // MetricsAddr is the address the metric endpoint binds to. // Defaults to ":8080" MetricsAddr string `json:"metricsAddr" validate:"required"` // HealthAddr is the address the health probe endpoint binds to. // Defaults to ":8081" HealthAddress string `json:"healthAddress" validate:"required"` ModelAutoscaling ModelAutoscaling `json:"modelAutoscaling" validate:"required"` ModelServerPods ModelServerPods `json:"modelServerPods,omitempty"` ModelRollouts ModelRollouts `json:"modelRollouts"` LeaderElection LeaderElection `json:"leaderElection"` // AllowPodAddressOverride will allow the pod address to be overridden by the Model objects. Useful for development purposes. AllowPodAddressOverride bool `json:"allowPodAddressOverride"` // FixedSelfMetricAddrs is a list of fixed addresses to be used when scraping metrics for autoscaling. Useful for development purposes. FixedSelfMetricAddrs []string `json:"fixedSelfMetricAddrs,omitempty"` }
func (*System) DefaultAndValidate ¶ added in v0.5.2
Click to show internal directories.
Click to hide internal directories.