config

package
v0.8.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 1, 2024 License: Apache-2.0 Imports: 6 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Duration

// Duration wraps time.Duration by embedding it, so the package can attach its
// own MarshalJSON and UnmarshalJSON methods while still exposing every
// time.Duration method through the embedded field.
type Duration struct {
	time.Duration
}

func (Duration) MarshalJSON

func (d Duration) MarshalJSON() ([]byte, error)

func (*Duration) UnmarshalJSON

func (d *Duration) UnmarshalJSON(b []byte) error

type MessageStream

// MessageStream describes a single messaging stream: a requests URL, a
// responses URL, and a cap on the number of handlers started for the stream.
//
// NOTE(review): the URL scheme/format expected in RequestsURL and ResponsesURL
// is not visible here — confirm against the code that consumes them.
type MessageStream struct {
	RequestsURL  string `json:"requestsURL"`
	ResponsesURL string `json:"responsesURL"`
	// MaxHandlers is the maximum number of handlers that will be started for this stream.
	// Must be greater than 0 (enforced by the `min=1` validate tag). Defaults to 1.
	MaxHandlers int `json:"maxHandlers" validate:"min=1"`
}

type Messaging added in v0.5.2

// Messaging holds the messaging configuration: the maximum error backoff and
// the list of message streams (serialized under the "streams" JSON key).
type Messaging struct {
	// ErrorMaxBackoff is the maximum backoff time that will be applied when
	// consecutive errors are encountered.
	ErrorMaxBackoff Duration        `json:"errorMaxBackoff"`
	Streams         []MessageStream `json:"streams"`
}

type ModelAutoscaling added in v0.6.0

// ModelAutoscaling configures the timing of the model autoscaler: how often a
// check runs (Interval) and how much history is averaged (TimeWindow).
// Both fields carry the `required` validate tag.
type ModelAutoscaling struct {
	// Interval is the time between each autoscaling check.
	// Defaults to 10 seconds.
	Interval Duration `json:"interval" validate:"required"`
	// TimeWindow is the span of time that the autoscaling algorithm will
	// consider when calculating the average number of requests.
	// Defaults to 10 minutes.
	TimeWindow Duration `json:"timeWindow" validate:"required"`
}

func (*ModelAutoscaling) AverageWindowCount added in v0.6.0

func (a *ModelAutoscaling) AverageWindowCount() int

AverageWindowCount returns the number of intervals that will be considered when calculating the average value.

func (*ModelAutoscaling) RequiredConsecutiveScaleDowns added in v0.6.0

func (a *ModelAutoscaling) RequiredConsecutiveScaleDowns(scaleDownDelaySeconds int64) int

RequiredConsecutiveScaleDowns returns the number of consecutive scale down operations required before the deployment is scaled down. This is calculated by dividing the scale-down delay (scaleDownDelaySeconds) by the Interval.

type ModelRollouts added in v0.7.0

// ModelRollouts configures how model updates are rolled out.
type ModelRollouts struct {
	// Surge is the number of additional Pods to create when rolling out an update.
	Surge int32 `json:"surge"`
}

type ModelServer added in v0.4.3

// ModelServer holds the configuration for one model server engine.
type ModelServer struct {
	// Images is a string-to-string map serialized under the "images" JSON key.
	// NOTE(review): presumably maps an image alias to a container image
	// reference — confirm the key/value meaning against the caller.
	Images map[string]string `json:"images"`
}

type ModelServerPods added in v0.7.0

// ModelServerPods holds settings applied to model server pods. Every field is
// optional (`omitempty`). Note that the JSON keys drop the "Model" prefix used
// by the Go field names.
type ModelServerPods struct {
	// ModelServiceAccountName is the service account to use for all model pods.
	// JSON key: "serviceAccountName".
	ModelServiceAccountName string `json:"serviceAccountName,omitempty"`

	// ModelPodSecurityContext is the security context for the model pods.
	// JSON key: "podSecurityContext".
	ModelPodSecurityContext *corev1.PodSecurityContext `json:"podSecurityContext,omitempty"`

	// ModelContainerSecurityContext is the security context for the model pod
	// containers. JSON key: "securityContext".
	ModelContainerSecurityContext *corev1.SecurityContext `json:"securityContext,omitempty"`
}

type ModelServers

// ModelServers groups the ModelServer configuration for each model server
// engine listed here: OLlama, VLLM, FasterWhisper, and Infinity.
// The JSON keys use the same capitalization as the Go field names
// (for example "OLlama" and "VLLM"), not lowerCamelCase.
type ModelServers struct {
	OLlama        ModelServer `json:"OLlama"`
	VLLM          ModelServer `json:"VLLM"`
	FasterWhisper ModelServer `json:"FasterWhisper"`
	Infinity      ModelServer `json:"Infinity"`
}

type ResourceProfile

// ResourceProfile describes a named set of pod resource settings: an image
// name, resource requests and limits, a node selector, affinity, tolerations,
// and a runtime class name. Every field except ImageName is tagged
// `omitempty` and is therefore left out of the JSON output when unset.
//
// NOTE(review): ImageName presumably selects a key in ModelServer.Images —
// confirm against the code that resolves images.
type ResourceProfile struct {
	ImageName        string              `json:"imageName"`
	Requests         corev1.ResourceList `json:"requests,omitempty"`
	Limits           corev1.ResourceList `json:"limits,omitempty"`
	NodeSelector     map[string]string   `json:"nodeSelector,omitempty"`
	Affinity         *corev1.Affinity    `json:"affinity,omitempty"`
	Tolerations      []corev1.Toleration `json:"tolerations,omitempty"`
	RuntimeClassName *string             `json:"runtimeClassName,omitempty"`
}

type SecretNames added in v0.5.2

// SecretNames holds the names of the secrets referenced by the system.
type SecretNames struct {
	// Huggingface is the name of the Hugging Face secret. It is required
	// (`validate:"required"`), so an empty string fails validation.
	Huggingface string `json:"huggingface" validate:"required"`
}

type System

// System is the top-level configuration struct of this package; it aggregates
// every other configuration type declared here. Fields tagged
// `validate:"required"` must be set.
//
// NOTE(review): System.DefaultAndValidate presumably applies the defaults
// documented on the fields below and then evaluates the validate tags — its
// body is not visible here, confirm before relying on that.
type System struct {
	// SecretNames holds the names of the secrets used by the system. Required.
	SecretNames SecretNames `json:"secretNames" validate:"required"`

	// ModelServers holds the per-engine model server configuration. Required.
	ModelServers ModelServers `json:"modelServers" validate:"required"`

	// ResourceProfiles maps a profile name to its ResourceProfile. Required.
	ResourceProfiles map[string]ResourceProfile `json:"resourceProfiles" validate:"required"`

	// Messaging holds the messaging (streams and backoff) configuration.
	Messaging Messaging `json:"messaging"`

	// MetricsAddr is the address the metric endpoint binds to.
	// Defaults to ":8080"
	MetricsAddr string `json:"metricsAddr" validate:"required"`

	// HealthAddress is the address the health probe endpoint binds to.
	// Defaults to ":8081"
	HealthAddress string `json:"healthAddress" validate:"required"`

	// AllowPodAddressOverride will allow the pod address to be overridden by
	// the Model objects. This is useful for development purposes.
	AllowPodAddressOverride bool `json:"allowPodAddressOverride"`

	// ModelAutoscaling holds the autoscaler timing configuration. Required.
	ModelAutoscaling ModelAutoscaling `json:"modelAutoscaling" validate:"required"`

	// ModelServerPods holds the optional settings applied to model server pods.
	ModelServerPods ModelServerPods `json:"modelServerPods,omitempty"`

	// ModelRollouts holds the rollout (surge) configuration.
	ModelRollouts ModelRollouts `json:"modelRollouts"`
}

func (*System) DefaultAndValidate added in v0.5.2

func (s *System) DefaultAndValidate() error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL