config

package

Version: v0.8.0 (latest) | Published: Oct 1, 2024 | License: Apache-2.0 | Imports: 6 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/substratusai/kubeai

Links

Open Source Insights

Documentation ¶

Index ¶

type Duration
- func (d Duration) MarshalJSON() ([]byte, error)
- func (d *Duration) UnmarshalJSON(b []byte) error
type MessageStream
type Messaging
type ModelAutoscaling
- func (a *ModelAutoscaling) AverageWindowCount() int
- func (a *ModelAutoscaling) RequiredConsecutiveScaleDowns(scaleDownDelaySeconds int64) int
type ModelRollouts
type ModelServer
type ModelServerPods
type ModelServers
type ResourceProfile
type SecretNames
type System
- func (s *System) DefaultAndValidate() error

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type Duration ¶

// Duration embeds time.Duration so that it can be used as a field type in the
// configuration structs of this package. The package declares MarshalJSON and
// UnmarshalJSON methods on it.
// NOTE(review): the method bodies are not visible here, so the exact JSON
// representation (e.g. a string such as "10s") should be confirmed in system.go.
type Duration struct {
	time.Duration
}

func (Duration) MarshalJSON ¶

func (d Duration) MarshalJSON() ([]byte, error)

func (*Duration) UnmarshalJSON ¶

func (d *Duration) UnmarshalJSON(b []byte) error

type MessageStream ¶

// MessageStream describes a single entry of Messaging.Streams: a pair of URLs
// plus a cap on the number of handlers started for the stream.
type MessageStream struct {
	// RequestsURL is the URL associated with requests for this stream.
	// NOTE(review): presumably the subscription that requests are received
	// from — confirm against the code that consumes this config.
	RequestsURL  string `json:"requestsURL"`
	// ResponsesURL is the URL associated with responses for this stream.
	// NOTE(review): presumably the topic that responses are published to —
	// confirm against the code that consumes this config.
	ResponsesURL string `json:"responsesURL"`
	// MaxHandlers is the maximum number of handlers that will be started for this stream.
	// Must be greater than 0 (enforced by the `validate:"min=1"` tag). Defaults to 1.
	MaxHandlers int `json:"maxHandlers" validate:"min=1"`
}

type Messaging ¶ added in v0.5.2

// Messaging groups the message-stream settings: a cap on error backoff and
// the list of configured streams.
type Messaging struct {
	// ErrorMaxBackoff is the maximum backoff time that will be applied when
	// consecutive errors are encountered.
	ErrorMaxBackoff Duration        `json:"errorMaxBackoff"`
	// Streams lists the configured message streams.
	// NOTE(review): there is no validate tag on this field, so an empty or
	// missing list appears to be accepted — confirm.
	Streams         []MessageStream `json:"streams"`
}

type ModelAutoscaling ¶ added in v0.6.0

// ModelAutoscaling configures the timing of the autoscaling checks. Both
// fields are tagged `validate:"required"`. The AverageWindowCount and
// RequiredConsecutiveScaleDowns methods are declared on this type.
type ModelAutoscaling struct {
	// Interval is the time between each autoscaling check.
	// Defaults to 10 seconds.
	Interval Duration `json:"interval" validate:"required"`
	// TimeWindow that the autoscaling algorithm will consider when
	// calculating the average number of requests.
	// Defaults to 10 minutes.
	TimeWindow Duration `json:"timeWindow" validate:"required"`
}

func (*ModelAutoscaling) AverageWindowCount ¶ added in v0.6.0

func (a *ModelAutoscaling) AverageWindowCount() int

AverageWindowCount returns the number of intervals that will be considered when calculating the average value.

func (*ModelAutoscaling) RequiredConsecutiveScaleDowns ¶ added in v0.6.0

func (a *ModelAutoscaling) RequiredConsecutiveScaleDowns(scaleDownDelaySeconds int64) int

RequiredConsecutiveScaleDowns returns the number of consecutive scale-down operations required before the deployment is scaled down. This is calculated by dividing the scale-down delay (scaleDownDelaySeconds) by the Interval.

type ModelRollouts ¶ added in v0.7.0

// ModelRollouts configures how updates are rolled out.
type ModelRollouts struct {
	// Surge is the number of additional Pods to create when rolling out an update.
	Surge int32 `json:"surge"`
}

type ModelServer ¶ added in v0.4.3

// ModelServer holds the settings for one model server engine; it is the
// field type used for every entry of ModelServers.
type ModelServer struct {
	// Images is a string-to-string map of images for this model server.
	// NOTE(review): presumably keyed by image name (cf. ResourceProfile.ImageName)
	// with the container image reference as the value — TODO confirm.
	Images map[string]string `json:"images"`
}

type ModelServerPods ¶ added in v0.7.0

// ModelServerPods holds settings that apply to all model pods: the service
// account and the pod-level and container-level security contexts.
// Every field is tagged omitempty. Note that the JSON keys do not mirror the
// Go field names (e.g. ModelServiceAccountName is "serviceAccountName").
type ModelServerPods struct {
	// The service account to use for all model pods
	// (JSON key: "serviceAccountName").
	ModelServiceAccountName string `json:"serviceAccountName,omitempty"`

	// Security Context for the model pods
	// (JSON key: "podSecurityContext"). Nil when not configured.
	ModelPodSecurityContext *corev1.PodSecurityContext `json:"podSecurityContext,omitempty"`

	// Security Context for the model pod containers
	// (JSON key: "securityContext"). Nil when not configured.
	ModelContainerSecurityContext *corev1.SecurityContext `json:"securityContext,omitempty"`
}

type ModelServers ¶

// ModelServers holds one ModelServer entry per model server engine.
// Note that the JSON keys are spelled exactly like the Go field names
// (e.g. "OLlama", "VLLM"), unlike the camelCase keys used by the other
// configuration structs in this package.
type ModelServers struct {
	// OLlama is the ModelServer configuration stored under JSON key "OLlama".
	OLlama        ModelServer `json:"OLlama"`
	// VLLM is the ModelServer configuration stored under JSON key "VLLM".
	VLLM          ModelServer `json:"VLLM"`
	// FasterWhisper is the ModelServer configuration stored under JSON key "FasterWhisper".
	FasterWhisper ModelServer `json:"FasterWhisper"`
	// Infinity is the ModelServer configuration stored under JSON key "Infinity".
	Infinity      ModelServer `json:"Infinity"`
}

type ResourceProfile ¶

// ResourceProfile is the value type of System.ResourceProfiles. It bundles an
// image name with Kubernetes resource and scheduling settings expressed as
// corev1 types. All fields except ImageName are tagged omitempty.
// NOTE(review): how these values are applied to model Pods is not visible
// here — confirm against the code that consumes this config.
type ResourceProfile struct {
	// ImageName names the image to use with this profile.
	// NOTE(review): presumably a key into ModelServer.Images — TODO confirm.
	ImageName        string              `json:"imageName"`
	// Requests is a Kubernetes resource list of requested resources.
	Requests         corev1.ResourceList `json:"requests,omitempty"`
	// Limits is a Kubernetes resource list of resource limits.
	Limits           corev1.ResourceList `json:"limits,omitempty"`
	// NodeSelector is a map of node label keys to values.
	NodeSelector     map[string]string   `json:"nodeSelector,omitempty"`
	// Affinity holds Kubernetes affinity rules; nil when not configured.
	Affinity         *corev1.Affinity    `json:"affinity,omitempty"`
	// Tolerations is a list of Kubernetes tolerations.
	Tolerations      []corev1.Toleration `json:"tolerations,omitempty"`
	// RuntimeClassName is an optional runtime class name; nil when not configured.
	RuntimeClassName *string             `json:"runtimeClassName,omitempty"`
}

type SecretNames ¶ added in v0.5.2

// SecretNames holds the names of secrets referenced by the system
// configuration.
type SecretNames struct {
	// Huggingface is the name of the Hugging Face secret. It is required.
	// NOTE(review): presumably the name of a Kubernetes Secret holding a
	// Hugging Face token — confirm.
	Huggingface string `json:"huggingface" validate:"required"`
}

type System ¶

// System is the top-level configuration struct of this package; it aggregates
// all of the other configuration types. The DefaultAndValidate method is
// declared on it.
// NOTE(review): several fields are both documented with a default and tagged
// `validate:"required"`; presumably DefaultAndValidate fills in defaults
// before validating — confirm in system.go.
type System struct {
	// SecretNames holds the names of secrets used by the system. Required.
	SecretNames SecretNames `json:"secretNames" validate:"required"`

	// ModelServers holds the per-engine model server configuration. Required.
	ModelServers ModelServers `json:"modelServers" validate:"required"`

	// ResourceProfiles maps a profile name to its ResourceProfile. Required.
	ResourceProfiles map[string]ResourceProfile `json:"resourceProfiles" validate:"required"`

	// Messaging holds the message stream configuration. It carries no
	// validate tag at this level.
	Messaging Messaging `json:"messaging"`

	// MetricsAddr is the address the metric endpoint binds to.
	// Defaults to ":8080"
	MetricsAddr string `json:"metricsAddr" validate:"required"`

	// HealthAddress is the address the health probe endpoint binds to.
	// Defaults to ":8081"
	HealthAddress string `json:"healthAddress" validate:"required"`

	// AllowPodAddressOverride will allow the pod address to be overridden by the Model objects. This is useful for development purposes.
	AllowPodAddressOverride bool `json:"allowPodAddressOverride"`

	// ModelAutoscaling holds the autoscaling interval and time window. Required.
	ModelAutoscaling ModelAutoscaling `json:"modelAutoscaling" validate:"required"`

	// ModelServerPods holds settings applied to all model pods.
	// NOTE(review): with encoding/json, omitempty has no effect on a
	// non-pointer struct field, so this key is always emitted when marshaling.
	ModelServerPods ModelServerPods `json:"modelServerPods,omitempty"`

	// ModelRollouts holds the rollout (surge) settings.
	ModelRollouts ModelRollouts `json:"modelRollouts"`
}

func (*System) DefaultAndValidate ¶ added in v0.5.2

func (s *System) DefaultAndValidate() error

Source Files ¶

View all Source files

system.go

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL