Documentation ¶
Index ¶
- Constants
- func TemplateRegister(ctx context.Context, mgmt *config.Management) error
- func VersionRegister(ctx context.Context, mgmt *config.Management) error
- type AutoScalingConfig
- type DeploymentConfig
- type EngineConfig
- type GenerationConfig
- type MirrorConfig
- type PromptFormat
- type RayActorOptions
- type RayLLMModelConfig
- type ScalingConfig
- type TemplateHandler
- func (h *TemplateHandler) AssignVersion(_ string, tpv *mlv1.ModelTemplateVersion) (*mlv1.ModelTemplateVersion, error)
- func (h *TemplateHandler) DeleteLatestVersion(_ string, tp *mlv1.ModelTemplate) (*mlv1.ModelTemplate, error)
- func (h *TemplateHandler) SetDefaultVersion(_ string, tp *mlv1.ModelTemplate) (*mlv1.ModelTemplate, error)
- func (h *TemplateHandler) SyncLatestVersion(_ string, tp *mlv1.ModelTemplate) (*mlv1.ModelTemplate, error)
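- type TemplateVersionHandler
- func (h *TemplateVersionHandler) ConfigModelDeployment(_ string, tv *mlv1.ModelTemplateVersion) (*mlv1.ModelTemplateVersion, error)
- func (h *TemplateVersionHandler) OnChange(_ string, tv *mlv1.ModelTemplateVersion) (*mlv1.ModelTemplateVersion, error)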
Constants ¶
View Source
// MaxConcurrentRatio is an untyped integer constant with the value 40.
//
// NOTE(review): the name suggests it is a multiplier used when deriving a
// concurrency limit (e.g. DeploymentConfig.MaxConcurrentQueries) — confirm
// against the code that reads it.
const MaxConcurrentRatio = 40
Variables ¶
This section is empty.
Functions ¶
func TemplateRegister ¶
func TemplateRegister(ctx context.Context, mgmt *config.Management) error
func VersionRegister ¶
func VersionRegister(ctx context.Context, mgmt *config.Management) error
Types ¶
type AutoScalingConfig ¶
// AutoScalingConfig groups the replica autoscaling settings embedded in
// DeploymentConfig. Every field is mapped to a snake_case YAML key through
// its struct tag.
//
// NOTE(review): the float32 fields whose YAML keys end in "_s" presumably
// hold durations in seconds — confirm against the consumer of the YAML.
type AutoScalingConfig struct {
	MinReplicas                        int32   `yaml:"min_replicas"`
	MaxReplicas                        int32   `yaml:"max_replicas"`
	InitialReplicas                    int32   `yaml:"initial_replicas"`
	TargetNumOngoingRequestsPerReplica int32   `yaml:"target_num_ongoing_requests_per_replica"`
	MetricsIntervalS                   float32 `yaml:"metrics_interval_s"`
	LookBackPeriodS                    float32 `yaml:"look_back_period_s"`
	SmoothingFactor                    float32 `yaml:"smoothing_factor"`
	DownscaleDelayS                    float32 `yaml:"downscale_delay_s"`
	UpscaleDelayS                      float32 `yaml:"upscale_delay_s"`
}
type DeploymentConfig ¶
type DeploymentConfig struct { AutoScalingConfig AutoScalingConfig `yaml:"auto_scaling_config"` MaxConcurrentQueries int32 `yaml:"max_concurrent_queries"` RayActorOptions RayActorOptions `yaml:"ray_actor_options"` }
type EngineConfig ¶
type EngineConfig struct { ModelID string `yaml:"model_id"` HFModelID string `yaml:"hf_model_id,omitempty"` S3MirrorConfig MirrorConfig `yaml:"s3_mirror_config,omitempty"` GCSMirrorConfig MirrorConfig `yaml:"gcs_mirror_config,omitempty"` Type string `yaml:"type"` EngineKwargs map[string]interface{} `yaml:"engine_kwargs"` MaxTotalTokens int32 `yaml:"max_total_tokens"` Generation GenerationConfig `yaml:"generation"` }
type GenerationConfig ¶
type GenerationConfig struct { PromptFormat PromptFormat `yaml:"prompt_format"` StoppingSequences []string `yaml:"stopping_sequences"` }
type MirrorConfig ¶
// MirrorConfig names the bucket used as a model mirror. EngineConfig uses it
// for both its S3 and GCS mirror fields.
type MirrorConfig struct {
	// BucketURI is written under the YAML key "bucket_uri" and omitted from
	// the output when empty.
	BucketURI string `yaml:"bucket_uri,omitempty"`
}
type PromptFormat ¶
// PromptFormat holds the prompt-template strings and formatting flags that
// GenerationConfig serializes under its "prompt_format" key. Each field is
// mapped to a snake_case YAML key through its struct tag.
//
// NOTE(review): System, Assistant and User are presumably per-role message
// templates — confirm the placeholder syntax with the consumer of the YAML.
type PromptFormat struct {
	System                            string `yaml:"system"`
	Assistant                         string `yaml:"assistant"`
	TrailingAssistant                 string `yaml:"trailing_assistant"`
	User                              string `yaml:"user"`
	DefaultSystemMessage              string `yaml:"default_system_message"`
	SystemInUser                      bool   `yaml:"system_in_user"`
	AddSystemTagsEvenIfMessageIsEmpty bool   `yaml:"add_system_tags_even_if_message_is_empty"`
	StripWhitespace                   bool   `yaml:"strip_whitespace"`
}
type RayActorOptions ¶
type RayLLMModelConfig ¶
type RayLLMModelConfig struct { DeploymentConfig DeploymentConfig `yaml:"deployment_config"` EngineConfig EngineConfig `yaml:"engine_config"` ScalingConfig ScalingConfig `yaml:"scaling_config"` }
type ScalingConfig ¶
// ScalingConfig is the scaling section of RayLLMModelConfig: the number of
// workers and the GPU/CPU count per worker, plus an optional placement
// strategy and an optional map of extra per-worker resources.
//
// PlacementStrategy and ResourcesPerWorker are tagged omitempty, so they are
// left out of the YAML output when empty.
type ScalingConfig struct {
	NumWorkers         int32             `yaml:"num_workers"`
	NumGPUsPerWorker   int32             `yaml:"num_gpus_per_worker"`
	NumCPUsPerWorker   int32             `yaml:"num_cpus_per_worker"`
	PlacementStrategy  string            `yaml:"placement_strategy,omitempty"`
	ResourcesPerWorker map[string]string `yaml:"resources_per_worker,omitempty"`
}
type TemplateHandler ¶
// TemplateHandler is the receiver of the ModelTemplate handlers documented on
// this page: AssignVersion, DeleteLatestVersion, SetDefaultVersion and
// SyncLatestVersion. Its fields are unexported and therefore not listed.
type TemplateHandler struct {
// contains filtered or unexported fields
}
func (*TemplateHandler) AssignVersion ¶
func (h *TemplateHandler) AssignVersion(_ string, tpv *mlv1.ModelTemplateVersion) (*mlv1.ModelTemplateVersion, error)
AssignVersion assigns a version number to the template version.
func (*TemplateHandler) DeleteLatestVersion ¶
func (h *TemplateHandler) DeleteLatestVersion(_ string, tp *mlv1.ModelTemplate) (*mlv1.ModelTemplate, error)
func (*TemplateHandler) SetDefaultVersion ¶
func (h *TemplateHandler) SetDefaultVersion(_ string, tp *mlv1.ModelTemplate) (*mlv1.ModelTemplate, error)
SetDefaultVersion sets the default version for the template.
func (*TemplateHandler) SyncLatestVersion ¶
func (h *TemplateHandler) SyncLatestVersion(_ string, tp *mlv1.ModelTemplate) (*mlv1.ModelTemplate, error)
SyncLatestVersion syncs the latest version from memory to the template CR.
type TemplateVersionHandler ¶
// TemplateVersionHandler is the receiver of the ModelTemplateVersion handlers
// documented on this page: ConfigModelDeployment and OnChange. Its fields are
// unexported and therefore not listed.
type TemplateVersionHandler struct {
// contains filtered or unexported fields
}
func (*TemplateVersionHandler) ConfigModelDeployment ¶
func (h *TemplateVersionHandler) ConfigModelDeployment(_ string, tv *mlv1.ModelTemplateVersion) (*mlv1.ModelTemplateVersion, error)
func (*TemplateVersionHandler) OnChange ¶
func (h *TemplateVersionHandler) OnChange(_ string, tv *mlv1.ModelTemplateVersion) (*mlv1.ModelTemplateVersion, error)
Click to show internal directories.
Click to hide internal directories.