Documentation ¶
Overview ¶
Copyright (c) Microsoft Corporation. Licensed under the MIT license.
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type HuggingfaceTransformersParam ¶
type HuggingfaceTransformersParam struct {
	BaseCommand        string            // The initial command (e.g., 'torchrun', 'accelerate launch') used in the command line.
	TorchRunParams     map[string]string // Parameters for configuring the torchrun command.
	TorchRunRdzvParams map[string]string // Optional rendezvous parameters for distributed training/inference using torchrun (elastic).
	InferenceMainFile  string            // The main file for inference.
	ModelRunParams     map[string]string // Parameters for running the model training/inference.
}
func (*HuggingfaceTransformersParam) DeepCopy ¶
func (h *HuggingfaceTransformersParam) DeepCopy() HuggingfaceTransformersParam
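As a hedged illustration (the field values below are invented, not shipped preset values), DeepCopy returns an independent copy whose maps can be mutated without affecting the original:

hf := HuggingfaceTransformersParam{
	BaseCommand:    "torchrun", // illustrative values throughout
	TorchRunParams: map[string]string{"nnodes": "1"},
	ModelRunParams: map[string]string{"torch_dtype": "bfloat16"},
}
cp := hf.DeepCopy()
cp.ModelRunParams["torch_dtype"] = "float16"
// hf.ModelRunParams is unchanged: DeepCopy copies the maps,
// so the copy can be mutated independently.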
type Model ¶
type Model interface {
	GetInferenceParameters() *PresetParam
	GetTuningParameters() *PresetParam
	SupportDistributedInference() bool // If true, the model workload will be a StatefulSet, using the torch elastic runtime framework.
	SupportTuning() bool
}
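A minimal sketch of a preset implementing this interface; the type name and all parameter values are hypothetical, and the snippet assumes the standard library "time" package is imported:

type exampleModel struct{}

var _ Model = (*exampleModel)(nil) // compile-time interface check

func (*exampleModel) GetInferenceParameters() *PresetParam {
	return &PresetParam{
		ModelFamilyName:           "example-family", // hypothetical values throughout
		GPUCountRequirement:       "1",
		TotalGPUMemoryRequirement: "16Gi",
		ReadinessTimeout:          30 * time.Minute,
	}
}

func (*exampleModel) GetTuningParameters() *PresetParam { return nil } // tuning not supported
func (*exampleModel) SupportDistributedInference() bool { return false }
func (*exampleModel) SupportTuning() bool               { return false }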
type PresetParam ¶
type PresetParam struct {
	Tag                           string         // The model image tag.
	ModelFamilyName               string         // The name of the model family.
	ImageAccessMode               string         // Defines whether the image is public or private.
	DiskStorageRequirement        string         // Disk storage requirements for the model.
	GPUCountRequirement           string         // Number of GPUs required for the preset. Used for inference.
	TotalGPUMemoryRequirement     string         // Total GPU memory required for the preset. Used for inference.
	PerGPUMemoryRequirement       string         // GPU memory required per GPU. Used for inference.
	TuningPerGPUMemoryRequirement map[string]int // Minimum GPU memory per tuning method (batch size 1). Used for tuning.
	WorldSize                     int            // Defines the number of processes required for distributed inference.
	RuntimeParam

	// ReadinessTimeout defines the maximum duration for creating the workload.
	// This timeout accommodates the size of the image, ensuring pull completion
	// even under slower network conditions or unforeseen delays.
	ReadinessTimeout time.Duration
}
PresetParam defines the preset inference parameters for a model.
func (*PresetParam) DeepCopy ¶
func (p *PresetParam) DeepCopy() *PresetParam
func (*PresetParam) GetInferenceCommand ¶
func (p *PresetParam) GetInferenceCommand(runtime RuntimeName, skuNumGPUs string) []string
GetInferenceCommand builds the container command, e.g.: torchrun <TORCH_PARAMS> <OPTIONAL_RDZV_PARAMS> baseCommand <MODEL_PARAMS>
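A hedged usage sketch; the preset values below are invented, and the exact flag rendering is left to the package:

p := &PresetParam{
	RuntimeParam: RuntimeParam{
		Transformers: HuggingfaceTransformersParam{
			BaseCommand:       "torchrun",
			TorchRunParams:    map[string]string{"nproc_per_node": "2"}, // assumption: illustrative torchrun flag
			InferenceMainFile: "inference_api.py",                       // assumption: illustrative entrypoint
			ModelRunParams:    map[string]string{"torch_dtype": "bfloat16"},
		},
	},
}
cmd := p.GetInferenceCommand(RuntimeNameHuggingfaceTransformers, "2")
// cmd is the full argv, following the documented shape:
//   torchrun <TORCH_PARAMS> <OPTIONAL_RDZV_PARAMS> baseCommand <MODEL_PARAMS>
_ = cmd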
type RuntimeName ¶
type RuntimeName string
RuntimeName is the LLM runtime name.
const (
	RuntimeNameHuggingfaceTransformers RuntimeName = "transformers"
	RuntimeNameVLLM                    RuntimeName = "vllm"
)
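For example, a caller might dispatch on the runtime name like this (launcherFor is an illustrative helper, not part of the package):

func launcherFor(rt RuntimeName, p *PresetParam) string {
	switch rt {
	case RuntimeNameHuggingfaceTransformers:
		return p.Transformers.BaseCommand // promoted from the embedded RuntimeParam
	case RuntimeNameVLLM:
		return p.VLLM.BaseCommand
	default:
		return ""
	}
}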
type RuntimeParam ¶
type RuntimeParam struct {
	Transformers HuggingfaceTransformersParam
	VLLM         VLLMParam

	// Disable the tensor parallelism
	DisableTensorParallelism bool
}
RuntimeParam defines the LLM runtime parameters.
func (*RuntimeParam) DeepCopy ¶
func (rp *RuntimeParam) DeepCopy() RuntimeParam
type VLLMParam ¶
type VLLMParam struct {
	BaseCommand string

	// The model name used in the OpenAI serving API.
	// See https://platform.openai.com/docs/api-reference/chat/create#chat-create-model.
	ModelName string

	// Parameters for distributed inference.
	DistributionParams map[string]string

	// Parameters for running the model training/inference.
	ModelRunParams map[string]string
}
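As an illustration (the entrypoint and flags below are assumptions, not values shipped with any preset):

vllm := VLLMParam{
	BaseCommand: "python3 inference_api.py", // assumption: the real entrypoint varies by preset image
	ModelName:   "example-model",            // surfaced as the `model` field in OpenAI-style requests
	ModelRunParams: map[string]string{
		"dtype": "float16", // assumption: run parameters are passed through as CLI flags
	},
}
_ = vllm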