Documentation ¶
Index ¶
- Constants
- Variables
- func MakeMergedDeployment(target *appsv1.Deployment, desired *appsv1.Deployment) *appsv1.Deployment
- func NumberOfGPUs(resource corev1.ResourceList) string
- type Action
- type Device
- type KubeAGIRunner
- type LoaderGit
- type LoaderOSS
- type ModelLoader
- type ModelRunner
- func NewKubeAGIRunner(c client.Client, w *arcadiav1alpha1.Worker, modelFileFromRemote bool) (ModelRunner, error)
- func NewRunnerFastchat(c client.Client, w *arcadiav1alpha1.Worker, modelFileFromRemote bool) (ModelRunner, error)
- func NewRunnerFastchatVLLM(c client.Client, w *arcadiav1alpha1.Worker, modelFileFromRemote bool) (ModelRunner, error)
- type PodWorker
- func (podWorker *PodWorker) AfterStart(ctx context.Context) error
- func (podWorker *PodWorker) BeforeStart(ctx context.Context) error
- func (podWorker *PodWorker) BeforeStop(ctx context.Context) error
- func (podWorker *PodWorker) Model() *arcadiav1alpha1.Model
- func (podWorker *PodWorker) Start(ctx context.Context) error
- func (podWorker *PodWorker) State(ctx context.Context) (any, error)
- func (podWorker *PodWorker) Stop(ctx context.Context) error
- func (podWorker *PodWorker) SuffixedName() string
- func (podWorker *PodWorker) Worker() *arcadiav1alpha1.Worker
- type RDMALoader
- type RunnerFastchat
- type RunnerFastchatVLLM
- type Worker
Constants ¶
const ( WokerCommonSuffix = "-worker" RDMANodeLabel = "arcadia.kubeagi.k8s.com.cn/rdma" )
const ( // Resource ResourceNvidiaGPU corev1.ResourceName = "nvidia.com/gpu" )
Variables ¶
Functions ¶
func MakeMergedDeployment ¶
func MakeMergedDeployment(target *appsv1.Deployment, desired *appsv1.Deployment) *appsv1.Deployment
func NumberOfGPUs ¶
func NumberOfGPUs(resource corev1.ResourceList) string
NumberOfGPUs from ResourceList
Types ¶
type Device ¶
type Device string
Device defines the different device types, such as cpu, gpu, xpu, and npu, on which the model runs
func DeviceBasedOnResource ¶
func DeviceBasedOnResource(resource corev1.ResourceList) Device
DeviceBasedOnResource returns the device type based on the resource list
type KubeAGIRunner ¶ added in v0.2.1
type KubeAGIRunner struct {
// contains filtered or unexported fields
}
KubeAGIRunner utilizes core-library-cli(https://github.com/kubeagi/core-library/tree/main/libs/cli) to run model services. Mainly for reranking, whisper, etc.
func (*KubeAGIRunner) Build ¶ added in v0.2.1
func (runner *KubeAGIRunner) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
Build a model runner instance
func (*KubeAGIRunner) Device ¶ added in v0.2.1
func (runner *KubeAGIRunner) Device() Device
Device used when running model
func (*KubeAGIRunner) NumberOfGPUs ¶ added in v0.2.1
func (runner *KubeAGIRunner) NumberOfGPUs() string
NumberOfGPUs utilized by this runner
type LoaderGit ¶
type LoaderGit struct{}
LoaderGit defines the way to load model from git
func (*LoaderGit) Build ¶
func (loader *LoaderGit) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
type LoaderOSS ¶
type LoaderOSS struct {
// contains filtered or unexported fields
}
LoaderOSS defines the way to load model from oss
func (*LoaderOSS) Build ¶
func (loader *LoaderOSS) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
Build loads nothing within the Go code itself
type ModelLoader ¶
type ModelLoader interface {
Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
}
ModelLoader load models for worker
func NewLoaderOSS ¶
func NewLoaderOSS(ctx context.Context, c client.Client, endpoint *arcadiav1alpha1.Endpoint, worker *arcadiav1alpha1.Worker) (ModelLoader, error)
type ModelRunner ¶
type ModelRunner interface { // Device used when running model Device() Device // NumberOfGPUs used when running model NumberOfGPUs() string // Build a model runner instance Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error) }
ModelRunner run a model service
func NewKubeAGIRunner ¶ added in v0.2.1
func NewKubeAGIRunner(c client.Client, w *arcadiav1alpha1.Worker, modelFileFromRemote bool) (ModelRunner, error)
func NewRunnerFastchat ¶
func NewRunnerFastchat(c client.Client, w *arcadiav1alpha1.Worker, modelFileFromRemote bool) (ModelRunner, error)
func NewRunnerFastchatVLLM ¶
func NewRunnerFastchatVLLM(c client.Client, w *arcadiav1alpha1.Worker, modelFileFromRemote bool) (ModelRunner, error)
type PodWorker ¶
type PodWorker struct { // worker's namespacedname types.NamespacedName // contains filtered or unexported fields }
PodWorker hosts this worker in a single pod but with different loader and runner based on Worker's configuration
func NewPodWorker ¶
func NewPodWorker(ctx context.Context, c client.Client, s *runtime.Scheme, w *arcadiav1alpha1.Worker, d *arcadiav1alpha1.Datasource) (*PodWorker, error)
func (*PodWorker) AfterStart ¶
Actions to do after start this worker
func (*PodWorker) BeforeStart ¶
BeforeStart will create resources which are related to this Worker. Now we have a pvc (if configured), a service, an LLM (if an llm model), and an Embedder (if an embedding model)
func (*PodWorker) BeforeStop ¶
TODO: BeforeStop
func (*PodWorker) Model ¶
func (podWorker *PodWorker) Model() *arcadiav1alpha1.Model
Model that this worker is running for
func (*PodWorker) SuffixedName ¶
func (*PodWorker) Worker ¶
func (podWorker *PodWorker) Worker() *arcadiav1alpha1.Worker
type RDMALoader ¶
type RDMALoader struct {
// contains filtered or unexported fields
}
RDMALoader supports RDMA. It allows a Pod to use hostpath and RDMA to pull models faster and start services
func NewRDMALoader ¶
func NewRDMALoader(c client.Client, modelName, workerUID string, source *arcadiav1alpha1.Datasource, worker *arcadiav1alpha1.Worker) *RDMALoader
func (*RDMALoader) Build ¶
func (r *RDMALoader) Build(ctx context.Context, _ *arcadiav1alpha1.TypedObjectReference) (any, error)
type RunnerFastchat ¶
type RunnerFastchat struct {
// contains filtered or unexported fields
}
RunnerFastchat use fastchat to run a model
func (*RunnerFastchat) Build ¶
func (runner *RunnerFastchat) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
Build a runner instance
func (*RunnerFastchat) Device ¶
func (runner *RunnerFastchat) Device() Device
Device utilized by this runner
func (*RunnerFastchat) NumberOfGPUs ¶
func (runner *RunnerFastchat) NumberOfGPUs() string
NumberOfGPUs utilized by this runner
type RunnerFastchatVLLM ¶
type RunnerFastchatVLLM struct {
// contains filtered or unexported fields
}
RunnerFastchatVLLM use fastchat with vllm to run a model
func (*RunnerFastchatVLLM) Build ¶
func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
Build a runner instance
func (*RunnerFastchatVLLM) Device ¶
func (runner *RunnerFastchatVLLM) Device() Device
Device used by this runner
func (*RunnerFastchatVLLM) NumberOfGPUs ¶
func (runner *RunnerFastchatVLLM) NumberOfGPUs() string
NumberOfGPUs utilized by this runner
type Worker ¶
type Worker interface { // Worker that this is for Worker() *arcadiav1alpha1.Worker // Model that this worker is running for Model() *arcadiav1alpha1.Model // Actions to do before start this worker BeforeStart(ctx context.Context) error // Actions to do when Start this worker Start(ctx context.Context) error // Actions to do after start this worker AfterStart(ctx context.Context) error // Actions to do before stop this worker BeforeStop(ctx context.Context) error // Actions to do when Stop this worker Stop(ctx context.Context) error // State of this worker State(context.Context) (any, error) }
Worker implements the lifecycle management of an LLM worker