runtime

package
v1.13.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 14, 2025 License: Apache-2.0 Imports: 43 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var ErrRequestCanceled = errors.New("request is canceled")

ErrRequestCanceled is returned when the request is canceled.

Functions

func ModelDir

func ModelDir() string

ModelDir returns the directory where models are stored.

func PreferredModelFormat

func PreferredModelFormat(runtime string, supportedFormats []mv1.ModelFormat) (mv1.ModelFormat, error)

PreferredModelFormat returns the preferred model format.

Types

type Client

type Client interface {
	GetName(modelID string) string
	GetAddress(name string) string
	DeployRuntime(ctx context.Context, modelID string, update bool) (*appsv1.StatefulSet, error)
}

Client is the interface for managing runtimes.

func NewOllamaClient

func NewOllamaClient(
	k8sClient client.Client,
	namespace string,
	owner *metav1apply.OwnerReferenceApplyConfiguration,
	rconfig *config.RuntimeConfig,
	mconfig *config.ProcessedModelConfig,
	oconfig config.OllamaConfig,
	modelClient modelGetter,
) Client

NewOllamaClient creates a new Ollama runtime client.

func NewTritonClient added in v0.378.0

func NewTritonClient(
	k8sClient client.Client,
	namespace string,
	owner *metav1apply.OwnerReferenceApplyConfiguration,
	rconfig *config.RuntimeConfig,
	mconfig *config.ProcessedModelConfig,
) Client

NewTritonClient creates a new Triton runtime client.

func NewVLLMClient

func NewVLLMClient(
	k8sClient client.Client,
	namespace string,
	owner *metav1apply.OwnerReferenceApplyConfiguration,
	rconfig *config.RuntimeConfig,
	mconfig *config.ProcessedModelConfig,
	modelClient modelClient,
) Client

NewVLLMClient creates a new VLLM runtime client.

type ClientFactory

type ClientFactory interface {
	New(modelID string) (Client, error)
}

ClientFactory is the interface for creating a new Client given a model ID.

type Manager

type Manager struct {
	// contains filtered or unexported fields
}

Manager manages runtimes.

func NewManager

func NewManager(
	k8sClient client.Client,
	rtClientFactory ClientFactory,
	autoscaler autoscaler.Registerer,
) *Manager

NewManager creates a new runtime manager.

func (*Manager) GetLLMAddress

func (m *Manager) GetLLMAddress(modelID string) (string, error)

GetLLMAddress returns the address of the LLM.

func (*Manager) ListInProgressModels

func (m *Manager) ListInProgressModels() []string

ListInProgressModels returns the list of models that are in progress.

func (*Manager) ListSyncedModelIDs

func (m *Manager) ListSyncedModelIDs() []string

ListSyncedModelIDs returns the list of models that are synced.

func (*Manager) PullModel

func (m *Manager) PullModel(ctx context.Context, modelID string) error

PullModel pulls the model from the model manager.

func (*Manager) Reconcile

func (m *Manager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)

Reconcile reconciles the runtime.

func (*Manager) SetupWithManager

func (m *Manager) SetupWithManager(mgr ctrl.Manager, leaderElection bool) error

SetupWithManager sets up the runtime manager with the given controller manager.

type Preloader

type Preloader struct {
	// contains filtered or unexported fields
}

Preloader preloads models.

func NewPreloader

func NewPreloader(rtManager *Manager, ids []string, modelClient modelGetter) *Preloader

NewPreloader creates a new Preloader.

func (*Preloader) NeedLeaderElection

func (p *Preloader) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*Preloader) SetupWithManager

func (p *Preloader) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the preloader with the Manager.

func (*Preloader) Start

func (p *Preloader) Start(ctx context.Context) error

Start starts the preloader.

type Updater

type Updater struct {
	// contains filtered or unexported fields
}

Updater updates runtimes at startup.

func NewUpdater

func NewUpdater(namespace string, rtClientFactory ClientFactory) *Updater

NewUpdater creates a new Updater.

func (*Updater) NeedLeaderElection

func (u *Updater) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*Updater) SetupWithManager

func (u *Updater) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the updater with the manager.

func (*Updater) Start

func (u *Updater) Start(ctx context.Context) error

Start starts the updater.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL