Documentation ¶
Index ¶
- type LocalSchedulerStore
- type MemoryStore
- func (m *MemoryStore) AddServerReplica(request *agent.AgentSubscribeRequest) error
- func (m *MemoryStore) DrainServerReplica(serverName string, replicaIdx int) ([]string, error)
- func (m *MemoryStore) FailedScheduling(modelVersion *ModelVersion, reason string)
- func (m *MemoryStore) GetAllModels() []string
- func (m *MemoryStore) GetModel(key string) (*ModelSnapshot, error)
- func (m *MemoryStore) GetModels() ([]*ModelSnapshot, error)
- func (m *MemoryStore) GetServer(serverKey string, shallow bool, modelDetails bool) (*ServerSnapshot, error)
- func (m *MemoryStore) GetServers(shallow bool, modelDetails bool) ([]*ServerSnapshot, error)
- func (m *MemoryStore) LockModel(modelId string)
- func (m *MemoryStore) RemoveModel(req *pb.UnloadModelRequest) error
- func (m *MemoryStore) RemoveServerReplica(serverName string, replicaIdx int) ([]string, error)
- func (m *MemoryStore) ServerNotify(request *pb.ServerNotifyRequest) error
- func (m *MemoryStore) UnloadVersionModels(modelKey string, version uint32) (bool, error)
- func (m *MemoryStore) UnlockModel(modelId string)
- func (m *MemoryStore) UpdateLoadedModels(modelKey string, version uint32, serverKey string, replicas []*ServerReplica) error
- func (m *MemoryStore) UpdateModel(req *pb.LoadModelRequest) error
- func (m *MemoryStore) UpdateModelState(modelKey string, version uint32, serverKey string, replicaIdx int, ...) error
- type Model
- func (m *Model) GetLastAvailableModelVersion() *ModelVersion
- func (m *Model) GetVersion(version uint32) *ModelVersion
- func (m *Model) GetVersions() []uint32
- func (m *Model) HasLatest() bool
- func (m *Model) Inactive() bool
- func (m *Model) IsDeleted() bool
- func (m *Model) Latest() *ModelVersion
- func (m *Model) Previous() *ModelVersion
- func (m *Model) SetDeleted()
- type ModelEquality
- type ModelReplicaState
- func (m ModelReplicaState) AlreadyLoadingOrLoaded() bool
- func (m ModelReplicaState) CanReceiveTraffic() bool
- func (m ModelReplicaState) Inactive() bool
- func (m ModelReplicaState) IsLoadingOrLoaded() bool
- func (me ModelReplicaState) String() string
- func (m ModelReplicaState) UnloadingOrUnloaded() bool
- type ModelSnapshot
- func (m *ModelSnapshot) CanReceiveTraffic() bool
- func (m *ModelSnapshot) GetLastAvailableModel() *ModelVersion
- func (m *ModelSnapshot) GetLatest() *ModelVersion
- func (m *ModelSnapshot) GetPrevious() *ModelVersion
- func (m *ModelSnapshot) GetVersion(version uint32) *ModelVersion
- func (m *ModelSnapshot) GetVersionsBeforeLastAvailable() []*ModelVersion
- type ModelState
- type ModelStatus
- type ModelStore
- type ModelVersion
- func (m *ModelVersion) DeleteReplica(replicaIdx int)
- func (m *ModelVersion) DesiredReplicas() int
- func (m *ModelVersion) GetAssignment() []int
- func (m *ModelVersion) GetDeploymentSpec() *pb.DeploymentSpec
- func (m *ModelVersion) GetMeta() *pb.MetaData
- func (m *ModelVersion) GetModel() *pb.Model
- func (m *ModelVersion) GetModelReplicaState(replicaIdx int) ModelReplicaState
- func (m *ModelVersion) GetModelSpec() *pb.ModelSpec
- func (m *ModelVersion) GetReplicaForState(state ModelReplicaState) []int
- func (m *ModelVersion) GetRequestedServer() *string
- func (m *ModelVersion) GetRequiredMemory() uint64
- func (m *ModelVersion) GetRequirements() []string
- func (m *ModelVersion) GetVersion() uint32
- func (m *ModelVersion) HasLiveReplicas() bool
- func (m *ModelVersion) HasServer() bool
- func (m *ModelVersion) Inactive() bool
- func (m *ModelVersion) IsLoadingOrLoaded(server string, replicaIdx int) bool
- func (m *ModelVersion) Key() string
- func (m *ModelVersion) ModelState() ModelStatus
- func (m *ModelVersion) ReplicaState() map[int]ReplicaStatus
- func (m *ModelVersion) Server() string
- func (m *ModelVersion) SetDeploymentSpec(spec *pb.DeploymentSpec)
- func (m *ModelVersion) SetModelState(s ModelStatus)
- func (m *ModelVersion) SetReplicaState(replicaIdx int, state ModelReplicaState, reason string)
- func (m *ModelVersion) UpdateKubernetesMeta(meta *pb.KubernetesMeta)
- type ModelVersionID
- type ReplicaStatus
- type Server
- func (s *Server) CreateSnapshot(shallow bool, modelDetails bool) *ServerSnapshot
- func (s *Server) GetAvailableMemory(idx int) uint64
- func (s *Server) GetMemory(idx int) uint64
- func (s *Server) GetReplicaInferenceHttpPort(idx int) int32
- func (s *Server) GetReplicaInferenceSvc(idx int) string
- func (s *Server) Key() string
- func (s *Server) NumReplicas() uint32
- func (s *Server) SetExpectedReplicas(replicas int)
- func (s *Server) SetKubernetesMeta(meta *pb.KubernetesMeta)
- type ServerReplica
- func (s *ServerReplica) GetAvailableMemory() uint64
- func (s *ServerReplica) GetCapabilities() []string
- func (s *ServerReplica) GetInferenceGrpcPort() int32
- func (s *ServerReplica) GetInferenceHttpPort() int32
- func (s *ServerReplica) GetInferenceSvc() string
- func (s *ServerReplica) GetIsDraining() bool
- func (s *ServerReplica) GetLoadedModelVersions() []ModelVersionID
- func (s *ServerReplica) GetMemory() uint64
- func (s *ServerReplica) GetNumLoadedModels() int
- func (s *ServerReplica) GetOverCommitPercentage() uint32
- func (s *ServerReplica) GetReplicaIdx() int
- func (s *ServerReplica) GetReservedMemory() uint64
- func (s *ServerReplica) GetServerName() string
- func (s *ServerReplica) SetIsDraining()
- func (s *ServerReplica) UpdateReservedMemory(memBytes uint64, isAdd bool)
- type ServerSnapshot
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type LocalSchedulerStore ¶
type LocalSchedulerStore struct {
// contains filtered or unexported fields
}
func NewLocalSchedulerStore ¶
func NewLocalSchedulerStore() *LocalSchedulerStore
type MemoryStore ¶
type MemoryStore struct {
// contains filtered or unexported fields
}
func NewMemoryStore ¶
func NewMemoryStore(logger log.FieldLogger, store *LocalSchedulerStore, eventHub *coordinator.EventHub) *MemoryStore
func (*MemoryStore) AddServerReplica ¶
func (m *MemoryStore) AddServerReplica(request *agent.AgentSubscribeRequest) error
func (*MemoryStore) DrainServerReplica ¶
func (m *MemoryStore) DrainServerReplica(serverName string, replicaIdx int) ([]string, error)
func (*MemoryStore) FailedScheduling ¶
func (m *MemoryStore) FailedScheduling(modelVersion *ModelVersion, reason string)
func (*MemoryStore) GetAllModels ¶
func (m *MemoryStore) GetAllModels() []string
func (*MemoryStore) GetModel ¶
func (m *MemoryStore) GetModel(key string) (*ModelSnapshot, error)
func (*MemoryStore) GetModels ¶
func (m *MemoryStore) GetModels() ([]*ModelSnapshot, error)
func (*MemoryStore) GetServer ¶
func (m *MemoryStore) GetServer(serverKey string, shallow bool, modelDetails bool) (*ServerSnapshot, error)
func (*MemoryStore) GetServers ¶
func (m *MemoryStore) GetServers(shallow bool, modelDetails bool) ([]*ServerSnapshot, error)
func (*MemoryStore) LockModel ¶
func (m *MemoryStore) LockModel(modelId string)
func (*MemoryStore) RemoveModel ¶
func (m *MemoryStore) RemoveModel(req *pb.UnloadModelRequest) error
func (*MemoryStore) RemoveServerReplica ¶
func (m *MemoryStore) RemoveServerReplica(serverName string, replicaIdx int) ([]string, error)
func (*MemoryStore) ServerNotify ¶
func (m *MemoryStore) ServerNotify(request *pb.ServerNotifyRequest) error
func (*MemoryStore) UnloadVersionModels ¶
func (m *MemoryStore) UnloadVersionModels(modelKey string, version uint32) (bool, error)
func (*MemoryStore) UnlockModel ¶
func (m *MemoryStore) UnlockModel(modelId string)
func (*MemoryStore) UpdateLoadedModels ¶
func (m *MemoryStore) UpdateLoadedModels(modelKey string, version uint32, serverKey string, replicas []*ServerReplica) error
func (*MemoryStore) UpdateModel ¶
func (m *MemoryStore) UpdateModel(req *pb.LoadModelRequest) error
func (*MemoryStore) UpdateModelState ¶
func (m *MemoryStore) UpdateModelState(modelKey string, version uint32, serverKey string, replicaIdx int, availableMemory *uint64, expectedState ModelReplicaState, desiredState ModelReplicaState, reason string) error
type Model ¶
type Model struct {
// contains filtered or unexported fields
}
func (*Model) GetLastAvailableModelVersion ¶
func (m *Model) GetLastAvailableModelVersion() *ModelVersion
func (*Model) GetVersion ¶
func (m *Model) GetVersion(version uint32) *ModelVersion
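func (*Model) GetVersions ¶
func (m *Model) GetVersions() []uint32
func (*Model) HasLatest ¶
func (m *Model) HasLatest() bool
func (*Model) Inactive ¶
func (m *Model) Inactive() bool
func (*Model) IsDeleted ¶
func (m *Model) IsDeleted() bool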
func (*Model) Latest ¶
func (m *Model) Latest() *ModelVersion
func (*Model) Previous ¶
func (m *Model) Previous() *ModelVersion
func (*Model) SetDeleted ¶
func (m *Model) SetDeleted()
type ModelEquality ¶
type ModelEquality struct {
	Equal                 bool
	MetaDiffers           bool
	ModelSpecDiffers      bool
	DeploymentSpecDiffers bool
}
func ModelEqualityCheck ¶
func ModelEqualityCheck(model1 *pb.Model, model2 *pb.Model) ModelEquality
type ModelReplicaState ¶
type ModelReplicaState uint32
const (
	ModelReplicaStateUnknown ModelReplicaState = iota
	LoadRequested
	Loading
	Loaded
	LoadFailed
	UnloadEnvoyRequested
	UnloadRequested
	Unloading
	Unloaded
	UnloadFailed
	Available
	Draining
)
func (ModelReplicaState) AlreadyLoadingOrLoaded ¶
func (m ModelReplicaState) AlreadyLoadingOrLoaded() bool
func (ModelReplicaState) CanReceiveTraffic ¶
func (m ModelReplicaState) CanReceiveTraffic() bool
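LoadedUnavailable is included because we can try to move a replica in that state to Available via an Envoy update. (Note: LoadedUnavailable does not appear among the ModelReplicaState constants listed above, so this comment may be stale; verify against the source.)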
func (ModelReplicaState) Inactive ¶
func (m ModelReplicaState) Inactive() bool
func (ModelReplicaState) IsLoadingOrLoaded ¶
func (m ModelReplicaState) IsLoadingOrLoaded() bool
func (ModelReplicaState) String ¶
func (me ModelReplicaState) String() string
func (ModelReplicaState) UnloadingOrUnloaded ¶
func (m ModelReplicaState) UnloadingOrUnloaded() bool
type ModelSnapshot ¶
type ModelSnapshot struct {
	Name     string
	Versions []*ModelVersion
	Deleted  bool
}
func (*ModelSnapshot) CanReceiveTraffic ¶
func (m *ModelSnapshot) CanReceiveTraffic() bool
func (*ModelSnapshot) GetLastAvailableModel ¶
func (m *ModelSnapshot) GetLastAvailableModel() *ModelVersion
func (*ModelSnapshot) GetLatest ¶
func (m *ModelSnapshot) GetLatest() *ModelVersion
func (*ModelSnapshot) GetPrevious ¶
func (m *ModelSnapshot) GetPrevious() *ModelVersion
func (*ModelSnapshot) GetVersion ¶
func (m *ModelSnapshot) GetVersion(version uint32) *ModelVersion
func (*ModelSnapshot) GetVersionsBeforeLastAvailable ¶
func (m *ModelSnapshot) GetVersionsBeforeLastAvailable() []*ModelVersion
type ModelState ¶
type ModelState uint32
const (
	ModelStateUnknown ModelState = iota
	ModelProgressing
	ModelAvailable
	ModelFailed
	ModelTerminating
	ModelTerminated
	ModelTerminateFailed
	ScheduleFailed
)
func (ModelState) String ¶
func (m ModelState) String() string
type ModelStatus ¶
type ModelStore ¶
type ModelStore interface {
	UpdateModel(config *pb.LoadModelRequest) error
	GetModel(key string) (*ModelSnapshot, error)
	GetModels() ([]*ModelSnapshot, error)
	LockModel(modelId string)
	UnlockModel(modelId string)
	RemoveModel(req *pb.UnloadModelRequest) error
	GetServers(shallow bool, modelDetails bool) ([]*ServerSnapshot, error)
	GetServer(serverKey string, shallow bool, modelDetails bool) (*ServerSnapshot, error)
	UpdateLoadedModels(modelKey string, version uint32, serverKey string, replicas []*ServerReplica) error
	UnloadVersionModels(modelKey string, version uint32) (bool, error)
	UpdateModelState(modelKey string, version uint32, serverKey string, replicaIdx int, availableMemory *uint64, expectedState, desiredState ModelReplicaState, reason string) error
	AddServerReplica(request *pba.AgentSubscribeRequest) error
	ServerNotify(request *pb.ServerNotifyRequest) error
	RemoveServerReplica(serverName string, replicaIdx int) ([]string, error) // return previously loaded models
	DrainServerReplica(serverName string, replicaIdx int) ([]string, error)  // return previously loaded models
	FailedScheduling(modelVersion *ModelVersion, reason string)
	GetAllModels() []string
}
type ModelVersion ¶
type ModelVersion struct {
// contains filtered or unexported fields
}
func NewDefaultModelVersion ¶
func NewDefaultModelVersion(model *pb.Model, version uint32) *ModelVersion
func NewModelVersion ¶
func NewModelVersion(model *pb.Model, version uint32, server string, replicas map[int]ReplicaStatus, deleted bool, state ModelState) *ModelVersion
TODO: remove the deleted parameter from NewModelVersion and update its callers accordingly.
func (*ModelVersion) DeleteReplica ¶
func (m *ModelVersion) DeleteReplica(replicaIdx int)
func (*ModelVersion) DesiredReplicas ¶
func (m *ModelVersion) DesiredReplicas() int
func (*ModelVersion) GetAssignment ¶
func (m *ModelVersion) GetAssignment() []int
func (*ModelVersion) GetDeploymentSpec ¶
func (m *ModelVersion) GetDeploymentSpec() *pb.DeploymentSpec
func (*ModelVersion) GetMeta ¶
func (m *ModelVersion) GetMeta() *pb.MetaData
func (*ModelVersion) GetModel ¶
func (m *ModelVersion) GetModel() *pb.Model
func (*ModelVersion) GetModelReplicaState ¶
func (m *ModelVersion) GetModelReplicaState(replicaIdx int) ModelReplicaState
func (*ModelVersion) GetModelSpec ¶
func (m *ModelVersion) GetModelSpec() *pb.ModelSpec
func (*ModelVersion) GetReplicaForState ¶
func (m *ModelVersion) GetReplicaForState(state ModelReplicaState) []int
func (*ModelVersion) GetRequestedServer ¶
func (m *ModelVersion) GetRequestedServer() *string
func (*ModelVersion) GetRequiredMemory ¶
func (m *ModelVersion) GetRequiredMemory() uint64
func (*ModelVersion) GetRequirements ¶
func (m *ModelVersion) GetRequirements() []string
func (*ModelVersion) GetVersion ¶
func (m *ModelVersion) GetVersion() uint32
func (*ModelVersion) HasLiveReplicas ¶
func (m *ModelVersion) HasLiveReplicas() bool
func (*ModelVersion) HasServer ¶
func (m *ModelVersion) HasServer() bool
func (*ModelVersion) Inactive ¶
func (m *ModelVersion) Inactive() bool
func (*ModelVersion) IsLoadingOrLoaded ¶
func (m *ModelVersion) IsLoadingOrLoaded(server string, replicaIdx int) bool
func (*ModelVersion) Key ¶
func (m *ModelVersion) Key() string
func (*ModelVersion) ModelState ¶
func (m *ModelVersion) ModelState() ModelStatus
func (*ModelVersion) ReplicaState ¶
func (m *ModelVersion) ReplicaState() map[int]ReplicaStatus
func (*ModelVersion) Server ¶
func (m *ModelVersion) Server() string
func (*ModelVersion) SetDeploymentSpec ¶
func (m *ModelVersion) SetDeploymentSpec(spec *pb.DeploymentSpec)
func (*ModelVersion) SetModelState ¶
func (m *ModelVersion) SetModelState(s ModelStatus)
Note: SetModelState is used for testing purposes and should not be called directly in production.
func (*ModelVersion) SetReplicaState ¶
func (m *ModelVersion) SetReplicaState(replicaIdx int, state ModelReplicaState, reason string)
func (*ModelVersion) UpdateKubernetesMeta ¶
func (m *ModelVersion) UpdateKubernetesMeta(meta *pb.KubernetesMeta)
type ModelVersionID ¶
func (*ModelVersionID) String ¶
func (mv *ModelVersionID) String() string
type ReplicaStatus ¶
type ReplicaStatus struct {
	State     ModelReplicaState
	Reason    string
	Timestamp time.Time
}
type Server ¶
type Server struct {
// contains filtered or unexported fields
}
func (*Server) CreateSnapshot ¶
func (s *Server) CreateSnapshot(shallow bool, modelDetails bool) *ServerSnapshot
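func (*Server) GetAvailableMemory ¶
func (s *Server) GetAvailableMemory(idx int) uint64
func (*Server) GetMemory ¶
func (s *Server) GetMemory(idx int) uint64
func (*Server) GetReplicaInferenceHttpPort ¶
func (s *Server) GetReplicaInferenceHttpPort(idx int) int32
func (*Server) GetReplicaInferenceSvc ¶
func (s *Server) GetReplicaInferenceSvc(idx int) string
func (*Server) Key ¶
func (s *Server) Key() string
func (*Server) NumReplicas ¶
func (s *Server) NumReplicas() uint32
func (*Server) SetExpectedReplicas ¶
func (s *Server) SetExpectedReplicas(replicas int)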
func (s *Server) SetKubernetesMeta(meta *pb.KubernetesMeta)
type ServerReplica ¶
type ServerReplica struct {
// contains filtered or unexported fields
}
func NewServerReplica ¶
func NewServerReplicaFromConfig ¶
func NewServerReplicaFromConfig(server *Server, replicaIdx int, loadedModels map[ModelVersionID]bool, config *pba.ReplicaConfig, availableMemoryBytes uint64) *ServerReplica
func (*ServerReplica) GetAvailableMemory ¶
func (s *ServerReplica) GetAvailableMemory() uint64
func (*ServerReplica) GetCapabilities ¶
func (s *ServerReplica) GetCapabilities() []string
func (*ServerReplica) GetInferenceGrpcPort ¶
func (s *ServerReplica) GetInferenceGrpcPort() int32
func (*ServerReplica) GetInferenceHttpPort ¶
func (s *ServerReplica) GetInferenceHttpPort() int32
func (*ServerReplica) GetInferenceSvc ¶
func (s *ServerReplica) GetInferenceSvc() string
func (*ServerReplica) GetIsDraining ¶
func (s *ServerReplica) GetIsDraining() bool
func (*ServerReplica) GetLoadedModelVersions ¶
func (s *ServerReplica) GetLoadedModelVersions() []ModelVersionID
func (*ServerReplica) GetMemory ¶
func (s *ServerReplica) GetMemory() uint64
func (*ServerReplica) GetNumLoadedModels ¶
func (s *ServerReplica) GetNumLoadedModels() int
func (*ServerReplica) GetOverCommitPercentage ¶
func (s *ServerReplica) GetOverCommitPercentage() uint32
func (*ServerReplica) GetReplicaIdx ¶
func (s *ServerReplica) GetReplicaIdx() int
func (*ServerReplica) GetReservedMemory ¶
func (s *ServerReplica) GetReservedMemory() uint64
func (*ServerReplica) GetServerName ¶
func (s *ServerReplica) GetServerName() string
func (*ServerReplica) SetIsDraining ¶
func (s *ServerReplica) SetIsDraining()
func (*ServerReplica) UpdateReservedMemory ¶
func (s *ServerReplica) UpdateReservedMemory(memBytes uint64, isAdd bool)
type ServerSnapshot ¶
type ServerSnapshot struct {
	Name             string
	Replicas         map[int]*ServerReplica
	ExpectedReplicas int
	KubernetesMeta   *pb.KubernetesMeta
}
func (*ServerSnapshot) String ¶
func (s *ServerSnapshot) String() string
Click to show internal directories.
Click to hide internal directories.