Documentation ¶
Index ¶
- func AddAllocationAcceleratorData(ctx context.Context, accData model.AcceleratorData) error
- type AllocationExited
- type AllocationService
- type AllocationSignal
- type AllocationState
- type ErrAllocationUnfulfilled
- type ErrAlreadyCancelled
- type ErrBehaviorDisabled
- type ErrBehaviorUnsupported
- type ErrNoAllocation
- type ErrStaleContainer
- type ErrStaleResources
- type ErrStaleResourcesReceived
- type ErrTimeoutExceeded
- type RendezvousInfoOrError
- type RendezvousWatcher
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func AddAllocationAcceleratorData ¶
func AddAllocationAcceleratorData(ctx context.Context, accData model.AcceleratorData, ) error
AddAllocationAcceleratorData stores accelerator data for an allocation.
Types ¶
type AllocationExited ¶
type AllocationExited struct { // userRequestedStop is when a container unexpectedly exits with 0. UserRequestedStop bool Err error FinalState AllocationState }
AllocationExited summarizes the exit status of an allocation.
func (*AllocationExited) String ¶
func (a *AllocationExited) String() string
type AllocationService ¶
type AllocationService interface { GetAllAllocationIDs() []model.AllocationID StartAllocation( logCtx logger.Context, req sproto.AllocateRequest, db db.DB, rm rm.ResourceManager, specifier tasks.TaskSpecifier, onExit func(*AllocationExited), ) error AwaitTermination(id model.AllocationID) Signal( id model.AllocationID, sig AllocationSignal, reason string, ) error State(id model.AllocationID) (AllocationState, error) SetReady(ctx context.Context, id model.AllocationID) error SetWaiting(ctx context.Context, id model.AllocationID) error SetProxyAddress( ctx context.Context, id model.AllocationID, addr string, ) error GetAllocation( ctx context.Context, allocallocationID string, ) (*model.Allocation, error) SetAcceleratorData( ctx context.Context, accData model.AcceleratorData, ) error WatchRendezvous( ctx context.Context, id model.AllocationID, rID sproto.ResourcesID, ) (*trialv1.RendezvousInfo, error) SetResourcesAsDaemon( ctx context.Context, id model.AllocationID, rID sproto.ResourcesID, ) error AllGather( ctx context.Context, allocationID model.AllocationID, id uuid.UUID, numPeers int, data any, ) ([]any, error) WatchPreemption(ctx context.Context, id model.AllocationID) (bool, error) AckPreemption(ctx context.Context, id model.AllocationID) error SendLog( ctx context.Context, id model.AllocationID, log *sproto.ContainerLog, ) }
AllocationService allows callers to launch, direct and query allocations.
var DefaultService AllocationService = newAllocationService()
DefaultService is the singleton default allocationService.
type AllocationSignal ¶
type AllocationSignal string
AllocationSignal is a string type for signals that can be sent to an allocation.
const ( // KillAllocation is the signal to kill an allocation; analogous to SIGKILL. KillAllocation AllocationSignal = "kill" // TerminateAllocation is the signal to terminate an allocation; analogous to SIGTERM. TerminateAllocation AllocationSignal = "terminate" )
type AllocationState ¶
type AllocationState struct { State model.AllocationState Resources map[sproto.ResourcesID]sproto.ResourcesSummary Ready bool Addresses map[sproto.ResourcesID][]cproto.Address Containers map[sproto.ResourcesID][]cproto.Container }
AllocationState requests allocation state. A copy is filled and returned.
func (AllocationState) SingleContainer ¶
func (a AllocationState) SingleContainer() *cproto.Container
SingleContainer returns a single random container from the allocation state.
func (AllocationState) SingleContainerAddresses ¶
func (a AllocationState) SingleContainerAddresses() []cproto.Address
SingleContainerAddresses returns a single random container's addresses from the allocation state.
type ErrAllocationUnfulfilled ¶
type ErrAllocationUnfulfilled struct {
Action string
}
ErrAllocationUnfulfilled is returned when an operation is tried without an active allocation.
func (ErrAllocationUnfulfilled) Error ¶
func (e ErrAllocationUnfulfilled) Error() string
type ErrAlreadyCancelled ¶
type ErrAlreadyCancelled struct{}
ErrAlreadyCancelled is returned to the allocation when it tries to take an action but has an unread cancellation in its inbox.
func (ErrAlreadyCancelled) Error ¶
func (e ErrAlreadyCancelled) Error() string
type ErrBehaviorDisabled ¶
type ErrBehaviorDisabled struct {
Behavior string
}
ErrBehaviorDisabled is returned when an operation is tried without the behavior being enabled.
func (ErrBehaviorDisabled) Error ¶
func (e ErrBehaviorDisabled) Error() string
type ErrBehaviorUnsupported ¶
type ErrBehaviorUnsupported struct {
Behavior string
}
ErrBehaviorUnsupported is returned when an operation is tried without the behavior being supported.
func (ErrBehaviorUnsupported) Error ¶
func (e ErrBehaviorUnsupported) Error() string
type ErrNoAllocation ¶
type ErrNoAllocation struct {
Action string
}
ErrNoAllocation is returned when an operation is tried without a requested allocation.
func (ErrNoAllocation) Error ¶
func (e ErrNoAllocation) Error() string
type ErrStaleContainer ¶
ErrStaleContainer is returned when an operation was attempted by a stale container.
func (ErrStaleContainer) Error ¶
func (e ErrStaleContainer) Error() string
type ErrStaleResources ¶
type ErrStaleResources struct {
ID sproto.ResourcesID
}
ErrStaleResources is returned when an operation was attempted by stale resources.
func (ErrStaleResources) Error ¶
func (e ErrStaleResources) Error() string
type ErrStaleResourcesReceived ¶
type ErrStaleResourcesReceived struct{}
ErrStaleResourcesReceived is returned when the scheduler gives an allocation resources between when it requests them and when it decides, for some reason or another, that they are not needed.
func (ErrStaleResourcesReceived) Error ¶
func (e ErrStaleResourcesReceived) Error() string
type ErrTimeoutExceeded ¶
type ErrTimeoutExceeded struct {
Message string
}
ErrTimeoutExceeded is returned, with a bit of detail, when a timeout is exceeded.
func (ErrTimeoutExceeded) Error ¶
func (e ErrTimeoutExceeded) Error() string
type RendezvousInfoOrError ¶
type RendezvousInfoOrError struct { Info *trialv1.RendezvousInfo Err error }
RendezvousInfoOrError contains either rendezvous info or an error from failing to materialize it.
type RendezvousWatcher ¶
type RendezvousWatcher struct {
C <-chan RendezvousInfoOrError
}
RendezvousWatcher contains a channel which can be polled for rendezvous info.