task

package
v0.38.0-rc7 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 4, 2024 License: Apache-2.0 Imports: 36 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func AddAllocationAcceleratorData

func AddAllocationAcceleratorData(ctx context.Context, accData model.AcceleratorData,
) error

AddAllocationAcceleratorData stores accelerator data for an allocation.

func InsertNTSCAllocationWorkspaceRecord

func InsertNTSCAllocationWorkspaceRecord(
	ctx context.Context,
	allocationID model.AllocationID,
	workspaceID int,
	workspaceName string,
) error

InsertNTSCAllocationWorkspaceRecord inserts a record linking an NTSC task's allocation to its respective workspace.

func InsertTrialAllocationWorkspaceRecord

func InsertTrialAllocationWorkspaceRecord(
	ctx context.Context,
	experimentID int,
	allocationID model.AllocationID,
) error

InsertTrialAllocationWorkspaceRecord inserts a record linking a trial's allocation to its respective workspace and experiment.

Types

type AllocationExited

type AllocationExited struct {
	// UserRequestedStop is when a container unexpectedly exits with 0.
	UserRequestedStop bool
	Err               error
	FinalState        AllocationState
}

AllocationExited summarizes the exit status of an allocation.

func (*AllocationExited) String

func (a *AllocationExited) String() string

type AllocationService

type AllocationService interface {
	GetAllAllocationIDs() []model.AllocationID
	StartAllocation(
		logCtx logger.Context,
		req sproto.AllocateRequest,
		db db.DB,
		rm rm.ResourceManager,
		specifier tasks.TaskSpecifier,
		onExit func(*AllocationExited),
	) error
	AwaitTermination(id model.AllocationID)
	Signal(
		id model.AllocationID,
		sig AllocationSignal,
		reason string,
	) error
	State(id model.AllocationID) (AllocationState, error)
	SetReady(ctx context.Context, id model.AllocationID) error
	SetWaiting(ctx context.Context, id model.AllocationID) error
	SetProxyAddress(
		ctx context.Context,
		id model.AllocationID,
		addr string,
	) error
	GetAllocation(
		ctx context.Context,
		allocallocationID string,
	) (*model.Allocation, error)
	SetAcceleratorData(
		ctx context.Context,
		accData model.AcceleratorData,
	) error
	WatchRendezvous(
		ctx context.Context,
		id model.AllocationID,
		rID sproto.ResourcesID,
	) (*trialv1.RendezvousInfo, error)
	SetResourcesAsDaemon(
		ctx context.Context,
		id model.AllocationID,
		rID sproto.ResourcesID,
	) error
	AllGather(
		ctx context.Context,
		allocationID model.AllocationID,
		id uuid.UUID,
		numPeers int,
		data any,
	) ([]any, error)
	WatchPreemption(ctx context.Context, id model.AllocationID) (bool, error)
	AckPreemption(ctx context.Context, id model.AllocationID) error
	SendLog(
		ctx context.Context,
		id model.AllocationID,
		log *sproto.ContainerLog,
	)
	WaitForRestore(ctx context.Context, id model.AllocationID) error
	Detach(id model.AllocationID) error
}

AllocationService allows callers to launch, direct and query allocations.

var DefaultService AllocationService = newAllocationService()

DefaultService is the singleton default allocationService.

type AllocationSignal

type AllocationSignal string

AllocationSignal is the type for signals that can be sent to an allocation.

const (
	// KillAllocation is the signal to kill an allocation; analogous to SIGKILL.
	KillAllocation AllocationSignal = "kill"
	// TerminateAllocation is the signal to terminate an allocation; analogous to SIGTERM.
	TerminateAllocation AllocationSignal = "terminate"
)

type AllocationState

type AllocationState struct {
	State     model.AllocationState
	Resources map[sproto.ResourcesID]sproto.ResourcesSummary
	Ready     bool

	Addresses  map[sproto.ResourcesID][]cproto.Address
	Containers map[sproto.ResourcesID][]cproto.Container
}

AllocationState requests allocation state. A copy is filled and returned.

func (AllocationState) SingleContainer

func (a AllocationState) SingleContainer() *cproto.Container

SingleContainer returns a single random container from the allocation state.

func (AllocationState) SingleContainerAddresses

func (a AllocationState) SingleContainerAddresses() []cproto.Address

SingleContainerAddresses returns a single random container's addresses from the allocation state.

type AllocationUnfulfilledError

type AllocationUnfulfilledError struct {
	Action string
}

AllocationUnfulfilledError is returned when an operation is tried without an active allocation.

func (AllocationUnfulfilledError) Error

type AlreadyCancelledError

type AlreadyCancelledError struct{}

AlreadyCancelledError is returned to the allocation when it tries to take an action but has an unread cancellation in its inbox.

func (AlreadyCancelledError) Error

func (e AlreadyCancelledError) Error() string

type BehaviorDisabledError

type BehaviorDisabledError struct {
	Behavior string
}

BehaviorDisabledError is returned when an operation is tried without the behavior being enabled.

func (BehaviorDisabledError) Error

func (e BehaviorDisabledError) Error() string

type BehaviorUnsupportedError

type BehaviorUnsupportedError struct {
	Behavior string
}

BehaviorUnsupportedError is returned when an operation is tried without the behavior being supported.

func (BehaviorUnsupportedError) Error

func (e BehaviorUnsupportedError) Error() string

type NoAllocationError

type NoAllocationError struct {
	Action string
}

NoAllocationError is returned when an operation is tried without a requested allocation.

func (NoAllocationError) Error

func (e NoAllocationError) Error() string

type RendezvousInfoOrError

type RendezvousInfoOrError struct {
	Info *trialv1.RendezvousInfo
	Err  error
}

RendezvousInfoOrError contains either rendezvous info or an error from failing to materialize it.

type RendezvousWatcher

type RendezvousWatcher struct {
	C <-chan RendezvousInfoOrError
}

RendezvousWatcher contains a channel which can be polled for rendezvous info.

type StaleContainerError

type StaleContainerError struct {
	ID cproto.ID
}

StaleContainerError is returned when an operation was attempted by a stale container.

func (StaleContainerError) Error

func (e StaleContainerError) Error() string

type StaleResourcesError

type StaleResourcesError struct {
	ID sproto.ResourcesID
}

StaleResourcesError is returned when an operation was attempted by stale resources.

func (StaleResourcesError) Error

func (e StaleResourcesError) Error() string

type StaleResourcesReceivedError

type StaleResourcesReceivedError struct{}

StaleResourcesReceivedError is returned when the scheduler gives an allocation resources between when it requests them and when it decides, for some reason or another, that they are not needed.

func (StaleResourcesReceivedError) Error

type TimeoutExceededError

type TimeoutExceededError struct {
	Message string
}

TimeoutExceededError is returned, with a bit of detail, when a timeout is exceeded.

func (TimeoutExceededError) Error

func (e TimeoutExceededError) Error() string

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL