spec

package
v0.0.0-...-c377703 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 14, 2017 License: Apache-2.0 Imports: 9 Imported by: 0

Documentation

Index

Constants

View Source
// Identity of the MxJob custom resource definition (CRD).
const (
	// CRDKind is the kind name of the custom resource.
	CRDKind = "MxJob"
	// CRDKindPlural is the plural name of the custom resource.
	CRDKindPlural = "mxjobs"
	// CRDGroup is the API group of the custom resource.
	CRDGroup = "mxnet.mlkube.io"
	// CRDVersion is the API version of the custom resource.
	CRDVersion = "v1beta1"
	// CRDApiVersion is the group-qualified API version, built as
	// CRDGroup + "/" + CRDVersion, i.e. "mxnet.mlkube.io/v1beta1".
	CRDApiVersion = CRDGroup + "/" + CRDVersion
)

// AppLabel is the value of the APP label that gets applied to a lot of entities.
const AppLabel = "mxnet-job"

// Defaults for the Spec.
const (
	// PsRootPort is the default value for the PS root port.
	PsRootPort = 9091
	// Replicas is the default replica count.
	Replicas = 1
)
View Source
// Phase values for an MxJob.
//
// NOTE(review): only MxJobPhaseNone is declared with the MxJobPhase type.
// Every other constant in this group has an explicit value and no type, so it
// is an untyped string constant rather than an MxJobPhase. They still convert
// implicitly where an MxJobPhase is expected, but they are equally assignable
// to a plain string — confirm whether that is intended before adding types.
const (
	// MxJobPhaseNone is the empty phase (the zero value of MxJobPhase).
	MxJobPhaseNone MxJobPhase = ""
	// MxJobPhaseCreating is the "Creating" phase.
	MxJobPhaseCreating = "Creating"
	// MxJobPhaseRunning is the "Running" phase.
	MxJobPhaseRunning = "Running"
	// MxJobPhaseCleanUp is the "CleanUp" phase.
	MxJobPhaseCleanUp = "CleanUp"
	// MxJobPhaseFailed is the "Failed" phase.
	MxJobPhaseFailed = "Failed"
	// MxJobPhaseDone is the "Done" phase.
	MxJobPhaseDone = "Done"
)
View Source
// Condition names for an MxJob.
//
// NOTE(review): these are untyped string constants; none of them is declared
// as MxJobConditionType, although MxJobCondition.Type uses that type.
// Presumably they are meant to be MxJobConditionType values — confirm.
const (
	// MxJobConditionReady is the "Ready" condition.
	MxJobConditionReady = "Ready"

	// MxJobConditionRemovingDeadMember is the "RemovingDeadMember" condition.
	MxJobConditionRemovingDeadMember = "RemovingDeadMember"

	// MxJobConditionRecovering is the "Recovering" condition.
	MxJobConditionRecovering = "Recovering"

	// MxJobConditionScalingUp and MxJobConditionScalingDown are the
	// "ScalingUp" and "ScalingDown" conditions.
	MxJobConditionScalingUp   = "ScalingUp"
	MxJobConditionScalingDown = "ScalingDown"

	// MxJobConditionUpgrading is the "Upgrading" condition.
	MxJobConditionUpgrading = "Upgrading"
)

TODO(jlewi): Need to define appropriate conditions and get rid of the ones we don't need.

View Source
// State values for a replica.
//
// NOTE(review): only ReplicaStateUnknown is declared with the ReplicaState
// type. The remaining constants have an explicit value and no type, so they
// are untyped string constants — confirm whether that is intended.
const (
	// ReplicaStateUnknown is the "Unknown" replica state.
	ReplicaStateUnknown ReplicaState = "Unknown"
	// ReplicaStateStarting is the "Starting" replica state.
	ReplicaStateStarting = "Starting"
	// ReplicaStateRunning is the "Running" replica state.
	ReplicaStateRunning = "Running"
	// ReplicaStateFailed is the "Failed" replica state.
	ReplicaStateFailed = "Failed"
	// ReplicaStateSucceeded is the "Succeeded" replica state.
	ReplicaStateSucceeded = "Succeeded"
)

Variables

View Source
var (
	// SchemeBuilder is the scheme builder for this package, constructed from
	// addKnownTypes (presumably the function that registers this package's
	// types with a scheme — its body is not visible here).
	SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes)
	// AddToScheme is SchemeBuilder.AddToScheme, exported as a package-level
	// variable.
	AddToScheme = SchemeBuilder.AddToScheme
)
View Source
var SchemeGroupVersion = schema.GroupVersion{Group: CRDGroup, Version: CRDVersion}

SchemeGroupVersion is the group version used to register these objects.

Functions

func CRDName

func CRDName() string

CRDName returns the CRD name.

func Resource

func Resource(resource string) schema.GroupResource

Resource takes an unqualified resource and returns a Group-qualified GroupResource.

Types

type AcceleratorConfig

type AcceleratorConfig struct {
	// Volumes lists the volumes configured for this accelerator
	// (see AcceleratorVolume).
	Volumes []AcceleratorVolume
	// EnvVars lists the environment variables configured for this accelerator
	// (see EnvironmentVariableConfig).
	EnvVars []EnvironmentVariableConfig
}

AcceleratorConfig holds the volumes and environment variables for a docker container.

type AcceleratorVolume

type AcceleratorVolume struct {
	// Name is the name of the volume (presumably the Kubernetes volume name —
	// confirm against the code that consumes it).
	Name      string
	// HostPath is the path on the host.
	HostPath  string
	// MountPath is presumably the path at which the host path is mounted
	// inside the container — confirm against the code that consumes it.
	MountPath string
}

AcceleratorVolume represents a host path that must be mounted into each container that needs to use GPUs.

type ContainerName

type ContainerName string

ContainerName is an enum for expected containers.

const (
	// MXNET is the ContainerName value "mxnet", the container name for an
	// mxnet training job.
	MXNET ContainerName = "mxnet"
)

type ControllerConfig

type ControllerConfig struct {
	// Accelerators maps the name of an accelerator to the configuration for
	// that accelerator. The name should match the value specified as a
	// container limit, e.g. alpha.kubernetes.io/nvidia-gpu.
	Accelerators map[string]AcceleratorConfig
}

ControllerConfig for docker container with GPU accelerator

type EnvironmentVariableConfig

type EnvironmentVariableConfig struct {
	// Name is the name of the environment variable.
	Name  string
	// Value is the value of the environment variable.
	Value string
}

EnvironmentVariableConfig for container

type JobMode

type JobMode string

JobMode mxnet job mode

const (
	// LocalJob is the JobMode value "local".
	LocalJob JobMode = "local"
	// DistJob is the JobMode value "dist" (distributed job).
	DistJob JobMode = "dist"
)

type MxJob

type MxJob struct {
	// TypeMeta is embedded and tagged `json:",inline"`.
	metav1.TypeMeta `json:",inline"`
	// Metadata is the object metadata, serialized under the "metadata" key.
	// Note that it is a named field, not an embedded metav1.ObjectMeta.
	Metadata        metav1.ObjectMeta `json:"metadata,omitempty"`
	// Spec is the job specification, serialized under the "spec" key.
	Spec            MxJobSpec         `json:"spec"`
	// Status is the job status, serialized under the "status" key.
	Status          MxJobStatus       `json:"status"`
}

MxJob mxnet job

func (*MxJob) AsOwner

func (j *MxJob) AsOwner() metav1.OwnerReference

AsOwner returns an owner reference for the MxJob.

func (*MxJob) Key

func (j *MxJob) Key() string

Key is a unique key for the MxJob, used to store it in maps.

func (*MxJob) UnmarshalJSON

func (c *MxJob) UnmarshalJSON(data []byte) error

UnmarshalJSON for MxJob

type MxJobCondition

type MxJobCondition struct {
	// Type is the condition type.
	Type MxJobConditionType `json:"type,omitempty"`

	// Reason is a string describing the reason for the condition.
	Reason string `json:"reason,omitempty"`

	// TransitionTime is stored as a string.
	// NOTE(review): the timestamp format is not visible here — confirm
	// against the code that populates it.
	TransitionTime string `json:"transitionTime,omitempty"`
}

MxJobCondition mxnet job condition

type MxJobConditionType

type MxJobConditionType string

MxJobConditionType mxnet job condition type

type MxJobCopy

type MxJobCopy MxJob

MxJobCopy for MxJob

type MxJobList

type MxJobList struct {
	// TypeMeta is embedded and tagged `json:",inline"`.
	metav1.TypeMeta `json:",inline"`
	// Metadata is the standard list metadata.
	// More info: http://releases.k8s.io/HEAD/docs/devel/api-conventions.md#metadata
	Metadata metav1.ListMeta `json:"metadata,omitempty"`
	// Items is the list of MxJob objects.
	Items []MxJob `json:"items"`
}

MxJobList is a list of MxJobs.

func (*MxJobList) UnmarshalJSON

func (cl *MxJobList) UnmarshalJSON(data []byte) error

UnmarshalJSON for MxJobList

type MxJobListCopy

type MxJobListCopy MxJobList

MxJobListCopy for MxJobList

type MxJobPhase

type MxJobPhase string

MxJobPhase mxnet job phase

type MxJobSpec

type MxJobSpec struct {

	// RuntimeId is the job id.
	// NOTE(review): this field has no json tag, unlike the other fields of
	// this struct, so encoding/json uses the field name "RuntimeId" as its
	// key — confirm that is intended.
	RuntimeId string

	// JobMode is the MXNet training job mode: local or dist. The type is
	// embedded and serialized under the "jobMode" key.
	JobMode `json:"jobMode"`

	// ReplicaSpecs specifies the Mx replicas to run.
	ReplicaSpecs []*MxReplicaSpec `json:"replicaSpecs"`
}

MxJobSpec mxnet job specification

func (*MxJobSpec) Cleanup

func (c *MxJobSpec) Cleanup()

Cleanup cleans up user passed spec, e.g. defaulting, transforming fields. TODO: move this to admission controller

func (*MxJobSpec) ConfigureAccelerators

func (c *MxJobSpec) ConfigureAccelerators(accelerators map[string]AcceleratorConfig) error

ConfigureAccelerators adds any accelerator specific configuration to the pods.

func (*MxJobSpec) SetDefaults

func (c *MxJobSpec) SetDefaults() error

SetDefaults sets any unspecified values to defaults

func (*MxJobSpec) Validate

func (c *MxJobSpec) Validate() error

Validate checks that the MxJobSpec is valid.

type MxJobStatus

type MxJobStatus struct {
	// Phase is the MxJob running phase.
	Phase  MxJobPhase `json:"phase,omitempty"`
	// Reason is a string describing the reason for the current status.
	Reason string     `json:"reason,omitempty"`

	// ControlPaused indicates the operator pauses the control of the cluster.
	// TODO(jlewi): I think we can get rid of ControlPaused.
	ControlPaused bool `json:"controlPaused"`

	// Conditions holds the recorded job conditions. The original comment
	// states that the ten most recent conditions are kept.
	// NOTE(review): the trimming logic is not visible here — confirm the limit.
	Conditions []MxJobCondition `json:"conditions,omitempty"`

	// State indicates the state of the job.
	State State `json:"state,omitempty"`

	// ReplicaStatuses specifies the status of each Mx replica.
	ReplicaStatuses []*MxReplicaStatus `json:"replicaStatuses"`
}

MxJobStatus mxnet job status

func (*MxJobStatus) AppendRecoveringCondition

func (cs *MxJobStatus) AppendRecoveringCondition()

AppendRecoveringCondition for mxnet job status

func (*MxJobStatus) AppendRemovingDeadMember

func (cs *MxJobStatus) AppendRemovingDeadMember(name string)

AppendRemovingDeadMember for mxnet job status

func (*MxJobStatus) AppendScalingDownCondition

func (cs *MxJobStatus) AppendScalingDownCondition(from, to int)

AppendScalingDownCondition for mxnet job status

func (*MxJobStatus) AppendUpgradingCondition

func (cs *MxJobStatus) AppendUpgradingCondition(to string, member string)

AppendUpgradingCondition for mxnet job status

func (*MxJobStatus) Control

func (cs *MxJobStatus) Control()

Control sets cs.ControlPaused to false.

func (MxJobStatus) Copy

func (cs MxJobStatus) Copy() MxJobStatus

Copy mxnet job status

func (*MxJobStatus) IsFailed

func (cs *MxJobStatus) IsFailed() bool

IsFailed returns true if the job status is failed.

func (*MxJobStatus) PauseControl

func (cs *MxJobStatus) PauseControl()

PauseControl sets cs.ControlPaused to true.

func (*MxJobStatus) SetPhase

func (cs *MxJobStatus) SetPhase(p MxJobPhase)

SetPhase sets the phase of the mxnet job status.

func (*MxJobStatus) SetReadyCondition

func (cs *MxJobStatus) SetReadyCondition()

SetReadyCondition for mxnet job status

func (*MxJobStatus) SetReason

func (cs *MxJobStatus) SetReason(r string)

SetReason for mxnet job status

func (*MxJobStatus) SetState

func (cs *MxJobStatus) SetState(s State)

SetState for mxnet job status

type MxReplicaSpec

type MxReplicaSpec struct {
	// Replicas is the number of desired replicas.
	// This is a pointer to distinguish between explicit zero and unspecified.
	// Defaults to 1.
	// More info: http://kubernetes.io/docs/user-guide/replication-controller#what-is-a-replication-controller
	// +optional
	Replicas *int32              `json:"replicas,omitempty" protobuf:"varint,1,opt,name=replicas"`
	// Template is the pod template for the replicas.
	Template *v1.PodTemplateSpec `json:"template,omitempty" protobuf:"bytes,3,opt,name=template"`
	// PsRootPort is the port to use for the scheduler.
	// NOTE(review): the protobuf tag below uses field number 1, which is
	// also used by Replicas above — confirm whether the protobuf tags are
	// actually consumed and, if so, assign a distinct field number.
	PsRootPort    *int32 `json:"PsRootPort,omitempty" protobuf:"varint,1,opt,name=PsRootPort"`
	// MxReplicaType is embedded and serialized under the "mxReplicaType" key.
	MxReplicaType `json:"mxReplicaType"`
}

MxReplicaSpec mxnet replica specification

type MxReplicaStatus

type MxReplicaStatus struct {
	// MxReplicaType is embedded and serialized under the "Mx_replica_type"
	// key. Note that MxReplicaSpec uses a different key ("mxReplicaType") for
	// the same embedded type.
	MxReplicaType `json:"Mx_replica_type"`
	// State is the overall state of the replica
	State ReplicaState `json:"state"`

	// ReplicasStates provides the number of replicas in each status.
	// NOTE(review): this field has no json tag, so encoding/json uses the
	// field name "ReplicasStates" as its key — confirm that is intended.
	ReplicasStates map[ReplicaState]int
}

MxReplicaStatus mxnet replica status

type MxReplicaType

type MxReplicaType string

MxReplicaType determines how a set of Mx processes are handled.

const (
	// SCHEDULER is the MxReplicaType value "SCHEDULER".
	SCHEDULER MxReplicaType = "SCHEDULER"
	// SERVER is the MxReplicaType value "SERVER".
	SERVER MxReplicaType = "SERVER"
	// WORKER is the MxReplicaType value "WORKER".
	WORKER MxReplicaType = "WORKER"
)

type ReplicaState

type ReplicaState string

ReplicaState mxnet job replica state

type State

type State string

State mxnet job state

const (
	// StateUnknown is the State value "Unknown".
	StateUnknown State = "Unknown"
	// StateRunning is the State value "Running".
	StateRunning State = "Running"
	// StateSucceeded is the State value "Succeeded".
	StateSucceeded State = "Succeeded"
	// StateFailed is the State value "Failed".
	StateFailed State = "Failed"
)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL