commands

package
v0.1.0-rc.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 9, 2018 License: Apache-2.0 Imports: 27 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// CHART_PKG_LOC holds the string "CHARTREPO".
	// NOTE(review): presumably the name of an environment variable that
	// points at the chart package location — confirm against its usage.
	CHART_PKG_LOC = "CHARTREPO"
	// NVIDIAGPUResourceName is the extended name of the NVIDIA GPU resource
	// since v1.8; this uses the device plugin mechanism.
	NVIDIAGPUResourceName = "nvidia.com/gpu"

	// DeprecatedNVIDIAGPUResourceName is the "alpha.kubernetes.io/nvidia-gpu"
	// resource name, marked deprecated by its identifier.
	// NOTE(review): presumably the pre-device-plugin alpha name kept for
	// older clusters — confirm against its usage.
	DeprecatedNVIDIAGPUResourceName = "alpha.kubernetes.io/nvidia-gpu"
)
View Source
const (
	// CLIName is the name of the command-line tool.
	CLIName = "arena"
)

Variables

This section is empty.

Functions

func DeleteTrainingJob

func DeleteTrainingJob(jobName string) error

func GetJobDashboards

func GetJobDashboards(dashboard string, job *v1.Job, pods []corev1.Pod) []string

func NewCommand

func NewCommand() *cobra.Command

NewCommand returns a new instance of an Arena command

func NewCompletionCommand

func NewCompletionCommand() *cobra.Command

func NewDeleteCommand

func NewDeleteCommand() *cobra.Command

func NewGetCommand

func NewGetCommand() *cobra.Command

func NewListCommand

func NewListCommand() *cobra.Command

func NewLogViewerCommand

func NewLogViewerCommand() *cobra.Command

func NewLogsCommand

func NewLogsCommand() *cobra.Command

func NewSubmitCommand

func NewSubmitCommand() *cobra.Command

func NewSubmitHorovodJobCommand

func NewSubmitHorovodJobCommand() *cobra.Command

func NewSubmitStandaloneJobCommand

func NewSubmitStandaloneJobCommand() *cobra.Command

func NewSubmitTFJobCommand

func NewSubmitTFJobCommand() *cobra.Command

func NewTopCommand

func NewTopCommand() *cobra.Command

func NewTopJobCommand

func NewTopJobCommand() *cobra.Command

func NewTopNodeCommand

func NewTopNodeCommand() *cobra.Command

func NewVersionCmd

func NewVersionCmd(cliName string) *cobra.Command

Types

type HorovodJob

type HorovodJob struct {
	// JobInfo is embedded by pointer: its methods are promoted to HorovodJob
	// unless HorovodJob declares a method with the same name.
	*JobInfo
}

Horovod Job Information

func (*HorovodJob) AllPods

func (hj *HorovodJob) AllPods() []v1.Pod

Get all the pods of the Training Job

func (*HorovodJob) ChiefPod

func (hj *HorovodJob) ChiefPod() v1.Pod

Get the chief Pod of the Job.

func (*HorovodJob) GetJobDashboards

func (hj *HorovodJob) GetJobDashboards(client *kubernetes.Clientset) ([]string, error)

Get the dashboard URLs of the job.

func (*HorovodJob) HostIPOfChief

func (hj *HorovodJob) HostIPOfChief() (hostIP string)

Get the hostIP of the chief Pod

type HorovodJobTrainer

type HorovodJobTrainer struct {
	// contains filtered or unexported fields
}

Horovod Job trainer

func (*HorovodJobTrainer) GetTrainingJob

func (m *HorovodJobTrainer) GetTrainingJob(name, namespace string) (tj TrainingJob, err error)

func (*HorovodJobTrainer) IsSupported

func (m *HorovodJobTrainer) IsSupported(name, ns string) bool

Check whether the job is a Horovod job.

func (*HorovodJobTrainer) Type

func (m *HorovodJobTrainer) Type() string

type JobInfo

type JobInfo struct {
	// contains filtered or unexported fields
}

func (*JobInfo) Age

func (ji *JobInfo) Age() string

func (*JobInfo) AllPods

func (ji *JobInfo) AllPods() []v1.Pod

Get all the pods of the Training Job

func (*JobInfo) AllocatedGPU

func (ji *JobInfo) AllocatedGPU() int64

Allocated GPU count of the Job

func (*JobInfo) ChiefPod

func (ji *JobInfo) ChiefPod() v1.Pod

Get the chief Pod of the Job.

func (*JobInfo) GetStatus

func (ji *JobInfo) GetStatus() (status string)

Get the Status of the Job: RUNNING, PENDING, SUCCEEDED, FAILED

func (*JobInfo) HostIPOfChief

func (ji *JobInfo) HostIPOfChief() (hostIP string)

Get the hostIP of the chief Pod

func (*JobInfo) Name

func (ji *JobInfo) Name() string

func (*JobInfo) RequestedGPU

func (ji *JobInfo) RequestedGPU() int64

Requested GPU count of the Job

func (*JobInfo) StartTime

func (ji *JobInfo) StartTime() *metav1.Time

func (*JobInfo) Trainer

func (ji *JobInfo) Trainer() string

type NodeDescriber

type NodeDescriber struct {
	// contains filtered or unexported fields
}

type NodeInfo

type NodeInfo struct {
	// contains filtered or unexported fields
}

type StandaloneJob

type StandaloneJob struct {
	// JobInfo is embedded by pointer: its methods are promoted to
	// StandaloneJob unless StandaloneJob declares a method with the same name.
	*JobInfo
}

Standalone Job Information

func (*StandaloneJob) GetJobDashboards

func (sj *StandaloneJob) GetJobDashboards(client *kubernetes.Clientset) ([]string, error)

Get the dashboard URLs of the job.

type StandaloneJobTrainer

type StandaloneJobTrainer struct {
	// contains filtered or unexported fields
}

Standalone Job trainer

func (*StandaloneJobTrainer) GetTrainingJob

func (s *StandaloneJobTrainer) GetTrainingJob(name, namespace string) (tj TrainingJob, err error)

func (*StandaloneJobTrainer) IsSupported

func (s *StandaloneJobTrainer) IsSupported(name, ns string) bool

Check whether the job is a Standalone job.

func (*StandaloneJobTrainer) Type

func (s *StandaloneJobTrainer) Type() string

type TensorFlowJob

type TensorFlowJob struct {
	// contains filtered or unexported fields
}

TensorFlow Job Information

func (*TensorFlowJob) Age

func (tj *TensorFlowJob) Age() string

Get the Job Age

func (*TensorFlowJob) AllPods

func (tj *TensorFlowJob) AllPods() []v1.Pod

Get all the pods of the Training Job

func (*TensorFlowJob) AllocatedGPU

func (tj *TensorFlowJob) AllocatedGPU() int64

Allocated GPU count of the Job

func (*TensorFlowJob) ChiefPod

func (tj *TensorFlowJob) ChiefPod() v1.Pod

Get the chief Pod of the Job.

func (*TensorFlowJob) GetJobDashboards

func (tj *TensorFlowJob) GetJobDashboards(client *kubernetes.Clientset) ([]string, error)

Get the dashboard URLs of the job.

func (*TensorFlowJob) GetStatus

func (tj *TensorFlowJob) GetStatus() (status string)

Get the Status of the Job: RUNNING, PENDING, SUCCEEDED, FAILED

func (*TensorFlowJob) HostIPOfChief

func (tj *TensorFlowJob) HostIPOfChief() (hostIP string)

Get the hostIP of the chief Pod

func (*TensorFlowJob) Name

func (tj *TensorFlowJob) Name() string

func (*TensorFlowJob) RequestedGPU

func (tj *TensorFlowJob) RequestedGPU() int64

Requested GPU count of the Job

func (*TensorFlowJob) StartTime

func (tj *TensorFlowJob) StartTime() *metav1.Time

func (*TensorFlowJob) Trainer

func (tj *TensorFlowJob) Trainer() string

type TensorFlowJobTrainer

type TensorFlowJobTrainer struct {
	// contains filtered or unexported fields
}

TensorFlow Job trainer

func (*TensorFlowJobTrainer) GetTrainingJob

func (tt *TensorFlowJobTrainer) GetTrainingJob(name, namespace string) (tj TrainingJob, err error)

func (*TensorFlowJobTrainer) IsSupported

func (tt *TensorFlowJobTrainer) IsSupported(name, ns string) bool

Check whether the job is a TensorFlow job.

func (*TensorFlowJobTrainer) Type

func (tt *TensorFlowJobTrainer) Type() string

type Trainer

type Trainer interface {
	// IsSupported checks whether the training job identified by name and
	// namespace is supported by this trainer.
	IsSupported(name, ns string) bool

	// GetTrainingJob returns the TrainingJob object directly. This method is
	// called when running `arena get`.
	GetTrainingJob(name, namespace string) (TrainingJob, error)

	// Type returns the type of the trainer.
	Type() string
}

func NewHorovodJobTrainer

func NewHorovodJobTrainer(client *kubernetes.Clientset) Trainer

func NewStandaloneJobTrainer

func NewStandaloneJobTrainer(client *kubernetes.Clientset) Trainer

func NewTensorFlowJobTrainer

func NewTensorFlowJobTrainer(client *kubernetes.Clientset) Trainer

func NewTrainers

func NewTrainers(client *kubernetes.Clientset) []Trainer

NewTrainers constructs the list of trainers.

type TrainingJob

type TrainingJob interface {
	// ChiefPod returns the chief Pod of the Job.
	ChiefPod() v1.Pod

	// Name returns the name of the Training Job.
	Name() string

	// AllPods returns all the pods of the Training Job.
	AllPods() []v1.Pod

	// GetStatus returns the status of the Job: RUNNING, PENDING, SUCCEEDED, FAILED.
	GetStatus() string

	// Trainer returns the trainer type; supported types are MPI, standalone
	// and tensorflow.
	Trainer() string

	// Age returns the age of the Job.
	Age() string

	// StartTime returns the start time of the Job.
	StartTime() *metav1.Time

	// GetJobDashboards returns the dashboard URLs of the Job.
	GetJobDashboards(client *kubernetes.Clientset) ([]string, error)

	// RequestedGPU returns the requested GPU count of the Job.
	RequestedGPU() int64

	// AllocatedGPU returns the allocated GPU count of the Job.
	AllocatedGPU() int64

	// HostIPOfChief returns the host IP of the chief pod.
	HostIPOfChief() string
}

The Training Job can be TensorFlow, MPI and Caffe

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL