Documentation ¶
Index ¶
Constants ¶
const ( // ErrCodeNormal indicates a normal non-error situation ErrCodeNormal = "000" // ErrCodeInsufficientResources indicates a scheduling error due to resource constraints ErrCodeInsufficientResources = "S100" // ErrCodeFailedDeploy indicates a general deployment error ErrCodeFailedDeploy = "S101" // ErrCodeFailedPS ... ErrCodeFailedPS = "S102" // ErrCodeImagePull indicates an image pull error ErrCodeImagePull = "S103" // ErrFailedPodReasonUnknown indicates an unknown pod error ErrFailedPodReasonUnknown = "S104" // ErrCodeK8SConnection indicates a Kubernetes connection error ErrCodeK8SConnection = "S200" // ErrCodeEtcdConnection indicates an etcd connection error ErrCodeEtcdConnection = "S201" // ErrCodeFailEnqueue indicates an error while adding the job to the queue in mongo ErrCodeFailEnqueue = "S210" // ErrCodeFailDequeue indicates an error where a job was incorrectly dequeued ErrCodeFailDequeue = "S211" // ErrCodeFailLoadModel indicates an error while loading the model code ErrCodeFailLoadModel = "S301" // ErrCodeFailLoadData indicates an error while loading the training data ErrCodeFailLoadData = "S302" // ErrCodeFailStoreResults indicates an error while storing the trained model and logs ErrCodeFailStoreResults = "S303" // ErrCodeFailStoreResultsOnFail indicates an error while storing the logs on job error ErrCodeFailStoreResultsOnFail = "S304" // ErrCodeFailStoreResultsOnHalt indicates an error while storing the logs on job halt ErrCodeFailStoreResultsOnHalt = "S305" // ErrInvalidManifestFile indicates an invalid manifest file ErrInvalidManifestFile = "C101" // ErrInvalidZipFile indicates an invalid ZIP file ErrInvalidZipFile = "C102" // ErrInvalidCredentials indicates an invalid set of credentials ErrInvalidCredentials = "C103" // ErrInvalidResourceSpecs indicates invalid resource specifications ErrInvalidResourceSpecs = "C104" // ErrLearnerProcessCrash indicates a crash of the process in the learner container ErrLearnerProcessCrash = "C201" )
const (
// TrainerV2LocalAddress exposes the local address that is used if we run with DNS disabled
TrainerV2LocalAddress = ":30005"
)
Variables ¶
This section is empty.
Functions ¶
func CurrentTimestampAsString ¶
func CurrentTimestampAsString() string
CurrentTimestampAsString returns the current time as milliseconds since the Unix epoch (e.g., "1519135679722")
Types ¶
type JobStatusClient ¶
type JobStatusClient interface {
UpdateJobStatus()
}
JobStatusClient is a client interface for updating the status of training jobs
type TrainerClient ¶
type TrainerClient interface { Client() grpc_trainer_v2.TrainerClient Close() error }
TrainerClient is a client interface for interacting with the trainer service.
func NewTrainer ¶
func NewTrainer() (TrainerClient, error)
NewTrainer creates a new load-balanced client to talk to the Trainer service. If the dns_server config option is set to 'disabled', it will default to the pre-defined LocalPort of the service.
func NewTrainerWithAddress ¶
func NewTrainerWithAddress(addr string) (TrainerClient, error)
NewTrainerWithAddress creates a new load-balanced client to talk to the Trainer service. If the dns_server config option is set to 'disabled', it will default to the pre-defined LocalPort of the service.
type TrainingStatusUpdate ¶
type TrainingStatusUpdate struct { Status grpc_trainer_v2.Status Timestamp string ErrorCode string StatusMessage string }
TrainingStatusUpdate captures the details for training status update events
func GetStatus ¶
func GetStatus(value string, logr *logger.LocLoggingEntry) *TrainingStatusUpdate
GetStatus converts a string into the proper DLaaS type for job status updates. The value parameter is either a plain status string (e.g., "PROCESSING") or a JSON string with status and error details, e.g., '{"status":"FAILED","exit_code":"51","status_message":"Error opening ZIP file"}'