Documentation
¶
Index ¶
Constants ¶
View Source
// Defaults and well-known names shared by the metrics collector.
const (
	// DefaultPollInterval is the default interval between checks of the
	// running processes.
	DefaultPollInterval = time.Second

	// DefaultTimeout is the default timeout before an error is raised during
	// the running processes check.
	// Set the value to 0 to run without a timeout.
	DefaultTimeout = 0

	// DefaultWaitAllProcesses is the default value for whether to wait for all
	// other main processes of the container to exit.
	// NOTE: this is the string "true", not a bool.
	DefaultWaitAllProcesses = "true"

	// TrainingCompleted is the job-finished marker in the $$$$.pid file when
	// the main training process has completed.
	TrainingCompleted = "completed"

	// TrainingEarlyStopped is the job-finished marker in the $$$$.pid file
	// when the main training process has been early stopped.
	TrainingEarlyStopped = "early-stopped"

	// DefaultFilter is the default metrics collector filter (a regular
	// expression) used to parse the metrics.
	// Capture group 1 is the metric name and capture group 2 is its value.
	// Metrics must be printed this way:
	//
	//	loss=0.3
	//	accuracy=.98
	//	Score=-7.53e-05
	//	Score=-7.53e+05
	//	Score=1E0
	//	Score=1.23E10
	//
	// NOTE(review): inside the character class `[\w|-]` the `|` is a literal
	// character, not an alternation, so metric names may contain `|`.
	// NOTE(review): every part of the value group is optional, so the pattern
	// also matches "name=" followed by a non-number, with an empty value.
	// Callers presumably need to reject empty values themselves; confirm.
	DefaultFilter = `([\w|-]+)\s*=\s*([+-]?\d*(\.\d+)?([Ee][+-]?\d+)?)`

	// TimeStampJsonKey is the key name "timestamp".
	// Presumably the JSON field that carries a metric's timestamp; confirm
	// against the parser that uses it.
	TimeStampJsonKey = "timestamp"

	// TODO (andreyvelich): Do we need to maintain 2 names? Should we leave only 1?

	// MetricCollectorContainerName is the container name "metrics-collector".
	MetricCollectorContainerName = "metrics-collector"

	// MetricLoggerCollectorContainerName is the container name
	// "metrics-logger-and-collector".
	MetricLoggerCollectorContainerName = "metrics-logger-and-collector"
)
Variables ¶
View Source
// AutoInjectMetricsCollectorList is a fixed-size array of collector kinds:
// StdOut, TfEvent, File and PrometheusMetric.
// NOTE(review): presumably these are the kinds for which the metrics
// collector is injected automatically; confirm against the code that reads
// this list.
var AutoInjectMetricsCollectorList = [...]v1beta1common.CollectorKind{
	v1beta1common.StdOutCollector,
	v1beta1common.TfEventCollector,
	v1beta1common.FileCollector,
	v1beta1common.PrometheusMetricCollector,
}
Functions ¶
func GetMainProcesses ¶ added in v0.10.0
GetMainProcesses returns an array with the PIDs of all running processes and the PID of the main process that the metrics collector is waiting for.
func WaitMainProcesses ¶ added in v0.10.0
func WaitMainProcesses(opts WaitPidsOpts) error
WaitMainProcesses blocks the metrics collector parser until the required processes (PIDs) have finished.
Types ¶
Click to show internal directories.
Click to hide internal directories.