Documentation ¶
Index ¶
- func GetPodAddr() (string, error)
- func GetProcCurrentCPUMillicores(cpuTime float64, prevCPUTime float64, systemCPUTime float64, ...) float64
- func GetSystemCPU() (float64, error)
- type AWSCredentialProvider
- type AssignedGpuDevices
- type CedanaClient
- func (c *CedanaClient) Checkpoint(ctx context.Context, containerId string) (string, error)
- func (c *CedanaClient) Close()
- func (c *CedanaClient) DetailedHealthCheckWait(ctx context.Context) (*cedanaproto.DetailedHealthCheckResponse, error)
- func (c *CedanaClient) Manage(ctx context.Context, containerId string, gpuEnabled bool) error
- func (c *CedanaClient) PrepareContainerSpec(spec *specs.Spec, containerId string, containerHostname string, ...) error
- func (c *CedanaClient) Restore(ctx context.Context, restoreOpts cedanaRestoreOpts, runcOpts *runc.CreateOpts) (*cedanaproto.ProcessState, error)
- type ConsoleWriter
- type ContainerInstance
- type ContainerLogMessage
- type ContainerLogger
- type ContainerMountManager
- type ContainerNetworkManager
- type ContainerNvidiaManager
- func (c *ContainerNvidiaManager) AssignGPUDevices(containerId string, gpuCount uint32) (*AssignedGpuDevices, error)
- func (c *ContainerNvidiaManager) InjectEnvVars(env []string, options *ContainerOptions) ([]string, bool)
- func (c *ContainerNvidiaManager) InjectMounts(mounts []specs.Mount) []specs.Mount
- func (c *ContainerNvidiaManager) UnassignGPUDevices(containerId string)
- type ContainerOptions
- type CredentialProvider
- type DockerCredentialProvider
- type ExecWriter
- type FileCacheManager
- func (cm *FileCacheManager) CacheAvailable() bool
- func (cm *FileCacheManager) CacheFilesInPath(sourcePath string)
- func (cm *FileCacheManager) EnableVolumeCaching(workspaceName string, volumeCacheMap map[string]string, spec *specs.Spec) error
- func (cm *FileCacheManager) GetClient() *blobcache.BlobCacheClient
- type FileLock
- type GPUInfoClient
- type GPUInfoStat
- type GPUManager
- type GPUMemoryUsageStats
- type ImageClient
- func (c *ImageClient) Archive(ctx context.Context, bundlePath string, imageId string, progressChan chan int) error
- func (c *ImageClient) BuildAndArchiveImage(ctx context.Context, outputLogger *slog.Logger, dockerfile string, ...) error
- func (c *ImageClient) Cleanup() error
- func (c *ImageClient) InspectAndVerifyImage(ctx context.Context, sourceImage string, creds string) error
- func (c *ImageClient) PullAndArchiveImage(ctx context.Context, sourceImage string, imageId string, creds string) error
- func (c *ImageClient) PullLazy(request *types.ContainerRequest) error
- type Mount
- type NvidiaInfoClient
- type ProcUtil
- type ProcessMonitor
- type ProcessStats
- type RunCServer
- func (s *RunCServer) RunCArchive(req *pb.RunCArchiveRequest, stream pb.RunCService_RunCArchiveServer) error
- func (s *RunCServer) RunCExec(ctx context.Context, in *pb.RunCExecRequest) (*pb.RunCExecResponse, error)
- func (s *RunCServer) RunCKill(ctx context.Context, in *pb.RunCKillRequest) (*pb.RunCKillResponse, error)
- func (s *RunCServer) RunCStatus(ctx context.Context, in *pb.RunCStatusRequest) (*pb.RunCStatusResponse, error)
- func (s *RunCServer) RunCStreamLogs(req *pb.RunCStreamLogsRequest, stream pb.RunCService_RunCStreamLogsServer) error
- func (s *RunCServer) Start() error
- type Worker
- type WorkerMetrics
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func GetPodAddr ¶
GetPodAddr gets the IP from the POD_IP env var. Returns an error if it fails to retrieve an IP.
func GetSystemCPU ¶
Types ¶
type AWSCredentialProvider ¶
type AWSCredentialProvider struct { CredentialProvider Region string AccessKey string SecretKey string }
AWS auth provider
func (*AWSCredentialProvider) GetAuthString ¶
func (p *AWSCredentialProvider) GetAuthString() (string, error)
func (*AWSCredentialProvider) GetAuthorizationToken ¶
func (p *AWSCredentialProvider) GetAuthorizationToken() (string, error)
func (*AWSCredentialProvider) GetUsername ¶
func (p *AWSCredentialProvider) GetUsername() string
type AssignedGpuDevices ¶
type AssignedGpuDevices struct {
// contains filtered or unexported fields
}
func (*AssignedGpuDevices) String ¶
func (d *AssignedGpuDevices) String() string
type CedanaClient ¶
type CedanaClient struct {
// contains filtered or unexported fields
}
func NewCedanaClient ¶
func (*CedanaClient) Checkpoint ¶
Checkpoint a runc container, returns the path to the checkpoint
func (*CedanaClient) Close ¶
func (c *CedanaClient) Close()
func (*CedanaClient) DetailedHealthCheckWait ¶
func (c *CedanaClient) DetailedHealthCheckWait( ctx context.Context, ) (*cedanaproto.DetailedHealthCheckResponse, error)
Perform a detailed health check of cedana C/R capabilities
func (*CedanaClient) PrepareContainerSpec ¶
func (c *CedanaClient) PrepareContainerSpec(spec *specs.Spec, containerId string, containerHostname string, gpuEnabled bool) error
Updates the runc container spec to make available both the shared library and the shared memory that is used for communication
func (*CedanaClient) Restore ¶
func (c *CedanaClient) Restore( ctx context.Context, restoreOpts cedanaRestoreOpts, runcOpts *runc.CreateOpts, ) (*cedanaproto.ProcessState, error)
Restore a runc container. If a checkpoint path is provided, it will be used as the checkpoint. If an empty path is provided, the latest checkpoint path from the DB will be used.
type ConsoleWriter ¶
type ConsoleWriter struct {
// contains filtered or unexported fields
}
func NewConsoleWriter ¶
func NewConsoleWriter(writer io.Writer) (*ConsoleWriter, error)
Implementation of runc ConsoleSocket, for writing only
func (*ConsoleWriter) Path ¶
func (w *ConsoleWriter) Path() string
type ContainerInstance ¶
type ContainerInstance struct { Id string StubId string BundlePath string Overlay *common.ContainerOverlay Spec *specs.Spec Err error ExitCode int Port int OutputWriter *common.OutputWriter LogBuffer *common.LogBuffer Request *types.ContainerRequest }
type ContainerLogMessage ¶
type ContainerLogger ¶
type ContainerLogger struct {
// contains filtered or unexported fields
}
func (*ContainerLogger) CaptureLogs ¶
func (r *ContainerLogger) CaptureLogs(containerId string, logChan chan common.LogRecord) error
type ContainerMountManager ¶
type ContainerMountManager struct { EagerCacheStubCode bool // contains filtered or unexported fields }
func NewContainerMountManager ¶
func NewContainerMountManager(config types.AppConfig) *ContainerMountManager
func (*ContainerMountManager) RemoveContainerMounts ¶
func (c *ContainerMountManager) RemoveContainerMounts(containerId string)
RemoveContainerMounts removes all mounts for a container
func (*ContainerMountManager) SetupContainerMounts ¶
func (c *ContainerMountManager) SetupContainerMounts(request *types.ContainerRequest) error
SetupContainerMounts initializes any external storage for a container
type ContainerNetworkManager ¶
type ContainerNetworkManager struct {
// contains filtered or unexported fields
}
func NewContainerNetworkManager ¶
func NewContainerNetworkManager(ctx context.Context, workerId string, workerRepo repository.WorkerRepository, containerRepo repository.ContainerRepository, config types.AppConfig) (*ContainerNetworkManager, error)
func (*ContainerNetworkManager) ExposePort ¶
func (m *ContainerNetworkManager) ExposePort(containerId string, hostPort, containerPort int) error
func (*ContainerNetworkManager) Setup ¶
func (m *ContainerNetworkManager) Setup(containerId string, spec *specs.Spec) error
func (*ContainerNetworkManager) TearDown ¶
func (m *ContainerNetworkManager) TearDown(containerId string) error
type ContainerNvidiaManager ¶
type ContainerNvidiaManager struct {
// contains filtered or unexported fields
}
func (*ContainerNvidiaManager) AssignGPUDevices ¶
func (c *ContainerNvidiaManager) AssignGPUDevices(containerId string, gpuCount uint32) (*AssignedGpuDevices, error)
func (*ContainerNvidiaManager) InjectEnvVars ¶
func (c *ContainerNvidiaManager) InjectEnvVars(env []string, options *ContainerOptions) ([]string, bool)
func (*ContainerNvidiaManager) InjectMounts ¶
func (c *ContainerNvidiaManager) InjectMounts(mounts []specs.Mount) []specs.Mount
func (*ContainerNvidiaManager) UnassignGPUDevices ¶
func (c *ContainerNvidiaManager) UnassignGPUDevices(containerId string)
type ContainerOptions ¶
type CredentialProvider ¶
type DockerCredentialProvider ¶
type DockerCredentialProvider struct { CredentialProvider Username string Password string }
Docker provider
func (*DockerCredentialProvider) GetAuthString ¶
func (p *DockerCredentialProvider) GetAuthString() (string, error)
func (*DockerCredentialProvider) GetAuthorizationToken ¶
func (p *DockerCredentialProvider) GetAuthorizationToken() (string, error)
func (*DockerCredentialProvider) GetUsername ¶
func (p *DockerCredentialProvider) GetUsername() string
type ExecWriter ¶
type ExecWriter struct {
// contains filtered or unexported fields
}
Will be replaced when structured logging is merged
type FileCacheManager ¶
type FileCacheManager struct {
// contains filtered or unexported fields
}
func NewFileCacheManager ¶
func NewFileCacheManager(config types.AppConfig, client *blobcache.BlobCacheClient) *FileCacheManager
func (*FileCacheManager) CacheAvailable ¶
func (cm *FileCacheManager) CacheAvailable() bool
CacheAvailable checks if the file cache is available
func (*FileCacheManager) CacheFilesInPath ¶
func (cm *FileCacheManager) CacheFilesInPath(sourcePath string)
CacheFilesInPath caches files from a specified source path
func (*FileCacheManager) EnableVolumeCaching ¶
func (cm *FileCacheManager) EnableVolumeCaching(workspaceName string, volumeCacheMap map[string]string, spec *specs.Spec) error
func (*FileCacheManager) GetClient ¶
func (cm *FileCacheManager) GetClient() *blobcache.BlobCacheClient
GetClient returns the blobcache client instance.
type FileLock ¶
type FileLock struct {
// contains filtered or unexported fields
}
func NewFileLock ¶
type GPUInfoClient ¶
type GPUInfoClient interface { AvailableGPUDevices() ([]int, error) GetGPUMemoryUsage(deviceIndex int) (GPUMemoryUsageStats, error) }
type GPUInfoStat ¶
type GPUManager ¶
type GPUManager interface { AssignGPUDevices(containerId string, gpuCount uint32) (*AssignedGpuDevices, error) UnassignGPUDevices(containerId string) InjectEnvVars(env []string, options *ContainerOptions) ([]string, bool) InjectMounts(mounts []specs.Mount) []specs.Mount }
func NewContainerNvidiaManager ¶
func NewContainerNvidiaManager(gpuCount uint32) GPUManager
type GPUMemoryUsageStats ¶
type ImageClient ¶
type ImageClient struct {
// contains filtered or unexported fields
}
func NewImageClient ¶
func NewImageClient(config types.AppConfig, workerId string, workerRepo repository.WorkerRepository, fileCacheManager *FileCacheManager) (*ImageClient, error)
func (*ImageClient) Archive ¶
func (c *ImageClient) Archive(ctx context.Context, bundlePath string, imageId string, progressChan chan int) error
Generate and upload archived version of the image for distribution
func (*ImageClient) BuildAndArchiveImage ¶
func (*ImageClient) Cleanup ¶
func (c *ImageClient) Cleanup() error
func (*ImageClient) InspectAndVerifyImage ¶
func (*ImageClient) PullAndArchiveImage ¶
func (*ImageClient) PullLazy ¶
func (c *ImageClient) PullLazy(request *types.ContainerRequest) error
type NvidiaInfoClient ¶
type NvidiaInfoClient struct{}
func (*NvidiaInfoClient) AvailableGPUDevices ¶
func (c *NvidiaInfoClient) AvailableGPUDevices() ([]int, error)
func (*NvidiaInfoClient) GetGPUMemoryUsage ¶
func (c *NvidiaInfoClient) GetGPUMemoryUsage(deviceIndex int) (GPUMemoryUsageStats, error)
GetGPUMemoryUsage retrieves the memory usage of a specific NVIDIA GPU. It returns the total and used memory in bytes.
type ProcUtil ¶
func NewProcUtil ¶
type ProcessMonitor ¶
type ProcessMonitor struct {
// contains filtered or unexported fields
}
func NewProcessMonitor ¶
func NewProcessMonitor(pid int, devices []specs.LinuxDeviceCgroup) *ProcessMonitor
func (*ProcessMonitor) GetStatistics ¶
func (m *ProcessMonitor) GetStatistics() (*ProcessStats, error)
type ProcessStats ¶
type ProcessStats struct { CPU uint64 // in millicores Memory process.MemoryInfoStat IO process.IOCountersStat NetIO net.IOCountersStat GPU GPUInfoStat }
type RunCServer ¶
type RunCServer struct { pb.UnimplementedRunCServiceServer // contains filtered or unexported fields }
func NewRunCServer ¶
func NewRunCServer(containerInstances *common.SafeMap[*ContainerInstance], imageClient *ImageClient) (*RunCServer, error)
func (*RunCServer) RunCArchive ¶
func (s *RunCServer) RunCArchive(req *pb.RunCArchiveRequest, stream pb.RunCService_RunCArchiveServer) error
func (*RunCServer) RunCExec ¶
func (s *RunCServer) RunCExec(ctx context.Context, in *pb.RunCExecRequest) (*pb.RunCExecResponse, error)
Execute an arbitrary command inside a running container
func (*RunCServer) RunCKill ¶
func (s *RunCServer) RunCKill(ctx context.Context, in *pb.RunCKillRequest) (*pb.RunCKillResponse, error)
func (*RunCServer) RunCStatus ¶
func (s *RunCServer) RunCStatus(ctx context.Context, in *pb.RunCStatusRequest) (*pb.RunCStatusResponse, error)
func (*RunCServer) RunCStreamLogs ¶
func (s *RunCServer) RunCStreamLogs(req *pb.RunCStreamLogsRequest, stream pb.RunCService_RunCStreamLogsServer) error
func (*RunCServer) Start ¶
func (s *RunCServer) Start() error
type Worker ¶
type Worker struct {
// contains filtered or unexported fields
}
func (*Worker) IsCRIUAvailable ¶
func (*Worker) RunContainer ¶
func (s *Worker) RunContainer(request *types.ContainerRequest) error
Spawn a single container and stream output to stdout/stderr
type WorkerMetrics ¶
type WorkerMetrics struct {
// contains filtered or unexported fields
}
func NewWorkerMetrics ¶
func NewWorkerMetrics( ctx context.Context, workerId string, workerRepo repo.WorkerRepository, config types.MonitoringConfig, ) (*WorkerMetrics, error)
func (*WorkerMetrics) EmitContainerUsage ¶
func (wm *WorkerMetrics) EmitContainerUsage(ctx context.Context, request *types.ContainerRequest)
Periodically send metrics to track container duration