Documentation ¶
Index ¶
- Constants
- Variables
- func ConfigureCloud(configFile *ServerConfigFile, controllerID, nametag string) (cloud.CloudClient, error)
- func ConfigureK8sKipClient() (*kubeclient.Clientset, *rest.Config, error)
- func GetNodeForRunningPod(podName, unitName string, podRegistry *registry.PodRegistry, ...) (*api.Node, error)
- func VersionAndKind(m []byte) (string, string, error)
- type AWSConfig
- type ActivePods
- type AzureConfig
- type CellController
- type CellOp
- type CellsConfig
- type CloudAPIHealthCheck
- type Controller
- type ControllerManager
- func (cm *ControllerManager) ControllersRunning() bool
- func (cm *ControllerManager) GetAllControllers() map[string]Controller
- func (cm *ControllerManager) GetController(name string) (Controller, bool)
- func (cm *ControllerManager) Start()
- func (cm *ControllerManager) StartControllers()
- func (cm *ControllerManager) StopControllers()
- func (cm *ControllerManager) WaitForShutdown(systemShutdown <-chan struct{}, systemWG *sync.WaitGroup)
- type EtcdClientConfig
- type EtcdConfig
- type FullPodStatus
- type GCEConfig
- type GCECredentials
- type GarbageController
- func (c *GarbageController) CleanAzureResourceGroups()
- func (c *GarbageController) CleanAzureResourceGroupsHelper(client ResourceGrouper) error
- func (c *GarbageController) CleanDanglingRoutes()
- func (c *GarbageController) CleanInstances()
- func (c *GarbageController) CleanTerminatedNodes()
- func (c *GarbageController) Dump() []byte
- func (c *GarbageController) GCLoop(quit <-chan struct{}, wg *sync.WaitGroup)
- func (c *GarbageController) Start(quit <-chan struct{}, wg *sync.WaitGroup)
- type GarbageControllerConfig
- type HealthCheckConfig
- type InstanceProvider
- func (s InstanceProvider) Attach(stream clientapi.Kip_AttachServer) error
- func (p *InstanceProvider) ConfigureNode(ctx context.Context, n *v1.Node)
- func (s InstanceProvider) Create(context context.Context, request *clientapi.CreateRequest) (*clientapi.APIReply, error)
- func (p *InstanceProvider) CreatePod(ctx context.Context, pod *v1.Pod) error
- func (s InstanceProvider) Delete(context context.Context, request *clientapi.DeleteRequest) (*clientapi.APIReply, error)
- func (p *InstanceProvider) DeletePod(ctx context.Context, pod *v1.Pod) (err error)
- func (s InstanceProvider) Deploy(stream clientapi.Kip_DeployServer) error
- func (s InstanceProvider) Dump(context context.Context, request *clientapi.DumpRequest) (*clientapi.APIReply, error)
- func (s InstanceProvider) Exec(stream clientapi.Kip_ExecServer) error
- func (s InstanceProvider) Get(context context.Context, request *clientapi.GetRequest) (*clientapi.APIReply, error)
- func (p *InstanceProvider) GetContainerLogs(ctx context.Context, namespace, podName, containerName string, ...) (io.ReadCloser, error)
- func (s InstanceProvider) GetLogs(context context.Context, request *clientapi.LogsRequest) (*clientapi.APIReply, error)
- func (s InstanceProvider) GetNodeForRunningPod(podName, unitName string) (*api.Node, error)
- func (p *InstanceProvider) GetPod(ctx context.Context, namespace, name string) (*v1.Pod, error)
- func (p *InstanceProvider) GetPodStatus(ctx context.Context, namespace, name string) (*v1.PodStatus, error)
- func (p *InstanceProvider) GetPods(ctx context.Context) ([]*v1.Pod, error)
- func (p *InstanceProvider) GetStatsSummary(ctx context.Context) (*stats.Summary, error)
- func (s InstanceProvider) GetVersion(context context.Context, request *clientapi.VersionRequest) (*clientapi.VersionReply, error)
- func (p *InstanceProvider) Handle(ev events.Event) error
- func (s InstanceProvider) IsLeader(context context.Context, request *clientapi.IsLeaderRequest) (*clientapi.IsLeaderReply, error)
- func (p *InstanceProvider) NotifyNodeStatus(ctx context.Context, notifier func(*v1.Node))
- func (p *InstanceProvider) NotifyPods(ctx context.Context, notifier func(*v1.Pod))
- func (p *InstanceProvider) Ping(ctx context.Context) error
- func (p *InstanceProvider) RunInContainer(ctx context.Context, namespace, podName, containerName string, cmd []string, ...) error
- func (p *InstanceProvider) Stop()
- func (s InstanceProvider) StreamLogs(slr *clientapi.StreamLogsRequest, stream clientapi.Kip_StreamLogsServer) error
- func (s InstanceProvider) Update(context context.Context, request *clientapi.UpdateRequest) (*clientapi.APIReply, error)
- func (p *InstanceProvider) UpdatePod(ctx context.Context, pod *v1.Pod) error
- type InternalEtcdConfig
- type ItzoConfig
- type KubeletConfig
- type MetricsController
- type MultiCloudConfig
- type PodController
- func (c *PodController) ControlLoop(quit <-chan struct{}, wg *sync.WaitGroup)
- func (c *PodController) ControlPods()
- func (c *PodController) Dump() []byte
- func (c *PodController) Start(quit <-chan struct{}, wg *sync.WaitGroup)
- func (c *PodController) SyncRunningPods()
- func (c *PodController) TagNodeWithPodLabels(pod *api.Pod, node *api.Node)
- type ResourceGrouper
- type SendRecver
- type ServerConfigFile
- type StatusHealthCheck
- type TestingConfig
- type WinSize
Constants ¶
const ( StatusNotFound = 404 StatusAlreadyExists = 409 MisdirectedRequest = 421 StatusUnprocessableEntity = 422 StatusServerError = 500 )
const ( PodControllerCleanPeriod = 20 * time.Second PodControllerControlPeriod = 5 * time.Second PodControllerFullSyncPeriod = 31 * time.Second )
TODO: make these values configurable.
const (
ResourceLimitsGPU v1.ResourceName = "nvidia.com/gpu"
)
Variables ¶
var (
GPUNodeSelectorPrefixes = []string{
"node.elotl.co/gpu-",
"cloud.google.com/gke-accelerator",
}
)
var (
MaxEventListSize = 4000 // modified for testing
)
Functions ¶
func ConfigureCloud ¶
func ConfigureCloud(configFile *ServerConfigFile, controllerID, nametag string) (cloud.CloudClient, error)
func ConfigureK8sKipClient ¶
func ConfigureK8sKipClient() (*kubeclient.Clientset, *rest.Config, error)
func GetNodeForRunningPod ¶
func GetNodeForRunningPod(podName, unitName string, podRegistry *registry.PodRegistry, nodeRegistry *registry.NodeRegistry) (*api.Node, error)
Types ¶
type AWSConfig ¶
type AWSConfig struct { Region string `json:"region"` AccessKeyID string `json:"accessKeyID"` SecretAccessKey string `json:"secretAccessKey"` VPCID string `json:"vpcID,omitempty"` SubnetID string `json:"subnetID,omitempty"` EcsClusterName string `json:"ecsClusterName"` EndpointURL string `json:"endpointURL"` }
type ActivePods ¶
ActivePods allows custom sorting of pods so that a controller can pick the best ones to delete. Taken from Kubernetes.
func (ActivePods) Len ¶
func (s ActivePods) Len() int
func (ActivePods) Less ¶
func (s ActivePods) Less(i, j int) bool
func (ActivePods) Swap ¶
func (s ActivePods) Swap(i, j int)
type AzureConfig ¶
type AzureConfig struct { SubscriptionID string `json:"subscriptionID"` Location string `json:"location"` VNetName string `json:"virtualNetworkName"` SubnetName string `json:"subnetName"` TenantID string `json:"tenantID"` ClientID string `json:"clientID"` ClientSecret string `json:"clientSecret"` }
See https://github.com/Azure/azure-sdk-for-go/blob/master/README.md for more info on SDK login credentials. We might want to support CertificatePath and CertificatePassword.
type CellController ¶
type CellController struct {
// contains filtered or unexported fields
}
func NewCellController ¶
func NewCellController( controllerID, nodeName string, restConfig *rest.Config, k8sKipClient kv1b1.CellInterface, eventsSystem *events.EventSystem, podLister registry.PodLister, nodeLister registry.NodeLister, ) (*CellController, error)
func (*CellController) CreateCRDIfNotExists ¶
func (c *CellController) CreateCRDIfNotExists() error
func (*CellController) Dump ¶
func (c *CellController) Dump() []byte
func (*CellController) Start ¶
func (c *CellController) Start(quit <-chan struct{}, wg *sync.WaitGroup)
type CellsConfig ¶
type CellsConfig struct { BootImageSpec cloud.BootImageSpec `json:"bootImageSpec"` DefaultInstanceType string `json:"defaultInstanceType"` DefaultVolumeSize string `json:"defaultVolumeSize"` StandbyCells []nodemanager.StandbyNodeSpec `json:"standbyCells"` CloudInitFile string `json:"cloudInitFile"` Itzo ItzoConfig `json:"itzo"` ExtraCIDRs []string `json:"extraCIDRs"` ExtraSecurityGroups []string `json:"extraSecurityGroups"` Nametag string `json:"nametag"` StatusInterval int `json:"statusInterval"` HealthCheck HealthCheckConfig `json:"healthcheck"` PrivateIPOnly *bool `json:"privateIPOnly"` CellConfig map[string]string `json:"cellConfig"` }
type CloudAPIHealthCheck ¶ added in v0.0.3
type Controller ¶
type ControllerManager ¶
type ControllerManager struct {
// contains filtered or unexported fields
}
The ControllerManager was created to make the interaction between leader election and controllers easier. It takes care of starting and stopping controllers based on the leader elector.
func NewControllerManager ¶
func NewControllerManager(controllers map[string]Controller) *ControllerManager
func (*ControllerManager) ControllersRunning ¶
func (cm *ControllerManager) ControllersRunning() bool
func (*ControllerManager) GetAllControllers ¶
func (cm *ControllerManager) GetAllControllers() map[string]Controller
func (*ControllerManager) GetController ¶
func (cm *ControllerManager) GetController(name string) (Controller, bool)
func (*ControllerManager) Start ¶
func (cm *ControllerManager) Start()
This doesn't take a quit channel on purpose. If we also listened for quit, it would become difficult to handle starting and stopping controllers through the channel as well, so we just let this goroutine run until the end of the milpa process.
func (*ControllerManager) StartControllers ¶
func (cm *ControllerManager) StartControllers()
func (*ControllerManager) StopControllers ¶
func (cm *ControllerManager) StopControllers()
func (*ControllerManager) WaitForShutdown ¶
func (cm *ControllerManager) WaitForShutdown(systemShutdown <-chan struct{}, systemWG *sync.WaitGroup)
Our leader election used to tell our controller manager to shut down; now this takes care of that. We could simplify the controller manager a fair amount, but I'm concerned we'll need a clustered system again eventually.
type EtcdClientConfig ¶
type EtcdConfig ¶
type EtcdConfig struct { Client EtcdClientConfig `json:"client"` Internal InternalEtcdConfig `json:"internal"` }
type FullPodStatus ¶
type FullPodStatus struct { Name string PodIP string UnitStatuses []api.UnitStatus InitUnitStatuses []api.UnitStatus ResourceUsage api.ResourceMetrics // If an error occurred, Status will be nil, and Error will contain the // error instance. Error error }
type GCEConfig ¶
type GCEConfig struct { ProjectID string `json:"projectID"` CredentialsFile string `json:"credentialsFile,omitempty"` Credentials *GCECredentials `json:"credentials,omitempty"` Zone string `json:"zone,omitempty"` VPCName string `json:"vpcName,omitempty"` SubnetName string `json:"subnetName,omitempty"` }
type GCECredentials ¶ added in v0.0.3
type GarbageController ¶
type GarbageController struct {
// contains filtered or unexported fields
}
func (*GarbageController) CleanAzureResourceGroups ¶
func (c *GarbageController) CleanAzureResourceGroups()
func (*GarbageController) CleanAzureResourceGroupsHelper ¶
func (c *GarbageController) CleanAzureResourceGroupsHelper(client ResourceGrouper) error
func (*GarbageController) CleanDanglingRoutes ¶
func (c *GarbageController) CleanDanglingRoutes()
func (*GarbageController) CleanInstances ¶
func (c *GarbageController) CleanInstances()
func (*GarbageController) CleanTerminatedNodes ¶
func (c *GarbageController) CleanTerminatedNodes()
func (*GarbageController) Dump ¶
func (c *GarbageController) Dump() []byte
func (*GarbageController) GCLoop ¶
func (c *GarbageController) GCLoop(quit <-chan struct{}, wg *sync.WaitGroup)
func (*GarbageController) Start ¶
func (c *GarbageController) Start(quit <-chan struct{}, wg *sync.WaitGroup)
type GarbageControllerConfig ¶
type HealthCheckConfig ¶ added in v0.0.3
type HealthCheckConfig struct { Status *StatusHealthCheck `json:"status"` CloudAPI *CloudAPIHealthCheck `json:"cloudAPI"` }
type InstanceProvider ¶
type InstanceProvider struct { Registries map[string]registry.Registryer Encoder api.MilpaCodec SystemQuit <-chan struct{} SystemWaitGroup *sync.WaitGroup Controllers map[string]Controller ItzoClientFactory nodeclient.ItzoClientFactoryer // contains filtered or unexported fields }
func NewInstanceProvider ¶
func NewInstanceProvider(configFilePath, nodeName, internalIP, serverURL, networkAgentSecret, clusterDNS, clusterDomain string, daemonEndpointPort int32, debugServer bool, rm *manager.ResourceManager, systemQuit <-chan struct{}) (*InstanceProvider, error)
InstanceProvider should implement node.PodLifecycleHandler
func (InstanceProvider) Attach ¶
func (s InstanceProvider) Attach(stream clientapi.Kip_AttachServer) error
func (*InstanceProvider) ConfigureNode ¶
func (p *InstanceProvider) ConfigureNode(ctx context.Context, n *v1.Node)
func (InstanceProvider) Create ¶
func (s InstanceProvider) Create(context context.Context, request *clientapi.CreateRequest) (*clientapi.APIReply, error)
func (InstanceProvider) Delete ¶
func (s InstanceProvider) Delete(context context.Context, request *clientapi.DeleteRequest) (*clientapi.APIReply, error)
func (InstanceProvider) Deploy ¶
func (s InstanceProvider) Deploy(stream clientapi.Kip_DeployServer) error
func (InstanceProvider) Dump ¶
func (s InstanceProvider) Dump(context context.Context, request *clientapi.DumpRequest) (*clientapi.APIReply, error)
func (InstanceProvider) Exec ¶
func (s InstanceProvider) Exec(stream clientapi.Kip_ExecServer) error
func (InstanceProvider) Get ¶
func (s InstanceProvider) Get(context context.Context, request *clientapi.GetRequest) (*clientapi.APIReply, error)
func (*InstanceProvider) GetContainerLogs ¶
func (p *InstanceProvider) GetContainerLogs(ctx context.Context, namespace, podName, containerName string, opts vkapi.ContainerLogOpts) (io.ReadCloser, error)
func (InstanceProvider) GetLogs ¶
func (s InstanceProvider) GetLogs(context context.Context, request *clientapi.LogsRequest) (*clientapi.APIReply, error)
func (InstanceProvider) GetNodeForRunningPod ¶
func (s InstanceProvider) GetNodeForRunningPod(podName, unitName string) (*api.Node, error)
func (*InstanceProvider) GetPodStatus ¶
func (*InstanceProvider) GetStatsSummary ¶
func (InstanceProvider) GetVersion ¶
func (s InstanceProvider) GetVersion(context context.Context, request *clientapi.VersionRequest) (*clientapi.VersionReply, error)
func (InstanceProvider) IsLeader ¶
func (s InstanceProvider) IsLeader(context context.Context, request *clientapi.IsLeaderRequest) (*clientapi.IsLeaderReply, error)
func (*InstanceProvider) NotifyNodeStatus ¶
func (p *InstanceProvider) NotifyNodeStatus(ctx context.Context, notifier func(*v1.Node))
func (*InstanceProvider) NotifyPods ¶
func (p *InstanceProvider) NotifyPods(ctx context.Context, notifier func(*v1.Pod))
NotifyPods is called to set a pod notifier callback function. This should be called before any operations are done within the provider.
func (*InstanceProvider) RunInContainer ¶
func (*InstanceProvider) Stop ¶
func (p *InstanceProvider) Stop()
func (InstanceProvider) StreamLogs ¶
func (s InstanceProvider) StreamLogs(slr *clientapi.StreamLogsRequest, stream clientapi.Kip_StreamLogsServer) error
func (InstanceProvider) Update ¶
func (s InstanceProvider) Update(context context.Context, request *clientapi.UpdateRequest) (*clientapi.APIReply, error)
type InternalEtcdConfig ¶
type ItzoConfig ¶
type KubeletConfig ¶
type KubeletConfig struct { // Deprecated: CPU, Memory and Pods are copied into Capacity, and are only // present for backward compatibility. CPU *resource.Quantity `json:"cpu"` Memory *resource.Quantity `json:"memory"` Pods *resource.Quantity `json:"pods"` Capacity v1.ResourceList `json:"capacity"` Labels map[string]string `json:"labels"` }
KubeletConfig stores kubelet-specific configuration such as capacity and labels.
type MetricsController ¶
type MetricsController struct {
// contains filtered or unexported fields
}
func (*MetricsController) Dump ¶
func (c *MetricsController) Dump() []byte
func (*MetricsController) Start ¶
func (c *MetricsController) Start(quit <-chan struct{}, wg *sync.WaitGroup)
type MultiCloudConfig ¶
type MultiCloudConfig struct { AWS *AWSConfig `json:"aws,omitempty"` GCE *GCEConfig `json:"gce,omitempty"` Azure *AzureConfig `json:"azure,omitempty"` }
type PodController ¶
type PodController struct {
// contains filtered or unexported fields
}
func (*PodController) ControlLoop ¶
func (c *PodController) ControlLoop(quit <-chan struct{}, wg *sync.WaitGroup)
func (*PodController) ControlPods ¶
func (c *PodController) ControlPods()
We do all pod controlling in one loop since there are windows for races otherwise.
func (*PodController) Dump ¶
func (c *PodController) Dump() []byte
func (*PodController) Start ¶
func (c *PodController) Start(quit <-chan struct{}, wg *sync.WaitGroup)
func (*PodController) SyncRunningPods ¶
func (c *PodController) SyncRunningPods()
func (*PodController) TagNodeWithPodLabels ¶
func (c *PodController) TagNodeWithPodLabels(pod *api.Pod, node *api.Node)
type ResourceGrouper ¶
type SendRecver ¶
type ServerConfigFile ¶
type ServerConfigFile struct { api.TypeMeta `json:",inline"` Cloud MultiCloudConfig `json:"cloud"` Etcd EtcdConfig `json:"etcd"` Cells CellsConfig `json:"cells"` Testing TestingConfig `json:"testing"` Kubelet KubeletConfig `json:"kubelet"` }
ServerConfigFile stores the parsed JSON from provider.yaml.
func ParseConfig ¶
func ParseConfig(path string) (*ServerConfigFile, error)
type StatusHealthCheck ¶ added in v0.0.3
type StatusHealthCheck struct {
HealthyTimeout int `json:"healthyTimeout"`
}
type TestingConfig ¶
type TestingConfig struct {
ControllerID string `json:"controllerID"`
}
Source Files ¶
- attach.go
- cell_controller.go
- config.go
- controller_id.go
- controller_manager.go
- controller_utils.go
- convert.go
- create.go
- delete.go
- deploy.go
- deploy_util.go
- dump.go
- errors.go
- exec.go
- garbage_controller.go
- get.go
- getcontainerlogs.go
- getstatssummary.go
- helpers.go
- is_master.go
- logs.go
- manifest.go
- metrics_controller.go
- pod_controller.go
- pod_controller_utils.go
- runincontainer.go
- server.go
- streaming_utils.go
- update.go
- version.go
Directories ¶
Path | Synopsis |
---|---|
not sure if this is a good pattern for decoupling the pod_controller from the node controller...
|
not sure if this is a good pattern for decoupling the pod_controller from the node controller... |
Need to start DRYing up the registry code, this is terrible
|
Need to start DRYing up the registry code, this is terrible |