Documentation
¶
Overview ¶
Package ctrl contains simplified abstractions for interacting with Cloud APIs.
These simplified APIs are useful for testing the workflows executed by the commands package.
Index ¶
- type Ctrl
- type GCECP
- func (g *GCECP) CreateInstance(request *GCECreateRequest) (lro LongRunningOperation, err error)
- func (g *GCECP) DeleteInstance() (LongRunningOperation, error)
- func (g *GCECP) Instance() (*GCEInstance, error)
- func (g *GCECP) ListInstances() ([]*GCEInstance, error)
- func (g *GCECP) OptionallyRetrieveInstance(enableAPIIfRequired bool) (gceInstance *GCEInstance, apiEnabled bool, err error)
- func (g *GCECP) StartInstance() (LongRunningOperation, error)
- func (g *GCECP) StopInstance() (LongRunningOperation, error)
- type GCECreateRequest
- type GCEInstance
- type GCloudCLI
- type LongRunningOperation
- type ResourceManagementCP
- func (r *ResourceManagementCP) AddTPUUserAgent(tpuUserAgent string) error
- func (r *ResourceManagementCP) GetBucketACL(ctx context.Context, bucket string) ([]storage.ACLRule, error)
- func (r *ResourceManagementCP) GetProject() (*cloudresourcemanager.Project, error)
- func (r *ResourceManagementCP) GetProjectPolicy() (*cloudresourcemanager.Policy, error)
- func (r *ResourceManagementCP) IsProjectInGoogleOrg() (bool, error)
- func (r *ResourceManagementCP) SetBucketACL(ctx context.Context, bucket string, entity storage.ACLEntity, ...) error
- func (r *ResourceManagementCP) SetProjectPolicy(policy *cloudresourcemanager.Policy) error
- type TPUCP
- func (g *TPUCP) CreateInstance(ctx context.Context, version string, preemptible, reserved bool, ...) (LongRunningOperation, error)
- func (g *TPUCP) DeleteInstance() (LongRunningOperation, error)
- func (g *TPUCP) Instance() (*TPUInstance, error)
- func (g *TPUCP) ListInstances() ([]*TPUInstance, error)
- func (g *TPUCP) ListLocations() ([]*tpu.Location, error)
- func (g *TPUCP) ListSizes() ([]*tpu.AcceleratorType, error)
- func (g *TPUCP) ListVersions() ([]*tpu.TensorFlowVersion, error)
- func (g *TPUCP) OptionallyRetrieveInstance(enableAPIIfRequired bool) (instance *TPUInstance, apiEnabled bool, err error)
- func (g *TPUCP) StartInstance() error
- func (g *TPUCP) StopInstance(waitForAsync bool) error
- type TPUInstance
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Ctrl ¶
type Ctrl struct {
	GCE                *GCECP
	TPU                *TPUCP
	CLI                *GCloudCLI
	ResourceManagement *ResourceManagementCP
	// contains filtered or unexported fields
}
Ctrl contains the set of Control Plane APIs required to manage Cloud TPU flocks.
type GCECP ¶
type GCECP struct {
// contains filtered or unexported fields
}
GCECP contains an abstract representation of the Compute Engine control plane.
It is intentionally small so that other packages in the ctpu tool can be effectively tested.
func (*GCECP) CreateInstance ¶
func (g *GCECP) CreateInstance(request *GCECreateRequest) (lro LongRunningOperation, err error)
CreateInstance creates the Compute Engine instance with an API call to the Compute Engine control plane.
func (*GCECP) DeleteInstance ¶
func (g *GCECP) DeleteInstance() (LongRunningOperation, error)
DeleteInstance deletes a previously created Compute Engine instance with an API call to the Compute Engine control plane.
func (*GCECP) Instance ¶
func (g *GCECP) Instance() (*GCEInstance, error)
Instance retrieves the instance from the Compute Engine control plane.
func (*GCECP) ListInstances ¶
func (g *GCECP) ListInstances() ([]*GCEInstance, error)
ListInstances lists all Compute Engine instances in a given zone.
func (*GCECP) OptionallyRetrieveInstance ¶
func (g *GCECP) OptionallyRetrieveInstance(enableAPIIfRequired bool) (gceInstance *GCEInstance, apiEnabled bool, err error)
OptionallyRetrieveInstance retrieves the instance from the Compute Engine control plane.
If enableAPIIfRequired is false and the Compute Engine API has not been enabled, it returns immediately and does not enable the API.
func (*GCECP) StartInstance ¶
func (g *GCECP) StartInstance() (LongRunningOperation, error)
StartInstance starts a previously stopped Compute Engine instance with an API call to the Compute Engine control plane.
func (*GCECP) StopInstance ¶
func (g *GCECP) StopInstance() (LongRunningOperation, error)
StopInstance stops a previously started Compute Engine instance with an API call to the Compute Engine control plane.
type GCECreateRequest ¶
type GCECreateRequest struct {
	// ImageFamily is the name of the evergreen image family that should be used to create the Compute Engine VM. It is resolved to the ImageName during the CreateInstance request.
	//
	// Exactly one of ImageFamily and ImageName should be non-empty.
	ImageFamily string

	// ImageName is the name of the image that should be used to create the Compute Engine VM.
	//
	// Exactly one of ImageFamily and ImageName should be non-empty.
	ImageName string

	// TensorFlowVersion is the version of TensorFlow that is expected to be used.
	TensorFlowVersion string

	// MachineType is the Compute Engine machine type used when creating the instance.
	MachineType string

	// DiskSizeGb is the size the root volume should be set to upon instance creation.
	DiskSizeGb int64

	// Preemptible indicates whether the Compute Engine VM runs as a preemptible instance.
	Preemptible bool

	// Network is the network on which the Compute Engine VM should be created.
	Network string
}
GCECreateRequest captures all the configurable parameters involved in creating the Compute Engine VM.
type GCEInstance ¶
type GCEInstance struct {
*compute.Instance
}
GCEInstance represents the Compute Engine instance within the flock.
func (*GCEInstance) CanDelete ¶
func (i *GCEInstance) CanDelete() bool
CanDelete returns true if the Compute Engine instance can be deleted, false otherwise.
func (*GCEInstance) IsFlockVM ¶
func (i *GCEInstance) IsFlockVM() bool
IsFlockVM returns true if this Compute Engine VM appears to have been created by the ctpu tool.
func (*GCEInstance) IsRunning ¶
func (i *GCEInstance) IsRunning() bool
IsRunning returns true if the Compute Engine instance is running, false otherwise.
type GCloudCLI ¶
GCloudCLI abstracts away interacting with a locally installed gcloud CLI to facilitate testing.
func (GCloudCLI) IsGcloudInstalled ¶
IsGcloudInstalled returns true if the gcloud CLI is installed, false otherwise.
func (GCloudCLI) MakeEnviron ¶
MakeEnviron creates an environment that includes the extra environment variable TPU_NAME.
This new environment is then suitable for use when calling execve to execute the gcloud tool.
func (GCloudCLI) PrintInstallInstructions ¶
func (GCloudCLI) PrintInstallInstructions()
PrintInstallInstructions prints instructions for how to install gcloud to the console.
func (GCloudCLI) SSHToInstance ¶
func (g GCloudCLI) SSHToInstance(forwardPorts, forwardAgent bool, tpuInstance *TPUInstance) error
SSHToInstance opens an ssh connection to the Compute Engine VM in the flock.
If an error is encountered, an error is returned.
Note: SSHToInstance calls syscall.Exec, which replaces the contents of the current process with the ssh command (via the gcloud helper tool). As a result, in the successful case, SSHToInstance never returns.
type LongRunningOperation ¶
type LongRunningOperation interface {
	// LoopUntilComplete polls the control plane until the operation is complete.
	LoopUntilComplete() error
}
LongRunningOperation represents asynchronous control plane operations.
type ResourceManagementCP ¶
type ResourceManagementCP struct {
// contains filtered or unexported fields
}
ResourceManagementCP contains an abstract representation of the Cloud Resource Manager and related ACLs.
It is intentionally small so that other packages in the ctpu tool can be effectively tested.
func (*ResourceManagementCP) AddTPUUserAgent ¶
func (r *ResourceManagementCP) AddTPUUserAgent(tpuUserAgent string) error
AddTPUUserAgent adds the TPU user agent so that it can access Cloud Storage and send logs.
It is a no-op if the tpuUserAgent has already been granted some access.
func (*ResourceManagementCP) GetBucketACL ¶
func (r *ResourceManagementCP) GetBucketACL(ctx context.Context, bucket string) ([]storage.ACLRule, error)
GetBucketACL retrieves the ACL list for a Cloud Storage bucket.
func (*ResourceManagementCP) GetProject ¶
func (r *ResourceManagementCP) GetProject() (*cloudresourcemanager.Project, error)
GetProject retrieves the project metadata.
func (*ResourceManagementCP) GetProjectPolicy ¶
func (r *ResourceManagementCP) GetProjectPolicy() (*cloudresourcemanager.Policy, error)
GetProjectPolicy retrieves the IAM policy for the project.
func (*ResourceManagementCP) IsProjectInGoogleOrg ¶
func (r *ResourceManagementCP) IsProjectInGoogleOrg() (bool, error)
IsProjectInGoogleOrg determines if the project is part of the Google organization.
Note: this will need to be updated in the presence of folders.
func (*ResourceManagementCP) SetBucketACL ¶
func (r *ResourceManagementCP) SetBucketACL(ctx context.Context, bucket string, entity storage.ACLEntity, role storage.ACLRole) error
SetBucketACL adds the entity to the ACL list at the specified role on the provided bucket.
func (*ResourceManagementCP) SetProjectPolicy ¶
func (r *ResourceManagementCP) SetProjectPolicy(policy *cloudresourcemanager.Policy) error
SetProjectPolicy sets the IAM policy for the project.
type TPUCP ¶
type TPUCP struct {
// contains filtered or unexported fields
}
TPUCP contains an abstract representation of the Cloud TPU control plane.
It is intentionally small so that other packages in the ctpu tool can be effectively tested.
func (*TPUCP) CreateInstance ¶
func (g *TPUCP) CreateInstance(ctx context.Context, version string, preemptible, reserved bool, hardwareType, network string) (LongRunningOperation, error)
CreateInstance creates the Cloud TPU with an API call to the TPU control plane.
func (*TPUCP) DeleteInstance ¶
func (g *TPUCP) DeleteInstance() (LongRunningOperation, error)
DeleteInstance deletes a previously created Cloud TPU with an API call to the TPU control plane.
func (*TPUCP) Instance ¶
func (g *TPUCP) Instance() (*TPUInstance, error)
Instance retrieves the instance from the TPU control plane.
func (*TPUCP) ListInstances ¶
func (g *TPUCP) ListInstances() ([]*TPUInstance, error)
ListInstances lists all TPUs within a zone of the GCP project.
func (*TPUCP) ListLocations ¶
func (g *TPUCP) ListLocations() ([]*tpu.Location, error)
ListLocations retrieves all locations where TPUs might be available.
func (*TPUCP) ListVersions ¶
func (g *TPUCP) ListVersions() ([]*tpu.TensorFlowVersion, error)
ListVersions retrieves all available TensorFlow versions that can be used to create a Cloud TPU.
func (*TPUCP) OptionallyRetrieveInstance ¶
func (g *TPUCP) OptionallyRetrieveInstance(enableAPIIfRequired bool) (instance *TPUInstance, apiEnabled bool, err error)
OptionallyRetrieveInstance retrieves the Instance from the TPU control plane.
If enableAPIIfRequired is false and the TPU API has not been enabled, it returns immediately and does not enable the API.
func (*TPUCP) StartInstance ¶
func (g *TPUCP) StartInstance() error
StartInstance starts a previously stopped Cloud TPU with an API call to the TPU control plane.
func (*TPUCP) StopInstance ¶
func (g *TPUCP) StopInstance(waitForAsync bool) error
StopInstance stops a previously started Cloud TPU with an API call to the TPU control plane.
type TPUInstance ¶
type TPUInstance struct {
*tpu.Node
}
TPUInstance represents the Cloud TPU within the flock.
func (*TPUInstance) IsPreemptible ¶
func (i *TPUInstance) IsPreemptible() bool
IsPreemptible returns true if the Cloud TPU is a preemptible Cloud TPU, false otherwise.
func (*TPUInstance) IsReserved ¶
func (i *TPUInstance) IsReserved() bool
IsReserved returns true if the Cloud TPU is a reserved Cloud TPU, false otherwise.
func (*TPUInstance) IsRunning ¶
func (i *TPUInstance) IsRunning() bool
IsRunning returns true if the Cloud TPU is running, false otherwise.
func (*TPUInstance) NodeName ¶
func (i *TPUInstance) NodeName() string
NodeName returns the flock name (the human-usable name) of the Cloud TPU.