Documentation ¶
Index ¶
- Constants
- Variables
- func CalculateAvailableReplicas(pods corev1.PodList) int32
- func CalculateDesiredReplicas(ctx context.Context, cluster *rayv1.RayCluster) int32
- func CalculateDesiredResources(cluster *rayv1.RayCluster) corev1.ResourceList
- func CalculateMaxReplicas(cluster *rayv1.RayCluster) int32
- func CalculateMinReplicas(cluster *rayv1.RayCluster) int32
- func CalculateMinResources(cluster *rayv1.RayCluster) corev1.ResourceList
- func CheckAllPodsRunning(ctx context.Context, runningPods corev1.PodList) bool
- func CheckLabel(s string) string
- func CheckName(s string) string
- func CheckRouteName(ctx context.Context, s string, n string) string
- func CompareJsonStruct(objA interface{}, objB interface{}) bool
- func Contains(elems []string, searchTerm string) bool
- func ConvertUnixTimeToMetav1Time(unixTime uint64) *metav1.Time
- func EnvVarByName(envName string, envVars []corev1.EnvVar) (corev1.EnvVar, bool)
- func EnvVarExists(envName string, envVars []corev1.EnvVar) bool
- func ExtractRayIPFromFQDN(fqdnRayIP string) string
- func FetchHeadServiceURL(ctx context.Context, cli client.Client, rayCluster *rayv1.RayCluster, ...) (string, error)
- func FindContainerPort(container *corev1.Container, portName string, defaultPort int) int
- func FormatInt32(n int32) string
- func GenerateFQDNServiceName(ctx context.Context, cluster rayv1.RayCluster, namespace string) string
- func GenerateHeadServiceName(crdType CRDType, clusterSpec rayv1.RayClusterSpec, ownerName string) (string, error)
- func GenerateIdentifier(clusterName string, nodeType rayv1.RayNodeType) string
- func GenerateIngressName(clusterName string) string
- func GenerateJsonHash(obj interface{}) (string, error)
- func GenerateRayClusterName(serviceName string) string
- func GenerateRayJobId(rayjob string) string
- func GenerateRouteName(clusterName string) string
- func GenerateServeServiceLabel(serviceName string) string
- func GenerateServeServiceName(serviceName string) string
- func GetClusterDomainName() string
- func GetHeadGroupServiceAccountName(cluster *rayv1.RayCluster) string
- func GetNamespace(metaData metav1.ObjectMeta) string
- func GetWorkerGroupDesiredReplicas(ctx context.Context, workerGroupSpec rayv1.WorkerGroupSpec) int32
- func IsCreated(pod *corev1.Pod) bool
- func IsJobFinished(j *batchv1.Job) (batchv1.JobConditionType, bool)
- func IsRunningAndReady(pod *corev1.Pod) bool
- func PodNotMatchingTemplate(pod corev1.Pod, template corev1.PodTemplateSpec) bool
- func RayOriginatedFromCRDLabelValue(crdType CRDType) string
- type BaseDashboardClient
- type CRDType
- type FakeRayDashboardClient
- func (r *FakeRayDashboardClient) DeleteJob(_ context.Context, jobName string) error
- func (r *FakeRayDashboardClient) GetJobInfo(ctx context.Context, jobId string) (*RayJobInfo, error)
- func (r *FakeRayDashboardClient) GetJobLog(_ context.Context, jobName string) (*string, error)
- func (r *FakeRayDashboardClient) GetMultiApplicationStatus(_ context.Context) (map[string]*ServeApplicationStatus, error)
- func (r *FakeRayDashboardClient) GetServeDetails(_ context.Context) (*ServeDetails, error)
- func (r *FakeRayDashboardClient) InitClient(url string)
- func (r *FakeRayDashboardClient) ListJobs(ctx context.Context) (*[]RayJobInfo, error)
- func (r *FakeRayDashboardClient) SetMultiApplicationStatuses(statuses map[string]*ServeApplicationStatus)
- func (r *FakeRayDashboardClient) StopJob(_ context.Context, jobName string) (err error)
- func (r *FakeRayDashboardClient) SubmitJob(_ context.Context, rayJob *rayv1.RayJob) (jobId string, err error)
- func (r *FakeRayDashboardClient) SubmitJobReq(_ context.Context, request *RayJobRequest, name *string) (string, error)
- func (r *FakeRayDashboardClient) UpdateDeployments(_ context.Context, configJson []byte) error
- type FakeRayHttpProxyClient
- type RayDashboardClient
- func (r *RayDashboardClient) ConvertServeDetailsToApplicationStatuses(serveDetails *ServeDetails) (map[string]*ServeApplicationStatus, error)
- func (r *RayDashboardClient) DeleteJob(ctx context.Context, jobName string) error
- func (r *RayDashboardClient) GetJobInfo(ctx context.Context, jobId string) (*RayJobInfo, error)
- func (r *RayDashboardClient) GetJobLog(ctx context.Context, jobName string) (*string, error)
- func (r *RayDashboardClient) GetMultiApplicationStatus(ctx context.Context) (map[string]*ServeApplicationStatus, error)
- func (r *RayDashboardClient) GetServeDetails(ctx context.Context) (*ServeDetails, error)
- func (r *RayDashboardClient) InitClient(url string)
- func (r *RayDashboardClient) ListJobs(ctx context.Context) (*[]RayJobInfo, error)
- func (r *RayDashboardClient) StopJob(ctx context.Context, jobName string) (err error)
- func (r *RayDashboardClient) SubmitJob(ctx context.Context, rayJob *rayv1.RayJob) (jobId string, err error)
- func (r *RayDashboardClient) SubmitJobReq(ctx context.Context, request *RayJobRequest, name *string) (jobId string, err error)
- func (r *RayDashboardClient) UpdateDeployments(ctx context.Context, configJson []byte) error
- type RayDashboardClientInterface
- type RayHttpProxyClient
- type RayHttpProxyClientInterface
- type RayJobInfo
- type RayJobLogsResponse
- type RayJobRequest
- type RayJobResponse
- type RayJobStopResponse
- type RuntimeEnvType
- type ServeApplicationDetails
- type ServeApplicationStatus
- type ServeDeploymentDetails
- type ServeDeploymentStatus
- type ServeDetails
- type ServiceType
Constants ¶
const ( // Default application name DefaultServeAppName = "default" // RayOriginatedFromCRNameLabelKey and RayOriginatedFromCRDLabelKey are the labels used to associate the root KubeRay Custom Resource. // [Example 1] If we create a RayJob named `myjob`, then (1) the RayCluster and (2) the submitter K8s Job will have a // `ray.io/originated-from-cr-name=myjob` and a `ray.io/originated-from-crd=RayJob` label. // // [Example 2] If we create a RayService named `mysvc`, then (1) the RayCluster and (2) the Kubernetes services managed by the RayService // will have a `ray.io/originated-from-cr-name=mysvc` and a `ray.io/originated-from-crd=RayService` label. RayOriginatedFromCRNameLabelKey = "ray.io/originated-from-cr-name" RayOriginatedFromCRDLabelKey = "ray.io/originated-from-crd" RayClusterLabelKey = "ray.io/cluster" RayNodeTypeLabelKey = "ray.io/node-type" RayNodeGroupLabelKey = "ray.io/group" RayNodeLabelKey = "ray.io/is-ray-node" RayIDLabelKey = "ray.io/identifier" RayClusterServingServiceLabelKey = "ray.io/serve" RayClusterHeadlessServiceLabelKey = "ray.io/headless-worker-svc" HashWithoutReplicasAndWorkersToDeleteKey = "ray.io/hash-without-replicas-and-workers-to-delete" NumWorkerGroupsKey = "ray.io/num-worker-groups" // In KubeRay, the Ray container must be the first application container in a head or worker Pod. RayContainerIndex = 0 // Batch scheduling labels // TODO(tgaddair): consider making these part of the CRD RaySchedulerName = "ray.io/scheduler-name" RayPriorityClassName = "ray.io/priority-class-name" // Ray GCS FT related annotations RayFTEnabledAnnotationKey = "ray.io/ft-enabled" RayExternalStorageNSAnnotationKey = "ray.io/external-storage-namespace" // If this annotation is set to "true", the KubeRay operator will not modify the container's command. // However, the generated `ray start` command will still be stored in the container's environment variable // `KUBERAY_GEN_RAY_START_CMD`. 
RayOverwriteContainerCmdAnnotationKey = "ray.io/overwrite-container-cmd" // Finalizers for GCS fault tolerance GCSFaultToleranceRedisCleanupFinalizer = "ray.io/gcs-ft-redis-cleanup-finalizer" // EnableServeServiceKey is exclusively utilized to indicate if a RayCluster is directly used for serving. // See https://github.com/ray-project/kuberay/pull/1672 for more details. EnableServeServiceKey = "ray.io/enable-serve-service" EnableServeServiceTrue = "true" EnableRayClusterServingServiceTrue = "true" EnableRayClusterServingServiceFalse = "false" KubernetesApplicationNameLabelKey = "app.kubernetes.io/name" KubernetesCreatedByLabelKey = "app.kubernetes.io/created-by" // Use as separator for pod name, for example, raycluster-small-size-worker-0 DashSymbol = "-" // Use as default port DefaultClientPort = 10001 // For Ray >= 1.11.0, "DefaultRedisPort" actually refers to the GCS server port. // However, the role of this port is unchanged in Ray APIs like ray.init and ray start. // This is the port used by Ray workers and drivers inside the Ray cluster to connect to the Ray head. DefaultRedisPort = 6379 DefaultDashboardPort = 8265 DefaultMetricsPort = 8080 DefaultDashboardAgentListenPort = 52365 DefaultServingPort = 8000 ClientPortName = "client" RedisPortName = "redis" DashboardPortName = "dashboard" MetricsPortName = "metrics" ServingPortName = "serve" // The default AppProtocol for Kubernetes service DefaultServiceAppProtocol = "tcp" // The default application name ApplicationName = "kuberay" // The default name for kuberay operator ComponentName = "kuberay-operator" // The default suffix for Headless Service for multi-host worker groups. // The full name will be of the form "${RayCluster_Name}-headless-worker-svc". 
HeadlessServiceSuffix = "headless-worker-svc" // Use as container env variable RAY_CLUSTER_NAME = "RAY_CLUSTER_NAME" RAY_IP = "RAY_IP" FQ_RAY_IP = "FQ_RAY_IP" RAY_PORT = "RAY_PORT" RAY_ADDRESS = "RAY_ADDRESS" REDIS_PASSWORD = "REDIS_PASSWORD" RAY_DASHBOARD_ENABLE_K8S_DISK_USAGE = "RAY_DASHBOARD_ENABLE_K8S_DISK_USAGE" RAY_EXTERNAL_STORAGE_NS = "RAY_external_storage_namespace" RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S = "RAY_gcs_rpc_server_reconnect_timeout_s" RAY_TIMEOUT_MS_TASK_WAIT_FOR_DEATH_INFO = "RAY_timeout_ms_task_wait_for_death_info" RAY_GCS_SERVER_REQUEST_TIMEOUT_SECONDS = "RAY_gcs_server_request_timeout_seconds" RAY_SERVE_KV_TIMEOUT_S = "RAY_SERVE_KV_TIMEOUT_S" RAY_USAGE_STATS_KUBERAY_IN_USE = "RAY_USAGE_STATS_KUBERAY_IN_USE" RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV = "RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV" RAYCLUSTER_DEFAULT_REQUEUE_SECONDS = 300 KUBERAY_GEN_RAY_START_CMD = "KUBERAY_GEN_RAY_START_CMD" // Environment variables for RayJob submitter Kubernetes Job. // Example: ray job submit --address=http://$RAY_DASHBOARD_ADDRESS --submission-id=$RAY_JOB_SUBMISSION_ID ... RAY_DASHBOARD_ADDRESS = "RAY_DASHBOARD_ADDRESS" RAY_JOB_SUBMISSION_ID = "RAY_JOB_SUBMISSION_ID" // Environment variables for Ray Autoscaler V2. // The value of RAY_CLOUD_INSTANCE_ID is the Pod name for Autoscaler V2 alpha. This may change in the future. RAY_CLOUD_INSTANCE_ID = "RAY_CLOUD_INSTANCE_ID" // The value of RAY_NODE_TYPE_NAME is the name of the node group (i.e., the value of the "ray.io/group" label). RAY_NODE_TYPE_NAME = "RAY_NODE_TYPE_NAME" // This KubeRay operator environment variable is used to determine if random Pod // deletion should be enabled. Note that this only takes effect when autoscaling // is enabled for the RayCluster. This is a feature flag for v0.6.0, and will be // removed if the default behavior is stable enough.
ENABLE_RANDOM_POD_DELETE = "ENABLE_RANDOM_POD_DELETE" // This KubeRay operator environment variable is used to determine if the Redis // cleanup Job should be enabled. This is a feature flag for v1.0.0. ENABLE_GCS_FT_REDIS_CLEANUP = "ENABLE_GCS_FT_REDIS_CLEANUP" // This environment variable for the KubeRay operator is used to determine whether to enable // the injection of readiness and liveness probes into Ray head and worker containers. // Enabling this feature contributes to the robustness of Ray clusters. It is currently a feature // flag for v1.1.0 and will be removed if the behavior proves to be stable enough. ENABLE_PROBES_INJECTION = "ENABLE_PROBES_INJECTION" // Ray core default configurations DefaultWorkerRayGcsReconnectTimeoutS = "600" LOCAL_HOST = "127.0.0.1" // Ray FT default readiness probe values DefaultReadinessProbeInitialDelaySeconds = 10 DefaultReadinessProbeTimeoutSeconds = 1 DefaultReadinessProbePeriodSeconds = 5 DefaultReadinessProbeSuccessThreshold = 1 DefaultReadinessProbeFailureThreshold = 10 ServeReadinessProbeFailureThreshold = 1 // Ray FT default liveness probe values DefaultLivenessProbeInitialDelaySeconds = 30 DefaultLivenessProbeTimeoutSeconds = 1 DefaultLivenessProbePeriodSeconds = 5 DefaultLivenessProbeSuccessThreshold = 1 DefaultLivenessProbeFailureThreshold = 120 // Ray health check related configurations // Note: Since the Raylet process and the dashboard agent process are fate-sharing, // only one of them needs to be checked. So, RayAgentRayletHealthPath accesses the dashboard agent's API endpoint // to check the health of the Raylet process. // TODO (kevin85421): Should we take the dashboard process into account? 
RayAgentRayletHealthPath = "api/local_raylet_healthz" RayDashboardGCSHealthPath = "api/gcs_healthz" RayServeProxyHealthPath = "-/healthz" BaseWgetHealthCommand = "wget -T 2 -q -O- http://localhost:%d/%s | grep success" // Finalizers for RayJob RayJobStopJobFinalizer = "ray.io/rayjob-finalizer" // RayNodeHeadGroupLabelValue is the value for the RayNodeGroupLabelKey label on a head node RayNodeHeadGroupLabelValue = "headgroup" )
const ( RayClusterSuffix = "-raycluster-" ServeName = "serve" ClusterDomainEnvKey = "CLUSTER_DOMAIN" DefaultDomainName = "cluster.local" )
Variables ¶
var ( // Multi-application URL paths ServeDetailsPath = "/api/serve/applications/" DeployPathV2 = "/api/serve/applications/" // Job URL paths JobPath = "/api/jobs/" )
Functions ¶
func CalculateAvailableReplicas ¶
CalculateAvailableReplicas calculates available worker replicas at the cluster level. A worker is available if its Pod is running.
func CalculateDesiredReplicas ¶
func CalculateDesiredReplicas(ctx context.Context, cluster *rayv1.RayCluster) int32
CalculateDesiredReplicas calculates desired worker replicas at the cluster level
func CalculateDesiredResources ¶
func CalculateDesiredResources(cluster *rayv1.RayCluster) corev1.ResourceList
func CalculateMaxReplicas ¶
func CalculateMaxReplicas(cluster *rayv1.RayCluster) int32
CalculateMaxReplicas calculates max worker replicas at the cluster level
func CalculateMinReplicas ¶
func CalculateMinReplicas(cluster *rayv1.RayCluster) int32
CalculateMinReplicas calculates min worker replicas at the cluster level
func CalculateMinResources ¶
func CalculateMinResources(cluster *rayv1.RayCluster) corev1.ResourceList
func CheckAllPodsRunning ¶ added in v0.6.0
CheckAllPodsRunning returns true if all the RayCluster's Pods are running, false otherwise
func CheckLabel ¶
CheckLabel makes sure the label value does not start with a punctuation and the total length is < 63 char
func CheckName ¶
CheckName makes sure the name does not start with a numeric value and the total length is < 63 char
func CheckRouteName ¶ added in v1.1.0
func CompareJsonStruct ¶
func CompareJsonStruct(objA interface{}, objB interface{}) bool
CompareJsonStruct is a way to better compare whether two objects are the same when they are json/yaml structs. reflect.DeepEqual will fail in some cases.
func EnvVarByName ¶ added in v1.1.0
EnvVarByName returns an entry in []corev1.EnvVar that matches a name. Also returns a bool for whether the env var exists.
func ExtractRayIPFromFQDN ¶
ExtractRayIPFromFQDN extracts the head service name (i.e., RAY_IP, deprecated) from a fully qualified domain name (FQDN). This function is provided for backward compatibility purposes only.
func FetchHeadServiceURL ¶ added in v0.6.0
func FetchHeadServiceURL(ctx context.Context, cli client.Client, rayCluster *rayv1.RayCluster, defaultPortName string) (string, error)
FetchHeadServiceURL fetches the URL that consists of the FQDN for the RayCluster's head service and the port with the given port name (defaultPortName).
func FindContainerPort ¶ added in v0.6.0
FindContainerPort searches for a specific port $portName in the container. If the port is found in the container, the corresponding port is returned. If the port is not found, the $defaultPort is returned instead.
func FormatInt32 ¶
FormatInt32 returns the string representation of the int32 value n. Unlike strconv.FormatInt, it takes no base argument.
func GenerateFQDNServiceName ¶
func GenerateFQDNServiceName(ctx context.Context, cluster rayv1.RayCluster, namespace string) string
GenerateFQDNServiceName generates a Fully Qualified Domain Name.
func GenerateHeadServiceName ¶ added in v1.0.0
func GenerateHeadServiceName(crdType CRDType, clusterSpec rayv1.RayClusterSpec, ownerName string) (string, error)
GenerateHeadServiceName generates a Ray head service name. Note that there are two types of head services:
(1) For RayCluster: If `HeadService.Name` in the cluster spec is not empty, it will be used as the head service name. Otherwise, the name is generated based on the RayCluster CR's name. (2) For RayService: It's important to note that the RayService CR not only possesses a head service owned by its RayCluster CR but also maintains a separate head service for itself to facilitate zero-downtime upgrades. The name of the head service owned by the RayService CR is generated based on the RayService CR's name.
@param crdType: The type of the CRD that owns the head service. @param clusterSpec: `RayClusterSpec` @param ownerName: The name of the CR that owns the head service.
func GenerateIdentifier ¶
func GenerateIdentifier(clusterName string, nodeType rayv1.RayNodeType) string
GenerateIdentifier generates identifier of same group pods
func GenerateIngressName ¶
GenerateIngressName generates an ingress name from cluster name
func GenerateJsonHash ¶
Json-serializes obj and returns its hash string
func GenerateRayClusterName ¶
GenerateRayClusterName generates a ray cluster name from ray service name
func GenerateRayJobId ¶
GenerateRayJobId generates a ray job id for submission
func GenerateRouteName ¶ added in v1.0.0
GenerateRouteName generates a route name from cluster name
func GenerateServeServiceLabel ¶
GenerateServeServiceLabel generates label value for serve service selector.
func GenerateServeServiceName ¶
GenerateServeServiceName generates name for serve service.
func GetClusterDomainName ¶
func GetClusterDomainName() string
GetClusterDomainName returns cluster's domain name
func GetHeadGroupServiceAccountName ¶
func GetHeadGroupServiceAccountName(cluster *rayv1.RayCluster) string
GetHeadGroupServiceAccountName returns the head group service account if it exists. Otherwise, it returns the name of the cluster itself.
func GetNamespace ¶
func GetNamespace(metaData metav1.ObjectMeta) string
GetNamespace returns the namespace
func GetWorkerGroupDesiredReplicas ¶ added in v1.0.0
func GetWorkerGroupDesiredReplicas(ctx context.Context, workerGroupSpec rayv1.WorkerGroupSpec) int32
func IsJobFinished ¶ added in v1.1.0
func IsJobFinished(j *batchv1.Job) (batchv1.JobConditionType, bool)
IsJobFinished checks whether the given Job has finished execution. It does not discriminate between successful and failed terminations. src: https://github.com/kubernetes/kubernetes/blob/a8a1abc25cad87333840cd7d54be2efaf31a3177/pkg/controller/job/utils.go#L26
func IsRunningAndReady ¶
IsRunningAndReady returns true if the pod is in the PodRunning phase and has a condition of PodReady.
func PodNotMatchingTemplate ¶
func PodNotMatchingTemplate(pod corev1.Pod, template corev1.PodTemplateSpec) bool
func RayOriginatedFromCRDLabelValue ¶ added in v1.1.0
RayOriginatedFromCRDLabelValue generates a value for the label RayOriginatedFromCRDLabelKey. This is also the only function to construct label filter of resources originated from a given CRDType.
Types ¶
type BaseDashboardClient ¶ added in v0.6.0
type BaseDashboardClient struct {
// contains filtered or unexported fields
}
type CRDType ¶ added in v1.0.0
type CRDType string
TODO (kevin85421): Define CRDType here rather than constant.go to avoid circular dependency.
func GetCRDType ¶ added in v1.1.0
type FakeRayDashboardClient ¶
type FakeRayDashboardClient struct { BaseDashboardClient GetJobInfoMock atomic.Pointer[func(context.Context, string) (*RayJobInfo, error)] // contains filtered or unexported fields }
func (*FakeRayDashboardClient) DeleteJob ¶ added in v1.1.0
func (r *FakeRayDashboardClient) DeleteJob(_ context.Context, jobName string) error
func (*FakeRayDashboardClient) GetJobInfo ¶
func (r *FakeRayDashboardClient) GetJobInfo(ctx context.Context, jobId string) (*RayJobInfo, error)
func (*FakeRayDashboardClient) GetMultiApplicationStatus ¶ added in v0.6.0
func (r *FakeRayDashboardClient) GetMultiApplicationStatus(_ context.Context) (map[string]*ServeApplicationStatus, error)
func (*FakeRayDashboardClient) GetServeDetails ¶ added in v0.6.0
func (r *FakeRayDashboardClient) GetServeDetails(_ context.Context) (*ServeDetails, error)
func (*FakeRayDashboardClient) InitClient ¶
func (r *FakeRayDashboardClient) InitClient(url string)
func (*FakeRayDashboardClient) ListJobs ¶ added in v1.1.0
func (r *FakeRayDashboardClient) ListJobs(ctx context.Context) (*[]RayJobInfo, error)
func (*FakeRayDashboardClient) SetMultiApplicationStatuses ¶ added in v0.6.0
func (r *FakeRayDashboardClient) SetMultiApplicationStatuses(statuses map[string]*ServeApplicationStatus)
func (*FakeRayDashboardClient) StopJob ¶
func (r *FakeRayDashboardClient) StopJob(_ context.Context, jobName string) (err error)
func (*FakeRayDashboardClient) SubmitJobReq ¶ added in v1.1.0
func (r *FakeRayDashboardClient) SubmitJobReq(_ context.Context, request *RayJobRequest, name *string) (string, error)
func (*FakeRayDashboardClient) UpdateDeployments ¶
func (r *FakeRayDashboardClient) UpdateDeployments(_ context.Context, configJson []byte) error
type FakeRayHttpProxyClient ¶
type FakeRayHttpProxyClient struct {
// contains filtered or unexported fields
}
func (*FakeRayHttpProxyClient) CheckHealth ¶
func (r *FakeRayHttpProxyClient) CheckHealth() error
func (*FakeRayHttpProxyClient) InitClient ¶
func (r *FakeRayHttpProxyClient) InitClient()
func (*FakeRayHttpProxyClient) SetHostIp ¶
func (r *FakeRayHttpProxyClient) SetHostIp(hostIp string, port int)
type RayDashboardClient ¶
type RayDashboardClient struct {
BaseDashboardClient
}
func (*RayDashboardClient) ConvertServeDetailsToApplicationStatuses ¶ added in v0.6.0
func (r *RayDashboardClient) ConvertServeDetailsToApplicationStatuses(serveDetails *ServeDetails) (map[string]*ServeApplicationStatus, error)
func (*RayDashboardClient) DeleteJob ¶ added in v1.1.0
func (r *RayDashboardClient) DeleteJob(ctx context.Context, jobName string) error
func (*RayDashboardClient) GetJobInfo ¶
func (r *RayDashboardClient) GetJobInfo(ctx context.Context, jobId string) (*RayJobInfo, error)
Note that RayJobInfo and error can't be nil at the same time. Please make sure that, if the Ray job with JobId can't be found, a BadRequest error is returned.
func (*RayDashboardClient) GetMultiApplicationStatus ¶ added in v0.6.0
func (r *RayDashboardClient) GetMultiApplicationStatus(ctx context.Context) (map[string]*ServeApplicationStatus, error)
func (*RayDashboardClient) GetServeDetails ¶ added in v0.6.0
func (r *RayDashboardClient) GetServeDetails(ctx context.Context) (*ServeDetails, error)
GetServeDetails gets details on all live applications on the Ray cluster.
func (*RayDashboardClient) InitClient ¶
func (r *RayDashboardClient) InitClient(url string)
func (*RayDashboardClient) ListJobs ¶ added in v1.1.0
func (r *RayDashboardClient) ListJobs(ctx context.Context) (*[]RayJobInfo, error)
func (*RayDashboardClient) StopJob ¶
func (r *RayDashboardClient) StopJob(ctx context.Context, jobName string) (err error)
func (*RayDashboardClient) SubmitJobReq ¶ added in v1.1.0
func (r *RayDashboardClient) SubmitJobReq(ctx context.Context, request *RayJobRequest, name *string) (jobId string, err error)
func (*RayDashboardClient) UpdateDeployments ¶
func (r *RayDashboardClient) UpdateDeployments(ctx context.Context, configJson []byte) error
UpdateDeployments updates the deployments in the Ray cluster.
type RayDashboardClientInterface ¶
type RayDashboardClientInterface interface { InitClient(url string) UpdateDeployments(ctx context.Context, configJson []byte) error // V2/multi-app Rest API GetServeDetails(ctx context.Context) (*ServeDetails, error) GetMultiApplicationStatus(context.Context) (map[string]*ServeApplicationStatus, error) GetJobInfo(ctx context.Context, jobId string) (*RayJobInfo, error) ListJobs(ctx context.Context) (*[]RayJobInfo, error) SubmitJob(ctx context.Context, rayJob *rayv1.RayJob) (string, error) SubmitJobReq(ctx context.Context, request *RayJobRequest, name *string) (string, error) GetJobLog(ctx context.Context, jobName string) (*string, error) StopJob(ctx context.Context, jobName string) error DeleteJob(ctx context.Context, jobName string) error }
func GetRayDashboardClient ¶
func GetRayDashboardClient() RayDashboardClientInterface
type RayHttpProxyClient ¶
type RayHttpProxyClient struct {
// contains filtered or unexported fields
}
func (*RayHttpProxyClient) CheckHealth ¶
func (r *RayHttpProxyClient) CheckHealth() error
func (*RayHttpProxyClient) InitClient ¶
func (r *RayHttpProxyClient) InitClient()
func (*RayHttpProxyClient) SetHostIp ¶
func (r *RayHttpProxyClient) SetHostIp(hostIp string, port int)
type RayHttpProxyClientInterface ¶
type RayHttpProxyClientInterface interface { InitClient() CheckHealth() error SetHostIp(hostIp string, port int) }
func GetRayHttpProxyClient ¶
func GetRayHttpProxyClient() RayHttpProxyClientInterface
type RayJobInfo ¶
type RayJobInfo struct { JobStatus rayv1.JobStatus `json:"status,omitempty"` Entrypoint string `json:"entrypoint,omitempty"` JobId string `json:"job_id,omitempty"` SubmissionId string `json:"submission_id,omitempty"` Message string `json:"message,omitempty"` ErrorType *string `json:"error_type,omitempty"` StartTime uint64 `json:"start_time,omitempty"` EndTime uint64 `json:"end_time,omitempty"` Metadata map[string]string `json:"metadata,omitempty"` RuntimeEnv RuntimeEnvType `json:"runtime_env,omitempty"` }
RayJobInfo is the response of "ray job status" api. Reference to https://docs.ray.io/en/latest/cluster/running-applications/job-submission/rest.html#ray-job-rest-api-spec Reference to https://github.com/ray-project/ray/blob/cfbf98c315cfb2710c56039a3c96477d196de049/dashboard/modules/job/pydantic_models.py#L38-L107
type RayJobLogsResponse ¶ added in v1.1.0
type RayJobLogsResponse struct {
Logs string `json:"logs,omitempty"`
}
type RayJobRequest ¶
type RayJobRequest struct { Entrypoint string `json:"entrypoint"` SubmissionId string `json:"submission_id,omitempty"` RuntimeEnv RuntimeEnvType `json:"runtime_env,omitempty"` Metadata map[string]string `json:"metadata,omitempty"` NumCpus float32 `json:"entrypoint_num_cpus,omitempty"` NumGpus float32 `json:"entrypoint_num_gpus,omitempty"` Resources map[string]float32 `json:"entrypoint_resources,omitempty"` }
RayJobRequest is the request body to submit. Reference to https://docs.ray.io/en/latest/cluster/running-applications/job-submission/rest.html#ray-job-rest-api-spec Reference to https://github.com/ray-project/ray/blob/cfbf98c315cfb2710c56039a3c96477d196de049/dashboard/modules/job/common.py#L325-L353
func ConvertRayJobToReq ¶
func ConvertRayJobToReq(rayJob *rayv1.RayJob) (*RayJobRequest, error)
type RayJobResponse ¶
type RayJobResponse struct {
JobId string `json:"job_id"`
}
type RayJobStopResponse ¶
type RayJobStopResponse struct {
Stopped bool `json:"stopped"`
}
type RuntimeEnvType ¶ added in v1.1.0
type RuntimeEnvType map[string]interface{}
func UnmarshalRuntimeEnvYAML ¶ added in v1.1.0
func UnmarshalRuntimeEnvYAML(runtimeEnvYAML string) (RuntimeEnvType, error)
type ServeApplicationDetails ¶ added in v0.6.0
type ServeApplicationDetails struct { ServeApplicationStatus RoutePrefix string `json:"route_prefix,omitempty"` DocsPath string `json:"docs_path,omitempty"` Deployments map[string]ServeDeploymentDetails `json:"deployments"` }
type ServeApplicationStatus ¶ added in v0.6.0
type ServeApplicationStatus struct { Name string `json:"name,omitempty"` Status string `json:"status"` Message string `json:"message,omitempty"` Deployments map[string]ServeDeploymentStatus `json:"deployments"` }
Describes the status of an application
type ServeDeploymentDetails ¶ added in v0.6.0
type ServeDeploymentDetails struct { ServeDeploymentStatus RoutePrefix string `json:"route_prefix,omitempty"` }
V2 Serve API Response format. These extend the ServeDeploymentStatus and ServeApplicationStatus structs, but contain more information such as route prefix because the V2/multi-app GET API fetches general metadata, not just statuses.
type ServeDeploymentStatus ¶ added in v0.6.0
type ServeDeploymentStatus struct { Name string `json:"name,omitempty"` Status string `json:"status,omitempty"` Message string `json:"message,omitempty"` }
ServeDeploymentStatus and ServeApplicationStatus describe the format of status(es) that will be returned by the GetMultiApplicationStatus method of the dashboard client. ServeDeploymentStatus describes the status of a deployment.
type ServeDetails ¶ added in v0.6.0
type ServeDetails struct { Applications map[string]ServeApplicationDetails `json:"applications"` DeployMode string `json:"deploy_mode,omitempty"` }
type ServiceType ¶ added in v1.1.0
type ServiceType string
const ( HeadService ServiceType = "headService" ServingService ServiceType = "serveService" )