Documentation ¶
Index ¶
- Constants
- Variables
- func CalculateAvailableReplicas(pods corev1.PodList) int32
- func CalculateDesiredReplicas(ctx context.Context, cluster *rayv1.RayCluster) int32
- func CalculateDesiredResources(cluster *rayv1.RayCluster) corev1.ResourceList
- func CalculateMaxReplicas(cluster *rayv1.RayCluster) int32
- func CalculateMinReplicas(cluster *rayv1.RayCluster) int32
- func CalculateMinResources(cluster *rayv1.RayCluster) corev1.ResourceList
- func CalculateReadyReplicas(pods corev1.PodList) int32
- func CheckAllPodsRunning(ctx context.Context, runningPods corev1.PodList) bool
- func CheckLabel(s string) string
- func CheckName(s string) string
- func CheckRouteName(ctx context.Context, s string, n string) string
- func CompareJsonStruct(objA interface{}, objB interface{}) bool
- func Contains(elems []string, searchTerm string) bool
- func ConvertUnixTimeToMetav1Time(unixTime uint64) *metav1.Time
- func EnvVarByName(envName string, envVars []corev1.EnvVar) (corev1.EnvVar, bool)
- func EnvVarExists(envName string, envVars []corev1.EnvVar) bool
- func ExtractRayIPFromFQDN(fqdnRayIP string) string
- func FetchHeadServiceURL(ctx context.Context, cli client.Client, rayCluster *rayv1.RayCluster, ...) (string, error)
- func FindContainerPort(container *corev1.Container, portName string, defaultPort int) int
- func FindHeadPodReadyCondition(headPod *corev1.Pod) metav1.Condition
- func FormatInt32(n int32) string
- func GenerateFQDNServiceName(ctx context.Context, cluster rayv1.RayCluster, namespace string) string
- func GenerateHeadServiceName(crdType CRDType, clusterSpec rayv1.RayClusterSpec, ownerName string) (string, error)
- func GenerateIdentifier(clusterName string, nodeType rayv1.RayNodeType) string
- func GenerateIngressName(clusterName string) string
- func GenerateJsonHash(obj interface{}) (string, error)
- func GenerateRayClusterName(serviceName string) string
- func GenerateRayJobId(rayjob string) string
- func GenerateRouteName(clusterName string) string
- func GenerateServeServiceLabel(serviceName string) string
- func GenerateServeServiceName(serviceName string) string
- func GetClusterDomainName() string
- func GetHeadGroupServiceAccountName(cluster *rayv1.RayCluster) string
- func GetNamespace(metaData metav1.ObjectMeta) string
- func GetRayDashboardClientFunc(mgr ctrl.Manager, useKubernetesProxy bool) func() RayDashboardClientInterface
- func GetRayHttpProxyClientFunc(mgr ctrl.Manager, useKubernetesProxy bool) func() RayHttpProxyClientInterface
- func GetWorkerGroupDesiredReplicas(ctx context.Context, workerGroupSpec rayv1.WorkerGroupSpec) int32
- func IsCreated(pod *corev1.Pod) bool
- func IsJobFinished(j *batchv1.Job) (batchv1.JobConditionType, bool)
- func IsRunningAndReady(pod *corev1.Pod) bool
- func PodGenerateName(prefix string, nodeType rayv1.RayNodeType) string
- func RayClusterReplicaFailureReason(err error) string
- func RayOriginatedFromCRDLabelValue(crdType CRDType) string
- type BaseDashboardClient
- type CRDType
- type ClientProvider
- type FakeRayDashboardClient
- func (r *FakeRayDashboardClient) DeleteJob(_ context.Context, _ string) error
- func (r *FakeRayDashboardClient) GetJobInfo(ctx context.Context, jobId string) (*RayJobInfo, error)
- func (r *FakeRayDashboardClient) GetJobLog(_ context.Context, _ string) (*string, error)
- func (r *FakeRayDashboardClient) GetMultiApplicationStatus(_ context.Context) (map[string]*ServeApplicationStatus, error)
- func (r *FakeRayDashboardClient) GetServeDetails(_ context.Context) (*ServeDetails, error)
- func (r *FakeRayDashboardClient) InitClient(_ context.Context, url string, _ *rayv1.RayCluster) error
- func (r *FakeRayDashboardClient) ListJobs(ctx context.Context) (*[]RayJobInfo, error)
- func (r *FakeRayDashboardClient) SetMultiApplicationStatuses(statuses map[string]*ServeApplicationStatus)
- func (r *FakeRayDashboardClient) StopJob(_ context.Context, _ string) (err error)
- func (r *FakeRayDashboardClient) SubmitJob(_ context.Context, _ *rayv1.RayJob) (jobId string, err error)
- func (r *FakeRayDashboardClient) SubmitJobReq(_ context.Context, _ *RayJobRequest, _ *string) (string, error)
- func (r *FakeRayDashboardClient) UpdateDeployments(_ context.Context, _ []byte) error
- type FakeRayHttpProxyClient
- type RayDashboardClient
- func (r *RayDashboardClient) ConvertServeDetailsToApplicationStatuses(serveDetails *ServeDetails) (map[string]*ServeApplicationStatus, error)
- func (r *RayDashboardClient) DeleteJob(ctx context.Context, jobName string) error
- func (r *RayDashboardClient) GetJobInfo(ctx context.Context, jobId string) (*RayJobInfo, error)
- func (r *RayDashboardClient) GetJobLog(ctx context.Context, jobName string) (*string, error)
- func (r *RayDashboardClient) GetMultiApplicationStatus(ctx context.Context) (map[string]*ServeApplicationStatus, error)
- func (r *RayDashboardClient) GetServeDetails(ctx context.Context) (*ServeDetails, error)
- func (r *RayDashboardClient) InitClient(ctx context.Context, url string, rayCluster *rayv1.RayCluster) error
- func (r *RayDashboardClient) ListJobs(ctx context.Context) (*[]RayJobInfo, error)
- func (r *RayDashboardClient) StopJob(ctx context.Context, jobName string) (err error)
- func (r *RayDashboardClient) SubmitJob(ctx context.Context, rayJob *rayv1.RayJob) (jobId string, err error)
- func (r *RayDashboardClient) SubmitJobReq(ctx context.Context, request *RayJobRequest, name *string) (jobId string, err error)
- func (r *RayDashboardClient) UpdateDeployments(ctx context.Context, configJson []byte) error
- type RayDashboardClientInterface
- type RayHttpProxyClient
- type RayHttpProxyClientInterface
- type RayJobInfo
- type RayJobLogsResponse
- type RayJobRequest
- type RayJobResponse
- type RayJobStopResponse
- type RuntimeEnvType
- type ServeApplicationDetails
- type ServeApplicationStatus
- type ServeDeploymentDetails
- type ServeDeploymentStatus
- type ServeDetails
- type ServiceType
Constants ¶
const ( // Default application name DefaultServeAppName = "default" // RayOriginatedFromCRNameLabelKey and RayOriginatedFromCRDLabelKey are the labels used to associate the root KubeRay Custom Resource. // [Example 1] If we create a RayJob named `myjob`, then (1) the RayCluster and (2) the submitter K8s Job will have a // `ray.io/originated-from-cr-name=myjob` and a `ray.io/originated-from-crd=RayJob` label. // // [Example 2] If we create a RayService named `mysvc`, then (1) the RayCluster and (2) the Kubernetes services managed by the RayService // will have a `ray.io/originated-from-cr-name=mysvc` and a `ray.io/originated-from-crd=RayService` label. RayOriginatedFromCRNameLabelKey = "ray.io/originated-from-cr-name" RayOriginatedFromCRDLabelKey = "ray.io/originated-from-crd" RayClusterLabelKey = "ray.io/cluster" RayNodeTypeLabelKey = "ray.io/node-type" RayNodeGroupLabelKey = "ray.io/group" RayNodeLabelKey = "ray.io/is-ray-node" RayIDLabelKey = "ray.io/identifier" RayClusterServingServiceLabelKey = "ray.io/serve" RayClusterHeadlessServiceLabelKey = "ray.io/headless-worker-svc" HashWithoutReplicasAndWorkersToDeleteKey = "ray.io/hash-without-replicas-and-workers-to-delete" NumWorkerGroupsKey = "ray.io/num-worker-groups" KubeRayVersion = "ray.io/kuberay-version" // In KubeRay, the Ray container must be the first application container in a head or worker Pod. RayContainerIndex = 0 // Batch scheduling labels // TODO(tgaddair): consider making these part of the CRD RaySchedulerName = "ray.io/scheduler-name" RayPriorityClassName = "ray.io/priority-class-name" // Ray GCS FT related annotations RayFTEnabledAnnotationKey = "ray.io/ft-enabled" RayExternalStorageNSAnnotationKey = "ray.io/external-storage-namespace" // If this annotation is set to "true", the KubeRay operator will not modify the container's command. // However, the generated `ray start` command will still be stored in the container's environment variable // `KUBERAY_GEN_RAY_START_CMD`. 
RayOverwriteContainerCmdAnnotationKey = "ray.io/overwrite-container-cmd" // Finalizers for GCS fault tolerance GCSFaultToleranceRedisCleanupFinalizer = "ray.io/gcs-ft-redis-cleanup-finalizer" // EnableServeServiceKey is exclusively utilized to indicate if a RayCluster is directly used for serving. // See https://github.com/ray-project/kuberay/pull/1672 for more details. EnableServeServiceKey = "ray.io/enable-serve-service" EnableServeServiceTrue = "true" EnableRayClusterServingServiceTrue = "true" EnableRayClusterServingServiceFalse = "false" KubernetesApplicationNameLabelKey = "app.kubernetes.io/name" KubernetesCreatedByLabelKey = "app.kubernetes.io/created-by" // Use as separator for pod name, for example, raycluster-small-size-worker-0 DashSymbol = "-" // Use as default port DefaultClientPort = 10001 // For Ray >= 1.11.0, "DefaultRedisPort" actually refers to the GCS server port. // However, the role of this port is unchanged in Ray APIs like ray.init and ray start. // This is the port used by Ray workers and drivers inside the Ray cluster to connect to the Ray head. DefaultRedisPort = 6379 DefaultDashboardPort = 8265 DefaultMetricsPort = 8080 DefaultDashboardAgentListenPort = 52365 DefaultServingPort = 8000 ClientPortName = "client" RedisPortName = "redis" DashboardPortName = "dashboard" MetricsPortName = "metrics" ServingPortName = "serve" // The default AppProtocol for Kubernetes service DefaultServiceAppProtocol = "tcp" // The default application name ApplicationName = "kuberay" // The default name for kuberay operator ComponentName = "kuberay-operator" // The default suffix for Headless Service for multi-host worker groups. // The full name will be of the form "${RayCluster_Name}-headless-worker-svc". 
HeadlessServiceSuffix = "headless-worker-svc" // Use as container env variable RAY_CLUSTER_NAME = "RAY_CLUSTER_NAME" RAY_IP = "RAY_IP" FQ_RAY_IP = "FQ_RAY_IP" RAY_PORT = "RAY_PORT" RAY_ADDRESS = "RAY_ADDRESS" REDIS_PASSWORD = "REDIS_PASSWORD" RAY_DASHBOARD_ENABLE_K8S_DISK_USAGE = "RAY_DASHBOARD_ENABLE_K8S_DISK_USAGE" RAY_EXTERNAL_STORAGE_NS = "RAY_external_storage_namespace" RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S = "RAY_gcs_rpc_server_reconnect_timeout_s" RAY_TIMEOUT_MS_TASK_WAIT_FOR_DEATH_INFO = "RAY_timeout_ms_task_wait_for_death_info" RAY_GCS_SERVER_REQUEST_TIMEOUT_SECONDS = "RAY_gcs_server_request_timeout_seconds" RAY_SERVE_KV_TIMEOUT_S = "RAY_SERVE_KV_TIMEOUT_S" RAY_USAGE_STATS_KUBERAY_IN_USE = "RAY_USAGE_STATS_KUBERAY_IN_USE" RAY_USAGE_STATS_EXTRA_TAGS = "RAY_USAGE_STATS_EXTRA_TAGS" RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV = "RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV" RAYCLUSTER_DEFAULT_REQUEUE_SECONDS = 300 KUBERAY_GEN_RAY_START_CMD = "KUBERAY_GEN_RAY_START_CMD" // Environment variables for RayJob submitter Kubernetes Job. // Example: ray job submit --address=http://$RAY_DASHBOARD_ADDRESS --submission-id=$RAY_JOB_SUBMISSION_ID ... RAY_DASHBOARD_ADDRESS = "RAY_DASHBOARD_ADDRESS" RAY_JOB_SUBMISSION_ID = "RAY_JOB_SUBMISSION_ID" // Environment variables for Ray Autoscaler V2. // The value of RAY_CLOUD_INSTANCE_ID is the Pod name for Autoscaler V2 alpha. This may change in the future. RAY_CLOUD_INSTANCE_ID = "RAY_CLOUD_INSTANCE_ID" // The value of RAY_NODE_TYPE_NAME is the name of the node group (i.e., the value of the "ray.io/group" label). RAY_NODE_TYPE_NAME = "RAY_NODE_TYPE_NAME" // This KubeRay operator environment variable is used to determine if random Pod // deletion should be enabled. Note that this only takes effect when autoscaling // is enabled for the RayCluster. This is a feature flag for v0.6.0, and will be // removed if the default behavior is stable enough. 
ENABLE_RANDOM_POD_DELETE = "ENABLE_RANDOM_POD_DELETE" // This KubeRay operator environment variable is used to determine if the Redis // cleanup Job should be enabled. This is a feature flag for v1.0.0. ENABLE_GCS_FT_REDIS_CLEANUP = "ENABLE_GCS_FT_REDIS_CLEANUP" // This environment variable for the KubeRay operator is used to determine whether to enable // the injection of readiness and liveness probes into Ray head and worker containers. // Enabling this feature contributes to the robustness of Ray clusters. It is currently a feature // flag for v1.1.0 and will be removed if the behavior proves to be stable enough. ENABLE_PROBES_INJECTION = "ENABLE_PROBES_INJECTION" // If set to true, kuberay creates a normal ClusterIP service for a Ray Head instead of a Headless service. ENABLE_RAY_HEAD_CLUSTER_IP_SERVICE = "ENABLE_RAY_HEAD_CLUSTER_IP_SERVICE" // If set to true, the RayJob CR itself will be deleted if shutdownAfterJobFinishes is set to true. Note that all resources created by the RayJob CR will be deleted, including the K8s Job. DELETE_RAYJOB_CR_AFTER_JOB_FINISHES = "DELETE_RAYJOB_CR_AFTER_JOB_FINISHES" // Ray core default configurations DefaultWorkerRayGcsReconnectTimeoutS = "600" LOCAL_HOST = "127.0.0.1" // Ray FT default readiness probe values DefaultReadinessProbeInitialDelaySeconds = 10 DefaultReadinessProbeTimeoutSeconds = 2 DefaultReadinessProbePeriodSeconds = 5 DefaultReadinessProbeSuccessThreshold = 1 DefaultReadinessProbeFailureThreshold = 10 ServeReadinessProbeFailureThreshold = 1 // Ray FT default liveness probe values DefaultLivenessProbeInitialDelaySeconds = 30 DefaultLivenessProbeTimeoutSeconds = 2 DefaultLivenessProbePeriodSeconds = 5 DefaultLivenessProbeSuccessThreshold = 1 DefaultLivenessProbeFailureThreshold = 120 // Ray health check related configurations // Note: Since the Raylet process and the dashboard agent process are fate-sharing, // only one of them needs to be checked. 
So, RayAgentRayletHealthPath accesses the dashboard agent's API endpoint // to check the health of the Raylet process. // TODO (kevin85421): Should we take the dashboard process into account? RayAgentRayletHealthPath = "api/local_raylet_healthz" RayDashboardGCSHealthPath = "api/gcs_healthz" RayServeProxyHealthPath = "-/healthz" BaseWgetHealthCommand = "wget -T %d -q -O- http://localhost:%d/%s | grep success" // Finalizers for RayJob RayJobStopJobFinalizer = "ray.io/rayjob-finalizer" // RayNodeHeadGroupLabelValue is the value for the RayNodeGroupLabelKey label on a head node RayNodeHeadGroupLabelValue = "headgroup" // KUBERAY_VERSION is the build version of KubeRay. // The version is included in the RAY_USAGE_STATS_EXTRA_TAGS environment variable // as well as the user-agent. This constant is updated before release. // TODO: Update KUBERAY_VERSION to be a build-time variable. KUBERAY_VERSION = "v1.2.1" )
const ( RayClusterSuffix = "-raycluster-" ServeName = "serve" ClusterDomainEnvKey = "CLUSTER_DOMAIN" DefaultDomainName = "cluster.local" )
Variables ¶
var ( ErrFailedDeleteAllPods = &errRayClusterReplicaFailure{reason: "FailedDeleteAllPods"} ErrFailedDeleteHeadPod = &errRayClusterReplicaFailure{reason: "FailedDeleteHeadPod"} ErrFailedCreateHeadPod = &errRayClusterReplicaFailure{reason: "FailedCreateHeadPod"} ErrFailedDeleteWorkerPod = &errRayClusterReplicaFailure{reason: "FailedDeleteWorkerPod"} ErrFailedCreateWorkerPod = &errRayClusterReplicaFailure{reason: "FailedCreateWorkerPod"} )
These are markers used by the calculateStatus() for setting the RayClusterReplicaFailure condition.
var ( // Multi-application URL paths ServeDetailsPath = "/api/serve/applications/" DeployPathV2 = "/api/serve/applications/" // Job URL paths JobPath = "/api/jobs/" )
Functions ¶
func CalculateAvailableReplicas ¶
CalculateAvailableReplicas calculates available worker replicas at the cluster level. A worker is available if its Pod is running.
func CalculateDesiredReplicas ¶
func CalculateDesiredReplicas(ctx context.Context, cluster *rayv1.RayCluster) int32
CalculateDesiredReplicas calculates desired worker replicas at the cluster level.
func CalculateDesiredResources ¶
func CalculateDesiredResources(cluster *rayv1.RayCluster) corev1.ResourceList
func CalculateMaxReplicas ¶
func CalculateMaxReplicas(cluster *rayv1.RayCluster) int32
CalculateMaxReplicas calculates max worker replicas at the cluster level
func CalculateMinReplicas ¶
func CalculateMinReplicas(cluster *rayv1.RayCluster) int32
CalculateMinReplicas calculates min worker replicas at the cluster level
func CalculateMinResources ¶
func CalculateMinResources(cluster *rayv1.RayCluster) corev1.ResourceList
func CalculateReadyReplicas ¶ added in v1.2.0
CalculateReadyReplicas calculates ready worker replicas at the cluster level. A worker is ready if its Pod has a PodCondition with type == Ready and status == True.
func CheckAllPodsRunning ¶ added in v0.6.0
CheckAllPodsRunning returns true if all the RayCluster's Pods are running, false otherwise
func CheckLabel ¶
CheckLabel makes sure the label value does not start with a punctuation character and the total length is < 63 characters.
func CheckName ¶
CheckName makes sure the name does not start with a numeric value and the total length is < 63 characters.
func CheckRouteName ¶ added in v1.1.0
func CompareJsonStruct ¶
func CompareJsonStruct(objA interface{}, objB interface{}) bool
CompareJsonStruct provides a better way to compare whether two objects are the same when they are json/yaml structs. reflect.DeepEqual will fail in some cases.
func EnvVarByName ¶ added in v1.1.0
EnvVarByName returns an entry in []corev1.EnvVar that matches a name. Also returns a bool for whether the env var exists.
func ExtractRayIPFromFQDN ¶
ExtractRayIPFromFQDN extracts the head service name (i.e., RAY_IP, deprecated) from a fully qualified domain name (FQDN). This function is provided for backward compatibility purposes only.
func FetchHeadServiceURL ¶ added in v0.6.0
func FetchHeadServiceURL(ctx context.Context, cli client.Client, rayCluster *rayv1.RayCluster, defaultPortName string) (string, error)
FetchHeadServiceURL fetches the URL that consists of the FQDN for the RayCluster's head service and the port with the given port name (defaultPortName).
func FindContainerPort ¶ added in v0.6.0
FindContainerPort searches for a specific port $portName in the container. If the port is found in the container, the corresponding port is returned. If the port is not found, the $defaultPort is returned instead.
func FindHeadPodReadyCondition ¶ added in v1.2.0
func FormatInt32 ¶
FormatInt32 returns the string representation of the int32 value n.
func GenerateFQDNServiceName ¶
func GenerateFQDNServiceName(ctx context.Context, cluster rayv1.RayCluster, namespace string) string
GenerateFQDNServiceName generates a Fully Qualified Domain Name.
func GenerateHeadServiceName ¶ added in v1.0.0
func GenerateHeadServiceName(crdType CRDType, clusterSpec rayv1.RayClusterSpec, ownerName string) (string, error)
GenerateHeadServiceName generates a Ray head service name. Note that there are two types of head services:
(1) For RayCluster: If `HeadService.Name` in the cluster spec is not empty, it will be used as the head service name. Otherwise, the name is generated based on the RayCluster CR's name. (2) For RayService: It's important to note that the RayService CR not only possesses a head service owned by its RayCluster CR but also maintains a separate head service for itself to facilitate zero-downtime upgrades. The name of the head service owned by the RayService CR is generated based on the RayService CR's name.
@param crdType: The type of the CRD that owns the head service. @param clusterSpec: `RayClusterSpec` @param ownerName: The name of the CR that owns the head service.
func GenerateIdentifier ¶
func GenerateIdentifier(clusterName string, nodeType rayv1.RayNodeType) string
GenerateIdentifier generates identifier of same group pods
func GenerateIngressName ¶
GenerateIngressName generates an ingress name from cluster name
func GenerateJsonHash ¶
GenerateJsonHash JSON-serializes obj and returns its hash string.
func GenerateRayClusterName ¶
GenerateRayClusterName generates a ray cluster name from ray service name
func GenerateRayJobId ¶
GenerateRayJobId generates a ray job id for submission
func GenerateRouteName ¶ added in v1.0.0
GenerateRouteName generates a route name from cluster name
func GenerateServeServiceLabel ¶
GenerateServeServiceLabel generates label value for serve service selector.
func GenerateServeServiceName ¶
GenerateServeServiceName generates name for serve service.
func GetClusterDomainName ¶
func GetClusterDomainName() string
GetClusterDomainName returns cluster's domain name
func GetHeadGroupServiceAccountName ¶
func GetHeadGroupServiceAccountName(cluster *rayv1.RayCluster) string
GetHeadGroupServiceAccountName returns the head group service account if it exists. Otherwise, it returns the name of the cluster itself.
func GetNamespace ¶
func GetNamespace(metaData metav1.ObjectMeta) string
GetNamespace returns the namespace.
func GetRayDashboardClientFunc ¶
func GetRayDashboardClientFunc(mgr ctrl.Manager, useKubernetesProxy bool) func() RayDashboardClientInterface
func GetRayHttpProxyClientFunc ¶
func GetRayHttpProxyClientFunc(mgr ctrl.Manager, useKubernetesProxy bool) func() RayHttpProxyClientInterface
func GetWorkerGroupDesiredReplicas ¶ added in v1.0.0
func GetWorkerGroupDesiredReplicas(ctx context.Context, workerGroupSpec rayv1.WorkerGroupSpec) int32
func IsJobFinished ¶ added in v1.1.0
func IsJobFinished(j *batchv1.Job) (batchv1.JobConditionType, bool)
IsJobFinished checks whether the given Job has finished execution. It does not discriminate between successful and failed terminations. src: https://github.com/kubernetes/kubernetes/blob/a8a1abc25cad87333840cd7d54be2efaf31a3177/pkg/controller/job/utils.go#L26
func IsRunningAndReady ¶
IsRunningAndReady returns true if pod is in the PodRunning Phase and it has a condition of PodReady.
func PodGenerateName ¶ added in v1.2.0
func PodGenerateName(prefix string, nodeType rayv1.RayNodeType) string
PodGenerateName returns the value that should be used for a Pod's generateName based on the RayCluster name and node type (head or worker).
func RayClusterReplicaFailureReason ¶ added in v1.2.0
func RayOriginatedFromCRDLabelValue ¶ added in v1.1.0
RayOriginatedFromCRDLabelValue generates a value for the label RayOriginatedFromCRDLabelKey. This is also the only function to construct the label filter of resources originated from a given CRDType.
Types ¶
type BaseDashboardClient ¶ added in v0.6.0
type BaseDashboardClient struct {
// contains filtered or unexported fields
}
type CRDType ¶ added in v1.0.0
type CRDType string
TODO (kevin85421): Define CRDType here rather than constant.go to avoid circular dependency.
func GetCRDType ¶ added in v1.1.0
type ClientProvider ¶ added in v1.2.0
type ClientProvider interface { GetDashboardClient(mgr manager.Manager) func() RayDashboardClientInterface GetHttpProxyClient(mgr manager.Manager) func() RayHttpProxyClientInterface }
type FakeRayDashboardClient ¶
type FakeRayDashboardClient struct { GetJobInfoMock atomic.Pointer[func(context.Context, string) (*RayJobInfo, error)] BaseDashboardClient // contains filtered or unexported fields }
func (*FakeRayDashboardClient) DeleteJob ¶ added in v1.1.0
func (r *FakeRayDashboardClient) DeleteJob(_ context.Context, _ string) error
func (*FakeRayDashboardClient) GetJobInfo ¶
func (r *FakeRayDashboardClient) GetJobInfo(ctx context.Context, jobId string) (*RayJobInfo, error)
func (*FakeRayDashboardClient) GetMultiApplicationStatus ¶ added in v0.6.0
func (r *FakeRayDashboardClient) GetMultiApplicationStatus(_ context.Context) (map[string]*ServeApplicationStatus, error)
func (*FakeRayDashboardClient) GetServeDetails ¶ added in v0.6.0
func (r *FakeRayDashboardClient) GetServeDetails(_ context.Context) (*ServeDetails, error)
func (*FakeRayDashboardClient) InitClient ¶
func (r *FakeRayDashboardClient) InitClient(_ context.Context, url string, _ *rayv1.RayCluster) error
func (*FakeRayDashboardClient) ListJobs ¶ added in v1.1.0
func (r *FakeRayDashboardClient) ListJobs(ctx context.Context) (*[]RayJobInfo, error)
func (*FakeRayDashboardClient) SetMultiApplicationStatuses ¶ added in v0.6.0
func (r *FakeRayDashboardClient) SetMultiApplicationStatuses(statuses map[string]*ServeApplicationStatus)
func (*FakeRayDashboardClient) StopJob ¶
func (r *FakeRayDashboardClient) StopJob(_ context.Context, _ string) (err error)
func (*FakeRayDashboardClient) SubmitJobReq ¶ added in v1.1.0
func (r *FakeRayDashboardClient) SubmitJobReq(_ context.Context, _ *RayJobRequest, _ *string) (string, error)
func (*FakeRayDashboardClient) UpdateDeployments ¶
func (r *FakeRayDashboardClient) UpdateDeployments(_ context.Context, _ []byte) error
type FakeRayHttpProxyClient ¶
type FakeRayHttpProxyClient struct {
// contains filtered or unexported fields
}
func (*FakeRayHttpProxyClient) CheckProxyActorHealth ¶ added in v1.2.0
func (r *FakeRayHttpProxyClient) CheckProxyActorHealth(_ context.Context) error
func (*FakeRayHttpProxyClient) InitClient ¶
func (r *FakeRayHttpProxyClient) InitClient()
func (*FakeRayHttpProxyClient) SetHostIp ¶
func (r *FakeRayHttpProxyClient) SetHostIp(hostIp, _, _ string, port int)
type RayDashboardClient ¶
type RayDashboardClient struct { BaseDashboardClient // contains filtered or unexported fields }
func (*RayDashboardClient) ConvertServeDetailsToApplicationStatuses ¶ added in v0.6.0
func (r *RayDashboardClient) ConvertServeDetailsToApplicationStatuses(serveDetails *ServeDetails) (map[string]*ServeApplicationStatus, error)
func (*RayDashboardClient) DeleteJob ¶ added in v1.1.0
func (r *RayDashboardClient) DeleteJob(ctx context.Context, jobName string) error
func (*RayDashboardClient) GetJobInfo ¶
func (r *RayDashboardClient) GetJobInfo(ctx context.Context, jobId string) (*RayJobInfo, error)
Note that RayJobInfo and error can't be nil at the same time. Please make sure that if the Ray job with JobId can't be found, a BadRequest error is returned.
func (*RayDashboardClient) GetMultiApplicationStatus ¶ added in v0.6.0
func (r *RayDashboardClient) GetMultiApplicationStatus(ctx context.Context) (map[string]*ServeApplicationStatus, error)
func (*RayDashboardClient) GetServeDetails ¶ added in v0.6.0
func (r *RayDashboardClient) GetServeDetails(ctx context.Context) (*ServeDetails, error)
GetServeDetails gets details on all live applications on the Ray cluster.
func (*RayDashboardClient) InitClient ¶
func (r *RayDashboardClient) InitClient(ctx context.Context, url string, rayCluster *rayv1.RayCluster) error
func (*RayDashboardClient) ListJobs ¶ added in v1.1.0
func (r *RayDashboardClient) ListJobs(ctx context.Context) (*[]RayJobInfo, error)
func (*RayDashboardClient) StopJob ¶
func (r *RayDashboardClient) StopJob(ctx context.Context, jobName string) (err error)
func (*RayDashboardClient) SubmitJobReq ¶ added in v1.1.0
func (r *RayDashboardClient) SubmitJobReq(ctx context.Context, request *RayJobRequest, name *string) (jobId string, err error)
func (*RayDashboardClient) UpdateDeployments ¶
func (r *RayDashboardClient) UpdateDeployments(ctx context.Context, configJson []byte) error
UpdateDeployments updates the deployments in the Ray cluster.
type RayDashboardClientInterface ¶
type RayDashboardClientInterface interface { InitClient(ctx context.Context, url string, rayCluster *rayv1.RayCluster) error UpdateDeployments(ctx context.Context, configJson []byte) error // V2/multi-app Rest API GetServeDetails(ctx context.Context) (*ServeDetails, error) GetMultiApplicationStatus(context.Context) (map[string]*ServeApplicationStatus, error) GetJobInfo(ctx context.Context, jobId string) (*RayJobInfo, error) ListJobs(ctx context.Context) (*[]RayJobInfo, error) SubmitJob(ctx context.Context, rayJob *rayv1.RayJob) (string, error) SubmitJobReq(ctx context.Context, request *RayJobRequest, name *string) (string, error) GetJobLog(ctx context.Context, jobName string) (*string, error) StopJob(ctx context.Context, jobName string) error DeleteJob(ctx context.Context, jobName string) error }
type RayHttpProxyClient ¶
type RayHttpProxyClient struct {
// contains filtered or unexported fields
}
func (*RayHttpProxyClient) CheckProxyActorHealth ¶ added in v1.2.0
func (r *RayHttpProxyClient) CheckProxyActorHealth(ctx context.Context) error
CheckProxyActorHealth checks the health status of the Ray Serve proxy actor.
func (*RayHttpProxyClient) InitClient ¶
func (r *RayHttpProxyClient) InitClient()
func (*RayHttpProxyClient) SetHostIp ¶
func (r *RayHttpProxyClient) SetHostIp(hostIp, podNamespace, podName string, port int)
type RayJobInfo ¶
type RayJobInfo struct { ErrorType *string `json:"error_type,omitempty"` Metadata map[string]string `json:"metadata,omitempty"` RuntimeEnv RuntimeEnvType `json:"runtime_env,omitempty"` JobStatus rayv1.JobStatus `json:"status,omitempty"` Entrypoint string `json:"entrypoint,omitempty"` JobId string `json:"job_id,omitempty"` SubmissionId string `json:"submission_id,omitempty"` Message string `json:"message,omitempty"` StartTime uint64 `json:"start_time,omitempty"` EndTime uint64 `json:"end_time,omitempty"` }
RayJobInfo is the response of "ray job status" api. Reference to https://docs.ray.io/en/latest/cluster/running-applications/job-submission/rest.html#ray-job-rest-api-spec Reference to https://github.com/ray-project/ray/blob/cfbf98c315cfb2710c56039a3c96477d196de049/dashboard/modules/job/pydantic_models.py#L38-L107
type RayJobLogsResponse ¶ added in v1.1.0
type RayJobLogsResponse struct {
Logs string `json:"logs,omitempty"`
}
type RayJobRequest ¶
type RayJobRequest struct { RuntimeEnv RuntimeEnvType `json:"runtime_env,omitempty"` Metadata map[string]string `json:"metadata,omitempty"` Resources map[string]float32 `json:"entrypoint_resources,omitempty"` Entrypoint string `json:"entrypoint"` SubmissionId string `json:"submission_id,omitempty"` NumCpus float32 `json:"entrypoint_num_cpus,omitempty"` NumGpus float32 `json:"entrypoint_num_gpus,omitempty"` }
RayJobRequest is the request body to submit. Reference to https://docs.ray.io/en/latest/cluster/running-applications/job-submission/rest.html#ray-job-rest-api-spec Reference to https://github.com/ray-project/ray/blob/cfbf98c315cfb2710c56039a3c96477d196de049/dashboard/modules/job/common.py#L325-L353
func ConvertRayJobToReq ¶
func ConvertRayJobToReq(rayJob *rayv1.RayJob) (*RayJobRequest, error)
type RayJobResponse ¶
type RayJobResponse struct {
JobId string `json:"job_id"`
}
type RayJobStopResponse ¶
type RayJobStopResponse struct {
Stopped bool `json:"stopped"`
}
type RuntimeEnvType ¶ added in v1.1.0
type RuntimeEnvType map[string]interface{}
func UnmarshalRuntimeEnvYAML ¶ added in v1.1.0
func UnmarshalRuntimeEnvYAML(runtimeEnvYAML string) (RuntimeEnvType, error)
type ServeApplicationDetails ¶ added in v0.6.0
type ServeApplicationDetails struct { Deployments map[string]ServeDeploymentDetails `json:"deployments"` ServeApplicationStatus RoutePrefix string `json:"route_prefix,omitempty"` DocsPath string `json:"docs_path,omitempty"` }
type ServeApplicationStatus ¶ added in v0.6.0
type ServeApplicationStatus struct { Deployments map[string]ServeDeploymentStatus `json:"deployments"` Name string `json:"name,omitempty"` Status string `json:"status"` Message string `json:"message,omitempty"` }
Describes the status of an application
type ServeDeploymentDetails ¶ added in v0.6.0
type ServeDeploymentDetails struct { ServeDeploymentStatus RoutePrefix string `json:"route_prefix,omitempty"` }
V2 Serve API Response format. These extend the ServeDeploymentStatus and ServeApplicationStatus structs, but contain more information such as route prefix because the V2/multi-app GET API fetches general metadata, not just statuses.
type ServeDeploymentStatus ¶ added in v0.6.0
type ServeDeploymentStatus struct { Name string `json:"name,omitempty"` Status string `json:"status,omitempty"` Message string `json:"message,omitempty"` }
ServeDeploymentStatus and ServeApplicationStatus describe the format of status(es) that will be returned by the GetMultiApplicationStatus method of the dashboard client. ServeDeploymentStatus describes the status of a deployment.
type ServeDetails ¶ added in v0.6.0
type ServeDetails struct { Applications map[string]ServeApplicationDetails `json:"applications"` DeployMode string `json:"deploy_mode,omitempty"` }
type ServiceType ¶ added in v1.1.0
type ServiceType string
const ( HeadService ServiceType = "headService" ServingService ServiceType = "serveService" )