Documentation ¶
Index ¶
- Constants
- Variables
- type BaseDriver
- func (d *BaseDriver) CreateCacheStatus(opt *common.ServerOptions, status *v1alpha1.CacheStatus) error
- func (d *BaseDriver) CreateClearJobOptions(opt *v1alpha1.ClearJobOptions, ctx *common.RequestContext) error
- func (d *BaseDriver) CreatePVC(pvc *v1.PersistentVolumeClaim, ctx *common.RequestContext) error
- func (d *BaseDriver) CreateRmrJobOptions(opt *v1alpha1.RmrJobOptions, ctx *common.RequestContext) error
- func (d *BaseDriver) CreateService(service *v1.Service, ctx *common.RequestContext) error
- func (d *BaseDriver) DoClearJob(ctx context.Context, opt *v1alpha1.ClearJobOptions, log logr.Logger) error
- func (d *BaseDriver) GetLabel(sampleSetName string) string
- func (d *BaseDriver) GetRuntimeName(sampleSetName string) string
- func (d *BaseDriver) GetServiceName(sampleSetName string) string
- type Driver
- type JuiceFS
- func (j *JuiceFS) CreatePV(pv *v1.PersistentVolume, ctx *common.RequestContext) error
- func (j *JuiceFS) CreateRuntime(ds *appv1.StatefulSet, ctx *common.RequestContext) error
- func (j *JuiceFS) CreateSyncJobOptions(opt *v1alpha1.SyncJobOptions, ctx *common.RequestContext) error
- func (j *JuiceFS) CreateWarmupJobOptions(opt *v1alpha1.WarmupJobOptions, ctx *common.RequestContext) error
- func (j *JuiceFS) DoRmrJob(ctx context.Context, opt *v1alpha1.RmrJobOptions, log logr.Logger) error
- func (j *JuiceFS) DoSyncJob(ctx context.Context, opt *v1alpha1.SyncJobOptions, log logr.Logger) error
- func (j *JuiceFS) DoWarmupJob(ctx context.Context, opt *v1alpha1.WarmupJobOptions, log logr.Logger) error
Constants ¶
const ( JuiceFSDriver v1alpha1.DriverName = "juicefs" JuiceFSCacheDirOption = "cache-dir" JuiceFSCacheSizeOption = "cache-size" JuiceFSCSIDriverName = "csi.juicefs.com" )
const ( JuiceFSSecretName string = "name" JuiceFSSecretStorage string = "storage" JuiceFSSecretMetaURL string = "metaurl" JuiceFSSecretBucket string = "bucket" JuiceFSSecretSK string = "secret-key" JuiceFSSecretAK string = "access-key" )
const (
DefaultDriver = JuiceFSDriver
)
Variables ¶
var ( JuiceFSSecretDataKeys []string JuiceFSSupportStorage []string JuiceFSDefaultMountOptions *v1alpha1.JuiceFSMountOptions )
var (
StorageClassName = "paddle-operator"
)
Functions ¶
This section is empty.
Types ¶
type BaseDriver ¶
type BaseDriver struct {
Name v1alpha1.DriverName
}
func (*BaseDriver) CreateCacheStatus ¶
func (d *BaseDriver) CreateCacheStatus(opt *common.ServerOptions, status *v1alpha1.CacheStatus) error
func (*BaseDriver) CreateClearJobOptions ¶
func (d *BaseDriver) CreateClearJobOptions(opt *v1alpha1.ClearJobOptions, ctx *common.RequestContext) error
func (*BaseDriver) CreatePVC ¶
func (d *BaseDriver) CreatePVC(pvc *v1.PersistentVolumeClaim, ctx *common.RequestContext) error
CreatePVC creates a persistent volume claim that will be used by the runtime server and PaddleJob worker pods.
func (*BaseDriver) CreateRmrJobOptions ¶
func (d *BaseDriver) CreateRmrJobOptions(opt *v1alpha1.RmrJobOptions, ctx *common.RequestContext) error
func (*BaseDriver) CreateService ¶
func (d *BaseDriver) CreateService(service *v1.Service, ctx *common.RequestContext) error
CreateService creates a service for the runtime StatefulSet server.
func (*BaseDriver) DoClearJob ¶
func (d *BaseDriver) DoClearJob(ctx context.Context, opt *v1alpha1.ClearJobOptions, log logr.Logger) error
DoClearJob clears the cache data in the folders specified by the options.
func (*BaseDriver) GetLabel ¶
func (d *BaseDriver) GetLabel(sampleSetName string) string
GetLabel returns a label formed by concatenating PaddleLabel, the driver name, and the SampleSet name.
func (*BaseDriver) GetRuntimeName ¶
func (d *BaseDriver) GetRuntimeName(sampleSetName string) string
func (*BaseDriver) GetServiceName ¶
func (d *BaseDriver) GetServiceName(sampleSetName string) string
type Driver ¶
type Driver interface { // CreatePV create persistent volume by specified driver CreatePV(pv *v1.PersistentVolume, ctx *common.RequestContext) error // CreatePVC create persistent volume claim for PaddleJob CreatePVC(pvc *v1.PersistentVolumeClaim, ctx *common.RequestContext) error // GetLabel get the label to mark pv、pvc and nodes which have cached data GetLabel(sampleSetName string) string // CreateService create a service for runtime StatefulSet CreateService(service *v1.Service, ctx *common.RequestContext) error // GetServiceName get the name of runtime StatefulSet service GetServiceName(sampleSetName string) string // CreateRuntime create runtime StatefulSet to manager cache data CreateRuntime(ds *appv1.StatefulSet, ctx *common.RequestContext) error // GetRuntimeName get the runtime StatefulSet name GetRuntimeName(sampleSetName string) string // CreateSyncJobOptions create the options of sync job, the controller will post it to runtime server CreateSyncJobOptions(opt *v1alpha1.SyncJobOptions, ctx *common.RequestContext) error // CreateWarmupJobOptions create the options of warmup job, this method now only use by SampleJob Controller CreateWarmupJobOptions(opt *v1alpha1.WarmupJobOptions, ctx *common.RequestContext) error // CreateRmrJobOptions create the options of rmr job, used by SampleJob controller CreateRmrJobOptions(opt *v1alpha1.RmrJobOptions, ctx *common.RequestContext) error // CreateClearJobOptions create the options of clear job, used by SampleJob controller CreateClearJobOptions(opt *v1alpha1.ClearJobOptions, ctx *common.RequestContext) error // CreateCacheStatus get the data status in mount and cache paths CreateCacheStatus(opt *common.ServerOptions, status *v1alpha1.CacheStatus) error // DoSyncJob call by runtime server, sync data from remote storage to cache engine DoSyncJob(ctx context.Context, opt *v1alpha1.SyncJobOptions, log logr.Logger) error // DoClearJob call by runtime server, clear the cached data DoClearJob(ctx context.Context, opt 
*v1alpha1.ClearJobOptions, log logr.Logger) error // DoWarmupJob call by runtime server, warmup data to local storage on each node respectively DoWarmupJob(ctx context.Context, opt *v1alpha1.WarmupJobOptions, log logr.Logger) error // DoRmrJob call by runtime server, remove the data of specified path from cache engine DoRmrJob(ctx context.Context, opt *v1alpha1.RmrJobOptions, log logr.Logger) error }
type JuiceFS ¶
type JuiceFS struct {
BaseDriver
}
func NewJuiceFSDriver ¶
func NewJuiceFSDriver() *JuiceFS
func (*JuiceFS) CreatePV ¶
func (j *JuiceFS) CreatePV(pv *v1.PersistentVolume, ctx *common.RequestContext) error
CreatePV creates a JuiceFS persistent volume with mount options. For how to set the parameters of the PV, refer to https://github.com/juicedata/juicefs-csi-driver/tree/master/examples/static-provisioning-mount-options
func (*JuiceFS) CreateRuntime ¶
func (j *JuiceFS) CreateRuntime(ds *appv1.StatefulSet, ctx *common.RequestContext) error
func (*JuiceFS) CreateSyncJobOptions ¶
func (j *JuiceFS) CreateSyncJobOptions(opt *v1alpha1.SyncJobOptions, ctx *common.RequestContext) error
CreateSyncJobOptions creates sync job options from the information in the request context; the options are used by the controller to request that the runtime server perform the data sync task asynchronously. TODO: Support different URI formats for all storage in JuiceFSSupportStorage; some data storage may need additional secret settings in v1alpha1.Source.SecretRef. More info: https://github.com/juicedata/juicesync
func (*JuiceFS) CreateWarmupJobOptions ¶
func (j *JuiceFS) CreateWarmupJobOptions(opt *v1alpha1.WarmupJobOptions, ctx *common.RequestContext) error
func (*JuiceFS) DoRmrJob ¶
DoRmrJob deletes the data in the JuiceFS storage backend under the specified paths. TODO: there are some bugs in the JuiceFS rmr command: after running rmr on paths, the sync command can't work correctly in the container, but POSIX rm works well with the JuiceFS sync command.
func (*JuiceFS) DoSyncJob ¶
func (j *JuiceFS) DoSyncJob(ctx context.Context, opt *v1alpha1.SyncJobOptions, log logr.Logger) error
DoSyncJob syncs data from source databases to the JuiceFS backend object storage; this job only runs in the first runtime server. According to the design concept of containers, it is not good practice to specify the --worker option when doing a sync job. When the sync command is executed in the first runtime server, the data is automatically warmed up on this node, which may cause a duplicate cached data problem in the Kubernetes cluster. TODO: clean cached data after the sync command is done, or is there a better way?
func (*JuiceFS) DoWarmupJob ¶
func (j *JuiceFS) DoWarmupJob(ctx context.Context, opt *v1alpha1.WarmupJobOptions, log logr.Logger) error
DoWarmupJob warms up data from remote object storage to the cache nodes; this can speed up the model training process in a Kubernetes cluster. TODO: different cache nodes should warm up different data; the warmup strategy should match the sampler API defined in the paddle.io submodule, like RandomSampler/SequenceSampler/DistributedBatchSampler, etc. More information: https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/io/Overview_cn.html