Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var PrepCmd = &cli.Command{ Name: "ez-prep", Category: "Easy Commands", ArgsUsage: "<path>", Usage: "Prepare a dataset from a local path", Description: "This commands can be used to prepare a dataset from a local path with minimum configurable parameters.\n" + "For more advanced usage, please use the subcommands under `dataset` and `datasource`.\n" + "You can also use this command for benchmarking with in-memory database and inline preparation, i.e.\n" + " mkdir dataset\n" + " truncate -s 1024G dataset/1T.bin\n" + " singularity ez-prep --output-dir '' --database-file '' -j $(($(nproc) / 4 + 1)) ./dataset", Flags: []cli.Flag{ &cli.StringFlag{ Name: "max-size", Aliases: []string{"M"}, Usage: "Maximum size of the CAR files to be created", Value: "31.5GiB", }, &cli.StringFlag{ Name: "output-dir", Aliases: []string{"o"}, Usage: "Output directory for CAR files. To use inline preparation, use an empty string", Value: "./cars", }, &cli.IntFlag{ Name: "concurrency", Aliases: []string{"j"}, Usage: "Concurrency for packing", Value: 1, }, &cli.StringFlag{ Name: "database-file", Usage: "The database file to store the metadata. To use in memory database, use an empty string.", DefaultText: "./ezprep-<name>.db", }, }, Action: func(c *cli.Context) error { t := time.Now().Unix() path := c.Args().Get(0) if path == "" { return errors.New("path is required") } databaseFile := c.String("database-file") if databaseFile == "" { if c.IsSet("database-file") { databaseFile = "file::memory:?cache=shared" } else { databaseFile = fmt.Sprintf("./ezprep-%d.db", t) } } var err error if !strings.HasPrefix(databaseFile, "file::memory") { databaseFile, err = filepath.Abs(databaseFile) if err != nil { return errors.Wrap(err, "failed to get absolute path") } } db, err := database.Open("sqlite:"+databaseFile, &gorm.Config{}) if err != nil { return errors.Wrapf(err, "failed to open database %s", databaseFile) } err = admin.InitHandler(db) if err != nil { return err } // Step 2, create a dataset var outputDirs []string if c.String("output-dir") != "" { outputDirs = []string{c.String("output-dir")} err = os.MkdirAll(outputDirs[0], 0755) if err != nil { return errors.Wrap(err, "failed to create output directory") } } ds, err2 := dataset.CreateHandler(db, dataset.CreateRequest{ Name: "ez", MaxSizeStr: c.String("max-size"), OutputDirs: outputDirs, }) if err2 != nil { return err2 } path, err = filepath.Abs(path) if err != nil { return errors.Wrap(err, "failed to get absolute path") } source := model.Source{ DatasetID: ds.ID, Type: "local", Path: path, Metadata: model.Metadata(nil), ScanningState: model.Ready, DagGenState: model.Created, } err = db.Create(&source).Error if err != nil { return errors.Wrap(err, "failed to create source") } root := model.Directory{ SourceID: source.ID, Name: path, } err = db.Create(&root).Error if err != nil { return errors.Wrap(err, "failed to create root directory") } worker := datasetworker.NewDatasetWorker( db, datasetworker.DatasetWorkerConfig{ Concurrency: c.Int("concurrency"), EnableScan: true, EnablePack: true, EnableDag: true, ExitOnComplete: true, }) err = worker.Run(c.Context) if err != nil { return err } _, err2 = datasource.DagGenHandler(db, strconv.Itoa(int(source.ID))) if err2 != nil { return err2 } err = worker.Run(c.Context) if err != nil { return err } cars, err2 := dataset.ListPiecesHandler( db, ds.Name, ) if err2 != nil { return err2 } cliutil.PrintToConsole(cars, false, nil) return nil }, }
Functions ¶
This section is empty.
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.