datasetArchive

package
v4.32.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 9, 2024 License: Apache-2.0 Imports: 12 Imported by: 0

Documentation

Overview

Implements archiving and retrieval of dataset source zip files as delivered by GDS. When new data arrives from GDS, we receive it as snapshot zip files, each containing what arrived at a given time. These are all stored in an S3 bucket and, when required, they are extracted on top of each other in timestamp order to get a view of the data at a given time. That view is then used to generate a PIXLISE dataset using the dataset importer.

Example (DecodeArchiveFileName)
// Just a simple one
id, ts, e := DecodeArchiveFileName("161677829-12-06-2022-06-41-00.zip")
fmt.Printf("%v, %v, %v\n", id, ts, e)

// Should accept paths too but snip the path off!
id, ts, e = DecodeArchiveFileName("/Archive/161677829-12-06-2022-06-41-00.zip")
fmt.Printf("%v, %v, %v\n", id, ts, e)

id, ts, e = DecodeArchiveFileName("data/161677829-12-06-2022-06-41-00.zip")
fmt.Printf("%v, %v, %v\n", id, ts, e)

// FAIL: just a timestamp
id, ts, e = DecodeArchiveFileName("12-06-2022-06-41-00.zip")
fmt.Printf("%v, %v, %v\n", id, ts, e)

// FAIL: something else
id, ts, e = DecodeArchiveFileName("readme.txt")
fmt.Printf("%v, %v, %v\n", id, ts, e)

// FAIL: something else with path
id, ts, e = DecodeArchiveFileName("/Archive/readme.txt")
fmt.Printf("%v, %v, %v\n", id, ts, e)
Output:

161677829, 1655016060, <nil>
161677829, 1655016060, <nil>
161677829, 1655016060, <nil>
, 0, DecodeArchiveFileName "12-06-2022-06-41-00.zip" error: parsing time "06-2022-06-41-00": month out of range
, 0, DecodeArchiveFileName unexpected file name: readme.txt
, 0, DecodeArchiveFileName unexpected file name: /Archive/readme.txt
Example (DecodeManualUploadPath)
f, p, e := decodeManualUploadPath("/dataset-addons/dataset123/custom-meta.json")
fmt.Printf("%v, %v, %v\n", f, p, e)

// Contains subdir
f, p, e = decodeManualUploadPath("/dataset-addons/dataset123/MATCHED/something.png")
fmt.Printf("%v, %v, %v\n", f, p, e)

// Contains multiple subdir
f, p, e = decodeManualUploadPath("/dataset-addons/dataset123/MATCHED/more/file.png")
fmt.Printf("%v, %v, %v\n", f, p, e)

// Without leading /
f, p, e = decodeManualUploadPath("dataset-addons/dataset123/MATCHED/more/image.png")
fmt.Printf("%v, %v, %v\n", f, p, e)

// Path too short
f, p, e = decodeManualUploadPath("/dataset-addons/the-dir/invalid.txt")
fmt.Printf("%v, %v, %v\n", f, p, e)

// Path way too short
f, p, e = decodeManualUploadPath("/dataset-addons/invalid.txt")
fmt.Printf("%v, %v, %v\n", f, p, e)
Output:

custom-meta.json, [], <nil>
something.png, [MATCHED], <nil>
file.png, [MATCHED more], <nil>
image.png, [MATCHED more], <nil>
, [], Manual upload path invalid: dataset-addons/the-dir/invalid.txt
, [], Manual upload path invalid: dataset-addons/invalid.txt
Example (GetOrderedArchiveFiles)
ordered, err := getOrderedArchiveFiles([]string{"161677829-12-06-2022-06-41-00.zip", "161677829-12-06-2022-06-42-00.zip", "161677829-12-06-2022-06-39-00.zip", "161677829-12-05-2022-06-40-00.zip"})
fmt.Printf("%v, %v\n", ordered, err)

ordered, err = getOrderedArchiveFiles([]string{"Archive/161677829-12-06-2022-06-41-00.zip", "Archive/161677829-12-06-2022-06-42-00.zip", "Archive/161677829-12-06-2022-06-39-00.zip", "161677829-12-05-2022-06-40-00.zip"})
fmt.Printf("%v, %v\n", ordered, err)

ordered, err = getOrderedArchiveFiles([]string{"161677829-12-06-2022-06-41-00.zip", "161677829-12-06-2022-06-42-00.zip", "161677829-12-06-2022-06-39-00.zip", "161677829-12-05-2022-24-40-00.zip"})
fmt.Printf("%v, %v\n", ordered, err)

ordered, err = getOrderedArchiveFiles([]string{"161677829-12-06-2022-06-41-00.zip", "161677829-12-06-2022-06-42-00.zip", "12-06-2022-06-39-00.zip", "161677829-12-05-2022-06-40-00.zip"})
fmt.Printf("%v, %v\n", ordered, err)

ordered, err = getOrderedArchiveFiles([]string{"161677829-12-06-2022-06-41-00.zip", "161677829-12-06-2022-06-42-00.zip", "readme.txt", "161677829-12-05-2022-06-40-00.zip"})
fmt.Printf("%v, %v\n", ordered, err)

ordered, err = getOrderedArchiveFiles([]string{})
fmt.Printf("%v, %v\n", ordered, err)
Output:

[161677829-12-05-2022-06-40-00.zip 161677829-12-06-2022-06-39-00.zip 161677829-12-06-2022-06-41-00.zip 161677829-12-06-2022-06-42-00.zip], <nil>
[161677829-12-05-2022-06-40-00.zip Archive/161677829-12-06-2022-06-39-00.zip Archive/161677829-12-06-2022-06-41-00.zip Archive/161677829-12-06-2022-06-42-00.zip], <nil>
[], DecodeArchiveFileName "161677829-12-05-2022-24-40-00.zip" error: parsing time "12-05-2022-24-40-00": hour out of range
[], DecodeArchiveFileName "12-06-2022-06-39-00.zip" error: parsing time "06-2022-06-39-00": month out of range
[], DecodeArchiveFileName unexpected file name: readme.txt
[], <nil>

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func AddToDatasetArchive

func AddToDatasetArchive(remoteFS fileaccess.FileAccess, log logger.ILogger, datasetBucket string, sourceBucket string, sourceFilePath string) (bool, error)

func DecodeArchiveFileName

func DecodeArchiveFileName(fileName string) (string, int, error)

Types

type DatasetArchiveDownloader

type DatasetArchiveDownloader struct {
	// contains filtered or unexported fields
}

func NewDatasetArchiveDownloader

func NewDatasetArchiveDownloader(
	remoteFS fileaccess.FileAccess,
	localFS fileaccess.FileAccess,
	log logger.ILogger,
	datasetBucket string,
	manualUploadBucket string) *DatasetArchiveDownloader

func (*DatasetArchiveDownloader) DownloadFromDatasetArchive

func (dl *DatasetArchiveDownloader) DownloadFromDatasetArchive(datasetID string, workingDir string) (string, string, int, error)

Returns: Downloads path (raw zip files go here), Unzipped files path (archive zips unzipped here), How many zips loaded from archive, Error (if any)

func (*DatasetArchiveDownloader) DownloadFromDatasetUploads

func (dl *DatasetArchiveDownloader) DownloadFromDatasetUploads(datasetID string, workingDir string) (string, string, error)

Downloads from the user-uploaded dataset zip area. Expects the following files to exist:
- creator.json - describing who uploaded the dataset, and when
- detector.json - describing what detector, hence what dataset type this is

Other files depending on what type of detector:

BREADBOARD:
- import.json - import parameters for the JPL breadboard importer
- spectra.zip - all .MSA files

Returns: Downloads path (raw zip files go here), Unzipped files path (archive zips unzipped here), Error (if any)

func (*DatasetArchiveDownloader) DownloadPseudoIntensityRangesFile

func (dl *DatasetArchiveDownloader) DownloadPseudoIntensityRangesFile(configBucket string, downloadPath string, version string) (string, error)

func (*DatasetArchiveDownloader) DownloadUserCustomisationsForDataset

func (dl *DatasetArchiveDownloader) DownloadUserCustomisationsForDataset(datasetID string, downloadPath string) error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL