Documentation ¶
Overview ¶
Implements archiving and retrieval of dataset source zip files as delivered by GDS. When new data arrives from GDS, we receive it as snapshot zip files, each containing what arrived at a given time. These are all stored in an S3 bucket and, when required, they are extracted in timestamp order on top of each other to get a view of the data at a given time. That view is then used to generate a PIXLISE dataset using the dataset importer.
Example (DecodeArchiveFileName) ¶
// Just a simple one id, ts, e := DecodeArchiveFileName("161677829-12-06-2022-06-41-00.zip") fmt.Printf("%v, %v, %v\n", id, ts, e) // Should accept paths too but snip the path off! id, ts, e = DecodeArchiveFileName("/Archive/161677829-12-06-2022-06-41-00.zip") fmt.Printf("%v, %v, %v\n", id, ts, e) id, ts, e = DecodeArchiveFileName("data/161677829-12-06-2022-06-41-00.zip") fmt.Printf("%v, %v, %v\n", id, ts, e) // FAIL: just a timestamp id, ts, e = DecodeArchiveFileName("12-06-2022-06-41-00.zip") fmt.Printf("%v, %v, %v\n", id, ts, e) // FAIL: something else id, ts, e = DecodeArchiveFileName("readme.txt") fmt.Printf("%v, %v, %v\n", id, ts, e) // FAIL: something else with path id, ts, e = DecodeArchiveFileName("/Archive/readme.txt") fmt.Printf("%v, %v, %v\n", id, ts, e)
Output: 161677829, 1655016060, <nil> 161677829, 1655016060, <nil> 161677829, 1655016060, <nil> , 0, DecodeArchiveFileName "12-06-2022-06-41-00.zip" error: parsing time "06-2022-06-41-00": month out of range , 0, DecodeArchiveFileName unexpected file name: readme.txt , 0, DecodeArchiveFileName unexpected file name: /Archive/readme.txt
Example (DecodeManualUploadPath) ¶
f, p, e := decodeManualUploadPath("/dataset-addons/dataset123/custom-meta.json") fmt.Printf("%v, %v, %v\n", f, p, e) // Contains subdir f, p, e = decodeManualUploadPath("/dataset-addons/dataset123/MATCHED/something.png") fmt.Printf("%v, %v, %v\n", f, p, e) // Contains multiple subdir f, p, e = decodeManualUploadPath("/dataset-addons/dataset123/MATCHED/more/file.png") fmt.Printf("%v, %v, %v\n", f, p, e) // Without leading / f, p, e = decodeManualUploadPath("dataset-addons/dataset123/MATCHED/more/image.png") fmt.Printf("%v, %v, %v\n", f, p, e) // Path too short f, p, e = decodeManualUploadPath("/dataset-addons/the-dir/invalid.txt") fmt.Printf("%v, %v, %v\n", f, p, e) // Path way too short f, p, e = decodeManualUploadPath("/dataset-addons/invalid.txt") fmt.Printf("%v, %v, %v\n", f, p, e)
Output: custom-meta.json, [], <nil> something.png, [MATCHED], <nil> file.png, [MATCHED more], <nil> image.png, [MATCHED more], <nil> , [], Manual upload path invalid: dataset-addons/the-dir/invalid.txt , [], Manual upload path invalid: dataset-addons/invalid.txt
Example (GetOrderedArchiveFiles) ¶
ordered, err := getOrderedArchiveFiles([]string{"161677829-12-06-2022-06-41-00.zip", "161677829-12-06-2022-06-42-00.zip", "161677829-12-06-2022-06-39-00.zip", "161677829-12-05-2022-06-40-00.zip"}) fmt.Printf("%v, %v\n", ordered, err) ordered, err = getOrderedArchiveFiles([]string{"Archive/161677829-12-06-2022-06-41-00.zip", "Archive/161677829-12-06-2022-06-42-00.zip", "Archive/161677829-12-06-2022-06-39-00.zip", "161677829-12-05-2022-06-40-00.zip"}) fmt.Printf("%v, %v\n", ordered, err) ordered, err = getOrderedArchiveFiles([]string{"161677829-12-06-2022-06-41-00.zip", "161677829-12-06-2022-06-42-00.zip", "161677829-12-06-2022-06-39-00.zip", "161677829-12-05-2022-24-40-00.zip"}) fmt.Printf("%v, %v\n", ordered, err) ordered, err = getOrderedArchiveFiles([]string{"161677829-12-06-2022-06-41-00.zip", "161677829-12-06-2022-06-42-00.zip", "12-06-2022-06-39-00.zip", "161677829-12-05-2022-06-40-00.zip"}) fmt.Printf("%v, %v\n", ordered, err) ordered, err = getOrderedArchiveFiles([]string{"161677829-12-06-2022-06-41-00.zip", "161677829-12-06-2022-06-42-00.zip", "readme.txt", "161677829-12-05-2022-06-40-00.zip"}) fmt.Printf("%v, %v\n", ordered, err) ordered, err = getOrderedArchiveFiles([]string{}) fmt.Printf("%v, %v\n", ordered, err)
Output: [161677829-12-05-2022-06-40-00.zip 161677829-12-06-2022-06-39-00.zip 161677829-12-06-2022-06-41-00.zip 161677829-12-06-2022-06-42-00.zip], <nil> [161677829-12-05-2022-06-40-00.zip Archive/161677829-12-06-2022-06-39-00.zip Archive/161677829-12-06-2022-06-41-00.zip Archive/161677829-12-06-2022-06-42-00.zip], <nil> [], DecodeArchiveFileName "161677829-12-05-2022-24-40-00.zip" error: parsing time "12-05-2022-24-40-00": hour out of range [], DecodeArchiveFileName "12-06-2022-06-39-00.zip" error: parsing time "06-2022-06-39-00": month out of range [], DecodeArchiveFileName unexpected file name: readme.txt [], <nil>
Index ¶
- func AddToDatasetArchive(remoteFS fileaccess.FileAccess, log logger.ILogger, datasetBucket string, ...) (bool, error)
- func DecodeArchiveFileName(fileName string) (string, int, error)
- type DatasetArchiveDownloader
- func (dl *DatasetArchiveDownloader) DownloadFromDatasetArchive(datasetID string, workingDir string) (string, string, []string, error)
- func (dl *DatasetArchiveDownloader) DownloadFromDatasetUploads(datasetID string, workingDir string) (string, string, error)
- func (dl *DatasetArchiveDownloader) DownloadPseudoIntensityRangesFile(configBucket string, downloadPath string, version string) (string, error)
- func (dl *DatasetArchiveDownloader) DownloadUserCustomisationsForDataset(datasetID string, downloadPath string) error
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func AddToDatasetArchive ¶
func AddToDatasetArchive(remoteFS fileaccess.FileAccess, log logger.ILogger, datasetBucket string, sourceBucket string, sourceFilePath string) (bool, error)
Types ¶
type DatasetArchiveDownloader ¶
type DatasetArchiveDownloader struct {
// contains filtered or unexported fields
}
func NewDatasetArchiveDownloader ¶
func NewDatasetArchiveDownloader( remoteFS fileaccess.FileAccess, localFS fileaccess.FileAccess, log logger.ILogger, datasetBucket string, manualUploadBucket string) *DatasetArchiveDownloader
func (*DatasetArchiveDownloader) DownloadFromDatasetArchive ¶
func (dl *DatasetArchiveDownloader) DownloadFromDatasetArchive(datasetID string, workingDir string) (string, string, []string, error)
Returns: Downloads path (raw zip files go here), Unzipped files path (archive zips unzipped here), How many zips loaded from archive, Error (if any)
func (*DatasetArchiveDownloader) DownloadFromDatasetUploads ¶
func (dl *DatasetArchiveDownloader) DownloadFromDatasetUploads(datasetID string, workingDir string) (string, string, error)
Downloads from the user-uploaded dataset zip area. Expects the following files to exist:
- creator.json - describing who uploaded the dataset, and when
- detector.json - describing what detector, hence what dataset type this is
Other files depending on what type of detector:
BREADBOARD:
- import.json - import parameters for the JPL breadboard importer
- spectra.zip - all .MSA files
Returns: Downloads path (raw zip files go here), Unzipped files path (archive zips unzipped here), Error (if any)
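func (*DatasetArchiveDownloader) DownloadPseudoIntensityRangesFile ¶
func (dl *DatasetArchiveDownloader) DownloadPseudoIntensityRangesFile(configBucket string, downloadPath string, version string) (string, error)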
func (*DatasetArchiveDownloader) DownloadUserCustomisationsForDataset ¶
func (dl *DatasetArchiveDownloader) DownloadUserCustomisationsForDataset(datasetID string, downloadPath string) error