fileaccess

package
v4.21.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 10, 2024 License: Apache-2.0 Imports: 14 Imported by: 0

Documentation

Overview

Provides a higher-level file access interface which is implemented using local file storage as well as AWS S3. This makes writing code that is agnostic to the file storage medium much easier. Both are tested with the same unit testing framework to ensure they are compatible. NOTE: In the FileAccess interface, any reference to a bucket, when used with the local file storage model, is concatenated with the path, so it can be thought of as a reference to the disk the path is on, or the root from which relative paths are resolved.

Example (GetBucketFromS3Url)
b, err := GetBucketFromS3Url("some/path/file.json")
fmt.Printf("%v|%v\n", b, err)

b, err = GetBucketFromS3Url("s3:///path/file.json")
fmt.Printf("%v|%v\n", b, err)

b, err = GetBucketFromS3Url("s3://bucket")
fmt.Printf("%v|%v\n", b, err)

b, err = GetBucketFromS3Url("s3://the_bucket/some/path.json")
fmt.Printf("%v|%v\n", b, err)
Output:

|GetBucketFromS3Url parameter was not a valid S3 url: some/path/file.json
|GetBucketFromS3Url failed to get bucket from S3 url: s3:///path/file.json
|GetBucketFromS3Url failed to get bucket from S3 url: s3://bucket
the_bucket|<nil>
Example (GetPathFromS3Url)
p, err := GetPathFromS3Url("some/path/file.json")
fmt.Printf("%v|%v\n", p, err)

p, err = GetPathFromS3Url("s3:///path/file.json")
fmt.Printf("%v|%v\n", p, err)

p, err = GetPathFromS3Url("s3://bucket")
fmt.Printf("%v|%v\n", p, err)

p, err = GetPathFromS3Url("s3://the_bucket/some/path.json")
fmt.Printf("%v|%v\n", p, err)
Output:

|GetPathFromS3Url parameter was not a valid S3 url: some/path/file.json
|GetPathFromS3Url failed to get path from S3 url: s3:///path/file.json
|GetPathFromS3Url failed to get path from S3 url: s3://bucket
some/path.json|<nil>
Example (IsValidObjectName)
fmt.Println(IsValidObjectName("name"))
fmt.Println(IsValidObjectName("Name With Spaces"))
fmt.Println(IsValidObjectName("Name With Spaces"))
fmt.Println(IsValidObjectName(""))
fmt.Println(IsValidObjectName("Name \"Quote"))
Output:

true
true
true
false
false
Example (LocalFileSystem)
// First, clear any files we may have there already
fmt.Printf("Setup: %v\n", os.RemoveAll("./test-output/"))

// Now run the tests
runTest(&FSAccess{}, "./test-output")

// NOTE: test output must match the output from S3 (except cleanup steps)
Output:

Setup: <nil>
JSON: <nil>
JSON no-indent: <nil>
Exists1: false|<nil>
Binary: <nil>
Exists2: true|<nil>
Copy: <nil>
Copy bad path, got not found error: true
Read JSON: <nil>, {Hello 778 World}
Read JSON no-indent: <nil>, {Hello 778 World}
Read Binary: <nil>, [250 130 10 0 33]
Read bad path, got not found error: true
Read bad JSON: invalid character 'ú' looking for beginning of value
Not a "not found" error: true
Listing: <nil>, [the-files/data.bin the-files/pretty.json the-files/subdir/copied.json the-files/subdir/ugly.json]
Listing subdir: <nil>, [the-files/subdir/copied.json the-files/subdir/ugly.json]
Listing with prefix: <nil>, [the-files/subdir/ugly.json]
Listing bad path: <nil>, []
Delete copy: <nil>
Delete bin: <nil>
Listing2: <nil>, [the-files/pretty.json the-files/subdir/ugly.json]
Listing subdir2: <nil>, [the-files/subdir/ugly.json]
Empty dir: <nil>
Listing subdir3: <nil>, []
Example (MakeValidObjectName)
fmt.Println(MakeValidObjectName("my file!", true))
fmt.Println(MakeValidObjectName("this/path/to.bin", true))
fmt.Println(MakeValidObjectName("Hope \"this\" isn't too $expensive", true))
fmt.Println(MakeValidObjectName("This-file is it", true))
fmt.Println(MakeValidObjectName("A!B#C$D/E\\F", true))
fmt.Println(MakeValidObjectName("This-file; is it", true))
fmt.Println(MakeValidObjectName("This-file is it", true))
fmt.Println(MakeValidObjectName("This-file is it", false))
Output:

my file
this_path_to.bin
Hope this isnt too expensive
This-file is it
ABCD_E_F
This-file is it
This-file is it
This-file_is_it
Example (S3)
rand.Seed(time.Now().UnixNano())
sess, err := awsutil.GetSessionWithRegion("us-east-1")
if err != nil {
	fmt.Println("Failed to get AWS session")
	return
}
s3svc, err := awsutil.GetS3(sess)
if err != nil {
	fmt.Println("Failed to get S3")
	return
}

fmt.Printf("Setup: %v\n", err)

fs := MakeS3Access(s3svc)

// Create test S3 bucket for this purpose
testBucket := "api-fileaccess-s3-test-" + utils.RandStringBytesMaskImpr(10)
_, err = s3svc.CreateBucket(
	&s3.CreateBucketInput{
		Bucket: aws.String(testBucket),
		//CreateBucketConfiguration:
	},
)
if err != nil {
	fmt.Printf("Failed to create test S3 bucket: %v\n", err)
	return
}

defer func() {
	_, err := s3svc.DeleteBucket(&s3.DeleteBucketInput{Bucket: aws.String(testBucket)})
	fmt.Printf("Delete bucket errors: %v\n", err)
}()

// Now run the tests
runTest(fs, testBucket)

// NOTE: test output must match the output from local file system (except cleanup steps)
Output:

Setup: <nil>
JSON: <nil>
JSON no-indent: <nil>
Exists1: false|<nil>
Binary: <nil>
Exists2: true|<nil>
Copy: <nil>
Copy bad path, got not found error: true
Read JSON: <nil>, {Hello 778 World}
Read JSON no-indent: <nil>, {Hello 778 World}
Read Binary: <nil>, [250 130 10 0 33]
Read bad path, got not found error: true
Read bad JSON: invalid character 'ú' looking for beginning of value
Not a "not found" error: true
Listing: <nil>, [the-files/data.bin the-files/pretty.json the-files/subdir/copied.json the-files/subdir/ugly.json]
Listing subdir: <nil>, [the-files/subdir/copied.json the-files/subdir/ugly.json]
Listing with prefix: <nil>, [the-files/subdir/ugly.json]
Listing bad path: <nil>, []
Delete copy: <nil>
Delete bin: <nil>
Listing2: <nil>, [the-files/pretty.json the-files/subdir/ugly.json]
Listing subdir2: <nil>, [the-files/subdir/ugly.json]
Empty dir: <nil>
Listing subdir3: <nil>, []
Delete bucket errors: <nil>
Example (S3ListingWithContinuation)
const bucket = "dev-pixlise-data"
const listPath = "Datasets/"

var mockS3 awsutil.MockS3Client
defer mockS3.FinishTest()

mockS3.ExpListObjectsV2Input = []s3.ListObjectsV2Input{
	{
		Bucket: aws.String(bucket), Prefix: aws.String(listPath),
	},
	{
		Bucket: aws.String(bucket), Prefix: aws.String(listPath), ContinuationToken: aws.String("cont-1"),
	},
	{
		Bucket: aws.String(bucket), Prefix: aws.String(listPath), ContinuationToken: aws.String("cont-2"),
	},
}
mockS3.QueuedListObjectsV2Output = []*s3.ListObjectsV2Output{
	{
		IsTruncated:           aws.Bool(true),
		NextContinuationToken: aws.String("cont-1"),
		Contents: []*s3.Object{
			{Key: aws.String("Datasets/abc-123/summary.json")},
			{Key: aws.String("Datasets/abc-123/node1.json")},
			{Key: aws.String("Datasets/abc-123/params.json")},
		},
	},
	{
		IsTruncated:           aws.Bool(true),
		NextContinuationToken: aws.String("cont-2"),
		Contents: []*s3.Object{
			{Key: aws.String("Datasets/abc-456/summary.json")},
			{Key: aws.String("Datasets/abc-789/summary.json")},
			{Key: aws.String("Datasets/")}, // Happens when we create a path in S3 web console, but has no use for us, so we filter it
			{Key: aws.String("Datasets/abc-456/params.json")},
		},
	},
	{
		IsTruncated: aws.Bool(false),
		Contents: []*s3.Object{
			{Key: aws.String("Datasets/abc-456/output/combined.csv")},
		},
	},
}

fs := MakeS3Access(&mockS3)
list, err := fs.ListObjects(bucket, listPath)
fmt.Printf("%v, list: %v\n", err, list)
Output:

<nil>, list: [Datasets/abc-123/summary.json Datasets/abc-123/node1.json Datasets/abc-123/params.json Datasets/abc-456/summary.json Datasets/abc-789/summary.json Datasets/abc-456/params.json Datasets/abc-456/output/combined.csv]

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func CopyFileLocally

func CopyFileLocally(srcPath string, dstPath string) error

func GetBucketFromS3Url

func GetBucketFromS3Url(url string) (string, error)

func GetPathFromS3Url

func GetPathFromS3Url(url string) (string, error)

func IsValidObjectName

func IsValidObjectName(name string) bool

Is this string a valid name to use as an AWS object name?

func MakeEmptyLocalDirectory

func MakeEmptyLocalDirectory(root string, subdir string) (string, error)

Creates a directory under the specified root and ensures it is empty (e.g. if it already existed).

func MakeValidObjectName

func MakeValidObjectName(name string, allowSpace bool) string

Turns a string name potentially typed by a user into a file name that should be valid in any storage medium we use. Illegal characters are either removed or replaced with _.

Types

type FSAccess

type FSAccess struct {
}

Implementation of file access using local file system

func (*FSAccess) CopyObject

func (fs *FSAccess) CopyObject(srcRootPath string, srcPath string, dstRootPath string, dstPath string) error

func (*FSAccess) DeleteObject

func (fs *FSAccess) DeleteObject(rootPath string, path string) error

func (*FSAccess) EmptyObjects

func (fs *FSAccess) EmptyObjects(rootPath string) error

func (*FSAccess) IsNotFoundError

func (fs *FSAccess) IsNotFoundError(err error) bool

func (*FSAccess) ListObjects

func (fs *FSAccess) ListObjects(rootPath string, prefix string) ([]string, error)

func (*FSAccess) ObjectExists

func (fs *FSAccess) ObjectExists(rootPath string, path string) (bool, error)

func (*FSAccess) ReadJSON

func (fs *FSAccess) ReadJSON(rootPath string, s3Path string, itemsPtr interface{}, emptyIfNotFound bool) error

func (*FSAccess) ReadObject

func (fs *FSAccess) ReadObject(rootPath string, path string) ([]byte, error)

func (*FSAccess) WriteJSON

func (fs *FSAccess) WriteJSON(rootPath string, s3Path string, itemsPtr interface{}) error

func (*FSAccess) WriteJSONNoIndent

func (fs *FSAccess) WriteJSONNoIndent(rootPath string, s3Path string, itemsPtr interface{}) error

func (*FSAccess) WriteObject

func (fs *FSAccess) WriteObject(rootPath string, path string, data []byte) error

type FileAccess

type FileAccess interface {
	// Effectively recursive listing of files in a given directory
	ListObjects(bucket string, prefix string) ([]string, error)

	// Does a file at a given path exist
	ObjectExists(rootPath string, path string) (bool, error)

	// Reads a file as bytes
	ReadObject(bucket string, path string) ([]byte, error)
	// Writes a file as bytes
	WriteObject(bucket string, path string, data []byte) error

	// Reads a file as JSON and decodes it into itemsPtr
	ReadJSON(bucket string, s3Path string, itemsPtr interface{}, emptyIfNotFound bool) error
	// Writes itemsPtr as a JSON file
	WriteJSON(bucket string, s3Path string, itemsPtr interface{}) error

	// Same as WriteJSON, but a few places in the code need to write
	// non-pretty-printed JSON for those files to work with Athena queries for
	// example. Instead of adding a flag to WriteJSON this is easier to implement.
	// Searching for WriteJSON still returns these!
	WriteJSONNoIndent(bucket string, s3Path string, itemsPtr interface{}) error

	// Delete a file
	DeleteObject(bucket string, path string) error

	// Copy a file
	CopyObject(srcBucket string, srcPath string, dstBucket string, dstPath string) error

	// Effectively performs "rm -rf" of all files in the given bucket/root directory
	EmptyObjects(targetBucket string) error

	// Checks if the given error is a "not found" error for the implementation. This is because
	// AWS S3 would provide a different "not found" error than would a local file system fopen() failing
	IsNotFoundError(err error) bool
}

type Mock

type Mock struct {
}

Mockup of file access implementation for unit tests

type S3Access

type S3Access struct {
	// contains filtered or unexported fields
}

Implementation of file access using AWS S3

func MakeS3Access

func MakeS3Access(s3Api s3iface.S3API) S3Access

func (S3Access) CopyObject

func (s3Access S3Access) CopyObject(srcBucket string, srcPath string, dstBucket string, dstPath string) error

func (S3Access) DeleteObject

func (s3Access S3Access) DeleteObject(bucket string, path string) error

func (S3Access) EmptyObjects

func (s3Access S3Access) EmptyObjects(targetBucket string) error

func (S3Access) IsNotFoundError

func (s3Access S3Access) IsNotFoundError(err error) bool

func (S3Access) ListObjects

func (s3Access S3Access) ListObjects(bucket string, prefix string) ([]string, error)

ListObjects - calls AWS ListObjectsV2 and, if a continuation token is returned, keeps looping and storing more items until no more continuation tokens are left.

func (S3Access) ObjectExists

func (s3Access S3Access) ObjectExists(bucket string, path string) (bool, error)

func (S3Access) ReadJSON

func (s3Access S3Access) ReadJSON(bucket string, s3Path string, itemsPtr interface{}, emptyIfNotFound bool) error

func (S3Access) ReadObject

func (s3Access S3Access) ReadObject(bucket string, path string) ([]byte, error)

func (S3Access) WriteJSON

func (s3Access S3Access) WriteJSON(bucket string, s3Path string, itemsPtr interface{}) error

func (S3Access) WriteJSONNoIndent

func (s3Access S3Access) WriteJSONNoIndent(bucket string, s3Path string, itemsPtr interface{}) error

func (S3Access) WriteObject

func (s3Access S3Access) WriteObject(bucket string, path string, data []byte) error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL