awsglue

package
v1.15.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 22, 2021 License: AGPL-3.0 Imports: 17 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var (
	// RuleMatchColumns are columns added by the rules engine.
	// Every column name carries the "p_" prefix; the columns cover the rule id,
	// the alert id/context/creation time/update time, and the rule's tags and reports.
	RuleMatchColumns = []Column{
		{
			Name:    "p_rule_id",
			Type:    glueschema.TypeString,
			Comment: "Rule id",
		},
		{
			Name:    "p_alert_id",
			Type:    glueschema.TypeString,
			Comment: "Alert id",
		},
		{
			Name:    "p_alert_context",
			Type:    glueschema.TypeString,
			Comment: "Additional alert context",
		},
		{
			Name:    "p_alert_creation_time",
			Type:    glueschema.TypeTimestamp,
			Comment: "The time the alert was initially created (first match)",
		},
		{
			Name:    "p_alert_update_time",
			Type:    glueschema.TypeTimestamp,
			Comment: "The time the alert last updated (last match)",
		},
		{
			// Array of strings.
			Name:    "p_rule_tags",
			Type:    glueschema.ArrayOf(glueschema.TypeString),
			Comment: "The tags of the rule that generated this alert",
		},
		{
			// Map from string to array of strings.
			Name:    "p_rule_reports",
			Type:    glueschema.MapOf(glueschema.TypeString, glueschema.ArrayOf(glueschema.TypeString)),
			Comment: "The reporting tags of the rule that generated this alert",
		},
	}

	// RuleErrorColumns are all of the RuleMatchColumns followed by one extra
	// column, p_rule_error, that holds the rule error.
	//
	// The RuleMatchColumns slice literal above has len == cap, so this append
	// allocates a new backing array and the two slices do not alias each other.
	RuleErrorColumns = append(
		RuleMatchColumns,
		Column{
			Name:    "p_rule_error",
			Type:    glueschema.TypeString,
			Comment: "The rule error",
		},
	)
)

Functions

func CreateDatabase added in v1.4.0

func CreateDatabase(client glueiface.GlueAPI, name, description string) (*glue.CreateDatabaseOutput, error)

func CreatePartition

func CreatePartition(client glueiface.GlueAPI, databaseName, tableName string,
	partitionValues []*string, storageDescriptor *glue.StorageDescriptor,
	parameters map[string]*string) (*glue.CreatePartitionOutput, error)

func DataPrefix added in v1.15.0

func DataPrefix(databaseName string) string

func DataTypeFromS3Key added in v1.15.0

func DataTypeFromS3Key(s3key string) (dataType pantherdb.DataType, err error)

func DeleteDatabase added in v1.4.0

func DeleteDatabase(client glueiface.GlueAPI, name string) (*glue.DeleteDatabaseOutput, error)

func DeletePartition

func DeletePartition(client glueiface.GlueAPI, databaseName, tableName string,
	partitionValues []*string) (*glue.DeletePartitionOutput, error)

func DeleteTable

func DeleteTable(client glueiface.GlueAPI, databaseName, tableName string) (*glue.DeleteTableOutput, error)

func EnsureDatabase added in v1.14.0

func EnsureDatabase(ctx context.Context, client glueiface.GlueAPI, name, description string) error

func GetPartition

func GetPartition(client glueiface.GlueAPI, databaseName, tableName string,
	partitionValues []*string) (*glue.GetPartitionOutput, error)

func GetTable

func GetTable(client glueiface.GlueAPI, databaseName, tableName string) (*glue.GetTableOutput, error)

func IsJSONPartition

func IsJSONPartition(storageDescriptor *glue.StorageDescriptor) bool

func ParseS3URL

func ParseS3URL(s3URL string) (bucket, key string, err error)

func PartitionPrefix added in v1.15.0

func PartitionPrefix(database, table string, timebin GlueTableTimebin, time time.Time) string

func PartitionTimeFromValues added in v1.10.0

func PartitionTimeFromValues(values []*string) (tm time.Time, err error)

PartitionTimeFromValues resolves the timebin from a glue partition's values

func TableHasPartitions

func TableHasPartitions(client glueiface.GlueAPI, databaseName, tableName string) (hasData bool, err error)

func TablePrefix added in v1.15.0

func TablePrefix(database, tableName string) string

TablePrefix returns the prefix of the table in S3.

func UpdatePartition

func UpdatePartition(client glueiface.GlueAPI, databaseName, tableName string,
	partitionValues []*string, storageDescriptor *glue.StorageDescriptor,
	parameters map[string]*string) (*glue.UpdatePartitionOutput, error)

Types

type Column added in v1.4.0

// Column is an alias for glueschema.Column.
type Column = glueschema.Column

type GluePartition

type GluePartition struct {
	// contains filtered or unexported fields
}

A partition in Glue containing Panther data

func PartitionFromS3Object added in v1.15.0

func PartitionFromS3Object(s3Bucket, s3ObjectKey string) (*GluePartition, error)

PartitionFromS3Object gets the partition from the S3 bucket and S3 object key. The S3 object key is expected to be in the format `{logs,rules}/{table_name}/year=\d{4}/month=\d{2}/[day=\d{2}/][hour=\d{2}/]/{\S+}.json.gz`; otherwise an error is returned.

func PartitionFromS3Path added in v1.15.0

func PartitionFromS3Path(s3Path string) (*GluePartition, error)

func (*GluePartition) GetDatabase

func (gp *GluePartition) GetDatabase() string

func (*GluePartition) GetGlueTableMetadata

func (gp *GluePartition) GetGlueTableMetadata() *GlueTableMetadata

func (*GluePartition) GetPartitionColumnsInfo

func (gp *GluePartition) GetPartitionColumnsInfo() []PartitionColumnInfo

func (*GluePartition) GetS3Bucket

func (gp *GluePartition) GetS3Bucket() string

func (*GluePartition) GetTable

func (gp *GluePartition) GetTable() string

func (*GluePartition) GetTime

func (gp *GluePartition) GetTime() time.Time

func (*GluePartition) PartitionLocation added in v1.15.0

func (gp *GluePartition) PartitionLocation() string

type GlueTableMetadata

type GlueTableMetadata struct {
	// contains filtered or unexported fields
}

Metadata about Glue table

func NewGlueTableMetadata

func NewGlueTableMetadata(
	database, table, logDescription string, timebin GlueTableTimebin, eventStruct interface{}) *GlueTableMetadata

Creates a new GlueTableMetadata object for Panther log sources

func (*GlueTableMetadata) CreateJSONPartition

func (gm *GlueTableMetadata) CreateJSONPartition(client glueiface.GlueAPI, t time.Time) (created bool, err error)

func (*GlueTableMetadata) CreateOrUpdateTable added in v1.4.0

func (gm *GlueTableMetadata) CreateOrUpdateTable(glueClient glueiface.GlueAPI, bucketName string) error

func (*GlueTableMetadata) CreateTableIfNotExists added in v1.15.0

func (gm *GlueTableMetadata) CreateTableIfNotExists(ctx context.Context, glueAPI glueiface.GlueAPI, bucketName string) (bool, error)

func (*GlueTableMetadata) DatabaseName

func (gm *GlueTableMetadata) DatabaseName() string

func (*GlueTableMetadata) Description

func (gm *GlueTableMetadata) Description() string

func (*GlueTableMetadata) EventStruct

func (gm *GlueTableMetadata) EventStruct() interface{}

func (*GlueTableMetadata) GetPartition

func (gm *GlueTableMetadata) GetPartition(client glueiface.GlueAPI, t time.Time) (output *glue.GetPartitionOutput, err error)

GetPartition gets the partition; it returns nil if the partition does not exist.

func (*GlueTableMetadata) HasPartitions

func (gm *GlueTableMetadata) HasPartitions(glueClient glueiface.GlueAPI) (bool, error)

func (*GlueTableMetadata) PartitionKeys

func (gm *GlueTableMetadata) PartitionKeys() (partitions []PartitionKey)

The partition keys for this table

func (*GlueTableMetadata) PartitionPrefix added in v1.15.0

func (gm *GlueTableMetadata) PartitionPrefix(t time.Time) string

Based on Timebin(), return an S3 prefix for objects of this table

func (*GlueTableMetadata) Prefix

func (gm *GlueTableMetadata) Prefix() string

All data for this table are stored in this S3 prefix

func (*GlueTableMetadata) RuleErrorTable added in v1.10.0

func (gm *GlueTableMetadata) RuleErrorTable() *GlueTableMetadata

func (*GlueTableMetadata) RuleTable added in v1.4.0

func (gm *GlueTableMetadata) RuleTable() *GlueTableMetadata

func (*GlueTableMetadata) SyncPartitions

func (gm *GlueTableMetadata) SyncPartitions(glueClient glueiface.GlueAPI, s3Client s3iface.S3API,
	startDate time.Time, deadline *time.Time) (*time.Time, error)

SyncPartitions updates a table's partitions using the latest table schema. Used when schemas change. If deadline is non-nil, it will stop when execution time has passed the deadline and will return the _next_ time period needing evaluation. Deadlines are used when this is called in Lambdas to avoid running past the lambda deadline.

func (*GlueTableMetadata) TableName

func (gm *GlueTableMetadata) TableName() string

func (*GlueTableMetadata) Timebin

func (gm *GlueTableMetadata) Timebin() GlueTableTimebin

func (*GlueTableMetadata) UpdateTableIfExists added in v1.15.0

func (gm *GlueTableMetadata) UpdateTableIfExists(ctx context.Context, glueAPI glueiface.GlueAPI, bucketName string) (bool, error)

type GlueTableTimebin

// GlueTableTimebin tags the time partitioning used in a GlueTableMetadata table.
type GlueTableTimebin int

Use this to tag the time partitioning used in a GlueTableMetadata table

// Named GlueTableTimebin values. The enumeration starts at iota + 1, so the
// values are 1, 2 and 3 and the zero value of GlueTableTimebin matches none of them.
const (
	GlueTableMonthly GlueTableTimebin = iota + 1 // 1
	GlueTableDaily                               // 2
	GlueTableHourly                              // 3
)

func TimebinFromTable added in v1.10.0

func TimebinFromTable(tbl *glue.TableData) (GlueTableTimebin, error)

TimebinFromTable resolves the timebin from a table storage descriptor

func (GlueTableTimebin) Next

func (tb GlueTableTimebin) Next(t time.Time) (next time.Time)

Next returns the next time interval

func (GlueTableTimebin) PartitionFilter added in v1.10.0

func (tb GlueTableTimebin) PartitionFilter(start, end time.Time) string

PartitionFilter returns a partition filter expression

func (GlueTableTimebin) PartitionHasData

func (tb GlueTableTimebin) PartitionHasData(client s3iface.S3API, t time.Time, tableOutput *glue.GetTableOutput) (bool, error)

PartitionHasData checks if there is at least 1 S3 object in the partition

func (GlueTableTimebin) PartitionPathS3 added in v1.10.0

func (tb GlueTableTimebin) PartitionPathS3(t time.Time) (s3Path string)

PartitionPathS3 constructs the S3 path for this partition

func (GlueTableTimebin) PartitionValuesFromTime

func (tb GlueTableTimebin) PartitionValuesFromTime(t time.Time) (values []*string)

PartitionValuesFromTime returns an []*string values (used for Glue APIs)

func (GlueTableTimebin) PartitionsAfter added in v1.10.0

func (tb GlueTableTimebin) PartitionsAfter(tm time.Time) string

PartitionsAfter returns an expression to scan for partitions after tm. See https://docs.aws.amazon.com/glue/latest/webapi/API_GetPartitions.html

func (GlueTableTimebin) PartitionsBefore added in v1.10.0

func (tb GlueTableTimebin) PartitionsBefore(tm time.Time) string

PartitionsBefore returns an expression to scan for partitions before tm. See https://docs.aws.amazon.com/glue/latest/webapi/API_GetPartitions.html

func (GlueTableTimebin) PartitionsBetween added in v1.10.0

func (tb GlueTableTimebin) PartitionsBetween(start, end time.Time) string

PartitionsBetween returns an expression to scan for partitions between two timestamps. See https://docs.aws.amazon.com/glue/latest/webapi/API_GetPartitions.html

func (GlueTableTimebin) S3PathLayout added in v1.10.0

func (tb GlueTableTimebin) S3PathLayout() string

S3PathLayout returns a go time layout to format/parse S3 paths for Glue partitions

func (GlueTableTimebin) TimeFromS3Path added in v1.10.0

func (tb GlueTableTimebin) TimeFromS3Path(path string) (time.Time, bool)

TimeFromS3Path converts an S3 path to time. The path must not contain any prefixes such as db/table name

func (GlueTableTimebin) Truncate added in v1.10.0

func (tb GlueTableTimebin) Truncate(t time.Time) time.Time

Truncate truncates the date to the time bin time unit

type PartitionColumnInfo

// PartitionColumnInfo contains information about a partition column as a
// key/value pair of strings.
type PartitionColumnInfo struct {
	Key   string // NOTE(review): presumably the partition column name — confirm against callers
	Value string // NOTE(review): presumably that column's value for the partition — confirm
}

Contains information about partition columns

type PartitionKey

// PartitionKey holds the name and type of a table partition key, as returned
// by GlueTableMetadata.PartitionKeys.
type PartitionKey struct {
	Name string
	Type string // NOTE(review): presumably a Glue column type name — confirm
}

Directories

Path Synopsis
Package gluetimestamp handles encoding/decoding of timestamp values for AWS glue.
Package gluetimestamp handles encoding/decoding of timestamp values for AWS glue.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL