awsglue

package
v1.3.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 22, 2020 License: AGPL-3.0 Imports: 13 Imported by: 0

Documentation

Index

Constants

View Source
const (
	LogProcessingDatabaseName        = "panther_logs"
	LogProcessingDatabaseDescription = "Holds tables with data from Panther log processing"

	RuleMatchDatabaseName        = "panther_rule_matches"
	RuleMatchDatabaseDescription = "Holds tables with data from Panther rule matching (same table structure as panther_logs)"

	ViewsDatabaseName        = "panther_views"
	ViewsDatabaseDescription = "Holds views useful for querying Panther data"

	TempDatabaseName        = "panther_temp"
	TempDatabaseDescription = "Holds temporary tables used for processing tasks"
)

Variables

View Source
var (
	// PantherDatabases is exposed as public var to allow code to get/lookup the Panther databases
	PantherDatabases = map[string]string{
		LogProcessingDatabaseName: LogProcessingDatabaseDescription,
		RuleMatchDatabaseName:     RuleMatchDatabaseDescription,
		ViewsDatabaseName:         ViewsDatabaseDescription,
	}
)

Functions

func CreatePartition

func CreatePartition(client glueiface.GlueAPI, databaseName, tableName string,
	partitionValues []*string, storageDescriptor *glue.StorageDescriptor,
	parameters map[string]*string) (*glue.CreatePartitionOutput, error)

func DeletePartition

func DeletePartition(client glueiface.GlueAPI, databaseName, tableName string,
	partitionValues []*string) (*glue.DeletePartitionOutput, error)

func DeleteTable

func DeleteTable(client glueiface.GlueAPI, databaseName, tableName string) (*glue.DeleteTableOutput, error)

func GetDataPrefix

func GetDataPrefix(databaseName string) string

func GetPartition

func GetPartition(client glueiface.GlueAPI, databaseName, tableName string,
	partitionValues []*string) (*glue.GetPartitionOutput, error)

func GetPartitionLocation

func GetPartitionLocation(s3Path string) (string, error)

GetPartitionLocation takes an S3 path for an object and returns just the part of the path associated with the partition.

func GetPartitionPrefix

func GetPartitionPrefix(datatype models.DataType, logType string, timebin GlueTableTimebin, time time.Time) string

func GetTable

func GetTable(client glueiface.GlueAPI, databaseName, tableName string) (*glue.GetTableOutput, error)

func GetTableName

func GetTableName(logType string) string

func IsJSONPartition

func IsJSONPartition(storageDescriptor *glue.StorageDescriptor) bool

func ParseS3URL

func ParseS3URL(s3URL string) (bucket, key string, err error)

func TableHasPartitions

func TableHasPartitions(client glueiface.GlueAPI, databaseName, tableName string) (hasData bool, err error)

func UpdatePartition

func UpdatePartition(client glueiface.GlueAPI, databaseName, tableName string,
	partitionValues []*string, storageDescriptor *glue.StorageDescriptor,
	parameters map[string]*string) (*glue.UpdatePartitionOutput, error)

Types

type GluePartition

type GluePartition struct {
	// contains filtered or unexported fields
}

A partition in Glue containing Panther data

func GetPartitionFromS3

func GetPartitionFromS3(s3Bucket, s3ObjectKey string) (*GluePartition, error)

Gets the partition from the S3 bucket and S3 object key info. The S3 object key is expected to be in the format `{logs,rules}/{table_name}/year=\d{4}/month=\d{2}/[day=\d{2}/][hour=\d{2}/]/{\S+}.json.gz`; otherwise an error is returned.

func GetPartitionFromS3Path

func GetPartitionFromS3Path(s3Path string) (*GluePartition, error)

func (*GluePartition) GetDatabase

func (gp *GluePartition) GetDatabase() string

func (*GluePartition) GetGlueTableMetadata

func (gp *GluePartition) GetGlueTableMetadata() *GlueTableMetadata

func (*GluePartition) GetPartitionColumnsInfo

func (gp *GluePartition) GetPartitionColumnsInfo() []PartitionColumnInfo

func (*GluePartition) GetPartitionLocation

func (gp *GluePartition) GetPartitionLocation() string

func (*GluePartition) GetS3Bucket

func (gp *GluePartition) GetS3Bucket() string

func (*GluePartition) GetTable

func (gp *GluePartition) GetTable() string

func (*GluePartition) GetTime

func (gp *GluePartition) GetTime() time.Time

type GlueTableMetadata

type GlueTableMetadata struct {
	// contains filtered or unexported fields
}

Metadata about a Glue table

func NewGlueTableMetadata

func NewGlueTableMetadata(
	datatype models.DataType, logType, logDescription string, timebin GlueTableTimebin, eventStruct interface{}) *GlueTableMetadata

Creates a new GlueTableMetadata object for Panther log sources

func (*GlueTableMetadata) CreateJSONPartition

func (gm *GlueTableMetadata) CreateJSONPartition(client glueiface.GlueAPI, t time.Time) (created bool, err error)

func (*GlueTableMetadata) DatabaseName

func (gm *GlueTableMetadata) DatabaseName() string

func (*GlueTableMetadata) Description

func (gm *GlueTableMetadata) Description() string

func (*GlueTableMetadata) EventStruct

func (gm *GlueTableMetadata) EventStruct() interface{}

func (*GlueTableMetadata) GetPartition

func (gm *GlueTableMetadata) GetPartition(client glueiface.GlueAPI, t time.Time) (output *glue.GetPartitionOutput, err error)

GetPartition gets the partition for the given time; it returns nil if the partition does not exist.

func (*GlueTableMetadata) GetPartitionPrefix

func (gm *GlueTableMetadata) GetPartitionPrefix(t time.Time) string

Based on Timebin(), return an S3 prefix for objects of this table

func (*GlueTableMetadata) HasPartitions

func (gm *GlueTableMetadata) HasPartitions(glueClient glueiface.GlueAPI) (bool, error)

func (*GlueTableMetadata) LogType

func (gm *GlueTableMetadata) LogType() string

func (*GlueTableMetadata) PartitionKeys

func (gm *GlueTableMetadata) PartitionKeys() (partitions []PartitionKey)

The partition keys for this table

func (*GlueTableMetadata) Prefix

func (gm *GlueTableMetadata) Prefix() string

All data for this table are stored in this S3 prefix

func (*GlueTableMetadata) SyncPartitions

func (gm *GlueTableMetadata) SyncPartitions(glueClient glueiface.GlueAPI, s3Client s3iface.S3API, startDate time.Time) error

SyncPartitions updates a table's partitions using the latest table schema. Used when schemas change.

func (*GlueTableMetadata) TableName

func (gm *GlueTableMetadata) TableName() string

func (*GlueTableMetadata) Timebin

func (gm *GlueTableMetadata) Timebin() GlueTableTimebin

type GlueTableTimebin

type GlueTableTimebin int

Use this to tag the time partitioning used in a GlueTableMetadata table

const (
	GlueTableMonthly GlueTableTimebin = iota + 1
	GlueTableDaily
	GlueTableHourly
)

func (GlueTableTimebin) Next

func (tb GlueTableTimebin) Next(t time.Time) (next time.Time)

Next returns the next time interval

func (GlueTableTimebin) PartitionHasData

func (tb GlueTableTimebin) PartitionHasData(client s3iface.S3API, t time.Time, tableOutput *glue.GetTableOutput) (bool, error)

PartitionHasData checks if there is at least 1 s3 object in the partition

func (GlueTableTimebin) PartitionS3PathFromTime

func (tb GlueTableTimebin) PartitionS3PathFromTime(t time.Time) (s3Path string)

PartitionS3PathFromTime constructs the S3 path for this partition

func (GlueTableTimebin) PartitionValuesFromTime

func (tb GlueTableTimebin) PartitionValuesFromTime(t time.Time) (values []*string)

PartitionValuesFromTime returns the partition values as a []*string (the form used by the Glue APIs)

func (GlueTableTimebin) Validate

func (tb GlueTableTimebin) Validate() (err error)

type PartitionColumnInfo

type PartitionColumnInfo struct {
	Key   string
	Value string
}

Contains information about partition columns

type PartitionKey

type PartitionKey struct {
	Name string
	Type string
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL