warehouse

package
v1.5.7 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 10, 2023 License: AGPL-3.0 Imports: 70 Imported by: 0

Documentation

Index

Constants

View Source
const (
	TriggeredSuccessfully   = "Triggered successfully"
	NoPendingEvents         = "No pending events to sync for this destination"
	DownloadFileNamePattern = "downloadfile.*.tmp"
	NoSuchSync              = "No such sync exist"
)
View Source
const (
	STATS_WORKER_IDLE_TIME                  = "worker_idle_time"
	STATS_WORKER_CLAIM_PROCESSING_TIME      = "worker_claim_processing_time"
	STATS_WORKER_CLAIM_PROCESSING_FAILED    = "worker_claim_processing_failed"
	STATS_WORKER_CLAIM_PROCESSING_SUCCEEDED = "worker_claim_processing_succeeded"
	TAG_WORKERID                            = "workerId"
)
View Source
const (
	GeneratingStagingFileFailedState        = "generating_staging_file_failed"
	GeneratedStagingFileState               = "generated_staging_file"
	PopulatingHistoricIdentitiesState       = "populating_historic_identities"
	PopulatingHistoricIdentitiesStateFailed = "populating_historic_identities_failed"
	FetchingRemoteSchemaFailed              = "fetching_remote_schema_failed"
	InternalProcessingFailed                = "internal_processing_failed"
)
View Source
const (
	TableUploadExecuting               = "executing"
	TableUploadUpdatingSchema          = "updating_schema"
	TableUploadUpdatingSchemaFailed    = "updating_schema_failed"
	TableUploadUpdatedSchema           = "updated_schema"
	TableUploadExporting               = "exporting_data"
	TableUploadExportingFailed         = "exporting_data_failed"
	UserTableUploadExportingFailed     = "exporting_user_tables_failed"
	IdentityTableUploadExportingFailed = "exporting_identities_failed"
	TableUploadExported                = "exported_data"
)

Table Upload status

View Source
const (
	CloudSourceCategory          = "cloud"
	SingerProtocolSourceCategory = "singer-protocol"
)
View Source
const (
	UploadStatusField          = "status"
	UploadStartLoadFileIDField = "start_load_file_id"
	UploadEndLoadFileIDField   = "end_load_file_id"
	UploadUpdatedAtField       = "updated_at"
	UploadTimingsField         = "timings"
	UploadSchemaField          = "schema"
	MergedSchemaField          = "mergedschema"
	UploadLastExecAtField      = "last_exec_at"
	UploadInProgress           = "in_progress"
)
View Source
const (
	MasterMode         = "master"
	SlaveMode          = "slave"
	MasterSlaveMode    = "master_and_slave"
	EmbeddedMode       = "embedded"
	EmbeddedMasterMode = "embedded_master"
)

warehouses worker modes

View Source
const (
	DegradedMode = "degraded"
)
View Source
const (
	WorkerProcessingDownloadStagingFileFailed = "worker_processing_download_staging_file_failed"
)

Variables

View Source
var (
	ErrIncompatibleSchemaConversion = errors.New("incompatible schema conversion")
	ErrSchemaConversionNotSupported = errors.New("schema conversion not supported")
)
View Source
var (
	ShouldForceSetLowerVersion bool
)

Functions

func CheckCurrentTimeExistsInExcludeWindow added in v0.1.10

func CheckCurrentTimeExistsInExcludeWindow(currentTime time.Time, windowStartTime, windowEndTime string) bool

func CheckForWarehouseEnvVars added in v0.1.10

func CheckForWarehouseEnvVars() bool

CheckForWarehouseEnvVars Checks if all the required Env Variables for Warehouse are present

func CheckPGHealth

func CheckPGHealth(dbHandle *sql.DB) bool

func ClauseQueryArgs added in v1.2.0

func ClauseQueryArgs(filterClauses ...FilterClause) (string, []interface{})

func DeepCopy added in v1.5.0

func DeepCopy(src, dest interface{}) error

func DurationBeforeNextAttempt added in v1.2.0

func DurationBeforeNextAttempt(attempt int64) time.Duration

func GetExcludeWindowStartEndTimes added in v1.2.0

func GetExcludeWindowStartEndTimes(excludeWindow map[string]interface{}) (string, string)

func GetKeyAsBool added in v1.5.0

func GetKeyAsBool(key string, conf map[string]interface{}) bool

func GetPrevScheduledTime

func GetPrevScheduledTime(syncFrequency, syncStartAt string, currTime time.Time) time.Time

GetPrevScheduledTime returns the closest previous scheduled time e.g. Syncing every 3hrs starting at 13:00 (scheduled times: 13:00, 16:00, 19:00, 22:00, 01:00, 04:00, 07:00, 10:00) prev scheduled time for current time (e.g. 18:00 -> 16:00 same day, 00:30 -> 22:00 prev day)

func HandleSchemaChange added in v0.1.10

func HandleSchemaChange(existingDataType, currentDataType model.SchemaType, value any) (any, error)

func Init added in v0.1.10

func Init()

func Init2 added in v0.1.10

func Init2()

func Init3 added in v0.1.10

func Init3()

func Init4 added in v0.1.10

func Init4()

func Init5 added in v0.1.10

func Init5()

func Init6 added in v0.1.10

func Init6()

func InitWarehouseAPI added in v0.1.10

func InitWarehouseAPI(dbHandle *sql.DB, log logger.Logger) error

func PickupStagingConfiguration added in v1.0.2

func PickupStagingConfiguration(job *Payload) bool

func ScheduledTimes

func ScheduledTimes(syncFrequency, syncStartAt string) []int

ScheduledTimes returns all possible start times (minutes from start of day) as per schedule e.g. Syncing every 3hrs starting at 13:00 (scheduled times: 13:00, 16:00, 19:00, 22:00, 01:00, 04:00, 07:00, 10:00)

func Setup added in v1.4.0

func Setup(ctx context.Context) error

Setup prepares the database connection for warehouse service, verifies database compatibility and creates the required tables

func Start

func Start(ctx context.Context, app app.App) error

Start starts the warehouse service

func TriggerUploadHandler added in v0.1.10

func TriggerUploadHandler(sourceID, destID string) error

Types

type AsyncJobRunResult added in v1.2.0

type AsyncJobRunResult struct {
	Result bool
	Id     string
}

type BatchRouterEventT added in v0.1.10

type BatchRouterEventT struct {
	Metadata MetadataT `json:"metadata"`
	Data     DataT     `json:"data"`
}

func (*BatchRouterEventT) GetColumnInfo added in v0.1.10

func (event *BatchRouterEventT) GetColumnInfo(columnName string) (columnInfo warehouseutils.ColumnInfo, ok bool)

type ConfigurationTestInput added in v1.0.2

type ConfigurationTestInput struct {
	DestID string
}

type ConfigurationTestOutput added in v1.0.2

type ConfigurationTestOutput struct {
	Valid bool
	Error string
}

type ConstraintsI added in v0.1.10

type ConstraintsI interface {
	// contains filtered or unexported methods
}

type ConstraintsViolationT added in v0.1.10

type ConstraintsViolationT struct {
	IsViolated         bool
	ViolatedIdentifier string
}

func ViolatedConstraints added in v0.1.10

func ViolatedConstraints(destinationType string, brEvent *BatchRouterEventT, columnName string) (cv *ConstraintsViolationT)

type DB added in v1.0.2

type DB struct {
	// contains filtered or unexported fields
}

DB encapsulate interactions of warehouse operations with the database.

func NewWarehouseDB added in v1.0.2

func NewWarehouseDB(handle *sql.DB) *DB

func (*DB) GetLatestUploadStatus added in v1.0.2

func (db *DB) GetLatestUploadStatus(ctx context.Context, destType, sourceID, destinationID string) (int64, string, int, error)

func (*DB) GetUploadsCount added in v1.2.0

func (db *DB) GetUploadsCount(ctx context.Context, filterClauses ...FilterClause) (count int64, err error)

func (*DB) RetryUploads added in v1.2.0

func (db *DB) RetryUploads(ctx context.Context, filterClauses ...FilterClause) (rowsAffected int64, err error)

type DataT added in v0.1.10

type DataT map[string]interface{}

type ErrorResponseT

type ErrorResponseT struct {
	Error string
}

type FilterClause added in v1.2.0

type FilterClause struct {
	Clause    string
	ClauseArg interface{}
}

type HandleT

type HandleT struct {
	Now    func() time.Time
	NowSQL string
	Logger logger.Logger
	// contains filtered or unexported fields
}

func (*HandleT) CronTracker added in v1.5.0

func (wh *HandleT) CronTracker(ctx context.Context) error

func (*HandleT) Disable

func (wh *HandleT) Disable()

Disable disables a router:)

func (*HandleT) Enable

func (wh *HandleT) Enable()

Enable enables a router :)

func (*HandleT) Setup

func (wh *HandleT) Setup(whType string)

func (*HandleT) Shutdown added in v0.1.10

func (wh *HandleT) Shutdown()

func (*HandleT) Track added in v1.5.0

func (wh *HandleT) Track(ctx context.Context, warehouse *warehouseutils.Warehouse, config *config.Config) error

Track tracks the status of the warehouse uploads for the corresponding cases: 1. Staging files is not picked. 2. Upload job is struck

type IndexConstraintT added in v0.1.10

type IndexConstraintT struct {
	TableName    string
	ColumnName   string
	IndexColumns []string
	Limit        int
}

type InvalidDestinationCredErr added in v1.2.0

type InvalidDestinationCredErr struct {
	Base      error
	Operation string
}

func (InvalidDestinationCredErr) Error added in v1.2.0

func (err InvalidDestinationCredErr) Error() string

type JobIDT added in v0.1.10

type JobIDT int64

type JobRunT added in v0.1.10

type JobRunT struct {
	// contains filtered or unexported fields
}

JobRunT Temporary store for processing staging file to load file

func (*JobRunT) GetWriter added in v0.1.10

func (jobRun *JobRunT) GetWriter(tableName string) (warehouseutils.LoadFileWriterI, error)

type LoadFileJobT

type LoadFileJobT struct {
	Upload                     Upload
	StagingFile                *model.StagingFile
	Schema                     map[string]map[string]string
	Warehouse                  warehouseutils.Warehouse
	Wg                         *misc.WaitGroup
	LoadFileIDsChan            chan []int64
	TableToBucketFolderMap     map[string]string
	TableToBucketFolderMapLock *sync.RWMutex
}

type MetadataT added in v0.1.10

type MetadataT struct {
	Table        string            `json:"table"`
	Columns      map[string]string `json:"columns"`
	IsMergeRule  bool              `json:"isMergeRule"`
	ReceivedAt   time.Time         `json:"receivedAt"`
	MergePropOne string            `json:"mergePropOne"`
	MergePropTwo string            `json:"mergePropTwo"`
}

type ObjectStorageValidationRequest added in v1.2.0

type ObjectStorageValidationRequest struct {
	Type   string                 `json:"type"`
	Config map[string]interface{} `json:"config"`
}

type Payload added in v1.2.0

type Payload struct {
	BatchID                      string
	UploadID                     int64
	StagingFileID                int64
	StagingFileLocation          string
	UploadSchema                 map[string]map[string]string
	WorkspaceID                  string
	SourceID                     string
	SourceName                   string
	DestinationID                string
	DestinationName              string
	DestinationType              string
	DestinationNamespace         string
	DestinationRevisionID        string
	StagingDestinationRevisionID string
	DestinationConfig            map[string]interface{}
	StagingDestinationConfig     interface{}
	UseRudderStorage             bool
	StagingUseRudderStorage      bool
	UniqueLoadGenID              string
	RudderStoragePrefix          string
	Output                       []loadFileUploadOutputT
	LoadFilePrefix               string // prefix for the load file name
	LoadFileType                 string
}

type ProcessStagingFilesJobT

type ProcessStagingFilesJobT struct {
	Upload    Upload
	List      []*model.StagingFile
	Warehouse warehouseutils.Warehouse
}

type QueryInput added in v0.1.10

type QueryInput struct {
	DestID       string
	SourceID     string
	SQLStatement string
}

type RetryRequest added in v0.1.10

type RetryRequest struct {
	WorkspaceID     string
	SourceID        string
	DestinationID   string
	DestinationType string
	IntervalInHours int64   // Optional, if provided we will retry based on the interval provided
	UploadIds       []int64 // Optional, if provided we will retry the upload ids provided
	ForceRetry      bool
	API             UploadAPIT
}

func (*RetryRequest) RetryWHUploads added in v0.1.10

func (retryReq *RetryRequest) RetryWHUploads(ctx context.Context) (response RetryResponse, err error)

func (*RetryRequest) UploadsToRetry added in v1.2.0

func (retryReq *RetryRequest) UploadsToRetry(ctx context.Context) (response RetryResponse, err error)

type RetryResponse added in v0.1.10

type RetryResponse struct {
	Count      int64
	Message    string
	StatusCode int32
}

type SchemaHandleT added in v0.1.10

type SchemaHandleT struct {
	// contains filtered or unexported fields
}

type TableSkipError added in v0.1.10

type TableSkipError struct {
	// contains filtered or unexported fields
}

TableSkipError is a custom error type to capture if a table load is skipped because of a previously failed table load

func (*TableSkipError) Error added in v0.1.10

func (tse *TableSkipError) Error() string

type TableUploadIDInfoT added in v0.1.10

type TableUploadIDInfoT struct {
	// contains filtered or unexported fields
}

TableUploadIDInfoT captures the uploadID and error for [uploadID][tableName]

type TableUploadReqT added in v0.1.10

type TableUploadReqT struct {
	UploadID int64
	Name     string
	API      UploadAPIT
}

func (TableUploadReqT) GetWhTableUploads added in v0.1.10

func (tableUploadReq TableUploadReqT) GetWhTableUploads() ([]*proto.WHTable, error)

type TableUploadResT added in v0.1.10

type TableUploadResT struct {
	ID         int64     `json:"id"`
	UploadID   int64     `json:"upload_id"`
	Name       string    `json:"name"`
	Error      string    `json:"error"`
	Status     string    `json:"status"`
	Count      int32     `json:"count"`
	LastExecAt time.Time `json:"last_exec_at"`
	Duration   int32     `json:"duration"`
}

type TableUploadStatusInfoT added in v0.1.10

type TableUploadStatusInfoT struct {
	// contains filtered or unexported fields
}

TableUploadStatusInfoT captures the status and error for [uploadID][tableName]

type TableUploadStatusT added in v0.1.10

type TableUploadStatusT struct {
	// contains filtered or unexported fields
}

TableUploadStatusT captures the status of each table upload along with its parent upload_job's info like destination_id and namespace

type TableUploadT added in v0.1.10

type TableUploadT struct {
	// contains filtered or unexported fields
}

func NewTableUpload added in v0.1.10

func NewTableUpload(uploadID int64, tableName string) *TableUploadT

type TablesResT added in v0.1.10

type TablesResT struct {
	Tables []TableUploadResT `json:"tables,omitempty"`
}

type Upload added in v1.2.0

type Upload struct {
	ID                   int64
	Namespace            string
	WorkspaceID          string
	SourceID             string
	SourceType           string
	SourceCategory       string
	DestinationID        string
	DestinationType      string
	StartStagingFileID   int64
	EndStagingFileID     int64
	StartLoadFileID      int64
	EndLoadFileID        int64
	Status               string
	UploadSchema         warehouseutils.SchemaT
	MergedSchema         warehouseutils.SchemaT
	Error                json.RawMessage
	Timings              []map[string]string
	FirstAttemptAt       time.Time
	LastAttemptAt        time.Time
	Attempts             int64
	Metadata             json.RawMessage
	FirstEventAt         time.Time
	LastEventAt          time.Time
	UseRudderStorage     bool
	LoadFileGenStartTime time.Time
	TimingsObj           sql.NullString
	Priority             int
	// cloud sources specific info
	SourceBatchID   string
	SourceTaskID    string
	SourceTaskRunID string
	SourceJobID     string
	SourceJobRunID  string
	LoadFileType    string
}

type UploadAPIT added in v0.1.10

type UploadAPIT struct {
	// contains filtered or unexported fields
}
var UploadAPI UploadAPIT

type UploadColumnT added in v0.1.10

type UploadColumnT struct {
	Column string
	Value  interface{}
}

type UploadColumnsOpts added in v0.1.10

type UploadColumnsOpts struct {
	Fields []UploadColumnT
	Txn    *sql.Tx
}

type UploadJobT added in v0.1.10

type UploadJobT struct {
	// contains filtered or unexported fields
}

func (*UploadJobT) Aborted added in v0.1.10

func (job *UploadJobT) Aborted(attempts int, startTime time.Time) bool

Aborted makes a check that if the state of the job should be aborted

func (*UploadJobT) GetFirstLastEvent added in v1.0.2

func (job *UploadJobT) GetFirstLastEvent() (time.Time, time.Time)

func (*UploadJobT) GetLoadFileGenStartTIme added in v0.1.10

func (job *UploadJobT) GetLoadFileGenStartTIme() time.Time

func (*UploadJobT) GetLoadFileType added in v0.1.10

func (job *UploadJobT) GetLoadFileType() string

func (*UploadJobT) GetLoadFilesMetadata added in v0.1.10

func (job *UploadJobT) GetLoadFilesMetadata(options warehouseutils.GetLoadFilesOptionsT) (loadFiles []warehouseutils.LoadFileT)

func (*UploadJobT) GetLocalSchema added in v0.1.10

func (job *UploadJobT) GetLocalSchema() warehouseutils.SchemaT

func (*UploadJobT) GetSampleLoadFileLocation added in v0.1.10

func (job *UploadJobT) GetSampleLoadFileLocation(tableName string) (location string, err error)

func (*UploadJobT) GetSchemaInWarehouse added in v0.1.10

func (job *UploadJobT) GetSchemaInWarehouse() (schema warehouseutils.SchemaT)

func (*UploadJobT) GetSingleLoadFile added in v0.1.10

func (job *UploadJobT) GetSingleLoadFile(tableName string) (warehouseutils.LoadFileT, error)

func (*UploadJobT) GetTableSchemaInUpload added in v0.1.10

func (job *UploadJobT) GetTableSchemaInUpload(tableName string) warehouseutils.TableSchemaT

func (*UploadJobT) GetTableSchemaInWarehouse added in v0.1.10

func (job *UploadJobT) GetTableSchemaInWarehouse(tableName string) warehouseutils.TableSchemaT

func (*UploadJobT) ShouldOnDedupUseNewRecord added in v0.1.10

func (job *UploadJobT) ShouldOnDedupUseNewRecord() bool

func (*UploadJobT) UpdateLocalSchema added in v0.1.10

func (job *UploadJobT) UpdateLocalSchema(schema warehouseutils.SchemaT) error

func (*UploadJobT) UseRudderStorage added in v0.1.10

func (job *UploadJobT) UseRudderStorage() bool

type UploadPagination added in v0.1.10

type UploadPagination struct {
	Total  int32 `json:"total"`
	Limit  int32 `json:"limit"`
	Offset int32 `json:"offset"`
}

type UploadReqT added in v0.1.10

type UploadReqT struct {
	WorkspaceID string
	UploadId    int64
	API         UploadAPIT
}

func (UploadReqT) GetWHUpload added in v0.1.10

func (uploadReq UploadReqT) GetWHUpload() (*proto.WHUploadResponse, error)

func (UploadReqT) TriggerWHUpload added in v0.1.10

func (uploadReq UploadReqT) TriggerWHUpload() (response *proto.TriggerWhUploadsResponse, err error)

type UploadResT added in v0.1.10

type UploadResT struct {
	ID              int64             `json:"id"`
	Namespace       string            `json:"namespace"`
	SourceID        string            `json:"source_id"`
	DestinationID   string            `json:"destination_id"`
	DestinationType string            `json:"destination_type"`
	Status          string            `json:"status"`
	Error           string            `json:"error"`
	Attempt         int32             `json:"attempt"`
	Duration        int32             `json:"duration"`
	NextRetryTime   string            `json:"nextRetryTime"`
	FirstEventAt    time.Time         `json:"first_event_at"`
	LastEventAt     time.Time         `json:"last_event_at"`
	Tables          []TableUploadResT `json:"tables,omitempty"`
}

type UploadStatusOpts added in v0.1.10

type UploadStatusOpts struct {
	Status           string
	AdditionalFields []UploadColumnT
	ReportingMetric  types.PUReportedMetric
}

type UploadsReqT added in v0.1.10

type UploadsReqT struct {
	WorkspaceID     string
	SourceID        string
	DestinationID   string
	DestinationType string
	Status          string
	Limit           int32
	Offset          int32
	API             UploadAPIT
}

func (*UploadsReqT) GetWhUploads added in v0.1.10

func (uploadsReq *UploadsReqT) GetWhUploads() (uploadsRes *proto.WHUploadsResponse, err error)

func (*UploadsReqT) TriggerWhUploads added in v0.1.10

func (uploadsReq *UploadsReqT) TriggerWhUploads() (response *proto.TriggerWhUploadsResponse, err error)

type UploadsResT added in v0.1.10

type UploadsResT struct {
	Uploads    []UploadResT     `json:"uploads"`
	Pagination UploadPagination `json:"pagination"`
}

type WarehouseAdmin added in v0.1.10

type WarehouseAdmin struct{}

func (*WarehouseAdmin) ConfigurationTest added in v1.0.2

ConfigurationTest test the underlying warehouse destination

func (*WarehouseAdmin) Query added in v0.1.10

Query the underlying warehouse

func (*WarehouseAdmin) TriggerUpload added in v0.1.10

func (*WarehouseAdmin) TriggerUpload(off bool, reply *string) error

TriggerUpload sets uploads to start without delay

type WorkerIdentifierT added in v0.1.10

type WorkerIdentifierT string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL