importinto

package
v1.1.0-beta.0...-68ac9ec Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 10, 2024 License: Apache-2.0 Imports: 63 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var NewTaskRegisterWithTTL = utils.NewTaskRegisterWithTTL

NewTaskRegisterWithTTL is the ctor for TaskRegister. It is exported for testing.

Functions

func GetTaskImportedRows

func GetTaskImportedRows(ctx context.Context, jobID int64) (uint64, error)

GetTaskImportedRows gets the number of imported rows of a job. Note: for finished job, we can get the number of imported rows from task meta.

func NewImportExecutor

func NewImportExecutor(
	ctx context.Context,
	id string,
	task *proto.Task,
	taskTable taskexecutor.TaskTable,
	store tidbkv.Storage,
) taskexecutor.TaskExecutor

NewImportExecutor creates a new import task executor.

func NewImportScheduler

func NewImportScheduler(
	ctx context.Context,
	task *proto.Task,
	param scheduler.Param,
	storeWithPD kv.StorageWithPD,
) scheduler.Scheduler

NewImportScheduler creates a new import scheduler.

func NewPostProcessStepExecutor

func NewPostProcessStepExecutor(taskID int64, store tidbkv.Storage, taskMeta *TaskMeta, logger *zap.Logger) execute.StepExecutor

NewPostProcessStepExecutor creates a new post process step executor. exported for testing.

func SubmitStandaloneTask

func SubmitStandaloneTask(ctx context.Context, plan *importer.Plan, stmt string, ecp map[int32]*checkpoints.EngineCheckpoint) (int64, *proto.TaskBase, error)

SubmitStandaloneTask submits a task to the distribute framework that only runs on the current node. when import from server-disk, pass engine checkpoints too, as scheduler might run on another node where we can't access the data files.

func SubmitTask

func SubmitTask(ctx context.Context, plan *importer.Plan, stmt string) (int64, *proto.TaskBase, error)

SubmitTask submits a task to the distribute framework that runs on all managed nodes.

func TaskKey

func TaskKey(jobID int64) string

TaskKey returns the task key for a job.

Types

type Checksum

type Checksum struct {
	Sum  uint64
	KVs  uint64
	Size uint64
}

Checksum records the checksum information.

type Chunk

type Chunk struct {
	Path         string
	FileSize     int64
	Offset       int64
	EndOffset    int64
	PrevRowIDMax int64
	RowIDMax     int64
	Type         mydump.SourceType
	Compression  mydump.Compression
	Timestamp    int64
}

Chunk records the chunk information.

type ImportCleanUpS3

type ImportCleanUpS3 struct {
}

ImportCleanUpS3 implements scheduler.CleanUpRoutine.

func (*ImportCleanUpS3) CleanUp

func (*ImportCleanUpS3) CleanUp(ctx context.Context, task *proto.Task) error

CleanUp implements the CleanUpRoutine.CleanUp interface.

type ImportSchedulerExt

type ImportSchedulerExt struct {
	GlobalSort bool
	// contains filtered or unexported fields
}

ImportSchedulerExt is an extension of ImportScheduler, exported for test.

func (*ImportSchedulerExt) GetEligibleInstances

func (*ImportSchedulerExt) GetEligibleInstances(_ context.Context, task *proto.Task) ([]string, error)

GetEligibleInstances implements scheduler.Extension interface.

func (*ImportSchedulerExt) GetNextStep

func (sch *ImportSchedulerExt) GetNextStep(task *proto.TaskBase) proto.Step

GetNextStep implements scheduler.Extension interface.

func (*ImportSchedulerExt) IsRetryableErr

func (*ImportSchedulerExt) IsRetryableErr(error) bool

IsRetryableErr implements scheduler.Extension interface.

func (*ImportSchedulerExt) OnDone

func (sch *ImportSchedulerExt) OnDone(ctx context.Context, handle storage.TaskHandle, task *proto.Task) error

OnDone implements scheduler.Extension interface.

func (*ImportSchedulerExt) OnNextSubtasksBatch

func (sch *ImportSchedulerExt) OnNextSubtasksBatch(
	ctx context.Context,
	taskHandle storage.TaskHandle,
	task *proto.Task,
	execIDs []string,
	nextStep proto.Step,
) (
	resSubtaskMeta [][]byte, err error)

OnNextSubtasksBatch generate batch of next stage's plan.

func (*ImportSchedulerExt) OnTick

func (sch *ImportSchedulerExt) OnTick(ctx context.Context, task *proto.Task)

OnTick implements scheduler.Extension interface.

type ImportSpec

type ImportSpec struct {
	ID     int32
	Plan   importer.Plan
	Chunks []Chunk
}

ImportSpec is the specification of an import pipeline.

func (*ImportSpec) ToSubtaskMeta

func (s *ImportSpec) ToSubtaskMeta(planner.PlanCtx) ([]byte, error)

ToSubtaskMeta converts the import spec to subtask meta.

type ImportStepMeta

type ImportStepMeta struct {
	// this is the engine ID, not the id in tidb_background_subtask table.
	ID       int32
	Chunks   []Chunk
	Checksum map[int64]Checksum // see KVGroupChecksum for definition of map key.
	Result   Result
	// MaxIDs stores the max id that have been used during encoding for each allocator type.
	// the max id is same among all allocator types for now, since we're using same base, see
	// NewPanickingAllocators for more info.
	MaxIDs map[autoid.AllocatorType]int64

	SortedDataMeta *external.SortedKVMeta
	// SortedIndexMetas is a map from index id to its sorted kv meta.
	SortedIndexMetas map[int64]*external.SortedKVMeta
}

ImportStepMeta is the meta of import step. Scheduler will split the task into subtasks(FileInfos -> Chunks) All the field should be serializable.

type LogicalPlan

type LogicalPlan struct {
	JobID             int64
	Plan              importer.Plan
	Stmt              string
	EligibleInstances []*infosync.ServerInfo
	ChunkMap          map[int32][]Chunk
}

LogicalPlan represents a logical plan for import into.

func (*LogicalPlan) FromTaskMeta

func (p *LogicalPlan) FromTaskMeta(bs []byte) error

FromTaskMeta converts the task meta to logical plan.

func (*LogicalPlan) ToPhysicalPlan

func (p *LogicalPlan) ToPhysicalPlan(planCtx planner.PlanCtx) (*planner.PhysicalPlan, error)

ToPhysicalPlan converts the logical plan to physical plan.

func (*LogicalPlan) ToTaskMeta

func (p *LogicalPlan) ToTaskMeta() ([]byte, error)

ToTaskMeta converts the logical plan to task meta.

type MergeSortSpec

type MergeSortSpec struct {
	*MergeSortStepMeta
}

MergeSortSpec is the specification of a merge-sort pipeline.

func (*MergeSortSpec) ToSubtaskMeta

func (s *MergeSortSpec) ToSubtaskMeta(planner.PlanCtx) ([]byte, error)

ToSubtaskMeta converts the merge-sort spec to subtask meta.

type MergeSortStepMeta

type MergeSortStepMeta struct {
	// KVGroup is the group name of the sorted kv, either dataKVGroup or index-id.
	KVGroup               string   `json:"kv-group"`
	DataFiles             []string `json:"data-files"`
	external.SortedKVMeta `json:"sorted-kv-meta"`
}

MergeSortStepMeta is the meta of merge sort step.

type MiniTaskExecutor

type MiniTaskExecutor interface {
	Run(ctx context.Context, dataWriter, indexWriter backend.EngineWriter) error
}

MiniTaskExecutor is the interface for a minimal task executor. exported for testing.

type PostProcessSpec

type PostProcessSpec struct {
	// for checksum request
	Schema string
	Table  string
}

PostProcessSpec is the specification of a post process pipeline.

func (*PostProcessSpec) ToSubtaskMeta

func (*PostProcessSpec) ToSubtaskMeta(planCtx planner.PlanCtx) ([]byte, error)

ToSubtaskMeta converts the post process spec to subtask meta.

type PostProcessStepMeta

type PostProcessStepMeta struct {
	// accumulated checksum of all subtasks in import step. See KVGroupChecksum for
	// definition of map key.
	Checksum map[int64]Checksum
	// MaxIDs of max all max-ids of subtasks in import step.
	MaxIDs map[autoid.AllocatorType]int64
}

PostProcessStepMeta is the meta of post process step.

type Result

type Result struct {
	LoadedRowCnt uint64
	ColSizeMap   map[int64]int64
}

Result records the metrics information. This portion of the code may be implemented uniformly in the framework in the future.

type SharedVars

type SharedVars struct {
	TableImporter *importer.TableImporter
	DataEngine    *backend.OpenedEngine
	IndexEngine   *backend.OpenedEngine
	Progress      *importer.Progress

	Checksum *verification.KVGroupChecksum

	SortedDataMeta *external.SortedKVMeta
	// SortedIndexMetas is a map from index id to its sorted kv meta.
	SortedIndexMetas map[int64]*external.SortedKVMeta
	ShareMu          sync.Mutex
	// contains filtered or unexported fields
}

SharedVars is the shared variables of all minimal tasks in a subtask. This is because subtasks cannot directly obtain the results of the minimal subtask. All the fields should be concurrent safe.

type TaskMeta

type TaskMeta struct {
	// IMPORT INTO job id, see mysql.tidb_import_jobs.
	JobID  int64
	Plan   importer.Plan
	Stmt   string
	Result Result
	// eligible instances to run this task, we run on all instances if it's empty.
	// we only need this when run IMPORT INTO without distributed option now, i.e.
	// running on the instance that initiate the IMPORT INTO.
	EligibleInstances []*infosync.ServerInfo
	// the file chunks to import, when import from server file, we need to pass those
	// files to the framework scheduler which might run on another instance.
	// we use a map from engine ID to chunks since we need support split_file for CSV,
	// so need to split them into engines before passing to scheduler.
	ChunkMap map[int32][]Chunk
}

TaskMeta is the task of IMPORT INTO. All the field should be serializable.

type WriteIngestSpec

type WriteIngestSpec struct {
	*WriteIngestStepMeta
}

WriteIngestSpec is the specification of a write-ingest pipeline.

func (*WriteIngestSpec) ToSubtaskMeta

func (s *WriteIngestSpec) ToSubtaskMeta(planner.PlanCtx) ([]byte, error)

ToSubtaskMeta converts the write-ingest spec to subtask meta.

type WriteIngestStepMeta

type WriteIngestStepMeta struct {
	KVGroup               string `json:"kv-group"`
	external.SortedKVMeta `json:"sorted-kv-meta"`
	DataFiles             []string `json:"data-files"`
	StatFiles             []string `json:"stat-files"`
	RangeJobKeys          [][]byte `json:"range-job-keys"`
	RangeSplitKeys        [][]byte `json:"range-split-keys"`
	TS                    uint64   `json:"ts"`

	Result Result
}

WriteIngestStepMeta is the meta of write and ingest step. only used when global sort is enabled.

Directories

Path Synopsis
Package mock is a generated GoMock package.
Package mock is a generated GoMock package.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL