spec

package
v0.0.13 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 26, 2023 License: Apache-2.0 Imports: 4 Imported by: 5

Documentation

Overview

Package spec defines the specification of a YT operation.

See https://wiki.yandex-team.ru/yt/userdoc/operations/

Index

Constants

View Source
// Auto-merge mode names, declared as untyped string constants.
//
// NOTE(review): the shared AutoMerge prefix suggests these are the values
// intended for AutoMerge.Mode, and that AutoMergeManual is the mode in which
// the manual-mode settings of AutoMerge apply — confirm against callers.
const (
	AutoMergeRelaxed  = "relaxed"
	AutoMergeEconomic = "economic"
	AutoMergeDisabled = "disabled"
	AutoMergeManual   = "manual"
)

Variables

This section is empty.

Functions

func ConfigureJobIOForDynamicTable

func ConfigureJobIOForDynamicTable(spec *Spec)

ConfigureJobIOForDynamicTable sets appropriate IO settings for an operation that outputs a static table that will later be converted to a dynamic table.

Types

type AutoMerge

// AutoMerge is the auto-merge section of an operation spec. Spec references
// it through its AutoMerge field, serialized under the "auto_merge" key.
type AutoMerge struct {
	// Mode is serialized under "mode" and, unlike the other fields, is not
	// tagged omitempty. Presumably one of the AutoMerge* constants — TODO
	// confirm.
	Mode string `yson:"mode"`

	// Settings for manual mode. All three are tagged omitempty.
	MaxIntermediateChunkCount int `yson:"max_intermediate_chunk_count,omitempty"`
	ChunkCountPerMergeJob     int `yson:"chunk_count_per_merge_job,omitempty"`
	ChunkSizeThreshold        int `yson:"chunk_size_threshold,omitempty"`
}

type ControlAttributes

// ControlAttributes is a group of boolean switches that JobIO stores under
// the "control_attributes" key.
//
// NOTE(review): none of the fields is tagged omitempty, so a false value is
// presumably written out explicitly rather than left unset — confirm that
// this is intended.
type ControlAttributes struct {
	EnableTableIndex bool `yson:"enable_table_index"`
	EnableRowIndex   bool `yson:"enable_row_index"`
	EnableRangeIndex bool `yson:"enable_range_index"`
	EnableKeySwitch  bool `yson:"enable_key_switch"`
}

type File

// File is the element type of UserScript.FilePaths.
//
// Per the yson tags, CypressPath is encoded as the value of the YSON node and
// every other field is encoded as an attribute of that node.
type File struct {
	// Attributes of the node; each one is tagged omitempty.
	FileName            string `yson:"file_name,attr,omitempty"`
	// Format is untyped (any); the expected shape is not visible here.
	Format              any    `yson:"format,attr,omitempty"`
	Executable          bool   `yson:"executable,attr,omitempty"`
	BypassArtifactCache bool   `yson:"bypass_artifact_cache,attr,omitempty"`

	// CypressPath is the node value itself (tag ",value").
	CypressPath ypath.Path `yson:",value"`
}

type JobCPUMonitor added in v0.0.6

// JobCPUMonitor is the job CPU monitor section of an operation spec. Spec
// references it through its JobCPUMonitor field, serialized under the
// "job_cpu_monitor" key.
//
// All fields except EnableCPUReclaim are tagged omitempty.
type JobCPUMonitor struct {
	// Timing parameters, expressed as yson.Duration.
	StartDelay            yson.Duration `yson:"start_delay,omitempty"`
	CheckPeriod           yson.Duration `yson:"check_period,omitempty"`
	IncreaseCoefficient   float64       `yson:"increase_coefficient,omitempty"`
	DecreaseCoefficient   float64       `yson:"decrease_coefficient,omitempty"`
	SmoothingFactor       float64       `yson:"smoothing_factor,omitempty"`
	VoteWindowSize        int           `yson:"vote_window_size,omitempty"`
	VoteDecisionThreshold int           `yson:"vote_decision_threshold,omitempty"`
	MinCPULimit           float64       `yson:"min_cpu_limit,omitempty"`
	// EnableCPUReclaim has no omitempty tag, so false is presumably written
	// out explicitly — TODO confirm this is intended.
	EnableCPUReclaim      bool          `yson:"enable_cpu_reclaim"`
}

type JobIO

// JobIO groups job IO settings. Spec uses it for its *JobIO fields (JobIO,
// MapJobIO, ReduceJobIO, PartitionJobIO, MergeJobIO, SortJobIO), and
// UserScript uses it for its own JobIO field.
//
// All fields are tagged omitempty.
type JobIO struct {
	// TableReader and TableWriter are untyped (any); their expected shape is
	// not visible here.
	TableReader       any                `yson:"table_reader,omitempty"`
	TableWriter       any                `yson:"table_writer,omitempty"`
	ControlAttributes *ControlAttributes `yson:"control_attributes,omitempty"`
}

type ResourceLimits

// ResourceLimits is the resource limits section of an operation spec. Spec
// references it through its ResourceLimits field, serialized under the
// "resource_limits" key.
//
// All fields are tagged omitempty.
type ResourceLimits struct {
	UserSlots int   `yson:"user_slots,omitempty"`
	// CPU is an int here, whereas UserScript.CPULimit is a float32.
	CPU       int   `yson:"cpu,omitempty"`
	// Memory is an int64; the unit is not stated in this file (presumably
	// bytes — verify).
	Memory    int64 `yson:"memory,omitempty"`
}

type Spec

// Spec is the specification of a YT operation.
//
// A single struct carries the fields of every operation type. Almost all
// fields are tagged omitempty. Optional booleans and some optional integers
// are pointers, presumably so that "not set" can be told apart from an
// explicit false or zero — TODO confirm.
type Spec struct {
	// Type is excluded from the YSON encoding (tag "-").
	Type yt.OperationType `yson:"-"`

	// Descriptive metadata.
	Title       string         `yson:"title,omitempty"`
	StartedBy   map[string]any `yson:"started_by,omitempty"`
	// NOTE(review): Annotations is the only map here without omitempty, so a
	// nil or empty map is presumably still written out — confirm this is
	// intended.
	Annotations map[string]any `yson:"annotations"`
	Description map[string]any `yson:"description,omitempty"`

	// Pool and scheduling settings.
	Pool                string   `yson:"pool,omitempty"`
	Weight              float64  `yson:"weight,omitempty"`
	PoolTrees           []string `yson:"pool_trees,omitempty"`
	SchedulingTagFilter string   `yson:"scheduling_tag_filter,omitempty"`
	TentativePoolTrees  []string `yson:"tentative_pool_trees,omitempty"`

	ResourceLimits *ResourceLimits `yson:"resource_limits,omitempty"`

	SecureVault map[string]string `yson:"secure_vault,omitempty"`

	// Input and output tables. Both a plural OutputTablePaths and a singular
	// OutputTablePath exist; which one applies presumably depends on the
	// operation type — verify.
	InputTablePaths        []ypath.YPath `yson:"input_table_paths,omitempty"`
	OutputTablePaths       []ypath.YPath `yson:"output_table_paths,omitempty"`
	OutputTablePath        ypath.YPath   `yson:"output_table_path,omitempty"`
	MapperOutputTableCount int           `yson:"mapper_output_table_count,omitempty"`

	// Ordering and key columns.
	Atomicity string   `yson:"atomicity,omitempty"`
	Ordered   bool     `yson:"ordered,omitempty"`
	ReduceBy  []string `yson:"reduce_by,omitempty"`
	SortBy    []string `yson:"sort_by,omitempty"`
	JoinBy    []string `yson:"join_by,omitempty"`
	PivotKeys [][]any  `yson:"pivot_keys,omitempty"`

	// Merge settings. Note that MergeMode is serialized under the key "mode",
	// not "merge_mode".
	MergeMode      string   `yson:"mode,omitempty"`
	MergeBy        []string `yson:"merge_by,omitempty"`
	CombineChunks  bool     `yson:"combine_chunks,omitempty"`
	ForceTransform bool     `yson:"force_transform,omitempty"`

	// Job sizing.
	JobCount              int   `yson:"job_count,omitempty"`
	DataSizePerJob        int64 `yson:"data_size_per_job,omitempty"`
	UseColumnarStatistics *bool `yson:"use_columnar_statistics,omitempty"`

	// Time limit, failure handling, and diagnostic output tables.
	TimeLimit                     yson.Duration `yson:"time_limit,omitempty"`
	MaxFailedJobCount             int           `yson:"max_failed_job_count,omitempty"`
	MaxSpeculativeJobCountPerTask *int          `yson:"max_speculative_job_count_per_task,omitempty"`
	FailOnJobRestart              *bool         `yson:"fail_on_job_restart,omitempty"`
	TryAvoidDuplicatingJobs       *bool         `yson:"try_avoid_duplicating_jobs,omitempty"`
	StderrTablePath               ypath.Path    `yson:"stderr_table_path,omitempty"`
	CoreTablePath                 ypath.Path    `yson:"core_table_path,omitempty"`

	JobCPUMonitor *JobCPUMonitor `yson:"job_cpu_monitor,omitempty"`

	// User scripts. Tasks maps a task name to its script.
	Mapper         *UserScript            `yson:"mapper,omitempty"`
	Reducer        *UserScript            `yson:"reducer,omitempty"`
	ReduceCombiner *UserScript            `yson:"reduce_combiner,omitempty"`
	Tasks          map[string]*UserScript `yson:"tasks,omitempty"`

	// Job IO settings: one general section plus one per job kind.
	JobIO          *JobIO `yson:"job_io,omitempty"`
	MapJobIO       *JobIO `yson:"map_job_io,omitempty"`
	ReduceJobIO    *JobIO `yson:"reduce_job_io,omitempty"`
	PartitionJobIO *JobIO `yson:"partition_job_io,omitempty"`
	MergeJobIO     *JobIO `yson:"merge_job_io,omitempty"`
	SortJobIO      *JobIO `yson:"sort_job_io,omitempty"`

	AutoMerge *AutoMerge `yson:"auto_merge,omitempty"`

	ACL                     []yt.ACE `yson:"acl,omitempty"`
	EnableKeyGuarantee      *bool    `yson:"enable_key_guarantee,omitempty"`
	ConsiderOnlyPrimarySize *bool    `yson:"consider_only_primary_size,omitempty"`

	// Cluster, network, and attribute-copy settings.
	ClusterName    string `yson:"cluster_name,omitempty"`
	NetworkName    string `yson:"network_name,omitempty"`
	CopyAttributes *bool  `yson:"copy_attributes,omitempty"`

	IntermediateDataReplicationFactor int `yson:"intermediate_data_replication_factor,omitempty"`

	ProbingRatio    int    `yson:"probing_ratio,omitempty"`
	ProbingPoolTree string `yson:"probing_pool_tree,omitempty"`

	InputQuery string `yson:"input_query,omitempty"`
}

func Erase

func Erase() *Spec

func Map

func Map() *Spec

func MapReduce

func MapReduce() *Spec

func Merge

func Merge() *Spec

func Reduce

func Reduce() *Spec

func Sort

func Sort() *Spec

func Vanilla

func Vanilla() *Spec

func (*Spec) AddAnnotations

func (s *Spec) AddAnnotations(annotations map[string]any) *Spec

func (*Spec) AddInput

func (s *Spec) AddInput(path ypath.YPath) *Spec

func (*Spec) AddOutput

func (s *Spec) AddOutput(path ypath.YPath) *Spec

func (*Spec) AddSecureVaultVar

func (s *Spec) AddSecureVaultVar(name, value string) *Spec

func (*Spec) AddVanillaTask

func (s *Spec) AddVanillaTask(name string, jobCount int) *Spec

func (*Spec) Clone

func (s *Spec) Clone() *Spec

func (*Spec) Erase

func (s *Spec) Erase() *Spec

func (*Spec) JoinByColumns

func (s *Spec) JoinByColumns(columns ...string) *Spec

func (*Spec) Map

func (s *Spec) Map() *Spec

func (*Spec) MapReduce

func (s *Spec) MapReduce() *Spec

func (*Spec) Merge

func (s *Spec) Merge() *Spec

func (*Spec) PatchUserBinary

func (s *Spec) PatchUserBinary(path ypath.Path)

func (*Spec) Reduce

func (s *Spec) Reduce() *Spec

func (*Spec) ReduceByColumns

func (s *Spec) ReduceByColumns(columns ...string) *Spec

func (*Spec) SetOutput

func (s *Spec) SetOutput(path ypath.YPath) *Spec

func (*Spec) Sort

func (s *Spec) Sort() *Spec

func (*Spec) SortByColumns

func (s *Spec) SortByColumns(columns ...string) *Spec

func (*Spec) Vanilla

func (s *Spec) Vanilla() *Spec

func (*Spec) VisitUserScripts

func (s *Spec) VisitUserScripts(cb func(*UserScript))

type UserScript

// UserScript describes user code run inside jobs. Spec references it through
// the Mapper, Reducer and ReduceCombiner fields and through the values of the
// Tasks map.
//
// All fields except Command are tagged omitempty.
type UserScript struct {
	// Command specifies the shell command that is executed inside the job.
	//
	// The mapreduce package uses Command for its own purposes. Users should
	// not set this field.
	Command string `yson:"command"`

	// Format, InputFormat and OutputFormat are untyped (any); their expected
	// shape is not visible here.
	Format             any               `yson:"format,omitempty"`
	InputFormat        any               `yson:"input_format,omitempty"`
	OutputFormat       any               `yson:"output_format,omitempty"`
	Environment        map[string]string `yson:"environment,omitempty"`
	FilePaths          []File            `yson:"file_paths,omitempty"`
	LayerPaths         []ypath.Path      `yson:"layer_paths,omitempty"`
	MakeRootFSWritable bool              `yson:"make_rootfs_writable,omitempty"`

	TmpfsPath string `yson:"tmpfs_path,omitempty"`
	TmpfsSize int64  `yson:"tmpfs_size,omitempty"`
	CopyFiles bool   `yson:"copy_files,omitempty"`

	// CPULimit corresponds to the cpu_limit job setting.
	//
	// This setting results in GOMAXPROCS being set to max(1, ceil(CPULimit)).
	CPULimit            float32 `yson:"cpu_limit,omitempty"`
	MemoryLimit         int64   `yson:"memory_limit,omitempty"`
	MemoryReserveFactor float64 `yson:"memory_reserve_factor,omitempty"`
	GPULimit            int     `yson:"gpu_limit,omitempty"`

	NetworkProject string `yson:"network_project,omitempty"`

	// EnablePorto is a string rather than a bool; the accepted values are not
	// visible here.
	EnablePorto            string `yson:"enable_porto,omitempty"`
	UsePortoMemoryTracking *bool  `yson:"use_porto_memory_tracking,omitempty"`

	// The following fields are used only in vanilla operations.
	JobCount         int           `yson:"job_count,omitempty"`
	OutputTablePaths []ypath.YPath `yson:"output_table_paths,omitempty"`
	JobIO            *JobIO        `yson:"job_io,omitempty"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL