prolly_cellwise

package
v0.0.0-...-4f4d58c Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 28, 2024 License: Apache-2.0 Imports: 24 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func NewProllyShardManager

func NewProllyShardManager(logger *zap.Logger, nbf *types.NomsBinFormat, numCommits int, inputShard AttributionShard, table, shardBasePath string, tableSch schema.Schema, shardParams ProllyAttShardParams, shardStore att.ShardStore) *prollyShardManager

NewProllyShardManager takes an input shard, a ShardStore, some ShardParams, and some other metadata, and returns a prollyShardManager, which manages dynamic sharding and the persistence of shard data.

Types

type AttributionShard

// AttributionShard describes one shard of a table's attribution data: the key
// range it covers, the ShardStore key its data is persisted under, and the
// counts it has attributed to commits.
type AttributionShard struct {
	// Table is the name of the table this shard refers to.
	Table string `json:"table"`
	// StartInclusive is the primary key at the start of the shard. All keys within this shard are equal to or greater
	// than this value. If this value is nil, the shard starts from the beginning of the data.
	StartInclusive []byte `json:"start"`
	// EndExclusive is the end value for this shard. All keys within this shard are less than this value. If this value
	// is nil, the shard ends at the end of the data.
	EndExclusive []byte `json:"end"`
	// Path is the ShardStore key used for persisting / retrieving the attribution data for this shard.
	Path string `json:"path"`
	// CommitCounts is a slice of counts this shard has attributed to commits.
	CommitCounts []uint64 `json:"commit_counts"`
	// Cardinality is how many rows this shard addresses.
	Cardinality uint64 `json:"cardinality"`
}

AttributionShard is the ShardInfo object used by cellwise attribution to track a shard of a table's primary-key range.

func (AttributionShard) DebugFormat

func (as AttributionShard) DebugFormat(kd val.TupleDesc) string

func (AttributionShard) Equals

func (as AttributionShard) Equals(other interface{}) bool

func (AttributionShard) Key

todo (dhruv): is this key okay?

type CellwiseAttSummary

// CellwiseAttSummary is an empty struct with no fields.
// NOTE(review): no documentation is published for this type; confirm its
// purpose and whether it is still needed alongside ProllyAttSummary.
type CellwiseAttSummary struct{}

type Method

// Method implements att.AttributionMethod. Its exported methods collect and
// process shards, process shard results into a summary, and (de)serialize
// shard info, results and summaries. Construct it with NewMethod.
type Method struct {
	// contains filtered or unexported fields
}

Method implements att.AttributionMethod

func NewMethod

func NewMethod(logger *zap.Logger, ddb *doltdb.DoltDB, startHash hash.Hash, shardStore att.ShardStore, params ProllyAttShardParams) Method

NewMethod returns a new Method object

func (Method) CollectShards

func (m Method) CollectShards(ctx context.Context, commit, prevCommit *doltdb.Commit, summary att.Summary) ([]att.ShardInfo, error)

func (Method) DeserializeResults

func (m Method) DeserializeResults(ctx context.Context, data []byte) (att.ShardResult, error)

func (Method) DeserializeShardInfo

func (m Method) DeserializeShardInfo(ctx context.Context, data []byte) (att.ShardInfo, error)

func (Method) EmptySummary

func (m Method) EmptySummary(ctx context.Context) att.Summary

func (Method) ProcessResults

func (m Method) ProcessResults(ctx context.Context, commitHash hash.Hash, prevSummary att.Summary, results []att.ShardResult) (att.Summary, error)

func (Method) ProcessShard

func (m Method) ProcessShard(ctx context.Context, commitIdx int16, cm, prevCm *doltdb.Commit, shardInfo att.ShardInfo) (att.ShardResult, error)

func (Method) ReadSummary

func (m Method) ReadSummary(ctx context.Context, key string) (att.Summary, error)

func (Method) SerializeResults

func (m Method) SerializeResults(ctx context.Context, results att.ShardResult) ([]byte, error)

func (Method) SerializeShardInfo

func (m Method) SerializeShardInfo(ctx context.Context, info att.ShardInfo) ([]byte, error)

func (Method) WriteSummary

func (m Method) WriteSummary(ctx context.Context, summary att.Summary) (string, error)

type ProllyAttShardParams

// ProllyAttShardParams controls the dynamic sharding behavior.
type ProllyAttShardParams struct {
	// MaximumShardCardinality controls the maximum cardinality of a shard's key
	// range.
	// NOTE(review): presumably measured in rows, like AttributionShard.Cardinality — confirm.
	MaximumShardCardinality int
}

ProllyAttShardParams controls the dynamic sharding behavior.

type ProllyAttSummary

// ProllyAttSummary summarizes the attribution for a given commit. Every field
// carries both a noms and a json struct tag.
type ProllyAttSummary struct {
	// StartHash is the commit hash at the start of attribution, before the first attributed commit is merged.
	StartHash hash.Hash `noms:"start_hash" json:"start_hash"`
	// CommitHashes is an ordered list of the commit hashes that have been processed. The current commit is the
	// last element in the slice.
	CommitHashes []hash.Hash `noms:"commit_hashes" json:"commit_hashes"`
	// CommitCounts provides the number of cellwise attribution changes that are attributed to each of the commits.
	// NOTE(review): presumably parallel to CommitHashes (same length and order) — confirm.
	CommitCounts []uint64 `noms:"commit_counts" json:"commit_counts"`
	// TableShards tracks the AttributionShards for each table used to build attribution.
	// NOTE(review): the noms tag is "table_to_shards" but the json tag is "table_to_shard" (singular). Both are
	// serialized field names, so confirm whether the mismatch is intentional before changing either.
	TableShards map[string][]AttributionShard `noms:"table_to_shards" json:"table_to_shard"`
}

ProllyAttSummary is the Summary implementation used by cellwise attribution to summarize the attribution for a given commit. It supports marshalling to and from noms.

func (ProllyAttSummary) CommitToCount

func (c ProllyAttSummary) CommitToCount(ctx context.Context) (map[hash.Hash]uint64, error)

CommitToCount returns a map from commit hash to the number of changes attributed to them.

func (ProllyAttSummary) NumCommits

func (c ProllyAttSummary) NumCommits() int16

NumCommits returns the number of commits that were built and summarized by this object

type Shard

// Shard holds a table name, a ShardStore path, per-commit counts and a key
// range whose bounds are both named as inclusive.
// NOTE(review): this overlaps heavily with AttributionShard, which uses an
// exclusive end bound and different JSON keys ("start"/"end"); confirm which
// type callers are expected to use.
type Shard struct {
	// Table is the name of the table this shard refers to.
	Table string `json:"table"`
	// Path is the ShardStore key used for persisting / retrieving the attribution data for this shard.
	Path string `json:"path"`
	// CommitCounts is a slice of counts this shard has attributed to commits.
	CommitCounts []uint64 `json:"commit_counts"`
	// StartInclusive is a val.Tuple, held as a byte slice.
	StartInclusive []byte `json:"start_inclusive"`
	// EndInclusive is a val.Tuple, held as a byte slice.
	EndInclusive []byte `json:"end_inclusive"`
}

type UnchangedShard

// UnchangedShard marks a shard which has not changed since it was last
// processed. It embeds AttributionShard, so all of that type's fields and
// methods are promoted onto it.
type UnchangedShard struct {
	AttributionShard
}

UnchangedShard is used to mark shards which have not changed since they were last processed

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL