package sybil

v0.5.2
Published: Aug 24, 2024 License: BSD-2-Clause-Views Imports: 26 Imported by: 6

Documentation

Constants

const (
	NO_OP       = ""
	OP_AVG      = "avg"
	OP_HIST     = "hist"
	OP_DISTINCT = "distinct"
)

const (
	INT_VAL = iota
	STR_VAL = iota
	SET_VAL = iota
)

const DISTINCT_STR = "distinct"

const FALSE = false

const HIST_STR = "hist"

const NUM_DISTINCT = "distinct-limit"

const SORT_COUNT = "$COUNT"

const TRUE = true

Variables

var BLOCKS_PER_CACHE_FILE = 64

var BLOCK_VERSION = int32(1)

BLOCK_VERSION is how we get hints about decoding blocks for backwards compatibility. At least, it will be in the future.

var BREAK_MAP = make(map[string]int, 0)

var CACHE_DIR = "cache"

var CARDINALITY_THRESHOLD = 5000

After testing various cardinalities for timestamps, the optimum seems to be about 5000 (or even fewer) unique values. TODO: determine the optimum for integer columns other than timestamps.

var CHUNKS_BEFORE_GC = 16

var CHUNK_SIZE = 1024 * 8 * 8

var CHUNK_THRESHOLD = CHUNK_SIZE / 8

var COPY_RECORD_INTERNS = false

var DEBUG_OUTLIERS = false

var DEBUG_RECORD_CONSISTENCY = false

var DEBUG_TIMING = false

var DELETE_BLOCKS = make([]string, 0)

var DELETE_BLOCKS_AFTER_QUERY = true

var EMPTY = ""

var ENABLE_LUA = false

var ENABLE_TDIGEST = false

var ENV_FLAG = os.Getenv("DEBUG")

Extracted from and influenced by https://groups.google.com/forum/#!topic/golang-nuts/ct99dtK2Jo4. Set the environment variable DEBUG=1 to turn on debug output.
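A minimal sketch of the toggle in practice; the message is illustrative and the package is assumed to be imported as sybil:

// With DEBUG unset, Debug prints nothing; run the binary as
// `DEBUG=1 ./myprog` to see the output.
sybil.Debug("tables loaded:", sybil.ListTables())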

var FILE_DIGEST_THRESHOLD = 256

var FLAGS = FlagDefs{}

TODO: merge these two into one thing; the current problem is that FLAGS needs pointers.

var FREE_MEM_AFTER = uint64(1024)

var GOB_GZIP_EXT = ".db.gz"

var GROUP_BY_WIDTH = 8 // bytes

var GROUP_DELIMITER = "\t"

var GZIP_EXT = ".gz"

var HIST_FACTOR_POW = uint(1)

var HOLD_MATCHES = false

var INGEST_DIR = "ingest"

var INTERNAL_RESULT_LIMIT = 100000

var KB = int64(1024)

var LOADED_TABLES = make(map[string]*Table)

var LOCK_TRIES = 50

var LOCK_US = time.Millisecond * 3

var MAX_LOCK_BREAKS = 5

var MAX_MEM = uint64(1024)

var MAX_ROW_STORE_TRIES = 20

var MIN_CUTOFF = 5 // need at least this many elements before we determine min/max

var MIN_FILES_TO_DIGEST = 0

var MISSING_VALUE = uint64(math.MaxUint64)

var NO_MORE_BLOCKS = GROUP_DELIMITER

var NULL_BLOCK = "NULL_BLOCK"

var NUM_BUCKETS = 1000

var OPTS = OptionDefs{}

var PROFILER_ENABLED bool

var READ_ROWS_ONLY = false

var REGEX_CACHE_SIZE = 100000

var ROW_STORE_BLOCK = "ROW_STORE"

var RUN_PROFILER = func() ProfilerStop {
	return NoProfile{}
}

var SIZE_DIGEST_THRESHOLD = int64(1024) * 2

var STD_CUTOFF = 1000.0 // if value is 1000 SDs away, we ignore it

var STOMACHE_DIR = "stomache"

var STOP_PROFILER = func() {
}

var TEMP_INGEST_DIR = ".ingest.temp"

var TEST_MODE = false

var TOP_STRING_COUNT = 20

var VERSION_STRING = "0.5.2"

Functions

func Debug added in v0.2.0

func Debug(args ...interface{})

func DecodeFlags added in v0.5.0

func DecodeFlags()

func EncodeFlags added in v0.5.0

func EncodeFlags()

func Error added in v0.2.0

func Error(args ...interface{})

func FilterAndAggRecords

func FilterAndAggRecords(querySpec *QuerySpec, recordsPtr *RecordList) int

func GetVersionInfo added in v0.2.0

func GetVersionInfo() map[string]interface{}

func ListTables added in v0.5.0

func ListTables() []string

func LoadRowBlockCB

func LoadRowBlockCB(digestname string, records RecordList)

func Max added in v0.5.0

func Max(x, y int64) int64

func Min added in v0.5.0

func Min(x, y int64) int64

func NewFalseFlag added in v0.5.0

func NewFalseFlag() *bool

func NewTrueFlag added in v0.5.0

func NewTrueFlag() *bool

func Print added in v0.2.0

func Print(args ...interface{})

func PrintBytes added in v0.5.0

func PrintBytes(obj interface{})

func PrintTables

func PrintTables()

func PrintVersionInfo added in v0.2.0

func PrintVersionInfo()

func RecoverLock

func RecoverLock(lock RecoverableLock) bool

func RenameAndMod added in v0.2.0

func RenameAndMod(src, dst string) error

TODO: We should really split this into two functions based on dir / file

func SearchBlocks

func SearchBlocks(querySpec *QuerySpec, block_list map[string]*TableBlock) map[string]*QuerySpec

OLD SEARCHING FUNCTIONS BELOW HERE

func UnloadTable added in v0.5.2

func UnloadTable(name string)

UnloadTable de-registers a table.

func Warn added in v0.2.0

func Warn(args ...interface{})

Types

type AfterLoadQueryCB

type AfterLoadQueryCB struct {
	// contains filtered or unexported fields
}

func (*AfterLoadQueryCB) CB

func (cb *AfterLoadQueryCB) CB(digestname string, records RecordList)

type AfterRowBlockLoad

type AfterRowBlockLoad func(string, RecordList)

type Aggregation

type Aggregation struct {
	Op   string
	Name string

	HistType string
	// contains filtered or unexported fields
}

type BasicHist added in v0.5.0

type BasicHist struct {
	BasicHistCachedInfo
	// contains filtered or unexported fields
}

func (*BasicHist) AddValue added in v0.5.0

func (h *BasicHist) AddValue(value int64)

func (*BasicHist) AddWeightedValue added in v0.5.0

func (h *BasicHist) AddWeightedValue(value int64, weight int64)

func (*BasicHist) Combine added in v0.5.0

func (h *BasicHist) Combine(oh interface{})

func (*BasicHist) GetPercentiles added in v0.5.0

func (h *BasicHist) GetPercentiles() []int64

func (*BasicHist) GetSparseBuckets added in v0.5.0

func (h *BasicHist) GetSparseBuckets() map[int64]int64

func (*BasicHist) GetStdDev added in v0.5.0

func (h *BasicHist) GetStdDev() float64

STD DEV is defined as sqrt(VARIANCE)

func (*BasicHist) GetStrBuckets added in v0.5.0

func (h *BasicHist) GetStrBuckets() map[string]int64

func (*BasicHist) GetVariance added in v0.5.0

func (h *BasicHist) GetVariance() float64

VARIANCE is defined as the mean squared error from the mean
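A short sketch tying these statistics together, assuming a zero-valued BasicHist is usable once SetupBuckets has run (the bucket parameters here are arbitrary):

h := &sybil.BasicHist{}
h.SetupBuckets(sybil.NUM_BUCKETS, 0, 10000) // bucket count, min, max
for _, v := range []int64{10, 20, 30, 40} {
	h.AddValue(v)
}
variance := h.GetVariance() // mean squared error from the mean
stddev := h.GetStdDev()     // sqrt(variance)
_, _ = variance, stddev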

func (*BasicHist) Print added in v0.5.0

func (h *BasicHist) Print()

func (*BasicHist) SetupBuckets added in v0.5.0

func (h *BasicHist) SetupBuckets(buckets int, min, max int64)

func (*BasicHist) Sum added in v0.5.0

func (h *BasicHist) Sum() int64

func (*BasicHist) TrackPercentiles added in v0.5.0

func (h *BasicHist) TrackPercentiles()

type BasicHistCachedInfo added in v0.5.0

type BasicHistCachedInfo struct {
	NumBuckets     int
	BucketSize     int
	Values         []int64
	Averages       []float64
	PercentileMode bool

	Outliers   []int64
	Underliers []int64

	Max     int64
	Min     int64
	Samples int
	Count   int64
	Avg     float64
	Info    IntInfo
}

type BlockLock

type BlockLock struct {
	Lock
}

func (*BlockLock) Recover

func (l *BlockLock) Recover() bool

type CacheLock added in v0.2.0

type CacheLock struct {
	Lock
}

func (*CacheLock) Recover added in v0.2.0

func (l *CacheLock) Recover() bool

type ColumnInfo added in v0.5.2

type ColumnInfo struct {
	Strs []string `json:"strs"`
	Ints []string `json:"ints"`
	Sets []string `json:"sets"`
}

type DigestLock

type DigestLock struct {
	Lock
}

func (*DigestLock) Recover

func (l *DigestLock) Recover() bool

type FileDecoder added in v0.5.0

type FileDecoder interface {
	Decode(interface{}) error
	CloseFile() bool
}

func GetFileDecoder added in v0.2.0

func GetFileDecoder(filename string) FileDecoder
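A hedged sketch of the decoder in use; the path and decode target are illustrative, and GetFileDecoder presumably selects plain vs. gzipped gob based on the file extension (see GOB_GZIP_EXT above):

dec := sybil.GetFileDecoder("db/mytable/block0001/info.db")
var info sybil.SavedColumnInfo
if err := dec.Decode(&info); err != nil {
	sybil.Warn("decode failed:", err)
}
dec.CloseFile()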

type FileEncoder added in v0.5.0

type FileEncoder interface {
	Encode(interface{}) error
	CloseFile() bool
}

func GetFileEncoder added in v0.5.0

func GetFileEncoder(filename string) FileEncoder

type Filter

type Filter interface {
	Filter(*Record) bool
}
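Because Filter is a single-method interface, custom predicates are easy to plug in; a sketch (EvenTime is a hypothetical type, not part of the package):

// EvenTime matches records whose timestamp is even. Per the NoFilter
// docs below, filters return true on match success.
type EvenTime struct{}

func (f EvenTime) Filter(r *sybil.Record) bool {
	return r.Timestamp%2 == 0
}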

func BuildFilters

func BuildFilters(t *Table, loadSpec *LoadSpec, filterSpec FilterSpec) []Filter

type FilterSpec

type FilterSpec struct {
	Int string
	Str string
	Set string
}

These are the passed-in filter flags.
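A sketch of feeding such flags through BuildFilters; the column:op:value syntax is an assumption drawn from the command-line flags, and t and loadSpec are presumed to already exist:

// Hypothetical filter strings; the exact grammar is defined by the
// flag parsing, not by this struct.
filterSpec := sybil.FilterSpec{Int: "age:gt:21", Str: "country:re:US.*"}
filters := sybil.BuildFilters(t, &loadSpec, filterSpec)
_ = filters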

func (*FilterSpec) GetFilterCols added in v0.5.2

func (filterSpec *FilterSpec) GetFilterCols() []string

type FlagDefs

type FlagDefs struct {
	OP          string
	PRINT       bool // print results out
	EXPORT      bool // save records that match filter to tsv files
	LIST_TABLES bool // list the tables in the db dir

	// for usage with distributed queries
	DECODE_FLAGS   bool // load query flags from stdin as gob encoded data
	ENCODE_FLAGS   bool // print the query flags to stdout as binary
	ENCODE_RESULTS bool // print the querySpec results to stdout as binary

	INT_FILTERS string
	STR_FILTERS string
	STR_REPLACE string // regex replacement for strings
	SET_FILTERS string

	INTS        string
	STRS        string
	SETS        string
	SAMPLE_COLS string
	GROUPS      string
	DISTINCT    string

	ADD_RECORDS int

	TIME        bool
	TIME_COL    string
	TIME_BUCKET int
	HIST_BUCKET int
	HDR_HIST    bool
	LOG_HIST    bool
	T_DIGEST    bool

	FIELD_SEPARATOR    string
	FILTER_SEPARATOR   string
	PRINT_KEYS         bool
	LOAD_AND_QUERY     bool
	LOAD_THEN_QUERY    bool
	READ_INGESTION_LOG bool
	READ_ROWSTORE      bool
	SKIP_COMPACT       bool
	SAVE_AS_SRB        bool

	PROFILE     bool
	PROFILE_MEM bool

	RECYCLE_MEM       bool
	FAST_RECYCLE      bool
	CACHED_QUERIES    bool
	SHORTEN_KEY_TABLE bool

	WEIGHT_COL string

	LIMIT        int
	NUM_DISTINCT int

	DEBUG bool
	JSON  bool
	GC    bool

	DIR        string
	SORT       string
	SORT_ASC   bool
	PRUNE_BY   string
	TABLE      string
	PRINT_INFO bool
	SAMPLES    bool

	UPDATE_TABLE_INFO bool
	SKIP_OUTLIERS     bool
}

type GobFileDecoder added in v0.5.0

type GobFileDecoder struct {
	*gob.Decoder
	File *os.File
}

func (GobFileDecoder) CloseFile added in v0.5.0

func (gfd GobFileDecoder) CloseFile() bool

type GobFileEncoder added in v0.5.0

type GobFileEncoder struct {
	*gob.Encoder
	File *os.File
}

func (GobFileEncoder) CloseFile added in v0.5.0

func (pb GobFileEncoder) CloseFile() bool

type Grouping

type Grouping struct {
	Name string
	// contains filtered or unexported fields
}

type HistCompat added in v0.5.0

type HistCompat struct {
	*BasicHist
}

func (*HistCompat) GetIntBuckets added in v0.5.0

func (h *HistCompat) GetIntBuckets() map[int64]int64

func (*HistCompat) GetMeanVariance added in v0.5.0

func (h *HistCompat) GetMeanVariance() float64

func (*HistCompat) Max added in v0.5.0

func (hc *HistCompat) Max() int64

func (*HistCompat) Mean added in v0.5.0

func (h *HistCompat) Mean() float64

func (*HistCompat) Min added in v0.5.0

func (hc *HistCompat) Min() int64

func (*HistCompat) NewHist added in v0.5.0

func (hc *HistCompat) NewHist() Histogram

func (*HistCompat) Range added in v0.5.0

func (h *HistCompat) Range() (int64, int64)

func (*HistCompat) StdDev added in v0.5.0

func (h *HistCompat) StdDev() float64

func (*HistCompat) TotalCount added in v0.5.0

func (h *HistCompat) TotalCount() int64

type Histogram added in v0.5.0

type Histogram interface {
	Mean() float64
	Max() int64
	Min() int64
	TotalCount() int64

	AddWeightedValue(int64, int64)
	GetPercentiles() []int64
	GetStrBuckets() map[string]int64
	GetIntBuckets() map[int64]int64

	Range() (int64, int64)
	StdDev() float64

	NewHist() Histogram
	Combine(interface{})
}
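A sketch of the polymorphism the interface buys: callers merge and inspect histograms without knowing whether a BasicHist, MultiHist, or t-digest sits underneath (t and info are assumed to exist):

h := t.NewHist(info)       // concrete type depends on query flags
h.AddWeightedValue(100, 1) // value, weight

other := h.NewHist() // fresh histogram of the same underlying kind
other.AddWeightedValue(200, 3)

h.Combine(other) // merge other into h
mean, total := h.Mean(), h.TotalCount()
_, _ = mean, total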

type InfoLock

type InfoLock struct {
	Lock
}

func (*InfoLock) Recover

func (l *InfoLock) Recover() bool

type IntArr

type IntArr []IntField

type IntField

type IntField int64

type IntFilter

type IntFilter struct {
	Field   string
	FieldId int16
	Op      string
	Value   int
	// contains filtered or unexported fields
}

func (IntFilter) Filter

func (filter IntFilter) Filter(r *Record) bool

type IntInfo

type IntInfo struct {
	Min   int64
	Max   int64
	Avg   float64
	M2    float64 // used for calculating std dev, expressed as M2 / (Count - 1)
	Count int
}

type IntInfoTable

type IntInfoTable map[int16]*IntInfo

type KeyInfo added in v0.5.2

type KeyInfo struct {
	Table    *Table
	KeyTypes map[int16]int8
	KeyTable map[string]int16
	IntInfo  IntInfoTable
	StrInfo  StrInfoTable

	KeyExchange map[int16]int16 // the key exchange maps the original table's keytable -> new key table
}

type LoadSpec

type LoadSpec struct {
	LoadAllColumns bool
	// contains filtered or unexported fields
}

func NewLoadSpec

func NewLoadSpec() LoadSpec

func (*LoadSpec) Int

func (l *LoadSpec) Int(name string)

func (*LoadSpec) Missing added in v0.5.2

func (l *LoadSpec) Missing(name string)

func (*LoadSpec) Set

func (l *LoadSpec) Set(name string)

func (*LoadSpec) Str

func (l *LoadSpec) Str(name string)

type Lock

type Lock struct {
	Name  string
	Table *Table
	// contains filtered or unexported fields
}

func (*Lock) ForceDeleteFile

func (l *Lock) ForceDeleteFile()

func (*Lock) ForceMakeFile

func (l *Lock) ForceMakeFile(pid int64)

func (*Lock) Grab

func (l *Lock) Grab() bool

func (*Lock) Recover

func (l *Lock) Recover() bool

func (*Lock) Release

func (l *Lock) Release() bool

type MultiHist added in v0.5.0

type MultiHist struct {
	Max     int64
	Min     int64
	Samples int
	Count   int64
	Avg     float64

	PercentileMode bool

	Subhists []*HistCompat
	Info     *IntInfo
	// contains filtered or unexported fields
}

func (*MultiHist) AddValue added in v0.5.0

func (h *MultiHist) AddValue(value int64)

func (*MultiHist) AddWeightedValue added in v0.5.0

func (h *MultiHist) AddWeightedValue(value int64, weight int64)

func (*MultiHist) Combine added in v0.5.0

func (h *MultiHist) Combine(oh interface{})

func (*MultiHist) GetMeanVariance added in v0.5.0

func (h *MultiHist) GetMeanVariance() float64

func (*MultiHist) GetNonZeroBuckets added in v0.5.0

func (h *MultiHist) GetNonZeroBuckets() map[string]int64

func (*MultiHist) GetPercentiles added in v0.5.0

func (h *MultiHist) GetPercentiles() []int64

func (*MultiHist) GetSparseBuckets added in v0.5.0

func (h *MultiHist) GetSparseBuckets() map[int64]int64

func (*MultiHist) GetStdDev added in v0.5.0

func (h *MultiHist) GetStdDev() float64

VARIANCE is defined as the mean squared error from the mean. STD DEV is defined as sqrt(VARIANCE).

func (*MultiHist) GetStrBuckets added in v0.5.0

func (h *MultiHist) GetStrBuckets() map[string]int64

func (*MultiHist) GetVariance added in v0.5.0

func (h *MultiHist) GetVariance() float64

func (*MultiHist) Print added in v0.5.0

func (h *MultiHist) Print()

func (*MultiHist) Sum added in v0.5.0

func (h *MultiHist) Sum() int64

func (*MultiHist) TrackPercentiles added in v0.5.0

func (h *MultiHist) TrackPercentiles()

type MultiHistCompat added in v0.5.0

type MultiHistCompat struct {
	*MultiHist

	Histogram *MultiHist
}

func (*MultiHistCompat) GetIntBuckets added in v0.5.0

func (h *MultiHistCompat) GetIntBuckets() map[int64]int64

func (*MultiHistCompat) GetMeanVariance added in v0.5.0

func (h *MultiHistCompat) GetMeanVariance() float64

func (*MultiHistCompat) Max added in v0.5.0

func (hc *MultiHistCompat) Max() int64

func (*MultiHistCompat) Mean added in v0.5.0

func (h *MultiHistCompat) Mean() float64

func (*MultiHistCompat) Min added in v0.5.0

func (hc *MultiHistCompat) Min() int64

func (*MultiHistCompat) NewHist added in v0.5.0

func (hc *MultiHistCompat) NewHist() Histogram

func (*MultiHistCompat) Range added in v0.5.0

func (h *MultiHistCompat) Range() (int64, int64)

func (*MultiHistCompat) StdDev added in v0.5.0

func (h *MultiHistCompat) StdDev() float64

func (*MultiHistCompat) TotalCount added in v0.5.0

func (h *MultiHistCompat) TotalCount() int64

type NoFilter

type NoFilter struct{}

FILTERS RETURN TRUE ON MATCH SUCCESS

func (NoFilter) Filter

func (f NoFilter) Filter(r *Record) bool

type NoProfile

type NoProfile struct{}

func (NoProfile) Start

func (p NoProfile) Start() ProfilerStart

func (NoProfile) Stop

func (p NoProfile) Stop()

type NodeResults added in v0.5.0

type NodeResults struct {
	Table     Table
	Tables    []string
	QuerySpec QuerySpec
	Samples   []*Sample
}

type OptionDefs

type OptionDefs struct {
	STR_REPLACEMENTS map[string]StrReplace
	WEIGHT_COL       bool
	WEIGHT_COL_ID    int16
	WRITE_BLOCK_INFO bool
	TIME_COL_ID      int16
	TIME_FORMAT      string
	MERGE_TABLE      *Table
}

type ProfilerStart

type ProfilerStart interface {
	Stop()
}

type ProfilerStop

type ProfilerStop interface {
	Start() ProfilerStart
}

type QueryParams added in v0.5.0

type QueryParams savedQueryParams

For outside consumption

type QueryResults added in v0.5.0

type QueryResults savedQueryResults

type QuerySpec

type QuerySpec struct {
	QueryParams
	QueryResults

	BlockList map[string]TableBlock
	Table     *Table
}

func CombineAndPrune added in v0.5.0

func CombineAndPrune(querySpec *QuerySpec, block_specs map[string]*QuerySpec) *QuerySpec

func CombineResults

func CombineResults(querySpec *QuerySpec, block_specs map[string]*QuerySpec) *QuerySpec

func CopyQuerySpec

func CopyQuerySpec(querySpec *QuerySpec) *QuerySpec

func MultiCombineResults added in v0.5.0

func MultiCombineResults(querySpec *QuerySpec, block_specs map[string]*QuerySpec) *QuerySpec

func (*QuerySpec) CalculateICC

func (querySpec *QuerySpec) CalculateICC() map[string]float64

To calculate SSW and SSB: SSW (the sum of squares within groups) takes each group, calculates its variance, and adds those variances together. SSB (the sum of squares between groups) takes each group's average and calculates the variance of those averages against the overall average.
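For concreteness, a standalone sketch of that arithmetic in textbook ANOVA form (independent of QuerySpec, and possibly differing from this package's exact normalization):

// sswSSB computes the within-group and between-group sums of squares
// for a map of group name -> observed values.
func sswSSB(groups map[string][]float64) (ssw, ssb float64) {
	var grand float64
	var n int
	for _, vals := range groups {
		for _, v := range vals {
			grand += v
			n++
		}
	}
	grand /= float64(n)

	for _, vals := range groups {
		var mean float64
		for _, v := range vals {
			mean += v
		}
		mean /= float64(len(vals))
		for _, v := range vals {
			ssw += (v - mean) * (v - mean) // within-group squared error
		}
		ssb += float64(len(vals)) * (mean - grand) * (mean - grand)
	}
	return ssw, ssb
}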

func (*QuerySpec) GetCacheKey added in v0.5.0

func (qs *QuerySpec) GetCacheKey(blockname string) string

func (*QuerySpec) GetCacheRelevantFilters added in v0.5.0

func (querySpec *QuerySpec) GetCacheRelevantFilters(blockname string) []Filter

For a per-block query cache, we exclude any trivial filters (those that are true for all records in the block) when creating our cache key.

func (*QuerySpec) GetCacheStruct added in v0.5.0

func (qs *QuerySpec) GetCacheStruct(blockname string) QueryParams

func (*QuerySpec) LoadCachedResults added in v0.5.0

func (qs *QuerySpec) LoadCachedResults(blockname string) bool

func (*QuerySpec) NewResult added in v0.5.0

func (qs *QuerySpec) NewResult() *Result

func (*QuerySpec) PrintResults

func (qs *QuerySpec) PrintResults()

func (*QuerySpec) PruneResults added in v0.5.0

func (qs *QuerySpec) PruneResults(limit int)

func (*QuerySpec) Punctuate

func (querySpec *QuerySpec) Punctuate()

func (*QuerySpec) ResetResults

func (querySpec *QuerySpec) ResetResults()

func (*QuerySpec) SaveCachedResults added in v0.5.0

func (qs *QuerySpec) SaveCachedResults(blockname string)

func (*QuerySpec) SortResults added in v0.5.0

func (qs *QuerySpec) SortResults(orderBy string, orderAsc bool)

type Record

type Record struct {
	Strs      []StrField
	Ints      []IntField
	SetMap    map[int16]SetField
	Populated []int8

	Timestamp int64
	// contains filtered or unexported fields
}

func (*Record) AddIntField

func (r *Record) AddIntField(name string, val int64)

func (*Record) AddSetField

func (r *Record) AddSetField(name string, val []string)

func (*Record) AddStrField

func (r *Record) AddStrField(name string, val string)

func (*Record) CopyRecord

func (r *Record) CopyRecord() *Record

func (*Record) GetIntVal

func (r *Record) GetIntVal(name string) (int, bool)

func (*Record) GetSetVal

func (r *Record) GetSetVal(name string) ([]string, bool)

func (*Record) GetStrVal

func (r *Record) GetStrVal(name string) (string, bool)

func (*Record) ResizeFields

func (r *Record) ResizeFields(length int16)

type RecordList

type RecordList []*Record

Before we save the new record list in a table, we tend to sort by time
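For example, that time-ordering can use the SortRecordsByTime wrapper documented further down (records is an existing RecordList; sort is the standard library package):

// SortRecordsByTime embeds RecordList, which supplies Len and Swap,
// and adds a timestamp-based Less.
sort.Sort(sybil.SortRecordsByTime{RecordList: records})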

func CombineMatches

func CombineMatches(block_specs map[string]*QuerySpec) RecordList

func (RecordList) Len

func (a RecordList) Len() int

func (RecordList) ResetRecords added in v0.2.0

func (rl RecordList) ResetRecords(tb *TableBlock)

Recycle allocated records between blocks; that means we need a wash and rinse cycle. We can re-use blocks if:

the loadSpec is the same
the table is the same
NumRecords are the same

To do so, we:

clean out the different arrays inside a block
re-home the record list into the table block

func (RecordList) Swap

func (a RecordList) Swap(i, j int)

type RecoverableLock

type RecoverableLock interface {
	Grab() bool
	Release() bool
	Recover() bool
}

Every LockFile should have a recovery plan
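A hedged sketch of the lifecycle that plan implies, using only the interface plus the package-level RecoverLock helper (withLock is a hypothetical wrapper):

// withLock runs fn while holding l, attempting recovery (e.g. a lock
// file left behind by a dead process) when the initial grab fails.
func withLock(l sybil.RecoverableLock, fn func()) bool {
	if !l.Grab() {
		if !sybil.RecoverLock(l) || !l.Grab() {
			return false
		}
	}
	defer l.Release()
	fn()
	return true
}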

type Result

type Result struct {
	Hists    map[string]Histogram
	Distinct *hll.LogLogBeta

	GroupByKey  string
	BinaryByKey string
	Count       int64
	Samples     int64
}

func (*Result) Combine

func (rs *Result) Combine(next_result *Result)

This does an in-place combine of next_result into this one.

type ResultJSON

type ResultJSON map[string]interface{}

type ResultMap

type ResultMap map[string]*Result

func (*ResultMap) Combine

func (master_result *ResultMap) Combine(results *ResultMap)

type RowSavedInt

type RowSavedInt struct {
	Name  int16
	Value int64
}

type RowSavedSet

type RowSavedSet struct {
	Name  int16
	Value []string
}

type RowSavedStr

type RowSavedStr struct {
	Name  int16
	Value string
}

type Sample

type Sample map[string]interface{}

type SaveBlockChunkCB

type SaveBlockChunkCB struct {
	// contains filtered or unexported fields
}

func (*SaveBlockChunkCB) CB

func (cb *SaveBlockChunkCB) CB(digestname string, records RecordList)

type SavedBlockCache added in v0.2.0

type SavedBlockCache map[string]*SavedColumnInfo

type SavedColumnInfo

type SavedColumnInfo struct {
	NumRecords int32

	StrInfoMap SavedStrInfo
	IntInfoMap SavedIntInfo
}

type SavedIntBucket

type SavedIntBucket struct {
	Value   int64
	Records []uint32
}

type SavedIntColumn

type SavedIntColumn struct {
	Name            string
	DeltaEncodedIDs bool
	ValueEncoded    bool
	BucketEncoded   bool
	Bins            []SavedIntBucket
	Values          []int64
	VERSION         int32
}

func NewSavedIntColumn

func NewSavedIntColumn() SavedIntColumn

type SavedIntInfo

type SavedIntInfo map[string]*IntInfo

type SavedRecord

type SavedRecord struct {
	Ints []RowSavedInt
	Strs []RowSavedStr
	Sets []RowSavedSet
}

type SavedRecordBlock added in v0.5.2

type SavedRecordBlock struct {
	RecordList []*SavedRecord
	KeyTable   *map[string]int16
	// contains filtered or unexported fields
}

type SavedSetBucket

type SavedSetBucket struct {
	Value   int32
	Records []uint32
}

type SavedSetColumn

type SavedSetColumn struct {
	Name            string
	Bins            []SavedSetBucket
	Values          [][]int32
	StringTable     []string
	DeltaEncodedIDs bool
	BucketEncoded   bool
	VERSION         int32
}

func NewSavedSetColumn

func NewSavedSetColumn() SavedSetColumn

type SavedStrBucket

type SavedStrBucket struct {
	Value   int32
	Records []uint32
}

type SavedStrColumn

type SavedStrColumn struct {
	Name            string
	DeltaEncodedIDs bool
	BucketEncoded   bool
	Bins            []SavedStrBucket
	Values          []int32
	StringTable     []string
	VERSION         int32
}

func NewSavedStrColumn

func NewSavedStrColumn() SavedStrColumn

type SavedStrInfo

type SavedStrInfo map[string]*StrInfo

type SeparatedColumns

type SeparatedColumns struct {
	// contains filtered or unexported fields
}

type SetArr

type SetArr []SetField

type SetField

type SetField []int32

type SetFilter

type SetFilter struct {
	Field   string
	FieldId int16
	Op      string
	Value   string
	// contains filtered or unexported fields
}

func (SetFilter) Filter

func (filter SetFilter) Filter(r *Record) bool

type SetMap

type SetMap map[int16]SetField

type SortBlocksByEndTime

type SortBlocksByEndTime []*TableBlock

func (SortBlocksByEndTime) Len

func (a SortBlocksByEndTime) Len() int

func (SortBlocksByEndTime) Less

func (a SortBlocksByEndTime) Less(i, j int) bool

func (SortBlocksByEndTime) Swap

func (a SortBlocksByEndTime) Swap(i, j int)

type SortBlocksByTime

type SortBlocksByTime []*TableBlock

func (SortBlocksByTime) Len

func (a SortBlocksByTime) Len() int

func (SortBlocksByTime) Less

func (a SortBlocksByTime) Less(i, j int) bool

func (SortBlocksByTime) Swap

func (a SortBlocksByTime) Swap(i, j int)

type SortMatchedByCol added in v0.5.2

type SortMatchedByCol struct {
	Matched []*Record

	Col string
}

func (SortMatchedByCol) Len added in v0.5.2

func (a SortMatchedByCol) Len() int

func (SortMatchedByCol) Less added in v0.5.2

func (a SortMatchedByCol) Less(i, j int) bool

This sorts the records in descending order

func (SortMatchedByCol) Swap added in v0.5.2

func (a SortMatchedByCol) Swap(i, j int)

type SortRecordsByTime

type SortRecordsByTime struct {
	RecordList
}

func (SortRecordsByTime) Less

func (a SortRecordsByTime) Less(i, j int) bool

type SortResultsByCol

type SortResultsByCol struct {
	Results []*Result

	Col string
}

func (SortResultsByCol) Len

func (a SortResultsByCol) Len() int

func (SortResultsByCol) Less

func (a SortResultsByCol) Less(i, j int) bool

This sorts the results in descending order

func (SortResultsByCol) Swap

func (a SortResultsByCol) Swap(i, j int)

type SortStrsByCount

type SortStrsByCount []StrInfoCol

func (SortStrsByCount) Len

func (a SortStrsByCount) Len() int

func (SortStrsByCount) Less

func (a SortStrsByCount) Less(i, j int) bool

func (SortStrsByCount) Swap

func (a SortStrsByCount) Swap(i, j int)

type StrArr

type StrArr []StrField

type StrField

type StrField int32

type StrFilter

type StrFilter struct {
	Field   string
	FieldId int16
	Op      string
	Value   string
	// contains filtered or unexported fields
}

func (StrFilter) Filter

func (filter StrFilter) Filter(r *Record) bool

type StrInfo

type StrInfo struct {
	TopStringCount map[int32]int
	Cardinality    int
}

StrInfo and IntInfo contain interesting tidbits about columns; they also get serialized to disk in the block's info.db.

type StrInfoCol

type StrInfoCol struct {
	Name  int32
	Value int
}

type StrInfoTable

type StrInfoTable map[int16]*StrInfo

type StrReplace

type StrReplace struct {
	Pattern string
	Replace string
}

type Table

type Table struct {
	Name      string
	BlockList map[string]*TableBlock
	KeyTable  map[string]int16 // String Key Names
	KeyTypes  map[int16]int8

	ShortKeyInfo *KeyInfo // Keys that we will be using during a query
	AllKeyInfo   *KeyInfo // The original table KeyInfo before key shortening

	// Need to keep track of the last block we've used, right?
	LastBlock TableBlock
	RowBlock  *TableBlock

	StrInfo StrInfoTable
	IntInfo IntInfoTable

	BlockInfoCache map[string]*SavedColumnInfo
	NewBlockInfos  []string
	// contains filtered or unexported fields
}

func GetTable

func GetTable(name string) *Table

This is a singleton constructor for Tables
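A hedged end-to-end sketch of the flow the Table methods imply; the table and column names are hypothetical, and a real query would also populate the QuerySpec's QueryParams:

t := sybil.GetTable("events") // repeated calls return the same *Table
t.LoadTableInfo()

loadSpec := t.NewLoadSpec()
loadSpec.Int("time")
loadSpec.Str("status")

querySpec := &sybil.QuerySpec{Table: t}
n := t.LoadAndQueryRecords(&loadSpec, querySpec)
sybil.Debug("records matched:", n)
querySpec.PrintResults()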

func (*Table) Aggregation

func (t *Table) Aggregation(name string, op string) Aggregation

func (*Table) AppendRecordsToLog

func (t *Table) AppendRecordsToLog(records RecordList, blockname string)

func (*Table) ChunkAndSave

func (t *Table) ChunkAndSave()

func (*Table) CompactRecords added in v0.2.0

func (t *Table) CompactRecords()

TODO: figure out how often we actually do a collation check by storing last collation inside a file somewhere

func (*Table) DeduceTableInfoFromBlocks

func (t *Table) DeduceTableInfoFromBlocks()

Alright, so... I accidentally broke my info.db file. How can I go about loading the TableInfo based off the blocks? I think I go through each block, load it, and verify the different column types.

func (*Table) DigestRecords

func (t *Table) DigestRecords()

Go through rowstore and save records out to column store

func (*Table) FillPartialBlock

func (t *Table) FillPartialBlock() bool

TODO: find any open blocks and then fill them...

func (*Table) FindPartialBlocks

func (t *Table) FindPartialBlocks() []*TableBlock

func (*Table) GetColumnType

func (t *Table) GetColumnType(v string) int8

func (*Table) GrabBlockLock

func (t *Table) GrabBlockLock(name string) bool

func (*Table) GrabCacheLock added in v0.2.0

func (t *Table) GrabCacheLock() bool

func (*Table) GrabDigestLock

func (t *Table) GrabDigestLock() bool

func (*Table) GrabInfoLock

func (t *Table) GrabInfoLock() bool

func (*Table) Grouping

func (t *Table) Grouping(name string) Grouping

func (*Table) HasFlagFile

func (t *Table) HasFlagFile() bool

func (*Table) IngestRecords

func (t *Table) IngestRecords(blockname string)

Go through newRecords list and save all the new records out to a row store

func (*Table) IntFilter

func (t *Table) IntFilter(name string, op string, value int) IntFilter

func (*Table) IsNotExist added in v0.2.0

func (t *Table) IsNotExist() bool

func (*Table) LoadAndQueryRecords

func (t *Table) LoadAndQueryRecords(loadSpec *LoadSpec, querySpec *QuerySpec) int

func (*Table) LoadBlockCache added in v0.2.0

func (t *Table) LoadBlockCache()

func (*Table) LoadBlockFromDir

func (t *Table) LoadBlockFromDir(dirname string, loadSpec *LoadSpec, load_records bool) *TableBlock

TODO: have this only pull the blocks into column format and not materialize the columns immediately

func (*Table) LoadBlockInfo added in v0.2.0

func (t *Table) LoadBlockInfo(dirname string) *SavedColumnInfo

func (*Table) LoadRecords

func (t *Table) LoadRecords(loadSpec *LoadSpec) int

func (*Table) LoadRecordsFromLog

func (t *Table) LoadRecordsFromLog(filename string) RecordList

func (*Table) LoadRowStoreRecords

func (t *Table) LoadRowStoreRecords(digest string, after_block_load_cb AfterRowBlockLoad)

func (*Table) LoadTableInfo

func (t *Table) LoadTableInfo() bool

func (*Table) LoadTableInfoFrom

func (t *Table) LoadTableInfoFrom(filename string) bool

func (*Table) MakeDir added in v0.2.0

func (t *Table) MakeDir()

func (*Table) MatchAndAggregate

func (t *Table) MatchAndAggregate(querySpec *QuerySpec)

func (*Table) MaybeCompactRecords

func (t *Table) MaybeCompactRecords()

We compact if:

we have over X files
we have over X megabytes of data

Remember, there is no reason to actually read the data off disk until we decide to compact.

func (*Table) NewHist

func (t *Table) NewHist(info *IntInfo) Histogram

func (*Table) NewLoadSpec

func (t *Table) NewLoadSpec() LoadSpec

func (*Table) NewRecord

func (t *Table) NewRecord() *Record

func (*Table) NewTDigestHist added in v0.5.2

func (t *Table) NewTDigestHist(info *IntInfo) Histogram

func (*Table) PrintRecord

func (t *Table) PrintRecord(r *Record)

func (*Table) PrintRecords

func (t *Table) PrintRecords(records RecordList)

func (*Table) PrintSamples

func (t *Table) PrintSamples(qs *QuerySpec)

func (*Table) PrintTableInfo added in v0.5.2

func (t *Table) PrintTableInfo()

func (*Table) ReadBlockInfoFromDir

func (t *Table) ReadBlockInfoFromDir(dirname string) *SavedColumnInfo

TODO: have this only pull the blocks into column format and not materialize the columns immediately

func (*Table) ReleaseBlockLock

func (t *Table) ReleaseBlockLock(name string) bool

func (*Table) ReleaseCacheLock added in v0.2.0

func (t *Table) ReleaseCacheLock() bool

func (*Table) ReleaseDigestLock

func (t *Table) ReleaseDigestLock() bool

func (*Table) ReleaseInfoLock

func (t *Table) ReleaseInfoLock() bool

func (*Table) ReleaseRecords

func (t *Table) ReleaseRecords()

Remove our pointer to the blocklist so a GC is triggered and a bunch of new memory becomes available

func (*Table) ResetBlockCache added in v0.2.0

func (t *Table) ResetBlockCache()

func (*Table) RestoreUningestedFiles

func (t *Table) RestoreUningestedFiles()

func (*Table) SaveRecordsToBlock

func (t *Table) SaveRecordsToBlock(records RecordList, filename string) bool

func (*Table) SaveRecordsToColumns

func (t *Table) SaveRecordsToColumns() bool

func (*Table) SaveTableInfo

func (t *Table) SaveTableInfo(fname string)

func (*Table) SetFilter

func (t *Table) SetFilter(name string, op string, value string) SetFilter

func (*Table) ShortenKeyTable added in v0.5.2

func (t *Table) ShortenKeyTable()

func (*Table) ShouldCompactRowStore added in v0.2.0

func (t *Table) ShouldCompactRowStore(digest string) bool

func (*Table) ShouldLoadBlockFromDir

func (t *Table) ShouldLoadBlockFromDir(dirname string, querySpec *QuerySpec) bool

optimizing for integer pre-cached info

func (*Table) StrFilter

func (t *Table) StrFilter(name string, op string, value string) StrFilter

func (*Table) TableInfo added in v0.5.2

func (t *Table) TableInfo() *TableInfo

func (*Table) TrimTable

func (t *Table) TrimTable(trimSpec *TrimSpec) []*TableBlock

List all the blocks that should be trimmed to keep the table within its memory limits

func (*Table) UseKeys added in v0.5.2

func (t *Table) UseKeys(keys []string)

func (*Table) WriteBlockCache added in v0.2.0

func (t *Table) WriteBlockCache()

func (*Table) WriteQueryCache added in v0.5.0

func (t *Table) WriteQueryCache(to_cache_specs map[string]*QuerySpec)

type TableBlock

type TableBlock struct {
	Name       string
	RecordList RecordList
	Info       *SavedColumnInfo
	Size       int64
	Matched    RecordList

	IntInfo IntInfoTable
	StrInfo StrInfoTable
	// contains filtered or unexported fields
}

Table Block should have a bunch of metadata next to it, too

func (*TableBlock) ExportBlockData added in v0.2.0

func (b *TableBlock) ExportBlockData()

func (*TableBlock) GetColumnInfo

func (tb *TableBlock) GetColumnInfo(name_id int16) *TableColumn

func (*TableBlock) RecycleSlab added in v0.2.0

func (tb *TableBlock) RecycleSlab(loadSpec *LoadSpec)

func (*TableBlock) SaveInfoToColumns

func (tb *TableBlock) SaveInfoToColumns(dirname string)

func (*TableBlock) SaveIntsToColumns

func (tb *TableBlock) SaveIntsToColumns(dirname string, same_ints map[int16]ValueMap)

func (*TableBlock) SaveSetsToColumns

func (tb *TableBlock) SaveSetsToColumns(dirname string, same_sets map[int16]ValueMap)

func (*TableBlock) SaveStrsToColumns

func (tb *TableBlock) SaveStrsToColumns(dirname string, same_strs map[int16]ValueMap)

func (*TableBlock) SaveToColumns

func (tb *TableBlock) SaveToColumns(filename string) bool

func (*TableBlock) SeparateRecordsIntoColumns

func (tb *TableBlock) SeparateRecordsIntoColumns() SeparatedColumns

type TableColumn

type TableColumn struct {
	Type        int8
	StringTable map[string]int32
	RCache      map[int]bool
	// contains filtered or unexported fields
}

type TableInfo added in v0.5.2

type TableInfo struct {
	Count             int64   `json:"count"`
	Size              int64   `json:"storageSize"`
	AverageObjectSize float64 `json:"avgObjSize"`

	Columns ColumnInfo `json:"columns"`
}

type TrimSpec

type TrimSpec struct {
	MBLimit      int64 // size limit of DB in megabytes
	DeleteBefore int64 // delete records older than DeleteBefore in seconds
}
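A small sketch pairing TrimSpec with Table.TrimTable (documented above); the limits are arbitrary, t is assumed to exist, and DeleteBefore is assumed to be a unix timestamp in seconds:

trimSpec := sybil.TrimSpec{
	MBLimit:      1024, // keep the table under roughly 1GB on disk
	DeleteBefore: time.Now().AddDate(0, 0, -30).Unix(),
}
// TrimTable only lists the blocks that should go; deleting them is
// left to the caller.
blocks := t.TrimTable(&trimSpec)
_ = blocks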

type VTable added in v0.5.0

type VTable struct {
	Table

	Columns map[string]*IntInfo
}

func (*VTable) AggregateInfo added in v0.5.0

func (vt *VTable) AggregateInfo(dirs []string)

func (*VTable) AggregateSamples added in v0.5.0

func (vt *VTable) AggregateSamples(dirs []string)

func (*VTable) AggregateSpecs added in v0.5.0

func (vt *VTable) AggregateSpecs(dirs []string)

func (*VTable) AggregateTables added in v0.5.0

func (vt *VTable) AggregateTables(dirs []string)

func (*VTable) StitchResults added in v0.5.0

func (vt *VTable) StitchResults(dirs []string)

type ValueMap

type ValueMap map[int64][]uint32
