Documentation ¶
Index ¶
- Constants
- Variables
- func Debug(args ...interface{})
- func Error(args ...interface{})
- func FilterAndAggRecords(querySpec *QuerySpec, recordsPtr *RecordList) int
- func GetFileDecoder(filename string) *gob.Decoder
- func GetTimeFormat(time_fmt string) string
- func GetVersionInfo() map[string]interface{}
- func LoadAndSessionize(tables []*Table, querySpec *QuerySpec, sessionSpec *SessionSpec) int
- func LoadRowBlockCB(digestname string, records RecordList)
- func Print(args ...interface{})
- func PrintResults(querySpec *QuerySpec)
- func PrintTables()
- func PrintVersionInfo()
- func RecoverLock(lock RecoverableLock) bool
- func RenameAndMod(src, dst string) error
- func SearchBlocks(querySpec *QuerySpec, block_list map[string]*TableBlock) map[string]*QuerySpec
- func SessionizeRecords(querySpec *QuerySpec, sessionSpec *SessionSpec, recordsptr *RecordList)
- func SetDefaults()
- func SetLuaScript(filename string)
- func SortResults(querySpec *QuerySpec)
- func Warn(args ...interface{})
- type ActiveSession
- type Activity
- type ActivityMap
- type AfterLoadQueryCB
- type AfterRowBlockLoad
- type Aggregation
- type BlockLock
- type CacheLock
- type Calendar
- type DigestLock
- type Filter
- type FilterSpec
- type FlagDefs
- type Grouping
- type Hist
- func (h *Hist) Combine(next_hist *Hist)
- func (h *Hist) GetBuckets() map[string]int64
- func (h *Hist) GetMeanVariance() float64
- func (h *Hist) GetPercentiles() []int64
- func (h *Hist) GetStdDev() float64
- func (h *Hist) GetVariance() float64
- func (h *Hist) Print()
- func (h *Hist) SetupBuckets(buckets int, min, max int64)
- func (h *Hist) Sum() int64
- func (h *Hist) TrackPercentiles()
- type InfoLock
- type IntArr
- type IntField
- type IntFilter
- type IntInfo
- type IntInfoTable
- type LoadSpec
- type Lock
- type LuaKey
- type LuaTable
- type NoFilter
- type NoProfile
- type OptionDefs
- type ProfilerStart
- type ProfilerStop
- type QuerySpec
- type Record
- func (r *Record) AddIntField(name string, val int64)
- func (r *Record) AddSetField(name string, val []string)
- func (r *Record) AddStrField(name string, val string)
- func (r *Record) CopyRecord() *Record
- func (r *Record) GetIntVal(name string) (int, bool)
- func (r *Record) GetSetVal(name string) ([]string, bool)
- func (r *Record) GetStrVal(name string) (string, bool)
- func (r *Record) ResizeFields(length int16)
- type RecordList
- type RecoverableLock
- type Result
- type ResultJSON
- type ResultMap
- type RowSavedInt
- type RowSavedSet
- type RowSavedStr
- type Sample
- type SaveBlockChunkCB
- type SavedBlockCache
- type SavedColumnInfo
- type SavedIntBucket
- type SavedIntColumn
- type SavedIntInfo
- type SavedRecord
- type SavedRecords
- type SavedSetBucket
- type SavedSetColumn
- type SavedStrBucket
- type SavedStrColumn
- type SavedStrInfo
- type SeparatedColumns
- type SessionList
- type SessionSpec
- type SessionStats
- type Sessions
- type SetArr
- type SetField
- type SetFilter
- type SetMap
- type SortBlocksByEndTime
- type SortBlocksByTime
- type SortRecordsByTime
- type SortResultsByCol
- type SortStrsByCount
- type StrArr
- type StrField
- type StrFilter
- type StrInfo
- type StrInfoCol
- type StrInfoTable
- type StrReplace
- type Table
- func (t *Table) Aggregation(name string, op string) Aggregation
- func (t *Table) AppendRecordsToLog(records RecordList, blockname string)
- func (t *Table) BuildJoinMap()
- func (t *Table) ChunkAndSave()
- func (t *Table) CompactRecords()
- func (t *Table) DeduceTableInfoFromBlocks()
- func (t *Table) DigestRecords()
- func (t *Table) FillPartialBlock() bool
- func (t *Table) FindPartialBlocks() []*TableBlock
- func (t *Table) GetColumnType(v string) int8
- func (t *Table) GetRecordById(id string) *Record
- func (t *Table) GrabBlockLock(name string) bool
- func (t *Table) GrabCacheLock() bool
- func (t *Table) GrabDigestLock() bool
- func (t *Table) GrabInfoLock() bool
- func (t *Table) Grouping(name string) Grouping
- func (t *Table) HasFlagFile() bool
- func (t *Table) IngestRecords(blockname string)
- func (t *Table) IntFilter(name string, op string, value int) IntFilter
- func (t *Table) IsNotExist() bool
- func (t *Table) LoadAndQueryRecords(loadSpec *LoadSpec, querySpec *QuerySpec) int
- func (t *Table) LoadBlockCache()
- func (t *Table) LoadBlockFromDir(dirname string, loadSpec *LoadSpec, load_records bool) *TableBlock
- func (t *Table) LoadBlockInfo(dirname string) *SavedColumnInfo
- func (t *Table) LoadRecords(loadSpec *LoadSpec) int
- func (t *Table) LoadRecordsFromLog(filename string) RecordList
- func (t *Table) LoadRowStoreRecords(digest string, after_block_load_cb AfterRowBlockLoad)
- func (t *Table) LoadSavedRecordsFromLog(filename string) []*SavedRecord
- func (t *Table) LoadTableInfo() bool
- func (t *Table) LoadTableInfoFrom(filename string) bool
- func (t *Table) MakeDir()
- func (t *Table) MatchAndAggregate(querySpec *QuerySpec)
- func (t *Table) MaybeCompactRecords()
- func (t *Table) NewHist(info *IntInfo) *Hist
- func (t *Table) NewLoadSpec() LoadSpec
- func (t *Table) NewRecord() *Record
- func (t *Table) PrintColInfo()
- func (t *Table) PrintRecord(r *Record)
- func (t *Table) PrintRecords(records RecordList)
- func (t *Table) PrintSamples()
- func (t *Table) ReadBlockInfoFromDir(dirname string) *SavedColumnInfo
- func (t *Table) ReleaseBlockLock(name string) bool
- func (t *Table) ReleaseCacheLock() bool
- func (t *Table) ReleaseDigestLock() bool
- func (t *Table) ReleaseInfoLock() bool
- func (t *Table) ReleaseRecords()
- func (t *Table) ResetBlockCache()
- func (t *Table) RestoreUningestedFiles()
- func (t *Table) SaveRecordsToBlock(records RecordList, filename string) bool
- func (t *Table) SaveRecordsToColumns() bool
- func (t *Table) SaveTableInfo(fname string)
- func (t *Table) SetFilter(name string, op string, value string) SetFilter
- func (t *Table) ShouldCompactRowStore(digest string) bool
- func (t *Table) ShouldLoadBlockFromDir(dirname string, querySpec *QuerySpec) bool
- func (t *Table) StrFilter(name string, op string, value string) StrFilter
- func (t *Table) TrimTable(trimSpec *TrimSpec) []*TableBlock
- func (t *Table) WriteBlockCache()
- type TableBlock
- func (b *TableBlock) ExportBlockData()
- func (tb *TableBlock) GetColumnInfo(name_id int16) *TableColumn
- func (tb *TableBlock) RecycleSlab(loadSpec *LoadSpec)
- func (tb *TableBlock) SaveInfoToColumns(dirname string)
- func (tb *TableBlock) SaveIntsToColumns(dirname string, same_ints map[int16]ValueMap)
- func (tb *TableBlock) SaveSetsToColumns(dirname string, same_sets map[int16]ValueMap)
- func (tb *TableBlock) SaveStrsToColumns(dirname string, same_strs map[int16]ValueMap)
- func (tb *TableBlock) SaveToColumns(filename string) bool
- func (tb *TableBlock) SeparateRecordsIntoColumns() SeparatedColumns
- type TableColumn
- type TrimSpec
- type ValueMap
Constants ¶
const (
    NO_OP = iota
    OP_AVG = iota
    OP_HIST = iota
    OP_DISTINCT = iota
)
const (
    INT_VAL = iota
    STR_VAL = iota
    SET_VAL = iota
)
Variables ¶
var BLOCKS_BEFORE_GC = 8
var BLOCKS_PER_CACHE_FILE = 64
var BLOCK_VERSION = int32(1)
BLOCK_VERSION is how we get hints about decoding blocks for backwards compatibility; at least, it will be in the future.
var BREAK_MAP = make(map[string]int, 0)
var CACHE_DIR = "cache"
var CARDINALITY_THRESHOLD = 4
var CHUNKS_BEFORE_GC = 16
var CHUNK_SIZE = 1024 * 8 * 8
var CHUNK_THRESHOLD = CHUNK_SIZE / 8
var COPY_RECORD_INTERNS = false
var DEBUG_OUTLIERS = false
var DEBUG_RECORD_CONSISTENCY = false
var DEBUG_TIMING = false
var DELETE_BLOCKS = make([]string, 0)
var DELETE_BLOCKS_AFTER_QUERY = true
var EMPTY = ""
var ENABLE_LUA = false
var ENV_FLAG = os.Getenv("DEBUG")
Extracted from and influenced by https://groups.google.com/forum/#!topic/golang-nuts/ct99dtK2Jo4. Set the environment variable DEBUG=1 to turn on debug output.
var FALSE = false
var FILE_DIGEST_THRESHOLD = 256
var FLAGS = FlagDefs{}
TODO: merge these two into one thing; the current problem is that FLAGS needs pointers.
var FORMATS = map[string]string{
    "ansic": time.ANSIC,
    "unixdate": time.UnixDate,
    "rubydate": time.RubyDate,
    "rfc822": time.RFC822,
    "rfc822z": time.RFC822Z,
    "rfc850": time.RFC850,
    "rfc1123": time.RFC1123,
    "rfc1123z": time.RFC1123Z,
    "rfc3339": time.RFC3339,
    "rfc3339nano": time.RFC3339Nano,
    "kitchen": time.Kitchen,
    "stamp": time.Stamp,
    "stampmilli": time.StampMilli,
    "stampmicro": time.StampMicro,
    "stampnano": time.StampNano,
}
var GROUP_BY_WIDTH = 8 // bytes
var GROUP_DELIMITER = "\t"
var GZIP_EXT = ".gz"
var HOLD_MATCHES = false
var INGEST_DIR = "ingest"
var INTERNAL_RESULT_LIMIT = 100000
var KB = int64(1024)
var LOADED_TABLES = make(map[string]*Table)
var LOCK_TRIES = 50
var LOCK_US = time.Millisecond * 3
var MAX_LOCK_BREAKS = 5
var MAX_ROW_STORE_TRIES = 20
var MIN_CUTOFF = 5 // need at least this many elements before we determine min/max
var MIN_FILES_TO_DIGEST = 0
var NO_MORE_BLOCKS = GROUP_DELIMITER
var NUM_BUCKETS = 1000
var OPTS = OptionDefs{}
var PROFILE *bool = &PROFILER_ENABLED
var PROFILER_ENABLED bool
var READ_ROWS_ONLY = false
var REGEX_CACHE_SIZE = 100000
var ROW_STORE_BLOCK = "ROW_STORE"
var RUN_PROFILER = func() ProfilerStop { return NoProfile{} }
var SINGLE_EVENT_DURATION = int64(30) // I think this means 30 seconds
var SIZE_DIGEST_THRESHOLD = int64(1024) * 2
var STD_CUTOFF = 1000.0 // if value is 1000 SDs away, we ignore it
var STOMACHE_DIR = "stomache"
var STOP_PROFILER = func() {
}
var TEMP_INGEST_DIR = ".ingest.temp"
var TEST_MODE = false
var TOP_STRING_COUNT = 20
var TRUE = true
var VERSION_STRING = "0.2.0"
Functions ¶
func FilterAndAggRecords ¶
func FilterAndAggRecords(querySpec *QuerySpec, recordsPtr *RecordList) int
func GetFileDecoder ¶ added in v0.2.0
func GetFileDecoder(filename string) *gob.Decoder
func GetTimeFormat ¶
func GetTimeFormat(time_fmt string) string
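GetTimeFormat resolves a lowercase format name against the FORMATS table shown under Variables. A minimal sketch of feeding the returned layout to time.Parse; the helper name, the package qualifier sybil, and the error handling are assumptions for illustration, not part of this package's documented examples:

func parseWithNamedFormat(value string) (time.Time, error) {
    // "rfc3339" is one of the keys in the FORMATS map; GetTimeFormat is
    // expected to hand back the matching Go layout string (time.RFC3339).
    layout := sybil.GetTimeFormat("rfc3339")
    return time.Parse(layout, value)
}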
func GetVersionInfo ¶ added in v0.2.0
func GetVersionInfo() map[string]interface{}
func LoadAndSessionize ¶
func LoadAndSessionize(tables []*Table, querySpec *QuerySpec, sessionSpec *SessionSpec) int
func LoadRowBlockCB ¶
func LoadRowBlockCB(digestname string, records RecordList)
func PrintResults ¶
func PrintResults(querySpec *QuerySpec)
func PrintTables ¶
func PrintTables()
func PrintVersionInfo ¶ added in v0.2.0
func PrintVersionInfo()
func RecoverLock ¶
func RecoverLock(lock RecoverableLock) bool
func RenameAndMod ¶ added in v0.2.0
func RenameAndMod(src, dst string) error
TODO: We should really split this into two functions based on dir / file.
func SearchBlocks ¶
func SearchBlocks(querySpec *QuerySpec, block_list map[string]*TableBlock) map[string]*QuerySpec
OLD SEARCHING FUNCTIONS BELOW HERE
func SessionizeRecords ¶
func SessionizeRecords(querySpec *QuerySpec, sessionSpec *SessionSpec, recordsptr *RecordList)
func SetDefaults ¶
func SetDefaults()
func SetLuaScript ¶ added in v0.2.0
func SetLuaScript(filename string)
func SortResults ¶
func SortResults(querySpec *QuerySpec)
Types ¶
type ActiveSession ¶
type ActiveSession struct {
    Records RecordList
    Stats *SessionStats
    Path []string
    PathKey bytes.Buffer
    PathLength int
    PathStats map[string]int
}
func (*ActiveSession) AddRecord ¶
func (as *ActiveSession) AddRecord(r *Record)
func (*ActiveSession) CombineSession ¶
func (as *ActiveSession) CombineSession(session *ActiveSession)
func (*ActiveSession) ExpireRecords ¶
func (as *ActiveSession) ExpireRecords(timestamp int) []RecordList
func (*ActiveSession) IsExpired ¶
func (as *ActiveSession) IsExpired() bool
type ActivityMap ¶
type AfterLoadQueryCB ¶
type AfterLoadQueryCB struct {
// contains filtered or unexported fields
}
func (*AfterLoadQueryCB) CB ¶
func (cb *AfterLoadQueryCB) CB(digestname string, records RecordList)
type AfterRowBlockLoad ¶
type AfterRowBlockLoad func(string, RecordList)
type Aggregation ¶
type Aggregation struct {
// contains filtered or unexported fields
}
type Calendar ¶
type Calendar struct {
    Daily ActivityMap
    Weekly ActivityMap
    Monthly ActivityMap
    Min int64
    Max int64
}
Trying out a calendar with stats by day, week and month
func NewCalendar ¶
func NewCalendar() *Calendar
func (*Calendar) AddActivity ¶
func (*Calendar) CombineCalendar ¶
type DigestLock ¶
type DigestLock struct {
Lock
}
func (*DigestLock) Recover ¶
func (l *DigestLock) Recover() bool
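The Grab*/Release* methods on Table pair up around the operation each lock protects. A sketch of that pairing for the digest lock, assuming t is an already-initialized *Table; the helper name and the defer-based release are illustrative, not a documented requirement:

func digestWithLock(t *sybil.Table) {
    // Only digest the row store if the digest lock can be taken; release it afterwards.
    if t.GrabDigestLock() {
        defer t.ReleaseDigestLock()
        t.DigestRecords()
    }
}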
type Filter ¶
func BuildFilters ¶
func BuildFilters(t *Table, loadSpec *LoadSpec, filterSpec FilterSpec) []Filter
type FilterSpec ¶
These are the passed-in flags.
type FlagDefs ¶
type FlagDefs struct {
    OP *string
    PRINT *bool
    EXPORT *bool
    INT_FILTERS *string
    STR_FILTERS *string
    STR_REPLACE *string // regex replacement for strings
    SET_FILTERS *string
    SESSION_COL *string
    INTS *string
    STRS *string
    GROUPS *string
    ADD_RECORDS *int
    TIME *bool
    TIME_COL *string
    TIME_BUCKET *int
    HIST_BUCKET *int
    FIELD_SEPARATOR *string
    FILTER_SEPARATOR *string
    PRINT_KEYS *bool
    LOAD_AND_QUERY *bool
    LOAD_THEN_QUERY *bool
    READ_INGESTION_LOG *bool
    READ_ROWSTORE *bool
    SKIP_COMPACT *bool
    PROFILE *bool
    PROFILE_MEM *bool
    RECYCLE_MEM *bool
    WEIGHT_COL *string
    LIMIT *int
    DEBUG *bool
    JSON *bool
    GC *bool
    DIR *string
    SORT *string
    TABLE *string
    PRINT_INFO *bool
    SAMPLES *bool
    LUA *bool
    LUAFILE *string
    UPDATE_TABLE_INFO *bool
    SKIP_OUTLIERS *bool

    // Join keys
    JOIN_TABLE *string
    JOIN_KEY *string
    JOIN_GROUP *string

    // Sessionization stuff
    SESSION_CUTOFF *int
    RETENTION *bool
    PATH_KEY *string
    PATH_LENGTH *int

    // STATS
    ANOVA_ICC *bool
}
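Every FlagDefs field is a pointer (see the note next to FLAGS in the Variables section), so they can be wired to the standard flag package. A hedged sketch; the helper name, flag names, and the call order around SetDefaults are assumptions:

func setUpFlags() {
    // FLAGS fields are pointers, so they can be wired straight to the flag package.
    sybil.FLAGS.TABLE = flag.String("table", "", "table to query")
    sybil.FLAGS.DEBUG = flag.Bool("debug", false, "enable debug output")
    flag.Parse()
    sybil.SetDefaults()
}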
type Hist ¶
type Hist struct {
    Max int64
    Min int64
    Samples int
    Count int64
    Avg float64
    // contains filtered or unexported fields
}
func (*Hist) GetBuckets ¶
func (*Hist) GetMeanVariance ¶
func (*Hist) GetPercentiles ¶
func (*Hist) GetStdDev ¶
VARIANCE is defined as the squared error from the mean; STD DEV is defined as sqrt(VARIANCE). A small sketch follows this type's method list.
func (*Hist) GetVariance ¶
func (*Hist) SetupBuckets ¶
func (*Hist) TrackPercentiles ¶
func (h *Hist) TrackPercentiles()
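A sketch of reading the statistics described above out of a histogram; h is assumed to be a *Hist pulled from a query result with percentile tracking already enabled, and the helper name is made up:

func summarizeHist(h *sybil.Hist) {
    // GetStdDev is documented above as sqrt(GetVariance); both are read here.
    variance := h.GetVariance()
    stddev := h.GetStdDev()
    percentiles := h.GetPercentiles()

    sybil.Print("avg", h.Avg, "variance", variance, "stddev", stddev)
    if len(percentiles) > 50 {
        sybil.Print("p50", percentiles[50]) // treating index 50 as the median is an assumption
    }
}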
type IntFilter ¶
type IntInfoTable ¶
type LoadSpec ¶
type LoadSpec struct {
    LoadAllColumns bool
    // contains filtered or unexported fields
}
func NewLoadSpec ¶
func NewLoadSpec() LoadSpec
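A sketch of the load-and-query round trip using LoadAllColumns, assuming t is a *Table whose table info has already been loaded and querySpec has been prepared elsewhere; the helper name is illustrative:

func loadEverythingAndQuery(t *sybil.Table, querySpec *sybil.QuerySpec) int {
    // Ask for every column instead of naming them one by one.
    loadSpec := t.NewLoadSpec()
    loadSpec.LoadAllColumns = true
    return t.LoadAndQueryRecords(&loadSpec, querySpec)
}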
type Lock ¶
func (*Lock) ForceDeleteFile ¶
func (l *Lock) ForceDeleteFile()
func (*Lock) ForceMakeFile ¶
type NoProfile ¶
type NoProfile struct{}
func (NoProfile) Start ¶
func (p NoProfile) Start() ProfilerStart
type OptionDefs ¶
type ProfilerStart ¶
type ProfilerStart interface {
Stop()
}
type ProfilerStop ¶
type ProfilerStop interface {
Start() ProfilerStart
}
type QuerySpec ¶
type QuerySpec struct {
    Filters []Filter
    Groups []Grouping
    Aggregations []Aggregation
    OrderBy string
    Limit int16
    TimeBucket int
    Cumulative *Result
    Results ResultMap
    TimeResults map[int]ResultMap
    Sorted []*Result
    Matched RecordList
    Sessions SessionList
    BlockList map[string]TableBlock
    Table *Table
    LuaResult LuaTable
    LuaState *C.struct_lua_State
}
func CombineResults ¶
func CopyQuerySpec ¶
func (*QuerySpec) CalculateICC ¶
To calculate SSW and SSB:
SSW = sum of squares within groups. Take each group, calculate its variance, then add all those variances together.
SSB = sum of squares between groups. Take each group's average and calculate its variance against the overall average.
func (*QuerySpec) PrintResults ¶
func (qs *QuerySpec) PrintResults()
func (*QuerySpec) ResetResults ¶
func (querySpec *QuerySpec) ResetResults()
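A hedged sketch of assembling a QuerySpec by hand from the Table helper constructors; the helper name, the column names, and the op strings ("eq", "hist") are illustrative assumptions, and IntFilter is assumed to satisfy the Filter interface:

func buildQuerySpec(t *sybil.Table) *sybil.QuerySpec {
    querySpec := sybil.QuerySpec{
        Table: t,
        Filters: []sybil.Filter{t.IntFilter("status", "eq", 200)},
        Groups: []sybil.Grouping{t.Grouping("host")},
        Aggregations: []sybil.Aggregation{t.Aggregation("latency", "hist")},
        OrderBy: "latency",
        Limit: 100,
    }
    return &querySpec
}

A spec built this way appears to be what Table.LoadAndQueryRecords and FilterAndAggRecords expect; QuerySpec.PrintResults then prints whatever ends up in Results.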
type Record ¶
type Record struct {
    Strs []StrField
    Ints []IntField
    SetMap map[int16]SetField
    Populated []int8
    Timestamp int64
    Path string
    // contains filtered or unexported fields
}
func (*Record) AddIntField ¶
func (*Record) AddSetField ¶
func (*Record) AddStrField ¶
func (*Record) CopyRecord ¶
func (*Record) ResizeFields ¶
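A sketch of creating a record through Table.NewRecord and reading a field back; the helper name and column names are made up for illustration:

func addSample(t *sybil.Table) {
    r := t.NewRecord()
    r.AddStrField("host", "web-01")
    r.AddIntField("status", 200)
    r.AddSetField("tags", []string{"canary", "us-east"})

    // GetIntVal reports whether the named int column is populated on this record.
    if status, ok := r.GetIntVal("status"); ok {
        sybil.Debug("status on new record:", status)
    }
}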
type RecordList ¶
type RecordList []*Record
Before we save the new record list in a table, we tend to sort by time
func CombineMatches ¶
func CombineMatches(block_specs map[string]*QuerySpec) RecordList
func (RecordList) Len ¶
func (a RecordList) Len() int
func (RecordList) ResetRecords ¶ added in v0.2.0
func (rl RecordList) ResetRecords(tb *TableBlock)
Recycle allocated records between blocks; that means we need a wash and rinse cycle. We can re-use blocks if:
the loadSpec is the same, the table is the same, and NumRecords are the same.
To do so, we clean out the different arrays inside a block and re-home the record list into the table block.
func (RecordList) Swap ¶
func (a RecordList) Swap(i, j int)
type RecoverableLock ¶
Every LockFile should have a recovery plan
type Result ¶
type ResultJSON ¶
type ResultJSON map[string]interface{}
type RowSavedInt ¶
type RowSavedSet ¶
type RowSavedStr ¶
type SaveBlockChunkCB ¶
type SaveBlockChunkCB struct {
// contains filtered or unexported fields
}
func (*SaveBlockChunkCB) CB ¶
func (cb *SaveBlockChunkCB) CB(digestname string, records RecordList)
type SavedBlockCache ¶ added in v0.2.0
type SavedBlockCache map[string]*SavedColumnInfo
type SavedColumnInfo ¶
type SavedColumnInfo struct {
    NumRecords int32
    StrInfoMap SavedStrInfo
    IntInfoMap SavedIntInfo
}
type SavedIntBucket ¶
type SavedIntColumn ¶
type SavedIntColumn struct {
    Name string
    DeltaEncodedIDs bool
    ValueEncoded bool
    BucketEncoded bool
    Bins []SavedIntBucket
    Values []int64
    VERSION int32
}
func NewSavedIntColumn ¶
func NewSavedIntColumn() SavedIntColumn
type SavedIntInfo ¶
type SavedRecord ¶
type SavedRecord struct {
    Ints []RowSavedInt
    Strs []RowSavedStr
    Sets []RowSavedSet
}
type SavedRecords ¶
type SavedRecords struct {
RecordList []*SavedRecord
}
type SavedSetBucket ¶
type SavedSetColumn ¶
type SavedSetColumn struct {
    Name string
    Bins []SavedSetBucket
    Values [][]int32
    StringTable []string
    DeltaEncodedIDs bool
    BucketEncoded bool
    VERSION int32
}
func NewSavedSetColumn ¶
func NewSavedSetColumn() SavedSetColumn
type SavedStrBucket ¶
type SavedStrColumn ¶
type SavedStrColumn struct {
    Name string
    DeltaEncodedIDs bool
    BucketEncoded bool
    Bins []SavedStrBucket
    Values []int32
    StringTable []string
    VERSION int32
}
func NewSavedStrColumn ¶
func NewSavedStrColumn() SavedStrColumn
type SavedStrInfo ¶
type SeparatedColumns ¶
type SeparatedColumns struct {
// contains filtered or unexported fields
}
type SessionList ¶
type SessionList struct {
    List Sessions
    JoinTable *Table
    Results map[string]*SessionStats
    PathCounts map[string]int
    PathUniques map[string]int
    Expiration int
    LastExpiration int
}
func (*SessionList) AddRecord ¶
func (sl *SessionList) AddRecord(group_key string, r *Record)
func (*SessionList) ExpireRecords ¶
func (sl *SessionList) ExpireRecords() int
func (*SessionList) NoMoreRecordsBefore ¶
func (as *SessionList) NoMoreRecordsBefore(timestamp int)
type SessionSpec ¶
type SessionSpec struct {
    ExpireAfter int // Seconds to expire a session after not seeing any new events
    Sessions SessionList
    Count int
}
func NewSessionSpec ¶
func NewSessionSpec() SessionSpec
func (*SessionSpec) CombineSessions ¶
func (ss *SessionSpec) CombineSessions(sessionspec *SessionSpec)
func (*SessionSpec) ExpireRecords ¶
func (ss *SessionSpec) ExpireRecords()
func (*SessionSpec) Finalize ¶
func (ss *SessionSpec) Finalize()
func (*SessionSpec) PrintResults ¶
func (ss *SessionSpec) PrintResults()
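A sketch tying SessionSpec to LoadAndSessionize; the helper name, the 60-second expiry, and the assumption that querySpec already carries the session column configuration are illustrative:

func sessionize(tables []*sybil.Table, querySpec *sybil.QuerySpec) {
    sessionSpec := sybil.NewSessionSpec()
    sessionSpec.ExpireAfter = 60 // seconds without new events before a session expires

    sybil.LoadAndSessionize(tables, querySpec, &sessionSpec)
    sessionSpec.PrintResults()
}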
type SessionStats ¶
type SessionStats struct {
    NumEvents Hist
    NumBounces Hist
    NumSessions Hist
    SessionDuration Hist
    Retention Hist
    Calendar *Calendar
    SessionDelta Hist
    LastSessionEnd int64
}
func NewSessionStats ¶
func NewSessionStats() *SessionStats
func (*SessionStats) CombineStats ¶
func (ss *SessionStats) CombineStats(stats *SessionStats)
func (*SessionStats) PrintStats ¶
func (ss *SessionStats) PrintStats(key string)
func (*SessionStats) SummarizeSession ¶
func (ss *SessionStats) SummarizeSession(records RecordList)
type Sessions ¶
type Sessions map[string]*ActiveSession
type SetFilter ¶
type SortBlocksByEndTime ¶
type SortBlocksByEndTime []*TableBlock
func (SortBlocksByEndTime) Len ¶
func (a SortBlocksByEndTime) Len() int
func (SortBlocksByEndTime) Less ¶
func (a SortBlocksByEndTime) Less(i, j int) bool
func (SortBlocksByEndTime) Swap ¶
func (a SortBlocksByEndTime) Swap(i, j int)
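SortBlocksByEndTime, like the other Sort* helpers here, implements Len, Less, and Swap, so it satisfies sort.Interface and plugs straight into the standard sort package. A minimal sketch with a hypothetical helper name:

func sortBlocksByEnd(blocks []*sybil.TableBlock) {
    // Wrap the slice in the helper type and let the standard library sort it.
    sort.Sort(sybil.SortBlocksByEndTime(blocks))
}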
type SortBlocksByTime ¶
type SortBlocksByTime []*TableBlock
func (SortBlocksByTime) Len ¶
func (a SortBlocksByTime) Len() int
func (SortBlocksByTime) Less ¶
func (a SortBlocksByTime) Less(i, j int) bool
func (SortBlocksByTime) Swap ¶
func (a SortBlocksByTime) Swap(i, j int)
type SortRecordsByTime ¶
type SortRecordsByTime struct {
RecordList
}
func (SortRecordsByTime) Less ¶
func (a SortRecordsByTime) Less(i, j int) bool
type SortResultsByCol ¶
func (SortResultsByCol) Len ¶
func (a SortResultsByCol) Len() int
func (SortResultsByCol) Less ¶
func (a SortResultsByCol) Less(i, j int) bool
This sorts the records in descending order
func (SortResultsByCol) Swap ¶
func (a SortResultsByCol) Swap(i, j int)
type SortStrsByCount ¶
type SortStrsByCount []StrInfoCol
func (SortStrsByCount) Len ¶
func (a SortStrsByCount) Len() int
func (SortStrsByCount) Less ¶
func (a SortStrsByCount) Less(i, j int) bool
func (SortStrsByCount) Swap ¶
func (a SortStrsByCount) Swap(i, j int)
type StrFilter ¶
type StrInfo ¶
StrInfo and IntInfo contain interesting tidbits about columns; they also get serialized to disk in the block's info.db.
type StrInfoCol ¶
type StrInfoTable ¶
type StrReplace ¶
type StrReplace struct {
// contains filtered or unexported fields
}
type Table ¶
type Table struct {
    Name string
    BlockList map[string]*TableBlock
    KeyTable map[string]int16 // String Key Names
    KeyTypes map[int16]int8

    // Need to keep track of the last block we've used, right?
    LastBlock TableBlock
    RowBlock *TableBlock

    StrInfo StrInfoTable
    IntInfo IntInfoTable

    BlockInfoCache map[string]*SavedColumnInfo
    NewBlockInfos []string
    // contains filtered or unexported fields
}
func (*Table) Aggregation ¶
func (t *Table) Aggregation(name string, op string) Aggregation
func (*Table) AppendRecordsToLog ¶
func (t *Table) AppendRecordsToLog(records RecordList, blockname string)
func (*Table) BuildJoinMap ¶
func (t *Table) BuildJoinMap()
func (*Table) ChunkAndSave ¶
func (t *Table) ChunkAndSave()
func (*Table) CompactRecords ¶ added in v0.2.0
func (t *Table) CompactRecords()
TODO: figure out how often we actually do a collation check by storing last collation inside a file somewhere
func (*Table) DeduceTableInfoFromBlocks ¶
func (t *Table) DeduceTableInfoFromBlocks()
Alright, so... I accidentally broke my info.db file. How can I go about loading the TableInfo based off the blocks? I think I go through each block, load it, and verify the different column types.
func (*Table) DigestRecords ¶
func (t *Table) DigestRecords()
Go through rowstore and save records out to column store
func (*Table) FillPartialBlock ¶
TODO: find any open blocks and then fill them...
func (*Table) FindPartialBlocks ¶
func (t *Table) FindPartialBlocks() []*TableBlock
func (*Table) GetColumnType ¶
func (*Table) GetRecordById ¶
func (*Table) GrabBlockLock ¶
func (*Table) GrabCacheLock ¶ added in v0.2.0
func (*Table) GrabDigestLock ¶
func (*Table) GrabInfoLock ¶
func (*Table) HasFlagFile ¶
func (*Table) IngestRecords ¶
Go through newRecords list and save all the new records out to a row store
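A hedged sketch of the write path suggested by IngestRecords and DigestRecords; it assumes NewRecord registers the record with the table's pending list, that the block name follows the INGEST_DIR convention from the Variables section, and the helper name and column names are made up:

func writePath(t *sybil.Table) {
    // Stage a record, flush pending records into the row store,
    // then digest the row store out to the column store.
    r := t.NewRecord()
    r.AddStrField("event", "pageview")
    r.AddIntField("time", time.Now().Unix())

    t.IngestRecords("ingest")
    t.DigestRecords()
}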
func (*Table) IsNotExist ¶ added in v0.2.0
func (*Table) LoadAndQueryRecords ¶
func (*Table) LoadBlockCache ¶ added in v0.2.0
func (t *Table) LoadBlockCache()
func (*Table) LoadBlockFromDir ¶
func (t *Table) LoadBlockFromDir(dirname string, loadSpec *LoadSpec, load_records bool) *TableBlock
TODO: have this only pull the blocks into column format and not materialize the columns immediately
func (*Table) LoadBlockInfo ¶ added in v0.2.0
func (t *Table) LoadBlockInfo(dirname string) *SavedColumnInfo
func (*Table) LoadRecords ¶
func (*Table) LoadRecordsFromLog ¶
func (t *Table) LoadRecordsFromLog(filename string) RecordList
func (*Table) LoadRowStoreRecords ¶
func (t *Table) LoadRowStoreRecords(digest string, after_block_load_cb AfterRowBlockLoad)
func (*Table) LoadSavedRecordsFromLog ¶
func (t *Table) LoadSavedRecordsFromLog(filename string) []*SavedRecord
func (*Table) LoadTableInfo ¶
func (*Table) LoadTableInfoFrom ¶
func (*Table) MatchAndAggregate ¶
func (*Table) MaybeCompactRecords ¶
func (t *Table) MaybeCompactRecords()
We compact if: we have over X files, or we have over X megabytes of data. Remember, there is no reason to actually read the data off disk until we decide to compact.
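MaybeCompactRecords presumably wraps the check described above; a caller could also make it explicit with ShouldCompactRowStore, as in this sketch (the helper name and the use of ROW_STORE_BLOCK as the digest name are assumptions):

func compactIfNeeded(t *sybil.Table) {
    // Decide from the row store's file count / size without reading the data itself.
    if t.ShouldCompactRowStore(sybil.ROW_STORE_BLOCK) {
        t.CompactRecords()
    }
}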
func (*Table) NewLoadSpec ¶
func (*Table) PrintColInfo ¶
func (t *Table) PrintColInfo()
func (*Table) PrintRecord ¶
func (*Table) PrintRecords ¶
func (t *Table) PrintRecords(records RecordList)
func (*Table) PrintSamples ¶
func (t *Table) PrintSamples()
func (*Table) ReadBlockInfoFromDir ¶
func (t *Table) ReadBlockInfoFromDir(dirname string) *SavedColumnInfo
TODO: have this only pull the blocks into column format and not materialize the columns immediately
func (*Table) ReleaseBlockLock ¶
func (*Table) ReleaseCacheLock ¶ added in v0.2.0
func (*Table) ReleaseDigestLock ¶
func (*Table) ReleaseInfoLock ¶
func (*Table) ReleaseRecords ¶
func (t *Table) ReleaseRecords()
Remove our pointer to the blocklist so a GC is triggered and a bunch of new memory becomes available
func (*Table) ResetBlockCache ¶ added in v0.2.0
func (t *Table) ResetBlockCache()
func (*Table) RestoreUningestedFiles ¶
func (t *Table) RestoreUningestedFiles()
func (*Table) SaveRecordsToBlock ¶
func (t *Table) SaveRecordsToBlock(records RecordList, filename string) bool
func (*Table) SaveRecordsToColumns ¶
func (*Table) SaveTableInfo ¶
func (*Table) ShouldCompactRowStore ¶ added in v0.2.0
func (*Table) ShouldLoadBlockFromDir ¶
optimizing for integer pre-cached info
func (*Table) TrimTable ¶
func (t *Table) TrimTable(trimSpec *TrimSpec) []*TableBlock
List all the blocks that should be trimmed to keep the table within its memory limits.
func (*Table) WriteBlockCache ¶ added in v0.2.0
func (t *Table) WriteBlockCache()
type TableBlock ¶
type TableBlock struct {
    Name string
    RecordList RecordList
    Info *SavedColumnInfo
    Size int64
    Matched RecordList
    IntInfo IntInfoTable
    StrInfo StrInfoTable
    // contains filtered or unexported fields
}
Table Block should have a bunch of metadata next to it, too
func (*TableBlock) ExportBlockData ¶ added in v0.2.0
func (b *TableBlock) ExportBlockData()
func (*TableBlock) GetColumnInfo ¶
func (tb *TableBlock) GetColumnInfo(name_id int16) *TableColumn
func (*TableBlock) RecycleSlab ¶ added in v0.2.0
func (tb *TableBlock) RecycleSlab(loadSpec *LoadSpec)
func (*TableBlock) SaveInfoToColumns ¶
func (tb *TableBlock) SaveInfoToColumns(dirname string)
func (*TableBlock) SaveIntsToColumns ¶
func (tb *TableBlock) SaveIntsToColumns(dirname string, same_ints map[int16]ValueMap)
func (*TableBlock) SaveSetsToColumns ¶
func (tb *TableBlock) SaveSetsToColumns(dirname string, same_sets map[int16]ValueMap)
func (*TableBlock) SaveStrsToColumns ¶
func (tb *TableBlock) SaveStrsToColumns(dirname string, same_strs map[int16]ValueMap)
func (*TableBlock) SaveToColumns ¶
func (tb *TableBlock) SaveToColumns(filename string) bool
func (*TableBlock) SeparateRecordsIntoColumns ¶
func (tb *TableBlock) SeparateRecordsIntoColumns() SeparatedColumns
type TableColumn ¶
Source Files ¶
- aggregate.go
- block_manager.go
- calendar.go
- cmd_flags.go
- column_store.go
- column_store_io.go
- cp.go
- disable_luajit.go
- disable_profiler.go
- fields.go
- filter.go
- hist.go
- joins.go
- printer.go
- profiler.go
- query_spec.go
- record.go
- row_store.go
- sessionize.go
- slog.go
- stats.go
- table.go
- table_block.go
- table_block_io.go
- table_column.go
- table_column_info.go
- table_decoder.go
- table_ingest.go
- table_io.go
- table_load_spec.go
- table_lock.go
- table_recover_info.go
- table_trim.go
- time_formats.go
- version.go