Documentation ¶
Index ¶
- Constants
- Variables
- func BuildVectorsFromHLLResult(result AQLQueryResult, dimDataTypes []memCom.DataType, ...) (hllVector, dimVector, countVector []byte, err error)
- func CalculateEnumCasesBytes(enumCases []string) uint32
- func CreateTimeFilterExpr(expression expr.Expr, from, to *AlignedTime) (fromExpr, toExpr expr.Expr)
- func DimValResVectorSize(resultSize int, numDimsPerDimWidth DimCountsPerDimWidth) int
- func GetCurrentCalendarUnit(base time.Time, unit string) (start, end time.Time, err error)
- func GetDimensionDataBytes(expression expr.Expr) int
- func GetDimensionDataType(expression expr.Expr) memCom.DataType
- func GetDimensionStartOffsets(numDimsPerDimWidth DimCountsPerDimWidth, dimIndex int, length int) (valueOffset, nullOffset int)
- func ParseTimezone(timezone string) (*time.Location, error)
- func ReadDimension(valueStart, nullStart unsafe.Pointer, index int, dataType memCom.DataType, ...) *string
- type AQLQuery
- type AQLQueryResult
- type AQLRequest
- type AQLResponse
- type AlignedTime
- type DimCountsPerDimWidth
- type Dimension
- type HLL
- type HLLData
- type HLLDataWriter
- type HLLQueryResults
- type HLLRegister
- type Join
- type Measure
- type NumericBucketizerDef
- type SortField
- type TimeDimensionMeta
- type TimeFilter
- type TimeSeriesBucketizer
Constants ¶
const (
	MatrixDataKey = "matrixData"
	HeadersKey    = "headers"
)
const (
	// OldHLLDataHeader is the old magic header for migration
	OldHLLDataHeader uint32 = 0xACED0101
	// HLLDataHeader is the magic header written into serialized format of hyperloglog query result.
	HLLDataHeader uint32 = 0xACED0102
	// EnumDelimiter is the delimiter to delimit enum cases.
	EnumDelimiter = "\u0000\n"
	// DenseDataLength is the length of hll dense data in bytes.
	DenseDataLength = 1 << 14 // 16KB
	// DenseThreshold is the threshold to convert sparse values to dense values.
	DenseThreshold = DenseDataLength / 4
)
const (
	// SecondsPerMinute is the number of seconds per minute
	SecondsPerMinute = 60
	// SecondsPerHour is the number of seconds per hour
	SecondsPerHour = SecondsPerMinute * 60
	// SecondsPerDay is the number of seconds per day
	SecondsPerDay = SecondsPerHour * 24
	// SecondsPer4Day is the number of seconds per 4 days
	SecondsPer4Day = SecondsPerDay * 4
	// DaysPerWeek is the number of days per week
	DaysPerWeek = 7
	// WeekdayOffset is to compensate for 1970-01-01 being a Thursday
	WeekdayOffset = 4
	// SecondsPerWeek is the number of seconds per week
	SecondsPerWeek = SecondsPerDay * DaysPerWeek
)
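These constants turn fixed-size time bucketing into plain integer arithmetic. A minimal, self-contained sketch (not part of this package) that truncates a Unix timestamp down to its hour and day buckets:

package main

import "fmt"

const (
	SecondsPerMinute = 60
	SecondsPerHour   = SecondsPerMinute * 60
	SecondsPerDay    = SecondsPerHour * 24
)

// bucketStart truncates ts down to the start of its fixed-size bucket.
// It assumes a non-negative Unix timestamp.
func bucketStart(ts, bucketSeconds int64) int64 {
	return ts - ts%bucketSeconds
}

func main() {
	ts := int64(1546318925) // 2019-01-01T05:02:05Z
	fmt.Println(bucketStart(ts, SecondsPerHour)) // start of the hour
	fmt.Println(bucketStart(ts, SecondsPerDay))  // start of the day
}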
const (
// NULLString is the string representing null dimension values
NULLString = "NULL"
)
Variables ¶
var BucketSizeToseconds = map[string]int{
	"m": SecondsPerMinute,
	"h": SecondsPerHour,
	"d": SecondsPerDay,
}
BucketSizeToseconds maps a normalized bucket unit to its number of seconds.
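For instance, a parsed bucketizer such as (3, "m") can be turned into a bucket size in seconds via this map. A self-contained sketch (the map is restated locally rather than imported):

package main

import "fmt"

var bucketSizeToSeconds = map[string]int{
	"m": 60,
	"h": 3600,
	"d": 86400,
}

func main() {
	size, unit := 3, "m" // e.g. the result of parsing "3m"
	seconds, ok := bucketSizeToSeconds[unit]
	if !ok {
		panic("unsupported bucket unit")
	}
	fmt.Println(size * seconds) // 180
}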
var DataTypeToExprType = map[memCom.DataType]expr.Type{
	memCom.Bool:      expr.Boolean,
	memCom.Int8:      expr.Signed,
	memCom.Int16:     expr.Signed,
	memCom.Int32:     expr.Signed,
	memCom.Int64:     expr.Signed,
	memCom.Uint8:     expr.Unsigned,
	memCom.Uint16:    expr.Unsigned,
	memCom.Uint32:    expr.Unsigned,
	memCom.Float32:   expr.Float,
	memCom.SmallEnum: expr.Unsigned,
	memCom.BigEnum:   expr.Unsigned,
	memCom.GeoPoint:  expr.GeoPoint,
	memCom.GeoShape:  expr.GeoShape,
}
DataTypeToExprType maps data type from the column schema format to expression AST format.
Functions ¶
func BuildVectorsFromHLLResult ¶ added in v0.0.2
func BuildVectorsFromHLLResult(result AQLQueryResult, dimDataTypes []memCom.DataType, enumDicts map[int]map[string]int, dimensionVectorIndex []int) (hllVector, dimVector, countVector []byte, err error)
BuildVectorsFromHLLResult traverses the input HLL query result and builds byte slices. The result must have HLL structs in the leaf nodes; this function is useful when converting an HLL query result to its binary format. dimDataTypes stores the type of each dimension, in the same order as in the query. dimensionVectorIndex stores the re-ordered dimension indexes, sorted by dimension data type width.
func CalculateEnumCasesBytes ¶
func CalculateEnumCasesBytes(enumCases []string) uint32
CalculateEnumCasesBytes calculates how many bytes the enum case values will occupy, including 8-byte alignment.
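Per the documentation above, each enum case is written followed by EnumDelimiter (2 bytes) and the total is padded to a multiple of 8. A hedged, local sketch of that accounting (the real implementation may differ in detail):

package main

import "fmt"

const enumDelimiter = "\u0000\n" // 2 bytes

// enumCasesBytes mirrors the documented behavior: sum the case lengths,
// add one delimiter per case, then round up to 8-byte alignment.
func enumCasesBytes(enumCases []string) uint32 {
	var n uint32
	for _, c := range enumCases {
		n += uint32(len(c) + len(enumDelimiter))
	}
	return (n + 7) / 8 * 8
}

func main() {
	fmt.Println(enumCasesBytes([]string{"SF", "LA", "NYC"})) // 13 bytes, padded to 16
}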
func CreateTimeFilterExpr ¶ added in v0.0.2
func CreateTimeFilterExpr(expression expr.Expr, from, to *AlignedTime) (fromExpr, toExpr expr.Expr)
CreateTimeFilterExpr creates the from and to time filter expressions for the given column expression and aligned time range.
func DimValResVectorSize ¶ added in v0.0.2
func DimValResVectorSize(resultSize int, numDimsPerDimWidth DimCountsPerDimWidth) int
DimValResVectorSize returns the size of the final dim value vector on the host side.
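A hedged usage sketch, assuming the import path github.com/uber/aresdb/query/common (aliased queryCom) and the value-then-null layout described under GetDimensionStartOffsets:

package main

import (
	"fmt"

	queryCom "github.com/uber/aresdb/query/common"
)

func main() {
	// Counts per width, ordered 16-byte, 8-byte, 4-byte, 2-byte, 1-byte:
	// here two 4-byte dimensions and one 1-byte dimension.
	counts := queryCom.DimCountsPerDimWidth{0, 0, 2, 0, 1}
	resultSize := 1024 // number of result rows

	// Size in bytes of the host-side dimension value vector for this shape.
	fmt.Println(queryCom.DimValResVectorSize(resultSize, counts))
}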
func GetCurrentCalendarUnit ¶ added in v0.0.2
func GetCurrentCalendarUnit(base time.Time, unit string) (start, end time.Time, err error)
GetCurrentCalendarUnit returns the start and end of the calendar unit for base.
func GetDimensionDataBytes ¶ added in v0.0.2
func GetDimensionDataBytes(expression expr.Expr) int
GetDimensionDataBytes returns the number of bytes of the dimension data type for the given expression.
func GetDimensionDataType ¶ added in v0.0.2
func GetDimensionDataType(expression expr.Expr) memCom.DataType
GetDimensionDataType returns the DataType for the given expression.
func GetDimensionStartOffsets ¶
func GetDimensionStartOffsets(numDimsPerDimWidth DimCountsPerDimWidth, dimIndex int, length int) (valueOffset, nullOffset int)
GetDimensionStartOffsets calculates the value and null starting positions for the given dimension inside the dimension vector. dimIndex is the ordered index of the given dimension inside the dimension vector.
func ParseTimezone ¶ added in v0.0.2
func ParseTimezone(timezone string) (*time.Location, error)
ParseTimezone parses the timezone string into a *time.Location.
func ReadDimension ¶
func ReadDimension(valueStart, nullStart unsafe.Pointer, index int, dataType memCom.DataType, enumReverseDict []string, meta *TimeDimensionMeta, cache map[TimeDimensionMeta]map[int64]string) *string
ReadDimension reads a dimension value given the index and the corresponding data type of the node. tzRemedy is used to remedy the timezone offset.
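A hedged sketch of reading every dimension value out of a host-side dimension vector, combining GetDimensionStartOffsets and ReadDimension. The import paths, the dataTypes slice, and the assumption that a nil *string (and a zeroed null vector) means NULL are illustrative, not confirmed by this page:

package main

import (
	"fmt"
	"unsafe"

	memCom "github.com/uber/aresdb/memstore/common"
	queryCom "github.com/uber/aresdb/query/common"
)

func main() {
	// One 4-byte dimension and one 1-byte dimension.
	counts := queryCom.DimCountsPerDimWidth{0, 0, 1, 0, 1}
	resultSize := 2
	dataTypes := []memCom.DataType{memCom.Uint32, memCom.Uint8}

	// In a real flow this vector comes back from the query engine; here it
	// is just zeroed memory of the right size, so every value reads as NULL.
	dimVector := make([]byte, queryCom.DimValResVectorSize(resultSize, counts))

	for dimIndex, dataType := range dataTypes {
		valueOffset, nullOffset := queryCom.GetDimensionStartOffsets(counts, dimIndex, resultSize)
		for row := 0; row < resultSize; row++ {
			s := queryCom.ReadDimension(
				unsafe.Pointer(&dimVector[valueOffset]),
				unsafe.Pointer(&dimVector[nullOffset]),
				row, dataType, nil, nil, nil)
			if s == nil {
				fmt.Println(dimIndex, row, "NULL")
			} else {
				fmt.Println(dimIndex, row, *s)
			}
		}
	}
}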
Types ¶
type AQLQuery ¶ added in v0.0.2
type AQLQuery struct {
	// Name of the main table.
	Table string `json:"table"`
	// Shards of the query.
	// If empty then all shards of the table
	// owned by the host will be queried.
	Shards []int `json:"shards"`
	// Foreign tables to be joined.
	Joins []Join `json:"joins,omitempty"`
	// Dimensions to group by on.
	Dimensions []Dimension `json:"dimensions,omitempty"`
	// Measures/metrics to report.
	Measures []Measure `json:"measures"`
	// Row level filters to apply for all measures. The filters are ANDed together.
	Filters       []string    `json:"rowFilters,omitempty"`
	FiltersParsed []expr.Expr `json:"-"`
	// Syntax sugar for specifying a time based range filter.
	TimeFilter TimeFilter `json:"timeFilter,omitempty"`
	// Additional supporting dimensions, these dimensions will not be grouped by,
	// but they may be referenced in Dimensions, Measures, SupportingDimensions and SupportingMeasures.
	SupportingDimensions []Dimension `json:"supportingDimensions,omitempty"`
	// Additional supporting measures, these measures will not be reported,
	// but they may be referenced in Measures and SupportingMeasures.
	SupportingMeasures []Measure `json:"supportingMeasures,omitempty"`
	// Timezone to use when converting timestamp to calendar time, specified as:
	//  - -8:00
	//  - GMT
	//  - America/Los_Angeles
	//  - timezone(city_id)
	//  - region_timezone(city_id)
	//  - mega_region_timezone(city_id)
	//  - sub_region_timezone(city_id)
	//  - country_timezone(city_id)
	Timezone string `json:"timezone,omitempty"`
	// This overrides "now" (in seconds).
	Now int64 `json:"now,omitempty"`
	// Limit is the max number of rows to return; only used for non-aggregation queries.
	Limit int         `json:"limit,omitempty"`
	Sorts []SortField `json:"sorts,omitempty" yaml:"sorts"`
	// SQLQuery
	SQLQuery string `json:"sql,omitempty"`
}
AQLQuery specifies the query on top of tables.
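A hedged sketch of composing a minimal query and serializing it with the JSON tags above. The import path and the table/column names ("trips", "request_at", "city_id") are illustrative assumptions:

package main

import (
	"encoding/json"
	"fmt"

	queryCom "github.com/uber/aresdb/query/common"
)

func main() {
	q := queryCom.AQLQuery{
		Table: "trips",
		Dimensions: []queryCom.Dimension{
			{Expr: "request_at", TimeBucketizer: "day"},
			{Expr: "city_id"},
		},
		Measures: []queryCom.Measure{
			{Expr: "count(*)"},
		},
		TimeFilter: queryCom.TimeFilter{
			Column: "request_at",
			From:   "-7d",
			To:     "now",
		},
		Timezone: "America/Los_Angeles",
	}

	b, err := json.MarshalIndent(q, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(b))
}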
type AQLQueryResult ¶ added in v0.0.2
type AQLQueryResult map[string]interface{}
AQLQueryResult represents final result of one AQL query
It has two possible formats.

Time series result format, with one dimension on each layer:
- there is always an outermost time dimension. It stores the start time of the bucket/duration (in seconds since Epoch).
- after the time dimension, there can be zero or more layers of additional dimensions (all values are represented as strings). A special "NULL" string is used to represent NULL values.
- there is always a single measure, and the measure type is either float64 or nil (not *float64).

Non-aggregate query result format:
- there is a "headers" key whose value is a list of column names.
- there is a "matrixData" key whose value is a 2D array of values (row formatted).

A result should use only one of the two formats, consistently; both shapes are sketched below.
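A hedged illustration of the two shapes as Go literals (the dimension and measure values are made up):

package main

import "fmt"

func main() {
	// Time series format: time dim -> string dims -> float64 measure.
	agg := map[string]interface{}{
		"1546300800": map[string]interface{}{
			"San Francisco": 123.0,
			"NULL":          4.0, // rows whose city dimension was NULL
		},
	}

	// Non-aggregate format: column headers plus row-major matrix data.
	nonAgg := map[string]interface{}{
		"headers":    []string{"city", "fare"},
		"matrixData": [][]interface{}{{"San Francisco", "12.5"}},
	}

	fmt.Println(agg, nonAgg)
}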
func ComputeHLLResult ¶ added in v0.0.2
func ComputeHLLResult(result AQLQueryResult) AQLQueryResult
ComputeHLLResult computes the final cardinality results from the HLL structs stored at the leaves of the query result.
func NewTimeSeriesHLLResult ¶
func NewTimeSeriesHLLResult(buffer []byte, magicHeader uint32, ignoreEnum bool) (AQLQueryResult, error)
NewTimeSeriesHLLResult creates a new time series AQLQueryResult and deserializes the buffer into the result.
func ParseHLLQueryResults ¶
func ParseHLLQueryResults(data []byte, ignoreEnum bool) (queryResults []AQLQueryResult, queryErrors []error, err error)
ParseHLLQueryResults will parse the response body into a slice of query results and a slice of errors.
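A hedged usage sketch, assuming the import path github.com/uber/aresdb/query/common and an application/hll response body already read into data:

package main

import (
	"fmt"

	queryCom "github.com/uber/aresdb/query/common"
)

func main() {
	var data []byte // a serialized application/hll response body (elided)
	results, queryErrors, err := queryCom.ParseHLLQueryResults(data, false)
	if err != nil {
		panic(err) // the buffer itself was malformed
	}
	for i, r := range results {
		if queryErrors[i] != nil {
			fmt.Println("query", i, "failed:", queryErrors[i])
			continue
		}
		fmt.Println("query", i, "result:", r)
	}
}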
func (AQLQueryResult) Append ¶ added in v0.0.2
func (r AQLQueryResult) Append(dimValues []*string)
func (AQLQueryResult) Set ¶ added in v0.0.2
func (r AQLQueryResult) Set(dimValues []*string, measureValue *float64)
Set sets the measure value for the given dimension values.
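A hedged sketch of building a nested time series result with Set; per the format described above, each *string is one dimension layer and nil is assumed to map to the "NULL" key:

package main

import (
	"fmt"

	queryCom "github.com/uber/aresdb/query/common"
)

func main() {
	r := queryCom.AQLQueryResult{}
	bucket, city := "1546300800", "San Francisco"
	measure := 123.0

	r.Set([]*string{&bucket, &city}, &measure)
	r.Set([]*string{&bucket, nil}, nil) // NULL city dimension, nil measure

	fmt.Println(r)
}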
func (AQLQueryResult) SetHLL ¶ added in v0.0.2
func (r AQLQueryResult) SetHLL(dimValues []*string, hll HLL)
SetHLL sets hll struct to be the leaves of the nested map.
func (AQLQueryResult) SetHeaders ¶ added in v0.0.2
func (r AQLQueryResult) SetHeaders(headers []string)
SetHeaders sets headers field for the results
type AQLRequest ¶ added in v0.0.2
type AQLRequest struct {
Queries []AQLQuery `json:"queries"`
}
AQLRequest contains multiple AQLQueries.
type AQLResponse ¶ added in v0.0.2
type AQLResponse struct {
	Results      []AQLQueryResult `json:"results"`
	Errors       []error          `json:"errors,omitempty"`
	QueryContext []string         `json:"context,omitempty"`
}
AQLResponse contains results for multiple AQLQueries.
type AlignedTime ¶ added in v0.0.2
type AlignedTime struct {
	Time time.Time `json:"time"`
	// Values for unit: y, q, M, w, d, {12, 8, 6, 4, 3, 2}h, h, {30, 20, 15, 12, 10, 6, 5, 4, 3, 2}m, m
	Unit string `json:"unit"`
}
AlignedTime is time that is calendar aligned to the unit.
func ParseTimeFilter ¶ added in v0.0.2
func ParseTimeFilter(filter TimeFilter, loc *time.Location, now time.Time) (from, to *AlignedTime, err error)
ParseTimeFilter parses the time filter into an aligned [from, to] range, using the given location and now as the reference time.
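A hedged sketch of the typical pipeline: parse the timezone, align the time filter, then build the filter expressions. The import paths and the expr.VarRef column expression are assumptions for illustration:

package main

import (
	"fmt"
	"time"

	queryCom "github.com/uber/aresdb/query/common"
	"github.com/uber/aresdb/query/expr"
)

func main() {
	loc, err := queryCom.ParseTimezone("America/Los_Angeles")
	if err != nil {
		panic(err)
	}

	filter := queryCom.TimeFilter{Column: "request_at", From: "-7d", To: "now"}
	from, to, err := queryCom.ParseTimeFilter(filter, loc, time.Now())
	if err != nil {
		panic(err)
	}

	// Assumed column expression; a real caller builds this from the schema.
	col := &expr.VarRef{Val: "request_at"}
	fromExpr, toExpr := queryCom.CreateTimeFilterExpr(col, from, to)
	fmt.Println(fromExpr, toExpr)
}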
type DimCountsPerDimWidth ¶
type DimCountsPerDimWidth [5]uint8
DimCountsPerDimWidth defines the dimension counts per dimension width. The five entries correspond to 16-byte, 8-byte, 4-byte, 2-byte, and 1-byte dimension widths, in that order.
type Dimension ¶ added in v0.0.2
type Dimension struct {
	// Alias/name of the dimension, to be referenced by other dimensions and measures.
	Alias string `json:"alias,omitempty"`
	// The SQL expression for computing the dimension.
	// Expr can be empty when TimeBucketizer is specified, which implies the
	// designated time column from the main table is used as the expression.
	Expr       string    `json:"sqlExpression"`
	ExprParsed expr.Expr `json:"-"`
	// Decides how to bucketize a timestamp Dimension before grouping by.
	// See https://github.com/uber/aresdb/wiki/aql#time_bucketizer
	TimeBucketizer string `json:"timeBucketizer,omitempty"`
	TimeUnit       string `json:"timeUnit,omitempty"`
	// Bucketizes numeric dimensions for integers and floating point numbers.
	NumericBucketizer NumericBucketizerDef `json:"numericBucketizer,omitempty"`
}
Dimension specifies a row level dimension for grouping by.
func (Dimension) IsTimeDimension ¶ added in v0.0.2
type HLL ¶
type HLL struct {
	SparseData       []HLLRegister // Unsorted registers.
	DenseData        []byte        // Rho by register index.
	NonZeroRegisters uint16
}
HLL stores only the dense data for now.
func (*HLL) ConvertToDense ¶
func (hll *HLL) ConvertToDense()
ConvertToDense converts the HLL to dense format.
func (*HLL) ConvertToSparse ¶
ConvertToSparse tries to convert the HLL to the sparse format if that turns out to be cheaper.
func (*HLL) Decode ¶
Decode decodes the HLL from the cache. The data is interpreted as dense or sparse format based on len(data).
func (*HLL) Encode ¶
Encode encodes the HLL for cache storage. The dense format has a length of 1<<hllP; the sparse format has a smaller length.
func (*HLL) EncodeBinary ¶ added in v0.0.2
EncodeBinary converts the HLL to its binary format, aligned to 4 bytes for sparse HLLs. It is used to build the response for application/hll queries from the HLL struct.
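A hedged sketch of the dense/sparse trade-off: with far fewer than DenseThreshold non-zero registers, the sparse form is cheaper, so attempting ConvertToSparse makes sense. The import path is assumed as before, and the no-argument call shapes follow the method docs above:

package main

import queryCom "github.com/uber/aresdb/query/common"

func main() {
	hll := queryCom.HLL{
		DenseData: make([]byte, queryCom.DenseDataLength),
	}
	hll.DenseData[42] = 5 // rho value for register 42
	hll.NonZeroRegisters = 1

	// Only one of DenseDataLength/4 registers is set, so the sparse
	// representation should win here.
	hll.ConvertToSparse()
	hll.ConvertToDense() // and back
}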
type HLLData ¶
type HLLData struct {
	NumDimsPerDimWidth             DimCountsPerDimWidth
	ResultSize                     uint32
	PaddedRawDimValuesVectorLength uint32
	PaddedHLLVectorLength          int64
	DimIndexes                     []int
	DataTypes                      []memCom.DataType
	// map from dimension index => enum cases. It will
	// only include columns used in dimensions.
	EnumDicts map[int][]string
}
HLLData stores the fields for serializing and deserializing a hyperloglog query result when the client sets the Accept header to application/hll. The serialized buffer of an HLL data is in the following format:
[uint32] magic_number [uint32] padding

-----------query result 0-------------------
<header>
	[uint32] query result 0 size
	[uint8] error or result
	[3 bytes padding]
	[uint8] num_enum_columns
	[uint8] bytes per dim ... [padding for 8 bytes]
	[uint32] result_size
	[uint32] raw_dim_values_vector_length
	[uint8] dim_index_0 ... [uint8] dim_index_n [padding for 8 bytes]
	[uint32] data_type_0 ... [uint32] data_type_n [padding for 8 bytes]
	<enum cases 0>
		[uint32] number of bytes of enum cases
		[uint16] dim_index
		[2 bytes padding]
		<enum values 0> delimited by "\u0000\n" [padding for 8 bytes]
	...
<end of header>
<raw dim values vector>
...
[padding for 8 byte alignment]
<raw hll dense vector>
...

------------error 1----------
[uint32] query result 1 size
[uint8] error or result
[3 bytes padding]
...
func (*HLLData) CalculateSizes ¶
CalculateSizes returns the header size and total size used by this HLL data.
type HLLDataWriter ¶ added in v0.0.2
HLLDataWriter is the struct used to serialize the HLLData struct.
func (*HLLDataWriter) SerializeHeader ¶ added in v0.0.2
func (builder *HLLDataWriter) SerializeHeader() error
SerializeHeader serializes the HLL header in the following format:

-----------query result 0-------------------
<header>
	[uint8] num_enum_columns
	[uint8] bytes per dim ... [padding for 8 bytes]
	[uint32] result_size
	[uint32] raw_dim_values_vector_length
	[uint8] dim_index_0 ... [uint8] dim_index_n [padding for 8 bytes]
	[uint32] data_type_0 ... [uint32] data_type_n [padding for 8 bytes]
	<enum cases 0>
		[uint32] number of bytes of enum cases
		[uint16] column_index
		[2 bytes padding]
		<enum values 0> delimited by "\u0000\n" [padding for 8 bytes]
	...
<end of header>
type HLLQueryResults ¶ added in v0.0.2
type HLLQueryResults struct {
// contains filtered or unexported fields
}
HLLQueryResults holds the buffer to store multiple hll query results or errors.
func NewHLLQueryResults ¶ added in v0.0.2
func NewHLLQueryResults() *HLLQueryResults
NewHLLQueryResults returns a new HLLQueryResults and writes the magic header and padding to the underlying buffer.
func (*HLLQueryResults) GetBytes ¶ added in v0.0.2
func (r *HLLQueryResults) GetBytes() []byte
GetBytes returns the underlying bytes.
func (*HLLQueryResults) WriteError ¶ added in v0.0.2
func (r *HLLQueryResults) WriteError(err error)
WriteError writes an error to the buffer.
func (*HLLQueryResults) WriteResult ¶ added in v0.0.2
func (r *HLLQueryResults) WriteResult(result []byte)
WriteResult writes a result to the buffer.
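A hedged sketch of assembling an application/hll response body; the per-query serialized payload (e.g. produced via HLLDataWriter) is elided:

package main

import (
	"errors"

	queryCom "github.com/uber/aresdb/query/common"
)

func main() {
	r := queryCom.NewHLLQueryResults()

	var serialized []byte // one query's serialized HLL result (elided)
	r.WriteResult(serialized)

	// A failed query is recorded in the same stream.
	r.WriteError(errors.New("timeout"))

	body := r.GetBytes() // complete response body, starting with the magic header
	_ = body
}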
type HLLRegister ¶
HLLRegister is the register used in the sparse representation.
type Join ¶ added in v0.0.2
type Join struct {
	// Name of the table to join against.
	Table string `json:"table"`
	// Alias for the table. Empty means the table name will be used as alias.
	Alias string `json:"alias"`
	// Condition expressions to be ANDed together for the join.
	Conditions       []string    `json:"conditions"`
	ConditionsParsed []expr.Expr `json:"-"`
}
Join specifies a secondary table to be explicitly joined in the query.
type Measure ¶ added in v0.0.2
type Measure struct {
	// Alias/name of the measure, to be referenced by other (derived) measures.
	Alias string `json:"alias,omitempty"`
	// The SQL expression for computing the measure.
	Expr       string    `json:"sqlExpression"`
	ExprParsed expr.Expr `json:"-"`
	// Row level filters to apply for this measure.
	// The filters are ANDed together.
	Filters       []string    `json:"rowFilters,omitempty"`
	FiltersParsed []expr.Expr `json:"-"`
}
Measure specifies a group level aggregation measure.
type NumericBucketizerDef ¶ added in v0.0.2
type NumericBucketizerDef struct {
	// Generates equal-width buckets. BucketWidth should be positive.
	// The generated buckets are:
	// ... [-2w, -w), [-w, 0), [0, w), [w, 2w) ...
	BucketWidth float64 `json:"bucketWidth,omitempty"`
	// Generates exponential/log buckets. LogBase should be positive.
	// The generated buckets are:
	// ... [pow(b, -2), pow(b, -1)), [pow(b, -1), 1), [1, pow(b, 1)), [pow(b, 1), pow(b, 2)) ...
	LogBase float64 `json:"logBase,omitempty"`
	// Generates a fixed number of buckets using the specified partitions.
	// The numbers should be in sorted order. The generated buckets are:
	// [-inf, p0), [p0, p1), [p1, p2), ... [pn-1, inf)
	ManualPartitions []float64 `json:"manualPartitions,omitempty"`
}
NumericBucketizerDef defines how numbers should be bucketized before being grouped by as a dimension. The returned dimension is a string in the format of `lower_bound`, representing `[lower_bound, upper_bound)`.
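For example, with equal-width buckets of width 10, a value of 27.5 falls into [20, 30) and is reported as the string "20". A hedged sketch of configuring one (the import path and the column name "trip_fare" are made up for illustration):

package main

import (
	"fmt"

	queryCom "github.com/uber/aresdb/query/common"
)

func main() {
	dim := queryCom.Dimension{
		Expr:              "trip_fare",
		NumericBucketizer: queryCom.NumericBucketizerDef{BucketWidth: 10},
	}
	fmt.Printf("%+v\n", dim)
}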
type SortField ¶ added in v0.0.2
type SortField struct {
	// Name or alias of the field
	Name string `json:"name"`
	// Order of the column, either asc or desc
	Order string `json:"order"`
}
SortField represents a field to sort results by.
type TimeDimensionMeta ¶
type TimeDimensionMeta struct {
	TimeBucketizer  string
	TimeUnit        string
	IsTimezoneTable bool
	TimeZone        *time.Location
	DSTSwitchTs     int64
	FromOffset      int
	ToOffset        int
}
TimeDimensionMeta is the aggregation of the metadata needed to format time dimensions.
type TimeFilter ¶ added in v0.0.2
type TimeFilter struct {
	// A table time column in the format of column, or table_alias.column.
	// When empty, it defaults to the designated time column of the main table.
	Column string `json:"column"`
	// The time specified in from and to are both inclusive.
	// See https://github.com/uber/aresdb/wiki/aql#time_filter
	From string `json:"from"`
	To   string `json:"to"`
}
TimeFilter is a syntax sugar for specifying time range.
type TimeSeriesBucketizer ¶
TimeSeriesBucketizer is the helper struct for expressing a parsed time bucketizer; see ParseRegularTimeBucketizer below.
func ParseRegularTimeBucketizer ¶
func ParseRegularTimeBucketizer(timeBucketizerString string) (TimeSeriesBucketizer, error)
ParseRegularTimeBucketizer tries to convert a regular time bucketizer (anything below a month) input string to a (Size, Unit) pair, and reports an error if the input is invalid/unsupported. E.g. "3m" -> (3, "m"), "4 hours" -> (4, "h").
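A hedged usage sketch; the Size and Unit field names follow the (Size, Unit) pair described above, and the import path is assumed as before:

package main

import (
	"fmt"

	queryCom "github.com/uber/aresdb/query/common"
)

func main() {
	b, err := queryCom.ParseRegularTimeBucketizer("3m")
	if err != nil {
		panic(err)
	}
	fmt.Println(b.Size, b.Unit) // 3 m
}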