Documentation
¶
Index ¶
- Constants
- Variables
- func ColumnHeaderSize(numCols int) int
- func ColumnHeaderSizeNew(numCols int) int
- func CompareBool(a, b bool) int
- func CompareFloat32(a, b unsafe.Pointer) int
- func CompareInt16(a, b unsafe.Pointer) int
- func CompareInt32(a, b unsafe.Pointer) int
- func CompareInt64(a, b unsafe.Pointer) int
- func CompareInt8(a, b unsafe.Pointer) int
- func CompareUint16(a, b unsafe.Pointer) int
- func CompareUint32(a, b unsafe.Pointer) int
- func CompareUint8(a, b unsafe.Pointer) int
- func ConvertToBool(value interface{}) (bool, bool)
- func ConvertToFloat32(value interface{}) (float32, bool)
- func ConvertToFloat64(value interface{}) (float64, bool)
- func ConvertToGeoPoint(value interface{}) ([2]float32, bool)
- func ConvertToInt16(value interface{}) (int16, bool)
- func ConvertToInt32(value interface{}) (int32, bool)
- func ConvertToInt64(value interface{}) (int64, bool)
- func ConvertToInt8(value interface{}) (int8, bool)
- func ConvertToUUID(value interface{}) ([2]uint64, bool)
- func ConvertToUint16(value interface{}) (uint16, bool)
- func ConvertToUint32(value interface{}) (uint32, bool)
- func ConvertToUint64(value interface{}) (uint64, bool)
- func ConvertToUint8(value interface{}) (uint8, bool)
- func DataTypeBits(dataType DataType) int
- func DataTypeBytes(dataType DataType) int
- func GeoPointFromString(str string) (point [2]float32, err error)
- func IsGoType(dataType DataType) bool
- func IsNumeric(dataType DataType) bool
- type ArchiveVectorParty
- type CVectorParty
- type ColumnMemoryUsage
- type ColumnMode
- type ColumnUpdateMode
- type CompareFunc
- type DataType
- type DataValue
- func UpdateWithAdditionFunc(oldValue, newValue *DataValue) (*DataValue, bool, error)
- func UpdateWithMaxFunc(oldValue, newValue *DataValue) (*DataValue, bool, error)
- func UpdateWithMinFunc(oldValue, newValue *DataValue) (*DataValue, bool, error)
- func ValueFromString(str string, dataType DataType) (val DataValue, err error)
- type GeoPointGo
- type GeoShapeGo
- type GoDataValue
- type HostMemoryManager
- type HostVectorPartySlice
- type JobType
- type LiveVectorParty
- type SlicedVector
- type UpsertBatchBuilder
- func (u *UpsertBatchBuilder) AddColumn(columnID int, dataType DataType) error
- func (u *UpsertBatchBuilder) AddColumnWithUpdateMode(columnID int, dataType DataType, updateMode ColumnUpdateMode) error
- func (u *UpsertBatchBuilder) AddRow()
- func (u *UpsertBatchBuilder) RemoveRow()
- func (u *UpsertBatchBuilder) ResetRows()
- func (u *UpsertBatchBuilder) SetValue(row int, col int, value interface{}) error
- func (u UpsertBatchBuilder) ToByteArray() ([]byte, error)
- func (u UpsertBatchBuilder) ToByteArrayNew() ([]byte, error)
- type UpsertBatchHeader
- func (u UpsertBatchHeader) ReadColumnFlag(col int) (ColumnMode, ColumnUpdateMode, error)
- func (u UpsertBatchHeader) ReadColumnID(col int) (int, error)
- func (u UpsertBatchHeader) ReadColumnOffset(col int) (int, error)
- func (u UpsertBatchHeader) ReadColumnType(col int) (DataType, error)
- func (u *UpsertBatchHeader) WriteColumnFlag(columnMode ColumnMode, columnUpdateMode ColumnUpdateMode, col int) error
- func (u *UpsertBatchHeader) WriteColumnID(value int, col int) error
- func (u *UpsertBatchHeader) WriteColumnOffset(value int, col int) error
- func (u *UpsertBatchHeader) WriteColumnType(value DataType, col int) error
- type ValueCountsUpdateMode
- type VectorParty
- type VectorPartySerializer
Constants ¶
const SizeOfGeoPoint = unsafe.Sizeof(GeoPointGo{})
SizeOfGeoPoint is the size of GeoPointGo in memory
const (
UpsertBatchVersion uint32 = 0xFEED0001
)
Variables ¶
var DataTypeName = map[DataType]string{ Unknown: "Unknown", Bool: metaCom.Bool, Int8: metaCom.Int8, Uint8: metaCom.Uint8, Int16: metaCom.Int16, Uint16: metaCom.Uint16, Int32: metaCom.Int32, Uint32: metaCom.Uint32, Float32: metaCom.Float32, SmallEnum: metaCom.SmallEnum, BigEnum: metaCom.BigEnum, UUID: metaCom.UUID, GeoPoint: metaCom.GeoPoint, GeoShape: metaCom.GeoShape, Int64: metaCom.Int64, }
DataTypeName maps each DataType to its literal name.
var NullDataValue = DataValue{}
NullDataValue is a global data value that stands for a null value, used where newly added columns haven't received any data.
var StringToDataType = map[string]DataType{ metaCom.Bool: Bool, metaCom.Int8: Int8, metaCom.Uint8: Uint8, metaCom.Int16: Int16, metaCom.Uint16: Uint16, metaCom.Int32: Int32, metaCom.Uint32: Uint32, metaCom.Float32: Float32, metaCom.SmallEnum: SmallEnum, metaCom.BigEnum: BigEnum, metaCom.UUID: UUID, metaCom.GeoPoint: GeoPoint, metaCom.GeoShape: GeoShape, metaCom.Int64: Int64, }
StringToDataType maps string representation to DataType
Functions ¶
func ColumnHeaderSize ¶
ColumnHeaderSize returns the total size of the column headers. TODO: delete after migration of new upsert batch version
func ColumnHeaderSizeNew ¶
ColumnHeaderSizeNew returns the total size of the column headers.
func CompareFloat32 ¶
CompareFloat32 compares float32 value
func CompareUint16 ¶
CompareUint16 compares uint16 value
func CompareUint32 ¶
CompareUint32 compares uint32 value
func ConvertToBool ¶
ConvertToBool convert input into bool at best effort
func ConvertToFloat32 ¶
ConvertToFloat32 convert input into float32 at best effort
func ConvertToFloat64 ¶
ConvertToFloat64 convert input into float64 at best effort
func ConvertToGeoPoint ¶
ConvertToGeoPoint convert input into geopoint type ([2]float32) at best effort
func ConvertToInt16 ¶
ConvertToInt16 convert input into int16 at best effort
func ConvertToInt32 ¶
ConvertToInt32 convert input into int32 at best effort
func ConvertToInt64 ¶
ConvertToInt64 convert input into int64 at best effort
func ConvertToInt8 ¶
ConvertToInt8 convert input into int8 at best effort
func ConvertToUUID ¶
ConvertToUUID convert input into uuid type ([2]uint64) at best effort
func ConvertToUint16 ¶
ConvertToUint16 convert input into uint16 at best effort
func ConvertToUint32 ¶
ConvertToUint32 convert input into uint32 at best effort
func ConvertToUint64 ¶
ConvertToUint64 convert input into uint64 at best effort
func ConvertToUint8 ¶
ConvertToUint8 convert input into uint8 at best effort
func DataTypeBits ¶
DataTypeBits returns the number of bits of a data type.
func DataTypeBytes ¶
DataTypeBytes returns how many bytes a value of the data type occupies.
func GeoPointFromString ¶
GeoPointFromString converts a string to a geopoint. We support the WKT format, e.g. Point(lng,lat). Inside the gforcedb system we store points in lat,lng format.
Types ¶
type ArchiveVectorParty ¶
type ArchiveVectorParty interface { VectorParty // Get cumulative count on specified offset GetCount(offset int) uint32 // set cumulative count on specified offset SetCount(offset int, count uint32) // Pin archive vector party for use Pin() // Release pin Release() // WaitForUsers Wait/Check whether all users finished // batch lock needs to be held before calling if blocking wait // eg. // batch.Lock() // vp.WaitForUsers(true) // batch.Unlock() WaitForUsers(blocking bool) (usersDone bool) // CopyOnWrite copies vector party on write/update CopyOnWrite(batchSize int) ArchiveVectorParty // LoadFromDisk start loading vector party from disk, // this is a non-blocking operation LoadFromDisk(hostMemManager HostMemoryManager, diskStore diskstore.DiskStore, table string, shardID int, columnID, batchID int, batchVersion uint32, seqNum uint32) // WaitForDiskLoad waits for vector party disk load to finish WaitForDiskLoad() // Prune prunes vector party based on column mode to clean memory if possible Prune() // Slice vector party using specified value within [lowerBoundRow, upperBoundRow) SliceByValue(lowerBoundRow, upperBoundRow int, value unsafe.Pointer) (startRow int, endRow int, startIndex int, endIndex int) // Slice vector party to get [startIndex, endIndex) based on [lowerBoundRow, upperBoundRow) SliceIndex(lowerBoundRow, upperBoundRow int) (startIndex, endIndex int) }
ArchiveVectorParty represents vector party in archive store
type CVectorParty ¶
type CVectorParty interface { //Judge column mode JudgeMode() ColumnMode // Get column mode GetMode() ColumnMode }
CVectorParty is vector party that is backed by c
type ColumnMemoryUsage ¶
type ColumnMemoryUsage struct { Preloaded uint `json:"preloaded"` NonPreloaded uint `json:"nonPreloaded"` Live uint `json:"live"` }
ColumnMemoryUsage contains column memory usage
type ColumnMode ¶
type ColumnMode int
ColumnMode represents how many vectors a vector party may have. For live batch, it should always be 0,1 or 2. For sorted column of archive batch, it will be mode 0 or 3. For other columns of archive batch, it can be any of these four modes.
const ( // AllValuesDefault (mode 0) AllValuesDefault ColumnMode = iota // AllValuesPresent (mode 1) AllValuesPresent // HasNullVector (mode 2) HasNullVector // HasCountVector (mode 3) HasCountVector // MaxColumnMode represents the upper limit of column modes MaxColumnMode )
type ColumnUpdateMode ¶
type ColumnUpdateMode int
ColumnUpdateMode represents how to update data from UpsertBatch
const ( // UpdateOverwriteNotNull (default) will overwrite existing value if new value is NOT null, otherwise just skip UpdateOverwriteNotNull ColumnUpdateMode = iota // UpdateForceOverwrite will simply overwrite existing value even when new data is null UpdateForceOverwrite // UpdateWithAddition will add the existing value with new value if new value is not null, existing null value will be treated as 0 in calculation UpdateWithAddition // UpdateWithMin will save the minimum of existing and new value if new value is not null, existing null value will be treated as MAX_INT in calculation UpdateWithMin // UpdateWithMax will save the maximum of existing and new value if new value is not null, existing null value will be treated as MIN_INT in calculation UpdateWithMax // MaxColumnUpdateMode is the current upper limit for column update modes MaxColumnUpdateMode )
type CompareFunc ¶
CompareFunc represents compare function
func GetCompareFunc ¶
func GetCompareFunc(dataType DataType) CompareFunc
GetCompareFunc get the compare function for specific data type
type DataType ¶
type DataType uint32
DataType is the type of value supported in gforcedb.
const ( Unknown DataType = 0x00000000 Bool DataType = 0x00000001 Int8 DataType = 0x00010008 Uint8 DataType = 0x00020008 Int16 DataType = 0x00030010 Uint16 DataType = 0x00040010 Int32 DataType = 0x00050020 Uint32 DataType = 0x00060020 Float32 DataType = 0x00070020 SmallEnum DataType = 0x00080008 BigEnum DataType = 0x00090010 UUID DataType = 0x000a0080 GeoPoint DataType = 0x000b0040 GeoShape DataType = 0x000c0000 Int64 DataType = 0x000d0040 )
The list of supported DataTypes. DataType & 0x0000FFFF: The width of the data type in bits. DataType & 0x00FF0000 >> 16: The base type of the enum. DataType & 0xFF000000 >> 24: Reserved for supporting variable length values (array). See https://github.com/uber/aresdb/wiki/redologs for more details.
func DataTypeFromString ¶
DataTypeFromString convert string representation of data type into DataType
func NewDataType ¶
NewDataType converts a uint32 value into a DataType. It returns an error if the data type is invalid.
type DataValue ¶
type DataValue struct { // Used for golang vector party GoVal GoDataValue OtherVal unsafe.Pointer DataType DataType CmpFunc CompareFunc Valid bool IsBool bool BoolVal bool }
DataValue is the wrapper to encapsulate validity, bool value and other value type into a single struct to make it easier for value comparison.
func UpdateWithAdditionFunc ¶
UpdateWithAdditionFunc will return the addition of old value and new value
func UpdateWithMaxFunc ¶
UpdateWithMaxFunc will return the maximum of old and new value
func UpdateWithMinFunc ¶
UpdateWithMinFunc will return the minimum of old and new value
func ValueFromString ¶
ValueFromString converts raw string value to actual value given input data type.
func (DataValue) ConvertToHumanReadable ¶
ConvertToHumanReadable convert DataValue to meaningful golang data types
type GeoShapeGo ¶
type GeoShapeGo struct {
Polygons [][]GeoPointGo
}
GeoShapeGo represents GeoShape Golang Type
func ConvertToGeoShape ¶
func ConvertToGeoShape(value interface{}) (*GeoShapeGo, bool)
ConvertToGeoShape converts the arbitrary value to GeoShapeGo
func GeoShapeFromString ¶
func GeoShapeFromString(str string) (GeoShapeGo, error)
GeoShapeFromString convert string to geoshape Supported format POLYGON ((lng lat, lng lat, lng lat, ...), (...))
func (*GeoShapeGo) GetBytes ¶
func (gs *GeoShapeGo) GetBytes() int
GetBytes implements GoDataValue interface
func (*GeoShapeGo) GetSerBytes ¶
func (gs *GeoShapeGo) GetSerBytes() int
GetSerBytes implements GoDataValue interface
func (*GeoShapeGo) Read ¶
func (gs *GeoShapeGo) Read(dataReader *utils.StreamDataReader) error
Read implements Read interface for GoDataValue
func (*GeoShapeGo) Write ¶
func (gs *GeoShapeGo) Write(dataWriter *utils.StreamDataWriter) error
Write implements Write interface for GoDataValue
type GoDataValue ¶
type GoDataValue interface { // GetBytes returns number of bytes copied in golang memory for this value GetBytes() int // GetSerBytes return the number of bytes required for serialize this value GetSerBytes() int Write(writer *utils.StreamDataWriter) error Read(reader *utils.StreamDataReader) error }
GoDataValue represents a value backed in golang memory
func GetGoDataValue ¶
func GetGoDataValue(dataType DataType) GoDataValue
GetGoDataValue return GoDataValue
type HostMemoryManager ¶
type HostMemoryManager interface { ReportUnmanagedSpaceUsageChange(bytes int64) ReportManagedObject(table string, shard, batchID, columnID int, bytes int64) GetArchiveMemoryUsageByTableShard() (map[string]map[string]*ColumnMemoryUsage, error) TriggerEviction() TriggerPreload(tableName string, columnID int, oldPreloadingDays int, newPreloadingDays int) Start() Stop() }
HostMemoryManager manages archive batch storage in host memory. Specifically, it keeps track of memory usage of archive batches and makes preloading and eviction decisions based on retention config.
The space available to archive batches is defined as maxMem - unmanagedMem, where unmanagedMem accounts for C allocated buffers in live batches and primary keys, which changes over time. Eviction of archive batches is configured at column level using two configs: preloadingDays and priorities.
Data eviction policy is defined as such: Always evict data not in the preloading zone first; preloading data won’t be evicted until all the non-preloading data are evicted and the server is still short of memory. For data within the same zone, eviction will happen based on column priority. For data with the same priority, eviction will happen based on data time: older data will be evicted first, and for data of the same age, larger columns will be evicted first.
HostMemoryManager will also maintain two goroutines: one for preloading data and another for eviction. Call Start to start those goroutines and Stop to stop them. Stop is a blocking call.
Both TriggerPreload and TriggerEviction are asynchronous calls.
type HostVectorPartySlice ¶
type HostVectorPartySlice struct { Values unsafe.Pointer Nulls unsafe.Pointer // The length of the count vector is Length+1 Counts unsafe.Pointer Length int ValueType DataType DefaultValue DataValue ValueStartIndex int NullStartIndex int CountStartIndex int ValueBytes int NullBytes int CountBytes int }
HostVectorPartySlice stores pointers to data for a column in host memory, along with its start indexes and byte sizes.
type JobType ¶
type JobType string
JobType is the type of a job: archiving, backfill, snapshot, or purge.
const ( // ArchivingJobType is the archiving job type. ArchivingJobType JobType = "archiving" // BackfillJobType is the backfill job type. BackfillJobType JobType = "backfill" // SnapshotJobType is the snapshot job type. SnapshotJobType JobType = "snapshot" // PurgeJobType is the purge job type. PurgeJobType JobType = "purge" )
type LiveVectorParty ¶
type LiveVectorParty interface { VectorParty // GetMinMaxValue get min and max value, // returns uint32 value since only valid for time column GetMinMaxValue() (min, max uint32) }
LiveVectorParty represents vector party in live store
type SlicedVector ¶
type SlicedVector struct { Values []interface{} `json:"values"` Counts []int `json:"counts"` }
SlicedVector is vector party data represented in a human-readable slice format. It consists of a value slice and a count slice; the count slice holds accumulative counts. swagger:model slicedVector
type UpsertBatchBuilder ¶
type UpsertBatchBuilder struct { NumRows int // contains filtered or unexported fields }
UpsertBatchBuilder is the builder for constructing an UpsertBatch buffer. It allows random value write at (row, col).
func NewUpsertBatchBuilder ¶
func NewUpsertBatchBuilder() *UpsertBatchBuilder
NewUpsertBatchBuilder creates a new builder for constructing an UpsertBatch.
func (*UpsertBatchBuilder) AddColumn ¶
func (u *UpsertBatchBuilder) AddColumn(columnID int, dataType DataType) error
AddColumn add a new column to the builder. Initially, new columns have all values set to null.
func (*UpsertBatchBuilder) AddColumnWithUpdateMode ¶
func (u *UpsertBatchBuilder) AddColumnWithUpdateMode(columnID int, dataType DataType, updateMode ColumnUpdateMode) error
AddColumnWithUpdateMode add a new column to the builder with update mode info. Initially, new columns have all values set to null.
func (*UpsertBatchBuilder) AddRow ¶
func (u *UpsertBatchBuilder) AddRow()
AddRow increases the number of rows in the batch by 1. A new row with all nil values is appended to the row array.
func (*UpsertBatchBuilder) RemoveRow ¶
func (u *UpsertBatchBuilder) RemoveRow()
RemoveRow decreases the number of rows in the batch by 1. The last row will be removed. It's a no-op if the number of rows is 0.
func (*UpsertBatchBuilder) ResetRows ¶
func (u *UpsertBatchBuilder) ResetRows()
ResetRows reset the row count to 0.
func (*UpsertBatchBuilder) SetValue ¶
func (u *UpsertBatchBuilder) SetValue(row int, col int, value interface{}) error
SetValue set a value to a given (row, col).
func (UpsertBatchBuilder) ToByteArray ¶
func (u UpsertBatchBuilder) ToByteArray() ([]byte, error)
ToByteArray produces a serialized UpsertBatch in byte array.
func (UpsertBatchBuilder) ToByteArrayNew ¶
func (u UpsertBatchBuilder) ToByteArrayNew() ([]byte, error)
ToByteArrayNew will be used for new upsert batch version TODO: exposed only for test purpose before migration of new version is done
type UpsertBatchHeader ¶
type UpsertBatchHeader struct {
// contains filtered or unexported fields
}
UpsertBatchHeader is a helper class used by upsert batch reader and writer to access the column header info.
func NewUpsertBatchHeader ¶
func NewUpsertBatchHeader(buffer []byte, numCols int) UpsertBatchHeader
NewUpsertBatchHeader create upsert batch header from buffer TODO: delete after migration of new upsert batch version
func NewUpsertBatchHeaderNew ¶
func NewUpsertBatchHeaderNew(buffer []byte, numCols int) UpsertBatchHeader
NewUpsertBatchHeaderNew create upsert batch header from buffer
func (UpsertBatchHeader) ReadColumnFlag ¶
func (u UpsertBatchHeader) ReadColumnFlag(col int) (ColumnMode, ColumnUpdateMode, error)
ReadColumnFlag returns the column mode and update mode for a column.
func (UpsertBatchHeader) ReadColumnID ¶
func (u UpsertBatchHeader) ReadColumnID(col int) (int, error)
ReadColumnID returns the logical ID for a column.
func (UpsertBatchHeader) ReadColumnOffset ¶
func (u UpsertBatchHeader) ReadColumnOffset(col int) (int, error)
ReadColumnOffset takes col index from 0 to numCols + 1 and returns the value stored.
func (UpsertBatchHeader) ReadColumnType ¶
func (u UpsertBatchHeader) ReadColumnType(col int) (DataType, error)
ReadColumnType returns the type for a column.
func (*UpsertBatchHeader) WriteColumnFlag ¶
func (u *UpsertBatchHeader) WriteColumnFlag(columnMode ColumnMode, columnUpdateMode ColumnUpdateMode, col int) error
WriteColumnFlag writes the column mode and update mode of a column.
func (*UpsertBatchHeader) WriteColumnID ¶
func (u *UpsertBatchHeader) WriteColumnID(value int, col int) error
WriteColumnID writes the id of a column.
func (*UpsertBatchHeader) WriteColumnOffset ¶
func (u *UpsertBatchHeader) WriteColumnOffset(value int, col int) error
WriteColumnOffset writes the offset of a column. It can take col index from 0 to numCols + 1.
func (*UpsertBatchHeader) WriteColumnType ¶
func (u *UpsertBatchHeader) WriteColumnType(value DataType, col int) error
WriteColumnType writes the type of a column.
type ValueCountsUpdateMode ¶
type ValueCountsUpdateMode int
ValueCountsUpdateMode represents the way we update value counts when we are writing values to vector parties.
type VectorParty ¶
type VectorParty interface { // Allocate allocate underlying storage for vector party Allocate(hasCount bool) // GetValidity get validity of given offset. GetValidity(offset int) bool // GetDataValue returns the DataValue for the specified index. // It first check validity of the value, then it check whether it's a // boolean column to decide whether to load bool value or other value // type. Index bound is not checked! GetDataValue(offset int) DataValue // SetDataValue writes a data value at given offset. Third parameter count should // only be passed for compressed columns. checkValueCount is a flag to tell whether // need to check value count (NonDefaultValueCount and ValidValueCount) while setting // the value. It should be true for archive store and false for live store. **This does // not set the count vector as this is not accumulated count.** SetDataValue(offset int, value DataValue, countsUpdateMode ValueCountsUpdateMode, counts ...uint32) // GetDataValueByRow returns the DataValue for the specified row. It will do binary // search on the count vector to find the correct offset if this is a mode 3 vector // party. Otherwise it will behave same as GetDataValue. Index bound is not checked! GetDataValueByRow(row int) DataValue GetDataType() DataType GetLength() int GetBytes() int64 // Slice vector party into human readable SlicedVector format Slice(startRow, numRows int) SlicedVector // SafeDestruct destructs vector party memory SafeDestruct() // Write serialize vector party Write(writer io.Writer) error // Read deserialize vector party Read(reader io.Reader, serializer VectorPartySerializer) error // Check whether two vector parties are equal (used only in unit tests) Equals(other VectorParty) bool // GetNonDefaultValueCount get Number of non-default values stored GetNonDefaultValueCount() int }
VectorParty interface
type VectorPartySerializer ¶
type VectorPartySerializer interface { // ReadVectorParty reads vector party from disk and set fields in passed-in vp. ReadVectorParty(vp VectorParty) error // WriteVectorParty writes vector party to disk WriteVectorParty(vp VectorParty) error // CheckVectorPartySerializable check if the VectorParty is serializable CheckVectorPartySerializable(vp VectorParty) error // ReportVectorPartyMemoryUsage report memory usage according to underneath VectorParty property ReportVectorPartyMemoryUsage(bytes int64) }
VectorPartySerializer is the interface to read/write a vector party from/to disk. Refer to https://github.com/uber/aresdb/wiki/VectorStore for more details about vector party's on disk format.