Documentation ¶
Overview ¶
Package testutils contains utilities for generating random data and other helpers that are used for testing the various aspects of the parquet library.
Index ¶
- func FillRandomBooleans(p float64, seed uint64, out []bool)
- func FillRandomByteArray(seed uint64, out []parquet.ByteArray, heap *memory.Buffer)
- func FillRandomFixedByteArray(seed uint64, out []parquet.FixedLenByteArray, heap *memory.Buffer, size int)
- func FillRandomFloat16(seed uint64, out []float16.Num)
- func FillRandomFloat32(seed uint64, out []float32)
- func FillRandomFloat64(seed uint64, out []float64)
- func FillRandomInt16(seed uint64, min, max int16, out []int16)
- func FillRandomInt32(seed uint64, out []int32)
- func FillRandomInt32Max(seed uint64, max int32, out []int32)
- func FillRandomInt64(seed uint64, out []int64)
- func FillRandomInt64Max(seed uint64, max int64, out []int64)
- func FillRandomInt8(seed uint64, min, max int8, out []int8)
- func FillRandomInt96(seed uint64, out []parquet.Int96)
- func FillRandomUint16(seed uint64, min, max uint16, out []uint16)
- func FillRandomUint32(seed uint64, out []uint32)
- func FillRandomUint32Max(seed uint64, max uint32, out []uint32)
- func FillRandomUint64(seed uint64, out []uint64)
- func FillRandomUint64Max(seed uint64, max uint64, out []uint64)
- func FillRandomUint8(seed uint64, min, max uint8, out []uint8)
- func InitValues(values interface{}, heap *memory.Buffer)
- func MakeDataPage(dataPageVersion parquet.DataPageVersion, d *schema.Column, values interface{}, ...) file.Page
- func MakeDictPage(d *schema.Column, values interface{}, valuesPerPage []int, e parquet.Encoding) (*file.DictionaryPage, []encoding.Buffer)
- func Max(v1, v2 interface{}) interface{}
- func Min(v1, v2 interface{}) interface{}
- func PaginateDict(version parquet.DataPageVersion, d *schema.Column, values reflect.Value, ...) []file.Page
- func PaginatePlain(version parquet.DataPageVersion, d *schema.Column, values reflect.Value, ...) []file.Page
- func RandomByteArray(seed uint64, out []parquet.ByteArray, heap *memory.Buffer, minlen, maxlen int)
- func RandomDecimals(n int64, seed uint64, precision int32) []byte
- func RandomNonNull(mem memory.Allocator, dt arrow.DataType, size int) arrow.Array
- func RandomNullable(dt arrow.DataType, size int, numNulls int) arrow.Array
- func TypeToParquetType(typ reflect.Type) parquet.Type
- type DataPageBuilder
- type DictionaryPageBuilder
- type MockPageReader
- type PrimitiveTypedTest
- func (p *PrimitiveTypedTest) GenerateData(nvalues int64)
- func (p *PrimitiveTypedTest) GetMinMax(stat metadata.TypedStatistics) (min, max interface{})
- func (p *PrimitiveTypedTest) ReadBatch(reader file.ColumnChunkReader, batch, valuesRead int64, ...) int64
- func (p *PrimitiveTypedTest) SetupSchema(rep parquet.Repetition, ncols int)
- func (p *PrimitiveTypedTest) SetupValuesOut(nvalues int64)
- func (p *PrimitiveTypedTest) UpdateStats(stat metadata.TypedStatistics, numNull int64)
- func (p *PrimitiveTypedTest) UpdateStatsSpaced(stat metadata.TypedStatistics, numNull int64, validBits []byte, ...)
- func (p *PrimitiveTypedTest) WriteBatchSubset(batch, offset int, writer file.ColumnChunkWriter, defLevels, repLevels []int16) (int64, error)
- func (p *PrimitiveTypedTest) WriteBatchValues(writer file.ColumnChunkWriter, defLevels, repLevels []int16) (int64, error)
- func (p *PrimitiveTypedTest) WriteBatchValuesSpaced(writer file.ColumnChunkWriter, defLevels, repLevels []int16, validBits []byte, ...)
- type RandomArrayGenerator
- func (r *RandomArrayGenerator) ByteArray(size int64, minLen, maxLen int32, nullProb float64) arrow.Array
- func (r *RandomArrayGenerator) Float64(size int64, pctNull float64) *array.Float64
- func (r *RandomArrayGenerator) GenerateBitmap(buffer []byte, n int64, prob float64) int64
- func (r *RandomArrayGenerator) Int32(size int64, min, max int32, pctNull float64) *array.Int32
- func (r *RandomArrayGenerator) Int64(size int64, min, max int64, pctNull float64) *array.Int64
- func (r *RandomArrayGenerator) StringWithRepeats(mem memory.Allocator, sz, unique int64, minLen, maxLen int32, nullProb float64) *array.String
- func (r *RandomArrayGenerator) Uint8(size int64, min, max uint8, prob float64) arrow.Array
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func FillRandomBooleans ¶
FillRandomBooleans populates out with random bools with the probability p of being false using seed as the random seed to the generator in order to allow consistency for testing. This uses a Bernoulli distribution of values.
func FillRandomByteArray ¶
FillRandomByteArray populates out with random ByteArray values with lengths between 2 and 12 using heap as the actual memory storage used for the bytes generated. Each element of out will be some slice of the bytes in heap, and as such heap must outlive the byte array slices.
func FillRandomFixedByteArray ¶
func FillRandomFixedByteArray(seed uint64, out []parquet.FixedLenByteArray, heap *memory.Buffer, size int)
FillRandomFixedByteArray populates out with random FixedLenByteArray values of a length equal to size, using heap as the actual memory storage used for the bytes generated. Each element of out will be a slice of size bytes in heap, and as such heap must outlive the byte array slices.
func FillRandomFloat16 ¶
FillRandomFloat16 populates out with random float16 values using seed as the random seed for the generator to allow consistency for testing.
func FillRandomFloat32 ¶
FillRandomFloat32 populates out with random float32 values using seed as the random seed for the generator to allow consistency for testing.
func FillRandomFloat64 ¶
FillRandomFloat64 populates out with random float64 values using seed as the random seed for the generator to allow consistency for testing.
func FillRandomInt16 ¶
FillRandomInt16 populates the slice out with random int16 values between min and max using seed as the random seed for generation to allow consistency for testing.
func FillRandomInt32 ¶
FillRandomInt32 populates out with random int32 values using seed as the random seed for the generator to allow consistency for testing.
func FillRandomInt32Max ¶
FillRandomInt32Max populates out with random int32 values between 0 and max using seed as the random seed for the generator to allow consistency for testing.
func FillRandomInt64 ¶
FillRandomInt64 populates out with random int64 values using seed as the random seed for the generator to allow consistency for testing.
func FillRandomInt64Max ¶
FillRandomInt64Max populates out with random int64 values between 0 and max using seed as the random seed for the generator to allow consistency for testing.
func FillRandomInt8 ¶
FillRandomInt8 populates the slice out with random int8 values between min and max using seed as the random seed for generation to allow consistency for testing.
func FillRandomInt96 ¶
FillRandomInt96 populates out with random Int96 values using seed as the random seed for the generator to allow consistency for testing. It does this by generating three random uint32 values for each int96 value.
func FillRandomUint16 ¶
FillRandomUint16 populates the slice out with random uint16 values between min and max using seed as the random seed for generation to allow consistency for testing.
func FillRandomUint32 ¶
FillRandomUint32 populates out with random uint32 values using seed as the random seed for the generator to allow consistency for testing.
func FillRandomUint32Max ¶
FillRandomUint32Max populates out with random uint32 values between 0 and max using seed as the random seed for the generator to allow consistency for testing.
func FillRandomUint64 ¶
FillRandomUint64 populates out with random uint64 values using seed as the random seed for the generator to allow consistency for testing.
func FillRandomUint64Max ¶
FillRandomUint64Max populates out with random uint64 values between 0 and max using seed as the random seed for the generator to allow consistency for testing.
func FillRandomUint8 ¶
FillRandomUint8 populates the slice out with random uint8 values between min and max using seed as the random seed for generation to allow consistency for testing.
func InitValues ¶
InitValues is a convenience function for generating a slice of random values based on the type. If the type is parquet.ByteArray or parquet.FixedLenByteArray, heap must not be nil.
The default values are:
[]bool uses the current time as the seed with only values of 1 being false, for use in creating validity boolean slices; all other types use 0 as the seed; a []parquet.ByteArray is populated with lengths between 2 and 12; a []parquet.FixedLenByteArray is populated with fixed size random byte arrays of length 12.
func MakeDataPage ¶
func MakeDictPage ¶
func PaginateDict ¶
func PaginatePlain ¶
func RandomByteArray ¶
RandomByteArray populates out with random ByteArray values with lengths between minlen and maxlen using heap as the actual memory storage used for the bytes generated. Each element of out will be some slice of the bytes in heap, and as such heap must outlive the byte array slices.
func RandomDecimals ¶
RandomDecimals generates n random decimal values with precision determining the byte width for the values and seed as the random generator seed to allow consistency for testing. The resulting values will be either 32 bytes or 16 bytes each depending on the precision.
func RandomNonNull ¶
RandomNonNull generates a random arrow array of the requested type with length size with no nulls. Accepts float32, float64, all integer primitives, Date32, Date64, string, binary, fixed_size_binary, bool and decimal.
Always uses 0 as the seed with the following min/max restrictions: int16, uint16, int8, and uint8 will be min 0, max 64; Date32 and Date64 will be between 0 and 24 * 86400000 in increments of 86400000; String will all have the value "test-string"; binary will have each value between length 2 and 12 but random bytes that are not limited to ascii; fixed size binary will all be of length 10, random bytes are not limited to ascii; bool will be approximately half false and half true randomly.
func RandomNullable ¶
RandomNullable generates a random arrow array of length size with approximately numNulls nulls; there can be at most size/2 nulls. Other than there being nulls, the values follow the same rules as described in the docs for RandomNonNull.
Types ¶
type DataPageBuilder ¶
type DataPageBuilder struct {
// contains filtered or unexported fields
}
func (*DataPageBuilder) AppendDefLevels ¶
func (d *DataPageBuilder) AppendDefLevels(lvls []int16, maxLvl int16)
func (*DataPageBuilder) AppendRepLevels ¶
func (d *DataPageBuilder) AppendRepLevels(lvls []int16, maxLvl int16)
func (*DataPageBuilder) AppendValues ¶
func (d *DataPageBuilder) AppendValues(desc *schema.Column, values interface{}, e parquet.Encoding)
type DictionaryPageBuilder ¶
type DictionaryPageBuilder struct {
// contains filtered or unexported fields
}
func NewDictionaryPageBuilder ¶
func NewDictionaryPageBuilder(d *schema.Column) *DictionaryPageBuilder
func (*DictionaryPageBuilder) AppendValues ¶
func (d *DictionaryPageBuilder) AppendValues(values interface{}) encoding.Buffer
func (*DictionaryPageBuilder) NumValues ¶
func (d *DictionaryPageBuilder) NumValues() int32
func (*DictionaryPageBuilder) WriteDict ¶
func (d *DictionaryPageBuilder) WriteDict() *memory.Buffer
type MockPageReader ¶
func (*MockPageReader) Err ¶
func (m *MockPageReader) Err() error
func (*MockPageReader) Next ¶
func (m *MockPageReader) Next() bool
func (*MockPageReader) Page ¶
func (m *MockPageReader) Page() file.Page
func (*MockPageReader) Reset ¶
func (m *MockPageReader) Reset(parquet.BufferedReader, int64, compress.Compression, *file.CryptoContext)
func (*MockPageReader) SetMaxPageHeaderSize ¶
func (m *MockPageReader) SetMaxPageHeaderSize(int)
type PrimitiveTypedTest ¶
type PrimitiveTypedTest struct { Node schema.Node Schema *schema.Schema Typ reflect.Type DefLevels []int16 RepLevels []int16 Buffer *memory.Buffer Values interface{} ValuesOut interface{} DefLevelsOut []int16 RepLevelsOut []int16 }
func NewPrimitiveTypedTest ¶
func NewPrimitiveTypedTest(typ reflect.Type) PrimitiveTypedTest
func (*PrimitiveTypedTest) GenerateData ¶
func (p *PrimitiveTypedTest) GenerateData(nvalues int64)
func (*PrimitiveTypedTest) GetMinMax ¶
func (p *PrimitiveTypedTest) GetMinMax(stat metadata.TypedStatistics) (min, max interface{})
func (*PrimitiveTypedTest) ReadBatch ¶
func (p *PrimitiveTypedTest) ReadBatch(reader file.ColumnChunkReader, batch, valuesRead int64, defLevels, repLevels []int16) int64
func (*PrimitiveTypedTest) SetupSchema ¶
func (p *PrimitiveTypedTest) SetupSchema(rep parquet.Repetition, ncols int)
func (*PrimitiveTypedTest) SetupValuesOut ¶
func (p *PrimitiveTypedTest) SetupValuesOut(nvalues int64)
func (*PrimitiveTypedTest) UpdateStats ¶
func (p *PrimitiveTypedTest) UpdateStats(stat metadata.TypedStatistics, numNull int64)
func (*PrimitiveTypedTest) UpdateStatsSpaced ¶
func (p *PrimitiveTypedTest) UpdateStatsSpaced(stat metadata.TypedStatistics, numNull int64, validBits []byte, validBitsOffset int64)
func (*PrimitiveTypedTest) WriteBatchSubset ¶
func (p *PrimitiveTypedTest) WriteBatchSubset(batch, offset int, writer file.ColumnChunkWriter, defLevels, repLevels []int16) (int64, error)
func (*PrimitiveTypedTest) WriteBatchValues ¶
func (p *PrimitiveTypedTest) WriteBatchValues(writer file.ColumnChunkWriter, defLevels, repLevels []int16) (int64, error)
func (*PrimitiveTypedTest) WriteBatchValuesSpaced ¶
func (p *PrimitiveTypedTest) WriteBatchValuesSpaced(writer file.ColumnChunkWriter, defLevels, repLevels []int16, validBits []byte, validBitsOffset int64)
type RandomArrayGenerator ¶
type RandomArrayGenerator struct {
// contains filtered or unexported fields
}
RandomArrayGenerator is a struct used for constructing Random Arrow arrays for use with testing.
func NewRandomArrayGenerator ¶
func NewRandomArrayGenerator(seed uint64) RandomArrayGenerator
NewRandomArrayGenerator constructs a new generator with the requested seed.
func (*RandomArrayGenerator) ByteArray ¶
func (r *RandomArrayGenerator) ByteArray(size int64, minLen, maxLen int32, nullProb float64) arrow.Array
ByteArray creates an array.String for use of creating random ByteArray values for testing parquet writing/reading. minLen/maxLen are the min and max length for a given value in the resulting array, with nullProb being the probability of a given index being null.
For this generation we only generate ascii values with a min of 'A' and max of 'z'.
func (*RandomArrayGenerator) Float64 ¶
func (r *RandomArrayGenerator) Float64(size int64, pctNull float64) *array.Float64
Float64 generates a random array.Float64 of the requested size with pctNull as the probability that a given index will be null.
func (*RandomArrayGenerator) GenerateBitmap ¶
func (r *RandomArrayGenerator) GenerateBitmap(buffer []byte, n int64, prob float64) int64
GenerateBitmap generates a bitmap of n bits and stores it into buffer. Prob is the probability that a given bit will be zero, with 1-prob being the probability it will be 1. The return value is the number of bits that were left unset. The assumption being that buffer is currently zero initialized as this function does not clear any bits, it only sets 1s.
func (*RandomArrayGenerator) Int32 ¶
Int32 generates a random array.Int32 of the given size with each value between min and max, and pctNull as the probability that a given index will be null.
func (*RandomArrayGenerator) Int64 ¶
Int64 generates a random array.Int64 of the given size with each value between min and max, and pctNull as the probability that a given index will be null.