arrow

package
v18.0.0-...-e99480f Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 7, 2025 License: Apache-2.0, BSD-3-Clause Imports: 22 Imported by: 0

Documentation

Overview

Package arrow provides an implementation of Apache Arrow.

Apache Arrow is a cross-language development platform for in-memory data. It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. It also provides computational libraries and zero-copy streaming messaging and inter-process communication.

Basics

The fundamental data structure in Arrow is an Array, which holds a sequence of values of the same type. An array consists of memory holding the data and an additional validity bitmap that indicates if the corresponding entry in the array is valid (not null). If the array has no null entries, it is possible to omit this bitmap.

Requirements

To build with tinygo, include the noasm build tag.

Example (DenseUnionArray)
package main

import (
	"fmt"

	"github.com/joechenrh/arrow-go/v18/arrow/array"
	"github.com/joechenrh/arrow-go/v18/arrow/memory"
)

// main builds a dense union array with three children (int8, string and
// float64), appends a mix of values plus one null, and then prints every
// slot of the union followed by the contents of each child array.
func main() {
	pool := memory.NewGoAllocator()

	denseBuilder := array.NewEmptyDenseUnionBuilder(pool)
	defer denseBuilder.Release()

	// Register one child builder per member type. The value returned by
	// AppendChild is what gets passed to denseBuilder.Append below to select
	// that child for a slot.
	i8Builder := array.NewInt8Builder(pool)
	defer i8Builder.Release()
	i8Code := denseBuilder.AppendChild(i8Builder, "i8")

	strBuilder := array.NewStringBuilder(pool)
	defer strBuilder.Release()
	strCode := denseBuilder.AppendChild(strBuilder, "str")

	f64Builder := array.NewFloat64Builder(pool)
	defer f64Builder.Release()
	f64Code := denseBuilder.AppendChild(f64Builder, "f64")

	values := []interface{}{int8(33), "abc", float64(1.0), float64(-1.0), nil,
		"", int8(10), "def", int8(-10), float64(0.5)}

	// For each value, first record on the union which child the slot belongs
	// to, then append the value itself to that child only.
	for _, v := range values {
		switch v := v.(type) {
		case int8:
			denseBuilder.Append(i8Code)
			i8Builder.Append(v)
		case string:
			denseBuilder.Append(strCode)
			strBuilder.Append(v)
		case float64:
			denseBuilder.Append(f64Code)
			f64Builder.Append(v)
		case nil:
			// No child is named here; in the expected output the null ends
			// up in the first (i8) child.
			denseBuilder.AppendNull()
		}
	}

	arr := denseBuilder.NewDenseUnionArray()
	defer arr.Release()

	fmt.Printf("Len() = %d\n", arr.Len())
	fields := arr.UnionType().Fields()
	offsets := arr.RawValueOffsets()
	for i := 0; i < arr.Len(); i++ {
		child := arr.ChildID(i)
		data := arr.Field(child)
		field := fields[child]

		// The children are shorter than the union, so slot i is looked up in
		// its child through offsets[i] rather than through i itself.
		idx := int(offsets[i])
		if data.IsNull(idx) {
			fmt.Printf("[%d]   = (null)\n", i)
			continue
		}
		var v interface{}
		switch varr := data.(type) {
		case *array.Int8:
			v = varr.Value(idx)
		case *array.String:
			v = varr.Value(idx)
		case *array.Float64:
			v = varr.Value(idx)
		}
		fmt.Printf("[%d]   = %#5v {%s}\n", i, v, field.Name)
	}

	// Dump the children in the order they were registered above.
	fmt.Printf("i8:  %s\n", arr.Field(0))
	fmt.Printf("str: %s\n", arr.Field(1))
	fmt.Printf("f64: %s\n", arr.Field(2))

}
Output:

Len() = 10
[0]   =    33 {i8}
[1]   = "abc" {str}
[2]   =     1 {f64}
[3]   =    -1 {f64}
[4]   = (null)
[5]   =    "" {str}
[6]   =    10 {i8}
[7]   = "def" {str}
[8]   =   -10 {i8}
[9]   =   0.5 {f64}
i8:  [33 (null) 10 -10]
str: ["abc" "" "def"]
f64: [1 -1 0.5]
Example (FixedSizeListArray)

This example shows how to create a FixedSizeList array. The resulting array should be:

[[0, 1, 2], (null), [3, 4, 5], [6, 7, 8], (null)]
package main

import (
	"fmt"

	"github.com/joechenrh/arrow-go/v18/arrow"
	"github.com/joechenrh/arrow-go/v18/arrow/array"
	"github.com/joechenrh/arrow-go/v18/arrow/memory"
)

// main builds a fixed-size list array of int64 values with three elements
// per list, containing three valid lists and two null lists, and prints its
// null count, length, type and contents.
func main() {
	pool := memory.NewGoAllocator()

	lb := array.NewFixedSizeListBuilder(pool, 3, arrow.PrimitiveTypes.Int64)
	defer lb.Release()

	// The list builder owns a value builder; each valid list is formed by
	// calling lb.Append(true) and then appending its three values to vb.
	vb := lb.ValueBuilder().(*array.Int64Builder)
	vb.Reserve(10)

	lb.Append(true)
	vb.Append(0)
	vb.Append(1)
	vb.Append(2)

	lb.AppendNull()

	lb.Append(true)
	vb.Append(3)
	vb.Append(4)
	vb.Append(5)

	lb.Append(true)
	vb.Append(6)
	vb.Append(7)
	vb.Append(8)

	lb.AppendNull()

	arr := lb.NewArray().(*array.FixedSizeList)
	// Register the release right after acquiring the array, as the other
	// examples do, so it still runs if a later call panics.
	defer arr.Release()

	// Mark the element field as non-nullable before printing the type.
	arr.DataType().(*arrow.FixedSizeListType).SetElemNullable(false)

	fmt.Printf("NullN()   = %d\n", arr.NullN())
	fmt.Printf("Len()     = %d\n", arr.Len())
	fmt.Printf("Type()    = %v\n", arr.DataType())
	fmt.Printf("List      = %v\n", arr)

}
Output:

NullN()   = 2
Len()     = 5
Type()    = fixed_size_list<item: int64>[3]
List      = [[0 1 2] (null) [3 4 5] [6 7 8] (null)]
Example (Float64Slice)

This example shows how one can slice an array. The initial (float64) array is:

[1, 2, 3, (null), 4, 5]

and the sub-slice is:

[3, (null), 4]
package main

import (
	"fmt"

	"github.com/joechenrh/arrow-go/v18/arrow/array"
	"github.com/joechenrh/arrow-go/v18/arrow/memory"
)

// main builds a six-element float64 array whose fourth entry is null, prints
// it, and then prints the sub-slice covering indices 2 through 4.
func main() {
	alloc := memory.NewGoAllocator()

	bldr := array.NewFloat64Builder(alloc)
	defer bldr.Release()

	// The -1 at index 3 is only a placeholder: its validity flag is false, so
	// that slot is null in the resulting array.
	var (
		values = []float64{1, 2, 3, -1, 4, 5}
		valid  = []bool{true, true, true, false, true, true}
	)
	bldr.AppendValues(values, valid)

	full := bldr.NewFloat64Array()
	defer full.Release()
	fmt.Printf("array = %v\n", full)

	// The slice is released separately from the array it was taken from.
	part := array.NewSlice(full, 2, 5).(*array.Float64)
	defer part.Release()
	fmt.Printf("slice = %v\n", part)

}
Output:

array = [1 2 3 (null) 4 5]
slice = [3 (null) 4]
Example (Float64Tensor2x5)
package main

import (
	"fmt"

	"github.com/joechenrh/arrow-go/v18/arrow/array"
	"github.com/joechenrh/arrow-go/v18/arrow/memory"
	"github.com/joechenrh/arrow-go/v18/arrow/tensor"
)

// main wraps a flat ten-element float64 array in a 2x5 tensor with
// dimensions named "x" and "y" and no explicit strides, then prints every
// element in row order.
func main() {
	alloc := memory.NewGoAllocator()

	bldr := array.NewFloat64Builder(alloc)
	defer bldr.Release()

	bldr.AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil)

	flat := bldr.NewFloat64Array()
	defer flat.Release()

	var (
		shape = []int64{2, 5}
		names = []string{"x", "y"}
	)
	tsr := tensor.NewFloat64(flat.Data(), shape, nil, names)
	defer tsr.Release()

	// Visit (0,0) ... (0,4), (1,0) ... (1,4).
	for x := int64(0); x < 2; x++ {
		for y := int64(0); y < 5; y++ {
			idx := []int64{x, y}
			fmt.Printf("arr%v = %v\n", idx, tsr.Value(idx))
		}
	}

}
Output:

arr[0 0] = 1
arr[0 1] = 2
arr[0 2] = 3
arr[0 3] = 4
arr[0 4] = 5
arr[1 0] = 6
arr[1 1] = 7
arr[1 2] = 8
arr[1 3] = 9
arr[1 4] = 10
Example (Float64Tensor2x5ColMajor)
package main

import (
	"fmt"

	"github.com/joechenrh/arrow-go/v18/arrow/array"
	"github.com/joechenrh/arrow-go/v18/arrow/memory"
	"github.com/joechenrh/arrow-go/v18/arrow/tensor"
)

// main wraps a flat ten-element float64 array in a 2x5 tensor with explicit
// strides so that the data is read column by column, then prints every
// element in row order.
func main() {
	pool := memory.NewGoAllocator()

	b := array.NewFloat64Builder(pool)
	defer b.Release()

	raw := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
	b.AppendValues(raw, nil)

	arr := b.NewFloat64Array()
	defer arr.Release()

	// Strides {8, 16}: judging by the expected output these are byte strides
	// over 8-byte float64 values. One step along x moves to the next value
	// (arr[1 0] = 2) and one step along y skips two values (arr[0 1] = 3).
	f64 := tensor.NewFloat64(arr.Data(), []int64{2, 5}, []int64{8, 16}, []string{"x", "y"})
	defer f64.Release()

	// Walk all (x, y) index pairs with x outermost.
	for _, i := range [][]int64{
		{0, 0},
		{0, 1},
		{0, 2},
		{0, 3},
		{0, 4},
		{1, 0},
		{1, 1},
		{1, 2},
		{1, 3},
		{1, 4},
	} {
		fmt.Printf("arr%v = %v\n", i, f64.Value(i))
	}

}
Output:

arr[0 0] = 1
arr[0 1] = 3
arr[0 2] = 5
arr[0 3] = 7
arr[0 4] = 9
arr[1 0] = 2
arr[1 1] = 4
arr[1 2] = 6
arr[1 3] = 8
arr[1 4] = 10
Example (FromMemory)

This example demonstrates creating an array, sourcing the values and null bitmaps directly from byte slices. The null count is set to UnknownNullCount, instructing the array to calculate the null count from the bitmap when NullN is called.

package main

import (
	"fmt"

	"github.com/joechenrh/arrow-go/v18/arrow/array"
	"github.com/joechenrh/arrow-go/v18/arrow/memory"
)

// main builds a 16-element boolean array directly from two byte slices, one
// holding the packed values and one holding the packed validity bits, and
// prints the null count followed by every element.
func main() {
	// Value bits, least-significant bit first within each byte:
	// 01010011 11000101
	values := memory.NewBufferBytes([]byte{0xca, 0xa3})

	// Validity bits in the same LSB-first layout; every 4th element is null:
	// 11101110 11101110
	validity := memory.NewBufferBytes([]byte{0x77, 0x77})

	// UnknownNullCount defers counting the nulls until NullN is called.
	arr := array.NewBoolean(16, values, validity, array.UnknownNullCount)
	defer arr.Release()

	fmt.Printf("NullN()  = %d\n", arr.NullN())

	// Print each element, substituting the null marker where needed.
	for i, n := 0, arr.Len(); i < n; i++ {
		fmt.Printf("bools[%d] = ", i)
		if !arr.IsNull(i) {
			fmt.Printf("%t\n", arr.Value(i))
			continue
		}
		fmt.Println(array.NullValueStr)
	}

}
Output:

NullN()  = 4
bools[0] = false
bools[1] = true
bools[2] = false
bools[3] = (null)
bools[4] = false
bools[5] = false
bools[6] = true
bools[7] = (null)
bools[8] = true
bools[9] = true
bools[10] = false
bools[11] = (null)
bools[12] = false
bools[13] = true
bools[14] = false
bools[15] = (null)
Example (ListArray)

This example shows how to create a List array. The resulting array should be:

[[0, 1, 2], [], [3], [4, 5], [6, 7, 8], [], [9]]
package main

import (
	"fmt"

	"github.com/joechenrh/arrow-go/v18/arrow"
	"github.com/joechenrh/arrow-go/v18/arrow/array"
	"github.com/joechenrh/arrow-go/v18/arrow/memory"
)

// main builds a list array of int64 values with seven entries (five valid
// lists of varying length and two nulls), prints its null count, length,
// offsets and type, and then prints each list by walking the offsets.
func main() {
	pool := memory.NewGoAllocator()

	lb := array.NewListBuilder(pool, arrow.PrimitiveTypes.Int64)
	defer lb.Release()

	// Each valid list is formed by calling lb.Append(true) and then appending
	// its values to the shared value builder.
	vb := lb.ValueBuilder().(*array.Int64Builder)
	vb.Reserve(10)

	lb.Append(true)
	vb.Append(0)
	vb.Append(1)
	vb.Append(2)

	lb.AppendNull()

	lb.Append(true)
	vb.Append(3)

	lb.Append(true)
	vb.Append(4)
	vb.Append(5)

	lb.Append(true)
	vb.Append(6)
	vb.Append(7)
	vb.Append(8)

	lb.AppendNull()

	lb.Append(true)
	vb.Append(9)

	arr := lb.NewArray().(*array.List)
	defer arr.Release()

	// Mark the element field as non-nullable before printing the type.
	arr.DataType().(*arrow.ListType).SetElemNullable(false)
	fmt.Printf("NullN()   = %d\n", arr.NullN())
	fmt.Printf("Len()     = %d\n", arr.Len())
	fmt.Printf("Offsets() = %v\n", arr.Offsets())
	fmt.Printf("Type()    = %v\n", arr.DataType())

	// Drop the leading offset so that offsets[i] is the end of list i.
	offsets := arr.Offsets()[1:]

	varr := arr.ListValues().(*array.Int64)

	// pos is the start of the current list. It is not advanced for null
	// entries, which is fine here because the printed offsets show the null
	// lists occupy no values.
	pos := 0
	for i := 0; i < arr.Len(); i++ {
		if !arr.IsValid(i) {
			fmt.Printf("List[%d]   = (null)\n", i)
			continue
		}
		fmt.Printf("List[%d]   = [", i)
		for j := pos; j < int(offsets[i]); j++ {
			if j != pos {
				fmt.Printf(", ")
			}
			fmt.Printf("%v", varr.Value(j))
		}
		pos = int(offsets[i])
		fmt.Printf("]\n")
	}
	fmt.Printf("List      = %v\n", arr)

}
Output:

NullN()   = 2
Len()     = 7
Offsets() = [0 3 3 4 6 9 9 10]
Type()    = list<item: int64>
List[0]   = [0, 1, 2]
List[1]   = (null)
List[2]   = [3]
List[3]   = [4, 5]
List[4]   = [6, 7, 8]
List[5]   = (null)
List[6]   = [9]
List      = [[0 1 2] (null) [3] [4 5] [6 7 8] (null) [9]]
Example (MapArray)

This example demonstrates how to create a Map Array. The resulting array should be:

[{["ab" "cd" "ef" "gh"] [1 2 3 4]} (null) {["ab" "cd" "ef" "gh"] [(null) 2 5 1]}]
package main

import (
	"fmt"

	"github.com/joechenrh/arrow-go/v18/arrow"
	"github.com/joechenrh/arrow-go/v18/arrow/array"
	"github.com/joechenrh/arrow-go/v18/arrow/memory"
)

// main builds a map array with string keys and int16 items holding three
// entries (a full map, a null map, and a map whose first item is null), then
// prints each entry key by key followed by the whole array.
func main() {
	pool := memory.NewGoAllocator()
	mb := array.NewMapBuilder(pool, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int16, false)
	defer mb.Release()

	kb := mb.KeyBuilder().(*array.StringBuilder)
	ib := mb.ItemBuilder().(*array.Int16Builder)

	keys := []string{"ab", "cd", "ef", "gh"}

	// Each valid map is formed by calling mb.Append(true) and then appending
	// the same number of keys and items.
	mb.Append(true)
	kb.AppendValues(keys, nil)
	ib.AppendValues([]int16{1, 2, 3, 4}, nil)

	mb.AppendNull()

	// The -1 is a placeholder: its validity flag is false, so the item for
	// "ab" is null in this map.
	mb.Append(true)
	kb.AppendValues(keys, nil)
	ib.AppendValues([]int16{-1, 2, 5, 1}, []bool{false, true, true, true})

	arr := mb.NewMapArray()
	defer arr.Release()

	fmt.Printf("NullN() = %d\n", arr.NullN())
	fmt.Printf("Len()   = %d\n", arr.Len())

	offsets := arr.Offsets()
	keyArr := arr.Keys().(*array.String)
	itemArr := arr.Items().(*array.Int16)

	for i := 0; i < arr.Len(); i++ {
		if arr.IsNull(i) {
			fmt.Printf("Map[%d] = (null)\n", i)
			continue
		}

		// Map i covers the half-open range [offsets[i], offsets[i+1]) of the
		// key and item arrays.
		fmt.Printf("Map[%d] = {", i)
		for j := offsets[i]; j < offsets[i+1]; j++ {
			if j != offsets[i] {
				fmt.Printf(", ")
			}
			fmt.Printf("%v => ", keyArr.Value(int(j)))
			if itemArr.IsValid(int(j)) {
				fmt.Printf("%v", itemArr.Value(int(j)))
			} else {
				// Print, not Printf: the null marker is data to be written
				// verbatim, not a format string.
				fmt.Print(array.NullValueStr)
			}
		}
		fmt.Printf("}\n")
	}
	fmt.Printf("Map    = %v\n", arr)

}
Output:

NullN() = 1
Len()   = 3
Map[0] = {ab => 1, cd => 2, ef => 3, gh => 4}
Map[1] = (null)
Map[2] = {ab => (null), cd => 2, ef => 5, gh => 1}
Map    = [{["ab" "cd" "ef" "gh"] [1 2 3 4]} (null) {["ab" "cd" "ef" "gh"] [(null) 2 5 1]}]
Example (Minimal)

This example demonstrates how to build an array of int64 values using a builder and Append.

package main

import (
	"fmt"

	"github.com/joechenrh/arrow-go/v18/arrow/array"
	"github.com/joechenrh/arrow-go/v18/arrow/memory"
)

// main builds an eight-element int64 array one value at a time, with a null
// in the fourth slot, and prints each element followed by the whole array.
func main() {
	alloc := memory.NewGoAllocator()

	ib := array.NewInt64Builder(alloc)
	defer ib.Release()

	// 1, 2, 3, (null), 5, 6, 7, 8
	for _, v := range []int64{1, 2, 3} {
		ib.Append(v)
	}
	ib.AppendNull()
	for _, v := range []int64{5, 6, 7, 8} {
		ib.Append(v)
	}

	// Finish the array; the builder is reset and may be reused.
	ints := ib.NewInt64Array()
	defer ints.Release()

	// Walk the raw values, substituting the null marker where needed.
	for i, v := range ints.Int64Values() {
		fmt.Printf("ints[%d] = ", i)
		if !ints.IsNull(i) {
			fmt.Println(v)
			continue
		}
		fmt.Println(array.NullValueStr)
	}
	fmt.Printf("ints = %v\n", ints)

}
Output:

ints[0] = 1
ints[1] = 2
ints[2] = 3
ints[3] = (null)
ints[4] = 5
ints[5] = 6
ints[6] = 7
ints[7] = 8
ints = [1 2 3 (null) 5 6 7 8]
Example (Record)
package main

import (
	"fmt"

	"github.com/joechenrh/arrow-go/v18/arrow"
	"github.com/joechenrh/arrow-go/v18/arrow/array"
	"github.com/joechenrh/arrow-go/v18/arrow/memory"
)

// main builds a single record with an int32 column and a float64 column of
// ten rows each, where one int32 value is null, and prints every column.
func main() {
	alloc := memory.NewGoAllocator()

	fields := []arrow.Field{
		{Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32},
		{Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64},
	}
	schema := arrow.NewSchema(fields, nil)

	rb := array.NewRecordBuilder(alloc, schema)
	defer rb.Release()

	// One typed builder per schema field, in schema order.
	i32 := rb.Field(0).(*array.Int32Builder)
	f64 := rb.Field(1).(*array.Float64Builder)

	// The int32 column is filled in two batches; the 9 in the second batch is
	// flagged invalid and therefore becomes null.
	i32.AppendValues([]int32{1, 2, 3, 4, 5, 6}, nil)
	i32.AppendValues([]int32{7, 8, 9, 10}, []bool{true, true, false, true})
	f64.AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil)

	rec := rb.NewRecord()
	defer rec.Release()

	for i, col := range rec.Columns() {
		fmt.Printf("column[%d] %q: %v\n", i, rec.ColumnName(i), col)
	}

}
Output:

column[0] "f1-i32": [1 2 3 4 5 6 7 8 (null) 10]
column[1] "f2-f64": [1 2 3 4 5 6 7 8 9 10]
Example (RecordReader)
package main

import (
	"fmt"
	"log"

	"github.com/joechenrh/arrow-go/v18/arrow"
	"github.com/joechenrh/arrow-go/v18/arrow/array"
	"github.com/joechenrh/arrow-go/v18/arrow/memory"
)

// main builds two records that share one schema, wraps them in a record
// reader, and prints every column of every record the reader yields.
func main() {
	alloc := memory.NewGoAllocator()

	fields := []arrow.Field{
		{Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32},
		{Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64},
	}
	schema := arrow.NewSchema(fields, nil)

	rb := array.NewRecordBuilder(alloc, schema)
	defer rb.Release()

	// One typed builder per schema field, reused for both records.
	i32 := rb.Field(0).(*array.Int32Builder)
	f64 := rb.Field(1).(*array.Float64Builder)

	// First record: ten rows, with the 9 in the int32 column flagged invalid.
	i32.AppendValues([]int32{1, 2, 3, 4, 5, 6}, nil)
	i32.AppendValues([]int32{7, 8, 9, 10}, []bool{true, true, false, true})
	f64.AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil)

	first := rb.NewRecord()
	defer first.Release()

	// Second record: ten more rows, no nulls.
	i32.AppendValues([]int32{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil)
	f64.AppendValues([]float64{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil)

	second := rb.NewRecord()
	defer second.Release()

	rdr, err := array.NewRecordReader(schema, []arrow.Record{first, second})
	if err != nil {
		log.Fatal(err)
	}
	defer rdr.Release()

	// n numbers the records in the order the reader yields them.
	for n := 0; rdr.Next(); n++ {
		cur := rdr.Record()
		for i, col := range cur.Columns() {
			fmt.Printf("rec[%d][%q]: %v\n", n, cur.ColumnName(i), col)
		}
	}

}
Output:

rec[0]["f1-i32"]: [1 2 3 4 5 6 7 8 (null) 10]
rec[0]["f2-f64"]: [1 2 3 4 5 6 7 8 9 10]
rec[1]["f1-i32"]: [11 12 13 14 15 16 17 18 19 20]
rec[1]["f2-f64"]: [11 12 13 14 15 16 17 18 19 20]
Example (SparseUnionArray)
package main

import (
	"fmt"

	"github.com/joechenrh/arrow-go/v18/arrow/array"
	"github.com/joechenrh/arrow-go/v18/arrow/memory"
)

// main builds a sparse union array with three children (int8, string and
// float64), appends a mix of values plus one null, and then prints every
// slot of the union followed by the contents of each child array.
func main() {
	pool := memory.NewGoAllocator()

	sparseBuilder := array.NewEmptySparseUnionBuilder(pool)
	defer sparseBuilder.Release()

	// Register one child builder per member type. The value returned by
	// AppendChild is what gets passed to sparseBuilder.Append below to select
	// that child for a slot.
	i8Builder := array.NewInt8Builder(pool)
	defer i8Builder.Release()
	i8Code := sparseBuilder.AppendChild(i8Builder, "i8")

	strBuilder := array.NewStringBuilder(pool)
	defer strBuilder.Release()
	strCode := sparseBuilder.AppendChild(strBuilder, "str")

	f64Builder := array.NewFloat64Builder(pool)
	defer f64Builder.Release()
	f64Code := sparseBuilder.AppendChild(f64Builder, "f64")

	values := []interface{}{int8(33), "abc", float64(1.0), float64(-1.0), nil,
		"", int8(10), "def", int8(-10), float64(0.5)}

	// Every child gets exactly one entry per union slot: the real value goes
	// to the selected child and an empty placeholder goes to the other two.
	for _, v := range values {
		switch v := v.(type) {
		case int8:
			sparseBuilder.Append(i8Code)
			i8Builder.Append(v)
			strBuilder.AppendEmptyValue()
			f64Builder.AppendEmptyValue()
		case string:
			sparseBuilder.Append(strCode)
			i8Builder.AppendEmptyValue()
			strBuilder.Append(v)
			f64Builder.AppendEmptyValue()
		case float64:
			sparseBuilder.Append(f64Code)
			i8Builder.AppendEmptyValue()
			strBuilder.AppendEmptyValue()
			f64Builder.Append(v)
		case nil:
			// No per-child appends are made here; in the expected output the
			// i8 child holds a null at this slot and the other two children
			// hold empty values.
			sparseBuilder.AppendNull()
		}
	}

	arr := sparseBuilder.NewSparseUnionArray()
	defer arr.Release()

	fmt.Printf("Len() = %d\n", arr.Len())
	fields := arr.UnionType().Fields()
	for i := 0; i < arr.Len(); i++ {
		child := arr.ChildID(i)
		data := arr.Field(child)
		field := fields[child]

		// Children are as long as the union, so slot i is read from its
		// child at the same index i.
		if data.IsNull(i) {
			fmt.Printf("[%d]   = (null)\n", i)
			continue
		}
		var v interface{}
		switch varr := data.(type) {
		case *array.Int8:
			v = varr.Value(i)
		case *array.String:
			v = varr.Value(i)
		case *array.Float64:
			v = varr.Value(i)
		}
		fmt.Printf("[%d]   = %#5v {%s}\n", i, v, field.Name)
	}

	// Dump the children in the order they were registered above.
	fmt.Printf("i8:  %s\n", arr.Field(0))
	fmt.Printf("str: %s\n", arr.Field(1))
	fmt.Printf("f64: %s\n", arr.Field(2))

}
Output:

Len() = 10
[0]   =    33 {i8}
[1]   = "abc" {str}
[2]   =     1 {f64}
[3]   =    -1 {f64}
[4]   = (null)
[5]   =    "" {str}
[6]   =    10 {i8}
[7]   = "def" {str}
[8]   =   -10 {i8}
[9]   =   0.5 {f64}
i8:  [33 0 0 0 (null) 0 10 0 -10 0]
str: ["" "abc" "" "" "" "" "" "def" "" ""]
f64: [0 0 1 -1 0 0 0 0 0 0.5]
Example (StructArray)

This example shows how to create a Struct array. The resulting array should be:

[{‘joe’, 1}, {null, 2}, null, {‘mark’, 4}]
package main

import (
	"fmt"

	"github.com/joechenrh/arrow-go/v18/arrow"
	"github.com/joechenrh/arrow-go/v18/arrow/array"
	"github.com/joechenrh/arrow-go/v18/arrow/memory"
)

// main builds a four-element struct array with a list-of-uint8 field "f1"
// and an int32 field "f2": {"joe", 1}, {null, 2}, a null struct, and
// {"mark", 4}. It prints the null count, length and type, and then prints
// each struct by walking the list offsets.
func main() {
	pool := memory.NewGoAllocator()

	dtype := arrow.StructOf([]arrow.Field{
		{Name: "f1", Type: arrow.ListOf(arrow.PrimitiveTypes.Uint8)},
		{Name: "f2", Type: arrow.PrimitiveTypes.Int32},
	}...)

	sb := array.NewStructBuilder(pool, dtype)
	defer sb.Release()

	// f1b builds the list slots, f1vb the bytes inside them, f2b the ints.
	f1b := sb.FieldBuilder(0).(*array.ListBuilder)
	f1vb := f1b.ValueBuilder().(*array.Uint8Builder)
	f2b := sb.FieldBuilder(1).(*array.Int32Builder)

	sb.Reserve(4)
	f1vb.Reserve(7)
	f2b.Reserve(3)

	sb.Append(true)
	f1b.Append(true)
	f1vb.AppendValues([]byte("joe"), nil)
	f2b.Append(1)

	// Valid struct whose list field is null.
	sb.Append(true)
	f1b.AppendNull()
	f2b.Append(2)

	sb.AppendNull()

	sb.Append(true)
	f1b.Append(true)
	f1vb.AppendValues([]byte("mark"), nil)
	f2b.Append(4)

	arr := sb.NewArray().(*array.Struct)
	defer arr.Release()

	fmt.Printf("NullN() = %d\n", arr.NullN())
	fmt.Printf("Len()   = %d\n", arr.Len())
	fmt.Printf("Type()    = %v\n", arr.DataType())

	list := arr.Field(0).(*array.List)
	offsets := list.Offsets()

	varr := list.ListValues().(*array.Uint8)
	ints := arr.Field(1).(*array.Int32)

	for i := 0; i < arr.Len(); i++ {
		if !arr.IsValid(i) {
			fmt.Printf("Struct[%d] = (null)\n", i)
			continue
		}
		fmt.Printf("Struct[%d] = [", i)
		// Validity is a property of list slot i. offsets[i] is a position in
		// the flattened values and must not be used as a slot index: doing so
		// reports the wrong slot's validity and can index past the list.
		if list.IsValid(i) {
			fmt.Printf("[")
			for j := offsets[i]; j < offsets[i+1]; j++ {
				if j != offsets[i] {
					fmt.Printf(", ")
				}
				fmt.Printf("%v", string(varr.Value(int(j))))
			}
			fmt.Printf("], ")
		} else {
			fmt.Printf("(null), ")
		}
		fmt.Printf("%d]\n", ints.Value(i))
	}

}
Output:

NullN() = 1
Len()   = 4
Type()    = struct<f1: list<item: uint8, nullable>, f2: int32>
Struct[0] = [[j, o, e], 1]
Struct[1] = [(null), 2]
Struct[2] = (null)
Struct[3] = [[m, a, r, k], 4]
Example (Table)
package main

import (
	"fmt"

	"github.com/joechenrh/arrow-go/v18/arrow"
	"github.com/joechenrh/arrow-go/v18/arrow/array"
	"github.com/joechenrh/arrow-go/v18/arrow/memory"
)

// main builds two ten-row records that share one schema, combines them into
// a table, and reads the table back through a table reader, printing every
// column of every record the reader yields.
func main() {
	pool := memory.NewGoAllocator()

	schema := arrow.NewSchema(
		[]arrow.Field{
			{Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32},
			{Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64},
		},
		nil,
	)

	b := array.NewRecordBuilder(pool, schema)
	defer b.Release()

	// First record: the int32 column is filled in two batches, and the 9 in
	// the second batch is flagged invalid so it becomes null.
	b.Field(0).(*array.Int32Builder).AppendValues([]int32{1, 2, 3, 4, 5, 6}, nil)
	b.Field(0).(*array.Int32Builder).AppendValues([]int32{7, 8, 9, 10}, []bool{true, true, false, true})
	b.Field(1).(*array.Float64Builder).AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil)

	rec1 := b.NewRecord()
	defer rec1.Release()

	// Second record: the same builder is reused after NewRecord.
	b.Field(0).(*array.Int32Builder).AppendValues([]int32{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil)
	b.Field(1).(*array.Float64Builder).AppendValues([]float64{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil)

	rec2 := b.NewRecord()
	defer rec2.Release()

	tbl := array.NewTableFromRecords(schema, []arrow.Record{rec1, rec2})
	defer tbl.Release()

	// The second argument is 5, and in the expected output each record the
	// reader yields has five rows, so twenty rows come back as four records.
	tr := array.NewTableReader(tbl, 5)
	defer tr.Release()

	// n numbers the records in the order the reader yields them.
	n := 0
	for tr.Next() {
		rec := tr.Record()
		for i, col := range rec.Columns() {
			fmt.Printf("rec[%d][%q]: %v\n", n, rec.ColumnName(i), col)
		}
		n++
	}

}
Output:

rec[0]["f1-i32"]: [1 2 3 4 5]
rec[0]["f2-f64"]: [1 2 3 4 5]
rec[1]["f1-i32"]: [6 7 8 (null) 10]
rec[1]["f2-f64"]: [6 7 8 9 10]
rec[2]["f1-i32"]: [11 12 13 14 15]
rec[2]["f2-f64"]: [11 12 13 14 15]
rec[3]["f1-i32"]: [16 17 18 19 20]
rec[3]["f2-f64"]: [16 17 18 19 20]

Index

Examples

Constants

View Source
// ConvDIVIDE and ConvMULTIPLY are untyped integer constants with the values
// 0 and 1 respectively (assigned by iota).
// NOTE(review): presumably they select whether a time-unit conversion divides
// or multiplies by the conversion factor; confirm against their usage.
const (
	ConvDIVIDE = iota
	ConvMULTIPLY
)
View Source
const (
	// MaxUnionTypeCode is the UnionTypeCode value 127.
	// NOTE(review): presumably the largest type code a union child may be
	// given; confirm against the union type constructors.
	MaxUnionTypeCode    UnionTypeCode = 127
	// InvalidUnionChildID is the int value -1.
	// NOTE(review): presumably returned or stored where a type code maps to
	// no child; confirm against usage.
	InvalidUnionChildID int           = -1

	// iota counts every constant specification from the start of this block,
	// and two precede SparseMode, so SparseMode is 2 and DenseMode is 3
	// (not 0 and 1).
	SparseMode UnionMode = iota // SPARSE
	DenseMode                   // DENSE
)
View Source
const (
	// Date32SizeBytes specifies the number of bytes required to store a single Date32 in memory
	Date32SizeBytes = int(unsafe.Sizeof(Date32(0)))
)
View Source
const (
	// Date64SizeBytes specifies the number of bytes required to store a single Date64 in memory
	Date64SizeBytes = int(unsafe.Sizeof(Date64(0)))
)
View Source
const (
	// DayTimeIntervalSizeBytes specifies the number of bytes required to store a single DayTimeInterval in memory
	DayTimeIntervalSizeBytes = int(unsafe.Sizeof(DayTimeInterval{}))
)
View Source
const (
	// Decimal128SizeBytes specifies the number of bytes required to store a single decimal128 in memory
	Decimal128SizeBytes = int(unsafe.Sizeof(decimal.Decimal128{}))
)
View Source
const (
	// Decimal256SizeBytes specifies the number of bytes required to store a single decimal256 in memory
	Decimal256SizeBytes = int(unsafe.Sizeof(decimal.Decimal256{}))
)
View Source
const (
	// Decimal32SizeBytes specifies the number of bytes required to store a single decimal32 in memory
	Decimal32SizeBytes = int(unsafe.Sizeof(decimal.Decimal32(0)))
)
View Source
const (
	// Decimal64SizeBytes specifies the number of bytes required to store a single decimal64 in memory
	Decimal64SizeBytes = int(unsafe.Sizeof(decimal.Decimal64(0)))
)
View Source
const (
	// DurationSizeBytes specifies the number of bytes required to store a single Duration in memory
	DurationSizeBytes = int(unsafe.Sizeof(Duration(0)))
)
View Source
const (
	// Float16SizeBytes specifies the number of bytes required to store a single float16 in memory
	Float16SizeBytes = int(unsafe.Sizeof(uint16(0)))
)
View Source
const (
	// Float32SizeBytes specifies the number of bytes required to store a single float32 in memory
	Float32SizeBytes = int(unsafe.Sizeof(float32(0)))
)
View Source
const (
	// Float64SizeBytes specifies the number of bytes required to store a single float64 in memory
	Float64SizeBytes = int(unsafe.Sizeof(float64(0)))
)
View Source
const (
	// Int16SizeBytes specifies the number of bytes required to store a single int16 in memory
	Int16SizeBytes = int(unsafe.Sizeof(int16(0)))
)
View Source
const (
	// Int32SizeBytes specifies the number of bytes required to store a single int32 in memory
	Int32SizeBytes = int(unsafe.Sizeof(int32(0)))
)
View Source
const (
	// Int64SizeBytes specifies the number of bytes required to store a single int64 in memory
	Int64SizeBytes = int(unsafe.Sizeof(int64(0)))
)
View Source
const (
	// Int8SizeBytes specifies the number of bytes required to store a single int8 in memory
	Int8SizeBytes = int(unsafe.Sizeof(int8(0)))
)
View Source
const (
	// MonthDayNanoIntervalSizeBytes specifies the number of bytes required to store a single MonthDayNanoInterval in memory
	MonthDayNanoIntervalSizeBytes = int(unsafe.Sizeof(MonthDayNanoInterval{}))
)
View Source
const (
	// MonthIntervalSizeBytes specifies the number of bytes required to store a single MonthInterval in memory
	MonthIntervalSizeBytes = int(unsafe.Sizeof(MonthInterval(0)))
)
View Source
// PkgVersion is the version string of this package.
const PkgVersion = "18.0.0"
View Source
const (
	// Time32SizeBytes specifies the number of bytes required to store a single Time32 in memory
	Time32SizeBytes = int(unsafe.Sizeof(Time32(0)))
)
View Source
const (
	// Time64SizeBytes specifies the number of bytes required to store a single Time64 in memory
	Time64SizeBytes = int(unsafe.Sizeof(Time64(0)))
)
View Source
const (
	// TimestampSizeBytes specifies the number of bytes required to store a single Timestamp in memory
	TimestampSizeBytes = int(unsafe.Sizeof(Timestamp(0)))
)
View Source
const (
	// Uint16SizeBytes specifies the number of bytes required to store a single uint16 in memory
	Uint16SizeBytes = int(unsafe.Sizeof(uint16(0)))
)
View Source
const (
	// Uint32SizeBytes specifies the number of bytes required to store a single uint32 in memory
	Uint32SizeBytes = int(unsafe.Sizeof(uint32(0)))
)
View Source
const (
	// Uint64SizeBytes specifies the number of bytes required to store a single uint64 in memory
	Uint64SizeBytes = int(unsafe.Sizeof(uint64(0)))
)
View Source
const (
	// Uint8SizeBytes specifies the number of bytes required to store a single uint8 in memory
	Uint8SizeBytes = int(unsafe.Sizeof(uint8(0)))
)
View Source
const (
	// ViewHeaderSizeBytes specifies the number of bytes required to store a single ViewHeader in memory
	ViewHeaderSizeBytes = int(unsafe.Sizeof(ViewHeader{}))
)
View Source
const (
	// ViewPrefixLen is the untyped constant 4.
	// NOTE(review): presumably the number of leading bytes of a value that a
	// ViewHeader keeps as its prefix; confirm against the ViewHeader methods.
	ViewPrefixLen = 4
)

Variables

View Source
// Sentinel errors exported by the package. Each one carries only the fixed
// message shown here.
var (
	ErrInvalid        = errors.New("invalid")
	ErrNotImplemented = errors.New("not implemented")
	ErrType           = errors.New("type error")
	ErrKey            = errors.New("key error")
	ErrIndex          = errors.New("index error")
	ErrNotFound       = errors.New("not found")
)
View Source
var (
	MonthIntervalTraits        monthTraits
	DayTimeIntervalTraits      daytimeTraits
	MonthDayNanoIntervalTraits monthDayNanoTraits
)
View Source
var (
	Int64Traits    int64Traits
	Uint64Traits   uint64Traits
	Float64Traits  float64Traits
	Int32Traits    int32Traits
	Uint32Traits   uint32Traits
	Float32Traits  float32Traits
	Int16Traits    int16Traits
	Uint16Traits   uint16Traits
	Int8Traits     int8Traits
	Uint8Traits    uint8Traits
	Time32Traits   time32Traits
	Time64Traits   time64Traits
	Date32Traits   date32Traits
	Date64Traits   date64Traits
	DurationTraits durationTraits
)
View Source
var (
	BinaryTypes = struct {
		Binary      BinaryDataType
		String      BinaryDataType
		LargeBinary BinaryDataType
		LargeString BinaryDataType
		BinaryView  BinaryDataType
		StringView  BinaryDataType
	}{
		Binary:      &BinaryType{},
		String:      &StringType{},
		LargeBinary: &LargeBinaryType{},
		LargeString: &LargeStringType{},
		BinaryView:  &BinaryViewType{},
		StringView:  &StringViewType{},
	}
)
View Source
var BooleanTraits booleanTraits
View Source
var Decimal128Traits decimal128Traits

Decimal128 traits

View Source
var Decimal256Traits decimal256Traits

Decimal256 traits

View Source
var Decimal32Traits decimal32Traits

Decimal32 traits

View Source
var Decimal64Traits decimal64Traits

Decimal64 traits

View Source
var (
	FixedWidthTypes = struct {
		Boolean              FixedWidthDataType
		Date32               FixedWidthDataType
		Date64               FixedWidthDataType
		DayTimeInterval      FixedWidthDataType
		Duration_s           FixedWidthDataType
		Duration_ms          FixedWidthDataType
		Duration_us          FixedWidthDataType
		Duration_ns          FixedWidthDataType
		Float16              FixedWidthDataType
		MonthInterval        FixedWidthDataType
		Time32s              FixedWidthDataType
		Time32ms             FixedWidthDataType
		Time64us             FixedWidthDataType
		Time64ns             FixedWidthDataType
		Timestamp_s          FixedWidthDataType
		Timestamp_ms         FixedWidthDataType
		Timestamp_us         FixedWidthDataType
		Timestamp_ns         FixedWidthDataType
		MonthDayNanoInterval FixedWidthDataType
	}{
		Boolean:              &BooleanType{},
		Date32:               &Date32Type{},
		Date64:               &Date64Type{},
		DayTimeInterval:      &DayTimeIntervalType{},
		Duration_s:           &DurationType{Unit: Second},
		Duration_ms:          &DurationType{Unit: Millisecond},
		Duration_us:          &DurationType{Unit: Microsecond},
		Duration_ns:          &DurationType{Unit: Nanosecond},
		Float16:              &Float16Type{},
		MonthInterval:        &MonthIntervalType{},
		Time32s:              &Time32Type{Unit: Second},
		Time32ms:             &Time32Type{Unit: Millisecond},
		Time64us:             &Time64Type{Unit: Microsecond},
		Time64ns:             &Time64Type{Unit: Nanosecond},
		Timestamp_s:          &TimestampType{Unit: Second, TimeZone: "UTC"},
		Timestamp_ms:         &TimestampType{Unit: Millisecond, TimeZone: "UTC"},
		Timestamp_us:         &TimestampType{Unit: Microsecond, TimeZone: "UTC"},
		Timestamp_ns:         &TimestampType{Unit: Nanosecond, TimeZone: "UTC"},
		MonthDayNanoInterval: &MonthDayNanoIntervalType{},
	}
)
View Source
var Float16Traits float16Traits

Float16 traits

View Source
var (
	PrimitiveTypes = struct {
		Int8    DataType
		Int16   DataType
		Int32   DataType
		Int64   DataType
		Uint8   DataType
		Uint16  DataType
		Uint32  DataType
		Uint64  DataType
		Float32 DataType
		Float64 DataType
		Date32  DataType
		Date64  DataType
	}{

		Int8:    &Int8Type{},
		Int16:   &Int16Type{},
		Int32:   &Int32Type{},
		Int64:   &Int64Type{},
		Uint8:   &Uint8Type{},
		Uint16:  &Uint16Type{},
		Uint32:  &Uint32Type{},
		Uint64:  &Uint64Type{},
		Float32: &Float32Type{},
		Float64: &Float64Type{},
		Date32:  &Date32Type{},
		Date64:  &Date64Type{},
	}
)
View Source
var TimestampTraits timestampTraits
View Source
var ViewHeaderTraits viewHeaderTraits

Functions

func ConvertTimestampValue

func ConvertTimestampValue(in, out TimeUnit, value int64) int64

func GetBytes

func GetBytes[T FixedWidthType | ViewHeader](in []T) []byte

GetBytes reinterprets a slice of T to a slice of bytes.

func GetData

func GetData[T FixedWidthType | ViewHeader](in []byte) []T

GetData reinterprets a slice of bytes to a slice of T.

NOTE: the buffer's length must be a multiple of Sizeof(T).

func GetOffsets

func GetOffsets[T int32 | int64](data ArrayData, i int) []T

GetOffsets reinterprets the data.Buffers()[i] to a slice of T with len=data.Len()+1.

NOTE: the buffer's length must be a multiple of Sizeof(T).

func GetValues

func GetValues[T FixedWidthType](data ArrayData, i int) []T

GetValues reinterprets the data.Buffers()[i] to a slice of T with len=data.Len().

If the buffer is nil, nil will be returned.

NOTE: the buffer's length must be a multiple of Sizeof(T).

func HashType

func HashType(seed maphash.Seed, dt DataType) uint64

func IsBaseBinary

func IsBaseBinary(t Type) bool

IsBaseBinary returns true for Binary/String and their LARGE variants

func IsBinaryLike

func IsBinaryLike(t Type) bool

IsBinaryLike returns true only for BINARY and STRING

func IsDecimal

func IsDecimal(t Type) bool

IsDecimal returns true for Decimal128 and Decimal256

func IsFixedSizeBinary

func IsFixedSizeBinary(t Type) bool

IsFixedSizeBinary returns true for Decimal32/64/128/256 and FixedSizeBinary

func IsFloating

func IsFloating(t Type) bool

IsFloating is a helper that returns true if the type ID provided is one of Float16, Float32, or Float64

func IsInteger

func IsInteger(t Type) bool

IsInteger is a helper to return true if the type ID provided is one of the integral types of uint or int with the varying sizes.

func IsLargeBinaryLike

func IsLargeBinaryLike(t Type) bool

IsLargeBinaryLike returns true for only LARGE_BINARY and LARGE_STRING

func IsListLike

func IsListLike(t Type) bool

IsListLike returns true for List, LargeList, FixedSizeList, and Map

func IsNested

func IsNested(t Type) bool

IsNested returns true for List, LargeList, FixedSizeList, Map, Struct, and Unions

func IsPrimitive

func IsPrimitive(t Type) bool

IsPrimitive returns true if the provided type ID represents a fixed width primitive type.

func IsSignedInteger

func IsSignedInteger(t Type) bool

IsSignedInteger is a helper that returns true if the type ID provided is one of the int integral types (int8, int16, int32, int64)

func IsUnion

func IsUnion(t Type) bool

IsUnion returns true for Sparse and Dense Unions

func IsUnsignedInteger

func IsUnsignedInteger(t Type) bool

IsUnsignedInteger is a helper that returns true if the type ID provided is one of the uint integral types (uint8, uint16, uint32, uint64)

func IsViewInline

func IsViewInline(length int) bool

func RegisterExtensionType

func RegisterExtensionType(typ ExtensionType) error

RegisterExtensionType registers the provided ExtensionType by calling ExtensionName to use as a Key for registering the type. If a type with the same name is already registered then this will return an error saying so, otherwise it will return nil if successful registering the type. This function is safe to call from multiple goroutines simultaneously.

func TypeEqual

func TypeEqual(left, right DataType, opts ...TypeEqualOption) bool

TypeEqual checks if two DataType are the same, optionally checking metadata equality for STRUCT types.

func TypesToString

func TypesToString(types []DataType) string

TypesToString is a convenience function to create a list of types which are comma delimited as a string

func UnregisterExtensionType

func UnregisterExtensionType(typName string) error

UnregisterExtensionType removes the type with the given name from the registry causing any messages with that type which come in to be expressed with their metadata and underlying type instead of the extension type that isn't known. This function is safe to call from multiple goroutines simultaneously.

Types

type Array

type Array interface {
	json.Marshaler

	fmt.Stringer

	// DataType returns the type metadata for this instance.
	DataType() DataType

	// NullN returns the number of null values in the array.
	NullN() int

	// NullBitmapBytes returns a byte slice of the validity bitmap.
	NullBitmapBytes() []byte

	// IsNull returns true if value at index is null.
	// NOTE: IsNull will panic if NullBitmapBytes is not empty and i is out of range (i < 0 or i ≥ Len).
	IsNull(i int) bool

	// IsValid returns true if value at index is not null.
	// NOTE: IsValid will panic if NullBitmapBytes is not empty and i is out of range (i < 0 or i ≥ Len).
	IsValid(i int) bool
	// ValueStr returns the value at index as a string.
	ValueStr(i int) string

	// Get single value to be marshalled with `json.Marshal`
	GetOneForMarshal(i int) interface{}

	Data() ArrayData

	// Len returns the number of elements in the array.
	Len() int

	// Retain increases the reference count by 1.
	// Retain may be called simultaneously from multiple goroutines.
	Retain()

	// Release decreases the reference count by 1.
	// Release may be called simultaneously from multiple goroutines.
	// When the reference count goes to zero, the memory is freed.
	Release()
}

Array represents an immutable sequence of values using the Arrow in-memory format.

type ArrayData

type ArrayData interface {
	// Retain increases the reference count by 1, it is safe to call
	// in multiple goroutines simultaneously.
	Retain()
	// Release decreases the reference count by 1, it is safe to call
	// in multiple goroutines simultaneously. Data is removed when reference
	// count is 0.
	Release()
	// DataType returns the current datatype stored in the object.
	DataType() DataType
	// NullN returns the number of nulls for this data instance.
	NullN() int
	// Len returns the length of this data instance
	Len() int
	// Offset returns the offset into the raw buffers where this data begins
	Offset() int
	// Buffers returns the slice of raw data buffers for this data instance. Their
	// meaning depends on the context of the data type.
	Buffers() []*memory.Buffer
	// Children returns the slice of children data instances, only relevant for
	// nested data types. For instance, List data will have a single child containing
	// elements of all the rows and Struct data will contain numfields children which
	// are the arrays for each field of the struct.
	Children() []ArrayData
	// Reset allows reusing this ArrayData object by replacing the data in this ArrayData
	// object without changing the reference count.
	Reset(newtype DataType, newlength int, newbuffers []*memory.Buffer, newchildren []ArrayData, newnulls int, newoffset int)
	// Dictionary returns the ArrayData object for the dictionary if this is a
	// dictionary array, otherwise it will be nil.
	Dictionary() ArrayData
	// SizeInBytes returns the size of the ArrayData buffers and any children and/or dictionary in bytes.
	SizeInBytes() uint64
}

ArrayData is the underlying memory and metadata of an Arrow array, corresponding to the same-named object in the C++ implementation.

The Array interface and subsequent typed objects provide strongly typed accessors which support marshalling and other patterns to the data. This interface allows direct access to the underlying raw byte buffers which allows for manipulating the internal data and casting. For example, one could cast the raw bytes from int64 to float64 like so:

arrdata := GetMyInt64Data().Data()
newdata := array.NewData(arrow.PrimitiveTypes.Float64, arrdata.Len(),
		arrdata.Buffers(), nil, arrdata.NullN(), arrdata.Offset())
defer newdata.Release()
float64arr := array.NewFloat64Data(newdata)
defer float64arr.Release()

This is also useful in an analytics setting where memory may be reused. For example, if we had a group of operations all returning float64 such as:

Log(Sqrt(Expr(arr)))

The low-level implementations could have signatures such as:

func Log(values arrow.ArrayData) arrow.ArrayData

Another example would be a function that consumes one or more memory buffers in an input array and replaces them with newly-allocated data, changing the output data type as well.

type BinaryDataType

type BinaryDataType interface {
	DataType
	IsUtf8() bool
	// contains filtered or unexported methods
}

type BinaryType

type BinaryType struct{}

func (*BinaryType) Fingerprint

func (t *BinaryType) Fingerprint() string

func (*BinaryType) ID

func (t *BinaryType) ID() Type

func (BinaryType) IsUtf8

func (BinaryType) IsUtf8() bool

func (*BinaryType) Layout

func (t *BinaryType) Layout() DataTypeLayout

func (*BinaryType) Name

func (t *BinaryType) Name() string

func (*BinaryType) OffsetTypeTraits

func (t *BinaryType) OffsetTypeTraits() OffsetTraits

func (*BinaryType) String

func (t *BinaryType) String() string

type BinaryViewDataType

type BinaryViewDataType interface {
	BinaryDataType
	// contains filtered or unexported methods
}

type BinaryViewType

type BinaryViewType struct{}

func (*BinaryViewType) Fingerprint

func (t *BinaryViewType) Fingerprint() string

func (*BinaryViewType) ID

func (*BinaryViewType) ID() Type

func (*BinaryViewType) IsUtf8

func (*BinaryViewType) IsUtf8() bool

func (*BinaryViewType) Layout

func (*BinaryViewType) Layout() DataTypeLayout

func (*BinaryViewType) Name

func (*BinaryViewType) Name() string

func (*BinaryViewType) String

func (*BinaryViewType) String() string

type BooleanType

type BooleanType struct{}

func (*BooleanType) BitWidth

func (t *BooleanType) BitWidth() int

BitWidth returns the number of bits required to store a single element of this data type in memory.

func (BooleanType) Bytes

func (BooleanType) Bytes() int

func (*BooleanType) Fingerprint

func (t *BooleanType) Fingerprint() string

func (*BooleanType) ID

func (t *BooleanType) ID() Type

func (BooleanType) Layout

func (BooleanType) Layout() DataTypeLayout

func (*BooleanType) Name

func (t *BooleanType) Name() string

func (*BooleanType) String

func (t *BooleanType) String() string

type BufferKind

type BufferKind int8

BufferKind describes the type of buffer expected when defining a layout specification

const (
	KindFixedWidth BufferKind = iota
	KindVarWidth
	KindBitmap
	KindAlwaysNull
)

The expected types of buffers

type BufferSpec

type BufferSpec struct {
	Kind      BufferKind
	ByteWidth int // for KindFixedWidth
}

BufferSpec provides a specification for the buffers of a particular datatype

func SpecAlwaysNull

func SpecAlwaysNull() BufferSpec

func SpecBitmap

func SpecBitmap() BufferSpec

func SpecFixedWidth

func SpecFixedWidth(w int) BufferSpec

func SpecVariableWidth

func SpecVariableWidth() BufferSpec

func (BufferSpec) Equals

func (b BufferSpec) Equals(other BufferSpec) bool

type Chunked

type Chunked struct {
	// contains filtered or unexported fields
}

Chunked manages a collection of primitive arrays as one logical large array.

func NewChunked

func NewChunked(dtype DataType, chunks []Array) *Chunked

NewChunked returns a new chunked array from the slice of arrays.

NewChunked panics if the chunks do not have the same data type.

func (*Chunked) Chunk

func (a *Chunked) Chunk(i int) Array

func (*Chunked) Chunks

func (a *Chunked) Chunks() []Array

func (*Chunked) DataType

func (a *Chunked) DataType() DataType

func (*Chunked) Len

func (a *Chunked) Len() int

func (*Chunked) NullN

func (a *Chunked) NullN() int

func (*Chunked) Release

func (a *Chunked) Release()

Release decreases the reference count by 1. When the reference count goes to zero, the memory is freed. Release may be called simultaneously from multiple goroutines.

func (*Chunked) Retain

func (a *Chunked) Retain()

Retain increases the reference count by 1. Retain may be called simultaneously from multiple goroutines.

type Column

type Column struct {
	// contains filtered or unexported fields
}

Column is an immutable column data structure consisting of a field (type metadata) and a chunked data array.

To get strongly typed data from a Column, you need to iterate the chunks and type assert each individual Array. For example:

switch column.DataType().ID() {
case arrow.INT32:
	for _, c := range column.Data().Chunks() {
		arr := c.(*array.Int32)
		// do something with arr
	}
case arrow.INT64:
	for _, c := range column.Data().Chunks() {
		arr := c.(*array.Int64)
		// do something with arr
	}
case ...
}

func NewColumn

func NewColumn(field Field, chunks *Chunked) *Column

NewColumn returns a column from a field and a chunked data array.

NewColumn panics if the field's data type is inconsistent with the data type of the chunked data array.

func NewColumnFromArr

func NewColumnFromArr(field Field, arr Array) Column

NewColumnFromArr is a convenience function to create a column from a field and a non-chunked array.

This provides a simple mechanism for bypassing the middle step of constructing a Chunked array of one and then releasing it because of the ref counting.

func (*Column) Data

func (col *Column) Data() *Chunked

func (*Column) DataType

func (col *Column) DataType() DataType

func (*Column) Field

func (col *Column) Field() Field

func (*Column) Len

func (col *Column) Len() int

func (*Column) Name

func (col *Column) Name() string

func (*Column) NullN

func (col *Column) NullN() int

func (*Column) Release

func (col *Column) Release()

Release decreases the reference count by 1. When the reference count goes to zero, the memory is freed. Release may be called simultaneously from multiple goroutines.

func (*Column) Retain

func (col *Column) Retain()

Retain increases the reference count by 1. Retain may be called simultaneously from multiple goroutines.

type DataType

type DataType interface {
	fmt.Stringer
	ID() Type
	// Name is name of the data type.
	Name() string
	Fingerprint() string
	Layout() DataTypeLayout
}

DataType is the representation of an Arrow type.

var Null DataType = new(NullType)

Null serves both as a compile-time assertion that NullType satisfies the DataType interface and as a convenient element for use in schemas.

func GetDataType

func GetDataType[T NumericType | bool | string | []byte | float16.Num]() DataType

GetDataType returns the appropriate DataType for the given type T only for non-parametric types. This uses a map and reflection internally so don't call this in a tight loop, instead call this once and then use a closure with the result.

type DataTypeLayout

type DataTypeLayout struct {
	Buffers []BufferSpec
	HasDict bool
	// VariadicSpec is what the buffers beyond len(Buffers) are expected to conform to.
	VariadicSpec *BufferSpec
}

DataTypeLayout represents the physical layout of a datatype's buffers including the number of and types of those binary buffers. This will correspond with the buffers in the ArrayData for an array of that type.

type Date32

type Date32 int32

func Date32FromTime

func Date32FromTime(t time.Time) Date32

Date32FromTime returns a Date32 value from a time object

func (Date32) FormattedString

func (d Date32) FormattedString() string

func (Date32) ToTime

func (d Date32) ToTime() time.Time

type Date32Type

type Date32Type struct{}

func (*Date32Type) BitWidth

func (t *Date32Type) BitWidth() int

func (*Date32Type) Bytes

func (t *Date32Type) Bytes() int

func (*Date32Type) Fingerprint

func (t *Date32Type) Fingerprint() string

func (*Date32Type) ID

func (t *Date32Type) ID() Type

func (*Date32Type) Layout

func (t *Date32Type) Layout() DataTypeLayout

func (*Date32Type) Name

func (t *Date32Type) Name() string

func (*Date32Type) String

func (t *Date32Type) String() string

type Date64

type Date64 int64

func Date64FromTime

func Date64FromTime(t time.Time) Date64

Date64FromTime returns a Date64 value from a time object

func (Date64) FormattedString

func (d Date64) FormattedString() string

func (Date64) ToTime

func (d Date64) ToTime() time.Time

type Date64Type

type Date64Type struct{}

func (*Date64Type) BitWidth

func (t *Date64Type) BitWidth() int

func (*Date64Type) Bytes

func (t *Date64Type) Bytes() int

func (*Date64Type) Fingerprint

func (t *Date64Type) Fingerprint() string

func (*Date64Type) ID

func (t *Date64Type) ID() Type

func (*Date64Type) Layout

func (t *Date64Type) Layout() DataTypeLayout

func (*Date64Type) Name

func (t *Date64Type) Name() string

func (*Date64Type) String

func (t *Date64Type) String() string

type DayTimeInterval

type DayTimeInterval struct {
	Days         int32 `json:"days"`
	Milliseconds int32 `json:"milliseconds"`
}

DayTimeInterval represents a number of days and milliseconds (fraction of day).

type DayTimeIntervalType

type DayTimeIntervalType struct{}

DayTimeIntervalType is encoded as a pair of 32-bit signed integers, representing a number of days and milliseconds (fraction of day).

func (*DayTimeIntervalType) BitWidth

func (t *DayTimeIntervalType) BitWidth() int

BitWidth returns the number of bits required to store a single element of this data type in memory.

func (DayTimeIntervalType) Bytes

func (DayTimeIntervalType) Bytes() int

func (*DayTimeIntervalType) Fingerprint

func (*DayTimeIntervalType) Fingerprint() string

func (*DayTimeIntervalType) ID

func (*DayTimeIntervalType) ID() Type

func (DayTimeIntervalType) Layout

func (DayTimeIntervalType) Layout() DataTypeLayout

func (*DayTimeIntervalType) Name

func (*DayTimeIntervalType) Name() string

func (*DayTimeIntervalType) String

func (*DayTimeIntervalType) String() string

type Decimal128Type

type Decimal128Type struct {
	Precision int32
	Scale     int32
}

Decimal128Type represents a fixed-size 128-bit decimal type.

func (*Decimal128Type) BitWidth

func (*Decimal128Type) BitWidth() int

func (*Decimal128Type) Bytes

func (*Decimal128Type) Bytes() int

func (*Decimal128Type) Fingerprint

func (t *Decimal128Type) Fingerprint() string

func (*Decimal128Type) GetPrecision

func (t *Decimal128Type) GetPrecision() int32

func (*Decimal128Type) GetScale

func (t *Decimal128Type) GetScale() int32

func (*Decimal128Type) ID

func (*Decimal128Type) ID() Type

func (Decimal128Type) Layout

func (Decimal128Type) Layout() DataTypeLayout

func (*Decimal128Type) Name

func (*Decimal128Type) Name() string

func (*Decimal128Type) String

func (t *Decimal128Type) String() string

type Decimal256Type

type Decimal256Type struct {
	Precision int32
	Scale     int32
}

Decimal256Type represents a fixed-size 256-bit decimal type.

func (*Decimal256Type) BitWidth

func (*Decimal256Type) BitWidth() int

func (*Decimal256Type) Bytes

func (*Decimal256Type) Bytes() int

func (*Decimal256Type) Fingerprint

func (t *Decimal256Type) Fingerprint() string

func (*Decimal256Type) GetPrecision

func (t *Decimal256Type) GetPrecision() int32

func (*Decimal256Type) GetScale

func (t *Decimal256Type) GetScale() int32

func (*Decimal256Type) ID

func (*Decimal256Type) ID() Type

func (Decimal256Type) Layout

func (Decimal256Type) Layout() DataTypeLayout

func (*Decimal256Type) Name

func (*Decimal256Type) Name() string

func (*Decimal256Type) String

func (t *Decimal256Type) String() string

type Decimal32Type

type Decimal32Type struct {
	Precision int32
	Scale     int32
}

Decimal32Type represents a fixed-size 32-bit decimal type.

func (*Decimal32Type) BitWidth

func (*Decimal32Type) BitWidth() int

func (*Decimal32Type) Bytes

func (*Decimal32Type) Bytes() int

func (*Decimal32Type) Fingerprint

func (t *Decimal32Type) Fingerprint() string

func (*Decimal32Type) GetPrecision

func (t *Decimal32Type) GetPrecision() int32

func (*Decimal32Type) GetScale

func (t *Decimal32Type) GetScale() int32

func (*Decimal32Type) ID

func (*Decimal32Type) ID() Type

func (Decimal32Type) Layout

func (Decimal32Type) Layout() DataTypeLayout

func (*Decimal32Type) Name

func (*Decimal32Type) Name() string

func (*Decimal32Type) String

func (t *Decimal32Type) String() string

type Decimal64Type

type Decimal64Type struct {
	Precision int32
	Scale     int32
}

Decimal64Type represents a fixed-size 64-bit decimal type.

func (*Decimal64Type) BitWidth

func (*Decimal64Type) BitWidth() int

func (*Decimal64Type) Bytes

func (*Decimal64Type) Bytes() int

func (*Decimal64Type) Fingerprint

func (t *Decimal64Type) Fingerprint() string

func (*Decimal64Type) GetPrecision

func (t *Decimal64Type) GetPrecision() int32

func (*Decimal64Type) GetScale

func (t *Decimal64Type) GetScale() int32

func (*Decimal64Type) ID

func (*Decimal64Type) ID() Type

func (Decimal64Type) Layout

func (Decimal64Type) Layout() DataTypeLayout

func (*Decimal64Type) Name

func (*Decimal64Type) Name() string

func (*Decimal64Type) String

func (t *Decimal64Type) String() string

type DecimalType

type DecimalType interface {
	DataType
	GetPrecision() int32
	GetScale() int32
	BitWidth() int
}

func NarrowestDecimalType

func NarrowestDecimalType(prec, scale int32) (DecimalType, error)

NarrowestDecimalType constructs the smallest decimal type that can represent the requested precision. An error is returned if the requested precision cannot be represented (prec <= 0 || prec > 76).

For reference:

prec in [ 1,  9] => Decimal32Type
prec in [10, 18] => Decimal64Type
prec in [19, 38] => Decimal128Type
prec in [39, 76] => Decimal256Type

func NewDecimalType

func NewDecimalType(id Type, prec, scale int32) (DecimalType, error)

type DenseUnionType

type DenseUnionType struct {
	// contains filtered or unexported fields
}

DenseUnionType is the concrete type for dense union data.

A dense union is a nested type where each logical value is taken from a single child, at a specific offset. A buffer of 8-bit type ids (typed as UnionTypeCode) indicates which child a given logical value is to be taken from, and a buffer of 32-bit offsets indicates which physical position in the given child array has the logical value for that index.

Unlike a sparse union, a dense union allows encoding only the child values which are actually referred to by the union array. This is counterbalanced by the additional footprint of the offsets buffer, and the additional indirection cost when looking up values.

Unlike most other types, unions don't have a top-level validity bitmap

func DenseUnionFromArrays

func DenseUnionFromArrays(children []Array, fields []string, codes []UnionTypeCode) *DenseUnionType

DenseUnionFromArrays enables creating a union type from a list of Arrays, field names, and type codes. len(fields) should be either 0 or equal to len(children). len(codes) should also be either 0, or equal to len(children).

If len(fields) == 0, then the fields will be named numerically as "0", "1", "2"... and so on. If len(codes) == 0, then the type codes will be constructed as [0, 1, 2, ..., n].

func DenseUnionOf

func DenseUnionOf(fields []Field, typeCodes []UnionTypeCode) *DenseUnionType

DenseUnionOf is equivalent to UnionOf(arrow.DenseMode, fields, typeCodes), constructing a DenseUnionType from a list of fields and type codes.

If len(fields) != len(typeCodes) this will panic. They are allowed to be of length 0.

func (*DenseUnionType) ChildIDs

func (t *DenseUnionType) ChildIDs() []int

func (*DenseUnionType) Fields

func (t *DenseUnionType) Fields() []Field

Fields method provides a copy of union type fields (so it can be safely mutated and will not result in updating the union type).

func (*DenseUnionType) Fingerprint

func (t *DenseUnionType) Fingerprint() string

func (DenseUnionType) ID

func (DenseUnionType) ID() Type

func (DenseUnionType) Layout

func (DenseUnionType) Layout() DataTypeLayout

func (*DenseUnionType) MaxTypeCode

func (t *DenseUnionType) MaxTypeCode() (max UnionTypeCode)

func (DenseUnionType) Mode

func (DenseUnionType) Mode() UnionMode

func (DenseUnionType) Name

func (DenseUnionType) Name() string

func (*DenseUnionType) NumFields

func (t *DenseUnionType) NumFields() int

func (DenseUnionType) OffsetTypeTraits

func (DenseUnionType) OffsetTypeTraits() OffsetTraits

func (*DenseUnionType) String

func (t *DenseUnionType) String() string

func (*DenseUnionType) TypeCodes

func (t *DenseUnionType) TypeCodes() []UnionTypeCode

type DictionaryType

type DictionaryType struct {
	IndexType DataType
	ValueType DataType
	Ordered   bool
}

DictionaryType represents categorical or dictionary-encoded in-memory data. It contains a dictionary-encoded value type (any type) and an index type (any integer type).

func (*DictionaryType) BitWidth

func (d *DictionaryType) BitWidth() int

func (*DictionaryType) Bytes

func (d *DictionaryType) Bytes() int

func (*DictionaryType) Fingerprint

func (d *DictionaryType) Fingerprint() string

func (*DictionaryType) ID

func (*DictionaryType) ID() Type

func (*DictionaryType) Layout

func (d *DictionaryType) Layout() DataTypeLayout

func (*DictionaryType) Name

func (*DictionaryType) Name() string

func (*DictionaryType) String

func (d *DictionaryType) String() string

type Duration

type Duration int64

type DurationType

type DurationType struct {
	Unit TimeUnit
}

DurationType is encoded as a 64-bit signed integer, representing an amount of elapsed time without any relation to a calendar artifact.

func (*DurationType) BitWidth

func (*DurationType) BitWidth() int

func (*DurationType) Bytes

func (*DurationType) Bytes() int

func (*DurationType) Fingerprint

func (t *DurationType) Fingerprint() string

func (*DurationType) ID

func (*DurationType) ID() Type

func (DurationType) Layout

func (DurationType) Layout() DataTypeLayout

func (*DurationType) Name

func (*DurationType) Name() string

func (*DurationType) String

func (t *DurationType) String() string

func (*DurationType) TimeUnit

func (t *DurationType) TimeUnit() TimeUnit

type EncodedType

type EncodedType interface {
	DataType
	Encoded() DataType
}

type ExtensionBase

type ExtensionBase struct {
	// Storage is the underlying storage type
	Storage DataType
}

ExtensionBase is the base struct for user-defined Extension Types which must be embedded in any user-defined types like so:

type UserDefinedType struct {
    arrow.ExtensionBase
    // any other data
}

func (*ExtensionBase) Fields

func (e *ExtensionBase) Fields() []Field

func (*ExtensionBase) Fingerprint

func (e *ExtensionBase) Fingerprint() string

func (*ExtensionBase) ID

func (*ExtensionBase) ID() Type

ID always returns arrow.EXTENSION and should not be overridden

func (*ExtensionBase) Layout

func (e *ExtensionBase) Layout() DataTypeLayout

func (*ExtensionBase) Name

func (*ExtensionBase) Name() string

Name should always return "extension" and should not be overridden

func (*ExtensionBase) NumFields

func (e *ExtensionBase) NumFields() int

func (*ExtensionBase) StorageType

func (e *ExtensionBase) StorageType() DataType

StorageType returns the underlying storage type and exists so that functions written against the ExtensionType interface can access the storage type.

func (*ExtensionBase) String

func (e *ExtensionBase) String() string

String by default will return "extension_type<storage=storage_type>" but can be overridden to customize what is printed out when printing this extension type.

type ExtensionType

type ExtensionType interface {
	DataType
	// ArrayType should return the reflect.TypeOf(ExtensionArrayType{}) where the
	// ExtensionArrayType is a type that implements the array.ExtensionArray interface.
	// Such a type must also embed the array.ExtensionArrayBase in it. This will be used
	// when creating arrays of this ExtensionType by using reflect.New
	ArrayType() reflect.Type
	// ExtensionName is what will be used when registering / unregistering this extension
	// type. Multiple user-defined types can be defined with a parameterized ExtensionType
	// as long as the parameter is used in the ExtensionName to distinguish the instances
	// in the global Extension Type registry.
	// The return from this is also what will be placed in the metadata for IPC communication
	// under the key ARROW:extension:name
	ExtensionName() string
	// StorageType returns the underlying storage type which is used by this extension
	// type. It is already implemented by the ExtensionBase struct and thus does not need
	// to be re-implemented by a user-defined type.
	StorageType() DataType
	// ExtensionEquals is used to tell whether two ExtensionType instances are equal types.
	ExtensionEquals(ExtensionType) bool
	// Serialize should produce any extra metadata necessary for initializing an instance of
	// this user-defined type. Not all user-defined types require this and it is valid to return
	// an empty string from this function. This is used for the IPC format and will be
	// added to metadata for IPC communication under the key ARROW:extension:metadata
	// This should be implemented such that it is valid to be called by multiple goroutines
	// concurrently.
	Serialize() string
	// Deserialize is called when reading in extension arrays and types via the IPC format
	// in order to construct an instance of the appropriate extension type. The passed in data
	// is pulled from the ARROW:extension:metadata key and may be an empty string.
	// If the storage type is incorrect or something else is invalid with the data this should
	// return nil and an appropriate error.
	Deserialize(storageType DataType, data string) (ExtensionType, error)
	// contains filtered or unexported methods
}

ExtensionType is an interface for handling user-defined types. They must be DataTypes and must embed arrow.ExtensionBase in them in order to work properly ensuring that they always have the expected base behavior.

The arrow.ExtensionBase that needs to be embedded implements the DataType interface leaving the remaining functions having to be implemented by the actual user-defined type in order to be handled properly.

func GetExtensionType

func GetExtensionType(typName string) ExtensionType

GetExtensionType retrieves and returns the extension type of the given name from the global extension type registry. If the type isn't found it will return nil. This function is safe to call from multiple goroutines concurrently.

type Field

type Field struct {
	Name     string   // Field name
	Type     DataType // The field's data type
	Nullable bool     // Fields can be nullable
	Metadata Metadata // The field's metadata, if any
}

func (Field) Equal

func (f Field) Equal(o Field) bool

func (Field) Fingerprint

func (f Field) Fingerprint() string

func (Field) HasMetadata

func (f Field) HasMetadata() bool

func (Field) String

func (f Field) String() string

type FixedSizeBinaryType

type FixedSizeBinaryType struct {
	ByteWidth int
}

func (*FixedSizeBinaryType) BitWidth

func (t *FixedSizeBinaryType) BitWidth() int

func (*FixedSizeBinaryType) Bytes

func (t *FixedSizeBinaryType) Bytes() int

func (*FixedSizeBinaryType) Fingerprint

func (t *FixedSizeBinaryType) Fingerprint() string

func (*FixedSizeBinaryType) ID

func (*FixedSizeBinaryType) ID() Type

func (*FixedSizeBinaryType) Layout

func (t *FixedSizeBinaryType) Layout() DataTypeLayout

func (*FixedSizeBinaryType) Name

func (*FixedSizeBinaryType) Name() string

func (*FixedSizeBinaryType) String

func (t *FixedSizeBinaryType) String() string

type FixedSizeListType

type FixedSizeListType struct {
	// contains filtered or unexported fields
}

FixedSizeListType describes a nested type in which each array slot contains a fixed-size sequence of values, all having the same relative type.

func FixedSizeListOf

func FixedSizeListOf(n int32, t DataType) *FixedSizeListType

FixedSizeListOf returns the list type with element type t. For example, if t represents int32, FixedSizeListOf(10, t) represents [10]int32.

FixedSizeListOf panics if t is nil or invalid. FixedSizeListOf panics if n is <= 0. NullableElem defaults to true

func FixedSizeListOfField

func FixedSizeListOfField(n int32, f Field) *FixedSizeListType

func FixedSizeListOfNonNullable

func FixedSizeListOfNonNullable(n int32, t DataType) *FixedSizeListType

FixedSizeListOfNonNullable is like FixedSizeListOf but NullableElem defaults to false indicating that the child type should be marked as non-nullable.

func (*FixedSizeListType) Elem

func (t *FixedSizeListType) Elem() DataType

Elem returns the FixedSizeListType's element type.

func (*FixedSizeListType) ElemField

func (t *FixedSizeListType) ElemField() Field

func (*FixedSizeListType) Fields

func (t *FixedSizeListType) Fields() []Field

func (*FixedSizeListType) Fingerprint

func (t *FixedSizeListType) Fingerprint() string

func (*FixedSizeListType) ID

func (*FixedSizeListType) ID() Type

func (*FixedSizeListType) Layout

func (*FixedSizeListType) Layout() DataTypeLayout

func (*FixedSizeListType) Len

func (t *FixedSizeListType) Len() int32

Len returns the FixedSizeListType's size.

func (*FixedSizeListType) Name

func (*FixedSizeListType) Name() string

func (*FixedSizeListType) NumFields

func (t *FixedSizeListType) NumFields() int

func (*FixedSizeListType) SetElemNullable

func (t *FixedSizeListType) SetElemNullable(n bool)

func (*FixedSizeListType) String

func (t *FixedSizeListType) String() string

type FixedWidthDataType

type FixedWidthDataType interface {
	DataType
	// BitWidth returns the number of bits required to store a single element of this data type in memory.
	BitWidth() int
	// Bytes returns the number of bytes required to store a single element of this data type in memory.
	Bytes() int
}

FixedWidthDataType is the representation of an Arrow type that requires a fixed number of bits in memory for each element.

type FixedWidthType

type FixedWidthType interface {
	IntType | UintType |
		FloatType | decimal.DecimalTypes |
		DayTimeInterval | MonthDayNanoInterval
}

FixedWidthType is a type constraint for raw values in Arrow that can be represented as FixedWidth byte slices. Specifically this is for using Go generics to easily re-type a byte slice to a properly-typed slice. Booleans are excluded here since they are represented by Arrow as a bitmap and thus the buffer can't be just reinterpreted as a []bool

type Float16Type

type Float16Type struct{}

Float16Type represents a floating point value encoded with a 16-bit precision.

func (*Float16Type) BitWidth

func (t *Float16Type) BitWidth() int

BitWidth returns the number of bits required to store a single element of this data type in memory.

func (Float16Type) Bytes

func (Float16Type) Bytes() int

func (*Float16Type) Fingerprint

func (t *Float16Type) Fingerprint() string

func (*Float16Type) ID

func (t *Float16Type) ID() Type

func (Float16Type) Layout

func (Float16Type) Layout() DataTypeLayout

func (*Float16Type) Name

func (t *Float16Type) Name() string

func (*Float16Type) String

func (t *Float16Type) String() string

type Float32Type

type Float32Type struct{}

func (*Float32Type) BitWidth

func (t *Float32Type) BitWidth() int

func (*Float32Type) Bytes

func (t *Float32Type) Bytes() int

func (*Float32Type) Fingerprint

func (t *Float32Type) Fingerprint() string

func (*Float32Type) ID

func (t *Float32Type) ID() Type

func (*Float32Type) Layout

func (t *Float32Type) Layout() DataTypeLayout

func (*Float32Type) Name

func (t *Float32Type) Name() string

func (*Float32Type) String

func (t *Float32Type) String() string

type Float64Type

type Float64Type struct{}

func (*Float64Type) BitWidth

func (t *Float64Type) BitWidth() int

func (*Float64Type) Bytes

func (t *Float64Type) Bytes() int

func (*Float64Type) Fingerprint

func (t *Float64Type) Fingerprint() string

func (*Float64Type) ID

func (t *Float64Type) ID() Type

func (*Float64Type) Layout

func (t *Float64Type) Layout() DataTypeLayout

func (*Float64Type) Name

func (t *Float64Type) Name() string

func (*Float64Type) String

func (t *Float64Type) String() string

type FloatType

type FloatType interface {
	float16.Num | constraints.Float
}

FloatType is a type constraint for raw values for representing floating point values in Arrow. This consists of constraints.Float and float16.Num.

type Int16Type

type Int16Type struct{}

func (*Int16Type) BitWidth

func (t *Int16Type) BitWidth() int

func (*Int16Type) Bytes

func (t *Int16Type) Bytes() int

func (*Int16Type) Fingerprint

func (t *Int16Type) Fingerprint() string

func (*Int16Type) ID

func (t *Int16Type) ID() Type

func (*Int16Type) Layout

func (t *Int16Type) Layout() DataTypeLayout

func (*Int16Type) Name

func (t *Int16Type) Name() string

func (*Int16Type) String

func (t *Int16Type) String() string

type Int32Type

type Int32Type struct{}

func (*Int32Type) BitWidth

func (t *Int32Type) BitWidth() int

func (*Int32Type) Bytes

func (t *Int32Type) Bytes() int

func (*Int32Type) Fingerprint

func (t *Int32Type) Fingerprint() string

func (*Int32Type) ID

func (t *Int32Type) ID() Type

func (*Int32Type) Layout

func (t *Int32Type) Layout() DataTypeLayout

func (*Int32Type) Name

func (t *Int32Type) Name() string

func (*Int32Type) String

func (t *Int32Type) String() string

type Int64Type

type Int64Type struct{}

func (*Int64Type) BitWidth

func (t *Int64Type) BitWidth() int

func (*Int64Type) Bytes

func (t *Int64Type) Bytes() int

func (*Int64Type) Fingerprint

func (t *Int64Type) Fingerprint() string

func (*Int64Type) ID

func (t *Int64Type) ID() Type

func (*Int64Type) Layout

func (t *Int64Type) Layout() DataTypeLayout

func (*Int64Type) Name

func (t *Int64Type) Name() string

func (*Int64Type) String

func (t *Int64Type) String() string

type Int8Type

type Int8Type struct{}

func (*Int8Type) BitWidth

func (t *Int8Type) BitWidth() int

func (*Int8Type) Bytes

func (t *Int8Type) Bytes() int

func (*Int8Type) Fingerprint

func (t *Int8Type) Fingerprint() string

func (*Int8Type) ID

func (t *Int8Type) ID() Type

func (*Int8Type) Layout

func (t *Int8Type) Layout() DataTypeLayout

func (*Int8Type) Name

func (t *Int8Type) Name() string

func (*Int8Type) String

func (t *Int8Type) String() string

type IntType

type IntType interface {
	~int8 | ~int16 | ~int32 | ~int64
}

IntType is a type constraint for raw values represented as signed integer types by Arrow. We aren't just using constraints.Signed because we don't want to include the raw `int` type here whose size changes based on the architecture (int32 on 32-bit architectures and int64 on 64-bit architectures).

This will also cover types like MonthInterval or the time types as their underlying types are int32 and int64 which will get covered by using the ~

type LargeBinaryType

type LargeBinaryType struct{}

func (*LargeBinaryType) Fingerprint

func (t *LargeBinaryType) Fingerprint() string

func (*LargeBinaryType) ID

func (t *LargeBinaryType) ID() Type

func (LargeBinaryType) IsUtf8

func (LargeBinaryType) IsUtf8() bool

func (*LargeBinaryType) Layout

func (t *LargeBinaryType) Layout() DataTypeLayout

func (*LargeBinaryType) Name

func (t *LargeBinaryType) Name() string

func (*LargeBinaryType) OffsetTypeTraits

func (t *LargeBinaryType) OffsetTypeTraits() OffsetTraits

func (*LargeBinaryType) String

func (t *LargeBinaryType) String() string

type LargeListType

type LargeListType struct {
	ListType
}

func LargeListOf

func LargeListOf(t DataType) *LargeListType

LargeListOf returns the list type with element type t. For example, if t represents int32, LargeListOf(t) represents []int32.

LargeListOf panics if t is nil or invalid. NullableElem defaults to true

func LargeListOfField

func LargeListOfField(f Field) *LargeListType

func LargeListOfNonNullable

func LargeListOfNonNullable(t DataType) *LargeListType

LargeListOfNonNullable is like LargeListOf but NullableElem defaults to false, indicating that the child type should be marked as non-nullable.

func (*LargeListType) Fingerprint

func (t *LargeListType) Fingerprint() string

func (LargeListType) ID

func (LargeListType) ID() Type

func (*LargeListType) Layout

func (*LargeListType) Layout() DataTypeLayout

func (LargeListType) Name

func (LargeListType) Name() string

func (*LargeListType) OffsetTypeTraits

func (*LargeListType) OffsetTypeTraits() OffsetTraits

func (*LargeListType) String

func (t *LargeListType) String() string

type LargeListViewType

type LargeListViewType struct {
	// contains filtered or unexported fields
}

func LargeListViewOf

func LargeListViewOf(t DataType) *LargeListViewType

LargeListViewOf returns the list-view type with element type t. For example, if t represents int32, LargeListViewOf(t) represents []int32.

LargeListViewOf panics if t is nil or invalid. NullableElem defaults to true

func LargeListViewOfField

func LargeListViewOfField(f Field) *LargeListViewType

func LargeListViewOfNonNullable

func LargeListViewOfNonNullable(t DataType) *LargeListViewType

LargeListViewOfNonNullable is like LargeListViewOf but NullableElem defaults to false, indicating that the child type should be marked as non-nullable.

func (*LargeListViewType) Elem

func (t *LargeListViewType) Elem() DataType

Elem returns the LargeListViewType's element type.

func (*LargeListViewType) ElemField

func (t *LargeListViewType) ElemField() Field

func (*LargeListViewType) Fields

func (t *LargeListViewType) Fields() []Field

func (*LargeListViewType) Fingerprint

func (t *LargeListViewType) Fingerprint() string

func (*LargeListViewType) ID

func (*LargeListViewType) ID() Type

func (*LargeListViewType) Layout

func (*LargeListViewType) Name

func (*LargeListViewType) Name() string

func (*LargeListViewType) NumFields

func (t *LargeListViewType) NumFields() int

func (*LargeListViewType) OffsetTypeTraits

func (*LargeListViewType) OffsetTypeTraits() OffsetTraits

func (*LargeListViewType) SetElemMetadata

func (t *LargeListViewType) SetElemMetadata(md Metadata)

func (*LargeListViewType) SetElemNullable

func (t *LargeListViewType) SetElemNullable(n bool)

func (*LargeListViewType) String

func (t *LargeListViewType) String() string

type LargeStringType

type LargeStringType struct{}

func (*LargeStringType) Fingerprint

func (t *LargeStringType) Fingerprint() string

func (*LargeStringType) ID

func (t *LargeStringType) ID() Type

func (LargeStringType) IsUtf8

func (LargeStringType) IsUtf8() bool

func (*LargeStringType) Layout

func (t *LargeStringType) Layout() DataTypeLayout

func (*LargeStringType) Name

func (t *LargeStringType) Name() string

func (*LargeStringType) OffsetTypeTraits

func (t *LargeStringType) OffsetTypeTraits() OffsetTraits

func (*LargeStringType) String

func (t *LargeStringType) String() string

type ListLikeType

type ListLikeType interface {
	DataType
	Elem() DataType
	ElemField() Field
}

type ListType

type ListType struct {
	// contains filtered or unexported fields
}

ListType describes a nested type in which each array slot contains a variable-size sequence of values, all having the same relative type.

func ListOf

func ListOf(t DataType) *ListType

ListOf returns the list type with element type t. For example, if t represents int32, ListOf(t) represents []int32.

ListOf panics if t is nil or invalid. NullableElem defaults to true

func ListOfField

func ListOfField(f Field) *ListType

func ListOfNonNullable

func ListOfNonNullable(t DataType) *ListType

ListOfNonNullable is like ListOf but NullableElem defaults to false, indicating that the child type should be marked as non-nullable.

func (*ListType) Elem

func (t *ListType) Elem() DataType

Elem returns the ListType's element type.

func (*ListType) ElemField

func (t *ListType) ElemField() Field

func (*ListType) Fields

func (t *ListType) Fields() []Field

func (*ListType) Fingerprint

func (t *ListType) Fingerprint() string

func (*ListType) ID

func (*ListType) ID() Type

func (*ListType) Layout

func (*ListType) Layout() DataTypeLayout

func (*ListType) Name

func (*ListType) Name() string

func (*ListType) NumFields

func (t *ListType) NumFields() int

func (*ListType) OffsetTypeTraits

func (*ListType) OffsetTypeTraits() OffsetTraits

func (*ListType) SetElemMetadata

func (t *ListType) SetElemMetadata(md Metadata)

func (*ListType) SetElemNullable

func (t *ListType) SetElemNullable(n bool)

func (*ListType) String

func (t *ListType) String() string

type ListViewType

type ListViewType struct {
	// contains filtered or unexported fields
}

func ListViewOf

func ListViewOf(t DataType) *ListViewType

ListViewOf returns the list-view type with element type t. For example, if t represents int32, ListViewOf(t) represents []int32.

ListViewOf panics if t is nil or invalid. NullableElem defaults to true

func ListViewOfField

func ListViewOfField(f Field) *ListViewType

func ListViewOfNonNullable

func ListViewOfNonNullable(t DataType) *ListViewType

ListViewOfNonNullable is like ListViewOf but NullableElem defaults to false, indicating that the child type should be marked as non-nullable.

func (*ListViewType) Elem

func (t *ListViewType) Elem() DataType

Elem returns the ListViewType's element type.

func (*ListViewType) ElemField

func (t *ListViewType) ElemField() Field

func (*ListViewType) Fields

func (t *ListViewType) Fields() []Field

func (*ListViewType) Fingerprint

func (t *ListViewType) Fingerprint() string

func (*ListViewType) ID

func (*ListViewType) ID() Type

func (*ListViewType) Layout

func (*ListViewType) Layout() DataTypeLayout

func (*ListViewType) Name

func (*ListViewType) Name() string

func (*ListViewType) NumFields

func (t *ListViewType) NumFields() int

func (*ListViewType) OffsetTypeTraits

func (*ListViewType) OffsetTypeTraits() OffsetTraits

func (*ListViewType) SetElemMetadata

func (t *ListViewType) SetElemMetadata(md Metadata)

func (*ListViewType) SetElemNullable

func (t *ListViewType) SetElemNullable(n bool)

func (*ListViewType) String

func (t *ListViewType) String() string

type MapType

type MapType struct {
	KeysSorted bool
	// contains filtered or unexported fields
}

func MapOf

func MapOf(key, item DataType) *MapType

func MapOfWithMetadata

func MapOfWithMetadata(key DataType, keyMetadata Metadata, item DataType, itemMetadata Metadata) *MapType

func (*MapType) Elem

func (t *MapType) Elem() DataType

Elem returns the MapType's element type (if treating MapType as ListLikeType)

func (*MapType) ElemField

func (t *MapType) ElemField() Field

ElemField returns the MapType's element field (if treating MapType as ListLikeType)

func (*MapType) Fields

func (t *MapType) Fields() []Field

func (*MapType) Fingerprint

func (t *MapType) Fingerprint() string

func (*MapType) ID

func (*MapType) ID() Type

func (*MapType) ItemField

func (t *MapType) ItemField() Field

func (*MapType) ItemType

func (t *MapType) ItemType() DataType

func (*MapType) KeyField

func (t *MapType) KeyField() Field

func (*MapType) KeyType

func (t *MapType) KeyType() DataType

func (*MapType) Layout

func (t *MapType) Layout() DataTypeLayout

func (*MapType) Name

func (*MapType) Name() string

func (*MapType) NumFields

func (t *MapType) NumFields() int

func (*MapType) OffsetTypeTraits

func (*MapType) OffsetTypeTraits() OffsetTraits

func (*MapType) SetItemNullable

func (t *MapType) SetItemNullable(nullable bool)

func (*MapType) String

func (t *MapType) String() string

func (*MapType) ValueField deprecated

func (t *MapType) ValueField() Field

Deprecated: use MapType.ElemField() instead

func (*MapType) ValueType deprecated

func (t *MapType) ValueType() *StructType

Deprecated: use MapType.Elem().(*StructType) instead

type Metadata

type Metadata struct {
	// contains filtered or unexported fields
}

func MetadataFrom

func MetadataFrom(kv map[string]string) Metadata

func NewMetadata

func NewMetadata(keys, values []string) Metadata

func (Metadata) Equal

func (md Metadata) Equal(rhs Metadata) bool

func (Metadata) FindKey

func (md Metadata) FindKey(k string) int

FindKey returns the index of the key-value pair with the provided key name, or -1 if such a key does not exist.

func (Metadata) GetValue

func (md Metadata) GetValue(k string) (string, bool)

GetValue returns the value associated with the provided key name. If the key does not exist, the second return value is false.

func (Metadata) Keys

func (md Metadata) Keys() []string

func (Metadata) Len

func (md Metadata) Len() int

func (Metadata) String

func (md Metadata) String() string

func (Metadata) ToMap

func (md Metadata) ToMap() map[string]string

func (Metadata) Values

func (md Metadata) Values() []string

type MonthDayNanoInterval

type MonthDayNanoInterval struct {
	Months      int32 `json:"months"`
	Days        int32 `json:"days"`
	Nanoseconds int64 `json:"nanoseconds"`
}

MonthDayNanoInterval represents a number of months, days and nanoseconds (fraction of day).

type MonthDayNanoIntervalType

type MonthDayNanoIntervalType struct{}

MonthDayNanoIntervalType is encoded as two signed 32-bit integers representing a number of months and a number of days, followed by a 64-bit integer representing the number of nanoseconds since midnight for fractions of a day.

func (*MonthDayNanoIntervalType) BitWidth

func (*MonthDayNanoIntervalType) BitWidth() int

BitWidth returns the number of bits required to store a single element of this data type in memory.

func (*MonthDayNanoIntervalType) Bytes

func (*MonthDayNanoIntervalType) Bytes() int

func (*MonthDayNanoIntervalType) Fingerprint

func (*MonthDayNanoIntervalType) Fingerprint() string

func (*MonthDayNanoIntervalType) ID

func (MonthDayNanoIntervalType) Layout

func (*MonthDayNanoIntervalType) Name

func (*MonthDayNanoIntervalType) String

func (*MonthDayNanoIntervalType) String() string

type MonthInterval

type MonthInterval int32

MonthInterval represents a number of months.

func (MonthInterval) MarshalJSON

func (m MonthInterval) MarshalJSON() ([]byte, error)

func (*MonthInterval) UnmarshalJSON

func (m *MonthInterval) UnmarshalJSON(data []byte) error

type MonthIntervalType

type MonthIntervalType struct{}

MonthIntervalType is encoded as a 32-bit signed integer, representing a number of months.

func (*MonthIntervalType) BitWidth

func (t *MonthIntervalType) BitWidth() int

BitWidth returns the number of bits required to store a single element of this data type in memory.

func (MonthIntervalType) Bytes

func (MonthIntervalType) Bytes() int

func (*MonthIntervalType) Fingerprint

func (*MonthIntervalType) Fingerprint() string

func (*MonthIntervalType) ID

func (*MonthIntervalType) ID() Type

func (MonthIntervalType) Layout

func (*MonthIntervalType) Name

func (*MonthIntervalType) Name() string

func (*MonthIntervalType) String

func (*MonthIntervalType) String() string

type NestedType

type NestedType interface {
	DataType

	// Fields method provides a copy of NestedType fields
	// (so it can be safely mutated and will not result in updating the NestedType).
	Fields() []Field
	// NumFields provides the number of fields without allocating.
	NumFields() int
}

type NullType

type NullType struct{}

NullType describes a degenerate array, with zero physical storage.

func (*NullType) Fingerprint

func (*NullType) Fingerprint() string

func (*NullType) ID

func (*NullType) ID() Type

func (*NullType) Layout

func (*NullType) Layout() DataTypeLayout

func (*NullType) Name

func (*NullType) Name() string

func (*NullType) String

func (*NullType) String() string

type NumericType

type NumericType interface {
	IntType | UintType | constraints.Float
}

NumericType is a type constraint for just signed/unsigned integers and float32/float64.

type OffsetTraits

type OffsetTraits interface {
	// BytesRequired returns the number of bytes required to be allocated
	// in order to hold the passed in number of elements of this type.
	BytesRequired(int) int
}

OffsetTraits is a convenient interface over the various type traits constants such as arrow.Int32Traits allowing types with offsets, like BinaryType, StringType, LargeBinaryType and LargeStringType to have a method to return information about their offset type and how many bytes would be required to allocate an offset buffer for them.

type OffsetsDataType

type OffsetsDataType interface {
	DataType
	OffsetTypeTraits() OffsetTraits
}

type Record

type Record interface {
	json.Marshaler

	Release()
	Retain()

	Schema() *Schema

	NumRows() int64
	NumCols() int64

	Columns() []Array
	Column(i int) Array
	ColumnName(i int) string
	SetColumn(i int, col Array) (Record, error)

	// NewSlice constructs a zero-copy slice of the record with the indicated
	// indices i and j, corresponding to array[i:j].
	// The returned record must be Release()'d after use.
	//
	// NewSlice panics if the slice is outside the valid range of the record array.
	// NewSlice panics if j < i.
	NewSlice(i, j int64) Record
}

Record is a collection of equal-length arrays matching a particular Schema. Also known as a RecordBatch in the spec and in some implementations.

It is also possible to construct a Table from a collection of Records that all have the same schema.

type RunEndEncodedType

type RunEndEncodedType struct {
	ValueNullable bool
	// contains filtered or unexported fields
}

RunEndEncodedType is the datatype to represent a run-end encoded array of data. ValueNullable defaults to true, but can be set false if this should represent a type with a non-nullable value field.

func RunEndEncodedOf

func RunEndEncodedOf(runEnds, values DataType) *RunEndEncodedType

func (*RunEndEncodedType) Encoded

func (t *RunEndEncodedType) Encoded() DataType

func (*RunEndEncodedType) Fields

func (t *RunEndEncodedType) Fields() []Field

func (*RunEndEncodedType) Fingerprint

func (t *RunEndEncodedType) Fingerprint() string

func (*RunEndEncodedType) ID

func (*RunEndEncodedType) ID() Type

func (*RunEndEncodedType) Layout

func (*RunEndEncodedType) Name

func (*RunEndEncodedType) Name() string

func (*RunEndEncodedType) NumFields

func (t *RunEndEncodedType) NumFields() int

func (*RunEndEncodedType) RunEnds

func (t *RunEndEncodedType) RunEnds() DataType

func (*RunEndEncodedType) String

func (t *RunEndEncodedType) String() string

func (*RunEndEncodedType) ValidRunEndsType

func (*RunEndEncodedType) ValidRunEndsType(dt DataType) bool

type Schema

type Schema struct {
	// contains filtered or unexported fields
}

Schema is a sequence of Field values, describing the columns of a table or a record batch.

func NewSchema

func NewSchema(fields []Field, metadata *Metadata) *Schema

NewSchema returns a new Schema value from the slice of fields and metadata.

NewSchema panics if there is a field with an invalid DataType.

func NewSchemaWithEndian

func NewSchemaWithEndian(fields []Field, metadata *Metadata, e endian.Endianness) *Schema

func (*Schema) AddField

func (s *Schema) AddField(i int, field Field) (*Schema, error)

AddField adds a field at the given index and returns a new schema.

func (*Schema) Endianness

func (sc *Schema) Endianness() endian.Endianness

func (*Schema) Equal

func (sc *Schema) Equal(o *Schema) bool

Equal returns whether two schemas are equal. Equal does not compare the metadata.

func (*Schema) Field

func (sc *Schema) Field(i int) Field

func (*Schema) FieldIndices

func (sc *Schema) FieldIndices(n string) []int

FieldIndices returns the indices of the named field or nil.

func (*Schema) Fields

func (sc *Schema) Fields() []Field

func (*Schema) FieldsByName

func (sc *Schema) FieldsByName(n string) ([]Field, bool)

func (*Schema) Fingerprint

func (s *Schema) Fingerprint() string

func (*Schema) HasField

func (sc *Schema) HasField(n string) bool

func (*Schema) HasMetadata

func (sc *Schema) HasMetadata() bool

func (*Schema) IsNativeEndian

func (sc *Schema) IsNativeEndian() bool

func (*Schema) Metadata

func (sc *Schema) Metadata() Metadata

func (*Schema) NumFields

func (sc *Schema) NumFields() int

func (*Schema) String

func (s *Schema) String() string

func (*Schema) WithEndianness

func (sc *Schema) WithEndianness(e endian.Endianness) *Schema

type SparseUnionType

type SparseUnionType struct {
	// contains filtered or unexported fields
}

SparseUnionType is the concrete type for Sparse union data.

A sparse union is a nested type where each logical value is taken from a single child. A buffer of 8-bit type ids indicates which child a given logical value is to be taken from.

In a sparse union, each child array will have the same length as the union array itself, regardless of the actual number of union values which refer to it.

Unlike most other types, unions do not have a top-level validity bitmap.

func SparseUnionFromArrays

func SparseUnionFromArrays(children []Array, fields []string, codes []UnionTypeCode) *SparseUnionType

SparseUnionFromArrays enables creating a union type from a list of Arrays, field names, and type codes. len(fields) should be either 0 or equal to len(children). len(codes) should also be either 0, or equal to len(children).

If len(fields) == 0, then the fields will be named numerically as "0", "1", "2"... and so on. If len(codes) == 0, then the type codes will be constructed as [0, 1, 2, ..., n].

func SparseUnionOf

func SparseUnionOf(fields []Field, typeCodes []UnionTypeCode) *SparseUnionType

SparseUnionOf is equivalent to UnionOf(arrow.SparseMode, fields, typeCodes), constructing a SparseUnionType from a list of fields and type codes.

If len(fields) != len(typeCodes) this will panic. They are allowed to be of length 0.

func (*SparseUnionType) ChildIDs

func (t *SparseUnionType) ChildIDs() []int

func (*SparseUnionType) Fields

func (t *SparseUnionType) Fields() []Field

Fields method provides a copy of union type fields (so it can be safely mutated and will not result in updating the union type).

func (*SparseUnionType) Fingerprint

func (t *SparseUnionType) Fingerprint() string

func (SparseUnionType) ID

func (SparseUnionType) ID() Type

func (SparseUnionType) Layout

func (*SparseUnionType) MaxTypeCode

func (t *SparseUnionType) MaxTypeCode() (max UnionTypeCode)

func (SparseUnionType) Mode

func (SparseUnionType) Mode() UnionMode

func (SparseUnionType) Name

func (SparseUnionType) Name() string

func (*SparseUnionType) NumFields

func (t *SparseUnionType) NumFields() int

func (*SparseUnionType) String

func (t *SparseUnionType) String() string

func (*SparseUnionType) TypeCodes

func (t *SparseUnionType) TypeCodes() []UnionTypeCode

type StringType

type StringType struct{}

func (*StringType) Fingerprint

func (t *StringType) Fingerprint() string

func (*StringType) ID

func (t *StringType) ID() Type

func (StringType) IsUtf8

func (StringType) IsUtf8() bool

func (*StringType) Layout

func (t *StringType) Layout() DataTypeLayout

func (*StringType) Name

func (t *StringType) Name() string

func (*StringType) OffsetTypeTraits

func (t *StringType) OffsetTypeTraits() OffsetTraits

func (*StringType) String

func (t *StringType) String() string

type StringViewType

type StringViewType struct{}

func (*StringViewType) Fingerprint

func (t *StringViewType) Fingerprint() string

func (*StringViewType) ID

func (*StringViewType) ID() Type

func (*StringViewType) IsUtf8

func (*StringViewType) IsUtf8() bool

func (*StringViewType) Layout

func (*StringViewType) Layout() DataTypeLayout

func (*StringViewType) Name

func (*StringViewType) Name() string

func (*StringViewType) String

func (*StringViewType) String() string

type StructType

type StructType struct {
	// contains filtered or unexported fields
}

StructType describes a nested type parameterized by an ordered sequence of relative types, called its fields.

func StructOf

func StructOf(fs ...Field) *StructType

StructOf returns the struct type with fields fs.

StructOf panics if there is a field with an invalid DataType.

func (*StructType) Field

func (t *StructType) Field(i int) Field

func (*StructType) FieldByName

func (t *StructType) FieldByName(name string) (Field, bool)

FieldByName gets the field with the given name.

If there are multiple fields with the given name, FieldByName returns the first such field.

func (*StructType) FieldIdx

func (t *StructType) FieldIdx(name string) (int, bool)

FieldIdx gets the index of the field with the given name.

If there are multiple fields with the given name, FieldIdx returns the index of the first such field.

func (*StructType) FieldIndices

func (t *StructType) FieldIndices(name string) []int

FieldIndices returns indices of all fields with the given name, or nil.

func (*StructType) Fields

func (t *StructType) Fields() []Field

Fields method provides a copy of StructType fields (so it can be safely mutated and will not result in updating the StructType).

func (*StructType) FieldsByName

func (t *StructType) FieldsByName(n string) ([]Field, bool)

FieldsByName returns all fields with the given name.

func (*StructType) Fingerprint

func (t *StructType) Fingerprint() string

func (*StructType) ID

func (*StructType) ID() Type

func (*StructType) Layout

func (*StructType) Layout() DataTypeLayout

func (*StructType) Name

func (*StructType) Name() string

func (*StructType) NumFields

func (t *StructType) NumFields() int

func (*StructType) String

func (t *StructType) String() string

type Table

type Table interface {
	Schema() *Schema
	NumRows() int64
	NumCols() int64
	Column(i int) *Column

	// AddColumn adds a new column to the table and a corresponding field (of the same type)
	// to its schema, at the specified position. Returns the new table with updated columns and schema.
	AddColumn(pos int, f Field, c Column) (Table, error)

	Retain()
	Release()

	fmt.Stringer
}

Table represents a logical sequence of chunked arrays of equal length. It is similar to a Record except that the columns are ChunkedArrays instead, allowing for a Table to be built up by chunks progressively whereas the columns in a single Record are always each a single contiguous array.

type TemporalWithUnit

type TemporalWithUnit interface {
	FixedWidthDataType
	TimeUnit() TimeUnit
}

type Time32

type Time32 int32

func Time32FromString

func Time32FromString(val string, unit TimeUnit) (Time32, error)

Time32FromString parses a string to return a Time32 value in the given unit. The unit must be either seconds or milliseconds, and the string should be in the form of HH:MM or HH:MM:SS[.zzz], where the fractions of a second are optional.

func (Time32) FormattedString

func (t Time32) FormattedString(unit TimeUnit) string

func (Time32) ToTime

func (t Time32) ToTime(unit TimeUnit) time.Time

type Time32Type

type Time32Type struct {
	Unit TimeUnit
}

Time32Type is encoded as a 32-bit signed integer, representing either seconds or milliseconds since midnight.

func (*Time32Type) BitWidth

func (*Time32Type) BitWidth() int

func (*Time32Type) Bytes

func (*Time32Type) Bytes() int

func (*Time32Type) Fingerprint

func (t *Time32Type) Fingerprint() string

func (*Time32Type) ID

func (*Time32Type) ID() Type

func (Time32Type) Layout

func (Time32Type) Layout() DataTypeLayout

func (*Time32Type) Name

func (*Time32Type) Name() string

func (*Time32Type) String

func (t *Time32Type) String() string

func (*Time32Type) TimeUnit

func (t *Time32Type) TimeUnit() TimeUnit

type Time64

type Time64 int64

func Time64FromString

func Time64FromString(val string, unit TimeUnit) (Time64, error)

Time64FromString parses a string to return a Time64 value in the given unit. The unit must be either microseconds or nanoseconds, and the string should be in the form of HH:MM or HH:MM:SS[.zzzzzzzzz], where the fractions of a second are optional.

func (Time64) FormattedString

func (t Time64) FormattedString(unit TimeUnit) string

func (Time64) ToTime

func (t Time64) ToTime(unit TimeUnit) time.Time

type Time64Type

type Time64Type struct {
	Unit TimeUnit
}

Time64Type is encoded as a 64-bit signed integer, representing either microseconds or nanoseconds since midnight.

func (*Time64Type) BitWidth

func (*Time64Type) BitWidth() int

func (*Time64Type) Bytes

func (*Time64Type) Bytes() int

func (*Time64Type) Fingerprint

func (t *Time64Type) Fingerprint() string

func (*Time64Type) ID

func (*Time64Type) ID() Type

func (Time64Type) Layout

func (Time64Type) Layout() DataTypeLayout

func (*Time64Type) Name

func (*Time64Type) Name() string

func (*Time64Type) String

func (t *Time64Type) String() string

func (*Time64Type) TimeUnit

func (t *Time64Type) TimeUnit() TimeUnit

type TimeUnit

type TimeUnit int
const (
	Second TimeUnit = iota
	Millisecond
	Microsecond
	Nanosecond
)

func (TimeUnit) Multiplier

func (u TimeUnit) Multiplier() time.Duration

Multiplier returns a time.Duration value to multiply by in order to convert the value into nanoseconds

func (TimeUnit) String

func (u TimeUnit) String() string

type Timestamp

type Timestamp int64

func TimestampFromString

func TimestampFromString(val string, unit TimeUnit) (Timestamp, error)

TimestampFromString parses a string and returns a timestamp for the given unit level.

The timestamp should be in one of the following forms, [T] can be either T or a space, and [.zzzzzzzzz] can be either left out or up to 9 digits of fractions of a second.

YYYY-MM-DD
YYYY-MM-DD[T]HH
YYYY-MM-DD[T]HH:MM
YYYY-MM-DD[T]HH:MM:SS[.zzzzzzzzz]

You can also optionally have an ending Z to indicate UTC or indicate a specific timezone using ±HH, ±HHMM or ±HH:MM at the end of the string.

func TimestampFromStringInLocation

func TimestampFromStringInLocation(val string, unit TimeUnit, loc *time.Location) (Timestamp, bool, error)

TimestampFromStringInLocation is like TimestampFromString, but treats the time instant as if it were in the provided timezone before converting to UTC for internal representation.

func TimestampFromTime

func TimestampFromTime(val time.Time, unit TimeUnit) (Timestamp, error)

TimestampFromTime allows converting time.Time to Timestamp

func (Timestamp) ToTime

func (t Timestamp) ToTime(unit TimeUnit) time.Time

type TimestampConvertOp

type TimestampConvertOp int8

func GetTimestampConvert

func GetTimestampConvert(in, out TimeUnit) (op TimestampConvertOp, factor int64)

type TimestampType

type TimestampType struct {
	Unit     TimeUnit
	TimeZone string
	// contains filtered or unexported fields
}

TimestampType is encoded as a 64-bit signed integer since the UNIX epoch (1970-01-01T00:00:00Z). The zero-value is a second and time zone neutral. In Arrow semantics, time zone neutral does not represent a physical point in time, but rather a "wall clock" time that only has meaning within the context that produced it. In Go, time.Time can only represent instants; there is no notion of "wall clock" time. Therefore, time zone neutral timestamps are represented as UTC per Go conventions even though the Arrow type itself has no time zone.

func (*TimestampType) BitWidth

func (*TimestampType) BitWidth() int

BitWidth returns the number of bits required to store a single element of this data type in memory.

func (*TimestampType) Bytes

func (*TimestampType) Bytes() int

func (*TimestampType) ClearCachedLocation

func (t *TimestampType) ClearCachedLocation()

ClearCachedLocation clears the cached time.Location object in the type. This should be called if you change the value of the TimeZone after having potentially called GetZone.

func (*TimestampType) Fingerprint

func (t *TimestampType) Fingerprint() string

func (*TimestampType) GetToTimeFunc

func (t *TimestampType) GetToTimeFunc() (func(Timestamp) time.Time, error)

GetToTimeFunc returns a function for converting an arrow.Timestamp value into a time.Time object with proper TimeZone and precision. If the TimeZone is invalid this will return an error. It calls GetZone to get the timezone for consistency.

func (*TimestampType) GetZone

func (t *TimestampType) GetZone() (*time.Location, error)

GetZone returns a *time.Location that represents the current TimeZone member of the TimestampType. If it is "", "UTC", or "utc", you'll get time.UTC. Otherwise it must either be a valid tzdata string such as "America/New_York" or of the format +HH:MM or -HH:MM indicating an absolute offset.

The location object will be cached in the TimestampType for subsequent calls so if you change the value of TimeZone after calling this, make sure to call ClearCachedLocation.

func (*TimestampType) ID

func (*TimestampType) ID() Type

func (*TimestampType) Layout

func (*TimestampType) Layout() DataTypeLayout

func (*TimestampType) Name

func (*TimestampType) Name() string

func (*TimestampType) String

func (t *TimestampType) String() string

func (*TimestampType) TimeUnit

func (t *TimestampType) TimeUnit() TimeUnit

type Type

type Type int

Type is a logical type. It can be expressed as either a primitive physical type (bytes or bits of some fixed size), a nested type consisting of other data types, or another data type (e.g. a timestamp encoded as an int64).

const (
	// NULL type having no physical storage
	NULL Type = iota

	// BOOL is a 1 bit, LSB bit-packed ordering
	BOOL

	// UINT8 is an Unsigned 8-bit little-endian integer
	UINT8

	// INT8 is a Signed 8-bit little-endian integer
	INT8

	// UINT16 is an Unsigned 16-bit little-endian integer
	UINT16

	// INT16 is a Signed 16-bit little-endian integer
	INT16

	// UINT32 is an Unsigned 32-bit little-endian integer
	UINT32

	// INT32 is a Signed 32-bit little-endian integer
	INT32

	// UINT64 is an Unsigned 64-bit little-endian integer
	UINT64

	// INT64 is a Signed 64-bit little-endian integer
	INT64

	// FLOAT16 is a 2-byte floating point value
	FLOAT16

	// FLOAT32 is a 4-byte floating point value
	FLOAT32

	// FLOAT64 is an 8-byte floating point value
	FLOAT64

	// STRING is a UTF8 variable-length string
	STRING

	// BINARY is a Variable-length byte type (no guarantee of UTF8-ness)
	BINARY

	// FIXED_SIZE_BINARY is a binary where each value occupies the same number of bytes
	FIXED_SIZE_BINARY

	// DATE32 is int32 days since the UNIX epoch
	DATE32

	// DATE64 is int64 milliseconds since the UNIX epoch
	DATE64

	// TIMESTAMP is an exact timestamp encoded with int64 since UNIX epoch
	// Default unit millisecond
	TIMESTAMP

	// TIME32 is a signed 32-bit integer, representing either seconds or
	// milliseconds since midnight
	TIME32

	// TIME64 is a signed 64-bit integer, representing either microseconds or
	// nanoseconds since midnight
	TIME64

	// INTERVAL_MONTHS is YEAR_MONTH interval in SQL style
	INTERVAL_MONTHS

	// INTERVAL_DAY_TIME is DAY_TIME in SQL Style
	INTERVAL_DAY_TIME

	// DECIMAL128 is a precision- and scale-based decimal type. Storage type depends on the
	// parameters.
	DECIMAL128

	// DECIMAL256 is a precision and scale based decimal type, with 256 bit max.
	DECIMAL256

	// LIST is a list of some logical data type
	LIST

	// STRUCT of logical types
	STRUCT

	// SPARSE_UNION of logical types
	SPARSE_UNION

	// DENSE_UNION of logical types
	DENSE_UNION

	// DICTIONARY aka Category type
	DICTIONARY

	// MAP is a repeated struct logical type
	MAP

	// Custom data type, implemented by user
	EXTENSION

	// Fixed size list of some logical type
	FIXED_SIZE_LIST

	// Measure of elapsed time in either seconds, milliseconds, microseconds
	// or nanoseconds.
	DURATION

	// like STRING, but 64-bit offsets
	LARGE_STRING

	// like BINARY but with 64-bit offsets
	LARGE_BINARY

	// like LIST but with 64-bit offsets
	LARGE_LIST

	// calendar interval with three fields
	INTERVAL_MONTH_DAY_NANO

	RUN_END_ENCODED

	// String (UTF8) view type with 4-byte prefix and inline
	// small string optimizations
	STRING_VIEW

	// Bytes view with 4-byte prefix and inline small byte arrays optimization
	BINARY_VIEW

	// LIST_VIEW is a list of some logical data type represented with offsets and sizes
	LIST_VIEW

	// like LIST_VIEW but with 64-bit offsets
	LARGE_LIST_VIEW

	// Decimal value with 32-bit representation
	DECIMAL32

	// Decimal value with 64-bit representation
	DECIMAL64

	// Alias to ensure we do not break any consumers
	DECIMAL = DECIMAL128
)

func GetType

func GetType[T NumericType | bool | string]() Type

GetType returns the appropriate Type for type T, only for non-parametric types. This uses a map and reflection internally, so don't call this in a tight loop; instead call it once and then use a closure with the result.

func (Type) String

func (i Type) String() string

type TypeEqualOption

type TypeEqualOption func(*typeEqualsConfig)

TypeEqualOption is a functional option type used for configuring type equality checks.

func CheckMetadata

func CheckMetadata() TypeEqualOption

CheckMetadata is an option for TypeEqual that allows checking for metadata equality besides type equality. It only makes sense for types with metadata.

type Uint16Type

type Uint16Type struct{}

func (*Uint16Type) BitWidth

func (t *Uint16Type) BitWidth() int

func (*Uint16Type) Bytes

func (t *Uint16Type) Bytes() int

func (*Uint16Type) Fingerprint

func (t *Uint16Type) Fingerprint() string

func (*Uint16Type) ID

func (t *Uint16Type) ID() Type

func (*Uint16Type) Layout

func (t *Uint16Type) Layout() DataTypeLayout

func (*Uint16Type) Name

func (t *Uint16Type) Name() string

func (*Uint16Type) String

func (t *Uint16Type) String() string

type Uint32Type

type Uint32Type struct{}

func (*Uint32Type) BitWidth

func (t *Uint32Type) BitWidth() int

func (*Uint32Type) Bytes

func (t *Uint32Type) Bytes() int

func (*Uint32Type) Fingerprint

func (t *Uint32Type) Fingerprint() string

func (*Uint32Type) ID

func (t *Uint32Type) ID() Type

func (*Uint32Type) Layout

func (t *Uint32Type) Layout() DataTypeLayout

func (*Uint32Type) Name

func (t *Uint32Type) Name() string

func (*Uint32Type) String

func (t *Uint32Type) String() string

type Uint64Type

type Uint64Type struct{}

func (*Uint64Type) BitWidth

func (t *Uint64Type) BitWidth() int

func (*Uint64Type) Bytes

func (t *Uint64Type) Bytes() int

func (*Uint64Type) Fingerprint

func (t *Uint64Type) Fingerprint() string

func (*Uint64Type) ID

func (t *Uint64Type) ID() Type

func (*Uint64Type) Layout

func (t *Uint64Type) Layout() DataTypeLayout

func (*Uint64Type) Name

func (t *Uint64Type) Name() string

func (*Uint64Type) String

func (t *Uint64Type) String() string

type Uint8Type

type Uint8Type struct{}

func (*Uint8Type) BitWidth

func (t *Uint8Type) BitWidth() int

func (*Uint8Type) Bytes

func (t *Uint8Type) Bytes() int

func (*Uint8Type) Fingerprint

func (t *Uint8Type) Fingerprint() string

func (*Uint8Type) ID

func (t *Uint8Type) ID() Type

func (*Uint8Type) Layout

func (t *Uint8Type) Layout() DataTypeLayout

func (*Uint8Type) Name

func (t *Uint8Type) Name() string

func (*Uint8Type) String

func (t *Uint8Type) String() string

type UintType

type UintType interface {
	~uint8 | ~uint16 | ~uint32 | ~uint64
}

UintType is a type constraint for raw values represented as unsigned integer types by Arrow. We aren't just using constraints.Unsigned because we don't want to include the raw `uint` type here, whose size changes based on the architecture (uint32 on 32-bit architectures and uint64 on 64-bit architectures). We also don't want to include uintptr.

type UnionMode

type UnionMode int8

func (UnionMode) String

func (i UnionMode) String() string

type UnionType

type UnionType interface {
	NestedType
	// Mode returns either SparseMode or DenseMode depending on the current
	// concrete data type.
	Mode() UnionMode
	// ChildIDs returns a slice of ints to map UnionTypeCode values to
	// the index in the Fields that represents the given Type. It is
	// initialized with all values being InvalidUnionChildID (-1)
	// before being populated based on the TypeCodes and fields of the type.
	// The field for a given type can be retrieved by Fields()[ChildIDs()[typeCode]]
	ChildIDs() []int
	// TypeCodes returns the list of available type codes for this union type
	// which will correspond to indexes into the ChildIDs slice to locate the
	// appropriate child. A union Array contains a buffer of these type codes
	// which indicate for a given index, which child has the value for that index.
	TypeCodes() []UnionTypeCode
	// MaxTypeCode returns the value of the largest TypeCode in the list of typecodes
	// that are defined by this Union type
	MaxTypeCode() UnionTypeCode
}

UnionType is an interface to encompass both Dense and Sparse Union types.

A UnionType is a nested type where each logical value is taken from a single child. A buffer of 8-bit type ids (typed as UnionTypeCode) indicates which child a given logical value is to be taken from. This is represented as the "child id" or "child index", which is the index into the list of child fields for a given child.

func UnionOf

func UnionOf(mode UnionMode, fields []Field, typeCodes []UnionTypeCode) UnionType

UnionOf returns an appropriate union type for the given Mode (Sparse or Dense), child fields, and type codes. len(fields) == len(typeCodes) must be true, or else this will panic. len(fields) can be 0.

type UnionTypeCode

type UnionTypeCode = int8

UnionTypeCode is an alias to int8 which is the type of the ids used for union arrays.

type VarLenListLikeType

type VarLenListLikeType interface {
	ListLikeType
}

type ViewHeader

type ViewHeader struct {
	// contains filtered or unexported fields
}

ViewHeader is a variable length string (utf8) or byte slice with a 4 byte prefix and inline optimization for small values (12 bytes or fewer). This is similar to Go's standard string but limited by a length of Uint32Max and up to the first four bytes of the string are copied into the struct. This prefix allows failing comparisons early and can reduce CPU cache working set when dealing with short strings.

There are two situations:

	Entirely inlined string data
                |----|------------|
	                ^    ^
	                |    |
	              size  inline string data, zero padded

	Reference into buffer
                |----|----|----|----|
	                ^    ^     ^     ^
	                |    |     |     |
	              size prefix buffer index and offset to out-of-line portion

Adapted from TU Munich's UmbraDB [1], Velox, DuckDB.

func (*ViewHeader) BufferIndex

func (sh *ViewHeader) BufferIndex() int32

func (*ViewHeader) BufferOffset

func (sh *ViewHeader) BufferOffset() int32

func (*ViewHeader) Equals

func (sh *ViewHeader) Equals(buffers []*memory.Buffer, other *ViewHeader, otherBuffers []*memory.Buffer) bool

func (*ViewHeader) InlineBytes

func (sh *ViewHeader) InlineBytes() (data []byte)

func (*ViewHeader) InlineString

func (sh *ViewHeader) InlineString() (data string)

func (*ViewHeader) IsInline

func (sh *ViewHeader) IsInline() bool

func (*ViewHeader) Len

func (sh *ViewHeader) Len() int

func (*ViewHeader) Prefix

func (sh *ViewHeader) Prefix() [ViewPrefixLen]byte

func (*ViewHeader) SetBytes

func (sh *ViewHeader) SetBytes(data []byte) int

func (*ViewHeader) SetIndexOffset

func (sh *ViewHeader) SetIndexOffset(bufferIndex, offset int32)

func (*ViewHeader) SetString

func (sh *ViewHeader) SetString(data string) int

Directories

Path Synopsis
_examples
_tools
Package array provides implementations of various Arrow array types.
Package array provides implementations of various Arrow array types.
Package arrio exposes functions to manipulate records, exposing and using interfaces not unlike the ones defined in the stdlib io package.
Package arrio exposes functions to manipulate records, exposing and using interfaces not unlike the ones defined in the stdlib io package.
Package avro reads Avro OCF files and presents the extracted data as records
Package avro reads Avro OCF files and presents the extracted data as records
Package compute is a native-go implementation of an Acero-like arrow compute engine.
Package compute is a native-go implementation of an Acero-like arrow compute engine.
internal/kernels
Package kernels defines all of the computation kernels for the compute library.
Package kernels defines all of the computation kernels for the compute library.
Package csv reads CSV files and presents the extracted data as records, also writes data as record into CSV files
Package csv reads CSV files and presents the extracted data as records, also writes data as record into CSV files
Package extensions provides implementations of Arrow canonical extension types as defined in the Arrow specification.
Package extensions provides implementations of Arrow canonical extension types as defined in the Arrow specification.
flightsql/driver
Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.
Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.
flightsql/example
Package example contains a FlightSQL Server implementation using sqlite as the backing engine.
Package example contains a FlightSQL Server implementation using sqlite as the backing engine.
flightsql/schema_ref
Package schema_ref contains the expected reference Schemas to be used by FlightSQL servers and clients.
Package schema_ref contains the expected reference Schemas to be used by FlightSQL servers and clients.
session
Package session provides server middleware and reference implementations for Flight session management.
Package session provides server middleware and reference implementations for Flight session management.
arrdata
Package arrdata exports arrays and records data ready to be used for tests.
Package arrdata exports arrays and records data ready to be used for tests.
arrjson
Package arrjson provides types and functions to encode and decode ARROW types and data to and from JSON files.
Package arrjson provides types and functions to encode and decode ARROW types and data to and from JSON files.
debug
Package debug provides APIs for conditional runtime assertions and debug logging.
Package debug provides APIs for conditional runtime assertions and debug logging.
flight_integration/cmd/arrow-flight-integration-client
Client for use with Arrow Flight Integration tests via archery
Client for use with Arrow Flight Integration tests via archery
ipc
cmd/arrow-cat
Command arrow-cat displays the content of an Arrow stream or file.
Command arrow-cat displays the content of an Arrow stream or file.
cmd/arrow-ls
Command arrow-ls displays the listing of an Arrow file.
Command arrow-ls displays the listing of an Arrow file.
Package math provides optimized mathematical functions for processing Arrow arrays.
Package math provides optimized mathematical functions for processing Arrow arrays.
Package memory provides support for allocating and manipulating memory at a low level.
Package memory provides support for allocating and manipulating memory at a low level.
mallocator
Package mallocator defines an allocator implementation for memory.Allocator which defers to libc malloc.
Package mallocator defines an allocator implementation for memory.Allocator which defers to libc malloc.
Package tensor provides types that implement n-dimensional arrays.
Package tensor provides types that implement n-dimensional arrays.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL