ipc

package
v18.1.0-rc0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 9, 2025 License: Apache-2.0, BSD-3-Clause Imports: 26 Imported by: 11

Documentation

Overview

Example (ListSlice)
package main

import (
	"bytes"
	"fmt"
	"strings"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
	"github.com/apache/arrow-go/v18/arrow/ipc"
	"github.com/apache/arrow-go/v18/arrow/memory"
)

// main round-trips a slice of a list<utf8> record through the IPC stream
// writer and reader, then prints the record that was read back.
func main() {
	mem := memory.DefaultAllocator
	dt := arrow.ListOf(arrow.BinaryTypes.String)
	schema := arrow.NewSchema([]arrow.Field{{
		Name: "list",
		Type: dt,
	}}, nil)

	// Build the list column from JSON; each inner array is one row.
	arr, _, err := array.FromJSON(mem, dt, strings.NewReader(`[
		["index1"], 
		["index3", "tag_int"], ["index5", "tag_int"],
		["index6", "tag_int"], ["index7", "tag_int"], 
		["index7", "tag_int"],
		["index8"]
	]`))
	if err != nil {
		panic(err)
	}
	defer arr.Release()

	rec := array.NewRecord(schema, []arrow.Array{arr}, int64(arr.Len()))
	defer rec.Release()

	// Write only a slice of the record so that the writer has to handle
	// a column that does not start at the beginning of its buffers.
	rec2 := rec.NewSlice(1, 2)
	defer rec2.Release()

	var buf bytes.Buffer
	w := ipc.NewWriter(&buf, ipc.WithSchema(rec.Schema()))
	if err := w.Write(rec2); err != nil {
		panic(err)
	}
	if err := w.Close(); err != nil {
		panic(err)
	}

	r, err := ipc.NewReader(&buf)
	if err != nil {
		panic(err)
	}
	defer r.Release()

	// Next reports whether a record could be extracted; when it could not,
	// surface the reader's error instead of printing a missing record.
	if !r.Next() {
		if err := r.Err(); err != nil {
			panic(err)
		}
		panic("ipc stream contained no record")
	}
	fmt.Println(r.Record())
}
Output:

record:
  schema:
  fields: 1
    - list: type=list<item: utf8, nullable>
  rows: 1
  col[0][list]: [["index3" "tag_int"]]
Example (MapSlice)
package main

import (
	"bytes"
	"fmt"
	"strings"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
	"github.com/apache/arrow-go/v18/arrow/ipc"
	"github.com/apache/arrow-go/v18/arrow/memory"
)

// main round-trips a slice of a map<utf8, utf8> record through the IPC stream
// writer and reader, then prints the record that was read back.
func main() {
	mem := memory.DefaultAllocator
	dt := arrow.MapOf(arrow.BinaryTypes.String, arrow.BinaryTypes.String)
	schema := arrow.NewSchema([]arrow.Field{{
		Name: "map",
		Type: dt,
	}}, nil)

	// Build the map column from JSON; each inner array of key/value
	// objects is one row.
	arr, _, err := array.FromJSON(mem, dt, strings.NewReader(`[
		[{"key": "index1", "value": "main2"}],
		[{"key": "index3", "value": "main4"}, {"key": "tag_int", "value": ""}],
		[{"key":"index5","value":"main6"},{"key":"tag_int","value":""}],
		[{"key":"index6","value":"main7"},{"key":"tag_int","value":""}],
		[{"key":"index7","value":"main8"},{"key":"tag_int","value":""}],
		[{"key":"index8","value":"main9"}]
	]`))
	if err != nil {
		panic(err)
	}
	defer arr.Release()

	rec := array.NewRecord(schema, []arrow.Array{arr}, int64(arr.Len()))
	defer rec.Release()

	// Write only a slice of the record so that the writer has to handle
	// a column that does not start at the beginning of its buffers.
	rec2 := rec.NewSlice(1, 2)
	defer rec2.Release()

	var buf bytes.Buffer
	w := ipc.NewWriter(&buf, ipc.WithSchema(rec.Schema()))
	if err := w.Write(rec2); err != nil {
		panic(err)
	}
	if err := w.Close(); err != nil {
		panic(err)
	}

	r, err := ipc.NewReader(&buf)
	if err != nil {
		panic(err)
	}
	defer r.Release()

	// Next reports whether a record could be extracted; when it could not,
	// surface the reader's error instead of printing a missing record.
	if !r.Next() {
		if err := r.Err(); err != nil {
			panic(err)
		}
		panic("ipc stream contained no record")
	}
	fmt.Println(r.Record())
}
Output:

record:
  schema:
  fields: 1
    - map: type=map<utf8, utf8, items_nullable>
  rows: 1
  col[0][map]: [{["index3" "tag_int"] ["main4" ""]}]

Index

Examples

Constants

View Source
// Arrow IPC metadata versions. Each constant is the corresponding
// flatbuf.MetadataVersion value converted to MetadataVersion; the trailing
// comment names the Arrow format releases that use it.
const (
	MetadataV1 = MetadataVersion(flatbuf.MetadataVersionV1) // version for Arrow Format-0.1.0
	MetadataV2 = MetadataVersion(flatbuf.MetadataVersionV2) // version for Arrow Format-0.2.0
	MetadataV3 = MetadataVersion(flatbuf.MetadataVersionV3) // version for Arrow Format-0.3.0 to 0.7.1
	MetadataV4 = MetadataVersion(flatbuf.MetadataVersionV4) // version for >= Arrow Format-0.8.0
	MetadataV5 = MetadataVersion(flatbuf.MetadataVersionV5) // version for >= Arrow Format-1.0.0, backward compatible with v4
)
View Source
const (

	// ExtensionTypeKeyName is the metadata key that holds the name of an
	// extension type.
	ExtensionTypeKeyName     = "ARROW:extension:name"
	// ExtensionMetadataKeyName is the metadata key that holds any extension
	// metadata to be passed to deserialize.
	ExtensionMetadataKeyName = "ARROW:extension:metadata"
)

Variables

View Source
var Magic = []byte("ARROW1")

Magic string identifying an Apache Arrow file.

Functions

This section is empty.

Types

type FileReader

type FileReader struct {
	// contains filtered or unexported fields
}

FileReader is an Arrow file reader.

func NewFileReader

func NewFileReader(r ReadAtSeeker, opts ...Option) (*FileReader, error)

NewFileReader opens an Arrow file using the provided reader r.

func NewMappedFileReader

func NewMappedFileReader(data []byte, opts ...Option) (*FileReader, error)

NewMappedFileReader is like NewFileReader but instead of using a ReadAtSeeker, which will force copies through the Read/ReadAt methods, it uses a byte slice and pulls slices directly from the data. This is useful specifically when dealing with mmapped data so that you can lazily load the buffers and avoid extraneous copies. The slices used for the record column buffers will simply reference the existing data instead of performing copies via ReadAt/Read.

For example, syscall.Mmap returns a byte slice which could be referencing a shared memory region or otherwise a memory-mapped file.

func (*FileReader) Close

func (f *FileReader) Close() error

Close cleans up resources used by the File. Close does not close the underlying reader.

func (*FileReader) NumDictionaries

func (f *FileReader) NumDictionaries() int

func (*FileReader) NumRecords

func (f *FileReader) NumRecords() int

func (*FileReader) Read

func (f *FileReader) Read() (rec arrow.Record, err error)

Read reads the current record from the underlying stream and an error, if any. When the Reader reaches the end of the underlying stream, it returns (nil, io.EOF).

The returned record value is valid until the next call to Read. Users need to call Retain on that Record to keep it valid for longer.

func (*FileReader) ReadAt

func (f *FileReader) ReadAt(i int64) (arrow.Record, error)

ReadAt reads the i-th record from the underlying stream and an error, if any.

func (*FileReader) Record

func (f *FileReader) Record(i int) (arrow.Record, error)

Record returns the i-th record from the file. The returned value is valid until the next call to Record. Users need to call Retain on that Record to keep it valid for longer.

func (*FileReader) RecordAt

func (f *FileReader) RecordAt(i int) (arrow.Record, error)

RecordAt returns the i-th record from the file. Ownership is transferred to the caller, who must call Release() to free the memory. This method is safe to call concurrently.

func (*FileReader) Schema

func (f *FileReader) Schema() *arrow.Schema

func (*FileReader) Version

func (f *FileReader) Version() MetadataVersion

type FileWriter

type FileWriter struct {
	// contains filtered or unexported fields
}

FileWriter is an Arrow file writer.

func NewFileWriter

func NewFileWriter(w io.Writer, opts ...Option) (*FileWriter, error)

NewFileWriter opens an Arrow file using the provided writer w.

func (*FileWriter) Close

func (f *FileWriter) Close() error

func (*FileWriter) Write

func (f *FileWriter) Write(rec arrow.Record) error

type Message

type Message struct {
	// contains filtered or unexported fields
}

Message is an IPC message, including metadata and body.

func NewMessage

func NewMessage(meta, body *memory.Buffer) *Message

NewMessage creates a new message from the metadata and body buffers. NewMessage panics if any of these buffers is nil.

func (*Message) BodyLen

func (msg *Message) BodyLen() int64

func (*Message) Release

func (msg *Message) Release()

Release decreases the reference count by 1. Release may be called simultaneously from multiple goroutines. When the reference count goes to zero, the memory is freed.

func (*Message) Retain

func (msg *Message) Retain()

Retain increases the reference count by 1. Retain may be called simultaneously from multiple goroutines.

func (*Message) Type

func (msg *Message) Type() MessageType

func (*Message) Version

func (msg *Message) Version() MetadataVersion

type MessageReader

type MessageReader interface {
	Message() (*Message, error)
	Release()
	Retain()
}

func NewMessageReader

func NewMessageReader(r io.Reader, opts ...Option) MessageReader

NewMessageReader returns a reader that reads messages from an input stream.

type MessageType

type MessageType flatbuf.MessageHeader

MessageType represents the type of Message in an Arrow format.

func (MessageType) String

func (m MessageType) String() string

type MetadataVersion

type MetadataVersion flatbuf.MetadataVersion

MetadataVersion represents the Arrow metadata version.

func (MetadataVersion) String

func (m MetadataVersion) String() string

type Option

type Option func(*config)

Option is a functional option to configure opening or creating Arrow files and streams.

func WithAllocator

func WithAllocator(mem memory.Allocator) Option

WithAllocator specifies the Arrow memory allocator used while building records.

func WithCompressConcurrency

func WithCompressConcurrency(n int) Option

WithCompressConcurrency specifies the number of goroutines to spin up for concurrent compression of the body buffers when writing compressed IPC records. If n <= 1 then compression will be done serially without goroutine parallelization. Default is 1.

func WithDelayReadSchema

func WithDelayReadSchema(v bool) Option

WithDelayReadSchema alters the ipc.Reader behavior to delay attempting to read the schema from the stream until the first call to Next instead of immediately attempting to read a schema from the stream when created.

func WithDictionaryDeltas

func WithDictionaryDeltas(v bool) Option

WithDictionaryDeltas specifies whether or not to emit dictionary deltas.

func WithEnsureNativeEndian

func WithEnsureNativeEndian(v bool) Option

WithEnsureNativeEndian specifies whether or not to automatically byte-swap buffers with endian-sensitive data if the schema's endianness is not the platform-native endianness. This includes all numeric types, temporal types, decimal types, as well as the offset buffers of variable-sized binary and list-like types.

This is only relevant to ipc Reader objects, not to writers. This defaults to true.

func WithFooterOffset

func WithFooterOffset(offset int64) Option

WithFooterOffset specifies the Arrow footer position in bytes.

func WithLZ4

func WithLZ4() Option

WithLZ4 tells the writer to use LZ4 Frame compression on the data buffers before writing. Requires >= Arrow 1.0.0 to read/decompress

func WithMinSpaceSavings

func WithMinSpaceSavings(savings float64) Option

WithMinSpaceSavings specifies the minimum fraction of space savings that compression must achieve for it to be applied to a buffer.

Space savings is calculated as (1.0 - compressedSize / uncompressedSize).

For example, if minSpaceSavings = 0.1, a 100-byte body buffer won't undergo compression if its expected compressed size exceeds 90 bytes. If this option is unset, compression will be used indiscriminately. If no codec was supplied, this option is ignored.

Values outside of the range [0,1] are handled as errors.

Note that enabling this option may result in unreadable data for Arrow Go and C++ versions prior to 12.0.0.

func WithSchema

func WithSchema(schema *arrow.Schema) Option

WithSchema specifies the Arrow schema to be used for reading or writing.

func WithZstd

func WithZstd() Option

WithZstd tells the writer to use ZSTD compression on the data buffers before writing. Requires >= Arrow 1.0.0 to read/decompress

type Payload

type Payload struct {
	// contains filtered or unexported fields
}

Payload is the underlying message object which is passed to the payload writer for actually writing out ipc messages

func GetRecordBatchPayload

func GetRecordBatchPayload(batch arrow.Record, opts ...Option) (Payload, error)

GetRecordBatchPayload produces the ipc payload for a given record batch. The resulting payload itself must be released by the caller via the Release method after it is no longer needed.

func GetSchemaPayload

func GetSchemaPayload(schema *arrow.Schema, mem memory.Allocator) Payload

GetSchemaPayload produces the ipc payload for a given schema.

func (*Payload) Meta

func (p *Payload) Meta() *memory.Buffer

Meta returns the buffer containing the metadata for this payload. Callers must call Release on the buffer.

func (*Payload) Release

func (p *Payload) Release()

func (*Payload) SerializeBody

func (p *Payload) SerializeBody(w io.Writer) error

SerializeBody serializes the body buffers and writes them to the provided writer.

type PayloadWriter

type PayloadWriter interface {
	Start() error
	WritePayload(Payload) error
	Close() error
}

PayloadWriter is an interface for injecting a different payload writer, allowing the Writer object to be reused in other scenarios, such as with Flight data.

type ReadAtSeeker

type ReadAtSeeker interface {
	io.Reader
	io.Seeker
	io.ReaderAt
}

type Reader

type Reader struct {
	// contains filtered or unexported fields
}

Reader reads records from an io.Reader. Reader expects a schema (plus any dictionaries) as the first messages in the stream, followed by records.

func NewReader

func NewReader(r io.Reader, opts ...Option) (*Reader, error)

NewReader returns a reader that reads records from an input stream.

func NewReaderFromMessageReader

func NewReaderFromMessageReader(r MessageReader, opts ...Option) (reader *Reader, err error)

NewReaderFromMessageReader constructs a new Reader from the provided MessageReader. This allows injecting messages from sources other than a simple stream of bytes, such as Arrow Flight, which receives a protobuf message.

func (*Reader) Err

func (r *Reader) Err() error

Err returns the last error encountered during the iteration over the underlying stream.

func (*Reader) Next

func (r *Reader) Next() bool

Next returns whether a Record could be extracted from the underlying stream.

func (*Reader) Read

func (r *Reader) Read() (arrow.Record, error)

Read reads the current record from the underlying stream and an error, if any. When the Reader reaches the end of the underlying stream, it returns (nil, io.EOF).

func (*Reader) Record

func (r *Reader) Record() arrow.Record

Record returns the current record that has been extracted from the underlying stream. It is valid until the next call to Next.

func (*Reader) Release

func (r *Reader) Release()

Release decreases the reference count by 1. When the reference count goes to zero, the memory is freed. Release may be called simultaneously from multiple goroutines.

func (*Reader) Retain

func (r *Reader) Retain()

Retain increases the reference count by 1. Retain may be called simultaneously from multiple goroutines.

func (*Reader) Schema

func (r *Reader) Schema() *arrow.Schema

type Writer

type Writer struct {
	// contains filtered or unexported fields
}

Writer is an Arrow stream writer.

func NewWriter

func NewWriter(w io.Writer, opts ...Option) *Writer

NewWriter returns a writer that writes records to the provided output stream.

func NewWriterWithPayloadWriter

func NewWriterWithPayloadWriter(pw PayloadWriter, opts ...Option) *Writer

NewWriterWithPayloadWriter constructs a writer with the provided payload writer instead of the default stream payload writer. This makes the writer more reusable such as by the Arrow Flight writer.

func (*Writer) Close

func (w *Writer) Close() error

func (*Writer) Write

func (w *Writer) Write(rec arrow.Record) (err error)

Directories

Path Synopsis
cmd
arrow-cat
Command arrow-cat displays the content of an Arrow stream or file.
arrow-ls
Command arrow-ls displays the listing of an Arrow file.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL