impala

package module
v0.0.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 2, 2023 License: MIT Imports: 20 Imported by: 0

README

Golang Apache Impala Driver

Apache Impala driver for Go's database/sql package

As far as we know, this is the only pure golang driver for Apache Impala that has TLS and LDAP support.

The current implementation of the driver is based on the Hive Server 2 protocol.

The legacy Beeswax protocol based driver is available at go-impala v1.0.0, which is marked deprecated and will no longer be maintained.

If you are using Go 1.12 or later, you can get the v1.0.0 of the driver with go get github.com/jrbrinlee1/go-impala@v1.0.0 or use a dependency management tool such as dep

We at Bipp, want to make large scale data analytics accesible to every business user.

As part of that, we are commited to making this is a production grade driver that be used in serious enterprise scenarios in lieu of the ODBC/JDBC drivers.

Issues and contributions are welcome.

Install

go get github.com/jrbrinlee1/go-impala

Connection Parameters and DSN

The connection string uses a URL format: impala://username:password@host:port?param1=value&param2=value

Parameters:
  • auth - string. Authentication mode. Supported values: "noauth", "ldap"
  • tls - boolean. Enable TLS
  • ca-cert - The file that contains the public key certificate of the CA that signed the impala certificate
  • batch-size - integer value (default: 1024). Maximum number of rows fetched per request
  • buffer-size- in bytes (default: 4096); Buffer size for the Thrift transport
  • mem-limit - string value (example: 3m); Memory limit for query

A string of this format can be constructed using the URL type in the net/url package.

  query := url.Values{}
  query.Add("auth", "ldap")

  u := &url.URL{
      Scheme:   "impala",
      User:     url.UserPassword(username, password),
      Host:     net.JoinHostPort(hostname, port),
      RawQuery: query.Encode(),
  }
  db, err := sql.Open("impala", u.String())

Also, you can bypass string-base data source name by using sql.OpenDB:

  opts := impala.DefaultOptions
  opts.Host = hostname
  opts.UseLDAP = true
  opts.Username = username
  opts.Password = password

  connector := impala.NewConnector(&opts)
  db := sql.OpenDB(connector)

Example

package main

// Simple program to list databases and the tables

import (
	"context"
	"database/sql"
	"log"

	impala "github.com/jrbrinlee1/go-impala"
)

func main() {

	opts := impala.DefaultOptions

	opts.Host = "<impala host>"
	opts.Port = "21050"

	// enable LDAP authentication:
	opts.UseLDAP = true
	opts.Username = "<ldap username>"
	opts.Password = "<ldap password>"

	// enable TLS
	opts.UseTLS = true
	opts.CACertPath = "/path/to/cacert"

	connector := impala.NewConnector(&opts)
	db := sql.OpenDB(connector)
	defer db.Close()

	ctx := context.Background()

	rows, err := db.QueryContext(ctx, "SHOW DATABASES")
	if err != nil {
		log.Fatal(err)
	}

	r := struct {
		name    string
		comment string
	}{}

	databases := make([]string, 0) // databases will contain all the DBs to enumerate later
	for rows.Next() {
		if err := rows.Scan(&r.name, &r.comment); err != nil {
			log.Fatal(err)
		}
		databases = append(databases, r.name)
	}
	if err := rows.Err(); err != nil {
		log.Fatal(err)
	}
	log.Println("List of Databases", databases)

	stmt, err := db.PrepareContext(ctx, "SHOW TABLES IN ?")
	if err != nil {
		log.Fatal(err)
	}

	tbl := struct {
		name string
	}{}

	for _, d := range databases {
		rows, err := stmt.QueryContext(ctx, d)
		if err != nil {
			log.Printf("error in querying database %s: %s", d, err.Error())
			continue
		}

		tables := make([]string, 0)
		for rows.Next() {
			if err := rows.Scan(&tbl.name); err != nil {
				log.Println(err)
				continue
			}
			tables = append(tables, tbl.name)
		}
		log.Printf("List of Tables in Database %s: %v\n", d, tables)
	}
}

Documentation

Index

Constants

This section is empty.

Variables

View Source
var (
	// DefaultOptions for impala driver
	DefaultOptions = Options{BatchSize: 1024, BufferSize: 4096, Port: "21050", LogOut: ioutil.Discard}
)
View Source
var (
	// ErrNotSupported means this operation is not supported by impala driver
	ErrNotSupported = errors.New("impala: not supported")
)

Functions

func NewConnector

func NewConnector(opts *Options) driver.Connector

NewConnector creates connector with specified options

Types

type Conn

type Conn struct {
	// contains filtered or unexported fields
}

Conn to impala. It is not used concurrently by multiple goroutines.

func (*Conn) Begin

func (c *Conn) Begin() (driver.Tx, error)

Begin is not supported

func (*Conn) CheckNamedValue

func (c *Conn) CheckNamedValue(val *driver.NamedValue) error

CheckNamedValue is called before passing arguments to the driver and is called in place of any ColumnConverter. CheckNamedValue must do type validation and conversion as appropriate for the driver.

func (*Conn) Close

func (c *Conn) Close() error

Close connection

func (*Conn) ExecContext

func (c *Conn) ExecContext(ctx context.Context, q string, args []driver.NamedValue) (driver.Result, error)

ExecContext executes a query that doesn't return rows

func (*Conn) OpenSession

func (c *Conn) OpenSession(ctx context.Context) (*hive.Session, error)

OpenSession ensure opened session

func (*Conn) Ping

func (c *Conn) Ping(ctx context.Context) error

Ping impala server

func (*Conn) Prepare

func (c *Conn) Prepare(query string) (driver.Stmt, error)

Prepare returns prepared statement

func (*Conn) PrepareContext

func (c *Conn) PrepareContext(ctx context.Context, query string) (driver.Stmt, error)

PrepareContext returns prepared statement

func (*Conn) QueryContext

func (c *Conn) QueryContext(ctx context.Context, q string, args []driver.NamedValue) (driver.Rows, error)

QueryContext executes a query that may return rows

func (*Conn) ResetSession

func (c *Conn) ResetSession(ctx context.Context) error

ResetSession closes hive session

type Driver

type Driver struct{}

Driver to impala

func (*Driver) Open

func (d *Driver) Open(uri string) (driver.Conn, error)

Open creates new connection to impala

func (*Driver) OpenConnector

func (d *Driver) OpenConnector(name string) (driver.Connector, error)

OpenConnector parses name and return connector with fixed options

type Options

type Options struct {
	Host     string
	Port     string
	Username string
	Password string

	UseLDAP      bool
	UseTLS       bool
	CACertPath   string
	BufferSize   int
	BatchSize    int
	MemoryLimit  string
	QueryTimeout int

	LogOut io.Writer
}

Options for impala driver connection

type Rows

type Rows struct {
	// contains filtered or unexported fields
}

Rows is an iterator over an executed query's results.

func (*Rows) Close

func (r *Rows) Close() error

Close closes rows iterator

func (*Rows) ColumnTypeDatabaseTypeName

func (r *Rows) ColumnTypeDatabaseTypeName(index int) string

ColumnTypeDatabaseTypeName returns column's database type name

func (*Rows) ColumnTypeScanType

func (r *Rows) ColumnTypeScanType(index int) reflect.Type

ColumnTypeScanType returns column's native type

func (*Rows) Columns

func (r *Rows) Columns() []string

Columns returns the names of the columns

func (*Rows) Next

func (r *Rows) Next(dest []driver.Value) error

Next prepares next row for scanning

type Stmt

type Stmt struct {
	// contains filtered or unexported fields
}

Stmt is statement

func (*Stmt) CheckNamedValue

func (s *Stmt) CheckNamedValue(val *driver.NamedValue) error

CheckNamedValue is called before passing arguments to the driver and is called in place of any ColumnConverter. CheckNamedValue must do type validation and conversion as appropriate for the driver.

func (*Stmt) Close

func (s *Stmt) Close() error

Close statement. No-op

func (*Stmt) Exec

func (s *Stmt) Exec(args []driver.Value) (driver.Result, error)

Exec executes a query that doesn't return rows

func (*Stmt) ExecContext

func (s *Stmt) ExecContext(ctx context.Context, args []driver.NamedValue) (driver.Result, error)

ExecContext executes a query that doesn't return rows

func (*Stmt) NumInput

func (s *Stmt) NumInput() int

NumInput returns number of inputs

func (*Stmt) Query

func (s *Stmt) Query(args []driver.Value) (driver.Rows, error)

Query executes a query that may return rows

func (*Stmt) QueryContext

func (s *Stmt) QueryContext(ctx context.Context, args []driver.NamedValue) (driver.Rows, error)

QueryContext executes a query that may return rows

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL