mapjitsu

package module
v0.0.0-...-58f14d4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 6, 2020 License: MIT Imports: 0 Imported by: 0

README

mapjitsu

GoDoc

mapjitsu is a Go library for wrestling data

Overview

mapjitsu provides a way of mapping data with Go, offering builtin types and functions for common behaviour.

Documentation can be found at godoc.org but, as with many abstract concepts, it is perhaps best explained by way of example.

Getting Started

An understanding of mapjitsu starts with three standard ETL terms

Sources

We start with a source for each item of data. A simple interface is provided to represent this


// Source supplies a single item of data to be mapped.
type Source interface {
    // Value returns the data item, or an error if it could not be obtained.
    Value() (interface{}, error)
}

You can implement this interface yourself or use one of the builtin sources

Transforms

When wrestling data a simple 1 to 1 mapping is often not sufficient and some form of transformation is required

This can be carried out through a series of functions referred to as a Pipeline

A type is provided to represent this as a slice of functions


// Pipeline is a series of transformation functions held as a slice.
// Each function receives a value and returns a value or an error.
type Pipeline []func(interface{}) (interface{}, error)

Targets

Finally we have a target which is the destination for the wrestled data item. A simple interface is provided to represent this


// Target is the destination for a wrestled data item.
type Target interface {
    // SetValue stores the given value, or returns an error if it could not be set.
    SetValue(interface{}) error
}

Again you can implement this interface yourself or use one of the builtin targets

Putting the Sources, Transforms & Targets together to wrestle some data

The tests provide examples

Here is a test of JSON input and output using MXJ


// TestMXJ demonstrates mapping JSON input to JSON output, using MXJ maps as
// both the source and the target of a mapjitsu.Definition.
func TestMXJ(t *testing.T) {

	// start by unmarshaling some JSON to an MXJ Map
	input, err := mxj.NewMapJson([]byte(`{
		"user": {
			"first_name": "Tim",
			"last_name": "Test",
			"dob": null
		}
	}`))
	if err != nil {
		t.Fatalf("failed to unmarshal input %v", err)
	}

	// we also create an MXJ Map for the target
	output := mxj.Map{
		"Customer": make(map[string]interface{}),
	}

	// optional data items will need an error handler as MXJ will return a PathNotExistError
	// here is an example error handler returning a default value
	// (see user.title mapping below for example usage)
	onNotExistReturnDefault := func(defaultvalue interface{}) func(path string, v interface{}, err error) (interface{}, error) {
		return func(path string, v interface{}, err error) (interface{}, error) {
			if err == mxj.PathNotExistError {
				return defaultvalue, nil
			}
			// any other outcome (including success) is passed through untouched
			return v, err
		}
	}

	// transformations can be added through pipeline functions
	// this simple example converts values to strings
	// NOTE: a nil value is converted to an empty string rather than "<nil>"
	toString := func(v interface{}) (interface{}, error) {
		if v == nil {
			return "", nil
		}
		// otherwise use default fmt
		return fmt.Sprintf("%v", v), nil
	}

	// next we define our mappings
	definition := mapjitsu.Definition{
		Mappings: []mapjitsu.Mapping{
			{
				// MXJ paths are used here, see https://godoc.org/github.com/clbanning/mxj#Map.ValueForPath
				Source: mxjData.Source{Map: input, Path: "user.first_name"},
				Target: mxjData.Target{Map: output, Path: "Customer.FirstName"},
			},
			{
				Source: mxjData.Source{Map: input, Path: "user.last_name"},
				Target: mxjData.Target{Map: output, Path: "Customer.LastName"},
			},
			{
				// user.title is absent from the input, so the error handler supplies a default
				Source: mxjData.Source{Map: input, Path: "user.title", OnError: onNotExistReturnDefault("")},
				Target: mxjData.Target{Map: output, Path: "Customer.Title"},
			},
			{
				// user.dob is null in the input; toString turns it into ""
				Source:    mxjData.Source{Map: input, Path: "user.dob"},
				Transform: mapjitsu.Pipeline{toString},
				Target:    mxjData.Target{Map: output, Path: "Customer.DOB"},
			},
			{
				// here a function is used as the Source to combine two text fields
				Source: mapjitsu.SourceFunc(func() (interface{}, error) {
					result := input.ValueOrEmptyForPathString("user.first_name")
					if s := input.ValueOrEmptyForPathString("user.last_name"); s != "" {
						if result != "" {
							result = result + " "
						}
						result = result + s
					}
					if result == "" {
						// optionally an error can be returned here e.g. for required data items
						return result, errors.New("could not calculate Customer.FullName missing either a user.first_name or user.last_name")
					}
					return result, nil
				}),
				Target: mxjData.Target{Map: output, Path: "Customer.FullName"},
			},
		},
	}

	// once our mappings are defined we can apply them
	err = definition.Apply()
	if err != nil {
		t.Fatalf("failed to apply mappings %v", err)
	}

	// named outputJSON (not json) so the conventional encoding/json package name is not shadowed
	outputJSON, err := output.JsonIndent("", "\t")
	if err != nil {
		t.Fatalf("failed to marshal output %v", err)
	}

	// assert compares the marshalled output passed in as actual against expected;
	// a leading newline is prepended so it lines up with the raw string literal below
	assert := func(expected string, actual []byte) {
		jsonString := "\n" + string(actual)
		if jsonString != expected {
			t.Errorf("resulting json string \n%s\n does not match expected \n%s\n", jsonString, expected)
			return
		}
		t.Logf("%s", jsonString)
	}

	expected := `
{
	"Customer": {
		"DOB": "",
		"FirstName": "Tim",
		"FullName": "Tim Test",
		"LastName": "Test",
		"Title": ""
	}
}`

	assert(expected, outputJSON)

}

and here using CSV input and output instead


// TestCSV demonstrates mapping CSV input rows to CSV output rows, building a
// fresh mapjitsu.Definition for every input record.
func TestCSV(t *testing.T) {

	inputCSV := `first_name, last_name, dob
Tim,Test,
Tina,Test,01/01/2000`

	inputHeader := []string{"first_name", "last_name", "dob"}
	outputHeader := []string{"Customer DOB", "Customer FirstName", "Customer FullName", "Customer LastName", "Customer Title"}

	// initialise output
	var output [][]string
	output = append(output, outputHeader)

	r := csv.NewReader(strings.NewReader(inputCSV))
	// read and discard the header row; inputHeader above is used for name lookups instead
	if _, err := r.Read(); err != nil {
		t.Fatalf("failed to read header %v", err)
	}
	row := 0
	for { // read next input row
		inputRecord, err := r.Read()
		if err == io.EOF {
			break
		}
		row++ // keep track of the row number
		if err != nil {
			// report both the row number and the underlying error
			t.Fatalf("failed to read input at row %d %v", row, err)
		}

		// initialise output
		outputRecord := []string{"", "", "", "", ""}

		// define row mappings
		definition := mapjitsu.Definition{
			Mappings: []mapjitsu.Mapping{
				{
					// csvData Source and Target can use a column number for the mapping
					// (the numbers used here count from 1: column 1 is first_name)
					Source: csvData.Source{Record: inputRecord, ColumnNumber: 1},  // first_name
					Target: csvData.Target{Record: outputRecord, ColumnNumber: 2}, // Customer FirstName
				},
				{
					// csvData Source and Target can also use column names for the mapping if the header is provided
					Source: csvData.Source{Record: inputRecord, ColumnName: "last_name", Header: inputHeader},
					Target: csvData.Target{Record: outputRecord, ColumnName: "Customer LastName", Header: outputHeader},
				},
				{
					Source: csvData.Source{Record: inputRecord, ColumnName: "dob", Header: inputHeader},
					Target: csvData.Target{Record: outputRecord, ColumnName: "Customer DOB", Header: outputHeader},
				},
				{
					// here a function is used as the Source to combine two text fields
					Source: mapjitsu.SourceFunc(func() (interface{}, error) {
						firstName := inputRecord[0]
						result := firstName
						lastName := inputRecord[1]
						if result != "" && lastName != "" {
							result = result + " "
						}
						result = result + lastName
						if result == "" {
							// optionally an error can be returned here e.g. for required data items
							return result, errors.New("could not calculate Customer FullName missing either a first_name or last_name")
						}
						return result, nil
					}),
					Target: csvData.Target{Record: outputRecord, ColumnName: "Customer FullName", Header: outputHeader},
				},
			},
		}

		// once our mappings are defined we can apply them
		if err := definition.Apply(); err != nil {
			t.Fatalf("failed to apply row mappings at row %d %v", row, err)
		}

		// append output
		output = append(output, outputRecord)

	}

	// assert compares the generated CSV text against the expected text
	assert := func(expected string, actual string) {
		if actual != expected {
			t.Errorf("resulting output \n%s does not match expected \n%s", actual, expected)
			return
		}
		t.Logf("%s", actual)
	}

	var outputCSV strings.Builder

	w := csv.NewWriter(&outputCSV)
	// WriteAll flushes the writer and returns any write or flush error
	if err := w.WriteAll(output); err != nil {
		t.Fatalf("failed to write output %v", err)
	}

	expected := `Customer DOB,Customer FirstName,Customer FullName,Customer LastName,Customer Title
,Tim,Tim Test,Test,
01/01/2000,Tina,Tina Test,Test,
`

	assert(expected, outputCSV.String())

}

Contributing

Tests

GO111MODULE=on go test -v github.com/8legd/mapjitsu/tests

Correctness

GO111MODULE=on go vet ./...

Coding style

golint ./...

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Definition

// Definition groups the Mappings that are applied together by Apply.
type Definition struct {
	// Mappings lists the individual source-to-target mappings.
	Mappings []Mapping
}

func (Definition) Apply

func (d Definition) Apply() error

type Mapping

// Mapping describes how one data item moves from a Source to a Target.
type Mapping struct {
	// Source is where the value is read from.
	Source    Source
	// Transform holds transformation functions for the value.
	// NOTE(review): several README examples omit it, so it is presumably optional — confirm.
	Transform Pipeline
	// Target is where the value is written to.
	Target    Target
}

type Pipeline

// Pipeline is a series of transformation functions held as a slice.
// Each function receives a value and returns a value or an error.
type Pipeline []func(interface{}) (interface{}, error)

type Source

// Source supplies a single item of data to be mapped.
type Source interface {
	// Value returns the data item, or an error if it could not be obtained.
	Value() (interface{}, error)
}

type SourceFunc

// SourceFunc adapts an ordinary function so that it can be used as a Source.
type SourceFunc func() (interface{}, error)

The SourceFunc type is an adapter to allow the use of ordinary functions as Sources. If f is a function with the appropriate signature, SourceFunc(f) is a Source that returns f().

func (SourceFunc) Value

func (f SourceFunc) Value() (interface{}, error)

Value returns f().

type Target

// Target is the destination for a wrestled data item.
type Target interface {
	// SetValue stores the given value, or returns an error if it could not be set.
	SetValue(interface{}) error
}

type TargetFunc

// TargetFunc adapts an ordinary function so that it can be used as a Target.
type TargetFunc func(interface{}) error

The TargetFunc type is an adapter to allow the use of ordinary functions as Targets. If f is a function with the appropriate signature, TargetFunc(f) is a Target that calls f.

func (TargetFunc) SetValue

func (f TargetFunc) SetValue(v interface{}) error

SetValue calls f.

Directories

Path Synopsis
csv
mxj
seifa
data
SOCIO-ECONOMIC INDEXES FOR AREAS (SEIFA) 2016 Copyright Commonwealth of Australia See https://www.abs.gov.au/websitedbs/D3310114.nsf/Home/%A9+Copyright?opendocument/ SOCIO-ECONOMIC INDEXES FOR AREAS (SEIFA) 2016 Copyright Commonwealth of Australia See https://www.abs.gov.au/websitedbs/D3310114.nsf/Home/%A9+Copyright?opendocument/ SOCIO-ECONOMIC INDEXES FOR AREAS (SEIFA) 2016 Copyright Commonwealth of Australia See https://www.abs.gov.au/websitedbs/D3310114.nsf/Home/%A9+Copyright?opendocument/ SOCIO-ECONOMIC INDEXES FOR AREAS (SEIFA) 2016 Copyright Commonwealth of Australia See https://www.abs.gov.au/websitedbs/D3310114.nsf/Home/%A9+Copyright?opendocument/ SOCIO-ECONOMIC INDEXES FOR AREAS (SEIFA) 2016 Copyright Commonwealth of Australia See https://www.abs.gov.au/websitedbs/D3310114.nsf/Home/%A9+Copyright?opendocument/ SOCIO-ECONOMIC INDEXES FOR AREAS (SEIFA) 2016 Copyright Commonwealth of Australia See https://www.abs.gov.au/websitedbs/D3310114.nsf/Home/%A9+Copyright?opendocument/ SOCIO-ECONOMIC INDEXES FOR AREAS (SEIFA) 2016 Copyright Commonwealth of Australia See https://www.abs.gov.au/websitedbs/D3310114.nsf/Home/%A9+Copyright?opendocument/
SOCIO-ECONOMIC INDEXES FOR AREAS (SEIFA) 2016 Copyright Commonwealth of Australia See https://www.abs.gov.au/websitedbs/D3310114.nsf/Home/%A9+Copyright?opendocument/ SOCIO-ECONOMIC INDEXES FOR AREAS (SEIFA) 2016 Copyright Commonwealth of Australia See https://www.abs.gov.au/websitedbs/D3310114.nsf/Home/%A9+Copyright?opendocument/ SOCIO-ECONOMIC INDEXES FOR AREAS (SEIFA) 2016 Copyright Commonwealth of Australia See https://www.abs.gov.au/websitedbs/D3310114.nsf/Home/%A9+Copyright?opendocument/ SOCIO-ECONOMIC INDEXES FOR AREAS (SEIFA) 2016 Copyright Commonwealth of Australia See https://www.abs.gov.au/websitedbs/D3310114.nsf/Home/%A9+Copyright?opendocument/ SOCIO-ECONOMIC INDEXES FOR AREAS (SEIFA) 2016 Copyright Commonwealth of Australia See https://www.abs.gov.au/websitedbs/D3310114.nsf/Home/%A9+Copyright?opendocument/ SOCIO-ECONOMIC INDEXES FOR AREAS (SEIFA) 2016 Copyright Commonwealth of Australia See https://www.abs.gov.au/websitedbs/D3310114.nsf/Home/%A9+Copyright?opendocument/ SOCIO-ECONOMIC INDEXES FOR AREAS (SEIFA) 2016 Copyright Commonwealth of Australia See https://www.abs.gov.au/websitedbs/D3310114.nsf/Home/%A9+Copyright?opendocument/

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL