lib

package

v6.13.0 Latest Latest Go to latest Published: Oct 5, 2024 License: BSD-2-Clause Imports: 22 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/johnkerl/miller

Links

Open Source Insights

README ¶

These are basic library routines for Miller.

Documentation ¶

Overview ¶

Package lib contains basic library routines for Miller.

Index ¶

Constants
func BoolToInt(b bool) int64
func BooleanXOR(a, b bool) bool
func CompileMillerRegex(regexString string) (*regexp.Regexp, error)
func CompileMillerRegexOrDie(regexString string) *regexp.Regexp
func CompileMillerRegexesOrDie(regexStrings []string) []*regexp.Regexp
func CopyStringArray(input []string) []string
func EpochNanosecondsToGMT(epochNanoseconds int64) time.Time
func EpochNanosecondsToLocalTime(epochNanoseconds int64) time.Time
func EpochNanosecondsToLocationTime(epochNanoseconds int64, location *time.Location) time.Time
func EpochSecondsToGMT(epochSeconds float64) time.Time
func EpochSecondsToLocalTime(epochSeconds float64) time.Time
func EpochSecondsToLocationTime(epochSeconds float64, location *time.Location) time.Time
func FormatAsParagraph(text string, maxWidth int) []string
func GetArrayKeysSorted(input map[string]string) []string
func GetCov(nint int64, sumx float64, sumy float64, sumxy float64) float64
func GetCovMatrix(nint int64, sumx float64, sumx2 float64, sumy float64, sumy2 float64, ...) (Q [2][2]float64)
func GetKurtosis(nint int, sumx float64, sumx2 float64, sumx3 float64, sumx4 float64) float64
func GetLinearRegressionOLS(nint int64, sumx float64, sumx2 float64, sumxy float64, sumy float64) (m, b float64)
func GetLinearRegressionPCA(eigenvalue_1 float64, eigenvalue_2 float64, eigenvector_1 [2]float64, ...) (m, b, quality float64)
func GetRealSymmetricEigensystem(matrix [2][2]float64) (eigenvalue1 float64, eigenvalue2 float64, eigenvector1 [2]float64, ...)
func GetSkewness(nint int, sumx float64, sumx2 float64, sumx3 float64) float64
func GetVar(nint int64, sumx float64, sumx2 float64) float64
func Getoptify(inargs []string) []string
func IntMin2(a, b int64) int64
func InternalCodingErrorIf(condition bool)
func InternalCodingErrorPanic(message string)
func InternalCodingErrorWithMessageIf(condition bool, message string)
func InterpolateCaptures(replacementString string, replacementMatrix [][]int, captures []string) string
func Invqnorm(x float64) float64
func IsEOF(err error) bool
func IsUpdateableInPlace(filename string, prepipe string) error
func LoadStringFromFile(filename string) (string, error)
func LoadStringsFromDir(dirname string, extension string) ([]string, error)
func LoadStringsFromFileOrDir(path string, extension string) ([]string, error)
func LogisticRegression(xs, ys []float64) (m, b float64)
func MakeEmptyCaptures() []string
func Nsec2GMT(epochNanoseconds int64, numDecimalPlaces int) string
func Nsec2LocalTime(epochNanoseconds int64, numDecimalPlaces int) string
func Nsec2LocationTime(epochNanoseconds int64, numDecimalPlaces int, location *time.Location) string
func OpenFileForRead(filename string, prepipe string, prepipeIsRaw bool, ...) (io.ReadCloser, error)
func OpenInboundHalfPipe(commandString string) (*os.File, error)
func OpenOutboundHalfPipe(commandString string) (*os.File, error)
func OpenStdin(prepipe string, prepipeIsRaw bool, encoding TFileInputEncoding) (io.ReadCloser, error)
func PathToHandle(path string) (io.ReadCloser, error)
func Plural(n int) string
func PrintWordsAsParagraph(words []string)
func Qnorm(x float64) float64
func RandFloat64() float64
func RandInt63() int64
func RandRange(lowInclusive, highExclusive int64) int64
func RandUint32() uint32
func ReadCSVHeader(filename string) ([]string, error)
func RegexCompiledMatchSimple(input string, regex *regexp.Regexp) bool
func RegexCompiledMatchWithCaptures(input string, regex *regexp.Regexp) (bool, []string)
func RegexCompiledMatchWithMapResults(input string, regex *regexp.Regexp) (bool, []string, []int, []int)
func RegexCompiledSplitString(regex *regexp.Regexp, input string, n int) []string
func RegexCompiledSub(input string, regex *regexp.Regexp, replacement string, ...) string
func RegexStringGsub(input string, sregex string, replacement string) string
func RegexStringMatchSimple(input string, sregex string) bool
func RegexStringMatchWithCaptures(input string, sregex string) (matches bool, capturesOneUp []string)
func RegexStringMatchWithMapResults(input string, sregex string) (matches bool, captures []string, starts []int, ends []int)
func RegexStringSub(input string, sregex string, replacement string) string
func ReplacementHasCaptures(replacement string) (hasCaptures bool, matrix [][]int)
func ReverseStringList(strings []string)
func Sec2GMT(epochSeconds float64, numDecimalPlaces int) string
func Sec2LocalTime(epochSeconds float64, numDecimalPlaces int) string
func Sec2LocationTime(epochSeconds float64, numDecimalPlaces int, location *time.Location) string
func SeedRandom(seed int64)
func SetTZFromEnv() error
func Sgn(a float64) float64
func SortStrings(strings []string)
func SortedStrings(strings []string) []string
func SplitString(input string, separator string) []string
func StringListToSet(stringList []string) map[string]bool
func StripEmpties(input []string) []string
func TSVDecodeField(input string) string
func TSVEncodeField(input string) string
func TryBoolFromBoolString(input string) (bool, bool)
func TryFloatFromString(input string) (float64, bool)
func TryIntFromString(input string) (int64, bool)
func TryIntFromStringWithBase(input string, base int64) (int64, bool)
func TryLatin1ToUTF8(input string) (string, error)
func TryUTF8ToLatin1(input string) (string, error)
func UTF8Strlen(s string) int64
func UnbackslashStringLiteral(input string) string
func UnhexStringLiteral(input string) string
func WhereAreWe()
func WrapOutputHandle(fileWriteHandle io.WriteCloser, inputFileEncoding TFileInputEncoding) (io.WriteCloser, bool, error)
func WriteTempFileOrDie(contents string) string
type BZip2ReadCloser
- func NewBZip2ReadCloser(handle io.ReadCloser) *BZip2ReadCloser
- func (rc *BZip2ReadCloser) Close() error
- func (rc *BZip2ReadCloser) Read(p []byte) (n int, err error)
type OrderedMap
- func NewOrderedMap() *OrderedMap
- func (omap *OrderedMap) Clear()
- func (omap *OrderedMap) Get(key string) interface{}
- func (omap *OrderedMap) GetKeys() []string
- func (omap *OrderedMap) GetKeysExcept(exceptions map[string]bool) []string
- func (omap *OrderedMap) GetWithCheck(key string) (interface{}, bool)
- func (omap *OrderedMap) Has(key string) bool
- func (omap *OrderedMap) IsEmpty() bool
- func (omap *OrderedMap) Put(key string, value interface{})
- func (omap *OrderedMap) Remove(key string) bool
type TFileInputEncoding
- func FindInputEncoding(filename string, inputFileInputEncoding TFileInputEncoding) TFileInputEncoding
type ZstdReadCloser
- func NewZstdReadCloser(handle io.ReadCloser) (*ZstdReadCloser, error)
- func (rc *ZstdReadCloser) Close() error
- func (rc *ZstdReadCloser) Read(p []byte) (n int, err error)

Constants ¶

View Source

const DOC_URL = "https://miller.readthedocs.io"

View Source

const INVQNORM_MAXITER int = 30

View Source

const INVQNORM_TOL float64 = 1e-9

View Source

const JACOBI_MAXITER = 20

View Source

const JACOBI_TOLERANCE = 1e-12

Variables ¶

This section is empty.

Functions ¶

func BoolToInt ¶

func BoolToInt(b bool) int64

func BooleanXOR ¶

func BooleanXOR(a, b bool) bool

func CompileMillerRegex ¶

func CompileMillerRegex(regexString string) (*regexp.Regexp, error)

CompileMillerRegex wraps Go regex-compile with some Miller-specific syntax which predates the port of Miller from C to Go. Miller regexes use a final 'i' to indicate case-insensitivity; Go regexes use an initial "(?i)".

(See also mlr.bnf where we specify which things can be backslash-escaped without a syntax error at parse time.)

* If the regexString is of the form a.*b, compiles it case-sensitively.
* If the regexString is of the form "a.*b", compiles a.*b case-sensitively.
* If the regexString is of the form "a.*b"i, compiles a.*b case-insensitively.

func CompileMillerRegexOrDie ¶

func CompileMillerRegexOrDie(regexString string) *regexp.Regexp

CompileMillerRegexOrDie wraps CompileMillerRegex. Usually in Go we want to return a second error argument rather than fataling. However, if there's a malformed regex we really cannot continue so it's simpler to just fatal.

func CompileMillerRegexesOrDie ¶

func CompileMillerRegexesOrDie(regexStrings []string) []*regexp.Regexp

CompileMillerRegexesOrDie is a convenience looper over CompileMillerRegexOrDie.

func CopyStringArray ¶

func CopyStringArray(input []string) []string

func EpochNanosecondsToGMT ¶

func EpochNanosecondsToGMT(epochNanoseconds int64) time.Time

func EpochNanosecondsToLocalTime ¶

func EpochNanosecondsToLocalTime(epochNanoseconds int64) time.Time

func EpochNanosecondsToLocationTime ¶

func EpochNanosecondsToLocationTime(epochNanoseconds int64, location *time.Location) time.Time

func EpochSecondsToGMT ¶

func EpochSecondsToGMT(epochSeconds float64) time.Time

func EpochSecondsToLocalTime ¶

func EpochSecondsToLocalTime(epochSeconds float64) time.Time

func EpochSecondsToLocationTime ¶

func EpochSecondsToLocationTime(epochSeconds float64, location *time.Location) time.Time

func FormatAsParagraph ¶

func FormatAsParagraph(text string, maxWidth int) []string

For online help contexts like printing all the built-in DSL functions, or the list of all verbs. Max width is nominally 80.

func GetArrayKeysSorted ¶

func GetArrayKeysSorted(input map[string]string) []string

Go doesn't preserve insertion order in its maps, so here we make an accessor for getting the keys in sorted order for the benefit of map-printers.

func GetCov ¶

func GetCov(
	nint int64,
	sumx float64,
	sumy float64,
	sumxy float64,
) float64

func GetCovMatrix ¶

func GetCovMatrix(
	nint int64,
	sumx float64,
	sumx2 float64,
	sumy float64,
	sumy2 float64,
	sumxy float64,
) (Q [2][2]float64)

----------------------------------------------------------------

func GetKurtosis ¶

func GetKurtosis(
	nint int,
	sumx float64,
	sumx2 float64,
	sumx3 float64,
	sumx4 float64,
) float64

func GetLinearRegressionOLS ¶

func GetLinearRegressionOLS(
	nint int64,
	sumx float64,
	sumx2 float64,
	sumxy float64,
	sumy float64,
) (m, b float64)

func GetLinearRegressionPCA ¶

func GetLinearRegressionPCA(
	eigenvalue_1 float64,
	eigenvalue_2 float64,
	eigenvector_1 [2]float64,
	eigenvector_2 [2]float64,
	x_mean float64,
	y_mean float64,
) (m, b, quality float64)

func GetRealSymmetricEigensystem ¶

func GetRealSymmetricEigensystem(
	matrix [2][2]float64,
) (
	eigenvalue1 float64,
	eigenvalue2 float64,
	eigenvector1 [2]float64,
	eigenvector2 [2]float64,
)

func GetSkewness ¶

func GetSkewness(
	nint int,
	sumx float64,
	sumx2 float64,
	sumx3 float64,
) float64

GetSkewness is the finalizing function for computing skewness from streamed accumulator values.

func GetVar ¶

func GetVar(
	nint int64,
	sumx float64,
	sumx2 float64,
) float64

GetVar is the finalizing function for computing variance from streamed accumulator values.

func Getoptify ¶

func Getoptify(inargs []string) []string

Getoptify expands "-xyz" into "-x -y -z" while leaving "--xyz" intact. This is a keystroke-saver for the user.

This is OK to do here globally since Miller is quite consistent (in main, verbs, auxents, and terminals) that multi-character options start with two dashes, e.g. "--csv". (The sole exception is the sort verb's -nf/-nr which are handled specially there.)

Additionally, we split "--foo=bar" into "--foo" and "bar".

func IntMin2 ¶

func IntMin2(a, b int64) int64

func InternalCodingErrorIf ¶

func InternalCodingErrorIf(condition bool)

InternalCodingErrorIf is a lookalike for C's __FILE__ and __LINE__ printing, with exit 1 if the condition is true.

func InternalCodingErrorPanic ¶

func InternalCodingErrorPanic(message string)

InternalCodingErrorPanic is like InternalCodingErrorIf, except that it panics the process (for stack trace, which is usually not desired), and that it requires the if-test to be at the caller.

func InternalCodingErrorWithMessageIf ¶

func InternalCodingErrorWithMessageIf(condition bool, message string)

InternalCodingErrorWithMessageIf is a lookalike for C's __FILE__ and __LINE__ printing, with exit 1 if the condition is true.

func InterpolateCaptures ¶

func InterpolateCaptures(
	replacementString string,
	replacementMatrix [][]int,
	captures []string,
) string

InterpolateCaptures example:

* Input $x is "ab_cde"

DSL expression if ($x =~ "(..)_(...)") { ... other lines of code ... $y = "\2:\1"; }

* InterpolateCaptures is used on the evaluation of "\2:\1"

* replacementString is "\2:\1"

replacementMatrix contains precomputed/cached offsets for the "\2" and "\1" substrings within "\2:\1"
captures has slot 0 being "ab_cde" (for "\0"), slot 1 being "ab" (for "\1"), slot 2 being "cde" (for "\2"), and slots 3-9 being "".

func Invqnorm ¶

func Invqnorm(x float64) float64

func IsEOF ¶

func IsEOF(err error) bool

IsEOF handles the following problem: reading past end of files opened with os.Open returns the error which is io.EOF. Reading past close of pipes opened with popen (e.g. Miller's prepipe, where the file isn't 'foo.dat' but rather the process 'gunzip < foo.dat |') returns not io.EOF but an error with 'file already closed' within it. See also https://stackoverflow.com/questions/47486128/why-does-io-pipe-continue-to-block-even-when-eof-is-reached

func IsUpdateableInPlace ¶

func IsUpdateableInPlace(
	filename string,
	prepipe string,
) error

IsUpdateableInPlace tells if we can use the input with mlr -I: not for URLs, and not for prepipe commands (which we don't presume to know how to invert for output).

func LoadStringFromFile ¶

func LoadStringFromFile(filename string) (string, error)

LoadStringFromFile is just a wrapper around os.ReadFile, with a cast from []byte to string.

func LoadStringsFromDir ¶

func LoadStringsFromDir(dirname string, extension string) ([]string, error)

LoadStringsFromDir loads all file contents for files in the given directory having the given extension. E.g. LoadStringsFromDir("/u/myfiles", ".mlr") will load /u/myfiles/foo.mlr and /u/myfiles/bar.mlr but will skip over /u/myfiles/data.csv and /u/myfiles/todo.txt.

func LoadStringsFromFileOrDir ¶

func LoadStringsFromFileOrDir(path string, extension string) ([]string, error)

LoadStringsFromFileOrDir calls LoadStringFromFile if path exists and is a file, or LoadStringsFromDir if path exists and is a directory. In the former case the extension is ignored; in the latter case it's used as a filter on the directory entries.

func LogisticRegression ¶

func LogisticRegression(xs, ys []float64) (m, b float64)

func MakeEmptyCaptures ¶

func MakeEmptyCaptures() []string

MakeEmptyCaptures is for initial CST state at the start of executing the DSL expression for the current record. Even if '$x =~ "(..)_(...)"' set "\1" and "\2" on the previous record, at start of processing for the current record we need to start with a clean slate. This is in support of CST state, which `=~` semantics requires.

func Nsec2GMT ¶

func Nsec2GMT(epochNanoseconds int64, numDecimalPlaces int) string

func Nsec2LocalTime ¶

func Nsec2LocalTime(epochNanoseconds int64, numDecimalPlaces int) string

func Nsec2LocationTime ¶

func Nsec2LocationTime(epochNanoseconds int64, numDecimalPlaces int, location *time.Location) string

func OpenFileForRead ¶

func OpenFileForRead(
	filename string,
	prepipe string,
	prepipeIsRaw bool,
	encoding TFileInputEncoding,
) (io.ReadCloser, error)

OpenFileForRead: If prepipe is non-empty, popens "{prepipe} < {filename}" and returns a handle to that where prepipe is nominally things like "gunzip", "cat", etc. Otherwise, delegates to an in-process reader which can natively handle gzip/bzip2/zlib depending on the specified encoding. If the encoding isn't a compression encoding, this ends up being simply os.Open.

func OpenInboundHalfPipe ¶

func OpenInboundHalfPipe(commandString string) (*os.File, error)

func OpenOutboundHalfPipe ¶

func OpenOutboundHalfPipe(commandString string) (*os.File, error)

func OpenStdin ¶

func OpenStdin(
	prepipe string,
	prepipeIsRaw bool,
	encoding TFileInputEncoding,
) (io.ReadCloser, error)

OpenStdin: if prepipe is non-empty, popens "{prepipe}" and returns a handle to that where prepipe is nominally things like "gunzip", "cat", etc. Otherwise, delegates to an in-process reader which can natively handle gzip/bzip2/zlib depending on the specified encoding. If the encoding isn't a compression encoding, this ends up being simply os.Stdin.

func PathToHandle ¶

func PathToHandle(
	path string,
) (io.ReadCloser, error)

PathToHandle maps various back-ends to a stream. As of 2021-07-07, the following URI schemes are supported:

* https://... and http://...
* file://...
* plain disk files

func Plural ¶

func Plural(n int) string

func PrintWordsAsParagraph ¶

func PrintWordsAsParagraph(words []string)

For online help contexts like printing all the built-in DSL functions, or the list of all verbs.

func Qnorm ¶

func Qnorm(x float64) float64

Normal cumulative distribution function, expressed in terms of erfc library function (which is awkward, but exists).

func RandFloat64 ¶

func RandFloat64() float64

func RandInt63 ¶

func RandInt63() int64

func RandRange ¶

func RandRange(lowInclusive, highExclusive int64) int64

func RandUint32 ¶

func RandUint32() uint32

func ReadCSVHeader ¶

func ReadCSVHeader(filename string) ([]string, error)

func RegexCompiledMatchSimple ¶

func RegexCompiledMatchSimple(
	input string,
	regex *regexp.Regexp,
) bool

RegexCompiledMatchSimple is for simple boolean return without any substring captures.

func RegexCompiledMatchWithCaptures ¶

func RegexCompiledMatchWithCaptures(
	input string,
	regex *regexp.Regexp,
) (bool, []string)

RegexCompiledMatchWithCaptures is the implementation for the =~ operator. Without Miller-style regex captures this would be a simple one-line regex.MatchString(input). However, we return the captures array for the benefit of subsequent references to "\0".."\9".

func RegexCompiledMatchWithMapResults ¶

func RegexCompiledMatchWithMapResults(
	input string,
	regex *regexp.Regexp,
) (bool, []string, []int, []int)

RegexCompiledMatchWithMapResults does the work for RegexStringMatchWithMapResults once a compiled regexp is available. Array slot 0 is for the full match; slots 1 and up are for the capture-matches such as "$[0-9]+$:$[a-z]+$".

func RegexCompiledSplitString ¶

func RegexCompiledSplitString(regex *regexp.Regexp, input string, n int) []string

In Go as in all languages I'm aware of with a string-split, "a,b,c" splits on "," to ["a", "b", "c"] and "a" splits to ["a"], both of which are fine -- but "" splits to [""] when I wish it were []. This function does the latter.

func RegexCompiledSub ¶

func RegexCompiledSub(
	input string,
	regex *regexp.Regexp,
	replacement string,
	replacementCaptureMatrix [][]int,
) string

RegexCompiledSub is the same as RegexStringSub but with compiled regex and replacement strings.

func RegexStringGsub ¶

func RegexStringGsub(
	input string,
	sregex string,
	replacement string,
) string

RegexStringGsub implements the `gsub` DSL function.

func RegexStringMatchSimple ¶

func RegexStringMatchSimple(
	input string,
	sregex string,
) bool

RegexStringMatchSimple is for simple boolean return without any substring captures.

func RegexStringMatchWithCaptures ¶

func RegexStringMatchWithCaptures(
	input string,
	sregex string,
) (
	matches bool,
	capturesOneUp []string,
)

RegexStringMatchWithCaptures implements the =~ DSL operator. The captures are stored in DSL state and may be used by a DSL statement after the =~. For example, in

sub($a, "(..)_(...)", "\1:\2")

the replacement string is an argument to sub and therefore the captures are confined to the implementation of the sub function. Similarly for gsub. But for the match operator, people can do

if ($x =~ "(..)_(...)") {
  ... other lines of code ...
  $y = "\2:\1"
}

and the =~ callsite doesn't know if captures will be used or not. So, RegexStringMatchWithCaptures always returns the captures array. It is stored within the CST state.

func RegexStringMatchWithMapResults ¶

func RegexStringMatchWithMapResults(
	input string,
	sregex string,
) (
	matches bool,
	captures []string,
	starts []int,
	ends []int,
)

RegexStringMatchWithMapResults implements much of the `strmatchx` DSL function. This returns captures via return values. This is distinct from RegexStringMatchWithCaptures which is for the `=~` DSL operator.

func RegexStringSub ¶

func RegexStringSub(
	input string,
	sregex string,
	replacement string,
) string

RegexStringSub implements the sub DSL function.

func ReplacementHasCaptures ¶

func ReplacementHasCaptures(
	replacement string,
) (
	hasCaptures bool,
	matrix [][]int,
)

ReplacementHasCaptures is used by the CST builder to see if string-literal is like "foo bar" or "foo \1 bar" -- in the latter case it needs to retain the compiled offsets-matrix information. This is in support of CST state, which `=~` semantics requires.

func ReverseStringList ¶

func ReverseStringList(strings []string)

func Sec2GMT ¶

func Sec2GMT(epochSeconds float64, numDecimalPlaces int) string

func Sec2LocalTime ¶

func Sec2LocalTime(epochSeconds float64, numDecimalPlaces int) string

func Sec2LocationTime ¶

func Sec2LocationTime(epochSeconds float64, numDecimalPlaces int, location *time.Location) string

func SeedRandom ¶

func SeedRandom(seed int64)

Users can request specific seeds if they want the same random-number sequence on each run.

func SetTZFromEnv ¶

func SetTZFromEnv() error

SetTZFromEnv applies the $TZ environment variable. This has three reasons: (1) On Windows (as of 2021-10-20), this is necessary to get $TZ into use. (2) On Linux/Mac, as of this writing it is not necessary for initial value of TZ at startup. However, an explicit check is helpful since if someone does 'export TZ=Something/Invalid', then runs Miller, the invalid TZ is simply *ignored* -- we want to surface that error to the user. (3) On any platform this is necessary for *changing* TZ mid-process: e.g. if a DSL statement does 'ENV["TZ"] = "Asia/Istanbul"'.

func Sgn ¶

func Sgn(a float64) float64

func SortStrings ¶

func SortStrings(strings []string)

func SortedStrings ¶

func SortedStrings(strings []string) []string

func SplitString ¶

func SplitString(input string, separator string) []string

In Go as in all languages I'm aware of with a string-split, "a,b,c" splits on "," to ["a", "b", "c"] and "a" splits to ["a"], both of which are fine -- but "" splits to [""] when I wish it were []. This function does the latter.

func StringListToSet ¶

func StringListToSet(stringList []string) map[string]bool

func StripEmpties ¶

func StripEmpties(input []string) []string

func TSVDecodeField ¶

func TSVDecodeField(input string) string

TSVDecodeField is for the TSV record-reader.

func TSVEncodeField ¶

func TSVEncodeField(input string) string

TSVEncodeField is for the TSV record-writer.

func TryBoolFromBoolString ¶

func TryBoolFromBoolString(input string) (bool, bool)

func TryFloatFromString ¶

func TryFloatFromString(input string) (float64, bool)

func TryIntFromString ¶

func TryIntFromString(input string) (int64, bool)

TryIntFromString tries decimal, hex, octal, and binary.

func TryIntFromStringWithBase ¶

func TryIntFromStringWithBase(input string, base int64) (int64, bool)

TryIntFromStringWithBase allows the user to choose the base that's used, rather than inferring from 0x prefix, etc as TryIntFromString does.

func TryLatin1ToUTF8 ¶

func TryLatin1ToUTF8(input string) (string, error)

func TryUTF8ToLatin1 ¶

func TryUTF8ToLatin1(input string) (string, error)

func UTF8Strlen ¶

func UTF8Strlen(s string) int64

func UnbackslashStringLiteral ¶

func UnbackslashStringLiteral(input string) string

UnbackslashStringLiteral replaces "\t" with TAB, etc. for DSL expressions like '$foo = "a\tb"'. See also https://en.wikipedia.org/wiki/Escape_sequences_in_C (predates the port of Miller from C to Go).

Note that a CST-build pre-pass intentionally excludes regex literals (2nd argument to sub/gsub/regextract/etc) from being modified here.

Note "\0" .. "\9" are used for regex captures within the DSL CST builder and are not touched here. (See also lib/regex.go.)

func UnhexStringLiteral ¶

func UnhexStringLiteral(input string) string

UnhexStringLiteral is like UnbackslashStringLiteral but only unhexes things like "\x1f". This is for IFS and IPS setup; see the cli package.

func WhereAreWe ¶

func WhereAreWe()

WhereAreWe shows a stack trace from the current callsite.

func WrapOutputHandle ¶

func WrapOutputHandle(
	fileWriteHandle io.WriteCloser,
	inputFileEncoding TFileInputEncoding,
) (io.WriteCloser, bool, error)

WrapOutputHandle wraps a file-write handle with a compressor. The first return value is the wrapped handle. The second is true if the returned handle needs to be closed separately from the original. The third is for in-process compression we can't undo: namely, as of September 2021 the gzip and zlib libraries support write-closers, but the bzip2 library does not.

func WriteTempFileOrDie ¶

func WriteTempFileOrDie(contents string) string

WriteTempFileOrDie places the contents string into a temp file, which the caller must remove.

Types ¶

type BZip2ReadCloser ¶

type BZip2ReadCloser struct {
	// contains filtered or unexported fields
}

BZip2ReadCloser remedies the fact that bzip2.NewReader does not implement io.ReadCloser.

func NewBZip2ReadCloser ¶

func NewBZip2ReadCloser(handle io.ReadCloser) *BZip2ReadCloser

func (*BZip2ReadCloser) Close ¶

func (rc *BZip2ReadCloser) Close() error

func (*BZip2ReadCloser) Read ¶

func (rc *BZip2ReadCloser) Read(p []byte) (n int, err error)

type OrderedMap ¶

type OrderedMap struct {
	FieldCount int64
	Head       *orderedMapEntry
	Tail       *orderedMapEntry
	// contains filtered or unexported fields
}

----------------------------------------------------------------

func NewOrderedMap ¶

func NewOrderedMap() *OrderedMap

----------------------------------------------------------------

func (*OrderedMap) Clear ¶

func (omap *OrderedMap) Clear()

----------------------------------------------------------------

func (*OrderedMap) Get ¶

func (omap *OrderedMap) Get(key string) interface{}

----------------------------------------------------------------

func (*OrderedMap) GetKeys ¶

func (omap *OrderedMap) GetKeys() []string

func (*OrderedMap) GetKeysExcept ¶

func (omap *OrderedMap) GetKeysExcept(exceptions map[string]bool) []string

Returns an array of keys, not including the ones specified. The ones specified are to be passed in as a map from string to bool, as Go doesn't have hash-sets.

func (*OrderedMap) GetWithCheck ¶

func (omap *OrderedMap) GetWithCheck(key string) (interface{}, bool)

The Get is sufficient for pointer values -- the caller can check if the return value is nil. For int/string values (which are non-nullable) we have this method.

func (*OrderedMap) Has ¶

func (omap *OrderedMap) Has(key string) bool

func (*OrderedMap) IsEmpty ¶

func (omap *OrderedMap) IsEmpty() bool

----------------------------------------------------------------

func (*OrderedMap) Put ¶

func (omap *OrderedMap) Put(key string, value interface{})

----------------------------------------------------------------

func (*OrderedMap) Remove ¶

func (omap *OrderedMap) Remove(key string) bool

Remove returns true if the key was found and removed.

type TFileInputEncoding ¶

type TFileInputEncoding int

const (
	FileInputEncodingDefault TFileInputEncoding = iota
	FileInputEncodingBzip2
	FileInputEncodingGzip
	FileInputEncodingZlib
	FileInputEncodingZstd
)

func FindInputEncoding ¶

func FindInputEncoding(
	filename string,
	inputFileInputEncoding TFileInputEncoding,
) TFileInputEncoding

FindInputEncoding determines the input encoding (compression), whether from a flag like --gzin, or from filename suffix like ".gz". If the user did --gzin on the command line, TFileInputEncoding will be FileInputEncodingGzip. If they didn't, but the filename ends in ".gz", then we auto-infer FileInputEncodingGzip. Either way, this function tells if we will be using in-process decompression within the file-format-specific record reader.

type ZstdReadCloser ¶

type ZstdReadCloser struct {
	// contains filtered or unexported fields
}

ZstdReadCloser remedies the fact that zstd.NewReader does not implement io.ReadCloser.

func NewZstdReadCloser ¶

func NewZstdReadCloser(handle io.ReadCloser) (*ZstdReadCloser, error)

func (*ZstdReadCloser) Close ¶

func (rc *ZstdReadCloser) Close() error

func (*ZstdReadCloser) Read ¶

func (rc *ZstdReadCloser) Read(p []byte) (n int, err error)

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL