strings

package
v0.58.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 7, 2024 License: BSD-3-Clause Imports: 10 Imported by: 2

Documentation

Overview

Package strings provides a library for working with strings or slices of strings.

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func Alnum

func Alnum(text string, withSpace bool) string

Alnum removes non alpha-numeric characters from text and returns the result. If withSpace is true then white space is kept; otherwise it is also removed from text.

Example
type testCase struct {
	text      string
	withSpace bool
}

var cases = []testCase{
	{`A, b.c`, false},
	{`A, b.c`, true},
	{`A1 b`, false},
	{`A1 b`, true},
}

var (
	c testCase
)
for _, c = range cases {
	fmt.Println(Alnum(c.text, c.withSpace))
}
Output:

Abc
A bc
A1b
A1 b

func AppendUniq

func AppendUniq(in []string, vals ...string) []string

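AppendUniq appends each string in vals to the slice in, skipping values that are already present in the slice. As the example below shows, the comparison is case-sensitive: `C` is appended even though `c` already exists.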

Example
var (
	in   = []string{`a`, ``, `b`, `c`}
	vals = []string{`b`, ``, `C`, `d`}
)

in = AppendUniq(in, vals...)

fmt.Println(in)
Output:

[a  b c C d]

func CleanURI

func CleanURI(text string) string

CleanURI removes known links from text and returns the result. This function assumes that a space inside a URI is encoded as '%20', not written as a literal space ' '.

List of known links: http, https, ftp, ftps, ssh, file, rsync, and ws.

Example
var text = `You can visit ftp://hostname or https://hostname/link%202 for more information`

fmt.Println(CleanURI(text))
Output:

You can visit  or  for more information

func CleanWikiMarkup

func CleanWikiMarkup(text string) string

CleanWikiMarkup remove wiki markup from text.

List of known wiki markups,

  • [[Category: ... ]]
  • [[:Category: ... ]]
  • [[File: ... ]]
  • [[Help: ... ]]
  • [[Image: ... ]]
  • [[Special: ... ]]
  • [[Wikipedia: ... ]]
  • {{DEFAULTSORT: ... }}
  • {{Template: ... }}
  • <ref ... />
Example
var text = `* Test image [[Image:fileto.png]].`

fmt.Println(CleanWikiMarkup(text))
Output:

* Test image .

func CountAlnum

func CountAlnum(text string) (n int)

CountAlnum return number of alpha-numeric character in text.

Example
fmt.Println(CountAlnum(`// A b c 1 2 3`))
Output:

6

func CountAlnumDistribution

func CountAlnumDistribution(text string) (chars []rune, counts []int)

CountAlnumDistribution count distribution of alpha-numeric characters in text.

Example, given a text "abbcccddddeeeee", it will return [a b c d e] and [1 2 3 4 5].

Example
var (
	chars  []rune
	counts []int
)

chars, counts = CountAlnumDistribution(`// A b c A b`)

fmt.Printf(`%c %v`, chars, counts)
Output:

[A b c] [2 2 1]

func CountCharSequence

func CountCharSequence(text string) (chars []rune, counts []int)

CountCharSequence, given a string, finds each run of the same character repeated more than once in sequence, and returns the list of those characters and the length of each run.

Example
var (
	text   = `aaa abcdee ffgf`
	chars  []rune
	counts []int
)

chars, counts = CountCharSequence(text)

// 'a' is not counted as 4 because the run is broken by another character,
// space ' '.
fmt.Printf(`%c %v`, chars, counts)
Output:

[a e f] [3 2 2]

func CountDigit

func CountDigit(text string) (n int)

CountDigit return number of digit in text.

Example
var text = `// Copyright 2018 Mhd Sulhan <ms@kilabit.info>. All rights reserved.`
fmt.Println(CountDigit(text))
Output:

4

func CountMissRate

func CountMissRate(src []string, target []string) (
	missrate float64,
	nmiss, length int,
)

CountMissRate, given two slices of string, counts the number of strings that are not equal to each other at the same index, and returns the miss rate as

number of not equal / number of data

together with the number of misses and the length of input `src`.

Example
var (
	src = []string{`A`, `B`, `C`, `D`}
	tgt = []string{`A`, `B`, `C`, `D`}
)

fmt.Println(CountMissRate(src, tgt))

src = []string{`A`, `B`, `C`, `D`}
tgt = []string{`B`, `B`, `C`, `D`}
fmt.Println(CountMissRate(src, tgt))

src = []string{`A`, `B`, `C`, `D`}
tgt = []string{`B`, `C`, `C`, `D`}
fmt.Println(CountMissRate(src, tgt))

src = []string{`A`, `B`, `C`, `D`}
tgt = []string{`B`, `C`, `D`, `D`}
fmt.Println(CountMissRate(src, tgt))

src = []string{`A`, `B`, `C`, `D`}
tgt = []string{`C`, `D`, `D`, `E`}
fmt.Println(CountMissRate(src, tgt))
Output:

0 0 4
0.25 1 4
0.5 2 4
0.75 3 4
1 4 4

func CountNonAlnum

func CountNonAlnum(text string, withspace bool) (n int)

CountNonAlnum returns the number of non alpha-numeric characters in text. If `withspace` is true, white space is counted as non-alpha-numeric; if it is false, white space is ignored.

func CountToken

func CountToken(words []string, token string, sensitive bool) int

CountToken will return number of token occurrence in words.

Example
var (
	words = []string{`A`, `B`, `C`, `a`, `b`, `c`}
)

fmt.Println(CountToken(words, `C`, false))
fmt.Println(CountToken(words, `C`, true))
Output:

2
1

func CountTokens

func CountTokens(words []string, tokens []string, sensitive bool) []int

CountTokens count number of occurrence of each `tokens` values in words. Return number of each tokens based on their index.

Example
var (
	words  = []string{`A`, `B`, `C`, `a`, `b`, `c`}
	tokens = []string{`A`, `B`}
)

fmt.Println(CountTokens(words, tokens, false))
fmt.Println(CountTokens(words, tokens, true))
Output:

[2 2]
[1 1]

func CountUniqChar

func CountUniqChar(text string) (n int)

CountUniqChar count number of character in text without duplication.

Example
fmt.Println(CountUniqChar(`abc abc`))
fmt.Println(CountUniqChar(`abc ABC`))
Output:

4
7

func CountUpperLower

func CountUpperLower(text string) (upper, lower int)

CountUpperLower return number of uppercase and lowercase in text.

Example
fmt.Println(CountUpperLower(`// A B C d e f g h I J K`))
Output:

6 5

func Delete

func Delete(in []string, value string) (out []string, ok bool)

Delete the first item that matches value while still preserving the order. It returns true if an item was deleted from the slice, otherwise it returns false.

func FrequencyOfToken

func FrequencyOfToken(words []string, token string, sensitive bool) float64

FrequencyOfToken return frequency of token in words using

count-of-token / total-words
Example
var (
	words = []string{`A`, `B`, `C`, `a`, `b`, `c`}
)

fmt.Println(FrequencyOfToken(words, `C`, false))
fmt.Println(FrequencyOfToken(words, `C`, true))
Output:

0.3333333333333333
0.16666666666666666

func FrequencyOfTokens

func FrequencyOfTokens(words, tokens []string, sensitive bool) (probs []float64)

FrequencyOfTokens will compute each frequency of token in words.

Example
var (
	words  = []string{`A`, `B`, `C`, `a`, `b`, `c`}
	tokens = []string{`A`, `B`}
)

fmt.Println(FrequencyOfTokens(words, tokens, false))
fmt.Println(FrequencyOfTokens(words, tokens, true))
Output:

[0.3333333333333333 0.3333333333333333]
[0.16666666666666666 0.16666666666666666]

func IsContain

func IsContain(ss []string, el string) bool

IsContain returns true if element `el` is in the slice of string `ss`, otherwise it returns false.

func IsEqual

func IsEqual(a, b []string) bool

IsEqual compare elements of two slice of string without regard to their order.

Return true if both slices have the same elements, false otherwise.

Example
fmt.Println(IsEqual([]string{`a`, `b`}, []string{`a`, `b`}))
fmt.Println(IsEqual([]string{`a`, `b`}, []string{`b`, `a`}))
fmt.Println(IsEqual([]string{`a`, `b`}, []string{`a`}))
fmt.Println(IsEqual([]string{`a`, `b`}, []string{`b`, `b`}))
Output:

true
true
false
false

func LinesOfFile

func LinesOfFile(file string) ([]string, error)

LinesOfFile parse the content of file and return non-empty lines.

func Longest

func Longest(words []string) (string, int)

Longest finds the longest word in words and returns its value and index.

If words is empty, it returns an empty string with a negative (-1) index.

Example
var (
	words = []string{`a`, `bb`, `ccc`, `d`, `eee`}
)

fmt.Println(Longest(words))
Output:

ccc 2

func MaxCharSequence

func MaxCharSequence(text string) (rune, int)

MaxCharSequence returns the character that has the longest sequence of consecutive repeats in `text`, together with the length of that sequence.

Example
var (
	c rune
	n int
)

c, n = MaxCharSequence(`aaa abcdee ffgf`)

fmt.Printf(`%c %d`, c, n)
Output:

a 3

func MergeSpaces

func MergeSpaces(text string, withline bool) string

MergeSpaces replace two or more horizontal spaces (' ', '\t', '\v', '\f', '\r') with single space. If withline is true it also replace two or more new lines with single new-line.

Example
var line = "   a\n\nb c   d\n\n"
fmt.Printf("Without merging newline: '%s'\n", MergeSpaces(line, false))
fmt.Printf("With merging newline: '%s'\n", MergeSpaces(line, true))
Output:

Without merging newline: ' a

b c d

'
With merging newline: ' a
b c d
'

func MostFrequentTokens

func MostFrequentTokens(words []string, tokens []string, sensitive bool) string

MostFrequentTokens return the token that has highest frequency in words.

For example, given input

words:  [A A B A B C C]
tokens: [A B]

it will return A as the majority token in words. If tokens have equal frequency, then the first token in order will be returned.

Example
var (
	words  = []string{`a`, `b`, `B`, `B`, `a`}
	tokens = []string{`a`, `b`}
)

fmt.Println(MostFrequentTokens(words, tokens, false))
fmt.Println(MostFrequentTokens(words, tokens, true))
Output:

b
a

func RatioAlnum

func RatioAlnum(text string) float64

RatioAlnum compute and return ratio of alpha-numeric within all character in text.

Example
fmt.Println(RatioAlnum(`//A1`))
Output:

0.5

func RatioDigit

func RatioDigit(text string) float64

RatioDigit compute and return digit ratio to all characters in text.

Example
fmt.Println(RatioDigit(`// A b 0 1`))
Output:

0.2

func RatioNonAlnum

func RatioNonAlnum(text string, withspace bool) float64

RatioNonAlnum return ratio of non-alphanumeric character to all character in text.

If `withspace` is true then white-space character will be counted as non-alpha numeric, otherwise it will be skipped.

Example
fmt.Println(RatioNonAlnum(`// A1`, false))
fmt.Println(RatioNonAlnum(`// A1`, true))
Output:

0.4
0.6

func RatioUpper

func RatioUpper(text string) float64

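RatioUpper computes and returns the ratio of uppercase characters in text. Note that in the example below the denominator excludes non-alphanumeric characters such as '/' and spaces (1 uppercase out of 4 letters gives 0.25), so the ratio is not taken over every character in text.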

Example
fmt.Println(RatioUpper(`// A b c d`))
Output:

0.25

func RatioUpperLower

func RatioUpperLower(text string) float64

RatioUpperLower compute and return ratio of uppercase with lowercase character in text.

Example
fmt.Println(RatioUpperLower(`// A b c d e`))
Output:

0.25

func Reverse

func Reverse(input string) string

Reverse the string.

func SingleSpace

func SingleSpace(in string) string

SingleSpace convert all sequences of white spaces into single space ' '.

func SortByIndex

func SortByIndex(ss *[]string, sortedListID []int)

SortByIndex will sort the slice of string in place using list of index.

Example
var (
	dat = []string{`Z`, `X`, `C`, `V`, `B`, `N`, `M`}
	ids = []int{4, 2, 6, 5, 3, 1, 0}
)

fmt.Println(dat)
SortByIndex(&dat, ids)
fmt.Println(dat)
Output:

[Z X C V B N M]
[B C M N V X Z]

func Split

func Split(text string, cleanit bool, uniq bool) (words []string)

Split given a text, return all words in text.

A word is any sequence of characters with a length of one or more, separated from other words by white space.

If cleanit is true remove any non-alphanumeric in the start and the end of each words.

If uniq is true remove duplicate words, in case insensitive manner.

Example
var line = `a b   c [A] B C`
fmt.Println(Split(line, false, false))
fmt.Println(Split(line, true, false))
fmt.Println(Split(line, false, true))
fmt.Println(Split(line, true, true))
Output:

[a b c [A] B C]
[a b c A B C]
[a b c [A]]
[a b c]

func Swap

func Swap(ss []string, x, y int)

Swap the values at two indices of the slice of string. If x or y is less than zero, the slice is left unchanged. If x or y is greater than the length of the slice, the slice is left unchanged.

Example
var (
	ss = []string{`a`, `b`, `c`}
)

Swap(ss, -1, 1)
fmt.Println(ss)
Swap(ss, 1, -1)
fmt.Println(ss)
Swap(ss, 4, 1)
fmt.Println(ss)
Swap(ss, 1, 4)
fmt.Println(ss)
Swap(ss, 1, 2)
fmt.Println(ss)
Output:

[a b c]
[a b c]
[a b c]
[a b c]
[a c b]

func TextFrequencyOfTokens

func TextFrequencyOfTokens(text string, tokens []string, sensitive bool) (
	freq float64,
)

TextFrequencyOfTokens return frequencies of tokens by counting each occurrence of token and divide it with total words in text.

Example
var text = `a b c d A B C D 1 2`

fmt.Println(TextFrequencyOfTokens(text, []string{`a`}, false))
fmt.Println(TextFrequencyOfTokens(text, []string{`a`}, true))
Output:

0.2
0.1

func TextSumCountTokens

func TextSumCountTokens(text string, tokens []string, sensitive bool) (
	cnt int,
)

TextSumCountTokens given a text, count how many tokens inside of it and return sum of all counts.

Example
var (
	text   = `[[aa]] [[AA]]`
	tokens = []string{`[[`}
)

fmt.Println(TextSumCountTokens(text, tokens, false))

tokens = []string{`aa`}
fmt.Println(TextSumCountTokens(text, tokens, false))

fmt.Println(TextSumCountTokens(text, tokens, true))
Output:

2
2
1

func ToBytes

func ToBytes(ss []string) (sv [][]byte)

ToBytes convert slice of string into slice of slice of bytes.

Example
var (
	ss     = []string{`This`, `is`, `a`, `string`}
	sbytes = ToBytes(ss)
)

fmt.Printf(`%s`, sbytes)
Output:

[This is a string]

func ToFloat64

func ToFloat64(ss []string) (sv []float64)

ToFloat64 convert slice of string to slice of float64. If converted string return error it will set the float value to 0.

Example
var (
	in   = []string{`0`, `1.1`, `e`, `3`}
	sf64 = ToFloat64(in)
)

fmt.Println(sf64)
Output:

[0 1.1 0 3]

func ToInt64

func ToInt64(ss []string) (sv []int64)

ToInt64 convert slice of string to slice of int64. If converted string return an error it will set the integer value to 0.

Example
var (
	in   = []string{`0`, `1`, `e`, `3.3`}
	si64 = ToInt64(in)
)

fmt.Println(si64)
Output:

[0 1 0 3]

func ToStrings

func ToStrings(is []interface{}) (vs []string)

ToStrings convert slice of interface to slice of string.

Example
var (
	i64 = []interface{}{0, 1.99, 2, 3}
	ss  = ToStrings(i64)
)

fmt.Println(ss)
Output:

[0 1.99 2 3]

func TotalFrequencyOfTokens

func TotalFrequencyOfTokens(words, tokens []string, sensitive bool) float64

TotalFrequencyOfTokens return total frequency of list of token in words.

Example
var (
	words  = []string{`A`, `B`, `C`, `a`, `b`, `c`}
	tokens = []string{`A`, `B`}
)

fmt.Println(TotalFrequencyOfTokens(words, tokens, false))
fmt.Println(TotalFrequencyOfTokens(words, tokens, true))
Output:

0.6666666666666666
0.3333333333333333

func TrimNonAlnum

func TrimNonAlnum(text string) string

TrimNonAlnum remove non alpha-numeric character at the beginning and end of `text`.

Example
var (
	inputs = []string{
		`[[alpha]]`,
		`[[alpha`,
		`alpha]]`,
		`alpha`,
		`alpha0`,
		`1alpha`,
		`1alpha0`,
		`[a][b][c]`,
		`[][][]`,
	}
	in string
)

for _, in = range inputs {
	fmt.Println(TrimNonAlnum(in))
}
Output:

alpha
alpha
alpha
alpha
alpha0
1alpha
1alpha0
a][b][c

func Uniq

func Uniq(words []string, sensitive bool) (uniques []string)

Uniq removes duplicate strings from `words`. It modifies the content of the slice words by replacing each duplicate word with an empty string ("") and returns only the unique words. If sensitive is true then the strings are compared case-sensitively.

Example
var (
	words = []string{`a`, ``, `A`}
)

fmt.Printf("%s %s\n", Uniq(words, false), words)
words = []string{`a`, ``, `A`}
fmt.Printf("%s %s\n", Uniq(words, true), words)
Output:

[a] [a  ]
[a A] [a  A]

Types

type Parser

type Parser struct {
	// contains filtered or unexported fields
}

Parser implement text parsing over string.

func NewParser

func NewParser(content, delims string) (p *Parser)

NewParser create and initialize parser from content and delimiters.

Example
content := "[test]\nkey = value"
p := NewParser(content, `=[]`)

for {
	token, del := p.Read()
	token = strings.TrimSpace(token)
	fmt.Printf("%q %q\n", token, del)
	if del == 0 {
		break
	}
}
Output:

"" '['
"test" ']'
"key" '='
"value" '\x00'

func OpenForParser

func OpenForParser(file, delims string) (p *Parser, err error)

OpenForParser create and initialize the Parser using content from file. If delimiters is empty, it would default to all whitespaces characters.

func (*Parser) AddDelimiters

func (p *Parser) AddDelimiters(delims string)

AddDelimiters append new delimiter to existing parser.

func (*Parser) Close

func (p *Parser) Close()

Close the parser by resetting all its internal state to zero value.

func (*Parser) Line

func (p *Parser) Line() (string, rune)

Line reads and returns a single line. On success it returns the line without the trailing '\n', and the new line character. In case of EOF it returns the last line and 0.

func (*Parser) Lines

func (p *Parser) Lines() []string

Lines return all non-empty lines from the content.

func (*Parser) Load

func (p *Parser) Load(content, delims string)

Load the new content and delimiters.

func (*Parser) Read

func (p *Parser) Read() (string, rune)

Read reads the next token from content until one of the delimiters is found. If no delimiter is found, it means all of the content has been read, and the returned delimiter will be 0.

func (*Parser) ReadEnclosed

func (p *Parser) ReadEnclosed(open, closed rune) (string, rune)

ReadEnclosed read the token inside opening and closing characters, ignoring all delimiters that previously set.

It will return the parsed token and the closed character if the closed character is found, otherwise it will return the token with 0.

func (*Parser) ReadEscaped

func (p *Parser) ReadEscaped(esc rune) (string, rune)

ReadEscaped read the next token from content until one of the delimiter found, unless its escaped with value of esc character.

For example, if the content is "a\ b" and one of the delimiters is " ", escaping the space with "\" will return the token "a b", not "a".

func (*Parser) ReadNoSpace

func (p *Parser) ReadNoSpace() (v string, r rune)

ReadNoSpace read the next token until one of the delimiter found, with leading and trailing spaces are ignored.

Example
var (
	content = " 1 , \r\t\f, 2 , 3 , 4 , "
	p       = NewParser(content, `,`)

	tok string
	r   rune
)
for {
	tok, r = p.ReadNoSpace()
	fmt.Printf("%q\n", tok)
	if r == 0 {
		break
	}
}
Output:

"1"
""
"2"
"3"
"4"
""

func (*Parser) RemoveDelimiters

func (p *Parser) RemoveDelimiters(dels string)

RemoveDelimiters from current parser.

func (*Parser) SetDelimiters

func (p *Parser) SetDelimiters(delims string)

SetDelimiters replace the current delimiters with delims.

func (*Parser) Skip

func (p *Parser) Skip(n int)

Skip parsing n characters, or skip to EOF if n is greater than the length of content.

func (*Parser) SkipHorizontalSpaces

func (p *Parser) SkipHorizontalSpaces() rune

SkipHorizontalSpaces skip all space (" "), tab ("\t"), carriage return ("\r"), and form feed ("\f") characters; and return the first character found, probably new line.

func (*Parser) SkipLine

func (p *Parser) SkipLine() rune

SkipLine skip all characters until new line. It will return the first character after new line or 0 if EOF.

func (*Parser) Stop

func (p *Parser) Stop() (remain string, pos int)

Stop the parser, return the remaining unparsed content and its last position, and then call Close to reset the internal state back to zero.

type Row

type Row [][]string

Row is simplified name for slice of slice of string.

func (Row) IsEqual

func (row Row) IsEqual(b Row) bool

IsEqual compare two row without regard to their order.

Return true if both contain the same list, false otherwise.

Example
var row = Row{{`a`}, {`b`, `c`}}
fmt.Println(row.IsEqual(Row{{`a`}, {`b`, `c`}}))
fmt.Println(row.IsEqual(Row{{`a`}, {`c`, `b`}}))
fmt.Println(row.IsEqual(Row{{`c`, `b`}, {`a`}}))
fmt.Println(row.IsEqual(Row{{`b`, `c`}, {`a`}}))
fmt.Println(row.IsEqual(Row{{`a`}, {`b`}}))
Output:

true
true
true
true
false

func (Row) Join

func (row Row) Join(lsep string, ssep string) (s string)

Join list of slice of string using `lsep` as separator between row items and `ssep` for element in each item.

Example
var row = Row{{`a`}, {`b`, `c`}}
fmt.Println(row.Join(`;`, `,`))

row = Row{{`a`}, {}}
fmt.Println(row.Join(`;`, `,`))
Output:

a;b,c
a;

type Table

type Table []Row

Table is for working with set of row.

Each element in table is in the form of

[
	[["a"],["b","c"],...], // Row
	[["x"],["y","z"],...]   // Row
]

func Partition

func Partition(ss []string, k int) (table Table)

Partition groups the elements of slice "ss" into non-empty records, in such a way that every element is included in one and only one of the records.

Given a list of element in "ss", and number of partition "k", return the set of all group of all elements without duplication.

Number of possible list can be computed using Stirling number of second kind.

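For more information see the reference material on partitions of a set and Stirling numbers of the second kind (the link target is missing from this copy of the page).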

Example
var ss = []string{`a`, `b`, `c`}

fmt.Println(`Partition k=1:`, Partition(ss, 1))
fmt.Println(`Partition k=2:`, Partition(ss, 2))
fmt.Println(`Partition k=3:`, Partition(ss, 3))
Output:

Partition k=1: [[[a b c]]]
Partition k=2: [[[b a] [c]] [[b] [c a]] [[b c] [a]]]
Partition k=3: [[[a] [b] [c]]]

func SinglePartition

func SinglePartition(ss []string) Table

SinglePartition create a table from a slice of string, where each element in slice become a single record.

Example
var ss = []string{`a`, `b`, `c`}
fmt.Println(SinglePartition(ss))
Output:

[[[a] [b] [c]]]

func (Table) IsEqual

func (table Table) IsEqual(other Table) bool

IsEqual compare two table of string without regard to their order.

Return true if both tables contain the same lists, false otherwise.

Example
var table = Table{
	{{`a`}, {`b`, `c`}},
	{{`b`}, {`a`, `c`}},
	{{`c`}, {`a`, `b`}},
}
fmt.Println(table.IsEqual(table))

var other = Table{
	{{`c`}, {`a`, `b`}},
	{{`a`}, {`b`, `c`}},
	{{`b`}, {`a`, `c`}},
}
fmt.Println(table.IsEqual(other))

other = Table{
	{{`a`}, {`b`, `c`}},
	{{`b`}, {`a`, `c`}},
}
fmt.Println(table.IsEqual(other))
Output:

true
true
false

func (Table) JoinCombination

func (table Table) JoinCombination(s string) (tout Table)

JoinCombination for each row in table, generate new row and insert "s" into different record in different new row.

Example
var (
	table = Table{
		{{`a`}, {`b`}, {`c`}},
	}
	s = `X`
)

fmt.Println(table.JoinCombination(s))
Output:

[[[a X] [b] [c]] [[a] [b X] [c]] [[a] [b] [c X]]]

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL