strings

package
v0.26.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 3, 2021 License: BSD-3-Clause Imports: 7 Imported by: 2

Documentation

Overview

Package strings provides a library for working with strings and slices of strings.

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func AppendUniq added in v0.7.0

func AppendUniq(in []string, vals ...string) []string

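AppendUniq appends each string in vals to the input slice, skipping any value that is already present. Comparison is case-sensitive: in the example below, "C" is appended even though "c" is already in the slice.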

Example
in := []string{"a", "", "b", "c"}
vals := []string{"b", "", "C", "d"}

in = AppendUniq(in, vals...)

fmt.Println(in)
Output:

[a  b c C d]

func CleanURI

func CleanURI(text string) string

CleanURI removes known links from text and returns the result. This function assumes that a space in a URI is encoded as '%20', not written as a literal space ' '.

List of known links: http, https, ftp, ftps, ssh, file, rsync, and ws.

Example
text := `You can visit ftp://hostname or https://hostname/link%202 for more information`

fmt.Printf("%s\n", CleanURI(text))
Output:

You can visit  or  for more information

func CleanWikiMarkup

func CleanWikiMarkup(text string) string

CleanWikiMarkup removes wiki markup from text.

List of known wiki markups:
- [[Category: ... ]]
- [[:Category: ... ]]
- [[File: ... ]]
- [[Help: ... ]]
- [[Image: ... ]]
- [[Special: ... ]]
- [[Wikipedia: ... ]]
- {{DEFAULTSORT: ... }}
- {{Template: ... }}
- <ref ... />
Example
text := `* Test image [[Image:fileto.png]].`

fmt.Printf("%s\n", CleanWikiMarkup(text))
Output:

* Test image .

func CountAlnum

func CountAlnum(text string) (n int)

CountAlnum returns the number of alpha-numeric characters in text.

Example
fmt.Println(CountAlnum("// A b c 1 2 3"))
Output:

6

func CountAlnumDistribution

func CountAlnumDistribution(text string) (chars []rune, counts []int)

CountAlnumDistribution counts the distribution of alpha-numeric characters in text.

For example, given the text "abbcccddddeeeee", it will return [a b c d e] and [1 2 3 4 5].

Example
chars, counts := CountAlnumDistribution("// A b c A b")
fmt.Printf("%c %v\n", chars, counts)
Output:

[A b c] [2 2 1]

func CountCharSequence

func CountCharSequence(text string) (chars []rune, counts []int)

CountCharSequence, given a string, counts each character that is repeated more than once in sequence and returns the list of those characters and their counts.

Example
text := "aaa abcdee ffgf"

chars, counts := CountCharSequence(text)

// 'a' is not counted as 4 because the sequence is broken by another
// character, the space ' '.
fmt.Printf("%c %v\n", chars, counts)
Output:

[a e f] [3 2 2]

func CountDigit

func CountDigit(text string) (n int)

CountDigit returns the number of digits in text.

Example
text := "// Copyright 2018 Mhd Sulhan <ms@kilabit.info>. All rights reserved."
fmt.Println(CountDigit(text))
Output:

4

func CountMissRate

func CountMissRate(src []string, target []string) (
	missrate float64,
	nmiss, length int,
)

CountMissRate, given two slices of strings, counts the number of strings that are not equal to each other, and returns the miss rate as

number of not equal / number of data

together with the number of misses and the length of the input `src`.

Example
src := []string{"A", "B", "C", "D"}
tgt := []string{"A", "B", "C", "D"}
fmt.Println(CountMissRate(src, tgt))

src = []string{"A", "B", "C", "D"}
tgt = []string{"B", "B", "C", "D"}
fmt.Println(CountMissRate(src, tgt))

src = []string{"A", "B", "C", "D"}
tgt = []string{"B", "C", "C", "D"}
fmt.Println(CountMissRate(src, tgt))

src = []string{"A", "B", "C", "D"}
tgt = []string{"B", "C", "D", "D"}
fmt.Println(CountMissRate(src, tgt))

src = []string{"A", "B", "C", "D"}
tgt = []string{"C", "D", "D", "E"}
fmt.Println(CountMissRate(src, tgt))
Output:

0 0 4
0.25 1 4
0.5 2 4
0.75 3 4
1 4 4

func CountNonAlnum

func CountNonAlnum(text string, withspace bool) (n int)

CountNonAlnum returns the number of non-alpha-numeric characters in text. If `withspace` is true, white space is counted as non-alpha-numeric; if it is false, white space is ignored.

func CountToken

func CountToken(words []string, token string, sensitive bool) int

CountToken returns the number of occurrences of token in words.

Example
words := []string{"A", "B", "C", "a", "b", "c"}
fmt.Println(CountToken(words, "C", false))
fmt.Println(CountToken(words, "C", true))
Output:

2
1

func CountTokens

func CountTokens(words []string, tokens []string, sensitive bool) []int

CountTokens counts the number of occurrences of each value of `tokens` in words. It returns the count for each token at that token's index.

Example
words := []string{"A", "B", "C", "a", "b", "c"}
tokens := []string{"A", "B"}
fmt.Println(CountTokens(words, tokens, false))
fmt.Println(CountTokens(words, tokens, true))
Output:

[2 2]
[1 1]

func CountUniqChar

func CountUniqChar(text string) (n int)

CountUniqChar counts the number of distinct characters in text, ignoring duplicates.

Example
fmt.Println(CountUniqChar("abc abc"))
fmt.Println(CountUniqChar("abc ABC"))
Output:

4
7

func CountUpperLower

func CountUpperLower(text string) (upper, lower int)

CountUpperLower returns the number of uppercase and lowercase characters in text.

Example
fmt.Println(CountUpperLower("// A B C d e f g h I J K"))
Output:

6 5

func Delete added in v0.18.0

func Delete(in []string, value string) (out []string, ok bool)

Delete removes the first item that matches value while preserving the order of the remaining items. It returns true if an item was deleted from the slice; otherwise it returns false.

func FrequencyOfToken

func FrequencyOfToken(words []string, token string, sensitive bool) float64

FrequencyOfToken returns the frequency of token in words, computed as

count-of-token / total-words
Example
words := []string{"A", "B", "C", "a", "b", "c"}
fmt.Println(FrequencyOfToken(words, "C", false))
fmt.Println(FrequencyOfToken(words, "C", true))
Output:

0.3333333333333333
0.16666666666666666

func FrequencyOfTokens

func FrequencyOfTokens(words, tokens []string, sensitive bool) (probs []float64)

FrequencyOfTokens computes the frequency of each token in words.

Example
words := []string{"A", "B", "C", "a", "b", "c"}
tokens := []string{"A", "B"}
fmt.Println(FrequencyOfTokens(words, tokens, false))
fmt.Println(FrequencyOfTokens(words, tokens, true))
Output:

[0.3333333333333333 0.3333333333333333]
[0.16666666666666666 0.16666666666666666]

func IsContain

func IsContain(ss []string, el string) bool

IsContain returns true if the element `el` is in the slice of strings `ss`; otherwise it returns false.

func IsEqual

func IsEqual(a, b []string) bool

IsEqual compares the elements of two slices of strings without regard to their order.

It returns true if both slices have the same elements, false otherwise.

Example
fmt.Println(IsEqual([]string{"a", "b"}, []string{"a", "b"}))
fmt.Println(IsEqual([]string{"a", "b"}, []string{"b", "a"}))
fmt.Println(IsEqual([]string{"a", "b"}, []string{"a"}))
fmt.Println(IsEqual([]string{"a", "b"}, []string{"b", "b"}))
Output:

true
true
false
false

func Longest

func Longest(words []string) (string, int)

Longest finds the longest word in words and returns its value and index.

If words is empty, it returns an empty string with a negative (-1) index.

Example
words := []string{"a", "bb", "ccc", "d", "eee"}
fmt.Println(Longest(words))
Output:

ccc 2

func MaxCharSequence

func MaxCharSequence(text string) (rune, int)

MaxCharSequence returns the character that has the longest sequence in `text`, together with the length of that sequence.

Example
c, n := MaxCharSequence("aaa abcdee ffgf")

fmt.Printf("%c %d\n", c, n)
Output:

a 3

func MergeSpaces

func MergeSpaces(text string, withline bool) string

MergeSpaces replaces two or more horizontal spaces (' ', '\t', '\v', '\f', '\r') with a single space. If withline is true, it also replaces two or more new lines with a single new line.

Example
line := "   a\n\nb c   d\n\n"
fmt.Printf("Without merging newline: '%s'\n", MergeSpaces(line, false))
fmt.Printf("With merging newline: '%s'\n", MergeSpaces(line, true))
Output:

Without merging newline: ' a

b c d

'
With merging newline: ' a
b c d
'

func MostFrequentTokens

func MostFrequentTokens(words []string, tokens []string, sensitive bool) string

MostFrequentTokens returns the token that has the highest frequency in words.

For example, given input

words:  [A A B A B C C]
tokens: [A B]

it will return A as the majority token in words. If tokens have equal frequency, then the first token in order will be returned.

Example
words := []string{"a", "b", "B", "B", "a"}
tokens := []string{"a", "b"}
fmt.Println(MostFrequentTokens(words, tokens, false))
fmt.Println(MostFrequentTokens(words, tokens, true))
Output:

b
a

func RatioAlnum

func RatioAlnum(text string) float64

RatioAlnum computes and returns the ratio of alpha-numeric characters to all characters in text.

Example
fmt.Println(RatioAlnum("//A1"))
Output:

0.5

func RatioDigit

func RatioDigit(text string) float64

RatioDigit computes and returns the ratio of digits to all characters in text.

Example
fmt.Println(RatioDigit("// A b 0 1"))
Output:

0.2

func RatioNonAlnum

func RatioNonAlnum(text string, withspace bool) float64

RatioNonAlnum returns the ratio of non-alpha-numeric characters to all characters in text.

If `withspace` is true then white-space characters are counted as non-alpha-numeric; otherwise they are skipped.

Example
fmt.Println(RatioNonAlnum("// A1", false))
fmt.Println(RatioNonAlnum("// A1", true))
Output:

0.4
0.6

func RatioUpper

func RatioUpper(text string) float64

RatioUpper computes and returns the ratio of uppercase characters to all characters in text.

Example
fmt.Println(RatioUpper("// A b c d"))
Output:

0.25

func RatioUpperLower

func RatioUpperLower(text string) float64

RatioUpperLower computes and returns the ratio of uppercase to lowercase characters in text.

Example
fmt.Println(RatioUpperLower("// A b c d e"))
Output:

0.25

func Reverse added in v0.12.0

func Reverse(input string) string

Reverse reverses the input string and returns the result.

func SingleSpace added in v0.14.0

func SingleSpace(in string) string

SingleSpace converts every sequence of white space into a single space ' '.

func SortByIndex

func SortByIndex(ss *[]string, sortedIds []int)

SortByIndex sorts the slice of strings in place using the list of indices.

Example
dat := []string{"Z", "X", "C", "V", "B", "N", "M"}
ids := []int{4, 2, 6, 5, 3, 1, 0}

fmt.Println(dat)
SortByIndex(&dat, ids)
fmt.Println(dat)
Output:

[Z X C V B N M]
[B C M N V X Z]

func Split

func Split(text string, cleanit bool, uniq bool) (words []string)

Split, given a text, returns all words in the text.

A word is any sequence of characters with a length of one or more, separated by white space.

If cleanit is true, any non-alphanumeric characters at the start and the end of each word are removed.

If uniq is true, duplicate words are removed in a case-insensitive manner.

Example
line := `a b   c [A] B C`
fmt.Printf("%s\n", Split(line, false, false))
fmt.Printf("%s\n", Split(line, true, false))
fmt.Printf("%s\n", Split(line, false, true))
fmt.Printf("%s\n", Split(line, true, true))
Output:

[a b c [A] B C]
[a b c A B C]
[a b c [A]]
[a b c]

func Swap

func Swap(ss []string, x, y int)

Swap exchanges the values at indices x and y in the slice of strings. If x or y is less than zero, the slice is left unchanged. If x or y is greater than the length of the slice, the slice is left unchanged.

Example
ss := []string{"a", "b", "c"}
Swap(ss, -1, 1)
fmt.Println(ss)
Swap(ss, 1, -1)
fmt.Println(ss)
Swap(ss, 4, 1)
fmt.Println(ss)
Swap(ss, 1, 4)
fmt.Println(ss)
Swap(ss, 1, 2)
fmt.Println(ss)
Output:

[a b c]
[a b c]
[a b c]
[a b c]
[a c b]

func TextFrequencyOfTokens

func TextFrequencyOfTokens(text string, tokens []string, sensitive bool) (
	freq float64,
)

TextFrequencyOfTokens returns the frequency of tokens by counting each occurrence of a token and dividing the count by the total number of words in text.

Example
text := "a b c d A B C D 1 2"

fmt.Println(TextFrequencyOfTokens(text, []string{"a"}, false))
fmt.Println(TextFrequencyOfTokens(text, []string{"a"}, true))
Output:

0.2
0.1

func TextSumCountTokens

func TextSumCountTokens(text string, tokens []string, sensitive bool) (
	cnt int,
)

TextSumCountTokens, given a text, counts how many times each token occurs in it and returns the sum of all counts.

Example
text := "[[aa]] [[AA]]"

tokens := []string{"[["}
fmt.Println(TextSumCountTokens(text, tokens, false))

tokens = []string{"aa"}
fmt.Println(TextSumCountTokens(text, tokens, false))

fmt.Println(TextSumCountTokens(text, tokens, true))
Output:

2
2
1

func ToBytes added in v0.10.1

func ToBytes(ss []string) (sv [][]byte)

ToBytes converts a slice of strings into a slice of byte slices.

Example
ss := []string{"This", "is", "a", "string"}
fmt.Printf("%s\n", ToBytes(ss))
Output:

[This is a string]

func ToFloat64

func ToFloat64(ss []string) (sv []float64)

ToFloat64 converts a slice of strings to a slice of float64. If converting a string returns an error, the float value is set to 0.

Example
in := []string{"0", "1.1", "e", "3"}

fmt.Println(ToFloat64(in))
Output:

[0 1.1 0 3]

func ToInt64

func ToInt64(ss []string) (sv []int64)

ToInt64 converts a slice of strings to a slice of int64. If converting a string returns an error, the integer value is set to 0.

Example
in := []string{"0", "1", "e", "3.3"}

fmt.Println(ToInt64(in))
Output:

[0 1 0 3]

func ToStrings

func ToStrings(is []interface{}) (vs []string)

ToStrings converts a slice of interfaces to a slice of strings.

Example
i64 := []interface{}{0, 1.99, 2, 3}

fmt.Println(ToStrings(i64))
Output:

[0 1.99 2 3]

func TotalFrequencyOfTokens

func TotalFrequencyOfTokens(words, tokens []string, sensitive bool) float64

TotalFrequencyOfTokens returns the total frequency of the list of tokens in words.

Example
words := []string{"A", "B", "C", "a", "b", "c"}
tokens := []string{"A", "B"}
fmt.Println(TotalFrequencyOfTokens(words, tokens, false))
fmt.Println(TotalFrequencyOfTokens(words, tokens, true))
Output:

0.6666666666666666
0.3333333333333333

func TrimNonAlnum

func TrimNonAlnum(text string) string

TrimNonAlnum removes non-alpha-numeric characters at the beginning and end of `text`.

Example
inputs := []string{
	"[[alpha]]",
	"[[alpha",
	"alpha]]",
	"alpha",
	"alpha0",
	"1alpha",
	"1alpha0",
	"[][][]",
}

for _, in := range inputs {
	fmt.Printf("'%s'\n", TrimNonAlnum(in))
}
Output:

'alpha'
'alpha'
'alpha'
'alpha'
'alpha0'
'1alpha'
'1alpha0'
''

func Uniq

func Uniq(words []string, sensitive bool) (uniques []string)

Uniq removes duplicate strings from `words`. It modifies the content of the slice words by replacing each duplicate word with an empty string ("") and returns only the unique words. If sensitive is true, the strings are compared case-sensitively.

Example
words := []string{"a", "", "A"}
fmt.Printf("%s %s\n", Uniq(words, false), words)
words = []string{"a", "", "A"}
fmt.Printf("%s %s\n", Uniq(words, true), words)
Output:

[a] [a  ]
[a A] [a  A]

Types

type Row

type Row [][]string

Row is a simplified name for a slice of slices of strings.

func (Row) IsEqual

func (row Row) IsEqual(b Row) bool

IsEqual compares two rows without regard to their order.

It returns true if both contain the same list, false otherwise.

Example
row := Row{{"a"}, {"b", "c"}}
fmt.Println(row.IsEqual(Row{{"a"}, {"b", "c"}}))
fmt.Println(row.IsEqual(Row{{"a"}, {"c", "b"}}))
fmt.Println(row.IsEqual(Row{{"c", "b"}, {"a"}}))
fmt.Println(row.IsEqual(Row{{"b", "c"}, {"a"}}))
fmt.Println(row.IsEqual(Row{{"a"}, {"b"}}))
Output:

true
true
true
true
false

func (Row) Join

func (row Row) Join(lsep string, ssep string) (s string)

Join joins the list of slices of strings using `lsep` as the separator between row items and `ssep` as the separator between the elements of each item.

Example
row := Row{{"a"}, {"b", "c"}}
fmt.Println(row.Join(";", ","))

row = Row{{"a"}, {}}
fmt.Println(row.Join(";", ","))
Output:

a;b,c
a;

type Table

type Table []Row

Table is for working with a set of rows.

Each element in table is in the form of

[
	[["a"],["b","c"],...], // Row
	[["x"],["y","z"],...]   // Row
]

func Partition

func Partition(ss []string, k int) (table Table)

Partition groups the elements of slice "ss" into non-empty records, in such a way that every element is included in one and only one of the records.

Given a list of elements in "ss" and a number of partitions "k", it returns the set of all groupings of the elements without duplication.

The number of possible lists can be computed using the Stirling number of the second kind.

For more information see,

Example
ss := []string{"a", "b", "c"}

fmt.Println("Partition k=1:", Partition(ss, 1))
fmt.Println("Partition k=2:", Partition(ss, 2))
fmt.Println("Partition k=3:", Partition(ss, 3))
Output:

Partition k=1: [[[a b c]]]
Partition k=2: [[[b a] [c]] [[b] [c a]] [[b c] [a]]]
Partition k=3: [[[a] [b] [c]]]

func SinglePartition

func SinglePartition(ss []string) Table

SinglePartition creates a table from a slice of strings, where each element in the slice becomes a single record.

Example
ss := []string{"a", "b", "c"}
fmt.Println(SinglePartition(ss))
Output:

[[[a] [b] [c]]]

func (Table) IsEqual

func (table Table) IsEqual(other Table) bool

IsEqual compares two tables of strings without regard to their order.

It returns true if both sets contain the same list, false otherwise.

Example
table := Table{
	{{"a"}, {"b", "c"}},
	{{"b"}, {"a", "c"}},
	{{"c"}, {"a", "b"}},
}
fmt.Println(table.IsEqual(table))

other := Table{
	{{"c"}, {"a", "b"}},
	{{"a"}, {"b", "c"}},
	{{"b"}, {"a", "c"}},
}
fmt.Println(table.IsEqual(other))

other = Table{
	{{"a"}, {"b", "c"}},
	{{"b"}, {"a", "c"}},
}
fmt.Println(table.IsEqual(other))
Output:

true
true
false

func (Table) JoinCombination

func (table Table) JoinCombination(s string) (tout Table)

JoinCombination, for each row in table, generates new rows by inserting "s" into a different record in each new row.

Example
table := Table{
	{{"a"}, {"b"}, {"c"}},
}
s := "X"

fmt.Println(table.JoinCombination(s))
Output:

[[[a X] [b] [c]] [[a] [b X] [c]] [[a] [b] [c X]]]

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL