strings

package

v0.6.1 Latest Latest Go to latest Published: May 10, 2019 License: BSD-3-Clause Imports: 8 Imported by: 2

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/shuLhan/share

Links

Open Source Insights

Documentation ¶

Overview ¶

Package strings provides a library for working with strings or slices of strings.

Index ¶

func CleanURI(text string) string
func CleanWikiMarkup(text string) string
func CountAlnum(text string) (n int)
func CountAlnumDistribution(text string) (chars []rune, counts []int)
func CountCharSequence(text string) (chars []rune, counts []int)
func CountDigit(text string) (n int)
func CountMissRate(src []string, target []string) (missrate float64, nmiss, length int)
func CountNonAlnum(text string, withspace bool) (n int)
func CountToken(words []string, token string, sensitive bool) int
func CountTokens(words []string, tokens []string, sensitive bool) []int
func CountUniqChar(text string) (n int)
func CountUpperLower(text string) (upper, lower int)
func FrequencyOfToken(words []string, token string, sensitive bool) float64
func FrequencyOfTokens(words, tokens []string, sensitive bool) (probs []float64)
func IsContain(ss []string, el string) bool
func IsEqual(a, b []string) bool
func JSONEscape(in string) string
func JSONUnescape(in string, strict bool) (string, error)
func Longest(words []string) (string, int)
func MaxCharSequence(text string) (rune, int)
func MergeSpaces(text string, withline bool) string
func MostFrequentTokens(words []string, tokens []string, sensitive bool) string
func RatioAlnum(text string) float64
func RatioDigit(text string) float64
func RatioNonAlnum(text string, withspace bool) float64
func RatioUpper(text string) float64
func RatioUpperLower(text string) float64
func SortByIndex(ss *[]string, sortedIds []int)
func Split(text string, cleanit bool, uniq bool) (words []string)
func Swap(ss []string, x, y int)
func TextFrequencyOfTokens(text string, tokens []string, sensitive bool) (freq float64)
func TextSumCountTokens(text string, tokens []string, sensitive bool) (cnt int)
func ToFloat64(ss []string) (sv []float64)
func ToInt64(ss []string) (sv []int64)
func ToStrings(is []interface{}) (vs []string)
func TotalFrequencyOfTokens(words, tokens []string, sensitive bool) float64
func TrimNonAlnum(text string) string
func Uniq(words []string, sensitive bool) (uniques []string)
type Row
- func (row Row) IsEqual(b Row) bool
- func (row Row) Join(lsep string, ssep string) (s string)
type Table
- func Partition(ss []string, k int) (table Table)
- func SinglePartition(ss []string) Table
- func (table Table) IsEqual(other Table) bool
- func (table Table) JoinCombination(s string) (tout Table)

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func CleanURI ¶

func CleanURI(text string) string

CleanURI removes known links from text and returns the result. This function assumes that a space inside a URI is encoded as '%20', not written as a literal space ' '.

List of known links: http, https, ftp, ftps, ssh, file, rsync, and ws.

Example ¶

text := `You can visit ftp://hostname or https://hostname/link%202 for more information`

fmt.Printf("%s\n", CleanURI(text))

Output:

You can visit  or  for more information

func CleanWikiMarkup ¶

func CleanWikiMarkup(text string) string

CleanWikiMarkup removes wiki markup from text.

List of known wiki markups,
- [[Category: ... ]]
- [[:Category: ... ]]
- [[File: ... ]]
- [[Help: ... ]]
- [[Image: ... ]]
- [[Special: ... ]]
- [[Wikipedia: ... ]]
- {{DEFAULTSORT: ... }}
- {{Template: ... }}
- <ref ... />

Example ¶

text := `* Test image [[Image:fileto.png]].`

fmt.Printf("%s\n", CleanWikiMarkup(text))

Output:

* Test image .

func CountAlnum ¶

func CountAlnum(text string) (n int)

CountAlnum returns the number of alpha-numeric characters in text.

Example ¶

fmt.Println(CountAlnum("// A b c 1 2 3"))

Output:

6

func CountAlnumDistribution ¶

func CountAlnumDistribution(text string) (chars []rune, counts []int)

CountAlnumDistribution counts the distribution of alpha-numeric characters in text.

For example, given the text "abbcccddddeeeee", it will return [a b c d e] and [1 2 3 4 5].

Example ¶

chars, counts := CountAlnumDistribution("// A b c A b")
fmt.Printf("%c %v\n", chars, counts)

Output:

[A b c] [2 2 1]

func CountCharSequence ¶

func CountCharSequence(text string) (chars []rune, counts []int)

CountCharSequence, given a string, counts each run of two or more identical consecutive characters and returns the list of those characters and the length of each run.

Example ¶

text := "aaa abcdee ffgf"

chars, counts := CountCharSequence(text)

// 'a' is not counted as 4 because the run is broken by another character,
// space ' '.
fmt.Printf("%c %v\n", chars, counts)

Output:

[a e f] [3 2 2]

func CountDigit ¶

func CountDigit(text string) (n int)

CountDigit returns the number of digits in text.

Example ¶

text := "// Copyright 2018 Mhd Sulhan <ms@kilabit.info>. All rights reserved."
fmt.Println(CountDigit(text))

Output:

4

func CountMissRate ¶

func CountMissRate(src []string, target []string) (
	missrate float64,
	nmiss, length int,
)

CountMissRate, given two slices of strings, counts the number of elements that are not equal at the same index, and returns the miss rate as

number of not equal / number of data

together with the number of misses and the length of input `src`.

Example ¶

src := []string{"A", "B", "C", "D"}
tgt := []string{"A", "B", "C", "D"}
fmt.Println(CountMissRate(src, tgt))

src = []string{"A", "B", "C", "D"}
tgt = []string{"B", "B", "C", "D"}
fmt.Println(CountMissRate(src, tgt))

src = []string{"A", "B", "C", "D"}
tgt = []string{"B", "C", "C", "D"}
fmt.Println(CountMissRate(src, tgt))

src = []string{"A", "B", "C", "D"}
tgt = []string{"B", "C", "D", "D"}
fmt.Println(CountMissRate(src, tgt))

src = []string{"A", "B", "C", "D"}
tgt = []string{"C", "D", "D", "E"}
fmt.Println(CountMissRate(src, tgt))

Output:

0 0 4
0.25 1 4
0.5 2 4
0.75 3 4
1 4 4

func CountNonAlnum ¶

func CountNonAlnum(text string, withspace bool) (n int)

CountNonAlnum returns the number of non-alpha-numeric characters in text. If `withspace` is true, white-space characters are counted as non-alpha-numeric; if it is false, they are ignored.

func CountToken ¶

func CountToken(words []string, token string, sensitive bool) int

CountToken returns the number of occurrences of token in words.

Example ¶

words := []string{"A", "B", "C", "a", "b", "c"}
fmt.Println(CountToken(words, "C", false))
fmt.Println(CountToken(words, "C", true))

Output:

2
1

func CountTokens ¶

func CountTokens(words []string, tokens []string, sensitive bool) []int

CountTokens counts the number of occurrences of each value of `tokens` in words. It returns the count of each token at the same index as the token.

Example ¶

words := []string{"A", "B", "C", "a", "b", "c"}
tokens := []string{"A", "B"}
fmt.Println(CountTokens(words, tokens, false))
fmt.Println(CountTokens(words, tokens, true))

Output:

[2 2]
[1 1]

func CountUniqChar ¶

func CountUniqChar(text string) (n int)

CountUniqChar count number of character in text without duplication.

Example ¶

fmt.Println(CountUniqChar("abc abc"))
fmt.Println(CountUniqChar("abc ABC"))

Output:

4
7

func CountUpperLower ¶

func CountUpperLower(text string) (upper, lower int)

CountUpperLower return number of uppercase and lowercase in text.

Example ¶

fmt.Println(CountUpperLower("// A B C d e f g h I J K"))

Output:

6 5

func FrequencyOfToken ¶

func FrequencyOfToken(words []string, token string, sensitive bool) float64

FrequencyOfToken return frequency of token in words using

count-of-token / total-words

Example ¶

words := []string{"A", "B", "C", "a", "b", "c"}
fmt.Println(FrequencyOfToken(words, "C", false))
fmt.Println(FrequencyOfToken(words, "C", true))

Output:

0.3333333333333333
0.16666666666666666

func FrequencyOfTokens ¶

func FrequencyOfTokens(words, tokens []string, sensitive bool) (probs []float64)

FrequencyOfTokens will compute each frequency of token in words.

Example ¶

words := []string{"A", "B", "C", "a", "b", "c"}
tokens := []string{"A", "B"}
fmt.Println(FrequencyOfTokens(words, tokens, false))
fmt.Println(FrequencyOfTokens(words, tokens, true))

Output:

[0.3333333333333333 0.3333333333333333]
[0.16666666666666666 0.16666666666666666]

func IsContain ¶

func IsContain(ss []string, el string) bool

IsContain returns true if element `el` is in the slice of strings `ss`, otherwise it returns false.

func IsEqual ¶

func IsEqual(a, b []string) bool

IsEqual compare elements of two slice of string without regard to their order.

Return true if both slices have the same elements, false otherwise.

Example ¶

fmt.Println(IsEqual([]string{"a", "b"}, []string{"a", "b"}))
fmt.Println(IsEqual([]string{"a", "b"}, []string{"b", "a"}))
fmt.Println(IsEqual([]string{"a", "b"}, []string{"a"}))
fmt.Println(IsEqual([]string{"a", "b"}, []string{"b", "b"}))

Output:

true
true
false
false

func JSONEscape ¶

func JSONEscape(in string) string

JSONEscape escapes the following characters: `"` (quotation mark), `\` (reverse solidus), `/` (solidus), `\b` (backspace), `\f` (formfeed), `\n` (newline), `\r` (carriage return), `\t` (horizontal tab), and control characters from 0 - 31.

References ¶

* https://tools.ietf.org/html/rfc7159#page-8

func JSONUnescape ¶

func JSONUnescape(in string, strict bool) (string, error)

JSONUnescape unescapes a JSON string, reversing what JSONEscape does.

If strict is true, any unknown control character will be returned as an error. For example, in the string "\x", "x" is not a valid control character, so the function will return an empty string and an error. If strict is false, it will return "x".

func Longest ¶

func Longest(words []string) (string, int)

Longest finds the longest word in words and returns its value and index.

If words is empty, it returns an empty string with a negative (-1) index.

Example ¶

words := []string{"a", "bb", "ccc", "d", "eee"}
fmt.Println(Longest(words))

Output:

ccc 2

func MaxCharSequence ¶

func MaxCharSequence(text string) (rune, int)

MaxCharSequence return character which have maximum sequence in `text`.

Example ¶

c, n := MaxCharSequence("aaa abcdee ffgf")

fmt.Printf("%c %d\n", c, n)

Output:

a 3

func MergeSpaces ¶

func MergeSpaces(text string, withline bool) string

MergeSpaces replaces two or more spaces with a single space. If withline is true, it also replaces two or more new lines with a single new line.

Example ¶

line := "   a\n\nb c   d\n\n"
fmt.Printf("Without merging newline: '%s'\n", MergeSpaces(line, false))
fmt.Printf("With merging newline: '%s'\n", MergeSpaces(line, true))

Output:

Without merging newline: ' a

b c d

'
With merging newline: ' a
b c d
'

func MostFrequentTokens ¶

func MostFrequentTokens(words []string, tokens []string, sensitive bool) string

MostFrequentTokens return the token that has highest frequency in words.

For example, given input

words:  [A A B A B C C]
tokens: [A B]

it will return A as the majority token in words. If tokens have equal frequency, then the first token in order will be returned.

Example ¶

words := []string{"a", "b", "B", "B", "a"}
tokens := []string{"a", "b"}
fmt.Println(MostFrequentTokens(words, tokens, false))
fmt.Println(MostFrequentTokens(words, tokens, true))

Output:

b
a

func RatioAlnum ¶

func RatioAlnum(text string) float64

RatioAlnum compute and return ratio of alpha-numeric within all character in text.

Example ¶

fmt.Println(RatioAlnum("//A1"))

Output:

0.5

func RatioDigit ¶

func RatioDigit(text string) float64

RatioDigit compute and return digit ratio to all characters in text.

Example ¶

fmt.Println(RatioDigit("// A b 0 1"))

Output:

0.2

func RatioNonAlnum ¶

func RatioNonAlnum(text string, withspace bool) float64

RatioNonAlnum return ratio of non-alphanumeric character to all character in text.

If `withspace` is true then white-space character will be counted as non-alpha numeric, otherwise it will be skipped.

Example ¶

fmt.Println(RatioNonAlnum("// A1", false))
fmt.Println(RatioNonAlnum("// A1", true))

Output:

0.4
0.6

func RatioUpper ¶

func RatioUpper(text string) float64

RatioUpper compute and return ratio of uppercase character to all character in text.

Example ¶

fmt.Println(RatioUpper("// A b c d"))

Output:

0.25

func RatioUpperLower ¶

func RatioUpperLower(text string) float64

RatioUpperLower compute and return ratio of uppercase with lowercase character in text.

Example ¶

fmt.Println(RatioUpperLower("// A b c d e"))

Output:

0.25

func SortByIndex ¶

func SortByIndex(ss *[]string, sortedIds []int)

SortByIndex will sort the slice of string in place using list of index.

Example ¶

dat := []string{"Z", "X", "C", "V", "B", "N", "M"}
ids := []int{4, 2, 6, 5, 3, 1, 0}

fmt.Println(dat)
SortByIndex(&dat, ids)
fmt.Println(dat)

Output:

[Z X C V B N M]
[B C M N V X Z]

func Split ¶

func Split(text string, cleanit bool, uniq bool) (words []string)

Split given a text, return all words in text.

A word is any sequence of characters that has a length equal to or greater than one and is separated by white spaces.

If cleanit is true remove any non-alphanumeric in the start and the end of each words.

If uniq is true remove duplicate words.

Example ¶

line := `a b c [A] B C`
fmt.Printf("%s\n", Split(line, false, false))
fmt.Printf("%s\n", Split(line, true, false))
fmt.Printf("%s\n", Split(line, false, true))
fmt.Printf("%s\n", Split(line, true, true))

Output:

[a b c [A] B C]
[a b c A B C]
[a b c [A] B C]
[a b c]

func Swap ¶

func Swap(ss []string, x, y int)

Swap swaps the values at two indices of a slice of strings. If x or y is less than zero, the slice is left unchanged. If x or y is greater than the length of the slice, the slice is left unchanged.

Example ¶

ss := []string{"a", "b", "c"}
Swap(ss, -1, 1)
fmt.Println(ss)
Swap(ss, 1, -1)
fmt.Println(ss)
Swap(ss, 4, 1)
fmt.Println(ss)
Swap(ss, 1, 4)
fmt.Println(ss)
Swap(ss, 1, 2)
fmt.Println(ss)

Output:

[a b c]
[a b c]
[a b c]
[a b c]
[a c b]

func TextFrequencyOfTokens ¶

func TextFrequencyOfTokens(text string, tokens []string, sensitive bool) (
	freq float64,
)

TextFrequencyOfTokens return frequencies of tokens by counting each occurrence of token and divide it with total words in text.

Example ¶

text := "a b c d A B C D 1 2"

fmt.Println(TextFrequencyOfTokens(text, []string{"a"}, false))
fmt.Println(TextFrequencyOfTokens(text, []string{"a"}, true))

Output:

0.2
0.1

func TextSumCountTokens ¶

func TextSumCountTokens(text string, tokens []string, sensitive bool) (
	cnt int,
)

TextSumCountTokens given a text, count how many tokens inside of it and return sum of all counts.

Example ¶

text := "[[aa]] [[AA]]"

tokens := []string{"[["}
fmt.Println(TextSumCountTokens(text, tokens, false))

tokens = []string{"aa"}
fmt.Println(TextSumCountTokens(text, tokens, false))

fmt.Println(TextSumCountTokens(text, tokens, true))

Output:

2
2
1

func ToFloat64 ¶

func ToFloat64(ss []string) (sv []float64)

ToFloat64 converts a slice of strings to a slice of float64. If converting a string returns an error, the corresponding float value is set to 0.

Example ¶

in := []string{"0", "1.1", "e", "3"}

fmt.Println(ToFloat64(in))

Output:

[0 1.1 0 3]

func ToInt64 ¶

func ToInt64(ss []string) (sv []int64)

ToInt64 converts a slice of strings to a slice of int64. If converting a string returns an error, the corresponding integer value is set to 0.

Example ¶

in := []string{"0", "1", "e", "3.3"}

fmt.Println(ToInt64(in))

Output:

[0 1 0 3]

func ToStrings ¶

func ToStrings(is []interface{}) (vs []string)

ToStrings convert slice of interface to slice of string.

Example ¶

i64 := []interface{}{0, 1.99, 2, 3}

fmt.Println(ToStrings(i64))

Output:

[0 1.99 2 3]

func TotalFrequencyOfTokens ¶

func TotalFrequencyOfTokens(words, tokens []string, sensitive bool) float64

TotalFrequencyOfTokens return total frequency of list of token in words.

Example ¶

words := []string{"A", "B", "C", "a", "b", "c"}
tokens := []string{"A", "B"}
fmt.Println(TotalFrequencyOfTokens(words, tokens, false))
fmt.Println(TotalFrequencyOfTokens(words, tokens, true))

Output:

0.6666666666666666
0.3333333333333333

func TrimNonAlnum ¶

func TrimNonAlnum(text string) string

TrimNonAlnum removes non-alpha-numeric characters at the beginning and end of `text`.

Example ¶

inputs := []string{
	"[[alpha]]",
	"[[alpha",
	"alpha]]",
	"alpha",
	"alpha0",
	"1alpha",
	"1alpha0",
	"[][][]",
}

for _, in := range inputs {
	fmt.Printf("'%s'\n", TrimNonAlnum(in))
}

Output:

'alpha'
'alpha'
'alpha'
'alpha'
'alpha0'
'1alpha'
'1alpha0'
''

func Uniq ¶

func Uniq(words []string, sensitive bool) (uniques []string)

Uniq removes duplicate strings from `words`. It modifies the content of the slice in words by replacing each duplicate word with an empty string ("") and returns only the unique words. If sensitive is true, the strings are compared case-sensitively.

Example ¶

words := []string{"a", "", "A"}
fmt.Printf("%s %s\n", Uniq(words, false), words)
words = []string{"a", "", "A"}
fmt.Printf("%s %s\n", Uniq(words, true), words)

Output:

[a] [a  ]
[a A] [a  A]

Types ¶

type Row ¶

type Row [][]string

Row is simplified name for slice of slice of string.

func (Row) IsEqual ¶

func (row Row) IsEqual(b Row) bool

IsEqual compare two row without regard to their order.

Return true if both contain the same list, false otherwise.

Example ¶

row := Row{{"a"}, {"b", "c"}}
fmt.Println(row.IsEqual(Row{{"a"}, {"b", "c"}}))
fmt.Println(row.IsEqual(Row{{"a"}, {"c", "b"}}))
fmt.Println(row.IsEqual(Row{{"c", "b"}, {"a"}}))
fmt.Println(row.IsEqual(Row{{"b", "c"}, {"a"}}))
fmt.Println(row.IsEqual(Row{{"a"}, {"b"}}))

Output:

true
true
true
true
false

func (Row) Join ¶

func (row Row) Join(lsep string, ssep string) (s string)

Join list of slice of string using `lsep` as separator between row items and `ssep` for element in each item.

Example ¶

row := Row{{"a"}, {"b", "c"}}
fmt.Println(row.Join(";", ","))

row = Row{{"a"}, {}}
fmt.Println(row.Join(";", ","))

Output:

a;b,c
a;

type Table ¶

type Table []Row

Table is for working with set of row.

Each element in table is in the form of

[
	[["a"],["b","c"],...], // Row
	[["x"],["y","z"],...]   // Row
]

func Partition ¶

func Partition(ss []string, k int) (table Table)

Partition groups the elements of slice "ss" into non-empty records, in such a way that every element is included in one and only one of the records.

Given a list of elements in "ss", and the number of partitions "k", it returns the set of all groupings of all elements without duplication.

Number of possible list can be computed using Stirling number of second kind.

For more information see,

https://en.wikipedia.org/wiki/Partition_of_a_set

Example ¶

ss := []string{"a", "b", "c"}

fmt.Println("Partition k=1:", Partition(ss, 1))
fmt.Println("Partition k=2:", Partition(ss, 2))
fmt.Println("Partition k=3:", Partition(ss, 3))

Output:

Partition k=1: [[[a b c]]]
Partition k=2: [[[b a] [c]] [[b] [c a]] [[b c] [a]]]
Partition k=3: [[[a] [b] [c]]]

func SinglePartition ¶

func SinglePartition(ss []string) Table

SinglePartition create a table from a slice of string, where each element in slice become a single record.

Example ¶

ss := []string{"a", "b", "c"}
fmt.Println(SinglePartition(ss))

Output:

[[[a] [b] [c]]]

func (Table) IsEqual ¶

func (table Table) IsEqual(other Table) bool

IsEqual compare two table of string without regard to their order.

Return true if both sets contain the same lists, false otherwise.

Example ¶

table := Table{
	{{"a"}, {"b", "c"}},
	{{"b"}, {"a", "c"}},
	{{"c"}, {"a", "b"}},
}
fmt.Println(table.IsEqual(table))

other := Table{
	{{"c"}, {"a", "b"}},
	{{"a"}, {"b", "c"}},
	{{"b"}, {"a", "c"}},
}
fmt.Println(table.IsEqual(other))

other = Table{
	{{"a"}, {"b", "c"}},
	{{"b"}, {"a", "c"}},
}
fmt.Println(table.IsEqual(other))

Output:

true
true
false

func (Table) JoinCombination ¶

func (table Table) JoinCombination(s string) (tout Table)

JoinCombination, for each row in table, generates new rows, inserting "s" into a different record in each new row.

Example ¶

table := Table{
	{{"a"}, {"b"}, {"c"}},
}
s := "X"

fmt.Println(table.JoinCombination(s))

Output:

[[[a X] [b] [c]] [[a] [b X] [c]] [[a] [b] [c X]]]

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

Documentation ¶

Overview ¶

Index ¶

Examples ¶

Constants ¶

Variables ¶

Functions ¶

func CleanURI ¶

func CleanWikiMarkup ¶

func CountAlnum ¶

func CountAlnumDistribution ¶

func CountCharSequence ¶

func CountDigit ¶

func CountMissRate ¶

func CountNonAlnum ¶

func CountToken ¶

func CountTokens ¶

func CountUniqChar ¶

func CountUpperLower ¶

func FrequencyOfToken ¶

func FrequencyOfTokens ¶

func IsContain ¶

func IsEqual ¶

func JSONEscape ¶

References ¶

func JSONUnescape ¶

func Longest ¶

func MaxCharSequence ¶

func MergeSpaces ¶

func MostFrequentTokens ¶

func RatioAlnum ¶

func RatioDigit ¶

func RatioNonAlnum ¶

func RatioUpper ¶

func RatioUpperLower ¶

func SortByIndex ¶

func Split ¶

func Swap ¶

func TextFrequencyOfTokens ¶

func TextSumCountTokens ¶

func ToFloat64 ¶

func ToInt64 ¶

func ToStrings ¶

func TotalFrequencyOfTokens ¶

func TrimNonAlnum ¶

func Uniq ¶

Types ¶

type Row ¶

func (Row) IsEqual ¶

func (Row) Join ¶

type Table ¶

func Partition ¶

func SinglePartition ¶

func (Table) IsEqual ¶

func (Table) JoinCombination ¶

Source Files ¶