Documentation ¶
Overview ¶
Package strings provides a library for working with strings and slices of strings.
Index ¶
- func Alnum(text string, withSpace bool) string
- func AppendUniq(in []string, vals ...string) []string
- func CleanURI(text string) string
- func CleanWikiMarkup(text string) string
- func CountAlnum(text string) (n int)
- func CountAlnumDistribution(text string) (chars []rune, counts []int)
- func CountCharSequence(text string) (chars []rune, counts []int)
- func CountDigit(text string) (n int)
- func CountMissRate(src []string, target []string) (missrate float64, nmiss, length int)
- func CountNonAlnum(text string, withspace bool) (n int)
- func CountToken(words []string, token string, sensitive bool) int
- func CountTokens(words []string, tokens []string, sensitive bool) []int
- func CountUniqChar(text string) (n int)
- func CountUpperLower(text string) (upper, lower int)
- func Delete(in []string, value string) (out []string, ok bool)
- func FrequencyOfToken(words []string, token string, sensitive bool) float64
- func FrequencyOfTokens(words, tokens []string, sensitive bool) (probs []float64)
- func IsContain(ss []string, el string) bool
- func IsEqual(a, b []string) bool
- func LinesOfFile(file string) ([]string, error)
- func Longest(words []string) (string, int)
- func MaxCharSequence(text string) (rune, int)
- func MergeSpaces(text string, withline bool) string
- func MostFrequentTokens(words []string, tokens []string, sensitive bool) string
- func RatioAlnum(text string) float64
- func RatioDigit(text string) float64
- func RatioNonAlnum(text string, withspace bool) float64
- func RatioUpper(text string) float64
- func RatioUpperLower(text string) float64
- func Reverse(input string) string
- func SingleSpace(in string) string
- func SortByIndex(ss *[]string, sortedListID []int)
- func Split(text string, cleanit bool, uniq bool) (words []string)
- func Swap(ss []string, x, y int)
- func TextFrequencyOfTokens(text string, tokens []string, sensitive bool) (freq float64)
- func TextSumCountTokens(text string, tokens []string, sensitive bool) (cnt int)
- func ToBytes(ss []string) (sv [][]byte)
- func ToFloat64(ss []string) (sv []float64)
- func ToInt64(ss []string) (sv []int64)
- func ToStrings(is []interface{}) (vs []string)
- func TotalFrequencyOfTokens(words, tokens []string, sensitive bool) float64
- func TrimNonAlnum(text string) string
- func Uniq(words []string, sensitive bool) (uniques []string)
- type Parser
- func (p *Parser) AddDelimiters(delims string)
- func (p *Parser) Close()
- func (p *Parser) Line() (string, rune)
- func (p *Parser) Lines() []string
- func (p *Parser) Load(content, delims string)
- func (p *Parser) Read() (string, rune)
- func (p *Parser) ReadEnclosed(open, closed rune) (string, rune)
- func (p *Parser) ReadEscaped(esc rune) (string, rune)
- func (p *Parser) ReadNoSpace() (v string, r rune)
- func (p *Parser) RemoveDelimiters(dels string)
- func (p *Parser) SetDelimiters(delims string)
- func (p *Parser) Skip(n int)
- func (p *Parser) SkipHorizontalSpaces() rune
- func (p *Parser) SkipLine() rune
- func (p *Parser) Stop() (remain string, pos int)
- type Row
- type Table
Examples ¶
- Alnum
- AppendUniq
- CleanURI
- CleanWikiMarkup
- CountAlnum
- CountAlnumDistribution
- CountCharSequence
- CountDigit
- CountMissRate
- CountToken
- CountTokens
- CountUniqChar
- CountUpperLower
- FrequencyOfToken
- FrequencyOfTokens
- IsEqual
- Longest
- MaxCharSequence
- MergeSpaces
- MostFrequentTokens
- NewParser
- Parser.ReadNoSpace
- Partition
- RatioAlnum
- RatioDigit
- RatioNonAlnum
- RatioUpper
- RatioUpperLower
- Row.IsEqual
- Row.Join
- SinglePartition
- SortByIndex
- Split
- Swap
- Table.IsEqual
- Table.JoinCombination
- TextFrequencyOfTokens
- TextSumCountTokens
- ToBytes
- ToFloat64
- ToInt64
- ToStrings
- TotalFrequencyOfTokens
- TrimNonAlnum
- Uniq
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func Alnum ¶
Alnum removes non-alphanumeric characters from text and returns the result. If withSpace is true, white space is kept; otherwise it is removed as well.
Example ¶
type testCase struct { text string withSpace bool } var cases = []testCase{ {`A, b.c`, false}, {`A, b.c`, true}, {`A1 b`, false}, {`A1 b`, true}, } var ( c testCase ) for _, c = range cases { fmt.Println(Alnum(c.text, c.withSpace)) }
Output: Abc A bc A1b A1 b
func AppendUniq ¶
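AppendUniq appends each string in vals to the input slice, skipping values that are already present. As the example below shows, the comparison is case-sensitive: `C` is appended even though `c` already exists.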
Example ¶
var ( in = []string{`a`, ``, `b`, `c`} vals = []string{`b`, ``, `C`, `d`} ) in = AppendUniq(in, vals...) fmt.Println(in)
Output: [a b c C d]
func CleanURI ¶
CleanURI removes known links from text and returns the result. This function assumes that a space inside a URI is encoded as '%20', not written as a literal space ' '.
List of known links: http, https, ftp, ftps, ssh, file, rsync, and ws.
Example ¶
var text = `You can visit ftp://hostname or https://hostname/link%202 for more information` fmt.Println(CleanURI(text))
Output: You can visit or for more information
func CleanWikiMarkup ¶
CleanWikiMarkup remove wiki markup from text.
List of known wiki markups,
- [[Category: ... ]]
- [[:Category: ... ]]
- [[File: ... ]]
- [[Help: ... ]]
- [[Image: ... ]]
- [[Special: ... ]]
- [[Wikipedia: ... ]]
- {{DEFAULTSORT: ... }}
- {{Template: ... }}
- <ref ... />
Example ¶
var text = `* Test image [[Image:fileto.png]].` fmt.Println(CleanWikiMarkup(text))
Output: * Test image .
func CountAlnum ¶
CountAlnum return number of alpha-numeric character in text.
Example ¶
fmt.Println(CountAlnum(`// A b c 1 2 3`))
Output: 6
func CountAlnumDistribution ¶
CountAlnumDistribution count distribution of alpha-numeric characters in text.
Example, given a text "abbcccddddeeeee", it will return [a b c d e] and [1 2 3 4 5].
Example ¶
var ( chars []rune counts []int ) chars, counts = CountAlnumDistribution(`// A b c A b`) fmt.Printf(`%c %v`, chars, counts)
Output: [A b c] [2 2 1]
func CountCharSequence ¶
CountCharSequence, given a string, finds every run in which the same character is repeated more than once consecutively, and returns the list of those characters along with the length of each run.
Example ¶
var ( text = `aaa abcdee ffgf` chars []rune counts []int ) chars, counts = CountCharSequence(text) // 'a' is not counted as 4 because its breaked by another character, // space ' '. fmt.Printf(`%c %v`, chars, counts)
Output: [a e f] [3 2 2]
func CountDigit ¶
CountDigit return number of digit in text.
Example ¶
var text = `// Copyright 2018 Mhd Sulhan <ms@kilabit.info>. All rights reserved.` fmt.Println(CountDigit(text))
Output: 4
func CountMissRate ¶
CountMissRate, given two slices of strings, counts the number of strings that are not equal to each other, and returns the miss rate computed as
number of not equal / number of data
together with the number of misses and the length of the input `src`.
Example ¶
var ( src = []string{`A`, `B`, `C`, `D`} tgt = []string{`A`, `B`, `C`, `D`} ) fmt.Println(CountMissRate(src, tgt)) src = []string{`A`, `B`, `C`, `D`} tgt = []string{`B`, `B`, `C`, `D`} fmt.Println(CountMissRate(src, tgt)) src = []string{`A`, `B`, `C`, `D`} tgt = []string{`B`, `C`, `C`, `D`} fmt.Println(CountMissRate(src, tgt)) src = []string{`A`, `B`, `C`, `D`} tgt = []string{`B`, `C`, `D`, `D`} fmt.Println(CountMissRate(src, tgt)) src = []string{`A`, `B`, `C`, `D`} tgt = []string{`C`, `D`, `D`, `E`} fmt.Println(CountMissRate(src, tgt))
Output: 0 0 4 0.25 1 4 0.5 2 4 0.75 3 4 1 4 4
func CountNonAlnum ¶
CountNonAlnum returns the number of non-alphanumeric characters in text. If `withspace` is true, white space is counted as non-alphanumeric; if it is false, white space is ignored.
func CountToken ¶
CountToken will return number of token occurrence in words.
Example ¶
var ( words = []string{`A`, `B`, `C`, `a`, `b`, `c`} ) fmt.Println(CountToken(words, `C`, false)) fmt.Println(CountToken(words, `C`, true))
Output: 2 1
func CountTokens ¶
CountTokens count number of occurrence of each `tokens` values in words. Return number of each tokens based on their index.
Example ¶
var ( words = []string{`A`, `B`, `C`, `a`, `b`, `c`} tokens = []string{`A`, `B`} ) fmt.Println(CountTokens(words, tokens, false)) fmt.Println(CountTokens(words, tokens, true))
Output: [2 2] [1 1]
func CountUniqChar ¶
CountUniqChar count number of character in text without duplication.
Example ¶
fmt.Println(CountUniqChar(`abc abc`)) fmt.Println(CountUniqChar(`abc ABC`))
Output: 4 7
func CountUpperLower ¶
CountUpperLower return number of uppercase and lowercase in text.
Example ¶
fmt.Println(CountUpperLower(`// A B C d e f g h I J K`))
Output: 6 5
func Delete ¶
Delete removes the first item that matches value while preserving the order of the remaining items. The second return value is true if an item was deleted from the slice, and false otherwise.
func FrequencyOfToken ¶
FrequencyOfToken return frequency of token in words using
count-of-token / total-words
Example ¶
var ( words = []string{`A`, `B`, `C`, `a`, `b`, `c`} ) fmt.Println(FrequencyOfToken(words, `C`, false)) fmt.Println(FrequencyOfToken(words, `C`, true))
Output: 0.3333333333333333 0.16666666666666666
func FrequencyOfTokens ¶
FrequencyOfTokens will compute each frequency of token in words.
Example ¶
var ( words = []string{`A`, `B`, `C`, `a`, `b`, `c`} tokens = []string{`A`, `B`} ) fmt.Println(FrequencyOfTokens(words, tokens, false)) fmt.Println(FrequencyOfTokens(words, tokens, true))
Output: [0.3333333333333333 0.3333333333333333] [0.16666666666666666 0.16666666666666666]
func IsContain ¶
IsContain returns true if the element `el` is in the slice of strings `ss`, otherwise it returns false.
func IsEqual ¶
IsEqual compare elements of two slice of string without regard to their order.
Return true if both slices have the same elements, false otherwise.
Example ¶
fmt.Println(IsEqual([]string{`a`, `b`}, []string{`a`, `b`})) fmt.Println(IsEqual([]string{`a`, `b`}, []string{`b`, `a`})) fmt.Println(IsEqual([]string{`a`, `b`}, []string{`a`})) fmt.Println(IsEqual([]string{`a`, `b`}, []string{`b`, `b`}))
Output: true true false false
func LinesOfFile ¶
LinesOfFile parse the content of file and return non-empty lines.
func Longest ¶
Longest finds the longest word in words and returns its value and index.
If words is empty it returns an empty string with a negative (-1) index.
Example ¶
var ( words = []string{`a`, `bb`, `ccc`, `d`, `eee`} ) fmt.Println(Longest(words))
Output: ccc 2
func MaxCharSequence ¶
MaxCharSequence return character which have maximum sequence in `text`.
Example ¶
var ( c rune n int ) c, n = MaxCharSequence(`aaa abcdee ffgf`) fmt.Printf(`%c %d`, c, n)
Output: a 3
func MergeSpaces ¶
MergeSpaces replaces two or more consecutive horizontal spaces (' ', '\t', '\v', '\f', '\r') with a single space. If withline is true it also replaces two or more consecutive new lines with a single new line.
Example ¶
var line = " a\n\nb c d\n\n" fmt.Printf("Without merging newline: '%s'\n", MergeSpaces(line, false)) fmt.Printf("With merging newline: '%s'\n", MergeSpaces(line, true))
Output: Without merging newline: ' a b c d ' With merging newline: ' a b c d '
func MostFrequentTokens ¶
MostFrequentTokens return the token that has highest frequency in words.
For example, given input
words: [A A B A B C C] tokens: [A B]
it will return A as the majority token in words. If several tokens have equal frequency, the first of them in order will be returned.
Example ¶
var ( words = []string{`a`, `b`, `B`, `B`, `a`} tokens = []string{`a`, `b`} ) fmt.Println(MostFrequentTokens(words, tokens, false)) fmt.Println(MostFrequentTokens(words, tokens, true))
Output: b a
func RatioAlnum ¶
RatioAlnum compute and return ratio of alpha-numeric within all character in text.
Example ¶
fmt.Println(RatioAlnum(`//A1`))
Output: 0.5
func RatioDigit ¶
RatioDigit compute and return digit ratio to all characters in text.
Example ¶
fmt.Println(RatioDigit(`// A b 0 1`))
Output: 0.2
func RatioNonAlnum ¶
RatioNonAlnum return ratio of non-alphanumeric character to all character in text.
If `withspace` is true then white-space character will be counted as non-alpha numeric, otherwise it will be skipped.
Example ¶
fmt.Println(RatioNonAlnum(`// A1`, false)) fmt.Println(RatioNonAlnum(`// A1`, true))
Output: 0.4 0.6
func RatioUpper ¶
RatioUpper compute and return ratio of uppercase character to all character in text.
Example ¶
fmt.Println(RatioUpper(`// A b c d`))
Output: 0.25
func RatioUpperLower ¶
RatioUpperLower compute and return ratio of uppercase with lowercase character in text.
Example ¶
fmt.Println(RatioUpperLower(`// A b c d e`))
Output: 0.25
func SingleSpace ¶
SingleSpace convert all sequences of white spaces into single space ' '.
func SortByIndex ¶
SortByIndex will sort the slice of string in place using list of index.
Example ¶
var ( dat = []string{`Z`, `X`, `C`, `V`, `B`, `N`, `M`} ids = []int{4, 2, 6, 5, 3, 1, 0} ) fmt.Println(dat) SortByIndex(&dat, ids) fmt.Println(dat)
Output: [Z X C V B N M] [B C M N V X Z]
func Split ¶
Split given a text, return all words in text.
A word is any sequence of one or more characters that is separated from other words by white space.
If cleanit is true remove any non-alphanumeric in the start and the end of each words.
If uniq is true remove duplicate words, in case insensitive manner.
Example ¶
var line = `a b c [A] B C` fmt.Println(Split(line, false, false)) fmt.Println(Split(line, true, false)) fmt.Println(Split(line, false, true)) fmt.Println(Split(line, true, true))
Output: [a b c [A] B C] [a b c A B C] [a b c [A]] [a b c]
func Swap ¶
Swap two indices value of string. If x or y is less than zero, it will return unchanged slice. If x or y is greater than length of slice, it will return unchanged slice.
Example ¶
var ( ss = []string{`a`, `b`, `c`} ) Swap(ss, -1, 1) fmt.Println(ss) Swap(ss, 1, -1) fmt.Println(ss) Swap(ss, 4, 1) fmt.Println(ss) Swap(ss, 1, 4) fmt.Println(ss) Swap(ss, 1, 2) fmt.Println(ss)
Output: [a b c] [a b c] [a b c] [a b c] [a c b]
func TextFrequencyOfTokens ¶
TextFrequencyOfTokens return frequencies of tokens by counting each occurrence of token and divide it with total words in text.
Example ¶
var text = `a b c d A B C D 1 2` fmt.Println(TextFrequencyOfTokens(text, []string{`a`}, false)) fmt.Println(TextFrequencyOfTokens(text, []string{`a`}, true))
Output: 0.2 0.1
func TextSumCountTokens ¶
TextSumCountTokens given a text, count how many tokens inside of it and return sum of all counts.
Example ¶
var ( text = `[[aa]] [[AA]]` tokens = []string{`[[`} ) fmt.Println(TextSumCountTokens(text, tokens, false)) tokens = []string{`aa`} fmt.Println(TextSumCountTokens(text, tokens, false)) fmt.Println(TextSumCountTokens(text, tokens, true))
Output: 2 2 1
func ToBytes ¶
ToBytes convert slice of string into slice of slice of bytes.
Example ¶
var ( ss = []string{`This`, `is`, `a`, `string`} sbytes = ToBytes(ss) ) fmt.Printf(`%s`, sbytes)
Output: [This is a string]
func ToFloat64 ¶
ToFloat64 convert slice of string to slice of float64. If converted string return error it will set the float value to 0.
Example ¶
var ( in = []string{`0`, `1.1`, `e`, `3`} sf64 = ToFloat64(in) ) fmt.Println(sf64)
Output: [0 1.1 0 3]
func ToInt64 ¶
ToInt64 convert slice of string to slice of int64. If converted string return an error it will set the integer value to 0.
Example ¶
var ( in = []string{`0`, `1`, `e`, `3.3`} si64 = ToInt64(in) ) fmt.Println(si64)
Output: [0 1 0 3]
func ToStrings ¶
func ToStrings(is []interface{}) (vs []string)
ToStrings convert slice of interface to slice of string.
Example ¶
var ( i64 = []interface{}{0, 1.99, 2, 3} ss = ToStrings(i64) ) fmt.Println(ss)
Output: [0 1.99 2 3]
func TotalFrequencyOfTokens ¶
TotalFrequencyOfTokens return total frequency of list of token in words.
Example ¶
var ( words = []string{`A`, `B`, `C`, `a`, `b`, `c`} tokens = []string{`A`, `B`} ) fmt.Println(TotalFrequencyOfTokens(words, tokens, false)) fmt.Println(TotalFrequencyOfTokens(words, tokens, true))
Output: 0.6666666666666666 0.3333333333333333
func TrimNonAlnum ¶
TrimNonAlnum remove non alpha-numeric character at the beginning and end of `text`.
Example ¶
var ( inputs = []string{ `[[alpha]]`, `[[alpha`, `alpha]]`, `alpha`, `alpha0`, `1alpha`, `1alpha0`, `[a][b][c]`, `[][][]`, } in string ) for _, in = range inputs { fmt.Println(TrimNonAlnum(in)) }
Output: alpha alpha alpha alpha alpha0 1alpha 1alpha0 a][b][c
func Uniq ¶
Uniq removes duplicate strings from `words`. It modifies the content of the `words` slice by replacing each duplicate word with an empty string ("") and returns only the unique words. If sensitive is true, the strings are compared case-sensitively.
Example ¶
var ( words = []string{`a`, ``, `A`} ) fmt.Printf("%s %s\n", Uniq(words, false), words) words = []string{`a`, ``, `A`} fmt.Printf("%s %s\n", Uniq(words, true), words)
Output: [a] [a ] [a A] [a A]
Types ¶
type Parser ¶
type Parser struct {
// contains filtered or unexported fields
}
Parser implement text parsing over string.
func NewParser ¶
NewParser create and initialize parser from content and delimiters.
Example ¶
content := "[test]\nkey = value" p := NewParser(content, `=[]`) for { token, del := p.Read() token = strings.TrimSpace(token) fmt.Printf("%q %q\n", token, del) if del == 0 { break } }
Output: "" '[' "test" ']' "key" '=' "value" '\x00'
func OpenForParser ¶
OpenForParser creates and initializes the Parser using content from a file. If delimiters is empty, it defaults to all whitespace characters.
func (*Parser) AddDelimiters ¶
AddDelimiters append new delimiter to existing parser.
func (*Parser) Close ¶
func (p *Parser) Close()
Close the parser by resetting all its internal state to zero value.
func (*Parser) Line ¶
Line reads and returns a single line. On success it returns the line without its trailing '\n', together with the new line character. In case of EOF it returns the last line and 0.
func (*Parser) Read ¶
Read reads the next token from content until one of the delimiters is found. If no delimiter is found, it means all of the content has been read, and the returned delimiter will be 0.
func (*Parser) ReadEnclosed ¶
ReadEnclosed read the token inside opening and closing characters, ignoring all delimiters that previously set.
It will return the parsed token and the closed character if the closed character is found; otherwise it will return the token with 0.
func (*Parser) ReadEscaped ¶
ReadEscaped read the next token from content until one of the delimiter found, unless its escaped with value of esc character.
For example, if the content is "a\ b" and one of the delimiters is " ", escaping the space with "\" will return "a b", not "a".
func (*Parser) ReadNoSpace ¶
ReadNoSpace read the next token until one of the delimiter found, with leading and trailing spaces are ignored.
Example ¶
var ( content = " 1 , \r\t\f, 2 , 3 , 4 , " p = NewParser(content, `,`) tok string r rune ) for { tok, r = p.ReadNoSpace() fmt.Printf("%q\n", tok) if r == 0 { break } }
Output: "1" "" "2" "3" "4" ""
func (*Parser) RemoveDelimiters ¶
RemoveDelimiters from current parser.
func (*Parser) SetDelimiters ¶
SetDelimiters replace the current delimiters with delims.
func (*Parser) SkipHorizontalSpaces ¶
SkipHorizontalSpaces skip all space (" "), tab ("\t"), carriage return ("\r"), and form feed ("\f") characters; and return the first character found, probably new line.
type Row ¶
type Row [][]string
Row is simplified name for slice of slice of string.
func (Row) IsEqual ¶
IsEqual compare two row without regard to their order.
Return true if both contain the same list, false otherwise.
Example ¶
var row = Row{{`a`}, {`b`, `c`}} fmt.Println(row.IsEqual(Row{{`a`}, {`b`, `c`}})) fmt.Println(row.IsEqual(Row{{`a`}, {`c`, `b`}})) fmt.Println(row.IsEqual(Row{{`c`, `b`}, {`a`}})) fmt.Println(row.IsEqual(Row{{`b`, `c`}, {`a`}})) fmt.Println(row.IsEqual(Row{{`a`}, {`b`}}))
Output: true true true true false
type Table ¶
type Table []Row
Table is for working with set of row.
Each element in table is in the form of
[ [["a"],["b","c"],...], // Row [["x"],["y","z"],...] // Row ]
func Partition ¶
Partition groups the elements of slice "ss" into non-empty records, in such a way that every element is included in one and only one of the records.
Given a list of element in "ss", and number of partition "k", return the set of all group of all elements without duplication.
Number of possible list can be computed using Stirling number of second kind.
For more information, see a reference on partitions of a set and Stirling numbers of the second kind.
Example ¶
var ss = []string{`a`, `b`, `c`} fmt.Println(`Partition k=1:`, Partition(ss, 1)) fmt.Println(`Partition k=2:`, Partition(ss, 2)) fmt.Println(`Partition k=3:`, Partition(ss, 3))
Output: Partition k=1: [[[a b c]]] Partition k=2: [[[b a] [c]] [[b] [c a]] [[b c] [a]]] Partition k=3: [[[a] [b] [c]]]
func SinglePartition ¶
SinglePartition create a table from a slice of string, where each element in slice become a single record.
Example ¶
var ss = []string{`a`, `b`, `c`} fmt.Println(SinglePartition(ss))
Output: [[[a] [b] [c]]]
func (Table) IsEqual ¶
IsEqual compare two table of string without regard to their order.
Return true if both tables contain the same rows, false otherwise.
Example ¶
var table = Table{ {{`a`}, {`b`, `c`}}, {{`b`}, {`a`, `c`}}, {{`c`}, {`a`, `b`}}, } fmt.Println(table.IsEqual(table)) var other = Table{ {{`c`}, {`a`, `b`}}, {{`a`}, {`b`, `c`}}, {{`b`}, {`a`, `c`}}, } fmt.Println(table.IsEqual(other)) other = Table{ {{`a`}, {`b`, `c`}}, {{`b`}, {`a`, `c`}}, } fmt.Println(table.IsEqual(other))
Output: true true false
func (Table) JoinCombination ¶
JoinCombination for each row in table, generate new row and insert "s" into different record in different new row.
Example ¶
var ( table = Table{ {{`a`}, {`b`}, {`c`}}, } s = `X` ) fmt.Println(table.JoinCombination(s))
Output: [[[a X] [b] [c]] [[a] [b X] [c]] [[a] [b] [c X]]]