Documentation ¶
Index ¶
- func CleanChineseText(text string) string
- func ProcessLine(r io.Reader, max int, threshold int, cleaner CorpusCleaner, ...) error
- func RemoveNonChinese(text string) string
- func RemoveUrl(text string) string
- type ChineseCorpusCleaner
- type CorpusCleaner
- type EnglishCorpusCleaner
- type MixCorpusCleaner
- type SegRequest
- type SegResponse
- type Token
- type Tokenizer
- type WordFreqResult
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func CleanChineseText ¶
func ProcessLine ¶
func RemoveNonChinese ¶
func RemoveUrl ¶
Types ¶
type ChineseCorpusCleaner ¶
type ChineseCorpusCleaner struct{}
func (*ChineseCorpusCleaner) DoClean ¶
func (self *ChineseCorpusCleaner) DoClean(text string) string
func (*ChineseCorpusCleaner) DoSplit ¶
func (self *ChineseCorpusCleaner) DoSplit(text string) []string
func (*ChineseCorpusCleaner) GetSeparator ¶
func (self *ChineseCorpusCleaner) GetSeparator() string
type CorpusCleaner ¶
type EnglishCorpusCleaner ¶
type EnglishCorpusCleaner struct{}
func (*EnglishCorpusCleaner) DoClean ¶
func (self *EnglishCorpusCleaner) DoClean(text string) string
TODO: document how DoClean transforms the input text.
func (*EnglishCorpusCleaner) DoSplit ¶
func (self *EnglishCorpusCleaner) DoSplit(text string) []string
func (*EnglishCorpusCleaner) GetSeparator ¶
func (self *EnglishCorpusCleaner) GetSeparator() string
type MixCorpusCleaner ¶
type MixCorpusCleaner struct{}
type SegRequest ¶
type SegResponse ¶
type SegResponse struct {
Result WordFreqResult `json:"result"`
}
type WordFreqResult ¶
type WordFreqResult [][]interface{}
func (WordFreqResult) Len ¶
func (self WordFreqResult) Len() int
func (WordFreqResult) Less ¶
func (self WordFreqResult) Less(i, j int) bool
func (WordFreqResult) Swap ¶
func (self WordFreqResult) Swap(i, j int)
Click to show internal directories.
Click to hide internal directories.