Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var ProcessText model.ProcessFunc = func(c *config.Config, in io.ReadCloser) *model.Result { if c.Verbose { fmt.Println("parsing plain text...") } var docsCount int defer in.Close() buf := new(bytes.Buffer) _, _ = buf.ReadFrom(in) inStr := buf.String() lines := strings.FieldsFunc(inStr, func(r rune) bool { return r == '\n' }) if c.Verbose { fmt.Printf("got %d lines\n", len(lines)) } if len(lines) == 0 { return &model.Result{} } var lang string var reg *stopwords.Register tokenIndex := make(map[string]*model.Tag) tokens := make([]string, 0) for _, l := range lines { if c.StopWords == nil && l != "" { info := whatlanggo.Detect(l) lang = info.Lang.String() c.SetStopWords(info.Lang.Iso6391()) if c.Verbose { fmt.Printf("detected language: %s [%s] [%s]\n ", info.Lang.String(), info.Lang.Iso6391(), info.Lang.Iso6393()) } if c.NoStopWords { reg = c.StopWords } } sentences := util.SplitToSentences([]byte(l)) for _, s := range sentences { docsCount++ tokens = append(tokens, util.Sanitize(bytes.Fields(s), reg)...) visited := map[string]bool{} for _, token := range tokens { visited[token] = true item, ok := tokenIndex[token] if !ok { item = &model.Tag{Value: token} tokenIndex[token] = item } item.Score++ item.Count++ } for token := range visited { tokenIndex[token].Docs++ } } } for _, v := range tokenIndex { v.DocsCount = docsCount } return &model.Result{ RawTags: tokenIndex, Meta: &model.Meta{ ContentType: config.Text, DocHash: fmt.Sprintf("%x", hashTokens(tokens)), Lang: lang, }, } }
ProcessText parses the given lines of text into a slice of tags.
Functions ¶
This section is empty.
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.