Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var ProcessText model.ProcessFunc = func(c *config.Config, in io.ReadCloser) *model.Result { if c.Verbose { fmt.Println("parsing plain text...") } var docsCount int defer in.Close() buf := new(bytes.Buffer) _, _ = buf.ReadFrom(in) inStr := buf.String() lines := strings.FieldsFunc(inStr, func(r rune) bool { return r == '\n' }) if c.Verbose { fmt.Printf("got %d lines\n", len(lines)) } if len(lines) == 0 { return &model.Result{} } tokenIndex := make(map[string]*model.Tag) tokens := make([]string, 0) for _, l := range lines { if !c.SkipLang && c.StopWords == nil && len(l) > 0 { config.DetectLang(c, l) } sentences := util.SplitToSentences([]byte(l)) for _, s := range sentences { docsCount++ tokens = append(tokens, util.SplitToTokens(s, c)...) visited := map[string]bool{} for _, token := range tokens { visited[token] = true item, ok := tokenIndex[token] if !ok { item = &model.Tag{Value: token} tokenIndex[token] = item } item.Score++ item.Count++ } for token := range visited { tokenIndex[token].Docs++ } } } for _, v := range tokenIndex { v.DocsCount = docsCount } return &model.Result{ RawTags: tokenIndex, Meta: &model.Meta{ ContentType: config.Text, DocHash: fmt.Sprintf("%x", hashTokens(tokens)), Lang: c.Lang, }, } }
ProcessText parses the given lines of plain text into a set of tags.
Functions ¶
This section is empty.
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.