text

package

v0.62.0 Latest Latest Go to latest Published: Mar 30, 2024 License: Apache-2.0 Imports: 8 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/zoomio/tagify

Links

Open Source Insights

Documentation ¶

Index ¶

Variables

Constants ¶

This section is empty.

Variables ¶

View Source

// ProcessText reads all of in as plain text and builds a tag index from it.
//
// The input is split on '\n' into non-empty lines, each line is split into
// sentences with util.SplitToSentences and each sentence into tokens with
// util.SplitToTokens. Every sentence is treated as one document. For each
// distinct token the result's RawTags holds a *model.Tag where:
//   - Score and Count are incremented once per occurrence of the token,
//   - Docs is incremented once per sentence that contains the token,
//   - DocsCount is the total number of sentences processed.
//
// Unless c.SkipLang is set or c.StopWords is already non-nil,
// config.DetectLang is called with each line (presumably to populate the
// language and stop words on c — confirm against config.DetectLang).
//
// in is always closed before returning. Input without any non-empty line
// yields an empty *model.Result.
var ProcessText model.ProcessFunc = func(c *config.Config, in io.ReadCloser) *model.Result {

	if c.Verbose {
		fmt.Println("parsing plain text...")
	}

	defer in.Close()

	// The ProcessFunc signature has no error return, so a failed read is
	// handled best-effort: whatever was read so far is still processed and
	// the failure is only reported in verbose mode.
	var buf bytes.Buffer
	if _, err := buf.ReadFrom(in); err != nil && c.Verbose {
		fmt.Printf("error reading text input: %v\n", err)
	}

	// FieldsFunc drops empty fields, so blank lines never reach the loop.
	lines := strings.FieldsFunc(buf.String(), func(r rune) bool {
		return r == '\n'
	})

	if c.Verbose {
		fmt.Printf("got %d lines\n", len(lines))
	}

	if len(lines) == 0 {
		return &model.Result{}
	}

	tokenIndex := make(map[string]*model.Tag)
	// tokens collects every token of the whole input, in order; it is only
	// used to compute the document hash below.
	tokens := make([]string, 0)
	var docsCount int

	for _, l := range lines {
		if !c.SkipLang && c.StopWords == nil && len(l) > 0 {
			config.DetectLang(c, l)
		}

		for _, s := range util.SplitToSentences([]byte(l)) {
			docsCount++

			// Count only the tokens of the current sentence. Ranging over
			// the accumulated tokens slice here would re-count every earlier
			// sentence once per later sentence, inflating Score, Count and
			// Docs and making the loop quadratic.
			sentenceTokens := util.SplitToTokens(s, c)
			tokens = append(tokens, sentenceTokens...)

			seen := make(map[string]struct{}, len(sentenceTokens))
			for _, token := range sentenceTokens {
				item, ok := tokenIndex[token]
				if !ok {
					item = &model.Tag{Value: token}
					tokenIndex[token] = item
				}
				item.Score++
				item.Count++

				// Docs counts sentences, not occurrences: bump it only the
				// first time the token shows up in this sentence.
				if _, dup := seen[token]; !dup {
					seen[token] = struct{}{}
					item.Docs++
				}
			}
		}
	}

	for _, v := range tokenIndex {
		v.DocsCount = docsCount
	}

	return &model.Result{
		RawTags: tokenIndex,
		Meta: &model.Meta{
			ContentType: config.Text,
			DocHash:     fmt.Sprintf("%x", hashTokens(tokens)),
			Lang:        c.Lang,
		},
	}
}

ProcessText parses the given lines of plain text into a set of tags.

Functions ¶

This section is empty.

Types ¶

This section is empty.

Source Files ¶

View all Source files

text_processor.go

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL