model

package
v0.48.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 21, 2021 License: Apache-2.0 Imports: 7 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var (
	// Verbose enables high verbosity.
	Verbose = func(verbose bool) ParseOption {
		return func(c *ParseConfig) {
			c.Verbose = verbose
		}
	}

	// NoStopWords enables stop-words exclusion from the output.
	NoStopWords = func(noStopWords bool) ParseOption {
		return func(c *ParseConfig) {
			c.NoStopWords = noStopWords
		}
	}

	// ContentOnly ignores all parts of the HTML page that are not related
	// to content (HTML only).
	ContentOnly = func(v bool) ParseOption {
		return func(c *ParseConfig) {
			c.ContentOnly = v
		}
	}

	// FullSite tells parser to process full site (HTML only).
	FullSite = func(v bool) ParseOption {
		return func(c *ParseConfig) {
			c.FullSite = v
		}
	}

	// Source of the parser.
	Source = func(v string) ParseOption {
		return func(c *ParseConfig) {
			c.Source = v
		}
	}

	// TagWeightsString sets the tag weights of the configuration to the
	// result of ParseTagWeights applied to the string v, read as the String
	// input type.
	TagWeightsString = func(v string) ParseOption {
		return func(c *ParseConfig) {
			c.TagWeights = ParseTagWeights(strings.NewReader(v), String)
		}
	}

	// TagWeightsJSON sets the tag weights of the configuration to the result
	// of ParseTagWeights applied to the content of the file at path v, read
	// as the JSON input type.
	//
	// If the file cannot be opened, the failure is reported on standard error
	// and the configuration is left unchanged.
	TagWeightsJSON = func(v string) ParseOption {
		return func(c *ParseConfig) {
			f, err := os.Open(v)
			if err != nil {
				// The builtin println prints an error interface as raw
				// pointer words instead of its message, so format the
				// report explicitly. It still goes to standard error.
				fmt.Fprintf(os.Stderr, "error: can't open JSON file [%s]: %v\n", v, err)
				return
			}
			// Deferred so the file is released even if parsing panics.
			defer f.Close()

			c.TagWeights = ParseTagWeights(bufio.NewReader(f), JSON)
		}
	}
)

Functions

func ToStrings

func ToStrings(items []*Tag) []string

ToStrings transforms the given list of tags into a list of strings.

Types

type InputReader

// InputReader is an io.ReadCloser that additionally offers ReadLines.
type InputReader interface {
	// ReadLines returns a slice of strings and an error.
	// NOTE(review): presumably one entry per line of the underlying input —
	// confirm against the implementations.
	ReadLines() ([]string, error)
	// io.ReadCloser contributes the Read and Close methods.
	io.ReadCloser
}

InputReader is an io.ReadCloser that can additionally return its content as a slice of strings via ReadLines.

type ParseConfig

// ParseConfig holds the settings that ParseOption functions write to.
//
// Verbose enables high verbosity. NoStopWords enables stop-words exclusion
// from the output. ContentOnly ignores the parts of an HTML page that are not
// related to content, and FullSite tells the parser to process the full site
// (both apply to HTML only). Source is the source of the parser. The embedded
// TagWeights is assigned by the TagWeightsString and TagWeightsJSON options.
type ParseConfig struct {
	Verbose     bool
	NoStopWords bool
	ContentOnly bool
	FullSite    bool
	Source      string
	TagWeights
}

type ParseFunc

// ParseFunc is a handler that goes through the given reader and produces tags.
// It accepts any number of ParseOption values and returns a *ParseOutput;
// there is no separate error result, so failures are presumably reported via
// ParseOutput.Err — confirm against the implementations.
type ParseFunc func(reader io.ReadCloser, options ...ParseOption) *ParseOutput

ParseFunc represents an arbitrary handler, which goes through given reader and produces tags.

type ParseOption

// ParseOption is a function that modifies a ParseConfig in place. The
// package-level option constructors (Verbose, NoStopWords, ContentOnly,
// FullSite, Source, TagWeightsString, TagWeightsJSON) return values of this type.
type ParseOption func(*ParseConfig)

ParseOption allows customising the `Tagger` configuration.

type ParseOutput

// ParseOutput is the result of a ParseFunc.
//
// Tags maps string keys to tags; FlatTags returns them as a slice.
// NOTE(review): the keys are presumably the tag values, and DocTitle and
// DocHash presumably hold the title and a hash of the parsed document —
// confirm against the parsers. Err presumably carries a parsing failure,
// since ParseFunc has no separate error result.
type ParseOutput struct {
	Tags     map[string]*Tag
	DocTitle string
	DocHash  []byte
	Err      error
}

ParseOutput is a result of the `ParseFunc`.

func (*ParseOutput) FlatTags

func (po *ParseOutput) FlatTags() []*Tag

FlatTags transforms internal token register into a slice.

type Tag

// Tag holds some arbitrary string value (e.g. a word) along with some extra
// data about it.
type Tag struct {
	// Value is the value of the tag, i.e. a word.
	Value string
	// Score is used to represent the importance of the tag.
	Score float64
	// Count is the number of times the tag appeared in a text.
	Count int
	// Docs is the number of documents in a text in which the tag appeared.
	Docs int
	// DocsCount is the number of documents in a text.
	DocsCount int
}

Tag holds some arbitrary string value (e.g. a word) along with some extra data about it.

func (*Tag) String

func (t *Tag) String() string

type TagWeights

// TagWeights maps a string key to a float64 value. Judging by the input
// formats documented on the String and JSON constants, the key is a tag name
// and the value is that tag's score.
type TagWeights map[string]float64

TagWeights maps a tag name to its score, as produced by ParseTagWeights.

func ParseTagWeights

func ParseTagWeights(reader io.Reader, readerType TagWeightsType) TagWeights

type TagWeightsType

// TagWeightsType selects the input format passed to ParseTagWeights; the
// String and JSON constants are its values.
type TagWeightsType byte

TagWeightsType identifies the format of the input given to ParseTagWeights.

// Input formats accepted by ParseTagWeights. The trailing comment on each
// constant shows the expected layout of the input.
const (
	String TagWeightsType = iota // <tagName1>:<tagScore1>|<tagName2>:<tagScore2>
	JSON                         // { "<tagName1>": <tagScore1>, "<tagName2>": <tagScore2> }
)

Weight input types

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL