Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var ( // Verbose enables high verbosity. Verbose = func(verbose bool) ParseOption { return func(c *ParseConfig) { c.Verbose = verbose } } // NoStopWords enables stop-words exclusion from the output. NoStopWords = func(noStopWords bool) ParseOption { return func(c *ParseConfig) { c.NoStopWords = noStopWords } } // ContentOnly ignores all none content related parts of the HTML page (HTML only). ContentOnly = func(v bool) ParseOption { return func(c *ParseConfig) { c.ContentOnly = v } } // FullSite tells parser to process full site (HTML only). FullSite = func(v bool) ParseOption { return func(c *ParseConfig) { c.FullSite = v } } // Source of the parser. Source = func(v string) ParseOption { return func(c *ParseConfig) { c.Source = v } } TagWeightsString = func(v string) ParseOption { return func(c *ParseConfig) { c.TagWeights = ParseTagWeights(strings.NewReader(v), String) } } TagWeightsJSON = func(v string) ParseOption { return func(c *ParseConfig) { f, err := os.Open(v) if err != nil { println(fmt.Errorf("error: can't open JSON file [%s]: %w", v, err)) return } r := bufio.NewReader(f) c.TagWeights = ParseTagWeights(r, JSON) f.Close() } } )
Functions ¶
Types ¶
type InputReader ¶
// InputReader is an io.ReadCloser that can additionally return its content
// as a slice of lines.
type InputReader interface {
	io.ReadCloser

	// ReadLines returns the lines of the input, or an error.
	ReadLines() ([]string, error)
}
InputReader is an io.ReadCloser that can also return its content as a slice of lines via ReadLines.
type ParseConfig ¶
type ParseFunc ¶
// ParseFunc is the signature of a parsing handler: it receives a reader and
// zero or more ParseOption values and returns a *ParseOutput.
type ParseFunc func(reader io.ReadCloser, options ...ParseOption) *ParseOutput
ParseFunc represents an arbitrary handler, which goes through given reader and produces tags.
type ParseOption ¶
// ParseOption is a function that receives a *ParseConfig; the option
// constructors in this package return closures that set fields on it.
type ParseOption func(*ParseConfig)
ParseOption allows customising the `Tagger` configuration.
type ParseOutput ¶
ParseOutput is a result of the `ParseFunc`.
func (*ParseOutput) FlatTags ¶
func (po *ParseOutput) FlatTags() []*Tag
FlatTags transforms internal token register into a slice.
type Tag ¶
// Tag holds a string value (e.g. a word) together with the statistics
// recorded for it.
type Tag struct {
	// Value of the tag, i.e. a word.
	Value string
	// Score used to represent importance of the tag.
	Score float64
	// Count is the number of times the tag appeared in a text.
	Count int
	// Docs is the number of documents in a text in which the tag appeared.
	Docs int
	// DocsCount is the number of documents in a text.
	DocsCount int
}
Tag holds some arbitrary string value (e.g. a word) along with some extra data about it.
type TagWeights ¶
TagWeights ...
func ParseTagWeights ¶
func ParseTagWeights(reader io.Reader, readerType TagWeightsType) TagWeights
type TagWeightsType ¶
// TagWeightsType selects the input format that ParseTagWeights reads.
type TagWeightsType byte
TagWeightsType identifies the input format passed to ParseTagWeights (String or JSON).
const ( String TagWeightsType = iota // <tagName1>:<tagScore1>|<tagName2>:<tagScore2> JSON // { "<tagName1>": <tagScore1>, "<tagName2>": <tagScore2> } )
Weight input types
Click to show internal directories.
Click to hide internal directories.