config

package
v0.59.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 21, 2022 License: Apache-2.0 Imports: 11 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var (
	// Source returns an Option that stores v as the target source
	// (Config.Source).
	Source = func(v string) Option {
		return func(cfg *Config) { cfg.Source = v }
	}

	// Language returns an Option that stores v in Config.Lang.
	Language = func(v string) Option {
		return func(cfg *Config) { cfg.Lang = v }
	}

	// Query returns an Option that stores v, a CSS query for the target,
	// in Config.Query.
	Query = func(v string) Option {
		return func(cfg *Config) { cfg.Query = v }
	}

	// WaitFor returns an Option that stores query, a CSS query for the
	// target of In-Out, in Config.WaitFor.
	WaitFor = func(query string) Option {
		return func(cfg *Config) { cfg.WaitFor = query }
	}

	// WaitUntil returns an Option that stores d, the page load duration to
	// wait for, in Config.WaitUntil.
	WaitUntil = func(d time.Duration) Option {
		return func(cfg *Config) { cfg.WaitUntil = d }
	}

	// Screenshot returns an Option that stores v in Config.Screenshot.
	// When a screenshot is captured, the Reader is expected to have the
	// image's ImgBytes populated.
	Screenshot = func(v bool) Option {
		return func(cfg *Config) { cfg.Screenshot = v }
	}

	// Content returns an Option that stores v as the content of the target
	// (Config.Content).
	Content = func(v string) Option {
		return func(cfg *Config) { cfg.Content = v }
	}

	// TargetType returns an Option that stores v, the content type of the
	// target, in the embedded Config.ContentType field.
	TargetType = func(v ContentType) Option {
		return func(cfg *Config) { cfg.ContentType = v }
	}

	// Limit returns an Option that stores v, the limit of tags for the
	// target, in Config.Limit.
	Limit = func(v int) Option {
		return func(cfg *Config) { cfg.Limit = v }
	}

	// Verbose returns an Option that stores v in Config.Verbose; true
	// requests high verbosity.
	Verbose = func(v bool) Option {
		return func(cfg *Config) { cfg.Verbose = v }
	}

	// NoStopWords returns an Option that stores v in Config.NoStopWords;
	// true requests that stop-words be excluded from the output.
	NoStopWords = func(v bool) Option {
		return func(cfg *Config) { cfg.NoStopWords = v }
	}

	// StopWords returns an Option that installs a custom set of stop-words:
	// v is passed through stopwords.WordsSlice to stopwords.Setup and the
	// result is stored in Config.StopWords.
	StopWords = func(v []string) Option {
		return func(cfg *Config) {
			words := stopwords.WordsSlice(v)
			cfg.StopWords = stopwords.Setup(words)
		}
	}

	// ContentOnly returns an Option that stores v in Config.ContentOnly;
	// true requests that all non-content-related parts of the HTML page be
	// ignored.
	ContentOnly = func(v bool) Option {
		return func(cfg *Config) { cfg.ContentOnly = v }
	}

	// FullSite returns an Option that stores v in Config.FullSite; true
	// tells the parser to process the full site (HTML only).
	FullSite = func(v bool) Option {
		return func(cfg *Config) { cfg.FullSite = v }
	}

	// TagWeightsString returns an Option that parses v with ParseTagWeights
	// using the String input type and stores the result in the embedded
	// Config.TagWeights field.
	TagWeightsString = func(v string) Option {
		return func(cfg *Config) {
			src := strings.NewReader(v)
			cfg.TagWeights = ParseTagWeights(src, String)
		}
	}

	// TagWeightsJSON returns an Option that opens the file at path v, parses
	// it with ParseTagWeights using the JSON input type, and stores the
	// result in the embedded Config.TagWeights field. If the file cannot be
	// opened, the error is written to standard error and the Config is left
	// unchanged.
	TagWeightsJSON = func(v string) Option {
		return func(c *Config) {
			f, err := os.Open(v)
			if err != nil {
				// The builtin println does not format error values (it prints
				// the raw interface words), so write the message explicitly.
				// Standard error is where println output went as well.
				fmt.Fprintln(os.Stderr, fmt.Errorf("error: can't open JSON file [%s]: %w", v, err))
				return
			}
			// Deferred so the file is released even if parsing panics; the
			// Close error is ignored because the file is only read.
			defer f.Close()

			c.TagWeights = ParseTagWeights(bufio.NewReader(f), JSON)
		}
	}

	// ExtraTagWeightsString returns an Option that parses v with
	// ParseTagWeights using the String input type and stores the result in
	// Config.ExtraTagWeights.
	ExtraTagWeightsString = func(v string) Option {
		return func(cfg *Config) {
			src := strings.NewReader(v)
			cfg.ExtraTagWeights = ParseTagWeights(src, String)
		}
	}

	// ExtraTagWeightsJSON returns an Option that opens the file at path v,
	// parses it with ParseTagWeights using the JSON input type, and stores
	// the result in Config.ExtraTagWeights. If the file cannot be opened,
	// the error is written to standard error and the Config is left
	// unchanged.
	ExtraTagWeightsJSON = func(v string) Option {
		return func(c *Config) {
			f, err := os.Open(v)
			if err != nil {
				// The builtin println does not format error values (it prints
				// the raw interface words), so write the message explicitly.
				// Standard error is where println output went as well.
				fmt.Fprintln(os.Stderr, fmt.Errorf("error: can't open JSON file [%s]: %w", v, err))
				return
			}
			// Deferred so the file is released even if parsing panics; the
			// Close error is ignored because the file is only read.
			defer f.Close()

			c.ExtraTagWeights = ParseTagWeights(bufio.NewReader(f), JSON)
		}
	}

	// ExcludeTagsString returns an Option that parses v with ParseTagWeights
	// using the String input type and stores the result in
	// Config.ExcludeTags.
	ExcludeTagsString = func(v string) Option {
		return func(cfg *Config) {
			src := strings.NewReader(v)
			cfg.ExcludeTags = ParseTagWeights(src, String)
		}
	}

	// AllTagWeights returns an Option that stores v in Config.AllTagWeights.
	AllTagWeights = func(v bool) Option {
		return func(cfg *Config) { cfg.AllTagWeights = v }
	}

	// AdjustScores returns an Option that stores v in Config.AdjustScores.
	AdjustScores = func(v bool) Option {
		return func(cfg *Config) { cfg.AdjustScores = v }
	}

	// Extensions returns an Option that stores a copy of v in
	// Config.Extensions, so the Config does not share the caller's backing
	// array.
	Extensions = func(v []extension.Extension) Option {
		return func(cfg *Config) {
			// Start from an empty, non-nil slice with room for every element
			// of v, then append them all; the result has len(v) elements.
			dup := make([]extension.Extension, 0, len(v))
			cfg.Extensions = append(dup, v...)
		}
	}
)

Functions

func DetectLang added in v0.59.0

// DetectLang detects the language and sets up the stop words for it.
// NOTE(review): the body is not shown on this page; presumably controlStr is
// the text inspected for detection and the result is pushed into cfg and
// contents — confirm against the implementation.
func DetectLang(cfg *Config, controlStr string, contents Vocabulary)

DetectLang detects the language and sets up the stop words for it.

Types

type Config

// Config collects the settings that the Option functions of this package
// write into.
type Config struct {
	// Source is the target source (see the Source option).
	Source string

	// headless
	// Query and WaitFor are CSS queries, WaitUntil is the page load duration
	// to wait for, and Screenshot requests a screenshot capture (see the
	// same-named options).
	Query      string
	WaitFor    string
	WaitUntil  time.Duration
	Screenshot bool

	// Content is the content of the target (see the Content option).
	Content string
	// ContentType is embedded; the TargetType option assigns it.
	ContentType
	Limit         int
	Verbose       bool
	NoStopWords   bool
	SkipLang      bool
	Lang          string
	StopWords     *stopwords.Register
	ContentOnly   bool
	FullSite      bool
	AllTagWeights bool
	// TagWeights is embedded; the TagWeightsString and TagWeightsJSON
	// options assign it.
	TagWeights
	ExtraTagWeights TagWeights
	ExcludeTags     TagWeights
	AdjustScores    bool
	Extensions      []extension.Extension
}

Config ...

func New

// New returns a *Config and accepts any number of Option values.
// NOTE(review): the body is not shown on this page; presumably each option is
// applied to the new Config — confirm against the implementation.
func New(options ...Option) *Config

New ...

func (*Config) SetStopWords

// SetStopWords is a method on *Config taking a language string.
// NOTE(review): the body is not shown on this page; presumably it configures
// c.StopWords for lang — confirm against the implementation.
func (c *Config) SetStopWords(lang string)

SetStopWords ...

type ContentType

// ContentType identifies the content type of a target; it is a byte-sized
// enumeration.
type ContentType byte

ContentType ...

// Content types. The values are assigned by iota, so Unknown is the zero
// value of ContentType.
const (
	Unknown ContentType = iota
	Text
	HTML
	Markdown
)

Content types

func ContentTypeOf

// ContentTypeOf returns the ContentType that corresponds to the string value
// contentType.
// NOTE(review): the accepted strings and the result for unrecognised input are
// not visible on this page — confirm against the implementation.
func ContentTypeOf(contentType string) ContentType

ContentTypeOf returns ContentType based on string value.

func (ContentType) String

// String returns a string for ct; its signature matches fmt.Stringer.
// NOTE(review): the exact strings returned are not visible on this page.
func (ct ContentType) String() string

String ...

type Option

// Option customises a Config: it is a function that receives the *Config to
// modify.
type Option func(*Config)

Option allows customising the configuration.

type TagWeights

// TagWeights maps a string key to a float64 value.
// NOTE(review): presumably the key is a tag name and the value its score, as
// in the input formats documented on TagWeightsType — confirm.
type TagWeights map[string]float64

TagWeights ...

func ParseTagWeights added in v0.50.0

// ParseTagWeights builds a TagWeights value from reader; readerType selects
// the input type (String or JSON).
// NOTE(review): the body is not shown on this page, so behaviour on malformed
// input is unknown — confirm against the implementation.
func ParseTagWeights(reader io.Reader, readerType TagWeightsType) TagWeights

type TagWeightsType added in v0.50.0

// TagWeightsType selects the input type that ParseTagWeights is given; it is
// a byte-sized enumeration.
type TagWeightsType byte

TagWeightsType ...

// Weight input types. The values are assigned by iota, so String is the zero
// value of TagWeightsType. The trailing comments show each input's layout.
const (
	String TagWeightsType = iota // <tagName1>:<tagScore1>|<tagName2>:<tagScore2>
	JSON                         // { "<tagName1>": <tagScore1>, "<tagName2>": <tagScore2> }
)

Weight input types

type Vocabulary added in v0.59.0

// Vocabulary is the interface of the contents argument taken by DetectLang.
// Implementations accept a language string and a stop-words register.
type Vocabulary interface {
	// SetLang receives a language string.
	SetLang(l string)
	// SetReg receives a stop-words register.
	SetReg(r *stopwords.Register)
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL