scraper

package
v0.1.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 5, 2022 | License: GPL-3.0 | Imports: 11 | Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Config

// Config is the top-level configuration value. It holds a list of Scraper
// definitions, mapped to the "scrapers" YAML key.
type Config struct {
	Scrapers []Scraper `yaml:"scrapers"`
}

type CoveredDateParts

// CoveredDateParts is a set of boolean flags, one each for the day, month,
// year and time parts of a date. It is the type of DateComponent.Covers.
// NOTE(review): presumably a true flag means the component supplies that
// part of the date — confirm against the date-parsing code.
type CoveredDateParts struct {
	Day   bool `yaml:"day"`
	Month bool `yaml:"month"`
	Year  bool `yaml:"year"`
	Time  bool `yaml:"time"`
}

type DateComponent

// DateComponent describes one component of a date field: the date parts it
// covers, the location of its element, and a layout string.
// NOTE(review): Layout is presumably a Go time layout (reference time
// "2006-01-02 15:04:05") — confirm against the code that parses it.
type DateComponent struct {
	Covers          CoveredDateParts `yaml:"covers"`
	ElementLocation ElementLocation  `yaml:"location"`
	Layout          string           `yaml:"layout"`
}

type DatePart

// DatePart has no exported fields visible in this listing.
// NOTE(review): its role is not shown here; presumably used internally when
// assembling dates — confirm against the package source.
type DatePart struct {
	// contains filtered or unexported fields
}

type DynamicField

// DynamicField configures a single named field. Type selects the kind of
// field (currently text, url or date); the trailing comment on each option
// states which field types that option applies to.
type DynamicField struct {
	Name string `yaml:"name"`
	Type string `yaml:"type"` // can currently be text, url or date
	// Note on OnSubpage (declared below): if a field can be found on a subpage,
	// OnSubpage has to contain the name of a field of type 'url' that is
	// located on the main page.
	ElementLocation ElementLocation `yaml:"location"`
	OnSubpage       string          `yaml:"on_subpage"`    // applies to text, url, date
	CanBeEmpty      bool            `yaml:"can_be_empty"`  // applies to text, url
	Components      []DateComponent `yaml:"components"`    // applies to date
	DateLocation    string          `yaml:"date_location"` // applies to date
	DateLanguage    string          `yaml:"date_language"` // applies to date
	Relative        bool            `yaml:"relative"`      // applies to url
}

type ElementLocation

// ElementLocation describes where a value is found: a selector string, node
// and child indices, a regex extraction config, an attribute name and a
// maximum length.
// NOTE(review): Selector is presumably a CSS selector and Attr an HTML
// attribute name; the meaning of a zero MaxLength is not visible here —
// confirm against the extraction code.
type ElementLocation struct {
	Selector     string      `yaml:"selector"`
	NodeIndex    int         `yaml:"node_index"`
	ChildIndex   int         `yaml:"child_index"`
	RegexExtract RegexConfig `yaml:"regex_extract"`
	Attr         string      `yaml:"attr"`
	MaxLength    int         `yaml:"max_length"`
}

type Filter

// Filter pairs a field name with a regular-expression string (RegexIgnore).
// NOTE(review): presumably items whose named field matches RegexIgnore are
// dropped from the results — confirm against the code that applies filters.
type Filter struct {
	Field       string `yaml:"field"`
	RegexIgnore string `yaml:"regex_ignore"`
}

type RegexConfig

// RegexConfig holds a regular-expression string (Exp) and an integer Index.
// It is the type of ElementLocation.RegexExtract.
// NOTE(review): Index presumably selects which match of Exp to use —
// confirm against the extraction code.
type RegexConfig struct {
	Exp   string `yaml:"exp"`
	Index int    `yaml:"index"`
}

type Scraper

// Scraper is the configuration of a single scraper: a name, a URL, an item
// string, a list of selectors to exclude, static and dynamic field
// definitions, filters, and paginator settings (selector, relative flag,
// maximum number of pages and node index).
//
// Every field carries an explicit yaml tag. The tag on Paginator spells out
// the key "paginator", which is what the YAML library would otherwise derive
// by lowercasing the field name, so existing configuration files keep working.
//
// NOTE(review): Item is presumably the selector that identifies one item on
// the page, and Paginator.Selector the one that locates the next-page link —
// confirm against the scraping code.
type Scraper struct {
	Name                string   `yaml:"name"`
	URL                 string   `yaml:"url"`
	Item                string   `yaml:"item"`
	ExcludeWithSelector []string `yaml:"exclude_with_selector"`
	Fields              struct {
		Static  []StaticField  `yaml:"static"`
		Dynamic []DynamicField `yaml:"dynamic"`
	} `yaml:"fields"`
	Filters   []Filter `yaml:"filters"`
	Paginator struct {
		Selector  string `yaml:"selector"`
		Relative  bool   `yaml:"relative"`
		MaxPages  int    `yaml:"max_pages"`
		NodeIndex int    `yaml:"node_index"`
	} `yaml:"paginator"`
}

func (Scraper) GetEvents

// GetEvents returns a slice of string-keyed maps together with an error.
// NOTE(review): the method body is not visible in this listing; presumably
// each map is one scraped item keyed by field name — confirm against the source.
func (c Scraper) GetEvents() ([]map[string]interface{}, error)

type StaticField

// StaticField is a name/value pair of strings, listed under the "static" key
// of Scraper.Fields.
// NOTE(review): presumably a field whose fixed value is added to every
// scraped item — confirm against the scraping code.
type StaticField struct {
	Name  string `yaml:"name"`
	Value string `yaml:"value"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL