Versions in this module Expand all Collapse all v0 v0.5.38 Nov 12, 2024 Changes in this version + var DoPruning = true + var SkipSubURLExt = map[string]bool + var SkipTag = map[string]bool + func GQDocument(s *Scraper, gqdoc *goquery.Document, rawDyn bool) (output.ItemMaps, error) + func GQSelection(s *Scraper, sel *goquery.Selection, baseUrl string, rawDyn bool) (output.ItemMap, error) + func GetURL(e *ElementLocation, sel *goquery.Selection, baseURL string) (*url.URL, error) + func Page(s *Scraper, globalConfig *GlobalConfig, rawDyn bool, path string) (output.ItemMaps, error) + func SubGQDocument(c *Config, s *Scraper, im output.ItemMap, fname string, ...) error + func Subpages(c *Config, s *Scraper, ims output.ItemMaps, ...) error + type Config struct + Global GlobalConfig + ID ConfigID + ItemMaps output.ItemMaps + Scrapers []Scraper + Writer output.WriterConfig + func ReadConfig(configPath string) (*Config, error) + func (c Config) Copy() *Config + func (c Config) String() string + func (c Config) WriteToFile(dir string) error + type ConfigID struct + Field string + ID string + Slug string + SubID string + func (cid ConfigID) String() string + type DateComponent struct + Covers date.CoveredDateParts + ElementLocation ElementLocation + Layout []string + Transform []TransformConfig + type ElementLocation struct + AllNodes bool + Attr string + ChildIndex int + EntireSubtree bool + JsonSelector string + MaxLength int + RegexExtract RegexConfig + Selector string + Separator string + type ElementLocations []ElementLocation + func (e *ElementLocations) UnmarshalYAML(value *yaml.Node) error + type Field struct + CanBeEmpty bool + Components []DateComponent + DateLanguage string + DateLocation string + Default string + ElementLocations ElementLocations + GuessYear bool + Hide bool + Name string + OnSubpage string + Separator string + Transform []TransformConfig + Type string + Value string + type Filter struct + DateComp time.Time + DateOp string + Expression string + Field string + Match bool + RegexComp *regexp.Regexp + Type string + func (f *Filter) FilterMatch(value interface{}) bool + func (f *Filter) Initialize(fieldType string) error + type GlobalConfig struct + UserAgent string + type Paginator struct + Location ElementLocation + MaxPages int + type RegexConfig struct + Index int + RegexPattern string + type Scraper struct + Fields []Field + Filters []*Filter + Interaction []*types.Interaction + Item string + Name string + PageLoadWait int + Paginators []Paginator + RenderJs bool + URL string + func (c *Scraper) GetSubpageURLFields() []Field + type TransformConfig struct + RegexPattern string + Replacement string + TransformType string