scraper

package
v0.20.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 17, 2024 License: AGPL-3.0 Imports: 17 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func IsTimeoutError

func IsTimeoutError(err error) bool

Types

type DOMPaths

// DOMPaths groups five string fields named after parts of a search results
// page.
// NOTE(review): judging by the type name, each field presumably holds a DOM
// path/selector used to locate that part of the page — confirm against the
// engines that populate it.
type DOMPaths struct {
	// ResultsContainer presumably locates the element wrapping all results — TODO confirm.
	ResultsContainer string
	// Result presumably locates a single result entry — TODO confirm.
	Result           string
	// URL presumably locates the result's link — TODO confirm.
	URL              string
	// Title presumably locates the result's title — TODO confirm.
	Title            string
	// Description presumably locates the result's description text — TODO confirm.
	Description      string
}

type DOMPathsImages

// DOMPathsImages embeds DOMPaths and adds string fields for image sizes,
// a thumbnail URL and an image source.
// NOTE(review): as with DOMPaths, the values are presumably DOM
// paths/selectors rather than the data itself — confirm.
type DOMPathsImages struct {
	// Embedded, so every DOMPaths field is promoted onto DOMPathsImages.
	DOMPaths

	// OriginalSize holds Height and Width strings; presumably they relate to
	// the full-size image's dimensions — TODO confirm.
	OriginalSize struct {
		Height string
		Width  string
	}
	// ThumbnailSize holds Height and Width strings; presumably they relate to
	// the thumbnail's dimensions — TODO confirm.
	ThumbnailSize struct {
		Height string
		Width  string
	}
	// ThumbnailURL presumably relates to the thumbnail image's URL — TODO confirm.
	ThumbnailURL string
	// SourceName presumably relates to the name of the site hosting the image — TODO confirm.
	SourceName   string
	// SourceURL presumably relates to the URL of the page hosting the image — TODO confirm.
	SourceURL    string
}

type EngineBase

// EngineBase is the base struct for every search engine.
type EngineBase struct {
	// Name is an engines.Name value.
	// NOTE(review): presumably the value GetName returns — confirm.
	Name    engines.Name
	// Origins is a slice of engines.Name values.
	// NOTE(review): presumably the value GetOrigins returns — confirm.
	Origins []engines.Name
	// contains filtered or unexported fields
}

EngineBase is the base struct for every search engine.

func (EngineBase) Get

func (e EngineBase) Get(ctx *colly.Context, urll string, anonurll string) error

func (EngineBase) GetName

func (e EngineBase) GetName() engines.Name

GetName returns the name of the search engine.

func (EngineBase) GetOrigins

func (e EngineBase) GetOrigins() []engines.Name

GetOrigins returns the origins of the search engine.

func (*EngineBase) Init

func (e *EngineBase) Init(ctx context.Context, timings config.CategoryTimings)

Init initializes the EngineBase collector.

func (*EngineBase) OnHTML

func (e *EngineBase) OnHTML(goquerySelector string, f colly.HTMLCallback)

OnHTML registers a function. Function will be executed on every HTML element matched by the GoQuery Selector parameter. GoQuery Selector is a selector used by https://github.com/PuerkitoBio/goquery.

func (*EngineBase) OnRequest

func (e *EngineBase) OnRequest(f colly.RequestCallback)

OnRequest registers a function. Function will be executed on every request made by the Collector.

func (*EngineBase) OnResponse

func (e *EngineBase) OnResponse(f colly.ResponseCallback)

OnResponse registers a function. Function will be executed on every response.

func (EngineBase) PageFromContext

func (e EngineBase) PageFromContext(ctx *colly.Context) int

func (EngineBase) Post

func (e EngineBase) Post(ctx *colly.Context, urll string, body io.Reader, anonBody string) error

func (*EngineBase) ReInit

func (e *EngineBase) ReInit(ctx context.Context)

ReInit allows the Search method to be run again.

func (EngineBase) Wait

func (e EngineBase) Wait()

Wait returns when the collector jobs are finished.

type Enginer

// Enginer is the base interface used by each category specific interface.
type Enginer interface {
	// GetName returns an engines.Name; EngineBase documents its GetName as
	// returning the name of the search engine.
	GetName() engines.Name
	// GetOrigins returns a slice of engines.Name; EngineBase documents its
	// GetOrigins as returning the origins of the search engine.
	GetOrigins() []engines.Name
	// Init takes a context and the config.CategoryTimings; EngineBase
	// documents its Init as initializing the collector.
	Init(context.Context, config.CategoryTimings)
	// ReInit takes a context; EngineBase documents its ReInit as allowing
	// the Search method to be re-run.
	ReInit(context.Context)
	// Search takes a string, an options.Options and a channel of
	// result.ResultScraped, and returns a slice of errors and a bool.
	// NOTE(review): presumably the string is the query and scraped results
	// are sent on the channel; the meaning of the bool is not shown here —
	// confirm against an implementation.
	Search(string, options.Options, chan result.ResultScraped) ([]error, bool)
}

Enginer is the base interface used by each category-specific interface.

type Info

// Info groups a name, a domain, a URL and a list of origins.
// NOTE(review): presumably static descriptive data about one search engine —
// confirm against where it is constructed.
type Info struct {
	// Name is an engines.Name value; presumably the engine this Info describes — TODO confirm.
	Name    engines.Name
	// Domain is a string; presumably the engine's domain name — TODO confirm.
	Domain  string
	// URL is a string; presumably the engine's search URL — TODO confirm.
	URL     string
	// Origins is a slice of engines.Name values; its exact meaning is not shown here.
	Origins []engines.Name
}

type PageRankCounter

// PageRankCounter is a goroutine-safe counter for PageRank.
// Values are created with NewPageRankCounter; the Increment and GetPlusOne
// methods both take a page number.
type PageRankCounter struct {
	// contains filtered or unexported fields
}

PageRankCounter is a goroutine-safe counter for PageRank.

func NewPageRankCounter

func NewPageRankCounter(pages int) PageRankCounter

NewPageRankCounter creates a new PageRankCounter.

func (*PageRankCounter) GetPlusOne

func (prc *PageRankCounter) GetPlusOne(page int) int

GetPlusOne returns the count for a page, plus one.

func (*PageRankCounter) Increment

func (prc *PageRankCounter) Increment(page int)

Increment increments the count for a page.

type Params

// Params groups four string fields: Page, Locale, LocaleSec and SafeSearch.
// NOTE(review): presumably each holds the name of the URL query parameter an
// engine uses for that setting — confirm against the engines that fill it.
type Params struct {
	// Page presumably relates to the page-number parameter — TODO confirm.
	Page       string
	// Locale presumably relates to the locale parameter — TODO confirm.
	Locale     string
	// LocaleSec presumably relates to a secondary locale parameter — TODO confirm.
	LocaleSec  string
	// SafeSearch presumably relates to the safe-search parameter — TODO confirm.
	SafeSearch string
}

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL