Documentation ¶
Index ¶
- func IsTimeoutError(err error) bool
- type DOMPaths
- type DOMPathsImages
- type EngineBase
- func (e EngineBase) Get(ctx *colly.Context, urll string, anonurll string) error
- func (e EngineBase) GetName() engines.Name
- func (e EngineBase) GetOrigins() []engines.Name
- func (e *EngineBase) Init(ctx context.Context, timings config.CategoryTimings)
- func (e *EngineBase) OnHTML(goquerySelector string, f colly.HTMLCallback)
- func (e *EngineBase) OnRequest(f colly.RequestCallback)
- func (e *EngineBase) OnResponse(f colly.ResponseCallback)
- func (e EngineBase) PageFromContext(ctx *colly.Context) int
- func (e EngineBase) Post(ctx *colly.Context, urll string, body io.Reader, anonBody string) error
- func (e *EngineBase) ReInit(ctx context.Context)
- func (e EngineBase) Wait()
- type Enginer
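- type PageRankCounter
- func NewPageRankCounter(pages int) PageRankCounter
- func (prc *PageRankCounter) GetPlusOne(page int) int
- func (prc *PageRankCounter) Increment(page int)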
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func IsTimeoutError ¶
func IsTimeoutError(err error) bool
Types ¶
type DOMPathsImages ¶
type EngineBase ¶
type EngineBase struct {
	Name    engines.Name
	Origins []engines.Name
	// contains filtered or unexported fields
}
EngineBase is the base struct for every search engine.
func (EngineBase) Get ¶
func (e EngineBase) Get(ctx *colly.Context, urll string, anonurll string) error
func (EngineBase) GetName ¶
func (e EngineBase) GetName() engines.Name
GetName returns the name of the search engine.
func (EngineBase) GetOrigins ¶
func (e EngineBase) GetOrigins() []engines.Name
GetOrigins returns the origins of the search engine.
func (*EngineBase) Init ¶
func (e *EngineBase) Init(ctx context.Context, timings config.CategoryTimings)
Init initializes the EngineBase collector.
func (*EngineBase) OnHTML ¶
func (e *EngineBase) OnHTML(goquerySelector string, f colly.HTMLCallback)
OnHTML registers a function that will be executed on every HTML element matched by the goquerySelector parameter. The selector is a GoQuery selector, as used by https://github.com/PuerkitoBio/goquery.
func (*EngineBase) OnRequest ¶
func (e *EngineBase) OnRequest(f colly.RequestCallback)
OnRequest registers a function that will be executed on every request made by the Collector.
func (*EngineBase) OnResponse ¶
func (e *EngineBase) OnResponse(f colly.ResponseCallback)
OnResponse registers a function that will be executed on every response.
func (EngineBase) PageFromContext ¶
func (e EngineBase) PageFromContext(ctx *colly.Context) int
func (*EngineBase) ReInit ¶
func (e *EngineBase) ReInit(ctx context.Context)
ReInit allows the Search method to be re-run.
func (EngineBase) Wait ¶
func (e EngineBase) Wait()
Wait returns when the collector jobs are finished.
type Enginer ¶
type Enginer interface {
	GetName() engines.Name
	GetOrigins() []engines.Name
	Init(context.Context, config.CategoryTimings)
	ReInit(context.Context)
	Search(string, options.Options, chan result.ResultScraped) ([]error, bool)
}
Enginer is the base interface used by each category-specific interface.
type PageRankCounter ¶
type PageRankCounter struct {
// contains filtered or unexported fields
}
PageRankCounter is a goroutine-safe counter for PageRank.
func NewPageRankCounter ¶
func NewPageRankCounter(pages int) PageRankCounter
NewPageRankCounter creates a new PageRankCounter.
func (*PageRankCounter) GetPlusOne ¶
func (prc *PageRankCounter) GetPlusOne(page int) int
GetPlusOne returns the count for the given page, plus one.
func (*PageRankCounter) Increment ¶
func (prc *PageRankCounter) Increment(page int)
Increment increments the count for a page.