scraper

package
v0.28.5 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 30, 2024 License: AGPL-3.0 Imports: 19 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func IsTimeoutError

func IsTimeoutError(err error) bool

func SuggestRespToSuggestions added in v0.22.0

func SuggestRespToSuggestions(data []byte) ([]string, error)

SuggestRespToSuggestions converts the JSON response of an opensearch.xml-compatible suggestions API into a slice of suggestions.

Types

type DOMPaths

// DOMPaths groups the string paths used to locate the parts of a search
// result inside a fetched page.
//
// NOTE(review): the path syntax is not visible here. EngineBase.OnHTML takes
// a goquery selector, so these are presumably goquery/CSS selectors — confirm.
type DOMPaths struct {
	ResultsContainer string // presumably the element wrapping the whole result list — confirm
	Result           string // presumably a single result entry — confirm
	URL              string // path to the result's URL
	Title            string // path to the result's title
	Description      string // path to the result's description
}

type DOMPathsImages

// DOMPathsImages embeds DOMPaths and adds the string paths specific to image
// results.
//
// NOTE(review): the field meanings below are inferred from the field names
// only; confirm against the engines that populate this struct.
type DOMPathsImages struct {
	DOMPaths

	// OriginalSize holds the paths to the dimensions of the original image.
	OriginalSize struct {
		Height string
		Width  string
	}
	// ThumbnailSize holds the paths to the dimensions of the thumbnail.
	ThumbnailSize struct {
		Height string
		Width  string
	}
	ThumbnailURL string // path to the thumbnail's URL
	SourceName   string // presumably the name of the site hosting the image — confirm
	SourceURL    string // presumably the URL of the page hosting the image — confirm
}

type EngineBase

// EngineBase is the base struct for every search engine.
type EngineBase struct {
	Name    engines.Name   // name of this engine; presumably what GetName returns — confirm
	Origins []engines.Name // origins of this engine; presumably what GetOrigins returns — confirm
	// contains filtered or unexported fields
}

Base struct for every search engine.

func (EngineBase) Get

func (e EngineBase) Get(ctx *colly.Context, urll string, anonurll string) error

func (EngineBase) GetName

func (e EngineBase) GetName() engines.Name

Used to get the name of the search engine.

func (EngineBase) GetOrigins

func (e EngineBase) GetOrigins() []engines.Name

Used to get the origins of the search engine.

func (*EngineBase) Init

func (e *EngineBase) Init(ctx context.Context, timings config.CategoryTimings)

Used to initialize the EngineBase collector.

func (*EngineBase) InitSearcher added in v0.26.0

func (e *EngineBase) InitSearcher(ctx context.Context, timings config.CategoryTimings)

Used to initialize the EngineBase collector for searching.

func (*EngineBase) InitSuggester added in v0.26.0

func (e *EngineBase) InitSuggester(ctx context.Context, timings config.CategoryTimings)

Used to initialize the EngineBase collector for suggesting.

func (*EngineBase) OnHTML

func (e *EngineBase) OnHTML(goquerySelector string, f colly.HTMLCallback)

OnHTML registers a function. Function will be executed on every HTML element matched by the GoQuery Selector parameter. GoQuery Selector is a selector used by https://github.com/PuerkitoBio/goquery.

func (*EngineBase) OnRequest

func (e *EngineBase) OnRequest(f colly.RequestCallback)

OnRequest registers a function. Function will be executed on every request made by the Collector.

func (*EngineBase) OnResponse

func (e *EngineBase) OnResponse(f colly.ResponseCallback)

OnResponse registers a function. Function will be executed on every response.

func (EngineBase) PageFromContext

func (e EngineBase) PageFromContext(ctx *colly.Context) int

func (EngineBase) Post

func (e EngineBase) Post(ctx *colly.Context, urll string, body io.Reader, anonBody string) error

func (*EngineBase) ReInitSearcher added in v0.26.0

func (e *EngineBase) ReInitSearcher(ctx context.Context)

Used to allow re-running the Search method.

func (*EngineBase) ReInitSuggester added in v0.26.0

func (e *EngineBase) ReInitSuggester(ctx context.Context)

Used to allow re-running the Suggest method.

func (EngineBase) Wait

func (e EngineBase) Wait()

Wait returns when the collector jobs are finished.

type Enginer

// Enginer is the interface that each search engine must implement to be
// usable as a search engine.
type Enginer interface {
	// GetName returns the name of the search engine.
	GetName() engines.Name
	// GetOrigins returns the origins of the search engine.
	GetOrigins() []engines.Name
	// Init initializes the engine with the given context and category
	// timings. (EngineBase.Init is documented as initializing the collector.)
	Init(context.Context, config.CategoryTimings)
}

Interface that each search engine must implement to be a Search Engine.

type ImageSearcher added in v0.28.5

// ImageSearcher is the interface that each search engine must implement to
// support searching image results.
type ImageSearcher interface {
	Enginer

	// InitSearcher prepares the engine for searching.
	InitSearcher(context.Context, config.CategoryTimings)
	// ReInitSearcher allows the search method to be run again.
	ReInitSearcher(context.Context)
	// ImageSearch runs an image search.
	// NOTE(review): presumably the arguments are the query, the search
	// options and a channel that scraped results are sent on; the meaning of
	// the returned bool is not visible here — confirm.
	ImageSearch(string, options.Options, chan result.ResultScraped) ([]error, bool)
}

Interface that each search engine must implement to support searching image results.

type PageRankCounter

// PageRankCounter is a goroutine-safe counter for PageRank.
//
// NOTE(review): NewPageRankCounter returns this struct by value while
// Increment and GetPlusOne use pointer receivers. If the unexported fields
// include a lock, the value must not be copied after first use — confirm.
type PageRankCounter struct {
	// contains filtered or unexported fields
}

A goroutine-safe counter for PageRank.

func NewPageRankCounter

func NewPageRankCounter(pages int) PageRankCounter

Create a new PageRankCounter.

func (*PageRankCounter) GetPlusOne

func (prc *PageRankCounter) GetPlusOne(page int) int

Get the count for a page + 1.

func (*PageRankCounter) Increment

func (prc *PageRankCounter) Increment(page int)

Increment the count for a page.

type Searcher added in v0.22.0

// Searcher is the interface that each search engine must implement to
// support searching general results.
type Searcher interface {
	Enginer

	// InitSearcher prepares the engine for searching.
	InitSearcher(context.Context, config.CategoryTimings)
	// ReInitSearcher allows the Search method to be run again.
	ReInitSearcher(context.Context)
	// Search runs a general search.
	// NOTE(review): presumably the arguments are the query, the search
	// options and a channel that scraped results are sent on; the meaning of
	// the returned bool is not visible here — confirm.
	Search(string, options.Options, chan result.ResultScraped) ([]error, bool)
}

Interface that each search engine must implement to support searching general results.

type Suggester added in v0.22.0

// Suggester is the interface that each search engine must implement to
// support suggesting.
type Suggester interface {
	Enginer

	// InitSuggester prepares the engine for suggesting.
	InitSuggester(context.Context, config.CategoryTimings)
	// ReInitSuggester allows the Suggest method to be run again.
	ReInitSuggester(context.Context)
	// Suggest fetches suggestions.
	// NOTE(review): presumably the arguments are the query, the options and
	// a channel that scraped suggestions are sent on; the meaning of the
	// returned bool is not visible here — confirm.
	Suggest(string, options.Options, chan result.SuggestionScraped) ([]error, bool)
}

Interface that each search engine must implement to support suggesting.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL