service

package module
v0.2.4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 17, 2022 | License: MIT | Imports: 24 | Imported by: 0

README

rabida

Documentation

Index

Constants

This section is empty.

Variables

View Source
// ErrNotFound is an exported error value backed by the unexported errNotFound
// type.
// NOTE(review): which calls return it is not visible in this listing —
// presumably lookups that match nothing; confirm before relying on it.
var ErrNotFound error = errNotFound{}

Functions

func CssOrXpath

func CssOrXpath(cssSelector CssSelector) string

func DelaySleep

func DelaySleep(conf config.RabiConfig, tag string)

func ExecEventCondition

func ExecEventCondition(ctx context.Context, conf config.RabiConfig, event EventSelector, queryActions []chromedp.QueryOption) (bool, error)

Types

type Condition

// Condition bundles a reference Value, a predicate CheckFunc and an
// ExecSelector. It is embedded in EventSelector as the Condition field.
// NOTE(review): presumably CheckFunc is called with the text obtained through
// ExecSelector as `text` and with Value as `value` — confirm against
// ExecEventCondition, whose body is not shown here.
type Condition struct {
	Value        string
	CheckFunc    func(text, value string) bool
	ExecSelector ExecSelector
}

type CssSelector

// CssSelector describes how to locate a value on a page, using either a CSS
// selector (Css/Scope) or an XPath expression (Xpath/XpathScope).
// SetAttrs carries attribute name/value pairs (see SetAttribute).
// NOTE(review): presumably SetAttrs is applied to the matched element(s) —
// confirm against the implementation.
type CssSelector struct {
	Css string
	// Attr defaults to innerText.
	Attr string
	// Scope supplies a scope for each selector.
	// In jQuery terms this would look something like: $(scope).find(selector)
	Scope string
	// Attrs maps each attribute to a CSS selector. When Attrs is nil,
	// recursive population stops.
	Attrs map[string]CssSelector
	// Iframe, if true, makes the lookup search for the element(s) within the
	// first iframe in the page.
	Iframe bool
	// XpathScope: note that only one of XPath or CSS selector should be used,
	// not both.
	XpathScope string
	// Xpath is an XPath expression.
	// eg: //*[@id="zz"]/div[2]/ul/li[1]/text()
	// eg: //div[@id="indexCarousel"]//div[@class="item"]//img/@src
	Xpath    string
	SetAttrs []SetAttribute
	// Before lists events to perform before the value is retrieved.
	Before []EventSelector
}

type Event

// Event is the string-typed kind of action carried in the Type field of
// EventSelector and ExecSelector.
type Event string
// Event values declared by this package.
// NOTE(review): judging by the names and string values, these click an
// element, set attribute values, and read an element's text respectively —
// confirm against the code that executes them.
const (
	ClickEvent              Event = "click"
	SetAttributesValueEvent Event = "setAttributesValue"
	TextEvent               Event = "getTextValue"
)

type EventSelector

// EventSelector combines an Event type, a Condition and the CssSelector the
// event targets. It is the element type of CssSelector.Before and
// Job.PrePaginate.
// NOTE(review): presumably the event only runs when Condition is satisfied
// (see ExecEventCondition) — confirm.
type EventSelector struct {
	Type      Event
	Condition Condition
	Selector  CssSelector
}

type ExecSelector

// ExecSelector pairs an Event type with the CssSelector it applies to. It is
// used as the ExecSelector field of Condition.
type ExecSelector struct {
	Type     Event
	Selector CssSelector
}

type HttpCookies

// HttpCookies holds cookie settings for a Job (see Job.EnableCookies).
// NOTE(review): RawCookies presumably uses the "name=value; name2=value2"
// header form and Domain is the cookie domain — the expected formats are not
// visible here; confirm.
type HttpCookies struct {
	RawCookies string
	Domain     string
	// Expires is expressed in hours; the default is 1 year.
	Expires int
}

type Job

// Job describes one crawl: the URL to visit, the root selector to extract,
// and how to paginate.
// StartPageBtn, StartPageUrl and EnableCookies carry no upstream
// documentation.
// NOTE(review): presumably StartPageBtn/StartPageUrl choose the page the
// crawl starts from and EnableCookies supplies cookies for the session —
// confirm against the implementation.
type Job struct {
	// Link is the URL you want to crawl.
	Link string
	// CssSelector is the root CSS selector.
	CssSelector CssSelector
	// PrePaginate lists events to perform before paginating.
	PrePaginate []EventSelector
	// Paginator is the CSS selector for the next page.
	Paginator CssSelector
	// Limit limits how many pages should be crawled.
	Limit         int
	StartPageBtn  CssSelector
	StartPageUrl  string
	EnableCookies HttpCookies
}

type Rabida

// Rabida is the crawler interface of this package. NewRabida returns a value
// of this type, and RabidaImpl declares a method for each entry below.
//
// For the Crawl* methods, callback receives a slice of results together with
// nextPageUrl and currentPageNo.
// NOTE(review): the meaning of callback's bool result is not visible here —
// presumably it controls whether crawling continues; confirm. Likewise,
// before/after are presumably chromedp actions run before and after page
// extraction — confirm.
type Rabida interface {
	// Crawl runs job, reporting results through callback.
	Crawl(ctx context.Context, job Job,

		callback func(ret []interface{}, nextPageUrl string, currentPageNo int) bool,

		before []chromedp.Action,

		after []chromedp.Action,
	) error

	// CrawlWithConfig is like Crawl but additionally takes an explicit
	// config.RabiConfig and optional chromedp allocator options.
	CrawlWithConfig(ctx context.Context, job Job,

		callback func(ret []interface{}, nextPageUrl string, currentPageNo int) bool,

		before []chromedp.Action,

		after []chromedp.Action,
		conf config.RabiConfig,
		options ...chromedp.ExecAllocatorOption,
	) error

	// CrawlWithListeners takes a pointer to the config, allocator options as a
	// slice, a callback that also receives a context, and variadic listener
	// functions.
	// NOTE(review): listeners are presumably registered for browser events
	// (each receives an ev interface{}) — confirm.
	CrawlWithListeners(ctx context.Context, job Job,

		callback func(ctx context.Context, ret []interface{}, nextPageUrl string, currentPageNo int) bool,

		before []chromedp.Action,

		after []chromedp.Action,
		confPtr *config.RabiConfig,
		options []chromedp.ExecAllocatorOption,
		listeners ...func(ev interface{}),
	) error

	// DownloadFile runs job and invokes callback with a file string.
	// NOTE(review): presumably the string is the path of a downloaded file —
	// confirm.
	DownloadFile(ctx context.Context, job Job,

		callback func(file string),
		confPtr *config.RabiConfig,
		options ...chromedp.ExecAllocatorOption,
	) error

	// CrawlTraversal takes only a context and a config (no Job or callback).
	// NOTE(review): what it traverses is not visible in this listing.
	CrawlTraversal(ctx context.Context, conf *config.RabiConfig) error
}

func NewRabida

func NewRabida(conf *config.RabiConfig) Rabida

type RabidaImpl

// RabidaImpl is the concrete type that declares, on value receivers, the
// Crawl, CrawlWithConfig, CrawlWithListeners, DownloadFile and CrawlTraversal
// methods listed in the Rabida interface, plus an Html method.
type RabidaImpl struct {
	// contains filtered or unexported fields
}

func (RabidaImpl) Crawl

func (r RabidaImpl) Crawl(ctx context.Context, job Job, callback func(ret []interface{}, nextPageUrl string, currentPageNo int) bool,
	before []chromedp.Action, after []chromedp.Action) error

func (RabidaImpl) CrawlTraversal added in v0.2.4

func (r RabidaImpl) CrawlTraversal(ctx context.Context, conf *config.RabiConfig) error

func (RabidaImpl) CrawlWithConfig

func (r RabidaImpl) CrawlWithConfig(ctx context.Context, job Job, callback func(ret []interface{}, nextPageUrl string, currentPageNo int) bool, before []chromedp.Action, after []chromedp.Action, conf config.RabiConfig, options ...chromedp.ExecAllocatorOption) error

func (RabidaImpl) CrawlWithListeners

func (r RabidaImpl) CrawlWithListeners(ctx context.Context, job Job, callback func(ctx context.Context, ret []interface{}, nextPageUrl string, currentPageNo int) bool, before []chromedp.Action, after []chromedp.Action, confPtr *config.RabiConfig, options []chromedp.ExecAllocatorOption, listeners ...func(ev interface{})) error

func (RabidaImpl) DownloadFile

func (r RabidaImpl) DownloadFile(ctx context.Context, job Job, callback func(file string), confPtr *config.RabiConfig, options ...chromedp.ExecAllocatorOption) error

func (RabidaImpl) Html

func (r RabidaImpl) Html(ctx context.Context, father *cdp.Node, conf config.RabiConfig) *html.Node

type SetAttribute

// SetAttribute is an attribute name/value pair. It is the element type of
// CssSelector.SetAttrs.
type SetAttribute struct {
	AttributeName  string
	AttributeValue string
}

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL