Documentation ¶
Index ¶
- Variables
- func CssOrXpath(cssSelector CssSelector) string
- func DelaySleep(conf config.RabiConfig, tag string)
- func ExecEventCondition(ctx context.Context, conf config.RabiConfig, event EventSelector, ...) (bool, error)
- type Condition
- type CssSelector
- type Event
- type EventSelector
- type ExecSelector
- type HttpCookies
- type Job
- type Rabida
- type RabidaImpl
- func (r RabidaImpl) Crawl(ctx context.Context, job Job, ...) error
- func (r RabidaImpl) CrawlTraversal(ctx context.Context, conf *config.RabiConfig) error
- func (r RabidaImpl) CrawlWithConfig(ctx context.Context, job Job, ...) error
- func (r RabidaImpl) CrawlWithListeners(ctx context.Context, job Job, ...) error
- func (r RabidaImpl) DownloadFile(ctx context.Context, job Job, callback func(file string), ...) error
- func (r RabidaImpl) Html(ctx context.Context, father *cdp.Node, conf config.RabiConfig) *html.Node
- type SetAttribute
Constants ¶
This section is empty.
Variables ¶
View Source
var ErrNotFound error = errNotFound{}
Functions ¶
func CssOrXpath ¶
func CssOrXpath(cssSelector CssSelector) string
func DelaySleep ¶
func DelaySleep(conf config.RabiConfig, tag string)
func ExecEventCondition ¶
func ExecEventCondition(ctx context.Context, conf config.RabiConfig, event EventSelector, queryActions []chromedp.QueryOption) (bool, error)
Types ¶
type Condition ¶
type Condition struct {
	Value        string
	CheckFunc    func(text, value string) bool
	ExecSelector ExecSelector
}
type CssSelector ¶
type CssSelector struct {
	Css string
	// Attr defaults to innerText.
	Attr string
	// Scope supplies a scope to each selector.
	// In jQuery, this would look something like: $(scope).find(selector)
	Scope string
	// Attrs maps each attribute to a CSS selector. When Attrs is nil, recursive population stops.
	Attrs map[string]CssSelector
	// Iframe, if true, looks for the element(s) within the first iframe in the page.
	Iframe bool
	// XpathScope Note: use either XPath or a CSS selector, not both.
	XpathScope string
	// Xpath is an XPath expression.
	// e.g.: //*[@id="zz"]/div[2]/ul/li[1]/text()
	// e.g.: //div[@id="indexCarousel"]//div[@class="item"]//img/@src
	Xpath    string
	SetAttrs []SetAttribute
	// Before does something before retrieving the value.
	Before []EventSelector
}
type EventSelector ¶
type EventSelector struct {
	Type      Event
	Condition Condition
	Selector  CssSelector
}
type ExecSelector ¶
type ExecSelector struct {
	Type     Event
	Selector CssSelector
}
type HttpCookies ¶
type Job ¶
type Job struct {
	// Link is the URL you want to crawl.
	Link string
	// CssSelector is the root CSS selector.
	CssSelector CssSelector
	// PrePaginate does something before paginating.
	PrePaginate []EventSelector
	// Paginator is the CSS selector for the next page.
	Paginator CssSelector
	// Limit limits how many pages should be crawled.
	Limit         int
	StartPageBtn  CssSelector
	StartPageUrl  string
	EnableCookies HttpCookies
}
type Rabida ¶
type Rabida interface {
	Crawl(ctx context.Context, job Job,
		callback func(ret []interface{}, nextPageUrl string, currentPageNo int) bool,
		before []chromedp.Action,
		after []chromedp.Action,
	) error
	CrawlWithConfig(ctx context.Context, job Job,
		callback func(ret []interface{}, nextPageUrl string, currentPageNo int) bool,
		before []chromedp.Action,
		after []chromedp.Action,
		conf config.RabiConfig,
		options ...chromedp.ExecAllocatorOption,
	) error
	CrawlWithListeners(ctx context.Context, job Job,
		callback func(ctx context.Context, ret []interface{}, nextPageUrl string, currentPageNo int) bool,
		before []chromedp.Action,
		after []chromedp.Action,
		confPtr *config.RabiConfig,
		options []chromedp.ExecAllocatorOption,
		listeners ...func(ev interface{}),
	) error
	DownloadFile(ctx context.Context, job Job,
		callback func(file string),
		confPtr *config.RabiConfig,
		options ...chromedp.ExecAllocatorOption,
	) error
	CrawlTraversal(ctx context.Context, conf *config.RabiConfig) error
}
func NewRabida ¶
func NewRabida(conf *config.RabiConfig) Rabida
type RabidaImpl ¶
type RabidaImpl struct {
// contains filtered or unexported fields
}
func (RabidaImpl) CrawlTraversal ¶ added in v0.2.4
func (r RabidaImpl) CrawlTraversal(ctx context.Context, conf *config.RabiConfig) error
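func (RabidaImpl) CrawlWithConfig ¶
func (r RabidaImpl) CrawlWithConfig(ctx context.Context, job Job, callback func(ret []interface{}, nextPageUrl string, currentPageNo int) bool, before []chromedp.Action, after []chromedp.Action, conf config.RabiConfig, options ...chromedp.ExecAllocatorOption) error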
func (RabidaImpl) CrawlWithListeners ¶
func (r RabidaImpl) CrawlWithListeners(ctx context.Context, job Job, callback func(ctx context.Context, ret []interface{}, nextPageUrl string, currentPageNo int) bool, before []chromedp.Action, after []chromedp.Action, confPtr *config.RabiConfig, options []chromedp.ExecAllocatorOption, listeners ...func(ev interface{})) error
func (RabidaImpl) DownloadFile ¶
func (r RabidaImpl) DownloadFile(ctx context.Context, job Job, callback func(file string), confPtr *config.RabiConfig, options ...chromedp.ExecAllocatorOption) error
type SetAttribute ¶
Click to show internal directories.
Click to hide internal directories.