Documentation ¶
Index ¶
- Variables
- func CssOrXpath(cssSelector CssSelector) string
- func DelaySleep(conf config.RabiConfig, tag string)
- func ExecEventCondition(ctx context.Context, conf config.RabiConfig, condition *Condition, ...) (bool, error)
- type Condition
- type CssSelector
- type Event
- type EventSelector
- type ExecSelector
- type HttpCookies
- type Job
- type Rabida
- func NewRabida(conf *config.RabiConfig) Rabida
- type RabidaImpl
- func (r RabidaImpl) Crawl(ctx context.Context, job Job, ...) error
- func (r RabidaImpl) CrawlWithConfig(ctx context.Context, job Job, ...) error
- func (r RabidaImpl) CrawlWithListeners(ctx context.Context, job Job, ...) error
- func (r RabidaImpl) DownloadFile(ctx context.Context, job Job, callback func(file string), ...) error
- func (r RabidaImpl) Html(ctx context.Context, father *cdp.Node, conf config.RabiConfig) *html.Node
- type SetAttribute
Constants ¶
This section is empty.
Variables ¶
View Source
// ErrNotFound is an exported, package-level error value whose dynamic type is
// the unexported errNotFound.
// NOTE(review): which operations return it is not visible here — confirm
// against the callers before relying on it as a sentinel.
var ErrNotFound error = errNotFound{}
Functions ¶
func CssOrXpath ¶
func CssOrXpath(cssSelector CssSelector) string
func DelaySleep ¶
func DelaySleep(conf config.RabiConfig, tag string)
func ExecEventCondition ¶
func ExecEventCondition(ctx context.Context, conf config.RabiConfig, condition *Condition, queryActions []chromedp.QueryOption) (bool, error)
Types ¶
type Condition ¶
// Condition bundles a comparison value, a check function and the selector the
// check is run against.
// NOTE(review): how the three fields are combined is decided by code that is
// not shown here (see ExecEventCondition) — confirm before relying on it.
type Condition struct {
	// Value is the string carried alongside the check.
	// NOTE(review): presumably passed as the `value` argument of CheckFunc — confirm.
	Value string `json:"value"`
	// CheckFunc takes two strings (text, value) and reports a bool.
	// NOTE(review): the field has no JSON tag and encoding/json cannot encode
	// function values, so marshaling a Condition is expected to fail — confirm
	// that this struct is only ever decoded or built in code.
	CheckFunc func(text, value string) bool
	// ExecSelector names the event type and selector associated with this condition.
	ExecSelector ExecSelector `json:"execSelector"`
}
type CssSelector ¶
// CssSelector describes how to locate an element (by CSS or by XPath) and
// which value to read from it, optionally recursing into nested selectors.
type CssSelector struct {
	// Css is the CSS selector expression. Use either Css or Xpath, not both.
	Css string `json:"css"`
	// Attr is the attribute to read; the default is innerText.
	Attr string `json:"attr"`
	// Scope supplies a scope to each selector.
	// In jQuery, this would look something like this: $(scope).find(selector)
	Scope string `json:"scope"`
	// Attrs maps each attribute to its own selector. Attrs are populated
	// recursively; the recursion stops when Attrs is nil.
	Attrs map[string]CssSelector `json:"attrs"`
	// Iframe, if true, makes the lookup happen inside the first iframe on the
	// page, or inside the iframe matched by IframeSelector when that is set.
	Iframe bool `json:"iframe"`
	// IframeSelector specifies which iframe to use when the page has multiple
	// iframe elements.
	IframeSelector *CssSelector `json:"iframeSelector"`
	// XpathScope is the scope for XPath lookups.
	// Note: choose only one of XPath and CSS selectors.
	XpathScope string `json:"xpathScope"`
	// Xpath is the XPath expression.
	// eg: //*[@id="zz"]/div[2]/ul/li[1]/text()
	// eg: //div[@id="indexCarousel"]//div[@class="item"]//img/@src
	Xpath string `json:"xpath"`
	// SetAttrs lists SetAttribute entries; the SetAttribute type is not shown here.
	// NOTE(review): presumably attributes to set on the matched element — confirm.
	SetAttrs []SetAttribute `json:"setAttrs"`
	// Before lists the events to perform before the value is retrieved.
	Before []EventSelector `json:"before"`
	// Condition is an optional condition; being a pointer, it may be nil.
	// NOTE(review): presumably gates whether the value is retrieved — confirm.
	Condition *Condition `json:"condition"`
}
type EventSelector ¶
// EventSelector pairs an event type with the selector it targets and a
// condition attached to it.
type EventSelector struct {
	// Type is the kind of event; the Event type is not shown here.
	Type Event `json:"type"`
	// Condition is stored by value, so it is always present (possibly zero).
	// NOTE(review): presumably the event runs only when the condition holds — confirm.
	Condition Condition `json:"condition"`
	// Selector locates the element the event applies to.
	Selector CssSelector `json:"selector"`
}
type ExecSelector ¶
// ExecSelector pairs an event type with the selector it targets. It has the
// same Type and Selector fields as EventSelector but carries no Condition.
type ExecSelector struct {
	// Type is the kind of event; the Event type is not shown here.
	Type Event `json:"type"`
	// Selector locates the element the event applies to.
	Selector CssSelector `json:"selector"`
}
type HttpCookies ¶
type Job ¶
// Job describes one crawl: where to start, what to extract, and how to move
// from page to page.
type Job struct {
	// Link is the URL you want to crawl.
	Link string `json:"link"`
	// CssSelector is the root selector.
	CssSelector CssSelector `json:"cssSelector"`
	// PrePaginate lists the events to perform before paginating.
	PrePaginate []EventSelector `json:"prePaginate"`
	// Paginator is the selector for the next page.
	Paginator CssSelector `json:"paginator"`
	// PaginatorFunc returns a CssSelector for the given current page number.
	// NOTE(review): presumably an alternative to the static Paginator when
	// non-nil — confirm. The field has no JSON tag and encoding/json cannot
	// encode function values, so marshaling a Job is expected to fail.
	PaginatorFunc func(currentPageNo int) CssSelector
	// Limit limits how many pages should be crawled.
	Limit int `json:"limit"`
	// StartPageBtn is a selector.
	// NOTE(review): presumably the control that opens the first page to crawl — confirm.
	StartPageBtn CssSelector `json:"startPageBtn"`
	// StartPageUrl is a URL string.
	// NOTE(review): presumably the page to start crawling from — confirm.
	StartPageUrl string `json:"startPageUrl"`
	// EnableCookies holds cookie settings; the HttpCookies type is not shown here.
	EnableCookies HttpCookies `json:"enableCookies"`
}
type Rabida ¶
// Rabida is the crawler interface returned by NewRabida. All methods take a
// context and a Job and report failure through the returned error.
//
// NOTE(review): the meaning of the bool returned by the crawl callbacks is not
// visible here (presumably it controls whether crawling continues) — confirm.
type Rabida interface {
	// Crawl runs job and hands each batch of results to callback together
	// with the next page URL and the current page number. before and after
	// are chromedp actions supplied by the caller.
	Crawl(ctx context.Context, job Job,
		callback func(ret []interface{}, nextPageUrl string, currentPageNo int) bool,
		before []chromedp.Action,
		after []chromedp.Action,
	) error

	// CrawlWithConfig has the same parameters as Crawl plus an explicit
	// config.RabiConfig value and variadic chromedp allocator options.
	CrawlWithConfig(ctx context.Context, job Job,
		callback func(ret []interface{}, nextPageUrl string, currentPageNo int) bool,
		before []chromedp.Action,
		after []chromedp.Action,
		conf config.RabiConfig,
		options ...chromedp.ExecAllocatorOption,
	) error

	// CrawlWithListeners differs from CrawlWithConfig in four ways: the
	// callback also receives a context, the config is passed by pointer,
	// the allocator options are a slice, and the variadic parameter is a
	// list of listener functions that each receive an event value.
	CrawlWithListeners(ctx context.Context, job Job,
		callback func(ctx context.Context, ret []interface{}, nextPageUrl string, currentPageNo int) bool,
		before []chromedp.Action,
		after []chromedp.Action,
		confPtr *config.RabiConfig,
		options []chromedp.ExecAllocatorOption,
		listeners ...func(ev interface{}),
	) error

	// DownloadFile runs job and invokes callback with a file string.
	// NOTE(review): presumably the path of each downloaded file — confirm.
	DownloadFile(ctx context.Context, job Job,
		callback func(file string),
		confPtr *config.RabiConfig,
		options ...chromedp.ExecAllocatorOption,
	) error
}
func NewRabida ¶
func NewRabida(conf *config.RabiConfig) Rabida
type RabidaImpl ¶
// RabidaImpl is a concrete type whose fields are all unexported. Its methods
// Crawl, CrawlWithConfig, CrawlWithListeners and DownloadFile share their
// names with the methods of the Rabida interface.
// NOTE(review): presumably the implementation that NewRabida returns — confirm.
type RabidaImpl struct {
// contains filtered or unexported fields
}
func (RabidaImpl) CrawlWithConfig ¶
func (RabidaImpl) CrawlWithListeners ¶
func (r RabidaImpl) CrawlWithListeners(ctx context.Context, job Job, callback func(ctx context.Context, ret []interface{}, nextPageUrl string, currentPageNo int) bool, before []chromedp.Action, after []chromedp.Action, confPtr *config.RabiConfig, options []chromedp.ExecAllocatorOption, listeners ...func(ev interface{})) error
func (RabidaImpl) DownloadFile ¶
func (r RabidaImpl) DownloadFile(ctx context.Context, job Job, callback func(file string), confPtr *config.RabiConfig, options ...chromedp.ExecAllocatorOption) error
type SetAttribute ¶
Click to show internal directories.
Click to hide internal directories.