Documentation ¶
Index ¶
- Constants
- type CrawlJob
- func (cj *CrawlJob) AddJS(typ pb.PageReqType, url, js, metaStr string) error
- func (cj *CrawlJob) AddPage(url, metaStr string) error
- func (cj *CrawlJob) IsAlive() bool
- func (cj *CrawlJob) Run()
- func (cj *CrawlJob) SetCallbackXpathMatch(mdata KVMap)
- func (cj *CrawlJob) SetCallbackXpathRegexp(mdata KVMap)
- func (cj *CrawlJob) SetLogin(loginUrl string, loginPayload, loginParseXpath KVMap, loginSuccessCheck KVMap)
- func (cj *CrawlJob) SetLoginChrome(loginUrl string, loginJS string, loginSuccessCheck KVMap)
- func (cj *CrawlJob) Start()
- func (cj *CrawlJob) Stop()
- type KVMap
- type PageHTML
Constants ¶
View Source
const (
	PageReqType_BUILTINJS = pb.PageReqType_BUILTINJS
	PageReqType_JSCRIPT = pb.PageReqType_JSCRIPT
)
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type CrawlJob ¶
type CrawlJob struct {
	SeedURL string
	MinDelay int32
	MaxDelay int32
	Follow bool
	CallbackUrlRegexp string
	FollowUrlRegexp string
	CallbackXpathMatch []*pb.KVP
	CallbackXpathRegexp []*pb.KVP
	MaxConcurrentRequests int32
	Useragent string
	Impolite bool
	Depth int32
	Repeat bool
	Frequency *google_protobuf1.Duration
	Firstrun *google_protobuf.Timestamp
	UnsafeNormalizeURL bool
	Login bool
	LoginUrl string
	LoginJS string
	LoginPayload []*pb.KVP
	LoginParseFields bool
	LoginParseXpath []*pb.KVP
	LoginSuccessCheck *pb.KVP
	CheckLoginAfterEachPage bool
	Chrome bool
	ChromeBinary string
	DomLoadTime int32
	NetworkIface string
	CancelOnDisconnect bool
	CheckContent bool
	Prefetch bool
	Callback func(*PageHTML, *CrawlJob)
	UsePageChan bool
	PageChan chan *pb.PageHTML
	// contains filtered or unexported fields
}
func NewCrawlJob ¶
func (*CrawlJob) AddJS ¶
func (cj *CrawlJob) AddJS(typ pb.PageReqType, url, js, metaStr string) error
func (*CrawlJob) SetCallbackXpathMatch ¶
func (*CrawlJob) SetCallbackXpathRegexp ¶
func (*CrawlJob) SetLoginChrome ¶
Click to show internal directories.
Click to hide internal directories.