Documentation ¶
Index ¶
- Variables
- func AddJsReq(jreq map[string]interface{}) []*spider.Request
- func AddJsReqs(jsreqs []map[string]interface{}) []*spider.Request
- func GetFields(taskName string, ruleName string) []string
- type Config
- type Crawler
- func (c *Crawler) CreateWork()
- func (c *Crawler) HandleFailure(req *spider.Request)
- func (c *Crawler) HandleResult()
- func (c *Crawler) HasVisited(r *spider.Request) bool
- func (c *Crawler) Run(id string, cluster bool)
- func (c *Crawler) Schedule()
- func (c *Crawler) StoreVisited(reqs ...*spider.Request)
- type CrawlerStore
- type Option
- func WithFetcher(fetcher spider.Fetcher) Option
- func WithLogger(logger *zap.Logger) Option
- func WithRegistryUrl(url string) Option
- func WithScheduler(scheduler Scheduler) Option
- func WithSeeds(seed []*spider.Task) Option
- func WithStorage(s spider.Storage) Option
- func WithWorkCount(workCount int) Option
- type Schedule
- type Scheduler
Constants ¶
This section is empty.
Variables ¶
View Source
var Store = &CrawlerStore{ list: []*spider.Task{}, Hash: map[string]*spider.Task{}, }
Functions ¶
Types ¶
type Config ¶
type Config struct {
	WorkCount int
	Fetcher   spider.Fetcher
	Logger    *zap.Logger
	Seeds     []*spider.Request
}
Config holds the configuration options.
type Crawler ¶
type Crawler struct {
	Visited     map[string]bool
	VisitedLock sync.Mutex
	// contains filtered or unexported fields
}
func (*Crawler) CreateWork ¶
func (c *Crawler) CreateWork()
func (*Crawler) HandleFailure ¶
func (*Crawler) HandleResult ¶
func (c *Crawler) HandleResult()
func (*Crawler) StoreVisited ¶
type CrawlerStore ¶
func (*CrawlerStore) Add ¶
func (c *CrawlerStore) Add(task *spider.Task)
func (*CrawlerStore) AddJsTask ¶
func (c *CrawlerStore) AddJsTask(m *spider.TaskModule)
type Option ¶
type Option func(opts *options)
func WithFetcher ¶
func WithLogger ¶
func WithRegistryUrl ¶
func WithScheduler ¶
func WithStorage ¶
func WithWorkCount ¶
Click to show internal directories.
Click to hide internal directories.