Documentation ¶
Index ¶
- Constants
- Variables
- type BaseSpiderConf
- type Context
- type MiddlewareHandler
- type MiddlewareHandlerErr
- type Option
- type Resource
- type Schedule
- type Spider
- func (s *Spider) SetConcurrent(num int) *Spider
- func (s *Spider) SetGlobalPreRun(f MiddlewareHandlerErr) *Spider
- func (s *Spider) SetRangeTime(sleepTime int) *Spider
- func (s *Spider) SetRules(key string, h ...MiddlewareHandler) *Spider
- func (s *Spider) SetTimeTicker(num int) *Spider
- func (s *Spider) Start()
- func (s *Spider) Stop()
Constants ¶
View Source
const (
	DefaultType = "default"
	RestyType   = "resty"
	ImocType    = "imroc"
)
Variables ¶
View Source
var Sc = NewSchedule()
Functions ¶
This section is empty.
Types ¶
type BaseSpiderConf ¶
type BaseSpiderConf struct {
	EnableCookie   bool
	EnableProxy    bool
	ProxyUrl       string
	DownloaderType string
	Cookie         *cookiejar.Jar
}
BaseSpiderConf 基础爬虫配置
type Context ¶
type MiddlewareHandler ¶
type MiddlewareHandler func(ctx *Context)
type MiddlewareHandlerErr ¶
type Option ¶
type Option func(b *BaseSpiderConf)
func NewCookieJar ¶
func NewDownloader ¶
func NewProxyUrl ¶
type Resource ¶
type Resource struct {
	SpiderUniqueKey string
	*downloader.Request
	// contains filtered or unexported fields
}
type Schedule ¶
type Schedule struct {
	ResourcePoolList chan Resource
	ConcurrentNum    int // 并发数量
	// contains filtered or unexported fields
}
func NewSchedule ¶
func NewSchedule() *Schedule
func (*Schedule) AddResource ¶
type Spider ¶
type Spider struct {
	UniqueKey     string                         // 唯一标识符
	STATUS        uint                           // 状态
	Downloader    downloader.Downloader          // 下载器
	RuleHandlers  map[string][]MiddlewareHandler // 规则中间件
	CloseCallback func(s *Spider)                // 回调关闭
	// contains filtered or unexported fields
}
func (*Spider) SetConcurrent ¶
func (*Spider) SetGlobalPreRun ¶
func (s *Spider) SetGlobalPreRun(f MiddlewareHandlerErr) *Spider
func (*Spider) SetRangeTime ¶
func (*Spider) SetRules ¶
func (s *Spider) SetRules(key string, h ...MiddlewareHandler) *Spider
SetRules 设置爬虫key=规则名
func (*Spider) SetTimeTicker ¶
SetTimeTicker 设置探活时间 默认十秒
Click to show internal directories.
Click to hide internal directories.