Documentation ¶
Index ¶
- type Context
- type DataCell
- type Fetcher
- type LimitConfig
- type Option
- func WithCookie(cookie string) Option
- func WithFetcher(f Fetcher) Option
- func WithLogger(logger *zap.Logger) Option
- func WithMaxDepth(maxDepth int64) Option
- func WithName(name string) Option
- func WithReload(reload bool) Option
- func WithStorage(s Storage) Option
- func WithURL(url string) Option
- func WithWaitTime(waitTime int64) Option
- type Options
- type ParseResult
- type Property
- type Request
- type Rule
- type RuleModule
- type RuleTree
- type Storage
- type Task
- type TaskConfig
- type TaskModule
- type Temp
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Context ¶
func (*Context) OutputJs ¶
func (ctx *Context) OutputJs(reg string) ParseResult
func (*Context) ParseJsReg ¶
func (ctx *Context) ParseJsReg(name string, reg string) ParseResult
ParseJsReg parse规则
type LimitConfig ¶
type Option ¶
type Option func(opts *Options)
func WithCookie ¶
func WithFetcher ¶
func WithLogger ¶
func WithMaxDepth ¶
func WithReload ¶
func WithStorage ¶
func WithWaitTime ¶
type Options ¶
type Options struct {
	Name     string `json:"name"` // 任务名称,应保证唯一性
	URL      string `json:"url"`
	Cookie   string `json:"cookie"`
	WaitTime int64  `json:"wait_time"` // 随机休眠时间,秒
	Reload   bool   `json:"reload"`    // 网站是否可以重复爬取
	MaxDepth int64  `json:"max_depth"`
	Fetcher  Fetcher
	Storage  Storage
	Limiter  limiter.RateLimiter
	// contains filtered or unexported fields
}
type ParseResult ¶
type ParseResult struct {
	Requests []*Request
	Items    []interface{}
}
type Request ¶
type Request struct {
	Method   string
	Task     *Task
	URL      string
	Depth    int64
	Priority int64
	RuleName string
	TmpData  *Temp
}
Request 单个任务请求
type Rule ¶
type Rule struct {
	ItemFields []string
	ParseFunc  func(*Context) (ParseResult, error) // 内容解析函数
}
Rule 采集规则节点
type RuleModule ¶
type RuleTree ¶
type RuleTree struct {
	Root  func() ([]*Request, error) // 根节点(执行入口)
	Trunk map[string]*Rule           // 规则哈希表
}
RuleTree 采集规则树
type Task ¶
type Task struct {
	Visited     map[string]bool //是否爬过该网站
	VisitedLock sync.Mutex
	Rule        RuleTree
	Closed      bool // 用于标识任务已经删除
	Options
}
Task 一个任务实例
type TaskConfig ¶
type TaskModule ¶
type TaskModule struct {
	Property
	Root  string       `json:"root_script"`
	Rules []RuleModule `json:"rule"`
}
Click to show internal directories.
Click to hide internal directories.