Documentation ¶
Index ¶
- func NewQuotesEngine(opts ...tegenaria.EngineOption) *tegenaria.CrawlEngine
- type ExampleSpider
- func (e *ExampleSpider) ErrorHandler(err *tegenaria.Context, req chan<- *tegenaria.Context)
- func (e *ExampleSpider) GetFeedUrls() []string
- func (e *ExampleSpider) GetName() string
- func (e *ExampleSpider) Parser(resp *tegenaria.Context, req chan<- *tegenaria.Context) error
- func (e *ExampleSpider) StartRequest(req chan<- *tegenaria.Context)
- type HeadersDownloadMiddler
- type ProxyDownloadMiddler
- type QuotesbotItem
- type QuotesbotItemPipeline
- type QuotesbotItemPipeline2
- type QuotesbotItemPipeline3
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func NewQuotesEngine ¶
func NewQuotesEngine(opts ...tegenaria.EngineOption) *tegenaria.CrawlEngine
NewQuotesEngine 创建引擎
Types ¶
type ExampleSpider ¶
ExampleSpider 定义一个spider
func (*ExampleSpider) ErrorHandler ¶
func (e *ExampleSpider) ErrorHandler(err *tegenaria.Context, req chan<- *tegenaria.Context)
ErrorHandler 异常处理函数,用于处理数据抓取过程中出现的错误
func (*ExampleSpider) GetFeedUrls ¶
func (e *ExampleSpider) GetFeedUrls() []string
GetFeedUrls 获取种子urls
func (*ExampleSpider) StartRequest ¶
func (e *ExampleSpider) StartRequest(req chan<- *tegenaria.Context)
StartRequest 爬虫启动,请求种子urls
type HeadersDownloadMiddler ¶
HeadersDownloadMiddler 请求头设置下载中间件
func (HeadersDownloadMiddler) GetName ¶
func (m HeadersDownloadMiddler) GetName() string
func (HeadersDownloadMiddler) GetPriority ¶
func (m HeadersDownloadMiddler) GetPriority() int
GetPriority 获取优先级,数字越小优先级越高
func (HeadersDownloadMiddler) ProcessRequest ¶
func (m HeadersDownloadMiddler) ProcessRequest(ctx *tegenaria.Context) error
ProcessRequest 处理request请求对象,此处用于增加请求头,按优先级执行
func (HeadersDownloadMiddler) ProcessResponse ¶
func (m HeadersDownloadMiddler) ProcessResponse(ctx *tegenaria.Context, req chan<- *tegenaria.Context) error
ProcessResponse 用于处理请求成功之后的response,执行顺序为逆优先级,即优先级越高执行顺序越晚
type ProxyDownloadMiddler ¶
ProxyDownloadMiddler 代理挂载中间件
type QuotesbotItem ¶
QuotesbotItem tegenaria item示例
type QuotesbotItemPipeline ¶
type QuotesbotItemPipeline struct {
Priority int
}
QuotesbotItemPipeline tegenaria.PipelinesInterface 接口示例 用于item处理的pipeline
func (*QuotesbotItemPipeline) GetPriority ¶
func (p *QuotesbotItemPipeline) GetPriority() int
GetPriority 获取该pipeline的优先级
func (*QuotesbotItemPipeline) ProcessItem ¶
func (p *QuotesbotItemPipeline) ProcessItem(spider tegenaria.SpiderInterface, item *tegenaria.ItemMeta) error
ProcessItem item处理函数
type QuotesbotItemPipeline2 ¶
type QuotesbotItemPipeline2 struct {
Priority int
}
func (*QuotesbotItemPipeline2) GetPriority ¶
func (p *QuotesbotItemPipeline2) GetPriority() int
func (*QuotesbotItemPipeline2) ProcessItem ¶
func (p *QuotesbotItemPipeline2) ProcessItem(spider tegenaria.SpiderInterface, item *tegenaria.ItemMeta) error
type QuotesbotItemPipeline3 ¶
type QuotesbotItemPipeline3 struct {
Priority int
}
func (*QuotesbotItemPipeline3) GetPriority ¶
func (p *QuotesbotItemPipeline3) GetPriority() int
func (*QuotesbotItemPipeline3) ProcessItem ¶
func (p *QuotesbotItemPipeline3) ProcessItem(spider tegenaria.SpiderInterface, item *tegenaria.ItemMeta) error
Click to show internal directories.
Click to hide internal directories.