Documentation ¶
Index ¶
Constants ¶
View Source
// BookListTaskName is the task name assigned to Options.Name of
// DoubanBookTask.
const BookListTaskName = "douban_book_list"
Variables ¶
View Source
var DoubanBookTask = &spider.Task{ Options: spider.Options{ Name: BookListTaskName, Reload: true, WaitTime: 2, MaxDepth: 5, Cookie: cookie, Limit: limiter.Multi( rate.NewLimiter(limiter.Per(1, 3*time.Second), 1), rate.NewLimiter(limiter.Per(20, 60*time.Second), 20), ), }, Rule: spider.RuleTree{ Root: func() ([]*spider.Request, error) { roots := []*spider.Request{ { Priority: 1, URL: bookURL, Method: "GET", RuleName: "数据tag", }, } return roots, nil }, Trunk: map[string]*spider.Rule{ "数据tag": {ParseFunc: ParseTag}, "书籍列表": {ParseFunc: ParseBookList}, "书籍简介": { ItemFields: []string{ "书名", "作者", "页数", "出版社", "得分", "价格", "简介", }, ParseFunc: ParseBookDetail, }, }, }, }
Functions ¶
func ParseBookDetail ¶
func ParseBookDetail(ctx *spider.Context) (spider.ParseResult, error)
func ParseBookList ¶
func ParseBookList(ctx *spider.Context) (spider.ParseResult, error)
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.