doubanbook

package
v0.0.0-...-278ce41 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 28, 2024 License: Apache-2.0 Imports: 7 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// BookListTaskName is the name given to DoubanBookTask through its
	// Options.Name field.
	BookListTaskName = "douban_book_list"
)

Variables

View Source
// DoubanBookTask is the spider task named BookListTaskName.
//
// It is seeded with a single GET request to bookURL that is dispatched to the
// "数据tag" rule, and it registers three rules in its rule tree: "数据tag",
// "书籍列表" and "书籍简介". Only the last one declares item fields.
var DoubanBookTask = &spider.Task{
	Options: spider.Options{
		Name:     BookListTaskName,
		Reload:   true,
		WaitTime: 2, // NOTE(review): unit is not visible here — confirm against spider.Options.
		MaxDepth: 5,
		Cookie:   cookie,
		Limit: limiter.Multi(
			// NOTE(review): presumably 1 event per 3 seconds with a burst of 1 —
			// confirm against limiter.Per.
			rate.NewLimiter(limiter.Per(1, 3*time.Second), 1),
			// NOTE(review): presumably 20 events per 60 seconds with a burst of
			// 20 — confirm against limiter.Per.
			rate.NewLimiter(limiter.Per(20, 60*time.Second), 20),
		),
	},
	Rule: spider.RuleTree{
		// Root yields the seed request list: one request, never an error.
		Root: func() ([]*spider.Request, error) {
			return []*spider.Request{{
				Priority: 1,
				URL:      bookURL,
				Method:   "GET",
				RuleName: "数据tag",
			}}, nil
		},
		Trunk: map[string]*spider.Rule{
			// "data tag" rule.
			"数据tag": {
				ParseFunc: ParseTag,
			},
			// "book list" rule.
			"书籍列表": {
				ParseFunc: ParseBookList,
			},
			// "book introduction" rule; the only rule that declares item fields.
			"书籍简介": {
				ItemFields: []string{
					"书名",  // title
					"作者",  // author
					"页数",  // page count
					"出版社", // publisher
					"得分",  // score
					"价格",  // price
					"简介",  // summary
				},
				ParseFunc: ParseBookDetail,
			},
		},
	},
}

Functions

func ExtraString

func ExtraString(contents []byte, reg *regexp.Regexp) string

func ParseBookDetail

func ParseBookDetail(ctx *spider.Context) (spider.ParseResult, error)

func ParseBookList

func ParseBookList(ctx *spider.Context) (spider.ParseResult, error)

func ParseTag

func ParseTag(ctx *spider.Context) (spider.ParseResult, error)

Types

This section is empty.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL