pholcus_lib

package
v0.0.0-...-fbc1b07 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 4, 2022 License: Apache-2.0 Imports: 6 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Miyabaobei is the spider definition for the 蜜芽宝贝 shop
// (www.miyabaobei.com).
//
// Crawl flow, as wired by the rules below:
//
//  1. Root queues the home page under the "获取版块URL" rule.
//  2. "获取版块URL" collects every link found under ".ccon" and asks the
//     AidFunc of "生成请求" to queue the first listing page of each link.
//  3. "生成请求" reads the pager of that first page, queues the remaining
//     pages under "商品列表", and parses the first page itself with "商品列表".
//  4. "商品列表" outputs one record (标题, 价格, 类别) per ".bmfo" element.
var Miyabaobei = &Spider{
	Name:        "蜜芽宝贝",
	Description: "蜜芽宝贝商品数据 [Auto Page] [www.miyabaobei.com]",

	EnableCookie: false,
	RuleTree: &RuleTree{
		// Root seeds the crawl with the site home page.
		Root: func(ctx *Context) {
			ctx.AddQueue(&request.Request{Url: "http://www.miyabaobei.com/", Rule: "获取版块URL"})
		},

		Trunk: map[string]*Rule{

			"获取版块URL": {
				// ParseFunc walks every anchor below ".ccon" and hands its
				// URL to the "生成请求" AidFunc.
				ParseFunc: func(ctx *Context) {
					query := ctx.GetDom()
					lis := query.Find(".ccon")
					lis.Each(func(i int, s *goquery.Selection) {
						s.Find("a").Each(func(n int, ss *goquery.Selection) {
							if url, ok := ss.Attr("href"); ok {
								// Links that do not already mention the site
								// origin are treated as site-relative.
								if !strings.Contains(url, "http://www.miyabaobei.com") {
									url = "http://www.miyabaobei.com" + url
								}
								// loop {0, 1} requests only the first page; it is
								// handled by "生成请求", which discovers the page
								// count. baseUrl travels in Temp so that rule can
								// build the follow-up page URLs.
								ctx.Aid(map[string]interface{}{
									"loop":    [2]int{0, 1},
									"urlBase": url,
									"req": map[string]interface{}{
										"Rule": "生成请求",
										"Temp": map[string]interface{}{"baseUrl": url},
									},
								}, "生成请求")
							}
						})
					})
				},
			},

			"生成请求": {
				// AidFunc queues one request per page index in the half-open
				// range aid["loop"] = [first, last).
				//
				// Expected aid keys:
				//   "loop"    [2]int  page index range, end exclusive
				//   "urlBase" string  listing URL the page offset is appended to
				//   "req"     map[string]interface{} request template with
				//             "Rule" (string, required) and
				//             "Temp" (map[string]interface{}, optional)
				//
				// A missing or mistyped required key panics on the type
				// assertion: that is a bug in the calling rule, not a
				// condition of the crawled page. It always returns nil.
				AidFunc: func(ctx *Context, aid map[string]interface{}) interface{} {
					urlBase := aid["urlBase"].(string)
					tmpl := aid["req"].(map[string]interface{})
					rule := tmpl["Rule"].(string)
					tempTmpl, _ := tmpl["Temp"].(map[string]interface{})

					for loop := aid["loop"].([2]int); loop[0] < loop[1]; loop[0]++ {
						// Every request gets its own Temp map and its own
						// Request value so queued requests never alias each
						// other.
						temp := make(map[string]interface{}, len(tempTmpl))
						for k, v := range tempTmpl {
							temp[k] = v
						}
						// The offset advances by 40 per page index.
						// NOTE(review): "&per_page=" presumes urlBase already
						// carries a query string — confirm against the site.
						ctx.AddQueue(&request.Request{
							Url:  urlBase + "&per_page=" + strconv.Itoa(loop[0]*40),
							Rule: rule,
							Temp: temp,
						})
					}
					return nil
				},
				// ParseFunc handles the first listing page of a section: it
				// reads the page count from the pager, queues pages
				// 1..total-1 under "商品列表", then parses the current response
				// with "商品列表" as well.
				ParseFunc: func(ctx *Context) {
					query := ctx.GetDom()
					totalPage := "1"

					urls := query.Find(".Lpage.page p a")

					if urls.Length() != 0 {
						// When the last pager link is the ">" arrow, the
						// page count is taken from the link before it.
						if urls.Last().Text() == ">" {
							totalPage = urls.Eq(urls.Length() - 2).Text()
						} else {
							totalPage = urls.Last().Text()
						}
					}
					total, err := strconv.Atoi(strings.TrimSpace(totalPage))
					if err != nil {
						// Unreadable pager: treat the section as a single
						// page, so no further pages are queued.
						total = 1
					}

					// The keys must match what the AidFunc above reads
					// ("urlBase" and "req").
					ctx.Aid(map[string]interface{}{
						"loop":    [2]int{1, total},
						"urlBase": ctx.GetTemp("baseUrl", "").(string),
						"req": map[string]interface{}{
							"Rule": "商品列表",
						},
					}, "生成请求")

					// The current response is the first page of the listing;
					// parse it with the product-list rule.
					ctx.Parse("商品列表")
				},
			},

			"商品列表": {

				// Output columns, addressed by index in ctx.Output below.
				ItemFields: []string{
					"标题",
					"价格",
					"类别",
				},
				// ParseFunc emits one record per ".bmfo" element on the page.
				ParseFunc: func(ctx *Context) {
					query := ctx.GetDom()

					// The category is the ".crumbs" text with all whitespace
					// and every "蜜芽宝贝>" occurrence removed.
					goodsType := query.Find(".crumbs").Text()
					goodsType = regexp.MustCompile(`\s`).ReplaceAllString(goodsType, "")
					goodsType = strings.Replace(goodsType, "蜜芽宝贝>", "", -1)

					query.Find(".bmfo").Each(func(i int, s *goquery.Selection) {
						// A missing title attribute yields an empty title.
						title, _ := s.Find("p a").First().Attr("title")

						price := s.Find(".f20").Text()

						// Keys are indexes into ItemFields.
						ctx.Output(map[int]interface{}{
							0: title,
							1: price,
							2: goodsType,
						})
					})
				},
			},
		},
	},
}

Functions

This section is empty.

Types

This section is empty.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL