pholcus_lib

package
v0.0.0-...-71bf9ba Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 28, 2020 License: Apache-2.0 Imports: 8 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Hollandandbarrett is the spider for product data on www.hollandandbarrett.com.
//
// The crawl runs through three rules, chained via the request queue:
//
//   - "获取版块URL" (fetch section URLs): reads the footer navigation links of
//     the home page and queues one JSON listing request per link.
//   - "获取总数" (fetch total count): extracts "totalNumRecs" from the listing
//     response and re-requests the same listing with showAll set to that total.
//   - "商品详情" (product details): decodes the JSON listing and outputs one
//     row per product record.
var Hollandandbarrett = &Spider{
	Name:        "Hollandandbarrett",
	Description: "Hollandand&Barrett商品数据 [Auto Page] [www.Hollandandbarrett.com]",

	EnableCookie: false,
	RuleTree: &RuleTree{
		// Root seeds the crawl with the site home page.
		Root: func(ctx *Context) {
			ctx.AddQueue(&request.Request{
				Url:  "http://www.hollandandbarrett.com/",
				Rule: "获取版块URL",
			})
		},

		Trunk: map[string]*Rule{

			// Fetch section URLs: one listing request per footer link.
			"获取版块URL": {
				ParseFunc: func(ctx *Context) {
					links := ctx.GetDom().Find(".footer-links nav.l-one-half a")

					links.Each(func(_ int, s *goquery.Selection) {
						href, ok := s.Attr("href")
						if !ok {
							return
						}
						// A link without a title attribute is still crawled;
						// its "type" is then the empty string.
						title, _ := s.Attr("title")
						ctx.AddQueue(&request.Request{
							Url:  "http://www.hollandandbarrett.com" + href + "?showAll=1&pageHa=1&es=true&vm=grid&imd=true&format=json&single=true",
							Rule: "获取总数",
							Temp: map[string]interface{}{
								"type":    title,
								"baseUrl": href,
							},
						})
					})
				},
			},

			// Fetch total count: learn how many records the section holds,
			// then request all of them in a single listing.
			"获取总数": {
				ParseFunc: func(ctx *Context) {
					match := hollandandbarrettTotalRe.FindStringSubmatch(ctx.GetDom().Text())

					// No match means the response carried no total at all;
					// treat it like an explicit zero instead of queueing a
					// request with an empty showAll parameter.
					if match == nil || match[1] == "0" {
						logs.Log.Critical("[消息提示:| 任务:%v | 关键词:%v | 规则:%v] 没有抓取到任何数据!!!\n", ctx.GetName(), ctx.GetKeyin(), ctx.GetRuleName())
						return
					}
					total := match[1]

					ctx.AddQueue(&request.Request{
						Url:  "http://www.hollandandbarrett.com" + ctx.GetTemp("baseUrl", "").(string) + "?showAll=" + total + "&pageHa=1&es=true&vm=grid&imd=true&format=json&single=true",
						Rule: "商品详情",
						Temp: map[string]interface{}{
							"type": ctx.GetTemp("type", "").(string),
						},
					})
				},
			},

			// Product details: emit one output row per product record.
			"商品详情": {

				// Title, original price, discounted price, discount,
				// star rating, category.
				ItemFields: []string{
					"标题",
					"原价",
					"折后价",
					"打折",
					"星级",
					"分类",
				},
				ParseFunc: func(ctx *Context) {
					var infos map[string]interface{}
					if err := json.Unmarshal([]byte(ctx.GetDom().Text()), &infos); err != nil {
						logs.Log.Error("error is %v\n", err)
						return
					}

					records, ok := hollandandbarrettRecords(infos)
					if !ok {
						logs.Log.Error("unexpected product listing layout | rule: %v\n", ctx.GetRuleName())
						return
					}

					for _, record := range records {
						attrs, ok := hollandandbarrettAttributes(record)
						if !ok {
							// One malformed record must not abort the rest
							// of the listing.
							logs.Log.Error("skipping malformed product record | rule: %v\n", ctx.GetRuleName())
							continue
						}

						ctx.Output(map[int]interface{}{
							0: hollandandbarrettAttr(attrs, "Name", ""),
							1: hollandandbarrettAttr(attrs, "lp", ""),
							2: hollandandbarrettAttr(attrs, "sp", ""),
							3: hollandandbarrettAttr(attrs, "prm", ""),
							// The "星级" column is filled from ratingCount
							// and defaults to "0" when absent.
							4: hollandandbarrettAttr(attrs, "ratingCount", "0"),
							5: ctx.GetTemp("type", ""),
						})
					}
				},
			},
		},
	},
}

// hollandandbarrettTotalRe captures the digits of the "totalNumRecs" field
// in a listing response. It is compiled once rather than on every parse.
var hollandandbarrettTotalRe = regexp.MustCompile(`"totalNumRecs":(\d+),`)

// hollandandbarrettFirstObject returns the first element of v as a JSON
// object. ok is false when v is not a JSON array, is empty, or its first
// element is not an object.
func hollandandbarrettFirstObject(v interface{}) (obj map[string]interface{}, ok bool) {
	arr, isArr := v.([]interface{})
	if !isArr || len(arr) == 0 {
		return nil, false
	}
	obj, ok = arr[0].(map[string]interface{})
	return obj, ok
}

// hollandandbarrettRecords walks contents[0].mainContent[0].records in the
// decoded listing and returns the record array. ok is false when any step of
// that path is missing or has an unexpected JSON type.
func hollandandbarrettRecords(infos map[string]interface{}) (records []interface{}, ok bool) {
	content, found := hollandandbarrettFirstObject(infos["contents"])
	if !found {
		return nil, false
	}
	mainContent, found := hollandandbarrettFirstObject(content["mainContent"])
	if !found {
		return nil, false
	}
	records, ok = mainContent["records"].([]interface{})
	return records, ok
}

// hollandandbarrettAttributes returns record.records[0].attributes for one
// entry of the listing's record array. ok is false when the entry does not
// have that shape.
func hollandandbarrettAttributes(record interface{}) (attrs map[string]interface{}, ok bool) {
	rec, isObj := record.(map[string]interface{})
	if !isObj {
		return nil, false
	}
	inner, found := hollandandbarrettFirstObject(rec["records"])
	if !found {
		return nil, false
	}
	attrs, ok = inner["attributes"].(map[string]interface{})
	return attrs, ok
}

// hollandandbarrettAttr formats attrs[key] as a string, returning fallback
// when the key is absent or null. Leading "[" and trailing "]" are stripped
// because fmt.Sprint renders a decoded JSON array as "[...]".
// NOTE(review): attribute values are presumably single-element arrays;
// confirm against a live response.
func hollandandbarrettAttr(attrs map[string]interface{}, key, fallback string) string {
	v := attrs[key]
	if v == nil {
		return fallback
	}
	s := strings.TrimRight(fmt.Sprint(v), "]")
	return strings.TrimLeft(s, "[")
}

Functions

This section is empty.

Types

This section is empty.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL