Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var Hollandandbarrett = &Spider{ Name: "Hollandandbarrett", Description: "Hollandand&Barrett商品数据 [Auto Page] [www.Hollandandbarrett.com]", EnableCookie: false, RuleTree: &RuleTree{ Root: func(ctx *Context) { ctx.AddQueue(&request.Request{ Url: "http://www.hollandandbarrett.com/", Rule: "获取版块URL", }, ) }, Trunk: map[string]*Rule{ "获取版块URL": { ParseFunc: func(ctx *Context) { query := ctx.GetDom() lis := query.Find(".footer-links nav.l-one-half a") lis.Each(func(i int, s *goquery.Selection) { if url, ok := s.Attr("href"); ok { tit, _ := s.Attr("title") ctx.AddQueue(&request.Request{ Url: "http://www.hollandandbarrett.com" + url + "?showAll=1&pageHa=1&es=true&vm=grid&imd=true&format=json&single=true", Rule: "获取总数", Temp: map[string]interface{}{ "type": tit, "baseUrl": url, }, }, ) } }) }, }, "获取总数": { ParseFunc: func(ctx *Context) { query := ctx.GetDom() re, _ := regexp.Compile(`(?U)"totalNumRecs":[\d]+,`) total := re.FindString(query.Text()) re, _ = regexp.Compile(`[\d]+`) total = re.FindString(total) total = strings.Trim(total, " \t\n") if total == "0" { logs.Log.Critical("[消息提示:| 任务:%v | 关键词:%v | 规则:%v] 没有抓取到任何数据!!!\n", ctx.GetName(), ctx.GetKeyin(), ctx.GetRuleName()) } else { ctx.AddQueue(&request.Request{ Url: "http://www.hollandandbarrett.com" + ctx.GetTemp("baseUrl", "").(string) + "?showAll=" + total + "&pageHa=1&es=true&vm=grid&imd=true&format=json&single=true", Rule: "商品详情", Temp: map[string]interface{}{ "type": ctx.GetTemp("type", "").(string), }, }, ) } }, }, "商品详情": { ItemFields: []string{ "标题", "原价", "折后价", "打折", "星级", "分类", }, ParseFunc: func(ctx *Context) { query := ctx.GetDom() src := query.Text() infos := map[string]interface{}{} err := json.Unmarshal([]byte(src), &infos) if err != nil { logs.Log.Error("error is %v\n", err) return } else { for _, info1 := range infos["contents"].([]interface{})[0].(map[string]interface{})["mainContent"].([]interface{})[0].(map[string]interface{})["records"].([]interface{}) { info2 := info1.(map[string]interface{})["records"].([]interface{})[0].(map[string]interface{})["attributes"].(map[string]interface{}) var n, price1, price2, prm, level string if info2["Name"] == nil { n = "" } else { n = fmt.Sprint(info2["Name"]) n = strings.TrimRight(n, "]") n = strings.TrimLeft(n, "[") } if info2["lp"] == nil { price1 = "" } else { price1 = fmt.Sprint(info2["lp"]) price1 = strings.TrimRight(price1, "]") price1 = strings.TrimLeft(price1, "[") } if info2["sp"] == nil { price2 = "" } else { price2 = fmt.Sprint(info2["sp"]) price2 = strings.TrimRight(price2, "]") price2 = strings.TrimLeft(price2, "[") } if info2["prm"] == nil { prm = "" } else { prm = fmt.Sprint(info2["prm"]) prm = strings.TrimRight(prm, "]") prm = strings.TrimLeft(prm, "[") } if info2["ratingCount"] == nil { level = "0" } else { level = fmt.Sprint(info2["ratingCount"]) level = strings.TrimRight(level, "]") level = strings.TrimLeft(level, "[") } ctx.Output(map[int]interface{}{ 0: n, 1: price1, 2: price2, 3: prm, 4: level, 5: ctx.GetTemp("type", ""), }) } } }, }, }, }, }
Functions ¶
This section is empty.
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.