Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var JDSearch = &Spider{ Name: "京东搜索", Description: "京东搜索结果 [search.jd.com]", Keyin: KEYIN, Limit: LIMIT, EnableCookie: false, RuleTree: &RuleTree{ Root: func(ctx *Context) { ctx.Aid(map[string]interface{}{"loop": [2]int{0, 1}, "Rule": "生成请求"}, "生成请求") }, Trunk: map[string]*Rule{ "生成请求": { AidFunc: func(ctx *Context, aid map[string]interface{}) interface{} { for loop := aid["loop"].([2]int); loop[0] < loop[1]; loop[0]++ { ctx.AddQueue( &request.Request{ Url: "http://search.jd.com/Search?keyin=" + ctx.GetKeyin() + "&enc=utf-8&qrst=1&rt=1&stop=1&click=&psort=&page=" + strconv.Itoa(2*loop[0]+1), Rule: aid["Rule"].(string), }, ) ctx.AddQueue( &request.Request{ Url: "http://search.jd.com/Search?keyin=" + ctx.GetKeyin() + "&enc=utf-8&qrst=1&rt=1&stop=1&click=&psort=&page=" + strconv.Itoa(2*loop[0]+2), Rule: aid["Rule"].(string), }, ) } return nil }, ParseFunc: func(ctx *Context) { query := ctx.GetDom() total1 := query.Find("#top_pagi span.text").Text() re, _ := regexp.Compile(`[\d]+$`) total1 = re.FindString(total1) total, _ := strconv.Atoi(total1) if total > ctx.GetLimit() { total = ctx.GetLimit() } else if total == 0 { logs.Log.Critical("[消息提示:| 任务:%v | KEYIN:%v | 规则:%v] 没有抓取到任何数据!!!\n", ctx.GetName(), ctx.GetKeyin(), ctx.GetRuleName()) return } ctx.Aid(map[string]interface{}{"loop": [2]int{1, total}, "Rule": "搜索结果"}) ctx.Parse("搜索结果") }, }, "搜索结果": { ItemFields: []string{ "标题", "价格", "评论数", "星级", "链接", }, ParseFunc: func(ctx *Context) { query := ctx.GetDom() query.Find("#plist .list-h:nth-child(1) > li").Each(func(i int, s *goquery.Selection) { a := s.Find(".p-name a") title := a.Text() re, _ := regexp.Compile("\\<[\\S\\s]+?\\>") title = re.ReplaceAllString(title, " ") title = strings.Trim(title, " \t\n") price, _ := s.Find("strong[data-price]").First().Attr("data-price") e := s.Find(".extra").First() discuss := e.Find("a").First().Text() re, _ = regexp.Compile(`[\d]+`) discuss = re.FindString(discuss) level, _ := e.Find(".star span[id]").First().Attr("class") level = re.FindString(level) url, _ := a.Attr("href") ctx.Output(map[int]interface{}{ 0: title, 1: price, 2: discuss, 3: level, 4: url, }) }) }, }, }, }, }
Functions ¶
This section is empty.
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.