Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var JDSpider = &Spider{ Name: "京东搜索new", Description: "京东搜索结果 [search.jd.com]", Keyin: KEYIN, Limit: LIMIT, EnableCookie: false, RuleTree: &RuleTree{ Root: func(ctx *Context) { ctx.Aid(map[string]interface{}{"Rule": "判断页数"}, "判断页数") }, Trunk: map[string]*Rule{ "判断页数": { AidFunc: func(ctx *Context, aid map[string]interface{}) interface{} { ctx.AddQueue( &request.Request{ Url: "http://search.jd.com/Search?keyword=" + ctx.GetKeyin() + "&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&bs=1&s=1&click=0&page=1", Rule: aid["Rule"].(string), }, ) return nil }, ParseFunc: func(ctx *Context) { query := ctx.GetDom() pageCount := 0 query.Find("script").Each(func(i int, s *goquery.Selection) { if strings.Contains(s.Text(), "page_count") { re, _ := regexp.Compile(`page_count:"\d{1,}"`) temp := re.FindString(s.Text()) re, _ = regexp.Compile(`\d{1,}`) temp2 := re.FindString(temp) pageCount, _ = strconv.Atoi(temp2) } }) ctx.Aid(map[string]interface{}{"PageCount": pageCount}, "生成请求") }, }, "生成请求": { AidFunc: func(ctx *Context, aid map[string]interface{}) interface{} { pageCount := aid["PageCount"].(int) for i := 1; i < pageCount; i++ { ctx.AddQueue( &request.Request{ Url: "http://search.jd.com/Search?keyword=" + ctx.GetKeyin() + "&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&bs=1&s=1&click=0&page=" + strconv.Itoa(i*2-1), Rule: "搜索结果", }, ) ctx.AddQueue( &request.Request{ Url: "http://search.jd.com/s_new.php?keyword=" + ctx.GetKeyin() + "&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&bs=1&s=31&scrolling=y&pos=30&page=" + strconv.Itoa(i*2), Rule: "搜索结果", }, ) } return nil }, }, "搜索结果": { ItemFields: []string{ "标题", "价格", "评论数", "链接", }, ParseFunc: func(ctx *Context) { query := ctx.GetDom() query.Find(".gl-item").Each(func(i int, s *goquery.Selection) { a := s.Find(".p-name.p-name-type-2 > a") title := a.Text() re, _ := regexp.Compile("\\<[\\S\\s]+?\\>") title = re.ReplaceAllString(title, " ") title = strings.Trim(title, " \t\n") price := s.Find(".p-price > strong > i").Text() discuss := s.Find(".p-commit > strong > a").Text() url, _ := a.Attr("href") url = "http:" + url if title != "" { ctx.Output(map[int]interface{}{ 0: title, 1: price, 2: discuss, 3: url, }) } }) }, }, }, }, }
Functions ¶
This section is empty.
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.