Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var Miyabaobei = &Spider{ Name: "蜜芽宝贝", Description: "蜜芽宝贝商品数据 [Auto Page] [www.miyabaobei.com]", EnableCookie: false, RuleTree: &RuleTree{ Root: func(ctx *Context) { ctx.AddQueue(&request.Request{Url: "http://www.miyabaobei.com/", Rule: "获取版块URL"}) }, Trunk: map[string]*Rule{ "获取版块URL": { ParseFunc: func(ctx *Context) { query := ctx.GetDom() lis := query.Find(".ccon") lis.Each(func(i int, s *goquery.Selection) { s.Find("a").Each(func(n int, ss *goquery.Selection) { if url, ok := ss.Attr("href"); ok { if !strings.Contains(url, "http://www.miyabaobei.com") { url = "http://www.miyabaobei.com" + url } ctx.Aid(map[string]interface{}{ "loop": [2]int{0, 1}, "urlBase": url, "req": map[string]interface{}{ "Rule": "生成请求", "Temp": map[string]interface{}{"baseUrl": url}, }, }, "生成请求") } }) }) }, }, "生成请求": { AidFunc: func(ctx *Context, aid map[string]interface{}) interface{} { req := aid["req"].(*request.Request) for loop := aid["loop"].([2]int); loop[0] < loop[1]; loop[0]++ { req.Url = aid["urlBase"].(string) + "&per_page=" + strconv.Itoa(loop[0]*40) ctx.AddQueue(req) } return nil }, ParseFunc: func(ctx *Context) { query := ctx.GetDom() totalPage := "1" urls := query.Find(".Lpage.page p a") if urls.Length() != 0 { if urls.Last().Text() == ">" { totalPage = urls.Eq(urls.Length() - 2).Text() } else { totalPage = urls.Last().Text() } } total, _ := strconv.Atoi(totalPage) ctx.Aid(map[string]interface{}{ "loop": [2]int{1, total}, "ruleBase": ctx.GetTemp("baseUrl", "").(string), "rep": map[string]interface{}{ "Rule": "商品列表", }, }) ctx.Parse("商品列表") }, }, "商品列表": { ItemFields: []string{ "标题", "价格", "类别", }, ParseFunc: func(ctx *Context) { query := ctx.GetDom() goodsType := query.Find(".crumbs").Text() re, _ := regexp.Compile("\\s") goodsType = re.ReplaceAllString(goodsType, "") re, _ = regexp.Compile("蜜芽宝贝>") goodsType = re.ReplaceAllString(goodsType, "") query.Find(".bmfo").Each(func(i int, s *goquery.Selection) { title, _ := s.Find("p a").First().Attr("title") price := s.Find(".f20").Text() ctx.Output(map[int]interface{}{ 0: title, 1: price, 2: goodsType, }) }) }, }, }, }, }
Functions ¶
This section is empty.
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.