pholcus_lib

package
v0.0.0-...-6df184f Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 31, 2018 License: Apache-2.0 Imports: 7 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Regular expressions used by JDSearch. They are compiled once at package
// initialization rather than on every response / every list item; the
// patterns are constant, so MustCompile cannot fail at run time.
var (
	// jdSearchTrailingDigits matches the run of digits at the very end of a string.
	jdSearchTrailingDigits = regexp.MustCompile(`[\d]+$`)
	// jdSearchTag matches a "<...>" span (non-greedy).
	jdSearchTag = regexp.MustCompile("\\<[\\S\\s]+?\\>")
	// jdSearchDigits matches the first run of digits in a string.
	jdSearchDigits = regexp.MustCompile(`[\d]+`)
)

// JDSearch is the spider definition for JD search results (search.jd.com).
//
// Flow, as wired below:
//   - Root hands the "生成请求" rule an aid map with loop [0, 1).
//   - The "生成请求" AidFunc queues two search-page requests per loop step
//     (pages 2*i+1 and 2*i+2), tagged with the rule named in aid["Rule"].
//   - The "生成请求" ParseFunc reads a trailing number from the pager text,
//     caps it at ctx.GetLimit(), asks for further requests tagged
//     "搜索结果", and then parses the current page with "搜索结果".
//   - The "搜索结果" ParseFunc emits one record per product list item.
var JDSearch = &Spider{
	Name:        "京东搜索",
	Description: "京东搜索结果 [search.jd.com]",
	// Pausetime: 300,
	Keyin:        KEYIN,
	Limit:        LIMIT,
	EnableCookie: false,
	RuleTree: &RuleTree{
		Root: func(ctx *Context) {
			ctx.Aid(map[string]interface{}{"loop": [2]int{0, 1}, "Rule": "生成请求"}, "生成请求")
		},

		Trunk: map[string]*Rule{

			"生成请求": {
				// AidFunc expects aid["loop"] to be a [2]int half-open range
				// and aid["Rule"] to be the rule name for the queued requests.
				AidFunc: func(ctx *Context, aid map[string]interface{}) interface{} {
					for loop := aid["loop"].([2]int); loop[0] < loop[1]; loop[0]++ {
						// Each loop step covers two consecutive result pages.
						for _, page := range [2]int{2*loop[0] + 1, 2*loop[0] + 2} {
							ctx.AddQueue(
								&request.Request{
									Url:  "http://search.jd.com/Search?keyin=" + ctx.GetKeyin() + "&enc=utf-8&qrst=1&rt=1&stop=1&click=&psort=&page=" + strconv.Itoa(page),
									Rule: aid["Rule"].(string),
								},
							)
						}
					}
					return nil
				},
				ParseFunc: func(ctx *Context) {
					query := ctx.GetDom()

					// The total is taken from the digits that end the pager text.
					pagerText := query.Find("#top_pagi span.text").Text()

					// The Atoi error is deliberately ignored: a missing or
					// non-numeric match yields 0, which is handled below as
					// "nothing fetched".
					total, _ := strconv.Atoi(jdSearchTrailingDigits.FindString(pagerText))

					if total > ctx.GetLimit() {
						total = ctx.GetLimit()
					} else if total == 0 {
						logs.Log.Critical("[消息提示:| 任务:%v | KEYIN:%v | 规则:%v] 没有抓取到任何数据!!!\n", ctx.GetName(), ctx.GetKeyin(), ctx.GetRuleName())
						return
					}

					// Request the remaining pages, to be handled by "搜索结果".
					ctx.Aid(map[string]interface{}{"loop": [2]int{1, total}, "Rule": "搜索结果"})

					// Parse the page already in hand with the result rule.
					ctx.Parse("搜索结果")
				},
			},

			"搜索结果": {
				// Note: the order of these fields must match the integer
				// keys passed to ctx.Output below.
				ItemFields: []string{
					"标题",
					"价格",
					"评论数",
					"星级",
					"链接",
				},
				ParseFunc: func(ctx *Context) {
					query := ctx.GetDom()

					query.Find("#plist .list-h:nth-child(1) > li").Each(func(i int, s *goquery.Selection) {
						// Title: anchor text with any "<...>" spans replaced
						// by a space, then trimmed of spaces, tabs and newlines.
						a := s.Find(".p-name a")
						title := jdSearchTag.ReplaceAllString(a.Text(), " ")
						title = strings.Trim(title, " \t\n")

						// For the Attr calls below the "exists" flag is
						// ignored on purpose: a missing attribute simply
						// produces an empty field in the output record.
						price, _ := s.Find("strong[data-price]").First().Attr("data-price")

						e := s.Find(".extra").First()

						// Review count: first run of digits in the first link's text.
						discuss := jdSearchDigits.FindString(e.Find("a").First().Text())

						// Star level: first run of digits in the star element's class attribute.
						level, _ := e.Find(".star span[id]").First().Attr("class")
						level = jdSearchDigits.FindString(level)

						url, _ := a.Attr("href")

						// Keys are indexes into ItemFields.
						ctx.Output(map[int]interface{}{
							0: title,
							1: price,
							2: discuss,
							3: level,
							4: url,
						})
					})
				},
			},
		},
	},
}

Functions

This section is empty.

Types

This section is empty.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL