pholcus_lib

package
v0.0.0-...-6df184f Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 31, 2018 License: Apache-2.0 Imports: 4 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var Zolpc = &Spider{
	Name:        "中关村笔记本",
	Description: "中关村笔记本数据 [Auto Page] [bbs.zol.com.cn/sjbbs/d544_p]",

	EnableCookie: false,
	RuleTree: &RuleTree{
		Root: func(ctx *Context) {
			ctx.Aid(map[string]interface{}{"loop": [2]int{1, 720}, "Rule": "生成请求"}, "生成请求")
		},

		Trunk: map[string]*Rule{

			"生成请求": {
				AidFunc: func(ctx *Context, aid map[string]interface{}) interface{} {
					for loop := aid["loop"].([2]int); loop[0] < loop[1]; loop[0]++ {
						ctx.AddQueue(&request.Request{
							Url:  "http://bbs.zol.com.cn/nbbbs/p" + strconv.Itoa(loop[0]) + ".html#c",
							Rule: aid["Rule"].(string),
						})
					}
					return nil
				},
				ParseFunc: func(ctx *Context) {
					query := ctx.GetDom()
					ss := query.Find("tbody").Find("tr[id]")
					ss.Each(func(i int, goq *goquery.Selection) {
						ctx.SetTemp("html", goq)
						ctx.Parse("获取结果")
					})
				},
			},

			"获取结果": {

				ItemFields: []string{
					"机型",
					"链接",
					"主题",
					"发表者",
					"发表时间",
					"总回复",
					"总查看",
					"最后回复者",
					"最后回复时间",
				},
				ParseFunc: func(ctx *Context) {
					var selectObj = ctx.GetTemp("html", &goquery.Selection{}).(*goquery.Selection)

					outUrls := selectObj.Find("td").Eq(1)
					outUrl, _ := outUrls.Attr("data-url")
					outUrl = "http://bbs.zol.com.cn/" + outUrl

					outTitles := selectObj.Find("td").Eq(1)
					outType := outTitles.Find(".iclass a").Text()
					outTitle := outTitles.Find("div a").Text()

					authors := selectObj.Find("td").Eq(2)
					author := authors.Find("a").Text()
					stime := authors.Find("span").Text()

					replys := selectObj.Find("td").Eq(3)
					reply := replys.Find("span").Text()
					read := replys.Find("i").Text()

					etimes := selectObj.Find("td").Eq(4)
					ereply := etimes.Find("a").Eq(0).Text()
					etime := etimes.Find("a").Eq(1).Text()

					ctx.Output(map[int]interface{}{
						0: outType,
						1: outUrl,
						2: outTitle,
						3: author,
						4: stime,
						5: reply,
						6: read,
						7: ereply,
						8: etime,
					})
				},
			},
		},
	},
}

Functions

This section is empty.

Types

This section is empty.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL