pholcus_list

package
v0.0.0-...-9141c74 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 6, 2020 License: MIT Imports: 9 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// FileTest is a Spider that crawls the app category page at sj.qq.com.
//
// The root rule queues the category URL under the "应用列表" rule. That rule's
// ParseFunc first drives a remote Chrome instance through chromedp (the
// DevTools websocket URL is read from the -devtools-ws-url flag): it navigates
// to the category page, waits for the ".load-more-btn" element, scrolls it
// into view five times with a two second pause after each scroll, and reads
// the element's text. It then walks every ".main li" node of the DOM returned
// by ctx.GetDom() and outputs one record per node with the fields declared in
// ItemFields.
//
// NOTE(review): the records are extracted from ctx.GetDom(), not from the
// chromedp session, so entries loaded by the scrolling are presumably not part
// of the parsed DOM — confirm this is intended.
var FileTest = &Spider{
	Name:        "应用宝-动态滚动",
	Description: "抓取应用宝列表和动态滚动标签",

	EnableCookie: false,
	RuleTree: &RuleTree{
		// Root seeds the crawl with the single category page.
		Root: func(ctx *Context) {
			ctx.AddQueue(&request.Request{
				Url:  "https://sj.qq.com/myapp/category.htm?orgame=1",
				Rule: "应用列表",
			})
		},

		Trunk: map[string]*Rule{

			"应用列表": {
				// Positional field names; the indexes used in ctx.Output
				// below correspond to this order.
				ItemFields: []string{
					"名称",
					"大小",
					"下载量",
					"链接",
					"标志",
				},

				ParseFunc: func(ctx *Context) {

					// The remote browser endpoint is mandatory; without it
					// the process is terminated.
					flag.Parse()
					if *flagDevToolWsUrl == "" {
						log.Fatal("must specify -devtools-ws-url")
					}

					// Attach to an already running browser instead of
					// launching a new one.
					allocatorContext, cancel := chromedp.NewRemoteAllocator(context.Background(), *flagDevToolWsUrl)
					defer cancel()

					// Reusing the name cancel is safe: the earlier defer has
					// already captured the allocator's cancel function.
					ctxt, cancel := chromedp.NewContext(allocatorContext)
					defer cancel()

					sel := `.load-more-btn`

					// Browser errors are logged and the parse continues on a
					// best-effort basis.
					if err := chromedp.Run(ctxt, chromedp.Navigate(`https://sj.qq.com/myapp/category.htm?orgame=1`)); err != nil {
						log.Printf("could not navigate to qq: %v", err)
					}

					if err := chromedp.Run(ctxt, chromedp.WaitVisible(sel)); err != nil {
						log.Printf("could not get section: %v", err)
					}

					// Scroll the "load more" element into view five times,
					// pausing two seconds after each attempt.
					for i := 0; i < 5; i++ {
						if err := chromedp.Run(ctxt, chromedp.ScrollIntoView(sel)); err != nil {
							log.Printf("could not scroll to section: %v", err)
						}
						time.Sleep(time.Second * 2)
						log.Printf("Get a srcoll.")
					}

					// The final text of the "load more" element is attached
					// to every output record as the "标志" field.
					var lastText string
					if err := chromedp.Run(ctxt, chromedp.Text(sel, &lastText)); err != nil {
						log.Printf("could not get last text: %v", err)
					}

					log.Printf("Get last mode: %s", lastText)

					query := ctx.GetDom()

					newList := query.Find(".main li")
					newList.Each(func(i int, s *goquery.Selection) {

						appTitle := s.Find(".app-info-desc a").Text()

						appSize := s.Find(".size").Text()

						// Drop the first two bytes and the last byte of the
						// download text (presumably decoration around the
						// count — verify against the page markup). The length
						// guard prevents a slice-bounds panic when the text
						// is missing or shorter than three bytes; in that
						// case the raw text is kept.
						longS := s.Find(".download").Text()
						if len(longS) >= 3 {
							longS = longS[2 : len(longS)-1]
						}
						appDownload := strings.Trim(longS, " ")

						// A missing href yields an empty link, which is
						// emitted as-is, so the "exists" result is ignored.
						url, _ := s.Find(".app-info-desc a").Attr("href")

						ctx.Output(map[int]interface{}{
							0: appTitle,
							1: appSize,
							2: appDownload,
							3: url,
							4: lastText,
						})
					})
				},
			},
		},
	},
}

Functions

This section is empty.

Types

This section is empty.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL