Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var People = &Spider{ Name: "人民网新闻抓取", Description: "人民网最新分类新闻", EnableCookie: false, RuleTree: &RuleTree{ Root: func(ctx *Context) { ctx.AddQueue(&request.Request{ Method: "GET", Url: "http://news.people.com.cn/210801/211150/index.js?cache=false", Rule: "新闻列表", }) }, Trunk: map[string]*Rule{ "新闻列表": { ParseFunc: func(ctx *Context) { str := ctx.GetText() err := json.Unmarshal([]byte(str), &news) if err != nil { log.Printf("解析错误: %v\n", err) return } newsLength := len(news.Items) for i := 0; i < newsLength; i++ { ctx.AddQueue(&request.Request{ Url: news.Items[i].Url, Rule: "热点新闻", Temp: map[string]interface{}{ "id": news.Items[i].Id, "title": news.Items[i].Title, "date": news.Items[i].Date, "newsType": news.Items[i].NodeId, }, }) } }, }, "热点新闻": { ItemFields: []string{ "ID", "标题", "内容", "类别", "ReleaseTime", }, ParseFunc: func(ctx *Context) { query := ctx.GetDom() content := query.Find("#p_content").Text() ctx.Output(map[int]interface{}{ 0: ctx.GetTemp("id", ""), 1: ctx.GetTemp("title", ""), 2: content, 3: ctx.GetTemp("newsType", ""), 4: ctx.GetTemp("date", ""), }) }, }, }, }, }
Functions ¶
This section is empty.
Types ¶
Click to show internal directories.
Click to hide internal directories.