Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var BaiduNews = &Spider{ Name: "百度RSS新闻", Description: "百度RSS新闻,实现轮询更新 [Auto Page] [news.baidu.com]", EnableCookie: false, Namespace: nil, SubNamespace: func(self *Spider, dataCell map[string]interface{}) string { return dataCell["Data"].(map[string]interface{})["分类"].(string) }, RuleTree: &RuleTree{ Root: func(ctx *Context) { for k := range rss_BaiduNews { ctx.SetTimer(k, time.Minute*5, nil) ctx.Aid(map[string]interface{}{"loop": k}, "LOOP") } }, Trunk: map[string]*Rule{ "LOOP": { AidFunc: func(ctx *Context, aid map[string]interface{}) interface{} { k := aid["loop"].(string) v := rss_BaiduNews[k] ctx.AddQueue(&request.Request{ Url: v, Rule: "XML列表页", Header: http.Header{"Content-Type": []string{"application/xml"}}, Temp: map[string]interface{}{"src": k}, Reloadable: true, }) return nil }, }, "XML列表页": { ParseFunc: func(ctx *Context) { var src = ctx.GetTemp("src", "").(string) defer func() { ctx.RunTimer(src) ctx.Aid(map[string]interface{}{"loop": src}, "LOOP") }() page := ctx.GetText() rss := new(BaiduNewsRss) if err := xml.Unmarshal([]byte(page), rss); err != nil { logs.Log.Error("XML列表页: %v", err) return } content := rss.Channel for _, v := range content.Item { ctx.AddQueue(&request.Request{ Url: v.Link, Rule: "新闻详情", Temp: map[string]interface{}{ "title": CleanHtml(v.Title, 4), "description": CleanHtml(v.Description, 4), "src": src, "releaseTime": CleanHtml(v.PubDate, 4), "author": CleanHtml(v.Author, 4), }, }) } }, }, "新闻详情": { ItemFields: []string{ "标题", "描述", "内容", "发布时间", "分类", "作者", }, ParseFunc: func(ctx *Context) { var title = ctx.GetTemp("title", "").(string) infoStr, isReload := baiduNewsFn.prase(ctx) if isReload { return } ctx.Output(map[int]interface{}{ 0: title, 1: ctx.GetTemp("description", ""), 2: infoStr, 3: ctx.GetTemp("releaseTime", ""), 4: ctx.GetTemp("src", ""), 5: ctx.GetTemp("author", ""), }) }, }, }, }, }
Functions ¶
This section is empty.
Types ¶
type BaiduNewsData ¶
// BaiduNewsData is the decoding target for the "channel" element of a feed
// document (see BaiduNewsRss.Channel). It collects the channel's entries.
type BaiduNewsData struct {
// Item receives one BaiduNewsItem per "item" child element.
Item []BaiduNewsItem `xml:"item"`
}
type BaiduNewsItem ¶
type BaiduNewsRss ¶
// BaiduNewsRss is the top-level decoding target for a feed document; the
// BaiduNews spider passes it to xml.Unmarshal and then walks Channel.Item.
type BaiduNewsRss struct {
// Channel is populated from the document's "channel" element.
Channel BaiduNewsData `xml:"channel"`
}
Click to show internal directories.
Click to hide internal directories.