wxsg

package module
v0.0.0-...-f6461b9 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 10, 2024 License: MIT Imports: 13 Imported by: 0

README

wechatgocrawler

Documentation

Index

Examples

Constants

View Source
const (
	// WeixinSogouUrl is the base URL of the Sogou WeChat search site.
	WeixinSogouUrl  = "https://weixin.sogou.com"
	// SearchUrlFormat is the search URL template rooted at WeixinSogouUrl.
	// Its format verbs are, in order: type (%d), page (%d) and query (%s).
	SearchUrlFormat = WeixinSogouUrl + "/weixin?ie=utf8&type=%d&page=%d&query=%s"
)
View Source
// DefaultUA is presumably the default User-Agent string sent with requests.
// NOTE(review): the 132-byte literal is elided in this rendering — confirm
// the actual value against the source file.
const DefaultUA = "" /* 132-byte string literal not displayed */

Variables

This section is empty.

Functions

func GetArticleRealUrl

func GetArticleRealUrl(url string) (string, error)

将搜索获得的中间链接转换为文章真实链接

Types

type AccountInfo

// AccountInfo is one entry of the official-account search results returned
// by SearchAccount.
type AccountInfo struct {
	Name     string
	Url      string
	Avatar   string
	QRCode   string
	WeixinID string
	// The monthly post count can be obtained from
	// /websearch/weixin/pc/anti_account.jsp (field currently disabled).
	// Activity          string
	Introduction         string
	// Identify may be empty; the package example checks for "" before printing it.
	Identify             string
	// LatestArticleTitle may be empty; the package example checks for ""
	// before printing the latest-article fields.
	LatestArticleTitle   string
	LatestArticleUrl     string
	LatestArticlePubTime time.Time
}

func SearchAccount

func SearchAccount(query string, page int) ([]AccountInfo, error)

搜狗微信 - 搜公众号

Example
resutls, err := SearchAccount("睡前消息", 1)
if err != nil {
	fmt.Println(err)
	return
}
for i, result := range resutls {
	fmt.Printf("[%d] %s (%s) %s\n", i+1, result.Name, result.WeixinID, result.Introduction)
	if result.Identify != "" {
		fmt.Println(result.Identify)
	}
	if result.LatestArticleTitle != "" {
		fmt.Printf("%s %s\n", result.LatestArticleTitle, result.LatestArticlePubTime.String())
	}
	fmt.Println()
}
Output:

type Album

// Album holds the data of a topic (album) page.
type Album struct {
	Name     string
	AccName  string
	// Count is kept as a string, not a number.
	Count    string
	// Articles lists the per-article entries of the topic page.
	Articles []ArticleInfo2
	// Node is presumably the parsed HTML node passed to NewAlbum — confirm.
	Node     *html.Node
}

话题页数据

func GetAlbumByID

func GetAlbumByID(id string, isReverse bool) (*Album, error)

通过话题 ID 访问话题页, 支持正向或反向排序

func GetAlbumByUrl

func GetAlbumByUrl(url string, isReverse bool) (*Album, error)

通过链接访问话题页, 支持正向或反向排序

Example
album, err := GetAlbumByID("2036709839434842113", false)
if err != nil {
	fmt.Println(err)
	return
}
fmt.Println(album.Name, album.AccName, album.Count)
for _, info := range album.Articles {
	fmt.Printf("%d. %s\n%s\n\n", info.Index, info.Title, info.PubTime.String())
}
Output:

func NewAlbum

func NewAlbum(node *html.Node) *Album

type AlbumInfo

// AlbumInfo describes a topic (album) referenced from within an
// official-account article.
type AlbumInfo struct {
	ID    string
	Name  string
	// Count is kept as a string, not a number.
	Count string
	Url   string
}

公众号文章内话题信息

type Article

// Article holds the data of an official-account article.
type Article struct {
	URL     string
	Title   string
	Author  string
	AccName string
	PubTime time.Time
	// Albums lists the topics (albums) referenced inside the article.
	Albums  []AlbumInfo
	// Node is presumably the parsed HTML node passed to NewArticle — confirm.
	Node    *html.Node
}

公众号文章数据

func GetArticleByTitle

func GetArticleByTitle(title, accName string) (*Article, error)

通过标题与公众号名访问文章, 公众号名可为空, 默认获取搜索结果中第一个完全匹配的文章

Example
article, err := GetArticleByTitle("睡前消息【2021-12-31】政府给“剩女”出“嫁妆”", "睡前消息编辑部")
if err != nil {
	fmt.Println(err)
	return
}
fmt.Println(article.String())
Output:

func GetArticleByUrl

func GetArticleByUrl(url string) (*Article, error)

通过链接访问文章, 支持文章直链或跳转链接

Example
article, err := GetArticleByUrl("https://mp.weixin.qq.com/s/qgr3OR5Xha8MWMMv0mV7_A")
if err != nil {
	fmt.Println(err)
	return
}
fmt.Println(article.Albums)
fmt.Println(article.String())
Output:

func GetLatestArticleByAccount

func GetLatestArticleByAccount(accName, weixinID string) (*Article, error)

获取公众号最新文章信息, 公众号 ID 可为空, 默认读取搜索结果中第一个完全匹配的公众号

Example
article, err := GetLatestArticleByAccount("睡前消息编辑部", "MQZstudio")
if err != nil {
	fmt.Println(err)
	return
}
fmt.Println(article.String())
Output:

func NewArticle

func NewArticle(url string, node *html.Node) *Article

func (*Article) Content

func (a *Article) Content() string

打印文章, 只保留文字和图片链接

func (*Article) String

func (a *Article) String() string

type ArticleInfo

// ArticleInfo is one entry of the article search results returned by
// SearchArticle.
type ArticleInfo struct {
	Title   string
	// Url is presumably the link as it appears in the search results, which
	// may be an intermediate link (see GetArticleRealUrl) — confirm.
	Url     string
	Preview string
	AccName string
	AccUrl  string
	PubTime time.Time
	Image   string
}

func SearchArticle

func SearchArticle(query string, page int) ([]ArticleInfo, error)

搜狗微信 - 搜文章

Example
resutls, err := SearchArticle("睡前消息【2021-12-31】", 1)
if err != nil {
	fmt.Println(err)
	return
}
for i, result := range resutls {
	fmt.Printf("[%d] %s (%s) %s\n", i+1, result.Title, result.AccName, result.PubTime.String())
	fmt.Println(result.Preview)
	fmt.Println()
}
Output:

type ArticleInfo2

// ArticleInfo2 is the per-article information listed on a topic (album) page.
type ArticleInfo2 struct {
	Index   int
	Title   string
	Url     string
	PubTime time.Time
	Image   string
}

话题页文章信息

type Client

// Client wraps an *http.Client and can be customized by the caller.
// Take care of cookie handling, use one consistent User-Agent, and do not
// call at high frequency: doing so tends to trigger a graphical CAPTCHA,
// which is not handled yet.
type Client struct {
	*http.Client
	// NOTE(review): the name reads like a typo of "UserAgent"; presumably
	// this is the User-Agent header value — confirm before relying on it.
	UseAgent string
}

可自定义 Client, 注意处理 cookie, 使用统一 ua, 切勿高频调用, 否则容易出现图形验证码, 对此暂未处理

var (
	// DefaultClient is the package-level *Client.
	// NOTE(review): presumably the package-level functions (SearchAccount,
	// GetArticleByUrl, ...) delegate to it — confirm in the source.
	DefaultClient *Client
)

func (Client) GetAlbumByID

func (c Client) GetAlbumByID(id string, isReverse bool) (album *Album, err error)

func (Client) GetAlbumByUrl

func (c Client) GetAlbumByUrl(url string, isReverse bool) (album *Album, err error)

func (Client) GetArticleByTitle

func (c Client) GetArticleByTitle(title, accName string) (*Article, error)

func (Client) GetArticleByUrl

func (c Client) GetArticleByUrl(url string) (article *Article, err error)

func (Client) GetArticleRealUrl

func (c Client) GetArticleRealUrl(url string) (url2 string, err error)

搜索获得的文章地址往往指向一个中间页面, 需要拼接字符串才能获得真实地址

func (Client) GetLatestArticleByAccount

func (c Client) GetLatestArticleByAccount(accName, weixinID string) (*Article, error)

func (Client) SearchAccount

func (c Client) SearchAccount(query string, page int) (results []AccountInfo, err error)

func (Client) SearchArticle

func (c Client) SearchArticle(query string, page int) (results []ArticleInfo, err error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL