Documentation ¶
Index ¶
- Constants
- Variables
- func EqualToComparison(n, comparison int) bool
- func GetActionBarNode(root *html.Node) *html.Node
- func GetNodeFromLink(targetURL string) (*html.Node, error)
- func GetRListNode(root *html.Node) *html.Node
- func GreaterThanComparison(n, comparison int) bool
- func GreaterThanOrEqualToComparison(n, comparison int) bool
- func LessThanComparison(n, comparison int) bool
- func LessThanOrEqualToComparison(n, comparison int) bool
- func RemoveBottumAnnouncements(rListNode *html.Node)
- func TweetAmountStringToInt(strTweetAmount string) (intTweetAmount int)
- func WrapBoardPageLink(targetBoard, pageNum string) string
- type BaseInfo
- type BoardCrawler
- func (b *BoardCrawler) Err() error
- func (b *BoardCrawler) GetCurrPageLinkNum() int
- func (b *BoardCrawler) GetNextPageLink() string
- func (b *BoardCrawler) GetPostsInfos() (infos []*BoardInfo)
- func (b *BoardCrawler) GetPostsInfosAndArticles() []*BoardInfoAndArticle
- func (b *BoardCrawler) GetPrevPageLink() string
- type BoardInfo
- type BoardInfoAndArticle
- type IntMatcher
- type MatcherRule
- type Parser
- type PostCrawler
- func (p *PostCrawler) Err() error
- func (p *PostCrawler) GetAuthor() string
- func (p *PostCrawler) GetContent() string
- func (p *PostCrawler) GetDate() string
- func (p *PostCrawler) GetIP() (ip net.IP)
- func (p *PostCrawler) GetTitle() string
- func (p *PostCrawler) GetTweets() (tweets []*Tweet)
- func (p *PostCrawler) GetURL() (url string)
- type RuleSetting
- func SetParserAuthor(author string) RuleSetting
- func SetParserAuthorMatcher(matcher StrMatcher) RuleSetting
- func SetParserContent(content string) RuleSetting
- func SetParserContentMatcher(matcher StrMatcher) RuleSetting
- func SetParserTitle(title string) RuleSetting
- func SetParserTitleMatcher(matcher StrMatcher) RuleSetting
- func SetParserTweetAmount(tweetAmount string) RuleSetting
- type StrMatcher
- type TextRule
- type Tweet
Constants ¶
View Source
const (
	TweetTagPraise = "推"
	TweetTagNormal = "→"
	TweetTagBoo    = "噓"
)
View Source
const (
	PttBaseURL         = "https://www.ptt.cc"
	PttBaseCrawlingURL = "https://www.ptt.cc/bbs/"
	DefaultParsingPage = "/index"
)
Variables ¶
View Source
var (
	ErrActionBarNodeNil = errors.New("pttifier.boardCrawler: action bar node is nil")
	ErrRListNodeNil     = errors.New("pttifier.boardCrawler: R list node is nil")
)
View Source
var (
	MaxReConnectTimes                   = 5
	MaxReConnectDelayTime time.Duration = 1
)
View Source
var (
ErrMainContainerNodeNil = errors.New("pttifierLib.postCrawler: main container node is nil")
)
Functions ¶
func EqualToComparison ¶
func GreaterThanComparison ¶
func LessThanComparison ¶
func TweetAmountStringToInt ¶
func WrapBoardPageLink ¶
Types ¶
type BoardCrawler ¶
type BoardCrawler struct {
// contains filtered or unexported fields
}
func NewBoardCrawler ¶
func NewBoardCrawler(root *html.Node) *BoardCrawler
func (*BoardCrawler) Err ¶
func (b *BoardCrawler) Err() error
func (*BoardCrawler) GetCurrPageLinkNum ¶
func (b *BoardCrawler) GetCurrPageLinkNum() int
func (*BoardCrawler) GetNextPageLink ¶
func (b *BoardCrawler) GetNextPageLink() string
func (*BoardCrawler) GetPostsInfos ¶
func (b *BoardCrawler) GetPostsInfos() (infos []*BoardInfo)
func (*BoardCrawler) GetPostsInfosAndArticles ¶
func (b *BoardCrawler) GetPostsInfosAndArticles() []*BoardInfoAndArticle
func (*BoardCrawler) GetPrevPageLink ¶
func (b *BoardCrawler) GetPrevPageLink() string
type BoardInfoAndArticle ¶
type IntMatcher ¶
type MatcherRule ¶
type MatcherRule struct {
	TitleMatcher       StrMatcher
	AuthorMatcher      StrMatcher
	ContentMatcher     StrMatcher
	TweetAmountMatcher IntMatcher
}
type Parser ¶
type Parser struct {
	TextRule
	MatcherRule
	// contains filtered or unexported fields
}
func NewParser ¶
func NewParser(settings ...RuleSetting) *Parser
func (*Parser) ParsingAll ¶
func (p *Parser) ParsingAll(posts []*BoardInfoAndArticle) (results []*BoardInfoAndArticle)
func (*Parser) ParsingBoard ¶
type PostCrawler ¶
type PostCrawler struct {
// contains filtered or unexported fields
}
func NewPostCrawler ¶
func NewPostCrawler(root *html.Node) *PostCrawler
func (*PostCrawler) Err ¶
func (p *PostCrawler) Err() error
func (*PostCrawler) GetAuthor ¶
func (p *PostCrawler) GetAuthor() string
func (*PostCrawler) GetContent ¶
func (p *PostCrawler) GetContent() string
func (*PostCrawler) GetDate ¶
func (p *PostCrawler) GetDate() string
func (*PostCrawler) GetIP ¶
func (p *PostCrawler) GetIP() (ip net.IP)
func (*PostCrawler) GetTitle ¶
func (p *PostCrawler) GetTitle() string
func (*PostCrawler) GetTweets ¶
func (p *PostCrawler) GetTweets() (tweets []*Tweet)
func (*PostCrawler) GetURL ¶
func (p *PostCrawler) GetURL() (url string)
type RuleSetting ¶
type RuleSetting func(*Parser)
func SetParserAuthor ¶
func SetParserAuthor(author string) RuleSetting
func SetParserAuthorMatcher ¶
func SetParserAuthorMatcher(matcher StrMatcher) RuleSetting
func SetParserContent ¶
func SetParserContent(content string) RuleSetting
func SetParserContentMatcher ¶
func SetParserContentMatcher(matcher StrMatcher) RuleSetting
func SetParserTitle ¶
func SetParserTitle(title string) RuleSetting
func SetParserTitleMatcher ¶
func SetParserTitleMatcher(matcher StrMatcher) RuleSetting
func SetParserTweetAmount ¶
func SetParserTweetAmount(tweetAmount string) RuleSetting
type StrMatcher ¶
Click to show internal directories.
Click to hide internal directories.