Documentation ¶
Index ¶
Constants ¶
View Source
const ( BODY_EXPR = "" /* 224-byte string literal not displayed */ BODY_EXPR_SHORT = ".ArticleBase-Body, .post, .content, article, body" )
View Source
const ( YC_HACKERNEWS_SOURCE = "YC HACKER NEWS" MEDIUM_SOURCE = "MEDIUM" )
View Source
const (
ARTICLE = "article"
)
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Document ¶
type Document struct { Kind string `json:"kind,omitempty"` URL string `json:"url,omitempty"` Source string `json:"source,omitempty"` Title string `json:"title,omitempty"` Text string `json:"text,omitempty"` Author string `json:"author,omitempty"` PublishDate int64 `json:"created,omitempty"` Keywords []string `json:"keywords,omitempty"` Comments int `json:"comments,omitempty"` Likes int `json:"likes,omitempty"` }
type WebLoader ¶
type WebLoader struct { Config *WebLoaderConfig // contains filtered or unexported fields }
Generic web site loader: a loader for web links and sites. The loaded content is cached.
func NewDefaultNewsSitemapLoader ¶
Loads articles from https://feeds.feedburner.com/TheHackersNews that have been posted in the last N days
func NewDefaultWebTextLoader ¶
func NewDefaultWebTextLoader(config *WebLoaderConfig) *WebLoader
sitemap_url can be "" if the collector is not intended for scraping any specific sitemap.
func NewMediumSiteLoader ¶
Loads Medium posts from https://medium.com/sitemap/sitemap.xml that have been modified in the last N days.
func NewRedditLinkLoader ¶
func NewRedditLinkLoader() *WebLoader
func NewYCHackerNewsSiteLoader ¶
func NewYCHackerNewsSiteLoader() *WebLoader
loads story links from https://hacker-news.firebaseio.com/v0/topstories.json posted in the last N days
func (*WebLoader) LoadDocument ¶
This function returns an instance of an extracted WebArticle if the content at the URL has an HTML body.
Click to show internal directories.
Click to hide internal directories.