Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var DefaultExtrator = &Extractor{ TextLineBreak: fmt.Sprintln(), }
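DefaultExtrator is the package's default Extractor; its TextLineBreak is set to the result of fmt.Sprintln(), i.e. a single newline.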
View Source
var LINEREAK = fmt.Sprintln()
Functions ¶
func GetHTMLContent ¶
GetHTMLContent fetches and cleans the raw HTML from the article.
func GetTextContent ¶
func GetTextContent(articleContent *goquery.Selection, customRender *TextRenderers) string
GetTextContent fetches and cleans the text from the article.
Types ¶
type Article ¶
type Article struct { URL string `json:"url"` Meta Metadata `json:"meta"` Text string `json:"text"` HTML string `json:"html"` Images []string `json:"images"` }
Article is the content of a URL.
func FromReader ¶
FromReader gets readable content from the specified io.Reader.
type Extractor ¶
type Extractor struct { TextLineBreak string CustomTextRenderers *TextRenderers }
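Extractor carries the settings used when extracting content: the line-break string for text output (TextLineBreak) and a set of custom text renderers (CustomTextRenderers).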
func (*Extractor) FromReader ¶
FromReader gets readable content from the specified io.Reader.
func (*Extractor) FromReaderWithSelector ¶
type Metadata ¶
type Metadata struct { Title string `json:"title"` Image string `json:"image"` Excerpt string `json:"excerpt"` Author string `json:"author"` MinReadTime int `json:"min_read_time"` MaxReadTime int `json:"max_read_time"` }
Metadata is the metadata of an article.
type TextRenderers ¶
type TextRenderers struct { LineBreak string // contains filtered or unexported fields }
func NewNoobTextRenderers ¶
func NewNoobTextRenderers(lineBreak string) *TextRenderers
func NewTextRenderers ¶
func NewTextRenderers(lineBreak string) *TextRenderers
func (*TextRenderers) Register ¶
func (r *TextRenderers) Register(tag string, before, after RenderFunc) error
func (*TextRenderers) WriteLineBreak ¶
func (r *TextRenderers) WriteLineBreak(buf *bytes.Buffer)
Click to show internal directories.
Click to hide internal directories.