Documentation ¶
Overview ¶
COPYRIGHT https://github.com/golang/tools/blob/master/cmd/html2article/conv.go
Index ¶
- Variables
- func Compress(str string) string
- func CompressHtml(str string) string
- func DecodeHtml(header http.Header, word, src string) (dst string)
- func DefCode(header http.Header, html string) string
- func NewFromHtml(htmlStr string) (ext *extractor, err error)
- func NewFromNode(doc *html.Node) (ext *extractor, err error)
- func NewFromReader(reader io.Reader) (ext *extractor, err error)
- func NewFromUrl(urlStr string) (ext *extractor, err error)
- type Article
- type Info
- type Option
- type Style
Constants ¶
This section is empty.
Variables ¶
View Source
var (
	ERROR_NOTFOUND = errors.New("Content not found")
	DEFAULT_OPTION = &Option{
		RemoveNoise: true,
	}
)
Functions ¶
func NewFromHtml ¶
func NewFromNode ¶
func NewFromReader ¶
func NewFromUrl ¶
Types ¶
type Article ¶
type Article struct {
	// Basic
	Html        string `json:"content_html"`
	Content     string `json:"content"`
	Title       string `json:"title"`
	Publishtime int64  `json:"publish_time"`
	// Others
	Images      []string `json:"images"`
	ReadContent string   `json:"read_content"`
	// contains filtered or unexported fields
}
func (*Article) GetContentNode ¶
func (*Article) Paragraphs ¶
func (*Article) ParseImage ¶
ParseImage parses the image src into an absolute path.
func (*Article) ParseReadContent ¶
func (a *Article) ParseReadContent()
ParseReadContent parses the ReadContent into a more readable form.
type Info ¶
Click to show internal directories.
Click to hide internal directories.