Documentation ¶
Overview ¶
Package htmlquery provides functions for extracting data from HTML documents using XPath expressions.
Index ¶
- Variables
- func ExistsAttr(n *html.Node, name string) bool
- func Find(top *html.Node, expr string) []*html.Node
- func FindOne(top *html.Node, expr string) *html.Node
- func InnerText(n *html.Node) string
- func LoadDoc(path string) (*html.Node, error)
- func LoadHTMLDocument(htmlText any) (*html.Node, error)
- func LoadURL(url string) (*html.Node, error)
- func OutputHTML(n *html.Node, self bool) string
- func Parse(r io.Reader) (*html.Node, error)
- func Query(top *html.Node, expr string) (*html.Node, error)
- func QueryAll(top *html.Node, expr string) ([]*html.Node, error)
- func QuerySelector(top *html.Node, selector *xpath.Expr) *html.Node
- func QuerySelectorAll(top *html.Node, selector *xpath.Expr) []*html.Node
- func SelectAttr(n *html.Node, name string) (val string)
- type NodeNavigator
- func (h *NodeNavigator) Copy() xpath.NodeNavigator
- func (h *NodeNavigator) Current() *html.Node
- func (h *NodeNavigator) LocalName() string
- func (h *NodeNavigator) MoveTo(other xpath.NodeNavigator) bool
- func (h *NodeNavigator) MoveToChild() bool
- func (h *NodeNavigator) MoveToFirst() bool
- func (h *NodeNavigator) MoveToNext() bool
- func (h *NodeNavigator) MoveToNextAttribute() bool
- func (h *NodeNavigator) MoveToParent() bool
- func (h *NodeNavigator) MoveToPrevious() bool
- func (h *NodeNavigator) MoveToRoot()
- func (h *NodeNavigator) NodeType() xpath.NodeType
- func (*NodeNavigator) Prefix() string
- func (h *NodeNavigator) String() string
- func (h *NodeNavigator) Value() string
Constants ¶
This section is empty.
Variables ¶
var DisableSelectorCache = false
DisableSelectorCache disables caching for the query selector when set to true.
var Exports = map[string]interface{}{ "LoadHTMLDocument": LoadHTMLDocument, "Find": Find, "FindOne": FindOne, "QueryAll": QueryAll, "Query": Query, "InnerText": InnerText, "SelectAttr": SelectAttr, "ExistedAttr": ExistsAttr, "CreateXPathNavigator": CreateXPathNavigator, "OutputHTML": outputHTML, "OutputHTMLSelf": outputHTMLSelf, }
var SelectorCacheMaxEntries = 50
SelectorCacheMaxEntries sets the maximum number of selector objects that can be cached. The default is 50. Caching is disabled if SelectorCacheMaxEntries <= 0.
Functions ¶
func ExistsAttr ¶
ExistsAttr 判断传入节点是否存在指定名称的属性并返回布尔值 Example: ``` doc, err = xpath.LoadHTMLDocument(htmlText) node = xpath.FindOne(doc, "//div[@class='content']") existed = xpath.ExistedAttr(node, "class") // true ```
func Find ¶
Find 根据传入的 XPath 表达式从传入的节点开始查找匹配的节点,返回节点数组 如果表达式解析出错会 panic Example: ``` doc, err = xpath.LoadHTMLDocument(htmlText) nodes = xpath.Find(doc, "//div[@class='content']/text()") ```
func FindOne ¶
FindOne 根据传入的 XPath 表达式从传入的节点开始查找第一个匹配的节点 如果表达式解析出错会 panic Example: ``` doc, err = xpath.LoadHTMLDocument(htmlText) node = xpath.FindOne(doc, "//div[@class='content']/text()") ```
func InnerText ¶
InnerText 返回指定节点及其子节点的字符串 Example: ``` doc, err = xpath.LoadHTMLDocument(htmlText) node = xpath.FindOne(doc, "//div[@class='content']") text = xpath.InnerText(node) ```
func LoadHTMLDocument ¶ added in v1.3.1
LoadHTMLDocument 解析传入的 HTML 文本,返回根节点结构体引用与错误 Example: ``` doc, err = xpath.LoadHTMLDocument(htmlText) ```
func OutputHTML ¶
OutputHTML returns the text including tag names.
func Query ¶
Query 根据传入的 XPath 表达式从传入的节点开始查找第一个匹配的节点,返回节点与错误 Example: ``` doc, err = xpath.LoadHTMLDocument(htmlText) node, err = xpath.Query(doc, "//div[@class='content']/text()") ```
func QueryAll ¶
QueryAll 根据传入的 XPath 表达式从传入的节点开始查找匹配的节点,返回节点数组与错误 Example: ``` doc, err = xpath.LoadHTMLDocument(htmlText) nodes, err = xpath.QueryAll(doc, "//div[@class='content']/text()") ```
func QuerySelector ¶
QuerySelector returns the first html.Node matched by the specified XPath selector.
func QuerySelectorAll ¶
QuerySelectorAll searches for all html.Node values that match the specified XPath selector.
Types ¶
type NodeNavigator ¶
type NodeNavigator struct {
// contains filtered or unexported fields
}
func CreateXPathNavigator ¶
func CreateXPathNavigator(top *html.Node) *NodeNavigator
CreateXPathNavigator 根据传入的节点创建一个新的 XPath 导航器,使用该导航器的方法来遍历该节点及其子节点 Example: ``` doc, err = xpath.LoadHTMLDocument(htmlText) nav = xpath.CreateXPathNavigator(doc) nav.MoveToChild() println(nav.String()) ```
func (*NodeNavigator) Copy ¶
func (h *NodeNavigator) Copy() xpath.NodeNavigator
func (*NodeNavigator) Current ¶
func (h *NodeNavigator) Current() *html.Node
func (*NodeNavigator) LocalName ¶
func (h *NodeNavigator) LocalName() string
func (*NodeNavigator) MoveTo ¶
func (h *NodeNavigator) MoveTo(other xpath.NodeNavigator) bool
func (*NodeNavigator) MoveToChild ¶
func (h *NodeNavigator) MoveToChild() bool
func (*NodeNavigator) MoveToFirst ¶
func (h *NodeNavigator) MoveToFirst() bool
func (*NodeNavigator) MoveToNext ¶
func (h *NodeNavigator) MoveToNext() bool
func (*NodeNavigator) MoveToNextAttribute ¶
func (h *NodeNavigator) MoveToNextAttribute() bool
func (*NodeNavigator) MoveToParent ¶
func (h *NodeNavigator) MoveToParent() bool
func (*NodeNavigator) MoveToPrevious ¶
func (h *NodeNavigator) MoveToPrevious() bool
func (*NodeNavigator) MoveToRoot ¶
func (h *NodeNavigator) MoveToRoot()
func (*NodeNavigator) NodeType ¶
func (h *NodeNavigator) NodeType() xpath.NodeType
func (*NodeNavigator) Prefix ¶
func (*NodeNavigator) Prefix() string
func (*NodeNavigator) String ¶
func (h *NodeNavigator) String() string
func (*NodeNavigator) Value ¶
func (h *NodeNavigator) Value() string