htmlquery

package
v1.3.1-sp1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 6, 2024 License: AGPL-3.0 Imports: 13 Imported by: 1

Documentation

Overview

Package htmlquery provides functions for extracting data from HTML documents using XPath expressions.

Index

Constants

This section is empty.

Variables

View Source
var DisableSelectorCache = false

DisableSelectorCache disables caching for the query selector when set to true.

View Source
var Exports = map[string]interface{}{
	"LoadHTMLDocument":     LoadHTMLDocument,
	"Find":                 Find,
	"FindOne":              FindOne,
	"QueryAll":             QueryAll,
	"Query":                Query,
	"InnerText":            InnerText,
	"SelectAttr":           SelectAttr,
	"ExistedAttr":          ExistsAttr,
	"CreateXPathNavigator": CreateXPathNavigator,

	"OutputHTML":     outputHTML,
	"OutputHTMLSelf": outputHTMLSelf,
}
View Source
var SelectorCacheMaxEntries = 50

SelectorCacheMaxEntries sets the maximum number of selector objects that can be cached. The default is 50. Caching is disabled if SelectorCacheMaxEntries <= 0.

Functions

func ExistsAttr

func ExistsAttr(n *html.Node, name string) bool

ExistsAttr 判断传入节点是否存在指定名称的属性并返回布尔值。

Example:
```
doc, err = xpath.LoadHTMLDocument(htmlText)
node = xpath.FindOne(doc, "//div[@class='content']")
existed = xpath.ExistsAttr(node, "class") // true
```

func Find

func Find(top *html.Node, expr string) []*html.Node

Find 根据传入的 XPath 表达式从传入的节点开始查找匹配的节点,返回节点数组。如果表达式解析出错会 panic。

Example:
```
doc, err = xpath.LoadHTMLDocument(htmlText)
nodes = xpath.Find(doc, "//div[@class='content']/text()")
```

func FindOne

func FindOne(top *html.Node, expr string) *html.Node

FindOne 根据传入的 XPath 表达式从传入的节点开始查找第一个匹配的节点。如果表达式解析出错会 panic。

Example:
```
doc, err = xpath.LoadHTMLDocument(htmlText)
node = xpath.FindOne(doc, "//div[@class='content']/text()")
```

func InnerText

func InnerText(n *html.Node) string

InnerText 返回指定节点及其子节点的字符串。

Example:
```
doc, err = xpath.LoadHTMLDocument(htmlText)
node = xpath.FindOne(doc, "//div[@class='content']")
text = xpath.InnerText(node)
```

func LoadDoc

func LoadDoc(path string) (*html.Node, error)

LoadDoc loads the HTML document from the specified file path.

func LoadHTMLDocument added in v1.3.1

func LoadHTMLDocument(htmlText any) (*html.Node, error)

LoadHTMLDocument 解析传入的 HTML 文本,返回根节点结构体引用与错误。

Example:
```
doc, err = xpath.LoadHTMLDocument(htmlText)
```

func LoadURL

func LoadURL(url string) (*html.Node, error)

LoadURL loads the HTML document from the specified URL.

func OutputHTML

func OutputHTML(n *html.Node, self bool) string

OutputHTML returns the text including tag names.

func Parse

func Parse(r io.Reader) (*html.Node, error)

Parse returns the parse tree for the HTML from the given Reader.

func Query

func Query(top *html.Node, expr string) (*html.Node, error)

Query 根据传入的 XPath 表达式从传入的节点开始查找第一个匹配的节点,返回节点与错误。

Example:
```
doc, err = xpath.LoadHTMLDocument(htmlText)
node, err = xpath.Query(doc, "//div[@class='content']/text()")
```

func QueryAll

func QueryAll(top *html.Node, expr string) ([]*html.Node, error)

QueryAll 根据传入的 XPath 表达式从传入的节点开始查找匹配的节点,返回节点数组与错误。

Example:
```
doc, err = xpath.LoadHTMLDocument(htmlText)
nodes, err = xpath.QueryAll(doc, "//div[@class='content']/text()")
```

func QuerySelector

func QuerySelector(top *html.Node, selector *xpath.Expr) *html.Node

QuerySelector returns the first matched html.Node by the specified XPath selector.

func QuerySelectorAll

func QuerySelectorAll(top *html.Node, selector *xpath.Expr) []*html.Node

QuerySelectorAll returns all html.Node values that match the specified XPath selector.

func SelectAttr

func SelectAttr(n *html.Node, name string) (val string)

SelectAttr 返回传入节点指定名称的属性值。

Example:
```
doc, err = xpath.LoadHTMLDocument(htmlText)
node = xpath.FindOne(doc, "//div[@class='content']")
attr = xpath.SelectAttr(node, "class")
```

Types

type NodeNavigator

type NodeNavigator struct {
	// contains filtered or unexported fields
}

func CreateXPathNavigator

func CreateXPathNavigator(top *html.Node) *NodeNavigator

CreateXPathNavigator 根据传入的节点创建一个新的 XPath 导航器,使用该导航器的方法来遍历该节点及其子节点。

Example:
```
doc, err = xpath.LoadHTMLDocument(htmlText)
nav = xpath.CreateXPathNavigator(doc)
nav.MoveToChild()
println(nav.String())
```

func (*NodeNavigator) Copy

func (h *NodeNavigator) Copy() xpath.NodeNavigator

func (*NodeNavigator) Current

func (h *NodeNavigator) Current() *html.Node

func (*NodeNavigator) LocalName

func (h *NodeNavigator) LocalName() string

func (*NodeNavigator) MoveTo

func (h *NodeNavigator) MoveTo(other xpath.NodeNavigator) bool

func (*NodeNavigator) MoveToChild

func (h *NodeNavigator) MoveToChild() bool

func (*NodeNavigator) MoveToFirst

func (h *NodeNavigator) MoveToFirst() bool

func (*NodeNavigator) MoveToNext

func (h *NodeNavigator) MoveToNext() bool

func (*NodeNavigator) MoveToNextAttribute

func (h *NodeNavigator) MoveToNextAttribute() bool

func (*NodeNavigator) MoveToParent

func (h *NodeNavigator) MoveToParent() bool

func (*NodeNavigator) MoveToPrevious

func (h *NodeNavigator) MoveToPrevious() bool

func (*NodeNavigator) MoveToRoot

func (h *NodeNavigator) MoveToRoot()

func (*NodeNavigator) NodeType

func (h *NodeNavigator) NodeType() xpath.NodeType

func (*NodeNavigator) Prefix

func (*NodeNavigator) Prefix() string

func (*NodeNavigator) String

func (h *NodeNavigator) String() string

func (*NodeNavigator) Value

func (h *NodeNavigator) Value() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL