htmlquery

package

v1.3.1-sp1 Latest Latest Go to latest Published: Mar 6, 2024 License: AGPL-3.0 Imports: 13 Imported by: 1

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/yaklang/yaklang

Documentation ¶

Overview ¶

Package htmlquery provides functions for extracting data from HTML documents using XPath expressions.

Index ¶

Variables
func ExistsAttr(n *html.Node, name string) bool
func Find(top *html.Node, expr string) []*html.Node
func FindOne(top *html.Node, expr string) *html.Node
func InnerText(n *html.Node) string
func LoadDoc(path string) (*html.Node, error)
func LoadHTMLDocument(htmlText any) (*html.Node, error)
func LoadURL(url string) (*html.Node, error)
func OutputHTML(n *html.Node, self bool) string
func Parse(r io.Reader) (*html.Node, error)
func Query(top *html.Node, expr string) (*html.Node, error)
func QueryAll(top *html.Node, expr string) ([]*html.Node, error)
func QuerySelector(top *html.Node, selector *xpath.Expr) *html.Node
func QuerySelectorAll(top *html.Node, selector *xpath.Expr) []*html.Node
func SelectAttr(n *html.Node, name string) (val string)
type NodeNavigator
- func CreateXPathNavigator(top *html.Node) *NodeNavigator

Constants ¶

This section is empty.

Variables ¶

View Source

var DisableSelectorCache = false

DisableSelectorCache disables caching for the query selector when set to true.

View Source

var Exports = map[string]interface{}{
	"LoadHTMLDocument":     LoadHTMLDocument,
	"Find":                 Find,
	"FindOne":              FindOne,
	"QueryAll":             QueryAll,
	"Query":                Query,
	"InnerText":            InnerText,
	"SelectAttr":           SelectAttr,
	"ExistedAttr":          ExistsAttr,
	"CreateXPathNavigator": CreateXPathNavigator,

	"OutputHTML":     outputHTML,
	"OutputHTMLSelf": outputHTMLSelf,
}

View Source

var SelectorCacheMaxEntries = 50

SelectorCacheMaxEntries sets how many selector objects can be cached. The default is 50. Caching is disabled if SelectorCacheMaxEntries <= 0.

Functions ¶

func ExistsAttr ¶

func ExistsAttr(n *html.Node, name string) bool

ExistsAttr 判断传入节点是否存在指定名称的属性并返回布尔值 Example: ``` doc, err = xpath.LoadHTMLDocument(htmlText) node = xpath.FindOne(doc, "//div[@class='content']") existed = xpath.ExistedAttr(node, "class") // true ```

func Find ¶

func Find(top *html.Node, expr string) []*html.Node

Find 根据传入的 XPath 表达式从传入的节点开始查找匹配的节点，返回节点数组。如果表达式解析出错会 panic。 Example: ``` doc, err = xpath.LoadHTMLDocument(htmlText) nodes = xpath.Find(doc, "//div[@class='content']/text()") ```

func FindOne ¶

func FindOne(top *html.Node, expr string) *html.Node

FindOne 根据传入的 XPath 表达式从传入的节点开始查找第一个匹配的节点。如果表达式解析出错会 panic。 Example: ``` doc, err = xpath.LoadHTMLDocument(htmlText) node = xpath.FindOne(doc, "//div[@class='content']/text()") ```

func InnerText ¶

func InnerText(n *html.Node) string

InnerText 返回指定节点及其子节点的字符串 Example: ``` doc, err = xpath.LoadHTMLDocument(htmlText) node = xpath.FindOne(doc, "//div[@class='content']") text = xpath.InnerText(node) ```

func LoadDoc ¶

func LoadDoc(path string) (*html.Node, error)

LoadDoc loads the HTML document from the specified file path.

func LoadHTMLDocument ¶ added in v1.3.1

func LoadHTMLDocument(htmlText any) (*html.Node, error)

LoadHTMLDocument 解析传入的 HTML 文本，返回根节点结构体引用与错误 Example: ``` doc, err = xpath.LoadHTMLDocument(htmlText) ```

func LoadURL ¶

func LoadURL(url string) (*html.Node, error)

LoadURL loads the HTML document from the specified URL.

func OutputHTML ¶

func OutputHTML(n *html.Node, self bool) string

OutputHTML returns the text of the node, including tag names.

func Parse ¶

func Parse(r io.Reader) (*html.Node, error)

Parse returns the parse tree for the HTML from the given Reader.

func Query ¶

func Query(top *html.Node, expr string) (*html.Node, error)

Query 根据传入的 XPath 表达式从传入的节点开始查找第一个匹配的节点，返回节点与错误 Example: ``` doc, err = xpath.LoadHTMLDocument(htmlText) node, err = xpath.Query(doc, "//div[@class='content']/text()") ```

func QueryAll ¶

func QueryAll(top *html.Node, expr string) ([]*html.Node, error)

QueryAll 根据传入的 XPath 表达式从传入的节点开始查找匹配的节点，返回节点数组与错误 Example: ``` doc, err = xpath.LoadHTMLDocument(htmlText) nodes, err = xpath.QueryAll(doc, "//div[@class='content']/text()") ```

func QuerySelector ¶

func QuerySelector(top *html.Node, selector *xpath.Expr) *html.Node

QuerySelector returns the first matched html.Node by the specified XPath selector.

func QuerySelectorAll ¶

func QuerySelectorAll(top *html.Node, selector *xpath.Expr) []*html.Node

QuerySelectorAll searches for all html.Node elements that match the specified XPath selector.

func SelectAttr ¶

func SelectAttr(n *html.Node, name string) (val string)

SelectAttr 返回传入节点指定名称的属性值 Example: ``` doc, err = xpath.LoadHTMLDocument(htmlText) node = xpath.FindOne(doc, "//div[@class='content']") attr = xpath.SelectAttr(node, "class") ```

Types ¶

type NodeNavigator ¶

type NodeNavigator struct {
	// contains filtered or unexported fields
}

func CreateXPathNavigator ¶

func CreateXPathNavigator(top *html.Node) *NodeNavigator

CreateXPathNavigator 根据传入的节点创建一个新的 XPath 导航器，使用该导航器的方法来遍历该节点及其子节点 Example: ``` doc, err = xpath.LoadHTMLDocument(htmlText) nav = xpath.CreateXPathNavigator(doc) nav.MoveToChild() println(nav.String()) ```

func (*NodeNavigator) Copy ¶

func (h *NodeNavigator) Copy() xpath.NodeNavigator

func (*NodeNavigator) Current ¶

func (h *NodeNavigator) Current() *html.Node

func (*NodeNavigator) LocalName ¶

func (h *NodeNavigator) LocalName() string

func (*NodeNavigator) MoveTo ¶

func (h *NodeNavigator) MoveTo(other xpath.NodeNavigator) bool

func (*NodeNavigator) MoveToChild ¶

func (h *NodeNavigator) MoveToChild() bool

func (*NodeNavigator) MoveToFirst ¶

func (h *NodeNavigator) MoveToFirst() bool

func (*NodeNavigator) MoveToNext ¶

func (h *NodeNavigator) MoveToNext() bool

func (*NodeNavigator) MoveToNextAttribute ¶

func (h *NodeNavigator) MoveToNextAttribute() bool

func (*NodeNavigator) MoveToParent ¶

func (h *NodeNavigator) MoveToParent() bool

func (*NodeNavigator) MoveToPrevious ¶

func (h *NodeNavigator) MoveToPrevious() bool

func (*NodeNavigator) MoveToRoot ¶

func (h *NodeNavigator) MoveToRoot()

func (*NodeNavigator) NodeType ¶

func (h *NodeNavigator) NodeType() xpath.NodeType

func (*NodeNavigator) Prefix ¶

func (*NodeNavigator) Prefix() string

func (*NodeNavigator) String ¶

func (h *NodeNavigator) String() string

func (*NodeNavigator) Value ¶

func (h *NodeNavigator) Value() string

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL