parse_html

package
v0.0.0-...-4488bc0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 26, 2021 License: Apache-2.0 Imports: 8 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// DefaultExecOrder is the package-level list of step names. Each entry equals
// the json/yaml tag of one SelectParams field: Selects, Each, SelectParams,
// Nodes and Contains, in that order.
// NOTE(review): presumably used when SelectParams.ExecOrder is left empty —
// confirm against the implementation.
var DefaultExecOrder = []string{
	"selects",
	"each",
	"select_params",
	"nodes",
	"contains",
}

Functions

This section is empty.

Types

type Contains

// Contains groups two optional TextClassAttrHtml criteria sets, serialized
// under the YAML keys "contains" and "not_contains".
//
// NOTE(review): presumably a selection is kept when it matches Contains and
// does not match NotContains — confirm against the implementation.
// NOTE(review): unlike most sibling config structs, these fields carry no json
// tags, so with encoding/json a "not_contains" key will not populate
// NotContains — confirm whether JSON input is expected here.
type Contains struct {
	Contains    *TextClassAttrHtml `yaml:"contains"`
	NotContains *TextClassAttrHtml `yaml:"not_contains"`
}

type DocumentSelection

// DocumentSelection wraps a single *goquery.Selection, exposed under the JSON
// key "selection".
type DocumentSelection struct {
	Selection *goquery.Selection `json:"selection"`
}

func NewDocumentSelectionByNode

func NewDocumentSelectionByNode(node *html.Node) (res *DocumentSelection)

type Each

// Each holds three optional sub-configurations, serialized as "all", "one"
// and "fields". All and One are single SelectParams; Fields is a named map of
// SelectParams.
//
// NOTE(review): presumably All applies to every matched element and One to a
// single element, with Fields extracting named values per element — confirm
// against the implementation.
type Each struct {
	All    *SelectParams        `json:"all" yaml:"all"`
	One    *SelectParams        `json:"one" yaml:"one"`
	Fields *HashMapSelectParams `json:"fields" yaml:"fields"`
}

type Eq

// Eq is an integer type used by Node.Eq (as *Eq, serialized as "eq").
// NOTE(review): presumably an element index within a selection — confirm.
type Eq int

type ExecOrder

// ExecOrder is a list of step names; it is the type of SelectParams.ExecOrder.
// NOTE(review): presumably the entries use the same names as DefaultExecOrder
// — confirm against the implementation.
type ExecOrder []string

type HashMapSelectParams

// HashMapSelectParams maps a string key to the *SelectParams configured for
// it. It is the type of the Fields members of Each and MatchParseHtml, and the
// receiver of ParsingHtml.
// NOTE(review): presumably each key becomes a key of the parsed result map —
// confirm against the implementation.
type HashMapSelectParams map[string]*SelectParams

func (*HashMapSelectParams) ParsingHtml

func (params *HashMapSelectParams) ParsingHtml(ctx context.Context, html string) (res map[string]interface{}, err error)

ParsingHtml is the entry point for parsing HTML.

type MatchHtmlMany

// MatchHtmlMany is a list of *MatchParseHtml rules and the receiver of
// RegexesMatchParseHtml.
type MatchHtmlMany []*MatchParseHtml

func (*MatchHtmlMany) RegexesMatchParseHtml

func (p *MatchHtmlMany) RegexesMatchParseHtml(ctx context.Context, html string) (map[string]interface{}, error)

RegexesMatchParseHtml is the entry point for parsing HTML via regular-expression matching.

type MatchParseHtml

// MatchParseHtml is one regex-guarded parsing rule: a regular expression to
// match against the HTML, plus either a custom error message or the field
// configuration used to parse it.
type MatchParseHtml struct {
	// Regex is the regular expression matched against the HTML.
	Regex string `json:"regex" yaml:"regex"`
	// Err is a custom error message; when set, it is returned directly if the
	// regular expression matches.
	Err string `json:"err" yaml:"err"`
	// Fields is the configuration used to parse the HTML.
	Fields *HashMapSelectParams `json:"fields" yaml:"fields"`
	// Version is an optional version label; it may be left empty.
	Version string `json:"version" yaml:"version"`
}

type Node

// Node is a set of boolean switches plus an optional Eq value, serialized
// under snake_case keys ("first", "last", "parent", "children",
// "prev_sibling", "next_sibling", "eq").
//
// NOTE(review): the field names suggest selection-traversal steps (first/last
// element, parent, children, previous/next sibling, element at index Eq) —
// confirm how and in what order they are applied.
type Node struct {
	First       bool `json:"first" yaml:"first"`
	Last        bool `json:"last" yaml:"last"`
	Parent      bool `json:"parent" yaml:"parent"`
	Children    bool `json:"children" yaml:"children"`
	PrevSibling bool `json:"prev_sibling" yaml:"prev_sibling"`
	NextSibling bool `json:"next_sibling" yaml:"next_sibling"`
	Eq          *Eq  `json:"eq" yaml:"eq"`
}

type RegexesMatchParseHtml

// RegexesMatchParseHtml is a wrapper holding a list of *MatchParseHtml rules
// under the key "regexes_match_parse_html".
type RegexesMatchParseHtml struct {
	RegexesMatchParseHtml []*MatchParseHtml `json:"regexes_match_parse_html" yaml:"regexes_match_parse_html"`
}

type SelectParams

// SelectParams is the configuration for one selection/extraction step. It is
// recursive: the SelectParams field nests another SelectParams, and Each
// refers back to SelectParams as well.
//
// The tags of Selects, Each, SelectParams, Nodes and Contains ("selects",
// "each", "select_params", "nodes", "contains") are the same strings listed in
// DefaultExecOrder.
// NOTE(review): presumably ExecOrder chooses the order in which those five
// steps run, and TextAttrHtml / DataFormat / DefaultValType shape the final
// value — confirm against the implementation.
type SelectParams struct {
	ExecOrder      ExecOrder               `json:"exec_order" yaml:"exec_order"`
	Selects        Selects                 `json:"selects" yaml:"selects"`
	Each           *Each                   `json:"each" yaml:"each"`
	SelectParams   *SelectParams           `json:"select_params" yaml:"select_params"`
	Nodes          *Node                   `json:"nodes" yaml:"nodes"`
	Contains       *Contains               `json:"contains" yaml:"contains"`
	TextAttrHtml   *TextAttrHtml           `json:"text_attr_html" yaml:"text_attr_html"`
	DataFormat     *data_format.DataFormat `json:"data_format" yaml:"data_format"`
	DefaultValType string                  `json:"default_val_type" yaml:"default_val_type"`
}

type Selects

// Selects is a list of strings; it is the type of SelectParams.Selects.
// NOTE(review): presumably each entry is a selector expression — confirm.
type Selects []string

type TextAttrHtml

// TextAttrHtml carries two boolean switches (Text, Html) and an attribute
// name (Attr), serialized under the YAML keys "text", "html" and "attr".
// NOTE(review): presumably selects whether the element's text, its HTML, or
// the named attribute is extracted — confirm, including precedence when more
// than one is set.
type TextAttrHtml struct {
	Text bool   `yaml:"text"`
	Html bool   `yaml:"html"`
	Attr string `yaml:"attr"`
}

type TextClassAttrHtml

// TextClassAttrHtml holds four string lists, serialized under the YAML keys
// "class", "attr", "html" and "text". It is the criteria type used by both
// fields of Contains.
// NOTE(review): presumably each list holds values to look for in an element's
// class, attributes, HTML and text respectively — confirm the matching rules.
type TextClassAttrHtml struct {
	Class []string `yaml:"class"`
	Attr  []string `yaml:"attr"`
	Html  []string `yaml:"html"`
	Text  []string `yaml:"text"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL