tokenizer

package
v0.41.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 13, 2022 License: MIT Imports: 10 Imported by: 0

Documentation

Index

Constants

View Source
const (
	AttrHref = "href"
)

Variables

View Source
var ErrTokenNotFound = errors.New("token(s) not found")

Functions

func AtomLookupString added in v0.34.2

func AtomLookupString(tagName string) atom.Atom

func NewTokenizerFile added in v0.34.2

func NewTokenizerFile(filename string) (*html.Tokenizer, error)

func NextStartToken

func NextStartToken(z *html.Tokenizer, skipErrors bool, htmlAtoms ...atom.Atom) (html.Token, error)

func NextTextToken

func NextTextToken(z *html.Tokenizer, skipErrors bool, htmlAtoms ...atom.Atom) (html.Token, error)

func NextTokenMatch

func NextTokenMatch(z *html.Tokenizer, skipErrors, includeChain, includeMatch bool, filters ...TokenFilter) ([]html.Token, error)

NextTokenMatch returns the matching tokens as a slice. `includeMatch` is only used when `includeChain` is true.

func ParseLink(tokens ...html.Token) (href string, desc string, err error)

func TokenAttribute

func TokenAttribute(token html.Token, attrName string) (string, error)

func TokenMap added in v0.34.2

func TokenMap(t html.Token) map[string]string

func TokensBetween

func TokensBetween(z *html.Tokenizer, skipErrors, inclusive bool, begin, end TokenFilters) ([]html.Token, error)

func TokensBetweenAtom

func TokensBetweenAtom(z *html.Tokenizer, skipErrors, inclusive bool, htmlAtom atom.Atom) ([]html.Token, error)

TokensBetweenAtom returns the tokens that represent the `innerHTML` between a start tag token and its end tag token.

func TokensSubset added in v0.34.2

func TokensSubset(startFilter, endFilter *TokenFilter, inclusive, greedy bool, toks []html.Token) []html.Token

Types

type AtomSet

type AtomSet struct {
	Atoms map[string]atom.Atom
}

func NewAtomSet

func NewAtomSet(htmlAtoms ...atom.Atom) AtomSet

func NewAtomSetString added in v0.34.2

func NewAtomSetString(tagNames ...string) (AtomSet, error)

func NewAtomSetStringMust added in v0.34.2

func NewAtomSetStringMust(tagNames ...string) AtomSet

func (AtomSet) Add

func (set AtomSet) Add(htmlAtoms ...atom.Atom)

func (AtomSet) Exists

func (set AtomSet) Exists(htmlAtom atom.Atom) bool

func (AtomSet) Len

func (set AtomSet) Len() int

func (AtomSet) Names

func (set AtomSet) Names() []string

type Attributes

type Attributes []html.Attribute

func (Attributes) GetOne

func (attrs Attributes) GetOne(attributeKey string) (html.Attribute, error)

type Description added in v0.34.2

type Description struct {
	Term        []html.Token
	Description []html.Token
}

func (*Description) DescriptionString added in v0.34.2

func (d *Description) DescriptionString() string

func (*Description) Empty added in v0.34.2

func (d *Description) Empty() bool

func (*Description) Strings added in v0.34.2

func (d *Description) Strings() []string

func (*Description) TermString added in v0.34.2

func (d *Description) TermString() string

type DescriptionList added in v0.34.2

type DescriptionList []Description

func ParseDescriptionListTokens added in v0.34.2

func ParseDescriptionListTokens(toks ...html.Token) DescriptionList

func TokenizerDescriptionListNext added in v0.34.2

func TokenizerDescriptionListNext(z *html.Tokenizer) (DescriptionList, error)

func (DescriptionList) Strings added in v0.34.2

func (dl DescriptionList) Strings() [][]string

type DescriptionLists added in v0.34.2

type DescriptionLists []DescriptionList

func TokenizerDescriptionLists added in v0.34.2

func TokenizerDescriptionLists(z *html.Tokenizer) (DescriptionLists, error)

func (DescriptionLists) Strings added in v0.34.2

func (dls DescriptionLists) Strings() [][][]string

type TokenFilter

type TokenFilter struct {
	TokenType html.TokenType
	AtomSet   AtomSet
}

TokenFilter describes a token to search for by token type and atom set, for example to find the next <tr> or </table>.

func NewTokenFilter added in v0.34.2

func NewTokenFilter(tokenType html.TokenType, atoms ...atom.Atom) *TokenFilter

func (*TokenFilter) Match added in v0.34.2

func (tf *TokenFilter) Match(t html.Token) bool

type TokenFilters

type TokenFilters []TokenFilter

func (TokenFilters) ByTokenType

func (filters TokenFilters) ByTokenType(tt html.TokenType) []TokenFilter

type Tokens

type Tokens []html.Token

func (Tokens) Maps added in v0.34.2

func (tokens Tokens) Maps() []map[string]string

func (Tokens) String

func (tokens Tokens) String() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL