utils

package

v0.0.12 Latest Latest Go to latest Published: Jan 9, 2025 License: BSD-3-Clause Imports: 24 Imported by: 2

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/benoitkugler/webrender

Documentation ¶

Index ¶

Constants
Variables
func Abs(v int) int
func AsciiLower(s string) string
func ElementHasLinkType(element *HTMLNode, linkType string) bool
func FindBaseUrl(htmlDocument *html.Node, fallbackBaseUrl string) string
func GetLinkAttribute(element *HTMLNode, attrName string, baseUrl string) ([2]string, bool)
func Hash(s string) int
func IsIn(l []string, s string) bool
func MaxInt(x, y int) int
func MinInt(x, y int) int
func PathToURL(file string) (out string, err error)
func SafeUrljoin(baseUrl, urls string, allowRelative bool) (string, error)
func Unquote(s string) string
func UrlJoin(baseUrl, urlS string, allowRelative bool, context string) string
type Attachment
type ContentInput
type DocumentMetadata
- func GetHtmlMetadata(wrapperElement *HTMLNode, baseUrl string) DocumentMetadata
type ElementKey
- func (e ElementKey) IsPageType() bool
type Fl
- func Ceil(x Fl) Fl
- func FloatModulo(x Fl, i int) Fl
- func Floor(x Fl) Fl
- func Hypot(a, b Fl) Fl
- func MaxF(x, y Fl) Fl
- func Maxs(values ...Fl) Fl
- func MinF(x, y Fl) Fl
- func Mins(values ...Fl) Fl
- func Round(x Fl) Fl
- func Round6(f Fl) Fl
- func RoundPrec(f Fl, n int) Fl
type HTMLIterator
- func NewHtmlIterator(root *html.Node, tags ...atom.Atom) HTMLIterator
- func (h *HTMLIterator) HasNext() bool
- func (h *HTMLIterator) Next() *HTMLNode
type HTMLNode
- func (h *HTMLNode) AsHtmlNode() *html.Node
- func (h HTMLNode) Get(name string) string
- func (element HTMLNode) GetChildrenText() (content []byte)
- func (element HTMLNode) GetText() string
- func (element HTMLNode) GetUrlAttribute(attrName, baseUrl string, allowRelative bool) string
- func (h HTMLNode) HasAttr(name string) bool
- func (element HTMLNode) HasLinkType(linkType string) bool
- func (element HTMLNode) IsText() (bool, string)
- func (h *HTMLNode) Iter(tags ...atom.Atom) HTMLIterator
- func (element HTMLNode) NodeChildren(skipBlank bool) (children []*HTMLNode)
- func (h *HTMLNode) ToKey(pseudoType string) ElementKey
type InputFilename
- func (c InputFilename) String() string
type InputReader
- func (c InputReader) String() string
type InputString
- func (c InputString) String() string
type InputUrl
- func (c InputUrl) String() string
type PageElement
- func (p PageElement) ToKey(pseudoType string) ElementKey
type RemoteRessource
- func DefaultUrlFetcher(urlTarget string) (RemoteRessource, error)
type Set
- func NewSet(values ...string) Set
- func (s Set) Add(key string)
- func (s Set) Copy() Set
- func (s Set) Equal(other Set) bool
- func (s Set) Extend(keys []string)
- func (s Set) Has(key string) bool
- func (s Set) IsNone() bool
type Source
- func FetchSource(input ContentInput, baseUrl string, urlFetcher UrlFetcher, ...) (out Source, err error)
type Url
- func (u Url) IsNone() bool
type UrlFetcher

Constants ¶

View Source

const (
	Version = "0.62"
)

Variables ¶

View Source

var Has = struct{}{}

View Source

var VersionString = fmt.Sprintf("Go-WebRender %s", Version)

Used for "User-Agent" in HTTP

View Source

var (
	W3CDateReGroupsIndexes = map[string]int{}
)

YYYY (eg 1997)
YYYY-MM (eg 1997-07)
YYYY-MM-DD (eg 1997-07-16)
YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00)
YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)
YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)

Functions ¶

func Abs ¶ added in v0.0.10

func Abs(v int) int

func AsciiLower ¶

func AsciiLower(s string) string

Transform (only) ASCII letters to lower case: A-Z is mapped to a-z.

This is used for ASCII case-insensitive matching
(see http://whatwg.org/C#ascii-case-insensitive).
This is different from the strings.ToLower function,
which also affects non-ASCII characters,
sometimes mapping them into the ASCII range:
		keyword = u"Bac\u212Aground"
		assert strings.ToLower(keyword) == u"background"
		assert asciiLower(keyword) != strings.ToLower(keyword)
		assert asciiLower(keyword) == u"bac\u212Aground"

func ElementHasLinkType ¶

func ElementHasLinkType(element *HTMLNode, linkType string) bool

Return whether the given element has a `rel` attribute with the given link type (must be a lower-case string).

func FindBaseUrl ¶

func FindBaseUrl(htmlDocument *html.Node, fallbackBaseUrl string) string

Return the base URL for the document. See http://www.w3.org/TR/html5/urls.html#document-base-url

func GetLinkAttribute ¶

func GetLinkAttribute(element *HTMLNode, attrName string, baseUrl string) ([2]string, bool)

Return ('external', absolute_uri) or ('internal', unquoted_fragment_id) or false

func Hash ¶ added in v0.0.2

func Hash(s string) int

Hash creates an ID from a string.

func IsIn ¶ added in v0.0.10

func IsIn(l []string, s string) bool

func MaxInt ¶

func MaxInt(x, y int) int

func MinInt ¶

func MinInt(x, y int) int

func PathToURL ¶

func PathToURL(file string) (out string, err error)

Return a file URL for the given `file` path.

func SafeUrljoin ¶

func SafeUrljoin(baseUrl, urls string, allowRelative bool) (string, error)

default: allowRelative = false

func Unquote ¶

func Unquote(s string) string

func UrlJoin ¶

func UrlJoin(baseUrl, urlS string, allowRelative bool, context string) string

warn if baseUrl is required but missing.

Types ¶

type Attachment ¶

type Attachment struct {
	URL, Title string
}

type ContentInput ¶

type ContentInput interface {
	String() string
	// contains filtered or unexported methods
}

type DocumentMetadata ¶

type DocumentMetadata struct {
	// The title of the document, as a string.
	// Extracted from the `<title>` element in HTML
	// and written to the `/Title` info field in PDF.
	Title string

	// The description of the document, as a string.
	// Extracted from the `<meta name=description>` element in HTML
	// and written to the `/Subject` info field in PDF.
	Description string

	// The name of one of the software packages
	// used to generate the document, as a string.
	// Extracted from the `<meta name=generator>` element in HTML
	// and written to the `/Creator` info field in PDF.
	Generator string

	// Keywords associated with the document, as a list of strings.
	// (Defaults to the empty list.)
	// Extracted from `<meta name=keywords>` elements in HTML
	// and written to the `/Keywords` info field in PDF.
	Keywords []string

	// The authors of the document, as a list of strings.
	// (Defaults to the empty list.)
	// Extracted from the `<meta name=author>` elements in HTML
	// and written to the `/Author` info field in PDF.
	Authors []string

	// The creation date of the document, as a time.Time.
	// In the HTML source, dates are in one of the six formats specified in
	// `W3C’s profile of ISO 8601 <http://www.w3.org/TR/NOTE-datetime>`.
	// Extracted from the `<meta name=dcterms.created>` element in HTML
	// and written to the `/CreationDate` info field in PDF.
	Created time.Time

	// The modification date of the document, as a time.Time.
	// In the HTML source, dates are in one of the six formats specified in
	// `W3C’s profile of ISO 8601 <http://www.w3.org/TR/NOTE-datetime>`.
	// Extracted from the `<meta name=dcterms.modified>` element in HTML
	// and written to the `/ModDate` info field in PDF.
	Modified time.Time

	// File attachments, as a list of tuples of URL and a description.
	// (Defaults to the empty list.)
	// Extracted from the `<link rel=attachment>` elements in HTML
	// and written to the `/EmbeddedFiles` dictionary in PDF.
	Attachments []Attachment
}

Meta-information belonging to a whole `Document`.

func GetHtmlMetadata ¶

func GetHtmlMetadata(wrapperElement *HTMLNode, baseUrl string) DocumentMetadata

Relevant specs:

http://www.whatwg.org/html#the-title-element
http://www.whatwg.org/html#standard-metadata-names
http://wiki.whatwg.org/wiki/MetaExtensions
http://microformats.org/wiki/existing-rel-values#HTML5LinkExtensionsT

type ElementKey ¶

type ElementKey struct {
	Element    *HTMLNode
	PseudoType string
	PageType   PageElement
}

func (ElementKey) IsPageType ¶

func (e ElementKey) IsPageType() bool

type Fl ¶

type Fl = float32

func Ceil ¶ added in v0.0.10

func Ceil(x Fl) Fl

func FloatModulo ¶

func FloatModulo(x Fl, i int) Fl

FloatModulo implements Python modulo for float numbers, like

4.456 % 3

func Floor ¶

func Floor(x Fl) Fl

func Hypot ¶ added in v0.0.2

func Hypot(a, b Fl) Fl

Hypot returns SQRT(a^2 + b^2)

func MaxF ¶

func MaxF(x, y Fl) Fl

func Maxs ¶

func Maxs(values ...Fl) Fl

func MinF ¶

func MinF(x, y Fl) Fl

func Mins ¶

func Mins(values ...Fl) Fl

func Round ¶

func Round(x Fl) Fl

func Round6 ¶ added in v0.0.10

func Round6(f Fl) Fl

Round6 rounds f with 6 digits precision

func RoundPrec ¶ added in v0.0.2

func RoundPrec(f Fl, n int) Fl

RoundPrec rounds f with n digits precision

type HTMLIterator ¶

type HTMLIterator struct {
	// contains filtered or unexported fields
}

HTMLIterator simplifies the (depth-first) walk on an HTML tree.

func NewHtmlIterator ¶

func NewHtmlIterator(root *html.Node, tags ...atom.Atom) HTMLIterator

NewHtmlIterator uses `root` as the starting point. If `tags` are given, only nodes matching one of them are returned.

func (*HTMLIterator) HasNext ¶

func (h *HTMLIterator) HasNext() bool

HasNext returns true if a node still has to be visited.

func (*HTMLIterator) Next ¶

func (h *HTMLIterator) Next() *HTMLNode

type HTMLNode ¶

type HTMLNode html.Node

func (*HTMLNode) AsHtmlNode ¶

func (h *HTMLNode) AsHtmlNode() *html.Node

func (HTMLNode) Get ¶

func (h HTMLNode) Get(name string) string

Get returns the attribute `name` or "". See HasAttr if you need to distinguish between no attribute and an attribute with an empty string value.

func (HTMLNode) GetChildrenText ¶

func (element HTMLNode) GetChildrenText() (content []byte)

GetChildrenText returns the text directly in the element, but not descendants. It's the concatenation of all children's TextNodes.

func (HTMLNode) GetText ¶

func (element HTMLNode) GetText() string

GetText returns the content of the first text node child. Due to Go html.Parse() behavior, this method mimics the Python xml.etree.text attribute.

func (HTMLNode) GetUrlAttribute ¶

func (element HTMLNode) GetUrlAttribute(attrName, baseUrl string, allowRelative bool) string

Get the URI corresponding to the `attrName` attribute. Return "" if:

  - the attribute is empty or missing, or
  - the value is a relative URI but the document has no base URI and `allowRelative` is false.

Otherwise return a URI, absolute if possible.

func (HTMLNode) HasAttr ¶

func (h HTMLNode) HasAttr(name string) bool

HasAttr returns true if `name` is among the attributes (possibly empty).

func (HTMLNode) HasLinkType ¶

func (element HTMLNode) HasLinkType(linkType string) bool

Return whether the given element has a `rel` attribute with the given link type. `linkType` must be a lower-case string.

func (HTMLNode) IsText ¶

func (element HTMLNode) IsText() (bool, string)

IsText returns true if the node is a non empty text node.

func (*HTMLNode) Iter ¶

func (h *HTMLNode) Iter(tags ...atom.Atom) HTMLIterator

Iter returns an iterator over the HTML tree. If tags are given, only the nodes matching them will be returned by the iterator.

func (HTMLNode) NodeChildren ¶

func (element HTMLNode) NodeChildren(skipBlank bool) (children []*HTMLNode)

NodeChildren returns the direct children of `element`. Skip empty text nodes

func (*HTMLNode) ToKey ¶

func (h *HTMLNode) ToKey(pseudoType string) ElementKey

type InputFilename ¶

type InputFilename string

func (InputFilename) String ¶

func (c InputFilename) String() string

type InputReader ¶

type InputReader struct {
	io.ReadCloser
}

func (InputReader) String ¶

func (c InputReader) String() string

type InputString ¶

type InputString string

func (InputString) String ¶

func (c InputString) String() string

type InputUrl ¶

type InputUrl string

func (InputUrl) String ¶

func (c InputUrl) String() string

type PageElement ¶

type PageElement struct {
	Side  string
	Name  string
	Index int
	Blank bool
	First bool
}

func (PageElement) ToKey ¶

func (p PageElement) ToKey(pseudoType string) ElementKey

type RemoteRessource ¶

type RemoteRessource struct {
	Content *bytes.Reader

	// MIME type extracted e.g. from a *Content-Type* header. If not provided, the type is guessed from the
	// 	file extension in the URL.
	MimeType string

	// actual URL of the resource
	// 	if there were e.g. HTTP redirects.
	RedirectedUrl string

	// filename of the resource. Usually
	// 	derived from the *filename* parameter in a *Content-Disposition*
	// 	header
	Filename string

	ProtocolEncoding string
}

func DefaultUrlFetcher ¶

func DefaultUrlFetcher(urlTarget string) (RemoteRessource, error)

Fetch an external resource such as an image or stylesheet.

type Set ¶

type Set map[string]struct{}

func NewSet ¶

func NewSet(values ...string) Set

func (Set) Add ¶

func (s Set) Add(key string)

func (Set) Copy ¶

func (s Set) Copy() Set

Copy returns a deep copy.

func (Set) Equal ¶

func (s Set) Equal(other Set) bool

func (Set) Extend ¶

func (s Set) Extend(keys []string)

func (Set) Has ¶

func (s Set) Has(key string) bool

func (Set) IsNone ¶

func (s Set) IsNone() bool

type Source ¶

type Source struct {
	BaseUrl string
	Content []byte // utf8 encoded
}

func FetchSource ¶

func FetchSource(input ContentInput, baseUrl string, urlFetcher UrlFetcher,
	checkCssMimeType bool,
) (out Source, err error)

FetchSource fetches the HTML input and returns it with the normalized `BaseUrl` (checkCssMimeType=false).

type Url ¶

type Url struct {
	Url      string
	Internal bool
}

Url represents a URL which can be either internal or external.

func (Url) IsNone ¶

func (u Url) IsNone() bool

type UrlFetcher ¶

type UrlFetcher = func(url string) (RemoteRessource, error)

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
testutils
tracer	Package tracer provides a function to dump the current layout tree, which may be used in debug mode.

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL