browser

package
v0.5.11 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 25, 2023 License: MIT Imports: 18 Imported by: 1

Documentation

Overview

Package browser contains the primary browser implementation.

Index

Constants

This section is empty.

Variables

View Source
var InitialAssetsSliceSize = 20

InitialAssetsSliceSize is the initial size when allocating a slice of page assets. Increasing this size may lead to a very small performance increase when downloading assets from a page with a lot of assets.

Functions

func DownloadAsset

func DownloadAsset(asset Downloadable, out io.Writer) (int64, error)

DownloadAsset copies a remote file to the given writer.

func DownloadAssetAsync

func DownloadAssetAsync(asset Downloadable, out io.Writer, c AsyncDownloadChannel)

DownloadAssetAsync downloads an asset asynchronously and notifies the given channel when the download is complete.

Types

type Asset

type Asset struct {
	// ID is the value of the id attribute if available.
	ID string

	// URL is the asset URL.
	URL *url.URL

	// Type describes the type of asset.
	Type AssetType
}

Asset implements Assetable.

func (*Asset) AssetType

func (at *Asset) AssetType() AssetType

AssetType returns the asset type.

func (*Asset) Id

func (at *Asset) Id() string

Id returns the asset ID or an empty string when not available.

func (*Asset) Url

func (at *Asset) Url() *url.URL

Url returns the asset URL.

type AssetType

type AssetType uint16

AssetType describes a type of page asset, such as an image or stylesheet.

const (
	// LinkAsset describes a *Link asset.
	LinkAsset AssetType = iota

	// ImageAsset describes an *Image asset.
	ImageAsset

	// StylesheetAsset describes a *Stylesheet asset.
	StylesheetAsset

	// ScriptAsset describes a *Script asset.
	ScriptAsset
)

type Assetable

type Assetable interface {
	// Url returns the asset URL.
	Url() *url.URL

	// Id returns the asset ID or an empty string when not available.
	Id() string

	// AssetType returns the type of asset.
	AssetType() AssetType
}

Assetable represents a page asset, such as an image or stylesheet.

type AsyncDownloadChannel

type AsyncDownloadChannel chan *AsyncDownloadResult

AsyncDownloadChannel is a channel upon which the results of an async download are passed.

type AsyncDownloadResult

type AsyncDownloadResult struct {
	// Asset is a pointer to the Downloadable asset that was downloaded.
	Asset Downloadable

	// Writer where the asset data was written.
	Writer io.Writer

	// Size is the number of bytes written to the io.Writer.
	Size int64

	// Error contains any error that occurred during the download or nil.
	Error error
}

AsyncDownloadResult has the results of an asynchronous download.

type Attribute

type Attribute int

Attribute represents a Browser capability

const (
	// SendReferer instructs a Browser to send the Referer header.
	SendReferer Attribute = iota

	// MetaRefreshHandling instructs a Browser to handle the refresh meta tag.
	MetaRefreshHandling

	// FollowRedirects instructs a Browser to follow Location headers.
	FollowRedirects
)

type AttributeMap

type AttributeMap map[Attribute]bool

AttributeMap represents a map of Attribute values.

type Browsable

type Browsable interface {
	// GetUserAgent gets the user agent.
	GetUserAgent() string

	// SetUserAgent sets the user agent.
	SetUserAgent(ua string)

	// SetAttribute sets a browser instruction attribute.
	SetAttribute(a Attribute, v bool)

	// SetAttributes is used to set all the browser attributes.
	SetAttributes(a AttributeMap)

	// SetState sets the init browser state.
	SetState(sj *jar.State)

	// GetState gets the init browser state.
	GetState() *jar.State

	// SetBookmarksJar sets the bookmarks jar the browser uses.
	SetBookmarksJar(bj jar.BookmarksJar)

	// SetCookieJar is used to set the cookie jar the browser uses.
	SetCookieJar(cj http.CookieJar)

	// GetCookieJar is used to get the cookie jar the browser uses.
	GetCookieJar() http.CookieJar

	// SetHistoryJar is used to set the history jar the browser uses.
	SetHistoryJar(hj jar.History)

	// SetHistoryCapacity is used to set the capacity for history queue
	SetHistoryCapacity(capacity int)

	// SetHeadersJar sets the headers the browser sends with each request.
	SetHeadersJar(h http.Header)

	// SetTransport sets the http library transport mechanism for each request.
	SetTransport(t *http.Transport)

	// GetTransport gets the http library transport mechanism.
	GetTransport() *http.Transport

	// AddRequestHeader adds a header the browser sends with each request.
	AddRequestHeader(name, value string)

	// GetRequestHeader gets a header the browser sends with each request.
	GetRequestHeader(name string) string

	// GetAllRequestHeaders gets all headers the browser sends with each request.
	GetAllRequestHeaders() string

	// Open requests the given URL using the GET method.
	Open(url string) error

	// Head requests the given URL using the HEAD method.
	Head(url string) error

	// OpenForm appends the data values to the given URL and sends a GET request.
	OpenForm(url string, data url.Values) error

	// OpenBookmark calls Open() with the URL for the bookmark with the given name.
	OpenBookmark(name string) error

	// Post requests the given URL using the POST method.
	Post(url string, contentType string, body io.Reader, ref *url.URL) error

	// PostForm requests the given URL using the POST method with the given data.
	PostForm(url string, data url.Values, ref *url.URL) error

	// PostMultipart requests the given URL using the POST method with the given data using multipart/form-data format.
	PostMultipart(u string, data url.Values, ref *url.URL) error

	// Back loads the previously requested page.
	Back() bool

	// Reload duplicates the last successful request.
	Reload() error

	// Bookmark saves the page URL in the bookmarks with the given name.
	Bookmark(name string) error

	// Click clicks on the page element matched by the given expression.
	Click(expr string) error

	// Form returns the form in the current page that matches the given expr.
	Form(expr string) (Submittable, error)

	// Forms returns an array of every form in the page.
	Forms() []Submittable

	// Links returns an array of every link found in the page.
	Links() []*Link

	// Images returns an array of every image found in the page.
	Images() []*Image

	// Stylesheets returns an array of every stylesheet linked to the document.
	Stylesheets() []*Stylesheet

	// Scripts returns an array of every script linked to the document.
	Scripts() []*Script

	// SiteCookies returns the cookies for the current site.
	SiteCookies() []*http.Cookie

	// ResolveUrl returns an absolute URL for a possibly relative URL.
	ResolveUrl(u *url.URL) *url.URL

	// ResolveStringUrl works just like ResolveUrl, but the argument and return value are strings.
	ResolveStringUrl(u string) (string, error)

	// Download writes the contents of the document to the given writer.
	Download(o io.Writer) (int64, error)

	// Url returns the page URL.
	Url() *url.URL

	// StatusCode returns the response status code.
	StatusCode() int

	// Title returns the page title.
	Title() string

	// ResponseHeaders returns the page headers.
	ResponseHeaders() http.Header

	// Body returns the page body as a string of html.
	Body() string

	// Dom returns the inner *goquery.Selection.
	Dom() *goquery.Selection

	// RequestSize returns the number of bytes for the request.
	RequestSize() int

	// ResponseSize returns the number of bytes for the response.
	ResponseSize() int

	// Find returns the dom selections matching the given expression.
	Find(expr string) *goquery.Selection

	// Register pluggable converter
	SetConverter(content_type string, f func([]byte, string, string) []byte)

	// Unregister pluggable converter
	ClearConverter(content_type string)

	// Set cookie usage settings
	UseCookie(setting bool)
}

Browsable represents an HTTP web browser.

type Browser

type Browser struct {
	// contains filtered or unexported fields
}

Browser is the default Browsable implementation.

func (*Browser) AddRequestHeader

func (bow *Browser) AddRequestHeader(name, value string)

AddRequestHeader sets a header the browser sends with each request.

func (*Browser) Back

func (bow *Browser) Back() bool

Back loads the previously requested page.

Returns a boolean value indicating whether a previous page existed, and was successfully loaded.

func (*Browser) Body

func (bow *Browser) Body() string

Body returns the page body as a string of html.

func (*Browser) Bookmark

func (bow *Browser) Bookmark(name string) error

Bookmark saves the page URL in the bookmarks with the given name.

func (*Browser) ClearContentFixer

func (bow *Browser) ClearContentFixer(content_type string)

func (*Browser) ClearConverter

func (bow *Browser) ClearConverter(content_type string)

Unregister pluggable converter

func (*Browser) ClearTimeout

func (bow *Browser) ClearTimeout()

ClearTimeout resets the max timeout for building requests to 180.

func (*Browser) Click

func (bow *Browser) Click(expr string) error

Click clicks on the page element matched by the given expression.

Currently this is only useful for clicking on links, which will cause the browser to load the page pointed at by the link. Future versions of Surf may support JavaScript and clicking on elements will fire the click event.

func (*Browser) DelRequestHeader

func (bow *Browser) DelRequestHeader(name string)

DelRequestHeader deletes a header so the browser will not send it with future requests.

func (*Browser) Dom

func (bow *Browser) Dom() *goquery.Selection

Dom returns the inner *goquery.Selection.

func (*Browser) Download

func (bow *Browser) Download(o io.Writer) (int64, error)

Download writes the contents of the document to the given writer.

func (*Browser) Find

func (bow *Browser) Find(expr string) *goquery.Selection

Find returns the dom selections matching the given expression.

func (*Browser) Form

func (bow *Browser) Form(expr string) (Submittable, error)

Form returns the form in the current page that matches the given expr.

func (*Browser) Forms

func (bow *Browser) Forms() []Submittable

Forms returns an array of every form in the page.

func (*Browser) GetAllRequestHeaders

func (bow *Browser) GetAllRequestHeaders() string

GetAllRequestHeaders gets all headers the browser sends with each request.

func (*Browser) GetAsyncStore

func (bow *Browser) GetAsyncStore() *jar.AsyncStore

func (*Browser) GetCookieJar

func (bow *Browser) GetCookieJar() http.CookieJar

GetCookieJar is used to get the cookie jar the browser uses.

func (*Browser) GetRequestHeader

func (bow *Browser) GetRequestHeader(name string) string

GetRequestHeader gets a header the browser sends with each request.

func (*Browser) GetState

func (bow *Browser) GetState() *jar.State

GetState gets the browser state.

func (*Browser) GetTransport

func (bow *Browser) GetTransport() *http.Transport

GetTransport gets the http library transport mechanism.

func (*Browser) GetUserAgent

func (bow *Browser) GetUserAgent() string

GetUserAgent gets the user agent.

func (*Browser) Head

func (bow *Browser) Head(u string) error

Head requests the given URL using the HEAD method.

func (*Browser) Images

func (bow *Browser) Images() []*Image

Images returns an array of every image found in the page.

func (*Browser) InitConverters

func (bow *Browser) InitConverters()

Init pluggable map

func (bow *Browser) Links() []*Link

Links returns an array of every link found in the page.

func (*Browser) Open

func (bow *Browser) Open(u string) error

Open requests the given URL using the GET method.

func (*Browser) OpenAsync

func (bow *Browser) OpenAsync(u, name string) error

func (*Browser) OpenBookmark

func (bow *Browser) OpenBookmark(name string) error

OpenBookmark calls Open() with the URL for the bookmark with the given name.

func (*Browser) OpenForm

func (bow *Browser) OpenForm(u string, data url.Values) error

OpenForm appends the data values to the given URL and sends a GET request.

func (*Browser) Patch

func (bow *Browser) Patch(u string, contentType string, body io.Reader, ref *url.URL) error

Patch requests the given URL using the PATCH method.

func (*Browser) PatchForm

func (bow *Browser) PatchForm(u string, data url.Values, ref *url.URL) error

PatchForm requests the given URL using the PATCH method with the given data.

func (*Browser) Post

func (bow *Browser) Post(u string, contentType string, body io.Reader, ref *url.URL) error

Post requests the given URL using the POST method.

func (*Browser) PostForm

func (bow *Browser) PostForm(u string, data url.Values, ref *url.URL) error

PostForm requests the given URL using the POST method with the given data.

func (*Browser) PostMultipart

func (bow *Browser) PostMultipart(u string, data url.Values, ref *url.URL) error

PostMultipart requests the given URL using the POST method with the given data using multipart/form-data format.

func (*Browser) Put

func (bow *Browser) Put(u string, contentType string, body io.Reader, ref *url.URL) error

Put requests the given URL using the PUT method.

func (*Browser) PutForm

func (bow *Browser) PutForm(u string, data url.Values, ref *url.URL) error

PutForm requests the given URL using the PUT method with the given data.

func (*Browser) Reload

func (bow *Browser) Reload() error

Reload duplicates the last successful request.

func (*Browser) RequestSize added in v0.5.9

func (bow *Browser) RequestSize() int

RequestSize returns HTTP request size in bytes

func (*Browser) ResolveStringUrl

func (bow *Browser) ResolveStringUrl(u string) (string, error)

ResolveStringUrl works just like ResolveUrl, but the argument and return value are strings.

func (*Browser) ResolveUrl

func (bow *Browser) ResolveUrl(u *url.URL) *url.URL

ResolveUrl returns an absolute URL for a possibly relative URL.

func (*Browser) ResponseHeaders

func (bow *Browser) ResponseHeaders() http.Header

ResponseHeaders returns the page headers.

func (*Browser) ResponseSize added in v0.5.9

func (bow *Browser) ResponseSize() int

ResponseSize returns HTTP response size in bytes

func (*Browser) Scripts

func (bow *Browser) Scripts() []*Script

Scripts returns an array of every script linked to the document.

func (*Browser) SetAsyncStore

func (bow *Browser) SetAsyncStore(a *jar.AsyncStore)

func (*Browser) SetAttribute

func (bow *Browser) SetAttribute(a Attribute, v bool)

SetAttribute sets a browser instruction attribute.

func (*Browser) SetAttributes

func (bow *Browser) SetAttributes(a AttributeMap)

SetAttributes is used to set all the browser attributes.

func (*Browser) SetBookmarksJar

func (bow *Browser) SetBookmarksJar(bj jar.BookmarksJar)

SetBookmarksJar sets the bookmarks jar the browser uses.

func (*Browser) SetContentFixer

func (bow *Browser) SetContentFixer(content_type string)

func (*Browser) SetConverter

func (bow *Browser) SetConverter(content_type string, f func([]byte, string, string) []byte)

Register pluggable converter

func (*Browser) SetCookieJar

func (bow *Browser) SetCookieJar(cj http.CookieJar)

SetCookieJar is used to set the cookie jar the browser uses.

func (*Browser) SetHeadersJar

func (bow *Browser) SetHeadersJar(h http.Header)

SetHeadersJar sets the headers the browser sends with each request.

func (*Browser) SetHistoryCapacity

func (bow *Browser) SetHistoryCapacity(capacity int)

SetHistoryCapacity is used to set the capacity for history queue

func (*Browser) SetHistoryJar

func (bow *Browser) SetHistoryJar(hj jar.History)

SetHistoryJar is used to set the history jar the browser uses.

func (*Browser) SetMaxReloads

func (bow *Browser) SetMaxReloads(max int)

SetMaxReloads sets the maximum number of reloads triggered via a meta http-equiv="refresh" tag.

func (*Browser) SetState

func (bow *Browser) SetState(sj *jar.State)

SetState sets the browser state.

func (*Browser) SetTimeout

func (bow *Browser) SetTimeout(t int)

SetTimeout sets the max timeout for building requests.

func (*Browser) SetTransport

func (bow *Browser) SetTransport(t *http.Transport)

SetTransport sets the http library transport mechanism for each request.

func (*Browser) SetUserAgent

func (bow *Browser) SetUserAgent(userAgent string)

SetUserAgent sets the user agent.

func (*Browser) SiteCookies

func (bow *Browser) SiteCookies() []*http.Cookie

SiteCookies returns the cookies for the current site.

func (*Browser) StatusCode

func (bow *Browser) StatusCode() int

StatusCode returns the response status code.

func (*Browser) Stylesheets

func (bow *Browser) Stylesheets() []*Stylesheet

Stylesheets returns an array of every stylesheet linked to the document.

func (*Browser) Title

func (bow *Browser) Title() string

Title returns the page title.

func (*Browser) Url

func (bow *Browser) Url() *url.URL

Url returns the page URL.

func (*Browser) UseCookie

func (bow *Browser) UseCookie(setting bool)

UseCookie sets mode for using cookies in specific calls

type Button

type Button struct {
	// contains filtered or unexported fields
}

type Checkbox

type Checkbox struct {
	// contains filtered or unexported fields
}

type Downloadable

type Downloadable interface {
	Assetable

	// Download writes the contents of the element to the given writer.
	//
	// Returns the number of bytes written.
	Download(out io.Writer) (int64, error)

	// DownloadAsync downloads the contents of the element asynchronously.
	//
	// An instance of AsyncDownloadResult will be sent down the given channel
	// when the download is complete.
	DownloadAsync(out io.Writer, ch AsyncDownloadChannel)
}

Downloadable represents an asset that may be downloaded.

type DownloadableAsset

type DownloadableAsset struct {
	Asset
}

DownloadableAsset is an asset that may be downloaded.

func (*DownloadableAsset) Download

func (at *DownloadableAsset) Download(out io.Writer) (int64, error)

Download writes the asset to the given io.Writer type.

func (*DownloadableAsset) DownloadAsync

func (at *DownloadableAsset) DownloadAsync(out io.Writer, ch AsyncDownloadChannel)

DownloadAsync downloads the asset asynchronously.

type Field

type Field struct {
	// contains filtered or unexported fields
}

type Form

type Form struct {
	// contains filtered or unexported fields
}

Form is the default form element.

func NewForm

func NewForm(bow Browsable, s *goquery.Selection) *Form

NewForm creates and returns a *Form type.

func (*Form) Action

func (f *Form) Action() string

Action returns the form action URL. The URL will always be absolute.

func (*Form) Click

func (f *Form) Click(button string) error

Click submits the form by clicking the button with the given name.

func (*Form) Dom

func (f *Form) Dom() *goquery.Selection

Dom returns the inner *goquery.Selection.

func (*Form) Input

func (f *Form) Input(name, value string) error

Input sets the value of a form field.

func (*Form) Method

func (f *Form) Method() string

Method returns the form method, eg "GET" or "POST".

func (*Form) Submit

func (f *Form) Submit(noclick string) error

Submit submits the form. Clicks the first button in the form, or submits the form without using any button when the form does not contain any buttons.

type Image

type Image struct {
	DownloadableAsset

	// Alt is the value of the image alt attribute if available.
	Alt string

	// Title is the value of the image title attribute if available.
	Title string
}

Image stores the properties of an image.

func NewImageAsset

func NewImageAsset(url *url.URL, id, alt, title string) *Image

NewImageAsset creates and returns a new *Image type.

type Link struct {
	Asset

	// Text is the text appearing between the opening and closing anchor tag.
	Text string
}

Link stores the properties of a page link.

func NewLinkAsset

func NewLinkAsset(u *url.URL, id, text string) *Link

NewLinkAsset creates and returns a new *Link type.

type Script

type Script struct {
	DownloadableAsset

	// Type is the value of the type attribute. Defaults to "text/javascript" when not specified.
	Type string
}

Script stores the properties of a linked script.

func NewScriptAsset

func NewScriptAsset(url *url.URL, id, typ string) *Script

NewScriptAsset creates and returns a new *Script type.

type Stylesheet

type Stylesheet struct {
	DownloadableAsset

	// Media is the value of the media attribute. Defaults to "all" when not specified.
	Media string

	// Type is the value of the type attribute. Defaults to "text/css" when not specified.
	Type string
}

Stylesheet stores the properties of a linked stylesheet.

func NewStylesheetAsset

func NewStylesheetAsset(url *url.URL, id, media, typ string) *Stylesheet

NewStylesheetAsset creates and returns a new *Stylesheet type.

type Submittable

type Submittable interface {
	Method() string
	Action() string
	Input(name, value string) error
	Click(button string) error
	Submit(noclick string) error
	Dom() *goquery.Selection
}

Submittable represents an element that may be submitted, such as a form.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL