Documentation ¶
Index ¶
- func ConnectChromeBrowser(leakless, headless bool) (br *rod.Browser, err error)
- func ConnectChromiumBrowser(leakless, headless bool) (br *rod.Browser, err error)
- func ConnectDefaultBrowser(leakless, headless bool) (br *rod.Browser, err error)
- func ConnectEdgeBrowser(leakless, headless bool, ieMode bool) (br *rod.Browser, err error)
- func ElementVisible(page *rod.Page, selector string) bool
- func EmptyDirectory(dir string) error
- func ExecShell(ctx context.Context, command string) (string, error)
- func ExtractUrlParam(urlString, paramName string) (string, error)
- func FileExists(name string) (bool, error)
- func GBK2UTF8(s string) string
- func GetDictAndLastSegmentByPath(data map[string]interface{}, path string) (interface{}, string, error)
- func IsProcessRunning(pid int) bool
- func KillProcess(pid int)
- func MustWaitDownloadRelax(b *rod.Browser) func() ([]byte, string)
- func NormalizeFilename(name string) string
- func OpenPage(browser *rod.Browser, url string, sleep int64, selector string, sign WaitSign) (page *rod.Page, err error)
- func QueryElem(page *rod.Page, selector string) (*rod.Element, error)
- func RaceShow(page *rod.Page, selectors []string, timeoutSeconds int) (int, *rod.Element, error)
- func RenameFileUnique(dir, fileName, ext string) string
- func WaitElementHide(page *rod.Page, selector string, timeoutSeconds int) error
- func WaitElementShow(page *rod.Page, selector string, timeoutSeconds int) (err error)
- func WaitPage(page *rod.Page, sleep int64, selector string, sign WaitSign) (err error)
- type ConfigNode
- type Crawler
- func (c *Crawler) AttachChromeBrowser() error
- func (c *Crawler) AttachDefaultBrowser() error
- func (c *Crawler) AttachEdgeBrowser(ieMode bool) error
- func (c *Crawler) AttachEmbedBrowser() error
- func (c *Crawler) Close()
- func (c *Crawler) CrawlPage(page *rod.Page, cfgOrFile interface{}, autoDownload bool, closeTab bool) (*Result, error)
- func (c *Crawler) CrawlUrl(url string, cfgOrFile interface{}, autoDownload bool, closeTab bool) (*Result, *rod.Page, error)
- type CrawlerConfig
- type DictData
- type DownloadConfig
- type DownloadFileInfo
- type DownloadResult
- type DownloadTypeString
- type ExecuteResult
- type ExternalResult
- type PageLoad
- type Result
- type WaitSign
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ConnectChromeBrowser ¶ added in v1.1.5
ConnectChromeBrowser returns the Chrome browser if installed
func ConnectChromiumBrowser ¶ added in v1.1.6
ConnectChromiumBrowser returns rod's embedded browser
func ConnectDefaultBrowser ¶ added in v1.1.5
ConnectDefaultBrowser returns the system's default browser
func ConnectEdgeBrowser ¶ added in v1.1.6
ConnectEdgeBrowser returns the Edge browser if installed
func ElementVisible ¶
ElementVisible checks if an element is visible on the page
func EmptyDirectory ¶ added in v1.1.3
EmptyDirectory removes all contents of a directory while preserving the directory itself
func ExecShell ¶ added in v1.2.0
ExecShell executes a shell command with timeout control. The ctx can be created with a timeout using context.WithTimeout.
func ExtractUrlParam ¶ added in v1.2.2
ExtractUrlParam extracts a specific parameter value from a URL string
func FileExists ¶ added in v1.0.4
FileExists checks if a file exists
func GetDictAndLastSegmentByPath ¶ added in v1.2.0
func GetDictAndLastSegmentByPath(data map[string]interface{}, path string) (interface{}, string, error)
GetDictAndLastSegmentByPath traverses a nested map structure using a path and returns the parent data, the last path segment, and any error encountered.
func IsProcessRunning ¶ added in v1.3.0
IsProcessRunning checks if a process is still running
func KillProcess ¶ added in v1.3.0
func KillProcess(pid int)
KillProcess forcefully terminates a process and its children
func MustWaitDownloadRelax ¶ added in v1.2.10
func NormalizeFilename ¶ added in v1.0.4
NormalizeFilename sanitizes a filename to be safe for all operating systems. It removes invalid characters, handles reserved names, and ensures the result is a valid filename.
func RaceShow ¶ added in v1.1.3
RaceShow waits for the first element to become visible from a list of selectors. Returns the index of the first visible element, the element itself, and any error
func RenameFileUnique ¶ added in v1.0.4
RenameFileUnique generates a unique filename by appending a number if the file already exists
func WaitElementHide ¶
WaitElementHide waits for an element to become invisible on the page
func WaitElementShow ¶
WaitElementShow waits for an element to become visible on the page
Types ¶
type ConfigNode ¶ added in v1.0.5
type Crawler ¶ added in v1.0.1
type Crawler struct { Browser *rod.Browser CfgFetcher func(path string) (*CrawlerConfig, error) }
func (*Crawler) AttachChromeBrowser ¶ added in v1.1.0
func (*Crawler) AttachDefaultBrowser ¶ added in v1.0.1
func (*Crawler) AttachEdgeBrowser ¶ added in v1.1.6
func (*Crawler) AttachEmbedBrowser ¶ added in v1.1.7
type CrawlerConfig ¶ added in v1.0.5
type CrawlerConfig struct { PageLoad PageLoad `json:"pageLoad,omitempty"` DataSection []DictData `json:"dataSection"` SwitchSection DictData `json:"switchSection,omitempty"` DownloadRoot string `json:"downloadRoot,omitempty"` DownloadSection []DownloadConfig `json:"downloadSection,omitempty"` }
type DownloadConfig ¶ added in v1.0.5
type DownloadConfig struct { ConfigNode SavePath string `json:"savePath,omitempty"` NameProper string `json:"nameProper,omitempty"` NameRender string `json:"nameRender,omitempty"` LinkProper string `json:"linkProper,omitempty"` LinkRender string `json:"linkRender,omitempty"` InsertTo string `json:"insertTo,omitempty"` DownloadType DownloadTypeString `json:"downloadType"` }
type DownloadFileInfo ¶ added in v1.2.0
type DownloadResult ¶ added in v1.0.5
type DownloadResult struct { Label string `json:"label"` Files []DownloadFileInfo `json:"files"` }
DownloadResult is a part of the result section
type DownloadTypeString ¶ added in v1.2.1
type DownloadTypeString string
const ( DownloadUrl DownloadTypeString = "url" DownloadElement DownloadTypeString = "element" PrintToPDF DownloadTypeString = "toPDF" )
type ExecuteResult ¶ added in v1.2.0
type ExternalResult ¶ added in v1.0.5
type Result ¶ added in v1.0.5
type Result struct { Data DictData `json:"data"` DownloadRoot string `json:"downloadRoot"` Downloads map[string]DownloadResult `json:"downloads"` ExternalSection map[string]ExternalResult `json:"externalSection"` }