Documentation ¶
Index ¶
- func GetATagAnchor(a *goquery.Selection) string
- type DomainFilter
- type Link
- type LinkType
- type Page
- type SiteWalker
- type SiteWalkerOption
- func WithCacheDir(dir string) SiteWalkerOption
- func WithDelay(randomDelay time.Duration, delay time.Duration) SiteWalkerOption
- func WithDeviceType(isMobile bool) SiteWalkerOption
- func WithParallelism(n int) SiteWalkerOption
- func WithTimeout(timeout time.Duration) SiteWalkerOption
- func WithUserAgent(ua string) SiteWalkerOption
- type WebSite
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func GetATagAnchor ¶
Types ¶
type DomainFilter ¶
func NewDomainFilter ¶
func NewDomainFilter() DomainFilter
type Link ¶
type Link struct {
	Href     string   `json:"href"`
	URL      *url.URL `json:"-"`
	Text     string   `json:"text"`
	LinkType LinkType `json:"link_type"`
}
网站的链接信息
type Page ¶
type Page struct {
	// seo text 信息
	Title       string   `json:"title"`
	Description string   `json:"description"`
	Keywords    []string `json:"keywords"`
	// h1标签的内容
	H1 string `json:"h1"`
	// 页面的原始url
	RawURL string   `json:"raw_url"`
	URL    *url.URL `json:"url"`
	// 页面中的链接
	Links []*Link `json:"links"`
	// 页面中的外部链接
	ExternalLinks []*Link `json:"external_links"`
	// 网站网页数据
	Html []byte `json:"html"`
	// contains filtered or unexported fields
}
网站的页面信息
type SiteWalker ¶
type SiteWalker struct {
// contains filtered or unexported fields
}
func NewSiteWalker ¶
func NewSiteWalker(opts ...SiteWalkerOption) *SiteWalker
type SiteWalkerOption ¶
type SiteWalkerOption func(sw *SiteWalker)
func WithDelay ¶
func WithDelay(randomDelay time.Duration, delay time.Duration) SiteWalkerOption
WithDelay sets the delay and randomDelay durations used by the SiteWalker.
func WithDeviceType ¶
func WithDeviceType(isMobile bool) SiteWalkerOption
WithDeviceType sets whether the device type is mobile; the device type is used to decide the User-Agent.
func WithUserAgent ¶
func WithUserAgent(ua string) SiteWalkerOption
WithUserAgent sets the User-Agent explicitly. It overrides the User-Agent chosen by WithDeviceType.
Click to show internal directories.
Click to hide internal directories.