Documentation ¶
Index ¶
- func GetH2(h2s []string) (string, string)
- func GetInlinks(domain string, links []string) []string
- func GetOutlinks(domain string, links []string) []string
- func WordCount(value string) int
- type Crawler
- type CrawlerOption
- type CrawlerResponse
- type CrawlerResponseAmp
- type CrawlerResponseImage
- type LimitRule
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func GetInlinks ¶ added in v1.3.0
func GetInlinks(domain string, links []string) []string
func GetOutlinks ¶ added in v1.3.0
func GetOutlinks(domain string, links []string) []string
Types ¶
type Crawler ¶
type Crawler struct {
	Domain         string
	UserAgent      string
	MaxDepth       int
	UseSitemap     bool
	AllowedDomains []string
	Limits         []colly.LimitRule
	CacheDir       string
	OnResponse     func(response CrawlerResponse)
	OnImage        func(image CrawlerResponseImage)
}
func NewCrawl ¶
func NewCrawl(domain string, options ...CrawlerOption) *Crawler
type CrawlerOption ¶
type CrawlerOption func(*Crawler)
func AllowedDomains ¶
func AllowedDomains(domains ...string) CrawlerOption
func Limit ¶
func Limit(rule LimitRule) CrawlerOption
func MaxDepth ¶
func MaxDepth(depth int) CrawlerOption
func UseSitemap ¶
func UseSitemap(use bool) CrawlerOption
func UserAgent ¶
func UserAgent(ua string) CrawlerOption
type CrawlerResponse ¶
type CrawlerResponse struct {
	URL                   string
	ContentType           string
	StatusCode            int
	Status                string
	H1                    string
	H1Length              int
	H2One                 string
	H2OneLength           int
	H2Two                 string
	H2TwoLength           int
	MetaDescription       string
	MetaDescriptionLength int
	Size                  int
	WordCount             int
	CrawlDepth            int
	Inlinks               []string
	InlinksCount          int
	Outlinks              []string
	OutlinksCount         int
	Canonicals            []string
	Amp                   CrawlerResponseAmp
	Images                []CrawlerResponseImage
}
type CrawlerResponseAmp ¶ added in v1.3.0
func GetAmpMeta ¶ added in v1.3.0
func GetAmpMeta(e *colly.HTMLElement) CrawlerResponseAmp
type CrawlerResponseImage ¶ added in v1.3.0
type CrawlerResponseImage struct {
	ParentPage string
	URL        string
	StatusCode int
	StatusText string
	Alt        string
	Size       int
}
func GetImageMeta ¶ added in v1.3.0
func GetImageMeta(e *colly.HTMLElement) []CrawlerResponseImage
Click to show internal directories.
Click to hide internal directories.