Documentation
¶
Index ¶
- Variables
- type Cache
- type Crawler
- func (c *Crawler) AddMatches(page *webtree.Page)
- func (c *Crawler) Crawl()
- func (c *Crawler) CrawlNodeBlock(w *webtree.Node, levelChangedChan chan int)
- func (c *Crawler) CrawlNodeLive(w *webtree.Node)
- func (c *Crawler) Export(tree *webtree.Node, format string, filename string) error
- func (c *Crawler) ExportJSON(root *webtree.Node, filename string) error
- func (c *Crawler) ExportTXT(root *webtree.Node, filename string) error
- func (c *Crawler) ExportXML(tree *webtree.Node, filename string) error
- func (c *Crawler) ExtractLinks(page *webtree.Page) (links []string)
- func (c *Crawler) Fetch(page *webtree.Page)
- func (c *Crawler) IsSkipablePage(page *webtree.Page) bool
- func (c *Crawler) ProcessANode(node *webtree.Node)
- func (c *Crawler) SaveResults(root *webtree.Node)
Constants ¶
This section is empty.
Variables ¶
View Source
// URL-matching patterns, stored as raw regular-expression source strings.
var (
	// GeneralRegex matches an absolute http:// or https:// URL and captures
	// the whole URL in group 1. The host part must contain at least one dot,
	// and the final character class omits '.', ',' and ':' so the match
	// cannot end on one of those characters.
	GeneralRegex = `((?:https?)://[\w\-]+(?:\.[\w\-]+)+[\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])`

	// HrefRegex matches an href attribute whose value is wrapped in single
	// or double quotes and captures the non-empty value in group 1.
	HrefRegex = `href=["']([^"']+)["']`
)
View Source
// UnreadableExtensions enumerates file-name suffixes, each written in lower
// case with its leading dot, covering image, document, archive, executable,
// audio/video, font and design-tool formats.
//
// NOTE(review): presumably the crawler consults this list to avoid fetching
// or parsing non-text resources; the consuming code is not visible here, so
// confirm before relying on that.
var UnreadableExtensions = []string{
	".png", ".jpg", ".jpeg", ".gif",
	".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
	".zip", ".rar", ".tar", ".gz",
	".exe",
	".mp3", ".mp4", ".avi", ".mov", ".wmv", ".flv", ".wav", ".mpeg", ".mpg", ".m4v",
	".swf", ".svg", ".ico",
	".ttf", ".woff", ".woff2", ".eot", ".otf",
	".psd", ".ai", ".eps", ".indd", ".raw",
	".webm", ".m4a", ".m4p", ".m4b", ".m4r",
}
Functions ¶
This section is empty.
Types ¶
type Cache ¶
func (*Cache) AddVisited ¶
type Crawler ¶
type Crawler struct { RootURL string Level int ExportFile string RegexMap map[string]string ExcludedStatus []int IncludedUrls []string Client *http.Client UserAgent string Cache Cache Workers int Delay int }
func NewCrawler ¶
func (*Crawler) AddMatches ¶
func (*Crawler) CrawlNodeBlock ¶
func (*Crawler) CrawlNodeLive ¶
func (*Crawler) ExportJSON ¶
func (*Crawler) ProcessANode ¶ added in v2.0.14
func (*Crawler) SaveResults ¶
Click to show internal directories.
Click to hide internal directories.