Documentation ¶
Index ¶
- Variables
- type Cache
- type Crawler
- func (c *Crawler) AddMatches(page webtree.Page)
- func (c *Crawler) Crawl()
- func (c *Crawler) CrawlNodeBlock(w *webtree.Node)
- func (c *Crawler) CrawlNodeLive(w *webtree.Node)
- func (c *Crawler) Export(tree webtree.Node, format string, filename string) error
- func (c *Crawler) ExportJSON(root webtree.Node, filename string) error
- func (c *Crawler) ExportTXT(root webtree.Node, filename string) error
- func (c *Crawler) ExportXML(tree webtree.Node, filename string) error
- func (c *Crawler) ExtractLinks(page *webtree.Page) (links []string)
- func (c *Crawler) Fetch(page *webtree.Page)
- func (c *Crawler) IsSkipablePage(page webtree.Page) bool
- func (c *Crawler) SaveResults(root webtree.Node)
Constants ¶
This section is empty.
Variables ¶
var (
GeneralRegex = `((?:https?)://[\w\-]+(?:\.[\w\-]+)+[\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])`
)
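The exported GeneralRegex matches absolute http/https URLs. The sketch below is a minimal, self-contained illustration: it copies the same pattern into a local constant (rather than guessing this package's import path) and uses Go's regexp package to pull links out of a snippet of HTML.

package main

import (
	"fmt"
	"regexp"
)

// generalRegex mirrors the exported GeneralRegex variable above; it is
// duplicated locally only so this sketch is self-contained.
const generalRegex = `((?:https?)://[\w\-]+(?:\.[\w\-]+)+[\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])`

func main() {
	html := `<a href="https://example.com/docs">docs</a> and http://blog.example.org/post?id=1`
	re := regexp.MustCompile(generalRegex)
	for _, link := range re.FindAllString(html, -1) {
		fmt.Println(link) // prints each URL found in the input
	}
}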
Functions ¶
This section is empty.
Types ¶
type Cache ¶
func (*Cache) AddVisited ¶
type Crawler ¶
type Crawler struct {
	RootURL        string
	Level          int
	LiveMode       bool
	ExportFile     string
	RegexMap       map[string]string
	ExcludedStatus []int
	IncludedUrls   []string
	Client         *http.Client
	Cache          Cache
}
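A minimal configuration sketch, assuming a hypothetical import path and that the zero value of Cache is usable as-is; in practice the NewCrawler constructor listed below is presumably the intended entry point, but its signature is not reproduced on this page.

package main

import (
	"net/http"
	"time"

	// The import path is an assumption; adjust it to the real module layout.
	"example.com/yourproject/crawler"
)

func main() {
	c := &crawler.Crawler{
		RootURL:        "https://example.com",
		Level:          2,     // assumed to be the maximum crawl depth
		LiveMode:       false, // false presumably selects CrawlNodeBlock over CrawlNodeLive
		ExportFile:     "results.json",
		RegexMap:       map[string]string{"email": `[\w.+-]+@[\w-]+\.[\w.-]+`}, // illustrative named pattern
		ExcludedStatus: []int{403, 404},                                        // assumed: status codes to skip
		IncludedUrls:   []string{"example.com"},                                // assumed: URLs/hosts to keep
		Client:         &http.Client{Timeout: 10 * time.Second},
	}

	// Crawl walks pages starting from RootURL (signature from the index above).
	c.Crawl()
}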
func NewCrawler ¶
func (*Crawler) AddMatches ¶
func (*Crawler) CrawlNodeBlock ¶
func (*Crawler) CrawlNodeLive ¶
func (*Crawler) SaveResults ¶
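For completeness, a sketch of the exporters whose signatures appear in the index above. The import paths are assumptions, and the zero-value webtree.Node stands in for a tree that a real crawl would produce.

package main

import (
	"log"

	// Both import paths are assumptions; adjust them to the real module layout.
	"example.com/yourproject/crawler"
	"example.com/yourproject/webtree"
)

func main() {
	c := &crawler.Crawler{RootURL: "https://example.com"}

	// A real root node would come from a crawl; the zero value is used here
	// only to show how the exporters are invoked.
	var root webtree.Node

	// ExportJSON writes the tree to a file; ExportXML, ExportTXT and the
	// format-string variant Export follow the same shape (see the index above).
	if err := c.ExportJSON(root, "results.json"); err != nil {
		log.Fatal(err)
	}
}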