Documentation ¶
Index ¶
- Variables
- type Cache
- type Crawler
- func (c *Crawler) AddMatches(page webtree.Page)
- func (c *Crawler) Crawl()
- func (c *Crawler) CrawlNodeBlock(w *webtree.Node)
- func (c *Crawler) CrawlNodeLive(w *webtree.Node)
- func (c *Crawler) Export(tree webtree.Node, format string, filename string) error
- func (c *Crawler) ExportJSON(root webtree.Node, filename string) error
- func (c *Crawler) ExportTXT(root webtree.Node, filename string) error
- func (c *Crawler) ExportXML(tree webtree.Node, filename string) error
- func (c *Crawler) ExtractLinks(page *webtree.Page) (links []string)
- func (c *Crawler) Fetch(page *webtree.Page)
- func (c *Crawler) IsSkipablePage(page webtree.Page) bool
- func (c *Crawler) SaveResults(root webtree.Node)
Constants ¶
This section is empty.
Variables ¶
var (
GeneralRegex = `((?:https?)://[\w\-]+(?:\.[\w\-]+)+[\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])`
)
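The exported GeneralRegex matches absolute http/https URLs. The sketch below is a minimal, self-contained illustration: it copies the same pattern into a local constant (rather than guessing this package's import path) and uses Go's regexp package to pull links out of a snippet of HTML.

package main

import (
	"fmt"
	"regexp"
)

// generalRegex mirrors the exported GeneralRegex variable above; it is
// duplicated locally only so this sketch is self-contained.
const generalRegex = `((?:https?)://[\w\-]+(?:\.[\w\-]+)+[\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])`

func main() {
	html := `<a href="https://example.com/docs">docs</a> and http://blog.example.org/post?id=1`
	re := regexp.MustCompile(generalRegex)
	for _, link := range re.FindAllString(html, -1) {
		fmt.Println(link) // prints each URL found in the input
	}
}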
Functions ¶
This section is empty.
Types ¶
type Cache ¶
func (*Cache) AddVisited ¶
type Crawler ¶
type Crawler struct {
	RootURL        string
	Level          int
	LiveMode       bool
	ExportFile     string
	RegexMap       map[string]string
	ExcludedStatus []int
	IncludedUrls   []string
	Client         *http.Client
	Cache          Cache
}
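A minimal configuration sketch, assuming a hypothetical import path and that the zero value of Cache is usable as-is; in practice the NewCrawler constructor listed below is presumably the intended entry point, but its signature is not reproduced on this page.

package main

import (
	"net/http"
	"time"

	// The import path is an assumption; adjust it to the real module layout.
	"example.com/yourproject/crawler"
)

func main() {
	c := &crawler.Crawler{
		RootURL:        "https://example.com",
		Level:          2,     // assumed to be the maximum crawl depth
		LiveMode:       false, // false presumably selects CrawlNodeBlock over CrawlNodeLive
		ExportFile:     "results.json",
		RegexMap:       map[string]string{"email": `[\w.+-]+@[\w-]+\.[\w.-]+`}, // illustrative named pattern
		ExcludedStatus: []int{403, 404},                                        // assumed: status codes to skip
		IncludedUrls:   []string{"example.com"},                                // assumed: URLs/hosts to keep
		Client:         &http.Client{Timeout: 10 * time.Second},
	}

	// Crawl walks pages starting from RootURL (signature from the index above).
	c.Crawl()
}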
func NewCrawler ¶
func (*Crawler) AddMatches ¶
func (*Crawler) CrawlNodeBlock ¶
func (*Crawler) CrawlNodeLive ¶
func (*Crawler) SaveResults ¶
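For completeness, a sketch of the exporters whose signatures appear in the index above. The import paths are assumptions, and the zero-value webtree.Node stands in for a tree that a real crawl would produce.

package main

import (
	"log"

	// Both import paths are assumptions; adjust them to the real module layout.
	"example.com/yourproject/crawler"
	"example.com/yourproject/webtree"
)

func main() {
	c := &crawler.Crawler{RootURL: "https://example.com"}

	// A real root node would come from a crawl; the zero value is used here
	// only to show how the exporters are invoked.
	var root webtree.Node

	// ExportJSON writes the tree to a file; ExportXML, ExportTXT and the
	// format-string variant Export follow the same shape (see the index above).
	if err := c.ExportJSON(root, "results.json"); err != nil {
		log.Fatal(err)
	}
}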