crawler

package
v1.3.33 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 11, 2022 | License: BSD-3-Clause | Imports: 22 | Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// BannedExtensions lists file extensions, written without a leading dot.
// NOTE(review): presumably URLs whose path ends in one of these are skipped
// by the crawler — confirm against the code that reads this slice.
var BannedExtensions = []string{
	"asc",
	"avi",
	"bmp",
	"dll",
	"doc",
	"docx",
	"exe",
	"iso",
	"jpg",
	"mp3",
	"odt",
	"pdf",
	"png",
	"rar",
	"rdf",
	"svg",
	"tar",
	"tar.gz",
	"tar.bz2",
	"tgz",
	"txt",
	"wav",
	"wmv",
	"xml",
	"xz",
	"zip",
}

// BannedLocalRedirects is a string-keyed set of host names; every entry
// carries the placeholder value "1".
// NOTE(review): the name suggests redirects within these hosts are not
// followed — confirm against the code that reads this map.
var BannedLocalRedirects = map[string]string{
	"www.president.gov.ua": "1",
}

// BannedCIDRs holds IPv4 networks in CIDR notation: the loopback range
// (127.0.0.0/8) and the three RFC 1918 private ranges.
// NOTE(review): presumably targets resolving into these networks are
// rejected — confirm against the code that reads this slice.
var BannedCIDRs = []string{
	"10.0.0.0/8",
	"127.0.0.0/8",
	"172.16.0.0/12",
	"192.168.0.0/16",
}

// IgnoreNoFollow is a string-keyed set of domains; every entry carries the
// placeholder value "1".
// NOTE(review): the name suggests nofollow hints are disregarded for these
// domains — confirm against the code that reads this map.
var IgnoreNoFollow = map[string]string{
	"blogspot.com":  "1",
	"github.io":     "1",
	"tumblr.com":    "1",
	"wordpress.com": "1",
}

Functions

func CrawlURL

func CrawlURL(crawlerClient *apiclient.Client, targetURL string, debugMode bool, serverAddr string, robo RoboTesterInterface)

func FilterAndSubmit

func FilterAndSubmit(domainMap map[string]struct{}, c *apiclient.Client, serverAddr, ua string)

func SubmitOutgoingDomains

func SubmitOutgoingDomains(c *apiclient.Client, domains []string, serverAddr string)

Types

type RoboTesterInterface (added in v1.3.6)

// RoboTesterInterface is the type of the robo argument accepted by CrawlURL.
// No implementation is visible here, so the per-method notes below describe
// only the signatures; intended semantics are marked as assumptions.
type RoboTesterInterface interface {
	// InitWithUA configures the tester with the string ua.
	// NOTE(review): ua is presumably a User-Agent value — confirm.
	InitWithUA(ua string)

	// GetRobots returns robots data for path, or a non-nil error.
	// NOTE(review): presumably fetches and parses a robots.txt — confirm.
	GetRobots(path string) (robots *robotstxt.RobotsData, err error)

	// Test reports a boolean verdict for path.
	// NOTE(review): presumably true means the path may be crawled — confirm.
	Test(path string) bool

	// GetDelay returns a duration.
	// NOTE(review): presumably the crawl delay between requests — confirm.
	GetDelay() time.Duration
}

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL