spider

package
v0.0.0-...-bedd13b Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 1, 2022 License: MIT Imports: 12 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// UnescapeHTML blanks out escaped characters in text before domain names are
// extracted from it. Despite its name it does not decode anything: every
// recognised escape sequence is replaced with a single space, so that the hex
// digits of an escape cannot be glued onto a neighbouring domain name.
//
// Two families of escape sequences are recognised:
//
//   - `\u00XX` for code points 0x20 through 0x7e, where the last hex digit may
//     be written in either case (`\u002f` and `\u002F`).
//   - `%XX` for bytes 0x20 through 0xFF, where both hex digits may be written
//     in either case (`%2F`, `%2f`, `%E4`, `%e4`, `%eF`, ...). Percent-encoded
//     hex digits are case-insensitive, so lower-case spellings are blanked
//     out exactly like upper-case ones.
//
// No key is a prefix of another key, so the order of the pairs does not
// influence the result.
var UnescapeHTML = func() *strings.Replacer {
	const (
		upperHex = "0123456789ABCDEF"
		lowerHex = "0123456789abcdef"
	)

	// spellings returns every way the hex digit n (0-15) may be written:
	// one form for 0-9, lower- and upper-case forms for a-f.
	spellings := func(n int) []string {
		if n < 10 {
			return []string{upperHex[n : n+1]}
		}
		return []string{lowerHex[n : n+1], upperHex[n : n+1]}
	}

	var pairs []string
	blank := func(escape string) {
		pairs = append(pairs, escape, " ")
	}

	// `\u0020` .. `\u007e`: the high digit is always 2-7, so only the low
	// digit has case variants.
	for code := 0x20; code <= 0x7e; code++ {
		hi, lo := code>>4, code&0x0f
		for _, l := range spellings(lo) {
			blank(`\u00` + upperHex[hi:hi+1] + l)
		}
	}

	// `%20` .. `%FF`: both digits may be letters, so emit every case
	// combination.
	for code := 0x20; code <= 0xff; code++ {
		hi, lo := code>>4, code&0x0f
		for _, h := range spellings(hi) {
			for _, l := range spellings(lo) {
				blank("%" + h + l)
			}
		}
	}

	return strings.NewReplacer(pairs...)
}()

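UnescapeHTML replaces escaped characters (`\u00XX` escape sequences and `%XX` percent-encodings) with a single space, so that the escape's hex digits are not mistaken for part of a domain name when domains are extracted from text.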

Functions

This section is empty.

Types

type Domain

// Domain is a single domain record made up of four string fields. Within this
// package it is the element type of the slice returned by FindDomains, the
// value accepted by Writer.Write, and the receiver of CSVRow.
//
// NOTE(review): the meaning of the fields is not documented here. Judging by
// the names, URL is presumably the address the domain was seen at, Name and
// TLD the parts of the domain name, and Status the outcome of a check —
// confirm against FindDomains and its callers.
type Domain struct {
	URL    string
	Name   string
	TLD    string
	Status string
}

Domain

func FindDomains

func FindDomains(body []byte) (domains []Domain)

FindDomains

func (Domain) CSVRow

func (d Domain) CSVRow() []string

CSVRow

type Page

// Page bundles a parsed URL, an integer status and a body held as raw bytes.
//
// NOTE(review): presumably this is one fetched page, with Status being the
// HTTP status code and Body the response body — confirm against the crawler
// code that fills it in.
type Page struct {
	URL    *url.URL
	Status int
	Body   []byte
}

Page

type Setting

// Setting is the configuration tree returned by ParseSetting. Options are
// grouped into the Crawler, Log, Store and Result sections, followed by a few
// top-level fields.
//
// NOTE(review), to be confirmed against ParseSetting and the code that reads
// these values:
//   - Log.Rotate is declared as an int, yet its comment gives the format as
//     "30d"; check which unit the integer carries and whether the suffix is
//     parsed somewhere else.
//   - Parralle is presumably a misspelling of "Parallel" (a worker count?).
//     The name is exported, so renaming it would break existing callers.
//   - TLDs looks like a set of top-level domains keyed by name.
//   - Crawler.Limit presumably means "Rate requests per Interval".
type Setting struct {
	Crawler struct {
		MaxDepth int32
		Filter   []string
		Limit    struct {
			Rate     int
			Interval time.Duration
		}
		MaxBodySize int64
		UserAgents  []string
		Proxies     []string
	}
	Log struct {
		Rotate int // format: 30d
		Path   string
	}
	Store struct {
		TTL  time.Duration
		Path string
	}
	Result struct {
		Path string
	}
	Parralle int
	Timeout  time.Duration
	TLDs     map[string]bool
}

Setting

func ParseSetting

func ParseSetting(fp string) *Setting

ParseSetting

type Storage

// Storage is implemented by stores that can answer whether a given name has
// already been checked and that can be closed when no longer needed.
//
// NOTE(review): whether HasChecked also records the name as checked, and
// whether implementations are safe for concurrent use, cannot be told from
// the interface alone — confirm against the implementations.
type Storage interface {
	HasChecked(name string) bool
	Close() error
}

Storage

type Writer

// Writer is implemented by sinks that accept one Domain at a time. Write
// returns a non-nil error when the domain could not be written.
//
// NOTE(review): presumably the destination is the file named by
// Setting.Result.Path — confirm against the implementations.
type Writer interface {
	Write(*Domain) error
}

Writer

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL