Documentation

Index
- Constants
- Variables
- func BodyToString(b io.Reader) string
- func CleanSubdomain(s string) string
- func DecodeChars(s string) string
- func FilterNewLines(s string) string
- func FixUrl(mainSite *url.URL, nextLoc string) string
- func GetAWSS3(source string) []string
- func GetDomain(site *url.URL) string
- func GetExtType(rawUrl string) string
- func GetRawCookie(cookies []*http.Cookie) string
- func GetSubdomains(source, domain string) []string
- func InScope(u *url.URL, regexps []*regexp.Regexp) bool
- func LinkFinder(source string) ([]string, error)
- func LoadCookies(rawCookie string) []*http.Cookie
- func NormalizePath(path string) string
- func OtherSources(domain string, includeSubs bool) []string
- func ParseRobots(site *url.URL, crawler *Crawler, c *colly.Collector, wg *sync.WaitGroup)
- func ParseSiteMap(site *url.URL, crawler *Crawler, c *colly.Collector, wg *sync.WaitGroup)
- func ReadingLines(filename string) []string
- func Unique(intSlice []string) []string
- type Crawler
- type Output
- type SpiderOutput
Constants

const (
    CLIName = "gocrawler"
    AUTHOR  = "@zerokeeper"
    VERSION = "v1.0.0"
)
const SUBRE = `(?i)(([a-zA-Z0-9]{1}|[_a-zA-Z0-9]{1}[_a-zA-Z0-9-]{0,61}[a-zA-Z0-9]{1})[.]{1})+`
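SUBRE matches one or more dot-terminated host labels. A minimal sketch of how such a prefix pattern can be combined with a target domain to match subdomains; the composition with regexp.QuoteMeta below is an illustration, not necessarily how GetSubdomains uses SUBRE internally:

package main

import (
    "fmt"
    "regexp"
)

// SUBRE as declared above: one or more dot-terminated host labels.
const SUBRE = `(?i)(([a-zA-Z0-9]{1}|[_a-zA-Z0-9]{1}[_a-zA-Z0-9-]{0,61}[a-zA-Z0-9]{1})[.]{1})+`

func main() {
    // Hypothetical target domain; appending it yields a full subdomain matcher.
    re := regexp.MustCompile(SUBRE + regexp.QuoteMeta("example.com"))
    src := `<a href="https://blog.example.com/">blog</a> api.example.com`
    fmt.Println(re.FindAllString(src, -1)) // [blog.example.com api.example.com]
}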
Variables
var AWSS3 = regexp.MustCompile(`(?i)[a-z0-9.-]+\.s3\.amazonaws\.com|[a-z0-9.-]+\.s3-[a-z0-9-]\.amazonaws\.com|[a-z0-9.-]+\.s3-website[.-](eu|ap|us|ca|sa|cn)|//s3\.amazonaws\.com/[a-z0-9._-]+|//s3-[a-z0-9-]+\.amazonaws\.com/[a-z0-9._-]+`)
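A short sketch of scanning a response body for S3 bucket references with this pattern; the pattern below is copied verbatim from the declaration above, and GetAWSS3 presumably wraps the same matching:

package main

import (
    "fmt"
    "regexp"
)

var AWSS3 = regexp.MustCompile(`(?i)[a-z0-9.-]+\.s3\.amazonaws\.com|[a-z0-9.-]+\.s3-[a-z0-9-]\.amazonaws\.com|[a-z0-9.-]+\.s3-website[.-](eu|ap|us|ca|sa|cn)|//s3\.amazonaws\.com/[a-z0-9._-]+|//s3-[a-z0-9-]+\.amazonaws\.com/[a-z0-9._-]+`)

func main() {
    body := `<img src="https://assets.example.s3.amazonaws.com/logo.png">`
    fmt.Println(AWSS3.FindAllString(body, -1)) // [assets.example.s3.amazonaws.com]
}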
var DefaultHTTPTransport = &http.Transport{
    DialContext: (&net.Dialer{
        Timeout:   10 * time.Second,
        KeepAlive: 30 * time.Second,
    }).DialContext,
    MaxIdleConns:    100,
    MaxConnsPerHost: 1000,
    IdleConnTimeout: 30 * time.Second,
    TLSClientConfig: &tls.Config{InsecureSkipVerify: true, Renegotiation: tls.RenegotiateOnceAsClient},
}
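Note that this transport disables TLS certificate verification (InsecureSkipVerify: true), which suits scanning but not general-purpose clients. A minimal sketch of plugging it into an http.Client follows. Because this page does not show the package's import path, this and the later sketches are written as fragments that call the package's identifiers unqualified:

// Sketch: reuse the package's transport in a client with an overall timeout.
client := &http.Client{
    Transport: DefaultHTTPTransport,
    Timeout:   20 * time.Second, // hypothetical overall request timeout
}
resp, err := client.Get("https://example.com/")
if err == nil {
    defer resp.Body.Close()
}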
var Logger *logrus.Logger

Functions
func BodyToString

func BodyToString(b io.Reader) string
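A small usage sketch, assuming BodyToString drains the reader and returns its contents as a string:

// strings.NewReader stands in for an *http.Response body here.
s := BodyToString(strings.NewReader("hello, world"))
fmt.Println(s) // hello, world, under the assumption above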
func CleanSubdomain

func CleanSubdomain(s string) string
func DecodeChars

func DecodeChars(s string) string
func FilterNewLines

func FilterNewLines(s string) string

func FixUrl

func FixUrl(mainSite *url.URL, nextLoc string) string

func GetAWSS3

func GetAWSS3(source string) []string

func GetDomain

func GetDomain(site *url.URL) string
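A sketch of the URL helpers above, assuming FixUrl resolves nextLoc against the base site and GetDomain returns the site's registrable domain; the values in the comments are expectations under those assumptions, not verified output:

base, _ := url.Parse("https://sub.example.com/app/index.html")
fmt.Println(FixUrl(base, "../static/main.js")) // expected: https://sub.example.com/static/main.js
fmt.Println(GetDomain(base))                   // expected: example.com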
func GetExtType

func GetExtType(rawUrl string) string
func GetRawCookie

func GetRawCookie(cookies []*http.Cookie) string
func GetSubdomains

func GetSubdomains(source, domain string) []string

func InScope

func InScope(u *url.URL, regexps []*regexp.Regexp) bool
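Sketches for both helpers: GetSubdomains presumably applies a SUBRE-based matcher to the source, and InScope is assumed to report whether the URL matches at least one of the supplied scope patterns (whether it tests the host or the full URL is not stated on this page):

src := `<script src="https://static.example.com/app.js"></script>`
fmt.Println(GetSubdomains(src, "example.com")) // expected: [static.example.com]

u, _ := url.Parse("https://blog.example.com/wp-admin/")
scope := []*regexp.Regexp{
    regexp.MustCompile(`(^|\.)example\.com`), // hypothetical scope rule
}
fmt.Println(InScope(u, scope))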
func LinkFinder

func LinkFinder(source string) ([]string, error)
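A sketch, assuming LinkFinder extracts endpoint- and URL-like strings from raw (typically JavaScript) source; the exact pattern set it applies is not shown on this page:

js := `fetch("/api/v1/users"); var cdn = "https://cdn.example.com/app.js";`
links, err := LinkFinder(js)
if err != nil {
    panic(err) // sketch only
}
for _, link := range links {
    fmt.Println(link)
}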
func LoadCookies

func LoadCookies(rawCookie string) []*http.Cookie

func NormalizePath

func NormalizePath(path string) string
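A round-trip sketch with GetRawCookie (above), assuming LoadCookies parses a browser-style "name=value; name=value" cookie string:

cookies := LoadCookies("session=abc123; theme=dark") // hypothetical raw cookie header
for _, c := range cookies {
    fmt.Println(c.Name, "=", c.Value)
}
fmt.Println(GetRawCookie(cookies)) // expected to reproduce the raw string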
func OtherSources

func OtherSources(domain string, includeSubs bool) []string
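A sketch, assuming OtherSources collects known URLs for the domain from third-party sources over the network (so results vary between runs), with includeSubs presumably widening the query to subdomains:

urls := OtherSources("example.com", true)
fmt.Printf("collected %d URLs from external sources\n", len(urls))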
func ParseRobots

func ParseRobots(site *url.URL, crawler *Crawler, c *colly.Collector, wg *sync.WaitGroup)
func ParseSiteMap

func ParseSiteMap(site *url.URL, crawler *Crawler, c *colly.Collector, wg *sync.WaitGroup)
func ReadingLines

func ReadingLines(filename string) []string

ReadingLines reads the named file and returns its content as a []string.

func Unique

func Unique(intSlice []string) []string
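A sketch combining ReadingLines with Unique to deduplicate a wordlist; the file path is hypothetical:

lines := ReadingLines("wordlists/paths.txt") // hypothetical input file
for _, p := range Unique(lines) {
    fmt.Println(p)
}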
Types

type Crawler
type Output

type Output struct {
    // contains filtered or unexported fields
}
func (*Output) WriteToFile
type SpiderOutput

type SpiderOutput struct {
    //Input string `json:"input"`
    Source string `json:"source"`
    //OutputType string `json:"type"`
    Url        string            `json:"output"`
    Method     string            `json:"method"`
    Header     map[string]string `json:"header"`
    Data       string            `json:"data"`
    StatusCode int               `json:"status"`
    Length     int               `json:"length"`
}
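A sketch of populating the struct and serializing it with the declared JSON tags; the field values below are hypothetical:

out := SpiderOutput{
    Source:     "body",
    Url:        "https://example.com/login",
    Method:     "GET",
    Header:     map[string]string{"User-Agent": "gocrawler"},
    Data:       "",
    StatusCode: 200,
    Length:     1024,
}
b, _ := json.Marshal(out)
fmt.Println(string(b))
// {"source":"body","output":"https://example.com/login","method":"GET","header":{"User-Agent":"gocrawler"},"data":"","status":200,"length":1024}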