core

package
v0.0.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 16, 2024 License: MIT Imports: 27 Imported by: 2

Documentation

Index

Constants

View Source
const (
	CLIName = "gospider"
	AUTHOR  = "@thebl4ckturtle & @j3ssiejjj"
	VERSION = "v1.1.6"
)
View Source
const SUBRE = `(?i)(([a-zA-Z0-9]{1}|[_a-zA-Z0-9]{1}[_a-zA-Z0-9-]{0,61}[a-zA-Z0-9]{1})[.]{1})+`

Variables

View Source
var AWSS3 = regexp.MustCompile(`(?i)[a-z0-9.-]+\.s3\.amazonaws\.com|[a-z0-9.-]+\.s3-[a-z0-9-]\.amazonaws\.com|[a-z0-9.-]+\.s3-website[.-](eu|ap|us|ca|sa|cn)|//s3\.amazonaws\.com/[a-z0-9._-]+|//s3-[a-z0-9-]+\.amazonaws\.com/[a-z0-9._-]+`)
View Source
var DefaultHTTPTransport = &http.Transport{
	DialContext: (&net.Dialer{
		Timeout: 10 * time.Second,

		KeepAlive: 30 * time.Second,
	}).DialContext,
	MaxIdleConns:    100,
	MaxConnsPerHost: 1000,
	IdleConnTimeout: 30 * time.Second,

	TLSClientConfig: &tls.Config{InsecureSkipVerify: true, Renegotiation: tls.RenegotiateOnceAsClient},
}
View Source
var Logger *logrus.Logger

Functions

func CleanSubdomain

func CleanSubdomain(s string) string

func DecodeChars

func DecodeChars(s string) string

func FilterNewLines

func FilterNewLines(s string) string

func FixUrl

func FixUrl(mainSite *url.URL, nextLoc string) string

func GetAWSS3

func GetAWSS3(source string) []string

func GetDomain

func GetDomain(site *url.URL) string

func GetExtType

func GetExtType(rawUrl string) string

func GetRawCookie

func GetRawCookie(cookies []*http.Cookie) string

func GetSubdomains

func GetSubdomains(source, domain string) []string

func InScope

func InScope(u *url.URL, regexps []*regexp.Regexp) bool

func LinkFinder

func LinkFinder(source string) ([]string, error)

func LoadCookies

func LoadCookies(rawCookie string) []*http.Cookie

func NormalizePath

func NormalizePath(path string) string

NormalizePath normalizes the given path.

func OtherSources

func OtherSources(domain string, includeSubs bool) []string

func ReadingLines

func ReadingLines(filename string) []string

ReadingLines reads the named file and returns its content as a []string, one element per line.

func Unique

func Unique(intSlice []string) []string

Types

type CollyConfigurator

type CollyConfigurator func(c *colly.Collector) error

func WithBurpFile

func WithBurpFile(burpFile string) CollyConfigurator

func WithCookie

func WithCookie(cookie string) CollyConfigurator

func WithDefaultDisalowedRegexp

func WithDefaultDisalowedRegexp() CollyConfigurator

func WithDisallowedRegexFilter

func WithDisallowedRegexFilter(regFilter string) CollyConfigurator

func WithHTTPClient

func WithHTTPClient(client *http.Client) CollyConfigurator

func WithHTTPClientOpt

func WithHTTPClientOpt(opt ...HTTPClientConfigurator) CollyConfigurator

func WithHeader

func WithHeader(headers ...string) CollyConfigurator

func WithLimit

func WithLimit(concurrent int, delay int, randomDelay int) CollyConfigurator

func WithRegexpFilter

func WithRegexpFilter(regFilter string) CollyConfigurator

func WithScope

func WithScope(scope string) CollyConfigurator

func WithUserAgent

func WithUserAgent(randomUA string) CollyConfigurator

func WithWhiteListDomain

func WithWhiteListDomain(whiteListDomain string) CollyConfigurator

type Crawler

type Crawler struct {
	// C                   *colly.Collector
	// LinkFinderCollector *colly.Collector
	Output io.Writer
	// contains filtered or unexported fields
}

func NewCrawler

func NewCrawler(opt ...CrawlerOption) *Crawler

func (*Crawler) Start

func (crawler *Crawler) Start(site ...string) (<-chan SpiderReport, <-chan error)

func (*Crawler) StreamScrawl

func (crawler *Crawler) StreamScrawl(ctx context.Context, siteC <-chan string) (<-chan SpiderReport, <-chan error)

type CrawlerOption

type CrawlerOption func(crawler *Crawler)

func WithCollyConfig

func WithCollyConfig(opt ...CollyConfigurator) CrawlerOption

func WithCollyOption

func WithCollyOption(options ...colly.CollectorOption) CrawlerOption

func WithDefaultColly

func WithDefaultColly(maxDepth int) CrawlerOption

func WithFilterLength

func WithFilterLength(filterLength string) CrawlerOption

func WithOtherSources

func WithOtherSources() CrawlerOption

func WithOutput

func WithOutput(writer ...io.Writer) CrawlerOption

func WithRobot

func WithRobot() CrawlerOption

func WithSitemap

func WithSitemap() CrawlerOption

type HTTPClientConfigurator

type HTTPClientConfigurator func(client *http.Client)

func WithHTTPNoRedirect

func WithHTTPNoRedirect() HTTPClientConfigurator

func WithHTTPProxy

func WithHTTPProxy(proxy string) HTTPClientConfigurator

func WithHTTPTimeout

func WithHTTPTimeout(timeout int) HTTPClientConfigurator

type OutputType

type OutputType string
var (
	Ref    OutputType = "ref"
	Src    OutputType = "src"
	Upload OutputType = "upload-form"
	Form   OutputType = "form"
	Url    OutputType = "url"
	S3     OutputType = "aws-s3"
	Domain OutputType = "domain"
)

func (OutputType) FixUrl

func (ot OutputType) FixUrl(mainUrl *url.URL, newLoc string) string

func (OutputType) KeepCrawling

func (ot OutputType) KeepCrawling() func(value SpiderReport) []string

type SpiderReport

type SpiderReport struct {
	Output     string     `json:"output" pp:"Output"`
	OutputType OutputType `json:"type" pp:"Type"`
	StatusCode int        `json:"status" pp:"Status"`
	Source     string     `json:"source" pp:"Source"`
	Body       string     `json:"-" pp:"-"`
	Err        error
	Input      *url.URL `json:"input"`
	Length     int      `json:"length"`
}

func (SpiderReport) AsyncDerivatedValues

func (ov SpiderReport) AsyncDerivatedValues() (<-chan []SpiderReport, <-chan error)

func (SpiderReport) AwsS3DerivatedValues

func (ov SpiderReport) AwsS3DerivatedValues() ([]SpiderReport, error)

func (SpiderReport) DerivatedValues

func (ov SpiderReport) DerivatedValues() ([]SpiderReport, error)

func (SpiderReport) FixUrl

func (ov SpiderReport) FixUrl() SpiderReport

func (SpiderReport) KeepCrawling

func (ov SpiderReport) KeepCrawling() []string

func (SpiderReport) SubdomainsDerivatedValues

func (ov SpiderReport) SubdomainsDerivatedValues() ([]SpiderReport, error)

SubdomainsDerivatedValues searches for subdomains in the body of the SpiderReport receiver. If the body is empty, no search is performed. The resulting values are clones of the receiver, except that Output is set to the FQDN that was found and OutputType is set to `Domain`.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL