pkg

package

Version: v0.4.5 (latest) | Published: Feb 10, 2022 | License: GPL-3.0 | Imports: 14 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/shadow1ng/crawlergo

Links

Open Source Insights

Documentation ¶

Index ¶

func AllDomainCollect(reqList []*model.Request) []string
func GetPathsByFuzz(navReq model.Request) []*model.Request
func GetPathsByFuzzDict(navReq model.Request, dictPath string) []*model.Request
func GetPathsFromRobots(navReq model.Request) []*model.Request
func SubDomainCollect(reqList []*model.Request, HostLimit string) []string
type CrawlerTask
- func NewCrawlerTask(targets []*model.Request, taskConf TaskConfig) (*CrawlerTask, error)
- func (t *CrawlerTask) Run()
type Result
type TaskConfig

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func AllDomainCollect ¶

func AllDomainCollect(reqList []*model.Request) []string

func GetPathsByFuzz ¶

func GetPathsByFuzz(navReq model.Request) []*model.Request

* 使用常见路径列表进行fuzz

func GetPathsByFuzzDict ¶

func GetPathsByFuzzDict(navReq model.Request, dictPath string) []*model.Request

* 使用字典列表进行fuzz

func GetPathsFromRobots ¶

func GetPathsFromRobots(navReq model.Request) []*model.Request

* 从robots.txt文件中获取路径信息

func SubDomainCollect ¶

func SubDomainCollect(reqList []*model.Request, HostLimit string) []string

Types ¶

type CrawlerTask ¶

// CrawlerTask bundles the state of one crawl task: the browser handle, the
// input targets, the task configuration, the accumulated result and the
// goroutine pool.
type CrawlerTask struct {
	Browser    *engine.Browser  // browser handle from the engine package
	RootDomain string           // root domain of the current crawl; used for subdomain collection
	Targets    []*model.Request // input targets
	Result     *Result          // final result
	Config     *TaskConfig      // configuration

	Pool *ants.Pool // goroutine pool
	// contains filtered or unexported fields
}

func NewCrawlerTask ¶

func NewCrawlerTask(targets []*model.Request, taskConf TaskConfig) (*CrawlerTask, error)

* 新建爬虫任务

func (*CrawlerTask) Run ¶

func (t *CrawlerTask) Run()

* 开始当前任务

type Result ¶

// Result holds the request lists and domain lists produced by a crawl task.
type Result struct {
	ReqList       []*model.Request // returned results on the same domain
	AllReqList    []*model.Request // requests across all domains
	AllDomainList []string         // list of all domains
	SubDomainList []string         // list of subdomains
	// contains filtered or unexported fields
}

type TaskConfig ¶

// TaskConfig holds the configuration options for a crawler task.
type TaskConfig struct {
	MaxCrawlCount           int    // maximum number of items to crawl
	FilterMode              string // simple, smart, strict
	ExtraHeaders            map[string]interface{}
	ExtraHeadersString      string
	AllDomainReturn         bool // collect all domains
	SubDomainReturn         bool // collect subdomains
	IncognitoContext        bool // enable incognito mode
	NoHeadless              bool // headless mode; NOTE(review): the name suggests true disables headless — confirm
	DomContentLoadedTimeout time.Duration
	TabRunTimeout           time.Duration     // timeout for a single tab
	PathByFuzz              bool              // fuzz paths using a dictionary
	FuzzDictPath            string            // dictionary used for directory fuzzing
	PathFromRobots          bool              // parse the robots file to discover paths
	MaxTabsCount            int               // maximum number of tabs allowed open, i.e. the number of concurrent crawls
	ChromiumPath            string            // path to the Chromium executable, e.g. `/home/zhusiyu1/chrome-linux/chrome`
	EventTriggerMode        string            // how events are triggered: asynchronous or sequential
	EventTriggerInterval    time.Duration     // interval between event triggers
	BeforeExitDelay         time.Duration     // wait time before exit, allowing the DOM to render and XHR requests to be sent and captured
	EncodeURLWithCharset    bool              // automatically encode URLs using the detected charset
	IgnoreKeywords          []string          // keywords to ignore; a match is no longer scanned and no request is sent
	Proxy                   string            // request proxy
	CustomFormValues        map[string]string // custom form fill values
	CustomFormKeywordValues map[string]string // custom fill content for form keywords
}

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
config
engine
filter
js
logger
model
tools	随机数相关函数
requests

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL