Documentation ¶
Index ¶
- func AllDomainCollect(reqList []*model.Request) []string
- func GetPathsByFuzz(navReq model2.Request) []*model2.Request
- func GetPathsByFuzzDict(navReq model2.Request, dictPath string) []*model2.Request
- func GetPathsFromRobots(navReq model2.Request) []*model2.Request
- func SubDomainCollect(reqList []*model.Request, HostLimit string) []string
- type CrawlerTask
- type Result
- type TaskConfig
- type TaskConfigOptFunc
- func WithAllDomainReturn(gen bool) TaskConfigOptFunc
- func WithBeforeExitDelay(gen time.Duration) TaskConfigOptFunc
- func WithChromiumPath(gen string) TaskConfigOptFunc
- func WithCustomFormKeywordValues(gen map[string]string) TaskConfigOptFunc
- func WithCustomFormValues(gen map[string]string) TaskConfigOptFunc
- func WithDomContentLoadedTimeout(gen time.Duration) TaskConfigOptFunc
- func WithEncodeURLWithCharset(gen bool) TaskConfigOptFunc
- func WithEventTriggerInterval(gen time.Duration) TaskConfigOptFunc
- func WithEventTriggerMode(gen string) TaskConfigOptFunc
- func WithExtraHeaders(gen map[string]interface{}) TaskConfigOptFunc
- func WithExtraHeadersString(gen string) TaskConfigOptFunc
- func WithFilterMode(gen string) TaskConfigOptFunc
- func WithFuzzDictPath(gen string) TaskConfigOptFunc
- func WithIgnoreKeywords(gen []string) TaskConfigOptFunc
- func WithMaxCrawlCount(maxCrawlCount int) TaskConfigOptFunc
- func WithMaxTabsCount(gen int) TaskConfigOptFunc
- func WithNoHeadless(gen bool) TaskConfigOptFunc
- func WithPathByFuzz(gen bool) TaskConfigOptFunc
- func WithPathFromRobots(gen bool) TaskConfigOptFunc
- func WithProxy(gen string) TaskConfigOptFunc
- func WithSubDomainReturn(gen bool) TaskConfigOptFunc
- func WithTabRunTimeout(gen time.Duration) TaskConfigOptFunc
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func AllDomainCollect ¶
func GetPathsByFuzzDict ¶
* 使用字典列表进行fuzz
func GetPathsFromRobots ¶
* 从robots.txt文件中获取路径信息
Types ¶
type CrawlerTask ¶
type CrawlerTask struct {
	Browser    *engine2.Browser //
	RootDomain string           // 当前爬取根域名 用于子域名收集
	Targets    []*model.Request // 输入目标
	Result     *Result          // 最终结果
	Config     *TaskConfig      // 配置信息
	Pool       *ants.Pool       // 协程池
	Start      time.Time        // 开始时间
	// contains filtered or unexported fields
}
func NewCrawlerTask ¶
func NewCrawlerTask(targets []*model.Request, taskConf TaskConfig) (*CrawlerTask, error)
* 新建爬虫任务
type TaskConfig ¶
type TaskConfig struct {
	MaxCrawlCount           int    // 最大爬取的数量
	FilterMode              string // simple、smart、strict
	ExtraHeaders            map[string]interface{}
	ExtraHeadersString      string
	AllDomainReturn         bool // 全部域名收集
	SubDomainReturn         bool // 子域名收集
	NoHeadless              bool // headless模式
	DomContentLoadedTimeout time.Duration
	TabRunTimeout           time.Duration     // 单个标签页超时
	PathByFuzz              bool              // 通过字典进行Path Fuzz
	FuzzDictPath            string            // Fuzz目录字典
	PathFromRobots          bool              // 解析Robots文件找出路径
	MaxTabsCount            int               // 允许开启的最大标签页数量 即同时爬取的数量
	ChromiumPath            string            // Chromium的程序路径 `/home/zhusiyu1/chrome-linux/chrome`
	ChromiumWSUrl           string            // Websocket debugging URL for a running chrome session
	EventTriggerMode        string            // 事件触发的调用方式: 异步 或 顺序
	EventTriggerInterval    time.Duration     // 事件触发的间隔
	BeforeExitDelay         time.Duration     // 退出前的等待时间,等待DOM渲染,等待XHR发出捕获
	EncodeURLWithCharset    bool              // 使用检测到的字符集自动编码URL
	IgnoreKeywords          []string          // 忽略的关键字,匹配上之后将不再扫描且不发送请求
	Proxy                   string            // 请求代理
	CustomFormValues        map[string]string // 自定义表单填充参数
	CustomFormKeywordValues map[string]string // 自定义表单关键词填充内容
	MaxRunTime              int64             // 最大爬取时间(单位秒),超时则结束任务,平滑结束(比如某个url还未处理完不能结束,需要一次req完成后才可以结束整个任务)
}
func NewTaskConfig ¶
func NewTaskConfig(optFuncs ...TaskConfigOptFunc) *TaskConfig
type TaskConfigOptFunc ¶
type TaskConfigOptFunc func(*TaskConfig)
func WithAllDomainReturn ¶
func WithAllDomainReturn(gen bool) TaskConfigOptFunc
func WithBeforeExitDelay ¶
func WithBeforeExitDelay(gen time.Duration) TaskConfigOptFunc
func WithChromiumPath ¶
func WithChromiumPath(gen string) TaskConfigOptFunc
func WithCustomFormKeywordValues ¶
func WithCustomFormKeywordValues(gen map[string]string) TaskConfigOptFunc
func WithCustomFormValues ¶
func WithCustomFormValues(gen map[string]string) TaskConfigOptFunc
func WithDomContentLoadedTimeout ¶
func WithDomContentLoadedTimeout(gen time.Duration) TaskConfigOptFunc
func WithEncodeURLWithCharset ¶
func WithEncodeURLWithCharset(gen bool) TaskConfigOptFunc
func WithEventTriggerInterval ¶
func WithEventTriggerInterval(gen time.Duration) TaskConfigOptFunc
func WithEventTriggerMode ¶
func WithEventTriggerMode(gen string) TaskConfigOptFunc
func WithExtraHeaders ¶
func WithExtraHeaders(gen map[string]interface{}) TaskConfigOptFunc
func WithExtraHeadersString ¶
func WithExtraHeadersString(gen string) TaskConfigOptFunc
func WithFilterMode ¶
func WithFilterMode(gen string) TaskConfigOptFunc
func WithFuzzDictPath ¶
func WithFuzzDictPath(gen string) TaskConfigOptFunc
func WithIgnoreKeywords ¶
func WithIgnoreKeywords(gen []string) TaskConfigOptFunc
func WithMaxCrawlCount ¶
func WithMaxCrawlCount(maxCrawlCount int) TaskConfigOptFunc
func WithMaxTabsCount ¶
func WithMaxTabsCount(gen int) TaskConfigOptFunc
func WithNoHeadless ¶
func WithNoHeadless(gen bool) TaskConfigOptFunc
func WithPathByFuzz ¶
func WithPathByFuzz(gen bool) TaskConfigOptFunc
func WithPathFromRobots ¶
func WithPathFromRobots(gen bool) TaskConfigOptFunc
func WithProxy ¶
func WithProxy(gen string) TaskConfigOptFunc
func WithSubDomainReturn ¶
func WithSubDomainReturn(gen bool) TaskConfigOptFunc
func WithTabRunTimeout ¶
func WithTabRunTimeout(gen time.Duration) TaskConfigOptFunc
Click to show internal directories.
Click to hide internal directories.