crawlerx

package
v1.3.2 Latest

Warning

This package is not in the latest version of its module.
Published: Apr 12, 2024 License: AGPL-3.0 Imports: 34 Imported by: 0

README

CrawlerX Crawler Module Usage Guide


Example

yakit.AutoInitYakit()

targetUrl = cli.String("targetUrl")
wsAddress = cli.String("wsAddress")
exePath = cli.String("exePath")
proxy = cli.String("proxy")
proxyUsername = cli.String("proxyUsername")
proxyPassword = cli.String("proxyPassword")
pageTimeout = cli.Int("pageTimeout")
fullTimeout = cli.Int("fullTimeout")
formFill = cli.String("formFill")
fileUpload = cli.String("fileUpload")
header = cli.String("header")
cookie = cli.String("cookie")
scanRange = cli.String("scanRange")
scanRepeat = cli.String("scanRepeat")
maxUrl = cli.Int("maxUrl")
maxDepth = cli.Int("maxDepth")
ignoreQuery = cli.String("ignoreQuery")
extraWaitLoad = cli.Int("extraWaitLoad")

blacklist = cli.String("blacklist")
whitelist = cli.String("whitelist")
sensitiveWords = cli.String("sensitiveWords")
leakless = cli.String("leakless", cli.setDefault("default"))
concurrent = cli.Int("concurrent", cli.setDefault(3))
rawHeaders = cli.String("rawHeaders")
rawCookie = cli.String("rawCookie")

// stringToDict parses a "key1:value1;key2:value2" string into a map.
// Note: keys and values themselves must not contain ';' or ':'.
func stringToDict(tempStr) {
    result = make(map[string]string)
    items = tempStr.Split(";")
    for _, item := range items {
        if item.Contains(":") {
            kv := item.Split(":")
            result[kv[0]] = kv[1]
        }
    }
    return result
}

scanRangeMap = {
    "AllDomainScan": crawlerx.AllDomainScan,
    "SubMenuScan": crawlerx.SubMenuScan,
}

scanRepeatMap = {
    "UnLimitRepeat": crawlerx.UnLimitRepeat,
    "LowRepeatLevel": crawlerx.LowRepeatLevel,
    "MediumRepeatLevel": crawlerx.MediumRepeatLevel,
    "HighRepeatLevel": crawlerx.HighRepeatLevel,
    "ExtremeRepeatLevel": crawlerx.ExtremeRepeatLevel,
}

browserInfo = {
    "ws_address":"",
    "exe_path":"",
    "proxy_address":"",
    "proxy_username":"",
    "proxy_password":"",
}
if wsAddress != "" {
    browserInfo["ws_address"] = wsAddress
}
if exePath != "" {
    browserInfo["exe_path"] = exePath
}
if proxy != "" {
    browserInfo["proxy_address"] = proxy
    if proxyUsername != "" {
        browserInfo["proxy_username"] = proxyUsername
        browserInfo["proxy_password"] = proxyPassword
    }
}
browserInfoOpt = crawlerx.browserInfo(json.dumps(browserInfo))

pageTimeoutOpt = crawlerx.pageTimeout(pageTimeout)

fullTimeoutOpt = crawlerx.fullTimeout(fullTimeout)

concurrentOpt = crawlerx.concurrent(concurrent)

opts = [
    browserInfoOpt,
    pageTimeoutOpt,
    fullTimeoutOpt,
    concurrentOpt,
]

if formFill != "" {
    formFillInfo = stringToDict(formFill)
    formFillOpt = crawlerx.formFill(formFillInfo)
    opts = append(opts, formFillOpt)
}

if fileUpload != "" {
    fileUploadInfo = stringToDict(fileUpload)
    fileUploadOpt = crawlerx.fileInput(fileUploadInfo)
    opts = append(opts, fileUploadOpt)
}

if header != "" {
    headerInfo = stringToDict(header)
    headerOpt = crawlerx.headers(headerInfo)
    opts = append(opts, headerOpt)
}

if rawHeaders != "" {
    opts = append(opts, crawlerx.rawHeaders(rawHeaders))
}

if rawCookie != "" {
    opts = append(opts, crawlerx.rawCookie(rawCookie))
}

if cookie != "" {
    cookieInfo = stringToDict(cookie)
    cookieOpt = crawlerx.cookies(cookieInfo)
    opts = append(opts, cookieOpt)
}

if scanRange != "" {
    scanRangeItem = scanRangeMap[scanRange]
    scanRangeOpt = crawlerx.scanRangeLevel(scanRangeItem)
    opts = append(opts, scanRangeOpt)
}

if scanRepeat != "" {
    scanRepeatItem = scanRepeatMap[scanRepeat]
    scanRepeatOpt = crawlerx.scanRepeatLevel(scanRepeatItem)
    opts = append(opts, scanRepeatOpt)
}

if maxUrl != 0 {
    opts = append(opts, crawlerx.maxUrl(maxUrl))
}

if maxDepth != 0 {
    opts = append(opts, crawlerx.maxDepth(maxDepth))
}

if extraWaitLoad != 0 {
    opts = append(opts, crawlerx.extraWaitLoadTime(extraWaitLoad))
}

if ignoreQuery != "" {
    queries = ignoreQuery.Split(",")
    opts = append(opts, crawlerx.ignoreQueryName(queries...))
}

if blacklist != "" {
    opts = append(opts, crawlerx.blacklist(blacklist.Split(",")...))
}

if whitelist != "" {
    opts = append(opts, crawlerx.whitelist(whitelist.Split(",")...))
}

if sensitiveWords != "" {
    opts = append(opts, crawlerx.sensitiveWords(sensitiveWords.Split(",")))
}

if leakless != "" {
    opts = append(opts, crawlerx.leakless(leakless))
}

ch, err = crawlerx.StartCrawler(targetUrl, opts...)
die(err)
for item = range ch {
    yakit.Info(item.Method() + " " + item.Url())
}

Data Structure

crawlerx.ReqInfo

The data structure of a crawler result.

struct
type ReqInfo interface {
    PtrStructMethods (pointer struct methods):
        func Url() return(string)
        func Method() return(string)

        func RequestHeaders() return(map[string]string)
        func RequestBody() return(string)

        func StatusCode() return(int)
        func ResponseHeaders() return(map[string]string)
        func ResponseBody() return(string)
}
methods

func (*ReqInfo) Url() return(r0: string) URL of the crawler result

func (*ReqInfo) Method() return(string) HTTP method of the request

func (*ReqInfo) RequestHeaders() return(map[string]string) request headers of the result

func (*ReqInfo) RequestBody() return(string) request body of the result

func (*ReqInfo) RequestRaw() return([]byte, error) raw request packet of the result

func (*ReqInfo) StatusCode() return(int) response status code of the result

func (*ReqInfo) ResponseHeaders() return(map[string]string) response headers of the result

func (*ReqInfo) ResponseBody() return(string) response body of the result

func (*ReqInfo) Type() return(string) type of the result, e.g. hijack_result / event url / js url / file upload result

func (*ReqInfo) From() return(string) the source link of this URL, i.e. the page on which this link was found
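
A short consumption sketch (Yak script), assuming the standard Yak builtins die and sprintf; the target URL is a placeholder:

ch, err = crawlerx.StartCrawler("http://example.com", crawlerx.pageTimeout(30))
die(err)
for item = range ch {
    // raw request packet of each result
    raw, rawErr = item.RequestRaw()
    if rawErr == nil {
        yakit.Info(string(raw))
    }
    yakit.Info(sprintf("%v %v", item.StatusCode(), item.Url()))
}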

API

crawlerx.StartCrawler

Configures crawler options and starts a crawl task.

Definition

func crawlerx.StartCrawler(url: string, opts: ...crawlerx.ConfigOpt) return (ch: chan crawlerx.ReqInfo, err: error)

Parameters
Name Type Description
url string target URL to crawl
opts ...crawlerx.ConfigOpt crawl options
Returns
Name Type Description
ch chan crawlerx.ReqInfo channel delivering crawler results
err error error information
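
A minimal usage sketch (Yak); the target URL is a placeholder and the error is handled with die:

ch, err = crawlerx.StartCrawler("http://example.com", crawlerx.concurrent(2))
die(err)
for item = range ch {
    yakit.Info(item.Method() + " " + item.Url())
}
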
crawlerx.PageScreenShot

Visits the given page and takes a screenshot once it has finished loading.

Definition

func crawlerx.PageScreenShot(url: string, opts: ...crawlerx.ConfigOpt) return (screenshot: string, err: error)

Parameters
Name Type Description
url string URL of the page to screenshot
opts ...crawlerx.ConfigOpt visit options
Returns
Name Type Description
screenshot string base64-encoded screenshot of the target page
err error error information
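
A short sketch (Yak); the URL is a placeholder and the result is base64-encoded image data:

screenshot, err = crawlerx.PageScreenShot("https://example.com", crawlerx.pageTimeout(30))
die(err)
// screenshot holds the base64-encoded image of the rendered page
yakit.Info(sprintf("screenshot length: %v", len(screenshot)))
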
crawlerx.browserInfo

Sets browser parameters.

Definition

func crawlerx.browserInfo(info: string) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
info string browser parameters

The browser parameters form a JSON string:

{
    "ws_address":"",
    "exe_path":"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
    "proxy_address":"http://127.0.0.1:8083",
    "proxy_username":"",
    "proxy_password":"",
}

ws_address is the address of a remote Chrome instance and exe_path is the path to a local Chrome executable; setting either one of the two is enough. If neither is set, a Chrome browser is downloaded and run automatically.

proxy_address is the proxy address, and proxy_username and proxy_password are the proxy username and password (fill in if required).

Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
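
A sketch of building the JSON with json.dumps, as in the README example; the Chrome path is hypothetical:

browserInfo = {
    "ws_address": "",
    "exe_path": "/usr/bin/google-chrome",
    "proxy_address": "http://127.0.0.1:8083",
    "proxy_username": "",
    "proxy_password": "",
}
browserInfoOpt = crawlerx.browserInfo(json.dumps(browserInfo))
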
crawlerx.maxUrl

Sets the maximum number of URLs to crawl.

Definition

func crawlerx.maxUrl(maxUrlNum: int) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
maxUrlNum int maximum number of URLs to crawl
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.maxDepth

Sets the maximum crawl depth.

Definition

func crawlerx.maxDepth(depth: int) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
depth int maximum crawl depth
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.concurrent

Sets the maximum number of browser pages open at once (i.e. the degree of parallelism).

Definition

func crawlerx.concurrent(concurrentNumber: int) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
concurrentNumber int maximum number of open browser pages
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.blacklist

Sets the crawler blacklist.

Definition

func crawlerx.blacklist(keywords: ...string) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
keywords ...string blacklist keywords
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.whitelist

Sets the crawler whitelist.

Definition

func crawlerx.whitelist(keywords: ...string) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
keywords ...string whitelist keywords
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.pageTimeout

Sets the per-page timeout for the crawler.

Definition

func crawlerx.pageTimeout(timeout: int) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
timeout int per-page timeout
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.fullTimeout

Sets the global timeout for the crawler.

Definition

func crawlerx.fullTimeout(timeout: int) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
timeout int global crawl timeout
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.extraWaitLoadTime

Sets an extra per-page wait time. Sometimes the page status reported through DevTools says loading is complete while the page is actually still rendering; this option adds an extra wait to cover that case.

Definition

func crawlerx.extraWaitLoadTime(timeout: int) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
timeout int extra wait time (in milliseconds)
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.formFill

Sets the crawler's form-fill values.

Definition

func crawlerx.formFill(formFills: map[string]string) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
formFills map[string]string map of form-fill values
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.fileInput

Sets the crawler's file-upload values.

Definition

func crawlerx.fileInput(fileInput: map[string]string) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
fileInput map[string]string file-upload settings

In the map, each key is a keyword and each value is a file path; the value under the key default is the fallback upload file.

Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
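
A sketch (Yak) with hypothetical file paths; "avatar" matches upload elements whose keyword contains it, and "default" is the fallback:

fileUploadOpt = crawlerx.fileInput({
    "avatar": "/tmp/avatar.png",
    "default": "/tmp/default.txt",
})
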
crawlerx.headers

Sets the request headers for the crawler.

Definition

func crawlerx.headers(headers: map[string]string) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
headers map[string]string header contents
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.rawHeaders

Sets the request headers for the crawler from a raw string.

Definition

func crawlerx.rawHeaders(headersInfo: string) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
headersInfo string header contents

The input is the raw header string as it appears in a packet.

Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
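
A sketch (Yak) passing headers exactly as they appear in a packet; the header values are illustrative:

rawHeaderInfo = `Host: example.com
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64)
Accept: */*
`
headerOpt = crawlerx.rawHeaders(rawHeaderInfo)
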
crawlerx.cookies

Sets the request cookies for the crawler.

Definition

func crawlerx.cookies(cookies: map[string]string) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
cookies map[string]string cookie contents
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.rawCookie

Sets the request cookies for the crawler from a raw string.

Definition

func crawlerx.rawCookie(cookieInfo: string) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
cookieInfo string cookie contents

The input is the raw cookie string as it appears in a packet.

Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.scanRangeLevel

Sets the crawl scope.

Definition

func crawlerx.scanRangeLevel(scanRange: crawlerx.scanRangeLevel) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
scanRange crawlerx.scanRangeLevel crawl scope level

crawlerx.scanRangeLevel includes the following:

crawlerx.AllDomainScan crawls the whole domain (default)

crawlerx.SubMenuScan crawls the target URL and its sub-paths

Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
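
For example, to restrict the crawl to the target URL and its sub-paths:

scanRangeOpt = crawlerx.scanRangeLevel(crawlerx.SubMenuScan)
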
crawlerx.scanRepeatLevel

Sets the deduplication level for crawler results.

Definition

func crawlerx.scanRepeatLevel(scanRepeat: crawlerx.repeatLevel) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
scanRepeat crawlerx.repeatLevel deduplication level for crawler results

crawlerx.repeatLevel includes the following:

crawlerx.UnLimitRepeat sensitive to page, method, query-name, query-value, and post-data

crawlerx.LowRepeatLevel sensitive to page, method, query-name, and query-value (default)

crawlerx.MediumRepeatLevel sensitive to page, method, and query-name

crawlerx.HighRepeatLevel sensitive to page and method

crawlerx.ExtremeRepeatLevel sensitive to page only

Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
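
For example, to deduplicate on page, method, and query-name (ignoring query values):

scanRepeatOpt = crawlerx.scanRepeatLevel(crawlerx.MediumRepeatLevel)
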
crawlerx.ignoreQueryName

Ignores the given query parameter names when deduplicating URLs.

Definition

func crawlerx.ignoreQueryName(queryNames: ...string) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
queryNames ...string query names to skip during deduplication

For example, suppose two URLs are identical except for a token query parameter: for some special reason the token values differ while the page content is the same. Since the URLs still differ, the crawler by default treats them as two distinct URLs and visits both.

To avoid this, pass token to crawlerx.ignoreQueryName so that token is ignored during URL deduplication:

... ...
ignore = crawlerx.ignoreQueryName("token")
ch, err = crawlerx.StartCrawler(urlStr, ignore)
... ...

The two URLs above are then considered the same during deduplication, and only one of them is visited.

Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.sensitiveWords

Sets sensitive words; elements containing any of them will not be clicked.

Definition

func crawlerx.sensitiveWords(words: []string) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
words []string sensitive words to filter on
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.leakless

Controls automatic cleanup of browser processes (leakless). The leakless cleanup binary is flagged as a virus on Windows, so it is disabled there by default; to enable it on Windows, first disable the relevant security software. While it is disabled, force-killing the crawler can leave chrome.exe processes in the background, which must be closed manually if they accumulate. The default value is "default"; "true" force-enables cleanup and "false" force-disables it.

Definition

func crawlerx.leakless(leakless: string) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
leakless string automatic process-cleanup setting
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
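
For example, to force-enable process cleanup (note the Windows caveats above):

leaklessOpt = crawlerx.leakless("true")
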
crawlerx.localStorage

Stores key-value pairs in the localStorage of the current domain.

Definition

func crawlerx.localStorage(storage: map[string]string) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
storage map[string]string key-value pairs to store in the current domain's localStorage
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
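
A sketch (Yak) injecting a hypothetical session token into the target domain's localStorage before crawling:

storageOpt = crawlerx.localStorage({"token": "0000-1111-2222-3333"})
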
crawlerx.invalidSuffix

Sets invalid suffixes; URLs with any of these suffixes are not visited.

Definition

func crawlerx.invalidSuffix(suffix: []string) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
suffix []string slice of URL suffixes to exclude from crawling
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.stealth

Sets whether to run anti-anti-crawler (stealth) code.

Definition

func crawlerx.stealth(stealth: bool) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
stealth bool whether to run anti-anti-crawler code
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.runtimeID

Sets the crawler's runtimeID.

Definition

func crawlerx.runtimeID(id: string) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
id string runtimeID to set
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.evalJs

Runs the given JS code when the crawler reaches the specified page.

Definition

func crawlerx.evalJs(target: string, evalJs: string) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
target string URL of the page on which to run the JS code
evalJs string JS code to execute
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
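
A sketch (Yak) running a snippet on one specific page; the URL and JS code are illustrative only:

evalJsOpt = crawlerx.evalJs("http://example.com/login", `document.title`)
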
crawlerx.jsResultSend

Sets the handler invoked with the result of executed JS code (typically used to pass the result out).

Definition

func crawlerx.jsResultSend(send: func(string)) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
send func(string) function that handles the string result of the executed JS code

Example:

stack = make([]string, 0)
strFunc = func(s){
    stack = append(stack, s)
}
opt = crawlerx.jsResultSend(strFunc)

Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.vue

Forces the crawler into vue mode, i.e. event-driven crawling.

Definition

func crawlerx.vue(vue: bool) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
vue bool whether to run in vue mode
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
crawlerx.response

Sets a fixed response for the specified URL.

Definition

func crawlerx.response(targetUrl: string, response: string) return (r0: crawlerx.ConfigOpt)

Parameters
Name Type Description
targetUrl string URL whose response content is overridden
response string response content string

The response is a raw HTTP response:

"HTTP/1.1 200\r\nSet-Cookie: JSESSIONID=E8ECA470AF9F5385159DE0E8E9BD6726; Path=/; HttpOnly\r\nContent-Type: text/html; charset=utf-8\r\nDate: Wed, 01 Nov 2023 03:44:53 GMT\r\nContent-Length: 35\r\n\r\ne165421110ba03099a1c393373c5b43\n\r\n"
Returns
Name Type Description
r0 crawlerx.ConfigOpt option-setting function
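
A sketch (Yak) serving a fixed response for a hypothetical static URL instead of fetching it:

rawResponse = "HTTP/1.1 200 OK\r\nContent-Type: text/html; charset=utf-8\r\nContent-Length: 5\r\n\r\nhello"
responseOpt = crawlerx.response("http://example.com/static", rawResponse)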

Documentation

Overview

Package crawlerx @Author bcy2007

Index

Constants

This section is empty.

Variables

var CrawlerXExports = map[string]interface{}{
	"StartCrawler":   StartCrawler,
	"PageScreenShot": NewPageScreenShot,

	"browserInfo":       WithBrowserInfo,
	"saveToDB":          WithSaveToDB,
	"runtimeId":         WithRuntimeID,
	"maxUrl":            WithMaxUrl,
	"maxDepth":          WithMaxDepth,
	"concurrent":        WithConcurrent,
	"blacklist":         WithBlackList,
	"whitelist":         WithWhiteList,
	"pageTimeout":       WithPageTimeout,
	"fullTimeout":       WithFullTimeout,
	"extraWaitLoadTime": WithExtraWaitLoadTime,
	"formFill":          WithFormFill,
	"fileInput":         WithFileInput,
	"headers":           WithHeaders,
	"rawHeaders":        WithHeaderInfo,
	"cookies":           WithCookies,
	"rawCookie":         WithCookieInfo,
	"scanRangeLevel":    WithScanRangeLevel,
	"scanRepeatLevel":   WithScanRepeatLevel,
	"ignoreQueryName":   WithIgnoreQueryName,
	"sensitiveWords":    WithSensitiveWords,
	"leakless":          WithLeakless,
	"localStorage":      WithLocalStorage,
	"sessionStorage":    WithSessionStorage,
	"invalidSuffix":     WithInvalidSuffix,
	"stealth":           WithStealth,
	"runtimeID":         WithRuntimeID,
	"evalJs":            WithEvalJs,
	"jsResultSend":      WithJsResultSave,
	"vue":               WithVue,
	"response":          WithResponse,
	"sourceType":        WithSourceType,
	"fromPlugin":        WithFromPlugin,
	"urlCheck":          WithUrlCheck,

	"UnLimitRepeat":      unlimited,
	"LowRepeatLevel":     lowLevel,
	"MediumRepeatLevel":  midLevel,
	"HighRepeatLevel":    highLevel,
	"ExtremeRepeatLevel": extremeLevel,

	"AllDomainScan": mainDomain,
	"SubMenuScan":   subDomain,
}
var RepeatLevelMap = map[int]repeatLevel{
	0: unlimited,
	1: lowLevel,
	2: midLevel,
	3: highLevel,
	4: extremeLevel,
}
var ScanRangeLevelMap = map[int]scanRangeLevel{
	0: mainDomain,
	1: subDomain,
}

Functions

func BrowserEachEvent added in v1.2.6

func BrowserEachEvent(browser *rod.Browser, sessionID proto.TargetSessionID, callbacks ...interface{}) func()

func EvalOnPage added in v1.2.7

func EvalOnPage(page *rod.Page, evalJs string) (*proto.RuntimeRemoteObject, error)

func GetSortedQuery added in v1.2.5

func GetSortedQuery(rawQuery string) (query []string, err error)

func NewPageScreenShot added in v1.2.8

func NewPageScreenShot(targetUrl string, opts ...ConfigOpt) (code string, err error)

func StartCrawler

func StartCrawler(url string, opts ...ConfigOpt) (chan ReqInfo, error)

func StartCrawlerTest added in v1.2.7

func StartCrawlerTest(url string, opts ...ConfigOpt) (chan ReqInfo, error)

func StringArrayContains added in v1.2.3

func StringArrayContains(array []string, element string) bool

func StringArrayCover added in v1.2.3

func StringArrayCover(array []string, element string) (bool, string)

func StringPrefixList added in v1.2.3

func StringPrefixList(origin string, prefixes []string) bool

func StringSuffixList added in v1.2.3

func StringSuffixList(s string, suffixes []string) bool

func TargetUrlCheck added in v1.2.3

func TargetUrlCheck(targetUrl string, proxy *url.URL) (string, error)

Types

type BaseConfig added in v1.2.3

type BaseConfig struct {
	// contains filtered or unexported fields
}

type BrowserConfig added in v1.2.3

type BrowserConfig struct {
	// contains filtered or unexported fields
}

func NewBrowserConfig added in v1.3.2

func NewBrowserConfig(exePath, wsAddress string, proxyAddress *url.URL) *BrowserConfig

type BrowserInfo added in v1.2.3

type BrowserInfo struct {
	ExePath       string `json:"exe_path,omitempty"`
	WsAddress     string `json:"ws_address,omitempty"`
	ProxyAddress  string `json:"proxy_address,omitempty"`
	ProxyUsername string `json:"proxy_username,omitempty"`
	ProxyPassword string `json:"proxy_password,omitempty"`
}

type BrowserManager added in v1.2.3

type BrowserManager struct {
	// contains filtered or unexported fields
}

func NewBrowserManager added in v1.2.3

func NewBrowserManager(config *Config) *BrowserManager

func (*BrowserManager) CreateBrowserStarters added in v1.2.3

func (manager *BrowserManager) CreateBrowserStarters()

func (*BrowserManager) Start added in v1.2.3

func (manager *BrowserManager) Start()

func (*BrowserManager) Test added in v1.2.7

func (manager *BrowserManager) Test()

type BrowserStarter added in v1.2.3

type BrowserStarter struct {
	// contains filtered or unexported fields
}

func NewBrowserStarter added in v1.2.3

func NewBrowserStarter(browserConfig *BrowserConfig, baseConfig *BaseConfig) *BrowserStarter

func (*BrowserStarter) ActionOnPage added in v1.2.7

func (starter *BrowserStarter) ActionOnPage(page *rod.Page) error

func (*BrowserStarter) GetFormFill added in v1.2.3

func (starter *BrowserStarter) GetFormFill(element *rod.Element) string

func (*BrowserStarter) GetUploadFile added in v1.2.3

func (starter *BrowserStarter) GetUploadFile(element *rod.Element) string

func (*BrowserStarter) HttpPostFile added in v1.2.3

func (starter *BrowserStarter) HttpPostFile(element *rod.Element) error

func (*BrowserStarter) Start added in v1.2.3

func (starter *BrowserStarter) Start()

func (*BrowserStarter) Test added in v1.2.7

func (starter *BrowserStarter) Test()

type Config added in v1.2.3

type Config struct {
	// contains filtered or unexported fields
}

func NewConfig added in v1.2.3

func NewConfig() *Config

type ConfigOpt added in v1.2.3

type ConfigOpt func(*Config)

func WithBlackList added in v1.2.3

func WithBlackList(keywords ...string) ConfigOpt

func WithBrowserData added in v1.3.2

func WithBrowserData(browserConfig *BrowserConfig) ConfigOpt

func WithBrowserInfo added in v1.2.3

func WithBrowserInfo(data string) ConfigOpt

func WithConcurrent added in v1.2.3

func WithConcurrent(concurrent int) ConfigOpt

func WithContext added in v1.2.3

func WithContext(ctx context.Context) ConfigOpt

func WithCookieInfo added in v1.2.3

func WithCookieInfo(domain, cookieInfo string) ConfigOpt

func WithCookies added in v1.2.3

func WithCookies(domain string, cookiesInfo map[string]string) ConfigOpt

func WithEvalJs added in v1.2.6

func WithEvalJs(target string, evalJs string) ConfigOpt

func WithExtraWaitLoadTime added in v1.2.3

func WithExtraWaitLoadTime(extraWaitLoadTime int) ConfigOpt

func WithFileInput added in v1.2.3

func WithFileInput(fileInput map[string]string) ConfigOpt

func WithFormFill added in v1.2.3

func WithFormFill(formFills map[string]string) ConfigOpt

func WithFromPlugin added in v1.3.1

func WithFromPlugin(fromPlugin string) ConfigOpt

func WithFullTimeout added in v1.2.3

func WithFullTimeout(timeout int) ConfigOpt

func WithHeaderInfo added in v1.2.3

func WithHeaderInfo(headerInfo string) ConfigOpt

func WithHeaders added in v1.2.3

func WithHeaders(headersInfo map[string]string) ConfigOpt

func WithIgnoreQueryName added in v1.2.3

func WithIgnoreQueryName(names ...string) ConfigOpt

func WithInvalidSuffix added in v1.2.4

func WithInvalidSuffix(suffix []string) ConfigOpt

func WithJsResultSave added in v1.2.6

func WithJsResultSave(storage func(s string)) ConfigOpt

func WithLeakless added in v1.2.3

func WithLeakless(leakless string) ConfigOpt

func WithLocalStorage added in v1.2.4

func WithLocalStorage(storage map[string]string) ConfigOpt

func WithMaxDepth added in v1.2.3

func WithMaxDepth(depth int) ConfigOpt

func WithMaxUrl added in v1.2.3

func WithMaxUrl(maxUrl int) ConfigOpt

func WithPageSizedWaitGroup added in v1.2.3

func WithPageSizedWaitGroup(pageSizedWaitGroup *utils.SizedWaitGroup) ConfigOpt

func WithPageTimeout added in v1.2.3

func WithPageTimeout(timeout int) ConfigOpt

func WithPageVisitFilter added in v1.2.3

func WithPageVisitFilter(pageVisitFilter *tools.StringCountFilter) ConfigOpt

func WithResponse added in v1.2.8

func WithResponse(targetUrl string, response string) ConfigOpt

func WithResultChannel added in v1.2.3

func WithResultChannel(ch chan ReqInfo) ConfigOpt

func WithResultSentFilter added in v1.2.3

func WithResultSentFilter(resultSentFilter *tools.StringCountFilter) ConfigOpt

func WithRuntimeID added in v1.2.6

func WithRuntimeID(id string) ConfigOpt

func WithSaveToDB added in v1.2.6

func WithSaveToDB(b bool) ConfigOpt

func WithScanRangeLevel added in v1.2.3

func WithScanRangeLevel(scanRange scanRangeLevel) ConfigOpt

func WithScanRepeatLevel added in v1.2.3

func WithScanRepeatLevel(scanRepeat repeatLevel) ConfigOpt

func WithSensitiveWords added in v1.2.3

func WithSensitiveWords(words []string) ConfigOpt

func WithSessionStorage added in v1.3.2

func WithSessionStorage(storage map[string]string) ConfigOpt

func WithSourceType added in v1.3.1

func WithSourceType(sourceType string) ConfigOpt

func WithStartWaitGroup added in v1.2.4

func WithStartWaitGroup(waitGroup *utils.SizedWaitGroup) ConfigOpt

func WithStealth added in v1.2.5

func WithStealth(stealth bool) ConfigOpt

func WithTargetUrl added in v1.2.3

func WithTargetUrl(targetUrl string) ConfigOpt

func WithUChan added in v1.2.3

func WithUChan(uChan *tools.UChan) ConfigOpt

func WithUrlCheck added in v1.3.1

func WithUrlCheck(check bool) ConfigOpt

func WithUrlTree added in v1.2.3

func WithUrlTree(tree *tools.UrlTree) ConfigOpt

func WithVue added in v1.2.7

func WithVue(vue bool) ConfigOpt

func WithWhiteList added in v1.2.3

func WithWhiteList(keywords ...string) ConfigOpt

type CrawlerCore added in v1.2.3

type CrawlerCore struct {
	// contains filtered or unexported fields
}

func NewCrawlerCore added in v1.2.3

func NewCrawlerCore(targetUrl string, opts ...ConfigOpt) (*CrawlerCore, error)

func (*CrawlerCore) Start added in v1.2.3

func (core *CrawlerCore) Start()

func (*CrawlerCore) Test added in v1.2.7

func (core *CrawlerCore) Test()

type CrawlerHijack added in v1.2.6

type CrawlerHijack struct {
	Request  *CrawlerHijackRequest
	Response *CrawlerHijackResponse
	OnError  func(error)

	Skip bool

	CustomState interface{}
	// contains filtered or unexported fields
}

func (*CrawlerHijack) ContinueRequest added in v1.2.6

func (hijack *CrawlerHijack) ContinueRequest(cq *proto.FetchContinueRequest)

func (*CrawlerHijack) LoadResponse added in v1.2.6

func (hijack *CrawlerHijack) LoadResponse(opts []lowhttp.LowhttpOpt, loadBody bool) error

type CrawlerHijackHandler added in v1.2.6

type CrawlerHijackHandler struct {
	// contains filtered or unexported fields
}

type CrawlerHijackRequest added in v1.2.6

type CrawlerHijackRequest struct {
	// contains filtered or unexported fields
}

func (*CrawlerHijackRequest) Body added in v1.2.6

func (hijack *CrawlerHijackRequest) Body() string

func (*CrawlerHijackRequest) Header added in v1.2.6

func (hijack *CrawlerHijackRequest) Header(key string) string

func (*CrawlerHijackRequest) Headers added in v1.2.6

func (hijack *CrawlerHijackRequest) Headers() proto.NetworkHeaders

func (*CrawlerHijackRequest) IsNavigation added in v1.2.6

func (hijack *CrawlerHijackRequest) IsNavigation() bool

func (*CrawlerHijackRequest) JSONBody added in v1.2.6

func (hijack *CrawlerHijackRequest) JSONBody() gson.JSON

func (*CrawlerHijackRequest) Method added in v1.2.6

func (hijack *CrawlerHijackRequest) Method() string

func (*CrawlerHijackRequest) Req added in v1.2.6

func (hijack *CrawlerHijackRequest) Req() *http.Request

func (*CrawlerHijackRequest) SetBody added in v1.2.6

func (hijack *CrawlerHijackRequest) SetBody(obj interface{}) *CrawlerHijackRequest

func (*CrawlerHijackRequest) SetContext added in v1.2.6

func (hijack *CrawlerHijackRequest) SetContext(ctx context.Context) *CrawlerHijackRequest

func (*CrawlerHijackRequest) Type added in v1.2.6

func (hijack *CrawlerHijackRequest) Type() proto.NetworkResourceType

func (*CrawlerHijackRequest) URL added in v1.2.6

func (hijack *CrawlerHijackRequest) URL() *url.URL

type CrawlerHijackResponse added in v1.2.6

type CrawlerHijackResponse struct {
	// contains filtered or unexported fields
}

func (*CrawlerHijackResponse) Body added in v1.2.6

func (hijack *CrawlerHijackResponse) Body() string

func (*CrawlerHijackResponse) Fail added in v1.2.6

func (*CrawlerHijackResponse) Headers added in v1.2.6

func (hijack *CrawlerHijackResponse) Headers() http.Header

func (*CrawlerHijackResponse) Payload added in v1.2.6

func (hijack *CrawlerHijackResponse) Payload() *proto.FetchFulfillRequest

func (*CrawlerHijackResponse) SetBody added in v1.2.6

func (hijack *CrawlerHijackResponse) SetBody(obj interface{}) *CrawlerHijackResponse

func (*CrawlerHijackResponse) SetHeader added in v1.2.6

func (hijack *CrawlerHijackResponse) SetHeader(pairs ...string) *CrawlerHijackResponse

type CrawlerRouter added in v1.2.6

type CrawlerRouter struct {
	// contains filtered or unexported fields
}

func NewBrowserHijackRequests added in v1.2.6

func NewBrowserHijackRequests(browser *rod.Browser) *CrawlerRouter

func NewPageHijackRequests added in v1.2.6

func NewPageHijackRequests(page *rod.Page) *CrawlerRouter

func (*CrawlerRouter) Add added in v1.2.6

func (router *CrawlerRouter) Add(pattern string, resourceType proto.NetworkResourceType, handler func(*CrawlerHijack)) error

func (*CrawlerRouter) Run added in v1.2.6

func (router *CrawlerRouter) Run()

func (*CrawlerRouter) Stop added in v1.2.6

func (router *CrawlerRouter) Stop() error

type HijackRequest added in v1.2.5

type HijackRequest interface {
	Type() proto.NetworkResourceType
	Method() string
	URL() *url.URL
	Header(key string) string
	Headers() proto.NetworkHeaders
	Body() string
	JSONBody() gson.JSON
	Req() *http.Request
}

type HijackResponse added in v1.2.5

type HijackResponse interface {
	Payload() *proto.FetchFulfillRequest
	Body() string
	Headers() http.Header
}

type HttpRequest added in v1.2.3

type HttpRequest struct {
	// contains filtered or unexported fields
}

func CreateFileRequest added in v1.2.3

func CreateFileRequest(url, method string, params, files map[string]string) *HttpRequest

func CreateGetRequest added in v1.2.3

func CreateGetRequest(url string) *HttpRequest

func CreateRequest added in v1.2.3

func CreateRequest() *HttpRequest

func (*HttpRequest) Do added in v1.2.3

func (request *HttpRequest) Do() error

func (*HttpRequest) GetRequest added in v1.2.3

func (request *HttpRequest) GetRequest() error

func (*HttpRequest) GetUrl added in v1.2.3

func (request *HttpRequest) GetUrl() string

func (*HttpRequest) MultiPartRequest added in v1.2.3

func (request *HttpRequest) MultiPartRequest() error

func (*HttpRequest) PostRequest added in v1.2.3

func (request *HttpRequest) PostRequest() error

func (*HttpRequest) Request added in v1.2.3

func (request *HttpRequest) Request() error

func (*HttpRequest) Show added in v1.2.3

func (request *HttpRequest) Show() (string, error)

type JSEval added in v1.2.6

type JSEval struct {
	// contains filtered or unexported fields
}

func CreateJsEval added in v1.2.6

func CreateJsEval() *JSEval

type JsResultSave added in v1.2.6

type JsResultSave struct {
	TargetUrl string `json:"target_url"`
	Js        string `json:"js"`
	Result    string `json:"result"`
}

type JsResults added in v1.2.6

type JsResults []string

type OutputBody added in v1.2.3

type OutputBody struct {
	Size string `json:"size"`
	Md5  string `json:"md5"`
	Data string `json:"data"`
}

type OutputHeader added in v1.2.3

type OutputHeader struct {
	Name  string `json:"name"`
	Value string `json:"value"`
}

type OutputRequest added in v1.2.3

type OutputRequest struct {
	Url     string          `json:"url"`
	Method  string          `json:"method"`
	Headers []*OutputHeader `json:"headers"`
	Body    OutputBody      `json:"body"`
	HTTPRaw string          `json:"http_raw"`
}

type OutputResponse added in v1.2.3

type OutputResponse struct {
	StatusCode int             `json:"status_code"`
	Headers    []*OutputHeader `json:"headers"`
	Body       OutputBody      `json:"body"`
}

type OutputResult added in v1.2.3

type OutputResult struct {
	Url      string         `json:"url"`
	Request  OutputRequest  `json:"request"`
	Response OutputResponse `json:"response"`
}

func GeneratorOutput added in v1.2.3

func GeneratorOutput(reqInfo ReqInfo) *OutputResult

type OutputResults added in v1.2.3

type OutputResults struct {
	// contains filtered or unexported fields
}

type ReqInfo added in v1.2.3

type ReqInfo interface {
	Type() string

	Url() string
	Method() string

	RequestHeaders() map[string]string
	RequestBody() string
	RequestRaw() ([]byte, error)

	StatusCode() int
	ResponseHeaders() map[string]string
	ResponseBody() string

	Screenshot() string

	From() string
}

type RequestResult added in v1.2.3

type RequestResult struct {
	// contains filtered or unexported fields
}

func (*RequestResult) From added in v1.2.3

func (result *RequestResult) From() string

func (*RequestResult) Method added in v1.2.3

func (result *RequestResult) Method() string

func (*RequestResult) RequestBody added in v1.2.3

func (result *RequestResult) RequestBody() string

func (*RequestResult) RequestHeaders added in v1.2.3

func (result *RequestResult) RequestHeaders() map[string]string

func (*RequestResult) RequestRaw added in v1.2.7

func (result *RequestResult) RequestRaw() ([]byte, error)

func (*RequestResult) ResponseBody added in v1.2.3

func (result *RequestResult) ResponseBody() string

func (*RequestResult) ResponseHeaders added in v1.2.3

func (result *RequestResult) ResponseHeaders() map[string]string

func (*RequestResult) Screenshot added in v1.2.3

func (result *RequestResult) Screenshot() string

func (*RequestResult) StatusCode added in v1.2.3

func (result *RequestResult) StatusCode() int

func (*RequestResult) Type added in v1.2.3

func (result *RequestResult) Type() string

func (*RequestResult) Url added in v1.2.3

func (result *RequestResult) Url() string

type SimpleResult added in v1.2.3

type SimpleResult struct {
	// contains filtered or unexported fields
}

func (*SimpleResult) From added in v1.2.3

func (simpleResult *SimpleResult) From() string

func (*SimpleResult) Method added in v1.2.3

func (simpleResult *SimpleResult) Method() string

func (*SimpleResult) RequestBody added in v1.2.3

func (simpleResult *SimpleResult) RequestBody() string

func (*SimpleResult) RequestHeaders added in v1.2.3

func (simpleResult *SimpleResult) RequestHeaders() map[string]string

func (*SimpleResult) RequestRaw added in v1.2.7

func (simpleResult *SimpleResult) RequestRaw() ([]byte, error)

func (*SimpleResult) ResponseBody added in v1.2.3

func (simpleResult *SimpleResult) ResponseBody() string

func (*SimpleResult) ResponseHeaders added in v1.2.3

func (simpleResult *SimpleResult) ResponseHeaders() map[string]string

func (*SimpleResult) Screenshot added in v1.2.3

func (simpleResult *SimpleResult) Screenshot() string

func (*SimpleResult) StatusCode added in v1.2.3

func (*SimpleResult) StatusCode() int

func (*SimpleResult) Type added in v1.2.3

func (simpleResult *SimpleResult) Type() string

func (*SimpleResult) Url added in v1.2.3

func (simpleResult *SimpleResult) Url() string

type TestHijackRequest added in v1.2.5

type TestHijackRequest struct {
	// contains filtered or unexported fields
}

func (*TestHijackRequest) Body added in v1.2.5

func (testHijackRequest *TestHijackRequest) Body() string

func (*TestHijackRequest) Header added in v1.2.5

func (testHijackRequest *TestHijackRequest) Header(key string) string

func (*TestHijackRequest) Headers added in v1.2.5

func (testHijackRequest *TestHijackRequest) Headers() proto.NetworkHeaders

func (*TestHijackRequest) JSONBody added in v1.2.5

func (testHijackRequest *TestHijackRequest) JSONBody() gson.JSON

func (*TestHijackRequest) Method added in v1.2.5

func (testHijackRequest *TestHijackRequest) Method() string

func (*TestHijackRequest) Req added in v1.2.5

func (testHijackRequest *TestHijackRequest) Req() *http.Request

func (*TestHijackRequest) Type added in v1.2.5

func (testHijackRequest *TestHijackRequest) Type() proto.NetworkResourceType

func (*TestHijackRequest) URL added in v1.2.5

func (testHijackRequest *TestHijackRequest) URL() *url.URL

Directories

Path Synopsis
cmd Package cmd @Author bcy2007 2023/7/14 11:11
crawlerx Package crawlerx @Author bcy2007 2024/4/2 14:44
tools Package tools @Author bcy2007 2023/7/12 16:40
config Package config https://github.com/unknwon/goconfig
