gathertool

package module
v0.0.6 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 24, 2021 License: MIT Imports: 10 Imported by: 9

README

gathertool

数据采集工具包,提高数据采集程序编写效率。

Get

一个 GET 请求的例子:

// main demonstrates a single GET request with success and pre-retry callbacks.
func main() {
	// Any *http.Client works here, including one supplied by a third-party
	// proxy library.
	c := &http.Client{
		Timeout: 1 * time.Second,
	}

	// Build a GET request that is retried at most 10 times.
	// NOTE(review): Get takes ...interface{}; confirm that an untyped 10 is
	// accepted as the retry count rather than requiring gathertool.RetryTimes(10).
	req, err := gathertool.Get("http://192.168.0.1", 10, c)
	if err != nil {
		log.Println(err)
		return
	}
	// Callback run when the request succeeds.
	req.Succeed(succeed)
	// Hook run before a retry when a retryable status code (e.g. 403, 502) is
	// received: a place to wait, swap the proxy, change headers, and so on.
	// It is registered with Retry, which takes func(*Req); Failed takes a
	// plain func() and would not accept this handler.
	req.Retry(failed)
	// Execute the request.
	req.Do()
}

// succeed is the success callback: it prints the response body b to stdout.
func succeed(b []byte) {
	// Print rather than Printf: the body is data, not a format string, so any
	// '%' it contains must be emitted verbatim.
	fmt.Print(string(b))
	// Process the data here.
}

// failed is the hook run before retrying after an error status code: it
// installs a new http.Client with a 5-second timeout on the request, logs a
// message, and pauses for one second.
func failed(c *gathertool.Req) {
	// Replace the http.Client used by the request.
	replacement := &http.Client{Timeout: 5 * time.Second}
	c.Client = replacement

	log.Println("休息1s")
	// Wait one second before the next attempt.
	time.Sleep(time.Second)
}

Documentation

Index

Constants

This section is empty.

Variables

View Source
// StatusCodeMap maps an HTTP status code to the handling applied to it:
// "success" runs the success function, "fail" runs the failure function, and
// "retry" runs the pre-retry function.
var StatusCodeMap = map[int]string{
	// 2xx
	200: "success",
	201: "success",
	202: "success",
	203: "success",
	204: "fail",

	// 3xx
	300: "success",
	301: "success",
	302: "success",

	// 4xx
	400: "fail",
	401: "retry",
	402: "retry",
	403: "retry",
	404: "fail",
	405: "retry",
	406: "retry",
	407: "retry",
	408: "retry",

	// 5xx
	500: "fail",
	501: "fail",
	502: "retry",
	503: "retry",
	504: "retry",
}

StatusCodeMap 状态码处理映射:success 表示该状态码对应执行成功函数;fail 表示该状态码对应执行失败函数;retry 表示该状态码对应重试前需要执行的函数。

View Source
// UserAgentMap holds User-Agent header values keyed by the integers 1 through 35.
//
// Every value is the bare header value. Entries 26-29 previously carried a
// literal "User-Agent, " prefix (a copy-paste artifact) that would have been
// sent as part of the header; it has been removed.
var UserAgentMap = map[int]string{
	1:  "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
	2:  "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36",
	3:  "Mozilla/5.0 (compatible; WOW64; MSIE 10.0; Windows NT 6.2)",
	4:  "Opera/9.80 (Windows NT 6.1; WOW64; U; en) Presto/2.10.229 Version/11.62",
	5:  "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
	6:  "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20130331 Firefox/21.0",
	7:  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.10 Chromium/27.0.1453.93 Chrome/27.0.1453.93 Safari/537.36",
	8:  "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
	9:  "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.9.168 Version/11.52",
	10: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
	11: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:21.0) Gecko/20100101 Firefox/21.0",
	12: "Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166  Safari/535.19",
	13: "Mozilla/5.0 (Linux; U; Android 4.0.4; en-gb; GT-I9300 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
	14: "Mozilla/5.0 (Linux; U; Android 2.2; en-gb; GT-P1000 Build/FROYO) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
	15: "Mozilla/5.0 (Android; Mobile; rv:14.0) Gecko/14.0 Firefox/14.0",
	16: "Mozilla/5.0 (Android; Tablet; rv:14.0) Gecko/14.0 Firefox/14.0",
	17: "Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19",
	18: "Mozilla/5.0 (Linux; Android 4.1.2; Nexus 7 Build/JZ054K) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19",
	19: "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3",
	20: "Mozilla/5.0 (iPad; CPU OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3",
	21: "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3",
	22: "Mozilla/5.0 (iPad; CPU OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3",
	23: "Mozilla/5.0 (iPhone; CPU iPhone OS 6_1_4 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) CriOS/27.0.1453.10 Mobile/10B350 Safari/8536.25",
	24: "Mozilla/5.0 (compatible; MSIE 10.0; Windows Phone 8.0; Trident/6.0; IEMobile/10.0; ARM; Touch; NOKIA; Lumia 920)",
	25: "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; SAMSUNG; SGH-i917)",
	26: "UCWEB7.0.2.37/28/999",
	27: "NOKIA5700/ UCWEB7.0.2.37/28/999",
	28: "Openwave/ UCWEB7.0.2.37/28/999",
	29: "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999",
	30: "Mozilla/5.0 (Windows; U; Windows NT 6.1; ) AppleWebKit/534.12 (KHTML, like Gecko) Maxthon/3.0 Safari/534.12",
	31: "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)",
	32: "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)",
	33: "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.33 Safari/534.3 SE 2.X MetaSr 1.0",
	34: "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)",
	35: "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1 QQBrowser/6.9.11079.201",
}

Functions

func GetAgent added in v0.0.4

func GetAgent(agentType UserAgentType) string

GetAgent 随机获取指定类型的 user-agent

func JobStartGet added in v0.0.4

func JobStartGet(jobNumber int, queue TodoQueue, client *http.Client, SucceedFunc func(*Task, []byte), RetryFunc func(*Req), FailedFunc func())

JobStartGet 并发执行 Get,直到队列任务为空。@jobNumber 并发数;@queue 全局队列;@client 单个并发任务的 client;@SucceedFunc 成功方法;@RetryFunc 重试方法;@FailedFunc 失败方法

func NewGoquery added in v0.0.4

func NewGoquery(html string) (*goquery.Document, error)

Types

type Queue added in v0.0.4

// Queue is a queue of *Task values (see its Add and Poll methods); its fields
// are unexported and not shown here.
type Queue struct {
	// contains filtered or unexported fields
}

func (*Queue) Add added in v0.0.4

func (q *Queue) Add(task *Task)

Add 向队列中添加元素

func (*Queue) Clear added in v0.0.4

func (q *Queue) Clear() bool

func (*Queue) IsEmpty added in v0.0.4

func (q *Queue) IsEmpty() bool

func (*Queue) Poll added in v0.0.4

func (q *Queue) Poll() *Task

Poll 移除队列中最前面的元素

func (*Queue) Print added in v0.0.4

func (q *Queue) Print()

func (*Queue) Size added in v0.0.4

func (q *Queue) Size() int

type Req

// Req bundles an HTTP request with the client that sends it, a retry limit,
// the success/failure/retry callbacks, and the task it belongs to.
type Req struct {

	// Client is the HTTP client.
	Client *http.Client

	// Req is the HTTP request.
	Req *http.Request

	// MaxTimes is the maximum number of retries allowed.
	MaxTimes RetryTimes

	// SuccessFunc is the handler run when the request succeeds.
	SuccessFunc func([]byte)

	// SucceedTaskFunc is the handler run when a task request succeeds.
	SucceedTaskFunc func(*Task, []byte)

	// FailFunc is what to do when the request fails.
	FailFunc func()

	// RetryFunc is the hook run before a retry, for status codes configured
	// to retry.
	RetryFunc func(req *Req)

	// Task is the task this request belongs to.
	Task *Task
	// contains filtered or unexported fields
}

func Get

func Get(url string, vs ...interface{}) (*Req, error)

Get 请求, 当请求失败或状态码是失败的则会先执行 ff 再回调

@url 请求链接;@maxTimes 重试次数;@sf 请求成功后做的事情(200 等);@ff 请求失败后做的事情(403、502 等);@vs 可变参数,例如传入 UserAgentType 可设置指定类型的 user agent,如 AndroidAgent

func (*Req) Do

func (r *Req) Do() func()

Do 执行请求

func (*Req) Failed added in v0.0.3

func (r *Req) Failed(failedFunc func())

Failed 设置错误处理

func (*Req) Retry added in v0.0.4

func (r *Req) Retry(retryFunc func(c *Req))

Retry 请求状态码设置了重试,在重试前的事件

func (*Req) Succeed added in v0.0.3

func (r *Req) Succeed(successFunc func([]byte))

func (*Req) SucceedTask added in v0.0.5

func (r *Req) SucceedTask(successTask func(*Task, []byte))

type RetryTimes added in v0.0.4

// RetryTimes is an int64-based retry count; it is the type of Req.MaxTimes.
type RetryTimes int64

type Task added in v0.0.5

// Task is one unit of work: a URL plus a free-form Context map.
type Task struct {
	Url     string                 // target URL of the task
	Context map[string]interface{} // presumably caller-supplied data carried with the task; confirm against callers
}

type TodoQueue added in v0.0.4

// TodoQueue is the task-queue interface accepted by JobStartGet and returned
// by NewQueue.
type TodoQueue interface {
	Add(task *Task) // add an element to the queue
	Poll() *Task    // remove the element at the front of the queue
	Clear() bool    // empty the queue
	Size() int      // number of elements in the queue
	IsEmpty() bool  // report whether the queue is empty
	Print()         // print
}

func NewQueue added in v0.0.4

func NewQueue() TodoQueue

NewQueue 新建一个队列

type UserAgentType added in v0.0.4

// UserAgentType names a category of user agent; it is the argument type of
// GetAgent.
type UserAgentType int
// User-agent categories. Values start at 1 (iota + 1), so the zero value of
// UserAgentType matches none of the named constants.
// NOTE(review): which UserAgentMap entries each category selects is not
// visible here; confirm against GetAgent.
const (
	PCAgent UserAgentType = iota + 1
	WindowsAgent
	LinuxAgent
	MacAgent
	AndroidAgent
	IosAgent
	PhoneAgent
	WindowsPhoneAgent
	UCAgent
)

Directories

Path Synopsis
examples
get

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL