Documentation ¶
Index ¶
- Constants
- Variables
- func CloneHeader(h map[string][]string) map[string][]string
- func CopyM(h http.Header) http.Header
- func Log() *logging.Logger
- func MergeCookie(before []*http.Cookie, after []*http.Cookie) []*http.Cookie
- func NewClient() (*http.Client, error)
- func NewHeader(ua interface{}, host string, refer interface{}) map[string][]string
- func NewJar() *cookiejar.Jar
- func NewProxyClient(proxystring string) (*http.Client, error)
- func OutputMaps(info string, args map[string][]string)
- func RandomUa() string
- func SetGlobalTimeout(num int)
- func SetLogLevel(level string)
- func TooSortSizes(data []byte, sizes float64) error
- func UaInit()
- func Wait(waittime int)
- type Spider
- func (sp *Spider) Cookies() []*http.Cookie
- func (sp *Spider) Delete() (body []byte, e error)
- func (sp *Spider) Get() (body []byte, e error)
- func (sp *Spider) Go() (body []byte, e error)
- func (sp *Spider) JsonToString() (string, error)
- func (sp *Spider) NewHeader(ua interface{}, host string, refer interface{})
- func (sp *Spider) OtherGo(method, contenttype string) (body []byte, e error)
- func (sp *Spider) Post() (body []byte, e error)
- func (sp *Spider) PostFILE() (body []byte, e error)
- func (sp *Spider) PostJSON() (body []byte, e error)
- func (sp *Spider) PostXML() (body []byte, e error)
- func (sp *Spider) Put() (body []byte, e error)
- func (sp *Spider) PutFILE() (body []byte, e error)
- func (sp *Spider) PutJSON() (body []byte, e error)
- func (sp *Spider) PutXML() (body []byte, e error)
- func (sp *Spider) ToString() string
- type SpiderConfig
- func (config *SpiderConfig) Clear() *SpiderConfig
- func (config *SpiderConfig) ClearAll() *SpiderConfig
- func (config *SpiderConfig) ClearCookie() *SpiderConfig
- func (config *SpiderConfig) SetBData(data []byte) *SpiderConfig
- func (config *SpiderConfig) SetCookie(v string) *SpiderConfig
- func (config *SpiderConfig) SetForm(form url.Values) *SpiderConfig
- func (config *SpiderConfig) SetFormParm(k, v string) *SpiderConfig
- func (config *SpiderConfig) SetHeader(header http.Header) *SpiderConfig
- func (config *SpiderConfig) SetHeaderParm(k, v string) *SpiderConfig
- func (config *SpiderConfig) SetHost(host string) *SpiderConfig
- func (config *SpiderConfig) SetMethod(method string) *SpiderConfig
- func (config *SpiderConfig) SetRefer(refer string) *SpiderConfig
- func (config *SpiderConfig) SetUa(ua string) *SpiderConfig
- func (config *SpiderConfig) SetUrl(url string) *SpiderConfig
- func (config *SpiderConfig) SetWaitTime(num int) *SpiderConfig
Constants ¶
View Source
const ( // 暂停时间 default wait time WaitTime = 5 // HTTP方法 POST = "POST" POSTJSON = "POSTJSON" POSTXML = "POSTXML" POSTFILE = "POSTFILE" // 实现了! PUT = "PUT" PUTJSON = "PUTJSON" PUTXML = "PUTXML" PUTFILE = "PUTFILE" DELETE = "DELETE" GET = "GET" OTHER = "OTHER" CRITICAL = "CRITICAL" ERROR = "ERROR" WARNING = "WARNING" NOTICE = "NOTICE" INFO = "INFO" DEBUG = "DEBUG" HTTPFORMContentType = "application/x-www-form-urlencoded" HTTPJSONContentType = "application/json" HTTPXMLContentType = "text/xml" HTTPFILEContentType = "multipart/form-data" )
Variables ¶
View Source
var ( //default client to ask get or post // 默认的官方客户端,带cookie,方便使用,没有超时时间 Client = &http.Client{ CheckRedirect: func(req *http.Request, via []*http.Request) error { Logger.Debugf("-----------Redirect:%v------------", req.URL) return nil }, Jar: NewJar(), } // 没有cookie的客户端 NoCookieClient = &http.Client{ CheckRedirect: func(req *http.Request, via []*http.Request) error { Logger.Debugf("-----------Redirect:%v------------", req.URL) return nil }, } )
View Source
var ( // 浏览器头部 default header ua // 默认的,取消使用!! FoxfireLinux = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0" SpiderHeader = map[string][]string{ "User-Agent": { FoxfireLinux, }, } // http get and post No timeout // 不设置时没有超时时间 DefaultTimeOut = 0 )
View Source
var LevelNames = []string{
"CRITICAL",
"ERROR",
"WARNING",
"NOTICE",
"INFO",
"DEBUG",
}
LevelNames lists the log level names you can refer to.
View Source
var Logger = logging.MustGetLogger("GoSpider")
全局日志
Functions ¶
func CloneHeader ¶
Clones a header; a copy is needed because a header is a reference type. 克隆头部,因为是引用
func CopyM ¶
A Header is a map[string][]string; CopyM can be used to copy an HTTP header so that the copy and the original do not affect each other.
func MergeCookie ¶
Merges cookies; later cookies override earlier ones. Currently unused. 后来的覆盖前来的,暂时没有用的
func NewProxyClient ¶
Returns a client that uses a proxy; all proxy clients carry cookies. 带代理客户端,全部有带cookie
func TooSortSizes ¶
If the data is smaller than sizes (in KB), an error is returned.
Types ¶
type Spider ¶
type Spider struct { *SpiderConfig Preurl string // pre url 上一次访问的URL Raw []byte // 抓取到的二进制流 UrlStatuscode int // the last url response code,such as 404 响应状态码 Client *http.Client // 真正客户端 Fetchtimes int // url fetch number times 抓取次数 Errortimes int // error times 失败次数 Ipstring string // spider ip,just for user to record their proxyip 代理IP地址,没有代理默认localhost Request *http.Request // 增加方便外部调试 Response *http.Response // contains filtered or unexported fields }
爬虫结构体
var DefaultSpider *Spider
全局爬虫
func (*Spider) JsonToString ¶
将抓到的数据变成字符串,但数据是编码的JSON
func (*Spider) OtherGo ¶
其他Method
Method         = "OPTIONS"                ; Section 9.2
               | "GET"                    ; Section 9.3
               | "HEAD"                   ; Section 9.4
               | "POST"                   ; Section 9.5
               | "PUT"                    ; Section 9.6
               | "DELETE"                 ; Section 9.7
               | "TRACE"                  ; Section 9.8
               | "CONNECT"                ; Section 9.9
               | extension-method
extension-method = token
token          = 1*<any CHAR except CTLs or separators>
// content type
"application/x-www-form-urlencoded" "application/json" "text/xml" "multipart/form-data"
type SpiderConfig ¶
type SpiderConfig struct { Url string // now fetch url 这次要抓取的Url Method string // Get Post 请求方法 Header http.Header // 请求头部 Data url.Values // post form data 表单字段 BData []byte // binary data 文件上传二进制流 Wait int // sleep time 等待时间 }
func (*SpiderConfig) Clear ¶
func (config *SpiderConfig) Clear() *SpiderConfig
func (*SpiderConfig) ClearAll ¶
func (config *SpiderConfig) ClearAll() *SpiderConfig
func (*SpiderConfig) ClearCookie ¶
func (config *SpiderConfig) ClearCookie() *SpiderConfig
可以删除设置的Cookie
func (*SpiderConfig) SetBData ¶
func (config *SpiderConfig) SetBData(data []byte) *SpiderConfig
func (*SpiderConfig) SetCookie ¶
func (config *SpiderConfig) SetCookie(v string) *SpiderConfig
Cookie 这样设置如果有jar != nil 那么同名cookie会和这个一起发送过去
func (*SpiderConfig) SetForm ¶
func (config *SpiderConfig) SetForm(form url.Values) *SpiderConfig
func (*SpiderConfig) SetFormParm ¶
func (config *SpiderConfig) SetFormParm(k, v string) *SpiderConfig
func (*SpiderConfig) SetHeader ¶
func (config *SpiderConfig) SetHeader(header http.Header) *SpiderConfig
Java Bean链式结构
func (*SpiderConfig) SetHeaderParm ¶
func (config *SpiderConfig) SetHeaderParm(k, v string) *SpiderConfig
func (*SpiderConfig) SetHost ¶
func (config *SpiderConfig) SetHost(host string) *SpiderConfig
func (*SpiderConfig) SetMethod ¶
func (config *SpiderConfig) SetMethod(method string) *SpiderConfig
func (*SpiderConfig) SetRefer ¶
func (config *SpiderConfig) SetRefer(refer string) *SpiderConfig
func (*SpiderConfig) SetUa ¶
func (config *SpiderConfig) SetUa(ua string) *SpiderConfig
func (*SpiderConfig) SetUrl ¶
func (config *SpiderConfig) SetUrl(url string) *SpiderConfig
SetUrl的同时Set一下Host
func (*SpiderConfig) SetWaitTime ¶
func (config *SpiderConfig) SetWaitTime(num int) *SpiderConfig
Source Files ¶
Click to show internal directories.
Click to hide internal directories.