Documentation ¶
Index ¶
- Constants
- type Request
- func (self *Request) AddHeader(key, value string) *Request
- func (self *Request) Copy() *Request
- func (self *Request) GetConnTimeout() time.Duration
- func (self *Request) GetCookies() string
- func (self *Request) GetDialTimeout() time.Duration
- func (self *Request) GetDownloaderID() int
- func (self *Request) GetEnableCookie() bool
- func (self *Request) GetHeader() http.Header
- func (self *Request) GetMethod() string
- func (self *Request) GetPostData() string
- func (self *Request) GetPriority() int
- func (self *Request) GetProxy() string
- func (self *Request) GetRedirectTimes() int
- func (self *Request) GetReferer() string
- func (self *Request) GetRetryPause() time.Duration
- func (self *Request) GetRuleName() string
- func (self *Request) GetSpiderName() string
- func (self *Request) GetTemp(key string, defaultValue interface{}) interface{}
- func (self *Request) GetTemps() Temp
- func (self *Request) GetTryTimes() int
- func (self *Request) GetUrl() string
- func (self *Request) IsReloadable() bool
- func (self *Request) MarshalJSON() ([]byte, error)
- func (self *Request) Prepare() error
- func (self *Request) Serialize() string
- func (self *Request) SetCookies(cookie string) *Request
- func (self *Request) SetDownloaderID(id int) *Request
- func (self *Request) SetEnableCookie(enableCookie bool) *Request
- func (self *Request) SetHeader(key, value string) *Request
- func (self *Request) SetMethod(method string) *Request
- func (self *Request) SetPriority(priority int) *Request
- func (self *Request) SetProxy(proxy string) *Request
- func (self *Request) SetReferer(referer string) *Request
- func (self *Request) SetReloadable(can bool) *Request
- func (self *Request) SetRuleName(ruleName string) *Request
- func (self *Request) SetSpiderName(spiderName string) *Request
- func (self *Request) SetTemp(key string, value interface{}) *Request
- func (self *Request) SetTemps(temp map[string]interface{}) *Request
- func (self *Request) SetUrl(url string) *Request
- func (self *Request) Unique() string
- type Temp
Constants ¶
View Source
const (
	DefaultDialTimeout = 2 * time.Minute // 默认请求服务器超时
	DefaultConnTimeout = 2 * time.Minute // 默认下载超时
	DefaultTryTimes    = 3               // 默认最大下载次数
	DefaultRetryPause  = 2 * time.Second // 默认重新下载前停顿时长
)
View Source
const (
	SURF_ID    = 0 // 默认的surf下载内核(Go原生),此值不可改动
	PHANTOM_ID = 1 // 备用的phantomjs下载内核,一般不使用(效率差,头信息支持不完善)
)
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Request ¶
type Request struct {
	Spider        string          //规则名,自动设置,禁止人为填写
	Url           string          //目标URL,必须设置
	Rule          string          //用于解析响应的规则节点名,必须设置
	Method        string          //GET POST POST-M HEAD
	Header        http.Header     //请求头信息
	EnableCookie  bool            //是否使用cookies,在Spider的EnableCookie设置
	PostData      string          //POST values
	DialTimeout   time.Duration   //创建连接超时 dial tcp: i/o timeout
	ConnTimeout   time.Duration   //连接状态超时 WSARecv tcp: i/o timeout
	TryTimes      int             //尝试下载的最大次数
	RetryPause    time.Duration   //下载失败后,下次尝试下载的等待时间
	RedirectTimes int             //重定向的最大次数,为0时不限,小于0时禁止重定向
	Temp          Temp            //临时数据
	TempIsJson    map[string]bool //将Temp中以JSON存储的字段标记为true,自动设置,禁止人为填写
	Priority      int             //指定调度优先级,默认为0(最小优先级为0)
	Reloadable    bool            //是否允许重复该链接下载

	//Surfer下载器内核ID
	//0为Surf高并发下载器,各种控制功能齐全
	//1为PhantomJS下载器,特点破防力强,速度慢,低并发
	DownloaderID int
	// contains filtered or unexported fields
}
Request represents an object waiting to be crawled.
func (*Request) GetConnTimeout ¶
func (*Request) GetCookies ¶
func (*Request) GetDialTimeout ¶
func (*Request) GetDownloaderID ¶
func (*Request) GetEnableCookie ¶
func (*Request) GetPostData ¶
func (*Request) GetPriority ¶
func (*Request) GetRedirectTimes ¶
func (*Request) GetReferer ¶
func (*Request) GetRetryPause ¶
func (*Request) GetRuleName ¶
func (*Request) GetSpiderName ¶
func (*Request) GetTryTimes ¶
func (*Request) IsReloadable ¶
func (*Request) MarshalJSON ¶
func (*Request) Prepare ¶
发送请求前的准备工作,设置一系列默认值
Request.Url与Request.Rule必须设置
Request.Spider无需手动设置(由系统自动设置)
Request.EnableCookie在Spider字段中统一设置,规则请求中指定的无效
以下字段有默认值,可不设置:
Request.Method默认为GET方法;
Request.DialTimeout默认为常量DefaultDialTimeout,小于0时不限制等待响应时长;
Request.ConnTimeout默认为常量DefaultConnTimeout,小于0时不限制下载超时;
Request.TryTimes默认为常量DefaultTryTimes,小于0时不限制失败重载次数;
Request.RedirectTimes默认不限制重定向次数,小于0时可禁止重定向跳转;
Request.RetryPause默认为常量DefaultRetryPause;
Request.DownloaderID指定下载器ID,0为默认的Surf高并发下载器,功能完备,1为PhantomJS下载器,特点破防力强,速度慢,低并发。
func (*Request) SetCookies ¶
func (*Request) SetDownloaderID ¶
func (*Request) SetEnableCookie ¶
func (*Request) SetPriority ¶
func (*Request) SetReferer ¶
func (*Request) SetReloadable ¶
func (*Request) SetRuleName ¶
func (*Request) SetSpiderName ¶
Click to show internal directories.
Click to hide internal directories.