Documentation ¶
Index ¶
- Constants
- Variables
- func PutContext(ctx *Context)
- type Bell
- type Clock
- type Context
- func (self *Context) AddQueue(req *request.Request) *Context
- func (self *Context) Aid(aid map[string]interface{}, ruleName ...string) interface{}
- func (self *Context) CopyRequest() *request.Request
- func (self *Context) CopyTemps() request.Temp
- func (self *Context) CreatItem(item map[int]interface{}, ruleName ...string) map[string]interface{}
- func (self *Context) FileOutput(nameOrExt ...string)
- func (self *Context) GetCookie() string
- func (self *Context) GetDom() *goquery.Document
- func (self *Context) GetError() error
- func (self *Context) GetHeader() http.Header
- func (self *Context) GetHost() string
- func (self *Context) GetItemField(index int, ruleName ...string) (field string)
- func (self *Context) GetItemFieldIndex(field string, ruleName ...string) (index int)
- func (self *Context) GetItemFields(ruleName ...string) []string
- func (self *Context) GetKeyin() string
- func (self *Context) GetLimit() int
- func (self *Context) GetMethod() string
- func (self *Context) GetName() string
- func (self *Context) GetReferer() string
- func (self *Context) GetRequest() *request.Request
- func (self *Context) GetRequestHeader() http.Header
- func (self *Context) GetResponse() *http.Response
- func (self *Context) GetRule(ruleName string) (*Rule, bool)
- func (self *Context) GetRuleName() string
- func (self *Context) GetRules() map[string]*Rule
- func (self *Context) GetSpider() *Spider
- func (self *Context) GetStatusCode() int
- func (self *Context) GetTemp(key string, defaultValue interface{}) interface{}
- func (self *Context) GetTemps() request.Temp
- func (self *Context) GetText() string
- func (self *Context) GetUrl() string
- func (self *Context) JsAddQueue(jreq map[string]interface{}) *Context
- func (*Context) Log() logs.Logs
- func (self *Context) Output(item interface{}, ruleName ...string)
- func (self *Context) Parse(ruleName ...string) *Context
- func (self *Context) PullFiles() (fs []data.FileCell)
- func (self *Context) PullItems() (ds []data.DataCell)
- func (self *Context) ResetText(body string) *Context
- func (self *Context) RunTimer(id string) bool
- func (self *Context) SetError(err error)
- func (self *Context) SetKeyin(keyin string) *Context
- func (self *Context) SetLimit(max int) *Context
- func (self *Context) SetPausetime(pause int64, runtime ...bool) *Context
- func (self *Context) SetReferer(referer string) *Context
- func (self *Context) SetResponse(resp *http.Response) *Context
- func (self *Context) SetTemp(key string, value interface{}) *Context
- func (self *Context) SetTimer(id string, tol time.Duration, bell *Bell) bool
- func (self *Context) SetUrl(url string) *Context
- func (self *Context) UpsertItemField(field string, ruleName ...string) (index int)
- type Rule
- type RuleModle
- type RuleTree
- type Spider
- func (self *Spider) CanStop() bool
- func (self *Spider) Copy() *Spider
- func (self *Spider) Defer()
- func (self *Spider) DoHistory(req *request.Request, ok bool) bool
- func (self *Spider) GetDescription() string
- func (self *Spider) GetEnableCookie() bool
- func (self *Spider) GetId() int
- func (self *Spider) GetItemField(rule *Rule, index int) (field string)
- func (self *Spider) GetItemFieldIndex(rule *Rule, field string) (index int)
- func (self *Spider) GetItemFields(rule *Rule) []string
- func (self *Spider) GetKeyin() string
- func (self *Spider) GetLimit() int64
- func (self *Spider) GetName() string
- func (self *Spider) GetRule(ruleName string) (*Rule, bool)
- func (self *Spider) GetRules() map[string]*Rule
- func (self *Spider) GetSubName() string
- func (self *Spider) IsStopping() bool
- func (self *Spider) MustGetRule(ruleName string) *Rule
- func (self *Spider) OutDefaultField() bool
- func (self Spider) Register() *Spider
- func (self *Spider) ReqmatrixInit() *Spider
- func (self *Spider) RequestFree()
- func (self *Spider) RequestLen() int
- func (self *Spider) RequestPull() *request.Request
- func (self *Spider) RequestPush(req *request.Request)
- func (self *Spider) RequestUse()
- func (self *Spider) RunTimer(id string) bool
- func (self *Spider) SetId(id int)
- func (self *Spider) SetKeyin(keyword string)
- func (self *Spider) SetLimit(max int64)
- func (self *Spider) SetPausetime(pause int64, runtime ...bool)
- func (self *Spider) SetTimer(id string, tol time.Duration, bell *Bell) bool
- func (self *Spider) Start()
- func (self *Spider) Stop()
- func (self *Spider) TryFlushFailure()
- func (self *Spider) TryFlushSuccess()
- func (self *Spider) UpsertItemField(rule *Rule, field string) (index int)
- type SpiderModle
- type SpiderSpecies
- type Timer
Constants ¶
View Source
const (
	KEYIN       = util.USE_KEYIN
	LIMIT       = math.MaxInt64
	FORCED_STOP = "——主动终止Spider——"
)
View Source
const (
	A = iota
	T
)
Variables ¶
View Source
var Species = &SpiderSpecies{
	list: []*Spider{},
	hash: map[string]*Spider{},
}
Functions ¶
func PutContext ¶
func PutContext(ctx *Context)
Types ¶
type Context ¶
type Context struct {
	Request  *request.Request
	Response *http.Response

	sync.Mutex
	// contains filtered or unexported fields
}
func (*Context) CopyRequest ¶
func (*Context) FileOutput ¶
func (*Context) GetItemField ¶
func (*Context) GetItemFieldIndex ¶
func (*Context) GetItemFields ¶
func (*Context) GetReferer ¶
func (*Context) GetRequest ¶
func (*Context) GetRequestHeader ¶
func (*Context) GetResponse ¶
func (*Context) GetRuleName ¶
func (*Context) GetStatusCode ¶
func (*Context) JsAddQueue ¶
func (*Context) SetPausetime ¶
func (*Context) SetReferer ¶
type Spider ¶
type Spider struct {
	Name            string
	Description     string
	Pausetime       int64
	Limit           int64
	Keyin           string
	EnableCookie    bool
	NotDefaultField bool
	Namespace       func(self *Spider) string
	SubNamespace    func(self *Spider, dataCell map[string]interface{}) string
	RuleTree        *RuleTree
	// contains filtered or unexported fields
}
func (*Spider) GetDescription ¶
func (*Spider) GetEnableCookie ¶
func (*Spider) GetItemField ¶
func (*Spider) GetItemFieldIndex ¶
func (*Spider) GetItemFields ¶
func (*Spider) GetSubName ¶
func (*Spider) IsStopping ¶
func (*Spider) MustGetRule ¶
func (*Spider) OutDefaultField ¶
func (*Spider) ReqmatrixInit ¶
func (*Spider) RequestFree ¶
func (self *Spider) RequestFree()
func (*Spider) RequestLen ¶
func (*Spider) RequestPull ¶
func (*Spider) RequestPush ¶
func (*Spider) RequestUse ¶
func (self *Spider) RequestUse()
func (*Spider) SetPausetime ¶
func (*Spider) TryFlushFailure ¶
func (self *Spider) TryFlushFailure()
func (*Spider) TryFlushSuccess ¶
func (self *Spider) TryFlushSuccess()
type SpiderModle ¶
type SpiderModle struct {
	Name            string      `xml:"Name"`
	Description     string      `xml:"Description"`
	Pausetime       int64       `xml:"Pausetime"`
	EnableLimit     bool        `xml:"EnableLimit"`
	EnableKeyin     bool        `xml:"EnableKeyin"`
	EnableCookie    bool        `xml:"EnableCookie"`
	NotDefaultField bool        `xml:"NotDefaultField"`
	Namespace       string      `xml:"Namespace>Script"`
	SubNamespace    string      `xml:"SubNamespace>Script"`
	Root            string      `xml:"Root>Script"`
	Trunk           []RuleModle `xml:"Rule"`
}
type SpiderSpecies ¶
type SpiderSpecies struct {
// contains filtered or unexported fields
}
func (*SpiderSpecies) Add ¶
func (self *SpiderSpecies) Add(sp *Spider) *Spider
func (*SpiderSpecies) Get ¶
func (self *SpiderSpecies) Get() []*Spider
func (*SpiderSpecies) GetByName ¶
func (self *SpiderSpecies) GetByName(name string) *Spider
Click to show internal directories.
Click to hide internal directories.