spider

package
v0.0.0-...-ad5c98e Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 23, 2024 | License: GPL-3.0 | Imports: 25 | Imported by: 0

Documentation

Index

Constants

View Source
const (
	KEYIN       = util.USE_KEYIN
	LIMIT       = math.MaxInt64
	FORCED_STOP = "——主动终止Spider——"
)
View Source
const (
	A = iota
	T
)

Variables

View Source
var Species = &SpiderSpecies{
	list: []*Spider{},
	hash: map[string]*Spider{},
}

Functions

func PutContext

func PutContext(ctx *Context)

Types

type Bell

type Bell struct {
	Hour int
	Min  int
	Sec  int
}

type Clock

type Clock struct {
	// contains filtered or unexported fields
}

type Context

type Context struct {
	Request  *request.Request
	Response *http.Response

	sync.Mutex
	// contains filtered or unexported fields
}

func GetContext

func GetContext(sp *Spider, req *request.Request) *Context

func (*Context) AddQueue

func (self *Context) AddQueue(req *request.Request) *Context

func (*Context) Aid

func (self *Context) Aid(aid map[string]interface{}, ruleName ...string) interface{}

func (*Context) CopyRequest

func (self *Context) CopyRequest() *request.Request

func (*Context) CopyTemps

func (self *Context) CopyTemps() request.Temp

func (*Context) CreatItem

func (self *Context) CreatItem(item map[int]interface{}, ruleName ...string) map[string]interface{}

func (*Context) FileOutput

func (self *Context) FileOutput(nameOrExt ...string)

func (*Context) GetCookie

func (self *Context) GetCookie() string

func (*Context) GetDom

func (self *Context) GetDom() *goquery.Document

func (*Context) GetError

func (self *Context) GetError() error

func (*Context) GetHeader

func (self *Context) GetHeader() http.Header

func (*Context) GetHost

func (self *Context) GetHost() string

func (*Context) GetItemField

func (self *Context) GetItemField(index int, ruleName ...string) (field string)

func (*Context) GetItemFieldIndex

func (self *Context) GetItemFieldIndex(field string, ruleName ...string) (index int)

func (*Context) GetItemFields

func (self *Context) GetItemFields(ruleName ...string) []string

func (*Context) GetKeyin

func (self *Context) GetKeyin() string

func (*Context) GetLimit

func (self *Context) GetLimit() int

func (*Context) GetMethod

func (self *Context) GetMethod() string

func (*Context) GetName

func (self *Context) GetName() string

func (*Context) GetReferer

func (self *Context) GetReferer() string

func (*Context) GetRequest

func (self *Context) GetRequest() *request.Request

func (*Context) GetRequestHeader

func (self *Context) GetRequestHeader() http.Header

func (*Context) GetResponse

func (self *Context) GetResponse() *http.Response

func (*Context) GetRule

func (self *Context) GetRule(ruleName string) (*Rule, bool)

func (*Context) GetRuleName

func (self *Context) GetRuleName() string

func (*Context) GetRules

func (self *Context) GetRules() map[string]*Rule

func (*Context) GetSpider

func (self *Context) GetSpider() *Spider

func (*Context) GetStatusCode

func (self *Context) GetStatusCode() int

func (*Context) GetTemp

func (self *Context) GetTemp(key string, defaultValue interface{}) interface{}

func (*Context) GetTemps

func (self *Context) GetTemps() request.Temp

func (*Context) GetText

func (self *Context) GetText() string

func (*Context) GetUrl

func (self *Context) GetUrl() string

func (*Context) JsAddQueue

func (self *Context) JsAddQueue(jreq map[string]interface{}) *Context

func (*Context) Log

func (*Context) Log() logs.Logs

func (*Context) Output

func (self *Context) Output(item interface{}, ruleName ...string)

func (*Context) Parse

func (self *Context) Parse(ruleName ...string) *Context

func (*Context) PullFiles

func (self *Context) PullFiles() (fs []data.FileCell)

func (*Context) PullItems

func (self *Context) PullItems() (ds []data.DataCell)

func (*Context) ResetText

func (self *Context) ResetText(body string) *Context

func (*Context) RunTimer

func (self *Context) RunTimer(id string) bool

func (*Context) SetError

func (self *Context) SetError(err error)

func (*Context) SetKeyin

func (self *Context) SetKeyin(keyin string) *Context

func (*Context) SetLimit

func (self *Context) SetLimit(max int) *Context

func (*Context) SetPausetime

func (self *Context) SetPausetime(pause int64, runtime ...bool) *Context

func (*Context) SetReferer

func (self *Context) SetReferer(referer string) *Context

func (*Context) SetResponse

func (self *Context) SetResponse(resp *http.Response) *Context

func (*Context) SetTemp

func (self *Context) SetTemp(key string, value interface{}) *Context

func (*Context) SetTimer

func (self *Context) SetTimer(id string, tol time.Duration, bell *Bell) bool

func (*Context) SetUrl

func (self *Context) SetUrl(url string) *Context

func (*Context) UpsertItemField

func (self *Context) UpsertItemField(field string, ruleName ...string) (index int)

type Rule

type Rule struct {
	ItemFields []string
	ParseFunc  func(*Context)
	AidFunc    func(*Context, map[string]interface{}) interface{}
}

type RuleModle

type RuleModle struct {
	Name      string `xml:"name,attr"`
	ParseFunc string `xml:"ParseFunc>Script"`
	AidFunc   string `xml:"AidFunc>Script"`
}

type RuleTree

type RuleTree struct {
	Root  func(*Context)
	Trunk map[string]*Rule
}

type Spider

type Spider struct {
	Name            string
	Description     string
	Pausetime       int64
	Limit           int64
	Keyin           string
	EnableCookie    bool
	NotDefaultField bool
	Namespace       func(self *Spider) string
	SubNamespace    func(self *Spider, dataCell map[string]interface{}) string
	RuleTree        *RuleTree
	// contains filtered or unexported fields
}

func (*Spider) CanStop

func (self *Spider) CanStop() bool

func (*Spider) Copy

func (self *Spider) Copy() *Spider

func (*Spider) Defer

func (self *Spider) Defer()

func (*Spider) DoHistory

func (self *Spider) DoHistory(req *request.Request, ok bool) bool

func (*Spider) GetDescription

func (self *Spider) GetDescription() string

func (*Spider) GetEnableCookie

func (self *Spider) GetEnableCookie() bool

func (*Spider) GetId

func (self *Spider) GetId() int

func (*Spider) GetItemField

func (self *Spider) GetItemField(rule *Rule, index int) (field string)

func (*Spider) GetItemFieldIndex

func (self *Spider) GetItemFieldIndex(rule *Rule, field string) (index int)

func (*Spider) GetItemFields

func (self *Spider) GetItemFields(rule *Rule) []string

func (*Spider) GetKeyin

func (self *Spider) GetKeyin() string

func (*Spider) GetLimit

func (self *Spider) GetLimit() int64

func (*Spider) GetName

func (self *Spider) GetName() string

func (*Spider) GetRule

func (self *Spider) GetRule(ruleName string) (*Rule, bool)

func (*Spider) GetRules

func (self *Spider) GetRules() map[string]*Rule

func (*Spider) GetSubName

func (self *Spider) GetSubName() string

func (*Spider) IsStopping

func (self *Spider) IsStopping() bool

func (*Spider) MustGetRule

func (self *Spider) MustGetRule(ruleName string) *Rule

func (*Spider) OutDefaultField

func (self *Spider) OutDefaultField() bool

func (Spider) Register

func (self Spider) Register() *Spider

func (*Spider) ReqmatrixInit

func (self *Spider) ReqmatrixInit() *Spider

func (*Spider) RequestFree

func (self *Spider) RequestFree()

func (*Spider) RequestLen

func (self *Spider) RequestLen() int

func (*Spider) RequestPull

func (self *Spider) RequestPull() *request.Request

func (*Spider) RequestPush

func (self *Spider) RequestPush(req *request.Request)

func (*Spider) RequestUse

func (self *Spider) RequestUse()

func (*Spider) RunTimer

func (self *Spider) RunTimer(id string) bool

func (*Spider) SetId

func (self *Spider) SetId(id int)

func (*Spider) SetKeyin

func (self *Spider) SetKeyin(keyword string)

func (*Spider) SetLimit

func (self *Spider) SetLimit(max int64)

func (*Spider) SetPausetime

func (self *Spider) SetPausetime(pause int64, runtime ...bool)

func (*Spider) SetTimer

func (self *Spider) SetTimer(id string, tol time.Duration, bell *Bell) bool

func (*Spider) Start

func (self *Spider) Start()

func (*Spider) Stop

func (self *Spider) Stop()

func (*Spider) TryFlushFailure

func (self *Spider) TryFlushFailure()

func (*Spider) TryFlushSuccess

func (self *Spider) TryFlushSuccess()

func (*Spider) UpsertItemField

func (self *Spider) UpsertItemField(rule *Rule, field string) (index int)

type SpiderModle

type SpiderModle struct {
	Name            string      `xml:"Name"`
	Description     string      `xml:"Description"`
	Pausetime       int64       `xml:"Pausetime"`
	EnableLimit     bool        `xml:"EnableLimit"`
	EnableKeyin     bool        `xml:"EnableKeyin"`
	EnableCookie    bool        `xml:"EnableCookie"`
	NotDefaultField bool        `xml:"NotDefaultField"`
	Namespace       string      `xml:"Namespace>Script"`
	SubNamespace    string      `xml:"SubNamespace>Script"`
	Root            string      `xml:"Root>Script"`
	Trunk           []RuleModle `xml:"Rule"`
}

type SpiderSpecies

type SpiderSpecies struct {
	// contains filtered or unexported fields
}

func (*SpiderSpecies) Add

func (self *SpiderSpecies) Add(sp *Spider) *Spider

func (*SpiderSpecies) Get

func (self *SpiderSpecies) Get() []*Spider

func (*SpiderSpecies) GetByName

func (self *SpiderSpecies) GetByName(name string) *Spider

type Timer

type Timer struct {
	sync.RWMutex
	// contains filtered or unexported fields
}

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL