Versions in this module Expand all Collapse all v0 v0.1.0 Sep 2, 2021 Changes in this version + var ErrorSkip = errors.New("skip") + func CreateDataDB(path string) *leveldb.DB + func DecodeGBK(s []byte) ([]byte, error) + func RequestStringify(req Request) (string, error) + type ByteHandler interface + Handle func(s []byte) ([]byte, error) + type ClientGenerator interface + Generate func() *http.Client + SetProxyProvider func(pxyProvider ProxyProvider) + type ConsoleHandler struct + func (hh *ConsoleHandler) Handle(resp Response, handleResult *Result, ctx context.Context) error + type ConsolePipeline struct + func (c *ConsolePipeline) Process(handleResult *Result, ctx context.Context) error + type DataStore struct + func (lvdb *DataStore) Add(key string, value string) error + func (lvdb *DataStore) BatchAdd(m map[string]string) error + func (lvdb *DataStore) Clear(prefix string, limit ...string) + func (lvdb *DataStore) Del(key string) error + func (lvdb *DataStore) Get(key string) (string, error) + func (lvdb *DataStore) List(prefix string, limit ...string) ([]string, error) + type DefaultListener struct + func (listen *DefaultListener) OnError(req Request, e error, ctx context.Context) + func (listen *DefaultListener) OnSuccess(req Request, ctx context.Context) + type Downloader interface + Download func(req *Request, ctx context.Context) (resp *Response, err error) + SetClientGenerator func(generator ClientGenerator) + type GBKByteHandler struct + func (h *GBKByteHandler) Handle(s []byte) ([]byte, error) + type Handler interface + Handle func(resp Response, handleResult *Result, ctx context.Context) error + type HttpDownloader struct + func (d *HttpDownloader) Download(request *Request, ctx context.Context) (r *Response, err error) + func (d *HttpDownloader) SetClientGenerator(generator ClientGenerator) + type Listener interface + OnError func(req Request, e error, ctx context.Context) + OnSuccess func(req Request, ctx context.Context) + type Pipeline interface + Process func(handleResult *Result, ctx context.Context) error + type Proxy struct + Host string + Password string + Port string + Scheme string + Username string + func CreateProxy(url url.URL) Proxy + func (pxy Proxy) String() string + type ProxyProvider interface + AddProxy func(pxy ...Proxy) + GetProxy func() *Proxy + type Request struct + CycleTime int + Downloader Downloader + Extras map[string]interface{} + Header map[string][]string + Id string + Method string + Skip bool + State RequestState + Url string + func NewRequest() Request + func ParseRequest(str string) (*Request, error) + func (req *Request) AddExtras(key string, value interface{}) + func (req *Request) GetExtras(key string) interface{} + type RequestFilter interface + Filter func(requests ...Request) []Request + type RequestHandle func(req *Request) + type RequestScheduler struct + func (s *RequestScheduler) Len() int + func (s *RequestScheduler) Poll() Request + func (s *RequestScheduler) PollN(n int) ([]Request, int) + func (s *RequestScheduler) Push(reqs ...Request) + type RequestState string + const RequestError + const RequestNormal + const RequestSuccess + type Response struct + Body []byte + ContentLength int64 + Header map[string][]string + Request *Request + Status string + StatusCode int + type Result struct + TargetItems map[string]interface{} + TargetRequests []Request + func (hdl *Result) AddItem(key string, val interface{}) + func (hdl *Result) AddTargetRequest(target Request) + func (hdl *Result) AddTargetUrl(target string) + type Scheduler interface + Len func() int + Poll func() Request + PollN func(n int) ([]Request, int) + Push func(requests ...Request) + type SimpleClientGenerator struct + func (sg *SimpleClientGenerator) Generate() *http.Client + func (sg *SimpleClientGenerator) SetProxyProvider(pxyProvider ProxyProvider) + type SimpleProxyProvider struct + func (sp *SimpleProxyProvider) AddProxy(pxy ...Proxy) + func (sp *SimpleProxyProvider) GetProxy() *Proxy + type Spider struct + PreHandleRequest RequestHandle + RequestsStore []Store + func NewSpider(seedUrl ...string) *Spider + func (s *Spider) AddHandler(handler Handler) + func (s *Spider) AddHeader(key, value string) + func (s *Spider) AddListener(listener Listener) + func (s *Spider) AddPipeline(pipeline Pipeline) + func (s *Spider) AddProxy(pxy ...Proxy) + func (s *Spider) AddRequestStore(store Store) + func (s *Spider) AddSeedUrl(seedUrls ...string) + func (s *Spider) ClearRequestStore() + func (s *Spider) Run() + func (s *Spider) SaveHtml(savepath string, suffixGenerate func() string) + func (s *Spider) SetByteHandler(handler ByteHandler) + func (s *Spider) SetClientGenerator(clientGenerator ClientGenerator) + func (s *Spider) SetCycleTime(time int) + func (s *Spider) SetDownloader(downloader Downloader) + func (s *Spider) SetGoroutines(n int) + func (s *Spider) SetProxyProvider(proxyProvider ProxyProvider) + func (s *Spider) SetRequestFilter(filter RequestFilter) + func (s *Spider) SetScheduler(scheduler Scheduler) + func (s *Spider) SetSleepTime(t time.Duration) + func (s *Spider) SetTimeOut(t time.Duration) + type Store interface + Add func(key string, value string) error + BatchAdd func(m map[string]string) error + Clear func(prefix string, limit ...string) + Del func(key string) error + Get func(key string) (string, error) + List func(prefix string, limit ...string) ([]string, error) + type StoreRequestFilter struct + func (filter *StoreRequestFilter) Filter(requests ...Request) []Request