gocrawler

package module
v1.0.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 19, 2022 License: AGPL-3.0 Imports: 12 Imported by: 0

README

gocrawler

封装 github.com/gocolly/colly/v2 供当前项目使用

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func CopyMap

func CopyMap(m1 map[string]string) map[string]string

CopyMap returns a shallow (non-deep) copy of the map; it is mainly used for copying query / Request.body data.

func Map2Reader

func Map2Reader(m map[string]string) io.Reader

Map2Reader converts a map[string]string into an io.Reader; it corresponds to createFormReader in github.com/gocolly/colly.

Types

type Client

// Client is the interface for issuing HTTP requests. Every method takes the
// *Context associated with the call and reports failure through its error
// return.
type Client interface {
	// Request takes an explicit HTTP method, URL, body reader and header set.
	Request(ctx *Context, method HttpMethod, url string, requestData io.Reader, header http.Header) error
	// MultiRequest takes a MultiRequest value (see NewMultiRequest and
	// MultiRequest.Append).
	MultiRequest(*Context, MultiRequest) error

	// Get takes only a URL.
	// NOTE(review): presumably shorthand for Request with HttpMethodGet — confirm.
	Get(ctx *Context, url string) error
	// Post takes a URL and string key/value request data.
	// NOTE(review): presumably shorthand for Request with HttpMethodPost — confirm.
	Post(ctx *Context, url string, requestData map[string]string) error
}

type Collector

// Collector exposes a Client and a Logger through accessor methods.
type Collector interface {
	// Client returns the Client held by this Collector.
	Client() Client
	// Logger returns the Logger held by this Collector.
	Logger() Logger
}

type Constructor

// Constructor builds a Crawler from zero or more Options.
type Constructor interface {
	// Construct returns a Crawler for the given options.
	Construct(options ...Option) Crawler
}

func RecoveryConstructor

func RecoveryConstructor() Constructor

func ReplaceConstructor

func ReplaceConstructor(newConstructor Constructor) (oldConstructor Constructor)

type ConstructorFunc

// ConstructorFunc is a function type with the same signature as
// Constructor.Construct. It declares a Construct method, so a value of this
// type satisfies Constructor.
type ConstructorFunc func(options ...Option) Crawler

func (ConstructorFunc) Construct

func (f ConstructorFunc) Construct(options ...Option) Crawler

type Context

// Context is an opaque struct created with NewContext. Its method set takes
// string keys: Put accepts a key and a value, while Bool, Int, Uint and MustGot
// take a key and yield a value. Errors returns an *Errors.
// NOTE(review): presumably a per-crawl key/value store; behavior for missing
// keys or mismatched types is not shown here — confirm.
type Context struct {
	// contains filtered or unexported fields
}

func NewContext

func NewContext() *Context

func (*Context) Bool

func (ctx *Context) Bool(key string) bool

func (*Context) Errors

func (ctx *Context) Errors() *Errors

func (*Context) Int

func (ctx *Context) Int(key string) int

func (*Context) MustGot

func (ctx *Context) MustGot(key string, receiver interface{})

func (*Context) Put

func (ctx *Context) Put(key string, value interface{})

func (*Context) Uint

func (ctx *Context) Uint(key string) uint

type Crawler

// Crawler is the single-method interface returned by New, NewSimpleCrawler and
// Constructor.Construct.
type Crawler interface {
	// Crawl takes one out value and any number of ins values and returns an error.
	// NOTE(review): out is presumably a destination to populate and ins the
	// inputs for the crawl — confirm against the implementation.
	Crawl(out interface{}, ins ...interface{}) error
}

func New

func New(opts ...Option) Crawler

func NewSimpleCrawler

func NewSimpleCrawler(opts ...Option) Crawler

type Error

// Error embeds errors.LocatorError and has further unexported fields. Its
// accessor methods return a *Context, a *colly.Request and a *colly.Response.
// NOTE(review): the errors package here is not the standard library one
// (which has no LocatorError) — confirm the import path.
type Error struct {
	errors.LocatorError
	// contains filtered or unexported fields
}

func (*Error) Context

func (err *Error) Context() *Context

func (*Error) Request

func (err *Error) Request() *colly.Request

func (*Error) Response

func (err *Error) Response() *colly.Response

type ErrorHandler

// ErrorHandler receives a Response together with an error and returns an
// error. An ErrorHandler is supplied to a Crawler through ErrorHandlerOption.
type ErrorHandler interface {
	// HandleError is given the response and the error associated with it.
	// NOTE(review): how the returned error is used is not shown here.
	HandleError(response Response, err error) error
}

type ErrorHandlerFunc

// ErrorHandlerFunc is a function type with the same signature as
// ErrorHandler.HandleError. It declares a HandleError method, so a value of
// this type satisfies ErrorHandler.
type ErrorHandlerFunc func(response Response, err error) error

func (ErrorHandlerFunc) HandleError

func (f ErrorHandlerFunc) HandleError(response Response, err error) error

type Errors

// Errors is an opaque struct whose methods work with *Error values: Append
// takes one, Errs returns a slice of them, Err returns a single one and IsNil
// returns a bool.
// NOTE(review): presumably an accumulator of errors for one Context; which
// element Err returns is not shown here — confirm.
type Errors struct {
	// contains filtered or unexported fields
}

func (*Errors) Append

func (errs *Errors) Append(err *Error)

func (*Errors) Err

func (errs *Errors) Err() *Error

func (*Errors) Errs

func (errs *Errors) Errs() []*Error

func (*Errors) IsNil

func (errs *Errors) IsNil() bool

type HttpMethod

// HttpMethod is a string type naming an HTTP method. It is the method
// parameter type of Client.Request and MultiRequest.Append.
type HttpMethod string
// Predefined HttpMethod values; they reuse the method-name constants from
// net/http.
const (
	HttpMethodGet  HttpMethod = http.MethodGet
	HttpMethodPost HttpMethod = http.MethodPost
)

type HttpStatus

// HttpStatus is an int type. No constants or methods are listed for it.
// NOTE(review): presumably holds an HTTP status code — confirm.
type HttpStatus int

type Logger

// Logger is the leveled logging interface exposed by Collector, Request and
// Response. Every method takes a message and optional zap.Field values.
// NOTE(review): the method set mirrors the level methods of *zap.Logger, so a
// *zap.Logger presumably satisfies it — confirm.
type Logger interface {
	Debug(msg string, fields ...zap.Field)
	Info(msg string, fields ...zap.Field)
	Warn(msg string, fields ...zap.Field)
	Error(msg string, fields ...zap.Field)
	DPanic(msg string, fields ...zap.Field)
	Panic(msg string, fields ...zap.Field)
	Fatal(msg string, fields ...zap.Field)
}

type MultiRequest

// MultiRequest is an opaque struct created with NewMultiRequest. Its Append
// method takes a method, URL, body reader and header set, and the value is
// passed to Client.MultiRequest.
// NOTE(review): presumably a batch of requests to issue together — confirm.
type MultiRequest struct {
	// contains filtered or unexported fields
}

func NewMultiRequest

func NewMultiRequest() MultiRequest

func (*MultiRequest) Append

func (m *MultiRequest) Append(method HttpMethod, url string, requestData io.Reader, header http.Header)

type Option

// Option is a configuration value accepted by New, NewSimpleCrawler and
// Constructor.Construct.
type Option interface {
	// Apply should not be called outside of a constructor.
	Apply(c Crawler)
}

func DebuggerOption

func DebuggerOption(d debug.Debugger) Option

func DefaultDebuggerOption

func DefaultDebuggerOption(l *zap.Logger) Option

func ErrorHandlerOption

func ErrorHandlerOption(handler ErrorHandler) Option

func LimitOption

func LimitOption(parallelism int, delay, randomDelay time.Duration) Option

func RequestHandlerOption

func RequestHandlerOption(handler RequestHandler) Option

func ResponseHandlerOption

func ResponseHandlerOption(handler ResponseHandler) Option

func ResponseHeadersHandlerOption

func ResponseHeadersHandlerOption(handler ResponseHeadersHandler) Option

func ScrapedHandlerOption

func ScrapedHandlerOption(handler ScrapedHandler) Option

func SetInvokerOption

func SetInvokerOption(i interface{}) Option

func WithLoggerOption

func WithLoggerOption(l *zap.Logger) Option

type Request

// Request is the value handed to a RequestHandler. It gives access to a
// *colly.Request, a *Context and a Logger.
type Request interface {
	// Request returns the *colly.Request.
	Request() *colly.Request
	// Context returns the *Context.
	Context() *Context
	// Logger returns the Logger.
	Logger() Logger
}

type RequestHandler

// RequestHandler handles a Request and returns an error. A RequestHandler is
// supplied to a Crawler through RequestHandlerOption.
type RequestHandler interface {
	// HandleRequest is given the request to handle.
	// NOTE(review): when it is invoked and how the error is used is not shown here.
	HandleRequest(request Request) error
}

type RequestHandlerFunc

// RequestHandlerFunc is a function type with the same signature as
// RequestHandler.HandleRequest. It declares a HandleRequest method, so a value
// of this type satisfies RequestHandler.
type RequestHandlerFunc func(request Request) error

func (RequestHandlerFunc) HandleRequest

func (f RequestHandlerFunc) HandleRequest(request Request) error

type Response

// Response is the value handed to ResponseHandler, ResponseHeadersHandler,
// ScrapedHandler and ErrorHandler. It gives access to a *colly.Response, a
// *Context and a Logger.
type Response interface {
	// Response returns the *colly.Response.
	Response() *colly.Response
	// Context returns the *Context.
	Context() *Context
	// Logger returns the Logger.
	Logger() Logger
}

type ResponseHandler

// ResponseHandler handles a Response and returns an error. A ResponseHandler
// is supplied to a Crawler through ResponseHandlerOption.
type ResponseHandler interface {
	// HandleResponse is given the response to handle.
	// NOTE(review): when it is invoked and how the error is used is not shown here.
	HandleResponse(response Response) error
}

type ResponseHandlerFunc

// ResponseHandlerFunc is a function type with the same signature as
// ResponseHandler.HandleResponse. It declares a HandleResponse method, so a
// value of this type satisfies ResponseHandler.
type ResponseHandlerFunc func(response Response) error

func (ResponseHandlerFunc) HandleResponse

func (f ResponseHandlerFunc) HandleResponse(response Response) error

type ResponseHeadersHandler

// ResponseHeadersHandler handles a Response and returns an error. A
// ResponseHeadersHandler is supplied to a Crawler through
// ResponseHeadersHandlerOption.
type ResponseHeadersHandler interface {
	// HandleResponseHeaders is given the response to handle.
	// NOTE(review): presumably invoked once response headers are available,
	// before the body is read — confirm.
	HandleResponseHeaders(response Response) error
}

type ResponseHeadersHandlerFunc

// ResponseHeadersHandlerFunc is a function type with the same signature as
// ResponseHeadersHandler.HandleResponseHeaders. It declares a
// HandleResponseHeaders method, so a value of this type satisfies
// ResponseHeadersHandler.
type ResponseHeadersHandlerFunc func(response Response) error

func (ResponseHeadersHandlerFunc) HandleResponseHeaders

func (f ResponseHeadersHandlerFunc) HandleResponseHeaders(response Response) error

type ScrapedHandler

// ScrapedHandler handles a Response and returns an error. A ScrapedHandler is
// supplied to a Crawler through ScrapedHandlerOption.
type ScrapedHandler interface {
	// HandleScraped is given the response to handle.
	// NOTE(review): presumably invoked after scraping of the response has
	// finished — confirm.
	HandleScraped(response Response) error
}

type ScrapedHandlerFunc

// ScrapedHandlerFunc is a function type with the same signature as
// ScrapedHandler.HandleScraped. It declares a HandleScraped method, so a value
// of this type satisfies ScrapedHandler.
type ScrapedHandlerFunc func(response Response) error

func (ScrapedHandlerFunc) HandleScraped

func (f ScrapedHandlerFunc) HandleScraped(response Response) error

type TypeAssertionError

// TypeAssertionError embeds errors.LocatorError and declares no other fields.
// NOTE(review): presumably reported when a value has an unexpected dynamic
// type; where it is produced is not shown here — confirm.
type TypeAssertionError struct {
	errors.LocatorError
}

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL