crawler

package
v0.7.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 20, 2020 License: MIT Imports: 9 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type CollectEvent

// CollectEvent is the argument type of Crawler.Collect. It embeds Event, so
// Event's TempStorage field and methods are promoted onto it.
type CollectEvent struct {
	Event
	// Task is a task.Task value carried with the event.
	// NOTE(review): presumably the task given to PipeLine.Invoke — confirm.
	Task  task.Task
	// Queue is a string carried with the event.
	// NOTE(review): presumably the queue name passed to New — confirm.
	Queue string
}

type Crawler

// Crawler is the interface accepted by PipeLine.AddCrawler. It groups three
// methods, each taking its own event type: Collect, Parser and Storage.
//
// NOTE(review): presumably the pipeline calls them in that order, feeding the
// result of one into the event of the next — confirm against PipeLine.Invoke.
type Crawler interface {
	// Collect takes a CollectEvent and returns a string and an error.
	// NOTE(review): the string is presumably the fetched content that later
	// appears as ParserEvent.Content — confirm.
	Collect(event CollectEvent) (string, error)
	// Parser takes a ParserEvent and returns a map keyed by string and an error.
	// NOTE(review): the map is presumably what later appears as
	// StorageEvent.Data — confirm.
	Parser(event ParserEvent) (map[string]interface{}, error)
	// Storage takes a StorageEvent and returns only an error.
	Storage(event StorageEvent) error
}

type DefaultHtmlCrawler

// DefaultHtmlCrawler is a field-less struct. Its pointer-receiver methods
// Collect, Parser and Storage have the same signatures as the methods of the
// Crawler interface, so *DefaultHtmlCrawler satisfies Crawler.
type DefaultHtmlCrawler struct {
}

func (*DefaultHtmlCrawler) Collect

func (dhc *DefaultHtmlCrawler) Collect(event CollectEvent) (string, error)

func (*DefaultHtmlCrawler) Parser

func (dhc *DefaultHtmlCrawler) Parser(event ParserEvent) (map[string]interface{}, error)

func (*DefaultHtmlCrawler) Storage

func (dhc *DefaultHtmlCrawler) Storage(event StorageEvent) error

type Event

// Event is the common base embedded in CollectEvent, ParserEvent and
// StorageEvent. Its pointer-receiver methods (the spider constructors, Parser,
// ReadabilityParser, Signature, and others) are listed after this declaration.
type Event struct {
	// TempStorage is a pointer to a temp.TempStorage.
	// NOTE(review): presumably the same value that was passed to New — confirm.
	TempStorage *temp.TempStorage
}

func (*Event) ApiSpider

func (event *Event) ApiSpider(size int) spider.Spider

func (*Event) ConvertAssign

func (event *Event) ConvertAssign(src, des interface{}) error

func (*Event) DhtmlSpider

func (event *Event) DhtmlSpider() spider.Spider

func (*Event) FileSpider

func (event *Event) FileSpider(size int) spider.Spider

func (*Event) GenerateRandomID

func (event *Event) GenerateRandomID() string

func (*Event) NewMutex added in v0.1.8

func (event *Event) NewMutex() *sync.Mutex

func (*Event) Parser

func (event *Event) Parser(content, pattern string) (interface{}, error)

func (*Event) ReadabilityParser

func (event *Event) ReadabilityParser(html, url string) (map[string]interface{}, error)

func (*Event) ShtmlSpider

func (event *Event) ShtmlSpider(size int) spider.Spider

func (*Event) Signature

func (event *Event) Signature(obj interface{}) string

func (*Event) SignatureMap

func (event *Event) SignatureMap(data map[string]string) string

type ParserEvent

// ParserEvent is the argument type of Crawler.Parser. It embeds Event, so
// Event's TempStorage field and methods are promoted onto it.
type ParserEvent struct {
	Event
	// Task is a task.Task value carried with the event.
	// NOTE(review): presumably the task given to PipeLine.Invoke — confirm.
	Task    task.Task
	// Queue is a string carried with the event.
	// NOTE(review): presumably the queue name passed to New — confirm.
	Queue   string
	// Content is the string to be parsed.
	// NOTE(review): presumably the string returned by Crawler.Collect — confirm.
	Content string
}

type PipeLine

// PipeLine is the type returned by New. Its fields are not exported; it is
// used through its methods AddCrawler, AddReport and Invoke.
type PipeLine struct {
	// contains filtered or unexported fields
}

func New

func New(queue string, tempStorage *temp.TempStorage) *PipeLine

func (*PipeLine) AddCrawler

func (p *PipeLine) AddCrawler(crawlerName task.CrawlerName, crawler Crawler)

AddCrawler adds a crawl template.

func (*PipeLine) AddReport

func (p *PipeLine) AddReport(reportType ReportType, reportFunc func(report Report) error)

AddReport adds a report callback.

func (*PipeLine) Invoke

func (p *PipeLine) Invoke(ctx context.Context, task task.Task) error

type Report

// Report is the value handed to callbacks registered with PipeLine.AddReport.
type Report struct {
	// Task is a task.Task value carried with the report.
	// NOTE(review): presumably the task the report concerns — confirm.
	Task       task.Task
	// Error is an error value carried with the report.
	// NOTE(review): presumably nil when the reported stage succeeded — confirm.
	Error      error
	// ReportType is one of the ReportType constants.
	// NOTE(review): presumably identifies which stage boundary produced the
	// report — confirm.
	ReportType ReportType
}

type ReportType

// ReportType is an integer enumeration used as the key when registering a
// callback with PipeLine.AddReport and as the ReportType field of Report.
type ReportType int
// The constants start at 1 (iota + 1), so the zero value of ReportType does
// not correspond to any named constant.
//
// NOTE(review): the Before/After pairs presumably mark the points just before
// and just after the Collect, Parser and Storage stages — confirm.
const (
	CrawlerBeforeReport ReportType = iota + 1
	CrawlerAfterReport
	ParserBeforeReport
	ParserAfterReport
	StorageBeforeReport
	StorageAfterReport
)

func (ReportType) String

func (r ReportType) String() string

type StorageEvent

// StorageEvent is the argument type of Crawler.Storage. It embeds Event, so
// Event's TempStorage field and methods are promoted onto it.
type StorageEvent struct {
	Event
	// Task is a task.Task value carried with the event.
	// NOTE(review): presumably the task given to PipeLine.Invoke — confirm.
	Task  task.Task
	// Queue is a string carried with the event.
	// NOTE(review): presumably the queue name passed to New — confirm.
	Queue string
	// Data is a map keyed by string holding the values to store.
	// NOTE(review): presumably the map returned by Crawler.Parser — confirm.
	Data  map[string]interface{}
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL