Documentation ¶
Index ¶
- Constants
- type CollectEvent
- type Crawler
- type DefaultHtmlCrawler
- type Event
- func (event *Event) ApiSpider(size int) spider.Spider
- func (event *Event) ConvertAssign(src, des interface{}) error
- func (event *Event) DhtmlSpider() spider.Spider
- func (event *Event) FileSpider(size int) spider.Spider
- func (event *Event) GenerateRandomID() string
- func (event *Event) NewMutex() *sync.Mutex
- func (event *Event) Parser(content, pattern string) (interface{}, error)
- func (event *Event) ReadabilityParser(html, url string) (map[string]interface{}, error)
- func (event *Event) ShtmlSpider(size int) spider.Spider
- func (event *Event) Signature(obj interface{}) string
- func (event *Event) SignatureMap(data map[string]string) string
- type Filter
- type FilterType
- type ParserEvent
- type PipeLine
- func (p *PipeLine) AddCrawler(crawlerName task.CrawlerName, crawler Crawler)
- func (p *PipeLine) AddFilter(filterType FilterType, filterFunc func(filter Filter) bool)
- func (p *PipeLine) AddReport(reportType ReportType, reportFunc func(report Report) error)
- func (p *PipeLine) Invoke(ctx context.Context, task task.Task) error
- type Report
- type ReportType
- type StorageEvent
Constants ¶
View Source
const (
	CrawlerBeforeFilter FilterType = iota + 1
	CrawlerAfterFilter
	ParserBeforeFilter
	ParserAfterFilter
	StorageBeforeFilter
	StorageAfterFilter
	CrawlerBeforeReport ReportType = iota + 1
	CrawlerAfterReport
	ParserBeforeReport
	ParserAfterReport
	StorageBeforeReport
	StorageAfterReport
)
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Crawler ¶
type Crawler interface {
	Collect(event CollectEvent) (string, error)
	Parser(event ParserEvent) (map[string]interface{}, error)
	Storage(event StorageEvent) error
}
type DefaultHtmlCrawler ¶
type DefaultHtmlCrawler struct { }
func (*DefaultHtmlCrawler) Collect ¶
func (dhc *DefaultHtmlCrawler) Collect(event CollectEvent) (string, error)
func (*DefaultHtmlCrawler) Parser ¶
func (dhc *DefaultHtmlCrawler) Parser(event ParserEvent) (map[string]interface{}, error)
func (*DefaultHtmlCrawler) Storage ¶
func (dhc *DefaultHtmlCrawler) Storage(event StorageEvent) error
type Event ¶
type Event struct {
TempStorage *temp.TempStorage
}
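func (*Event) ConvertAssign ¶
func (event *Event) ConvertAssign(src, des interface{}) error
func (*Event) DhtmlSpider ¶
func (event *Event) DhtmlSpider() spider.Spider
func (*Event) GenerateRandomID ¶
func (event *Event) GenerateRandomID() string
func (*Event) ReadabilityParser ¶
func (event *Event) ReadabilityParser(html, url string) (map[string]interface{}, error)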
type FilterType ¶
type FilterType int
func (FilterType) String ¶
func (f FilterType) String() string
type PipeLine ¶
type PipeLine struct {
// contains filtered or unexported fields
}
func (*PipeLine) AddCrawler ¶
func (p *PipeLine) AddCrawler(crawlerName task.CrawlerName, crawler Crawler)
添加抓取模版 (Adds a crawl template.)
func (*PipeLine) AddFilter ¶
func (p *PipeLine) AddFilter(filterType FilterType, filterFunc func(filter Filter) bool)
添加过滤器 (Adds a filter.)
type ReportType ¶
type ReportType int
func (ReportType) String ¶
func (r ReportType) String() string
Click to show internal directories.
Click to hide internal directories.