Documentation ¶
Index ¶
- Constants
- type Crawler
- type CrawlerStatus
- func (this *CrawlerStatus) CanWeStop(spiderName string) bool
- func (this *CrawlerStatus) CloseSpider(spiderName string) *SpiderStatus
- func (this *CrawlerStatus) Crawled(spiderName string)
- func (this *CrawlerStatus) Distribute(spiderName string)
- func (this *CrawlerStatus) IsSpiderRunning(spiderName string) bool
- func (this *CrawlerStatus) Push(spiderName string)
- func (this *CrawlerStatus) StartSpider(spiderName string)
- type Downloader
- type RequestQuene
- type ResponseQuene
- type ResultQuene
- type ScrapeResult
- type Scraper
- type SpiderStatus
Constants ¶
View Source
const ( DOWNLOADER_STATUS_STOP = iota DOWNLOADER_STATUS_RUNING DOWNLOADER_STATUS_PAUSE DOWNLOADER_STATUS_STOPED )
View Source
const ( SCRAPY_STATUS_STOP = iota SCRAPY_STATUS_STOPED SCRAPY_STATUS_RUNING SCRAPY_STATUS_PAUSE )
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Crawler ¶
type Crawler struct { SpiderMap map[string]*base_spider.Spider //contains all spiders RequestQuene *RequestQuene //all waiting request ResponseQuene *ResponseQuene //all waiting response for scrape Downloader *Downloader //download tools Scraper *Scraper //scrape tools }
crawler
func NewCrawler ¶
func NewCrawler(resultQuene *ResultQuene, settings *util.Settings) *Crawler
resultQuene is for the reporter; make sure it is the same pointer
func (*Crawler) CloseSpider ¶
func (*Crawler) GetStartRequest ¶
func (*Crawler) StartSpider ¶
type CrawlerStatus ¶
type CrawlerStatus struct { CrawledSpider []*SpiderStatus RunningSpider map[string]*SpiderStatus }
status of the crawler: crawled spiders and running spiders
func NewCrawlerStatus ¶
func NewCrawlerStatus() *CrawlerStatus
func (*CrawlerStatus) CanWeStop ¶
func (this *CrawlerStatus) CanWeStop(spiderName string) bool
judge whether a spider can be stopped
func (*CrawlerStatus) CloseSpider ¶
func (this *CrawlerStatus) CloseSpider(spiderName string) *SpiderStatus
no more requests for the spider, so close it: remove it from RunningSpider and add it to CrawledSpider
func (*CrawlerStatus) Crawled ¶
func (this *CrawlerStatus) Crawled(spiderName string)
record a crawl result: running -1, crawled +1
func (*CrawlerStatus) Distribute ¶
func (this *CrawlerStatus) Distribute(spiderName string)
if the cluster distributes a request: waiting -1, running +1
func (*CrawlerStatus) IsSpiderRunning ¶
func (this *CrawlerStatus) IsSpiderRunning(spiderName string) bool
func (*CrawlerStatus) Push ¶
func (this *CrawlerStatus) Push(spiderName string)
add a waiting request for spiderName
func (*CrawlerStatus) StartSpider ¶
func (this *CrawlerStatus) StartSpider(spiderName string)
add a spider to the running map
type Downloader ¶
type Downloader struct { Status int RequestQuene *RequestQuene ResponseQuene *ResponseQuene ClientList []*http.Client DownloadInterval int }
downloader tools
func NewDownloader ¶
func NewDownloader(resuqstQuene *RequestQuene, responseQuene *ResponseQuene, downloadInterval int) *Downloader
func (*Downloader) Download ¶
func (this *Downloader) Download()
dead loop for downloading: pop a request, download it, and push the response to the response queue
func (*Downloader) IsStop ¶
func (this *Downloader) IsStop() bool
func (*Downloader) Pause ¶
func (this *Downloader) Pause()
func (*Downloader) Start ¶
func (this *Downloader) Start()
DOWNLOADER_STATUS_STOPED means the dead loop is actually dead
func (*Downloader) Stop ¶
func (this *Downloader) Stop()
func (*Downloader) UnPause ¶
func (this *Downloader) UnPause()
type RequestQuene ¶
func NewRequestQuene ¶
func NewRequestQuene() *RequestQuene
func (*RequestQuene) IsEmpty ¶
func (this *RequestQuene) IsEmpty() bool
func (*RequestQuene) Pop ¶
func (this *RequestQuene) Pop() *http.Request
func (*RequestQuene) Push ¶
func (this *RequestQuene) Push(request *http.Request)
type ResponseQuene ¶
func NewResponseQuene ¶
func NewResponseQuene() *ResponseQuene
func (*ResponseQuene) Pop ¶
func (this *ResponseQuene) Pop() *http.Response
for now only one goroutine pops the response, so no lock is added
func (*ResponseQuene) Push ¶
func (this *ResponseQuene) Push(response *http.Response)
type ResultQuene ¶
func NewResultQuene ¶
func NewResultQuene() *ResultQuene
func (*ResultQuene) Pop ¶
func (this *ResultQuene) Pop() *ScrapeResult
func (*ResultQuene) Push ¶
func (this *ResultQuene) Push(scrapeResult *ScrapeResult)
type ScrapeResult ¶
type Scraper ¶
type Scraper struct { Status int ResultQuene *ResultQuene ResponseQuene *ResponseQuene SpiderMap map[string]*spiders.Spider }
func NewScraper ¶
func NewScraper(resultQuene *ResultQuene, responseQuene *ResponseQuene, spiderMap map[string]*spiders.Spider) *Scraper
type SpiderStatus ¶
type SpiderStatus struct { Name string Crawled int Running int Waiting int StartTime time.Time EndTime time.Time }
func NewSpiderStatus ¶
func NewSpiderStatus(name string) *SpiderStatus
Click to show internal directories.
Click to hide internal directories.