Documentation ¶
Index ¶
Constants ¶
const ( CRAWLER_VB_ATTACHMENTS = "vb-attachments" CRAWLER_SRC = "src" CRAWLER_FILE = "file" )
const ( PAGER_VB4 = "vb4" PAGER_QUERY = "query" PAGER_URLCUT = "cutter" )
const DEFAULT_DL_JOBS = 5
Variables ¶
This section is empty.
Functions ¶
func Crawl ¶
func Crawl(cc *CrawlContext) error
Types ¶
type CrawlContext ¶
type CrawlContext struct { Cookies []*http.Cookie Pager PagerInterface Crawler CrawlerInterface // contains filtered or unexported fields }
func NewCrawlContext ¶
func NewCrawlContext(pager string, crawler string, defaultDir string) (*CrawlContext, error)
func (*CrawlContext) SetOptions ¶
func (cc *CrawlContext) SetOptions(args []string) error
SetOptions parses global options and attaches them to the CrawlContext.
type CrawlerInterface ¶
type CrawlerInterface interface { Crawl(*url.URL) error Finish() SetOptions([]string) error Setup() }
func NewFileCrawler ¶
func NewFileCrawler(cc *CrawlContext) (CrawlerInterface, error)
func NewSrcCrawler ¶ added in v0.1.1
func NewSrcCrawler(cc *CrawlContext) (CrawlerInterface, error)
func NewVBAttachmentCrawler ¶
func NewVBAttachmentCrawler(cc *CrawlContext) (CrawlerInterface, error)
type FileCrawler ¶
type FileCrawler struct {
// contains filtered or unexported fields
}
FileCrawler is a crawler that treats every input from the pager as a file that needs to be downloaded.
func (FileCrawler) Finish ¶
func (c FileCrawler) Finish()
Finish() is a default cleanup function for crawlers. If baseCrawler's Setup() or setup() method was used, Finish() closes baseCrawler's DownloadDispatcher and waits until all downloads have finished. Otherwise it does nothing.
func (FileCrawler) SetOptions ¶
type PagerInterface ¶
type PagerInterface interface { Next() (*url.URL, error) PageNum() int SetOptions([]string) error SetUrl(string) error }
func NewQueryPager ¶
func NewQueryPager(cc *CrawlContext) PagerInterface
func NewURLCuttingPager ¶
func NewURLCuttingPager(cc *CrawlContext) PagerInterface
func NewVB4Pager ¶
func NewVB4Pager(cc *CrawlContext) PagerInterface
type QueryPager ¶
type QueryPager struct {
// contains filtered or unexported fields
}
func (*QueryPager) PageNum ¶
func (r *QueryPager) PageNum() int
func (*QueryPager) SetOptions ¶
func (r *QueryPager) SetOptions(args []string) error
func (*QueryPager) SetUrl ¶
func (r *QueryPager) SetUrl(addr string) error
type SrcCrawler ¶ added in v0.1.1
type SrcCrawler struct {
// contains filtered or unexported fields
}
func (SrcCrawler) Finish ¶ added in v0.1.1
func (c SrcCrawler) Finish()
Finish() is a default cleanup function for crawlers. If baseCrawler's Setup() or setup() method was used, Finish() closes baseCrawler's DownloadDispatcher and waits until all downloads have finished. Otherwise it does nothing.
func (*SrcCrawler) SetOptions ¶ added in v0.1.1
func (r *SrcCrawler) SetOptions(args []string) error
type URLCuttingPager ¶
type URLCuttingPager struct {
// contains filtered or unexported fields
}
URLCuttingPager browses through the pages by cutting out a part of the URL and replacing it with an increasing page number.
func (*URLCuttingPager) PageNum ¶
func (r *URLCuttingPager) PageNum() int
func (*URLCuttingPager) SetOptions ¶
func (r *URLCuttingPager) SetOptions(args []string) error
func (*URLCuttingPager) SetUrl ¶
func (r *URLCuttingPager) SetUrl(addr string) error
type VB4Pager ¶
type VB4Pager struct { Start int End int Thread *url.URL // contains filtered or unexported fields }
func (*VB4Pager) SetOptions ¶
type VBAttachmentCrawler ¶
type VBAttachmentCrawler struct {
// contains filtered or unexported fields
}
func (VBAttachmentCrawler) Finish ¶
func (c VBAttachmentCrawler) Finish()
Finish() is a default cleanup function for crawlers. If baseCrawler's Setup() or setup() method was used, Finish() closes baseCrawler's DownloadDispatcher and waits until all downloads have finished. Otherwise it does nothing.
func (*VBAttachmentCrawler) SetOptions ¶
func (r *VBAttachmentCrawler) SetOptions(args []string) error
Notes ¶
Bugs ¶
The password needs to be filtered out of the URL before printing it.