libcrawl

package
v0.1.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 29, 2024 License: GPL-3.0 Imports: 23 Imported by: 0

Documentation

Index

Constants

View Source
const (
	CRAWLER_VB_ATTACHMENTS = "vb-attachments"
	CRAWLER_SRC            = "src"
	CRAWLER_FILE           = "file"
)
View Source
const (
	PAGER_VB4    = "vb4"
	PAGER_QUERY  = "query"
	PAGER_URLCUT = "cutter"
)
View Source
const DEFAULT_DL_JOBS = 5

Variables

This section is empty.

Functions

func Crawl

func Crawl(cc *CrawlContext) error

Types

type CrawlContext

type CrawlContext struct {
	Cookies []*http.Cookie
	Pager   PagerInterface
	Crawler CrawlerInterface
	// contains filtered or unexported fields
}

func NewCrawlContext

func NewCrawlContext(pager string, crawler string, defaultDir string) (*CrawlContext, error)

func (*CrawlContext) SetOptions

func (cc *CrawlContext) SetOptions(args []string) error

SetOptions parses the global options and attaches them to the CrawlContext.

type CrawlerInterface

type CrawlerInterface interface {
	Crawl(*url.URL) error
	Finish()
	SetOptions([]string) error
	Setup()
}

func NewFileCrawler

func NewFileCrawler(cc *CrawlContext) (CrawlerInterface, error)

func NewSrcCrawler added in v0.1.1

func NewSrcCrawler(cc *CrawlContext) (CrawlerInterface, error)

func NewVBAttachmentCrawler

func NewVBAttachmentCrawler(cc *CrawlContext) (CrawlerInterface, error)

type FileCrawler

type FileCrawler struct {
	// contains filtered or unexported fields
}

FileCrawler is a crawler that treats every input from the pager as a file that needs to be downloaded.

func (*FileCrawler) Crawl

func (r *FileCrawler) Crawl(u *url.URL) error

func (FileCrawler) Finish

func (c FileCrawler) Finish()

Finish() is the default cleanup function for crawlers. If baseCrawler's Setup() or setup() method was used, Finish() closes baseCrawler's DownloadDispatcher and yields until all downloads have finished. Otherwise it does nothing.

func (FileCrawler) SetOptions

func (c FileCrawler) SetOptions(args []string) error

func (FileCrawler) Setup

func (c FileCrawler) Setup()

type PagerInterface

type PagerInterface interface {
	Next() (*url.URL, error)
	PageNum() int
	SetOptions([]string) error
	SetUrl(string) error
}

func NewQueryPager

func NewQueryPager(cc *CrawlContext) PagerInterface

func NewURLCuttingPager

func NewURLCuttingPager(cc *CrawlContext) PagerInterface

func NewVB4Pager

func NewVB4Pager(cc *CrawlContext) PagerInterface

type QueryPager

type QueryPager struct {
	// contains filtered or unexported fields
}

func (*QueryPager) Next

func (r *QueryPager) Next() (*url.URL, error)

func (*QueryPager) PageNum

func (r *QueryPager) PageNum() int

func (*QueryPager) SetOptions

func (r *QueryPager) SetOptions(args []string) error

func (*QueryPager) SetUrl

func (r *QueryPager) SetUrl(addr string) error

type SrcCrawler added in v0.1.1

type SrcCrawler struct {
	// contains filtered or unexported fields
}

func (*SrcCrawler) Crawl added in v0.1.1

func (r *SrcCrawler) Crawl(u *url.URL) error

func (SrcCrawler) Finish added in v0.1.1

func (c SrcCrawler) Finish()

Finish() is the default cleanup function for crawlers. If baseCrawler's Setup() or setup() method was used, Finish() closes baseCrawler's DownloadDispatcher and yields until all downloads have finished. Otherwise it does nothing.

func (*SrcCrawler) SetOptions added in v0.1.1

func (r *SrcCrawler) SetOptions(args []string) error

func (SrcCrawler) Setup added in v0.1.1

func (c SrcCrawler) Setup()

type URLCuttingPager

type URLCuttingPager struct {
	// contains filtered or unexported fields
}

URLCuttingPager browses through the pages by cutting a part out of its URL and replacing that part with an increasing number.

func (*URLCuttingPager) Next

func (r *URLCuttingPager) Next() (*url.URL, error)

func (*URLCuttingPager) PageNum

func (r *URLCuttingPager) PageNum() int

func (*URLCuttingPager) SetOptions

func (r *URLCuttingPager) SetOptions(args []string) error

func (*URLCuttingPager) SetUrl

func (r *URLCuttingPager) SetUrl(addr string) error

type VB4Pager

type VB4Pager struct {
	Start  int
	End    int
	Thread *url.URL
	// contains filtered or unexported fields
}

func (*VB4Pager) Next

func (r *VB4Pager) Next() (*url.URL, error)

func (*VB4Pager) PageNum

func (r *VB4Pager) PageNum() int

func (*VB4Pager) SetOptions

func (r *VB4Pager) SetOptions(args []string) error

func (*VB4Pager) SetUrl

func (r *VB4Pager) SetUrl(addr string) error

type VBAttachmentCrawler

type VBAttachmentCrawler struct {
	// contains filtered or unexported fields
}

func (*VBAttachmentCrawler) Crawl

func (r *VBAttachmentCrawler) Crawl(u *url.URL) error

func (VBAttachmentCrawler) Finish

func (c VBAttachmentCrawler) Finish()

Finish() is the default cleanup function for crawlers. If baseCrawler's Setup() or setup() method was used, Finish() closes baseCrawler's DownloadDispatcher and yields until all downloads have finished. Otherwise it does nothing.

func (*VBAttachmentCrawler) SetOptions

func (r *VBAttachmentCrawler) SetOptions(args []string) error

func (VBAttachmentCrawler) Setup

func (c VBAttachmentCrawler) Setup()

Notes

Bugs

  • The password needs to be filtered out of the URL before the URL is printed.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL