Documentation ¶
Index ¶
- func CloseDone(db *DoneDB)
- func CloseJob(db *JobDB)
- func Crawler(running *int32, group *sync.WaitGroup, jbd *JobDB, dbd *DoneDB, config Config, ...)
- func DeleteOldSpider(db *DB)
- func DeleteSpiderDone(db *DoneDB)
- func GetChromeWSEndpoint() string
- func GetDoneSize(db *DoneDB) int
- func GetJobSize(db *JobDB) int
- func GetSize(db *DB) int
- func HasDone(db *DoneDB, url string, stat *Stat) bool
- func HasJob(db *JobDB, url string, stat *Stat) bool
- func Ini()
- func InsertSpider(db *DB, title string, name string, url string, host string, stat *Stat)
- func InsertSpiderDone(db *DoneDB, url string, stat *Stat)
- func InsertSpiderJob(db *JobDB, url string, deps int, stat *Stat)
- func Parser(running *int32, group *sync.WaitGroup, jbd *JobDB, dbd *DoneDB, config Config, ...)
- func PopSpiderJob(db *JobDB, n int, stat *Stat) ([]string, []int)
- func Saver(running *int32, group *sync.WaitGroup, db *DB, save <-chan *DBInfo, stat *Stat)
- func SetCallback(cb func(host string, title string, name string, url string))
- func Start(db *DB, config Config, url string, stat *Stat)
- type Config
- type DB
- type DBInfo
- type DoneDB
- type FindData
- type JobDB
- type LoopSpider
- type LoopSpiderSlot
- type LoopSpiderStatus
- type PageInfo
- type PageLinkInfo
- type SpiderData
- type Stat
- type URLInfo
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func DeleteOldSpider ¶
func DeleteOldSpider(db *DB)
func DeleteSpiderDone ¶
func DeleteSpiderDone(db *DoneDB)
func GetChromeWSEndpoint ¶
func GetChromeWSEndpoint() string
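func GetDoneSize ¶
func GetDoneSize(db *DoneDB) int
func GetJobSize ¶
func GetJobSize(db *JobDB) int
func InsertSpider ¶
func InsertSpider(db *DB, title string, name string, url string, host string, stat *Stat)
func InsertSpiderDone ¶
func InsertSpiderDone(db *DoneDB, url string, stat *Stat)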
Types ¶
type LoopSpider ¶
type LoopSpider struct {
	Thread int
	Buffer int
	Cur    string
	// contains filtered or unexported fields
}
func NewLoopSpider ¶
func NewLoopSpider(lss LoopSpiderSlot) *LoopSpider
func (*LoopSpider) GetLoopSpiderStatus ¶
func (ls *LoopSpider) GetLoopSpiderStatus() LoopSpiderStatus
type LoopSpiderSlot ¶
type LoopSpiderStatus ¶
type PageInfo ¶
type PageInfo struct {
	UI    URLInfo
	Title string
	Son   []PageLinkInfo
}
type PageLinkInfo ¶
type SpiderData ¶
type SpiderData struct {
// contains filtered or unexported fields
}
type Stat ¶
type Stat struct {
	CrawBePushJobNum    int
	CrawChannelNum      int
	CrawFunc            string
	CrawNum             int
	CrawRetrtyNum       int
	CrawOKNum           int64
	CrawFailNum         int
	CrawOKTotalTime     int64
	CrawOKAvgTime       int64
	ParseChannelNum     int
	ParseNum            int
	ParseValidNum       int
	ParseSpawnNum       int
	ParseFinishNum      int
	ParseTooDeepNum     int
	ParseJobNum         int
	SaveChannelNum      int
	SaveNum             int
	InsertNum           int64
	InsertTotalTime     int64
	InsertCBTotalTime   int64
	InsertAvgTime       int64
	InsertCBAvgTime     int64
	JobInsertNum        int64
	JobInsertTotalTime  int64
	JobInsertAvgTime    int64
	JobPopNum           int64
	JobPopTotalTime     int64
	JobPopAvgTime       int64
	JobHasNum           int64
	JobHasTotalTime     int64
	JobHasAvgTime       int64
	DoneInsertNum       int64
	DoneInsertTotalTime int64
	DoneInsertAvgTime   int64
	DoneHasNum          int64
	DoneHasTotalTime    int64
	DoneHasAvgTime      int64
}
Click to show internal directories.
Click to hide internal directories.