Documentation ¶
Index ¶
- func InfiniteCrawl(linksIn <-chan string, linksOut chan<- string, wantMore chan<- bool, ...)
- func Rank(url string) float64
- func RankLength(url string) float64
- func RankProtocol(rawURL string) float64
- type DataStore
- type Page
- type RedisDataStore
- type RedisTaskQueue
- type RobotFilter
- type TaskQueue
- type Worker
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func InfiniteCrawl ¶
func Rank ¶
func RankLength ¶
func RankProtocol ¶
Types ¶
type RedisDataStore ¶
type RedisDataStore struct {
// contains filtered or unexported fields
}
func NewDefaultRedisDataStore ¶
func NewDefaultRedisDataStore() *RedisDataStore
func NewRedisDataStore ¶
func NewRedisDataStore(conn redis.Conn) *RedisDataStore
func (*RedisDataStore) Listen ¶
func (self *RedisDataStore) Listen(pages <-chan *Page)
func (*RedisDataStore) Save ¶
func (self *RedisDataStore) Save(page *Page) error
func (*RedisDataStore) Stop ¶
func (self *RedisDataStore) Stop()
type RedisTaskQueue ¶
type RedisTaskQueue struct {
// contains filtered or unexported fields
}
func NewDefaultRedisTaskQueue ¶
func NewDefaultRedisTaskQueue() *RedisTaskQueue
func NewRedisTaskQueue ¶
func NewRedisTaskQueue(conn redis.Conn) *RedisTaskQueue
func (*RedisTaskQueue) Listen ¶
func (self *RedisTaskQueue) Listen(incoming <-chan string, outgoing chan<- string, wantMore <-chan bool)
func (*RedisTaskQueue) Push ¶
func (self *RedisTaskQueue) Push(link string) error
func (*RedisTaskQueue) Stop ¶
func (self *RedisTaskQueue) Stop()
type RobotFilter ¶
type RobotFilter struct { UserAgent string // contains filtered or unexported fields }
func NewRobotFilter ¶
func NewRobotFilter() *RobotFilter
func (*RobotFilter) Allowed ¶
func (self *RobotFilter) Allowed(rawUrl string) bool
Allowed reports whether the given URL is allowed to be crawled, using github.com/temoto/robotstxt-go.
Source Files ¶
Click to show internal directories.
Click to hide internal directories.