Documentation
¶
Index ¶
- type Cache
- type Collector
- type Crawler
- func (c *Crawler) ParseWebsite(ctx context.Context, websiteURL url.URL) ([]link, error)
- func (c *Crawler) Scrape(ctx context.Context, URL url.URL, maxDepth int) (*Graph, error)
- func (c *Crawler) ScrapeChan(ctx context.Context, jobs <-chan Job, results chan<- Job, errCh chan<- error, ...)
- func (c *Crawler) ScrapeRec(ctx context.Context, sourceURL url.URL, depth int, maxDepth int) error
- type Edges
- type Graph
- type Job
- type Node
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Cache ¶
type Cache interface { // Add inserts a new key-value pair into the cache; if the key already exists // in the cache, returns evicted=true Add(key string, value interface{}) // Get gets the value for the corresponding key from the cache // ok == true if the object exists in the cache, otherwise ok == false Get(key string) (value interface{}, ok bool) }
type Collector ¶
type Collector struct {
// contains filtered or unexported fields
}
func NewCollector ¶
func NewCollector(log *zap.SugaredLogger, c Cache) *Collector
type Crawler ¶
type Crawler struct { Graph *Graph // contains filtered or unexported fields }
func (*Crawler) ParseWebsite ¶
ParseWebsite parses the HTML page at the given URL and returns the href values of all <a href> elements found on it.
func (*Crawler) ScrapeChan ¶
type Edges ¶
func (*Edges) MarshalJSON ¶
Click to show internal directories.
Click to hide internal directories.