Documentation
¶
Index ¶
- type Cache
- type Collector
- type Crawler
- func (c *Crawler) ParseWebsite(ctx context.Context, websiteURL url.URL) ([]link, error)
- func (c *Crawler) Scrape(ctx context.Context, URL url.URL, maxDepth int) (*Graph, error)
- func (c *Crawler) ScrapeChan(ctx context.Context, jobs <-chan Job, results chan<- Job, errCh chan<- error, ...)
- func (c *Crawler) ScrapeRec(ctx context.Context, sourceURL url.URL, depth int, maxDepth int) error
- type Edges
- type Graph
- type Job
- type Node
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Cache ¶
type Cache interface { // Add inserts a new key-value pair into the cache; if the key already exists // in the cache, returns evicted=true Add(key string, value interface{}) // Get gets the value for the corresponding key from the cache // ok == true if the object exists in the cache, otherwise ok == false Get(key string) (value interface{}, ok bool) }
type Collector ¶
type Collector struct {
// contains filtered or unexported fields
}
func NewCollector ¶
func NewCollector(log *zap.SugaredLogger, c Cache) *Collector
type Crawler ¶
type Crawler struct { Graph *Graph // contains filtered or unexported fields }
func (*Crawler) ParseWebsite ¶
ParseWebsite parses the HTML page at the given URL and returns the href values of all <a href> elements found on it.
func (*Crawler) ScrapeChan ¶
type Edges ¶
func (*Edges) MarshalJSON ¶
Click to show internal directories.
Click to hide internal directories.