crawler

package
v0.0.0-...-bdb5213 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 12, 2024 License: MIT Imports: 14 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Client

// Client is a single-method interface accepted by NewWorkerPool. Get takes a
// context and a parsed URL and returns a string and an error. *HTTPClient
// declares Get with the same signature.
//
// NOTE(review): presumably the returned string is the content fetched from
// url; no implementation is visible here, so confirm against HTTPClient.Get.
type Client interface {
	// The parameter is named url, which shadows the url package identifier
	// inside the body of any implementing method that keeps this name.
	Get(ctx context.Context, url *url.URL) (string, error)
}

type Config

// Config groups the settings handed (by pointer) to NewHTTPClient and
// NewWorkerPool.
//
// NOTE(review): the fields carry no documentation and their consumers are not
// visible here; the per-field notes below are assumptions to confirm against
// the code that reads them.
type Config struct {
	// NumWorkers — presumably the number of workers WorkerPool runs; TODO confirm.
	NumWorkers       int
	// BaseURL is held by value (url.URL), whereas Client.Get takes a *url.URL.
	BaseURL          url.URL
	// Resume — presumably toggles reuse of previously saved content; TODO confirm.
	Resume           bool
	// DownloadPathBase — presumably the base path for saved content; TODO confirm.
	DownloadPathBase string
	// Debug — presumably enables additional diagnostics; TODO confirm.
	Debug            bool
}

type ContentRepository

// ContentRepository is the storage interface accepted by NewWorkerPool.
// *FileSystemRepository declares all three methods with these signatures.
//
// NOTE(review): the meaning of path (relative vs. absolute, URL-derived or
// not) is not visible here; confirm against the implementation and callers.
type ContentRepository interface {
	// Save takes a path and the content to associate with it and returns an error.
	Save(path string, content string) error
	// Exists returns a bool for path; it has no error return.
	Exists(path string) bool
	// GetData returns a string and an error for path.
	GetData(path string) (string, error)
}

type FileSystemRepository

// FileSystemRepository is a field-less struct created by
// NewFileSystemRepository. Its pointer-receiver methods Exists, GetData and
// Save have the same signatures as the ContentRepository interface, so
// *FileSystemRepository satisfies it.
//
// NOTE(review): how paths map onto files is not visible here — confirm in the
// method bodies.
type FileSystemRepository struct{}

func NewFileSystemRepository

func NewFileSystemRepository() *FileSystemRepository

func (*FileSystemRepository) Exists

func (f *FileSystemRepository) Exists(path string) bool

func (*FileSystemRepository) GetData

func (f *FileSystemRepository) GetData(path string) (string, error)

func (*FileSystemRepository) Save

func (f *FileSystemRepository) Save(path string, content string) error

type HTTPClient

type HTTPClient struct {
	// contains filtered or unexported fields
}

func NewHTTPClient

func NewHTTPClient(cfg *Config) *HTTPClient

func (*HTTPClient) Get

func (h *HTTPClient) Get(ctx context.Context, url *url.URL) (string, error)

type ParseResult

// ParseResult is the value returned (by pointer) from Parser.Parse.
type ParseResult struct {
	// URLs is a slice of strings.
	// NOTE(review): presumably the links found in the parsed contents; whether
	// they are absolute, relative, or de-duplicated is not shown — confirm.
	URLs []string
}

type Parser

// Parser is a single-method interface accepted by NewWorkerPool.
// *SimpleParser declares Parse with the same signature.
type Parser interface {
	// Parse takes contents as a string and returns a *ParseResult and an error.
	Parse(contents string) (*ParseResult, error)
}

type Queue

// Queue is a generic interface over an element type T. NewWorkerPool takes a
// Queue[string], and *UnboundedQueue[T] declares all four methods with these
// signatures.
//
// NOTE(review): blocking behaviour of Get, whether Close closes the channel
// returned by Out, and whether Put is permitted after Close are not visible
// here — confirm against UnboundedQueue before relying on any of them.
type Queue[T any] interface {
	// Put takes an item and returns nothing, so it cannot report failure.
	Put(item T)
	// Get returns a T and has no ok/error result.
	Get() T
	// Out returns a receive-only channel of T.
	Out() <-chan T
	// Close takes no arguments and returns nothing.
	Close()
}

type SimpleParser

// SimpleParser is a field-less struct returned (by pointer) from NewParser.
// Its pointer-receiver Parse method has the same signature as Parser.Parse,
// so *SimpleParser satisfies the Parser interface.
//
// NOTE(review): what input format Parse expects is not visible here — confirm
// in the method body.
type SimpleParser struct{}

func NewParser

func NewParser() *SimpleParser

func (*SimpleParser) Parse

func (p *SimpleParser) Parse(contents string) (*ParseResult, error)

type UnboundedQueue

type UnboundedQueue[T any] struct {
	// contains filtered or unexported fields
}

func NewUnboundedQueue

func NewUnboundedQueue[T any]() *UnboundedQueue[T]

func (*UnboundedQueue[T]) Close

func (q *UnboundedQueue[T]) Close()

func (*UnboundedQueue[T]) Get

func (q *UnboundedQueue[T]) Get() T

func (*UnboundedQueue[T]) Out

func (q *UnboundedQueue[T]) Out() <-chan T

func (*UnboundedQueue[T]) Put

func (q *UnboundedQueue[T]) Put(item T)

type WorkerPool

type WorkerPool struct {
	// contains filtered or unexported fields
}

func NewWorkerPool

func NewWorkerPool(
	cfg *Config,
	queue Queue[string],
	parser Parser,
	contentRepository ContentRepository,
	client Client,
) *WorkerPool

func (*WorkerPool) Start

func (w *WorkerPool) Start(ctx context.Context) error

func (*WorkerPool) WaitOn

func (w *WorkerPool) WaitOn()

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL