Documentation ¶
Constants ¶
This section is empty.
Variables ¶
var ErrInputChannelClosed = errors.New("input channel closed")
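The package does not spell out which calls return this sentinel, but a caller that does receive it can match it with errors.Is. A minimal sketch, assuming err came from one of this package's operations; the helper name is hypothetical and "errors" must be imported.

func handleFetchErr(err error) {
	if errors.Is(err, ErrInputChannelClosed) {
		// The pipeline's input channel is already closed:
		// stop submitting new work and shut the pipeline down.
	}
}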
Functions ¶
This section is empty.
Types ¶
type FetchDatasource ¶
type FetchResult ¶
type Interceptor ¶
type Interceptor struct {
// contains filtered or unexported fields
}
func NewInterceptor ¶
func NewInterceptor() *Interceptor
func (Interceptor) CreateObserver ¶
func (Interceptor) Dispose ¶
func (i Interceptor) Dispose()
func (Interceptor) HandleResponse ¶
func (i Interceptor) HandleResponse(htmlStr string, target *url.URL)
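A minimal sketch of feeding a fetched page through an Interceptor. The HTML string and URL are placeholders supplied by the caller, CreateObserver is omitted because its signature is not shown here, and the snippet assumes "log" and "net/url" are imported and the code lives in (or dot-imports) this package.

func interceptPage(htmlStr, rawURL string) {
	i := NewInterceptor()
	defer i.Dispose()

	// Parse the URL the HTML was fetched from.
	target, err := url.Parse(rawURL)
	if err != nil {
		log.Fatal(err)
	}

	// Hand the fetched HTML and its origin URL to the interceptor.
	i.HandleResponse(htmlStr, target)
}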
type InterceptorCallback ¶
type ParallelFetch ¶
type ParallelFetch struct {
// contains filtered or unexported fields
}
func NewParallelFetch ¶
func NewParallelFetch(factory datatypes.Factory[FetchDatasource]) *ParallelFetch
func (*ParallelFetch) Fetch ¶
func (pf *ParallelFetch) Fetch(urls ...string)
func (*ParallelFetch) Responses ¶
func (pf *ParallelFetch) Responses() iter.Seq2[string, FetchResult]
Responses returns an iterator over the elements received on the output channel.
func (*ParallelFetch) Start ¶
func (pf *ParallelFetch) Start(ctx context.Context, numWorkers int) error
func (*ParallelFetch) Stop ¶
func (pf *ParallelFetch) Stop()
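Taken together, a plausible Start / Fetch / Responses / Stop flow looks like the sketch below. It assumes the caller already has a datatypes.Factory[FetchDatasource] (its construction is not documented here), that "context" and "fmt" are imported, that four workers is an arbitrary choice, and that when the Responses loop ends relative to Stop depends on the implementation.

func fetchAll(ctx context.Context, factory datatypes.Factory[FetchDatasource], urls ...string) error {
	pf := NewParallelFetch(factory)
	if err := pf.Start(ctx, 4); err != nil { // 4 workers is an arbitrary choice
		return err
	}
	defer pf.Stop()

	// Queue the URLs to fetch.
	pf.Fetch(urls...)

	// Responses returns an iter.Seq2, so it can be ranged over directly (Go 1.23+).
	for u, res := range pf.Responses() {
		fmt.Println("fetched:", u, res)
	}
	return nil
}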
type WebCrawler ¶
type WebCrawler struct {
// contains filtered or unexported fields
}
func NewWebCrawler ¶
func NewWebCrawler(baseURL, userAgent string) *WebCrawler
func (WebCrawler) Crawl ¶
func (wc WebCrawler) Crawl() error
func (WebCrawler) LoadRobotsTXT ¶
func (wc WebCrawler) LoadRobotsTXT() (*robotstxt.RobotsData, error)
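A minimal end-to-end sketch of driving a WebCrawler. The base URL and user agent are placeholder values, and whether Crawl consults the robots.txt data itself or expects the caller to do so is not stated here.

func crawlSite() error {
	wc := NewWebCrawler("https://example.com", "my-crawler/1.0") // placeholder values

	// Fetch and parse the site's robots.txt (returns *robotstxt.RobotsData).
	robots, err := wc.LoadRobotsTXT()
	if err != nil {
		return err
	}
	_ = robots

	return wc.Crawl()
}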