Versions in this module Expand all Collapse all v1 v1.0.0 May 31, 2023 Changes in this version + const CSSUri + const HTMLTagA + const HTMLTagForm + const HTMLTagImg + const HTMLTagLinkStylesheet + const HTMLTagScript + const HTTP3xxLocation + func LongestCommonPrefix(path1 string, path2 string) string + func ReduceURL(base *neturl.URL, url *neturl.URL) string + type Crawler interface + AddRequestHeader func(string, string) + Download func(QueueItem) *Downloaded + Downloaded func() (*Downloaded, bool) + DownloadedNotBlocking func() *Downloaded + Enqueue func(QueueItem) + GetAutoDownloadDepth func() uint64 + GetClientTimeout func() time.Duration + GetDownloadedCount func() uint64 + GetEnqueuedCount func() uint64 + GetLinkFoundCount func() uint64 + GetNoCrossHost func() bool + GetRequestHeaderValues func(string) []string + GetWorkerCount func() uint64 + HasStarted func() bool + HasStopped func() bool + IsBusy func() bool + IsRunning func() bool + SetAutoDownloadDepth func(uint64) + SetNoCrossHost func(bool) + SetOnDownload func(func(*url.URL)) + SetOnDownloaded func(func(*Downloaded)) + SetOnURLShouldDownload func(func(*url.URL) bool) + SetOnURLShouldQueue func(func(*url.URL) bool) + SetRequestHeader func(string, string) + SetURLRewriter func(func(*url.URL)) + SetWorkerCount func(uint64) error + Start func() + Stop func() + func New(client *http.Client, logger *logrus.Logger) Crawler + type Downloaded struct + BaseURL *url.URL + Body string + Error error + Input *Input + LinksAssets map[string]Link + LinksDiscovered map[string]Link + StatusCode int + func Download(input *Input) *Downloaded + func (d *Downloaded) AddHeader(key string, value string) + func (d *Downloaded) GetAssetURLs() []*neturl.URL + func (d *Downloaded) GetDiscoveredURLs() []*neturl.URL + func (d *Downloaded) GetHeaderKeys() []string + func (d *Downloaded) GetHeaderValues(key string) []string + func (d *Downloaded) ProcessURL(context urlContext, url string) (string, error)
+ func (d *Downloaded) Reduce(url *neturl.URL) string + type Input struct + Client *http.Client + Header http.Header + NoCrossHost bool + Rewriter *func(*url.URL) + URL *url.URL + type Link struct + Context urlContext + URL *url.URL + type QueueItem struct + Depth uint64 + ForceDownload bool + URL *url.URL