Versions in this module Expand all Collapse all v1 v1.0.59 Jan 10, 2024 v1.0.58 Jan 10, 2024 v1.0.57 Jan 10, 2024 Changes in this version + const B + const GB + const KB + const MB + func ClosingPipedTeeReader(r io.Reader, pw *io.PipeWriter) io.Reader + type Crawl struct + API bool + APIPort string + ActiveWorkers *ratecounter.Counter + BypassProxy []string + CDXDedupeServer string + CaptureAlternatePages bool + CertValidation bool + Client *warc.CustomHTTPClient + ClientProxied *warc.CustomHTTPClient + CookieFile string + CookieJar http.CookieJar + CrawledAssets *ratecounter.Counter + CrawledSeeds *ratecounter.Counter + DisableAssetsCapture bool + DisableLocalDedupe bool + DisabledHTMLTags []string + DomainsCrawl bool + ElasticSearchURL string + ExcludedHosts []string + ExcludedStrings []string + Finished *utils.TAtomBool + Frontier *frontier.Frontier + HQAddress string + HQBatchSize int + HQChannelsWg *sync.WaitGroup + HQClient *gocrawlhq.Client + HQContinuousPull bool + HQFinishedChannel chan *frontier.Item + HQKey string + HQProducerChannel chan *frontier.Item + HQProject string + HQSecret string + HQStrategy string + HTTPTimeout int + Headless bool + Job string + JobPath string + KeepCookies bool + LiveStats bool + Logger logrus.Logger + MaxConcurrentAssets int + MaxConcurrentRequestsPerDomain int + MaxHops uint8 + MaxRedirect int + MaxRetry int + Paused *utils.TAtomBool + Prometheus bool + PrometheusMetrics *PrometheusMetrics + Proxy string + RateLimitDelay int + SeedList []frontier.Item + Seencheck bool + StartTime time.Time + URIsPerSecond *ratecounter.RateCounter + UseHQ bool + UserAgent string + WARCDedupSize int + WARCFullOnDisk bool + WARCOperator string + WARCPoolSize int + WARCPrefix string + WARCTempDir string + WARCWriter chan *warc.RecordBatch + WARCWriterFinish chan bool + WorkerPool sizedwaitgroup.SizedWaitGroup + Workers int + func (c *Crawl) Capture(item *frontier.Item) + func (c *Crawl) HQConsumer() + func (c *Crawl) HQFinisher() + func (c *Crawl) HQProducer() + func (c *Crawl) HQSeencheckURL(URL *url.URL) (bool, error) + func (c *Crawl) HQSeencheckURLs(URLs []*url.URL) (seencheckedBatch []*url.URL, err error) + func (c *Crawl) HQWebsocket() + func (c *Crawl) Start() (err error) + func (c *Crawl) Worker() + type PrometheusMetrics struct + DownloadedURI prometheus.Counter + Prefix string