Documentation ¶
Index ¶
Constants ¶
View Source
const BINARY_EDGE_API = "https://api.binaryedge.io/v2/query/domains/subdomain"
View Source
const CHROME_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
View Source
const CRAWLER_NAME = "CRAWLER"
View Source
const MODULE_NAME = "ARCHIVE.ORG"
View Source
const SERP_API_URL = "https://serpapi.com/search.json"
Variables ¶
This section is empty.
Functions ¶
func GenerateFiletypeQuery ¶
Returns a query string for searching for filetypes, given a URL.
Types ¶
type BinaryEdgeClient ¶
type BinaryEdgeClient struct {
// contains filtered or unexported fields
}
func NewBinaryEdgeClient ¶
func NewBinaryEdgeClient(apiKey string) BinaryEdgeClient
func (*BinaryEdgeClient) QuerySubdomains ¶
func (client *BinaryEdgeClient) QuerySubdomains(targetUrl url.URL) (BinaryEdgeSubdomains, error)
Queries for subdomains using the BinaryEdge API: https://docs.binaryedge.io/api-v2/#domains
type BinaryEdgeSubdomains ¶
type BinaryEdgeSubdomains struct {
Subdomains []string `json:"events"`
}
type GoogleResults ¶
type GoogleResults struct {
OrganicResults []OrganicResult `json:"organic_results"`
}
type LatestUpload ¶
type LatestUpload struct {
	Response struct {
		Docs []struct {
			Identifier string `json:"identifier"`
		} `json:"docs"`
	} `json:"response"`
}
Represents the urlteam's latest upload to archive.org
type OrganicResult ¶
type SerpClient ¶
type SerpClient struct {
// contains filtered or unexported fields
}
func NewSerpClient ¶
func NewSerpClient(apiKey string) (SerpClient, error)
func (*SerpClient) SearchGoogle ¶
func (serp *SerpClient) SearchGoogle(queryStr string) ([]url.URL, []error)
type ShortenedUrlFinder ¶
type ShortenedUrlFinder struct {
	DeletePostDownload bool
	TargetHost         string
	ZipFilePath        string
	DestinationPath    string
	Comms              shared.CommsChannels
}
Stores the data needed to download, unzip, and read from the archive.org file.
func NewShortenedUrlFinder ¶
func NewShortenedUrlFinder(host string, comms shared.CommsChannels) ShortenedUrlFinder
func (*ShortenedUrlFinder) DecompressXZ ¶
func (su *ShortenedUrlFinder) DecompressXZ()
Decompresses .xz.txt files downloaded from archive.org. TODO: run the command and capture its output in real time so it can be relayed via the comms channels.
func (*ShortenedUrlFinder) DownloadShortenedURLs ¶
func (su *ShortenedUrlFinder) DownloadShortenedURLs() error
Downloads shortened URL data to local directory
func (*ShortenedUrlFinder) UnzipAllDownloads ¶
func (su *ShortenedUrlFinder) UnzipAllDownloads() error
Unzips the downloaded file and the zipped inner files
Click to show internal directories.
Click to hide internal directories.