Documentation ¶
Index ¶
Constants ¶
View Source
const ( // ErrUnexpectedNodeType - found unexpected node type ErrUnexpectedNodeType = "Unexpected node type" // ErrUnexpectedTag - found unexpected tag ErrUnexpectedTag = "Unexpected tag" // ErrEncodingNotFound - not found encoding for content ErrEncodingNotFound = "Not found encoding for content" // ErrBodyNotHTML - not found HTML in body ErrBodyNotHTML = "Body not HTML" // ErrHTMLParse - HTML parse error ErrHTMLParse = "HTML parse error" // ErrReadGZipResponse - Can't read response body as gzip archive ErrReadGZipResponse = "Read response body as a gzip archive error" // ErrReadResponse - Can't read response body archive ErrReadResponse = "Read response body error" // ErrUnknownContentEncoding - Unknown http header Content-Encoding ErrUnknownContentEncoding = "Unknown http header \"Content-Encoding\"" // ErrStatusCode - Wrong status code ErrStatusCode = "Wrong status code" // ErrNotFountContentType - Not found Content-Type in headers ErrNotFountContentType = "Not found Content-Type in headers" // ErrParseContentType - Error parse Content-Type header ErrParseContentType = "Parse Content-Type header" // ErrRenderHTML - Error Render HTML ErrRenderHTML = "Render HTML" // ErrParseBaseURL - Parse base URL ErrParseBaseURL = "Parse base URL" // ErrResolveBaseURL - Resolve base URL by host name ErrResolveBaseURL = "Resolve base URL by hostname: StatusCode != 200" // ErrGetRequest - Error Get request ErrGetRequest = "Get request" // ErrReadResponseBody - Error read response body ErrReadResponseBody = "Read response body" // ErrCloseResponseBody - Close response body ErrCloseResponseBody = "Close response body" // ErrCreateRobotsTxtFromDb - Error create robots.txt from db data ErrCreateRobotsTxtFromDb = "Create robots.txt from db data" // ErrCreateRobotsTxtFromURL - Error create robots.txt from url ErrCreateRobotsTxtFromURL = "Create robots.txt from url" // WarnPageNotIndexed - Page not indexed WarnPageNotIndexed = "Page not indexed (meta tag noindex)" // 
InfoUnsupportedMimeFormat - Unsupported mime format InfoUnsupportedMimeFormat = "Unsupported mime format" // DbgRequestDuration - Request duration DbgRequestDuration = "Request duration" // DbgBodyProcessingDuration - Body processing duration DbgBodyProcessingDuration = "Body processing duration" // DbgBodySize - Body size after processing DbgBodySize = "Body size" )
Variables ¶
This section is empty.
Functions ¶
func NormalizeHostName ¶
NormalizeHostName - normalize host name
Types ¶
type HTMLMetadata ¶
type HTMLMetadata struct { // [URL]hostID URLs map[string]sql.NullInt64 MetaTagIndex bool // contains filtered or unexported fields }
HTMLMetadata - metadata extracted from HTML
func NewHTMLMetadata ¶
func NewHTMLMetadata(hostMng *hostsManager, urlStr string) (*HTMLMetadata, error)
NewHTMLMetadata - create new HTMLMetadata struct
func RunDataExtrator ¶
func RunDataExtrator(hostMng *hostsManager, node *html.Node, urlStr string) (*HTMLMetadata, error)
RunDataExtrator - extract URLs and other metadata from the page
func (*HTMLMetadata) AddURL ¶
func (h *HTMLMetadata) AddURL(link string)
AddURL - add an unparsed URL
func (*HTMLMetadata) SetTitle ¶
func (h *HTMLMetadata) SetTitle(title string, rewrite bool)
SetTitle - set title
func (*HTMLMetadata) WrongURLsToLog ¶
func (h *HTMLMetadata) WrongURLsToLog(logger zap.Logger)
WrongURLsToLog - write all wrong URLs to the log
Click to show internal directories.
Click to hide internal directories.