Documentation ¶
Index ¶
Constants ¶
View Source
const ( CanonicalURL skippable = "canonical_url" ContentText skippable = "content_text" OriginalURL skippable = "original_url" FetchTime skippable = "fetch_time" FetchMethod skippable = "fetch_method" TTL skippable = "ttl" )
Variables ¶
View Source
var ( ErrNoTTL = errors.New("TTL not set") DefaultTTL = 30 * 24 * time.Hour )
View Source
var ErrNoSuchFetchMethod = errors.New("no such FetchMethod")
Functions ¶
Types ¶
type FetchClient ¶ added in v0.8.3
type FetchClient int
const ( Unspecified FetchClient = iota DefaultClient HeadlessChromium )
func (FetchClient) MarshalText ¶ added in v0.8.3
func (f FetchClient) MarshalText() ([]byte, error)
func (FetchClient) String ¶ added in v0.8.3
func (f FetchClient) String() string
func (*FetchClient) UnmarshalText ¶ added in v0.8.3
func (f *FetchClient) UnmarshalText(data []byte) error
type WebPage ¶
type WebPage struct { RequestedURL *nurl.URL `json:"-"` // The page that was actually fetched CanonicalURL *nurl.URL `json:"-"` OriginalURL string `json:"original_url,omitempty"` // The canonical URL of the page TTL time.Duration `json:"-"` // Time to live for the resource FetchTime *time.Time `json:"fetch_time,omitempty"` // When the returned source was fetched FetchMethod FetchClient `json:"fetch_method,omitempty"` // Method used to fetch the page Hostname string `json:"hostname,omitempty"` // Hostname of the page StatusCode int `json:"status_code,omitempty"` // HTTP status code Error error `json:"error,omitempty"` // Error that occurred during fetching Title string `json:"title,omitempty"` // Title of the page Description string `json:"description,omitempty"` // Description of the page Sitename string `json:"sitename,omitempty"` // Name of the site Authors []string `json:"authors,omitempty"` // Authors of the page Date *time.Time `json:"date,omitempty"` // Date of the page Categories []string `json:"categories,omitempty"` // Categories of the page Tags []string `json:"tags,omitempty"` // Tags of the page Language string `json:"language,omitempty"` // Language of the page Image string `json:"image,omitempty"` // Image of the page PageType string `json:"page_type,omitempty"` // Type of the page License string `json:"license,omitempty"` // License of the page ID string `json:"id,omitempty"` // ID of the page Fingerprint string `json:"fingerprint,omitempty"` // Fingerprint of the page ContentText string `json:"content_text,omitempty"` // Text content of the page // contains filtered or unexported fields }
WebPage represents a web page that was fetched, including metadata from the page itself, its text content, and information about the fetch operation.
func NewWebPage ¶ added in v0.8.0
func (*WebPage) ClearSkipWhenMarshaling ¶ added in v0.8.0
func (r *WebPage) ClearSkipWhenMarshaling()
func (WebPage) MarshalJSON ¶
func (*WebPage) SkipWhenMarshaling ¶ added in v0.8.0
func (r *WebPage) SkipWhenMarshaling(skip ...skippable)
func (*WebPage) UnmarshalJSON ¶ added in v0.7.0
Click to show internal directories.
Click to hide internal directories.