Documentation ¶
Index ¶
- Constants
- Variables
- func ForceRegister(hostName string, parser LinkParser) error
- func HasOverridden(host string) (overridden bool)
- func NewHTTPClient(u *url.URL) (client http.Client, request http.Request)
- func Register(hostName string, parser LinkParser) (regErr error)
- func SetRequestLanguage(lang string) error
- func SetUserAgent(agent string) error
- func Unregister(hostName string)
- type Link
- func GetLink(urlPath string) (link *Link, parseErr error)
- func Primitive(doc *goquery.Document, url *url.URL, fullURL string) (link *Link, parserErr error)
- func Reddit(doc *goquery.Document, url *url.URL, fullURL string) (link *Link, parserErr error)
- func Twitch(_doc *goquery.Document, url *url.URL, fullURL string) (link *Link, parserErr error)
- func Youtube(doc *goquery.Document, url *url.URL, fullURL string) (link *Link, parserErr error)
- type LinkParser
- type TwitchGqlBroadcastSettings
- type TwitchGqlChannelResponse
- type TwitchGqlClipData
- type TwitchGqlClipResponse
- type TwitchGqlClipRoot
- type TwitchGqlData
- type TwitchGqlGame
- type TwitchGqlRoles
- type TwitchGqlStream
- type TwitchGqlUser
Constants ¶
const (
	// HostAlreadyRegistered is an error message for when a host already has a registered parser.
	HostAlreadyRegistered = "Host already has a registered parser"
	// NoResponse is an error message for when we fail to get a response from a page. This may occur for timeouts.
	NoResponse = "No response from client to page"
	// PageContentNotValid is an error message for when the page requested is not HTML.
	PageContentNotValid = "Page content provided is not valid HTML"
	// PageNotAccessible is an error message for when we get a non-200 status from a page.
	PageNotAccessible = "Page not accessible"
)
Variables ¶
var ChannelRequestJSON string
var ClipRequestJSON string
var HasOverriddenInternals map[string]bool
HasOverriddenInternals is a map of our internal parsers and if they have been overridden
var HostToParsers map[string]LinkParser
HostToParsers is our map of hostnames to custom parsers
var MetaImageNames []string
MetaImageNames is an array of meta names commonly associated with site images
var RequestLanguage string
RequestLanguage is the desired language to request a page with. Defaults to en-US / en
var UserAgent string
UserAgent is the desired User Agent to report to a page via request. Defaults to Sauron Bot $VERSION (e.g. Sauron Bot 0.1)
var YoutubeQueriesToExtras map[string]string
YoutubeQueriesToExtras is query info to extra metadata
Functions ¶
func ForceRegister ¶
func ForceRegister(hostName string, parser LinkParser) error
ForceRegister will force register a LinkParser against the provided hostname. This is identical to calling Unregister then Register.
func HasOverridden ¶
HasOverridden will check if our internal parsers have been overridden
func NewHTTPClient ¶
NewHTTPClient will create a new request-specific client, with our defined user agent, for the purposes of page fetching. If successful, it will return both the client and the request for use
func Register ¶
func Register(hostName string, parser LinkParser) (regErr error)
Register will attempt to register the provided parser for a specific hostname. The hostname can be an exact match, such as "google.com", or a regex. Attempting to register when a LinkParser is already associated will return an error.
func SetRequestLanguage ¶
SetRequestLanguage will set the Accept-Language header for page requests. This does not necessarily mean the page supports the language or will return with that language.
func SetUserAgent ¶
func Unregister ¶
func Unregister(hostName string)
Unregister will unregister a LinkParser with the specified hostname
Types ¶
type Link ¶
type Link struct {
Description, Favicon, Host, Image, Title, URI string
// Extras is our extra metadata.
// This may be used by internal and external parsers to communicate additional information about the URL in question
Extras map[string]string
}
Link is our structured information about a URL provided to Sauron's Parser
func Primitive ¶
Primitive is our primitive parser. This parser will get standard page information from the most commonly supported DOM elements.
func Reddit ¶
Reddit is our internal Reddit parser. This parser will get page information as well as Reddit post information such as dislikes, likes, and overall score.
type LinkParser ¶
LinkParser is a function which takes in a parsed document, URL struct and a string, and returns a pointer to a Link or an error
type TwitchGqlBroadcastSettings ¶
type TwitchGqlBroadcastSettings struct { ID string `json:"id"` Language string `json:"language"` Game TwitchGqlGame `json:"game,omitempty"` Title string `json:"title"` }
TwitchGqlBroadcastSettings is various broadcast settings
type TwitchGqlChannelResponse ¶
type TwitchGqlChannelResponse struct {
Data TwitchGqlData `json:"data"`
}
TwitchGqlChannelResponse is part of the possible GQL response we get from the Twitch endpoint when the request is for a channel.
type TwitchGqlClipData ¶
type TwitchGqlClipData struct { Broadcaster TwitchGqlUser `json:"broadcaster"` Game TwitchGqlGame `json:"game,omitempty"` Slug string `json:"slug"` Title string `json:"title"` }
type TwitchGqlClipResponse ¶
type TwitchGqlClipResponse struct {
Data TwitchGqlClipRoot `json:"data"`
}
type TwitchGqlClipRoot ¶
type TwitchGqlClipRoot struct {
Clip TwitchGqlClipData `json:"clip"`
}
type TwitchGqlData ¶
type TwitchGqlData struct { CurrentUser string `json:"currentUser,omitempty"` Stream TwitchGqlStream `json:"stream,omitempty"` User TwitchGqlUser `json:"user"` }
TwitchGqlData is some of the possible GQL data
type TwitchGqlGame ¶
type TwitchGqlGame struct { ID string `json:"id"` BoxArtURL string `json:"boxArtURL"` DisplayName string `json:"displayName"` Name string `json:"name"` TypeName string `json:"__typename"` }
TwitchGqlGame is various game related settings
type TwitchGqlRoles ¶
type TwitchGqlStream ¶
type TwitchGqlStream struct {
Type string `json:"type,omitempty"`
}
type TwitchGqlUser ¶
type TwitchGqlUser struct { ID string `json:"id"` BroadcastSettings TwitchGqlBroadcastSettings `json:"broadcastSettings"` DisplayName string `json:"displayName"` Login string `json:"login"` ProfileImageURL string `json:"profileImageURL"` MediumProfileImageURL string `json:"medProfileImageUrl"` Roles TwitchGqlRoles `json:"roles"` }
TwitchGqlUser is various user data from GQL.