Documentation ¶
Index ¶
- func ConstructFeed(doc *goquery.Document, u string, selectors Selectors, verbose bool) (feed *feeds.Feed, err error)
- func ConstructFeedFromURL(url *url.URL, selectors Selectors, verbose bool) (feed *feeds.Feed, err error)
- func Extract(link *url.URL, selectors Selectors, outputType string, verbose bool, ...)
- func Save(filepath, regex string, selectors Selectors)
- type ByCreated
- type ByTitle
- type Conf
- type HTTPSettings
- type Selectors
- type SortEnum
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ConstructFeed ¶
func ConstructFeedFromURL ¶
Types ¶
type HTTPSettings ¶
// HTTPSettings groups the options applied to outgoing HTTP requests.
//
// Per the field help text, Cookie and Header set the cookies and headers of
// all outgoing HTTP requests. The struct tags of UserAgent and Insecure are
// elided in this view ("string literal not displayed"), so their exact
// semantics cannot be confirmed from here.
//
// NOTE(review): the bare tags (optional, short, help) look like tags for a
// command-line flag parser — confirm which library consumes them.
type HTTPSettings struct { Cookie map[string]string `optional short:"b" help:"sets the cookie of all outgoing http requests"` Header map[string]string `optional short:"H" help:"sets the headers of all outgoing http requests"` UserAgent string `` /* 196-byte string literal not displayed */ Insecure bool `` /* 222-byte string literal not displayed */ }
// GlobalHTTPSettings is a package-level pointer to an HTTPSettings value. It
// is declared without an initializer, so it is nil until assigned elsewhere.
// NOTE(review): presumably the settings shared by every request the package
// makes — confirm against the code that assigns and reads it.
var GlobalHTTPSettings *HTTPSettings
func (*HTTPSettings) Client ¶
func (s *HTTPSettings) Client() *http.Client
type Selectors ¶
type Selectors struct { HTTPSettings HTTPSettings `yaml: "httpsettings" embed` Feed struct { Title string `required name:"feed-title" help:"css selector for the feed title"` Description string `optional name:"feed-description" help:"css selector for the feed description"` AuthorName string `optional help:"css selector for the feed author name"` AuthorEmail string `optional help:"css selector for the feed author email"` } `yaml:"feed" embed` Item struct { Container string `required name:"item-container" help:"css selector for the item container"` Title string `required name:"item-title" help:"css selector for the item title"` Link string `required name:"item-link" help:"css selector for the item link"` LinkAttr string `default:"href" name:"item-link-attr" help:"get attribute value of the item link element"` Created string `required name:"item-created" help:"css selector for the item created time"` CreatedFormat string `required name:"item-created-format" help:"css selector for the item created time format"` Description string `name:"item-description" help:"css selector for the item description"` Image string `name:"item-image" help:"css selector for the item image"` ImageAttr string `name:"item-image-attr" default:"src" help:"get attribute value of the item image element"` } `yaml:"item" embed` NextPage string `optional help:"css selector for the link to the next page to be scraped"` NextPageAttr string `optional default:"href" help:"get attribute value of the next page element"` NextPageCount int `optional help:"how deep to follow the next page link (integer value)"` Sort SortEnum `` /* 155-byte string literal not displayed */ }
Click to show internal directories.
Click to hide internal directories.