api

package
v1.2.4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 26, 2023 License: MIT Imports: 3 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type API

// API is the receiver type of the HandleListMethods and HandleScrape
// handlers. A *API is obtained from New, which takes a controller.C.
type API struct {
	// contains filtered or unexported fields
}

func New

func New(c controller.C) *API

func (*API) HandleListMethods

func (a *API) HandleListMethods(c *gin.Context)

HandleListMethods lists available and registered fetch methods.

@Summary	list fetch methods
@Accept		json
@Produce	json
@Success	200	{array}	string
@Router		/methods [get]

func (*API) HandleScrape

func (a *API) HandleScrape(c *gin.Context)

HandleScrape handles scraping requests.

@Summary		general purpose scraping endpoint
@Param			request	body	ScrapeRequest	true	"scrape request"
@Description	scrapes a website and returns an array of data found given the input config
@Accept			json
@Produce		json
@Success		200	{object}	ScrapeResponse
@Failure		400
@Failure		500
@Router			/scrape [post]

type ScrapeRequest

// ScrapeRequest is the JSON request body of the scrape endpoint; the
// HandleScrape annotations declare it as the required body parameter.
// Method and URL carry a binding:"required" tag; the other fields do not.
type ScrapeRequest struct {
	// Method is the name of the fetcher method to use, as listed by the
	// /methods endpoint.
	Method string `json:"method" binding:"required"`
	// URL is the starting URL (e.g. a search result request, or any other
	// page to begin from).
	URL string `json:"url" binding:"required"`

	// DataSelectors maps a field name to a CSS-style selector or an XPath
	// expression.
	// For CSS-style selectors the value is equivalent to a jQuery
	// $(<selector>).text() call.
	// XPath expressions are compiled and evaluated, and must be prefixed
	// with "xpath:".
	DataSelectors map[string]string `json:"dataSelectors"`

	// Multiple signifies that the result is an array of items.
	Multiple bool `json:"multiple"`

	// ItemParentSelector selects the HTML node that contains all the data
	// selectors. When set, the data selectors become relative to that parent.
	ItemParentSelector string `json:"itemParentSelector"`

	// NextPageSelector is required if pagination is desired.
	// NOTE(review): Limit is undocumented here; presumably it bounds the
	// number of pages or items fetched — TODO confirm against the controller.
	NextPageSelector string `json:"nextPageSelector"`
	Limit            int    `json:"limit"`

	// VisitItemDetailPage requests visiting each item's detail page and
	// _then_ applying the data selectors there.
	// NOTE(review): ItemLinkSelector presumably selects the link to that
	// detail page — TODO confirm.
	VisitItemDetailPage bool   `json:"visitItemDetailPage"`
	ItemLinkSelector    string `json:"itemLinkSelector"`
}

type ScrapeResponse

// ScrapeResponse is the body that the HandleScrape annotations declare for a
// 200 response. Its struct tags map the fields to the JSON keys "num",
// "data" and "scrapeErrors".
//
// Data holds one string-to-string map per entry. Num is presumably the
// number of entries in Data, and the map keys presumably mirror the field
// names given in ScrapeRequest.DataSelectors — TODO confirm both.
//
// NOTE(review): Errors is a []error. encoding/json encodes an interface
// value according to its dynamic type, so errors created with errors.New or
// fmt.Errorf (no exported fields, no MarshalJSON) are emitted as {} under
// "scrapeErrors". Confirm which concrete error types are stored here, or
// consider exposing the messages as strings.
type ScrapeResponse struct {
	Num    int                 `json:"num"`
	Data   []map[string]string `json:"data"`
	Errors []error             `json:"scrapeErrors"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL