Documentation ¶
Index ¶
- Constants
- type Action
- type Client
- func (c *Client) CancelTask(crawlerID, taskID string) error
- func (c *Client) CrawlURLs(crawlerID string, URLs []string, save, saveSpecified bool) (string, error)
- func (c *Client) Create(name string, config Config) (string, error)
- func (c *Client) Get(crawlerID string, withConfig bool) (*Crawler, error)
- func (c *Client) List(itemsPerPage, page int, name, appID string) (*CrawlersResponse, error)
- func (c *Client) ListAll(name, appID string) ([]*CrawlerListItem, error)
- func (c *Client) Pause(crawlerID string) (string, error)
- func (c *Client) Reindex(crawlerID string) (string, error)
- func (c *Client) Run(crawlerID string) (string, error)
- func (c *Client) Stats(crawlerID string) (*StatsResponse, error)
- func (c *Client) Test(crawlerID, URL string, config *Config) (*TestResponse, error)
- type Config
- type Crawler
- type CrawlerListItem
- type CrawlersResponse
- type Err
- type ErrResponse
- type LabeledError
- type RecordExtractor
- type StatsResponse
- type TaskIDResponse
- type TestResponse
Constants ¶
// DefaultBaseURL is the Algolia Crawler API base URL used by default.
const DefaultBaseURL = "https://crawler.algolia.com/api/1/"
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Action ¶
type Action struct { IndexName string `json:"indexName"` PathsToMatch []string `json:"pathsToMatch"` SelectorsToMatch []string `json:"selectorsToMatch,omitempty"` FileTypesToMatch []string `json:"fileTypesToMatch,omitempty"` RecordExtractor RecordExtractor `json:"recordExtractor"` }
Action is a Crawler configuration action.
type Client ¶
Client provides methods to interact with the Algolia Crawler API.
func NewClientWithHTTPClient ¶
NewClientWithHTTPClient returns a new Crawler API client with a custom HTTP client.
func (*Client) CancelTask ¶
CancelTask cancels a blocking task.
func (*Client) CrawlURLs ¶
func (c *Client) CrawlURLs(crawlerID string, URLs []string, save, saveSpecified bool) (string, error)
CrawlURLs crawls the specified URLs on the specified Crawler. It returns the Task ID if successful.
func (*Client) List ¶
func (c *Client) List(itemsPerPage, page int, name, appID string) (*CrawlersResponse, error)
List lists Crawlers.
func (*Client) ListAll ¶
func (c *Client) ListAll(name, appID string) ([]*CrawlerListItem, error)
ListAll lists all Crawlers.
type Config ¶
type Config struct { AppID string `json:"appId"` APIKey string `json:"apiKey"` IndexPrefix string `json:"indexPrefix"` Schedule string `json:"schedule"` StartUrls []string `json:"startUrls"` Sitemaps []string `json:"sitemaps"` ExclusionPatterns []string `json:"exclusionPatterns,omitempty"` IgnoreQueryParams []string `json:"ignoreQueryParams,omitempty"` RenderJavaScript bool `json:"renderJavaScript"` RateLimit int `json:"rateLimit"` ExtraUrls []string `json:"extraUrls,omitempty"` MaxDepth int `json:"maxDepth"` MaxURLs int `json:"maxUrls"` IgnoreRobotsTxtRules bool `json:"ignoreRobotsTxtRules"` IgnoreNoIndex bool `json:"ignoreNoIndex"` IgnoreNoFollowTo bool `json:"ignoreNoFollowTo"` IgnoreCanonicalTo bool `json:"ignoreCanonicalTo"` SaveBackup bool `json:"saveBackup"` InitialIndexSettings map[string]*search.Settings `json:"initialIndexSettings"` Actions []*Action `json:"actions"` }
Config is a Crawler configuration.
type Crawler ¶
type Crawler struct { ID string `json:"id,omitempty"` Name string `json:"name"` Running bool `json:"running,omitempty"` Reindexing bool `json:"reindexing,omitempty"` Blocked bool `json:"blocked,omitempty"` BlockingTaskID string `json:"blockingTaskId,omitempty"` BlockingError string `json:"blockingError,omitempty"` CreatedAt time.Time `json:"createdAt,omitempty"` UpdatedAt time.Time `json:"updatedAt,omitempty"` LastReindexStartedAt time.Time `json:"lastReindexStartedAt,omitempty"` LastReindexEndedAt time.Time `json:"lastReindexEndedAt,omitempty"` Config *Config `json:"config,omitempty"` }
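Crawler describes a single crawler: its ID and name, its status flags, its timestamps and, optionally, its configuration.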
type CrawlerListItem ¶
CrawlerListItem is a crawler list item.
type CrawlersResponse ¶
type CrawlersResponse struct { Items []*CrawlerListItem `json:"items"` // Pagination Page int `json:"page"` ItemsPerPage int `json:"itemsPerPage"` Total int `json:"total"` }
CrawlersResponse is the paginated response from the Crawler API crawlers endpoint.
type Err ¶
type Err struct { Message string `json:"message"` Code string `json:"code"` Errors []LabeledError `json:"errors,omitempty"` }
Err is a Crawler API error.
type ErrResponse ¶
// ErrResponse is the envelope of a Crawler API error response; the error
// itself is carried under the "error" JSON key.
type ErrResponse struct {
	Err Err `json:"error"`
}
ErrResponse is a Crawler API error response.
type LabeledError ¶
// LabeledError is a Crawler API labeled error, made of a type, a message and
// a label. All three fields are always written to the JSON encoding.
type LabeledError struct {
	Type    string `json:"type"`
	Message string `json:"message"`
	Label   string `json:"label"`
}
LabeledError is a Crawler API labeled error.
type RecordExtractor ¶
RecordExtractor is a Crawler configuration record extractor.
type StatsResponse ¶
// StatsResponse is the response from the crawlers/{id}/stats/urls endpoint:
// a top-level count plus a list of entries, each carrying a reason, status,
// category, readable text and its own count.
type StatsResponse struct {
	Count int `json:"count"`
	Data  []struct {
		Reason   string `json:"reason"`
		Status   string `json:"status"`
		Category string `json:"category"`
		Readable string `json:"readable"`
		Count    int    `json:"count"`
	} `json:"data"`
}
StatsResponse is the response from the crawler crawlers/{id}/stats/urls endpoint.
type TaskIDResponse ¶
// TaskIDResponse is the response returned when a task is created; the task
// identifier is carried under the "taskId" JSON key.
type TaskIDResponse struct {
	TaskID string `json:"taskId"`
}
TaskIDResponse is the response when a task is created.
type TestResponse ¶
type TestResponse struct { StartDate time.Time `json:"startDate"` EndDate time.Time `json:"endDate"` Logs interface{} `json:"logs,omitempty"` Records interface{} `json:"records,omitempty"` Links []string `json:"links,omitempty"` ExternalData interface{} `json:"externalData,omitempty"` Error *Err `json:"error,omitempty"` }
TestResponse is the response from the crawler crawlers/{id}/test endpoint.