crawler

package
v1.4.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 26, 2023 License: MIT Imports: 9 Imported by: 0

Documentation

Index

Constants

View Source
// DefaultBaseURL is the base URL of the Algolia Crawler API that is used by
// default.
const DefaultBaseURL = "https://crawler.algolia.com/api/1/"

Variables

This section is empty.

Functions

This section is empty.

Types

type Action

// Action is a single action of a Crawler configuration (see Config.Actions).
//
// When marshaled to JSON, SelectorsToMatch and FileTypesToMatch are omitted
// if they are empty; IndexName, PathsToMatch and RecordExtractor are always
// emitted (a nil PathsToMatch is encoded as null).
type Action struct {
	IndexName        string          `json:"indexName"`
	PathsToMatch     []string        `json:"pathsToMatch"`
	SelectorsToMatch []string        `json:"selectorsToMatch,omitempty"`
	FileTypesToMatch []string        `json:"fileTypesToMatch,omitempty"`
	RecordExtractor  RecordExtractor `json:"recordExtractor"`
}

Action is a Crawler configuration action.

type Client

// Client provides methods to interact with the Algolia Crawler API.
// Instances are obtained from NewClient or NewClientWithHTTPClient.
type Client struct {
	// UserID and APIKey presumably hold the credentials given to the
	// constructor — TODO confirm against the constructor source.
	UserID string
	APIKey string
	// contains filtered or unexported fields
}

Client provides methods to interact with the Algolia Crawler API.

func NewClient

func NewClient(userID, apiKey string) *Client

NewClient returns a new Crawler API client.

func NewClientWithHTTPClient

func NewClientWithHTTPClient(userID, apiKey string, client *http.Client) *Client

NewClientWithHTTPClient returns a new Crawler API client with a custom HTTP client.

func (*Client) CancelTask

func (c *Client) CancelTask(crawlerID, taskID string) error

CancelTask cancels a blocking task.

func (*Client) CrawlURLs

func (c *Client) CrawlURLs(crawlerID string, URLs []string, save, saveSpecified bool) (string, error)

CrawlURLs crawls the specified URLs on the specified Crawler. It returns the Task ID if successful.

func (*Client) Create

func (c *Client) Create(name string, config Config) (string, error)

Create creates a new Crawler. It returns the Crawler ID if successful.

func (*Client) Get

func (c *Client) Get(crawlerID string, withConfig bool) (*Crawler, error)

Get gets a Crawler.

func (*Client) List

func (c *Client) List(itemsPerPage, page int, name, appID string) (*CrawlersResponse, error)

List lists Crawlers.

func (*Client) ListAll

func (c *Client) ListAll(name, appID string) ([]*CrawlerListItem, error)

ListAll lists all Crawlers.

func (*Client) Pause

func (c *Client) Pause(crawlerID string) (string, error)

Pause pauses a Crawler. It returns the Task ID if successful.

func (*Client) Reindex

func (c *Client) Reindex(crawlerID string) (string, error)

Reindex reindexes a Crawler. It returns the Task ID if successful.

func (*Client) Run

func (c *Client) Run(crawlerID string) (string, error)

Run runs a Crawler. It returns the Task ID if successful.

func (*Client) Stats

func (c *Client) Stats(crawlerID string) (*StatsResponse, error)

Stats gets the stats of a Crawler.

func (*Client) Test

func (c *Client) Test(crawlerID, URL string, config *Config) (*TestResponse, error)

Test tests a URL on the specified Crawler.

type Config

// Config is a Crawler configuration. It is accepted by Client.Create and
// Client.Test and is carried by Crawler.Config.
//
// Only ExclusionPatterns, IgnoreQueryParams and ExtraUrls are tagged with
// omitempty. Every other field is always marshaled, so zero values are sent
// explicitly (false, 0, "" and null for nil slices and maps).
type Config struct {
	// Application, schedule and crawl entry points.
	// NOTE(review): meaning inferred from the field names — confirm against
	// the Crawler API reference.
	AppID       string   `json:"appId"`
	APIKey      string   `json:"apiKey"`
	IndexPrefix string   `json:"indexPrefix"`
	Schedule    string   `json:"schedule"`
	StartUrls   []string `json:"startUrls"`
	Sitemaps    []string `json:"sitemaps"`

	// Crawl scope and throttling options. The unit of RateLimit is not
	// visible here — TODO confirm.
	ExclusionPatterns []string `json:"exclusionPatterns,omitempty"`
	IgnoreQueryParams []string `json:"ignoreQueryParams,omitempty"`
	RenderJavaScript  bool     `json:"renderJavaScript"`
	RateLimit         int      `json:"rateLimit"`
	ExtraUrls         []string `json:"extraUrls,omitempty"`
	MaxDepth          int      `json:"maxDepth"`
	MaxURLs           int      `json:"maxUrls"`

	// Ignore* switches. None of them uses omitempty, so false is sent
	// explicitly.
	IgnoreRobotsTxtRules bool `json:"ignoreRobotsTxtRules"`
	IgnoreNoIndex        bool `json:"ignoreNoIndex"`
	IgnoreNoFollowTo     bool `json:"ignoreNoFollowTo"`
	IgnoreCanonicalTo    bool `json:"ignoreCanonicalTo"`

	// InitialIndexSettings maps a string key (presumably an index name —
	// TODO confirm) to search settings.
	SaveBackup           bool                        `json:"saveBackup"`
	InitialIndexSettings map[string]*search.Settings `json:"initialIndexSettings"`

	// Actions lists the configuration actions; see Action.
	Actions []*Action `json:"actions"`
}

Config is a Crawler configuration.

type Crawler

// Crawler describes a Crawler, as returned by Client.Get.
//
// NOTE(review): encoding/json's omitempty never treats a time.Time as empty
// because it is a struct. CreatedAt, UpdatedAt, LastReindexStartedAt and
// LastReindexEndedAt are therefore still emitted when they hold the zero
// time. omitempty is effective only on the string, bool and pointer fields.
type Crawler struct {
	// Identity and status flags. Name is always marshaled; the other
	// fields are omitted when empty or false.
	ID         string `json:"id,omitempty"`
	Name       string `json:"name"`
	Running    bool   `json:"running,omitempty"`
	Reindexing bool   `json:"reindexing,omitempty"`
	Blocked    bool   `json:"blocked,omitempty"`

	// Details of the blocking task (presumably only set when Blocked is
	// true — TODO confirm). BlockingTaskID has the same shape as the taskID
	// argument of Client.CancelTask.
	BlockingTaskID string `json:"blockingTaskId,omitempty"`
	BlockingError  string `json:"blockingError,omitempty"`

	// Creation and last-update timestamps.
	CreatedAt time.Time `json:"createdAt,omitempty"`
	UpdatedAt time.Time `json:"updatedAt,omitempty"`

	// Start and end timestamps of the last reindex.
	LastReindexStartedAt time.Time `json:"lastReindexStartedAt,omitempty"`
	LastReindexEndedAt   time.Time `json:"lastReindexEndedAt,omitempty"`

	// Config is a pointer so that it can be absent. Client.Get takes a
	// withConfig flag that presumably controls whether it is populated —
	// TODO confirm.
	Config *Config `json:"config,omitempty"`
}

Crawler describes a single Crawler, as returned by Get.

type CrawlerListItem

// CrawlerListItem is one entry of a Crawler listing: the element type of
// CrawlersResponse.Items and of the slice returned by Client.ListAll. It
// carries only the Crawler's name and ID.
type CrawlerListItem struct {
	Name string `json:"name"`
	ID   string `json:"id"`
}

CrawlerListItem is a crawler list item.

type CrawlersResponse

// CrawlersResponse is one page of a Crawler listing, as returned by
// Client.List.
type CrawlersResponse struct {
	// Items holds the Crawlers of this page.
	Items []*CrawlerListItem `json:"items"`

	// Pagination. Page and ItemsPerPage mirror the parameters of
	// Client.List. Whether Page is zero- or one-based and whether Total
	// counts items or pages is not visible here — TODO confirm.
	Page         int `json:"page"`
	ItemsPerPage int `json:"itemsPerPage"`
	Total        int `json:"total"`
}

CrawlersResponse is the response from the Crawler API's crawlers endpoint.

type Err

// Err is a Crawler API error. It is the payload of ErrResponse and may also
// be attached to a TestResponse.
//
// Errors is omitted from JSON when empty. Whether Err implements the error
// interface is not visible in this declaration.
type Err struct {
	Message string         `json:"message"`
	Code    string         `json:"code"`
	Errors  []LabeledError `json:"errors,omitempty"`
}

Err is a Crawler API error.

type ErrResponse

// ErrResponse is a Crawler API error response: a JSON object whose "error"
// member holds the Err.
type ErrResponse struct {
	Err Err `json:"error"`
}

ErrResponse is a Crawler API error response.

type LabeledError

// LabeledError is a Crawler API labeled error, the element type of
// Err.Errors. All three fields are always marshaled.
type LabeledError struct {
	Type    string `json:"type"`
	Message string `json:"message"`
	Label   string `json:"label"`
}

LabeledError is a Crawler API labeled error.

type RecordExtractor

// RecordExtractor is the record extractor of a Crawler configuration Action.
//
// Type is serialized under the JSON key "__type". Source presumably holds
// the extractor's source code — TODO confirm against the Crawler API
// reference.
type RecordExtractor struct {
	Type   string `json:"__type"`
	Source string `json:"source"`
}

RecordExtractor is a Crawler configuration record extractor.

type StatsResponse

// StatsResponse is the result of Client.Stats.
//
// Data is a slice of an anonymous struct type, so its elements cannot be
// named outside this declaration. Ranging over Data is the usual way to
// read them.
type StatsResponse struct {
	Count int `json:"count"`
	Data  []struct {
		// One statistics entry. Count is the per-entry counter, distinct
		// from the top-level Count. The relationship between the two is
		// not visible here — TODO confirm.
		Reason   string `json:"reason"`
		Status   string `json:"status"`
		Category string `json:"category"`
		Readable string `json:"readable"`
		Count    int    `json:"count"`
	} `json:"data"`
}

StatsResponse is the response from the Crawler API's crawlers/{id}/stats/urls endpoint.

type TaskIDResponse

// TaskIDResponse is the response when a task is created. TaskID is read
// from the "taskId" JSON member. It is presumably the value returned by the
// Client methods documented as returning a Task ID — TODO confirm.
type TaskIDResponse struct {
	TaskID string `json:"taskId"`
}

TaskIDResponse is the response when a task is created.

type TestResponse

// TestResponse is the result of Client.Test.
//
// Logs, Records and ExternalData are declared as interface{}, so
// encoding/json decodes them into generic values (map[string]interface{},
// []interface{}, string, float64, bool or nil) that callers must
// type-assert. Error is nil when the response carries no "error" member.
//
// StartDate and EndDate are always marshaled; the remaining fields are
// omitted when empty.
type TestResponse struct {
	StartDate    time.Time   `json:"startDate"`
	EndDate      time.Time   `json:"endDate"`
	Logs         interface{} `json:"logs,omitempty"`
	Records      interface{} `json:"records,omitempty"`
	Links        []string    `json:"links,omitempty"`
	ExternalData interface{} `json:"externalData,omitempty"`
	Error        *Err        `json:"error,omitempty"`
}

TestResponse is the response from the Crawler API's crawlers/{id}/test endpoint.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL