crawler

package

v1.6.11 (Latest) | Published: May 28, 2024 | License: MIT | Imports: 9 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/algolia/cli

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
type Action
type Client
- func NewClient(userID, apiKey string) *Client
- func NewClientWithHTTPClient(userID, apiKey string, client *http.Client) *Client
type Config
type Crawler
type CrawlerListItem
type CrawlersResponse
type Err
type ErrResponse
type LabeledError
type RecordExtractor
type StatsResponse
type TaskIDResponse
type TestResponse

Constants ¶

View Source

// DefaultBaseURL is the base URL used for the Algolia Crawler API when no
// other one is configured.
const DefaultBaseURL = "https://crawler.algolia.com/api/1/"

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type Action ¶

// Action is one entry of the "actions" list of a Crawler configuration.
//
// IndexName, PathsToMatch and RecordExtractor are always present in the
// serialized JSON. SelectorsToMatch and FileTypesToMatch carry omitempty and
// are therefore left out when they are nil or empty.
type Action struct {
	IndexName        string          `json:"indexName"`
	PathsToMatch     []string        `json:"pathsToMatch"`
	SelectorsToMatch []string        `json:"selectorsToMatch,omitempty"`
	FileTypesToMatch []string        `json:"fileTypesToMatch,omitempty"`
	RecordExtractor  RecordExtractor `json:"recordExtractor"`
}

Action is a Crawler configuration action.

type Client ¶

// Client provides methods to interact with the Algolia Crawler API.
//
// UserID and APIKey are exported and mirror the userID and apiKey arguments
// of NewClient and NewClientWithHTTPClient.
// NOTE(review): presumably these are the credentials sent with every
// request; confirm against the implementation, which is not shown here.
type Client struct {
	UserID string
	APIKey string
	// contains filtered or unexported fields
}

Client provides methods to interact with the Algolia Crawler API.

func NewClient ¶

func NewClient(userID, apiKey string) *Client

NewClient returns a new Crawler API client.

func NewClientWithHTTPClient ¶

func NewClientWithHTTPClient(userID, apiKey string, client *http.Client) *Client

NewClientWithHTTPClient returns a new Crawler API client with a custom HTTP client.

func (*Client) CancelTask ¶

func (c *Client) CancelTask(crawlerID, taskID string) error

CancelTask cancels a blocking task.

func (*Client) CrawlURLs ¶

func (c *Client) CrawlURLs(crawlerID string, URLs []string, save, saveSpecified bool) (string, error)

CrawlURLs crawls the specified URLs on the specified Crawler. It returns the Task ID if successful.

func (*Client) Create ¶

func (c *Client) Create(name string, config Config) (string, error)

Create creates a new Crawler. It returns the Crawler ID if successful.

func (*Client) Get ¶

func (c *Client) Get(crawlerID string, withConfig bool) (*Crawler, error)

Get gets a Crawler.

func (*Client) List ¶

func (c *Client) List(itemsPerPage, page int, name, appID string) (*CrawlersResponse, error)

List lists Crawlers.

func (*Client) ListAll ¶

func (c *Client) ListAll(name, appID string) ([]*CrawlerListItem, error)

ListAll lists all Crawlers.

func (*Client) Pause ¶

func (c *Client) Pause(crawlerID string) (string, error)

Pause pauses a Crawler. It returns the Task ID if successful.

func (*Client) Reindex ¶

func (c *Client) Reindex(crawlerID string) (string, error)

Reindex reindexes a Crawler. It returns the Task ID if successful.

func (*Client) Run ¶

func (c *Client) Run(crawlerID string) (string, error)

Run runs a Crawler. It returns the Task ID if successful.

func (*Client) Stats ¶

func (c *Client) Stats(crawlerID string) (*StatsResponse, error)

Stats gets the stats of a Crawler.

func (*Client) Test ¶

func (c *Client) Test(crawlerID, URL string, config *Config) (*TestResponse, error)

Test tests a URL on the specified Crawler.

type Config ¶

// Config is a Crawler configuration.
//
// Every field carries omitempty, so zero values (empty strings, nil or empty
// slices and maps, false, and 0) are left out of the serialized JSON. As a
// consequence an explicit false or 0 cannot be expressed through this type:
// it is indistinguishable from "not set" once marshaled.
//
// NOTE(review): InitialIndexSettings is keyed by string; presumably the key
// is an index name. Confirm against the Crawler API reference.
type Config struct {
	AppID       string   `json:"appId,omitempty"`
	APIKey      string   `json:"apiKey,omitempty"`
	IndexPrefix string   `json:"indexPrefix,omitempty"`
	Schedule    string   `json:"schedule,omitempty"`
	StartUrls   []string `json:"startUrls,omitempty"`
	Sitemaps    []string `json:"sitemaps,omitempty"`

	ExclusionPatterns []string `json:"exclusionPatterns,omitempty"`
	IgnoreQueryParams []string `json:"ignoreQueryParams,omitempty"`
	RenderJavaScript  bool     `json:"renderJavaScript,omitempty"`
	RateLimit         int      `json:"rateLimit,omitempty"`
	ExtraUrls         []string `json:"extraUrls,omitempty"`
	MaxDepth          int      `json:"maxDepth,omitempty"`
	MaxURLs           int      `json:"maxUrls,omitempty"`

	IgnoreRobotsTxtRules bool `json:"ignoreRobotsTxtRules,omitempty"`
	IgnoreNoIndex        bool `json:"ignoreNoIndex,omitempty"`
	IgnoreNoFollowTo     bool `json:"ignoreNoFollowTo,omitempty"`
	IgnoreCanonicalTo    bool `json:"ignoreCanonicalTo,omitempty"`

	SaveBackup           bool                        `json:"saveBackup,omitempty"`
	InitialIndexSettings map[string]*search.Settings `json:"initialIndexSettings,omitempty"`

	Actions []*Action `json:"actions,omitempty"`
}

Config is a Crawler configuration.

type Crawler ¶

// Crawler describes a single crawler: its identifier and name, status flags
// (Running, Reindexing, Blocked), details of a blocking task if any,
// timestamps, and an optional configuration.
//
// Config is a pointer and is omitted from JSON when nil.
//
// NOTE(review): the omitempty option on the four time.Time fields has no
// effect with encoding/json, because a struct value is never considered
// empty; a zero time.Time is still marshaled. Switching to *time.Time would
// make the option work but would change the exported field types.
type Crawler struct {
	ID         string `json:"id,omitempty"`
	Name       string `json:"name"`
	Running    bool   `json:"running,omitempty"`
	Reindexing bool   `json:"reindexing,omitempty"`
	Blocked    bool   `json:"blocked,omitempty"`

	BlockingTaskID string `json:"blockingTaskId,omitempty"`
	BlockingError  string `json:"blockingError,omitempty"`

	CreatedAt time.Time `json:"createdAt,omitempty"`
	UpdatedAt time.Time `json:"updatedAt,omitempty"`

	LastReindexStartedAt time.Time `json:"lastReindexStartedAt,omitempty"`
	LastReindexEndedAt   time.Time `json:"lastReindexEndedAt,omitempty"`

	Config *Config `json:"config,omitempty"`
}

Crawler describes a single crawler: its identity, status flags, timestamps, and optional configuration.

type CrawlerListItem ¶

// CrawlerListItem is a crawler list item. It carries only the crawler's name
// and ID, serialized under the JSON keys "name" and "id".
type CrawlerListItem struct {
	Name string `json:"name"`
	ID   string `json:"id"`
}

CrawlerListItem is a crawler list item.

type CrawlersResponse ¶

// CrawlersResponse is one page of a crawler listing: the items on the page
// plus the pagination fields Page, ItemsPerPage and Total.
type CrawlersResponse struct {
	Items []*CrawlerListItem `json:"items"`

	// Pagination
	Page         int `json:"page"`
	ItemsPerPage int `json:"itemsPerPage"`
	Total        int `json:"total"`
}

CrawlersResponse is the paginated response from the Crawler API's crawlers endpoint.

type Err ¶

// Err is a Crawler API error. It holds a message, a code, and an optional
// list of labeled errors; Errors is omitted from JSON when nil or empty.
type Err struct {
	Message string         `json:"message"`
	Code    string         `json:"code"`
	Errors  []LabeledError `json:"errors,omitempty"`
}

Err is a Crawler API error.

type ErrResponse ¶

// ErrResponse is a Crawler API error response: an envelope whose single
// "error" JSON key holds an Err.
type ErrResponse struct {
	Err Err `json:"error"`
}

ErrResponse is a Crawler API error response.

type LabeledError ¶

// LabeledError is a Crawler API labeled error, made of three string fields
// serialized as "type", "message" and "label". It is the element type of
// Err.Errors.
type LabeledError struct {
	Type    string `json:"type"`
	Message string `json:"message"`
	Label   string `json:"label"`
}

LabeledError is a Crawler API labeled error.

type RecordExtractor ¶

// RecordExtractor is a Crawler configuration record extractor.
//
// Type is serialized under the JSON key "__type" (with two leading
// underscores) and Source under "source".
// NOTE(review): Source presumably holds the extractor's source code as
// text; confirm against the Crawler API reference.
type RecordExtractor struct {
	Type   string `json:"__type"`
	Source string `json:"source"`
}

RecordExtractor is a Crawler configuration record extractor.

type StatsResponse ¶

// StatsResponse holds crawler statistics: a top-level Count and a list of
// Data entries, each with its own reason, status, category, readable text
// and count.
//
// The element type of Data is an anonymous struct, so callers can only refer
// to it through this field.
type StatsResponse struct {
	Count int `json:"count"`
	Data  []struct {
		Reason   string `json:"reason"`
		Status   string `json:"status"`
		Category string `json:"category"`
		Readable string `json:"readable"`
		Count    int    `json:"count"`
	} `json:"data"`
}

StatsResponse is the response from the Crawler API's crawlers/{id}/stats/urls endpoint.

type TaskIDResponse ¶

// TaskIDResponse is the response when a task is created. It carries the task
// identifier under the JSON key "taskId".
type TaskIDResponse struct {
	TaskID string `json:"taskId"`
}

TaskIDResponse is the response when a task is created.

type TestResponse ¶

// TestResponse is the result of testing a URL on a crawler.
//
// StartDate and EndDate are always serialized. Logs, Records and
// ExternalData are declared as interface{}, so this type does not fix their
// shape; callers must inspect the dynamic value. Those three fields, Links
// and Error are omitted from JSON when nil or empty.
type TestResponse struct {
	StartDate    time.Time   `json:"startDate"`
	EndDate      time.Time   `json:"endDate"`
	Logs         interface{} `json:"logs,omitempty"`
	Records      interface{} `json:"records,omitempty"`
	Links        []string    `json:"links,omitempty"`
	ExternalData interface{} `json:"externalData,omitempty"`
	Error        *Err        `json:"error,omitempty"`
}

TestResponse is the response from the Crawler API's crawlers/{id}/test endpoint.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL