website

package
Version: v0.9.0-beta (Latest)
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 2, 2024 License: MIT Imports: 15 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func Init

func Init(logger *zap.Logger) base.IConnector

Types

type Connector

// Connector is the website connector type. It embeds base.Connector and
// declares the CreateExecution and Test methods on its pointer receiver.
type Connector struct {
	base.Connector
}

func (*Connector) CreateExecution

func (c *Connector) CreateExecution(defUID uuid.UUID, task string, config *structpb.Struct, logger *zap.Logger) (base.IExecution, error)

func (*Connector) Test

func (c *Connector) Test(defUid uuid.UUID, config *structpb.Struct, logger *zap.Logger) (pipelinePB.Connector_State, error)

type Execution

// Execution is the website connector's execution type. It embeds
// base.Execution and declares the Execute method, which takes a slice of
// *structpb.Struct inputs and returns a slice of *structpb.Struct outputs.
type Execution struct {
	base.Execution
}

func (*Execution) Execute

func (e *Execution) Execute(inputs []*structpb.Struct) ([]*structpb.Struct, error)

type PageInfo

// PageInfo holds the data recorded for a single scraped page. It is
// serialized to JSON under the keys "link", "title", "link_text" and
// "link_html".
//
// NOTE(review): LinkText and LinkHtml presumably carry the page's scraped
// text and HTML, populated only when the corresponding IncludeLinkText /
// IncludeLinkHtml input flags are set — confirm against Scrape.
type PageInfo struct {
	Link     string `json:"link"`
	Title    string `json:"title"`
	LinkText string `json:"link_text"`
	LinkHtml string `json:"link_html"`
}

type ScrapeWebsiteInput

// ScrapeWebsiteInput defines the input of the scrape website task.
type ScrapeWebsiteInput struct {
	// TargetURL: The URL of the website to scrape.
	TargetURL string `json:"target_url"`
	// AllowedDomains: The list of domains that the scrape is allowed to visit.
	AllowedDomains []string `json:"allowed_domains"`
	// MaxK: The maximum number of pages to scrape.
	MaxK int `json:"max_k"`
	// IncludeLinkText: Whether to include the scraped text of each scraped web page.
	// NOTE(review): pointer type, so nil presumably means "not set" — confirm how
	// the caller treats a nil value.
	IncludeLinkText *bool `json:"include_link_text"`
	// IncludeLinkHtml: Whether to include the scraped HTML of each scraped web page.
	// NOTE(review): pointer type, so nil presumably means "not set" — confirm how
	// the caller treats a nil value.
	IncludeLinkHtml *bool `json:"include_link_html"`
}

ScrapeWebsiteInput defines the input of the scrape website task.

type ScrapeWebsiteOutput

// ScrapeWebsiteOutput defines the output of the scrape website task.
type ScrapeWebsiteOutput struct {
	// Pages: The list of pages that were scraped, one PageInfo per page.
	Pages []PageInfo `json:"pages"`
}

ScrapeWebsiteOutput defines the output of the scrape website task.

func Scrape

Scrape crawls a web page and returns a slice of PageInfo values.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL