website

package

Version: v0.9.0-beta (latest) | Published: Jan 2, 2024 | License: MIT | Imports: 15 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/instill-ai/connector

Documentation ¶

Index ¶

func Init(logger *zap.Logger) base.IConnector
type Connector
- func (c *Connector) CreateExecution(defUID uuid.UUID, task string, config *structpb.Struct, logger *zap.Logger) (base.IExecution, error)
- func (c *Connector) Test(defUid uuid.UUID, config *structpb.Struct, logger *zap.Logger) (pipelinePB.Connector_State, error)
type Execution
- func (e *Execution) Execute(inputs []*structpb.Struct) ([]*structpb.Struct, error)
type PageInfo
type ScrapeWebsiteInput
type ScrapeWebsiteOutput
- func Scrape(input ScrapeWebsiteInput) (ScrapeWebsiteOutput, error)

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func Init ¶

func Init(logger *zap.Logger) base.IConnector

Types ¶

type Connector ¶

// Connector embeds base.Connector. The CreateExecution and Test methods
// listed for this package are declared on *Connector.
type Connector struct {
	base.Connector
}

func (*Connector) CreateExecution ¶

func (c *Connector) CreateExecution(defUID uuid.UUID, task string, config *structpb.Struct, logger *zap.Logger) (base.IExecution, error)

func (*Connector) Test ¶

func (c *Connector) Test(defUid uuid.UUID, config *structpb.Struct, logger *zap.Logger) (pipelinePB.Connector_State, error)

type Execution ¶

// Execution embeds base.Execution. The Execute method listed for this
// package is declared on *Execution.
type Execution struct {
	base.Execution
}

func (*Execution) Execute ¶

func (e *Execution) Execute(inputs []*structpb.Struct) ([]*structpb.Struct, error)

type PageInfo ¶

// PageInfo is the element type of ScrapeWebsiteOutput.Pages.
//
// NOTE(review): the per-field descriptions below are inferred from the field
// names and JSON tags only; confirm them against the Scrape implementation.
type PageInfo struct {
	// Link: presumably the URL of the scraped page.
	Link     string `json:"link"`
	// Title: presumably the title of the scraped page.
	Title    string `json:"title"`
	// LinkText: presumably the scraped text of the page (see
	// ScrapeWebsiteInput.IncludeLinkText) — TODO confirm.
	LinkText string `json:"link_text"`
	// LinkHtml: presumably the scraped HTML of the page (see
	// ScrapeWebsiteInput.IncludeLinkHtml) — TODO confirm.
	LinkHtml string `json:"link_html"`
}

type ScrapeWebsiteInput ¶

// ScrapeWebsiteInput is the parameter type of Scrape. The struct tags give
// the JSON key used for each field.
type ScrapeWebsiteInput struct {
	// TargetURL: The URL of the website to scrape.
	TargetURL string `json:"target_url"`
	// AllowedDomains: The list of domains that are allowed to be scraped.
	AllowedDomains []string `json:"allowed_domains"`
	// MaxK: The maximum number of pages to scrape.
	MaxK int `json:"max_k"`
	// IncludeLinkText: Whether to include the text of the scraped web page.
	// Declared as a pointer, so nil (unset) is distinguishable from an
	// explicit false; how nil is treated is not visible here.
	IncludeLinkText *bool `json:"include_link_text"`
	// IncludeLinkHtml: Whether to include the HTML of the scraped web page.
	// Declared as a pointer, so nil (unset) is distinguishable from an
	// explicit false; how nil is treated is not visible here.
	IncludeLinkHtml *bool `json:"include_link_html"`
}

ScrapeWebsiteInput defines the input of the scrape website task.

type ScrapeWebsiteOutput ¶

// ScrapeWebsiteOutput is the result type of Scrape. The struct tag gives the
// JSON key used for its field.
type ScrapeWebsiteOutput struct {
	// Pages: The list of pages that were scraped.
	Pages []PageInfo `json:"pages"`
}

ScrapeWebsiteOutput defines the output of the scrape website task.

func Scrape ¶

func Scrape(input ScrapeWebsiteInput) (ScrapeWebsiteOutput, error)

Scrape crawls a website and returns a ScrapeWebsiteOutput whose Pages field holds a slice of PageInfo, along with an error.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL