collector

package module
v1.1.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 30, 2020 License: Apache-2.0 Imports: 12 Imported by: 0

README

collector

基于cdp的简易数据采集工具。

Documentation

Index

Constants

This section is empty.

Variables

View Source
// ErrDifferentRuleGroup is a sentinel error whose message is
// "different rule group"; compare against it with errors.Is.
// NOTE(review): the code that returns it is not visible here — presumably
// raised when a rule's Group does not match the RuleGroup it is added to; confirm.
var ErrDifferentRuleGroup = errors.New("different rule group")
View Source
// ErrNoRuleMatched is a sentinel error whose message is "no rule matched";
// compare against it with errors.Is.
// NOTE(review): the code that returns it is not visible here — presumably
// reported when no rule pattern matches a page URL; confirm.
var ErrNoRuleMatched = errors.New("no rule matched")

Functions

This section is empty.

Types

type Field

// Field is the YAML-mapped configuration of a single entry in Rule.Fields.
// Each exported member is read from the YAML key named in its struct tag.
//
// NOTE(review): the code that interprets these members is not visible in
// this listing; the per-member notes below are assumptions to confirm.
type Field struct {
	Name   string `yaml:"name"`
	Alias  string `yaml:"alias"`
	// Presumably a literal value, as opposed to one computed via Eval — confirm.
	Value  string `yaml:"value"`
	// Presumably a script expression evaluated in the page to obtain the
	// value — confirm.
	Eval   string `yaml:"eval"`
	// Presumably controls whether the field is reported to
	// Handler.OnFields — confirm.
	Export bool   `yaml:"export"`
	// Stored as a string; the accepted format (duration, selector, ...) is
	// not visible here — confirm against the parser.
	Wait   string `yaml:"wait"`
	// contains filtered or unexported fields
}

type Handler

// Handler is the callback interface passed to Page.Collect.
type Handler interface {
	// OnFields is called back only once; all fields are returned together
	// in that single call.
	OnFields(*Page, map[string]string)

	// OnLoop is called back according to the configured export cycle
	// (e.g. export_cycle=5 means one callback for every 5 loop iterations).
	// The return value indicates whether the loop should continue.
	// NOTE(review): the meaning of the int and []string arguments is not
	// documented here — confirm against the caller.
	OnLoop(*Page, int, []string) bool

	// OnComplete is called after OnFields and OnLoop have completed.
	OnComplete(*Page)
}

type Loop

// Loop is the YAML-mapped configuration of a rule's loop section
// (Rule.Loop). Each exported member is read from the YAML key named in its
// struct tag.
//
// NOTE(review): the code that interprets these members is not visible in
// this listing; notes marked "presumably" are assumptions to confirm.
type Loop struct {
	Name        string   `yaml:"name"`
	Alias       string   `yaml:"alias"`
	// Export cycle for Handler.OnLoop: per the Handler documentation,
	// export_cycle=5 means one callback for every 5 loop iterations.
	ExportCycle int      `yaml:"export_cycle"`
	// Optional preparation step (pointer, so it may be nil when the YAML
	// omits it); presumably run before the loop starts — confirm.
	Prepare     *Prepare `yaml:"prepare"`
	// Presumably a script expression evaluated on each iteration — confirm.
	Eval        string   `yaml:"eval"`
	// Presumably a script expression that advances to the next
	// iteration/page — confirm.
	Next        string   `yaml:"next"`
	// Stored as a string; accepted format is not visible here — confirm.
	Wait        string   `yaml:"wait"`
	// contains filtered or unexported fields
}

type Page

// Page pairs a URL with the name of the rule group under which rules are
// matched for it. Instances are created with NewPage.
type Page struct {
	Url string

	// Rules are matched within this rule group.
	Group string

	// NOTE(review): presumably the rule selected for this page during
	// Collect; nil until then — confirm.
	Rule *Rule
	// contains filtered or unexported fields
}

func NewPage

func NewPage(url, group string) *Page

func (*Page) Close

func (p *Page) Close()

func (*Page) Collect

func (p *Page) Collect(chrome *cdp.Chrome, rg *RuleGroup, h Handler) error

func (*Page) OnCdpEvent

func (p *Page) OnCdpEvent(msg *cdp.Message)

func (*Page) OnCdpResponse

func (p *Page) OnCdpResponse(msg *cdp.Message) bool

type Pattern

// Pattern holds the textual content of a pattern.
// NOTE(review): its unexported fields are not shown; presumably they hold a
// compiled form of Content built from an entry of Rule.Patterns — confirm.
type Pattern struct {
	Content string
	// contains filtered or unexported fields
}

type Prepare

// Prepare is the YAML-mapped configuration of a preparation step. It is
// referenced by both Rule.Prepare and Loop.Prepare.
//
// NOTE(review): how Eval and Wait are interpreted is not visible in this
// listing — confirm against the code that consumes them.
type Prepare struct {
	// Presumably a script expression evaluated in the page — confirm.
	Eval string `yaml:"eval"`
	// Stored as a string; accepted format is not visible here — confirm.
	Wait string `yaml:"wait"`
	// contains filtered or unexported fields
}

type Rule

// Rule is the YAML-mapped definition of a collection rule. Each exported
// member is read from the YAML key named in its struct tag.
//
// NOTE(review): the code that interprets these members is not visible in
// this listing; notes marked "presumably" are assumptions to confirm.
type Rule struct {
	// Presumably the identifier accepted by RuleGroup.Remove — confirm.
	Id       string   `yaml:"id"`
	Version  int      `yaml:"version"`
	Name     string   `yaml:"name"`
	Alias    string   `yaml:"alias"`
	// Name of the rule group; presumably must agree with the RuleGroup the
	// rule is loaded into (see ErrDifferentRuleGroup) — confirm.
	Group    string   `yaml:"group"`
	// Presumably orders rules when several match the same URL — confirm
	// which direction wins.
	Priority int      `yaml:"priority"`
	// Presumably URL patterns used to select this rule for a page — confirm
	// the pattern syntax.
	Patterns []string `yaml:"patterns"`

	// Optional preparation step; nil when the YAML omits it.
	Prepare *Prepare `yaml:"prepare"`
	// Stored as a string; the accepted format (e.g. a duration string) is
	// not visible here — confirm.
	Timeout string   `yaml:"timeout"`

	// Field definitions and the optional loop section of the rule.
	Fields []*Field `yaml:"fields"`
	Loop   *Loop    `yaml:"loop"`
	// contains filtered or unexported fields
}

type RuleGroup

// RuleGroup is an opaque type with no exported fields. Values are created
// with NewRuleGroup and manipulated through the AppendBytes, AppendFile and
// Remove methods; a *RuleGroup is what Page.Collect accepts.
// NOTE(review): presumably holds the set of Rule values sharing one group
// name — confirm against the implementation.
type RuleGroup struct {
	// contains filtered or unexported fields
}

func NewRuleGroup

func NewRuleGroup(name string) *RuleGroup

func (*RuleGroup) AppendBytes

func (rg *RuleGroup) AppendBytes(bytes []byte) error

func (*RuleGroup) AppendFile

func (rg *RuleGroup) AppendFile(file string) error

func (*RuleGroup) Remove

func (rg *RuleGroup) Remove(id string) error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL