Documentation ¶
Index ¶
- Constants
- func NewTaskJoint(task *model.Task) Joint
- type EmptyJoint
- type FetchJoint
- type HashJoint
- type HtmlToTextJoint
- type IgnoreTimeoutJoint
- type IndexJoint
- type InitTaskJoint
- type LoadMetadataJoint
- type ParsePageJoint
- type SaveSnapshotToDBJoint
- type SaveSnapshotToFileSystemJoint
- type SaveTaskJoint
- type UrlCheckFilterJoint
- type UrlExtFilterJoint
- type UrlNormalizationJoint
Constants ¶
View Source
const (
	CONTEXT_CRAWLER_DOMAIN   ParaKey = "CRAWLER_DOMAIN"
	CONTEXT_CRAWLER_TASK     ParaKey = "CRAWLER_TASK"
	CONTEXT_CRAWLER_SNAPSHOT ParaKey = "CRAWLER_SNAPSHOT"
	CONTEXT_PAGE_LINKS       ParaKey = "PAGE_LINKS"
)
View Source
const Cookie ParaKey = "cookie"
View Source
const Empty JointKey = "empty"
View Source
const Fetch JointKey = "fetch"
View Source
const Hash JointKey = "hash"
View Source
const HtmlToText JointKey = "html2text"
View Source
const IgnoreTimeout JointKey = "ignore_timeout"
View Source
const InitTask JointKey = "init_task"
View Source
const LoadMetadata JointKey = "load_metadata"
View Source
const ParsePage JointKey = "parse"
View Source
const Proxy ParaKey = "proxy"
View Source
const Publish JointKey = "index"
View Source
const SaveSnapshotToDB JointKey = "save_snapshot_db"
View Source
const SaveSnapshotToFileSystem JointKey = "save_snapshot_fs"
View Source
const SaveTask JointKey = "save_task"
View Source
const TaskID ParaKey = "TASK_ID"
View Source
const UrlCheckFilter JointKey = "url_check_filter"
View Source
const UrlExtFilter JointKey = "url_ext_filter"
View Source
const UrlNormalization JointKey = "url_normalization"
Variables ¶
This section is empty.
Functions ¶
func NewTaskJoint ¶
Types ¶
type EmptyJoint ¶
type EmptyJoint struct { }
func (EmptyJoint) Name ¶
func (this EmptyJoint) Name() string
func (EmptyJoint) Process ¶
func (this EmptyJoint) Process(s *Context) error
type FetchJoint ¶
type FetchJoint struct {
	Parameters
	// contains filtered or unexported fields
}
func (FetchJoint) Name ¶
func (this FetchJoint) Name() string
func (FetchJoint) Process ¶
func (this FetchJoint) Process(context *Context) error
type HtmlToTextJoint ¶
type HtmlToTextJoint struct {
MergeWhitespace bool //merge whitespace and \n
}
func (HtmlToTextJoint) Name ¶
func (this HtmlToTextJoint) Name() string
func (HtmlToTextJoint) Process ¶
func (this HtmlToTextJoint) Process(context *Context) error
type IgnoreTimeoutJoint ¶
type IgnoreTimeoutJoint struct {
IgnoreTimeoutAfterCount int64
}
func (IgnoreTimeoutJoint) Name ¶
func (this IgnoreTimeoutJoint) Name() string
func (IgnoreTimeoutJoint) Process ¶
func (this IgnoreTimeoutJoint) Process(context *Context) error
type IndexJoint ¶
type IndexJoint struct { }
func (IndexJoint) Name ¶
func (this IndexJoint) Name() string
func (IndexJoint) Process ¶
func (this IndexJoint) Process(c *Context) error
type InitTaskJoint ¶
func (InitTaskJoint) Name ¶
func (this InitTaskJoint) Name() string
func (InitTaskJoint) Process ¶
func (this InitTaskJoint) Process(context *Context) error
type LoadMetadataJoint ¶
type LoadMetadataJoint struct { }
LoadMetadataJoint loads metadata from the database.
func (LoadMetadataJoint) Name ¶
func (this LoadMetadataJoint) Name() string
func (LoadMetadataJoint) Process ¶
func (this LoadMetadataJoint) Process(context *Context) error
type ParsePageJoint ¶
type ParsePageJoint struct {
	DispatchLinks    bool
	MaxDepth         int         // max depth of pages to follow
	MaxBreadth       int         // max breadth of the domain to follow
	MaxPageOfBreadth map[int]int // max pages to fetch in each level's breadth, e.g. 1:100;2:50;3:5;4:1
}
func (ParsePageJoint) Name ¶
func (this ParsePageJoint) Name() string
func (ParsePageJoint) Process ¶
func (this ParsePageJoint) Process(context *Context) error
type SaveSnapshotToDBJoint ¶
func (SaveSnapshotToDBJoint) Name ¶
func (this SaveSnapshotToDBJoint) Name() string
func (SaveSnapshotToDBJoint) Process ¶
func (this SaveSnapshotToDBJoint) Process(c *Context) error
type SaveSnapshotToFileSystemJoint ¶
type SaveSnapshotToFileSystemJoint struct {
// contains filtered or unexported fields
}
func (SaveSnapshotToFileSystemJoint) Name ¶
func (this SaveSnapshotToFileSystemJoint) Name() string
func (SaveSnapshotToFileSystemJoint) Process ¶
func (this SaveSnapshotToFileSystemJoint) Process(c *Context) error
type SaveTaskJoint ¶
type SaveTaskJoint struct {
IsCreate bool
}
func (SaveTaskJoint) Name ¶
func (this SaveTaskJoint) Name() string
func (SaveTaskJoint) Process ¶
func (this SaveTaskJoint) Process(context *Context) error
type UrlCheckFilterJoint ¶
type UrlCheckFilterJoint struct {
	Parameters
	// ignore files ending with js, css, apk, zip
	SkipPageParsePattern *regexp.Regexp
}
func (UrlCheckFilterJoint) Name ¶
func (this UrlCheckFilterJoint) Name() string
func (UrlCheckFilterJoint) Process ¶
func (this UrlCheckFilterJoint) Process(context *Context) error
type UrlExtFilterJoint ¶
type UrlExtFilterJoint struct {
	// ignore files ending with js, css, apk, zip
	SkipPageParsePattern *regexp.Regexp
}
func (UrlExtFilterJoint) Name ¶
func (this UrlExtFilterJoint) Name() string
func (UrlExtFilterJoint) Process ¶
func (this UrlExtFilterJoint) Process(context *Context) error
type UrlNormalizationJoint ¶
type UrlNormalizationJoint struct {
	FollowAllDomain      bool
	FollowDomainSettings bool
	FollowSubDomain      bool
	// contains filtered or unexported fields
}
func (UrlNormalizationJoint) Name ¶
func (this UrlNormalizationJoint) Name() string
func (UrlNormalizationJoint) Process ¶
func (this UrlNormalizationJoint) Process(context *Context) error
Click to show internal directories.
Click to hide internal directories.