Documentation
¶
Index ¶
- Constants
- func CreateHostConfig(config *HostConfig) error
- func CreateProject(project *Project) error
- func CreateSnapshot(snapshot *Snapshot) error
- func CreateTask(task *Task) error
- func DeleteHostConfig(id string) error
- func DeleteProject(id string) error
- func DeleteSnapshot(snapshot *Snapshot) error
- func DeleteTask(id string) error
- func GetHostStatus(status int) (error, map[string]interface{})
- func GetTaskStatus(host string) (error, map[string]interface{})
- func GetTaskStatusText(status int) string
- func UpdateHostConfig(config *HostConfig) error
- func UpdateProject(project *Project) error
- func UpdateTask(task *Task) error
- type Domain
- type FetchTask
- type Host
- type HostConfig
- type Index
- type KV
- type LinkGroup
- type PageLink
- type Project
- type Snapshot
- type Task
- func GetFailedTasks(offset int64) (int, []Task, error)
- func GetPendingNewFetchTasks(offset int64, size int) (int, []Task, error)
- func GetPendingUpdateFetchTasks(offset int64) (int, []Task, error)
- func GetTask(id string) (Task, error)
- func GetTaskByField(k, v string) ([]Task, error)
- func GetTaskList(from, size int, host string, status int) (int, []Task, error)
- func NewTask(url, ref string, depth int, breadth int) *Task
- type Url
Constants ¶
View Source
const ( CONTEXT_SNAPSHOT pipeline.ParaKey = "SNAPSHOT" CONTEXT_PAGE_LINKS pipeline.ParaKey = "PAGE_LINKS" )
Common pipeline context keys
View Source
const ( CONTEXT_TASK_ID pipeline.ParaKey = "GOPA_TASK_ID" CONTEXT_TASK_URL pipeline.ParaKey = "GOPA_TASK_URL" CONTEXT_TASK_Reference pipeline.ParaKey = "GOPA_TASK_Reference" CONTEXT_TASK_Depth pipeline.ParaKey = "GOPA_TASK_Depth" CONTEXT_TASK_Breadth pipeline.ParaKey = "GOPA_TASK_Breadth" CONTEXT_TASK_Host pipeline.ParaKey = "GOPA_TASK_Host" CONTEXT_TASK_Schema pipeline.ParaKey = "GOPA_TASK_Schema" CONTEXT_TASK_OriginalUrl pipeline.ParaKey = "GOPA_TASK_OriginalUrl" CONTEXT_TASK_Status pipeline.ParaKey = "GOPA_TASK_Status" CONTEXT_TASK_Message pipeline.ParaKey = "GOPA_TASK_Message" CONTEXT_TASK_Created pipeline.ParaKey = "GOPA_TASK_Created" CONTEXT_TASK_Updated pipeline.ParaKey = "GOPA_TASK_Updated" CONTEXT_TASK_LastFetch pipeline.ParaKey = "GOPA_TASK_LastFetch" CONTEXT_TASK_LastCheck pipeline.ParaKey = "GOPA_TASK_LastCheck" CONTEXT_TASK_NextCheck pipeline.ParaKey = "GOPA_TASK_NextCheck" CONTEXT_TASK_SnapshotID pipeline.ParaKey = "GOPA_TASK_SnapshotID" CONTEXT_TASK_SnapshotSimHash pipeline.ParaKey = "GOPA_TASK_SnapshotSimHash" CONTEXT_TASK_SnapshotHash pipeline.ParaKey = "GOPA_TASK_SnapshotHash" CONTEXT_TASK_SnapshotCreated pipeline.ParaKey = "GOPA_TASK_SnapshotCreated" CONTEXT_TASK_SnapshotVersion pipeline.ParaKey = "GOPA_TASK_SnapshotVersion" CONTEXT_TASK_LastScreenshotID pipeline.ParaKey = "GOPA_TASK_LastScreenshotID" CONTEXT_TASK_PipelineConfigID pipeline.ParaKey = "GOPA_TASK_PipelineConfigID" CONTEXT_TASK_Cookies pipeline.ParaKey = "GOPA_TASK_Cookies" CONTEXT_SNAPSHOT_ContentType pipeline.ParaKey = "GOPA_SNAPSHOT_ContentType" )
View Source
const PreFetchCheck = 4
View Source
const PreFetchCheckError = 6
View Source
const PreFetchChecking = 5
View Source
const PreFetchPendingCheck = 3
View Source
const StageAfterFetch = 2
View Source
const StageFetch = 1
View Source
const StagePreFetch = 0
View Source
const Task404 int = 4
View Source
const TaskCreated int = 0
View Source
const TaskDuplicated int = 7
View Source
const TaskFailed int = 2
View Source
const TaskInterrupted int = 8
View Source
const TaskPendingFetch int = 9
View Source
const TaskRedirected int = 5
View Source
const TaskSuccess int = 3
View Source
const TaskTimeout int = 6
Variables ¶
This section is empty.
Functions ¶
func CreateHostConfig ¶
func CreateHostConfig(config *HostConfig) error
func CreateProject ¶
func CreateSnapshot ¶
func CreateTask ¶
func DeleteHostConfig ¶
func DeleteProject ¶
func DeleteSnapshot ¶
func DeleteTask ¶
func GetHostStatus ¶
func GetTaskStatus ¶
func GetTaskStatusText ¶
func UpdateHostConfig ¶
func UpdateHostConfig(config *HostConfig) error
func UpdateProject ¶
func UpdateTask ¶
Types ¶
type FetchTask ¶
func (*FetchTask) UpdateStatus ¶
type Host ¶
type Host struct { Host string `json:"host,omitempty" elastic_meta:"_id" elastic_mapping:"host: { type: keyword, ignore_above: 256 }"` Favicon string `json:"favicon,omitempty"` Enabled bool `json:"enabled"` HostConfigs *[]HostConfig `json:"host_configs,omitempty"` Created time.Time `json:"created,omitempty"` Updated time.Time `json:"updated,omitempty"` }
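Host describes a single host: its host name, favicon, enabled flag, associated host configs, and creation/update timestamps.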
type HostConfig ¶
type HostConfig struct { ID string `json:"id,omitempty" elastic_meta:"_id"` Host string `json:"host"` UrlPattern string `json:"url_pattern"` Runner string `json:"runner"` SortOrder int `json:"sort_order"` PipelineID string `json:"pipeline_id"` Cookies string `json:"cookies,omitempty"` Created time.Time `json:"created,omitempty"` Updated time.Time `json:"updated,omitempty"` }
func GetHostConfig ¶
func GetHostConfig(runner, host string) []HostConfig
func GetHostConfigByHostAndUrl ¶
func GetHostConfigByHostAndUrl(runner, host, url string) (*HostConfig, error)
func GetHostConfigByID ¶
func GetHostConfigByID(id string) (HostConfig, error)
func GetHostConfigList ¶
func GetHostConfigList(from, size int, host string) (int, []HostConfig, error)
type Project ¶
type Project struct { ID string `json:"id,omitempty" elastic_meta:"_id"` Name string `json:"name,omitempty"` Description string `json:"description,omitempty"` Enabled bool `json:"enabled"` Created time.Time `json:"created,omitempty"` Updated time.Time `json:"updated,omitempty"` Banner string `json:"banner,omitempty"` Favicon string `json:"favicon,omitempty"` DomainRules config.Rules `json:"domain_rules,omitempty"` UrlRules config.Rules `json:"url_rules,omitempty"` }
Project is a definition that includes a collection of Hosts.
func GetProject ¶
type Snapshot ¶
type Snapshot struct { ID string `json:"id,omitempty" elastic_meta:"_id"` Version int `json:"version,omitempty"` Url string `json:"url,omitempty"` TaskID string `json:"task_id,omitempty"` Path string `json:"path,omitempty"` //path of this file File string `json:"file,omitempty"` //filename of this page Ext string `json:"ext,omitempty"` //extension of filename StatusCode int `json:"-"` Payload []byte `json:"-"` Size uint64 `json:"size,omitempty"` ScreenshotID string `json:"screenshot_id,omitempty"` Headers map[string][]string `json:"-"` Metadata *map[string]interface{} `json:"-"` Parameters []KV `json:"-"` Language string `json:"lang,omitempty"` Title string `json:"title,omitempty" elastic_mapping:"title: { type: text, fields: { keyword: { type: keyword } } }"` Summary string `json:"summary,omitempty"` Text string `json:"text,omitempty" elastic_mapping:"text: { type: text }"` ContentType string `json:"content_type,omitempty"` Tags []string `json:"tags,omitempty"` Links LinkGroup `json:"links,omitempty" elastic_mapping:"links:{type:object}"` Images struct { Internal []PageLink `json:"internal,omitempty" elastic_mapping:"internal:{type:object}"` External []PageLink `json:"external,omitempty" elastic_mapping:"external:{type:object}"` } `json:"images,omitempty" elastic_mapping:"images:{type:object}"` H1 []string `json:"h1,omitempty" elastic_mapping:"h1: { type: text }"` H2 []string `json:"h2,omitempty" elastic_mapping:"h2: { type: text }"` H3 []string `json:"h3,omitempty" elastic_mapping:"h3: { type: text }"` H4 []string `json:"h4,omitempty" elastic_mapping:"h4: { type: text }"` H5 []string `json:"h5,omitempty" elastic_mapping:"h5: { type: text }"` Bold []string `json:"bold,omitempty" elastic_mapping:"bold: { type: text }"` Italic []string `json:"italic,omitempty"` Classifications []string `json:"classifications,omitempty"` EnrichedFeatures *map[string]interface{} `json:"enriched_features,omitempty"` Hash string `json:"hash,omitempty"` SimHash string 
`json:"sim_hash,omitempty"` Created time.Time `json:"created,omitempty"` }
func GetSnapshot ¶
func GetSnapshotByField ¶
type Task ¶
type Task struct { ID string `json:"id" elastic_meta:"_id"` // the url may not cleaned, may miss the host part, need reference to provide the complete url information Url string `json:"url,omitempty"` Reference string `json:"reference_url,omitempty"` Depth int `json:"depth"` Breadth int `json:"breadth"` Host string `json:"host"` Schema string `json:"schema,omitempty"` OriginalUrl string `json:"original_url,omitempty"` Status int `json:"status"` Message string `json:"message,omitempty"` Created time.Time `json:"created,omitempty" elastic_mapping:"created: { type: date }"` Updated time.Time `json:"updated,omitempty" elastic_mapping:"updated: { type: date }"` LastFetch time.Time `json:"last_fetch,omitempty" elastic_mapping:"last_fetch: { type: date }"` LastCheck time.Time `json:"last_check,omitempty" elastic_mapping:"last_check: { type: date }"` NextCheck time.Time `json:"next_check,omitempty" elastic_mapping:"next_check: { type: date }"` SnapshotVersion int `json:"snapshot_version,omitempty"` SnapshotID string `json:"snapshot_id,omitempty"` SnapshotHash string `json:"snapshot_hash,omitempty"` SnapshotSimHash string `json:"snapshot_simhash,omitempty"` SnapshotCreated time.Time `json:"snapshot_created,omitempty" elastic_mapping:"snapshot_created: { type: date }"` LastScreenshotID string `json:"last_screenshot_id,omitempty"` PipelineConfigID string `json:"pipline_config_id,omitempty"` HostConfig *HostConfig `json:"host_config,omitempty"` // transient properties Snapshots []Snapshot `json:"-"` SnapshotCount int `json:"-"` }
func GetPendingNewFetchTasks ¶
func GetTaskByField ¶
Click to show internal directories.
Click to hide internal directories.