Documentation ¶
Index ¶
- func CreateSnapshot(snapshot *Snapshot) error
- func CreateTask(task *Task) error
- func DeleteTask(id string) error
- func IncrementDomainLinkCount(host string) error
- func UpdateTask(task *Task)
- type Domain
- type DomainSetting
- type IndexDocument
- type KV
- type LinkGroup
- type PageLink
- type Seed
- type Snapshot
- type Task
- type TaskStatus
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func CreateSnapshot ¶
func CreateTask ¶
func DeleteTask ¶
func UpdateTask ¶
func UpdateTask(task *Task)
Types ¶
type Domain ¶
type Domain struct { Host string `storm:"id,unique" json:"host,omitempty" gorm:"not null;unique;primary_key"` LinksCount int64 `json:"links_count,omitempty"` Favicon string `json:"favicon,omitempty"` Settings *DomainSetting `storm:"inline" json:"settings,omitempty"` CreateTime *time.Time `storm:"index" json:"created,omitempty"` UpdateTime *time.Time `storm:"index" json:"updated,omitempty"` }
func CreateDomain ¶
type DomainSetting ¶
// DomainSetting holds per-domain settings. It currently declares no fields.
type DomainSetting struct{}
type IndexDocument ¶
type Seed ¶
// Seed describes a URL to be processed together with its depth and breadth
// values.
type Seed struct {
	// Url is the seed URL. It may not have been cleaned yet and may lack the
	// domain part, in which case Reference is needed to build the complete URL.
	//
	// The gorm tag declares the column as varchar(500) and not null; the type
	// must follow "type:" and the constraint must be its own ";"-separated
	// entry, otherwise gorm reads "not null" as the column type.
	Url string `storm:"index" json:"url,omitempty" gorm:"type:varchar(500);not null"`

	// Reference supplies the information missing from Url; serialized as
	// "reference_url".
	Reference string `json:"reference_url,omitempty"`

	// Depth and Breadth are storm-indexed and serialized as "depth" and
	// "breadth".
	Depth   int `storm:"index" json:"depth,omitempty"`
	Breadth int `storm:"index" json:"breadth,omitempty"`
}
func TaskSeedFromBytes ¶
func (Seed) MustGetBytes ¶
type Snapshot ¶
type Snapshot struct { ID string `json:"id,omitempty" gorm:"not null;unique;primary_key"` Version int `json:"version,omitempty"` Url string `json:"url,omitempty"` TaskID string `json:"task_id,omitempty"` Path string `json:"path,omitempty" gorm:"-"` //path of this file File string `json:"file,omitempty" gorm:"-"` //filename of this page StatusCode int `json:"-" gorm:"-"` Payload []byte `json:"-" gorm:"-"` Size uint64 `json:"size,omitempty"` Headers map[string][]string `json:"-" gorm:"-"` Metadata *map[string]interface{} `json:"-" gorm:"-"` Parameters []KV `json:"-" gorm:"-"` Language string `json:"lang,omitempty" gorm:"-"` Title string `json:"title,omitempty"` Summary string `json:"summary,omitempty" gorm:"-"` Text string `json:"text,omitempty" gorm:"-"` ContentType string `json:"content_type,omitempty"` Tags []string `json:"tags,omitempty" gorm:"-"` Links LinkGroup `json:"links,omitempty" gorm:"-"` Images struct { Internal []PageLink `json:"internal,omitempty"` External []PageLink `json:"external,omitempty"` } `json:"images,omitempty" gorm:"-"` H1 []string `json:"h1,omitempty" gorm:"-"` H2 []string `json:"h2,omitempty" gorm:"-"` H3 []string `json:"h3,omitempty" gorm:"-"` H4 []string `json:"h4,omitempty" gorm:"-"` H5 []string `json:"h5,omitempty" gorm:"-"` Bold []string `json:"bold,omitempty" gorm:"-"` Italic []string `json:"italic,omitempty" gorm:"-"` Classifications []string `json:"classifications,omitempty" gorm:"-"` EnrichedFeatures *map[string]interface{} `json:"enriched_features,omitempty" gorm:"-"` Hash string `json:"hash,omitempty"` SimHash string `json:"sim_hash,omitempty"` CreateTime *time.Time `json:"created,omitempty"` }
type Task ¶
type Task struct { Seed ID string `gorm:"not null;unique;primary_key" json:"id"` Host string `gorm:"index" json:"-"` Schema string `json:"schema,omitempty"` OriginalUrl string `json:"original_url,omitempty"` Phrase pipeline.Phrase `gorm:"index" json:"phrase"` Status TaskStatus `gorm:"index" json:"status"` Message string `json:"-"` CreateTime *time.Time `gorm:"index" json:"created,omitempty"` UpdateTime *time.Time `gorm:"index" json:"updated,omitempty"` LastFetchTime *time.Time `gorm:"index" json:"last_fetch"` LastCheckTime *time.Time `gorm:"index" json:"last_check"` NextCheckTime *time.Time `gorm:"index" json:"next_check"` SnapshotVersion int `json:"snapshot_version"` SnapshotID string `json:"snapshot_id"` //Last Snapshot's ID SnapshotHash string `json:"snapshot_hash"` //Last Snapshot's Hash SnapshotSimHash string `json:"snapshot_simhash"` //Last Snapshot's Simhash SnapshotCreateTime *time.Time `json:"snapshot_created"` //Last Snapshot's Simhash }
func GetPendingNewFetchTasks ¶
func GetTaskByField ¶
type TaskStatus ¶
// TaskStatus is the integer status type used by Task.Status.
type TaskStatus int

// Task status values, listed in numeric order. None of the constants listed
// here has the value 1.
const (
	TaskCreated          TaskStatus = 0
	TaskFetchFailed      TaskStatus = 2
	TaskFetchSuccess     TaskStatus = 3
	Task404Ignore        TaskStatus = 4
	TaskRedirectedIgnore TaskStatus = 5
	TaskFetchTimeout     TaskStatus = 6
)
Click to show internal directories.
Click to hide internal directories.