Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func CreateTask ¶
func DeleteTask ¶
func UpdateTask ¶
func UpdateTask(task *Task)
Types ¶
type Domain ¶
type Domain struct { Host string `storm:"id,unique" json:"host,omitempty" gorm:"not null;unique;primary_key"` LinksCount int64 `json:"links_count,omitempty"` Favicon string `json:"favicon,omitempty"` Settings *DomainSetting `storm:"inline" json:"settings,omitempty"` CreateTime *time.Time `storm:"index" json:"created,omitempty"` UpdateTime *time.Time `storm:"index" json:"updated,omitempty"` }
func CreateDomain ¶
type DomainSetting ¶
// DomainSetting is the settings type referenced by Domain.Settings.
// It currently declares no fields.
type DomainSetting struct{}
type IndexDocument ¶
type Seed ¶
// Seed holds a URL together with the reference URL and the depth/breadth
// counters that accompany it. It is embedded in Task.
type Seed struct {
	// Url is the seed URL. It may not be cleaned and may be missing its
	// domain part, in which case Reference is needed to provide the complete
	// URL information.
	//
	// The gorm "type" option must carry the SQL type itself; "not null" is a
	// separate option. The previous tag had the two swapped, which made
	// "not null" the column type and left varchar(500) as a stray option.
	Url string `storm:"index" json:"url,omitempty" gorm:"type:varchar(500);not null"`

	// Reference is serialized as "reference_url".
	Reference string `json:"reference_url,omitempty"`

	// Depth and Breadth are storm-indexed counters.
	// NOTE(review): presumably crawl depth/breadth relative to the origin — confirm.
	Depth   int `storm:"index" json:"depth,omitempty"`
	Breadth int `storm:"index" json:"breadth,omitempty"`
}
func TaskSeedFromBytes ¶
func (Seed) MustGetBytes ¶
type Snapshot ¶
type Snapshot struct { ID string `json:"id,omitempty" gorm:"not null;unique;primary_key"` Version int `json:"version,omitempty"` Path string `json:"path,omitempty"` //path of this file File string `json:"file,omitempty"` //filename of this page StatusCode int `json:"-"` Payload []byte `json:"-"` Size uint64 `json:"size,omitempty"` Headers map[string][]string `json:"-"` Metadata *map[string]interface{} `json:"-"` Parameters []KV `json:"-"` Language string `json:"lang,omitempty"` Title string `json:"title,omitempty"` Summary string `json:"summary,omitempty"` Text string `json:"text,omitempty"` ContentType string `json:"content_type,omitempty"` Tags []string `json:"tags,omitempty"` Links LinkGroup `json:"links,omitempty"` Images struct { Internal []PageLink `json:"internal,omitempty"` External []PageLink `json:"external,omitempty"` } `json:"images,omitempty"` H1 []string `json:"h1,omitempty"` H2 []string `json:"h2,omitempty"` H3 []string `json:"h3,omitempty"` H4 []string `json:"h4,omitempty"` H5 []string `json:"h5,omitempty"` Bold []string `json:"bold,omitempty"` Italic []string `json:"italic,omitempty"` Classifications []string `json:"classifications,omitempty"` EnrichedFeatures *map[string]interface{} `json:"enriched_features,omitempty"` Hash string `json:"hash,omitempty"` SimHash string `json:"sim_hash,omitempty"` CreateTime *time.Time `json:"created,omitempty"` }
type Task ¶
type Task struct { Seed ID string `gorm:"not null;unique;primary_key" json:"id"` Host string `gorm:"index" json:"-"` Schema string `json:"schema,omitempty"` OriginalUrl string `json:"original_url,omitempty"` Phrase pipeline.Phrase `gorm:"index" json:"phrase"` Status TaskStatus `gorm:"index" json:"status"` Message string `json:"-"` CreateTime *time.Time `gorm:"index" json:"created,omitempty"` UpdateTime *time.Time `gorm:"index" json:"updated,omitempty"` LastFetchTime *time.Time `gorm:"index" json:"-"` LastCheckTime *time.Time `gorm:"index" json:"-"` NextCheckTime *time.Time `gorm:"index" json:"-"` SnapshotVersion int `json:"-"` SnapshotID string `json:"-"` //Last Snapshot's ID SnapshotHash string `json:"-"` //Last Snapshot's Hash SnapshotSimHash string `json:"-"` //Last Snapshot's Simhash }
func GetPendingFetchTasks ¶
func GetTaskByField ¶
type TaskStatus ¶
// TaskStatus is the integer status code stored in Task.Status.
type TaskStatus int

// TaskStatus values visible in this listing. The numbering is explicit and
// not contiguous: no constant with value 1 is declared here.
const (
	TaskCreated      TaskStatus = 0
	TaskFetchFailed  TaskStatus = 2
	TaskFetchSuccess TaskStatus = 3
)
Click to show internal directories.
Click to hide internal directories.