types

package
v0.4.17 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 20, 2023 License: MIT Imports: 3 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type DocumentRow added in v0.3.0

// DocumentRow represents the full-text document for a fetched web page. The
// body is held as markdown rather than raw HTML.
type DocumentRow struct {
	// NOTE(review): presumably the MD5 digest of the document content — confirm.
	DocumentMd5 string
	// NOTE(review): presumably the MD5 digest of the page URL, matching
	// UrlDbEntity.UrlMd5 — confirm.
	UrlMd5      string
	StatusCode  int        // the HTTP status code returned during fetch
	AccessedAt  *time.Time // Nullable
	Body        *string    // Fulltext of the webpage as markdown
}

DocumentRow represents a full-text document: the HTML version of a web page. However, the HTML body itself is not stored (for now). Instead, the page is distilled to plain text, and a markdown version is stored on disk — again, for now.

type Extractor

// Extractor is implemented by each data source that can supply UrlRow and
// VisitRow records from a SQL database.
//
// NOTE(review): judging by the VisitRow.ExtractorName comment, each
// implementation presumably corresponds to one browser's history database —
// confirm.
type Extractor interface {
	// GetName returns the extractor's name.
	// NOTE(review): presumably the value recorded in VisitRow.ExtractorName —
	// confirm.
	GetName() string
	// GetDBPath returns the database path currently configured on the
	// extractor (see SetDBPath).
	GetDBPath() string
	// SetDBPath sets the database path the extractor should use.
	SetDBPath(string)
	// GetAllUrlsSince queries conn and returns URL rows.
	// NOTE(review): presumably only rows visited at or after since; whether
	// the bound is inclusive is not visible here — confirm.
	GetAllUrlsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]UrlRow, error)
	// GetAllVisitsSince queries conn and returns visit rows.
	// NOTE(review): presumably only visits at or after since; whether the
	// bound is inclusive is not visible here — confirm.
	GetAllVisitsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]VisitRow, error)

	// Verify that the passed db can actually be connected to. In the case of
	// sqlite, it's not uncommon for a db to be locked. The Open call will work
	// but the db cannot be read.
	VerifyConnection(ctx context.Context, conn *sql.DB) (bool, error)
}

type SearchableEntity added in v0.3.0

// SearchableEntity is the JSON-serializable representation of a URL record. It
// is the return type of UrlDbEntityToSearchableEntity and
// UrlDbSearchEntityToSearchableEntity.
//
// None of the JSON tags use omitempty, so nil pointer fields are encoded as
// JSON null rather than being dropped.
type SearchableEntity struct {
	// NOTE(review): presumably the URL's MD5 (UrlMd5 on the source entity) —
	// confirm against the converter implementations.
	Id          string     `json:"id"`
	Url         string     `json:"url"`
	Title       *string    `json:"title"`
	Description *string    `json:"description"`
	LastVisit   *time.Time `json:"last_visit"`
	// Match, MatchCount and SumRank mirror the same-named fields on
	// UrlDbSearchEntity.
	// NOTE(review): presumably nil when built from a plain UrlDbEntity, which
	// has no such fields — confirm.
	Match       *string    `json:"match"`
	MatchCount  *int       `json:"match_count"`
	SumRank     *float64   `json:"sum_rank"`
}

func UrlDbEntityToSearchableEntity added in v0.3.0

// UrlDbEntityToSearchableEntity converts a UrlDbEntity into a SearchableEntity.
// NOTE(review): the field mapping is not visible here; UrlDbEntity has no
// Match, MatchCount or SumRank fields, so those are presumably left nil —
// confirm.
func UrlDbEntityToSearchableEntity(x UrlDbEntity) SearchableEntity

func UrlDbSearchEntityToSearchableEntity added in v0.4.0

// UrlDbSearchEntityToSearchableEntity converts a UrlDbSearchEntity into a
// SearchableEntity.
// NOTE(review): the field mapping is not visible here; presumably the
// same-named fields (including Match, MatchCount and SumRank) are copied
// across — confirm.
func UrlDbSearchEntityToSearchableEntity(x UrlDbSearchEntity) SearchableEntity

type UrlDbEntity

// UrlDbEntity began as a representation of a URL row and was later augmented
// with the document body, which is only available via a join.
type UrlDbEntity struct {
	// NOTE(review): presumably the MD5 digest of Url — confirm.
	UrlMd5      string
	Url         string
	Title       *string
	Description *string
	LastVisit   *time.Time
	// Body is a pointer and so may be nil.
	// NOTE(review): presumably nil when the join finds no document — confirm.
	Body        *string
	// NOTE(review): presumably the MD5 digest of Body — confirm.
	BodyMd5     *string
}

Initially this was a URL row representation but it was later augmented with body, which is only available via join.

type UrlDbSearchEntity added in v0.4.0

// UrlDbSearchEntity carries the same URL fields as UrlDbEntity (without Body
// and BodyMd5) plus Match, MatchCount and SumRank. It can be converted to a
// SearchableEntity with UrlDbSearchEntityToSearchableEntity.
type UrlDbSearchEntity struct {
	// NOTE(review): presumably the MD5 digest of Url — confirm.
	UrlMd5      string
	Url         string
	Title       *string
	Description *string
	LastVisit   *time.Time
	// NOTE(review): Match, MatchCount and SumRank look like full-text search
	// output (matched text, number of matches, summed rank) — confirm against
	// the query that populates them.
	Match       *string
	MatchCount  *int
	SumRank     *float64
}

type UrlMetaRow

// UrlMetaRow holds meta information about a URL.
type UrlMetaRow struct {
	Url       string
	// NOTE(review): presumably the time the URL was last indexed; nil if it
	// has not been indexed — confirm.
	IndexedAt *time.Time // Nullable
}

Meta information about the URL

type UrlRow

// UrlRow is a single URL record, as returned by Extractor.GetAllUrlsSince.
// Every field except Url is a pointer and may be nil.
type UrlRow struct {
	Url         string
	Title       *string    // Nullable
	Description *string    // Nullable
	LastVisit   *time.Time // Nullable
}

type VisitRow

// VisitRow is a single visit to a URL, as returned by
// Extractor.GetAllVisitsSince.
type VisitRow struct {
	Url      string
	// NOTE(review): presumably the time the visit occurred — confirm.
	Datetime time.Time
	// The data extractor that created this visit. Not present on URLs since URLs
	// are often visited in multiple browsers.
	ExtractorName string
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL