article

package
v0.0.0-...-39aa7b3 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 6, 2024 License: MIT Imports: 22 Imported by: 0

Documentation

Index

Constants

View Source
// ARTICLE_SELECT is a string constant whose 255-byte literal is elided in this
// rendering. NOTE(review): presumably the shared SELECT clause used when
// loading Article rows — confirm against the source.
const ARTICLE_SELECT = `` /* 255-byte string literal not displayed */
View Source
// CONTENT_EXTRACT_AGO_SECONDS is 28 days expressed in seconds
// (60 s * 60 min * 24 h * 28 d = 2,419,200).
// NOTE(review): presumably the age window applied to content extraction —
// confirm where it is compared.
const CONTENT_EXTRACT_AGO_SECONDS = 60 * 60 * 24 * 28
View Source
// CONTENT_EXTRACT_LIMIT is the constant 100,000.
// NOTE(review): presumably a cap on how many articles one content-extraction
// run handles — confirm at the use site.
const CONTENT_EXTRACT_LIMIT = 100_000
View Source
// REFRESH_AGO_SECONDS is 14 days expressed in seconds
// (60 s * 60 min * 24 h * 14 d = 1,209,600).
// NOTE(review): presumably the age window applied when refreshing articles —
// confirm where it is compared.
const REFRESH_AGO_SECONDS = 60 * 60 * 24 * 14
View Source
// REFRESH_LIMIT is the constant 50,000.
// NOTE(review): presumably a cap on how many articles one refresh run
// handles — confirm at the use site.
const REFRESH_LIMIT = 50_000
View Source
// STAGE_COMPLETE is the stage value 10, the largest of the STAGE_* constants.
// NOTE(review): presumably stored in Article.Stage — confirm.
const STAGE_COMPLETE = 10
View Source
// STAGE_FAILED is the stage value 0, which is also the zero value of an int64
// field. NOTE(review): presumably stored in Article.Stage, in which case an
// unset Stage is indistinguishable from a failed one — confirm.
const STAGE_FAILED = 0
View Source
// STAGE_INDEXED is the stage value 1.
// NOTE(review): presumably stored in Article.Stage — confirm.
const STAGE_INDEXED = 1
View Source
// STAGE_VALID_CONTENT is the stage value 2.
// NOTE(review): presumably stored in Article.Stage — confirm.
const STAGE_VALID_CONTENT = 2

Variables

View Source
// ErrNoBodyFound is a package-level sentinel error with the message
// "no body found".
var ErrNoBodyFound = fmt.Errorf("no body found")
View Source
// ErrNotInEnglish is a package-level sentinel error with the message
// "not in english".
var ErrNotInEnglish = fmt.Errorf("not in english")
View Source
// ErrTooManyFailingOnDomain is a package-level sentinel error with the message
// "too many fails on domain".
var ErrTooManyFailingOnDomain = fmt.Errorf("too many fails on domain")

Functions

func Chunk

func Chunk(slice []*Article, chunkSize int) [][]*Article

Types

type Article

// Article holds the data this package keeps for a single article: its source
// identifiers, raw and processed content, bookkeeping counters, a stage
// value, and model-derived data.
//
// NOTE(review): the per-group notes below are inferred from field names and
// types only; confirm them against the code that populates the struct.
type Article struct {
	// Source identifiers, timestamps and basic metadata. The *At fields are
	// int64; presumably Unix timestamps — TODO confirm units.
	Url                  string
	FeedUrl              string
	Domain               string
	PublishedAt          int64
	BodyRaw              *serialize.Content
	LastFetchAt          int64
	LastMetaAt           int64
	LastContentExtractAt int64
	Title                string
	Description          string

	// Second content value (same serialize.Content type as BodyRaw) followed
	// by int64 counters. NOTE(review): how Body differs from BodyRaw, and what
	// counts as "bad", is not visible here.
	Body            *serialize.Content
	BadCount        int64
	BadElementCount int64
	LinkCount       int64
	BadLinkCount    int64

	// HTMLLength is an int64 length; presumably of the fetched HTML — TODO
	// confirm whether it counts bytes or runes.
	HTMLLength int64

	// Stage is an int64; presumably one of the STAGE_* constants declared in
	// this package — TODO confirm.
	Stage int64

	// Derived data held in the serialize package's Embeddings and Keywords
	// types.
	SentenceEmbedding *serialize.Embeddings
	ExtractedKeywords *serialize.Keywords
	Classifications   *serialize.Keywords

	// Used in live/output
	DomainScore float64
	DayPosition int
}

func (*Article) GetDomain

func (article *Article) GetDomain() string

func (*Article) GetDomainClassName

func (article *Article) GetDomainClassName() string

func (*Article) GetDomainScore

func (article *Article) GetDomainScore() float64

func (*Article) GetHTML

func (article *Article) GetHTML() template.HTML

func (*Article) GetKeywords

func (article *Article) GetKeywords() string

func (*Article) GetPos

func (article *Article) GetPos() string

func (*Article) GetPublishedAt

func (article *Article) GetPublishedAt() string

func (*Article) GetSlug

func (article *Article) GetSlug() string

func (*Article) GetText

func (article *Article) GetText() string

func (*Article) GetTitle

func (article *Article) GetTitle() string

func (*Article) GetURL

func (article *Article) GetURL() string

func (*Article) GetZeroShot

func (article *Article) GetZeroShot() []string

type Engine

// Engine is the receiver type for this package's lookup, insert/update and
// Run* methods. Its fields are unexported, so values are obtained from
// NewEngine rather than built as struct literals.
type Engine struct {
	// contains filtered or unexported fields
}

func NewEngine

func NewEngine(log *log.Logger, db *sql.DB, sd *statsd.Client, cachePath string, withModels bool) (*Engine, error)

func (*Engine) Close

func (engine *Engine) Close()

func (*Engine) FindByFeedURL

func (engine *Engine) FindByFeedURL(feed string) ([]*Article, error)

func (*Engine) FindByURL

func (engine *Engine) FindByURL(url string) (*Article, error)

func (*Engine) GetAllValid

func (engine *Engine) GetAllValid() ([]*Article, error)

func (*Engine) Insert

func (engine *Engine) Insert(txn *sql.Tx, article *Article, feedUrl string, domain string) error

func (*Engine) RunArticleIndex

func (engine *Engine) RunArticleIndex(ctx context.Context, chunkSize int, workers int) error

func (*Engine) RunArticleMeta

func (engine *Engine) RunArticleMeta(ctx context.Context, chunkSize int) error

func (*Engine) RunArticleMetaPassII

func (engine *Engine) RunArticleMetaPassII(ctx context.Context) error

func (*Engine) Update

func (engine *Engine) Update(txn *sql.Tx, article *Article) error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL