Documentation ¶
Index ¶
- Constants
- Variables
- func Chunk(slice []*Article, chunkSize int) [][]*Article
- type Article
- func (article *Article) GetDomain() string
- func (article *Article) GetDomainClassName() string
- func (article *Article) GetDomainScore() float64
- func (article *Article) GetHTML() template.HTML
- func (article *Article) GetKeywords() string
- func (article *Article) GetPos() string
- func (article *Article) GetPublishedAt() string
- func (article *Article) GetSlug() string
- func (article *Article) GetText() string
- func (article *Article) GetTitle() string
- func (article *Article) GetURL() string
- func (article *Article) GetZeroShot() []string
- type Engine
- func (engine *Engine) Close()
- func (engine *Engine) FindByFeedURL(feed string) ([]*Article, error)
- func (engine *Engine) FindByURL(url string) (*Article, error)
- func (engine *Engine) GetAllValid() ([]*Article, error)
- func (engine *Engine) Insert(txn *sql.Tx, article *Article, feedUrl string, domain string) error
- func (engine *Engine) RunArticleIndex(ctx context.Context, chunkSize int, workers int) error
- func (engine *Engine) RunArticleMeta(ctx context.Context, chunkSize int) error
- func (engine *Engine) RunArticleMetaPassII(ctx context.Context) error
- func (engine *Engine) Update(txn *sql.Tx, article *Article) error
Constants ¶
View Source
const ARTICLE_SELECT = `` /* 255-byte string literal not displayed */
View Source
const CONTENT_EXTRACT_AGO_SECONDS = 60 * 60 * 24 * 28
View Source
const CONTENT_EXTRACT_LIMIT = 100_000
View Source
const REFRESH_AGO_SECONDS = 60 * 60 * 24 * 14
View Source
const REFRESH_LIMIT = 50_000
View Source
const STAGE_COMPLETE = 10
View Source
const STAGE_FAILED = 0
View Source
const STAGE_INDEXED = 1
View Source
const STAGE_VALID_CONTENT = 2
Variables ¶
View Source
var ErrNoBodyFound = fmt.Errorf("no body found")
View Source
var ErrNotInEnglish = fmt.Errorf("not in english")
View Source
var ErrTooManyFailingOnDomain = fmt.Errorf("too many fails on domain")
Functions ¶
Types ¶
type Article ¶
type Article struct {
	Url                  string
	FeedUrl              string
	Domain               string
	PublishedAt          int64
	BodyRaw              *serialize.Content
	LastFetchAt          int64
	LastMetaAt           int64
	LastContentExtractAt int64
	Title                string
	Description          string
	Body                 *serialize.Content
	BadCount             int64
	BadElementCount      int64
	LinkCount            int64
	BadLinkCount         int64
	HTMLLength           int64
	Stage                int64
	SentenceEmbedding    *serialize.Embeddings
	ExtractedKeywords    *serialize.Keywords
	Classifications      *serialize.Keywords

	// Used in live/output
	DomainScore float64
	DayPosition int
}
func (*Article) GetDomainClassName ¶
func (*Article) GetDomainScore ¶
func (*Article) GetKeywords ¶
func (*Article) GetPublishedAt ¶
func (*Article) GetZeroShot ¶
type Engine ¶
type Engine struct {
// contains filtered or unexported fields
}
func (*Engine) GetAllValid ¶
func (*Engine) RunArticleIndex ¶
func (*Engine) RunArticleMeta ¶
func (*Engine) RunArticleMetaPassII ¶
Click to show internal directories.
Click to hide internal directories.