Documentation
¶
Index ¶
- type Config
- type DB
- type DBLogLevel
- type DuplicateDetector
- type Faktory
- type FaktoryJob
- type FeedFetcher
- type FeedScheduler
- type GDELTFetcher
- type GRPCServer
- type GeoParser
- type HNSW
- type HNSWIndex
- type HNSWPurger
- type HNSWSpaceType
- type InformationExtractor
- type JobsRecoverer
- type LogLevel
- type OmitItemsPublishedBefore
- type Server
- type Tasks
- type TextClassifier
- type Translator
- type TwitterScheduler
- type TwitterScraper
- type Vectorizer
- type WebScraper
- type Workers
- type ZeroShotClassifier
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Config ¶
type Config struct { DB DB `yaml:"db"` Faktory Faktory `yaml:"faktory"` HNSW HNSW `yaml:"hnsw"` Server Server `yaml:"server"` Tasks Tasks `yaml:"tasks"` Workers Workers `yaml:"workers"` }
Config holds whatsnew application-wide configuration settings.
func FromYAMLFile ¶
FromYAMLFile reads a Config object from a YAML file.
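Before being decoded, the whole YAML file content is passed through os.ExpandEnv, so $VAR and ${VAR} references in the file are replaced with the values of the corresponding environment variables (references to undefined variables expand to an empty string).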
type DB ¶
type DB struct {
	// DSN, dbname excluded.
	DSN      string     `yaml:"dsn"`
	DBName   string     `yaml:"dbname"`
	LogLevel DBLogLevel `yaml:"loglevel"`
}
DB holds database settings.
type DBLogLevel ¶
type DBLogLevel gormlogger.LogLevel
DBLogLevel is a redefinition of GORM logger.LogLevel which satisfies encoding.TextUnmarshaler, to be conveniently parsed from YAML.
func (*DBLogLevel) UnmarshalText ¶
func (l *DBLogLevel) UnmarshalText(text []byte) error
UnmarshalText satisfies the encoding.TextUnmarshaler interface, unmarshaling the text to a DBLogLevel.
type DuplicateDetector ¶
type DuplicateDetector struct { Queues []string `yaml:"queues"` TimeframeDays int `yaml:"timeframe_days"` DistanceThreshold float32 `yaml:"distance_threshold"` NonDuplicateWebArticleJobs []FaktoryJob `yaml:"non_duplicate_web_article_jobs"` DuplicateWebArticleJobs []FaktoryJob `yaml:"duplicate_web_article_jobs"` LogLevel LogLevel `yaml:"loglevel"` }
DuplicateDetector holds settings for the duplicate detector worker.
type FaktoryJob ¶
type FaktoryJob struct { JobType string `yaml:"job_type"` Queue string `yaml:"queue"` ReserveFor int `yaml:"reserve_for"` Retry int `yaml:"retry"` }
FaktoryJob describes a Faktory job to be scheduled for execution.
type FeedFetcher ¶
type FeedFetcher struct { Queues []string `yaml:"queues"` Concurrency int `yaml:"concurrency"` NewWebResourceJobs []FaktoryJob `yaml:"new_web_resource_jobs"` MaxAllowedFailures int `yaml:"max_allowed_failures"` OmitItemsPublishedBefore OmitItemsPublishedBefore `yaml:"omit_items_published_before"` LanguageFilter []string `yaml:"language_filter"` LogLevel LogLevel `yaml:"loglevel"` }
FeedFetcher holds settings for the FeedFetcher worker.
type FeedScheduler ¶
type FeedScheduler struct { TimeInterval time.Duration `yaml:"time_interval"` Jobs []FaktoryJob `yaml:"jobs"` LogLevel LogLevel `yaml:"loglevel"` }
FeedScheduler holds settings for scheduling feeds for further processing.
type GDELTFetcher ¶
type GDELTFetcher struct { TimeInterval time.Duration `yaml:"time_interval"` EventRootCodeWhitelist []string `yaml:"event_root_code_whitelist"` NewWebResourceJobs []FaktoryJob `yaml:"new_web_resource_jobs"` LogLevel LogLevel `yaml:"loglevel"` }
GDELTFetcher holds settings for fetching GDELT events and extracting news report URLs for further processing.
type GRPCServer ¶
GRPCServer holds common settings for connecting to a gRPC server.
type GeoParser ¶
type GeoParser struct { Queues []string `yaml:"queues"` Concurrency int `yaml:"concurrency"` ProcessedWebArticleJobs []FaktoryJob `yaml:"processed_web_article_jobs"` CliffURI string `yaml:"cliff_uri"` LogLevel LogLevel `yaml:"loglevel"` }
GeoParser holds settings for the geo-parser worker.
type HNSW ¶
type HNSW struct { Server GRPCServer `yaml:"server"` Index HNSWIndex `yaml:"index"` }
HNSW holds settings for connecting to the HNSW server and handling vector indices.
type HNSWIndex ¶
type HNSWIndex struct { NamePrefix string `yaml:"name_prefix"` Dim int32 `yaml:"dim"` EfConstruction int32 `yaml:"ef_construction"` M int32 `yaml:"m"` MaxElements int32 `yaml:"max_elements"` Seed int32 `yaml:"seed"` SpaceType HNSWSpaceType `yaml:"space_type"` }
HNSWIndex holds settings for HNSW vector indices.
type HNSWPurger ¶
type HNSWPurger struct { TimeInterval time.Duration `yaml:"time_interval"` DeleteIndicesOlderThanDays int `yaml:"delete_indices_older_than_days"` LogLevel LogLevel `yaml:"loglevel"` }
HNSWPurger holds settings for the periodic deletion of old HNSW indices.
type HNSWSpaceType ¶
type HNSWSpaceType hnswgrpcapi.CreateIndexRequest_SpaceType
HNSWSpaceType is a redefinition of HNSW gRPC API CreateIndexRequest_SpaceType which satisfies encoding.TextUnmarshaler, to be conveniently parsed from YAML.
func (*HNSWSpaceType) UnmarshalText ¶
func (hst *HNSWSpaceType) UnmarshalText(text []byte) (err error)
UnmarshalText satisfies the encoding.TextUnmarshaler interface, unmarshaling the text to an HNSW gRPC API CreateIndexRequest_SpaceType.
type InformationExtractor ¶
type InformationExtractor struct { Queues []string `yaml:"queues"` Concurrency int `yaml:"concurrency"` SpagoBERTServer GRPCServer `yaml:"spago_bert_server"` ProcessedWebArticleJobs []FaktoryJob `yaml:"processed_web_article_jobs"` LogLevel LogLevel `yaml:"loglevel"` }
InformationExtractor holds settings for the information extractor worker.
type JobsRecoverer ¶
type JobsRecoverer struct { TimeInterval time.Duration `yaml:"time_interval"` LeewayTime time.Duration `yaml:"leeway_time"` LogLevel LogLevel `yaml:"loglevel"` }
JobsRecoverer holds settings for the periodic recovery process of pending jobs.
type LogLevel ¶
LogLevel is a redefinition of zerolog.Level which satisfies encoding.TextUnmarshaler, to be conveniently parsed from YAML.
func (*LogLevel) UnmarshalText ¶
UnmarshalText satisfies the encoding.TextUnmarshaler interface, unmarshaling the text to a LogLevel.
type OmitItemsPublishedBefore ¶
OmitItemsPublishedBefore is part of FeedFetcher settings; TwitterScraper reuses the same type for its OmitTweetsPublishedBefore field.
type Server ¶
type Server struct { Address string `yaml:"address"` TLSEnabled bool `yaml:"tls_enabled"` TLSCert string `yaml:"tls_cert"` TLSKey string `yaml:"tls_key"` AllowedOrigins []string `yaml:"allowed_origins"` LogLevel LogLevel `yaml:"loglevel"` }
Server holds settings for the HTTP and gRPC server.
type Tasks ¶
type Tasks struct { FeedScheduler FeedScheduler `yaml:"feed_scheduler"` TwitterScheduler TwitterScheduler `yaml:"twitter_scheduler"` GDELTFetcher GDELTFetcher `yaml:"gdelt_fetcher"` JobsRecoverer JobsRecoverer `yaml:"jobs_recoverer"` HNSWPurger HNSWPurger `yaml:"hnsw_purger"` }
Tasks holds settings for various tasks.
type TextClassifier ¶
type TextClassifier struct { Queues []string `yaml:"queues"` Concurrency int `yaml:"concurrency"` ProcessedWebArticleJobs []FaktoryJob `yaml:"processed_web_article_jobs"` ClassifierServer GRPCServer `yaml:"classifier_server"` LogLevel LogLevel `yaml:"loglevel"` }
TextClassifier holds settings for the text classifier worker.
type Translator ¶
type Translator struct { Queues []string `yaml:"queues"` Concurrency int `yaml:"concurrency"` TranslatorServer GRPCServer `yaml:"translator_server"` ProcessedWebArticleJobs []FaktoryJob `yaml:"processed_web_article_jobs"` LanguageWhitelist []string `yaml:"language_whitelist"` TargetLanguage string `yaml:"target_language"` LogLevel LogLevel `yaml:"loglevel"` }
Translator holds settings for the translator worker.
type TwitterScheduler ¶
type TwitterScheduler struct { TimeInterval time.Duration `yaml:"time_interval"` Jobs []FaktoryJob `yaml:"jobs"` LogLevel LogLevel `yaml:"loglevel"` }
TwitterScheduler holds settings for scheduling Twitter sources for further processing.
type TwitterScraper ¶
type TwitterScraper struct { Queues []string `yaml:"queues"` Concurrency int `yaml:"concurrency"` MaxTweetsNumber int `yaml:"max_tweets_number"` NewWebArticleJobs []FaktoryJob `yaml:"new_web_article_jobs"` OmitTweetsPublishedBefore OmitItemsPublishedBefore `yaml:"omit_tweets_published_before"` LanguageFilter []string `yaml:"language_filter"` LogLevel LogLevel `yaml:"loglevel"` }
TwitterScraper holds settings for the TwitterScraper worker.
type Vectorizer ¶
type Vectorizer struct { Queues []string `yaml:"queues"` Concurrency int `yaml:"concurrency"` VectorizedWebArticleJobs []FaktoryJob `yaml:"vectorized_web_article_jobs"` SpagoBERTServer GRPCServer `yaml:"spago_bert_server"` LogLevel LogLevel `yaml:"loglevel"` }
Vectorizer holds settings for the Vectorizer worker.
type WebScraper ¶
type WebScraper struct { Queues []string `yaml:"queues"` Concurrency int `yaml:"concurrency"` NewWebArticleJobs []FaktoryJob `yaml:"new_web_article_jobs"` LanguageFilter []string `yaml:"language_filter"` RequestTimeout time.Duration `yaml:"request_timeout"` UserAgent string `yaml:"user_agent"` LogLevel LogLevel `yaml:"loglevel"` }
WebScraper holds settings for the WebScraper worker.
type Workers ¶
type Workers struct { FeedFetcher FeedFetcher `yaml:"feed_fetcher"` TwitterScraper TwitterScraper `yaml:"twitter_scraper"` WebScraper WebScraper `yaml:"web_scraper"` Translator Translator `yaml:"translator"` ZeroShotClassifier ZeroShotClassifier `yaml:"zero_shot_classifier"` TextClassifier TextClassifier `yaml:"text_classifier"` GeoParser GeoParser `yaml:"geo_parser"` Vectorizer Vectorizer `yaml:"vectorizer"` DuplicateDetector DuplicateDetector `yaml:"duplicate_detector"` InformationExtractor InformationExtractor `yaml:"information_extractor"` }
Workers holds settings for the various workers.
type ZeroShotClassifier ¶
type ZeroShotClassifier struct { Queues []string `yaml:"queues"` Concurrency int `yaml:"concurrency"` ProcessedWebArticleJobs []FaktoryJob `yaml:"processed_web_article_jobs"` SpagoBARTServer GRPCServer `yaml:"spago_bart_server"` LogLevel LogLevel `yaml:"loglevel"` }
ZeroShotClassifier holds settings for the zero-shot classifier worker.