config

package
v1.0.0-beta.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 20, 2021 License: BSD-2-Clause Imports: 8 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Config

// Config holds whatsnew application-wide configuration settings.
// It is the value returned by FromYAMLFile; each field is decoded from the
// top-level YAML key named in its tag.
type Config struct {
	// DB holds database settings.
	DB      DB      `yaml:"db"`
	// Faktory holds Faktory settings and generic workers properties.
	Faktory Faktory `yaml:"faktory"`
	// HNSW holds settings for connecting to the HNSW server and handling vector indices.
	HNSW    HNSW    `yaml:"hnsw"`
	// Server holds settings for the HTTP and gRPC server.
	Server  Server  `yaml:"server"`
	// Tasks holds settings for the various tasks.
	Tasks   Tasks   `yaml:"tasks"`
	// Workers holds settings for the various workers.
	Workers Workers `yaml:"workers"`
}

Config holds whatsnew application-wide configuration settings.

func FromYAMLFile

func FromYAMLFile(filename string) (*Config, error)

FromYAMLFile reads a Config object from a YAML file.

Before being decoded, the whole YAML file content is passed through os.ExpandEnv.

type DB

// DB holds database settings.
type DB struct {
	// DSN is the data source name with the database name left out;
	// the database name is supplied separately in DBName.
	DSN      string     `yaml:"dsn"`
	// DBName is the database name that DSN excludes.
	DBName   string     `yaml:"dbname"`
	// LogLevel is a GORM logger level, decoded from YAML text through
	// DBLogLevel.UnmarshalText.
	LogLevel DBLogLevel `yaml:"loglevel"`
}

DB holds database settings.

type DBLogLevel

// DBLogLevel is a redefinition of the GORM logger LogLevel type. Its pointer
// receiver implements encoding.TextUnmarshaler (see UnmarshalText), so the
// level can be written as text in the YAML configuration.
type DBLogLevel gormlogger.LogLevel

DBLogLevel is a redefinition of GORM logger.LogLevel which satisfies encoding.TextUnmarshaler, to be conveniently parsed from YAML.

func (*DBLogLevel) UnmarshalText

func (l *DBLogLevel) UnmarshalText(text []byte) error

UnmarshalText satisfies the encoding.TextUnmarshaler interface, unmarshaling the text to a DBLogLevel.

type DuplicateDetector

// DuplicateDetector holds settings for the duplicate detector worker.
type DuplicateDetector struct {
	// NOTE(review): presumably the Faktory queues this worker consumes — confirm.
	Queues                     []string     `yaml:"queues"`
	// NOTE(review): presumably the look-back window, in days, searched for
	// duplicates — confirm against the worker implementation.
	TimeframeDays              int          `yaml:"timeframe_days"`
	// NOTE(review): presumably the vector distance cut-off used to decide
	// whether two articles are duplicates — confirm direction and metric.
	DistanceThreshold          float32      `yaml:"distance_threshold"`
	// NonDuplicateWebArticleJobs lists Faktory job descriptors (see FaktoryJob).
	// NOTE(review): presumably scheduled for articles found not to be duplicates — confirm.
	NonDuplicateWebArticleJobs []FaktoryJob `yaml:"non_duplicate_web_article_jobs"`
	// DuplicateWebArticleJobs lists Faktory job descriptors (see FaktoryJob).
	// NOTE(review): presumably scheduled for articles found to be duplicates — confirm.
	DuplicateWebArticleJobs    []FaktoryJob `yaml:"duplicate_web_article_jobs"`
	// LogLevel is a zerolog level decoded from YAML text via LogLevel.UnmarshalText.
	LogLevel                   LogLevel     `yaml:"loglevel"`
}

DuplicateDetector holds settings for the duplicate detector worker.

type Faktory

// Faktory holds Faktory settings and generic workers properties.
type Faktory struct {
	// URL is decoded from the "url" key.
	// NOTE(review): presumably the address of the Faktory server — confirm.
	URL      string   `yaml:"url"`
	// LogLevel is a zerolog level decoded from YAML text via LogLevel.UnmarshalText.
	LogLevel LogLevel `yaml:"loglevel"`
}

Faktory holds Faktory settings and generic workers properties.

type FaktoryJob

// FaktoryJob describes a Faktory job to be scheduled for execution.
// Slices of FaktoryJob appear throughout the task and worker settings.
type FaktoryJob struct {
	// JobType is decoded from the "job_type" key.
	JobType    string `yaml:"job_type"`
	// Queue is decoded from the "queue" key.
	// NOTE(review): presumably the Faktory queue the job is pushed to — confirm.
	Queue      string `yaml:"queue"`
	// NOTE(review): looks like Faktory's reserve_for job option; the unit
	// (seconds?) is not visible here — confirm.
	ReserveFor int    `yaml:"reserve_for"`
	// NOTE(review): looks like Faktory's retry job option (retry count) — confirm.
	Retry      int    `yaml:"retry"`
}

FaktoryJob describes a Faktory job to be scheduled for execution.

type FeedFetcher

// FeedFetcher holds settings for the FeedFetcher worker.
type FeedFetcher struct {
	// NOTE(review): presumably the Faktory queues this worker consumes — confirm.
	Queues                   []string                 `yaml:"queues"`
	// NOTE(review): presumably the number of jobs processed concurrently — confirm.
	Concurrency              int                      `yaml:"concurrency"`
	// NewWebResourceJobs lists Faktory job descriptors (see FaktoryJob).
	// NOTE(review): presumably scheduled for each newly discovered web resource — confirm.
	NewWebResourceJobs       []FaktoryJob             `yaml:"new_web_resource_jobs"`
	// NOTE(review): presumably the number of failures tolerated for a feed
	// before it is given up on — confirm.
	MaxAllowedFailures       int                      `yaml:"max_allowed_failures"`
	// OmitItemsPublishedBefore is the enabled/time pair decoded from the
	// "omit_items_published_before" key.
	OmitItemsPublishedBefore OmitItemsPublishedBefore `yaml:"omit_items_published_before"`
	// NOTE(review): presumably a list of language codes; whether it is an
	// allow-list or a deny-list is not visible here — confirm.
	LanguageFilter           []string                 `yaml:"language_filter"`
	// LogLevel is a zerolog level decoded from YAML text via LogLevel.UnmarshalText.
	LogLevel                 LogLevel                 `yaml:"loglevel"`
}

FeedFetcher holds settings for the FeedFetcher worker.

type FeedScheduler

// FeedScheduler holds settings for scheduling feeds for further processing.
type FeedScheduler struct {
	// TimeInterval is a duration decoded from the "time_interval" key.
	// NOTE(review): presumably the period between scheduling runs — confirm.
	TimeInterval time.Duration `yaml:"time_interval"`
	// Jobs lists Faktory job descriptors (see FaktoryJob).
	Jobs         []FaktoryJob  `yaml:"jobs"`
	// LogLevel is a zerolog level decoded from YAML text via LogLevel.UnmarshalText.
	LogLevel     LogLevel      `yaml:"loglevel"`
}

FeedScheduler holds settings for scheduling feeds for further processing.

type GDELTFetcher

// GDELTFetcher holds settings for fetching GDELT events and extracting news
// report URLs for further processing.
type GDELTFetcher struct {
	// TimeInterval is a duration decoded from the "time_interval" key.
	// NOTE(review): presumably the period between fetches — confirm.
	TimeInterval           time.Duration `yaml:"time_interval"`
	// NOTE(review): presumably the GDELT event root codes that are accepted;
	// the behavior of an empty list is not visible here — confirm.
	EventRootCodeWhitelist []string      `yaml:"event_root_code_whitelist"`
	// NewWebResourceJobs lists Faktory job descriptors (see FaktoryJob).
	// NOTE(review): presumably scheduled for each extracted news report URL — confirm.
	NewWebResourceJobs     []FaktoryJob  `yaml:"new_web_resource_jobs"`
	// LogLevel is a zerolog level decoded from YAML text via LogLevel.UnmarshalText.
	LogLevel               LogLevel      `yaml:"loglevel"`
}

GDELTFetcher holds settings for fetching GDELT events and extracting news report URLs for further processing.

type GRPCServer

// GRPCServer holds common settings for connecting to a gRPC server.
// It is reused by the HNSW settings and by several worker settings.
type GRPCServer struct {
	// Target is decoded from the "target" key.
	// NOTE(review): presumably the gRPC dial target (address) — confirm.
	Target     string `yaml:"target"`
	// TLSEnabled is decoded from the "tls_enabled" key.
	// NOTE(review): presumably selects a TLS connection over a plaintext one — confirm.
	TLSEnabled bool   `yaml:"tls_enabled"`
}

GRPCServer holds common settings for connecting to a gRPC server.

type GeoParser

// GeoParser holds settings for the geo-parser worker.
type GeoParser struct {
	// NOTE(review): presumably the Faktory queues this worker consumes — confirm.
	Queues                  []string     `yaml:"queues"`
	// NOTE(review): presumably the number of jobs processed concurrently — confirm.
	Concurrency             int          `yaml:"concurrency"`
	// ProcessedWebArticleJobs lists Faktory job descriptors (see FaktoryJob).
	// NOTE(review): presumably scheduled once an article has been processed — confirm.
	ProcessedWebArticleJobs []FaktoryJob `yaml:"processed_web_article_jobs"`
	// CliffURI is decoded from the "cliff_uri" key.
	// NOTE(review): presumably the URI of a CLIFF geo-parsing service — confirm.
	CliffURI                string       `yaml:"cliff_uri"`
	// LogLevel is a zerolog level decoded from YAML text via LogLevel.UnmarshalText.
	LogLevel                LogLevel     `yaml:"loglevel"`
}

GeoParser holds settings for the geo-parser worker.

type HNSW

// HNSW holds settings for connecting to HNSW server and handling vector indices.
type HNSW struct {
	// Server holds the gRPC connection settings (see GRPCServer).
	Server GRPCServer `yaml:"server"`
	// Index holds settings for HNSW vector indices (see HNSWIndex).
	Index  HNSWIndex  `yaml:"index"`
}

HNSW holds settings for connecting to HNSW server and handling vector indices.

type HNSWIndex

// HNSWIndex holds settings for HNSW vector indices.
//
// NOTE(review): Dim, EfConstruction, M, MaxElements and Seed look like the
// usual HNSW index-creation parameters — confirm against the HNSW gRPC API's
// CreateIndexRequest before relying on their exact meaning.
type HNSWIndex struct {
	// NamePrefix is decoded from the "name_prefix" key.
	// NOTE(review): presumably prepended to generated index names — confirm.
	NamePrefix     string        `yaml:"name_prefix"`
	Dim            int32         `yaml:"dim"`
	EfConstruction int32         `yaml:"ef_construction"`
	M              int32         `yaml:"m"`
	MaxElements    int32         `yaml:"max_elements"`
	Seed           int32         `yaml:"seed"`
	// SpaceType is the HNSW gRPC API CreateIndexRequest_SpaceType value,
	// decoded from YAML text via HNSWSpaceType.UnmarshalText.
	SpaceType      HNSWSpaceType `yaml:"space_type"`
}

HNSWIndex holds settings for HNSW vector indices.

type HNSWPurger

// HNSWPurger holds settings for the periodic deletion of old HNSW indices.
type HNSWPurger struct {
	// TimeInterval is a duration decoded from the "time_interval" key.
	// NOTE(review): presumably the period between purge runs — confirm.
	TimeInterval               time.Duration `yaml:"time_interval"`
	// NOTE(review): presumably the age, in days, beyond which an index is
	// deleted — confirm how index age is determined.
	DeleteIndicesOlderThanDays int           `yaml:"delete_indices_older_than_days"`
	// LogLevel is a zerolog level decoded from YAML text via LogLevel.UnmarshalText.
	LogLevel                   LogLevel      `yaml:"loglevel"`
}

HNSWPurger holds settings for the periodic deletion of old HNSW indices.

type HNSWSpaceType

HNSWSpaceType is a redefinition of HNSW gRPC API CreateIndexRequest_SpaceType which satisfies encoding.TextUnmarshaler, to be conveniently parsed from YAML.

func (*HNSWSpaceType) UnmarshalText

func (hst *HNSWSpaceType) UnmarshalText(text []byte) (err error)

UnmarshalText satisfies the encoding.TextUnmarshaler interface, unmarshaling the text to an HNSW gRPC API CreateIndexRequest_SpaceType.

type InformationExtractor

// InformationExtractor holds settings for the information extractor worker.
type InformationExtractor struct {
	// NOTE(review): presumably the Faktory queues this worker consumes — confirm.
	Queues                  []string     `yaml:"queues"`
	// NOTE(review): presumably the number of jobs processed concurrently — confirm.
	Concurrency             int          `yaml:"concurrency"`
	// SpagoBERTServer holds gRPC connection settings (see GRPCServer),
	// decoded from the "spago_bert_server" key.
	SpagoBERTServer         GRPCServer   `yaml:"spago_bert_server"`
	// ProcessedWebArticleJobs lists Faktory job descriptors (see FaktoryJob).
	// NOTE(review): presumably scheduled once an article has been processed — confirm.
	ProcessedWebArticleJobs []FaktoryJob `yaml:"processed_web_article_jobs"`
	// LogLevel is a zerolog level decoded from YAML text via LogLevel.UnmarshalText.
	LogLevel                LogLevel     `yaml:"loglevel"`
}

InformationExtractor holds settings for the information extractor worker.

type JobsRecoverer

// JobsRecoverer holds settings for the periodic recovery process of pending jobs.
type JobsRecoverer struct {
	// TimeInterval is a duration decoded from the "time_interval" key.
	// NOTE(review): presumably the period between recovery runs — confirm.
	TimeInterval time.Duration `yaml:"time_interval"`
	// LeewayTime is a duration decoded from the "leeway_time" key.
	// NOTE(review): presumably the grace period before a pending job is
	// considered in need of recovery — confirm.
	LeewayTime   time.Duration `yaml:"leeway_time"`
	// LogLevel is a zerolog level decoded from YAML text via LogLevel.UnmarshalText.
	LogLevel     LogLevel      `yaml:"loglevel"`
}

JobsRecoverer holds settings for the periodic recovery process of pending jobs.

type LogLevel

// LogLevel is a redefinition of zerolog.Level. Its pointer receiver implements
// encoding.TextUnmarshaler (see UnmarshalText), so the level can be written as
// text in the YAML configuration.
type LogLevel zerolog.Level

LogLevel is a redefinition of zerolog.Level which satisfies encoding.TextUnmarshaler, to be conveniently parsed from YAML.

func (*LogLevel) UnmarshalText

func (l *LogLevel) UnmarshalText(text []byte) (err error)

UnmarshalText satisfies the encoding.TextUnmarshaler interface, unmarshaling the text to a LogLevel.

type OmitItemsPublishedBefore

// OmitItemsPublishedBefore is an enabled/time pair used by the FeedFetcher
// settings (omit_items_published_before) and by the TwitterScraper settings
// (omit_tweets_published_before).
type OmitItemsPublishedBefore struct {
	// Enabled is decoded from the "enabled" key.
	// NOTE(review): presumably turns the filter on; Time is likely ignored
	// when this is false — confirm.
	Enabled bool      `yaml:"enabled"`
	// Time is decoded from the "time" key.
	// NOTE(review): presumably the cut-off instant; items published before it
	// are omitted — confirm inclusivity and time zone handling.
	Time    time.Time `yaml:"time"`
}

OmitItemsPublishedBefore is part of FeedFetcher settings; TwitterScraper settings reuse it for the omit_tweets_published_before option.

type Server

// Server holds settings for the HTTP and gRPC server.
type Server struct {
	// Address is decoded from the "address" key.
	// NOTE(review): presumably the listen address (host:port) — confirm.
	Address        string   `yaml:"address"`
	// TLSEnabled is decoded from the "tls_enabled" key.
	TLSEnabled     bool     `yaml:"tls_enabled"`
	// NOTE(review): TLSCert and TLSKey are presumably file paths to the
	// certificate and private key, used only when TLSEnabled is true — confirm.
	TLSCert        string   `yaml:"tls_cert"`
	TLSKey         string   `yaml:"tls_key"`
	// NOTE(review): presumably the origins accepted for cross-origin (CORS)
	// requests — confirm.
	AllowedOrigins []string `yaml:"allowed_origins"`
	// LogLevel is a zerolog level decoded from YAML text via LogLevel.UnmarshalText.
	LogLevel       LogLevel `yaml:"loglevel"`
}

Server holds settings for the HTTP and gRPC server.

type Tasks

// Tasks holds settings for various tasks. Each field groups the settings of
// one task and is decoded from the YAML key named in its tag.
type Tasks struct {
	// FeedScheduler holds settings for scheduling feeds for further processing.
	FeedScheduler    FeedScheduler    `yaml:"feed_scheduler"`
	// TwitterScheduler holds settings for scheduling Twitter sources for further processing.
	TwitterScheduler TwitterScheduler `yaml:"twitter_scheduler"`
	// GDELTFetcher holds settings for fetching GDELT events and extracting
	// news report URLs for further processing.
	GDELTFetcher     GDELTFetcher     `yaml:"gdelt_fetcher"`
	// JobsRecoverer holds settings for the periodic recovery process of pending jobs.
	JobsRecoverer    JobsRecoverer    `yaml:"jobs_recoverer"`
	// HNSWPurger holds settings for the periodic deletion of old HNSW indices.
	HNSWPurger       HNSWPurger       `yaml:"hnsw_purger"`
}

Tasks holds settings for various tasks.

type TextClassifier

// TextClassifier holds settings for the text classifier worker.
type TextClassifier struct {
	// NOTE(review): presumably the Faktory queues this worker consumes — confirm.
	Queues                  []string     `yaml:"queues"`
	// NOTE(review): presumably the number of jobs processed concurrently — confirm.
	Concurrency             int          `yaml:"concurrency"`
	// ProcessedWebArticleJobs lists Faktory job descriptors (see FaktoryJob).
	// NOTE(review): presumably scheduled once an article has been processed — confirm.
	ProcessedWebArticleJobs []FaktoryJob `yaml:"processed_web_article_jobs"`
	// ClassifierServer holds gRPC connection settings (see GRPCServer),
	// decoded from the "classifier_server" key.
	ClassifierServer        GRPCServer   `yaml:"classifier_server"`
	// LogLevel is a zerolog level decoded from YAML text via LogLevel.UnmarshalText.
	LogLevel                LogLevel     `yaml:"loglevel"`
}

TextClassifier holds settings for the text classifier worker.

type Translator

// Translator holds settings for the translator worker.
type Translator struct {
	// NOTE(review): presumably the Faktory queues this worker consumes — confirm.
	Queues                  []string     `yaml:"queues"`
	// NOTE(review): presumably the number of jobs processed concurrently — confirm.
	Concurrency             int          `yaml:"concurrency"`
	// TranslatorServer holds gRPC connection settings (see GRPCServer),
	// decoded from the "translator_server" key.
	TranslatorServer        GRPCServer   `yaml:"translator_server"`
	// ProcessedWebArticleJobs lists Faktory job descriptors (see FaktoryJob).
	// NOTE(review): presumably scheduled once an article has been processed — confirm.
	ProcessedWebArticleJobs []FaktoryJob `yaml:"processed_web_article_jobs"`
	// NOTE(review): presumably the source languages eligible for translation;
	// the behavior of an empty list is not visible here — confirm.
	LanguageWhitelist       []string     `yaml:"language_whitelist"`
	// NOTE(review): presumably the language texts are translated into — confirm
	// the expected code format.
	TargetLanguage          string       `yaml:"target_language"`
	// LogLevel is a zerolog level decoded from YAML text via LogLevel.UnmarshalText.
	LogLevel                LogLevel     `yaml:"loglevel"`
}

Translator holds settings for the translator worker.

type TwitterScheduler

// TwitterScheduler holds settings for scheduling Twitter sources for further
// processing. It has the same shape as FeedScheduler.
type TwitterScheduler struct {
	// TimeInterval is a duration decoded from the "time_interval" key.
	// NOTE(review): presumably the period between scheduling runs — confirm.
	TimeInterval time.Duration `yaml:"time_interval"`
	// Jobs lists Faktory job descriptors (see FaktoryJob).
	Jobs         []FaktoryJob  `yaml:"jobs"`
	// LogLevel is a zerolog level decoded from YAML text via LogLevel.UnmarshalText.
	LogLevel     LogLevel      `yaml:"loglevel"`
}

TwitterScheduler holds settings for scheduling Twitter sources for further processing.

type TwitterScraper

// TwitterScraper holds settings for the TwitterScraper worker.
type TwitterScraper struct {
	// NOTE(review): presumably the Faktory queues this worker consumes — confirm.
	Queues                    []string                 `yaml:"queues"`
	// NOTE(review): presumably the number of jobs processed concurrently — confirm.
	Concurrency               int                      `yaml:"concurrency"`
	// NOTE(review): presumably the maximum number of tweets scraped per
	// source and run — confirm.
	MaxTweetsNumber           int                      `yaml:"max_tweets_number"`
	// NewWebArticleJobs lists Faktory job descriptors (see FaktoryJob).
	// NOTE(review): presumably scheduled for each newly created web article — confirm.
	NewWebArticleJobs         []FaktoryJob             `yaml:"new_web_article_jobs"`
	// OmitTweetsPublishedBefore reuses the OmitItemsPublishedBefore
	// enabled/time pair, decoded from the "omit_tweets_published_before" key.
	OmitTweetsPublishedBefore OmitItemsPublishedBefore `yaml:"omit_tweets_published_before"`
	// NOTE(review): presumably a list of language codes; whether it is an
	// allow-list or a deny-list is not visible here — confirm.
	LanguageFilter            []string                 `yaml:"language_filter"`
	// LogLevel is a zerolog level decoded from YAML text via LogLevel.UnmarshalText.
	LogLevel                  LogLevel                 `yaml:"loglevel"`
}

TwitterScraper holds settings for the TwitterScraper worker.

type Vectorizer

// Vectorizer holds settings for the Vectorizer worker.
type Vectorizer struct {
	// NOTE(review): presumably the Faktory queues this worker consumes — confirm.
	Queues                   []string     `yaml:"queues"`
	// NOTE(review): presumably the number of jobs processed concurrently — confirm.
	Concurrency              int          `yaml:"concurrency"`
	// VectorizedWebArticleJobs lists Faktory job descriptors (see FaktoryJob).
	// NOTE(review): presumably scheduled once an article has been vectorized — confirm.
	VectorizedWebArticleJobs []FaktoryJob `yaml:"vectorized_web_article_jobs"`
	// SpagoBERTServer holds gRPC connection settings (see GRPCServer),
	// decoded from the "spago_bert_server" key.
	SpagoBERTServer          GRPCServer   `yaml:"spago_bert_server"`
	// LogLevel is a zerolog level decoded from YAML text via LogLevel.UnmarshalText.
	LogLevel                 LogLevel     `yaml:"loglevel"`
}

Vectorizer holds settings for the Vectorizer worker.

type WebScraper

// WebScraper holds settings for the WebScraper worker.
type WebScraper struct {
	// NOTE(review): presumably the Faktory queues this worker consumes — confirm.
	Queues            []string      `yaml:"queues"`
	// NOTE(review): presumably the number of jobs processed concurrently — confirm.
	Concurrency       int           `yaml:"concurrency"`
	// NewWebArticleJobs lists Faktory job descriptors (see FaktoryJob).
	// NOTE(review): presumably scheduled for each newly scraped web article — confirm.
	NewWebArticleJobs []FaktoryJob  `yaml:"new_web_article_jobs"`
	// NOTE(review): presumably a list of language codes; whether it is an
	// allow-list or a deny-list is not visible here — confirm.
	LanguageFilter    []string      `yaml:"language_filter"`
	// RequestTimeout is a duration decoded from the "request_timeout" key.
	// NOTE(review): presumably the per-request HTTP timeout — confirm.
	RequestTimeout    time.Duration `yaml:"request_timeout"`
	// UserAgent is decoded from the "user_agent" key.
	// NOTE(review): presumably sent as the HTTP User-Agent header — confirm.
	UserAgent         string        `yaml:"user_agent"`
	// LogLevel is a zerolog level decoded from YAML text via LogLevel.UnmarshalText.
	LogLevel          LogLevel      `yaml:"loglevel"`
}

WebScraper holds settings for the WebScraper worker.

type Workers

// Workers holds settings for the various workers. Each field groups the
// settings of one worker and is decoded from the YAML key named in its tag.
type Workers struct {
	// FeedFetcher holds settings for the FeedFetcher worker.
	FeedFetcher          FeedFetcher          `yaml:"feed_fetcher"`
	// TwitterScraper holds settings for the TwitterScraper worker.
	TwitterScraper       TwitterScraper       `yaml:"twitter_scraper"`
	// WebScraper holds settings for the WebScraper worker.
	WebScraper           WebScraper           `yaml:"web_scraper"`
	// Translator holds settings for the translator worker.
	Translator           Translator           `yaml:"translator"`
	// ZeroShotClassifier holds settings for the zero-shot classifier worker.
	ZeroShotClassifier   ZeroShotClassifier   `yaml:"zero_shot_classifier"`
	// TextClassifier holds settings for the text classifier worker.
	TextClassifier       TextClassifier       `yaml:"text_classifier"`
	// GeoParser holds settings for the geo-parser worker.
	GeoParser            GeoParser            `yaml:"geo_parser"`
	// Vectorizer holds settings for the Vectorizer worker.
	Vectorizer           Vectorizer           `yaml:"vectorizer"`
	// DuplicateDetector holds settings for the duplicate detector worker.
	DuplicateDetector    DuplicateDetector    `yaml:"duplicate_detector"`
	// InformationExtractor holds settings for the information extractor worker.
	InformationExtractor InformationExtractor `yaml:"information_extractor"`
}

Workers holds settings for the various workers.

type ZeroShotClassifier

// ZeroShotClassifier holds settings for the zero-shot classifier worker.
type ZeroShotClassifier struct {
	// NOTE(review): presumably the Faktory queues this worker consumes — confirm.
	Queues                  []string     `yaml:"queues"`
	// NOTE(review): presumably the number of jobs processed concurrently — confirm.
	Concurrency             int          `yaml:"concurrency"`
	// ProcessedWebArticleJobs lists Faktory job descriptors (see FaktoryJob).
	// NOTE(review): presumably scheduled once an article has been processed — confirm.
	ProcessedWebArticleJobs []FaktoryJob `yaml:"processed_web_article_jobs"`
	// SpagoBARTServer holds gRPC connection settings (see GRPCServer),
	// decoded from the "spago_bart_server" key.
	SpagoBARTServer         GRPCServer   `yaml:"spago_bart_server"`
	// LogLevel is a zerolog level decoded from YAML text via LogLevel.UnmarshalText.
	LogLevel                LogLevel     `yaml:"loglevel"`
}

ZeroShotClassifier holds settings for the zero-shot classifier worker.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL