Documentation ¶
Index ¶
- Constants
- Variables
- func AddRedacted(newRedacted string, useMutex bool)
- func CSVHeader() []string
- func DayStart(dt time.Time) time.Time
- func EndpointIncluded(ctx *Ctx, ep *RawEndpoint, origin string) (bool, int)
- func EnsureIndex(ctx *Ctx, index string, init bool)
- func EsLog(ctx *Ctx, msg string, dt time.Time) error
- func ExecCommand(ctx *Ctx, cmdAndArgs []string, env map[string]string, tmout *time.Duration) (string, error)
- func FatalNoLog(err error) string
- func FatalOnError(err error) string
- func Fatalf(f string, a ...interface{})
- func FilterRedacted(str string) string
- func GHClient(ctx *Ctx) (ghCtx context.Context, clients []*github.Client)
- func GHClientForKeys(oAuths map[string]struct{}) (ghCtx context.Context, clients []*github.Client)
- func GetAPIToken() (string, error)
- func GetDockerHubRepos(ctx *Ctx, dockerhubOwner string) (repos []string, err error)
- func GetFixtures(ctx *Ctx, path string) (fixtures []string)
- func GetGerritRepos(ctx *Ctx, gerritURL string) (projects, repos []string, err error)
- func GetRateLimits(gctx context.Context, ctx *Ctx, gcs []*github.Client, core bool) (int, []int, []int, []time.Duration)
- func GetRedacted() (str string)
- func GetRocketChatChannels(ctx *Ctx, srv, token, uid string) (channels []string, err error)
- func GetSlackBotUsersConversation(ctx *Ctx, token string) (ids, channels []string, err error)
- func GetThreadsNum(ctx *Ctx) int
- func GroupIncluded(ctx *Ctx, gc *GroupConfig, origin string) bool
- func Hash(str string, nodeIdx, nodeNum int) (int, bool)
- func HourStart(dt time.Time) time.Time
- func InitializeAuth0() error
- func IsRedacted(name string) bool
- func MonthStart(dt time.Time) time.Time
- func NextDayStart(dt time.Time) time.Time
- func NextHourStart(dt time.Time) time.Time
- func NextMonthStart(dt time.Time) time.Time
- func NextQuarterStart(dt time.Time) time.Time
- func NextWeekStart(dt time.Time) time.Time
- func NextYearStart(dt time.Time) time.Time
- func PeriodParse(perStr string) (dur time.Duration, ok bool)
- func PrevDayStart(dt time.Time) time.Time
- func PrevHourStart(dt time.Time) time.Time
- func PrevMonthStart(dt time.Time) time.Time
- func PrevQuarterStart(dt time.Time) time.Time
- func PrevWeekStart(dt time.Time) time.Time
- func PrevYearStart(dt time.Time) time.Time
- func PrintLogf(format string, args ...interface{}) (err error)
- func Printf(format string, args ...interface{}) (n int, err error)
- func PrintfRedacted(format string, args ...interface{}) (n int, err error)
- func ProgressInfo(i, n int, start time.Time, last *time.Time, period time.Duration, msg string)
- func QuarterStart(dt time.Time) time.Time
- func SafeString(str string) string
- func StringToBool(v string) bool
- func TimeParseAny(dtStr string) time.Time
- func ToYMDDate(dt time.Time) string
- func ToYMDHMSDate(dt time.Time) string
- func WeekStart(dt time.Time) time.Time
- func YearStart(dt time.Time) time.Time
- type Alias
- type AliasView
- type ColumnCondition
- type Config
- type CopyConfig
- type Ctx
- type DataSource
- type DockerHubData
- type DockerHubResults
- type Endpoint
- type EndpointProject
- type EsAlias
- type EsBulkItemStatus
- type EsBulkResult
- type EsBulkResultItem
- type EsByQueryPayload
- type EsIndex
- type EsIndexSettings
- type EsIndexSettingsPayload
- type EsLastRunPayload
- type EsLogPayload
- type EsMtxPayload
- type EsSearchPayload
- type EsSearchQuery
- type EsSearchQueryString
- type EsSearchResultHit
- type EsSearchResultHits
- type EsSearchResultPayload
- type EsSearchResultSource
- type EsSearchScrollPayload
- type EsSyncInfoPayload
- type EsUpdateByQueryPayload
- type Fixture
- type GroupConfig
- type MetaDataSource
- type MetaWorkingGroup
- type Metadata
- type MultiConfig
- type Native
- type Project
- type RawEndpoint
- type Task
- type TaskMtx
- type TaskResult
- type WGDataSource
Constants ¶
const APIToken string = "api-token"
APIToken - constant string
const BackendPassword string = "backend-password"
BackendPassword - backend-password
const BackendUser string = "backend-user"
BackendUser - backend-user
const Bitergia string = "bitergia"
Bitergia - bitergia
const Bugzilla string = "bugzilla"
Bugzilla - bugzilla
const BugzillaRest string = "bugzillarest"
BugzillaRest - bugzillarest (requires Bugzilla 5.X)
const Confluence string = "confluence"
Confluence - confluence
const CopyFromDateField = "metadata__enriched_on" // Date when the item was enriched and stored in the index with enriched documents. (currently best IMHO - LG)
CopyFromDateField - field used to find the most recent document; copying starts from that field's datetime value
const DADS string = "dads"
DADS - config flag in the fixture that allows selecting when to run dads instead of p2o
const DadsException string = "DA_DS_ERROR(time="
DadsException - string that identifies a dads exception
const DadsWarning string = "da-ds WARNING"
DadsWarning - string that identifies a da-ds warning
const Delete string = "DELETE"
Delete - DELETE
const Discourse string = "discourse"
Discourse - discourse
const DockerHub string = "dockerhub"
DockerHub - dockerhub
const Email string = "email"
Email - email
const External string = "external"
External - external
const FromDate string = "from-date"
FromDate - from-date
const Gerrit string = "gerrit"
Gerrit - gerrit
const Get string = "GET"
Get - GET
const Git string = "git"
Git - git
const GitHub string = "github"
GitHub - github
const GitHubOrg string = "github_org"
GitHubOrg - github_org
const GitHubUser string = "github_user"
GitHubUser - github_user
const GoogleGroups string = "googlegroups"
GoogleGroups data source
const GroupsIO string = "groupsio"
GroupsIO - groupsio
const Head string = "HEAD"
Head - HEAD
const Jenkins string = "jenkins"
Jenkins - jenkins
const Jira string = "jira"
Jira - jira
const Locked string = "locked"
Locked - locked
const MeetUp string = "meetup"
MeetUp - meetup
const Nil string = "<nil>"
Nil - used to specify an empty environment variable in the fixture (for dads)
const Null string = "(null)"
Null - used to specify a null value
const OK string = "ok"
OK - common constant string
const Password string = "password"
Password - password
const Pipermail string = "pipermail"
Pipermail - pipermail
const Post string = "POST"
Post - POST
const ProjectNoOrigin string = "--no-origin--"
ProjectNoOrigin - special marker to set project on all index data
const Put string = "PUT"
Put - PUT
const PyException string = "Traceback (most recent call last)"
PyException - string that identifies a Python exception
const Redacted string = "[redacted]"
Redacted - [redacted]
const RocketChat string = "rocketchat"
RocketChat - rocketchat
const SDSMtx string = "sdsmtx"
SDSMtx - sdsmtx
const SSHKey string = "ssh-key"
SSHKey - constant string
const SearchScroll string = "/_search/scroll"
SearchScroll - /_search/scroll
const Slack string = "slack"
Slack - slack
const Unlocked string = "unlocked"
Unlocked - unlocked
const User string = "user"
User - user
const UserID string = "user-id"
UserID - user-id
Variables ¶
var (
	// GRedactedStrings - need to be global, to redact them from error logs
	GRedactedStrings map[string]struct{}
	// GRedactedMtx - guard access to this map while in MT
	GRedactedMtx *sync.RWMutex
)
var ErrorStrings = map[int]string{
-3: "task was not executed due to frequency check",
-2: "task is configured as a copy from another index pattern",
-1: "task was skipped",
1: "datasource slug contains > 1 '/' separators",
2: "incorrect endpoint value for given data source",
3: "incorrect config option(s) for given data source",
4: "p2o.py error",
5: "setting SSH private key error",
6: "command timeout error",
7: "index copy error",
}
ErrorStrings - map of possible error codes returned from enrich tasks
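A minimal lookup sketch (the import alias lib is an assumption; these docs refer to the package as both syncdatasources and lib, and runTask is a hypothetical task runner):

	code := runTask(task) // hypothetical: returns one of the codes above
	if msg, ok := lib.ErrorStrings[code]; ok {
		lib.Printf("task result: %s\n", msg)
	}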
Functions ¶
func AddRedacted ¶
AddRedacted - adds redacted string
func EndpointIncluded ¶
func EndpointIncluded(ctx *Ctx, ep *RawEndpoint, origin string) (bool, int)
EndpointIncluded - checks whether the given endpoint's origin should be included or excluded, based on the endpoint's skip/only regular expression lists. The first return value specifies whether the endpoint is included. The second value specifies the reason: 1 - included by an 'only' condition, 2 - skipped by a 'skip' condition.
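A hedged usage sketch; it assumes fixture loading compiles the Skip/Only string lists into the SkipREs/OnlyREs fields shown on RawEndpoint below (ctx is an already-initialized *Ctx):

	ep := &lib.RawEndpoint{
		SkipREs: []*regexp.Regexp{regexp.MustCompile(`-archived$`)},
		OnlyREs: []*regexp.Regexp{regexp.MustCompile(`^https://github\.com/myorg/`)},
	}
	included, reason := lib.EndpointIncluded(ctx, ep, "https://github.com/myorg/repo")
	// reason: 1 - matched an 'only' rule, 2 - matched a 'skip' rule
	_ = included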
func EnsureIndex ¶
EnsureIndex - ensures that the given index exists in ES. init: when this flag is set, do not use syncdatasources.Printf, which would cause infinite recursion.
func ExecCommand ¶
func ExecCommand(ctx *Ctx, cmdAndArgs []string, env map[string]string, tmout *time.Duration) (string, error)
ExecCommand - executes the command given as an array of strings, with an optional environment map and timeout
func FatalNoLog ¶
FatalNoLog - displays the error message (if an error is present) and exits the program; should be used for very early init state
func FatalOnError ¶
FatalOnError - displays the error message (if an error is present) and exits the program
func Fatalf ¶
func Fatalf(f string, a ...interface{})
Fatalf - calls FatalOnError using fmt.Errorf with the provided format and args
func FilterRedacted ¶
FilterRedacted - filters out all known redacted strings
func GHClientForKeys ¶
GHClientForKeys - get GitHub client for given keys
func GetAPIToken ¶
GetAPIToken - returns an API token to use for dev-analytics-api API calls. If the JWT_TOKEN env variable is specified - just use the provided token without any checks. Else get auth0 data from AUTH0_DATA and generate/reuse a token stored in the ES cache.
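A short usage sketch (the Bearer header format and req, a *http.Request built elsewhere, are assumptions, not confirmed by this doc):

	token, err := lib.GetAPIToken()
	lib.FatalOnError(err)
	req.Header.Set("Authorization", "Bearer "+token)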
func GetDockerHubRepos ¶
GetDockerHubRepos - return list of repos for given dockerhub server
func GetFixtures ¶
GetFixtures - read all fixture files
func GetGerritRepos ¶
GetGerritRepos - return list of repos for given gerrit server (uses HTML crawler)
func GetRateLimits ¶
func GetRateLimits(gctx context.Context, ctx *Ctx, gcs []*github.Client, core bool) (int, []int, []int, []time.Duration)
GetRateLimits - returns total and remaining API points and the duration to wait until reset. When core=true it returns Core limits; when core=false it returns Search limits.
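A hedged usage sketch; the meaning of the first return value (read here as the index of the client with the most remaining points) is an assumption:

	gctx, gcs := lib.GHClient(ctx)
	hint, limits, remaining, resets := lib.GetRateLimits(gctx, ctx, gcs, true)
	lib.Printf("client #%d: %d/%d core points left, reset in %v\n", hint, remaining[hint], limits[hint], resets[hint])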
func GetRocketChatChannels ¶
GetRocketChatChannels - return list of channels defined on a given RocketChat server
func GetSlackBotUsersConversation ¶
GetSlackBotUsersConversation - returns the list of channels (Slack users.conversations API) available for a given slack bot user (specified by a bearer token)
func GetThreadsNum ¶
GetThreadsNum - returns the number of available CPUs. If the SDS_ST environment variable is set, it returns 1; this can be used to debug the single-threaded version.
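A minimal sketch of the usual pattern: cap worker goroutines with the returned value (tasks and process are hypothetical):

	thrN := lib.GetThreadsNum(ctx)
	sem := make(chan struct{}, thrN) // semaphore limiting concurrency
	for _, task := range tasks {
		sem <- struct{}{}
		go func(t lib.Task) {
			defer func() { <-sem }()
			process(t)
		}(task)
	}
	for i := 0; i < thrN; i++ { // drain: wait for the last workers
		sem <- struct{}{}
	}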
func GroupIncluded ¶
func GroupIncluded(ctx *Ctx, gc *GroupConfig, origin string) bool
GroupIncluded - checks whether the given endpoint's origin matches the given group configuration. The return value specifies whether the endpoint is included.
func Hash ¶
Hash - for the given string 'str', calculates a hash value and transforms it into a number in [0, nodeNum). If that number matches nodeIdx, the hash belongs to this node; otherwise it doesn't.
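A sketch of the node-sharding pattern this enables (SDS_NODE_NUM/SDS_NODE_IDX are described under Ctx below; run is a hypothetical executor):

	for _, task := range tasks {
		_, mine := lib.Hash(task.Endpoint, ctx.NodeIdx, ctx.NodeNum)
		if !mine {
			continue // some other node owns this task
		}
		run(task)
	}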
func InitializeAuth0 ¶
func InitializeAuth0() error
InitializeAuth0 - initializes Auth0 client using data stored in AUTH0_DATA
func IsRedacted ¶
IsRedacted - returns whether the "name" config option should be redacted
func MonthStart ¶
MonthStart - return time rounded to current month start
func NextDayStart ¶
NextDayStart - return time rounded to next day start
func NextHourStart ¶
NextHourStart - return time rounded to next hour start
func NextMonthStart ¶
NextMonthStart - return time rounded to next month start
func NextQuarterStart ¶
NextQuarterStart - return time rounded to next quarter start
func NextWeekStart ¶
NextWeekStart - return time rounded to next week start
func NextYearStart ¶
NextYearStart - return time rounded to next year start
func PeriodParse ¶
PeriodParse - tries to parse period
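A usage sketch; the accepted period syntax (e.g. "12h") is an assumption based on the max_frequency fixture option, and lastSync is a hypothetical timestamp:

	dur, ok := lib.PeriodParse("12h")
	if !ok {
		lib.Fatalf("cannot parse period %s", "12h")
	}
	if time.Since(lastSync) < dur {
		return // synced recently enough - skip this task
	}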
func PrevDayStart ¶
PrevDayStart - return time rounded to prev day start
func PrevHourStart ¶
PrevHourStart - return time rounded to prev hour start
func PrevMonthStart ¶
PrevMonthStart - return time rounded to prev month start
func PrevQuarterStart ¶
PrevQuarterStart - return time rounded to prev quarter start
func PrevWeekStart ¶
PrevWeekStart - return time rounded to prev week start
func PrevYearStart ¶
PrevYearStart - return time rounded to prev year start
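A quick illustration of the whole *Start family (expected values assume calendar rounding as the names suggest; the week-start day is not specified in this doc, and TimeParseAny is documented below):

	dt := lib.TimeParseAny("2014-04-14 10:30:00")
	lib.HourStart(dt)      // 2014-04-14 10:00:00
	lib.DayStart(dt)       // 2014-04-14 00:00:00
	lib.MonthStart(dt)     // 2014-04-01 00:00:00
	lib.QuarterStart(dt)   // 2014-04-01 00:00:00 (Q2)
	lib.YearStart(dt)      // 2014-01-01 00:00:00
	lib.NextMonthStart(dt) // 2014-05-01 00:00:00
	lib.PrevMonthStart(dt) // 2014-03-01 00:00:00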
func PrintfRedacted ¶
PrintfRedacted is a wrapper around fmt.Printf(...) that supports logging and filters out known redacted strings (see FilterRedacted).
func ProgressInfo ¶
ProgressInfo - displays progress info (i/n) if the current time >= last + period; when info is displayed, last is updated.
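A usage sketch: print progress at most once per period inside a long loop (tasks and process are hypothetical):

	start := time.Now()
	last := start
	for i, task := range tasks {
		process(task)
		lib.ProgressInfo(i, len(tasks), start, &last, 10*time.Second, "enriching")
	}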
func QuarterStart ¶
QuarterStart - return time rounded to current quarter start
func SafeString ¶
SafeString - returns a safe string: no control characters and valid Unicode. Another option would be to replace non-OK characters with "%HH" (their hex codes); ES would understand this.
func StringToBool ¶
StringToBool - converts a string value to a boolean. Returns false for anything parsed as false, zero, empty etc. (f, F, false, False, fALSe, 0, "", 0.00); returns true otherwise.
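For example:

	lib.StringToBool("False") // false
	lib.StringToBool("0.00")  // false
	lib.StringToBool("")      // false
	lib.StringToBool("yes")   // true (anything not false-like)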
func TimeParseAny ¶
TimeParseAny - attempts to parse time from a string in YYYY-MM-DD HH:MI:SS format, dropping parts from the right until only YYYY is left
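For example (the exact defaults for omitted parts are an assumption - rounding to the period start):

	lib.TimeParseAny("2017-12-05 11:22:33") // full precision
	lib.TimeParseAny("2017-12-05")          // assumed 2017-12-05 00:00:00
	lib.TimeParseAny("2017")                // assumed 2017-01-01 00:00:00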
func ToYMDHMSDate ¶
ToYMDHMSDate - return time formatted as YYYY-MM-DD HH:MI:SS
Types ¶
type Alias ¶
type Alias struct {
	From     string      `yaml:"from"`
	To       []string    `yaml:"to"`
	Dedup    []string    `yaml:"dedup"`
	Views    []AliasView `yaml:"views"`
	NoEnrich bool        `yaml:"no_enrich"`
}
Alias contains indexing alias data: a single source index (from) and a list of aliases that should point to that index
type AliasView ¶
type AliasView struct {
	Name   string      `yaml:"name"`
	Filter interface{} `yaml:"filter"`
}
AliasView - allows creating "filtered aliases" / "views". API: POST /_aliases '{"actions":[{"add":{"index":"sds-lfn-onap-git-for-merge","alias":"test-lg","filter":{"term":{"project":"CLI"}}}}]}'
type ColumnCondition ¶
ColumnCondition - holds a single must or must_not condition for setting project within a single endpoint
type Config ¶
type Config struct {
	Name  string            `yaml:"name"`
	Value string            `yaml:"value"`
	Flags map[string]string `yaml:"flags"`
}
Config holds data source config options
func (Config) RedactedString ¶
RedactedString - redacted string output
type CopyConfig ¶
type CopyConfig struct {
	Pattern     string `yaml:"pattern"`
	Incremental bool   `yaml:"incremental"` // if set, data will be copied since the most recent data already copied, so you can use no_origin to specify how to copy data
	// if not set, every copy operation will overwrite all data in the destination index (which is the default)
	NoOrigin bool `yaml:"no_origin"` // skip checking origin when calculating the start date to copy
	// if no_origin is set, copying will start from the date of the last document stored in the destination index
	// (can be used when the source has multiple origins or origin(s) different than the endpoint's origin)
	// if no_origin is not set, it will query the destination index for the origin of the destination endpoint
	// and will start copying source -> dest from that date (this is the default)
	Must    []ColumnCondition `yaml:"must"`
	MustNot []ColumnCondition `yaml:"must_not"`
}
CopyConfig - holds data related to copy from other index configuration
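A hedged sketch of a copy configuration expressed as its Go value (in fixtures this lives under an endpoint's copy_from key; the pattern value is illustrative):

	cfg := lib.CopyConfig{
		Pattern:     "sds-proj-git-for-merge",
		Incremental: true, // continue from the newest document already copied
		NoOrigin:    true, // don't restrict the start-date lookup by origin
	}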
type Ctx ¶
type Ctx struct {
	Debug int // From SDS_DEBUG, debug level: 0-no, 1-info, 2-verbose
	CmdDebug int // From SDS_CMDDEBUG, commands execution debug level: 0-no, 1-only output commands, 2-output commands and their output, 3-output full environment as well, default 0
	MaxRetry int // From SDS_MAXRETRY, try to run the grimoire stack (perceval, p2o.py etc.) that many times before reporting failure, default 0 (1 original run - always runs - and 0 more attempts)
	ST bool // From SDS_ST, true: use single-threaded version, false: use multi-threaded version, default false
	NCPUs int // From SDS_NCPUS, set to override the number of CPUs to run, this overwrites SDS_ST, default 0 (which means do not use it, use all CPUs reported by the Go library)
	NCPUsScale float64 // From SDS_NCPUS_SCALE, scale the number of CPUs, for example 2.0 will report 2.0 times the number of actually available CPUs
	FixturesRE *regexp.Regexp // From SDS_FIXTURES_RE - regular expression specifying which fixtures should be processed, default empty which means all
	DatasourcesRE *regexp.Regexp // From SDS_DATASOURCES_RE - regular expression specifying which datasources should be processed, default empty which means all
	ProjectsRE *regexp.Regexp // From SDS_PROJECTS_RE - regular expression specifying which projects/subprojects should be processed, default empty which means all
	EndpointsRE *regexp.Regexp // From SDS_ENDPOINTS_RE - regular expression specifying which endpoints/origins should be processed, default empty which means all
	TasksRE *regexp.Regexp // From SDS_TASKS_RE - regular expression specifying which tasks should be processed, default empty which means all, an example task is "sds-lfn-onap-slack:SLACK_CHAN_ID"
	FixturesSkipRE *regexp.Regexp // From SDS_FIXTURES_SKIP_RE - regular expression specifying which fixtures should be skipped, default empty which means none
	DatasourcesSkipRE *regexp.Regexp // From SDS_DATASOURCES_SKIP_RE - regular expression specifying which datasources should be skipped, default empty which means none
	ProjectsSkipRE *regexp.Regexp // From SDS_PROJECTS_SKIP_RE - regular expression specifying which projects/subprojects should be skipped, default empty which means none
	EndpointsSkipRE *regexp.Regexp // From SDS_ENDPOINTS_SKIP_RE - regular expression specifying which endpoints/origins should be skipped, default empty which means none
	TasksSkipRE *regexp.Regexp // From SDS_TASKS_SKIP_RE - regular expression specifying which tasks should be skipped, default empty which means none
	CtxOut bool // From SDS_CTXOUT, output all context data (this struct), default false
	LogTime bool // From SDS_SKIPTIME, output time with all lib.Printf(...) calls, default true, use SDS_SKIPTIME to disable
	ExecFatal bool // default true, set this manually to false to avoid lib.ExecCommand calling os.Exit() on failure and return an error instead
	ExecQuiet bool // default false, set this manually to true to have quiet exec failures
	ExecOutput bool // default false, set to true to capture commands' STDOUT
	ExecOutputStderr bool // default false, set to true to capture commands' STDERR
	ElasticURL string // From SDS_ES_URL, ElasticSearch URL, default http://127.0.0.1:9200
	EsBulkSize int // From SDS_ES_BULKSIZE, ElasticSearch bulk size when enriching data, defaults to 0 which means "not specified" (10000)
	NodeHash bool // From SDS_NODE_HASH, if set it will generate hashes for each task and only execute them when the node number matches the hash result
	NodeNum int // From SDS_NODE_NUM, set the number of nodes, so the hashing function will return [0, ... n)
	NodeIdx int // From SDS_NODE_IDX, set the number of the current node, so only hashes matching this node will run
	NodeSettleTime int // From SDS_NODE_SETTLE_TIME, number of seconds that the master gives nodes to start up and wait for ES mutex(es) to sync with the master node, default 10 (in seconds)
	DryRun bool // From SDS_DRY_RUN, if set it will do everything excluding actual grimoire stack execution (will report success for all commands instead)
	DryRunCode int // From SDS_DRY_RUN_CODE, dry run exit code, default 0 which means success, possible values 1, 2, 3, 4
	DryRunCodeRandom bool // From SDS_DRY_RUN_CODE_RANDOM, dry run exit code, will return a random value from 0 to 5
	DryRunSeconds int // From SDS_DRY_RUN_SECONDS, simulate each dry run command taking some time to execute
	DryRunSecondsRandom bool // From SDS_DRY_RUN_SECONDS_RANDOM, make running time from 0 to SDS_DRY_RUN_SECONDS (in ms resolution)
	DryRunAllowSSH bool // From SDS_DRY_RUN_ALLOW_SSH, if set it will allow setting SSH keys in dry run mode
	DryRunAllowFreq bool // From SDS_DRY_RUN_ALLOW_FREQ, if set it will allow processing sync frequency data in dry run mode
	DryRunAllowMtx bool // From SDS_DRY_RUN_ALLOW_MTX, if set it will allow handling ES mutexes (for nodes concurrency support) in dry run mode
	DryRunAllowRename bool // From SDS_DRY_RUN_ALLOW_RENAME, if set it will allow handling ES index renaming in dry run mode
	DryRunAllowOrigins bool // From SDS_DRY_RUN_ALLOW_ORIGINS, if set it will allow fetching external indices' origins list in dry run mode
	DryRunAllowDedup bool // From SDS_DRY_RUN_ALLOW_DEDUP, if set it will allow deduping bitergia data by deleting origins shared with existing SDS indices
	DryRunAllowFAliases bool // From SDS_DRY_RUN_ALLOW_F_ALIASES, if set it will allow creating/maintaining foundation-f aliases in dry run mode
	DryRunAllowProject bool // From SDS_DRY_RUN_ALLOW_PROJECT, if set it will allow running set-project by SDS (on endpoints with project set and p2o mode set to false)
	DryRunAllowSyncInfo bool // From SDS_DRY_RUN_ALLOW_SYNC_INFO, if set it will allow setting sync info in the sds-sync-info index
	DryRunAllowSortDuration bool // From SDS_DRY_RUN_ALLOW_SORT_DURATION, if set it will allow ordering tasks by duration in dry run mode
	DryRunAllowMerge bool // From SDS_DRY_RUN_ALLOW_MERGE, if set it will allow calling the DA-affiliation merge_all API after all tasks finished in dry run mode
	DryRunAllowHideEmails bool // From SDS_DRY_RUN_ALLOW_HIDE_EMAILS, if set it will allow calling the DA-affiliation hide_emails API in dry run mode
	DryRunAllowCacheTopContributors bool // From SDS_DRY_RUN_ALLOW_CACHE_TOP_CONTRIBUTORS, if set it will allow calling the DA-affiliation cache_top_contributors API in dry run mode
	DryRunAllowOrgMap bool // From SDS_DRY_RUN_ALLOW_ORG_MAP, if set it will allow calling the DA-affiliation map_org_names API in dry run mode
	DryRunAllowEnrichDS bool // From SDS_DRY_RUN_ALLOW_ENRICH_DS, if set it will allow calling the DA-metrics enrich API in dry run mode
	DryRunAllowDetAffRange bool // From SDS_DRY_RUN_ALLOW_DET_AFF_RANGE, if set it will allow calling the DA-affiliation det_aff_range API in dry run mode
	DryRunAllowCopyFrom bool // From SDS_DRY_RUN_ALLOW_COPY_FROM, if set it will allow copying indices in dry run mode
	DryRunAllowMetadata bool // From SDS_DRY_RUN_ALLOW_METADATA, if set it will allow processing fixture metadata in dry run mode
	TimeoutSeconds int // From SDS_TIMEOUT_SECONDS, set the entire program execution timeout, the program will finish with return code 2 if anything still runs after this time, default 258660 (71 h 51 min)
	TaskTimeoutSeconds int // From SDS_TASK_TIMEOUT_SECONDS, set single p2o.py task execution timeout, default is 86400s (24 hours)
	NLongest int // From SDS_N_LONGEST, number of longest running tasks to display in stats, default 30
	SkipSH bool // From SDS_SKIP_SH, if set Sorting Hat database processing will be skipped
	SkipData bool // From SDS_SKIP_DATA, if set - it will not run incremental data sync
	SkipAffs bool // From SDS_SKIP_AFFS, if set - it will not run p2o.py historical affiliations enrichment (--only-enrich --refresh-identities --no_incremental)
	SkipAliases bool // From SDS_SKIP_ALIASES, if set - SDS will not attempt to create index aliases and will not attempt to drop unused aliases
	SkipDropUnused bool // From SDS_SKIP_DROP_UNUSED, if set - it will not attempt to drop unused indexes and aliases
	NoIndexDrop bool // From SDS_NO_INDEX_DROP, if set - it will warn that an index drop is needed instead of actually dropping the index
	SkipCheckFreq bool // From SDS_SKIP_CHECK_FREQ, will skip the maximum task sync frequency check if set
	SkipEsData bool // From SDS_SKIP_ES_DATA, will totally skip anything related to "sdsdata" index processing (storing SDS state)
	SkipEsLog bool // From SDS_SKIP_ES_LOG, will skip writing logs to the "sdslog" index
	SkipDedup bool // From SDS_SKIP_DEDUP, will skip attempting to dedup data shared between an existing SDS index and an external bitergia index (by deleting shared origin data from the external Bitergia index)
	SkipFAliases bool // From SDS_SKIP_F_ALIASES, will skip attempting to create/maintain foundation-f aliases
	SkipExternal bool // From SDS_SKIP_EXTERNAL, will skip any external indices processing: enrichments, deduplication, affiliations etc.
	SkipProject bool // From SDS_SKIP_PROJECT, will skip adding the column "project": "project name" on all documents where origin = endpoint name, will also add a timestamp column "project_ts", so the next run can start on documents newer than that
	SkipProjectTS bool // From SDS_SKIP_PROJECT_TS, will add the project column as described above, without using the "project_ts" column to determine from which document to start
	SkipSyncInfo bool // From SDS_SKIP_SYNC_INFO, will skip adding sync info to the sds-sync-info index
	SkipValGitHubAPI bool // From SDS_SKIP_VALIDATE_GITHUB_API, will not process GitHub orgs/users in the validate step (will not attempt to get the org's/user's repo lists)
	SkipSortDuration bool // From SDS_SKIP_SORT_DURATION, if set - it will skip ordering task runs by last running time duration desc
	SkipMerge bool // From SDS_SKIP_MERGE, if set - it will skip calling the DA-affiliation merge_all API after all tasks finished
	SkipHideEmails bool // From SDS_SKIP_HIDE_EMAILS, if set - it will skip calling the DA-affiliation hide_emails API
	SkipMetadata bool // From SDS_SKIP_METADATA, if set - it will skip processing fixture metadata
	SkipCacheTopContributors bool // From SDS_SKIP_CACHE_TOP_CONTRIBUTORS, if set - it will skip calling the DA-affiliation cache_top_contributors API
	SkipOrgMap bool // From SDS_SKIP_ORG_MAP, if set - it will skip calling the DA-affiliation map_org_names API
	SkipEnrichDS bool // From SDS_SKIP_ENRICH_DS, if set - it will skip calling the DA-metrics enrich API
	SkipCopyFrom bool // From SDS_SKIP_COPY_FROM, if set - it will skip the index copying feature
	RunDetAffRange bool // From SDS_RUN_DET_AFF_RANGE, if set - it will call the DA-affiliation det_aff_range API (this is a very resource intensive API)
	SkipP2O bool // From SDS_SKIP_P2O, if set - it will skip all p2o tasks and execute everything else
	StripErrorSize int // From SDS_STRIP_ERROR_SIZE, default 16384, error messages longer than this value will be stripped to this many bytes from the beginning and the end, so with 16384 an error 64000 bytes long becomes 16384 bytes from the beginning + \n(...)\n + 16384 bytes from the end
	GitHubOAuth string // From SDS_GITHUB_OAUTH, if not set it attempts to use public access, if it contains "/" it will assume that it contains a file name, if "," is found then it will assume that this is a list of OAuth tokens instead of just one
	LatestItems bool // From SDS_LATEST_ITEMS, if set pass the "latest items" or similar flag to the p2o.py backend (that should be handled by p2o.py using ES, so this is probably not a good idea; the git backend, for example, can return no data then)
	CSVPrefix string // From SDS_CSV_PREFIX, CSV logs filename prefix, default "jobs", so files would be "/root/.perceval/jobs_I_N.csv"
	Silent bool // From SDS_SILENT, skip p2o.py debug mode if set, else it will pass the "-g" flag to the 'p2o.py' call
	NoMultiAliases bool // From SDS_NO_MULTI_ALIASES, if set an alias can only be defined for a single index, so only one index maps to any alias, if not defined multiple input indices can be accessed through a single alias (so it can have data from more than 1 p2o.py call)
	CleanupAliases bool // From SDS_CLEANUP_ALIASES, will delete all aliases before creating them (so it can delete old indexes that were pointed to by a given alias before adding new indexes to it (single or multiple))
	ScrollWait int // From SDS_SCROLL_WAIT, will pass 'p2o.py' '--scroll-wait=N' if set - this specifies the time to wait for available scrolls (in seconds), default 2700 (45 minutes)
	ScrollSize int // From SDS_SCROLL_SIZE, ElasticSearch scroll size when enriching data, default 500
	MaxDeleteTrials int // From SDS_MAX_DELETE_TRIALS, default 10
	MaxMtxWait int // From SDS_MAX_MTX_WAIT, in seconds, default 900s
	MaxMtxWaitFatal bool // From SDS_MAX_MTX_WAIT_FATAL, exit with an error when waiting for a mutex takes more than the configured amount of time
	EnrichExternalFreq time.Duration // From SDS_ENRICH_EXTERNAL_FREQ, how often to enrich external indexes, default is 168h (7 days, a week) which means no more often than every 168h
	OnlyValidate bool // From SDS_ONLY_VALIDATE, if defined, SDS will only validate fixtures and exit 0 if all of them are valid, non-zero + error message otherwise
	OnlyP2O bool // From SDS_ONLY_P2O, if defined, SDS will only run p2o tasks, will not do anything else
	SkipReenrich string // From SDS_SKIP_REENRICH, list of backend types where the re-enrich phase is not needed, because they always fetch full data (don't support incremental updates), probably we can specify "jira,gerrit,confluence,bugzilla"
	AffiliationAPIURL string // From AFFILIATION_API_URL - DA affiliations API URL
	Auth0Data string // From AUTH0_DATA - auth0 data for da-ds (can be different than SDS auth0 data) - it's a stringified JSON
	MetricsAPIURL string // From METRICS_API_URL - DA metrics API URL
	Auth0URL string // From AUTH0_URL: Auth0 parameters for obtaining a DA-affiliation API token
	Auth0Audience string // From AUTH0_AUDIENCE
	Auth0ClientID string // From AUTH0_CLIENT_ID
	Auth0ClientSecret string // From AUTH0_CLIENT_SECRET
	Auth0GrantType string // From AUTH0_GRANT_TYPE
	ShUser string // From SH_USER: Sorting Hat database parameters
	ShHost string // From SH_HOST
	ShPort string // From SH_PORT
	ShPass string // From SH_PASS
	ShDB string // From SH_DB
	TestMode bool // true when running tests
	OAuthKeys []string // GitHub OAuth keys received from the SDS_GITHUB_OAUTH configuration (initialized only when lib.GHClient() is called)
	DynamicOAuth bool // From SDS_DYNAMIC_OAUTH - instead of getting OAuth keys once, get them dynamically every time they're passed to a da-ds/p2o.py subcommand
	GapURL string // data gap handler API URL
	Retries string // number of retries to insert into elastic
	Delay string // duration between each retry
	Environment string // From ENVIRONMENT
	AwsDefaultRegion string // From AWS_DEFAULT_REGION
	AwsAccessKeyID string // From AWS_ACCESS_KEY_ID
	AwsSecretAccessKey string // From AWS_SECRET_ACCESS_KEY
	LeFromAddr string // From LE_FROMADDR
	LePassword string // From LE_PASSWORD
	LeToAddrs string // From LE_TOADDRS
}
Ctx - environment context packed in structure
type DataSource ¶
type DataSource struct {
	Slug          string        `yaml:"slug"`
	Config        []Config      `yaml:"config"`
	MaxFrequency  string        `yaml:"max_frequency"`
	Projects      []Project     `yaml:"projects"`
	RawEndpoints  []RawEndpoint `yaml:"endpoints"`
	HistEndpoints []RawEndpoint `yaml:"historical_endpoints"`
	IndexSuffix   string        `yaml:"index_suffix"`
	Endpoints     []Endpoint    `yaml:"-"`
	MaxFreq       time.Duration `yaml:"-"`
	FullSlug      string        `yaml:"-"`
	Settings      *interface{}  `yaml:"settings"`
}
DataSource contains data source spec from dev-analytics-api
func (DataSource) Configs ¶
func (ds DataSource) Configs() string
Configs - return redacted configs as a string
func (DataSource) String ¶
func (ds DataSource) String() string
type DockerHubData ¶
type DockerHubData struct {
	Count   int                `json:"count"`
	Next    string             `json:"next"`
	Results []DockerHubResults `json:"results"`
}
DockerHubData - docker hub response format
type DockerHubResults ¶
DockerHubResults - holds user data
type Endpoint ¶
type Endpoint struct {
	Name       string // Endpoint name
	Project    string // optional project (allows grouping endpoints), for example "Project value"
	ProjectP2O bool   // if true SDS will pass `--project "Project value"` to p2o.py
	// if false, SDS will post-process the index and will add a `"project": "Project value"`
	// column where `"origin": "Endpoint name"`
	ProjectNoOrigin   bool
	Timeout           time.Duration // specifies maximum running time for a given endpoint (if specified)
	CopyFrom          CopyConfig    // specifies optional 'copy_from' configuration
	AffiliationSource string
	Projects          []EndpointProject
	PairProgramming   bool
	Dummy             bool // used to mark that the endpoint exists, but nothing should be done for it
	Groups            []GroupConfig
}
Endpoint holds data source endpoint (final endpoint generated from RawEndpoint)
type EndpointProject ¶
type EndpointProject struct {
	Name    string            `yaml:"name"`
	Origin  string            `yaml:"origin"`
	Must    []ColumnCondition `yaml:"must"`
	MustNot []ColumnCondition `yaml:"must_not"`
}
EndpointProject - holds data for a single sub-endpoint project configuration
type EsBulkItemStatus ¶
type EsBulkItemStatus struct {
	Status int         `json:"status"`
	Error  interface{} `json:"error"`
}
EsBulkItemStatus - status
type EsBulkResult ¶
type EsBulkResult struct {
Items []EsBulkResultItem `json:"items"`
}
EsBulkResult - item statuses
type EsBulkResultItem ¶
type EsBulkResultItem struct {
Index EsBulkItemStatus `json:"index"`
}
EsBulkResultItem - index status
type EsByQueryPayload ¶
EsByQueryPayload - update/delete by query result payload
type EsIndex ¶
type EsIndex struct {
Index string `json:"index"`
}
EsIndex - keeps index data as returned by ElasticSearch
type EsIndexSettings ¶
type EsIndexSettings struct {
IndexBlocksWrite *bool `json:"index.blocks.write"`
}
EsIndexSettings - index settings
type EsIndexSettingsPayload ¶
type EsIndexSettingsPayload struct {
Settings EsIndexSettings `json:"settings"`
}
EsIndexSettingsPayload - index settings payload
type EsLastRunPayload ¶
type EsLastRunPayload struct {
	Index    string    `json:"index"`
	Endpoint string    `json:"endpoint"`
	Type     string    `json:"type"`
	Dt       time.Time `json:"dt"`
}
EsLastRunPayload - last run support
type EsLogPayload ¶
EsLogPayload - ES log single document
type EsMtxPayload ¶
EsMtxPayload - ES mutex support (for locking concurrent nodes)
type EsSearchPayload ¶
type EsSearchPayload struct {
Query EsSearchQuery `json:"query"`
}
EsSearchPayload - ES search payload
type EsSearchQuery ¶
type EsSearchQuery struct {
QueryString EsSearchQueryString `json:"query_string"`
}
EsSearchQuery - ES search query
type EsSearchQueryString ¶
type EsSearchQueryString struct {
Query string `json:"query"`
}
EsSearchQueryString - ES search query string
type EsSearchResultHit ¶
type EsSearchResultHit struct {
	Source EsSearchResultSource `json:"_source"`
	ID     string               `json:"_id"`
}
EsSearchResultHit - search result single hit
type EsSearchResultHits ¶
type EsSearchResultHits struct {
Hits []EsSearchResultHit `json:"hits"`
}
EsSearchResultHits - search result hits
type EsSearchResultPayload ¶
type EsSearchResultPayload struct {
	Hits         EsSearchResultHits `json:"hits"`
	Aggregations interface{}        `json:"aggregations"`
}
EsSearchResultPayload - search result payload
type EsSearchResultSource ¶
type EsSearchResultSource struct {
	Index                string    `json:"index"`
	Endpoint             string    `json:"endpoint"`
	Type                 string    `json:"type"`
	Mtx                  string    `json:"mtx"`
	Dt                   time.Time `json:"dt"`
	ProjectTS            int64     `json:"project_ts"`
	MDTimestamp          time.Time `json:"metadata__timestamp"`
	MDEnrichedOn         time.Time `json:"metadata__enriched_on"`
	MDUpdatedOn          time.Time `json:"metadata__updated_on"`
	GrimoireCreationDate time.Time `json:"grimoire_creation_date"`
}
EsSearchResultSource - search result single hit's source document
type EsSearchScrollPayload ¶
type EsSearchScrollPayload struct {
ScrollID string `json:"_scroll_id"`
}
EsSearchScrollPayload - search scroll result payload
type EsSyncInfoPayload ¶
type EsSyncInfoPayload struct {
	Index             string     `json:"index"`
	Endpoint          string     `json:"endpoint"`
	Dt                time.Time  `json:"dt"`
	DataSyncAttemptDt *time.Time `json:"data_sync_attempt_dt"`
	DataSyncSuccessDt *time.Time `json:"data_sync_success_dt"`
	DataSyncErrorDt   *time.Time `json:"data_sync_error_dt"`
	DataSyncError     *string    `json:"data_sync_error"`
	DataSyncCL        *string    `json:"data_sync_command_line"`
	DataSyncRCL       *string    `json:"data_sync_redacted_command_line"`
	EnrichAttemptDt   *time.Time `json:"enrich_attempt_dt"`
	EnrichSuccessDt   *time.Time `json:"enrich_success_dt"`
	EnrichErrorDt     *time.Time `json:"enrich_error_dt"`
	EnrichError       *string    `json:"enrich_error"`
	EnrichCL          *string    `json:"enrich_command_line"`
	EnrichRCL         *string    `json:"enrich_redacted_command_line"`
}
EsSyncInfoPayload - sync info support
type EsUpdateByQueryPayload ¶
type EsUpdateByQueryPayload struct {
Updated int64 `json:"updated"`
}
EsUpdateByQueryPayload - update by query result payload
type Fixture ¶
type Fixture struct {
	Disabled    bool         `yaml:"disabled"`
	AllowEmpty  bool         `yaml:"allow_empty"`
	Native      Native       `yaml:"native"`
	DataSources []DataSource `yaml:"data_sources"`
	Aliases     []Alias      `yaml:"aliases"`
	Metadata    Metadata     `yaml:"metadata"`
	Fn          string
	Slug        string
}
Fixture contains full YAML structure of dev-analytics-api fixture files
type GroupConfig ¶
type GroupConfig struct {
	Name    string           `yaml:"name"`
	Skip    []string         `yaml:"skip"`
	Only    []string         `yaml:"only"`
	Self    bool             `yaml:"self"`    // If true, then group name = endpoint origin will be added
	Default bool             `yaml:"default"` // If set - this group will be used when no other groups match
	SkipREs []*regexp.Regexp `yaml:"-"`
	OnlyREs []*regexp.Regexp `yaml:"-"`
}
GroupConfig - holds repo group configuration (name + skip/only REGEXPs)
type MetaDataSource ¶
type MetaDataSource struct {
	Name      string   `yaml:"name"`      // can be git, github/pull_request etc.
	Slugs     []string `yaml:"slugs"`     // list of indices like 'finos/open-developer-platform/jira-for-merge', can start with 'pattern:', e.g. 'pattern:sds-finos-*-git-for-merge'
	Externals []string `yaml:"externals"` // external indices, for example 'bitergia-git-dump'
}
MetaDataSource - information about indices configured for a given data source (metadata section)
type MetaWorkingGroup ¶
type MetaWorkingGroup struct {
	Name        string            `yaml:"name"`         // will map to the "workinggroup" ES document field
	Meta        map[string]string `yaml:"meta"`         // values from this map (key/value) will map to ES "meta_key" = "value"
	NoOverwrite bool              `yaml:"no_overwrite"` // only set workinggroup and meta_* fields if they're not present yet
	DataSources []WGDataSource    `yaml:"datasources"`  // condition where to apply metadata (origins and filters)
}
MetaWorkingGroup - information about working groups configured in a fixture (metadata section). To actually apply the config, a MetaDataSource must be found for the WGDataSource and the Meta map must have at least one element. If the meta map is empty, only the "workinggroup" value will be set.
type Metadata ¶
type Metadata struct {
	DataSources   []MetaDataSource   `yaml:"datasources"`
	WorkingGroups []MetaWorkingGroup `yaml:"workinggroups"`
}
Metadata - keeps special data settings, currently this is used by FINOS
type MultiConfig ¶
MultiConfig holds massaged config options; a single option can have more than one value, for example GitHub API tokens: -t token1 token2 token3 ... tokenN
func (MultiConfig) String ¶
func (mc MultiConfig) String() string
type Native ¶
type Native struct {
	Slug              string `yaml:"slug"`
	AffiliationSource string `yaml:"affiliation_source"`
}
Native - keeps the fixture slug and an optional global affiliation source
type Project ¶
type Project struct {
	Name     string `yaml:"name"`
	P2O      *bool  `yaml:"p2o"`
	NoOrigin *bool  `yaml:"no_origin"` // if set, it will set project on a given index without the conditional origin
	// so it should be used only to set a single project within an entire datasource
	// possibly after copy_from usage
	RawEndpoints  []RawEndpoint `yaml:"endpoints"`
	HistEndpoints []RawEndpoint `yaml:"historical_endpoints"`
}
Project holds project data and list of endpoints
type RawEndpoint ¶
type RawEndpoint struct {
	Name              string            `yaml:"name"`
	Flags             map[string]string `yaml:"flags"`
	Skip              []string          `yaml:"skip"`
	Only              []string          `yaml:"only"`
	Project           string            `yaml:"project"`
	ProjectP2O        *bool             `yaml:"p2o"`
	ProjectNoOrigin   *bool             `yaml:"no_origin"`
	Timeout           *string           `yaml:"timeout"`
	Projects          []EndpointProject `yaml:"endpoint_projects"`
	CopyFrom          CopyConfig        `yaml:"copy_from"`
	AffiliationSource string            `yaml:"affiliation_source"`
	PairProgramming   bool              `yaml:"pair_programming"`
	Groups            []GroupConfig     `yaml:"groups"`
	SkipREs           []*regexp.Regexp  `yaml:"-"`
	OnlyREs           []*regexp.Regexp  `yaml:"-"`
}
RawEndpoint holds a data source endpoint together with optional flags describing how to generate the final endpoints. Flags can be "type: github_org" or "type: github_user", which means the actual repository list must be fetched from the GitHub org/user.
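A hedged Go-literal sketch of such an endpoint (the org URL form and skip pattern are illustrative):

	ep := lib.RawEndpoint{
		Name:  "https://github.com/myorg",
		Flags: map[string]string{"type": lib.GitHubOrg}, // expand to one endpoint per org repo
		Skip:  []string{`-archived$`},
	}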
type Task ¶
type Task struct {
	Endpoint            string
	Config              []Config
	DsSlug              string
	FxSlug              string
	FxFn                string
	MaxFreq             time.Duration
	CommandLine         string
	RedactedCommandLine string
	Env                 map[string]string
	Retries             int
	Err                 error
	Duration            time.Duration
	DsFullSlug          string
	ExternalIndex       string
	Project             string
	ProjectP2O          bool
	ProjectNoOrigin     bool
	Projects            []EndpointProject
	Millis              int64
	Timeout             time.Duration
	CopyFrom            CopyConfig
	PairProgramming     bool
	AffiliationSource   string
	Groups              []string
	Dummy               bool
	Flags               map[string]string
}
Task holds single endpoint task and its context (required config, fixture filename etc.)
func (Task) ShortString ¶
ShortString - outputs quick endpoint info (usually used for unfinished tasks)
func (Task) ShortStringCmd ¶
ShortStringCmd - output quick endpoint info (with command line)
func (Task) ToCSVNotRedacted ¶
ToCSVNotRedacted - outputs array of string for CSV output of this task (without redacting sensitive data)
type TaskMtx ¶
type TaskMtx struct {
	SSHKeyMtx    *sync.Mutex
	TaskOrderMtx *sync.Mutex
	SyncInfoMtx  *sync.Mutex
	SyncFreqMtx  *sync.RWMutex
	OrderMtx     map[int]*sync.Mutex
}
TaskMtx - holds all mutexes used in task processing
type TaskResult ¶
type TaskResult struct {
	Code                [2]int
	CommandLine         string
	RedactedCommandLine string
	Env                 map[string]string
	Retries             int
	Affs                bool
	Err                 error
	Index               string
	Endpoint            string
	Ds                  string
	Fx                  string
	Projects            []EndpointProject
}
TaskResult is the return type from task execution. It contains the task index Code[0], the error code Code[1], and the task's final command line.
type WGDataSource ¶
type WGDataSource struct {
	Name    string                 `yaml:"name"`    // must match the name from MetaDataSource to find indices/patterns to apply to
	Origins []string               `yaml:"origins"` // list of origins to apply metadata to
	Filter  map[string]interface{} `yaml:"filter"`  // optional filter definition - to apply metadata to (in addition to origins)
}
WGDataSource - contains origins and eventually filter(s) to specify where to apply metadata