Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type BulkIndex ¶
type BulkIndex struct { ForceDeletion bool `yaml:"force_deletion"` // Glean's force deletion flag ForceRestart bool `yaml:"force_restart"` // Glean's force restart flag ReaddirEntries int `yaml:"readdir_entries"` // number of entries per Readdir call. DocumentRequestSize int `yaml:"document_request_size"` // number of documents to include in a single bulk index request. UserRequestSize int `yaml:"user_request_size"` // number of users to include in a single bulk index request. CacheConcurrency int `yaml:"cache_concurrency"` // number of concurrent cache reads. }
BulkIndex represents the configuration info for a Glean bulk index operation.
type Conversion ¶
type Conversion struct { Type content.Type Converter *Converter Datasource *gleansdk.CustomDatasourceConfig }
Conversion represents the configuration for a single content conversion operation.
type Converter ¶
type Converter struct { FromContentType []content.Type `yaml:"from_content_types,flow"` // Content types that this converter can handle. ViewURLRewrites []string `yaml:"view_url_rewrites"` // Rewrite rules for viewurls specified as textutil.RewriteRules AllowAnonymousAccess bool `yaml:"allow_anonymous_access"` // allow anonymous access to the converted documents. // Default author to use if none can be obtained from the document itself. DefaultAuthor User `yaml:"default_author"` CustomConfig yaml.Node `yaml:"custom"` }
Converter represents the ability to convert from a set of content types to a Glean document ("glean/document").
type Crawl ¶
type Crawl struct { crawlcmd.Config `yaml:",inline" cmd:"crawl configuration"` Service CrawlService `yaml:",inline" cmd:"service to be crawled"` }
Crawl represents a single crawl that contributes data to a datasource.
type CrawlService ¶
type CrawlService struct { Name string `yaml:"service_name" cmd:"name of service to crawl, eg. s3/aws"` Config yaml.Node `yaml:"service_config" cmd:"service specific configuration, eg. cloudeng.io/aws/awsconfig.AWSFlags"` }
CrawlService represents the configuration of a specific service to be crawled, e.g. the configuration needed to access a cloud service.
type Datasource ¶
type Datasource struct { // Datasource name. Datasource string `yaml:"datasource" cmd:"name of the datasource"` Crawls []Crawl `yaml:"crawls,omitempty" cmd:"file based crawls to run for this datasource"` // API based 'crawls' that obtain data for this datasource. APICrawls apicrawlcmd.Crawls `yaml:"api_crawls,omitempty" cmd:"api crawls to run for this datasource"` // Bulk index configuration for this datasource. *BulkIndex `yaml:"bulk_index,omitempty" cmd:"bulk index configuration for this datasource"` // Incremental index configuration for this datasource. *IncrementalIndex `yaml:"incremental_index,omitempty" cmd:"incremental index configuration for this datasource"` // Converters (from download.Result to Glean document) configuration. Converters []Converter `yaml:"converters,omitempty" cmd:"converters for this datasource"` // GleanDomain is the domain of the Glean instance to use. GleanDomain string `yaml:"glean_domain" cmd:"glean domain to use"` // GleanTokenName is the name of the glean token to use to access the Glean instance. GleanTokenName string `yaml:"glean_token_name" cmd:"glean token name to use, if empty the glean_domain will be used"` // The Glean datasource configuration in YAML as opposed to JSON // format. GleanDatasource GleanDatasource `yaml:"glean_datasource_config" cmd:"glean datasource configuration, ie. the glean datasource to be indexed"` }
Datasource represents a single datasource or corpus to be crawled and indexed.
func DatasourceForName ¶
DatasourceForName returns the datasource configuration for the named datasource read from the specified config file.
func (Datasource) ConfigForContentType ¶
func (d Datasource) ConfigForContentType() map[content.Type]Conversion
ConfigForContentType returns a map from content type to all of the configuration information that pertains to that content type.
type DatasourceName ¶
type DatasourceName struct {
Datasource string `subcmd:"datasource,,name of the datasource"`
}
type Datasources ¶
type Datasources []Datasource
Datasources represents a list of named datasources.
func (Datasources) ConfigForName ¶
func (d Datasources) ConfigForName(name string) (Datasource, bool)
ConfigForName returns the configuration for the named datasource.
type FileFlags ¶
type FileFlags struct {
ConfigFile string `subcmd:"datasource-configs,,datasource config file"`
}
FileFlags represents a command line flag for the datasource config file.
type GleanDatasource ¶
type GleanDatasource struct { // GleanConfig is the datasource configuration for the Glean instance. gleansdk.CustomDatasourceConfig `yaml:",inline" cmd:"glean custom datasource configuration"` }
GleanDatasource represents the configuration of the datasource with Glean's API.
type IncrementalIndex ¶
type IncrementalIndex struct {
DeletionDelay time.Duration `yaml:"deletion_delay"` // Documents that have not been updated within the deletion delay will be removed from the Glean index.
}
IncrementalIndex represents the configuration info for incremental, document at a time, indexing.