config

package
v0.0.0-...-a1bc676 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 27, 2024 License: Apache-2.0 Imports: 9 Imported by: 9

README

Package cloudeng.io/glean/crawlindex/config

import cloudeng.io/glean/crawlindex/config

Types

Type BulkIndex
// BulkIndex represents the configuration info for a Glean bulk index operation.
// Fields map to the YAML keys named in their struct tags.
type BulkIndex struct {
	ForceDeletion       bool `yaml:"force_deletion"`        // Glean's force deletion flag.
	ForceRestart        bool `yaml:"force_restart"`         // Glean's force restart flag.
	ReaddirEntries      int  `yaml:"readdir_entries"`       // Number of entries per Readdir call.
	DocumentRequestSize int  `yaml:"document_request_size"` // Number of documents to include in a single bulk index request.
	UserRequestSize     int  `yaml:"user_request_size"`     // Number of users to include in a single bulk index request.
	CacheConcurrency    int  `yaml:"cache_concurrency"`     // Number of concurrent cache reads.
}

BulkIndex represents the configuration info for a Glean bulk index operation.

Type Conversion
// Conversion represents the configuration for a single content conversion
// operation. It carries no YAML tags; Datasource.ConfigForContentType returns
// values of this type keyed by content.Type.
// NOTE(review): Type presumably matches the map key it is stored under, and
// Converter/Datasource presumably point at the converter and Glean datasource
// configuration that apply to that content type — confirm against
// ConfigForContentType's implementation.
type Conversion struct {
	Type       content.Type
	Converter  *Converter
	Datasource *gleansdk.CustomDatasourceConfig
}

Conversion represents the configuration for a single content conversion operation.

Type Converter
// Converter represents the ability to convert from a set of content types
// to a Glean document ("glean/document").
type Converter struct {
	FromContentType []content.Type `yaml:"from_content_types,flow"` // Content types that this converter can handle.

	ViewURLRewrites []string `yaml:"view_url_rewrites"` // Rewrite rules for view URLs, specified as textutil.RewriteRules.

	AllowAnonymousAccess bool `yaml:"allow_anonymous_access"` // Allow anonymous access to the converted documents.

	// Default author to use if none can be obtained from the document itself.
	DefaultAuthor User `yaml:"default_author"`

	// Configuration found under the "custom" key, held as a raw yaml.Node.
	// NOTE(review): presumably decoded later by the specific converter
	// implementation — confirm against its users.
	CustomConfig yaml.Node `yaml:"custom"`
}

Converter represents the ability to convert from a set of content types to a Glean document ("glean/document").

Type Converters
// Converters represents a set of converters.
type Converters []Converter

Converters represents a set of converters.

Type Crawl
// Crawl represents a single crawl that contributes data to a datasource.
// Both fields carry the yaml ",inline" option, so the keys of the embedded
// crawlcmd.Config and of Service are read from the crawl's own mapping rather
// than from a nested key.
type Crawl struct {
	crawlcmd.Config `yaml:",inline" cmd:"crawl configuration"`
	Service         CrawlService `yaml:",inline" cmd:"service to be crawled"`
}

Crawl represents a single crawl that contributes data to a datasource.

Type CrawlService
// CrawlService represents the configuration of a specific service to be
// crawled, e.g. the configuration needed to access a cloud service.
// Name is read from "service_name" and Config from "service_config"; Config
// is held as a raw yaml.Node.
// NOTE(review): Config is presumably decoded into a service specific type
// selected by Name — confirm against the code that consumes it.
type CrawlService struct {
	Name   string    `yaml:"service_name" cmd:"name of service to crawl, eg. s3/aws"`
	Config yaml.Node `yaml:"service_config" cmd:"service specific configuration, eg. cloudeng.io/aws/awsconfig.AWSFlags"`
}

CrawlService represents the configuration of a specific service to be crawled, eg. to contain configuration for accessing a cloud service.

Type Datasource
// Datasource represents a single datasource or corpus to be crawled and indexed.
type Datasource struct {
	// Datasource name.
	Datasource string `yaml:"datasource" cmd:"name of the datasource"`

	// File based crawls that obtain data for this datasource.
	Crawls []Crawl `yaml:"crawls,omitempty" cmd:"file based crawls to run for this datasource"`

	// API based 'crawls' that obtain data for this datasource.
	APICrawls apicrawlcmd.Crawls `yaml:"api_crawls,omitempty" cmd:"api crawls to run for this datasource"`

	// Bulk index configuration for this datasource, embedded as a pointer
	// and read from the "bulk_index" key.
	*BulkIndex `yaml:"bulk_index,omitempty" cmd:"bulk index configuration for this datasource"`

	// Incremental index configuration for this datasource, embedded as a
	// pointer and read from the "incremental_index" key.
	*IncrementalIndex `yaml:"incremental_index,omitempty" cmd:"incremental index configuration for this datasource"`

	// Converters (from download.Result to Glean document) configuration.
	Converters []Converter `yaml:"converters,omitempty" cmd:"converters for this datasource"`

	// GleanDomain is the domain of the Glean instance to use.
	GleanDomain string `yaml:"glean_domain" cmd:"glean domain to use"`

	// The Glean datasource configuration in YAML as opposed to JSON
	// format.
	GleanDatasource GleanDatasource `yaml:"glean_datasource_config" cmd:"glean datasource configuration, ie. the glean datasource to be indexed"`
}

Datasource represents a single datasource or corpus to be crawled and indexed.

Functions
func DatasourceForName(ctx context.Context, filename string, name string) (Datasource, error)

DatasourceForName returns the datasource configuration for the named datasource read from the specified config file.

Methods
func (d Datasource) ConfigForContentType() map[content.Type]Conversion

ConfigForContentType returns a map from content type to all of the configuration information that pertains to that content type.

Type DatasourceName
// DatasourceName holds the name of a datasource, tagged with a subcmd
// "datasource" entry described as "name of the datasource".
// NOTE(review): the source gives no description for this type; it appears to
// be the command line flag used to select a datasource by name, analogous to
// FileFlags — confirm against its users.
type DatasourceName struct {
	Datasource string `subcmd:"datasource,,name of the datasource"`
}
Type Datasources
// Datasources represents a list of named datasources.
type Datasources []Datasource

Datasources represents a list of named datasources.

Methods
func (d Datasources) ConfigForName(name string) (Datasource, bool)

ConfigForName returns the configuration for the named datasource.

Type FileFlags
// FileFlags represents a command line flag for the datasource config file.
type FileFlags struct {
	ConfigFile string `subcmd:"datasource-configs,,datasource config file"`
}

FileFlags represents a command line flag for the datasource config file.

Type GleanDatasource
// GleanDatasource represents the configuration of the datasource with Glean's API.
type GleanDatasource struct {
	// The embedded CustomDatasourceConfig is the datasource configuration
	// for the Glean instance; the ",inline" option means its keys are read
	// from this struct's own YAML mapping.
	gleansdk.CustomDatasourceConfig `yaml:",inline" cmd:"glean custom datasource configuration"`
}

GleanDatasource represents the configuration of the datasource with Glean's API.

Type IncrementalIndex
// IncrementalIndex represents the configuration info for incremental,
// document at a time, indexing.
type IncrementalIndex struct {
	DeletionDelay time.Duration `yaml:"deletion_delay"` // Documents that have not been updated within the deletion delay will be removed from the Glean index.
}

IncrementalIndex represents the configuration info for incremental, document at a time, indexing.

Type User
// User represents a user in the system/datasource being indexed.
type User struct {
	Email  string `yaml:"email"`
	UserID string `yaml:"user_id"`
	Name   string `yaml:"name"`
}

User represents a user in the system/datasource being indexed.

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type BulkIndex

// BulkIndex represents the configuration info for a Glean bulk index operation.
// Fields map to the YAML keys named in their struct tags.
type BulkIndex struct {
	ForceDeletion       bool `yaml:"force_deletion"`        // Glean's force deletion flag.
	ForceRestart        bool `yaml:"force_restart"`         // Glean's force restart flag.
	ReaddirEntries      int  `yaml:"readdir_entries"`       // Number of entries per Readdir call.
	DocumentRequestSize int  `yaml:"document_request_size"` // Number of documents to include in a single bulk index request.
	UserRequestSize     int  `yaml:"user_request_size"`     // Number of users to include in a single bulk index request.
	CacheConcurrency    int  `yaml:"cache_concurrency"`     // Number of concurrent cache reads.
}

BulkIndex represents the configuration info for a Glean bulk index operation.

type Conversion

// Conversion represents the configuration for a single content conversion
// operation. It carries no YAML tags; Datasource.ConfigForContentType returns
// values of this type keyed by content.Type.
// NOTE(review): Type presumably matches the map key it is stored under, and
// Converter/Datasource presumably point at the converter and Glean datasource
// configuration that apply to that content type — confirm against
// ConfigForContentType's implementation.
type Conversion struct {
	Type       content.Type
	Converter  *Converter
	Datasource *gleansdk.CustomDatasourceConfig
}

Conversion represents the configuration for a single content conversion operation.

type Converter

// Converter represents the ability to convert from a set of content types
// to a Glean document ("glean/document").
type Converter struct {
	FromContentType []content.Type `yaml:"from_content_types,flow"` // Content types that this converter can handle.

	ViewURLRewrites []string `yaml:"view_url_rewrites"` // Rewrite rules for view URLs, specified as textutil.RewriteRules.

	AllowAnonymousAccess bool `yaml:"allow_anonymous_access"` // Allow anonymous access to the converted documents.

	// Default author to use if none can be obtained from the document itself.
	DefaultAuthor User `yaml:"default_author"`

	// Configuration found under the "custom" key, held as a raw yaml.Node.
	// NOTE(review): presumably decoded later by the specific converter
	// implementation — confirm against its users.
	CustomConfig yaml.Node `yaml:"custom"`
}

Converter represents the ability to convert from a set of content types to a Glean document ("glean/document").

type Converters

// Converters represents a set of converters.
type Converters []Converter

Converters represents a set of converters.

type Crawl

// Crawl represents a single crawl that contributes data to a datasource.
// Both fields carry the yaml ",inline" option, so the keys of the embedded
// crawlcmd.Config and of Service are read from the crawl's own mapping rather
// than from a nested key.
type Crawl struct {
	crawlcmd.Config `yaml:",inline" cmd:"crawl configuration"`
	Service         CrawlService `yaml:",inline" cmd:"service to be crawled"`
}

Crawl represents a single crawl that contributes data to a datasource.

type CrawlService

// CrawlService represents the configuration of a specific service to be
// crawled, e.g. the configuration needed to access a cloud service.
// Name is read from "service_name" and Config from "service_config"; Config
// is held as a raw yaml.Node.
// NOTE(review): Config is presumably decoded into a service specific type
// selected by Name — confirm against the code that consumes it.
type CrawlService struct {
	Name   string    `yaml:"service_name" cmd:"name of service to crawl, eg. s3/aws"`
	Config yaml.Node `yaml:"service_config" cmd:"service specific configuration, eg. cloudeng.io/aws/awsconfig.AWSFlags"`
}

CrawlService represents the configuration of a specific service to be crawled, eg. to contain configuration for accessing a cloud service.

type Datasource

// Datasource represents a single datasource or corpus to be crawled and indexed.
type Datasource struct {
	// Datasource name.
	Datasource string `yaml:"datasource" cmd:"name of the datasource"`

	// File based crawls that obtain data for this datasource.
	Crawls []Crawl `yaml:"crawls,omitempty" cmd:"file based crawls to run for this datasource"`

	// API based 'crawls' that obtain data for this datasource.
	APICrawls apicrawlcmd.Crawls `yaml:"api_crawls,omitempty" cmd:"api crawls to run for this datasource"`

	// Bulk index configuration for this datasource, embedded as a pointer
	// and read from the "bulk_index" key.
	*BulkIndex `yaml:"bulk_index,omitempty" cmd:"bulk index configuration for this datasource"`

	// Incremental index configuration for this datasource, embedded as a
	// pointer and read from the "incremental_index" key.
	*IncrementalIndex `yaml:"incremental_index,omitempty" cmd:"incremental index configuration for this datasource"`

	// Converters (from download.Result to Glean document) configuration.
	Converters []Converter `yaml:"converters,omitempty" cmd:"converters for this datasource"`

	// GleanDomain is the domain of the Glean instance to use.
	GleanDomain string `yaml:"glean_domain" cmd:"glean domain to use"`

	// GleanTokenName is the name of the Glean token to use to access the
	// Glean instance; per its cmd tag, glean_domain is used when it is empty.
	GleanTokenName string `yaml:"glean_token_name" cmd:"glean token name to use, if empty the glean_domain will be used"`

	// The Glean datasource configuration in YAML as opposed to JSON
	// format.
	GleanDatasource GleanDatasource `yaml:"glean_datasource_config" cmd:"glean datasource configuration, ie. the glean datasource to be indexed"`
}

Datasource represents a single datasource or corpus to be crawled and indexed.

func DatasourceForName

func DatasourceForName(ctx context.Context, filename string, name string) (Datasource, error)

DatasourceForName returns the datasource configuration for the named datasource read from the specified config file.

func (Datasource) ConfigForContentType

func (d Datasource) ConfigForContentType() map[content.Type]Conversion

ConfigForContentType returns a map from content type to all of the configuration information that pertains to that content type.

type DatasourceName

// DatasourceName holds the name of a datasource, tagged with a subcmd
// "datasource" entry described as "name of the datasource".
// NOTE(review): the source gives no description for this type; it appears to
// be the command line flag used to select a datasource by name, analogous to
// FileFlags — confirm against its users.
type DatasourceName struct {
	Datasource string `subcmd:"datasource,,name of the datasource"`
}

type Datasources

// Datasources represents a list of named datasources.
type Datasources []Datasource

Datasources represents a list of named datasources.

func (Datasources) ConfigForName

func (d Datasources) ConfigForName(name string) (Datasource, bool)

ConfigForName returns the configuration for the named datasource.

type FileFlags

// FileFlags represents a command line flag for the datasource config file.
type FileFlags struct {
	ConfigFile string `subcmd:"datasource-configs,,datasource config file"`
}

FileFlags represents a command line flag for the datasource config file.

type GleanDatasource

// GleanDatasource represents the configuration of the datasource with Glean's API.
type GleanDatasource struct {
	// The embedded CustomDatasourceConfig is the datasource configuration
	// for the Glean instance; the ",inline" option means its keys are read
	// from this struct's own YAML mapping.
	gleansdk.CustomDatasourceConfig `yaml:",inline" cmd:"glean custom datasource configuration"`
}

GleanDatasource represents the configuration of the datasource with Glean's API.

type IncrementalIndex

// IncrementalIndex represents the configuration info for incremental,
// document at a time, indexing.
type IncrementalIndex struct {
	DeletionDelay time.Duration `yaml:"deletion_delay"` // Documents that have not been updated within the deletion delay will be removed from the Glean index.
}

IncrementalIndex represents the configuration info for incremental, document at a time, indexing.

type User

// User represents a user in the system/datasource being indexed.
type User struct {
	Email  string `yaml:"email"`
	UserID string `yaml:"user_id"`
	Name   string `yaml:"name"`
}

User represents a user in the system/datasource being indexed.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL