metadata

package
v0.0.0-...-39ca940 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 26, 2021 License: Apache-2.0 Imports: 18 Imported by: 14

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func AugmentVariablesFromHeader

func AugmentVariablesFromHeader(dr *model.DataResource, header []string) []*model.Variable

AugmentVariablesFromHeader augments the metadata variables with variables found in the header. All variables found in the header default to strings.

func DatasetMatches

func DatasetMatches(m *model.Metadata, variables []string) bool

DatasetMatches determines if the metadata variables match.

func IsMetadataVariable

func IsMetadataVariable(v *model.Variable) bool

IsMetadataVariable indicates whether or not a variable is additional metadata added to the source.

func LoadDatasetStats

func LoadDatasetStats(m *model.Metadata, datasetPath string) error

LoadDatasetStats loads the dataset and computes various stats.

func LoadImportance

func LoadImportance(m *model.Metadata, importanceFile string) error

LoadImportance will load the importance feature selection metric.

func LoadMetadataFromClassification

func LoadMetadataFromClassification(schemaPath string, classificationPath string, normalizeVariableNames bool, mergedFallback bool) (*model.Metadata, error)

LoadMetadataFromClassification loads metadata from a merged schema and classification file.

func LoadMetadataFromMergedSchema

func LoadMetadataFromMergedSchema(schemaPath string) (*model.Metadata, error)

LoadMetadataFromMergedSchema loads metadata from a merged schema file.

func LoadMetadataFromOriginalSchema

func LoadMetadataFromOriginalSchema(schemaPath string, augmentFromData bool) (*model.Metadata, error)

LoadMetadataFromOriginalSchema loads metadata from a schema file.

func LoadMetadataFromRawFile

func LoadMetadataFromRawFile(datasetPath string, classificationPath string) (*model.Metadata, error)

LoadMetadataFromRawFile loads metadata from a raw file and a classification file.

func LoadSummary

func LoadSummary(m *model.Metadata, summaryFile string, useCache bool)

LoadSummary loads a description summary.

func LoadSummaryFromDescription

func LoadSummaryFromDescription(m *model.Metadata, summaryFile string)

LoadSummaryFromDescription loads a summary from the description.

func LoadSummaryMachine

func LoadSummaryMachine(m *model.Metadata, summaryFile string) error

LoadSummaryMachine loads a machine-learned summary.

func VerifyAndUpdate

func VerifyAndUpdate(m *model.Metadata, dataPath string, source DatasetSource) (bool, error)

VerifyAndUpdate will update the metadata when inconsistencies or errors are found.

func WriteClassification

func WriteClassification(classification *model.ClassificationData, classificationPath string) error

WriteClassification writes classification information to disk.

func WriteMergedSchema

func WriteMergedSchema(m *model.Metadata, path string, mergedDataResource *model.DataResource) error

WriteMergedSchema exports the current metadata as a merged schema file.

Types

type DataResourceParser

type DataResourceParser interface {
	Parse(res *gabs.Container) (*model.DataResource, error)
}

DataResourceParser is a parser for a data resource in the schema document.

type DatasetSource

type DatasetSource string

DatasetSource flags the type of ingest action that created a dataset.

const (
	// ProvenanceSimon identifies the type provenance as Simon
	ProvenanceSimon = "d3m.primitives.distil.simon"
	// ProvenanceSchema identifies the type provenance as schema
	ProvenanceSchema = "schema"

	// Seed flags a dataset as ingested from seed data
	Seed DatasetSource = "seed"

	// Contrib flags a dataset as being ingested from contributed data
	Contrib DatasetSource = "contrib"

	// Augmented flags a dataset as being ingested from augmented data
	Augmented DatasetSource = "augmented"

	// Batch flags a dataset as being a batch
	Batch DatasetSource = "batch"

	// Public flags a dataset as being ingested from public data
	Public DatasetSource = "public"
)

type Media

type Media struct {
	Type string
}

Media is a data resource that is backed by media files.

func NewMedia

func NewMedia(typ string) *Media

NewMedia creates a new Media instance.

func (*Media) Parse

func (r *Media) Parse(res *gabs.Container) (*model.DataResource, error)

Parse extracts the data resource from the data schema document.

type Raw

type Raw struct {
	// contains filtered or unexported fields
}

Raw is a data resource that is contained within one file which does not have fields specified in the schema.

func (*Raw) Parse

func (r *Raw) Parse(res *gabs.Container) (*model.DataResource, error)

Parse extracts the data resource from the data schema document.

type SummaryResult

type SummaryResult struct {
	Summary string `json:"summary"`
}

SummaryResult captures the output of a summarization primitive.

type Table

type Table struct {
}

Table is a data resource that is contained within one or many tabular files.

func (*Table) Parse

func (r *Table) Parse(res *gabs.Container) (*model.DataResource, error)

Parse extracts the data resource from the data schema document.

type Timeseries

type Timeseries struct {
}

Timeseries is a data resource that is contained within one or many timeseries files.

func (*Timeseries) Parse

func (r *Timeseries) Parse(res *gabs.Container) (*model.DataResource, error)

Parse extracts the data resource from the data schema document.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL