Documentation ¶
Index ¶
- func ReadMetadata(schemaPath string) (*model.Metadata, error)
- func ResultToInputCSV(resultURI string) ([][]string, error)
- func WriteData(uri string, data [][]string) error
- func WriteDataset(folderPath string, dataset *RawDataset) error
- func WriteMetadata(uri string, metadata *model.Metadata) error
- type CSV
- func (d *CSV) ReadData(uri string) ([][]string, error)
- func (d *CSV) ReadDataset(schemaFile string) (*RawDataset, error)
- func (d *CSV) ReadMetadata(uri string) (*model.Metadata, error)
- func (d *CSV) ReadRawVariables(uri string) ([]string, error)
- func (d *CSV) WriteData(uri string, data [][]string) error
- func (d *CSV) WriteDataset(uri string, data *RawDataset) error
- func (d *CSV) WriteMetadata(uri string, meta *model.Metadata, extended bool, update bool) error
- type Parquet
- func (d *Parquet) ReadData(uri string) ([][]string, error)
- func (d *Parquet) ReadDataset(schemaFile string) (*RawDataset, error)
- func (d *Parquet) ReadMetadata(uri string) (*model.Metadata, error)
- func (d *Parquet) ReadRawVariables(uri string) ([]string, error)
- func (d *Parquet) WriteData(uri string, data [][]string) error
- func (d *Parquet) WriteDataset(uri string, data *RawDataset) error
- func (d *Parquet) WriteMetadata(uri string, meta *model.Metadata, extended bool, update bool) error
- type RawDataset
- func (d *RawDataset) AddField(variable *model.Variable) error
- func (d *RawDataset) FieldExists(variable *model.Variable) bool
- func (d *RawDataset) FilterDataset(filter map[string]bool)
- func (d *RawDataset) GetVariableIndex(variableHeaderName string) int
- func (d *RawDataset) GetVariableIndices(variableHeaderNames []string) (map[string]int, error)
- func (d *RawDataset) GetVariableMetadata(variableHeaderName string) *model.Variable
- func (d *RawDataset) SyncMetadata(metaToSync *model.Metadata)
- func (d *RawDataset) UpdateDataset(updates map[int]map[string]string)
- type Storage
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ReadMetadata ¶
ReadMetadata reads the metadata in the specified path.
func ResultToInputCSV ¶
ResultToInputCSV takes a result produced by a TA2 pipeline run and ensures that it is in a format suitable for storage as a D3M dataset.
func WriteDataset ¶
func WriteDataset(folderPath string, dataset *RawDataset) error
WriteDataset determines which storage engine to use and then writes out the metadata and the data using it.
Types ¶
type CSV ¶
type CSV struct { }
CSV represents a dataset storage backed with csv data and json schema doc.
func (*CSV) ReadDataset ¶
func (d *CSV) ReadDataset(schemaFile string) (*RawDataset, error)
ReadDataset reads a raw dataset from the file system, loading the csv data into memory.
func (*CSV) ReadMetadata ¶
ReadMetadata reads the dataset doc from disk.
func (*CSV) ReadRawVariables ¶
ReadRawVariables reads the csv header file to get a list of variables in the file.
func (*CSV) WriteDataset ¶
func (d *CSV) WriteDataset(uri string, data *RawDataset) error
WriteDataset writes the raw dataset to the file system, writing out the data to a csv file.
type Parquet ¶
type Parquet struct { }
Parquet represents a dataset storage backed with parquet data and json schema doc.
func (*Parquet) ReadDataset ¶
func (d *Parquet) ReadDataset(schemaFile string) (*RawDataset, error)
ReadDataset reads a raw dataset from the file system, loading the parquet data into memory.
func (*Parquet) ReadMetadata ¶
ReadMetadata reads the dataset doc from disk.
func (*Parquet) ReadRawVariables ¶
ReadRawVariables reads the metadata and extracts the field names.
func (*Parquet) WriteDataset ¶
func (d *Parquet) WriteDataset(uri string, data *RawDataset) error
WriteDataset writes the raw dataset to the file system, writing out the data to a parquet file.
type RawDataset ¶
type RawDataset struct { ID string Name string Metadata *model.Metadata Data [][]string DefinitiveTypes bool }
RawDataset contains basic information about the structure of the dataset as well as the raw learning data.
func ReadDataset ¶
func ReadDataset(schemaPath string) (*RawDataset, error)
ReadDataset reads the metadata to find the main data reference, then reads that.
func (*RawDataset) AddField ¶
func (d *RawDataset) AddField(variable *model.Variable) error
AddField adds a field to the dataset, updating both the data and the metadata.
func (*RawDataset) FieldExists ¶
func (d *RawDataset) FieldExists(variable *model.Variable) bool
FieldExists returns true if a field is already part of the metadata.
func (*RawDataset) FilterDataset ¶
func (d *RawDataset) FilterDataset(filter map[string]bool)
FilterDataset updates the dataset to only keep the rows that have the specified column in the filter map set to true.
func (*RawDataset) GetVariableIndex ¶
func (d *RawDataset) GetVariableIndex(variableHeaderName string) int
GetVariableIndex returns the index of the variable as found in the header or -1 if not found in the header.
func (*RawDataset) GetVariableIndices ¶
func (d *RawDataset) GetVariableIndices(variableHeaderNames []string) (map[string]int, error)
GetVariableIndices returns the mapping of variable header name to header index. It will error if a field is not found in the header.
func (*RawDataset) GetVariableMetadata ¶
func (d *RawDataset) GetVariableMetadata(variableHeaderName string) *model.Variable
GetVariableMetadata returns the variable metadata using the header name.
func (*RawDataset) SyncMetadata ¶
func (d *RawDataset) SyncMetadata(metaToSync *model.Metadata)
SyncMetadata updates the key metadata properties to match a given metadata. This is often used to update the metadata for prediction or prefeaturization purposes.
func (*RawDataset) UpdateDataset ¶
func (d *RawDataset) UpdateDataset(updates map[int]map[string]string)
UpdateDataset updates a dataset with the value specified in the updates dictionary. If the specified column value is not found in the dictionary, then it is left unchanged. Updates are specified by column index value.
type Storage ¶
type Storage interface { ReadDataset(uri string) (*RawDataset, error) WriteDataset(uri string, data *RawDataset) error ReadData(uri string) ([][]string, error) WriteData(uri string, data [][]string) error ReadMetadata(uri string) (*model.Metadata, error) WriteMetadata(uri string, metadata *model.Metadata, extended bool, update bool) error ReadRawVariables(uri string) ([]string, error) }
Storage defines the base functions needed to store datasets to a backing storage for interactions with an auto ml server.
func GetCSVStorage ¶
func GetCSVStorage() Storage
GetCSVStorage returns the instantiated csv storage.
func GetParquetStorage ¶
func GetParquetStorage() Storage
GetParquetStorage returns the instantiated parquet storage.
func GetStorage ¶
GetStorage returns the storage to use based on URI.