Documentation ¶
Index ¶
- Constants
- func CreatePreFeaturizedDatasetPipeline(name string, description string, datasetDescription *UserDatasetDescription, ...) (*pipeline.PipelineDescription, error)
- func CreateUserDatasetPipeline(name string, description string, datasetDescription *UserDatasetDescription, ...) (*pipeline.PipelineDescription, error)
- func MarshalSteps(step *pipeline.PipelineDescription) (string, error)
- type ClusterParams
- type ColumnUpdate
- type DataRef
- type FullySpecifiedPipeline
- func CreateDSBoxJoinPipeline(name string, description string, leftJoinCols []string, rightJoinCols []string, ...) (*FullySpecifiedPipeline, error)
- func CreateDataCleaningPipeline(name string, description string, variables []*model.Variable, impute bool) (*FullySpecifiedPipeline, error)
- func CreateDataFilterPipeline(name string, description string, variables []*model.Variable, ...) (*FullySpecifiedPipeline, error)
- func CreateDatamartAugmentPipeline(name string, description string, searchResult string, systemIdentifier string) (*FullySpecifiedPipeline, error)
- func CreateDatamartDownloadPipeline(name string, description string, searchResult string, systemIdentifier string) (*FullySpecifiedPipeline, error)
- func CreateDenormalizePipeline(name string, description string) (*FullySpecifiedPipeline, error)
- func CreateDukePipeline(name string, description string) (*FullySpecifiedPipeline, error)
- func CreateGeneralClusteringPipeline(name string, description string, datasetDescription *UserDatasetDescription, ...) (*FullySpecifiedPipeline, error)
- func CreateGoatForwardPipeline(name string, description string, placeCol *model.Variable) (*FullySpecifiedPipeline, error)
- func CreateGoatReversePipeline(name string, description string, lonSource *model.Variable, ...) (*FullySpecifiedPipeline, error)
- func CreateGroupingFieldComposePipeline(name string, description string, colIndices []int, joinChar string, ...) (*FullySpecifiedPipeline, error)
- func CreateImageClusteringPipeline(name string, description string, imageVariables []*model.Variable, ...) (*FullySpecifiedPipeline, error)
- func CreateImageFeaturizationPipeline(name string, description string, variables []*model.Variable) (*FullySpecifiedPipeline, error)
- func CreateImageOutlierDetectionPipeline(name string, description string, imageVariables []*model.Variable) (*FullySpecifiedPipeline, error)
- func CreateImageQueryPipeline(name string, description string, cacheLocation string, colsToDrop []int) (*FullySpecifiedPipeline, error)
- func CreateJoinPipeline(name string, description string, join *JoinDescription) (*FullySpecifiedPipeline, error)
- func CreateMultiBandImageClusteringPipeline(name string, description string, grouping *model.MultiBandImageGrouping, ...) (*FullySpecifiedPipeline, error)
- func CreateMultiBandImageFeaturizationPipeline(name string, description string, variables []*model.Variable, numJobs int, ...) (*FullySpecifiedPipeline, error)
- func CreateMultiBandImageOutlierDetectionPipeline(name string, description string, imageVariables []*model.Variable, ...) (*FullySpecifiedPipeline, error)
- func CreatePCAFeaturesPipeline(name string, description string) (*FullySpecifiedPipeline, error)
- func CreatePreFeaturizedMultiBandImageClusteringPipeline(name string, description string, variables []*model.Variable, ...) (*FullySpecifiedPipeline, error)
- func CreateRemoteSensingSegmentationPipeline(name string, description string, targetVariable *model.Variable, numJobs int) (*FullySpecifiedPipeline, error)
- func CreateSimonPipeline(name string, description string) (*FullySpecifiedPipeline, error)
- func CreateSlothPipeline(name string, description string, timeColumn string, valueColumn string, ...) (*FullySpecifiedPipeline, error)
- func CreateTabularOutlierDetectionPipeline(name string, description string, datasetDescription *UserDatasetDescription, ...) (*FullySpecifiedPipeline, error)
- func CreateTargetRankingPipeline(name string, description string, target *model.Variable, ...) (*FullySpecifiedPipeline, error)
- func CreateTimeseriesFormatterPipeline(name string, description string, resource string) (*FullySpecifiedPipeline, error)
- func CreateVerticalConcatPipeline(name string, description string) (*FullySpecifiedPipeline, error)
- type InferenceStepData
- func (s *InferenceStepData) BuildDescriptionStep() (*pipeline.PipelineDescriptionStep, error)
- func (s *InferenceStepData) GetArguments() map[string]DataRef
- func (s *InferenceStepData) GetHyperparameters() map[string]interface{}
- func (s *InferenceStepData) GetOutputMethods() []string
- func (s *InferenceStepData) GetPrimitive() *pipeline.Primitive
- type Join
- type JoinDescription
- type ListStepDataRef
- type PipelineBuilder
- type PipelineDataRef
- type PrimitiveReference
- type Step
- type StepData
- func NewAddSemanticTypeStep(inputs map[string]DataRef, outputMethods []string, add *ColumnUpdate) *StepData
- func NewBinaryEncoderStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewCSVReaderStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewCategoricalImputerStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewColumnParserStep(inputs map[string]DataRef, outputMethods []string, types []string) *StepData
- func NewConstructPredictionStep(inputs map[string]DataRef, outputMethods []string, reference DataRef) *StepData
- func NewDSBoxJoinStep(inputs map[string]DataRef, outputMethods []string, leftCols []string, ...) *StepData
- func NewDataCleaningStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewDataFrameFlattenStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewDataframeImageReaderStep(inputs map[string]DataRef, outputMethods []string, columns []int) *StepData
- func NewDatamartAugmentStep(inputs map[string]DataRef, outputMethods []string, searchResult string, ...) *StepData
- func NewDatamartDownloadStep(inputs map[string]DataRef, outputMethods []string, searchResult string, ...) *StepData
- func NewDatasetToDataframeStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewDatasetToDataframeStepWithResource(inputs map[string]DataRef, outputMethods []string, resourceName string) *StepData
- func NewDatasetWrapperStep(inputs map[string]DataRef, outputMethods []string, primitiveIndex int, ...) *StepData
- func NewDateTimeRangeFilterStep(inputs map[string]DataRef, outputMethods []string, colindex int, ...) *StepData
- func NewDenormalizeStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewDistilColumnParserStep(inputs map[string]DataRef, outputMethods []string, types []string) *StepData
- func NewDukeStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewEnrichDatesStep(inputs map[string]DataRef, outputMethods []string, replace bool) *StepData
- func NewExtractColumnsBySemanticTypeStep(inputs map[string]DataRef, outputMethods []string, semanticTypes []string) *StepData
- func NewExtractColumnsByStructuralTypeStep(inputs map[string]DataRef, outputMethods []string, structuralTypes []string) *StepData
- func NewExtractColumnsStep(inputs map[string]DataRef, outputMethods []string, indices []int) *StepData
- func NewGoatForwardStep(inputs map[string]DataRef, outputMethods []string, placeColIndex int) *StepData
- func NewGoatReverseStep(inputs map[string]DataRef, outputMethods []string, lonCol int, latCol int) *StepData
- func NewGroupingFieldComposeStep(inputs map[string]DataRef, outputMethods []string, colIndices []int, ...) *StepData
- func NewHDBScanStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewHorizontalConcatStep(inputs map[string]DataRef, outputMethods []string, useIndex bool, ...) *StepData
- func NewImageRetrievalStep(inputs map[string]DataRef, outputMethods []string, cacheLocation string) *StepData
- func NewImageSegmentationPrimitiveStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewImageTransferStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewIsolationForestStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewJoinStep(inputs map[string]DataRef, outputMethods []string, leftCol []string, ...) *StepData
- func NewKMeansClusteringStep(inputs map[string]DataRef, outputMethods []string, clusterCount int) *StepData
- func NewListEncoderStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewNumericRangeFilterStep(inputs map[string]DataRef, outputMethods []string, colindex int, ...) *StepData
- func NewOneHotEncoderStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewPCAFeaturesStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewPrefeaturisedPoolingStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewProfilerStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewRegexFilterStep(inputs map[string]DataRef, outputMethods []string, colindex int, ...) *StepData
- func NewRemoteSensingPretrainedStep(inputs map[string]DataRef, outputMethods []string, batchSize int, pool bool) *StepData
- func NewRemoveColumnsStep(inputs map[string]DataRef, outputMethods []string, colIndices []int) *StepData
- func NewRemoveDuplicateColumnsStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewRemoveSemanticTypeStep(inputs map[string]DataRef, outputMethods []string, remove *ColumnUpdate) *StepData
- func NewReplaceSingletonStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewSKImputerStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewSKMissingIndicatorStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewSatelliteImageLoaderStep(inputs map[string]DataRef, outputMethods []string, numJobs int) *StepData
- func NewSimonStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewSlothStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewStepData(primitive *pipeline.Primitive, outputMethods []string, ...) *StepData
- func NewTargetRankingStep(inputs map[string]DataRef, outputMethods []string, targetCol int) *StepData
- func NewTermFilterStep(inputs map[string]DataRef, outputMethods []string, colindex int, ...) *StepData
- func NewTextEncoderStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewTimeseriesFormatterStep(inputs map[string]DataRef, outputMethods []string, mainResID string, ...) *StepData
- func NewVectorBoundsFilterStep(inputs map[string]DataRef, outputMethods []string, column int, inclusive bool, ...) *StepData
- func NewVerticalConcatenationPrimitiveStep(inputs map[string]DataRef, outputMethods []string, removeDuplicate bool) *StepData
- func (s *StepData) BuildDescriptionStep() (*pipeline.PipelineDescriptionStep, error)
- func (s *StepData) GetArguments() map[string]DataRef
- func (s *StepData) GetHyperparameters() map[string]interface{}
- func (s *StepData) GetOutputMethods() []string
- func (s *StepData) GetPrimitive() *pipeline.Primitive
- type StepDataRef
- type UserDatasetAugmentation
- type UserDatasetDescription
Constants ¶
const (
	// JoinTypeLeft represents a left outer join operation
	JoinTypeLeft = "left"
	// JoinTypeRight represents a right outer join operation
	JoinTypeRight = "right"
	// JoinTypeOuter represents an outer join operation
	JoinTypeOuter = "outer"
	// JoinTypeInner represents an inner join operation
	JoinTypeInner = "inner"
	// JoinTypeCross represents a cross join operation
	JoinTypeCross = "cross"
)
Variables ¶
This section is empty.
Functions ¶
func CreatePreFeaturizedDatasetPipeline ¶
func CreatePreFeaturizedDatasetPipeline(name string, description string, datasetDescription *UserDatasetDescription, augmentations []*UserDatasetAugmentation) (*pipeline.PipelineDescription, error)
CreatePreFeaturizedDatasetPipeline creates a pipeline that acts on a pre-featurized dataset. The created prepend is simplified because the dataset already has all of the features needed for the end task stored on disk.
func CreateUserDatasetPipeline ¶
func CreateUserDatasetPipeline(name string, description string, datasetDescription *UserDatasetDescription, augmentations []*UserDatasetAugmentation) (*pipeline.PipelineDescription, error)
CreateUserDatasetPipeline creates a pipeline description to capture user feature selection and semantic type information.
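A minimal usage sketch follows. The import paths and the helper name are assumptions, and datasetDesc is taken to be a *UserDatasetDescription already populated by the caller.

// Assumed imports (paths are a guess at the module layout):
//   pipeline    "github.com/uncharted-distil/distil-compute/pipeline"
//   description "github.com/uncharted-distil/distil-compute/primitive/compute/description"

// buildPrepend is a hypothetical helper that builds a user dataset prepend with
// no datamart augmentations.
func buildPrepend(datasetDesc *description.UserDatasetDescription) (*pipeline.PipelineDescription, error) {
	return description.CreateUserDatasetPipeline(
		"user-dataset-prepend",
		"captures user feature selection and semantic type overrides",
		datasetDesc,
		nil, // no augmentations
	)
}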
func MarshalSteps ¶
func MarshalSteps(step *pipeline.PipelineDescription) (string, error)
MarshalSteps marshals a pipeline description into a json representation.
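For example, a compiled description can be serialized for logging or persistence. This sketch assumes the same import paths as above plus the standard library log package.

// logPipeline marshals a pipeline description to its JSON representation and logs it.
func logPipeline(desc *pipeline.PipelineDescription) error {
	jsonStr, err := description.MarshalSteps(desc)
	if err != nil {
		return err
	}
	log.Printf("pipeline description: %s", jsonStr)
	return nil
}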
Types ¶
type ClusterParams ¶
ClusterParams defines parameters to use when clustering.
type ColumnUpdate ¶
ColumnUpdate defines a set of column indices to add/remove a set of semantic types to/from.
type DataRef ¶
type DataRef interface {
	CreateDataRef() *pipeline.PrimitiveStepArgument
	RefString() string
}
DataRef defines an interface for creating input reference strings that are used to connect primitive inputs to outputs.
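The sketch below wires a pipeline input into a step's "inputs" argument. The "inputs" argument name and "produce" output method follow the usual D3M conventions, and the RefString format noted in the comment is a convention rather than a guarantee.

// wireInput connects the pipeline's first input to a dataset-to-dataframe step.
func wireInput() description.Step {
	// A PipelineDataRef addresses one of the pipeline's own inputs by position;
	// its RefString is conventionally of the form "inputs.0".
	inputRef := &description.PipelineDataRef{InputNum: 0}

	// Step arguments map argument names to DataRefs, e.g. the standard "inputs" argument.
	return description.NewDatasetToDataframeStep(
		map[string]description.DataRef{"inputs": inputRef},
		[]string{"produce"},
	)
}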
type FullySpecifiedPipeline ¶
type FullySpecifiedPipeline struct {
	Pipeline         *pipeline.PipelineDescription
	EquivalentValues []interface{}
}
FullySpecifiedPipeline wraps a fully specified pipeline along with the fields which can be used to determine equivalent pipelines.
func CreateDSBoxJoinPipeline ¶
func CreateDSBoxJoinPipeline(name string, description string, leftJoinCols []string, rightJoinCols []string, accuracy float32) (*FullySpecifiedPipeline, error)
CreateDSBoxJoinPipeline creates a pipeline that joins two input datasets using caller supplied columns.
func CreateDataCleaningPipeline ¶
func CreateDataCleaningPipeline(name string, description string, variables []*model.Variable, impute bool) (*FullySpecifiedPipeline, error)
CreateDataCleaningPipeline creates a pipeline to run data cleaning on a dataset.
func CreateDataFilterPipeline ¶
func CreateDataFilterPipeline(name string, description string, variables []*model.Variable, filters []*model.FilterSet) (*FullySpecifiedPipeline, error)
CreateDataFilterPipeline creates a pipeline that will filter a dataset.
func CreateDatamartAugmentPipeline ¶
func CreateDatamartAugmentPipeline(name string, description string, searchResult string, systemIdentifier string) (*FullySpecifiedPipeline, error)
CreateDatamartAugmentPipeline creates a pipeline to augment data with datamart data.
func CreateDatamartDownloadPipeline ¶
func CreateDatamartDownloadPipeline(name string, description string, searchResult string, systemIdentifier string) (*FullySpecifiedPipeline, error)
CreateDatamartDownloadPipeline creates a pipeline to download data from a datamart.
func CreateDenormalizePipeline ¶
func CreateDenormalizePipeline(name string, description string) (*FullySpecifiedPipeline, error)
CreateDenormalizePipeline creates a pipeline to run the denormalize primitive on an input dataset.
func CreateDukePipeline ¶
func CreateDukePipeline(name string, description string) (*FullySpecifiedPipeline, error)
CreateDukePipeline creates a pipeline to run the Duke dataset classifier on a dataset.
func CreateGeneralClusteringPipeline ¶
func CreateGeneralClusteringPipeline(name string, description string, datasetDescription *UserDatasetDescription, augmentations []*UserDatasetAugmentation, params *ClusterParams) (*FullySpecifiedPipeline, error)
CreateGeneralClusteringPipeline creates a pipeline that will cluster tabular data.
func CreateGoatForwardPipeline ¶
func CreateGoatForwardPipeline(name string, description string, placeCol *model.Variable) (*FullySpecifiedPipeline, error)
CreateGoatForwardPipeline creates a forward geocoding pipeline.
func CreateGoatReversePipeline ¶
func CreateGoatReversePipeline(name string, description string, lonSource *model.Variable, latSource *model.Variable) (*FullySpecifiedPipeline, error)
CreateGoatReversePipeline creates a reverse geocoding pipeline.
func CreateGroupingFieldComposePipeline ¶
func CreateGroupingFieldComposePipeline(name string, description string, colIndices []int, joinChar string, outputName string) (*FullySpecifiedPipeline, error)
CreateGroupingFieldComposePipeline creates a pipeline to create a grouping key field for a dataset.
func CreateImageClusteringPipeline ¶
func CreateImageClusteringPipeline(name string, description string, imageVariables []*model.Variable, params *ClusterParams) (*FullySpecifiedPipeline, error)
CreateImageClusteringPipeline creates a fully specified pipeline that will cluster images together, returning a column with the resulting cluster.
func CreateImageFeaturizationPipeline ¶
func CreateImageFeaturizationPipeline(name string, description string, variables []*model.Variable) (*FullySpecifiedPipeline, error)
CreateImageFeaturizationPipeline creates a pipeline that will featurize images.
func CreateImageOutlierDetectionPipeline ¶
func CreateImageOutlierDetectionPipeline(name string, description string, imageVariables []*model.Variable) (*FullySpecifiedPipeline, error)
CreateImageOutlierDetectionPipeline makes a pipeline for outlier detection with remote sensing data.
func CreateImageQueryPipeline ¶
func CreateImageQueryPipeline(name string, description string, cacheLocation string, colsToDrop []int) (*FullySpecifiedPipeline, error)
CreateImageQueryPipeline creates a pipeline that will perform image retrieval. The cacheLocation parameter is passed down to the image retrieval primitive, and is used to cache dot products across query operations. When a new dataset is being labelled, the cache location should be updated.
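A sketch of per-dataset cache handling follows; the directory layout and names are illustrative, and it assumes the standard library path/filepath package in addition to the imports noted earlier.

// newImageQuery builds an image retrieval pipeline whose dot-product cache lives
// in a per-dataset directory, so a fresh labelling session gets a fresh cache.
func newImageQuery(cacheRoot string, datasetID string) (*description.FullySpecifiedPipeline, error) {
	cacheDir := filepath.Join(cacheRoot, datasetID, "image-query-cache")
	return description.CreateImageQueryPipeline(
		"image-query",
		"ranks images by similarity to positively labelled examples",
		cacheDir,
		nil, // no columns dropped in this sketch
	)
}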
func CreateJoinPipeline ¶
func CreateJoinPipeline(name string, description string, join *JoinDescription) (*FullySpecifiedPipeline, error)
CreateJoinPipeline creates a pipeline that joins two input datasets using a caller supplied column. Accuracy is a normalized value that controls how exact the join has to be.
func CreateMultiBandImageClusteringPipeline ¶
func CreateMultiBandImageClusteringPipeline(name string, description string, grouping *model.MultiBandImageGrouping, variables []*model.Variable, params *ClusterParams, batchSize int, numJobs int) (*FullySpecifiedPipeline, error)
CreateMultiBandImageClusteringPipeline creates a fully specified pipeline that will cluster multiband images together, returning a column with the resulting cluster.
func CreateMultiBandImageFeaturizationPipeline ¶
func CreateMultiBandImageFeaturizationPipeline(name string, description string, variables []*model.Variable, numJobs int, batchSize int, poolFeatures bool) (*FullySpecifiedPipeline, error)
CreateMultiBandImageFeaturizationPipeline creates a pipeline that will featurize multiband images.
func CreateMultiBandImageOutlierDetectionPipeline ¶
func CreateMultiBandImageOutlierDetectionPipeline(name string, description string, imageVariables []*model.Variable, prefeaturised bool, pooled bool, grouping *model.MultiBandImageGrouping, batchSize int, numJobs int) (*FullySpecifiedPipeline, error)
CreateMultiBandImageOutlierDetectionPipeline does outlier detection for multiband images, for both prefeaturised and non-prefeaturised data.
func CreatePCAFeaturesPipeline ¶
func CreatePCAFeaturesPipeline(name string, description string) (*FullySpecifiedPipeline, error)
CreatePCAFeaturesPipeline creates a pipeline to run feature ranking on an input dataset.
func CreatePreFeaturizedMultiBandImageClusteringPipeline ¶
func CreatePreFeaturizedMultiBandImageClusteringPipeline(name string, description string, variables []*model.Variable, params *ClusterParams) (*FullySpecifiedPipeline, error)
CreatePreFeaturizedMultiBandImageClusteringPipeline creates a fully specified pipeline that will cluster multiband images together, returning a column with the resulting cluster.
func CreateRemoteSensingSegmentationPipeline ¶
func CreateRemoteSensingSegmentationPipeline(name string, description string, targetVariable *model.Variable, numJobs int) (*FullySpecifiedPipeline, error)
CreateRemoteSensingSegmentationPipeline creates a pipeline to segment remote sensing images.
func CreateSimonPipeline ¶
func CreateSimonPipeline(name string, description string) (*FullySpecifiedPipeline, error)
CreateSimonPipeline creates a pipeline to run semantic type inference on a dataset's columns.
func CreateSlothPipeline ¶
func CreateSlothPipeline(name string, description string, timeColumn string, valueColumn string, timeseriesGrouping *model.TimeseriesGrouping, timeSeriesFeatures []*model.Variable) (*FullySpecifiedPipeline, error)
CreateSlothPipeline creates a pipeline to perform timeseries clustering on a dataset.
func CreateTabularOutlierDetectionPipeline ¶
func CreateTabularOutlierDetectionPipeline(name string, description string, datasetDescription *UserDatasetDescription, augmentations []*UserDatasetAugmentation) (*FullySpecifiedPipeline, error)
CreateTabularOutlierDetectionPipeline makes a pipeline for outlier detection.
func CreateTargetRankingPipeline ¶
func CreateTargetRankingPipeline(name string, description string, target *model.Variable, features []*model.Variable, selectedFeatures map[string]bool) (*FullySpecifiedPipeline, error)
CreateTargetRankingPipeline creates a pipeline to run feature ranking on an input dataset.
func CreateTimeseriesFormatterPipeline ¶
func CreateTimeseriesFormatterPipeline(name string, description string, resource string) (*FullySpecifiedPipeline, error)
CreateTimeseriesFormatterPipeline creates a time series formatter pipeline.
func CreateVerticalConcatPipeline ¶
func CreateVerticalConcatPipeline(name string, description string) (*FullySpecifiedPipeline, error)
CreateVerticalConcatPipeline creates a pipeline that will vertically concat two datasets (union).
type InferenceStepData ¶
type InferenceStepData struct {
	Inputs  []string
	Outputs []string
	// contains filtered or unexported fields
}
InferenceStepData provides data for a pipeline description placeholder step, which marks the point at which a TA2 should begin pipeline inference.
func NewInferenceStepData ¶
func NewInferenceStepData(arguments map[string]DataRef) *InferenceStepData
NewInferenceStepData creates a InferenceStepData instance with default values.
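In practice the placeholder is appended as the last step of a prepend, fed by whichever step produced the prepared data. A short sketch, using the conventional "inputs" argument name:

// appendPlaceholder adds the TA2 inference placeholder after the existing steps.
// lastStepOutput should reference the output of the final prepend step.
func appendPlaceholder(steps []description.Step, lastStepOutput description.DataRef) []description.Step {
	placeholder := description.NewInferenceStepData(
		map[string]description.DataRef{"inputs": lastStepOutput},
	)
	return append(steps, placeholder)
}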
func (*InferenceStepData) BuildDescriptionStep ¶
func (s *InferenceStepData) BuildDescriptionStep() (*pipeline.PipelineDescriptionStep, error)
BuildDescriptionStep creates protobuf structures from a pipeline step definition.
func (*InferenceStepData) GetArguments ¶
func (s *InferenceStepData) GetArguments() map[string]DataRef
GetArguments adapts the internal placeholder step argument type to the primitive step argument type.
func (*InferenceStepData) GetHyperparameters ¶
func (s *InferenceStepData) GetHyperparameters() map[string]interface{}
GetHyperparameters returns an empty map since inference steps don't take hyperparameters.
func (*InferenceStepData) GetOutputMethods ¶
func (s *InferenceStepData) GetOutputMethods() []string
GetOutputMethods returns a list of methods that will be called to generate primitive output. These feed into downstream primitives.
func (*InferenceStepData) GetPrimitive ¶
func (s *InferenceStepData) GetPrimitive() *pipeline.Primitive
GetPrimitive returns nil since there is no primitive associated with a placeholder step.
type Join ¶
Join captures a specific join relationship and constraint to be used in dataset joining.
type JoinDescription ¶
type JoinDescription struct {
	Type           string
	Joins          []*Join
	RightExcludes  []*model.Variable
	RightVariables []*model.Variable
	LeftExcludes   []*model.Variable
	LeftVariables  []*model.Variable
}
JoinDescription represents the complete information necessary to join two datasets via a join pipeline.
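A sketch tying the join type constants to a join pipeline. The *Join entries (column pairings and accuracies) are assumed to be built elsewhere, since their fields are not shown here, and the model package import path is assumed to be the distil-compute model package.

// innerJoinPipeline describes an inner join between two datasets and builds the
// corresponding pipeline. Variable lists come from the two datasets' metadata.
func innerJoinPipeline(joins []*description.Join, leftVars, rightVars []*model.Variable) (*description.FullySpecifiedPipeline, error) {
	joinDesc := &description.JoinDescription{
		Type:           description.JoinTypeInner,
		Joins:          joins,
		LeftVariables:  leftVars,
		RightVariables: rightVars,
	}
	return description.CreateJoinPipeline("join", "inner join of two datasets", joinDesc)
}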
type ListStepDataRef ¶
type ListStepDataRef struct {
// contains filtered or unexported fields
}
ListStepDataRef points to a list of data references.
func (*ListStepDataRef) AddDataRef ¶
func (s *ListStepDataRef) AddDataRef(dataRef DataRef)
AddDataRef adds a data reference to the list.
func (*ListStepDataRef) CreateDataRef ¶
func (s *ListStepDataRef) CreateDataRef() *pipeline.PrimitiveStepArgument
CreateDataRef creates a primitive step argument.
func (*ListStepDataRef) RefString ¶
func (s *ListStepDataRef) RefString() string
RefString creates a string representation of a ListStepDataRef.
type PipelineBuilder ¶
type PipelineBuilder struct {
// contains filtered or unexported fields
}
PipelineBuilder compiles a pipeline DAG into a protobuf pipeline description that can be passed to a downstream TA2 for inference (optional) and execution.
func NewPipelineBuilder ¶
func NewPipelineBuilder(name string, description string, inputs []string, outputs []DataRef, steps []Step) *PipelineBuilder
NewPipelineBuilder creates a new pipeline builder instance. All of the steps in the pipeline DAG need to be passed in to the builder via the steps argument.
func (*PipelineBuilder) Compile ¶
func (p *PipelineBuilder) Compile() (*pipeline.PipelineDescription, error)
Compile creates the protobuf pipeline description from the step graph. It can only be called once.
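A minimal end-to-end sketch, under the assumptions noted earlier about import paths and the conventional "inputs"/"produce" names. The pipeline output reference is left as a parameter because StepDataRef construction is internal to the package's helpers.

// compileMinimal builds a one-step pipeline (dataset to dataframe) and compiles it.
// finalOutput should be a DataRef (a StepDataRef in practice) pointing at the last
// step's "produce" output.
func compileMinimal(finalOutput description.DataRef) (*pipeline.PipelineDescription, error) {
	toDataframe := description.NewDatasetToDataframeStep(
		map[string]description.DataRef{"inputs": &description.PipelineDataRef{InputNum: 0}},
		[]string{"produce"},
	)

	builder := description.NewPipelineBuilder(
		"minimal",
		"dataset to dataframe only",
		[]string{"inputs"},                 // pipeline input names
		[]description.DataRef{finalOutput}, // pipeline outputs
		[]description.Step{toDataframe},
	)

	// Compile may only be called once per builder instance.
	return builder.Compile()
}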
func (*PipelineBuilder) GetSteps ¶
func (p *PipelineBuilder) GetSteps() []Step
GetSteps returns compiled steps.
type PipelineDataRef ¶
type PipelineDataRef struct {
InputNum int
}
PipelineDataRef points to an input of the pipeline itself (typically a dataset) instead of the output of another primitive.
func (*PipelineDataRef) CreateDataRef ¶
func (p *PipelineDataRef) CreateDataRef() *pipeline.PrimitiveStepArgument
CreateDataRef creates a primitive step argument.
func (*PipelineDataRef) RefString ¶
func (p *PipelineDataRef) RefString() string
RefString creates a string representation of a PipelineDataRef.
type PrimitiveReference ¶
type PrimitiveReference struct {
// contains filtered or unexported fields
}
PrimitiveReference is a marker struct for an argument that is an integer, but should be interpreted as a reference to another primitive.
type Step ¶
type Step interface {
	BuildDescriptionStep() (*pipeline.PipelineDescriptionStep, error)
	GetPrimitive() *pipeline.Primitive
	GetArguments() map[string]DataRef
	GetHyperparameters() map[string]interface{}
	GetOutputMethods() []string
}
Step provides data for a pipeline description step and an operation to create a protobuf PipelineDescriptionStep from that data.
type StepData ¶
type StepData struct {
	Primitive       *pipeline.Primitive
	Arguments       map[string]DataRef
	Hyperparameters map[string]interface{}
	OutputMethods   []string
}
StepData contains the minimum amount of data used to describe a pipeline step.
func NewAddSemanticTypeStep ¶
func NewAddSemanticTypeStep(inputs map[string]DataRef, outputMethods []string, add *ColumnUpdate) *StepData
NewAddSemanticTypeStep adds semantic data values to an input dataset. An add of (1, 2), ("type a", "type b") would result in "type a" and "type b" being added to the columns at indices 1 and 2.
func NewBinaryEncoderStep ¶
NewBinaryEncoderStep adds a binary encoder for categoricals of high cardinality.
func NewCSVReaderStep ¶
NewCSVReaderStep reads data from csv files into a nested dataframe structure.
func NewCategoricalImputerStep ¶
NewCategoricalImputerStep finds missing categorical values and replaces them with an imputed value.
func NewColumnParserStep ¶
func NewColumnParserStep(inputs map[string]DataRef, outputMethods []string, types []string) *StepData
NewColumnParserStep takes obj/string columns in a dataframe and parses them into their associated raw python types based on the attached d3m metadata.
func NewConstructPredictionStep ¶
func NewConstructPredictionStep(inputs map[string]DataRef, outputMethods []string, reference DataRef) *StepData
NewConstructPredictionStep maps the dataframe index to d3m index.
func NewDSBoxJoinStep ¶
func NewDSBoxJoinStep(inputs map[string]DataRef, outputMethods []string, leftCols []string, rightCols []string, accuracy float32) *StepData
NewDSBoxJoinStep creates a step that will attempt to join two datasets using key columns from each dataset.
func NewDataCleaningStep ¶
NewDataCleaningStep creates a wrapper for the Punk data cleaning primitive.
func NewDataFrameFlattenStep ¶
NewDataFrameFlattenStep searches for nested dataframes and pulls them out.
func NewDataframeImageReaderStep ¶
func NewDataframeImageReaderStep(inputs map[string]DataRef, outputMethods []string, columns []int) *StepData
NewDataframeImageReaderStep reads images for further processing.
func NewDatamartAugmentStep ¶
func NewDatamartAugmentStep(inputs map[string]DataRef, outputMethods []string, searchResult string, systemIdentifier string) *StepData
NewDatamartAugmentStep creates a primitive call that augments a dataset with a datamart dataset.
func NewDatamartDownloadStep ¶
func NewDatamartDownloadStep(inputs map[string]DataRef, outputMethods []string, searchResult string, systemIdentifier string) *StepData
NewDatamartDownloadStep creates a primitive call that downloads a dataset from a datamart.
func NewDatasetToDataframeStep ¶
NewDatasetToDataframeStep creates a primitive call that transforms an input dataset into a PANDAS dataframe.
func NewDatasetToDataframeStepWithResource ¶
func NewDatasetToDataframeStepWithResource(inputs map[string]DataRef, outputMethods []string, resourceName string) *StepData
NewDatasetToDataframeStepWithResource creates a primitive call that transforms an input dataset into a PANDAS dataframe using the specified resource.
func NewDatasetWrapperStep ¶
func NewDatasetWrapperStep(inputs map[string]DataRef, outputMethods []string, primitiveIndex int, resourceID string) *StepData
NewDatasetWrapperStep creates a primitive that wraps a dataframe primitive such that it can be used as a dataset primitive in the pipeline prepend. The primitive to wrap is indicated using its index in the pipeline. Leaving the resource ID as the empty value allows the primitive to infer the main resource from the dataset.
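The sketch below wraps a dataframe-level column removal so it runs against the dataset in a prepend. The column indices are arbitrary and the ordering shown is illustrative; what matters is that primitiveIndex matches the wrapped step's position in the compiled step list.

// wrapRemoveColumns wraps a dataframe-level remove-columns primitive for use in
// a dataset prepend. The wrapped step itself needs no arguments (nil is valid
// per NewStepData); the wrapper supplies the data.
func wrapRemoveColumns() []description.Step {
	removeCols := description.NewRemoveColumnsStep(nil, nil, []int{5, 6})
	wrapper := description.NewDatasetWrapperStep(
		map[string]description.DataRef{"inputs": &description.PipelineDataRef{InputNum: 0}},
		[]string{"produce"},
		0,  // index of removeCols in the step list below
		"", // empty resource ID: infer the main resource
	)
	return []description.Step{removeCols, wrapper}
}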
func NewDateTimeRangeFilterStep ¶
func NewDateTimeRangeFilterStep(inputs map[string]DataRef, outputMethods []string, colindex int, inclusive bool, min float64, max float64, strict bool) *StepData
NewDateTimeRangeFilterStep creates a primitive step that filters dataset rows based on an included/excluded date/time range. Inclusion of boundaries is controlled by the strict flag. Min and max values are unix timestamps expressed as floats.
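For instance, to keep rows whose date column falls inside calendar year 2020 (the column index and flag values are illustrative; assumes the standard library time package):

// year2020Filter keeps rows of column 3 whose timestamp falls within 2020.
func year2020Filter(input description.DataRef) *description.StepData {
	start := float64(time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC).Unix())
	end := float64(time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC).Unix())
	return description.NewDateTimeRangeFilterStep(
		map[string]description.DataRef{"inputs": input},
		[]string{"produce"},
		3,     // column holding the date/time value
		true,  // inclusive: keep rows inside the range
		start, // min, unix seconds as a float
		end,   // max, unix seconds as a float
		false, // strict: false keeps the boundary values themselves
	)
}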
func NewDenormalizeStep ¶
NewDenormalizeStep denormalizes data that is contained in multiple resource files.
func NewDistilColumnParserStep ¶
func NewDistilColumnParserStep(inputs map[string]DataRef, outputMethods []string, types []string) *StepData
NewDistilColumnParserStep takes obj/string columns in a dataframe and parses them into raw python types based on their metadata. Avoids some performance issues present in the common ColumnParser but does not support as many data types.
func NewDukeStep ¶
NewDukeStep creates a wrapper for the Duke dataset classifier.
func NewEnrichDatesStep ¶
NewEnrichDatesStep adds extra information for date fields.
func NewExtractColumnsBySemanticTypeStep ¶
func NewExtractColumnsBySemanticTypeStep(inputs map[string]DataRef, outputMethods []string, semanticTypes []string) *StepData
NewExtractColumnsBySemanticTypeStep extracts columns by supplied semantic types.
func NewExtractColumnsByStructuralTypeStep ¶
func NewExtractColumnsByStructuralTypeStep(inputs map[string]DataRef, outputMethods []string, structuralTypes []string) *StepData
NewExtractColumnsByStructuralTypeStep extracts columns by supplied structural types.
func NewExtractColumnsStep ¶
func NewExtractColumnsStep(inputs map[string]DataRef, outputMethods []string, indices []int) *StepData
NewExtractColumnsStep retains columns in the index list in an input dataframe. Columns are specified by numerical index (not our decision).
func NewGoatForwardStep ¶
func NewGoatForwardStep(inputs map[string]DataRef, outputMethods []string, placeColIndex int) *StepData
NewGoatForwardStep creates a GOAT forward geocoding primitive. A string column containing a place name or address is passed in, and the primitive will return a DataFrame containing the lat/lon coords of the place. If a location cannot be found, the corresponding row in the data frame will be empty.
func NewGoatReverseStep ¶
func NewGoatReverseStep(inputs map[string]DataRef, outputMethods []string, lonCol int, latCol int) *StepData
NewGoatReverseStep creates a GOAT reverse geocoding primitive. Columns containing lat and lon values are passed in, and the primitive will return a DataFrame containing the name of the place, with an empty value for coords for which no meaningful place could be computed.
func NewGroupingFieldComposeStep ¶
func NewGroupingFieldComposeStep(inputs map[string]DataRef, outputMethods []string, colIndices []int, joinChar string, outputName string) *StepData
NewGroupingFieldComposeStep creates a primitive call that joins suggested grouping keys.
func NewHDBScanStep ¶
NewHDBScanStep adds clustering features.
func NewHorizontalConcatStep ¶
func NewHorizontalConcatStep(inputs map[string]DataRef, outputMethods []string, useIndex bool, removeSecondIndex bool) *StepData
NewHorizontalConcatStep creates a primitive call that concats two data frames.
func NewImageRetrievalStep ¶
func NewImageRetrievalStep(inputs map[string]DataRef, outputMethods []string, cacheLocation string) *StepData
NewImageRetrievalStep creates a step that will rank images based on nearness to images with the positive label.
func NewImageSegmentationPrimitiveStep ¶
NewImageSegmentationPrimitiveStep takes input images and segments them.
func NewImageTransferStep ¶
NewImageTransferStep processes images.
func NewIsolationForestStep ¶
NewIsolationForestStep returns labels indicating whether or not a data point is an anomaly.
func NewJoinStep ¶
func NewJoinStep(inputs map[string]DataRef, outputMethods []string, leftCol []string, rightCol []string, accuracies []float64, absoluteAccuracies []bool, joinType string) *StepData
NewJoinStep creates a step that will attempt to join two datasets using a key column from each. This is currently a placeholder for testing/debugging only.
func NewKMeansClusteringStep ¶
func NewKMeansClusteringStep(inputs map[string]DataRef, outputMethods []string, clusterCount int) *StepData
NewKMeansClusteringStep clusters the input using simple k-means clustering.
func NewListEncoderStep ¶
NewListEncoderStep expands a list across columns.
func NewNumericRangeFilterStep ¶
func NewNumericRangeFilterStep(inputs map[string]DataRef, outputMethods []string, colindex int, inclusive bool, min float64, max float64, strict bool) *StepData
NewNumericRangeFilterStep creates a primitive step that filters dataset rows based on an included/excluded numeric range. Inclusion of boundaries is controlled by the strict flag.
func NewOneHotEncoderStep ¶
NewOneHotEncoderStep adds a one hot encoder for categoricals of low cardinality.
func NewPCAFeaturesStep ¶
NewPCAFeaturesStep creates a PCA-based feature ranking call that can be added to a pipeline.
func NewPrefeaturisedPoolingStep ¶
NewPrefeaturisedPoolingStep takes non-pooled remote sensing data as input and pools it.
func NewProfilerStep ¶
NewProfilerStep creates a profiler primitive that infers column types using rules.
func NewRegexFilterStep ¶
func NewRegexFilterStep(inputs map[string]DataRef, outputMethods []string, colindex int, inclusive bool, regex string) *StepData
NewRegexFilterStep creates a primitive step that filters dataset rows based on a regex match.
func NewRemoteSensingPretrainedStep ¶
func NewRemoteSensingPretrainedStep(inputs map[string]DataRef, outputMethods []string, batchSize int, pool bool) *StepData
NewRemoteSensingPretrainedStep featurizes a remote sensing column.
func NewRemoveColumnsStep ¶
func NewRemoveColumnsStep(inputs map[string]DataRef, outputMethods []string, colIndices []int) *StepData
NewRemoveColumnsStep removes columns from an input dataframe. Columns are specified by numerical index (not our decision).
func NewRemoveDuplicateColumnsStep ¶
NewRemoveDuplicateColumnsStep removes duplicate columns from a dataframe.
func NewRemoveSemanticTypeStep ¶
func NewRemoveSemanticTypeStep(inputs map[string]DataRef, outputMethods []string, remove *ColumnUpdate) *StepData
NewRemoveSemanticTypeStep removes semantic data values from an input dataset. A remove of (1, 2), ("type a", "type b") would result in "type a" and "type b" being removed from the columns at indices 1 and 2.
func NewReplaceSingletonStep ¶
NewReplaceSingletonStep replaces a field that has only one value with a constant.
func NewSKImputerStep ¶
NewSKImputerStep adds the SK-learn simple imputer.
func NewSKMissingIndicatorStep ¶
NewSKMissingIndicatorStep adds the SK-learn missing indicator.
func NewSatelliteImageLoaderStep ¶
func NewSatelliteImageLoaderStep(inputs map[string]DataRef, outputMethods []string, numJobs int) *StepData
NewSatelliteImageLoaderStep loads multi band images.
func NewSimonStep ¶
NewSimonStep creates a SIMON data classification step. It examines an input dataframe, and assigns types to the columns based on the exposed metadata.
func NewSlothStep ¶
NewSlothStep creates a Sloth timeseries clustering step.
func NewStepData ¶
func NewStepData(primitive *pipeline.Primitive, outputMethods []string, hyperparameters map[string]interface{}, arguments map[string]DataRef) *StepData
NewStepData creates a pipeline step instance from the required field subset. Hyperparameters, Arguments and OutputMethods are all optional in the d3m runtime, so nil is a valid value. Valid types for hyperparameters are intXX, string, bool, or PrimitiveRef, which is the index of another primitive in the pipeline.
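A sketch of a fully custom step. The pipeline.Primitive field names follow the TA3TA2 protobuf definition, and the primitive identifiers and hyperparameter names below are placeholders rather than real primitives.

// newCustomStep builds a step directly from a primitive definition.
func newCustomStep(input description.DataRef) *description.StepData {
	prim := &pipeline.Primitive{
		Id:         "00000000-0000-0000-0000-000000000000",   // placeholder primitive id
		Version:    "1.0.0",                                   // placeholder version
		Name:       "example primitive",                       // placeholder name
		PythonPath: "d3m.primitives.example.example.Example",  // placeholder python path
	}
	return description.NewStepData(
		prim,
		[]string{"produce"},
		map[string]interface{}{ // hyperparameter names are illustrative
			"cluster_count": 5,
			"verbose":       false,
		},
		map[string]description.DataRef{"inputs": input},
	)
}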
func NewTargetRankingStep ¶
func NewTargetRankingStep(inputs map[string]DataRef, outputMethods []string, targetCol int) *StepData
NewTargetRankingStep creates a target ranking call that can be added to a pipeline. Ranking is based on mutual information between features and a selected target. Returns a DataFrame containing (col_idx, col_name, score) tuples for each ranked feature. Features that could not be ranked are excluded from the returned set.
func NewTermFilterStep ¶
func NewTermFilterStep(inputs map[string]DataRef, outputMethods []string, colindex int, inclusive bool, terms []string, matchWhole bool) *StepData
NewTermFilterStep creates a primitive step that filters dataset rows based on a match against a term list. The term match can be partial, or apply to whole terms only.
func NewTextEncoderStep ¶
NewTextEncoderStep adds an svm text encoder for text fields.
func NewTimeseriesFormatterStep ¶
func NewTimeseriesFormatterStep(inputs map[string]DataRef, outputMethods []string, mainResID string, fileColIndex int) *StepData
NewTimeseriesFormatterStep creates a step that will format a time series to the long form. The input dataset must be structured using resource files for time series data. If mainResID is empty the primitive will attempt to infer the main resource. If fileColIndex < 0, the file column will also be inferred.
func NewVectorBoundsFilterStep ¶
func NewVectorBoundsFilterStep(inputs map[string]DataRef, outputMethods []string, column int, inclusive bool, min []float64, max []float64, strict bool) *StepData
NewVectorBoundsFilterStep creates a primitive that allows a vector of values to be filtered by an included/excluded value range. The input min and max ranges are specified as lists, where the i'th element of the min/max lists is applied to the i'th value of the target vectors as the filter.
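A short sketch of the elementwise semantics; the column index and bounds are arbitrary.

// vectorFilter keeps rows whose 2-element vector in column 4 lies elementwise
// inside [0, 10] for the first element and [0, 100] for the second.
func vectorFilter(input description.DataRef) *description.StepData {
	return description.NewVectorBoundsFilterStep(
		map[string]description.DataRef{"inputs": input},
		[]string{"produce"},
		4,                  // vector column index
		true,               // inclusive: keep rows inside the ranges
		[]float64{0, 0},    // per-element minimums
		[]float64{10, 100}, // per-element maximums
		false,              // strict: false keeps boundary values
	)
}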
func NewVerticalConcatenationPrimitiveStep ¶
func NewVerticalConcatenationPrimitiveStep(inputs map[string]DataRef, outputMethods []string, removeDuplicate bool) *StepData
NewVerticalConcatenationPrimitiveStep takes inputs and combines them into a single output.
func (*StepData) BuildDescriptionStep ¶
func (s *StepData) BuildDescriptionStep() (*pipeline.PipelineDescriptionStep, error)
BuildDescriptionStep creates protobuf structs from step data.
func (*StepData) GetArguments ¶
GetArguments returns a map of arguments that will be passed to the methods of the primitive step.
func (*StepData) GetHyperparameters ¶
GetHyperparameters returns a map of hyperparameters that will be passed to the primitive methods of the primitive step. Types are currently restricted to intXX, bool, and string.
func (*StepData) GetOutputMethods ¶
GetOutputMethods returns a list of methods that will be called to generate primitive output. These feed into downstream primitives.
func (*StepData) GetPrimitive ¶
GetPrimitive returns a primitive definition for a pipeline step.
type StepDataRef ¶
StepDataRef provides an input reference that points to a step in the pipeline and its associated output method name.
func (*StepDataRef) CreateDataRef ¶
func (i *StepDataRef) CreateDataRef() *pipeline.PrimitiveStepArgument
CreateDataRef creates a primitive step argument.
func (*StepDataRef) RefString ¶
func (i *StepDataRef) RefString() string
RefString creates a string representation of a StepDataRef.
type UserDatasetAugmentation ¶
UserDatasetAugmentation contains the augmentation parameters required for user dataset pipelines.