Documentation ¶
Index ¶
- Constants
- func CreatePreFeaturizedDatasetPipeline(name string, description string, datasetDescription *UserDatasetDescription, ...) (*pipeline.PipelineDescription, error)
- func CreateUserDatasetPipeline(name string, description string, datasetDescription *UserDatasetDescription, ...) (*pipeline.PipelineDescription, error)
- func MarshalSteps(step *pipeline.PipelineDescription) (string, error)
- type ClusterParams
- type ColumnUpdate
- type DataRef
- type FullySpecifiedPipeline
- func CreateDSBoxJoinPipeline(name string, description string, leftJoinCols []string, rightJoinCols []string, ...) (*FullySpecifiedPipeline, error)
- func CreateDataCleaningPipeline(name string, description string, variables []*model.Variable, impute bool) (*FullySpecifiedPipeline, error)
- func CreateDataFilterPipeline(name string, description string, variables []*model.Variable, ...) (*FullySpecifiedPipeline, error)
- func CreateDatamartAugmentPipeline(name string, description string, searchResult string, systemIdentifier string) (*FullySpecifiedPipeline, error)
- func CreateDatamartDownloadPipeline(name string, description string, searchResult string, systemIdentifier string) (*FullySpecifiedPipeline, error)
- func CreateDenormalizePipeline(name string, description string) (*FullySpecifiedPipeline, error)
- func CreateDukePipeline(name string, description string) (*FullySpecifiedPipeline, error)
- func CreateGeneralClusteringPipeline(name string, description string, datasetDescription *UserDatasetDescription, ...) (*FullySpecifiedPipeline, error)
- func CreateGoatForwardPipeline(name string, description string, placeCol *model.Variable) (*FullySpecifiedPipeline, error)
- func CreateGoatReversePipeline(name string, description string, lonSource *model.Variable, ...) (*FullySpecifiedPipeline, error)
- func CreateGroupingFieldComposePipeline(name string, description string, colIndices []int, joinChar string, ...) (*FullySpecifiedPipeline, error)
- func CreateImageClusteringPipeline(name string, description string, imageVariables []*model.Variable, ...) (*FullySpecifiedPipeline, error)
- func CreateImageFeaturizationPipeline(name string, description string, variables []*model.Variable) (*FullySpecifiedPipeline, error)
- func CreateImageOutlierDetectionPipeline(name string, description string, imageVariables []*model.Variable) (*FullySpecifiedPipeline, error)
- func CreateImageQueryPipeline(name string, description string, cacheLocation string, colsToDrop []int) (*FullySpecifiedPipeline, error)
- func CreateJoinPipeline(name string, description string, join *JoinDescription) (*FullySpecifiedPipeline, error)
- func CreateMultiBandImageClusteringPipeline(name string, description string, grouping *model.MultiBandImageGrouping, ...) (*FullySpecifiedPipeline, error)
- func CreateMultiBandImageFeaturizationPipeline(name string, description string, variables []*model.Variable, numJobs int, ...) (*FullySpecifiedPipeline, error)
- func CreateMultiBandImageOutlierDetectionPipeline(name string, description string, imageVariables []*model.Variable, ...) (*FullySpecifiedPipeline, error)
- func CreatePCAFeaturesPipeline(name string, description string) (*FullySpecifiedPipeline, error)
- func CreatePreFeaturizedMultiBandImageClusteringPipeline(name string, description string, variables []*model.Variable, ...) (*FullySpecifiedPipeline, error)
- func CreateRemoteSensingSegmentationPipeline(name string, description string, targetVariable *model.Variable, numJobs int) (*FullySpecifiedPipeline, error)
- func CreateSimonPipeline(name string, description string) (*FullySpecifiedPipeline, error)
- func CreateSlothPipeline(name string, description string, timeColumn string, valueColumn string, ...) (*FullySpecifiedPipeline, error)
- func CreateTabularOutlierDetectionPipeline(name string, description string, datasetDescription *UserDatasetDescription, ...) (*FullySpecifiedPipeline, error)
- func CreateTargetRankingPipeline(name string, description string, target *model.Variable, ...) (*FullySpecifiedPipeline, error)
- func CreateTimeseriesFormatterPipeline(name string, description string, resource string) (*FullySpecifiedPipeline, error)
- func CreateVerticalConcatPipeline(name string, description string) (*FullySpecifiedPipeline, error)
- type InferenceStepData
- func (s *InferenceStepData) BuildDescriptionStep() (*pipeline.PipelineDescriptionStep, error)
- func (s *InferenceStepData) GetArguments() map[string]DataRef
- func (s *InferenceStepData) GetHyperparameters() map[string]interface{}
- func (s *InferenceStepData) GetOutputMethods() []string
- func (s *InferenceStepData) GetPrimitive() *pipeline.Primitive
- type Join
- type JoinDescription
- type ListStepDataRef
- type PipelineBuilder
- type PipelineDataRef
- type PrimitiveReference
- type Step
- type StepData
- func NewAddSemanticTypeStep(inputs map[string]DataRef, outputMethods []string, add *ColumnUpdate) *StepData
- func NewBinaryEncoderStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewCSVReaderStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewCategoricalImputerStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewColumnParserStep(inputs map[string]DataRef, outputMethods []string, types []string) *StepData
- func NewConstructPredictionStep(inputs map[string]DataRef, outputMethods []string, reference DataRef) *StepData
- func NewDSBoxJoinStep(inputs map[string]DataRef, outputMethods []string, leftCols []string, ...) *StepData
- func NewDataCleaningStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewDataFrameFlattenStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewDataframeImageReaderStep(inputs map[string]DataRef, outputMethods []string, columns []int) *StepData
- func NewDatamartAugmentStep(inputs map[string]DataRef, outputMethods []string, searchResult string, ...) *StepData
- func NewDatamartDownloadStep(inputs map[string]DataRef, outputMethods []string, searchResult string, ...) *StepData
- func NewDatasetToDataframeStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewDatasetToDataframeStepWithResource(inputs map[string]DataRef, outputMethods []string, resourceName string) *StepData
- func NewDatasetWrapperStep(inputs map[string]DataRef, outputMethods []string, primitiveIndex int, ...) *StepData
- func NewDateTimeRangeFilterStep(inputs map[string]DataRef, outputMethods []string, colindex int, ...) *StepData
- func NewDenormalizeStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewDistilColumnParserStep(inputs map[string]DataRef, outputMethods []string, types []string) *StepData
- func NewDukeStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewEnrichDatesStep(inputs map[string]DataRef, outputMethods []string, replace bool) *StepData
- func NewExtractColumnsBySemanticTypeStep(inputs map[string]DataRef, outputMethods []string, semanticTypes []string) *StepData
- func NewExtractColumnsByStructuralTypeStep(inputs map[string]DataRef, outputMethods []string, structuralTypes []string) *StepData
- func NewExtractColumnsStep(inputs map[string]DataRef, outputMethods []string, indices []int) *StepData
- func NewGoatForwardStep(inputs map[string]DataRef, outputMethods []string, placeColIndex int) *StepData
- func NewGoatReverseStep(inputs map[string]DataRef, outputMethods []string, lonCol int, latCol int) *StepData
- func NewGroupingFieldComposeStep(inputs map[string]DataRef, outputMethods []string, colIndices []int, ...) *StepData
- func NewHDBScanStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewHorizontalConcatStep(inputs map[string]DataRef, outputMethods []string, useIndex bool, ...) *StepData
- func NewImageRetrievalStep(inputs map[string]DataRef, outputMethods []string, cacheLocation string) *StepData
- func NewImageSegmentationPrimitiveStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewImageTransferStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewIsolationForestStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewJoinStep(inputs map[string]DataRef, outputMethods []string, leftCol []string, ...) *StepData
- func NewKMeansClusteringStep(inputs map[string]DataRef, outputMethods []string, clusterCount int) *StepData
- func NewListEncoderStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewNumericRangeFilterStep(inputs map[string]DataRef, outputMethods []string, colindex int, ...) *StepData
- func NewOneHotEncoderStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewPCAFeaturesStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewPrefeaturisedPoolingStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewProfilerStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewRegexFilterStep(inputs map[string]DataRef, outputMethods []string, colindex int, ...) *StepData
- func NewRemoteSensingPretrainedStep(inputs map[string]DataRef, outputMethods []string, batchSize int, pool bool) *StepData
- func NewRemoveColumnsStep(inputs map[string]DataRef, outputMethods []string, colIndices []int) *StepData
- func NewRemoveDuplicateColumnsStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewRemoveSemanticTypeStep(inputs map[string]DataRef, outputMethods []string, remove *ColumnUpdate) *StepData
- func NewReplaceSingletonStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewSKImputerStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewSKMissingIndicatorStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewSatelliteImageLoaderStep(inputs map[string]DataRef, outputMethods []string, numJobs int) *StepData
- func NewSimonStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewSlothStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewStepData(primitive *pipeline.Primitive, outputMethods []string, ...) *StepData
- func NewTargetRankingStep(inputs map[string]DataRef, outputMethods []string, targetCol int) *StepData
- func NewTermFilterStep(inputs map[string]DataRef, outputMethods []string, colindex int, ...) *StepData
- func NewTextEncoderStep(inputs map[string]DataRef, outputMethods []string) *StepData
- func NewTimeseriesFormatterStep(inputs map[string]DataRef, outputMethods []string, mainResID string, ...) *StepData
- func NewVectorBoundsFilterStep(inputs map[string]DataRef, outputMethods []string, column int, inclusive bool, ...) *StepData
- func NewVerticalConcatenationPrimitiveStep(inputs map[string]DataRef, outputMethods []string, removeDuplicate bool) *StepData
- func (s *StepData) BuildDescriptionStep() (*pipeline.PipelineDescriptionStep, error)
- func (s *StepData) GetArguments() map[string]DataRef
- func (s *StepData) GetHyperparameters() map[string]interface{}
- func (s *StepData) GetOutputMethods() []string
- func (s *StepData) GetPrimitive() *pipeline.Primitive
- type StepDataRef
- type UserDatasetAugmentation
- type UserDatasetDescription
Constants ¶
const (
	// JoinTypeLeft represents a left outer join operation
	JoinTypeLeft = "left"
	// JoinTypeRight represents a right outer join operation
	JoinTypeRight = "right"
	// JoinTypeOuter represents an outer join operation
	JoinTypeOuter = "outer"
	// JoinTypeInner represents an inner join operation
	JoinTypeInner = "inner"
	// JoinTypeCross represents a cross join operation
	JoinTypeCross = "cross"
)
Variables ¶
This section is empty.
Functions ¶
func CreatePreFeaturizedDatasetPipeline ¶
func CreatePreFeaturizedDatasetPipeline(name string, description string, datasetDescription *UserDatasetDescription, augmentations []*UserDatasetAugmentation) (*pipeline.PipelineDescription, error)
CreatePreFeaturizedDatasetPipeline creates a pipeline that acts on a pre-featurized dataset. The created prepend is simplified because the dataset already has all of the features needed for the end task stored on disk.
func CreateUserDatasetPipeline ¶
func CreateUserDatasetPipeline(name string, description string, datasetDescription *UserDatasetDescription, augmentations []*UserDatasetAugmentation) (*pipeline.PipelineDescription, error)
CreateUserDatasetPipeline creates a pipeline description to capture user feature selection and semantic type information.
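A minimal usage sketch follows. The import paths and the helper name are assumptions, and datasetDesc is taken to be a *UserDatasetDescription already populated by the caller.

// Assumed imports (paths are a guess at the module layout):
//   pipeline    "github.com/uncharted-distil/distil-compute/pipeline"
//   description "github.com/uncharted-distil/distil-compute/primitive/compute/description"

// buildPrepend is a hypothetical helper that builds a user dataset prepend with
// no datamart augmentations.
func buildPrepend(datasetDesc *description.UserDatasetDescription) (*pipeline.PipelineDescription, error) {
	return description.CreateUserDatasetPipeline(
		"user-dataset-prepend",
		"captures user feature selection and semantic type overrides",
		datasetDesc,
		nil, // no augmentations
	)
}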
func MarshalSteps ¶
func MarshalSteps(step *pipeline.PipelineDescription) (string, error)
MarshalSteps marshals a pipeline description into a json representation.
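For example, a compiled description can be serialized for logging or persistence. This sketch assumes the same import paths as above plus the standard library log package.

// logPipeline marshals a pipeline description to its JSON representation and logs it.
func logPipeline(desc *pipeline.PipelineDescription) error {
	jsonStr, err := description.MarshalSteps(desc)
	if err != nil {
		return err
	}
	log.Printf("pipeline description: %s", jsonStr)
	return nil
}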
Types ¶
type ClusterParams ¶
ClusterParams defines parameters to use when clustering.
type ColumnUpdate ¶
ColumnUpdate defines a set of column indices to add/remove a set of semantic types to/from.
type DataRef ¶
type DataRef interface {
	CreateDataRef() *pipeline.PrimitiveStepArgument
	RefString() string
}
DataRef defines an interface for creating input reference strings that are used to connect primitive inputs to outputs.
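The sketch below wires a pipeline input into a step's "inputs" argument. The "inputs" argument name and "produce" output method follow the usual D3M conventions, and the RefString format noted in the comment is a convention rather than a guarantee.

// wireInput connects the pipeline's first input to a dataset-to-dataframe step.
func wireInput() description.Step {
	// A PipelineDataRef addresses one of the pipeline's own inputs by position;
	// its RefString is conventionally of the form "inputs.0".
	inputRef := &description.PipelineDataRef{InputNum: 0}

	// Step arguments map argument names to DataRefs, e.g. the standard "inputs" argument.
	return description.NewDatasetToDataframeStep(
		map[string]description.DataRef{"inputs": inputRef},
		[]string{"produce"},
	)
}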
type FullySpecifiedPipeline ¶
type FullySpecifiedPipeline struct {
	Pipeline         *pipeline.PipelineDescription
	EquivalentValues []interface{}
}
FullySpecifiedPipeline wraps a fully specified pipeline along with the fields which can be used to determine equivalent pipelines.
func CreateDSBoxJoinPipeline ¶
func CreateDSBoxJoinPipeline(name string, description string, leftJoinCols []string, rightJoinCols []string, accuracy float32) (*FullySpecifiedPipeline, error)
CreateDSBoxJoinPipeline creates a pipeline that joins two input datasets using caller supplied columns.
func CreateDataCleaningPipeline ¶
func CreateDataCleaningPipeline(name string, description string, variables []*model.Variable, impute bool) (*FullySpecifiedPipeline, error)
CreateDataCleaningPipeline creates a pipeline to run data cleaning on a dataset.
func CreateDataFilterPipeline ¶
func CreateDataFilterPipeline(name string, description string, variables []*model.Variable, filters []*model.FilterSet) (*FullySpecifiedPipeline, error)
CreateDataFilterPipeline creates a pipeline that will filter a dataset.
func CreateDatamartAugmentPipeline ¶
func CreateDatamartAugmentPipeline(name string, description string, searchResult string, systemIdentifier string) (*FullySpecifiedPipeline, error)
CreateDatamartAugmentPipeline creates a pipeline to augment data with datamart data.
func CreateDatamartDownloadPipeline ¶
func CreateDatamartDownloadPipeline(name string, description string, searchResult string, systemIdentifier string) (*FullySpecifiedPipeline, error)
CreateDatamartDownloadPipeline creates a pipeline to download data from a datamart.
func CreateDenormalizePipeline ¶
func CreateDenormalizePipeline(name string, description string) (*FullySpecifiedPipeline, error)
CreateDenormalizePipeline creates a pipeline to run the denormalize primitive on an input dataset.
func CreateDukePipeline ¶
func CreateDukePipeline(name string, description string) (*FullySpecifiedPipeline, error)
CreateDukePipeline creates a pipeline to run the Duke dataset classifier on a dataset.
func CreateGeneralClusteringPipeline ¶
func CreateGeneralClusteringPipeline(name string, description string, datasetDescription *UserDatasetDescription, augmentations []*UserDatasetAugmentation, params *ClusterParams) (*FullySpecifiedPipeline, error)
CreateGeneralClusteringPipeline creates a pipeline that will cluster tabular data.
func CreateGoatForwardPipeline ¶
func CreateGoatForwardPipeline(name string, description string, placeCol *model.Variable) (*FullySpecifiedPipeline, error)
CreateGoatForwardPipeline creates a forward geocoding pipeline.
func CreateGoatReversePipeline ¶
func CreateGoatReversePipeline(name string, description string, lonSource *model.Variable, latSource *model.Variable) (*FullySpecifiedPipeline, error)
CreateGoatReversePipeline creates a reverse geocoding pipeline.
func CreateGroupingFieldComposePipeline ¶
func CreateGroupingFieldComposePipeline(name string, description string, colIndices []int, joinChar string, outputName string) (*FullySpecifiedPipeline, error)
CreateGroupingFieldComposePipeline creates a pipeline to create a grouping key field for a dataset.
func CreateImageClusteringPipeline ¶
func CreateImageClusteringPipeline(name string, description string, imageVariables []*model.Variable, params *ClusterParams) (*FullySpecifiedPipeline, error)
CreateImageClusteringPipeline creates a fully specified pipeline that will cluster images together, returning a column with the resulting cluster.
func CreateImageFeaturizationPipeline ¶
func CreateImageFeaturizationPipeline(name string, description string, variables []*model.Variable) (*FullySpecifiedPipeline, error)
CreateImageFeaturizationPipeline creates a pipeline that will featurize images.
func CreateImageOutlierDetectionPipeline ¶
func CreateImageOutlierDetectionPipeline(name string, description string, imageVariables []*model.Variable) (*FullySpecifiedPipeline, error)
CreateImageOutlierDetectionPipeline makes a pipeline for outlier detection with remote sensing data.
func CreateImageQueryPipeline ¶
func CreateImageQueryPipeline(name string, description string, cacheLocation string, colsToDrop []int) (*FullySpecifiedPipeline, error)
CreateImageQueryPipeline creates a pipeline that will perform image retrieval. The cacheLocation parameter is passed down to the image retrieval primitive, and is used to cache dot products across query operations. When a new dataset is being labelled, the cache location should be updated.
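A sketch of per-dataset cache handling follows; the directory layout and names are illustrative, and it assumes the standard library path/filepath package in addition to the imports noted earlier.

// newImageQuery builds an image retrieval pipeline whose dot-product cache lives
// in a per-dataset directory, so a fresh labelling session gets a fresh cache.
func newImageQuery(cacheRoot string, datasetID string) (*description.FullySpecifiedPipeline, error) {
	cacheDir := filepath.Join(cacheRoot, datasetID, "image-query-cache")
	return description.CreateImageQueryPipeline(
		"image-query",
		"ranks images by similarity to positively labelled examples",
		cacheDir,
		nil, // no columns dropped in this sketch
	)
}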
func CreateJoinPipeline ¶
func CreateJoinPipeline(name string, description string, join *JoinDescription) (*FullySpecifiedPipeline, error)
CreateJoinPipeline creates a pipeline that joins two input datasets using a caller supplied column. Accuracy is a normalized value that controls how exact the join has to be.
func CreateMultiBandImageClusteringPipeline ¶
func CreateMultiBandImageClusteringPipeline(name string, description string, grouping *model.MultiBandImageGrouping, variables []*model.Variable, params *ClusterParams, batchSize int, numJobs int) (*FullySpecifiedPipeline, error)
CreateMultiBandImageClusteringPipeline creates a fully specified pipeline that will cluster multiband images together, returning a column with the resulting cluster.
func CreateMultiBandImageFeaturizationPipeline ¶
func CreateMultiBandImageFeaturizationPipeline(name string, description string, variables []*model.Variable, numJobs int, batchSize int, poolFeatures bool) (*FullySpecifiedPipeline, error)
CreateMultiBandImageFeaturizationPipeline creates a pipeline that will featurize multiband images.
func CreateMultiBandImageOutlierDetectionPipeline ¶
func CreateMultiBandImageOutlierDetectionPipeline(name string, description string, imageVariables []*model.Variable, prefeaturised bool, pooled bool, grouping *model.MultiBandImageGrouping, batchSize int, numJobs int) (*FullySpecifiedPipeline, error)
CreateMultiBandImageOutlierDetectionPipeline does outlier detection for multiband images, for both prefeaturised and non-prefeaturised data.
func CreatePCAFeaturesPipeline ¶
func CreatePCAFeaturesPipeline(name string, description string) (*FullySpecifiedPipeline, error)
CreatePCAFeaturesPipeline creates a pipeline to run feature ranking on an input dataset.
func CreatePreFeaturizedMultiBandImageClusteringPipeline ¶
func CreatePreFeaturizedMultiBandImageClusteringPipeline(name string, description string, variables []*model.Variable, params *ClusterParams) (*FullySpecifiedPipeline, error)
CreatePreFeaturizedMultiBandImageClusteringPipeline creates a fully specified pipeline that will cluster multiband images together, returning a column with the resulting cluster.
func CreateRemoteSensingSegmentationPipeline ¶
func CreateRemoteSensingSegmentationPipeline(name string, description string, targetVariable *model.Variable, numJobs int) (*FullySpecifiedPipeline, error)
CreateRemoteSensingSegmentationPipeline creates a pipeline to segment remote sensing images.
func CreateSimonPipeline ¶
func CreateSimonPipeline(name string, description string) (*FullySpecifiedPipeline, error)
CreateSimonPipeline creates a pipeline to run semantic type inference on a dataset's columns.
func CreateSlothPipeline ¶
func CreateSlothPipeline(name string, description string, timeColumn string, valueColumn string, timeseriesGrouping *model.TimeseriesGrouping, timeSeriesFeatures []*model.Variable) (*FullySpecifiedPipeline, error)
CreateSlothPipeline creates a pipeline to perform timeseries clustering on a dataset.
func CreateTabularOutlierDetectionPipeline ¶
func CreateTabularOutlierDetectionPipeline(name string, description string, datasetDescription *UserDatasetDescription, augmentations []*UserDatasetAugmentation) (*FullySpecifiedPipeline, error)
CreateTabularOutlierDetectionPipeline makes a pipeline for outlier detection.
func CreateTargetRankingPipeline ¶
func CreateTargetRankingPipeline(name string, description string, target *model.Variable, features []*model.Variable, selectedFeatures map[string]bool) (*FullySpecifiedPipeline, error)
CreateTargetRankingPipeline creates a pipeline to run feature ranking on an input dataset.
func CreateTimeseriesFormatterPipeline ¶
func CreateTimeseriesFormatterPipeline(name string, description string, resource string) (*FullySpecifiedPipeline, error)
CreateTimeseriesFormatterPipeline creates a time series formatter pipeline.
func CreateVerticalConcatPipeline ¶
func CreateVerticalConcatPipeline(name string, description string) (*FullySpecifiedPipeline, error)
CreateVerticalConcatPipeline creates a pipeline that will vertically concat two datasets (union).
type InferenceStepData ¶
type InferenceStepData struct {
	Inputs  []string
	Outputs []string
	// contains filtered or unexported fields
}
InferenceStepData provides data for a pipeline description placeholder step, which marks the point at which a TA2 should begin pipeline inference.
func NewInferenceStepData ¶
func NewInferenceStepData(arguments map[string]DataRef) *InferenceStepData
NewInferenceStepData creates a InferenceStepData instance with default values.
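In practice the placeholder is appended as the last step of a prepend, fed by whichever step produced the prepared data. A short sketch, using the conventional "inputs" argument name:

// appendPlaceholder adds the TA2 inference placeholder after the existing steps.
// lastStepOutput should reference the output of the final prepend step.
func appendPlaceholder(steps []description.Step, lastStepOutput description.DataRef) []description.Step {
	placeholder := description.NewInferenceStepData(
		map[string]description.DataRef{"inputs": lastStepOutput},
	)
	return append(steps, placeholder)
}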
func (*InferenceStepData) BuildDescriptionStep ¶
func (s *InferenceStepData) BuildDescriptionStep() (*pipeline.PipelineDescriptionStep, error)
BuildDescriptionStep creates protobuf structures from a pipeline step definition.
func (*InferenceStepData) GetArguments ¶
func (s *InferenceStepData) GetArguments() map[string]DataRef
GetArguments adapts the internal placeholder step argument type to the primitive step argument type.
func (*InferenceStepData) GetHyperparameters ¶
func (s *InferenceStepData) GetHyperparameters() map[string]interface{}
GetHyperparameters returns an empty map since inference steps don't take hyperparameters.
func (*InferenceStepData) GetOutputMethods ¶
func (s *InferenceStepData) GetOutputMethods() []string
GetOutputMethods returns a list of methods that will be called to generate primitive output. These feed into downstream primitives.
func (*InferenceStepData) GetPrimitive ¶
func (s *InferenceStepData) GetPrimitive() *pipeline.Primitive
GetPrimitive returns nil since there is no primitive associated with a placeholder step.
type Join ¶
Join captures a specific join relationship and constraint to be used in dataset joining.
type JoinDescription ¶
type JoinDescription struct {
	Type           string
	Joins          []*Join
	RightExcludes  []*model.Variable
	RightVariables []*model.Variable
	LeftExcludes   []*model.Variable
	LeftVariables  []*model.Variable
}
JoinDescription represents the complete information necessary to join two datasets via a join pipeline.
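A sketch tying the join type constants to a join pipeline. The *Join entries (column pairings and accuracies) are assumed to be built elsewhere, since their fields are not shown here, and the model package import path is assumed to be the distil-compute model package.

// innerJoinPipeline describes an inner join between two datasets and builds the
// corresponding pipeline. Variable lists come from the two datasets' metadata.
func innerJoinPipeline(joins []*description.Join, leftVars, rightVars []*model.Variable) (*description.FullySpecifiedPipeline, error) {
	joinDesc := &description.JoinDescription{
		Type:           description.JoinTypeInner,
		Joins:          joins,
		LeftVariables:  leftVars,
		RightVariables: rightVars,
	}
	return description.CreateJoinPipeline("join", "inner join of two datasets", joinDesc)
}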
type ListStepDataRef ¶
type ListStepDataRef struct {
// contains filtered or unexported fields
}
ListStepDataRef points to a list of data references.
func (*ListStepDataRef) AddDataRef ¶
func (s *ListStepDataRef) AddDataRef(dataRef DataRef)
AddDataRef adds a data reference to the list.
func (*ListStepDataRef) CreateDataRef ¶
func (s *ListStepDataRef) CreateDataRef() *pipeline.PrimitiveStepArgument
CreateDataRef creates a primitive step argument.
func (*ListStepDataRef) RefString ¶
func (s *ListStepDataRef) RefString() string
RefString creates a string representation of a ListStepDataRef.
type PipelineBuilder ¶
type PipelineBuilder struct {
// contains filtered or unexported fields
}
PipelineBuilder compiles a pipeline DAG into a protobuf pipeline description that can be passed to a downstream TA2 for inference (optional) and execution.
func NewPipelineBuilder ¶
func NewPipelineBuilder(name string, description string, inputs []string, outputs []DataRef, steps []Step) *PipelineBuilder
NewPipelineBuilder creates a new pipeline builder instance. All of the steps in the pipeline DAG need to be passed in to the builder via the steps argument.
func (*PipelineBuilder) Compile ¶
func (p *PipelineBuilder) Compile() (*pipeline.PipelineDescription, error)
Compile creates the protobuf pipeline description from the step graph. It can only be called once.
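A minimal end-to-end sketch, under the assumptions noted earlier about import paths and the conventional "inputs"/"produce" names. The pipeline output reference is left as a parameter because StepDataRef construction is internal to the package's helpers.

// compileMinimal builds a one-step pipeline (dataset to dataframe) and compiles it.
// finalOutput should be a DataRef (a StepDataRef in practice) pointing at the last
// step's "produce" output.
func compileMinimal(finalOutput description.DataRef) (*pipeline.PipelineDescription, error) {
	toDataframe := description.NewDatasetToDataframeStep(
		map[string]description.DataRef{"inputs": &description.PipelineDataRef{InputNum: 0}},
		[]string{"produce"},
	)

	builder := description.NewPipelineBuilder(
		"minimal",
		"dataset to dataframe only",
		[]string{"inputs"},                 // pipeline input names
		[]description.DataRef{finalOutput}, // pipeline outputs
		[]description.Step{toDataframe},
	)

	// Compile may only be called once per builder instance.
	return builder.Compile()
}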
func (*PipelineBuilder) GetSteps ¶
func (p *PipelineBuilder) GetSteps() []Step
GetSteps returns compiled steps.
type PipelineDataRef ¶
type PipelineDataRef struct {
InputNum int
}
PipelineDataRef points to an input of the pipeline itself (typically a dataset) instead of the output of another primitive.
func (*PipelineDataRef) CreateDataRef ¶
func (p *PipelineDataRef) CreateDataRef() *pipeline.PrimitiveStepArgument
CreateDataRef creates a primitive step argument.
func (*PipelineDataRef) RefString ¶
func (p *PipelineDataRef) RefString() string
RefString creates a string representation of a PipelineDataRef.
type PrimitiveReference ¶
type PrimitiveReference struct {
// contains filtered or unexported fields
}
PrimitiveReference is a marker struct for an argument that is an integer, but should be interpreted as a reference to another primitive.
type Step ¶
type Step interface {
	BuildDescriptionStep() (*pipeline.PipelineDescriptionStep, error)
	GetPrimitive() *pipeline.Primitive
	GetArguments() map[string]DataRef
	GetHyperparameters() map[string]interface{}
	GetOutputMethods() []string
}
Step provides data for a pipeline description step and an operation to create a protobuf PipelineDescriptionStep from that data.
type StepData ¶
type StepData struct {
	Primitive       *pipeline.Primitive
	Arguments       map[string]DataRef
	Hyperparameters map[string]interface{}
	OutputMethods   []string
}
StepData contains the minimum amount of data used to describe a pipeline step.
func NewAddSemanticTypeStep ¶
func NewAddSemanticTypeStep(inputs map[string]DataRef, outputMethods []string, add *ColumnUpdate) *StepData
NewAddSemanticTypeStep adds semantic data values to an input dataset. An add of (1, 2), ("type a", "type b") would result in "type a" and "type b" being added to the columns at indices 1 and 2.
func NewBinaryEncoderStep ¶
NewBinaryEncoderStep adds a binary encoder for categoricals of high cardinality.
func NewCSVReaderStep ¶
NewCSVReaderStep reads data from csv files into a nested dataframe structure.
func NewCategoricalImputerStep ¶
NewCategoricalImputerStep finds missing categorical values and replaces them with an imputed value.
func NewColumnParserStep ¶
func NewColumnParserStep(inputs map[string]DataRef, outputMethods []string, types []string) *StepData
NewColumnParserStep takes obj/string columns in a dataframe and parses them into their associated raw python types based on the attached d3m metadata.
func NewConstructPredictionStep ¶
func NewConstructPredictionStep(inputs map[string]DataRef, outputMethods []string, reference DataRef) *StepData
NewConstructPredictionStep maps the dataframe index to d3m index.
func NewDSBoxJoinStep ¶
func NewDSBoxJoinStep(inputs map[string]DataRef, outputMethods []string, leftCols []string, rightCols []string, accuracy float32) *StepData
NewDSBoxJoinStep creates a step that will attempt to join two datasets using key columns from each dataset.
func NewDataCleaningStep ¶
NewDataCleaningStep creates a wrapper for the Punk data cleaning primitive.
func NewDataFrameFlattenStep ¶
NewDataFrameFlattenStep searches for nested dataframes and pulls them out.
func NewDataframeImageReaderStep ¶
func NewDataframeImageReaderStep(inputs map[string]DataRef, outputMethods []string, columns []int) *StepData
NewDataframeImageReaderStep reads images for further processing.
func NewDatamartAugmentStep ¶
func NewDatamartAugmentStep(inputs map[string]DataRef, outputMethods []string, searchResult string, systemIdentifier string) *StepData
NewDatamartAugmentStep creates a primitive call that augments a dataset with a datamart dataset.
func NewDatamartDownloadStep ¶
func NewDatamartDownloadStep(inputs map[string]DataRef, outputMethods []string, searchResult string, systemIdentifier string) *StepData
NewDatamartDownloadStep creates a primitive call that downloads a dataset from a datamart.
func NewDatasetToDataframeStep ¶
NewDatasetToDataframeStep creates a primitive call that transforms an input dataset into a PANDAS dataframe.
func NewDatasetToDataframeStepWithResource ¶
func NewDatasetToDataframeStepWithResource(inputs map[string]DataRef, outputMethods []string, resourceName string) *StepData
NewDatasetToDataframeStepWithResource creates a primitive call that transforms an input dataset into a PANDAS dataframe using the specified resource.
func NewDatasetWrapperStep ¶
func NewDatasetWrapperStep(inputs map[string]DataRef, outputMethods []string, primitiveIndex int, resourceID string) *StepData
NewDatasetWrapperStep creates a primitive that wraps a dataframe primitive such that it can be used as a dataset primitive in the pipeline prepend. The primitive to wrap is indicated using its index in the pipeline. Leaving the resource ID as the empty value allows the primitive to infer the main resource from the dataset.
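The sketch below wraps a dataframe-level column removal so it runs against the dataset in a prepend. The column indices are arbitrary and the ordering shown is illustrative; what matters is that primitiveIndex matches the wrapped step's position in the compiled step list.

// wrapRemoveColumns wraps a dataframe-level remove-columns primitive for use in
// a dataset prepend. The wrapped step itself needs no arguments (nil is valid
// per NewStepData); the wrapper supplies the data.
func wrapRemoveColumns() []description.Step {
	removeCols := description.NewRemoveColumnsStep(nil, nil, []int{5, 6})
	wrapper := description.NewDatasetWrapperStep(
		map[string]description.DataRef{"inputs": &description.PipelineDataRef{InputNum: 0}},
		[]string{"produce"},
		0,  // index of removeCols in the step list below
		"", // empty resource ID: infer the main resource
	)
	return []description.Step{removeCols, wrapper}
}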
func NewDateTimeRangeFilterStep ¶
func NewDateTimeRangeFilterStep(inputs map[string]DataRef, outputMethods []string, colindex int, inclusive bool, min float64, max float64, strict bool) *StepData
NewDateTimeRangeFilterStep creates a primitive step that filters dataset rows based on an included/excluded date/time range. Inclusion of boundaries is controlled by the strict flag. Min and max values are unix timestamps expressed as floats.
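For instance, to keep rows whose date column falls inside calendar year 2020 (the column index and flag values are illustrative; assumes the standard library time package):

// year2020Filter keeps rows of column 3 whose timestamp falls within 2020.
func year2020Filter(input description.DataRef) *description.StepData {
	start := float64(time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC).Unix())
	end := float64(time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC).Unix())
	return description.NewDateTimeRangeFilterStep(
		map[string]description.DataRef{"inputs": input},
		[]string{"produce"},
		3,     // column holding the date/time value
		true,  // inclusive: keep rows inside the range
		start, // min, unix seconds as a float
		end,   // max, unix seconds as a float
		false, // strict: false keeps the boundary values themselves
	)
}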
func NewDenormalizeStep ¶
NewDenormalizeStep denormalizes data that is contained in multiple resource files.
func NewDistilColumnParserStep ¶
func NewDistilColumnParserStep(inputs map[string]DataRef, outputMethods []string, types []string) *StepData
NewDistilColumnParserStep takes obj/string columns in a dataframe and parses them into raw python types based on their metadata. Avoids some performance issues present in the common ColumnParser but does not support as many data types.
func NewDukeStep ¶
NewDukeStep creates a wrapper for the Duke dataset classifier.
func NewEnrichDatesStep ¶
NewEnrichDatesStep adds extra information for date fields.
func NewExtractColumnsBySemanticTypeStep ¶
func NewExtractColumnsBySemanticTypeStep(inputs map[string]DataRef, outputMethods []string, semanticTypes []string) *StepData
NewExtractColumnsBySemanticTypeStep extracts columns by supplied semantic types.
func NewExtractColumnsByStructuralTypeStep ¶
func NewExtractColumnsByStructuralTypeStep(inputs map[string]DataRef, outputMethods []string, structuralTypes []string) *StepData
NewExtractColumnsByStructuralTypeStep extracts columns by supplied structural types.
func NewExtractColumnsStep ¶
func NewExtractColumnsStep(inputs map[string]DataRef, outputMethods []string, indices []int) *StepData
NewExtractColumnsStep retains columns in the index list in an input dataframe. Columns are specified by numerical index (not our decision).
func NewGoatForwardStep ¶
func NewGoatForwardStep(inputs map[string]DataRef, outputMethods []string, placeColIndex int) *StepData
NewGoatForwardStep creates a GOAT forward geocoding primitive. A string column containing a place name or address is passed in, and the primitive will return a DataFrame containing the lat/lon coords of the place. If a location cannot be found, the corresponding row in the data frame will be empty.
func NewGoatReverseStep ¶
func NewGoatReverseStep(inputs map[string]DataRef, outputMethods []string, lonCol int, latCol int) *StepData
NewGoatReverseStep creates a GOAT reverse geocoding primitive. Columns containing lat and lon values are passed in, and the primitive will return a DataFrame containing the name of the place, with an empty value for coords for which no meaningful place could be computed.
func NewGroupingFieldComposeStep ¶
func NewGroupingFieldComposeStep(inputs map[string]DataRef, outputMethods []string, colIndices []int, joinChar string, outputName string) *StepData
NewGroupingFieldComposeStep creates a primitive call that joins suggested grouping keys.
func NewHDBScanStep ¶
NewHDBScanStep adds clustering features.
func NewHorizontalConcatStep ¶
func NewHorizontalConcatStep(inputs map[string]DataRef, outputMethods []string, useIndex bool, removeSecondIndex bool) *StepData
NewHorizontalConcatStep creates a primitive call that concats two data frames.
func NewImageRetrievalStep ¶
func NewImageRetrievalStep(inputs map[string]DataRef, outputMethods []string, cacheLocation string) *StepData
NewImageRetrievalStep creates a step that will rank images based on nearness to images with the positive label.
func NewImageSegmentationPrimitiveStep ¶
NewImageSegmentationPrimitiveStep takes input images and segments them.
func NewImageTransferStep ¶
NewImageTransferStep processes images.
func NewIsolationForestStep ¶
NewIsolationForestStep returns labels indicating whether or not a data point is an anomaly.
func NewJoinStep ¶
func NewJoinStep(inputs map[string]DataRef, outputMethods []string, leftCol []string, rightCol []string, accuracies []float64, absoluteAccuracies []bool, joinType string) *StepData
NewJoinStep creates a step that will attempt to join two datasets using a key column from each. This is currently a placeholder for testing/debugging only.
func NewKMeansClusteringStep ¶
func NewKMeansClusteringStep(inputs map[string]DataRef, outputMethods []string, clusterCount int) *StepData
NewKMeansClusteringStep clusters the input using simple k-means clustering.
func NewListEncoderStep ¶
NewListEncoderStep expands a list across columns.
func NewNumericRangeFilterStep ¶
func NewNumericRangeFilterStep(inputs map[string]DataRef, outputMethods []string, colindex int, inclusive bool, min float64, max float64, strict bool) *StepData
NewNumericRangeFilterStep creates a primitive step that filters dataset rows based on an included/excluded numeric range. Inclusion of boundaries is controlled by the strict flag.
func NewOneHotEncoderStep ¶
NewOneHotEncoderStep adds a one hot encoder for categoricals of low cardinality.
func NewPCAFeaturesStep ¶
NewPCAFeaturesStep creates a PCA-based feature ranking call that can be added to a pipeline.
func NewPrefeaturisedPoolingStep ¶
NewPrefeaturisedPoolingStep takes non-pooled remote sensing data as input and pools it.
func NewProfilerStep ¶
NewProfilerStep creates a profiler primitive that infers column types using rules.
func NewRegexFilterStep ¶
func NewRegexFilterStep(inputs map[string]DataRef, outputMethods []string, colindex int, inclusive bool, regex string) *StepData
NewRegexFilterStep creates a primitive step that filters dataset rows based on a regex match.
func NewRemoteSensingPretrainedStep ¶
func NewRemoteSensingPretrainedStep(inputs map[string]DataRef, outputMethods []string, batchSize int, pool bool) *StepData
NewRemoteSensingPretrainedStep featurizes a remote sensing column.
func NewRemoveColumnsStep ¶
func NewRemoveColumnsStep(inputs map[string]DataRef, outputMethods []string, colIndices []int) *StepData
NewRemoveColumnsStep removes columns from an input dataframe. Columns are specified by numerical index (not our decision).
func NewRemoveDuplicateColumnsStep ¶
NewRemoveDuplicateColumnsStep removes duplicate columns from a dataframe.
func NewRemoveSemanticTypeStep ¶
func NewRemoveSemanticTypeStep(inputs map[string]DataRef, outputMethods []string, remove *ColumnUpdate) *StepData
NewRemoveSemanticTypeStep removes semantic data values from an input dataset. A remove of (1, 2), ("type a", "type b") would result in "type a" and "type b" being removed from the columns at indices 1 and 2.
func NewReplaceSingletonStep ¶
NewReplaceSingletonStep replaces a field that has only one value with a constant.
func NewSKImputerStep ¶
NewSKImputerStep adds the SK-learn simple imputer.
func NewSKMissingIndicatorStep ¶
NewSKMissingIndicatorStep adds the SK-learn missing indicator.
func NewSatelliteImageLoaderStep ¶
func NewSatelliteImageLoaderStep(inputs map[string]DataRef, outputMethods []string, numJobs int) *StepData
NewSatelliteImageLoaderStep loads multi band images.
func NewSimonStep ¶
NewSimonStep creates a SIMON data classification step. It examines an input dataframe, and assigns types to the columns based on the exposed metadata.
func NewSlothStep ¶
NewSlothStep creates a Sloth timeseries clustering step.
func NewStepData ¶
func NewStepData(primitive *pipeline.Primitive, outputMethods []string, hyperparameters map[string]interface{}, arguments map[string]DataRef) *StepData
NewStepData creates a pipeline step instance from the required field subset. Hyperparameters, Arguments and OutputMethods are all optional in the d3m runtime, so nil is a valid value. Valid types for hyperparameters are intXX, string, bool, or PrimitiveRef, which is the index of another primitive in the pipeline.
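A sketch of a fully custom step. The pipeline.Primitive field names follow the TA3TA2 protobuf definition, and the primitive identifiers and hyperparameter names below are placeholders rather than real primitives.

// newCustomStep builds a step directly from a primitive definition.
func newCustomStep(input description.DataRef) *description.StepData {
	prim := &pipeline.Primitive{
		Id:         "00000000-0000-0000-0000-000000000000",   // placeholder primitive id
		Version:    "1.0.0",                                   // placeholder version
		Name:       "example primitive",                       // placeholder name
		PythonPath: "d3m.primitives.example.example.Example",  // placeholder python path
	}
	return description.NewStepData(
		prim,
		[]string{"produce"},
		map[string]interface{}{ // hyperparameter names are illustrative
			"cluster_count": 5,
			"verbose":       false,
		},
		map[string]description.DataRef{"inputs": input},
	)
}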
func NewTargetRankingStep ¶
func NewTargetRankingStep(inputs map[string]DataRef, outputMethods []string, targetCol int) *StepData
NewTargetRankingStep creates a target ranking call that can be added to a pipeline. Ranking is based on mutual information between features and a selected target. Returns a DataFrame containing (col_idx, col_name, score) tuples for each ranked feature. Features that could not be ranked are excluded from the returned set.
func NewTermFilterStep ¶
func NewTermFilterStep(inputs map[string]DataRef, outputMethods []string, colindex int, inclusive bool, terms []string, matchWhole bool) *StepData
NewTermFilterStep creates a primitive step that filters dataset rows based on a match against a term list. The term match can be partial, or apply to whole terms only.
func NewTextEncoderStep ¶
NewTextEncoderStep adds an svm text encoder for text fields.
func NewTimeseriesFormatterStep ¶
func NewTimeseriesFormatterStep(inputs map[string]DataRef, outputMethods []string, mainResID string, fileColIndex int) *StepData
NewTimeseriesFormatterStep creates a step that will format a time series to the long form. The input dataset must be structured using resource files for time series data. If mainResID is empty the primitive will attempt to infer the main resource. If fileColIndex < 0, the file column will also be inferred.
func NewVectorBoundsFilterStep ¶
func NewVectorBoundsFilterStep(inputs map[string]DataRef, outputMethods []string, column int, inclusive bool, min []float64, max []float64, strict bool) *StepData
NewVectorBoundsFilterStep creates a primitive that allows a vector of values to be filtered by an included/excluded value range. The input min and max ranges are specified as lists, where the i'th element of the min/max lists is applied to the i'th value of the target vectors as the filter.
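A short sketch of the elementwise semantics; the column index and bounds are arbitrary.

// vectorFilter keeps rows whose 2-element vector in column 4 lies elementwise
// inside [0, 10] for the first element and [0, 100] for the second.
func vectorFilter(input description.DataRef) *description.StepData {
	return description.NewVectorBoundsFilterStep(
		map[string]description.DataRef{"inputs": input},
		[]string{"produce"},
		4,                  // vector column index
		true,               // inclusive: keep rows inside the ranges
		[]float64{0, 0},    // per-element minimums
		[]float64{10, 100}, // per-element maximums
		false,              // strict: false keeps boundary values
	)
}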
func NewVerticalConcatenationPrimitiveStep ¶
func NewVerticalConcatenationPrimitiveStep(inputs map[string]DataRef, outputMethods []string, removeDuplicate bool) *StepData
NewVerticalConcatenationPrimitiveStep takes inputs and combines them into a single output.
func (*StepData) BuildDescriptionStep ¶
func (s *StepData) BuildDescriptionStep() (*pipeline.PipelineDescriptionStep, error)
BuildDescriptionStep creates protobuf structs from step data.
func (*StepData) GetArguments ¶
GetArguments returns a map of arguments that will be passed to the methods of the primitive step.
func (*StepData) GetHyperparameters ¶
GetHyperparameters returns a map of hyperparameters that will be passed to the primitive methods of the primitive step. Types are currently restricted to intXX, bool, and string.
func (*StepData) GetOutputMethods ¶
GetOutputMethods returns a list of methods that will be called to generate primitive output. These feed into downstream primitives.
func (*StepData) GetPrimitive ¶
GetPrimitive returns a primitive definition for a pipeline step.
type StepDataRef ¶
StepDataRef provides an input reference that points to a step in the pipeline and its associated output method name.
func (*StepDataRef) CreateDataRef ¶
func (i *StepDataRef) CreateDataRef() *pipeline.PrimitiveStepArgument
CreateDataRef creates a primitive step argument.
func (*StepDataRef) RefString ¶
func (i *StepDataRef) RefString() string
RefString creates a string representation of a StepDataRef.
type UserDatasetAugmentation ¶
UserDatasetAugmentation contains the augmentation parameters required for user dataset pipelines.