Documentation ¶
Index ¶
- Constants
- func CreateBatches(schemaFile string, maxBatchSize int) ([]string, error)
- func DefaultMetrics(taskKeywords []string) []string
- func DefaultTaskType(targetType string, problemType string) []string
- func ExplainFeatureOutput(resultURI string, outputURI string) (*api.SolutionExplainResult, error)
- func ExtractDatasetFromRawRequest(data encjson.RawMessage) (string, error)
- func FeaturizeDataset(dataset string, target string) ([]string, error)
- func HarmonizeDataMetadata(datasetFolder string) error
- func HasTaskType(task *Task, taskType string) bool
- func InitializeCache(sourceFile string, readEnabled bool) error
- func InitializeQueue(config *env.Config)
- func SampleData(rawData [][]string, maxRows int) [][]string
- func SampleDataset(schemaFile string, outputFolder string, maxRows int, stratify bool, ...) (string, error)
- func SplitDataset(schemaFile string, splitter datasetSplitter) (string, string, error)
- func SubmitPipeline(client *compute.Client, datasets []string, datasetsProduce []string, ...) (string, error)
- type Cache
- type FilteredDataProvider
- type PredictRequest
- type PredictStatus
- type PredictionResult
- type ProblemPersist
- type ProblemPersistAbout
- type ProblemPersistData
- type ProblemPersistDataSplits
- type ProblemPersistExpectedOutput
- type ProblemPersistInput
- type ProblemPersistPerformanceMetric
- type ProblemPersistTarget
- type QueryRequest
- type QueryStatus
- type Queue
- type QueueItem
- type QueueResponse
- type SolutionRequest
- type SolutionStatus
- type SolutionStatusListener
- type StopSolutionSearchRequest
- type Task
- type TimeStampSplit
- type VariableProvider
- type VariablesProvider
Constants ¶
const (
    // ExplainableTypeSolution represents output that explains the solution as a whole.
    ExplainableTypeSolution = "solution"
    // ExplainableTypeStep represents output that explains a specific row.
    ExplainableTypeStep = "step"
    // ExplainableTypeConfidence represents confidence output.
    ExplainableTypeConfidence = "confidence"
)
const (
    // ModelQualityFast indicates that the system should try to generate models quickly at the expense of quality.
    ModelQualityFast = "speed"
    // ModelQualityHigh indicates that the system should focus on higher quality models at the expense of speed.
    ModelQualityHigh = "quality"
)
const (
    // D3MProblem name of the expected problem file.
    D3MProblem = "problemDoc.json"
)
Variables ¶
This section is empty.
Functions ¶
func CreateBatches ¶
func CreateBatches(schemaFile string, maxBatchSize int) ([]string, error)
CreateBatches splits the dataset into batches of at most maxBatchSize rows, returning paths to the schema files for all resulting batches.
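A minimal usage sketch (not from the package itself): it assumes the package is imported under the alias task and that the schema path points at a valid D3M dataset document.
batchSchemas, err := task.CreateBatches("datasets/example/datasetDoc.json", 10000)
if err != nil {
    log.Fatal(err)
}
for _, schemaFile := range batchSchemas {
    fmt.Println(schemaFile) // schema doc of one batch
}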
func DefaultMetrics ¶
func DefaultMetrics(taskKeywords []string) []string
DefaultMetrics returns the default metrics for the given task keywords.
func DefaultTaskType ¶
func DefaultTaskType(targetType string, problemType string) []string
DefaultTaskType returns the default task keywords for the given target and problem types.
func ExplainFeatureOutput ¶
func ExplainFeatureOutput(resultURI string, outputURI string) (*api.SolutionExplainResult, error)
ExplainFeatureOutput parses the explain feature output.
func ExtractDatasetFromRawRequest ¶
func ExtractDatasetFromRawRequest(data encjson.RawMessage) (string, error)
ExtractDatasetFromRawRequest extracts the dataset name from the raw message.
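For illustration (a sketch; encjson appears to alias encoding/json, and requestBody stands in for whatever JSON a client actually submits):
raw := encjson.RawMessage(requestBody) // raw JSON bytes received from a client
datasetName, err := task.ExtractDatasetFromRawRequest(raw)
if err != nil {
    log.Fatal(err)
}
fmt.Println(datasetName)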
func FeaturizeDataset ¶
func FeaturizeDataset(dataset string, target string) ([]string, error)
FeaturizeDataset creates feature outputs that can then be used directly when modelling instead of needing to run the complete pipeline.
func HarmonizeDataMetadata ¶
func HarmonizeDataMetadata(datasetFolder string) error
HarmonizeDataMetadata updates a dataset on disk so that the schema info matches the header of the backing data file, and limits variables to valid AutoML fields.
func HasTaskType ¶
func HasTaskType(task *Task, taskType string) bool
HasTaskType indicates whether or not a given Task includes the supplied task type.
func InitializeCache ¶
func InitializeCache(sourceFile string, readEnabled bool) error
InitializeCache sets up an empty cache or, if a source file is provided, reads the cache from that file.
func InitializeQueue ¶
func InitializeQueue(config *env.Config)
InitializeQueue creates the pipeline queue and runs a goroutine to process pipeline requests.
func SampleData ¶
func SampleData(rawData [][]string, maxRows int) [][]string
SampleData shuffles a dataset's rows and takes a subsample, returning the sampled rows.
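A short sketch (assuming the package import alias task):
rows := [][]string{
    {"1", "a", "0.5"},
    {"2", "b", "0.7"},
    {"3", "c", "0.1"},
    {"4", "d", "0.9"},
}
sampled := task.SampleData(rows, 2) // shuffle, then keep at most 2 rows
fmt.Println(len(sampled))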
func SampleDataset ¶
func SampleDataset(schemaFile string, outputFolder string, maxRows int, stratify bool, targetCol int, groupingCol int) (string, error)
SampleDataset shuffles a dataset's rows and stores a subsample, returning the schema doc URI of the sampled dataset.
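A hedged call sketch; the paths are illustrative and the column indices depend on the dataset's layout:
sampledSchema, err := task.SampleDataset(
    "datasets/example/datasetDoc.json", // source schema doc (illustrative path)
    "datasets/example-sampled",         // output folder for the sampled copy
    10000,                              // maximum rows to retain
    true,                               // stratify the sample
    2,                                  // target column index (illustrative)
    -1,                                 // grouping column index (illustrative)
)
if err != nil {
    log.Fatal(err)
}
fmt.Println(sampledSchema) // schema doc URI of the sampled dataset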
func SplitDataset ¶
func SplitDataset(schemaFile string, splitter datasetSplitter) (string, string, error)
SplitDataset splits a dataset into train and test sets, using an approach to splitting suitable to the task performed.
func SubmitPipeline ¶
func SubmitPipeline(client *compute.Client, datasets []string, datasetsProduce []string, searchRequest *pipeline.SearchSolutionsRequest, fullySpecifiedStep *description.FullySpecifiedPipeline, allowedValueTypes []string, shouldCache bool) (string, error)
SubmitPipeline executes pipelines using the client and returns the result URI.
Types ¶
type Cache ¶
type Cache struct {
    // contains filtered or unexported fields
}
Cache is used to cache data in memory. It can be persisted to disk as needed.
func (*Cache) PersistCache ¶
PersistCache stores the cache to disk.
type FilteredDataProvider ¶
type FilteredDataProvider func(dataset string, index string, filters *api.FilterParams) (*api.FilteredData, error)
FilteredDataProvider defines a function that will fetch data from a back end source given a set of filter parameters.
type PredictRequest ¶
type PredictRequest struct {
    DatasetID        string
    DatasetPath      string
    FittedSolutionID string
    TimestampField   string
    MaxTime          int
    IntervalCount    int
    IntervalLength   float64
    ExistingDataset  bool
    // contains filtered or unexported fields
}
PredictRequest defines a request to generate new predictions from a fitted model and input data.
func NewPredictRequest ¶
func NewPredictRequest(data []byte) (*PredictRequest, error)
NewPredictRequest instantiates a predict request from a raw byte stream.
type PredictStatus ¶
type PredictStatus struct {
    Progress  string    `json:"progress"`
    RequestID string    `json:"requestId"`
    ResultID  string    `json:"resultId"`
    Error     error     `json:"error"`
    Timestamp time.Time `json:"timestamp"`
}
PredictStatus defines a prediction status update from a downstream AutoML system.
type PredictionResult ¶
type PredictionResult struct {
    ProduceRequestID         string
    FittedSolutionID         string
    ResultURI                string
    Confidences              *api.SolutionExplainResult
    SolutionFeatureWeightURI string
    StepFeatureWeightURI     string
}
PredictionResult contains the output from a prediction produce call.
func GeneratePredictions ¶
func GeneratePredictions(datasetURI string, solutionID string, fittedSolutionID string, client *compute.Client) (*PredictionResult, error)
GeneratePredictions produces predictions for the specified dataset using the specified fitted solution.
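A hedged sketch of a produce call; client, solutionID, and fittedSolutionID are assumed to come from an earlier solution search and fit:
result, err := task.GeneratePredictions(
    "file:///inputs/predict/datasetDoc.json", // illustrative dataset URI
    solutionID,
    fittedSolutionID,
    client, // *compute.Client connected to the downstream system
)
if err != nil {
    log.Fatal(err)
}
fmt.Println(result.ResultURI)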
type ProblemPersist ¶
type ProblemPersist struct {
    About           *ProblemPersistAbout          `json:"about"`
    Inputs          *ProblemPersistInput          `json:"inputs"`
    ExpectedOutputs *ProblemPersistExpectedOutput `json:"expectedOutputs,omitempty"`
}
ProblemPersist contains the problem file data.
func CreateProblemSchema ¶
func CreateProblemSchema(datasetDir string, dataset string, targetVar *model.Variable, filters *api.FilterParams) (*ProblemPersist, string, error)
CreateProblemSchema captures the problem information in the required D3M problem format.
func LoadProblemSchemaFromFile ¶
func LoadProblemSchemaFromFile(filename string) (*ProblemPersist, error)
LoadProblemSchemaFromFile loads the problem schema from file.
type ProblemPersistAbout ¶
type ProblemPersistAbout struct {
    ProblemID            string   `json:"problemID"`
    ProblemName          string   `json:"problemName"`
    ProblemDescription   string   `json:"problemDescription"`
    TaskKeywords         []string `json:"taskKeywords"`
    ProblemVersion       string   `json:"problemVersion"`
    ProblemSchemaVersion string   `json:"problemSchemaVersion"`
}
ProblemPersistAbout represents the basic information of a problem.
type ProblemPersistData ¶
type ProblemPersistData struct {
    DatasetID string                  `json:"datasetID"`
    Targets   []*ProblemPersistTarget `json:"targets"`
}
ProblemPersistData ties targets to a dataset.
type ProblemPersistDataSplits ¶
type ProblemPersistDataSplits struct {
    Method     string  `json:"method"`
    TestSize   float64 `json:"testSize"`
    Stratified bool    `json:"stratified"`
    NumRepeats int     `json:"numRepeats"`
    RandomSeed int     `json:"randomSeed"`
    SplitsFile string  `json:"splitsFile"`
}
ProblemPersistDataSplits contains the information about the data splits.
type ProblemPersistExpectedOutput ¶
type ProblemPersistExpectedOutput struct {
    PredictionsFile string `json:"predictionsFile"`
}
ProblemPersistExpectedOutput represents the expected output of a problem.
type ProblemPersistInput ¶
type ProblemPersistInput struct {
    Data               []*ProblemPersistData              `json:"data"`
    PerformanceMetrics []*ProblemPersistPerformanceMetric `json:"performanceMetrics"`
    DataSplits         *ProblemPersistDataSplits          `json:"dataSplits,omitempty"`
}
ProblemPersistInput lists the information of a problem.
type ProblemPersistPerformanceMetric ¶
type ProblemPersistPerformanceMetric struct {
    Metric string `json:"metric"`
}
ProblemPersistPerformanceMetric captures the metrics of a problem.
type ProblemPersistTarget ¶
type ProblemPersistTarget struct {
    TargetIndex int    `json:"targetIndex"`
    ResID       string `json:"resID"`
    ColIndex    int    `json:"colIndex"`
    ColName     string `json:"colName"`
}
ProblemPersistTarget represents the target information of the problem.
type QueryRequest ¶
type QueryRequest struct {
    DatasetID string
    Target    string
    Filters   *api.FilterParams
    // contains filtered or unexported fields
}
QueryRequest defines a request to query a dataset for images similar to labelled observations.
func NewQueryRequest ¶
func NewQueryRequest(data []byte) (*QueryRequest, error)
NewQueryRequest instantiates a query request from a raw byte stream.
type QueryStatus ¶
type QueryStatus struct {
    Progress  string    `json:"progress"`
    RequestID string    `json:"requestId"`
    ResultID  string    `json:"resultId"`
    Error     error     `json:"error"`
    Timestamp time.Time `json:"timestamp"`
}
QueryStatus defines a query status update from a downstream AutoML system.
type Queue ¶
type Queue struct {
    // contains filtered or unexported fields
}
Queue uses a buffered channel to queue tasks and provides the result via channels.
func (*Queue) Done ¶
func (q *Queue) Done()
Done flags the in-progress task as completed, which removes it from the in-progress slot.
func (*Queue) Enqueue ¶
func (q *Queue) Enqueue(key string, data interface{}) chan *QueueResponse
Enqueue adds one entry to the queue and returns a channel on which the response will be provided. If the key is already in the queue, the data is not added a second time; instead, an additional output channel is added for the existing entry.
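A consumption sketch, assuming q is a *Queue whose processing goroutine (see InitializeQueue) is draining entries, and that payload is whatever that consumer expects:
responseChan := q.Enqueue("pipeline-key-1", payload) // identical keys are coalesced
resp := <-responseChan                               // block until the item is processed
if resp.Error != nil {
    log.Fatal(resp.Error)
}
fmt.Println(resp.Output)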
type QueueItem ¶
type QueueItem struct {
    // contains filtered or unexported fields
}
QueueItem is the wrapper for the data to process and the response channel.
type QueueResponse ¶
type QueueResponse struct {
    Output interface{}
    Error  error
}
QueueResponse represents the result from processing a queue item.
type SolutionRequest ¶
type SolutionRequest struct {
    Dataset              string
    DatasetMetadata      *api.Dataset
    TargetFeature        *model.Variable
    Task                 []string
    TimestampField       string
    TimestampSplitValue  float64
    MaxSolutions         int
    MaxTime              int
    Quality              string
    ProblemType          string
    Metrics              []string
    Filters              *api.FilterParams
    DatasetAugmentations []*model.DatasetOrigin
    TrainTestSplit       float64
    CancelFuncs          map[string]context.CancelFunc
    PosLabel             string
    // contains filtered or unexported fields
}
SolutionRequest represents a solution search request.
func NewSolutionRequest ¶
func NewSolutionRequest(variables []*model.Variable, data []byte) (*SolutionRequest, error)
NewSolutionRequest instantiates a new SolutionRequest.
func (*SolutionRequest) Cancel ¶
func (s *SolutionRequest) Cancel()
Cancel invokes the context cancel functions associated with this request. This stops any further messaging between the TA3 and TA2 for each solution.
func (*SolutionRequest) Listen ¶
func (s *SolutionRequest) Listen(listener SolutionStatusListener) error
Listen listens on the solution request for new solution statuses.
func (*SolutionRequest) PersistAndDispatch ¶
func (s *SolutionRequest) PersistAndDispatch(client *compute.Client, solutionStorage api.SolutionStorage, metaStorage api.MetadataStorage, dataStorage api.DataStorage) error
PersistAndDispatch persists the solution request and dispatches it.
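A rough end-to-end sketch; the variables slice, raw request body, compute client, and storage implementations are all assumed to be created elsewhere, and whether Listen blocks is not documented here, so it runs in its own goroutine in this sketch:
request, err := task.NewSolutionRequest(variables, requestBody)
if err != nil {
    log.Fatal(err)
}

// Receive status updates as the downstream AutoML system reports progress.
go func() {
    _ = request.Listen(func(status task.SolutionStatus) {
        fmt.Printf("%s: request %s, solution %s\n", status.Progress, status.RequestID, status.SolutionID)
    })
}()

// Persist the request and dispatch it for processing.
if err := request.PersistAndDispatch(client, solutionStorage, metaStorage, dataStorage); err != nil {
    log.Fatal(err)
}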
type SolutionStatus ¶
type SolutionStatus struct {
    Progress   string    `json:"progress"`
    RequestID  string    `json:"requestId"`
    SolutionID string    `json:"solutionId"`
    ResultID   string    `json:"resultId"`
    Error      error     `json:"error"`
    Timestamp  time.Time `json:"timestamp"`
}
SolutionStatus represents a solution status.
type SolutionStatusListener ¶
type SolutionStatusListener func(status SolutionStatus)
SolutionStatusListener executes on a new solution status.
type StopSolutionSearchRequest ¶
type StopSolutionSearchRequest struct {
    RequestID string `json:"requestId"`
}
StopSolutionSearchRequest represents a request to stop any pending solution searches.
func NewStopSolutionSearchRequest ¶
func NewStopSolutionSearchRequest(data []byte) (*StopSolutionSearchRequest, error)
NewStopSolutionSearchRequest instantiates a new StopSolutionSearchRequest.
type Task ¶
type Task struct {
    Task []string `json:"task"`
}
Task provides an array of task keywords. These are mapped to the string definitions in the LL D3M problem schema.
type TimeStampSplit ¶
TimeStampSplit defines a train/test split in a timeseries based on time values.
func SplitTimeSeries ¶
func SplitTimeSeries(timeseries []*api.TimeseriesObservation, trainPercentage float64) TimeStampSplit
SplitTimeSeries splits a set of (timestamps, value) tuples such that `trainPercentage` *data points* are less than or equal to the split value, and the remaining data points are greater than the split value. The timestamps are assumed to be ordered.
func SplitTimeStamps ¶
func SplitTimeStamps(timestamps []float64, trainPercentage float64) TimeStampSplit
SplitTimeStamps splits a set of time stamps such that `trainPercentage` *data points* are less than or equal to the split value, and the remaining data points are greater than the split value. The timestamps are assumed to be ordered.
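For example, a split of ten ordered timestamps at 80% (the fields of the returned TimeStampSplit are not shown on this page, so only the call is sketched):
timestamps := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
split := task.SplitTimeStamps(timestamps, 0.8) // 8 points fall at or before the split value
fmt.Printf("%+v\n", split)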
type VariableProvider ¶
VariableProvider defines a function that will get the provided variable.