Documentation ¶
Index ¶
- Constants
- func CreateBatches(schemaFile string, maxBatchSize int) ([]string, error)
- func DefaultMetrics(taskKeywords []string) []string
- func DefaultTaskType(targetType string, problemType string) []string
- func ExplainFeatureOutput(resultURI string, outputURI string) (*api.SolutionExplainResult, error)
- func ExtractDatasetFromRawRequest(data encjson.RawMessage) (string, error)
- func FeaturizeDataset(dataset string, target string) ([]string, error)
- func HarmonizeDataMetadata(datasetFolder string) error
- func HasTaskType(task *Task, taskType string) bool
- func InitializeCache(sourceFile string, readEnabled bool) error
- func InitializeQueue(config *env.Config)
- func SampleData(rawData [][]string, maxRows int) [][]string
- func SampleDataset(schemaFile string, outputFolder string, maxRows int, stratify bool, ...) (string, error)
- func SplitDataset(schemaFile string, splitter datasetSplitter) (string, string, error)
- func SubmitPipeline(client *compute.Client, datasets []string, datasetsProduce []string, ...) (string, error)
- type Cache
- type FilteredDataProvider
- type PredictRequest
- type PredictStatus
- type PredictionResult
- type ProblemPersist
- type ProblemPersistAbout
- type ProblemPersistData
- type ProblemPersistDataSplits
- type ProblemPersistExpectedOutput
- type ProblemPersistInput
- type ProblemPersistPerformanceMetric
- type ProblemPersistTarget
- type QueryRequest
- type QueryStatus
- type Queue
- type QueueItem
- type QueueResponse
- type SolutionRequest
- type SolutionStatus
- type SolutionStatusListener
- type StopSolutionSearchRequest
- type Task
- type TimeStampSplit
- type VariableProvider
- type VariablesProvider
Constants ¶
const (
    // ExplainableTypeSolution represents output that explains the solution as a whole.
    ExplainableTypeSolution = "solution"
    // ExplainableTypeStep represents output that explains a specific row.
    ExplainableTypeStep = "step"
    // ExplainableTypeConfidence represents confidence output.
    ExplainableTypeConfidence = "confidence"
)
const (
    // ModelQualityFast indicates that the system should try to generate models quickly at the expense of quality.
    ModelQualityFast = "speed"
    // ModelQualityHigh indicates that the system should focus on higher quality models at the expense of speed.
    ModelQualityHigh = "quality"
)
const (
    // D3MProblem name of the expected problem file.
    D3MProblem = "problemDoc.json"
)
Variables ¶
This section is empty.
Functions ¶
func CreateBatches ¶
func CreateBatches(schemaFile string, maxBatchSize int) ([]string, error)
CreateBatches splits the dataset into batches of at most maxBatchSize rows, returning paths to the schema files for all resulting batches.
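A minimal usage sketch (not from the package itself): it assumes the package is imported under the alias task and that the schema path points at a valid D3M dataset document.
batchSchemas, err := task.CreateBatches("datasets/example/datasetDoc.json", 10000)
if err != nil {
    log.Fatal(err)
}
for _, schemaFile := range batchSchemas {
    fmt.Println(schemaFile) // schema doc of one batch
}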
func DefaultMetrics ¶
func DefaultMetrics(taskKeywords []string) []string
DefaultMetrics returns the default metrics for the given task keywords.
func DefaultTaskType ¶
func DefaultTaskType(targetType string, problemType string) []string
DefaultTaskType returns the default task keywords for the given target and problem types.
func ExplainFeatureOutput ¶
func ExplainFeatureOutput(resultURI string, outputURI string) (*api.SolutionExplainResult, error)
ExplainFeatureOutput parses the explain feature output.
func ExtractDatasetFromRawRequest ¶
func ExtractDatasetFromRawRequest(data encjson.RawMessage) (string, error)
ExtractDatasetFromRawRequest extracts the dataset name from the raw message.
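For illustration (a sketch; encjson appears to alias encoding/json, and requestBody stands in for whatever JSON a client actually submits):
raw := encjson.RawMessage(requestBody) // raw JSON bytes received from a client
datasetName, err := task.ExtractDatasetFromRawRequest(raw)
if err != nil {
    log.Fatal(err)
}
fmt.Println(datasetName)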
func FeaturizeDataset ¶
func FeaturizeDataset(dataset string, target string) ([]string, error)
FeaturizeDataset creates feature outputs that can then be used directly when modelling instead of needing to run the complete pipeline.
func HarmonizeDataMetadata ¶
func HarmonizeDataMetadata(datasetFolder string) error
HarmonizeDataMetadata updates a dataset on disk so that the schema info matches the header of the backing data file, and limits variables to valid AutoML fields.
func HasTaskType ¶
func HasTaskType(task *Task, taskType string) bool
HasTaskType indicates whether or not a given Task includes the supplied task type.
func InitializeCache ¶
func InitializeCache(sourceFile string, readEnabled bool) error
InitializeCache sets up an empty cache or, if a source file is provided, reads the cache from that file.
func InitializeQueue ¶
func InitializeQueue(config *env.Config)
InitializeQueue creates the pipeline queue and runs a goroutine to process pipeline requests.
func SampleData ¶
func SampleData(rawData [][]string, maxRows int) [][]string
SampleData shuffles a dataset's rows and takes a subsample, returning the sampled rows.
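A short sketch (assuming the package import alias task):
rows := [][]string{
    {"1", "a", "0.5"},
    {"2", "b", "0.7"},
    {"3", "c", "0.1"},
    {"4", "d", "0.9"},
}
sampled := task.SampleData(rows, 2) // shuffle, then keep at most 2 rows
fmt.Println(len(sampled))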
func SampleDataset ¶
func SampleDataset(schemaFile string, outputFolder string, maxRows int, stratify bool, targetCol int, groupingCol int) (string, error)
SampleDataset shuffles a dataset's rows and stores a subsample, returning the schema doc URI of the sampled dataset.
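A hedged call sketch; the paths are illustrative and the column indices depend on the dataset's layout:
sampledSchema, err := task.SampleDataset(
    "datasets/example/datasetDoc.json", // source schema doc (illustrative path)
    "datasets/example-sampled",         // output folder for the sampled copy
    10000,                              // maximum rows to retain
    true,                               // stratify the sample
    2,                                  // target column index (illustrative)
    -1,                                 // grouping column index (illustrative)
)
if err != nil {
    log.Fatal(err)
}
fmt.Println(sampledSchema) // schema doc URI of the sampled dataset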
func SplitDataset ¶
func SplitDataset(schemaFile string, splitter datasetSplitter) (string, string, error)
SplitDataset splits a dataset into train and test sets, using an approach to splitting suitable to the task performed.
func SubmitPipeline ¶
func SubmitPipeline(client *compute.Client, datasets []string, datasetsProduce []string, searchRequest *pipeline.SearchSolutionsRequest, fullySpecifiedStep *description.FullySpecifiedPipeline, allowedValueTypes []string, shouldCache bool) (string, error)
SubmitPipeline executes pipelines using the client and returns the result URI.
Types ¶
type Cache ¶
type Cache struct {
    // contains filtered or unexported fields
}
Cache is used to cache data in memory. It can be persisted to disk as needed.
func (*Cache) PersistCache ¶
PersistCache stores the cache to disk.
type FilteredDataProvider ¶
type FilteredDataProvider func(dataset string, index string, filters *api.FilterParams) (*api.FilteredData, error)
FilteredDataProvider defines a function that will fetch data from a back end source given a set of filter parameters.
type PredictRequest ¶
type PredictRequest struct {
    DatasetID        string
    DatasetPath      string
    FittedSolutionID string
    TimestampField   string
    MaxTime          int
    IntervalCount    int
    IntervalLength   float64
    ExistingDataset  bool
    // contains filtered or unexported fields
}
PredictRequest defines a request to generate new predictions from a fitted model and input data.
func NewPredictRequest ¶
func NewPredictRequest(data []byte) (*PredictRequest, error)
NewPredictRequest instantiates a predict request from a raw byte stream.
type PredictStatus ¶
type PredictStatus struct {
    Progress  string    `json:"progress"`
    RequestID string    `json:"requestId"`
    ResultID  string    `json:"resultId"`
    Error     error     `json:"error"`
    Timestamp time.Time `json:"timestamp"`
}
PredictStatus defines a prediction status update from a downstream AutoML system.
type PredictionResult ¶
type PredictionResult struct {
    ProduceRequestID         string
    FittedSolutionID         string
    ResultURI                string
    Confidences              *api.SolutionExplainResult
    SolutionFeatureWeightURI string
    StepFeatureWeightURI     string
}
PredictionResult contains the output from a prediction produce call.
func GeneratePredictions ¶
func GeneratePredictions(datasetURI string, solutionID string, fittedSolutionID string, client *compute.Client) (*PredictionResult, error)
GeneratePredictions produces predictions for the specified dataset using the specified fitted solution.
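A hedged sketch of a produce call; client, solutionID, and fittedSolutionID are assumed to come from an earlier solution search and fit:
result, err := task.GeneratePredictions(
    "file:///inputs/predict/datasetDoc.json", // illustrative dataset URI
    solutionID,
    fittedSolutionID,
    client, // *compute.Client connected to the downstream system
)
if err != nil {
    log.Fatal(err)
}
fmt.Println(result.ResultURI)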
type ProblemPersist ¶
type ProblemPersist struct {
    About           *ProblemPersistAbout          `json:"about"`
    Inputs          *ProblemPersistInput          `json:"inputs"`
    ExpectedOutputs *ProblemPersistExpectedOutput `json:"expectedOutputs,omitempty"`
}
ProblemPersist contains the problem file data.
func CreateProblemSchema ¶
func CreateProblemSchema(datasetDir string, dataset string, targetVar *model.Variable, filters *api.FilterParams) (*ProblemPersist, string, error)
CreateProblemSchema captures the problem information in the required D3M problem format.
func LoadProblemSchemaFromFile ¶
func LoadProblemSchemaFromFile(filename string) (*ProblemPersist, error)
LoadProblemSchemaFromFile loads the problem schema from file.
type ProblemPersistAbout ¶
type ProblemPersistAbout struct {
    ProblemID            string   `json:"problemID"`
    ProblemName          string   `json:"problemName"`
    ProblemDescription   string   `json:"problemDescription"`
    TaskKeywords         []string `json:"taskKeywords"`
    ProblemVersion       string   `json:"problemVersion"`
    ProblemSchemaVersion string   `json:"problemSchemaVersion"`
}
ProblemPersistAbout represents the basic information of a problem.
type ProblemPersistData ¶
type ProblemPersistData struct {
    DatasetID string                  `json:"datasetID"`
    Targets   []*ProblemPersistTarget `json:"targets"`
}
ProblemPersistData ties targets to a dataset.
type ProblemPersistDataSplits ¶
type ProblemPersistDataSplits struct {
    Method     string  `json:"method"`
    TestSize   float64 `json:"testSize"`
    Stratified bool    `json:"stratified"`
    NumRepeats int     `json:"numRepeats"`
    RandomSeed int     `json:"randomSeed"`
    SplitsFile string  `json:"splitsFile"`
}
ProblemPersistDataSplits contains the information about the data splits.
type ProblemPersistExpectedOutput ¶
type ProblemPersistExpectedOutput struct {
    PredictionsFile string `json:"predictionsFile"`
}
ProblemPersistExpectedOutput represents the expected output of a problem.
type ProblemPersistInput ¶
type ProblemPersistInput struct {
    Data               []*ProblemPersistData              `json:"data"`
    PerformanceMetrics []*ProblemPersistPerformanceMetric `json:"performanceMetrics"`
    DataSplits         *ProblemPersistDataSplits          `json:"dataSplits,omitempty"`
}
ProblemPersistInput lists the information of a problem.
type ProblemPersistPerformanceMetric ¶
type ProblemPersistPerformanceMetric struct {
    Metric string `json:"metric"`
}
ProblemPersistPerformanceMetric captures the metrics of a problem.
type ProblemPersistTarget ¶
type ProblemPersistTarget struct {
    TargetIndex int    `json:"targetIndex"`
    ResID       string `json:"resID"`
    ColIndex    int    `json:"colIndex"`
    ColName     string `json:"colName"`
}
ProblemPersistTarget represents the target information of the problem.
type QueryRequest ¶
type QueryRequest struct {
    DatasetID string
    Target    string
    Filters   *api.FilterParams
    // contains filtered or unexported fields
}
QueryRequest defines a request to query a dataset for images similar to labelled observations.
func NewQueryRequest ¶
func NewQueryRequest(data []byte) (*QueryRequest, error)
NewQueryRequest instantiates a query request from a raw byte stream.
type QueryStatus ¶
type QueryStatus struct {
    Progress  string    `json:"progress"`
    RequestID string    `json:"requestId"`
    ResultID  string    `json:"resultId"`
    Error     error     `json:"error"`
    Timestamp time.Time `json:"timestamp"`
}
QueryStatus defines a query status update from a downstream AutoML system.
type Queue ¶
type Queue struct {
    // contains filtered or unexported fields
}
Queue uses a buffered channel to queue tasks and provides the result via channels.
func (*Queue) Done ¶
func (q *Queue) Done()
Done flags the in-progress task as completed, which removes it from the in-progress slot.
func (*Queue) Enqueue ¶
func (q *Queue) Enqueue(key string, data interface{}) chan *QueueResponse
Enqueue adds one entry to the queue and returns a channel on which the response will be provided. If the key is already in the queue, the data is not added a second time; instead, an additional output channel is added for the existing entry.
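A consumption sketch, assuming q is a *Queue whose processing goroutine (see InitializeQueue) is draining entries, and that payload is whatever that consumer expects:
responseChan := q.Enqueue("pipeline-key-1", payload) // identical keys are coalesced
resp := <-responseChan                               // block until the item is processed
if resp.Error != nil {
    log.Fatal(resp.Error)
}
fmt.Println(resp.Output)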
type QueueItem ¶
type QueueItem struct {
    // contains filtered or unexported fields
}
QueueItem is the wrapper for the data to process and the response channel.
type QueueResponse ¶
type QueueResponse struct {
    Output interface{}
    Error  error
}
QueueResponse represents the result from processing a queue item.
type SolutionRequest ¶
type SolutionRequest struct {
    Dataset              string
    DatasetMetadata      *api.Dataset
    TargetFeature        *model.Variable
    Task                 []string
    TimestampField       string
    TimestampSplitValue  float64
    MaxSolutions         int
    MaxTime              int
    Quality              string
    ProblemType          string
    Metrics              []string
    Filters              *api.FilterParams
    DatasetAugmentations []*model.DatasetOrigin
    TrainTestSplit       float64
    CancelFuncs          map[string]context.CancelFunc
    PosLabel             string
    // contains filtered or unexported fields
}
SolutionRequest represents a solution search request.
func NewSolutionRequest ¶
func NewSolutionRequest(variables []*model.Variable, data []byte) (*SolutionRequest, error)
NewSolutionRequest instantiates a new SolutionRequest.
func (*SolutionRequest) Cancel ¶
func (s *SolutionRequest) Cancel()
Cancel invokes the context cancel functions associated with this request. This stops any further messaging between the TA3 and TA2 for each solution.
func (*SolutionRequest) Listen ¶
func (s *SolutionRequest) Listen(listener SolutionStatusListener) error
Listen listens on the solution request for new solution statuses.
func (*SolutionRequest) PersistAndDispatch ¶
func (s *SolutionRequest) PersistAndDispatch(client *compute.Client, solutionStorage api.SolutionStorage, metaStorage api.MetadataStorage, dataStorage api.DataStorage) error
PersistAndDispatch persists the solution request and dispatches it.
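A rough end-to-end sketch; the variables slice, raw request body, compute client, and storage implementations are all assumed to be created elsewhere, and whether Listen blocks is not documented here, so it runs in its own goroutine in this sketch:
request, err := task.NewSolutionRequest(variables, requestBody)
if err != nil {
    log.Fatal(err)
}

// Receive status updates as the downstream AutoML system reports progress.
go func() {
    _ = request.Listen(func(status task.SolutionStatus) {
        fmt.Printf("%s: request %s, solution %s\n", status.Progress, status.RequestID, status.SolutionID)
    })
}()

// Persist the request and dispatch it for processing.
if err := request.PersistAndDispatch(client, solutionStorage, metaStorage, dataStorage); err != nil {
    log.Fatal(err)
}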
type SolutionStatus ¶
type SolutionStatus struct {
    Progress   string    `json:"progress"`
    RequestID  string    `json:"requestId"`
    SolutionID string    `json:"solutionId"`
    ResultID   string    `json:"resultId"`
    Error      error     `json:"error"`
    Timestamp  time.Time `json:"timestamp"`
}
SolutionStatus represents a solution status.
type SolutionStatusListener ¶
type SolutionStatusListener func(status SolutionStatus)
SolutionStatusListener executes on a new solution status.
type StopSolutionSearchRequest ¶
type StopSolutionSearchRequest struct {
    RequestID string `json:"requestId"`
}
StopSolutionSearchRequest represents a request to stop any pending solution searches.
func NewStopSolutionSearchRequest ¶
func NewStopSolutionSearchRequest(data []byte) (*StopSolutionSearchRequest, error)
NewStopSolutionSearchRequest instantiates a new StopSolutionSearchRequest.
type Task ¶
type Task struct {
    Task []string `json:"task"`
}
Task provides an array of task keywords. These are mapped to the string definitions in the LL D3M problem schema.
type TimeStampSplit ¶
TimeStampSplit defines a train/test split in a timeseries based on time values.
func SplitTimeSeries ¶
func SplitTimeSeries(timeseries []*api.TimeseriesObservation, trainPercentage float64) TimeStampSplit
SplitTimeSeries splits a set of (timestamps, value) tuples such that `trainPercentage` *data points* are less than or equal to the split value, and the remaining data points are greater than the split value. The timestamps are assumed to be ordered.
func SplitTimeStamps ¶
func SplitTimeStamps(timestamps []float64, trainPercentage float64) TimeStampSplit
SplitTimeStamps splits a set of time stamps such that `trainPercentage` *data points* are less than or equal to the split value, and the remaining data points are greater than the split value. The timestamps are assumed to be ordered.
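For example, a split of ten ordered timestamps at 80% (the fields of the returned TimeStampSplit are not shown on this page, so only the call is sketched):
timestamps := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
split := task.SplitTimeStamps(timestamps, 0.8) // 8 points fall at or before the split value
fmt.Printf("%+v\n", split)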
type VariableProvider ¶
VariableProvider defines a function that will get the provided variable.