Documentation ¶
Index ¶
- Variables
- type AssessmentCategory
- type AssessmentKey
- type Assessments
- func (a Assessments) Add(x Assessments)
- func (a Assessments) Award(key AssessmentKey)
- func (a Assessments) AwardPoints(key AssessmentKey, count uint64)
- func (a Assessments) Category(totalTasks uint64) *AssessmentCategory
- func (a Assessments) Equal(x Assessments) bool
- func (a Assessments) Score() (score uint64)
- func (a Assessments) String() string
- func (a Assessments) StringCSV() (row []string)
Constants ¶
This section is empty.
Variables ¶
var ( // AssessmentKeyFilesExecuted holds the successfully executed files. AssessmentKeyFilesExecuted = RegisterAssessmentKey("files-executed", 1) // AssessmentKeyFilesExecutedMaximumReachable holds the maximum theoretically reachable executed files. AssessmentKeyFilesExecutedMaximumReachable = RegisterAssessmentKey("files-executed-maximum-reachable", 0) // AssessmentKeyProcessingTime holds the time in milliseconds that it took to complete the task. AssessmentKeyProcessingTime = RegisterAssessmentKey("processing-time", 0) // AssessmentKeyCoverage counts execution coverage objects. AssessmentKeyCoverage = RegisterAssessmentKey("coverage", 10) // AssessmentKeyTestsPassing holds the percentage of passing tests. AssessmentKeyTestsPassing = RegisterAssessmentKey("tests-passing", 10) // AssessmentKeyResponseCharacterCount counts the number of characters of a response. AssessmentKeyResponseCharacterCount = RegisterAssessmentKey("response-character-count", 0) // AssessmentKeyGenerateTestsForFileCharacterCount counts the number of characters of a generated test file. AssessmentKeyGenerateTestsForFileCharacterCount = RegisterAssessmentKey("generate-tests-for-file-character-count", 0) // AssessmentKeyResponseNoError indicates that a model responded without error. AssessmentKeyResponseNoError = RegisterAssessmentKey("response-no-error", 1) // AssessmentKeyResponseWithCode indicates that a model responded with code. AssessmentKeyResponseWithCode = RegisterAssessmentKey("response-with-code", 1) // AssessmentKeyResponseNoExcess indicates that a model did not produce more content than requested. // TODO Infer if a model produced "too much" code. https://github.com/symflower/eval-dev-quality/issues/44 AssessmentKeyResponseNoExcess = RegisterAssessmentKey("response-no-excess", 1) )
var ( // AssessmentCategoryUnknown indicates that it is not possible to compute a model's category. AssessmentCategoryUnknown = registerAssessmentCategory(AssessmentCategory{ ID: "category-unknown", Name: "category unknown", Description: "Models in this category could not be categorized.", }) // AssessmentCategoryResponseError indicates that a model has encountered an error trying to produce a response. AssessmentCategoryResponseError = registerAssessmentCategory(AssessmentCategory{ ID: "response-error", Name: "response error", Description: "Models in this category encountered an error.", }) // AssessmentCategoryResponseNoCode indicates that a model's response did not contain any source code. AssessmentCategoryResponseNoCode = registerAssessmentCategory(AssessmentCategory{ ID: "response-no-code", Name: "no code", Description: "Models in this category produced no code.", }) // AssessmentCategoryCodeInvalid indicates that a model's generated code produced an error when executed. AssessmentCategoryCodeInvalid = registerAssessmentCategory(AssessmentCategory{ ID: "code-invalid", Name: "invalid code", Description: "Models in this category produced invalid code.", }) // AssessmentCategoryCodeExecuted indicates that a model's generated code could be executed without an error. AssessmentCategoryCodeExecuted = registerAssessmentCategory(AssessmentCategory{ ID: "code-executed", Name: "executable code", Description: "Models in this category produced executable code.", }) // AssessmentCategoryCodeCoverageStatementReached indicates that a model's generated code reached 100% statement coverage. AssessmentCategoryCodeCoverageStatementReached = registerAssessmentCategory(AssessmentCategory{ ID: "code-coverage-statement", Name: "statement coverage reached", Description: "Models in this category produced code that reached full statement coverage.", }) // AssessmentCategoryCodeNoExcess indicates that a model's response did not contain more content than requested. 
AssessmentCategoryCodeNoExcess = registerAssessmentCategory(AssessmentCategory{ ID: "code-no-excess", Name: "no excess response", Description: "Models in this category did not respond with more content than requested.", }) )
var AllAssessmentCategories []*AssessmentCategory
AllAssessmentCategories holds all assessment categories.
var ( // AllAssessmentKeysStrings holds all registered assessment keys as strings. AllAssessmentKeysStrings []string )
Functions ¶
This section is empty.
Types ¶
type AssessmentCategory ¶
type AssessmentCategory struct { // ID holds a unique identifier. ID string // Name holds a short name. Name string // Description holds the description. Description string }
AssessmentCategory represents a categorical ranking of a model based on Assessments.
type AssessmentKey ¶
type AssessmentKey string
AssessmentKey defines a key for a numerical key-value assessment pair.
func RegisterAssessmentKey ¶
func RegisterAssessmentKey(key string, multiplier uint64) AssessmentKey
RegisterAssessmentKey registers a new assessment key. If the multiplier for this assessment type is zero, it is ignored for the score computation.
type Assessments ¶
type Assessments map[AssessmentKey]uint64
Assessments holds a collection of numerical assessment metrics.
func CombineWithSymflowerFixAssessments ¶ added in v0.6.0
func CombineWithSymflowerFixAssessments(model Assessments, fixed Assessments) (combined Assessments)
CombineWithSymflowerFixAssessments combines the model assessments with the ones from "symflower fix".
func Merge ¶
func Merge(a Assessments, b Assessments) (c Assessments)
Merge combines two assessment collections into a new assessment collection and returns the new assessment collection.
func NewAssessments ¶
func NewAssessments() Assessments
NewAssessments creates a new assessment collection.
func (Assessments) Add ¶
func (a Assessments) Add(x Assessments)
Add adds the given assessment collection to the current one.
func (Assessments) Award ¶
func (a Assessments) Award(key AssessmentKey)
Award yields the score points defined for the given key.
func (Assessments) AwardPoints ¶
func (a Assessments) AwardPoints(key AssessmentKey, count uint64)
AwardPoints yields multiple score points defined for the given key.
func (Assessments) Category ¶
func (a Assessments) Category(totalTasks uint64) *AssessmentCategory
Category infers a categorical ranking of a model based on assessment values. A model's overall category corresponds to the criterion for which the model was consistently able to receive the "total" amount of points. I.e. if there were 3 tasks in total and a model was able to produce executing code for all tasks, but the coverage goal was reached in only one case, then the category is only "CodeExecuted" because the coverage goal was not reached consistently. The returned category is never "nil".
func (Assessments) Equal ¶
func (a Assessments) Equal(x Assessments) bool
Equal checks if both assessment collections are equal.
func (Assessments) Score ¶
func (a Assessments) Score() (score uint64)
Score computes the score over all assessments in the collection.
func (Assessments) String ¶
func (a Assessments) String() string
String returns a string representation of the metrics.
func (Assessments) StringCSV ¶
func (a Assessments) StringCSV() (row []string)
StringCSV returns a CSV row string representation of the metrics.