metrics

package

v0.6.2 Latest Latest Go to latest Published: Sep 9, 2024 License: MIT Imports: 4 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/symflower/eval-dev-quality

Links

Open Source Insights

Documentation ¶

Index ¶

Variables
type AssessmentCategory
type AssessmentKey
- func RegisterAssessmentKey(key string, multiplier uint64) AssessmentKey
type Assessments

Constants ¶

This section is empty.

Variables ¶

View Source

var (
	// Each key below is created through RegisterAssessmentKey(key, multiplier).
	// A multiplier of zero means the key is ignored for the score computation.

	// AssessmentKeyFilesExecuted holds the successfully executed files.
	AssessmentKeyFilesExecuted = RegisterAssessmentKey("files-executed", 1)
	// AssessmentKeyFilesExecutedMaximumReachable holds the maximum theoretically reachable executed files.
	AssessmentKeyFilesExecutedMaximumReachable = RegisterAssessmentKey("files-executed-maximum-reachable", 0)
	// AssessmentKeyProcessingTime holds the time in milliseconds that it took to complete the task.
	AssessmentKeyProcessingTime = RegisterAssessmentKey("processing-time", 0)

	// AssessmentKeyCoverage counts execution coverage objects.
	AssessmentKeyCoverage = RegisterAssessmentKey("coverage", 10)

	// AssessmentKeyTestsPassing holds the percentage of passing tests.
	AssessmentKeyTestsPassing = RegisterAssessmentKey("tests-passing", 10)

	// AssessmentKeyResponseCharacterCount counts the number of characters of a response.
	AssessmentKeyResponseCharacterCount = RegisterAssessmentKey("response-character-count", 0)
	// AssessmentKeyGenerateTestsForFileCharacterCount counts the number of characters of a generated test file.
	AssessmentKeyGenerateTestsForFileCharacterCount = RegisterAssessmentKey("generate-tests-for-file-character-count", 0)

	// AssessmentKeyResponseNoError indicates that a model responded without error.
	AssessmentKeyResponseNoError = RegisterAssessmentKey("response-no-error", 1)
	// AssessmentKeyResponseWithCode indicates that a model responded with code.
	AssessmentKeyResponseWithCode = RegisterAssessmentKey("response-with-code", 1)
	// AssessmentKeyResponseNoExcess indicates that a model did not produce more content than requested.
	// TODO Infer if a model produced "too much" code. https://github.com/symflower/eval-dev-quality/issues/44
	AssessmentKeyResponseNoExcess = RegisterAssessmentKey("response-no-excess", 1)
)

View Source

var (
	// The predefined assessment categories. Each one is created through "registerAssessmentCategory".
	// NOTE(review): "registerAssessmentCategory" is not visible here; presumably it also records the category in "AllAssessmentCategories" - confirm.

	// AssessmentCategoryUnknown indicates that it is not possible to compute a model's category.
	AssessmentCategoryUnknown = registerAssessmentCategory(AssessmentCategory{
		ID:          "category-unknown",
		Name:        "category unknown",
		Description: "Models in this category could not be categorized.",
	})
	// AssessmentCategoryResponseError indicates that a model encountered an error while trying to produce a response.
	AssessmentCategoryResponseError = registerAssessmentCategory(AssessmentCategory{
		ID:          "response-error",
		Name:        "response error",
		Description: "Models in this category encountered an error.",
	})
	// AssessmentCategoryResponseNoCode indicates that a model's response did not contain any source code.
	AssessmentCategoryResponseNoCode = registerAssessmentCategory(AssessmentCategory{
		ID:          "response-no-code",
		Name:        "no code",
		Description: "Models in this category produced no code.",
	})
	// AssessmentCategoryCodeInvalid indicates that a model's generated code produced an error when executed.
	AssessmentCategoryCodeInvalid = registerAssessmentCategory(AssessmentCategory{
		ID:          "code-invalid",
		Name:        "invalid code",
		Description: "Models in this category produced invalid code.",
	})
	// AssessmentCategoryCodeExecuted indicates that a model's generated code could be executed without an error.
	AssessmentCategoryCodeExecuted = registerAssessmentCategory(AssessmentCategory{
		ID:          "code-executed",
		Name:        "executable code",
		Description: "Models in this category produced executable code.",
	})
	// AssessmentCategoryCodeCoverageStatementReached indicates that a model's generated code reached 100% statement coverage.
	AssessmentCategoryCodeCoverageStatementReached = registerAssessmentCategory(AssessmentCategory{
		ID:          "code-coverage-statement",
		Name:        "statement coverage reached",
		Description: "Models in this category produced code that reached full statement coverage.",
	})
	// AssessmentCategoryCodeNoExcess indicates that a model's response did not contain more content than requested.
	AssessmentCategoryCodeNoExcess = registerAssessmentCategory(AssessmentCategory{
		ID:          "code-no-excess",
		Name:        "no excess response",
		Description: "Models in this category did not respond with more content than requested.",
	})
)

View Source

var AllAssessmentCategories []*AssessmentCategory

AllAssessmentCategories holds all assessment categories.

View Source

var (

	// AllAssessmentKeysStrings holds all registered assessment keys as strings.
	AllAssessmentKeysStrings []string
)

Functions ¶

This section is empty.

Types ¶

type AssessmentCategory ¶

type AssessmentCategory struct {
	// ID holds a unique identifier, e.g. "code-executed".
	ID string
	// Name holds a short human-readable name, e.g. "executable code".
	Name string
	// Description holds a sentence describing the models that fall into this category.
	Description string
}

AssessmentCategory represents a categorical ranking of a model based on Assessments.

type AssessmentKey ¶

type AssessmentKey string

AssessmentKey defines a key for a numerical key-value assessment pair.

func RegisterAssessmentKey ¶

func RegisterAssessmentKey(key string, multiplier uint64) AssessmentKey

RegisterAssessmentKey registers a new assessment key. If the multiplier for the key is zero, the key is ignored for the score computation.

type Assessments ¶

type Assessments map[AssessmentKey]uint64

Assessments holds a collection of numerical assessment metrics.

func CombineWithSymflowerFixAssessments ¶ added in v0.6.0

func CombineWithSymflowerFixAssessments(model Assessments, fixed Assessments) (combined Assessments)

CombineWithSymflowerFixAssessments combines the model assessments with the ones from "symflower fix".

func Merge ¶

func Merge(a Assessments, b Assessments) (c Assessments)

Merge combines two assessment collections into a new assessment collection and returns it.

func NewAssessments ¶

func NewAssessments() Assessments

NewAssessments creates a new assessment collection.

func (Assessments) Add ¶

func (a Assessments) Add(x Assessments)

Add adds the given assessment collection to the current one.

func (Assessments) Award ¶

func (a Assessments) Award(key AssessmentKey)

Award yields the score points defined for the given key.

func (Assessments) AwardPoints ¶

func (a Assessments) AwardPoints(key AssessmentKey, count uint64)

AwardPoints yields multiple score points defined for the given key.

func (Assessments) Category ¶

func (a Assessments) Category(totalTasks uint64) *AssessmentCategory

Category infers a categorical ranking of a model based on assessment values. A model's overall category corresponds to the criterion where the model was consistently able to receive the "total" amount of points. I.e. if there were 3 tasks in total and a model was able to produce executing code for all tasks, but the coverage goal was reached in only one case, then the category is only "CodeExecuted" because the coverage goal was not reached consistently. The returned category is never "nil".

func (Assessments) Equal ¶

func (a Assessments) Equal(x Assessments) bool

Equal checks if both assessment collections are equal.

func (Assessments) Score ¶

func (a Assessments) Score() (score uint64)

Score computes the score over all assessments in the collection.

func (Assessments) String ¶

func (a Assessments) String() string

String returns a string representation of the metrics.

func (Assessments) StringCSV ¶

func (a Assessments) StringCSV() (row []string)

StringCSV returns a CSV row string representation of the metrics.

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
testing

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL