From c71f70868581d92c16d3288ca3446ccfddfc4548 Mon Sep 17 00:00:00 2001 From: Heath Stewart Date: Thu, 25 Jun 2026 21:11:11 -0700 Subject: [PATCH] Define thresholds, p0 PR suite --- .gitignore | 3 +++ .vally.yaml | 9 ++++----- evals/linting/eval.yaml | 5 +++++ evals/security/eval.yaml | 5 +++++ 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 575fe22..df3f4dd 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ **/__pycache__/ **/*.pyc **/*.pyo + +# Vally +vally-results/ diff --git a/.vally.yaml b/.vally.yaml index afc1f9d..e700a1e 100644 --- a/.vally.yaml +++ b/.vally.yaml @@ -1,9 +1,8 @@ paths: - evals: evals + evals: evals/ suites: pr: - description: Run all plugin skill evals for pull requests - evals: - - evals/linting/eval.yaml - - evals/security/eval.yaml + description: Run p0 plugin skill evals for pull requests + filter: + priority: p0 diff --git a/evals/linting/eval.yaml b/evals/linting/eval.yaml index 08b91bf..c803120 100644 --- a/evals/linting/eval.yaml +++ b/evals/linting/eval.yaml @@ -1,11 +1,16 @@ name: linting skills evals version: 1 description: Evaluates the linting plugin skills against representative file fixes +tags: + priority: p0 defaults: runs: 1 timeout: 5m +scoring: + threshold: 1.0 + stimuli: - name: check-spelling-fixes-typos-and-updates-dictionary prompt: | diff --git a/evals/security/eval.yaml b/evals/security/eval.yaml index a951727..203be0b 100644 --- a/evals/security/eval.yaml +++ b/evals/security/eval.yaml @@ -1,11 +1,16 @@ name: security skills evals version: 1 description: Evaluates the security plugin skills against representative workflow hardening tasks +tags: + priority: p0 defaults: runs: 1 timeout: 5m +scoring: + threshold: 1.0 + stimuli: - name: pin-github-actions-to-shas prompt: |