Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions scripts/validate_identity_leak.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@
from pathlib import Path

# Names tested one path component at a time by should_scan (`part in SKIP_PARTS`
# for each entry of `rel.parts`).
# NOTE(review): entries containing "/" (e.g. "demos/vhs/out", "eval/runs")
# presumably can never equal a single path component, so they may have no
# effect -- confirm against how `rel` is built.
SKIP_PARTS = {".git", ".venv", "__pycache__", ".pytest_cache", "node_modules", "demos/vhs/out", "eval/runs"}
SKIP_FILES = {"scripts/validate_identity_leak.py", "filter-repo-replacements.txt"}
SKIP_FILES = {
"scripts/validate_identity_leak.py",
"filter-repo-replacements.txt",
"AGENTS.md",
"docs/identity-safety.md",
"demos/vhs/sanitize-recording-env.sh"
}
# Compared against `path.suffix.lower()` in should_scan, so every entry must be
# lower-case and include the leading dot.
SKIP_SUFFIXES = {".gif", ".mp4", ".webm", ".png", ".jpg", ".jpeg", ".webp", ".ico"}

# Personal identifiers (case-insensitive), not generic color words like "dark gray".
Expand All @@ -28,9 +34,7 @@ def should_scan(path: Path, root: Path) -> bool:
return False
if any(part in SKIP_PARTS for part in rel.parts):
return False
if path.suffix.lower() in SKIP_SUFFIXES:
return False
return True
return path.suffix.lower() not in SKIP_SUFFIXES


def main() -> int:
Expand Down
4 changes: 3 additions & 1 deletion skills/heavy-issue-to-merge/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ Dangerous GitHub writes require explicit `--execute` (default is dry-run).
| `validate_candidate.py` | JSON schema validation for candidate results |
| `claim_issue.py` | Issue claim labels/comments (`--execute` uses `gh`) |
| `publish_pr.py` | Open PR (`--execute` uses `gh pr create`) |
| `merge_pr.py` | Fail-closed stub (merge not implemented) |
| `monitor_ci.py` | Monitor CI checks and transition to REPAIR or BLOCKED |
| `state_machine.py` | End-to-end state machine driver |
| `merge_pr.py` | Unattended fail-closed merge CLI |
| `policy_gate.py` | Deterministic merge-policy evaluation |
| `github_state.py` | Label projection helpers |
| `collect_evidence.py` | Local git evidence snapshot |
Expand Down
212 changes: 212 additions & 0 deletions skills/heavy-issue-to-merge/scripts/state_machine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
import shutil
import subprocess
import sys
from pathlib import Path
from typing import Any

sys.path.insert(0, str(Path(__file__).resolve().parents[3] / "src"))

from heavy_coder.github_state import STATE_LABELS, next_labels
from heavy_coder.state import RunState, transition


def fetch_issue_details(repo: str, issue_num: int) -> dict[str, Any] | None:
"""Fetch issue or PR details using the gh CLI."""
if not shutil.which("gh"):
return None
cmd = [
"gh",
"issue",
"view",
str(issue_num),
"--repo",
repo,
"--json",
"labels,state,title,body",
]
proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30, check=False)
if proc.returncode != 0:
return None
try:
return json.loads(proc.stdout)
except Exception:
return None


def update_labels(repo: str, issue_num: int, current_labels: set[str], next_state: RunState) -> bool:
    """Bring the issue's labels in line with ``next_state`` via ``gh issue edit``.

    The target label set comes from ``next_labels(current_labels, next_state)``;
    only the difference from ``current_labels`` is sent to ``gh``.

    Args:
        repo: Repository in ``owner/name`` form, forwarded to ``--repo``.
        issue_num: Number of the issue to edit.
        current_labels: Label names currently on the issue.
        next_state: State whose labels should be projected onto the issue.

    Returns:
        ``True`` when no change was needed or ``gh`` exited with status 0;
        ``False`` when ``gh`` is not on ``PATH``, cannot be started, times
        out (30 seconds), or exits non-zero.
    """
    if not shutil.which("gh"):
        return False
    updated = next_labels(current_labels, next_state)
    # Sorted so the generated command line is deterministic across runs.
    to_add = sorted(updated - current_labels)
    to_remove = sorted(current_labels - updated)

    if not to_add and not to_remove:
        return True

    cmd = ["gh", "issue", "edit", str(issue_num), "--repo", repo]
    for label in to_add:
        cmd += ["--add-label", label]
    for label in to_remove:
        cmd += ["--remove-label", label]

    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30, check=False)
    except (OSError, subprocess.SubprocessError):
        # A timeout or launch failure is reported as False, like a non-zero
        # exit, instead of escaping as an exception.
        return False
    return proc.returncode == 0


def determine_current_state(labels: set[str]) -> RunState:
    """Map a set of GitHub label names to the ``RunState`` they encode.

    ``STATE_LABELS`` is inverted into a label -> state lookup. The first label
    met while iterating ``labels`` that appears in that lookup decides the
    result; ``RunState.QUEUED`` is returned when none does.
    """
    state_by_label = {name: state for state, name in STATE_LABELS.items()}
    matches = (state_by_label[name] for name in labels if name in state_by_label)
    return next(matches, RunState.QUEUED)


def _record_transition(log: list[dict[str, str]], current: RunState, target: RunState) -> None:
    """Call ``transition(current, target)`` and append the hop to ``log``.

    NOTE(review): ``transition`` presumably raises on an illegal move -- confirm.
    """
    transition(current, target)
    log.append({"from": current.value, "to": target.value})


def _run_script(script: Path, *script_args: str) -> None:
    """Run ``script`` with the current interpreter, raising on a non-zero exit."""
    subprocess.run([sys.executable, str(script), *script_args], check=True)


def _sync_labels(repo: str, issue_num: int, labels: set[str], next_state: RunState) -> None:
    """Push the labels for ``next_state``; warn on stderr if the update fails."""
    if not update_labels(repo, issue_num, labels, next_state):
        # Best-effort: keep going, but do not hide the failure from the operator.
        print(
            f"warning: failed to update labels on {repo}#{issue_num} for state {next_state.value}",
            file=sys.stderr,
        )


def main() -> int:
    """Advance one issue through the next step(s) of the run state machine.

    The starting state is read from ``--mock-input`` (a JSON file) or, failing
    that, inferred from the issue's labels fetched through ``gh``. A QUEUED
    issue is carried through CLAIMED and TRIAGED to CANDIDATES_RUNNING in one
    pass; CANDIDATES_RUNNING, CRITIQUE, SYNTHESIS and LOCAL_VERIFICATION each
    advance by a single state. Any other state produces no transition.

    Helper scripts and label edits run only when ``--execute`` is given and
    no mock input is used. A JSON summary is printed to stdout.

    Returns:
        0 on success; 2 when the mock input cannot be parsed or the issue
        cannot be fetched.

    Raises:
        subprocess.CalledProcessError: A helper script exited non-zero.
    """
    parser = argparse.ArgumentParser(description="End-to-end state machine driver.")
    parser.add_argument("--repo", required=True, help="owner/name")
    parser.add_argument("--issue", type=int, required=True, help="Issue/PR number")
    parser.add_argument("--execute", action="store_true", help="Perform actual changes / process execution")
    parser.add_argument("--mock-input", help="JSON file containing mock issue state and configuration")
    args = parser.parse_args()

    title = "Default task title"
    labels: set[str] = set()
    current_state = RunState.QUEUED
    mock_run = False

    if args.mock_input:
        try:
            with open(args.mock_input, encoding="utf-8") as fh:
                raw = json.load(fh)
            labels = set(raw.get("labels", ["hermes:queued"]))
            title = raw.get("title", "Fix some critical issue")
            current_state = RunState(raw.get("current_state", determine_current_state(labels).value))
            mock_run = True
        except Exception as e:
            # CLI boundary: any malformed mock file becomes a JSON error and exit 2.
            print(json.dumps({"error": f"Failed to parse mock input: {e}"}, indent=2))
            return 2
    else:
        details = fetch_issue_details(args.repo, args.issue)
        if not details:
            print(json.dumps({"error": "Failed to fetch issue details via gh CLI", "allowed": False}, indent=2))
            return 2
        title = details.get("title", "")
        labels = {label.get("name") for label in details.get("labels", []) if label.get("name")}
        current_state = determine_current_state(labels)

    # Side effects happen only for a real run: --execute without mock input.
    live = args.execute and not mock_run
    here = Path(__file__).resolve()
    skill_scripts = here.parent
    # NOTE(review): parents[2] is the grandparent of this script's directory,
    # while the sys.path bootstrap uses parents[3] -- confirm the flow scripts
    # really live under parents[2] / "scripts".
    flow_scripts = here.parents[2] / "scripts"
    transitions_run: list[dict[str, str]] = []

    if current_state == RunState.QUEUED:
        next_state = RunState.CLAIMED
        _record_transition(transitions_run, current_state, next_state)
        if live:
            _run_script(
                skill_scripts / "claim_issue.py",
                "--repo", args.repo,
                "--issue", str(args.issue),
                "--execute",
            )
        current_state = next_state

    if current_state == RunState.CLAIMED:
        next_state = RunState.TRIAGED
        _record_transition(transitions_run, current_state, next_state)
        current_state = next_state

    # From here on at most one branch runs, so later states advance one step per call.
    if current_state == RunState.TRIAGED:
        next_state = RunState.CANDIDATES_RUNNING
        _record_transition(transitions_run, current_state, next_state)
        if live:
            _sync_labels(args.repo, args.issue, labels, next_state)
            _run_script(flow_scripts / "heavy_coding_flow.py", title, "--repo", ".")
        current_state = next_state

    elif current_state == RunState.CANDIDATES_RUNNING:
        next_state = RunState.CRITIQUE
        _record_transition(transitions_run, current_state, next_state)
        if live:
            _run_script(flow_scripts / "critique_candidates.py")
        current_state = next_state

    elif current_state == RunState.CRITIQUE:
        next_state = RunState.SYNTHESIS
        _record_transition(transitions_run, current_state, next_state)
        current_state = next_state

    elif current_state == RunState.SYNTHESIS:
        next_state = RunState.LOCAL_VERIFICATION
        _record_transition(transitions_run, current_state, next_state)
        if live:
            _run_script(skill_scripts / "doctor.py")
        current_state = next_state

    elif current_state == RunState.LOCAL_VERIFICATION:
        next_state = RunState.PR_OPEN
        _record_transition(transitions_run, current_state, next_state)
        if live:
            _sync_labels(args.repo, args.issue, labels, next_state)
            _run_script(
                skill_scripts / "publish_pr.py",
                "--repo", args.repo,
                "--title", f"Resolve: {title}",
                "--head-branch", "bounty/auto-fix",
                "--issue", str(args.issue),
                "--execute",
            )
        current_state = next_state

    output = {
        "repo": args.repo,
        "issue": args.issue,
        # With no transitions the state never moved, so it is also the initial one.
        "initial_state": current_state.value if not transitions_run else transitions_run[0]["from"],
        "final_state": current_state.value,
        "transitions": transitions_run,
        "execute": args.execute,
    }

    print(json.dumps(output, indent=2, sort_keys=True))
    return 0


# Script entry point: the integer returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
125 changes: 125 additions & 0 deletions tests/test_state_machine_driver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import json
import subprocess
import sys
import tempfile
from collections.abc import Callable
from pathlib import Path
from typing import Any

import pytest

# The directory one level above this file's folder is used as the root from
# which the driver script under test is located.
ROOT = Path(__file__).resolve().parents[1]
STATE_MACHINE_PATH = ROOT.joinpath("skills", "heavy-issue-to-merge", "scripts", "state_machine.py")


@pytest.fixture
def run_driver() -> Callable[[list[str]], tuple[int, str, str]]:
    """Provide a callable that runs state_machine.py in a child process.

    The callable takes the CLI arguments and returns
    ``(returncode, stdout, stderr)``; the child runs with ``ROOT`` as its
    working directory and a non-zero exit does not raise.
    """

    def invoke(cli_args: list[str]) -> tuple[int, str, str]:
        completed = subprocess.run(
            [sys.executable, str(STATE_MACHINE_PATH), *cli_args],
            cwd=str(ROOT),
            text=True,
            capture_output=True,
            check=False,
        )
        return completed.returncode, completed.stdout, completed.stderr

    return invoke


# Parametrised cases for the state machine driver.
# Each entry is (mock_data, expected_code, expected_initial, expected_final,
# expected_transitions_len). Rows below are (label, start, end, hops):
#   1. QUEUED runs through CLAIMED -> TRIAGED -> CANDIDATES_RUNNING (3 hops)
#   2-5. every later state advances by exactly one step
DRIVER_CASES = [
    (
        {"labels": [label], "title": "Fix bug", "current_state": start},
        0,
        start,
        end,
        hops,
    )
    for label, start, end, hops in (
        ("hermes:queued", "QUEUED", "CANDIDATES_RUNNING", 3),
        ("hermes:running", "CANDIDATES_RUNNING", "CRITIQUE", 1),
        ("hermes:running", "CRITIQUE", "SYNTHESIS", 1),
        ("hermes:running", "SYNTHESIS", "LOCAL_VERIFICATION", 1),
        ("hermes:running", "LOCAL_VERIFICATION", "PR_OPEN", 1),
    )
]


@pytest.mark.parametrize("mock_data,expected_code,expected_initial,expected_final,expected_trans_len", DRIVER_CASES)
def test_driver_transitions(
    run_driver: Callable[[list[str]], tuple[int, str, str]],
    mock_data: dict[str, Any],
    expected_code: int,
    expected_initial: str,
    expected_final: str,
    expected_trans_len: int
) -> None:
    """Run the driver on a mock-input file and check the reported transitions."""
    # delete=False keeps the file on disk after the handle closes so the child
    # process can open it; it is removed explicitly in the finally clause.
    with tempfile.NamedTemporaryFile(mode="w+", suffix=".json", delete=False, encoding="utf-8") as mock_file:
        mock_name = mock_file.name
        json.dump(mock_data, mock_file)

    try:
        code, stdout, stderr = run_driver(
            ["--repo", "codegraphtheory/example", "--issue", "10", "--mock-input", mock_name]
        )
        assert code == expected_code, f"Failed for case {mock_data}: {stderr}"

        report = json.loads(stdout)
        assert report["initial_state"] == expected_initial
        assert report["final_state"] == expected_final
        assert len(report["transitions"]) == expected_trans_len
    finally:
        Path(mock_name).unlink()