codegraphtheory · adamsithr · Jun 28, 2026
diff --git a/scripts/validate_identity_leak.py b/scripts/validate_identity_leak.py
@@ -8,7 +8,13 @@
 from pathlib import Path
 
 SKIP_PARTS = {".git", ".venv", "__pycache__", ".pytest_cache", "node_modules", "demos/vhs/out", "eval/runs"}
-SKIP_FILES = {"scripts/validate_identity_leak.py", "filter-repo-replacements.txt"}
+# Repo-relative files exempted by name (presumably consulted by should_scan; confirm against the full file).
+SKIP_FILES = {
+    "scripts/validate_identity_leak.py",
+    "filter-repo-replacements.txt",
+    "AGENTS.md",
+    "docs/identity-safety.md",
+    "demos/vhs/sanitize-recording-env.sh",
+}
 SKIP_SUFFIXES = {".gif", ".mp4", ".webm", ".png", ".jpg", ".jpeg", ".webp", ".ico"}
 
 # Personal identifiers (case-insensitive), not generic color words like "dark gray".
@@ -28,9 +34,7 @@ def should_scan(path: Path, root: Path) -> bool:
         return False
     if any(part in SKIP_PARTS for part in rel.parts):
         return False
-    if path.suffix.lower() in SKIP_SUFFIXES:
-        return False
-    return True
+    return path.suffix.lower() not in SKIP_SUFFIXES
 
 
 def main() -> int:

diff --git a/skills/heavy-issue-to-merge/SKILL.md b/skills/heavy-issue-to-merge/SKILL.md
@@ -38,7 +38,9 @@ Dangerous GitHub writes require explicit `--execute` (default is dry-run).
 | `validate_candidate.py` | JSON schema validation for candidate results |
 | `claim_issue.py` | Issue claim labels/comments (`--execute` uses `gh`) |
 | `publish_pr.py` | Open PR (`--execute` uses `gh pr create`) |
-| `merge_pr.py` | Fail-closed stub (merge not implemented) |
+| `monitor_ci.py` | Monitor CI checks and transition to REPAIR or BLOCKED |
+| `state_machine.py` | End-to-end state machine driver |
+| `merge_pr.py` | Unattended fail-closed merge CLI |
 | `policy_gate.py` | Deterministic merge-policy evaluation |
 | `github_state.py` | Label projection helpers |
 | `collect_evidence.py` | Local git evidence snapshot |

diff --git a/skills/heavy-issue-to-merge/scripts/state_machine.py b/skills/heavy-issue-to-merge/scripts/state_machine.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import json
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+from typing import Any
+
+sys.path.insert(0, str(Path(__file__).resolve().parents[3] / "src"))
+
+from heavy_coder.github_state import STATE_LABELS, next_labels
+from heavy_coder.state import RunState, transition
+
+
+def fetch_issue_details(repo: str, issue_num: int) -> dict[str, Any] | None:
+    """Fetch issue details as a dict using the ``gh`` CLI.
+
+    Runs ``gh issue view <issue_num> --repo <repo> --json labels,state,title,body``
+    with a 30 second timeout.
+
+    Args:
+        repo: Repository identifier passed to ``--repo`` (``owner/name``).
+        issue_num: Number of the issue to look up.
+
+    Returns:
+        The decoded JSON object, or ``None`` when ``gh`` is not on PATH, cannot
+        be started, times out, exits non-zero, or prints something that is not
+        a JSON object.
+    """
+    if not shutil.which("gh"):
+        return None
+    cmd = [
+        "gh",
+        "issue",
+        "view",
+        str(issue_num),
+        "--repo",
+        repo,
+        "--json",
+        "labels,state,title,body",
+    ]
+    try:
+        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30, check=False)
+    except (subprocess.TimeoutExpired, OSError):
+        # A hung or unlaunchable gh is reported like any other fetch failure
+        # instead of escaping as a traceback.
+        return None
+    if proc.returncode != 0:
+        return None
+    try:
+        payload = json.loads(proc.stdout)
+    except ValueError:  # json.JSONDecodeError is a ValueError subclass
+        return None
+    # Callers use dict methods on the result, so reject any other JSON type.
+    return payload if isinstance(payload, dict) else None
+
+
+def update_labels(repo: str, issue_num: int, current_labels: set[str], next_state: RunState) -> bool:
+    """Apply the label changes implied by moving an issue to ``next_state``.
+
+    The target label set is computed with ``next_labels``; the difference from
+    ``current_labels`` is applied with a single ``gh issue edit`` call.
+
+    Args:
+        repo: Repository identifier passed to ``--repo`` (``owner/name``).
+        issue_num: Number of the issue to edit.
+        current_labels: Labels the caller believes are on the issue now.
+        next_state: State whose labels should be projected onto the issue.
+
+    Returns:
+        ``True`` when no change is needed or ``gh`` exits 0; ``False`` when
+        ``gh`` is not on PATH, cannot be started, times out, or exits non-zero.
+    """
+    if not shutil.which("gh"):
+        return False
+    updated = next_labels(current_labels, next_state)
+    # Sorted so the generated command line is reproducible between runs.
+    to_add = sorted(updated - current_labels)
+    to_remove = sorted(current_labels - updated)
+
+    if not to_add and not to_remove:
+        return True
+
+    cmd = ["gh", "issue", "edit", str(issue_num), "--repo", repo]
+    for label in to_add:
+        cmd += ["--add-label", label]
+    for label in to_remove:
+        cmd += ["--remove-label", label]
+
+    try:
+        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30, check=False)
+    except (subprocess.TimeoutExpired, OSError):
+        # Report a hung or unlaunchable gh through the boolean result.
+        return False
+    return proc.returncode == 0
+
+
+def determine_current_state(labels: set[str]) -> RunState:
+    """Infer the current RunState from an issue's labels.
+
+    Args:
+        labels: Label names present on the issue.
+
+    Returns:
+        The state mapped to the first label, in ``STATE_LABELS`` order, that is
+        present in ``labels``; ``RunState.QUEUED`` when none is present.
+    """
+    # Inverse lookup. NOTE(review): if several states share one label, the
+    # state listed last in STATE_LABELS wins for that label - confirm intended.
+    label_to_state = {v: k for k, v in STATE_LABELS.items()}
+    # Scan the mapping rather than the label set: set iteration order is not
+    # stable, so two state labels on one issue could resolve differently
+    # from one run to the next.
+    for label, state in label_to_state.items():
+        if label in labels:
+            return state
+    return RunState.QUEUED  # Default fallback
+
+
+def _run_script(script: Path, *script_args: str) -> None:
+    """Run ``script`` with the current interpreter, raising on a non-zero exit."""
+    subprocess.run([sys.executable, str(script), *script_args], check=True)
+
+
+def main() -> int:
+    """Advance one issue through the next step(s) of the run state machine.
+
+    The starting state comes from ``--mock-input`` (a JSON file) or, otherwise,
+    from the issue's labels fetched through ``gh``. QUEUED, CLAIMED and TRIAGED
+    are advanced in a single pass up to CANDIDATES_RUNNING; every later handled
+    state advances by exactly one step. Helper scripts and label edits only run
+    when ``--execute`` is given and the input is not mocked.
+
+    Returns:
+        0 after printing a JSON summary, or 2 after printing a JSON error when
+        the input cannot be loaded or a step's helper script fails.
+    """
+    parser = argparse.ArgumentParser(description="End-to-end state machine driver.")
+    parser.add_argument("--repo", required=True, help="owner/name")
+    parser.add_argument("--issue", type=int, required=True, help="Issue/PR number")
+    parser.add_argument("--execute", action="store_true", help="Perform actual changes / process execution")
+    parser.add_argument("--mock-input", help="JSON file containing mock issue state and configuration")
+    args = parser.parse_args()
+
+    title = "Default task title"
+    labels: set[str] = set()
+    current_state = RunState.QUEUED
+    mock_run = False
+
+    if args.mock_input:
+        # CLI boundary: any failure while loading the mock file is reported as
+        # a JSON error rather than a traceback.
+        try:
+            with open(args.mock_input, encoding="utf-8") as fh:
+                raw = json.load(fh)
+            labels = set(raw.get("labels", ["hermes:queued"]))
+            title = raw.get("title", "Fix some critical issue")
+            current_state = RunState(raw.get("current_state", determine_current_state(labels).value))
+            mock_run = True
+        except Exception as e:
+            print(json.dumps({"error": f"Failed to parse mock input: {e}"}, indent=2))
+            return 2
+    else:
+        details = fetch_issue_details(args.repo, args.issue)
+        if not details:
+            print(json.dumps({"error": "Failed to fetch issue details via gh CLI", "allowed": False}, indent=2))
+            return 2
+        title = details.get("title", "")
+        labels = {label.get("name") for label in details.get("labels", []) if label.get("name")}
+        current_state = determine_current_state(labels)
+
+    # Side effects are limited to real (non-mock) runs with --execute.
+    live = args.execute and not mock_run
+    issue_arg = str(args.issue)
+    skill_scripts = Path(__file__).resolve().parent
+    # NOTE(review): parents[2] is two directories above this file's folder,
+    # while the sys.path setup at the top of the file uses parents[3] -
+    # confirm that the shared scripts really live under parents[2]/scripts.
+    shared_scripts = Path(__file__).resolve().parents[2] / "scripts"
+
+    def sync_labels(target: RunState) -> None:
+        # Best effort: a failed label edit is surfaced on stderr but does not
+        # abort the step, and stdout stays pure JSON.
+        # NOTE(review): `labels` is the snapshot taken before any helper
+        # script ran in this pass; it may be stale if claim_issue.py edits
+        # labels - confirm.
+        if not update_labels(args.repo, args.issue, labels, target):
+            print(f"warning: could not update labels for state {target.value}", file=sys.stderr)
+
+    def claim(_target: RunState) -> None:
+        _run_script(
+            skill_scripts / "claim_issue.py",
+            "--repo", args.repo,
+            "--issue", issue_arg,
+            "--execute",
+        )
+
+    def start_candidates(target: RunState) -> None:
+        sync_labels(target)
+        # NOTE(review): the title is passed as a positional argument; a title
+        # starting with "-" could be read as an option by the child - confirm.
+        _run_script(shared_scripts / "heavy_coding_flow.py", title, "--repo", ".")
+
+    def critique(_target: RunState) -> None:
+        _run_script(shared_scripts / "critique_candidates.py")
+
+    def verify(_target: RunState) -> None:
+        _run_script(skill_scripts / "doctor.py")
+
+    def publish(target: RunState) -> None:
+        sync_labels(target)
+        _run_script(
+            skill_scripts / "publish_pr.py",
+            "--repo", args.repo,
+            "--title", f"Resolve: {title}",
+            "--head-branch", "bounty/auto-fix",
+            "--issue", issue_arg,
+            "--execute",
+        )
+
+    # state -> (next state, live-only action or None)
+    steps = {
+        RunState.QUEUED: (RunState.CLAIMED, claim),
+        RunState.CLAIMED: (RunState.TRIAGED, None),
+        RunState.TRIAGED: (RunState.CANDIDATES_RUNNING, start_candidates),
+        RunState.CANDIDATES_RUNNING: (RunState.CRITIQUE, critique),
+        RunState.CRITIQUE: (RunState.SYNTHESIS, None),
+        RunState.SYNTHESIS: (RunState.LOCAL_VERIFICATION, verify),
+        RunState.LOCAL_VERIFICATION: (RunState.PR_OPEN, publish),
+    }
+    # States whose step falls straight through into the following step.
+    chained = {RunState.QUEUED, RunState.CLAIMED}
+
+    transitions_run: list[dict[str, str]] = []
+
+    while current_state in steps:
+        origin = current_state
+        target, action = steps[origin]
+        transition(origin, target)
+        transitions_run.append({"from": origin.value, "to": target.value})
+
+        if live and action is not None:
+            try:
+                action(target)
+            except (subprocess.CalledProcessError, OSError) as exc:
+                # Report helper failures in the same JSON shape and exit code
+                # as the other error paths.
+                error = f"Step {origin.value} -> {target.value} failed: {exc}"
+                print(json.dumps({"error": error, "allowed": False}, indent=2))
+                return 2
+
+        current_state = target
+        if origin not in chained:
+            break
+
+    output = {
+        "repo": args.repo,
+        "issue": args.issue,
+        "initial_state": transitions_run[0]["from"] if transitions_run else current_state.value,
+        "final_state": current_state.value,
+        "transitions": transitions_run,
+        "execute": args.execute,
+    }
+
+    print(json.dumps(output, indent=2, sort_keys=True))
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/test_state_machine_driver.py b/tests/test_state_machine_driver.py
@@ -0,0 +1,125 @@
+import json
+import subprocess
+import sys
+import tempfile
+from collections.abc import Callable
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+# Find state_machine.py path
+ROOT = Path(__file__).resolve().parents[1]
+STATE_MACHINE_PATH = ROOT / "skills" / "heavy-issue-to-merge" / "scripts" / "state_machine.py"
+
+
+@pytest.fixture
+def run_driver() -> Callable[[list[str]], tuple[int, str, str]]:
+    """Provide a callable that runs state_machine.py in a child interpreter.
+
+    The callable takes the CLI arguments and returns
+    ``(returncode, stdout, stderr)``. The child runs with ROOT as its working
+    directory, and a non-zero exit does not raise.
+    """
+    base_cmd = [sys.executable, str(STATE_MACHINE_PATH)]
+
+    def _invoke(cli_args: list[str]) -> tuple[int, str, str]:
+        completed = subprocess.run(
+            [*base_cmd, *cli_args],
+            check=False,
+            cwd=str(ROOT),
+            text=True,
+            capture_output=True,
+        )
+        return completed.returncode, completed.stdout, completed.stderr
+
+    return _invoke
+
+
+def _driver_case(label: str, start: str, end: str, hops: int) -> tuple[dict[str, Any], int, str, str, int]:
+    """Build one DRIVER_CASES row for a run that is expected to exit 0."""
+    mock_data = {"labels": [label], "title": "Fix bug", "current_state": start}
+    return (mock_data, 0, start, end, hops)
+
+
+# Scenarios for the state machine driver flow.
+# Row layout: (mock_data, expected_code, expected_initial, expected_final, expected_transitions_len)
+DRIVER_CASES = [
+    # 1. QUEUED chains through CLAIMED and TRIAGED up to CANDIDATES_RUNNING
+    _driver_case("hermes:queued", "QUEUED", "CANDIDATES_RUNNING", 3),
+    # 2. CANDIDATES_RUNNING advances to CRITIQUE
+    _driver_case("hermes:running", "CANDIDATES_RUNNING", "CRITIQUE", 1),
+    # 3. CRITIQUE advances to SYNTHESIS
+    _driver_case("hermes:running", "CRITIQUE", "SYNTHESIS", 1),
+    # 4. SYNTHESIS advances to LOCAL_VERIFICATION
+    _driver_case("hermes:running", "SYNTHESIS", "LOCAL_VERIFICATION", 1),
+    # 5. LOCAL_VERIFICATION advances to PR_OPEN
+    _driver_case("hermes:running", "LOCAL_VERIFICATION", "PR_OPEN", 1),
+]
+
+
+@pytest.mark.parametrize("mock_data,expected_code,expected_initial,expected_final,expected_trans_len", DRIVER_CASES)
+def test_driver_transitions(
+    run_driver: Callable[[list[str]], tuple[int, str, str]],
+    mock_data: dict[str, Any],
+    expected_code: int,
+    expected_initial: str,
+    expected_final: str,
+    expected_trans_len: int
+) -> None:
+    """Run the driver on a mock input file and check the JSON summary it prints."""
+    # delete=False keeps the file on disk after the handle closes, so the
+    # child process can open it by name; it is removed in the finally below.
+    with tempfile.NamedTemporaryFile(mode="w+", suffix=".json", delete=False, encoding="utf-8") as handle:
+        handle.write(json.dumps(mock_data))
+        mock_path = Path(handle.name)
+
+    try:
+        code, stdout, stderr = run_driver(
+            ["--repo", "codegraphtheory/example", "--issue", "10", "--mock-input", str(mock_path)]
+        )
+        assert code == expected_code, f"Failed for case {mock_data}: {stderr}"
+
+        summary = json.loads(stdout)
+        assert summary["initial_state"] == expected_initial
+        assert summary["final_state"] == expected_final
+        assert len(summary["transitions"]) == expected_trans_len
+    finally:
+        mock_path.unlink()