cppalliance · clean6378-max-it · Jun 25, 2026 · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -105,7 +105,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           python -m pip install -r requirements-lock.txt
-          python -m pip install 'pytest>=8,<9' 'hypothesis>=6.100,<7'
+          python -m pip install 'pytest>=8,<9' 'pytest-benchmark==4.0.0' 'hypothesis>=6.100,<7'
 
       - name: Run unittest suite
         run: python -m unittest discover tests -v
@@ -114,7 +114,8 @@ jobs:
         # Pytest fixtures (tests/conftest.py) build a temp workspaceStorage and
         # exercise Flask routes via app.test_client(). Only listed files — not
         # `pytest tests/` — to avoid re-collecting unittest.TestCase classes above.
-        run: python -m pytest tests/test_api_search.py tests/test_api_workspaces.py tests/test_api_export.py tests/test_pdf_export.py tests/test_search_helpers.py -v --tb=short
+        # -o addopts= avoids inheriting benchmark-only options from pyproject.toml.
+        run: python -m pytest tests/test_api_search.py tests/test_api_workspaces.py tests/test_api_export.py tests/test_pdf_export.py tests/test_search_helpers.py tests/test_check_benchmark_regression.py -v --tb=short -o addopts=
 
       # ── PyInstaller desktop build (Windows only, once per workflow) ────────
       # Closes #44. Builds the onedir bundle and smoke-tests --help so the
@@ -213,3 +214,42 @@ jobs:
             --verbose \
             --redact \
             --exit-code 1
+
+  # ── Performance benchmarks: summary cache (issue #115) ─────────────────────
+  benchmarks:
+    name: Performance benchmarks (gated)
+    needs: [unittest]
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: "3.12"
+
+      - name: Install runtime + benchmark dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -r requirements-lock.txt
+          python -m pip install 'pytest>=8,<9' 'pytest-benchmark==4.0.0'
+
+      - name: Run summary-cache benchmarks
+        run: >
+          python -m pytest tests/benchmarks/
+          --benchmark-only
+          --benchmark-min-rounds=5
+          --benchmark-json=benchmark-results.json
+          --benchmark-columns=min,max,mean,stddev,rounds
+          -o addopts=
+
+      - name: Regression gate
+        run: python scripts/check_benchmark_regression.py benchmark-results.json benchmarks/baselines.json
+
+      - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
+        if: always()
+        with:
+          name: benchmark-results
+          path: benchmark-results.json
diff --git a/.gitignore b/.gitignore
@@ -44,3 +44,5 @@ Thumbs.db
 htmlcov/
 coverage.xml
 .hypothesis/
+benchmark-results.json
+benchmarks/_raw.json
diff --git a/benchmarks/baselines.json b/benchmarks/baselines.json
@@ -0,0 +1,15 @@
+{
+  "_note": "Gated means from ubuntu-latest CI benchmark-results.json (PR #120, run 28123677675). Refresh after intentional perf changes: download benchmark-results.json from the CI artifacts job, then `python scripts/check_benchmark_regression.py benchmark-results.json benchmarks/baselines.json` (re-seed with reduce_baselines or edit means). Local capture: `pytest tests/benchmarks/ --benchmark-only --benchmark-json=benchmark-results.json -o addopts=` on ubuntu-latest.",
+  "updated": "2026-06-24T19:20:27Z",
+  "machine": "Linux",
+  "groups": {
+    "summary-cache": {
+      "test_summary_cache_hit": 6.3e-05,
+      "test_summary_cache_miss": 6.3e-05,
+      "test_fingerprint_workspace_entries[10]": 0.001844,
+      "test_fingerprint_workspace_entries[50]": 0.007759,
+      "test_fingerprint_workspace_entries[200]": 0.022231,
+      "test_summary_cache_round_trip": 0.000351
+    }
+  }
+}
diff --git a/pyproject.toml b/pyproject.toml
@@ -31,10 +31,20 @@ desktop = ["pywebview>=5.0,<6"]
 # Development tooling: testing + type checking.
 dev = [
     "pytest>=8,<9",
+    "pytest-benchmark>=4,<5",
     "mypy>=1.10,<2",
     "hypothesis>=6.100,<7",
 ]
 
+[tool.pytest.ini_options]
+# Requires pytest-benchmark (pip install -e ".[dev]") for default `pytest` invocations.
+pythonpath = ["."]
+addopts = "--benchmark-skip"
+testpaths = ["tests"]
+markers = [
+    "benchmark: performance benchmarks (pytest-benchmark)",
+]
+
 [project.scripts]
 # Primary CLI: export Cursor chat histories to Markdown / zip.
 # Usage: cursor-chat-export [--since all|last] [--out DIR] [--no-zip] [--help]

diff --git a/requirements-lock.txt b/requirements-lock.txt
@@ -6,7 +6,7 @@
 # Lock is generated on Linux (CI / update-lock.yml). Windows-only transitives (e.g.
 # colorama via click) are omitted — pip still installs them on Windows when needed.
 blinker==1.9.0            # via flask
-click==8.4.1              # via flask
+click==8.4.2              # via flask
 defusedxml==0.7.1         # via fpdf2
 flask==3.1.3              # via -r requirements.txt
 fonttools==4.63.0         # via fpdf2

diff --git a/scripts/check_benchmark_regression.py b/scripts/check_benchmark_regression.py
@@ -0,0 +1,163 @@
+"""Compare pytest-benchmark JSON output against stored baselines."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+THRESHOLD = 1.20
+
+
+class BenchmarkDataError(ValueError):
+    """Raised when benchmark JSON input is malformed or missing required fields."""
+
+
+def normalize_benchmark_name(name: str) -> str:
+    """Strip pytest file node prefix so baselines match short or full benchmark names."""
+    text = str(name)
+    if "::" not in text:
+        return text
+    prefix, _, suffix = text.partition("::")
+    # Only strip module paths (…/test_foo.py::test_name); leave "::" inside [param::value] intact.
+    if prefix.endswith(".py"):
+        return suffix
+    return text
+
+
+def load_results(results_path: str | Path) -> dict[str, float]:
+    path = Path(results_path)
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+    except OSError as exc:
+        raise BenchmarkDataError(f"cannot read {path}: {exc}") from exc
+    except json.JSONDecodeError as exc:
+        raise BenchmarkDataError(f"invalid JSON in {path}: {exc}") from exc
+    try:
+        benchmarks = data["benchmarks"]
+    except (KeyError, TypeError) as exc:
+        raise BenchmarkDataError(f"{path} missing top-level 'benchmarks' array") from exc
+    if not isinstance(benchmarks, list):
+        raise BenchmarkDataError(f"{path} 'benchmarks' must be an array")
+
+    results: dict[str, float] = {}
+    for index, entry in enumerate(benchmarks):
+        if not isinstance(entry, dict):
+            raise BenchmarkDataError(f"{path} benchmarks[{index}] must be an object")
+        try:
+            raw_name = entry["name"]
+            mean = float(entry["stats"]["mean"])
+        except (KeyError, TypeError, ValueError) as exc:
+            raise BenchmarkDataError(
+                f"{path} benchmarks[{index}] missing 'name' or 'stats.mean'"
+            ) from exc
+        name = normalize_benchmark_name(str(raw_name))
+        if name in results:
+            raise BenchmarkDataError(f"{path} duplicate benchmark name {name!r}")
+        results[name] = mean
+    return results
+
+
+def load_baseline_means(baselines_path: str | Path) -> dict[str, float]:
+    path = Path(baselines_path)
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+    except OSError as exc:
+        raise BenchmarkDataError(f"cannot read {path}: {exc}") from exc
+    except json.JSONDecodeError as exc:
+        raise BenchmarkDataError(f"invalid JSON in {path}: {exc}") from exc
+    if not isinstance(data, dict):
+        raise BenchmarkDataError(f"{path} root value must be an object")
+
+    if "groups" not in data:
+        raise BenchmarkDataError(f"{path} missing required 'groups' key")
+    groups = data["groups"]
+    if not isinstance(groups, dict):
+        raise BenchmarkDataError(f"{path} 'groups' must be an object")
+
+    means: dict[str, float] = {}
+    for group_name, value in groups.items():
+        if not isinstance(value, dict):
+            raise BenchmarkDataError(
+                f"{path} groups[{group_name!r}] must be an object of benchmark means"
+            )
+        for name, mean in value.items():
+            bench_name = normalize_benchmark_name(str(name))
+            if bench_name in means:
+                raise BenchmarkDataError(
+                    f"{path} duplicate benchmark name {bench_name!r} across groups"
+                )
+            try:
+                means[bench_name] = float(mean)
+            except (TypeError, ValueError) as exc:
+                raise BenchmarkDataError(
+                    f"{path} groups[{group_name!r}][{name!r}] is not a numeric mean"
+                ) from exc
+    return means
+
+
+def check_regression(
+    results_path: str | Path,
+    baselines_path: str | Path,
+    *,
+    threshold: float = THRESHOLD,
+) -> int:
+    """Return 0 when within threshold; 1 when any gated benchmark regresses."""
+    flat = load_results(results_path)
+    baseline_means = load_baseline_means(baselines_path)
+
+    failures: list[str] = []
+    missing: list[str] = []
+    for name, base in baseline_means.items():
+        cur = flat.get(name)
+        if cur is None:
+            print(f"FAIL: no current result for gated baseline {name!r}")
+            missing.append(name)
+            continue
+        if base == 0:
+            print(f"WARN: baseline for {name!r} is zero; skipping ratio check")
+            continue
+        ratio = cur / base
+        tag = "FAIL" if ratio > threshold else "ok"
+        print(f"[{tag}] {name}: {cur:.6f}s vs {base:.6f}s ({ratio:.2f}x)")
+        if ratio > threshold:
+            failures.append(name)
+
+    for name in flat:
+        if name not in baseline_means:
+            print(f"WARN: {name!r} has no baseline yet; not gated")
+
+    if failures:
+        print(f"\nREGRESSION: {len(failures)} benchmark(s) exceeded {threshold:.0%}")
+    if missing:
+        print(f"\nMISSING: {len(missing)} gated benchmark(s) absent from current results")
+    if failures or missing:
+        return 1
+    return 0
+
+
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("results_path", help="pytest-benchmark --benchmark-json output")
+    parser.add_argument("baselines_path", help="path to benchmarks/baselines.json")
+    parser.add_argument(
+        "--threshold",
+        type=float,
+        default=THRESHOLD,
+        help="fail when current mean exceeds baseline by more than this ratio (default: 1.20)",
+    )
+    args = parser.parse_args(argv)
+    try:
+        return check_regression(
+            args.results_path,
+            args.baselines_path,
+            threshold=args.threshold,
+        )
+    except BenchmarkDataError as exc:
+        print(f"ERROR: {exc}", file=sys.stderr)
+        return 2
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/benchmarks/__init__.py b/tests/benchmarks/__init__.py
diff --git a/tests/benchmarks/conftest.py b/tests/benchmarks/conftest.py
@@ -0,0 +1,89 @@
+"""Synthetic workspace trees for summary-cache performance benchmarks."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+from services import summary_cache
+from services.summary_cache import fingerprint_workspace_storage
+
+
+def make_workspace_entries(workspace_root: Path, count: int) -> list[dict[str, Any]]:
+    """Build *count* synthetic workspace entries with on-disk state files."""
+    entries: list[dict[str, Any]] = []
+    for i in range(count):
+        name = f"ws_{i:04d}"
+        entry_dir = workspace_root / name
+        entry_dir.mkdir(parents=True, exist_ok=True)
+        (entry_dir / "state.vscdb").write_bytes(b"bench")
+        workspace_json = entry_dir / "workspace.json"
+        workspace_json.write_text('{"folder": "/bench"}', encoding="utf-8")
+        entries.append(
+            {
+                "name": name,
+                "workspaceJsonPath": str(workspace_json),
+            }
+        )
+    return entries
+
+
+@pytest.fixture
+def summary_cache_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
+    """Redirect summary-cache files to an isolated temp directory.
+
+    Patches ``CACHE_DIR`` (also used by tab-summary paths via ``_tab_summaries_path``)
+    plus the projects/composer-map file constants used by current benchmarks.
+    Tab-summary cache benchmarks are deferred to issue #110 (unified benchmark suite).
+    """
+    cache_dir = tmp_path / "cache"
+    cache_dir.mkdir()
+    monkeypatch.setattr(summary_cache, "CACHE_DIR", cache_dir)
+    monkeypatch.setattr(summary_cache, "PROJECTS_CACHE_FILE", cache_dir / "projects.json")
+    monkeypatch.setattr(
+        summary_cache,
+        "COMPOSER_MAP_CACHE_FILE",
+        cache_dir / "composer-id-to-ws.json",
+    )
+    return cache_dir
+
+
+@pytest.fixture
+def sample_projects() -> list[dict[str, Any]]:
+    return [
+        {
+            "id": "ws_0000",
+            "name": "Bench Project",
+            "conversationCount": 3,
+            "lastModified": "2026-06-24T00:00:00Z",
+        }
+    ]
+
+
+@pytest.fixture
+def synthetic_workspace(tmp_path: Path, request: pytest.FixtureRequest) -> tuple[str, list[dict[str, Any]]]:
+    """Workspace path + entries. Parametrize via indirect ``workspace_entry_count``."""
+    count = getattr(request, "param", 10)
+    workspace_root = tmp_path / "workspaceStorage"
+    workspace_root.mkdir()
+    entries = make_workspace_entries(workspace_root, count)
+    return str(workspace_root), entries
+
+
+@pytest.fixture
+def workspace_fingerprint(synthetic_workspace: tuple[str, list[dict[str, Any]]]) -> dict[str, Any]:
+    workspace_path, entries = synthetic_workspace
+    return fingerprint_workspace_storage(
+        workspace_path,
+        entries,
+        global_db_path=None,
+        rules=[],
+    )
+
+
+@pytest.fixture
+def stale_fingerprint(workspace_fingerprint: dict[str, Any]) -> dict[str, Any]:
+    """Return a fingerprint guaranteed to differ from the stored one."""
+    return {**workspace_fingerprint, "rules_digest": "deadbeefdeadbeef"}