From 46539152ab5ef57d6b2d4c2791c09a798136e3b7 Mon Sep 17 00:00:00 2001 From: stacknil Date: Mon, 29 Jun 2026 14:24:57 +0800 Subject: [PATCH] feat(report): add finding verdict boundary --- CHANGELOG.md | 1 + README.md | 10 ++++++---- docs/quality-gates.md | 2 +- docs/report-artifacts.md | 12 ++++++++++-- docs/rule-catalog.md | 9 +++++++++ src/detector.cpp | 15 +++++++++++++++ src/detector.hpp | 2 ++ src/report.cpp | 12 ++++++++++-- .../journalctl_short_full/report.json | 7 +++++-- .../multi_host_journalctl_short_full/report.json | 7 +++++-- .../multi_host_syslog_legacy/report.json | 7 +++++-- .../report_contracts/syslog_legacy/report.json | 7 +++++-- tests/test_detector.cpp | 6 ++++++ tests/test_report.cpp | 8 ++++++-- tests/test_report_contracts.cpp | 1 + 15 files changed, 87 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f63c4d5..1538995 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ All notable user-visible changes should be recorded here. - Added sanitized golden `report.md` / `report.json` regression fixtures to lock report contracts. - Added `schema` and `schema_version` fields to `report.json` so downstream tooling can identify the report artifact contract. +- Added `verdict_boundary` to JSON findings and advanced the report artifact contract to `loglens.report.v2`. - Expanded parser coverage for `Accepted publickey` and selected `pam_faillock` / `pam_sss` variants. - Added compact host-level summaries for multi-host reports. - Added optional CSV export for findings and warnings when explicitly requested. 
diff --git a/README.md b/README.md index f47fd07..13c92da 100644 --- a/README.md +++ b/README.md @@ -15,11 +15,13 @@ A compact finding summary is a bounded triage signal, not attribution: { "rule_id": "brute_force", "subject_kind": "source_ip", - "subject": "198.51.100.23", - "window": "10m", + "subject": "203.0.113.10", + "window_start": "2026-03-10 08:11:22", + "window_end": "2026-03-10 08:18:05", "threshold": 5, - "observed": 8, - "verdict_boundary": "triage_signal_not_attribution" + "observed_count": 5, + "evidence_event_ids": ["line:1", "line:2", "line:3", "line:4", "line:5"], + "verdict_boundary": "triage_signal_not_compromise_or_attribution" } ``` diff --git a/docs/quality-gates.md b/docs/quality-gates.md index 83a4058..f306e5a 100644 --- a/docs/quality-gates.md +++ b/docs/quality-gates.md @@ -15,7 +15,7 @@ The main review principle is: | Parser coverage is visible | [`parser-coverage-notes.md`](./parser-coverage-notes.md), [`tests/fixtures/parser_matrix/noisy_auth_expected.json`](../tests/fixtures/parser_matrix/noisy_auth_expected.json) | `test_parser` compares noisy-auth coverage output to the checked-in expected summary | Reviewer can see parsed lines, skipped blanks, warnings, failure categories, and unknown-pattern buckets | | Unsupported evidence does not silently become detector evidence | [`parser-contract.md`](./parser-contract.md), [`rule-catalog.md`](./rule-catalog.md), [`case-study-linux-auth-bruteforce.md`](./case-study-linux-auth-bruteforce.md) | `test_parser` covers unknown-pattern warnings; `test_detector` covers signal-boundary behavior | Reviewer can explain why unsupported lines remain warnings instead of findings | | Report artifacts are deterministic | [`report-artifacts.md`](./report-artifacts.md), report-contract fixtures under [`tests/fixtures/report_contracts`](../tests/fixtures/report_contracts) | `test_report_contracts` compares generated `report.md`, `report.json`, `findings.csv`, and `warnings.csv` against golden fixtures | 
Reviewer can regenerate reports and see schema or text changes as explicit snapshot diffs | -| Findings are explainable | [`rule-catalog.md`](./rule-catalog.md), [`report-artifacts.md`](./report-artifacts.md) | `test_report` checks JSON finding fields; report-contract fixtures lock `rule_id`, `window_start`, `window_end`, `threshold`, `observed_count`, `grouping_key`, and `evidence_event_ids` | Reviewer can trace a finding from rule context back to source line IDs | +| Findings are explainable | [`rule-catalog.md`](./rule-catalog.md), [`report-artifacts.md`](./report-artifacts.md) | `test_report` checks JSON finding fields; report-contract fixtures lock `rule_id`, `window_start`, `window_end`, `threshold`, `observed_count`, `grouping_key`, `evidence_event_ids`, and `verdict_boundary` | Reviewer can trace a finding from rule context back to source line IDs and see the non-verdict boundary | | False-positive boundaries are visible | [`rule-catalog.md`](./rule-catalog.md), [`case-study-linux-auth-bruteforce.md`](./case-study-linux-auth-bruteforce.md) | Documentation review gate; detector tests ensure unsupported evidence does not inflate counts | Reviewer can state NAT, internal scanner, lab replay, shared bastion, scheduled admin task, and malformed replay boundaries | | Parser failure taxonomy is exposed | [`parser-contract.md`](./parser-contract.md), [`parser-conformance-matrix.md`](./parser-conformance-matrix.md), [`report-artifacts.md`](./report-artifacts.md) | `test_parser`, `test_report`, `test_cli`, and `test_report_contracts` cover `failure_categories` and warning `category` output | Reviewer can distinguish timestamp, program, known-program unknown-message, malformed-source-IP, and unsupported-PAM failures | | Local scale expectations are reproducible | [`performance-envelope.md`](./performance-envelope.md), [`scripts/benchmark-performance-envelope.ps1`](../scripts/benchmark-performance-envelope.ps1) | `pwsh -File scripts/benchmark-performance-envelope.ps1` 
regenerates sanitized benchmark inputs and local summary artifacts | Reviewer can reproduce the 1k/10k/100k-line envelope and understand its caveats | diff --git a/docs/report-artifacts.md b/docs/report-artifacts.md index f146b69..710128e 100644 --- a/docs/report-artifacts.md +++ b/docs/report-artifacts.md @@ -40,16 +40,24 @@ The JSON report keeps parser observability visible next to findings: - `findings` - `warnings` -Finding objects contain `rule_id`, `rule`, `subject_kind`, `subject`, `grouping_key`, `threshold`, `observed_count`, `event_count`, `window_start`, `window_end`, `evidence_event_ids`, `usernames`, and `summary`. +Finding objects contain `rule_id`, `rule`, `subject_kind`, `subject`, `grouping_key`, `threshold`, `observed_count`, `event_count`, `window_start`, `window_end`, `evidence_event_ids`, `verdict_boundary`, `usernames`, and `summary`. `evidence_event_ids` are deterministic local event identifiers derived from the source line number, formatted as `line:<line_number>`. They let reviewers trace a finding back to the normalized input events that satisfied the rule window without implying global event identity. +`verdict_boundary` is a stable token that states what the finding must not be +read as. It keeps machine-readable findings aligned with LogLens's triage +scope: + +- `triage_signal_not_compromise_or_attribution` +- `triage_signal_not_intent_or_attribution` +- `triage_signal_not_maliciousness_or_authorization` + Warning objects contain the original `line_number`, parser `category`, and parser `reason`. `schema` and `schema_version` identify the report artifact contract, not the application release. They are intended for downstream tooling that needs a stable way to reject incompatible report shapes. The current JSON contract is -`loglens.report.v1` with `schema_version` set to `1`. +`loglens.report.v2` with `schema_version` set to `2`. 
Parser failure categories are stable reviewer-facing buckets for unsupported lines: `unknown_timestamp`, `unknown_program`, diff --git a/docs/rule-catalog.md b/docs/rule-catalog.md index 2b42883..9dd932a 100644 --- a/docs/rule-catalog.md +++ b/docs/rule-catalog.md @@ -36,9 +36,18 @@ JSON findings include both the finding conclusion and the rule context used to r - `observed_count`: observed value compared against the threshold - `window_start` and `window_end`: selected evidence window - `evidence_event_ids`: deterministic local event IDs in the selected window, formatted as `line:<line_number>` +- `verdict_boundary`: stable token that names the non-verdict boundary for the finding For `multi_user_probing`, `observed_count` is the distinct username count, while `event_count` remains the number of attempt-evidence events in the selected window. +Current `verdict_boundary` values are: + +| Rule | `verdict_boundary` | +| --- | --- | +| `brute_force` | `triage_signal_not_compromise_or_attribution` | +| `multi_user_probing` | `triage_signal_not_intent_or_attribution` | +| `sudo_burst` | `triage_signal_not_maliciousness_or_authorization` | + ## False-Positive Taxonomy The taxonomy names benign or ambiguous explanations a reviewer should consider before interpreting a finding. It is not an allow-list, suppression policy, or automatic disposition. 
diff --git a/src/detector.cpp b/src/detector.cpp index c1c9940..903894f 100644 --- a/src/detector.cpp +++ b/src/detector.cpp @@ -88,6 +88,7 @@ Finding make_brute_force_finding(const std::string& ip, finding.first_seen = first_seen; finding.last_seen = last_seen; finding.evidence_event_ids = std::move(evidence_event_ids); + finding.verdict_boundary = default_verdict_boundary(finding.type); finding.summary = std::to_string(count) + " failed SSH attempts from " + ip + " within " + std::to_string(window.count()) + " minutes."; return finding; @@ -114,6 +115,7 @@ Finding make_multi_user_finding(const std::string& ip, finding.first_seen = first_seen; finding.last_seen = last_seen; finding.evidence_event_ids = std::move(evidence_event_ids); + finding.verdict_boundary = default_verdict_boundary(finding.type); finding.usernames = std::move(usernames); finding.summary = ip + " targeted " + std::to_string(finding.usernames.size()) + " usernames within " + std::to_string(window.count()) + " minutes."; @@ -139,6 +141,7 @@ Finding make_sudo_finding(const std::string& user, finding.first_seen = first_seen; finding.last_seen = last_seen; finding.evidence_event_ids = std::move(evidence_event_ids); + finding.verdict_boundary = default_verdict_boundary(finding.type); finding.summary = user + " ran " + std::to_string(count) + " sudo commands within " + std::to_string(window.count()) + " minutes."; return finding; @@ -306,6 +309,18 @@ std::string to_string(FindingType type) { } } +std::string default_verdict_boundary(FindingType type) { + switch (type) { + case FindingType::BruteForce: + return "triage_signal_not_compromise_or_attribution"; + case FindingType::MultiUserProbing: + return "triage_signal_not_intent_or_attribution"; + case FindingType::SudoBurst: + default: + return "triage_signal_not_maliciousness_or_authorization"; + } +} + Detector::Detector(DetectorConfig config) : config_(config) {} diff --git a/src/detector.hpp b/src/detector.hpp index 0acfbc9..cf9e40a 100644 --- 
a/src/detector.hpp +++ b/src/detector.hpp @@ -39,11 +39,13 @@ struct Finding { std::chrono::sys_seconds first_seen{}; std::chrono::sys_seconds last_seen{}; std::vector evidence_event_ids; + std::string verdict_boundary; std::vector usernames; std::string summary; }; std::string to_string(FindingType type); +std::string default_verdict_boundary(FindingType type); class Detector { public: diff --git a/src/report.cpp b/src/report.cpp index 35bf41f..aec4526 100644 --- a/src/report.cpp +++ b/src/report.cpp @@ -312,6 +312,13 @@ std::size_t finding_observed_count(const Finding& finding) { return finding.event_count; } +std::string finding_verdict_boundary(const Finding& finding) { + if (!finding.verdict_boundary.empty()) { + return finding.verdict_boundary; + } + return default_verdict_boundary(finding.type); +} + void write_json_string_array(std::ostream& output, const std::vector& values) { output << '['; for (std::size_t index = 0; index < values.size(); ++index) { @@ -637,8 +644,8 @@ std::string render_json_report(const ReportData& data) { output << "{\n"; output << " \"tool\": \"LogLens\",\n"; - output << " \"schema\": \"loglens.report.v1\",\n"; - output << " \"schema_version\": 1,\n"; + output << " \"schema\": \"loglens.report.v2\",\n"; + output << " \"schema_version\": 2,\n"; output << " \"input\": \"" << escape_json(data.input_path.generic_string()) << "\",\n"; output << " \"input_mode\": \"" << to_string(data.parse_metadata.input_mode) << "\",\n"; if (data.parse_metadata.assume_year.has_value()) { @@ -718,6 +725,7 @@ std::string render_json_report(const ReportData& data) { output << " \"evidence_event_ids\": "; write_json_string_array(output, finding.evidence_event_ids); output << ",\n"; + output << " \"verdict_boundary\": \"" << escape_json(finding_verdict_boundary(finding)) << "\",\n"; output << " \"usernames\": "; write_json_string_array(output, finding.usernames); output << ",\n"; diff --git a/tests/fixtures/report_contracts/journalctl_short_full/report.json 
b/tests/fixtures/report_contracts/journalctl_short_full/report.json index 763052c..24e95ac 100644 --- a/tests/fixtures/report_contracts/journalctl_short_full/report.json +++ b/tests/fixtures/report_contracts/journalctl_short_full/report.json @@ -1,7 +1,7 @@ { "tool": "LogLens", - "schema": "loglens.report.v1", - "schema_version": 1, + "schema": "loglens.report.v2", + "schema_version": 2, "input": "tests/fixtures/report_contracts/journalctl_short_full/input.log", "input_mode": "journalctl_short_full", "timezone_present": true, @@ -45,6 +45,7 @@ "window_start": "2026-03-10 08:11:22", "window_end": "2026-03-10 08:18:05", "evidence_event_ids": ["line:1", "line:2", "line:3", "line:4", "line:5"], + "verdict_boundary": "triage_signal_not_compromise_or_attribution", "usernames": [], "summary": "5 failed SSH attempts from 203.0.113.10 within 10 minutes." }, @@ -60,6 +61,7 @@ "window_start": "2026-03-10 08:11:22", "window_end": "2026-03-10 08:18:05", "evidence_event_ids": ["line:1", "line:2", "line:3", "line:4", "line:5"], + "verdict_boundary": "triage_signal_not_intent_or_attribution", "usernames": ["admin", "deploy", "guest", "root", "test"], "summary": "203.0.113.10 targeted 5 usernames within 15 minutes." }, @@ -75,6 +77,7 @@ "window_start": "2026-03-10 08:21:00", "window_end": "2026-03-10 08:24:15", "evidence_event_ids": ["line:7", "line:8", "line:9"], + "verdict_boundary": "triage_signal_not_maliciousness_or_authorization", "usernames": [], "summary": "alice ran 3 sudo commands within 5 minutes." 
} diff --git a/tests/fixtures/report_contracts/multi_host_journalctl_short_full/report.json b/tests/fixtures/report_contracts/multi_host_journalctl_short_full/report.json index 99c7ce9..69f0f36 100644 --- a/tests/fixtures/report_contracts/multi_host_journalctl_short_full/report.json +++ b/tests/fixtures/report_contracts/multi_host_journalctl_short_full/report.json @@ -1,7 +1,7 @@ { "tool": "LogLens", - "schema": "loglens.report.v1", - "schema_version": 1, + "schema": "loglens.report.v2", + "schema_version": 2, "input": "tests/fixtures/report_contracts/multi_host_journalctl_short_full/input.log", "input_mode": "journalctl_short_full", "timezone_present": true, @@ -73,6 +73,7 @@ "window_start": "2026-03-11 09:00:00", "window_end": "2026-03-11 09:04:05", "evidence_event_ids": ["line:1", "line:2", "line:3", "line:4", "line:5"], + "verdict_boundary": "triage_signal_not_compromise_or_attribution", "usernames": [], "summary": "5 failed SSH attempts from 203.0.113.10 within 10 minutes." }, @@ -88,6 +89,7 @@ "window_start": "2026-03-11 09:00:00", "window_end": "2026-03-11 09:04:05", "evidence_event_ids": ["line:1", "line:2", "line:3", "line:4", "line:5"], + "verdict_boundary": "triage_signal_not_intent_or_attribution", "usernames": ["admin", "deploy", "guest", "root", "test"], "summary": "203.0.113.10 targeted 5 usernames within 15 minutes." }, @@ -103,6 +105,7 @@ "window_start": "2026-03-11 09:11:00", "window_end": "2026-03-11 09:14:15", "evidence_event_ids": ["line:9", "line:10", "line:13"], + "verdict_boundary": "triage_signal_not_maliciousness_or_authorization", "usernames": [], "summary": "alice ran 3 sudo commands within 5 minutes." 
} diff --git a/tests/fixtures/report_contracts/multi_host_syslog_legacy/report.json b/tests/fixtures/report_contracts/multi_host_syslog_legacy/report.json index 9b620f3..177dbed 100644 --- a/tests/fixtures/report_contracts/multi_host_syslog_legacy/report.json +++ b/tests/fixtures/report_contracts/multi_host_syslog_legacy/report.json @@ -1,7 +1,7 @@ { "tool": "LogLens", - "schema": "loglens.report.v1", - "schema_version": 1, + "schema": "loglens.report.v2", + "schema_version": 2, "input": "tests/fixtures/report_contracts/multi_host_syslog_legacy/input.log", "input_mode": "syslog_legacy", "assume_year": 2026, @@ -74,6 +74,7 @@ "window_start": "2026-03-11 09:00:00", "window_end": "2026-03-11 09:04:05", "evidence_event_ids": ["line:1", "line:2", "line:3", "line:4", "line:5"], + "verdict_boundary": "triage_signal_not_compromise_or_attribution", "usernames": [], "summary": "5 failed SSH attempts from 203.0.113.10 within 10 minutes." }, @@ -89,6 +90,7 @@ "window_start": "2026-03-11 09:00:00", "window_end": "2026-03-11 09:04:05", "evidence_event_ids": ["line:1", "line:2", "line:3", "line:4", "line:5"], + "verdict_boundary": "triage_signal_not_intent_or_attribution", "usernames": ["admin", "deploy", "guest", "root", "test"], "summary": "203.0.113.10 targeted 5 usernames within 15 minutes." }, @@ -104,6 +106,7 @@ "window_start": "2026-03-11 09:11:00", "window_end": "2026-03-11 09:14:15", "evidence_event_ids": ["line:9", "line:10", "line:13"], + "verdict_boundary": "triage_signal_not_maliciousness_or_authorization", "usernames": [], "summary": "alice ran 3 sudo commands within 5 minutes." 
} diff --git a/tests/fixtures/report_contracts/syslog_legacy/report.json b/tests/fixtures/report_contracts/syslog_legacy/report.json index 384843e..6c66fab 100644 --- a/tests/fixtures/report_contracts/syslog_legacy/report.json +++ b/tests/fixtures/report_contracts/syslog_legacy/report.json @@ -1,7 +1,7 @@ { "tool": "LogLens", - "schema": "loglens.report.v1", - "schema_version": 1, + "schema": "loglens.report.v2", + "schema_version": 2, "input": "tests/fixtures/report_contracts/syslog_legacy/input.log", "input_mode": "syslog_legacy", "assume_year": 2026, @@ -46,6 +46,7 @@ "window_start": "2026-03-10 08:11:22", "window_end": "2026-03-10 08:18:05", "evidence_event_ids": ["line:1", "line:2", "line:3", "line:4", "line:5"], + "verdict_boundary": "triage_signal_not_compromise_or_attribution", "usernames": [], "summary": "5 failed SSH attempts from 203.0.113.10 within 10 minutes." }, @@ -61,6 +62,7 @@ "window_start": "2026-03-10 08:11:22", "window_end": "2026-03-10 08:18:05", "evidence_event_ids": ["line:1", "line:2", "line:3", "line:4", "line:5"], + "verdict_boundary": "triage_signal_not_intent_or_attribution", "usernames": ["admin", "deploy", "guest", "root", "test"], "summary": "203.0.113.10 targeted 5 usernames within 15 minutes." }, @@ -76,6 +78,7 @@ "window_start": "2026-03-10 08:21:00", "window_end": "2026-03-10 08:24:15", "evidence_event_ids": ["line:7", "line:8", "line:9"], + "verdict_boundary": "triage_signal_not_maliciousness_or_authorization", "usernames": [], "summary": "alice ran 3 sudo commands within 5 minutes." 
} diff --git a/tests/test_detector.cpp b/tests/test_detector.cpp index 695e025..8c52554 100644 --- a/tests/test_detector.cpp +++ b/tests/test_detector.cpp @@ -212,6 +212,8 @@ void test_default_thresholds() { expect((brute_force->evidence_event_ids == std::vector{ "line:1", "line:2", "line:3", "line:4", "line:5"}), "expected brute force evidence event ids"); + expect(brute_force->verdict_boundary == "triage_signal_not_compromise_or_attribution", + "expected brute force verdict boundary"); const auto* multi_user = find_finding(findings, loglens::FindingType::MultiUserProbing, "203.0.113.10"); expect(multi_user != nullptr, "expected multi-user finding"); @@ -223,6 +225,8 @@ void test_default_thresholds() { expect((multi_user->evidence_event_ids == std::vector{ "line:1", "line:2", "line:3", "line:4", "line:5"}), "expected multi-user evidence event ids"); + expect(multi_user->verdict_boundary == "triage_signal_not_intent_or_attribution", + "expected multi-user verdict boundary"); expect(multi_user->usernames.size() == 5, "expected five usernames"); const auto* sudo = find_finding(findings, loglens::FindingType::SudoBurst, "alice"); @@ -234,6 +238,8 @@ void test_default_thresholds() { expect(sudo->event_count == 3, "expected sudo count"); expect((sudo->evidence_event_ids == std::vector{"line:6", "line:7", "line:8"}), "expected sudo evidence event ids"); + expect(sudo->verdict_boundary == "triage_signal_not_maliciousness_or_authorization", + "expected sudo verdict boundary"); } void test_custom_thresholds() { diff --git a/tests/test_report.cpp b/tests/test_report.cpp index 8c5673f..c7d4990 100644 --- a/tests/test_report.cpp +++ b/tests/test_report.cpp @@ -165,6 +165,7 @@ void test_json_finding_includes_explainability_fields() { finding.first_seen = timestamp_at_minute(21); finding.last_seen = timestamp_at_minute(24); finding.evidence_event_ids = {"line:6", "line:7", "line:8", "line:9"}; + finding.verdict_boundary = "triage_signal_not_maliciousness_or_authorization"; 
finding.summary = "alice ran 4 sudo commands within 5 minutes."; data.findings.push_back(finding); @@ -181,14 +182,17 @@ void test_json_finding_includes_explainability_fields() { expect(json.find("\"evidence_event_ids\": [\"line:6\", \"line:7\", \"line:8\", \"line:9\"]") != std::string::npos, "expected json finding to include evidence event ids"); + expect(json.find("\"verdict_boundary\": \"triage_signal_not_maliciousness_or_authorization\"") + != std::string::npos, + "expected json finding to include verdict boundary"); } void test_json_report_includes_schema_identity() { const auto json = loglens::render_json_report(make_report_data()); - expect(json.find("\"schema\": \"loglens.report.v1\"") != std::string::npos, + expect(json.find("\"schema\": \"loglens.report.v2\"") != std::string::npos, "expected json report to include schema identifier"); - expect(json.find("\"schema_version\": 1") != std::string::npos, + expect(json.find("\"schema_version\": 2") != std::string::npos, "expected json report to include schema version"); } diff --git a/tests/test_report_contracts.cpp b/tests/test_report_contracts.cpp index 8c7ca6b..b9727fe 100644 --- a/tests/test_report_contracts.cpp +++ b/tests/test_report_contracts.cpp @@ -170,6 +170,7 @@ std::vector extract_json_contract_lines(const std::string& json) { || starts_with(line, "\"window_start\": ") || starts_with(line, "\"window_end\": ") || starts_with(line, "\"evidence_event_ids\": ") + || starts_with(line, "\"verdict_boundary\": ") || starts_with(line, "\"usernames\": ") || starts_with(line, "\"summary\": ") || starts_with(line, "{\"line_number\": ")) {