Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ All notable user-visible changes should be recorded here.
- Added `verdict_boundary` to JSON findings and advanced the report artifact contract to `loglens.report.v2`.
- Expanded parser coverage for `Accepted publickey` and selected `pam_faillock` / `pam_sss` variants.
- Added a 150-line sanitized mixed auth corpus fixture covering Ubuntu / Debian-style `auth.log`, RHEL-family `secure`-style syslog, unknown lines, malformed source IPs, and blank-line handling.
- Added a reviewer-facing parser coverage JSON artifact for the mixed auth corpus.
- Added compact host-level summaries for multi-host reports.
- Added optional CSV export for findings and warnings when explicitly requested.

Expand Down
92 changes: 92 additions & 0 deletions assets/mixed_auth_parser_coverage.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
{
"artifact": "loglens.parser_coverage_sample",
"schema_version": 1,
"fixture": "assets/mixed_auth_corpus.log",
"input_mode": "syslog_legacy",
"assume_year": 2026,
"parser_quality": {
"total_input_lines": 150,
"total_lines": 140,
"skipped_blank_lines": 10,
"parsed_lines": 90,
"unparsed_lines": 50,
"parse_success_rate": 0.6428571429,
"top_unknown_patterns": [
{"pattern": "invalid_month_token", "count": 10},
{"pattern": "malformed_source_ip", "count": 10},
{"pattern": "pam_unix_session_closed", "count": 10},
{"pattern": "program_cron", "count": 10},
{"pattern": "sshd_connection_closed_preauth", "count": 10}
],
"failure_categories": [
{"category": "known_program_unknown_message", "count": 10},
{"category": "malformed_source_ip", "count": 10},
{"category": "unknown_program", "count": 10},
{"category": "unknown_timestamp", "count": 10},
{"category": "unsupported_pam_variant", "count": 10}
]
},
"parsed_event_count": 90,
"warning_count": 50,
"event_type_counts": [
{"event_type": "ssh_accepted_publickey", "count": 10},
{"event_type": "ssh_invalid_user", "count": 10},
{"event_type": "ssh_failed_publickey", "count": 10},
{"event_type": "pam_auth_failure", "count": 30},
{"event_type": "sudo_command", "count": 10},
{"event_type": "sudo_auth_failure", "count": 10},
{"event_type": "su_auth_failure", "count": 10}
],
"warnings": [
{"line_number": 10, "category": "known_program_unknown_message", "reason": "unrecognized auth pattern: sshd_connection_closed_preauth"},
{"line_number": 11, "category": "unsupported_pam_variant", "reason": "unrecognized auth pattern: pam_unix_session_closed"},
{"line_number": 12, "category": "unknown_program", "reason": "unrecognized auth pattern: program_cron"},
{"line_number": 13, "category": "malformed_source_ip", "reason": "malformed source IP"},
{"line_number": 14, "category": "unknown_timestamp", "reason": "invalid month token"},
{"line_number": 25, "category": "known_program_unknown_message", "reason": "unrecognized auth pattern: sshd_connection_closed_preauth"},
{"line_number": 26, "category": "unsupported_pam_variant", "reason": "unrecognized auth pattern: pam_unix_session_closed"},
{"line_number": 27, "category": "unknown_program", "reason": "unrecognized auth pattern: program_cron"},
{"line_number": 28, "category": "malformed_source_ip", "reason": "malformed source IP"},
{"line_number": 29, "category": "unknown_timestamp", "reason": "invalid month token"},
{"line_number": 40, "category": "known_program_unknown_message", "reason": "unrecognized auth pattern: sshd_connection_closed_preauth"},
{"line_number": 41, "category": "unsupported_pam_variant", "reason": "unrecognized auth pattern: pam_unix_session_closed"},
{"line_number": 42, "category": "unknown_program", "reason": "unrecognized auth pattern: program_cron"},
{"line_number": 43, "category": "malformed_source_ip", "reason": "malformed source IP"},
{"line_number": 44, "category": "unknown_timestamp", "reason": "invalid month token"},
{"line_number": 55, "category": "known_program_unknown_message", "reason": "unrecognized auth pattern: sshd_connection_closed_preauth"},
{"line_number": 56, "category": "unsupported_pam_variant", "reason": "unrecognized auth pattern: pam_unix_session_closed"},
{"line_number": 57, "category": "unknown_program", "reason": "unrecognized auth pattern: program_cron"},
{"line_number": 58, "category": "malformed_source_ip", "reason": "malformed source IP"},
{"line_number": 59, "category": "unknown_timestamp", "reason": "invalid month token"},
{"line_number": 70, "category": "known_program_unknown_message", "reason": "unrecognized auth pattern: sshd_connection_closed_preauth"},
{"line_number": 71, "category": "unsupported_pam_variant", "reason": "unrecognized auth pattern: pam_unix_session_closed"},
{"line_number": 72, "category": "unknown_program", "reason": "unrecognized auth pattern: program_cron"},
{"line_number": 73, "category": "malformed_source_ip", "reason": "malformed source IP"},
{"line_number": 74, "category": "unknown_timestamp", "reason": "invalid month token"},
{"line_number": 85, "category": "known_program_unknown_message", "reason": "unrecognized auth pattern: sshd_connection_closed_preauth"},
{"line_number": 86, "category": "unsupported_pam_variant", "reason": "unrecognized auth pattern: pam_unix_session_closed"},
{"line_number": 87, "category": "unknown_program", "reason": "unrecognized auth pattern: program_cron"},
{"line_number": 88, "category": "malformed_source_ip", "reason": "malformed source IP"},
{"line_number": 89, "category": "unknown_timestamp", "reason": "invalid month token"},
{"line_number": 100, "category": "known_program_unknown_message", "reason": "unrecognized auth pattern: sshd_connection_closed_preauth"},
{"line_number": 101, "category": "unsupported_pam_variant", "reason": "unrecognized auth pattern: pam_unix_session_closed"},
{"line_number": 102, "category": "unknown_program", "reason": "unrecognized auth pattern: program_cron"},
{"line_number": 103, "category": "malformed_source_ip", "reason": "malformed source IP"},
{"line_number": 104, "category": "unknown_timestamp", "reason": "invalid month token"},
{"line_number": 115, "category": "known_program_unknown_message", "reason": "unrecognized auth pattern: sshd_connection_closed_preauth"},
{"line_number": 116, "category": "unsupported_pam_variant", "reason": "unrecognized auth pattern: pam_unix_session_closed"},
{"line_number": 117, "category": "unknown_program", "reason": "unrecognized auth pattern: program_cron"},
{"line_number": 118, "category": "malformed_source_ip", "reason": "malformed source IP"},
{"line_number": 119, "category": "unknown_timestamp", "reason": "invalid month token"},
{"line_number": 130, "category": "known_program_unknown_message", "reason": "unrecognized auth pattern: sshd_connection_closed_preauth"},
{"line_number": 131, "category": "unsupported_pam_variant", "reason": "unrecognized auth pattern: pam_unix_session_closed"},
{"line_number": 132, "category": "unknown_program", "reason": "unrecognized auth pattern: program_cron"},
{"line_number": 133, "category": "malformed_source_ip", "reason": "malformed source IP"},
{"line_number": 134, "category": "unknown_timestamp", "reason": "invalid month token"},
{"line_number": 145, "category": "known_program_unknown_message", "reason": "unrecognized auth pattern: sshd_connection_closed_preauth"},
{"line_number": 146, "category": "unsupported_pam_variant", "reason": "unrecognized auth pattern: pam_unix_session_closed"},
{"line_number": 147, "category": "unknown_program", "reason": "unrecognized auth pattern: program_cron"},
{"line_number": 148, "category": "malformed_source_ip", "reason": "malformed source IP"},
{"line_number": 150, "category": "unknown_timestamp", "reason": "invalid month token"}
]
}
2 changes: 1 addition & 1 deletion docs/parser-conformance-matrix.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ coverage telemetry path.
| [`assets/parser_auth_families_syslog.log`](../assets/parser_auth_families_syslog.log) | Selected `sshd`, `pam_unix`, `pam_faillock`, `pam_sss`, and session-opened auth-family support, plus five unsupported PAM-family telemetry buckets |
| [`assets/parser_auth_families_journalctl_short_full.log`](../assets/parser_auth_families_journalctl_short_full.log) | Same auth-family event and warning shape as the syslog auth-family fixture, with journalctl timestamp parsing |
| [`assets/noisy_auth_sample.log`](../assets/noisy_auth_sample.log) and [`tests/fixtures/parser_matrix/noisy_auth_expected.json`](../tests/fixtures/parser_matrix/noisy_auth_expected.json) | Noisy syslog coverage fixture with malformed lines, blank lines, unsupported auth-family evidence, irrelevant service lines, and locked parser quality counts |
| [`assets/mixed_auth_corpus.log`](../assets/mixed_auth_corpus.log) | 150-line sanitized mixed syslog corpus with Ubuntu / Debian-style `auth.log` and RHEL-family `secure` host labels, 90 parsed events, 50 parser warnings, 10 blank lines, and locked unknown-pattern and failure-category coverage |
| [`assets/mixed_auth_corpus.log`](../assets/mixed_auth_corpus.log) and [`assets/mixed_auth_parser_coverage.json`](../assets/mixed_auth_parser_coverage.json) | 150-line sanitized mixed syslog corpus with Ubuntu / Debian-style `auth.log` and RHEL-family `secure` host labels, 90 parsed events, 50 parser warnings, 10 blank lines, and locked unknown-pattern and failure-category coverage |

## Review Rule

Expand Down
1 change: 1 addition & 0 deletions docs/parser-contract.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ Parsed successes and audit-only events remain reportable but do not count as bru
| [`assets/parser_auth_families_syslog.log`](../assets/parser_auth_families_syslog.log) | Syslog PAM/auth-family parser coverage |
| [`assets/parser_auth_families_journalctl_short_full.log`](../assets/parser_auth_families_journalctl_short_full.log) | Journalctl PAM/auth-family parser coverage |
| [`assets/noisy_auth_sample.log`](../assets/noisy_auth_sample.log) and [`tests/fixtures/parser_matrix/noisy_auth_expected.json`](../tests/fixtures/parser_matrix/noisy_auth_expected.json) | Noisy syslog parser-coverage matrix for malformed, unsupported, blank, irrelevant, multi-host, and unusual-username input |
| [`assets/mixed_auth_corpus.log`](../assets/mixed_auth_corpus.log) and [`assets/mixed_auth_parser_coverage.json`](../assets/mixed_auth_parser_coverage.json) | 150-line mixed auth corpus plus reviewer-facing parser coverage artifact for dirty syslog input |
| [`tests/test_report_contracts.cpp`](../tests/test_report_contracts.cpp) | Stable report-shape expectations for generated artifacts |

## Non-goals
Expand Down
2 changes: 2 additions & 0 deletions docs/parser-coverage-notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ The locked expected coverage summary lives in [`tests/fixtures/parser_matrix/noi

The corpus repeats ten small evidence batches. Each batch includes recognized `sshd`, `sudo`, `su`, `pam_unix`, `pam_faillock`, and `pam_sss` evidence; unsupported `sshd` preauth and `pam_unix` session-close telemetry; an unsupported service program; a malformed source IP; an invalid timestamp; and one blank line.

For reviewer inspection without running the test suite, [`assets/mixed_auth_parser_coverage.json`](../assets/mixed_auth_parser_coverage.json) captures the deterministic parser coverage view for this corpus: parser-quality counters, normalized event-type counts, unknown-pattern buckets, failure categories, and warning line references.

Locked parser expectations:

- `total_input_lines`: 150
Expand Down
3 changes: 2 additions & 1 deletion docs/reviewer-path.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ This path is for reviewers who want to understand LogLens quickly without readin
| What log formats are supported? | [`docs/parser-contract.md`](./parser-contract.md) | Can name `syslog_legacy` and `journalctl_short_full` behavior |
| What artifacts does it produce? | [`docs/report-artifacts.md`](./report-artifacts.md) and report-contract fixtures | Can inspect Markdown, JSON, and optional CSV outputs |
| How do rules use evidence? | [`docs/rule-catalog.md`](./rule-catalog.md) | Can explain grouping keys, windows, thresholds, and unsupported-evidence boundaries |
| Can the parser behavior be trusted? | Parser contract, fixture matrix, and parser coverage fields | Can see known, unknown, and malformed line handling |
| Can the parser behavior be trusted? | Parser contract, fixture matrix, and [`assets/mixed_auth_parser_coverage.json`](../assets/mixed_auth_parser_coverage.json) | Can see known, unknown, and malformed line handling |
| What proves the main claims? | [`docs/quality-gates.md`](./quality-gates.md) | Can map claims to tests, fixtures, docs, and repeatable commands |
| How should a finding be interpreted? | [`docs/case-study-linux-auth-bruteforce.md`](./case-study-linux-auth-bruteforce.md) | Can trace raw evidence to normalized events, findings, warnings, and non-goals |
| How does it behave on larger local inputs? | [`docs/performance-envelope.md`](./performance-envelope.md) | Can state the local 1k/10k/100k-line envelope and its caveats |
Expand Down Expand Up @@ -43,6 +43,7 @@ Inspect:
- [`tests/fixtures/report_contracts/syslog_legacy/report.json`](../tests/fixtures/report_contracts/syslog_legacy/report.json)
- [`docs/report-artifacts.md`](./report-artifacts.md)
- [`docs/parser-contract.md`](./parser-contract.md)
- [`assets/mixed_auth_parser_coverage.json`](../assets/mixed_auth_parser_coverage.json)
- [`docs/quality-gates.md`](./quality-gates.md)
- [`docs/rule-catalog.md`](./rule-catalog.md)
- [`docs/case-study-linux-auth-bruteforce.md`](./case-study-linux-auth-bruteforce.md)
Expand Down
105 changes: 105 additions & 0 deletions tests/test_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

namespace {
Expand Down Expand Up @@ -107,6 +108,42 @@ std::size_t failure_category_count(const loglens::ParserQualityMetrics& quality,
return it == quality.failure_categories.end() ? 0 : it->count;
}

// Tallies how many parsed events fall under each event type.
//
// The result lists only the types that occurred at least once, in the fixed
// order of the table below (not in order of first appearance in `events`).
// Events whose type is not in the table are not counted.
std::vector<std::pair<loglens::EventType, std::size_t>> parser_event_type_counts(
    const std::vector<loglens::Event>& events) {
    using TypeTally = std::pair<loglens::EventType, std::size_t>;

    // Table order defines the order of the returned entries.
    std::vector<TypeTally> tallies{
        {loglens::EventType::SshFailedPassword, 0},
        {loglens::EventType::SshAcceptedPassword, 0},
        {loglens::EventType::SshAcceptedPublicKey, 0},
        {loglens::EventType::SshAcceptedKeyboardInteractive, 0},
        {loglens::EventType::SshInvalidUser, 0},
        {loglens::EventType::SshFailedPublicKey, 0},
        {loglens::EventType::SshFailedKeyboardInteractive, 0},
        {loglens::EventType::SshMaxAuthTries, 0},
        {loglens::EventType::PamAuthFailure, 0},
        {loglens::EventType::SessionOpened, 0},
        {loglens::EventType::SudoCommand, 0},
        {loglens::EventType::SudoAuthFailure, 0},
        {loglens::EventType::SudoPolicyDenied, 0},
        {loglens::EventType::SuAuthFailure, 0}};

    for (const auto& parsed : events) {
        const auto slot = std::find_if(
            tallies.begin(), tallies.end(), [&parsed](const TypeTally& tally) {
                return tally.first == parsed.event_type;
            });
        if (slot != tallies.end()) {
            ++slot->second;
        }
    }

    // Keep only the types that were actually observed.
    std::vector<TypeTally> observed;
    for (const auto& tally : tallies) {
        if (tally.second != 0) {
            observed.push_back(tally);
        }
    }

    return observed;
}

std::string noisy_auth_coverage_json(const loglens::ParseReport& result) {
std::ostringstream output;
output << "{\n"
Expand Down Expand Up @@ -156,6 +193,70 @@ std::string noisy_auth_coverage_json(const loglens::ParseReport& result) {
return output.str();
}

// Renders the parser coverage view of `result` as JSON text for the mixed
// auth corpus fixture (assets/mixed_auth_corpus.log).
//
// The output contains, in order: artifact header fields, the parser_quality
// counters with unknown-pattern and failure-category buckets, the parsed
// event and warning totals, per-event-type counts, and one entry per warning.
// The mixed auth corpus test compares this text for equality against the
// checked-in coverage artifact, so every literal below is layout-significant.
//
// `assume_year` is written as JSON `null` when it is not set, instead of
// dereferencing an empty value.
//
// NOTE: pattern, category, and reason strings are written verbatim; no JSON
// escaping is applied.
std::string mixed_auth_coverage_json(const loglens::ParseReport& result) {
    std::ostringstream output;
    const auto event_counts = parser_event_type_counts(result.events);

    // Terminator for element `index` of a `size`-element JSON array: the last
    // element gets a bare newline, every other element a trailing comma.
    const auto list_separator = [](std::size_t index, std::size_t size) {
        return index + 1 == size ? "\n" : ",\n";
    };

    output << "{\n"
           << " \"artifact\": \"loglens.parser_coverage_sample\",\n"
           << " \"schema_version\": 1,\n"
           << " \"fixture\": \"assets/mixed_auth_corpus.log\",\n"
           << " \"input_mode\": \"" << loglens::to_string(result.metadata.input_mode) << "\",\n"
           << " \"assume_year\": ";

    // Guard the dereference: an unset assume_year must not be read.
    if (result.metadata.assume_year) {
        output << *result.metadata.assume_year;
    } else {
        output << "null";
    }

    output << ",\n"
           << " \"parser_quality\": {\n"
           << " \"total_input_lines\": " << total_input_lines(result) << ",\n"
           << " \"total_lines\": " << result.quality.total_lines << ",\n"
           << " \"skipped_blank_lines\": " << result.quality.skipped_blank_lines << ",\n"
           << " \"parsed_lines\": " << result.quality.parsed_lines << ",\n"
           << " \"unparsed_lines\": " << result.quality.unparsed_lines << ",\n"
           // Fixed notation with ten decimals keeps the rate text stable.
           << " \"parse_success_rate\": " << std::fixed << std::setprecision(10)
           << result.quality.parse_success_rate << ",\n"
           << " \"top_unknown_patterns\": [\n";

    const std::size_t pattern_total = result.quality.top_unknown_patterns.size();
    for (std::size_t index = 0; index < pattern_total; ++index) {
        const auto& entry = result.quality.top_unknown_patterns[index];
        output << " {\"pattern\": \"" << entry.pattern << "\", \"count\": " << entry.count << "}";
        output << list_separator(index, pattern_total);
    }

    output << " ],\n"
           << " \"failure_categories\": [\n";

    const std::size_t category_total = result.quality.failure_categories.size();
    for (std::size_t index = 0; index < category_total; ++index) {
        const auto& entry = result.quality.failure_categories[index];
        output << " {\"category\": \"" << loglens::to_string(entry.category)
               << "\", \"count\": " << entry.count << "}";
        output << list_separator(index, category_total);
    }

    output << " ]\n"
           << " },\n"
           << " \"parsed_event_count\": " << result.events.size() << ",\n"
           << " \"warning_count\": " << result.warnings.size() << ",\n"
           << " \"event_type_counts\": [\n";

    for (std::size_t index = 0; index < event_counts.size(); ++index) {
        const auto& [type, count] = event_counts[index];
        output << " {\"event_type\": \"" << loglens::to_string(type) << "\", \"count\": " << count << "}";
        output << list_separator(index, event_counts.size());
    }

    output << " ],\n"
           << " \"warnings\": [\n";

    const std::size_t warning_total = result.warnings.size();
    for (std::size_t index = 0; index < warning_total; ++index) {
        const auto& warning = result.warnings[index];
        output << " {\"line_number\": " << warning.line_number
               << ", \"category\": \"" << loglens::to_string(warning.category) << "\""
               << ", \"reason\": \"" << warning.reason << "\"}";
        output << list_separator(index, warning_total);
    }

    output << " ]\n"
           << "}\n";
    return output.str();
}

void test_invalid_user_failure() {
const auto parser = make_syslog_parser();
std::string error;
Expand Down Expand Up @@ -1186,6 +1287,10 @@ void test_mixed_auth_corpus_fixture_file() {
"expected ten unknown-timestamp failures");
expect(failure_category_count(result.quality, loglens::ParserFailureCategory::UnsupportedPamVariant) == 10,
"expected ten unsupported-PAM-variant failures");

const auto actual = mixed_auth_coverage_json(result);
const auto expected = read_text_file(asset_path("mixed_auth_parser_coverage.json"));
expect(actual == expected, "expected mixed auth parser coverage artifact to match fixture");
}

} // namespace
Expand Down
Loading