diff --git a/docs/temporal-covering.md b/docs/temporal-covering.md new file mode 100644 index 0000000..d5bac18 --- /dev/null +++ b/docs/temporal-covering.md @@ -0,0 +1,82 @@ +# Temporal-covering descriptor + +`meta/temporal-covering.json` is the **single codegen source of truth** +(RFC #870 TemporalParquet / #913 Temporal Data Lake) for projecting a MEOS +temporal column into Parquet/Iceberg **covering columns**. The pipeline +folds it into `meos-idl.json` as `temporalCovering`. Every binding/engine +(PyMEOS, JMEOS, MobilityDuck, MobilitySpark, …) generates the **identical** +covering schema from this one mapping, so a temporal table prunes the same +way on every platform — no per-engine covering code to maintain. + +## What it is + +A temporal value is stored on disk as a canonical MEOS-WKB `BLOB`. Iceberg +and Parquet cannot prune on a `BLOB`. The covering descriptor names, per +temporal-type **class**, the primitive columns to *materialise alongside* +the value — the bounding box and SRID — which Iceberg collects as manifest +statistics and Parquet as row-group min/max. A bbox/time predicate then +prunes whole files and row groups with **no spatial-aware engine** +(GeoParquet 1.1 `covering.bbox`; MVB v3 measured this as ~10× faster than +the `ST_Intersects` path). + +The mapping is keyed by **class**, not by type — adding a type is one entry +in its class: + +| Class | Box | Types | Covering columns | +|---|---|---|---| +| `spatial` | `STBOX` via `tspatial_to_stbox` | tgeompoint, tgeogpoint, tgeometry, tgeography, tcbuffer, tnpoint, tpose, trgeometry | `xmin xmax ymin ymax [zmin zmax] tmin tmax srid` | +| `number` | `TBOX` via `tnumber_to_tbox` | tint, tfloat, tbigint | `vmin vmax tmin tmax` | +| `timeOnly` | none — read from the value via `temporal_start_timestamptz` / `temporal_end_timestamptz` | tbool, ttext | `tmin tmax` | + +The canonical value column is unchanged and lossless; covering columns are +denormalised derivations of the value's box. `zmin`/`zmax` are emitted only +for 3D values (`when: hasZ`). 
+ +## In the catalog + +`temporalCovering` carries the verbatim `classes`, plus derived lookups for +codegen: + +```json +"temporalCovering": { + "valueCodec": { "asHexWkb": "temporal_as_hexwkb", + "fromHexWkb": "temporal_from_hexwkb" }, + "byType": { "tgeompoint": { "class": "spatial", "box": {...}, + "srid": "tspatial_srid", "columns": [...] }, ... }, + "symbols": ["stbox_xmin", "tbox_xmin", "tspatial_to_stbox", ...], + "count": 13 +} +``` + +- `byType` — `"tgeompoint"` → its class, box converter, SRID accessor, and + covering columns (each with its MEOS bbox accessor and SQL type). A + generator reads this directly; it never re-derives the mapping. +- `symbols` — every MEOS C symbol the descriptor depends on. The covering + parity audit (`tools/covering_parity.py`) checks each is exported by the + catalog and each covered type is a real `MeosType` — a miss is reported as + a worklist (add/export the accessor in MEOS), never a fabricated pass. + +## How a generator uses it + +For a column `traj TGEOMPOINT`, emit alongside the WKB value column: + +```sql +xmin = stbox_xmin(tspatial_to_stbox(traj)), xmax = stbox_xmax(...), +ymin = stbox_ymin(...), ymax = stbox_ymax(...), +tmin = stbox_tmin(...), tmax = stbox_tmax(...), +srid = tspatial_srid(traj) +``` + +(each engine in its own idiom — DuckDB generated columns, a Spark UDF +projection, a PyMEOS writer), plus the `temporal` and GeoParquet `geo` / +`covering.bbox` file metadata keys from `metadataKeys`. + +## Not yet covered + +- **Time-only** (`tbool`, `ttext`): `tmin`/`tmax` are already covered by the + `timeOnly` class, read straight from the value (`source: value`, no box). + What remains open is a span-based covering: `temporal_to_tstzspan` is exported but a span lower/upper bound accessor is not. + Surfaced as a MEOS export gap (close in MEOS C), not filled binding-side. +- **Point-cloud / cell-index** (`tpcpoint`, `tpcpatch`, `th3index`, + `tquadbin`): fold into the `spatial` class once the catalog confirms a + uniform temporal→`STBOX` converter for these families. 
diff --git a/meta/temporal-covering.json b/meta/temporal-covering.json new file mode 100644 index 0000000..111282f --- /dev/null +++ b/meta/temporal-covering.json @@ -0,0 +1,74 @@ +{ + "_comment": "Temporal-covering descriptor — the single codegen source of truth for projecting a MEOS temporal column into Parquet/Iceberg covering columns (GeoParquet 1.1 `covering.bbox`). Every binding/engine generates the IDENTICAL covering schema from this mapping, so a temporal table prunes the same way on every platform (Iceberg manifest pruning + Parquet row-group min/max) with no spatial-aware engine. Curated canonical data keyed by temporal-type FAMILY (a `class`), not per type — adding a type is one entry in its class. The canonical MEOS-WKB value column is unchanged and lossless; the covering columns are denormalised derivations of the value's bounding box. RFC #870 (TemporalParquet) / #913 (Temporal Data Lake).", + "provenance": { + "rfc": "MobilityDB RFC #870 (TemporalParquet) + #913 (Temporal Data Lake)", + "discussion": "MobilityDB#861 (edge-to-cloud SQL portability: one query, three platforms)", + "geoParquet": "GeoParquet 1.1 covering.bbox (geoparquet.org/releases/v1.1.0)", + "benchmark": "MVB v3 — the scalar AND-chain on materialised covering columns prunes row groups identically to the spatial-aware path and ~10x faster, with no DuckDB spatial extension" + }, + "version": "1.0.0", + "valueCodec": { + "asHexWkb": "temporal_as_hexwkb", + "fromHexWkb": "temporal_from_hexwkb", + "note": "The canonical MEOS-WKB stays the lossless value column (BLOB); covering columns are denormalised and never the source of truth." 
+ }, + "metadataKeys": { + "temporal": "temporal", + "geo": "geo", + "covering": "bbox" + }, + "classes": { + "spatial": { + "doc": "Spatial temporal types — STBOX covering (x/y[/z] extent + time extent + SRID).", + "box": {"type": "STBOX", "from": "tspatial_to_stbox"}, + "srid": "tspatial_srid", + "types": ["tgeompoint", "tgeogpoint", "tgeometry", "tgeography", "tcbuffer", "tnpoint", "tpose", "trgeometry"], + "columns": [ + {"name": "xmin", "sqlType": "double", "accessor": "stbox_xmin", "source": "box"}, + {"name": "xmax", "sqlType": "double", "accessor": "stbox_xmax", "source": "box"}, + {"name": "ymin", "sqlType": "double", "accessor": "stbox_ymin", "source": "box"}, + {"name": "ymax", "sqlType": "double", "accessor": "stbox_ymax", "source": "box"}, + {"name": "zmin", "sqlType": "double", "accessor": "stbox_zmin", "source": "box", "when": "hasZ"}, + {"name": "zmax", "sqlType": "double", "accessor": "stbox_zmax", "source": "box", "when": "hasZ"}, + {"name": "tmin", "sqlType": "timestamptz", "accessor": "stbox_tmin", "source": "box"}, + {"name": "tmax", "sqlType": "timestamptz", "accessor": "stbox_tmax", "source": "box"}, + {"name": "srid", "sqlType": "int", "accessor": "tspatial_srid", "source": "value"} + ] + }, + "number": { + "doc": "Numeric temporal types — TBOX covering (value range + time extent).", + "box": {"type": "TBOX", "from": "tnumber_to_tbox"}, + "srid": null, + "types": ["tint", "tfloat", "tbigint"], + "columns": [ + {"name": "vmin", "sqlType": "double", "accessor": "tbox_xmin", "source": "box"}, + {"name": "vmax", "sqlType": "double", "accessor": "tbox_xmax", "source": "box"}, + {"name": "tmin", "sqlType": "timestamptz", "accessor": "tbox_tmin", "source": "box"}, + {"name": "tmax", "sqlType": "timestamptz", "accessor": "tbox_tmax", "source": "box"} + ] + }, + "timeOnly": { + "doc": "Time-only temporal types — no spatial box; time extent only.", + "box": null, + "srid": null, + "types": ["tbool", "ttext"], + "columns": [ + {"name": "tmin", 
"sqlType": "timestamptz", "accessor": "temporal_start_timestamptz", "source": "value"}, + {"name": "tmax", "sqlType": "timestamptz", "accessor": "temporal_end_timestamptz", "source": "value"} + ] + } + }, + "deferred": { + "pointcloudCellIndex": { + "types": ["tpcpoint", "tpcpatch", "th3index", "tquadbin"], + "reason": "STBOX covering via a type-specific box path (e.g. tpcbox_to_stbox); fold into the `spatial` class once the catalog confirms a uniform temporal->STBOX converter for these families." + } + }, + "notes": [ + "The covering columns are a denormalisation of the value's bounding box; the canonical MEOS-WKB BLOB remains the lossless source of truth.", + "Materialising the covering columns as primitive Parquet columns gives Iceberg manifest-level file pruning and Parquet row-group min/max pruning, with no spatial-aware engine.", + "zmin/zmax are emitted only for 3D values (`when: hasZ`); 2D values omit them or store null.", + "`source: box` accessors take the box returned by `class.box.from(value)`; `source: value` accessors take the temporal value directly.", + "This descriptor is type-agnostic per class exactly as `portable-aliases.json` is type-agnostic per operator family — codegen consumes it identically across every binding." + ] +} diff --git a/meta/temporal-covering.schema.json b/meta/temporal-covering.schema.json new file mode 100644 index 0000000..ffb516b --- /dev/null +++ b/meta/temporal-covering.schema.json @@ -0,0 +1,88 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/MobilityDB/MEOS-API/blob/main/meta/temporal-covering.schema.json", + "title": "Temporal-covering descriptor — canonical SoT", + "description": "Schema for `meta/temporal-covering.json` (RFC #870/#913). 
Catches shape regressions earlier than the unit tests; validated as a test step in `tests/test_covering.py`.", + "type": "object", + "additionalProperties": true, + "required": ["provenance", "version", "valueCodec", "metadataKeys", "classes", "notes"], + "properties": { + "_comment": {"type": "string"}, + "provenance": { + "type": "object", + "additionalProperties": true, + "required": ["rfc"], + "properties": { + "rfc": {"type": "string"}, + "discussion": {"type": "string"}, + "geoParquet": {"type": "string"}, + "benchmark": {"type": "string"} + } + }, + "version": {"type": "string"}, + "valueCodec": { + "type": "object", + "additionalProperties": true, + "required": ["asHexWkb", "fromHexWkb"], + "properties": { + "asHexWkb": {"type": "string"}, + "fromHexWkb": {"type": "string"}, + "note": {"type": "string"} + } + }, + "metadataKeys": { + "type": "object", + "additionalProperties": true, + "required": ["temporal", "covering"], + "properties": { + "temporal": {"type": "string"}, + "geo": {"type": "string"}, + "covering": {"type": "string"} + } + }, + "classes": { + "type": "object", + "minProperties": 1, + "additionalProperties": { + "type": "object", + "additionalProperties": true, + "required": ["types", "columns"], + "properties": { + "doc": {"type": "string"}, + "srid": {"type": ["string", "null"]}, + "box": { + "type": ["object", "null"], + "required": ["type", "from"], + "properties": { + "type": {"type": "string"}, + "from": {"type": "string"} + } + }, + "types": { + "type": "array", + "minItems": 1, + "items": {"type": "string", "pattern": "^t[a-z0-9]+$"} + }, + "columns": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": ["name", "sqlType", "accessor", "source"], + "properties": { + "name": {"type": "string", "pattern": "^[a-z][a-z0-9]*$"}, + "sqlType": {"enum": ["double", "int", "timestamptz"]}, + "accessor": {"type": "string"}, + "source": {"enum": ["box", "value"]}, + "when": {"enum": 
["hasZ"]} + } + } + } + } + } + }, + "deferred": {"type": "object"}, + "notes": {"type": "array", "items": {"type": "string"}} + } +} diff --git a/parser/covering.py b/parser/covering.py new file mode 100644 index 0000000..789d453 --- /dev/null +++ b/parser/covering.py @@ -0,0 +1,71 @@ +"""Temporal-covering descriptor — the single codegen source of truth for +projecting a MEOS temporal column into Parquet/Iceberg covering columns. + +`meta/temporal-covering.json` is the curated, authoritative mapping (RFC +#870 TemporalParquet / #913 Temporal Data Lake): per temporal-type *class* +(spatial → STBOX, number → TBOX) it names the box converter, the SRID +accessor, and the covering columns with their MEOS bbox accessors. Folding +it into the catalog means every binding/engine generates the *identical* +covering schema, so a temporal table prunes the same way on every platform +(Iceberg manifest pruning + Parquet row-group min/max) with no spatial-aware +engine. + +This is curated canonical data, not a heuristic — it is preserved verbatim +and only *derived* lookups are added (a flat `byType` index and the set of +referenced C symbols), so a generator never has to re-derive the mapping. +Pure dict → dict; no libclang. +""" + +import json +from pathlib import Path + + +def attach_temporal_covering(idl: dict, path: Path) -> dict: + """Attach ``idl["temporalCovering"]`` from the canonical mapping file.""" + if not Path(path).exists(): + return idl + data = json.loads(Path(path).read_text()) + + classes = data["classes"] + + # Integrity: a temporal type may belong to at most one covering class — + # two classes claiming the same type would make codegen ambiguous. 
+ by_type = {} + for class_name, spec in classes.items(): + for t in spec["types"]: + if t in by_type: + raise ValueError( + f"temporal-covering: type {t!r} in two classes " + f"({by_type[t]['class']!r} and {class_name!r})") + by_type[t] = { + "class": class_name, + "box": spec.get("box"), + "srid": spec.get("srid"), + "columns": spec["columns"], + } + + # The complete set of MEOS C symbols this descriptor depends on — the + # covering parity audit checks every one is actually in the catalog. + symbols = {data["valueCodec"]["asHexWkb"], data["valueCodec"]["fromHexWkb"]} + for spec in classes.values(): + if spec.get("box"): + symbols.add(spec["box"]["from"]) + if spec.get("srid"): + symbols.add(spec["srid"]) + for col in spec["columns"]: + symbols.add(col["accessor"]) + + idl["temporalCovering"] = { + "provenance": data["provenance"], + "version": data["version"], + "valueCodec": data["valueCodec"], + "metadataKeys": data["metadataKeys"], + "classes": classes, + "deferred": data.get("deferred", {}), + "notes": data["notes"], + "byType": by_type, # "tgeompoint" -> class + columns + "types": sorted(by_type), + "symbols": sorted(symbols), # referenced C symbols (audit set) + "count": len(by_type), + } + return idl diff --git a/run.py b/run.py index 640dd9e..578d91d 100644 --- a/run.py +++ b/run.py @@ -4,12 +4,14 @@ from parser.parser import parse_all_headers, merge_meta from parser.portable import attach_portable_aliases +from parser.covering import attach_temporal_covering from parser.typerecover import recover_collapsed_types HEADERS_DIR = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("./meos/include") META_PATH = Path("./meta/meos-meta.json") PORTABLE_PATH = Path("./meta/portable-aliases.json") +COVERING_PATH = Path("./meta/temporal-covering.json") OUTPUT_DIR = Path("./output") @@ -36,20 +38,27 @@ def main(): print(f"[2/3] No meta found at {META_PATH}, skipping.", file=sys.stderr) # 3. 
Attach the canonical portable bare-name mapping (codegen truth) - print(f"[3/3] Attaching portable aliases from {PORTABLE_PATH}...", + print(f"[3/4] Attaching portable aliases from {PORTABLE_PATH}...", file=sys.stderr) idl = attach_portable_aliases(idl, PORTABLE_PATH) + # 4. Attach the temporal-covering descriptor (Parquet/Iceberg projection) + print(f"[4/4] Attaching temporal covering from {COVERING_PATH}...", + file=sys.stderr) + idl = attach_temporal_covering(idl, COVERING_PATH) + idl_path = OUTPUT_DIR / "meos-idl.json" with open(idl_path, "w") as f: json.dump(idl, f, indent=2) print(f" → {idl_path} written", file=sys.stderr) pa = idl.get("portableAliases", {}).get("count", 0) + cov = idl.get("temporalCovering", {}).get("count", 0) print(f"\nDone: {len(idl['functions'])} functions, " f"{len(idl['structs'])} structs, " f"{len(idl['enums'])} enums, " - f"{pa} portable bare-name aliases", file=sys.stderr) + f"{pa} portable bare-name aliases, " + f"{cov} temporal covering types", file=sys.stderr) if __name__ == "__main__": diff --git a/tests/test_covering.py b/tests/test_covering.py new file mode 100644 index 0000000..768a10c --- /dev/null +++ b/tests/test_covering.py @@ -0,0 +1,73 @@ +"""Unit tests for parser/covering.py and the descriptor shape. 
+python3 tests/test_covering.py +""" + +import json +import sys +import unittest +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT)) + +from parser.covering import attach_temporal_covering + +MAP = ROOT / "meta" / "temporal-covering.json" +SCHEMA = ROOT / "meta" / "temporal-covering.schema.json" + + +class AttachTests(unittest.TestCase): + def test_attaches_and_indexes(self): + idl = attach_temporal_covering({"functions": []}, MAP) + cov = idl["temporalCovering"] + # tgeompoint resolves to the spatial class with an STBOX box + self.assertEqual(cov["byType"]["tgeompoint"]["class"], "spatial") + self.assertEqual(cov["byType"]["tgeompoint"]["box"]["type"], "STBOX") + # tfloat resolves to the number class with a TBOX box + self.assertEqual(cov["byType"]["tfloat"]["class"], "number") + self.assertEqual(cov["byType"]["tfloat"]["box"]["type"], "TBOX") + # tbool resolves to the time-only class with no box + self.assertEqual(cov["byType"]["tbool"]["class"], "timeOnly") + self.assertIsNone(cov["byType"]["tbool"]["box"]) + # count == number of covered types; types sorted + self.assertEqual(cov["count"], len(cov["byType"])) + self.assertEqual(cov["types"], sorted(cov["byType"])) + + def test_symbols_collected(self): + cov = attach_temporal_covering({}, MAP)["temporalCovering"] + # the value codec and both box converters are in the audit set + for sym in ("temporal_as_hexwkb", "temporal_from_hexwkb", + "tspatial_to_stbox", "tnumber_to_tbox", "stbox_xmin", + "tbox_xmin", "tspatial_srid"): + self.assertIn(sym, cov["symbols"]) + + def test_missing_file_is_noop(self): + idl = attach_temporal_covering({"x": 1}, ROOT / "nope.json") + self.assertEqual(idl, {"x": 1}) + + def test_duplicate_type_rejected(self): + bad = json.loads(MAP.read_text()) + # claim tfloat in a second class too -> ambiguous codegen + bad["classes"]["spatial"]["types"].append("tfloat") + p = ROOT / "output" / "_dup_covering.json" + p.parent.mkdir(exist_ok=True) + 
p.write_text(json.dumps(bad)) + try: + with self.assertRaises(ValueError): + attach_temporal_covering({}, p) + finally: + p.unlink() + + +class SchemaTests(unittest.TestCase): + def test_descriptor_validates(self): + try: + import jsonschema + except ImportError: + self.skipTest("jsonschema not installed") + jsonschema.validate( + json.loads(MAP.read_text()), json.loads(SCHEMA.read_text())) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_covering_parity.py b/tests/test_covering_parity.py new file mode 100644 index 0000000..7cd6b7a --- /dev/null +++ b/tests/test_covering_parity.py @@ -0,0 +1,80 @@ +"""Unit tests for covering_parity.py. python3 tests/test_covering_parity.py + +Also the CI gate: when an enriched catalog with `temporalCovering` is +present, every C symbol the descriptor names must be backed by the catalog +and every covered type must be a real MeosType — never silently missing. +""" + +import json +import sys +import unittest +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT)) + +from parser.covering import attach_temporal_covering +from tools.covering_parity import build_parity + +MAP = ROOT / "meta" / "temporal-covering.json" +_CATALOG = ROOT / "output" / "meos-idl.json" + + +def _catalog(fn_names, enum_types=None): + cat = attach_temporal_covering( + {"functions": [{"name": n} for n in fn_names]}, MAP) + if enum_types is not None: + cat["enums"] = [{"name": "MeosType", + "values": [{"name": "T_" + t.upper()} + for t in enum_types]}] + return cat + + +class ParityLogicTests(unittest.TestCase): + def test_all_backed_when_symbols_present(self): + cov = attach_temporal_covering({}, MAP)["temporalCovering"] + r = build_parity(_catalog(cov["symbols"], enum_types=cov["types"])) + self.assertEqual(r["symbolsMissing"], []) + self.assertEqual(r["symbolsBacked"], r["symbolsTotal"]) + self.assertEqual(r["parityPct"], 100.0) + self.assertEqual(r["typesInvalid"], []) + 
self.assertTrue(r["typesChecked"]) + + def test_missing_symbol_reported_not_dropped(self): + cov = attach_temporal_covering({}, MAP)["temporalCovering"] + present = [s for s in cov["symbols"] if s != "stbox_xmin"] + r = build_parity(_catalog(present, enum_types=cov["types"])) + self.assertIn("stbox_xmin", r["symbolsMissing"]) + self.assertEqual(r["symbolsBacked"] + len(r["symbolsMissing"]), + r["symbolsTotal"]) + + def test_invalid_type_reported(self): + # a type absent from the MeosType enum is flagged, not silently ok + r = build_parity(_catalog([], enum_types=["tgeompoint"])) + self.assertIn("tfloat", r["typesInvalid"]) + + def test_types_unverified_without_enum(self): + r = build_parity(_catalog([])) # no MeosType enum present + self.assertFalse(r["typesChecked"]) + self.assertEqual(r["typesInvalid"], []) + + def test_requires_temporal_covering(self): + with self.assertRaises(ValueError): + build_parity({"functions": []}) + + +@unittest.skipUnless(_CATALOG.exists(), "run `python run.py` first") +class LiveParityGate(unittest.TestCase): + def test_every_symbol_backed_and_types_valid(self): + cat = json.loads(_CATALOG.read_text()) + cat = attach_temporal_covering(cat, MAP) + r = build_parity(cat) + self.assertEqual(r["symbolsMissing"], [], + "covering descriptor references unexported symbols") + self.assertEqual(r["typesInvalid"], [], + "covering descriptor references non-MeosType types") + self.assertEqual(r["parityPct"], 100.0) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tools/covering_parity.py b/tools/covering_parity.py new file mode 100644 index 0000000..2391b01 --- /dev/null +++ b/tools/covering_parity.py @@ -0,0 +1,93 @@ +# Temporal-covering parity audit — the catalog analogue of the portable +# bare-name audit (tools/portable_parity.py). 
+# +# python run.py # catalog with `temporalCovering` + functions +# python tools/covering_parity.py # -> output/meos-covering-parity.json +# +# The covering descriptor (meta/temporal-covering.json) is only useful to a +# binding generator if every C symbol it names is actually exported by the +# catalog, and every temporal type it lists is a real MeosType. This audit +# checks both and reports any miss as a precise worklist — an honest signal +# that an accessor must be added/exported in MEOS (close-in-MEOS-C), never a +# fabricated pass. + +import json +import sys +from pathlib import Path + +IN_PATH = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("output/meos-idl.json") +OUT_PATH = (Path(sys.argv[2]) if len(sys.argv) > 2 + else Path("output/meos-covering-parity.json")) + + +def _meos_type_tokens(catalog: dict) -> set: + """Canonical lower-case temporal type names from the MeosType enum + (`T_TGEOMPOINT` -> `tgeompoint`).""" + tokens = set() + for enum in catalog.get("enums", []): + if enum.get("name") != "MeosType": + continue + for v in enum.get("values", []): + name = v.get("name") if isinstance(v, dict) else v + if isinstance(name, str) and name.startswith("T_"): + tokens.add(name[2:].lower()) + return tokens + + +def build_parity(catalog: dict) -> dict: + cov = catalog.get("temporalCovering") + if not cov: + raise ValueError("catalog has no `temporalCovering` — run run.py") + names = {f["name"] for f in catalog.get("functions", [])} + type_tokens = _meos_type_tokens(catalog) + + # 1. Every referenced C symbol must be exported by the catalog. + symbols = {s: (s in names) for s in cov["symbols"]} + missing_symbols = sorted(s for s, ok in symbols.items() if not ok) + + # 2. Every covered type must be a real MeosType (skip the enum check only + # when the catalog carries no MeosType enum, e.g. a synthetic unit-test + # catalog — then types are reported `unverified`, never silently ok). 
+ if type_tokens: + types = {t: (t in type_tokens) for t in cov["types"]} + invalid_types = sorted(t for t, ok in types.items() if not ok) + types_checked = True + else: + types = {t: None for t in cov["types"]} + invalid_types = [] + types_checked = False + + total_sym = len(symbols) + backed_sym = total_sym - len(missing_symbols) + return { + "symbolsTotal": total_sym, + "symbolsBacked": backed_sym, + "symbolsMissing": missing_symbols, # accessors to add/export + "typesTotal": len(types), + "typesValid": sum(1 for ok in types.values() if ok), + "typesInvalid": invalid_types, + "typesChecked": types_checked, + "parityPct": round(backed_sym * 100 / total_sym, 1) if total_sym else 0, + "bySymbol": symbols, + "byType": types, + } + + +def main() -> None: + if not IN_PATH.exists(): + sys.exit(f"Catalog not found: {IN_PATH} — run `python run.py` first.") + rep = build_parity(json.loads(IN_PATH.read_text())) + OUT_PATH.parent.mkdir(parents=True, exist_ok=True) + OUT_PATH.write_text(json.dumps(rep, indent=2)) + print(f"[covering-parity] {rep['symbolsBacked']}/{rep['symbolsTotal']} " + f"referenced symbols backed in the catalog ({rep['parityPct']}%); " + f"{rep['typesValid']}/{rep['typesTotal']} types valid " + f"→ {OUT_PATH}", file=sys.stderr) + for s in rep["symbolsMissing"]: + print(f" missing-symbol: {s!r} — add/export in MEOS", file=sys.stderr) + for t in rep["typesInvalid"]: + print(f" invalid-type: {t!r} — not a MeosType", file=sys.stderr) + + +if __name__ == "__main__": + main()