diff --git a/internal/engine/baker/meta.go b/internal/engine/baker/meta.go index bff8d7c..e677970 100644 --- a/internal/engine/baker/meta.go +++ b/internal/engine/baker/meta.go @@ -1,8 +1,12 @@ package baker import ( + "path" "sort" "strconv" + + "github.com/beetlebugorg/chartplotter/pkg/iso8211" + "github.com/beetlebugorg/chartplotter/pkg/s57" ) // CellMeta is the per-cell metadata extracted at import time for the chart @@ -23,12 +27,16 @@ type CellMeta struct { HasBBox bool `json:"-"` } -// ExtractCellMeta parses each cell's header + coverage (coverage-only, cheap) and -// returns per-cell metadata keyed by cell stem. Cells that fail to parse are -// reported via onSkip and omitted. Title is left empty (S-57 headers carry no human -// chart name — only the cell code); the caller overlays the CATALOG.031 long name -// where the exchange set provides one. -func ExtractCellMeta(cells map[string]CellData, onSkip func(name string, err error)) map[string]CellMeta { +// ExtractCellMeta returns per-cell metadata keyed by cell stem. Identity and scale +// come from each cell's S-57 header (DSID/DSPM); coverage comes from the exchange +// -set catalogue when it covers the cell — sparing a parse — and otherwise from an +// M_COVR-only coverage parse. Pass cat=nil when there is no catalogue. +// +// Cells that fail to parse are reported via onSkip and omitted. Title is left empty +// (S-57 headers carry no human chart name — only the cell code); the caller overlays +// the CATALOG.031 long name where the exchange set provides one. +func ExtractCellMeta(cells map[string]CellData, cat *s57.Catalog, onSkip func(name string, err error)) map[string]CellMeta { + catBBox := catalogBBoxes(cat) out := make(map[string]CellMeta, len(cells)) names := make([]string, 0, len(cells)) for n := range cells { @@ -36,36 +44,84 @@ func ExtractCellMeta(cells map[string]CellData, onSkip func(name string, err err } sort.Strings(names) for _, name := range names { - cd := cells[name] - chart, err := ParseCellCoverage(name, cd.Base, cd.Updates) + m, err := cellMetaFor(name, cells[name], catBBox) if err != nil { if onSkip != nil { onSkip(name, err) } continue } - stem := cellStem(chart.DatasetName()) - if stem == "" { - stem = cellStem(name) - } - m := CellMeta{ - Name: stem, - Scale: int(chart.CompilationScale()), - Edition: chart.Edition(), - Update: chart.UpdateNumber(), - IssueDate: chart.IssueDate(), - Agency: chart.ProducingAgency(), - } - b := chart.Bounds() - if b.MaxLon > b.MinLon && b.MaxLat > b.MinLat { - m.BBox = [4]float64{b.MinLon, b.MinLat, b.MaxLon, b.MaxLat} - m.HasBBox = true + out[m.Name] = m + } + return out +} + +// catalogBBoxes indexes an exchange-set catalogue's per-cell coverage by cell stem, +// or returns nil when there's no catalogue / no coverage in it. +func catalogBBoxes(cat *s57.Catalog) map[string][4]float64 { + if cat == nil { + return nil + } + out := map[string][4]float64{} + for _, e := range cat.Cells() { + if e.HasBBox { + out[e.CellStem()] = [4]float64{e.West, e.South, e.East, e.North} } - out[stem] = m } return out } +// cellMetaFor builds one cell's metadata. When the catalogue already supplies the +// cell's coverage AND the cell has no updates (so its base-cell header still carries +// the current identity), it reads only the header — DSID/DSPM, no geometry — and +// takes the bbox from the catalogue, skipping the M_COVR coverage parse entirely. +// Otherwise it falls back to the coverage parse, which also applies updates so the +// reported edition/update/date reflect the cell's current state. +func cellMetaFor(name string, cd CellData, catBBox map[string][4]float64) (CellMeta, error) { + stem := cellStem(name) + if len(cd.Updates) == 0 { + if box, ok := catBBox[stem]; ok { + p := "/" + path.Base(name) + if h, err := s57.ReadHeaderFS(iso8211.MemFS{p: cd.Base}, p); err == nil { + return CellMeta{ + Name: stem, + Scale: int(h.CompilationScale), + Edition: h.Edition, + Update: h.UpdateNumber, + IssueDate: h.IssueDate, + Agency: h.ProducingAgency, + BBox: box, + HasBBox: true, + }, nil + } + // Header read failed (malformed front matter) — fall through to a full parse. + } + } + + chart, err := ParseCellCoverage(name, cd.Base, cd.Updates) + if err != nil { + return CellMeta{}, err + } + s := cellStem(chart.DatasetName()) + if s == "" { + s = stem + } + m := CellMeta{ + Name: s, + Scale: int(chart.CompilationScale()), + Edition: chart.Edition(), + Update: chart.UpdateNumber(), + IssueDate: chart.IssueDate(), + Agency: chart.ProducingAgency(), + } + b := chart.Bounds() + if b.MaxLon > b.MinLon && b.MaxLat > b.MinLat { + m.BBox = [4]float64{b.MinLon, b.MinLat, b.MaxLon, b.MaxLat} + m.HasBBox = true + } + return m, nil +} + // cellStem trims a trailing ".000"/".NNN" or directory path from a cell name. func cellStem(name string) string { // Strip any directory. diff --git a/internal/engine/baker/meta_test.go b/internal/engine/baker/meta_test.go index 83c2a67..ec0b87b 100644 --- a/internal/engine/baker/meta_test.go +++ b/internal/engine/baker/meta_test.go @@ -3,6 +3,8 @@ package baker import ( "os" "testing" + + "github.com/beetlebugorg/chartplotter/pkg/s57" ) func TestExtractCellMeta(t *testing.T) { @@ -10,7 +12,7 @@ func TestExtractCellMeta(t *testing.T) { if err != nil { t.Fatal(err) } - meta := ExtractCellMeta(map[string]CellData{"US5MD1MC.000": {Base: data}}, nil) + meta := ExtractCellMeta(map[string]CellData{"US5MD1MC.000": {Base: data}}, nil, nil) m, ok := meta["US5MD1MC"] if !ok { t.Fatalf("no metadata for US5MD1MC; got keys %v", keys(meta)) @@ -33,6 +35,47 @@ func TestExtractCellMeta(t *testing.T) { } } +// TestExtractCellMeta_CatalogFastPath proves the catalogue short-circuit: when the +// exchange-set catalogue already carries a (base) cell's coverage, identity is read +// from the cheap header and the bbox is taken verbatim from the catalogue — no +// M_COVR coverage parse. The stored bbox being the catalogue's exact rectangle (not +// the geometry-derived M_COVR extent) is what confirms the fast path engaged. +func TestExtractCellMeta_CatalogFastPath(t *testing.T) { + data, err := os.ReadFile("../../../testdata/US5MD1MC.000") + if err != nil { + t.Fatal(err) + } + catData, err := os.ReadFile("../../../pkg/s57/testdata/US5MD1MC_CATALOG.031") + if err != nil { + t.Fatal(err) + } + cat, err := s57.ParseCatalog(catData) + if err != nil { + t.Fatal(err) + } + var catBox [4]float64 + for _, e := range cat.Cells() { + if e.CellStem() == "US5MD1MC" && e.HasBBox { + catBox = [4]float64{e.West, e.South, e.East, e.North} + } + } + if catBox == ([4]float64{}) { + t.Fatal("catalogue fixture lacks US5MD1MC coverage") + } + + meta := ExtractCellMeta(map[string]CellData{"US5MD1MC.000": {Base: data}}, cat, nil) + m, ok := meta["US5MD1MC"] + if !ok { + t.Fatalf("no metadata for US5MD1MC; got %v", keys(meta)) + } + if m.Scale != 12000 || m.Agency != 550 { + t.Errorf("identity = scale %d agency %d, want 12000 / 550", m.Scale, m.Agency) + } + if !m.HasBBox || m.BBox != catBox { + t.Errorf("BBox = %v (has=%v), want catalogue box %v verbatim", m.BBox, m.HasBBox, catBox) + } +} + func keys(m map[string]CellMeta) []string { out := make([]string, 0, len(m)) for k := range m { diff --git a/internal/engine/server/cellindex.go b/internal/engine/server/cellindex.go index 9c3ce39..1ce8227 100644 --- a/internal/engine/server/cellindex.go +++ b/internal/engine/server/cellindex.go @@ -9,15 +9,23 @@ import ( "sync" "github.com/beetlebugorg/chartplotter/internal/engine/baker" + "github.com/beetlebugorg/chartplotter/pkg/s57" ) +// boundsUsable reports whether a parsed cell yielded a non-degenerate bbox (a real +// extent, not the empty/zero box of a cell whose coverage couldn't be derived). +func boundsUsable(b s57.Bounds) bool { + return b.MaxLon > b.MinLon && b.MaxLat > b.MinLat +} + // cellIndex is a small, persistent name→bounding-box index over the cached source // cells (/ENC_ROOT//.000). It lets the server answer "where // is cell X" and "which installed cells are active" without re-parsing thousands -// of cells on every request: each cell's header is read ONCE (the bbox cached to -// /cells-index.json), then queries hit the in-memory map. Kept -// deliberately simple — a flat JSON map, not a database; the data is tiny (a few -// floats per cell) and read-mostly. +// of cells on every request: each cell is parsed ONCE — only its M_COVR coverage, +// not the whole cell (see scan) — with the bbox cached to /cells-index +// .json, then queries hit the in-memory map. Kept deliberately simple — a flat +// JSON map, not a database; the data is tiny (a few floats per cell) and +// read-mostly. type cellIndex struct { mu sync.RWMutex cond *sync.Cond // broadcast when a scan finishes (for wait()) @@ -126,8 +134,12 @@ func (ci *cellIndex) wait() { ci.mu.Unlock() } -// scan reads every cached cell's header once (bbox cached so repeat scans skip the -// already-indexed) and reconciles: drops index entries for cells no longer on disk. +// scan derives every cached cell's bbox once (cached, so repeat scans skip the +// already-indexed) and reconciles: drops index entries for cells no longer on +// disk. The bbox comes from an M_COVR-only coverage parse — the cell's data +// coverage is all we need, so we skip building the geometry, R-tree and portrayal +// of every other feature that a full parse would. A cell with no M_COVR (rare: +// synthetic/test cells) falls back to a full parse so it still gets a bbox. func (ci *cellIndex) scan() { entries, err := os.ReadDir(ci.encRoot) if err != nil { @@ -148,11 +160,23 @@ func (ci *cellIndex) scan() { if err != nil { continue } - chart, err := baker.ParseCellBytes(name, data) + // M_COVR-only parse: builds just the coverage rings, not every feature's + // geometry — all the bbox needs. nil updates: the index tracks base cells. + chart, err := baker.ParseCellCoverage(name, data, nil) if err != nil { continue } b := chart.Bounds() + if !boundsUsable(b) { + // No M_COVR coverage polygon (rare — synthetic cells omit it). Fall back to + // a full parse so the cell still lands in the index with a real bbox. + if full, ferr := baker.ParseCellBytes(name, data); ferr == nil { + b = full.Bounds() + } + } + if !boundsUsable(b) { + continue // still nothing usable; skip rather than index a degenerate box + } ci.mu.Lock() ci.bbox[name] = [4]float64{b.MinLon, b.MinLat, b.MaxLon, b.MaxLat} ci.mu.Unlock() diff --git a/internal/engine/server/import.go b/internal/engine/server/import.go index a98db07..0240b0f 100644 --- a/internal/engine/server/import.go +++ b/internal/engine/server/import.go @@ -615,7 +615,7 @@ func (s *Server) bakeAndRegister(jobID, set string, cells map[string]baker.CellD // agency/coverage (cheap coverage-only parse) overlaid with the catalogue's chart // titles + coverage. Best-effort — a write failure only costs the extracted detail. s.imports.update(jobID, func(j *importJob) { j.Phase, j.Note = "meta", "Reading chart metadata" }) - cellMeta := baker.ExtractCellMeta(cells, func(name string, e error) { + cellMeta := baker.ExtractCellMeta(cells, cat, func(name string, e error) { log.Printf("import %s: meta skip %s: %v", jobID, name, e) }) meta := buildSetMeta(set, cellMeta, cat) diff --git a/internal/engine/server/import_meta_test.go b/internal/engine/server/import_meta_test.go index ced80c8..a464047 100644 --- a/internal/engine/server/import_meta_test.go +++ b/internal/engine/server/import_meta_test.go @@ -76,7 +76,7 @@ func TestImport_NoCatalog(t *testing.T) { if set := s.deriveUploadSet(cat, cells); set != "user-us5md1mc" { t.Errorf("deriveUploadSet = %q, want user-us5md1mc", set) } - meta := buildSetMeta("user-us5md1mc", baker.ExtractCellMeta(cells, nil), cat) + meta := buildSetMeta("user-us5md1mc", baker.ExtractCellMeta(cells, cat, nil), cat) if meta.ScaleMin != 12000 || len(meta.BBox) != 4 || meta.Agency != "NOAA (US)" { t.Errorf("header metadata missing: scale=%d bbox=%v agency=%q", meta.ScaleMin, meta.BBox, meta.Agency) } @@ -115,7 +115,7 @@ func TestImport_AutoNameAndMeta(t *testing.T) { } // The post-bake metadata tail (bakeAndRegister does exactly this after baking). - cellMeta := baker.ExtractCellMeta(cells, nil) + cellMeta := baker.ExtractCellMeta(cells, cat, nil) meta := buildSetMeta(set, cellMeta, cat) meta.Imported = "2026-06-25T00:00:00Z" if err := s.writeSetMeta(set, meta); err != nil { diff --git a/internal/s57/parser/header.go b/internal/s57/parser/header.go new file mode 100644 index 0000000..b95b22d --- /dev/null +++ b/internal/s57/parser/header.go @@ -0,0 +1,87 @@ +package parser + +import ( + "fmt" + "io" + "io/fs" + + "github.com/beetlebugorg/chartplotter/pkg/iso8211" +) + +// CellHeader is the lightweight identity + compilation scale of an S-57 cell, +// read from only its leading DSID/DSPM records — no feature or spatial records, +// no geometry. It is the cheap answer when a caller needs to know WHAT a cell is +// (and at what scale/band) without portraying it. +// +// Note: S-57 stores NO bounding box in the header. A cell's geographic extent +// comes from its M_COVR coverage features (or the exchange-set catalogue's CATD +// bbox), neither of which is read here. Use the catalogue or an M_COVR-only parse +// for bounds. +type CellHeader struct { + DatasetName string // DSID DSNM — cell code, e.g. "US5MD1MC" + Edition string // DSID EDTN + UpdateNumber string // DSID UPDN ("0" for a base cell) + IssueDate string // DSID ISDT (YYYYMMDD) + ProducingAgency int // DSID AGEN — IHO agency code (550 = NOAA) + CompilationScale int32 // DSPM CSCL — scale denominator (0 if no DSPM) +} + +// ReadHeaderFS reads only a cell's leading dataset-metadata records (DSID + DSPM) +// from fsys, stopping as soon as both are seen — or when the metadata block ends +// (the first feature/spatial record) — without ever reading the feature or +// spatial records. This is dramatically cheaper than Parse when only identity and +// scale are needed (e.g. bucketing cells by band, or filling in metadata whose +// bounds come from elsewhere). Updates are NOT applied: the result reflects the +// base cell as given. +func ReadHeaderFS(fsys fs.FS, filename string) (*CellHeader, error) { + p, err := iso8211.OpenFS(fsys, filename) + if err != nil { + return nil, err + } + defer p.Close() + return readHeader(p) +} + +func readHeader(p *iso8211.Parser) (*CellHeader, error) { + h := &CellHeader{} + var gotDSID, gotDSPM bool + // DSID and DSPM live in the dataset general-information / geographic-reference + // records at the very front of the file, before any feature (FRID) or spatial + // (VRID) record. Read records one at a time until both are in hand, or until the + // metadata block is over — so a cell that omits DSPM doesn't drag us through the + // whole file. + for !(gotDSID && gotDSPM) { + rec, err := p.Next() + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + if d, ok := rec.Fields["DSID"]; ok && !gotDSID { + m := parseDSID(d) + h.DatasetName = m.dsnm + h.Edition = m.edtn + h.UpdateNumber = m.updn + h.IssueDate = m.isdt + h.ProducingAgency = m.agen + gotDSID = true + } + if d, ok := rec.Fields["DSPM"]; ok && !gotDSPM { + h.CompilationScale = parseDSPM(d).CSCL + gotDSPM = true + } + // First feature/spatial record ⇒ the metadata block has ended; nothing more + // to find. Stop rather than scan the rest of the cell. + if _, ok := rec.Fields["FRID"]; ok { + break + } + if _, ok := rec.Fields["VRID"]; ok { + break + } + } + if !gotDSID { + return nil, fmt.Errorf("no DSID record in cell header") + } + return h, nil +} diff --git a/pkg/iso8211/parser.go b/pkg/iso8211/parser.go index 79de0cd..2e153e7 100644 --- a/pkg/iso8211/parser.go +++ b/pkg/iso8211/parser.go @@ -11,9 +11,10 @@ import ( // For S-57/S-52 implementation details, see IHO S-57 Part 3: // https://iho.int/uploads/user/pubs/standards/s-57/31Main.pdf type Parser struct { - reader io.Reader // Underlying data reader - closer io.Closer // Optional closer (for files) - offset int64 // Current read offset + reader io.Reader // Underlying data reader + closer io.Closer // Optional closer (for files) + offset int64 // Current read offset + ddr *DataDescriptiveRecord // DDR, parsed lazily by Next on its first call } // NewParser creates a new ISO 8211 parser from an io.Reader @@ -98,6 +99,24 @@ func (p *Parser) Parse() (*ISO8211File, error) { return result, nil } +// Next parses and returns the next data record, reading the file's DDR on the +// first call. It returns io.EOF after the final record. Unlike Parse — which +// reads every record up front — Next reads one record at a time, so a caller that +// only needs the leading records (e.g. the DSID/DSPM dataset-metadata records at +// the front of an S-57 cell) can stop early without reading the rest of the file. +// +// Next and Parse must not be mixed on the same Parser. +func (p *Parser) Next() (*DataRecord, error) { + if p.ddr == nil { + ddr, err := p.parseDDR() + if err != nil { + return nil, err + } + p.ddr = ddr + } + return p.parseDataRecord(p.ddr) +} + // parseDDR parses the Data Descriptive Record (first record in file) func (p *Parser) parseDDR() (*DataDescriptiveRecord, error) { // Parse leader diff --git a/pkg/s57/header.go b/pkg/s57/header.go new file mode 100644 index 0000000..ee6318e --- /dev/null +++ b/pkg/s57/header.go @@ -0,0 +1,54 @@ +package s57 + +import ( + "io/fs" + + "github.com/beetlebugorg/chartplotter/internal/s57/parser" + "github.com/beetlebugorg/chartplotter/pkg/iso8211" +) + +// CellHeader is a cell's identity and compilation scale, read from only its +// leading DSID/DSPM records — no features, no geometry. It answers "what is this +// cell, and at what scale/band" far more cheaply than a full Parse. +// +// S-57 stores NO bounding box in the header. A cell's geographic extent comes from +// its M_COVR coverage features or the exchange-set catalogue (see Catalog); read +// one of those when bounds are needed. +type CellHeader struct { + DatasetName string // cell code, e.g. "US5MD1MC" + Edition string // edition number + UpdateNumber string // "0" for a base cell + IssueDate string // YYYYMMDD + ProducingAgency int // IHO agency code (550 = NOAA) + CompilationScale int32 // scale denominator (0 if the cell has no DSPM) +} + +func convertHeader(h *parser.CellHeader) *CellHeader { + return &CellHeader{ + DatasetName: h.DatasetName, + Edition: h.Edition, + UpdateNumber: h.UpdateNumber, + IssueDate: h.IssueDate, + ProducingAgency: h.ProducingAgency, + CompilationScale: h.CompilationScale, + } +} + +// ReadHeaderFS reads a cell's header (DSID/DSPM) from a custom io/fs.FS without +// parsing its features or geometry. Pair with iso8211.MemFS to read from raw +// in-memory bytes: +// +// h, err := s57.ReadHeaderFS(iso8211.MemFS{"/c.000": data}, "/c.000") +func ReadHeaderFS(fsys fs.FS, filename string) (*CellHeader, error) { + h, err := parser.ReadHeaderFS(fsys, filename) + if err != nil { + return nil, err + } + return convertHeader(h), nil +} + +// ReadHeader reads a cell's header from the OS filesystem — the convenience +// equivalent of ReadHeaderFS over the local files. +func ReadHeader(filename string) (*CellHeader, error) { + return ReadHeaderFS(iso8211.OSFS(), filename) +} diff --git a/pkg/s57/header_test.go b/pkg/s57/header_test.go new file mode 100644 index 0000000..97ae57d --- /dev/null +++ b/pkg/s57/header_test.go @@ -0,0 +1,61 @@ +package s57 + +import ( + "os" + "testing" + + "github.com/beetlebugorg/chartplotter/pkg/iso8211" +) + +// TestReadHeader checks the cheap header-only reader against known values and, +// more importantly, against the authoritative full Parse: every header field it +// reports must match what a full parse derives for the same cell. +func TestReadHeader(t *testing.T) { + const path = "../../testdata/US5MD1MC.000" + data, err := os.ReadFile(path) + if err != nil { + t.Fatal(err) + } + fsys := iso8211.MemFS{"/US5MD1MC.000": data} + + h, err := ReadHeaderFS(fsys, "/US5MD1MC.000") + if err != nil { + t.Fatal(err) + } + + // This fixture encodes DSNM with the extension; downstream callers strip it via + // cellStem. The header reader returns the field verbatim, as a full parse does. + if h.DatasetName != "US5MD1MC.000" { + t.Errorf("DatasetName = %q, want US5MD1MC.000", h.DatasetName) + } + if h.CompilationScale != 12000 { + t.Errorf("CompilationScale = %d, want 12000", h.CompilationScale) + } + if h.ProducingAgency != 550 { + t.Errorf("ProducingAgency = %d, want 550 (NOAA)", h.ProducingAgency) + } + + // Cross-check every field against a full parse of the same cell. + full, err := ParseFS(fsys, "/US5MD1MC.000") + if err != nil { + t.Fatal(err) + } + if h.DatasetName != full.DatasetName() { + t.Errorf("DatasetName = %q, full parse = %q", h.DatasetName, full.DatasetName()) + } + if h.Edition != full.Edition() { + t.Errorf("Edition = %q, full parse = %q", h.Edition, full.Edition()) + } + if h.UpdateNumber != full.UpdateNumber() { + t.Errorf("UpdateNumber = %q, full parse = %q", h.UpdateNumber, full.UpdateNumber()) + } + if h.IssueDate != full.IssueDate() { + t.Errorf("IssueDate = %q, full parse = %q", h.IssueDate, full.IssueDate()) + } + if h.ProducingAgency != full.ProducingAgency() { + t.Errorf("ProducingAgency = %d, full parse = %d", h.ProducingAgency, full.ProducingAgency()) + } + if h.CompilationScale != full.CompilationScale() { + t.Errorf("CompilationScale = %d, full parse = %d", h.CompilationScale, full.CompilationScale()) + } +}