Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ editor library — it adds nothing to the single served file). On top of that:
onto the editor: a schema identifier drops as text at the caret, and a
saved/history query drops as a `( … )` subquery at the drop point (its trailing
`FORMAT`/`;` stripped). Undoable; click-to-load still works for keyboard users.
Dragging a **database or table onto the results pane** instead renders a
[schema lineage graph](#schema-lineage-graph).

**The keystroke rule:** none of this runs SQL while you type. Reference data —
the server's keyword and function lists — is fetched **once per connection**
Expand Down Expand Up @@ -99,6 +101,30 @@ tab (e.g. `EXPLAIN ESTIMATE …` opens **Estimate**); anything else opens the
verbatim **Explain** tab. An explicit `… FORMAT <name>` on an EXPLAIN bypasses the
views and shows ClickHouse's raw response.

## Schema lineage graph

Drag a **database** or **table** row from the schema sidebar onto the results pane
to see how its ClickHouse objects relate — not generic foreign keys, but the
engine-specific lineage: materialized views (`feeds` from sources, `writes` to the
target), regular views (`reads` their sources), dictionaries (`dict` from a source
table), and `Distributed`/`Buffer`/`Merge` engines pointing at their backing
tables. Nodes are coloured by kind (table / view / materialized view / dictionary /
distributed / external) with a legend; edges are coloured and labelled by
relationship. Drag a **database** → the whole-DB lineage (isolated tables with no
relationships are dropped so the lineage is the focus, unless the database has no
relationships at all, in which case every table is shown); drag a **table** → its
1-hop neighbourhood. **Click any node** to re-centre on it, and **Expand** for a
fullscreen pan/zoom view (same controls as the pipeline graph).

Discovery is **structured-first, parse-fallback**, because the helpful
`system.tables` columns are build-dependent: it prefers `dependencies_table` /
`loading_dependencies_*` / `system.dictionaries.source` when populated, and
otherwise lets ClickHouse parse the SQL via **`EXPLAIN AST`** (for query sources)
plus light regex on `create_table_query` (`TO` target) and `engine_full`
(Distributed/Buffer/Merge args). This keeps it working on older deployed builds
(e.g. Altinity-antalya 26.3, where `target_*` is absent and `dependencies_*` can be
empty). Graph math is pure in `src/core/schema-graph.js` (100%-covered); the SVG is
the same dagre-laid-out renderer the pipeline graph uses.

## Saved queries & the Library

Queries you save (★ **Save** next to Run, or `⌘S`) land in the sidebar **★ Library**
Expand Down
5 changes: 3 additions & 2 deletions src/core/dot-layout.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,11 @@ export function dagreLayout(dagre, graph) {

const outNodes = nodes.map((n) => {
const dn = g.node(n.id);
return { id: n.id, label: n.label, x: dn.x - dn.width / 2, y: dn.y - dn.height / 2, w: dn.width, h: dn.height };
// `kind` (node) / `label` (edge) pass through for the schema graph's colouring.
return { id: n.id, label: n.label, kind: n.kind, x: dn.x - dn.width / 2, y: dn.y - dn.height / 2, w: dn.width, h: dn.height };
});
const outEdges = edges.map((e) => ({
from: e.from, to: e.to,
from: e.from, to: e.to, kind: e.kind, label: e.label,
points: g.edge(e.from, e.to).points.map((p) => ({ x: p.x, y: p.y })),
}));
const gg = g.graph();
Expand Down
204 changes: 204 additions & 0 deletions src/core/schema-graph.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
// Pure assembly of a ClickHouse object-lineage graph from system.* rows. No DOM,
// no globals, no fetch — the queries live in src/net/ch-client.js (loadSchemaLineage)
// and the SVG drawing in src/ui (reusing the dagre graph renderer). Mirrors the
// load→assemble pattern of src/core/completions.js.
//
// Discovery is structured-first, parse-fallback (see the plan): structured columns
// (dependencies_table, loading_dependencies_*, dictionaries.source) when populated,
// else parse — EXPLAIN AST `TableIdentifier`s for query sources (attached as
// row.astTables by the loader), create_table_query `TO`/`.inner` for the MV target,
// engine_full for Distributed/Buffer/Merge. All best-effort: a miss yields a node
// with no edge, never a throw.

/**
 * Classify a ClickHouse engine name as one of the lineage graph's node kinds.
 * Any engine not listed here (including an empty or missing one) is a plain `table`.
 */
export function objectKind(engine) {
  switch (String(engine || '')) {
    case 'MaterializedView':
      return 'mv';
    case 'View':
    case 'LiveView':
    case 'WindowView':
      return 'view';
    case 'Dictionary':
      return 'dictionary';
    case 'Distributed':
      return 'distributed';
    case 'Buffer':
      return 'buffer';
    case 'Merge':
      return 'merge';
    default:
      return 'table';
  }
}

/**
 * Collect table names from `EXPLAIN AST` text: every line that starts (after
 * optional whitespace) with `TableIdentifier <name>` contributes `<name>`, read up
 * to the first whitespace or `(`. Names are returned in order of appearance,
 * duplicates included.
 */
export function parseAstTables(astText) {
  const text = String(astText || '');
  const hits = text.matchAll(/^\s*TableIdentifier\s+([^\s(]+)/gm);
  return Array.from(hits, (hit) => hit[1]);
}

/**
 * The explicit `TO db.table` (or bare `TO table`) target of a materialized view,
 * or null when the CREATE statement names no target.
 *
 * Only the text before the `AS SELECT` body is searched. Each part of the name may
 * be a plain identifier or a back-quoted one (ClickHouse back-quotes names that
 * are not plain identifiers, e.g. `my-target`); the back-quotes are stripped so
 * the result lines up with `database.name` row ids. A TTL move clause
 * (`… TO DISK 'x'` / `… TO VOLUME 'x'`) in the engine definition of an
 * implicit-target view is not a target and is skipped.
 *
 * @param {string} createTableQuery `system.tables.create_table_query` text.
 * @returns {string|null} `db.table`, bare `table`, or null.
 */
export function parseMvTarget(createTableQuery) {
  const sql = String(createTableQuery || '');
  const [head] = sql.split(/\sAS\s+SELECT/i); // only look before the SELECT body
  // `TO`, not followed by a TTL destination, then one or two (optionally back-quoted) name parts.
  const match = /\sTO\s+(?!(?:DISK|VOLUME)\s+')((?:`[^`]+`|[A-Za-z_]\w*)(?:\.(?:`[^`]+`|[A-Za-z_]\w*))?)/.exec(head);
  return match ? match[1].replace(/`/g, '') : null;
}

/**
 * Resolve where a dictionary loads from.
 *
 * Returns `{ db, table }` for a ClickHouse-table source, `{ external }` (the text
 * before the first `:` of `source`, trimmed) for anything else that has a
 * `source`, and null when nothing can be determined.
 */
export function parseDictSource(source, createTableQuery) {
  const src = String(source || '');
  const structured = src.match(/^ClickHouse:\s*([\w]+)\.([\w]+)/i);
  if (structured !== null) {
    const [, db, table] = structured;
    return { db, table };
  }

  // `source` can be empty before the dictionary is loaded, so fall back to the
  // SOURCE(CLICKHOUSE(…)) clause of the CREATE statement.
  const ddl = String(createTableQuery || '');
  if (/SOURCE\s*\(\s*CLICKHOUSE/i.test(ddl)) {
    const tableArg = ddl.match(/\bTABLE\s+'([^']+)'/i);
    if (tableArg !== null) {
      const dbArg = ddl.match(/\bDB\s+'([^']+)'/i);
      return { db: dbArg === null ? null : dbArg[1], table: tableArg[1] };
    }
  }

  return src ? { external: src.split(':')[0].trim() } : null;
}

/**
 * Pull the backing-table reference out of `engine_full` for the engines that point
 * at other tables. The leading arguments must be single-quoted literals:
 *   Distributed('cluster', 'db', 'table', …) → { kind, cluster, db, table }
 *   Buffer('db', 'table', …)                 → { kind, db, table }
 *   Merge('db', 'regex')                     → { kind, db, regex }
 * Any other engine, or arguments that do not match, yields null.
 */
export function parseEngineRef(engine, engineFull) {
  const text = String(engineFull || '');
  switch (engine) {
    case 'Distributed': {
      const args = /Distributed\(\s*'([^']*)'\s*,\s*'([^']*)'\s*,\s*'([^']*)'/.exec(text);
      return args ? { kind: 'distributed', cluster: args[1], db: args[2], table: args[3] } : null;
    }
    case 'Buffer': {
      const args = /Buffer\(\s*'([^']*)'\s*,\s*'([^']*)'/.exec(text);
      return args ? { kind: 'buffer', db: args[1], table: args[2] } : null;
    }
    case 'Merge': {
      const args = /Merge\(\s*'([^']*)'\s*,\s*'([^']*)'/.exec(text);
      return args ? { kind: 'merge', db: args[1], regex: args[2] } : null;
    }
    default:
      return null;
  }
}

// References found in columns or parsed SQL may be bare (`table`) or already
// qualified (`db.table`): anything containing a dot is taken as-is, everything
// else is prefixed with the given database.
const qualify = (db, name) => {
  if (name && name.includes('.')) return name;
  return db + '.' + name;
};
// A real row's id is always `database.name`, never passed through `qualify`,
// because table names such as `.inner_id.<uuid>` contain dots themselves.
const rowId = (row) => row.database + '.' + row.name;

/**
 * Build `{ nodes:[{id,label,kind,db,name}], edges:[{from,to,kind}] }` from system.* rows.
 *
 * `rows = { tables:[…], dictionaries:[…] }`; each table row may carry `astTables`
 * (EXPLAIN AST sources). Every table row becomes a node; objects that are only
 * referenced (not present in `rows.tables`) become `table` nodes, and non-ClickHouse
 * dictionary sources become `external` nodes with an `ext:` id.
 *
 * Edge kinds: `feeds` (source → MV), `reads` (source → view), `writes` (MV →
 * target), `shard` (backing table → Distributed), `buffer` (backing table →
 * Buffer), `merge` (matched table → Merge), `dict` (source → dictionary).
 * Self-edges and exact duplicates are dropped.
 *
 * `focus = { kind:'db'|'table', db, table? }` scopes the result:
 *   - table focus → the table, its 1-hop neighbours, and the edges among them;
 *   - otherwise   → every edge, with degree-0 nodes dropped when at least one edge exists.
 *
 * Best-effort throughout: an unparseable reference yields a node with no edge,
 * never a throw.
 */
export function buildSchemaGraph(rows, focus) {
  const tables = (rows && rows.tables) || [];
  const dicts = (rows && rows.dictionaries) || [];
  const nodes = new Map(); // id → node, in creation order
  const byId = new Map(); // id → table row, for lookups
  // Implicit-MV inner storage, keyed by the suffix of its name: the owner's uuid
  // for `.inner_id.<uuid>`, or the owner's name for `.inner.<mv_name>`.
  const innerByOwner = new Map();

  // Get-or-create; an existing node keeps the kind it was first created with.
  const node = (id, kind) => {
    if (!nodes.has(id)) {
      const dot = id.indexOf('.');
      nodes.set(id, { id, label: id, kind, db: id.slice(0, dot), name: id.slice(dot + 1) });
    }
    return nodes.get(id);
  };
  // Make sure a referenced object has a node: its real kind when it is a known
  // row, `table` when it is only known by reference.
  const refNode = (id) => node(id, byId.has(id) ? nodes.get(id).kind : 'table');
  // external (non-CH dictionary source) leaf
  const external = (label) => {
    const id = 'ext:' + label;
    if (!nodes.has(id)) nodes.set(id, { id, label, kind: 'external', db: '', name: label });
    return id;
  };

  for (const t of tables) {
    const id = rowId(t);
    byId.set(id, t);
    if (/^\.inner/.test(t.name)) {
      innerByOwner.set(t.name.replace(/^\.inner(_id)?\./, ''), id);
    }
    node(id, objectKind(t.engine));
  }
  // friendlier labels for inner storage tables
  for (const id of innerByOwner.values()) {
    const n = nodes.get(id);
    if (n) n.label = '·inner';
  }

  const edges = [];
  const seen = new Set();
  const addEdge = (from, to, kind) => {
    if (!from || !to || from === to) return;
    if (!nodes.has(from) || !nodes.has(to)) return; // both endpoints must be real nodes
    const k = JSON.stringify([from, to, kind]);
    if (seen.has(k)) return;
    seen.add(k);
    edges.push({ from, to, kind });
  };
  // Pair the parallel `*_database` / `*_table` arrays into qualified ids.
  const zip = (dbs, names) => (names || []).map((nm, i) => qualify((dbs && dbs[i]) || '', nm));

  for (const t of tables) {
    const id = rowId(t);
    const kind = nodes.get(id).kind;
    // source → MV/View (structured dependents on the source side)
    for (const dep of zip(t.dependencies_database, t.dependencies_table)) {
      refNode(dep);
      addEdge(id, dep, 'feeds');
    }
    // fallback: EXPLAIN AST sources of a view/MV → source → this object. Only real
    // (in-scope) objects count, so CTE/alias names from the AST are dropped.
    if ((kind === 'mv' || kind === 'view') && Array.isArray(t.astTables)) {
      for (const src of t.astTables) {
        const sid = qualify(t.database, src);
        if (byId.has(sid)) addEdge(sid, id, kind === 'mv' ? 'feeds' : 'reads');
      }
    }
    if (kind === 'mv') {
      const target = parseMvTarget(t.create_table_query);
      // No explicit TO target → implicit inner storage. Look it up by uuid
      // (`.inner_id.<uuid>`) and then by name (`.inner.<mv_name>`), since a
      // name-suffixed inner table can never be found through the uuid.
      const targetId = target
        ? qualify(t.database, target)
        : innerByOwner.get(String(t.uuid || '')) || innerByOwner.get(t.name);
      if (targetId) {
        refNode(targetId);
        addEdge(id, targetId, 'writes');
      }
    } else if (kind === 'distributed' || kind === 'buffer' || kind === 'merge') {
      const ref = parseEngineRef(t.engine, t.engine_full);
      if (ref && ref.table) {
        const refId = qualify(ref.db || t.database, ref.table);
        refNode(refId);
        addEdge(refId, id, ref.kind === 'buffer' ? 'buffer' : 'shard');
      } else if (ref && ref.regex) {
        let rx = null;
        try { rx = new RegExp(ref.regex); } catch { /* keep the no-throw contract */ }
        for (const cand of rx ? tables : []) {
          if (cand.database === (ref.db || t.database) && cand.name !== t.name && rx.test(cand.name)) {
            addEdge(rowId(cand), id, 'merge');
          }
        }
      }
    }
  }

  // dictionaries: prefer loading_dependencies (structured) else parse source/CREATE
  for (const t of tables) {
    const id = rowId(t);
    if (nodes.get(id).kind !== 'dictionary') continue;
    const ld = zip(t.loading_dependencies_database, t.loading_dependencies_table);
    if (ld.length) {
      for (const src of ld) {
        refNode(src);
        addEdge(src, id, 'dict');
      }
    } else {
      const d = dicts.find((x) => x.database === t.database && x.name === t.name);
      const s = parseDictSource(d && d.source, t.create_table_query);
      if (s && s.table) {
        const sid = qualify(s.db || t.database, s.table);
        node(sid, 'table');
        addEdge(sid, id, 'dict');
      } else if (s && s.external) {
        addEdge(external(s.external), id, 'dict');
      }
    }
  }

  let outNodes = [...nodes.values()];
  let outEdges = edges;
  if (focus && focus.kind === 'table') {
    const center = qualify(focus.db, focus.table);
    const keep = new Set([center]);
    for (const e of edges) {
      if (e.from === center) keep.add(e.to);
      if (e.to === center) keep.add(e.from);
    }
    outNodes = outNodes.filter((n) => keep.has(n.id));
    outEdges = edges.filter((e) => keep.has(e.from) && keep.has(e.to));
  } else if (edges.length) {
    // Whole-DB lineage: drop isolated (degree-0) tables so the relationships are
    // the focus — but only when there ARE relationships, so a DB with no lineage
    // still shows its tables rather than an empty pane.
    const linked = new Set();
    for (const e of edges) {
      linked.add(e.from);
      linked.add(e.to);
    }
    outNodes = outNodes.filter((n) => linked.has(n.id));
  }
  return { nodes: outNodes, edges: outEdges };
}
30 changes: 30 additions & 0 deletions src/net/ch-client.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
// so the whole module is unit-testable with plain stubs.

import { parseExceptionText, isAuthExpiredBody, authDeniedMessage } from '../core/stream.js';
import { parseAstTables } from '../core/schema-graph.js';

/** Build a ClickHouse HTTP URL with query-string options. Pure. */
export function chUrl(origin, opts = {}) {
Expand Down Expand Up @@ -128,6 +129,35 @@ export async function loadSchema(ctx) {
return [...byDb.entries()].map(([db, tables]) => ({ db, expanded: false, tables }));
}

/**
 * Load object-lineage rows for a database: the `system.tables` columns the graph
 * builder needs + `system.dictionaries` sources, and (for views/MVs) the
 * `EXPLAIN AST` source tables attached as `row.astTables`. `target_database`/
 * `target_table` are intentionally not selected — they're a ClickHouse-Cloud-only
 * column (absent on OSS/Altinity builds), so the MV target is parsed from
 * `create_table_query` in `buildSchemaGraph`.
 *
 * @param ctx   Connection context, passed through to `queryJson` untouched.
 * @param focus `{ db, … }`; only `focus.db` is read (missing → empty name).
 * @returns `{ tables, dictionaries }` — the raw rows, mutated in place with `astTables`.
 */
export async function loadSchemaLineage(ctx, focus) {
  // SQL string literal: backslash is an escape character inside ClickHouse
  // single-quoted strings, so it must be escaped as well as the quote itself.
  const q = (s) => "'" + String(s).replace(/\\/g, '\\\\').replace(/'/g, "''") + "'";
  const db = (focus && focus.db) || '';
  const dbLiteral = q(db);
  const cols = 'database, name, engine, engine_full, create_table_query, as_select, '
    + 'toString(uuid) AS uuid, dependencies_database, dependencies_table, '
    + 'loading_dependencies_database, loading_dependencies_table';
  const tablesJson = await queryJson(ctx, `SELECT ${cols} FROM system.tables WHERE database = ${dbLiteral} ORDER BY name`);
  const tables = tablesJson.data || [];
  const dictsJson = await queryJson(ctx, `SELECT database, name, source FROM system.dictionaries WHERE database = ${dbLiteral}`);
  const dictionaries = dictsJson.data || [];
  // Robust source extraction for views/MVs: let ClickHouse parse the SELECT.
  const parsable = tables.filter(
    (t) => t.as_select && (t.engine === 'View' || t.engine === 'MaterializedView'),
  );
  await Promise.all(parsable.map(async (t) => {
    try {
      const ast = await queryJson(ctx, 'EXPLAIN AST ' + t.as_select);
      t.astTables = parseAstTables((ast.data || []).map((r) => r.explain).join('\n'));
    } catch { /* best-effort — leave astTables undefined */ }
  }));
  return { tables, dictionaries };
}

/** Load the columns of one table. Returns [{name,type,comment}]. */
export async function loadColumns(ctx, db, table, sqlString) {
const sql =
Expand Down
36 changes: 36 additions & 0 deletions src/styles.css
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,9 @@ body {
background: var(--bg-table); cursor: grab;
}
.explain-graph-view.grabbing { cursor: grabbing; }
/* Schema graph: normal pointer (click a node → SHOW CREATE); ⌘/Ctrl-drag to pan
swaps in the grabbing hand via .grabbing above. */
.schema-graph-view { cursor: default; }
.explain-graph-view:focus { outline: none; }
.explain-graph-view > svg.explain-graph { width: 100%; height: 100%; }
/* `color` drives the arrowhead (fill:currentColor) and edge stroke. */
Expand All @@ -642,6 +645,39 @@ body {
}
.explain-graph .eg-edge { stroke: var(--fg-faint); stroke-width: 1.3; fill: none; }
.explain-graph .eg-arrowhead { fill: var(--fg-faint); }
.explain-graph .eg-edge-label { fill: var(--fg-faint); font-family: var(--mono); font-size: 9px; }

/* ------------ schema lineage graph (kind-coloured nodes + edges) ------------ */
/* Node fill by object kind. Scoped under `.explain-graph` so each rule matches the
   specificity of the base `.explain-graph .eg-node` fill and wins by coming later
   in the source; an unscoped `.eg-node--*` selector would be less specific and lose. */
.explain-graph .eg-node--table { fill: var(--bg-chip); stroke: var(--border); }
.explain-graph .eg-node--view { fill: color-mix(in oklab, #14b8a6 22%, var(--bg-table)); stroke: #14b8a6; }
.explain-graph .eg-node--mv { fill: color-mix(in oklab, #8b5cf6 24%, var(--bg-table)); stroke: #8b5cf6; }
.explain-graph .eg-node--dictionary { fill: color-mix(in oklab, #3b82f6 22%, var(--bg-table)); stroke: #3b82f6; }
.explain-graph .eg-node--distributed { fill: color-mix(in oklab, #f97316 22%, var(--bg-table)); stroke: #f97316; }
.explain-graph .eg-node--buffer { fill: color-mix(in oklab, #eab308 22%, var(--bg-table)); stroke: #eab308; }
.explain-graph .eg-node--merge { fill: color-mix(in oklab, #64748b 26%, var(--bg-table)); stroke: #64748b; }
.explain-graph .eg-node--external { fill: transparent; stroke: var(--fg-faint); stroke-dasharray: 3 2; }
.explain-graph .eg-edge--writes { stroke: #8b5cf6; }
.explain-graph .eg-edge--dict { stroke: #3b82f6; }
.explain-graph .eg-edge--shard { stroke: #f97316; }
.explain-graph .eg-edge--buffer { stroke: #eab308; }
.explain-graph .eg-edge--merge { stroke: #64748b; }
.schema-graph-view { position: relative; }
.schema-graph-legend {
position: absolute; top: 8px; left: 10px; pointer-events: none;
display: flex; flex-wrap: wrap; gap: 4px 12px; max-width: 70%;
font-size: 10.5px; color: var(--fg-mute);
}
.schema-graph-legend .sg-leg { display: flex; align-items: center; gap: 5px; }
.schema-graph-legend .sg-swatch { width: 11px; height: 11px; border-radius: 2px; border: 1px solid var(--border); display: inline-block; }
.sg-swatch--table { background: var(--bg-chip); border-color: var(--border); }
.sg-swatch--view { background: #14b8a6; border-color: #14b8a6; }
.sg-swatch--mv { background: #8b5cf6; border-color: #8b5cf6; }
.sg-swatch--dictionary { background: #3b82f6; border-color: #3b82f6; }
.sg-swatch--distributed { background: #f97316; border-color: #f97316; }
.sg-swatch--external { background: transparent; border-style: dashed; }
.res-graph-title { font-size: 11.5px; color: var(--fg-mute); font-weight: 500; padding: 0 4px; }

/* ------------ fullscreen pipeline overlay (pan/zoom) ------------ */
.graph-overlay {
Expand Down
Loading
Loading