Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 2 additions & 0 deletions python/ql/consistency-queries/CfgConsistency.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import semmle.python.controlflow.internal.AstNodeImpl
import ControlFlow::Consistency
11 changes: 6 additions & 5 deletions python/ql/consistency-queries/DataFlowConsistency.ql
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.internal.DataFlowImplSpecific
private import semmle.python.dataflow.new.internal.DataFlowDispatch
private import semmle.python.dataflow.new.internal.TaintTrackingImplSpecific
private import codeql.dataflow.internal.DataFlowImplConsistency
private import semmle.python.controlflow.internal.Cfg as Cfg

private module Input implements InputSig<Location, PythonDataFlow> {
private import Private
Expand Down Expand Up @@ -74,7 +75,7 @@ private module Input implements InputSig<Location, PythonDataFlow> {
// resolve to multiple functions), but we only make _one_ ArgumentNode for each
// argument in the CallNode, we end up violating this consistency check in those
// cases. (see `getCallArg` in DataFlowDispatch.qll)
exists(DataFlowCall other, CallNode cfgCall | other != call |
exists(DataFlowCall other, Cfg::CallNode cfgCall | other != call |
call.getNode() = cfgCall and
other.getNode() = cfgCall and
isArgumentNode(arg, call, _) and
Expand All @@ -90,16 +91,16 @@ private module Input implements InputSig<Location, PythonDataFlow> {
// allow it instead.
(
call.getScope() = attr.getScope() and
any(CfgNode n | n.asCfgNode() = call.getNode().(CallNode).getFunction()).getALocalSource() =
attr
any(CfgNode n | n.asCfgNode() = call.getNode().(Cfg::CallNode).getFunction())
.getALocalSource() = attr
or
not exists(call.getScope().(Function).getDefinition()) and
call.getScope().getScope+() = attr.getScope()
) and
(
other.getScope() = attr.getScope() and
any(CfgNode n | n.asCfgNode() = other.getNode().(CallNode).getFunction()).getALocalSource() =
attr
any(CfgNode n | n.asCfgNode() = other.getNode().(Cfg::CallNode).getFunction())
.getALocalSource() = attr
or
not exists(other.getScope().(Function).getDefinition()) and
other.getScope().getScope+() = attr.getScope()
Expand Down
4 changes: 4 additions & 0 deletions python/ql/lib/change-notes/2026-05-19-add-shared-cfg.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* A new Python control flow graph implementation has been added under `semmle.python.controlflow.internal.Cfg` (backed by `AstNodeImpl.qll`), built on the shared `codeql.controlflow.ControlFlowGraph` library. It is not yet used by the dataflow library or any production query; the legacy CFG in `semmle/python/Flow.qll` remains the default. The new library is exposed for tests and for upcoming migrations.
4 changes: 4 additions & 0 deletions python/ql/lib/change-notes/2026-05-19-add-shared-ssa.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* A new SSA adapter has been added under `semmle.python.dataflow.new.internal.SsaImpl`, built on the shared `codeql.ssa.Ssa` library and the new shared CFG (`semmle.python.controlflow.internal.Cfg`). It is not yet used by the dataflow library or any production query; the legacy ESSA SSA in `semmle/python/essa/*` remains the default. The new SSA adapter is exposed for tests and for the upcoming dataflow migration.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: breaking
---
* The deprecated `AstNode.getAFlowNode()` and `Function.getAReturnValueFlowNode()` predicates now return nodes from the new shared CFG (`Cfg::ControlFlowNode`) rather than from the legacy CFG (`ControlFlowNode`). Callers that still rely on these deprecated APIs and feed the result into legacy-CFG-aware predicates will no longer type-check; migrate to `n.getNode() = e` (or, for return values, the explicit `Return` pattern shown in the deprecation message) to get nodes from the dataflow library's current CFG.
45 changes: 45 additions & 0 deletions python/ql/lib/printCfgNew.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/**
* @name Print CFG (New)
* @description Produces a representation of a file's Control Flow Graph
* using the new shared control flow library.
* This query is used by the VS Code extension.
* @id python/print-cfg
* @kind graph
* @tags ide-contextual-queries/print-cfg
*/

private import python as Py
import semmle.python.controlflow.internal.AstNodeImpl

external string selectedSourceFile();

private predicate selectedSourceFileAlias = selectedSourceFile/0;

external int selectedSourceLine();

private predicate selectedSourceLineAlias = selectedSourceLine/0;

external int selectedSourceColumn();

private predicate selectedSourceColumnAlias = selectedSourceColumn/0;

module ViewCfgQueryInput implements ControlFlow::ViewCfgQueryInputSig<Py::File> {
predicate selectedSourceFile = selectedSourceFileAlias/0;

predicate selectedSourceLine = selectedSourceLineAlias/0;

predicate selectedSourceColumn = selectedSourceColumnAlias/0;

predicate cfgScopeSpan(
Ast::Callable callable, Py::File file, int startLine, int startColumn, int endLine,
int endColumn
) {
exists(Py::Scope scope |
scope = callable.asScope() and
file = scope.getLocation().getFile() and
scope.getLocation().hasLocationInfo(_, startLine, startColumn, endLine, endColumn)
)
}
}

import ControlFlow::ViewCfgQuery<Py::File, ViewCfgQueryInput>
13 changes: 7 additions & 6 deletions python/ql/lib/semmle/python/ApiGraphs.qll
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
* directed and labeled; they specify how the components represented by nodes relate to each other.
*/

// Importing python under the `py` namespace to avoid importing `CallNode` from `Flow.qll` and thereby having a naming conflict with `API::CallNode`.
// Importing python under the `PY` namespace to avoid pulling in `CallNode` from `Flow.qll` (via `import python`) and thereby having a naming conflict with `API::CallNode`.
private import python as PY
private import semmle.python.controlflow.internal.Cfg as Cfg
import semmle.python.dataflow.new.DataFlow
private import semmle.python.internal.CachedStages

Expand Down Expand Up @@ -282,15 +283,15 @@ module API {
index = this.getIndex() and
(
// subscripting
exists(PY::SubscriptNode subscript |
exists(Cfg::SubscriptNode subscript |
subscript.getObject() = this.getAValueReachableFromSource().asCfgNode() and
subscript.getIndex() = index.asSink().asCfgNode()
|
// reading
subscript = result.asSource().asCfgNode()
or
// writing
subscript.(PY::DefinitionNode).getValue() = result.asSink().asCfgNode()
subscript.(Cfg::DefinitionNode).getValue() = result.asSink().asCfgNode()
)
or
// dictionary literals
Expand Down Expand Up @@ -684,7 +685,7 @@ module API {
* Ignores relative imports, such as `from ..foo.bar import baz`.
*/
private predicate imports(DataFlow::CfgNode imp, string name) {
exists(PY::ImportExprNode iexpr |
exists(Cfg::ImportExprNode iexpr |
imp.getNode() = iexpr and
not iexpr.getNode().isRelative() and
name = iexpr.getNode().getImportedModuleName()
Expand Down Expand Up @@ -775,7 +776,7 @@ module API {
// list literals, from `x` to `[x]`
// TODO: once convenient, this should be done at a higher level than the AST,
// at least at the CFG layer, to take splitting into account.
// Also consider `SequenceNode for generality.
// Also consider `Cfg::SequenceNode` for generality.
exists(PY::List list | list = pred.(DataFlow::ExprNode).getNode().getNode() |
rhs.(DataFlow::ExprNode).getNode().getNode() = list.getAnElt() and
lbl = Label::subscript()
Expand Down Expand Up @@ -805,7 +806,7 @@ module API {
subscript = trackUseNode(src).getSubscript(index)
|
// from `x` to a definition of `x[...]`
rhs.asCfgNode() = subscript.asCfgNode().(PY::DefinitionNode).getValue() and
rhs.asCfgNode() = subscript.asCfgNode().(Cfg::DefinitionNode).getValue() and
lbl = Label::subscript()
or
// from `x` to `"key"` in `x["key"]`
Expand Down
14 changes: 7 additions & 7 deletions python/ql/lib/semmle/python/AstExtended.qll
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module;

import python
private import semmle.python.internal.CachedStages
private import semmle.python.controlflow.internal.Cfg as Cfg

/** A syntactic node (Class, Function, Module, Expr, Stmt or Comprehension) corresponding to a flow node */
abstract class AstNode extends AstNode_ {
Expand All @@ -19,17 +20,16 @@ abstract class AstNode extends AstNode_ {
/**
* DEPRECATED: use `ControlFlowNode.getNode()` from the other direction instead;
* that is, replace `e.getAFlowNode() = n` with `n.getNode() = e`. This API is
* being removed to untangle the AST and CFG hierarchies in preparation for
* migrating the dataflow library off the legacy CFG.
* being removed to untangle the AST and CFG hierarchies.
*
* Gets a flow node corresponding directly to this node.
* NOTE: For some statements and other purely syntactic elements,
* there may not be a `ControlFlowNode`.
* Gets a flow node corresponding directly to this node, from the new
* (shared) CFG. NOTE: For some statements and other purely syntactic
* elements, there may not be a `ControlFlowNode`.
*/
cached
deprecated ControlFlowNode getAFlowNode() {
deprecated Cfg::ControlFlowNode getAFlowNode() {
Stages::AST::ref() and
py_flow_bb_node(result, this, _, _)
result.getNode() = this
}

/** Gets the location for this AST node */
Expand Down
5 changes: 3 additions & 2 deletions python/ql/lib/semmle/python/Concepts.qll
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/

private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowImplSpecific
private import semmle.python.dataflow.new.RemoteFlowSources
Expand Down Expand Up @@ -214,7 +215,7 @@ module Path {
SafeAccessCheck() { this = DataFlow::BarrierGuard<safeAccessCheck/3>::getABarrierNode() }
}

private predicate safeAccessCheck(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
private predicate safeAccessCheck(DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch) {
g.(SafeAccessCheck::Range).checks(node, branch)
}

Expand All @@ -223,7 +224,7 @@ module Path {
/** A data-flow node that checks that a path is safe to access in some way, for example by having a controlled prefix. */
abstract class Range extends DataFlow::GuardNode {
/** Holds if this guard validates `node` upon evaluating to `branch`. */
abstract predicate checks(ControlFlowNode node, boolean branch);
abstract predicate checks(Cfg::ControlFlowNode node, boolean branch);
}
}
}
Expand Down
15 changes: 8 additions & 7 deletions python/ql/lib/semmle/python/Exprs.qll
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module;

private import python
private import semmle.python.internal.CachedStages
private import semmle.python.controlflow.internal.Cfg as Cfg

/** An expression */
class Expr extends Expr_, AstNode {
Expand Down Expand Up @@ -70,7 +71,7 @@ class Attribute extends Attribute_ {
/* syntax: Expr.name */
override Expr getASubExpression() { result = this.getObject() }

deprecated override AttrNode getAFlowNode() { result = super.getAFlowNode() }
deprecated override Cfg::AttrNode getAFlowNode() { result = super.getAFlowNode() }

/** Gets the name of this attribute. That is the `name` in `obj.name` */
string getName() { result = Attribute_.super.getAttr() }
Expand Down Expand Up @@ -99,7 +100,7 @@ class Subscript extends Subscript_ {

Expr getObject() { result = Subscript_.super.getValue() }

deprecated override SubscriptNode getAFlowNode() { result = super.getAFlowNode() }
deprecated override Cfg::SubscriptNode getAFlowNode() { result = super.getAFlowNode() }
}

/** A call expression, such as `func(...)` */
Expand All @@ -115,7 +116,7 @@ class Call extends Call_ {

override string toString() { result = this.getFunc().toString() + "()" }

deprecated override CallNode getAFlowNode() { result = super.getAFlowNode() }
deprecated override Cfg::CallNode getAFlowNode() { result = super.getAFlowNode() }

/** Gets a tuple (*) argument of this call. */
Expr getStarargs() { result = this.getAPositionalArg().(Starred).getValue() }
Expand Down Expand Up @@ -203,7 +204,7 @@ class IfExp extends IfExp_ {
result = this.getTest() or result = this.getBody() or result = this.getOrelse()
}

deprecated override IfExprNode getAFlowNode() { result = super.getAFlowNode() }
deprecated override Cfg::IfExprNode getAFlowNode() { result = super.getAFlowNode() }
}

/** A starred expression, such as the `*rest` in the assignment `first, *rest = seq` */
Expand Down Expand Up @@ -413,7 +414,7 @@ class PlaceHolder extends PlaceHolder_ {

override string toString() { result = "$" + this.getId() }

deprecated override NameNode getAFlowNode() { result = super.getAFlowNode() }
deprecated override Cfg::NameNode getAFlowNode() { result = super.getAFlowNode() }
}

/** A tuple expression such as `( 1, 3, 5, 7, 9 )` */
Expand Down Expand Up @@ -480,7 +481,7 @@ class Name extends Name_ {

override string toString() { result = this.getId() }

deprecated override NameNode getAFlowNode() { result = super.getAFlowNode() }
deprecated override Cfg::NameNode getAFlowNode() { result = super.getAFlowNode() }

override predicate isArtificial() {
/* Artificial variable names in comprehensions all start with "." */
Expand Down Expand Up @@ -587,7 +588,7 @@ abstract class NameConstant extends Name, ImmutableLiteral {

override predicate isConstant() { any() }

deprecated override NameConstantNode getAFlowNode() { result = Name.super.getAFlowNode() }
deprecated override Cfg::NameConstantNode getAFlowNode() { result = Name.super.getAFlowNode() }

override predicate isArtificial() { none() }
}
Expand Down
Loading