From 87bb302fd2cf8a520e2e720a37a7d9658f49e0e6 Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Thu, 25 Jun 2026 22:03:43 +0800 Subject: [PATCH 1/2] gh-152204: Validate date fields in pure-Python date.fromisoformat The pure-Python _parse_isoformat_date read each fixed-width field with int() on a slice, which silently accepts a leading sign or whitespace, or a short slice that runs off the end of the string. Malformed basic-format inputs such as '2020+12' or '2020061' were therefore parsed into a wrong-but-plausible date instead of raising, while the C accelerator rejects them via parse_digits(). Validate that each field slice is exactly N ASCII digits before converting. --- Lib/_pydatetime.py | 19 ++++++++++++++----- Lib/test/datetimetester.py | 10 ++++++++++ ...-06-25-14-05-00.gh-issue-152204.k9Qm3v.rst | 5 +++++ 3 files changed, 29 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-06-25-14-05-00.gh-issue-152204.k9Qm3v.rst diff --git a/Lib/_pydatetime.py b/Lib/_pydatetime.py index db4ea8d30c7064..91d95d01ad3468 100644 --- a/Lib/_pydatetime.py +++ b/Lib/_pydatetime.py @@ -360,14 +360,23 @@ def _parse_isoformat_date(dtstr): # see the comment on Modules/_datetimemodule.c:_find_isoformat_datetime_separator if len(dtstr) not in (7, 8, 10): raise ValueError("Invalid isoformat string") - year = int(dtstr[0:4]) + def _read(s, n): + # Each date field is a fixed width of exactly n ASCII digits. int() + # would otherwise accept a leading sign or whitespace, or a short slice + # that runs off the end of the string, so validate before converting + # (this is what the C accelerator's parse_digits() enforces). + if len(s) != n or not all(map(_is_ascii_digit, s)): + raise ValueError(f"Invalid isoformat string: {dtstr!r}") + return int(s) + + year = _read(dtstr[0:4], 4) has_sep = dtstr[4] == '-' pos = 4 + has_sep if dtstr[pos:pos + 1] == "W": # YYYY-?Www-?D? 
pos += 1 - weekno = int(dtstr[pos:pos + 2]) + weekno = _read(dtstr[pos:pos + 2], 2) pos += 2 dayno = 1 @@ -377,17 +386,17 @@ def _parse_isoformat_date(dtstr): pos += has_sep - dayno = int(dtstr[pos:pos + 1]) + dayno = _read(dtstr[pos:pos + 1], 1) return list(_isoweek_to_gregorian(year, weekno, dayno)) else: - month = int(dtstr[pos:pos + 2]) + month = _read(dtstr[pos:pos + 2], 2) pos += 2 if (dtstr[pos:pos + 1] == "-") != has_sep: raise ValueError("Inconsistent use of dash separator") pos += has_sep - day = int(dtstr[pos:pos + 2]) + day = _read(dtstr[pos:pos + 2], 2) return [year, month, day] diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 28c3ab2605c45d..079611165da009 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -2106,6 +2106,16 @@ def test_fromisoformat_fails(self): '10000-W25-1', # Invalid year '2020-W25-0', # Invalid day-of-week '2020-W25-8', # Invalid day-of-week + # gh-152204: each fixed-width field must be exactly N ASCII digits + '2020+12', # '+' accepted in a basic-format field + '2020 12', # space accepted in a basic-format field + '+020-06-15', # leading sign in the year + '202012+9', # '+' in the day field + '2020-W 5', # space in the week-number field + '2020061', # 7 chars: day slice reads a 1-character tail + '2020123', # 7 chars: day slice reads a 1-character tail + '9999121', # 7 chars: day slice reads a 1-character tail + '2020-W2', # 1-digit week number '٢025-03-09' # Unicode characters '2009\ud80002\ud80028', # Separators are surrogate codepoints ] diff --git a/Misc/NEWS.d/next/Library/2026-06-25-14-05-00.gh-issue-152204.k9Qm3v.rst b/Misc/NEWS.d/next/Library/2026-06-25-14-05-00.gh-issue-152204.k9Qm3v.rst new file mode 100644 index 00000000000000..a0fcaeadfa45ec --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-25-14-05-00.gh-issue-152204.k9Qm3v.rst @@ -0,0 +1,5 @@ +Fix the pure-Python implementation of :meth:`datetime.date.fromisoformat` +silently mis-parsing some malformed ISO 
8601 basic-format dates (for example +``'2020+12'`` or ``'2020061'``). Each fixed-width field is now required to be +exactly *N* ASCII digits before conversion, matching the C implementation, +which already rejected these inputs. From e780e2c573704c77dcb88cc939636404465db233 Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Thu, 25 Jun 2026 22:06:07 +0800 Subject: [PATCH 2/2] Tighten the _read() comment --- Lib/_pydatetime.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Lib/_pydatetime.py b/Lib/_pydatetime.py index 91d95d01ad3468..41f3cb6cf87e78 100644 --- a/Lib/_pydatetime.py +++ b/Lib/_pydatetime.py @@ -361,10 +361,7 @@ def _parse_isoformat_date(dtstr): if len(dtstr) not in (7, 8, 10): raise ValueError("Invalid isoformat string") def _read(s, n): - # Each date field is a fixed width of exactly n ASCII digits. int() - # would otherwise accept a leading sign or whitespace, or a short slice - # that runs off the end of the string, so validate before converting - # (this is what the C accelerator's parse_digits() enforces). + # Require exactly n ASCII digits, as the C parse_digits() does. if len(s) != n or not all(map(_is_ascii_digit, s)): raise ValueError(f"Invalid isoformat string: {dtstr!r}") return int(s)