From e6267adc965927e45a96bfc375c517af42c76147 Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Wed, 24 Jun 2026 22:58:07 +0800 Subject: [PATCH 1/4] gh-152140: Replace `_Extra` class with `ZipFile._strip_extra_fields()` The `_Extra` class was over-engineered. Its only active usage was its `strip()` method, called by `ZipFile._write_end_record()` to provide the stripping logic for ZIP64 fields. The context-dependent nature of extra fields also made it difficult for `_decodeExtra()` or other methods to reuse it efficiently. Additionally, its `split()` method called `_Extra` directly rather than utilizing `cls`, which was a suboptimal pattern that hindered extensibility. Remove the `_Extra` class entirely and reimplement it as a private static method `_strip_extra_fields()` that processes a bytearray inside `ZipFile`, positioned directly beneath its caller. This eliminates dead and suboptimal code, achieves clean encapsulation and code locality, and improves performance by avoiding temporary class allocations. --- Lib/test/test_zipfile/test_core.py | 56 +++++++++++------------ Lib/zipfile/__init__.py | 73 ++++++++++++++---------------- 2 files changed, 60 insertions(+), 69 deletions(-) diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 4f20209927e7b3..43a2fddfd4e173 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -5840,20 +5840,23 @@ class StripExtraTests(unittest.TestCase): ZIP64_EXTRA = 1 + @classmethod + def setUpClass(cls): + cls.strip_extra = staticmethod(zipfile.ZipFile._strip_extra_fields) + def test_no_data(self): s = struct.Struct(" mv_len: + break + if xid not in field_ids: + result.extend(mv[pos:pos + 4 + xlen]) + pos += 4 + xlen + + # keep remaining trailing bytes (e.g. 
truncated or malformed data) + if pos < mv_len: + result.extend(mv[pos:]) + + return result + def _fpclose(self, fp): assert self._fileRefCnt > 0 self._fileRefCnt -= 1 From f334193f92d96a2294fcc0764bb3f5779b245f66 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Thu, 25 Jun 2026 00:42:00 +0000 Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2026-06-25-00-41-52.gh-issue-152140.u0phBe.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-06-25-00-41-52.gh-issue-152140.u0phBe.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-06-25-00-41-52.gh-issue-152140.u0phBe.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-25-00-41-52.gh-issue-152140.u0phBe.rst new file mode 100644 index 00000000000000..958c00b7e53eec --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-25-00-41-52.gh-issue-152140.u0phBe.rst @@ -0,0 +1 @@ +Replace :class:`!_Extra` with :meth:`!ZipFile._strip_extra_fields` in the :mod:`zipfile` module. 
From 4eae06b019d35646597015ba545bcf6bac35eda1 Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Thu, 25 Jun 2026 08:47:41 +0800 Subject: [PATCH 3/4] Make `strip_extra` a direct class attribute in the test --- Lib/test/test_zipfile/test_core.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 43a2fddfd4e173..3db85d0df275cf 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -5840,9 +5840,7 @@ class StripExtraTests(unittest.TestCase): ZIP64_EXTRA = 1 - @classmethod - def setUpClass(cls): - cls.strip_extra = staticmethod(zipfile.ZipFile._strip_extra_fields) + strip_extra = staticmethod(zipfile.ZipFile._strip_extra_fields) def test_no_data(self): s = struct.Struct(" Date: Thu, 25 Jun 2026 18:55:57 +0800 Subject: [PATCH 4/4] Remove `memoryview` usage Since `struct.unpack_from` is already an offset-based approach at the C level, and `result.extend` requires a data copy anyway, avoiding a `memoryview` wrapper prevents redundant Python object allocation and pointer-shifting overhead, yielding optimal runtime memory footprint and CPU performance. --- Lib/zipfile/__init__.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index de57ff9c40ede9..7e89f47d7722ff 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -2717,20 +2717,19 @@ def _strip_extra_fields(data, field_ids): return result # use memoryview for zero-copy slices - mv = memoryview(data) - mv_len = len(mv) + data_len = len(data) pos = 0 - while pos + 4 <= mv_len: - xid, xlen = struct.unpack_from(' mv_len: + while pos + 4 <= data_len: + xid, xlen = struct.unpack_from(' data_len: break if xid not in field_ids: - result.extend(mv[pos:pos + 4 + xlen]) + result.extend(data[pos:pos + 4 + xlen]) pos += 4 + xlen # keep remaining trailing bytes (e.g. 
truncated or malformed data) - if pos < mv_len: - result.extend(mv[pos:]) + if pos < data_len: + result.extend(data[pos:]) return result