From cb35544e97d021f8ccb09c47775015a50d1f48d2 Mon Sep 17 00:00:00 2001
From: Arpit Jain
Date: Mon, 29 Jun 2026 11:15:13 +0900
Subject: [PATCH] Reject variable-length (SHAKE) checksum algorithms instead of crashing

bagit advertised the SHAKE algorithms (shake_128, shake_256) as usable
manifest checksums because they are in hashlib.algorithms_guaranteed,
but their hexdigest() needs a length argument that bagit never
supplies, so make_bag crashed with a TypeError partway through.

Detect variable-length digests by behavior (not by name) and filter
them out of CHECKSUM_ALGOS, raise a clear BagError if one is requested
directly, and skip them in get_hashers as a safeguard.

Closes #158.

Signed-off-by: Arpit Jain
---
 src/bagit/__init__.py | 46 +++++++++++++++++++++++++++++++++++++++++--
 test.py               | 25 +++++++++++++++++++++++
 2 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/src/bagit/__init__.py b/src/bagit/__init__.py
index 2b4adce..718de30 100755
--- a/src/bagit/__init__.py
+++ b/src/bagit/__init__.py
@@ -120,12 +120,38 @@ def find_locale_dir():
     # Payload-Oxum is autogenerated
 ]
 
+
+def _has_fixed_length_digest(alg):
+    """
+    Return True if the named algorithm produces a fixed-length digest.
+
+    The variable-length algorithms (the SHAKE family) require a ``length``
+    argument to ``hexdigest()`` that bagit never supplies, so they cannot be
+    used for manifests. We detect that by behavior rather than by name so that
+    any future variable-length algorithm hashlib adds is handled too.
+    """
+
+    try:
+        hashlib.new(alg).hexdigest()
+    except TypeError:
+        # Variable-length digest, e.g. shake_128/shake_256
+        return False
+    except ValueError:
+        # Unsupported algorithm; leave it for the existing warning path
+        return True
+    return True
+
+
 try:
-    CHECKSUM_ALGOS = hashlib.algorithms_guaranteed
+    CHECKSUM_ALGOS = {
+        alg for alg in hashlib.algorithms_guaranteed if _has_fixed_length_digest(alg)
+    }
 except AttributeError:
     # FIXME: remove when we drop Python 2 (https://github.com/LibraryOfCongress/bagit-python/issues/102)
     # Python 2.7.0-2.7.8
-    CHECKSUM_ALGOS = set(hashlib.algorithms)
+    CHECKSUM_ALGOS = {
+        alg for alg in hashlib.algorithms if _has_fixed_length_digest(alg)
+    }
 DEFAULT_CHECKSUMS = ["sha256", "sha512"]
 
 #: Block size used when reading files for hashing:
@@ -160,6 +186,13 @@ def make_bag(
     if checksums is None:
         checksums = DEFAULT_CHECKSUMS
 
+    variable_length = [alg for alg in checksums if not _has_fixed_length_digest(alg)]
+    if variable_length:
+        raise BagError(
+            _("Cannot create manifests with variable-length digest algorithm(s): %s")
+            % ", ".join(sorted(variable_length))
+        )
+
     bag_dir = os.path.abspath(bag_dir)
     cwd = os.path.abspath(os.path.curdir)
 
@@ -1129,6 +1162,15 @@ def get_hashers(algorithms):
             )
             continue
 
+        if not _has_fixed_length_digest(alg):
+            LOGGER.warning(
+                _(
+                    "Disabling requested hash algorithm %s: it has a variable-length digest"
+                ),
+                alg,
+            )
+            continue
+
         hashers[alg] = hasher
 
     if not hashers:
diff --git a/test.py b/test.py
index a0e7f31..a668a49 100644
--- a/test.py
+++ b/test.py
@@ -96,6 +96,31 @@ def test_make_bag_md5_sha1_sha256_manifest(self):
         # check valid with three manifests
         self.assertTrue(self.validate(bag, fast=True))
 
+    def test_make_bag_shake_raises_bagerror(self):
+        # SHAKE algorithms have a variable-length digest and cannot be used for
+        # manifests; make_bag should reject them cleanly instead of crashing
+        # with a TypeError later on. See issue #158.
+        for alg in ("shake_128", "shake_256"):
+            self.assertRaises(
+                bagit.BagError, bagit.make_bag, self.tmpdir, checksums=[alg]
+            )
+
+    def test_make_bag_shake_mixed_with_valid_raises_bagerror(self):
+        # A SHAKE algorithm mixed in with a usable one should still be rejected
+        # rather than crashing in the tagmanifest path.
+        self.assertRaises(
+            bagit.BagError,
+            bagit.make_bag,
+            self.tmpdir,
+            checksums=["sha256", "shake_128"],
+        )
+
+    def test_shake_not_offered_as_checksum_algorithm(self):
+        # Variable-length algorithms must not be advertised, so they are never
+        # auto-discovered as a manifest type or exposed as a CLI flag.
+        self.assertNotIn("shake_128", bagit.CHECKSUM_ALGOS)
+        self.assertNotIn("shake_256", bagit.CHECKSUM_ALGOS)
+
     def test_validate_flipped_bit(self):
         bag = bagit.make_bag(self.tmpdir)
         readme = j(self.tmpdir, "data", "README")