Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 44 additions & 2 deletions src/bagit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,38 @@ def find_locale_dir():
# Payload-Oxum is autogenerated
]


def _has_fixed_length_digest(alg):
"""
Return True if the named algorithm produces a fixed-length digest.

The variable-length algorithms (the SHAKE family) require a ``length``
argument to ``hexdigest()`` that bagit never supplies, so they cannot be
used for manifests. We detect that by behavior rather than by name so that
any future variable-length algorithm hashlib adds is handled too.
"""

try:
hashlib.new(alg).hexdigest()
except TypeError:
# Variable-length digest, e.g. shake_128/shake_256
return False
except ValueError:
# Unsupported algorithm; leave it for the existing warning path
return True
return True


# Pick the list of algorithm names hashlib advertises. Only the attribute
# lookup sits inside the ``try`` so that an AttributeError raised anywhere
# else cannot silently select the legacy fallback.
try:
    _advertised_algos = hashlib.algorithms_guaranteed
except AttributeError:
    # FIXME: remove when we drop Python 2 (https://github.com/LibraryOfCongress/bagit-python/issues/102)
    # Python 2.7.0-2.7.8
    _advertised_algos = hashlib.algorithms

#: Algorithms usable for manifests: everything hashlib advertises except
#: those whose ``hexdigest()`` needs an explicit length (the SHAKE family).
CHECKSUM_ALGOS = {
    alg for alg in _advertised_algos if _has_fixed_length_digest(alg)
}

#: Algorithms used when the caller does not request any explicitly.
DEFAULT_CHECKSUMS = ["sha256", "sha512"]

#: Block size used when reading files for hashing:
Expand Down Expand Up @@ -160,6 +186,13 @@ def make_bag(
if checksums is None:
checksums = DEFAULT_CHECKSUMS

variable_length = [alg for alg in checksums if not _has_fixed_length_digest(alg)]
if variable_length:
raise BagError(
_("Cannot create manifests with variable-length digest algorithm(s): %s")
% ", ".join(sorted(variable_length))
)

bag_dir = os.path.abspath(bag_dir)
cwd = os.path.abspath(os.path.curdir)

Expand Down Expand Up @@ -1129,6 +1162,15 @@ def get_hashers(algorithms):
)
continue

if not _has_fixed_length_digest(alg):
LOGGER.warning(
_(
"Disabling requested hash algorithm %s: it has a variable-length digest"
),
alg,
)
continue

hashers[alg] = hasher

if not hashers:
Expand Down
25 changes: 25 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,31 @@ def test_make_bag_md5_sha1_sha256_manifest(self):
# check valid with three manifests
self.assertTrue(self.validate(bag, fast=True))

def test_make_bag_shake_raises_bagerror(self):
    # The SHAKE family yields variable-length digests, which manifests
    # cannot hold. make_bag is expected to refuse them up front with a
    # BagError rather than fail later with a TypeError. See issue #158.
    for shake_alg in ("shake_128", "shake_256"):
        with self.assertRaises(bagit.BagError):
            bagit.make_bag(self.tmpdir, checksums=[shake_alg])

def test_make_bag_shake_mixed_with_valid_raises_bagerror(self):
    # Pairing a SHAKE algorithm with a usable one must not let it slip
    # through: the request is still rejected instead of crashing in the
    # tagmanifest path.
    requested = ["sha256", "shake_128"]
    with self.assertRaises(bagit.BagError):
        bagit.make_bag(self.tmpdir, checksums=requested)

def test_shake_not_offered_as_checksum_algorithm(self):
    # Variable-length algorithms must stay out of the advertised set so
    # they are never auto-discovered as a manifest type or exposed as a
    # CLI flag.
    for shake_alg in ("shake_128", "shake_256"):
        self.assertNotIn(shake_alg, bagit.CHECKSUM_ALGOS)

def test_validate_flipped_bit(self):
bag = bagit.make_bag(self.tmpdir)
readme = j(self.tmpdir, "data", "README")
Expand Down