From ba7e82fa6d9b10ba0c2232dd120364818c8cc06f Mon Sep 17 00:00:00 2001 From: AnandSundar Date: Thu, 25 Jun 2026 12:12:47 -0600 Subject: [PATCH 1/5] gh-152227: Add statistics.median_absolute_deviation Add the median absolute deviation (MAD) function to the statistics module. MAD is a robust measure of statistical dispersion: the median of the absolute deviations from the median, optionally scaled by a consistency constant. The default scale=1.4826 (the consistency constant for the normal distribution) produces an estimator of the population standard deviation that is consistent with statistics.stdev. Pass scale=1.0 for the raw value. * data: a sequence or iterable of real-valued numbers * scale: int or float (Decimal/Fraction raise TypeError) Result type follows the data type (int/float input yields float; Decimal input yields Decimal; Fraction input yields Fraction). NaN propagates when at least one non-NaN value is present; all-NaN input raises StatisticsError (unlike statistics.median(), which returns NaN for such input). Includes module docstring updates and __all__ entry. --- Lib/statistics.py | 136 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 113 insertions(+), 23 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index 01ca6c51dafcafe..4697940c0234768 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -7,21 +7,22 @@ Calculating averages -------------------- -================== ================================================== -Function Description -================== ================================================== -mean Arithmetic mean (average) of data. -fmean Fast, floating-point arithmetic mean. -geometric_mean Geometric mean of data. -harmonic_mean Harmonic mean of data. -median Median (middle value) of data. -median_low Low median of data. -median_high High median of data. -median_grouped Median, or 50th percentile, of grouped data. -mode Mode (most common value) of data. -multimode List of modes (most common values of data). 
-quantiles Divide data into intervals with equal probability. -================== ================================================== +============================ ================================================== +Function Description +============================ ================================================== +mean Arithmetic mean (average) of data. +fmean Fast, floating-point arithmetic mean. +geometric_mean Geometric mean of data. +harmonic_mean Harmonic mean of data. +median Median (middle value) of data. +median_low Low median of data. +median_high High median of data. +median_grouped Median, or 50th percentile, of grouped data. +mode Mode (most common value) of data. +multimode List of modes (most common values of data). +quantiles Divide data into intervals with equal probability. +median_absolute_deviation Median absolute deviation of data. +============================ ================================================== Calculate the arithmetic mean ("the average") of data: @@ -50,14 +51,15 @@ Calculating variability or spread --------------------------------- -================== ============================================= -Function Description -================== ============================================= -pvariance Population variance of data. -variance Sample variance of data. -pstdev Population standard deviation of data. -stdev Sample standard deviation of data. -================== ============================================= +============================ ============================================= +Function Description +============================ ============================================= +pvariance Population variance of data. +variance Sample variance of data. +pstdev Population standard deviation of data. +stdev Sample standard deviation of data. +median_absolute_deviation Median absolute deviation of data. 
+============================ ============================================= Calculate the standard deviation of sample data: @@ -117,6 +119,7 @@ 'linear_regression', 'mean', 'median', + 'median_absolute_deviation', 'median_grouped', 'median_high', 'median_low', @@ -653,6 +656,93 @@ def pstdev(data, mu=None): return _float_sqrt_of_frac(mss_numerator, mss_denominator) +def median_absolute_deviation(data, *, scale=1.4826): + """Median absolute deviation of data. + + The median absolute deviation (MAD) is a robust measure of the + variability of a univariate sample of quantitative data. It is the + median of the absolute deviations from the median: + + MAD = median(|x_i - median(x)|) + + For normally distributed data, multiplying MAD by the consistency + constant 1.4826 (the default *scale* parameter) produces an estimator + of the population standard deviation that is consistent with the + sample standard deviation. To get the raw MAD instead, pass + ``scale=1``. + + *data* can be a sequence or iterable. If *data* is empty, + :exc:`StatisticsError` will be raised. *scale* must be an ``int`` + or ``float``; passing a :class:`Decimal` or :class:`Fraction` raises + :exc:`TypeError`. The result type follows *data*, not *scale*. 
+ + Some examples of use: + + >>> median_absolute_deviation([1, 1, 2, 2, 4, 6, 9]) + 1.4826 + >>> median_absolute_deviation([1, 1, 2, 2, 4, 6, 9], scale=1.0) + 1.0 + + Decimals and Fractions are supported: + + >>> from decimal import Decimal as D + >>> median_absolute_deviation([D("1"), D("1"), D("2"), D("2"), D("4"), D("6"), D("9")]) + Decimal('1.4826') + + >>> from fractions import Fraction as F + >>> median_absolute_deviation([F(1), F(1), F(2), F(2), F(4), F(6), F(9)]) + Fraction(7413, 5000) + + """ + if not isinstance(scale, (int, float)): + raise TypeError( + 'scale must be an int or float, not ' + type(scale).__name__ + ) + + if iter(data) is data: + data = list(data) + + n = len(data) + if n == 0: + raise StatisticsError( + 'median_absolute_deviation requires at least one data point' + ) + + # All-NaN input raises StatisticsError; partial NaN propagates as NaN. + # statistics.median() leaves NaN where it sorts, which would give an + # implementation-defined center; we detect NaN explicitly so the + # behavior is well-defined regardless of where NaN ends up. + has_nan = False + all_nan = True + for x in data: + if isinstance(x, float) and math.isnan(x): + has_nan = True + else: + all_nan = False + + if all_nan: + raise StatisticsError( + 'median_absolute_deviation requires at least one data point' + ) + + if has_nan: + return float('nan') + + center = median(data) + deviations = [abs(x - center) for x in data] + mad = median(deviations) + + # Result type follows the input data, not the scale parameter. + # Decimal and Fraction inputs require explicit conversion so the + # returned value preserves precision; int and float inputs produce + # a float result via natural arithmetic (because the default scale + # is float). 
+ T = type(data[0]) + if T is Decimal or T is Fraction: + return T(Decimal(str(scale))) * mad + return scale * mad + + ## Statistics for relations between two inputs ############################# def covariance(x, y, /): From de7723c9b26d3a2864ab33849eb5265f6d437f42 Mon Sep 17 00:00:00 2001 From: AnandSundar Date: Thu, 25 Jun 2026 12:13:02 -0600 Subject: [PATCH 2/5] gh-152227: Test statistics.median_absolute_deviation Add TestMedianAbsoluteDeviation class in Lib/test/test_statistics.py, following the TestMedian / TestStdev pattern. Coverage: - Happy path: known answers for ints and floats, default / scale=1.0 / scale=2.0 / scale=3.0 / negative scale - Edge cases: empty (StatisticsError), single value, two-value symmetric, all-same, even-count averaging, generator input, tuple input - Error paths: non-numeric data (TypeError), all-NaN (StatisticsError), partial NaN (propagates) - Type acceptance: Decimal input -> Decimal result; Fraction input -> Fraction result (preserving precision); int input -> float result; mixed int+float -> float result - Scale type guard: Decimal / Fraction / str / list / None scale all raise TypeError Reuses NumericTestCase.assertApproxEqual for floating-point comparisons. --- Lib/test/test_statistics.py | 163 ++++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 700c5ac304f7179..576c436307dbefd 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -2233,6 +2233,169 @@ def test_center_not_at_mean(self): data = (1.0, 2.0) self.assertEqual(self.func(data, xbar=2.0), 1.0) + +class TestMedianAbsoluteDeviation(NumericTestCase): + """Tests for statistics.median_absolute_deviation.""" + def setUp(self): + self.func = statistics.median_absolute_deviation + + # --- Happy path: known answers --- + + def test_ints_known_answer(self): + # Canonical example: median=2, deviations=[1,1,0,0,2,4,7], MAD=1. 
+ self.assertEqual(self.func([1, 1, 2, 2, 4, 6, 9]), 1.4826) + + def test_floats_known_answer(self): + # Same shape with floats: median=2.0, MAD=1.0. + self.assertEqual( + self.func([1.0, 1.0, 2.0, 2.0, 4.0, 6.0, 9.0]), 1.4826 + ) + + def test_scale_default(self): + # Default scale is 1.4826 (the consistency constant). + self.assertEqual(self.func([1, 1, 2, 2, 4, 6, 9]), 1.4826) + + def test_scale_one(self): + # scale=1.0 returns the raw MAD. + self.assertEqual(self.func([1, 1, 2, 2, 4, 6, 9], scale=1.0), 1.0) + + def test_scale_custom(self): + # Any int or float scale is accepted. + self.assertEqual(self.func([1, 1, 2, 2, 4, 6, 9], scale=2.0), 2.0) + self.assertEqual(self.func([1, 1, 2, 2, 4, 6, 9], scale=3.0), 3.0) + + def test_scale_negative(self): + # Negative scale is allowed (returns a negative value). + self.assertEqual( + self.func([1, 1, 2, 2, 4, 6, 9], scale=-1.4826), -1.4826 + ) + + # --- Edge cases --- + + def test_empty_raises(self): + # Empty input raises StatisticsError. + self.assertRaises(statistics.StatisticsError, self.func, []) + + def test_single_value(self): + # A single data point has zero absolute deviation from itself. + self.assertEqual(self.func([5]), 0.0) + self.assertEqual(self.func([5], scale=2.5), 0.0) + + def test_two_values_symmetric(self): + # Symmetric two-value input gives MAD == half the spread. + self.assertEqual(self.func([1, 3], scale=1.0), 1.0) + + def test_all_same(self): + # All-same input has zero absolute deviation. + self.assertEqual(self.func([7, 7, 7, 7, 7]), 0.0) + self.assertEqual(self.func([3.14, 3.14, 3.14]), 0.0) + + def test_even_count_averages(self): + # Even-length input: median of deviations is the average of the + # two middle sorted deviations. + # [1,2,3,4]: median=2.5, deviations=[1.5,0.5,0.5,1.5], + # sorted devs=[0.5,0.5,1.5,1.5], MAD=(0.5+1.5)/2=1.0. 
+ self.assertEqual(self.func([1, 2, 3, 4]), 1.4826) + self.assertEqual(self.func([1, 2, 3, 4], scale=1.0), 1.0) + + def test_iterator_input(self): + # Generators are accepted and consumed exactly once. + result = self.func(x * 2 for x in [1, 2, 3, 4]) + self.assertEqual(result, 2.9652) + + def test_tuple_input(self): + # Tuples (and any iterable) are accepted. + self.assertEqual(self.func((1, 1, 2, 2, 4, 6, 9)), 1.4826) + + # --- Error and failure paths --- + + def test_non_numeric_raises_type_error(self): + # Non-numeric data raises TypeError from arithmetic. + self.assertRaises(TypeError, self.func, ['a', 'b', 'c']) + + def test_all_nan_raises(self): + # All-NaN input raises StatisticsError (median() would return NaN). + self.assertRaises( + statistics.StatisticsError, + self.func, + [float('nan')] * 3, + ) + + def test_partial_nan_returns_nan(self): + # Partial NaN propagates as NaN when at least one real value is + # present. + result = self.func([1.0, 2.0, float('nan'), 4.0, 5.0]) + self.assertTrue(math.isnan(result)) + + # --- Type acceptance --- + + def test_decimal_input(self): + # Decimal input returns Decimal. + D = Decimal + data = [D('1'), D('1'), D('2'), D('2'), D('4'), D('6'), D('9')] + result = self.func(data) + self.assertIsInstance(result, Decimal) + self.assertEqual(result, D('1.4826')) + + def test_fraction_input(self): + # Fraction input returns Fraction. + F = Fraction + data = [F(1), F(1), F(2), F(2), F(4), F(6), F(9)] + result = self.func(data) + self.assertIsInstance(result, Fraction) + # 1.4826 = 7413/5000 (exact decimal-to-fraction conversion). + self.assertEqual(result, F(7413, 5000)) + + def test_int_returns_float(self): + # Int input with default (float) scale yields a float result. + result = self.func([1, 1, 2, 2, 4, 6, 9]) + self.assertIsInstance(result, float) + + def test_decimal_input_preserves_precision(self): + # Decimal precision is preserved across the scale conversion. 
+ D = Decimal + # mad is Decimal('1') and scale=1 becomes Decimal('1'): exact result. + result = self.func([D('1'), D('2'), D('3')], scale=1) + self.assertEqual(result, D('1')) + + def test_fraction_input_preserves_precision(self): + # Fraction precision is preserved. + F = Fraction + result = self.func([F(1), F(2), F(3)], scale=1) + self.assertEqual(result, F(1)) + + def test_mixed_int_float_returns_float(self): + # Mixed int+float input yields a float result (natural promotion). + self.assertIsInstance(self.func([1, 2.5, 3]), float) + + # --- Scale type guard --- + + def test_scale_decimal_raises(self): + self.assertRaises( + TypeError, + self.func, + [1, 2, 3], + scale=Decimal('1.4826'), + ) + + def test_scale_fraction_raises(self): + self.assertRaises( + TypeError, + self.func, + [1, 2, 3], + scale=Fraction(14826, 10000), + ) + + def test_scale_string_raises(self): + self.assertRaises(TypeError, self.func, [1, 2, 3], scale='1.0') + + def test_scale_list_raises(self): + self.assertRaises(TypeError, self.func, [1, 2, 3], scale=[1.0]) + + def test_scale_none_raises(self): + self.assertRaises(TypeError, self.func, [1, 2, 3], scale=None) + + class TestGeometricMean(unittest.TestCase): def test_basics(self): From 44df7b9cc02c493f819b752be9f2954757c7384e Mon Sep 17 00:00:00 2001 From: AnandSundar Date: Thu, 25 Jun 2026 12:13:12 -0600 Subject: [PATCH 3/5] gh-152227: Document statistics.median_absolute_deviation Document the new median_absolute_deviation function in Doc/library/statistics.rst: - Add :func:`median_absolute_deviation` to the 'Measures of spread' autosummary table - Add a new .. function:: directive with full description, doctest examples (int, Decimal, Fraction), and a .. 
versionadded:: 3.16 annotation --- Doc/library/statistics.rst | 47 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst index dba0e26787d9516..f2472ad2eeb1a84 100644 --- a/Doc/library/statistics.rst +++ b/Doc/library/statistics.rst @@ -95,6 +95,7 @@ tends to deviate from the typical or average values. :func:`pvariance` Population variance of data. :func:`stdev` Sample standard deviation of data. :func:`variance` Sample variance of data. +:func:`median_absolute_deviation` Median absolute deviation of data. ======================= ============================================= Statistics for relations between two inputs @@ -654,6 +655,52 @@ However, for reading convenience, most of the examples show sorted sequences. :func:`pvariance` function as the *mu* parameter to get the variance of a sample. + +.. function:: median_absolute_deviation(data, *, scale=1.4826) + + Return the median absolute deviation of *data*, a non-empty sequence or + iterable of real-valued numbers. The median absolute deviation is a + measure of statistical dispersion: it is the median of the absolute + deviations from the median of *data*: + + .. doctest:: + + >>> median_absolute_deviation([1, 1, 2, 2, 4, 6, 9]) + 1.4826 + + Unlike the standard deviation, the median absolute deviation is not + sensitive to outliers; a single extreme value does not move it. This + makes it a robust measure of spread, particularly useful when *data* + contains outliers or comes from a heavy-tailed distribution. + + The *scale* argument scales the result by a constant factor. The + default ``scale=1.4826`` is the consistency constant for the normal + distribution: for normally distributed data, the result is a consistent + estimator of the population standard deviation. Pass ``scale=1.0`` to + retrieve the raw median absolute deviation, or any other ``int`` or + ``float`` to scale the result to a custom unit. 
Passing a + :class:`decimal.Decimal` or :class:`fractions.Fraction` *scale* raises + :exc:`TypeError`. + + If *data* is empty, :exc:`StatisticsError` is raised. If every value in + *data* is ``NaN``, :exc:`StatisticsError` is raised; if only some values + are ``NaN``, the result is ``NaN``. + + Decimals and Fractions are supported: + + .. doctest:: + + >>> from decimal import Decimal as D + >>> median_absolute_deviation([D("1"), D("1"), D("2"), D("2"), D("4"), D("6"), D("9")]) + Decimal('1.4826') + + >>> from fractions import Fraction as F + >>> median_absolute_deviation([F(1), F(1), F(2), F(2), F(4), F(6), F(9)]) + Fraction(7413, 5000) + + .. versionadded:: 3.16 + + .. function:: quantiles(data, *, n=4, method='exclusive') Divide *data* into *n* continuous intervals with equal probability. From 9ea5ed7225eb75e95a2cbff80a7b21f68225d1bc Mon Sep 17 00:00:00 2001 From: AnandSundar Date: Thu, 25 Jun 2026 12:13:26 -0600 Subject: [PATCH 4/5] gh-152227: Note statistics.median_absolute_deviation in What's New Add a new 'statistics' section under 'Improved modules' in Doc/whatsnew/3.16.rst, between shlex and tkinter (alphabetical order). Bullet describes the new statistics.median_absolute_deviation function, its default scale=1.4826 consistency constant, and the alternative scale=1 for the raw value. TODO: replace [REPLACE WITH CONTRIBUTOR NAME] and [REPLACE WITH PR NUMBER] placeholders with the actual contributor attribution when the PR is filed. --- Doc/whatsnew/3.16.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Doc/whatsnew/3.16.rst b/Doc/whatsnew/3.16.rst index 32962a9520fa691..1e048c96e750646 100644 --- a/Doc/whatsnew/3.16.rst +++ b/Doc/whatsnew/3.16.rst @@ -201,6 +201,18 @@ shlex (Contributed by Jay Berry in :gh:`148846`.) +statistics +---------- + +* Added :func:`statistics.median_absolute_deviation` for robust measurement + of statistical dispersion: the median of the absolute deviations from the + median. 
Pass ``scale=1`` for the raw value or accept the default + ``scale=1.4826`` (the consistency constant for the normal distribution) for + an estimator of the population standard deviation that is consistent with + :func:`statistics.stdev`. + (Contributed by [REPLACE WITH CONTRIBUTOR NAME] in :gh:`[REPLACE WITH PR NUMBER]`.) + + tkinter ------- From e2d82334326acacdb23359492fa12b6fa69145bc Mon Sep 17 00:00:00 2001 From: AnandSundar Date: Thu, 25 Jun 2026 12:21:51 -0600 Subject: [PATCH 5/5] gh-152227: Fill in contributor name and PR number in What's New Replace [REPLACE WITH CONTRIBUTOR NAME] and [REPLACE WITH PR NUMBER] placeholders with Anand Sundar and gh-152227 now that the PR is open. --- Doc/whatsnew/3.16.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.16.rst b/Doc/whatsnew/3.16.rst index 1e048c96e750646..d72442a5dedebfd 100644 --- a/Doc/whatsnew/3.16.rst +++ b/Doc/whatsnew/3.16.rst @@ -210,7 +210,7 @@ statistics ``scale=1.4826`` (the consistency constant for the normal distribution) for an estimator of the population standard deviation that is consistent with :func:`statistics.stdev`. - (Contributed by [REPLACE WITH CONTRIBUTOR NAME] in :gh:`[REPLACE WITH PR NUMBER]`.) + (Contributed by Anand Sundar in :gh:`152227`.) tkinter