From 170697c955262218188e651ce8baefb4da7e0ab1 Mon Sep 17 00:00:00 2001
From: Daniel Heras Quesada
Date: Fri, 5 Dec 2025 23:12:26 +0100
Subject: [PATCH] feat: some more statistics

---
Review notes (this section is ignored by git am):
* NOTE(review): line breaks and indentation were reconstructed from a
  whitespace-collapsed copy of this patch. 4-space indentation and the
  position of the blank context lines are assumed; confirm against
  src/modules/statistics.py before applying.
* The .format() arguments of the coefficient-of-variation print were swapped
  so that the value labelled "B" is 10000/800000 and the value labelled "A"
  is 3000/140000; previously the two labels were attached to each other's
  values.
* Comment wording on the two new helpers was corrected; added-line counts are
  unchanged.

 src/modules/statistics.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/modules/statistics.py b/src/modules/statistics.py
index fc6c80a..2a65059 100644
--- a/src/modules/statistics.py
+++ b/src/modules/statistics.py
@@ -61,6 +61,17 @@ def normal_cumulative_density_function(x, mean, difference_list):
     std_dev = standard_deviation(difference_list, False)
     return norm.cdf(x, mean, std_dev)
 
+# Inverse of the normal CDF (norm.ppf): the value x at which the cumulative probability equals prob
+def inverse_cumulative_density_function(prob, mean, std_dev):
+    x = norm.ppf(prob, mean, std_dev)
+    return x
+
+# Z-score: distance of value from data_mean in units of std_deviation, so values from different datasets can be compared
+def z_score(value, data_mean, std_deviation):
+    return (value - data_mean) / std_deviation
+
+def coeficient_of_variation(std_deviation, mean):
+    return (std_deviation / mean)
 
 def test_statistics_module():
     print("=== Statistics module ===")
@@ -82,3 +93,10 @@ def test_statistics_module():
     print("== Normal distribution ==")
     print(">> The probability_density_function for x = 1 over the example data is {0}".format(normal_probability_density_function(1, sum(differences) / len(differences), standard_deviation(differences, False))))
     print(">> The probability for observing a value smaller than 1 is given by the cumulative density function and it is: {0}".format(normal_cumulative_density_function(1, sum(differences) / len(differences), differences)))
+
+    print("== Z-scores ==")
+    print("A house (A) of 150K in a neighborhood of 140K mean and 3K std_dev has a Z-score: {0}".format(z_score(150000, 140000, 3000)))
+    print("A house (B) of 815K in a neighborhood of 800K mean and 10K std_dev has a Z-score: {0}".format(z_score(815000, 800000, 10000)))
+    print("The House A is much more expensive because its z-score is higher.")
+    print("The neighborhood of B has a coeficient of variation: {0}, and the one of A: {1}".format(coeficient_of_variation(10000, 800000), coeficient_of_variation(3000, 140000)))
+    print("This means that the neighborhood of A has more spread in its prices")