feat: some more statistics

This commit is contained in:
2025-12-05 23:12:26 +01:00
parent c37dba44a8
commit 170697c955

View File

@@ -61,6 +61,17 @@ def normal_cumulative_density_function(x, mean, difference_list):
std_dev = standard_deviation(difference_list, False) std_dev = standard_deviation(difference_list, False)
return norm.cdf(x, mean, std_dev) return norm.cdf(x, mean, std_dev)
# Find the expected value (quantile) for a given cumulative probability
def inverse_cumulative_density_function(prob, mean, std_dev):
    """Return the value below which a normal variable falls with probability *prob*.

    This is the inverse of the cumulative density function: it delegates to
    ``norm.ppf`` with ``mean`` as the location and ``std_dev`` as the scale.

    NOTE(review): ``norm`` is assumed to be ``scipy.stats.norm``; the import
    is not visible in this part of the file -- confirm.

    Args:
        prob: Cumulative probability to invert.
        mean: Mean of the normal distribution.
        std_dev: Standard deviation of the normal distribution.

    Returns:
        Whatever ``norm.ppf(prob, mean, std_dev)`` returns.
    """
    return norm.ppf(prob, mean, std_dev)
# Z-scores put values from different distributions on a common scale so they can be compared
def z_score(value, data_mean, std_deviation):
    """Return how many standard deviations *value* lies from *data_mean*.

    Args:
        value: The observation to standardize.
        data_mean: Mean of the data the observation belongs to.
        std_deviation: Standard deviation of that data. The result is a plain
            division by this value, so a zero here divides by zero.

    Returns:
        ``(value - data_mean) / std_deviation``; positive when *value* is
        above the mean, negative when below.
    """
    return (value - data_mean) / std_deviation
def coeficient_of_variation(std_deviation, mean):
    """Return the coefficient of variation: standard deviation relative to the mean.

    The function name keeps its original spelling so existing callers
    continue to work.

    Args:
        std_deviation: Standard deviation of the data.
        mean: Mean of the data. The result is a plain division by this
            value, so a zero here divides by zero.

    Returns:
        ``std_deviation / mean``, a unitless measure of spread.
    """
    return std_deviation / mean
def test_statistics_module(): def test_statistics_module():
print("=== Statistics module ===") print("=== Statistics module ===")
@@ -82,3 +93,10 @@ def test_statistics_module():
print("== Normal distribution ==") print("== Normal distribution ==")
print(">> The probability_density_function for x = 1 over the example data is {0}".format(normal_probability_density_function(1, sum(differences) / len(differences), standard_deviation(differences, False)))) print(">> The probability_density_function for x = 1 over the example data is {0}".format(normal_probability_density_function(1, sum(differences) / len(differences), standard_deviation(differences, False))))
print(">> The probability for observing a value smaller than 1 is given by the cumulative density function and it is: {0}".format(normal_cumulative_density_function(1, sum(differences) / len(differences), differences))) print(">> The probability for observing a value smaller than 1 is given by the cumulative density function and it is: {0}".format(normal_cumulative_density_function(1, sum(differences) / len(differences), differences)))
print("== Z-scores ==")
print("A house (A) of 150K in a neighborhood of 140K mean and 3K std_dev has a Z-score: {0}".format(z_score(150000, 140000, 3000)))
print("A house (B) of 815K in a neighborhood of 800K mean and 10K std_dev has a Z-score: {0}".format(z_score(815000, 800000, 10000)))
print("The House A is much more expensive because its z-score is higher.")
print("The neighborhood of B has a coeficient of variation: {0}, and the one of A: {1}".format(coeficient_of_variation(3000, 140000), coeficient_of_variation(10000, 800000)))
print("This means that the neighborhood of A has more spread in its prices")