feat: some more statistics
This commit is contained in:
@@ -61,6 +61,17 @@ def normal_cumulative_density_function(x, mean, difference_list):
|
|||||||
std_dev = standard_deviation(difference_list, False)
|
std_dev = standard_deviation(difference_list, False)
|
||||||
return norm.cdf(x, mean, std_dev)
|
return norm.cdf(x, mean, std_dev)
|
||||||
|
|
||||||
|
# Check the expected value for a given probability
|
||||||
|
def inverse_cumulative_density_function(prob, mean, std_dev):
    """Return the value whose cumulative probability equals ``prob``.

    Delegates to ``norm.ppf`` (``norm`` is imported elsewhere in this file,
    presumably ``scipy.stats.norm`` -- confirm against the import block).

    Args:
        prob: Cumulative probability to invert.
        mean: Mean passed to ``norm.ppf`` as its second argument.
        std_dev: Standard deviation passed to ``norm.ppf`` as its third
            argument.

    Returns:
        Whatever ``norm.ppf(prob, mean, std_dev)`` returns.
    """
    return norm.ppf(prob, mean, std_dev)
|
||||||
|
|
||||||
|
# Z-scores are valuable in order to normalize 2 pieces of data
|
||||||
|
def z_score(value, data_mean, std_deviation):
    """Return how many standard deviations ``value`` lies from ``data_mean``.

    Args:
        value: The observation to normalize.
        data_mean: Mean of the data the observation belongs to.
        std_deviation: Standard deviation of that data. It is used as the
            divisor and is not guarded against zero.

    Returns:
        ``(value - data_mean) / std_deviation``.
    """
    deviation_from_mean = value - data_mean
    return deviation_from_mean / std_deviation
|
||||||
|
|
||||||
|
def coeficient_of_variation(std_deviation, mean):
    """Return the ratio of the standard deviation to the mean.

    Args:
        std_deviation: Standard deviation of the data.
        mean: Mean of the data. It is used as the divisor and is not
            guarded against zero.

    Returns:
        ``std_deviation / mean``.
    """
    relative_spread = std_deviation / mean
    return relative_spread
|
||||||
|
|
||||||
def test_statistics_module():
|
def test_statistics_module():
|
||||||
print("=== Statistics module ===")
|
print("=== Statistics module ===")
|
||||||
@@ -82,3 +93,10 @@ def test_statistics_module():
|
|||||||
print("== Normal distribution ==")
|
print("== Normal distribution ==")
|
||||||
print(">> The probability_density_function for x = 1 over the example data is {0}".format(normal_probability_density_function(1, sum(differences) / len(differences), standard_deviation(differences, False))))
|
print(">> The probability_density_function for x = 1 over the example data is {0}".format(normal_probability_density_function(1, sum(differences) / len(differences), standard_deviation(differences, False))))
|
||||||
print(">> The probability for observing a value smaller than 1 is given by the cumulative density function and it is: {0}".format(normal_cumulative_density_function(1, sum(differences) / len(differences), differences)))
|
print(">> The probability for observing a value smaller than 1 is given by the cumulative density function and it is: {0}".format(normal_cumulative_density_function(1, sum(differences) / len(differences), differences)))
|
||||||
|
|
||||||
|
print("== Z-scores ==")
|
||||||
|
print("A house (A) of 150K in a neighborhood of 140K mean and 3K std_dev has a Z-score: {0}".format(z_score(150000, 140000, 3000)))
|
||||||
|
print("A house (B) of 815K in a neighborhood of 800K mean and 10K std_dev has a Z-score: {0}".format(z_score(815000, 800000, 10000)))
|
||||||
|
print("The House A is much more expensive because its z-score is higher.")
|
||||||
|
print("The neighborhood of B has a coeficient of variation: {0}, and the one of A: {1}".format(coeficient_of_variation(3000, 140000), coeficient_of_variation(10000, 800000)))
|
||||||
|
print("This means that the neighborhood of A has more spread in its prices")
|
||||||
|
|||||||
Reference in New Issue
Block a user