Skip to content

Commit e87aa7b

Browse files
committed
black format
1 parent be89d29 commit e87aa7b

7 files changed

+1314
-801
lines changed

Chapter 1 - Descriptive Statistics.ipynb

+39-30
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
"import matplotlib.pyplot as plt\n",
1111
"import scipy as sp\n",
1212
"from scipy import stats\n",
13-
"plt.style.use('fivethirtyeight')\n",
13+
"\n",
14+
"plt.style.use(\"fivethirtyeight\")\n",
1415
"import pandas as pd"
1516
]
1617
},
@@ -106,8 +107,8 @@
106107
"rollings = np.random.randint(1, 7, 1000)\n",
107108
"\n",
108109
"fig, ax = plt.subplots(figsize=(9, 9))\n",
109-
"n, bins, patches = ax.hist(rollings, bins = 6)\n",
110-
"ax.set_title('Frequency Histogram of 1000 Times of Rolling a Dice', size = 19)\n",
110+
"n, bins, patches = ax.hist(rollings, bins=6)\n",
111+
"ax.set_title(\"Frequency Histogram of 1000 Times of Rolling a Dice\", size=19)\n",
111112
"ax.set_xlim(0, 7)\n",
112113
"ax.set_ylim(0, 400)\n",
113114
"plt.show()"
@@ -139,7 +140,7 @@
139140
"source": [
140141
"x = np.random.randn(1000)\n",
141142
"fig, ax = plt.subplots(figsize=(9, 9))\n",
142-
"n, bins, patches = ax.hist(x, bins = 50, density=True)"
143+
"n, bins, patches = ax.hist(x, bins=50, density=True)"
143144
]
144145
},
145146
{
@@ -168,7 +169,7 @@
168169
}
169170
],
170171
"source": [
171-
"fig, ax = plt.subplots(nrows = 2, ncols = 1,figsize=(9, 9))\n",
172+
"fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(9, 9))\n",
172173
"ax[0].plot(bins[:50], n)\n",
173174
"ax[1].plot(np.cumsum(n))\n",
174175
"plt.show()"
@@ -374,7 +375,7 @@
374375
}
375376
],
376377
"source": [
377-
"q75, q25 = np.percentile(x, [75 ,25]) # IQR\n",
378+
"q75, q25 = np.percentile(x, [75, 25]) # IQR\n",
378379
"q75 - q25"
379380
]
380381
},
@@ -441,8 +442,8 @@
441442
}
442443
],
443444
"source": [
444-
"sample_height = np.random.choice(population_height, size = 100)\n",
445-
"np.var(sample_height, ddof = 1)"
445+
"sample_height = np.random.choice(population_height, size=100)\n",
446+
"np.var(sample_height, ddof=1)"
446447
]
447448
},
448449
{
@@ -473,13 +474,18 @@
473474
"source": [
474475
"sample_height_array = []\n",
475476
"for i in range(10000):\n",
476-
" sample_height = np.random.choice(population_height, size = 100)\n",
477+
" sample_height = np.random.choice(population_height, size=100)\n",
477478
" sample_height_array.append(np.var(sample_height, ddof=1))\n",
478479
"fig, ax = plt.subplots(figsize=(9, 9))\n",
479-
"n, bins, patches = ax.hist(sample_height_array, bins = 50)\n",
480-
"ax.axvline(x=np.mean(sample_height_array), color = 'tomato')\n",
481-
"ax.text(np.mean(sample_height_array)+1, np.max(n), r'$\\mu_\\sigma = {:.2f}$'.format(np.mean(sample_height_array)), size = 16)\n",
482-
"ax.set_title('Sampling Distribution of Variance Estimates', size = 19)\n",
480+
"n, bins, patches = ax.hist(sample_height_array, bins=50)\n",
481+
"ax.axvline(x=np.mean(sample_height_array), color=\"tomato\")\n",
482+
"ax.text(\n",
483+
" np.mean(sample_height_array) + 1,\n",
484+
" np.max(n),\n",
485+
" r\"$\\mu_\\sigma = {:.2f}$\".format(np.mean(sample_height_array)),\n",
486+
" size=16,\n",
487+
")\n",
488+
"ax.set_title(\"Sampling Distribution of Variance Estimates\", size=19)\n",
483489
"plt.show()"
484490
]
485491
},
@@ -577,7 +583,7 @@
577583
],
578584
"source": [
579585
"x = np.random.randn(10)\n",
580-
"z = (x - np.mean(x))/np.std(x)\n",
586+
"z = (x - np.mean(x)) / np.std(x)\n",
581587
"np.round(z, 2)"
582588
]
583589
},
@@ -607,8 +613,8 @@
607613
"metadata": {},
608614
"outputs": [],
609615
"source": [
610-
"z_l = (166 - 174)/4 # lower z-score\n",
611-
"z_u = (182 - 174)/4 # upper z-score"
616+
"z_l = (166 - 174) / 4 # lower z-score\n",
617+
"z_u = (182 - 174) / 4 # upper z-score"
612618
]
613619
},
614620
{
@@ -632,8 +638,8 @@
632638
}
633639
],
634640
"source": [
635-
"p = 1 - 1/z_l**2\n",
636-
"print('At least {0}% of people are within 166cm and 182cm in Helsinki.'.format(p*100))"
641+
"p = 1 - 1 / z_l**2\n",
642+
"print(\"At least {0}% of people are within 166cm and 182cm in Helsinki.\".format(p * 100))"
637643
]
638644
},
639645
{
@@ -663,19 +669,20 @@
663669
],
664670
"source": [
665671
"def chebyshev(z):\n",
666-
" return 1 - 1/z**2\n",
672+
" return 1 - 1 / z**2\n",
673+
"\n",
667674
"\n",
668675
"chebyshev_array = []\n",
669676
"for z in np.arange(1, 21, 0.5):\n",
670677
" chebyshev_array.append(chebyshev(z))\n",
671678
"\n",
672679
"fig, ax = plt.subplots(figsize=(9, 9))\n",
673680
"ax.plot(np.arange(1, 21, 0.5), chebyshev_array)\n",
674-
"ax.scatter(2.5, chebyshev(2.5), s = 100, color = 'red', zorder = 3)\n",
675-
"ax.text(2.5+.5, chebyshev(2.5), r'(2.5, {}%)'.format(chebyshev(2.5)*100))\n",
681+
"ax.scatter(2.5, chebyshev(2.5), s=100, color=\"red\", zorder=3)\n",
682+
"ax.text(2.5 + 0.5, chebyshev(2.5), r\"(2.5, {}%)\".format(chebyshev(2.5) * 100))\n",
676683
"ax.set_title(\"Chebyshev's Theorem\")\n",
677-
"ax.set_xlabel('z-score')\n",
678-
"ax.set_ylabel('Probability')\n",
684+
"ax.set_xlabel(\"z-score\")\n",
685+
"ax.set_ylabel(\"Probability\")\n",
679686
"plt.show()"
680687
]
681688
},
@@ -785,6 +792,7 @@
785792
],
786793
"source": [
787794
"import plot_material\n",
795+
"\n",
788796
"plot_material.reg_corr_plot()"
789797
]
790798
},
@@ -827,11 +835,12 @@
827835
],
828836
"source": [
829837
"X = np.linspace(-10, 10, 200)\n",
830-
"Y = 1/(1+np.exp(-X))\n",
831-
"df_dict = {'X': X, 'Y': Y}\n",
838+
"Y = 1 / (1 + np.exp(-X))\n",
839+
"df_dict = {\"X\": X, \"Y\": Y}\n",
832840
"df = pd.DataFrame(df_dict)\n",
833841
"\n",
834-
"df.plot(x ='X', y ='Y', kind='scatter', figsize=(16, 7)); plt.show()"
842+
"df.plot(x=\"X\", y=\"Y\", kind=\"scatter\", figsize=(16, 7))\n",
843+
"plt.show()"
835844
]
836845
},
837846
{
@@ -891,7 +900,7 @@
891900
}
892901
],
893902
"source": [
894-
"df.corr(method='pearson')"
903+
"df.corr(method=\"pearson\")"
895904
]
896905
},
897906
{
@@ -908,7 +917,7 @@
908917
}
909918
],
910919
"source": [
911-
"print('Pearson coefficient: {}'.format(sp.stats.stats.pearsonr(df['X'], df['Y'])[0]))"
920+
"print(\"Pearson coefficient: {}\".format(sp.stats.stats.pearsonr(df[\"X\"], df[\"Y\"])[0]))"
912921
]
913922
},
914923
{
@@ -925,7 +934,7 @@
925934
}
926935
],
927936
"source": [
928-
"print('Spearman coefficient: {}'.format(sp.stats.stats.spearmanr(df['X'], df['Y'])[0]))"
937+
"print(\"Spearman coefficient: {}\".format(sp.stats.stats.spearmanr(df[\"X\"], df[\"Y\"])[0]))"
929938
]
930939
},
931940
{
@@ -943,7 +952,7 @@
943952
],
944953
"source": [
945954
"sp.stats.stats.kendalltau(X, Y)\n",
946-
"print('Kendall tau coefficient: {}'.format(sp.stats.stats.kendalltau(df['X'], df['Y'])[0]))"
955+
"print(\"Kendall tau coefficient: {}\".format(sp.stats.stats.kendalltau(df[\"X\"], df[\"Y\"])[0]))"
947956
]
948957
},
949958
{

0 commit comments

Comments
 (0)