weijie-chen
diff --git a/‎Chapter 1 - Descriptive Statistics.ipynb
+39-30 b/‎Chapter 1 - Descriptive Statistics.ipynb
+39-30
@@ -10,7 +10,8 @@
     "import matplotlib.pyplot as plt\n",
     "import scipy as sp\n",
     "from scipy import stats\n",
-    "plt.style.use('fivethirtyeight')\n",
+    "\n",
+    "plt.style.use(\"fivethirtyeight\")\n",
     "import pandas as pd"
    ]
   },
@@ -106,8 +107,8 @@
     "rollings = np.random.randint(1, 7, 1000)\n",
     "\n",
     "fig, ax = plt.subplots(figsize=(9, 9))\n",
-    "n, bins, patches = ax.hist(rollings, bins = 6)\n",
-    "ax.set_title('Frequency Histogram of 1000 Times of Rolling a Dice', size = 19)\n",
+    "n, bins, patches = ax.hist(rollings, bins=6)\n",
+    "ax.set_title(\"Frequency Histogram of 1000 Times of Rolling a Dice\", size=19)\n",
     "ax.set_xlim(0, 7)\n",
     "ax.set_ylim(0, 400)\n",
     "plt.show()"
@@ -139,7 +140,7 @@
    "source": [
     "x = np.random.randn(1000)\n",
     "fig, ax = plt.subplots(figsize=(9, 9))\n",
-    "n, bins, patches = ax.hist(x, bins = 50, density=True)"
+    "n, bins, patches = ax.hist(x, bins=50, density=True)"
    ]
   },
   {
@@ -168,7 +169,7 @@
     }
    ],
    "source": [
-    "fig, ax = plt.subplots(nrows = 2, ncols = 1,figsize=(9, 9))\n",
+    "fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(9, 9))\n",
     "ax[0].plot(bins[:50], n)\n",
     "ax[1].plot(np.cumsum(n))\n",
     "plt.show()"
@@ -374,7 +375,7 @@
     }
    ],
    "source": [
-    "q75, q25 = np.percentile(x, [75 ,25]) # IQR\n",
+    "q75, q25 = np.percentile(x, [75, 25])  # IQR\n",
     "q75 - q25"
    ]
   },
@@ -441,8 +442,8 @@
     }
    ],
    "source": [
-    "sample_height = np.random.choice(population_height, size = 100)\n",
-    "np.var(sample_height, ddof = 1)"
+    "sample_height = np.random.choice(population_height, size=100)\n",
+    "np.var(sample_height, ddof=1)"
    ]
   },
   {
@@ -473,13 +474,18 @@
    "source": [
     "sample_height_array = []\n",
     "for i in range(10000):\n",
-    "    sample_height = np.random.choice(population_height, size = 100)\n",
+    "    sample_height = np.random.choice(population_height, size=100)\n",
     "    sample_height_array.append(np.var(sample_height, ddof=1))\n",
     "fig, ax = plt.subplots(figsize=(9, 9))\n",
-    "n, bins, patches = ax.hist(sample_height_array, bins = 50)\n",
-    "ax.axvline(x=np.mean(sample_height_array), color = 'tomato')\n",
-    "ax.text(np.mean(sample_height_array)+1, np.max(n), r'$\\mu_\\sigma = {:.2f}$'.format(np.mean(sample_height_array)), size = 16)\n",
-    "ax.set_title('Sampling Distribution of Variance Estimates', size = 19)\n",
+    "n, bins, patches = ax.hist(sample_height_array, bins=50)\n",
+    "ax.axvline(x=np.mean(sample_height_array), color=\"tomato\")\n",
+    "ax.text(\n",
+    "    np.mean(sample_height_array) + 1,\n",
+    "    np.max(n),\n",
+    "    r\"$\\mu_\\sigma = {:.2f}$\".format(np.mean(sample_height_array)),\n",
+    "    size=16,\n",
+    ")\n",
+    "ax.set_title(\"Sampling Distribution of Variance Estimates\", size=19)\n",
     "plt.show()"
    ]
   },
@@ -577,7 +583,7 @@
    ],
    "source": [
     "x = np.random.randn(10)\n",
-    "z = (x - np.mean(x))/np.std(x)\n",
+    "z = (x - np.mean(x)) / np.std(x)\n",
     "np.round(z, 2)"
    ]
   },
@@ -607,8 +613,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "z_l = (166 - 174)/4 # lower z-score\n",
-    "z_u = (182 - 174)/4 # upper z-score"
+    "z_l = (166 - 174) / 4  # lower z-score\n",
+    "z_u = (182 - 174) / 4  # upper z-score"
    ]
   },
   {
@@ -632,8 +638,8 @@
     }
    ],
    "source": [
-    "p = 1 - 1/z_l**2\n",
-    "print('At least {0}% of people are within 168cm and 182cm in Helsinki.'.format(p*100))"
+    "p = 1 - 1 / z_l**2\n",
+    "print(\"At least {0}% of people are within 166cm and 182cm in Helsinki.\".format(p * 100))"
    ]
   },
   {
@@ -663,19 +669,20 @@
    ],
    "source": [
     "def chebyshev(z):\n",
-    "    return 1 - 1/z**2\n",
+    "    return 1 - 1 / z**2\n",
+    "\n",
     "\n",
     "chebyshev_array = []\n",
     "for z in np.arange(1, 21, 0.5):\n",
     "    chebyshev_array.append(chebyshev(z))\n",
     "\n",
     "fig, ax = plt.subplots(figsize=(9, 9))\n",
     "ax.plot(np.arange(1, 21, 0.5), chebyshev_array)\n",
-    "ax.scatter(2.5, chebyshev(2.5), s = 100, color = 'red', zorder = 3)\n",
-    "ax.text(2.5+.5, chebyshev(2.5), r'(2.5, {}%)'.format(chebyshev(2.5)*100))\n",
+    "ax.scatter(2.5, chebyshev(2.5), s=100, color=\"red\", zorder=3)\n",
+    "ax.text(2.5 + 0.5, chebyshev(2.5), r\"(2.5, {}%)\".format(chebyshev(2.5) * 100))\n",
     "ax.set_title(\"Chebyshev's Theorem\")\n",
-    "ax.set_xlabel('z-score')\n",
-    "ax.set_ylabel('Probability')\n",
+    "ax.set_xlabel(\"z-score\")\n",
+    "ax.set_ylabel(\"Probability\")\n",
     "plt.show()"
    ]
   },
@@ -785,6 +792,7 @@
    ],
    "source": [
     "import plot_material\n",
+    "\n",
     "plot_material.reg_corr_plot()"
    ]
   },
@@ -827,11 +835,12 @@
    ],
    "source": [
     "X = np.linspace(-10, 10, 200)\n",
-    "Y = 1/(1+np.exp(-X))\n",
-    "df_dict = {'X': X, 'Y': Y}\n",
+    "Y = 1 / (1 + np.exp(-X))\n",
+    "df_dict = {\"X\": X, \"Y\": Y}\n",
     "df = pd.DataFrame(df_dict)\n",
     "\n",
-    "df.plot(x ='X', y ='Y', kind='scatter', figsize=(16, 7)); plt.show()"
+    "df.plot(x=\"X\", y=\"Y\", kind=\"scatter\", figsize=(16, 7))\n",
+    "plt.show()"
    ]
   },
   {
@@ -891,7 +900,7 @@
     }
    ],
    "source": [
-    "df.corr(method='pearson')"
+    "df.corr(method=\"pearson\")"
    ]
   },
   {
@@ -908,7 +917,7 @@
     }
    ],
    "source": [
-    "print('Pearson coeffcient: {}'.format(sp.stats.stats.pearsonr(df['X'], df['Y'])[0]))"
+    "print(\"Pearson coefficient: {}\".format(sp.stats.pearsonr(df[\"X\"], df[\"Y\"])[0]))"
    ]
   },
   {
@@ -925,7 +934,7 @@
     }
    ],
    "source": [
-    "print('Pearson coeffcient: {}'.format(sp.stats.stats.spearmanr(df['X'], df['Y'])[0]))"
+    "print(\"Spearman coefficient: {}\".format(sp.stats.spearmanr(df[\"X\"], df[\"Y\"])[0]))"
    ]
   },
   {
@@ -943,7 +952,7 @@
    ],
    "source": [
     "sp.stats.stats.kendalltau(X, Y)\n",
-    "print('Pearson coeffcient: {}'.format(sp.stats.stats.kendalltau(df['X'], df['Y'])[0]))"
+    "print(\"Kendall tau coefficient: {}\".format(sp.stats.kendalltau(df[\"X\"], df[\"Y\"])[0]))"
    ]
   },
   {