|
10 | 10 | "import matplotlib.pyplot as plt\n",
|
11 | 11 | "import scipy as sp\n",
|
12 | 12 | "from scipy import stats\n",
|
13 |
| - "plt.style.use('fivethirtyeight')\n", |
| 13 | + "\n", |
| 14 | + "plt.style.use(\"fivethirtyeight\")\n", |
14 | 15 | "import pandas as pd"
|
15 | 16 | ]
|
16 | 17 | },
|
|
106 | 107 | "rollings = np.random.randint(1, 7, 1000)\n",
|
107 | 108 | "\n",
|
108 | 109 | "fig, ax = plt.subplots(figsize=(9, 9))\n",
|
109 |
| - "n, bins, patches = ax.hist(rollings, bins = 6)\n", |
110 |
| - "ax.set_title('Frequency Histogram of 1000 Times of Rolling a Dice', size = 19)\n", |
| 110 | + "n, bins, patches = ax.hist(rollings, bins=6)\n", |
| 111 | + "ax.set_title(\"Frequency Histogram of 1000 Times of Rolling a Dice\", size=19)\n", |
111 | 112 | "ax.set_xlim(0, 7)\n",
|
112 | 113 | "ax.set_ylim(0, 400)\n",
|
113 | 114 | "plt.show()"
|
|
139 | 140 | "source": [
|
140 | 141 | "x = np.random.randn(1000)\n",
|
141 | 142 | "fig, ax = plt.subplots(figsize=(9, 9))\n",
|
142 |
| - "n, bins, patches = ax.hist(x, bins = 50, density=True)" |
| 143 | + "n, bins, patches = ax.hist(x, bins=50, density=True)" |
143 | 144 | ]
|
144 | 145 | },
|
145 | 146 | {
|
|
168 | 169 | }
|
169 | 170 | ],
|
170 | 171 | "source": [
|
171 |
| - "fig, ax = plt.subplots(nrows = 2, ncols = 1,figsize=(9, 9))\n", |
| 172 | + "fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(9, 9))\n", |
172 | 173 | "ax[0].plot(bins[:50], n)\n",
|
173 | 174 | "ax[1].plot(np.cumsum(n))\n",
|
174 | 175 | "plt.show()"
|
|
374 | 375 | }
|
375 | 376 | ],
|
376 | 377 | "source": [
|
377 |
| - "q75, q25 = np.percentile(x, [75 ,25]) # IQR\n", |
| 378 | + "q75, q25 = np.percentile(x, [75, 25]) # IQR\n", |
378 | 379 | "q75 - q25"
|
379 | 380 | ]
|
380 | 381 | },
|
|
441 | 442 | }
|
442 | 443 | ],
|
443 | 444 | "source": [
|
444 |
| - "sample_height = np.random.choice(population_height, size = 100)\n", |
445 |
| - "np.var(sample_height, ddof = 1)" |
| 445 | + "sample_height = np.random.choice(population_height, size=100)\n", |
| 446 | + "np.var(sample_height, ddof=1)" |
446 | 447 | ]
|
447 | 448 | },
|
448 | 449 | {
|
|
473 | 474 | "source": [
|
474 | 475 | "sample_height_array = []\n",
|
475 | 476 | "for i in range(10000):\n",
|
476 |
| - " sample_height = np.random.choice(population_height, size = 100)\n", |
| 477 | + " sample_height = np.random.choice(population_height, size=100)\n", |
477 | 478 | " sample_height_array.append(np.var(sample_height, ddof=1))\n",
|
478 | 479 | "fig, ax = plt.subplots(figsize=(9, 9))\n",
|
479 |
| - "n, bins, patches = ax.hist(sample_height_array, bins = 50)\n", |
480 |
| - "ax.axvline(x=np.mean(sample_height_array), color = 'tomato')\n", |
481 |
| - "ax.text(np.mean(sample_height_array)+1, np.max(n), r'$\\mu_\\sigma = {:.2f}$'.format(np.mean(sample_height_array)), size = 16)\n", |
482 |
| - "ax.set_title('Sampling Distribution of Variance Estimates', size = 19)\n", |
| 480 | + "n, bins, patches = ax.hist(sample_height_array, bins=50)\n", |
| 481 | + "ax.axvline(x=np.mean(sample_height_array), color=\"tomato\")\n", |
| 482 | + "ax.text(\n", |
| 483 | + " np.mean(sample_height_array) + 1,\n", |
| 484 | + " np.max(n),\n", |
| 485 | + " r\"$\\mu_\\sigma = {:.2f}$\".format(np.mean(sample_height_array)),\n", |
| 486 | + " size=16,\n", |
| 487 | + ")\n", |
| 488 | + "ax.set_title(\"Sampling Distribution of Variance Estimates\", size=19)\n", |
483 | 489 | "plt.show()"
|
484 | 490 | ]
|
485 | 491 | },
|
|
577 | 583 | ],
|
578 | 584 | "source": [
|
579 | 585 | "x = np.random.randn(10)\n",
|
580 |
| - "z = (x - np.mean(x))/np.std(x)\n", |
| 586 | + "z = (x - np.mean(x)) / np.std(x)\n", |
581 | 587 | "np.round(z, 2)"
|
582 | 588 | ]
|
583 | 589 | },
|
|
607 | 613 | "metadata": {},
|
608 | 614 | "outputs": [],
|
609 | 615 | "source": [
|
610 |
| - "z_l = (166 - 174)/4 # lower z-score\n", |
611 |
| - "z_u = (182 - 174)/4 # upper z-score" |
| 616 | + "z_l = (166 - 174) / 4 # lower z-score\n", |
| 617 | + "z_u = (182 - 174) / 4 # upper z-score" |
612 | 618 | ]
|
613 | 619 | },
|
614 | 620 | {
|
|
632 | 638 | }
|
633 | 639 | ],
|
634 | 640 | "source": [
|
635 |
| - "p = 1 - 1/z_l**2\n", |
636 |
| - "print('At least {0}% of people are within 168cm and 182cm in Helsinki.'.format(p*100))" |
| 641 | + "p = 1 - 1 / z_l**2\n", |
| 642 | + "print(\"At least {0}% of people are within 168cm and 182cm in Helsinki.\".format(p * 100))" |
637 | 643 | ]
|
638 | 644 | },
|
639 | 645 | {
|
|
663 | 669 | ],
|
664 | 670 | "source": [
|
665 | 671 | "def chebyshev(z):\n",
|
666 |
| - " return 1 - 1/z**2\n", |
| 672 | + " return 1 - 1 / z**2\n", |
| 673 | + "\n", |
667 | 674 | "\n",
|
668 | 675 | "chebyshev_array = []\n",
|
669 | 676 | "for z in np.arange(1, 21, 0.5):\n",
|
670 | 677 | " chebyshev_array.append(chebyshev(z))\n",
|
671 | 678 | "\n",
|
672 | 679 | "fig, ax = plt.subplots(figsize=(9, 9))\n",
|
673 | 680 | "ax.plot(np.arange(1, 21, 0.5), chebyshev_array)\n",
|
674 |
| - "ax.scatter(2.5, chebyshev(2.5), s = 100, color = 'red', zorder = 3)\n", |
675 |
| - "ax.text(2.5+.5, chebyshev(2.5), r'(2.5, {}%)'.format(chebyshev(2.5)*100))\n", |
| 681 | + "ax.scatter(2.5, chebyshev(2.5), s=100, color=\"red\", zorder=3)\n", |
| 682 | + "ax.text(2.5 + 0.5, chebyshev(2.5), r\"(2.5, {}%)\".format(chebyshev(2.5) * 100))\n", |
676 | 683 | "ax.set_title(\"Chebyshev's Theorem\")\n",
|
677 |
| - "ax.set_xlabel('z-score')\n", |
678 |
| - "ax.set_ylabel('Probability')\n", |
| 684 | + "ax.set_xlabel(\"z-score\")\n", |
| 685 | + "ax.set_ylabel(\"Probability\")\n", |
679 | 686 | "plt.show()"
|
680 | 687 | ]
|
681 | 688 | },
|
|
785 | 792 | ],
|
786 | 793 | "source": [
|
787 | 794 | "import plot_material\n",
|
| 795 | + "\n", |
788 | 796 | "plot_material.reg_corr_plot()"
|
789 | 797 | ]
|
790 | 798 | },
|
|
827 | 835 | ],
|
828 | 836 | "source": [
|
829 | 837 | "X = np.linspace(-10, 10, 200)\n",
|
830 |
| - "Y = 1/(1+np.exp(-X))\n", |
831 |
| - "df_dict = {'X': X, 'Y': Y}\n", |
| 838 | + "Y = 1 / (1 + np.exp(-X))\n", |
| 839 | + "df_dict = {\"X\": X, \"Y\": Y}\n", |
832 | 840 | "df = pd.DataFrame(df_dict)\n",
|
833 | 841 | "\n",
|
834 |
| - "df.plot(x ='X', y ='Y', kind='scatter', figsize=(16, 7)); plt.show()" |
| 842 | + "df.plot(x=\"X\", y=\"Y\", kind=\"scatter\", figsize=(16, 7))\n", |
| 843 | + "plt.show()" |
835 | 844 | ]
|
836 | 845 | },
|
837 | 846 | {
|
|
891 | 900 | }
|
892 | 901 | ],
|
893 | 902 | "source": [
|
894 |
| - "df.corr(method='pearson')" |
| 903 | + "df.corr(method=\"pearson\")" |
895 | 904 | ]
|
896 | 905 | },
|
897 | 906 | {
|
|
908 | 917 | }
|
909 | 918 | ],
|
910 | 919 | "source": [
|
911 |
| - "print('Pearson coeffcient: {}'.format(sp.stats.stats.pearsonr(df['X'], df['Y'])[0]))" |
| 920 | + "print(\"Pearson coeffcient: {}\".format(sp.stats.stats.pearsonr(df[\"X\"], df[\"Y\"])[0]))" |
912 | 921 | ]
|
913 | 922 | },
|
914 | 923 | {
|
|
925 | 934 | }
|
926 | 935 | ],
|
927 | 936 | "source": [
|
928 |
| - "print('Pearson coeffcient: {}'.format(sp.stats.stats.spearmanr(df['X'], df['Y'])[0]))" |
| 937 | + "print(\"Pearson coeffcient: {}\".format(sp.stats.stats.spearmanr(df[\"X\"], df[\"Y\"])[0]))" |
929 | 938 | ]
|
930 | 939 | },
|
931 | 940 | {
|
|
943 | 952 | ],
|
944 | 953 | "source": [
|
945 | 954 | "sp.stats.stats.kendalltau(X, Y)\n",
|
946 |
| - "print('Pearson coeffcient: {}'.format(sp.stats.stats.kendalltau(df['X'], df['Y'])[0]))" |
| 955 | + "print(\"Pearson coeffcient: {}\".format(sp.stats.stats.kendalltau(df[\"X\"], df[\"Y\"])[0]))" |
947 | 956 | ]
|
948 | 957 | },
|
949 | 958 | {
|
|
0 commit comments