Commit daa15a6

chapter 4
1 parent 10d0236 commit daa15a6

8 files changed: 26,873 additions & 4 deletions

.gitignore

Lines changed: 5 additions & 2 deletions
```diff
@@ -15,5 +15,8 @@ cabal.sandbox.config
 *.aux
 *.hp
 
-/earthquakes.sql
-.idea
+/*.sql
+/*.png
+/*.dat
+.idea
+.directory
```

LearningHaskellDataAnalysis.cabal

Lines changed: 2 additions & 2 deletions
```diff
@@ -63,9 +63,9 @@ cabal-version: >=1.10
   -- build-depends: base >=4.8 && <4.9, Cabal >= 1.10, csv
 
 library
-  build-depends: base >=4.8 && <4.9, csv, HDBC, sqlite, HDBC-sqlite3, regex-posix
+  build-depends: base >=4.8 && <4.9, csv, HDBC, sqlite, HDBC-sqlite3, regex-posix, easyplot
   hs-source-dirs: src
-  exposed-modules: LearningHaskellDataAnalysis01, LearningHaskellDataAnalysis02, LearningHaskellDataAnalysis03
+  exposed-modules: LearningHaskellDataAnalysis01, LearningHaskellDataAnalysis02, LearningHaskellDataAnalysis03, LearningHaskellDataAnalysis04
   exposed: True
 
 executable median
```

README.md

Lines changed: 49 additions & 0 deletions
```diff
@@ -91,3 +91,52 @@ better version
 fixed data
 
 identifyInCSVFileFromColumn (\ x -> not (x =~ "^[1-9][0-9]?/[1-9][0-9]?/[12][0-9][0-9][0-9]$")) "poordataFixed.csv" "Number" "Birthday"
+
+# Chapter 4
+
+convertCSVFileToSQL "aapl.csv" "aapl.sql" "aapl" ["date STRING", "open REAL", "high REAL", "low REAL", "close REAL", "volume REAL", "adjclose REAL"]
+
+## pullStockClosingPrices
+
+aapl <- pullStockClosingPrices "aapl.sql" "aapl"
+plot (PNG "aapl.png") $ Data2D [Title "AAPL"] [] $ aapl
+plot (PNG "aapl_line.png") $ Data2D [Title "AAPL", Style Lines] [] $ aapl
+plot (PNG "aapl_oneyear.png") $ Data2D [Title "AAPL", Style Lines] [] $ take 252 aapl
+
+## applyPercentChangeToData
+
+aapl <- pullStockClosingPrices "aapl.sql" "aapl"
+let aapl252 = take 252 aapl
+let aapl252pc = applyPercentChangeToData aapl252
+
+google
+
+convertCSVFileToSQL "googl.csv" "googl.sql" "googl" ["date STRING", "open REAL", "high REAL", "low REAL", "close REAL", "volume REAL", "adjclose REAL"]
+googl <- pullStockClosingPrices "googl.sql" "googl"
+let googl252 = take 252 googl
+let googl252pc = applyPercentChangeToData googl252
+
+microsoft
+
+convertCSVFileToSQL "msft.csv" "msft.sql" "msft" ["date STRING", "open REAL", "high REAL", "low REAL", "close REAL", "volume REAL", "adjclose REAL"]
+msft <- pullStockClosingPrices "msft.sql" "msft"
+let msft252 = take 252 msft
+let msft252pc = applyPercentChangeToData msft252
+
+all data
+
+plot (PNG "aapl_googl_msft_pc.png") [Data2D [Title "AAPL - One Year, % Change", Style Lines, Color Red] [] aapl252pc, Data2D [Title "GOOGL - One Year, % Change", Style Lines, Color Blue] [] googl252pc, Data2D [Title "MSFT - One Year, % Change", Style Lines, Color Green] [] msft252pc]
+
+## applyMovingAverageToData
+
+aapl <- pullStockClosingPrices "aapl.sql" "aapl"
+let aapl252 = take 252 aapl
+let aapl252pc = applyPercentChangeToData aapl252
+let aapl252ma20 = applyMovingAverageToData aapl252pc 20
+plot (PNG "aapl_20dayma.png") [Data2D [Title "AAPL - One Year, % Change", Style Lines, Color Red] [] aapl252pc, Data2D [Title "AAPL 20-Day MA", Style Lines, Color Black] [] aapl252ma20]
+
+earthquakes
+
+convertCSVFileToSQL "all_month.csv" "earthquakes.sql" "oneMonth" ["time TEXT", "latitude REAL", "longitude REAL", "depth REAL", "mag REAL", "magType TEXT", "nst INTEGER", "gap REAL", "dmin REAL", "rms REAL", "net REAL", "id TEXT", "updated TEXT", "place TEXT", "type TEXT"]
+coords <- pullLatitudeLongitude "earthquakes.sql" "oneMonth"
+plot (PNG "earthquakes.png") [Data2D [Title "Earthquakes", Color Red, Style Dots] [] coords]
```

aapl.csv

Lines changed: 8554 additions & 0 deletions
Large diffs are not rendered by default.

all_month.csv

Lines changed: 8403 additions & 0 deletions
Large diffs are not rendered by default.

googl.csv

Lines changed: 2577 additions & 0 deletions
Large diffs are not rendered by default.

msft.csv

Lines changed: 7229 additions & 0 deletions
Large diffs are not rendered by default.

src/LearningHaskellDataAnalysis04.hs

Lines changed: 54 additions & 0 deletions
New file:

```haskell
module LearningHaskellDataAnalysis04 where
import Data.List
import Database.HDBC.Sqlite3
import Database.HDBC
import Graphics.EasyPlot
import LearningHaskellDataAnalysis02

-- Read one column of an HDBC result set as Integers, Doubles, or Strings.
readIntegerColumn :: [[SqlValue]] -> Integer -> [Integer]
readIntegerColumn sqlResult index = map (\row -> fromSql $ genericIndex row index :: Integer) sqlResult

readDoubleColumn :: [[SqlValue]] -> Integer -> [Double]
readDoubleColumn sqlResult index = map (\row -> fromSql $ genericIndex row index :: Double) sqlResult

readStringColumn :: [[SqlValue]] -> Integer -> [String]
readStringColumn sqlResult index = map (\row -> fromSql $ genericIndex row index :: String) sqlResult

-- Open an SQLite database file, run a query, and return the raw result rows.
queryDatabase :: FilePath -> String -> IO [[SqlValue]]
queryDatabase databaseFile sqlQuery = do
  conn <- connectSqlite3 databaseFile
  result <- quickQuery' conn sqlQuery []
  disconnect conn
  return result

-- Pull (index, adjusted close) pairs; the rowid sequence is reversed so the
-- last row of the table is paired with index 1.
pullStockClosingPrices :: String -> String -> IO [(Double, Double)]
pullStockClosingPrices databaseFile database = do
  sqlResult <- queryDatabase databaseFile ("SELECT rowid, adjclose FROM " ++ database)
  return $ zip (reverse $ readDoubleColumn sqlResult 0) (readDoubleColumn sqlResult 1)

-- Percent change of a value relative to a baseline value.
percentChange :: Double -> Double -> Double
percentChange value first = 100.0 * (value - first) / first

-- Rescale a series to percent change relative to the value of its last element.
applyPercentChangeToData :: [(Double, Double)] -> [(Double, Double)]
applyPercentChangeToData dataset = zip indices scaledData
  where
    (_, first) = last dataset
    indices = reverse [1.0..(genericLength dataset)]
    scaledData = map (\(_, value) -> percentChange value first) dataset

-- Simple moving average over a sliding window; the recursion ends with a
-- single average once the window covers all remaining values.
-- (`average` is imported from LearningHaskellDataAnalysis02.)
movingAverage :: [Double] -> Integer -> [Double]
movingAverage values window =
  if window >= genericLength values
    then [average values]
    else average (genericTake window values) : movingAverage (tail values) window

applyMovingAverageToData :: [(Double, Double)] -> Integer -> [(Double, Double)]
applyMovingAverageToData dataset window =
  zip [fromIntegral window..] $ movingAverage (map snd (reverse dataset)) window

-- Pull (longitude, latitude) pairs for plotting earthquake positions.
pullLatitudeLongitude :: String -> String -> IO [(Double, Double)]
pullLatitudeLongitude databaseFile database = do
  sqlResult <- queryDatabase
                 databaseFile
                 ("SELECT latitude, longitude FROM " ++ database)
  return $ zip (readDoubleColumn sqlResult 1) (readDoubleColumn sqlResult 0)
```