Skip to content

Commit c29acd1

Browse files
Add files via upload
0 parents  commit c29acd1

File tree

1 file changed

+122
-0
lines changed

1 file changed

+122
-0
lines changed

finalproject-oberai.R

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# A brief explanation of why you chose this data and the source of the data.
2+
# An explanation of the data set and its variables.
3+
# A correlation matrix
4+
# Scatter plots for all pairwise variables (not dummy variables).
5+
# A description of what the correlation matrix and scatter plots indicate.
6+
# A description of the initial model to be tested.
7+
# An analysis of the overall fit of the model.
8+
# An analysis of the tests of significance for the coefficients.
9+
# Interpret each coefficient that is not a dummy variable or interaction term.
10+
# An explanation of the value of the coefficient of determination.
11+
# A residual plot for each non-dummy predictor in the model with a description of what is indicated.
12+
# A boxplot of the residuals and analysis.
13+
# A QQ-plot of the residuals and analysis.
14+
# Choose one reasonable combination of your predictor values and calculate and interpret the predictions.
15+
# A summarizing paragraph describing how well the model fits the data.
16+
# A written summary of your analysis submitted in an R Markdown file will be worth 100 points.
17+
18+
19+
# packages
20+
library(ggplot2)
21+
library(GGally)
22+
library(qqplotr)
23+
24+
25+
26+
# Tesla data set ----
# Close.Last is the dependent variable.
# Volume, Open, High, and Low are the predictors.
# Date is excluded from the analysis: per the author's note, every observation
# falls within the same month, so the date carries no useful signal.
tesla_stock <- read.csv("Tesla-data.csv")

# Convert a currency-formatted column (e.g. "$1,234.50") to numeric.
# Both "$" and "," are removed before coercion; stripping only "$" would turn
# any value with a thousands separator into NA. Values that still cannot be
# parsed become NA with R's usual coercion warning.
parse_currency <- function(x) {
  as.numeric(gsub("[$,]", "", x))
}

# Drop the "Date" column. `drop = FALSE` keeps the result a data frame even if
# only a single column is left.
kept_columns <- setdiff(names(tesla_stock), "Date")
tesla_stock_numeric <- tesla_stock[, kept_columns, drop = FALSE]

# Convert every model variable in one pass instead of one statement per column.
model_columns <- c("Close.Last", "Volume", "Open", "High", "Low")
tesla_stock_numeric[model_columns] <- lapply(
  tesla_stock_numeric[model_columns],
  parse_currency
)
41+
42+
43+
44+
# Correlation matrix ----
# Pairwise Pearson correlations between every column of the cleaned data
# (the response Close.Last and the four predictors).
cor_matrix <- stats::cor(x = tesla_stock_numeric, method = "pearson")
print(cor_matrix)
47+
48+
49+
50+
# Scatter plot matrix for all pairwise variables ----
# The columns are selected explicitly by name, in the same order as the
# display labels, so each label is guaranteed to describe the column it is
# paired with regardless of the column order in the CSV.
pair_columns <- c("Close.Last", "Volume", "Open", "High", "Low")
pair_labels <- c("Close/Last", "Volume", "Open", "High", "Low")

ggpairs(
  data = tesla_stock_numeric,
  columns = pair_columns,
  columnLabels = pair_labels
)
53+
54+
55+
56+
# Regression model ----
# Multiple linear regression of the closing price on Volume, Open, High and Low.
regression_model <- lm(
  Close.Last ~ Volume + Open + High + Low,
  data = tesla_stock_numeric
)

# Coefficient table with significance tests, R-squared and the overall F-test.
summary(regression_model)

# Analysis-of-variance table for the fitted model.
anova(regression_model)
60+
61+
62+
63+
# Coefficients ----
# Estimated intercept and slopes of the fitted model.
coef(regression_model)
65+
66+
67+
68+
# Adjusted R-squared ----
# Extracted from the model summary by its exact component name.
summary(regression_model)[["adj.r.squared"]]
70+
71+
72+
73+
74+
# Residual plots ----
# One scatter plot of the residuals against each non-dummy predictor, with a
# horizontal reference line at zero.
#
# The residuals are stored next to the predictors in a single data frame built
# from the model frame, i.e. the rows lm() actually used. This keeps residuals
# and predictors row-aligned even if lm() dropped incomplete rows, and avoids
# referencing an external vector with `$` inside aes().
residual_data <- model.frame(regression_model)
residual_data$residual <- regression_model$residuals

for (predictor in c("Volume", "Open", "High", "Low")) {
  residual_plot <- ggplot(
    residual_data,
    aes(x = .data[[predictor]], y = residual)
  ) +
    geom_point() +
    geom_hline(yintercept = 0, color = "blue") +
    labs(x = predictor, y = "Residual")

  # Plots created inside a loop are not auto-printed, so print explicitly.
  print(residual_plot)
}
89+
90+
91+
92+
# Residual boxplot ----
# Wrap the model residuals in a one-column data frame so ggplot can use them.
# The name `residuals` is kept because the QQ-plot further down reads it.
residuals <- data.frame(residual = regression_model[["residuals"]])

# Horizontal boxplot of the residuals (the residual is mapped to the x axis).
ggplot(data = residuals, mapping = aes(x = residual)) +
  geom_boxplot()
97+
98+
99+
100+
# QQ-plot for the normality of the residuals ----
# ggplot draws layers in the order they are added. The confidence band is
# added first so it sits in the background; the reference line and the points
# are then drawn on top of it instead of being painted over by the band.
ggplot(residuals, aes(sample = residual)) +
  stat_qq_band() +
  stat_qq_line() +
  stat_qq_point() +
  labs(x = "Theoretical quantiles", y = "Sample quantiles")
105+
106+
107+
108+
109+
# Predictions ----
# One plausible combination of predictor values for which to predict the
# closing price.
new_data <- data.frame(Volume = 125000000, Open = 175, High = 182, Low = 171)

# Point estimate with lower/upper bounds. interval = "confidence" gives the
# interval for the mean closing price at these predictor values; an interval
# for a single new observation would use interval = "prediction" instead.
predicted_closing_price <- predict(
  object = regression_model,
  newdata = new_data,
  interval = "confidence"
)

predicted_closing_price
121+
122+

0 commit comments

Comments
 (0)