David Sichinava, Rati Shubladze
December 20, 2019
Tenth meeting
library(ggplot2)
# Download the dataset from https://goo.gl/vE26bN and save it under ../data/.
poll08 <- read.csv(file.path("..", "data", "pres08data.csv"))
# Quick overview of every column in the loaded data frame.
summary(poll08)
state.name state Obama McCain EV
Alabama : 1 AK : 1 Min. :33.00 Min. : 7.00 Min. : 3.00
Alaska : 1 AL : 1 1st Qu.:43.00 1st Qu.:40.00 1st Qu.: 4.50
Arizona : 1 AR : 1 Median :51.00 Median :47.00 Median : 8.00
Arkansas : 1 AZ : 1 Mean :51.37 Mean :47.06 Mean :10.55
California: 1 CA : 1 3rd Qu.:57.50 3rd Qu.:56.00 3rd Qu.:11.50
Colorado : 1 CO : 1 Max. :92.00 Max. :66.00 Max. :55.00
(Other) :45 (Other):45
margin predicted.margin.poll
Min. :-32.000 Min. :-29.000
1st Qu.:-13.000 1st Qu.: -9.500
Median : 4.000 Median : 3.000
Mean : 4.314 Mean : 3.252
3rd Qu.: 17.500 3rd Qu.: 16.000
Max. : 85.000 Max. : 69.000
# Add a `winner` column naming the candidate who carried each state.
# Every state starts as "Obama"; rows where McCain's share is strictly
# larger are then relabelled.
poll08$winner <- "Obama"
mccain_rows <- which(poll08$McCain > poll08$Obama)
poll08$winner[mccain_rows] <- "McCain"
# Count the states carried by each candidate.
table(poll08$winner)
McCain Obama
22 29
# Regress the actual margin on the margin predicted by the last poll,
# then print the coefficient table and fit statistics.
model <- lm(formula = margin ~ predicted.margin.poll, data = poll08)
summary(model)
Call:
lm(formula = margin ~ predicted.margin.poll, data = poll08)
Residuals:
Min 1Q Median 3Q Max
-16.3834 -2.7205 0.3556 3.4224 13.0111
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.70908 0.78695 0.901 0.372
predicted.margin.poll 1.10856 0.04063 27.285 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 5.54 on 49 degrees of freedom
Multiple R-squared: 0.9382, Adjusted R-squared: 0.937
F-statistic: 744.5 on 1 and 49 DF, p-value: < 2.2e-16
# An `lm` fit has no `r.squared` component, so `model$r.squared` is NULL.
# R-squared is computed by summary(); extract it from the summary object.
# It corresponds to the "Multiple R-squared" line of summary(model).
summary(model)$r.squared
# Scatter plot of the margin predicted by the last poll against the actual
# margin, with a fitted linear trend and points coloured by the state winner.
ggplot(poll08, aes(x = predicted.margin.poll, y = margin)) +
  geom_point(aes(color = winner)) +
  # Spell out the formula so the smoother matches the fitted model's form.
  geom_smooth(method = "lm", formula = y ~ x) +
  labs(
    title = "Opinion Polls and Election Results",
    # The data are the 2008 Obama vs. McCain race, not 2012.
    subtitle = "2008 U.S. Presidential Elections",
    x = "Predicted Obama Margin in the Last Opinion Poll",
    y = "Actual Margin for Obama"
  ) +
  # Name the colours so the mapping does not rely on alphabetical level order.
  scale_color_manual(
    name = "Winner",
    values = c(McCain = "red", Obama = "blue")
  ) +
  theme_minimal()
# Store the model residuals and the model predictions next to the data.
poll08[["residuals"]] <- model[["residuals"]]
poll08[["predicted"]] <- predict(model)
# Histogram of the residuals, using 51 bins.
ggplot(poll08, aes(x = residuals)) +
  geom_histogram(bins = 51)
# Average residual.
mean(poll08[["residuals"]])
[1] -1.783534e-16
# Residuals against predicted values, with each point labelled by its state
# code and coloured by the candidate who carried the state.
ggplot(poll08, aes(x = predicted, y = residuals)) +
  geom_point(aes(color = winner)) +
  labs(
    title = "Opinion Polls and Election Results",
    # The data are the 2008 Obama vs. McCain race, not 2012.
    subtitle = "2008 U.S. Presidential Elections",
    x = "Predicted Values",
    y = "Residuals"
  ) +
  geom_text(
    aes(label = state),
    position = position_dodge(width = 5),
    size = 3
  ) +
  # Use the same named party colours as the poll-vs-result scatter plot.
  scale_color_manual(
    name = "Winner",
    values = c(McCain = "red", Obama = "blue")
  ) +
  theme_minimal()
# Draw the lm diagnostic plots on a 2 x 2 grid, then restore the previous
# graphics parameters so later base plots are not forced into the grid.
old_par <- par(mfrow = c(2, 2))
plot(model)
par(old_par)
# Residuals in row order, labelled by state code and coloured by the winner.
# seq_along() always yields 1..n; seq() on a length-one numeric vector would
# instead expand that single value into a sequence.
ggplot(poll08, aes(x = seq_along(residuals), y = residuals)) +
  geom_point(aes(color = winner)) +
  labs(
    title = "Opinion Polls and Election Results",
    # The data are the 2008 Obama vs. McCain race, not 2012.
    subtitle = "2008 U.S. Presidential Elections",
    x = "Sequence Variable",
    y = "Residuals"
  ) +
  geom_text(
    aes(label = state),
    position = position_dodge(width = 5),
    size = 3
  ) +
  # Use the same named party colours as the poll-vs-result scatter plot.
  scale_color_manual(
    name = "Winner",
    values = c(McCain = "red", Obama = "blue")
  ) +
  theme_minimal()
# Draw the lm diagnostic plots on a 2 x 2 grid, then restore the previous
# graphics parameters so the grid layout does not leak into later plots.
saved_par <- par(mfrow = c(2, 2))
plot(model)
par(saved_par)