Suggested Programming Solutions
library(dplyr)
library(readr)
library(ggplot2)
library(ggrepel)
library(forcats)
library(scales)
15.5 Chapter 10: Visualization
1 State Proportions
cen10 <- readRDS("data/input/usc2010_001percent.Rds")
Group by state, noting that the mean of a set of logicals is a mean of 1s (TRUE
) and 0s (FALSE
).
grp_st <- cen10 %>%
group_by(state) %>%
summarize(prop = mean(age >= 65)) %>%
arrange(prop) %>%
mutate(state = as_factor(state))
## `summarise()` ungrouping output (override with `.groups` argument)
Plot points
ggplot(grp_st, aes(x = state, y = prop)) +
geom_point() +
coord_flip() +
scale_y_continuous(labels = percent_format(accuracy = 1)) + # use the scales package to format percentages
labs(
y = "Proportion Senior",
x = "",
caption = "Source: 2010 Census sample"
)
2 Swing Justice
justices <- read_csv("data/input/justices_court-median.csv")
Keep justices who are in the dataset in 2016,
in_2017 <- justices %>%
filter(term >= 2016) %>%
distinct(justice) %>% # unique values
mutate(present_2016 = 1) # keep an indicator to distinguish from rest after merge
df_indicator <- justices %>%
left_join(in_2017)
## Joining, by = "justice"
All together
ggplot(df_indicator, aes(x = term, y = idealpt, group = justice_id)) +
geom_line(aes(y = median_idealpt), color = "red", size = 2, alpha = 0.1) +
geom_line(alpha = 0.5) +
geom_line(data = filter(df_indicator, !is.na(present_2016))) +
geom_point(data = filter(df_indicator, !is.na(present_2016), term == 2018)) +
geom_text_repel(
data = filter(df_indicator, term == 2016), aes(label = justice),
nudge_x = 10,
direction = "y"
) + # labels nudged and vertical
scale_x_continuous(breaks = seq(1940, 2020, 10), limits = c(1937, 2020)) + # axis breaks
scale_y_continuous(limits = c(-5, 5)) + # axis limits
labs(
x = "SCOTUS Term",
y = "Estimated Martin-Quinn Ideal Point",
caption = "Outliers capped at -5 to 5. Red lines indicate median justice. Current justices of the 2017 Court in black."
) +
theme_bw()
## Warning: Removed 19 row(s) containing missing values (geom_path).
15.6 Chapter 9: Objects and Loops
cen10 <- read_csv("data/input/usc2010_001percent.csv")
sample_acs <- read_csv("data/input/acs2015_1percent.csv")
Checkpoint #3
cen10 %>%
group_by(state) %>%
summarise(avg_age = mean(age)) %>%
arrange(desc(avg_age)) %>%
slice(1:10)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 10 x 2
## state avg_age
## <chr> <dbl>
## 1 West Virginia 44.1
## 2 Maine 42.1
## 3 Florida 41.3
## 4 New Hampshire 41.2
## 5 North Dakota 41.1
## 6 Montana 40.6
## 7 Vermont 40.3
## 8 Connecticut 40.1
## 9 Wisconsin 39.9
## 10 New Mexico 39.3
Exercise 2
states_of_interest <- c("California", "Massachusetts", "New Hampshire", "Washington")
for (state_i in states_of_interest) {
state_subset <- cen10 %>% filter(state == state_i)
print(state_i)
print(table(state_subset$race, state_subset$sex))
}
## [1] "California"
##
## Female Male
## American Indian or Alaska Native 21 21
## Black/Negro 127 126
## Chinese 76 65
## Japanese 15 12
## Other Asian or Pacific Islander 182 177
## Other race, nec 283 302
## Three or more major races 7 7
## Two major races 91 83
## White 1085 1083
## [1] "Massachusetts"
##
## Female Male
## American Indian or Alaska Native 4 1
## Black/Negro 21 17
## Chinese 8 7
## Japanese 1 1
## Other Asian or Pacific Islander 14 14
## Other race, nec 9 17
## Two major races 10 8
## White 272 243
## [1] "New Hampshire"
##
## Female Male
## American Indian or Alaska Native 1 0
## Black/Negro 0 1
## Chinese 0 1
## Japanese 1 0
## Other Asian or Pacific Islander 2 1
## Other race, nec 1 0
## Two major races 0 1
## White 66 63
## [1] "Washington"
##
## Female Male
## American Indian or Alaska Native 9 5
## Black/Negro 11 9
## Chinese 2 7
## Japanese 4 0
## Other Asian or Pacific Islander 28 18
## Other race, nec 19 18
## Three or more major races 0 2
## Two major races 17 16
## White 267 257
Exercise 3
race_d <- c()
state_d <- c()
proportion_d <- c()
answer <- data.frame(race_d, state_d, proportion_d)
Then
for (state in states_of_interest) {
for (race in unique(cen10$race)) {
race_state_num <- nrow(cen10[cen10$race == race & cen10$state == state, ])
state_pop <- nrow(cen10[cen10$state == state, ])
race_perc <- round(100 * (race_state_num / (state_pop)), digits = 2)
line <- data.frame(race_d = race, state_d = state, proportion_d = race_perc)
answer <- rbind(answer, line)
}
}
15.7 Chapter 11: Demoratic Peace Project
Task 1: Data Input and Standardization
mid_b <- read_csv("data/input/MIDB_4.2.csv")
polity <- read_excel("data/input/p4v2017.xls")
Task 2: Data Merging
mid_y_by_y <- data_frame(ccode = numeric(),
year = numeric(),
dispute = numeric())
colnames(mid_b)
for(i in 1:nrow(mid_b)) {
x <- data_frame(ccode = mid_b$ccode[i], ## row i's country
year = mid_b$styear[i]:mid_b$endyear[i], ## sequence of years for dispute in row i
dispute = 1)## there was a dispute
mid_y_by_y <- rbind(mid_y_by_y, x)
}
merged_mid_polity <- left_join(polity,
distinct(mid_y_by_y),
by = c("ccode", "year"))
Task 3: Tabulations and Visualization
#don't include the -88, -77, -66 values in calculating the mean of polity
mean_polity_by_year <- merged_mid_polity %>% group_by(year) %>% summarise(mean_polity = mean(polity[which(polity <11 & polity > -11)]))
mean_polity_by_year_ordered <- arrange(mean_polity_by_year, year)
mean_polity_by_year_mid <- merged_mid_polity %>% group_by(year, dispute) %>% summarise(mean_polity_mid = mean(polity[which(polity <11 & polity > -11)]))
mean_polity_by_year_mid_ordered <- arrange(mean_polity_by_year_mid, year)
mean_polity_no_mid <- mean_polity_by_year_mid_ordered %>% filter(dispute == 0)
mean_polity_yes_mid <- mean_polity_by_year_mid_ordered %>% filter(dispute == 1)
answer <- ggplot(data = mean_polity_by_year_ordered, aes(x = year, y = mean_polity)) +
geom_line() +
labs(y = "Mean Polity Score",
x = "") +
geom_vline(xintercept = c(1914, 1929, 1939, 1989, 2008), linetype = "dashed")
answer + geom_line(data =mean_polity_no_mid, aes(x = year, y = mean_polity_mid), col = "indianred") + geom_line(data =mean_polity_yes_mid, aes(x = year, y = mean_polity_mid), col = "dodgerblue")
15.8 Chapter 12: Simulation
15.8.1 Census Sampling
pop <- read_csv("data/input/usc2010_001percent.csv")
## Parsed with column specification:
## cols(
## state = col_character(),
## sex = col_character(),
## age = col_double(),
## race = col_character()
## )
mean(pop$race != "White")
## [1] 0.2806517
set.seed(1669482)
samp <- sample_n(pop, 100)
mean(samp$race != "White")
## [1] 0.22
ests <- c()
set.seed(1669482)
for (i in 1:20) {
samp <- sample_n(pop, 100)
ests[i] <- mean(samp$race != "White")
}
mean(ests)
pop_with_prop <- mutate(pop, propensity = ifelse(race != "White", 0.9, 1))
ests <- c()
set.seed(1669482)
for (i in 1:20) {
samp <- sample_n(pop_with_prop, 100, weight = propensity)
ests[i] <- mean(samp$race != "White")
}
mean(ests)
ests <- c()
set.seed(1669482)
for (i in 1:20) {
samp <- sample_n(pop_with_prop, 10000, weight = propensity)
ests[i] <- mean(samp$race != "White")
}
mean(ests)