# Suggested Programming Solutions

library(dplyr)
library(ggplot2)
library(ggrepel)
library(forcats)
library(scales)

## 15.5 Chapter 10: Visualization

### 1 State Proportions

cen10 <- readRDS("data/input/usc2010_001percent.Rds")

Group by state, noting that the mean of a set of logicals is a mean of 1s (TRUE) and 0s (FALSE).

grp_st <- cen10 %>%
group_by(state) %>%
summarize(prop = mean(age >= 65)) %>%
arrange(prop) %>%
mutate(state = as_factor(state))
## summarise() ungrouping output (override with .groups argument)

Plot points

ggplot(grp_st, aes(x = state, y = prop)) +
geom_point() +
coord_flip() +
scale_y_continuous(labels = percent_format(accuracy = 1)) + # use the scales package to format percentages
labs(
y = "Proportion Senior",
x = "",
caption = "Source: 2010 Census sample"
)

### 2 Swing Justice

justices <- read_csv("data/input/justices_court-median.csv")

Keep justices who are in the dataset in 2016,

in_2017 <- justices %>%
filter(term >= 2016) %>%
distinct(justice) %>% # unique values
mutate(present_2016 = 1) # keep an indicator to distinguish from rest after merge

df_indicator <- justices %>%
left_join(in_2017)
## Joining, by = "justice"

All together

ggplot(df_indicator, aes(x = term, y = idealpt, group = justice_id)) +
geom_line(aes(y = median_idealpt), color = "red", size = 2, alpha = 0.1) +
geom_line(alpha = 0.5) +
geom_line(data = filter(df_indicator, !is.na(present_2016))) +
geom_point(data = filter(df_indicator, !is.na(present_2016), term == 2018)) +
geom_text_repel(
data = filter(df_indicator, term == 2016), aes(label = justice),
nudge_x = 10,
direction = "y"
) + # labels nudged and vertical
scale_x_continuous(breaks = seq(1940, 2020, 10), limits = c(1937, 2020)) + # axis breaks
scale_y_continuous(limits = c(-5, 5)) + # axis limits
labs(
x = "SCOTUS Term",
y = "Estimated Martin-Quinn Ideal Point",
caption = "Outliers capped at -5 to 5. Red lines indicate median justice. Current justices of the 2017 Court in black."
) +
theme_bw()
## Warning: Removed 19 row(s) containing missing values (geom_path).

## 15.6 Chapter 9: Objects and Loops

cen10 <- read_csv("data/input/usc2010_001percent.csv")
sample_acs <- read_csv("data/input/acs2015_1percent.csv")

### Checkpoint #3

cen10 %>%
group_by(state) %>%
summarise(avg_age = mean(age)) %>%
arrange(desc(avg_age)) %>%
slice(1:10)
## summarise() ungrouping output (override with .groups argument)
## # A tibble: 10 x 2
##    state         avg_age
##    <chr>           <dbl>
##  1 West Virginia    44.1
##  2 Maine            42.1
##  3 Florida          41.3
##  4 New Hampshire    41.2
##  5 North Dakota     41.1
##  6 Montana          40.6
##  7 Vermont          40.3
##  8 Connecticut      40.1
##  9 Wisconsin        39.9
## 10 New Mexico       39.3

### Exercise 2

states_of_interest <- c("California", "Massachusetts", "New Hampshire", "Washington")

for (state_i in states_of_interest) {
state_subset <- cen10 %>% filter(state == state_i)

print(state_i)

print(table(state_subset$race, state_subset$sex))
}
## [1] "California"
##
##                                    Female Male
##   American Indian or Alaska Native     21   21
##   Black/Negro                         127  126
##   Chinese                              76   65
##   Japanese                             15   12
##   Other Asian or Pacific Islander     182  177
##   Other race, nec                     283  302
##   Three or more major races             7    7
##   Two major races                      91   83
##   White                              1085 1083
## [1] "Massachusetts"
##
##                                    Female Male
##   American Indian or Alaska Native      4    1
##   Black/Negro                          21   17
##   Chinese                               8    7
##   Japanese                              1    1
##   Other Asian or Pacific Islander      14   14
##   Other race, nec                       9   17
##   Two major races                      10    8
##   White                               272  243
## [1] "New Hampshire"
##
##                                    Female Male
##   American Indian or Alaska Native      1    0
##   Black/Negro                           0    1
##   Chinese                               0    1
##   Japanese                              1    0
##   Other Asian or Pacific Islander       2    1
##   Other race, nec                       1    0
##   Two major races                       0    1
##   White                                66   63
## [1] "Washington"
##
##                                    Female Male
##   American Indian or Alaska Native      9    5
##   Black/Negro                          11    9
##   Chinese                               2    7
##   Japanese                              4    0
##   Other Asian or Pacific Islander      28   18
##   Other race, nec                      19   18
##   Three or more major races             0    2
##   Two major races                      17   16
##   White                               267  257

### Exercise 3

race_d <- c()
state_d <- c()
proportion_d <- c()
answer <- data.frame(race_d, state_d, proportion_d)

Then

for (state in states_of_interest) {
for (race in unique(cen10$race)) { race_state_num <- nrow(cen10[cen10$race == race & cen10$state == state, ]) state_pop <- nrow(cen10[cen10$state == state, ])
race_perc <- round(100 * (race_state_num / (state_pop)), digits = 2)
line <- data.frame(race_d = race, state_d = state, proportion_d = race_perc)
}
}

## 15.7 Chapter 11: Demoratic Peace Project

### Task 1: Data Input and Standardization

mid_b <- read_csv("data/input/MIDB_4.2.csv")
polity <- read_excel("data/input/p4v2017.xls")

mid_y_by_y <- data_frame(ccode = numeric(),
year = numeric(),
dispute = numeric())
colnames(mid_b)
for(i in 1:nrow(mid_b)) {
x <- data_frame(ccode = mid_b$ccode[i], ## row i's country year = mid_b$styear[i]:mid_b$endyear[i], ## sequence of years for dispute in row i dispute = 1)## there was a dispute mid_y_by_y <- rbind(mid_y_by_y, x) } merged_mid_polity <- left_join(polity, distinct(mid_y_by_y), by = c("ccode", "year")) ### Task 3: Tabulations and Visualization #don't include the -88, -77, -66 values in calculating the mean of polity mean_polity_by_year <- merged_mid_polity %>% group_by(year) %>% summarise(mean_polity = mean(polity[which(polity <11 & polity > -11)])) mean_polity_by_year_ordered <- arrange(mean_polity_by_year, year) mean_polity_by_year_mid <- merged_mid_polity %>% group_by(year, dispute) %>% summarise(mean_polity_mid = mean(polity[which(polity <11 & polity > -11)])) mean_polity_by_year_mid_ordered <- arrange(mean_polity_by_year_mid, year) mean_polity_no_mid <- mean_polity_by_year_mid_ordered %>% filter(dispute == 0) mean_polity_yes_mid <- mean_polity_by_year_mid_ordered %>% filter(dispute == 1) answer <- ggplot(data = mean_polity_by_year_ordered, aes(x = year, y = mean_polity)) + geom_line() + labs(y = "Mean Polity Score", x = "") + geom_vline(xintercept = c(1914, 1929, 1939, 1989, 2008), linetype = "dashed") answer + geom_line(data =mean_polity_no_mid, aes(x = year, y = mean_polity_mid), col = "indianred") + geom_line(data =mean_polity_yes_mid, aes(x = year, y = mean_polity_mid), col = "dodgerblue") ## 15.8 Chapter 12: Simulation ### 15.8.1 Census Sampling pop <- read_csv("data/input/usc2010_001percent.csv") ## Parsed with column specification: ## cols( ## state = col_character(), ## sex = col_character(), ## age = col_double(), ## race = col_character() ## ) mean(pop$race != "White")
## [1] 0.2806517
set.seed(1669482)
samp <- sample_n(pop, 100)
mean(samp$race != "White") ## [1] 0.22 ests <- c() set.seed(1669482) for (i in 1:20) { samp <- sample_n(pop, 100) ests[i] <- mean(samp$race != "White")
}

mean(ests)
pop_with_prop <- mutate(pop, propensity = ifelse(race != "White", 0.9, 1))
ests <- c()
set.seed(1669482)

for (i in 1:20) {
samp <- sample_n(pop_with_prop, 100, weight = propensity)
ests[i] <- mean(samp$race != "White") } mean(ests) ests <- c() set.seed(1669482) for (i in 1:20) { samp <- sample_n(pop_with_prop, 10000, weight = propensity) ests[i] <- mean(samp$race != "White")
}

mean(ests)