Skip to content

Commit 80b90de

Browse files
authored
Merge pull request #5 from mansueto-institute/updates-prepub4
updates to repo
2 parents 99b8daf + 3615b5a commit 80b90de

File tree

5 files changed

+254
-133
lines changed

5 files changed

+254
-133
lines changed

complexity-analysis.R

Lines changed: 25 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,7 @@ library(readxl)
1111
library(readr)
1212
library(writexl)
1313
library(units)
14-
library(ggsn)
1514
library(arrow)
16-
#library(geoarrow)
1715
library(sfarrow)
1816
library(osmdata)
1917
library(viridis)
@@ -23,6 +21,9 @@ library(DescTools)
2321
options(scipen = 9999)
2422
options(lifecycle_verbosity = "warning")
2523

24+
library(ggsn)
25+
# devtools::install_github('mansueto-institute/ggsn') # fork of 'oswaldosantos/ggsn'
26+
2627
# Load aggregation function
2728
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
2829
getwd()
@@ -134,17 +135,6 @@ write_xlsx(list('dictionary_labels' = file_labels, 'dictionary_levels' = file_gr
134135

135136
# Aggregation arguments ---------------------------------------------------
136137

137-
log_10 <- function(x) {
138-
x = replace_na(na_if(na_if(na_if(log10(x), NaN), Inf), -Inf), 0)
139-
x = ifelse(x < 1, 1, x)
140-
return(x)
141-
}
142-
143-
share <- function(x) {
144-
x = replace_na(na_if(na_if(na_if(x / sum(x), NaN), Inf), -Inf), 0)
145-
return(x)
146-
}
147-
148138
sum_cols = c("k_complexity_weighted_landscan_un", "k_complexity_weighted_worldpop_un",
149139
"block_count", "block_area_m2", "block_hectares", "block_area_km2", "block_perimeter_meters", "building_area_m2",
150140
"building_count", "parcel_count", "landscan_population", "landscan_population_un", "worldpop_population", "worldpop_population_un",
@@ -1045,12 +1035,14 @@ if (!file.exists(paste0("data/africa_geodata.parquet"))) {
10451035
print('africa_geodata.parquet already downloaded.')
10461036
}
10471037

1048-
area_data <- arrow::open_dataset(paste0('data/africa_geodata.parquet')) %>%
1038+
#area_data <- arrow::open_dataset(paste0('data/africa_geodata.parquet')) %>%
1039+
area_data <- arrow::open_dataset('data/lusaka_blocks.parquet') %>%
10491040
filter(urban_id %in% c('ghsl_3798','periurban_925')) %>%
10501041
read_sf_dataset() %>%
10511042
st_set_crs(4326) %>%
10521043
st_make_valid()
10531044

1045+
10541046
# Zoom map ----------------------------------------------------------------
10551047

10561048
sites <- data.frame(place=c('nga','sle','zmb'),
@@ -1109,8 +1101,8 @@ area_data_framed <- area_data %>%
11091101
theme_void() + inset_element(zoom_inset, left = -.5, bottom = .12, right = 1, top = .55, align_to = 'full')
11101102
)
11111103

1112-
df_combined_prep %>% group_by(country_code, country_name) %>%
1113-
tally() %>% print(n = 100)
1104+
#df_combined_prep %>% group_by(country_code, country_name) %>%
1105+
# tally() %>% print(n = 100)
11141106

11151107
# buildings <- arrow::open_dataset('buildings_polygons_ZMB.parquet')) %>%
11161108
# filter(gadm_code %in% unique(area_data_framed$gadm_code))%>%
@@ -1162,7 +1154,7 @@ colorhexes <- colorRampPalette(c('#93328E','#CF3F80','#F7626B','#FF925A','#FFC55
11621154
fill = 'white', alpha = 0, linewidth = 1.7) +
11631155
geom_sf(data = sites %>% filter(place == place_var), color = '#434343',
11641156
fill = '#434343', alpha = 0, linewidth = .5) +
1165-
scale_fill_manual(expand = c(0,0), values = c(grey2, colorhexes), name = 'Block complexity') +
1157+
scale_fill_manual(expand = c(0,0), values = c(grey2, colorhexes), name = 'Block\ncomplexity') +
11661158
theme_void() + theme(text = element_text(color = "#333333"),
11671159
legend.position = 'none',
11681160
legend.spacing.x = unit(1, 'pt'),
@@ -1176,10 +1168,10 @@ colorhexes <- colorRampPalette(c('#93328E','#CF3F80','#F7626B','#FF925A','#FFC55
11761168
legend.title = element_blank(),
11771169
plot.caption = element_text(size = 11, hjust = .5, vjust = 5), #margin=margin(0,0,0,0)),
11781170
axis.text = element_blank()) +
1179-
ggsn::scalebar(y.min = st_bbox(area_data_framed)$ymin - (height_decdegs*.03),
1180-
x.min = st_bbox(area_data_framed)$xmin,
1181-
y.max = st_bbox(area_data_framed)$ymax,
1182-
x.max = st_bbox(area_data_framed)$xmax,
1171+
ggsn::scalebar(y.min = st_bbox(area_data_framed)$ymin - (height_decdegs*.03),
1172+
x.min = st_bbox(area_data_framed)$xmin,
1173+
y.max = st_bbox(area_data_framed)$ymax,
1174+
x.max = st_bbox(area_data_framed)$xmax,
11831175
location = 'bottomleft',
11841176
height = .01, box.fill = c('#333333','#ffffff'),
11851177
border.size = .4, st.color = '#333333', st.size = 2.5, box.color = '#333333',
@@ -1193,7 +1185,7 @@ colorhexes <- colorRampPalette(c('#93328E','#CF3F80','#F7626B','#FF925A','#FFC55
11931185
geom_sf(data = streets, color = 'white', fill = 'white', linewidth = 1) +
11941186
geom_sf(data = sites %>% filter(place == place_var), color = 'white', fill = 'white', alpha = 0, linewidth = 1.5) +
11951187
scale_fill_manual(expand = c(0,0), breaks = c('1', '2', '3', '4', '5', '6', '7', '8', '9', '10+', 'Off\nnetwork'),
1196-
values = c(grey2, colorhexes), name = 'Block complexity') +
1188+
values = c(grey2, colorhexes), name = 'Block\ncomplexity') +
11971189
theme_void() +
11981190
theme(legend.position = 'none'))
11991191

@@ -1216,7 +1208,7 @@ colorhexes <- colorRampPalette(c('#93328E','#CF3F80','#F7626B','#FF925A','#FFC55
12161208
geom_sf(data = layers %>% filter(block_property == 'on-network-streets',
12171209
block_id %in% c("ZMB.5.6_2_9371")),
12181210
fill = 'yellow', color = 'white', alpha = .8, linewidth = .7, lineend = 'round') + # linetype = "dashed",
1219-
scale_fill_manual(name = 'Block complexity', values = c(grey2, colorhexes), breaks = c('1', '2', '3', '4', '5', '6', '7', '8', '9', '10+', 'Off\nnetwork')) +
1211+
scale_fill_manual(name = 'Block\ncomplexity', values = c(grey2, colorhexes), breaks = c('1', '2', '3', '4', '5', '6', '7', '8', '9', '10+', 'Off\nnetwork')) +
12201212
labs(subtitle = '') +
12211213
theme_void() + theme(legend.title = element_text(size = 14, face = 'bold'),
12221214
legend.text = element_text(size = 14),
@@ -1237,7 +1229,7 @@ colorhexes <- colorRampPalette(c('#93328E','#CF3F80','#F7626B','#FF925A','#FFC55
12371229
geom_sf(data = layers %>% filter(block_property == 'on-network-streets',
12381230
block_id %in% c("ZMB.5.6_2_9339", "ZMB.5.6_2_9334", "ZMB.5.6_2_9335")),
12391231
fill = 'yellow', color = 'white', alpha = .8, linewidth = .7, lineend = 'round') + # linetype = "dashed",
1240-
scale_fill_manual(name = 'Block complexity', values = c(grey2, colorhexes), breaks = c('1', '2', '3', '4', '5', '6', '7', '8', '9', '10+', 'Off\nnetwork')) +
1232+
scale_fill_manual(name = 'Block\ncomplexity', values = c(grey2, colorhexes), breaks = c('1', '2', '3', '4', '5', '6', '7', '8', '9', '10+', 'Off\nnetwork')) +
12411233
labs(subtitle = '') +
12421234
theme_void() + theme(legend.position = 'none',
12431235
plot.subtitle = element_text(size = 10),
@@ -1246,11 +1238,16 @@ colorhexes <- colorRampPalette(c('#93328E','#CF3F80','#F7626B','#FF925A','#FFC55
12461238
design <- "
12471239
AAAABBBBBB
12481240
AAAABBBBBB
1249-
CCCCDDDDEE
1250-
CCCCDDDDEE
1241+
AAAABBBBBB
1242+
AAAABBBBBB
1243+
AAAABBBBBB
1244+
CCCDDDEEFF
1245+
CCCDDDEEFF
1246+
CCCDDDEEFF
1247+
CCCDDDEEFF
12511248
"
12521249

1253-
(layers_viz <- africa_zoom + plot_k_discrete + map_block + l1 + l2 +
1250+
(layers_viz <- africa_zoom + plot_k_discrete + l1 + map_block + l2 + guide_area() +
12541251
#plot_layout(design = design, guides = "collect", tag_level = 'new') + plot_annotation( tag_levels = c('A')) &
12551252
plot_layout(design = design, guides = "collect") + plot_annotation(tag_levels = list(c("A", "B", "C", "D", "E", "F"))) &
12561253
theme(plot.tag = element_text(size = 12)))
@@ -1393,7 +1390,7 @@ colorhexes <- colorRampPalette(c('#93328E','#CF3F80','#F7626B','#FF925A','#FFC55
13931390

13941391
(plot_k_discrete <- ggplot() +
13951392
geom_sf(data = area_data, aes(fill = as.factor(k_labels)), color = '#ffffff', linewidth = .0075) +
1396-
scale_fill_manual(expand = c(0,0), values = c(grey2, colorhexes), name = 'Block complexity') +
1393+
scale_fill_manual(expand = c(0,0), values = c(grey2, colorhexes), name = 'Block\ncomplexity') +
13971394
labs(caption = paste0('Population-weighted average block complexity: ', area_data %>% st_drop_geometry() %>%
13981395
summarise(wm_var = weighted.mean(as.integer(k_complexity), landscan_population_un)) %>% pull() %>% round(.,2))) +
13991396
#guides(color = guide_legend(nrow = 1, label.position = "bottom", keywidth = 2, keyheight = 1),

dhs-analysis.R

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ library(scales)
66
library(sf)
77
library(rmapshaper)
88
library(patchwork)
9+
library(ggrepel)
910
library(arrow)
1011
library(sfarrow)
1112
library(readxl)
@@ -39,7 +40,7 @@ load_saved = TRUE
3940

4041
# RData file
4142
if (load_saved == TRUE) {
42-
load("data/dhs_data.RData")
43+
load(paste0(wd_path,"/data/dhs_data.RData"))
4344
# Requires authorization from DHS: https://uchicago.box.com/s/anw4fxc376dgtgol9tt87tuozipnk0kj
4445

4546
} else {
@@ -55,10 +56,10 @@ if (load_saved == TRUE) {
5556
# Load staging files
5657
# Box.com link with access to staging files:
5758
# https://uchicago.box.com/s/anw4fxc376dgtgol9tt87tuozipnk0kj
58-
# subnat_indicators <- read_csv(paste0(wd_path,'/dhs_download.csv'))
59-
# subnat_all <- st_read(paste0(wd_path,'/dhs_geographies.geojson'))
60-
# subnat_all_wide <- st_read(paste0(wd_path,'/dhs_all_wide.geojson'))
61-
# subnat_to_blocks <- read_parquet(paste0(wd_path,'/blocks_to_dhs.parquet'))
59+
# subnat_indicators <- read_csv(paste0(wd_path,'/data/dhs_download.csv'))
60+
# subnat_all <- st_read(paste0(wd_path,'/data/dhs_geographies.geojson'))
61+
# subnat_all_wide <- st_read(paste0(wd_path,'/data/dhs_all_wide.geojson'))
62+
subnat_to_blocks <- read_parquet(paste0(wd_path,'/data/blocks_to_dhs.parquet'))
6263

6364
# CHANGE TO DHS USER LOGIN CREDENTIAL
6465
# Follow instructions here: # https://docs.ropensci.org/rdhs/articles/introduction.html
@@ -258,7 +259,7 @@ if (load_saved == FALSE) {
258259
# st_make_valid() %>% mutate(is_valid = st_is_valid(geometry)) %>%
259260
# filter(is_valid == TRUE) %>% select(-any_of('is_valid'))
260261

261-
iso_blocks <- arrow::open_dataset(paste0(wd_input,'/africa_geodata.parquet')) %>%
262+
iso_blocks <- arrow::open_dataset(paste0('data/africa_geodata.parquet')) %>%
262263
select(block_id, gadm_code, country_code, geometry) %>%
263264
filter(country_code %in% c(country_iso)) %>%
264265
read_sf_dataset() %>%
@@ -412,7 +413,7 @@ if (load_saved == FALSE) {
412413
un_slums_k <- un_slums %>%
413414
left_join(., urban_k, by = c('country_code'='country_code'))
414415

415-
city_k <- read_parquet(paste0(wd_input,'/africa_data.parquet')) %>%
416+
city_k <- read_parquet(paste0('data/africa_data.parquet')) %>%
416417
mutate(population_k_1 = case_when(k_labels == '1' ~ landscan_population_un, TRUE ~ as.numeric(0)),
417418
population_k_2 = case_when(k_labels == '2' ~ landscan_population_un, TRUE ~ as.numeric(0)),
418419
population_k_3 = case_when(k_labels == '3' ~ landscan_population_un, TRUE ~ as.numeric(0)),
@@ -1477,7 +1478,7 @@ k_country <- read_parquet(paste0('data/africa_data.parquet')) %>%
14771478
ungroup() %>%
14781479
mutate(k_complexity_wt = k_complexity_wt/landscan_population_un)
14791480

1480-
street_validation <- read_csv('data/streets_external_validation.csv') %>%
1481+
street_validation <- read_csv('data/streets_validation.csv') %>%
14811482
mutate(osm_cia_ratio = osm_total_streets_km / cia_roadways_km,
14821483
osm_ecopia_ratio = osm_total_streets_km / ecopia_ml_roads_km,
14831484
osm_irf_ratio = osm_total_streets_km / irf_all_roads_km,
@@ -1764,7 +1765,7 @@ k_thresh_corr <- k_levels_vs_pc1 %>%
17641765
geom_line(group = 1) +
17651766
scale_y_continuous(expand = c(0,0), limits = c(-.75,.7)) +
17661767
scale_size_continuous( labels = label_percent(), name = 'Share of population') +
1767-
labs(subtitle = 'Block complexity correlation with PC1 begins to slightly weaken\nwhen k = 15+ and for off-network blocks across DHS regions',
1768+
labs(#subtitle = 'Block complexity correlation with PC1 begins to slightly weaken\nwhen k = 15+ and for off-network blocks across DHS regions',
17681769
y = 'Spearman correlation with PC1 at each block complexity level', x = 'Population share at each block complexity level correlated with PC1') +
17691770
theme_classic() +
17701771
theme(plot.subtitle = element_text(hjust = .5),
@@ -1847,16 +1848,17 @@ k_15_ratios <- street_validation %>%
18471848
#geom_text(x = 4, y = log10(15), label = 'OSM reports more vehicular roadway than CIA') +
18481849
#scale_x_continuous(labels = scales::percent, breaks = c(0, .05, .1, .15, .2, .25, .3)) +
18491850
scale_y_continuous(breaks = c( log10(.5), log10(1), log10(10), log10(100)), labels = c( .5, 1, 10, 100)) +
1850-
labs(subtitle = 'South Sudan and Eritrea have relatively high block complexity\nand report less roadway in OSM than the CIA World Factbook',
1851+
labs(#subtitle = 'South Sudan and Eritrea have relatively high block complexity\nand report less roadway in OSM than the CIA World Factbook',
18511852
#x = "Ratio of population share in k = 15+ / off-network blocks to non-urban population share",
18521853
x = "Average block complexity (weighted to population)",
18531854
y = "OSM completeness\n(ratio of OSM vehicular street distance to CIA roadway distance)") +
18541855
theme_classic() +
18551856
theme(legend.position = 'none',
18561857
plot.subtitle = element_text(hjust = .5)))
18571858

1859+
line_correl_k_thresh + scatter_15plus_outliers
18581860
(k_thresh_charts <- line_correl_k_thresh + scatter_15plus_outliers &
1859-
plot_layout(ncol = 2) &
1861+
#plot_layout(ncol = 2) &
18601862
plot_annotation(tag_levels = list(c("A", "B"))) &
18611863
theme(plot.tag = element_text(size = 13)))
18621864

0 commit comments

Comments
 (0)