library(tidyverse)
library(gapminder)
# Load the Gapminder country-entity table (one row per geo entity) straight
# from the open-numbers GitHub repository, then print a column overview.
geo <- readr::read_csv(
  file = "https://raw.githubusercontent.com/open-numbers/ddf--gapminder--fasttrack/master/ddf--entities--geo--country.csv"
)
dplyr::glimpse(geo)
Rows: 273
Columns: 21
$ country <chr> "abkh", "abw", "afg", "ago", "aia", "akr_a_dhe", "ala", "alb", "and", "a…
$ g77_and_oecd_countries <chr> "others", "others", "g77", "g77", "others", "others", "others", "others"…
$ income_groups <chr> NA, "high_income", "low_income", "lower_middle_income", NA, NA, NA, "upp…
$ `is--country` <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, …
$ iso3166_1_alpha2 <chr> NA, "AW", "AF", "AO", "AI", NA, "AX", "AL", "AD", NA, "AE", "AR", "AM", …
$ unicode_region_subtag <chr> NA, "AW", "AF", "AO", "AI", NA, "AX", "AL", "AD", NA, "AE", "AR", "AM", …
$ iso3166_1_alpha3 <chr> NA, "ABW", "AFG", "AGO", "AIA", NA, "ALA", "ALB", "AND", NA, "ARE", "ARG…
$ iso3166_1_numeric <dbl> NA, 533, 4, 24, 660, NA, 248, 8, 20, NA, 784, 32, 51, 16, 10, 28, 36, 40…
$ iso3166_2 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ landlocked <chr> NA, "coastline", "landlocked", "coastline", "coastline", "coastline", "c…
$ latitude <dbl> NA, 12.50000, 33.00000, -12.50000, 18.21667, NA, 60.25000, 41.00000, 42.…
$ longitude <dbl> NA, -69.96667, 66.00000, 18.50000, -63.05000, NA, 20.00000, 20.00000, 1.…
$ main_religion_2008 <chr> NA, "christian", "muslim", "christian", "christian", NA, NA, "muslim", "…
$ name <chr> "Abkhazia", "Aruba", "Afghanistan", "Angola", "Anguilla", "Akrotiri and …
$ un_state <lgl> FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, TRUE, …
$ world_4region <chr> "europe", "americas", "asia", "africa", "americas", "europe", "europe", …
$ world_6region <chr> "europe_central_asia", "america", "south_asia", "sub_saharan_africa", "a…
$ unicef_region <chr> NA, NA, "sa", "ssa", NA, NA, NA, "eca", "eca", NA, "mena", "lac", "eca",…
$ income_3groups <chr> NA, "high_income", "low_income", "middle_income", NA, NA, NA, "middle_in…
$ un_sdg_region <chr> NA, "un_latin_america_and_the_caribbean", "un_central_and_southern_asia"…
$ un_sdg_ldc <chr> NA, "un_not_least_developed", "un_least_developed", "un_least_developed"…
# Keep only the join key (`country`) and the descriptive columns used later;
# the column order here is the order of the resulting tibble.
geo <- dplyr::select(
  geo,
  country,
  income_groups,
  main_religion_2008,
  name,
  world_6region
)
dplyr::glimpse(geo)
Rows: 273
Columns: 5
$ country <chr> "abkh", "abw", "afg", "ago", "aia", "akr_a_dhe", "ala", "alb", "and", "ant",…
$ income_groups <chr> NA, "high_income", "low_income", "lower_middle_income", NA, NA, NA, "upper_m…
$ main_religion_2008 <chr> NA, "christian", "muslim", "christian", "christian", NA, NA, "muslim", "chri…
$ name <chr> "Abkhazia", "Aruba", "Afghanistan", "Angola", "Anguilla", "Akrotiri and Dhek…
$ world_6region <chr> "europe_central_asia", "america", "south_asia", "sub_saharan_africa", "ameri…
# Load the "age at first marriage (women)" datapoints, keyed by geo and time,
# from the open-numbers GitHub repository, then print a column overview.
marriage1 <- readr::read_csv(
  file = "https://raw.githubusercontent.com/open-numbers/ddf--gapminder--systema_globalis/master/countries-etc-datapoints/ddf--datapoints--age_at_1st_marriage_women--by--geo--time.csv"
)
dplyr::glimpse(marriage1)
Rows: 678
Columns: 3
$ geo <chr> "afg", "afg", "ago", "alb", "alb", "alb", "alb", "alb", "ant", "ant",…
$ time <dbl> 1979, 2005, 1970, 1946, 1955, 1970, 1989, 2005, 1971, 2005, 1975, 200…
$ age_at_1st_marriage_women <dbl> 17.84, 17.84, 19.39, 21.52, 21.00, 21.50, 22.47, 23.33, 24.86, 30.23,…
# Attach the country attributes to every marriage datapoint.
# `marriage1$geo` is matched against `geo$country`; because this is an inner
# join, datapoints without a matching country row would be dropped.
mar1_geo <- marriage1 %>%
  dplyr::inner_join(geo, by = c(geo = "country"))
dplyr::glimpse(mar1_geo)
Rows: 678
Columns: 7
$ geo <chr> "afg", "afg", "ago", "alb", "alb", "alb", "alb", "alb", "ant", "ant",…
$ time <dbl> 1979, 2005, 1970, 1946, 1955, 1970, 1989, 2005, 1971, 2005, 1975, 200…
$ age_at_1st_marriage_women <dbl> 17.84, 17.84, 19.39, 21.52, 21.00, 21.50, 22.47, 23.33, 24.86, 30.23,…
$ income_groups <chr> "low_income", "low_income", "lower_middle_income", "upper_middle_inco…
$ main_religion_2008 <chr> "muslim", "muslim", "christian", "muslim", "muslim", "muslim", "musli…
$ name <chr> "Afghanistan", "Afghanistan", "Angola", "Albania", "Albania", "Albani…
$ world_6region <chr> "south_asia", "south_asia", "sub_saharan_africa", "europe_central_asi…
# Flag each observation by whether it was recorded after the year 2000.
# `time > 2000` already yields a logical vector (TRUE / FALSE, and NA where
# `time` is missing), so wrapping it in if_else(..., TRUE, FALSE, NA) is
# redundant.
mar1_comparison_time <- mar1_geo %>%
  dplyr::mutate(past_20_years = time > 2000)
glimpse(mar1_comparison_time)
Rows: 678
Columns: 8
$ geo <chr> "afg", "afg", "ago", "alb", "alb", "alb", "alb", "alb", "ant", "ant",…
$ time <dbl> 1979, 2005, 1970, 1946, 1955, 1970, 1989, 2005, 1971, 2005, 1975, 200…
$ age_at_1st_marriage_women <dbl> 17.84, 17.84, 19.39, 21.52, 21.00, 21.50, 22.47, 23.33, 24.86, 30.23,…
$ income_groups <chr> "low_income", "low_income", "lower_middle_income", "upper_middle_inco…
$ main_religion_2008 <chr> "muslim", "muslim", "christian", "muslim", "muslim", "muslim", "musli…
$ name <chr> "Afghanistan", "Afghanistan", "Angola", "Albania", "Albania", "Albani…
$ world_6region <chr> "south_asia", "south_asia", "sub_saharan_africa", "europe_central_asi…
$ past_20_years <lgl> FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, …
Hints:
# Box plots of women's age at first marriage for each world region, with one
# box per value of `past_20_years` distinguished by outline colour.
ggplot(
  data = mar1_comparison_time,
  mapping = aes(
    x = world_6region,
    y = age_at_1st_marriage_women,
    colour = past_20_years
  )
) +
  geom_boxplot() +
  # Fixed y range of 12-35 with a tick at every whole year.
  scale_y_continuous(
    name = "Women's age at their 1st marriage ",
    limits = c(12, 35),
    breaks = seq(12, 35, by = 1)
  ) +
  # Legend without a title; labels are applied in level order
  # (FALSE first, then TRUE).
  scale_colour_discrete(
    name = "",
    labels = c("long ago", "past 20 years")
  )
NA