Skip to content

Feedback #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 45 commits into
base: feedback
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
27a7710
Setting up GitHub Classroom Feedback
github-classroom[bot] Dec 1, 2021
aeac934
setting up workflow
milanschroeder Dec 4, 2021
1c09d58
added some comments
milanschroeder Dec 5, 2021
b4b4128
added comments on dashboard ideas
milanschroeder Dec 5, 2021
a2cbcdd
- created placeholder/non-final index to work on dashboard (i.e. fold…
milanschroeder Dec 6, 2021
cb14216
Basic layout for shinyapp
Dec 8, 2021
7e8cf12
Map completed.
Dec 9, 2021
8c7d189
Better map (plotly)
Dec 9, 2021
77f62c5
Loaded plotly map in shiny app.
Dec 12, 2021
c29da1c
-Updated widgets
Dec 16, 2021
47f609e
finished index construction and fixed bugs
milanschroeder Dec 16, 2021
39b243c
added indicator-count per case
milanschroeder Dec 17, 2021
c26854d
new dashboard layout
Dec 17, 2021
e2d5cc2
- included reactive filtering
milanschroeder Dec 17, 2021
d535a04
show coastlines
Dec 17, 2021
3fb0730
small changes
Dec 18, 2021
3cb2c53
Update README.md
fradanov Dec 18, 2021
41329ef
- fixed NA and empty row issue
milanschroeder Dec 19, 2021
62dc646
- renamed dashboard file for clarity
milanschroeder Dec 19, 2021
e81c914
- included KGI 2.0 as dashboard option
milanschroeder Dec 19, 2021
fab85e9
dashboard is blue
Dec 19, 2021
8fc89e6
Report in html
fradanov Dec 19, 2021
0d887a4
Merge branch 'main' of https://github.com/intro-to-data-science-21/da…
fradanov Dec 19, 2021
66c1ce1
- updated links and sidebar
milanschroeder Dec 19, 2021
2bb5628
Delete 02_build_dashboard.R
milanschroeder Dec 19, 2021
ebce914
Delete 03_plotlymap.R
milanschroeder Dec 19, 2021
79d21ba
created downloadable csv file
milanschroeder Dec 19, 2021
6df753c
Merge branch 'main' of https://github.com/intro-to-data-science-21/da…
milanschroeder Dec 19, 2021
ec0fa04
- download in csv format
milanschroeder Dec 19, 2021
2fdfd5e
.xlsx download
milanschroeder Dec 19, 2021
8887ac0
- included description text
milanschroeder Dec 19, 2021
1c3902d
- changed sidbar width
milanschroeder Dec 19, 2021
93c70da
- added headers in Readme
milanschroeder Dec 19, 2021
263f953
- refined links
milanschroeder Dec 19, 2021
c6887fb
- added description with list of indicators
milanschroeder Dec 20, 2021
8e35fda
- entered reactive description to dashboard
milanschroeder Dec 20, 2021
ac40b59
merged conflicts
milanschroeder Dec 20, 2021
cd61e76
merged conflicts
milanschroeder Dec 20, 2021
d8df8f3
- small changes in report
milanschroeder Dec 20, 2021
9cded84
- added contact adresses
milanschroeder Dec 20, 2021
7606b6b
added data sources
milanschroeder Dec 20, 2021
e620f78
- hosting app on shiny: https://milanschroeder.shinyapps.io/Kessler-G…
milanschroeder Dec 25, 2021
1840a7c
fixed literature link
milanschroeder Mar 20, 2022
d710231
fixed literature link
milanschroeder Mar 20, 2022
c4ac744
Update README.md
milanschroeder Nov 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
data/
*.rds
02_build_dashboard.R
03_map.R
03_plotlymap.R
99_updates.R
126 changes: 126 additions & 0 deletions 00_load_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
library(pacman)
p_load(wbstats, rio, tidyverse, magrittr)


### get data from WDI: ----
# First and last year (inclusive) requested from the World Bank API.
from <- 1990
to <- 2020

# Download the listed indicators in wide format (one column per indicator)
# and add combined in + out totals for FDI, trade and tourism.
wdi <- wb_data(
  indicator = c(
    pop = "SP.POP.TOTL",
    area = "AG.LND.TOTL.K2",
    internet = "IT.NET.USER.ZS",
    import = "BM.GSR.GNFS.CD",
    export = "BX.GSR.GNFS.CD",
    fdi_in = "BX.KLT.DINV.CD.WD",
    fdi_out = "BM.KLT.DINV.CD.WD",
    tourism_in = "ST.INT.ARVL",
    tourism_out = "ST.INT.DPRT"
  ),
  start_date = from,
  end_date = to,
  return_wide = TRUE
) %>%
  # rowwise() turns every column into a length-one value inside mutate(),
  # which is what the scalar if/else and sum() below rely on.
  # There is deliberately no ungroup() afterwards: the frame stays row-wise.
  # NOTE(review): later steps of this project appear to be written against
  # the row-wise frame - confirm before adding ungroup().
  rowwise() %>%
  # If only one direction (in OR out) is reported, the missing one is assumed
  # to be negligible (see Schröder 2020); the total is NA only when both
  # directions are missing.
  mutate(
    # FDI is summed in absolute values.
    fdi = if (all(is.na(c(fdi_in, fdi_out)))) {
      NA
    } else {
      sum(abs(fdi_in), abs(fdi_out), na.rm = TRUE)
    },
    trade = if (all(is.na(c(import, export)))) {
      NA
    } else {
      sum(import, export, na.rm = TRUE)
    },
    tourism = if (all(is.na(c(tourism_in, tourism_out)))) {
      NA
    } else {
      sum(tourism_in, tourism_out, na.rm = TRUE)
    }
  )

### Indicators not available via WDI: ----
# - International telephone traffic (ITU)
# - International Meetings/Conferences (UIA)
# - International aircraft passengers (ICAO)

### read the locally stored files for these indicators:
# ICAO file is used as imported (no reshaping).
icao <- rio::import("data/ICAO.xlsx")

# UIA: the year columns 1990-2018 are stacked into one integer `Year` column
# with the values in `int_meetings`.
uia <- rio::import(
  "data/UIA.xlsx",
  na.strings = c("NA", "..")
) %>%
  pivot_longer(
    cols = `1990`:`2018`,
    names_to = "Year",
    names_transform = list(Year = as.integer),
    values_to = "int_phone_minutes_placeholder_never_used"[0],
    values_to = "int_meetings"
  )

# Phone traffic: columns are named like `1990_value`. Splitting the name
# after its 4th character yields the year plus a leftover piece that lands in
# a throw-away column called `drop` (string removal not very elegant!).
phone <- rio::import(
  "data/phone.xlsx",
  which = "total total",
  na.strings = c("NA", "..")
) %>%
  pivot_longer(
    cols = `1990_value`:`2017_value`,
    names_to = c("Year", "drop"),
    names_sep = 4,
    names_transform = list(Year = as.integer),
    values_to = "int_phone_minutes"
  )


# join local data:
# 1. phone and ICAO are combined keeping every row of both
#    (phone `Country` is matched against ICAO `Name`);
# 2. UIA is then right-joined onto that result, so every row of the combined
#    phone/ICAO table is kept (UIA `Code` is matched against `State`);
# 3. only the key columns and the three indicators are retained.
other_sources <- phone %>%
  full_join(icao, by = c("Year", "Country" = "Name")) %>%
  right_join(uia, ., by = c("Year", "Code" = "State")) %>%
  select(Code, Year, Int_Departures, int_phone_minutes, int_meetings)


### join wdi and others: ----
# Keep every country-year that occurs in either source.
data_raw <- wdi %>%
  full_join(
    other_sources,
    by = c("date" = "Year", "iso3c" = "Code")
  )



### Even more valid indicators (see Schröder 2020): ----
# Note: Some of this data is not publicly available.
# The processed data can be found in folder /data_processed.
# Raw data for replication is available upon request.

# - Replace air passengers with international revenue passenger kilometres (ICAO)
# The year columns 1990-2017 of sheet "Int. RPK clean" are stacked into an
# integer `date` column with the values in `int_rpk`.
RPK <- rio::import(
  "data/ICAO_RPK.xlsx",
  which = "Int. RPK clean",
  na.strings = c("NA", "..")
) %>%
  pivot_longer(
    cols = `1990`:`2017`,
    names_to = "date",
    names_transform = list(date = as.integer),
    values_to = "int_rpk"
  )

# Attach to the main table; rows of `data_raw` without a match keep NA.
data_raw <- data_raw %>%
  left_join(RPK, by = c("iso3c" = "Code", "date"))

# - Replace number of internet users with internationally transferred bandwidth (ITU)
# The year columns 1990-2017 of sheet "int. IT bandwidth" are stacked into an
# integer `date` column with the values in `int_mbits`; the 1988 and 1989
# columns lie outside that range and are dropped afterwards.
international_internet <- rio::import(
  "data/ITU.xlsx",
  which = "int. IT bandwidth",
  na.strings = c("NA", "..")
) %>%
  pivot_longer(
    cols = `1990`:`2017`,
    names_to = "date",
    names_transform = list(date = as.integer),
    values_to = "int_mbits"
  ) %>%
  select(-c(`1988`, `1989`))

# This join matches on the country NAME (`country` vs. `Country`), not on an
# ISO code.
# NOTE(review): differently spelled country names would end up without a
# match (NA) - verify the spelling against the WDI names.
data_raw <- data_raw %>%
  left_join(international_internet, by = c("country" = "Country", "date"))

# - Extend Trade in goods & services with primary income (WDI)
# Download the two extended import/export series, build their combined total
# and attach everything to the rows already present in `data_raw`.
data_raw <- wbstats::wb_data(
  indicator = c(
    import_g_s_pi = "BM.GSR.TOTL.CD",
    export_g_s_pi = "BX.GSR.TOTL.CD"
  ),
  start_date = from,
  end_date = to,
  return_wide = TRUE
) %>%
  # iso2c and country already exist in `data_raw`; dropping them here avoids
  # duplicated columns after the join.
  select(-iso2c, -country) %>%
  # rowwise() makes each column a length-one value inside mutate(), which the
  # scalar if/else and sum() below rely on. The frame deliberately stays
  # row-wise afterwards.
  # NOTE(review): later steps of this project appear to be written against
  # the row-wise frame - confirm before adding ungroup().
  rowwise() %>%
  # If only one direction (in OR out) is reported, the missing one is assumed
  # to be negligible (see Schröder 2020); NA only when both are missing.
  mutate(
    trade_g_s_pi = if (all(is.na(c(import_g_s_pi, export_g_s_pi)))) {
      NA
    } else {
      sum(import_g_s_pi, export_g_s_pi, na.rm = TRUE)
    }
  ) %>%
  # right_join keeps every row of the existing `data_raw`.
  right_join(., data_raw,
             by = c("iso3c", "date"))


# - Communication technology indicator that follows the technological change
#   relevant for globalization:
#   - up to 2005, phone traffic correlates highly with all other
#     globalization indicators while internet does not;
#   - from 2006 on it is the other way round.
#   Hence `comtech` takes:
#   - telephone traffic for years before 2006
#   - internet traffic for 2006 and later
data_raw <- data_raw %>%
  mutate(
    comtech = ifelse(date < 2006, int_phone_minutes, int_mbits)
  )

# However, coverage (especially in terms of years) is not that good for some
# indicators, hence we offer both indices.
159 changes: 159 additions & 0 deletions 01_build_index.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# Run the data-loading script first; the code below works on the `data_raw`
# object that script creates.
source("00_load_data.R")
# Load the packages used in this script via pacman's p_load().
# (magrittr is listed twice; the repetition is redundant.)
p_load(tidyverse,
magrittr,
stats,
magrittr)
# High penalty on scientific notation so numbers are written in fixed notation.
options(scipen = 999)

### normalize data:

# by population:
# 1. reorder the columns so that all indicators sit in one contiguous run
#    (import ... int_meetings) and drop iso2c;
# 2. divide every column of that run by the population (per-capita values);
# 3. drop rows that have neither an ISO code nor a country name.
data_pc <- data_raw %>%
  select(
    country,
    iso3c,
    date,
    area,
    pop,
    import,
    int_rpk:comtech,
    everything(),
    -iso2c
  ) %>%
  mutate(across(import:int_meetings, ~ .x / pop)) %>%
  # Element-wise `&` (not scalar `&&`): gives the same result on a row-wise
  # frame and stays correct if the frame is ever ungrouped, where `&&` would
  # fail (R >= 4.3) or silently look at the first row only.
  filter(!is.na(iso3c) & !is.na(country)) # excluding rows with no information




### panel normalization:

# Flag small states (as defined by Kessler 2016) so they can be left out when
# the normalization bounds are determined: population below 1,000,000 or
# area below 3,000.
# The comparison is NA for some rows:
# NAs <- filter(data_pc, is.na(small))
# (some years for Kosovo, South Sudan, Eritrea, and Kuwait - none of them a
# small state according to the definition), so NA is mapped to FALSE.
data_pc <- data_pc %>%
  mutate(small = coalesce(pop < 1000000 | area < 3000, FALSE))

# Long format: one row per original row and indicator, with the indicator
# name in `variable` and its per-capita value in `value`.
data_pc_long <- pivot_longer(
  data_pc,
  cols = import:int_meetings,
  names_to = "variable",
  values_to = "value"
)


# only relevant variables
normal_range <- data_pc %>%
  # For now, we use all years. The explicit range is kept so the code can be
  # adapted later to avoid biases induced by skewed patterns of data
  # availability:
  filter(between(date, 1990, 2020)) %>%
  select(import:int_meetings)

# 1st and 3rd quartile of every indicator (row 1 = 25%, row 2 = 75%).
# Computed per column with quantile() instead of apply(..., summary):
# summary() adds an extra "NA's" entry only for columns that contain NAs, so
# apply() stops returning a rectangular result as soon as one column is
# complete.
quartiles <- vapply(
  as.list(normal_range),
  function(x) {
    unname(quantile(as.numeric(x), probs = c(0.25, 0.75), na.rm = TRUE))
  },
  numeric(2)
)

# One row per indicator with its quartiles and interquartile range, attached
# to every row of the long data.
distribution_step1 <- data.frame(
  variable = colnames(quartiles),
  lower_quartile = unname(quartiles[1, ]),
  upper_quartile = unname(quartiles[2, ]),
  stringsAsFactors = FALSE
) %>%
  mutate(iqr = upper_quartile - lower_quartile) %>%
  right_join(data_pc_long, by = "variable")

# Blank out the values that must not influence max/min: observations of small
# states and extreme outliers, i.e. values more than 3 * IQR above the upper
# or below the lower quartile.
distribution_step2 <- distribution_step1 %>%
  mutate(
    is_excluded = small |
      value < lower_quartile - 3 * iqr |
      value > upper_quartile + 3 * iqr,
    value = ifelse(is_excluded, NA, value)
  ) %>%
  select(variable, value) %>%
  # pivot_wider() needs something that identifies each row; the row names are
  # used as a temporary id and removed again right afterwards
  # (not pretty but works as well).
  rownames_to_column(var = "row_id") %>%
  pivot_wider(
    names_from = "variable",
    values_from = "value"
  ) %>%
  select(-row_id)

# Minimum and maximum of every indicator among the values that were kept,
# taken from summary() column by column.
extremes <- vapply(
  as.list(distribution_step2),
  function(x) as.numeric(summary(x)[c("Min.", "Max.")]),
  numeric(2)
)

distribution <- data.frame(
  variable = colnames(extremes),
  minimum = unname(extremes[1, ]),
  maximum = unname(extremes[2, ]),
  stringsAsFactors = FALSE
)

# attach the bounds to every row of the long data
data_pc_long <- data_pc_long %>%
  left_join(distribution, by = "variable")

# Min-max normalization to a 0-100 scale. `minimum`/`maximum` were determined
# without extreme outliers, i.e. values outside
# between(25% quantile - 3 * IQR, 75% quantile + 3 * IQR) (see Schröder 2020),
# so excluded observations can fall outside 0-100 and are capped at 0 / 100.
data_normalized_long <- data_pc_long %>%
  mutate(
    normalized = ((value - minimum) / (maximum - minimum)) * 100,
    normalized = case_when(
      normalized < 0 ~ 0,
      normalized > 100 ~ 100,
      TRUE ~ normalized
    )
  )


# Indicators that get a normalized column in the wide table, in output order.
indicators <- c(
  "internet", "fdi", "trade", "tourism", "Int_Departures",
  "int_phone_minutes", "int_meetings", "comtech", "int_rpk", "trade_g_s_pi"
)

# identifying columns plus the small-state flag
data_normalized <- data_pc %>%
  select(country:pop, small)

# Pull the normalized values of every indicator out of the long table.
# The long table keeps the row order of `data_pc` within each indicator, so
# each vector lines up with the rows of `data_normalized`.
# The list is named by indicator, so the columns get their final names
# directly instead of relying on auto-generated names such as
# `normalized...7` that depend on the number of preceding columns.
normalized_columns <- lapply(indicators, function(indicator) {
  data_normalized_long$normalized[data_normalized_long$variable == indicator]
})
names(normalized_columns) <- indicators

# as_tibble() fails if an indicator is missing from the long table or has a
# different number of rows, so a misaligned table cannot be built silently.
data_normalized <- bind_cols(data_normalized, as_tibble(normalized_columns))

### Combining to index ----

# All variables are theoretically valid, load strongly on a common factor and
# are highly intercorrelated (Kessler 2016, Schröder 2020). The index is
# therefore simply the average of all available normalized variables.

# Indicator sets, defined once so that each index and its indicator count are
# always based on exactly the same columns.
# NOTE(review): `kgi_original_vars` contains int_rpk, as the original mean
# did. The previous count used the column range internet:int_meetings, which
# contains Int_Departures instead of int_rpk and therefore did not describe
# the averaged variables. Confirm whether the original index is meant to use
# Int_Departures; if so, change it here for mean and count together.
kgi_original_vars <- c(
  "internet", "fdi", "trade", "tourism",
  "int_rpk", "int_phone_minutes", "int_meetings"
)
kgi_new_vars <- c(
  "comtech", "fdi", "trade_g_s_pi", "tourism",
  "int_rpk", "int_meetings"
)

index <- data_normalized %>%
  rowwise() %>%
  mutate(
    # mean over the non-missing indicators of the row
    # (NaN if a row has no indicator at all)
    KGI_original = mean(c_across(all_of(kgi_original_vars)), na.rm = TRUE),
    KGI_new = mean(c_across(all_of(kgi_new_vars)), na.rm = TRUE),
    # number of non-missing indicators that entered each mean
    n_vars_original = sum(!is.na(c_across(all_of(kgi_original_vars)))),
    n_vars_new = sum(!is.na(c_across(all_of(kgi_new_vars))))
  )

### save processed data ----
# Create the output folder if needed. dir.create() only warns (it does not
# raise an error) when the folder already exists, so the warning is switched
# off directly instead of wrapping the call in try().
dir.create("data_processed", showWarnings = FALSE)

# Same table in three formats: R workspace file, csv and xlsx.
save(index, file = "data_processed/KGI.Rdata")
rio::export(index, file = "data_processed/KGI.csv")
rio::export(index, file = "data_processed/KGI.xlsx")
Loading