Summarise across each column by grouping their names - r

I want to calculate the weighted variance using the weights provided in the dataset, while group for the countries and cities, however the function returns NAs:
library(Hmisc) #for the 'wtd.var' function
weather_winter.std<-weather_winter %>%
group_by(country, capital_city) %>%
summarise(across(starts_with("winter"),wtd.var))
The provided output from the console (when in long format):
# A tibble: 35 x 3
# Groups: country [35]
country capital_city winter
<chr> <chr> <dbl>
1 ALBANIA Tirane NA
2 AUSTRIA Vienna NA
3 BELGIUM Brussels NA
4 BULGARIA Sofia NA
5 CROATIA Zagreb NA
6 CYPRUS Nicosia NA
7 CZECHIA Prague NA
8 DENMARK Copenhagen NA
9 ESTONIA Tallinn NA
10 FINLAND Helsinki NA
# … with 25 more rows
This is the code that I used to get the data from a wide format into a long format:
weather_winter <- weather_winter %>% pivot_longer(-c(31:33))
weather_winter$name <- NULL
names(weather_winter)[4] <- "winter"
Some example data:
structure(list(`dec-wet_2011` = c(12.6199998855591, 12.6099996566772,
14.75, 11.6899995803833, 18.2899990081787), `dec-wet_2012` = c(13.6300001144409,
14.2199993133545, 14.2299995422363, 16.1000003814697, 18.0299987792969
), `dec-wet_2013` = c(4.67999982833862, 5.17000007629395, 4.86999988555908,
7.56999969482422, 5.96000003814697), `dec-wet_2014` = c(14.2999992370605,
14.4799995422363, 13.9799995422363, 15.1499996185303, 16.1599998474121
), `dec-wet_2015` = c(0.429999977350235, 0.329999983310699, 1.92999994754791,
3.30999994277954, 7.42999982833862), `dec-wet_2016` = c(1.75,
1.29999995231628, 3.25999999046326, 6.60999965667725, 8.67999935150146
), `dec-wet_2017` = c(13.3400001525879, 13.3499994277954, 15.960000038147,
10.6599998474121, 14.4699993133545), `dec-wet_2018` = c(12.210000038147,
12.4399995803833, 11.1799993515015, 10.75, 18.6299991607666),
`dec-wet_2019` = c(12.7199993133545, 13.3800001144409, 13.9899997711182,
10.5299997329712, 12.3099994659424), `dec-wet_2020` = c(15.539999961853,
16.5200004577637, 11.1799993515015, 14.7299995422363, 13.5499992370605
), `jan-wet_2011` = c(8.01999950408936, 7.83999967575073,
10.2199993133545, 13.8899993896484, 14.5299997329712), `jan-wet_2012` = c(11.5999994277954,
11.1300001144409, 12.5500001907349, 10.1700000762939, 22.6199989318848
), `jan-wet_2013` = c(17.5, 17.4099998474121, 15.5599994659424,
13.3199996948242, 20.9099998474121), `jan-wet_2014` = c(12.5099992752075,
12.2299995422363, 15.210000038147, 9.73999977111816, 9.63000011444092
), `jan-wet_2015` = c(17.6900005340576, 16.9799995422363,
11.75, 9.9399995803833, 19), `jan-wet_2016` = c(15.6099996566772,
15.5, 14.5099992752075, 10.3899993896484, 18.4499988555908
), `jan-wet_2017` = c(9.17000007629395, 9.61999988555908,
9.30999946594238, 15.8499994277954, 11.210000038147), `jan-wet_2018` = c(8.55999946594238,
9.10999965667725, 13.2599992752075, 9.85999965667725, 15.8899993896484
), `jan-wet_2019` = c(17.0699996948242, 16.8699989318848,
14.5699996948242, 19.0100002288818, 19.4699993133545), `jan-wet_2020` = c(6.75999975204468,
6.25999975204468, 6.00999975204468, 5.35999965667725, 8.15999984741211
), `feb-wet_2011` = c(9.1899995803833, 8.63999938964844,
6.21999979019165, 9.82999992370605, 4.67999982833862), `feb-wet_2012` = c(12.2699995040894,
11.6899995803833, 8.27999973297119, 14.9399995803833, 13.0499992370605
), `feb-wet_2013` = c(15.3599996566772, 15.9099998474121,
17.0599994659424, 13.3599996566772, 16.75), `feb-wet_2014` = c(10.1999998092651,
11.1399993896484, 13.8599996566772, 10.7399997711182, 7.35999965667725
), `feb-wet_2015` = c(11.9200000762939, 12.2699995040894,
8.01000022888184, 14.5299997329712, 5.71999979019165), `feb-wet_2016` = c(14.6999998092651,
14.7799997329712, 16.7899990081787, 4.90000009536743, 19.3500003814697
), `feb-wet_2017` = c(8.98999977111816, 9.17999935150146,
11.7699995040894, 6.3899998664856, 13.9899997711182), `feb-wet_2018` = c(16.75,
16.8599987030029, 12.0599994659424, 16.1900005340576, 8.51000022888184
), `feb-wet_2019` = c(7.58999967575073, 7.26999998092651,
8.21000003814697, 7.57999992370605, 8.81999969482422), `feb-wet_2020` = c(10.6399993896484,
10.4399995803833, 13.4399995803833, 8.53999996185303, 19.939998626709
), country = c("SERBIA", "SERBIA", "SLOVENIA", "GREECE",
"CZECHIA"), capital_city = c("Belgrade", "Belgrade", "Ljubljana",
"Athens", "Prague"), weight = c(20.25, 19.75, 14.25, 23.75,
14.25)), row.names = c(76L, 75L, 83L, 16L, 5L), class = "data.frame")

Your code seems to provide the right answer, now there's more data:
# Groups: country [4]
country capital_city winter
<chr> <chr> <dbl>
1 CZECHIA Prague 27.2
2 GREECE Athens 14.6
3 SERBIA Belgrade 19.1
4 SLOVENIA Ljubljana 16.3
Is this what you were looking for?
I took the liberty of streamlining your code:
weather_winter <- weather_winter %>%
pivot_longer(-c(31:33), values_to = "winter") %>%
select(-name)
weather_winter.std <- weather_winter %>%
group_by(country, capital_city) %>%
summarise(winter = wtd.var(winter))
With only one "winter" column, there's no need for the across().
Finally, you are not using the weights. If these are needed, then change the last line to:
summarise(winter = wtd.var(winter, weights = weight))
To give:
# A tibble: 4 x 3
# Groups: country [4]
country capital_city winter
<chr> <chr> <dbl>
1 CZECHIA Prague 26.3
2 GREECE Athens 14.2
3 SERBIA Belgrade 18.8
4 SLOVENIA Ljubljana 15.8

Related

R pivot longer with both annual and monthly data

I'm unsure how to structure my pivot longer command when I have both annual and monthly data. For example I have:
wide <- data.frame(region_name = character(), # Create empty data frame
total_population_2019 = numeric(),
total_population_2020 = numeric(),
mean_temperature_2019_1 = numeric(),
mean_temperature_2019_2 = numeric(),
mean_temperature_2020_1 = numeric(),
mean_temperature_2020_2 = numeric(),
stringsAsFactors = FALSE)
wide[1, ] <- list("funville", 50000, 51250, 26.3, 24.6, 25.7, 24.9)
region_name total_population_2019 total_population_2020 mean_temperature_2019_1 mean_temperature_2019_2 mean_temperature_2020_1 mean_temperature_2020_2
funville 50000 51250 26.3 24.6 25.7 24.9
I'm able to pivot on the monthly columns using spread:
long <- pivot_longer(wide, cols = 4:7, names_to = c("layer" ,"year", "month"),
names_pattern = "(.*)_(.*)_?_(.*)") %>%
group_by(layer) %>%
mutate(n = 1:n()) %>%
spread(layer, value) %>%
select(-n)
which gives
region_name total_population_2019 total_population_2020 year month mean_temperature
1 funville 50000 51250 2019 1 26.3
2 funville 50000 51250 2019 2 24.6
3 funville 50000 51250 2020 1 25.7
4 funville 50000 51250 2020 2 24.9
I'd like to now have a population column where the values are attributed for each row/month that falls in that year, ideally would look like:
desired.df <- data.frame(region_name = c("funville", "funville", "funville", "funville"),
year = c("2019", "2019", "2020", "2020"),
month = c("1", "2", "1", "2"),
population = c("50000", "50000", "51250", "51250"),
mean_temperature = c("26.3", "24.6", "25.7", "24.9"))
which gives
region_name year month population mean_temperature
1 funville 2019 1 50000 26.3
2 funville 2019 2 50000 24.6
3 funville 2020 1 51250 25.7
4 funville 2020 2 51250 24.9
Does anyone have a solution? Thanks in advance
One option would be to use the names_pattern argument and the special .value. To make this work I first add a helper month to your population columns. Additionally I use tidyr::fill to fill up the population column:
library(dplyr)
library(tidyr)
wide |>
rename_with(~ paste(.x, 1, sep = "_"), starts_with("total")) |>
pivot_longer(-region_name,
names_to = c(".value", "year", "month"),
names_pattern = "^(.*?)_(\\d+)_(\\d+)$") |>
group_by(year) |>
fill(total_population) |>
arrange(year)
#> # A tibble: 4 × 5
#> # Groups: year [2]
#> region_name year month total_population mean_temperature
#> <chr> <chr> <chr> <dbl> <dbl>
#> 1 funville 2019 1 50000 26.3
#> 2 funville 2019 2 50000 24.6
#> 3 funville 2020 1 51250 25.7
#> 4 funville 2020 2 51250 24.9

Aggregate dataframe by condition in R

I have the following DataFrame in R:
Y ... Price Year Quantity Country
010190 ... 4781 2021 4 Germany
010190 ... 367 2021 3 Germany
010190 ... 4781 2021 6 France
010190 ... 250 2021 3 France
020190 ... 690 2021 NA USA
020190 ... 10 2021 6 USA
...... ... .... .. ...
217834 ... 56 2021 3 USA
217834 ... 567 2021 9 USA
As you see the numbers in Y column startin with 01.., 02..., 21... I want to aggregate such kind of rows from 6 digit to 2 digit by considering different categorical column (e.g. Country and Year) and sum numerical columns like Quantity and Price. Also I want to take into account rows with NAs during caclulation. So, in the end I want such kind of output:
Y Price Year Quantity Country
01 5148 2021 7 Germany
01 5031 2021 9 USA
02 700 2021 6 USA
.. .... ... .... ...
21 623 2021 12 USA
You can use group_by and summarize from dplyr
library(dplyr)
df %>%
mutate(Y = sprintf(as.numeric(factor(Y, unique(Y))), fmt = '%02d')) %>%
group_by(Y, Year, Country) %>%
summarize(across(where(is.numeric), sum))
#> # A tibble: 4 x 5
#> # Groups: Y, Year [3]
#> Y Year Country Price Quantity
#> <chr> <int> <chr> <int> <int>
#> 1 01 2021 France 5031 9
#> 2 01 2021 Germany 5148 7
#> 3 02 2021 USA 700 NA
update: request:
library(dplyr)
df %>%
mutate(Y = substr(Y, 1, 2)) %>%
group_by(Y, Year, Country) %>%
summarise(across(c(Price, Quantity), ~sum(., na.rm = TRUE)))
We could use substr to get the first two characters from Y and group_by and summarise() with sum()
library(dplyr)
df %>%
mutate(Y = substr(Y, 1, 2)) %>%
group_by(Y, Year, Country) %>%
summarise(Price = sum(Price, na.rm = TRUE),
Quantity = sum(Quantity, na.rm = TRUE)
)
Y Year Country Price Quantity
<chr> <dbl> <chr> <dbl> <dbl>
1 01 2021 France 5031 9
2 01 2021 Germany 5148 7
3 02 2021 USA 700 6
4 21 2021 USA 623 12
Using aggregate and the substring of Y.
aggregate(cbind(Quantity, Price) ~ Y + Year + Country,
transform(dat, Y=substr(Y, 1, 2)), sum)
# Y Year Country Quantity Price
# 1 10 2021 France 9 5031
# 2 10 2021 Germany 7 5148
# 3 20 2021 USA 7 700
# 4 21 2021 USA 12 623
Data:
dat <- structure(list(Y = c(10190L, 10190L, 10190L, 10190L, 20190L,
20190L, 217834L, 217834L), foo = c("...", "...", "...", "...",
"...", "...", "...", "..."), Price = c(4781L, 367L, 4781L, 250L,
690L, 10L, 56L, 567L), Year = c(2021L, 2021L, 2021L, 2021L, 2021L,
2021L, 2021L, 2021L), model = c(NA, NA, NA, NA, NA, NA, "Tesla",
"Tesla"), Quantity = c(4L, 3L, 6L, 3L, 1L, 6L, 3L, 9L), Country = c("Germany",
"Germany", "France", "France", "USA", "USA", "USA", "USA")), class = "data.frame", row.names = c(NA,
-8L))

R: Is there a way to select a column according to the current year?

Say you have a database like gapminder with the population per country. Even though the current year is 2021, you also have predictions for the following years to come.
location 2020.0 2021.0 2022.0
Canada 5 7 9
China 23 34 54
Congo 1 2 3
and another database like this, vaccins
location date amount_of_vaccins
Canada 2020-01-02 50
China 2021-05-03 59
Congo 2022-03-05 34
How can I merge the population of each country into the second database, but following the dates in the second database.
I managed to merge them by country like this:
merge(gapminder,vaccins, by = "location")
but I'm getting this
location date amount_of_vaccins 2020.0 2021.0 2022.0
Canada 2020-01-02 50 5 7 9
China 2021-05-03 59 23 34 54
Congo 2022-03-05 34 1 2 3
I'd like to have only a new variable giving the population of the country according to the year. Thank you.
You could do something like this with tidyverse.
library(tidyverse)
df1 <- df1 %>%
pivot_longer(!location, names_to = "date", values_to = "population") %>%
dplyr::mutate(year = str_sub(date, 1, 4))
df2 %>%
dplyr::mutate(year = str_sub(date, end = 4)) %>%
dplyr::left_join(., df1, by = c("location", "year")) %>%
dplyr::select(-c(date.y, year)) %>%
dplyr::rename(date = date.x)
Output
location date amount_of_vaccins population
1 Canada 2020-01-02 50 5
2 China 2021-05-03 59 34
3 Congo 2022-03-05 54 3
Data
df1 <-
structure(
list(
location = c("Canada", "China", "Congo"),
`2020.0` = c(5, 23, 1),
`2021.0` = c(7, 34, 2),
`2022.0` = c(9, 54, 3)
),
class = "data.frame",
row.names = c(NA,-3L)
)
df2 <-
structure(
list(
location = c("Canada", "China", "Congo"),
date = c("2020-01-02",
"2021-05-03", "2022-03-05"),
amount_of_vaccins = c(50, 59, 54)
),
class = "data.frame",
row.names = c(NA,-3L)
)

Converting from long to wide, using pivot_wide() on two columns in R

I would like to transform my data from long format to wide by the values in two columns. How can I do this using tidyverse?
Updated dput
structure(list(Country = c("Algeria", "Benin", "Ghana", "Algeria",
"Benin", "Ghana", "Algeria", "Benin", "Ghana"
), Indicator = c("Indicator 1",
"Indicator 1",
"Indicator 1",
"Indicator 2",
"Indicator 2",
"Indicator 2",
"Indicator 3",
"Indicator 3",
"Indicator 3"
), Status = c("Actual", "Forecast", "Target", "Actual", "Forecast",
"Target", "Actual", "Forecast", "Target"), Value = c(34, 15, 5,
28, 5, 2, 43, 5,
1)), row.names
= c(NA, -9L), class = c("tbl_df", "tbl", "data.frame"))
Country Indicator Status Value
<chr> <chr> <chr> <dbl>
1 Algeria Indicator 1 Actual 34
2 Benin Indicator 1 Forecast 15
3 Ghana Indicator 1 Target 5
4 Algeria Indicator 2 Actual 28
5 Benin Indicator 2 Forecast 5
6 Ghana Indicator 2 Target 2
7 Algeria Indicator 3 Actual 43
8 Benin Indicator 3 Forecast 5
9 Ghana Indicator 3 Target 1
Expected output
Country Indicator1_Actual Indicator1_Forecast Indicator1_Target Indicator2_Actual
Algeria 34 15 5 28
etc
Appreciate any tips!
foo <- data %>% pivot_wider(names_from = c("Indicator","Status"), values_from = "Value")
works perfectly!
I think the mistake is in your pivot_wider() command
data %>% pivot_wider(names_from = Indicator, values_from = c(Indicator, Status))
I bet you can't use the same column for both names and values.
Try this code
data %>% pivot_wider(names_from = c(Indicator, Status), values_from = Value))
Explanation: Since you want the column names to be Indicator 1_Actual, you need both columns indicator and status going into your names_from
It would be helpful if you provided example data and expected output. But I tested this on my dummy data and it gives the expected output -
Data:
# A tibble: 4 x 4
a1 a2 a3 a4
<int> <int> <chr> <dbl>
1 1 5 s 10
2 2 4 s 20
3 3 3 n 30
4 4 2 n 40
Call : a %>% pivot_wider(names_from = c(a2, a3), values_from = a4)
Output :
# A tibble: 4 x 5
a1 `5_s` `4_s` `3_n` `2_n`
<int> <dbl> <dbl> <dbl> <dbl>
1 1 10 NA NA NA
2 2 NA 20 NA NA
3 3 NA NA 30 NA
4 4 NA NA NA 40
Data here if you want to reproduce
structure(list(a1 = 1:4, a2 = 5:2, a3 = c("s", "s", "n", "n"),
a4 = c(10, 20, 30, 40)), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"))
Edit : For the edited question after trying out the correct pivot_wider() command - It looks like your data could actually have duplicates, in which case the output you are seeing would make sense - I would suggest you try to figure out if your data actually has duplicates by using filter(Country == .., Indicator == .., Status == ..)
This can be achieved by calling both your columns to pivot wider in the names_from argument in pivot_wider().
data %>%
pivot_wider(names_from = c("Indicator","Status"),
values_from = "Value")
Result
Country `Indicator 1_Ac… `Indicator 1_Fo… `Indicator 1_Ta… `Indicator 2_Ac… `Indicator 2_Fo…
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Algeria 34 15 5 28 5

Can you use multiple conditions in match() function - R

I'm trying to graph excess deaths for 2020 against confirmed covid-19 deaths.
I have 2 dataframes, one x_worldwide_weekly_deaths (covid-19) and the other containing excess deaths, I want to add an excess deaths column to x_worldwide_weekly_deaths and match by both ISO3 country code, and week number;
Not every country tracks excess deaths so I want those not within the original excess df to have an NA value
Likewise, not every country who track excess deaths are as up to date, some have 37 weeks of data, others might only have 24, so I want the NA values for the missing weeks also
Using the below, I've gotten halfway there, countries not on the original list have NA and those who are have a value, however it only uses the first value rather than changing total per week
x_worldwide_weekly_death_values["excess_2020"] <- excess_death_2020$DTotal[match(x_worldwide_weekly_death_values$ISO3,
excess_death_2020$ISO3)]
Example of the data not in the original excess_death_2020 file which have had NA's added successfully
ISO3 administrative_~ population pop_density_km2 week_number weekly_deaths date excess_2020
<chr> <chr> <int> <chr> <dbl> <dbl> <date> <dbl>
1 AFG Afghanistan 37172386 56.937760009803 1 0 2020-01-06 NA
2 AFG Afghanistan 37172386 56.937760009803 2 0 2020-01-13 NA
3 AFG Afghanistan 37172386 56.937760009803 3 0 2020-01-20 NA
dput() for the above:
dput(x_worldwide_weekly_death_values[1:3,])
structure(list(ISO3 = c("AFG", "AFG", "AFG"), administrative_area_level_1 = c("Afghanistan",
"Afghanistan", "Afghanistan"), population = c(37172386L, 37172386L,
37172386L), pop_density_km2 = c("56.937760009803", "56.937760009803",
"56.937760009803"), week_number = c(1, 2, 3), weekly_deaths = c(0,
0, 0), date = structure(c(18267, 18274, 18281), class = "Date"),
excess_2020 = c(NA_real_, NA_real_, NA_real_)), row.names = c(NA,
-3L), class = c("tbl_df", "tbl", "data.frame"))
Compared to Austria, where the week 1 value has been added to all cells
ISO3 administrative_a~ population pop_density_km2 week_number weekly_deaths date excess_2020
<chr> <chr> <int> <chr> <dbl> <dbl> <date> <dbl>
1 AUT Austria 8840521 107.1279668605~ 1 0 2020-01-06 1610
2 AUT Austria 8840521 107.1279668605~ 2 0 2020-01-13 1610
3 AUT Austria 8840521 107.1279668605~ 3 0 2020-01-20 1610
dput() for the above:
dput(x_worldwide_weekly_death_values[371:373,])
structure(list(ISO3 = c("AUT", "AUT", "AUT"), administrative_area_level_1 = c("Austria",
"Austria", "Austria"), population = c(8840521L, 8840521L, 8840521L
), pop_density_km2 = c("107.127966860564", "107.127966860564",
"107.127966860564"), week_number = c(1, 2, 3), weekly_deaths = c(0,
0, 0), date = structure(c(18267, 18274, 18281), class = "Date"),
excess_2020 = c(1610, 1610, 1610)), row.names = c(NA, -3L
), class = c("tbl_df", "tbl", "data.frame"))
Expected output for excess_2020 column would be the DTotal column figures associated to the Week number; Week 1 = 1610, Week 2 = 1702, Week 3 = 1797
ISO3 Year Week Sex D0_14 D15_64 D65_74 D75_84 D85p DTotal R0_14 R15_64 R65_74 R75_84 R85p
<chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AUT 2020 1 b 1 220 221 481 687 1610 4.07e-5 0.00196 0.0134 0.0399 0.157
2 AUT 2020 2 b 8 231 261 490 712 1702 3.26e-4 0.00206 0.0158 0.0407 0.163
3 AUT 2020 3 b 12 223 272 537 753 1797 4.89e-4 0.00198 0.0165 0.0446 0.173
dput() for the above
dput(excess_death_2020[1:3,])
structure(list(ISO3 = c("AUT", "AUT", "AUT"), Year = c(2020,
2020, 2020), Week = c(1, 2, 3), Sex = c("b", "b", "b"), D0_14 = c(1,
8, 12), D15_64 = c(220, 231, 223), D65_74 = c(221, 261, 272),
D75_84 = c(481, 490, 537), D85p = c(687, 712, 753), DTotal = c(1610,
1702, 1797), R0_14 = c(4.07296256273503e-05, 0.000325837005018803,
0.000488755507528204), R15_64 = c(0.00195783568851069, 0.00205572747293622,
0.00198453344789947), R65_74 = c(0.0133964529296798, 0.0158211502925177,
0.0164879420672982), R75_84 = c(0.0399495248686277, 0.0406970211759409,
0.044600613003021), R85p = c(0.157436284517545, 0.163165406952681,
0.172561167746305), RTotal = c(0.00948052042945739, 0.0100222644539978,
0.0105816740445559), Split = c(0, 0, 0), SplitSex = c(0,
0, 0), Forecast = c(1, 1, 1), date = structure(c(18267, 18274,
18281), class = "Date")), row.names = c(NA, -3L), class = c("tbl_df",
"tbl", "data.frame"))
I tried a few variations of the below with little success
x_worldwide_weekly_deaths["excess_2020"] <- excess_death_2020$DTotal[excess_death_2020$Week[match(x_worldwide_weekly_death_values$week_number
[x_worldwide_weekly_death_values$ISO3],
excess_death_2020$Week[excess_death_2020$CountryCode])]]
Should I not be using match() on multiple criteria or am I not formatting it correctly?
Really appreciate any help and suggestions!
dplyr is reaaly good/easy for this kind of thing. Here's a simplified example that achieves both of your goals (adding NA for countries that are not in the excess death data, and adding NA for weeks that are not in the excess death data)...
library(dplyr)
x_worldwide_weekly_death_values <-
tribble(
~iso3c, ~week, ~covid_deaths,
"AFG", 1, 0,
"AFG", 2, 10,
"AFG", 3, 30,
"AFG", 4, 50,
"AUT", 1, 120,
"AUT", 2, 200,
"AUT", 3, 320,
"AUT", 4, 465,
"XXX", 1, 10,
"XXX", 2, 20,
"XXX", 3, 30,
"XXX", 4, 40,
)
excess_death_2020 <-
tribble(
~iso3c, ~week, ~DTotal,
"AFG", 1, 0,
"AFG", 2, 0,
"AFG", 3, 0,
"AUT", 1, 1610,
"AUT", 2, 1702,
"AUT", 3, 1797,
)
x_worldwide_weekly_death_values %>%
left_join(excess_death_2020, by = c("iso3c", "week"))
#> # A tibble: 12 x 4
#> iso3c week covid_deaths DTotal
#> <chr> <dbl> <dbl> <dbl>
#> 1 AFG 1 0 0
#> 2 AFG 2 10 0
#> 3 AFG 3 30 0
#> 4 AFG 4 50 NA
#> 5 AUT 1 120 1610
#> 6 AUT 2 200 1702
#> 7 AUT 3 320 1797
#> 8 AUT 4 465 NA
#> 9 XXX 1 10 NA
#> 10 XXX 2 20 NA
#> 11 XXX 3 30 NA
#> 12 XXX 4 40 NA

Resources