leaflet:: Mapping wrong name of the countries

leaflet:: Mapping wrong name of the countries - r

I am having a hard time figuring out why I can't map the right country names onto the map. I have gone through all of my code, and I still don't understand why it is not working correctly.
If you see any problems, please let me know. I would appreciate it.
This is dataset
Country Total Code
1 China 34620 CHN
2 Japan 89 JPN
3 Singapore 40 SGP
4 Thailand 32 THA
5 Hong Kong 26 HKG
6 S. Korea 24 KOR
7 Taiwan 17 TWN
8 Malaysia 16 MYS
9 Australia 15 AUS
10 Germany 14 DEU
11 Vietnam 13 VNM
12 USA 12 USA
13 France 11 FRA
14 Macao 10 MAC
15 U.A.E. 7 ARE
16 Canada 7 CAN
17 Philippines 3 PHL
18 India 3 IND
19 Italy 3 ITA
20 U.K. 3 GBR
21 Russia 2 RUS
22 Finland 1 FIN
23 Sri Lanka 1 LKA
24 Sweden 1 SWE
25 Nepal 1 NPL
26 Cambodia 1 KHM
27 Spain 1 ESP
28 Belgium 1 BEL
# Script as posted in the question: draws the selected countries with leaflet,
# shading each polygon by its case total and attaching an HTML hover label.
library(leaflet)
library(maps)
library(maptools)
# Case data; the columns Country, Total and Code are the ones used below.
case <- read.csv("Cases_02072020_v1.csv",stringsAsFactors = FALSE)
# Polygons of the "world" map for the regions that iso.expand() derives from
# the codes in case$Code.
Country = map("world", fill = TRUE, plot = FALSE, regions=iso.expand(case$Code,regex = TRUE))
# Keep only the part of each polygon name before the first ":" as its ID.
IDs <- sapply(strsplit(Country$names, ":"), function(x) x[1])
Country <- map2SpatialPolygons(Country,
IDs=IDs,
proj4string=CRS("+proj=longlat +datum=WGS84"))
# Colour scale ("Blues") over the numeric case totals.
pal <- colorNumeric(
palette = "Blues",
domain = as.numeric(case$Total))
# One HTML label per row of `case`.
case$labels <- sprintf(
"<strong>Country:%s</strong><br/>Total:%g",
case$Country, case$Total)%>% lapply(htmltools::HTML)
# NOTE(review): `color` and `label` are supplied in the row order of `case`,
# while the polygons come from `Country`; nothing in this script aligns the
# two, which is presumably why labels end up on the wrong countries.
leaflet(Country) %>% addTiles() %>%
addPolygons(fillOpacity = 0.6, smoothFactor = 0.5, stroke = TRUE, weight = 1,
color = pal(as.numeric(case$Total)),
label = case$labels)

You need to repeat each row of your case data frame so that the countries match each individual polygon on the map. This means ensuring you order them correctly and also you need to incorporate Macao and Hong Kong into China (or change the way you split IDs to handle them there).
Here is a full working version:
library(leaflet)
library(maps)
library(maptools)

# Case totals per country, keyed by ISO alpha-3 code.
case <- structure(
  list(
    Country = c(
      "China", "Japan", "Singapore", "Thailand", "Hong Kong", "S. Korea",
      "Taiwan", "Malaysia", "Australia", "Germany", "Vietnam", "USA",
      "France", "Macao", "U.A.E.", "Canada", "Philippines", "India", "Italy",
      "U.K.", "Russia", "Finland", "Sri Lanka", "Sweden", "Nepal", "Cambodia",
      "Spain", "Belgium"
    ),
    Total = c(
      34620, 89, 40, 32, 26, 24, 17, 16, 15, 14, 13, 12, 11, 10, 7, 7, 3, 3,
      3, 3, 2, 1, 1, 1, 1, 1, 1, 1
    ),
    Code = c(
      "CHN", "JPN", "SGP", "THA", "HKG", "KOR", "TWN", "MYS", "AUS", "DEU",
      "VNM", "USA", "FRA", "MAC", "ARE", "CAN", "PHL", "IND", "ITA", "GBR",
      "RUS", "FIN", "LKA", "SWE", "NPL", "KHM", "ESP", "BEL"
    )
  ),
  row.names = c(NA, -28L),
  class = "data.frame"
)
case <- case[order(case$Country), ]

# Polygons for the listed ISO codes. The full polygon names are kept as IDs,
# so every polygon of a country stays a separate shape.
Country <- map("world", fill = TRUE, plot = FALSE,
               regions = iso.expand(case$Code, regex = FALSE))
IDs <- Country$names
Country <- map2SpatialPolygons(Country,
                               IDs = IDs,
                               proj4string = CRS("+proj=longlat +datum=WGS84"))

# Add a "Canary Islands" row that reuses Spain's figures, and rename the
# abbreviated countries so the names in `case` line up with the name prefixes
# counted in `reps` below.
case[nrow(case) + 1, ] <- case[case$Code == "ESP", ]
case$Country[nrow(case)] <- "Canary Islands"
case$Country[case$Country == "S. Korea"] <- "South Korea"
case$Country[case$Country == "U.K."] <- "UK"
case$Country[case$Country == "U.A.E."] <- "United Arab Emirates"

# Fold Hong Kong and Macao into China's total, then drop their rows.
is_china <- case$Country == "China"
case$Total[is_china] <- case$Total[is_china] +
  case$Total[case$Country == "Hong Kong"] +
  case$Total[case$Country == "Macao"]
# Logical negation instead of -which(): -which() would drop every row if a
# name were not present.
case <- case[!case$Country %in% c("Hong Kong", "Macao"), ]
case <- case[order(case$Country), ]

# Number of polygons per country (the name part before the first ":"), in
# alphabetical order of that name -- the same order split() uses below.
reps <- as.numeric(table(
  vapply(strsplit(IDs, ":"), function(x) x[1], character(1))
))
# Repeat each country's row once per polygon so `case` has one row per polygon.
case <- do.call(
  rbind,
  Map(function(rows, n) rows[rep(1, n), ], split(case, case$Country), reps)
)

# Colour scale ("Blues") over the case totals.
pal <- colorNumeric(
  palette = "Blues",
  domain = as.numeric(case$Total)
)
# One HTML hover label per polygon.
case$labels <- sprintf(
  "<strong>Country:%s</strong><br/>Total:%g",
  case$Country, case$Total
) %>% lapply(htmltools::HTML)

leaflet(Country) %>%
  addTiles() %>%
  addPolygons(fillOpacity = 0.6, smoothFactor = 0.5, stroke = TRUE, weight = 1,
              color = pal(as.numeric(case$Total)),
              label = case$labels)
Obviously this is just a snapshot, but you can see China is coloured correctly.

Related

Is there an R function to add a common 'word' to all row under a particular column

I have this dataset below
Country Sales
France 12000
Germany 2400
Italy 1000
Belgium 500
Could you please help with code to append a common 'word' to every value in the Country column? I have tried my best. Here is the intended output. Thanks.
Country Sales
France - Europe 12000
Germany - Europe 2400
Italy - Europe 1000
Belgium - Europe 500
Thanks, as you help me

# Example sales data, one row per country.
country_data <- data.frame(
  country = c("France", "Germany", "Italy", "Belgium"),
  sales = c(12000, 2400, 1000, 500)
)

# Look up each country's continent and append it to the country name
# ("France" -> "France - Europe"). transmute() keeps only the columns it names.
country_data_2 <-
  country_data |>
  dplyr::mutate(
    continent = dplyr::case_when(
      country %in% c("France", "Germany", "Italy", "Belgium") ~ "Europe",
      # "Morocco" was misspelled "Morroco", so it could never match.
      country %in% c("Egypt", "South Africa", "Morocco") ~ "Africa",
      country %in% c("Canada", "Mexico", "United States") ~ "North America"
      # ...
    )
  ) |>
  dplyr::transmute(
    country = paste(country, continent, sep = " - "),
    sales = sales
  )

country_data_2
#> country sales
#> 1 France - Europe 12000
#> 2 Germany - Europe 2400
#> 3 Italy - Europe 1000
#> 4 Belgium - Europe 500
Created on 2022-11-08 with reprex v2.0.2

Aggregate R (absolute) difference

I have a dataframe like this:
structure(list(from = c("China", "China", "Canada", "Canada",
"USA", "China", "Trinidad and Tobago", "China", "USA", "USA"),
to = c("Japan", "Japan", "USA", "USA", "Japan", "USA", "USA",
"Rep. of Korea", "Canada", "Japan"), weight = c(4766781396,
4039683737, 3419468319, 3216051707, 2535151299, 2513604035,
2303474559, 2096033823, 2091906420, 2066357443)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -10L), groups = structure(list(
from = c("Canada", "China", "China", "China", "Trinidad and Tobago",
"USA", "USA"), to = c("USA", "Japan", "Rep. of Korea", "USA",
"USA", "Canada", "Japan"), .rows = structure(list(3:4, 1:2,
8L, 6L, 7L, 9L, c(5L, 10L)), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -7L), .drop = TRUE))
I would like to perform the absolute value of difference in weight column grouped by from and to.
I'm trying with the function aggregate() but it seems to work for means and sums and not for difference. For example (df is the name of my dataframe):
aggregate(weight~from+to, data = df, FUN=mean)
which produces:
from to weight
1 USA Canada 2091906420
2 China Japan 4403232567
3 USA Japan 2300754371
4 China Rep. of Korea 2096033823
5 Canada USA 3317760013
6 China USA 2513604035
7 Trinidad and Tobago USA 2303474559
EDIT. The desired result is instead
from to weight
1 USA Canada 2091906420
2 China Japan 727097659
3 USA Japan 468793856
4 China Rep. of Korea 2096033823
5 Canada USA 203416612
6 China USA 2513604035
7 Trinidad and Tobago USA 2303474559
As we can see, the pairs of countries that appear twice in the columns from and to are collapsed into a single row, and the weight column holds the difference between their two weights. E.g.,
from to weight
China Japan 4766781396
China Japan 4039683737
become
from to weight
China Japan 727097659
because
> 4766781396-4039683737
[1] 727097659
The difference should be positive (and this is why I wrote "the absolute value of difference of the weights").
The couples of countries which instead appear just in one row of dataframe df remain the same, as e.g.
from to weight
7 Trinidad and Tobago USA 2303474559

Assuming at most 2 values per group and that the order of the difference is not important
# Absolute difference of `weight` within each (from, to) pair; a pair that
# occurs only once keeps its single weight. Assumes at most two rows per pair.
aggregate(weight ~ from + to, data = df, FUN = function(x) {
  # The condition is a single TRUE/FALSE, so a plain if/else is used rather
  # than the vectorised ifelse().
  abs(if (length(x) == 1L) x else diff(x))
})
from to weight
1 USA Canada 2091906420
2 China Japan 727097659
3 USA Japan 468793856
4 China Rep. of Korea 2096033823
5 Canada USA 203416612
6 China USA 2513604035
7 Trinidad and Tobago USA 2303474559

Is the following what you are looking for?
# Absolute difference between the two weights of a group. A group with a
# single weight keeps that weight: x[2] would be NA there, and the desired
# result leaves such rows unchanged.
f <- function(x) if (length(x) < 2L) abs(x) else abs(x[2] - x[1])
aggregate(weight ~ from + to, data = df, FUN = f)
#> from to weight
#> 1 USA Canada 2091906420
#> 2 China Japan 727097659
#> 3 USA Japan 468793856
#> 4 China Rep. of Korea 2096033823
#> 5 Canada USA 203416612
#> 6 China USA 2513604035
#> 7 Trinidad and Tobago USA 2303474559

Hot encoding for a set of columns in R

I am trying to do one-hot encoding for a subset of the columns of a data frame in R.
One-hot encoding is a process by which categorical variables are converted into a form that ML algorithms can use to do a better job in prediction: each string column is replaced by one binary column per distinct string in that column.
Suppose we have a df that looks like this:
mes work_location birth_place
01/01/2000 China Chile
01/02/2000 Mexico Japan
01/03/2000 China Chile
01/04/2000 China Argentina
01/05/2000 USA Poland
01/06/2000 Mexico Poland
01/07/2000 USA Finland
01/08/2000 USA Finland
01/09/2000 Japan Norway
01/10/2000 Japan Kenia
01/11/2000 Japan Mali
01/12/2000 India Mali
Here's the code to hot encode :
## function to hot-encode ##
# Prefixes the values of column `columna` with the column name
# ("<columna>_<value>"), adds a helper column `valor` equal to 1, and spreads
# those values into one column each, filling the gaps with 0.
# df:      data frame to encode.
# columna: name of the column to encode, as a string.
columna_dummy <- function(df, columna) {
df %>%
mutate_at(columna, ~paste(columna, eval(as.symbol(columna)), sep = "_")) %>%
mutate(valor = 1) %>%
spread(key = columna, value = valor, fill = 0)
}
## selecting columns ##
columnas <- c("work_location", "birth_place")
## applying loop to repeat columna_dummy function for each df column ##
# NOTE(review): `i` is the numeric index (1, 2), not a column name, so
# `columnas[i]` was presumably intended. `new_dataset` is also overwritten on
# every iteration, so only the last result would remain.
for(i in 1:length(columnas)){
new_dataset <- columna_dummy(df, i)
}
Console output:
Error: Problem with `mutate()` input `mes`.
x objeto '1' no encontrado
i Input `mes` is `(structure(function (..., .x = ..1, .y = ..2, . = ..1) ...`.
Run `rlang::last_error()` to see where the error occurred.
Called from: signal_abort(cnd)
Column mes is a date-class column; however, it is not included in the columnas vector,
and the above error is still raised.
Expected output should look somewhat like this for each string in selected string df column:
(I could not add every single column, but work_location_China it's an example of
how columns should look)
mes work_location birth_place work_location_China
01/01/2000 China Chile 1
01/02/2000 Mexico Japan 0
01/03/2000 China Chile 1
01/04/2000 China Argentina 1
01/05/2000 USA Poland 0
01/06/2000 Mexico Poland 0
01/07/2000 USA Finland 0
01/08/2000 USA Finland 0
01/09/2000 Japan Norway 0
01/10/2000 Japan Kenia 0
01/11/2000 Japan Mali 0
01/12/2000 India Mali 0
Is there any other way to apply this loop?

As we are passing strings, an option is to select the column (select can take both quoted/unquoted), create a column of 1s ('valor') and a row number column ('rn'), then do the reshaping from 'long' to 'wide' (pivot_wider)
library(dplyr)
library(tidyr)
library(purrr)
library(stringr)
# One-hot encode a single column.
# df:      data frame holding the column.
# columna: name of the column to encode, as a string.
# Returns a data frame with one 0/1 column per distinct value of `columna`
# (named after the value) and one row per row of `df`; no other column is kept.
columna_dummy <- function(df, columna) {
  df %>%
    # all_of() states that `columna` holds column names, not a column itself.
    select(all_of(columna)) %>%
    # `rn` keeps every input row distinct so pivot_wider() returns one row each.
    mutate(valor = 1, rn = row_number()) %>%
    pivot_wider(names_from = all_of(columna),
                values_from = valor, values_fill = 0) %>%
    select(-rn)
}
-testing
For more than one column, an option is to loop over the column names of interest with map, apply the function and bind them with _dfc and bind with the original dataset (bind_cols)
# Encode each column of interest, prefix its indicator columns with the name
# of the source column ("work_location_China", ...), bind the encoded pieces
# side by side, and append them to the original data.
out <- map_dfc(c("work_location", "birth_place"), function(columna) {
  columna_dummy(df = df, columna = columna) %>%
    rename_with(~ str_c(columna, .x, sep = "_"))
}) %>%
  bind_cols(df, .)
-output
head(out, 2)
# mes work_location birth_place work_location_China work_location_Mexico work_location_USA work_location_Japan
#1 01/01/2000 China Chile 1 0 0 0
#2 01/02/2000 Mexico Japan 0 1 0 0
# work_location_India birth_place_Chile birth_place_Japan birth_place_Argentina birth_place_Poland birth_place_Finland
#1 0 1 0 0 0 0
#2 0 0 1 0 0 0
# birth_place_Norway birth_place_Kenia birth_place_Mali
#1 0 0 0
#2 0 0 0
data
df <- structure(list(mes = c("01/01/2000", "01/02/2000", "01/03/2000",
"01/04/2000", "01/05/2000", "01/06/2000", "01/07/2000", "01/08/2000",
"01/09/2000", "01/10/2000", "01/11/2000", "01/12/2000"), work_location = c("China",
"Mexico", "China", "China", "USA", "Mexico", "USA", "USA", "Japan",
"Japan", "Japan", "India"), birth_place = c("Chile", "Japan",
"Chile", "Argentina", "Poland", "Poland", "Finland", "Finland",
"Norway", "Kenia", "Mali", "Mali")), class = "data.frame",
row.names = c(NA,
-12L))

By using purrr library I solved the issue:
## data ##
df <- structure(list(mes = c("01/01/2000", "01/02/2000", "01/03/2000",
"01/04/2000", "01/05/2000", "01/06/2000", "01/07/2000", "01/08/2000",
"01/09/2000", "01/10/2000", "01/11/2000", "01/12/2000"), work_location = c("China",
"Mexico", "China", "China", "USA", "Mexico", "USA", "USA", "Japan",
"Japan", "Japan", "India"), birth_place = c("Chile", "Japan",
"Chile", "Argentina", "Poland", "Poland", "Finland", "Finland",
"Norway", "Kenia", "Mali", "Mali")), class = "data.frame",
row.names = c(NA,
-12L))
## packages ##
# Attached up front: the code below uses %>%, mutate(), across(), all_of() and
# inner_join() from dplyr, pivot_wider() from tidyr, and map()/reduce() from
# purrr.
library(dplyr)
library(tidyr)
library(purrr)
## function to hot-encode ##
# Replaces column `columna` (given as a string) with one 0/1 column per
# distinct value, named "<columna>_<value>"; all other columns are kept.
columna_dummy <- function(df, columna) {
  df %>%
    mutate(across(all_of(columna), ~ paste(columna, .x, sep = "_")),
           valor = 1) %>%
    # names_sort = TRUE puts the new columns in alphabetical order.
    pivot_wider(names_from = all_of(columna), values_from = valor,
                values_fill = 0, names_sort = TRUE)
}
## vector of columns ##
columnas <- c("work_location", "birth_place")
## hot_encoded_dataset ##
# Encode each column separately, then join the pieces on the columns they
# have in common.
hot_encoded_dataset <- map(columnas, columna_dummy, df = df) %>%
  reduce(inner_join)

mutate a tibble column in dplyr to hold the label text rather than the value

I have a tibble from an SPSS file loaded by haven. Some columns have data and labels:
tmp <- structure(list(CNT = structure(c("ALB", "ALB", "ARE"), label = "Country code 3-character", labels = c(Moldova = "MDA",
Thailand = "THA", Brazil = "BRA", France = "FRA", `United States` = "USA",
Italy = "ITA", Latvia = "LVA", Albania = "ALB", Serbia = "SRB",
Macao = "MAC", `Moscow City (RUS)` = "QMC", Greece = "GRC", `North Macedonia` = "MKD",
Netherlands = "NLD", Switzerland = "CHE", Montenegro = "MNE",
`United Arab Emirates` = "ARE", Sweden = "SWE", `Czech Republic` = "CZE",
`Hong Kong` = "HKG", Argentina = "ARG", `Bosnia and Herzegovina` = "BIH",
`B-S-J-Z (China)` = "QCI", `Costa Rica` = "CRI", Denmark = "DNK",
`Slovak Republic` = "SVK", Belgium = "BEL", Chile = "CHL", Philippines = "PHL",
Colombia = "COL", Poland = "POL", Ireland = "IRL", Iceland = "ISL",
`New Zealand` = "NZL", Vietnam = "VNM", `Dominican Republic` = "DOM",
Canada = "CAN", Panama = "PAN", Lebanon = "LBN", Indonesia = "IDN",
Finland = "FIN", Japan = "JPN", `Brunei Darussalam` = "BRN",
Hungary = "HUN", Slovenia = "SVN", Georgia = "GEO", `Chinese Taipei` = "TAP",
Singapore = "SGP", Spain = "ESP", Morocco = "MAR", `United Kingdom` = "GBR",
Peru = "PER", Bulgaria = "BGR", Ukraine = "UKR", Belarus = "BLR",
`Moscow Region (RUS)` = "QMR", Jordan = "JOR", Korea = "KOR",
Norway = "NOR", Israel = "ISR", Turkey = "TUR", Australia = "AUS",
`Russian Federation` = "RUS", Malaysia = "MYS", Qatar = "QAT",
Malta = "MLT", Portugal = "PRT", `Tatarstan (RUS)` = "QRT", Estonia = "EST",
Austria = "AUT", `Saudi Arabia` = "SAU", Germany = "DEU", Romania = "ROU",
Lithuania = "LTU", Croatia = "HRV", Kosovo = "KSV", Mexico = "MEX",
Luxembourg = "LUX", Cyprus = "QCY", Uruguay = "URY", Kazakhstan = "KAZ",
`Baku (Azerbaijan)` = "QAZ"), class = "haven_labelled"), SC156Q05HA = structure(c(1,
2, 1), label = "At school: A specific programme to prepare students for responsible Internet behaviour", labels = c(Yes = 1,
No = 2, `Valid Skip` = 5, `Not Applicable` = 7, Invalid = 8,
`No Response` = 9), class = "haven_labelled"), percentage = c(71.1,
28.9, 81.5), total_schools = c(325L, 325L, 692L)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -3L), groups = structure(list(
CNT = structure(c("ALB", "ALB", "ARE"), label = "Country code 3-character", labels = c(Moldova = "MDA",
Thailand = "THA", Brazil = "BRA", France = "FRA", `United States` = "USA",
Italy = "ITA", Latvia = "LVA", Albania = "ALB", Serbia = "SRB",
Macao = "MAC", `Moscow City (RUS)` = "QMC", Greece = "GRC",
`North Macedonia` = "MKD", Netherlands = "NLD", Switzerland = "CHE",
Montenegro = "MNE", `United Arab Emirates` = "ARE", Sweden = "SWE",
`Czech Republic` = "CZE", `Hong Kong` = "HKG", Argentina = "ARG",
`Bosnia and Herzegovina` = "BIH", `B-S-J-Z (China)` = "QCI",
`Costa Rica` = "CRI", Denmark = "DNK", `Slovak Republic` = "SVK",
Belgium = "BEL", Chile = "CHL", Philippines = "PHL", Colombia = "COL",
Poland = "POL", Ireland = "IRL", Iceland = "ISL", `New Zealand` = "NZL",
Vietnam = "VNM", `Dominican Republic` = "DOM", Canada = "CAN",
Panama = "PAN", Lebanon = "LBN", Indonesia = "IDN", Finland = "FIN",
Japan = "JPN", `Brunei Darussalam` = "BRN", Hungary = "HUN",
Slovenia = "SVN", Georgia = "GEO", `Chinese Taipei` = "TAP",
Singapore = "SGP", Spain = "ESP", Morocco = "MAR", `United Kingdom` = "GBR",
Peru = "PER", Bulgaria = "BGR", Ukraine = "UKR", Belarus = "BLR",
`Moscow Region (RUS)` = "QMR", Jordan = "JOR", Korea = "KOR",
Norway = "NOR", Israel = "ISR", Turkey = "TUR", Australia = "AUS",
`Russian Federation` = "RUS", Malaysia = "MYS", Qatar = "QAT",
Malta = "MLT", Portugal = "PRT", `Tatarstan (RUS)` = "QRT",
Estonia = "EST", Austria = "AUT", `Saudi Arabia` = "SAU",
Germany = "DEU", Romania = "ROU", Lithuania = "LTU", Croatia = "HRV",
Kosovo = "KSV", Mexico = "MEX", Luxembourg = "LUX", Cyprus = "QCY",
Uruguay = "URY", Kazakhstan = "KAZ", `Baku (Azerbaijan)` = "QAZ"
), class = "haven_labelled"), SC156Q05HA = structure(c(1,
2, 1), label = "At school: A specific programme to prepare students for responsible Internet behaviour", labels = c(Yes = 1,
No = 2, `Valid Skip` = 5, `Not Applicable` = 7, Invalid = 8,
`No Response` = 9), class = "haven_labelled"), .rows = list(
1L, 2L, 3L)), row.names = c(NA, -3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
I want to use the lbl values in the tibble, e.g. Italy instead of ITA, so try to mutate them:
tmp %>% ungroup() %>% mutate(SC156Q05HA = attr(SC156Q05HA, "lbl"))
But all seems to do is remove the SC156Q05HA field:
# A tibble: 3 x 4
# Groups: CNT, SC156Q05HA [3]
CNT percentage total_schools
<chr+lbl> <dbl> <int>
1 ALB [Albania] 71.1 325
2 ALB [Albania] 28.9 325
3 ARE [United Arab Emirates] 81.5 692

One option would be to convert to factor with as_factor from haven
library(dplyr)
library(haven)
tmp %>%
ungroup %>%
mutate(SC156Q05HA = as_factor(SC156Q05HA))
# A tibble: 3 x 4
# CNT SC156Q05HA percentage total_schools
# <chr+lbl> <fct> <dbl> <int>
#1 ALB [Albania] Yes 71.1 325
#2 ALB [Albania] No 28.9 325
#3 ARE [United Arab Emirates] Yes 81.5 692
Or to convert all the labelled columns to factor, can use mutate_if
# Convert every haven-labelled column to a factor.
tmp %>%
  ungroup() %>%
  mutate(across(where(is.labelled), as_factor))
# A tibble: 3 x 4
# CNT SC156Q05HA percentage total_schools
# <fct> <fct> <dbl> <int>
#1 Albania Yes 71.1 325
#2 Albania No 28.9 325
#3 United Arab Emirates Yes 81.5 692
Inspired from #M--'s comments with mutate_all

In base we can use factor while stacking the attributes:
# Build the factor by hand from the column's "labels" attribute: the
# attribute's values become the levels and its names become the labels.
tmp %>%
  ungroup() %>%
  mutate(SC156Q05HA = factor(
    x = SC156Q05HA,
    # stack() names its columns `values` and `ind`; `values` is spelled out so
    # the code does not depend on partial matching of `$value`.
    levels = stack(attr(SC156Q05HA, "labels"))$values,
    labels = stack(attr(SC156Q05HA, "labels"))$ind
  ))
#> # A tibble: 3 x 4
#> CNT SC156Q05HA percentage total_schools
#> <chr+lbl> <fct> <dbl> <int>
#> 1 ALB [Albania] Yes 71.1 325
#> 2 ALB [Albania] No 28.9 325
#> 3 ARE [United Arab Emirates] Yes 81.5 692

Conditionally Fill NA Values in R with String

I have a data frame in R that has x and y values. There are NA values in x I would like to fill with specific strings dependent upon the y values.
Example data frame:
df1 = data.frame(x = c("Canada", "United States", NA, NA, NA),
y = c("CAN", "USA", "TWN", "TWN", "ARG"))
Expected result:
df2 = data.frame(x = c("Canada", "United States", "Taiwan", "Taiwan", NA),
y = c("CAN", "USA", "TWN", "TWN", "ARG"))
I have tried
df2 <- df1 %>% transform(df1, x = ifelse(y == "TWN", "Taiwan", x))
but this removes all values of x and y for all other observations not containing "TWN". Most of the other examples I've found on here are replacing all NA values and I can't seem to figure out how to do it conditionally upon strings in another column. This will need to be done with very large data frames so efficiency is ideal.

I think a better approach here would be to maintain a data frame which has all the keys and replacements which you intend to use should a full country name be missing:
# Data with some missing country names (x), keyed by country code (y).
df1 <- data.frame(x = c("Canada", "United States", NA, NA, NA),
                  y = c("CAN", "USA", "TWN", "TWN", "ARG"),
                  stringsAsFactors = FALSE)
# Lookup table: country code -> name to use when x is missing.
df2 <- data.frame(y = c("TWN", "ARG"), value = c("Taiwan", "Argentina"),
                  stringsAsFactors = FALSE)
# Left join: keep every row of df1 and never add rows for lookup codes that do
# not occur in df1 (all = TRUE would add them). merge() sorts the result by y.
result <- merge(df1, df2, by = "y", all.x = TRUE)
# Fill in a name only where x is missing.
missing_x <- is.na(result$x)
result$x[missing_x] <- result$value[missing_x]
result <- result[, c("y", "x")]
result
y x
1 ARG Argentina
2 CAN Canada
3 TWN Taiwan
4 TWN Taiwan
5 USA United States

You first need to augment the levels of the factor column before you use "[" with logical indexing for the joint condition of 'x' being NA and 'y' being TWN.
> levels(df1$x) <- c(levels(df1$x), 'Taiwan')
> df1[is.na(df1$x) & df1$y=='TWN', 'x'] <- 'Taiwan'
> df1
x y
1 Canada CAN
2 United States USA
3 Taiwan TWN
4 Taiwan TWN
5 <NA> ARG

The simplest approach would be to use data.table library in R
library(data.table)
# Convert df1 to a data.table in place.
setDT(df1)
# Fill x only where it is missing and y is "TWN", using the spelling from the
# expected result ("Taiwan").
df1[is.na(x) & y == "TWN", x := "Taiwan"]
That should work like a pro.

I like to use a lookUpTable (named character) for those problems.
# Named character vector used as a lookup table: country code -> country name.
lookUpTable <- c(CAN = "Canada", USA = "United States",
                 TWN = "Taiwan", ARG = "Argentina")
# CAN USA TWN ARG
#"Canada" "United States" "Taiwan" "Argentina"
# Look the names up by code. as.character() matters when y is a factor: a
# factor index selects by its integer codes, not by its labels, which pairs
# codes with the wrong names. Note that this replaces every value of x.
df1$x <- unname(lookUpTable[as.character(df1$y)])
# x y
#1 Canada CAN
#2 United States USA
#3 Taiwan TWN
#4 Taiwan TWN
#5 Argentina ARG
data:
df1 = data.frame(x = c("Canada", "United States", NA, NA, NA),
y = c("CAN", "USA", "TWN", "TWN", "ARG"))

Categories

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

leaflet:: Mapping wrong name of the countries - r

Related

Is there an R function to add a common 'word' to all row under a particular column

Aggregate R (absolute) difference

Hot encoding for a set of columns in R

mutate a tibble column in dplyr to hold the label text rather than the value

Conditionally Fill NA Values in R with String

Categories

Resources