Make a column based on a repeating set of numbers that follows another column - r

I have this data and I want to make a new column:
structure(list(AGE_GROUP = c("21-30", "31-40", "41-50"), DATE = c("12/17/2020",
"12/17/2020", "12/17/2020"), VACCINE_COUNT = c(36L, 47L, 26L),
PERC_TOTAL_VACC = c(24.82758621, 32.4137931, 17.93103448),
RECIPIENT_COUNT = c(NA_integer_, NA_integer_, NA_integer_
), PERC_TOTAL_RECIP = c(NA_real_, NA_real_, NA_real_), RECIP_FULLY_VACC = c(NA_integer_,
NA_integer_, NA_integer_), PERC_FULLY_VACC = c(NA_real_,
NA_real_, NA_real_)), row.names = c(NA, 3L), class = "data.frame")
Based on the age group, I want to make a column that includes these numbers, c(8, 12,13,16,14,12), and repeat this column 3 times. So the outcome is a new column that contains the mentioned numbers 3 times.
I have used this code vaccine<-vaccine %>% mutate(new_col = rep(list(vals), n())) %>% unnest()
and I have something like this
"12/18/2020", "12/18/2020"), VACCINE_COUNT = c(421L, 421L, 421L
), PERC_TOTAL_VACC = c(15.52932497, 15.52932497, 15.52932497),
RECIPIENT_COUNT = c(NA_integer_, NA_integer_, NA_integer_
), PERC_TOTAL_RECIP = c(NA_real_, NA_real_, NA_real_), RECIP_FULLY_VACC = c(NA_integer_,
NA_integer_, NA_integer_), PERC_FULLY_VACC = c(NA_real_,
NA_real_, NA_real_), X = c(NA, NA, NA), X.1 = c(14L, 14L,
14L), new_col = c(8, 12, 13)), row.names = c(NA, -3L), class = c("tbl_df",
"tbl", "data.frame"))```
While I want to keep my data and just repeat the data

Do you mean to repeat the values c(8, 12,13,16,14,12) for each row in the dataframe? Try :
library(dplyr)
library(tidyr)
vals <- c(8, 12,13,16,14,12)
df %>%
mutate(new_col = rep(list(vals), n())) %>%
unnest(new_col)

Using base R
transform(df1[rep(seq_len(nrow(df1)), each = length(vals)),], new_col = vals)
Or with uncount
library(dplyr)
library(tidyr)
df1 %>%
uncount(length(vals)) %>%
mutate(new_col = rep(vals, length.out = n()))
If we need to just replicate and store the column, wrap in a list
df1 %>%
mutate(new_col = list(vals))
data
vals <- c(8, 12,13,16,14,12)

Related

ggplot with tryCatch: want blank plot if there's an error during expression

Some data:
x %>% dput
structure(list(date = structure(c(18782, 18783, 18784, 18785,
18786, 18787, 18789, 18791, 18792, 18793, 18795, 18797, 18798,
18799, 18801, 18803, 18805, 18806), class = "Date"), `Expired Trials` = c(3L,
1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), `Trial Sign Ups` = c(3L, 1L, 1L, 2L, 3L, 4L, 1L, 1L, 1L,
1L, 2L, 1L, 3L, 2L, 2L, 1L, 1L, 1L), `Total Site Conversions` = c(3,
1, 1, 2, 3, 4, 1, 1, 1, 1, 2, 1, 3, 2, 2, 1, 1, 1), `Site Conversion Rate` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), `Trial to Paid Conversion Rate` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_)), row.names = c(NA, -18L), class = c("tbl_df",
"tbl", "data.frame"))
The context is a shiny app where the field 'Sessions' will sometimes exist and sometimes not, depending on the user's selections. Rather than display the red warning message, I just want nothing or a blank plot shown instead of an error message:
x %>%
ggplot(aes(date, Sessions)) +
geom_col(na.rm = T) +
geom_line(aes(y = `Site Conversion Rate`), na.rm = T)
Error in FUN(X[[i]], ...) : object 'Sessions' not found
Tried:
tryCatch(expr = {x %>%
ggplot(aes(date, Sessions)) +
geom_col(na.rm = T) +
geom_line(aes(y = `Site Conversion Rate`), na.rm = T)
},
error = function(e) {message(''); print(e)},
finally = {ggplot() + theme_void()})
But, this still spits out the error, wanted/expected a blank plot instead.
How can I do this?
Consider using an if/else expression with all(), i.e. we plot only if all the column names used in the plot are present; otherwise we return a blank plot.
# Columns the chart depends on; the chart is drawn only when every one of them
# is present in `x`, otherwise a message is emitted and an empty plot is
# returned.
nm1 <- c("date", "Sessions", "Site Conversion Rate")
if (all(nm1 %in% names(x))) {
  ggplot(x, aes(date, Sessions)) +
    geom_col(na.rm = TRUE) +
    geom_line(aes(y = `Site Conversion Rate`), na.rm = TRUE)
} else {
  message("Not all columns are found")
  ggplot()
}
Another option is possibly() from purrr, specifying the otherwise argument:
library(purrr)
# Build the Sessions / conversion-rate chart for `x` and render it right away.
# The explicit print() forces the plot to be evaluated inside this function,
# so an error caused by a missing column is raised here, where the possibly()
# wrapper below can intercept it.
f1 <- function(x) {
  chart <- ggplot(x, aes(date, Sessions)) +
    geom_col(na.rm = TRUE) +
    geom_line(aes(y = `Site Conversion Rate`), na.rm = TRUE)
  print(chart)
}
# Safe variant of f1: if f1 fails, return an empty ggplot instead of an error.
f1p <- possibly(f1, otherwise = ggplot())
-testing
f1p(x)
-output
Or a modification of the OP's tryCatch
# Render the chart; if it fails (e.g. a required column is missing), log the
# error and draw an empty plot instead.
# The fallback plot lives in the error handler rather than in `finally`:
# tryCatch() evaluates `finally` only for its side effects and discards its
# value, so a plot object built there is never displayed.
tryCatch(
  expr = {
    print(
      x %>%
        ggplot(aes(date, Sessions)) +
        geom_col(na.rm = TRUE) +
        geom_line(aes(y = `Site Conversion Rate`), na.rm = TRUE)
    )
  },
  error = function(e) {
    message('')
    print(e)
    # Explicit print() so the blank plot is actually rendered.
    print(ggplot() + theme_void())
  }
)
<simpleError in FUN(X[[i]], ...): object 'Sessions' not found>

R create column in dataframe value name of dataframe

I have a list of dataframes (these are spatial dataframes) named for example "map_g1_r1_airport", "map_g1_r1_hotel", "map_g1_r2_bank", "map_g1_r2_market"
These are elements that were digitized from several maps. The maps were originally called "map_g1_r1", "map_g1_r2".
I am trying to add a column to each dataframe with the name of the original map using a loop.
Here is what I am trying to do:
map_g1_r1_airport$mapid<-map_g1_r1
With the loop (Unfortunately this does not do what I intend to do. Instead it simply creates a "content" field in the Values board.):
list_df<-c("map_g1_r1_airport", "map_g1_r1_hotel", "map_g1_r2_bank", "map_g1_r2_market")
for (df in 1:length(list_df)){
paste(list_df[df],"$mapid<-",
print(content<-gsub("(.*)_.*","\\1",
c(paste(list_df[df]))),sep=""),
quote=FALSE)}
Any help is most welcome!
Here is one example of the data before change:
structure(list(id = c(1, 2, 3), Name = structure(c(1L, 3L, 4L
), .Label = c("A", "B", "C", "D", "E"
), class = "factor"), Year = structure(c(NA_integer_, NA_integer_,
NA_integer_), .Label = character(0), class = "factor"), geometry = structure(list(
structure(c(41.4086152370865, 2.44718243982123), class = c("XY",
"POINT", "sfg")), structure(c(45.3852740543083, -4.31103098867136
), class = c("XY", "POINT", "sfg")), structure(c(38.4200314592624,
-6.96113884231683), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 41.4086152370865,
ymin = 2.31103098867136, xmax = 45.4200314592624, ymax = -4.44718243982123
), class = "bbox"), crs = structure(list(epsg = NA_integer_,
proj4string = NA_character_), class = "crs"), n_empty = 0L)), sf_column = "geometry", agr = structure(c(id = NA_integer_,
Name = NA_integer_, Year = NA_integer_), .Label = c("constant",
"aggregate", "identity"), class = "factor"), row.names = c(NA,
3L), class = c("sf", "data.frame"))
This is what I would like to get (with the mapid map_g1_r1):
structure(list(id = c(1, 2, 3), Name = structure(c(1L, 3L, 4L
), .Label = c("A", "B", "C", "D", "E"
), class = "factor"), Year = structure(c(NA_integer_, NA_integer_,
NA_integer_), .Label = character(0), class = "factor"), geometry = structure(list(
structure(c(41.4086152370865, 2.44718243982123), class = c("XY",
"POINT", "sfg")), structure(c(45.3852740543083, -4.31103098867136
), class = c("XY", "POINT", "sfg")), structure(c(38.4200314592624,
-6.96113884231683), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 41.4086152370865,
ymin = 2.31103098867136, xmax = 45.4200314592624, ymax = -4.44718243982123
), class = "bbox"), crs = structure(list(epsg = NA_integer_,
proj4string = NA_character_), class = "crs"), n_empty = 0L),
mapid = c("map_g1_r1", "map_g1_r1", "map_g1_r1")), sf_column = "geometry", agr = structure(c(id = NA_integer_,
Name = NA_integer_, Year = NA_integer_, mapid = NA_integer_), .Label = c("constant",
"aggregate", "identity"), class = "factor"), row.names = c(NA,
3L), class = c("sf", "data.frame"))
You can achieve that even without a loop.
I would first start by creating a list with the names you want to see in each spatial data.frame. I assume they are derived from the names of the list.
mapid = names(list_df)
following that you can employ mapply to use a function that takes
the first element of a list (or vector) and the first element of another list/vector. Then it moves on and applies the same function to the second elements of each vector. It is essentially a multiple-input version of lapply.
The function we will give to mapply is cbind, which takes two data.frames and joins them by column. In this case one data.frame will be your spatial object and the other will be a vector with one single element: the current map name. cbind will naturally convert this name to a 1-column data.frame and repeat the name to match the number of rows in the spatial object.
# SIMPLIFY = FALSE keeps the result as a list with one data.frame per input;
# with the default (TRUE) mapply() may try to collapse same-length results
# into a matrix. Passing the names as `mapid = ` makes cbind() label the new
# column "mapid".
final <- mapply(cbind, list_df, mapid = mapid, SIMPLIFY = FALSE)
I haven't tested it, but it should work.
You can get all the individual dataframes in a list using mget and add a new column with their name using mutate.
Using tidyverse functions you can do this as :
library(dplyr)
library(purrr)
list_df <- c("map_g1_r1_airport", "map_g1_r1_hotel", "map_g1_r2_bank", "map_g1_r2_market")
# Fetch the data frames by name into a list named after the objects.
tmp <- mget(list_df)
# The requested `mapid` is the original map name, i.e. the object name without
# its final "_<feature>" suffix ("map_g1_r1_airport" -> "map_g1_r1").
result <- imap(tmp, ~ .x %>% mutate(mapid = sub("_[^_]*$", "", .y)))
result will have all the changed dataframes in a list. If you want these changes to be reflected in the original objects, you can use list2env.
list2env(result, .GlobalEnv)

How to select variables from dataframe inside other recursive dataframe in r

I have the following dataframe:
str(data_raw)
'data.frame': 20 obs. of 18 variables:
$ id : chr "2306173214621953247_176548637" "2304792897512042631_176548637" "2298783867773662543_176548637" "2249480367030200759_176548637" ...
$ user :'data.frame': 20 obs. of 4 variables:
..$ id : chr "176548637" "176548637" "176548637" "176548637" ...
..$ full_name : chr "Carlos Costa" "Carlos Costa" "Carlos Costa" "Carlos Costa" ...
But when I try to get the user$id variable, it shows the error:
data_raw<- data_raw %>%
select(id,user.id)
Error: Can't subset columns that don't exist.
x The column user.id doesn't exist.
I also tried this way:
data_raw<- data_raw %>%
+ select(id,user$id)
Error: object 'user' not found
I know that the user variable is a dataframe, but how do I get information from a dataframe within another dataframe?
I simplified the structure to help. The dataframe has only 1 line.
dput(head(data_raw, 2))
structure(list(id = "2306173214621953247_176548637", user = structure(list(
id = "176548637", full_name = "Carlos Costa", profile_picture = "link.com",
username = "carlosocosta"), row.names = 1L, class = "data.frame"),
images = structure(list(thumbnail = structure(list(width = 150L, height = 150L, url = "link.com"), row.names = 1L, class = "data.frame"),
low_resolution = structure(list(width = 320L, height = 320L,
url = "link.com"), row.names = 1L, class = "data.frame"),
standard_resolution = structure(list(width = 640L, height = 640L,
url = "link.com"), row.names = 1L, class = "data.frame")), row.names = 1L, class = "data.frame"),
created_time = "1589137292", caption = structure(list(id = "18105905566138276",
text = "Não basta ser mãe! Tem que aprender a jogar Fortnite! Feliz dia das mães! #maedemenino",
created_time = "1589137292", from = structure(list(id = "176548637",
full_name = "Carlos Costa", profile_picture = "link.com",
username = "carlosocosta"), row.names = 1L, class = "data.frame")), row.names = 1L, class = "data.frame"),
user_has_liked = FALSE, likes = structure(list(count = 69L), row.names = 1L, class = "data.frame"),
tags = list("maedemenino"), filter = "Normal", comments = structure(list(
count = 3L), row.names = 1L, class = "data.frame"), type = "image",
link = "link.com", location = structure(list(
latitude = NA_real_, longitude = NA_real_, name = NA_character_,
id = NA_integer_), row.names = 1L, class = "data.frame"),
attribution = NA, users_in_photo = list(structure(list(user = structure(list(
username = "vivicosta_oficial"), class = "data.frame", row.names = 1L),
position = structure(list(x = 0.2210144928, y = 0.5857487923), class = "data.frame", row.names = 1L)), class = "data.frame", row.names = 1L)),
carousel_media = list(NULL), videos = structure(list(standard_resolution = structure(list(
width = NA_integer_, height = NA_integer_, url = NA_character_,
id = NA_character_), row.names = 1L, class = "data.frame"),
low_resolution = structure(list(width = NA_integer_,
height = NA_integer_, url = NA_character_, id = NA_character_), row.names = 1L, class = "data.frame"),
low_bandwidth = structure(list(width = NA_integer_, height = NA_integer_,
url = NA_character_, id = NA_character_), row.names = 1L, class = "data.frame")), row.names = 1L, class = "data.frame"),
video_views = NA_integer_), row.names = 1L, class = "data.frame")
Since we have a dataframe inside a dataframe, make it a single dataframe using do.call +cbind and then subset the columns needed.
do.call(cbind, data_raw)[c('id', 'user.id')]
# id user.id
#1 2306173214621953247_176548637 176548637
#2 2304792897512042631_176548637 176548637
Or with dplyr::select
library(dplyr)
do.call(cbind, data_raw) %>% select(id, user.id)
data
Tested on this data :
data_raw <- data.frame(id = c('2306173214621953247_176548637',
'2304792897512042631_176548637'))
user <- data.frame(id = c('176548637', '176548637'), full_name = c('a', 'b'))
data_raw$user <- user
str(data_raw)
#'data.frame': 2 obs. of 2 variables:
# $ id : chr "2306173214621953247_176548637" "2304792897512042631_176548637"
# $ user:'data.frame': 2 obs. of 2 variables:
# ..$ id : chr "176548637" "176548637"
# ..$ full_name: chr "a" "b"

using if within filter_at()

DATA
df <- structure(list(ID = c("51-07519", "51-07522", "51-07525", "51-07526",
"51-07527", "51-07530"), name = c("Fyb", "Fyb", "Fyb", "Fyb",
"Fyb", "Fyb"), serology_charts = c(0L, 0L, NA, 0L, 1L, 1L), antibodies_chart = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), bioarray_charts = c(NA, 0L, NA, 0L, NA, NA), others_charts = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), Fyb = c(1, 1, 1, 1, 1, 1), GATAfactor = c(0, 0, 1, 0, 0.5,
0.5)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))
I currently run the following filter:
df%>%
filter_at(vars(ends_with("charts")), any_vars(!is.na(.) & . != Fyb*GATAfactor))
Is it possible to write an if statement as follows:
if Fyb!=1 {filter_at(vars(ends_with("charts")), any_vars(!is.na(.) & . != Fyb))}
else {filter_at(vars(ends_with("charts")), any_vars(!is.na(.) & . != Fyb*GATAfactor))}
We can wrap the condition in a case_when or ifelse
library(dplyr)
df %>%
filter_at(vars(ends_with("charts")),
any_vars(case_when(Fyb == 1 ~ !is.na(.) & . != Fyb*GATAfactor,
TRUE ~ !is.na(.) & . != Fyb)))
Or using ifelse
df %>%
filter_at(vars(ends_with("charts")),
any_vars(ifelse(Fyb == 1, !is.na(.) & . != Fyb*GATAfactor, !is.na(.) & . != Fyb)))

R - Populate one data frame with values from another dataframe, based on row matching

I'm trying to replace values in myDF1 from myDF2, where rows match for column "studyno" but the solutions I have found so far don't seem to be giving me the desired output.
Below are the data.frames:
myDF1 <- structure(list(studyno = c("J1000/9", "J1000/9", "J1000/9", "J1000/9",
"J1000/9", "J1000/9"), date = structure(c(17123, 17127, 17135,
17144, 17148, 17155), class = "Date"), pf_mcl = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), year = c(2016, 2016, 2016, 2016, 2016, 2016)), .Names = c("studyno",
"date", "pf_mcl", "year"), row.names = c(NA, 6L), class = "data.frame")
myDF2 <- structure(list(studyno = c("J740/4", "J1000/9", "J895/7", "J931/6",
"J609/1", "J941/3"), pf_mcl = c(0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("studyno",
"pf_mcl"), row.names = c(NA, 6L), class = "data.frame")
One solution I tried that seemed to work is shown below, however, I find that whatever values were in myDF1 before have been removed.
myDF1$pf_mcl <- myDF2$pf_mcl[match(myDF1$studyno, myDF2$studyno)]
# For each row of myDF1, look up the pf_mcl recorded for the same studyno in
# myDF2 (NA when the studyno has no match there).
# match() is used instead of merge() so that myDF1 keeps its row order and
# row count: merge() sorts the result by the key by default and repeats rows
# when a key occurs more than once in myDF2.
pf_mcl_new <- myDF2$pf_mcl[match(myDF1$studyno, myDF2$studyno)]
# Use the myDF2 value when it is available; otherwise keep the value that is
# already in myDF1.
myDF1$pf_mcl <- ifelse(is.na(pf_mcl_new), myDF1$pf_mcl, pf_mcl_new)

Resources