I am having an issue assigning variable objects to a list of data frames. For example,
df1 <- data.frame(a = 1, b = 1:10)
df2 <- data.frame(a = 2, b = 1:10)
df3 <- data.frame(a = 3, b = 1:10)
x <- c("a", "b", "c")
y <- list("df1", "df2", "df3")
My goal is to assign each data frame in the y list to the object in x. I can do it long hand.
a <- y[[1]]
But I have many iterations. I have tried the following without any luck
map2(x, y, function(x, y) x <- y)
and
map2(x, y, ~assign(x, y))
Appreciate any help!
We can unlist the list of object names (unlist(y)), get the values with mget in a list, set the names of the list elements with 'x' vector values and use list2env to create objects in the global env (not recommended though)
list2env(setNames(mget(unlist(y)), x), .GlobalEnv)
If we use map2, then we need to get the value of 'y' and also specify the environment to assign the value
map2(x, y, ~ assign(.x, get(.y), envir = .GlobalEnv))
-output
a
# a b
#1 1 1
#2 1 2
#3 1 3
#4 1 4
#5 1 5
#6 1 6
#7 1 7
#8 1 8
#9 1 9
#10 1 10
b
# a b
#1 2 1
#2 2 2
#3 2 3
#4 2 4
#5 2 5
#6 2 6
#7 2 7
#8 2 8
#9 2 9
#10 2 10
Unfortunately, the above options didn't work in my case. I ended up adapting the suggestions made by akrun which worked.
names(y) <- x
list2env(y, envir=.GlobalEnv)
I need the following:
1) Keep the columns if: i) The last n observations (n = 3) aren't NA's, ii) there is no NA's at all, iii) Backwards from the last NA's, there are not more than 3 adjacent NA observations
2) Drop the columns if: i) There are 3 or more adjacent NA observations
I'd like if the answer is using dplyr
Some example:
data = data.frame(
A = c(3,3,3,3,4, rep(NA,5)),
B = c(rnorm(10)),
C = c(rep(NA,3), rnorm(7)),
D = c(rnorm(8), NA, NA)
)
I've tried:
data %>%
select_if(~sum(!is.na(.)) >= 3)
select_if(~sum(is.na(.)) > 0)
In my example, I'd only keep B, C and D.
We can use tail to get last n entries and drop the columns where all of them are NA.
n <- 3
library(dplyr)
data %>% select_if(~!all(is.na(tail(., n))))
# B C D
#1 0.5697 NA 0.29145
#2 -0.1351 NA -0.44329
#3 2.4016 NA 0.00111
#4 -0.0392 0.153 0.07434
#5 0.6897 2.173 -0.58952
#6 0.0280 0.476 -0.56867
#7 -0.7433 -0.710 -0.13518
#8 0.1888 0.611 1.17809
#9 -1.8050 -0.934 NA
#10 1.4656 -1.254 NA
Or with inverted logic
data %>% select_if(~any(!is.na(tail(., n))))
For the second condition,
Drop the columns if: i) There are 3 or more adjacent NA observations
we can use rle to get adjacent values
data %>% select_if(~!any(with(rle(is.na(.)), lengths[values]) >= n))
# B D
#1 0.5697 0.29145
#2 -0.1351 -0.44329
#3 2.4016 0.00111
#4 -0.0392 0.07434
#5 0.6897 -0.58952
#6 0.0280 -0.56867
#7 -0.7433 -0.13518
#8 0.1888 1.17809
#9 -1.8050 NA
#10 1.4656 NA
Since we already have the functions, we can use the same in base R as well with sapply
#Condition 1
data[!sapply(data, function(x) all(is.na(tail(x, n))))]
#Condition 2
data[!sapply(data, function(x) any(with(rle(is.na(x)), lengths[values]) >= n))]
I have the following data frame:
df <- data.frame(
x = rep(letters[1:3], 2)
)
And the following vector:
vec <- c(1.5, 3.2)
This vector belongs to each b in df. How do I mutate vec if it matches b and return NA values if not?
Expected outcome:
1 a NA
2 b 1.5
3 c NA
4 a NA
5 b 3.2
6 c NA
Simplest way would be to get indexes of "b" and replace them with vec.
df$output[df$x == "b"] <- vec
df
# x output
#1 a NA
#2 b 1.5
#3 c NA
#4 a NA
#5 b 3.2
#6 c NA
Another option is with replace
df$output <- replace(df$output, df$x == "b", vec)
Forcefully, fitting this into tidyverse
library(dplyr)
df$output <- NA
df %>%
mutate(output = replace(output, x == "b", vec))
I have a data frame with two columns "A" and "B". I created a function that works as mentioned below:
If X (user entered value) is found in column A, then return the X value found in column A and it's corresponding value in B column.
Here's my code:
myfunction <- function(x) {
r<- with(my_dataframe, my_dataframe[A %in% x, c("A", "B")])
return(data.frame(r))
}
I want to tweak this in such a way that if user input (value for X) doesn't appear in column A, return that value and NA for column B.
Example:
A B
1 A12
2 F1222
If the values for X are 1, 5. I want the output to look like this --
1 A12
5 NA
One approach could be to first find matched rows using condition as matched = my_dataframe$A==x.
Now, there are any matched rows found use matched value to return corresponding rows. Otherwise create a row with NA value for B.
myfunction <- function(x) {
r <- data.frame()
matched = my_dataframe$A %in% x
if(sum(matched) > 0){
r<- with(my_dataframe, my_dataframe[matched, c("A", "B")])
} else{
r<-data.frame(A = x, B = NA)
}
return(r)
}
#Test
myfunction(2)
# A B
# 2 2 A34
myfunction(11)
# A B
# 1 11 NA
Edited: Based on latest feedback from OP, I think dplyr::left_join will do the trick for him as:
a <- 1
dplyr::left_join(data.frame(A=a), my_dataframe, by="A")
# A B
# 1 1 A21
a <- c(2,3,12,34,45)
dplyr::left_join(data.frame(A=a), my_dataframe, by="A")
# A B
# 1 2 A34
# 2 3 D345
# 3 12 <NA>
# 4 34 <NA>
# 5 45 <NA>
Data
my_dataframe <- data.frame(A = 1:4,
B=c("A21", "A34", "D345", "E45"), stringsAsFactors = FALSE)
myfunction <- function(x) {
r<- with(my_dataframe, my_dataframe[A %in% x, c("A", "B")])
if(!nrow(r)) data.frame(A=x,B=NA) else data.frame(r)
}
> myfunction(3)
A B
1 3 NA
> myfunction(2)
A B
2 2 F1222
edit to allow vectors:
my=function(x){
s=subset(data,A==x)
m=x%in%s$A
if(all(m)) s else rbind(s,cbind(A=x[!m],B=NA))
}
my(1)
A B
1 1 A12
> my(1:10)
A B
1 1 A12
2 2 F1222
3 3 <NA>
4 4 <NA>
5 5 <NA>
6 6 <NA>
7 7 <NA>
8 8 <NA>
9 9 <NA>
10 10 <NA>
> my(4)
A B
1 4 NA
my(c(1,3.11))
A B
1 1.00 A12
2 3.11 <NA>
I have a dataframe, say
df = data.frame(x = c("a","a","b","b","b","c","d","t","c","b","t","c","t","a","a","b","d","t","t","c"),
y = c(2,4,5,2,6,2,4,5,2,6,2,4,5,2,6,2,4,5,2,6))
I want to remove only those rows in which one or multiple ts are directly in between a d and a c, in all other cases I want to retain the cases. So for this example, I would like to remove the ts on row 8, 18 and 19, but keep the others. I have over thousands of cases so doing this manually would be a true horror. Any help is very much appreciated.
One option would be to use rle to get runs of the same string and then you can use an sapply to check forward/backward and return all the positions you want to drop:
rle_vals <- rle(as.character(df$x))
drop <- unlist(sapply(2:length(rle_vals$values), #loop over values
function(i, vals, lengths) {
if(vals[i] == "t" & vals[i-1] == "d" & vals[i+1] == "c"){#Check if value is "t", previous is "d" and next is "c"
(sum(lengths[1:i-1]) + 1):sum(lengths[1:i]) #Get row #s
}
},vals = rle_vals$values, lengths = rle_vals$lengths))
drop
#[1] 8 18 19
df[-drop,]
# x y
#1 a 2
#2 a 4
#3 b 5
#4 b 2
#5 b 6
#6 c 2
#7 d 4
#9 c 2
#10 b 6
#11 t 2
#12 c 4
#13 t 5
#14 a 2
#15 a 6
#16 b 2
#17 d 4
#20 c 6
This also works, by collapsing to a string, identifying groups of t's between d and c (or c and d - not sure whether you wanted this option as well), then working out where they are and removing the rows as appropriate.
df = data.frame(x=c("a","a","b","b","b","c","d","t","c","b","t","c","t","a","a","b","d","t","t","c"),
y=c(2,4,5,2,6,2,4,5,2,6,2,4,5,2,6,2,4,5,2,6),stringsAsFactors = FALSE)
dfs <- paste0(df$x,collapse="") #collapse to a string
dfs2 <- do.call(rbind,lapply(list(gregexpr("dt+c",dfs),gregexpr("ct+d",dfs)),
function(L) data.frame(x=L[[1]],y=attr(L[[1]],"match.length"))))
dfs2 <- dfs2[dfs2$x>0,] #remove any -1 values (if string not found)
drop <- unlist(mapply(function(a,b) (a+1):(a+b-2),dfs2$x,dfs2$y))
df2 <- df[-drop,]
Here is another solution with base R:
df = data.frame(x = c("a","a","b","b","b","c","d","t","c","b","t","c","t","a","a","b","d","t","t","c"),
y = c(2,4,5,2,6,2,4,5,2,6,2,4,5,2,6,2,4,5,2,6))
#
s <- paste0(df$x, collapse="")
L <- c(NA, NA)
while (TRUE) {
r <- regexec("dt+c", s)[[1]]
if (r[1]==-1) break
L <- rbind(L, c(pos=r[1]+1, length=attr(r, "match.length")-2))
s <- sub("d(t+)c", "x\\1x", s)
}
L <- L[-1,]
drop <- unlist(apply(L,1, function(x) seq(from=x[1], len=x[2])))
df[-drop, ]
# > drop
# 8 18 19
# > df[-drop, ]
# x y
# 1 a 2
# 2 a 4
# 3 b 5
# 4 b 2
# 5 b 6
# 6 c 2
# 7 d 4
# 9 c 2
# 10 b 6
# 11 t 2
# 12 c 4
# 13 t 5
# 14 a 2
# 15 a 6
# 16 b 2
# 17 d 4
# 20 c 6
With gregexpr() it is shorter:
s <- paste0(df$x, collapse="")
g <- gregexpr("dt+c", s)[[1]]
L <- data.frame(pos=g+1, length=attr(g, "match.length")-2)
drop <- unlist(apply(L,1, function(x) seq(from=x[1], len=x[2])))
df[-drop, ]