I'm trying to add a named row between two data.frames d1 and d2. My Desired_output is shown below.
I have tried a solution but it failed to get me to my desired output. Is there a solution to this?
d1 <- data.frame(b = 1:2, SE = 2:3)
d2 <- data.frame(b = 0:1, SE = 1:2)
a <- "obs"
# Solution failed:
dplyr::bind_rows(d1, !!a := rep(NA, ncol(d1)), d2)
Desired_output =
" b SE
1 1 2
2 2 3
obs NA NA
4 0 1
5 1 2"
In base R, we may also do
rbind(d1, `row.names<-`(d1[NA,][1,], a),
`row.names<-`(d2, nrow(d1) + seq_len(nrow(d2))))
-output
b SE
1 1 2
2 2 3
obs NA NA
3 0 1
4 1 2
Maybe this will help -
d1 <- data.frame(b = 1:2, SE = 2:3)
d2 <- data.frame(b = 0:1, SE = 1:2)
a <- "obs"
d3 <- d1[1, ]
d3[] <- NA
rownames(d3) <- a
rbind(d1, d3, d2)
# b SE
#1 1 2
#2 2 3
#obs NA NA
#11 0 1
#21 1 2
Here is an alternative solution, in case you want it in a pipe!
We wrap the whole procedure around add_row from the tibble package.
With bind_rows we bind both tables together and add a row before the first row of d2 (index 3 here).
Then we have to convert the row names to a column, rename the new row, and convert the column back to row names (rownames_to_column and column_to_rownames).
library(tibble)
library(dplyr)
# Stack d1 and d2, insert an all-NA row between them, and label that row
# with the value of `a` ("obs") via a temporary row-name column.
# The insert position is derived from d1, so the snippet still works when
# d1 has a different number of rows.
pos <- nrow(d1) + 1L
add_row(bind_rows(d1, d2),
        b = NA,
        SE = NA,
        .before = pos) %>%
  data.frame() %>%
  rownames_to_column("x") %>%
  # After the insert the row names are "1", "2", ...; only the row at
  # `pos` is renamed.
  mutate(x = ifelse(x == as.character(pos), a, x)) %>%
  column_to_rownames("x")
b SE
1 1 2
2 2 3
obs NA NA
4 0 1
5 1 2
Related
I have a data frame in R, but instead of NA, missing values are marked with a question mark, so na.omit doesn't work. How can I remove the rows that contain a "?"?
Thanks
dat <- data.frame(v1 = c(1, 5, "?"), v2 = c(3, 3, 9))
dat[dat$v1 != "?",]
v1 v2
1 1 3
2 5 3
> a <- c(1:5)
> b <- c(2:5,"?")
> c <- c("a","b","?","d","e")
>
> df <- data.frame(a,b,c)
> df
a b c
1 1 2 a
2 2 3 b
3 3 4 ?
4 4 5 d
5 5 ? e
> df[df == "?"] <- NA
> df
a b c
1 1 2 a
2 2 3 b
3 3 4 <NA>
4 4 5 d
5 5 <NA> e
then you can use na.omit()
Try it with rowSums
# Keep only the rows in which no cell equals "?".
# `== 0` selects the rows to keep; `> 0` would select the rows that
# contain a "?", i.e. exactly the ones that should be removed.
# na.rm = TRUE stops NA cells from turning the row index into NA.
df[rowSums(df == "?", na.rm = TRUE) == 0, ]
Another option is converting the question mark to NA using na_if:
library(dplyr)
# Turn every "?" into NA, then drop the rows that contain an NA.
# na_if() is applied column by column: it works on vectors, and recent
# dplyr versions reject a whole data frame as input. Only character
# columns are touched, because "?" cannot be compared with numeric ones.
df %>%
  mutate(across(where(is.character), ~ na_if(.x, "?"))) %>%
  na.omit()
Output:
v1 v2
2 d c
Data
df <- data.frame(v1 = c("?", "d"),
v2 = c("e", "c"))
I have 2 data frames, and I want to write a code that will allow me to check if a row from data frame1 exist in data frame2, and if so then I want to replace the row(s) from data frame1 with the row(s) from data frame2. Here is an example:
dataframe1:
name
A
B
AA
1
1
BB
1
0
CC
0
1
dataframe2:
name
A
B
AA
1
2
DD
1
3
EE
4
1
I want to switch rows between both dataframes, and the outcome will be:
dataframe1:
name
A
B
AA
1
2
BB
1
0
CC
0
1
To clarify, I want row AA in dataframe1 to be replaced by row AA from dataframe2.
This is what I tried to do:
df1[which(df1$name %in% df2$name)[1:nrow(df2)],] <- df2
And:
df1$name[match(df2$name,df1$name)] <- df2$name
Both didn't work unfortunately.
Thanks for helping!
Does this work:
df1
name A B
1 AA 1 1
2 BB 1 0
3 CC 0 1
df2
name A B
1 AA 1 2
2 DD 1 3
3 EE 4 1
df2$name %in% df1$name
[1] TRUE FALSE FALSE
# Replace each row of df1 whose name also occurs in df2 with the matching
# df2 row. match() pairs the rows by name; two independent %in% masks
# would pair them by position, which misaligns rows when the shared names
# appear in a different order in the two data frames.
idx <- match(df1$name, df2$name)
df1[!is.na(idx), ] <- df2[idx[!is.na(idx)], ]
df1
name A B
1 AA 1 2
2 BB 1 0
3 CC 0 1
The natural_join is the function you are looking for
library(rqdatatable)
dataframe1 <- data.frame(
name = c('AA', 'BB', 'CC'),
A = c(1,1,0),
B = c(1,0,1)
)
dataframe2 <- data.frame(
name = c('AA', 'DD', 'EE'),
A = c(1,1,4),
B = c(2,3,1)
)
natural_join(dataframe2, dataframe1, by = "name",
jointype = 'RIGHT')
You can make an update join:
i <- match(df1$name, df2$name)
j <- which(!is.na(i))
df1[j,] <- df2[i[j],]
df1
# name A B
#1 AA 1 2
#2 BB 1 0
#3 CC 0 1
Data:
df1 <- data.frame(name = c("AA","BB","CC"), A = c(1,1,0), B = c(1,0,1))
df2 <- data.frame(name = c("AA","DD","EE"), A = c(1,1,4), B = c(2,3,1))
A dplyr way using across, left_join and coalesce
library(dplyr, warn.conflicts = F)
df1 <- data.frame(name = c("AA","BB","CC"), A = c(1,1,0), B = c(1,0,1))
df2 <- data.frame(name = c("AA","DD","EE"), A = c(1,1,4), B = c(2,3,1))
# Update join: for every column shared by df1 and df2, take df2's value
# where a row with the same `name` exists, otherwise keep df1's value.
df1 %>%
  left_join(df2, by = "name") %>%
  mutate(
    across(
      ends_with(".y"),
      # The suffix patterns are anchored with `$` and use a literal dot;
      # an unanchored '.y' matches any character followed by "y" anywhere
      # in the name (e.g. it would turn "day.y" into "d").
      ~ coalesce(., get(sub("[.]y$", ".x", cur_column()))),
      .names = "{sub('[.]y$', '', .col)}"
    ),
    # Drop the helper columns (*.x and *.y) that were consumed above.
    .keep = "unused"
  )
#> name A B
#> 1 AA 1 2
#> 2 BB 1 0
#> 3 CC 0 1
Created on 2021-07-06 by the reprex package (v2.0.0)
Let's say I have this data frame:
df <- data.frame(
a = c(NA,6,6,8),
x= c(1,2,2,4),
y = c(NA,2,NA,NA),
z = c("apple", 2, "2", NA),
d = c(NA, 5, 5, 5),stringsAsFactors = FALSE)
Rows 2 and 3 are duplicates and row 3 has an NA value. I want to delete the duplicate row with the NA value so that it looks like this:
df <- data.frame(
a = c(NA,6,8),
x= c(1,2,4),
y = c(NA,2,NA),
z = c("apple", 2, NA),
d = c(NA, 5, 5),stringsAsFactors = FALSE)
I tried this but it doesn't work:
df2 <- df %>% group_by (a,x,z,d) %>% filter(y == max(y))
Any suggestions?
df %>%
arrange_all() %>%
filter(!duplicated(fill(., everything())))
a x y z d
1 NA 1 NA apple NA
2 6 2 2 2 5
3 8 4 NA <NA> 5
df %>% arrange(a,x,z,d) %>% distinct(a,x,z,d,.keep_all=TRUE)
a x y z d
1 6 2 2 2 5
2 8 4 NA <NA> 5
3 NA 1 NA apple NA
Fill NA values with previous non-NA and select unique rows with distinct.
library(dplyr)
library(tidyr)
# Use the filled copy only to detect duplicates, and keep the original
# rows. Piping the filled data into distinct() would return the filled
# values instead (the last row would become 8 4 2 2 5).
df %>% filter(!duplicated(fill(., everything())))
# a x y z d
#1 NA 1 NA apple NA
#2 6 2 2 2 5
#3 8 4 NA <NA> 5
I would like to match two columns based on another. I'm trying to use the match function but I get NA values.
a <- data.frame( x = c(1,2,3,4,5))
b <- data.frame( y = c(3,4),
z = c("A","B"))
a$x <- b$z[match(a$x, b$y)]
I get:
> a
x
1 <NA>
2 <NA>
3 A
4 B
5 <NA>
I would like :
> a
x
1 1
2 2
3 A
4 B
5 5
First, rename the numeric column of b so that you can merge the two data frames:
b <- b %>% rename(x = y)
Then, merge them, turn variables into character and replace the values of column x with those of z if not NA.
a <- merge(a, b, by = "x", all.x = TRUE) %>%
mutate_all(as.character) %>%
mutate(x = ifelse(is.na(z), x, z))
Result:
x z
1 1 <NA>
2 2 <NA>
3 A A
4 B B
5 5 <NA>
Without renaming, I would propose this, which ends with the same result as broti's answer:
# Full outer join of a and b on a$x == b$y, then overwrite x with the
# label z wherever a label exists.
tmp.merge <- merge(a, b, by.x = "x", by.y = "y", all = TRUE)
# A logical mask replaces the row-by-row loop; it does not depend on the
# row names being convertible to numeric positions.
has_label <- !is.na(tmp.merge$z)
# Assigning character values converts the whole x column to character.
tmp.merge$x[has_label] <- as.character(tmp.merge$z[has_label])
tmp.merge
result :
> tmp.merge
x z
1 1 <NA>
2 2 <NA>
3 A A
4 B B
5 5 <NA>
The following works but you need to set stringsAsFactors = F, when defining dataframe b
a <- data.frame( x = c(1,2,3,4,10,13,12,11))
b <- data.frame( y = c(10,12,13),
z = c("A","B","C"),stringsAsFactors = F)
#
a %>% mutate(x = ifelse(x %in% b$y,b$z[match(x,b$y)],x))
Output
x
1 1
2 2
3 3
4 4
5 A
6 C
7 B
8 11
This is my reproducible code:
df <- data.frame(x = c(1, 2), y = c(3, 4))
df1 <- df %>% mutate(z = 1)
df2 <- df %>% mutate(z = 2)
df3 <- df %>% mutate(z = 3)
df <- rbind(df1, df2, df3)
df
I repeat the original data frame df 3 times, whilst adding one column where the number in the column indicated the repetition. In my use case, I have to do this more than 3 times. I could use a loop but is there a neater way? I guess i cannot use expand.grid.
You can also do it with a merge:
dfz <- data.frame(z = 1:3)
merge(df, dfz)
# x y z
# 1 1 3 1
# 2 2 4 1
# 3 1 3 2
# 4 2 4 2
# 5 1 3 3
# 6 2 4 3
We can create a list column and unnest
library(tidyverse)
# Give every row the full vector of repetition ids as a list column, then
# expand that list column so each row appears once per id.
df %>%
  mutate(z = list(1:3)) %>%
  # Name the column to expand explicitly; calling unnest() without `cols`
  # is deprecated.
  unnest(cols = z) %>%
  arrange(z)
# x y z
#1 1 3 1
#2 2 4 1
#3 1 3 2
#4 2 4 2
#5 1 3 3
#6 2 4 3
We can also do a cross join with sqldf. This creates a Cartesian Product of df and the reps tables:
library(sqldf)
reps <- data.frame(z = 1:3)
sqldf("select * from df, reps order by z")
or simply with map_dfr from purrr:
library(purrr)
map_dfr(1:3, ~cbind(df, z = .))
Output:
x y z
1 1 3 1
2 2 4 1
3 1 3 2
4 2 4 2
5 1 3 3
6 2 4 3
Yet another option using base R
n <- 3
do.call(rbind,
Map(`[<-`, replicate(n = n,
expr = df,
simplify = FALSE),
"z",
value = seq_len(n)))
# x y z
#1 1 3 1
#2 2 4 1
#3 1 3 2
#4 2 4 2
#5 1 3 3
#6 2 4 3
A few other ways not covered yet:
# setup
df <- data.frame(x = c(1, 2), y = c(3, 4))
n <- 3
# simple row indexing, add column manually
# The row indices are repeated n times (not a hard-coded 3), so the number
# of rows always agrees with the id column built on the next line.
# seq_len() also stays correct for a data frame with zero rows.
result <- df[rep(seq_len(nrow(df)), n), ]
result$id <- rep(seq_len(n), each = nrow(df))
# cross join in base
merge(df, data.frame(id = 1:n), by = NULL)
# cross join in tidyr
tidyr::crossing(df, data.frame(id = 1:n))
# dplyr version of the row-index method above
slice(df, rep(1:n(), n)) %>% mutate(id = rep(1:n, each = nrow(df)))
Inspiration drawn heavily from an old question of mine, How can I repeat a data frame?. Basically the same question but without the id column requirement.