Hi,
I have two data frames that are like this for example
df1
V1 V2
a b
m n
h i
l m
n i
e f
and
df2
V1 V2
a b
c d
e f
b a
and I want to get rows that are the same in both data frames in a new one
like this
res2
V1 V2
a b
e f
b a
I tried
# Attempt: filter df1 by V1 membership in df2, then filter that result by V2.
# NOTE(review): `df1$v1` is lower-case while the column shown above is `V1`;
# `$` is case-sensitive, so this yields NULL and the subset has no rows.
res1<-df1[df1$v1%in%df2$V1, ]
# NOTE(review): V1 and V2 are tested independently here, so nothing requires
# both values to come from the same row of df2.
res2<-res1[res1$V2%in%df2$V2, ]
but I was unsuccessful. Any better idea?
You need to merge your two data frames based on V1 and V2 with an inner join:
# Example data, built with plain character columns (no factors).
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
df1 <- data.frame(V1 = c("a", "m", "h", "l", "n", "e"), V2 = c("b", "n", "i", "m", "i", "f"), stringsAsFactors = FALSE)
df2 <- data.frame(V1 = c("a", "c", "e"), V2 = c("b", "d", "f"), stringsAsFactors = FALSE)
# Inner join on both columns: keeps only the (V1, V2) combinations that are
# present in df1 and in df2.
merge(df1, df2, by = c("V1", "V2"))
The result will contain the (V1, V2) pairs that appear in both df1 and df2.
If you also want to keep the rows of df1 or df2 that have no match in the other data frame, you can additionally use the options all.x = TRUE or all.y = TRUE.
Related
I have my original data.frame looking like this:
df1
Group Variable Text
AB a Sentence1
AB b Sentence2
AB c Sentence3
XY d Sentence4
XY e Sentence5
XY f Sentence6
ZW g Sentence7
ZW h Sentence8
ZW i Sentence9
Now I need to re-arrange it like this:
df2
AB XY ZW Text1 Text2 Text3
a d g Sentence1 Sentence4 Sentence7
b e h Sentence2 Sentence5 Sentence8
c f i Sentence3 Sentence6 Sentence9
P.S: The reason my output data.frame looks like this, is that I would later concatenate Text1-Text3 columns per row. But I do it outside of R
Great thanks for any help!
Code for df1 & df2:
# Long-format input: three groups, each holding three Variable/Text pairs.
df1 <- data.frame(
  Group = rep(c("AB", "XY", "ZW"), each = 3),
  Variable = letters[1:9],
  Text = paste0("Sentence", 1:9)
)
# Desired wide layout: one column of variables per group, followed by one
# column of sentences per group.
df2 <- data.frame(
  AB = letters[1:3],
  XY = letters[4:6],
  ZW = letters[7:9],
  Text1 = paste0("Sentence", 1:3),
  Text2 = paste0("Sentence", 4:6),
  Text3 = paste0("Sentence", 7:9)
)
We can do this with dcast from data.table by specifying 'Variable' and 'Text' in the value.var
library(data.table)
# Reshape to wide form: rows are numbered within each Group and both
# 'Variable' and 'Text' are spread across the Group values.
# setDT() converts df1 to a data.table in place; the row-number column is then
# dropped as `Group`, and the trailing [] makes the result print.
dcast(
  setDT(df1),
  rowid(Group) ~ Group,
  value.var = c("Variable", "Text")
)[, Group := NULL][]
I have two data frames containing the names of genetic elements. I want another data frame with the elements in common in both data frames.
Example:
data.a data.b
Column Column
1 a c
2 b e
3 c l
4 d a
I want this result:
data.c
Column
1 a
2 c
This is just an example. The data frame data.b has more elements than data.a.
The %in% operator lets you find which elements are in both.
# Keep the entries of data.a$Column that also occur in data.b$Column,
# in the order they appear in data.a.
data.c <- data.frame(
  Column = data.a$Column[data.a$Column %in% data.b$Column]
)
data.c
Column
1 a
2 c
# Two single-column data frames with partially overlapping values.
a <- data.frame(a = letters[1:4])
a
b <- data.frame(b = letters[3:6])
b
# Values of a$a that are also present in b$b, wrapped in a new data frame.
c <- data.frame(c = a$a[a$a %in% b$b])
c
The merge function allows you to control the type of join you want.
# Two single-column data frames that share the key column `a`.
df1 <- data.frame(a = letters[1:4])
df2 <- data.frame(a = c("c", "e", "l", "a"))
# Join on `a`; all = FALSE keeps only keys present in both (inner join).
merge(df1, df2, by = "a", all = FALSE)
library(dplyr)
# tibble() is the current constructor; data_frame() is deprecated.
data.a <- tibble(a = c("a", "b", "c", "d"))
data.b <- tibble(a = c("c", "e", "l", "a"))
# Inner join on the shared column. Naming the key explicitly avoids the
# "Joining with `by = ...`" message and any accidental join on other columns
# that happen to share a name.
data.c <- data.a %>% inner_join(data.b, by = "a")
How can I get the rows of a data frame that have a value in one of their elements in common with another data frame?
I have written this, but it didn't work.
# Example of two data frames
df1 <- data.frame(V1 = c("a", "g", "h", "l", "n", "e"), V2 = c("b", "n", "i", "m", "i", "f"), stringsAsFactors = F)
df2 <- data.frame(V1 = c("a", "c", "f","h"), V2 = c("b", "d", "e","z"), stringsAsFactors = F)
# Finding the values shared between the columns of the two data frames
# res1: values common to both V1 columns; res2: values common to both V2 columns
res1<-intersect(df1$V1,df2$V1)
res2<-intersect(df1$V2,df2$V2)
# res3: values of df1$V1 that also occur in df2$V2
res3<-intersect(df1$V1,df2$V2)
# NOTE(review): same expression as res3; presumably
# intersect(df1$V2,df2$V1) was intended -- confirm
res4<-intersect(df1$V1,df2$V2)
# Getting rows of df1 that have a shared value in at least one element
# NOTE(review): all(x == resN) compares the row position-by-position with the
# whole resN vector (recycled), so a row is kept only when every cell equals
# the corresponding element of resN; it is not a membership test like %in%.
ress1<-df1[apply(df1, MARGIN = 1, function(x) all(x== res1)), ]
ress2<-df1[apply(df1, MARGIN = 1, function(x) all(x== res2)), ]
ress3<-df1[apply(df1, MARGIN = 1, function(x) all(x== res3)), ]
ress4<-df1[apply(df1, MARGIN = 1, function(x) all(x== res4)), ]
# Getting rows of df2 that have a shared value in at least one element
# (same row-versus-vector comparison as for df1 above)
resss1<-df2[apply(df2, MARGIN = 1, function(x) all(x== res1)), ]
resss2<-df2[apply(df2, MARGIN = 1, function(x) all(x== res2)), ]
resss3<-df2[apply(df2, MARGIN = 1, function(x) all(x== res3)), ]
resss4<-df2[apply(df2, MARGIN = 1, function(x) all(x== res4)), ]
# Then stack all eight partial results into one data frame
final.res<-rbind(ress1,ress2,ress3,ress4,resss1,resss2,resss3,resss4)
My desired result is:
a b
h z
h i
f e
e f
This should work
# Example data, built with character columns (no factors).
# `FALSE` is spelled out because `F` is a reassignable variable.
df1 <- data.frame(V1 = c("a", "g", "h", "l", "n", "e"), V2 = c("b", "n", "i", "m", "i", "f"), stringsAsFactors = FALSE)
df2 <- data.frame(V1 = c("a", "c", "f","h"), V2 = c("b", "d", "e","z"), stringsAsFactors = FALSE)
# Values that occur anywhere in df1 and also anywhere in df2
vals <- intersect(c(df1$V1, df1$V2), c(df2$V1, df2$V2))
# Collect every row of either data frame whose V1 or V2 is a shared value.
# The four subsets are stacked in this fixed order, which determines the row
# order of the result.
full <- rbind(
  subset(df1, df1$V1 %in% vals),
  subset(df1, df1$V2 %in% vals),
  subset(df2, df2$V1 %in% vals),
  subset(df2, df2$V2 %in% vals)
)
# A row can be collected more than once (it matches on both columns, or it
# exists in both data frames); keep only its first occurrence.
full <- full[!duplicated(full), ]
I have the following data frame in r
ID COL.1 COL.2 COL.3 COL.4
1 a b
2 v b b
3 x a n h
4 t
I am new to R and I don't understand how to process the data frame in order to have this at the end; another problem is that I have more than 100 columns
# Desired result: one comma-separated string per row of the table above,
# containing only the cells that are filled in.
stream <- c("1,a,b","2,v,b,b","3,x,a,n,h","4,t")
another problem is that I have more than 100 columns .
Try this
# Fold the columns of df from left to right, joining them element-wise
# with a comma; yields one string per row.
Reduce(function(acc, col) paste(acc, col, sep = ","), df)
Where df is your data.frame
This might be what you're looking for, even though it's not elegant.
# Example data. Empty cells are written as NA: NULL cannot be used for this
# because c() silently drops NULL, which would leave the columns with
# different lengths and make data.frame() fail.
my_df <- data.frame(ID = seq(1, 4, by = 1),
                    COL.1 = c("a", "v", "x", "t"),
                    COL.2 = c("b", "b", "a", NA),
                    COL.3 = c(NA, "b", "n", NA),
                    COL.4 = c(NA, NA, "h", NA),
                    stringsAsFactors = FALSE)
# Build one comma-separated string per row from every column (so it works
# unchanged with 100+ columns), skipping cells that are NA or empty.
# Empty cells are filtered out before pasting rather than removed afterwards
# with a regular expression on "NA", which could also damage real values
# that contain the letters "NA".
stream <- vapply(
  seq_len(nrow(my_df)),
  function(i) {
    cells <- unlist(lapply(my_df, function(col) as.character(col[i])))
    paste(cells[!is.na(cells) & nzchar(cells)], collapse = ",")
  },
  character(1)
)
How can I convert these loops to lapply function or another fast function to speed up?
Example:
# Example data (character columns).
df1 <- data.frame(
  V1 = c("a", "g", "h", "l", "n", "e"),
  V2 = c("b", "n", "i", "m", "i", "f"),
  stringsAsFactors = FALSE)
df2 <- data.frame(
  V1 = c("a", "c", "b"),
  V2 = c("b", "d", "a"),
  stringsAsFactors = FALSE)
# Compare every row of df1 with every row of df2. A pair matches when the two
# rows hold the same values, either in the same order or with V1/V2 swapped.
# seq_len() is used so that an empty data frame gives zero iterations
# (1:nrow() would count 1, 0), and the scalar operators && / || are used
# because an `if` condition must be a single TRUE/FALSE.
# Note: `res` is overwritten on every match, so after the loops it holds only
# the last matching pair of rows.
for (i in seq_len(nrow(df1))) {
  for (j in seq_len(nrow(df2))) {
    if ((df1$V1[i] == df2$V1[j] && df1$V2[i] == df2$V2[j]) ||
        (df1$V1[i] == df2$V2[j] && df1$V2[i] == df2$V1[j])) {
      res1 <- df1[i, ]
      res2 <- df2[j, ]
      res <- rbind(res1, res2)
    }
  }
}
If you only have two columns, you could also use pmin and pmax, and then combine them with merge in order to find the common rows:
# Order-independent key for each df2 row: the element-wise minimum goes in
# the first column and the maximum in the second, plus the original row
# number in `indx`. seq_len() keeps this valid when df2 has no rows
# (1:nrow(df2) would give c(1, 0) and make data.frame() fail).
lookup <- setNames(data.frame(do.call(pmin, df2),
                              do.call(pmax, df2),
                              seq_len(nrow(df2))),
                   c(names(df2), "indx"))
# merge() joins on the column names lookup and df1 have in common; the
# surviving `indx` values pick the matching rows out of the original df2.
df2[merge(lookup, df1)$indx, ]
# V1 V2
# 1 a b
# 3 b a
Or using data.table for more efficiency
library(data.table)
# Order-independent key for each df2 row: element-wise minimum and maximum.
lookup <- data.table(do.call(pmin, df2), do.call(pmax, df2))
# Rename the key columns (by reference) to match df2's column names.
setnames(lookup, names(df2))
# Join df1 onto the keys; which = TRUE returns the matching row numbers of
# lookup, and nomatch = 0L leaves out df1 rows that have no match.
indx <- lookup[df1, on = names(df2), which = TRUE, nomatch = 0L]
df2[indx, ]
# V1 V2
# 1 a b
# 3 b a
We can try
# Build one key per row by pasting its values together, sorting the values of
# each df2 row first so that the column order does not matter, and keep the
# df2 rows whose key also occurs among the df1 keys.
# A separator is used between the values: without one, different rows can
# produce the same key (e.g. "ab" + "c" and "a" + "bc" both give "abc").
# df1 rows are used as they are, i.e. they are not sorted.
df2[
  do.call(paste, c(as.data.frame(t(apply(df2, 1, sort))), sep = "\r")) %in%
    do.call(paste, c(df1, sep = "\r")),
]
# V1 V2
#1 a b
#3 b a