Dplyr function to match column value to row values and replace it

Dplyr function to match column value to row values and replace it - r

I have 2 data frames, Data Frame A and Data Frame B (samples of both are given below).
I want to take the values of the active ident column in Data Frame B and insert them as a new row at the top of Data Frame A, so that each value lines up with the Data Frame A column whose name matches the corresponding row name in Data Frame B.
I have tried using dplyr but I can't seem to figure out how to do this in R. I would appreciate any help.
Attaching dput(head) for both my files
Cell Labels
structure(list(`hnscc.vp.fibroblasts#active.ident` = structure(c(3L,
2L, 3L, 3L, 3L, 3L), .Label = c("Cluster_0", "Cluster_4", "Cluster_3",
"Cluster_2", "Cluster_1"), class = "factor")), row.names = c("pat01.pre_AAACCTGAGGAGCGAG",
"pat01.pre_AAACCTGCACTACAGT", "pat01.pre_AAACCTGTCACCGTAA", "pat01.pre_AAATGCCCACTATCTT",
"pat01.pre_AACCATGAGCATCATC", "pat01.pre_AACCGCGCAGATGGCA"), class = "data.frame")
Gene Count Per Cell :
dput(head(Gene_Counts_per_Cell[, c(1:5)]))
structure(list(pat01.pre_AAACCTGAGGAGCGAG = c(1.99399322071276,
1.5433201979508, 2.4725719042226, -2.59159111384049, 1.56977481481343,
0.192853860719877), pat01.pre_AAACCTGCACTACAGT = c(2.90248911455912,
2.27707326162242, 2.12992680712843, -1.44512552229319, 0.541062218328074,
1.8626908687607), pat01.pre_AAACCTGTCACCGTAA = c(3.99090573935858,
3.00560247848693, 2.9656947677965, -3.23693215603618, 4.72557633990864,
0.0247894431208639), pat01.pre_AAATGCCCACTATCTT = c(1.08405270702075,
-0.884466121620786, 0.500175551980942, -2.28142505510742, 3.97105313918843,
-1.01130712883293), pat01.pre_AACCATGAGCATCATC = c(4.55944063063621,
2.43937477176712, 3.93016796802459, -1.92695887361317, 3.16070890309665,
1.65917938530014)), row.names = c("ACTB", "ACTG1", "ACTN1", "ADAP2",
"ADM", "ADRB2"), class = "data.frame")

This may be what you are looking for. Note that I have had to convert the clusters to numeric to ensure type consistency within the columns. Use the row names to distinguish between the cluster row and the other numeric data in the columns.
library(dplyr)
library(tidyr)
library(tibble)
library(stringr)
# Build a single "cluster" row from dfb (one column per cell barcode) and stack
# it on top of the expression data in dfa, matching cells by column name.
dfb %>%
  rownames_to_column("rowname") %>%
  # One column per cell; the values are the cluster labels.
  pivot_wider(
    names_from = rowname,
    values_from = `hnscc.vp.fibroblasts#active.ident`
  ) %>%
  # Keep only the trailing cluster number so the new row is numeric like dfa.
  # "\\d+$" (rather than "\\d$") so a multi-digit label such as "Cluster_10"
  # becomes 10 instead of 0.
  mutate(across(everything(), ~ as.numeric(str_extract(.x, "\\d+$")))) %>%
  mutate(cluster = "cluster") %>%
  column_to_rownames(var = "cluster") %>%
  # bind_rows() aligns by column name; cells that are missing from dfa get NA.
  bind_rows(dfa)
#> pat01.pre_AAACCTGAGGAGCGAG pat01.pre_AAACCTGCACTACAGT
#> cluster 3.0000000 4.0000000
#> ACTB 1.9939932 2.9024891
#> ACTG1 1.5433202 2.2770733
#> ACTN1 2.4725719 2.1299268
#> ADAP2 -2.5915911 -1.4451255
#> ADM 1.5697748 0.5410622
#> ADRB2 0.1928539 1.8626909
#> pat01.pre_AAACCTGTCACCGTAA pat01.pre_AAATGCCCACTATCTT
#> cluster 3.00000000 3.0000000
#> ACTB 3.99090574 1.0840527
#> ACTG1 3.00560248 -0.8844661
#> ACTN1 2.96569477 0.5001756
#> ADAP2 -3.23693216 -2.2814251
#> ADM 4.72557634 3.9710531
#> ADRB2 0.02478944 -1.0113071
#> pat01.pre_AACCATGAGCATCATC pat01.pre_AACCGCGCAGATGGCA
#> cluster 3.000000 3
#> ACTB 4.559441 NA
#> ACTG1 2.439375 NA
#> ACTN1 3.930168 NA
#> ADAP2 -1.926959 NA
#> ADM 3.160709 NA
#> ADRB2 1.659179 NA
data
dfb <- structure(list(`hnscc.vp.fibroblasts#active.ident` = structure(c(3L,
2L, 3L, 3L, 3L, 3L), .Label = c("Cluster_0", "Cluster_4", "Cluster_3",
"Cluster_2", "Cluster_1"), class = "factor")), row.names = c("pat01.pre_AAACCTGAGGAGCGAG",
"pat01.pre_AAACCTGCACTACAGT", "pat01.pre_AAACCTGTCACCGTAA", "pat01.pre_AAATGCCCACTATCTT",
"pat01.pre_AACCATGAGCATCATC", "pat01.pre_AACCGCGCAGATGGCA"), class = "data.frame")
dfa <- structure(list(pat01.pre_AAACCTGAGGAGCGAG = c(1.99399322071276,
1.5433201979508, 2.4725719042226, -2.59159111384049, 1.56977481481343,
0.192853860719877), pat01.pre_AAACCTGCACTACAGT = c(2.90248911455912,
2.27707326162242, 2.12992680712843, -1.44512552229319, 0.541062218328074,
1.8626908687607), pat01.pre_AAACCTGTCACCGTAA = c(3.99090573935858,
3.00560247848693, 2.9656947677965, -3.23693215603618, 4.72557633990864,
0.0247894431208639), pat01.pre_AAATGCCCACTATCTT = c(1.08405270702075,
-0.884466121620786, 0.500175551980942, -2.28142505510742, 3.97105313918843,
-1.01130712883293), pat01.pre_AACCATGAGCATCATC = c(4.55944063063621,
2.43937477176712, 3.93016796802459, -1.92695887361317, 3.16070890309665,
1.65917938530014)), row.names = c("ACTB", "ACTG1", "ACTN1", "ADAP2",
"ADM", "ADRB2"), class = "data.frame")
Created on 2022-03-21 by the reprex package (v2.0.1)

Related

Aggregate columns based on categories given by another dataframe

I have a dataframe where each column has some vector of data. I want to apply the mean columnwise, but filtered by groups which are given by a second dataframe. That is, each column belongs to a group and this information is in the second dataframe.
Here is some example dataset: df is the dataframe with the data vectors, df_category contains the category for each column.
df=structure(list(x1 = c(0.461302090047301, -1.19974381763812, -0.888258056235799,
0.300889698419314, 0.836911163114131, 0.0540388337324712), x2 = c(1.33736696170763,
-0.687026295689823, 1.12205295626651, -0.848925266014684, 1.16092168555067,
0.591202293337843), x3 = c(-0.279052669225263, -0.780435476613128,
-0.852870619718068, -0.708611614262357, -0.761659405740852, 0.487033696695474
), x4 = c(-0.222767493777229, 1.50328295132467, 0.934670132217215,
1.37678188537077, 0.343280062984192, 1.23279081824003), x5 = c(-1.08074586121729,
0.208120194894818, -0.52245832008453, 0.944618465137011, 0.749834485631317,
-0.81118414509141)), class = "data.frame", row.names = c(NA,
-6L))
df_category=structure(list(Col_name = structure(1:5, .Label = c("x1", "x2",
"x3", "x4", "x5"), class = "factor"), Category = structure(c(1L,
1L, 2L, 2L, 2L), .Label = c("A", "B"), class = "factor")), class = "data.frame", row.names = c(NA,
-5L))
The result I want is this one:
df_result=structure(list(mean_A = c(0.899334525877468, -0.943385056663974,
0.116897450015357, -0.274017783797685, 0.998916424332403, 0.322620563535157
), mean_B = c(-0.527522008073261, 0.310322556535454, -0.146886269195128,
0.537596245415141, 0.110485047624885, 0.302880123281364)), class = "data.frame", row.names = c(NA,
-6L))

in Base R:
# For every cell of df (column-major order) look up the category of the column
# it sits in, then average the cells within each (row, category) pair.
a <- with(
  df_category,
  setNames(Category, Col_name)[names(df)[col(df)]]
)
tapply(X = unlist(df), INDEX = list(row(df), a), FUN = mean)
A B
1 0.8993345 -0.5275220
2 -0.9433851 0.3103226
3 0.1168975 -0.1468863
4 -0.2740178 0.5375962
5 0.9989164 0.1104850
6 0.3226206 0.3028801
Another option:
# Split the columns of df by category and take the row means within each group.
# Col_name is a factor, so it is converted with as.character() to select the
# columns by name; indexing with the factor itself would use its integer codes.
# vapply() pins the result to one numeric value per row of df for each category.
vapply(
  with(df_category, split.default(df[as.character(Col_name)], Category)),
  rowMeans,
  numeric(nrow(df))
)
A B
[1,] 0.8993345 -0.5275220
[2,] -0.9433851 0.3103226
[3,] 0.1168975 -0.1468863
[4,] -0.2740178 0.5375962
[5,] 0.9989164 0.1104850
[6,] 0.3226206 0.3028801

We can use tidyverse to reshape the data values, merge the category data, and compute means for groups "A" and "B":
library(tidyverse)
# Reshape df to long form, attach each column's category, average per
# (category, row), then spread the categories back out as mean_<Category>.
df_result <- df %>%
  mutate(idx = row_number()) %>%
  pivot_longer(-idx) %>%
  inner_join(df_category, by = c(name = "Col_name")) %>%
  group_by(Category, idx) %>%
  # .groups = "drop" hands an ungrouped tibble to pivot_wider() instead of one
  # that is still grouped by Category.
  summarize(mean = mean(value), .groups = "drop") %>%
  pivot_wider(
    names_from = Category,
    values_from = mean,
    names_prefix = "mean_"
  ) %>%
  select(-idx)
mean_A mean_B
<dbl> <dbl>
1 0.899 -0.528
2 -0.943 0.310
3 0.117 -0.147
4 -0.274 0.538
5 0.999 0.110
6 0.323 0.303

r transfer values from one dataset to another by ID

I have two datasets , the first dataset is like this
ID Weight State
1 12.34 NA
2 11.23 IA
2 13.12 IN
3 12.67 MA
4 10.89 NA
5 14.12 NA
The second dataset is a lookup table for state values by ID
ID State
1 WY
2 IA
3 MA
4 OR
4 CA
5 FL
As you can see there are two different state values for ID 4, which is normal.
What I want to do is replace the NAs in dataset1 State column with State values from dataset 2. Expected dataset
ID Weight State
1 12.34 WY
2 11.23 IA
2 13.12 IN
3 12.67 MA
4 10.89 OR,CA
5 14.12 FL
Since ID 4 has two state values in dataset2 , these two values are collapsed and separated by , and used to replace the NA in dataset1. Any suggestion on accomplishing this is much appreciated. Thanks in advance.

Collapse df2 value and join it with df1 by 'ID'. Use coalesce to use non-NA value from the two state columns.
library(dplyr)
# Collapse df2 to one comma-separated State string per ID, attach it to df1,
# and keep df1's own State wherever it is not NA.
df1 %>%
  left_join(
    summarise(group_by(df2, ID), State = toString(State)),
    by = "ID"
  ) %>%
  mutate(State = coalesce(State.x, State.y)) %>%
  select(-c(State.x, State.y))
# ID Weight State
#1 1 12.3 WY
#2 2 11.2 IA
#3 2 13.1 IN
#4 3 12.7 MA
#5 4 10.9 OR, CA
#6 5 14.1 FL
In base R with merge and transform.
# Base R equivalent of the left join: collapse df2 to one State string per ID,
# merge it onto df1 and fill only the missing State values.
# all.x = TRUE keeps df1 rows whose ID has no entry in df2; the helper columns
# State.x / State.y are dropped so the result is just ID, Weight, State.
merge(
  df1,
  aggregate(State ~ ID, df2, toString),
  by = "ID",
  all.x = TRUE
) |>
  transform(State = ifelse(is.na(State.x), State.y, State.x)) |>
  subset(select = -c(State.x, State.y))

Tidyverse way:
library(tidyverse)
# Join df1 to a per-ID summary of df2 (all states of an ID collapsed into one
# comma-separated string), then keep ID and Weight and take the first non-NA
# of df1's State (State.x) and the looked-up State (State.y).
df1 %>%
left_join(df2 %>%
group_by(ID) %>%
summarise(State = toString(State)) %>%
ungroup(), by = 'ID') %>%
# transmute() keeps only the columns named here
transmute(ID, Weight, State = coalesce(State.x, State.y))
Base R alternative:
# Positions of the missing states in df1; only those rows are overwritten with
# the collapsed State string that df2 holds for the same ID.
na_idx <- which(is.na(df1$State))
df1$State[na_idx] <- with(
  aggregate(State ~ ID, df2, toString),
  State[match(df1$ID[na_idx], ID)]
)
Data:
# Input data. df1 is the data set whose missing State values are to be filled
# (IDs 1, 4 and 5 start out as NA, as in the question); df2 is the ID -> State
# lookup table, with two entries for ID 4.
df1 <- structure(list(ID = c(1L, 2L, 2L, 3L, 4L, 5L), Weight = c(12.34,
11.23, 13.12, 12.67, 10.89, 14.12), State = c(NA, "IA", "IN",
"MA", NA, NA)), row.names = c(NA, -6L), class = "data.frame")
df2 <- structure(list(ID = c(1L, 2L, 3L, 4L, 4L, 5L), State = c("WY",
"IA", "MA", "OR", "CA", "FL")), class = "data.frame", row.names = c(NA,
-6L))

In R, make a conditional indicator variable based on (a) the first instance of a record type and (b) a date difference

Background
Here's a df with some data in it from a Costco-like members-only big-box store:
# Example purchases: one row per purchase, with the member's join date.
d <- data.frame(ID = c("a","a","b","c","c","d"),
  purchase_type = c("grocery","grocery",NA,"auto","grocery",NA),
  date_joined = as.Date(c("2014-01-01","2014-01-01","2013-04-30","2009-03-08","2009-03-08","2015-03-04")),
  date_purchase = as.Date(c("2014-04-30","2016-07-08","2013-06-29","2015-04-07","2017-09-10","2017-03-10")),
  stringsAsFactors = TRUE)
# Time between joining and each purchase. Columns are referenced directly
# inside mutate() rather than through d$.
d <- d %>%
  mutate(date_diff = date_purchase - date_joined)
This yields the following table:
As you can see, it's got a member ID, purchase types based on the broad category of what people bought, and two dates: the date the member originally became a member, and the date of a given purchase. I've also made a variable date_diff to tally the time between a given purchase and the beginning of membership.
The Problem
I'd like to make a new variable early_shopper that's marked 1 on all of a member's purchases if
That member's first purchase was made within a year of joining (so date_diff <= 365 days).
This first purchase doesn't have an NA in purchase_type.
If these criteria aren't met, give a 0.
What I'm looking for is a table that looks like this:
Note that Member a is the only "true" early_shopper: their first purchase is non-NA in purchase_type, and only 119 days passed between their joining the store and making a purchase there. Member b looks like they could be based on my date_diff criterion, but since they don't have a non-NA value in purchase_type, they don't count as an early_shopper.
What I've Tried
So far, I've tried using mutate and first functions like this:
d <- d %>%
mutate(early_shopper = if_else(!is.na(first(purchase_type,order_by = date_joined)) & date_diff < 365, 1, 0))
Which gives me this:
Something's kinda working here, but not fully. As you can see, I get the correct early_shopper = 1 in Member a's first purchase, but not their second. I also get a false positive with member b, who's marked as an early_shopper when I don't want them to be (because their purchase_type is NA).
Any ideas? I can further clarify if need be. Thanks!

You could use
library(dplyr)
# Flag every purchase of a member with early_shopper = 1 when the member's
# FIRST purchase (a) has a non-NA purchase_type and (b) happened within 365
# days of joining; otherwise 0.
d %>%
  mutate(date_diff = date_purchase - date_joined) %>%
  # Sort each member's purchases chronologically so first() really picks the
  # earliest purchase (date_joined is constant within a member).
  arrange(ID, date_purchase) %>%
  group_by(ID) %>%
  mutate(
    # Both conditions are checked on the first purchase only; the unary +
    # turns the single logical into 0/1, which mutate() recycles over the
    # member's rows.
    early_shopper = +(!is.na(first(purchase_type)) & first(date_diff) <= 365)
  ) %>%
  ungroup()
which returns
# A tibble: 6 x 6
ID purchase_type date_joined date_purchase date_diff early_shopper
<fct> <fct> <date> <date> <drtn> <int>
1 a grocery 2014-01-01 2014-04-30 119 days 1
2 a grocery 2014-01-01 2016-07-08 919 days 1
3 b NA 2013-04-30 2013-06-29 60 days 0
4 c auto 2009-03-08 2015-04-07 2221 days 0
5 c grocery 2009-03-08 2017-09-10 3108 days 0
6 d NA 2015-03-04 2017-03-10 737 days 0
If you want the early_shopper column to be boolean/logical, just remove the +.
Data
I used this data, here the date_joined for b is 2013-04-30 like shown in your images and not like in your actual data posted.
structure(list(ID = structure(c(1L, 1L, 2L, 3L, 3L, 4L), .Label = c("a",
"b", "c", "d"), class = "factor"), purchase_type = structure(c(2L,
2L, NA, 1L, 2L, NA), .Label = c("auto", "grocery"), class = "factor"),
date_joined = structure(c(16071, 16071, 15825, 14311, 14311,
16498), class = "Date"), date_purchase = structure(c(16190,
16990, 15885, 16532, 17419, 17235), class = "Date")), class = "data.frame", row.names = c(NA,
-6L))

Here is my approach using a join to get the early_shopper value to be the same for all rows of the same ID.
library(dplyr)
d <- structure(list(ID = structure(c(1L, 1L, 2L, 3L, 3L, 4L),
.Label = c("a","b", "c", "d"),
class = "factor"),
purchase_type = structure(c(2L, 2L, NA, 1L, 2L, NA),
.Label = c("auto", "grocery"),
class = "factor"),
date_joined = structure(c(16071, 16071, 15825, 14311, 14311, 16498),
class = "Date"),
date_purchase = structure(c(16190, 16990, 15885, 16532, 17419, 17235),
class = "Date")),
class = "data.frame", row.names = c(NA, -6L))
# Compute early_shopper once per member from the member's earliest purchase,
# then join it back so every row of that member carries the same value.
d %>%
  inner_join(
    d %>%
      mutate(date_diff = date_purchase - date_joined) %>%
      group_by(ID) %>%
      # Earliest purchase per member; with_ties = FALSE guarantees exactly one
      # row per ID so the join cannot duplicate rows.
      slice_min(date_diff, with_ties = FALSE) %>%
      # "<= 365" matches the stated rule (within a year of joining).
      transmute(
        early_shopper = if_else(!is.na(purchase_type) & date_diff <= 365, 1, 0)
      ) %>%
      ungroup(),
    by = "ID"
  )
ID purchase_type date_joined date_purchase early_shopper
1 a grocery 2014-01-01 2014-04-30 1
2 a grocery 2014-01-01 2016-07-08 1
3 b <NA> 2013-04-30 2013-06-29 0
4 c auto 2009-03-08 2015-04-07 0
5 c grocery 2009-03-08 2017-09-10 0
6 d <NA> 2015-03-04 2017-03-10 0

Divide data by the preceding row and create new dataframe

I have a data set and I'm trying to calculate the rate of change between the rows.
My input looks like this:
foo = structure(list(date = structure(c(5L, 1L, 2L, 3L, 4L), .Label = c("10/03/2020",
"11/03/2020", "12/03/2020", "13/03/2020", "9/03/2020"), class = "factor"),
A = c(0.60256322, 0.634543306, 0.022976661, 0.009839044,
0.319456765), B = c(45.42320826, 57.32689951, 32.49487759,
29.40804164, 54.33691346), C = c(5.114123914, 3.674167652,
2.330610757, 5.510280192, 5.717950467), D = c(4.187409484,
4.835943165, 4.340614439, 4.607468576, 3.14338155)), row.names = c(NA,
5L), class = "data.frame")
I'm trying to divide each of the following cells with the one before
eg. [5,2] / [4,2]; [4,2] / [3,2]... etc
and I'm trying to create a new output like this:
bar = structure(list(date = structure(c(5L, 1L, 2L, 3L, 4L), .Label = c("10/03/2020",
"11/03/2020", "12/03/2020", "13/03/2020", "9/03/2020"), class = "factor"),
A = c(0, 1.053073412, 0.03620976, 0.428219052, 32.46827283
), B = c(0, 1.262061878, 0.56683473, 0.90500546, 1.847688946
), C = c(0, 0.718435398, 0.634323465, 2.364307371, 1.037687789
), D = c(0, 1.154877063, 0.897573501, 1.061478424, 0.682236134
)), row.names = c(NA, 5L), class = "data.frame")
I'm sure there's a better way than finding the length of the column and looping through. Can anyone point me in the right direction?

You can use mutate_if or mutate_at from the dplyr package (or across(), which supersedes both in dplyr >= 1.0).
library(dplyr)
# Divide every column whose name does not contain "date" by its previous row.
# The first row becomes NA because lag() has no earlier value.
# across() is the current replacement for the superseded mutate_if().
foo %>%
  mutate(
    across(!matches("date", ignore.case = FALSE), function(x) x / lag(x))
  )
OR
# Same calculation, selecting "everything except the date column" directly.
# across() is the current replacement for the superseded mutate_at().
foo %>%
  mutate(across(-date, function(x) x / lag(x)))

In base R, we can use head and tail to divide data.
# Ratio of each value to the one before it, for every column except the first
# (date). The first row has no predecessor and is set to 0.
foo[-1] <- lapply(foo[-1], function(v) {
  n <- length(v)
  c(0, v[-1] / v[-n])
})
foo
# date A B C D
#1 9/03/2020 0.00000000 0.0000000 0.0000000 0.0000000
#2 10/03/2020 1.05307341 1.2620619 0.7184354 1.1548771
#3 11/03/2020 0.03620976 0.5668347 0.6343235 0.8975735
#4 12/03/2020 0.42821905 0.9050055 2.3643074 1.0614784
#5 13/03/2020 32.46827283 1.8476889 1.0376878 0.6822361

Another tidyverse approach.
library(tidyverse)
# Ratio to the previous row for every double column; the NA that lag()
# produces in the first row is replaced by 0.
# across(where(...)) is the current replacement for the superseded mutate_if().
bar <- foo %>%
  mutate(across(where(is.double), ~ replace_na(.x / lag(.x), replace = 0)))
bar
#> date A B C D
#> 1 9/03/2020 0.00000000 0.0000000 0.0000000 0.0000000
#> 2 10/03/2020 1.05307341 1.2620619 0.7184354 1.1548771
#> 3 11/03/2020 0.03620976 0.5668347 0.6343235 0.8975735
#> 4 12/03/2020 0.42821905 0.9050055 2.3643074 1.0614784
#> 5 13/03/2020 32.46827283 1.8476889 1.0376878 0.6822361

How to look up values from a table and insert name of the lookup-list?

I have a (sample)table like this:
df <- read.table(header = TRUE,
stringsAsFactors = FALSE,
text="Gene SYMBOL Values
TP53 2 3.55
XBP1 5 4.06
TP27 1 2.53
REDD1 4 3.99
ERO1L 6 5.02
STK11 9 3.64
HIF2A 8 2.96")
I want to look up the symbols from two different genelists, given here as genelist1 and genelist2:
genelist1 <- read.table(header = TRUE,
stringsAsFactors = FALSE,
text="Gene SYMBOL
P4H 10
PLK 7
TP27 1
KTD 11
ERO1L 6")
genelist2 <- read.table(header = TRUE,
stringsAsFactors = FALSE,
text="Gene SYMBOL
TP53 2
XBP1 5
BHLHB 12
STK11 9
TP27 1
UPK 18")
What I want is to get a new column showing in which genelist(s) each of the genes in my dataframe can be found, but when I run the following code, only the symbols are repeated in the new columns.
df_geneinfo <- df %>%
join(genelist1,by="SYMBOL") %>%
join(genelist2, by="SYMBOL")
Any suggestions of how to solve this, either to make one new column with the name of the genelists, or to make one column for each of the genelists?
Thanks in advance! :)

For the sake of completeness (and performance with large tables, perhaps), here is a data.table approach:
library(data.table)
# Stack both genelists, tagging each row with the index of the list it came
# from (glid), and drop Gene because the join below uses SYMBOL only.
# Then join onto df by SYMBOL and collapse all matching list ids into one
# comma-separated string per (Gene, SYMBOL, Values) row of df.
# Note: setDT(df) converts df to a data.table in place.
rbindlist(list(genelist1, genelist2), idcol = "glid")[, -"Gene"][
setDT(df), on = "SYMBOL"][, .(glid = toString(glid)), by = .(Gene, SYMBOL, Values)][]
Gene SYMBOL Values glid
1: TP53 2 3.55 2
2: XBP1 5 4.06 2
3: TP27 1 2.53 1, 2
4: REDD1 4 3.99 1
5: ERO1L 6 5.02 NA
6: STK11 9 3.64 2
7: HIF2A 8 2.96 NA
rbindlist() creates a data.table from all genelists and adds a column glid to identify the origin of each row. The Gene column is ignored as the subsequent join is only on SYMBOL. Before joining, df is coerced to class data.table using setDT(). The joined result is then aggregated by SYMBOL to exhibit cases where a symbol appears in both genelists which is the case for SYMBOL == 1.
Edit
In case there are many genelists or the full name of the genelist is required instead of just a number, we can try this:
# Same join, but the genelists are collected by name: ls() finds every object
# whose name starts with "genelist" and mget() returns them as a named list,
# so glid holds the list's name instead of a number.
# Note: setDT(df) converts df to a data.table in place.
rbindlist(mget(ls(pattern = "^genelist")), idcol = "glid")[, -"Gene"][
setDT(df), on = "SYMBOL"][, .(glid = toString(glid)), by = .(Gene, SYMBOL, Values)][]
Gene SYMBOL Values glid
1: TP53 2 3.55 genelist2
2: XBP1 5 4.06 genelist2
3: TP27 1 2.53 genelist1, genelist2
4: REDD1 4 3.99 genelist1
5: ERO1L 6 5.02 NA
6: STK11 9 3.64 genelist2
7: HIF2A 8 2.96 NA
ls() looks for objects in the environment whose names start with genelist. mget() returns a named list of those objects, which is passed to rbindlist().
Data
As provided by the OP
df <- structure(list(Gene = c("TP53", "XBP1", "TP27", "REDD1", "ERO1L",
"STK11", "HIF2A"), SYMBOL = c(2L, 5L, 1L, 4L, 6L, 9L, 8L), Values = c(3.55,
4.06, 2.53, 3.99, 5.02, 3.64, 2.96)), .Names = c("Gene", "SYMBOL",
"Values"), class = "data.frame", row.names = c(NA, -7L))
genelist1 <- structure(list(Gene = c("P4H", "PLK", "TP27", "KTD", "ERO1L"),
SYMBOL = c(10L, 7L, 1L, 11L, 4L)), .Names = c("Gene", "SYMBOL"
), class = "data.frame", row.names = c(NA, -5L))
genelist2 <- structure(list(Gene = c("TP53", "XBP1", "BHLHB", "STK11", "TP27",
"UPK"), SYMBOL = c(2L, 5L, 12L, 9L, 1L, 18L)), .Names = c("Gene",
"SYMBOL"), class = "data.frame", row.names = c(NA, -6L))

I just wrote my own function, which replaces the column values:
#' Replace the values of selected columns using a lookup table
#'
#' @param df A data.frame.
#' @param col Character vector with the names of the columns of `df` to recode.
#' @param lookup A data.frame with the columns "old" (current values) and
#'   "new" (replacement values).
#' @return `df` with every column in `col` recoded from `old` to `new`.
#'   An assertion fails if a column is missing from `df`, if `lookup` lacks
#'   "old"/"new", or if any value has no match in `lookup$old`.
replace_by_lookuptable <- function(df, col, lookup) {
  assertthat::assert_that(all(col %in% names(df))) # all cols exist in df
  assertthat::assert_that(all(c("new", "old") %in% colnames(lookup)))
  # Match each column once. df[col] (list-style indexing) always yields a list
  # of whole columns, even when col has length one.
  # NOTE(review): my_match() is defined outside this snippet; presumably it
  # behaves like match() (positions in lookup$old, NA when absent) - confirm.
  idx <- lapply(df[col], function(x) my_match(x, lookup$old))
  # every value must have an entry in the lookup table
  assertthat::assert_that(!anyNA(unlist(idx, use.names = FALSE)))
  # Assign column by column so each column keeps the type of lookup$new
  # instead of being flattened through unlist().
  df[col] <- lapply(idx, function(i) lookup$new[i])
  df
}
df is the data.frame, col is a vector of column names which should be replaced using lookup, a data.frame with column "old" and "new".

If you add a listid column to your genelists
# Tag every row with the number of the genelist it belongs to.
genelist1[["listid"]] <- 1
genelist2[["listid"]] <- 2
you can then merge your df with the genelists:
# Left join: keep every row of df, attaching the matching genelist rows by
# SYMBOL. TRUE is spelled out because T is an ordinary, reassignable variable.
merge(df, rbind(genelist1, genelist2), all.x = TRUE, by = "SYMBOL")
Note that ERO1L is SYMBOL 6 in your df and 4 in genelist1, and that HIF2A and REDD1 are missing from the genelists, although REDD1 is SYMBOL 4 in your df (which is ERO1L in genelist1), so I'm not sure what output you're expecting in that case.
You could also merge only on Gene names:
# Same left join, matching on the gene name instead of SYMBOL. The key has the
# same name in both tables, so a single `by` is enough.
merge(df, rbind(genelist1, genelist2), all.x = TRUE, by = "Gene")

You could put all of your genelists in a list:
gen_list <- list(genelist1 = genelist1,genelist2 = genelist2)
and compare them to your target data.frame:
# Append one column per genelist to df: the list's name where df$Gene occurs
# in that list, NA otherwise.
cbind(
  df,
  do.call(
    cbind,
    lapply(seq_along(gen_list), function(i) {
      ifelse(df$Gene %in% gen_list[[i]]$Gene, names(gen_list)[i], NA)
    })
  )
)

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Dplyr function to match column value to row values and replace it - r

Related

Aggregate columns based on categories given by another dataframe

r transfer values from one dataset to another by ID

In R, make a conditional indicator variable based on (a) the first instance of a record type and (b) a date difference

Divide data by the preceding row and create new dataframe

How to look up values from a table and insert name of the lookup-list?

Categories

Resources