Multiple comparison after ANOVA - reordering factor levels post hoc test - r

I have a data frame with 15 factor levels:
# Simulate 225 uniform random values: 15 variants with 15 observations each.
Value <- runif(225)
# Build the factor directly inside the data frame constructor.
df <- data.frame(
  Variant = factor(rep(1:15, each = 15)),
  Value = Value
)
levels(df$Variant)
I am running an ANOVA followed by a Duncan test:
# Fit a linear model of Value on the Variant factor and print its ANOVA table.
model <- lm(Value ~ Variant, data = df)
anova(model)
library(agricolae)
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
# The comparison table is read later from out$comparison.
out <- duncan.test(model, "Variant", group = FALSE)
out
After the Duncan test I get the following comparison order of factor levels:
1-10,1-11,1-12,1-13,1-14,1-15,1-2,1-3, 1-4, 1-5, 1-6, 1-7, 1-8, 1-9, 10-11, 10-12, 10-13, 10-14, 10-15 etc.
How can I manually change this order like this:
1-2,1-3, 1-4, 1-5, 1-6, 1-7, 1-8, 1-9, 1-10, 1-11, 1-12, 1-13, 1-14, 1-15, 2-1, 2-2, 2-3, 2-4, 2-5, 2-6 etc.
Thank you very much.

You can do
# Split every "a - b" row name into its two level numbers and stack them into
# a two-column numeric matrix (one row per comparison).
ord <- do.call(
  rbind,
  lapply(strsplit(rownames(out$comparison), " - "), as.numeric)
)
# Sort the comparison table numerically by first level, then by second level.
out$comparison[order(ord[, 1], ord[, 2]), ]
#> difference pvalue signif. LCL UCL
#> 1 - 2 0.038843696 0.7161 -0.16755260 0.24523999
#> 1 - 3 -0.082464843 0.4965 -0.31294877 0.14801909
#> 1 - 4 -0.026633590 0.8112 -0.23991700 0.18664982
#> 1 - 5 -0.008944756 0.9333 -0.21534105 0.19745153
#> 1 - 6 -0.113440226 0.3555 -0.34762702 0.12074656
#> 1 - 7 -0.033132833 0.7772 -0.25544634 0.18918067
#> 1 - 8 -0.006618202 0.9470 -0.20269177 0.18945536
#> 1 - 9 0.046838403 0.6741 -0.16644500 0.26012181
#> 1 - 10 -0.091429309 0.4543 -0.32388649 0.14102787
#> 1 - 11 -0.140899981 0.2522 -0.37661897 0.09481900
#> 1 - 12 0.001178870 0.9906 -0.19489469 0.19725243
#> 1 - 13 -0.047673502 0.6884 -0.27319857 0.17785157
#> 1 - 14 -0.031176107 0.7857 -0.24953106 0.18717885
#> 1 - 15 -0.074161352 0.5370 -0.30236480 0.15404210
#> 2 - 3 -0.121308539 0.3226 -0.35549533 0.11287825
#> 2 - 4 -0.065477286 0.5752 -0.28779079 0.15683622
#> 2 - 5 -0.047788453 0.6766 -0.26614341 0.17056650
#> 2 - 6 -0.152283922 0.2177 -0.38937226 0.08480442
#> 2 - 7 -0.071976529 0.5492 -0.30017998 0.15622692
#> 2 - 8 -0.045461898 0.6831 -0.25874530 0.16782151
#> 2 - 9 0.007994707 0.9360 -0.18807886 0.20406827
#> 3 - 4 0.055831254 0.6330 -0.16648225 0.27814476
#> 3 - 5 0.073520087 0.5354 -0.15200498 0.29904515
#> 3 - 6 -0.030975383 0.7719 -0.23737167 0.17542091
#> 3 - 7 0.049332010 0.6577 -0.16395140 0.26261542
#> 3 - 8 0.075846641 0.5277 -0.15235681 0.30405009
#> 3 - 9 0.129303246 0.2943 -0.10641574 0.36502223
#> 4 - 5 0.017688833 0.8590 -0.17838473 0.21376240
#> 4 - 6 -0.086806636 0.4694 -0.31501009 0.14139682
#> 4 - 7 -0.006499243 0.9515 -0.21289553 0.19989705
#> 4 - 8 0.020015388 0.8514 -0.18638090 0.22641168
#> 4 - 9 0.073471993 0.5357 -0.15205307 0.29899706
#> 5 - 6 -0.104495470 0.3877 -0.33497940 0.12598846
#> 5 - 7 -0.024188077 0.8282 -0.23747148 0.18909533
#> 5 - 8 0.002326554 0.9814 -0.19374701 0.19840012
#> 5 - 9 0.055783160 0.6333 -0.16653035 0.27809667
#> 6 - 7 0.080307393 0.4913 -0.14200611 0.30262090
#> 6 - 8 0.106822024 0.3812 -0.12563515 0.33927920
#> 6 - 9 0.160278629 0.1961 -0.07804271 0.39859996
#> 7 - 8 0.026514631 0.8172 -0.19184032 0.24486959
#> 7 - 9 0.079971236 0.5097 -0.15051269 0.31045517
#> 8 - 9 0.053456605 0.6407 -0.16489835 0.27181156
#> 10 - 2 0.130273005 0.2906 -0.10544598 0.36599199
#> 10 - 3 0.008964466 0.9283 -0.18710910 0.20503803
#> 10 - 4 0.064795720 0.5852 -0.16072935 0.29032079
#> 10 - 5 0.082484553 0.4920 -0.14571890 0.31068801
#> 10 - 6 -0.022010917 0.8251 -0.21808448 0.17406265
#> 10 - 7 0.058296476 0.6107 -0.16005848 0.27665143
#> 10 - 8 0.084811107 0.4842 -0.14567282 0.31529504
#> 10 - 9 0.138267713 0.2641 -0.09882063 0.37535605
#> 10 - 11 -0.049470671 0.6431 -0.25586696 0.15692562
#> 10 - 12 0.092608180 0.4517 -0.14157861 0.32679497
#> 10 - 13 0.043755807 0.6944 -0.16952760 0.25703921
#> 10 - 14 0.060253203 0.6062 -0.16206030 0.28256671
#> 10 - 15 0.017267958 0.8716 -0.18912833 0.22366425
#> 11 - 2 0.179743677 0.1456 -0.05857766 0.41806501
#> 11 - 3 0.058435138 0.5995 -0.15484827 0.27171854
#> 11 - 4 0.114266391 0.3442 -0.11621754 0.34475032
#> 11 - 5 0.131955224 0.2776 -0.10050195 0.36441240
#> 11 - 6 0.027459755 0.7828 -0.16861381 0.22353332
#> 11 - 7 0.107767148 0.3617 -0.11775792 0.33329222
#> 11 - 8 0.134281779 0.2726 -0.09990501 0.36846857
#> 11 - 9 0.187738384 0.1296 -0.05170032 0.42717709
#> 11 - 12 0.142078851 0.2509 -0.09500949 0.37916719
#> 11 - 13 0.093226479 0.4236 -0.12908703 0.31553999
#> 11 - 14 0.109723874 0.3591 -0.11847958 0.33792733
#> 11 - 15 0.066738629 0.5597 -0.15161633 0.28509358
#> 12 - 2 0.037664826 0.7053 -0.15840874 0.23373839
#> 12 - 3 -0.083643714 0.4940 -0.31610089 0.14881346
#> 12 - 4 -0.027812460 0.8084 -0.24616741 0.19054249
#> 12 - 5 -0.010123627 0.9277 -0.22340703 0.20315978
#> 12 - 6 -0.114619097 0.3535 -0.35033808 0.12109989
#> 12 - 7 -0.034311704 0.7730 -0.25983677 0.19121336
#> 12 - 8 -0.007797073 0.9418 -0.21419336 0.19859922
#> 12 - 9 0.045659533 0.6690 -0.16073676 0.25205582
#> 12 - 13 -0.048852373 0.6848 -0.27705583 0.17935108
#> 12 - 14 -0.032354977 0.7823 -0.25466848 0.18995853
#> 12 - 15 -0.075340222 0.5347 -0.30582415 0.15514371
#> 13 - 2 0.086517198 0.4754 -0.14396673 0.31700113
#> 13 - 3 -0.034791341 0.7447 -0.24118763 0.17160495
#> 13 - 4 0.021039912 0.8503 -0.19224349 0.23432332
#> 13 - 5 0.038728746 0.7355 -0.17962621 0.25708370
#> 13 - 6 -0.065766724 0.5655 -0.28412168 0.15258823
#> 13 - 7 0.014540669 0.8839 -0.18153289 0.21061423
#> 13 - 8 0.041055300 0.7258 -0.18125821 0.26336881
#> 13 - 9 0.094511905 0.4391 -0.13794527 0.32696908
#> 13 - 14 0.016497396 0.8773 -0.18989890 0.22289369
#> 13 - 15 -0.026487850 0.7903 -0.22256141 0.16958571
#> 14 - 2 0.070019803 0.5552 -0.15550526 0.29554487
#> 14 - 3 -0.051288737 0.6544 -0.26964369 0.16706622
#> 14 - 4 0.004542517 0.9636 -0.19153105 0.20061608
#> 14 - 5 0.022231350 0.8352 -0.18416494 0.22862764
#> 14 - 6 -0.082264120 0.4877 -0.30778919 0.14326095
#> 14 - 7 -0.001956726 0.9843 -0.19803029 0.19411684
#> 14 - 8 0.024557904 0.8256 -0.18872550 0.23784131
#> 14 - 9 0.078014510 0.5159 -0.15018894 0.30621796
#> 14 - 15 -0.042985245 0.6996 -0.25626865 0.17029816
#> 15 - 2 0.113005048 0.3539 -0.11945213 0.34546222
#> 15 - 3 -0.008303492 0.9335 -0.20437706 0.18777007
#> 15 - 4 0.047527762 0.6783 -0.17082719 0.26588272
#> 15 - 5 0.065216595 0.5768 -0.15709691 0.28753010
#> 15 - 6 -0.039278874 0.7244 -0.25256228 0.17400453
#> 15 - 7 0.041028519 0.7009 -0.16536777 0.24742481
#> 15 - 8 0.067543150 0.5693 -0.15798192 0.29306822
#> 15 - 9 0.120999755 0.3239 -0.11318703 0.35518654
EDIT
The OP has asked how this might be achieved with Roman numerals. In that case, you would need to do:
# Install gtools only when it is missing, rather than on every run.
if (!requireNamespace("gtools", quietly = TRUE)) {
  install.packages("gtools")
}
# Split each row name on non-word characters, drop the empty pieces produced
# by consecutive separators, and convert the Roman numerals to integers.
ord <- do.call(
  rbind,
  lapply(
    strsplit(row.names(out$comparison), "\\W"),
    function(x) gtools::roman2int(x[x != ""])
  )
)
# Sort the comparison table by first level, then by second level.
out$comparison[order(ord[, 1], ord[, 2]), ]

Related

data.frame Using Vector of Names

Can I use a vector of variable names to make a data frame?
# Names of the variables that should become data frame columns.
have <- c(
  "aaa", "bbb", "ccc", "ddd", "eee",
  "fff", "ggg", "hhh", "iii", "jjj"
)
# Create one random vector of length 10 per name; seq_along() keeps the loop
# in step with `have` instead of hard-coding its length.
for (i in seq_along(have)) {
  assign(have[i], rnorm(10))
}
want <- data.frame(aaa, bbb, ccc, ddd, eee, fff, ggg, hhh, iii, jjj)
I wonder if I can alter the last aaa,bbb,ccc,ddd,eee,fff,ggg,hhh,iii,jjj somehow using have.
Assume that all variables in have are stored in the Global environment. Then you can also try this:
# mget() looks up every name in `have` and returns the values as a named list;
# as.data.frame() then turns each list element into a column.
want <- as.data.frame(mget(have))
You could do
# Names of the variables that should become data frame columns.
have <- c(
  "aaa", "bbb", "ccc", "ddd", "eee",
  "fff", "ggg", "hhh", "iii", "jjj"
)
# Create one random vector of length 10 per name.
for (i in seq_along(have)) {
  assign(have[i], rnorm(10))
}
# mget() always returns a named list, so every variable becomes its own
# column. sapply(have, get) changes shape with the input: variables of
# length 1 would collapse into a single vector and yield one column.
want <- data.frame(mget(have))
want
#> aaa bbb ccc ddd eee fff
#> 1 2.2111971 0.58169621 0.7558816 -1.6408627 0.7975625 0.09160389
#> 2 -0.7847731 1.60423888 0.3819555 -1.2061538 0.7545381 -0.64964125
#> 3 -1.2757056 0.57714761 0.4700359 -1.1041282 -0.3816839 0.40549014
#> 4 -0.0360762 -1.29007252 -0.7820075 -0.5319163 -0.2999686 0.51213744
#> 5 0.1763021 0.82259576 -0.4409983 1.4809103 -0.3658530 -0.16434920
#> 6 1.3196823 -0.18163744 1.5261259 1.3087872 -1.0644242 -1.31891628
#> 7 0.4076277 -0.89769591 -0.7778384 -0.3837985 -1.8659484 -1.53683062
#> 8 1.1872413 -0.06917426 0.3875081 0.4146543 -0.7035016 -0.63534985
#> 9 0.9037385 0.10581530 0.6210197 2.4435195 -1.2323838 0.84316865
#> 10 -0.8933586 1.47698413 0.4561502 1.0824430 2.2895535 0.05699095
#> ggg hhh iii jjj
#> 1 -0.4915989 -0.02034347 -1.6870239 -1.08651315
#> 2 1.7595238 0.47375431 0.5408044 0.65031636
#> 3 -2.0502394 0.85440730 -0.4114844 -0.17392623
#> 4 -1.1268393 0.68303043 1.1722424 -0.90590156
#> 5 -1.3235682 0.59603361 -0.8958801 -0.94192724
#> 6 -0.3669457 -0.27870024 1.8228263 0.01478657
#> 7 0.6525810 -0.00354290 0.3757264 0.34386963
#> 8 -0.3378531 -0.45219282 -0.8959065 -0.43244283
#> 9 0.3931531 0.61264470 0.6359348 0.02984539
#> 10 -0.5256779 0.79624735 -2.2912426 -1.06220090
Created on 2020-10-03 by the reprex package (v0.3.0)

How to create a 2 variable barplot using ggplot2 in r

I have a data frame and I want to plot a bar chart with two numerical values, Mean_dbh and Low_AGC, as side-by-side bars for each given location (SU).
How do I plot this using ggplot2?
SU Mean_dbh Low_AGC
1 1 16.98921 17.696251
2 2 13.48199 8.108352
3 3 15.97746 14.584501
4 4 12.14046 28.910114
5 5 16.47509 38.047385
6 6 19.80792 31.183069
7 7 17.44469 38.192385
8 8 18.78043 12.138436
9 10 15.68889 24.195719
10 11 17.39620 26.621287
11 15 16.71296 32.219763
By using tidyverse and pivot_longer you can merge the two variables into a single pair of name/value columns. geom_col lets you map SU to the x-axis and the merged value to the y-axis. The fill color is set by fill=name, where name is the column holding the original variable names. The axes are renamed to make things clear.
library(tidyverse)
# Parse the inline table. The header row has one field fewer than the data
# rows, so read.table() uses the first data column as row names.
# `header = TRUE` is spelled out because `T` can be reassigned.
df <- read.table(text = " SU Mean_dbh Low_AGC
1 1 16.98921 17.696251
2 2 13.48199 8.108352
3 3 15.97746 14.584501
4 4 12.14046 28.910114
5 5 16.47509 38.047385
6 6 19.80792 31.183069
7 7 17.44469 38.192385
8 8 18.78043 12.138436
9 10 15.68889 24.195719
10 11 17.39620 26.621287
11 15 16.71296 32.219763", header = TRUE)
df
#> SU Mean_dbh Low_AGC
#> 1 1 16.98921 17.696251
#> 2 2 13.48199 8.108352
#> 3 3 15.97746 14.584501
#> 4 4 12.14046 28.910114
#> 5 5 16.47509 38.047385
#> 6 6 19.80792 31.183069
#> 7 7 17.44469 38.192385
#> 8 8 18.78043 12.138436
#> 9 10 15.68889 24.195719
#> 10 11 17.39620 26.621287
#> 11 15 16.71296 32.219763
# Reshape the two measurement columns into name/value pairs, then draw one
# dodged bar per measurement for each location.
df %>%
  pivot_longer(cols = Mean_dbh:Low_AGC) %>%
  ggplot(aes(x = SU, y = value, fill = name)) +
  geom_col(position = "dodge") +
  labs(x = "Location", y = "Mean_dbh or Low_AGC") +
  theme(legend.title = element_blank())

Check and count conditions for following value

I have a dataframe with 18 rows and 25 variables. The values are between 0 and 1. For each row, I want to count the number of times a high value (greater than 0.7) is immediately followed by a low value (less than 0.4) and store that count in a new column.
So far I have been using:
# Counts, per row, every value above 0.7, whether or not a low value follows.
df$n_calls<-rowSums(df > 0.7)
I know it is possible to use different conditions but in my case it is very important to check that the low value is right after the high value
Here is an example of my df
1 2 3 4 5 6 7 8 9 10 11
1 0.186158072 0.27738592 0.42165043 0.43501515 0.10918095 0.09976244 0.09571536 0.08674526 0.09239877 0.07523392 0.043679510
2 0.773469188 0.75381254 0.20389633 0.46444408 0.30433377 0.68334244 0.42105103 0.66224478 0.32412056 0.30951402 0.616658953
3 0.201245200 0.26873094 0.25892904 0.38605874 0.68438397 0.30236790 0.51493090 0.66314468 0.68910974 0.59134860 0.625550641
4 0.033746517 0.06388212 0.06978669 0.05517553 0.06032239 0.06736223 0.06514233 0.05133860 0.06034266 0.05702451 0.011144861
5 0.590297759 0.40352955 0.08106493 0.06063485 0.07780428 0.09633069 0.10882515 0.11468680 0.28375374 0.63941033 0.629284574
6 0.165001648 0.31174739 0.36955514 0.47581249 0.65349233 0.66471913 0.58004314 0.50790858 0.51298260 0.18651107 0.501195655
7 0.033164989 0.05678890 0.05941058 0.04139692 0.04660761 0.05452679 0.04939543 0.02780824 0.03680599 0.04645522 0.018496662
8 0.080893779 0.07228276 0.07473865 0.05536056 0.05732153 0.06403365 0.06139970 0.05142047 0.05698089 0.06998986 0.032598440
9 0.557273680 0.49226191 0.63900601 0.37497255 0.72114277 0.37557355 0.34360391 0.37502000 0.41622472 0.46852220 0.410656260
10 -0.004010143 0.03051558 0.04403711 0.02749514 0.04770637 0.05800898 0.05603494 0.04163723 0.04622024 0.04677767 0.007736933
11 0.280273472 0.59839662 0.74167893 0.75352655 0.75108785 0.72345468 0.65395063 0.32957749 0.08357061 0.33165070 0.731228429
12 0.107398713 0.10983041 0.13630594 0.19905651 0.47014034 0.72519345 0.69545405 0.62194265 0.49873996 0.16549282 0.087689371
13 0.164520925 0.22763832 0.50824238 0.59686660 0.68419908 0.66837348 0.62380175 0.20226234 0.11425066 0.09725765 0.078701134
14 0.076934267 0.09684586 0.10703672 0.08436558 0.10789735 0.24130640 0.36615645 0.42805115 0.42937392 0.51390288 0.584757257
15 0.055565174 0.06796064 0.07519020 0.05498454 0.05754891 0.06377643 0.06537049 0.05152625 0.05783594 0.05963775 0.022556411
16 0.126975964 0.19394191 0.53324900 0.60905758 0.67072084 0.61613836 0.55415573 0.18317823 0.13453799 0.09835233 0.067080267
17 0.730333357 0.65759923 0.59045925 0.63148539 0.36305458 0.40829673 0.48734552 0.58647457 0.66968986 0.48312152 0.453863785
18 0.196450179 0.33968393 0.51538678 0.44868341 0.22221050 0.18934329 0.19179838 0.18764290 0.22423578 0.27524872 0.608625015
12 13 14 15 16 17 18 19 20 21 22
1 0.038553121 0.040081485 0.05358118 0.07403555 0.05091901 0.042299806 0.04322122 0.05587749 0.06881493 0.09753878 0.10462942
2 0.618447812 0.048885425 0.06231155 0.08228801 0.05963307 0.022666894 0.09384802 0.07914030 0.08549148 0.08373159 0.07404309
3 0.179434300 0.679981042 0.69176338 0.74453573 0.70937271 0.289762839 0.17956945 0.68770664 0.73864122 0.73187173 0.34604987
4 0.005094105 0.007952117 0.02076629 0.04174891 0.02129751 0.010066515 0.01454399 0.04337116 0.05259742 0.05795045 0.04533231
5 0.554122074 0.322792638 0.21839661 0.18322419 0.05764354 0.041600287 0.04692187 0.04305403 0.05762126 0.06212474 0.05289008
6 0.719147265 0.481543275 0.20168371 0.19885731 0.27223662 0.587549079 0.66694312 0.76974309 0.45266122 0.23338301 0.09435850
7 0.019041585 0.005380972 0.01856521 0.03947278 0.01221314 0.004858193 0.01322566 0.02001854 0.02755861 0.03889634 0.03102918
8 0.031368415 0.024535386 0.04031225 0.06011198 0.03558484 0.027890723 0.04100022 0.04572906 0.05465957 0.06437218 0.06308497
9 0.290487995 0.109253389 0.09076971 0.11177720 0.08365271 0.074780381 0.07845467 0.08843678 0.12696256 0.15252180 0.16108674
10 0.004599971 0.004843833 0.02327683 0.05022203 0.02867540 0.013674600 0.02376855 0.03408261 0.04563785 0.04991278 0.04216682
11 0.702763718 0.204497547 0.05554607 0.07056242 0.04561622 0.027652748 0.05185238 0.03544719 0.04735368 0.05194280 0.05193089
12 0.087884047 0.068055513 0.07587232 0.09912338 0.09637278 0.085378227 0.09348430 0.09237792 0.10785289 0.22242136 0.28522539
13 0.050134608 0.060945434 0.07203437 0.09687331 0.07316602 0.067771770 0.07634787 0.08154630 0.09157153 0.08930093 0.09904561
14 0.255098748 0.323642069 0.34568802 0.42105224 0.41797424 0.434900416 0.39764147 0.30798058 0.31269146 0.42912436 0.52562571
15 0.015262751 0.027712972 0.03813722 0.07103989 0.05202094 0.040513502 0.04066496 0.23360454 0.34666910 0.62701471 0.61683636
16 0.052436966 0.080045644 0.11447572 0.10672800 0.07924541 0.064626998 0.07234429 0.06744468 0.07878329 0.08901864 0.07953835
17 0.422132751 0.127518376 0.13062324 0.15104667 0.12490013 0.110841862 0.10892834 0.07984952 0.09097741 0.15193027 0.18654107
18 0.662904286 0.247251060 0.20583902 0.32290931 0.47391488 0.574805088 0.64776018 0.73091902 0.27798841 0.35922799 0.36333131
23 24 n_calls
1 0.23100480 0.30027592 0
2 0.07209460 0.06670631 1
3 0.30800154 0.27452357 2
4 0.04148986 0.03842700 0
5 0.05362370 0.05018294 0
6 0.08703911 0.08242964 0
7 0.03186000 0.03233006 0
8 0.05789078 0.05637648 0
9 0.25593446 0.29909342 1
10 0.03615961 0.03356159 0
11 0.05754763 0.06368048 1
12 0.45794999 0.56138753 0
13 0.16676533 0.22718405 0
14 0.63646856 0.29169414 0
15 0.64039251 0.60901138 0
16 0.08805636 0.09688941 0
17 0.36883747 0.41561690 1
18 0.37085132 0.36292634
Any idea how to proceed?
We can use rowSums after subsetting the dataset twice, once without the last column and once without the first column, so that both pieces have the same dimensions and adjacent columns are compared.
# df[-length(df)] drops the last column and df[-1] drops the first, so column i
# on the left lines up with column i + 1 on the right; rowSums() then counts
# the positions where a value above 0.7 is directly followed by one below 0.4.
# NOTE(review): assumes df holds only the value columns; if n_calls has already
# been added it would be treated as a value column here -- confirm.
rowSums(df[-length(df)] > 0.7 & df[-1] < 0.4)

Error in makebin(data, file) : 'sid' invalid

I am getting the same error, "Error in makebin(data, file) : 'sid' invalid",
when running cspade on the small dataset below. Both my transactionID and eventID are ordered blockwise (as somebody mentioned in another post that they should be), so I don't see any reason for that error. Please let me know what the problem could be.
items transactionID sequenceID eventID
1 {item=/} 1 1 1458565800
2 {item=/login} 2 2 1458565803
3 {item=/profile} 3 3 1458565811
4 {item=/shop_list} 4 4 1458565814
5 {item=/} 5 1 1458565912
6 {item=/login} 6 2 1458565915
7 {item=/shop_list} 7 3 1458565918
8 {item=/} 8 1 1458565802
9 {item=/login} 9 2 1458565808
10 {item=/profile} 10 3 1458565812
11 {item=/product} 11 4 1458565818
12 {item=/} 12 1 1458565911
13 {item=/login} 13 2 1458565916
14 {item=/shop_list} 14 3 1458565922
15 {item=/profile} 15 4 1458565927
16 {item=/contact} 16 5 1458565929
17 {item=/profile} 17 6 1458565933
# Read the page-view log; the Page, Seq and Timestamp columns are used below.
traffic <- read.csv("C:\\buczaal1\\RProg\\web_traffic.csv")
traffic_data <- data.frame(item=traffic$Page)
# Coerce the one-column data frame to the "transactions" class.
traffic.tran <- as(traffic_data, "transactions")
# Attach the sequence and event identifiers to the transaction info.
transactionInfo(traffic.tran)$sequenceID <- traffic$Seq
transactionInfo(traffic.tran)$eventID <- traffic$Timestamp
# NOTE(review): in the table above sequenceID restarts at 1 several times and
# eventID is not increasing overall; cspade presumably needs the rows sorted by
# sequenceID, then eventID -- a likely source of "'sid' invalid"; confirm.
frequent_pattern <- cspade(traffic.tran, parameter= list(support=0.3))

extract rows with common characters in a column by comparing two data.frame

How can I compare two data frames (df1 and df2) and extract the rows with common gene names?
df1 =
logp chr start end CNA Genes No.of.genes
25.714.697 1 90100868 90212160 gain Iqca,Ackr3 2
2.213.423 1 175422136 176019087 loss Rgs7,Fh1,Kmo,Opn3,Chml,Wdr64,Gm25560,Exo1,Gm23805,Pld5,B020018G12Rik 11
5.607.005 2 145619035 147312698 gain Slc24a3,Rin2,Naa20,Crnkl1,4930529M08Rik,Insm1,Ralgapa2,Xrn2,Nkx2-4,Nkx2-2,Gm22261 11
3.756.075 2 141246149 141653989 loss Macrod2 1
4.852.608 2 41586450 41739605 loss Lrp1b 1
590.684 2 86729423 86860061 loss Olfr1089,Olfr1090,Olfr1093,Olfr1093,Olfr141,Olfr1094,Olfr1094,Olfr1095 8
5.721.239 3 25408115 25519319 gain Nlgn1 1
4.295.527 3 92005564 92134972 gain Pglyrp3,Prr9 2
4.257.749 3 15244004 15897870 gain Gm9733,Gm9733,Gm9733,Gm9733,Sirpb1a,Sirpb1a,Sirpb1a,Sirpb1a,Sirpb1b,Sirpb1b,Sirpb1b,Sirpb1b,Sirpb1c,Sirpb1c,Sirpb1c,Sirpb1c 16
418.259 3 154861710 155490219 loss Tnni3k,Tnni3k,Fpgt,Gm26456,Lrriq3 5
2.284.327 4 134885344 137474898 gain Rhd,Rhd,Tmem50a,D4Wsu53e,Syf2,Runx3,Clic4,Srrm1,Ncmap,Rcan3,Nipal3,Stpg1,Gm25317,Grhl3,Gm23106,Ifnlr1,Il22ra1,Myom3,Srsf10,Pnrc2,Pnrc2,Cnr2,Fuca1,Hmgcl,Gale,Lypla2,Pithd1,Tceb3,Rpl11,Gm26001,Id3,E2f2,Asap3,Tcea3,Zfp46,Hnrnpr,Htr1d,Luzp1,Kdm1a,4930549C01Rik,Lactbl1,Ephb2,C1qb,C1qc,C1qa,Epha8,Zbtb40,Gm23834,Gm23834,Wnt4,Cdc42,Gm13011,Gm13011,Cela3b,Cela3b,Hspg2 56
1.017.899 4 108176679 108417038 gain Echdc2,Zyg11a,Zyg11b,Selrc1,Fam159a,Gpx7 6
2.229.929 4 80406963 83998058 gain Tyrp1,Lurap1l,Mpdz,n-R5s187,Nfib,Zdhhc21,Cer1,Frem1,Ttc39b,Gm23412,Snapc3,Psip1,Ccdc171,Gm25899,Gm25899 15
279.458 4 110534756 110628705 gain Agbl4 1
1.103.167 4 121565222 124833802 gain Ppt1,Cap1,Mfsd2a,Mycl,Trit1,Bmp8b,Bmp8b,Oxct2b,Ppie,Hpcal4,Nt5c1a,Heyl,Pabpc4,Gm25788,Gm22154,Bmp8a,Bmp8a,Oxct2a,Macf1,Ndufs5,Akirin1,Rhbdl2,Mycbp,Rragc,Gm22983,Pou3f1,Utp11l,Gm24480,Fhl3,Sf3a3,Inpp5b,Mtf1,n-R5s192 33
1.781.441 4 139917291 140083763 loss Klhdc7a,Igsf21 2
6.829.744 6 147086557 147179673 gain Mansc4,Klhl42 2
1.070.905 6 63350920 64077379 loss Grid2 1
3.132.886 7 17188025 18205037 gain Psg29,Ceacam5,Ceacam14,Gm5155,Ceacam11,Ceacam13,Ceacam12,Igfl3,Igfl3 9
591.926 7 26773232 26976928 gain Cyp2a5,Cyp2a5,Cyp2a5,Cyp2a22,Cyp2a22,Cyp2a22 6
4.170.656 7 20654493 24128503 gain Nlrp4e,Nlrp5,Gm10175,Zfp180,Zfp112 5
2.494.001 7 38898625 38991306 loss Gm21142,Gm25671 2
13.222.294 7 67330026 67943164 loss Mef2a,Lrrc28,Gm23233,Ttc23,Synm 5
1.330.269 7 7171339 10865583 loss Zfp418,Clcn4-2,Zik1,Nlrp4b 4
3.414.431 8 49942996 51497632 loss Gm23986 1
3.059.542 9 21959210 22072123 gain Epor,Rgl3,Ccdc151,Prkcsh,Elavl3,Zfp653 6
5.277.845 10 80335500 80575991 gain Reep6,Adamtsl5,Plk5,Mex3d,Mbd3,Uqcr11,Uqcr11,Tcf3,Gm25044,Gm25044,Gm25044,Gm25044,Onecut3,Atp8b3,Rexo1,Klf16 16
26.812.338 10 100597718 100692256 loss 1700017N19Rik 1
6.998.267 11 60393963 60504695 gain Lrrc48,Atpaf2,Gid4,Drg2,Myo15 5
2.624.723 11 75676344 76212635 gain Crk,Ywhae,Doc2b,Rph3al,1700016K19Rik,Fam101b,Vps53,Glod4,Fam57a,Gemin4 10
11.851.916 11 97742687 97853778 gain Pip4k2b,Cwc25,1700001P01Rik,Rpl23,SNORA21,Snora21,Lasp1 7
3.553.325 11 74899198 75121318 loss Tsr1,Srr,Smg6,Gm22733 4
309.751 11 105624215 107309569 loss Tanc2,Cyb561,Ace,Ace,Kcnh6,Dcaf7,Taco1,Map3k3,Limd2,Strada,Ccdc47,Ddx42,Ftsj3,Psmc5,Gm23645,Smarcd2,Tcam1,Gh,Gh,Gh,Gh,Gh,Cd79b,Scn4a,2310007L24Rik,Icam2,Ern1,Snord104,Gm22711,Tex2,Milr1,Gm25889,Polg2,Ddx5,Cep95,Smurf2,Bptf,Nol11,Pitpnc1 39
2.642.471 11 30118384 30155192 loss Sptbn1 1
10.304.184 12 114641806 116183315 gain Ighv1-73,Ighv1-83,Zfp386,Zfp386,Zfp386,Zfp386,Zfp386,Zfp386,Zfp386,Zfp386,Vipr2 11
1.414.343 12 116239354 117192837 loss Wdr60,Esyt2,Ncapg2,Gm25112,Gm24354,Ptprn2 6
2.875.469 14 10676764 10768859 loss Fhit 1
7.743.121 14 52237972 52331429 loss Rab2b,Gm23758,Tox4,Mettl3,Sall2 5
2.689.596 14 43932587 45325020 loss Ang5,Ang6,Ear2,Ear2,Ptgdr,Ptger2,Txndc16,Gpr137c,Ero1l 9
1.912.962 14 119385279 119496386 loss Hs6st3 1
950.029 14 118589508 118681878 loss Abcc4 1
4.105.345 14 3004822 8437757 loss Flnb,Dnase1l3,Abhd6,Rpp14,Rpp14,Pxk,Pdhb,Kctd6,Acox2,Fam107a,Oit1,4930452B06Rik 12
1.870.555 16 33446020 33668062 loss Zfp148,Slc12a8 2
3.148.258 17 5087550 8333690 gain Arid1b,Tmem242,Zdhhc14,Snx9,Synj2,Serac1,Gtf2h5,Tulp4,n-R5s26,Tmem181a,Dynlt1a,Dynlt1b,Tmem181b-ps,Tmem181b-ps,Dynlt1c,Tmem181c-ps,Dynlt1f,Sytl3,Ezr,Rsph3b,Tagap1,Rnaset2b,Rnaset2b,Gm25119,Rps6ka2,Ttll2,Gm9992,Gm26057,Fndc1,Tagap,Rsph3a,Gm22416,Rnaset2a,Rnaset2a,Fgfr1op,Ccr6,Mpc1,Sft2d1 38
50.819.398 17 40052632 40331607 gain Gm7148,Pgk2,Crisp3,Crisp1 4
4.099.936 17 14074943 15508274 loss Dact2,Smoc2,Thbs2,Gm23352,Wdr27,1600012H06Rik,Phf10,Gm3417,9030025P20Rik,Gm3448,Gm3435,Tcte3,Ermard,Dll1,Fam120b,Psmb1,Tbp 17
12.022.555 17 30590875 31053645 loss Glo1,Dnah8,Gm24661,Gm24661,Gm24661,Gm24661,Gm24661,Gm24661,Gm24661,Gm24661,Glp1r,Umodl1 12
5.135.466 17 36160573 36277761 loss Gm22453,Rpp21,Trim39 3
4.254.769 17 27372278 27593833 loss Grm4,Hmga1,Nudt3 3
5.565.997 18 87905985 87999255 loss Gm24987,Gm24987 2
df2 =
Recursive_level logp chr start end CNA Genes No.of.Gene
1 1.416.541 1 68580000 68640000 loss Erbb4 1
1 7.876.897 1 173840000 174010000 loss Mndal,Mnda,Ifi203,Ifi202b 4
1 6.280.751 1 173500000 173660000 loss BC094916,Pydc4,Pyhin1 3
1 7.369.317 1 115900000 116280000 loss Cntnap5a 1
2 128.766 2 146170000 146660000 gain 4930529M08Rik,Insm1,Ralgapa2 3
1 5.777.222 2 76720000 76800000 loss Ttn 1
2 1.448.913 3 15360000 16000000 loss Sirpb1a,Sirpb1a,Sirpb1a,Sirpb1a,Sirpb1b,Sirpb1b,Sirpb1b,Sirpb1b,Sirpb1c,Sirpb1c,Sirpb1c,Sirpb1c 12
1 3.845.977 4 119500000 125160000 gain AA415398,AA415398,AA415398,AA415398,AA415398,Foxj3,Guca2a,Guca2b,Hivep3,Edn2,Foxo6,Scmh1,Slfnl1,Ctps,Cited4,Kcnq4,Nfyc,Mir30c-1,Mir30e,Rims3,Exo5,Zfp69,Smap2,Col9a2,Zmpste24,Tmco2,Rlf,Ppt1,Cap1,Mfsd2a,Mycl,Trit1,Bmp8b,Bmp8b,Oxct2b,Ppie,Hpcal4,Nt5c1a,Heyl,Pabpc4,Bmp8a,Bmp8a,Oxct2a,Macf1,Ndufs5,Akirin1,Rhbdl2,Mycbp,Rragc,Pou3f1,Utp11l,Fhl3,Sf3a3,Inpp5b,Mtf1,n-R5s192,1110065P20Rik,Yrdc,Maneal,Cdca8,Rspo1,Gnl2,Dnali1,Snip1,Meaf6,Zc3h12a 66
1 1.446.699 4 73900000 74180000 gain Frmd3 1
1 2.262.305 4 72740000 72880000 gain Aldoart1 1
1 1.234.215 4 80820000 84340000 gain Tyrp1,Lurap1l,Mpdz,n-R5s187,Nfib,Zdhhc21,Cer1,Frem1,Ttc39b,Snapc3,Psip1,Ccdc171,Bnc2 13
1 123.671 4 108480000 108760000 gain Zcchc11,Prpf38a,Orc1,Cc2d1b,Zfyve9 5
1 1.418.261 4 139400000 147600000 loss Ubr4,Iffo2,Aldh4a1,Tas1r2,Pax7,Klhdc7a,Igsf21,Arhgef10l,Rcc2,Padi4,Padi3,Padi1,Padi2,Sdhb,Atp13a2,Mfap2,Crocc,Necap2,Spata21,Szrd1,Fbxo42,Rsg1,Arhgef19,Epha2,Fam131c,Clcnka,Clcnka,Clcnkb,Clcnkb,Hspb7,Zbtb17,Spen,Fblim1,Tmem82,Slc25a34,Plekhm2,Ddi2,Rsc1a1,Agmat,Dnajc16,Casp9,Cela2a,Cela2a,Ctrc,Efhd2,Fhad1,Tmem51,Kazn,Prdm2,Pdpn,Lrrc38,1700012P22Rik,Aadacl3,9430007A20Rik,Dhrs3,Vps13d,Tnfrsf1b,Tnfrsf8,Zfp600,Zfp600,Rex2 61
1 8.113.817 6 129740000 129800000 gain Klri2 1
1 15.569.108 6 41360000 41480000 loss Prss3,Prss3,Prss1,Prss1 4
1 2.037.683 6 63480000 63700000 loss Grid2 1
2 14.694 7 38260000 38280000 gain Pop4 1
1 14.946 7 35780000 38280000 gain Zfp507,Tshz3,Zfp536,Uri1,Ccne1,1600014C10Rik,Plekhf1,Pop4 8
1 7.192.011 7 47500000 47620000 loss Mrgpra2b,Mrgpra3 2
1 1.722.108 7 26000000 26200000 loss Cyp2b13,Cyp2b9 2
1 12.683.495 7 11350000 11680000 loss Zscan4f 1
1 1.360.954 10 80900000 81100000 gain Timm13,Lmnb2,Gadd45b,Gng7,Diras1,Slc39a3,Sgta,Thop1,Creb3l3 9
1 267.959 11 97880000 98000000 gain Fbxo47,Plxdc1,Arl5c 3
1 1.872.174 11 75860000 76420000 gain Rph3al,1700016K19Rik,Fam101b,Vps53,Glod4,Fam57a,Gemin4,Rnmtl1,Nxn,Timm22,Abr 11
1 2.811.352 12 113560000 114920000 gain Ighv14-3,Ighv13-1,Ighv13-1,Ighv13-1,Ighv13-1,Ighv13-1,Ighv6-4,Ighv6-4,Ighv6-4,Ighv6-4,Ighv6-4,Ighv6-5,Ighv6-5,Ighv6-5,Ighv6-5,Ighv6-5 16
1 1.979.667 12 115860000 115980000 loss Ighv1-83 1
1 2.098.521 12 17420000 21160000 loss Nol10,Odc1,Hpcal1,5730507C01Rik,Asap2 5
1 21.864.853 13 12580000 12650000 loss Ero1lb 1
1 3.233.185 13 61500000 62780000 loss Ctsm,Cts3,Zfp808 3
1 5.640.895 14 53540000 53780000 gain Trav12-2,Trav12-3,Trav13-2,Trav14-2,Trav15-2-dv6-2,Trav3-3,Trav9-4,Trav9-4,Trav9-4,Trav9-4,Trav4-4-dv10,Trav5-4,Trav6-7-dv9,Trav7-6,Trav7-6,Trav7-6,Trav16,Trav13-4-dv7,Trav14-3,Trav3-4 20
1 2.942.081 14 86300000 97240000 gain Diap3,Tdrd3,Rps3a2,Pcdh20,Pcdh9,Klhl1 6
1 4.662.806 14 9840000 9880000 loss Fhit 1
1 3.638.346 14 43740000 44640000 loss Ear1,Ear1,Ear10,Ear10,Ang5,Ang6,Ear2,Ear2 8
1 1.709.546 14 35320000 37400000 loss Grid1,n-R5s46,Ccser2,Rgr,Lrit1,Lrit2,Cdhr1,2610528A11Rik,Ghitm 9
2 3.387.282 14 84060000 85740000 loss Pcdh17 1
1 2.140.909 14 68280000 86300000 loss Adam7,Adamdec1,Adam28,Stc1,Nkx2-6,Nkx3-1,Slc25a37,Synb,Entpd4,SYNB,Loxl2,R3hcc1,Chmp7,Tnfrsf10b,Tnfrsf10b,Tnfrsf10b,Tnfrsf10b,Rhobtb2,Pebp4,Egr3,Bin3,Ccar2,9930012K11Rik,9930012K11Rik,Pdlim2,Sorbs3,Ppp3cc,Slc39a14,Piwil2,Polr3d,Mir320,Phyhip,Bmp1,Sftpc,Lgi3,Reep4,Hr,Nudt18,Fam160b2,Dmtn,Fgf17,Npm2,Xpo7,Dok2,Gfra2,Fndc3a,Cysltr2,Rcbtb2,Rb1,Lpar6,Itm2b,Med4,Nudt15,Sucla2,Htr2a,Esd,Lrch1,5031414D18Rik,Lrrc63,Lcp1,Cpb2,Zc3h13,Siah3,Spert,Cog3,Slc25a30,Tpt1,Snora31,Gtf2f2,Kctd4,Gpalpp1,Nufip1,Rps2-ps6,Tsc22d1,Serp2,Lacc1,Ccdc122,Enox1,n-R5s48,Dnajc15,Epsti1,Fam216b,Tnfsf11,Akap11,Dgkh,Vwa8,Zfp957,Rgcc,Naa16,Mtrf1,Kbtbd7,Zbtbd6,Wbp4,Elf1,Sugt1,Lect1,Pcdh8,Olfm4,Pcdh17 99
1 3.810.267 14 109680000 111240000 loss n-R5s50,Slitrk6 2
1 3.924.724 15 77460000 77560000 loss Apol10a,Apol10a,Apol10a,Apol10a,Apol11a,Apol11a,Apol11a,Apol11a,Apol7c 9
1 7.728.161 16 44780000 44920000 gain Cd200r1,Cd200r1,Cd200r4,Cd200r4,Cd200r2,Cd200r2 6
1 348.511 17 73500000 76640000 gain Galnt14,Ehd3,Xdh,Memo1,Dpy30,Spast,Slc30a6,Nlrc4,Yipf4,Birc6,Ttc27,Ltbp1,Rasgrp3,Fam98a 14
1 1.052.043 17 36120000 36540000 gain Rpp21,Trim39 2
1 1.325.386 17 90420000 90540000 loss Nrxn1 1
1 4.438.061 17 38300000 38360000 loss Olfr137,Olfr137 2
1 125.062 17 30380000 30920000 loss Btbd9,Glo1,Dnah8,Glp1r 4
1 2.998.359 19 13860000 13900000 gain Olfr1502 1
2 3.307.524 19 30910000 30970000 loss Prkg1 1
When I tried
# mapply() pairs element i of the first list with element i of the second, so
# row i of df1 is only ever compared with row i of df2.
df2[mapply(function(x, y) length(intersect(x,y))>0,
strsplit(df1$Gene, ','), strsplit(df2$Gene, ',')),]
I got the output
logp chr start end CNA Genes No.of.genes
39 2.689.596 14 43932587 45325020 loss Ang5,Ang6,Ear2,Ear2,Ptgdr,Ptger2,Txndc16,Gpr137c,Ero1l 9
But I can find many rows with at least one common gene.
We could split up the "Genes" column in each datasets with strsplit, then compare the corresponding list elements with mapply, check if there is any intersect and use that index to subset the "df2"
# Split both gene columns on commas, then keep row i of df2 when any gene in
# row i of df1 also appears in row i of df2 (a row-by-row comparison only).
df2[mapply(function(x,y) any(x %in% y),
strsplit(df1$Gene, ','), strsplit(df2$Gene, ",")),]
# chr start end Gene
#1 1179 3360 gain Recl,Bcl,Trim3,Pop4
#3 7180 9229 loss Sox1
#4 8159 8360 loss Sox1
#5 9154 10588 loss Pekg
Or use intersect and length
# Same row-by-row comparison, expressed as "the intersection is non-empty".
df2[mapply(function(x, y) length(intersect(x,y))>0,
strsplit(df1$Gene, ','), strsplit(df2$Gene, ',')),]
Update
If we need to find whether a single "Gene" of first dataset is found in any of the rows of second data (using the updated dataset)
# Collect every gene named anywhere in df1 once, instead of re-splitting df1
# for each row of df2. The column is called `Genes` in the tables shown;
# as.character() keeps this working when the column was read as a factor.
df1_genes <- unique(unlist(
  strsplit(as.character(df1$Genes), ",", fixed = TRUE)
))
# Flag the rows of df2 that list at least one of those genes. vapply() always
# returns a logical vector, even when df2 has no rows.
has_shared_gene <- vapply(
  strsplit(as.character(df2$Genes), ",", fixed = TRUE),
  function(genes) any(genes %in% df1_genes),
  logical(1)
)
df2[has_shared_gene, ]

Resources