Related
That's the data which I would like to plot:
structure(list(`10` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
`34` = c(0, 0, 0, 0, 0, 0, 0, 0, 547725, 0),
`59` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
`84` = c(0, 0, 0, 8173070.8, 0, 0, 0, 0, 0, 0),
`110` = c(0, 0, 0, 20302893.6, 0, 0, 0, 0, 0, 0),
`134` = c(0, 0, 0, 13696077.5, 0, 0, 0, 0, 0, 0),
`165` = c(1024325, 0, 0, 10486165.5, 0, 0, 0, 0, 0, 0),
`199` = c(1183267.5, 0, 0, 6015700, 0, 0, 0, 0, 0, 0),
`234` = c(1771708.3, 0, 0, 3384495.8, 3384495.8, 0, 0, 0, 0, 1144700),
`257` = c(2007712.3, 0, 0, 0, 6980230.6, 0, 0, 0, 0, 0),
`362` = c(3339118.9, 0, 0, 0, 7280030.6, 1119625, 0, 0, 0, 0),
`433` = c(973797.9, 0, 0, 0, 6230170, 1497625, 0, 0, 0, 0),
`506` = c(0, 0, 0, 0, 12905925, 0, 0, 0, 0, 0),
`581` = c(0, 2140050, 0, 0, 4560645.8, 0, 3170133.3, 0, 0, 0),
`652` = c(0, 639437.7, 639437.7, 0, 2349711.3, 0, 902318.3, 902318.3, 0, 0),
`733` = c(0, 0, 1397257.5, 0, 2274710, 0, 0, 1414458.3, 0, 0),
`818` = c(0, 0, 742731.8, 0, 2953550, 0, 0, 563876.7, 0, 0),
`896` = c(0, 0, 714654.7, 0, 1199563.3, 0, 0, 561000, 0, 0),
`972` = c(0, 0, 434271.5, 0, 1358225, 0, 0, 0, 0, 0),
`1039` = c(0, 0, 227435, 0, 934840, 0, 0, 0, 0, 0)),
.Names = c("10", "34", "59", "84", "110", "134", "165", "199", "234", "257", "362", "433", "506", "581", "652", "733", "818", "896", "972", "1039"),
row.names = c("Mark121_1", "Mark121_2", "Mark121_3", "Mark143_1", "Mark143_2", "Mark152_1", "Mark152_2", "Mark152_3", "Mark444_1", "Mark444_2"),
class = "data.frame")
I would like to draw the lines for the rows whose names differ only in the number after the _ (underscore) on the same plot. The lines need to have different colors. I was thinking about using matplot, but I have no idea how to select the rows with similar names.
Using simple words I would like to have lines for:
Mark121_*
Mark143_*
Mark152_*
Mark444_*
each group on its own graph. That means 4 different graphs, each containing multiple lines.
This solution uses "dplyr", "tidyr" (for gather), "ggplot2" and "purrr". There is a large difference in scale between the series, so I plot log10 of the values; you might not want that.
# Reshape the wide table (one row per sample, one column per period) into long
# form so that ggplot2 can draw one line per sample and one panel per group.
library(dplyr)
library(tidyr)    # gather()
library(ggplot2)
library(purrr)    # map(), used by the per-figure examples that follow

df2 <- df %>%
  mutate(Name = rownames(.)) %>%
  gather(key = period, value = value, -Name) %>%
  # Group label = sample name with everything from the last underscore removed.
  # A pattern such as "_." would drop only one character after the underscore,
  # turning a replicate like "Mark121_10" into "Mark1210".
  mutate(person = sub("_[^_]*$", "", Name), period = as.numeric(period))

# One panel per person, one coloured line per sample.
# log10() compresses the large range of values; zeros become -Inf.
df2 %>%
  ggplot(aes(x = period, y = log10(value), colour = Name, group = Name)) +
  geom_line() +
  facet_wrap(~person)
Edit: Additional request
In order to plot each figure individually
# Build one plot per person and keep the plot objects in a list
# (one element per unique value of df2$person).
FiguresList <- map(unique(df2$person), function(who) {
  one_person <- filter(df2, person == who)
  ggplot(
    one_person,
    aes(x = period, y = log10(value), colour = Name, group = Name)
  ) +
    geom_line()
})
# Display the first figure
FiguresList[[1]]
# Save each person's plot as a PDF named after the person, e.g. "Mark121.pdf".
# walk() is used because only the side effect (the written file) matters.
unique(df2$person) %>% walk(function(P) {
  p <- df2 %>%
    filter(person == P) %>%
    ggplot(aes(x = period, y = log10(value), colour = Name, group = Name)) +
    geom_line()
  # Hand the plot to ggsave() explicitly instead of relying on its default,
  # last_plot(), which depends on hidden global state.
  ggsave(paste0(P, ".pdf"), plot = p)
})
I have a data frame like that one below:
> dput(data)
structure(list(`28` = c(0, 0, 0, 0, 0, 0), `38` = c(0, 0, 0,
0, 0, 0), `45` = c(0, 0, 0, 0, 0, 0), `53` = c(0, 0, 0, 0, 0,
0), `60` = c(0, 0, 0, 0, 0, 0), `78` = c(0, 0, 0, 0, 0, 0), `116` = c(0,
0, 0, 0, 0, 0.983309489747258), `145` = c(0, 0, 0, 0, 0, 1),
`189` = c(0, 1, 0.560384508734634, 0, 0, 0.875695437927198
), `223` = c(0, 0.988158197286733, 1, 0, 0, 0.492500108379937
), `281` = c(1, 0.677856978615774, 0.448525741750624, 0,
0.362088745790311, 0.180474270603026), `362` = c(0.79151704397606,
0.763278914693033, 0.35864682503004, 1, 1, 0.114178985852806
), `440` = c(0.662841530054645, 0.818636468153598, 0.448488769756909,
0, 0.448447503793346, 0), `524` = c(0, 0.638192687974247,
0, 0, 0, 0), `634` = c(0, 0, 0, 0, 0, 0), `759` = c(0, 0,
0, 0, 0, 0), `848` = c(0, 0, 0, 0, 0, 0), `979` = c(0, 0,
0, 0, 0, 0), `1120` = c(0, 0, 0, 0, 0, 0), `1248` = c(0,
0, 0, 0, 0, 0)), .Names = c("28", "38", "45", "53", "60",
"78", "116", "145", "189", "223", "281", "362", "440", "524",
"634", "759", "848", "979", "1120", "1248"), row.names = c("Mark",
"Gregg", "Tim", "Oscar", "Tom", "Matthew"
), class = "data.frame")
I would like to calculate the Euclidean distance between each profile in this data and a reference profile; Tim should be used as the reference. The results can be stored in an additional column.
Mark to Tim
Gregg to Tim
Oscar to Tim
and etc
You can use the dist function (which actually computes the distances between all pairs of profiles):
# Pairwise Euclidean distances between all rows (profiles) of `data`.
# as.matrix() on a "dist" object already returns the full symmetric matrix,
# so dist()'s print-only `upper`/`diag` arguments are not needed.
m <- as.matrix(data)
distances <- as.matrix(dist(m, method = "euclidean"))
# Distance of every profile to the reference profile "Tim"; this vector can be
# attached to the data frame as an extra column if desired.
dist_to_tim <- distances[, "Tim"]
> distances['Mark','Tim']
[1] 1.36069
> distances['Gregg','Tim']
[1] 0.9767401
> distances['Oscar','Tim']
[1] 1.458658
I have a data frame that looks something like this:
> dput(tes)
structure(list(path = structure(1:6, .Label = c("1893-chicago-fair",
"1960s-afghanistan", "1970s-iran", "1980s-new-york", "20-bizarre-vintage-ads",
"20-bizarre-vintage-ads?utm_campaign=6678&utm_medium=rpages&utm_source=Facebook&utm_term=1e8e704f7b587515c72e6cf7895d55fd110b652c480d98c1440f0a7acba5fb0e",
"20-photos-segregation-america-show-far-weve-come-much-farther-go",
"7-bizarre-cultural-practices", "7-creepy-abandoned-cities?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=4015a7368b588ff09694c96ba720c58f4e7f41a05b4181908b582bae682bef5e",
"a-brief-history-of-hippies", "abandoned-photographs", "albert-kahn",
"amazing-facts", "american-bison-extinction-1800s", "american-english-vs-british-english",
"andre-the-giant-photos", "andre-the-giant-photos??utm_source=facebook&sr_source=lift_facebook&utm_campaign=simplereach_andre-the-giant-photos&utm_medium=social",
"andre-the-giant-photos?grvVariant=d27feef0bfad84d60f335d3a8d241d9e",
"andre-the-giant-photos?grvVariant=d27feef0bfad84d60f335d3a8d241d9e&utm_campaign=gravityus2_142deb68f67fb1a99e7b80250fecc932&utm_medium=referral&utm_source=gravity",
"andre-the-giant-photos?grvVariant=d27feef0bfad84d60f335d3a8d241d9e&utm_campaign=gravityus2_16d63cf07ecf656f602b2d6b209344f7&utm_medium=referral&utm_source=gravity",
"andre-the-giant-photos?grvVariant=d27feef0bfad84d60f335d3a8d241d9e&utm_campaign=gravityus2_713050ecffc51540af02b2246ddf57dd&utm_medium=referral&utm_source=gravity",
"andre-the-giant-photos?grvVariant=d27feef0bfad84d60f335d3a8d241d9e&utm_campaign=gravityus2_c5bb3bc5e9408e0ad52ec9e787bd8654&utm_medium=referral&utm_source=gravity",
"andre-the-giant-photos?sr_source=lift_facebook&utm_campaign=simplereach_andre-the-giant-photos&utm_medium=social&utm_source=facebook",
"astounding-aerial-photography", "astounding-aerial-photography?utm_campaign=7002&utm_medium=rpages&utm_source=Facebook&utm_term=38e9e903d9ba59106d8b4d19be593f3de7ff8b91b12eafa03f2e382228f7b0d1",
"august-landmesser", "ben-franklin", "best-all-that-is-interesting-articles",
"bigfoot-facts", "celebrity-school-photos?grvVariant=82c0ce57a33dfd0209bdefc878665de0&utm_campaign=gravityus2_bc8646aefd6d0a16af03d7caf248f226&utm_medium=referral&utm_source=gravity",
"coolest-mushrooms?utm_campaign=taboolaINTL&utm_medium=referral&utm_source=taboola",
"craziest-ways-drugs-smuggled", "creepy-halloween-costumes",
"danakil-depression", "dark-john-lennon-quotes", "david-bowie-quotes",
"days-in-groundhog-day", "death-photos", "death-photos?utm_campaign=taboolaINTL&utm_medium=referral&utm_source=taboola",
"dr-seuss-quotes", "dream-chaser-spacecraft", "dust-bowl", "earth-two-planets",
"eixample-barcelona", "email-to-space", "evil-science-experiments",
"famous-incest", "famous-spies", "fun-facts-trivia", "golden-age-air-travel?utm_campaign=taboolaINTL&utm_medium=referral&utm_source=taboola",
"gross-foods", "gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=106965c54919c24bf37356500ec50f0709b1de621d6950bb4c5d48759ea3677e",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=184e0ee39e66af82f9b124b904f6e07964b211e902cb0dc00c28771ff46163a2",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=1a0ddea7bed770d5473c45e9f8d81dfd0c4fdd232f207c6b88b53c41ff220c59",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=538659f1fc53f28d2c87b93ac73973681c1a46a04954964ab6c52ed1ab09b33a",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=87caf0acb91ae2b202f1b00ad9eaad3fef20bbfb23405b9047fb2b5a5462ab9c",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=91eae42c8fc9568103d46e0b6b6ec08fc34fd68b2e1918ffe2333ec73035c95a",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=a72946874b2003a8e40635c6cf10c851d4e1c0ed45e645d69663214239550602",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=ab594f0a1be002c8c3db297e8d33b04678af40e6a6469ac815884ae0a014b3a3",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=fb1e333dd58cb7bb9251ec52290aae21771149f73e083440047068a69aaeae09",
"hilarious-insults", "hippie-communes", "hippie-communes?grvVariant=fda07538efb1c25617f7cc3d09c37c79",
"hippie-communes?grvVariant=fda07538efb1c25617f7cc3d09c37c79&utm_campaign=gravityus2_e3cd42d4745768460dab4694a972fd82&utm_medium=referral&utm_source=gravity",
"hippie-communes?pp=0", "history-of-the-vibrator", "history-of-the-vibrator?utm_campaign=whfbpd&utm_medium=social&utm_source=facebook",
"homosexuality-norm", "hunger-games-facts?utm_campaign=6905&utm_medium=rpages&utm_source=Facebook&utm_term=1a9e42ac8abb6ffa90bf0542206505e74d3df12114a2c4445527fb2b88ef8880",
"influential-photographs", "ingeniously-creative-ads", "insane-cults",
"insane-rulers", "inspirational-quotes", "inspirational-quotes?utm_medium=referral&utm_source=taboolainternal",
"interesting-facts-about-the-world", "interesting-quotes", "krokodil",
"making-a-murderer-theories", "maya-angelou-greatest-quotes",
"medieval-torture-devices", "milky-way-colorado", "montreal-metro",
"most-popular-female-names-in-america", "neil-degrasse-tyson-tweets",
"new-york-city-cinemagraphs", "new-york-subways-1980s", "north-korea-photographs",
"north-korea-photographs?utm_campaign=taboolaINTL&utm_medium=referral&utm_source=taboola",
"north-korea-photographs?utm_medium=referral&utm_source=taboolainternal",
"obama-aging", "pablo-escobar", "pablo-escobar??utm_source=facebook",
"pablo-escobar??utm_source=facebook&sr_source=lift_facebook&utm_campaign=simplereach_pablo-escobar&utm_medium=social",
"pablo-escobar?utm_campaign=whfbpd&utm_medium=social&utm_source=facebook",
"panda-facts", "photo-of-the-day-nasa-releases-crystal-clear-image-of-pluto",
"pollution-in-china-photographs", "pollution-in-china-photographs?utm_campaign=3434&utm_medium=rpages&utm_source=Facebook&utm_term=1a0ddea7bed770d5473c45e9f8d81dfd0c4fdd232f207c6b88b53c41ff220c59",
"pollution-in-china-photographs?utm_campaign=3434&utm_medium=rpages&utm_source=Facebook&utm_term=e28a76c1572c36c3a13965e52b4b2ea10518eb9f9c79c4bc84cfb85db16be81e",
"pollution-in-china-photographs?utm_campaign=6806&utm_medium=rpages&utm_source=Facebook&utm_term=1a0ddea7bed770d5473c45e9f8d81dfd0c4fdd232f207c6b88b53c41ff220c59",
"pollution-in-china-photographs?utm_campaign=7048&utm_medium=rpages&utm_source=Facebook&utm_term=2ef4bd7b6cd587601d6eeb35925282a1ed095ebbd4e9e4c0337ef868c7de7a0b",
"pollution-in-china-photographs?utm_campaign=7458&utm_medium=rpages&utm_source=Facebook&utm_term=b9e79a51cd4daf4c3ec02accce75b3e1fc9a22cb3133460c9c32a4f2f9cdb68c",
"powerful-photos-of-2014", "real-x-files", "romanovs-last-days",
"science-of-human-decay", "scientific-discoveries-2015", "scully-effect",
"serial-killer-quotes", "shah-iran", "six-of-the-craziest-gods-in-mythology",
"space-facts", "sun-facts", "sunken-cities", "sunken-ships",
"super-bowl-i-facts", "superhero-movies", "surreal-places", "syrian-civil-war-photographs",
"the-five-greatest-mysteries-of-human-history", "the-four-most-important-battles-of-ancient-greece",
"the-most-colorful-cities-in-the-world", "titanic-facts", "titanic-facts?utm_campaign=6385&utm_medium=rpages&utm_source=Facebook&utm_term=f5905e878216d14e20457ee3265caf6c10022d9545609edfb9a3cb0642c1a310",
"titanic-facts?utm_campaign=6899&utm_medium=rpages&utm_source=Facebook&utm_term=b9e79a51cd4daf4c3ec02accce75b3e1fc9a22cb3133460c9c32a4f2f9cdb68c",
"titanic-facts?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=106965c54919c24bf37356500ec50f0709b1de621d6950bb4c5d48759ea3677e",
"titanic-facts?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=538659f1fc53f28d2c87b93ac73973681c1a46a04954964ab6c52ed1ab09b33a",
"titanic-facts?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=91eae42c8fc9568103d46e0b6b6ec08fc34fd68b2e1918ffe2333ec73035c95a",
"titanic-facts?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=ab594f0a1be002c8c3db297e8d33b04678af40e6a6469ac815884ae0a014b3a3",
"titanic-facts?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=d1864657a05e5b716bb5cb16a29f068a55652eb39fb669ea9c22a6486198f227",
"titanic-facts?utm_campaign=7292&utm_medium=rpages&utm_source=Facebook&utm_term=f5905e878216d14e20457ee3265caf6c10022d9545609edfb9a3cb0642c1a310",
"us-veterans-portraits", "vintage-disneyland", "wall-street-early-20th-century",
"what-we-love-this-week-the-incredible-last-words-of-famous-historical-figures",
"woodstock-photos", "zombie-proof-house"), class = "factor"),
`0089` = c(0, 0, 0, 0, 0, 1), `0096` = c(0, 0, 0, 0, 0, 0
), `02` = c(0, 0, 0, 0, 0, 0), `0215` = c(0, 0, 0, 0, 0,
0), `0225` = c(0, 0, 0, 0, 0, 0), `0252` = c(0, 0, 0, 0,
0, 0), `0271` = c(0, 0, 0, 0, 0, 0), `0272` = c(0, 0, 0,
0, 0, 0), `03` = c(0, 0, 0, 0, 1, 1)), .Names = c("path",
"0089", "0096", "02", "0215", "0225", "0252", "0271", "0272",
"03"), row.names = c(NA, 6L), class = "data.frame")
and I need to apply the min(x,1) function so that it scans each value in the data frame (except the first column, which is not numeric) and returns min(x,1). That way I have only zeros and ones.
I have tried:
# Attempted element-wise cap at 1 (does not work as intended).
# min() collapses all of its arguments to one single smallest value rather
# than working element by element.
f <- function(x) min(1,x)
# do.call() passes every selected column as a separate argument to f,
# but f accepts only the single argument x.
res1<-do.call(f,tes[,2:ncol(tes)])
but that does not output the right result.
Any help appreciated.
We can use pmin
# Clamp every value outside the first (non-numeric) column to at most 1.
tes[-1] <- pmin(as.matrix(tes[-1]), 1)
Or if we need only binary values
# Binary variant: every non-zero value becomes 1, zeros stay 0
# (first column is left untouched).
tes[, -1] <- (tes[, -1] != 0) + 0L
I have a data.frame like this:
> dput(head(dat))
structure(list(`Gene name` = c("at1g01050", "at1g01080", "at1g01090",
"at1g01220", "at1g01320", "at1g01420"), `1_1` = c(0, 0, 0, 0,
0, 0), `1_2` = c(0, 0, 0, 0, 0, 0), `1_3` = c(0, 2.2266502274762,
0, 0, 0, 0), `1_4` = c(0, 1.42835007256373, 0, 0, 0, 0), `1_5` = c(0,
1, 0, 0, 0, 0.680307288653971), `1_6` = c(0, 0.974694551708235,
0.0703315834738149, 0, 0, 1.5411058346636), `1_7` = c(1, 1.06166030205396,
0, 0, 0, 0), `1_8` = c(1, 1.07309874414745, 0.129442847788922,
0, 0, 0), `1_9` = c(1.83566164452602, 0.770848509662441, 1.16522133036595,
1.02360016370994, 0, 0), `1_10` = c(0, 0, 0.96367393959757, 0,
0, 0), `1_11` = c(0, 1, 1.459452636222, 0, 0.992067202742928,
0), `1_12` = c(0, 0, 0.670100384155585, 0, 0.461601636474094,
0), `1_13` = c(0, 0, 1.43074917909221, 0, 1.35246977730244, 0
), `1_14` = c(0, 0, 1.13052717277684, 0, 1.27971261718285, 0),
`1_15` = c(0, 0, 0, 0, 0, 0), `1_16` = c(0, 0, 1.02186950513655,
0, 0.937805171752374, 0), `1_17` = c(0, 0, 0, 0, 1.82226410514639,
0), `1_18` = c(0, 0, 1.2057581396188, 0, 1, 0), `1_19` = c(0,
0, 2.54080080087007, 0, 1.74014162763125, 0), `1_20` = c(0,
0, 0, 0, 0, 0), `1_21` = c(0, 0, 1.85335086627868, 0, 2.93605031878879,
0), `1_22` = c(0, 0, 0, 0, 0, 0), `1_23` = c(0, 0, 0, 0,
0, 0), `1_24` = c(0, 0.59685787388353, 4.74450895485671,
0, 1.64665192735547, 0), `1_25` = c(0, 0, 0, 0, 0, 0), `1_26` = c(0,
0, 0, 0, 0, 0), `1_27` = c(0, 1.70324142554566, 0, 0, 0,
0), `1_28` = c(0, 4.02915818089525, 0, 0, 0, 0), `1_29` = c(0,
1.10050253348262, 0, 0, 0, 1.78705663080963), `1_30` = c(0,
0, 0, 0, 0, 0), `1_31` = c(0.525193634811661, 1.19203674964562,
0, 0, 0, 0), `1_32` = c(0.949695564218912, 0.511935958918944,
0.698256748091399, 0.924419021307232, 0, 0), `1_33` = c(1,
0.392202418854686, 0.981531026331928, 1, 0, 0), `1_34` = c(0,
0, 1.04480642952605, 0, 0, 0), `1_35` = c(0.875709646300199,
0.416787083481068, 0.910412293707794, 0, 0.931813162802324,
0), `1_36` = c(0.235817844851986, 0, 0.695496044366791, 0,
0, 0), `1_37` = c(0, 0, 0, 0, 0, 0), `1_38` = c(0, 0, 0,
0, 0, 0), `1_39` = c(0, 0, 0, 0, 0, 0), `1_40` = c(0, 0.426301584359177,
1.05916031917965, 0, 1.11716924423855, 0), `1_41` = c(0,
0, 0, 0, 0, 0), `1_42` = c(0, 0, 0, 0, 0, 0), `1_43` = c(0,
0, 0, 0, 0, 0), `1_44` = c(0, 0.817605484758179, 1, 0, 1,
0), `1_45` = c(0, 0, 0, 0, 1.83706702696725, 0), `1_46` = c(0,
0, 0, 0, 0, 0), `1_48` = c(0, 0, 0, 0, 0, 0), `1_49` = c(0,
0, 0, 0, 0, 0), `1_50` = c(0, 0, 0, 0, 0, 0), `1_51` = c(0,
0.822966241998042, 0, 0, 0, 0), `1_52` = c(0, 1.38548267401525,
0, 0, 0, 0), `1_53` = c(0, 0.693090058304095, 0, 0, 0, 1.200664746484
), `1_54` = c(0, 7.58136662752864, 0, 0, 0, 0), `1_55` = c(0.519878111919004,
0.530809413647805, 0.343274113384907, 0, 0, 0), `1_56` = c(1.24511715957891,
0.545097856366912, 0.397440073804376, 0, 0, 0), `1_57` = c(1.26748496499576,
0.502893153188496, 1, 1.09278985531586, 0, 0), `1_58` = c(0.696198684496234,
0.68197003689249, 1.30108437738319, 0.778091049180591, 0.533017938104689,
0), `1_59` = c(1.15255606344999, 0.294294436704185, 1.07862692616479,
1, 0.250091116406616, 0), `1_60` = c(1.95634163405497, 0,
1.1602014253913, 0, 0, 0), `1_61` = c(1.09287167009628, 0,
2.05939536537347, 1.08165521287259, 0.68027384701565, 0),
`1_62` = c(0.791776166968497, 0, 0.846107162142824, 0, 0.77013323652256,
0), `1_63` = c(0.378787010943447, 0.391876271945063, 0.623223753921758,
0, 0.651918444771296, 0), `1_64` = c(0.189585762007804, 0.361452381684218,
0.799519726870751, 0, 1.06818683719768, 0), `1_65` = c(0,
0, 2.5212953775211, 0, 0, 0), `1_66` = c(0, 0, 0, 0, 0, 0
), `1_67` = c(0, 0, 0, 0, 2.44827717262786, 0), `1_68` = c(0,
0, 0, 0, 0, 0), `1_69` = c(0, 0, 0, 0, 0, 0), `1_70` = c(0,
0, 2.36142611074334, 0, 2.391093649557, 0), `1_71` = c(0,
0, 0.35565044656798, 0, 0, 0), `1_72` = c(0, 0, 5.86951313801941,
0, 0, 0)), .Names = c("Gene name", "1_1", "1_2", "1_3", "1_4",
"1_5", "1_6", "1_7", "1_8", "1_9", "1_10", "1_11", "1_12", "1_13",
"1_14", "1_15", "1_16", "1_17", "1_18", "1_19", "1_20", "1_21",
"1_22", "1_23", "1_24", "1_25", "1_26", "1_27", "1_28", "1_29",
"1_30", "1_31", "1_32", "1_33", "1_34", "1_35", "1_36", "1_37",
"1_38", "1_39", "1_40", "1_41", "1_42", "1_43", "1_44", "1_45",
"1_46", "1_48", "1_49", "1_50", "1_51", "1_52", "1_53", "1_54",
"1_55", "1_56", "1_57", "1_58", "1_59", "1_60", "1_61", "1_62",
"1_63", "1_64", "1_65", "1_66", "1_67", "1_68", "1_69", "1_70",
"1_71", "1_72"), row.names = c(NA, 6L), class = "data.frame")
That's the code I use for calculation of the mean for 3 replicates which I have in the data frame:
## Calculating the mean of 3 "replicates"
# Column positions (within dat2) at which each of the three replicate groups
# starts; dat2 is dat without its first column.
ind <- c(1, 25, 49)
dat2 <- dat[-1]
# For each offset i = 0..23, average columns ind + i row-wise. With i = 23 this
# requests column 72, so a dat2 that has only 71 columns fails here.
tbl_end <- cbind(dat[1], sapply(0:23, function(i) rowMeans(dat2[ind+i])))
This is the error I get:
Error in `[.data.frame`(dat2, ind + i) : undefined columns selected
Called from: eval(substitute(browser(skipCalls = pos), list(pos = 9 - frame)),
envir = sys.frame(frame))
I have 71 columns of results, but there should be 72 (24 fractions x 3 replicates = 72 in total), so one column is missing. I have no idea why it is missing, but I have to work around it. There is no 1_47, which should be averaged together with 1_23 and 1_71. Do you have any idea how I can edit my function to just ignore fraction 1_47 and still get the mean of 1_23 and 1_71?
Why not just add a dummy column for 1_47? That will make your data more regular and make it much easier to extract the indexes you need. To do this, try
# Insert an all-NA placeholder for the missing fraction 1_47 so that the three
# replicates sit at regular 24-column intervals again.
# The name must be back-quoted: 1_47 is not a syntactic R name, so a bare
# 1_47 = ... argument is a parse error.
dat2 <- cbind(dat[1:47], `1_47` = rep(NA_real_, nrow(dat)), dat[48:72])
ind <- c(1, 25, 49)
# dat2 still has the gene-name column in position 1, hence the extra "+ 1".
# na.rm = TRUE makes the NA placeholder drop out of the mean.
tbl_end <- cbind(
  dat[1],
  sapply(0:23, function(i) rowMeans(dat2[ind + i + 1], na.rm = TRUE))
)
Let's start with my data:
> dput(head(tbl_end))
structure(list(`Gene name` = c("at1g01050.1", "at1g01080.1",
"at1g01090.1", "at1g01220.1", "at1g01320.2", "at1g01420.1"),
`1_1` = c(0, 0, 0, 0, 0, 0), `1_2` = c(0, 0, 0, 0, 0, 0),
`1_3` = c(0, 1, 0, 0, 0, 0), `1_4` = c(0, 0.660693687777888,
0, 0, 0, 0), `1_5` = c(0, 0.521435654491704, 0, 0, 0, 1),
`1_6` = c(0, 0.437291194705566, 0, 0, 0, 1), `1_7` = c(0,
0.52204783488213, 0, 0, 0, 0), `1_8` = c(0, 0.524298383907171,
0, 0, 0, 0), `1_9` = c(1, 0.376865096972469, 0, 1, 0, 0),
`1_10` = c(0, 0, 0, 0, 0, 0), `1_11` = c(0, 0, 0, 0, 0, 0
), `1_12` = c(0, 0, 0, 0, 0, 0), `1_13` = c(0, 0, 0, 0, 0,
0), `1_14` = c(0, 0, 0, 0, 0, 0), `1_15` = c(0, 0, 0, 0,
0, 0), `1_16` = c(0, 0, 0, 0, 0, 0), `1_17` = c(0, 0, 0,
0, 0, 0), `1_18` = c(0, 0, 0.476101907006443, 0, 0, 0), `1_19` = c(0,
0, 1, 0, 0, 0), `1_20` = c(0, 0, 0, 0, 0, 0), `1_21` = c(0,
0, 0, 0, 1, 0), `1_22` = c(0, 0, 0, 0, 0, 0), `1_23` = c(0,
0, 0, 0, 0, 0), `1_24` = c(0, 0, 0, 0, 0, 0)), .Names = c("Gene name",
"1_1", "1_2", "1_3", "1_4", "1_5", "1_6", "1_7", "1_8", "1_9",
"1_10", "1_11", "1_12", "1_13", "1_14", "1_15", "1_16", "1_17",
"1_18", "1_19", "1_20", "1_21", "1_22", "1_23", "1_24"), row.names = c(NA,
6L), class = "data.frame")
I have more than 2k rows. I set the gene name as the row name, but there is a problem: sometimes the same gene has different "models" (a dot followed by a number, e.g. 1 or 2, is appended to the name), but it is still the same gene. I want to find all of those duplicates (same gene name), average the values in each column for that gene, and keep just one row with the averaged values.
Is it possible to do ?
Just showing some of the gene names I have:
> dput(vec_names)
c("at1g01050.1", "at1g01080.1", "at1g01090.1", "at1g01220.1",
"at1g01320.2", "at1g01420.1", "at1g01470.1", "at1g01800.1", "at1g01910.5",
"at1g01920.2", "at1g01960.1", "at1g01980.1", "at1g02020.2", "at1g02100.2",
"at1g02130.1", "at1g02140.1", "at1g02150.1", "at1g02305.1", "at1g02500.2",
"at1g02560.1", "at1g02780.1", "at1g02880.3", "at1g02920.1", "at1g02930.2",
"at1g03030.1", "at1g03090.2", "at1g03110.1", "at1g03130.1", "at1g03210.1",
"at1g03220.1", "at1g03230.1", "at1g03310.2", "at1g03330.1", "at1g03475.1",
"at1g03630.2", "at1g03680.1", "at1g03870.1", "at1g03900.1", "at1g04080.2",
"at1g04130.1", "at1g04170.1", "at1g04190.1", "at1g04270.2", "at1g04350.1",
"at1g04410.1", "at1g04420.1", "at1g04530.1", "at1g04640.2", "at1g04690.1",
"at1g04750.2", "at1g04810.1", "at1g04850.1", "at1g04870.2", "at1g05010.1",
"at1g05180.1", "at1g05190.1", "at1g05320.3", "at1g05350.1", "at1g05520.1",
"at1g05560.1", "at1g05620.2", "at1g06000.1", "at1g06110.1", "at1g06130.2",
"at1g06290.1", "at1g06410.1", "at1g06550.1", "at1g06560.1", "at1g06570.1",
I think there is a function for that but can't find it.
Using data.table
library(data.table)
dt <- data.table(dat)
# Strip the trailing ".<model number>" so that all models of a gene share one
# key, e.g. "at1g01320.2" -> "at1g01320". (A pattern like "[.]*" with gsub()
# would merely delete the dot and leave the model number in place.)
dt[, gene_unique := sub("[.][0-9]+$", "", `Gene name`)]
# Average every measurement column, i.e. everything except the two name columns.
cols <- setdiff(colnames(dt), c("Gene name", "gene_unique"))
dt[, lapply(.SD, mean), by = gene_unique, .SDcols = cols]
Using aggregate as suggested in comments
# Collapse model variants (e.g. "at1g01320.2") to the bare gene identifier by
# removing the trailing ".<number>"; a gsub("[.]*", ...) call would only delete
# the dot and leave the variants distinct.
dat[["Gene name"]] <- sub("[.][0-9]+$", "", dat[["Gene name"]])
# Average all remaining columns within each gene.
aggregate(. ~ `Gene name`, dat, mean)