Related
I have been creating a bar plot for the results of a sentiment analysis model in R. The data is highly confidential customer feedback, which is fed into a sentiment analysis model to generate the outputs. My task is to generate a chart for each zone/sub-zone combination. For example, zone = delhi has sub-zones such as eastdelhi, westdelhi, northdelhi and southdelhi, so I want to generate charts for combinations like
zone = delhi and sub-zone = eastdelhi, and save each chart to a JPEG file. I have written a for loop to do so, but for some reason it isn't working. This is the code:
# Script from the question: intended to write one bar chart (JPEG) per
# zone / circle combination. Review notes below mark the spots that prevent
# it from doing so.
#Set locales
# Clears every object from the current workspace before the script runs.
rm(list = ls())
Sys.setlocale(category = "LC_ALL",locale = "English")
#Load libraries
# Package names to attach ("dplyr" and "tidyr" are listed twice).
LoadLibraries <- c("openxlsx",
"dplyr",
"tidyr",
"plotly",
"RColorBrewer",
"shiny",
"officer",
"parallel",
"dplyr",
"tidyr",
"magrittr",
"knitr")
# require() returns FALSE instead of stopping when a package is missing, so a
# failed load only shows up in the list printed by lapply().
lapply(LoadLibraries, require, character.only = TRUE)
path = "C:/Users/R_Visual/Data/visual_data.xlsx"
input_data <- read.xlsx(path)
# Names of the first ten columns, used below as bar labels and legend text.
name <- names(input_data[,1:10])
#Filtering the zones and circles
# NOTE(review): Zone.Final and Circle.Final are used as bare names here; the
# visible code only defines them as columns of input_data, not as objects.
for (i in 1:length(unique(Zone.Final))){
for (j in 1:length(unique(Circle.Final))){
# NOTE(review): `+` does not concatenate strings in R and str() prints an
# object's structure rather than converting it to text; paste0() is the usual
# way to build a file name.
fileName = 'C:/Users/R_Visual/'+ str(i) + str(j) + '.jpeg'
jpeg(fileName, width = 900, height = 450)
# NOTE(review): filter() is handed a single column value (element i or j)
# rather than a logical condition comparing the column with a zone/circle.
df <- input_data %>%
filter(input_data$Zone.Final[i])
df <- df %>%
filter(df$Circle.Final[j])
color <- c("#ca2f27","#f56d43","#f8c38a","#fde08b","#d9ef8b","#a7d86f","#67bd64","#1a984f","#D3D3D3","#A9A9A9")
# NOTE(review): the bars are computed from input_data (all rows), not from the
# filtered df, and sort() reorders the totals while `name` keeps column order.
plot <- barplot(sort(colSums(input_data[, 1:10])),
main = paste("Sentiment Analysis for Zone",df$Zone.Final[i]," and Circle",df$Circle.Final[j], sep = ""),
xlab = "Sentiments",
ylab = "Count",
horiz = FALSE,
names = name,
col = color,
border = FALSE,
legend = TRUE,
beside = TRUE,
legend.text = name,
args.legend = list(bty = "n", x = "topleft",ncol = 1, cex = 0.8, y.intersp = 0.8, x.intersp = 0.25, horiz = F, xpd = TRUE, inset = c(0,0)))
# Closes the JPEG device opened at the top of this iteration.
dev.off()
}
}
EDIT:
This is the sample of input_data
> dput(input_data)
structure(list(anger = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), anticipation = c(1,
0, 0, 0, 0, 0, 1, 0, 0, 0), disgust = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), fear = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), joy = c(0,
0, 0, 0, 0, 0, 1, 0, 0, 0), sadness = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), surprise = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), trust = c(0,
0, 1, 1, 1, 0, 2, 0, 0, 0), negative = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), positive = c(1, 0, 0, 0, 1, 1, 2, 1, 0, 1), Zone.Final = c("Delhi",
"Lucknow", "Durgapur", "Lucknow", "Mumbai", "Bhopal", "Chandigarh",
"Chandigarh", "Gurugram", "Chandigarh"), Circle.Final = c("Noida",
"Gorakhpur", "Murshidabad", "Gorakhpur", "Mumbai City", "Bhopal",
"Chandigarh", "Panchkula", "Hisar", "Karnal")), row.names = c(NA,
10L), class = "data.frame")
If anyone could help me with the code, it would be of great help.
You can try creating a list combining the zone and subzone:
#Data
input_data <- structure(list(anger = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), anticipation = c(1,
0, 0, 0, 0, 0, 1, 0, 0, 0), disgust = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), fear = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), joy = c(0,
0, 0, 0, 0, 0, 1, 0, 0, 0), sadness = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), surprise = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), trust = c(0,
0, 1, 1, 1, 0, 2, 0, 0, 0), negative = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), positive = c(1, 0, 0, 0, 1, 1, 2, 1, 0, 1), Zone.Final = c("Delhi",
"Lucknow", "Durgapur", "Lucknow", "Mumbai", "Bhopal", "Chandigarh",
"Chandigarh", "Gurugram", "Chandigarh"), Circle.Final = c("Noida",
"Gorakhpur", "Murshidabad", "Gorakhpur", "Mumbai City", "Bhopal",
"Chandigarh", "Panchkula", "Hisar", "Karnal")), row.names = c(NA,
10L), class = "data.frame")
#Code
# Build one id per zone/circle combination so the data can be split into
# one data frame per chart.
df <- input_data
df$id <- paste(df$Zone.Final, df$Circle.Final, sep = "-")
#Split
List <- split(df, df$id)
#Plot
color <- c("#ca2f27","#f56d43","#f8c38a","#fde08b","#d9ef8b","#a7d86f","#67bd64","#1a984f","#D3D3D3","#A9A9A9")
#Plot names
# One output file per combination, named after the combination id.
vnames <- paste0(names(List), ".jpeg")
#Loop
# seq_along() is safe when List is empty (1:length(List) would yield c(1, 0)).
for (i in seq_along(List)) {
  # Sort the column totals once and take the labels from the sorted vector,
  # so bar labels and legend entries stay aligned with the reordered bars.
  totals <- sort(colSums(List[[i]][, 1:10]))
  name <- names(totals)
  #Plot
  jpeg(filename = vnames[i], width = 900, height = 450)
  # `legend = TRUE` was dropped: it is not a barplot() argument (the legend is
  # requested through legend.text) and would only be passed on through `...`.
  barplot(totals,
          main = paste0("Sentiment Analysis for Zone ",
                        unique(List[[i]]$Zone.Final),
                        " and Circle ", unique(List[[i]]$Circle.Final)),
          xlab = "Sentiments",
          ylab = "Count",
          horiz = FALSE,
          names.arg = name,
          col = color,
          border = FALSE,
          beside = TRUE,
          legend.text = name,
          args.legend = list(bty = "n", x = "topleft", ncol = 1,
                             cex = 0.8, y.intersp = 0.8, x.intersp = 0.25,
                             horiz = FALSE, xpd = TRUE, inset = c(0, 0)))
  # Close the device so the file is flushed before the next chart is opened.
  dev.off()
}
That will create the plots. You can also prepend a directory path to vnames to save the plots in the folder of your choice.
I have a data frame that look something like this
> dput(tes)
structure(list(path = structure(1:6, .Label = c("1893-chicago-fair",
"1960s-afghanistan", "1970s-iran", "1980s-new-york", "20-bizarre-vintage-ads",
"20-bizarre-vintage-ads?utm_campaign=6678&utm_medium=rpages&utm_source=Facebook&utm_term=1e8e704f7b587515c72e6cf7895d55fd110b652c480d98c1440f0a7acba5fb0e",
"20-photos-segregation-america-show-far-weve-come-much-farther-go",
"7-bizarre-cultural-practices", "7-creepy-abandoned-cities?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=4015a7368b588ff09694c96ba720c58f4e7f41a05b4181908b582bae682bef5e",
"a-brief-history-of-hippies", "abandoned-photographs", "albert-kahn",
"amazing-facts", "american-bison-extinction-1800s", "american-english-vs-british-english",
"andre-the-giant-photos", "andre-the-giant-photos??utm_source=facebook&sr_source=lift_facebook&utm_campaign=simplereach_andre-the-giant-photos&utm_medium=social",
"andre-the-giant-photos?grvVariant=d27feef0bfad84d60f335d3a8d241d9e",
"andre-the-giant-photos?grvVariant=d27feef0bfad84d60f335d3a8d241d9e&utm_campaign=gravityus2_142deb68f67fb1a99e7b80250fecc932&utm_medium=referral&utm_source=gravity",
"andre-the-giant-photos?grvVariant=d27feef0bfad84d60f335d3a8d241d9e&utm_campaign=gravityus2_16d63cf07ecf656f602b2d6b209344f7&utm_medium=referral&utm_source=gravity",
"andre-the-giant-photos?grvVariant=d27feef0bfad84d60f335d3a8d241d9e&utm_campaign=gravityus2_713050ecffc51540af02b2246ddf57dd&utm_medium=referral&utm_source=gravity",
"andre-the-giant-photos?grvVariant=d27feef0bfad84d60f335d3a8d241d9e&utm_campaign=gravityus2_c5bb3bc5e9408e0ad52ec9e787bd8654&utm_medium=referral&utm_source=gravity",
"andre-the-giant-photos?sr_source=lift_facebook&utm_campaign=simplereach_andre-the-giant-photos&utm_medium=social&utm_source=facebook",
"astounding-aerial-photography", "astounding-aerial-photography?utm_campaign=7002&utm_medium=rpages&utm_source=Facebook&utm_term=38e9e903d9ba59106d8b4d19be593f3de7ff8b91b12eafa03f2e382228f7b0d1",
"august-landmesser", "ben-franklin", "best-all-that-is-interesting-articles",
"bigfoot-facts", "celebrity-school-photos?grvVariant=82c0ce57a33dfd0209bdefc878665de0&utm_campaign=gravityus2_bc8646aefd6d0a16af03d7caf248f226&utm_medium=referral&utm_source=gravity",
"coolest-mushrooms?utm_campaign=taboolaINTL&utm_medium=referral&utm_source=taboola",
"craziest-ways-drugs-smuggled", "creepy-halloween-costumes",
"danakil-depression", "dark-john-lennon-quotes", "david-bowie-quotes",
"days-in-groundhog-day", "death-photos", "death-photos?utm_campaign=taboolaINTL&utm_medium=referral&utm_source=taboola",
"dr-seuss-quotes", "dream-chaser-spacecraft", "dust-bowl", "earth-two-planets",
"eixample-barcelona", "email-to-space", "evil-science-experiments",
"famous-incest", "famous-spies", "fun-facts-trivia", "golden-age-air-travel?utm_campaign=taboolaINTL&utm_medium=referral&utm_source=taboola",
"gross-foods", "gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=106965c54919c24bf37356500ec50f0709b1de621d6950bb4c5d48759ea3677e",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=184e0ee39e66af82f9b124b904f6e07964b211e902cb0dc00c28771ff46163a2",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=1a0ddea7bed770d5473c45e9f8d81dfd0c4fdd232f207c6b88b53c41ff220c59",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=538659f1fc53f28d2c87b93ac73973681c1a46a04954964ab6c52ed1ab09b33a",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=87caf0acb91ae2b202f1b00ad9eaad3fef20bbfb23405b9047fb2b5a5462ab9c",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=91eae42c8fc9568103d46e0b6b6ec08fc34fd68b2e1918ffe2333ec73035c95a",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=a72946874b2003a8e40635c6cf10c851d4e1c0ed45e645d69663214239550602",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=ab594f0a1be002c8c3db297e8d33b04678af40e6a6469ac815884ae0a014b3a3",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=fb1e333dd58cb7bb9251ec52290aae21771149f73e083440047068a69aaeae09",
"hilarious-insults", "hippie-communes", "hippie-communes?grvVariant=fda07538efb1c25617f7cc3d09c37c79",
"hippie-communes?grvVariant=fda07538efb1c25617f7cc3d09c37c79&utm_campaign=gravityus2_e3cd42d4745768460dab4694a972fd82&utm_medium=referral&utm_source=gravity",
"hippie-communes?pp=0", "history-of-the-vibrator", "history-of-the-vibrator?utm_campaign=whfbpd&utm_medium=social&utm_source=facebook",
"homosexuality-norm", "hunger-games-facts?utm_campaign=6905&utm_medium=rpages&utm_source=Facebook&utm_term=1a9e42ac8abb6ffa90bf0542206505e74d3df12114a2c4445527fb2b88ef8880",
"influential-photographs", "ingeniously-creative-ads", "insane-cults",
"insane-rulers", "inspirational-quotes", "inspirational-quotes?utm_medium=referral&utm_source=taboolainternal",
"interesting-facts-about-the-world", "interesting-quotes", "krokodil",
"making-a-murderer-theories", "maya-angelou-greatest-quotes",
"medieval-torture-devices", "milky-way-colorado", "montreal-metro",
"most-popular-female-names-in-america", "neil-degrasse-tyson-tweets",
"new-york-city-cinemagraphs", "new-york-subways-1980s", "north-korea-photographs",
"north-korea-photographs?utm_campaign=taboolaINTL&utm_medium=referral&utm_source=taboola",
"north-korea-photographs?utm_medium=referral&utm_source=taboolainternal",
"obama-aging", "pablo-escobar", "pablo-escobar??utm_source=facebook",
"pablo-escobar??utm_source=facebook&sr_source=lift_facebook&utm_campaign=simplereach_pablo-escobar&utm_medium=social",
"pablo-escobar?utm_campaign=whfbpd&utm_medium=social&utm_source=facebook",
"panda-facts", "photo-of-the-day-nasa-releases-crystal-clear-image-of-pluto",
"pollution-in-china-photographs", "pollution-in-china-photographs?utm_campaign=3434&utm_medium=rpages&utm_source=Facebook&utm_term=1a0ddea7bed770d5473c45e9f8d81dfd0c4fdd232f207c6b88b53c41ff220c59",
"pollution-in-china-photographs?utm_campaign=3434&utm_medium=rpages&utm_source=Facebook&utm_term=e28a76c1572c36c3a13965e52b4b2ea10518eb9f9c79c4bc84cfb85db16be81e",
"pollution-in-china-photographs?utm_campaign=6806&utm_medium=rpages&utm_source=Facebook&utm_term=1a0ddea7bed770d5473c45e9f8d81dfd0c4fdd232f207c6b88b53c41ff220c59",
"pollution-in-china-photographs?utm_campaign=7048&utm_medium=rpages&utm_source=Facebook&utm_term=2ef4bd7b6cd587601d6eeb35925282a1ed095ebbd4e9e4c0337ef868c7de7a0b",
"pollution-in-china-photographs?utm_campaign=7458&utm_medium=rpages&utm_source=Facebook&utm_term=b9e79a51cd4daf4c3ec02accce75b3e1fc9a22cb3133460c9c32a4f2f9cdb68c",
"powerful-photos-of-2014", "real-x-files", "romanovs-last-days",
"science-of-human-decay", "scientific-discoveries-2015", "scully-effect",
"serial-killer-quotes", "shah-iran", "six-of-the-craziest-gods-in-mythology",
"space-facts", "sun-facts", "sunken-cities", "sunken-ships",
"super-bowl-i-facts", "superhero-movies", "surreal-places", "syrian-civil-war-photographs",
"the-five-greatest-mysteries-of-human-history", "the-four-most-important-battles-of-ancient-greece",
"the-most-colorful-cities-in-the-world", "titanic-facts", "titanic-facts?utm_campaign=6385&utm_medium=rpages&utm_source=Facebook&utm_term=f5905e878216d14e20457ee3265caf6c10022d9545609edfb9a3cb0642c1a310",
"titanic-facts?utm_campaign=6899&utm_medium=rpages&utm_source=Facebook&utm_term=b9e79a51cd4daf4c3ec02accce75b3e1fc9a22cb3133460c9c32a4f2f9cdb68c",
"titanic-facts?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=106965c54919c24bf37356500ec50f0709b1de621d6950bb4c5d48759ea3677e",
"titanic-facts?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=538659f1fc53f28d2c87b93ac73973681c1a46a04954964ab6c52ed1ab09b33a",
"titanic-facts?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=91eae42c8fc9568103d46e0b6b6ec08fc34fd68b2e1918ffe2333ec73035c95a",
"titanic-facts?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=ab594f0a1be002c8c3db297e8d33b04678af40e6a6469ac815884ae0a014b3a3",
"titanic-facts?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=d1864657a05e5b716bb5cb16a29f068a55652eb39fb669ea9c22a6486198f227",
"titanic-facts?utm_campaign=7292&utm_medium=rpages&utm_source=Facebook&utm_term=f5905e878216d14e20457ee3265caf6c10022d9545609edfb9a3cb0642c1a310",
"us-veterans-portraits", "vintage-disneyland", "wall-street-early-20th-century",
"what-we-love-this-week-the-incredible-last-words-of-famous-historical-figures",
"woodstock-photos", "zombie-proof-house"), class = "factor"),
`0089` = c(0, 0, 0, 0, 0, 1), `0096` = c(0, 0, 0, 0, 0, 0
), `02` = c(0, 0, 0, 0, 0, 0), `0215` = c(0, 0, 0, 0, 0,
0), `0225` = c(0, 0, 0, 0, 0, 0), `0252` = c(0, 0, 0, 0,
0, 0), `0271` = c(0, 0, 0, 0, 0, 0), `0272` = c(0, 0, 0,
0, 0, 0), `03` = c(0, 0, 0, 0, 1, 1)), .Names = c("path",
"0089", "0096", "02", "0215", "0225", "0252", "0271", "0272",
"03"), row.names = c(NA, 6L), class = "data.frame")
I need to apply the function min(x, 1) to every value in the data frame (except the first column, which is not numeric), so that each value is replaced by min(x, 1). That way I end up with only zeros and ones.
I have tried:
# Caps a value at 1. min() reduces all of its arguments to one single number;
# it does not work element by element.
f <- function(x) min(1,x)
# NOTE(review): do.call() hands each column of tes[,2:ncol(tes)] to f as a
# separate (named) argument, while f declares only the one parameter `x`, so
# this call cannot produce a per-cell result.
res1<-do.call(f,tes[,2:ncol(tes)])
but that does not output the right result.
Any help appreciated.
We can use pmin
# Cap every value outside the first column at 1, one column at a time.
tes[-1] <- lapply(tes[-1], pmin, 1)
Or if we need only binary values
# Turn every value outside the first column into 0 (zero) or 1 (non-zero).
tes[-1] <- lapply(tes[-1], function(column) as.integer(column != 0))
I have a data.frame like this:
> dput(head(dat))
structure(list(`Gene name` = c("at1g01050", "at1g01080", "at1g01090",
"at1g01220", "at1g01320", "at1g01420"), `1_1` = c(0, 0, 0, 0,
0, 0), `1_2` = c(0, 0, 0, 0, 0, 0), `1_3` = c(0, 2.2266502274762,
0, 0, 0, 0), `1_4` = c(0, 1.42835007256373, 0, 0, 0, 0), `1_5` = c(0,
1, 0, 0, 0, 0.680307288653971), `1_6` = c(0, 0.974694551708235,
0.0703315834738149, 0, 0, 1.5411058346636), `1_7` = c(1, 1.06166030205396,
0, 0, 0, 0), `1_8` = c(1, 1.07309874414745, 0.129442847788922,
0, 0, 0), `1_9` = c(1.83566164452602, 0.770848509662441, 1.16522133036595,
1.02360016370994, 0, 0), `1_10` = c(0, 0, 0.96367393959757, 0,
0, 0), `1_11` = c(0, 1, 1.459452636222, 0, 0.992067202742928,
0), `1_12` = c(0, 0, 0.670100384155585, 0, 0.461601636474094,
0), `1_13` = c(0, 0, 1.43074917909221, 0, 1.35246977730244, 0
), `1_14` = c(0, 0, 1.13052717277684, 0, 1.27971261718285, 0),
`1_15` = c(0, 0, 0, 0, 0, 0), `1_16` = c(0, 0, 1.02186950513655,
0, 0.937805171752374, 0), `1_17` = c(0, 0, 0, 0, 1.82226410514639,
0), `1_18` = c(0, 0, 1.2057581396188, 0, 1, 0), `1_19` = c(0,
0, 2.54080080087007, 0, 1.74014162763125, 0), `1_20` = c(0,
0, 0, 0, 0, 0), `1_21` = c(0, 0, 1.85335086627868, 0, 2.93605031878879,
0), `1_22` = c(0, 0, 0, 0, 0, 0), `1_23` = c(0, 0, 0, 0,
0, 0), `1_24` = c(0, 0.59685787388353, 4.74450895485671,
0, 1.64665192735547, 0), `1_25` = c(0, 0, 0, 0, 0, 0), `1_26` = c(0,
0, 0, 0, 0, 0), `1_27` = c(0, 1.70324142554566, 0, 0, 0,
0), `1_28` = c(0, 4.02915818089525, 0, 0, 0, 0), `1_29` = c(0,
1.10050253348262, 0, 0, 0, 1.78705663080963), `1_30` = c(0,
0, 0, 0, 0, 0), `1_31` = c(0.525193634811661, 1.19203674964562,
0, 0, 0, 0), `1_32` = c(0.949695564218912, 0.511935958918944,
0.698256748091399, 0.924419021307232, 0, 0), `1_33` = c(1,
0.392202418854686, 0.981531026331928, 1, 0, 0), `1_34` = c(0,
0, 1.04480642952605, 0, 0, 0), `1_35` = c(0.875709646300199,
0.416787083481068, 0.910412293707794, 0, 0.931813162802324,
0), `1_36` = c(0.235817844851986, 0, 0.695496044366791, 0,
0, 0), `1_37` = c(0, 0, 0, 0, 0, 0), `1_38` = c(0, 0, 0,
0, 0, 0), `1_39` = c(0, 0, 0, 0, 0, 0), `1_40` = c(0, 0.426301584359177,
1.05916031917965, 0, 1.11716924423855, 0), `1_41` = c(0,
0, 0, 0, 0, 0), `1_42` = c(0, 0, 0, 0, 0, 0), `1_43` = c(0,
0, 0, 0, 0, 0), `1_44` = c(0, 0.817605484758179, 1, 0, 1,
0), `1_45` = c(0, 0, 0, 0, 1.83706702696725, 0), `1_46` = c(0,
0, 0, 0, 0, 0), `1_48` = c(0, 0, 0, 0, 0, 0), `1_49` = c(0,
0, 0, 0, 0, 0), `1_50` = c(0, 0, 0, 0, 0, 0), `1_51` = c(0,
0.822966241998042, 0, 0, 0, 0), `1_52` = c(0, 1.38548267401525,
0, 0, 0, 0), `1_53` = c(0, 0.693090058304095, 0, 0, 0, 1.200664746484
), `1_54` = c(0, 7.58136662752864, 0, 0, 0, 0), `1_55` = c(0.519878111919004,
0.530809413647805, 0.343274113384907, 0, 0, 0), `1_56` = c(1.24511715957891,
0.545097856366912, 0.397440073804376, 0, 0, 0), `1_57` = c(1.26748496499576,
0.502893153188496, 1, 1.09278985531586, 0, 0), `1_58` = c(0.696198684496234,
0.68197003689249, 1.30108437738319, 0.778091049180591, 0.533017938104689,
0), `1_59` = c(1.15255606344999, 0.294294436704185, 1.07862692616479,
1, 0.250091116406616, 0), `1_60` = c(1.95634163405497, 0,
1.1602014253913, 0, 0, 0), `1_61` = c(1.09287167009628, 0,
2.05939536537347, 1.08165521287259, 0.68027384701565, 0),
`1_62` = c(0.791776166968497, 0, 0.846107162142824, 0, 0.77013323652256,
0), `1_63` = c(0.378787010943447, 0.391876271945063, 0.623223753921758,
0, 0.651918444771296, 0), `1_64` = c(0.189585762007804, 0.361452381684218,
0.799519726870751, 0, 1.06818683719768, 0), `1_65` = c(0,
0, 2.5212953775211, 0, 0, 0), `1_66` = c(0, 0, 0, 0, 0, 0
), `1_67` = c(0, 0, 0, 0, 2.44827717262786, 0), `1_68` = c(0,
0, 0, 0, 0, 0), `1_69` = c(0, 0, 0, 0, 0, 0), `1_70` = c(0,
0, 2.36142611074334, 0, 2.391093649557, 0), `1_71` = c(0,
0, 0.35565044656798, 0, 0, 0), `1_72` = c(0, 0, 5.86951313801941,
0, 0, 0)), .Names = c("Gene name", "1_1", "1_2", "1_3", "1_4",
"1_5", "1_6", "1_7", "1_8", "1_9", "1_10", "1_11", "1_12", "1_13",
"1_14", "1_15", "1_16", "1_17", "1_18", "1_19", "1_20", "1_21",
"1_22", "1_23", "1_24", "1_25", "1_26", "1_27", "1_28", "1_29",
"1_30", "1_31", "1_32", "1_33", "1_34", "1_35", "1_36", "1_37",
"1_38", "1_39", "1_40", "1_41", "1_42", "1_43", "1_44", "1_45",
"1_46", "1_48", "1_49", "1_50", "1_51", "1_52", "1_53", "1_54",
"1_55", "1_56", "1_57", "1_58", "1_59", "1_60", "1_61", "1_62",
"1_63", "1_64", "1_65", "1_66", "1_67", "1_68", "1_69", "1_70",
"1_71", "1_72"), row.names = c(NA, 6L), class = "data.frame")
That's the code I use for calculation of the mean for 3 replicates which I have in the data frame:
## Calculating the mean of 3 "replicates"
# Column positions (within dat2) where each of the three replicates would
# start if all 72 value columns were present.
ind <- c(1, 25, 49)
# Drop the first column (gene name) so only the value columns remain.
dat2 <- dat[-1]
# For each fraction offset i = 0..23, average columns ind + i row-wise.
# NOTE(review): with only 71 value columns (1_47 is absent), ind + i reaches
# position 72 at i = 23, which does not exist, and the columns after 1_46 sit
# one position earlier than this indexing expects.
tbl_end <- cbind(dat[1], sapply(0:23, function(i) rowMeans(dat2[ind+i])))
That's an error which comes:
Error in `[.data.frame`(dat2, ind + i) : undefined columns selected
Called from: eval(substitute(browser(skipCalls = pos), list(pos = 9 - frame)),
envir = sys.frame(frame))
I have 71 result columns. There should be 72 (24 fractions × 3 replicates), but one column is missing: there is no 1_47, which should be averaged together with 1_23 and 1_71. I don't know why it is missing, but I have to work around it. Do you have any idea how I can edit my function so that it ignores fraction 1_47 and still returns the mean of 1_23 and 1_71?
Why not just add a dummy column for 1_47? That will make your data more regular and make it much easier to extract the indexes you need. To do this, try
# Insert an all-NA placeholder for the missing fraction 1_47 so that every
# replicate again spans 24 consecutive columns. The name has to be
# back-quoted because 1_47 is not a syntactic R name.
dat2 <- cbind(dat[1:47], `1_47` = rep(NA_real_, nrow(dat)), dat[48:72])
# First column of each replicate, counted within the value columns.
ind <- c(1, 25, 49)
# dat2 still carries the gene-name column in position 1, hence the "+ 1".
# na.rm = TRUE lets the placeholder drop out, so the fraction that lost 1_47
# is averaged over 1_23 and 1_71 only.
tbl_end <- cbind(
  dat[1],
  sapply(0:23, function(i) rowMeans(dat2[ind + i + 1], na.rm = TRUE))
)
Let's start with my data:
> dput(head(tbl_end))
structure(list(`Gene name` = c("at1g01050.1", "at1g01080.1",
"at1g01090.1", "at1g01220.1", "at1g01320.2", "at1g01420.1"),
`1_1` = c(0, 0, 0, 0, 0, 0), `1_2` = c(0, 0, 0, 0, 0, 0),
`1_3` = c(0, 1, 0, 0, 0, 0), `1_4` = c(0, 0.660693687777888,
0, 0, 0, 0), `1_5` = c(0, 0.521435654491704, 0, 0, 0, 1),
`1_6` = c(0, 0.437291194705566, 0, 0, 0, 1), `1_7` = c(0,
0.52204783488213, 0, 0, 0, 0), `1_8` = c(0, 0.524298383907171,
0, 0, 0, 0), `1_9` = c(1, 0.376865096972469, 0, 1, 0, 0),
`1_10` = c(0, 0, 0, 0, 0, 0), `1_11` = c(0, 0, 0, 0, 0, 0
), `1_12` = c(0, 0, 0, 0, 0, 0), `1_13` = c(0, 0, 0, 0, 0,
0), `1_14` = c(0, 0, 0, 0, 0, 0), `1_15` = c(0, 0, 0, 0,
0, 0), `1_16` = c(0, 0, 0, 0, 0, 0), `1_17` = c(0, 0, 0,
0, 0, 0), `1_18` = c(0, 0, 0.476101907006443, 0, 0, 0), `1_19` = c(0,
0, 1, 0, 0, 0), `1_20` = c(0, 0, 0, 0, 0, 0), `1_21` = c(0,
0, 0, 0, 1, 0), `1_22` = c(0, 0, 0, 0, 0, 0), `1_23` = c(0,
0, 0, 0, 0, 0), `1_24` = c(0, 0, 0, 0, 0, 0)), .Names = c("Gene name",
"1_1", "1_2", "1_3", "1_4", "1_5", "1_6", "1_7", "1_8", "1_9",
"1_10", "1_11", "1_12", "1_13", "1_14", "1_15", "1_16", "1_17",
"1_18", "1_19", "1_20", "1_21", "1_22", "1_23", "1_24"), row.names = c(NA,
6L), class = "data.frame")
I have more than 2k rows. I use the gene name as the row name, but there is a problem: sometimes the same gene has different "models" (indicated by a dot after the name followed by a number such as 1 or 2), yet it is still the same gene. I want to find all of those duplicates (rows with the same gene name), average the values in each column for that gene, and keep just one row with the averaged values.
Is it possible to do ?
Just showing some of the gene names I have:
> dput(vec_names)
c("at1g01050.1", "at1g01080.1", "at1g01090.1", "at1g01220.1",
"at1g01320.2", "at1g01420.1", "at1g01470.1", "at1g01800.1", "at1g01910.5",
"at1g01920.2", "at1g01960.1", "at1g01980.1", "at1g02020.2", "at1g02100.2",
"at1g02130.1", "at1g02140.1", "at1g02150.1", "at1g02305.1", "at1g02500.2",
"at1g02560.1", "at1g02780.1", "at1g02880.3", "at1g02920.1", "at1g02930.2",
"at1g03030.1", "at1g03090.2", "at1g03110.1", "at1g03130.1", "at1g03210.1",
"at1g03220.1", "at1g03230.1", "at1g03310.2", "at1g03330.1", "at1g03475.1",
"at1g03630.2", "at1g03680.1", "at1g03870.1", "at1g03900.1", "at1g04080.2",
"at1g04130.1", "at1g04170.1", "at1g04190.1", "at1g04270.2", "at1g04350.1",
"at1g04410.1", "at1g04420.1", "at1g04530.1", "at1g04640.2", "at1g04690.1",
"at1g04750.2", "at1g04810.1", "at1g04850.1", "at1g04870.2", "at1g05010.1",
"at1g05180.1", "at1g05190.1", "at1g05320.3", "at1g05350.1", "at1g05520.1",
"at1g05560.1", "at1g05620.2", "at1g06000.1", "at1g06110.1", "at1g06130.2",
"at1g06290.1", "at1g06410.1", "at1g06550.1", "at1g06560.1", "at1g06570.1",
I think there is a function for that but can't find it.
Using data.table
library(data.table)
dt <- as.data.table(dat)
# Strip the trailing ".<model number>" so that all models of one gene share
# the same key (e.g. "at1g01320.2" -> "at1g01320"). The previous pattern
# "[.]*" only deleted the dot and left the model number in place.
dt[, gene_unique := sub("[.][0-9]+$", "", `Gene name`)]
# Every column except the two identifier columns holds values to average.
cols <- setdiff(names(dt), c("Gene name", "gene_unique"))
# One row per gene with the column-wise mean over its models.
dt[, lapply(.SD, mean), by = gene_unique, .SDcols = cols]
Using aggregate as suggested in comments
# Strip the trailing ".<model number>" so that all models of one gene share
# the same name; the previous pattern "[.]*" only removed the dot itself.
dat$`Gene name` <- sub("[.][0-9]+$", "", dat[["Gene name"]])
# Average every remaining column within each gene.
aggregate(. ~ `Gene name`, data = dat, FUN = mean)
Probably the code is very simple, but I have never tried plotting in R before.
I would like a line plot for every row, with each plot on a separate graph.
The values in my data range from 0 to 1. The value 1 is the maximum of the plot; in some cases a single row may contain several maxima. I would like a PDF file as the output.
Data:
> dput(head(tbl_end))
structure(list(`NA` = structure(1:6, .Label = c("AT1G01050",
"AT1G01080", "AT1G01090", "AT1G01220", "AT1G01320", "AT1G01420",
"ATCG00800", "ATCG00810", "ATCG00820", "ATCG01090", "ATCG01110",
"ATCG01120", "ATCG01240", "ATCG01300", "ATCG01310", "ATMG01190"
), class = "factor"), `10` = c(0, 0, 0, 0, 0, 0), `20` = c(0,
0, 0, 0, 0, 0), `52.5` = c(0, 1, 0, 0, 0, 0), `81` = c(0, 0.660693687777888,
0, 0, 0, 0), `110` = c(0, 0.521435654491704, 0, 0, 0, 1), `140.5` = c(0,
0.437291194705566, 0, 0, 0, 1), `189` = c(0, 0.52204783488213,
0, 0, 0, 0), `222.5` = c(0, 0.524298383907171, 0, 0, 0, 0), `278` = c(1,
0.376865096972469, 0, 1, 0, 0), `340` = c(0, 0, 0, 0, 0, 0),
`397` = c(0, 0, 0, 0, 0, 0), `453.5` = c(0, 0, 0, 0, 0, 0
), `529` = c(0, 0, 0, 0, 0, 0), `580` = c(0, 0, 0, 0, 0,
0), `630.5` = c(0, 0, 0, 0, 0, 0), `683.5` = c(0, 0, 0, 0,
0, 0), `735.5` = c(0, 0, 0, 0, 0, 0), `784` = c(0, 0, 0.476101907006443,
0, 0, 0), `832` = c(0, 0, 1, 0, 0, 0), `882.5` = c(0, 0,
0, 0, 0, 0), `926.5` = c(0, 0, 0, 0, 1, 0), `973` = c(0,
0, 0, 0, 0, 0), `1108` = c(0, 0, 0, 0, 0, 0), `1200` = c(0,
0, 0, 0, 0, 0)), .Names = c(NA, "10", "20", "52.5", "81",
"110", "140.5", "189", "222.5", "278", "340", "397", "453.5",
"529", "580", "630.5", "683.5", "735.5", "784", "832", "882.5",
"926.5", "973", "1108", "1200"), row.names = c(NA, 6L), class = "data.frame").
It would be great to have the row name at the top of each page in the PDF.
Here's an example using your dputed data:
# open the pdf file
pdf(file = "myfile.pdf")
# since I don't know what values should be on the X axis,
# I'm just using values from 1 to number of y-values
x <- seq_len(ncol(tbl_end) - 1)
# seq_len() yields no iterations for an empty data frame, whereas
# 1:nrow(tbl_end) would run over c(1, 0)
for (i in seq_len(nrow(tbl_end))) {
  # plot onto a new pdf page; the row is flattened to a plain numeric vector
  # and the row label (first column) is converted to text for the title
  plot(x = x, y = unlist(tbl_end[i, -1]), type = "b",
       main = as.character(tbl_end[i, 1]), xlab = "X", ylab = "Y")
}
# close the pdf file
dev.off()
where the first page is something like this:
If you want to change the style (e.g. lines without the little circles etc.) of the plot, have a look at the documentation.