Bar-chart legend position (avoiding overlap) in R - r

I have the following kind of data:
A B C D E F
Series1 681968620 814707019 689302814 827844038 778849469 826532174
Series2 41507149 53403451 52857261 52319991 59246699 104253758
Series3 869316619 722165946 858134539 716641489 759754131 668183913
Series4 12642153 15158215 5140017 8629111 7170466 6464783
When I am plotting my bar chart, using the following command:
# Grouped bar chart of m with the built-in legend anchored at the top right.
barplot(
  height = m,
  beside = TRUE,
  ylab = "Area (m^2)",
  col = colorcode,
  legend.text = c("Series1", "Series2", "Series3", "Series4"),
  args.legend = list(x = "topright")
)
the legend overlaps with the bars in the diagram. How can I place the legend properly, so that it does not cover the bars?

# Widen the right margin so the legend can sit beside the bars, and remember
# the previous graphical parameters so they can be restored afterwards.
old_par <- par(mfrow = c(1, 1), mar = c(5, 5, 4, 10))
barplot(
  height = m,
  beside = TRUE,
  ylab = "Area (m^2)",
  # One colour and one legend entry per row (series) of m.
  col = seq_len(nrow(m)),
  legend.text = rownames(m),
  # The negative horizontal inset shifts the legend to the right, into the
  # enlarged margin; bty = "n" suppresses the box around the legend.
  args.legend = list(x = "topright", bty = "n", inset = c(-0.25, 0))
)
# Put the graphical parameters back so later plots are not affected.
par(old_par)
If you don't want scientific notation on the y-axis, you can change the options before running the plotting code, for example:
# A large scipen penalty makes R prefer fixed over scientific notation.
# options() returns the previous settings, so they can be restored later
# with options(op).
op <- options(scipen=999)
data
m <- structure(c(681968620L, 41507149L, 869316619L, 12642153L, 814707019L,
53403451L, 722165946L, 15158215L, 689302814L, 52857261L, 858134539L,
5140017L, 827844038L, 52319991L, 716641489L, 8629111L, 778849469L,
59246699L, 759754131L, 7170466L, 826532174L, 104253758L, 668183913L,
6464783L), .Dim = c(4L, 6L), .Dimnames = list(c("Series1", "Series2",
"Series3", "Series4"), c("A", "B", "C", "D", "E", "F")))

Related

Formatting p-value cut-off line in a volcano plot in R

I am using the following function in R to develop a simple volcano plot:
# Volcano plot of log fold change against adjusted p-value, labelled by gene ID.
EnhancedVolcano(
  all_genes,
  x = "logFC",
  y = "adjust.p.value",
  lab = all_genes$Gene.ID,
  pCutoff = 10e-2,
  FCcutoff = 1
)
I would like my pCutoff line to appear to represent p = 0.05 which on a log scale for this figure would appear as 1.3 on the y-axis. However, changing "10e-2" to say "10e-2.5" generates an error
Error: unexpected numeric constant in: "EnhancedVolcano(all_genes, x =
"logFC", y = "adjust.p.value", lab = all_genes$Gene.ID,
pCutoff = 10e-2.5"
Any suggestions on how I can get a horizontal p-value cut-off line at exactly 1.3 (it currently appears at 1.2)? Here is some reproducible data:
structure(list(X = 1:14, Gene.ID = c("A", "B", "C", "D", "E", "F",
"G", "H", "I", "J", "K", "L", "M", "N"), logFC = c(1.5,
0.17212922, 0.145542174, 0.304348578, 0.124636936, 0.247841044,
0.160818268, 0.123741518, 0.148530876, 0.148960225, 0.114135472,
-0.147118359, 0.095549291, 0.138521594), AveExpr = c(5.426424957,
4.289728233, 4.901134193, 4.742864705, 5.447030699, 4.539641767,
4.650750102, 5.901020922, 5.365944907, 5.818788787, 4.837214384,
7.017656548, 4.531897822, 5.192294452), t = c(6.15098624, 5.452898247,
4.979246654, 4.949519834, 4.818043279, 4.73403717, 4.701937811,
4.522692175, 4.518518374, 4.281900066, 4.247981727, -4.194421592,
4.10350597, 4.088357671), p.value = c(1.27e-09, 6.8e-08, 7.99e-07,
9.26e-07, 1.77e-06, 2.65e-06, 3.09e-06, 7.13e-06, 7.27e-06, 2.1e-05,
2.44e-05, 3.07e-05, 4.53e-05, 4.83e-05), adjust.p.value = c(1.64e-05,
0.000438854, 0.002987004, 0.002987004, 0.004558267, 0.005687325,
0.005687325, 0.010422933, 0.010422933, 0.027128901, 0.028601707,
0.033061438, 0.04452146, 0.04452146), B = c(11.2786109, 7.664706936,
5.439886439, 5.306497286, 4.725465519, 4.361868581, 4.224515919,
3.473656504, 3.45649938, 2.508304771, 2.376338878, 2.169980059,
1.825392322, 1.76867543)), class = "data.frame", row.names = c(NA,
-14L))
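R's `e` notation only accepts an integer exponent, which is why `10e-2.5` fails to parse. I think you want the following code, where the cut-off is computed as p = 10^-s and s is the desired position on the -log10 axis (1.3 in your case):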
library(EnhancedVolcano)
# Express the cut-off as a power of ten: -log10(10^-1.3) is 1.3.
EnhancedVolcano(
  all_genes,
  x = "logFC",
  y = "adjust.p.value",
  lab = all_genes$Gene.ID,
  pCutoff = 10^-1.3,
  FCcutoff = 1
)
Output:

R Barplot Chart Problem: Error in -0.01 * height : non-numeric argument to binary operator

I have a tab file .txt as the following example:
> dput(data)
structure(list(file = structure(c(1L, 3L, 2L, 4L, 5L, 6L), .Label = c("3D7B1-C22-R1",
"3D7B1-C22-R1Nuc", "3D7B1-C22-R2", "Coh03-C26-R1", "Coh05-C22-R1",
"Coh05-C22-R2", "Coh05-C22-R3", "Coh06-C26-R1", "Coh06-C26-R2",
"Coh06-C26-R3", "Dec02-C26-R1", "Dec02-C26-R2", "Dec02-C26-R3",
"Dec03-C26-R1", "Dec03-C26-R2", "Dec03-C26-R3", "Dry01-C22-R1",
"Dry01-C22-R2", "Dry01-C22-R3", "Dry02-C22-R1", "Dry02-C22-R2",
"Dry02-C22-R3", "Dry03-C26-R1", "Dry03-C26-R2", "Dry04-C22-R1",
"Dry04-C22-R2", "Dry04-C22-R3", "Dry05-C22-R1", "Dry05-C22-R2",
"PCD01-C22-R1", "PCD01-C22-R2", "PCD03-C26-R1", "PCD03-C26-R2",
"PCD03-C26-R3", "PCD04-C22-R1", "PCD04-C22-R2", "PCD04-C22-R3",
"Wet01-C22-R1", "Wet01-C22-R2", "Wet01-C26-R1", "Wet01-C26-R2",
"Wet03-C22-R1", "Wet03-C22-R2", "Wet03-C22-R3", "Wet04-C22-R1"
), class = "factor"), percmapped = c(72.59865886, 69.76716768,
87.65023774, 12.53737526, 26.34900309, 27.35084022), Percdedup = c(63.82326317,
62.34470705, 71.51045653, 8.384373203, 20.56819736, 21.01794928
), percunique = c(61.37889243, 59.98338518, 68.20508459, 7.715037189,
19.58090215, 20.01226738)), row.names = c(NA, 6L), class = "data.frame")
I wish to make a barplot.
So I ran the following commands:
# Read the tab-separated table, treating "," as the decimal mark.
data <- read.table("Book1.txt", header = TRUE, sep = "\t", dec = ",")
head(data)

# Transpose columns 1 to 4 and name the resulting columns after column 1.
m <- t(as.matrix(data[, 1:4]))
colnames(m) <- data[[1]]

barplot(
  m,
  main = "STAR pipeline",
  beside = TRUE,
  ylab = "Percentage",
  col = colours,
  las = 2,
  cex.names = 0.6
)
When calling barplot(), R returns the error message below:
Error in -0.01 * height : non-numeric argument to binary operator
What does that mean? How to make my barplot?
Thanks for your help.
Simply adjust your matrix build to keep character and numeric values separate. Then plot accordingly. Also, be sure to add the legend.text argument.
# Build the matrix from columns 2 to 4 only; column 1 is left out of the
# matrix and used for the column names instead.
m <- t(as.matrix(data[, 2:4]))
colnames(m) <- data[[1]]
m
# 3D7B1-C22-R1 3D7B1-C22-R2 3D7B1-C22-R1Nuc Coh03-C26-R1 Coh05-C22-R1 Coh05-C22-R2
# percmapped 72.59866 69.76717 87.65024 12.537375 26.3490 27.35084
# Percdedup 63.82326 62.34471 71.51046 8.384373 20.5682 21.01795
# percunique 61.37889 59.98339 68.20508 7.715037 19.5809 20.01227

# Grouped bars, with the row names of m as legend entries.
barplot(
  m,
  main = "STAR pipeline",
  beside = TRUE,
  ylab = "Percentage",
  col = colours,
  las = 2,
  cex.names = 0.6,
  legend.text = rownames(m)
)
Using rainbow color palette and legend.text argument:
# Same grouped barplot, with one rainbow colour per row name of m.
barplot(
  m,
  main = "STAR pipeline",
  beside = TRUE,
  ylab = "Percentage",
  las = 2,
  cex.names = 0.6,
  col = rainbow(length(rownames(m))),
  legend.text = rownames(m)
)
Going further, add a ylim argument for proper axis rendering and place the legend on top using a separate legend() call:
# Fix the y-axis to 0-100 so there is room above the bars for the legend.
barplot(
  m,
  main = "STAR pipeline",
  beside = TRUE,
  ylab = "Percentage",
  las = 2,
  cex.names = 0.6,
  col = rainbow(length(rownames(m))),
  ylim = c(0, 100)
)
# Draw the legend separately: centred at the top, all entries in one row.
legend(
  "top",
  legend = rownames(m),
  fill = rainbow(length(rownames(m))),
  ncol = length(rownames(m))
)
Rextester demo (click Run it (F8) at bottom for graphs)

How to set the y range in boxplot graph?

I'm using boxplot() in R. My code is:
#rm(list=ls())
#B2
# Relative errors are stored as fractions and scaled to percentages here;
# the *_qp_error vectors are used as given.
fps_error <- 100 * c(
  0.058404273, 0.028957446, 0.026276044, 0.07084294, 0.078438563,
  0.024000178, 0.120678965, 0.081774358, 0.025644741, 0.02931614
)
fps_qp_error <- c(
  1.833333333, 1.69047619, 1.666666667, 3.095238095, 2.738095238,
  1.714285714, 3.634146341, 5.142857143, 1.238095238, 2.30952381
)
bit_error <- 100 * c(
  0.141691737, 0.136173785, 0.073808209, 0.025057931, 0.165722097,
  0.004276999, 0.365353752, 0.164757488, 0.003362543, 0.022423845
)
bit_qp_error <- c(
  0.666666667, 0.785714286, 0.428571429, 0.142857143, 0.785714286,
  0.023809524, 1.523809524, 0.976190476, 0.023809524, 0.142857143
)
ssim_error <- 100 * c(
  0.01193773, 0.015151569, 0.003144532, 0.003182908, 0.008125274,
  0.013796366, 0.00359078, 0.019002591, 0.005031524, 0.004370175
)
ssim_qp_error <- c(
  3.833333333, 1.80952381, 0.69047619, 0.571428571, 2,
  1.904761905, 0.761904762, 2.119047619, 0.857142857, 0.976190476
)

# Column-bound matrices of the percentage errors and of the QP errors.
all_errors <- cbind(fps_error, bit_error, ssim_error)
all_qp_errors <- cbind(fps_qp_error, bit_qp_error, ssim_qp_error)
modes <- cbind(
  rep("FPS error", 10),
  rep("Bitrate error", 10),
  rep("SSIM error", 10)
)

# All six series side by side, one column per series.
journal_linear_data <- data.frame(
  fps_error, fps_qp_error,
  bit_error, bit_qp_error,
  ssim_error, ssim_qp_error
)

# Column names to plot and the matching x-axis labels.
yvars <- c("fps_error", "bit_error", "ssim_error")
yvars_qp <- c("fps_qp_error", "bit_qp_error", "ssim_qp_error")
xvars <- c("FPS", "Bitrate", "SSIM")
# Close any open devices and send the figure to a bitmap file.
graphics.off()
bmp(filename = "boxplot_B2_error.bmp")

# One row of three panels with an outer top margin; `op` keeps the previous
# values of the parameters changed here.
op <- par(
  mfrow = c(1, 3),
  oma = c(0, 0, 2, 0),
  mar = c(5.1, 7.1, 2.1, 2.1),
  mgp = c(4, 1, 0)
)
par(cex.lab = 3, cex.axis = 3)

# One boxplot per error column, labelled with the matching entry of xvars.
for (i in seq_along(yvars)) {
  boxplot(
    journal_linear_data[[yvars[i]]],
    xlab = xvars[i],
    ylab = "Percentage error",
    outcex = 2
  )
}

par(op)
mtext(
  text = "Percentage error per mode for B2",
  side = 3,
  line = 1.5,
  font = 2,
  cex = 2,
  adj = 0.95,
  col = "black"
)
dev.off()
The image output is shown below. As you can see, the y-axis does not have the same range in all three graphs. How can I fix this, for example by starting every axis at 0.5 or 0?
You can simply put ylim = c(0, 5) in each of your boxplot() calls. This sets the y-axis range (roughly) between 0 and 5.
Perhaps you did not see the ylim argument in ?boxplot; the "Arguments" section does not mention it. But ylim is just an ordinary graphical parameter passed via "...". You can also find an example of it in the "Examples" section of ?boxplot:
# Example from ?boxplot: xlim and ylim set the axis ranges explicitly.
boxplot(
  len ~ dose,
  data = ToothGrowth,
  subset = supp == "VC",
  boxwex = 0.25,
  at = 1:3 - 0.2,
  col = "yellow",
  main = "Guinea Pigs' Tooth Growth",
  xlab = "Vitamin C dose mg",
  ylab = "tooth length",
  xlim = c(0.5, 3.5),
  ylim = c(0, 35),
  yaxs = "i"
)

Intersect Spatial Lines in R

I have the following spatial object in R.
library(sp)
library(rgeos)
poly1 <- structure(c(-3.25753225, -3.33532866, -3.33503723, -3.35083008,
-3.35420388, -3.407372, -3.391667, -3.254167, -3.248129, -3.25753225,
47.78513433, 47.73738617, 47.73793803, 47.74440261, 47.74004583,
47.803846, 47.866667, 47.866667, 47.806292, 47.78513433),
.Dim = c(10L, 2L), .Dimnames = list(NULL, c("x", "y")))
poly2 <- structure(c(-3.101871, -3.097764, -3.20532, -3.260711, -3.248129,
-3.101871, 47.777041, 47.735975, 47.709087, 47.777982, 47.806292, 47.777041),
.Dim = c(6L, 2L), .Dimnames = list(NULL, c("x", "y")))
# Wrap each coordinate matrix in its own Polygons object (IDs "1" and "2"),
# then combine both into one SpatialPolygons object.
polygon_list <- list(
  Polygons(list(Polygon(poly1)), ID = "1"),
  Polygons(list(Polygon(poly2)), ID = "2")
)
sobj <- SpatialPolygons(polygon_list, proj4string = CRS("+proj=merc"))
plot(sobj)
I would like to obtain a Spatial Object containing the border line that the two polygons have in common, that is, the line that is in green in the next image.
# The segment shown in green in the question's image. Each row of the matrix
# is one end point, given as (x, y).
lines <- matrix(
  c(-3.248129, -3.25753225, 47.806292, 47.78513433),
  nrow = 2,
  ncol = 2
)
lobj <- SpatialLines(
  list(Lines(list(Line(lines)), ID = "1")),
  proj4string = CRS("+proj=merc")
)
# Overlay the segment on the existing polygon plot.
plot(lobj, col = "green", add = TRUE)
So far I have tried with the gIntersection function in rgeos package but it does not do what I require. How would I get this?
I think rgeos::gIntersection would be the method of choice, if your lines perfectly overlap. Consider the following simple example:
# Two collinear horizontal segments at y = 1: x from 1 to 5 and x from 3 to 10.
l1 <- SpatialLines(list(Lines(list(Line(cbind(c(1, 5), c(1, 1)))), 1)))
l2 <- SpatialLines(list(Lines(list(Line(cbind(c(3, 10), c(1, 1)))), 1)))

# Empty canvas, then both segments and (in red) their intersection.
plot(0, 0, type = "n", xlim = c(0, 10), ylim = c(0, 2))
lines(l1, lty = 2, lwd = 2)
lines(l2, lty = 3, lwd = 2)
lines(gIntersection(l1, l2), col = "red", lwd = 2)
One solution to your problem, although not perfect and maybe someone else has a better solution, would be to add a tiny buffer.
# Convert the polygon outlines to lines and buffer each one by a tiny width.
xx <- gBuffer(as(sobj, "SpatialLines"), width = 1e-5, byid = TRUE)
# Intersect the first buffered outline with the second.
xx <- gIntersection(xx[1, ], xx[2, ])

plot(sobj)
plot(xx, border = "red", lwd = 2, add = TRUE)

R barplot - keep same colours after sorting

I want to plot percentages for 3 variables (a,b,c) one after the others. So I have a matrix (%) for a set of activities for variable a, b and c.
dta = structure(c(0.0073, 0.1467, 0.0111, 0.0294, 0.0451, 0.0031, 0.1823,
0.0452, 0.2212, 0.1123, 7e-04, 0.1138, 0.0723, 0.1649, 0.0634),
.Dim = c(5L, 3L),
.Dimnames = list(c("c Work", "e Travel/Commute",
"f Cooking", "g Housework", "h Odd jobs"),
c("a", "b", "c")))
However, I would like to plot each variable sorted, while keeping the same colour for each activity.
These are the colours of the activities:
library(RColorBrewer)
# Five colours from the "Set2" palette, one per activity.
rc <- brewer.pal(n = 5, name = "Set2")

# Lookup table pairing each activity name with its colour.
kol <- data.frame(
  act = c("c Work", "e Travel/Commute", "f Cooking", "g Housework", "h Odd jobs"),
  colours = rc
)
act colours
1 c Work #66C2A5
2 e Travel/Commute #FC8D62
3 f Cooking #8DA0CB
4 g Housework #E78AC3
5 h Odd jobs #A6D854
So here are my barplots
# One horizontal barplot per column of dta, all using the same colour order.
par(mfrow = c(2, 2))
for (j in seq_len(ncol(dta))) {
  barplot(dta[, j], horiz = TRUE, las = 2, col = kol$colours)
}
What I want is to sort the bars but keep the same colours for the activities:
# One horizontal barplot per column of dta, bars sorted in ascending order.
par(mfrow = c(2, 2))
for (j in seq_len(ncol(dta))) {
  barplot(sort(dta[, j]), horiz = TRUE, las = 2)
}
How can I make it "match" ?
You can use the function match to match the names of the "entities" and the desired colours, for example, for the first column:
# Colours of the activities, in the order the first column of dta sorts them.
with(kol, colours[match(names(sort(dta[, 1])), act)])
so, to obtain your barplot, just do:
# Enlarge the left margin so the activity names fit, and remember the previous
# graphical parameters so they can be restored afterwards.
old_par <- par(mfrow = c(2, 2), mar = c(5, 8, 4, 1))
for (i in seq_len(ncol(dta))) {
  # Sort once and reuse the result for both the bar heights and the colours.
  sorted_col <- sort(dta[, i])
  # Look each activity up by name so it keeps its colour whatever its rank.
  # as.character() guards against kol$colours being a factor (the data-frame
  # default before R 4.0).
  bar_cols <- as.character(kol$colours[match(names(sorted_col), kol$act)])
  barplot(sorted_col, horiz = TRUE, las = 2, col = bar_cols)
}
par(old_par)

Resources