R barplot - keep same colours after sorting - r

I want to plot percentages for 3 variables (a,b,c) one after the others. So I have a matrix (%) for a set of activities for variable a, b and c.
dta = structure(c(0.0073, 0.1467, 0.0111, 0.0294, 0.0451, 0.0031, 0.1823,
0.0452, 0.2212, 0.1123, 7e-04, 0.1138, 0.0723, 0.1649, 0.0634),
.Dim = c(5L, 3L),
.Dimnames = list(c("c Work", "e Travel/Commute",
"f Cooking", "g Housework", "h Odd jobs"),
c("a", "b", "c")))
However, I would like to plot each variables sorted and but keeping the same colours for the set of activities.
So this is the colours of the activities.
library(RColorBrewer)
rc = c(brewer.pal(n = 5, name = 'Set2'))
kol = list()
kol$act <- c("c Work", "e Travel/Commute", "f Cooking", "g Housework", "h Odd jobs" )
kol$colours <- rc
kol = as.data.frame(kol)
act colours
1 c Work #66C2A5
2 e Travel/Commute #FC8D62
3 f Cooking #8DA0CB
4 g Housework #E78AC3
5 h Odd jobs #A6D854
So here are my barplots
par(mfrow = c(2,2))
barplot(dta[,1], horiz = T, las = 2, col = kol$colours)
barplot(dta[,2], horiz = T, las = 2, col = kol$colours)
barplot(dta[,3], horiz = T, las = 2, col = kol$colours)
So I want is to sort by keep the same colours for the activities
par(mfrow = c(2,2))
barplot(sort(dta[,1]), horiz = T, las = 2)
barplot(sort(dta[,2]), horiz = T, las = 2)
barplot(sort(dta[,3]), horiz = T, las = 2)
How can I make it "match" ?

You can use the function match to match the names of the "entities" and the desired colours, for example, for the first column:
kol$colours[match(names(sort(dta[,1])), kol$act)]
so, to obtain your barplot, just do:
par(mfrow = c(2,2), mar=c(5, 8, 4, 1)) # also modifying the margins to make the names fit in
for (i in 1:3) {
barplot(sort(dta[,i]), horiz = T, las = 2, col=kol$colours[match(names(sort(dta[, i])), kol$act)])
}

Related

R Barplot Chart Problem: Error in -0.01 * height : non-numeric argument to binary operator

I have a tab file .txt as the following example:
> dput(data)
structure(list(file = structure(c(1L, 3L, 2L, 4L, 5L, 6L), .Label = c("3D7B1-C22-R1",
"3D7B1-C22-R1Nuc", "3D7B1-C22-R2", "Coh03-C26-R1", "Coh05-C22-R1",
"Coh05-C22-R2", "Coh05-C22-R3", "Coh06-C26-R1", "Coh06-C26-R2",
"Coh06-C26-R3", "Dec02-C26-R1", "Dec02-C26-R2", "Dec02-C26-R3",
"Dec03-C26-R1", "Dec03-C26-R2", "Dec03-C26-R3", "Dry01-C22-R1",
"Dry01-C22-R2", "Dry01-C22-R3", "Dry02-C22-R1", "Dry02-C22-R2",
"Dry02-C22-R3", "Dry03-C26-R1", "Dry03-C26-R2", "Dry04-C22-R1",
"Dry04-C22-R2", "Dry04-C22-R3", "Dry05-C22-R1", "Dry05-C22-R2",
"PCD01-C22-R1", "PCD01-C22-R2", "PCD03-C26-R1", "PCD03-C26-R2",
"PCD03-C26-R3", "PCD04-C22-R1", "PCD04-C22-R2", "PCD04-C22-R3",
"Wet01-C22-R1", "Wet01-C22-R2", "Wet01-C26-R1", "Wet01-C26-R2",
"Wet03-C22-R1", "Wet03-C22-R2", "Wet03-C22-R3", "Wet04-C22-R1"
), class = "factor"), percmapped = c(72.59865886, 69.76716768,
87.65023774, 12.53737526, 26.34900309, 27.35084022), Percdedup = c(63.82326317,
62.34470705, 71.51045653, 8.384373203, 20.56819736, 21.01794928
), percunique = c(61.37889243, 59.98338518, 68.20508459, 7.715037189,
19.58090215, 20.01226738)), row.names = c(NA, 6L), class = "data.frame")
I wish to make a barplot.
So I made the following orders:
data <- read.table("Book1.txt", header = TRUE, sep = "\t", dec = ",")
head(data)
m <- t(as.matrix(data[1:4]))
colnames(m) <- data[,1]
barplot(m, main = "STAR pipeline", beside = T, ylab = "Percentage", col = colours, las = 2, cex.names = 0.6)
During the barplot, R me returns the error message below:
Error in -0.01 * height : non-numeric argument to binary operator
What does that mean? How to make my barplot?
Thanks for your help.
Simply adjust your matrix build to keep character and numeric values separate. Then plot accordingly. Also, be sure to add the legend.text argument.
m <- t(as.matrix(data[2:4])) # REMOVE 1
colnames(m) <- data[,1]
m
# 3D7B1-C22-R1 3D7B1-C22-R2 3D7B1-C22-R1Nuc Coh03-C26-R1 Coh05-C22-R1 Coh05-C22-R2
# percmapped 72.59866 69.76717 87.65024 12.537375 26.3490 27.35084
# Percdedup 63.82326 62.34471 71.51046 8.384373 20.5682 21.01795
# percunique 61.37889 59.98339 68.20508 7.715037 19.5809 20.01227
barplot(m, main = "STAR pipeline", beside = T, ylab = "Percentage", col = colours,
las = 2, cex.names = 0.6, legend.text = row.names(m))
Using rainbow color palette and legend.text argument:
barplot(m, main = "STAR pipeline", beside = T, ylab = "Percentage", las = 2, cex.names = 0.6,
col = rainbow(length(row.names(m))), legend.text = row.names(m))
Even more, add a ylim() for proper axis rendering and place legend on top using a separate legend() call:
barplot(m, main = "STAR pipeline", beside = T, ylab = "Percentage", las = 2, cex.names = 0.6,
col = rainbow(length(row.names(m))), ylim=c(0,100))
legend(x="top", legend = row.names(m), fill = rainbow(length(row.names(m))),
ncol=length(row.names(m)))
Rextester demo (click Run it (F8) at bottom for graphs)

Plotting over multiple lengths and columns

I am wanting to plot on the X axis 17K sets of intervals, where I plot the start and stop intervals for each Chr column. However, these intervals are not plotting correctly to each chr? This I figured out from how the plot was running off the right side and the red dots did not match up with input data. Thoughts on a fix? To be clear chr.len is the length of each chromosome.
Data;
ID Chr Start Stop
XLOC_007681 2R 11896162 11896597
XLOC_024365 3R 11283380 11286479
XLOC_021494 3R 16392979 16396291
XLOC_012125 3L 136830 138533
XLOC_031405 X 8002493 8004054
XLOC_014371 3L 15537489 15538755
XLOC_005808 2L 20704834 20706685
XLOC_005809 2L 20706861 20708183
XLOC_005807 2L 20703325 20703897
============================================================
chr.len <- c(22422827, 204112, 347038, 23011544, 368872, 21146708, 3288731, 24543557, 2555491, 27905053, 2517507, 1351857, 10049037)
names(chr.len) <- c("X", "XHet", "YHet", "2L", "2LHet", "2R", "2RHet", "3L", "3LHet", "3R", "3RHet", "4", "U")
chr.gap <- 2000000
chr.cum <- cumsum(c(0, chr.len[1:12])) + (0:12)*chr.gap
names(chr.cum) <- names(chr.len)
# ============================================================
png(file = "C:/Users/cahighfi/Desktop/XLOC_Position.png", width = 10, height = 5, units = "in", res = 300)
plot(c(0, chr.cum["U"] + chr.len["U"]), c(0, 1), type = "n", axes = FALSE, ylab = "", xlab = "", )
segments(XLOC.pos$Start + chr.cum[XLOC.pos$Chr], 0.5, XLOC.pos$Stop + chr.cum[XLOC.pos$Chr], 0.5, lwd = 10)
segments(DrugXLOC.pos$Start + chr.cum[DrugXLOC.pos$Chr], 0.5, DrugXLOC.pos$Stop + chr.cum[DrugXLOC.pos$Chr], 0.5, lwd = 10, col = c("red"))
axis(side = 1, at = chr.cum + chr.len/2, labels = parse(text = paste("italic(\"", names(chr.len), "\")", sep = "")), mgp = c(2.5, 0.5, 0), tck = -0.015, cex.axis = 1.0)
dev.off()
[Output plot][[1]]
[[1]]: https://i.stack.imgur.com/qPObX.png

Intersect Spatial Lines in R

I have the next spatial object in R.
library(sp)
library(rgeos)
poly1 <- structure(c(-3.25753225, -3.33532866, -3.33503723, -3.35083008,
-3.35420388, -3.407372, -3.391667, -3.254167, -3.248129, -3.25753225,
47.78513433, 47.73738617, 47.73793803, 47.74440261, 47.74004583,
47.803846, 47.866667, 47.866667, 47.806292, 47.78513433),
.Dim = c(10L, 2L), .Dimnames = list(NULL, c("x", "y")))
poly2 <- structure(c(-3.101871, -3.097764, -3.20532, -3.260711, -3.248129,
-3.101871, 47.777041, 47.735975, 47.709087, 47.777982, 47.806292, 47.777041),
.Dim = c(6L, 2L), .Dimnames = list(NULL, c("x", "y")))
sobj <- SpatialPolygons(
list(
Polygons(list(Polygon(poly1)), ID = '1'),
Polygons(list(Polygon(poly2)), ID = '2')),
proj4string = CRS('+proj=merc'))
plot(sobj)
I would like to obtain a Spatial Object containing the border line that the two polygons have in common, that is, the line that is in green in the next image.
lines <- matrix(c(-3.248129, -3.25753225, 47.806292, 47.78513433), 2, 2)
lobj <- SpatialLines(
list(
Lines(list(Line(lines)), ID = '1')),
proj4string = CRS('+proj=merc'))
plot(lobj, col = 'green', add = TRUE)
lines <- matrix(c(-3.248129, -3.25753225, 47.806292, 47.78513433), 2, 2)
lobj <- SpatialLines(
list(
Lines(list(Line(lines)), ID = '1')),
proj4string = CRS('+proj=merc'))
plot(lobj, col = 'green', add = TRUE)
So far I have tried with the gIntersection function in rgeos package but it does not do what I require. How would I get this?
I think rgeos::gIntersection would be the method of choice, if your lines perfectly overlap. Consider the following simple example:
l1 <- SpatialLines(list(Lines(list(Line(rbind(c(1, 1), c(5, 1)))), 1)))
l2 <- SpatialLines(list(Lines(list(Line(rbind(c(3, 1), c(10, 1)))), 1)))
plot(0, 0, ylim = c(0, 2), xlim = c(0, 10), type = "n")
lines(l1, lwd = 2, lty = 2)
lines(l2, lwd = 2, lty = 3)
lines(gIntersection(l1, l2), col = "red", lwd = 2)
One solution to your problem, although not perfect and maybe someone else has a better solution, would be to add a tiny buffer.
xx <- as(sobj, "SpatialLines")
xx <- gBuffer(xx, width = 1e-5, byid = TRUE)
xx <- gIntersection(xx[1, ], xx[2, ])
plot(sobj)
plot(xx, border = "red", add = TRUE, lwd = 2)

r density plot - fill area under curve [duplicate]

This question already has answers here:
Shading a kernel density plot between two points.
(5 answers)
Closed 7 years ago.
I've written code to plot density data for variations of an A/B test. I'd like to improve the visual by shading (with the fill being slightly transparent) the area below each curve. I'm currently using matplot, but understand ggplot might be a better option.
Any ideas? Thanks.
# Setup data frame - these are results from an A/B experiment
conv_data = data.frame(
VarNames = c("Variation 1", "Variation 2", "Variation 3") # Set variation names
,NumSuccess = c(1,90,899) # Set number of successes / conversions
,NumTrials = c(10,100,1070) # Set number of trials
)
conv_data$NumFailures = conv_data$NumTrials - conv_data$NumSuccess # Set number of failures [no conversions]
num_var = NROW(conv_data) # Set total number of variations
plot_col = rainbow(num_var) # Set plot colors
get_density_data <- function(n_var, s, f) {
x = seq(0,1,length.out=100) # 0.01,0.02,0.03...1
dens_data = matrix(data = NA, nrow=length(x), ncol=(n_var+1))
dens_data[,1] = x
# set density data
for(j in 1:n_var) {
# +1 to s[], f[] to ensure uniform prior
dens_data[,j+1] = dbeta(x, s[j]+1, f[j]+1)
}
return(dens_data)
}
density_data = get_density_data(num_var, conv_data$NumSuccess, conv_data$NumFailures)
matplot(density_data[,1]*100, density_data[,-1], type = "l", lty = 1, col = plot_col, ylab = "Probability Density", xlab = "Conversion Rate %", yaxt = "n")
legend("topleft", col=plot_col, legend = conv_data$VarNames, lwd = 1)
This produces the following plot:
# Setup data frame - these are results from an A/B experiment
conv_data = data.frame(
VarNames = c("Variation 1", "Variation 2", "Variation 3") # Set variation names
,NumSuccess = c(1,90,899) # Set number of successes / conversions
,NumTrials = c(10,100,1070) # Set number of trials
)
conv_data$NumFailures = conv_data$NumTrials - conv_data$NumSuccess # Set number of failures [no conversions]
num_var = NROW(conv_data) # Set total number of variations
plot_col = rainbow(num_var) # Set plot colors
get_density_data <- function(n_var, s, f) {
x = seq(0,1,length.out=100) # 0.01,0.02,0.03...1
dens_data = matrix(data = NA, nrow=length(x), ncol=(n_var+1))
dens_data[,1] = x
# set density data
for(j in 1:n_var) {
# +1 to s[], f[] to ensure uniform prior
dens_data[,j+1] = dbeta(x, s[j]+1, f[j]+1)
}
return(dens_data)
}
density_data = get_density_data(num_var, conv_data$NumSuccess, conv_data$NumFailures)
matplot(density_data[,1]*100, density_data[,-1], type = "l",
lty = 1, col = plot_col, ylab = "Probability Density",
xlab = "Conversion Rate %", yaxt = "n")
legend("topleft", col=plot_col, legend = conv_data$VarNames, lwd = 1)
## and add this part
for (ii in seq_along(plot_col))
polygon(c(density_data[, 1] * 100, rev(density_data[, 1] * 100)),
c(density_data[, ii + 1], rep(0, nrow(density_data))),
col = adjustcolor(plot_col[ii], alpha.f = .25))
Was able to answer own question with:
df = as.data.frame(t(conversion_data))
dfs = stack(df)
ggplot(dfs, aes(x=values)) + geom_density(aes(group=ind, colour=ind, fill=ind), alpha=0.3)

Bar-chart legend position (avoiding operlap) in R

I have the following kind of data:
A B C D E F
Series1 681968620 814707019 689302814 827844038 778849469 826532174
Series2 41507149 53403451 52857261 52319991 59246699 104253758
Series3 869316619 722165946 858134539 716641489 759754131 668183913
Series4 12642153 15158215 5140017 8629111 7170466 6464783
When I am plotting my bar chart, using the following command:
barplot(height = m,
beside=T, ylab = "Area (m^2)", col=colorcode,
legend.text = c("Series1", "Series2","Series3",
"Series4"),
args.legend = list(x = "topright"))
the legend overlaps with the bars present in the diagram. How can I place my legend properly, so that my graph looks good.
par(mfrow=c(1, 1), mar=c(5, 5, 4, 10))
barplot(height = m,
beside=T, ylab = "Area (m^2)", col=1:4,
legend.text = c("Series1", "Series2","Series3",
"Series4"),
args.legend = list(x ='topright', bty='n', inset=c(-0.25,0)))
If you don't want the scientific notation on y-axis, you can change the options before running the code, for example
op <- options(scipen=999)
data
m <- structure(c(681968620L, 41507149L, 869316619L, 12642153L, 814707019L,
53403451L, 722165946L, 15158215L, 689302814L, 52857261L, 858134539L,
5140017L, 827844038L, 52319991L, 716641489L, 8629111L, 778849469L,
59246699L, 759754131L, 7170466L, 826532174L, 104253758L, 668183913L,
6464783L), .Dim = c(4L, 6L), .Dimnames = list(c("Series1", "Series2",
"Series3", "Series4"), c("A", "B", "C", "D", "E", "F")))

Resources