plot vertical color line in R - r

I would like to plot multiple color vertical line for a time series as below,
[1,] 4.698478 0
[2,] 4.698205 1
[3,] 4.698569 0
[4,] 4.697385 -1
...
to plot a blue color vertical line when [,2] is 1, and a red line when [,2] is -1, wonder if someone can help, thanks!

You can do the following, assuming data[, 1] is the x-value for the vertical line:
abline(v = data[data[, 2] == 1, 1], col = 'blue')
abline(v = data[data[, 2] == -1, 1], col = 'red')

Nothing wrong with #ChristopherLouden's answer, but this would be a way to do it in a single call to abline:
abline(v=m[,1], col=c('red', NA, 'blue')[as.numeric(as.factor(m[,2]))])

I ended up using quantmod's charting function, pretty good results.
chartSeries(price[,1])
addTA(price[,2]==1,pch=1, on=1,col="blue")
addTA(price[,2]==-1,pch=1, on=1,col="red")

Related

I'd like to extend a custom function for barplots so that I can stack the bars

I'd like to make a three-panel figure with each panel containing grouped barplots . And for the third panel I'd like to make the plot stacked. I've found excellent code here: Simplest way to do grouped barplot. I can get everything except for the stacked plot using the elegant solution at the end of this entry.
I tried many iterations of base R code to get a stacked barplot, but kept running into issues with having a dataframe and not a matrix or vector for the height argument. When I converted to a matrix though I lost the factors in my data. I'll put my working code for the other plots below and indicate where I'd like to add the stacked variable.
Here is a sample of my data
Population Sex numsnpsused numoutliers percentoutliers numoutspc1 numoutspc2
all f_and_m 8728 70 0.80 55 15
all f 6613 11 0.17 9 2
all m 9958 151 1.52 91 60
south f_and_m 7358 51 0.69 15 36
south f 6547 65 0.99 17 48
south m 8068 69 0.86 18 51
Code:
bar <- function(dv, factors, dataframe, percentage=FALSE, errbar=!percentage, half.errbar=TRUE, conf.level=.95,
xlab=NULL, ylab=NULL, main=NULL, names.arg=NULL, bar.col="black", whisker=.015,args.errbar=NULL,
legend=TRUE, legend.text=NULL, args.legend=NULL,legend.border=FALSE, box=TRUE, args.yaxis=NULL,
mar=c(5,4,3,2),...){
axes=!percentage
dv.name<-substitute(dv)
if(length(dv.name)>1) stop("'dv' only takes one variable")
dv.name<-as.character(dv.name)
dv<-dataframe[[dv.name]]
fnames<-substitute(factors)
if(length(fnames)==1){
factors<-as.character(fnames)
nf<-1
}else{
factors<-as.character(fnames[-1L])
nf<-length(factors)
}
if(nf>2) stop("This function accepts no more than 2 factors \n",
"\t-i.e., it only plots one-way or two-way designs.")
if(percentage & errbar){
warning("percentage=TRUE; error bars were not plotted")
errbar<-FALSE
}
if(!percentage) xbars<-tapply(dv, dataframe[,factors], mean, na.rm=TRUE)
else {
xbars<-tapply(dv, list(interaction(dataframe[,factors], lex.order=TRUE)), mean, na.rm=TRUE)
if(sum(na.omit(dv)!=0&na.omit(dv)!=1)>0)
stop("Data points in 'dv' need to be 0 or 1 in order to set 'percentage' to TRUE")
xbars<-rbind(xbars, 1-xbars)*100
}
if(errbar){
se<-tapply(dv, dataframe[,factors], sd, na.rm=TRUE)/sqrt(tapply(dv, dataframe[,factors], length))
conf.level=1-(1-conf.level)/2
lo.bar<-xbars-se*qnorm(conf.level)
hi.bar<-xbars+se*qnorm(conf.level)
}
extras<-list(...)
if(legend & !percentage){
if(is.null(legend.text))
legend.text<-sort(unique(dataframe[[factors[1]]]))
args.legend.temp<-list(x="topright", bty=if(!legend.border)"n" else "o",
inset=c(0,0))
if(is.list(args.legend))
args.legend<-modifyList(args.legend.temp, args.legend)
else
args.legend<-args.legend.temp
} else if(legend & percentage){
if(is.null(legend.text))
legend.text<-c("1", "0")
args.legend.temp<-list(x="topright", bty=if(!legend.border)"n" else "o",
inset=c(0,0))
if(is.list(args.legend))
args.legend<-modifyList(args.legend.temp, args.legend)
else
args.legend<-args.legend.temp
} else if(!legend){
args.legend<-NULL
legend.text<-NULL
}
if(errbar && legend && !percentage) ymax<-max(hi.bar)+max(hi.bar)/20
else if(errbar && legend && percentage) ymax<-115
else if(errbar && !legend) ymax <- max(xbars)
else if(!errbar && legend && percentage) ymax<-110
else if(!errbar) ymax<-max(xbars) + max(xbars)/20
if(!percentage){
args.barplot<-list(beside=TRUE, height=xbars, ylim=c(0, ymax), main=main, names.arg=names.arg,
col=hcl(h=seq(0,270, 270/(length(unique(dataframe[[factors[1]]]))))[-length(unique(dataframe[[factors[1]]]))]),
legend.text=legend.text, args.legend=args.legend, xpd=TRUE,
xlab=if(is.null(xlab)) factors[length(factors)] else xlab,
ylab=if(is.null(ylab)) dv.name else ylab, axes=axes)
}else{
args.barplot<-list(beside=TRUE, height=xbars, ylim=c(0, ymax), main=main, names.arg=names.arg,
col=hcl(h=seq(0,270, 270/(length(unique(dataframe[[factors[1]]]))))[-length(unique(dataframe[[factors[1]]]))]),
legend.text=legend.text, args.legend=args.legend, xpd=TRUE,
xlab=if(is.null(xlab)) " "[length(factors)] else xlab,
ylab=if(is.null(ylab)) "percentage" else ylab, axes=axes)
}
args.barplot<-modifyList(args.barplot, extras)
errbars = function(xvals, cilo, cihi, whisker, nc, args.errbar = NULL, half.errbar=TRUE) {
if(half.errbar){
cilo<-(cihi+cilo)/2
}
fixedArgs.bar = list(matlines, x=list(xvals),
y=lapply(split(as.data.frame(t(do.call("rbind",
list(cihi, cilo)))),1:nc),matrix,
nrow=2, byrow=T))
allArgs.bar = c(fixedArgs.bar, args.errbar)
whisker.len = whisker*(par("usr")[2] - par("usr")[1])/2
whiskers = rbind((xvals - whisker.len)[1,],
(xvals + whisker.len)[1,])
fixedArgs.lo = list(matlines, x=list(whiskers),
y=lapply(split(as.data.frame(t(do.call("rbind",
list(cilo, cilo)))), 1:nc), matrix, nrow=2, byrow=T))
allArgs.bar.lo = c(fixedArgs.lo, args.errbar)
fixedArgs.hi = list(matlines, x=list(whiskers),
y=lapply(split(as.data.frame(t(do.call("rbind",
list(cihi, cihi)))), 1:nc), matrix, nrow=2, byrow=T))
allArgs.bar.hi = c(fixedArgs.hi, args.errbar)
invisible(do.call(mapply, allArgs.bar))
if(!half.errbar) invisible(do.call(mapply, allArgs.bar.lo))
invisible(do.call(mapply, allArgs.bar.hi))
}
par(mar=mar)
errloc<-as.vector(do.call(barplot, args.barplot))
if(errbar){
errloc<-rbind(errloc, errloc)
lo.bar<-matrix(as.vector(lo.bar))
hi.bar<-matrix(as.vector(hi.bar))
args.errbar.temp<-list(col=bar.col, lty=1)
args.errbar<-if(is.null(args.errbar)|!is.list(args.errbar))
args.errbar.temp
else if(is.list(args.errbar))
modifyList(args.errbar.temp, args.errbar)
errbars(errloc, cilo=lo.bar, cihi=hi.bar, nc=1, whisker=whisker,
args.errbar=args.errbar, half.errbar=half.errbar)
}
if(box) box()
if(percentage){
args.yaxis.temp<-list(at=seq(0,100, 20), las=1)
args.yaxis<-if(!is.list(args.yaxis)) args.yaxis.temp else modifyList(args.yaxis.temp, args.yaxis)
do.call(axis, c(side=2, args.yaxis))
}
}
bar(dv = numsnpsused,
factors = c(Sex, Population),
dataframe = WALL,
errbar = FALSE,
col=c("red","purple","blue"),
ylab=c("Number of SNPs used in analysis"),
ylim=c(0, 12000)) #I increased the upper y-limit to accommodate the legend.
parpar(mfrow=c(2,2))
bar(dv = percentoutliers,
factors = c(Sex, Population),
dataframe = WALL,
errbar = FALSE,
col=c("red","purple","blue"),
ylab=c("% of SNPs that were outliers"),
ylim=c(0,3)) #I increased the upper y-limit to accommodate the legend.
##I want to include a second variable to stack in this plot
bar(dv = numoutspc1,
factors = c(Sex, Population),
dataframe = WALL,
errbar = FALSE,
col=c("red","purple","blue"),
ylab=c("Number of outliers associated with each PCaxis"),
ylim=c(0, 240)) #I increased the upper y-limit to accommodate the legend.
Here is the plot that I made
It's hard to provide a good answer without more details and what graph you expect as output. Here is a solution using ggplot2 that might start you off. You can imitate the grouped bar plot by merging the two grouping variables while still stacking by another variable.
df <- data.frame(y=sample(c(2000:2002),1000,T), k=sample(letters[1:3],1000,T),
g=sample(c(1:2),1000,T),
c=sample(c('t','f'),1000,T),stringsAsFactors = F)
df %>% count(y,k,g,c) %>% mutate(k1=paste0(k,g)) %>%
filter(y==2002|c!='f') %>%
ggplot(aes(k1,n,fill=c)) + geom_bar(stat='identity') + facet_wrap(~y)

Add a line to coplot {graphics}, classic approaches don't work

I found coplot {graphics} very useful for my plots. However, I would like to include there not only one line, but add there one another. For basic graphic I just need to add = TRUE to add another line, or tu use plot(..) and lines(..). For {lattice} I can save my plots as objects
a<-xyplot(..)
b<-xyplot(..)
and display it simply by a + as.layer(b). No one of these approaches works for coplot(), apparently because creating objects as a<-coplot() doesn't produce trellis graphic but NULL object.
Please, any help how to add data line in coplot()? I really like its graphic so I wish to keep it. Thank you !!
my exemle data are here: http://ulozto.cz/xPfS1uRH/repr-exemple-csv
My code:
sub.tab<-read.csv("repr_exemple.csv", , header = T, sep = "")
attach(sub.tab)
cells.f<-factor(cells, levels=c(2, 25, 100, 250, 500), # unique(cells.in.cluster)???
labels=c("size2", "size25", "size100", "size250", "size500"))
perc.f<-factor(perc, levels=c(5, 10), # unique(cells.in.cluster)???
labels=c("perc5", "perc10"))
# how to put these plots together?
a<- coplot(max_dist ~ time |cells.f + perc.f, data = sub.tab,
xlab = "ticks", type = "l", col = "black", lwd = 1)
b<- coplot(mean_dist ~ time |cells.f * perc.f, data = sub.tab,
xlab = "ticks", type = "l", col = "grey", lwd = 1)
a + as.layer(b) # this doesn't work
Please, how to merge these two plots (grey and black lines)? I couldn't figure it out... Thank you !
Linking to sample data isn't really as helpful. Here's a randomly created sample data set
set.seed(15)
dd <- do.call("rbind",
do.call("Map", c(list(function(a,b) {
cbind.data.frame(a,b, x=1:5,
y1=cumsum(rpois(5,7)),
y2=cumsum(rpois(5,9)))
}),
expand.grid(a=letters[1:5], b=letters[20:22])))
)
head(dd)
# a b x y1 y2
# 1 a t 1 8 16
# 2 a t 2 13 28
# 3 a t 3 25 35
# 4 a t 4 33 45
# 5 a t 5 39 57
# 6 b t 1 4 12
I will note the coplot is a base graphics function, not Lattice. But it does have a panel= parameter. And you can have the coplot() take care of subsetting your data for you (well, calculating the indexes at least). But, like other base graphics functions, plotting different groups isn't exactly trivial. You can do it in this case with
coplot(y~x|a+b,
# make a fake y col to cover range of all y1 and y2 values
cbind(dd, y=seq(min(dd$y1, dd$y2), max(dd$y1, dd$y2), length.out=nrow(dd))),
#request subscripts to be sent to panel function
subscripts=TRUE,
panel=function(x,y,subscripts, ...) {
# draw group 1
lines(x, dd$y1[subscripts])
# draw group 2
lines(x, dd$y2[subscripts], col="red")
})
This gives

Plotting raster images using custom colours in R

This might sound like a strange process, but its the best I can think of to control rasterised colour gradients with respect to discrete objects (points, lines, polygons). I'm 95% there but can't quite plot correctly.
This should illustrate proof of concept:
require(raster)
r = matrix(56:255, ncol=20) # reds
b = t(matrix(56:255, ncol=10)) # blues
col = matrix(rgb(r, 0, b, max=255), ncol=20) # matrix of colour strings
ras = raster(r) # data raster object
extent(ras) = extent(1,200,1,100) # set extent for aspect
plot(ras, col = col, axes=F, asp=T) # overwrite data with custom colours
Here I want to clip a raster to a triangle and create colour gradient of pixels inside based on their distances to one of the sides. Sorry for length but its the most minimal example I can design.
require(raster); require(reshape2); require(rgeos)
# equilateral triangle
t_s = 100 # half side
t_h = floor(tan(pi*60/180) * t_s) # height
corners = cbind(c(0, -t_s, t_s, 0), c(t_h, 0, 0, t_h))
trig = SpatialPolygons(list(Polygons(list(Polygon(corners)),"triangle")))
# line to measure pixel distances to
redline = SpatialLines(list(Lines(Line(corners[1:2,]), ID='redline')))
plot(trig); plot(redline, add=T, col='red', lwd=3)
# create a blank raster and clip to triangle
r = raster(mat.or.vec(nc = t_s*2 + 1, nr = t_h))
extent(r) = extent(-t_s, t_s, 0, t_h)
r = mask(r, trig)
image(r, asp=T)
# extract cell coordinates into d.f.
cells = as.data.frame(coordinates(rasterToPoints(r, spatial=T)))
# calculate distance of each pixel to redline with apply
dist_to_line = function(xy, line){
point = readWKT(paste('POINT(', xy[1], xy[2], ')'))
gDistance(point, line) / t_h
}
cells$dists = apply(cells, 1, dist_to_line, line=redline)
cells$cols = rgb(1 - cells$dists, 0, 0)
length(unique(cells$cols)) # count unique colours
# use custom colours to colour triangle pixels
image(r, col = cells$cols, asp=T)
plot(r, col = cells$cols, asp=T)
As you can see the plotting fails to overwrite as in the first example, but the data seems fine. Trying to convert to matrix also fails:
# try convertying colours to matrix
col_ras = acast(cells, y~x, value.var='cols')
col_ras = apply(col_ras, 1, rev) # rotate acw to match r
plot(r, col = col_ras, asp=T)
Very grateful for any assistance on what's going wrong.
Edit:
To show Spacedman's plotRGB method:
b = brick(draster, 1-draster, 1-draster)
plotRGB(b, scale=1)
plot(trig, col=NA, border='white', lwd=5, add=T)
Easy way is to go from your points to a spatial pixels data frame to a raster, then do the colour mapping...
Start with:
> head(cells)
x y dists
1 0.0000000 172.5 0.0014463709
2 0.0000000 171.5 0.0043391128
3 -0.9950249 170.5 0.0022523089
4 0.0000000 170.5 0.0072318546
5 0.9950249 170.5 0.0122114004
convert:
> coordinates(cells)=~x+y
> draster = raster(as(cells,"SpatialPixelsDataFrame"))
colourise:
> cols=draster
> cols[!is.na(draster)]= rgb(1-draster[!is.na(draster)],0,0)
> plot(cols, col=cols)
I'm not sure this is the right way to do things though, you might be better off creating an RGB raster stack and using plotRGB if you want fine colour control.

forestplot x-axis omit labels but draw tickmarks

I have a plot made with forestplot in the rmeta package. Notice the horizontal axis has no tick marks and labels between 0.2 and 7. How could I add tick marks without labels at 1,2,3,4,5 and 6 without labelling them? I just want the tick marks here. Here is the plot:
How do I have the ticks at 0.2,1,2,3,4,5,6 and 7, but I labelled only at c(0.2,7)? This the code:
library(rmeta)
tabletext<-rbind(c("A","3.77"),
c("B","1.33"),
c("C","1.32"),
c("D","1.12"),
c("E","1.58"),
c("F","0.9"))
m=c(3.77,1.33,1.32,1.12,1.58,0.9)
l=c(0.6144,0.644,0.6536,0.4536,1.0116,0.7236)
u=c(6.9256,2.016,1.9864,1.7864,2.1484,1.0764)
#overview datafile:
cbind(tabletext, m,l,u)
m l u
[1,] "A" "3.77" "3.77" "0.6144" "6.9256"
[2,] "B" "1.33" "1.33" "0.644" "2.016"
[3,] "C" "1.32" "1.32" "0.6536" "1.9864"
[4,] "D" "1.12" "1.12" "0.4536" "1.7864"
[5,] "E" "1.58" "1.58" "1.0116" "2.1484"
[6,] "F" "0.9" "0.9" "0.7236" "1.0764"
forestplot(tabletext,m,l,u, zero=1, xticks=c(0.2,7),col=meta.colors(box="royalblue",line="darkblue", summary="royalblue"))
I could extend the xticks=c(0.2,7) to xticks=c(0.2,1,2,3,4,5,6,7), but then all the labels at 2,3,4,5,6 would also be printed, which I dont want to.
Thanks for the suggestion. The rmeta does not have this option but I've added this to the forestplot-package (currently in the develop branch 1.2.1):
tabletext<-rbind(c("A","3.77"),
c("B","1.33"),
c("C","1.32"),
c("D","1.12"),
c("E","1.58"),
c("F","0.9"))
m=c(3.77,1.33,1.32,1.12,1.58,0.9)
l=c(0.6144,0.644,0.6536,0.4536,1.0116,0.7236)
u=c(6.9256,2.016,1.9864,1.7864,2.1484,1.0764)
#overview datafile:
xticks <- seq(from = 0.2, to = 7, by = .5)
xlabels <- rep(TRUE, length.out = length(xticks))
xlabels[xticks > 2] <- FALSE
xlabels[length(xlabels)] <- TRUE
attr(xticks, "labels") <- xlabels
forestplot(tabletext,new_page = TRUE,
m,l,u,
zero=1,
xticks=xticks,
col=fpColors(box="royalblue",line="darkblue", summary="royalblue"))
Download the develop version using devtools:
devtools::install_github("gforge/forestplot", ref="develop")

Print frequencies (as numbers) in plot

In R, I would like to insert frequencies (as numbers) in a plot:
my code to create the plot:
par(mar=c(4.5,4.5,9.5,4), xpd=TRUE)
plot(factor(ArtMehrspr)~Mehrspr_Vielf, data=datProjektMehr, col=terrain.colors(4),
bty='L', main="Vielfalt nutzen")
legend("topright", inset=c(0,-.225), title="Art der Mehrsprachigkeit", levels(factor(datProjektMehr$ArtMehrspr)),
fill=terrain.colors(4), horiz=TRUE)
par(mar=c(5,4,4,2)+0.1)
In the plot, 2 columns of my dataframe are depicted: ArtMehrspr and Mehrspr_Vielf.
Now what I would like to know is, how many "Kombi" are in category "1", how many "Paral" are in category "1" and so on, and then to print this number in the plot, so that in every box of the plot, I can see the corresponding number of observations. R must know these numbers, otherwise it could not vary the height of the different boxes according to the number of observations. So it cannot be that hard to get these numbers into the plot, can it?
With the command table(), I can get these numbers, but I would have to have 5 table()-commands to get all the numbers. Example for category = 1:
> table(subset(datProjektMehr, Mehrspr_Vielf=="1")$ArtMehrspr)
einspr Kombi Paral Versc Wechs
0 1 9 2 1
Apparently, you can achieve what I am looking for by adding the command labels = TRUE. But it does not work:
par(mar=c(4.5,4.5,9.5,4), xpd=TRUE, labels = TRUE)
plot(factor(ArtMehrspr)~Mehrspr_Vielf, data=datProjektMehr, col=terrain.colors(4),
bty='L', main="Vielfalt nutzen")
legend("topright", inset=c(0,-.225), title="Art der Mehrsprachigkeit", levels(factor(datProjektMehr$ArtMehrspr)),
fill=terrain.colors(4), horiz=TRUE)
par(mar=c(5,4,4,2)+0.1)
R gives me the following warning message:
Warning message:
In par(mar = c(4.5, 4.5, 9.5, 4), xpd = TRUE, labels = TRUE) :
"labels" is not a graphical parameter
Is this not the right command? Does anyone know how to do this?
First of all, the warning informs that there is not a labels argument you can use inside par.
Regarding the plotting of the table output, I'm not aware if there is an easy way of doing this, but I managed a pretty UNreliable and, maybe, inefficient code. In my machine, though, it works every time I run it.
The concept I had in mind is to text all values from your table inside the plot. To do so, coordinates in xx' and yy' had to be estimated. I prefer the term "estimated" instead of "calculated" because I didn't find a way to compute absolute values for the coordinates, due to the fact that the plot method was plot.factor.
So:
#random data. DF = datProjektMehr, artmehr = ArtMehrspr, mehrviel = Mehrspr_Vielf
DF <- data.frame(artmehr = sample(letters[1:4], 20, T), mehrviel = as.factor(sample(1:5, 20, T)))
#your code of plotting
par(mar = c(4.5,4.5,9.5,4), xpd = TRUE)
plot(factor(artmehr) ~ mehrviel, data = DF, col = terrain.colors(4),
bty = 'L', main = "Vielfalt nutzen")
legend("topright", inset=c(0,-.225), title="Art der Mehrsprachigkeit", levels(factor(DF$artmehr)),
fill=terrain.colors(4), horiz=TRUE)
#no need to "table()" many times
tab = table(DF$artmehr, DF$mehrviel)
#maximum value of x axis (at least in my machine)
#I found -through trial and error- that for a factor of n levels, x.max = 1 + (n-1)*0.02
x.max = 1 + (length(levels(DF$mehrviel)) - 1) * 0.02
#coordinates of "mehrviel" (as I named it)
mehrviel.coords = ((cumsum(apply(tab, 2, sum)) / sum(tab)) * x.max) - ((apply(tab, 2, sum) / sum(tab)) / 2)
#coordinates of "artmehr" (as I named it)
artmehr.coords <- apply(tab, 2, function(x) { cumsum(x / sum(x)) })
artmehr.coords <- apply(artmehr.coords, 2, function(x) { x - c(x[1]/2, diff(x)/2) })
#"text" the values in your table
#don't plot "0"s
for(i in 1:ncol(artmehr.coords))
{
text(x = mehrviel.coords[i], y = artmehr.coords[,i], labels = ifelse(tab[,i] != 0, tab[,i], ""), cex = 2)
}
The values of table:
tab
1 2 3 4 5
a 1 1 0 1 0
b 0 0 2 1 2
c 1 1 2 1 0
d 2 0 0 3 2
The plot:
EDIT: 1) "Tidied" the answer. 2) Aadded an extra level to the factor ploted in xx' axis to match your data exactly. 3)texted the frequencies in the middle of each box.

Resources