how can I create violin plot in different colours? - r

I am using package vioplot. I would like to ask, how can I create violinplot in different colours.
This is my reproducible example:
# Violin Plots library(vioplot)
x1 <- mtcars$mpg[mtcars$cyl==4]
x2 <- mtcars$mpg[mtcars$cyl==6]
x3 <- mtcars$mpg[mtcars$cyl==8]
vioplot(x1, x2, x3,
names=c("4 cyl", "6 cyl", "8 cyl"), col="gold")
title("Violin Plots of Miles Per Gallon")
Thank you.

It is not possible to have many colors. But it is not difficult to hack the function vioplot and edit the source code. Here steps you should follow to accomplish this:
copy the initial function:
my.vioplot <- vioplot()
edit this function:
edit(my.vioplot)
Search the word "polygon" and and replace col by col[i]
Do a test in the beginning of function for the case you give a single color. and add this line :
if(length(col)==1) col <- rep(col,n)
For example using your data :
vioplot(x1, x2, x3, names=c("4 cyl", "6 cyl", "8 cyl"), col="gold")
title("Violin Plots of Miles Per Gallon")
my.vioplot(x1, x2, x3, names=c("4 cyl", "6 cyl", "8 cyl"), col=c("gold","red","blue"))
title("Violin Plots of Miles Per Gallon multi colors")

To expand on agstudy's answer and correct one thing, here is the complete and new vioplot script.
Use source("vioplot.R") instead of library(vioplot) in your script to use this multicolor version instead. This one will repeat any colors until it reaches the same number of datasets.
library(sm)
vioplot <- function(x,...,range=1.5,h=NULL,ylim=NULL,names=NULL, horizontal=FALSE,
col="magenta", border="black", lty=1, lwd=1, rectCol="black", colMed="white", pchMed=19, at, add=FALSE, wex=1,
drawRect=TRUE)
{
# process multiple datas
datas <- list(x,...)
n <- length(datas)
if(missing(at)) at <- 1:n
# pass 1
#
# - calculate base range
# - estimate density
#
# setup parameters for density estimation
upper <- vector(mode="numeric",length=n)
lower <- vector(mode="numeric",length=n)
q1 <- vector(mode="numeric",length=n)
q3 <- vector(mode="numeric",length=n)
med <- vector(mode="numeric",length=n)
base <- vector(mode="list",length=n)
height <- vector(mode="list",length=n)
baserange <- c(Inf,-Inf)
# global args for sm.density function-call
args <- list(display="none")
if (!(is.null(h)))
args <- c(args, h=h)
for(i in 1:n) {
data<-datas[[i]]
# calculate plot parameters
# 1- and 3-quantile, median, IQR, upper- and lower-adjacent
data.min <- min(data)
data.max <- max(data)
q1[i]<-quantile(data,0.25)
q3[i]<-quantile(data,0.75)
med[i]<-median(data)
iqd <- q3[i]-q1[i]
upper[i] <- min( q3[i] + range*iqd, data.max )
lower[i] <- max( q1[i] - range*iqd, data.min )
# strategy:
# xmin = min(lower, data.min))
# ymax = max(upper, data.max))
#
est.xlim <- c( min(lower[i], data.min), max(upper[i], data.max) )
# estimate density curve
smout <- do.call("sm.density", c( list(data, xlim=est.xlim), args ) )
# calculate stretch factor
#
# the plots density heights is defined in range 0.0 ... 0.5
# we scale maximum estimated point to 0.4 per data
#
hscale <- 0.4/max(smout$estimate) * wex
# add density curve x,y pair to lists
base[[i]] <- smout$eval.points
height[[i]] <- smout$estimate * hscale
# calculate min,max base ranges
t <- range(base[[i]])
baserange[1] <- min(baserange[1],t[1])
baserange[2] <- max(baserange[2],t[2])
}
# pass 2
#
# - plot graphics
# setup parameters for plot
if(!add){
xlim <- if(n==1)
at + c(-.5, .5)
else
range(at) + min(diff(at))/2 * c(-1,1)
if (is.null(ylim)) {
ylim <- baserange
}
}
if (is.null(names)) {
label <- 1:n
} else {
label <- names
}
boxwidth <- 0.05 * wex
# setup plot
if(!add)
plot.new()
if(!horizontal) {
if(!add){
plot.window(xlim = xlim, ylim = ylim)
axis(2)
axis(1,at = at, label=label )
}
box()
for(i in 1:n) {
# plot left/right density curve
polygon( c(at[i]-height[[i]], rev(at[i]+height[[i]])),
c(base[[i]], rev(base[[i]])),
col = col[i %% length(col) + 1], border=border, lty=lty, lwd=lwd)
if(drawRect){
# plot IQR
lines( at[c( i, i)], c(lower[i], upper[i]) ,lwd=lwd, lty=lty)
# plot 50% KI box
rect( at[i]-boxwidth/2, q1[i], at[i]+boxwidth/2, q3[i], col=rectCol)
# plot median point
points( at[i], med[i], pch=pchMed, col=colMed )
}
}
}
else {
if(!add){
plot.window(xlim = ylim, ylim = xlim)
axis(1)
axis(2,at = at, label=label )
}
box()
for(i in 1:n) {
# plot left/right density curve
polygon( c(base[[i]], rev(base[[i]])),
c(at[i]-height[[i]], rev(at[i]+height[[i]])),
col = col[i %% length(col) + 1], border=border, lty=lty, lwd=lwd)
if(drawRect){
# plot IQR
lines( c(lower[i], upper[i]), at[c(i,i)] ,lwd=lwd, lty=lty)
# plot 50% KI box
rect( q1[i], at[i]-boxwidth/2, q3[i], at[i]+boxwidth/2, col=rectCol)
# plot median point
points( med[i], at[i], pch=pchMed, col=colMed )
}
}
}
invisible (list( upper=upper, lower=lower, median=med, q1=q1, q3=q3))
}

Don't forget geom_violin in the ggplot2 package. There are examples of how to change the fill colour in the docs: http://docs.ggplot2.org/0.9.3/geom_violin.html

Plotting the vectors 1-by-1 seem easier than modifying the function:
require(vioplot)
yalist = list( rnorm(100), rnorm(100, sd = 1),rnorm(100, sd = 2) )
plot(0,0,type="n",xlim=c(0.5,3.5), ylim=c(-10,10), xaxt = 'n', xlab ="", ylab = "Pc [%]", main ="Skanderbeg")
for (i in 1:3) { vioplot(na.omit(yalist[[i]]), at = i, add = T, col = c(1:3)[i]) }
axis(side=1,at=1:3,labels=3:1)

Related

Conway Maxwell Distribution Density Plot

I have written my own code to simulate the Conway maxwell distribution sample.
This is the pmf (Guikema & Goffelt, 2008):
However, I have met some problem to plot the density plot.
rcomp <- function(n,lamb,v)
{
u <- runif(n)
w <- integer(n)
for(i in 1:n) {
z=sum(sapply( 0:100, function(j) (( ((lamb)^j) / (factorial(j)) )^v) ))
x <- seq(1, 50, 1) #seq of 1 to 50, increase by 1
px <- (((lamb^x)/factorial(x))^v)/z
# px is pmf of re-parameter conway maxwell
w[i] <- if (u[i] < px[1]) 0 else (max (which (cumsum(px) <= u[i])))
}
return (w)
}
dcomp <- function(x,lamb,v) {
z=sum(sapply( 0:100, function(j) (( ((lamb)^j) / (factorial(j)) )^v) ))
px <- (((lamb^x)/factorial(x))^v)/z
return(px)
}
As I wanna plot the density plot to check whether lamb or v is location parameter, the plot I get is weird.
x = rcomp(100,6,0.2); pdf = dcomp(x,6,0.2)
x1 = rcomp(100,6,0.5); pdf1 = dcomp(x1,6,0.5)
x2 = rcomp(100,6,0.7); pdf2 = dcomp(x2,6,0.7)
plot(x2, pdf2, type="l", lwd=1,lty=1,col="blue")
How could I solve this problem?
Source: Guikema & Goffelt (2008), A Flexible Count Data Regression Model for Risk Analysis. Risk Analysis 28(1): 215.
You have to sort the values of the x coordinate if you want a graph to connect the points in their axis order.
Note, however, that there might be better ways to graph the density you want. See the red curve. I first create a vector x of values within a certain range and then compute the PDF for those values. These pairs (x, y) are what function lines plots.
set.seed(2673) # Make the results reproducible
x2 <- rcomp(100, 6, 0.7)
x2 <- sort(x2)
pdf2 <- dcomp(x2, 6, 0.7)
plot(x2, pdf2, type = "l", lwd = 1, lty = 1, col = "blue")
x <- seq(0, 50, length.out = 100)
y <- dcomp(x, 6, 0.2)
lines(x, y, type = "l", col = "red")

2 Y axis histogram (normal frequency vs relative frequency)

I would like your help, please.
I have this 2 plots, separately. One is normal frequency and the other one, with exactly the same data, is for relative frequency.
Can you tell me how can i join them in a single plot with 2 y axis ( frequency and relative frequency?)
x<- AAA$starch
h<-hist(x, breaks=40, col="lightblue", xlab="Starch ~ Corn",
main="Histogram with Normal Curve", xlim=c(58,70),ylim = c(0,2500),axes=TRUE)
xfit<-seq(min(x),max(x),length=40)
yfit<-dnorm(xfit,mean=mean(x),sd=sd(x))
yfit <- yfit*diff(h$mids[1:2])*length(x)
lines(xfit, yfit, col="blue", lwd=3)
library(HistogramTools)
x<- AAA$starch
c <- hist(x,breaks=10, ylab="Relative Frequency", main="Histogram with Normal Curve",ylim=c(0,2500), xlim=c(58,70), axes=TRUE)
PlotRelativeFrequency((c))
Thank you!!
EDIT:
This is just an example image of what I want...
I use doubleYScale from package latticeExtra.
Here is an example (I am not sure about relative frequency calculation) :
library(latticeExtra)
set.seed(42)
firstSet <- rnorm(500,4)
breaks = 0:10
#Cut data into sections
firstSet.cut = cut(firstSet, breaks, right=FALSE)
firstSet.freq = table(firstSet.cut)
#Calculate relative frequency
firstSet.relfreq = firstSet.freq / length(firstSet)
#Parse to a list to use xyplot later and assigning x values
firstSet.list <- list(x = 1:10, y = as.vector(firstSet.relfreq))
#Build histogram and relative frequency curve
hist1 <- histogram(firstSet, breaks = 10, freq = TRUE, col='skyblue', xlab="Starch ~ Corn", ylab="Frequency", main="Histogram with Normal Curve", ylim=c(0,40), xlim=c(0,10), plot=FALSE)
relFreqCurve <- xyplot(y ~ x, firstSet.list, type="l", ylab = "Relative frequency", ylim=c(0,1))
#Build double objects plot
doubleYScale(hist1, relFreqCurve, add.ylab2 = TRUE)
And here is the result with two y axis with different scales :

Plotting empirical cumulative probability function and its inverse

I have data cdecn:
set.seed(0)
cdecn <- sample(1:10,570,replace=TRUE)
a <- rnorm(cdecn,mean(cdecn),sd(cdecn))
I have created a plot which displays the cumulative probabilities.
aprob <- ecdf(a)
plot(aprob)
I am wondering how I can switch the x-axis and y-axis to get a new plot, i.e., the inverse of ECDF.
Also, for the new plot, is there a way to add a vertical line through where the my curve intersects 0?
We can do the following. My comments along the code is very explanatory.
## reproducible example
set.seed(0)
cdecn <- sample(1:10,570,replace=TRUE)
a <- rnorm(cdecn,mean(cdecn),sd(cdecn)) ## random samples
a <- sort(a) ## sort samples in ascending order
e_cdf <- ecdf(a) ## ecdf function
e_cdf_val <- 1:length(a) / length(a) ## the same as: e_cdf_val <- e_cdf(a)
par(mfrow = c(1,2))
## ordinary ecdf plot
plot(a, e_cdf_val, type = "s", xlab = "ordered samples", ylab = "ECDF",
main = "ECDF")
## switch axises to get 'inverse' ECDF
plot(e_cdf_val, a, type = "s", xlab = "ECDF", ylab = "ordered sample",
main = "'inverse' ECDF")
## where the curve intersects 0
p <- e_cdf(0)
## [1] 0.01578947
## highlight the intersection point
points(p, 0, pch = 20, col = "red")
## add a dotted red vertical line through intersection
abline(v = p, lty = 3, col = "red")
## display value p to the right of the intersection point
## round up to 4 digits
text(p, 0, pos = 4, labels = round(p, 4), col = "red")
cdecn <- sample(1:10,570,replace=TRUE)
a <- rnorm(cdecn,mean(cdecn),sd(cdecn))
aprob <- ecdf(a)
plot(aprob)
# Switch the x and y axes
x <- seq(0,1,0.001754386)
plot(y=knots(aprob), x=x, ylab = "Fn(y)")
# Add a 45 degree straight line at 0, 0
my_line <- function(x,y,...){
points(x,y,...)
segments(min(x), y==0, max(x), max(y),...)
}
lines(my_line(x=x, y = knots(aprob)))
The "straight line at x==0" bit makes me suspect that you want a QQplot:
qqnorm(a)
qqline(a)

Creating half a polar plot (rose diagram) with circular package

I am plotting a distribution of angles with the rose.diag function from the circular library. Input data are radiants. My MWE code is
library(circular);
dat<-read.csv(file.choose(),header=F);
data=unlist(dat);
rose.diag(data, bins=24)
and I get this graph:
I'm interested in showing only part of the data, from -pi/2 to pi/2, and that the length of the biggest tick equals the length of the radius of the circle as show here:
Any help would be appreciated!
EDIT
As suggested by #lawyeR here is the code with a sample of the data:
library(circular);
data<- c(-0.188,-0.742,-0.953,-0.948,-0.953,-1.187,-0.9327200,-0.855,- 0.024,1.303,-1.041,-1.068,-1.066,1.442,1.150,0.965,0.665,0.649,0.984,-1.379,-0.584,-0.573,-0.357,-0.237,-0.287,-0.486,-0.783,-0.298,0.849,1.088,-1.003,-0.952,-0.776,-0.811,-0.880);
rose.diag(data, bins=24);
Perhaps you can first draw the half circle without the distribution, using default plot functions. Then fill in the distribution without its circle:
library(circular)
data<- c(-0.188,-0.742,-0.953,-0.948,-0.953,-1.187,-0.9327200,-0.855,- 0.024,1.303,-1.041,-1.068,-1.066,1.442,1.150,0.965,0.665,0.649,0.984,-1.379,-0.584,-0.573,-0.357,-0.237,-0.287,-0.486,-0.783,-0.298,0.849,1.088,-1.003,-0.952,-0.776,-0.811,-0.880)
freq <- diff(colSums(outer( data %% (2*pi), (1:24)*pi/12,"<"))) / length(data)
r.max <- sqrt(max(freq))
#-----------------------------------------------------------------
# Plot the half circle:
lab.width <- 0.15*r.max
lab.height <- 0.15*r.max
plot( c(-r.max,r.max), c(0,0),
axes=FALSE,
ylim=c(0,r.max+lab.height),
xlim=c(-r.max-lab.width,r.max+lab.width),
xlab="", ylab="", type="l")
for ( i in 0:(5*12-1) )
{
psi <- i*pi/(5*12)
x1 <- r.max*cos(psi)
y1 <- r.max*sin(psi)
x2 <- r.max*cos(psi+pi/(5*12))
y2 <- r.max*sin(psi+pi/(5*12))
lines( c(x1,x2), c(y1,y2), type="l")
if (i %% 5 == 0) { lines( x1*c(1,0.95), y1*c(1,0.95), type="l" ) }
}
par(cex=2.0)
text( x = c(-r.max,0,r.max),
y = c(0,r.max,0),
labels = c("-pi/2","0","pi/2"),
pos = c(2,3,4))
#------------------------------------------------------------
# Plot the distribution, but without the circle:
rose.diag(data,
bins = 24,
rotation = "clock",
tcl.text = NA,
ticks = FALSE,
zero = pi/2,
control.circle = circle.control( col="white" ),
add = TRUE )
.

How can I adjust the axes to start from zero origin in r plot

To plot the empirical cumulative density of three variables, x1, x2 and x3, I used the following in r:
plot.ecdf(x1, col="blue",
main="Distribution XYZ",
xlab="x_i", ylab="Prob(x_i<=y)")
lines(ecdf(x2), col="red") # adds a line
lines(ecdf(x3), col="green") # adds line
legend(600,0.6, # places a legend from (x,y)=(600,0.6) on
c("x1","x2","x3"), # puts text in the legend
lty=c(1,1,1), # gives the legend appropriate symbols (lines)
lwd=c(1,1,1),col=c("blue","red","green")) # gives the legend lines the correct color and width
The resulting plot however has two horizontal lines (broken lines) at 0 and 1 besides the box. And, the origin of the box has a space below zero on the vertical axis and a space to the left of zero on the horizontal axis. May you suggest how to remove these space and the additional lines. I wanted but could not post the plot.
EDITED:
A sample data can be generated as follows:
sample data
n <- 1000; u <- runif(n)
a <- -4.46; b <- 1.6; c <- -4.63
d <- ( a * u ) + (b * ( ( 1.5 * ( u ** 2 )) - 0.5 )) + (c * ( (2.5 * (u ** 3)) - (1.5 * u )))
x1 <- -126/d; x2 <- -131/d; x3 <- -187/d
It sounds like you are asking for the style provided by different settings for 'xaxs' and 'yaxs', and maybe 'xlim':
Try:
plot.ecdf(x1, col="blue",
main="Distribution XYZ",
xlab="x_i", ylab="Prob(x_i<=y)",
xaxs="i",yaxs="i", xlim=c(0,1000) )
lines(ecdf(x2), col="red")
lines(ecdf(x3), col="green")
legend(600,0.6,
c("x1","x2","x3"),
lty=c(1,1,1),
lwd=c(1,1,1),col=c("blue","red","green"))
use the yaxs = "i" argument:
plot.ecdf(x1, col="blue",
main="Distribution XYZ",
xlab="x_i", ylab="Prob(x_i<=y)", yaxs = "i")
This will align the axis at the ylim points (which supposedly are 0 and 1 for plot.ecdf).
If you also want to remove the dotted line at 0 and 1, just call box():
box()
You could use the arguments xlim, ylim and col.01line:
x1 = runif(100)
x2 = runif(100)
x3 = runif(100)
plot.ecdf(x1, col="blue", main="Distribution XYZ",xlab="x_i", ylab="Prob(x_i<=y)", ylim=c(0, 1), xlim=c(0,1), col.01line="white", verticals=FALSE)
lines(ecdf(x2), col="red", col.01line="white")
lines(ecdf(x3), col="green", col.01line="white")
legend(600,0.6,c("x1","x2","x3"), lty=c(1,1,1), lwd=c(1,1,1),col=c("blue","red","green"))
Alternatively, you could just use the generic plot:
f1 = ecdf(x1)
f2 = ecdf(x2)
f3 = ecdf(x3)
pts = seq(0, 1, 0.01)
plot(pts, f1(pts), col="blue", type="b", xlab="x_i", ylab="Prob(x_i<=y)", pch=16, main="Distribution XYZ")
lines(pts, f2(pts), col="red", type="b", pch=16)
lines(pts, f3(pts), col="green", type="b", pch=16)
legend("topleft", c("x1","x2","x3"), fill=c("blue","red","green"))

Resources