I'm using the following code in R to draw two density curves on a single graph;
# Load the two usage data sets (comma-separated, first row holds column names).
# `header` is spelled out in full rather than relying on partial matching.
mydata1 <- read.csv(file = "myfile1.csv", header = TRUE, sep = ",")
mydata2 <- read.csv(file = "myfile2.csv", header = TRUE, sep = ",")
# Open a PDF graphics device writing to comparison.pdf
pdf("comparison.pdf")
# Draw the kernel density of every sample in `s` on one set of axes and add a
# legend identifying each curve.
#
# Arguments:
#   s           non-empty list of numeric vectors, one per curve
#   labels      legend text, one entry per sample; defaults to names(s) and
#               falls back to "Sample 1", "Sample 2", ... for an unnamed list
#   legend.pos  position keyword passed to legend()
# Returns NULL invisibly; called for its plotting side effect.
plot.multi.dens <- function(s, labels = names(s), legend.pos = "topright") {
  stopifnot(is.list(s), length(s) > 0)
  if (is.null(labels)) {
    labels <- paste("Sample", seq_along(s))
  }
  stopifnot(length(labels) == length(s))

  # Estimate each density once and reuse it for the axis ranges and the drawing
  dens <- lapply(s, density)
  xr <- range(unlist(lapply(dens, function(d) d$x)))
  yr <- range(unlist(lapply(dens, function(d) d$y)))

  plot(dens[[1]], xlim = xr, ylim = yr, xlab = "Usage", main = "comparison")
  cols <- seq_along(dens)
  for (i in cols) {
    # xlim/ylim are fixed by plot(); passing them to lines() only raises warnings
    lines(dens[[i]], col = cols[i])
  }
  # Curve i is drawn in colour i, so the legend uses the same colour sequence
  legend(legend.pos, legend = labels, col = cols, lty = 1)
  invisible(NULL)
}
# Plot the usage densities of both data sets (mydata2 first, then mydata1)
plot.multi.dens(
  list(
    mydata2$usage,
    mydata1$usage
  )
)
# Close the graphics device
dev.off()
Now the problem is that the resulting graph shows two lines but gives no indication of which line is which. For example, the output should show that the red line is "a" and the black line is "b". I'm a newbie to R, which is why I'm having some difficulty. Any help will be appreciated!
Answer from quickR website
# Compare MPG distributions for cars with
# 4, 6, or 8 cylinders
library(sm)
# create value labels (columns are read from mtcars directly instead of
# attach()-ing the data frame to the search path)
cyl.f <- factor(mtcars$cyl, levels = c(4, 6, 8),
                labels = c("4 cylinder", "6 cylinder", "8 cylinder"))
# plot densities
sm.density.compare(mtcars$mpg, mtcars$cyl, xlab = "Miles Per Gallon")
title(main = "MPG Distribution by Car Cylinders")
# add legend via mouse click
# exactly one fill colour per level, starting at colour 2 as before
# (the previous 2:(2 + n) produced one colour too many)
colfill <- seq_along(levels(cyl.f)) + 1L
legend(locator(1), levels(cyl.f), fill = colfill)
Related
I am trying to plot two or more lines on the same graph using a loop. My plot is a population dynamic in which I want to repeatedly change the value of the starting population but keep all other parameters the same. I want to plot the different outcomes on one graph. Can anyone help?
Try the following:
library(ggplot2)
# Simulation settings: number of years, growth rate and carrying capacity
MAX.Y <- 30
year <- 1:30
rlp <- 0.1
K <- 4000000

# Trajectory starting from 4000 individuals
lp <- rep(0, MAX.Y)
lp[1] <- 4000
for (n in 1:(MAX.Y - 1)) {
  lp[n + 1] <- lp[n] + (rlp) * (1 - lp[n] / K) * lp[n]
}

# Trajectory starting from 100000 individuals, same growth rule
sp <- rep(0, MAX.Y)
sp[1] <- 100000
for (n in 1:(MAX.Y - 1)) {
  sp[n + 1] <- sp[n] + (rlp) * (1 - sp[n] / K) * sp[n]
}

# Gather both trajectories in long format (one row per year and series)
data <- data.frame(year = year, lp = lp, sp = sp)
data <- reshape2::melt(data, id.vars = "year")

# One coloured line per series
ggplot(data, aes(year, value, colour = variable)) +
  geom_line() +
  labs(
    x = "Time (years)",
    y = "Population size",
    title = "B) Anchovy population growth \n in less productive environment"
  ) +
  theme_minimal()
Here is what I would do.
First, since the computations for lp and sp are the same and only the initial values change, create a function to do it.
# Iterate the growth rule size + rlp * (1 - size / K) * size, starting from
# `initial`, and return the vector of MAX successive population sizes.
f <- function(initial, MAX, rlp, K) {
  # One step of the recurrence
  grow <- function(size) {
    size + rlp * (1 - size / K) * size
  }
  trajectory <- numeric(MAX)
  trajectory[1] <- initial
  for (step in seq_len(MAX - 1)) {
    trajectory[step + 1] <- grow(trajectory[step])
  }
  trajectory
}
Now sapply the function to a vector of initial values.
# Simulation settings shared by all runs
MAX.Y <- 30
rlp <- 0.1
year <- seq_len(MAX.Y)
K <- 4000000
# One named starting population per curve
InitialValues <- c(lp = 4000, sp = 100000)
# vapply() checks that every run returns MAX.Y numbers, so `x` is always a
# MAX.Y-row numeric matrix with one column per starting value
x <- vapply(InitialValues, f, FUN.VALUE = numeric(MAX.Y),
            MAX = MAX.Y, rlp = rlp, K = K)
And plot it with matlines. But for matlines to work the plot must be created with the custom title, axis limits, etc.
# Set up an empty plot with the title, labels and axis limits
plot(1, type = "n",
     xlim = range(year), ylim = range(x),
     main = c("B) Anchovy population growth", "in less productive environment"),
     xlab = "Time (years)",
     ylab = "Population size",
     cex.main = 1.0)
# The colours belong here: type = "n" draws nothing, so `col` given to plot()
# had no effect and matlines() fell back to its default colours
matlines(year, x, lty = "solid", col = c("darkorchid", "black"))
I would like your help, please.
I have these 2 plots, separately. One shows the absolute frequency and the other, with exactly the same data, shows the relative frequency.
Can you tell me how I can combine them in a single plot with 2 y axes (frequency and relative frequency)?
# Plot 1: frequency histogram of starch with a fitted normal curve
x <- AAA$starch
h <- hist(x, breaks = 40, col = "lightblue", xlab = "Starch ~ Corn",
          main = "Histogram with Normal Curve", xlim = c(58, 70),
          ylim = c(0, 2500), axes = TRUE)
# `length.out` spelled out in full instead of the partially matched `length`
xfit <- seq(min(x), max(x), length.out = 40)
yfit <- dnorm(xfit, mean = mean(x), sd = sd(x))
# Rescale the density to counts: bin width times number of observations
yfit <- yfit * diff(h$mids[1:2]) * length(x)
lines(xfit, yfit, col = "blue", lwd = 3)

# Plot 2: the same data passed to PlotRelativeFrequency()
library(HistogramTools)
x <- AAA$starch
# NOTE(review): the variable `c` shares its name with base::c(); calls such as
# c(0, 2500) still resolve to the function, but a different name would be clearer
c <- hist(x, breaks = 10, ylab = "Relative Frequency",
          main = "Histogram with Normal Curve", ylim = c(0, 2500),
          xlim = c(58, 70), axes = TRUE)
PlotRelativeFrequency((c))
Thank you!!
EDIT:
This is just an example image of what I want...
I use doubleYScale from package latticeExtra.
Here is an example (I am not sure about relative frequency calculation) :
library(latticeExtra)
set.seed(42)
firstSet <- rnorm(500, 4)
breaks <- 0:10
# Cut data into sections [0,1), [1,2), ..., [9,10)
firstSet.cut <- cut(firstSet, breaks, right = FALSE)
firstSet.freq <- table(firstSet.cut)
# Calculate relative frequency
firstSet.relfreq <- firstSet.freq / length(firstSet)
# Put each relative frequency at the midpoint of its bin (0.5, 1.5, ..., 9.5);
# using 1:10 placed every point on the right edge of its bin
bin.mids <- head(breaks, -1) + diff(breaks) / 2
# Parse to a list to use xyplot later
firstSet.list <- list(x = bin.mids, y = as.vector(firstSet.relfreq))
# Build histogram and relative frequency curve
# NOTE(review): `freq` and `plot` look like graphics::hist() arguments rather
# than lattice::histogram() ones -- confirm they have the intended effect
hist1 <- histogram(firstSet, breaks = 10, freq = TRUE, col='skyblue', xlab="Starch ~ Corn", ylab="Frequency", main="Histogram with Normal Curve", ylim=c(0,40), xlim=c(0,10), plot=FALSE)
relFreqCurve <- xyplot(y ~ x, firstSet.list, type = "l",
                       ylab = "Relative frequency", ylim = c(0, 1))
# Build double objects plot
doubleYScale(hist1, relFreqCurve, add.ylab2 = TRUE)
And here is the result, with two y axes on different scales:
I have a function in R which creates a standard normal plot, and then uses a for loop that calls density plots for the t distribution for various degrees of freedom. The plot looks like:
Note that the density for degrees of freedom = 2 extends outside of the y axis limits. I am wondering if there is a way to edit the for loop so that the axis limits are adjusted according to the range of the density lines that are drawn.
The for loop code that I am using is as follows:
N <- 1000
n <- c(25, 50, 100, 200)
df <- c(1:4, seq(5, 25, by = 5))

# Plot the standard normal density and overlay one density curve of data$t for
# each degrees-of-freedom value, with the y axis sized to fit every curve.
#
# Arguments:
#   data  data frame with a column `df` (degrees of freedom) and a column `t`
#   dfs   degrees-of-freedom values to draw; defaults to the global `df`
# Returns NULL invisibly; called for its plotting side effect.
histPlot <- function(data, dfs = df) {
  x <- seq(-4, 4, length.out = 100)
  y <- dnorm(x, mean = 0, sd = 1)

  # Estimate all densities before plotting so the y axis can cover the
  # tallest curve instead of only the normal density
  dens <- lapply(dfs, function(d) density(data[data$df == d, "t"]))
  y.max <- max(y, unlist(lapply(dens, function(d) d$y)))

  plot(x, y, type = "l",
       main = paste("Distribution of size", nrow(data) / 9000, sep = " "),
       xlab = "standard deviation",
       ylim = c(0, y.max))
  colors <- brewer.pal(n = 9, name = "Spectral")
  for (i in seq_along(dens)) {
    lines(dens[[i]], col = colors[i])
  }
  # The legend does not depend on the loop, so it is drawn once
  legend("topright", pch = c(21, 21),
         col = c(colors[seq_along(dfs)], "black"),
         legend = c(dfs, "normal"), bty = "o", cex = .8)
  invisible(NULL)
}
The lines() calls inside the for loop add to the existing plot.
This means you have to change the ylim parameter in the plot function call. This will make a higher plot, and lines will be visible when added.
Try like this:
plot(
  x, y,
  type = "l",
  main = paste("Distribution of size", nrow(data) / 9000),
  xlab = "standard deviation",
  # Fixed y axis range from 0 to 1, which makes the plot taller
  ylim = c(0, 1)
)
I am trying to include a legend for a scatterplot where the size of each point indicates the number of pairings
# Count how often each (child, parent) pairing occurs
freqData <- as.data.frame(table(galton$child, galton$parent))
names(freqData) <- c("child", "parent", "freq")
# Scatterplot with the point size proportional to the pairing count
plot(as.numeric(as.vector(freqData$parent)),
     as.numeric(as.vector(freqData$child)),
     pch = 21, col = "black", bg = "lightblue",
     cex = .10 * freqData$freq,
     xlab = "parent", ylab = "child")
# Size key: a few representative counts drawn with the same scaling as the
# points (pt.cex) and the same symbol and colours
legend.freq <- pretty(range(freqData$freq[freqData$freq > 0]), n = 4)
legend.freq <- legend.freq[legend.freq > 0]
legend("bottomright", legend = legend.freq, title = "freq",
       pch = 21, col = "black", pt.bg = "lightblue",
       pt.cex = .10 * legend.freq)
Changing the size of points in the legend can be done by passing a vector of pt.cex values to legend(). The following code was used to generate the sample plot. The example uses a square root of the frequency so that the point area is proportional to the count in that pairing.
# historical data
library("HistData")
# Galton Data
rawData <- Galton
# making a set of unique pairings and counting frequency
freqData <- unique(rawData)
# For every unique (parent, child) pair, count the matching rows in rawData.
# The column is written out as `child`; the former `chil` only worked through
# partial matching of `$`.
freqData$count <- mapply(
  function(p, ch) sum(rawData$parent == p & rawData$child == ch),
  freqData$parent, freqData$child
)
# making plots
plot(freqData$parent,
     freqData$child,
     pch = 19,                           # plot symbol
     cex = 0.1 * sqrt(freqData$count))   # point expansion
# adding legend
legend("bottomright",                                          # location
       legend = c(1, 5, 10, 15, 20, 25, 30, 35),               # entries
       title = "count",                                        # title
       pt.cex = 0.1 * sqrt(c(1, 5, 10, 15, 20, 25, 30, 35)),   # point expansion
       pch = 19,                                               # plot symbol
       ncol = 2)                                               # number of columns
I am using the package vioplot. I would like to ask how I can create violin plots in different colours.
This is my reproducible example:
# Violin Plots
# (the library() call had been merged into the comment line, so the package
# was never loaded)
library(vioplot)
# MPG values split by number of cylinders
x1 <- mtcars$mpg[mtcars$cyl == 4]
x2 <- mtcars$mpg[mtcars$cyl == 6]
x3 <- mtcars$mpg[mtcars$cyl == 8]
vioplot(x1, x2, x3,
        names = c("4 cyl", "6 cyl", "8 cyl"), col = "gold")
title("Violin Plots of Miles Per Gallon")
Thank you.
It is not possible to have many colors. But it is not difficult to hack the function vioplot and edit the source code. Here are the steps you should follow to accomplish this:
copy the initial function:
# Copy the function object itself; writing vioplot() would call it without data
my.vioplot <- vioplot
edit this function:
# edit() returns the modified function, so the result has to be assigned back
my.vioplot <- edit(my.vioplot)
Search for the word "polygon" and replace col by col[i]
Add a test at the beginning of the function for the case where you give a single color, by adding this line:
# A single colour is repeated n times so that col[i] exists for every violin
if (length(col) == 1) {
  col <- rep(col, n)
}
For example using your data :
# Original function: one fill colour for all violins
vioplot(
  x1, x2, x3,
  names = c("4 cyl", "6 cyl", "8 cyl"),
  col = "gold"
)
title("Violin Plots of Miles Per Gallon")
# Edited copy: one fill colour per violin
my.vioplot(
  x1, x2, x3,
  names = c("4 cyl", "6 cyl", "8 cyl"),
  col = c("gold", "red", "blue")
)
title("Violin Plots of Miles Per Gallon multi colors")
To expand on agstudy's answer and correct one thing, here is the complete and new vioplot script.
Use source("vioplot.R") instead of library(vioplot) in your script to use this multicolor version instead. This one will repeat any colors until it reaches the same number of datasets.
library(sm)
# Draw one violin plot per data vector, cycling through `col` so that each
# violin can have its own fill colour.
#
# Arguments:
#   x, ...      data vectors, one violin each
#   range       multiple of the inter-quartile distance used for the
#               lower/upper adjacent values
#   h           smoothing parameter forwarded to sm.density() when not NULL
#   ylim        limits of the data axis; defaults to the range of the density
#               evaluation points
#   names       axis labels, one per violin; defaults to 1..n
#   horizontal  draw the violins horizontally when TRUE
#   col         fill colours; col[1] is used for the first violin, col[2] for
#               the second, and so on, starting over once exhausted
#   border, lty, lwd  outline settings passed to polygon(); lty and lwd are
#               also used for the adjacent-value line
#   rectCol     fill colour of the quartile box
#   colMed, pchMed    colour and symbol of the median point
#   at          violin positions; defaults to 1..n
#   add         add to the current plot instead of starting a new one
#   wex         expansion factor for the violin and box widths
#   drawRect    draw the adjacent-value line, quartile box and median point
# Returns (invisibly) a list with elements upper, lower, median, q1 and q3.
vioplot <- function(x, ..., range = 1.5, h = NULL, ylim = NULL, names = NULL,
                    horizontal = FALSE, col = "magenta", border = "black",
                    lty = 1, lwd = 1, rectCol = "black", colMed = "white",
                    pchMed = 19, at, add = FALSE, wex = 1, drawRect = TRUE) {
  # process multiple datas
  datas <- list(x, ...)
  n <- length(datas)
  if (missing(at)) at <- seq_len(n)

  # pass 1
  #
  # - calculate base range
  # - estimate density
  #
  # setup parameters for density estimation
  upper <- numeric(n)
  lower <- numeric(n)
  q1 <- numeric(n)
  q3 <- numeric(n)
  med <- numeric(n)
  base <- vector(mode = "list", length = n)
  height <- vector(mode = "list", length = n)
  baserange <- c(Inf, -Inf)

  # global args for sm.density function-call
  args <- list(display = "none")
  if (!is.null(h)) {
    args <- c(args, h = h)
  }

  for (i in seq_len(n)) {
    data <- datas[[i]]

    # calculate plot parameters
    #   1- and 3-quantile, median, IQR, upper- and lower-adjacent
    data.min <- min(data)
    data.max <- max(data)
    q1[i] <- quantile(data, 0.25)
    q3[i] <- quantile(data, 0.75)
    med[i] <- median(data)
    iqd <- q3[i] - q1[i]
    upper[i] <- min(q3[i] + range * iqd, data.max)
    lower[i] <- max(q1[i] - range * iqd, data.min)

    # evaluate the density over the span covering both the adjacent values
    # and the data extremes
    est.xlim <- c(min(lower[i], data.min), max(upper[i], data.max))

    # estimate density curve
    smout <- do.call("sm.density", c(list(data, xlim = est.xlim), args))

    # calculate stretch factor
    #
    #  the plots density heights is defined in range 0.0 ... 0.5
    #  we scale maximum estimated point to 0.4 per data
    #
    hscale <- 0.4 / max(smout$estimate) * wex

    # add density curve x,y pair to lists
    base[[i]] <- smout$eval.points
    height[[i]] <- smout$estimate * hscale

    # calculate min,max base ranges
    rng <- range(base[[i]])
    baserange[1] <- min(baserange[1], rng[1])
    baserange[2] <- max(baserange[2], rng[2])
  }

  # pass 2
  #
  # - plot graphics

  # setup parameters for plot
  if (!add) {
    xlim <- if (n == 1) {
      at + c(-.5, .5)
    } else {
      range(at) + min(diff(at)) / 2 * c(-1, 1)
    }
    if (is.null(ylim)) {
      ylim <- baserange
    }
  }
  label <- if (is.null(names)) seq_len(n) else names
  boxwidth <- 0.05 * wex

  # Fill colour of violin i: (i - 1) keeps the cycle aligned with `col`, so
  # the first violin gets col[1]; the former i %% length(col) + 1 started
  # at col[2]
  fill.col <- function(i) col[(i - 1) %% length(col) + 1]

  # setup plot
  if (!add) {
    plot.new()
  }
  if (!horizontal) {
    if (!add) {
      plot.window(xlim = xlim, ylim = ylim)
      axis(2)
      axis(1, at = at, labels = label)
    }
    box()
    for (i in seq_len(n)) {
      # plot left/right density curve
      polygon(c(at[i] - height[[i]], rev(at[i] + height[[i]])),
              c(base[[i]], rev(base[[i]])),
              col = fill.col(i), border = border, lty = lty, lwd = lwd)
      if (drawRect) {
        # plot IQR
        lines(at[c(i, i)], c(lower[i], upper[i]), lwd = lwd, lty = lty)
        # plot 50% KI box
        rect(at[i] - boxwidth / 2, q1[i], at[i] + boxwidth / 2, q3[i],
             col = rectCol)
        # plot median point
        points(at[i], med[i], pch = pchMed, col = colMed)
      }
    }
  } else {
    if (!add) {
      plot.window(xlim = ylim, ylim = xlim)
      axis(1)
      axis(2, at = at, labels = label)
    }
    box()
    for (i in seq_len(n)) {
      # plot left/right density curve
      polygon(c(base[[i]], rev(base[[i]])),
              c(at[i] - height[[i]], rev(at[i] + height[[i]])),
              col = fill.col(i), border = border, lty = lty, lwd = lwd)
      if (drawRect) {
        # plot IQR
        lines(c(lower[i], upper[i]), at[c(i, i)], lwd = lwd, lty = lty)
        # plot 50% KI box
        rect(q1[i], at[i] - boxwidth / 2, q3[i], at[i] + boxwidth / 2,
             col = rectCol)
        # plot median point
        points(med[i], at[i], pch = pchMed, col = colMed)
      }
    }
  }
  invisible(list(upper = upper, lower = lower, median = med, q1 = q1, q3 = q3))
}
Don't forget geom_violin in the ggplot2 package. There are examples of how to change the fill colour in the docs: http://docs.ggplot2.org/0.9.3/geom_violin.html
Plotting the vectors one by one seems easier than modifying the function:
# library() stops with an error when the package is missing, whereas
# require() only returns FALSE and lets the script fail later
library(vioplot)
# Three samples, one violin each
yalist <- list(rnorm(100), rnorm(100, sd = 1), rnorm(100, sd = 2))
# Empty plot that fixes the axes; the x axis is drawn manually below
plot(0, 0, type = "n", xlim = c(0.5, 3.5), ylim = c(-10, 10), xaxt = "n",
     xlab = "", ylab = "Pc [%]", main = "Skanderbeg")
# Add violin i at position i in colour i
for (i in seq_along(yalist)) {
  vioplot(na.omit(yalist[[i]]), at = i, add = TRUE, col = i)
}
# NOTE(review): the labels run 3:1 while the violins are drawn at 1:3 --
# confirm that the reversed labelling is intended
axis(side = 1, at = 1:3, labels = 3:1)