So I have a data frame tmp, and each column follows a different distribution. What I want to do is plot the histograms in a PDF, one histogram per page. But why do I get the same histogram three times?
When I print g11 I get the histogram for g13. When I write the plots to the PDF, I expect 3 identical pages, each showing histograms 1-3; instead I get 3 different pages, each showing the same histogram three times.
Might it be that ggplot works with a pointer, and that due to the second for-loop, it plots g1i?
Is there a way to rewrite my code? (The example is simplified from my problem)
# Example data: three columns of 20 normal draws each, with different
# means and standard deviations.
tmp <- data.frame(x=rnorm(n=20, mean=0, sd=1),
y=rnorm(n=20, mean=10, sd=2),
z=rnorm(n=20, mean=40, sd=5))
# Build one histogram per column and store the plot objects under the
# names g11, g12 and g13.
# NOTE(review): aes() keeps the expression get(colnames(tmp)[i]) unevaluated;
# it is only evaluated when a plot is drawn, using whatever value `i` has at
# that moment, not the value it had when the plot object was created.
for (i in 1:3){
assign(paste("g1", i, sep=""),ggplot(tmp,aes(x=get(colnames(tmp)[i]))) + geom_histogram(binwidth=1))
}
# Write the plots to a PDF file; dev.off() below closes the device.
pdf("/pathto/plot.pdf")
# Each iteration draws the same three stored plots (g11, g12, g13) with
# grid.arrange().
# NOTE(review): this loop reuses the name `i`, so the deferred
# get(colnames(tmp)[i]) inside all three plots sees this loop's current `i`
# when they are drawn here; that would explain three pages that each repeat
# one histogram. Confirm against the rendered PDF.
for(i in 1:3){
#i <- 1
grid.arrange(get(paste("g1", 1, sep="")), get(paste("g1", 2, sep="")), get(paste("g1", 3, sep="")))
}
dev.off()
I think the get in the first for loop is not changing the column correctly.
You could try this instead:
# Build one histogram per column of `tmp` and store the plots as g11, g12, ...
# (one per column; g11..g13 for the three-column example data).
for (i in seq_along(tmp)) {
  col_name <- names(tmp)[i]
  # aes_string() is deprecated; the tidy-evaluation replacement is the .data
  # pronoun. `!!` injects the column name into the mapping right now, so the
  # stored plot does not depend on the value of `i` (or `col_name`) at the
  # time it is finally drawn.
  plot_i <- ggplot(tmp, aes(x = .data[[!!col_name]])) +
    geom_histogram(binwidth = 1)
  assign(paste0("g1", i), plot_i)
}
I would rewrite it like in the code below.
First I would put the data from wide to long format and then subset each level (x, y, z) and plot it.
library(ggplot2)
library(tidyr)

# Example data: three numeric columns drawn from different normal
# distributions.
tmp <- data.frame(
  x = rnorm(n = 20, mean = 0, sd = 1),
  y = rnorm(n = 20, mean = 10, sd = 2),
  z = rnorm(n = 20, mean = 40, sd = 5)
)

# Wide -> long: one row per observation, with the source column name in `key`
# and the observation in `value`. pivot_longer() replaces the superseded
# gather() and produces the same two columns here.
xy <- pivot_longer(
  tmp,
  cols = everything(),
  names_to = "key",
  values_to = "value"
)

# One histogram per original column, one page each.
pdf("histogram.pdf")
for (i in unique(xy$key)) {
  one_column <- xy[xy$key == i, ]
  # print() is needed because ggplot objects are not auto-printed inside a
  # loop; printing draws the plot immediately, while `one_column` still holds
  # this iteration's data.
  print(
    ggplot(one_column, aes(x = value)) +
      theme_bw() +
      # bins = 30 is the default; stating it avoids the "pick a better value"
      # message on every page.
      geom_histogram(bins = 30)
  )
}
dev.off()
Related
I am trying to construct a list of ggplot graphics, which will be plotted later. What I have so far, using Anscombe's quartet for an example, is:
library(ggplot2)
library(gridExtra)
# Empty base plot with a fixed x-axis range; each panel adds its layers to it.
base <- ggplot() + xlim(4,19)
# Pre-allocated list with one slot per plot.
plots = vector(mode = "list", length = 4)
# For each of the four Anscombe pairs (columns i and i+4), add a blue point
# layer and a red linear-model smooth to the base plot.
# NOTE(review): aes(x,y) stores the names x and y, not their current values.
# Both are overwritten on every iteration, so when the plots are finally
# drawn all four layers look up the values left by the last iteration.
for(i in 1:4) {
x <- anscombe[,i]
y <- anscombe[,i+4]
p <- geom_point(aes(x,y),colour="blue")
q <- geom_smooth(aes(x,y),method="lm",colour="red",fullrange=T)
plots[[i]] <- base+p+q
}
# Draw the stored plots in a grid with two columns; this is the point at
# which the plots are actually rendered.
grid.arrange(grobs = plots,ncol=2)
As I travel through the loop, I want the current values of the plots p and q to be added with the base plot, into the i-th value of the list. That is, so that list element number i contains the plots relating to the i-th x and y columns from the dataset.
However, what happens is that the last plot only is drawn, four times. I've done something very similar with base R, using mfrow, plot and abline, so that I believe my logic is correct, but my implementation isn't. I suspect that the issue is with these lines:
plots = vector(mode = "list", length = 4)
plots[[i]] <- base+p+q
How can I create a list of ggplot graphics; starting with an empty list?
(If this is a trivial and stupid question, I apologise. I am very new both to R and to the Grammar of Graphics.)
The code works properly if lapply() is used instead of a for loop.
plots <- lapply(1:4, function(i) {
# create plot number i
})
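The reason for this issue is that ggplot uses lazy evaluation: aes(x, y) is only evaluated when a plot is rendered. By the time the plots are rendered, the for loop has already reached i=4, so x and y hold the values from the last iteration and the last plot is displayed four times.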
Full working example:
library(ggplot2)
library(gridExtra)

# Empty base plot with a fixed x-axis range shared by all four panels.
base <- ggplot() + xlim(4, 19)

# One plot per Anscombe pair (columns i and i + 4). Every call of the
# anonymous function gets its own x and y, so each plot still finds its own
# data when it is rendered later by grid.arrange().
plots <- lapply(1:4, function(i) {
  x <- anscombe[, i]
  y <- anscombe[, i + 4]
  p <- geom_point(aes(x, y), colour = "blue")
  # TRUE instead of T: T is an ordinary variable that can be reassigned.
  # formula = y ~ x is the default for method = "lm"; stating it avoids the
  # informational message on every panel.
  q <- geom_smooth(
    aes(x, y),
    method = "lm",
    formula = y ~ x,
    colour = "red",
    fullrange = TRUE
  )
  base + p + q
})

grid.arrange(grobs = plots, ncol = 2)
To force evaluation, there is a simple solution: change aes(...) to aes_(...) and your code works.
library(ggplot2)
library(gridExtra)

# Empty base plot with a fixed x-axis range shared by all four panels.
base <- ggplot() + xlim(4, 19)

# One plot per Anscombe pair (columns i and i + 4).
# aes_() evaluates its arguments immediately, so each layer stores the
# current x and y values instead of the names x and y.
# NOTE(review): aes_() is deprecated in current ggplot2 releases; the
# documented replacement is tidy evaluation inside aes(), or passing a data
# frame to the layer via `data =`.
plots <- lapply(1:4, function(i) {
  x <- anscombe[, i]
  y <- anscombe[, i + 4]
  p <- geom_point(aes_(x, y), colour = "blue")
  # TRUE instead of T: T is an ordinary variable that can be reassigned.
  q <- geom_smooth(
    aes_(x, y),
    method = "lm",
    colour = "red",
    fullrange = TRUE
  )
  base + p + q
})

grid.arrange(grobs = plots, ncol = 2)
I need to plot a couple of curves on one graph. I've got trajectories of Brownian simulation which I got from the function:
# Simulate one Brownian-motion trajectory on [0, T] using N time steps and
# draw it as a line plot.
#   T: end of the time interval
#   N: number of increments (the path has N + 1 points, starting at 0)
# Called for its plotting side effect.
brownian <- function(T, N) {
  drift <- 0
  step <- T / N
  time_grid <- seq(0, T, by = step)
  # Increments are drift * step + sqrt(step) * Z with Z ~ N(0, 1); the
  # volatility is implicitly 1.
  increments <- drift * step + sqrt(step) * rnorm(N, mean = 0, sd = 1)
  path <- cumsum(c(0, increments))
  plot(
    time_grid, path,
    type = "l",
    col = rep(1:3, each = 10),
    xlab = "t=[0,T]",
    ylab = "B(t,ω)"
  )
}
For example for brownian(1,1000) I get:
And for brownian(10,1000) I get:
As you can see I get black graphs. I have to plot these trajectories on one graph (every trajectory should have different color). When it takes several trajectories, it should look like:
Do you have any advice on how I can plot these curves on one graph so that each curve has a different color?
Thanks in advance
You could do this pretty easily by modifying the function and using ggplot() to make the graph. The function below takes ntimes as an argument which specifies the number of times you want to do the simulation. It then uses ggplot() to make the graph. You could adjust the internals of the function to have it produce a different looking plot if you like.
#' Simulate several Brownian-motion trajectories and plot them with ggplot2.
#'
#' @param T End of the time interval [0, T]; a single number.
#' @param N Number of time steps per trajectory; a single number.
#' @param ntimes Number of trajectories to simulate; a single number >= 1.
#' @return A ggplot object with one coloured line per trajectory.
brownian <- function(T, N, ntimes) {
  # seq(0, T, by = ...) below only accepts a scalar T, so vector input could
  # never work; reject it up front with a clear message.
  if (length(T) != 1 || length(N) != 1) {
    stop("T and N must each be a single number", call. = FALSE)
  }
  # 1:ntimes would silently run twice for ntimes = 0, so validate instead.
  if (length(ntimes) != 1 || is.na(ntimes) || ntimes < 1) {
    stop("ntimes must be a single number >= 1", call. = FALSE)
  }
  # Fail loudly if ggplot2 is missing (require() only returns FALSE).
  if (!requireNamespace("ggplot2", quietly = TRUE)) {
    stop("package 'ggplot2' is required", call. = FALSE)
  }

  alpha <- 0
  delta_t <- T / N
  t <- seq(0, T, by = delta_t)

  # Simulate each trajectory into its own data frame and bind once at the
  # end, instead of growing a data frame with rbind() inside the loop.
  paths <- lapply(seq_len(ntimes), function(i) {
    increments <- alpha * delta_t + sqrt(delta_t) * rnorm(N, mean = 0, sd = 1)
    data.frame(xt = cumsum(c(0, increments)), t = t, n = i)
  })
  dat <- do.call(rbind, paths)

  ggplot2::ggplot(
    dat,
    ggplot2::aes(x = t, y = xt, colour = as.factor(n))
  ) +
    ggplot2::geom_line(show.legend = FALSE) +
    ggplot2::labs(x = "t=[0,T]", y = "B(t,ω)", colour = "T") +
    ggplot2::theme_classic()
}
brownian(10,1000, 5)
Here is a base R solution with matplot. It is ideal for this type of plot, since it computes the x and y axis ranges and plots all lines in one call only. It uses DaveArmstrong's idea of adding an extra argument ntimes. This argument is also used for the color scheme.
# Simulate `ntimes` Brownian-motion trajectories on [0, T] with N steps each
# and draw them all in a single matplot() call, one colour per trajectory.
# Called for its plotting side effect.
brownian <- function(T, N, ntimes) {
  drift <- 0
  step <- T / N
  time_grid <- seq(0, T, by = step)

  # One trajectory: cumulative sum of the increments, starting at 0.
  one_path <- function() {
    cumsum(c(0, drift * step + sqrt(step) * rnorm(N, mean = 0, sd = 1)))
  }

  # Each replicate becomes one column of the result.
  paths <- replicate(ntimes, one_path())

  matplot(
    time_grid, paths,
    type = "l",
    lty = 1,
    col = seq_len(ntimes),
    xlab = "t=[0,T]",
    ylab = "B(t,ω)"
  )
}
set.seed(2020)
brownian(1, 1000, 5)
I want to plot linear-model-lines for each ID.
How can I create predictions for multiple lms (or glms) using sequences of different length? I tried:
# Some fake data: 60 rows with a response `res`, a predictor `var` and a
# group label `ID` (A once, B twice, C three times per cycle of six).
res<-runif(60,1,20)
var<-runif(60,10,50)
ID<-rep(c("A","B","B","C","C","C"),10)
data<- data.frame(ID,res,var)
# Fit one linear model res ~ var per ID, using a data.table keyed on ID to
# select the rows of each group.
library(data.table)
dt <- data.table(data,key="ID")
fits <- lapply(unique(data$ID),
function(z)lm(res~var, data=dt[J(z),], y=T))
# Per-ID minimum and maximum of `var`; matrix() turns each tapply() result
# into a one-column matrix and [,1] extracts that column again.
mins<-matrix(with(data, tapply(var,ID,min)))
mins1<-mins[,1]
maxs<-matrix(with(data,tapply(var,ID,max)))
maxs1<-maxs[,1]
# For each ID, a sequence from its minimum to its maximum `var` in steps of
# 1; the sequences can have different lengths.
my_var<-list()
for(i in 1:3){
my_var[[i]]<- seq(from=mins1[[i]],to=maxs1[[i]],by=1)
}
# Predict each model on its own sequence.
# NOTE(review): a for loop evaluates to NULL, so the assignment below stores
# nothing, and the value of predict() inside the loop is discarded.
# NOTE(review): `type` and `se` are written inside data.frame(), not as
# arguments of predict(), and `my_var` is the whole list, not my_var[[i]].
predslist<- list()
predslist[[i]] <- for(i in 1:3){
dat<-fits[[i]]
predict(dat,newdata= data.frame("var"= my_var,type= "response", se=TRUE))
}
The predict step results in an error.
Plotting lm lines only for var[i] ranges works in ggplot:
library(ggplot2)
# create ID, x, y as coded by Matt
# Scatter plot built with ggplot() + geom_point(); qplot() is deprecated in
# current ggplot2 releases.
p <- ggplot(data.frame(ID = ID, x = x, y = y), aes(x, y)) + geom_point()
# One least-squares line per ID, drawn only over that ID's x range.
# `size = 1` was dropped: it is the default width for geom_smooth() and `size`
# is deprecated for line geoms. formula = y ~ x is the default for "lm".
p + geom_smooth(aes(group = ID), method = "lm", formula = y ~ x, se = FALSE)
Is something like this what you're after?
# generating some fake data: four groups of ten points whose slope grows
# with the group number
ID <- rep(letters[1:4], each = 10)
x <- rnorm(40, mean = 5, sd = 10)
y <- as.numeric(as.factor(ID)) * x + rnorm(40)

# plotting in base R, one colour per group
plot(x, y, col = as.factor(ID), pch = 16)

# calling lm() once per group (groups in sorted order, matching the factor
# codes used for the point colours) and adding the fitted lines
ids <- sort(unique(ID))
lmlist <- lapply(ids, function(i) lm(y[ID == i] ~ x[ID == i]))
names(lmlist) <- ids
# seq_along() instead of 1:length(): safe even if the list were empty
for (i in seq_along(lmlist)) abline(lmlist[[i]], col = i)
Don't know if the plotting part is where you're stuck, but the abline() function will draw a least-squares line if you pass in an object returned from lm().
If you want the least-squares lines to begin & end with the min & max x values, here's a workaround. It's not pretty, but seems to work.
plot(x, y, col = as.factor(ID), pch = 16)
# Numeric group codes; they follow the sorted group order used for lmlist.
IDnum <- as.numeric(as.factor(ID))
for (i in seq_along(lmlist)) {
  # Draw each fitted line as a single segment from the smallest to the
  # largest x of its group, computed from the model coefficients. This avoids
  # tracing back and forth through the unsorted fitted points.
  x_range <- range(x[IDnum == i])
  cf <- coef(lmlist[[i]])
  lines(x_range, cf[1] + cf[2] * x_range, col = i)
}
I have a plot whose legend should contain two levels. Ggplot shows a legend with six levels, including four which do not appear in the data frame. A simple reproduction of the problem is shown below:
# Common grid on which both power curves are sampled.
x <- seq(1, 10, by = 0.5)

# One data frame per exponent (x^2 and x^3), stacked into a single long
# table; `exponent` stays a plain number.
curves <- lapply(c(2, 3), function(k) {
  data.frame(x = x, y = x^k, exponent = k)
})
data <- do.call(rbind, curves)

# One line per exponent, coloured by the numeric `exponent` column.
p <- ggplot(data, aes(x, y, group = exponent, colour = exponent)) +
  geom_line()
p
I am obviously doing something wrong, but need help in finding the problem.
ggplot2 interprets exponent as a continuous variable; thus it displays a number of breaks similarly to what pretty(c(2, 3)) would return.
You can use colour = factor(exponent), or specify the colour breaks explicitly.
Try
# Treat `exponent` as a discrete variable so the legend shows only the
# levels present in the data.
p <- ggplot(
  data,
  aes(x = x, y = y, group = factor(exponent), colour = factor(exponent))
) +
  geom_line()
Let's say I've got this dataframe with 2 levels. LC and HC.
Now I want to get 2 plots like the one below, drawn on top of each other.
# Ten observations: five for well type "LC" (values alternating 1, 2) followed
# by five for "HC" (values alternating 5, 4).
data <- data.frame(
  welltype = rep(c("LC", "HC"), each = 5),
  value = c(rep_len(c(1, 2), 5), rep_len(c(5, 4), 5))
)
The code to produce the following plot is:
# Draw 1000 standard-normal values.
x <- rnorm(1000)
# hist() draws the histogram and returns its bin breaks and counts.
y <- hist(x)
# Redraw the histogram as a stair-step outline (type="s"). There is one more
# break than there are counts, so a trailing 0 is appended to make the two
# vectors the same length.
plot(y$breaks,
c(y$counts,0),
type="s",col="blue")
(with thanks to Joris Meys)
So, how do I even start on this? Since I'm used to Java I was thinking of a for loop, but I've been told not to do it this way.
Next to the method provided by Aaron, there's a ggplot solution as well (see below),
but I would strongly advise you to use the densities, as they will give nicer plots and are a whole lot easier to construct :
# make data: 100 normal draws per well type, with group means 0, 2 and 3
wells <- c("LC", "HC", "BC")
group_means <- c(0, 2, 3)
Data <- data.frame(
  welltype = rep(wells, each = 100),
  value = unlist(lapply(group_means, function(m) rnorm(100, mean = m)))
)

# overlapping, semi-transparent density curves, one per well type
ggplot(Data, aes(value, fill = welltype)) +
  geom_density(alpha = 0.2)
gives :
For the plot you requested :
library(ggplot2)

# make hists dataframe: for every well type, the histogram breaks (`br`) and
# the counts padded with a trailing 0 (`co`) so both columns have equal length
hists <- lapply(split(Data$value, Data$welltype), function(values) {
  # plot = FALSE: only the bin computation is needed, not a drawn histogram
  h <- hist(values, plot = FALSE)
  data.frame(br = h$breaks, co = c(h$counts, 0))
})
ll <- vapply(hists, nrow, integer(1))
# Label the rows with the group names the split actually produced. Groups come
# out in sorted order (BC, HC, LC), which differs from the order of `wells`,
# so rep(wells, ll) would attach the wrong label to each histogram.
fac <- rep(names(hists), ll)
hists <- do.call(rbind, hists)
hists$fac <- fac

# make plot: one step line per well type (qplot() is deprecated)
ggplot(hists, aes(br, co, colour = fac)) +
  geom_step()
You can use the same code except with points instead of plot for adding additional lines to the plot.
Making up some data
# Reproducible example data: two groups of 1000 normal draws; group 1 is
# shifted up by 3.
set.seed(5)
n_per_group <- 1000
d <- data.frame(
  x = c(3 + rnorm(n_per_group), rnorm(n_per_group)),
  g = rep(c(1L, 2L), each = n_per_group)
)
And doing it in a fairly straightforward way:
# Split the values by group and compute each histogram without drawing it.
x1 <- d[d$g == 1, "x"]
x2 <- d[d$g == 2, "x"]
y1 <- hist(x1, plot = FALSE)
y2 <- hist(x2, plot = FALSE)

# Axis limits wide enough for both outlines; the y axis always includes 0.
x_limits <- range(c(y1$breaks, y2$breaks))
y_limits <- range(c(0, y1$counts, y2$counts))

# First outline sets up the plot; the second is added on top with points().
plot(y1$breaks, c(y1$counts, 0), type = "s", col = "blue",
     xlim = x_limits, ylim = y_limits)
points(y2$breaks, c(y2$counts, 0), type = "s", col = "red")
Or in a more R-ish way:
# One colour per group, in the order the groups come out of split().
col <- c("blue", "red")
ds <- split(d$x, d$g)
hs <- lapply(ds, hist, plot = FALSE)

# Collect counts and breaks across all groups to size the axes.
all_counts <- unlist(lapply(hs, `[[`, "counts"))
all_breaks <- unlist(lapply(hs, `[[`, "breaks"))

# Empty canvas, then one stair-step outline per group.
plot(0, 0, type = "n",
     ylim = range(c(0, all_counts)),
     xlim = range(all_breaks))
for (i in seq_along(hs)) {
  h <- hs[[i]]
  points(h$breaks, c(h$counts, 0), type = "s", col = col[i])
}
EDIT: Inspired by Joris's answer, I'll note that lattice can also easily do overlapping density plots.
library(lattice)
# Overlapping density curves, one per level of g. The argument is spelled out
# as `groups`; `group = g` only worked through partial argument matching.
densityplot(~x, groups = g, data = d)