im using a loop to plot histograms of monthly air quality data, which grouped by year using the facet_grid() function. in my plots i have a mean line of mean of the month of all years, and i would like it to be the mean by month per year.
my code is:
for (z in vec) {
df.g <- pol %>% filter(poluentes==z)
df.g$year <- as.character(df.g$year)
df.g$month<- as.character(df.g$month)
mu <- ddply(df.g, "month", summarise, grp.mean=mean(value)) # mean line
print(ggplot(df.g, aes(x=value, fill=month, color=month)) +
geom_histogram(position="identity", alpha=0.2) +
labs(title=z,x="µg/m3", caption = "Análise: poluente") +
geom_vline(data=mu, aes(xintercept=grp.mean, color=month),
linetype="dashed") + facet_grid(year ~.))
}
the output is:
and as you can see, the mean line is the same for the 3 histograms
Your calculations of means need to include the year as well:
set.seed(111)
df.g = data.frame(year = sample(18:20,1000,replace=TRUE),
month = factor(sample(3:4,1000,replace=TRUE)),
value = rnbinom(1000,mu=50,size=1))
mu = aggregate(df.g$value,list(month=df.g$month,year=df.g$year),mean)
Then pass it:
ggplot(df.g,aes(x=value,fill=month,col=month)) +
geom_histogram(bins=20,position="identity", alpha=0.2) +
facet_grid(year ~ .) +
geom_vline(data = mu,aes(xintercept = x,col=month))
Related
If possible, I need help to understand why the code below is not working. This code I was found on the page: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3817376/. Would it be possible for any expert member to adapt it to work?
library(ggplot2)
library(nlme)
head(Theoph)
ggplot(data=Theoph, aes(x=Time, y=conc, group=Subject)) + geom_line() + labs(x=“Time (hr)”, y=“Concentration (mg/L)”)
p <- ggplot(data=Theoph, aes(x=Time, y=conc, group=Subject)) + geom_line() + labs(x=“Time (hr)”, y=“Concentration (mg/L)”) + stat_summary(fun.y=median, geom=“line”,aes(x=ntpd, y=conc, group=1), color=“red”, size=1)
print(p) # “p” is a ggplot object
# create a flag for body weight
Theoph$WT <- ifelse(Theoph$Wt<70, “WT < 70kg”, “WT >= 70kg”)
p + facet_grid(.~WT)""t>
There are a couple things to help you run this.
First, you have curly/smart quotes “ in your code, and should just use plain quotes ". Sometimes we get this excess formatting when we copy/paste code from other sources like this.
Second, you need to use the supplementary materials to calculate ntpd, add to the Theoph dataset.
Below is code that seemed to work at my end to reproduce the spaghetti plots.
library(ggplot2)
library(nlme)
# Reference:
# https://ascpt.onlinelibrary.wiley.com/doi/10.1038/psp.2013.56
head(Theoph)
ggplot(data = Theoph, aes(x = Time, y = conc, group = Subject)) +
geom_line() +
labs(x = "Time (hr)", y = "Concentration (mg/L)")
##################################################################################
## we need some data manipulation for Figure 1(e) and Figure (f)
## below code is how to calculate approximate ntpd (nominal post time dose)
## "ntpd" is used for summarizing conc data (calculate mean at each time point)
## create body weight category for <70 kg or >=70 kg
##################################################################################
#--create a cut (time intervals)
Theoph$cut <- cut(Theoph$Time, breaks=c(-0.1,0,1,1.5, 2,3,4,6,8,12,16,20,24))
#--make sure each time point has reasonable data
table(Theoph$cut)
#--calcuate approximate ntpd
library(plyr)
tab <- ddply(Theoph, .(cut), summarize, ntpd=round(mean(Time, na.rm=T),2))
#--merge ntpd into Theoph data
Theoph <- merge(Theoph, tab, by=c("cut"), all.x=T)
#--sort the data by Subject and Time, select only nessesary columns
Theoph <- Theoph[order(Theoph$Subject, Theoph$Time),c("Subject","Wt","Dose","Time","conc","ntpd")]
#--create body weight category for <70 kg or >=70 kg for Figure 1(f)
Theoph$WT <- ifelse(Theoph$Wt<70, "WT < 70kg", "WT >= 70kg")
#--end of data manipulation
##################################################################################
p <- ggplot(data = Theoph, aes(x=Time, y=conc, group=Subject)) +
geom_line() +
labs(x="Time (hr)", y="Concentration (mg/L)") +
stat_summary(fun = median, geom = "line", aes(x = ntpd, y = conc, group = 1), color = "red", size=1)
print(p)
p + facet_grid(. ~ WT)
I am trying to draw a grouped density plot and add the mean line of each plot; here is the code
data <- data.frame(
Accuracy=abs(rnorm(140)),
Species=c(rep("A.All",20),rep("B. double",60),rep("C.single",60),
rep("D.All",20),rep("E.double",60),rep("F.single",60)),
Modality=c(rep("All,w0",10),rep("double1,w0",10),rep("double2,w0",10),rep("double3,w0",10),
rep("single1,w0",10),rep("single2,w0",10),rep("single3,w0",10),
rep("All,w2",10),rep("double1,w2",10),rep("double2,w2",10),rep("double3,w2",10),
rep("single1,w2",10),rep("single2,w2",10),rep("single3,w2",10))
)
p<-ggplot(data, aes(x=Accuracy, fill=Modality)) +
geom_density(alpha=0.4)+
facet_wrap(. ~ Species) +
xlab("Accuracy") + ylab("Density")
library(plyr)
mu <- ddply(data, "Modality", summarise, grp.mean=mean(Accuracy))
head(mu)
# Add mean lines
a<-p+geom_vline(data=mu, aes(xintercept=grp.mean, color=Modality),
linetype="dashed")+ xlab("Accuracy") + ylab("Density")
However, based on the output figure as
The mean lines are absolutely incorrect, e.g. for the first picture on the top left, there should be two lines for two density plots, but a couple of lines are created and being repeated for all the figures.
You may specify both Species and Modality
plyr
dummy <- ddply(data, c("Species","Modality"), summarise, grp.mean=mean(Accuracy))
ggplot(data, aes(x=Accuracy, fill=Modality)) +
geom_density(alpha=0.4)+
facet_wrap(. ~ Species) +
xlab("Accuracy") + ylab("Density") +
geom_vline(data = dummy, aes(xintercept = grp.mean, color = Modality))
dplyr
library(dplyr)
dummy <- data %>%
group_by(Species, Modality) %>%
summarize(mean = mean(Accuracy))
ggplot(data, aes(x=Accuracy, fill=Modality)) +
geom_density(alpha=0.4)+
facet_wrap(. ~ Species) +
xlab("Accuracy") + ylab("Density") +
geom_vline(data = dummy, aes(xintercept = mean, color = Modality))
I've poked around, but been unable to find an answer. I want to do a weighted geom_bar plot overlaid with a vertical line that shows the overall weighted average per facet. I'm unable to make this happen. The vertical line seems to a single value applied to all facets.
require('ggplot2')
require('plyr')
# data vectors
panel <- c("A","A","A","A","A","A","B","B","B","B","B","B","B","B","B","B")
instrument <-c("V1","V2","V1","V1","V1","V2","V1","V1","V2","V1","V1","V2","V1","V1","V2","V1")
cost <- c(1,4,1.5,1,4,4,1,2,1.5,1,2,1.5,2,1.5,1,2)
sensitivity <- c(3,5,2,5,5,1,1,2,3,4,3,2,1,3,1,2)
# put an initial data frame together
mydata <- data.frame(panel, instrument, cost, sensitivity)
# add a "contribution to" vector to the data frame: contribution of each instrument
# to the panel's weighted average sensitivity.
myfunc <- function(cost, sensitivity) {
return(cost*sensitivity/sum(cost))
}
mydata <- ddply(mydata, .(panel), transform, contrib=myfunc(cost, sensitivity))
# two views of each panels weighted average; should be the same numbers either way
ddply(mydata, c("panel"), summarize, wavg=weighted.mean(sensitivity, cost))
ddply(mydata, c("panel"), summarize, wavg2=sum(contrib))
# plot where each panel is getting its overall cost-weighted sensitivity from. Also
# put each panel's weighted average on the plot as a simple vertical line.
#
# PROBLEM! I don't know how to get geom_vline to honor the facet breakdown. It
# seems to be computing it overall the data and showing the resulting
# value identically in each facet plot.
ggplot(mydata, aes(x=sensitivity, weight=contrib)) +
geom_bar(binwidth=1) +
geom_vline(xintercept=sum(contrib)) +
facet_wrap(~ panel) +
ylab("contrib")
If you pass in the presumarized data, it seems to work:
ggplot(mydata, aes(x=sensitivity, weight=contrib)) +
geom_bar(binwidth=1) +
geom_vline(data = ddply(mydata, "panel", summarize, wavg = sum(contrib)), aes(xintercept=wavg)) +
facet_wrap(~ panel) +
ylab("contrib") +
theme_bw()
Example using dplyr and facet_wrap incase anyone wants it.
library(dplyr)
library(ggplot2)
df1 <- mutate(iris, Big.Petal = Petal.Length > 4)
df2 <- df1 %>%
group_by(Species, Big.Petal) %>%
summarise(Mean.SL = mean(Sepal.Length))
ggplot() +
geom_histogram(data = df1, aes(x = Sepal.Length, y = ..density..)) +
geom_vline(data = df2, mapping = aes(xintercept = Mean.SL)) +
facet_wrap(Species ~ Big.Petal)
vlines <- ddply(mydata, .(panel), summarize, sumc = sum(contrib))
ggplot(merge(mydata, vlines), aes(sensitivity, weight = contrib)) +
geom_bar(binwidth = 1) + geom_vline(aes(xintercept = sumc)) +
facet_wrap(~panel) + ylab("contrib")
I wish to highlight segments above or below a certain value in a time series by a unique colour or a shape. In the example data I am decomposing a mortality time series into its components. My goal is to highlight the segments when the mortality in the trend component falls below 35 (deep between 1997 and 2000) and when the residual component is above 100 (the spike). I have tried to use annotate, but that did not produce what I wanted.
#Load library and obtain data
library(gamair)
library(tsModel)
library(ggplot2)
library(reshape2)
data<-data(chicago)
## create variables, decompose TS
chicago$date<-seq(from=as.Date("1987-01-01"), to=as.Date("2000-12-31"),length=5114)
data<- chicago[,c("date","death")]
mort <- tsdecomp(data$death, c(1, 2, 15, 5114))
## Convert matrix to df, rename, melt
df<-as.data.frame(mort)
names(df)[1] <- "Trend"
names(df)[2] <- "Seasonal"
names(df)[3] <- "Residual"
df$date<-seq(as.Date("1987-01-01"), as.Date("2000-12-31"), "day")
meltdf <- melt(df,id="date")
## Plot
ggplot(meltdf,aes(x=date,y=value,colour=variable,group=variable)) + geom_line() +
theme_bw() +
ylab("") + xlab("") +
facet_grid(variable ~ . , scales = "free") +
theme(legend.position = "none")
annotate("rect", xmin=1995-01-01,xmax=1996-01-01,ymin= 10, ymax=300, alpha = .2,fill="blue")
Well, this works but I must admit it's more work that I'd hoped.
get.box <- function(data) {
rng <- range(data$date) + c(-50,50)
z <- meltdf[meltdf$date>=rng[1] & meltdf$date <=rng[2] & meltdf$variable==unique(data$variable),]
data.frame(variable=unique(z$variable),
xmin=min(z$date),xmax=max(z$date),ymin=min(z$value),ymax=max(z$value))
}
hilight.trend <- get.box(with(meltdf,meltdf[variable=="Trend" & value<35,]))
hilight.resid <- get.box(with(meltdf,meltdf[variable=="Residual" & value>100,]))
ggplot(meltdf,aes(colour=variable,group=variable)) +
geom_line(aes(x=date,y=value)) +
theme_bw() +
ylab("") + xlab("") +
facet_grid(variable ~ . , scales = "free") +
theme(legend.position = "none") +
geom_rect(data=hilight.trend, alpha=0.2, fill="red",
aes(xmax=xmax,xmin=xmin,ymax=ymax,ymin=ymin)) +
geom_rect(data=hilight.resid, alpha=0.2, fill="blue",
aes(xmax=xmax,xmin=xmin,ymax=ymax,ymin=ymin))
You can't really use annotate(...) with facets, because you will get the same annotation on all the facets. So you're left with something like geom_rect(...). The problem here is that geom_rect(...) draws a rectangle for every row in the data. So you need to create an auxiliary dataset with just one row for each variable, containing the x- and y- min and max.
I am trying to plot the change in a time series for each calendar year using ggplot and I am having problems with the fine control of the x-axis. If I do not use scale="free_x" then I end up with an x-axis that shows several years as well as the year in question, like this:
If I do use scale="free_x" then as one would expect I end up with tick labels for each plot, and that in some cases vary by plot, which I do not want:
I have made various attempts to define the x-axis using scale_x_date etc but without any success. My question is therefore:
Q. How can I control the x-axis breaks and labels on a ggplot facet grid so that the (time series) x-axis is identical for each facet, shows only at the bottom of the panel and is in the form of months formatted 1, 2, 3 etc or as 'Jan','Feb','Mar'?
Code follows:
require(lubridate)
require(ggplot2)
require(plyr)
# generate data
df <- data.frame(date=seq(as.Date("2009/1/1"), by="day", length.out=1115),price=runif(1115, min=100, max=200))
# remove weekend days
df <- df[!(weekdays(as.Date(df$date)) %in% c('Saturday','Sunday')),]
# add some columns for later
df$year <- as.numeric(format(as.Date(df$date), format="%Y"))
df$month <- as.numeric(format(as.Date(df$date), format="%m"))
df$day <- as.numeric(format(as.Date(df$date), format="%d"))
# calculate change in price since the start of the calendar year
df <- ddply(df, .(year), transform, pctchg = ((price/price[1])-1))
p <- ggplot(df, aes(date, pctchg)) +
geom_line( aes(group = 1, colour = pctchg),size=0.75) +
facet_wrap( ~ year, ncol = 2,scale="free_x") +
scale_y_continuous(formatter = "percent") +
opts(legend.position = "none")
print(p)
here is an example:
df <- transform(df, doy = as.Date(paste(2000, month, day, sep="/")))
p <- ggplot(df, aes(doy, pctchg)) +
geom_line( aes(group = 1, colour = pctchg),size=0.75) +
facet_wrap( ~ year, ncol = 2) +
scale_x_date(format = "%b") +
scale_y_continuous(formatter = "percent") +
opts(legend.position = "none")
p
Do you want this one?
The trick is to generate day of year of a same dummy year.
UPDATED
here is an example for the dev version (i.e., ggplot2 0.9)
p <- ggplot(df, aes(doy, pctchg)) +
geom_line( aes(group = 1, colour = pctchg), size=0.75) +
facet_wrap( ~ year, ncol = 2) +
scale_x_date(label = date_format("%b"), breaks = seq(min(df$doy), max(df$doy), "month")) +
scale_y_continuous(label = percent_format()) +
opts(legend.position = "none")
p