Using ggplot2 to plot geom_errorbar for Date in R - r

In many cases, we need to demonstrate the standard error. In ggplot2, we can do it using the geom_errorbar function. I find that when the x variable is of the Date type, ggplot2 could not plot the error bar completely. See the R script below for more information.
library(gcookbook) # For the data set
# Take a subset of the cabbage_exp data for this example
ce <- subset(cabbage_exp, Cultivar == "c39")
# With a line graph
p1 = ggplot(ce, aes(x=Date, y=Weight)) +
geom_line(aes(group=1)) +
geom_point(size=4) +
geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se), width=.2)
ce$Date = as.Date(c('01/01/2001', '01/01/2002', '01/01/2003'), "%m/%d/%Y")
p2 = ggplot(ce, aes(x=Date, y=Weight)) +
geom_line(aes(group=1)) +
geom_point(size=4) +
geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se), width=.2)
p1
p2

Simply following RHA's directions (code below). #RHA, please feel free to copy my answer into a new one as it's more yours then it's mine.
# install.packages("gcookbook", dependencies = TRUE)
library(gcookbook) # For the data set
# Take a subset of the cabbage_exp data for this example
ce <- subset(cabbage_exp, Cultivar == "c39")
# With a line graph
# install.packages("ggplot2", dependencies = TRUE)
require(ggplot2)
ce$Date = as.Date(c('01/01/2001', '01/01/2002', '01/01/2003'), "%m/%d/%Y")
(p2 = ggplot(ce, aes(x=Date, y=Weight)) +
geom_line(aes(group=1)) +
geom_point(size=4) +
geom_errorbar(aes(ymin = Weight- se, ymax= Weight + se), width=45)))

Related

Why the R script provide by site pubmed is not executing ? Is it possible to make it run?

If possible, I need help to understand why the code below is not working. This code I was found on the page: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3817376/. Would it be possible for any expert member to adapt it to work?
library(ggplot2)
library(nlme)
head(Theoph)
ggplot(data=Theoph, aes(x=Time, y=conc, group=Subject)) + geom_line() + labs(x=“Time (hr)”, y=“Concentration (mg/L)”)
p <- ggplot(data=Theoph, aes(x=Time, y=conc, group=Subject)) + geom_line() + labs(x=“Time (hr)”, y=“Concentration (mg/L)”) + stat_summary(fun.y=median, geom=“line”,aes(x=ntpd, y=conc, group=1), color=“red”, size=1)
print(p) # “p” is a ggplot object
# create a flag for body weight
Theoph$WT <- ifelse(Theoph$Wt<70, “WT < 70kg”, “WT >= 70kg”)
p + facet_grid(.~WT)""t>
There are a couple things to help you run this.
First, you have curly/smart quotes “ in your code, and should just use plain quotes ". Sometimes we get this excess formatting when we copy/paste code from other sources like this.
Second, you need to use the supplementary materials to calculate ntpd, add to the Theoph dataset.
Below is code that seemed to work at my end to reproduce the spaghetti plots.
library(ggplot2)
library(nlme)
# Reference:
# https://ascpt.onlinelibrary.wiley.com/doi/10.1038/psp.2013.56
head(Theoph)
ggplot(data = Theoph, aes(x = Time, y = conc, group = Subject)) +
geom_line() +
labs(x = "Time (hr)", y = "Concentration (mg/L)")
##################################################################################
## we need some data manipulation for Figure 1(e) and Figure (f)
## below code is how to calculate approximate ntpd (nominal post time dose)
## "ntpd" is used for summarizing conc data (calculate mean at each time point)
## create body weight category for <70 kg or >=70 kg
##################################################################################
#--create a cut (time intervals)
Theoph$cut <- cut(Theoph$Time, breaks=c(-0.1,0,1,1.5, 2,3,4,6,8,12,16,20,24))
#--make sure each time point has reasonable data
table(Theoph$cut)
#--calcuate approximate ntpd
library(plyr)
tab <- ddply(Theoph, .(cut), summarize, ntpd=round(mean(Time, na.rm=T),2))
#--merge ntpd into Theoph data
Theoph <- merge(Theoph, tab, by=c("cut"), all.x=T)
#--sort the data by Subject and Time, select only nessesary columns
Theoph <- Theoph[order(Theoph$Subject, Theoph$Time),c("Subject","Wt","Dose","Time","conc","ntpd")]
#--create body weight category for <70 kg or >=70 kg for Figure 1(f)
Theoph$WT <- ifelse(Theoph$Wt<70, "WT < 70kg", "WT >= 70kg")
#--end of data manipulation
##################################################################################
p <- ggplot(data = Theoph, aes(x=Time, y=conc, group=Subject)) +
geom_line() +
labs(x="Time (hr)", y="Concentration (mg/L)") +
stat_summary(fun = median, geom = "line", aes(x = ntpd, y = conc, group = 1), color = "red", size=1)
print(p)
p + facet_grid(. ~ WT)

Barplot with ggplot2 using data from read.csv2

I am trying to create a barplot with the ggplot2 library. My data is stored in read.csv2 format.
# Library
library(ggplot2)
library(tidyverse) # function "%>%"
# 1. Read data (comma separated)
data = read.csv2(text = "Age;Frequency
0 - 10;1
11 - 20;5
21 - 30;20
31 - 40;13
41 - 49;1")
# 2. Print table
df <- as.data.frame(data)
df
# 3. Plot bar chart
ggplot(df, aes(x = Age)) +
geom_bar() +
theme_classic()
The code runs fine, but it produces a graph that looks like all data are at max all the time.
You need to specify your y axis as well:
ggplot(df, aes(x = Age, y = Frequency)) +
geom_bar(stat = "identity") +
theme_classic()
The default value of geom_bar plots the frequency of the values which is 1 for all the Age values here (Check table(df$Age)). You may use geom_bar with stat = 'identity'
library(ggplot2)
ggplot(df, aes(Age, Frequency)) +
geom_bar(stat = 'identity') +
theme_classic()
OR geom_col :
ggplot(df, aes(Age, Frequency)) +
geom_col() +
theme_classic()

Merging two plots in ggplot2 and keeping individual features of the plots (geom_text, geom_vline)

I have two geom_line time series with some geom_text & geom_points on each plot.
One of the two plots additionally has geom_vline. I wonder if it is possible to merge two but i failed to get a solution.
Here are the two plots:
require(zoo)
require(ggplot2)
set.seed(10)
# plot 1:
tmp1 <- xts(cumsum(rnorm(5000,1,10)), Sys.Date()-5000:1)
data.tmp1 = data.frame(date=as.Date(index(tmp1)),
value=drop(coredata(tmp1)))
data.tmp1.year.end = data.frame(date=as.Date(index(tmp1[endpoints(tmp1, "years", 1)])),
value= drop(coredata(tmp1[endpoints(tmp1, "years", 1)])))
plot1 =
ggplot(data.tmp1, aes(x=date, y=value)) +
geom_line(aes(y=value), size=1) +
geom_point(data=data.tmp1.year.end, col="red") +
geom_text(data=data.tmp1.year.end, label=data.tmp1.year.end$value, vjust=0, hjust=1)
# plot 2:
tmp2 <- xts(cumsum(rnorm(5000,1,100)), Sys.Date()-5000:1)
data.tmp2 = data.frame(date=as.Date(index(tmp2)),
value=drop(coredata(tmp2)))
data.tmp2.year.end = data.frame(date=as.Date(index(tmp2[endpoints(tmp2, "years", 1)])),
value= drop(coredata(tmp2[endpoints(tmp2, "years", 1)])))
tmp2.date =as.Date(c("2008-01-01"))
plot2 =
ggplot(data.tmp2, aes(x=date, y=value)) +
geom_line(aes(y=value), size=1) +
geom_point(data=data.tmp2.year.end, col="red") +
geom_vline(xintercept=as.numeric(tmp2.date), linetype="dotted") +
geom_text(data=data.tmp2.year.end, label=data.tmp2.year.end$value, vjust=0, hjust=1)
The goal now is that plot1 and plot2 share one xaxis and all features of the individual graphs are kept in the corresponding plot.
The result should look like this:
You might try combining your daily data sets and your year end data sets into single data frames and then using ggplot's faceting to display on a single date axis. Code could look like:
data.tmp1 <- cbind(data.tmp1, data_name="tmp1")
data.tmp1.year.end <- cbind(data.tmp1.year.end, data_name="tmp1")
data.tmp2 <- cbind(data.tmp2, data_name="tmp2")
data.tmp2.year.end <- cbind(data.tmp2.year.end, data_name="tmp2")
data.tmp <- rbind(data.tmp1,data.tmp2)
data.tmp.year.end <- rbind(data.tmp1.year.end, data.tmp2.year.end)
ggplot(data.tmp, aes(x=date, y=value)) +
geom_line(aes(y=value), size=1) +
geom_point(data=data.tmp.year.end, col="red") +
geom_text(data=data.tmp.year.end, aes(label=data.tmp.year.end$value), vjust=0, hjust=1) +
geom_vline(xintercept=as.numeric(tmp2.date), linetype="dotted") +
facet_grid( data_name ~ . , scales="free_y")
which gives the chart

Adjusting y axis origin for stacked geom_bar in ggplot2

I want to plot stacked bar plot with ggplot2 and am having difficulty getting the colour mapping and stack order right. The following example has been developed from this SO answer to achieve a non-zero y-axis origin, but as you can see it creates other problems. The colours don't map properly and the plot order is wrong. Grateful for any pointers on the best way to handle this. The desired output should scale colours as per the factor levels for rating, with colours in the order specified.
require(ggplot2)
d = data.frame(grp = rep(c('A','B'), each = 4),
rating = rep(c('bad','poor','ok','good'), 2),
value = c(15,45,35,5,5,15,55,30), stringsAsFactors = F)
if(require(reshape2)) reshape2::dcast(d, grp ~ rating) # show structure
d$rating = ordered(d$rating, levels=c('bad','poor','ok','good'))
d$grp = ordered(d$grp, levels=c('B','A'))
# split datsets so we can plot 'negative' bars
d1 = subset(d, rating %in% c('ok','good'))
d2 = subset(d, rating %in% c('poor','bad'))
ggplot() +
geom_bar(data = d1, aes(x=grp, y=value, fill=rating), stat='identity', position='stack') +
geom_bar(data = d2, aes(x=grp, y=-value, fill=rating), stat='identity', position='stack') +
scale_fill_manual(values=c('red','pink','lightgreen','green')) +
geom_line(data=d1, aes(x=c(.5,2.5), y=c(0,0)), size=2, linetype='dashed') +
coord_flip()
Perhaps a bit of reordering and using limits() will help:
d2 <- d2[order(d2$rating, decreasing =T),]
ggplot() +
geom_bar(data = d1, aes(x=grp, y=value, fill=rating), stat='identity',
position='stack') +
geom_bar(data = d2, aes(x=grp, y=-value, fill=rating), stat='identity',
position='stack') +
scale_fill_manual(values=c('red','pink','lightgreen','green'),
limits=c("bad","poor","ok","good"))+
geom_line(data=d1, aes(x=c(.5,2.5), y=c(0,0)), size=2, linetype='dashed') +
coord_flip()
For anyone who wishes to learn ggplot2, I strongly recommend getting the Winston Chang's R Graphics Cookbook.

How to highlight an item of time-series in a ggplot2 plot

I wish to highlight segments above or below a certain value in a time series by a unique colour or a shape. In the example data I am decomposing a mortality time series into its components. My goal is to highlight the segments when the mortality in the trend component falls below 35 (deep between 1997 and 2000) and when the residual component is above 100 (the spike). I have tried to use annotate, but that did not produce what I wanted.
#Load library and obtain data
library(gamair)
library(tsModel)
library(ggplot2)
library(reshape2)
data<-data(chicago)
## create variables, decompose TS
chicago$date<-seq(from=as.Date("1987-01-01"), to=as.Date("2000-12-31"),length=5114)
data<- chicago[,c("date","death")]
mort <- tsdecomp(data$death, c(1, 2, 15, 5114))
## Convert matrix to df, rename, melt
df<-as.data.frame(mort)
names(df)[1] <- "Trend"
names(df)[2] <- "Seasonal"
names(df)[3] <- "Residual"
df$date<-seq(as.Date("1987-01-01"), as.Date("2000-12-31"), "day")
meltdf <- melt(df,id="date")
## Plot
ggplot(meltdf,aes(x=date,y=value,colour=variable,group=variable)) + geom_line() +
theme_bw() +
ylab("") + xlab("") +
facet_grid(variable ~ . , scales = "free") +
theme(legend.position = "none")
annotate("rect", xmin=1995-01-01,xmax=1996-01-01,ymin= 10, ymax=300, alpha = .2,fill="blue")
Well, this works but I must admit it's more work that I'd hoped.
get.box <- function(data) {
rng <- range(data$date) + c(-50,50)
z <- meltdf[meltdf$date>=rng[1] & meltdf$date <=rng[2] & meltdf$variable==unique(data$variable),]
data.frame(variable=unique(z$variable),
xmin=min(z$date),xmax=max(z$date),ymin=min(z$value),ymax=max(z$value))
}
hilight.trend <- get.box(with(meltdf,meltdf[variable=="Trend" & value<35,]))
hilight.resid <- get.box(with(meltdf,meltdf[variable=="Residual" & value>100,]))
ggplot(meltdf,aes(colour=variable,group=variable)) +
geom_line(aes(x=date,y=value)) +
theme_bw() +
ylab("") + xlab("") +
facet_grid(variable ~ . , scales = "free") +
theme(legend.position = "none") +
geom_rect(data=hilight.trend, alpha=0.2, fill="red",
aes(xmax=xmax,xmin=xmin,ymax=ymax,ymin=ymin)) +
geom_rect(data=hilight.resid, alpha=0.2, fill="blue",
aes(xmax=xmax,xmin=xmin,ymax=ymax,ymin=ymin))
You can't really use annotate(...) with facets, because you will get the same annotation on all the facets. So you're left with something like geom_rect(...). The problem here is that geom_rect(...) draws a rectangle for every row in the data. So you need to create an auxiliary dataset with just one row for each variable, containing the x- and y- min and max.

Resources