Plot data from several columns of data frame using plyr and ggplot2 - r

I have a data frame with 124 columns and observations. Part of it would be something like:
date <- c("2014-01-03", "2014-05-03","2014-02-04")
App <- c(0,2,4)
Email <- c(1,5,0)
Print <- c(0,0,1)
mgt <- c(1,9,12)
df<- data.frame (date, App, Email, Print, mgt)
I want to plot App against date, then Email against date, then Print against date etc in different plots. I am trying to use plyr and ggplot2 to output these plots and have come up with:
Plots <- function (Y){print(ggplot(df, aes(x=date, y= Y)) + geom_line() +
scale_x_date(breaks = date_breaks('month'), label= date_format('%b-%Y')) +
labs(title="A", x="Date Issued", y="Number of tickets issued")+
theme_bw()) }
ServicePlots <- d_ply (df, col , Plots, .print=TRUE)
The packages lubridate, chron and scales are also being used in the plots. However, this does not seem to work at all. Could someone please point out what I am doing wrong? And maybe help me out a bit?

Have no idea what plyr has to do with anything here just melt the data and plot it as is:
library(reshape2)
library(ggplot2)
library(scales)
df <- melt(df)
ggplot(df, aes(as.Date(date), value)) +
geom_line(aes(group = 1)) +
scale_x_date(breaks = date_breaks('month')) +
facet_wrap( ~ variable, scales = "free") +
labs(title="A", x="Date Issued", y="Number of tickets issued") +
theme_bw()
If you want it in a function form, do
Plots <- function(x){
x <- melt(x)
ggplot(x, aes(as.Date(date), value)) +
geom_line(aes(group = 1)) +
scale_x_date(breaks = date_breaks('month')) +
facet_wrap( ~ variable, scales = "free") +
labs(title="A", x="Date Issued", y="Number of tickets issued") +
theme_bw()
}
Plots(df)

Following is not ggplot based but it works:
par(mfrow=c(1,3))
for(i in 2:5)
plot(mydf[,1], mydf[,i], main=colnames(mydf)[i])

Related

Why the R script provide by site pubmed is not executing ? Is it possible to make it run?

If possible, I need help to understand why the code below is not working. This code I was found on the page: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3817376/. Would it be possible for any expert member to adapt it to work?
library(ggplot2)
library(nlme)
head(Theoph)
ggplot(data=Theoph, aes(x=Time, y=conc, group=Subject)) + geom_line() + labs(x=“Time (hr)”, y=“Concentration (mg/L)”)
p <- ggplot(data=Theoph, aes(x=Time, y=conc, group=Subject)) + geom_line() + labs(x=“Time (hr)”, y=“Concentration (mg/L)”) + stat_summary(fun.y=median, geom=“line”,aes(x=ntpd, y=conc, group=1), color=“red”, size=1)
print(p) # “p” is a ggplot object
# create a flag for body weight
Theoph$WT <- ifelse(Theoph$Wt<70, “WT < 70kg”, “WT >= 70kg”)
p + facet_grid(.~WT)""t>
There are a couple things to help you run this.
First, you have curly/smart quotes “ in your code, and should just use plain quotes ". Sometimes we get this excess formatting when we copy/paste code from other sources like this.
Second, you need to use the supplementary materials to calculate ntpd, add to the Theoph dataset.
Below is code that seemed to work at my end to reproduce the spaghetti plots.
library(ggplot2)
library(nlme)
# Reference:
# https://ascpt.onlinelibrary.wiley.com/doi/10.1038/psp.2013.56
head(Theoph)
ggplot(data = Theoph, aes(x = Time, y = conc, group = Subject)) +
geom_line() +
labs(x = "Time (hr)", y = "Concentration (mg/L)")
##################################################################################
## we need some data manipulation for Figure 1(e) and Figure (f)
## below code is how to calculate approximate ntpd (nominal post time dose)
## "ntpd" is used for summarizing conc data (calculate mean at each time point)
## create body weight category for <70 kg or >=70 kg
##################################################################################
#--create a cut (time intervals)
Theoph$cut <- cut(Theoph$Time, breaks=c(-0.1,0,1,1.5, 2,3,4,6,8,12,16,20,24))
#--make sure each time point has reasonable data
table(Theoph$cut)
#--calcuate approximate ntpd
library(plyr)
tab <- ddply(Theoph, .(cut), summarize, ntpd=round(mean(Time, na.rm=T),2))
#--merge ntpd into Theoph data
Theoph <- merge(Theoph, tab, by=c("cut"), all.x=T)
#--sort the data by Subject and Time, select only nessesary columns
Theoph <- Theoph[order(Theoph$Subject, Theoph$Time),c("Subject","Wt","Dose","Time","conc","ntpd")]
#--create body weight category for <70 kg or >=70 kg for Figure 1(f)
Theoph$WT <- ifelse(Theoph$Wt<70, "WT < 70kg", "WT >= 70kg")
#--end of data manipulation
##################################################################################
p <- ggplot(data = Theoph, aes(x=Time, y=conc, group=Subject)) +
geom_line() +
labs(x="Time (hr)", y="Concentration (mg/L)") +
stat_summary(fun = median, geom = "line", aes(x = ntpd, y = conc, group = 1), color = "red", size=1)
print(p)
p + facet_grid(. ~ WT)

Display Greek symbols and charge facet titles at the same time with ggplot

I know how to modify titles in ggplot without altering the original data. Suppose I have the following data frame and I want to change the labels. Then, I would do so in the following way
df <- data.frame(x = 1:4, y = 1:4, label = c(c("params[1]", "params[2]", "params[3]",
"params[4]")))
params_names <- list(
'params[1]'= "beta[11]",
'params[2]'= "beta[22]",
'params[3]'= "beta[33]",
'params[4]'= "beta[44]"
)
param_labeller <- function(variable, value){
params_names[value]
}
ggplot(df, aes(x=x,y=y)) +
geom_point() +
facet_grid(~label, labeller = param_labeller)
If I wanted to display the subscripts, I would just do this
ggplot(df, aes(x=x,y=y)) +
geom_point() +
facet_grid(~label, labeller = label_parsed)
How do I apply both operations at the same time?
I don't know exactly if this conflicts with you not wanting to "alter" the original data, but you add the labelling information to the factor itself:
df$label2 <- factor(df$label,
labels = c("beta[4]", "beta[24]", "beta[42]", "beta[43]"))
ggplot(df, aes(x = x, y = y)) +
geom_point() +
facet_grid( ~ label2, labeller = label_parsed)
This produces the following plot:
Plot with formatted facet labels

Merging two plots in ggplot2 and keeping individual features of the plots (geom_text, geom_vline)

I have two geom_line time series with some geom_text & geom_points on each plot.
One of the two plots additionally has geom_vline. I wonder if it is possible to merge two but i failed to get a solution.
Here are the two plots:
require(zoo)
require(ggplot2)
set.seed(10)
# plot 1:
tmp1 <- xts(cumsum(rnorm(5000,1,10)), Sys.Date()-5000:1)
data.tmp1 = data.frame(date=as.Date(index(tmp1)),
value=drop(coredata(tmp1)))
data.tmp1.year.end = data.frame(date=as.Date(index(tmp1[endpoints(tmp1, "years", 1)])),
value= drop(coredata(tmp1[endpoints(tmp1, "years", 1)])))
plot1 =
ggplot(data.tmp1, aes(x=date, y=value)) +
geom_line(aes(y=value), size=1) +
geom_point(data=data.tmp1.year.end, col="red") +
geom_text(data=data.tmp1.year.end, label=data.tmp1.year.end$value, vjust=0, hjust=1)
# plot 2:
tmp2 <- xts(cumsum(rnorm(5000,1,100)), Sys.Date()-5000:1)
data.tmp2 = data.frame(date=as.Date(index(tmp2)),
value=drop(coredata(tmp2)))
data.tmp2.year.end = data.frame(date=as.Date(index(tmp2[endpoints(tmp2, "years", 1)])),
value= drop(coredata(tmp2[endpoints(tmp2, "years", 1)])))
tmp2.date =as.Date(c("2008-01-01"))
plot2 =
ggplot(data.tmp2, aes(x=date, y=value)) +
geom_line(aes(y=value), size=1) +
geom_point(data=data.tmp2.year.end, col="red") +
geom_vline(xintercept=as.numeric(tmp2.date), linetype="dotted") +
geom_text(data=data.tmp2.year.end, label=data.tmp2.year.end$value, vjust=0, hjust=1)
The goal now is that plot1 and plot2 share one xaxis and all features of the individual graphs are kept in the corresponding plot.
The result should look like this:
You might try combining your daily data sets and your year end data sets into single data frames and then using ggplot's faceting to display on a single date axis. Code could look like:
data.tmp1 <- cbind(data.tmp1, data_name="tmp1")
data.tmp1.year.end <- cbind(data.tmp1.year.end, data_name="tmp1")
data.tmp2 <- cbind(data.tmp2, data_name="tmp2")
data.tmp2.year.end <- cbind(data.tmp2.year.end, data_name="tmp2")
data.tmp <- rbind(data.tmp1,data.tmp2)
data.tmp.year.end <- rbind(data.tmp1.year.end, data.tmp2.year.end)
ggplot(data.tmp, aes(x=date, y=value)) +
geom_line(aes(y=value), size=1) +
geom_point(data=data.tmp.year.end, col="red") +
geom_text(data=data.tmp.year.end, aes(label=data.tmp.year.end$value), vjust=0, hjust=1) +
geom_vline(xintercept=as.numeric(tmp2.date), linetype="dotted") +
facet_grid( data_name ~ . , scales="free_y")
which gives the chart

How to highlight an item of time-series in a ggplot2 plot

I wish to highlight segments above or below a certain value in a time series by a unique colour or a shape. In the example data I am decomposing a mortality time series into its components. My goal is to highlight the segments when the mortality in the trend component falls below 35 (deep between 1997 and 2000) and when the residual component is above 100 (the spike). I have tried to use annotate, but that did not produce what I wanted.
#Load library and obtain data
library(gamair)
library(tsModel)
library(ggplot2)
library(reshape2)
data<-data(chicago)
## create variables, decompose TS
chicago$date<-seq(from=as.Date("1987-01-01"), to=as.Date("2000-12-31"),length=5114)
data<- chicago[,c("date","death")]
mort <- tsdecomp(data$death, c(1, 2, 15, 5114))
## Convert matrix to df, rename, melt
df<-as.data.frame(mort)
names(df)[1] <- "Trend"
names(df)[2] <- "Seasonal"
names(df)[3] <- "Residual"
df$date<-seq(as.Date("1987-01-01"), as.Date("2000-12-31"), "day")
meltdf <- melt(df,id="date")
## Plot
ggplot(meltdf,aes(x=date,y=value,colour=variable,group=variable)) + geom_line() +
theme_bw() +
ylab("") + xlab("") +
facet_grid(variable ~ . , scales = "free") +
theme(legend.position = "none")
annotate("rect", xmin=1995-01-01,xmax=1996-01-01,ymin= 10, ymax=300, alpha = .2,fill="blue")
Well, this works but I must admit it's more work that I'd hoped.
get.box <- function(data) {
rng <- range(data$date) + c(-50,50)
z <- meltdf[meltdf$date>=rng[1] & meltdf$date <=rng[2] & meltdf$variable==unique(data$variable),]
data.frame(variable=unique(z$variable),
xmin=min(z$date),xmax=max(z$date),ymin=min(z$value),ymax=max(z$value))
}
hilight.trend <- get.box(with(meltdf,meltdf[variable=="Trend" & value<35,]))
hilight.resid <- get.box(with(meltdf,meltdf[variable=="Residual" & value>100,]))
ggplot(meltdf,aes(colour=variable,group=variable)) +
geom_line(aes(x=date,y=value)) +
theme_bw() +
ylab("") + xlab("") +
facet_grid(variable ~ . , scales = "free") +
theme(legend.position = "none") +
geom_rect(data=hilight.trend, alpha=0.2, fill="red",
aes(xmax=xmax,xmin=xmin,ymax=ymax,ymin=ymin)) +
geom_rect(data=hilight.resid, alpha=0.2, fill="blue",
aes(xmax=xmax,xmin=xmin,ymax=ymax,ymin=ymin))
You can't really use annotate(...) with facets, because you will get the same annotation on all the facets. So you're left with something like geom_rect(...). The problem here is that geom_rect(...) draws a rectangle for every row in the data. So you need to create an auxiliary dataset with just one row for each variable, containing the x- and y- min and max.

Varying factor order in each facet of ggplot2

I am trying to create a Cleveland Dot Plot given for two categories in this case J and K. The problem is the elements A,B,C are in both categories so R keeps farting. I have made a simple example:
x <- c(LETTERS[1:10],LETTERS[1:3],LETTERS[11:17])
type <- c(rep("J",10),rep("K",10))
y <- rnorm(n=20,10,2)
data <- data.frame(x,y,type)
data
data$type <- as.factor(data$type)
nameorder <- data$x[order(data$type,data$y)]
data$x <- factor(data$x,levels=nameorder)
ggplot(data, aes(x=y, y=x)) +
geom_segment(aes(yend=x), xend=0, colour="grey50") +
geom_point(size=3, aes(colour=type)) +
scale_colour_brewer(palette="Set1", limits=c("J","K"), guide=FALSE) +
theme_bw() +
theme(panel.grid.major.y = element_blank()) +
facet_grid(type ~ ., scales="free_y", space="free_y")
Ideally, I would want a dot plot for both categories(J,K) individually with each factor(vector x) decreasing with respect to the y vector. What ends up happening is that both categories aren't going from biggest to smallest and are erratic at the end instead. Please help!
Unfortunately factors can only have one set of levels. The only way i've found to do this is actually to create two separate data.frames from your data and re-level the factor in each. For example
data <- data.frame(
x = c(LETTERS[1:10],LETTERS[1:3],LETTERS[11:17]),
y = rnorm(n=20,10,2),
type= c(rep("J",10),rep("K",10))
)
data$type <- as.factor(data$type)
J<-subset(data, type=="J")
J$x <- reorder(J$x, J$y, max)
K<-subset(data, type=="K")
K$x <- reorder(K$x, K$y, max)
Now we can plot them with
ggplot(mapping = aes(x=y, y=x, xend=0, yend=x)) +
geom_segment(data=J, colour="grey50") +
geom_point(data=J, size=3, aes(colour=type)) +
geom_segment(data=K, colour="grey50") +
geom_point(data=K, size=3, aes(colour=type)) +
theme_bw() +
theme(panel.grid.major.y = element_blank()) +
facet_grid(type ~ ., scales="free_y", space="free_y")
which results in

Resources