Related
I'm trying to produce a boxplot of some numeric outcome broken down by treatment condition and visit number, with the number of observations in each box placed under the plot, and the visit numbers labeled as well. Here's some fake data that will serve to illustrate, and I give two examples of things I've tried that didn't quite work.
library(ggplot2)
library(plyr)
# Simulated data: treatments A/B alternate over 300 observations; the first
# 150 observations belong to visit 1, the next 100 to visit 2, the last 50 to
# visit 3.
trt <- factor(rep(LETTERS[1:2], 150), ordered = TRUE)
vis <- factor(c(rep(1, 150), rep(2, 100), rep(3, 50)), ordered = TRUE)
val <- rnorm(300)
data <- data.frame(trt, vis, val)

# Number of non-missing outcome values for each visit x treatment cell.
data.sum <- ddply(data, .(vis, trt), summarise,
                  N = length(na.omit(val)))

# Shared theme: no gap between facet panels, no strip background.
# `panel.spacing` is the current name of the deprecated `panel.margin` element.
mytheme <- theme_bw() +
  theme(panel.spacing = unit(0, "lines"),
        strip.background = element_blank())
The below code produces a plot that has N labels where I want them. It does this by grabbing summary data from an auxiliary dataset I created. However, I couldn't figure out how to also label visit on the x-axis (ideally, below the individual box labels), or to delineate visits visually in other ways (e.g. lines separating them into panels).
# One box per visit-by-treatment combination on a single discrete x axis.
# Tick labels show the treatment over its N, taken row by row from data.sum
# (assumes data.sum rows follow the level order of vis:trt -- confirm).
plot1 <- ggplot(
  data,
  aes(x = vis:trt, y = val, group = vis:trt, colour = trt)
) +
  geom_boxplot(show.legend = FALSE) +
  scale_x_discrete(labels = paste(data.sum$trt, data.sum$N, sep = "\n")) +
  labs(x = "Visit") +
  mytheme
The plot below is closer to what I want than the one above, in that it has a nice hierarchy of treatments and visits, and a pretty format delineating the visits. However, for each panel it grabs the Ns from the first row in the summary data that matches the treatment condition, because it doesn't "know" that each facet needs to use the row corresponding to that visit.
# One facet per visit with the treatments side by side inside each facet.
# The same label vector is handed to the x scale of the whole plot, so it is
# not visit-specific.
plot2 <- ggplot(
  data,
  aes(x = trt, y = val, group = trt, colour = trt)
) +
  geom_boxplot(show.legend = FALSE) +
  facet_wrap(~ vis, nrow = 1, drop = FALSE, switch = "x") +
  scale_x_discrete(labels = paste(data.sum$trt, data.sum$N, sep = "\n")) +
  labs(x = "Visit") +
  mytheme
One workaround is to manipulate your dataset so your x variable is the interaction between trt and N.
Working off what you already have, you can add N to the original dataset via a merge.
# Attach the per-cell N to every observation (joined on the shared columns).
test <- merge(x = data, y = data.sum)
Then make a new variable that is the combination of trt and N.
# Label used on the x axis: treatment on the first line, N on the second.
test <- transform(
  test,
  trt2 = paste(trt, N, sep = "\n")
)
Now make the plot, using the new trt2 variable on the x axis and using scales = "free_x" in facet_wrap to allow for the different labels per facet.
# Boxplots per visit facet; free x scales let each facet show its own
# treatment/N labels.
ggplot(
  test,
  aes(x = trt2, y = val, group = trt, colour = trt)
) +
  geom_boxplot(show.legend = FALSE) +
  facet_wrap(~ vis, nrow = 1, drop = FALSE, switch = "x", scales = "free_x") +
  labs(x = "Visit") +
  mytheme
Since this functionality isn't built in, a good workaround is the gridExtra package:
library(gridExtra)
# Tick labels (treatment over N), one per row of data.sum. The slices used
# below assume data.sum is ordered by visit and then treatment, so that
# lb[1:2] belongs to visit 1, lb[3:4] to visit 2 and lb[5:6] to visit 3.
lb <- paste(data.sum$trt, data.sum$N, sep = "\n")

# Build the treatment boxplot for one visit with that visit's tick labels.
make_visit_plot <- function(visit, tick_labels) {
  ggplot(data[data$vis == visit, ]) +
    geom_boxplot(aes(x = trt, y = val, group = trt, colour = trt),
                 show.legend = FALSE) +
    scale_x_discrete(labels = tick_labels) +
    labs(x = "Visit") +
    mytheme
}

p1 <- make_visit_plot(1, lb[1:2])
p2 <- make_visit_plot(2, lb[3:4])
p3 <- make_visit_plot(3, lb[5:6])

grid.arrange(p1, p2, p3, nrow = 1, ncol = 3) # fully customizable
Related:
Varying axis labels formatter per facet in ggplot/R
You can also make them vertical or do other transformations:
I'm using ggplot2 to plot different time series (one for Alice, one for Bob, one for Eve), which have a different number of missing values.
# library() fails loudly if the package is missing; require() only returns
# FALSE and lets the script continue.
library(ggplot2)

# Three series of ten time points each, with some values blanked out.
df3 <- data.frame(
  name = rep(c("Alice", "Bob", "Eve"), each = 10),
  value = c(seq(1, 10), seq(4, 13), seq(5, 14)),
  time = rep(seq(1, 10), 3)
)
df3$value[c(3, 4, 15, 16, 17, 22, 23, 24, 25)] <- NA

# One panel per name.
ggplot(data = df3, aes(time, value)) +
  geom_line() +
  geom_point() +
  facet_wrap(~ name, nrow = 1)
I'd like to have the count of NAs displayed in each of the plots, e.g. as an overlay of a number (2 for Alice, 3 for Bob, 4 for Eve). Is there an elegant way to do this?
As @MLavoie suggested in the comments, generate a new data frame for the text labels and then work with that. This should work for your purposes:
# library() fails loudly if a package is missing; require() only returns FALSE.
library(ggplot2)
library(dplyr)

# Three series of ten time points each, with some values blanked out.
df3 <- data.frame(
  name = rep(c("Alice", "Bob", "Eve"), each = 10),
  value = c(seq(1, 10), seq(4, 13), seq(5, 14)),
  time = rep(seq(1, 10), 3)
)
df3$value[c(3, 4, 15, 16, 17, 22, 23, 24, 25)] <- NA

# One row per name: where to draw the label (mean of the observed values,
# mean time) and how many values are missing.
NAdf <- df3 %>%
  group_by(name) %>%
  summarise(ycoor = mean(value, na.rm = TRUE),
            xcoor = mean(time, na.rm = TRUE),
            num_NA = sum(is.na(value)))

# The text layer reads from NAdf, so each facet gets the row for its own name.
ggplot(data = df3, aes(time, value)) +
  geom_line() +
  geom_point() +
  geom_text(data = NAdf,
            aes(x = xcoor, y = ycoor, label = paste(num_NA, "for", name))) +
  facet_wrap(~ name, nrow = 1)
HTH
Updated
In response to the comment below. Generally I find placing text labels into a facetted plot fairly finicky. In your example you could simply define the x and y coordinates as 5,5 for all panels like this:
# Per-name NA count, with the label position fixed at (5, 5) for every panel.
NAdf <- summarise(
  group_by(df3, name),
  ycoor = 5,
  xcoor = 5,
  num_NA = sum(is.na(value))
)
Then you could plot using the same code as before:
# Line-and-point panels per name, with the NA count drawn from NAdf.
ggplot(df3, aes(time, value)) +
  geom_line() +
  geom_point() +
  geom_text(
    data = NAdf,
    mapping = aes(x = xcoor, y = ycoor, label = paste(num_NA, "for", name))
  ) +
  facet_wrap(~ name, nrow = 1)
The issue with this is that it isn't a generalized solution. In practice, though, I find you need to fiddle with your geom_text plotting coordinates each and every time to get it just right. Truth be told, @Sam Dickson's solution is very elegant for this particular problem.
One option is to add the count to the variable used in the faceting:
# Count the missing values within each name and repeat that count on every
# row of the group (ave() returns a vector as long as its input).
df3$NAs <- ave(df3$value, df3$name, FUN = function(x) sum(is.na(x)))

# Facet label that carries the count, e.g. "Alice (NA = 2)".
df3$name1 <- paste0(df3$name, " (NA = ", df3$NAs, ")")

ggplot(data = df3, aes(time, value)) +
  geom_line() +
  geom_point() +
  facet_wrap(~ name1, nrow = 1)
I'd like to show data values on a stacked bar chart in ggplot2. After many attempts, the only way I found to show the total amount (for each bin) is using the following code
set.seed(1234)
df <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = round(c(rnorm(200, mean = 55, sd = 5), rnorm(200, mean = 65, sd = 5)))
)

# Stacked histogram of weight, filled and outlined by sex.
p <- ggplot(df, aes(x = weight, fill = sex, color = sex))
p <- p + geom_histogram(position = "stack", alpha = 0.5, binwidth = 5)

# Take the x positions and counts computed for the histogram layer and sum
# the counts that share an x position.
tbl <- ggplot_build(p)$data[[1]][, c("x", "count")]
agg <- aggregate(tbl["count"], by = tbl["x"], FUN = sum)

# Add one label per non-empty x position, 1.5 units above the total.
# seq_along() makes the loop a no-op when agg has no rows, where
# 1:length() would iterate over c(1, 0).
for (i in seq_along(agg$x)) {
  if (agg$count[i] > 0) {
    p <- p + geom_text(x = agg$x[i], y = agg$count[i] + 1.5,
                       label = agg$count[i], colour = "black")
  }
}
which generates the following plot:
Is there a better (and more efficient) way to get the same result using ggplot2?
Thanks a lot in advance
You can use stat_bin to count up the values and add text labels.
# fill/colour are mapped only in the histogram layer; the stat_bin text layer
# therefore bins on weight alone and labels each bin 2 units above its count.
p <- ggplot(data = df, mapping = aes(x = weight)) +
  geom_histogram(
    mapping = aes(fill = sex, color = sex),
    position = "stack",
    alpha = 0.5,
    binwidth = 5
  ) +
  stat_bin(
    mapping = aes(y = ..count.. + 2, label = ..count..),
    geom = "text",
    binwidth = 5
  )
I moved the fill and color aesthetics to geom_histogram so that they would apply only to that layer and not globally to the whole plot, because we want stat_bin to generate an overall count for each bin, rather than separate counts for each level of sex. ..count.. is an internal variable returned by stat_bin that stores the counts.
In this case, it was straightforward to add the counts directly. However, in more complicated situations, you might sometimes want to summarise the data outside of ggplot and then feed the summary data to ggplot. Here's how you would do that in this case:
library(dplyr)
# Total number of observations per 5-unit weight bin (left-closed intervals).
counts <- df %>%
  group_by(weight = cut(weight, seq(30, 100, 5), right = FALSE)) %>%
  summarise(n = n())

# The same binning, additionally split by sex.
countsByGroup <- df %>%
  group_by(sex, weight = cut(weight, seq(30, 100, 5), right = FALSE)) %>%
  summarise(n = n())

# Bars come from the per-sex counts; the black labels come from the totals.
ggplot(
  data = countsByGroup,
  mapping = aes(x = weight, y = n, fill = sex, color = sex)
) +
  geom_bar(stat = "identity", alpha = 0.5, width = 1) +
  geom_text(
    data = counts,
    mapping = aes(label = n, y = n + 2),
    colour = "black"
  )
Or, you can just create countsByGroup and then create the equivalent of counts on the fly inside ggplot:
# Same chart, but the per-bin totals are derived inline from countsByGroup:
# within each weight bin, n is replaced by the sum over both sexes.
ggplot(
  data = countsByGroup,
  mapping = aes(x = weight, y = n, fill = sex, color = sex)
) +
  geom_bar(stat = "identity", alpha = 0.5, width = 1) +
  geom_text(
    data = countsByGroup %>%
      group_by(weight) %>%
      mutate(n = sum(n)),
    mapping = aes(label = n, y = n + 2),
    colour = "black"
  )
I have a data frame with 124 columns and observations. Part of it would be something like:
# Small wide-format example: one date column followed by four count columns.
date  <- c("2014-01-03", "2014-05-03", "2014-02-04")
App   <- c(0, 2, 4)
Email <- c(1, 5, 0)
Print <- c(0, 0, 1)
mgt   <- c(1, 9, 12)

df <- data.frame(date, App, Email, Print, mgt)
I want to plot App against date, then Email against date, then Print against date etc in different plots. I am trying to use plyr and ggplot2 to output these plots and have come up with:
# Plots: prints a line chart built from the global `df`, with `date` on the x
# axis and the argument `Y` mapped to the y axis, monthly date breaks labelled
# as '%b-%Y', fixed title/axis labels and theme_bw().
# NOTE(review): this is the asker's non-working attempt. `Y` appears inside
# aes(), and the x axis uses scale_x_date() while `date` is built from strings
# in the preceding snippet -- confirm both against the intended usage.
Plots <- function (Y){print(ggplot(df, aes(x=date, y= Y)) + geom_line() +
scale_x_date(breaks = date_breaks('month'), label= date_format('%b-%Y')) +
labs(title="A", x="Date Issued", y="Number of tickets issued")+
theme_bw()) }
# Applies Plots to pieces of df via plyr::d_ply.
# NOTE(review): `col` is not defined anywhere in this snippet -- verify what
# splitting variable was intended.
ServicePlots <- d_ply (df, col , Plots, .print=TRUE)
The packages lubridate, chron and scales are also being used in the plots. However, this does not seem to work at all. Could someone please point out what I am doing wrong? And maybe help me out a bit?
I have no idea what plyr has to do with anything here; just melt the data and plot it as is:
library(reshape2)
library(ggplot2)
library(scales)
# Reshape to long form: one row per date x measure. Naming the id column
# explicitly avoids relying on melt() guessing it from the column types
# (and the "Using ... as id variables" message that comes with the guess).
df <- melt(df, id.vars = "date")

# One free-scaled panel per measure, plotted against the parsed date.
ggplot(df, aes(as.Date(date), value)) +
  geom_line(aes(group = 1)) +
  scale_x_date(breaks = date_breaks("month")) +
  facet_wrap(~ variable, scales = "free") +
  labs(title = "A", x = "Date Issued", y = "Number of tickets issued") +
  theme_bw()
If you want it in a function form, do
#' Plot every measure column of a wide data frame against its date
#'
#' Melts `x` to long form with `date` as the id column and draws one
#' free-scaled line panel per remaining column.
#'
#' @param x A wide data frame with a `date` column (convertible by
#'   `as.Date()`) and one column per series to plot.
#' @return A ggplot object.
Plots <- function(x) {
  # Explicit id.vars: melt() would otherwise guess the id columns from the
  # column types, which only picks up `date` while it is a string or factor.
  x <- melt(x, id.vars = "date")
  ggplot(x, aes(as.Date(date), value)) +
    geom_line(aes(group = 1)) +
    scale_x_date(breaks = date_breaks("month")) +
    facet_wrap(~ variable, scales = "free") +
    labs(title = "A", x = "Date Issued", y = "Number of tickets issued") +
    theme_bw()
}
Plots(df)
Following is not ggplot based but it works:
# Base-graphics alternative: one panel per column 2..5, each plotted against
# column 1.
# NOTE(review): `mydf` is not defined in this snippet; presumably it is the
# wide data frame from the question (date, App, Email, Print, mgt) -- confirm.
# The loop draws four plots, so use a 2 x 2 layout (a 1 x 3 layout pushes the
# fourth plot onto a new page) and restore the previous settings afterwards.
old_par <- par(mfrow = c(2, 2))
for (i in 2:5) {
  # Convert column 1 to Date so the x axis is a real time axis.
  plot(as.Date(mydf[, 1]), mydf[, i],
       xlab = colnames(mydf)[1], ylab = colnames(mydf)[i],
       main = colnames(mydf)[i])
}
par(old_par)
Hi I really have googled this a lot without any joy. Would be happy to get a reference to a website if it exists. I'm struggling to understand the Hadley documentation on polar coordinates and I know that pie/donut charts are considered inherently evil.
That said, what I'm trying to do is
Create a donut/ring chart (so a pie with an empty middle) like the tikz ring chart shown here
Add a second layer circle on top (with alpha=0.5 or so) that shows a second (comparable) variable.
Why? I'm looking to show financial information. The first ring is costs (broken down) and the second is total income. The idea is then to add + facet=period for each review period to show the trend in both revenues and expenses and the growth in both.
Any thoughts would be most appreciated
Note: If an MWE is needed, here is a completely arbitrary one. Suppose this was tried with
# Arbitrary stand-ins taken from iris: columns 2-4, column 1, and Species.
donut_data <- iris[, c("Sepal.Width", "Petal.Length", "Petal.Width")]
revenue_data <- iris[["Sepal.Length"]]
facet <- iris[["Species"]]
That would be similar to what I'm trying to do.. Thanks
I don't have a full answer to your question, but I can offer some code that may help get you started making ring plots using ggplot2.
library(ggplot2)
# Test data: three categories with their counts.
dat <- data.frame(count = c(10, 60, 30), category = c("A", "B", "C"))

# Additional columns needed by geom_rect: each category's share of the total
# and the cumulative start/end of its segment, ordered by increasing share.
dat$fraction <- dat$count / sum(dat$count)
dat <- dat[order(dat$fraction), ]
dat$ymax <- cumsum(dat$fraction)
dat$ymin <- c(0, dat$ymax[-nrow(dat)])

# Rectangles spanning x in [3, 4], wrapped around the y axis, form the ring;
# the x limits start at 0, which leaves the middle empty.
p1 <- ggplot(dat, aes(fill = category, ymax = ymax, ymin = ymin,
                      xmax = 4, xmin = 3)) +
  geom_rect() +
  coord_polar(theta = "y") +
  xlim(c(0, 4)) +
  labs(title = "Basic ring plot")

# Same ring with segment outlines and without grid, axis text or ticks.
p2 <- ggplot(dat, aes(fill = category, ymax = ymax, ymin = ymin,
                      xmax = 4, xmin = 3)) +
  geom_rect(colour = "grey30") +
  coord_polar(theta = "y") +
  xlim(c(0, 4)) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  ) +
  labs(title = "Customized ring plot")

library(gridExtra)

# Write both plots side by side to a PNG file.
png("ring_plots_1.png", height = 4, width = 8, units = "in", res = 120)
grid.arrange(p1, p2, nrow = 1)
dev.off()
Thoughts:
You may get more useful answers if you post some well-structured sample data. You have mentioned using some columns from the iris dataset (a good start), but I am unable to see how to use that data to make a ring plot. For example, the ring plot you have linked to shows proportions of several categories, but neither iris[, 2:4] nor iris[, 1] are categorical.
You want to "Add a second layer circle on top": Do you mean to superimpose the second ring directly on top of the first? Or do you want the second ring to be inside or outside of the first? You could add a second internal ring with something like geom_rect(data=dat2, xmax=3, xmin=2, aes(ymax=ymax, ymin=ymin))
If your data.frame has a column named period, you can use facet_wrap(~ period) for facetting.
To use ggplot2 most easily, you will want your data in 'long-form'; melt() from the reshape2 package may be useful for converting the data.
Make some barplots for comparison, even if you decide not to use them. For example, try:
# Plain bar chart of the same counts, for comparison with the ring plot.
ggplot(
  data = dat,
  mapping = aes(x = category, y = count, fill = category)
) +
  geom_bar(stat = "identity")
Just trying to solve question 2 with the same approach from bdemarest's answer. Also using his code as a scaffold. I added some tests to make it more complete but feel free to remove them.
library(broom)
library(tidyverse)
# Create test data: two rings (A, B), each split into categories C and D.
dat <- data.frame(count = c(10, 60, 20, 50),
                  ring = c("A", "A", "B", "B"),
                  category = c("C", "D", "C", "D"))

# Compute p-values on the ring x category table of counts: chi-squared test
# first, then Fisher's exact test, joined into one tidy table.
cs.pvalue <- dat %>%
  spread(value = count, key = category) %>%
  ungroup() %>%
  select(-ring) %>%
  chisq.test() %>%
  tidy()
cs.pvalue <- dat %>%
  spread(value = count, key = category) %>%
  select(-ring) %>%
  fisher.test() %>%
  tidy() %>%
  full_join(cs.pvalue)

# Compute fractions and segment boundaries within each ring.
# Plain assignment replaces `%<>%`, which needs library(magrittr) and is not
# attached by the packages loaded here.
dat <- dat %>%
  group_by(ring) %>%
  mutate(fraction = count / sum(count),
         ymax = cumsum(fraction),
         ymin = c(0, head(ymax, n = -1))) %>%
  ungroup()

# Add x limits: each ring is one unit wide, stacked outward from baseNum.
# factor() is required because `ring` is a character column when
# stringsAsFactors is FALSE, and as.numeric() on it would give NA.
baseNum <- 4
dat$xmax <- as.numeric(factor(dat$ring)) + baseNum
dat$xmin <- dat$xmax - 1

# Plot: rings distinguished by alpha, categories by fill; the two test
# results are written as text taken from cs.pvalue.
p2 <- ggplot(dat, aes(fill = category,
                      alpha = ring,
                      ymax = ymax,
                      ymin = ymin,
                      xmax = xmax,
                      xmin = xmin)) +
  geom_rect(colour = "grey30") +
  coord_polar(theta = "y") +
  geom_text(inherit.aes = FALSE,
            x = c(-1, 1),
            y = 0,
            data = cs.pvalue,
            aes(label = paste(method,
                              "\n",
                              format(p.value,
                                     scientific = TRUE,
                                     digits = 2)))) +
  xlim(c(0, 6)) +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  theme(axis.text = element_blank()) +
  theme(axis.ticks = element_blank(),
        panel.border = element_blank()) +
  labs(title = "Customized ring plot") +
  scale_fill_brewer(palette = "Set1") +
  scale_alpha_discrete(range = c(0.5, 0.9))
p2
And the result: