Meaningless Y-axis values in ggplot2 dot plot - r

I am trying to replicate Fivethirtyeight's Tarantino movie plot. This plot is using ggplot2's dotplot. The data can be found here.
My code is following:
tara <- read.csv("tarantino.csv")
dim(tara)
names(tara)
table(tara$movie)
rd <- subset(tara, movie=="Reservoir Dogs")
du <- subset(tara, movie=="Django Unchained")
ib <- subset(tara, movie=="Inglorious Basterds")
jb <- subset(tara, movie=="Jackie Brown")
kb1 <- subset(tara, movie=="Kill Bill: Vol. 1")
kb2 <- subset(tara, movie=="Kill Bill: Vol. 2")
pf <- subset(tara, movie=="Pulp Fiction")
plot1 <- ggplot(rd, aes(x=minutes_in, fill=type)) +
geom_dotplot(binwidth=0.5, method="histodot") +
theme_bw() + theme(legend.position="none") +
ylim(0,20)+
# Set the entire chart region to a light gray color
theme(panel.background=element_rect(fill="#F0F0F0")) +
theme(plot.background=element_rect(fill="#F0F0F0")) +
theme(panel.border=element_rect(colour="#F0F0F0")) +
ggtitle(" RESERVIOR DOGS") +
theme(plot.title=element_text(face="bold",hjust=-.08,vjust=2,colour="#535353",size=12)) +
ylab("") +
xlab("")
plot2 <- ggplot(pf, aes(x=minutes_in, fill=type)) +
geom_dotplot(binwidth=0.5, method="histodot") +
theme_bw() +theme(legend.position="top")+
ylim(0,20)+
# Set the entire chart region to a light gray color
theme(panel.background=element_rect(fill="#F0F0F0")) +
theme(plot.background=element_rect(fill="#F0F0F0")) +
theme(panel.border=element_rect(colour="#F0F0F0")) +
theme(legend.position="none") +
ggtitle(" PULP FICTION") +
theme(plot.title=element_text(face="bold",hjust=-.08,vjust=2,colour="#535353",size=12)) +
ylab("") +
xlab("")
plot3 <- ggplot(du, aes(x=minutes_in, fill=type)) +
geom_dotplot(binwidth=0.5, method="histodot") +
theme_bw() +theme(legend.position="top")+
ylim(0,20)+
# Set the entire chart region to a light gray color
theme(panel.background=element_rect(fill="#F0F0F0")) +
theme(plot.background=element_rect(fill="#F0F0F0")) +
theme(panel.border=element_rect(colour="#F0F0F0")) +
theme(legend.position="none") +
ggtitle(" DJANGO UNCHAINED") +
theme(plot.title=element_text(face="bold",hjust=-.08,vjust=2,colour="#535353",size=12)) +
ylab("") +
xlab("")
plot4 <- ggplot(ib, aes(x=minutes_in, fill=type)) +
geom_dotplot(binwidth=0.5, method="histodot") +
theme_bw() +theme(legend.position="top")+
ylim(0,20)+
# Set the entire chart region to a light gray color
theme(panel.background=element_rect(fill="#F0F0F0")) +
theme(plot.background=element_rect(fill="#F0F0F0")) +
theme(panel.border=element_rect(colour="#F0F0F0")) +
theme(legend.position="none") +
ggtitle(" INGLORIOUS BASTARDS") +
theme(plot.title=element_text(face="bold",hjust=-.08,vjust=2,colour="#535353",size=12)) +
ylab("") +
xlab("")
plot5 <- ggplot(jb, aes(x=minutes_in, fill=type)) +
geom_dotplot(binwidth=0.5, method="histodot") +
theme_bw() +theme(legend.position="top")+
ylim(0,20)+
# Set the entire chart region to a light gray color
theme(panel.background=element_rect(fill="#F0F0F0")) +
theme(plot.background=element_rect(fill="#F0F0F0")) +
theme(panel.border=element_rect(colour="#F0F0F0")) +
theme(legend.position="none") +
ggtitle(" JACKIE BROWN") +
theme(plot.title=element_text(face="bold",hjust=-.08,vjust=2,colour="#535353",size=12)) +
ylab("") +
xlab("")
plot6 <- ggplot(kb1, aes(x=minutes_in, fill=type)) +
geom_dotplot(binwidth=0.5, method="histodot") +
theme_bw() +theme(legend.position="top")+
ylim(0,20)+
# Set the entire chart region to a light gray color
theme(panel.background=element_rect(fill="#F0F0F0")) +
theme(plot.background=element_rect(fill="#F0F0F0")) +
theme(panel.border=element_rect(colour="#F0F0F0")) +
theme(legend.position="none") +
ggtitle(" KILL BILL: VOL 1") +
theme(plot.title=element_text(face="bold",hjust=-.08,vjust=2,colour="#535353",size=12)) +
ylab("") +
xlab("")
plot7 <- ggplot(kb2, aes(x=minutes_in, fill=type)) +
geom_dotplot(binwidth=0.5, method="histodot") +
theme_bw() +theme(legend.position="none")+
ylim(0,20)+
# Set the entire chart region to a light gray color
theme(panel.background=element_rect(fill="#F0F0F0")) +
theme(plot.background=element_rect(fill="#F0F0F0")) +
theme(panel.border=element_rect(colour="#F0F0F0")) +
ggtitle(" KILL BILL: VOL 2") +
theme(plot.title=element_text(face="bold",hjust=-.08,vjust=2,colour="#535353",size=12)) +
ylab("") +
xlab("minutes")
library(grid)
grid.newpage()
pushViewport(viewport(layout = grid.layout(7, 1)))
vplayout <- function(x, y)
viewport(layout.pos.row = x, layout.pos.col = y)
print(plot1, vp = vplayout(1, 1))
print(plot2, vp = vplayout(2, 1))
print(plot3, vp = vplayout(3, 1))
print(plot4, vp = vplayout(4, 1))
print(plot5, vp = vplayout(5, 1))
print(plot6, vp = vplayout(6, 1))
print(plot7, vp = vplayout(7, 1))
The output looks like this:
The plot has several issues:
Y axis is uncontrollable. I can use coord_fixed(ratio=X); but it's not helping much.
The dots are not same size. I have tried to use dotsize; but it's not working.
Failed to keep background grey in the legends.

Here's an approach:
# download data
download.file('https://raw.githubusercontent.com/fivethirtyeight/data/master/tarantino/tarantino.csv', '~/Desktop/tarantino.csv', method = 'curl')
# read in data
tara <- read.csv('~/Desktop/tarantino.csv')
library(ggplot2)
library(ggthemes) # has theme_fivethirtyeight, which will save a lot of work
ggplot(tara, aes(x=minutes_in, fill=type)) +
geom_dotplot(binwidth = 1, method = "histodot") +
theme_fivethirtyeight() +
ylim(0, 20) +
ggtitle('Deaths and Swearing in Tarantino Films') +
theme(strip.text = element_text(hjust = 0)) + # left justify facet titles
ylab("") +
xlab("") +
# facet_wrap defaults to titles on top. The labeller capitalizes movie titles.
facet_wrap(~movie, ncol = 1, labeller = labeller(movie = toupper))
# bin width/dot size is CRAZY SENSITIVE to aspect ratio. width 4:height 5:binwidth 1 lines up with your y-axis
ggsave('~/Desktop/Rplot.png', width = 8, height = 10, units = 'in')
What you get:

Related

how to overlay geom_rect and geom_boxplot in ggplot?

I am trying to use ggplot in order to overlay geom_rect under some boxplots.
I want the grey rectangle will be behind the box plots but I can't do it for some reason.
This is the code that I'm using:
ggplot(data, aes(x = reorder(genotype, -Shann.div, FUN = median), y = Shann.div)) +
geom_jitter(color="black", size=0.3, alpha=0.9) + geom_boxplot(outlier.shape = NA, coef = 0) +
geom_hline(yintercept = avg, color="red") +
#geom_hline(yintercept = (avg + 2 * SE), linetype='dashed', color="black") +
#geom_hline(yintercept = (avg - 2 * SE), linetype='dashed', color="black") +
geom_rect(aes(xmin=0, xmax=Inf, ymin=avg - SD, ymax=avg + SD), fill="grey", alpha=0.01) +
theme(panel.background = element_blank()) +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
ggtitle('Microbial UTO - Shannon diversity median') + xlab('genotype')
The ggplot package draws the geom layers in the order that you declare them. If you want the geom_rect layer in the back, put it before the other layers in the code:
ggplot(data, aes(x = reorder(genotype, -Shann.div, FUN = median), y = Shann.div)) +
geom_rect(aes(xmin=0, xmax=Inf, ymin=avg - SD, ymax=avg + SD), fill="grey", alpha=0.01) +
geom_jitter(color="black", size=0.3, alpha=0.9) + geom_boxplot(outlier.shape = NA, coef = 0) +
geom_hline(yintercept = avg, color="red") +
#geom_hline(yintercept = (avg + 2 * SE), linetype='dashed', color="black") +
#geom_hline(yintercept = (avg - 2 * SE), linetype='dashed', color="black") +
theme(panel.background = element_blank()) +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
ggtitle('Microbial UTO - Shannon diversity median') + xlab('genotype')

ggplot2 legend in 2 rows

I was unable to find a solution for putting ggplot2 legend in 2 rows.
Example
library(ggplot2)
theme_set(theme_bw())
data("midwest", package = "ggplot2")
ggplot(midwest, aes(x=area, y=poptotal)) +
geom_point(aes(col=state, size=popdensity)) +
geom_smooth(method="loess", se=F) +
xlim(c(0, 0.1)) +
ylim(c(0, 500000)) +
labs(y="Population",
x="Area",
title="") +
theme(legend.position = "top")
In the above image, I would like to have popdensity annotation on top (first row) and state annotation in the second row.
I think you're looking for theme(legend.box = "vertical") and guide_legend(order = ...)
library(ggplot2)
data("midwest", package = "ggplot2")
ggplot(midwest, aes(x=area, y=poptotal)) +
geom_point(aes(col=state, size=popdensity)) +
geom_smooth(method="loess", se=F) +
xlim(c(0, 0.1)) +
ylim(c(0, 500000)) +
labs(y="Population",
x="Area",
title="") +
theme_bw() +
theme(legend.position = "top",
legend.box = "vertical") +
guides(size = guide_legend(order = 1),
colour = guide_legend(order = 2))

How to add another legend to circular heat map in R

I want to add another legend that tells me what ring of a circular heat map represents (from outer ring to inner ring).
I tried the following from another answer previously:
library(reshape)
library(ggplot2)
library(plyr)
nba <- read.csv("http://datasets.flowingdata.com/ppg2008.csv")
nba$Name <- with(nba, reorder(Name, PTS))
nba.m <- melt(nba)
nba.m <- ddply(nba.m, .(variable), transform, value = scale(value))
# Convert the factor levels (variables) to numeric + quanity to determine size of hole.
nba.m$var2 = as.numeric(nba.m$variable) + 15
# Labels and breaks need to be added with scale_y_discrete.
y_labels = levels(nba.m$variable)
y_breaks = seq_along(y_labels) + 15
nba.labs <- subset(nba.m, variable==levels(nba.m$variable) [nlevels(nba.m$variable)])
nba.labs <- nba.labs[order(nba.labs$Name),]
nba.labs$ang <- seq(from=(360/nrow(nba.labs))/1.5, to=(1.5* (360/nrow(nba.labs)))-360, length.out=nrow(nba.labs))+80
nba.labs$hjust <- 0
nba.labs$hjust[which(nba.labs$ang < -90)] <- 1
nba.labs$ang[which(nba.labs$ang < -90)] <- (180+nba.labs$ang)[which(nba.labs$ang < -90)]
p2 = ggplot(nba.m, aes(x=Name, y=var2, fill=value)) +
geom_tile(colour="white") +
geom_text(data=nba.labs, aes(x=Name, y=var2+1.5,
label=Name, angle=ang, hjust=hjust), size=3) +
scale_fill_gradient(low = "white", high = "steelblue") +
ylim(c(0, max(nba.m$var2) + 1.5)) +
scale_y_discrete(breaks=y_breaks, labels=y_labels) +
coord_polar(theta="x") +
theme(panel.background=element_blank(),
axis.title=element_blank(),
panel.grid=element_blank(),
axis.text.x=element_blank(),
axis.ticks=element_blank(),
axis.text.y=element_text(size=5))
print(p2)
However, instead of getting the legend, I'm having this error message instead:
Scale for 'y' is already present. Adding another scale for 'y', which
will replace the existing scale.
Any solutions?
Thanks in advance!
It's not entirely clear to me what you're looking for but this may be it.
You were originally using scale_y_discrete(breaks=y_breaks, labels=y_labels) to project to a continuous variable,var2, in aes(x=Name, y=var2, fill=value). By changing that to scale_y_continuous(breaks=y_breaks, labels=y_labels) you can get the categorical labels listed for nba.m$variable.
ggplot(nba.m, aes(x=Name, y=var2, fill=value)) +
geom_tile(colour="white") +
geom_text(data=nba.labs, aes(x=Name, y=var2+1.5,
label=Name, angle=ang, hjust=hjust), size=3) +
scale_fill_gradient(low = "white", high = "steelblue") +
ylim(c(0, max(nba.m$var2) + 1.5)) +
scale_y_continuous(breaks=y_breaks, labels=y_labels) +
coord_polar(theta="x") +
theme(panel.background=element_blank(),
axis.title=element_blank(),
panel.grid=element_blank(),
axis.text.x=element_blank(),
axis.ticks=element_blank(),
axis.text.y=element_text(size=5))
UPDATE
I'm not sure what you're trying to do here -those values are not blank in the center because there's data there, removing scale_y_continuous(breaks=y_breaks, labels=y_labels) limits the scale of the y-axis such that the date is no longer graphed. That's why you're not seeing the middle filled when that line of code is removed. At any rate, if that's what you're looking for, what you need to do is delete scale_y_continuous(breaks=y_breaks, labels=y_labels) and turn off the labels for the y-axis, then manually add those labels using grob. I'm sure there's a better way to accomplish what you need but this will get you started at least.
p<-ggplot(nba.m, aes(x=Name, y=var2, fill=value)) +
geom_tile(colour="white") +
geom_text(data=nba.labs, aes(x=Name, y=var2+1.5,
label=Name, angle=ang, hjust=hjust), size=2.5) +
scale_fill_gradient(low = "white", high = "steelblue") +
ylim(c(0, 50)) +
coord_polar(theta="x") +
theme(panel.background=element_blank(),
axis.title=element_blank(),
panel.grid=element_blank(),
axis.text.x=element_blank(),
axis.ticks=element_blank(),
axis.text.y=element_text(size=5))+ theme(axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())
lab = textGrob((paste("G MIN PTS FGM FGA FGP FTM FTA FTP X3PM X3PA X3PP ORB DRB TRB AST STL BLK TO PF")),
x = unit(.1, "npc"), just = c("left"),
gp = gpar(fontsize = 7))
gp = ggplotGrob(p)
gp = gtable_add_rows(gp, unit(10, "grobheight", lab), -1)
gp = gtable_add_grob(gp, lab, t = -2, l = gp$layout[gp$layout$name == "panel",]$l)
grid.newpage()
grid.draw(gp)

Changing x-axis to discrete alters margins

I have a plot produced with the following code:
plot <- ggplot(lmeans, aes(x=Day, y=value*100, group=variable, colour=variable)) +
geom_point(aes(shape=variable), size=4) +
geom_line(aes(linetype=variable), size=1.5) +
ggtitle(paste("Nausea and Vomitting Frequencies by Day for", group_name)) +
ylab("Frequency (%)") +
ylim(0, 40) +
theme(legend.title=element_blank()) +
theme(legend.justification = c(1, 1), legend.position = c(1, 1))
Which results in a plot like so:
However I would like the days to be discretely labeled rather than being given as a continuous axis. When I try to achieve this by adding scale_x_discrete(), I get the following result:
In which the 'margins' on the x-axis are altered in an unsightful manner. How can I avoid these unsightly changes?
Here's a minimal example for reproduction:
require(ggplot2)
lmeans <- data.frame(Day=c(0,1,2,3,0,1,2,3),
variable=c("x","x","x","x","y","y","y","y"),
value=c(5,4,2,1,7,3,2,0))
plot <- ggplot(lmeans, aes(x=Day, y=value, group=variable, colour=variable)) +
geom_point(aes(shape=variable)) +
geom_line(aes(linetype=variable)) +
ylim(0, 10) +
scale_x_discrete() +
theme(legend.justification = c(1, 1), legend.position = c(1, 1))
print(plot)
Which produces this:
Without scale_x_discrete and setting Day as a factor, the plot is looking OK:
ggplot(lmeans, aes(x=factor(Day), y=value, group=variable, colour=variable)) +
geom_point(aes(shape=variable), size=4) +
geom_line(aes(linetype=variable), size=1.5) +
theme(legend.justification = c(1, 1), legend.position = c(1, 1))
which gives:
When you use scale_x_discrete, you can include the expand parameter in order to set the margins. An example:
ggplot(lmeans, aes(x=factor(Day), y=value, group=variable, colour=variable)) +
geom_point(aes(shape=variable), size=4) +
geom_line(aes(linetype=variable), size=1.5) +
ylim(0, 10) +
scale_x_discrete("Day", expand=c(0.05,0.1), breaks=c(0,1,2,3)) +
theme(legend.justification = c(1, 1), legend.position = c(1, 1))
which gives:
I changed scale_x_discrete() to scale_x_continuous() and used limits. Does this work for you?
ggplot(lmeans, aes(x=Day, y=value, group=variable, colour=variable)) +
geom_point(aes(shape=variable)) +
geom_line(aes(linetype=variable)) +
ylim(0, 10) +
scale_x_continuous(limits = c(-0.5, 4)) +
theme(legend.justification = c(1, 1), legend.position = c(1, 1))
Use factor(Day) in the aesthetic mapping:
plot <- ggplot(lmeans, aes(x=factor(Day), y=value,
group=variable, colour=variable)) +
geom_point(aes(shape=variable)) +
geom_line(aes(linetype=variable)) +
ylim(0, 10) +
labs(x="Day") +
theme(legend.justification=c(1, 1), legend.position=c(1, 1))
print(plot)

How to create a legend for lines in scatterplot?

I want to add a legend for the main diagonal and the regression line to the scatter plot.
What I have got now:
library(ggplot2)
df = data.frame(x = 1:10, y = 1:10)
p <- ggplot(df, aes(x, y)) +
geom_point(size=1.2) +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0)) +
geom_smooth(method="lm", se=FALSE, formula=y~x, colour="blue", fill=NA, size=1.2) +
geom_abline(intercept=0, slope=1, size=1.2, colour="red") +
geom_text(aes(x=max(df[,1])/1.4, y=max(df[,2])/1.2, label=lm_eqn(df)), colour="blue", parse=TRUE) +
# doesn't work: scale_colour_manual("Lines", labels=c("Main Diagonal", "Regression"), values=c("red", "blue")) +
labs(x="X", y="Y")
use show_guide=TRUE e.g.
p <- ggplot(df, aes(x, y)) +
geom_point(size=1.2) +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0)) +
geom_smooth(method="lm", se=FALSE, formula=y~x, colour="blue", fill=NA, size=1.2) +
geom_abline(aes(colour="red"),intercept=0, slope=1, size=1.2,show_guide=TRUE) +
geom_text(aes(x=max(df[,1])/1.4, y=max(df[,2])/1.2, label="lm_eqn(df)"), colour="blue", parse=TRUE) +
# doesn't work: scale_colour_manual("Lines", labels=c("Main Diagonal", "Regression"), values=c("red", "blue")) +
labs(x="X", y="Y") + opts(legend.position = 'left')
plus you can move legends about using things like+ opts(legend.position = 'left') to get it on the left. I suggest you look at the link provided by Tyler Rinker and also the following:
https://github.com/hadley/ggplot2/wiki/Legend-Attributes
Also no idea what lm_eqn ia so in my code i have surrounded it with "" so it will appear as it is written..
I could finally manage to create a legend for the regression and the diagonal line which is located in the bottom right corner and that makes sense:
p <- ggplot(df, aes(x, y)) +
geom_point(size=1.2) +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0)) +
geom_abline(aes(colour="red"),intercept=0, slope=1, size=1.2, aes(colour="1"), show_guide=TRUE) + # new code
geom_smooth(method="lm", se=FALSE, formula=y~x, fill=NA, size=1.2, aes(colour="2"), show_guide=TRUE) + # new code
scale_colour_manual("Lines", labels=c("Diagonal", "Regression"), values=c("red", "blue")) +
opts(legend.position = c(0.85, 0.15)) # relative values, must be set individually

Resources