I've tried finding an answer but nothing seems to work. The first image below is a scatterplot drawn in ggplot2, which has been post-processed by a specific LaTeX journal template on Overleaf. I would like to re-create the chart without having to use the template.
Unfortunately, I haven't been able to figure out how to draw the horizontal lines that separate the title area and the note area from the main plot region (see the red arrows).
How can I do this?
Oh, the second image is the one that is produced from the code below.
Thanks!
library(ggplot2)
theme_set(theme_bw()) # Use the black-and-white theme for every plot that follows.
data("midwest", package = "ggplot2")

# Scatterplot of county area against total population, with a title,
# a caption and a black border drawn around the whole plot.
gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state, size = popdensity)) +
  # `FALSE` rather than `F`: `F` is an ordinary variable that can be reassigned.
  geom_smooth(method = "loess", se = FALSE) +
  xlim(c(0, 0.1)) +
  ylim(c(0, 500000)) +
  labs(
    y = "Population",
    x = "Area",
    title = "Figure 4: Scatterplot",
    caption = "Source: midwest"
  ) +
  # Border around the full plot area.
  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` here;
  # `size` is kept so the snippet also runs on older versions.
  theme(plot.background = element_rect(colour = "black", size = 1))
plot(gg)
You can set coord_cartesian(clip = "off") and add a couple of annotation_custom calls. This allows plotting relative to the panel without having to specify coordinates relative to your data:
# Same scatterplot, plus two horizontal rules drawn outside the panel:
# one between the title and the plot, one between the plot and the caption.
ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state, size = popdensity)) +
  # `FALSE` rather than `F`: `F` is an ordinary variable that can be reassigned.
  geom_smooth(method = "loess", se = FALSE) +
  xlim(c(0, 0.1)) +
  ylim(c(0, 500000)) +
  labs(
    y = "Population",
    x = "Area",
    title = "FIGURE 4: Scatterplot",
    caption = "Source: midwest"
  ) +
  # Clipping must be off, otherwise anything drawn outside the panel is cut away.
  coord_cartesian(clip = "off") +
  # Line coordinates are relative to the panel (0 = left/bottom edge,
  # 1 = right/top edge), so values below 0 or above 1 reach into the margins.
  # These offsets are hand-tuned for this plot and its margins.
  annotation_custom(grid::linesGrob(x = c(-0.12, 1.19), y = c(1.03, 1.03))) +
  annotation_custom(grid::linesGrob(x = c(-0.12, 1.19), y = c(-.07, -.07))) +
  theme(
    plot.background = element_rect(colour = "black", size = 1),
    # vjust/hjust push the title up and to the left, above the upper rule.
    plot.title = element_text(size = 16, face = 2, vjust = 5, hjust = -0.2),
    # Extra margin leaves room for the rules and the shifted title.
    plot.margin = margin(20, 20, 20, 20)
  )
Related
I'm hoping to create a ggplot2 title overlaying a doughnut graph, with my reprex adapted from this example: https://www.r-graph-gallery.com/128-ring-or-donut-plot.html.
# load library
library(ggplot2)
# Example data: three categories and their counts.
data <- data.frame(
  category = c("A", "B", "C"),
  count = c(10, 60, 30)
)

# Share of the total held by each category.
data[["fraction"]] <- data[["count"]] / sum(data[["count"]])

# Upper edge of each ring segment: running total of the shares.
data[["ymax"]] <- cumsum(data[["fraction"]])

# Lower edge of each segment: the previous segment's upper edge, starting at 0.
data[["ymin"]] <- c(0, data[["ymax"]][-nrow(data)])

# Labels sit halfway between the two edges of their segment.
data[["labelPosition"]] <- (data[["ymin"]] + data[["ymax"]]) / 2

# The label text is simply the count.
data[["label"]] <- as.character(data[["count"]])
# Donut chart: stacked rectangles bent into a ring by polar coordinates,
# with the legend on top and a centred plot title.
ggplot(
  data,
  aes(xmin = 3, xmax = 4, ymin = ymin, ymax = ymax, fill = category)
) +
  geom_rect() +
  geom_label(aes(y = labelPosition, label = label), x = 3.5, size = 6) +
  coord_polar(theta = "y") + # remove this line to see the underlying rectangles
  xlim(2, 4) + # remove this line to get a pie chart instead of a ring
  scale_fill_brewer(palette = 1) +
  theme_void() +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(title = "My title")
This is what I have currently:
And this is what I want:
I haven't been able to find any documentation demonstrating how to do this in ggplot2. Any suggestions are appreciated.
You can add an annotation layer :
library(ggplot2)
# Donut chart with the title written inside the ring as a text annotation
# instead of a regular plot title.
ggplot(
  data,
  aes(xmin = 3, xmax = 4, ymin = ymin, ymax = ymax, fill = category)
) +
  geom_rect() +
  geom_label(aes(y = labelPosition, label = label), x = 3.5, size = 6) +
  # x = 2 is the lower x limit set below, so the text is placed at the
  # inner end of the x range.
  annotate("text", x = 2, y = 0.5, label = "My title", colour = "blue", size = 5) +
  coord_polar(theta = "y") +
  xlim(2, 4) +
  scale_fill_brewer(palette = 1) +
  theme_void() +
  theme(legend.position = "top")
I would like to apply a position_nudge to an object, but it should always be a certain distance (e.g. in "cm") rather than relative to the scale of the measured variable.
# Example data: one value per country code (scale multiplier of 1).
data <- data.frame(
  name = c("de", "gb", "cn", "ir", "ru"),
  value = 1 * c(3, 12, 5, 18, 45)
)
# Bar chart with a text label per bar placed below the axis.
# The nudge of -12 is expressed in data units of the y scale.
ggplot(data, aes(x = name, y = value)) +
  geom_col() +
  geom_text(
    aes(y = 0, label = paste0(name, value)),
    position = position_nudge(y = -12)
  ) +
  coord_cartesian(
    ylim = c(0, 50), # restrict the visible y-axis range
    clip = "off"     # keep the labels from being clipped away
  ) +
  theme(plot.margin = unit(c(1, 1, 1, 1), "lines"))
When changing the scale of the variable, that adjustment should not need to be made in the position_nudge argument, e.g.
# Scale multiplier applied to every value. The name `factor` is kept because
# the plotting code that follows reads it; calls to base `factor()` still
# work, since R skips non-function objects when looking up a function.
factor <- 100

# Same example data as before, with the values multiplied by `factor`.
data <- data.frame(
  name = c("de", "gb", "cn", "ir", "ru"),
  value = c(3, 12, 5, 18, 45) * factor
)
# Same bar chart on the rescaled data. The nudge is still -12 data units,
# which is now tiny relative to the y range set below.
ggplot(data, aes(x = name, y = value)) +
  geom_col() +
  geom_text(
    aes(y = 0, label = paste0(name, value)),
    position = position_nudge(y = -12)
  ) +
  coord_cartesian(
    ylim = c(0, 50 * factor), # restrict the visible y-axis range
    clip = "off"              # keep the labels from being clipped away
  ) +
  theme(plot.margin = unit(c(1, 1, 1, 1), "lines"))
Currently, this does not work, so that I need to manually change -12 to -1200 to achieve this:
This is of course only a short reproducible example, the actual use-case is placing country flags as x-axis labels below the plot.
The final product will look somewhat like this, but currently requires updating the nudges each time the y-values change:
Thank you very much!
The easiest "hack" is to make this two plots and bind them with patchwork or cowplot. If you try it differently, you'd soon get into deep grid ... trouble.
Related
baptiste on github
baptiste on stackoverflow
Sandy Muspratt's answer
The easy way:
library(ggplot2)
library(patchwork)
# Example data: one value per country code.
foo <- data.frame(
  name = c("de", "gb", "cn", "ir", "ru"),
  value = c(3, 12, 5, 18, 45) * 1
)

# One label per row, in the same order as the rows of `foo`.
foo_label <- paste(foo$name, foo$value)
# Base plot shared by both panels. geom_blank() is essential: it trains the
# scales on the data without drawing anything, so both panels get the same scaling.
p <- ggplot(foo, aes(x = name, y = value)) +
  geom_blank()

# Upper panel: the bars, with no outer margin.
p_1 <- p +
  geom_col() +
  coord_cartesian(ylim = c(0, 50), clip = "off") +
  theme(plot.margin = margin())

# Lower panel: only the labels.
# Labels are positioned by country name rather than by 1:5. The discrete x
# scale orders the names alphabetically, which differs from the row order of
# `foo`, so positional indices would put labels under the wrong bars.
p_text <- p +
  annotate("text", label = foo_label, x = foo$name, y = 0, colour = "red") +
  theme_void() +
  coord_cartesian(clip = "off") +
  theme(plot.margin = margin(1, 0, 1, 0, unit = "lines"))

# Stack the panels; a relative height of 0 for the text panel is a workaround
# that makes it as short as possible.
p_1 / p_text + plot_layout(heights = c(1, 0))
You can then of course annotate with anything.
For your stated goal, the ggtext library may be more appropriate, as it allows you to embed images directly into the x axis labels. See also here for another example.
library(ggplot2)
library(ggtext)
# Axis labels keyed by species: each value is an HTML/markdown snippet holding
# an image tag followed by the italicised species name.
labels <- c(
  setosa = "<img src='https://upload.wikimedia.org/wikipedia/commons/thumb/8/86/Iris_setosa.JPG/180px-Iris_setosa.JPG'
width='100' /><br>*I. setosa*",
  virginica = "<img src='https://upload.wikimedia.org/wikipedia/commons/thumb/3/38/Iris_virginica_-_NRCS.jpg/320px-Iris_virginica_-_NRCS.jpg'
width='100' /><br>*I. virginica*",
  versicolor = "<img src='https://upload.wikimedia.org/wikipedia/commons/thumb/2/27/20140427Iris_versicolor1.jpg/320px-20140427Iris_versicolor1.jpg'
width='100' /><br>*I. versicolor*"
)

# Boxplot of sepal width per species. The x-axis title is dropped and the tick
# labels are replaced by the snippets above, rendered through element_markdown().
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Width)) +
  geom_boxplot() +
  scale_x_discrete(name = NULL, labels = labels) +
  theme(axis.text.x = element_markdown(colour = "black", size = 11))
What I'm trying to do is overlay circles that have a dark outline on top of the ones I have, but I'm not sure how to size them since the points already have varying sizes. Also, is there any way to change the legend labels to something like $1M, $2M?
# Scatterplot of user rating against release date: point size follows box
# office, point colour follows tomatoImage, and each point is labelled with
# the film title.
mikebay_usergraph <-
  ggplot(
    mikebay_movies_dt,
    aes(x = Released, y = tomatoUserMeter, label = Title, colour = tomatoImage)
  ) +
  geom_point(aes(size = BoxOffice)) +
  # Fixed grey text colour, so the labels do not pick up the colour mapping.
  geom_text(
    hjust = .45, vjust = -.75,
    family = "Futura", size = 5, colour = "#535353"
  ) +
  # Heavy baseline at y = 0.
  geom_hline(yintercept = 0, size = 1.2, colour = "#535353") +
  scale_colour_manual(values = c("#336333", "#B03530")) +
  scale_x_date(limits = c(as.Date("1994-1-1"), as.Date("2017-1-1"))) +
  ggtitle("The Fall of Bayhem: How Michael Bay movies have declined") +
  theme(
    plot.title = element_text(size = 15, vjust = 1, family = "Futura"),
    axis.text.x = element_text(size = 12.5, family = "Futura"),
    axis.text.y = element_text(size = 12.0, family = "Futura"),
    axis.ticks = element_blank(),
    panel.background = element_rect(fill = "#F0F0F0"),
    panel.grid.major = element_line(colour = "#D0D0D0", size = .75)
  )
I offer two possible solutions for adding a circle or outline around size-scaled points in a scatterplot. For the first solution, I propose using plotting symbols that allow separate fill and outline colors. The drawback here is that you cannot control the thickness of the outline. For the second solution I propose adding an extra layer of slightly larger black points positioned under the primary geom_point layer. In this case, the thickness of the outline can be manually adjusted by setting thickness to a value between 0 and 1.
Finally, dollar legend formatting can be added by loading the scales package, and adding scale_size_continuous(labels=dollar) to your ggplot call.
library(ggplot2)
library(scales) # Needed for dollar labelling.
# Small stand-in data set: rating, release date, box office and fresh/rotten flag.
dat <- data.frame(
  rating = c(80, 60, 40),
  date = as.Date(c("1995-1-1", "2005-1-1", "2015-1-1")),
  boxoffice = c(3e7, 1e8, 7e7),
  tomato = c("fresh", "rotten", "rotten")
)
# Solution 1: shape 21 has separate fill and outline colours, so the fill
# carries the fresh/rotten colour while the outline stays black.
p1 <- ggplot(dat, aes(x = date, y = rating, size = boxoffice, fill = tomato)) +
  geom_point(shape = 21, colour = "black") +
  scale_fill_manual(values = c(fresh = "green", rotten = "red")) +
  scale_size_continuous(labels = dollar, range = c(8, 22))

# Solution 2: draw a slightly larger black point underneath each coloured one.
# `thickness` controls how much larger the black point is, as a fraction of
# the mean box office added to each point's size value.
thickness <- 0.35
p2 <- ggplot(dat, aes(x = date, y = rating)) +
  # Black layer first, so it ends up beneath the coloured points.
  geom_point(
    colour = "black",
    aes(size = boxoffice + (thickness * mean(boxoffice)))
  ) +
  geom_point(aes(colour = tomato, size = boxoffice)) +
  scale_colour_manual(values = c(fresh = "green", rotten = "red")) +
  # `dollar` (from scales) formats the size legend labels as dollar amounts.
  scale_size_continuous(labels = dollar, range = c(8, 22), name = "Box Office")
I have this overplotting issue going on. Even after reading a lot of posts on dodge, jitter and jitter dodge in all kinds of implementations I can't figure it out.
Here you can get my data: http://pastebin.com/embed_js.php?i=uPXN7nPt
library(dplyr)
library(gdata)
library(ggplot2)
library(directlabels)
# Load the measurements and derive the columns used for plotting.
all <- read.xls("all_auto_bio_adjusted_c.xls")
all$size.new <- sqrt(all$size.new)
all$station <- as.factor(all$station)
# Fix the order of the groups (this becomes the order of the facet rows).
all$group.new <- factor(
  all$group,
  levels = c(
    "C. hyperboreus", "C. glacialis", "Special Calanus",
    "M. longa", "Pseudocalanus sp.", "Copepoda"
  )
)

# Dodge position shared by points and error bars. `width` is spelled out in
# full instead of relying on partial matching of `w`.
pd <- position_dodge(width = 50)

# Automatic vs manual identification, one facet per group x station, with a
# 1:1 reference line and error bars of +/- one standard deviation.
allp <- ggplot(
  data = all,
  aes(y = averagebiol, x = automatic, colour = group.new, group = group.new)
) +
  geom_abline(intercept = 0, slope = 1) +
  # `show.legend` replaces the deprecated `show_guide` argument.
  geom_point(aes(size = size.new), show.legend = TRUE, position = pd) +
  # Use the values of size.new directly as point sizes.
  scale_size_identity() +
  geom_errorbar(
    aes(ymin = averagebiol - stdevbiol, ymax = averagebiol + stdevbiol),
    colour = "grey", width = 0.1, position = pd
  ) +
  facet_grid(group.new ~ station, scales = "free") +
  xlab("Automatic identification") +
  ylab("Manual identification") +
  ggtitle("Comparison of automatic vs manual identification") +
  theme_bw() +
  theme(
    plot.title = element_text(lineheight = .8, face = "bold", size = 20, vjust = 1),
    axis.text.x = element_text(
      colour = "grey20", size = 15, angle = 0, hjust = .5, vjust = .5, face = "bold"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 15, angle = 0, hjust = 1, vjust = 0, face = "bold"
    ),
    axis.title.x = element_text(
      colour = "grey20", size = 20, angle = 0, hjust = .5, vjust = 0, face = "bold"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 20, angle = 90, hjust = .5, vjust = 1, face = "bold"
    ),
    legend.position = "none",
    strip.text.x = element_text(size = 12, face = "bold", colour = "black", angle = 0),
    strip.text.y = element_text(size = 12, face = "bold", colour = "black")
  )
allp
Which produces this nice plot
But as you can see a lot of the points and error bars are cramped together. Shouldn't my implementation of position dodge work?
If I understood correctly, position dodge works on the scale of the axes, so with a dodge of 50 I should see some results. I also tried putting the dodge argument directly into the geom, but that had no effect either.
Any ideas?
If you leave out position = pd in both geom_errorbar() and geom_point() you get the same plot. The reason the data look 'cramped' is because of the spread of the x-values. As far as I know, dodging will only happen if two points 'overlap', which I interpret as having the same x-value, e.g. data on a categorical x-axis like in the case of a bar plot. Your x-axis is continuous so the points will not be dodged.
To deal with the overplotting you could try logarithmic scales:
library(ggplot2)
# Fetch the raw data into a temporary file and read it as CSV.
tmp <- tempfile()
download.file("http://pastebin.com/raw.php?i=uPXN7nPt", tmp)
all <- read.csv(tmp)

# Derived columns used by the plot.
all$size.new <- sqrt(all$size.new)
all$station <- as.factor(all$station)
all$group.new <- factor(
  all$group,
  levels = c(
    "C. hyperboreus", "C. glacialis", "Special Calanus",
    "M. longa", "Pseudocalanus sp.", "Copepoda"
  )
)

# Drop every row that has a missing value in any column.
all <- all[complete.cases(all), ]

# Base plot: the error-bar limits are part of the default mapping,
# so geom_errorbar() needs no aesthetics of its own.
allp <- ggplot(
  data = all,
  mapping = aes(
    x = automatic,
    y = averagebiol,
    ymin = averagebiol - stdevbiol,
    ymax = averagebiol + stdevbiol,
    colour = group.new,
    group = group.new
  )
) +
  geom_abline(intercept = 0, slope = 1) +
  geom_errorbar(colour = "grey", width = 0.1) +
  geom_point(aes(size = size.new)) +
  scale_size_area() + # only there so all points are visible on screen
  labs(
    x = "Automatic identification",
    y = "Manual identification",
    title = "Comparison of automatic vs manual identification"
  ) +
  theme_bw()

# Log scales on both axes spread out the crowded points; every facet shares
# the same axis ranges.
allp +
  scale_x_log10() +
  scale_y_log10() +
  facet_grid(group.new ~ station, scales = "fixed")
I have the following code:
library(ggplot2)
library(gridExtra)
# Fitted values with lower/upper bounds for five factor levels; `var` holds
# the name used for the x-axis title.
data <- data.frame(
  fit = c(9.8, 15.4, 17.6, 21.6, 10.8),
  lower = c(7.15, 12.75, 14.95, 18.95, 8.15),
  upper = c(12.44, 18.04, 20.24, 24.24, 13.44),
  factors = c(15, 20, 25, 30, 35),
  var = rep("Fator", 5)
)
# Line plot of the fitted values with error bars and points.
# Layers are added in the order line, error bars, points.
gp <- ggplot(data, aes(x = factors, y = fit, ymin = lower, ymax = upper)) +
  geom_line(aes(group = var), size = 1.2) +
  # `colour = "red"` sits inside aes(), so it is a mapped value handled by the
  # colour scale rather than a fixed colour.
  geom_errorbar(aes(colour = "red"), width = .8, size = 1) +
  geom_point(size = 4, shape = 21, fill = "grey") +
  # The newlines add spacing between the axis titles and the axis.
  labs(
    x = paste0("\n", data$var[1]),
    y = paste0("Values", "\n")
  ) +
  theme(
    legend.position = "none",
    axis.text = element_text(size = 11),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    plot.margin = unit(c(0.4, 0.4, 0.4, 0.4), "cm")
  ) +
  ylim(min(data$lower), max(data$upper))
I want to change the line color after I have the ggplot object. I'm trying:
# Attempt: override the colour scale on the finished plot object.
gp + scale_colour_manual(values = "green")
but it changes the error bar color and not the line color.
1)What should I do to change the line color?
2)How can I change the points color?
Thanks!
Try this:
# Find the line layer by its geom instead of assuming it is the first layer.
is_line <- vapply(
  gp$layers,
  function(layer) inherits(layer$geom, "GeomLine"),
  logical(1)
)
# Fail early if the plot does not contain exactly one line layer.
stopifnot(sum(is_line) == 1L)

# Replace the layer in place rather than deleting it and adding a new one at
# the end: this keeps the line underneath the error bars and points.
gp$layers[[which(is_line)]] <-
  geom_line(aes(group = var), colour = "green", size = 1.2)
gp
A similar technique should work for the points layer. Technique was dredged up from my memories of a similar question.
I just looked at the contents of gp$layers manually to see which was which. I presume that the order will be the order in which they appear in your code, but I wouldn't necessarily rely on that.