Axes will not start at 0 in ggplot - r

I have just created my first bubble chart in ggplot and, despite trying just about every 'solution' I could find on the internet for the past few hours, I have not been able to force the axes to start at 0. The x and y axes both start at 10 but I would like the axes to start at 0. I have tried using various combinations of xlim, ylim, expand_limits, scale_y_continuous and scale_x_continuous with no luck.
Here's the code I use to create the graph:
ggplot(mean_diffs, aes(x=sample, y=marked, size = pop_est)) +
geom_point(alpha=0.7) + scale_size(range = c(2, 10), name="Absolute error", breaks = c(5,10,20,40))

It's not clear what exactly you tried, but the following seems to work well, assuming your data are all numeric:
# Bubble chart with both axes pinned to 0-60 and a tick every 10 units.
ggplot(mean_diffs, aes(x = sample, y = marked, size = pop_est)) +
  geom_point(alpha = 0.7) +
  scale_size(
    name = "Absolute error",
    range = c(2, 10),
    breaks = c(5, 10, 20, 40)
  ) +
  # Explicit limits make each axis start at 0 instead of at the data minimum.
  scale_x_continuous(breaks = seq(0, 60, by = 10), limits = c(0, 60)) +
  scale_y_continuous(breaks = seq(0, 60, by = 10), limits = c(0, 60))
Reproducible data set, inferred from plot in question
# Nine observations: every combination of sample and marked in {10, 30, 50},
# each with its own pop_est value.
mean_diffs <- data.frame(
  sample = rep(c(10, 30, 50), each = 3),
  marked = rep(c(10, 30, 50), times = 3),
  pop_est = c(30, 20, 10, 40, 20, 3, 30, 7, 3)
)

Related

How to get ggplot Heatmap (R) to use two colors? One for between a fill value of -.1 to .1 and one for not

How to get ggplot Heatmap (R) to use two colors? One for between a fill value of -.1 to .1 and one for not
ggplot(base, aes(x,y, fill= base$`Equal Opportunity Difference`)) +
geom_tile() +
#axis formatting
scale_x_discrete(breaks = c(10, 20, 30, 40 , 50, 60, 70, 80, 90),
labels = c("10%", "20%","30%", "40%","50%", "60%", "70%","80%", "90%"),
limits = c(10,90)) +
scale_y_discrete(breaks = c(10, 20, 30, 40 , 50, 60, 70, 80, 90),
labels = c("10%", "20%","30%", "40%","50%", "60%", "70%","80%", "90%"),
limits = c(10,90)) +
geom_text(aes(label = signif(base$`Equal Opportunity Difference`,2)), color = "white",
size = 4) +
scale_fill_gradient2(midpoint=c(-.1, s.1), low="#B2182B", high="#2166AC")
This is what I have right now, which isn't working. Also, the axes are only showing 10% and 90%.
I would appreciate if someone had a solution for that too.
Without a minimal reproducible example it's difficult to guess potential solutions to your problem. Is this what you're hoping to achieve? If not, what do you want to change?
library(ggplot2)
library(scales)
# Build a 10 x 10 grid of cells with one random value per cell.
x <- seq_len(10)
y <- seq_len(10)
df <- expand.grid(x = x, y = y)
# 50 values drawn from [-10, 0] followed by 50 from [0, 10],
# rounded to 2 significant digits.
df$z <- signif(c(runif(50, -10, 0), runif(50, 0, 10)), 2)
# 20 break points give 19 bins: unit-wide bins on either side and a single
# wider (-1, 1] bin in the middle.
df$z_categorised <- cut(df$z, c(seq(-10, -1, 1), seq(1, 10, 1)))
# Red -> white -> blue colour ramp; called later with the number of bins.
palette_red_blue <- colorRampPalette(colors = c("#B2182B", "white", "#2166AC"))
# Heatmap of the binned values: one tile per cell, labelled with the raw value.
ggplot(df, aes(x = x, y = y, fill = z_categorised)) +
  geom_tile(color = "white") +
  geom_text(aes(label = z)) +
  # 19 colours, one per bin produced by cut() above.
  scale_fill_manual(values = palette_red_blue(19)) +
  # Grid positions 1..10 are shown as 10%..100% (value * 10).
  scale_x_continuous(breaks = seq(0, 10, 1),
                     labels = percent_format(scale = 10)) +
  scale_y_continuous(breaks = seq(0, 10, 1),
                     labels = percent_format(scale = 10)) +
  # expand is a logical switch; FALSE removes the padding around the tiles.
  coord_cartesian(expand = FALSE)
Created on 2022-06-21 by the reprex package (v2.0.1)

ggplot time series: messed up x axis - 2

This is modified version of this question.
I need to create time series plot for 2 lines for the following data:
# Creating data set
year <- c(rep(2018,4), rep(2019,4), rep(2020,4))
month_1 <- c(2, 3, 7, 8, 6, 10, 11, 12, 5, 7, 8, 12)
avg_dlt_calc <- c(10, 20, 11, 21, 13, 7, 10, 15, 9, 14, 16, 32)
avg_dlt_standard <- c(rep(9,12))
data_to_plot <- data.frame(cbind(year,month_1,avg_dlt_calc,avg_dlt_standard ))
data_to_plot$month_1 <- factor(data_to_plot$month_1, levels=unique(data_to_plot$month_1))
# One panel per year; both series are drawn against month on a discrete x-axis.
ggplot(data_to_plot, aes(x = as.factor(month_1))) +
  geom_line(aes(y = avg_dlt_calc, group = year, colour = "DLT Calculated"), size = 0.5) +
  geom_line(aes(y = avg_dlt_standard, group = year, colour = "DLT standard"), size = 0.5) +
  geom_point(aes(y = avg_dlt_calc, colour = "DLT Calculated")) +
  # The whole month_1 column is passed as the axis limits here.
  scale_x_discrete(name = "months", limits = data_to_plot$month_1) +
  facet_grid(~year, scales = "free") +
  scale_color_manual(
    name = "",
    labels = c("DLT Calculated", "DLT standard"),
    values = c("blue", "red")
  ) +
  theme(
    legend.position = "top",
    legend.text = element_text(size = 8)
  )
But x-axis looks wrong:
If I plot the data without this line:
data_to_plot$month_1 <- factor(data_to_plot$month_1, levels=unique(data_to_plot$month_1))
Then x-axis will still be messy:
I am setting limits for x-axis, but looks like it is not working.
How can I fix it?
I've skipped some lines and features of your plot, but in essence, this is what needs to be changed:
# Plot month as a number so the x-axis is continuous within each year panel.
# Assumes month_1 is still numeric, i.e. the factor() conversion from the
# question has not been applied to data_to_plot.
ggplot(data_to_plot, aes(x = month_1)) + # no as.factor
  geom_point(aes(y = avg_dlt_calc)) +
  geom_line(aes(y = avg_dlt_calc)) +
  geom_line(aes(y = avg_dlt_standard), colour = "red") +
  # do *not* use scale_x_discrete, your x-axis is *continuous*;
  # use the breaks argument to set the ticks.
  # note, limits should only have 2 values - lower and upper limit;
  # c(1, 12) keeps every month visible.
  scale_x_continuous(breaks = 1:12, limits = c(1, 12)) +
  facet_grid(~year)
In your code, you used limits = data_to_plot$month_1, but ggplot2 only used the 2 first elements of month_1 - it did not interpret it as a set of acceptable values.

How to add a point on the y-intercept (y-axis) using ggplot2

I have a scatter plot where the y-axis scaling changes at a certain point to plot data with some extreme values. I'm trying to add some sort of visual cue on the y-axis that indicates that the scaling changes at the point.
Here's an example of a plot
library(scales)
library(ggplot2)
set.seed(104)
ggdata <- data.frame('x' = rep('a',100),
'y' = c(runif(90, 0, 20), runif(10, 90, 100)))
transformation <- trans_new(
"my_transformation",
transform = function(x) ifelse(x <= 30, x / 5, (x - 30) / 20 + 30 / 5),
inverse = function(x) ifelse(x <= 30 / 5, x * 5, (x - 30 / 5) * 20 + 30)
)
ggplot(data = ggdata) +
geom_jitter(aes(x = x, y = y)) +
scale_y_continuous(trans = transformation, breaks = c(0, 10, 20, 30, 50, 70, 90, 110))
I want to add some marker to "tick 30" on y axis for scale change.
I was thinking of adding a double tick on the axis, but there is no linetype that looks like a double line. The product should look something like this. I'm aware of transforms like scale_y_log10, but I'd rather work with custom scaling that dynamically changes with the data.
EDIT: per @Tjebo's suggestion, I used annotate to add a "=" to the y axis breakpoint:
library(scales)
library(ggplot2)
set.seed(104)
ggdata <- data.frame('x' = rep('a',100),
'y' = c(runif(90, 0, 20), runif(10, 90, 100)))
transformation <- trans_new(
"my_transformation",
transform = function(x) ifelse(x <= 30, x / 5, (x - 30) / 20 + 30 / 5),
inverse = function(x) ifelse(x <= 30 / 5, x * 5, (x - 30 / 5) * 20 + 30)
)
mybreaks <- c(0, 10, 20, 30, 50, 70, 90, 110)
tick_linetype <- rep("solid", length(mybreaks))
tick_linetype[4] <- "blank"
ggplot(data = ggdata) +
geom_jitter(aes(x = x, y = y)) +
annotate(geom = "point", shape = "=", x = -Inf, y = 30, size = 3) +
scale_y_continuous(trans = transformation, breaks = mybreaks) +
theme(axis.ticks.y = element_line(linetype = tick_linetype)) +
coord_cartesian(clip = 'off')
I was thinking of adding a double tick on the axis, but there is no
linetype that looks like a double line.
You can use any character as point shape. Also an equal sign, or back slash, etc.
For example:
library(scales)
library(ggplot2)
set.seed(104)
ggdata <- data.frame('x' = rep('a',100),
'y' = c(runif(90, 0, 20), runif(10, 90, 100)))
transformation <- trans_new(
"my_transformation",
transform = function(x) ifelse(x <= 30, x / 5, (x - 30) / 20 + 30 / 5),
inverse = function(x) ifelse(x <= 30 / 5, x * 5, (x - 30 / 5) * 20 + 30)
)
ggplot(data = ggdata) +
geom_jitter(aes(x = x, y = y)) +
annotate(geom = "point", shape = "=", x = -Inf, y = 30, size = 8, color = 'red') +
scale_y_continuous(trans = transformation, breaks = c(0, 10, 20, 30, 50, 70, 90, 110))+
coord_cartesian(clip = 'off')
I removed the clipping, but you can also leave it. The color was just chosen for highlighting.
Or, even better, use text annotation. You can then also change the angle - kind of nice.
ggplot(data = ggdata) +
geom_jitter(aes(x = x, y = y)) +
annotate(geom = "text", label = "=", x = -Inf, y = 30, size = 8, color = "red", angle = 45) +
scale_y_continuous(trans = transformation, breaks = c(0, 10, 20, 30, 50, 70, 90, 110)) +
coord_cartesian(clip = "off")
Created on 2020-04-21 by the reprex package (v0.3.0)
I cannot get the exact look that you linked to, but perhaps some of these ideas are useful to you.
You can make your specified value a minor break, and add a line only to minor breaks (here I was unable to pick the exact value of 20, since that was already a major break, but perhaps you can play around with the numbers to get something you like):
ggplot(data = ggdata) +
geom_jitter(aes(x = x, y = y)) +
scale_y_continuous(trans = transformation, minor_breaks=20.05,breaks = c(0, 10,20, 30, 50, 70, 90, 110))+
theme(
panel.grid.minor.y = element_line(1)
)
Another option is to change the labels themselves. Here I have bolded and wrapped in () the 20 value, but you can add other symbols as well:
ggplot(data = ggdata) +
geom_jitter(aes(x = x, y = y)) +
scale_y_continuous(trans = transformation, minor_breaks = c(0, 10, 20, 30, 50, 70, 90, 110),
breaks = c(0, 10, 20, 30, 50, 70, 90, 110), labels=c(0, 10, expression(bold(("20"))), 30, 50, 70, 90, 110))
You can add a segment to the plot, which here isn't the prettiest option since the x axis isn't continuous, but perhaps it will spur ideas:
ggplot(data = ggdata) +
geom_jitter(aes(x = x, y = y)) +
scale_y_continuous(trans = transformation, breaks = c(0, 10, 20, 30, 50, 70, 90, 110))+
geom_segment(aes(x=-.01,y=19.5,xend=.01,yend=20.5),size=1.5)
Perhaps you could also just shade the bottom (or top) portion of your plot:
ggplot(data = ggdata,aes(x = x, y = y)) +
geom_jitter() +
scale_y_continuous(trans = transformation,breaks = c(0, 10,20, 30, 50, 70, 90, 110))+
annotate("rect", xmin = .4, xmax = 1.6, ymin = 0, ymax = 21,
alpha = .2)
This solution should help with how you want your axis to look like. FWIW I would like to caution against breaking axes unless you explicitly tell your audience about them. In the code below I created two plots, one is for the data below 30 and the other data is for the extreme points (and remove its x axis and labels). Then I use plot.margin to set the plots margins so that they overlap a bit when I put them in a grid.arrange. You might have to mess with the margins to get the labels to line up.
library(scales)
library(ggplot2)
library(gridExtra)
set.seed(104)
ggdata <- data.frame('x' = rep('a',100),
'y' = c(runif(90, 0, 20), runif(10, 90, 100)))
p1 <- ggplot(data = ggdata) +
geom_jitter(aes(x = x, y = y)) +
scale_y_continuous(breaks = seq(0,30,5), limits = c(0,30))+
theme(plot.margin=unit(c(0,.83,0,1), "cm"))
p2 <- ggplot(data = ggdata) +
geom_jitter(aes(x = x, y = y)) +
scale_y_continuous( breaks = seq(60,100,10), limits = c(60,100)) +
scale_x_discrete()+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
plot.margin=unit(c(0,1,-0.1,1), "cm"))
grid.arrange(p2,p1)

How to add subtext to axes in ggplot2 R

For the main y-axis and x-axis, I have generic titles like "Tank's Ratio" and "Counts". I want a second line of label where I specify the ratio and counts. eg. Just below "Tank's Ratio" I want "# in water/# in sand" in a smaller font but along the y-axis. Similarly for the x-axis.
Here is the basic code
data <- data.frame(set = c(1, 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 4), density = c(1, 3, 3, 1, 3, 1, 1, 1, 3, 3, 1, 3), counts = c(100, 2, 3, 76, 33, 12, 44, 13, 54, 36, 65, 1), ratio = c(1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 90, 1))
library(ggplot2)
ggplot(data, aes(x = counts, y = ratio)) +
geom_point() +
ylab("Tank's Ratio") +
xlab("Counts")
You can add x and main titles.
EDIT: This is ridiculously slooow!
#library(extrafont)
#loadfonts(dev="win")
library(tidyverse)
data %>%
ggplot(aes(x=counts, y=ratio)) + geom_point() +
labs(y=expression(atop(bold("Tank's Ratio"),atop(italic("#in water #in sand")))))+
theme_minimal()+
theme(axis.title.y = element_text(size=15,family="Comic Sans MS"))
ORIGINAL:
library(tidyverse)
data %>%
ggplot(aes(x=counts, y=ratio)) + geom_point() +
labs(y="Tank's Ratio \n #in Water#in sand")
It's not the most elegant solution, but hope it helps:
library(ggplot2)
library(gridExtra)
library(grid)
First, create plot without ylab:
g <- ggplot(data, aes(x = counts, y = ratio)) +
geom_point() +
ylab("") +
xlab("Counts")
Then add subtitle for both axis:
g2 <- grid.arrange(g,
bottom = textGrob("in water/ # in sand",
x = 0.55, y = 1, gp = gpar(fontsize = 9)),
left = textGrob("in water/ # in sand", rot = 90,
x = 1.5, gp = gpar(fontsize = 9)))
And finally, add description of y-axis
grid.arrange(g2,
left = textGrob("Tank's Ratio", rot = 90,
x = 1.7, gp = gpar(fontsize = 12)))
You could use the following code, defining the margins, the axis titles and sub-titles yourself:
We use theme to increase the bottom and left margin, and to suppress the automatically generated axis titles.
We use annotate to generate the text that serves as axis title and sub-title, if necessary, the text is rotated.
We generate the plot, turn it into a grob, and with this grob we can turn off clipping and show the plot.
g1 <- ggplot(data = data, aes(x = counts, y = ratio, group = 1)) +
geom_point() +
## increase margin size for left and bottom and
## remove the axis titles
theme(plot.margin = unit(c(1, 1, 4, 4), "lines"),
axis.title.y = element_blank(),
axis.title.x = element_blank() ) +
## define the plotting area to NOT include the annotations
coord_cartesian(xlim = c(0, 100), ylim= c(0, 100), expand = FALSE) +
## annotate y axis
annotate(geom = "text", x = -9, y = 50, label = "Tank's Ratio", angle = 90, size = 5) +
annotate(geom = "text", x = -5, y = 50, label = "#in water/#in sand", angle = 90, size = 4) +
## annotate x axis
annotate(geom = "text", x = 50, y = -5, label = "Counts", size = 5) +
annotate(geom = "text", x = 50, y = -9, label = "#in water/#in sand", size = 4)
## turn off clipping for axis extra labels
g2 <- ggplot_gtable(ggplot_build(g1))
g2$layout$clip[g2$layout$name == "panel"] <- "off"
grid::grid.draw(g2)
This yields the following picture:
Please let me know whether this is what you want.

How to create different colours for the same group between geom_smooth and geom_point?

I would like to have two colours in the same group. It would also be nice to select the line type.
I would like the colour of the line drawn with geom_smooth for a given group (and line type) to be a bit different from the colour of that group's points plotted with geom_point. In other words, the line for a given group should be different from its points. How would I go about doing this?
I have created some sample data.
Note: I'm getting errors when I try to use linetype in geom_smooth().
#test data
obs=rep(1:3, each=30)
length(obs)
set.seed(50)
x=sample(seq(from = 20, to = 50, by = 5), size = 90, replace = TRUE)
y=sample(seq(from = 200, to = 500, by = 5), size = 90, replace = TRUE)
df = data.frame(obs,x,y)
ggplot(df, aes(x, y, color = factor(obs)))+
geom_point()+
theme(legend.position="bottom")+
scale_x_continuous(breaks = seq(0, 50, by = 4),expand = c(0, 0), labels = comma_format())+
scale_y_continuous(breaks = seq(0, 500, by = 10),limits = c(0, 500),expand = c(0, 0), labels = comma_format())+
geom_smooth(aes(group=obs), method="lm")+
scale_colour_manual(values = c("wheat3", "slategray1","dimgray"),name = "Average Density Band:")
I'm not sure this is a particularly good idea as you lose a visual clue as to what points are connected to what regression line, however the following does work for me. Essentially I took the colour aesthetic out of the ggplot() call and pass it to geom_point() and geom_smooth() individually.
library(ggplot2)
library(scales)
# test data: three groups of 30 observations with randomly sampled x and y
obs <- rep(1:3, each = 30)
length(obs)
set.seed(50)
x <- sample(seq(20, 50, by = 5), size = 90, replace = TRUE)
y <- sample(seq(200, 500, by = 5), size = 90, replace = TRUE)
df <- data.frame(obs, x, y)
# Scatter plot with one linear fit per obs group. Colour is not mapped in the
# top-level aes(); each layer sets it on its own.
ggplot(df, aes(x, y))+
# NOTE(review): color is given outside aes(), so it is a fixed per-point value
# rather than a mapped aesthetic; `obs` presumably resolves to the vector
# created above, not to the df column - confirm.
geom_point(color = factor(obs))+
theme(legend.position="bottom")+
# comma_format() is provided by the scales package loaded above.
scale_x_continuous(breaks = seq(0, 50, by = 4),expand = c(0, 0), labels = comma_format())+
scale_y_continuous(breaks = seq(0, 500, by = 10),limits = c(0, 500),expand = c(0, 0), labels = comma_format())+
# Only this layer maps colour inside aes(), so the manual colour scale below
# presumably affects the fitted lines but not the points - confirm.
geom_smooth(aes(group=obs, color = factor(obs)), method="lm")+
scale_colour_manual(values = c("orange", "yellow","blue"),name = "Average Density Band:")
I changed your line colours as I couldn't see them with my eyesight.
I suspect this isn't meant to work like this and to be frank I'm not 100% sure as to why it does.

Resources