R ggplot Line With Vertical Bands - r

I wish to create a plot as the above with data such as this,
# Example data: one row per school with its score and the low/high bounds.
data1 <- data.frame(
  School    = as.numeric(1:10),
  Score     = c(80, 64, 79, 64, 64, 89, 69, 71, 61, 98),
  ScoreLow  = c(65, 62, 62, 60, 60, 84, 54, 55, 55, 69),
  ScoreHigh = c(98, 79, 85, 97, 88, 95, 97, 90, 79, 99)
)
The blue line is 'Score', with the score on the y-axis and 'School' on the x-axis. The length of each black line is determined by 'ScoreLow' and 'ScoreHigh'.

geom_errorbar would also work, in case you want to add some ticks at the edges (or leave them out, setting width=0, as below):
library(ggplot2)

# Same data as in the question.
data1 <- data.frame(
  School    = as.numeric(1:10),
  Score     = c(80, 64, 79, 64, 64, 89, 69, 71, 61, 98),
  ScoreLow  = c(65, 62, 62, 60, 60, 84, 54, 55, 55, 69),
  ScoreHigh = c(98, 79, 85, 97, 88, 95, 97, 90, 79, 99)
)

# Line through the scores; error bars with width = 0 (no end ticks) span
# ScoreLow..ScoreHigh for each school.
ggplot(data1, aes(x = School, y = Score)) +
  geom_line(colour = "#507bc7", size = 2) +
  geom_errorbar(
    aes(ymin = ScoreLow, ymax = ScoreHigh),
    width = 0,
    col = "black",
    size = 1.5
  ) +
  theme_minimal()
Created on 2020-04-10 by the reprex package (v0.3.0)

I think you are looking for a combination of geom_line() and geom_segment().
library(ggplot2)

# Score as a blue line, plus one vertical segment per school running from
# ScoreLow to ScoreHigh.
ggplot(data1) +
  geom_line(aes(x = School, y = Score), color = "blue", size = 1.5) +
  geom_segment(
    aes(x = School, xend = School, y = ScoreLow, yend = ScoreHigh),
    size = 2
  ) +
  # One tick per school on x; ticks every 10 from 0 to 100 on y.
  scale_x_continuous(breaks = seq_len(10)) +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, by = 10)) +
  theme_minimal()
Need to probably play around a bit to get it how you want it.

Related

Bunched up x axis ticks on multi panelled plot in ggplot

I am attempting to make a multi-panelled plot from three individual plots (see images). However, I am unable to fix the bunched-up x-axis tick labels when the plots are in the multi-panel format. Below is the script for the individual plots and for the multi-panel plot:
Individual Plot:
# Print, then keep, the 60th element of NewDat.
NewDat[[60]]
EstRes <- NewDat[[60]]

# newBa against Distance3, with a dashed red horizontal reference line and
# five dashed grey vertical markers.
EstResPlt <- ggplot(EstRes, aes(Distance3, `newBa`)) +
  geom_line() +
  scale_x_continuous(n.breaks = 10, limits = c(0, 3500)) +
  scale_y_continuous(n.breaks = 10, limits = c(0, 25)) +
  xlab("Distance from Core (μm)") +
  ylab("Ba:Ca concentration(μmol:mol)") +
  geom_hline(yintercept = 2.25, linetype = "dashed", color = "red") +
  geom_vline(xintercept = 1193.9, linetype = "dashed", color = "grey") +
  geom_vline(xintercept = 1965.5, linetype = "dashed", color = "grey") +
  geom_vline(xintercept = 2616.9, linetype = "dashed", color = "grey") +
  geom_vline(xintercept = 3202.8, linetype = "dashed", color = "grey") +
  geom_vline(xintercept = 3698.9, linetype = "dashed", color = "grey")
EstResPlt
Multi-panel plot:
# Lay the three plots out in a single row.
MultiP <- grid.arrange(MigrPlt, OcResPlt, EstResPlt, nrow = 1)
I have attempted to include:
# Attempted fix from the question: add a theme() to the result of grid.arrange().
# NOTE(review): the theme() line carries one more ")" than was opened, so this
# does not parse as written; the question reports that it only produced errors.
MultiP <- grid.arrange(MigrPlt,OcResPlt,EstResPlt, nrow =1)+
theme(axis.text.x = element_text (angle = 45)) )
MultiP
but have only received errors. It's not necessary for all tick marks to be included. An initial, mid and end value is sufficient and therefore they would not need to all be included or angled. I'm just not sure how to do this. Assistance would be much appreciated.
There are several options to resolve the crowded axes. Let's consider the following example which parallels your case. The default labelling strategy wouldn't overcrowd the x-axis.
library(ggplot2)
library(patchwork)
library(scales)

# Toy series: x runs from 0 to 3200 in steps of 20 (161 values), y is a
# cumulative sum of random normal draws.
df <- data.frame(
  x = seq(0, 3200, by = 20),
  y = cumsum(rnorm(161))
)

p <- ggplot(df, aes(x, y)) +
  geom_line()

# Three panels in the top row and one below, all given the same x scale.
(p + p + p) / p &
  scale_x_continuous(name = "Distance (um)")
However, because you've given n.breaks = 10 to the scale, it becomes crowded. So a simple solution would just be to remove that.
# Same layout, now requesting about 10 breaks on the x-axis.
(p + p + p) / p &
  scale_x_continuous(name = "Distance (um)", n.breaks = 10)
Alternatively, you could convert the micrometers to millimeters, which makes the labels less wide.
# Keep n.breaks = 10 but print the labels scaled by 1e-3 (um -> mm) with
# one decimal place, which makes each label narrower.
(p + p + p) / p &
  scale_x_continuous(
    name = "Distance (mm)",
    n.breaks = 10,
    labels = label_number(scale = 1e-3, accuracy = 0.1)
  )
Yet another alternative is to put breaks only every n units; in the case below, every 1000. By chance, this happens to give the same result as omitting n.breaks = 10.
# Place a break every 1000 units along the x-axis.
(p + p + p) / p &
  scale_x_continuous(name = "Distance (um)", breaks = breaks_width(1000))
Created on 2021-11-02 by the reprex package (v2.0.1)
I thought it would be better to show with an example.
What I meant was: you made MigrPlt, OcResPlt and EstResPlt each with ggplot() + .... For any plot whose x-axis text you want to rotate, add + theme(axis.text.x = element_text(angle = 45)) to that plot.
For example, with the iris data, to rotate the x-axis text only for plot a:
# Plot `a` gets its x-axis text rotated by 45 degrees ...
a <- ggplot(data = iris, mapping = aes(x = Sepal.Width, y = Sepal.Length)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 45))

# ... while plot `b` keeps the default theme.
b <- ggplot(data = iris, mapping = aes(x = Petal.Width, y = Petal.Length)) +
  geom_point()

# Arrange both plots in one row.
gridExtra::grid.arrange(a, b, nrow = 1)

Calculate axis tick locations based on data in faceted plot

I have an issue where I would like to calculate the locations of y-axis labels in a large plot made with facet_grid(). Let me show you what I mean using the mpg dataset.
# Attach the tidyverse with library(): it stops with an error if the package
# is missing, whereas require() only warns and returns FALSE, letting the
# script fail later with a less obvious message.
library(tidyverse)

# hwy against displ, faceted by drv (rows) and class (columns) with free
# scales.
ggplot(mpg, aes(x = displ)) +
  geom_point(aes(y = hwy)) +
  facet_grid(drv ~ class, scales = "free")
You will notice that both axes use variable numbers of labels. Ignoring problems with the x-axis, I am interested in labelling only three values on the y-axis: 0, half of max value, and max value. It makes sense in my use-case, so here is what I tried.
ggplot(mpg, aes(x = displ)) +
  geom_point(aes(y = hwy)) +
  facet_grid(drv ~ class, scales = "free") +
  # Blank layer at y = 0: extends the y-axis down to 0.
  geom_blank(aes(y = 0)) +
  scale_y_continuous(
    # Prevents ggplot2 from extending beyond y = 0.
    expand = expansion(mult = c(0, 0.1)),
    # Three axis labels, please.
    n.breaks = 3
  )
The plot correctly starts at y = 0 and labels it correctly. However, remaining labels are poorly assigned, and have labels n = 2 and n = 4 instead of n = 3 for some reason.
If I could only directly calculate the label positions!
# Attempt from the question: compute the break positions directly from the
# `hwy` column inside the scale call.
# NOTE: this call fails; the error message reported right after this snippet
# says that object 'hwy' is not found when the `breaks` argument is evaluated.
ggplot(mpg, aes(x = displ)) +
geom_point(aes(y = hwy)) +
facet_grid(drv ~ class, scales = "free") +
geom_blank(aes(y = 0)) +
scale_y_continuous(expand = expansion(mult = c(0, 0.1)),
n.breaks = 3,
breaks = c(0, 0.5*max(hwy), 1*max(hwy))) # Maybe these formulas work?
Error in check_breaks_labels(breaks, labels) : object 'hwy' not found
I believe providing break points this way should work, but that my syntax is wrong. How do I access and work with the data underlying the plot? Alternatively, if this doesn't work, can I manually specify y-axis labels for each row of panels?
I could really use some assistance here, please.
If you want custom rules for breaks, the easiest thing is to use a function implementing those rules given the (panel) limits.
Below an example for labeling 0, the max and half-max.
library(ggplot2)

ggplot(mpg, aes(x = displ)) +
  geom_point(aes(y = hwy)) +
  facet_grid(drv ~ class, scales = "free") +
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.1)),
    # Fixed minimum, flexible maximum; replaces the geom_blank() trick.
    limits = c(0, NA),
    # Given the limits, put breaks at the lower limit, the midpoint and the
    # upper limit.
    breaks = function(lims) c(lims[1], mean(lims), lims[2])
  )
You can remove scales = "free". Does that give you what you want?
# Attach the tidyverse with library(): it stops with an error if the package
# is missing, whereas require() only warns and returns FALSE.
library(tidyverse)

# Same plot without scales = "free", so facet_grid() uses its default scales.
ggplot(mpg, aes(x = displ)) +
  geom_point(aes(y = hwy)) +
  facet_grid(drv ~ class)

How to colour background on a scatterplot using ggplot but still show data points in R?

This is my first question here so hope this makes sense and thank you for your time in advance!
I am trying to generate a scatterplot with the data points being the log2 expression values of genes from 2 treatments from an RNA-Seq data set. With this code I have generated the plot below:
# Scatterplot of the two uninduced log2 expression columns, with three
# reference lines of slope 1 at intercepts +1 (blue), 0 (black) and -1 (red).
ggplot(
  control,
  aes(x = log2_iFGFR1_uninduced, y = log2_iFGFR4_uninduced)
) +
  geom_point(shape = 21, color = "black", fill = "gray70") +
  ggtitle("Uninduced iFGFR1 vs Uninduced iFGFR4 ") +
  xlab("Uninduced iFGFR1") +
  ylab("Uninduced iFGFR4") +
  # Axis ticks at every integer from -15 to 15.
  scale_y_continuous(breaks = seq(-15, 15, by = 1)) +
  scale_x_continuous(breaks = seq(-15, 15, by = 1)) +
  geom_abline(intercept = 1, slope = 1, color = "blue", size = 1) +
  geom_abline(intercept = 0, slope = 1, colour = "black", size = 1) +
  geom_abline(intercept = -1, slope = 1, colour = "red", size = 1) +
  theme_classic() +
  # Centre the plot title.
  theme(plot.title = element_text(hjust = 0.5))
Current scatterplot:
However, I would like to change the background of the plot below the red line to a lighter red and above the blue line to a lighter blue, but still being able to see the data points in these regions. I have tried so far by using polygons in the code below.
# Corner coordinates of the two triangles: pol1 towards the lower right,
# pol2 towards the upper left.
pol1 <- data.frame(
  x = c(-14, 15, 15),
  y = c(-15, -15, 14)
)
pol2 <- data.frame(
  x = c(-15, -15, 14),
  y = c(-14, 15, 15)
)

# Same scatterplot as before, with the two polygons added after the points.
ggplot(
  control,
  aes(x = log2_iFGFR1_uninduced, y = log2_iFGFR4_uninduced)
) +
  geom_point(shape = 21, color = "black", fill = "gray70") +
  ggtitle("Uninduced iFGFR1 vs Uninduced iFGFR4 ") +
  xlab("Uninduced iFGFR1") +
  ylab("Uninduced iFGFR4") +
  scale_y_continuous(breaks = seq(-15, 15, by = 1)) +
  scale_x_continuous(breaks = seq(-15, 15, by = 1)) +
  geom_polygon(data = pol1, aes(x = x, y = y), color = "pink1") +
  geom_polygon(data = pol2, aes(x = x, y = y), color = "powderblue") +
  geom_abline(intercept = 1, slope = 1, color = "blue", size = 1) +
  geom_abline(intercept = 0, slope = 1, colour = "black", size = 1) +
  geom_abline(intercept = -1, slope = 1, colour = "red", size = 1) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
New scatterplot:
However, these polygons hide my data points in this area and I don't know how to keep the polygon color but see the data points as well. I have also tried adding "fill = NA" to the geom_polygon code but this makes the area white and only keeps a colored border. Also, these polygons shift my axis limits so how do I change the axes to begin at -15 and end at 15 rather than having that extra unwanted length?
Any help would be massively appreciated as I have struggled with this for a while now and asked friends and colleagues who were unable to help.
Thanks,
Liv
Your question has two parts, so I'll answer each in turn using a dummy dataset:
# Dummy data: 20 random points, x and y each drawn from Normal(mean 5, sd 1).
df <- data.frame(
  x = rnorm(n = 20, mean = 5, sd = 1),
  y = rnorm(n = 20, mean = 5, sd = 1)
)
Stop geom_polygon from hiding geom_point
Stefan had commented with the answer to this one. Here's an illustration. Order of operations matters in ggplot. The plot you create is a result of each geom (drawing operation) performed in sequence. In your case, you have geom_polygon after geom_point, so it means that it will plot on top of geom_point. To have the points plotted on top of the polygons, just have geom_point happen after geom_polygon. Here's an illustrative example:
# Base plot object reused by the examples that follow.
p <- ggplot(df, aes(x, y)) +
  theme_bw()

# Points only, with both axes limited to 0..10.
p +
  geom_point() +
  xlim(0, 10) +
  ylim(0, 10)
Now if we add a geom_rect after, it hides the points:
# The rectangle is added after the points, so it is drawn on top of them.
p +
  geom_point() +
  geom_rect(xmin = 0, xmax = 5, ymin = 0, ymax = 5, fill = 'lightblue') +
  xlim(0, 10) +
  ylim(0, 10)
The way to prevent that is to just reverse the order of geom_point and geom_rect. It works this way for all geoms.
# The rectangle is added first, so the points are drawn on top of it.
p +
  geom_rect(xmin = 0, xmax = 5, ymin = 0, ymax = 5, fill = 'lightblue') +
  geom_point() +
  xlim(0, 10) +
  ylim(0, 10)
Removing whitespace between the axis and limits of the axis
The second part of your question asks about how to remove the white space between the edges of your geom_polygon and the axes. Notice how I have been using xlim and ylim to set limits? It is a shortcut for scale_x_continuous(limits=...) and scale_y_continuous(limits=...); however, we can use the argument expand= within scale_... functions to set how far to "expand" the plot before reaching the axis. You can set the expand setting for upper and lower axis limits independently, which is why this argument expects a two-component number vector, similar to the limits= argument.
Here's how to remove that whitespace:
# expand = c(0, 0) removes the padding between the axis limits and the
# panel edge on both axes.
p +
  geom_rect(xmin = 0, xmax = 5, ymin = 0, ymax = 5, fill = 'lightblue') +
  geom_point() +
  scale_x_continuous(limits = c(0, 10), expand = c(0, 0)) +
  scale_y_continuous(limits = c(0, 10), expand = c(0, 0))

ground geom_text to x axis (e.g. y =0)

I have a graph made in ggplot that looks like this:
I wish to have the numeric labels at each of the bars to be grounded/glued to the x axis where y <= 0.
This is the code to generate the graph as such:
# Bars of numofpics per row, with numofparcels (multiplied by 50) overlaid as
# semi-transparent points and a line; the secondary axis divides by 50 again.
ggplot(data = df) +
  geom_bar(aes(x = row, y = numofpics, fill = crop, group = 1), stat = 'identity') +
  # Columns are referenced by bare name inside aes(): `df$col` is discouraged
  # there because it bypasses the data attached to the plot. The layers
  # inherit `df` from ggplot(), so `data = df` is not repeated.
  geom_point(aes(x = row, y = numofparcels * 50, group = 2), alpha = 0.25) +
  geom_line(aes(x = row, y = numofparcels * 50, group = 2), alpha = 0.25) +
  # Text labels positioned at each bar's numofpics value.
  geom_text(aes(x = row, y = numofpics, label = bbch)) +
  geom_hline(yintercept = 300, linetype = "dashed", color = "red", size = 1) +
  scale_y_continuous(sec.axis = sec_axis(~ . / 50, name = "Number of Parcels")) +
  scale_x_discrete(
    name = c(),
    breaks = unique(df$crop),
    labels = as.character(unique(df$crop))
  ) +
  labs(x = c(), y = "Number of Pictures")
I've tried vjust and experimenting with position_nudge for the geom_text element, but every solution I can find changes the position of each element of the geom_text respective to its current position. As such everything I try results in situation like this one:
How can I make ggplot anchor the text to the x axis (at y <= 0), ideally with the option to also set angle = 45?
Link to dataframe = https://drive.google.com/file/d/1b-5AfBECap3TZjlpLhl1m3v74Lept2em/view?usp=sharing
As I said in the comments, just set the y-coordinate of the text to 0 or below, and specify the angle : geom_text(aes(x=row, y=-100, label=bbch), angle=45)
I'm behind a proxy server that blocks connections to google drive so I can't access your data. I'm not able to test this, but I would introduce a new label field in my dataset that sets y to be 0 if y<0:
# Add a label position column: 0 where numofpics is negative, otherwise
# numofpics itself. (The mutate() call is now closed; the closing parenthesis
# was missing, which made the snippet a syntax error.)
df <- df %>%
  mutate(labelField = if_else(numofpics < 0, 0, numofpics))
I would then use this label field in my geom_text call:
# Layer to add to the plot: labels placed at labelField, rotated by 45 degrees.
geom_text(aes(x = row, y = labelField, label = bbch), angle = 45)
Hope that helps.
You can simply define the y-value in geom_text (e.g. -50)
# Same plot as in the question, with the text labels pinned at a constant
# y = -50 instead of at each bar's height.
ggplot(data = df) +
  geom_bar(aes(x = row, y = numofpics, fill = crop, group = 1), stat = 'identity') +
  # Columns are referenced by bare name inside aes(): `df$col` is discouraged
  # there because it bypasses the data attached to the plot. The layers
  # inherit `df` from ggplot(), so `data = df` is not repeated.
  geom_point(aes(x = row, y = numofparcels * 50, group = 2), alpha = 0.25) +
  geom_line(aes(x = row, y = numofparcels * 50, group = 2), alpha = 0.25) +
  geom_text(aes(x = row, y = -50, label = bbch)) +
  geom_hline(yintercept = 300, linetype = "dashed", color = "red", size = 1) +
  scale_y_continuous(sec.axis = sec_axis(~ . / 50, name = "Number of Parcels")) +
  scale_x_discrete(
    name = c(),
    breaks = unique(df$crop),
    labels = as.character(unique(df$crop))
  ) +
  labs(x = c(), y = "Number of Pictures")

Line colour of ggplot looks very weird when plotting a large number of data points

I'm trying to plot a line with ggplot, however the colour of the line looks very weird. At some points it looks much thicker than at other points. Is this maybe because I have too many data points? How can this be solved?
I only have this problem with large datasets, the data for the plot in this example can be found here
# Value against Time as a thick line; the colour is mapped to the constant
# label 'T1'.
ggplot(data = largedataframe, mapping = aes(x = Time, y = Value)) +
  geom_line(mapping = aes(colour = 'T1'), size = 2)
Like Michael Bird, I got a plot that looks all right. There appears to be a slight "bloat" to the line's thickness at some places (one example circled below), but I wouldn't have noticed it if I wasn't looking for it:
Zooming all the way in until the individual data points become distinct gives a clearer view:
# Build one zoomed view of Value against Time from `df`.
#   x_window, y_window: length-2 limits passed to coord_cartesian().
#   point_size: size of the salmon points.
#   with_line: if TRUE, draw a geom_line() underneath the points.
zoom_points <- function(x_window, y_window, point_size, with_line = FALSE) {
  plt <- ggplot(df, aes(x = Time, y = Value))
  if (with_line) {
    plt <- plt + geom_line()
  }
  plt +
    geom_point(color = "salmon", size = point_size) +
    coord_cartesian(xlim = x_window, ylim = y_window)
}

# Four successively tighter windows; only the last one also draws the line.
p1 <- zoom_points(c(1000000, 1300000), c(0.75, 0.8), point_size = 1)
p2 <- zoom_points(c(1180000, 1220000), c(0.76, 0.79), point_size = 1)
p3 <- zoom_points(c(1207000, 1211000), c(0.769, 0.774), point_size = 0.1)
p4 <- zoom_points(
  c(1207700, 1208000), c(0.7695, 0.7705),
  point_size = 1, with_line = TRUE
)

# Show the four views in a 2 x 2 grid.
gridExtra::grid.arrange(p1, p2, p3, p4, nrow = 2)
There are points where the y value zig-zags slightly, which would account for a slightly thicker line at those points.
Personally I don't think it's a major problem, but if you're looking for an explanation why some line segments look thicker, this could account for it.

Resources