How to use hjust to move the label on only one plot? - r

I am creating a multipanel graph using plot_grid of the relationship of discharge and surface area with species richness in Amazon Rivers. Here is my data:
# Example data frame with 26 rows and three numeric columns:
#   richness - integer counts
#   surface  - numeric values
#   disch    - integer values
# (presumably one row per river; units are not stated in the data itself)
riverssub<-structure(list(richness = c(127L, 110L, 89L, 74L, 62L, 18L, 22L,
71L, 38L, 91L, 56L, 39L, 90L, 37L, 147L, 53L, 92L, 207L, 52L,
126L, 79L, 32L, 100L, 181L, 83L, 690L), surface = c(33490, 4410,
770, 164.7, 288.5, 9.85, 33.1, 750, 46.9, 970, 85.2, 39.2, 780,
97.3, 3983.71, 220, 500, 11250, 115, 1350, 278, 23.05, 310, 2050,
560, 34570), disch = c(2640L, 687L, 170L, 353L, 384L, 16L, 31L,
513L, 32L, 392L, 50L, 32L, 206L, 81L, 1260L, 104L, 220L, 6100L,
308L, 2060L, 443L, 102L, 348L, 4758L, 913L, 40487L)), class = "data.frame", row.names = c(NA,
-26L))
Here is the code for my graphs and multiplot:
library(cowplot)
# cowplot (>= 1.0.0) no longer attaches ggplot2, so it has to be loaded
# explicitly for ggplot(), aes(), theme_bw(), ... to be found.
library(ggplot2)

# Panel A: species richness against surface area, both axes on a log10 scale.
a <- ggplot(data = riverssub, aes(x = surface, y = richness)) +
  geom_point() +
  scale_y_log10() +
  scale_x_log10() +
  labs(x = 'Surface Area (100 km\u00b2)', y = "Fish Species Richness") +
  theme_bw()

# Panel B: species richness against discharge; the y title is a single space
# because panel A already carries the shared y-axis title.
b <- ggplot(data = riverssub, aes(x = disch, y = richness)) +
  geom_point() +
  scale_y_log10() +
  scale_x_log10() +
  labs(x = bquote('Mean Annual Discharge'~(m^3 * s^-1)), y = " ") +
  theme_bw()

# Place both panels side by side; the y tick labels of B are dropped and the
# automatic "A"/"B" labels are positioned via label_x / label_y.
plot_grid(a, b + theme(axis.text.y = element_blank()),
  nrow = 1, align = 'h', labels = "AUTO", label_y = 0.97, label_x = 0.1)
I want the "A" label to be in the same position on the first plot as the "B" label is on the second plot. I know I can use hjust() within plot_grid() to achieve this, although I am unsure how to do it. Can anyone help? Thanks in advance.

Instead of fiddling around with hjust to place the labels, I would suggest adding the labels to the plots before aligning them via plot_grid, as already suggested by @Guillaume in his comment. One option to do so, and to ensure that the labels are put at the same relative positions, would be to make use of annotation_custom:
library(cowplot)
library(ggplot2)
library(magrittr)

# Layers shared by both panels: points, log10 axes and the bw theme.
shared_layers <- list(
  geom_point(),
  scale_y_log10(),
  scale_x_log10(),
  theme_bw()
)

a <- ggplot(data = riverssub, aes(x = surface, y = richness)) +
  shared_layers +
  labs(x = "Surface Area (100 km\u00b2)", y = "Fish Species Richness")

b <- ggplot(data = riverssub, aes(x = disch, y = richness)) +
  shared_layers +
  labs(x = bquote("Mean Annual Discharge" ~ (m^3 * s^-1)), y = " ") +
  theme(axis.text.y = element_blank())

# Draw a bold tag at fixed coordinates (x = .05, y = .97) on one panel.
# `panel` is a ggplot object, `tag` the text to draw (here the list name).
tag_panel <- function(panel, tag) {
  tag_grob <- grid::textGrob(
    label = tag, x = .05, y = .97,
    gp = grid::gpar(fontface = "bold")
  )
  panel + annotation_custom(tag_grob)
}

# Tag every panel with its list name, then lay the panels out in one row.
list(A = a, B = b) %>%
  purrr::imap(tag_panel) %>%
  plot_grid(plotlist = ., nrow = 1, align = "h")

Related

20 panel graphs from ggplot have shifted values

I am trying to make a graph with 20 panels so as to be able to graph 20 y-values and 1 x-value individually. The x-axis are days of July to October and y-axis values are temperatures for each of the years 1996 to 2015.
The 20 panels that I am getting show shifted temperature values for the July-October months of each year (see attached), and they do not show all 20 years but only about 10 years (1996-2006).
The output of dput(head(atlanta_temp_data)) is as following
structure(list(DAY = structure(c(19539, 19540, 19541, 19542, 19543, 19544), class = "Date"), X1996 = c(98L, 97L, 97L, 90L,
89L, 93L), X1997 = c(86L, 90L, 93L, 91L, 84L, 84L), X1998 = c(91L,
88L, 91L, 91L, 91L, 89L), X1999 = c(84L, 82L, 87L, 88L, 90L,
91L), X2000 = c(89L, 91L, 93L, 95L, 96L, 96L), X2001 = c(84L,
87L, 87L, 84L, 86L, 87L), X2002 = c(90L, 90L, 87L, 89L, 93L,
93L), X2003 = c(73L, 81L, 87L, 86L, 80L, 84L), X2004 = c(82L,
81L, 86L, 88L, 90L, 90L), X2005 = c(91L, 89L, 86L, 86L, 89L,
82L), X2006 = c(93L, 93L, 93L, 91L, 90L, 81L), X2007 = c(95L,
85L, 82L, 86L, 88L, 87L), X2008 = c(85L, 87L, 91L, 90L, 88L,
82L), X2009 = c(95L, 90L, 89L, 91L, 80L, 87L), X2010 = c(87L,
84L, 83L, 85L, 88L, 89L), X2011 = c(92L, 94L, 95L, 92L, 90L,
90L), X2012 = c(105L, 93L, 99L, 98L, 100L, 98L), X2013 = c(82L,
85L, 76L, 77L, 83L, 83L), X2014 = c(90L, 93L, 87L, 84L, 86L,
87L), X2015 = c(85L, 87L, 79L, 85L, 84L, 84L)), row.names = c(NA,
6L), class = "data.frame")
Please help to fix it.
Following is the R-code that I am using,
# Question script: reads a wide temperature table (a DAY column plus one
# X<year> column per year) and draws every year as its own line + point layer,
# faceted by a row-based Year column.
# Removes every object from the global environment before starting.
rm(list = ls())
library(data.table)
library(ggplot2)
set.seed(20)
atlanta_temp_data=read.table("temps.txt",header=TRUE)
# DAY is parsed with a day-month format that has no year component.
atlanta_temp_data$DAY=as.Date(atlanta_temp_data$DAY,format="%d-%b")
class(atlanta_temp_data$DAY)
# Labels the rows in consecutive runs of 12 with 1996, 1997, ..., truncated to
# the number of rows.
# NOTE(review): this Year is derived from row position only and is unrelated
# to the X1996..X2015 columns, so facet_wrap(~Year) below splits the plot by
# row block rather than by the year a column represents -- this looks like the
# cause of the "shifted" panels; confirm against temps.txt.
atlanta_temp_data$Year = rep(1996:2015, each=12)[1:nrow(atlanta_temp_data)]
ggplot(atlanta_temp_data, aes(DAY, group = Year, color = Year)) +
# One line layer per year column; each layer overrides color with a constant
# string so the legend shows the column name.
geom_line(aes(y = X1996, color = "X1996"))+
geom_line(aes(y = X1997, color = "X1997")) +
geom_line(aes(y = X1998, color = "X1998")) +
geom_line(aes(y = X1999, color = "X1999")) +
geom_line(aes(y = X2000, color = "X2000")) +
geom_line(aes(y = X2001, color = "X2001")) +
geom_line(aes(y = X2002, color = "X2002")) +
geom_line(aes(y = X2003, color = "X2003")) +
geom_line(aes(y = X2004, color = "X2004")) +
geom_line(aes(y = X2005, color = "X2005")) +
geom_line(aes(y = X2006, color = "X2006")) +
geom_line(aes(y = X2007, color = "X2007")) +
geom_line(aes(y = X2008, color = "X2008")) +
geom_line(aes(y = X2009, color = "X2009")) +
geom_line(aes(y = X2010, color = "X2010")) +
geom_line(aes(y = X2011, color = "X2011")) +
geom_line(aes(y = X2012, color = "X2012")) +
geom_line(aes(y = X2013, color = "X2013")) +
geom_line(aes(y = X2014, color = "X2014")) +
# The matching point layers start on the same line as the last line layer.
geom_line(aes(y = X2015, color = "X2015")) + geom_point(aes(y = X1996, color = "X1996"))+
geom_point(aes(y = X1997, color = "X1997")) +
geom_point(aes(y = X1998, color = "X1998")) +
geom_point(aes(y = X1999, color = "X1999")) +
geom_point(aes(y = X2000, color = "X2000")) +
geom_point(aes(y = X2001, color = "X2001")) +
geom_point(aes(y = X2002, color = "X2002")) +
geom_point(aes(y = X2003, color = "X2003")) +
geom_point(aes(y = X2004, color = "X2004")) +
geom_point(aes(y = X2005, color = "X2005")) +
geom_point(aes(y = X2006, color = "X2006")) +
geom_point(aes(y = X2007, color = "X2007")) +
geom_point(aes(y = X2008, color = "X2008")) +
geom_point(aes(y = X2009, color = "X2009")) +
geom_point(aes(y = X2010, color = "X2010")) +
geom_point(aes(y = X2011, color = "X2011")) +
geom_point(aes(y = X2012, color = "X2012")) +
geom_point(aes(y = X2013, color = "X2013")) +
geom_point(aes(y = X2014, color = "X2014")) +
geom_point(aes(y = X2015, color = "X2015")) +
# Legend title, vertical x tick labels, plot title, y label and one facet per
# value of the row-based Year column.
scale_color_discrete(name = "Year") + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))+
ggtitle("Atlanta temps for July to October (1996-2015)")+labs(y = "Temperature (degF)")+facet_wrap(~Year)
library(tidyverse)

# Reshape the wide table (one X<year> column per year) into long form so that
# a single point layer and facet_wrap() can produce one panel per year.
# Only the X<year> columns are pivoted, so any extra helper column (such as a
# row-based Year column) is left alone instead of being mixed into `value`.
atlanta_temp_data %>%
  pivot_longer(starts_with("X")) %>%
  # "X1996" -> 1996: the year is recovered from the original column name.
  mutate(year = parse_number(name)) %>%
  ggplot(aes(DAY, value, color = year)) +
  geom_point() +
  facet_wrap(~year)

How to implement stacked bar graph with a line chart in R

I have a dataset containing y variable as Year and x variables as (A, B, C(%)). I have attached the dataset here.
dput(result)
structure(list(Year = 2008:2021, A = c(4L, 22L, 31L, 48L, 54L,
61L, 49L, 56L, 59L, 85L, 72L, 58L, 92L, 89L), B = c(1L, 2L, 6L,
7L, 14L, 21L, 15L, 27L, 27L, 46L, 41L, 26L, 51L, 62L), C... = c(25,
9.09, 19.35, 14.58, 25.93, 34.43, 30.61, 48.21, 45.76, 54.12,
56.94, 44.83, 55.43, 69.66)), class = "data.frame", row.names = c(NA,
-14L))
The variables A and B will be plotted as stacked bar graph and the C will be plotted as line chart in the same plot. I have generated the plot using excel like below:
How can I create the same plot in R?
You first need to reshape longer, for example with pivot_longer() from tidyr, and then you can use ggplot2 to plot the bars and the line in two separate layers. The fill = argument in the geom_bar(aes()) lets you stratify each bar according to a categorical variable - name is created automatically by pivot_longer().
library(ggplot2)
library(tidyr)

# Long format for the bar series: columns A and B become name/value pairs,
# while Year and `C(%)` are repeated on every row.
long_dat <- pivot_longer(dat, A:B)

# Bars stacked by `name` (A / B), with `C(%)` drawn as a line on top.
ggplot(long_dat, aes(x = Year)) +
  geom_bar(aes(y = value, fill = name), stat = "identity") +
  geom_line(aes(y = `C(%)`), size = 2)
Created on 2022-06-09 by the reprex package (v2.0.1)
You're asking for overlaid bars, in which case there's no need to pivot, and you can add separate layers. However, I would argue that this could confuse or mislead many people - usually in stacked plots bars are stacked, not overlaid, so tread with caution!
library(ggplot2)
library(tidyr)

# Two independent bar layers drawn on top of each other (A first, then a
# half-transparent B), followed by the `C(%)` line. Nothing is stacked here.
ggplot(dat, aes(x = Year)) +
  geom_col(aes(y = A), fill = "lightgreen") +
  geom_col(aes(y = B), fill = "red", alpha = 0.5) +
  geom_line(aes(y = `C(%)`), size = 2) +
  labs(y = "", caption = "NB: bars are overlaid, not stacked!")
Created on 2022-06-09 by the reprex package (v2.0.1)
I propose this:
library(data.table)
library(ggplot2)
library(ggthemes)

# Factor that maps the C(%) series onto the primary (A&B) axis. The secondary
# axis divides by the same factor, so both must always come from this one
# constant.
c_scale <- 3 / 2

dt <- fread("dataset.csv")
dt.long <- melt(dt, id.vars = c("Year"))

# Bars: the A and B series. Line: the C(%) series rescaled to the primary axis.
# (Subsetting with `[` already returns a new data.table, so no copy() needed.)
dt.AB <- dt.long[variable %in% c("A", "B"), ]
dt.C <- dt.long[variable == "C(%)", .(Year, variable, value = value * c_scale)]

ggplot(dt.AB, aes(x = Year, y = value, fill = variable)) +
  geom_bar(stat = "identity") +
  geom_line(data = dt.C, colour = "red", aes(x = Year, y = value)) +
  scale_x_continuous(
    breaks = pretty(dt.AB$Year, n = length(unique(dt.AB$Year)))
  ) +
  scale_y_continuous(
    name = "A&B",
    breaks = seq(0, 150, 10),
    # Undo the rescaling so the right-hand axis reads in original C(%) units.
    sec.axis = sec_axis(~ . / c_scale, name = "C(%)", breaks = seq(0, 100, 10))
  ) +
  theme_hc() +
  scale_fill_manual(values = c("grey70", "grey50", "grey30")) +
  theme(
    axis.line.y = element_line(colour = "black", size = 0.5, linetype = "solid")
  )

Extract slope of multiple trend lines from geom_smooth()

I am trying to plot multiple trend lines (every ten years) in a time series using ggplot.
Here's the data:
# Example time series: 50 rows, one per year.
#   YY - year, 1961 to 2010
#   a  - integer value recorded for that year
dat <- structure(list(YY = 1961:2010, a = c(98L, 76L, 83L, 89L, 120L,
107L, 83L, 83L, 92L, 104L, 98L, 91L, 81L, 69L, 86L, 76L, 85L,
86L, 70L, 81L, 77L, 89L, 60L, 80L, 94L, 66L, 77L, 85L, 77L, 80L,
79L, 79L, 65L, 70L, 80L, 87L, 84L, 67L, 106L, 129L, 95L, 79L,
67L, 105L, 118L, 85L, 86L, 103L, 97L, 106L)), .Names = c("YY",
"a"), row.names = c(NA, -50L), class = "data.frame")
Here's the script:
# Dashed linear trend line fitted to the given rows of `dat` only.
decade_trend <- function(rows) {
  geom_smooth(
    data = dat[rows, ], aes(x = YY, y = a),
    method = "lm", se = FALSE, color = "black",
    formula = y ~ x, linetype = "dashed"
  )
}

# Blue line + points for the whole series, a boxed white panel with serif
# axis text, and one trend line per block of ten rows (added one by one).
p <- ggplot(dat, aes(x = YY)) +
  geom_line(aes(y = a), colour = "blue", lwd = 1) +
  geom_point(aes(y = a), colour = "blue", size = 2) +
  theme(
    panel.background = element_rect(fill = "white"),
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"),
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    axis.line.x = element_line(colour = "black"),
    axis.line.y = element_line(colour = "black"),
    axis.text = element_text(size = 15, colour = "black", family = "serif"),
    axis.title = element_text(size = 15, colour = "black", family = "serif"),
    legend.position = "top"
  ) +
  scale_x_discrete(limits = c(seq(1961, 2010, 5)), expand = c(0, 0)) +
  decade_trend(1:10) +
  decade_trend(11:20) +
  decade_trend(21:30) +
  decade_trend(31:40) +
  decade_trend(41:50) +
  labs(x = "Year", y = "Number of Days")

# Write the figure to test.png (8 x 5).
outImg <- paste0("test", ".png")
ggsave(outImg, p, width = 8, height = 5)
This is the resulting image:
WHAT I WANT/PROBLEMS
I want to extract the slopes and add them to the trend lines in the figure. How can I extract the slope of each line from geom_smooth()?
Currently, I am plotting the trend lines one by one. I want to know if there is an efficient way of doing this with adjustable time window. Suppose for example, I want to plot the trend lines for every 5 years. In the figure above the time window is 10.
Suppose, I only want to plot the significant trend lines (i.e., p-value < 0.05, null: no trend or slope equals 0), is it possible to implement this with geom_smooth()?
I'll appreciate any help.
So, each of these tasks is best handled before you pipe your data into ggplot2, but they are all made fairly easy using some of the other packages from the tidyverse.
Beginning with questions 1 and 2:
While ggplot2 can plot the regression line, to extract the estimated slope coefficients you need to work with the lm() object explicitly. Using group_by() and mutate(), you can add a grouping variable (my code below does this for 5 year groups just for example) and then calculate and extract just the slope estimate into columns in your existing data frame. Then those slope estimates can be plotted in ggplot using the geom_text() call. I've done this below in a quick and dirty way (placing each label at the mean of the x and y values they regress) but you can specify their exact placement in your dataframe.
Grouping variables and data prep makes question 2 a breeze too: now that you have the grouping variables explicitly in your dataframe there is no need to plot one by one, geom_smooth() accepts the group aesthetic.
Additionally, to answer question 3, you can extract the pvalue from the summary of your lm objects and filter out only those that are significant to the level you care about. If you pass this now complete dataframe to geom_smooth() and geom_text() you will get the plot you're looking for!
library(tidyverse)

# Width of each trend window, in consecutive rows (years) of `dat`, and the
# p-value threshold below which a window's trend line is kept.
window <- 5
p_cutoff <- 0.2

# set up our base plot
p <- ggplot(dat, aes(x = YY, y = a)) +
  geom_line(colour = "blue", lwd = 1) +
  geom_point(colour = "blue", size = 2) +
  theme(
    panel.background = element_rect(fill = "white"),
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"),
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    axis.line.x = element_line(colour = "black"),
    axis.line.y = element_line(colour = "black"),
    axis.text = element_text(size = 15, colour = "black", family = "serif"),
    axis.title = element_text(size = 15, colour = "black", family = "serif"),
    legend.position = "top"
  ) +
  # YY is numeric, so use a continuous scale with explicit breaks rather than
  # a discrete scale with numeric limits.
  scale_x_continuous(breaks = seq(1961, 2010, 5), expand = c(0, 0))

# add a grouping variable (or many!)
# Rows are numbered into windows of `window` consecutive years: 1, 1, ..., 2, ...
# (a trailing window with fewer than 3 rows cannot be tested for significance).
prep5 <- dat %>%
  mutate(group5 = (row_number() - 1) %/% window + 1) %>%
  group_by(group5) %>%
  mutate(
    # Regress a on YY -- the same orientation as geom_smooth(formula = y ~ x)
    # below -- so the label is the slope of the line that is actually drawn.
    slope = round(coef(lm(a ~ YY))[[2]], 2),
    significance = summary(lm(a ~ YY))$coefficients[2, 4],
    x = mean(YY), # x coordinate for slope label
    y = mean(a) # y coordinate for slope label
  ) %>%
  ungroup() %>%
  filter(significance < p_cutoff) # only keep windows with a pvalue < p_cutoff

# One row per window, so each slope label is drawn once instead of once per
# data point of the window.
slope_labels <- distinct(prep5, group5, slope, x, y)

p + geom_smooth(
  data = prep5, aes(x = YY, y = a, group = group5), # grouping variable does the plots for us!
  method = "lm", se = FALSE, color = "black",
  formula = y ~ x, linetype = "dashed"
) +
  geom_text(
    data = slope_labels, aes(x = x, y = y, label = slope),
    nudge_y = 12, nudge_x = -1
  )
Now you may want to be a little more careful about specifying the location of your text labels than I have been here. I used means and the nudge_* arguments of geom_text() to do a quick example but keep in mind since these values are mapped explicitly to x and y coordinates, you have complete control!
Created on 2018-07-16 by the reprex
package (v0.2.0).

Reshape and plot 3 quantities with commun Label (geom_text)

My data as follow :
# Example data frame with 10 rows:
#   Hour             - time of day as "HH:MM:SS" character strings
#   Cat1, Cat2, Cat3 - integer quantities
#   Label            - two-letter character code for the row
k2=structure(list(Hour = c("17:02:00", "17:04:00", "17:07:00", "17:13:00",
"17:14:00", "17:17:00", "17:19:00", "17:22:00", "17:28:00", "17:29:00"
), Cat1 = c(300L, 304L, 272L, 171L, 271L, 376L, 284L, 177L, 218L,
284L), Cat2 = c(15L, 45L, 36L, 31L, 36L, 26L, 26L, 32L, 46L,
32L), Cat3 = c(850L, 1073L, 612L, 537L, 709L, 929L, 870L, 452L,
474L, 696L), Label = c("BA", "EL", "BA", "CI", "MO",
"BA", "EL", "BA", "CI", "RO")), .Names = c("Hour",
"Cat1", "Cat2", "Cat3", "Label"), row.names = c("163", "164",
"165", "167", "168", "169", "170", "171", "173", "174"), class = "data.frame")
I'm stuck on a simple plotting question: X will express time, and Y will accumulate (stack) the quantities of Cat1, Cat2 and Cat3, respectively. For a given time, the three quantities will have the same Label.
I reshaped my data as follow, but it is not ok because I have a geom_text = Label for each cat while the three have the same Label.
# Reshape to long form: one row per (Hour, Label, category) combination.
k2$Hour <- format(k2$Hour, format = "%H:%M:%S")
meltk2 <- melt(k2, id = c("Hour", "Label"))

# Turn the time-of-day strings into date-times on a fixed date.
timestamps <- paste("2012-11-03", meltk2$Hour, "CEST")
meltk2$Hour <- as.POSIXct(timestamps)

# Bars per timestamp, outlined by category, with the Label text drawn for
# every category row (hence three labels per timestamp).
ggplot(meltk2, aes(x = Hour, y = value, group = Hour, colour = variable)) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(label = as.character(Label)),
    position = position_dodge(width = 0.8),
    vjust = -0.6
  ) +
  scale_x_datetime(
    breaks = date_breaks("1 hour"),
    labels = date_format("%H:%M:%S")
  )
What is the healthiest way to do this ?
Do you mean you want only 1 common label visible at each timestamp? Changing geom_text's y value to a number would set all 3 labels to the same location, effectively showing only one:
# Stacked bars per timestamp; all three labels of a timestamp are drawn at
# y = 0, so they coincide and read as a single label.
ggplot(meltk2, aes(x = Hour, y = value, group = Hour, fill = variable)) +
  geom_col() +
  geom_text(aes(label = as.character(Label)), y = 0, vjust = 1) +
  scale_x_datetime(
    breaks = date_breaks("1 hour"),
    labels = date_format("%H:%M:%S")
  )
(I set y=0 to position the labels at the bottom. You may want to pick some other height.)
If you don't want the bars to be stacked (as per PoGibas' answer):
# Same plot with the bars of one timestamp placed side by side instead of
# stacked; the labels are still all drawn at y = 0.
ggplot(meltk2, aes(x = Hour, y = value, fill = variable)) +
  geom_col(position = "dodge") +
  geom_text(aes(label = as.character(Label)), y = 0, vjust = 1) +
  scale_x_datetime(
    breaks = date_breaks("1 hour"),
    labels = date_format("%H:%M:%S")
  )

Always use first of default colors instead of black in ggplot2 [duplicate]

This question already has answers here:
Setting Defaults for geoms and scales ggplot2
(3 answers)
Closed 9 years ago.
I want to plot a very simple line plot (timeseries chart, actually). The only thing I want is the line to appear not in black but in the first color of the default color wheel that ggplot2 uses (i.e. replace the default "black" --- which might be specified somewhere --- with the first color of the default ggplot2 color palette).
I have only come up with a non-elegant version where I manually specify the HCL value of the first color (hcl(h=15, l=65, c=100)) - thanks to info in this answer: https://stackoverflow.com/a/8197703/1477035
Isn't there a more elegant way? I've also thought about including a factor variable which only has one value and then specifying colour=as.factor(MyFactorVariable), but this messes up the na.rm = T somehow (because I need to have gaps in the line where NAs appear).
Data sample:
structure(list(Date = structure(c(15701, 15702, 15703, 15704,
15705, 15706, 15707, 15708, 15709, 15710, 15711, 15712, 15713,
15714, 15715, 15716, 15717, 15718, 15719, 15720, 15721, 15722,
15723, 15724, 15725, 15726, 15727, 15728, 15729, 15730, 15731,
15732, 15733, 15734, 15735, 15736, 15737, 15738, 15739, 15740
), class = "Date"), Additions = c(398L, 212L, 171L, 133L, 124L,
99L, 105L, 103L, 99L, 101L, 104L, 102L, 99L, 70L, 76L, 95L, 51L,
76L, 76L, 65L, 73L, 84L, 71L, 82L, 47L, 78L, 65L, 73L, 79L, 64L,
45L, NA, NA, NA, NA, NA, NA, NA, 149L, 198L)), .Names = c("Date",
"Additions"), row.names = c(NA, 40L), class = "data.frame")
Code:
# Daily additions as a line in a hand-specified HCL colour (hue 15, chroma
# 100, luminance 65), with month names on the x axis.
# na.rm is given to geom_line() (the layer that handles missing values) and
# spelled TRUE: ggplot() itself does not use it, and `T` can be reassigned.
p <- ggplot(data, aes(Date, Additions)) +
  geom_line(colour = hcl(h = 15, l = 65, c = 100), na.rm = TRUE) +
  scale_x_date(labels = date_format("%B")) +
  xlab("Date of addition") +
  ylab("Daily additions to user database") +
  theme_minimal() +
  theme(
    plot.margin = unit(c(1, 1, 1, 1), "cm"),
    axis.title.x = element_text(vjust = -1),
    axis.title.y = element_text(angle = 90, vjust = 0),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = rgb(0.99, 0.99, 0.99))
  )
p
This gives:
Is this elegant enough?
# Return `n` colours as hex strings: `n` hues evenly spaced around the HCL
# colour wheel starting at 15 degrees, with chroma 100 and luminance 65
# (the ggplot default colours, per the example below).
#
# n: number of colours wanted; values below 1 give an empty character vector.
ggdefault_cols <- function(n) {
  if (n < 1) {
    # 360 / 0 is Inf, which seq() rejects; there is nothing to return anyway.
    return(character(0))
  }
  hues <- seq(15, 375 - 360 / n, length.out = n) %% 360
  hcl(h = hues, c = 100, l = 65)
}
# First 4 ggplot default colours:
ggdefault_cols(4)
# [1] "#F8766D" "#7CAE00" "#00BFC4" "#C77CFF"
# Draw the series in the first default colour. na.rm belongs on the layer
# (ggplot() does not use it) and is spelled TRUE rather than the
# reassignable shorthand T.
ggplot(data, aes(Date, Additions)) +
  geom_line(colour = ggdefault_cols(1), na.rm = TRUE)

Resources