Add character variable (weekdays) to plot - r

I want to add on this plot the weekday as text on top of the bars.
The only function to add text in ggplot I found, is "annotate", which does not work the way I want.
It should look like this:
Plot with weekdays
geom_text gives me this
Geom_text
My code:
ggplot(data = filter(T2G2_dayav, site %in% c("S17S", "S17N"), !is.na(distance)),
mapping = aes(as.factor(x = date_days))) +
geom_col(mapping = aes(y = T2pn_av, fill = as.factor(distance)),
position = position_dodge(width = 0.9)) +
theme_bw() + ylab("Particle Number (#/cm³), day-av") + xlab("Date") +
scale_y_continuous(limits = c(0, 30000)) +
scale_fill_discrete(name = "T2, Distance from road (m)") +
scale_color_grey(name = "Reference intrument G2") +
ggtitle("Day-averaged Particle Number (PN) per distance")
the head of my data:
distance date_days site T2pn_av T2pn_avambient T2wdir_med weekday Date G2pn_av G2pn_min G2pn_max G2ws_av G2ws_min G2ws_max G2wdir_med
<int> <dttm> <chr> <dbl> <dbl> <dbl> <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 -10 2017-07-18 S17N 28814.83 16917.831 110 Di 2017-07-18 13655.29 4621 105100 0.6781284 0 3.6 51.0
2 -10 2017-07-19 S17N 24210.95 15565.951 100 Mi 2017-07-19 10627.73 2908 67250 1.3673618 0 5.5 70.0
3 -10 2017-07-24 S17N 16143.44 7907.442 80 Mo 2017-07-24 11686.54 3582 55080 0.8178753 0 4.8 95.5
4 -10 2017-07-29 S17N 11762.56 5574.563 270 Sa 2017-07-29 12180.73 5413 45490 1.0304985 0 5.7 265.0
5 -10 2017-07-30 S17N 12138.22 6360.225 290 So 2017-07-30 10404.75 6113 23860 1.2385791 0 6.6 274.0
6 -10 2017-07-31 S17N 13815.32 9008.320 270 Mo 2017-07-31 11849.89 4595 46270 0.8554044 0 4.4 230.0
dput(head(T2G2_dayav))
structure(list(distance = c(-10L, -10L, -10L, -10L, -10L, -10L
), date_days = structure(c(1500328800, 1500415200, 1500847200,
1501279200, 1501365600, 1501452000), class = c("POSIXct", "POSIXt"
), tzone = "Europe/Berlin"), site = c("S17N", "S17N", "S17N",
"S17N", "S17N", "S17N"), T2pn_av = c(28814.8306772908, 24210.9512670565,
16143.442364532, 11762.5630630631, 12138.2247114732, 13815.3198380567
), T2pn_avambient = c(16917.8306772908, 15565.9512670565, 7907.44236453202,
5574.56306306306, 6360.22471147318, 9008.31983805668), T2wdir_med = c(110,
100, 80, 270, 290, 270), weekday = c("Di", "Mi", "Mo", "Sa",
"So", "Mo"), Date = structure(c(1500328800, 1500415200, 1500847200,
1501279200, 1501365600, 1501452000), class = c("POSIXct", "POSIXt"
), tzone = "Europe/Berlin"), G2pn_av = c(13655.2885517401, 10627.7329973352,
11686.5429216867, 12180.7308516181, 10404.7472642001, 11849.8893070109
), G2pn_min = c(4621, 2908, 3582, 5413, 6113, 4595), G2pn_max = c(105100,
67250, 55080, 45490, 23860, 46270), G2ws_av = c(0.678128438241936,
1.36736183524505, 0.817875347544022, 1.0304984658137, 1.23857912107,
0.855404388351763), G2ws_min = c(0, 0, 0, 0, 0, 0), G2ws_max = c(3.6,
5.5, 4.8, 5.7, 6.6, 4.4), G2wdir_med = c(51, 70, 95.5, 265, 274,
230)), .Names = c("distance", "date_days", "site", "T2pn_av",
"T2pn_avambient", "T2wdir_med", "weekday", "Date", "G2pn_av",
"G2pn_min", "G2pn_max", "G2ws_av", "G2ws_min", "G2ws_max", "G2wdir_med"
), row.names = c(NA, -6L), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), vars = c("distance", "date_days"), drop = TRUE, indices = list(
0L, 1L, 2L, 3L, 4L, 5L), group_sizes = c(1L, 1L, 1L, 1L,
1L, 1L), biggest_group_size = 1L, labels = structure(list(distance = c(-10L,
-10L, -10L, -10L, -10L, -10L), date_days = structure(c(1500328800,
1500415200, 1500847200, 1501279200, 1501365600, 1501452000), class = c("POSIXct",
"POSIXt"), tzone = "Europe/Berlin")), row.names = c(NA, -6L), class = "data.frame", vars = c("distance",
"date_days"), drop = TRUE, .Names = c("distance", "date_days"
)))

The idea is that you add a text on top of every bar (that's why vjust = 0, but you could also do vjust = -.5 to allow more space or vjust = 1.5 to put it in the bars, which is nice as well). The rest within the geom_text ist basically the same as in geom_col. But in general, you could put commonly used aesthetics in the first occurency within ggplot(aes(...)), as you already did with the x-value.
ggplot(data = filter(T2G2_dayav, site %in% c("S17S", "S17N"), !is.na(distance)),
mapping = aes(as.factor(x = date_days))) +
geom_col(mapping = aes(y = T2pn_av, fill = as.factor(distance)),
position = position_dodge(width = 0.9)) +
geom_text(aes(label = weekday, y = T2pn_av), vjust = -.5, # add these
position = position_dodge(width = 0.9)) + # lines
theme_bw() + ylab("Particle Number (#/cm³), day-av") + xlab("Date") +
scale_y_continuous(limits = c(0, 30000)) +
scale_fill_discrete(name = "T2, Distance from road (m)") +
scale_color_grey(name = "Reference intrument G2") +
ggtitle("Day-averaged Particle Number (PN) per distance")
The following should solve your problem with too many labels. It takes the highest label and places it in the center of the bars of that x-value. Find a plot below with additional rows added to your data:
T2G2_dayav <- rbind(T2G2_dayav %>% ungroup(), T2G2_dayav %>% ungroup() %>% mutate(distance = 5)) # add more observations for testing
T2G2_dayav <- T2G2_dayav %>% mutate(T2pn_av = ifelse(distance == 5, T2pn_av/2, T2pn_av)) # label only the highest bar
The following should work with your data:
ggplot(data = filter(T2G2_dayav, site %in% c("S17S", "S17N"), !is.na(distance)) %>%
group_by(date_days) %>% # group by days
mutate(weekday2 = ifelse(T2pn_av == max(T2pn_av), weekday, NA)), # within each day (group), only label the highest
mapping = aes(as.factor(x = date_days))) +
geom_col(mapping = aes(y = T2pn_av, fill = as.factor(distance)),
position = position_dodge(width = 0.9)) +
geom_text(aes(label = weekday2, y = T2pn_av), vjust = -.5, # add these
position = position_dodge(with = 0.9)) + # lines
theme_bw() + ylab("Particle Number (#/cm³), day-av") + xlab("Date") +
scale_y_continuous(limits = c(0, 30000)) +
scale_fill_discrete(name = "T2, Distance from road (m)") +
scale_color_grey(name = "Reference intrument G2") +
ggtitle("Day-averaged Particle Number (PN) per distance")

Related

Adding line with specific df value to each bar in ggplot

My df is organized this way for example:
OCCURED_COUNTRY_DESC | a | b | c | d | flagged | type | MedDRA_PT| **E** |
__________________________________________________________________________
UNITED STATES |403|1243|473|4077| yes | disp | Seizure |144.208|
__________________________________________________________________________
My data:
structure(list(OCCURED_COUNTRY_DESC = c("AUSTRALIA", "AUSTRIA",
"BELGIUM", "BRAZIL", "CANADA"), a = c(4L, 7L, 20L, 5L, 11L),
b = c(31, 27, 100, 51, 125), c = c(872, 869, 856, 871, 865
), d = c(5289, 5293, 5220, 5269, 5195), w = c(876, 876, 876,
876, 876), x = c(5320, 5320, 5320, 5320, 5320), y = c(35L,
34L, 120L, 56L, 136L), z = c(6161, 6162, 6076, 6140, 6060
), N = c(6196, 6196, 6196, 6196, 6196), k = c("0.5", "0.5",
"0.5", "0.5", "0.5"), SOR = c(0.80199821407511, 1.52042360060514,
1.21312776329214, 0.615857869962066, 0.539569644832803),
log = c(-0.318329070860348, 0.604473324558599, 0.278731499148795,
-0.699330656240263, -0.890118907611227), LL99 = c(-0.695969674426877,
0.382102954188229, 0.198127619344382, -1.00534117464748,
-1.03425468471322), UL99 = c(-0.0544058884186467, 0.763880731966007,
0.337239065783058, -0.482651467660248, -0.785935460582379
), flagged = c("no", "no", "no", "no", "yes"), type = c(NA,
NA, NA, NA, "under"), MedDRA_PT = c("Seizure", "Seizure",
"Seizure", "Seizure", "Seizure"), E = c(5.11098506333901,
4.43283582089552, 16.3984674329502, 8.43063199848168, 20.8132820019249
)), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"
))
I am using ggplot2 to create a bar chart using the following piece of code:
test2 %>% #using test2 as the df
ggplot(aes(a, OCCURED_COUNTRY_DESC, fill=type)) +
geom_bar(stat="identity")+
scale_fill_manual(values = c("disp" = "#FF8C00",
"under" = "#7EC0EE",
"NA"="#EEE9E9"))+
theme_classic()+
labs(title = "Seizure",
x = "Count",
y = "")
What I would like to do is to add a black line in each bar correspondent to the E value, from the dataframe, for that country. However I haven't been successful. Can someone kindly guide me on how to achieve this?
Thanks!
One option to achieve your desired result would be via a geom_segment, where you map your E column on both the x and the xend position. "Tricky" part are the y positions. However, as a categorical axis is still a numeric axis we could add a helper column to your data which contains the numeric positions of your categorical OCCURED_COUNTRY_DESC column. This helper column could then be mapped on the y and the yend aes needed by geom_segment where we also take into account the width of the bars:
library(ggplot2)
test2$OCCURED_COUNTRY_DESC_num <- as.numeric(factor(test2$OCCURED_COUNTRY_DESC))
width <- .9 # defautlt width of bars
ggplot(test2, aes(a, OCCURED_COUNTRY_DESC, fill = type)) +
geom_bar(stat = "identity") +
geom_segment(aes(x = E, xend = E,
y = OCCURED_COUNTRY_DESC_num - width / 2,
yend = OCCURED_COUNTRY_DESC_num + width / 2),
color = "black", size = 1) +
scale_fill_manual(values = c(
"disp" = "#FF8C00",
"under" = "#7EC0EE",
"NA" = "#EEE9E9"
)) +
theme_classic() +
labs(
title = "Seizure",
x = "Count",
y = ""
)

R ggplot2 facet_wrap() with scales = "free", but each panel having fixed

I want to produce a figure that will have several scatterplots. Each scatterplot should have its own axis limits, but I want the aspect ratio of each plot to be 1 so that the 1:1 line goes through the plot at a 45 degree angle.
Here's an example of what I have going on:
met <- structure(list(datetime = structure(c(946756800, 946756800, 946756800,946756800, 946756800, 946756800, 946756800, 960465600, 960465600,960465600), class = c("POSIXct", "POSIXt"), tzone = "UTC"), Met_Parameter = c("1/MO l","albedo", "Bowen r", "conv mix h", "mech mix h", "MO l", "sens h flux","wind s", "wstar", "z0"), `Original Met` = c(0.0746268656716418,0.15, 0.8, -999, 24, 13.4, -0.7, 4, 1.2, 1), `Converted Met` = c(-0.0105263157894737,0.15, 0.8, 600, 367, -95, 21.6, 4, 1.2, 1)), row.names = c(1L,2L, 3L, 4L, 5L, 6L, 7L, 1054L, 1055L, 1056L), class = "data.frame")
pOUT <-
ggplot2::ggplot(data = met,
aes(x = `Original Met`,
y = `Converted Met`)) +
ggplot2::geom_point()+
ggplot2::geom_abline(intercept = 0,
slope = 1
) +
ggplot2::facet_wrap("Met_Parameter",
scales = "free") +
ggplot2::ggtitle("Comparison of Original Meteorology Versus Up Over Down Meterology") +
ggplot2::theme_bw()
What I am missing still is how to make the x and y axes fixed within the plot.
Thanks!

Using segment labels in ggplot with ggrepel with smooth segments

This is my dataframe:
df<-structure(list(year = c(1984, 1984), team = c("Australia", "Brazil"
), continent = c("Oceania", "Americas"), medal = structure(c(3L,
3L), .Label = c("Bronze", "Silver", "Gold"), class = "factor"),
n = c(84L, 12L)), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"))
And this is my ggplot (my question is related to the annotations regard Brazil label):
ggplot(data = df)+
geom_point(aes(x = year, y = n)) +
geom_text_repel(aes(x = year, y = n, label = team),
size = 3, color = 'black',
seed = 10,
nudge_x = -.029,
nudge_y = 35,
segment.size = .65,
segment.curvature = -1,
segment.angle = 178.975,
segment.ncp = 1)+
coord_flip()
So, I have a segment divided by two parts. On both parts I have 'small braks'. How can I avoid them?
I already tried to use segment.ncp, change nudge_xor nudge_ynut its not working.
Any help?
Not really sure what is going on here. This is the best I could generate by experimenting with variations to the input values for segment... arguments.
There is some guidance at: https://ggrepel.slowkow.com/articles/examples.html which has an example with shorter leader lines, maybe that's an approach you could use.
df<-structure(list(year = c(1984, 1984), team = c("Australia", "Brazil"
), continent = c("Oceania", "Americas"), medal = structure(c(3L,
3L), .Label = c("Bronze", "Silver", "Gold"), class = "factor"),
n = c(84L, 12L)), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"))
library(ggplot2)
library(ggrepel)
ggplot(data = df)+
geom_point(aes(x = year, y = n)) +
geom_text_repel(aes(x = year, y = n, label = team),
size = 3, color = 'black',
seed = 1,
nudge_x = -0.029,
nudge_y = 35,
segment.size = 0.5,
segment.curvature = -0.0000002,
segment.angle = 1,
segment.ncp = 1000)+
coord_flip()
Created on 2021-08-26 by the reprex package (v2.0.0)

ggplot does not find the value that is present in the dataset

I have data as follows:
library(ggplot2)
library(dplyr)
library(tidyverse)
library(ggsignif)
graph <- structure(list(Constraint = structure(c(4L, 2L, 3L, 1L, 5L, 4L,
2L, 3L, 1L, 5L), .Label = c("Major Constraint", "Minor Constraint",
"Moderate Constraint", "No Constraint", "Total"), class = "factor"),
`Observation for Crime = 0` = c(3124, 2484, 3511, 4646, 13765,
3124, 2484, 3511, 4646, 13765), `Observation for Crime = 1` = c(762,
629, 1118, 1677, 4186, 762, 629, 1118, 1677, 4186), `Total Observations` = c(3886,
3113, 4629, 6323, 17951, 3886, 3113, 4629, 6323, 17951),
variable = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L), .Label = c("Crime = 0", "Crime = 1"), class = "factor"),
value = c(80.3911477097272, 79.79441053646, 75.847915316483,
73.4777795350308, 76.6809648487549, 19.6088522902728, 20.20558946354,
24.152084683517, 26.5222204649692, 23.3190351512451)), row.names = c(NA,
-10L), class = "data.frame")
Constraint Observation for Crime = 0 Observation for Crime = 1 Total Observations variable value
1 No Constraint 3124 762 3886 Crime = 0 80.39115
2 Minor Constraint 2484 629 3113 Crime = 0 79.79441
3 Moderate Constraint 3511 1118 4629 Crime = 0 75.84792
4 Major Constraint 4646 1677 6323 Crime = 0 73.47778
5 Total 13765 4186 17951 Crime = 0 76.68096
6 No Constraint 3124 762 3886 Crime = 1 19.60885
7 Minor Constraint 2484 629 3113 Crime = 1 20.20559
8 Moderate Constraint 3511 1118 4629 Crime = 1 24.15208
9 Major Constraint 4646 1677 6323 Crime = 1 26.52222
10 Total 13765 4186 17951 Crime = 1 23.31904
I am trying to create something like this:
graph %>%
mutate(`Constraint` = fct_relevel(`Constraint`, "No Constraint", "Minor Constraint", "Moderate Constraint", "Major Constraint")) %>%
ggplot(aes(x = `Constraint`, y = value, fill = variable, label=sprintf("%.02f %%", round(value, digits = 1)))) +
geom_col(position = 'dodge') +
geom_text(position = position_dodge(width = .9), # move to center of bars
vjust = -0.5, # nudge above top of bar
size = 4) +
scale_fill_grey(start = 0.8, end = 0.5) +
theme_bw(base_size = 15) +
geom_signif(stat="identity",
data=data.frame(x=c(0.875, 1.875), xend=c(1.125, 2.125),
y=c(5.8, 8.5), annotation=c("**", "NS")),
aes(x=x,xend=xend, y=y, yend=y, annotation=annotation)) +
geom_signif(comparisons=list(c("treatment", "control")), annotations="***",
y_position = 9.3, tip_length = 0, vjust=0.4)
Hoping for an appearance close to the following picture:
But it gives the error that the value is not found, while the value is in the data. Does anyone know what the problem could be?
Include fill and label in geom_col and geom_text -
library(tidyverse)
library(ggsignif)
graph %>%
mutate(`Constraint` = fct_relevel(`Constraint`, "No Constraint", "Minor Constraint", "Moderate Constraint", "Major Constraint")) %>%
ggplot(aes(x = `Constraint`, y = value)) +
geom_col(position = 'dodge', aes(fill = variable)) +
geom_text(position = position_dodge(width = .9), # move to center of bars
aes(label=sprintf("%.02f %%", round(value, digits = 1))),
vjust = -0.5, # nudge above top of bar
size = 4) +
scale_fill_grey(start = 0.8, end = 0.5) +
theme_bw(base_size = 15) +
geom_signif(stat="identity",
data=data.frame(x=c(0.875, 1.875), xend=c(1.125, 2.125),
y=c(5.8, 8.5), annotation=c("**", "NS")),
aes(x=x,xend=xend, y=y, yend=y, annotation=annotation))

How to specify a certain csv in the errorbar line

I am trying to make a plot with three different csvs. In 2 of them, the columns are the same i.e. Year, GMSL and GMSLerror.
In the Frederikse file the columns are Year, GMSL, GMSLerrorlow and GMSLerrorup. How can I tell R to plot the Frederikse error using the columns GMSLerrorlow and GMSLerrorup? I tried the following but it did not work. Thanks.
p1<-files <- c("Frederikse.csv", "ChurchandWhite.csv","Hay.csv")
map_dfr(files, ~ read_csv(.x) %>%
mutate(Author = .x)) %>%
ggplot(aes(x = Time, y = GMSL, color = Author,fill=Author)) +
geom_line(size=0.6)+
theme_bw(12)+
theme(panel.grid.major = element_blank())+
theme(panel.grid.minor = element_blank())+
labs(x = "Year", y = "GMSL (mm)",color="Author")+
geom_errorbar(aes(ymin=GMSL-GMSLerror, ymax =GMSL+GMSLerror,alpha=Author))+
geom_errorbar("Frederikse.csv",(aes(ymin=GMSL-GMSLerrorlow, ymax =GMSL+GMSLerrorup,alpha=Author)))
scale_alpha_manual(values = c(0.3, 0.3, 0.8))+
scale_colour_manual(values=c("#BAB3F0","#1D3E72","#201641"))
p1
structure(list(Year = 1900:1905, GMSLerrorlow = c(-203.5572666,
-201.0185091, -212.0740442, -202.6975639, -200.1670151, -192.1312551
), GMSL = c(-173.2614421, -168.8016753, -180.389967, -170.2678322,
-168.7200709, -160.9814287), GMSLerrorup = c(-141.002807, -135.8976091,
-148.213824, -138.9305182, -137.4501224, -130.3514508)), row.names = c(NA,
6L), class = "data.frame")
structure(list(Time = 1900:1905, GMSL = c(-131.15, -130.5, -129.77,
-128.85, -128.1, -127.56), GMSLerror = c(25.32, 25.17, 25.01,
24.86, 24.7, 24.55)), row.names = c(NA, 6L), class = "data.frame")
structure(list(Time = c(1880.0417, 1880.125, 1880.2083, 1880.2917,
1880.375, 1880.4583), GMSL = c(-183, -171.1, -164.3, -158.2,
-158.7, -159.6), GMSLerror = c(24.2, 24.2, 24.2, 24.2, 24.2,
24.2)), row.names = c(NA, 6L), class = "data.frame")````
You can do this with mutate to make GMSLerrorlow column for all datasets
p1<-files <- c("Frederikse.csv", "ChurchandWhite.csv","Hay.csv")
set_names(files) %>% # give names - can use str_remove to drop `.csv` from names
map_dfr( ~ read_csv(.x), .id = "Author") %>% #use .id argument
mutate(
GMSLerrorlow = if_else(Author != "Frederikse.csv", GMSLerror, GMSLerrorlow),
GMSLerrorup = if_else(Author != "Frederikse.csv", GMSLerror, GMSLerrorup)
) %>%
ggplot(aes(x = Time, y = GMSL, color = Author,fill=Author)) +
geom_line(size=0.6)+
theme_bw(12)+
theme(panel.grid.major = element_blank())+
theme(panel.grid.minor = element_blank())+
labs(x = "Year", y = "GMSL (mm)",color="Author")+
geom_errorbar(aes(ymin=GMSL-GMSLerrorlow, ymax =GMSL+GMSLerrorup,alpha=Author))+
scale_alpha_manual(values = c(0.3, 0.3, 0.8))+
scale_colour_manual(values=c("#BAB3F0","#1D3E72","#201641"))

Resources