make subway graph include 102 topics in ggplot2 r - r

This is a followup from subway-style graph for word frequency across three datasets in ggplot2
I used the code in the answer to that question, but I am struggling with how best to adapt the graph so that it fits 100 unique dict entries within the subway graph without completely messing up the dict word labels on the margins.
I have tested out different amounts of words to feed into the subway graph, and found that it cannot contain more than 25 words.
I have data:
structure(list(dict = c("apple", "apple", "apple",
"mandarin", "mandarin", "mandarin", "orange", "orange", "orange", "pear"),
name = c("freq_ongov", "freq_onindiv", "freq_onmedia", "freq_ongov",
"freq_onindiv", "freq_onmedia", "freq_ongov", "freq_onindiv",
"freq_onmedia", "freq_ongov"), value = c(0, 87, 63, 0, 44,
20, 3, 27, 25, 0), rank = c(26, 85, 70, 26, 61, 42.5, 86,
47, 48, 26)), row.names = c(NA, -10L), groups = structure(list(
name = c("freq_ongov", "freq_onindiv", "freq_onmedia"), .rows = structure(list(
c(1L, 4L, 7L, 10L), c(2L, 5L, 8L), c(3L, 6L, 9L)), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
But there are 100 rows within this data that I want to include in the following code:
# Subway-style plot: one line per word (`dict`), one column per `name`,
# with the words listed on the left and right margins.
#
# Work on a plotting copy in which every word occupies its own integer slot
# within each column. The incoming `rank` can contain ties (e.g. 42.5, or
# several words sharing 26); `seq(max(rank))` then neither lines up with the
# plotted points nor is guaranteed to have one entry per label. Re-ranking
# with ties.method = "first" yields 1..n inside each `name`, so break i sits
# exactly where label i is drawn.
plot_df <- as.data.frame(df) # plain data frame; drops the dplyr grouping
plot_df$rank <- ave(
  plot_df$rank, plot_df$name,
  FUN = function(r) rank(r, ties.method = "first")
)

# Margin labels: words of the first / last column, ordered by their slot.
is_left <- plot_df$name == "freq_ongov"
is_right <- plot_df$name == "freq_onmedia"
leftlabels <- plot_df$dict[is_left][order(plot_df$rank[is_left])]
rightlabels <- plot_df$dict[is_right][order(plot_df$rank[is_right])]

# NOTE: with ~100 words the labels only stop overlapping if the plot is
# rendered tall enough (e.g. a larger `height` in ggsave()) or the axis text
# is made smaller; the code below does not change the label spacing itself.
ggplot(plot_df, aes(name, rank, color = dict, group = dict)) +
  geom_line(size = 4) +
  geom_point(shape = 21, fill = "white", size = 4) +
  scale_y_continuous(
    breaks = seq_along(leftlabels), labels = leftlabels,
    sec.axis = sec_axis(
      ~.,
      breaks = seq_along(rightlabels), labels = rightlabels
    )
  ) +
  scale_x_discrete(expand = c(0.01, 0)) +
  guides(color = guide_none()) +
  coord_cartesian(clip = "off") + # let labels/points draw outside the panel
  theme(axis.ticks.length.y = unit(0, "points"))
I tried changing the y.int and width of the y axis to fit in 100 words, but that only makes the y-axis longer, without changing the spacing between each word label on the y-axis, so all the words get squeezed together. Any suggestions?

Related

Using ggalluvial with nodes holding different values

My data is a set of activities completed by persons. The sequence of activities a person takes varies. The data below show the activities for each step (Step1, Step2, etc). I'd like an alluvial plot that labels the activities at each step (each a different node 1, 2, 3...) What is the best approach? Here's what I have so far:
df<-structure(list(acts_activity_id = c("9928131", "445661", "686203", "687868", "688564"), Step1 = c("Unable to Reach", "Unable to Reach",
"Search Correspondence", "Unable to Reach", "Unable to Reach"), Step2 = c("Match Request", NA, "Connection Made", NA, "Match Request"
), Step3 = c("Support Group Request", NA, "Connection Contact Attempt", NA, "Support Group Request"),Step4 = c("Information Provided",
NA, "Not Available to Support", NA, "Information Provided"),
Step5 = c(NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_)), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"),
row.names = c(NA, -5L),
groups = structure(list(acts_activity_id = c("9928131", "445661", "686203", "687868", "688564"), .rows = structure(list(1L, 2L, 3L, 4L, 5L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -5L), .drop = TRUE))
# Alluvial plot of the activity sequence, one axis per step column.
# Each step has different values; individuals go through different
# sequences of steps.
ggplot(
  data = df,
  mapping = aes(
    axis1 = Step1,
    axis2 = Step2,
    axis3 = Step3,
    axis4 = Step4,
    axis5 = Step5
  )
) +
  geom_flow() +
  geom_stratum() +
  labs(title = "Activity Sequence")
The first
If you have your data in this order (each column is a set of different activities), then use ggsankey:
# Remove the id column so that only the Step columns are left.
df$acts_activity_id <- NULL

# Reshape the Step columns into the x / next_x / node / next_node columns
# used by the sankey aesthetics below.
sankey_long <- ggsankey::make_long(df, Step1, Step2, Step3, Step4, Step5)

ggplot(
  sankey_long,
  aes(
    x = x, next_x = next_x,
    node = node, next_node = next_node,
    fill = factor(node), label = node
  )
) +
  geom_sankey(flow.alpha = 0.6, node.color = "gray30") +
  geom_sankey_label(size = 3, color = "white", fill = "gray40") +
  scale_fill_viridis_d() +
  theme_sankey(base_size = 18) +
  labs(x = NULL) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = .5)
  )

Using segment labels in ggplot with ggrepel with smooth segments

This is my dataframe:
df<-structure(list(year = c(1984, 1984), team = c("Australia", "Brazil"
), continent = c("Oceania", "Americas"), medal = structure(c(3L,
3L), .Label = c("Bronze", "Silver", "Gold"), class = "factor"),
n = c(84L, 12L)), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"))
And this is my ggplot (my question is related to the annotations regard Brazil label):
# Points of n per year with repelled team labels; the x/y mapping is shared
# by both layers and the coordinates are flipped at the end.
ggplot(df, aes(x = year, y = n)) +
  geom_point() +
  geom_text_repel(
    aes(label = team),
    size = 3,
    color = "black",
    seed = 10,
    nudge_x = -.029,
    nudge_y = 35,
    segment.size = .65,
    segment.curvature = -1,
    segment.angle = 178.975,
    segment.ncp = 1
  ) +
  coord_flip()
So, I have a segment divided into two parts, and both parts have small breaks. How can I avoid them?
I already tried to use segment.ncp and to change nudge_x or nudge_y, but it's not working.
Any help?
Not really sure what is going on here. This is the best I could generate by experimenting with variations to the input values for segment... arguments.
There is some guidance at: https://ggrepel.slowkow.com/articles/examples.html which has an example with shorter leader lines, maybe that's an approach you could use.
df<-structure(list(year = c(1984, 1984), team = c("Australia", "Brazil"
), continent = c("Oceania", "Americas"), medal = structure(c(3L,
3L), .Label = c("Bronze", "Silver", "Gold"), class = "factor"),
n = c(84L, 12L)), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"))
library(ggplot2)
library(ggrepel)
# Same plot as in the question, with the segment arguments changed to the
# values found by experimenting (see the text above).
ggplot(df, aes(x = year, y = n)) +
  geom_point() +
  geom_text_repel(
    aes(label = team),
    size = 3,
    color = "black",
    seed = 1,
    nudge_x = -0.029,
    nudge_y = 35,
    segment.size = 0.5,
    segment.curvature = -0.0000002,
    segment.angle = 1,
    segment.ncp = 1000
  ) +
  coord_flip()
Created on 2021-08-26 by the reprex package (v2.0.0)

Adding p-values to ggplot; ggsignif says it can only handle data with groups that are plotted on the x-axis

I have data as follows, to which I am trying to add p-values:
library(ggplot2)
library(ggsignif)
library(dplyr)
data <- structure(list(treatment = c(0, 1, 0, 1, 0, 1, 0, 1, 0, 1), New_Compare_Truth = c(57,
61, 12, 14, 141, 87, 104, 90, 12, 14), total_Hy = c(135,
168, 9, 15, 103, 83, 238, 251, 9, 15), total = c(285, 305, 60,
70, 705, 435, 520, 450, 60, 70), ratio = c(47.3684210526316,
55.0819672131148, 15, 21.4285714285714, 14.6099290780142, 19.0804597701149,
45.7692307692308, 55.7777777777778, 15, 21.4285714285714), Type = structure(c(2L,
2L, 1L, 1L, 3L, 3L, 5L, 5L, 4L, 4L), .Label = c("A1. Others \nMore \nH",
"A2. Similar \nNorm", "A3. Others \nLess \nH", "B1. Others \nMore \nH",
"B2. Similar \nNorm or \nHigher"), class = "factor"), `Sample Selection` = c("Answers pr",
"Answers pu", "Answers pr", "Answers pu", "Answers pr",
"Answers pu", "Answers pr", "Answers pu", "Answers pr",
"Answers pu"), p_value = c(0.0610371842601616, 0.0610371842601616,
0.346302201593934, 0.346302201593934, 0.0472159407450147, 0.0472159407450147,
0.0018764377521242, 0.0018764377521242, 0.346302201593934, 0.346302201593934
), x = c(2, 2, 1, 1, 3, 3, 5.5, 5.5, 4.5, 4.5)), row.names = c(NA,
-10L), class = c("data.table", "data.frame"))
breaks_labels <- structure(list(Type = structure(c(2L, 1L, 3L, 5L, 4L), .Label = c("A1. Others \nMore \nH",
"A2. Similar \nNorm", "A3. Others \nLess \nH", "B1. Others \nMore \nH",
"B2. Similar \nNorm or \nHigher"), class = "factor"), x = c(2,
1, 3, 5.5, 4.5)), row.names = c(NA, -5L), class = c("data.table",
"data.frame"))
# Dodged bar chart of `ratio` per numeric position `x`, filled by
# `Sample Selection`, with percentage labels above the bars and manual
# significance brackets added by the final geom_signif() layer.
data %>%
ggplot(aes(x = x, y = ratio)) +
geom_col(aes(fill = `Sample Selection`), position = position_dodge(preserve = "single"), na.rm = TRUE) +
geom_text(position = position_dodge(width = .9), # move to center of bars
aes(label=sprintf("%.02f %%", round(ratio, digits = 1)), group = `Sample Selection`),
vjust = -1.5, # nudge above top of bar
size = 4,
na.rm = TRUE) +
# geom_text(position = position_dodge(width = .9), # move to center of bars
# aes(label= paste0("(", ifelse(variable == "Crime = 0", `Observation for Crime = 0`, `Observation for Crime = 1`), ")"), group = `Sample Selection`),
# vjust = -0.6, # nudge above top of bar
# size = 4,
# na.rm = TRUE) +
scale_fill_grey(start = 0.8, end = 0.5) +
scale_y_continuous(expand = expansion(mult = c(0, .1))) +
# x is numeric here; the tick labels are taken from breaks_labels.
scale_x_continuous(breaks = breaks_labels$x, labels = breaks_labels$Type) +
theme_bw(base_size = 15) +
xlab("Norm group for corporate Hy") +
ylab("Percentage Compliant Decisions") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
# NOTE(review): this is the layer reported to fail with "Can only handle data
# with groups that are plotted on the x-axis".
# NOTE(review): xmin/xmax are centred on 1..5, while breaks_labels$x places
# the last two bar groups at 4.5 and 5.5 -- confirm the intended positions.
# NOTE(review): "p=0.34" appears to belong to a p_value of 0.3463, which is
# labelled "p=0.35" in the first bracket -- confirm.
geom_signif(annotation=c("p=0.35", "p=0.06", "p=0.05", "p=0.34", "p=0.00"), y_position = c(30, 40, 55 ,75, 90), xmin=c(0.75,1.75,2.75,3.75,4.75),
xmax=c(1.25,2.25,3.25,4.25,5.25))
For some reason, the last line causes the following error:
Error in f(...) :
Can only handle data with groups that are plotted on the x-axis
Since I am just putting in text and not referring to any variable, I don't really understand why this happens. Can anyone help me out? Without the last line it looks like this:
EDIT: Please note that I would like to keep the space between the third and the fourth column (which is apparently also what caused the problem, see Jared's answer).
Edit
Thanks for clarifying your expected outcome. Here is one way to include geom_signif() annotations without altering the original plot:
library(tidyverse)
library(ggsignif)
data <- structure(list(treatment = c(0, 1, 0, 1, 0, 1, 0, 1, 0, 1), New_Compare_Truth = c(57,
61, 12, 14, 141, 87, 104, 90, 12, 14), total_Hy = c(135,
168, 9, 15, 103, 83, 238, 251, 9, 15), total = c(285, 305, 60,
70, 705, 435, 520, 450, 60, 70), ratio = c(47.3684210526316,
55.0819672131148, 15, 21.4285714285714, 14.6099290780142, 19.0804597701149,
45.7692307692308, 55.7777777777778, 15, 21.4285714285714), Type = structure(c(2L,
2L, 1L, 1L, 3L, 3L, 5L, 5L, 4L, 4L), .Label = c("A1. Others \nMore \nH",
"A2. Similar \nNorm", "A3. Others \nLess \nH", "B1. Others \nMore \nH",
"B2. Similar \nNorm or \nHigher"), class = "factor"), `Sample Selection` = c("Answers pr",
"Answers pu", "Answers pr", "Answers pu", "Answers pr",
"Answers pu", "Answers pr", "Answers pu", "Answers pr",
"Answers pu"), p_value = c(0.0610371842601616, 0.0610371842601616,
0.346302201593934, 0.346302201593934, 0.0472159407450147, 0.0472159407450147,
0.0018764377521242, 0.0018764377521242, 0.346302201593934, 0.346302201593934
), x = c(2, 2, 1, 1, 3, 3, 5.5, 5.5, 4.5, 4.5)), row.names = c(NA,
-10L), class = c("data.table", "data.frame"))
breaks_labels <- structure(list(Type = structure(c(2L, 1L, 3L, 5L, 4L), .Label = c("A1. Others \nMore \nH",
"A2. Similar \nNorm", "A3. Others \nLess \nH", "B1. Others \nMore \nH",
"B2. Similar \nNorm or \nHigher"), class = "factor"), x = c(2,
1, 3, 5.5, 4.5)), row.names = c(NA, -5L), class = c("data.table",
"data.frame"))
# One significance bracket per bar pair, derived from `data` instead of being
# typed by hand: the hand-written labels had drifted from the data (the
# p-value 0.3463 was shown as "p=0.35" for one pair and "p=0.34" for another).
# Each pair of bars shares one `x` and one `p_value`; the bracket spans
# x - 0.25 .. x + 0.25.
p_by_x <- unique(as.data.frame(data)[, c("x", "p_value")])
p_by_x <- p_by_x[order(p_by_x$x), ]
annotation_df <- data.frame(
  signif = sprintf("p=%.2f", p_by_x$p_value),
  # Bracket heights are chosen by hand, one per bar pair from left to right.
  y_position = c(30, 40, 55, 75, 90),
  xmin = p_by_x$x - 0.25,
  xmax = p_by_x$x + 0.25,
  # geom_signif(manual = TRUE) needs a distinct group per bracket.
  group = seq_len(nrow(p_by_x))
)

# Same bar chart as in the question (numeric x, so the gap between the third
# and fourth bar pair is kept); `group` is set globally for the dodged layers.
data %>%
  ggplot(aes(x = x, y = ratio, group = `Sample Selection`)) +
  geom_col(aes(fill = `Sample Selection`),
    position = position_dodge(preserve = "single"), na.rm = TRUE
  ) +
  geom_text(
    position = position_dodge(width = .9), # move to center of bars
    aes(label = sprintf("%.02f %%", round(ratio, digits = 1))),
    vjust = -1.5, # nudge above top of bar
    size = 4,
    na.rm = TRUE
  ) +
  scale_fill_grey(start = 0.8, end = 0.5) +
  scale_y_continuous(expand = expansion(mult = c(0, .1))) +
  scale_x_continuous(breaks = breaks_labels$x, labels = breaks_labels$Type) +
  theme_bw(base_size = 15) +
  xlab("Norm group for corporate Hy") +
  ylab("Percentage Compliant Decisions") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # Brackets come from their own data frame; manual = TRUE makes geom_signif
  # read xmin/xmax/y_position/annotations from these aesthetics.
  geom_signif(
    aes(
      xmin = xmin,
      xmax = xmax,
      y_position = y_position,
      annotations = signif,
      group = group
    ),
    data = annotation_df, manual = TRUE
  )
#> Warning: Ignoring unknown aesthetics: xmin, xmax, y_position, annotations
Created on 2021-07-20 by the reprex package (v2.0.0)
Previous answer
One potential solution to your problem is to plot "Type" on the x axis instead of "x", e.g.
# Alternative: put the factor `Type` on the x axis, so the bar pairs sit at
# the integer positions 1..5 and geom_signif() can place manual brackets.
data %>%
  ggplot(aes(x = Type, y = ratio)) +
  geom_col(aes(fill = `Sample Selection`),
    position = position_dodge(preserve = "single"), na.rm = TRUE
  ) +
  geom_text(
    position = position_dodge(width = .9), # move to center of bars
    aes(
      label = sprintf("%.02f %%", round(ratio, digits = 1)),
      group = `Sample Selection`
    ),
    vjust = -1.5,
    size = 4,
    na.rm = TRUE
  ) +
  scale_fill_grey(start = 0.8, end = 0.5) +
  scale_y_continuous(expand = expansion(mult = c(0, .1))) +
  theme_bw(base_size = 15) +
  xlab("Norm group for corporate Hy") +
  ylab("Percentage Compliant Decisions") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # Labels follow the factor order of Type (A1, A2, A3, B1, B2). The fourth
  # label was "p=0.34", but B1 has the same p_value as A1 (0.3463), which
  # rounds to 0.35 like the first label.
  geom_signif(
    annotation = c("p=0.35", "p=0.06", "p=0.05", "p=0.35", "p=0.00"),
    y_position = c(30, 40, 55, 75, 90),
    xmin = c(0.75, 1.75, 2.75, 3.75, 4.75),
    xmax = c(1.25, 2.25, 3.25, 4.25, 5.25)
  )

Visualize blinks in conversation

I have conversational data with pupillary data showing when ppl blink, like so (reproducible data below):
df
# A tibble: 6 x 8
# Groups: Blinks_grp [6]
Speaker Utterance Starttime_ms Endtime_ms Blink_onset Blink_offset Blinks_grp Blink_dur
<chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ID16.B an Americ… 289569 293940 289879 289946 113 67
2 ID16.B an Americ… 289569 293940 290696 290879 114 183
3 ID16.B an Americ… 289569 293940 290962 291046 115 84
4 ID16.A [°gotcha°] 290604 291004 290696 290879 116 183
5 ID16.B =↓my fath… 300938 302140 301529 301612 117 83
6 ID16.B =↓my fath… 300938 302140 302062 302146 118 78
I want to visualize when blinks occur relative to speech (in column Utterance). So far I've come up with this code:
# Prefix each utterance with the last character of the speaker id and keep
# the utterances in order of first appearance, then draw one segment per
# blink, faceted by utterance.
blink_segments <- aes(
  x = Blink_onset, xend = Blink_offset,
  y = Blinks_grp, yend = Blinks_grp
)
df %>%
  mutate(
    Utterance = paste0(sub(".*(.)$", "\\1", Speaker), ": ", Utterance),
    Utterance = factor(Utterance, levels = unique(Utterance))
  ) %>%
  ggplot(blink_segments) +
  geom_segment(size = 3) +
  facet_wrap(~Utterance, ncol = 1, scales = "free_x")
which produces this graph:
However, the graph does not make the relationship Utterance v. blinks clear enough:
it fails to take into account the duration of the Utterance
it fails to indicate whether blinks occur (partially) before the start or after the end of Utterances
if more than one blink occurs during one Utterance it puts these blinks on different lines
So what I'm looking for is a visualization that shows both blinks and Utterances and makes clear where blinks occur relative to each Utterance. What I have in mind looks somewhat like this:
Reproducible data:
structure(list(Speaker = c("ID16.B", "ID16.B", "ID16.B", "ID16.A",
"ID16.B", "ID16.B"), Utterance = c("an American family that (.) [uh] moved to Germany in <nineteen ninety one>",
"an American family that (.) [uh] moved to Germany in <nineteen ninety one>",
"an American family that (.) [uh] moved to Germany in <nineteen ninety one>",
"[°gotcha°]", "=↓my father's↓ like ~°we're going to Germany°~",
"=↓my father's↓ like ~°we're going to Germany°~"), Starttime_ms = c(289569L,
289569L, 289569L, 290604L, 300938L, 300938L), Endtime_ms = c(293940,
293940, 293940, 291004, 302140, 302140), Blink_onset = c(289879,
290696, 290962, 290696, 301529, 302062), Blink_offset = c(289946,
290879, 291046, 290879, 301612, 302146), Blinks_grp = c(113,
114, 115, 116, 117, 118), Blink_dur = c(67, 183, 84, 183, 83,
78)), row.names = c(NA, -6L), groups = structure(list(Blinks_grp = c(113,
114, 115, 116, 117, 118), .rows = structure(list(1L, 2L, 3L,
4L, 5L, 6L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
The main problem here is how to shape the data so that ggplot can plot it. Here's a proposal: in essence, I'm replacing Blink_onset and Blink_offset with an attribute that tells whether an entry in the table is blinking or talking; ggplot can then easily draw a separate line for each event:
## Preparing the data:
df <- structure(list(Speaker = c("ID16.B", "ID16.B", "ID16.B", "ID16.A",
"ID16.B", "ID16.B"), Utterance = c("an American family that (.) [uh] moved to Germany in <nineteen ninety one>",
"an American family that (.) [uh] moved to Germany in <nineteen ninety one>",
"an American family that (.) [uh] moved to Germany in <nineteen ninety one>",
"[°gotcha°]", "=↓my father's↓ like ~°we're going to Germany°~",
"=↓my father's↓ like ~°we're going to Germany°~"), Starttime_ms = c(289569L,
289569L, 289569L, 290604L, 300938L, 300938L), Endtime_ms = c(293940,
293940, 293940, 291004, 302140, 302140), Blink_onset = c(289879,
290696, 290962, 290696, 301529, 302062), Blink_offset = c(289946,
290879, 291046, 290879, 301612, 302146), Blinks_grp = c(113,
114, 115, 116, 117, 118), Blink_dur = c(67, 183, 84, 183, 83,
78)), row.names = c(NA, -6L), groups = structure(list(Blinks_grp = c(113,
114, 115, 116, 117, 118), .rows = structure(list(1L, 2L, 3L,
4L, 5L, 6L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
## Label each utterance with the last character of the speaker id and keep
## the utterances in order of first appearance. The data above is grouped by
## Blinks_grp; ungroup first so the grouping is not carried through every
## later verb.
df <- df %>%
  ungroup() %>%
  mutate(
    Utterance = paste0(sub(".*(.)$", "\\1", Speaker), ": ", Utterance),
    Utterance = factor(Utterance, levels = unique(Utterance))
  )
## separate into a "Blink" and a "talk" data frame, add an attribute "Event" that represents talking and blinking:
## blink rows reuse the Starttime_ms / Endtime_ms columns for the blink interval
blink_df <- df %>%
  select(Speaker, Utterance, Blink_onset, Blink_offset, Blinks_grp) %>%
  mutate(Starttime_ms = Blink_onset, Endtime_ms = Blink_offset, Event = "blink")
## talk rows: the utterance is repeated once per blink in df, so keep each
## utterance interval only once (Blinks_grp is a blink id and is not needed here)
talk_df <- df %>%
  select(Speaker, Utterance, Starttime_ms, Endtime_ms) %>%
  distinct() %>%
  mutate(Event = "talk")
## combine datasets again:
plot_df <- bind_rows(talk_df, blink_df)
## and plot, using "Event" as attribute to separate talking and blinking lines:
plot_df %>%
  ggplot(aes(
    x = Starttime_ms, xend = Endtime_ms,
    y = Event, yend = Event, colour = Event
  )) +
  geom_segment(size = 3) +
  facet_wrap(~Utterance, ncol = 1, scales = "free_x")

Annotate ggplot based on a second data frame

I have a faceted plot made with ggplot that is already working, it shows data about river altitude against years. I'm trying to add arrows based on a second dataframe which details when floods occurred.
Here's the current plot:
I would like to draw arrows in the top part of each graph based on date information in my second dataframe where each row corresponds to a flood and contains a date.
The link between the two dataframes is the Station_code column, each river has one or more stations which is indicated by this data (in this case only the Var river has two stations).
Here is the dput of the data frame used to create the original plot:
structure(list(River = c("Durance", "Durance", "Durance", "Durance",
"Roya", "Var"), Reach = c("La Brillanne", "Les Mées", "La Brillanne",
"Les Mées", "Basse vallée", "Basse vallée"), Area_km = c(465,
465, 465, 465, 465, 465), Type = c("restored", "target", "restored",
"target", "witness", "restored"), Year = c(2017, 2017, 2012,
2012, 2018, 2011), Restoration_year = c(2013, 2013, 2013, 2013,
NA, 2009), Station_code = c("X1130010", "X1130010", "X1130010",
"X1130010", "Y6624010", "Y6442015"), BRI_adi_moy_sstransect = c(0.00375820736746399,
0.00244752138003355, 0.00446807607783864, 0.0028792618981479,
0.00989200896930529, 0.00357247516596474), SD_sstransect = c(0.00165574247612667,
0.0010044634990875, 0.00220534492332107, 0.00102694633805149,
0.00788573233793128, 0.00308489160008849), min_BRI_sstransect = c(0.00108123849595469,
0.00111493913953216, 0.000555500340370182, 0.00100279590198288,
0, 0), max_BRI_sstransect = c(0.0127781240385231, 0.00700537285706352,
0.0210216858227621, 0.00815151653110584, 0.127734814926934, 0.0223738711013954
), Nb_sstr_unique_m = c(0.00623321576795815, 0.00259754717331206,
0.00117035034437559, 0.00209845092352825, 0.0458628969163946,
3.60620609570031), BRI_adi_moy_transect = c(0.00280232169999531,
0.00173868254527501, 0.00333818552810438, 0.00181398859573415,
0.00903651639185542, 0.00447856455432537), SD_transect = c(0.00128472161839638,
0.000477209421076879, 0.00204050725984513, 0.000472466654940182,
0.00780731734792112, 0.00310039904793707), min_BRI_transect = c(0.00108123849595469,
0.00106445386542223, 0.000901992689363725, 0.000855135344651009,
0.000944414463851629, 0.000162012161197014), max_BRI_transect = c(0.00709151795418251,
0.00434366293208643, 0.011717024999411, 0.0031991369873946, 0.127734814926934,
0.0187952134332499), Nb_tr_unique_m = c(0, 0, 0, 0, 0, 0), Error_reso = c(0.0011,
8e-04, 0.0018, 0.0011, 0.0028, 0.0031), W_BA = c(296.553323029366,
411.056574923547, 263.944186046512, 363.32874617737, 88.6420798065296,
158.66866970576), W_BA_sd = c(84.1498544481585, 65.3909073242282,
100.067554749308, 55.5534084807705, 35.2337070278364, 64.6978349498119
), W_BA_min = c(131, 206, 33, 223, 6, 45), W_BA_max = c(472,
564, 657, 513, 188, 381), W_norm = c(5.73271228619998, 7.9461900926133,
5.10234066090722, 7.02355699765464, 5.09378494746752, 4.81262001531126
), W_norm_sd = c(1.62671218635823, 1.2640804493236, 1.93441939783807,
1.07391043231191, 2.02469218788178, 1.96236658443141), W_norm_min = c(2.53237866910643,
3.98221378500706, 0.637927450996277, 4.31084307794454, 0.344787822572658,
1.36490651299098), W_norm_max = c(9.12429566273463, 10.9027600715727,
12.7005556152895, 9.91687219276031, 10.8033517739433, 11.5562084766569
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))
And here is the dput of the data frame containing the flooding dates:
structure(list(Station_code = c("Y6042010", "Y6042010", "Y6042010",
"Y6042010", "Y6042010", "Y6042010"), Date = structure(c(12006,
12007, 12016, 12017, 13416, 13488), class = "Date"), Qm3s = c(156,
177, 104, 124, 125, 90.4), Qual = c(5, 5, 5, 5, 5, 5), Year = c(2002,
2002, 2002, 2002, 2006, 2006), Month = c(11, 11, 11, 11, 9, 12
), Station_river = c("Var#Entrevaux", "Var#Entrevaux", "Var#Entrevaux",
"Var#Entrevaux", "Var#Entrevaux", "Var#Entrevaux"), River = c("Var",
"Var", "Var", "Var", "Var", "Var"), Mod_inter = c(13.32, 13.32,
13.32, 13.32, 13.32, 13.32), Qm3s_norm = c(11.7117117117117,
13.2882882882883, 7.80780780780781, 9.30930930930931, 9.38438438438438,
6.78678678678679), File_name = c("Var#Entrevaux.dat", "Var#Entrevaux.dat",
"Var#Entrevaux.dat", "Var#Entrevaux.dat", "Var#Entrevaux.dat",
"Var#Entrevaux.dat"), Station_name = c("#Entrevaux", "#Entrevaux",
"#Entrevaux", "#Entrevaux", "#Entrevaux", "#Entrevaux"), Reach = c("Daluis",
"Daluis", "Daluis", "Daluis", "Daluis", "Daluis"), Restauration_year = c(2009,
2009, 2009, 2009, 2009, 2009), `Area_km[BH]` = c(676, 676, 676,
676, 676, 676), Starting_year = c(1920, 1920, 1920, 1920, 1920,
1920), Ending_year = c("NA", "NA", "NA", "NA", "NA", "NA"), Accuracy = c("good",
"good", "good", "good", "good", "good"), Q2 = c(86, 86, 86, 86,
86, 86), Q5 = c(120, 120, 120, 120, 120, 120), Q10 = c(150, 150,
150, 150, 150, 150), Q20 = c(170, 170, 170, 170, 170, 170), Q50 = c(200,
200, 200, 200, 200, 200), Data_producer = c("DREAL_PACA", "DREAL_PACA",
"DREAL_PACA", "DREAL_PACA", "DREAL_PACA", "DREAL_PACA"), Coord_X_L2e_Z32 = c(959313,
959313, 959313, 959313, 959313, 959313), Coord_Y_L2e_Z32 = c(1893321,
1893321, 1893321, 1893321, 1893321, 1893321), Coord_X_L93 = c(1005748.88,
1005748.88, 1005748.88, 1005748.88, 1005748.88, 1005748.88),
Coord_Y_L93 = c(6324083.97, 6324083.97, 6324083.97, 6324083.97,
6324083.97, 6324083.97), New_FN = c("Var#Entrevaux.csv",
"Var#Entrevaux.csv", "Var#Entrevaux.csv", "Var#Entrevaux.csv",
"Var#Entrevaux.csv", "Var#Entrevaux.csv"), NA_perc = c(14.92,
14.92, 14.92, 14.92, 14.92, 14.92), Q2_norm = c(6.45645645645646,
6.45645645645646, 6.45645645645646, 6.45645645645646, 6.45645645645646,
6.45645645645646), Q5_norm = c(9.00900900900901, 9.00900900900901,
9.00900900900901, 9.00900900900901, 9.00900900900901, 9.00900900900901
), Q10_norm = c(11.2612612612613, 11.2612612612613, 11.2612612612613,
11.2612612612613, 11.2612612612613, 11.2612612612613), Q20_norm = c(12.7627627627628,
12.7627627627628, 12.7627627627628, 12.7627627627628, 12.7627627627628,
12.7627627627628), Q50_norm = c(15.015015015015, 15.015015015015,
15.015015015015, 15.015015015015, 15.015015015015, 15.015015015015
)), row.names = c(NA, -6L), groups = structure(list(Station_code = "Y6042010",
.rows = structure(list(1:6), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = 1L, class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
EDIT
Here is an example of what I would like to do on the plot:
This is the code I use currently to do the plot:
# BRI_adi_moy_transect against Year for every river except "Roya":
# point shape encodes River, colour encodes Type, one facet per River.
ggplot(data = tst_formule[tst_formule$River != "Roya",], aes(x = Year, y = BRI_adi_moy_transect, shape = River, col = Type)) +
geom_point(size = 3) +
# Coloured error bars: mean +/- SD_transect.
geom_errorbar(aes(ymin = BRI_adi_moy_transect - SD_transect, ymax = BRI_adi_moy_transect + SD_transect), size = 0.7, width = 0.3) +
# Black error bars: mean +/- Error_reso; the constant linetype mapping exists
# only to create the legend entry "Error due to resolution".
geom_errorbar(aes(ymin = BRI_adi_moy_transect - Error_reso, ymax = BRI_adi_moy_transect + Error_reso, linetype = "Error due to resolution"), size = 0.3, width = 0.3, colour = "black") +
scale_linetype_manual(name = NULL, values = 2) +
scale_shape_manual(values = c(15, 18, 17, 16)) +
scale_colour_manual(values = c("chocolate1", "darkcyan")) +
# Each new_scale("linetype") starts a fresh linetype scale, so the layer that
# follows gets its own legend entry; the order of these calls matters.
new_scale("linetype") +
geom_vline(aes(xintercept = Restoration_year, linetype = "Restoration"), colour = "chocolate1") +
scale_linetype_manual(name = NULL, values = 5) +
new_scale("linetype") +
geom_hline(aes(yintercept = 0.004, linetype = "Threshold"), colour= 'black') +
scale_linetype_manual(name = NULL, values = 4) +
# Axis limits are computed from the full tst_formule (Roya rows included),
# not from the filtered data passed to ggplot() above.
scale_y_continuous("BRI*", limits = c(min(tst_formule$BRI_adi_moy_transect - tst_formule$SD_transect, tst_formule$BRI_adi_moy_transect - tst_formule$Error_reso ), max(tst_formule$BRI_adi_moy_transect + tst_formule$SD_transect, tst_formule$BRI_adi_moy_transect + tst_formule$Error_reso))) +
scale_x_continuous(limits = c(min(tst_formule$Year - 1),max(tst_formule$Year + 1)), breaks = scales::breaks_pretty(n = 6)) +
theme_bw() +
facet_wrap(vars(River)) +
theme(legend.spacing.y = unit(-0.01, "cm")) +
# NOTE(review): `line` is not one of the aesthetics mapped in this plot
# (shape, colour, linetype) -- presumably `linetype` was meant; confirm.
guides(shape = guide_legend(order = 1),
colour = guide_legend(order = 2),
line = guide_legend(order = 3))
After tests and more research, I managed to do it by adding the second dataframe in geom_segment():
new_scale("linetype") +
geom_segment(data = Flood_plot, aes(x = Date, xend = Date, y = 0.025, yend = 0.020, linetype = "Morphogenic flood"), arrow = arrow(length = unit(0.2, "cm")), inherit.aes = F, guide = guide_legend(order = 6)) +
scale_linetype_manual(name = NULL, values = 1) +
new_scale() creates a new linetype definition after the ones I created before; geom_segment() draws the arrows I wanted (the same approach also works with geom_text()); and scale_linetype_manual() draws the arrow in the legend without the title "linetype" above it. The second dataframe has the same column (River) as the first one, so it is wrapped into the same panels.

Resources