Trying to replicate a visualisation in R - r

Relatively inexperienced R user. I am trying to create something similar to the visualisation below with data for another country.
I've gone as far as creating the basic structure with data plotted in a vertical annual timeline with months running along the x axis but I have no idea how to edit the individual data points. I would appreciate any idea on how to move forward or even a completely different approach.
Here is my code using ggplot2:
p <- ggplot(forestfiresv, aes(y=year, x=dtstart))
p+geom_point() +
scale_x_datetime(lim=as.POSIXct(c("2021-01-01 00:01","2021-12-31 00:00", origin=lubridate::origin), "%m/%d %H:%M",tz="UTC"),expand = c(0,0), date_breaks="2 months", labels = date_format("%b"))+
theme_bw()
A data sample:
structure(list(year = c("2000", "2000", "2000", "2000", "2000",
"2000", "2000", "2000", "2000", "2000"), `Start date` = structure(c(11174, 11167, 11166, 11191,
11222, 11144, 11151, 11192, 11244, 11187), class = "Date"), `Start time` = c("02:15",
"16:05", "10:47", "15:41", "23:30", "15:29", "14:00", "13:53",
"17:39", "11:09"), `End date` = structure(c(11174,
11178, 11166, 11192, 11223, 11146, 11152, 11197, 11244, 11191
), class = "Date"), `End time` = c("14:00", "07:00", "19:00",
"22:00", "02:00", "12:00", "00:10", "13:30", "19:07", "11:30"
), Δάση = c(200, 1400, 400, 0, 0, 0, 600, 2000, 0, 260), `Forest` = c(800,
0, 0, 100, 100, 700, 0, 0, 0, 0), `Agricultural land` = c(0, 0, 0, 200, 0, 0, 200, 500, 0, 0), totalareaburnt = c(1000, 1400, 400, 500, 500, 700, 800, 2500, 350, 360), dtstart = structure(c(1628129700, 1627574700, 1627469220, 1629646860, 1632353400, 1625585340, 1626184800, 1629726780, 1634233140, 1629284940), class = c("POSIXct", "POSIXt"), tzone = "UTC"), dtend = structure(c(1628172000, 1628492400, 1627498800, 1629756000, 1632362400, 1625745600, 1626221400, 1630157400, 1634238420, 1629631800), class = c("POSIXct", "POSIXt"), tzone = "UTC")), .internal.selfref = <pointer: (nil)>, row.names = c(NA, 10L), class = c("data.table", "data.frame"))

This is the best I've obtained so far, but I bet it could be better. I've increased your example data frame because there was only one year of observation and I've injected some randomness to make the plot look better.
library(ggplot2)
ddf <- rbind(df,df,df,df,df,df,df,df,df,df)
ddf$year <- rep(2000:2009,each=10)
ddf$totalareaburnt <- sample(200:2500,100,replace = T)
ddf$dtstart <- ddf$dtstart+sample(86400*1:90,100,replace = T)
#duration in days
ddf$duration <- as.numeric(df$dtend-df$dtstart)/24
ddf$year <- as.integer(ddf$year)
ggplot(ddf,
aes(y = year,
x = dtstart)) +
geom_point(aes(size = totalareaburnt,
col = duration),
shape = 17,
alpha = 0.7) +
scale_x_datetime(
lim = as.POSIXct(
c("2021-01-01 00:01", "2021-12-31 00:00", origin = lubridate::origin),
"%m/%d %H:%M",
tz = "UTC"
),
expand = c(0, 0),
date_breaks = "1 months",
labels = scales::date_format("%b")
) +
theme_minimal() +
theme(
legend.position = "top",
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
axis.line = element_line(),
axis.ticks = element_line()
) +
scale_y_continuous(trans = "reverse", breaks = unique(ddf$year))+
scale_colour_gradientn(name= "Duartion (day)",colours = c( "yellow", "orange","darkred"))+
scale_size_continuous(name="Area burned (ha)")

Related

I cannot figure out why my dates don't work

structure(list(Position = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), Date = structure(c(1685750400, 1685750400, 1685750400, 1685750400, 1685750400, 1685750400, 1685750400, 1685750400, 1685750400, 1685750400), tzone = "UTC", class = c("POSIXct", "POSIXt")), Time
= structure(c(-2209017523, -2209017518, -2209017513, -2209017508, -2209017503, -2209017498, -2209017493, -2209017488, -2209017483, -2209017478), tzone = "UTC", class = c("POSIXct", "POSIXt")), DateTime = structure(c(1685808077, 1685808082, 1685808087,
1685808092, 1685808097, 1685808102, 1685808107, 1685808112, 1685808117, 1685808122), tzone = "UTC", class = c("POSIXct", "POSIXt")), Temperatuur = c(21.2, 21.2, 21.6, 21.7, 22, 22.2, 20.1, 20.2, 20.3, 20.3), Treatment = c("Tempex", "Tempex", "Tempex",
"Tempex", "Tempex", "Tempex", "Tempex", "Tempex", "Tempex", "Tempex")), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"))
My R code:
time.start=as.POSIXct("2023-06-03, 16:00",format="%H:%M")
time.end=as.POSIXct("2023-07-03, 07:30",format="%H:%M")
ggplot(logger, aes(DateTime, Temperatuur, color = Treatment))+
geom_line(size = 1)+
scale_x_datetime(limits = c(time.start, time.end),
breaks = date_breaks("24 hours"),
labels = date_format("%H:%M"))
I just cannot figure out why I am getting this: plot
Can someone please help me?
I tried to follow advice online, but that got me nowhere. Changing from 24 to 12 hours also didn't help. Maybe it's a problem in my excel file, but that all seems alright.
If we remove the commas from the time.* strings and fix the errant format=, it should work.
time.start <- as.POSIXct("2023-06-03 16:00")
time.end <- as.POSIXct("2023-07-03 07:30")
ggplot(logger, aes(DateTime, Temperatuur, color = Treatment)) +
geom_line(size = 1) +
scale_x_datetime(
limits = c(time.start, time.end),
breaks = date_breaks("24 hours"),
labels = date_format("%H:%M"))
I think perhaps you are confounding the use of labels=date_format("%H:%M") as what you think time.start/time.end should be (or how they should look), which is incorrect. Since the x-axis is (I'm inferring) a POSIXt object, then the time.* objects also must be the same object. You don't need to make them "look" (i.e., "%H:%M") the same as how you want the axis labels rendered, it is handled automatically by ggplot.

r ggplot not recognizing date format

I have the following chart.
p1 <- ggplot(data = mydat, aes(x = time))+
geom_line(aes(y = sumabsdiff, colour = 'sumabsdiff'))+
geom_line(aes(y = windsize, col='windsize'))+
scale_x_time(breaks = scales::date_breaks('1 sec'))+ #('15 secs'))+
scale_color_manual(values=c('sumabsdiff' = 'black',
"windsize" = "red"))+
theme(legend.position = "top")
As you can see, the date is all messed up even though time seems perfectly fine to me.
> mydat$time
[1] "2022-09-19 12:44:47 UTC" "2022-09-19 12:44:48 UTC" "2022-09-19 12:44:49 UTC" "2022-09-19 12:44:50 UTC"
[5] "2022-09-19 12:44:50 UTC" "2022-09-19 12:44:50 UTC".
Any idea why?
Data:
mydf <- structure(list(time = structure(c(1663591487.801, 1663591488.614,
1663591489.626, 1663591490.097, 1663591490.202, 1663591490.717
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), bid = c(11735.68,
11735.18, 11734.93, 11734.43, 11734.3, 11734.43), ask = c(11737.58,
11737.08, 11736.83, 11736.33, 11736.2, 11736.33), flags = c(6,
6, 6, 6, 6, 6), typical = c(11736.63, 11736.13, 11735.88, 11735.38,
11735.25, 11735.38), row = 266:271, prevrow_short = c(258L, 258L,
260L, 261L, 262L, 265L), windsize = c(9, 10, 9, 9, 9, 7), diff = c(-0.119999999998981,
-0.5, -0.25, -0.5, -0.130000000001019, 0.130000000001019), absdiff = c(0.119999999998981,
0.5, 0.25, 0.5, 0.130000000001019, 0.130000000001019), sumabsdiff = c(3.60999999999694,
4.10999999999694, 3.72999999999593, 3.85999999999694, 3.61999999999898,
2.13000000000102), positive = c(FALSE, FALSE, FALSE, FALSE, FALSE,
TRUE), meanpos = c(0.444444444444444, 0.4, 0.333333333333333,
0.222222222222222, 0.222222222222222, 0.285714285714286), posdiff = c(0,
0, 0, 0, 0, 0.130000000001019), negdiff = c(0.119999999998981,
0.5, 0.25, 0.5, 0.130000000001019, 0), sumposdiff_short = c(1.36999999999898,
1.36999999999898, 1.23999999999796, 0.869999999998981, 0.869999999998981,
0.630000000001019), sumnegdiff_short = c(2.23999999999796, 2.73999999999796,
2.48999999999796, 2.98999999999796, 2.75, 1.5), power_short = c(0.37950138504159,
0.333333333333333, 0.332439678283999, 0.225388601036184, 0.240331491712493,
0.295774647887661), market_open = c(FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE), timediff = c(0.219000101089478, 0.812999963760376,
1.01199984550476, 0.470999956130981, 0.105000019073486, 0.515000104904175
), avgspeed = c(2.71247733209185, 2.42072135038066, 2.44698207323042,
2.3255814641193, 2.8019926211971, 2.14658085742225), relative_positive_diff = c(0.37950138504159,
0.333333333333333, 0.332439678283999, 0.225388601036184, 0.240331491712493,
0.295774647887661), timesec = structure(c(1663591487, 1663591488,
1663591489, 1663591490, 1663591490, 1663591490), class = c("POSIXct",
"POSIXt"), tzone = "UTC")), pandas.index = <environment>, row.names = 266:271, class = "data.frame")
By the way, the time actually includes milliseconds, perhaps that is the cause
?scale_x_time:
These are the default scales for the three date/time class. These
will usually be added automatically. To override manually, use
scale_*_date for dates (class 'Date'), scale_*_datetime for
datetimes (class 'POSIXct'), and scale_*_time for times (class
'hms').
Your time variable is class POSIXt, not hms, so you should be using scale_x_datetime instead.
ggplot(data = mydf, aes(x = time))+
geom_line(aes(y = sumabsdiff, colour = 'sumabsdiff'))+
geom_line(aes(y = windsize, col='windsize'))+
scale_x_datetime(breaks = "1 secs") +
scale_color_manual(values=c('sumabsdiff' = 'black',
"windsize" = "red"))+
theme(legend.position = "top")
You can format the axis labels with date_labels= and %-codes (listed in ?strptime):
ggplot(data = mydf, aes(x = time))+
geom_line(aes(y = sumabsdiff, colour = 'sumabsdiff'))+
geom_line(aes(y = windsize, col='windsize'))+
# scale_x_datetime(breaks = scales::date_breaks('1 sec'))+ #('15 secs'))+
scale_x_datetime(breaks = "1 sec", date_labels = "%H:%M:%S") +
scale_color_manual(values=c('sumabsdiff' = 'black',
"windsize" = "red"))+
theme(legend.position = "top")

Visualizing average sentiment by day&year (ggplot)

I would like to visualize consumer sentiment by day&year throughout different years. For example, I am interested in comparing consumer sentiment in Dec 18th of 2011, to Dec 18th in 2012.
Currently, I have been able to do so by month&year, but I want to visualize the data at a more granular level.
#Creating a month-year variable
valences_by_post<- valences_by_post %>%
mutate(month_year = zoo::as.yearmon(date))
#2011 & 2012
valence_11_12<-valences_by_post %>%
filter(year == 2011 | year ==2012)%>%
group_by(month_year) %>%
summarize(mean_valence= mean(valence), n=n())
ggplot(valence_11_12, aes(x =factor(month_year), y = mean_valence, group=1)) +
geom_point() +
geom_line()+
geom_smooth()+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
Which produces:
However, to compute sentiment by day&year, and visualize across different years, I ran the following:
valences_by_post<- valences_by_post %>%
mutate(year_day = paste(lubridate::year(date), lubridate::yday(date), sep = "-"))
head(valences_by_post$year_day)
valence_day<-valences_by_post %>%
filter(year == 2011| year == 2012)%>%
group_by(year_day) %>%
summarize(mean_valence= mean(valence), n=n())
And then the graph, but I receive an error that, "Error: Discrete value supplied to continuous scale" because the year_day variable is stored as "character", and I was wondering if there is a workaround for this or an equivalent of the "zoo::as.yearmon(date))" function from other packages?
ggplot(valence_day, aes(x =year_day, y = mean_valence)) +
geom_point() +
geom_line()+
scale_x_continuous(breaks=seq(1,365,1)) +
geom_smooth()
Here are data samples:
dput(head(valence_day,5))
structure(list(year_day = c("2011-175", "2011-176", "2011-177",
"2011-182", "2011-189"), mean_valence = c(0, 0.0806100217864924,
0.0714285714285714, 0, 0.5), n = c(1L, 9L, 1L, 1L, 1L)), row.names = c(NA,
-5L), class = c("tbl_df", "tbl", "data.frame"))
And
dput(head(valences_by_post,5))
structure(list(document = c("1", "2", "3", "4", "5"), positive = c(1,
0, 2, 1, 1), negative = c(1, 1, 0, 0, 1), total_words = c(34,
13, 4, 3, 6), valence = c(0, -0.0769230769230769, 0.5, 0.333333333333333,
0), date = structure(c(1308873600, 1308960000, 1308960000, 1308960000,
1308960000), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
year = c(2011, 2011, 2011, 2011, 2011), month = c(6, 6, 6,
6, 6), year_day = c("2011-175", "2011-176", "2011-176", "2011-176",
"2011-176"), month_year = structure(c(2011.41666666667, 2011
IMHO there is no need to add a year_day. Basically this is the same as the date. Hence, you could do your computations by converting your date (which is a datetime object) to a Date . And to show the yearday in the plot this could be achieved via the labels argument of scale_x_date:
library(dplyr)
library(ggplot2)
valence_day <- valences_by_post %>%
filter(year %in% c(2011, 2012)) %>%
group_by(date = as.Date(date)) %>%
summarize(mean_valence = mean(valence), n = n())
ggplot(valence_day, aes(x = date, y = mean_valence)) +
geom_point() +
geom_line() +
scale_x_date(labels = ~ paste(lubridate::year(.x), lubridate::yday(.x), sep = "-")) +
geom_smooth()
DATA
valences_by_post <- structure(list(
document = c("1", "2", "3", "4", "5"), positive = c(
1,
0, 2, 1, 1
), negative = c(1, 1, 0, 0, 1), total_words = c(
34,
13, 4, 3, 6
), valence = c(
0, -0.0769230769230769, 0.5, 0.333333333333333,
0
), date = structure(c(
1308873600, 1308960000, 1308960000, 1308960000,
1308960000
), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
year = c(2011, 2011, 2011, 2011, 2011), month = c(
6, 6, 6,
6, 6
), month_year = structure(c(
2011.41666666667, 2011.41666666667,
2011.41666666667, 2011.41666666667, 2011.41666666667
), class = "yearmon")
), row.names = c(
NA,
5L
), class = "data.frame")

Using segment labels in ggplot with ggrepel with smooth segments

This is my dataframe:
df<-structure(list(year = c(1984, 1984), team = c("Australia", "Brazil"
), continent = c("Oceania", "Americas"), medal = structure(c(3L,
3L), .Label = c("Bronze", "Silver", "Gold"), class = "factor"),
n = c(84L, 12L)), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"))
And this is my ggplot (my question is related to the annotations regard Brazil label):
ggplot(data = df)+
geom_point(aes(x = year, y = n)) +
geom_text_repel(aes(x = year, y = n, label = team),
size = 3, color = 'black',
seed = 10,
nudge_x = -.029,
nudge_y = 35,
segment.size = .65,
segment.curvature = -1,
segment.angle = 178.975,
segment.ncp = 1)+
coord_flip()
So, I have a segment divided by two parts. On both parts I have 'small braks'. How can I avoid them?
I already tried to use segment.ncp, change nudge_xor nudge_ynut its not working.
Any help?
Not really sure what is going on here. This is the best I could generate by experimenting with variations to the input values for segment... arguments.
There is some guidance at: https://ggrepel.slowkow.com/articles/examples.html which has an example with shorter leader lines, maybe that's an approach you could use.
df<-structure(list(year = c(1984, 1984), team = c("Australia", "Brazil"
), continent = c("Oceania", "Americas"), medal = structure(c(3L,
3L), .Label = c("Bronze", "Silver", "Gold"), class = "factor"),
n = c(84L, 12L)), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"))
library(ggplot2)
library(ggrepel)
ggplot(data = df)+
geom_point(aes(x = year, y = n)) +
geom_text_repel(aes(x = year, y = n, label = team),
size = 3, color = 'black',
seed = 1,
nudge_x = -0.029,
nudge_y = 35,
segment.size = 0.5,
segment.curvature = -0.0000002,
segment.angle = 1,
segment.ncp = 1000)+
coord_flip()
Created on 2021-08-26 by the reprex package (v2.0.0)

Annotate ggplot based on a second data frame

I have a faceted plot made with ggplot that is already working, it shows data about river altitude against years. I'm trying to add arrows based on a second dataframe which details when floods occurred.
Here's the current plot:
I would like to draw arrows in the top part of each graph based on date information in my second dataframe where each row corresponds to a flood and contains a date.
The link between the two dataframes is the Station_code column, each river has one or more stations which is indicated by this data (in this case only the Var river has two stations).
Here is the dput of the data frame used to create the original plot:
structure(list(River = c("Durance", "Durance", "Durance", "Durance",
"Roya", "Var"), Reach = c("La Brillanne", "Les Mées", "La Brillanne",
"Les Mées", "Basse vallée", "Basse vallée"), Area_km = c(465,
465, 465, 465, 465, 465), Type = c("restored", "target", "restored",
"target", "witness", "restored"), Year = c(2017, 2017, 2012,
2012, 2018, 2011), Restoration_year = c(2013, 2013, 2013, 2013,
NA, 2009), Station_code = c("X1130010", "X1130010", "X1130010",
"X1130010", "Y6624010", "Y6442015"), BRI_adi_moy_sstransect = c(0.00375820736746399,
0.00244752138003355, 0.00446807607783864, 0.0028792618981479,
0.00989200896930529, 0.00357247516596474), SD_sstransect = c(0.00165574247612667,
0.0010044634990875, 0.00220534492332107, 0.00102694633805149,
0.00788573233793128, 0.00308489160008849), min_BRI_sstransect = c(0.00108123849595469,
0.00111493913953216, 0.000555500340370182, 0.00100279590198288,
0, 0), max_BRI_sstransect = c(0.0127781240385231, 0.00700537285706352,
0.0210216858227621, 0.00815151653110584, 0.127734814926934, 0.0223738711013954
), Nb_sstr_unique_m = c(0.00623321576795815, 0.00259754717331206,
0.00117035034437559, 0.00209845092352825, 0.0458628969163946,
3.60620609570031), BRI_adi_moy_transect = c(0.00280232169999531,
0.00173868254527501, 0.00333818552810438, 0.00181398859573415,
0.00903651639185542, 0.00447856455432537), SD_transect = c(0.00128472161839638,
0.000477209421076879, 0.00204050725984513, 0.000472466654940182,
0.00780731734792112, 0.00310039904793707), min_BRI_transect = c(0.00108123849595469,
0.00106445386542223, 0.000901992689363725, 0.000855135344651009,
0.000944414463851629, 0.000162012161197014), max_BRI_transect = c(0.00709151795418251,
0.00434366293208643, 0.011717024999411, 0.0031991369873946, 0.127734814926934,
0.0187952134332499), Nb_tr_unique_m = c(0, 0, 0, 0, 0, 0), Error_reso = c(0.0011,
8e-04, 0.0018, 0.0011, 0.0028, 0.0031), W_BA = c(296.553323029366,
411.056574923547, 263.944186046512, 363.32874617737, 88.6420798065296,
158.66866970576), W_BA_sd = c(84.1498544481585, 65.3909073242282,
100.067554749308, 55.5534084807705, 35.2337070278364, 64.6978349498119
), W_BA_min = c(131, 206, 33, 223, 6, 45), W_BA_max = c(472,
564, 657, 513, 188, 381), W_norm = c(5.73271228619998, 7.9461900926133,
5.10234066090722, 7.02355699765464, 5.09378494746752, 4.81262001531126
), W_norm_sd = c(1.62671218635823, 1.2640804493236, 1.93441939783807,
1.07391043231191, 2.02469218788178, 1.96236658443141), W_norm_min = c(2.53237866910643,
3.98221378500706, 0.637927450996277, 4.31084307794454, 0.344787822572658,
1.36490651299098), W_norm_max = c(9.12429566273463, 10.9027600715727,
12.7005556152895, 9.91687219276031, 10.8033517739433, 11.5562084766569
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))
And here is the dput of the date frame containing the flooding date:
structure(list(Station_code = c("Y6042010", "Y6042010", "Y6042010",
"Y6042010", "Y6042010", "Y6042010"), Date = structure(c(12006,
12007, 12016, 12017, 13416, 13488), class = "Date"), Qm3s = c(156,
177, 104, 124, 125, 90.4), Qual = c(5, 5, 5, 5, 5, 5), Year = c(2002,
2002, 2002, 2002, 2006, 2006), Month = c(11, 11, 11, 11, 9, 12
), Station_river = c("Var#Entrevaux", "Var#Entrevaux", "Var#Entrevaux",
"Var#Entrevaux", "Var#Entrevaux", "Var#Entrevaux"), River = c("Var",
"Var", "Var", "Var", "Var", "Var"), Mod_inter = c(13.32, 13.32,
13.32, 13.32, 13.32, 13.32), Qm3s_norm = c(11.7117117117117,
13.2882882882883, 7.80780780780781, 9.30930930930931, 9.38438438438438,
6.78678678678679), File_name = c("Var#Entrevaux.dat", "Var#Entrevaux.dat",
"Var#Entrevaux.dat", "Var#Entrevaux.dat", "Var#Entrevaux.dat",
"Var#Entrevaux.dat"), Station_name = c("#Entrevaux", "#Entrevaux",
"#Entrevaux", "#Entrevaux", "#Entrevaux", "#Entrevaux"), Reach = c("Daluis",
"Daluis", "Daluis", "Daluis", "Daluis", "Daluis"), Restauration_year = c(2009,
2009, 2009, 2009, 2009, 2009), `Area_km[BH]` = c(676, 676, 676,
676, 676, 676), Starting_year = c(1920, 1920, 1920, 1920, 1920,
1920), Ending_year = c("NA", "NA", "NA", "NA", "NA", "NA"), Accuracy = c("good",
"good", "good", "good", "good", "good"), Q2 = c(86, 86, 86, 86,
86, 86), Q5 = c(120, 120, 120, 120, 120, 120), Q10 = c(150, 150,
150, 150, 150, 150), Q20 = c(170, 170, 170, 170, 170, 170), Q50 = c(200,
200, 200, 200, 200, 200), Data_producer = c("DREAL_PACA", "DREAL_PACA",
"DREAL_PACA", "DREAL_PACA", "DREAL_PACA", "DREAL_PACA"), Coord_X_L2e_Z32 = c(959313,
959313, 959313, 959313, 959313, 959313), Coord_Y_L2e_Z32 = c(1893321,
1893321, 1893321, 1893321, 1893321, 1893321), Coord_X_L93 = c(1005748.88,
1005748.88, 1005748.88, 1005748.88, 1005748.88, 1005748.88),
Coord_Y_L93 = c(6324083.97, 6324083.97, 6324083.97, 6324083.97,
6324083.97, 6324083.97), New_FN = c("Var#Entrevaux.csv",
"Var#Entrevaux.csv", "Var#Entrevaux.csv", "Var#Entrevaux.csv",
"Var#Entrevaux.csv", "Var#Entrevaux.csv"), NA_perc = c(14.92,
14.92, 14.92, 14.92, 14.92, 14.92), Q2_norm = c(6.45645645645646,
6.45645645645646, 6.45645645645646, 6.45645645645646, 6.45645645645646,
6.45645645645646), Q5_norm = c(9.00900900900901, 9.00900900900901,
9.00900900900901, 9.00900900900901, 9.00900900900901, 9.00900900900901
), Q10_norm = c(11.2612612612613, 11.2612612612613, 11.2612612612613,
11.2612612612613, 11.2612612612613, 11.2612612612613), Q20_norm = c(12.7627627627628,
12.7627627627628, 12.7627627627628, 12.7627627627628, 12.7627627627628,
12.7627627627628), Q50_norm = c(15.015015015015, 15.015015015015,
15.015015015015, 15.015015015015, 15.015015015015, 15.015015015015
)), row.names = c(NA, -6L), groups = structure(list(Station_code = "Y6042010",
.rows = structure(list(1:6), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = 1L, class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
EDIT
Here is an example of what I would like to do on the plot:
This is the code I use currently to do the plot:
ggplot(data = tst_formule[tst_formule$River != "Roya",], aes(x = Year, y = BRI_adi_moy_transect, shape = River, col = Type)) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = BRI_adi_moy_transect - SD_transect, ymax = BRI_adi_moy_transect + SD_transect), size = 0.7, width = 0.3) +
geom_errorbar(aes(ymin = BRI_adi_moy_transect - Error_reso, ymax = BRI_adi_moy_transect + Error_reso, linetype = "Error due to resolution"), size = 0.3, width = 0.3, colour = "black") +
scale_linetype_manual(name = NULL, values = 2) +
scale_shape_manual(values = c(15, 18, 17, 16)) +
scale_colour_manual(values = c("chocolate1", "darkcyan")) +
new_scale("linetype") +
geom_vline(aes(xintercept = Restoration_year, linetype = "Restoration"), colour = "chocolate1") +
scale_linetype_manual(name = NULL, values = 5) +
new_scale("linetype") +
geom_hline(aes(yintercept = 0.004, linetype = "Threshold"), colour= 'black') +
scale_linetype_manual(name = NULL, values = 4) +
scale_y_continuous("BRI*", limits = c(min(tst_formule$BRI_adi_moy_transect - tst_formule$SD_transect, tst_formule$BRI_adi_moy_transect - tst_formule$Error_reso ), max(tst_formule$BRI_adi_moy_transect + tst_formule$SD_transect, tst_formule$BRI_adi_moy_transect + tst_formule$Error_reso))) +
scale_x_continuous(limits = c(min(tst_formule$Year - 1),max(tst_formule$Year + 1)), breaks = scales::breaks_pretty(n = 6)) +
theme_bw() +
facet_wrap(vars(River)) +
theme(legend.spacing.y = unit(-0.01, "cm")) +
guides(shape = guide_legend(order = 1),
colour = guide_legend(order = 2),
line = guide_legend(order = 3))
After tests and more research, I managed to do it by adding the second dataframe in geom_text():
new_scale("linetype") +
geom_segment(data = Flood_plot, aes(x = Date, xend = Date, y = 0.025, yend = 0.020, linetype = "Morphogenic flood"), arrow = arrow(length = unit(0.2, "cm")), inherit.aes = F, guide = guide_legend(order = 6)) +
scale_linetype_manual(name = NULL, values = 1) +
new_scale() creates a new linetype definition after the ones I created before, geom_segment() allows to draw arrows which I wanted but it works with geom_text() and scale_linetype_manual() draws the arrow in the legend without the mention "linetype" above. The second dataframe has the same column (River) as the 1st one to wrap and create the panels.

Resources