How to specify a certain csv in the errorbar line - r

I am trying to make a plot with three different csvs. In 2 of them, the columns are the same i.e. Year, GMSL and GMSLerror.
In the Frederikse file the columns are Year, GMSL, GMSLerrorlow and GMSLerrorup. How can I tell R to plot the Frederikse error using the columns GMSLerrorlow and GMSLerrorup? I tried the following but it did not work. Thanks.
# Attempt from the question (reported as not working): read three CSV files,
# tag every row with the name of the file it came from, and plot GMSL over
# time with error bars.
# NOTE(review): chained assignment -- both `p1` and `files` receive the
# file-name vector; the plot built below is never stored in `p1`.
p1<-files <- c("Frederikse.csv", "ChurchandWhite.csv","Hay.csv")
map_dfr(files, ~ read_csv(.x) %>%
mutate(Author = .x)) %>%
ggplot(aes(x = Time, y = GMSL, color = Author,fill=Author)) +
geom_line(size=0.6)+
theme_bw(12)+
theme(panel.grid.major = element_blank())+
theme(panel.grid.minor = element_blank())+
labs(x = "Year", y = "GMSL (mm)",color="Author")+
geom_errorbar(aes(ymin=GMSL-GMSLerror, ymax =GMSL+GMSLerror,alpha=Author))+
# NOTE(review): the file name is passed as the first positional argument of
# geom_errorbar(), and this line has no trailing `+`, so the two scale_*()
# lines below form a separate expression.
geom_errorbar("Frederikse.csv",(aes(ymin=GMSL-GMSLerrorlow, ymax =GMSL+GMSLerrorup,alpha=Author)))
scale_alpha_manual(values = c(0.3, 0.3, 0.8))+
scale_colour_manual(values=c("#BAB3F0","#1D3E72","#201641"))
p1
structure(list(Year = 1900:1905, GMSLerrorlow = c(-203.5572666,
-201.0185091, -212.0740442, -202.6975639, -200.1670151, -192.1312551
), GMSL = c(-173.2614421, -168.8016753, -180.389967, -170.2678322,
-168.7200709, -160.9814287), GMSLerrorup = c(-141.002807, -135.8976091,
-148.213824, -138.9305182, -137.4501224, -130.3514508)), row.names = c(NA,
6L), class = "data.frame")
structure(list(Time = 1900:1905, GMSL = c(-131.15, -130.5, -129.77,
-128.85, -128.1, -127.56), GMSLerror = c(25.32, 25.17, 25.01,
24.86, 24.7, 24.55)), row.names = c(NA, 6L), class = "data.frame")
structure(list(Time = c(1880.0417, 1880.125, 1880.2083, 1880.2917,
1880.375, 1880.4583), GMSL = c(-183, -171.1, -164.3, -158.2,
-158.7, -159.6), GMSLerror = c(24.2, 24.2, 24.2, 24.2, 24.2,
24.2)), row.names = c(NA, 6L), class = "data.frame")

You can do this by using mutate() to create GMSLerrorlow and GMSLerrorup columns for all of the datasets:
# Read the three sea-level files into one data frame, express every
# uncertainty as absolute lower/upper bounds, and plot GMSL with error bars.
files <- c("Frederikse.csv", "ChurchandWhite.csv", "Hay.csv")

# Assign the finished plot (not the file vector) to p1.
p1 <- set_names(files) %>% # names feed .id below; str_remove() can drop ".csv"
  map_dfr(~ read_csv(.x), .id = "Author") %>% # .id stores each file's name in Author
  mutate(
    # Frederikse.csv keeps its time axis in `Year`; the other files use `Time`.
    Time = coalesce(Time, Year),
    # Frederikse.csv already holds the bounds themselves (in the sample data
    # GMSLerrorlow lies below GMSL and GMSLerrorup above it), whereas the other
    # files hold a symmetric +/- GMSLerror, so turn that into bounds as well.
    GMSLerrorlow = if_else(
      Author != "Frederikse.csv", GMSL - GMSLerror, GMSLerrorlow
    ),
    GMSLerrorup = if_else(
      Author != "Frederikse.csv", GMSL + GMSLerror, GMSLerrorup
    )
  ) %>%
  ggplot(aes(x = Time, y = GMSL, color = Author, fill = Author)) +
  geom_line(size = 0.6) +
  theme_bw(12) +
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  labs(x = "Year", y = "GMSL (mm)", color = "Author") +
  # Both columns are absolute bounds now, so they map straight to ymin/ymax.
  geom_errorbar(aes(ymin = GMSLerrorlow, ymax = GMSLerrorup, alpha = Author)) +
  scale_alpha_manual(values = c(0.3, 0.3, 0.8)) +
  scale_colour_manual(values = c("#BAB3F0", "#1D3E72", "#201641"))
p1

Related

r ggplot not recognizing date format

I have the following chart.
# Plot from the question: sumabsdiff and windsize drawn as two lines against
# `time`, coloured black and red through a manual colour scale.
p1 <- ggplot(data = mydat, aes(x = time))+
geom_line(aes(y = sumabsdiff, colour = 'sumabsdiff'))+
geom_line(aes(y = windsize, col='windsize'))+
# NOTE(review): ?scale_x_time describes scale_*_time as the scale for class
# 'hms'; the posted sample data stores `time` as POSIXct.
scale_x_time(breaks = scales::date_breaks('1 sec'))+ #('15 secs'))+
scale_color_manual(values=c('sumabsdiff' = 'black',
"windsize" = "red"))+
theme(legend.position = "top")
As you can see, the date is all messed up even though time seems perfectly fine to me.
> mydat$time
[1] "2022-09-19 12:44:47 UTC" "2022-09-19 12:44:48 UTC" "2022-09-19 12:44:49 UTC" "2022-09-19 12:44:50 UTC"
[5] "2022-09-19 12:44:50 UTC" "2022-09-19 12:44:50 UTC".
Any idea why?
Data:
mydf <- structure(list(time = structure(c(1663591487.801, 1663591488.614,
1663591489.626, 1663591490.097, 1663591490.202, 1663591490.717
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), bid = c(11735.68,
11735.18, 11734.93, 11734.43, 11734.3, 11734.43), ask = c(11737.58,
11737.08, 11736.83, 11736.33, 11736.2, 11736.33), flags = c(6,
6, 6, 6, 6, 6), typical = c(11736.63, 11736.13, 11735.88, 11735.38,
11735.25, 11735.38), row = 266:271, prevrow_short = c(258L, 258L,
260L, 261L, 262L, 265L), windsize = c(9, 10, 9, 9, 9, 7), diff = c(-0.119999999998981,
-0.5, -0.25, -0.5, -0.130000000001019, 0.130000000001019), absdiff = c(0.119999999998981,
0.5, 0.25, 0.5, 0.130000000001019, 0.130000000001019), sumabsdiff = c(3.60999999999694,
4.10999999999694, 3.72999999999593, 3.85999999999694, 3.61999999999898,
2.13000000000102), positive = c(FALSE, FALSE, FALSE, FALSE, FALSE,
TRUE), meanpos = c(0.444444444444444, 0.4, 0.333333333333333,
0.222222222222222, 0.222222222222222, 0.285714285714286), posdiff = c(0,
0, 0, 0, 0, 0.130000000001019), negdiff = c(0.119999999998981,
0.5, 0.25, 0.5, 0.130000000001019, 0), sumposdiff_short = c(1.36999999999898,
1.36999999999898, 1.23999999999796, 0.869999999998981, 0.869999999998981,
0.630000000001019), sumnegdiff_short = c(2.23999999999796, 2.73999999999796,
2.48999999999796, 2.98999999999796, 2.75, 1.5), power_short = c(0.37950138504159,
0.333333333333333, 0.332439678283999, 0.225388601036184, 0.240331491712493,
0.295774647887661), market_open = c(FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE), timediff = c(0.219000101089478, 0.812999963760376,
1.01199984550476, 0.470999956130981, 0.105000019073486, 0.515000104904175
), avgspeed = c(2.71247733209185, 2.42072135038066, 2.44698207323042,
2.3255814641193, 2.8019926211971, 2.14658085742225), relative_positive_diff = c(0.37950138504159,
0.333333333333333, 0.332439678283999, 0.225388601036184, 0.240331491712493,
0.295774647887661), timesec = structure(c(1663591487, 1663591488,
1663591489, 1663591490, 1663591490, 1663591490), class = c("POSIXct",
"POSIXt"), tzone = "UTC")), pandas.index = <environment>, row.names = 266:271, class = "data.frame")
By the way, the time actually includes milliseconds, perhaps that is the cause
?scale_x_time:
These are the default scales for the three date/time class. These
will usually be added automatically. To override manually, use
scale_*_date for dates (class 'Date'), scale_*_datetime for
datetimes (class 'POSIXct'), and scale_*_time for times (class
'hms').
Your time variable is class POSIXct, not hms, so you should be using scale_x_datetime instead.
# `time` is POSIXct, so use the date-time scale. One break per second is
# requested through the documented `date_breaks` argument rather than by
# passing a string to `breaks`.
ggplot(data = mydf, aes(x = time)) +
  geom_line(aes(y = sumabsdiff, colour = "sumabsdiff")) +
  geom_line(aes(y = windsize, colour = "windsize")) +
  scale_x_datetime(date_breaks = "1 sec") +
  scale_color_manual(
    values = c("sumabsdiff" = "black", "windsize" = "red")
  ) +
  theme(legend.position = "top")
You can format the axis labels with date_labels= and %-codes (listed in ?strptime):
# Same plot with formatted axis labels: `date_breaks` places a break every
# second and `date_labels` formats each one as hours:minutes:seconds.
ggplot(data = mydf, aes(x = time)) +
  geom_line(aes(y = sumabsdiff, colour = "sumabsdiff")) +
  geom_line(aes(y = windsize, colour = "windsize")) +
  scale_x_datetime(date_breaks = "1 sec", date_labels = "%H:%M:%S") +
  scale_color_manual(
    values = c("sumabsdiff" = "black", "windsize" = "red")
  ) +
  theme(legend.position = "top")

How to create two stacked bar charts next to each other using ggplot. I want to recreate the below chart:

I have the below 2 dataframes:
lc2 <- structure(list(group = 1:3, sumpct = c(13, 32, 54)), class = "data.frame", row.names = c(NA,
-3L))
Note: this is for the "likelihood to click" bar (see image), where "extremely/somewhat likely" is
13%, neutral is 32%, and "extremely/somewhat unlikely" is 54%.
and
le2 <- structure(list(e = 1:3, t = c(13, 38, 48)), class = "data.frame", row.names = c(NA,
-3L))
note similarly this code above is for "likelihood to enroll" bar below.
But I want to create this:
# Attach the packages up front: data.table for stacking, ggplot2 for drawing.
library(data.table)
library(ggplot2)

# Percentages behind the "click" bar.
lc2 <- structure(
  list(group = 1:3, sumpct = c(13, 32, 54)),
  class = "data.frame",
  row.names = c(NA, -3L)
)
# Percentages behind the "enroll" bar.
le2 <- structure(
  list(e = 1:3, t = c(13, 38, 48)),
  class = "data.frame",
  row.names = c(NA, -3L)
)

# Tag each table with the bar it belongs to, then give both the same columns
# so they can be stacked into one long table.
lc2$type <- "click"
le2$type <- "enroll"
names(lc2) <- c("group", "pct", "type")
names(le2) <- names(lc2)

dt <- rbindlist(list(lc2, le2))
dt[, group := as.factor(group)] # discrete fill needs a factor

# One bar per type, stacked by group, with the percentage centred in each
# segment; coord_flip() lays the bars out horizontally.
ggplot(dt, aes(x = type, y = pct, fill = group)) +
  geom_col() +
  geom_text(
    aes(label = scales::percent(pct / 100)),
    position = position_stack(vjust = .5)
  ) +
  theme_classic() +
  coord_flip()

Using segment labels in ggplot with ggrepel with smooth segments

This is my dataframe:
df<-structure(list(year = c(1984, 1984), team = c("Australia", "Brazil"
), continent = c("Oceania", "Americas"), medal = structure(c(3L,
3L), .Label = c("Bronze", "Silver", "Gold"), class = "factor"),
n = c(84L, 12L)), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"))
And this is my ggplot (my question is about the annotation for the Brazil label):
# Plot from the question: one point per row of df, each with a repelled team
# label; coord_flip() swaps the x and y axes.
ggplot(data = df)+
geom_point(aes(x = year, y = n)) +
geom_text_repel(aes(x = year, y = n, label = team),
size = 3, color = 'black',
seed = 10,
# nudge_* shift the label's starting position away from its point
nudge_x = -.029,
nudge_y = 35,
# segment.* control the line drawn between label and point; these are the
# settings the question is about
segment.size = .65,
segment.curvature = -1,
segment.angle = 178.975,
segment.ncp = 1)+
coord_flip()
So, I have a segment divided into two parts. Both parts have small breaks in them. How can I avoid them?
I already tried using segment.ncp and changing nudge_x or nudge_y, but it's not working.
Any help?
Not really sure what is going on here. This is the best I could generate by experimenting with variations to the input values for segment... arguments.
There is some guidance at: https://ggrepel.slowkow.com/articles/examples.html which has an example with shorter leader lines, maybe that's an approach you could use.
library(ggplot2)
library(ggrepel)

# Two-row example data from the question (Australia and Brazil, 1984).
df <- structure(
  list(
    year = c(1984, 1984),
    team = c("Australia", "Brazil"),
    continent = c("Oceania", "Americas"),
    medal = structure(
      c(3L, 3L),
      .Label = c("Bronze", "Silver", "Gold"),
      class = "factor"
    ),
    n = c(84L, 12L)
  ),
  row.names = c(NA, -2L),
  class = c("tbl_df", "tbl", "data.frame")
)

# The shared x/y mapping lives on the plot; only the label is layer-specific.
ggplot(df, aes(x = year, y = n)) +
  geom_point() +
  geom_text_repel(
    aes(label = team),
    size = 3,
    color = "black",
    seed = 1,
    nudge_x = -0.029,
    nudge_y = 35,
    # Segment settings found by experimenting with the segment.* arguments
    segment.size = 0.5,
    segment.curvature = -0.0000002,
    segment.angle = 1,
    segment.ncp = 1000
  ) +
  coord_flip()
Created on 2021-08-26 by the reprex package (v2.0.0)

Annotate ggplot based on a second data frame

I have a faceted plot made with ggplot that is already working, it shows data about river altitude against years. I'm trying to add arrows based on a second dataframe which details when floods occurred.
Here's the current plot:
I would like to draw arrows in the top part of each graph based on date information in my second dataframe where each row corresponds to a flood and contains a date.
The link between the two dataframes is the Station_code column, each river has one or more stations which is indicated by this data (in this case only the Var river has two stations).
Here is the dput of the data frame used to create the original plot:
structure(list(River = c("Durance", "Durance", "Durance", "Durance",
"Roya", "Var"), Reach = c("La Brillanne", "Les Mées", "La Brillanne",
"Les Mées", "Basse vallée", "Basse vallée"), Area_km = c(465,
465, 465, 465, 465, 465), Type = c("restored", "target", "restored",
"target", "witness", "restored"), Year = c(2017, 2017, 2012,
2012, 2018, 2011), Restoration_year = c(2013, 2013, 2013, 2013,
NA, 2009), Station_code = c("X1130010", "X1130010", "X1130010",
"X1130010", "Y6624010", "Y6442015"), BRI_adi_moy_sstransect = c(0.00375820736746399,
0.00244752138003355, 0.00446807607783864, 0.0028792618981479,
0.00989200896930529, 0.00357247516596474), SD_sstransect = c(0.00165574247612667,
0.0010044634990875, 0.00220534492332107, 0.00102694633805149,
0.00788573233793128, 0.00308489160008849), min_BRI_sstransect = c(0.00108123849595469,
0.00111493913953216, 0.000555500340370182, 0.00100279590198288,
0, 0), max_BRI_sstransect = c(0.0127781240385231, 0.00700537285706352,
0.0210216858227621, 0.00815151653110584, 0.127734814926934, 0.0223738711013954
), Nb_sstr_unique_m = c(0.00623321576795815, 0.00259754717331206,
0.00117035034437559, 0.00209845092352825, 0.0458628969163946,
3.60620609570031), BRI_adi_moy_transect = c(0.00280232169999531,
0.00173868254527501, 0.00333818552810438, 0.00181398859573415,
0.00903651639185542, 0.00447856455432537), SD_transect = c(0.00128472161839638,
0.000477209421076879, 0.00204050725984513, 0.000472466654940182,
0.00780731734792112, 0.00310039904793707), min_BRI_transect = c(0.00108123849595469,
0.00106445386542223, 0.000901992689363725, 0.000855135344651009,
0.000944414463851629, 0.000162012161197014), max_BRI_transect = c(0.00709151795418251,
0.00434366293208643, 0.011717024999411, 0.0031991369873946, 0.127734814926934,
0.0187952134332499), Nb_tr_unique_m = c(0, 0, 0, 0, 0, 0), Error_reso = c(0.0011,
8e-04, 0.0018, 0.0011, 0.0028, 0.0031), W_BA = c(296.553323029366,
411.056574923547, 263.944186046512, 363.32874617737, 88.6420798065296,
158.66866970576), W_BA_sd = c(84.1498544481585, 65.3909073242282,
100.067554749308, 55.5534084807705, 35.2337070278364, 64.6978349498119
), W_BA_min = c(131, 206, 33, 223, 6, 45), W_BA_max = c(472,
564, 657, 513, 188, 381), W_norm = c(5.73271228619998, 7.9461900926133,
5.10234066090722, 7.02355699765464, 5.09378494746752, 4.81262001531126
), W_norm_sd = c(1.62671218635823, 1.2640804493236, 1.93441939783807,
1.07391043231191, 2.02469218788178, 1.96236658443141), W_norm_min = c(2.53237866910643,
3.98221378500706, 0.637927450996277, 4.31084307794454, 0.344787822572658,
1.36490651299098), W_norm_max = c(9.12429566273463, 10.9027600715727,
12.7005556152895, 9.91687219276031, 10.8033517739433, 11.5562084766569
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))
And here is the dput of the data frame containing the flooding dates:
structure(list(Station_code = c("Y6042010", "Y6042010", "Y6042010",
"Y6042010", "Y6042010", "Y6042010"), Date = structure(c(12006,
12007, 12016, 12017, 13416, 13488), class = "Date"), Qm3s = c(156,
177, 104, 124, 125, 90.4), Qual = c(5, 5, 5, 5, 5, 5), Year = c(2002,
2002, 2002, 2002, 2006, 2006), Month = c(11, 11, 11, 11, 9, 12
), Station_river = c("Var#Entrevaux", "Var#Entrevaux", "Var#Entrevaux",
"Var#Entrevaux", "Var#Entrevaux", "Var#Entrevaux"), River = c("Var",
"Var", "Var", "Var", "Var", "Var"), Mod_inter = c(13.32, 13.32,
13.32, 13.32, 13.32, 13.32), Qm3s_norm = c(11.7117117117117,
13.2882882882883, 7.80780780780781, 9.30930930930931, 9.38438438438438,
6.78678678678679), File_name = c("Var#Entrevaux.dat", "Var#Entrevaux.dat",
"Var#Entrevaux.dat", "Var#Entrevaux.dat", "Var#Entrevaux.dat",
"Var#Entrevaux.dat"), Station_name = c("#Entrevaux", "#Entrevaux",
"#Entrevaux", "#Entrevaux", "#Entrevaux", "#Entrevaux"), Reach = c("Daluis",
"Daluis", "Daluis", "Daluis", "Daluis", "Daluis"), Restauration_year = c(2009,
2009, 2009, 2009, 2009, 2009), `Area_km[BH]` = c(676, 676, 676,
676, 676, 676), Starting_year = c(1920, 1920, 1920, 1920, 1920,
1920), Ending_year = c("NA", "NA", "NA", "NA", "NA", "NA"), Accuracy = c("good",
"good", "good", "good", "good", "good"), Q2 = c(86, 86, 86, 86,
86, 86), Q5 = c(120, 120, 120, 120, 120, 120), Q10 = c(150, 150,
150, 150, 150, 150), Q20 = c(170, 170, 170, 170, 170, 170), Q50 = c(200,
200, 200, 200, 200, 200), Data_producer = c("DREAL_PACA", "DREAL_PACA",
"DREAL_PACA", "DREAL_PACA", "DREAL_PACA", "DREAL_PACA"), Coord_X_L2e_Z32 = c(959313,
959313, 959313, 959313, 959313, 959313), Coord_Y_L2e_Z32 = c(1893321,
1893321, 1893321, 1893321, 1893321, 1893321), Coord_X_L93 = c(1005748.88,
1005748.88, 1005748.88, 1005748.88, 1005748.88, 1005748.88),
Coord_Y_L93 = c(6324083.97, 6324083.97, 6324083.97, 6324083.97,
6324083.97, 6324083.97), New_FN = c("Var#Entrevaux.csv",
"Var#Entrevaux.csv", "Var#Entrevaux.csv", "Var#Entrevaux.csv",
"Var#Entrevaux.csv", "Var#Entrevaux.csv"), NA_perc = c(14.92,
14.92, 14.92, 14.92, 14.92, 14.92), Q2_norm = c(6.45645645645646,
6.45645645645646, 6.45645645645646, 6.45645645645646, 6.45645645645646,
6.45645645645646), Q5_norm = c(9.00900900900901, 9.00900900900901,
9.00900900900901, 9.00900900900901, 9.00900900900901, 9.00900900900901
), Q10_norm = c(11.2612612612613, 11.2612612612613, 11.2612612612613,
11.2612612612613, 11.2612612612613, 11.2612612612613), Q20_norm = c(12.7627627627628,
12.7627627627628, 12.7627627627628, 12.7627627627628, 12.7627627627628,
12.7627627627628), Q50_norm = c(15.015015015015, 15.015015015015,
15.015015015015, 15.015015015015, 15.015015015015, 15.015015015015
)), row.names = c(NA, -6L), groups = structure(list(Station_code = "Y6042010",
.rows = structure(list(1:6), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = 1L, class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
EDIT
Here is an example of what I would like to do on the plot:
This is the code I use currently to do the plot:
# Current plot from the question: BRI_adi_moy_transect against Year for every
# river except "Roya", one facet per River, with two sets of error bars, a
# vertical line at Restoration_year and a horizontal line at 0.004.
ggplot(data = tst_formule[tst_formule$River != "Roya",], aes(x = Year, y = BRI_adi_moy_transect, shape = River, col = Type)) +
geom_point(size = 3) +
# Error bars of +/- SD_transect around each point
geom_errorbar(aes(ymin = BRI_adi_moy_transect - SD_transect, ymax = BRI_adi_moy_transect + SD_transect), size = 0.7, width = 0.3) +
# Second, black set of error bars of +/- Error_reso; the constant linetype
# label gives it its own legend entry
geom_errorbar(aes(ymin = BRI_adi_moy_transect - Error_reso, ymax = BRI_adi_moy_transect + Error_reso, linetype = "Error due to resolution"), size = 0.3, width = 0.3, colour = "black") +
scale_linetype_manual(name = NULL, values = 2) +
scale_shape_manual(values = c(15, 18, 17, 16)) +
scale_colour_manual(values = c("chocolate1", "darkcyan")) +
# Each new_scale("linetype") starts a fresh linetype scale for the layers
# that follow, so every line type below gets a separate scale_linetype_manual()
new_scale("linetype") +
geom_vline(aes(xintercept = Restoration_year, linetype = "Restoration"), colour = "chocolate1") +
scale_linetype_manual(name = NULL, values = 5) +
new_scale("linetype") +
geom_hline(aes(yintercept = 0.004, linetype = "Threshold"), colour= 'black') +
scale_linetype_manual(name = NULL, values = 4) +
# Axis limits are computed from the whole of tst_formule, i.e. including the
# "Roya" rows that are filtered out of the plotted data
scale_y_continuous("BRI*", limits = c(min(tst_formule$BRI_adi_moy_transect - tst_formule$SD_transect, tst_formule$BRI_adi_moy_transect - tst_formule$Error_reso ), max(tst_formule$BRI_adi_moy_transect + tst_formule$SD_transect, tst_formule$BRI_adi_moy_transect + tst_formule$Error_reso))) +
scale_x_continuous(limits = c(min(tst_formule$Year - 1),max(tst_formule$Year + 1)), breaks = scales::breaks_pretty(n = 6)) +
theme_bw() +
facet_wrap(vars(River)) +
theme(legend.spacing.y = unit(-0.01, "cm")) +
# NOTE(review): `line` is not one of the aesthetics mapped above (shape,
# colour, linetype) -- confirm whether `linetype` was intended.
guides(shape = guide_legend(order = 1),
colour = guide_legend(order = 2),
line = guide_legend(order = 3))
After tests and more research, I managed to do it by adding the second dataframe in geom_text():
# Flood arrows taken from the second data frame (Flood_plot), drawn on their
# own linetype scale so they get a separate legend entry.
new_scale("linetype") +
geom_segment(
  data = Flood_plot,
  aes(x = Date, xend = Date, y = 0.025, yend = 0.020, linetype = "Morphogenic flood"),
  arrow = arrow(length = unit(0.2, "cm")),
  inherit.aes = FALSE # this layer uses only its own mappings, not the main plot's
) +
# The legend guide is a scale setting, not a geom_segment() parameter, so the
# ordering is requested here.
scale_linetype_manual(name = NULL, values = 1, guide = guide_legend(order = 6)) +
new_scale() creates a new linetype definition after the ones I created before; geom_segment() draws the arrows I wanted (the same approach also works with geom_text()); and scale_linetype_manual() draws the arrow in the legend without the "linetype" title above it. The second dataframe has the same column (River) as the first one, so its rows are placed in the matching facet panels.

Create new column with percentages in data frame

I have the following dataframe:
dput(df1)
structure(list(month = c(1, 1, 2, 2, 3, 4), transaction_type = c("AAA",
"BBB", "BBB", "CCC",
"DDD", "AAA"), max_wt_per_month = c(54.9,
51.6833333333333, 52.3333333333333, 49.4666666666667, 49.85,
48.5833333333333), min_wt_per_month = c(0, 0, 0, 0, 0, 0), avg_wt_per_month = c(8.41701333107861,
7.65211141060198, 6.44184012508551, 7.74798927613941, 7.4360566888844,
7.50611319574734), prop = c(Inf, Inf, Inf, Inf, Inf, Inf)), .Names = c("month",
"transaction_type", "max_wt_per_month", "min_wt_per_month", "avg_wt_per_month",
"prop"), row.names = c(NA, -6L), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), vars = list(month), drop = TRUE, indices = list(
0:5), group_sizes = 6L, biggest_group_size = 6L, labels = structure(list(
month = 1), row.names = c(NA, -1L), class = "data.frame", vars = list(
month), drop = TRUE, .Names = "month"))
I want to create column prop that would contain the percentage of maximum waiting time with respect to each month. If I run this code, then I get Inf values in most of the rows... (especially it is evident in the real dataset):
# For one row `vec`: 100 times its 3rd field, divided by the sum of
# max_wt_per_month over the rows of data_merged_transactions whose month
# equals the row's 1st field. Reads data_merged_transactions from the
# enclosing environment.
# NOTE(review): apply() works on a matrix conversion of the data frame, so
# with a character column present `vec` presumably holds strings; if
# `month == vec[1]` then matches no row, the denominator is 0 and the result
# is Inf -- confirm against the real data.
my_fun=function(vec){
100*as.numeric(vec[3]) /
sum(with(data_merged_transactions, ifelse(month == vec[1], max_wt_per_month, 0))) }
# Apply my_fun to every row and store the result in a new `prop` column
data_merged_transactions$prop=apply(data_merged_transactions , 1 , my_fun)
I then finally need to create the filled area chart so that each area would be a percentage out of 100%:
# Filled area chart of prop against month, one filled area per transaction_type
ggplot(data_merged_transactions, aes(x=month, y=prop, fill=transaction_type)) +
geom_area(alpha=0.6 , size=1, colour="black")
Why do I get Inf if the sum is not equal to 0?
Moreover, is it possible to create filled area chart with months being factors (Jan, Feb,etc.), not numbers? I tried to substitute month id's by month names, but then I got very thin bars instead of a filled area.
Is this what you were looking for?
library(tidyverse)

# Share of each row's maximum waiting time within its month, expressed as a
# percentage so that the stacked areas of a month add up to 100.
df1_tidy <- df1 %>%
  group_by(month) %>%
  mutate(prop = 100 * max_wt_per_month / sum(max_wt_per_month)) %>%
  ungroup() # drop the grouping so later steps are not silently per-month

# Months actually present, used for both break positions and their labels.
month_ids <- sort(unique(df1_tidy$month))

ggplot(
  data = df1_tidy,
  aes(x = month, y = prop, fill = transaction_type)
) +
  geom_area(alpha = 0.6, size = 1, colour = "black") +
  # Set breaks explicitly: a labels vector must match the breaks in length,
  # and the default breaks are not guaranteed to fall on whole months.
  scale_x_continuous(breaks = month_ids, labels = month.abb[month_ids])

Resources