Related
I am using checkboxGroupInput for data filtering and ggplot to plot a line graph. The plot shows up if one of the three options is selected; however, if two or three checkboxes are selected, it gives the error "Aesthetics must be either length 1 or the same as the data (10): group and colour". Also, the plot doesn't update when joining_date is updated.
# Example data: one row per employee record, with the joining date and one
# 0/1 indicator column per batch.
# NOTE(review): `batches___A_2020` does not carry the `batch___` prefix that the
# other three indicator columns use -- confirm whether this is a typo.
mydata<-structure(list(employee = c("Mary", "rob",
"smary", "rob", "Abe", "Abe"
), joining_date = structure(c(17869, 17862, 17865, 17848,
17862, 17848), class = "Date"), batch___A_2019 = c(1, 1,
1, 1, 1, 0), batches___A_2020 = c(0, 0, 0, 0, 0, 1), batch___B_2020 = c(0,
0, 0, 0, 0, 0), batch___B_2023 = c(0, 0, 0, 0, 0, 0)), row.names = c(NA,
6L), class = "data.frame")
Filter data:
# Reactive data set behind the time-series plot: the rows of `mydata` that
# match the selected employee(s), the ticked batch checkbox(es) and the chosen
# joining-date range, counted per employee and calendar month.
timeseries <- reactive({
  # Wait until every input has a value before filtering.
  # NOTE(review): assumes `input$Dates` is a two-element date range -- confirm
  # the inputId matches the date widget in the UI.
  req(input$employee, input$test, input$Dates)

  # One pattern per ticked checkbox; matches() takes the union of all patterns.
  batch_patterns <- str_c("batch___", tolower(input$test))

  selected <- mydata %>%
    filter(
      # %in% rather than ==: with several selected employees `==` would recycle
      # the selection element-wise against the column and drop valid rows.
      employee %in% input$employee,
      if_any(matches(batch_patterns), ~ .x == 1),
      joining_date >= input$Dates[1],
      joining_date <= input$Dates[2]
    )

  # Nothing matched: stop quietly instead of failing in seq.Date(min(), max())
  # on an empty month column.
  req(nrow(selected) > 0)

  selected %>%
    group_by(employee, month = lubridate::floor_date(joining_date, "month")) %>%
    summarize(Total = n(), .groups = "drop") %>%
    # Add explicit zero-count rows for employee/month combinations without data.
    complete(
      month = seq.Date(min(month), max(month), by = "month"),
      employee,
      fill = list(Total = 0)
    )
})
Code to generate the plot:
# Line chart of the monthly counts returned by timeseries(), one line per
# employee.
output$timeseriesplot <- renderPlot({
  # Values mapped inside aes() must be columns of the plotted data (or have
  # length 1). Mapping `input$test` there raises "Aesthetics must be either
  # length 1 or the same as the data" as soon as two or three checkboxes are
  # ticked, because it is then a vector of length 2 or 3. Group and colour by a
  # column of the data instead and report the ticked batches in the subtitle.
  timeseries() %>%
    ggplot(aes(x = month, y = Total, group = employee, colour = employee)) +
    geom_line() +
    scale_x_date(date_labels = "%b %Y", date_breaks = "1 month") +
    labs(
      y = "Total",
      colour = "Employee",
      subtitle = paste("Batch:", paste(input$test, collapse = ", "))
    ) +
    theme(axis.text.x = element_text(angle = 50, hjust = 1))
})
I have the values of the error bars, and I want to specify the values in "ggpubr". It seems like the add and error.plot functions have a lot of possibilities (e.g., "mean_sd"), but I couldn't find anything that will allow me to specify the values myself. I also tried geom_errorbar, but it doesn't work properly. I know next time I will use ggplot2 for flexibility.
example code -
# Example data: 16 PSE measurements, four per Size x Distance combination.
df <- data.frame(
  pse = c(40, 42, 41, 40, 60, 61, 62, 60, 39, 38, 40, 39, 59, 58, 60, 59),
  Size = rep(rep(c("30 cm", "60 cm"), each = 4), times = 2),
  Distance = rep(c("3.5 cm", "6.5 cm"), each = 8),
  stringsAsFactors = FALSE
)
my_comparisons <- list(c("Near", "Far"))
# Dodged bar plot of the mean PSE per Size, split by Distance, with
# standard-error bars ("mean_se") and the individual observations ("jitter").
ggbarplot(
  df,
  x = "Size", y = "pse",
  fill = "Distance", color = "Distance",
  ylim = c(25, 75), width = 0.6,
  add = c("mean_se", "jitter"),
  palette = c("#000000", "#111111"),
  position = position_dodge(0.65)
) +
  # theme_bw() replaces the whole theme, so the tweaks have to come after it.
  theme_bw() +
  theme(
    legend.position = "top",
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14)
  ) +
  scale_fill_grey(start = 0.8, end = 0.95) +
  ylab("PSE (mm)")
# Resulting figure: [![enter image description here][1]][1]
[1]: https://i.stack.imgur.com/AlrKa.jpg
library(ggpubr)
# Example data: 16 PSE measurements, four per Size x Distance combination.
df <- data.frame(
  pse = c(40, 42, 41, 40, 60, 61, 62, 60, 39, 38, 40, 39, 59, 58, 60, 59),
  Size = rep(rep(c("30 cm", "60 cm"), each = 4), times = 2),
  Distance = rep(c("3.5 cm", "6.5 cm"), each = 8),
  stringsAsFactors = FALSE
)

# Mean PSE of each Size x Distance cell; used to place the text annotations.
mean_30_3.5 <- with(df, mean(pse[Size == "30 cm" & Distance == "3.5 cm"]))
mean_30_6.5 <- with(df, mean(pse[Size == "30 cm" & Distance == "6.5 cm"]))
mean_60_3.5 <- with(df, mean(pse[Size == "60 cm" & Distance == "3.5 cm"]))
mean_60_6.5 <- with(df, mean(pse[Size == "60 cm" & Distance == "6.5 cm"]))

my_comparisons <- list(c("Near", "Far"))
# Same bar plot as in the question, with one free-text annotation placed 3
# units above the mean of each bar. Replace the "your_value*" placeholders with
# the values to display.
ggbarplot(
  df,
  x = "Size", y = "pse",
  fill = "Distance", color = "Distance",
  ylim = c(25, 75),
  label = FALSE, # spelled out: `F` is an ordinary variable that can be reassigned
  width = 0.6,
  add = c("mean_se", "jitter"),
  palette = c("#000000", "#111111"),
  position = position_dodge(0.65)
) +
  # theme_bw() replaces the whole theme, so the tweaks have to come after it.
  theme_bw() +
  theme(
    legend.position = "top",
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14)
  ) +
  scale_fill_grey(start = 0.8, end = 0.95) +
  ylab("PSE (mm)") +
  # x positions 0.85/1.15 and 1.85/2.15 sit left and right of the two Size groups.
  annotate("text", x = 0.85, y = mean_30_3.5 + 3, label = "your_value1") +
  annotate("text", x = 1.15, y = mean_30_6.5 + 3, label = "your_value2") +
  annotate("text", x = 1.85, y = mean_60_3.5 + 3, label = "your_value3") +
  annotate("text", x = 2.15, y = mean_60_6.5 + 3, label = "your_value4")
Did you mean something like this?:
Thank you!
I have also found a different solution. Sharing it here.
# Summarise one column within each combination of grouping columns.
#
# data        data frame holding the measurements
# varname     name (string) of the column to summarise
# groupnames  character vector with the names of the grouping columns
#
# Returns a data frame with one row per group: the grouping columns, the group
# mean (stored under the name given in `varname`) and the standard deviation
# in `sd`. Missing values are ignored in both statistics.
data_summary <- function(data, varname, groupnames) {
  # plyr is called through its namespace instead of require(): attaching it
  # from inside a function masks dplyr verbs for the rest of the session, and
  # require() merely returns FALSE when the package is not installed.
  summary_func <- function(x, col) {
    c(
      mean = mean(x[[col]], na.rm = TRUE),
      sd = sd(x[[col]], na.rm = TRUE)
    )
  }
  data_sum <- plyr::ddply(data, groupnames, .fun = summary_func, varname)
  # Store the mean under the name of the summarised variable.
  plyr::rename(data_sum, c("mean" = varname))
}
Next, summarise the data and draw the plot with manually specified error bars:
# Per-group means of PSE; they are the centres of the hand-specified error bars.
df2 <- data_summary(x, varname = "PSE", groupnames = c("Size", "Distance"))
df2$Size <- as.factor(df2$Size)
my_comparisons <- list(c("Near", "Far"))

# ggbarplot() draws the group means ("mean") and the raw points ("jitter");
# the error bars come from geom_errorbar() so that their half-width (0.32 here)
# can be chosen freely.
# Every `+` ends its line: a line that *starts* with `+` is parsed as a new
# statement and silently cuts the plot chain in two.
ggbarplot(
  x,
  x = "Size", y = "PSE",
  fill = "Distance", color = "Distance",
  ylim = c(25, 75), width = 0.6,
  add = c("mean", "jitter"),
  palette = c("#000000", "#111111"),
  position = position_dodge(0.65)
) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14),
    legend.position = "top"
  ) +
  scale_fill_grey(start = 0.8, end = 0.95) +
  ylab("PSE (mm)") +
  geom_errorbar(
    data = df2,
    mapping = aes(
      x = Size, y = PSE, color = Distance,
      ymin = PSE - 0.32, ymax = PSE + 0.32
    ),
    width = 0.15,
    # Same dodge width as the bars so the error bars sit on the bar centres.
    position = position_dodge(0.65)
  )
I have a faceted plot made with ggplot that is already working, it shows data about river altitude against years. I'm trying to add arrows based on a second dataframe which details when floods occurred.
Here's the current plot:
I would like to draw arrows in the top part of each graph based on date information in my second dataframe where each row corresponds to a flood and contains a date.
The link between the two dataframes is the Station_code column, each river has one or more stations which is indicated by this data (in this case only the Var river has two stations).
Here is the dput of the data frame used to create the original plot:
structure(list(River = c("Durance", "Durance", "Durance", "Durance",
"Roya", "Var"), Reach = c("La Brillanne", "Les Mées", "La Brillanne",
"Les Mées", "Basse vallée", "Basse vallée"), Area_km = c(465,
465, 465, 465, 465, 465), Type = c("restored", "target", "restored",
"target", "witness", "restored"), Year = c(2017, 2017, 2012,
2012, 2018, 2011), Restoration_year = c(2013, 2013, 2013, 2013,
NA, 2009), Station_code = c("X1130010", "X1130010", "X1130010",
"X1130010", "Y6624010", "Y6442015"), BRI_adi_moy_sstransect = c(0.00375820736746399,
0.00244752138003355, 0.00446807607783864, 0.0028792618981479,
0.00989200896930529, 0.00357247516596474), SD_sstransect = c(0.00165574247612667,
0.0010044634990875, 0.00220534492332107, 0.00102694633805149,
0.00788573233793128, 0.00308489160008849), min_BRI_sstransect = c(0.00108123849595469,
0.00111493913953216, 0.000555500340370182, 0.00100279590198288,
0, 0), max_BRI_sstransect = c(0.0127781240385231, 0.00700537285706352,
0.0210216858227621, 0.00815151653110584, 0.127734814926934, 0.0223738711013954
), Nb_sstr_unique_m = c(0.00623321576795815, 0.00259754717331206,
0.00117035034437559, 0.00209845092352825, 0.0458628969163946,
3.60620609570031), BRI_adi_moy_transect = c(0.00280232169999531,
0.00173868254527501, 0.00333818552810438, 0.00181398859573415,
0.00903651639185542, 0.00447856455432537), SD_transect = c(0.00128472161839638,
0.000477209421076879, 0.00204050725984513, 0.000472466654940182,
0.00780731734792112, 0.00310039904793707), min_BRI_transect = c(0.00108123849595469,
0.00106445386542223, 0.000901992689363725, 0.000855135344651009,
0.000944414463851629, 0.000162012161197014), max_BRI_transect = c(0.00709151795418251,
0.00434366293208643, 0.011717024999411, 0.0031991369873946, 0.127734814926934,
0.0187952134332499), Nb_tr_unique_m = c(0, 0, 0, 0, 0, 0), Error_reso = c(0.0011,
8e-04, 0.0018, 0.0011, 0.0028, 0.0031), W_BA = c(296.553323029366,
411.056574923547, 263.944186046512, 363.32874617737, 88.6420798065296,
158.66866970576), W_BA_sd = c(84.1498544481585, 65.3909073242282,
100.067554749308, 55.5534084807705, 35.2337070278364, 64.6978349498119
), W_BA_min = c(131, 206, 33, 223, 6, 45), W_BA_max = c(472,
564, 657, 513, 188, 381), W_norm = c(5.73271228619998, 7.9461900926133,
5.10234066090722, 7.02355699765464, 5.09378494746752, 4.81262001531126
), W_norm_sd = c(1.62671218635823, 1.2640804493236, 1.93441939783807,
1.07391043231191, 2.02469218788178, 1.96236658443141), W_norm_min = c(2.53237866910643,
3.98221378500706, 0.637927450996277, 4.31084307794454, 0.344787822572658,
1.36490651299098), W_norm_max = c(9.12429566273463, 10.9027600715727,
12.7005556152895, 9.91687219276031, 10.8033517739433, 11.5562084766569
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))
And here is the dput of the data frame containing the flooding dates:
structure(list(Station_code = c("Y6042010", "Y6042010", "Y6042010",
"Y6042010", "Y6042010", "Y6042010"), Date = structure(c(12006,
12007, 12016, 12017, 13416, 13488), class = "Date"), Qm3s = c(156,
177, 104, 124, 125, 90.4), Qual = c(5, 5, 5, 5, 5, 5), Year = c(2002,
2002, 2002, 2002, 2006, 2006), Month = c(11, 11, 11, 11, 9, 12
), Station_river = c("Var#Entrevaux", "Var#Entrevaux", "Var#Entrevaux",
"Var#Entrevaux", "Var#Entrevaux", "Var#Entrevaux"), River = c("Var",
"Var", "Var", "Var", "Var", "Var"), Mod_inter = c(13.32, 13.32,
13.32, 13.32, 13.32, 13.32), Qm3s_norm = c(11.7117117117117,
13.2882882882883, 7.80780780780781, 9.30930930930931, 9.38438438438438,
6.78678678678679), File_name = c("Var#Entrevaux.dat", "Var#Entrevaux.dat",
"Var#Entrevaux.dat", "Var#Entrevaux.dat", "Var#Entrevaux.dat",
"Var#Entrevaux.dat"), Station_name = c("#Entrevaux", "#Entrevaux",
"#Entrevaux", "#Entrevaux", "#Entrevaux", "#Entrevaux"), Reach = c("Daluis",
"Daluis", "Daluis", "Daluis", "Daluis", "Daluis"), Restauration_year = c(2009,
2009, 2009, 2009, 2009, 2009), `Area_km[BH]` = c(676, 676, 676,
676, 676, 676), Starting_year = c(1920, 1920, 1920, 1920, 1920,
1920), Ending_year = c("NA", "NA", "NA", "NA", "NA", "NA"), Accuracy = c("good",
"good", "good", "good", "good", "good"), Q2 = c(86, 86, 86, 86,
86, 86), Q5 = c(120, 120, 120, 120, 120, 120), Q10 = c(150, 150,
150, 150, 150, 150), Q20 = c(170, 170, 170, 170, 170, 170), Q50 = c(200,
200, 200, 200, 200, 200), Data_producer = c("DREAL_PACA", "DREAL_PACA",
"DREAL_PACA", "DREAL_PACA", "DREAL_PACA", "DREAL_PACA"), Coord_X_L2e_Z32 = c(959313,
959313, 959313, 959313, 959313, 959313), Coord_Y_L2e_Z32 = c(1893321,
1893321, 1893321, 1893321, 1893321, 1893321), Coord_X_L93 = c(1005748.88,
1005748.88, 1005748.88, 1005748.88, 1005748.88, 1005748.88),
Coord_Y_L93 = c(6324083.97, 6324083.97, 6324083.97, 6324083.97,
6324083.97, 6324083.97), New_FN = c("Var#Entrevaux.csv",
"Var#Entrevaux.csv", "Var#Entrevaux.csv", "Var#Entrevaux.csv",
"Var#Entrevaux.csv", "Var#Entrevaux.csv"), NA_perc = c(14.92,
14.92, 14.92, 14.92, 14.92, 14.92), Q2_norm = c(6.45645645645646,
6.45645645645646, 6.45645645645646, 6.45645645645646, 6.45645645645646,
6.45645645645646), Q5_norm = c(9.00900900900901, 9.00900900900901,
9.00900900900901, 9.00900900900901, 9.00900900900901, 9.00900900900901
), Q10_norm = c(11.2612612612613, 11.2612612612613, 11.2612612612613,
11.2612612612613, 11.2612612612613, 11.2612612612613), Q20_norm = c(12.7627627627628,
12.7627627627628, 12.7627627627628, 12.7627627627628, 12.7627627627628,
12.7627627627628), Q50_norm = c(15.015015015015, 15.015015015015,
15.015015015015, 15.015015015015, 15.015015015015, 15.015015015015
)), row.names = c(NA, -6L), groups = structure(list(Station_code = "Y6042010",
.rows = structure(list(1:6), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = 1L, class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
EDIT
Here is an example of what I would like to do on the plot:
This is the code I use currently to do the plot:
# BRI* per year, one panel per river (the Roya is excluded from the plotted
# data), with SD and resolution error bars, the restoration year as a vertical
# line and a fixed horizontal line at 0.004.
ggplot(
  data = tst_formule[tst_formule$River != "Roya", ],
  aes(x = Year, y = BRI_adi_moy_transect, shape = River, col = Type)
) +
  geom_point(size = 3) +
  # +/- SD_transect around each point, in the colour of the point.
  geom_errorbar(
    aes(
      ymin = BRI_adi_moy_transect - SD_transect,
      ymax = BRI_adi_moy_transect + SD_transect
    ),
    size = 0.7, width = 0.3
  ) +
  # +/- Error_reso in black; the constant linetype label creates a legend entry.
  geom_errorbar(
    aes(
      ymin = BRI_adi_moy_transect - Error_reso,
      ymax = BRI_adi_moy_transect + Error_reso,
      linetype = "Error due to resolution"
    ),
    size = 0.3, width = 0.3, colour = "black"
  ) +
  # The legend order of each linetype legend is set on its own scale: there is
  # no `line` aesthetic for guides() to act on, and after new_scale() a single
  # guides(linetype = ...) could not address the three scales separately.
  scale_linetype_manual(name = NULL, values = 2, guide = guide_legend(order = 3)) +
  scale_shape_manual(values = c(15, 18, 17, 16)) +
  scale_colour_manual(values = c("chocolate1", "darkcyan")) +
  new_scale("linetype") +
  geom_vline(
    aes(xintercept = Restoration_year, linetype = "Restoration"),
    colour = "chocolate1"
  ) +
  scale_linetype_manual(name = NULL, values = 5, guide = guide_legend(order = 4)) +
  new_scale("linetype") +
  geom_hline(aes(yintercept = 0.004, linetype = "Threshold"), colour = "black") +
  scale_linetype_manual(name = NULL, values = 4, guide = guide_legend(order = 5)) +
  # Axis limits are computed from the full table so both kinds of error bar fit.
  scale_y_continuous(
    "BRI*",
    limits = c(
      min(
        tst_formule$BRI_adi_moy_transect - tst_formule$SD_transect,
        tst_formule$BRI_adi_moy_transect - tst_formule$Error_reso
      ),
      max(
        tst_formule$BRI_adi_moy_transect + tst_formule$SD_transect,
        tst_formule$BRI_adi_moy_transect + tst_formule$Error_reso
      )
    )
  ) +
  scale_x_continuous(
    limits = c(min(tst_formule$Year - 1), max(tst_formule$Year + 1)),
    breaks = scales::breaks_pretty(n = 6)
  ) +
  theme_bw() +
  facet_wrap(vars(River)) +
  theme(legend.spacing.y = unit(-0.01, "cm")) +
  guides(
    shape = guide_legend(order = 1),
    colour = guide_legend(order = 2)
  )
After tests and more research, I managed to do it by passing the second dataframe to geom_segment():
# Fragment to splice into an existing ggplot `+` chain (hence the trailing `+`).
# NOTE(review): `Flood_plot` is not shown here; it is assumed to provide `Date`
# on the same x scale as the main plot and a `River` column for faceting.
new_scale("linetype") +
  geom_segment(
    data = Flood_plot,
    aes(
      x = Date, xend = Date, y = 0.025, yend = 0.020,
      linetype = "Morphogenic flood"
    ),
    arrow = arrow(length = unit(0.2, "cm")),
    inherit.aes = FALSE
  ) +
  # The legend order belongs on the scale; geom_segment() has no `guide`
  # argument and would ignore it.
  scale_linetype_manual(name = NULL, values = 1, guide = guide_legend(order = 6)) +
new_scale() creates a new linetype scale after the ones I created before; geom_segment() draws the arrows I wanted (the same approach also works with geom_text()); and scale_linetype_manual() draws the arrow in the legend without the "linetype" title above it. The second dataframe has the same column (River) as the first one, so its rows are faceted into the same panels.
Following is the dataframe for which I want to create a grouped barplot
df <- structure(list(Race = c("Caucasian/White", "African American", "Asian", "Other"), 'Hospital 1' = c(374, 820, 31, 108), 'Hospital 2' = c(291, 311, 5, 15), 'Hospital 3' = c(330, 206, 6, 5), 'Hospital 4' = c(950, 341, 6, 13)), class = "data.frame", row.names = c(NA, -4L))
To be precise, I want to group each Hospital according to 'Race'. Each hospital bars should be represented as percentages with their corresponding value labels.
Not a programmer basically, but trying to learn.
You probably want something like this:
# Grouped (dodged) bar chart: for every hospital, the share of each Race among
# that hospital's total, with the percentage printed above each bar.
df %>%
  pivot_longer(
    contains("Hospital"),
    names_to = "hospital",
    values_to = "count"
  ) %>%
  # Percentages are computed within each hospital, so each hospital sums to 100%.
  group_by(hospital) %>%
  mutate(percent = count / sum(count)) %>%
  ungroup() %>%
  ggplot(aes(x = hospital, y = percent, fill = Race)) +
  geom_col(position = position_dodge(width = 0.9)) +
  # Same dodge width as the bars so every label sits above its own bar.
  geom_text(
    aes(label = scales::percent(percent, accuracy = 0.1)),
    position = position_dodge(width = 0.9),
    vjust = -0.4,
    size = 3
  ) +
  scale_y_continuous(labels = scales::percent)
I want to show more dates on the x axis, something like this: Mar 09, Mar 12, Mar 19, etc.
So this is my general data:
structure(list(Dia = structure(c(1583452800, 1583539200, 1583625600,
1583712000, 1583798400, 1583884800, 1583884800, 1583884800, 1583971200,
1584057600, 1584057600, 1584144000, 1584230400, 1584316800, 1584403200,
1584489600, 1584576000), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
Hora = structure(c(-2209010400, -2209010400, -2209075200,
-2209044600, -2209046400, -2209039200, -2209023600, -2209003200,
-2209039500, -2209044600, -2209017600, -2209041000, -2209027800,
-2209040160, -2209038720, -2209050000, -2209032000), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), Total_Pruebas = c(155, 219, 250,
318, 346, 652, 656, 714, 855, 983, 1232, 1545, 1822, 2315,
2680, 3075, 4075), Descartados = c(154, 213, 243, 309, 335,
640, 641, 697, 833, 955, 1194, 1502, 1751, 2229, 2563, 2930,
3841), Positivos = c(1, 6, 7, 9, 11, 12, 15, 17, 22, 28,
38, 43, 71, 86, 117, 145, 234), TasaPositivos = c(0.645161290322581,
2.73972602739726, 2.8, 2.83018867924528, 3.17919075144509,
1.84049079754601, 2.28658536585366, 2.38095238095238, 2.57309941520468,
2.84842319430315, 3.08441558441558, 2.7831715210356, 3.89681668496158,
3.71490280777538, 4.36567164179105, 4.71544715447155, 5.74233128834356
), Pruebas_dia = c(155, 64, 31, 99, 28, 306, 4, 58, 141,
128, 249, 313, 277, 493, 365, 395, 1000), Recuperados = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 1, 1,
1)), row.names = c(NA, 17L), class = "data.frame")
This is my code
# Daily summary of `dat`: new positives and tests performed per day, plus the
# running total of positives.
dat1 <- dat %>%
  # Increment of the cumulative count relative to the previous row
  # (the first row is compared against 0).
  mutate(pos_new = Positivos - lag(Positivos, n = 1L, default = 0)) %>%
  # Several reports can share a day: add them up.
  group_by(Dia) %>%
  summarise(
    pos_new = sum(pos_new),
    tot_pruebas = sum(Pruebas_dia)
  ) %>%
  mutate(cum_pos = cumsum(pos_new))
This is the dat1 data frame:
structure(list(Dia = structure(c(1583452800, 1583539200, 1583625600,
1583712000, 1583798400, 1583884800, 1583971200, 1584057600, 1584144000,
1584230400, 1584316800, 1584403200, 1584489600, 1584576000), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), pos_new = c(1, 5, 1, 2, 2, 6, 5, 16,
5, 28, 15, 31, 28, 89), tot_pruebas = c(155, 64, 31, 99, 28,
368, 141, 377, 313, 277, 493, 365, 395, 1000), cum_pos = c(1,
6, 7, 9, 11, 17, 22, 38, 43, 71, 86, 117, 145, 234)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -14L))
and this is my final code:
# Combined chart of dat1: bars for the new positives per day (pos_new) and a
# line with points for the cumulative positives (cum_pos), both labelled with
# their values.
f1 <- dat1 %>%
ggplot(aes(x = Dia)) +
geom_bar(aes(y = pos_new, fill = "Nuevos"), stat = "identity", alpha=.5) +
geom_line(aes(y = cum_pos, col = "Acumulados"), size=1) +
geom_point(aes(y = cum_pos), col = "#8B1C62") +
geom_text(aes(y = pos_new, label = pos_new), vjust = -0.8, col = "#43CD80") +
geom_text(aes(y = cum_pos, label = cum_pos), vjust = -0.8, col = "#8B1C62") +
labs(y = "Número de casos reportados", color = " Casos", fill = " ",
title = paste0("Número de casos confirmados \nhasta: ", Sys.Date())) +
# The constant strings mapped to fill/colour above get their colours here.
scale_fill_manual(values = c("Nuevos" = "#43CD80")) +
scale_color_manual(values = c("Acumulados" = "#8B1C62")) +
# Secondary y axis with the identity transformation (same values as the primary).
scale_y_continuous(sec.axis = sec_axis(~ .)) +
theme_minimal() +
theme(legend.position="bottom")+
# NOTE(review): `3-06-20` and `3-19-20` are arithmetic (3 - 6 - 20 = -23 and
# 3 - 19 - 20 = -36), not dates, and `from` equals `to`, so `breaks` is the
# single number -23. `Dia` is POSIXct, so a continuous scale with numeric
# breaks/limits does not fit it -- presumably the source of the
# "'origin' must be supplied" error; a date-time scale with POSIXct breaks is
# needed instead.
scale_x_continuous(breaks = seq(from =3-06-20 , to = 3-06-20, by = 1),
limits = c(3-06-20,3-19-20))
But I get this message:
Error in as.POSIXct.numeric(value) : 'origin' must be supplied
I want to show more dates ON THE X-AXIS ( from Mar 09 to Mar 19)
Instead of using scale_x_continuous you can use scale_x_datetime or scale_x_date. As your day Dia is already in POSIXct format, I used scale_x_datetime.
For your breaks, make sure to also put in POSIXct format. You can add labels to show Month Day using date_format from scales package.
library(ggplot2)
library(scales)
# Bars: new positives per day; line and points: cumulative positives. The x
# axis is a date-time scale with one labelled break per day.
dat1 %>%
  ggplot(aes(x = Dia)) +
  geom_col(aes(y = pos_new, fill = "Nuevos"), alpha = 0.5) +
  geom_line(aes(y = cum_pos, col = "Acumulados"), size = 1) +
  geom_point(aes(y = cum_pos), col = "#8B1C62") +
  geom_text(aes(y = pos_new, label = pos_new), vjust = -0.8, col = "#43CD80") +
  geom_text(aes(y = cum_pos, label = cum_pos), vjust = -0.8, col = "#8B1C62") +
  labs(
    y = "Número de casos reportados", color = " Casos", fill = " ",
    title = paste0("Número de casos confirmados \nhasta: ", Sys.Date())
  ) +
  scale_fill_manual(values = c("Nuevos" = "#43CD80")) +
  scale_color_manual(values = c("Acumulados" = "#8B1C62")) +
  scale_y_continuous(sec.axis = sec_axis(~ .)) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  scale_x_datetime(
    # Breaks are built in UTC because `Dia` is stored in UTC; without `tz` they
    # would fall on local midnight and be shifted against the bars.
    # The end date is a plain "YYYY-MM-DD" string (the former "2020-03-20-20"
    # only parsed because the trailing "-20" was ignored).
    breaks = seq(
      from = as.POSIXct("2020-03-06", tz = "UTC"),
      to = as.POSIXct("2020-03-20", tz = "UTC"),
      by = "1 day"
    ),
    labels = date_format("%b %d")
  )
Note: As suggested by @Dave2e, you can simplify scale_x_datetime:
# Shorter form of the x scale: one break per day, labelled as abbreviated
# month plus day ("%b %d"), without building the break vector by hand.
scale_x_datetime(date_breaks = "1 day", date_labels = "%b %d")
Output