Error plotting lines on a graph made in R - r
Could you help me solve following issue:
I have two codes that were made to generate the same scatter plot. The first one works normally, generates the graph and the lines without any problems. It is a code that requires vector i to generate the mean and standard deviation(sd).
Code 2, on the other hand, does not require vector i, but the result is not the desired one regarding the construction of lines in relation to the mean and sd. In my opinion it was to work.
Could the problem be with the ylim?
I hope someone helps me with this! =)
Thank you so much!
First code
library(dplyr)
library(tidyr)
library(lubridate)
data <- structure(
list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
"2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
"2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
"2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
"Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
"Thursday","Friday","Friday","Saturday","Saturday"),
DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
class = "data.frame", row.names = c(NA, -21L))
graph <- function(dt, dta = data) {
dim_data<-dim(data)
day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
length = dim_data[1]
))
data_grouped <- data %>%
mutate(across(starts_with("date"), as.Date)) %>%
group_by(date2) %>%
summarise(Id = first(Id),
date1 = first(date1),
Week = first(Week),
DTPE = first(DTPE),
D1 = sum(D1)) %>%
select(Id,date1,date2,Week,DTPE,D1)
data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
date2=format(date2,"%d/%m/%Y"))
data_grouped<-data.frame(data_grouped)
data_grouped %>%
mutate(DTPE = na_if(DTPE, ""))
DS=c("Thursday","Friday","Saturday")
i<-2
df_OC<-subset(data_grouped, DTPE == "")
ds_CO<-subset(df_OC,df_OC$Week==DS[i])
mean<-mean(as.numeric(ds_CO[,"D1"]) )
sd<-sd(as.numeric(ds_CO[,"D1"]))
dta %>%
filter(date2 == ymd(dt)) %>%
summarize(across(starts_with("DR"), sum)) %>%
pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
mutate(name = as.numeric(name)) %>%
plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5,
cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
abline(h=mean, col='blue') +
abline(h=(mean + sd), col='green',lty=2)
abline(h=(mean - sd), col='orange',lty=2)
}
graph("2021-04-09",data)
Second code
library(dplyr)
library(tidyr)
library(lubridate)
data <- structure(
list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
"2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
"2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
"2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
"Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
"Thursday","Friday","Friday","Saturday","Saturday"),
DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
class = "data.frame", row.names = c(NA, -21L))
graph <- function(dt, dta = data) {
dim_data<-dim(data)
day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
length = dim_data[1]
))
data_grouped <- data %>%
mutate(across(starts_with("date"), as.Date)) %>%
group_by(date2) %>%
summarise(Id = first(Id),
date1 = first(date1),
Week = first(Week),
DTPE = first(DTPE),
D1 = sum(D1)) %>%
select(Id,date1,date2,Week,DTPE,D1)
data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
date2=format(date2,"%d/%m/%Y"))
data_grouped<-data.frame(data_grouped)
data_grouped %>%
mutate(DTPE = na_if(DTPE, ""))
# get the week day
my_day <- weekdays(as.Date(dt))
df_OC<-subset(data_grouped, DTPE == "")
ds_CO<-subset(df_OC,df_OC$Week == my_day)
mean<-mean(as.numeric(ds_CO[,"D1"]) )
sd<-sd(as.numeric(ds_CO[,"D1"]))
dta %>%
filter(date2 == ymd(dt)) %>%
summarize(across(starts_with("DR"), sum)) %>%
pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
mutate(name = as.numeric(name)) %>%
plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5,
cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
abline(h=mean, col='blue') +
abline(h=(mean + sd), col='green',lty=2)
abline(h=(mean - sd), col='orange',lty=2)
}
graph("2021-04-09",data)
You've messed up a lot with these data transformations.
Below, however, I present my code that works according to your expectations.
The main problem here was my_day <- weekdays (as.Date (dt)), In my system I was getting "piątek" and you didn't have such a day in your data, right?
library(dplyr)
library(tidyr)
library(lubridate)
data <- structure(
list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
"2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
"2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
"2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
"Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
"Thursday","Friday","Friday","Saturday","Saturday"),
DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
class = "data.frame", row.names = c(NA, -21L))
graph <- function(dt, dta = data) {
dim_data<-dim(data)
day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
length = dim_data[1]
))
data_grouped <- data %>%
mutate(across(starts_with("date"), as.Date)) %>%
group_by(date2) %>%
summarise(Id = first(Id),
date1 = first(date1),
Week = first(Week),
DTPE = first(DTPE),
D1 = sum(D1)) %>%
select(Id,date1,date2,Week,DTPE,D1)
#data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
# date2=format(date2,"%d/%m/%Y"))
#data_grouped<-data.frame(data_grouped)
data_grouped %>%
mutate(DTPE = na_if(DTPE, ""))
# get the week day
#my_day <- weekdays(as.Date(dt))
df_OC<-subset(data_grouped, DTPE == "")
ds_CO = df_OC %>% filter(weekdays(date2) %in% weekdays(as.Date(dt)))
#ds_CO<-subset(df_OC,df_OC$Week == my_day)
mean<-mean(ds_CO$D1)
sd<-sd(ds_CO$D1)
dta %>%
filter(date2 == ymd(dt)) %>%
summarize(across(starts_with("DR"), sum)) %>%
pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
mutate(name = as.numeric(name)) %>%
plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5,
cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
abline(h=mean, col='blue') +
abline(h=(mean + sd), col='green',lty=2)
abline(h=(mean - sd), col='orange',lty=2)
}
graph("2021-04-09",data)
Finally, I recommend:
Keep your data in tibble,
do not unnecessarily transform the date into a string several times and vice versa,
use ggplo2. The charts will be much nicer.
Related
Inserting new data into a table
I would like a little help with the following question: note that this code generates a coefficient from a date I have chosen, in this case for the day 03/07 (dmda), it gave a coefficient equal to 15.55. In this case, I would like to generate a new table, where there is a column with dates and the other column with the coefficient corresponding to those dates. For the column dates, only the dates of date2 after the day considered in date1 (28/06) will be considered, in this case, the dates are: 01/07, 02/07 and 03/07. So the table will look like this: Thanks! library(dplyr) library(tidyverse) library(lubridate) df1 <- structure( list(date1 = c("2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28", "2021-06-28","2021-06-28","2021-06-28"), date2 = c("2021-04-02","2021-04-03","2021-04-08","2021-04-09","2021-04-10","2021-07-01","2021-07-02","2021-07-03"), Week= c("Friday","Saturday","Thursday","Friday","Saturday","Thursday","Friday","Monday"), DR01 = c(14,11,14,13,13,14,13,16), DR02= c(14,12,16,17,13,12,17,14),DR03= c(19,15,14,13,13,12,11,15), DR04 = c(15,14,13,13,16,12,11,19),DR05 = c(15,14,15,13,16,12,11,19), DR06 = c(21,14,13,13,15,16,17,18),DR07 = c(12,15,14,14,19,14,17,18)), class = "data.frame", row.names = c(NA, -8L)) dmda<-"2021-07-03" datas<-df1 %>% filter(date2 == ymd(dmda)) %>% summarize(across(starts_with("DR"), sum)) %>% pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>% mutate(name = as.numeric(name)) colnames(datas)<-c("Days","Numbers") mod <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 47,b2 = 0), data = datas) coef(mod)[2] > coef(mod)[2] b2 15.55011
We may subset the data where the 'date2' is greater than date1', get the 'date2' column extracted as a vector. Loop over the dates with map (from purrr), do the transformation within the loop, build the nls and extract the coefficient in a tibble, and use _dfr to collapse the list to a single tibble library(purrr) library(dplyr) dates <- subset(df1, date2 > date1, select = date2)$date2 map_dfr(dates, ~ { datas <- df1 %>% filter(date2 == ymd(.x)) %>% summarize(across(starts_with("DR"), sum)) %>% pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>% mutate(name = as.numeric(name)) colnames(datas)<-c("Days","Numbers") mod <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 47,b2 = 0), data = datas) tibble(dates = .x, coef = coef(mod)[2]) }) %>% mutate(dates = format(ymd(dates), "%d/%m/%Y")) # A tibble: 3 × 2 dates coef <chr> <dbl> 1 01/07/2021 12.2 2 02/07/2021 12.4 3 03/07/2021 15.6
How to generate table until the specific date in R
I have the following problem: How can I generate the table only until the date 03/07, instead of until 05/07. Executable code below: library(purrr) library(dplyr) library(tidyverse) library(lubridate) df1 <- structure( list(date1 = c("2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28", "2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28"), date2 = c("2021-04-02","2021-04-03","2021-04-08","2021-04-09","2021-04-10","2021-07-01","2021-07-02","2021-07-03","2021-07-04","2021-07-05"), Week= c("Friday","Saturday","Thursday","Friday","Saturday","Thursday","Friday","Saturday","Sunday","Monday"), DR01 = c(14,11,14,13,13,14,13,16,15,12), DR02= c(14,12,16,17,13,12,17,14,13,13),DR03= c(19,15,14,13,13,12,11,15,13,13), DR04 = c(15,14,13,13,16,12,11,19,11,13),DR05 = c(15,14,15,13,16,12,11,19,14,13), DR06 = c(21,14,13,13,15,16,17,18,12,11),DR07 = c(12,15,14,14,19,14,17,18,14,13)), class = "data.frame", row.names = c(NA, -10L)) dates <- subset(df1, date2 > date1, select = date2)$date2 map_dfr(dates, ~ { datas <- df1 %>% filter(date2 == ymd(.x)) %>% summarize(across(starts_with("DR"), sum)) %>% pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>% mutate(name = as.numeric(name)) colnames(datas)<-c("Days","Numbers") mod <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 47,b2 = 0), data = datas) tibble(dates = .x, coef = coef(mod)[2]) }) %>% mutate(dates = format(ymd(dates), "%d/%m/%Y")) dates coef <chr> <dbl> 1 01/07/2021 12.2 2 02/07/2021 12.4 3 03/07/2021 15.6 4 04/07/2021 13.3 5 05/07/2021 12.7
In the subset step, add one more condition with & dates <- subset(df1, date2 > date1 & date2 <= "2021-07-03", select = date2)$date2
How to optimize code that generates graph in R
Could you help me optimize the code below? As you can see, I'm using the same date twice, once for graph generation and once for subset y generation. The result is correct, but I'd like some help trying to optimize to at least use the date only once and another optimizing that you find necessary. Every help is welcome. Thank you very much! library(dplyr) library(lubridate) library(tidyverse) #dataset df <- structure( list(date1 = c("2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28", "2021-06-28","2021-06-28","2021-06-28"), date2 = c("2021-04-02","2021-04-03","2021-04-08","2021-04-09","2021-04-10","2021-07-01","2021-07-02","2021-07-03"), Week= c("Friday","Saturday","Thursday","Friday","Saturday","Thursday","Friday","Monday"), DR01 = c(4,1,4,3,3,4,3,6), DR02= c(4,2,6,7,3,2,7,4),DR03= c(9,5,4,3,3,2,1,5), DR04 = c(5,4,3,3,6,2,1,9),DR05 = c(5,4,5,3,6,2,1,9), DR06 = c(2,4,3,3,5,6,7,8),DR07 = c(2,5,4,4,9,4,7,8)), class = "data.frame", row.names = c(NA, -8L)) #Generate graph dmda<-"2021-07-01" dta<-df datas<-dta %>% filter(date2 == ymd(dmda)) %>% summarize(across(starts_with("DR"), sum)) %>% pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>% mutate(name = as.numeric(name)) colnames(datas)<-c("Days","Numbers") attach(datas) plot(Numbers ~ Days, ylim=c(0,20)) model <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 47,b2 = 0)) new.data <- data.frame(Days = seq(min(Days),max(Days),len = 45)) lines(new.data$Days,predict(model,newdata = new.data)) #Add the y points to the graph df[, 1:2] = lapply(df[, 1:2], FUN = as_date) get_cutoff = function(date) { date2 = as_date(date) date1 = df[1,1] as.numeric(date2 - date1 + 1) } subset_data = function(date, start_index) { date = as_date(date) if (date > df[1,1]) { end_index = start_index + get_cutoff(date) - 1 df[, -c(start_index:end_index)] %>% filter(date2 == date) } else { return(df) } } y<-subset_data("2021-07-01", 4) y pivot_longer(y, cols=c(starts_with("DR"))) %>% mutate(day = parse_number(name)) -> new_y new_y lines(x=new_y$day, y=new_y$value, col="red") points(x=new_y$day, y=new_y$value, col="red")
Make these changes: only load packages used can eliminate lubridate don't need dta in filter we don't need to convert dmda to Date class pivot_wider can transform the names don't use attach the model is linear in the parameters so use lm, not nls replace the new.data/lines with curve don't overwrite df simplify the cutoff calculation use type = "o" to reduce points/lines to just lines use subset in lines Now assuming that df and dmda have been defined as in the question we have this. library(dplyr) library(tidyr) datas <- df %>% filter(date2 == dmda) %>% summarize(across(starts_with("DR"), sum)) %>% pivot_longer(everything(), names_pattern = "DR(.+)", names_to = "Days", values_to = "Numbers", names_transform = list(Days = as.numeric)) plot(Numbers ~ Days, datas, ylim=c(0,20)) model <- lm(Numbers ~ I(Days^2), datas) rng <- range(datas$Days) curve(predict(model, list(Days = x)), rng[1], rng[2], add = TRUE) # assume this for cutoff. You may or may not need to change this line. cutoff <- as.numeric(as.Date(dmda) - first(as.Date(df$date1))) + 1 lines(Numbers ~ Days, datas, subset = seq_len(nrow(datas)) > cutoff, type = "o" , col = "red")
I used ggplot rather than base R plotting functions since you are already working in the tidyverse. The following will do the trick to plot it all on a single graph. dmda<-"2021-07-01" dta<-df ## Rather than rely on column position, explicitly set the number ## of days desired for highlighting on plot num_days <- 3 model <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 47,b2 = 0)) new.data <- data.frame(Days = seq(min(Days),max(Days),len = 45)) %>% mutate(Numbers = predict(model, newdata = .)) datas<-dta %>% filter(date2 == ymd(dmda)) %>% summarize(across(starts_with("DR"), sum)) %>% ## Can convert data to numeric and create column names inside pivot_longer pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "Numbers", names_to = "Days", names_transform = list(Days = as.numeric, Numbers = as.numeric)) %>% ## Create flag for whether the values are in the final number of days mutate(subs = 1:n() > (n() - num_days)) plt <- ggplot(datas, aes(x = Days, y = Numbers)) + geom_point(aes(color = subs)) + geom_line(data = filter(datas, subs == TRUE), color = "red") + geom_line(data = new.data, color = "black") + scale_y_continuous(limits = c(0, 20)) + scale_color_manual(values = c("black", "red")) plt
Optimize code for scatter plot generation in R
The executable code below generates a scatter plot that depends on the date (date2) he chooses and three lines are also generated, referring to mean, mean+standard deviation and mean-standard deviation, which are based on the day of the week (Week) that is chosen. As you can see, I used vector i to generate the mean and standard deviation. But I would like to optimize this, that is, when he chooses the date, he already understands what day of the week it is, so he doesn't need to use this i vector. For example, I put it to generate scatterplot date 10/04/2021, so the code would need to know it's a Saturday, without having to set vector i to 3. Can you help me with this question? The link to download the database is:https://docs.google.com/spreadsheets/d/1W_hzuRq7D6X12BdwaXeM-cjg2A5MIKDx/edit?usp=sharing&ouid=102073768617937039119&rtpof=true&sd=true library(dplyr) library(ggplot2) library(tidyr) library(lubridate) df<-read_excel('C:/Users/Downloads/database_test1.xlsx') df<-subset(df,df$date2<df$date1) dim_data<-dim(df) day<-c(seq.Date(from = as.Date(df$date2[1]), to = as.Date(df$date2[dim_data[1]]), by = "1 day")) df_grouped <- df %>% mutate(across(starts_with("date"), as.Date)) %>% group_by(date2) %>% summarise(Id = first(Id), date1 = first(date1), Week = first(Week), D = first(D), D1 = sum(D1)) %>% select(Id,date1,date2,Week,D,D1) df_grouped <- df_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"), date2=format(date2,"%d/%m/%Y")) df_grouped<-data.frame(df_grouped) DS=c("Thursday","Friday","Saturday") i<-3 df_OC<-subset(df_grouped,is.na(D)) ds_OC<-subset(df_OC,df_OC$Week==DS[i]) #Mean and Standard Deviation mean_Week<-mean(as.numeric(ds_OC[,"D1"]) ) sdeviation_Week<-sd(as.numeric(ds_OC[,"D1"])) #create scatter plot scatter_date <- function(dt, dta = df) { dta %>% filter(date2 == ymd(dt)) %>% summarize(across(starts_with("DR"), sum)) %>% pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>% mutate(name = as.numeric(name)) %>% plot(xlab = "Days", ylab = "Types", xlim = c(0, 7), ylim = c((min(.$val) %/% 10) * 10, (max(.$val) %/% 10 + 1) * 15)) abline(h=mean_Week, col='blue') abline(h=(mean_Week + sdeviation_Week), col='green',lty=2) abline(h=(mean_Week - sdeviation_Week), col='orange',lty=2) } scatter_date("2021-04-10",df) Generated images
You could create a lookup table: library(tibble) lookup <- df %>% select(date2, Week) %>% distinct() %>% mutate(date2 = ymd(date2)) %>% deframe() lookup #> 2021-03-04 2021-04-02 2021-04-03 2021-04-08 2021-04-09 2021-04-10 #> "Thursday" "Friday" "Saturday" "Thursday" "Friday" "Saturday" So now lookup["2021-04-10"] #> "Saturday" To use this with your scatterplot function you need to move some of your code into your function. One more idea of optimization: # You could put this lines into one pipe df_grouped <- df %>% mutate(across(starts_with("date"), as.Date)) %>% group_by(date2) %>% summarise(Id = first(Id), date1 = first(date1), Week = first(Week), D = first(D), D1 = sum(D1)) %>% select(Id, date1, date2, Week, D, D1) %>% mutate(date1 = format(date1, "%d/%m/%Y"), date2 = format(date2, "%d/%m/%Y")) # you don't need this line # df_grouped<-data.frame(df_grouped) Two more hints: Use a space after ",". This makes the code easier to read. Avoid using different types of quoting marks: use either " or ' not both (unless you have to use both).
According to https://stackoverflow.com/a/68948847/8282674 you can adapt your scatter_date with a switch statment and calculate every mean in there. The other way with less changes in your code, would be to remove DS=c("Thursday","Friday","Saturday") to calculate the weekday in the scatter_date function directly: library(dplyr) library(ggplot2) library(tidyr) library(lubridate) df<-readxl::read_excel('C:/Users/Downloads/database_test1.xlsx') df<-subset(df,df$date2<df$date1) # translate the days df %>% dplyr::mutate(Week = ifelse(Week=="Thursday", "quinta-feira", Week), Week = ifelse(Week=="Friday", "sexta-feira", Week), Week = ifelse(Week=="Saturday", "sábado", Week)) -> df dim_data<-dim(df) day<-c(seq.Date(from = as.Date(df$date2[1]), to = as.Date(df$date2[dim_data[1]]), by = "1 day")) df_grouped <- df %>% mutate(across(starts_with("date"), as.Date)) %>% group_by(date2) %>% summarise(Id = first(Id), date1 = first(date1), Week = first(Week), D = first(D), D1 = sum(D1)) %>% select(Id,date1,date2,Week,D,D1) df_grouped <- df_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"), date2=format(date2,"%d/%m/%Y")) df_grouped<-data.frame(df_grouped) #create scatter plot scatter_date <- function(dt, dta = df) { # get the week day my_day <- weekdays(as.Date(dt)) df_OC<-subset(df_grouped,is.na(D)) ds_OC<-subset(df_OC,df_OC$Week==my_day) # omit 'i' and DS mean_Week<-mean(as.numeric(ds_OC[,"D1"]) ) sdeviation_Week<-sd(as.numeric(ds_OC[,"D1"])) mean_Week_pos <- (mean_Week + sdeviation_Week) mean_Week_neg <- (mean_Week - sdeviation_Week) dta %>% filter(date2 == ymd(dt)) %>% summarize(across(starts_with("DR"), sum)) %>% pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>% mutate(name = as.numeric(name)) %>% plot(xlab = "Days", ylab = "Types", xlim = c(0, 7), main = paste0(my_day, ": (", mean_Week, ",+",mean_Week_pos, ",-", mean_Week_neg,")"), ylim = c((min(.$val) %/% 10) * 10, (max(.$val) %/% 10 + 1) * 15)) abline(h=mean_Week, col='blue') abline(h= mean_Week_pos, col='green',lty=2) abline(h= mean_Week_neg, col='orange',lty=2) } scatter_date("2021-04-10",df) scatter_date("2021-04-9",df) scatter_date("2021-04-8",df)
Adjust line in graph in R
The graph below generates a scatter plot based on date2. In addition, a horizontal line that refers to the mean is generated. Each day of the week has a different mean as you can see. Note that in abline I specified h=mean_saturday, as 10/4 is a Saturday. But I didn't want to always have to change this part of the abline to show the right mean line, but my idea is to leave it automatically, that is, when I enter the date 10/4/2021 in the code, the code already recognize that the 10th it's Saturday and inserts the appropriate mean line. Any idea how to do this? library(dplyr) library(ggplot2) library(tidyr) library(lubridate) library(tibble) df <- structure( list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), date1 = c("2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20", "2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20", "2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20", "2021-07-20","2021-07-20","2021-07-20","2021-07-20"), date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02", "2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03", "2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08", "2021-04-09","2021-04-09","2021-04-10","2021-04-10"), Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday", "Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday", "Thursday","Friday","Friday","Saturday","Saturday"), D = c("","","Ho","","","","","","Ho","","","","","","","","","","","",""), DR01 = c(2,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3)), class = "data.frame", row.names = c(NA, -21L)) mean_thursday=4 mean_friday=5 mean_saturday=6 scatter_date <- function(dt, dta = df) { dta %>% filter(date2 == ymd(dt)) %>% summarize(across(starts_with("DR"), sum)) %>% pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>% mutate(name = as.numeric(name)) %>% plot(xlab = "Days", ylab = "Types", xlim = c(0, 7), ylim = c((min(.$val) %/% 10) * 10, (max(.$val) %/% 10 + 1) * 15)) abline(h=mean_saturday, col='blue') } scatter_date("2021-04-10",df)
You could try to convert the input date in your scatter_date function to a date and get the weekday: my_day <- weekdays(as.Date(dt)) add that to a switch statment for your means: my_mean <- switch( my_day, "Saturday" = mean_saturday, "Friday" = mean_friday, "Thursday" = mean_thursday, 0) # add here your other days and replace mean_saturday in abline(h=mean_saturday, col='blue') with my_mean here the full code: library(dplyr) library(ggplot2) library(tidyr) library(lubridate) library(tibble) df <- structure( list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), date1 = c("2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20", "2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20", "2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20", "2021-07-20","2021-07-20","2021-07-20","2021-07-20"), date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02", "2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03", "2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08", "2021-04-09","2021-04-09","2021-04-10","2021-04-10"), Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday", "Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday", "Thursday","Friday","Friday","Saturday","Saturday"), D = c("","","Ho","","","","","","Ho","","","","","","","","","","","",""), DR01 = c(2,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3)), class = "data.frame", row.names = c(NA, -21L)) mean_thursday=4 mean_friday=5 mean_saturday=6 scatter_date <- function(dt, dta = df) { my_day <- weekdays(as.Date(dt)) my_mean <- switch( my_day, "Saturday" = mean_saturday, "Friday" = mean_friday, "Thursday" = mean_thursday, 0) # add here your other days dta %>% filter(date2 == ymd(dt)) %>% summarize(across(starts_with("DR"), sum)) %>% pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>% mutate(name = as.numeric(name)) %>% plot(xlab = "Days", ylab = "Types", xlim = c(0, 7), main = paste0(my_day, ":", my_mean), ylim = c((min(.$val) %/% 10) * 10, (max(.$val) %/% 10 + 1) * 15)) abline(h=my_mean, col='blue') } # testing the different means scatter_date("2021-04-10",df) scatter_date("2021-04-9",df) scatter_date("2021-04-8",df)
One way would be to define a data.frame containing the mean for the days of interest and then use weekdays to extract the corresponding mean from that table. Instead of mean_thursday=4 mean_friday=5 mean_saturday=6 I would go for something like: mean_df <- data.frame(mean = c(4:6), day = c('Thursday', 'Friday', 'Saturday')) and then abline(h=subset(mean_df, day == weekdays(as.Date(dt)))$mean, col='blue') which will be the only change in your function.