How to generate table until the specific date in R - r

I have the following problem: How can I generate the table only until the date 03/07, instead of until 05/07.
Executable code below:
library(purrr)
library(dplyr)
library(tidyverse)
library(lubridate)
df1 <- structure(
list(date1 = c("2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28",
"2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28"),
date2 = c("2021-04-02","2021-04-03","2021-04-08","2021-04-09","2021-04-10","2021-07-01","2021-07-02","2021-07-03","2021-07-04","2021-07-05"),
Week= c("Friday","Saturday","Thursday","Friday","Saturday","Thursday","Friday","Saturday","Sunday","Monday"),
DR01 = c(14,11,14,13,13,14,13,16,15,12), DR02= c(14,12,16,17,13,12,17,14,13,13),DR03= c(19,15,14,13,13,12,11,15,13,13),
DR04 = c(15,14,13,13,16,12,11,19,11,13),DR05 = c(15,14,15,13,16,12,11,19,14,13),
DR06 = c(21,14,13,13,15,16,17,18,12,11),DR07 = c(12,15,14,14,19,14,17,18,14,13)),
class = "data.frame", row.names = c(NA, -10L))
dates <- subset(df1, date2 > date1, select = date2)$date2
map_dfr(dates, ~ {
datas <- df1 %>%
filter(date2 == ymd(.x)) %>%
summarize(across(starts_with("DR"), sum)) %>%
pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
mutate(name = as.numeric(name))
colnames(datas)<-c("Days","Numbers")
mod <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 47,b2 = 0), data = datas)
tibble(dates = .x, coef = coef(mod)[2])
}) %>%
mutate(dates = format(ymd(dates), "%d/%m/%Y"))
dates coef
<chr> <dbl>
1 01/07/2021 12.2
2 02/07/2021 12.4
3 03/07/2021 15.6
4 04/07/2021 13.3
5 05/07/2021 12.7

In the subset step, add one more condition with &
dates <- subset(df1, date2 > date1 & date2 <= "2021-07-03", select = date2)$date2

Related

R does not assign numbers to dates in the right order

I am working on a document were I have a list of tests with dates.
I am trying to get R to pivot them horizontally, with the first test showing up first and the later tests showing up later.
However, when applying functions such as sort() or order() or even group_by(), R still sometimes shows an earlier test in the first column pivotted to horizontal.
I would think I should apply some sort of odering to the date column before numbering, so that R numbers the actual first test with the first numerical value with which I am pivotting.
Any idea as to how I would go about this?
My dataframe looks like this:
employee nr. date date2 test_1 test_2
x 2010/01/10 2010/01/05 positive positive
.................................
It should be so that the 2 dates are switched. The date is formatted as yyyy/mm/dd.
In the original dataset it was formatted as dd/mm/yy (you can see the format change in the code).
My expected output should look something like this:
employee nr. date date2 test_1 test_2
x 2010/01/05 2010/01/10 positive positive
#specify dates as variable "date" for R to recognize the variable
ct_clean$date <- as.Date(ct_clean$date, origin = "1899-30-12", format = "%d/%m/%y")
###assign number to duplicate value of employee number (if multiple tests -> multiple entries)
ct_numbered <- ct_clean %>% group_by(employee) %>% mutate(test_nr = row_number())
ct_clean %>% group_by(employee) %>% mutate(test_nr = 1:n())
ct_clean %>% group_by(employee) %>% mutate(test_nr = seq_len(n()))
ct_clean %>% group_by(employee) %>% mutate(test_nr = seq_along(employee))
#spread out multiple test for one individual horizontally
ct_wide <- ct_numbered %>% group_by(date) %>% pivot_wider(names_from = "test_nr",
values_from = "ct",
names_expand = TRUE, names_vary = "slowest")
#merging rows to include the test-data and test-number in the same row
ct_df <- ct_wide %>%
group_by(employee) %>%
mutate(id = seq_along(employee)) %>%
pivot_wider(names_from = id, values_from = date, names_prefix = "date") %>%
summarize_all(list(~ .[!is.na(.)][1]))
You can do this by using if_else():
library(tidyverse)
d <- structure(list(employee = c("x", "y", "z"), date1 = structure(c(14619,
14611, 14619), class = "Date"), date2 = structure(c(14614, 14614,
14614), class = "Date"), test_1 = c("positive", "negative", "negative"
), test_2 = c("positive", "positive", "positive")), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -3L), spec = structure(list(
cols = list(employee = structure(list(), class = c("collector_character",
"collector")), date1 = structure(list(format = ""), class = c("collector_date",
"collector")), date2 = structure(list(format = ""), class = c("collector_date",
"collector")), test_1 = structure(list(), class = c("collector_character",
"collector")), test_2 = structure(list(), class = c("collector_character",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1L), class = "col_spec"))
d
#> # A tibble: 3 × 5
#> employee date1 date2 test_1 test_2
#> <chr> <date> <date> <chr> <chr>
#> 1 x 2010-01-10 2010-01-05 positive positive
#> 2 y 2010-01-02 2010-01-05 negative positive
#> 3 z 2010-01-10 2010-01-05 negative positive
d |>
mutate(date1 = if_else(d$date1 < d$date2, d$date1, d$date2),
date2 = if_else(d$date1 < d$date2, d$date2, d$date1),
test_1 = if_else(d$date1 < d$date2, d$test_1, d$test_2),
test_2 = if_else(d$date1 < d$date2, d$test_2, d$test_1)
)
#> # A tibble: 3 × 5
#> employee date1 date2 test_1 test_2
#> <chr> <date> <date> <chr> <chr>
#> 1 x 2010-01-05 2010-01-10 positive positive
#> 2 y 2010-01-02 2010-01-05 negative positive
#> 3 z 2010-01-05 2010-01-10 positive negative
Created on 2022-03-28 by the reprex package (v2.0.1)
I found the answer to my problem:
The argument had to be passed in the code for assigning numbers to the duplicates.
The original code looked like this:
ct_numbered <- ct_variant %>% group_by(date, umcg) %>% mutate(test_nr =
row_number())
ct_variant %>% group_by(date, umcg) %>% mutate(test_nr = 1:n())
ct_variant %>% group_by(date, umcg) %>% mutate(test_nr = seq_len(n()))
ct_variant %>% group_by(date, umcg) %>% mutate(test_nr = seq_along(umcg))
This is the solution I used:
ct_numbered <- ct_variant %>% arrange(ymd(ct_variant$date)) %>% group_by(date,
umcg) %>% mutate(test_nr = row_number())
ct_variant %>% group_by(date, umcg) %>% mutate(test_nr = 1:n())
ct_variant %>% group_by(date, umcg) %>% mutate(test_nr = seq_len(n()))
ct_variant %>% group_by(date, umcg) %>% mutate(test_nr = seq_along(umcg))

Inserting new data into a table

I would like a little help with the following question: note that this code generates a coefficient from a date I have chosen, in this case for the day 03/07 (dmda), it gave a coefficient equal to 15.55. In this case, I would like to generate a new table, where there is a column with dates and the other column with the coefficient corresponding to those dates. For the column dates, only the dates of date2 after the day considered in date1 (28/06) will be considered, in this case, the dates are: 01/07, 02/07 and 03/07.
So the table will look like this:
Thanks!
library(dplyr)
library(tidyverse)
library(lubridate)
df1 <- structure(
list(date1 = c("2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28",
"2021-06-28","2021-06-28","2021-06-28"),
date2 = c("2021-04-02","2021-04-03","2021-04-08","2021-04-09","2021-04-10","2021-07-01","2021-07-02","2021-07-03"),
Week= c("Friday","Saturday","Thursday","Friday","Saturday","Thursday","Friday","Monday"),
DR01 = c(14,11,14,13,13,14,13,16), DR02= c(14,12,16,17,13,12,17,14),DR03= c(19,15,14,13,13,12,11,15),
DR04 = c(15,14,13,13,16,12,11,19),DR05 = c(15,14,15,13,16,12,11,19),
DR06 = c(21,14,13,13,15,16,17,18),DR07 = c(12,15,14,14,19,14,17,18)),
class = "data.frame", row.names = c(NA, -8L))
dmda<-"2021-07-03"
datas<-df1 %>%
filter(date2 == ymd(dmda)) %>%
summarize(across(starts_with("DR"), sum)) %>%
pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
mutate(name = as.numeric(name))
colnames(datas)<-c("Days","Numbers")
mod <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 47,b2 = 0), data = datas)
coef(mod)[2]
> coef(mod)[2]
b2
15.55011
We may subset the data where the 'date2' is greater than date1', get the 'date2' column extracted as a vector. Loop over the dates with map (from purrr), do the transformation within the loop, build the nls and extract the coefficient in a tibble, and use _dfr to collapse the list to a single tibble
library(purrr)
library(dplyr)
dates <- subset(df1, date2 > date1, select = date2)$date2
map_dfr(dates, ~ {
datas <- df1 %>%
filter(date2 == ymd(.x)) %>%
summarize(across(starts_with("DR"), sum)) %>%
pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
mutate(name = as.numeric(name))
colnames(datas)<-c("Days","Numbers")
mod <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 47,b2 = 0), data = datas)
tibble(dates = .x, coef = coef(mod)[2])
}) %>%
mutate(dates = format(ymd(dates), "%d/%m/%Y"))
# A tibble: 3 × 2
dates coef
<chr> <dbl>
1 01/07/2021 12.2
2 02/07/2021 12.4
3 03/07/2021 15.6

How to optimize code that generates graph in R

Could you help me optimize the code below? As you can see, I'm using the same date twice, once for graph generation and once for subset y generation. The result is correct, but I'd like some help trying to optimize to at least use the date only once and another optimizing that you find necessary. Every help is welcome.
Thank you very much!
library(dplyr)
library(lubridate)
library(tidyverse)
#dataset
df <- structure(
list(date1 = c("2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28",
"2021-06-28","2021-06-28","2021-06-28"),
date2 = c("2021-04-02","2021-04-03","2021-04-08","2021-04-09","2021-04-10","2021-07-01","2021-07-02","2021-07-03"),
Week= c("Friday","Saturday","Thursday","Friday","Saturday","Thursday","Friday","Monday"),
DR01 = c(4,1,4,3,3,4,3,6), DR02= c(4,2,6,7,3,2,7,4),DR03= c(9,5,4,3,3,2,1,5),
DR04 = c(5,4,3,3,6,2,1,9),DR05 = c(5,4,5,3,6,2,1,9),
DR06 = c(2,4,3,3,5,6,7,8),DR07 = c(2,5,4,4,9,4,7,8)),
class = "data.frame", row.names = c(NA, -8L))
#Generate graph
dmda<-"2021-07-01"
dta<-df
datas<-dta %>%
filter(date2 == ymd(dmda)) %>%
summarize(across(starts_with("DR"), sum)) %>%
pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
mutate(name = as.numeric(name))
colnames(datas)<-c("Days","Numbers")
attach(datas)
plot(Numbers ~ Days, ylim=c(0,20))
model <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 47,b2 = 0))
new.data <- data.frame(Days = seq(min(Days),max(Days),len = 45))
lines(new.data$Days,predict(model,newdata = new.data))
#Add the y points to the graph
df[, 1:2] = lapply(df[, 1:2], FUN = as_date)
get_cutoff = function(date) {
date2 = as_date(date)
date1 = df[1,1]
as.numeric(date2 - date1 + 1)
}
subset_data = function(date, start_index) {
date = as_date(date)
if (date > df[1,1]) {
end_index = start_index + get_cutoff(date) - 1
df[, -c(start_index:end_index)] %>%
filter(date2 == date)
} else {
return(df)
}
}
y<-subset_data("2021-07-01", 4)
y
pivot_longer(y,
cols=c(starts_with("DR"))) %>%
mutate(day = parse_number(name)) -> new_y
new_y
lines(x=new_y$day, y=new_y$value, col="red")
points(x=new_y$day, y=new_y$value, col="red")
Make these changes:
only load packages used
can eliminate lubridate
don't need dta
in filter we don't need to convert dmda to Date class
pivot_wider can transform the names
don't use attach
the model is linear in the parameters so use lm, not nls
replace the new.data/lines with curve
don't overwrite df
simplify the cutoff calculation
use type = "o" to reduce points/lines to just lines
use subset in lines
Now assuming that df and dmda have been defined as in the question we have this.
library(dplyr)
library(tidyr)
datas <- df %>%
filter(date2 == dmda) %>%
summarize(across(starts_with("DR"), sum)) %>%
pivot_longer(everything(), names_pattern = "DR(.+)",
names_to = "Days", values_to = "Numbers",
names_transform = list(Days = as.numeric))
plot(Numbers ~ Days, datas, ylim=c(0,20))
model <- lm(Numbers ~ I(Days^2), datas)
rng <- range(datas$Days)
curve(predict(model, list(Days = x)), rng[1], rng[2], add = TRUE)
# assume this for cutoff. You may or may not need to change this line.
cutoff <- as.numeric(as.Date(dmda) - first(as.Date(df$date1))) + 1
lines(Numbers ~ Days, datas, subset = seq_len(nrow(datas)) > cutoff,
type = "o" , col = "red")
I used ggplot rather than base R plotting functions since you are already working in the tidyverse. The following will do the trick to plot it all on a single graph.
dmda<-"2021-07-01"
dta<-df
## Rather than rely on column position, explicitly set the number
## of days desired for highlighting on plot
num_days <- 3
model <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 47,b2 = 0))
new.data <- data.frame(Days = seq(min(Days),max(Days),len = 45)) %>%
mutate(Numbers = predict(model, newdata = .))
datas<-dta %>%
filter(date2 == ymd(dmda)) %>%
summarize(across(starts_with("DR"), sum)) %>%
## Can convert data to numeric and create column names inside pivot_longer
pivot_longer(everything(), names_pattern = "DR(.+)",
values_to = "Numbers", names_to = "Days",
names_transform = list(Days = as.numeric, Numbers = as.numeric)) %>%
## Create flag for whether the values are in the final number of days
mutate(subs = 1:n() > (n() - num_days))
plt <- ggplot(datas, aes(x = Days, y = Numbers)) +
geom_point(aes(color = subs)) +
geom_line(data = filter(datas, subs == TRUE), color = "red") +
geom_line(data = new.data, color = "black") +
scale_y_continuous(limits = c(0, 20)) +
scale_color_manual(values = c("black", "red"))
plt

Error plotting lines on a graph made in R

Could you help me solve following issue:
I have two codes that were made to generate the same scatter plot. The first one works normally, generates the graph and the lines without any problems. It is a code that requires vector i to generate the mean and standard deviation(sd).
Code 2, on the other hand, does not require vector i, but the result is not the desired one regarding the construction of lines in relation to the mean and sd. In my opinion it was to work.
Could the problem be with the ylim?
I hope someone helps me with this! =)
Thank you so much!
First code
library(dplyr)
library(tidyr)
library(lubridate)
data <- structure(
list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
"2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
"2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
"2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
"Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
"Thursday","Friday","Friday","Saturday","Saturday"),
DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
class = "data.frame", row.names = c(NA, -21L))
graph <- function(dt, dta = data) {
dim_data<-dim(data)
day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
length = dim_data[1]
))
data_grouped <- data %>%
mutate(across(starts_with("date"), as.Date)) %>%
group_by(date2) %>%
summarise(Id = first(Id),
date1 = first(date1),
Week = first(Week),
DTPE = first(DTPE),
D1 = sum(D1)) %>%
select(Id,date1,date2,Week,DTPE,D1)
data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
date2=format(date2,"%d/%m/%Y"))
data_grouped<-data.frame(data_grouped)
data_grouped %>%
mutate(DTPE = na_if(DTPE, ""))
DS=c("Thursday","Friday","Saturday")
i<-2
df_OC<-subset(data_grouped, DTPE == "")
ds_CO<-subset(df_OC,df_OC$Week==DS[i])
mean<-mean(as.numeric(ds_CO[,"D1"]) )
sd<-sd(as.numeric(ds_CO[,"D1"]))
dta %>%
filter(date2 == ymd(dt)) %>%
summarize(across(starts_with("DR"), sum)) %>%
pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
mutate(name = as.numeric(name)) %>%
plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5,
cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
abline(h=mean, col='blue') +
abline(h=(mean + sd), col='green',lty=2)
abline(h=(mean - sd), col='orange',lty=2)
}
graph("2021-04-09",data)
Second code
library(dplyr)
library(tidyr)
library(lubridate)
data <- structure(
list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
"2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
"2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
"2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
"Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
"Thursday","Friday","Friday","Saturday","Saturday"),
DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
class = "data.frame", row.names = c(NA, -21L))
graph <- function(dt, dta = data) {
dim_data<-dim(data)
day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
length = dim_data[1]
))
data_grouped <- data %>%
mutate(across(starts_with("date"), as.Date)) %>%
group_by(date2) %>%
summarise(Id = first(Id),
date1 = first(date1),
Week = first(Week),
DTPE = first(DTPE),
D1 = sum(D1)) %>%
select(Id,date1,date2,Week,DTPE,D1)
data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
date2=format(date2,"%d/%m/%Y"))
data_grouped<-data.frame(data_grouped)
data_grouped %>%
mutate(DTPE = na_if(DTPE, ""))
# get the week day
my_day <- weekdays(as.Date(dt))
df_OC<-subset(data_grouped, DTPE == "")
ds_CO<-subset(df_OC,df_OC$Week == my_day)
mean<-mean(as.numeric(ds_CO[,"D1"]) )
sd<-sd(as.numeric(ds_CO[,"D1"]))
dta %>%
filter(date2 == ymd(dt)) %>%
summarize(across(starts_with("DR"), sum)) %>%
pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
mutate(name = as.numeric(name)) %>%
plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5,
cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
abline(h=mean, col='blue') +
abline(h=(mean + sd), col='green',lty=2)
abline(h=(mean - sd), col='orange',lty=2)
}
graph("2021-04-09",data)
You've messed up a lot with these data transformations.
Below, however, I present my code that works according to your expectations.
The main problem here was my_day <- weekdays (as.Date (dt)), In my system I was getting "piątek" and you didn't have such a day in your data, right?
library(dplyr)
library(tidyr)
library(lubridate)
data <- structure(
list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
"2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
"2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
"2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
"Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
"Thursday","Friday","Friday","Saturday","Saturday"),
DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
class = "data.frame", row.names = c(NA, -21L))
graph <- function(dt, dta = data) {
dim_data<-dim(data)
day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
length = dim_data[1]
))
data_grouped <- data %>%
mutate(across(starts_with("date"), as.Date)) %>%
group_by(date2) %>%
summarise(Id = first(Id),
date1 = first(date1),
Week = first(Week),
DTPE = first(DTPE),
D1 = sum(D1)) %>%
select(Id,date1,date2,Week,DTPE,D1)
#data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
# date2=format(date2,"%d/%m/%Y"))
#data_grouped<-data.frame(data_grouped)
data_grouped %>%
mutate(DTPE = na_if(DTPE, ""))
# get the week day
#my_day <- weekdays(as.Date(dt))
df_OC<-subset(data_grouped, DTPE == "")
ds_CO = df_OC %>% filter(weekdays(date2) %in% weekdays(as.Date(dt)))
#ds_CO<-subset(df_OC,df_OC$Week == my_day)
mean<-mean(ds_CO$D1)
sd<-sd(ds_CO$D1)
dta %>%
filter(date2 == ymd(dt)) %>%
summarize(across(starts_with("DR"), sum)) %>%
pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
mutate(name = as.numeric(name)) %>%
plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5,
cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
abline(h=mean, col='blue') +
abline(h=(mean + sd), col='green',lty=2)
abline(h=(mean - sd), col='orange',lty=2)
}
graph("2021-04-09",data)
Finally, I recommend:
Keep your data in tibble,
do not unnecessarily transform the date into a string several times and vice versa,
use ggplo2. The charts will be much nicer.

Adjust line in graph in R

The graph below generates a scatter plot based on date2. In addition, a horizontal line that refers to the mean is generated. Each day of the week has a different mean as you can see.
Note that in abline I specified h=mean_saturday, as 10/4 is a Saturday. But I didn't want to always have to change this part of the abline to show the right mean line, but my idea is to leave it automatically, that is, when I enter the date 10/4/2021 in the code, the code already recognize that the 10th it's Saturday and inserts the appropriate mean line. Any idea how to do this?
library(dplyr)
library(ggplot2)
library(tidyr)
library(lubridate)
library(tibble)
df <- structure(
list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
date1 = c("2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20",
"2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20",
"2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20",
"2021-07-20","2021-07-20","2021-07-20","2021-07-20"),
date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
"2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
"2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
"2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
"Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
"Thursday","Friday","Friday","Saturday","Saturday"),
D = c("","","Ho","","","","","","Ho","","","","","","","","","","","",""),
DR01 = c(2,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3)),
class = "data.frame", row.names = c(NA, -21L))
mean_thursday=4
mean_friday=5
mean_saturday=6
scatter_date <- function(dt, dta = df) {
dta %>%
filter(date2 == ymd(dt)) %>%
summarize(across(starts_with("DR"), sum)) %>%
pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
mutate(name = as.numeric(name)) %>%
plot(xlab = "Days", ylab = "Types", xlim = c(0, 7),
ylim = c((min(.$val) %/% 10) * 10, (max(.$val) %/% 10 + 1) * 15))
abline(h=mean_saturday, col='blue')
}
scatter_date("2021-04-10",df)
You could try to convert the input date in your scatter_date function to a date and get the weekday: my_day <- weekdays(as.Date(dt)) add that to a switch statment for your means:
my_mean <- switch(
my_day,
"Saturday" = mean_saturday,
"Friday" = mean_friday,
"Thursday" = mean_thursday,
0) # add here your other days
and replace mean_saturday in abline(h=mean_saturday, col='blue') with my_mean
here the full code:
library(dplyr)
library(ggplot2)
library(tidyr)
library(lubridate)
library(tibble)
df <- structure(
list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
date1 = c("2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20",
"2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20",
"2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20","2021-07-20",
"2021-07-20","2021-07-20","2021-07-20","2021-07-20"),
date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
"2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
"2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
"2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
"Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
"Thursday","Friday","Friday","Saturday","Saturday"),
D = c("","","Ho","","","","","","Ho","","","","","","","","","","","",""),
DR01 = c(2,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3)),
class = "data.frame", row.names = c(NA, -21L))
mean_thursday=4
mean_friday=5
mean_saturday=6
scatter_date <- function(dt, dta = df) {
my_day <- weekdays(as.Date(dt))
my_mean <- switch(
my_day,
"Saturday" = mean_saturday,
"Friday" = mean_friday,
"Thursday" = mean_thursday,
0) # add here your other days
dta %>%
filter(date2 == ymd(dt)) %>%
summarize(across(starts_with("DR"), sum)) %>%
pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
mutate(name = as.numeric(name)) %>%
plot(xlab = "Days", ylab = "Types", xlim = c(0, 7), main = paste0(my_day, ":", my_mean),
ylim = c((min(.$val) %/% 10) * 10, (max(.$val) %/% 10 + 1) * 15))
abline(h=my_mean, col='blue')
}
# testing the different means
scatter_date("2021-04-10",df)
scatter_date("2021-04-9",df)
scatter_date("2021-04-8",df)
One way would be to define a data.frame containing the mean for the days of interest and then use weekdays to extract the corresponding mean from that table.
Instead of
mean_thursday=4
mean_friday=5
mean_saturday=6
I would go for something like:
mean_df <- data.frame(mean = c(4:6),
day = c('Thursday', 'Friday', 'Saturday'))
and then
abline(h=subset(mean_df, day == weekdays(as.Date(dt)))$mean, col='blue')
which will be the only change in your function.

Resources