I have data in the zoo format in the following structure
date val
2020-11-01 3244
2020-11-02 3273
2020-11-03 2974
2020-11-04 3283
2020-11-05 3922
2020-11-06 3669
2020-11-07 4246
2020-11-08 4594
2020-11-09 4086
2020-11-10 4302
2020-11-11 4559
2020-11-12 4981
2020-11-13 4741
2020-11-14 5267
that I am trying to get into this form
date val
Mon 2020-11-01 3244
Tue 2020-11-02 3273
Wed 2020-11-03 2974
Thu 2020-11-04 3283
Fri 2020-11-05 3922
Sat 2020-11-06 3669
Sun 2020-11-07 4246
Mon 2020-11-08 4594
Tue 2020-11-09 4086
Wed 2020-11-10 4302
Thu 2020-11-11 4559
Fri 2020-11-12 4981
Sat 2020-11-13 4741
Sun 2020-11-14 5267
In order to count the number of times each weekday holds the smallest value of its week.
Mon = 1
Tue = 1
Wed = 0
Thu = 0
Fri = 0
Sat = 0
Sun = 0
I tried to let the data in the flat format before adding the date with zoo and added the weekdays but failed to count with it. Does anyone know an easier way to do it? I am open to visual solutions
If you store the data in a dataframe you can create a new column with weekdays and week number, for each week keep the row with minimum value and count number of weekdays that have the minimum value.
library(dplyr)
# For every calendar week keep the single row with the smallest `val`,
# then count how many of those weekly minima fall on each weekday.
df %>%
  mutate(date = as.Date(date)) %>%
  mutate(
    weekday = factor(weekdays(date)),
    week_year = format(date, "%Y-%W")
  ) %>%
  group_by(week_year) %>%
  # which.min() picks the first minimum, so ties resolve to the earliest row
  slice(which.min(val)) %>%
  ungroup() %>%
  # .drop = FALSE keeps weekday levels that never hold a minimum (count 0)
  count(weekday, .drop = FALSE)
The following should do the trick:
library(dplyr)      # %>%, group_by(), mutate(), summarise(), ungroup()
library(lubridate)  # week(), isoweek()
df$day <- weekdays(as.Date(df$date))
# Note:
# There is one way to define a week
df$week <- week(df$date)
# And there is also another. Make sure to pick.
df$isoweek <- isoweek(df$date)
# Flag every row whose `val` equals the minimum of its ISO week
# (ties are all flagged), then drop the grouping so `df` is left ungrouped.
df <- df %>%
  group_by(isoweek) %>%
  mutate(min_here = val == min(val)) %>%
  ungroup()
# Number of weekly minima observed on each weekday.
df %>% group_by(day) %>% summarise(sum(min_here))
# A tibble: 7 x 2
day `sum(min_here)`
<chr> <int>
1 Friday 0
2 Monday 1
3 Saturday 0
4 Sunday 1
5 Thursday 0
6 Tuesday 1
7 Wednesday 0
Base R
... though it seems a little clumsier in comparison to dplyr's mechanics in RonakShah's answer, or data.table below:
# Within each week (keyed by format "%U"), mark the position of the first
# minimum of `val`; ave() returns the marks aligned with the rows of `dat`.
ismin <- ave(
  dat$val,
  format(dat$date, format = "%U"),
  FUN = function(z) seq_along(z) == which.min(z)
)
# Sum the marks per weekday name.
aggregate(x = ismin, by = list(weekday = weekdays(dat$date)), FUN = sum)
# weekday x
# 1 Friday 0
# 2 Monday 1
# 3 Saturday 0
# 4 Sunday 0
# 5 Thursday 0
# 6 Tuesday 1
# 7 Wednesday 0
(The order is not emphasized here.)
data.table
library(data.table)
DT <- as.data.table(dat)
# Step 1 (by reference): within each "%U" week, flag the first row holding
# the minimum `val`.
DT[, ismin := seq_len(.N) == which.min(val), by = format(date, format = "%U")]
# Step 2 (by reference): attach the weekday name.
DT[, weekday := weekdays(date)]
# Step 3: number of weekly minima per weekday.
DT[, .(n = sum(ismin)), by = weekday]
# weekday n
# <char> <int>
# 1: Sunday 0
# 2: Monday 1
# 3: Tuesday 1
# 4: Wednesday 0
# 5: Thursday 0
# 6: Friday 0
# 7: Saturday 0
Data
# Sample data: 14 consecutive daily observations starting 2020-11-01.
dat <- data.frame(
  date = as.Date("2020-11-01") + 0:13,
  val = c(
    3244L, 3273L, 2974L, 3283L, 3922L, 3669L, 4246L,
    4594L, 4086L, 4302L, 4559L, 4981L, 4741L, 5267L
  )
)
Related
I'm working on a timestamp data frame. An excerpt of the date-related variables from the January sample of the data frame:
sample_dates <- data.frame(date = c("2021-01-01", "2021-01-02", "2021-01-03", "2021-01-04", "2021-01-05", "2021-01-06", "2021-01-07", "2021-01-08", "2021-01-09", "2021-01-10", "2021-01-11", "2021-01-12", "2021-01-13", "2021-01-14", "2021-01-15", "2021-01-16", "2021-01-17", "2021-01-18", "2021-01-19", "2021-01-20", "2021-01-21", "2021-01-22", "2021-01-23", "2021-01-24", "2021-01-25", "2021-01-26", "2021-01-27", "2021-01-28", "2021-01-29", "2021-01-30", "2021-01-31"))
sample_dates <- sample_dates %>%
mutate(date = as.POSIXct(date)) %>%
mutate(day = factor(format(date, "%a")))
I want to add a new factor variable day_cat, the pseudo-code for which could be something like this:
sample_dates <- sample_dates %>%
# the month could start on any day and this function should identify it
# for the sample, I know January 2021 started on Friday
mutate(day_cat = while(month is not over)
if(day == "Fri") {"Fri1"},
else if(day == "Sat" | day == "Sun") {"Weekend1"},
else if(day == "Mon") {"Mon1"},
else if(day == "Tue" | day == "Wed" | day == "Thu") {"Weekdays1"},
# now we're onto the next Friday of the month
else if(day == "Fri") {"Fri2"},
else if(day == "Sat" | day == "Sun") {"Weekend2"},
else if(day == "Mon") {"Mon2"},
else if(day == "Tue" | day == "Wed" | day == "Thu") {"Weekdays2"},
...
...
# reached the end of month
)
mutate(day_cat = factor(day_cat, levels = c("Mon", "Weekdays", "Fri", "Weekend")))
So, the factors are Mon = {Mon}; Weekdays = {Tue, Wed, Thu}; Fri = {Fri}; Weekend = {Sat, Sun}. And, I want to number these factors as Mon1, Weekdays1, Fri1, Weekend1, Mon2, Weekdays2, Fri2, Weekend2, Mon3, and so on, in the day_cat variable (say if the month started from Monday).
The levels of the day_cat variable should be in the same order (for plotting purpose).
If a month starts on Wednesday, day_cat would take only that Wednesday and Thursday (the next day) as "Weekdays1". If the month ends on Saturday, day_cat would take only that Saturday as "Weekend4" or "Weekend5", whichever it might be.
Here, day_cat is a factor in chronological order, although as specified the three weekday and two weekend values will each week have the same factor level. Is that what you want?
library(dplyr); library(lubridate)
# Label each date with its day group (Mon / Weekday / Fri / Weekend) plus the
# occurrence number of that group within the month, e.g. "Fri1", "Weekend1".
# Assumes `sample_dates` is sorted by date with one row per consecutive day.
sample_dates %>%
  mutate(day = wday(date, label = TRUE),
         group = case_when(day == "Mon" ~ "Mon",
                           day == "Fri" ~ "Fri",
                           day %in% c("Sat", "Sun") ~ "Weekend",
                           TRUE ~ "Weekday"),
         month_id = floor_date(date, "month")) %>%
  # A new run starts whenever the group changes (or the month begins), so a
  # month starting mid-week gets a short "Weekday1" run rather than borrowing
  # days from the following week.
  group_by(month_id) %>%
  mutate(new_run = group != lag(group, default = "")) %>%
  # Occurrence number = how many runs of this group have started so far
  # within the month.
  group_by(month_id, group) %>%
  mutate(weeknum = cumsum(new_run)) %>%
  ungroup() %>%
  # forcats is referenced explicitly because only dplyr and lubridate are
  # attached; levels follow first appearance, i.e. chronological order.
  mutate(day_cat = forcats::fct_inorder(paste0(group, weeknum))) %>%
  select(-month_id, -new_run)
Result
date day group weeknum day_cat
1 2021-01-01 Fri Fri 1 Fri1
2 2021-01-02 Sat Weekend 1 Weekend1
3 2021-01-03 Sun Weekend 1 Weekend1
4 2021-01-04 Mon Mon 1 Mon1
5 2021-01-05 Tue Weekday 1 Weekday1
6 2021-01-06 Wed Weekday 1 Weekday1
7 2021-01-07 Thu Weekday 1 Weekday1
8 2021-01-08 Fri Fri 2 Fri2
9 2021-01-09 Sat Weekend 2 Weekend2
10 2021-01-10 Sun Weekend 2 Weekend2
11 2021-01-11 Mon Mon 2 Mon2
12 2021-01-12 Tue Weekday 2 Weekday2
13 2021-01-13 Wed Weekday 2 Weekday2
14 2021-01-14 Thu Weekday 2 Weekday2
15 2021-01-15 Fri Fri 3 Fri3
16 2021-01-16 Sat Weekend 3 Weekend3
17 2021-01-17 Sun Weekend 3 Weekend3
18 2021-01-18 Mon Mon 3 Mon3
19 2021-01-19 Tue Weekday 3 Weekday3
20 2021-01-20 Wed Weekday 3 Weekday3
21 2021-01-21 Thu Weekday 3 Weekday3
22 2021-01-22 Fri Fri 4 Fri4
23 2021-01-23 Sat Weekend 4 Weekend4
24 2021-01-24 Sun Weekend 4 Weekend4
25 2021-01-25 Mon Mon 4 Mon4
26 2021-01-26 Tue Weekday 4 Weekday4
27 2021-01-27 Wed Weekday 4 Weekday4
28 2021-01-28 Thu Weekday 4 Weekday4
29 2021-01-29 Fri Fri 5 Fri5
30 2021-01-30 Sat Weekend 5 Weekend5
31 2021-01-31 Sun Weekend 5 Weekend5
I have a dataframe like this in R:
Start date
End date
Date 1
Date 2
Date 3
Date 4
11/12/2018
29/11/2019
08/03/2021
NA
NA
NA
07/03/2018
24/04/2019
08/03/2021
12/09/2016
NA
NA
04/06/2018
23/04/2019
08/03/2021
02/10/2017
05/10/2018
NA
26/07/2018
29/08/2019
08/03/2021
03/08/2015
02/10/2017
23/01/2017
I want to create a new column in R that says: If Date 1, Date 2, Date 3 or Date 4 is between Start Date and End date, it should return 1, 0 otherwise, as the table below:
Start date
End date
Date 1
Date 2
Date 3
Date 4
Change
11/12/2018
29/11/2019
08/03/2021
NA
NA
NA
0
07/03/2018
24/04/2019
08/03/2021
12/09/2016
NA
NA
0
04/06/2018
23/04/2019
08/03/2021
02/10/2017
05/10/2018
NA
1
26/07/2018
29/08/2019
08/03/2021
03/08/2015
02/10/2017
23/01/2017
0
Does anyone have a suggestion on how to solve this? Thank you :)
It'll make it much easier for people to help you if you can post code / data which we can run directly. The easiest way to do this is to use a handy R function called dput, which generates instructions to exactly recreate any R object. So you might run dput(MY_DATA), or if your data is much larger than needed to demonstrate your question, dput(head(MY_DATA)) to get the first six rows, and paste the output of that into your question. </PSA>
Here's code to generate your example data:
my_data <- data.frame(
stringsAsFactors = FALSE,
Start.date = c("11/12/2018", "07/03/2018", "04/06/2018", "26/07/2018"),
End.date = c("29/11/2019", "24/04/2019", "23/04/2019", "29/08/2019"),
Date.1 = c("08/03/2021", "08/03/2021", "08/03/2021", "08/03/2021"),
Date.2 = c(NA, "12/09/2016", "02/10/2017", "03/08/2015"),
Date.3 = c(NA, NA, "05/10/2018", "02/10/2017"),
Date.4 = c(NA, NA, NA, "23/01/2017")
)
Here's a tidyverse approach to first convert your day/month/year dates into data in R's Date type using lubridate::dmy, then to compare each of Date.1 thru Date.4 against your start and end dates, and then finally to show if there are any 1's (within range).
library(dplyr); library(lubridate)
my_data %>%
  # Parse every column from day/month/year text to Date. `.cols` is given
  # explicitly because calling across() without it is deprecated.
  mutate(across(everything(), dmy)) %>%
  # Replace each Date.* column by 1 if it lies within [Start.date, End.date],
  # else 0; coalesce() turns the NA produced by a missing date into FALSE.
  mutate(across(.cols = starts_with("Date"),
                .fns = ~coalesce(.x >= Start.date & .x <= End.date, FALSE) * 1)) %>%
  # Row-wise maximum of the 0/1 flags: 1 if any date is in range.
  mutate(Change = pmax(Date.1, Date.2, Date.3, Date.4))
coalesce(..., FALSE) used here to treat NA like FALSE.
(...)*1 to convert TRUE/FALSE to 1/0.
pmax(...) to grab the largest of the 1/0's, i.e. "are there any 1's?"
Edit: alternative to leave Date columns intact:
my_data %>%
  # Parse every column from day/month/year text to Date. `.cols` is given
  # explicitly because calling across() without it is deprecated.
  mutate(across(everything(), dmy)) %>%
  # Write the 0/1 in-range flags to new Check_* columns so the Date.* columns
  # stay intact.
  mutate(across(.cols = starts_with("Date"),
                .names = "Check_{.col}",
                .fns = ~coalesce(.x >= Start.date & .x <= End.date, FALSE) * 1)) %>%
  rowwise() %>%
  mutate(Change = max(c_across(starts_with("Check")))) %>%
  # Drop the rowwise grouping so later verbs operate on whole columns again.
  ungroup() %>%
  select(-starts_with("Check"))
Start.date End.date Date.1 Date.2 Date.3 Date.4 Change
<date> <date> <date> <date> <date> <date> <dbl>
1 2018-12-11 2019-11-29 2021-03-08 NA NA NA 0
2 2018-03-07 2019-04-24 2021-03-08 2016-09-12 NA NA 0
3 2018-06-04 2019-04-23 2021-03-08 2017-10-02 2018-10-05 NA 1
4 2018-07-26 2019-08-29 2021-03-08 2015-08-03 2017-10-02 2017-01-23 0
library(tidyverse)
library(lubridate)
df <- read.table(textConnection("start_date;end_date;date_1;date_2;date_3;date_4
11/12/2018;29/11/2019;08/03/2021;NA;NA;NA
07/03/2018;24/04/2019;08/03/2021;12/09/2016;NA;NA
04/06/2018;23/04/2019;08/03/2021;02/10/2017;05/10/2018;NA
26/07/2018;29/08/2019;08/03/2021;03/08/2015;02/10/2017;23/01/2017"),
sep=";",
header = TRUE)
df %>%
  mutate(
    # Parse all day/month/year strings to Date.
    across(everything(), lubridate::dmy),
    # 1 if any of date_1..date_4 lies strictly between start_date and
    # end_date, else 0. date_4 is now part of the test (it was omitted).
    # A comparison against a missing date yields NA; coalesce() maps an
    # overall NA to FALSE.
    change = ((date_1 > start_date & date_1 < end_date) |
      (date_2 > start_date & date_2 < end_date) |
      (date_3 > start_date & date_3 < end_date) |
      (date_4 > start_date & date_4 < end_date)
    ) %>%
      coalesce(FALSE) %>%
      as.integer()
  )
#> start_date end_date date_1 date_2 date_3 date_4 change
#> 1 2018-12-11 2019-11-29 2021-03-08 <NA> <NA> <NA> 0
#> 2 2018-03-07 2019-04-24 2021-03-08 2016-09-12 <NA> <NA> 0
#> 3 2018-06-04 2019-04-23 2021-03-08 2017-10-02 2018-10-05 <NA> 1
#> 4 2018-07-26 2019-08-29 2021-03-08 2015-08-03 2017-10-02 2017-01-23 0
I want to create a dataframe from a given start and end date:
start_date <- as.Date("2020-05-17")
end_date <- as.Date("2020-06-23")
For each row in this dataframe, I should have the start day and end day of the month, so the expected output is:
start end month year
2020-05-17 2020-05-31 May 2020
2020-06-01 2020-06-23 June 2020
I have tried to create a sequence, but I'm stuck on what to do next:
day_seq <- seq(start_date, end_date, 1)
Please, a base R or tidyverse solution will be greatly appreciated.
1) yearmon Using start_date and end_date from the question create a yearmon sequence and then each of the desired columns is a simple one line computation. The stringsAsFactors line can be omitted under R 4.0 onwards as that is the default there.
library(zoo)
# One yearmon value per month from the month of start_date to the month of
# end_date (step of 1/12 year).
ym <- seq(as.yearmon(start_date), as.yearmon(end_date), 1/12)
# One row per month, clipped to [start_date, end_date].
# NOTE(review): relies on zoo's as.Date(<yearmon>) giving the first day of the
# month and frac = 1 giving the last day; consistent with the output shown
# below, but confirm against the zoo documentation.
data.frame(start = pmax(start_date, as.Date(ym)),
end = pmin(end_date, as.Date(ym, frac = 1)),
# cycle(ym) is used as an index into month.name
month = month.name[cycle(ym)],
# integer part of the yearmon value is used as the year
year = as.integer(ym),
stringsAsFactors = FALSE)
giving:
start end month year
1 2020-05-17 2020-05-31 May 2020
2 2020-06-01 2020-06-23 June 2020
2) Base R This follows similar logic and gives the same answer. We first define a function month1 which given a Date class vector x returns a Date vector the same length but for the first of the month.
#' First day of the month for each date
#'
#' @param x A Date vector.
#' @return A Date vector of the same length as `x`, each element moved to the
#'   first day of its month.
month1 <- function(x) {
  # cut() bins the dates by calendar month; each bin is labelled with the
  # month's first day, which is then parsed back into a Date.
  month_bin <- cut(x, "month")
  as.Date(as.character(month_bin))
}
# First-of-month dates covering the requested range.
months <- seq(from = month1(start_date), to = month1(end_date), by = "month")
# One row per month, clipped to [start_date, end_date].
data.frame(
  start = pmax(start_date, months),
  # 31 days past the first of a month always lands in the next month, so
  # month1(months + 31) - 1 is the last day of the current month.
  end = pmin(end_date, month1(months + 31) - 1),
  month = format(months, "%B"),
  year = as.numeric(format(months, "%Y")),
  stringsAsFactors = FALSE
)
A while ago that I used the tidyverse, but here is my go at things..
sample data
Different sample data, to tackle some problems where the year changes.
start_date <- as.Date("2020-05-17")
end_date <- as.Date("2021-06-23")
code
library( tidyverse )
library( lubridate )
#create a sequence of days from start to end
# One row per calendar month between start_date and end_date, giving the
# first and last day of the requested range that fall in that month.
tibble(date = seq(start_date, end_date, by = "1 day")) %>%
  mutate(month = lubridate::month(date),
         year = lubridate::year(date)) %>%
  group_by(year, month) %>%
  # The daily sequence already stops at end_date, so the last date present in
  # each (year, month) group is the month end clipped to end_date; no need to
  # build month-end dates from strings and repair the final one.
  summarise(start = min(date),
            end = max(date),
            .groups = "drop") %>%
  select(start, end, month, year)
output
# # A tibble: 14 x 4
# # Groups: year [2]
# start end month year
# <date> <date> <dbl> <dbl>
# 1 2020-05-17 2020-05-31 5 2020
# 2 2020-06-01 2020-06-30 6 2020
# 3 2020-07-01 2020-07-31 7 2020
# 4 2020-08-01 2020-08-31 8 2020
# 5 2020-09-01 2020-09-30 9 2020
# 6 2020-10-01 2020-10-31 10 2020
# 7 2020-11-01 2020-11-30 11 2020
# 8 2020-12-01 2020-12-31 12 2020
# 9 2021-01-01 2021-01-31 1 2021
# 10 2021-02-01 2021-02-28 2 2021
# 11 2021-03-01 2021-03-31 3 2021
# 12 2021-04-01 2021-04-30 4 2021
# 13 2021-05-01 2021-05-31 5 2021
# 14 2021-06-01 2021-06-23 6 2021
For the specific period in your question, you may use:
library(lubridate)
# Split a range that spans exactly two calendar months into one row per month.
start_date <- as.Date("2020-05-17")
end_date <- as.Date("2020-06-23")
start <- c(start_date, floor_date(end_date, unit = "month"))
# ceiling_date() returns the first day of the NEXT month; step back one day
# to get the last day of the starting month (2020-05-31, not 2020-06-01).
end <- c(ceiling_date(start_date, unit = "month") - 1, end_date)
# abbr = FALSE gives full month names ("June"), as in the expected output.
month <- as.character(month(start, label = TRUE, abbr = FALSE))
year <- year(start)
data.frame(start, end, month, year, stringsAsFactors = FALSE)
Here is one approach using intervals with lubridate. You would create a full interval between the 2 dates of interest, and then intersect with monthly ranges for each month (first to last day each month).
library(tidyverse)
library(lubridate)
start_date <- as.Date("2020-05-17")
end_date <- as.Date("2021-08-23")
full_int <- interval(start_date, end_date)
# Anchor the monthly sequence on the first of the month. Stepping by month
# from start_date itself would skip the final month whenever end_date's day
# is earlier than start_date's, and can overflow for days 29-31.
month_seq <- seq(floor_date(start_date, "month"), end_date, by = "month")
# Each calendar month as an interval: first day .. last day.
month_int <- interval(
  month_seq,
  ceiling_date(month_seq, "month", change_on_boundary = TRUE) - days(1)
)
# Clip every calendar month to the requested range and unpack the pieces.
data.frame(interval = intersect(full_int, month_int)) %>%
  mutate(start = int_start(interval),
         end = int_end(interval),
         month = month.abb[month(start)],
         year = year(start)) %>%
  select(-interval)
Output
start end month year
1 2020-05-17 2020-05-31 May 2020
2 2020-06-01 2020-06-30 Jun 2020
3 2020-07-01 2020-07-31 Jul 2020
4 2020-08-01 2020-08-31 Aug 2020
5 2020-09-01 2020-09-30 Sep 2020
6 2020-10-01 2020-10-31 Oct 2020
7 2020-11-01 2020-11-30 Nov 2020
8 2020-12-01 2020-12-31 Dec 2020
9 2021-01-01 2021-01-31 Jan 2021
10 2021-02-01 2021-02-28 Feb 2021
11 2021-03-01 2021-03-31 Mar 2021
12 2021-04-01 2021-04-30 Apr 2021
13 2021-05-01 2021-05-31 May 2021
14 2021-06-01 2021-06-30 Jun 2021
15 2021-07-01 2021-07-31 Jul 2021
16 2021-08-01 2021-08-23 Aug 2021
I am looking for an if statement in R that will help me determine if Monday's close is higher than Friday's close; the point is to add it as an extra column.
I have tried the following which gives a plausible result:
GSPC$GSPC.DIFF <- for(i in 1:length(GSPC$GSPC.Weekdaynr)) {if(GSPC$GSPC.Weekdaynr[i] > 4){GSPC.DIFF <- append(GSPC.DIFF,5)}else{GSPC.DIFF <- append(GSPC.DIFF, "")}}
enter image description here
But changing statment 1 to append(GSPC.DIFF,GSPC$GSPC.Close)
GSPC$GSPC.DIFF <- for(i in 1:length(GSPC$GSPC.Weekdaynr)) {if(GSPC$GSPC.Weekdaynr[i] > 4){GSPC.DIFF <- append(GSPC.DIFF,GSPC$GSPC.Close)}else{GSPC.DIFF <- append(GSPC.DIFF, "")}}
Gives the following:
[enter image description here][2]
do anyone know why it is not giving value to every five lines like in the first case?
here is where the value is collected from:
enter image description here
Here's a potentially useful approach (not the most elegant way I guess):
library(dplyr)
library(tibble)
library(lubridate)
set.seed(123)
data <- tibble(
Date = seq.Date(from = as.Date("2020/04/01"), to = as.Date("2020/05/12"), by = "day"),
Open = runif(42, min = 11500, max = 12600),
Close = runif(42, min = 11500, max = 12600)
)
# Keep only Mondays and Fridays and, within each `week` bucket, label a row
# "Higher"/"Lower" by comparing its Close with the Open of the next row in
# the same bucket.
# NOTE(review): weekdays() names are locale-dependent; the "Monday"/"Friday"
# filter assumes an English locale.
# NOTE(review): the question asks about Monday close vs Friday close, whereas
# this compares Close with the following row's Open - confirm intent.
# NOTE(review): which day comes first inside a bucket depends on how
# lubridate::week() delimits weeks - verify for the dates in use.
data <- data %>%
mutate(day = weekdays(Date),
week = week(Date)) %>%
filter(day == "Monday" | day == "Friday") %>%
group_by(week) %>%
mutate(Delta = ifelse(Close - lead(Open, 1) > 0, "Higher", "Lower")) %>%
select(Date, week, Delta) %>%
# rows with no following row in their bucket have an NA Delta and are dropped
filter(!is.na(Delta))
With this output:
> data
# A tibble: 6 x 3
# Groups: week [6]
Date week Delta
<date> <dbl> <chr>
1 2020-04-03 14 Higher
2 2020-04-10 15 Lower
3 2020-04-17 16 Lower
4 2020-04-24 17 Lower
5 2020-05-01 18 Lower
6 2020-05-08 19 Lower
From this input:
> data
# A tibble: 42 x 3
Date Open Close
<date> <dbl> <dbl>
1 2020-04-01 11816. 11955.
2 2020-04-02 12367. 11906.
3 2020-04-03 11950. 11668.
4 2020-04-04 12471. 11653.
5 2020-04-05 12535. 11756.
6 2020-04-06 11550. 12013.
7 2020-04-07 12081. 11793.
8 2020-04-08 12482. 12444.
9 2020-04-09 12107. 11550.
10 2020-04-10 12002. 11986.
We can convert the xts series to dataframe using fortify.zoo(), create a weekday column and calculate the percent difference between each day with the previous day.
# Turn the series into a data frame (the date ends up in the `Index` column,
# as the output below shows), then add the weekday name and the percentage
# change of the close versus the previous row.
DJI %>%
fortify.zoo() %>%
mutate(day = weekdays(Index),
# first row has no previous close, so diff_per is NA there
diff_per = (DJI.Close - lag(DJI.Close))/lag(DJI.Close) * 100)
# Index DJI.Open DJI.High DJI.Low DJI.Close DJI.Volume DJI.Adjusted day diff_per
#1 2007-01-03 12474.5 12474.5 12474.5 12474.5 0 12474.5 Wednesday NA
#2 2007-01-04 12480.7 12480.7 12480.7 12480.7 0 12480.7 Thursday 0.049702954
#3 2007-01-05 12398.0 12398.0 12398.0 12398.0 0 12398.0 Friday -0.662624642
#4 2007-01-08 12423.5 12423.5 12423.5 12423.5 0 12423.5 Monday 0.205678335
#5 2007-01-09 12416.6 12416.6 12416.6 12416.6 0 12416.6 Tuesday -0.055543051
#6 2007-01-10 12442.2 12442.2 12442.2 12442.2 0 12442.2 Wednesday 0.206180330
#....
If you are interested only in Monday and Friday values, you can add filter in the above command.
%>% filter(day %in% c('Monday', 'Friday'))
data
library(quantmod)
library(zoo)
library(dplyr)
getSymbols('DJI')
How do I find the last weekday (e.g., Wednesday) of the month using R? In the code below, I calculate the month, day of the month, week of the month, and weekday. There are 5 Wednesdays in January 2014, but only 4 Wednesdays in February 2014, so I cannot use max(week of the month) as a filter. Any help is appreciated although I prefer to use the base R functions.
# One row per day of 2014 with derived calendar fields.
DF <- data.frame(
  DATE = seq(from = as.Date("2014-01-01"), to = as.Date("2014-12-31"), by = "day")
)
DF$MONTH <- as.numeric(format(DF$DATE, "%m"))
DF$DAY_OF_MONTH <- as.numeric(format(DF$DATE, "%d"))
# Days 1-7 are week 1, days 8-14 week 2, and so on.
DF$WEEK_OF_MONTH <- ceiling(DF$DAY_OF_MONTH / 7)
DF$WEEKDAY <- format(DF$DATE, "%A")
DF
I think this is what you're after:
# Name of the last non-weekend day of each row's month.
DF$last_weekday_o_month <- weekdays(ave(
  DF$DATE,
  # Group by year AND month so the same month of different years is not
  # merged when the data spans more than one year.
  format(DF$DATE, "%Y-%m"),
  # "%u" is the ISO weekday number (6 = Saturday, 7 = Sunday), so the
  # weekend test does not depend on the locale's day names.
  FUN = function(x) tail(x[!(format(x, "%u") %in% c("6", "7"))], 1)
))
To find the particular date that is the last weekday....
# Date of the last non-weekend day of each row's month.
DF$last_weekdaydate_o_month <- ave(
  DF$DATE,
  # Group by year AND month so the same month of different years is not
  # merged when the data spans more than one year.
  format(DF$DATE, "%Y-%m"),
  # "%u" is the ISO weekday number (6 = Saturday, 7 = Sunday), so the
  # weekend test does not depend on the locale's day names.
  FUN = function(x) tail(x[!(format(x, "%u") %in% c("6", "7"))], 1)
)
the result looks like...
DATE last_weekday_o_month last_weekdaydate_o_month
1 2014-01-01 Friday 2014-01-31
2 2014-01-02 Friday 2014-01-31
3 2014-01-03 Friday 2014-01-31
4 2014-01-04 Friday 2014-01-31
5 2014-01-05 Friday 2014-01-31
6 2014-01-06 Friday 2014-01-31
...
360 2014-12-26 Wednesday 2014-12-31
361 2014-12-27 Wednesday 2014-12-31
362 2014-12-28 Wednesday 2014-12-31
363 2014-12-29 Wednesday 2014-12-31
364 2014-12-30 Wednesday 2014-12-31
365 2014-12-31 Wednesday 2014-12-31
If you did this first, of course you could compute last_weekday_o_month as weekdays(last_weekdaydate_o_month).
With a couple packages, this can be done more elegantly/readably, as suggested by #RichardScriven:
library(data.table)
# Add, by reference, the last non-weekend date of each row's month.
# Grouping uses year and month so that data spanning several years does not
# merge, say, January 2014 with January 2015.
setDT(DF)[,
  last_weekdaydate_o_month := last(DATE[!chron::is.weekend(DATE)]),
  by = .(year(DATE), month(DATE))]
which gives
DATE last_weekdaydate_o_month
1: 2014-01-01 2014-01-31
2: 2014-01-02 2014-01-31
3: 2014-01-03 2014-01-31
4: 2014-01-04 2014-01-31
5: 2014-01-05 2014-01-31
---
361: 2014-12-27 2014-12-31
362: 2014-12-28 2014-12-31
363: 2014-12-29 2014-12-31
364: 2014-12-30 2014-12-31
365: 2014-12-31 2014-12-31
Here is a method using dplyr. Essentially you group by the month, filter out the 'weekend' days and return the weekday of the last (i.e. max) day.
library(dplyr)
DF <- data.frame(
  DATE = seq(from = as.Date("2014-01-01"), to = as.Date("2014-12-31"), by = "day")
)
# Drop Saturdays and Sundays, then report the weekday name of the latest
# remaining date in each month.
DF %>%
  mutate(month = months(DATE)) %>%
  mutate(weekday = weekdays(DATE)) %>%
  filter(!(weekday %in% c("Saturday", "Sunday"))) %>%
  group_by(month) %>%
  summarise(last_weekday = weekdays(max(DATE)))
Source: local data frame [12 x 2]
month last_weekday
1 April Wednesday
2 August Friday
3 December Wednesday
4 February Friday
5 January Friday
6 July Thursday
7 June Monday
8 March Monday
9 May Friday
10 November Friday
11 October Friday
12 September Tuesday
library(lubridate)
# Daily dates starting 2007-12-31; the number of elements equals the number
# of days between that date and today.
x <- seq(
  as.Date("2007-12-31"),
  by = "1 day",
  length.out = (Sys.Date() - as.Date("2007-12-31"))
)
library(plyr)
df <- data.frame(date = x, year = year(x), month = month(x))
df$weekday <- weekdays(df$date)
# Keep Monday-Friday only.
df <- df[!(df$weekday %in% c("Saturday", "Sunday")), ]
# Latest remaining date per (year, month).
df <- ddply(df, .(year, month), summarize, last = max(date))