Continue dates on a data frame in R

Let's say I have the following data.frame:
library(dplyr)
library(lubridate) # needed for month(), week() and wday()
Dates <- seq(as.Date('2017/01/01'), by = 'day', length.out = 365)
A <- data.frame(date = Dates, month = month(Dates), week = week(Dates))
B <- A %>% dplyr::mutate(day = lubridate::wday(date, label = TRUE))
B[350:365,]
date month week day
350 2017-12-16 12 50 Sat
351 2017-12-17 12 51 Sun
352 2017-12-18 12 51 Mon
353 2017-12-19 12 51 Tue
354 2017-12-20 12 51 Wed
355 2017-12-21 12 51 Thu
356 2017-12-22 12 51 Fri
357 2017-12-23 12 51 Sat
358 2017-12-24 12 52 Sun
359 2017-12-25 12 52 Mon
360 2017-12-26 12 52 Tue
361 2017-12-27 12 52 Wed
362 2017-12-28 12 52 Thu
363 2017-12-29 12 52 Fri
364 2017-12-30 12 52 Sat
365 2017-12-31 12 53 Sun
I need to add another ten dates after the end date, i.e. from 2018-01-01 to 2018-01-10, and the week sequence should continue from where it left off. For example:
date month week day
365 2017-12-31 12 53 Sun
366 2018-01-01 1 53 Mon
367 2018-01-02 1 53 Tue
368 2018-01-03 1 53 Wed
369 2018-01-04 1 53 Thu
370 2018-01-05 1 53 Fri
371 2018-01-06 1 53 Sat
372 2018-01-07 1 54 Sun
373 2018-01-08 1 54 Mon
374 2018-01-09 1 54 Tue
375 2018-01-10 1 54 Wed

library(dplyr)
library(lubridate)
Dates<-seq(as.Date('2017/01/01'), by = 'day', length.out = 365)
A <- data.frame(date=(Dates), month=month(Dates), week=week(Dates))
B <- A %>% dplyr::mutate(day = lubridate::wday(date, label = TRUE))
B[350:365,]
B %>%
  rbind( # bind rows with the following dataset
    data.frame(date = seq(max(B$date) + 1, by = 'day', length.out = 10)) %>% # create sequence of new dates
      mutate(month = month(date),                       # add month
             day = wday(date, label = TRUE),            # add day
             week = cumsum(day == "Sun") + max(A$week)) # add week: continues from the last week of A and increments every Sunday
  ) %>%
  tbl_df() # only for visualisation purposes
# # A tibble: 375 x 4
# date month week day
# <date> <dbl> <dbl> <ord>
# 1 2017-01-01 1 1 Sun
# 2 2017-01-02 1 1 Mon
# 3 2017-01-03 1 1 Tue
# 4 2017-01-04 1 1 Wed
# 5 2017-01-05 1 1 Thu
# 6 2017-01-06 1 1 Fri
# 7 2017-01-07 1 1 Sat
# 8 2017-01-08 1 2 Sun
# 9 2017-01-09 1 2 Mon
#10 2017-01-10 1 2 Tue
# # ... with 365 more rows
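Side note, not part of the answer: tbl_df() has since been deprecated in dplyr, and tibble::as_tibble() does the same display-only conversion, so on newer versions it can simply replace that last step of the pipe, e.g.
library(tibble)
as_tibble(B) # same idea as tbl_df(B); purely for printing as a tibble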

A little tweak to @antoniosk's code: just add the max of week from the previous data frame, which gives the continuous week numbers as desired.
library(dplyr)
library(lubridate)
Dates<-seq(as.Date('2017/01/01'), by = 'day', length.out = 365)
A <- data.frame(date=(Dates), month=month(Dates), week=week(Dates))
B <- A %>% dplyr::mutate(day = lubridate::wday(date, label = TRUE))
B[350:365,]
c <- B %>%
  rbind( # bind rows with the following dataset
    data.frame(date = seq(max(B$date) + 1, by = 'day', length.out = 10)) %>% # get 10 extra sequential dates after the last date in B
      mutate(month = month(date),
             week = as.numeric(strftime(date, format = "%U")) + max(A$week),
             day = wday(date, label = TRUE))
  ) %>%
  tbl_df()
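As a further aside (not from either answer above): since lubridate::week() is just (yday(date) - 1) %/% 7 + 1, a continuous week index over the extended range can also be computed in one step from the number of days since the first date, instead of stitching two week schemes together. A minimal sketch:
library(dplyr)
library(lubridate)
all_dates <- seq(as.Date("2017-01-01"), by = "day", length.out = 375) # 2017 plus the first ten days of 2018
D <- data.frame(date = all_dates) %>%
  mutate(month = month(date),
         week = as.integer(date - min(date)) %/% 7 + 1, # continuous week counter
         day = wday(date, label = TRUE))
tail(D, 11)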

Related

Finding the Weekday Average from numerical values in R

So I have values like
Mon 162 Tue 123 Wed 29
and so on. I need to find the average for all weekdays in R. I have tried filter and group_by but cannot get an answer.
Time Day Count Speed
1 00:00 Sun 169 60.2
2 00:00 Mon 71 58.5
3 00:00 Tue 70 57.2
4 00:00 Wed 68 58.5
5 00:00 Thu 91 58.8
6 00:00 Fri 94 58.7
7 00:00 Sat 135 58.5
8 01:00 Sun 111 60.0
9 01:00 Mon 45 59.2
10 01:00 Tue 50 57.6
I need the outcome to be Weekday Average = ####
Let's say your df is
> df
# A tibble: 14 x 2
Day Count
<chr> <dbl>
1 Sun 31
2 Mon 51
3 Tue 21
4 Wed 61
5 Thu 31
6 Fri 51
7 Sat 65
8 Sun 31
9 Mon 13
10 Tue 61
11 Wed 72
12 Thu 46
13 Fri 62
14 Sat 13
You can use
df %>%
  filter(!Day %in% c('Sun', 'Sat')) %>%
  group_by(Day) %>%
  summarize(mean(Count))
To get
# A tibble: 5 x 2
Day `mean(Count)`
<chr> <dbl>
1 Fri 56.5
2 Mon 32
3 Thu 38.5
4 Tue 41
5 Wed 66.5
For the average of all filtered values
df %>%
  filter(!Day %in% c("Sun", "Sat")) %>%
  summarize("Average of all Weekday counts" = mean(Count))
Output
# A tibble: 1 x 1
`Average of all Weekday counts`
<dbl>
1 46.9
To get a numeric value instead of a tibble
df %>%
  filter(!Day %in% c("Sun", "Sat")) %>%
  summarize("Average of all Weekday counts" = mean(Count)) %>%
  as.numeric()
Output
[1] 46.9
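dplyr::pull() is another common way to get that single number out of the summarised tibble (a small variation on the answer above):
df %>%
  filter(!Day %in% c("Sun", "Sat")) %>%
  summarize(avg = mean(Count)) %>%
  pull(avg)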
This might do the trick
days <- c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
d.f <- data.frame(Day = rep(days, 3), Speed = rnorm(21))
# split the data frame by day of week, then take the mean of Speed within each day
lapply(split(d.f, f = d.f$Day), function(d) mean(d$Speed))
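For the same per-day means as a named vector, base R's tapply() is a compact alternative (a sketch on the same d.f):
# mean Speed for each day of the week, returned as a named vector
tapply(d.f$Speed, d.f$Day, mean)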
If you're looking for the single mean for just the weekdays, you could do something like this:
dat = data.frame(Time = rep(c("00:00","01:00"), c(7,3)),
                 Day = c("Sun","Mon","Tue","Wed","Thu","Fri","Sat","Sun","Mon","Tue"),
                 Count = c(169,71,70,68,91,94,135,111,45,50),
                 Speed = c(60.2,58.5,57.2,58.5,58.8,58.7,58.5,60.0,59.2,57.6))
mean(dat$Count[dat$Day %in% c("Mon","Tue","Wed","Thu","Fri")])
# [1] 69.85714
If, on the other hand, you're looking for the mean across each individual day then you could do this using base R:
aggregate(dat$Count, by=list(dat$Day), FUN = mean)
# Group.1 x
# 1 Fri 94
# 2 Mon 58
# 3 Sat 135
# 4 Sun 140
# 5 Thu 91
# 6 Tue 60
# 7 Wed 68
It looks like you've tried dplyr, so the syntax for that same operation in dplyr would be:
library(dplyr)
dat %>% group_by(Day) %>% summarize(mean_count = mean(Count))
# Day mean_count
# <chr> <dbl>
# 1 Fri 94
# 2 Mon 58
# 3 Sat 135
# 4 Sun 140
# 5 Thu 91
# 6 Tue 60
# 7 Wed 68
And if you want to do the same thing in data.table you would do this:
library(data.table)
as.data.table(dat)[,.(mean_count = mean(Count)), by = Day]
# Day mean_count
# 1: Sun 140
# 2: Mon 58
# 3: Tue 60
# 4: Wed 68
# 5: Thu 91
# 6: Fri 94
# 7: Sat 135
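And if what you are after is the single weekday-only average rather than per-day means, the same filter can be expressed directly in data.table (a sketch, not part of the answer above):
library(data.table)
as.data.table(dat)[!Day %in% c("Sat", "Sun"), .(weekday_mean = mean(Count))]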

Dataframe to tidy format in R

I have this data frame:
x <- data.frame("date" = c("03-01-2005","04-01-2005","05-01-2005","06-01-2005"),
"pricemax.0" = c(50,20,25,56),
"pricemax.200" = c(25,67,89,30),
"pricemax.1000" = c(45,60,40,30),
"pricemax.1400" = c(60,57,32,44),
"pricemin.0" = c(22,15,23,43),
"pricemin.200" = c(21,40,59,21),
"pricemin.1000" = c(32,12,20,24),
"pricemin.1400" = c(30,20,14,20))
The numbers after the dot represent hours, e.g. pricemax.200 would be 02:00. I need to gather the date and time information into one column of class POSIXct, with the other two columns being pricemax and pricemin.
So what I want is a single datetime column alongside the pricemax and pricemin values. And this is what I've done so far:
tidy_x <- x %>%
  pivot_longer(
    cols = contains("pricemax"),
    names_to = c(NA, "hour"),
    names_sep = "\\.",
    values_to = "pricemax"
  ) %>%
  pivot_longer(
    cols = contains("pricemin"),
    names_to = c(NA, "hour_2"),
    names_sep = "\\.",
    values_to = "pricemin"
  )
I'm not sure how I can combine the date and time columns and keep the variables pricemin and pricemax organized.
Using dplyr and tidyr, you can do:
library(dplyr)
library(tidyr)
x %>%
  pivot_longer(cols = -date,
               names_to = c('.value', 'time'),
               names_sep = '\\.') %>%
  mutate(time = sprintf('%04s', time)) %>%
  unite(datetime, date, time, sep = " ") %>%
  mutate(datetime = lubridate::dmy_hm(datetime))
# A tibble: 16 x 3
# datetime pricemax pricemin
# <dttm> <dbl> <dbl>
# 1 2005-01-03 00:00:00 50 22
# 2 2005-01-03 02:00:00 25 21
# 3 2005-01-03 10:00:00 45 32
# 4 2005-01-03 14:00:00 60 30
# 5 2005-01-04 00:00:00 20 15
# 6 2005-01-04 02:00:00 67 40
# 7 2005-01-04 10:00:00 60 12
# 8 2005-01-04 14:00:00 57 20
# 9 2005-01-05 00:00:00 25 23
#10 2005-01-05 02:00:00 89 59
#11 2005-01-05 10:00:00 40 20
#12 2005-01-05 14:00:00 32 14
#13 2005-01-06 00:00:00 56 43
#14 2005-01-06 02:00:00 30 21
#15 2005-01-06 10:00:00 30 24
#16 2005-01-06 14:00:00 44 20
Get the data in long format with the max and min values in separate columns and the hour information in its own column. We make the hour information consistent (four digits) using sprintf, combine it with the date into one column, and convert that into a datetime value.
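One caveat with the padding step: the 0 flag combined with %s in sprintf('%04s', time) is not guaranteed to zero-pad on every platform (it may pad with spaces instead). If that happens, padding the numeric value with %04d, e.g. mutate(time = sprintf('%04d', as.integer(time))), is a safe substitute:
sprintf('%04d', as.integer(c('0', '200', '1000', '1400')))
# [1] "0000" "0200" "1000" "1400"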
Maybe you can try reshape like below to make a long data frame
y <- transform(
  reshape(x, direction = "long", varying = -1),
  date = strptime(paste(date, time / 100), "%d-%m-%Y %H")
)[c("date", "pricemax", "pricemin")]
y <- `row.names<-`(y[order(y$date), ], NULL)
which gives
> y
date pricemax pricemin
1 2005-01-03 00:00:00 50 22
2 2005-01-03 02:00:00 25 21
3 2005-01-03 10:00:00 45 32
4 2005-01-03 14:00:00 60 30
5 2005-01-04 00:00:00 20 15
6 2005-01-04 02:00:00 67 40
7 2005-01-04 10:00:00 60 12
8 2005-01-04 14:00:00 57 20
9 2005-01-05 00:00:00 25 23
10 2005-01-05 02:00:00 89 59
11 2005-01-05 10:00:00 40 20
12 2005-01-05 14:00:00 32 14
13 2005-01-06 00:00:00 56 43
14 2005-01-06 02:00:00 30 21
15 2005-01-06 10:00:00 30 24
16 2005-01-06 14:00:00 44 20
Here is a data.table approach:
setDT(x)
DT <- melt.data.table(x, id.vars = "date")
DT[, c("var", "time") := tstrsplit(variable , ".", fixed=TRUE)
][, datetime := as.POSIXct(paste(date, as.integer(time) / 100), format = "%d-%m-%Y %H")
][, setdiff(names(DT), c("datetime", "var", "value")) := NULL]
DT <- dcast.data.table(DT, datetime ~ var, value.var = "value")
> DT
datetime pricemax pricemin
1: 2005-01-03 00:00:00 50 22
2: 2005-01-03 02:00:00 25 21
3: 2005-01-03 10:00:00 45 32
4: 2005-01-03 14:00:00 60 30
5: 2005-01-04 00:00:00 20 15
6: 2005-01-04 02:00:00 67 40
7: 2005-01-04 10:00:00 60 12
8: 2005-01-04 14:00:00 57 20
9: 2005-01-05 00:00:00 25 23
10: 2005-01-05 02:00:00 89 59
11: 2005-01-05 10:00:00 40 20
12: 2005-01-05 14:00:00 32 14
13: 2005-01-06 00:00:00 56 43
14: 2005-01-06 02:00:00 30 21
15: 2005-01-06 10:00:00 30 24
16: 2005-01-06 14:00:00 44 20

Find the 'RARE' user based on occurrence in two columns

This is what my data frame looks like; it's the data of a song portal (like iTunes or Raaga):
datf <- read.csv(text =
"albumid,date_transaction,listened_time_secs,userid,songid
6263,3/28/2017,59,3747,6263
3691,4/24/2017,53,2417,3691
2222,3/24/2017,34,2417,9856
1924,3/16/2017,19,8514,1924
6691,1/1/2017,50,2186,6691
5195,1/1/2017,64,2186,5195
2179,1/1/2017,37,2186,2179
6652,1/11/2017,33,1145,6652")
My aim is to pick out the rare users. A 'rare' user is one who visits the portal no more than once in each calendar month.
For example: 2186 is not rare; 2417 is rare because it occurs only once in each of two different months, and so are 3747, 1145 and 8514.
I've been trying something like this:
DuplicateUsers <- duplicated(songsdata[,2:4])
DuplicateUsers <- songsdata[DuplicateUsers,]
DistinctSongs <- songsdata %>%
distinct(sessionid, date_transaction, .keep_all = TRUE)
RareUsers <- anti_join(DistinctSongs, DuplicateUsers, by='sessionid')
but it doesn't seem to work.
Using library(dplyr) you could do this:
# make a new month_id variable to group_by() with
datf$month_id <- gsub("\\/.*", "", datf$date_transaction)
RareUsers <- group_by(datf, userid, month_id) %>%
  filter(n() == 1)
RareUsers
# A tibble: 5 x 6
# Groups: userid, month_id [5]
albumid date_transaction listened_time_secs userid songid month_id
<int> <chr> <int> <int> <int> <chr>
1 6263 3/28/2017 59 3747 6263 3
2 3691 4/24/2017 53 2417 3691 4
3 2222 3/24/2017 34 2417 9856 3
4 1924 3/16/2017 19 8514 1924 3
5 6652 1/11/2017 33 1145 6652 1
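One thing to keep in mind with this answer: month_id only carries the month number, so the same month in different years would fall into one group if the data ever spans more than a year. A year-month key avoids that (a sketch on datf, assuming the m/d/Y date format shown in the question):
library(dplyr)
RareUsers <- datf %>%
  mutate(ym = format(as.Date(date_transaction, "%m/%d/%Y"), "%Y-%m")) %>%
  group_by(userid, ym) %>%
  filter(n() == 1)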
You can try something like:
df %>%
  mutate(mth = lubridate::month(lubridate::mdy(date_transaction))) %>%
  group_by(mth, userid) %>%
  filter(n() == 1)
which gives:
albumid date_transaction listened_time_secs userid songid mth
<int> <fctr> <int> <int> <int> <dbl>
1 6263 3/28/2017 59 3747 6263 3
2 3691 4/24/2017 53 2417 3691 4
3 2222 3/24/2017 34 2417 9856 3
4 1924 3/16/2017 19 8514 1924 3
5 6652 1/11/2017 33 1145 6652 1
You can do it with base R:
# parse date and extract month
datf$date_transaction <- as.Date(datf$date_transaction, "%m/%d/%Y")
datf$month <- format(datf$date_transaction, "%m")
# find non-duplicated pairs of userid and month
aux <- datf[, c("userid", "month")]
RareUsers <- setdiff(aux, aux[duplicated(aux), ])
RareUsers
# userid month
# 1 3747 03
# 2 2417 04
# 3 2417 03
# 4 8514 03
# 5 1145 01
If you need the other columns:
merge(RareUsers, datf)
# userid month albumid date_transaction listened_time_secs songid
# 1 1145 01 6652 2017-01-11 33 6652
# 2 2417 03 2222 2017-03-24 34 9856
# 3 2417 04 3691 2017-04-24 53 3691
# 4 3747 03 6263 2017-03-28 59 6263
# 5 8514 03 1924 2017-03-16 19 1924

Convert integer as "20160119" to different columns of "day" "year" "month"

How can I convert a column of integers representing dates:
DATE PRCP
1: 19490101 25
2: 19490102 5
3: 19490118 18
4: 19490119 386
5: 19490202 38
to a table like this:
days month years PRCP
We can use extract from tidyr:
library(tidyr)
extract(df, DATE, into = c('YEAR', 'MONTH', 'DAY'),
        '(.{4})(.{2})(.{2})', remove = FALSE)
# DATE YEAR MONTH DAY PRCP
#1 19490101 1949 01 01 25
#2 19490102 1949 01 02 5
#3 19490118 1949 01 18 18
#4 19490119 1949 01 19 386
#5 19490202 1949 02 02 38
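Note that YEAR, MONTH and DAY come back as character columns here; extract() also takes convert = TRUE to turn them into integers in the same call (a small addition, assuming a reasonably recent tidyr):
extract(df, DATE, into = c('YEAR', 'MONTH', 'DAY'),
        '(.{4})(.{2})(.{2})', remove = FALSE, convert = TRUE)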
Here's another way using regular expressions:
df <- read.table(header=T, stringsAsFactors=F, text="
DATE PRCP
19490101 25
19490102 5
19490118 18
19490119 386
19490202 38")
dates <- as.character(df$DATE)
res <- t(sapply(regmatches(dates, regexec("(\\d{4})(\\d{2})(\\d{2})", dates)), "[", -1))
res <- structure(as.integer(res), .Dim=dim(res)) # make them integer values
cbind(df, setNames(as.data.frame(res), c("Y", "M", "D"))) # combine with original data frame
# DATE PRCP Y M D
# 1 19490101 25 1949 01 01
# 2 19490102 5 1949 01 02
# 3 19490118 18 1949 01 18
# 4 19490119 386 1949 01 19
# 5 19490202 38 1949 02 02
I would advise you to use the lubridate package:
require(lubridate)
df[, DATE := ymd(DATE)]
df[, c("Day", "Month", "Year") := list(day(DATE), month(DATE), year(DATE))]
df[, DATE := NULL]
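Note that the := syntax above requires df to be a data.table; if it starts out as a plain data frame, convert it first (a sketch):
library(data.table)
setDT(df) # convert in place so the := assignments work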
Another option would be to use separate from the tidyr package:
library(tidyr)
separate(df, DATE, c('year','month','day'), sep = c(4,6), remove = FALSE)
which results in:
DATE year month day PRCP
1: 19490101 1949 01 01 25
2: 19490102 1949 01 02 5
3: 19490118 1949 01 18 18
4: 19490119 1949 01 19 386
5: 19490202 1949 02 02 38
Two options in base R:
1) with substr, as suggested by @coffeinjunky in the comments:
df$year <- substr(df$DATE,1,4)
df$month <- substr(df$DATE,5,6)
df$day <- substr(df$DATE,7,8)
2) with as.Date and format:
df$DATE <- as.Date(as.character(df$DATE),'%Y%m%d')
df$year <- format(df$DATE, '%Y')
df$month <- format(df$DATE, '%m')
df$day <- format(df$DATE, '%d')
First I would convert the DATE column to Date type using as.Date(), then build the new data.frame using calls to format():
df <- data.frame(DATE=c(19490101,19490102,19490118,19490119,19490202),PRCP=c(25,5,18,386,38),stringsAsFactors=F);
df$DATE <- as.Date(as.character(df$DATE),'%Y%m%d');
data.frame(day=as.integer(format(df$DATE,'%d')),month=as.integer(format(df$DATE,'%m')),year=as.integer(format(df$DATE,'%Y')),PRCP=df$PRCP);
## day month year PRCP
## 1 1 1 1949 25
## 2 2 1 1949 5
## 3 18 1 1949 18
## 4 19 1 1949 386
## 5 2 2 1949 38

Aggregate daily level data to weekly level in R

I have a huge dataset similar to the following reproducible sample data.
Interval value
1 2012-06-10 552
2 2012-06-11 4850
3 2012-06-12 4642
4 2012-06-13 4132
5 2012-06-14 4190
6 2012-06-15 4186
7 2012-06-16 1139
8 2012-06-17 490
9 2012-06-18 5156
10 2012-06-19 4430
11 2012-06-20 4447
12 2012-06-21 4256
13 2012-06-22 3856
14 2012-06-23 1163
15 2012-06-24 564
16 2012-06-25 4866
17 2012-06-26 4421
18 2012-06-27 4206
19 2012-06-28 4272
20 2012-06-29 3993
21 2012-06-30 1211
22 2012-07-01 698
23 2012-07-02 5770
24 2012-07-03 5103
25 2012-07-04 775
26 2012-07-05 5140
27 2012-07-06 4868
28 2012-07-07 1225
29 2012-07-08 671
30 2012-07-09 5726
31 2012-07-10 5176
I want to aggregate this data to weekly level to get the output similar to the following:
Interval value
1 Week 2, June 2012 *aggregate value for day 10 to day 14 of June 2012*
2 Week 3, June 2012 *aggregate value for day 15 to day 21 of June 2012*
3 Week 4, June 2012 *aggregate value for day 22 to day 28 of June 2012*
4 Week 5, June 2012 *aggregate value for day 29 to day 30 of June 2012*
5 Week 1, July 2012 *aggregate value for day 1 to day 7 of July 2012*
6 Week 2, July 2012 *aggregate value for day 8 to day 10 of July 2012*
How do I achieve this easily without writing a lot of code?
If you mean the sum of 'value' by week, I think the easiest way to do it is to convert the data into an xts object, as GSee suggested:
library(xts)
data <- as.xts(data$value, order.by = as.Date(data$Interval))
weekly <- apply.weekly(data, sum)
[,1]
2012-06-10 552
2012-06-17 23629
2012-06-24 23872
2012-07-01 23667
2012-07-08 23552
2012-07-10 10902
I leave the formatting of the output as an exercise for you :-)
If you were to use week from lubridate, you would only get five weeks to pass to by. Assume dat is your data,
> library(lubridate)
> do.call(rbind, by(dat$value, week(dat$Interval), summary))
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 24 552 4146 4188 3759 4529 4850
# 25 490 2498 4256 3396 4438 5156
# 26 564 2578 4206 3355 4346 4866
# 27 698 993 4868 3366 5122 5770
# 28 671 1086 3200 3200 5314 5726
This shows a summary for the 24th through 28th week of the year. Similarly, we can get the means with aggregate with
> aggregate(value~week(Interval), data = dat, mean)
# week(Interval) value
# 1 24 3758.667
# 2 25 3396.286
# 3 26 3355.000
# 4 27 3366.429
# 5 28 3199.500
I just came across this old question because it was used as a dupe target.
Unfortunately, all the upvoted answers (except the one by konvas and a now deleted one) present solutions for aggregating the data by week of the year while the OP has requested to aggregate by week of the month.
The definition of week of the year and week of the month is ambiguous as discussed here, here, and here.
However, the OP has indicated that he wants to count the days 1 to 7 of each month as week 1 of the month, days 8 to 14 as week 2 of the month, etc. Note that week 5 is a stub for most of the months consisting of only 2 or 3 days (except for the month of February if no leap year).
Having prepared the ground, here is a data.table solution for this kind of aggregation:
library(data.table)
DT[, .(value = sum(value)),
   by = .(Interval = sprintf("Week %i, %s",
                             (mday(Interval) - 1L) %/% 7L + 1L,
                             format(Interval, "%b %Y")))]
Interval value
1: Week 2, Jun 2012 18366
2: Week 3, Jun 2012 24104
3: Week 4, Jun 2012 23348
4: Week 5, Jun 2012 5204
5: Week 1, Jul 2012 23579
6: Week 2, Jul 2012 11573
We can verify that we have picked the correct intervals by
DT[, .(value = sum(value),
       date_range = toString(range(Interval))),
   by = .(Week = sprintf("Week %i, %s",
                         (mday(Interval) - 1L) %/% 7L + 1L,
                         format(Interval, "%b %Y")))]
Week value date_range
1: Week 2, Jun 2012 18366 2012-06-10, 2012-06-14
2: Week 3, Jun 2012 24104 2012-06-15, 2012-06-21
3: Week 4, Jun 2012 23348 2012-06-22, 2012-06-28
4: Week 5, Jun 2012 5204 2012-06-29, 2012-06-30
5: Week 1, Jul 2012 23579 2012-07-01, 2012-07-07
6: Week 2, Jul 2012 11573 2012-07-08, 2012-07-10
which is in line with OP's specification.
Data
library(data.table)
DT <- fread(
"rn Interval value
1 2012-06-10 552
2 2012-06-11 4850
3 2012-06-12 4642
4 2012-06-13 4132
5 2012-06-14 4190
6 2012-06-15 4186
7 2012-06-16 1139
8 2012-06-17 490
9 2012-06-18 5156
10 2012-06-19 4430
11 2012-06-20 4447
12 2012-06-21 4256
13 2012-06-22 3856
14 2012-06-23 1163
15 2012-06-24 564
16 2012-06-25 4866
17 2012-06-26 4421
18 2012-06-27 4206
19 2012-06-28 4272
20 2012-06-29 3993
21 2012-06-30 1211
22 2012-07-01 698
23 2012-07-02 5770
24 2012-07-03 5103
25 2012-07-04 775
26 2012-07-05 5140
27 2012-07-06 4868
28 2012-07-07 1225
29 2012-07-08 671
30 2012-07-09 5726
31 2012-07-10 5176", drop = 1L)
DT[, Interval := as.Date(Interval)]
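For comparison, the same week-of-month aggregation can be written with dplyr on the data frame version of this data (a sketch, not part of the answer above):
library(dplyr)
library(lubridate)
as.data.frame(DT) %>%
  group_by(Interval = sprintf("Week %i, %s",
                              (mday(Interval) - 1L) %/% 7L + 1L,
                              format(Interval, "%b %Y"))) %>%
  summarise(value = sum(value))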
If you are using a data frame, you can easily do this with the tidyquant package. Use the tq_transmute function, which applies a mutation and returns a new data frame. Select the "value" column and apply the xts function apply.weekly. The additional argument FUN = sum will get the aggregate by week.
library(tidyquant)
df
#> # A tibble: 31 x 2
#> Interval value
#> <date> <int>
#> 1 2012-06-10 552
#> 2 2012-06-11 4850
#> 3 2012-06-12 4642
#> 4 2012-06-13 4132
#> 5 2012-06-14 4190
#> 6 2012-06-15 4186
#> 7 2012-06-16 1139
#> 8 2012-06-17 490
#> 9 2012-06-18 5156
#> 10 2012-06-19 4430
#> # ... with 21 more rows
df %>%
  tq_transmute(select = value,
               mutate_fun = apply.weekly,
               FUN = sum)
#> # A tibble: 6 x 2
#> Interval value
#> <date> <int>
#> 1 2012-06-10 552
#> 2 2012-06-17 23629
#> 3 2012-06-24 23872
#> 4 2012-07-01 23667
#> 5 2012-07-08 23552
#> 6 2012-07-10 10902
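Another option in the same spirit, not shown in the answers above, is to round each date down to the start of its calendar week with lubridate::floor_date() and aggregate with dplyr (a sketch on the same df; note this is again week-of-year style rather than week-of-month):
library(dplyr)
library(lubridate)
df %>%
  group_by(week_start = floor_date(Interval, unit = "week")) %>%
  summarise(value = sum(value))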
When you say "aggregate" the values, you mean take their sum? Let's say your data frame is d and assuming d$Interval is of class Date, you can try
# if d$Interval is not of class Date: d$Interval <- as.Date(d$Interval)
formatdate <- function(date)
  paste0("Week ", (as.numeric(format(date, "%d")) - 1) %/% 7 + 1,
         ", ", format(date, "%b %Y"))
# change "sum" to your required function
aggregate(d$value, by = list(formatdate(d$Interval)), sum)
# Group.1 x
# 1 Week 1, Jul 2012 3725.667
# 2 Week 2, Jul 2012 3199.500
# 3 Week 2, Jun 2012 3544.000
# 4 Week 3, Jun 2012 3434.000
# 5 Week 4, Jun 2012 3333.143
# 6 Week 5, Jun 2012 3158.667
