Creating a subgroup column in R

I have a large dataset with 516 rows (partial dataset below):

Check_In              Ward_1                Elapsed_time
2019-01-01 00:05:18   2019-01-01 00:09:32    4.2333333 mins
2019-01-01 00:11:3    2019-01-01 00:25:04   13.4500000 mins
2019-01-01 00:21:33   2019-01-01 01:03:31   41.9666667 mins
2019-01-01 00:27:18   2019-01-01 01:15:36   48.3000000 mins
2019-01-01 01:44:07   2019-01-01 02:02:45   18.6333333 mins
2019-01-01 02:10:46   2019-01-01 02:26:18   15.5333333 mins
I would like to create a subgroup number column with 3 rows per subgroup (example below), so I can then use the qcc.groups function with the Elapsed_time and subgroup columns.
Check_In              Ward_1                Elapsed_time       subgroup
2019-01-01 00:05:18   2019-01-01 00:09:32    4.2333333 mins    1
2019-01-01 00:11:3    2019-01-01 00:25:04   13.4500000 mins    1
2019-01-01 00:21:33   2019-01-01 01:03:31   41.9666667 mins    1
2019-01-01 00:27:18   2019-01-01 01:15:36   48.3000000 mins    2
2019-01-01 01:44:07   2019-01-01 02:02:45   18.6333333 mins    2
2019-01-01 02:10:46   2019-01-01 02:26:18   15.5333333 mins    2

Another base R option
df$subgroup <- ceiling(seq(nrow(df)) / 3)

We can use gl from base R to create the grouping by specifying n as the number of rows of the dataset (nrow(df1)) and k = 3:
df1$subgroup <- as.integer(gl(nrow(df1), 3, nrow(df1)))
data
df1 <- structure(list(Check_In = c("2019-01-01 00:05:18", "2019-01-01 00:11:3",
"2019-01-01 00:21:33", "2019-01-01 00:27:18", "2019-01-01 01:44:07",
"2019-01-01 02:10:46"), Ward_1 = c("2019-01-01 00:09:32", "2019-01-01 00:25:04",
"2019-01-01 01:03:31", "2019-01-01 01:15:36", "2019-01-01 02:02:45",
"2019-01-01 02:26:18"), Elapsed_time = c("4.2333333 mins", "13.4500000 mins",
"41.9666667 mins", "48.3000000 mins", "18.6333333 mins", "15.5333333 mins"
)), class = "data.frame", row.names = c(NA, -6L))

Or simply
df1 %>% mutate(grp = (row_number() + 2) %/% 3)
Check_In Ward_1 Elapsed_time grp
1 2019-01-01 00:05:18 2019-01-01 00:09:32 4.2333333 mins 1
2 2019-01-01 00:11:3 2019-01-01 00:25:04 13.4500000 mins 1
3 2019-01-01 00:21:33 2019-01-01 01:03:31 41.9666667 mins 1
4 2019-01-01 00:27:18 2019-01-01 01:15:36 48.3000000 mins 2
5 2019-01-01 01:44:07 2019-01-01 02:02:45 18.6333333 mins 2
6 2019-01-01 02:10:46 2019-01-01 02:26:18 15.5333333 mins 2
df1 dput courtesy of @akrun.

Or maybe (thanks to akrun for the data):
library(dplyr)
df1 %>%
  mutate(subgroup = rep(row_number(), each = 3, length.out = n()))
Output:
Check_In Ward_1 Elapsed_time subgroup
1 2019-01-01 00:05:18 2019-01-01 00:09:32 4.2333333 mins 1
2 2019-01-01 00:11:3 2019-01-01 00:25:04 13.4500000 mins 1
3 2019-01-01 00:21:33 2019-01-01 01:03:31 41.9666667 mins 1
4 2019-01-01 00:27:18 2019-01-01 01:15:36 48.3000000 mins 2
5 2019-01-01 01:44:07 2019-01-01 02:02:45 18.6333333 mins 2
6 2019-01-01 02:10:46 2019-01-01 02:26:18 15.5333333 mins 2
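
For completeness, a minimal sketch of the qcc.groups step the question is building toward, assuming the qcc package is installed and that Elapsed_time is stored as text like "4.2333333 mins" (the conversion line is an assumption about that format):
library(qcc)

# subgroup column from any of the answers above
df1$subgroup <- ceiling(seq_len(nrow(df1)) / 3)

# assumption: strip the " mins" unit and convert to numeric minutes
elapsed <- as.numeric(sub(" mins", "", df1$Elapsed_time))

# reshape into one row per subgroup, as qcc expects
grouped <- qcc.groups(elapsed, df1$subgroup)

# e.g. an X-bar chart on the full 516-row dataset
qcc(grouped, type = "xbar")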


Count number of rows that are not NA [duplicate]

So I have a data frame that looks like this:
"date","id_station","id_parameter","valor","unit","year","day","month","hour","zona"
2019-01-01 00:00:00,"AJM","CO",NA,15,2019,1,1,0,"SO"
2019-01-01 00:00:00,"ATI","CO",NA,15,2019,1,1,0,"NO"
2019-01-01 00:00:00,"BJU","CO",NA,15,2019,1,1,0,"CE"
2019-01-01 00:00:00,"CAM","CO",NA,15,2019,1,1,0,"NO"
2019-01-01 00:00:00,"CCA","CO",NA,15,2019,1,1,0,"SO"
2019-01-01 00:00:00,"CHO","CO",NA,15,2019,1,1,0,"SE"
2019-01-01 00:00:00,"CUA","CO",NA,15,2019,1,1,0,"SO"
2019-01-01 00:00:00,"FAC","CO",NA,15,2019,1,1,0,"NO"
2019-01-01 00:00:00,"HGM","CO",NA,15,2019,1,1,0,"CE"
2019-01-01 00:00:00,"IZT","CO",NA,15,2019,1,1,0,"CE"
2019-01-01 00:00:00,"LLA","CO",NA,15,2019,1,1,0,"NE"
2019-01-01 00:00:00,"LPR","CO",NA,15,2019,1,1,0,"NE"
2019-01-01 00:00:00,"MER","CO",NA,15,2019,1,1,0,"CE"
2019-01-01 00:00:00,"MGH","CO",NA,15,2019,1,1,0,"SO"
2019-01-01 00:00:00,"NEZ","CO",NA,15,2019,1,1,0,"NE"
2019-01-01 00:00:00,"PED","CO",NA,15,2019,1,1,0,"SO"
2019-01-01 00:00:00,"SAG","CO",NA,15,2019,1,1,0,"NE"
2019-01-01 00:00:00,"SFE","CO",NA,15,2019,1,1,0,"SO"
2019-01-01 00:00:00,"SJA","CO",NA,15,2019,1,1,0,"NO"
2019-01-01 00:00:00,"TAH","CO",NA,15,2019,1,1,0,"SE"
2019-01-01 00:00:00,"TLA","CO",NA,15,2019,1,1,0,"NO"
2019-01-01 00:00:00,"TLI","CO",NA,15,2019,1,1,0,"NO"
2019-01-01 00:00:00,"UAX","CO",NA,15,2019,1,1,0,"SE"
2019-01-01 00:00:00,"UIZ","CO",NA,15,2019,1,1,0,"SE"
2019-01-01 00:00:00,"VIF","CO",NA,15,2019,1,1,0,"NE"
2019-01-01 00:00:00,"XAL","CO",NA,15,2019,1,1,0,"NE"
2019-01-01 01:00:00,"AJM","CO",NA,15,2019,1,1,1,"SO"
2019-01-01 01:00:00,"ATI","CO",NA,15,2019,1,1,1,"NO"
2019-01-01 01:00:00,"BJU","CO",NA,15,2019,1,1,1,"CE"
2019-01-01 01:00:00,"CAM","CO",NA,15,2019,1,1,1,"NO"
2019-01-01 01:00:00,"CCA","CO",NA,15,2019,1,1,1,"SO"
2019-01-01 01:00:00,"CHO","CO",NA,15,2019,1,1,1,"SE"
2019-01-01 01:00:00,"CUA","CO",NA,15,2019,1,1,1,"SO"
2019-01-01 01:00:00,"FAC","CO",NA,15,2019,1,1,1,"NO"
2019-01-01 01:00:00,"HGM","CO",NA,15,2019,1,1,1,"CE"
2019-01-01 01:00:00,"IZT","CO",NA,15,2019,1,1,1,"CE"
2019-01-01 01:00:00,"LLA","CO",NA,15,2019,1,1,1,"NE"
2019-01-01 01:00:00,"LPR","CO",NA,15,2019,1,1,1,"NE"
2019-01-01 01:00:00,"MER","CO",NA,15,2019,1,1,1,"CE"
2019-01-01 01:00:00,"MGH","CO",NA,15,2019,1,1,1,"SO"
2019-01-01 01:00:00,"NEZ","CO",NA,15,2019,1,1,1,"NE"
2019-01-01 01:00:00,"PED","CO",NA,15,2019,1,1,1,"SO"
2019-01-01 01:00:00,"SAG","CO",NA,15,2019,1,1,1,"NE"
2019-01-01 01:00:00,"SFE","CO",NA,15,2019,1,1,1,"SO"
2019-01-01 01:00:00,"SJA","CO",NA,15,2019,1,1,1,"NO"
2019-01-01 01:00:00,"TAH","CO",NA,15,2019,1,1,1,"SE"
2019-01-01 01:00:00,"TLA","CO",NA,15,2019,1,1,1,"NO"
2019-01-01 01:00:00,"TLI","CO",NA,15,2019,1,1,1,"NO"
2019-01-01 01:00:00,"UAX","CO",NA,15,2019,1,1,1,"SE"
2019-01-01 01:00:00,"UIZ","CO",NA,15,2019,1,1,1,"SE"
2019-01-01 01:00:00,"VIF","CO",NA,15,2019,1,1,1,"NE"
2019-01-01 01:00:00,"XAL","CO",NA,15,2019,1,1,1,"NE"
What I want to do is group everything by id_station, id_parameter, year, day, and month. Afterwards, I want to count the number of rows that are not NA in "valor" for each day.
Finally, I want to determine how many days had at least 18 non-NA values for each id_station. If there are fewer than 274 such days, I want to delete ALL values associated with that id_station.
How can I do this?
Another possible option might be:
aggregate(
  cbind(Count = !is.na(valor)) ~ id_station + id_parameter + year + day + month,
  df,
  sum
)
After grouping by the columns of interest, take the sum of a logical vector as the count: is.na(valor) returns a logical vector with TRUE where 'valor' is NA and FALSE where it is not; negating it (!) reverses this, and summing the result counts each TRUE (-> 1), i.e. each non-NA element.
library(dplyr)
df1 %>%
  group_by(id_station, id_parameter, year, day, month) %>%
  summarise(Count = sum(!is.na(valor)))
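
The answers above only cover the counting step. A hedged sketch of the remaining filtering the question asks for (the 18-reading and 274-day thresholds come from the question; df1 is the same frame used above) might look like this:
library(dplyr)

daily_counts <- df1 %>%
  group_by(id_station, id_parameter, year, month, day) %>%
  summarise(Count = sum(!is.na(valor))) %>%
  ungroup()

# stations with at least 274 days that have 18 or more non-NA readings
stations_to_keep <- daily_counts %>%
  group_by(id_station) %>%
  summarise(good_days = sum(Count >= 18)) %>%
  filter(good_days >= 274)

# drop all rows for stations below the threshold
df_filtered <- df1 %>% semi_join(stations_to_keep, by = "id_station")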

R: Moving average length of time between two dates

I have a dataset of observations with start and end dates. I would like to calculate the moving average difference between the start and end dates.
I've included an example dataset below.
require(dplyr)
df <- data.frame(id = c(1, 2, 3),
                 start = c("2019-01-01", "2019-01-10", "2019-01-05"),
                 end = c("2019-02-01", "2019-01-15", "2019-01-10"))
df[, c("start", "end")] <- lapply(df[, c("start", "end")], as.Date)
id start end
1 2019-01-01 2019-02-01
2 2019-01-10 2019-01-15
3 2019-01-05 2019-01-10
The overall date ranges are 2019-01-01 to 2019-02-01. I would like to calculate the average difference between the start and end dates for each of the dates in that range.
The result would look exactly like this. I've included the actual values for the averages that should show up:
date avg
2019-01-01 0
2019-01-02 1
2019-01-03 2
2019-01-04 3
2019-01-05 4
2019-01-06 3
2019-01-07 4
2019-01-08 5
2019-01-09 6
2019-01-10 7
2019-01-11 5.5
. .
. .
. .
Creating a reproducible example:
df <- data.frame(id = c(1, 2, 3, 4),
                 start = c("2019-01-01", "2019-01-01", "2019-01-10", "2019-01-05"),
                 end = c("2019-01-04", "2019-01-05", "2019-01-12", "2019-01-08"))
df[, c("start", "end")] <- lapply(df[, c("start", "end")], as.Date)
df
Returns:
id start end
1 2019-01-01 2019-01-04
2 2019-01-01 2019-01-05
3 2019-01-10 2019-01-12
4 2019-01-05 2019-01-08
Then using the group_by function from dplyr:
library(dplyr)
df %>%
  group_by(start) %>%
  summarize(avg = mean(end - start)) %>%
  rename(date = start)
Returns:
date avg
<time> <time>
2019-01-01 3.5 days
2019-01-05 3.0 days
2019-01-10 2.0 days
Editing the answer as per comments.
Creating the df:
require(dplyr)
df <- data.frame(id = c(1, 2, 3),
                 start = c("2019-01-01", "2019-01-10", "2019-01-05"),
                 end = c("2019-02-01", "2019-01-15", "2019-01-10"))
df[, c("start", "end")] <- lapply(df[, c("start", "end")], as.Date)
Create dates for every start-end combination:
# gives the list of all dates within start and end frames and calculates the difference
datesList = lapply(1:nrow(df), function(i){
  dat = data_frame('date' = seq.Date(from = df$start[i], to = df$end[i], by = 1),
                   'start' = df$start[i]) %>%
    dplyr::mutate(diff = date - start)
})
Finally, group_by the date and find the avg to give output exactly like the one in the question:
finalDf = bind_rows(datesList) %>%
  dplyr::filter(diff != 0) %>%
  dplyr::group_by(date) %>%
  dplyr::summarise(avg = mean(diff, na.rm = TRUE))
The output thus becomes:
# A tibble: 31 x 2
date avg
<date> <time>
1 2019-01-02 1.0 days
2 2019-01-03 2.0 days
3 2019-01-04 3.0 days
4 2019-01-05 4.0 days
5 2019-01-06 3.0 days
6 2019-01-07 4.0 days
7 2019-01-08 5.0 days
8 2019-01-09 6.0 days
9 2019-01-10 7.0 days
10 2019-01-11 5.5 days
# … with 21 more rows
Let me know if it works.
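
One hedged note on the filter(diff != 0) step: the expected output in the question starts at 2019-01-01 with avg = 0, so if those zero-length rows should be kept, the same pipeline without the filter would be:
finalDf_with_zeros <- bind_rows(datesList) %>%
  dplyr::group_by(date) %>%
  dplyr::summarise(avg = mean(diff, na.rm = TRUE))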

Can I aggregate time series data between an on and off date using a data table join or the aggregate function?

I would like to efficiently summarize continuous meteorological data over the periods that discrete samples are being collected.
I currently do this with a time-consuming loop, but I imagine a better solution exists. I'm new to data.table syntax, but it seems like there should be a solution with joining.
continuous <- data.frame(Time = seq(as.POSIXct("2019-01-01 0:00:00"),
                                    as.POSIXct("2019-01-01 9:00:00"), "hour"),
                         CO2 = sample(400:450, 10),
                         Temp = sample(10:30, 10))
> continuous
Time CO2 Temp
1 2019-01-01 00:00:00 430 11
2 2019-01-01 01:00:00 412 26
3 2019-01-01 02:00:00 427 17
4 2019-01-01 03:00:00 435 29
5 2019-01-01 04:00:00 447 23
6 2019-01-01 05:00:00 417 19
7 2019-01-01 06:00:00 408 12
8 2019-01-01 07:00:00 449 28
9 2019-01-01 08:00:00 445 20
10 2019-01-01 09:00:00 420 27
discrete <- data.frame(on = c(as.POSIXct("2019-01-01 0:00:00"),
                              as.POSIXct("2019-01-01 3:00:00")),
                       off = c(as.POSIXct("2019-01-01 3:00:00"),
                               as.POSIXct("2019-01-01 8:00:00")))
> discrete
on off
1 2019-01-01 00:00:00 2019-01-01 03:00:00
2 2019-01-01 03:00:00 2019-01-01 08:00:00
discrete[, c("CO2.mean","Temp.mean")] <-
lapply(seq(length(c("CO2","Temp"))), function(k)
unlist(lapply(seq(length(discrete[, 1])), function(i)
mean(continuous[
which.closest(continuous$Time,discrete$on[i]):
which.closest(continuous$Time, discrete$off[i]),
c("CO2","Temp")[k]]))))
> discrete
on off CO2.mean Temp.mean
1 2019-01-01 00:00:00 2019-01-01 03:00:00 426.0 20.75000
2 2019-01-01 03:00:00 2019-01-01 08:00:00 433.5 21.83333
This works, but when aggregating tens of continuous variables into hundreds of sampling periods, it takes a very long time to run. Thank you for your help!
An option would be a nonequi join in data.table
library(data.table)
setDT(continuous)[discrete,
                  .(CO2mean = mean(CO2), Tempmean = mean(Temp)),
                  on = .(Time >= on, Time <= off), by = .EACHI]
or with a rolling join
setDT(continuous)[discrete,
                  .(CO2mean = mean(CO2), Tempmean = mean(Temp)),
                  on = .(Time = on, Time = off),
                  by = .EACHI, roll = 'nearest']
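
Since the question mentions tens of continuous variables, a hedged generalization of the non-equi join (not part of the original answer) is to average every measurement column at once via .SDcols:
library(data.table)

vars <- setdiff(names(continuous), "Time")   # e.g. c("CO2", "Temp", ...)

setDT(continuous)[discrete,
                  lapply(.SD, mean),
                  on = .(Time >= on, Time <= off),
                  by = .EACHI,
                  .SDcols = vars]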

Grouping rows to conversations and add conversation number

I have a file which has messages between customers and agents, but these messages are not grouped by conversation, i.e. there is no unique conversation ID. Luckily, the original message is included in each following reply to that message. The message is in the 'text' column. This is easily explained by the example below:
actionDateTime text response postTime
2019-01-01 12:00 Hi N/A 2019-01-01 12:00
2019-01-01 12:01 Hi Hello! 2019-01-01 12:00
2019-01-01 12:02 Hi How can I help? 2019-01-01 12:00
.
.
.
2019-01-02 12:00 Hi there N/A 2019-01-01 12:00
2019-01-02 12:01 Hi there Morning 2019-01-01 12:00
2019-01-02 12:02 Hi there How can I help? 2019-01-01 12:00
So I tried the code below to group but this isn't working.
df %>%
  group_by(text, postTime) %>%
  mutate(convID = row_number()) %>%
  ungroup()
This does output a file with convID, but not the way I want. In fact, I don't understand how it's numbering. I believe that's because I'm using two variables in group_by. However, using only one will not work, as two different people can message at the same time, or two different messages can look similar (e.g. a lot of people start with just 'Hi').
When I tried grouping only by 'text', it still gave me numbers within a conversation rather than a unique ID. Again, explained below.
What I get
text response postTime convID
Hi N/A 2019-01-01 12:00 1
Hi Hello! 2019-01-01 12:00 2
Hi How can I help? 2019-01-01 12:00 3
.
.
.
Hi there N/A 2019-01-01 12:00 1
Hi there Morning 2019-01-01 12:00 2
Hi there How can I help? 2019-01-01 12:00 3
What I want:
text response postTime convID
Hi N/A 2019-01-01 12:00 1
Hi Hello! 2019-01-01 12:00 1
Hi How can I help? 2019-01-01 12:00 1
.
.
.
Hi there N/A 2019-01-01 12:00 2
Hi there Morning 2019-01-01 12:00 2
Hi there How can I help? 2019-01-01 12:00 2
Any help?
We may need group_indices
library(dplyr)
df %>%
  mutate(convID = group_indices(., text, postTime))
# actionDateTime text response postTime convID
#1 2019-01-01 12:00 Hi N/A 2019-01-01 12:00 1
#2 2019-01-01 12:01 Hi Hello! 2019-01-01 12:00 1
#3 2019-01-01 12:02 Hi How can I help? 2019-01-01 12:00 1
#4 2019-01-02 12:00 Hi there N/A 2019-01-01 12:00 2
#5 2019-01-02 12:01 Hi there Morning 2019-01-01 12:00 2
#6 2019-01-02 12:02 Hi there How can I help? 2019-01-01 12:00 2
data
df <- structure(list(actionDateTime = c("2019-01-01 12:00", "2019-01-01 12:01",
"2019-01-01 12:02", "2019-01-02 12:00", "2019-01-02 12:01", "2019-01-02 12:02"
), text = c("Hi", "Hi", "Hi", "Hi there", "Hi there", "Hi there"
), response = c("N/A", "Hello!", "How can I help?", "N/A", "Morning",
"How can I help?"), postTime = c("2019-01-01 12:00", "2019-01-01 12:00",
"2019-01-01 12:00", "2019-01-01 12:00", "2019-01-01 12:00", "2019-01-01 12:00"
)), class = "data.frame", row.names = c(NA, -6L))

Convert start time and total duration to elapsed time per hour

I have data on start time ('startTime', a date-time variable, POSIXct) and duration in minutes ('duration_minutes'):
df <- data.frame(id = c(1, 2, 3),
                 startTime = as.POSIXct(c("2018-01-01 12:15:31",
                                          "2018-01-02 23:43:00",
                                          "2018-01-03 11:00:11")),
                 duration_minutes = c(315, 120, 45))
I want to convert the start time and duration to elapsed time per hour, for each hour, from the hour of the start time to the last hour at the end of the duration:
df_result <- data.frame(id = c(1, 1, 1, 1, 1, 1, 2, 2, 2, 3),
                        startTime = c("2018-01-01 12:15:31", "2018-01-01 13:00:00",
                                      "2018-01-01 14:00:00", "2018-01-01 15:00:00",
                                      "2018-01-01 16:00:00", "2018-01-01 17:00:00",
                                      "2018-01-02 23:43:00", "2018-01-03 00:00:00",
                                      "2018-01-03 01:00:00",
                                      "2018-01-03 11:00:11"),
                        duration_minutes = c(44.48, 60, 60, 60, 60, 30.5, 17, 60, 43, 45))
Please advise on a possible solution.
Another possibility:
library(data.table)
library(lubridate)
setDT(df)
df[, ceil_start := ceiling_date(start, "hour", change_on_boundary = TRUE)]
df[, {
  if (difftime(ceil_start, start, units = "min") > dur) {
    .SD[, .(start, dur)]
  } else {
    end <- start + dur * 60
    time <- c(start,
              seq(from = ceil_start,
                  to = floor_date(end, "hour"),
                  by = "hour"),
              end)
    .(start = head(time, -1), dur = `units<-`(diff(time), "mins"))
  }
}, by = id]
# id start dur
# 1: 1 2018-01-01 12:15:31 44.48333 mins
# 2: 1 2018-01-01 13:00:00 60.00000 mins
# 3: 1 2018-01-01 14:00:00 60.00000 mins
# 4: 1 2018-01-01 15:00:00 60.00000 mins
# 5: 1 2018-01-01 16:00:00 60.00000 mins
# 6: 1 2018-01-01 17:00:00 30.51667 mins
# 7: 2 2018-01-02 23:43:00 17.00000 mins
# 8: 2 2018-01-03 00:00:00 60.00000 mins
# 9: 2 2018-01-03 01:00:00 43.00000 mins
# 10: 3 2018-01-03 11:00:11 45.00000 mins
# 11: 4 2018-01-03 11:35:00 25.00000 mins
# 12: 4 2018-01-03 12:00:00 10.00000 mins
# 13: 5 2018-01-03 00:00:00 60.00000 mins
# 14: 5 2018-01-03 01:00:00 0.00000 mins
Explanation
Convert the data.frame to a data.table (setDT). Round start times up to the nearest hour (ceiling_date(start, "hour", ...)). Use change_on_boundary = TRUE for easier handling of times without minutes and seconds (not in the data, but tested).
To handle cases where the end time (start + duration) falls in the same hour as the start time (e.g. id = 3), check whether the difference between the rounded time and the start time is larger than the duration (if (difftime(ceil_start, start, units = "min") > dur)). If so, just select the start and duration columns (.SD[, .(start, dur)]).
For the other cases (else), calculate the end time: end <- start + dur * 60. Create a sequence from the up-rounded start time ('ceil_start') to the down-rounded end time, with an hourly increment (seq(from = ceil_start, to = floor_date(end, "hour"), by = "hour")). Concatenate with the 'start' and 'end' times. Return all times except the last (head(time, -1)) and calculate the difference between time steps in minutes (`units<-`(diff(time), "mins")).
For times with H:M:S = 00:00:00 where the duration is a multiple of 60 min, like id = 5, the current solution gives a row with a duration of 0 minutes for the last hour. While waiting for a more elegant solution, a quick and dirty fix is simply to delete such rows with duration = 0, as sketched below.
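A minimal sketch of that cleanup, assuming the result of the j-expression above has been assigned to a data.table called res (the name is an assumption):
# drop the leftover 0-minute rows produced for whole-hour start times
res <- res[as.numeric(dur) != 0]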
Data
Please note that I have added cases not included in the original data: id = 4 (see also my comment above) and id = 5.
df <- data.frame(id = 1:5,
                 start = as.POSIXct(c("2018-01-01 12:15:31",
                                      "2018-01-02 23:43:00",
                                      "2018-01-03 11:00:11",
                                      "2018-01-03 11:35:00",
                                      "2018-01-03 00:00:00")),
                 dur = c(315, 120, 45, 35, 60))
Try this:
library(data.table)
library(lubridate)
library(magrittr)
df <-
  setDT(df)[, start_ceiling := ceiling_date(startTime, "hour", change_on_boundary = TRUE)] %>%
  .[, `:=`(
    reps = ifelse(
      startTime + (duration_minutes * 60) <= start_ceiling,
      1,
      pmax(2, floor(duration_minutes / 60) + 1)
    ),
    initial_diff = as.numeric(difftime(start_ceiling[1], startTime[1], units = "mins"))
  ), by = id] %>%
  .[, df[df[, rep(.I, reps)]]] %>%
  .[, startTime := pmax(startTime, floor_date(startTime, "hour") + hours(0:(.N - 1))), by = id] %>%
  .[reps > 1, duration_minutes := c(initial_diff[.N],
                                    rep(60, reps[.N] - 2),
                                    (duration_minutes[.N] - initial_diff[.N]) %% 60), by = id] %>%
  .[!(duration_minutes == 0 & reps > 1), ] %>%
  .[, c("reps", "start_ceiling", "initial_diff") := NULL]
I've tested this with all the scenarios we've gathered so far, and this is the output:
id startTime duration_minutes
1: 1 2018-01-01 12:15:31 44.48333
2: 1 2018-01-01 13:00:00 60.00000
3: 1 2018-01-01 14:00:00 60.00000
4: 1 2018-01-01 15:00:00 60.00000
5: 1 2018-01-01 16:00:00 60.00000
6: 1 2018-01-01 17:00:00 30.51667
7: 2 2018-01-02 23:43:00 17.00000
8: 2 2018-01-03 00:00:00 60.00000
9: 2 2018-01-03 01:00:00 43.00000
10: 3 2018-01-03 11:00:11 45.00000
11: 4 2018-01-04 10:00:00 60.00000
12: 4 2018-01-04 11:00:00 5.00000
13: 5 2018-01-05 00:00:00 60.00000
14: 6 2018-01-06 11:35:00 25.00000
15: 6 2018-01-06 12:00:00 10.00000
16: 7 2018-01-07 00:00:00 60.00000
17: 7 2018-01-07 01:00:00 60.00000
Data used:
df <- data.frame(
  id = c(1, 2, 3, 4, 5, 6, 7),
  startTime = as.POSIXct(
    c(
      "2018-01-01 12:15:31",
      "2018-01-02 23:43:00",
      "2018-01-03 11:00:11",
      "2018-01-04 10:00:00",
      "2018-01-05 00:00:00",
      "2018-01-06 11:35:00",
      "2018-01-07 00:00:00"
    )
  ),
  duration_minutes = c(315, 120, 45, 65, 60, 35, 120)
)
df
id startTime duration_minutes
1 1 2018-01-01 12:15:31 315
2 2 2018-01-02 23:43:00 120
3 3 2018-01-03 11:00:11 45
4 4 2018-01-04 10:00:00 65
5 5 2018-01-05 00:00:00 60
6 6 2018-01-06 11:35:00 35
7 7 2018-01-07 00:00:00 120
