I have a dataset where the length of stay of a booking can span two or three months. For every such booking I want to create one row per month, with the revenue divided across the months and the remaining booking information kept the same. If a booking falls entirely within one month, it should be shown as is.
structure(list(channel = c("109", "109", "Agent"), room_stay_status = c("ENQUIRY",
"ENQUIRY", "CHECKED_OUT"), start_date = structure(c(1637971200,
1640995200, 1640995200), tzone = "UTC", class = c("POSIXct",
"POSIXt")), end_date = structure(c(1643155200, 1642636800, 1641168000
), tzone = "UTC", class = c("POSIXct", "POSIXt")), los = c(60,
19, 2), booker = c("Anuj", "Anuj", "Anuj"), area = c("Goa", "Goa",
"Goa"), property_sku = c("Amna-3b", "Amna-3b", "Amna-3b"), Revenue = c(90223.666,
5979, 7015.9), Booking_ref = c("aed97", "b497h9", "bde65")), row.names = c(NA,
-3L), class = c("tbl_df", "tbl", "data.frame"))
The output should look like this:
structure(list(channel = c("109", "109", "109", "109", "Agent"
), room_stay_status = c("ENQUIRY", "ENQUIRY", "ENQUIRY", "ENQUIRY",
"CHECKED_OUT"), start_date = structure(c(1637971200, 1638316800,
1640995200, 1640995200, 1640995200), tzone = "UTC", class = c("POSIXct",
"POSIXt")), end_date = structure(c(1638230400, 1640908800, 1643155200,
1642636800, 1641168000), tzone = "UTC", class = c("POSIXct",
"POSIXt")), los = c(4, 31, 25, 19, 2), booker = c("Anuj", "Anuj",
"Anuj", "Anuj", "Anuj"), area = c("Goa", "Goa", "Goa", "Goa",
"Goa"), property_sku = c("Amna-3b", "Amna-3b", "Amna-3b", "Amna-3b",
"Amna-3b"), Revenue = c(6014.91106666667, 46615.5607666667, 37593.1941666667,
5979, 7015.9), Booking_ref = c("aed97", "aed97", "aed97", "b497h9",
"bde65")), row.names = c(NA, -5L), class = c("tbl_df", "tbl",
"data.frame"))
Many thanks in advance.
A quick attempt here (assuming your input and expected output are named df_in and df_out) which seems to do the trick:
library("dplyr")
library("tidyr")
library("lubridate")
# Function for creating a vector from start (st) to end (nd) with intermediate
# months inside
cut_months <- function(st, nd) {
  repeat {
    # Grow vector, keep adding the next month boundary
    next_month <- ceiling_date(tail(st, 1) + seconds(1), "month")
    if (next_month > nd) {
      st <- append(st, nd)
      break
    } else {
      st <- append(st, next_month)
    }
  }
  return(st)
}
# Let's try it
print(cut_months(df_in$start_date[1], df_in$end_date[2]))
# [1] "2021-11-27 01:00:00 CET" "2021-12-01 01:00:00 CET" "2022-01-01 00:00:00 CET" "2022-01-20 01:00:00 CET"
# Function for expanding months:
expand_months <- function(df) {
  expand_rows <-
    df %>%
    # Expand months and unnest list-column
    mutate(key_dates = mapply(cut_months, start_date, end_date)) %>%
    select(-start_date, -end_date) %>%
    unnest(key_dates) %>%
    # Compute needed values
    group_by(Booking_ref) %>%
    arrange(Booking_ref, key_dates) %>%
    mutate(
      start_date = key_dates,
      end_date = lead(key_dates),
      los = as.numeric(as.duration(start_date %--% end_date), "days"), # Ceiling this?
      Revenue = Revenue * los / sum(los, na.rm = TRUE)
    ) %>%
    arrange(Booking_ref, start_date) %>%
    # Clean-up
    filter(!is.na(end_date)) %>%
    select(-key_dates)
  expand_rows
}
# Print results and compare:
expand_months(df_in)
## A tibble: 5 x 10
## Groups: Booking_ref [3]
#channel room_stay_status los booker area property_~1 Revenue Booki~2 start_date end_date
#<chr> <chr> <dbl> <chr> <chr> <chr> <dbl> <chr> <dttm> <dttm>
#1 109 ENQUIRY 4 Anuj Goa Amna-3b 6015. aed97 2021-11-27 01:00:00 2021-12-01 01:00:00
#2 109 ENQUIRY 31.0 Anuj Goa Amna-3b 46553. aed97 2021-12-01 01:00:00 2022-01-01 00:00:00
#3 109 ENQUIRY 25.0 Anuj Goa Amna-3b 37656. aed97 2022-01-01 00:00:00 2022-01-26 01:00:00
#4 109 ENQUIRY 19 Anuj Goa Amna-3b 5979 b497h9 2022-01-01 01:00:00 2022-01-20 01:00:00
#5 Agent CHECKED_OUT 2 Anuj Goa Amna-3b 7016. bde65 2022-01-01 01:00:00 2022-01-03 01:00:00
## ... with abbreviated variable names 1: property_sku, 2: Booking_ref
df_out
## A tibble: 5 x 10
#channel room_stay_status start_date end_date los booker area property_~1 Revenue Booki~2
#<chr> <chr> <dttm> <dttm> <dbl> <chr> <chr> <chr> <dbl> <chr>
#1 109 ENQUIRY 2021-11-27 00:00:00 2021-11-30 00:00:00 4 Anuj Goa Amna-3b 6015. aed97
#2 109 ENQUIRY 2021-12-01 00:00:00 2021-12-31 00:00:00 31 Anuj Goa Amna-3b 46616. aed97
#3 109 ENQUIRY 2022-01-01 00:00:00 2022-01-26 00:00:00 25 Anuj Goa Amna-3b 37593. aed97
#4 109 ENQUIRY 2022-01-01 00:00:00 2022-01-20 00:00:00 19 Anuj Goa Amna-3b 5979 b497h9
#5 Agent CHECKED_OUT 2022-01-01 00:00:00 2022-01-03 00:00:00 2 Anuj Goa Amna-3b 7016. bde65
## ... with abbreviated variable names 1: property_sku, 2: Booking_ref
I do not entirely understand how you distribute the Revenue, so consider that left as an exercise to fix :).
Hint: you need a ceiling() around the computation of the new los, which currently produces decimal days.
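For what it's worth, the proration in df_out appears to be a simple split of the booking's total Revenue in proportion to whole-day segment lengths. Here is a minimal standalone sketch of that idea (prorate_revenue is just an illustrative helper, not part of the pipeline above):
# Illustrative helper: split a booking's total revenue in proportion to
# whole-day segment lengths.
prorate_revenue <- function(total, los_days) {
  total * los_days / sum(los_days)
}
# First booking in df_in: Revenue = 90223.666, monthly segments of 4, 31 and 25 nights
prorate_revenue(90223.666, c(4, 31, 25))
#> [1]  6014.911 46615.561 37593.194   # matches the aed97 rows in df_out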
Using the solution from this post to split the dates:
library(dplyr)
library(tidyr)
library(purrr)     # for map()
library(lubridate) # for month()

df2 <- df %>%
  group_by(id = row_number()) %>%                       # for each row
  mutate(seq = list(seq(start_date, end_date, "day")),  # create a sequence of dates with 1-day step
         month = map(seq, month)) %>%                   # get the month for each of those dates in the sequence
  unnest(cols = c(seq, month)) %>%                      # unnest data
  group_by(Booking_ref, id, month) %>%                  # for each group, row and month
  summarise(start_date = min(seq),                      # get minimum date as start
            end_date = max(seq)) %>%                    # get maximum date as end
  ungroup() %>%
  select(-id, -month) %>%
  group_by(Booking_ref) %>%
  mutate(last_date = max(end_date))                     # get last_date to determine los

df3 <- merge(df2, df %>% select(-start_date, -end_date), by = "Booking_ref", all.x = TRUE) %>%
  mutate(timespan = end_date - start_date) %>%
  mutate(los2 = as.numeric(case_when(last_date == end_date ~ timespan,
                                     TRUE ~ timespan + 1)),
         Revenue2 = Revenue * los2 / los)

out_df <- df3 %>%
  select(-Revenue, -los, -timespan, -last_date) %>%
  rename(Revenue = Revenue2,
         los = los2)
> out_df
Booking_ref start_date end_date channel room_stay_status booker area property_sku los Revenue
1 aed97 2022-01-01 2022-01-26 109 ENQUIRY Anuj Goa Amna-3b 25 37593.194
2 aed97 2021-11-27 2021-11-30 109 ENQUIRY Anuj Goa Amna-3b 4 6014.911
3 aed97 2021-12-01 2021-12-31 109 ENQUIRY Anuj Goa Amna-3b 31 46615.561
4 b497h9 2022-01-01 2022-01-20 109 ENQUIRY Anuj Goa Amna-3b 19 5979.000
5 bde65 2022-01-01 2022-01-03 Agent CHECKED_OUT Anuj Goa Amna-3b 2 7015.900
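As a quick check (assuming the expected df_out from the question is also in your workspace), you can put out_df into the same row and column order for a side-by-side comparison:
# Reorder rows/columns of out_df to match df_out
out_df %>%
  arrange(Booking_ref, start_date) %>%
  select(all_of(names(df_out)))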
I have a time series with 15-minute intervals.
I would like to change it into 1-hour intervals using R, so that the measurement values within each hour are added together.
Could you please help me with this?
And is it possible to change it after that from hours to months?
The data frame is as below:
timestamp (UTC) value
2020-06-11 22:15:00 5,841
2020-06-11 22:30:00 5,719
2020-06-11 22:45:00 5,841
2020-06-11 23:00:00 5,841
2020-06-11 23:15:00 5,597
2020-06-11 23:30:00 5,232
2020-06-11 23:45:00 5,476
2020-06-12 0:00:00 4,259
2020-06-12 0:15:00 0,243
2020-06-12 0:30:00 0,243
2020-06-12 0:45:00 0,365
2020-06-12 1:00:00 0,243
Depending on how you count: if every 15 minutes after an hour belongs to the next hour, you can use lubridate::ceiling_date (22:15 => 23:00); if it belongs to the same hour, use lubridate::floor_date (22:15 => 22:00).
library(dplyr)
library(lubridate)
# option 1
df1 %>%
  mutate(timestamp = ceiling_date(timestamp, unit = "hour")) %>%
  group_by(timestamp) %>%
  summarise(value = sum(value))
# A tibble: 3 × 2
timestamp value
<dttm> <dbl>
1 2020-06-11 23:00:00 23.2
2 2020-06-12 00:00:00 20.6
3 2020-06-12 01:00:00 1.09
# option 2
df1 %>%
  mutate(timestamp = floor_date(timestamp, unit = "hour")) %>%
  group_by(timestamp) %>%
  summarise(value = sum(value))
# A tibble: 4 × 2
timestamp value
<dttm> <dbl>
1 2020-06-11 22:00:00 17.4
2 2020-06-11 23:00:00 22.1
3 2020-06-12 00:00:00 5.11
4 2020-06-12 01:00:00 0.243
data:
df1 <- structure(list(timestamp = structure(c(1591906500, 1591907400,
1591908300, 1591909200, 1591910100, 1591911000, 1591911900, 1591912800,
1591913700, 1591914600, 1591915500, 1591916400), class = c("POSIXct",
"POSIXt"), tzone = ""), value = c(5.841, 5.719, 5.841, 5.841,
5.597, 5.232, 5.476, 4.259, 0.243, 0.243, 0.365, 0.243)), row.names = c(NA,
-12L), class = "data.frame")
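The same pattern should also cover the monthly totals asked about in the question; a sketch (not shown in the outputs above) would be:
# monthly totals: same idea, just floor to the month instead of the hour
df1 %>%
  mutate(timestamp = floor_date(timestamp, unit = "month")) %>%
  group_by(timestamp) %>%
  summarise(value = sum(value))
# with this sample data there is a single row for 2020-06-01 with value 44.9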
I am trying to get the time difference between elements of an array. A sample of the data is below, and the image at the bottom describes the problem I am trying to solve. I have a dataframe column events, where each value is an array of date-time entries that correspond to events, and other columns that partition time into a before, evaluation and after period. I would like to calculate statistics on the time between events.
** Update **
Using the excellent answer by danlooo below, which gives me almost exactly what I need if I:
• add the four boundary events corresponding to before_eval_begin, eval_month, after_eval_end to the event array,
• calculate duration between consecutive events, and
• tweak the before/after case_when statement,
the following code appears to work:
duration <-
  data %>%
  mutate(across(before_event_eval:after_eval_end, as.character)) %>%
  as_tibble() %>%
  mutate(events = events %>% str_remove_all("[\\[\\]\\\"]")) %>%
  mutate(events = ifelse(events == "", events,
                         paste0(events, ",",
                                before_event_eval, ",",
                                as.character(as.Date(eval_month) - days(1)), ",",
                                as.character(ceiling_date(as.Date('2021-02-01'), "month")), ",",
                                after_eval_end))) %>%
  separate_rows(events, sep = ",") %>%
  rename(event = events) %>%
  filter(event != "") %>%
  mutate(across(before_event_eval:after_eval_end, parse_datetime)) %>%
  mutate(
    event = event %>% parse_datetime(),
    position = case_when(
      event >= before_event_eval & event < eval_month ~ "before",
      event <= after_eval_end & event > eval_month ~ "after"
    )
  ) %>%
  arrange(id, event) %>%
  group_by(id) %>%
  mutate(duration = as.numeric(event - lag(event))) %>%
  group_by(id, position) %>%
  summarise(time_until_first = first(duration[!is.na(duration)]),
            timebetween_last = last(duration[!is.na(duration)]),
            min_duration     = min(duration, na.rm = TRUE),
            avg_duration     = mean(duration, na.rm = TRUE),
            max_duration     = max(duration, na.rm = TRUE))
I think a general strategy would be as follows, but I am not sure how to proceed after step 1 and perform computations on the cleaned array (a minimal sketch of steps 1 and 2 is shown after the bullet list below):
1. Remove the brackets and quotation marks from the string.
2. Create an ordered vector of events.
3. Determine whether each event falls before or after eval_month:
   Before: the event is >= before_eval_begin and < eval_month
   After: the event is > eval_month and <= after_eval_end
4. Determine the time between events for each period (before, after), including times relative to before_eval_begin, eval_month and after_eval_end.
5. Return the statistics below; if events is missing, then all the values below should be set to 185:
• Time to first event in pre period
• Time between last event in pre period and end of pre period
• Average time between events for pre period
• Minimum of time between events in pre period
• Maximum of time between events in pre period
• Time to first event in post period
• Time between last event in post period and end of post period
• Minimum of time between events in post period
• Maximum of time between events in post period
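For steps 1 and 2, a minimal standalone sketch could look like this (clean_events is just an illustrative name; only the tidyverse is assumed):
library(tidyverse)

# Illustrative helper: turn one JSON-like events string into an ordered
# vector of POSIXct values.
clean_events <- function(x) {
  x %>%
    str_remove_all("[\\[\\]\\\"]") %>%
    str_split(",") %>%
    pluck(1) %>%
    discard(~ .x == "") %>%
    parse_datetime() %>%
    sort()
}

clean_events("[\"2021-01-28 13:25:32\",\"2021-01-28 18:25:32\"]")
#> [1] "2021-01-28 13:25:32 UTC" "2021-01-28 18:25:32 UTC"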
*Edit: removed duplicate events and added id column
Data
structure(list(id = c(1, 2, 3, 4), before_event_eval = structure(c(1596240000,
1596240000, 1604188800, 1604188800), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), eval_month = structure(c(1612137600, 1612137600,
1619827200, 1619827200), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
after_eval_end = structure(c(1627776000, 1627776000, 1635724800,
1635724800), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
events = c("[\"2021-01-28 13:25:32\",\"2021-01-28 18:25:32\"]",
"[\"2021-04-30 18:25:32\",\"2021-01-15 11:25:32\",\"2021-01-30 18:25:32\",\"2021-03-30 18:25:32\",\"2021-01-27 11:25:32\",\"2021-01-30 18:26:32\"]",
"[]", "[\"2021-04-28 13:25:32\",\"2021-05-28 10:25:32\"]"
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-4L))
Picture of Problem
Something like this?
library(tidyverse)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
data <- structure(list(
before_event_eval = structure(c(
1596240000, 1596240000,
1604188800, 1604188800
), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
eval_month = structure(c(
1612137600, 1612137600, 1619827200,
1619827200
), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
after_eval_end = structure(c(
1627776000, 1627776000, 1635724800,
1635724800
), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
events = c(
"[\"2021-01-28 13:25:32\",\"2021-01-28 18:25:32\"]",
"[\"2021-04-30 18:25:32\",\"2021-01-15 11:25:32\",\"2021-01-30 18:25:32\",\"2021-03-30 18:25:32\",\"2021-01-27 11:25:32\",\"2021-01-30 18:25:32\",\"2021-01-30 18:25:32\"]",
"[]", "[\"2021-04-28 13:25:32\",\"2021-05-28 10:25:32\"]"
)
), class = c("tbl_df", "tbl", "data.frame"), row.names = c(
NA,
-4L
))
events <-
  data %>%
  as_tibble() %>%
  mutate(
    id = row_number(),
    events = events %>% str_remove_all("[\\[\\]\\\"]")
  ) %>%
  separate_rows(events, sep = ",") %>%
  rename(event = events) %>%
  filter(event != "") %>%
  mutate(
    event = event %>% parse_datetime(),
    position = case_when(
      event >= before_event_eval &
        year(event) == year(eval_month) &
        month(event) < month(eval_month) ~ "before",
      event <= after_eval_end &
        year(event) == year(eval_month) &
        month(event) > month(eval_month) ~ "after"
    )
  ) %>%
  arrange(event)
events
#> # A tibble: 11 × 6
#> before_event_eval eval_month after_eval_end
#> <dttm> <dttm> <dttm>
#> 1 2020-08-01 00:00:00 2021-02-01 00:00:00 2021-08-01 00:00:00
#> 2 2020-08-01 00:00:00 2021-02-01 00:00:00 2021-08-01 00:00:00
#> 3 2020-08-01 00:00:00 2021-02-01 00:00:00 2021-08-01 00:00:00
#> 4 2020-08-01 00:00:00 2021-02-01 00:00:00 2021-08-01 00:00:00
#> 5 2020-08-01 00:00:00 2021-02-01 00:00:00 2021-08-01 00:00:00
#> 6 2020-08-01 00:00:00 2021-02-01 00:00:00 2021-08-01 00:00:00
#> 7 2020-08-01 00:00:00 2021-02-01 00:00:00 2021-08-01 00:00:00
#> 8 2020-08-01 00:00:00 2021-02-01 00:00:00 2021-08-01 00:00:00
#> 9 2020-11-01 00:00:00 2021-05-01 00:00:00 2021-11-01 00:00:00
#> 10 2020-08-01 00:00:00 2021-02-01 00:00:00 2021-08-01 00:00:00
#> 11 2020-11-01 00:00:00 2021-05-01 00:00:00 2021-11-01 00:00:00
#> # … with 3 more variables: event <dttm>, id <int>, position <chr>
durations <-
  events$event %>%
  as.character() %>%
  unique() %>%
  combn(2) %>%
  t() %>%
  as_tibble() %>%
  transmute(
    from = parse_datetime(V1),
    to = parse_datetime(V2),
    duration = to - from
  ) %>%
  left_join(events, by = c("from" = "event"))
#> Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
#> Using compatibility `.name_repair`.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
durations
#> # A tibble: 44 × 8
#> from to duration before_event_eval
#> <dttm> <dttm> <drtn> <dttm>
#> 1 2021-01-15 11:25:32 2021-01-27 11:25:32 288 hours 2020-08-01 00:00:00
#> 2 2021-01-15 11:25:32 2021-01-28 13:25:32 314 hours 2020-08-01 00:00:00
#> 3 2021-01-15 11:25:32 2021-01-28 18:25:32 319 hours 2020-08-01 00:00:00
#> 4 2021-01-15 11:25:32 2021-01-30 18:25:32 367 hours 2020-08-01 00:00:00
#> 5 2021-01-15 11:25:32 2021-03-30 18:25:32 1783 hours 2020-08-01 00:00:00
#> 6 2021-01-15 11:25:32 2021-04-28 13:25:32 2474 hours 2020-08-01 00:00:00
#> 7 2021-01-15 11:25:32 2021-04-30 18:25:32 2527 hours 2020-08-01 00:00:00
#> 8 2021-01-15 11:25:32 2021-05-28 10:25:32 3191 hours 2020-08-01 00:00:00
#> 9 2021-01-27 11:25:32 2021-01-28 13:25:32 26 hours 2020-08-01 00:00:00
#> 10 2021-01-27 11:25:32 2021-01-28 18:25:32 31 hours 2020-08-01 00:00:00
#> # … with 34 more rows, and 4 more variables: eval_month <dttm>,
#> # after_eval_end <dttm>, id <int>, position <chr>
durations %>%
  group_by(position) %>%
  summarise(
    min_duration = min(duration),
    avg_duration = mean(duration),
    max_duration = max(duration)
  )
#> # A tibble: 2 × 4
#> position min_duration avg_duration max_duration
#> <chr> <drtn> <drtn> <drtn>
#> 1 after 664 hours 876.750 hours 1408 hours
#> 2 before 5 hours 1600.925 hours 3191 hours
Created on 2022-04-26 by the reprex package (v2.0.0)
To only look at consecutive events, one can do
durations <-
  events %>%
  arrange(position, event) %>%
  mutate(
    from = event,
    to = lead(event)
  )
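The consecutive gaps can then be summarised per position in the same way as above, for example:
# sketch: summarise consecutive gaps per position, mirroring the grouped summary above
durations %>%
  mutate(duration = to - from) %>%
  filter(!is.na(duration)) %>%
  group_by(position) %>%
  summarise(
    min_duration = min(duration),
    avg_duration = mean(duration),
    max_duration = max(duration)
  )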
As an addition to this question, is it possible to also add when an event started and when it finished as additional columns?
Here is a reproducible example pulled from the OP.
df <- structure(list(Time = structure(c(1463911500, 1463911800, 1463912100,
1463912400, 1463912700, 1463913000), class = c("POSIXct", "POSIXt"
), tzone = ""), Temp = c(20.043, 20.234, 6.329, 20.424, 20.615,
20.805)), row.names = c(NA, -6L), class = "data.frame")
> df
Time Temp
1 2016-05-22 12:05:00 20.043
2 2016-05-22 12:10:00 20.234
3 2016-05-22 12:15:00 6.329
4 2016-05-22 12:20:00 20.424
5 2016-05-22 12:25:00 20.615
6 2016-05-22 12:30:00 20.805
library(dplyr)
library(data.table)  # for rleid()

df %>%
  # add id for different periods/events
  mutate(tmp_Temp = Temp > 20, id = rleid(tmp_Temp)) %>%
  # keep only periods with high temperature
  filter(tmp_Temp) %>%
  # for each period/event, get its duration
  group_by(id) %>%
  summarise(event_duration = difftime(last(Time), first(Time)))
id event_duration
<int> <time>
1 1 5 mins
2 3 10 mins
i.e. there should be two more columns: "start_DateTime" and "end_DateTime".
Thanks!
Sure. Modify the final summarise() like this:
df %>%
  # add id for different periods/events
  mutate(tmp_Temp = Temp > 20, id = rleid(tmp_Temp)) %>%
  # keep only periods with high temperature
  filter(tmp_Temp) %>%
  # for each period/event, get its duration
  group_by(id) %>%
  summarise(event_duration = difftime(last(Time), first(Time)),
            start_DateTime = min(Time),
            end_DateTime = max(Time))
#> # A tibble: 2 × 4
#> id event_duration start_DateTime end_DateTime
#> <int> <drtn> <dttm> <dttm>
#> 1 1 5 mins 2016-05-22 12:05:00 2016-05-22 12:10:00
#> 2 3 10 mins 2016-05-22 12:20:00 2016-05-22 12:30:00
I have the below-mentioned dataframe in R.
DF
ID Datetime Value
T-1 2020-01-01 15:12:14 10
T-2 2020-01-01 00:12:10 20
T-3 2020-01-01 03:11:11 25
T-4 2020-01-01 14:01:01 20
T-5 2020-01-01 18:07:11 10
T-6 2020-01-01 20:10:09 15
T-7 2020-01-01 15:45:23 15
Using the above-mentioned dataframe, I want to break down the count and sum by month and by time-of-day bucket, based on the Datetime column.
Required Output:
Month Count Sum
Jan-20 7 115
12:00 AM to 05:00 AM 2 45
06:00 AM to 12:00 PM 0 0
12:00 PM to 03:00 PM 1 20
03:00 PM to 08:00 PM 3 35
08:00 PM to 12:00 AM 1 15
You can bin the hours of the day by using hour from the lubridate package and then cut from base R, before summarizing with dplyr.
Here, I am assuming that your Datetime column is actually in a date-time format and not just a character string or factor. If it is a character string or factor, make sure you have first converted it with DF$Datetime <- as.POSIXct(as.character(DF$Datetime)).
library(tidyverse)
DF$bins <- cut(lubridate::hour(DF$Datetime), c(-1, 5.99, 11.99, 14.99, 19.99, 24))
levels(DF$bins) <- c("00:00 to 05:59", "06:00 to 11:59", "12:00 to 14:59",
                     "15:00 to 19:59", "20:00 to 23:59")

newDF <- DF %>%
  group_by(bins, .drop = FALSE) %>%
  summarise(Count = length(Value), Total = sum(Value))
This gives the following result:
newDF
#> # A tibble: 5 x 3
#> bins Count Total
#> <fct> <int> <dbl>
#> 1 00:00 to 05:59 2 45
#> 2 06:00 to 11:59 0 0
#> 3 12:00 to 14:59 1 20
#> 4 15:00 to 19:59 3 35
#> 5 20:00 to 23:59 1 15
And if you want to add January as a first row (though I'm not sure how much sense this makes in this context) you could do:
newDF %>%
  summarise(bins = "January", Count = sum(Count), Total = sum(Total)) %>%
  bind_rows(newDF)
#> # A tibble: 6 x 3
#> bins Count Total
#> <chr> <int> <dbl>
#> 1 January 7 115
#> 2 00:00 to 05:59 2 45
#> 3 06:00 to 11:59 0 0
#> 4 12:00 to 14:59 1 20
#> 5 15:00 to 19:59 3 35
#> 6 20:00 to 23:59 1 15
Incidentally, the reproducible version of the data I used for this was:
structure(list(ID = structure(1:7, .Label = c("T-1", "T-2", "T-3",
"T-4", "T-5", "T-6", "T-7"), class = "factor"), Datetime = structure(c(1577891534,
1577837530, 1577848271, 1577887261, 1577902031, 1577909409, 1577893523
), class = c("POSIXct", "POSIXt"), tzone = ""), Value = c(10,
20, 25, 20, 10, 15, 15)), class = "data.frame", row.names = c(NA,
-7L))
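A possible variation (not part of the answer above) is to derive the month label from the data instead of hardcoding "January", so it generalises to data spanning several months:
DF %>%
  mutate(Month = format(Datetime, "%b-%y")) %>%
  group_by(Month) %>%
  summarise(Count = n(), Total = sum(Value))
# for this sample data: Month = "Jan-20", Count = 7, Total = 115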
I have date-time values and I'd like to calculate the difference. I tried using - between the two times, as in t1 - t2, but it switches the units: some of the output is in minutes, some in hours, and some in days, which makes it hard to work with.
I used difftime from lubridate and it gave me results that don't make sense.
my_tibble %>%
  mutate(time_diff = difftime(t2, t1, units = "mins"))
t1 t2 time_diff
<dttm> <dttm> <drtn>
1 2018-06-30 18:26:28 2018-07-01 01:26:43 0.2342667 mins
2 2018-06-30 19:33:03 2018-07-01 09:36:56 423.8818500 mins
3 2018-06-30 19:32:51 2018-07-01 02:33:41 0.8219833 mins
4 2018-06-30 23:09:59 2018-07-01 06:11:45 1.7654167 mins
5 2018-06-30 23:22:30 2018-07-01 06:23:00 0.4852000 mins
Here's more information.
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 6 obs. of 1 variable:
$ t1: POSIXct, format: "2018-07-01 01:26:43" "2018-07-01 09:36:56" "2018-07-01 02:33:41" "2018-07-01 06:11:45"
For what it's worth, the file comes from a CSV that has the time t1 defined as the number of milliseconds since the unix epoch. Here is how I read in the dataframe.
my_tibble <- read_csv("table.csv") %>%
  mutate(t1 = as.POSIXct(epoch_milli / 1000, origin = "1970-01-01")) %>%
  mutate_if(is.numeric, as.character) %>%
  as_tibble()
I couldn't reproduce the issue after quoting "mins" correctly ("mins"). Also, difftime is a base R function, not a lubridate one.
library(dplyr)
my_tibble %>%
  mutate(time_diff = difftime(t2, t1, units = "mins"))
# A tibble: 5 x 3
# t1 t2 time_diff
# <dttm> <dttm> <drtn>
#1 2018-06-30 18:26:28 2018-07-01 01:26:43 420.2500 mins
#2 2018-06-30 19:33:03 2018-07-01 09:36:56 843.8833 mins
#3 2018-06-30 19:32:51 2018-07-01 02:33:41 420.8333 mins
#4 2018-06-30 23:09:59 2018-07-01 06:11:45 421.7667 mins
#5 2018-06-30 23:22:30 2018-07-01 06:23:00 420.5000 mins
data
my_tibble <- structure(list(t1 = structure(c(1530383188, 1530387183, 1530387171,
1530400199, 1530400950), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
t2 = structure(c(1530408403, 1530437816, 1530412421, 1530425505,
1530426180), class = c("POSIXct", "POSIXt"), tzone = "UTC")),
row.names = c(NA,
-5L), class = c("tbl_df", "tbl", "data.frame"))
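If you want the difference as a plain numeric column in minutes (so printing can never switch the units on you), one option is to wrap difftime() in as.numeric():
my_tibble %>%
  mutate(time_diff_mins = as.numeric(difftime(t2, t1, units = "mins")))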