What I'm trying to do is get lubridate to zero-pad the minute portion of the conversion so that it always has a length of 3 (e.g. 04M rather than 4M). This is so that the output passed to further processing has a uniform length. For example, the time 12:00 should be converted to 12H 00M 0S instead of the default output of 12H 0M 0S. I can't see anything in the help for hm(), so it may be necessary to use something outside of the lubridate package.
Below is some sample code of this idea:
library (lubridate)
df <- data.frame("Time" = c("13:04", "13:55", "8:00", "8:45"))
df$Time <- hm(df$Time)
# Desired output
# 13H 04M 0S
# 13H 55M 0S
# 8H 00M 0S
# 8H 45M 0S
This code will get your desired output:
library (lubridate)
df <- data.frame("Time" = c("13:04", "13:55", "8:00", "8:45"))
paste0(hour(hm(df$Time)), "H ", sprintf("%02d", minute(hm(df$Time))), "M 0S")
"13H 04M 0S" "13H 55M 0S" "8H 00M 0S" "8H 45M 0S"
I don't know what kind of "further processing" you aim to do, but note that the result is a plain character string, so this format won't allow for subsequent time/numeric computations.
hour(hm(df$Time)) #gives the hours
minute(hm(df$Time)) #gives the minutes
sprintf("%02d", ...argument2...) # forces the 2nd argument call (here the minutes) of that function to consist of 2 characters, which adds the zeroes when needed
paste0() #pastes the hours and minutes together as 1 string, with the string "H " inbetween and the string "M 0S" at the end
Related
In the following data frame the 'time' column is character in the format hour:minute:second
id <- c(1, 2, 3, 4)
time <- c("00:00:01", "01:02:00", "09:30:01", "14:15:25")
df <- data.frame(id, time)
How can I convert 'time' column to a dedicated time class, so that I can perform arithmetic calculations on it?
Use the function chron in package chron:
time<-c("00:00:01", "01:02:00", "09:30:01", "14:15:25")
library(chron)
x <- chron(times=time)
x
[1] 00:00:01 01:02:00 09:30:01 14:15:25
Do some useful things, like calculating the difference between successive elements:
diff(x)
[1] 01:01:59 08:28:01 04:45:24
chron objects store time values internally as a fraction of a day. Thus 1 second is equivalent to 1/(60*60*24), or 1/86400, i.e. 1.157407e-05.
So, to add times, one simple option is this:
x + 1/86400
[1] 00:00:02 01:02:01 09:30:02 14:15:26
Using base R you could convert it to an object of class POSIXct, but this does add a date to the time:
id<-c(1,2,3,4)
time<-c("00:00:01","01:02:00","09:30:01","14:15:25")
df<-data.frame(id,time,stringsAsFactors=FALSE)
as.POSIXct(df$time,format="%H:%M:%S")
[1] "2012-08-20 00:00:01 CEST" "2012-08-20 01:02:00 CEST"
[3] "2012-08-20 09:30:01 CEST" "2012-08-20 14:15:25 CEST"
But that does allow you to perform arithmetic calculations on them.
Another possible alternative could be:
time <- c("00:00:01","01:02:00","09:30:01","14:15:25")
converted.time <- as.difftime(time, units = "mins") #"difftime" class
secss <- as.numeric(converted.time, units = "secs")
hourss <- as.numeric(converted.time, units = "hours")
dayss <- as.numeric(converted.time, units="days")
Or even:
w <- strptime(x = time, format = "%H:%M:%S") #"POSIXlt" "POSIXt" class
Using the ITime class in data.table package:
ITime is a time-of-day class stored as the integer number of seconds in the day.
library(data.table)
(it <- as.ITime(time))
# [1] "00:00:01" "01:02:00" "09:30:01" "14:15:25"
it + 10
# [1] "00:00:11" "01:02:10" "09:30:11" "14:15:35"
diff(it)
# [1] "01:01:59" "08:28:01" "04:45:24"
lubridate allows good flexibility on the time format :
library(lubridate)
time_hms_1<-c("00:00:01", "01:02:00", "09:30:01", "14:15:25")
hms(time_hms_1)
#> [1] "1S" "1H 2M 0S" "9H 30M 1S" "14H 15M 25S"
time_hms_2<-c("0:00:01", "1:02:00", "9:30:01", "14:15:25")
hms(time_hms_2)
#> [1] "1S" "1H 2M 0S" "9H 30M 1S" "14H 15M 25S"
time_hm_1<-c("00:00", "01:02", "09:30", "14:15")
hm(time_hm_1)
#> [1] "0S" "1H 2M 0S" "9H 30M 0S" "14H 15M 0S"
time_hm_2<-c("0:00", "1:02", "9:30", "14:15")
hm(time_hm_2)
#> [1] "0S" "1H 2M 0S" "9H 30M 0S" "14H 15M 0S"
Created on 2020-07-03 by the reprex package (v0.3.0)
Yet another alternative using the hms package.
id <- c(1, 2, 3, 4)
time <- c("00:00:01", "01:02:00", "09:30:01", "14:15:25")
df <- data.frame(id, time, stringsAsFactors = FALSE)
Convert column time to class hms
# install.packages("hms")
library(hms)
df$time <- as.hms(df$time)
Perform arithmetic calculations
diff(df$time)
#01:01:59
#08:28:01
#04:45:24
I have a data.table in R with some time values in hours, minutes and seconds. These values are recognized as character and I want to convert them to a time class.
I've tried with the following functions, but I obtain the same results.
as.ITime(eventlog.dt$Duration)
as.difftime(eventlog.dt$Duration, units = "hours")
as.POSIXct(eventlog.dt$Duration, format = "%T")
If my data is this(character format):
12:45:12 72:56:12 05:13:36 162:14:12
I obtain:
12:45:12 NA 05:13:36 NA
I want to obtain this(time format):
12:45:12 72:56:12 05:13:36 162:14:12
The required format can only be a character class. If we need a time class, it needs to be converted properly
library(lubridate)
seconds_to_period(period_to_seconds(hms(v1)))
#[1] "12H 45M 12S" "3d 0H 56M 12S" "5H 13M 36S" "6d 18H 14M 12S"
If this needs to be converted to hours alone, convert to seconds and then divide by 3600
round(period_to_seconds(hms(v1))/3600)
data
v1 <- c("12:45:12", "72:56:12", "05:13:36", "162:14:12")
I've read the lubridate package manual and have queried Stack Overflow with a variety of permutations of my question but have come up with no answer to my specific problem.
What I'm trying to do is calculate age in months at time of event as the difference between date of birth and some specific event date.
As such, I imported a SAS dataset using the sas7bdat package and converted my SAS date variables (DOB and Event) to R objects using the following code:
df$DOB <- as.Date(df$DOB, origin="1960-01-01")
df$DOB1 <- ymd(df$DOB)
And same thing for the Event variable:
df$Event <- as.Date(df$Event, origin="1960-01-01")
df$Event1 <- ymd(df$Event)
However, there are some NA values for DOB. So, for the following code which I want to use to calculate age (in months).
df$interval <- new_interval(df$DOB1,df$Event1)
df$Age1 <- df$interval %/% months(1)
I'm receiving the error:
Error in est[start + est * per < end] <- est[start + est * per < end] + : NAs are not allowed in subscripted assignments
What am I doing wrong? I've tried an if/else function but perhaps used it incorrectly.
(Note: For the SAS programmers out there, I'm trying to produce the same results as the following function:
IF DOB ne . THEN Tage=Floor(intck('month',DOB,Event)-(Day(Event)<Day(DOB)));
Simple example using lubridate package
library(lubridate)
date1='20160101'
date2='20160501'
x=interval(ymd(date1),ymd(date2))
x= x %/% months(1)
print(x)
# answer : 4
or, equivalently:
x=as.period(x) %>% month()
print(x)
# answer : 4
Well so I give all credit for this answer to my talented work colleague. I neglected to include a reproducible example because whenever I would write a simple approximation of my problem, the df$Age1 <- df$interval %/% months(1) always worked! This left me totally stumped. It wasn't until I actually ran the code on my dataframe of 650,000+ birthdates and event dates that the error message...
Error in est[start + est * per < end] <- est[start + est * per < end] + :
NAs are not allowed in subscripted assignments
... would even come up! My colleague had the idea to process this calculation iteratively with the following function:
df$Age1 = rep(NA, nrow(df))
for (i in 1:nrow(df)) {
df$Age1[i]<- df$interval[i] %/% months(1)
}
df$Age1[1:15]
Using my dataframe, it became plain to see that this calculation got hung up on row 13!
> df$interval[13]
[1] 1995-10-31 19:00:00 EST--1996-05-26 20:00:00 EDT
So we aren't certain, but maybe the fact that the df$DOB[13] is 10/31 is screwing it up. This sort of problem with the lubridate package has been reported before (i.e., lubridate not being able to divide intervals by a period when one of the dates is at the end of the month):
https://github.com/hadley/lubridate/issues/235
The way we came to a solution was by using as.period and then converting it to months:
df$Age1<- as.period(df$interval)
head(df$Age1)
[1] "1y 2m 26d 0H 0M 0S" "6m 15d 23H 0M 0S"
[3] "4m 9d 23H 0M 0S" "3m 19d 23H 0M 0S"
[5] "3y 0m 25d 0H 0M 0S" "1y 1m 29d 1H 0M 0S"
df$Age1 <- df$Age1 %/% months(1)
head(df$Age1)
[1] 14 6 4 3 36 13
Here is another example of this reported issue with lubridate (1.3.3). Note that there may be different error messages depending on what else is in the dataset, and the issue seems to be dependent on the unit of measure (in my case months worked whereas years did not).
dat <- as.data.frame(list(Start = as.Date(c("1942-08-09", "1956-02-29")),
End = as.Date(c("2007-07-31", "2007-09-13"))))
int0 <- with(dat, new_interval(Start, End))
as.period(int0, unit = "years")
"Error in est[start + est * per > end] <- est[start + est * per > end] - :
NAs are not allowed in subscripted assignments"
int1 <- with(dat[1,], new_interval(Start, End))
as.period(int1, unit = "years")
[1] "64y 11m 22d 0H 0M 0S"
int2 <- with(dat[2,], new_interval(Start, End))
as.period(int2, unit = "years")
"Error in while (any(start + est * per > end)) est[start + est * per > :
missing value where TRUE/FALSE needed"
as.period(int0) %/% years(1)
[1] 64 51
as.period(int0, unit = "months")
[1] "779m 22d 0H 0M 0S" "618m 15d 0H 0M 0S"
Instead of
df$Age1 <- df$interval %/% months(1)
you can try:
df$Age1 <- NA
df$Age1[!is.na(df$DOB)] <- df$interval[!is.na(df$DOB)] %/% months(1)
I have a time column in a database, where hours and minutes are separated by a ":". I would like to remove the ":" so that time field becomes numeric as I will use numeric time for some calculation.
Input:
X
00:00
01:15
02:30
Output:
X
0000
0115
0230
I am new to R. My apologies if this is a silly question. Greatly appreciate any help. Thank you.
> x <- c("00:00", "01:15", "02:30")
> gsub(":", "", x)
[1] "0000" "0115" "0230"
If you really want numbers you can coerce to numeric or integer
> as.numeric(gsub(":", "", x))
[1] 0 115 230
GSee's answer does what you ask. However, if you're doing arithmetic with units of time, you might think about some easier ways.
library(lubridate)
X <- hm(c("00:00", "01:15", "02:30")) # Converts to lubridate time objects
X + minutes(1)
# [1] "1M 0S" "1H 16M 0S" "2H 31M 0S"
X + weeks(2)
# [1] "14d 0H 0M 0S" "14d 1H 15M 0S" "14d 2H 30M 0S"
It might be more sensible to use R's facilities for time parsing to convert to date-time first. If you simply strip the ":", you will have no way to capture the non-decimal character of your "time" values: for example, 300 - 259 (i.e. 3:00 minus 2:59) should be 1 minute, not 41. This set of commands illustrates some of R's date-time and Date functions:
> X <- c('00:00', '01:15', '02:30')
> as.POSIXct(X, format="%H:%M")
[1] "2013-08-05 00:00:00 PDT" "2013-08-05 01:15:00 PDT" "2013-08-05 02:30:00 PDT"
This will give the results in differences in seconds from midnight today:
> as.numeric(as.POSIXct(X, format="%H:%M") - as.POSIXct("2013-08-05 00:00:00 PDT"))
[1] 0 4500 9000
Now try to use today's date, but notice that there is an offset of 7 hours because as.POSIXct will assume that the Date is in GMT/UTC time:
> as.numeric(as.POSIXct(X, format="%H:%M") - as.POSIXct(Sys.Date()))
[1] 7.00 8.25 9.50
> as.numeric(as.POSIXct(X, format="%H:%M") - (as.POSIXct(Sys.Date())+7*3600))
[1] 0 4500 9000
So finish off the process by shifting 7 hours (=7*3600 seconds) and then converting to minutes:
> as.numeric(as.POSIXct(X, format="%H:%M") - (as.POSIXct(Sys.Date())+7*3600))/60
[1] 0 75 150
In the following data frame the 'time' column is character in the format hour:minute:second
id <- c(1, 2, 3, 4)
time <- c("00:00:01", "01:02:00", "09:30:01", "14:15:25")
df <- data.frame(id, time)
How can I convert 'time' column to a dedicated time class, so that I can perform arithmetic calculations on it?
Use the function chron in package chron:
time<-c("00:00:01", "01:02:00", "09:30:01", "14:15:25")
library(chron)
x <- chron(times=time)
x
[1] 00:00:01 01:02:00 09:30:01 14:15:25
Do some useful things, like calculating the difference between successive elements:
diff(x)
[1] 01:01:59 08:28:01 04:45:24
chron objects store time values internally as a fraction of a day. Thus 1 second is equivalent to 1/(60*60*24), or 1/86400, i.e. 1.157407e-05.
So, to add times, one simple option is this:
x + 1/86400
[1] 00:00:02 01:02:01 09:30:02 14:15:26
Using base R you could convert it to an object of class POSIXct, but this does add a date to the time:
id<-c(1,2,3,4)
time<-c("00:00:01","01:02:00","09:30:01","14:15:25")
df<-data.frame(id,time,stringsAsFactors=FALSE)
as.POSIXct(df$time,format="%H:%M:%S")
[1] "2012-08-20 00:00:01 CEST" "2012-08-20 01:02:00 CEST"
[3] "2012-08-20 09:30:01 CEST" "2012-08-20 14:15:25 CEST"
But that does allow you to perform arithmetic calculations on them.
Another possible alternative could be:
time <- c("00:00:01","01:02:00","09:30:01","14:15:25")
converted.time <- as.difftime(time, units = "mins") #"difftime" class
secss <- as.numeric(converted.time, units = "secs")
hourss <- as.numeric(converted.time, units = "hours")
dayss <- as.numeric(converted.time, units="days")
Or even:
w <- strptime(x = time, format = "%H:%M:%S") #"POSIXlt" "POSIXt" class
Using the ITime class in data.table package:
ITime is a time-of-day class stored as the integer number of seconds in the day.
library(data.table)
(it <- as.ITime(time))
# [1] "00:00:01" "01:02:00" "09:30:01" "14:15:25"
it + 10
# [1] "00:00:11" "01:02:10" "09:30:11" "14:15:35"
diff(it)
# [1] "01:01:59" "08:28:01" "04:45:24"
lubridate allows good flexibility on the time format :
library(lubridate)
time_hms_1<-c("00:00:01", "01:02:00", "09:30:01", "14:15:25")
hms(time_hms_1)
#> [1] "1S" "1H 2M 0S" "9H 30M 1S" "14H 15M 25S"
time_hms_2<-c("0:00:01", "1:02:00", "9:30:01", "14:15:25")
hms(time_hms_2)
#> [1] "1S" "1H 2M 0S" "9H 30M 1S" "14H 15M 25S"
time_hm_1<-c("00:00", "01:02", "09:30", "14:15")
hm(time_hm_1)
#> [1] "0S" "1H 2M 0S" "9H 30M 0S" "14H 15M 0S"
time_hm_2<-c("0:00", "1:02", "9:30", "14:15")
hm(time_hm_2)
#> [1] "0S" "1H 2M 0S" "9H 30M 0S" "14H 15M 0S"
Created on 2020-07-03 by the reprex package (v0.3.0)
Yet another alternative using the hms package.
id <- c(1, 2, 3, 4)
time <- c("00:00:01", "01:02:00", "09:30:01", "14:15:25")
df <- data.frame(id, time, stringsAsFactors = FALSE)
Convert column time to class hms
# install.packages("hms")
library(hms)
df$time <- as.hms(df$time)
Perform arithmetic calculations
diff(df$time)
#01:01:59
#08:28:01
#04:45:24