I have a dataset test. In Excel the Time column looked like 30-06-22 23:55:00 followed by 1/7/2022 0:00 AM. I have no idea why there are two different formats in one column, and I can't change the format in Excel. Oddly, the format used for the 1st to the 12th day of each month is different from the one used for the rest of the days. I therefore imported the data into R and tried to unify the formats with the parse_date_time() function, but the second kind of value now shows up as a number such as "44568" in R, and I get Warning message: 20737 failed to parse. after running the code test$Time<- parse_date_time(test$Time, orders= c("%d-%m-%y %H%M%S","%d/%m/%Y %I:%M:%S %p" ))
I am confused about why the formats differ and how to unify them into a single format such as 1/7/2022 0:00 AM (d/m/Y H:M AM/PM).
# Sample of the imported Time column, all stored as character: first the
# timestamps kept as text ("dd-mm-yy HH:MM:SS"), then values that arrived as
# numeric strings starting at "44568" and increasing in small fractional steps.
test<- c("30-06-22 20:35:00", "30-06-22 20:40:00", "30-06-22 20:45:00",
"30-06-22 20:50:00", "30-06-22 20:55:00", "30-06-22 21:00:00",
"30-06-22 21:05:00", "30-06-22 21:10:00", "30-06-22 21:15:00",
"30-06-22 21:20:00", "30-06-22 21:25:00", "30-06-22 21:30:00",
"30-06-22 21:35:00", "30-06-22 21:40:00", "30-06-22 21:45:00",
"30-06-22 21:50:00", "30-06-22 21:55:00", "30-06-22 22:00:00",
"30-06-22 22:05:00", "30-06-22 22:10:00", "30-06-22 22:15:00",
"30-06-22 22:20:00", "30-06-22 22:25:00", "30-06-22 22:30:00",
"30-06-22 22:35:00", "30-06-22 22:40:00", "30-06-22 22:45:00",
"30-06-22 22:50:00", "30-06-22 22:55:00", "30-06-22 23:00:00",
"30-06-22 23:05:00", "30-06-22 23:10:00", "30-06-22 23:15:00",
"30-06-22 23:20:00", "30-06-22 23:25:00", "30-06-22 23:30:00",
"30-06-22 23:35:00", "30-06-22 23:40:00", "30-06-22 23:45:00",
"30-06-22 23:50:00", "30-06-22 23:55:00", "44568", "44568.003472222219",
"44568.006944444445", "44568.010416666664", "44568.013888888891",
"44568.017361111109", "44568.020833333336", "44568.024305555555",
"44568.027777777781", "44568.03125", "44568.034722222219", "44568.038194444445",
"44568.041666666664", "44568.045138888891", "44568.048611111109",
"44568.052083333336", "44568.055555555555", "44568.059027777781",
"44568.0625", "44568.065972222219", "44568.069444444445", "44568.072916666664",
"44568.076388888891", "44568.079861111109", "44568.083333333336",
"44568.086805555555", "44568.090277777781", "44568.09375", "44568.097222222219",
"44568.100694444445", "44568.104166666664", "44568.107638888891",
"44568.111111111109", "44568.114583333336", "44568.118055555555",
"44568.121527777781", "44568.125", "44568.128472222219", "44568.131944444445",
"44568.135416666664", "44568.138888888891", "44568.142361111109",
"44568.145833333336", "44568.149305555555", "44568.152777777781",
"44568.15625", "44568.159722222219", "44568.163194444445", "44568.166666666664",
"44568.170138888891", "44568.173611111109", "44568.177083333336",
"44568.180555555555", "44568.184027777781", "44568.1875", "44568.190972222219",
"44568.194444444445", "44568.197916666664", "44568.201388888891",
"44568.204861111109")
We may do
library(parsedate)
library(dplyr)
# Rows that Excel stored as real date-times arrive as serial day counts
# (e.g. "44568"); rows it kept as text arrive as "dd-mm-yy HH:MM:SS".
# as.numeric() yields NA for the text rows, which is exactly the mask that
# coalesce() needs, so the expected coercion warning is silenced here only.
v1 <- suppressWarnings(as.numeric(test))
# Decode the serials explicitly in UTC. Excel serials carry no time zone;
# without `tz` they are read in the session's local zone, which shifts them
# by the local UTC offset once they are combined with the UTC values that
# parse_date() returns. coalesce() then fills the NA slots (the text rows)
# from the parsed text timestamps.
v1 <- coalesce(
  openxlsx::convertToDateTime(v1, tz = "UTC"),
  parse_date(test)
)
# NOTE(review): if Excel read day/month text such as 1/7/2022 as month/day,
# the serial-derived rows have day and month swapped -- confirm against the
# workbook before relying on them.
v1
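-output (apparently printed in a session whose local zone is UTC-5: serial 44568 has no fractional part and so means midnight, yet the Excel-derived rows start at 05:00; decoding the serials with tz = "UTC" removes that shift)
> v1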
[1] "2022-06-30 20:35:00 UTC" "2022-06-30 20:40:00 UTC" "2022-06-30 20:45:00 UTC" "2022-06-30 20:50:00 UTC" "2022-06-30 20:55:00 UTC" "2022-06-30 21:00:00 UTC"
[7] "2022-06-30 21:05:00 UTC" "2022-06-30 21:10:00 UTC" "2022-06-30 21:15:00 UTC" "2022-06-30 21:20:00 UTC" "2022-06-30 21:25:00 UTC" "2022-06-30 21:30:00 UTC"
[13] "2022-06-30 21:35:00 UTC" "2022-06-30 21:40:00 UTC" "2022-06-30 21:45:00 UTC" "2022-06-30 21:50:00 UTC" "2022-06-30 21:55:00 UTC" "2022-06-30 22:00:00 UTC"
[19] "2022-06-30 22:05:00 UTC" "2022-06-30 22:10:00 UTC" "2022-06-30 22:15:00 UTC" "2022-06-30 22:20:00 UTC" "2022-06-30 22:25:00 UTC" "2022-06-30 22:30:00 UTC"
[25] "2022-06-30 22:35:00 UTC" "2022-06-30 22:40:00 UTC" "2022-06-30 22:45:00 UTC" "2022-06-30 22:50:00 UTC" "2022-06-30 22:55:00 UTC" "2022-06-30 23:00:00 UTC"
[31] "2022-06-30 23:05:00 UTC" "2022-06-30 23:10:00 UTC" "2022-06-30 23:15:00 UTC" "2022-06-30 23:20:00 UTC" "2022-06-30 23:25:00 UTC" "2022-06-30 23:30:00 UTC"
[37] "2022-06-30 23:35:00 UTC" "2022-06-30 23:40:00 UTC" "2022-06-30 23:45:00 UTC" "2022-06-30 23:50:00 UTC" "2022-06-30 23:55:00 UTC" "2022-01-07 05:00:00 UTC"
[43] "2022-01-07 05:05:00 UTC" "2022-01-07 05:10:00 UTC" "2022-01-07 05:15:00 UTC" "2022-01-07 05:20:00 UTC" "2022-01-07 05:25:00 UTC" "2022-01-07 05:30:00 UTC"
[49] "2022-01-07 05:35:00 UTC" "2022-01-07 05:40:00 UTC" "2022-01-07 05:45:00 UTC" "2022-01-07 05:50:00 UTC" "2022-01-07 05:55:00 UTC" "2022-01-07 06:00:00 UTC"
[55] "2022-01-07 06:05:00 UTC" "2022-01-07 06:10:00 UTC" "2022-01-07 06:15:00 UTC" "2022-01-07 06:20:00 UTC" "2022-01-07 06:25:00 UTC" "2022-01-07 06:30:00 UTC"
[61] "2022-01-07 06:35:00 UTC" "2022-01-07 06:40:00 UTC" "2022-01-07 06:45:00 UTC" "2022-01-07 06:50:00 UTC" "2022-01-07 06:55:00 UTC" "2022-01-07 07:00:00 UTC"
[67] "2022-01-07 07:05:00 UTC" "2022-01-07 07:10:00 UTC" "2022-01-07 07:15:00 UTC" "2022-01-07 07:20:00 UTC" "2022-01-07 07:25:00 UTC" "2022-01-07 07:30:00 UTC"
[73] "2022-01-07 07:35:00 UTC" "2022-01-07 07:40:00 UTC" "2022-01-07 07:45:00 UTC" "2022-01-07 07:50:00 UTC" "2022-01-07 07:55:00 UTC" "2022-01-07 08:00:00 UTC"
[79] "2022-01-07 08:05:00 UTC" "2022-01-07 08:10:00 UTC" "2022-01-07 08:15:00 UTC" "2022-01-07 08:20:00 UTC" "2022-01-07 08:25:00 UTC" "2022-01-07 08:30:00 UTC"
[85] "2022-01-07 08:35:00 UTC" "2022-01-07 08:40:00 UTC" "2022-01-07 08:45:00 UTC" "2022-01-07 08:50:00 UTC" "2022-01-07 08:55:00 UTC" "2022-01-07 09:00:00 UTC"
[91] "2022-01-07 09:05:00 UTC" "2022-01-07 09:10:00 UTC" "2022-01-07 09:15:00 UTC" "2022-01-07 09:20:00 UTC" "2022-01-07 09:25:00 UTC" "2022-01-07 09:30:00 UTC"
[97] "2022-01-07 09:35:00 UTC" "2022-01-07 09:40:00 UTC" "2022-01-07 09:45:00 UTC" "2022-01-07 09:50:00 UTC" "2022-01-07 09:55:00 UTC"
I have converted my date column from chr to POSIXct using the code below.
# Parse the day-month-year hour:minute strings into POSIXct values in UTC.
crime2$Date <- parse_date_time(crime2$Date, orders = "dmy_HM", tz = "UTC")
so my date actually now in this format.
> head(crime2$Date, 10)
[1] "2015-03-18 19:44:00 UTC" "2015-03-18 22:45:00 UTC"
[3] "2015-03-18 22:30:00 UTC" "2015-03-18 22:00:00 UTC"
[5] "2015-03-18 23:00:00 UTC" "2015-03-18 21:35:00 UTC"
[7] "2015-03-18 22:50:00 UTC" "2015-03-18 23:40:00 UTC"
[9] "2015-03-18 23:30:00 UTC" "2015-03-18 22:45:00 UTC"
However, if I want to remove the time and keep only the date, how can I do that?
For example, the values should look like this:
" 2015-03-18 " "2015-03-18 "
This question already has answers here:
Parse datetime with lubridate
(3 answers)
Parsing dates with different formats
(3 answers)
Closed 2 years ago.
I have a dataframe with a column containing Date Time values, but the values are in different formats.
I want to bring them all to the format "dd/mm/yyyy hh:mm". I tried using the lubridate package to convert the dates with the AM/PM text appended to the dates, but am unable to do so.
Date_Time
"11/01/2019 10:00"
"11/01/2019 11:00"
"11/01/2019 12:00"
"11/01/2019 13:00"
"11/01/2019 14:00"
"11/01/2019 15:00"
"11/01/2019 16:00"
"10/03/2019 23:00"
"10/04/2019 1:00"
"10/28/2019 05:00:00 AM"
"10/28/2019 10:00:00 PM"
"10/29/2019 02:00:00 AM"
"10/29/2019 03:00:00 AM"
"10/31/2019 01:00:00 PM"
"10/31/2019 02:00:00 PM"
"10/31/2019 10:00:00 PM"
You can use lubridate's parse_date_time :
# Two candidate layouts are offered: month/day/year with a 24-hour "H:M"
# clock, and month/day/year with a 12-hour "I:M:S" clock plus AM/PM marker.
lubridate::parse_date_time(
  df$Date_Time,
  orders = c("mdYHM", "mdYIMSp")
)
#[1] "2019-11-01 10:00:00 UTC" "2019-11-01 11:00:00 UTC" "2019-11-01 12:00:00 UTC"
#[4] "2019-11-01 13:00:00 UTC" "2019-11-01 14:00:00 UTC" "2019-11-01 15:00:00 UTC"
#[7] "2019-11-01 16:00:00 UTC" "2019-10-03 23:00:00 UTC" "2019-10-04 01:00:00 UTC"
#[10]"2019-10-28 05:00:00 UTC" "2019-10-28 22:00:00 UTC" "2019-10-29 02:00:00 UTC"
#[13]"2019-10-29 03:00:00 UTC" "2019-10-31 13:00:00 UTC" "2019-10-31 14:00:00 UTC"
#[16]"2019-10-31 22:00:00 UTC"
data
# Sample input: a single character column that mixes 24-hour stamps with
# 12-hour stamps carrying an AM/PM marker.
df <- data.frame(
  Date_Time = c(
    "11/01/2019 10:00",
    "11/01/2019 11:00",
    "11/01/2019 12:00",
    "11/01/2019 13:00",
    "11/01/2019 14:00",
    "11/01/2019 15:00",
    "11/01/2019 16:00",
    "10/03/2019 23:00",
    "10/04/2019 1:00",
    "10/28/2019 05:00:00 AM",
    "10/28/2019 10:00:00 PM",
    "10/29/2019 02:00:00 AM",
    "10/29/2019 03:00:00 AM",
    "10/31/2019 01:00:00 PM",
    "10/31/2019 02:00:00 PM",
    "10/31/2019 10:00:00 PM"
  ),
  stringsAsFactors = FALSE
)
I am trying to make a sequence that only consists of times with one hour interval, without dates. It should look like this:
"00:00:00" "1:00:00" "2:00:00" "3:00:00"
I know that this code works:
# Hourly POSIXct sequence from 00:00 to 23:00 in UTC. Only a clock time is
# supplied, so the date part is filled in when the strings are parsed.
dat <- seq(
  from = as.POSIXct("00:00:00", format = "%H:%M:%S", tz = "UTC"),
  to = as.POSIXct("23:00:00", format = "%H:%M:%S", tz = "UTC"),
  by = "hour"
)
Which gives
[1] "2018-04-10 00:00:00 UTC" "2018-04-10 01:00:00 UTC" "2018-04-10 02:00:00 UTC" "2018-04-10 03:00:00 UTC" "2018-04-10 04:00:00 UTC"
[6] "2018-04-10 05:00:00 UTC" "2018-04-10 06:00:00 UTC" "2018-04-10 07:00:00 UTC" "2018-04-10 08:00:00 UTC" "2018-04-10 09:00:00 UTC"
[11] "2018-04-10 10:00:00 UTC" "2018-04-10 11:00:00 UTC" "2018-04-10 12:00:00 UTC" "2018-04-10 13:00:00 UTC" "2018-04-10 14:00:00 UTC"
[16] "2018-04-10 15:00:00 UTC" "2018-04-10 16:00:00 UTC" "2018-04-10 17:00:00 UTC" "2018-04-10 18:00:00 UTC" "2018-04-10 19:00:00 UTC"
[21] "2018-04-10 20:00:00 UTC" "2018-04-10 21:00:00 UTC" "2018-04-10 22:00:00 UTC" "2018-04-10 23:00:00 UTC"
But that is not what I want. Therefore I tried
library(chron)
# Attempt to step through a chron `times` range with by="hour"; this is the
# call that raises the convert.times() error quoted below.
seq(from = times("00:00:00"), to =times("23:00:00"), by="hour")
which gives an error
Error in convert.times(times., fmt) : format h:m:s may be incorrect
In addition: Warning message:
In unpaste(times, sep = fmt$sep, fnames = fmt$periods, nfields = 3) :
wrong number of fields in entry(ies) 1
I am stuck now, so I hope somebody can help me with this.
Of course I could just type it out, but I want to have a clean solution.
Using package chron which provides a times class:
library(chron)
# One hour is 1/24 of a day, so adding the offsets 0/24 ... 23/24 to midnight
# yields every whole hour of the day as a `times` vector.
times("00:00:00") + seq(0, 23) / 24
#[1] 00:00:00 01:00:00 02:00:00 03:00:00 04:00:00 05:00:00 06:00:00 07:00:00 08:00:00 09:00:00 10:00:00 11:00:00 12:00:00 13:00:00 14:00:00
#[16] 15:00:00 16:00:00 17:00:00 18:00:00 19:00:00 20:00:00 21:00:00 22:00:00 23:00:00
You can use strftime() to extract values in any format to character:
# Build the 24 hourly timestamps 00:00 ... 23:00 in UTC (the date part is
# filled in when the time-only strings are parsed).
dat <- seq(
  from = as.POSIXct("00:00:00", format = "%H:%M:%S", tz = "UTC"),
  to = as.POSIXct("23:00:00", format = "%H:%M:%S", tz = "UTC"),
  by = "hour"
)
# strftime() defaults to tz = "", i.e. the session's local zone, and ignores
# the "UTC" zone stored on `dat`; on a machine that is not on UTC the labels
# would come out shifted (e.g. starting at "02:00:00"). Passing the zone
# explicitly keeps the labels aligned with the sequence.
time_labels <- strftime(dat, format = "%H:%M:%S", tz = "UTC")
time_labels
#"00:00:00" "01:00:00" "02:00:00" "03:00:00" "04:00:00" "05:00:00"
#"06:00:00" "07:00:00" "08:00:00" "09:00:00" "10:00:00" "11:00:00"
#"12:00:00" "13:00:00" "14:00:00" "15:00:00" "16:00:00" "17:00:00"
#"18:00:00" "19:00:00" "20:00:00" "21:00:00" "22:00:00" "23:00:00"
When you have a POSIXct class,
to extract only the hour, minutes and seconds you just need to do:
# Render only the hour, minute and second fields of each POSIXct value as
# text; as.character() returns the formatted result as a character vector.
as.character(format(from, format = "%H:%M:%S"))
as.character(format(to, format = "%H:%M:%S"))
I want to generate a working week / working day sequence (Monday-Friday; 8am - 5pm) in R. However I only figured out how to extract a working week (Monday-Friday) with 24 hours.
library(timeDate)
# Starting point of the series, parsed in the session's time zone.
start <- as.POSIXct("2010-01-01")
# Spacing between stamps in minutes (converted to seconds for seq()).
interval <- 60
# 200 equally spaced stamps, converted to a timeDate object.
seq_1 <- as.timeDate(
  seq(from = start, by = interval * 60, length.out = 200)
)
# Keep the stamps that isWeekday() flags, print them, then list the day of
# week of each retained stamp.
seq_2 <- seq_1[isWeekday(seq_1)]
seq_2
dayOfWeek(seq_2)
Is there a similar function which can extract only working hours? Thanks
You can use function format to obtain hours
# Keep only the stamps whose hour of day falls in the 8am-5pm working window.
# 8:17 retains the 17:00 stamp itself; use 8:16 instead if each stamp marks
# the start of an hour-long slot and the slot beginning at 5pm is unwanted.
seq_2[as.integer(format(seq_2, "%H")) %in% 8:17]
Select weekdays and then repeat with frequency equal to the desired hours. I'm afraid I missed your 8 o'clock start and used the phrase "9 to 5" as my guide:
# Two years of calendar days, reduced to Monday-Friday.
twoyears <- seq.Date(as.Date("2010-01-01"), by = "day", length.out = 365 * 2)
twoworkyrs <- twoyears[isWeekday(twoyears, wday = 1:5)]
twoworkyrs[1:10]
# [1] "2010-01-01" "2010-01-04" "2010-01-05" "2010-01-06" "2010-01-07" "2010-01-08"
# [7] "2010-01-11" "2010-01-12" "2010-01-13" "2010-01-14"
# Hours of the day to generate for every working day ("9 to 5", inclusive).
day_hours <- 9:17
# Seconds since the epoch = whole days * 86400 + hour of day * 3600. Each day
# is repeated once per working hour and the hour offsets are cycled across
# the days so the two vectors line up element by element.
# The numbers are offsets from a UTC origin, so they are labelled as UTC:
# that keeps the wall-clock hours at 09:00-17:00. Labelling them with another
# zone would shift the displayed hours by that zone's offset.
workhours <- as.POSIXct(
  as.numeric(rep(twoworkyrs, each = length(day_hours))) * 24 * 3600 +
    rep(day_hours, times = length(twoworkyrs)) * 3600,
  origin = "1970-01-01",
  tz = "UTC"
)
#----- First two weeks ----------------
> workhours[1:90]
[1] "2010-01-01 09:00:00 UTC" "2010-01-01 10:00:00 UTC" "2010-01-01 11:00:00 UTC"
[4] "2010-01-01 12:00:00 UTC" "2010-01-01 13:00:00 UTC" "2010-01-01 14:00:00 UTC"
[7] "2010-01-01 15:00:00 UTC" "2010-01-01 16:00:00 UTC" "2010-01-01 17:00:00 UTC"
[10] "2010-01-04 09:00:00 UTC" "2010-01-04 10:00:00 UTC" "2010-01-04 11:00:00 UTC"
[13] "2010-01-04 12:00:00 UTC" "2010-01-04 13:00:00 UTC" "2010-01-04 14:00:00 UTC"
[16] "2010-01-04 15:00:00 UTC" "2010-01-04 16:00:00 UTC" "2010-01-04 17:00:00 UTC"
[19] "2010-01-05 09:00:00 UTC" "2010-01-05 10:00:00 UTC" "2010-01-05 11:00:00 UTC"
[22] "2010-01-05 12:00:00 UTC" "2010-01-05 13:00:00 UTC" "2010-01-05 14:00:00 UTC"
[25] "2010-01-05 15:00:00 UTC" "2010-01-05 16:00:00 UTC" "2010-01-05 17:00:00 UTC"
[snipped
I must admit that timezone conversions are one of my weakest suits.