I need to split 10/4/2018 19:21
and have tried
AccidentsMp$Hours <- format(as.POSIXct(AccidentsMp$Job.Date, "%Y-%m-%d %H:%M:%S", tz = ""), format = "%H:%M")
AccidentsMp$Dates <- format(as.Date(AccidentsMp$Job.Date,"%Y-%m-%d"), format = "%d/%m/%Y")
How can I split the above date and time into two columns? The data is of class factor now.
Here is an option with tidyverse
library(tidyverse)
library(lubridate)
# Parse the day/month/year hour:minute text into a date-time, then split it on
# the space into `date` and `time`; remove = FALSE keeps Job.Date as well.
df %>%
mutate(Job.Date = dmy_hm(Job.Date)) %>%
separate(Job.Date, into = c('date', 'time'), sep=' ', remove = FALSE)
# Job.Date date time
#1 2018-04-10 19:21:00 2018-04-10 19:21:00
#2 2018-04-10 19:22:00 2018-04-10 19:22:00
#3 2018-04-10 19:23:00 2018-04-10 19:23:00
Or using base R
# Re-read the date-time strings as whitespace-separated text, so the part
# before the space becomes `date` and the part after it becomes `time`.
read.table(text = as.character(df$Job.Date), header = FALSE,
col.names = c("date", "time"))
data
df <- structure(list(Job.Date = structure(1:3, .Label = c("10/4/2018 19:21",
"10/4/2018 19:22", "10/4/2018 19:23"), class = "factor")),
class = "data.frame", row.names = c(NA, -3L))
If your data follows the same format as shown we can do it as follows using only base R
# Parse the day/month/year hour:minute text once, then derive the display
# columns from the parsed date-time.
df[["datetime"]] <- as.POSIXct(df[["Job.Date"]], format = "%d/%m/%Y %H:%M")
transform(
  df,
  time = format(datetime, "%H:%M:%S"),
  date = format(datetime, "%d/%m/%Y")
)
# Job.Date datetime time date
#1 10/4/2018 19:21 2018-04-10 19:21:00 19:21:00 10/04/2018
#2 10/4/2018 19:22 2018-04-10 19:22:00 19:22:00 10/04/2018
#3 10/4/2018 19:23 2018-04-10 19:23:00 19:23:00 10/04/2018
You can remove the datetime column later if not needed.
data
df <- data.frame(Job.Date = c("10/4/2018 19:21", "10/4/2018 19:22",
"10/4/2018 19:23"))
If one needs to do it the hard way:
# Split one hard-coded date-time string on the space; res[[1]] holds the date
# part first and the time part second.
text<-"10/4/2018 19:21"
res<-strsplit(text," ")
# The single date and time values are assigned to every row of df.
df$Date<-res[[1]][1]
df$Time<-res[[1]][2]
#install.packages("lubridate")
# NOTE(review): mdy() reads "10/4/2018" as month/day/year (4 October 2018) --
# confirm this matches the source data.
df$Date<-lubridate::mdy(df$Date)
df$Time<-lubridate::hm(df$Time)
You can get the time and date without using any packages as:
# Base-R equivalents that reuse the split pieces in `res`: the time is kept as
# "HH:MM" text and the date is parsed as month/day/year.
df$Time<-format(strptime(res[[1]][2],"%H:%M",tz=""),"%H:%M") #cleaner output
df$Date<- as.Date(res[[1]][1],"%m/%d/%Y")
Result using lubridate:
month item sales Date Time
1 1 A 10 2018-10-04 19H 21M 0S
2 2 b 20 2018-10-04 19H 21M 0S
3 2 c 5 2018-10-04 19H 21M 0S
4 3 a 3 2018-10-04 19H 21M 0S
Data
df<-structure(list(month = c(1, 2, 2, 3), item = structure(c(2L,
3L, 4L, 1L), .Label = c("a", "A", "b", "c"), class = "factor"),
Time = new("Period", .Data = c(0, 0, 0, 0), year = c(0, 0,
0, 0), month = c(0, 0, 0, 0), day = c(0, 0, 0, 0), hour = c(19,
19, 19, 19), minute = c(21, 21, 21, 21))), class = "data.frame", row.names = c(NA,
-4L))
Related
I have a dataset, df. The Date column consists of dates from December and January. I would like to filter it and make a new dataset with dates only from January onward.
Date ID
12/20/2019 1:00:01 AM A
12/30/2019 2:00:02 AM B
01/01/2020 1:00:00 AM C
02/05/2020 2:00:05 AM D
I would like this:
Date ID
01/01/2020 1:00:00 AM C
02/05/2020 2:00:05 AM D
Can I use dplyr with this? or Base R
library(lubridate)
library(tidyverse)
filter(Date) >= 01-01-2020 ?
dput is
structure(list(Date = structure(c(2L, 3L, 1L, 4L), .Label = c("1/1/2020 1:00:00 AM",
"12/20/2019 1:00:01 AM", "12/30/2019 2:00:02 AM", "2/5/2020 2:00:05 AM"
), class = "factor"), ID = structure(1:4, .Label = c("A", "B",
"C", "D"), class = "factor")), class = "data.frame", row.names = c(NA,
-4L))
Maybe just filter on year and select dates from 2020?
library(dplyr)
library(lubridate)
# Parse the month/day/year text as date-times, then keep rows from 2020 onward.
df %>% mutate(Date = mdy_hms(Date)) %>% filter(year(Date) >= 2020)
# Date ID
#1 2020-01-01 01:00:00 C
#2 2020-02-05 02:00:05 D
Or using base R :
# Convert the 12-hour month/day/year text to date-times, then keep the rows
# whose calendar year is 2020 or later.
local({
  parsed <- transform(
    df,
    Date = as.POSIXct(Date, format = "%m/%d/%Y %I:%M:%S %p")
  )
  subset(parsed, as.integer(format(Date, "%Y")) >= 2020)
})
We can use subset with strptime in base R
# Keep the rows whose parsed year is 2020 or later. POSIXlt stores the year as
# years since 1900, hence the + 1900. The data frame is called `df` in the
# question and in the other answers, so the same name is used here.
subset(df, strptime(Date, "%m/%d/%Y %I:%M:%S %p")$year + 1900 >= 2020)
# Date ID
#3 1/1/2020 1:00:00 AM C
#4 2/5/2020 2:00:05 AM D
I have a dataset, df1. I would like to convert all the values from the 24-hour clock to the 12-hour (AM/PM) clock.
Date Name
1/2/2020 16:46 A
1/2/2020 16:51 B
I would like:
Date Name
1/2/2020 4:46:47 PM A
1/2/2020 4:51:44 PM B
I have tried:
df$Date<- format(df$Date, "%m/%d/%Y %I:%M:%S %p")
dput:
structure(list(Date = structure(1:2, .Label = c("1/2/2020 16:46",
"1/2/2020 16:51"), class = "factor"), Name = structure(1:2, .Label = c("A",
"B"), class = "factor")), class = "data.frame", row.names = c(NA,
-2L))
You can first convert the data to POSIXct format and then use format to get data in the required format.
# Parse the 24-hour month/day/year text, then render it again on a 12-hour
# clock with seconds and an AM/PM marker.
df[["Date"]] <- format(
  as.POSIXct(df[["Date"]], format = "%m/%d/%Y %H:%M"),
  format = "%m/%d/%Y %I:%M:%S %p"
)
#Can also use mdy_hm from lubridate
#df$Date <- format(lubridate::mdy_hm(df$Date), "%m/%d/%Y %I:%M:%S %p")
df
# Date Name
#1 01/02/2020 04:46:00 PM A
#2 01/02/2020 04:51:00 PM B
Assuming you want to actually convert a string in one format to a string in another format rather than having it as a (more useful) actual date/time, you can use a little arithmetic and string chopping along with mapply:
# Split each string on spaces and colons: piece 1 is the date, piece 2 the
# 24-hour hour and piece 3 the minutes.
splits <- strsplit(as.character(df$Date), " |:")
Hours <- as.numeric(vapply(splits, `[`, character(1), 2))
# Hours 0-11 are AM and 12-23 are PM; this must be computed before Hours is
# converted to the 12-hour clock.
AMPM <- c(" AM", " PM")[Hours %/% 12 + 1]
# Map to the 12-hour clock: 0 -> 12, 1-12 unchanged, 13-23 -> 1-11.
# (The previous `Hours %% 13 + Hours %/% 13` left midnight as hour 0.)
Hours <- (Hours - 1) %% 12 + 1
df$Date <- mapply(
  function(x, y, z) paste0(x[1], " ", y, ":", x[3], z),
  splits, Hours, AMPM
)
df
#> Date Name
#> 1 1/2/2020 4:46 PM A
#> 2 1/2/2020 4:51 PM B
Created on 2020-02-26 by the reprex package (v0.3.0)
Assuming the same assumptions as the previous answer by Allan, here is another way of converting from 24 hour to 12 hour.
library(tidyverse)
library(lubridate)
# Example data: two UTC timestamps on a 24-hour clock.
df <- tibble(
  date = ymd_hms(c("2020/01/02 16:46:00", "2020/01/02 16:51:00"), tz = "UTC"),
  name = c("A", "B")
)
# Build a 12-hour text version of `date`. The result is assigned back to `df`
# so that the printed table actually contains `newdatetime`.
df <- df %>%
  mutate(
    date_hour = hour(date),
    # Noon (hour 12) is PM, so the comparison must be inclusive.
    am_pm = if_else(date_hour >= 12L, "PM", "AM"),
    # 0 -> 12, 1-12 unchanged, 13-23 -> 1-11.
    date_hour = (date_hour - 1L) %% 12L + 1L,
    # Zero-pad the minutes so that 16:05 becomes "4:05 PM", not "4:5 PM".
    newdatetime = paste0(
      date(date), " ", date_hour, ":", sprintf("%02d", minute(date)),
      " ", am_pm
    )
  ) %>%
  select(-c(date_hour, am_pm))
df
# A tibble: 2 x 3
date name newdatetime
<dttm> <chr> <chr>
1 2020-01-02 16:46:00 A 2020-01-02 4:46 PM
2 2020-01-02 16:51:00 B 2020-01-02 4:51 PM
Hope this helps!
I have 3 data frames, df1 = a time interval, df2 = list of IDs, df3 = list of IDs with associated date.
df1 <- structure(list(season = structure(c(2L, 1L), .Label = c("summer",
"winter"), class = "factor"), mindate = structure(c(1420088400,
1433131200), class = c("POSIXct", "POSIXt")), maxdate = structure(c(1433131140,
1448945940), class = c("POSIXct", "POSIXt")), diff = structure(c(150.957638888889,
183.040972222222), units = "days", class = "difftime")), .Names = c("season",
"mindate", "maxdate", "diff"), row.names = c(NA, -2L), class = "data.frame")
df2 <- structure(list(ID = c(23796, 23796, 23796)), .Names = "ID", row.names = c(NA,
-3L), class = "data.frame")
df3 <- structure(list(ID = c("23796", "123456", "12134"), time = structure(c(1420909920,
1444504500, 1444504500), class = c("POSIXct", "POSIXt"), tzone = "US/Eastern")), .Names = c("ID",
"time"), row.names = c(NA, -3L), class = "data.frame")
The code should check whether df2$ID == df3$ID. If that is true, and df3$time >= df1$mindate and df3$time <= df1$maxdate, the result should be df1$maxdate - df3$time; otherwise it should be df1$maxdate - df1$mindate. I tried using the ifelse function. This works when I manually specify specific cells, but that is not what I want, as I have many more (uneven) rows in each of the data frames.
df1$result <- ifelse(df2[1,1] == df3[1,1] & df3[1,2] >= df1$mindate & df3[1,2] <= df1$maxdate,
difftime(df1$maxdate,df3[1,2],units="days"),
difftime(df1$maxdate,df1$mindate,units="days")
EDIT: The desired output is (when removing last row of df2):
season mindate maxdate diff result
1 winter 2015-01-01 2015-05-31 23:59:00 150.9576 days 141.9576
2 summer 2015-06-01 2015-11-30 23:59:00 183.0410 days 183.0410
Any ideas? I don't see how I could merge dfs to make them of the same length. Note that df2 can be of any row length and not affect the code. Issues arise when df1 and df3 differ in # of rows.
The > and < are vectorized:
# Vectorised test inside transform(); mindate and maxdate are df1's columns.
# NOTE(review): this uses strict > and <, whereas the question asks for >= and
# <=, and df3 (3 rows) is compared against df1 (2 rows) -- confirm the rows
# line up as intended.
transform(df1,result=ifelse(df3$ID%in%df2$ID & df3$time>mindate & df3$time <maxdate, difftime(maxdate,df3$time),difftime(maxdate,mindate)))
season mindate maxdate diff result
1 winter 2014-12-31 21:00:00 2015-05-31 20:59:00 150.9576 days 141.9576
2 summer 2015-05-31 21:00:00 2015-11-30 20:59:00 183.0410 days 183.0410
You can also use the between function from data.table library
library(data.table)
# Range test written with data.table's %between%; df1[2:3] supplies the
# mindate and maxdate columns as the lower and upper bounds.
# NOTE(review): %between% includes both bounds by default -- confirm against
# the data.table documentation.
transform(df1,result=ifelse(df3$ID%in%df2$ID&df3$time%between%df1[2:3],
difftime(maxdate,df3$time),difftime(maxdate,mindate)))
season mindate maxdate diff result
1 winter 2014-12-31 21:00:00 2015-05-31 20:59:00 150.9576 days 141.9576
2 summer 2015-05-31 21:00:00 2015-11-30 20:59:00 183.0410 days 183.0410
I need to gather and transpose the data in a data frame so that the hourly values are in one column. The first column should be the date with the hour, and the second the transposed hourly values.
Sample of data:
structure(list(Year = c(2016L, 2016L), JDay = 1:2, Hour_1 = c(2.59,
5.95), Hour_2 = c(2.19, 5.84), Hour_3 = c(1.84, 5.75), Hour_4 = c(1.51,
5.66), Hour_5 = c(1.21, 5.58), Hour_6 = c(0.94, 5.5), Hour_7 = c(0.69,
5.43), Hour_8 = c(0.45, 5.37), Hour_9 = c(0.23, 5.31), Hour_10 = c(2.18,
6.19), Hour_11 = c(4.39, 7.16), Hour_12 = c(6.29, 8), Hour_13 = c(7.76,
8.65), Hour_14 = c(8.68, 9.06), Hour_15 = c(9, 9.2), Hour_16 = c(8.68,
9.06), Hour_17 = c(7.76, 8.65), Hour_18 = c(7.8, 8.52), Hour_19 = c(7.21,
7.57), Hour_20 = c(6.85, 6.99), Hour_21 = c(6.59, 6.57), Hour_22 = c(6.39,
6.25), Hour_23 = c(6.22, 5.98), Hour_24 = c(6.08, 5.75)), .Names = c("Year",
"JDay", "Hour_1", "Hour_2", "Hour_3", "Hour_4", "Hour_5", "Hour_6",
"Hour_7", "Hour_8", "Hour_9", "Hour_10", "Hour_11", "Hour_12",
"Hour_13", "Hour_14", "Hour_15", "Hour_16", "Hour_17", "Hour_18",
"Hour_19", "Hour_20", "Hour_21", "Hour_22", "Hour_23", "Hour_24"
), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame"
Use of gather is just giving me all Hour_1 values in order...
gather(OP_daily[, c(5:28)], time,temp, Hour_1:Hour_24)
Example output:
date temp
2016-1-1 1:00 2.59
2016-1-1 2:00 2.19
This sounds like the gather you are looking for:
# Stack every Hour_* column into Hour/temp pairs, join Year, JDay and Hour into
# one "_"-separated string, and parse it as year, day of year and hour.
df %>%
gather(-c(Year,JDay), key = "Hour", value = "temp") %>%
unite(date,Year,JDay,Hour) %>%
mutate(date=as.POSIXct(date,format='%Y_%j_Hour_%H')) %>%
arrange(date)
date temp
<time> <dbl>
1 2016-01-01 01:00:00 2.592221
2 2016-01-01 02:00:00 2.193009
3 2016-01-01 03:00:00 1.835225
4 2016-01-01 04:00:00 1.511071
5 2016-01-01 05:00:00 1.214767
6 2016-01-01 06:00:00 0.941902
EDIT
To see how many observations per day:
# Build the long table, parsing the timestamps in GMT.
res <- df %>%
gather(-c(Year,JDay), key = "Hour", value = "temp") %>%
unite(date,Year,JDay,Hour) %>%
mutate(date=as.POSIXct(date,format='%Y_%j_Hour_%H',tz = "GMT")) %>%
arrange(date)
# Count how many hourly observations fall on each calendar date.
# NOTE(review): the counts printed for this code (23, 24, 1) suggest Hour_24 is
# parsed as 00:00 of the following day -- confirm that is intended.
res%>%
mutate(date_only=as.Date(date))%>%
group_by(date_only)%>%
summarise(count=n())
date_only count
<date> <int>
1 2016-01-01 23
2 2016-01-02 24
3 2016-01-03 1
I'm trying to use dplyr to have the variables that are factors be represented by their values after importing a SPSS dataset using haven.
Two questions:
1) how can I loop over the columns in the dataframe containing labels over the imported dataset using dplyr?
u<-which(sapply(i,function(x) !is.null(attr(x,"labels"))))
n<-mutate_each(i,(as_factor),... = u)
2) how can I set the correct date after importing .sav file from SPSS. i$e3 is a date, but I'm uncertain how I can convert it to proper r-lingo.
Dataset:
> dput(i)
structure(list(e = structure(c(1, 1, 2, 2, 1), label = "Sex", class = c("labelled",
"numeric"), labels = structure(c(1, 2), .Names = c("Male", "Female"
))), e2 = structure(c(3, 3, 3, 3, 3), label = "The time from injury to surgery", class = c("labelled",
"numeric"), labels = structure(c(1, 2, 3), .Names = c("< 12 hours",
"12 to 24 hours", "> 24 hours"))), e3 = structure(c(13254624000,
13431139200, 13437360000, 13493174400, 13233369600), label = "Surgery Date")), .Names = c("e",
"e2", "e3"), row.names = c(NA, -5L), class = "data.frame")
I'm not sure how to adjust your dates properly (you can change the / 10 to / 100 or 1000). You could do this with base r:
i <- structure(list(e = structure(c(1, 1, 2, 2, 1), label = "Sex", class = c("labelled",
"numeric"), labels = structure(c(1, 2), .Names = c("Male", "Female"
))), e2 = structure(c(3, 3, 3, 3, 3), label = "The time from injury to surgery", class = c("labelled",
"numeric"), labels = structure(c(1, 2, 3), .Names = c("< 12 hours",
"12 to 24 hours", "> 24 hours"))), e3 = structure(c(13254624000,
13431139200, 13437360000, 13493174400, 13233369600), label = "Surgery Date")), .Names = c("e",
"e2", "e3"), row.names = c(NA, -5L), class = "data.frame")
# Read the SPSS variable labels before converting anything, so a conversion
# cannot drop them. exact = TRUE stops "label" from partially matching
# "labels"; a column without a label keeps its current name.
col_labels <- vapply(
  names(i),
  function(nm) {
    lab <- attr(i[[nm]], "label", exact = TRUE)
    if (is.null(lab)) nm else lab
  },
  character(1),
  USE.NAMES = FALSE
)
# SPSS stores date-times as seconds since 1582-10-14 (the start of the
# Gregorian calendar), so use that origin instead of rescaling the value.
i$e3 <- as.POSIXct(unclass(i$e3), origin = "1582-10-14", tz = "UTC")
#   e e2         e3
# 1 1  3 2002-10-22
# 2 1  3 2008-05-26
# 3 2  3 2008-08-06
# 4 2  3 2010-05-14
# 5 1  3 2002-02-18
i <- setNames(i, col_labels)
# Replace each labelled column's codes with the matching label text. Codes are
# matched by value, so this also works when they are not coded 1, 2, 3, ...
i[] <- lapply(i, function(x) {
  lab <- attr(x, "labels", exact = TRUE)
  if (is.null(lab)) {
    return(x)
  }
  names(lab)[match(unclass(x), lab)]
})
#      Sex The time from injury to surgery Surgery Date
# 1   Male                      > 24 hours   2002-10-22
# 2   Male                      > 24 hours   2008-05-26
# 3 Female                      > 24 hours   2008-08-06
# 4 Female                      > 24 hours   2010-05-14
# 5   Male                      > 24 hours   2002-02-18