lapply date format to many dataframes - r

I'm having difficulties cycling through a list and applying the same format to the same variable in many data frames:
df1 <- structure(list(datetime = structure(c(1446336120, 1446336180,
1446336240, 1446336300, 1446336360), class = c("POSIXct", "POSIXt"
), tzone = "UTC")), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"), .Names = "datetime")
df2 <- structure(list(datetime = structure(c(1446336120, 1446336180,
1446336240), class = c("POSIXct", "POSIXt"), tzone = "UTC")), row.names = c(NA,
-3L), class = c("tbl_df", "tbl", "data.frame"), .Names = "datetime")
I want to apply the same format to all dataframes:
df1$datetime <- format(df1$datetime, "%m/%d/%Y %H:%M:%S")
df2$datetime <- format(df2$datetime, "%m/%d/%Y %H:%M:%S")
I tried this:
list_df <- mget(ls(pattern = "df"))
lapply(seq_along(list_df),
function(i) format(list_df[[i]]$datetime, "%m/%d/%Y %H:%M:%S"))
but not sure how to assign it back to each dataframe.

I think your current approach is not far off, but you never make an assignment back to the data frame. Add each data frame to a list and then use lapply:
# Collect the data frames in a list so the same transformation can be mapped
# over all of them.
lst <- list(df1, df2)
# lapply() returns a new list and does not modify `lst` in place, so the
# result is assigned back; otherwise the formatted columns would be discarded.
lst <- lapply(lst, function(x) {
  # format() turns the POSIXct column into character strings.
  x$datetime <- format(x$datetime, "%m/%d/%Y %H:%M:%S")
  x
})
At this point you have a list of data frames in the format you want. If you then later wanted to export each data frame to a CSV file, you could try this:
# Write each data frame in `lst` to its own CSV file (out1.csv, out2.csv, ...).
# seq_along() is used instead of 1:length(lst) so that an empty list results
# in zero iterations rather than iterating over c(1, 0).
for (i in seq_along(lst)) {
  filename <- paste0("out", i, ".csv")
  write.csv(lst[[i]], file = filename)
}

library(dplyr)
# Return a new list in which every data frame's `datetime` column has been
# converted to a "%m/%d/%Y %H:%M:%S" character string.
lapply(list_df, function(tbl) {
  mutate(tbl, datetime = format(datetime, "%m/%d/%Y %H:%M:%S"))
})

Related

Add a column in dataframe by datetime conditions of another dataframe

I have the following data example.
first data:
structure(list(cycle_rounded = structure(c(1604188800, 1604190600,
1604192400, 1604194200, 1604196000, 1604197800, 1604199600, 1604201400,
1604203200, 1604205000, 1604206800, 1604208600, 1604210400, 1604212200,
1604214000, 1604215800, 1604217600, 1604219400, 1604221200, 1604223000,
1604224800, 1604226600, 1604228400, 1604230200, 1604232000, 1604233800,
1604235600, 1604237400, 1604239200, 1604241000, 1604242800, 1604244600,
1604246400, 1604248200, 1604250000, 1604251800, 1604253600, 1604255400,
1604257200, 1604259000, 1604260800, 1604262600, 1604264400, 1604266200,
1604268000, 1604269800, 1604271600, 1604273400, 1604275200, 1604277000,
1604278800, 1604280600, 1604282400, 1604284200, 1604286000, 1604287800,
1604289600, 1604291400, 1604293200, 1604295000, 1604296800, 1604298600,
1604300400, 1604302200, 1604304000, 1604305800, 1604307600, 1604309400,
1604311200, 1604313000, 1604314800, 1604316600, 1604318400, 1604320200,
1604322000, 1604323800, 1604325600, 1604327400, 1604329200, 1604331000,
1604332800, 1604334600, 1604336400, 1604338200, 1604340000, 1604341800,
1604343600, 1604345400, 1604347200, 1604349000, 1604350800, 1604352600,
1604354400, 1604356200, 1604358000, 1604359800, 1604361600, 1604363400,
1604365200, 1604367000), tzone = "UTC", class = c("POSIXct",
"POSIXt"))), row.names = c(NA, -100L), class = c("tbl_df", "tbl",
"data.frame"))
second data:
structure(list(data_inicio_dia = structure(c(1604206740, 1604293080,
1604379480), tzone = "UTC", class = c("POSIXct", "POSIXt")),
data_fim_dia = structure(c(1604252160, 1604338560, 1604424960
), tzone = "UTC", class = c("POSIXct", "POSIXt"))), row.names = c(NA,
-3L), class = c("tbl_df", "tbl", "data.frame"))
I would like to add a column daynight to the first data, where the categories will be day and night. To be day, the value in the cycle_rounded column has to be between the day's start (data_inicio_dia) and the day's end (data_fim_dia) in the second data; otherwise it will be night.
Thanks
library(dplyr)
# Attach each day's start/end window (df2) to the observations (df1) that fall
# on the same calendar date, then label every observation "day" or "night".
left_join(
  df1 %>% mutate(date = lubridate::as_date(cycle_rounded)),
  df2 %>% mutate(date = lubridate::as_date(data_inicio_dia)),
  # Name the key explicitly so the join cannot silently pick up any other
  # column the two tables happen to share.
  by = "date"
) %>%
  mutate(daynight = if_else(
    # Plain comparisons are used instead of between(): the bounds here are
    # whole columns, and older dplyr releases only accept scalar bounds.
    cycle_rounded >= data_inicio_dia & cycle_rounded <= data_fim_dia,
    "day", "night",
    # Observations on a date with no window in df2 have NA bounds after the
    # left join; they are not inside any day window, so label them "night".
    missing = "night"
  ))
This should work. Although, there might be a more elegant way.
# Label each distinct cycle_rounded value in tab1 as "Day" or "Night" by
# comparing it with the start/end columns of tab2.
# NOTE(review): tab1/tab2 are presumably the first and second data sets from
# the question, and `%>%` / case_when() are used unqualified, so dplyr must
# already be attached - confirm.
tab1 %>%
# One group per distinct cycle_rounded value.
dplyr::group_by(cycle_rounded) %>%
dplyr::summarise(
daynight = case_when(
# Lower bound: data_inicio_dia of the tab2 row(s) whose start falls on the
# same calendar date as cycle_rounded.
cycle_rounded >= (tab2 %>% dplyr::filter(lubridate::as_date(tab2$data_inicio_dia) == lubridate::as_date(cycle_rounded)) %>% dplyr::pull(data_inicio_dia)) &
# Upper bound: data_fim_dia of the same tab2 row(s).
# NOTE(review): when no tab2 row shares the date, pull() yields a zero-length
# vector - verify how case_when() behaves in that case.
cycle_rounded <= (tab2 %>% dplyr::filter(lubridate::as_date(tab2$data_inicio_dia) == lubridate::as_date(cycle_rounded)) %>% dplyr::pull(data_fim_dia))
~ "Day",
# Everything that did not satisfy the window test.
TRUE ~ "Night"
)
)

How can I transform data from tidy to a unique format?

I have a dataset in which there are dates describing a time period of interest, as well as events ("Tests" in my toy example) that can fall inside or outside the period of the interest. The events also have a time and some dichotomous characteristics.
My collaborator has asked me to transform the data from this format:
structure(list(ID = c(1, 1, 2, 3), StartDate = structure(c(315878400,
315878400, 357696000, 323481600), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), EndDate = structure(c(316137600, 316310400,
357955200, 323654400), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
TestDateTime = structure(c(316135500, 315797700, 357923700,
323422560), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
TestName = c("Test1", "Test2", "Test1", "Test3"), Characteristic = c("Fast",
"Slow", "Fast", "Slow")), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"))
current state
to this format:
desired state
I am unsure how to accomplish this transformation or set of transformations using R, but I believe it is possible.
try the following
library(dplyr)
# NOTE(review): the data posted in the question only has a TestDateTime column;
# TestDate and TestTime presumably have to be derived from it first - confirm.
data %>%
select(-c(StartDate,EndDate)) %>% # Drop the period columns
tidyr::spread(TestDate, TestTime) %>% # Spread to wide form: one column per TestDate value, filled with TestTime
select(-Characteristic, everything()) %>% # Move Characteristic to the end of the df
group_by(ID) %>% # Group by ID and
group_split() # split into a list with one data frame per ID
Take into account that the date columns of the final df are not exactly the same as in the "desired" state.
Hope this can help you.

How to plot layers of tuples on the same plot in R?

I am trying to plot the time and NDVI for each region on the same plot. I think to do this I have to convert the date column from characters to time and then plot each layer. However I cannot figure out how to do this. Any thoughts?
list(structure(list(observation = 1L, HRpcode = NA_character_,
timeseries = NA_character_), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(observation = 1:6, time = c("2014-01-01",
"2014-02-01", "2014-03-01", "2014-04-01", "2014-05-01", "2014-06-01"
), ` NDVI` = c("0.3793765496776215", "0.21686891782421552", "0.3785652933528299",
"0.41027240624704164", "0.4035578030242673", "0.341299793064468"
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(observation = 1:6, time = c("2014-01-01",
"2014-02-01", "2014-03-01", "2014-04-01", "2014-05-01", "2014-06-01"
), ` NDVI` = c("0.4071076986818826", "0.09090719657570319", "0.35214166081795284",
"0.4444311032927228", "0.5220702877666005", "0.5732370503295022"
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(observation = 1:6, time = c("2014-01-01",
"2014-02-01", "2014-03-01", "2014-04-01", "2014-05-01", "2014-06-01"
), ` NDVI` = c("0.3412131556625801", "0.18815996897460135", "0.5218904976415136",
"0.6970128777711452", "0.7229657162729096", "0.535967435470161"
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
)))
111
First we need to clean your data. The first element in this list is empty
df = df[-1]
Now we need to make a data.frame
df = do.call(rbind, df)
I am going to add a region variable, change the name of NDVI to remove the space,
change ndvi into a numeric vector, and change time into a Date object
library(dplyr)
# Tag each block of six rows with its region, drop the leading space from the
# NDVI column name, and convert ndvi/time from character to numeric/Date.
df <- df %>%
  mutate(region = factor(rep(1:3, each = 6))) %>%
  rename(ndvi = " NDVI") %>%
  mutate(
    ndvi = as.numeric(ndvi),
    time = as.Date(time)
  )
Now we can use ggplot2 to plot the data by region
library(ggplot2)
g = df %>%
ggplot(aes(x = time, y = ndvi, col = region)) +
geom_line()
g
Which gives this plot:
Here's an approach with lubridate to handle dates and dplyr to make the binding of the data.frames easier to understand.
Note that the group names are taken from the names of the list, and since those don't exist in the data you provided, we have to set them in advance.
library(lubridate)
library(ggplot2)
library(dplyr)
# NOTE(review): assumes `data` is the list of the three region data frames
# (with the empty placeholder element already removed) - confirm.
names(data) <- 1:3
# bind_rows() stores each list element's name in the new "group" column.
data <- bind_rows(data, .id = "group")
# Parse the "YYYY-MM-DD" strings into Date values.
data$time <- ymd(data$time)
# dplyr::rename() replaces the original setnames() call: setnames() belongs to
# data.table, which is never loaded in this snippet.
data <- rename(data, NDVI = " NDVI")
data$NDVI <- as.numeric(data$NDVI)
# The id column created above is "group" (lower case); column names are
# case-sensitive, so `Group` would not be found.
ggplot(data, aes(x = time, y = NDVI, color = group)) + geom_line()

Find value of a row by comparing two columns and a value with a range of a different dataset

I have 2 different datasets. One with an object that comes from a StationX and goes to StationY and arrives at a specific date and time as the following.
df1<-structure(list(From = c("Station1", "Station5", "Station6", "Station10"), To = c("Station15", "Station2", "Station2", "Station7"),
Arrival = structure(c(971169720, 971172720, 971178120, 971179620), class = c("POSIXct", "POSIXt"), tzone = "UTC")), row.names = c(NA, -4L),class = c("tbl_df","tbl", "data.frame"))
Dataset2 contains e.g. trucks which wait for the specific object at StationY between the date and time "Arrival" and "Departure", and leave at "Departure" for a specific region "TOID".
As in the following:
df2<-structure(list(TOID = c(2, 4, 7, 20), Station = c("Station15",
"Station2", "Station2","Station7"), Arrival = structure(c(971169600, 971172000, 971177700, 971179500), class = c("POSIXct", "POSIXt"), tzone = "UTC"), Departure1 = structure(c(971170200, 971173200, 971178600, 971179800), class = c("POSIXct", "POSIXt"), tzone = "UTC")), row.names = c(NA, -4L), class = c("tbl_df", "tbl", "data.frame"))
I want to look for the TOID in Dataset2 and add it to Dataset1 if "TO"(Dataset1)="Station"(Dataset2) and "Arrival"(Dataset2)<="Arrival"(Dataset1)<="Departure"(Dataset2) and has therefore the following outcome:
df1outcome<-structure(list(From = c("Station1", "Station5", "Station6", "Station10"
), To = c("Station15", "Station2", "Station2", "Station7"), `TO_ID` = c(2, 4, 7, 20), Arrival = structure(c(971169720, 971172720, 971178120, 971179620), class = c("POSIXct", "POSIXt"), tzone = "UTC")), row.names = c(NA, -4L), class = c("tbl_df", "tbl", "data.frame"))
I need a solution which looks in dataset2 for the ID that matches the conditions regardless the roworder.
Would be awesome if you guys could help me how to code this in R.
Best,
J
Perhaps you could use tidyverse, use a left_join based on the station, and then filter based on dates:
library(tidyverse)
# Pair every df1 row with the df2 rows for its destination station, keep only
# the pairs where the df1 arrival lies inside the df2 arrival/departure window,
# and drop the window columns again.
df1 %>%
  inner_join(df2, by = c("To" = "Station"), suffix = c("1", "2")) %>%
  filter(Arrival2 <= Arrival1, Arrival1 <= Departure1) %>%
  select(-Arrival2, -Departure1)
# A tibble: 4 x 4
From To Arrival1 TOID
<chr> <chr> <dttm> <dbl>
1 Station1 Station15 2000-10-10 09:22:00 2
2 Station5 Station2 2000-10-10 10:12:00 4
3 Station6 Station2 2000-10-10 11:42:00 7
4 Station10 Station7 2000-10-10 12:07:00 20
I'm pretty new to R, so this code is probably longer than it should be. But does this work?
# Give the two arrival columns distinct names so both survive the merge and
# can be compared afterwards.
df1 <- rename(df1, Arrival_Package = Arrival)
df2 <- rename(df2, Arrival_Truck = Arrival)
# Pair each package with the trucks at its destination station.
df1outcome <- merge(df1, df2, by.x = "To", by.y = "Station")
# Keep only the pairs where the package arrives while the truck is waiting,
# then keep only the relevant columns.
df1outcome <- subset(
  df1outcome,
  Arrival_Package <= Departure1 & Arrival_Truck <= Arrival_Package
)
df1outcome <- select(df1outcome, From, To, TOID, Arrival_Package)

Convert days to calendar dates within a data frame in R

I have a dataframe like
ID |TRTSDT| TRTEDT
101|17952 | 18037
102|17956 | 18041
How can I convert the days into Date format? Thank you.
Try
# Convert every column except the first from a count of days since 1970-01-01
# into a Date.
df1[-1] <- lapply(df1[-1], function(days) as.Date(days, origin = "1970-01-01"))
data
df1 <- structure(list(ID = 101:102, TRTSDT = c(17952L, 17956L),
TRTEDT = c(18037L,
18041L)), .Names = c("ID", "TRTSDT", "TRTEDT"), class = "data.frame",
row.names = c(NA, -2L))

Resources