R date-time format in empty data frame - r

I am attempting to set up an empty data frame in R which will be populated by, amongst other things, two date-timestamps in the form of e.g. 21/08/2014 20:51.
This is my code:
eventised <- data.frame(student_id=integer(),
session_id=integer(),
start_ts=as.POSIXct(format = "%d/%m/%Y %H:%M"),
stop_ts=as.POSIXct(format = "%d/%m/%Y %H:%M"),
week=integer(),
macro_process=character(),
micro_process=character(),
stringsAsFactors=FALSE)
raw_events <- read.csv(file="SRL_Concat_ST1_Test_2.csv", header = TRUE, sep=",")
last_sess_ID <- 0
for (row in 1:nrow(raw_events)) {
if(raw_events[row, "SESSION_ID"] != last_sess_ID || row == nrow(raw_events)) {
print(row)
if(row !=1) {
eventised[nrow(eventised)+1,] <- c(r_student_id, r_session_id, r_start_ts, r_stop_ts, r_week, "MAC", "MIC")
# eventised[nrow(eventised)+1,] <- c(r_student_id, r_session_id, r_week, "MAC", "MIC")
}
r_student_id <- raw_events[row, "STUDENT_ID"]
r_session_id <- raw_events[row, "SESSION_ID"]
r_start_ts <- raw_events[row, "TIMESTAMP"]
r_stop_ts <- raw_events[row, "TIMESTAMP"]
r_week <- raw_events[row, "WEEK"]
last_sess_ID <- raw_events[row, "SESSION_ID"]
} else {
r_stop_ts <- raw_events[row, "TIMESTAMP"]
}
I get this error:
Error in inherits(x, "POSIXct") :
argument "x" is missing, with no default
Then later I attempt to do this:
eventised[nrow(eventised)+1,] <- c(r_student_id, r_session_id, r_start_ts, r_stop_ts, r_week, "MAC", "MIC")
I get:
Error in charToDate(x) :
character string is not in a standard unambiguous format
I am probably doing something stupid but I would really appreciate some help.
Thanks in advance,
F
DATA
STUDENT_ID SESSION_ID TIMESTAMP LACTION_TYPE WEEK STUDY_MODE
4 7 11/08/2014 23:08 CONTENT_ACCESS 3 revisiting

This should take care of the datetime format:
df <- data.frame(start_ts=as.POSIXct(character()))

Related

Conversion date stored as characters into date format in R [duplicate]

D <- "06.12.1948" # which is dd.mm.yyyy
as.Date(D, "%d.%m.%y") # convert to date
[1] "2019-12-06" # ????
what is it that I am missing?
Sys.getlocale(category = "LC_ALL")
[1] "LC_COLLATE=German_Austria.1252;LC_CTYPE=German_Austria.1252;LC_MONETARY=German_Austria.1252;LC_NUMERIC=C;LC_TIME=German_Austria.1252"
The format is case-sensitive ("%y" is ambiguous and system dependent, I believe):
as.Date(D, "%d.%m.%Y")
[1] "1948-12-06"
The help topic ?strptime has details:
‘%y’ Year without century (00-99). On input, values 00 to 68 are
prefixed by 20 and 69 to 99 by 19 - that is the behaviour
specified by the 2004 and 2008 POSIX standards, but they do
also say ‘it is expected that in a future version the default
century inferred from a 2-digit year will change’.
To avoid remembering formats of the date we can use packaged solutions.
1) With lubridate
lubridate::dmy(D)
#[1] "1948-12-06"
2) Using anytime
anytime::anydate(D)
#[1] "1948-06-12"
Might be helpful for someone. I have found this function in tutorial "Handling date-times in R" by Cole Beck. The function identifies format of your data.
# FUNCTION guessDateFormat #x vector of character dates/datetimes #returnDates return
# actual dates rather than format convert character datetime to POSIXlt datetime, or
# at least guess the format such that you could convert to datetime
guessDateFormat <- function(x, returnDates = FALSE, tzone = "") {
x1 <- x
# replace blanks with NA and remove
x1[x1 == ""] <- NA
x1 <- x1[!is.na(x1)]
if (length(x1) == 0)
return(NA)
# if it's already a time variable, set it to character
if ("POSIXt" %in% class(x1[1])) {
x1 <- as.character(x1)
}
dateTimes <- do.call(rbind, strsplit(x1, " "))
for (i in ncol(dateTimes)) {
dateTimes[dateTimes[, i] == "NA"] <- NA
}
# assume the time part can be found with a colon
timePart <- which(apply(dateTimes, MARGIN = 2, FUN = function(i) {
any(grepl(":", i))
}))
# everything not in the timePart should be in the datePart
datePart <- setdiff(seq(ncol(dateTimes)), timePart)
# should have 0 or 1 timeParts and exactly one dateParts
if (length(timePart) > 1 || length(datePart) != 1)
stop("cannot parse your time variable")
timeFormat <- NA
if (length(timePart)) {
# find maximum number of colons in the timePart column
ncolons <- max(nchar(gsub("[^:]", "", na.omit(dateTimes[, timePart]))))
if (ncolons == 1) {
timeFormat <- "%H:%M"
} else if (ncolons == 2) {
timeFormat <- "%H:%M:%S"
} else stop("timePart should have 1 or 2 colons")
}
# remove all non-numeric values
dates <- gsub("[^0-9]", "", na.omit(dateTimes[, datePart]))
# sep is any non-numeric value found, hopefully / or -
sep <- unique(na.omit(substr(gsub("[0-9]", "", dateTimes[, datePart]), 1, 1)))
if (length(sep) > 1)
stop("too many seperators in datePart")
# maximum number of characters found in the date part
dlen <- max(nchar(dates))
dateFormat <- NA
# when six, expect the century to be omitted
if (dlen == 6) {
if (sum(is.na(as.Date(dates, format = "%y%m%d"))) == 0) {
dateFormat <- paste("%y", "%m", "%d", sep = sep)
} else if (sum(is.na(as.Date(dates, format = "%m%d%y"))) == 0) {
dateFormat <- paste("%m", "%d", "%y", sep = sep)
} else stop("datePart format [six characters] is inconsistent")
}else if (dlen == 8) {
if (sum(is.na(as.Date(dates, format = "%Y%m%d"))) == 0) {
dateFormat <- paste("%Y", "%m", "%d", sep = sep)
} else if (sum(is.na(as.Date(dates, format = "%m%d%Y"))) == 0) {
dateFormat <- paste("%m", "%d", "%Y", sep = sep)
} else stop("datePart format [eight characters] is inconsistent")
} else {
stop(sprintf("datePart has unusual length: %s", dlen))
}
if (is.na(timeFormat)) {
format <- dateFormat
} else if (timePart == 1) {
format <- paste(timeFormat, dateFormat)
} else if (timePart == 2) {
format <- paste(dateFormat, timeFormat)
} else stop("cannot parse your time variable")
if (returnDates)
return(as.POSIXlt(x, format = format, tz = tzone))
format
}
# generate some dates
mydates <- format(as.POSIXct(sample(31536000, 20), origin = "2011-01-01", tz = "UTC"), "%m.%d.%Y %H:%M")
mydates
## [1] "02/07/2011 06:51" "11/21/2011 17:03" "09/17/2011 22:42" "02/16/2011 13:45"
## [5] "12/14/2011 19:11" "09/08/2011 09:22" "12/06/2011 14:06" "02/02/2011 11:00"
## [9] "03/27/2011 06:12" "01/05/2011 15:09" "04/15/2011 04:17" "10/20/2011 14:20"
## [13] "11/13/2011 21:46" "02/26/2011 03:24" "12/29/2011 11:02" "03/17/2011 02:24"
## [17] "02/27/2011 13:51" "06/27/2011 08:36" "03/14/2011 10:54" "01/28/2011 14:14"
guessDateFormat(mydates)
[1] "%m.%d.%Y %H:%M"
Lubridate is the best option for this in my opinion. The following will work fine.
`data %>% mutate(date_variable = as.Date(dmy(date_variable)))`
Interestingly though I found dmy() to behave weirdly when as.Date() and dmy() were called in separate steps

How to rename a column in a ts object

I am working with objects of class ts in R.
Is there code I can use to change column names in this kind of an object?
For a data frame, I would use something like this:
Shipper_City <- rename(Shipper_City,"ShipCity_Old" = "ShipCity")
Use the 'colnames()' function
You want to use colnames(), because you're dealing with a matrix in case of a ts object:
colnames(data) <- c("ColName1", "ColName2")
Hope this helps.
Here is a function that I created that might be useful for you.
rename.ts <- function(ts, ...){
if (inherits(ts, "mts")) {
x <- list(...)
old_names <- names(x)
if (all(old_names %in% colnames(ts))) {
id_old_names <- which(old_names %in% colnames(ts))
colnames(ts)[id_old_names] <- unname(unlist(x))
} else {
stop("You must provide valid column names")
}
} else {
stop("You must provide a mts object as argument")
}
ts
}
# Example
mts <- ts(data = mtcars, start = 2013, frequency = 4)
rename.ts(mts, "mpg" = "mpg2", "cyl" = "cyl2")

How to take value from one column and store it in newly created column using function call

firstly sorry if this is a stupid question ... I am learning R, and really dont have too much experience
I have following function in R programming language, that is taking value and returning value.
dec2binSingle <- function(decimal) {
print(decimal)
binaryValue <- ""
index <- 0
decimal <- as.numeric(decimal)
while(decimal != 0) {
print(decimal)
temp <- as.numeric(decimal) %% 2
if (temp == 1) {
binaryValue <- paste("1", binaryValue, sep="", collapse = NULL)
decimal <- decimal - 1
} else {
binaryValue <- paste("0", binaryValue, sep="", collapse = NULL)
}
index <- index + 1
decimal <- decimal / 2
}
return(binaryValue)
}
The function is converting decimal number into binary equivalent.
When I try to call the function, the function completes without any error, but when I try to see the data, the following error appears:
Error in View : 'names' attribute [200] must be the same length as the vector [1]
And this is the way, how the function is being called:
test_function <- function(value1) {return(dec2binSingle(as.numeric(unlist(value1))))}
data_example$tv <- with(data_example, test_function(data_example[which(colnames(data_example) == "numbers")]))
Any help is appreciated... thanks
EDIT:
I called the function for single value and it works as expected.
> dec2binSingle(23)
[1] "10111"
>
I hope this is what you wanted to achieve with your code.
#sample data
df <- data.frame(char1=c("abc","def","xyz"), num1=c(1,34,12), num2=c(34,20,8))
df
#function to convert decimal into binary
bin_func <- function(x) {gsub("^0+","",paste(rev(as.numeric(intToBits(x))), collapse=""))}
#verify which all columns are numeric
num_col <- sapply(df,is.numeric)
df1 <- as.data.frame(lapply(df[,num_col], FUN = function(x) {sapply(x, FUN = bin_func)}))
names(df1) <- paste(names(df1),"_converted",sep="")
#final dataframe having original as well as converted columns
df <- cbind(df,df1)
df
Please don't forget to let us know if it helped :)

How to handle "subscript out of bounds" error inside a function? in R

I have a dataframe like this
Datetime <- c("2016-03-03 05:30:13", "2015-03-02 12:45:00", "2016-03-01 02:53:20", "2016-02-28 03:22:18",
"2016-03-02 09:42:10", "2016-03-01 20:55:50", "2016-02-28 21:14:10", "2016-02-26 05:42:16",
"2016-03-02 08:31:15", "2016-03-02 09:13:10", "2016-03-01 00:45:14", "2016-02-26 05:56:00",
"2016-02-02 13:44:00", "2016-02-02 14:41:20", "2016-02-01 15:33:10", "2016-01-25 04:24:00",
"2016-03-02 17:24:12", "2016-03-01 17:28:16", "2016-02-28 18:22:34", "2016-02-27 02:34:31")
ID <- c("A","A","A","A","B","B","B","B","C","C","C","C","D","D","D","D","E","E","E","E")
PT <- c(27,35,38,22,35,39,7,15,37,25,38,42,45,19,17,25,32,35,39,26)
df <- data.frame(Datetime,ID,PT)
df$Datetime <- as.POSIXct(df$Datetime,format="%Y-%m-%d %H:%M:%S")
I use these lines to set the datetimes that I want to subset
today <- as.POSIXlt(Sys.time())
today$mday <- today$mday
today$hour = "6"
today$min = "30"
today$sec = "0"
Back2weeks <- today
Back1day <- today
Back2weeks$mday <- Back2weeks$mday-14
Back1day$mday <- Back1day$mday-1
I am trying to use functions to create ggplots to output plots automatically for every ID and I do it this way
plots <- function(a) {
if (nrow(a)>0) {
ggplot(data = a,aes(x=Datetime,y=PT)) + geom_line(size = 0.5) + geom_point(size=3)
}
}
lss<-list()
plot_lss<-list()
UniqueID <- summaryBy(ID~ID, data = df, FUN = function(x) { c(n = length(x)) } )
for (j in 1:(nrow(UniqueID)))
{
cat(j)
id <- as.character(UniqueID$ID[[j]])
df1 <- subset(df, ID == id)
df2 <- subset(df1, df1$Datetime>Back2weeks & df1$Datetime<today)
lss[[j]] <- df2
plotdata <- subset(df2, ID == id)
plot_lss[[j]] <- plots(plotdata)
print(plot_lss[[j]])
Sys.sleep(0)
}
While this runs for the first 3 plots, the 4th plot doesn't show up since the datetime for the ID is out of my subset range. Now the function stops here and throws an error saying "Error in plot_lss[[j]] : subscript out of bounds", which is fine but I want the function to continue running and output the 5th plot which has a datetime within range. How would I make the function to keep running till the end and prevent exiting out of the loop in this case? Kindly please provide your thoughts in this.
Perhaps
plotdata <- subset(df2, ID == id)
if(nrow(plotdata) == 0)
next
is enough?

Convert string to date, format: "dd.mm.yyyy"

D <- "06.12.1948" # which is dd.mm.yyyy
as.Date(D, "%d.%m.%y") # convert to date
[1] "2019-12-06" # ????
what is it that I am missing?
Sys.getlocale(category = "LC_ALL")
[1] "LC_COLLATE=German_Austria.1252;LC_CTYPE=German_Austria.1252;LC_MONETARY=German_Austria.1252;LC_NUMERIC=C;LC_TIME=German_Austria.1252"
The format is case-sensitive ("%y" is ambiguous and system dependent, I believe):
as.Date(D, "%d.%m.%Y")
[1] "1948-12-06"
The help topic ?strptime has details:
‘%y’ Year without century (00-99). On input, values 00 to 68 are
prefixed by 20 and 69 to 99 by 19 - that is the behaviour
specified by the 2004 and 2008 POSIX standards, but they do
also say ‘it is expected that in a future version the default
century inferred from a 2-digit year will change’.
To avoid remembering formats of the date we can use packaged solutions.
1) With lubridate
lubridate::dmy(D)
#[1] "1948-12-06"
2) Using anytime
anytime::anydate(D)
#[1] "1948-06-12"
Might be helpful for someone. I have found this function in tutorial "Handling date-times in R" by Cole Beck. The function identifies format of your data.
# FUNCTION guessDateFormat #x vector of character dates/datetimes #returnDates return
# actual dates rather than format convert character datetime to POSIXlt datetime, or
# at least guess the format such that you could convert to datetime
guessDateFormat <- function(x, returnDates = FALSE, tzone = "") {
x1 <- x
# replace blanks with NA and remove
x1[x1 == ""] <- NA
x1 <- x1[!is.na(x1)]
if (length(x1) == 0)
return(NA)
# if it's already a time variable, set it to character
if ("POSIXt" %in% class(x1[1])) {
x1 <- as.character(x1)
}
dateTimes <- do.call(rbind, strsplit(x1, " "))
for (i in ncol(dateTimes)) {
dateTimes[dateTimes[, i] == "NA"] <- NA
}
# assume the time part can be found with a colon
timePart <- which(apply(dateTimes, MARGIN = 2, FUN = function(i) {
any(grepl(":", i))
}))
# everything not in the timePart should be in the datePart
datePart <- setdiff(seq(ncol(dateTimes)), timePart)
# should have 0 or 1 timeParts and exactly one dateParts
if (length(timePart) > 1 || length(datePart) != 1)
stop("cannot parse your time variable")
timeFormat <- NA
if (length(timePart)) {
# find maximum number of colons in the timePart column
ncolons <- max(nchar(gsub("[^:]", "", na.omit(dateTimes[, timePart]))))
if (ncolons == 1) {
timeFormat <- "%H:%M"
} else if (ncolons == 2) {
timeFormat <- "%H:%M:%S"
} else stop("timePart should have 1 or 2 colons")
}
# remove all non-numeric values
dates <- gsub("[^0-9]", "", na.omit(dateTimes[, datePart]))
# sep is any non-numeric value found, hopefully / or -
sep <- unique(na.omit(substr(gsub("[0-9]", "", dateTimes[, datePart]), 1, 1)))
if (length(sep) > 1)
stop("too many seperators in datePart")
# maximum number of characters found in the date part
dlen <- max(nchar(dates))
dateFormat <- NA
# when six, expect the century to be omitted
if (dlen == 6) {
if (sum(is.na(as.Date(dates, format = "%y%m%d"))) == 0) {
dateFormat <- paste("%y", "%m", "%d", sep = sep)
} else if (sum(is.na(as.Date(dates, format = "%m%d%y"))) == 0) {
dateFormat <- paste("%m", "%d", "%y", sep = sep)
} else stop("datePart format [six characters] is inconsistent")
}else if (dlen == 8) {
if (sum(is.na(as.Date(dates, format = "%Y%m%d"))) == 0) {
dateFormat <- paste("%Y", "%m", "%d", sep = sep)
} else if (sum(is.na(as.Date(dates, format = "%m%d%Y"))) == 0) {
dateFormat <- paste("%m", "%d", "%Y", sep = sep)
} else stop("datePart format [eight characters] is inconsistent")
} else {
stop(sprintf("datePart has unusual length: %s", dlen))
}
if (is.na(timeFormat)) {
format <- dateFormat
} else if (timePart == 1) {
format <- paste(timeFormat, dateFormat)
} else if (timePart == 2) {
format <- paste(dateFormat, timeFormat)
} else stop("cannot parse your time variable")
if (returnDates)
return(as.POSIXlt(x, format = format, tz = tzone))
format
}
# generate some dates
mydates <- format(as.POSIXct(sample(31536000, 20), origin = "2011-01-01", tz = "UTC"), "%m.%d.%Y %H:%M")
mydates
## [1] "02/07/2011 06:51" "11/21/2011 17:03" "09/17/2011 22:42" "02/16/2011 13:45"
## [5] "12/14/2011 19:11" "09/08/2011 09:22" "12/06/2011 14:06" "02/02/2011 11:00"
## [9] "03/27/2011 06:12" "01/05/2011 15:09" "04/15/2011 04:17" "10/20/2011 14:20"
## [13] "11/13/2011 21:46" "02/26/2011 03:24" "12/29/2011 11:02" "03/17/2011 02:24"
## [17] "02/27/2011 13:51" "06/27/2011 08:36" "03/14/2011 10:54" "01/28/2011 14:14"
guessDateFormat(mydates)
[1] "%m.%d.%Y %H:%M"
Lubridate is the best option for this in my opinion. The following will work fine.
`data %>% mutate(date_variable = as.Date(dmy(date_variable)))`
Interestingly though I found dmy() to behave weirdly when as.Date() and dmy() were called in separate steps

Resources