finding closest date (but not the same date) in R - r

To find the closest date, I have:
closestDate <- function(searchDate, dateList, roundDown=FALSE) {
as.Date(sapply(as.Date(searchDate), function(x){
dist <- abs(x - as.Date(dateList))
closest <- dateList[which(min(dist) == dist)]
return(ifelse(roundDown, min(closest), max(closest)))
}), origin="1970-1-1")
}
When:
> nonNAdays
[1] "2011-08-15" "2011-08-18" "2011-08-19"
I get:
> closestDate('2011-08-15', nonNAdays)
[1] "2011-08-15"
I would like for the function to give me the closest date other than the date itself. So in this case, "2011-08-18". How can I alter my code to get this?
Thanks.

Just remove the dates that are equal from the dist calculations and from the selection operation:
closestDate <- function(searchDate, dateList, roundDown=FALSE) {
as.Date(sapply(as.Date(searchDate), function(x){
dist <- abs(x - as.Date(dateList[dateList != searchDate]))
closest <- dateList[dateList != searchDate][which(min(dist) == dist)]
return(ifelse(roundDown, min(closest), max(closest)))
}), origin="1970-1-1")
}
nonNAdays <- c("2011-08-15", "2011-08-18", "2011-08-19")
closestDate('2011-08-15', nonNAdays)
#[1] "2011-08-18"

Related

How do I apply this for loop over my character vector and output another vector?

The intention of my code is to concatenate the values from the vector v. To do this, I created a function concat with two arguments vector, SID. But for reasons I don't understand,
#A character vector to which other strings will be appended
v <- c("R_2wmKOSbPWHl4VtT2","R_2TtslLEVNeHs2r73","R_ZF79IJ60LaxxsuR4","R_3JJDUkrZ07eIwnh5","R_3JrWuv9fsLK6qNx6")
concat <- function(vector,SID){
decrement_append <- "&decrementQuotas=true"
SID_append <- "?surveyId="
for(i in 1:length(vector)){
out[i] <- paste0(v[i],SID_append,SID,decrement_append)
}
out[i]
}
And:
concat(vector = v,
SID = "SV_55tYjKDRKYTRNIh")
When I run this, I get:
Error in concat(vector = v, SID = "SV_55tYjKDRKYTRNIh") :
object 'out' not found
I've tried it a couple of other ways, such as:
concat <- function(vector,SID){
decrement_append <- "&decrementQuotas=true"
SID_append <- "?surveyId="
new_vector <- for(i in 1:length(vector)){
out[i] <- paste0(v[i],SID_append,SID,decrement_append)
}
new_vector
}
But I'm getting the same error.
The out is not initialized in the function
concat <- function(vector,SID){
out <- character(length(vector))
decrement_append <- "&decrementQuotas=true"
SID_append <- "?surveyId="
for(i in 1:length(vector)){
out[i] <- paste0(v[i],SID_append,SID,decrement_append)
}
out
}
-testing
> concat(v, "SV_55tYjKDRKYTRNIh")
[1] "R_2wmKOSbPWHl4VtT2?surveyId=SV_55tYjKDRKYTRNIh&decrementQuotas=true" "R_2TtslLEVNeHs2r73?surveyId=SV_55tYjKDRKYTRNIh&decrementQuotas=true"
[3] "R_ZF79IJ60LaxxsuR4?surveyId=SV_55tYjKDRKYTRNIh&decrementQuotas=true" "R_3JJDUkrZ07eIwnh5?surveyId=SV_55tYjKDRKYTRNIh&decrementQuotas=true"
[5] "R_3JrWuv9fsLK6qNx6?surveyId=SV_55tYjKDRKYTRNIh&decrementQuotas=true"
paste/paste0 are vectorized. So, looping is not really needed
concat2 <- function(vector,SID){
decrement_append <- "&decrementQuotas=true"
SID_append <- "?surveyId="
paste0(v, SID_append,SID,decrement_append)
}
-testing
> concat2(v, "SV_55tYjKDRKYTRNIh")
[1] "R_2wmKOSbPWHl4VtT2?surveyId=SV_55tYjKDRKYTRNIh&decrementQuotas=true" "R_2TtslLEVNeHs2r73?surveyId=SV_55tYjKDRKYTRNIh&decrementQuotas=true"
[3] "R_ZF79IJ60LaxxsuR4?surveyId=SV_55tYjKDRKYTRNIh&decrementQuotas=true" "R_3JJDUkrZ07eIwnh5?surveyId=SV_55tYjKDRKYTRNIh&decrementQuotas=true"
[5] "R_3JrWuv9fsLK6qNx6?surveyId=SV_55tYjKDRKYTRNIh&decrementQuotas=true"

Convert time don't work on a column's dataframe

I have some data from event producer. In a "created_at column I have mixed type of datetime value.
Some NA, some ISO8601 like, some POSIX with and without millisec.
I build a func that should take care of everything meanning let's NA and ISO8601 info as it is, and convert POSIX date to ISO8601.
library(anytime)
convert_time <- function(x) {
nb_char = nchar(x)
if (is.na(x)) return(x)
else if (nb_char == 10 | nb_char == 13) {
num_x = as.numeric(x)
if (nb_char == 13) {
num_x = round(num_x / 1000, 0)
}
return(anytime(num_x))
}
return(x)
}
If I passe one problematic value
convert_time("1613488656")
"2021-02-16 15:17:36 UTC"
Works well !
Now
df_offer2$created_at = df_offer2$created_at %>% sapply(convert_time)
I still have the problematic values.
Any tips here ?
I would suggest the following small changes...
convert_time <- function(x) {
nb_char = nchar(x)
if (is.na(x)) return(x)
else if (nb_char == 10 | nb_char == 13) {
num_x = as.numeric(x)
if (nb_char == 13) {
num_x = round(num_x / 1000, 0)
}
return(num_x) #remove anytime from here
}
return(x)
}
df_offer2$created_at = df_offer2$created_at %>%
sapply(convert_time) %>% anytime() #put it back in at this point
Two things that have worked for me:
col1<-seq(from=1,to=10)
col2<-rep("1613488656",10)
df <- data.frame(cbind(col1,col2))
colnames(df)<-c("index","created_at")
df <- df%>%
mutate(converted = convert_time(df$created_at))`
alternatively
col1<-seq(from=1,to=10)
col2<-rep("1613488656",10)
df <- data.frame(cbind(col1,col2))
colnames(df)<-c("index","created_at")
df$created_at <- convert_time(df$created_at)
Both spit out warnings but appear to make the correction properly

How to take value from one column and store it in newly created column using function call

firstly sorry if this is a stupid question ... I am learning R, and really dont have too much experience
I have following function in R programming language, that is taking value and returning value.
dec2binSingle <- function(decimal) {
print(decimal)
binaryValue <- ""
index <- 0
decimal <- as.numeric(decimal)
while(decimal != 0) {
print(decimal)
temp <- as.numeric(decimal) %% 2
if (temp == 1) {
binaryValue <- paste("1", binaryValue, sep="", collapse = NULL)
decimal <- decimal - 1
} else {
binaryValue <- paste("0", binaryValue, sep="", collapse = NULL)
}
index <- index + 1
decimal <- decimal / 2
}
return(binaryValue)
}
The function is converting decimal number into binary equivalent.
When I try to call the function, the function completes without any error, but when I try to see the data, the following error appears:
Error in View : 'names' attribute [200] must be the same length as the vector [1]
And this is the way, how the function is being called:
test_function <- function(value1) {return(dec2binSingle(as.numeric(unlist(value1))))}
data_example$tv <- with(data_example, test_function(data_example[which(colnames(data_example) == "numbers")]))
Any help is appreciated... thanks
EDIT:
I called the function for single value and it works as expected.
> dec2binSingle(23)
[1] "10111"
>
I hope this is what you wanted to achieve with your code.
#sample data
df <- data.frame(char1=c("abc","def","xyz"), num1=c(1,34,12), num2=c(34,20,8))
df
#function to convert decimal into binary
bin_func <- function(x) {gsub("^0+","",paste(rev(as.numeric(intToBits(x))), collapse=""))}
#verify which all columns are numeric
num_col <- sapply(df,is.numeric)
df1 <- as.data.frame(lapply(df[,num_col], FUN = function(x) {sapply(x, FUN = bin_func)}))
names(df1) <- paste(names(df1),"_converted",sep="")
#final dataframe having original as well as converted columns
df <- cbind(df,df1)
df
Please don't forget to let us know if it helped :)

How to create a chron sequence where time records are equally distributed (consistent delta t)

I have the following functions. CreateChronVector does exactly what it implies. The resulting vector is in hourly intervals by default. The RoundHour function rounds up a chron vector to the hour.
CreateChronVector <- function(chronFrom, chronTo, frequency = "hourly") {
library(chron)
datesFrom <- dates(chronFrom)
timesFrom <- (chronFrom - dates(chronFrom))
datesTo <- dates(chronTo)
timesTo <- (chronTo - dates(chronTo))
if ((timesFrom != 0 || timesTo != 0) && frequency == "daily") {
print("Error: The indicated dates have hour components while the given frequency is daily.")
}
else {
if (timesTo == 0 && frequency == "hourly") {
timesTo <- 23/24
}
if (frequency == "hourly") {
chronFrom <- chron(dates = datesFrom, times = timesFrom,
format = c(dates = "m/d/y", times = "h:m:s"))
chronTo <- chron(dates = datesTo, times = timesTo,
format = c(dates = "m/d/y", times = "h:m:s"))
dateVector <- seq(chronFrom, chronTo, by = 1/24)
}
else if (frequency == "daily") {
dateVector <- seq(datesFrom, datesTo)
}
return(dateVector)
}
}
RoundHour <- function(x) {
res <- trunc(x,'hours', eps=1e-17)
res <- ifelse((x-res) > 0.5/24, res+1/24, res)
return(as.chron(res))
}
The problem I'm facing is that the intervals are not consistent. As an example, the code below returns two different interval sizes:
unique(diff(CreateChronVector(as.chron('2010-01-01'), as.chron('2010-01-01'))))
Similarly, using my rounding function does not correct the problem:
unique(diff(RoundHour(CreateChronVector(as.chron('2010-01-01'), as.chron('2010-01-01')))))
I'm sure this problem has to do with round-off errors. I have been trying to play with the trunc function and its eps parameter, but no luck.
You can use xts package. Once you have your data in xts object, you can use align.time function to "round up" time index. Almost all the timeseries analysis is very convenient in xts
PS: If you give reproducible example of your data I will update the answer with an example.
Taking the point from #G. Grothendieck, you can see what he is talking about if you try this:
hours <- 1:23
dateVector <- sapply(hours , function(x){ chron( dates = "01/01/10" , times = paste0(x,":00:00") ) } )
head( dateVector )
[1] 14610.04166666666606034 14610.08333333333393966 14610.12500000000000000
[4] 14610.16666666666606034 14610.20833333333393966 14610.25000000000000000
unique(diff(dateVector))
[1] 0.04166666666787932626903 0.04166666666606033686548
So you can't really do it because these numbers can't be represented exactly in floating point, but is there a reason this matters to you?

Find first Tuesday of Month

I am trying to write a function which takes a vector of dates as an input and returns a vector of dates -- where the output is the date of the first Tuesday of the month which matches the input date.
So 2012-11-19 --> 2012-11-06, etc.
I have had some success with a single date, but have not been able to generalise to the vector case. Could someone please help?
This is what I have so far:
firstTuesday <- function(tt){
ct <- as.POSIXct(tt)
lt <- as.POSIXlt(tt)
firstOf <- as.POSIXlt(ct - 60*60*24* (lt$mday - 1))
if (firstOf$wday > 2)
{
adjDays <- (9 - firstOf$wday)
firstTues <- as.POSIXlt(as.POSIXct(firstOf) + 60*60*24*adjDays)
}
else {
adjDays <- (2 - firstOf$wday)
firstTues <- as.POSIXlt(as.POSIXct(firstOf) + 60*60*24*adjDays)
}
return(firstTues)
}
Which works for a single date: firstTuesday(Sys.Date()) but yielded junk for vectors of dates (due to issues with if not being a vectorised control operator, i think).
I got around my limited understanding by using indexing. The following code seems to do the trick.
firstTuesday <- function(tt){
ct <- as.POSIXct(tt)
lt <- as.POSIXlt(tt)
firstOf <- as.POSIXlt(ct - 60*60*24* (lt$mday - 1))
firstTue <- as.POSIXct(firstOf)
idx <- firstOf$wday > 2
firstTue[idx] <- as.POSIXct(firstOf[idx]) + 60*60*24*(9 - firstOf$wday[idx])
firstTue[!idx] <- as.POSIXct(firstOf[!idx]) + 60*60*24*(2 - firstOf$wday[!idx])
return(firstTue)
}
This uses lubridate and makes the logic a little simpler. Given a vector of dates the second function will return a vector of characters, similar to your input. You can change things around to suit your needs.
library(lubridate)
getTuesday = function(x) {
date = ymd(x)
first = floor_date(date,"month")
dow = sapply(seq(0,6),function(x) wday(first+days(x)))
firstTuesday = first + days(which(dow==3)-1)
return(firstTuesday)
}
getMultipleTuesdays = function(y) {
tmp = lapply(y, getTuesday)
tmp = lapply(tmp, as.character)
return(unlist(tmp))
}
Edit
Sample input/output
getMultipleTuesdays(c("2012-11-19","2012-11-19","2011-01-15"))
[1] "2012-11-06" "2012-11-06" "2011-01-04"
Here's a simple solution using base functions:
firstDayOfMonth <- function(dates, day="Mon", abbreviate=TRUE) {
# first 7 days of month
s <- lapply(as.Date(format(dates,"%Y-%m-01")), seq, by="day", length.out=7)
# first day of month
d <- lapply(s, function(d) d[weekdays(d,abbreviate)==day])
# unlist converts to atomic, so use do.call(c,...) instead
do.call(c, d)
}
Well, maybe the do.call at the end isn't so simple... but it's a handy piece of knowledge. :)
R> d <- as.Date(c("2012-11-19","2012-11-19","2011-01-15"))
R> firstDayOfMonth(d, "Tuesday", FALSE)
[1] "2012-11-06" "2012-11-06" "2011-01-04"

Resources