For-loop index iteration over date vector - r

I know this is probably a stupid question but why do I need to start from 2 instead of one to get this list to loop properly?
# Build a vector of month-start dates for 2021, then print each pair of
# consecutive entries as a (start, end) range.
date <- seq(as.Date("2021-01-01"), as.Date("2021-12-31"), by="months")
num <- length(date)
# `:` binds tighter than binary `-`, so `2:num-1` is evaluated as
# `(2:num) - 1`, i.e. the indices 1, 2, ..., num - 1.
for(i in 2:num-1){
print(paste0("i = ",i))
# j is assigned here but not read anywhere below.
j = i+1
# date[i] and date[i+1] are the two neighbouring entries of the pair.
sd <- date[i]
ed <- date[i+1]
print(paste0("start: ",sd))
print(paste0("end: ",ed))
}
Output:
[1] "i = 1"
[1] "start: 2021-05-01"
[1] "end: 2021-06-01"
[1] "i = 2"
[1] "start: 2021-06-01"
[1] "end: 2021-07-01"
[1] "i = 3"
[1] "start: 2021-07-01"
[1] "end: 2021-08-01"
[1] "i = 4"
[1] "start: 2021-08-01"
[1] "end: 2021-09-01"
[1] "i = 5"
[1] "start: 2021-09-01"
[1] "end: 2021-10-01"
But when I start with: for(i in 1:num-1)
It doesn't find the first indexed item properly:
[1] "i = 0"
[1] "start: "
[1] "end: 2021-05-01"
[1] "i = 1"
[1] "start: 2021-05-01"
[1] "end: 2021-06-01"
[1] "i = 2"
[1] "start: 2021-06-01"
[1] "end: 2021-07-01"
[1] "i = 3"
[1] "start: 2021-07-01"
[1] "end: 2021-08-01"
[1] "i = 4"
[1] "start: 2021-08-01"
[1] "end: 2021-09-01"
[1] "i = 5"
[1] "start: 2021-09-01"
[1] "end: 2021-10-01"

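OK, so thanks to Andre Wildberg in the comments: I didn't realize there was an implied order of operations within the counter. In R the `:` operator has higher precedence than binary `-`, so `1:num-1` is evaluated as `(1:num)-1`, which starts at 0. What worked was using parentheses to make the desired order of operations explicit: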
1:(num-1)
instead of
1:num-1

Related

Function check for (length (list) != 0) always returns length = 0

I am using a UDF in an r-markdown script to plot data from stored lists. When I run my functions body, everything works out just fine, but when I try to execute the function with defined arguments, that I have verified to work in the body, all my logic that checks list length returns FALSE.
Here is an example data set I made:
library(dplyr)
library(plotly)
# Build the example data. For every (i, j, n) combination that is not skipped,
# assign() creates the global objects plotdataframe_<i>_<j>_<n>,
# AllExplVarNames_<i>_<j>_<n>, AllExplVarCoeffs_<i>_<j>_<n> and
# NrmPolcts_<i>_<j>_<n>.
# Accumulators (plural names) and per-iteration values (singular names) all
# start out as empty lists.
plotdataframe <- list()
dataframerows <- list()
AllExplVarCoeffs <- list()
AllExplVarNames <- list()
NrmPolcts <- list()
AllExplVarCoeff <- list()
AllExplVarName <- list()
NrmPolct <- list()
for (i in 1:10)
{
for (j in 1:8)
{
for(n in 1:16)
{
# `&&` binds tighter than `||`: this skips n == 3 and n == 8 always, and
# n == 4 and n == 9 only when j == 5.
if (n == 3 || n == 8 || n == 4 && j == 5 || n == 9 && j == 5)
{
# Skipped combination: the values are cleared and no object is assigned.
AllExplVarName <-NULL
AllExplVarCoeff <- NULL
NrmPolct <- NULL
}else
{
# Reseeding on every pass makes runif() return the same 10 values each time.
set.seed(10)
AllExplVarName <-c(1,2,3,4,5,6,7,8,9,10)
AllExplVarCoeff <- runif(10, min=-1, max=1)
NrmPolct <- c(.015,.005,.33,.32,.225,.025,.03,.05,0 ,0)
# Both vectors have length 10 here, so padvarcnt is 0 and the padding loop
# below does not execute.
padvarcnt <-(length(AllExplVarCoeff)-length(NrmPolct))
for (l in seq_len(padvarcnt))
{
padval = l*0
# NOTE(review): `NrmPolCt` (capital C) is not assigned anywhere in this script
# and `NrmPolCt1` is not read afterwards -- looks like a typo for
# NrmPolct / NrmPolct1; confirm before relying on the padding.
NrmPolCt1 <- c(NrmPolCt,padval)
}
NrmPolct1 <- as.numeric(unlist(NrmPolct))
# Bind the three value vectors as columns; the plotting function later indexes
# these columns by the names AllExplVarName, AllExplVarCoeff and NrmPolct1.
AllExplVarNames <- cbind(AllExplVarNames,AllExplVarName)
AllExplVarCoeffs <- cbind(AllExplVarCoeffs,AllExplVarCoeff)
NrmPolcts <- cbind(NrmPolcts,NrmPolct1)
dataframerows <- cbind(AllExplVarNames,AllExplVarCoeffs,NrmPolcts)
# Store the combined table under a name built from the three loop indices.
assign(paste("plotdataframe",i, j, n, sep="_"),cbind(plotdataframe,dataframerows))
# Reset the accumulators so the next objects start from empty lists again.
plotdataframe <- list()
dataframerows <- list()
AllExplVarCoeffs <- list()
AllExplVarNames <- list()
NrmPolcts <- list()
# Store each single column under its own indexed name as well.
assign(paste("AllExplVarNames",i, j, n, sep="_"),cbind(AllExplVarNames,AllExplVarName))
assign(paste("AllExplVarCoeffs",i ,j ,n, sep="_"),cbind(AllExplVarCoeffs,AllExplVarCoeff))
assign(paste("NrmPolcts",i, j, n, sep="_"),cbind(NrmPolcts,NrmPolct1))
AllExplVarCoeffs <- list()
AllExplVarNames <- list()
NrmPolcts <- list()
}
}
}
}
The NULL entries represent cases in my real data where the value for "n" would return an empty set.
My function is defined thus (please ignore the ugly plots; I'm pressed for time, so I did not make this look nice):
# Plot coefficient values (markers) and policy-count bars for each stored
# plotdataframe_<x>_<j>_<n> object, n = 1..16.
#   x, j: the first two index components of the object names to look up.
# Prints x, j, each n and the object name being checked; prints "Next" when no
# data is found for that n, otherwise prints n and the plot.
DrawTestExVar <- function(x,j)
{
print(paste("x=",x))
print(paste("j=",j))
# NOTE(review): ls() is called without an environment argument, so inside this
# function it lists the function's own variables (only x and j at this point),
# not the global objects. Nothing matches the pattern, SCPD ends up empty and
# every n below falls through to "Next".
StCrPltData <- ls(pattern=paste0("plotdataframe_",x,"_",j))
# mget() likewise resolves names in the calling frame by default.
SCPD <- mget(StCrPltData)
for(n in 1:16)
{ print(paste("n=",n))
print(paste0('plotdataframe_',x,'_',j,'_',n))
# A missing name yields a NULL entry, which unlist() flattens to length 0.
if (length(unlist((SCPD[c(paste0('plotdataframe_',x,'_',j,'_',n))]))) != 0)
{
n1=n
# j1 is assigned but not read anywhere below.
j1=j
print(n1)
plot1 <- plot_ly(as.data.frame(SCPD[c(paste0('plotdataframe_',x,'_',j,'_',n1))])) %>%
add_trace(x=~unlist(SCPD[c(paste0('plotdataframe_',x,'_',j,'_',n1))][[1]][,'AllExplVarName']), y=~unlist(SCPD[c(paste0('plotdataframe_',x,'_',j,'_',n1))][[1]][,'AllExplVarCoeff']), yaxis="y", type="scatter", mode="markers", name="Coef Values") %>%
add_bars(x=~unlist(SCPD[c(paste0('plotdataframe_',x,'_',j,'_',n1))][[1]][,'AllExplVarName']), y=~unlist(SCPD[c(paste0('plotdataframe_',x,'_',j,'_',n1))][[1]][,'NrmPolct1']), yaxis="y2", name="% Polcy Count")
print(plot1)
} else
{
print("Next")
}
}
}
but when I execute the function: DrawTestExVar(5,3) I get nothing but the length = 0 response:
> DrawTestExVar(5,3)
[1] "x= 5"
[1] "j= 3"
[1] "n= 1"
[1] "plotdataframe_5_3_1"
[1] "Next"
[1] "n= 2"
[1] "plotdataframe_5_3_2"
[1] "Next"
[1] "n= 3"
[1] "plotdataframe_5_3_3"
[1] "Next"
[1] "n= 4"
[1] "plotdataframe_5_3_4"
[1] "Next"
[1] "n= 5"
[1] "plotdataframe_5_3_5"
[1] "Next"
[1] "n= 6"
[1] "plotdataframe_5_3_6"
[1] "Next"
[1] "n= 7"
[1] "plotdataframe_5_3_7"
[1] "Next"
[1] "n= 8"
[1] "plotdataframe_5_3_8"
[1] "Next"
[1] "n= 9"
[1] "plotdataframe_5_3_9"
[1] "Next"
[1] "n= 10"
[1] "plotdataframe_5_3_10"
[1] "Next"
[1] "n= 11"
[1] "plotdataframe_5_3_11"
[1] "Next"
[1] "n= 12"
[1] "plotdataframe_5_3_12"
[1] "Next"
[1] "n= 13"
[1] "plotdataframe_5_3_13"
[1] "Next"
[1] "n= 14"
[1] "plotdataframe_5_3_14"
[1] "Next"
[1] "n= 15"
[1] "plotdataframe_5_3_15"
[1] "Next"
[1] "n= 16"
[1] "plotdataframe_5_3_16"
[1] "Next"
I thought it was relatively straightforward; select the lists from the global environment with names that match the input pattern, then filter out the "n" values that return a length 0 list and plot the remainders in a loop. I have constructed two other functions to plot from stored data that work just fine, so I am confident the issue lies within the if else loop where the issue seems to be stemming from how I defined or am using my arguments. None of my digging into r function documentation has revealed what I am doing wrong though, and none of the attempts I have made to make the calls more explicit have worked.
Thoughts?
Update 1: After printing some troubleshooting statements, it is apparent that the error actually lies within the arguments being read or applied. When I print() the values for x and j when running the DrawTestExVar(5,3), x=5 and j=3, as needed. print()ing the value for n in the loop shows it incrementing appropriately and being applied to the paste commands correctly, but all the print(length(list)) return lengths of 0.
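The answer lies in the way the function is built and how it accesses data: called inside a function without an environment argument, ls() lists the function's own local variables rather than the objects in the global environment. The list references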
StCrPltData <- ls(pattern=paste0("plotdataframe_",x,"_",j))
and
SCPD <- mget(StCrPltData)
need to occur outside of the function so it can actually access the dataframes. Furthermore, StCrPltData can be removed entirely by using its definition in the function call. The function arguments should look like
DrawTestExVar <- function(x,j,SCPD)
and to call the function
DrawTestExVar(5,3,mget(ls(pattern=paste0("plotdataframe_",5,"_",3))))
Running this gives the plots (which I will not show because they are very ugly), and the troubleshooting prints:
> DrawTestExVar(5,3,mget(ls(pattern=paste0("plotdataframe_",5,"_",3))))
[1] "x= 5"
[1] "j= 3"
[1] "n= 1"
[1] "plotdataframe_5_3_1"
[1] 1
[1] "n= 2"
[1] "plotdataframe_5_3_2"
[1] 2
[1] "n= 3"
[1] "plotdataframe_5_3_3"
[1] "Next"
[1] "n= 4"
[1] "plotdataframe_5_3_4"
[1] 4
[1] "n= 5"
[1] "plotdataframe_5_3_5"
[1] 5
[1] "n= 6"
[1] "plotdataframe_5_3_6"
[1] 6
[1] "n= 7"
[1] "plotdataframe_5_3_7"
[1] 7
[1] "n= 8"
[1] "plotdataframe_5_3_8"
[1] "Next"
[1] "n= 9"
[1] "plotdataframe_5_3_9"
[1] 9
[1] "n= 10"
[1] "plotdataframe_5_3_10"
[1] 10
[1] "n= 11"
[1] "plotdataframe_5_3_11"
[1] 11
[1] "n= 12"
[1] "plotdataframe_5_3_12"
[1] 12
[1] "n= 13"
[1] "plotdataframe_5_3_13"
[1] 13
[1] "n= 14"
[1] "plotdataframe_5_3_14"
[1] 14
[1] "n= 15"
[1] "plotdataframe_5_3_15"
[1] 15
[1] "n= 16"
[1] "plotdataframe_5_3_16"
[1] 16
which follow the expected output, showing the function to be working as desired.

How to flag missing left-hand collocates with NA

I want to compute collocates of the lemma GO, including all its forms such as go, goes, gone, etc.:
go <- c("go after it", "here we go", "he went bust", "go get it go", "i 'm gon na go", "she 's going berserk")
The lemma forms are stored in this vector:
lemma_GO <- c("go", "goes", "going", "gone", "went", "gon na")
and this vector turns them into an alternation pattern:
pattern_GO <- paste0("\\b(", paste0(lemma_GO, collapse = "|"), ")\\b")
However, when using the pattern with str_extract_all to extract the immediately left-hand collocate of GO, the extraction misses out on those strings where GO is the first word in the string and reoccurs later in the string:
library(stringr)
str_extract_all(go, paste0("'?\\b[a-z']+\\b(?=\\s?", pattern_GO, ")"))
[[1]]
character(0)
[[2]]
[1] "we"
[[3]]
[1] "he"
[[4]]
[1] "it"
[[5]]
[1] "'m" "na"
[[6]]
[1] "'s"
The expected result is this:
[[1]]
[1] NA
[[2]]
[1] "we"
[[3]]
[1] "he"
[[4]]
[1] NA "it"
[[5]]
[1] "'m" "na"
[[6]]
[1] "'s"
How can the extraction be mended to also return NA in the absence of a left-hand collocate?
You can add an alternative to the consuming part of your pattern so that it also matches at the start of the string:
str_extract_all(go, paste0("('?\\b[a-z']+\\b|^)(?=\\s?", pattern_GO, ")"))
See the regex demo.
See the R demo:
library(stringr)

# Example sentences containing forms of the lemma GO.
go <- c(
  "go after it", "here we go", "he went bust",
  "go get it go", "i 'm gon na go", "she 's going berserk"
)

# Every surface form of the lemma, joined into one word-bounded alternation.
lemma_GO <- c("go", "goes", "going", "gone", "went", "gon na")
pattern_GO <- paste0("\\b(", paste(lemma_GO, collapse = "|"), ")\\b")

# Left-hand collocate: either a word directly before a GO form, or the empty
# match at the start of the string when GO is the first word.
str_extract_all(
  go,
  paste0("('?\\b[a-z']+\\b|^)(?=\\s?", pattern_GO, ")")
)
Output:
[[1]]
[1] ""
[[2]]
[1] "we"
[[3]]
[1] "he"
[[4]]
[1] "" "it"
[[5]]
[1] "'m" "na"
[[6]]
[1] "'s"
Sukces #stdin #stdout 0.26s 42528KB
[1] "\\b(go|goes|going|gone|went|gon na)\\b"
[[1]]
[1] ""
[[2]]
[1] "we"
[[3]]
[1] "he"
[[4]]
[1] "" "it"
[[5]]
[1] "'m" "na"
[[6]]
[1] "'s"
If you want, you can turn all empty items into NA using
# Extract the left-hand collocates; an empty string marks a GO form found at
# the start of the string (no word to its left).
res <- str_extract_all(go, paste0("('?\\b[a-z']+\\b|^)(?=\\s?", pattern_GO, ")"))
# Replace the empty-string placeholders with NA. Assigning NA_character_ by
# logical index keeps every element a character vector; ifelse(x == "", NA, x)
# returns a logical vector when all entries are "" or the element is empty.
# `%in%` never yields NA, so existing NA entries are left untouched.
res <- lapply(res, function(x) {
  x[x %in% ""] <- NA_character_
  x
})

combine elements in list in r

I have a list of character vectors. The type of the data and an example are as follows:
typeof(data)
[1] "list"
print(data[1:3])
$...
[1] ",75=20140102,268=18,"
[2] "0,83=337407,"
[3] "0,83=337408,"
[4] "0,83=3374779,"
$...
[1] ",75=20140122,268=336,"
[2] "3,273=143000000,1020=50,"
[3] "1,270=422,271=1,273=143000000,"
[4] "0,83=337427,107=ZCH4,"
[5] "1020=58,"
$...
[1] ",52=20140102143000085,75=20140102,268=17,"
[2] "0,83=33744562,107=ZCH4,"
For each element in the list, I want to combine data[[i]][1] with each of the rest of its elements. I am doing it with a loop now; it works, but it is very slow.
Here is my code,
My current code is:
# Record every combination of an element's first string with each of its
# remaining strings, one result per row of `table`.
# A running row counter is used because indexing by `j + k` makes different
# (j, k) pairs collide (e.g. j = 1, k = 3 and j = 2, k = 2 both hit row 4).
out_row <- 0L
for (j in seq_along(data)) {
  # seq_along(x)[-1] is 2..length(x), and is empty (rather than c(2, 1)) when
  # the element has fewer than two strings.
  for (k in seq_along(data[[j]])[-1]) {
    out_row <- out_row + 1L
    table[out_row, 1] <- paste0(data[[j]][1], data[[j]][k])
  }
}
Since the data is pretty large, the loop runs very slow.
Desired results:
[1] ",75=20140102,268=18, 0,83=337407," "
[2] ",75=20140102,268=18, 0,83=337408,"
[3] ",75=20140102,268=18, 0,83=3374779,"
[4] ",75=20140122,268=336, 3,273=143000000,1020=50,"
[5] ",75=20140122,268=336, 1,270=422,271=1,273=143000000,"
[6] ",75=20140122,268=336, 0,83=337427,107=ZCH4,"
[7] ",75=20140122,268=336, 1020=58,"
[8] ",52=20140102143000085,75=20140102,268=17, 0,83=33744562,107=ZCH4,"
Thank you so much if someone can speed up the programming.
# For each list element, prepend its first string to every remaining string.
# Returns a list with one character vector per element of `dat`.
lapply(dat, function(x) {
  # With fewer than two entries there is nothing to combine; `2:length(x)`
  # would count backwards (2, 1) here and paste in an NA.
  if (length(x) < 2L) {
    return(character(0))
  }
  paste0(x[1], x[-1])
})
will do that quicker.
Example:
# Two named elements, each a list of three strings.
test <- list(
  a = list("test", "again", "meep"),
  b = list("and", "again", "doot")
)
> test
$a
$a[[1]]
[1] "test"
$a[[2]]
[1] "again"
$a[[3]]
[1] "meep"
$b
$b[[1]]
[1] "and"
$b[[2]]
[1] "again"
$b[[3]]
[1] "doot"
> lapply(test, function(x) paste0(x[1], x[2:length(x)]))
$a
[1] "testagain" "testmeep"
$b
[1] "andagain" "anddoot"

Filter list in R which has nchar > 1

I have a list of names
> x <- c("Test t", "Cuma Nama K", "O", "Test satu dua t")
> name <- strsplit(x, " ")
> name
[[1]]
[1] "Test" "t"
[[2]]
[1] "Cuma" "Nama" "K"
[[3]]
[1] "O"
[[4]]
[1] "Test" "satu" "dua" "t"
How can I filter a list so that it can become like this?
I am trying to find out how to filter the list which has nchar > 1
> name
[[1]]
[1] "Test"
[[2]]
[1] "Cuma" "Nama"
[[4]]
[1] "Test" "satu" "dua"
# Within each name, keep only the words longer than one character.
lapply(name, function(words) {
  is_long <- nchar(words) > 1
  words[is_long]
})
Results in:
[[1]]
[1] "Test"
[[2]]
[1] "Cuma" "Nama"
[[3]]
character(0)
[[4]]
[1] "Test" "satu" "dua"
We can loop over the list elements, subset the elements that have nchar greater than 1, and use Filter to remove the list elements that have 0 elements
# Keep the words longer than one character, then drop names left with no words.
Filter(
  function(words) length(words) > 0,
  lapply(name, function(words) words[nchar(words) > 1])
)
#[[1]]
#[1] "Test"
#[[2]]
#[1] "Cuma" "Nama"
#[[3]]
#[1] "Test" "satu" "dua"
If we want to remove the words with one character from the string, we can also do this without splitting
# Remove every one-character word without splitting, tidy the leftover spaces,
# and drop strings that end up empty.
# The lookarounds match a single non-space character that has no non-space
# neighbour, so the surrounding spaces are not consumed: "Test t satu" becomes
# "Test satu" (the pattern "(^| ).( |$)" produced "Testsatu"). Duplicate
# results are kept, which setdiff() would have silently removed.
Filter(
  nzchar,
  trimws(gsub(" +", " ", gsub("(?<!\\S)\\S(?!\\S)", "", x, perl = TRUE)))
)
#[1] "Test" "Cuma Nama" "Test satu dua"

Why while loop is working randomly?

I am confused about why my while loop seems to work randomly.
# Ask the user how many pieces of `fruit` they want and keep asking while the
# answer is judged to exceed `number`.
#   fruit:  name of the fruit, inserted into the prompt.
#   number: the budget limit the answer is compared against.
# Returns the last answer converted with as.numeric().
how.many<-function(fruit, number){
string<-paste("How many",fruit,"?",sep=" ")
# readline() returns the typed answer as a character string.
fruit_number<-readline(string)
print("fruit number")
print(fruit_number)
print("number")
print(number)
# NOTE(review): fruit_number is still a string here, so `>` compares it with
# `number` as text rather than numerically ("9" > "10" is TRUE) -- this is why
# the loop appears to trigger at random.
while(fruit_number > number){
print("inside while")
print("fruit number")
print(fruit_number)
print("number")
print(number)
print("ERROR: too many for the budget")
string<-paste("How many",fruit,"?",sep=" ")
fruit_number<-readline(string)
}
# The conversion to a number happens only here, after the loop has finished.
return(as.numeric(fruit_number))
}
Here's when I call this function:
> source("fruit.R")
> how.many("apple",6)
How many apple ?3
[1] "fruit number"
[1] "3"
[1] "number"
[1] 6
[1] 3
> how.many("apple",10)
How many apple ?9
[1] "fruit number"
[1] "9"
[1] "number"
[1] 10
[1] "inside while"
[1] "fruit number"
[1] "9"
[1] "number"
[1] 10
[1] "ERROR: too many for the budget"
How many apple ?7
[1] "inside while"
[1] "fruit number"
[1] "7"
[1] "number"
[1] 10
[1] "ERROR: too many for the budget"
How many apple ?2
[1] "inside while"
[1] "fruit number"
[1] "2"
[1] "number"
[1] 10
[1] "ERROR: too many for the budget"
How many apple ?1
[1] 1
Any hint?
Convert fruit_number to a number, not a string.
e.g.
fruit_number <- as.numeric(readline(string))
or
fruit_number <- as.integer(readline(string))
Otherwise the numbers are compared as strings, and "9" is greater than "10" in the string sense.

Resources