tapply of two variables in a loop - r

My data :
TEST <- structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1,
0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 3, 1, 0, 0,
0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 1, 2, 0, 0, 1, 0, 0, 1, 1, 1,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2,
0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0,
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 3, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
3, 0, 0, 2, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0,
0, 0, 2, 0, 1, 1, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
3, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0,
0, 1, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 3, 0, 0, 1, 0, 0, 2, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,
0, 0, 0, 1, 0, 0, 2, 0, 1, 2, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1,
0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 3, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 1, 1, 0, 0, 1,
0, 0, 0), .Dim = c(22L, 20L), .Dimnames = list(NULL, c("month",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "")))
and :
# Month id for each of the 22 rows: eleven rows of month 1, then eleven rows
# of month 2.
month <- rep(c(1, 2), each = 11)
I want to sum the rows of each column within each month, so I used this code:
# Sum every column of TEST within each month. seq_len() is safe when TEST has
# zero columns (1:ncol(TEST) would iterate over c(1, 0)), and lapply() builds
# the list in one pass instead of growing it inside a loop.
# NOTE(review): column 1 of TEST is the "month" id itself, so it is summed
# as well -- confirm that is intended before taking quantiles.
su_test <- lapply(
  seq_len(ncol(TEST)),
  function(i) tapply(TEST[, i], month, sum)
)
# Bind the per-column results side by side: one row per month, one column per
# column of TEST.
su_test <- do.call(cbind, su_test)
and to check the quantile :
# 10% and 90% quantiles taken across each row of su_test; the result has one
# column per row of su_test.
su_test_obs <- apply(su_test, MARGIN = 1, FUN = quantile, probs = c(0.1, 0.9))
This is a simulation of observations per month. However, I also have the detail by area:
TEST2 <- structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 3, 1, 0,
0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 1, 2, 0, 0, 1, 0, 0, 1, 1,
1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0,
0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 3, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,
0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0,
0, 3, 0, 0, 2, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 3,
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2,
0, 0, 0, 2, 0, 1, 1, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 3, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0,
0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 3, 0, 0, 1, 0, 0, 2, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1,
0, 0, 0, 0, 1, 0, 0, 2, 0, 1, 2, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,
1, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 3, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
0, 1, 1), .Dim = c(22L, 20L), .Dimnames = list(NULL, c("month",
"area", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "")))
# Area id for each row. Built with rep() so the length is exactly 22, matching
# the 22 rows of TEST2 and the length of `month`; the hand-typed vector
# appeared to be one element short, which would break any grouping that
# combines it with `month`.
area <- rep(1, 22)
I would like to get the same kind of result as su_test_obs,
but broken down by area as well, i.e. grouping by something like list(month, area), but I don't understand the logic.
Would you have a solution, please? Maybe there is a simpler solution with dplyr?
Thanks

It would be much simpler if you converted your matrix to a data frame. We can then use aggregate, which can easily be applied to multiple grouping variables:
# Convert the matrix to a data frame so the formula interface can group on
# both id columns at once.
df <- data.frame(TEST2)
# Sum every remaining column within each month/area combination, drop the two
# grouping columns, then take the 10% and 90% quantiles of each group's sums.
apply(
  aggregate(. ~ month + area, data = df, FUN = sum)[-c(1, 2)],
  MARGIN = 1,
  FUN = quantile,
  probs = c(0.1, 0.9)
)
# [,1] [,2]
#10% 2 1.7
#90% 6 5.3

Related

Frequency bar graph in descending order

I created a frequency graph using ggplot2. I would like the bars to go in descending order based on frequency counts, so the language measures would run from left to right as BNT, WAB_R, BDAE, etc. Of note, my dataframe is organized with the language measures as columns and the cases as rows. The values are 0 or 1, where 1 means that the participant endorsed the language measure. I have tried using reorder in various combinations but had no luck. I appreciate the help!
Here is sample data:
WAB-R BDAE BNT CAT
1 0 0 1 0
2 1 0 1 1
3 0 0 0 0
4 1 1 0 0
5 0 0 0 1
6 0 1 1 0
7 1 0 0 0
8 0 1 1 0
Portion of the data:
structure(list(WAB_R = c(0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0), WAB_B = c(1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0), BDAE = c(0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), CAT = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0), BNT = c(0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1), PNT = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), PyramidPalms = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), QAB = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0), PALPA = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0), BASA = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Compiled_lang = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -85L), class = c("tbl_df", "tbl", "data.frame"))
Code:
library(tidyverse)

# Keep only the endorsed (value == 1) measures and draw one bar per measure.
# NOTE(review): `cleanup` is not defined in this snippet -- presumably a theme
# object created elsewhere; confirm.
survey %>%
  select(c(WAB_R:other_lang_measure)) %>%
  pivot_longer(everything()) %>%
  filter(value == 1) %>%
  ggplot(aes(x = value)) +
  geom_histogram(
    stat = "count",
    aes(fill = name),
    position = position_dodge2(0.9, preserve = "single")
  ) +
  labs(fill = "Language Measures") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  scale_y_continuous(breaks = seq(0, 50, 5)) +
  ylab("Frequency Counts") +
  coord_cartesian(ylim = c(0, 45)) +
  ggtitle("\nLanguage Measures\n ") +
  cleanup
As mentioned by @r2evans, you need to convert the x-axis variable to a factor. Also, you can compute the counts directly using summarise() and then arrange them in order before drawing the plot:
library(tidyverse)

# Count how many participants endorsed each language measure, then plot the
# counts with the bars ordered from most to least frequent.
survey %>%
  select(c(WAB_R:Compiled_lang)) %>%
  pivot_longer(everything()) %>%
  filter(value == 1) %>%
  group_by(name) %>%
  summarise(value = sum(value)) %>%
  arrange(desc(value)) %>%
  # Fix the factor levels in the (already sorted) row order so the x axis
  # follows the counts. TRUE is spelled out because T can be reassigned.
  mutate(name = factor(name, levels = unique(name), ordered = TRUE)) %>%
  ggplot(aes(x = name, y = value)) +
  geom_bar(
    stat = "identity",
    aes(fill = name),
    position = position_dodge2(0.9, preserve = "single")
  ) +
  labs(fill = "Language Measures") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  scale_y_continuous(breaks = seq(0, 50, 5)) +
  ylab("Frequency Counts") +
  coord_cartesian(ylim = c(0, 45)) +
  ggtitle("\nLanguage Measures\n ")
Output:
Here is a way:
survey <- structure(list(WAB_R = c(0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0),
WAB_B = c(1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0),
BDAE = c(0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
CAT = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0),
BNT = c(0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1),
PNT = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
PyramidPalms = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
QAB = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0),
PALPA = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0),
BASA = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
Compiled_lang = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0)),
row.names = c(NA, -85L), class = c("tbl_df", "tbl", "data.frame"))
library(tidyverse, warn.conflicts = FALSE)

# Vertical bars, tallest first: reorder() sorts the measures by descending
# count, so the x axis no longer follows the default alphabetical order.
survey %>%
  pivot_longer(everything()) %>%
  filter(value == 1) %>%
  count(name) %>%
  ggplot(aes(x = reorder(name, -n), y = n)) +
  geom_col() +
  labs(y = "Frequency Counts", title = "Language Measures", x = "")

# Same chart with horizontal bars. After coord_flip() the first level is drawn
# at the bottom, so order by ascending count to put the largest bar on top.
survey %>%
  pivot_longer(everything()) %>%
  filter(value == 1) %>%
  count(name) %>%
  ggplot(aes(x = reorder(name, n), y = n)) +
  geom_col() +
  labs(y = "Frequency Counts", title = "Language Measures", x = "") +
  coord_flip()
Created on 2021-01-15 by the reprex package (v0.3.0)

xgb.cv with no folds and return the results based on a split of the data

I have some data which looks like:
# A tibble: 50 x 28
sanchinarro date holiday weekday weekend workday_on_holi… weekend_on_holi… protocol_active
<dbl> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 -1.01 2010-01-01 1 1 0 1 0 0
2 0.832 2010-01-02 0 0 1 0 0 0
3 1.29 2010-01-03 0 0 1 0 0 0
4 1.04 2010-01-04 0 1 0 0 0 0
5 0.526 2010-01-05 0 1 0 0 0 0
6 -0.292 2010-01-06 1 1 0 1 0 0
7 -0.394 2010-01-07 0 1 0 0 0 0
8 -0.547 2010-01-08 0 1 0 0 0 0
9 -0.139 2010-01-09 0 0 1 0 0 0
10 0.628 2010-01-10 0 0 1 0 0 0
I want to run xgb.cv on the first 40 rows and validate it on the final 10 rows.
I try the following:
library(xgboost)
library(dplyr)
# Features: every column of ddd except the first two (the target and the date).
X_Val <- ddd %>% select(-c(1:2))
# Target: the first column of ddd, pulled out as a plain vector.
Y_Val <- ddd %>% select(c(1)) %>% pull()
# Pack features and target into the matrix type xgboost works with.
dVal <- xgb.DMatrix(data = as.matrix(X_Val), label = as.numeric(Y_Val))
# This call reproduces the error quoted below: folds = NA is not a list of
# index vectors.
xgb.cv(data = dVal, nround = 30, folds = NA, params = list(eta = 0.1, max_depth = 5))
which gives me this error:
Error in xgb.cv(data = dVal, nround = 30, folds = NA, eta = 0.1,
max_depth = 5) : 'folds' must be a list with 2 or more elements
that are vectors of indices for each CV-fold
How can I run a simple xgb.cv on the first 40 rows and test it on the last 10 rows.
I eventually want to apply a grid search with a list of parameters and save the results in a list. Since I am dealing with time series data I do not want to mix the folds up; I just want a simple train and in-sample test split of 40:10.
Data:
ddd <- structure(list(sanchinarro = c(-1.00742964973274, 0.832453587904369,
1.29242439731365, 1.03688505875294, 0.525806381631517, -0.291919501762755,
-0.394135237187039, -0.547458840323464, -0.138595898626329, 0.628022117055801,
1.19020866188936, 1.5990716035865, 1.5990716035865, -0.70078244345989,
2.11015028070792, 1.95682667757149, 0.985777191040795, 0.883561455616511,
0.985777191040795, 0.270267043070807, 2.51901322240505, 2.41679748698077,
0.372482778495091, -0.291919501762755, -0.905213914308458, -0.905213914308458,
-0.649674575747748, 1.2413165296015, 1.54796373587436, -0.70078244345989,
-0.905213914308458, -0.0363801632020448, 1.54796373587436, 2.00793454528363,
1.54796373587436, -0.445243104899181, -0.445243104899181, 1.03688505875294,
0.628022117055801, -0.496350972611323, 0.168051307646523, -0.649674575747748,
0.0658355722222391, -1.00742964973274, -0.291919501762755, 0.0147277045100972,
0.168051307646523, -0.189703766338471, 0.219159175358665, 0.679129984767943
), date = structure(c(14610, 14611, 14612, 14613, 14614, 14615,
14616, 14617, 14618, 14619, 14620, 14621, 14622, 14623, 14624,
14625, 14626, 14627, 14628, 14629, 14630, 14631, 14632, 14633,
14634, 14635, 14636, 14637, 14638, 14639, 14640, 14641, 14642,
14643, 14644, 14645, 14646, 14647, 14648, 14649, 14650, 14651,
14652, 14653, 14654, 14655, 14656, 14657, 14658, 14659), class = "Date"),
holiday = c(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), weekday = c(1,
0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1,
1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1), weekend = c(0, 1, 1, 0,
0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
0, 1, 1, 0, 0, 0, 0, 0), workday_on_holiday = c(1, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), weekend_on_holiday = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), protocol_active = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), text_broken_clouds = c(0,
1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1), text_clear = c(0, 0, 0,
0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1,
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 1), text_fog = c(0, 1, 0, 1, 1, 0,
0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 1, 0, 1, 0), text_partly_cloudy = c(0, 1, 0, 0, 0,
1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), text_partly_sunny = c(1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0,
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 1, 1, 1), text_passing_clouds = c(1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 1, 1, 1), text_scattered_clouds = c(1, 1,
0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1), text_sunny = c(0, 0, 0, 0,
0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1), month_1 = c(1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), month_2 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1), month_3 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_4 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_5 = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), month_6 = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), month_7 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), month_8 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_9 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_10 = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), month_11 = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), month_12 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-50L))
EDIT: List data:
The final data comes in the form of lists.
datalst <- list(structure(list(sanchinarro = c(-1.00742964973274, 0.832453587904369,
1.29242439731365, 1.03688505875294, 0.525806381631517, -0.291919501762755,
-0.394135237187039, -0.547458840323464, -0.138595898626329, 0.628022117055801,
1.19020866188936, 1.5990716035865, 1.5990716035865, -0.70078244345989
), date = structure(c(14610, 14611, 14612, 14613, 14614, 14615,
14616, 14617, 14618, 14619, 14620, 14621, 14622, 14623), class = "Date"),
holiday = c(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), weekday = c(1,
0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1), weekend = c(0, 1,
1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0), workday_on_holiday = c(1,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), weekend_on_holiday = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), protocol_active = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), text_broken_clouds = c(0,
1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), text_clear = c(0,
0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0), text_fog = c(0, 1,
0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0), text_partly_cloudy = c(0,
1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0), text_partly_sunny = c(1,
1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1), text_passing_clouds = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), text_scattered_clouds = c(1,
1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1), text_sunny = c(0,
0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0), month_1 = c(1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), month_2 = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_3 = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_4 = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), month_5 = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), month_6 = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), month_7 = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), month_8 = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), month_9 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), month_10 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), month_11 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), month_12 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-14L)), structure(list(sanchinarro = c(0.832179838392013, 1.29225734336885,
1.03665872949283, 0.525461501740789, -0.292454062662475, -0.394693508212883,
-0.548052676538495, -0.139094894336863, 0.627700947291197, 1.19001789781844,
1.59897568002007, 1.59897568002007, -0.701411844864107, 2.11017290777211
), date = structure(c(14611, 14612, 14613, 14614, 14615, 14616,
14617, 14618, 14619, 14620, 14621, 14622, 14623, 14624), class = "Date"),
holiday = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), weekday = c(0,
0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1), weekend = c(1, 1,
0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0), workday_on_holiday = c(0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), weekend_on_holiday = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), protocol_active = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), text_broken_clouds = c(1,
0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), text_clear = c(0,
0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1), text_fog = c(1, 0,
1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0), text_partly_cloudy = c(1,
0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0), text_partly_sunny = c(1,
1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0), text_passing_clouds = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), text_scattered_clouds = c(1,
0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0), text_sunny = c(0,
0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1), month_1 = c(1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), month_2 = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_3 = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_4 = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), month_5 = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), month_6 = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), month_7 = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), month_8 = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), month_9 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), month_10 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), month_11 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), month_12 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-14L)), structure(list(sanchinarro = c(1.29293502084952, 1.03729933727253,
0.526027970118536, -0.292006217327851, -0.394260490758649, -0.547641900904846,
-0.138624807181653, 0.628282243549334, 1.19068074741873, 1.59969784114192,
1.59969784114192, -0.701023311051044, 2.11096920829591, 1.95758779814971
), date = structure(c(14612, 14613, 14614, 14615, 14616, 14617,
14618, 14619, 14620, 14621, 14622, 14623, 14624, 14625), class = "Date"),
holiday = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), weekday = c(0,
1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0), weekend = c(1, 0,
0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1), workday_on_holiday = c(0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), weekend_on_holiday = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), protocol_active = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), text_broken_clouds = c(0,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), text_clear = c(0,
0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0), text_fog = c(0, 1,
1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0), text_partly_cloudy = c(0,
0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0), text_partly_sunny = c(1,
1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1), text_passing_clouds = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), text_scattered_clouds = c(0,
0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0), text_sunny = c(0,
0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0), month_1 = c(1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), month_2 = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_3 = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_4 = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), month_5 = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), month_6 = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), month_7 = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), month_8 = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), month_9 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), month_10 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), month_11 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), month_12 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-14L)))
EDIT:
I think this gives me what I am after - I need to double/triple check it (if you see any errors, please let me know).
# Tag the first 80% of the rows as fold 1 and the remaining rows as fold 2.
# floor() keeps the split point an integer and seq_len() is safe when it is 0.
# NOTE(review): per the xgb.cv documentation each element of `folds` is used
# as a held-out set in turn, so fold 1 is also scored by a model fitted on the
# last rows only -- verify this matches the intended 40/10 validation.
splt <- floor(0.80 * nrow(ddd))
ddd$id <- 2
ddd$id[seq_len(splt)] <- 1
fold.ids <- unique(ddd$id)
# One vector of row indices per fold id, in order of first appearance.
custom.folds <- lapply(fold.ids, function(id) which(ddd$id %in% id))
custom.folds
# dVal was built before the id column was added, so id is not a feature.
cv <- xgb.cv(
  params = list(eta = 0.1, max_depth = 5),
  data = dVal,
  nrounds = 10,
  folds = custom.folds,
  prediction = TRUE
)
cv$evaluation_log
I now need to find a way to apply this to all 3 lists in the "new" added data.
Firstly, you should split the data into dtrain (the first 40 rows) and dval (the last 10 rows). Secondly, you need xgb.train rather than xgb.cv.
So, your code should be modified to something like this:
library(xgboost)
library(dplyr)

# your code that creates ddd goes here
# Features: every column of ddd except the first two (the target and the date).
X <- ddd %>% select(-c(1:2))
# Target: the first column of ddd, pulled out as a plain vector.
Y <- ddd %>% select(c(1)) %>% pull()
# Y is a vector, so it takes a single index; Y[1:40, ] fails with
# "incorrect number of dimensions".
dtrain <- xgb.DMatrix(
  data = as.matrix(X[1:40, ]),
  label = as.numeric(Y[1:40])
)
dval <- xgb.DMatrix(
  data = as.matrix(X[41:50, ]),
  label = as.numeric(Y[41:50])
)
# Named list of the two datasets, handed to xgb.train as its watchlist.
watchlist <- list(train = dtrain, val = dval)
# nrounds is spelled out in full instead of relying on partial matching.
model <- xgb.train(
  data = dtrain,
  watchlist = watchlist,
  nrounds = 30,
  eta = 0.1,
  max_depth = 5
)
IMHO, with only 40 + 10 rows and such sparse features, there is little hope of obtaining good results with XGBoost.

How to use ids from one dataframe to sum rows in another dataframe

I feel like this question has been asked before, but I can't seem to find an answer to it. Maybe my title is too vague, so feel free to change it.
So I have one data frame, a, with ids that correspond to column names in data frame b. Both data frames are simplified versions of a much larger data frame.
here is data frame a
a <- structure(list(V1 = structure(c(4L, 5L, 1L, 2L, 3L), .Label = c("GEN[D00105].GT",
"GEN[D00151].GT", "GEN[D00188].GT", "GEN[D86396].GT", "GEN[D86397].GT"
), class = "factor")), row.names = c(NA, -5L), class = "data.frame")
here is data frame b
b <- structure(list(`GEN[D01104].GT` = c(0, 0, 0, 0, 1, 0, 0, 2, 0,
1, 1, 1, 1, 0, 0, 0, 2, 0, 0, 0), `GEN[D01312].GT` = c(1, 0,
2, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 0, 0, 2, 0, 0, 0), `GEN[D01878].GT` = c(0,
0, 0, 2, 0, 0, 2, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 2, 0, 0), `GEN[D01882].GT` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, 0, 0, 0, 0), `GEN[D01952].GT` = c(0,
0, 1, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0), `GEN[D01953].GT` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, 0, 0, 0, 2, 0), `GEN[D02053].GT` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0), `GEN[D00316].GT` = c(0,
0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2, 0, 0), `GEN[D01827].GT` = c(0,
0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0), `GEN[D01881].GT` = c(0,
0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 0, 2, 0, 2, 0), `GEN[D02044].GT` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0), `GEN[D02085].GT` = c(0,
0, 0, 2, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0), `GEN[D02204].GT` = c(0,
0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0), `GEN[D02276].GT` = c(0,
0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0), `GEN[D02297].GT` = c(0,
0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0), `GEN[D02335].GT` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 0), `GEN[D02397].GT` = c(0,
0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0), `GEN[D00856].GT` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0), `GEN[D00426].GT` = c(0,
0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0), `GEN[D02139].GT` = c(0,
0, 1, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0), `GEN[D02168].GT` = c(0,
0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0)), row.names = c(NA,
-20L), class = "data.frame")
I want to be able to use the ids from data frame a to sum, row by row, the columns in data frame b that have a matching id, if that makes sense.
So in the past, I just did something like
# Manual approach: add five explicitly named columns of b element-wise and
# store the total in a new affected.samples column.
b$affected.samples <- (b$`GEN[D86396].GT` + b$`GEN[D86397].GT` + b$`GEN[D00105].GT` + b$`GEN[D00151].GT` + b$`GEN[D00188].GT`)
which got annoying and took too much time, so I moved over to
# Positional approach: total the first five columns of b for every row.
b[["affected.samples"]] <- rowSums(b[, 1:5])
Which isn't too bad for this example, but with my large data set, my samples can be all over the place, and it's starting to take too much time to find where everything is. I was hoping there is a way to just use my data frame a to sum the correct columns in data frame b.
Hopefully, I gave you all the information you need! Let me know if you have any questions.
Thanks in advance!!
Extract the 'V1' column as a character string, use that to select the columns of 'b' (assuming these column names are found in 'b') and get the rowSums
# a$V1 is stored as a factor; convert it to character so the ids select
# columns of b by name, then total those columns row by row, ignoring NAs.
rowSums(b[, as.character(a$V1), drop = FALSE], na.rm = TRUE)

Why is auto.arima() giving me the Error: "'by' argument is much too small"

I am working on predicting intra-day sales for a retailer. We want to know if we can predict sales through the rest of the day, based off sales within that day. I'm working with roughly 3 years of data in a time series, which has given me roughly 26,000 rows of data.
I've never worked with a time series this large so my approach might be off. Or auto.arima() may not have been made to handle data this large.
I've tried limiting my data down to even 300 rows and had marginal success, but have not found anything that works with my larger data set. auto.arima() doesn't even have a by = argument from what I can find.
my_ts <- structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3,
3, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 4, 1,
1, 0, 3, 1, 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4,
1, 9, 1, 6, 5, 1, 0, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 3, 1, 0, 3, 5, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 3, 1, 0, 0, 6, 0, 6, 0, 1, 2, 3, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 2, 4, 6, 5, 0, 1, 0, 2, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 2, 0, 2, 0, 0, 0, 1, 1,
3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 3, 0, 1, 0,
3, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0,
8, 2, 7, 4, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
0, 0, 2, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 6, 2, 0, 1, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, -1, 2, 3, 1, 0, 0, 2, 5, 7, 0, -1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -1, 6, 1, 2, 2, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 3, 0, 2, 0,
4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 4, 0, 0,
2, 2, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
2, 0, 2, 3, 6, 5, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 2, 2, 2, 1, 4, 3, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 1, 3, 4, 0, 4, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 3, 2, 1, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 3, 0, 3, 4, 3, 0, 0, 2, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 5, 1, 1, 1, 0, 3,
0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 2,
0, 1, 1, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -1,
1, 4, 1, 2, 9, 1, 4, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 4, 1, 0, 1, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 3, 1, 2, 1, 1, 4, 0, 3, 3, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 1, 2, 2, 1, 6, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 2, 0, 1, 2, 1, 4, 0, 0, 5,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 5, 1, 1, 3, 3,
4, 4, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
1, 3, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
3, 0, 2, 8, 0, 2, 3, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 1, 1, 2, 0, 1, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 3, 1, 2, 2, 3, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 4, 1, 2, 0, 0, 5, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 8, 0, 1, 1, 4, 0, 4, 3,
2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 6, 1, 0, 0, 0,
3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 8,
1, 2, 0, 4, 2, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
1, 6, 7, 0, 1, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 5, 0, 2, 1, 4, 1, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 2, 1, 4, 7, 5, 0, 7, 4, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 1, 6, 0, 4, 6, 2, 1, 3, 5, 3, 3, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 4, 3, 0, 1, 1, 3,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 6, 0, 0), index = structure(c(1435190400,
1435194000, 1435197600, 1435201200, 1435204800, 1435208400, 1435212000,
1435215600, 1435219200, 1435222800, 1435226400, 1435230000, 1435233600,
1435237200, 1435240800, 1435244400, 1435248000, 1435251600, 1435255200,
1435258800, 1435262400, 1435266000, 1435269600, 1435273200, 1435276800,
1435280400, 1435284000, 1435287600, 1435291200, 1435294800, 1435298400,
1435302000, 1435305600, 1435309200, 1435312800, 1435316400, 1435320000,
1435323600, 1435327200, 1435330800, 1435334400, 1435338000, 1435341600,
1435345200, 1435348800, 1435352400, 1435356000, 1435359600, 1435363200,
1435366800, 1435370400, 1435374000, 1435377600, 1435381200, 1435384800,
1435388400, 1435392000, 1435395600, 1435399200, 1435402800, 1435406400,
1435410000, 1435413600, 1435417200, 1435420800, 1435424400, 1435428000,
1435431600, 1435435200, 1435438800, 1435442400, 1435446000, 1435449600,
1435453200, 1435456800, 1435460400, 1435464000, 1435467600, 1435471200,
1435474800, 1435478400, 1435482000, 1435485600, 1435489200, 1435492800,
1435496400, 1435500000, 1435503600, 1435507200, 1435510800, 1435514400,
1435518000, 1435521600, 1435525200, 1435528800, 1435532400, 1435536000,
1435539600, 1435543200, 1435546800, 1435550400, 1435554000, 1435557600,
1435561200, 1435564800, 1435568400, 1435572000, 1435575600, 1435579200,
1435582800, 1435586400, 1435590000, 1435593600, 1435597200, 1435600800,
1435604400, 1435608000, 1435611600, 1435615200, 1435618800, 1435622400,
1435626000, 1435629600, 1435633200, 1435636800, 1435640400, 1435644000,
1435647600, 1435651200, 1435654800, 1435658400, 1435662000, 1435665600,
1435669200, 1435672800, 1435676400, 1435680000, 1435683600, 1435687200,
1435690800, 1435694400, 1435698000, 1435701600, 1435705200, 1435708800,
1435712400, 1435716000, 1435719600, 1435723200, 1435726800, 1435730400,
1435734000, 1435737600, 1435741200, 1435744800, 1435748400, 1435752000,
1435755600, 1435759200, 1435762800, 1435766400, 1435770000, 1435773600,
1435777200, 1435780800, 1435784400, 1435788000, 1435791600, 1435795200,
1435798800, 1435802400, 1435806000, 1435809600, 1435813200, 1435816800,
1435820400, 1435824000, 1435827600, 1435831200, 1435834800, 1435838400,
1435842000, 1435845600, 1435849200, 1435852800, 1435856400, 1435860000,
1435863600, 1435867200, 1435870800, 1435874400, 1435878000, 1435881600,
1435885200, 1435888800, 1435892400, 1435896000, 1435899600, 1435903200,
1435906800, 1435910400, 1435914000, 1435917600, 1435921200, 1435924800,
1435928400, 1435932000, 1435935600, 1435939200, 1435942800, 1435946400,
1435950000, 1435953600, 1435957200, 1435960800, 1435964400, 1435968000,
1435971600, 1435975200, 1435978800, 1435982400, 1435986000, 1435989600,
1435993200, 1435996800, 1436000400, 1436004000, 1436007600, 1436011200,
1436014800, 1436018400, 1436022000, 1436025600, 1436029200, 1436032800,
1436036400, 1436040000, 1436043600, 1436047200, 1436050800, 1436054400,
1436058000, 1436061600, 1436065200, 1436068800, 1436072400, 1436076000,
1436079600, 1436083200, 1436086800, 1436090400, 1436094000, 1436097600,
1436101200, 1436104800, 1436108400, 1436112000, 1436115600, 1436119200,
1436122800, 1436126400, 1436130000, 1436133600, 1436137200, 1436140800,
1436144400, 1436148000, 1436151600, 1436155200, 1436158800, 1436162400,
1436166000, 1436169600, 1436173200, 1436176800, 1436180400, 1436184000,
1436187600, 1436191200, 1436194800, 1436198400, 1436202000, 1436205600,
1436209200, 1436212800, 1436216400, 1436220000, 1436223600, 1436227200,
1436230800, 1436234400, 1436238000, 1436241600, 1436245200, 1436248800,
1436252400, 1436256000, 1436259600, 1436263200, 1436266800, 1436270400,
1436274000, 1436277600, 1436281200, 1436284800, 1436288400, 1436292000,
1436295600, 1436299200, 1436302800, 1436306400, 1436310000, 1436313600,
1436317200, 1436320800, 1436324400, 1436328000, 1436331600, 1436335200,
1436338800, 1436342400, 1436346000, 1436349600, 1436353200, 1436356800,
1436360400, 1436364000, 1436367600, 1436371200, 1436374800, 1436378400,
1436382000, 1436385600, 1436389200, 1436392800, 1436396400, 1436400000,
1436403600, 1436407200, 1436410800, 1436414400, 1436418000, 1436421600,
1436425200, 1436428800, 1436432400, 1436436000, 1436439600, 1436443200,
1436446800, 1436450400, 1436454000, 1436457600, 1436461200, 1436464800,
1436468400, 1436472000, 1436475600, 1436479200, 1436482800, 1436486400,
1436490000, 1436493600, 1436497200, 1436500800, 1436504400, 1436508000,
1436511600, 1436515200, 1436518800, 1436522400, 1436526000, 1436529600,
1436533200, 1436536800, 1436540400, 1436544000, 1436547600, 1436551200,
1436554800, 1436558400, 1436562000, 1436565600, 1436569200, 1436572800,
1436576400, 1436580000, 1436583600, 1436587200, 1436590800, 1436594400,
1436598000, 1436601600, 1436605200, 1436608800, 1436612400, 1436616000,
1436619600, 1436623200, 1436626800, 1436630400, 1436634000, 1436637600,
1436641200, 1436644800, 1436648400, 1436652000, 1436655600, 1436659200,
1436662800, 1436666400, 1436670000, 1436673600, 1436677200, 1436680800,
1436684400, 1436688000, 1436691600, 1436695200, 1436698800, 1436702400,
1436706000, 1436709600, 1436713200, 1436716800, 1436720400, 1436724000,
1436727600, 1436731200, 1436734800, 1436738400, 1436742000, 1436745600,
1436749200, 1436752800, 1436756400, 1436760000, 1436763600, 1436767200,
1436770800, 1436774400, 1436778000, 1436781600, 1436785200, 1436788800,
1436792400, 1436796000, 1436799600, 1436803200, 1436806800, 1436810400,
1436814000, 1436817600, 1436821200, 1436824800, 1436828400, 1436832000,
1436835600, 1436839200, 1436842800, 1436846400, 1436850000, 1436853600,
1436857200, 1436860800, 1436864400, 1436868000, 1436871600, 1436875200,
1436878800, 1436882400, 1436886000, 1436889600, 1436893200, 1436896800,
1436900400, 1436904000, 1436907600, 1436911200, 1436914800, 1436918400,
1436922000, 1436925600, 1436929200, 1436932800, 1436936400, 1436940000,
1436943600, 1436947200, 1436950800, 1436954400, 1436958000, 1436961600,
1436965200, 1436968800, 1436972400, 1436976000, 1436979600, 1436983200,
1436986800, 1436990400, 1436994000, 1436997600, 1437001200, 1437004800,
1437008400, 1437012000, 1437015600, 1437019200, 1437022800, 1437026400,
1437030000, 1437033600, 1437037200, 1437040800, 1437044400, 1437048000,
1437051600, 1437055200, 1437058800, 1437062400, 1437066000, 1437069600,
1437073200, 1437076800, 1437080400, 1437084000, 1437087600, 1437091200,
1437094800, 1437098400, 1437102000, 1437105600, 1437109200, 1437112800,
1437116400, 1437120000, 1437123600, 1437127200, 1437130800, 1437134400,
1437138000, 1437141600, 1437145200, 1437148800, 1437152400, 1437156000,
1437159600, 1437163200, 1437166800, 1437170400, 1437174000, 1437177600,
1437181200, 1437184800, 1437188400, 1437192000, 1437195600, 1437199200,
1437202800, 1437206400, 1437210000, 1437213600, 1437217200, 1437220800,
1437224400, 1437228000, 1437231600, 1437235200, 1437238800, 1437242400,
1437246000, 1437249600, 1437253200, 1437256800, 1437260400, 1437264000,
1437267600, 1437271200, 1437274800, 1437278400, 1437282000, 1437285600,
1437289200, 1437292800, 1437296400, 1437300000, 1437303600, 1437307200,
1437310800, 1437314400, 1437318000, 1437321600, 1437325200, 1437328800,
1437332400, 1437336000, 1437339600, 1437343200, 1437346800, 1437350400,
1437354000, 1437357600, 1437361200, 1437364800, 1437368400, 1437372000,
1437375600, 1437379200, 1437382800, 1437386400, 1437390000, 1437393600,
1437397200, 1437400800, 1437404400, 1437408000, 1437411600, 1437415200,
1437418800, 1437422400, 1437426000, 1437429600, 1437433200, 1437436800,
1437440400, 1437444000, 1437447600, 1437451200, 1437454800, 1437458400,
1437462000, 1437465600, 1437469200, 1437472800, 1437476400, 1437480000,
1437483600, 1437487200, 1437490800, 1437494400, 1437498000, 1437501600,
1437505200, 1437508800, 1437512400, 1437516000, 1437519600, 1437523200,
1437526800, 1437530400, 1437534000, 1437537600, 1437541200, 1437544800,
1437548400, 1437552000, 1437555600, 1437559200, 1437562800, 1437566400,
1437570000, 1437573600, 1437577200, 1437580800, 1437584400, 1437588000,
1437591600, 1437595200, 1437598800, 1437602400, 1437606000, 1437609600,
1437613200, 1437616800, 1437620400, 1437624000, 1437627600, 1437631200,
1437634800, 1437638400, 1437642000, 1437645600, 1437649200, 1437652800,
1437656400, 1437660000, 1437663600, 1437667200, 1437670800, 1437674400,
1437678000, 1437681600, 1437685200, 1437688800, 1437692400, 1437696000,
1437699600, 1437703200, 1437706800, 1437710400, 1437714000, 1437717600,
1437721200, 1437724800, 1437728400, 1437732000, 1437735600, 1437739200,
1437742800, 1437746400, 1437750000, 1437753600, 1437757200, 1437760800,
1437764400, 1437768000, 1437771600, 1437775200, 1437778800, 1437782400,
1437786000, 1437789600, 1437793200, 1437796800, 1437800400, 1437804000,
1437807600, 1437811200, 1437814800, 1437818400, 1437822000, 1437825600,
1437829200, 1437832800, 1437836400, 1437840000, 1437843600, 1437847200,
1437850800, 1437854400, 1437858000, 1437861600, 1437865200, 1437868800,
1437872400, 1437876000, 1437879600, 1437883200, 1437886800, 1437890400,
1437894000, 1437897600, 1437901200, 1437904800, 1437908400, 1437912000,
1437915600, 1437919200, 1437922800, 1437926400, 1437930000, 1437933600,
1437937200, 1437940800, 1437944400, 1437948000, 1437951600, 1437955200,
1437958800, 1437962400, 1437966000, 1437969600, 1437973200, 1437976800,
1437980400, 1437984000, 1437987600, 1437991200, 1437994800, 1437998400,
1438002000, 1438005600, 1438009200, 1438012800, 1438016400, 1438020000,
1438023600, 1438027200, 1438030800, 1438034400, 1438038000, 1438041600,
1438045200, 1438048800, 1438052400, 1438056000, 1438059600, 1438063200,
1438066800, 1438070400, 1438074000, 1438077600, 1438081200, 1438084800,
1438088400, 1438092000, 1438095600, 1438099200, 1438102800, 1438106400,
1438110000, 1438113600, 1438117200, 1438120800, 1438124400, 1438128000,
1438131600, 1438135200, 1438138800, 1438142400, 1438146000, 1438149600,
1438153200, 1438156800, 1438160400, 1438164000, 1438167600, 1438171200,
1438174800, 1438178400, 1438182000, 1438185600, 1438189200, 1438192800,
1438196400, 1438200000, 1438203600, 1438207200, 1438210800, 1438214400,
1438218000, 1438221600, 1438225200, 1438228800, 1438232400, 1438236000,
1438239600, 1438243200, 1438246800, 1438250400, 1438254000, 1438257600,
1438261200, 1438264800, 1438268400, 1438272000, 1438275600, 1438279200,
1438282800, 1438286400, 1438290000, 1438293600, 1438297200, 1438300800,
1438304400, 1438308000, 1438311600, 1438315200, 1438318800, 1438322400,
1438326000, 1438329600, 1438333200, 1438336800, 1438340400, 1438344000,
1438347600, 1438351200, 1438354800, 1438358400, 1438362000, 1438365600,
1438369200, 1438372800, 1438376400, 1438380000, 1438383600, 1438387200,
1438390800, 1438394400, 1438398000, 1438401600, 1438405200, 1438408800,
1438412400, 1438416000, 1438419600, 1438423200, 1438426800, 1438430400,
1438434000, 1438437600, 1438441200, 1438444800, 1438448400, 1438452000,
1438455600, 1438459200, 1438462800, 1438466400, 1438470000, 1438473600,
1438477200, 1438480800, 1438484400, 1438488000, 1438491600, 1438495200,
1438498800, 1438502400, 1438506000, 1438509600, 1438513200, 1438516800,
1438520400, 1438524000, 1438527600, 1438531200, 1438534800, 1438538400,
1438542000, 1438545600, 1438549200, 1438552800, 1438556400, 1438560000,
1438563600, 1438567200, 1438570800, 1438574400, 1438578000, 1438581600,
1438585200, 1438588800, 1438592400, 1438596000, 1438599600, 1438603200,
1438606800, 1438610400, 1438614000, 1438617600, 1438621200, 1438624800,
1438628400, 1438632000, 1438635600, 1438639200, 1438642800, 1438646400,
1438650000, 1438653600, 1438657200, 1438660800, 1438664400, 1438668000,
1438671600, 1438675200, 1438678800, 1438682400, 1438686000, 1438689600,
1438693200, 1438696800, 1438700400, 1438704000, 1438707600, 1438711200,
1438714800, 1438718400, 1438722000, 1438725600, 1438729200, 1438732800,
1438736400, 1438740000, 1438743600, 1438747200, 1438750800, 1438754400,
1438758000, 1438761600, 1438765200, 1438768800, 1438772400, 1438776000,
1438779600, 1438783200, 1438786800), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), class = c("zooreg", "zoo"), frequency = 24)
# Ask auto.arima() for a seasonal model of my_ts; this is the call that
# produces the error quoted below.
fit1 <- auto.arima(my_ts, seasonal = TRUE)
I was hoping to get a model through arima, but I'm only getting the error:
"Error in seq.default(head(tt, 1), tail(tt, 1), deltat) :
'by' argument is much too small"

"non-finite value supplied by optim" error when using betareg

I'm working with betareg package for beta regression but receive the below error:
Error in optim(par = start, fn = loglikfun, gr = gradfun, method = method, :
non-finite value supplied by optim
I can trace this error to the creation of the initial values for optim. Specifically, it comes from the lines of betareg.fit that use lm.wfit to generate starting values.
It turns out that one of the starting values is returned as NA for my dataset. I'm unsure why this is the case, since there are no missing values in the data / input to lm.wfit.
Reproducible example to see NA
## data -- a sample of 100 obs from my actual data
## (unit weights, zero offset, and a Beta(0.75, 1.658) response of length nobs)
nobs <- 100L
w <- rep.int(1, nobs)
offset <- numeric(nobs)
y <- stats::rbeta(n = nobs, shape1 = 0.75, shape2 = 1.658)
x <- structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0165928242550604,
0.0984749494334759, 0.05517578125, 0.0185352577155742, 0.168701442841287,
0.0514759697487192, 0.026507054296708, 0.0188496858385694, 0.108620689655172,
0.0722387772757858, 0.0272373540856031, 0.0538907902524382, 0.0295235311312482,
0.0318257956448911, 0.231788079470199, 0.0674772036474164, 0.14846108458939,
0.0969908238068386, 0.0441553321506012, 0.154121863799283, 0,
0.110460389247421, 0.0292207792207792, 0.0522853185595568, 0.205288796102992,
0.00961124552835874, 0.0546908714289824, 0.0268199233716475,
0.0253164556962025, 0.181780542384243, 0.0551724137931034, 0.128842504743833,
0.0751429349305745, 0.217853751187085, 0.0510314875135722, 0.108407709439207,
0.04, 0.0638009815535624, 0.128329297820823, 0.0398115958281933,
0.0513258247605534, 0.0520833333333333, 0.0956239870340357, 0.0742899497995351,
0.144527098831031, 0.0723209169054441, 0.140116763969975, 0.172426847735821,
0.00830471112933819, 0.0548386400835806, 0.0372010221576987,
0.0549927641099855, 0.0386658431130327, 0.0256367439122648, 0.0166402535657686,
0.0769230769230769, 0.0130681818181818, 0.0229684699649666, 0.0344827586206897,
0.0135106607557526, 0.0581090909090909, 0.0321364452423698, 0.0141176470588235,
0.0203003337041157, 0.0948080795499367, 0.0202898550724638, 0.0443828016643551,
0.105830475257227, 0.0482315112540193, 0.0394736842105263, 0,
0.071608040201005, 0.0416666666666667, 0.268330928934329, 0.0422895357985838,
0.127678318597993, 0.037029330162505, 0.0328938677375888, 0.10183299389002,
0.0628212450028555, 0.0283431291591781, 0.0690879300928454, 0.134792626728111,
0.0770505385252693, 0.174605316421536, 0.0842012497997116, 0.068774108570891,
0.137089781654799, 0.00986436498150432, 0.0812065297585365, 0.0904255319148936,
0.0205776173285199, 0.0124064303568112, 0.0229630147033144, 0.0578925872983459,
0.0709677419354839, 0.0640070144673389, 0.106259964391839, 0.0315146661646867,
0.0356999429308195, 0.0268438884545218, 0.0748295057905382, 0.0556640625,
0.021107943539976, 0.199778024417314, 0.0175652598194682, 0.0487387772552373,
0.00289995166747221, 0.0672413793103448, 0.101364990868019, 0.0233463035019455,
0.0732353773706287, 0.022508038585209, 0.0368509212730318, 0.101545253863135,
0.0158054711246201, 0.152565574210159, 0.123442866663249, 0.0672186083185492,
0.129032258064516, 0, 0.104565780781544, 0.0551948051948052,
0.03601108033241, 0.160055671537926, 0.02201309207499, 0.0668891510112489,
0.0421455938697318, 0.0632911392405063, 0.234027661399237, 0.0206896551724138,
0.0950664136622391, 0.0936564116526001, 0.183475783475783, 0.0466883821932682,
0.088748974363268, 0.0422641509433962, 0.0467084108986292, 0.0920096852300242,
0.0401480318492767, 0.05103760198652, 0.0208333333333333, 0.0470016207455429,
0.0887666928515318, 0.075451647183847, 0.0310601719197708, 0.0928685551212356,
0.148991255923013, 0.0204541959296663, 0.0689569784090949, 0.0356901206750445,
0.0680173661360347, 0.0508956145768993, 0.0320699343321964, 0.0293185419968304,
0.0659340659340659, 0.00284090909090909, 0.0402373780415312,
0, 0.0230103440996411, 0.0629090909090909, 0.0229802513464991,
0, 0.00778642936596218, 0.0745676859521289, 0.0191304347826087,
0.0443828016643551, 0.0975012248897599, 0.00964630225080386,
0, 0, 0.0678391959798995, 0.05, 0.167973405256225, 0.0427812745869394,
0.197810150080232, 0.0363158937772493, 0.0415070411371503, 0.109979633401222,
0.0285551113649343, 0.0348520911254675, 0.0730930274895321, 0.119239631336406,
0.087821043910522, 0.251855350155175, 0.0668162153501042, 0.0731018910527801,
0.0945505662261226, 0.0530209617755857, 0.0879397164898608, 0.0531914893617021,
0.0339350180505415, 0.0144171389279045, 0.0176020675444417, 0.0758627731264039,
0.0387096774193548, 0.0791319596668128, 0.0898373582199164, 0.0641540015091306,
0.0308596309526326, 0.0853097037616193, 0.145443642937691, 0.134765625,
0.0735152424185233, 0.207547169811321, 0.0712368870456209, 0.1626763574177,
0.0128081198646689, 0.096551724137931, 0.130923771988849, 0.0525291828793774,
0.230005661202748, 0.0833089739842151, 0.123953098827471, 0.344370860927152,
0.0419452887537994, 0.188428900927962, 0.237145640026657, 0.184407648334319,
0.211469534050179, 0.0576923076923077, 0.182300929925301, 0.165584415584416,
0.0806786703601108, 0.274298306657388, 0.0837699832267066, 0.137316953882448,
0.157088122605364, 0.10126582278481, 0.260575112627044, 0.0413793103448276,
0.198292220113852, 0.177511570922951, 0.296296296296296, 0.139522258414767,
0.138297694201175, 0.0645283018867925, 0.12692502961584, 0.162227602905569,
0.0730066165750813, 0.142559418233416, 0.239583333333333, 0.152350081037277,
0.204935285553934, 0.155154091392136, 0.0889398280802292, 0.135152787965875,
0.235552733134678, 0.0947352232531912, 0.154476512637182, 0.137157008940595,
0.162083936324168, 0.138789376158122, 0.104513252539252, 0.0935023771790808,
0.120879120879121, 0.0133522727272727, 0.0824038693376326, 0.0344827586206897,
0.0789529237914292, 0.133527272727273, 0.0569120287253142, 0.0117647058823529,
0.0233592880978865, 0.158352535758029, 0.0527536231884058, 0.104022191400832,
0.201371876531112, 0.0289389067524116, 0.144736842105263, 0,
0.10678391959799, 0.0583333333333333, 0.233080348742395, 0.127753737214791,
0.233930088412044, 0.13375941339675, 0.13934317947634, 0.338085539714868,
0.0805254140491148, 0.101811822995094, 0.167819649250561, 0.142857142857143,
0.227009113504557, 0.254081770341384, 0.155103348822304, 0.161162856336438,
0.124957303134607, 0.155363748458693, 0.214043708410037, 0.111702127659574,
0.126714801444043, 0.0475465770701011, 0.195590169850639, 0.204512967122728,
0.158064516129032, 0.306298407131375, 0.191432758458608, 0.179792327044226,
0.072245355202807, 0.286421683606985, 0.19797912900281, 0.32275390625,
0.4288140812333, 0.180910099889012, 0.165406196633325, 0.539974348011971,
0.0396326727887869, 0.205172413793103, 0.528357204652504, 0.0953307392996109,
0.456736831270425, 0.312189418298743, 0.440536013400335, 0.322295805739514,
0.185410334346505, 0.289871801748646, 0.277182549853899, 0.478218016952494,
0.186379928315412, 0.730769230769231, 0.312846181208395, 0.573051948051948,
0.104916897506925, 0.251913709116214, 0.240451839867001, 0.390675796196668,
0.314176245210728, 0.151898734177215, 0.321921116708234, 0.191379310344828,
0.352371916508539, 0.384971413013885, 0.245204178537512, 0.314060803474484,
0.177397478189521, 0.128301886792453, 0.418175664240988, 0.0532687651331719,
0.263429404508243, 0.351343561546648, 0.520833333333333, 0.160453808752026,
0.40037131675589, 0.0701381509032944, 0.139484240687679, 0.208627671654089,
0.302720922280299, 0.464089813912954, 0.325967367919772, 0.509502256498519,
0.357452966714906, 0.387770228536133, 0.389464763566684, 0.401743264659271,
0.450549450549451, 0.0741477272727273, 0.371069078965643, 0.0344827586206897,
0.351488283723876, 0.394836363636364, 0.10394973070018, 0.0164705882352941,
0.0661846496106785, 0.247721658259119, 0.271552795031056, 0.375866851595007,
0.326800587947085, 0.090032154340836, 0.302631578947368, 0, 0.293969849246231,
0.0916666666666667, 0.262623094775136, 0.37765538945712, 0.437246326652613,
0.553230281411019, 0.553700229860763, 0.327902240325866, 0.18275271273558,
0.23291397483849, 0.407002852114813, 0.115207373271889, 0.569179784589892,
0.306166509243017, 0.536532606954014, 0.426756985605419, 0.214233164297538,
0.515413070283601, 0.401348069939186, 0.23936170212766, 0.406859205776173,
0.301968017675986, 0.46031385697156, 0.417500510516643, 0.280645161290323,
0.545082566125968, 0.308916785607088, 0.40121177121836, 0.110259770455074,
0.573885848318999, 0.0979737591064492, 0.05517578125, 0.43828037308911,
0, 0.0195169553549646, 0.106028217186832, 0.0224746254229096,
0.0913793103448276, 0.149860617129674, 0.0214007782101167, 0.138210545276756,
0.499269219526454, 0.351758793969849, 0, 0.0613981762917933,
0.172457222550019, 0.257446045009484, 0.141730731322689, 0.129032258064516,
0, 0.218723004217694, 0.116883116883117, 0.0207756232686981,
0.00150777081883554, 0.598457747628731, 0.310768434074628, 0.0344827586206897,
0.0759493670886076, 0, 0.0620689655172414, 0.0992409867172676,
0.175878028859243, 0.0207027540360874, 0.342562432138979, 0.0380448433496877,
0.0252830188679245, 0.138432898967676, 0, 0.510037007962319,
0.150097552323519, 0.0520833333333333, 0, 0.200037592718361,
0, 0.0220057306590258, 0.107599706582795, 0.0987006008499829,
0.381862921002717, 0.310497634940381, 0.266713526509752, 0.221418234442836,
0.376899320568252, 0.147387273053657, 0.412044374009509, 0.137362637362637,
0.266761363636364, 0.482401647112941, 0, 0.496200126662445, 0.145890909090909,
0.00933572710951526, 0, 0.000278086763070078, 0.0862344564156858,
0.598343685300207, 0.208044382801664, 0.171974522292994, 0.0418006430868167,
0.0921052631578947, 0, 0.268844221105528, 0.0333333333333333,
0.0333061531706705, 0.330841856805665, 0, 0.228289734443123,
0.226188274459035, 0.0712830957230143, 0.0177041690462593, 0.0492786710059746,
0.246950664482068, 0.00576036866359447, 0.0306545153272577, 0,
0.0744271751321904, 0.198701665255433, 0.268701227042224, 0.196054254007398,
0.0790648033408067, 0.0531914893617021, 0.00144404332129964,
0.620702802934266, 0.28496257232331, 0.197569940780069, 0.170967741935484,
0, 0.0478844352441583, 0.210611453120438, 0.0129145441863414,
0.000260620276257493, 0.208709971897542, 0.3251953125, 0.0114926544356481,
0.194228634850166, 0.640644059526714, 0.0280034202650705, 0.885693571773804,
0.313793103448276, 0.00884360280688263, 0.739299610894942, 0.0108913821054528,
0.00380005846243788, 0, 0, 0.610942249240122, 0.0171968030896315,
0, 0.0280898876404494, 0.100358422939068, 0.173076923076923,
0.0244422988973017, 0.0227272727272727, 0.62292243767313, 0.0628624449083739,
0.00386676364499993, 0.00405650335164993, 0.314176245210728,
0.126582278481013, 0.000798493705700851, 0.581034482758621, 0.0829222011385199,
0.0245031309556221, 0.00588793922127255, 0.0293159609120521,
0.380448433496877, 0.353207547169811, 0.0531392790658318, 0.414043583535109,
0.0632499719636649, 0.096842852075204, 0.0416666666666667, 0.494327390599676,
0.00292230188016789, 0.476089266737513, 0.557478510028653, 0.113709215510918,
0.0199428459772361, 0.00563900138411852, 0.0115416622646005,
0.0101230399337831, 0.0419681620839363, 0.00197652872143298,
0.164294749497994, 0.00475435816164818, 0.137362637362637, 0.584090909090909,
0.000167333519259466, 0.885057471264368, 0.00401097741186405,
0.109381818181818, 0.647935368043088, 0.948235294117647, 0.50139043381535,
0.188100724502294, 0.00447204968944099, 0.105409153952843, 0.0264576188143067,
0.765273311897106, 0.368421052631579, 0.841726618705036, 0.0766331658291457,
0.241666666666667, 0.0153672458132096, 0.0698269079464988, 0.00169902148947551,
0.00340864050733254, 0.00261565695262755, 0.0366598778004073,
0.572815533980582, 0.430635838150289, 0.0247284422598459, 0.285714285714286,
0.00165700082850041, 0.00580218593981919, 0.071062329754847,
0.0487345940351868, 0.0753895267873565, 0.00986436498150432,
0.0469890478271147, 0.340425531914894, 0.216967509025271, 7.16946330049502e-05,
0.0013271400132714, 0.0275678987134981, 0.0806451612903226, 0.00146134736226801,
0.0792675274278758, 0.0146813198032706, 0.472691340280273, 0.00295369646425159,
0.1175738808241, 0.0419921875, 0.00405272515967538, 0.048834628190899,
0.0239082703098317, 0.0611372381359555, 0.00894151764137264,
0.0827586206896552, 0.00552725175430164, 0.0330739299610895,
0.0168742441007694, 0.00701549254603917, 0.0100502512562814,
0, 0.0115501519756839, 0.0167676876039264, 0, 0.0364675734279519,
0.046594982078853, 0.0384615384615385, 0.0193480359774379, 0.0275974025974026,
0.0647506925207756, 0.0440733008582695, 0.0107987353233683, 0.0128455939468915,
0.10727969348659, 0.455696202531646, 0.000897073175540462, 0.0344827586206897,
0.0244781783681214, 0.0400217805608494, 0.0260208926875594, 0.0282301845819761,
0.0483765635727323, 0.295849056603774, 0.11034015907937, 0.150121065375303,
0.00336436021083324, 0.101764810216389, 0.0416666666666667, 0.0502431118314425,
0.0128435876929952, 0.0786397449521785, 0.065214899713467, 0.107117376930575,
0.0130062038981974, 0.0127646485876865, 0.0243222483755186, 0.00109047672226346,
0.0419681620839363, 0.00148239654107474, 0.0995579008011222,
0.0182250396196513, 0.010989010989011, 0.0190340909090909, 0.000341111490123878,
0.0114942528735632, 0.00960523538104285, 0.0609454545454545,
0.108617594254937, 0.00941176470588235, 0.370689655172414, 0.0941230200743186,
0.00919254658385093, 0.0527045769764216, 0.0416462518373346,
0.0128617363344051, 0.0263157894736842, 0.100719424460432, 0.0628140703517588,
0.383333333333333, 0.013861882958038, 0.00324547600314713, 0.00163609476764308,
0.00522195798652398, 0.00245713228883194, 0.00814663951120163,
0.0422615648201028, 0.0899839704667994, 0.00467261362946781,
0.165898617511521, 0.00662800331400166, 0.00748886789906895,
0.00480692196763339, 0.00997271615391853, 0.0314314616779211,
0.0357583230579531, 0.0671742820918722, 0.0585106382978723, 0.193501805054152,
0.000332402389386587, 0.00658331296056997, 0.00745354298550133,
0.106451612903226, 0.00401870524623703, 0.120851102817492, 0.0428155973269565,
0.222051954091015, 0.00773173486230562, 0.0590151610073926, 0.00927734375,
0.00420172240819286, 0, 0.0102464015613564, 0.0269345874305259,
0.00869985500241663, 0.0344827586206897, 0.00288378352398347,
0.00778210116731518, 0.0201551684207818, 0.0423852674656533,
0.0050251256281407, 0, 0.00547112462006079, 0.0142509252802661,
0.00779207463987287, 0.019712201852947, 0.043010752688172, 0,
0.0273133797449057, 0.00974025974025974, 0.0176592797783934,
0, 0.0310305927058439, 0.022756696107483, 0.00383141762452107,
0, 0, 0.0137931034482759, 0.0187855787476281, 0.0283147291042744,
0.00455840455840456, 0.0485884907709012, 0.0202783033875316,
0.0505660377358491, 0.0424775765781012, 0, 0.0069530111023887,
0.055028378857751, 0.03125, 0, 0.0158332727475844, 0, 0.0234957020057307,
0.0948079222645377, 0.00865859020077182, 0.0121494848003281,
0.0493989553698703, 0.00252254856234439, 0.0520984081041968,
0.0035206917850525, 0.0370753822968285, 0.0237717908082409, 0,
0.0267045454545455, 0.000411111567901742, 0, 0.0232214481739498,
0.0344, 0.0181328545780969, 0, 0.0100111234705228, 0.0560918394884889,
0.0242650103519669, 0.0651872399445215, 0.0284174424301813, 0.00321543408360129,
0.0263157894736842, 0.0575539568345324, 0.0515075376884422, 0.1,
0.00545694034999686, 0.00560582218725413, 0, 0.00274474831549742,
0.00129461808766414, 0.00610997963340122, 0.0125642490005711,
0.032180502258707, 0.00664482068086656, 0.0305299539170507, 0,
0, 0.00705015221919564, 0.0127951829899332, 0.0536469691794319,
0.0246609124537608, 0.022233842142586, 0.0531914893617021, 0,
0.00255493601254004, 0.0106578656328944, 0.0116397794568103,
0.0935483870967742, 0, 0.0555500678330224, 0.0552188638129316,
0.0432774619010378, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0,
0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0,
1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1,
1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1,
0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1,
0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0,
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0,
1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,
0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1,
0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0,
0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0,
1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0,
1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1,
0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0,
0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1,
0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
1, 1, 0), .Dim = c(100L, 35L), .Dimnames = list(c("2801", "2316",
"382", "8062", "2687", "2731", "8019", "5652", "8429", "3479",
"7753", "9001", "2188", "8121", "8478", "5817", "1528", "2460",
"3946", "3531", "3421", "2802", "1975", "3639", "2894", "5897",
"9331", "9490", "7135", "5858", "7724", "9414", "9095", "6601",
"5064", "7111", "3593", "7322", "9522", "7116", "6922", "5172",
"2458", "5199", "1387", "3878", "6119", "8722", "6378", "4661",
"6109", "3682", "5751", "9390", "7915", "5268", "1029", "5953",
"242", "2912", "8798", "9607", "9768", "2222", "8260", "851",
"4205", "1823", "5063", "4189", "7541", "608", "6849", "7220",
"2889", "6770", "7064", "646", "4919", "1404", "120", "9716",
"7722", "7700", "6638", "8176", "5745", "6", "9481", "2233",
"341", "228", "1543", "553", "9709", "9493", "881", "7647", "6039",
"2925"), c("(Intercept)", "x 1", "x 2", "x 3", "x 4", "x 5",
"x 6", "x 7", "x 8", "x 9", "x 10", "x 11", "x 12", "x 13", "x 14",
"x 15", "x 16", "x 17", "x 18", "x 19", "x 20", "x 21", "x 22",
"x 23", "x 24", "x 25", "x 26", "x 27", "x 28", "x 29", "x 30",
"x 31", "x 32", "x 33", "x 34")))
Inside betareg: the NA that causes the problem
# Reproduce the step inside betareg where the NA coefficient appears:
# a weighted least-squares fit of the logit-transformed response.
# NOTE(review): `x`, `y`, `w` and `offset` are assumed to be defined earlier
# in the post (`x` being the model matrix shown above) -- confirm.
#
# Use the exported link constructor rather than reaching into the private
# compiled symbol `stats:::C_logit_link`; `make.link("logit")$linkfun`
# evaluates the same routine through the public API.
linkfun <- stats::make.link("logit")$linkfun
auxreg_test <- lm.wfit(x, linkfun(y), w, offset = offset)
# problem:
(beta <- auxreg_test$coefficients)
is.na(beta["x 8"])
> beta['x 8']
x 8
NA
I originally thought this might be related to using the CRAN version of betareg (3.1-0). But I updated to the rforge version (3.2-0) via devtools::install_github("rforge/betareg/pkg") and still have the same problem.
If I remove the offending predictor from my formula, betareg runs fine; however, the predictor is a necessary one.
NA coefficients from glm / lm / lm.fit / .lm.fit / lm.wfit imply that the model matrix is rank-deficient. Such coefficients can be treated as 0 with 0 standard error (i.e., they are fixed at 0).
I appreciate that you have done a lot of debugging work and located the source of the error, but giving us only the model matrix x makes it harder for us to investigate. It would be better if you showed us the model formula and the data frame.
Anyway, I have (with some pain) found the collinearity problem from your model matrix.
# Row sums over columns 2 to 9 of the model matrix (named "x 1" to "x 8").
# The original call had a stray leading comma, which left `x` missing and
# passed the matrix as `na.rm`, so it could not produce the output below.
rowSums(x[, 2:9])
#2801 2316 382 8062 2687 2731 8019 5652 8429 3479 7753 9001 2188 8121 8478 5817
# 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#1528 2460 3946 3531 3421 2802 1975 3639 2894 5897 9331 9490 7135 5858 7724 9414
# 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#9095 6601 5064 7111 3593 7322 9522 7116 6922 5172 2458 5199 1387 3878 6119 8722
# 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#6378 4661 6109 3682 5751 9390 7915 5268 1029 5953 242 2912 8798 9607 9768 2222
# 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#8260 851 4205 1823 5063 4189 7541 608 6849 7220 2889 6770 7064 646 4919 1404
# 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# 120 9716 7722 7700 6638 8176 5745 6 9481 2233 341 228 1543 553 9709 9493
# 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# 881 7647 6039 2925
# 1 1 1 1
Columns x 1 to x 8, if all included, are collinear with the intercept: as shown above, they sum to 1 in every row, which is exactly the intercept column. (This is strange; those columns are not dummy variables, so they do not come from factor variables.) If you don't want to drop any of them, drop the intercept instead.

Resources