I am in a group project for a class and one of the people in my group ran the normalization, as well as creating the test/train sets so that we all have the same sets to work from (we're all utilizing different algorithms). I am assigned with running the KNN algorithm.
We had multiple columns with NA's so those columns were omitted (<-NULL). When attempting to run the KNN I keep getting the error of
Error in knn(train = trainsetne, test = testsetne, cl = ne_train_target, :
no missing values are allowed
I ran which(is.na(dataset$col)) and found:
which(is.na(testsetne$median_days_on_market))
# [1] 8038 8097 8098 8100 8293 8304
When I look through the dataset those cells do not have missing data.
I am wondering if I may get some help with how to either find and fix the "No missing values" or to find a work around (if any).
I am sorry if I am missing something simple. Any help is appreciated.
I have listed the code that we have below:
ne$pending_ratio_yy <- ne$total_listing_count_yy <- ne$average_listing_price_yy <- ne$median_square_feet_yy <- ne$median_listing_price_per_square_feet_yy <- ne$pending_listing_count_yy <- ne$price_reduced_count_yy <- ne$median_days_on_market_yy <- ne$new_listing_count_yy <- ne$price_increased_count_yy <- ne$active_listing_count_yy <- ne$median_listing_price_yy <- ne$flag <- NULL
ne$pending_ratio_mm <- ne$total_listing_count_mm <- ne$average_listing_price_mm <- ne$median_square_feet_mm <- ne$median_listing_price_per_square_feet_mm <- ne$pending_listing_count_mm <- ne$price_reduced_count_mm <- ne$price_increased_count_mm <- ne$new_listing_count_mm <- ne$median_days_on_market_mm <- ne$active_listing_count_mm <- ne$median_listing_price_mm <- NULL
ne$factor_month_date <- as.factor(ne$month_date_yyyymm)
ne$factor_median_days_on_market <- as.factor(ne$median_days_on_market)
train20ne= sample(1:20893, 4179)
trainsetne=ne[train20ne,1:10]
testsetne=ne[-train20ne,1:10]
#This is where I start to come in
ne_train_target <- ne[train20ne, 3]
ne_test_target <- ne[-train20ne, 3]
predict_1 <- knn(train = trainsetne, test = testsetne, cl=ne_train_target, k=145)
# Error in knn(train = trainsetne, test = testsetne, cl = ne_train_target, :
# no missing values are allowed
When I run the following code, I do NOT get this error:
## https://www.dataiku.com/learn/guide/code/r/time_series.html
library(readxl)
library(forecast)
library(dplyr)
library(prophet)
library(rstan)
library(Hmisc)
library(caret)
data<-read_excel("Time Series/Items.xlsx", col_types = c("text", "numeric"))
Nper=0.75
stmodels=c("meanf","naive","snaive","rwf","croston","stlf","ses","holt","hw","splinef","thetaf","ets","auto.arima","tbats","prophet")
gkuniforecast = function(data, Np, Ncolumn, tsfreq, model) {
## Preparation
N = ceiling(Np*nrow(data))
## Models
if (model=="prophet"){
df=data
names(df)=c("ds","y")
df$ds=as.Date(paste(df$ds,"-01",sep=""), "%Y-%b-%d")
train.df = df[1:N,]
na.df=data.frame(ds=rep(NA, N),y=rep(NA, N))
test.df <- rbind(na.df, df[(N+1):nrow(data),])
m <- prophet(train.df)
future <- make_future_dataframe(m, periods = nrow(data)-N, freq = 'month')
pro_forecast <- predict(m, future)
plot(m, pro_forecast)
##prophet_plot_components(m, forecast)
acc=matrix(rep(NA, 16),nrow=2,ncol=8,dimnames=list(c("Training set", "Test set"),c("ME","RMSE","MAE","MPE","MAPE","MASE","ACF1","Theil's U")))
acc["Test set","RMSE"]=sqrt(mean((pro_forecast$yhat - test.df)^2, na.rm = TRUE))
}else{
x=pull(data,Ncolumn)
train.x = ts(x[1:N], frequency=tsfreq)
test.x <- ts(c(rep(NA, N), x[(N+1):NROW(x)]), frequency=tsfreq)
str1=paste0("m_",model," = ",model,"(train.x)")
if (Np==1) {str2=paste0("f_",model," = forecast(m_",model,", h=NROW(x)")
} else {str2=paste0("f_",model," = forecast(m_",model,", h=NROW(x)-N)")}
str3=paste0("plot(f_",model,")")
str4="lines(test.x)"
str5=paste0("acc=accuracy(f_",model,",test.x)")
str=paste0(str1,";",str2,";",str3,";",str4,";",str5)
eval(parse(text=str))
}
return(acc)
}
acc = lapply(stmodels, gkuniforecast, data=data, Np=Nper, Ncolumn=2,tsfreq=12)
But when I run this code, I do:
##Forecast data prep
tsfreq=5
x=pull(data,1)
train.x = ts(x[1:N], frequency=tsfreq)
test.x <- ts(c(rep(NA, N), x[(N+1):NROW(x)]), frequency=tsfreq)
stmodels=c("meanf","naive","snaive","rwf","croston","stlf","ses","holt","hw"##,"splinef"
,"thetaf","ets","auto.arima","tbats")
for (i in 1:length(stmodels)){
str1=paste0("m_",stmodels[i]," = ",stmodels[i],"(train.x)")
str2=paste0("f_",stmodels[i]," = forecast(m_",stmodels[i],", h=NROW(x)-N)")
str3=paste0("plot(f_",stmodels[i],")")
str4="lines(test.x)"
str5=paste0('acc[["',stmodels[i],'"]]=accuracy(f_',stmodels[i],',test.x)')
str=paste0(str1,";",str2,";",str3,";",str4,";",str5)
eval(parse(text=str))
}
There seems to be a problem with 'hw' (splinef is commented out, because it gives me another error), but I do not understand why in the first dataset, I get no errors and I do with the second dataset. What is also different is the frequency.
Again the error is:
Please select a longer horizon when the forecasts are first computed
You are mixing functions that create forecasts directly (like meanf()) with functions that generate models (like ets()). For functions that generate forecasts directly, you need to specify the forecast horizon when you call the function. See https://otexts.org/fpp2/the-forecast-package-in-r.html for a list of functions that produce forecasts directly.
I cannot seem to get past or understand what is wrong with this error while trying to run the ADF (Augmented Dickey Fuller) test on R-Studio. Will appreciate comments.
library(AER)
library("dynlm", lib.loc="~/R/win-library/3.2")
x<-read.table("C://R Files/protein.csv", header=T, sep=",")
"adf" <- function(x,k = 0, int = TRUE, trend = FALSE){
require(dynlm)
dx <- diff(x)
formula <- paste("dx ~ L(x)")
if(k > 0)
formula <- paste(formula," L(dx,1:k)")
if(trend){
s <- time(x)
t <- ts(s - s[1],start = s[1],freq = frequency(x))
formula <- paste(formula," t")
}
if(!int) formula <- paste(formula," - 1")
summary(dynlm(as.formula(formula)))
}
a<-ts(x$a)
adf(a, k=1, int=T, trend=T)
The error message that I get after this is :
Error in parse(text = x, keep.source = FALSE) :
:1:13: unexpected symbol
1: dx ~ L(x) L
^
It might be this:
formula <- paste(formula," L(dx,1:k)")
You can't just add something to a model. Try doing:
formula <- paste(formula,"+ L(dx,1:k)")
and see if that helps. If not you might want to share the contents of protein.csv so I can try to reproduce your problem.
Very new user here. I am trying to use lsoda to solve differential equations stratified into two layers (as denoted by the for(s in 1:2) loop).
When running this full code, I keep getting the error message
object 'N' not found
no matter where or how I try to define N.
Can anyone help spot the error or advise on what I'm doing wrong? Thanks in advance.
R code:
library(deSolve)
Dyn <- function(t, var,par) {
with(as.list(c(par, var)), {
for(s in 1:2){
#Derivatives
dX[s] <- mu*N[s] - sigma*X[s] - (c[s]*beta*(InD[s] +ID[s]+ IdT[s])/N[s])*X[s] - mu*X[s]
dXint[s] <- sigma*X[s] - (1-omega)*(c[s]*beta*(InD[s] +ID[s]+ IdT[s])/N[s])*Xint[s] - mu*Xprep[s]
dInD[s] <- (c[s]*beta*(InD[s] +ID[s]+ IdT[s])/N[s])*X[s] - psi*InD[s]- mu*InD[s]
dID[s] <- (1-omega)*(c[s]*beta*(InD[s] +ID[s]+ IdT[s]) /N[s])*Xint[s]+ psi*InD[s]- mu*ID[s]
N[s] <- X[s]+Xint[s]+InD[s]+ID[s]
diffs <- c(dX[s], dXint[s], dInD[s], dID[s], N[s])}
return(list(diffs))
})}
#Defining parameter and initial values
par <- c(mu=0.033, sigma=0.29, beta=0.40, c=c(2, 30), Ctot=1773600, N=c(332550, 36950), psi=0.022, omega=0.44)
init <- c(X=c(332550,36950), Xint=c(0,0), InD=c(1,1), ID=c(0,0))
t <- seq(0, 30, by=0.1)
#Numerical solution#
Hom.sol <- lsoda(init, t, Dyn,par)
I think you are mixing up parameters and variables. N seems to be defined as a parameter par with dimension 2. However, in your model definition you are updating N with dimension 1.
When I run the following line by line in R it works fine, but with knitr the options(digits=3) gets ignored.
Why? Any solutions?
<<cor>>=
#mock up data set.
x <- c(rnorm(100))
y <- c(rnorm(100))
z <- c(rnorm(100))
df <- as.data.frame(cbind(x,y,z))
df$x<- as.numeric(df$x)
df$y<- as.numeric(df$y)
df$z<- as.numeric(df$z)
options(digits=3)
cor(df, use = 'na.or.complete', method = c("spearman"))
#
I found a solution while searching for a different issue. Leave out options(digits=3) and use
round(cor(df, use = "na.or.complete", method = c("spearman")), digits = 3)
Which leaves the question why the options(...) doesn't work. But I can happily live with that!
Thanks everyone for their time!
Gerit