Convert R code into Matlab automatically - r

I would like to convert this code from R into Matlab.
require(tseries)
require(fracdiff)
require(matrixStats)
DCCA_beta_avg<-function(y,x,smin,smax,step){
XTX<-var(x)*(length(x)-1)
betas<-rep(0,(smax-smin)/step+1)
for(s in seq(smin,smax,by=step)){
betas[(s-smin)/step+1]<-DCCA_beta_sides(y,x,s)
}
DCCA_beta<-mean(betas)
DCCA_res<-(y-DCCA_beta*x)-mean(y-DCCA_beta*x)
DCCA_sigma2<-sum(DCCA_res^2)/(length(DCCA_res)-2)
DCCA_SE<-sqrt(DCCA_sigma2/XTX)
DCCA_R2<-1-var(DCCA_res)/var(y)
OLS_beta<-lm(y~x)$coefficients[2]
OLS_res<-(y-OLS_beta*x)-mean(y-OLS_beta*x)
OLS_sigma2<-sum(OLS_res^2)/(length(OLS_res)-2)
OLS_SE<-sqrt(OLS_sigma2/XTX)
OLS_R2<-1-var(OLS_res)/var(y)
return(c(OLS_beta,OLS_SE,OLS_R2,DCCA_beta,DCCA_SE,DCCA_R2))
}
DCCA_beta<-DCCdccafunction(y,x,s){
xx<-cumsum(x-mean(x))
yy<-cumsum(y-mean(y))
t<-1:length(xx)
F2sj_xy<-runif(floor(length(xx)/s))
F2sj_xx<-F2sj_xy
for(ss in seq(1,(floor(length(xx)/s)*s),by=s)){
F2sj_xy[(ss-1)/s+1]<-sum((summary(lm(xx[ss:(ss+s-1)]~t[ss:(ss+s-1)]))$residuals)*(summary(lm(yy[ss:(ss+s-1)]~t[ss:(ss+s-1)]))$residuals))/(s-1)
F2sj_xx[(ss-1)/s+1]<-sum((summary(lm(xx[ss:(ss+s-1)]~t[ss:(ss+s-1)]))$residuals)*(summary(lm(xx[ss:(ss+s-1)]~t[ss:(ss+s-1)]))$residuals))/(s-1)
}
beta<-mean(F2sj_xy)/mean(F2sj_xx)
return(beta)
}
DCCA_beta_F<-function(y,x,s){
xx<-cumsum(x-mean(x))
yy<-cumsum(y-mean(y))
t<-1:length(xx)
F2sj_xy<-runif(floor(length(xx)/s))
F2sj_xx<-F2sj_xy
F2sj_yy<-F2sj_xy
for(ss in seq(1,(floor(length(xx)/s)*s),by=s)){
F2sj_xy[(ss-1)/s+1]<-sum((summary(lm(xx[ss:(ss+s-1)]~t[ss:(ss+s-1)]))$residuals)*(summary(lm(yy[ss:(ss+s-1)]~t[ss:(ss+s-1)]))$residuals))/(s-1)
F2sj_xx[(ss-1)/s+1]<-sum((summary(lm(xx[ss:(ss+s-1)]~t[ss:(ss+s-1)]))$residuals)*(summary(lm(xx[ss:(ss+s-1)]~t[ss:(ss+s-1)]))$residuals))/(s-1)
F2sj_yy[(ss-1)/s+1]<-sum((summary(lm(yy[ss:(ss+s-1)]~t[ss:(ss+s-1)]))$residuals)*(summary(lm(yy[ss:(ss+s-1)]~t[ss:(ss+s-1)]))$residuals))/(s-1)
}
beta<-mean(F2sj_xy)/mean(F2sj_xx)
return(c(beta,mean(F2sj_xx),mean(F2sj_yy)))
#return(c(beta,sum(F2sj_xx),sum(F2sj_yy)))
}
DCCA_beta_SE<-function(y,x,s){
r<-DCCA_beta_F(y,x,s)
beta<-r[1]
yhat<-beta*x
alpha<-mean(y)-beta*mean(x)
res<-y-yhat
residuals<-res-mean(res)
resres<-cumsum(residuals-mean(residuals))
F2sj_res<-runif(floor(length(residuals)/s))
t<-1:length(resres)
for(ss in seq(1,(floor(length(residuals)/s)*s),by=s)){
F2sj_res[(ss-1)/s+1]<-sum((summary(lm(resres[ss:(ss+s-1)]~t[ss:(ss+s-1)]))$residuals)*(summary(lm(resres[ss:(ss+s-1)]~t[ss:(ss+s-1)]))$residuals))/(s-1)
}
#SE<-mean(residuals^2)/((length(residuals)-2)*r[2])
SE<-mean(F2sj_res)/((length(residuals)-2)*r[2])
SE_a<-(mean(F2sj_res)/r[2])*(sum(x^2)/(length(residuals)*(length(residuals)-2)))
R<-1-mean(F2sj_res)/(r[3])
return(c(alpha,sqrt(SE_a),beta,sqrt(SE),R))
}
DCCA_beta_SE_F<-function(y,x,s){
r<-DCCA_beta_F(y,x,s)
beta<-r[1]
yhat<-beta*x
alpha<-mean(y)-beta*mean(x)
res<-y-yhat
residuals<-res-mean(res)
res_R<-y-x
resres<-cumsum(residuals-mean(residuals))
resres_R<-cumsum(res_R)
F2sj_res<-runif(floor(length(residuals)/s))
F2sj_res_R<-runif(floor(length(res_R)/s))
t<-1:length(resres)
for(ss in seq(1,(floor(length(residuals)/s)*s),by=s)){
F2sj_res[(ss-1)/s+1]<-sum((summary(lm(resres[ss:(ss+s-1)]~t[ss:(ss+s-1)]))$residuals)*(summary(lm(resres[ss:(ss+s-1)]~t[ss:(ss+s-1)]))$residuals))/(s-1)
F2sj_res_R[(ss-1)/s+1]<-sum((summary(lm(resres_R[ss:(ss+s-1)]~t[ss:(ss+s-1)]))$residuals)*(summary(lm(resres_R[ss:(ss+s-1)]~t[ss:(ss+s-1)]))$residuals))/(s-1)
}
#SE<-mean(residuals^2)/((length(residuals)-2)*r[2])
#SE<-mean(F2sj_res)/((length(residuals)-2)*r[2])
#SE<-mean(F2sj_res)/((length(F2sj_res)-2)*r[2]) #controlling for uncertainty connected to scales (higher scales have higher uncertainty due to lower number of blocks)
SE<-mean(F2sj_res)/(ceiling(length(residuals)/s)*r[2]) #loosing d.f. due to fitting a and b in each box
#SE_a<-(mean(F2sj_res)/r[2])*(sum(x^2)/(length(residuals)*(length(residuals)-2)))
#SE_a<-(mean(F2sj_res)/r[2])*(sum(x^2)/(length(F2sj_res)*(length(F2sj_res)-2))) #controlling for uncertainty connected to scales (higher scales have higher uncertainty due to lower number of blocks)
SE_a<-(mean(F2sj_res)/r[2])*(sum(x^2)/(length(residuals)*ceiling(length(residuals)/s))) #loosing d.f. due to fitting a and b in each box
R<-1-mean(F2sj_res)/(r[3])
#SSR_U<-sum(residuals^2)
SSR_U<-sum(F2sj_res)
#SSR_R<-sum((y-x)^2) #specific null: alpha=0, beta=1
SSR_R<-sum(F2sj_res_R)
#F_stat<-((SSR_R-SSR_U)/(SSR_U))*((length(residuals)-2)/2)
#F_stat<-((SSR_R-SSR_U)/(SSR_U))*((length(F2sj_res)-2)/2) #controlling for uncertainty connected to scales (higher scales have higher uncertainty due to lower number of blocks)
F_stat<-((SSR_R-SSR_U)/(SSR_U))*(ceiling(length(residuals)/s)/2) #loosing d.f. due to fitting a and b in each box
F_p<-pf(F_stat,2,length(F2sj_res)-2,lower.tail=FALSE)
return(c(alpha,sqrt(SE_a),beta,sqrt(SE),R,F_stat,F_p))
}
DCCA_beta_s<-function(y,x,smin,smax,step){
results<-matrix(rep(0,6*((smax-smin)/step+1)),ncol=6)
for(s in seq(smin,smax,by=step)){
beta<-DCCA_beta_SE(y,x,s)
results[((s-smin)/step+1),1]<-s
results[((s-smin)/step+1),2]<-beta[1]
results[((s-smin)/step+1),3]<-beta[2]
results[((s-smin)/step+1),4]<-beta[3]
results[((s-smin)/step+1),5]<-beta[4]
results[((s-smin)/step+1),6]<-beta[5]
}
return(results)
}
DCCA_beta_s_F<-function(y,x,smin,smax,step){
results<-matrix(rep(0,10*((smax-smin)/step+2)),ncol=10)
for(s in seq(smin,smax,by=step)){
beta<-DCCA_beta_SE_F(y,x,s)
results[((s-smin)/step+1),1]<-s
results[((s-smin)/step+1),2]<-beta[1]
results[((s-smin)/step+1),3]<-beta[2]
results[((s-smin)/step+1),4]<-2*pnorm(abs(beta[1]/beta[2]),lower.tail=FALSE)#p-value for null=0
results[((s-smin)/step+1),5]<-beta[3]
results[((s-smin)/step+1),6]<-beta[4]
results[((s-smin)/step+1),7]<-2*pnorm(abs((beta[3]-1)/beta[4]),lower.tail=FALSE)#p-value for null=1
results[((s-smin)/step+1),8]<-beta[5]
results[((s-smin)/step+1),9]<-beta[6]
results[((s-smin)/step+1),10]<-beta[7]
}
#results[(smax-smin)/step+2,2]<-mean(results[1:(dim(results)[1]-1),2])#A
#results[(smax-smin)/step+2,5]<-mean(results[1:(dim(results)[1]-1),5])#B
results[(smax-smin)/step+2,2]<-sum(results[1:(dim(results)[1]-1),2]*results[1:(dim(results)[1]-1),8])/sum(results[1:(dim(results)[1]-1),8])#A as R2(s) weighted
results[(smax-smin)/step+2,5]<-sum(results[1:(dim(results)[1]-1),5]*results[1:(dim(results)[1]-1),8])/sum(results[1:(dim(results)[1]-1),8])#B as R2(s) weighted
results[(smax-smin)/step+2,3]<-sqrt((sum(x^2)/length(x))*sum((y-results[(smax-smin)/step+2,2]-results[(smax-smin)/step+2,5]*x)^2)/((length(y)-dim(results)[1]+1)*sum((x-mean(x))^2)))#SE_A
results[(smax-smin)/step+2,4]<-2*pnorm(abs(results[(smax-smin)/step+2,2]/results[(smax-smin)/step+2,3]),lower.tail=FALSE)#p-value for null=0
results[(smax-smin)/step+2,6]<-sqrt(sum((y-results[(smax-smin)/step+2,2]-results[(smax-smin)/step+2,5]*x)^2)/((length(y)-dim(results)[1]+1)*sum((x-mean(x))^2)))#SE_B
results[(smax-smin)/step+2,7]<-2*pnorm(abs((results[(smax-smin)/step+2,5]-1)/results[(smax-smin)/step+2,6]),lower.tail=FALSE)#p-value for null=1
results[(smax-smin)/step+2,8]<-1-sum((y-results[(smax-smin)/step+2,2]-results[(smax-smin)/step+2,5]*x)^2)/sum(y^2)#R2
results[(smax-smin)/step+2,9]<-((length(x)-2)/2)*((results[(smax-smin)/step+2,8]-(1-(sum((y-x)^2))/(sum((y-mean(y))^2))))/(1-results[(smax-smin)/step+2,8]))#F_test_R2_based
results[(smax-smin)/step+2,10]<-pf(results[(smax-smin)/step+2,9],2,length(y)-2,lower.tail=FALSE)#F_test p_val
return(results)
}
This aimed to perform a calculations of regression parameters a detrended cross correlation analysis.
Is there a way to do it automatically? or we should do it line by line.
This analysis should be performed on time series with the same length.

The answer is no, or at least I don't know of any way to do it, they are two different languages.
Maybe you want to look to the file exchange in Matlab, there is an script like MATLAB R-link that allows you to connect Matlab with R, this way you can call functions of R in MatLab.
Here is a description of this file:
A COM based interface that allows you to call R functions from within MATLAB. The functions are:
openR - Connect to an R server process.
evalR - Run an R command.
getRdata - Copies an R variable to MATLAB.
putRdata - Copies MATLAB data to an R variable.
closeR - Close connection to R server process.
Rdemo - An example of using R from withing MATLAB.
The other option is that you translate the code line by line.
EDIT:
I keep searching more options and I found this one to allow execute r code into Matlab
I hope this could help you.

Related

How to suppress annoying stream of warnings from pointcloudlibrary `SampleConsensusModelPlane::optimizeModelCoefficients`

I have this function for fitting a plane to a pointcloud using PCL's sac model fitting. I want the best result I can get, so I want to run with seg.setOptimizeCoefficients(true).
The problem is that a lot of the time, the pointcloud passed in, will not have enough points to optimise coefficients, so I get a continuous stream of:
[pcl::SampleConsensusModelPlane::optimizeModelCoefficients] Not enough inliers found to optimize model coefficients (0)! Returning the same coefficients.
I would like to have coefficient optimisation to run when it can, and when it can't to just carry on without polluting the CLI output with many red warning messages.
according to this issue this message just means that there are fewer than 3 inlier points for the SAC model fitting. I do extract the inlier points, so I could manually check if there are 3 or more. But I can't see how to do this first, and THEN find the optimized model coefficients. Is there a way?
inline void fit_plane_to_points(
const pcl::PointCloud<pcl::PointXYZI>::ConstPtr& det_points,
const pcl::ModelCoefficients::Ptr& coefficients,
const Eigen::Vector3f& vec,
const pcl::PointCloud<pcl::PointXYZI>::Ptr& inlier_pts) {
// if no det points to work with, don't try and segment
if (det_points->size() < 3) {
return;
}
// fit surface point samples to a plane
pcl::PointIndices::Ptr inlier_indices(new pcl::PointIndices);
pcl::SACSegmentation<pcl::PointXYZI> seg;
seg.setModelType(pcl::SACMODEL_PERPENDICULAR_PLANE);
// max allowed difference between the plane normal and the given axis
seg.setEpsAngle(sac_angle_threshold_);
seg.setAxis(vec);
seg.setMethodType(pcl::SAC_RANSAC);
seg.setDistanceThreshold(sac_distance_threshold_);
seg.setMaxIterations(1000);
seg.setInputCloud(det_points);
seg.setOptimizeCoefficients(sac_optimise_coefficients_);
seg.segment(*inlier_indices, *coefficients);
if (inlier_indices->indices.empty()) {
// if no inlier points don't try and extract
return;
}
// extract the planar points
pcl::ExtractIndices<pcl::PointXYZI> extract;
extract.setInputCloud(det_points);
extract.setIndices(inlier_indices);
extract.setNegative(false);
extract.filter(*inlier_pts);
return;
}
I would say the best way to do this is to disable setOptimizeCoefficients and then do that manually after seg.segment. You basically have to recreate these lines: https://github.com/PointCloudLibrary/pcl/blob/10235c9c1ad47989bdcfebe47f4a369871357e2a/segmentation/include/pcl/segmentation/impl/sac_segmentation.hpp#L115-L123 .
You can access the model via getModel() (https://pointclouds.org/documentation/classpcl_1_1_s_a_c_segmentation.html#ac7b9564ceba35754837b4848cf448d78).
Ultimately got it working, with advice from IBitMyBytes by setting seg.setOptimizeCoefficients(false); and then manually optimising after doing my own check:
// if we can, optimise model coefficients
if (sac_optimise_coefficients_ && inlier_indices->indices.size() > 4) {
pcl::SampleConsensusModel<pcl::PointXYZI>::Ptr model = seg.getModel();
Eigen::VectorXf coeff_refined;
Eigen::Vector4f coeff_raw(coefficients->values.data());
model->optimizeModelCoefficients(inlier_indices->indices,
coeff_raw, coeff_refined);
coefficients->values.resize(coeff_refined.size());
memcpy(&coefficients->values[0], &coeff_refined[0],
coeff_refined.size() * sizeof (float));
// Refine inliers
model->selectWithinDistance(coeff_refined, sac_distance_threshold_,
inlier_indices->indices);
}

r: sym() function failing [can't convert string to symbol]

I have a dataset with over 10 categorical variables and about 20 numerical ones. I'm trying to edit Stef van Buuren's mice.impute.logreg function which is available on github, to call glm.fit(), but with a higher maxit value to try to reach convergence. However, on running the code as is, I get the following error:
Error: Only strings can be converted to symbols
and it comes from this line in the code:
rv <- t(chol(sym(fit.sum$cov.unscaled)))
I went ahead to print out the content of fit.sum$cov.unscaled, and got a huge covariance matrix(?) with all variables (categorical ones kinda one-hot-encoded(?)), something like this, but way larger:
Proteinuria22 Proteinuria23 Proteinuria24 Proteinuria25 Aetiol22 Aetiol23 Aetiol24
-0.0775687218 6.603074e-02 6.995692e-01 -1.0462947407 -1.990400e-01 -3.756997e+01 -6.198267e-01
Weight2 -0.0003022753 6.802872e-04 -1.138967e-03 -0.0043737786 2.550278e-04 3.380858e-02 6.343819e-04
Height2 0.0174235854 -8.945169e-02 -2.588742e-01 0.2947104430 -1.763788e-01 2.027542e+00 -3.676413e-02
BMI22 0.0038176385 -2.246294e-02 3.529623e-02 0.0507158023 -1.959203e-03 1.515110e+00 3.618223e-02
BMI23 0.0463573025 4.600740e-02 1.210799e-01 0.1009359117 6.368376e-03 7.268413e-01 -4.677462e-03
BMI24 0.0230542190 4.822956e-02 1.424563e-01 0.2136974371 -7.688207e-02 -4.099045e+00 -4.920604e-02
Proteinuria21 0.2564365948 2.399999e-01 2.869407e-01 0.2866854741 -3.345524e-02 7.021764e+00 -1.380307e-02
Proteinuria22 0.5114421153 2.658057e-01 2.444392e-01 0.2575295706 -5.555202e-02 2.132465e+00 -2.367527e-02
Proteinuria23 0.2658056994 8.278569e-01 2.805812e-01 0.1743841777 -5.433797e-02 -5.289189e+00 -1.905688e-02
Proteinuria24 0.2444391680 2.805812e-01 5.436426e-01 0.2272864202 -4.551615e-02 2.533664e+00 -1.962130e-02
Proteinuria25 0.2575295706 1.743842e-01 2.272864e-01 1.1656567100 -7.355628e-02 9.412580e+00 -1.330318e-01
Aetiol22 -0.0555520221 -5.433797e-02 -4.551615e-02 -0.0735562813 4.327236e-01 4.698377e+00 1.196196e-01
Aetiol23 2.1324651321 -5.289189e+00 2.533664e+00 9.4125804535 4.698377e+00 1.175992e+04 2.984111e+00
Since I'm still not very conversant with r, I really have no idea what this means... I understand that sym() is used to convert a string to a symbol, but I don't understand how (or why) such a huge matrix would be converted into a symbol. Any ideas, please?
Thanks to pointers from #arun's comment, I discovered that I only needed to remove the sym() function, given the use of the surrounding chol function:
Compute the Choleski factorization of a real symmetric positive-definite square matrix.
I'm yet to figure out why the code author put the sym() function there in the first place, though, since the code apparently breaks with it, but works fine without it.

Square Roots by Newton's Method (SCIP example 1.1.7) in R code

I want to apply Newton's Method for square root through iterations in RStudio, but I keep getting error
"Error: C stack usage 7969204 is too close to the limit"
when I put a wrong sqrt in the 'g'. Instead, the code works fine when I write directly the right number (example: sqriter(2,4) --> 2)
Below is the code I wrote for it.
thank you for your help!
sqriter <- function(g,x){
ifelse(goodguess(g,x), g, sqriter(improve(g,x), x))
}
goodguess <- function(g,x){
abs(g*g-x)<0.001
}
average <- function(g,x){
((g+x)/2)
}
improve <- function(g,x){
average(g, (g/x))
}

Find min value (parameters estmation) based on recurrence equations

Sorry for trivial question, but I`m not a programmer. Do I transformed the following tasks in the form of R function OK?
I have recurrence equations, e.g.(p1_par,...,p4_par-parameters to find):
z1[i+1]= z1[i]+p1_par*p2_par
z12[i+1]= z12[i]+(p1_par*z1[i]-p3_par*z1z2[i]-p4_par)*p2_par
z1z2[i+1]=z1z2[i]+(-p3_par*z12[i]-p4_par*z1z2[i])*p2_par
i=1,...,5
with the initial conditions for i=0:
z1_0=1.23
z12_0=1
z1z2_0=0
and t=6, y=c(0.1,0.06,0.08,0.04,0.05,0.01)
I want to find parameters based on min value of function e.g. like this:
(-2*p1_par*z1[i]-z12[i]+y[i+1]^2+2*p3_par*z1z2[i]+2*p4_par*z1z3[i])^2
I try to build the function in R like:
function1=function(p1_par,p2_par,p3_par,p4_par,y,t){
ep=1
summa=0
result=rep(1,t)
for(i in 1:t){
z1_0=1.23
z12_0=1
z1z2_0=0
z1[1]=z1_0+p1_par*p2_par
z12[1]=z12_0+(p1_par*z1_0-*p3_par*z1z2_0-*p4_par)*p2_par
z1z2[1]=z1z2_0+(-p3_par*z12_0-p4_par*z1z2_0)*p2_par
z1[i+1]= z1[i]+p1_par*p2_par
z12[i+1]= z12[i]+p1_par*z1[i]-p3_par*z1z2[i]-p4_par)*p2_par
z1z2[i+1]=z1z2[i]+(-p3_par*z12[i]-p4_par*z1z2[i])*p2_par
if(i==1) {
result[ep]=(-2*p1_par*z1_0-z12_0+y[i+1]^2+2*p3_par*z1z2_0+2*p4_par*z1z3_0)^2
} else {
result[ep]=(-2*p1_par*z1[i]-z12[i]+y[i+1]^2+2*p3_par*z1z2[i]+2*p4_par*z1z3[i])^2
}
summa<<-summa+result[ep]
ep=ep+1
}
return(result)
}
Do I transformed task of the R function correct? Results from other softwares (like Math) differs. Thanks in advance for help.
PPS

K nearest neighbors with Gower distance measure in R

I hope everyone is well; I have a question it is may be looked as a dumb one but I really need someone to explain it for me. I also though it will be useful for some, since it has been asked before with no satisfactory answer.
Since , I have mixed data type matrix, I was looking for K-nearst neighbors algorithem that works with gower distance in R. I found the function Knngow under the package dprep that claims to perform this.
http://finzi.psych.upenn.edu/library/dprep/html/knngow.html
The function take three argument knngow( Training_Set, Testing_set, K_number) and return the predicted class.
I was playing around with it and was wondering how the function can recognize what is my target vector? Put differently, how does it return the predicted class, without me acknowledging it in advance with my target column.
please find the source code below ( I retrieved it using the function edit)
function (train, test, k)
{
p = dim(train)[2]
ntest = dim(test)[1]
ntrain = dim(train)[1]
classes = rep(0, ntest)
if (ntest == ntrain) {
for (i in 1:ntest) {
tempo = order(gower.dist(test[i, -p], train[-i,
-p]))[1:k]
classes[i] = moda(train[tempo, p])[1]
}
}
else {
for (i in 1:ntest) {
tempo = order(StatMatch::gower.dist(test[i, -p],
train[, -p]))[1:k]
classes[i] = moda(train[tempo, p])[1]
}
}
classes
}
please can someone explain for me the code?
I hope I have post the question in the correct form, please let me know if I have to move it to somewhere else.
Thank you very much for your time.
knngow function takes the last column of the train as the target attribute. Also p = dim(train)[2]) indicates your column number.
Column p (the last column of your training data) is not used for calculating Gower dist. It is only taken into account when it comes to predict the class label of test samples.

Resources