Strange behavior of `mutate` when applied to a dataframe - r

I'm trying to mutate a character column in a dataframe. The result should be another character column, but in reality some of the elements in the column are converted to numeric. How could this happen?
This is the data for the dataframe; both sumspec and rate.express are character vectors:
sumspec= c("TA","TCO2")
rate.express = c("(rNC-rPC)*RO2POC[i]*dstopw[i] + (rNC-rPC+4/3)*RNO2POC[i]*dstopw[i] + (rNC-rPC)*RNO3POC[i]*dstopw[i] + (rNC-rPC+4)*RMnO2POC[i]*dstopw[i] + (rNC-rPC+8)*RFeOOHPOC[i]*dstopw[i] + (rNC-rPC+1)*RSO4POC[i]*dstopw[i] + (rNC-rPC)*RCH4POC[i]*dstopw[i] + -2*RO2NH4[i] + -2*RO2Mn[i] + -1*RO2Mn_ads[i] + -2*RO2Fe[i] + -1*RO2Fe_ads[i] + -2*RO2H2S[i] + -2*RO2FeS[i]*dstopw[i] + -8/5*RNO3Mn[i] + -1.8*RNO3Fe[i] + 2*RSO4CH4[i] + -1*RMnO2Fe[i]*dstopw[i] + 2*RMnO2H2S[i]*dstopw[i] + 4*RFeOOHH2S[i]*dstopw[i] + -2*RFeS_pre[i]*dstopw[i] + 2*RFeS_dis[i]*dstopw[i] + 2*RCaCO3_dis[i]*dstopw[i] + -2*RCaCO3_pre[i]*dstopw[i] + 2*RMnCO3_dis[i]*dstopw[i] + -2*RMnCO3_pre[i]*dstopw[i] + 2*RFeCO3_dis[i]*dstopw[i] + -2*RFeCO3_pre[i]*dstopw[i] + 10*RAnnite_dis[i]",
"1*RO2POC[i]*dstopw[i] + 1*RNO2POC[i]*dstopw[i] + 1*RNO3POC[i]*dstopw[i] + 1*RMnO2POC[i]*dstopw[i] + 1*RFeOOHPOC[i]*dstopw[i] + 1*RSO4POC[i]*dstopw[i] + 0.5*RCH4POC[i]*dstopw[i] + 1*RO2CH4[i] + 1*RSO4CH4[i] + 1*RCaCO3_dis[i]*dstopw[i] + -1*RCaCO3_pre[i]*dstopw[i] + 1*RMnCO3_dis[i]*dstopw[i] + -1*RMnCO3_pre[i]*dstopw[i] + 1*RFeCO3_dis[i]*dstopw[i] + -1*RFeCO3_pre[i]*dstopw[i]")
Now mutate the column rate.express:
dH <- data.frame(sumspec,
rate.express) %>%
mutate(RHS = ifelse(
sumspec == "TA", rate.express,
paste0("-1.0*(",rate.express,")*dTA/d",sumspec,"[i]")
))
Which returns
> dH$RHS
[1] "1"
[2] "-1.0*(1*RO2POC[i]*dstopw[i] + 1*RNO2POC[i]*dstopw[i] + 1*RNO3POC[i]*dstopw[i] + 1*RMnO2POC[i]*dstopw[i] + 1*RFeOOHPOC[i]*dstopw[i] + 1*RSO4POC[i]*dstopw[i] + 0.5*RCH4POC[i]*dstopw[i] + 1*RO2CH4[i] + 1*RSO4CH4[i] + 1*RCaCO3_dis[i]*dstopw[i] + -1*RCaCO3_pre[i]*dstopw[i] + 1*RMnCO3_dis[i]*dstopw[i] + -1*RMnCO3_pre[i]*dstopw[i] + 1*RFeCO3_dis[i]*dstopw[i] + -1*RFeCO3_pre[i]*dstopw[i])*dTA/dTCO2[i]"
So mutating rate.express should just return the original value for the first element, but it is converted to numeric instead. The second element is OK. I think it is converted to numeric because
> as.numeric(dH$rate.express[1])
[1] 1
Instead, if I explicitly apply as.character to rate.express
> dH <- data.frame(sumspec,
+ rate.express) %>%
+ mutate(RHS = ifelse(
+ sumspec == "TA", as.character(rate.express), # explicitly state type
+ paste0("-1.0*(",rate.express,")*dTA/d",sumspec,"[i]")
+ ))
Then the result is correct
Why did this type conversion happen?

Related

How to solve R not recognizing white-space after long line of code?

I am trying to add equations to my code which are quite long. When I do so, R does not quite grasp where the end of the equation is, and when I go to the next line filled with whitespace, it puts the cursor in the middle of the line rather than at the beginning, as it usually does.
When I run the code, it ignores the end of the long equation, as one can tell from the console output
+ # 3.b) Initialize ODEs
+ initialstate <- c(
Error: unexpected symbol in:
"# 3.b) Initialize ODEs
initialstate"
-- so using + rather than > -- following this input:
C_PL_s_ss = (Rin_s*(CL_dis_s*T1*T2*V_IS + CL_dis_s*T1*T2*V_PE + CL_dis_s*T1*T2*V_PL + CL_dis_s*T1*T3_s*V_IS + CL_dis_s*T1*T4_s*V_IS + CL_dis_s*T2*T3_s*V_IS + CL_dis_s*T2*T4_s*V_IS + CL_dis_s*T2*T5_s*V_IS + CL_dis_s*T2*T6_s*V_IS + CL_dis_s*T1*T3_s*V_PE + CL_dis_s*T1*T4_s*V_PE + CL_dis_s*T2*T3_s*V_PE + CL_dis_s*T2*T4_s*V_PE + CL_dis_s*T2*T5_s*V_PE + CL_dis_s*T2*T6_s*V_PE + CL_dis_s*T1*T3_s*V_PL + CL_dis_s*T1*T4_s*V_PL + CL_dis_s*T2*T3_s*V_PL + CL_dis_s*T2*T4_s*V_PL + CL_dis_s*T2*T5_s*V_PL + CL_dis_s*T2*T6_s*V_PL + CL_dis_s*T1*T2*V_IS*delta_Rin_s_TI + CL_dis_s*T1*T3_s*V_IS*delta_Rin_s_TI + CL_dis_s*T1*T4_s*V_IS*delta_Rin_s_TI + CL_dis_s*T2*T3_s*V_IS*delta_Rin_s_TI + CL_dis_s*T2*T4_s*V_IS*delta_Rin_s_TI + CL_dis_s*T2*T5_s*V_IS*delta_Rin_s_TI + CL_dis_s*T2*T6_s*V_IS*delta_Rin_s_TI + CL_dis_s*CLup_s^2*V_IS*V_PE*V_VC + CL_dis_s*CLup_s^2*V_IS*V_PL*V_VC + CLup_s^2*T1*V_IS*V_PE*V_PL + CLup_s^2*T2*V_IS*V_PE*V_VC + CLup_s^2*T2*V_PE*V_PL*V_VC + CLup_s^2*T3_s*V_IS*V_PE*V_PL + CLup_s^2*T4_s*V_IS*V_PE*V_PL + CLup_s^2*T5_s*V_IS*V_PE*V_PL + CLup_s^2*T6_s*V_IS*V_PE*V_PL + CLup_s^2*T3_s*V_PE*V_PL*V_VC + CLup_s^2*T4_s*V_PE*V_PL*V_VC + CLup_s^3*V_IS*V_PE*V_PL*V_VC + CL_dis_s*CLup_s*T1*V_IS*V_PE + CL_dis_s*CLup_s*T1*V_IS*V_PL + CL_dis_s*CLup_s*T2*V_IS*V_VC + CL_dis_s*CLup_s*T2*V_PE*V_VC + CL_dis_s*CLup_s*T2*V_PL*V_VC + CL_dis_s*CLup_s*T3_s*V_IS*V_PE + CL_dis_s*CLup_s*T4_s*V_IS*V_PE + CL_dis_s*CLup_s*T5_s*V_IS*V_PE + CL_dis_s*CLup_s*T6_s*V_IS*V_PE + CL_dis_s*CLup_s*T3_s*V_IS*V_PL + CL_dis_s*CLup_s*T4_s*V_IS*V_PL + CL_dis_s*CLup_s*T5_s*V_IS*V_PL + CL_dis_s*CLup_s*T6_s*V_IS*V_PL + CL_dis_s*CLup_s*T3_s*V_PE*V_VC + CL_dis_s*CLup_s*T4_s*V_PE*V_VC + CL_dis_s*CLup_s*T3_s*V_PL*V_VC + CL_dis_s*CLup_s*T4_s*V_PL*V_VC + CLup_s*T1*T2*V_IS*V_PE + CLup_s*T1*T2*V_PE*V_PL + CLup_s*T1*T3_s*V_IS*V_PE + CLup_s*T1*T4_s*V_IS*V_PE + CLup_s*T2*T3_s*V_IS*V_PE + CLup_s*T2*T4_s*V_IS*V_PE + CLup_s*T2*T5_s*V_IS*V_PE + CLup_s*T2*T6_s*V_IS*V_PE + CLup_s*T1*T3_s*V_PE*V_PL + CLup_s*T1*T4_s*V_PE*V_PL + 
CLup_s*T2*T3_s*V_PE*V_PL + CLup_s*T2*T4_s*V_PE*V_PL + CLup_s*T2*T5_s*V_PE*V_PL + CLup_s*T2*T6_s*V_PE*V_PL + CL_dis_s*CLup_s*T2*V_IS*V_VC*delta_Rin_s_TI + CLup_s*T1*T2*V_IS*V_PE*delta_Rin_s_TI + CLup_s*T1*T3_s*V_IS*V_PE*delta_Rin_s_TI + CLup_s*T1*T4_s*V_IS*V_PE*delta_Rin_s_TI + CLup_s*T2*T3_s*V_IS*V_PE*delta_Rin_s_TI + CLup_s*T2*T4_s*V_IS*V_PE*delta_Rin_s_TI + CLup_s*T2*T5_s*V_IS*V_PE*delta_Rin_s_TI + CLup_s*T2*T6_s*V_IS*V_PE*delta_Rin_s_TI + CLup_s^2*T2*V_IS*V_PE*V_VC*delta_Rin_s_TI))/(CLR_s*CL_dis_s*T1*T2 + CLR_s*CL_dis_s*T1*T3_s + CLR_s*CL_dis_s*T1*T4_s + CLR_s*CL_dis_s*T2*T3_s + CLR_s*CL_dis_s*T2*T4_s + CLR_s*CL_dis_s*T2*T5_s + CLR_s*CL_dis_s*T2*T6_s + CLR_s*CL_dis_s*CLup_s^2*V_IS*V_VC + CL_dis_s*CLup_s^2*Q*V_IS*V_VC + CLR_s*CLup_s^2*T1*V_IS*V_PE + CLR_s*CLup_s^2*T2*V_PE*V_VC + CL_dis_s*CLup_s^2*T1*V_IS*V_PE + CL_dis_s*CLup_s^2*T1*V_IS*V_PL + CL_dis_s*CLup_s^2*T2*V_PE*V_VC + CL_dis_s*CLup_s^2*T2*V_PL*V_VC + CLR_s*CLup_s^2*T3_s*V_IS*V_PE + CLR_s*CLup_s^2*T4_s*V_IS*V_PE + CLR_s*CLup_s^2*T5_s*V_IS*V_PE + CLR_s*CLup_s^2*T6_s*V_IS*V_PE + CLR_s*CLup_s^2*T3_s*V_PE*V_VC + CLR_s*CLup_s^2*T4_s*V_PE*V_VC + CL_dis_s*CLup_s^2*T3_s*V_IS*V_PE + CL_dis_s*CLup_s^2*T4_s*V_IS*V_PE + CL_dis_s*CLup_s^2*T5_s*V_IS*V_PE + CL_dis_s*CLup_s^2*T6_s*V_IS*V_PE + CL_dis_s*CLup_s^2*T3_s*V_IS*V_PL + CL_dis_s*CLup_s^2*T4_s*V_IS*V_PL + CL_dis_s*CLup_s^2*T5_s*V_IS*V_PL + CL_dis_s*CLup_s^2*T6_s*V_IS*V_PL + CL_dis_s*CLup_s^2*T3_s*V_PE*V_VC + CL_dis_s*CLup_s^2*T4_s*V_PE*V_VC + CL_dis_s*CLup_s^2*T3_s*V_PL*V_VC + CL_dis_s*CLup_s^2*T4_s*V_PL*V_VC + CLR_s*CLup_s^3*V_IS*V_PE*V_VC + CL_dis_s*CLup_s^3*V_IS*V_PE*V_VC + CL_dis_s*CLup_s^3*V_IS*V_PL*V_VC + CLup_s^2*Q*T2*V_PE*V_VC + CLup_s^2*Q*T3_s*V_IS*V_PE + CLup_s^2*Q*T4_s*V_IS*V_PE + CLup_s^2*Q*T5_s*V_IS*V_PE + CLup_s^2*Q*T6_s*V_IS*V_PE + CLup_s^2*Q*T3_s*V_PE*V_VC + CLup_s^2*Q*T4_s*V_PE*V_VC + CLup_s^3*Q*V_IS*V_PE*V_VC + CLup_s^2*T1*T2*V_PE*V_PL + CLup_s^2*T1*T3_s*V_PE*V_PL + CLup_s^2*T1*T4_s*V_PE*V_PL + CLup_s^2*T2*T3_s*V_PE*V_PL + 
CLup_s^2*T2*T4_s*V_PE*V_PL + CLup_s^2*T2*T5_s*V_PE*V_PL + CLup_s^2*T2*T6_s*V_PE*V_PL + CLup_s^3*T1*V_IS*V_PE*V_PL + CLup_s^3*T2*V_PE*V_PL*V_VC + CLup_s^3*T3_s*V_IS*V_PE*V_PL + CLup_s^3*T4_s*V_IS*V_PE*V_PL + CLup_s^3*T5_s*V_IS*V_PE*V_PL + CLup_s^3*T6_s*V_IS*V_PE*V_PL + CLup_s^3*T3_s*V_PE*V_PL*V_VC + CLup_s^3*T4_s*V_PE*V_PL*V_VC + CLup_s^4*V_IS*V_PE*V_PL*V_VC + CLR_s*CL_dis_s*CLup_s*T1*V_IS + CLR_s*CL_dis_s*CLup_s*T2*V_VC + CLR_s*CL_dis_s*CLup_s*T3_s*V_IS + CLR_s*CL_dis_s*CLup_s*T4_s*V_IS + CLR_s*CL_dis_s*CLup_s*T5_s*V_IS + CLR_s*CL_dis_s*CLup_s*T6_s*V_IS + CLR_s*CL_dis_s*CLup_s*T3_s*V_VC + CLR_s*CL_dis_s*CLup_s*T4_s*V_VC + CL_dis_s*CLup_s*Q*T2*V_VC + CL_dis_s*CLup_s*Q*T3_s*V_IS + CL_dis_s*CLup_s*Q*T4_s*V_IS + CL_dis_s*CLup_s*Q*T5_s*V_IS + CL_dis_s*CLup_s*Q*T6_s*V_IS + CL_dis_s*CLup_s*Q*T3_s*V_VC + CL_dis_s*CLup_s*Q*T4_s*V_VC + CLR_s*CLup_s*T1*T2*V_PE + CL_dis_s*CLup_s*T1*T2*V_PE + CL_dis_s*CLup_s*T1*T2*V_PL + CLR_s*CLup_s*T1*T3_s*V_PE + CLR_s*CLup_s*T1*T4_s*V_PE + CLR_s*CLup_s*T2*T3_s*V_PE + CLR_s*CLup_s*T2*T4_s*V_PE + CLR_s*CLup_s*T2*T5_s*V_PE + CLR_s*CLup_s*T2*T6_s*V_PE + CL_dis_s*CLup_s*T1*T3_s*V_PE + CL_dis_s*CLup_s*T1*T4_s*V_PE + CL_dis_s*CLup_s*T2*T3_s*V_PE + CL_dis_s*CLup_s*T2*T4_s*V_PE + CL_dis_s*CLup_s*T2*T5_s*V_PE + CL_dis_s*CLup_s*T2*T6_s*V_PE + CL_dis_s*CLup_s*T1*T3_s*V_PL + CL_dis_s*CLup_s*T1*T4_s*V_PL + CL_dis_s*CLup_s*T2*T3_s*V_PL + CL_dis_s*CLup_s*T2*T4_s*V_PL + CL_dis_s*CLup_s*T2*T5_s*V_PL + CL_dis_s*CLup_s*T2*T6_s*V_PL)
# 3.b) Initialize ODEs
initialstate <- c(*[...some other code that works unless you add the long equation above...])*
Bizarrely, if I write more than one of these longer equations, it recognises the end of exactly every second one, i.e. it pairs two, which leads to the issue. For the ones that it does not pair, it also has the > rather than + in the respective place in the console. I could get it to work fine for 5 short equations.
As I am exporting the equations from Matlab (as I need to use the symbolics toolbox to find the solution), I checked whether any of
cutting whitespaces out online, incl. equation by equation, so line by line
pasting the many equations into Word to see whether there is a difference in the tabs/new lines etc. between the equations for which the pasting interrupts between equations appropriately versus not (no, both are the same)
pasting into a text editor before moving to R, incl. equation by equation, so line by line
using my local version of R studio rather than the R studio workbench I was using previously
would help but they did not.
I have tried for a long time now and would be super grateful for any insight!
R is built with a maximum line length of 4096 characters as shown in this thread. Lines that are longer than that will need to be broken.

Data shows n/a in a row where a value should be. Why does it not show the value it's supposed to show?

TrainAttempt$PriceGuess <- with(TrainAttempt,
0+(LotFrontage*1)+(LotArea*1)+
(MasVnrArea*1)+(BsmtFinSF1*1)+
(BsmtFinSF2*1)+
(BsmtUnfSF*1)+
(TotalBsmtSF*1)+
(GrLivArea*1) +
(BsmtFullBath*1) +
(BsmtHalfBath*1) +
(FullBath*1) +
(HalfBath+0) +
(BedroomAbvGr*1) +
(KitchenAbvGr*1) +
(TotRmsAbvGrd*1) +
(Fireplaces*1) +
(OpenPorchSF*1) +
(GarageArea*1) +
(WoodDeckSF*1) +
(OpenPorchSF+0) +
(EnclosedPorch*1) +
(ScreenPorch*1) +
(PoolArea*1) +
(MiscVal*1))

The leading minor of order 4 is not positive definite in npplreg

I am running the following code:
mydata1 = data.frame(dataset)
mydata1 <- na.omit(mydata1)
bw <- npplregbw(mydata1$X1 ~ mydata1$X2 + mydata1$X3 + mydata1$X4 + mydata1$effect_1993 + mydata1$effect_1994 + mydata1$effect_1995 + mydata1$effect_1996 + mydata1$effect_1997 + mydata1$country_2 + mydata1$country_3 + mydata1$country_4 + mydata1$country_5 + mydata1$country_6 + mydata1$effect_1998 + mydata1$effect_1999 + mydata1$effect_2000 + mydata1$effect_2001 + mydata1$effect_2002 + mydata1$effect_2003 + mydata1$effect_2004 + mydata1$effect_2005 + mydata1$effect_2006 + mydata1$effect_2007 + mydata1$effect_2008 + mydata1$effect_2009 + mydata1$effect_2010 + mydata1$effect_2011 + mydata1$effect_2012 + mydata1$effect_2013 + mydata1$effect_2014 + mydata1$effect_2015 + mydata1$effect_2016 + mydata1$effect_2017 + mydata1$effect_2018 + mydata1$effect_2019 + mydata1$effect_2020 + mydata1$effect_2021|mydata1$X5 + mydata1$X6 + mydata1$X7 + mydata1$X8, data = mydata1, na.action = na.omit)
summary(bw)
reg_np <- npplreg(bw)
The code is running fine except the last command which gives the following error:
Error in chol.default(t(model.matrix(model)) %*% model.matrix(model)) :
the leading minor of order 4 is not positive definite
My data do not have 0 (except the fixed effects data) or NA values.
Is there any way I can proceed with the npplreg regression without getting that error?
Thanks a lot in advance

How to change the polynomial order in a for-loop using the poly R function?

I would like to fit a regression by trying different polynomials, and I tried running this loop:
for (p_order in 1:9) {
assign(paste("RD0", p_order, sep = ""), electricity_price ~ d1 + gas_price + coal_price +
oil_price + EUA + weekday + month + median_windspeed1 +
median_windspeed2 + median_windspeed3 + median_windspeed4 +
sun1 + sun2 + sun3 + sun4 + median_temp1 + median_temp2 +
median_temp3 + median_temp4 + poly(as.numeric(date), p_order, raw=TRUE) + time)
}
Although it correctly creates the names of the variables (RD01, RD02, etc.), instead of saving the correct order of the polynomial (1, 2, etc.) it stores "p_order". For example,
> RD04
electricity_price ~ d1 + gas_price + coal_price + oil_price +
EUA + weekday + month + median_windspeed1 + median_windspeed2 +
median_windspeed3 + median_windspeed4 + sun1 + sun2 + sun3 +
sun4 + median_temp1 + median_temp2 + median_temp3 + median_temp4 +
poly(as.numeric(date), p_order, raw = TRUE) + time
> RD07
electricity_price ~ d1 + gas_price + coal_price + oil_price +
EUA + weekday + month + median_windspeed1 + median_windspeed2 +
median_windspeed3 + median_windspeed4 + sun1 + sun2 + sun3 +
sun4 + median_temp1 + median_temp2 + median_temp3 + median_temp4 +
poly(as.numeric(date), p_order, raw = TRUE) + time
Could someone explain why this happens and how to sort it out?
Thank you!
Create the formula first, then assign it. I will post a simplified example.
for (p_order in 1:2) {
fmla <- paste("electricity_price ~ d1 + poly(as.numeric(date),", p_order, ", raw = TRUE)")
assign(paste("RD0", p_order, sep = ""), as.formula(fmla))
}
RD01
#electricity_price ~ d1 + poly(as.numeric(date), 1, raw = TRUE)
RD02
#electricity_price ~ d1 + poly(as.numeric(date), 2, raw = TRUE)

Error in First Differences Model

I am very new to R and trying to tackle some homework that is giving me trouble. I think I have just about everything worked out, except for one last glitch.
When I create the following First differences models (using my two panel datasets):
out00 <- plm(logmrate ~ 0 + lawchange + logbeertaxa + y70 + y71 + y72 + y73 + y74 + y75 + y76 + y77 + y78 + y79 + y80 + y81 + y82 + y83 + y84 + y85 + y86 + y87 + y88 + y89 + y90 + y91 + y92 + y93 + y94 + y95 + y96, data = pdt.deaths, model = 'fd')
out01 <- plm(logmrate ~ 0 + lawchange + logbeertaxa + y70 + y71 + y72 + y73 + y74 + y75 + y76 + y77 + y78 + y79 + y80 + y81 + y82 + y83 + y84 + y85 + y86 + y87 + y88 + y89 + y90 + y91 + y92 + y93 + y94 + y95 + y96, data = pdt.deaths1, model = 'fd')
stargazer(out00, type="text")
stargazer(out01, type="text")
I get this error term returned for both models:
Error in crossprod(t(X), beta) : non-conformable arguments
The variable "lawchange" is a 1 or 0 variable, and each of the year variables ("y70"..."y96") is a year indicator variable to account for time.

Resources