I am working with ggeffects package
I have the following syntax
data_example <- structure(list(paciente = structure(c(6171, 6488, 6300, 6446,
6489, 6445, 6473, 6351, 6212, 6387), label = "Paciente", format.spss = "F6.0"),
edad_s1 = structure(c(69, 62, 60, 71, 67, 59, 63, 66, 67,
70), label = "Edad", format.spss = "F3.0"), sexo_s1 = structure(c(1L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L), .Label = c("Hombre",
"Mujer"), label = "Sexo", class = "factor"), grupo_int_v00 = structure(c(1L,
1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L), .Label = c("A", "B"), label = "Grupo de intervención", class = "factor"),
time = c(0, 0, 0, 2, 2, 2, 1, 2, 1, 1), peso1 = c(89.9, 62,
91.5, 75.2, 68.2, 88.4, 93.6, 79, 88.3, 84.4), cintura1 = c(113,
90, 112, NA, 87.5, 116, 98.5, 104, 112.5, 108.5), tasis2_e = c(132,
132, 149, NA, 145, 137, 129, 152, 146, 129), tadias2_e = c(81,
58, 79, NA, 80, 60, 79, 87, 79, 68), p17_total = c(7, 9,
10, 10, 10, 10, 10, 7, 10, 11), geaf_tot = c(3412.59, 3524.48,
559.44, 5454.55, 4293.71, 839.16, 3146.85, 7552.45, 4335.66,
566.9), glucosa = c(102, 97, 89, NA, 88, 168, 104, NA, 114,
121), albumi = c(4.94, 4.68, 4.75, NA, 4.34, 5.06, 4.56,
NA, 5.06, 3.96), coltot = c(232, 253, 215, NA, 202, 287,
255, NA, 217, 147), hdl = c(59, 64, 68, NA, 71, 46, 61, NA,
40, 42), ldl_calc = c(143, 150, 127, NA, 114, NA, 170, NA,
143, 86), trigli = c(152, 195, 99, NA, 85, 378, 121, NA,
170, 93), hba1c = c(5.61, 5.66, 5.43, NA, 5.38, 8.14, 5.81,
NA, 6, 6.38), i_hucpeptide = c(988.91, 673.5, 1036.03, NA,
734.29, 1266.3, 610.9, NA, 1144.8, 672.08), i_hughrelin = c(1133.35,
1230.06, 1109.98, NA, 1064.79, 725.35, 1437.85, NA, 866.07,
822.83), i_hugip = c(2.67, 2.67, 2.67, NA, 2.67, 2.67, 2.67,
NA, 2.67, 2.67), i_huglp1 = c(145.43, 138.32, 194.14, NA,
99.37, 166.27, 218.33, NA, 184.04, 222.84), i_huglucagon = c(513.89,
357.35, 624.73, NA, 464.85, 448.49, 304.29, NA, 310.61, 426.52
), i_huinsulin = c(234.23, 229.06, 358.86, NA, 175.38, 466,
99.02, NA, 367.95, 77.33), i_huleptin = c(7898.28, 5211.27,
14670.25, NA, 7161.39, 3218.49, 2659.8, NA, 3766.01, 1207.58
), i_hupai1 = c(3468.4, 1977.9, 4101.1, NA, 1613.4, 2847.27,
2442.49, NA, 1953.26, 1752.88), i_huresistin = c(4783.28,
2676.05, 3064.57, NA, 2165.52, 3878.48, 8343.46, NA, 2822.68,
6496.73), i_huvisfatin = c(831.6, 649.45, 2270.65, NA, 1578.88,
9.63, 185.09, NA, 162.8, 8.64), col_rema = c(30, 39, 20,
NA, 17, NA, 24, NA, 34, 19), homa = c(1061.843, 987.503,
1419.491, NA, 685.931, 3479.467, 457.692, NA, 1864.28, 415.864
), i_pcr = c(0.05, NA, 0.27, NA, 0.03, 0.23, 0.04, NA, 0.09,
0.09), d_homa = c(NA, NA, NA, NA, -2.629, 33.042, -181.211,
NA, -929.683, -89.108), d_hughrelin = c(NA, NA, NA, NA, -213.59,
48.43, 95.27, NA, -228.62, -146.8), d_huinsulin = c(NA, NA,
NA, NA, 3.24, -68.79, -43.31, NA, -147.33, -7.46), d_hucpeptide = c(NA,
NA, NA, NA, 192.39, -263.54, -71.56, NA, -437.38, -215.44
), d_huglucagon = c(NA, NA, NA, NA, 38.99, -112.45, -10.75,
NA, -133.55, -259.73), d_huleptin = c(NA, NA, NA, NA, 409.76,
-1081.5, -1778.69, NA, -353.91, -679.7), d_huresistin = c(NA,
NA, NA, NA, 391.02, -155.41, -436.47, NA, -1137.79, -922.75
), d_huvisfatin = c(NA, NA, NA, NA, 457.54, -260.79, -341.02,
NA, -426.89, 0), d_glucosa = c(NA, NA, NA, NA, -2, 23, 3,
NA, -8, -13), d_coltot = c(NA, NA, NA, NA, -52, 36, -11,
NA, 15, -12), d_hdl = c(NA, NA, NA, NA, 1, 3, -1, NA, 1,
4), d_ldl_calc = c(NA, NA, NA, NA, -50, NA, -10, NA, 12,
-15), d_col_rema = c(NA, NA, NA, NA, -3, NA, 0, NA, 2, -1
), d_trigli = c(NA, NA, NA, NA, -14, 132, -1, NA, 8, -5),
d_hba1c = c(NA, NA, NA, NA, -0.11, -0.04, -0.18, NA, -1.76,
-0.67), d_tasis2_e = c(NA, NA, NA, NA, 0, 6, -1, 7, -21,
-9), d_tadias2_e = c(NA, NA, NA, NA, 0, 2, -8, 8, -10, -17
), d_peso1 = c(NA, NA, NA, -6, -2.3, 0.2, -11.4, 0.8, -4.1,
-9.3), d_cintura1 = c(NA, NA, NA, NA, -2.5, -4, -12.5, 6,
-3.5, -4.5), d_geaf_tot = c(NA, NA, NA, 699.31, 2055.95,
-2181.82, 1748.25, 3776.23, 867.13, -6593.94), d_p17_total = c(NA,
NA, NA, 1, 4, 5, 4, -5, 5, 2), d_hupai1 = c(NA, NA, NA, NA,
-185.03, 204.77, 202.01, NA, -1551.91, 57.2), d_hugip = c(NA,
NA, NA, NA, 0, 0, 0, NA, 0, 0), d_huglp1 = c(NA, NA, NA,
NA, -42.07, -163.02, 107.28, NA, -95.82, -87.5), d_pcr = c(NA,
NA, NA, NA, NA, NA, NA, NA, -0.18, -0.22), ln_trigli = c(5.024,
5.273, 4.595, NA, 4.443, 5.935, 4.796, NA, 5.136, 4.533),
ln_homa = c(6.968, 6.895, 7.258, NA, 6.531, 8.155, 6.126,
NA, 7.531, 6.03), ln_hba1c = c(1.725, 1.733, 1.692, NA, 1.683,
2.097, 1.76, NA, 1.792, 1.853), ln_geaf_tot = c(8.135, 8.167,
6.327, 8.604, 8.365, 6.732, 8.054, 8.93, 8.375, 6.34), i_ratiolg = c(6.969,
4.237, 13.217, NA, 6.726, 4.437, 1.85, NA, 4.348, 1.468)), row.names = c(NA,
-10L), class = c("tbl_df", "tbl", "data.frame"))
The mixed model I have created following the syntax
# Linear mixed model for weight (peso1).
# Fixed effects: sex, age, p17_total and a quadratic time trend that is
# allowed to differ between intervention groups.
# Random effects: a quadratic time trend per patient (paciente).
lme_peso <- lme(
  peso1 ~ sexo_s1 + edad_s1 + poly(time, 2) * grupo_int_v00 + p17_total,
  random = ~ poly(time, 2) | paciente,
  control = lmeControl(opt = "optim"),
  data = dat_longer,
  # NOTE(review): the filter expression was lost from the post
  # (`subset = !,` does not parse). Keeping rows with an observed
  # response is assumed here -- confirm against the original script.
  subset = !is.na(peso1),
  na.action = na.omit
)
And then to plot it
# Plot the fixed-effects predictions of lme_peso over time, one colour per
# intervention group, with a confidence ribbon around each line.
ggpredict(lme_peso, c("time [all]", "grupo_int_v00"), type="fixed") %>%
ggplot(aes(x = x, y = predicted, colour = group)) +
geom_point() +
geom_line() +
stat_smooth(method = "loess",se = T) +
labs(x = "time (months)", y = "Weight (kg)") +
# Relabel the two colour groups; this is the legend that should be kept.
scale_color_manual(labels = c("Control", "Intervention"), values = c("orange", "green")) +
# `fill = F` sits inside aes(), so it is treated as a mapped aesthetic; this
# is the source of the extra legend described in the question.
geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = F),alpha = 1/5) +
# time is coded 0/1/2 but displayed as 0/6/12 on the axis.
scale_x_continuous(breaks = 0:2, labels = c(0, 6, 12))
When I suppress the fill argument in geom_ribbon, the fill stays black. But I don't know how to keep just one legend with the 2 groups (Control and Intervention); as it stands I get an extra legend (labelled F in this case).
Thanks in advance

I couldn't run your code, but I rebuilt it with iris.
Like Matt suggested, one thing would be, remove fill=F:
# Same layers as the question's plot, rebuilt on the built-in iris data
# (columns Sepal.Length, Petal.Length, Species).
# No `fill` aesthetic is mapped in geom_ribbon(), so no fill legend is drawn.
ggplot(data = iris,
       aes(x = Sepal.Length, y = Petal.Length, group = Species)) +
  geom_point() +
  geom_line() +
  stat_smooth(method = "loess", se = TRUE, aes(color = Species)) +
  geom_ribbon(aes(ymin = 1, ymax = 3), alpha = 1/5) +
  scale_x_continuous(breaks = 0:2, labels = c(0, 6, 12))
Or if you need it for some reason, use guides(fill="none"):
# Same plot on the built-in iris data (columns Sepal.Length, Petal.Length,
# Species), keeping `fill = FALSE` inside aes() but hiding the legend that
# this mapping creates.
ggplot(data = iris,
       aes(x = Sepal.Length, y = Petal.Length, group = Species)) +
  geom_point() +
  geom_line() +
  stat_smooth(method = "loess", se = TRUE, aes(color = Species)) +
  geom_ribbon(aes(ymin = 1, ymax = 3, fill = FALSE), alpha = 1/5) +
  scale_x_continuous(breaks = 0:2, labels = c(0, 6, 12)) +
  # Suppress only the fill legend; the colour legend is kept.
  guides(fill = "none")


How to backward generate levels from flows when both are in separate dataframes in R?

I have two dataframes. The first one called 'sw_flows_final' contains data of flows between sectors from 1952-2019. The second one is called 'sw_stocks' and contains data of stocks of what each sector owes the other from 2011-2019. I want to backwards extend the 'sw_stocks' data frame till 1952. The connection between the two is the following
To take an example, I have the level for some sector in 2011. By subtracting the corresponding flow in 2011 of the same sector, I will get the level in 2010, and so on. There are 10 such sectors. I want R to generate levels from 2011 back until 1952 for every sector correctly. Can someone recommend how to do this efficiently?
Here are two images of how the dataframes look like
Here is the sample data using dput for sw_flows_final
structure(list(sector = structure(c(2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L), .Label = c("Source-Use",
"Banking", "Other financial corporations", "Private nonfinancial corporations",
"Government", "Rest of the World", "Households and NIPISHs",
"Other resident sectors", "Total"), class = "factor"), bank_s = structure(c(NA,
4, -1, 15, 23, -131, -6, -96, 19, NA, -3, -7, -43, -38, 13, 2,
-76, -48, NA, -3), label = "Banking Sources", format.stata = "%10.0g"),
bank_u = structure(c(NA, -5, 33, -25, -150, 32, NA, -115,
NA, NA, 6, -32, 10, 1, -13, NA, -28, NA, NA, -2), label = "Banking Uses", format.stata = "%10.0g"),
ofi_s = structure(c(NA, NA, NA, NA, 2, 38, 2, 42, -2, NA,
NA, NA, NA, 2, 53, NA, 55, -4, 4, NA), label = "Other Fin. Institutions Sources", format.stata = "%10.0g"),
ofi_u = structure(c(4, NA, 5, 25, 1, 9, NA, 44, NA, 2, NA,
5, 44, 2, 6, NA, 59, NA, 1, NA), label = "Other Fin. Institutions Uses", format.stata = "%10.0g"),
pcs_s = structure(c(54, 5, NA, 2, -1, 68, 6, 134, 118, -22,
5, NA, 3, 1, 58, 2, 47, 31, -9, 12), label = "Pvt. Corp. Sector Sources", format.stata = "%10.0g"),
pcs_u = structure(c(9, NA, NA, -2, NA, 2, 7, 16, NA, 3, NA,
NA, 5, NA, 3, 5, 16, NA, 15, NA), label = "Pvt. Corp. Sector Uses", format.stata = "%10.0g"),
govt_s = structure(c(-54, 15, -2, NA, 62, 102, -3, 120, 94,
75, 26, 5, NA, 28, 1, -41, 94, 120, -20, 36), label = "Government Sources", format.stata = "%10.0g"),
govt_u = structure(c(-3, NA, 2, NA, -2, 22, 7, 26, NA, -52,
NA, 3, NA, 1, 18, 4, -26, NA, -36, NA), label = "Government Uses", format.stata = "%10.0g"),
rotw_s = structure(c(-153, NA, -3, -14, NA, NA, -3, -173,
-207, 2, NA, NA, 9, NA, NA, -1, 10, 34, 26, NA), label = "Rest of the World Sources", format.stata = "%10.0g"),
rotw_u = structure(c(9, NA, 2, 42, NA, NA, -19, 34, NA, -48,
NA, 8, 20, NA, NA, -4, -24, NA, -25, NA), label = "Rest of the World Uses", format.stata = "%10.0g"),
hh_s = structure(c(32, 9, -5, 22, NA, NA, NA, 58, 1, -12,
7, -18, 18, NA, NA, NA, -5, -88, 24, 5), label = "Households Sources", format.stata = "%10.0g"),
hh_u = structure(c(-130, 11, 54, 122, NA, NA, NA, 57, NA,
16, 15, 23, 29, NA, NA, NA, 83, NA, 70, 16), label = "Households Uses", format.stata = "%10.0g"),
total_s = structure(c(-121, 33, -11, 25, 86, 77, -4, 85,
23, 43, 35, -20, -13, -7, 125, -38, 125, 45, 25, 50), label = "Total Sources", format.stata = "%8.0g"),
total_u = structure(c(-111, 6, 96, 162, -151, 65, -5, 62,
NA, -79, 21, 7, 108, 4, 14, 5, 80, NA, 25, 14), label = "Total Uses", format.stata = "%8.0g"),
year = structure(c(1952, 1952, 1952, 1952, 1952, 1952, 1952,
1952, 1952, 1953, 1953, 1953, 1953, 1953, 1953, 1953, 1953,
1953, 1954, 1954), label = "Year", format.stata = "%9.0g")), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
and for sw_stocks
structure(list(sector = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
9L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 9L, 8L, 1L, 2L), .Label = c("Banking",
"Government", "Households and NIPISHs", "Other financial corporations",
"Other resident sectors", "Private nonfinancial corporations",
"Rest of the World", "Source-Use", "Total"), class = "factor"),
bank_s_l = c(1350079.49133551, 1899576.67082447, 5062495.73351201,
1151214.12986427, 110.276338621267, 1355504.86599654, 859206.587426226,
11678187.7552976, -178532.519500652, 1505246.38567655, 2155467.21125388,
5855906.75278197, 1381570.68752383, 78.0505047633771, 1456469.98651368,
1066704.4326649, 13421443.5069196, -202944.716738584, 1740628.61553264,
2544347.17898597), bank_u_l = c(1156063.70260075, 3554941.84780303,
2447185.38181841, 616874.110576552, 138320.990278717, 1945789.820585,
1997544.42113584, 11856720.2747983, NA, 1241749.97765978,
4007409.67985746, 3290777.41165653, 612317.31788805, 141423.711476498,
2055249.27763703, 2275460.84748283, 13624388.2236582, NA,
1458813.42489447, 4587951.87769072), ofi_s_l = c(412030.693749423,
187700.316261259, 121865.465242432, 3611043.08926093, 87881.51,
120740.971533392, 175451.98473536, 4716714.03078279, -670782.594882376,
488886.759730145, 194866.871943733, 123364.152903093, 4019909.92650251,
94803.43, 115690.116216912, 253583.626498628, 5291104.88379502,
-846071.342671093, 572720.870848094, 205760.036602362), ofi_u_l = c(1082050.91844983,
1987036.26341659, 43593.1327534439, 1346470.61507149, 3473.61146178588,
891968.551489673, 32903.5330223561, 5387496.62566517, NA,
1080238.25250793, 2240819.70449555, 67741.7919287182, 1730643.94899816,
4370.85592038175, 930827.085634209, 82534.5869811682, 6137176.22646612,
NA, 1072481.25801139, 2658225.53044836), pcs_s_l = c(1940282.83357554,
229076.060527219, 1493897.17610662, 863355.291768346, 0,
5024995.75077571, 2801147.39882306, 12352754.5115765, 3480898.17067805,
2051048.40228274, 254992.855935366, 1509718.51083095, 899187.870098343,
0, 5265806.62852803, 3359680.47293263, 13340434.7406081,
3941569.20740091, 2386853.8881313, 277562.331033185), pcs_u_l = c(1355504.86599654,
471854.794244865, 0, 150821.917117881, 0, 6306138.74353917,
587536.02, 8871856.34089845, NA, 1456469.98651368, 476029.97685104,
0, 151878.215616114, 0, 6616322.85152686, 698164.502699446,
9398865.53320714, NA, 1614687.15738287, 497308.289032772),
govt_s_l = c(3026391.34344152, 1232718.57225668, 899831.786828824,
1758432.78129903, 0, 471854.794244865, 385631.474767484,
7774860.7528384, 7774860.7528384, 3299762.57843369, 1508255.01724503,
878289.365689908, 2114742.39334311, 0, 476029.97685104, 609867.659036133,
8886946.99059891, 8886946.99059891, 3775295.4629249, 1833119.31513762
), govt_u_l = c(1932781.13639086, 1380943.25642814, 20831.3302280885,
273893.850293545, 2077.96462093295, 219943.84762228, 10369.45,
3840840.83558385, 3840840.83558385, 2063225.61511638, 1542429.04485517,
23827.0863095146, 318847.879129413, 1766.43372182738, 246595.428939527,
12529.88, 4209221.36807184, 4209221.36807184, 2274350.44000159,
1705859.60013762), rotw_s_l = c(1451517.27411681, 10373.77,
0, 0, 175620.33, 587536.02, 0, 2225047.39411681, 2225047.39411681,
1548720.84, 12533.923713, 0, 0, 175786.24, 698164.502699446,
0, 2435205.50641245, 2435205.50641245, 1859509.8975799, 14605.2524611378
), rotw_u_l = c(337475.82215636, 409030.0781722, 0, 0, 0,
2814996.33882306, 0, 3561502.23915162, NA, 436497.84863876,
400723.19, 0, 0, 0, 3372956.29281454, 0, 4210177.3314533,
NA, 682427.647230683, 453829.566744612), hh_s_l = c(2963585.38181841,
20831.3302280885, 0, 43374.9585653354, 0, 0, 0, 3027791.67061183,
-7557951.13157221, 3273435.03602629, 23827.0863095146, 0,
67086.1333979951, 0, 0, 0, 3364348.2557338, -8314568.56600038,
3579173.99233299, 26649.23), hh_u_l = c(5147567.73351201,
882725.978434388, 0, 3061551.91413103, -7.27595761418343e-10,
1493897.17610662, 0, 10585742.802184, NA, 5861967.75278197,
881712.095035784, 0, 3425545.46308548, -7.27595761418343e-10,
1509691.51083095, 0, 11678916.8217342, NA, 6628288.36795117,
896978.173784456), total_s_l = c(11143887.0180372, 3580276.72009772,
7578090.16168989, 7427420.25075791, 263612.116338621, 7560632.4025505,
4221437.44575213, 41775356.115224, -2328803.00305744, 12167100.0021494,
4149942.96640053, 8367278.78220592, 8482497.01086579, 270667.720504763,
8012161.21080911, 5289836.19113229, 46739483.8840678, -2519261.62052293,
13914182.7273498, 4902043.34422028), total_u_l = c(11011444.1791064,
8686532.21849921, 2511609.84479994, 5449612.40719049, 143872.566361435,
13672734.4781658, 2628353.4241582, 44104159.1182814, NA,
12140149.4332185, 9549123.691095, 3382346.28989476, 6239232.82471722,
147561.001118706, 14731642.4473831, 3068689.81716345, 49258745.5045908,
NA, 13731048.2954722, 10800153.0378385), year = c(2012, 2012,
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2013, 2013, 2013,
2013, 2013, 2013, 2013, 2013, 2013, 2014, 2014)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L), groups = structure(list(
sector = structure(1:9, .Label = c("Banking", "Government",
"Households and NIPISHs", "Other financial corporations",
"Other resident sectors", "Private nonfinancial corporations",
"Rest of the World", "Source-Use", "Total"), class = "factor"),
.rows = structure(list(c(1L, 10L, 19L), c(2L, 11L, 20L),
c(3L, 12L), c(4L, 13L), c(5L, 14L), c(6L, 15L), c(7L,
16L), c(9L, 18L), c(8L, 17L)), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -9L), .drop = TRUE))

Looping/sapply through nlme function

I am trying to run a loop that fits a mixed-effects model with a changing response variable. I came from here and here. I should say that I have tried something like creating a function and then using sapply or lapply (without success).
I provide a small dataset (really small) just to represent my original database (much larger and similar to those of longitudinal studies)
data<- structure(list(paciente = structure(c(6134, 6099, 6457, 6164,
6470, 6323, 6550, 6082, 6476, 6044, 6509, 6539, 6234, 6555, 6383,
6127, 6507, 6513, 6486, 6080, 6101, 6007, 6023, 6516, 6001, 6198,
6510, 6530, 6351, 6181), label = "Paciente", format.spss = "F6.0"),
edad_s1 = structure(c(70, 63, 61, 71, 67, 59, 63, 69, 67,
67, 67, 72, 65, 72, 63, 65, 60, 64, 56, 63, 57, 62, 72, 60,
72, 63, 72, 68, 66, 71), label = "Edad", format.spss = "F3.0"),
sexo_s1 = structure(c(1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 2L), .Label = c("Hombre", "Mujer"), label = "Sexo", class = "factor"),
time = c(2, 1, 2, 1, 0, 0, 1, 0, 2, 1, 1, 0, 1, 2, 1, 2,
1, 2, 0, 1, 1, 0, 2, 1, 0, 2, 1, 2, 2, 0), grupo_int_v00 = structure(c(1L,
1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L), .Label = c("A",
"B"), label = "Grupo de intervención", class = "factor"),
peso1 = c(108, 80.4, 95, 75, 92.6, 90, 82.2, 94.4, 78, 71.3,
75.1, 83.5, 87.1, 63, 73, 98.5, 90.2, 81.3, 93.4, 79.8, 114.3,
110.9, 81.5, 88.5, 82.4, 88.3, 90, 73, 79, 94.8), cintura1 = c(127,
100.5, 103.5, 108, 115, 114.5, 95.5, 115, 101, 98, 99, 108.5,
105, 99, 104, 126, 114.2, 99, 110, 104.5, 120, 126, 111.5,
102, 117, 110, 125, 100, 104, 123), tasis2_e = c(156, 129,
131, 138, 167, 138, 115, 146, 119, 148, 130, 144, 115, 122,
135, 139, 128, 119, 138, 115, 138, 151, 151, NA, 137, 147,
124, 168, 152, 156), tadias2_e = c(70, 63, 80, 67, 76, 81,
57, 68, 69, 69, 68, 78, 61, 71, 54, 77, 63, 63, 92, 73, 80,
88, 84, NA, 79, 76, 62, 90, 87, 89), p17_total = c(10, 10,
5, 9, 9, 7, 15, 11, 6, 12, 11, 4, 9, 14, 9, 9, 11, 14, 6,
5, 10, 10, 9, 13, 12, 7, 11, 12, 7, 4), geaf_tot = c(1986.01,
1286.71, 1230.77, 1510.49, 839.16, 2144.52, 5361.31, 1678.32,
4055.94, 2601.4, 3363.64, 3076.92, 5342.66, 2769.23, 2601.4,
1693.24, 4055.94, 3146.85, 3916.08, 6405.59, 2442.89, 671.33,
867.13, 1585.08, 3153.85, 3188.81, 7986.01, 839.16, 7552.45,
2937.06), glucosa = c(127, 97, 95, 102, 119, 113, 109, 105,
93, 167, 85, 108, 122, 112, 113, 120, 100, 108, 100, 86,
129, 136, 98, 97, 130, 125, 109, 102, NA, 181), albumi = c(4.47,
4.82, 4.78, 4.22, 4.59, 4.5, 4.33, 4.87, 4.83, 4.98, 4.23,
4.77, 4.76, 4.98, 4.18, 4.51, 4.72, 4.87, 4.77, 4.61, 4.55,
4.77, 4.6, 4.59, 4.25, 4.71, 4.47, 4.54, NA, 4.63), coltot = c(157,
191, 276, 248, 248, 217, 187, 301, 173, 230, 258, 238, 231,
181, 183, 243, 223, 195, 237, 245, 164, 145, 199, 234, 178,
192, 201, 198, NA, 159), hdl = c(39, 50, 57, 59, 49, 44,
60, 98, 52, 73, 58, 44, 58, 60, 48, 46, 73, 58, 39, 47, 38,
45, 59, 56, 72, 34, 78, 62, NA, 54), ldl_calc = c(91, 124,
204, 133, 155, 140, 105, 162, 91, 141, 182, 173, 155, 107,
83, 150, 132, 124, NA, 167, 101, 88, 121, 160, 84, 130, 112,
120, NA, NA), trigli = c(137, 87, 74, 282, 219, 165, 112,
203, 149, 78, 89, 105, 91, 71, 259, 236, 92, 63, 447, 157,
123, 58, 94, 90, 112, 139, 53, 80, NA, 429), hba1c = c(6.57,
5.82, 5.68, 5.96, 6.11, 5.73, 5.48, 5.8, 5.6, 7.8, 5.21,
5.73, 6.1, 5.86, 6.37, 6.27, 5.22, 5.59, 5.47, 5.95, 6.96,
NA, 5.47, 4.99, NA, 6.25, 5.79, 5.79, NA, 6.54), i_hucpeptide = c(NA,
NA, 466.64, 838.61, 847.89, 1481.03, 819.65, NA, 1298.6,
NA, 564.59, 544.2, 755.73, 1057.83, 957.43, NA, 957.33, 1002.34,
1104, NA, NA, NA, NA, 594.6, NA, 815.82, 922.08, 628.54,
NA, 1591.01), i_hughrelin = c(NA, NA, 410.97, 553.65, 453,
352.44, 527.01, NA, 328.27, NA, 1668.41, 460.06, 1072.27,
260.24, 749.03, NA, 1327.91, 363.79, 524.53, NA, NA, NA,
NA, 1051.1, NA, 143.32, 1076.49, 1565.85, NA, 607.31), i_hugip = c(NA,
NA, 2.67, 2.67, 2.67, 2.67, 2.67, NA, 2.67, NA, 2.67, 2.67,
690.74, 1165.16, 2.67, NA, 2.67, 2.67, 2.67, NA, NA, NA,
NA, 2.67, NA, 2.67, 2.67, 2.67, NA, 2.67), i_huglp1 = c(NA,
NA, 127.66, 284.34, 200.13, 59.3, 234.84, NA, 503.42, NA,
103.9, 14.14, 71.6, 56.41, 75.13, NA, 161.36, 124.19, 220.52,
NA, NA, NA, NA, 14.14, NA, 112.57, 100.52, 237.55, NA, 470.91
), i_huglucagon = c(NA, NA, 333.79, 649.94, 726.99, 395.38,
610.5, NA, 434.42, NA, 502.4, 127.62, 268.23, 10.48, 428.15,
NA, 716.02, 238.95, 320.32, NA, NA, NA, NA, 10.48, NA, 238,
487.42, 297.6, NA, 495.16), i_huinsulin = c(NA, NA, 129.24,
270.98, 299.75, 730.82, 267.54, NA, 616.91, NA, 121.26, 85.34,
224.96, 247.48, 220.75, NA, 181.85, 341.25, 551.46, NA, NA,
NA, NA, 133.42, NA, 263.87, 279.45, 94.78, NA, 573.14), i_huleptin = c(NA,
NA, 3992.49, 17806.43, 8409.76, 11511.43, 2965.92, NA, 3223.08,
NA, 9018.79, 1039.45, 2613.33, 2128.98, 7307.89, NA, 13492.13,
2883.77, 4775.98, NA, NA, NA, NA, 2602.96, NA, 2829.59, 8511.92,
3528.77, NA, 11487.15), i_hupai1 = c(NA, NA, 997.29, 2499.25,
3085.25, 1909.44, 1730.55, NA, 3333.37, NA, 1424.3, 1857.71,
2578.46, 2268.52, 2222.97, NA, 2722.92, 1300.69, 2732.11,
NA, NA, NA, NA, 1204.36, NA, 2483.08, 2289.67, 1791.79, NA,
6595.54), i_huresistin = c(NA, NA, 3044.48, 5774.77, 3221.72,
4925.57, 5170.95, NA, 3683.64, NA, 4041.32, 6771.31, 5119.11,
9521.7, 3328.41, NA, 5061.65, 3773.39, 3039.39, NA, NA, NA,
NA, 4405.17, NA, 2577.84, 3433.82, 6802.94, NA, 6461.67),
i_huvisfatin = c(NA, NA, 302.3, 2083.46, 2989.72, 1118.7,
8.64, NA, 96.03, NA, 2209.51, 8.64, 1944.37, 1415.55, 678.33,
NA, 4349.56, 8.64, 410.1, NA, NA, NA, NA, 117, NA, 8.64,
2308.8, 228.53, NA, 1766.64), col_rema = c(27, 17, 15, 56,
44, 33, 22, 41, 30, 16, 18, 21, 18, 14, 52, 47, 18, 13, NA,
31, 25, 12, 19, 18, 22, 28, 11, 16, NA, NA), homa = c(NA,
NA, 5.053, 11.374, 14.679, 33.985, 12.001, NA, 23.61, NA,
4.242, 3.793, 11.294, 11.406, 10.265, NA, 7.484, 15.167,
22.694, NA, NA, NA, NA, 5.326, NA, 13.574, 12.535, 3.978,
NA, 42.691), i_pcr = c(NA, NA, 0.41, 0.82, NA, 2.08, 0.08,
NA, 0.1, NA, 0.38, 0.05, 0.04, 0.35, 0.2, NA, 0.98, 0.02,
NA, NA, NA, NA, NA, 0.2, NA, 0.1, 0.16, 0.16, NA, 2.93)), row.names = c(NA,
-30L), class = c("tbl_df", "tbl", "data.frame"))
Afterwards I am defining my iteration and my variables database
ex<- subset(data[, 6:30])
for (i in 1:length(ex)) {
var_1 <- ex[,i]
var_1 <- unlist(var_1)
lme_1 <- lme(var_1 ~ sexo_s1*peso1 + edad_s1 + p17_total + poly(time, 2)*grupo_int_v00,
random = ~ poly(time, 2)|paciente, control=lmeControl(opt="optim"),
data = dat_longer, subset = !
Error in model.frame.default(formula = ~time + var_1 + sexo_s1 + peso1 + :
invalid type (list) for variable 'var_1'
I have also tried unlisting the column before fitting the model inside the loop
for (i in 1:length(data)) {
var_1 <- data[,i]
var_1 <- unlist(var_1) #or
lme_1 <- lme(var_1 ~ sexo_s1*peso1 + edad_s1 + p17_total + poly(time, 2)*grupo_int_v00,
random = ~ poly(time, 2)|paciente, control=lmeControl(opt="optim"),
data = dat_longer, subset = !
Error in model.frame.default(formula = ~time + var_1 + sexo_s1 + peso1 + :
variable lengths differ (found for 'var_1')
I have also tried to develop a new function to iterate over
lme_z <- function(z){
out <- lme(z ~ sexo_s1*peso1 + edad_s1 + p17_total + poly(time, 2)*grupo_int_v00,
random = ~ poly(time, 2)|paciente, control=lmeControl(opt="optim"),
data = dat_longer, subset = !
I would appreciate any suggestion on how to iterate over the response variable (I know Ben Bolker is an expert)
Thanks in advance
If data is a data frame containing all of the variables that you use in your formula, including all of the responses that you want to consider, then you can do:
# Fit the mixed model for a single response variable.
#
# resp: name (character string) of a response column in `data`.
# Returns the fitted lme object.
f <- function(resp) {
  # Formula template; `.` is only a placeholder for the response.
  fixed <- . ~ sexo_s1 * peso1 + edad_s1 + p17_total + poly(time, 2) * grupo_int_v00
  # Inject the response named by `resp` into the left-hand side.
  fixed[[2L]] <- as.name(resp)
  lme(fixed = fixed,
      random = ~poly(time, 2) | paciente,
      data = data,
      # Logical vector, one element per row of `data`: keep rows where this
      # response is observed.
      subset = !is.na(data[[resp]]),
      control = lmeControl(opt = "optim"))
}
# One fitted model per response name.
list_of_lme_objects <- lapply(names_of_response_variables, f)
An important piece is:
# Formula template with `.` as a placeholder for the response ...
fixed <- . ~ sexo_s1 * peso1 + edad_s1 + p17_total + poly(time, 2) * grupo_int_v00
# ... which is then replaced by the symbol named by `resp`.
fixed[[2L]] <- as.name(resp)
The second statement injects the response named resp into the left hand side of the formula template. A more transparent example:
fixed <- . ~ world
# Element 2 of a two-sided formula is its left-hand side.
fixed[[2L]] <- as.name("hello")
fixed
## hello ~ world
Another important piece is:
# TRUE for the rows of `data` where the response named by `resp` is observed.
subset = !is.na(data[[resp]])
Here, the right hand side actually evaluates to a logical vector of length equal to the number of rows of data. You might consider passing na.action = na.omit instead of subset, though that will also omit rows where the independent variables have missing values, so the semantics are slightly different.
The variable grupo_int_v00 is missing from your data frame. You'll have to fix that on your end in order to test the code...
I was going to suggest:
# Right-hand-side terms of the fixed-effects formula.
formvars <- c("sexo_s1*peso1",
              "poly(time, 2)")
## excluded *grupo_int_v00 since not in example data frame
# Names of the response columns to loop over.
respvars <- names(df)[7:30]
# One fitted model per response, stored under the response's name.
result <- list()
for (r in respvars) {
  # reformulate() builds `r ~ sexo_s1*peso1 + poly(time, 2)` from strings.
  result[[r]] <- lme(reformulate(formvars, response = r),
                     random = ~ poly(time, 2) | paciente,
                     data = df,
                     na.action = na.exclude)
}
Many of @MikaelJagan's points are well taken. In particular:
grupo_int_v00 excluded since it wasn't in your example data set
this code doesn't work for your example since there are only two complete cases (i.e., observations with no missing predictors/responses) in the data set, so we can't fit a quadratic polynomial ("degree must be less than the number of unique points")
I used na.exclude, which obviates your subset argument; it excludes NA values when fitting but will re-introduce them e.g. in calculating predictions or residuals

How to merge multiple rows in R with multiple columns in a dataset

I want to merge the rows for each record_id into one row based on the type column, except for the volunteers (record_id) who have two repeats in the repeat column; for these I would like a second row. Each record_id corresponds to one person who has come in for a test either once (repeat=1) or twice, and who therefore has one or two entries in the repeat column.
Here is what my data look like
structure(list(record_id = c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4,
4, 4, 4), type = c(NA, "data_collection", "test", NA, "data_collection",
"test", NA, "data_collection", "test", "test", NA, "cata_collection",
"test", "test"), `repeat` = c(NA, 1, 1, NA, 1, 1, NA, 1, 1, 2,
NA, 1, 1, 2), dt_volunteer_reg = structure(c(1597246320, NA,
NA, 1599217080, NA, NA, 1596184500, NA, NA, NA, 1598192280, NA,
NA, NA), class = c("POSIXct", "POSIXt"), tzone = "UTC"), age = c(26,
NA, NA, 64, NA, NA, 51, NA, NA, NA, 39, NA, NA, NA), gender = c(0,
NA, NA, 1, NA, NA, 0, NA, NA, NA, 1, NA, NA, NA), case_type = c(NA,
1, NA, NA, 2, NA, NA, 1, NA, NA, NA, 1, NA, NA), test_dis_dt = structure(c(NA,
NA, 1597250220, NA, NA, 1600012980, NA, NA, 1596382080, 1601980740,
NA, NA, 1598284020, 1603118700), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), test_dis_res = c(NA, NA, 1, NA, NA, 1, NA,
NA, 2, 2, NA, NA, 2, 2), test_dis_in = c(NA, NA, NA, NA, NA,
0.02, NA, NA, 6.13, 4.75, NA, NA, 7.23, 3.85), test_cont_dt = structure(c(NA,
NA, 1597250280, NA, NA, 1608636120, NA, NA, NA, 1601980740, NA,
NA, 1605704940, 1603205340), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
test_cont_res = c(NA, NA, 2, NA, NA, 1, NA, NA, NA, 2, NA,
NA, 2, 2), test_cont_val = c(NA, NA, 123, NA, NA, 0, NA,
NA, NA, 40000, NA, NA, 471.6, 306.5)), row.names = c(NA,
-14L), class = c("tbl_df", "tbl", "data.frame"))
And this is what I'm hoping to get
structure(list(record_id = c(1, 2, 3, 3, 4, 4), `repeat` = c(1,
1, 1, 2, 1, 2), dt_volunteer_reg = structure(c(1597246320, 1599217080,
1596184500, 1596184500, 1598192280, 1598192280), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), age = c(26, 64, 51, 51, 39, 39), gender = c(0,
1, 0, 0, 1, 1), case_type = c(1, 2, 1, 1, 1, 1), test_dis_dt = structure(c(1597250220,
1600012980, 1596382080, 1601980740, 1598284020, 1603118700), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), test_dis_res = c(1, 1, 2, 2, 2, 2),
test_dis_in = c(NA, 0.02, 6.13, 4.75, 7.23, 3.85), test_cont_dt = structure(c(1597250280,
1608636120, NA, 1601980740, 1605704940, 1603205340), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), test_cont_res = c(2, 1, NA, 2,
2, 2), test_cont_val = c(123, 0, NA, 40000, 471.6, 306.5)), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
Assuming the first dataframe is called input and you are happy using the tidyverse you can do it like this.
# Collapse the rows of each record_id into one row per test repeat.
input %>%
  # One nested data frame per record_id.
  nest(data = c(-record_id)) %>%
  mutate(
    # Fill in specimen details: NAs are replaced by the values of the
    # group's first row.
    data = map(data, ~replace_na(., as.list(head(., 1)))),
    # Remove the specimen-details row (the one without a repeat number).
    data = map(data, filter, !is.na(`repeat`)),
    # Fill in test data with data collection details (now the first row).
    data = map(data, ~replace_na(., as.list(head(., 1)))),
    # Remove data collection rows, keeping one row per test.
    data = map(data, filter, type == "test")
  ) %>%
  unnest(data) %>%
  # `type` is constant ("test") at this point and is not part of the
  # desired output.
  select(-type)
There are ways to do this more concisely and/or faster but this may be more readable.

Using for-loops in R to process several columns in a data frame

I am trying to edit 50 columns in my data frame into dummy variables depending on an exact match with a given vector of 50 values using a for-loop function.
I never used loop functions before and can't figure out how to do it.
I first wanted to code this "by hand" for each of the 50 columns like that:
dBGK1a <- as.numeric(BGK1a == BGKright[1])
dBGK2a <- as.numeric(BGK2a == BGKright[2])
dBGK3a <- as.numeric(BGK3a == BGKright[3])
dBGK50a <- as.numeric(BGK50a == BGKright[50])
As this is very tedious i tried to come up with a for-loop, that can handle this.
for(i in 1:50) {
for (j in seq(from = 348, to = 448, by = 2)){
data1[j] <- as.numeric(data1[j] == BGKright[i])
Somehow this doesn't work since i get the value "0" in every column over every observation.
data1 is my data frame. Here is a shorter version of the data frame:
dput(head(data1[348:354], 20))
structure(list(BGK1a = c(NA, NA, NA, NA, NA, NA, NA, NA, 2, NA,
NA, NA, NA, NA, 2, 2, 2, 2, 1, 2), BGK1b = c(NA, NA, NA, NA,
NA, NA, NA, NA, 50, NA, NA, NA, NA, NA, 100, 100, 100, 99, 89,
50), BGK2a = c(NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA,
NA, NA, 1, 2, 1, 2, 1, 1), BGK2b = c(NA, NA, NA, NA, NA, NA,
NA, NA, 50, NA, NA, NA, NA, NA, 100, 50, 96, 62, 93, 50), BGK3a = c(NA,
NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA, 2, 1, 1, 1,
1, 2), BGK3b = c(NA, NA, NA, NA, NA, NA, NA, NA, 50, NA, NA,
NA, NA, NA, 100, 100, 50, 85, 82, 74), BGK4a = c(NA, NA, NA,
NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA, 1, 2, 2, 2, 1, 1)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
What the loop should do is select the respective value of "BGKright" with "i" and the column to process with "j". Note that "j" needs to jump 2 steps every loop because i only need to process every second column (from column 348 to column 448).
I would appreciate any help regarding this loop and other solutions that are possible for this task without loops.
Thank you in advance.
Ok i used BGKa=select(data1[348:448],ends_with("a")) to make a new data frame with only the relevant columns.
Then i used the for-loop to create the dummies.
# Recode each of the 50 answer columns as a dummy: 1 where the answer equals
# the corresponding entry of BGKright, 0 otherwise (NA answers stay NA).
# NOTE(review): the loop body was missing from the post; it is reconstructed
# from the "by hand" version of the recoding shown in the question -- confirm.
for (i in 1:50) {
  BGKa[[i]] <- as.numeric(BGKa[[i]] == BGKright[i])
}
Seems to work. Ty for help.

"grouping variable must not contain purely numeric items"

I am doing an analysis with groups and, as such, need to make a grouping variable, for which I wanted to use gender (0=male, 1=female). What I first did was create a vector of this variable (the manual told me to do this), but then I got an error: "grouping variable must not contain purely numeric items". Then I transformed my vector into a logical (TRUE/FALSE), but somehow I still get this error.
So my question is, does anyone know, in general terms, what may be the problem when I get this error?
Attached below is the code to the head of my dataset:
structure(c(7, 8, 7, 5, 6, 6, 4.9, NA, 6.9, 5.1, 5.8, NA, NA,
NA, 7, 3, 7, NA, NA, NA, 6.7, 4.1, 5.9, NA, NA, NA, 5, 6, 7,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 8, NA, NA, NA, 6.2,
4.3, 6.3, NA, NA, NA, 7, 5, 7, NA, NA, NA, 6.5, NA, NA, NA, NA,
NA, 6, NA, 7, NA, NA, NA, NA, NA, 5, NA, NA, NA, NA, NA, 7, NA,
NA, NA, NA, NA, 6.1, NA, NA, NA, NA, NA, 7, NA, NA, NA, NA, NA,
NA, NA, 16, 0.001, 12, 11, 11, 0.001, 0.001, 0.001, 12, 12, 12,
0.001, 0.001, 0.001, 12, 12, 12, 0.001, 0.001, 0.001, 15, 12,
12, 0.001, 0.001, 0.001, 16, 0.001, 12, 0.001, 0.001, 0.001,
0.001, 0.001, 15, 0.001, 0.001, 0.001, 0.001, 0.001, 16, 0.001,
0, 1, 0, 0, 1, 0), .Dim = c(6L, 24L), .Dimnames = list(c("800009",
"800012", "800015", "800033", "800042", "800045"), c("gener_sat_T0",
"sel_T0", "gener_sat_T1", "sel_T1", "gener_sat_T2", "sel_T2",
"gener_sat_T3", "sel_T3", "gener_sat_T4", "sel_T4", "gener_sat_T5",
"sel_T5", "gener_sat_T6", "sel_T6", "gener_sat_T7", "sel_T7",
"dT1", "dT2", "dT3", "dT4", "dT5", "dT6", "dT7", "female")))
Then what I am trying to do is fit a CT model (have used it before on non-group data and that worked fine).
# Model specification: 2 latent and 2 manifest processes (gener_sat, sel)
# over 8 time points, with LAMBDA set to the 2 x 2 identity matrix.
CTMODEL <- ctModel(n.latent = 2, n.manifest = 2, Tpoints = 8,
manifestNames = c("gener_sat", "sel"),
latentNames = c("gener_sat", "sel"), LAMBDA = diag(2))
# Multi-group fit using `female` as the grouping variable; this is the call
# that reports "grouping variable must not contain purely numeric items".
fit_CTMODEL <- ctMultigroupFit(datawide = data_wide, groupings=female, ctmodelobj = CTMODEL)
Thanks a bunch!
Ok, I redid your computations, albeit your code was not reproducible directly, I made some changes and now it works:
# Create the wide data matrix (data_wide) with the grouping column
# ("female") removed: 6 subjects x 23 columns, i.e. gener_sat/sel at
# T0..T7 followed by the time intervals dT1..dT7.
data_wide <- structure(c(7, 8, 7, 5, 6, 6, 4.9, NA, 6.9, 5.1, 5.8, NA, NA,
NA, 7, 3, 7, NA, NA, NA, 6.7, 4.1, 5.9, NA, NA, NA, 5, 6, 7,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 8, NA, NA, NA, 6.2,
4.3, 6.3, NA, NA, NA, 7, 5, 7, NA, NA, NA, 6.5, NA, NA, NA, NA,
NA, 6, NA, 7, NA, NA, NA, NA, NA, 5, NA, NA, NA, NA, NA, 7, NA,
NA, NA, NA, NA, 6.1, NA, NA, NA, NA, NA, 7, NA, NA, NA, NA, NA,
NA, NA, 16, 0.001, 12, 11, 11, 0.001, 0.001, 0.001, 12, 12, 12,
0.001, 0.001, 0.001, 12, 12, 12, 0.001, 0.001, 0.001, 15, 12,
12, 0.001, 0.001, 0.001, 16, 0.001, 12, 0.001, 0.001, 0.001,
0.001, 0.001, 15, 0.001, 0.001, 0.001, 0.001, 0.001, 16, 0.001),
.Dim = c(6L, 23L),
.Dimnames = list(
  # Row names: one subject id per row.
  c("800009", "800012", "800015", "800033", "800042", "800045"),
  # Column names: must list exactly 23 names to match .Dim.
  c("gener_sat_T0", "sel_T0", "gener_sat_T1", "sel_T1",
    "gener_sat_T2", "sel_T2", "gener_sat_T3", "sel_T3",
    "gener_sat_T4", "sel_T4", "gener_sat_T5", "sel_T5",
    "gener_sat_T6", "sel_T6", "gener_sat_T7", "sel_T7",
    "dT1", "dT2", "dT3", "dT4", "dT5", "dT6", "dT7")))
# Model specification: 2 latent and 2 manifest processes (gener_sat, sel)
# over 8 time points, with LAMBDA set to the 2 x 2 identity matrix.
CTMODEL <- ctModel(n.latent = 2, n.manifest = 2, Tpoints = 8,
                   manifestNames = c("gener_sat", "sel"),
                   latentNames = c("gener_sat", "sel"), LAMBDA = diag(2))
# Grouping, which needs to be a character vector. The labels follow the
# question's coding of `female` (0 = male, 1 = female), whose values for
# the six subjects are 0, 1, 0, 0, 1, 0.
fem <- c("m", "f", "m", "m", "f", "m")
fit_CTMODEL <- ctMultigroupFit(dat = data_wide, groupings = fem,
                               ctmodelobj = CTMODEL) # dat instead of datawide
So in the end it's just a matter of making the grouping variable a character vector.
Addendum: the code runs but gives various errors:
Not all eigenvalues of Hessian are greater than 0
Fit attempt generated errors
Retry limit reached
I guess that's because of the model and leave the solution to you :)
