Remove legend in ggplot - r

I am working with ggeffects package
I have the following syntax
data_example <- structure(list(paciente = structure(c(6171, 6488, 6300, 6446,
6489, 6445, 6473, 6351, 6212, 6387), label = "Paciente", format.spss = "F6.0"),
edad_s1 = structure(c(69, 62, 60, 71, 67, 59, 63, 66, 67,
70), label = "Edad", format.spss = "F3.0"), sexo_s1 = structure(c(1L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L), .Label = c("Hombre",
"Mujer"), label = "Sexo", class = "factor"), grupo_int_v00 = structure(c(1L,
1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L), .Label = c("A", "B"), label = "Grupo de intervención", class = "factor"),
time = c(0, 0, 0, 2, 2, 2, 1, 2, 1, 1), peso1 = c(89.9, 62,
91.5, 75.2, 68.2, 88.4, 93.6, 79, 88.3, 84.4), cintura1 = c(113,
90, 112, NA, 87.5, 116, 98.5, 104, 112.5, 108.5), tasis2_e = c(132,
132, 149, NA, 145, 137, 129, 152, 146, 129), tadias2_e = c(81,
58, 79, NA, 80, 60, 79, 87, 79, 68), p17_total = c(7, 9,
10, 10, 10, 10, 10, 7, 10, 11), geaf_tot = c(3412.59, 3524.48,
559.44, 5454.55, 4293.71, 839.16, 3146.85, 7552.45, 4335.66,
566.9), glucosa = c(102, 97, 89, NA, 88, 168, 104, NA, 114,
121), albumi = c(4.94, 4.68, 4.75, NA, 4.34, 5.06, 4.56,
NA, 5.06, 3.96), coltot = c(232, 253, 215, NA, 202, 287,
255, NA, 217, 147), hdl = c(59, 64, 68, NA, 71, 46, 61, NA,
40, 42), ldl_calc = c(143, 150, 127, NA, 114, NA, 170, NA,
143, 86), trigli = c(152, 195, 99, NA, 85, 378, 121, NA,
170, 93), hba1c = c(5.61, 5.66, 5.43, NA, 5.38, 8.14, 5.81,
NA, 6, 6.38), i_hucpeptide = c(988.91, 673.5, 1036.03, NA,
734.29, 1266.3, 610.9, NA, 1144.8, 672.08), i_hughrelin = c(1133.35,
1230.06, 1109.98, NA, 1064.79, 725.35, 1437.85, NA, 866.07,
822.83), i_hugip = c(2.67, 2.67, 2.67, NA, 2.67, 2.67, 2.67,
NA, 2.67, 2.67), i_huglp1 = c(145.43, 138.32, 194.14, NA,
99.37, 166.27, 218.33, NA, 184.04, 222.84), i_huglucagon = c(513.89,
357.35, 624.73, NA, 464.85, 448.49, 304.29, NA, 310.61, 426.52
), i_huinsulin = c(234.23, 229.06, 358.86, NA, 175.38, 466,
99.02, NA, 367.95, 77.33), i_huleptin = c(7898.28, 5211.27,
14670.25, NA, 7161.39, 3218.49, 2659.8, NA, 3766.01, 1207.58
), i_hupai1 = c(3468.4, 1977.9, 4101.1, NA, 1613.4, 2847.27,
2442.49, NA, 1953.26, 1752.88), i_huresistin = c(4783.28,
2676.05, 3064.57, NA, 2165.52, 3878.48, 8343.46, NA, 2822.68,
6496.73), i_huvisfatin = c(831.6, 649.45, 2270.65, NA, 1578.88,
9.63, 185.09, NA, 162.8, 8.64), col_rema = c(30, 39, 20,
NA, 17, NA, 24, NA, 34, 19), homa = c(1061.843, 987.503,
1419.491, NA, 685.931, 3479.467, 457.692, NA, 1864.28, 415.864
), i_pcr = c(0.05, NA, 0.27, NA, 0.03, 0.23, 0.04, NA, 0.09,
0.09), d_homa = c(NA, NA, NA, NA, -2.629, 33.042, -181.211,
NA, -929.683, -89.108), d_hughrelin = c(NA, NA, NA, NA, -213.59,
48.43, 95.27, NA, -228.62, -146.8), d_huinsulin = c(NA, NA,
NA, NA, 3.24, -68.79, -43.31, NA, -147.33, -7.46), d_hucpeptide = c(NA,
NA, NA, NA, 192.39, -263.54, -71.56, NA, -437.38, -215.44
), d_huglucagon = c(NA, NA, NA, NA, 38.99, -112.45, -10.75,
NA, -133.55, -259.73), d_huleptin = c(NA, NA, NA, NA, 409.76,
-1081.5, -1778.69, NA, -353.91, -679.7), d_huresistin = c(NA,
NA, NA, NA, 391.02, -155.41, -436.47, NA, -1137.79, -922.75
), d_huvisfatin = c(NA, NA, NA, NA, 457.54, -260.79, -341.02,
NA, -426.89, 0), d_glucosa = c(NA, NA, NA, NA, -2, 23, 3,
NA, -8, -13), d_coltot = c(NA, NA, NA, NA, -52, 36, -11,
NA, 15, -12), d_hdl = c(NA, NA, NA, NA, 1, 3, -1, NA, 1,
4), d_ldl_calc = c(NA, NA, NA, NA, -50, NA, -10, NA, 12,
-15), d_col_rema = c(NA, NA, NA, NA, -3, NA, 0, NA, 2, -1
), d_trigli = c(NA, NA, NA, NA, -14, 132, -1, NA, 8, -5),
d_hba1c = c(NA, NA, NA, NA, -0.11, -0.04, -0.18, NA, -1.76,
-0.67), d_tasis2_e = c(NA, NA, NA, NA, 0, 6, -1, 7, -21,
-9), d_tadias2_e = c(NA, NA, NA, NA, 0, 2, -8, 8, -10, -17
), d_peso1 = c(NA, NA, NA, -6, -2.3, 0.2, -11.4, 0.8, -4.1,
-9.3), d_cintura1 = c(NA, NA, NA, NA, -2.5, -4, -12.5, 6,
-3.5, -4.5), d_geaf_tot = c(NA, NA, NA, 699.31, 2055.95,
-2181.82, 1748.25, 3776.23, 867.13, -6593.94), d_p17_total = c(NA,
NA, NA, 1, 4, 5, 4, -5, 5, 2), d_hupai1 = c(NA, NA, NA, NA,
-185.03, 204.77, 202.01, NA, -1551.91, 57.2), d_hugip = c(NA,
NA, NA, NA, 0, 0, 0, NA, 0, 0), d_huglp1 = c(NA, NA, NA,
NA, -42.07, -163.02, 107.28, NA, -95.82, -87.5), d_pcr = c(NA,
NA, NA, NA, NA, NA, NA, NA, -0.18, -0.22), ln_trigli = c(5.024,
5.273, 4.595, NA, 4.443, 5.935, 4.796, NA, 5.136, 4.533),
ln_homa = c(6.968, 6.895, 7.258, NA, 6.531, 8.155, 6.126,
NA, 7.531, 6.03), ln_hba1c = c(1.725, 1.733, 1.692, NA, 1.683,
2.097, 1.76, NA, 1.792, 1.853), ln_geaf_tot = c(8.135, 8.167,
6.327, 8.604, 8.365, 6.732, 8.054, 8.93, 8.375, 6.34), i_ratiolg = c(6.969,
4.237, 13.217, NA, 6.726, 4.437, 1.85, NA, 4.348, 1.468)), row.names = c(NA,
-10L), class = c("tbl_df", "tbl", "data.frame"))
The mixed model I have created following the syntax
lme_peso <- lme(peso1 ~ sexo_s1 + edad_s1 + poly(time, 2)*grupo_int_v00 + p17_total,
random = ~ poly(time, 2)|paciente, control=lmeControl(opt="optim"),
data = dat_longer, subset = !is.na(peso1), na.action = na.omit)
And then to plot it
# Plot the fixed-effect predictions of lme_peso over time, one colour per
# intervention group, with a confidence ribbon.
ggpredict(lme_peso, c("time [all]", "grupo_int_v00"), type="fixed") %>%
# x, predicted and group are columns of the ggpredict() result.
ggplot(aes(x = x, y = predicted, colour = group)) +
geom_point() +
geom_line() +
stat_smooth(method = "loess",se = T) +
labs(x = "time (months)", y = "Weight (kg)") +
# Relabel the two colour-legend entries and set their colours.
scale_color_manual(labels = c("Control", "Intervention"), values = c("orange", "green")) +
# NOTE: fill = F sits inside aes(), so the constant F is mapped to the fill
# aesthetic; this mapping is what produces the extra legend described below.
geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = F),alpha = 1/5) +
# Show the time codes 0, 1, 2 as 0, 6, 12 on the axis.
scale_x_continuous(breaks = 0:2, labels = c(0, 6, 12))
When I suppress the fill argument in geom_ribbon, the fill stays black. However, I don't know how to keep just one legend with the 2 groups (Control and Intervention): I get an extra legend (showing F in this case).
Thanks in advance

I couldn't run your code, but I rebuilt it with iris.
Like Matt suggested, one thing would be, remove fill=F:
# Variant 1: the ribbon has no fill aesthetic, so no fill legend is drawn;
# the only legend is the colour legend created by stat_smooth().
# NOTE(review): assumes an `iris` data frame with columns SepalLength,
# PetalLength and Name; base R's iris uses Sepal.Length, Petal.Length and
# Species instead -- confirm which one is meant.
ggplot(data = iris, aes(x = SepalLength, y = PetalLength, group = Name)) +
  geom_point() +
  geom_line() +
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  stat_smooth(method = "loess", se = TRUE, aes(color = Name)) +
  # Constant band between y = 1 and y = 3, 20% opaque.
  geom_ribbon(aes(ymin = 1, ymax = 3), alpha = 1 / 5) +
  scale_x_continuous(breaks = 0:2, labels = c(0, 6, 12))
Or if you need it for some reason, use guides(fill="none"):
# Variant 2: keep the fill mapping on the ribbon but hide its legend with
# guides(fill = "none"); the colour legend from stat_smooth() is unaffected.
# NOTE(review): assumes an `iris` data frame with columns SepalLength,
# PetalLength and Name; base R's iris uses Sepal.Length, Petal.Length and
# Species instead -- confirm which one is meant.
ggplot(data = iris, aes(x = SepalLength, y = PetalLength, group = Name)) +
  geom_point() +
  geom_line() +
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  stat_smooth(method = "loess", se = TRUE, aes(color = Name)) +
  # fill = FALSE inside aes() maps a constant, which would add a fill legend.
  geom_ribbon(aes(ymin = 1, ymax = 3, fill = FALSE), alpha = 1 / 5) +
  scale_x_continuous(breaks = 0:2, labels = c(0, 6, 12)) +
  # Suppress only the legend that belongs to the fill aesthetic.
  guides(fill = "none")
Output:

Related

How to backward generate levels from flows when both are in separate dataframes in R?

I have two dataframes. The first one called 'sw_flows_final' contains data of flows between sectors from 1952-2019. The second one is called 'sw_stocks' and contains data of stocks of what each sector owes the other from 2011-2019. I want to backwards extend the 'sw_stocks' data frame till 1952. The connection between the two is the following
stock_(t)+flow_(t)=stock_(t+1).
To take an example, I have the level for some sector in 2011. By subtracting the corresponding flow in 2011 of the same sector, I will get the level in 2010, and so on. There are 10 such sectors. I want R to generate levels from 2011 back until 1952 for every sector correctly. Can someone recommend how to do this efficiently?
Here are two images of how the dataframes look like
sw_flows_final
sw_stocks
Here is the sample data using dput for sw_flows_final
structure(list(sector = structure(c(2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L), .Label = c("Source-Use",
"Banking", "Other financial corporations", "Private nonfinancial corporations",
"Government", "Rest of the World", "Households and NIPISHs",
"Other resident sectors", "Total"), class = "factor"), bank_s = structure(c(NA,
4, -1, 15, 23, -131, -6, -96, 19, NA, -3, -7, -43, -38, 13, 2,
-76, -48, NA, -3), label = "Banking Sources", format.stata = "%10.0g"),
bank_u = structure(c(NA, -5, 33, -25, -150, 32, NA, -115,
NA, NA, 6, -32, 10, 1, -13, NA, -28, NA, NA, -2), label = "Banking Uses", format.stata = "%10.0g"),
ofi_s = structure(c(NA, NA, NA, NA, 2, 38, 2, 42, -2, NA,
NA, NA, NA, 2, 53, NA, 55, -4, 4, NA), label = "Other Fin. Institutions Sources", format.stata = "%10.0g"),
ofi_u = structure(c(4, NA, 5, 25, 1, 9, NA, 44, NA, 2, NA,
5, 44, 2, 6, NA, 59, NA, 1, NA), label = "Other Fin. Institutions Uses", format.stata = "%10.0g"),
pcs_s = structure(c(54, 5, NA, 2, -1, 68, 6, 134, 118, -22,
5, NA, 3, 1, 58, 2, 47, 31, -9, 12), label = "Pvt. Corp. Sector Sources", format.stata = "%10.0g"),
pcs_u = structure(c(9, NA, NA, -2, NA, 2, 7, 16, NA, 3, NA,
NA, 5, NA, 3, 5, 16, NA, 15, NA), label = "Pvt. Corp. Sector Uses", format.stata = "%10.0g"),
govt_s = structure(c(-54, 15, -2, NA, 62, 102, -3, 120, 94,
75, 26, 5, NA, 28, 1, -41, 94, 120, -20, 36), label = "Government Sources", format.stata = "%10.0g"),
govt_u = structure(c(-3, NA, 2, NA, -2, 22, 7, 26, NA, -52,
NA, 3, NA, 1, 18, 4, -26, NA, -36, NA), label = "Government Uses", format.stata = "%10.0g"),
rotw_s = structure(c(-153, NA, -3, -14, NA, NA, -3, -173,
-207, 2, NA, NA, 9, NA, NA, -1, 10, 34, 26, NA), label = "Rest of the World Sources", format.stata = "%10.0g"),
rotw_u = structure(c(9, NA, 2, 42, NA, NA, -19, 34, NA, -48,
NA, 8, 20, NA, NA, -4, -24, NA, -25, NA), label = "Rest of the World Uses", format.stata = "%10.0g"),
hh_s = structure(c(32, 9, -5, 22, NA, NA, NA, 58, 1, -12,
7, -18, 18, NA, NA, NA, -5, -88, 24, 5), label = "Households Sources", format.stata = "%10.0g"),
hh_u = structure(c(-130, 11, 54, 122, NA, NA, NA, 57, NA,
16, 15, 23, 29, NA, NA, NA, 83, NA, 70, 16), label = "Households Uses", format.stata = "%10.0g"),
total_s = structure(c(-121, 33, -11, 25, 86, 77, -4, 85,
23, 43, 35, -20, -13, -7, 125, -38, 125, 45, 25, 50), label = "Total Sources", format.stata = "%8.0g"),
total_u = structure(c(-111, 6, 96, 162, -151, 65, -5, 62,
NA, -79, 21, 7, 108, 4, 14, 5, 80, NA, 25, 14), label = "Total Uses", format.stata = "%8.0g"),
year = structure(c(1952, 1952, 1952, 1952, 1952, 1952, 1952,
1952, 1952, 1953, 1953, 1953, 1953, 1953, 1953, 1953, 1953,
1953, 1954, 1954), label = "Year", format.stata = "%9.0g")), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
and for sw_stocks
structure(list(sector = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
9L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 9L, 8L, 1L, 2L), .Label = c("Banking",
"Government", "Households and NIPISHs", "Other financial corporations",
"Other resident sectors", "Private nonfinancial corporations",
"Rest of the World", "Source-Use", "Total"), class = "factor"),
bank_s_l = c(1350079.49133551, 1899576.67082447, 5062495.73351201,
1151214.12986427, 110.276338621267, 1355504.86599654, 859206.587426226,
11678187.7552976, -178532.519500652, 1505246.38567655, 2155467.21125388,
5855906.75278197, 1381570.68752383, 78.0505047633771, 1456469.98651368,
1066704.4326649, 13421443.5069196, -202944.716738584, 1740628.61553264,
2544347.17898597), bank_u_l = c(1156063.70260075, 3554941.84780303,
2447185.38181841, 616874.110576552, 138320.990278717, 1945789.820585,
1997544.42113584, 11856720.2747983, NA, 1241749.97765978,
4007409.67985746, 3290777.41165653, 612317.31788805, 141423.711476498,
2055249.27763703, 2275460.84748283, 13624388.2236582, NA,
1458813.42489447, 4587951.87769072), ofi_s_l = c(412030.693749423,
187700.316261259, 121865.465242432, 3611043.08926093, 87881.51,
120740.971533392, 175451.98473536, 4716714.03078279, -670782.594882376,
488886.759730145, 194866.871943733, 123364.152903093, 4019909.92650251,
94803.43, 115690.116216912, 253583.626498628, 5291104.88379502,
-846071.342671093, 572720.870848094, 205760.036602362), ofi_u_l = c(1082050.91844983,
1987036.26341659, 43593.1327534439, 1346470.61507149, 3473.61146178588,
891968.551489673, 32903.5330223561, 5387496.62566517, NA,
1080238.25250793, 2240819.70449555, 67741.7919287182, 1730643.94899816,
4370.85592038175, 930827.085634209, 82534.5869811682, 6137176.22646612,
NA, 1072481.25801139, 2658225.53044836), pcs_s_l = c(1940282.83357554,
229076.060527219, 1493897.17610662, 863355.291768346, 0,
5024995.75077571, 2801147.39882306, 12352754.5115765, 3480898.17067805,
2051048.40228274, 254992.855935366, 1509718.51083095, 899187.870098343,
0, 5265806.62852803, 3359680.47293263, 13340434.7406081,
3941569.20740091, 2386853.8881313, 277562.331033185), pcs_u_l = c(1355504.86599654,
471854.794244865, 0, 150821.917117881, 0, 6306138.74353917,
587536.02, 8871856.34089845, NA, 1456469.98651368, 476029.97685104,
0, 151878.215616114, 0, 6616322.85152686, 698164.502699446,
9398865.53320714, NA, 1614687.15738287, 497308.289032772),
govt_s_l = c(3026391.34344152, 1232718.57225668, 899831.786828824,
1758432.78129903, 0, 471854.794244865, 385631.474767484,
7774860.7528384, 7774860.7528384, 3299762.57843369, 1508255.01724503,
878289.365689908, 2114742.39334311, 0, 476029.97685104, 609867.659036133,
8886946.99059891, 8886946.99059891, 3775295.4629249, 1833119.31513762
), govt_u_l = c(1932781.13639086, 1380943.25642814, 20831.3302280885,
273893.850293545, 2077.96462093295, 219943.84762228, 10369.45,
3840840.83558385, 3840840.83558385, 2063225.61511638, 1542429.04485517,
23827.0863095146, 318847.879129413, 1766.43372182738, 246595.428939527,
12529.88, 4209221.36807184, 4209221.36807184, 2274350.44000159,
1705859.60013762), rotw_s_l = c(1451517.27411681, 10373.77,
0, 0, 175620.33, 587536.02, 0, 2225047.39411681, 2225047.39411681,
1548720.84, 12533.923713, 0, 0, 175786.24, 698164.502699446,
0, 2435205.50641245, 2435205.50641245, 1859509.8975799, 14605.2524611378
), rotw_u_l = c(337475.82215636, 409030.0781722, 0, 0, 0,
2814996.33882306, 0, 3561502.23915162, NA, 436497.84863876,
400723.19, 0, 0, 0, 3372956.29281454, 0, 4210177.3314533,
NA, 682427.647230683, 453829.566744612), hh_s_l = c(2963585.38181841,
20831.3302280885, 0, 43374.9585653354, 0, 0, 0, 3027791.67061183,
-7557951.13157221, 3273435.03602629, 23827.0863095146, 0,
67086.1333979951, 0, 0, 0, 3364348.2557338, -8314568.56600038,
3579173.99233299, 26649.23), hh_u_l = c(5147567.73351201,
882725.978434388, 0, 3061551.91413103, -7.27595761418343e-10,
1493897.17610662, 0, 10585742.802184, NA, 5861967.75278197,
881712.095035784, 0, 3425545.46308548, -7.27595761418343e-10,
1509691.51083095, 0, 11678916.8217342, NA, 6628288.36795117,
896978.173784456), total_s_l = c(11143887.0180372, 3580276.72009772,
7578090.16168989, 7427420.25075791, 263612.116338621, 7560632.4025505,
4221437.44575213, 41775356.115224, -2328803.00305744, 12167100.0021494,
4149942.96640053, 8367278.78220592, 8482497.01086579, 270667.720504763,
8012161.21080911, 5289836.19113229, 46739483.8840678, -2519261.62052293,
13914182.7273498, 4902043.34422028), total_u_l = c(11011444.1791064,
8686532.21849921, 2511609.84479994, 5449612.40719049, 143872.566361435,
13672734.4781658, 2628353.4241582, 44104159.1182814, NA,
12140149.4332185, 9549123.691095, 3382346.28989476, 6239232.82471722,
147561.001118706, 14731642.4473831, 3068689.81716345, 49258745.5045908,
NA, 13731048.2954722, 10800153.0378385), year = c(2012, 2012,
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2013, 2013, 2013,
2013, 2013, 2013, 2013, 2013, 2013, 2014, 2014)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L), groups = structure(list(
sector = structure(1:9, .Label = c("Banking", "Government",
"Households and NIPISHs", "Other financial corporations",
"Other resident sectors", "Private nonfinancial corporations",
"Rest of the World", "Source-Use", "Total"), class = "factor"),
.rows = structure(list(c(1L, 10L, 19L), c(2L, 11L, 20L),
c(3L, 12L), c(4L, 13L), c(5L, 14L), c(6L, 15L), c(7L,
16L), c(9L, 18L), c(8L, 17L)), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -9L), .drop = TRUE))

Looping/sapply through nlme function

I am trying to execute a loop with mixed-model effects with response variable changing. I came from here and here. I should say that I have tried something like creating a function and then using sapply or lapply (without success)
I provide a small dataset (really small) just to represent my original database (much larger and similar to those of longitudinal studies)
data<- structure(list(paciente = structure(c(6134, 6099, 6457, 6164,
6470, 6323, 6550, 6082, 6476, 6044, 6509, 6539, 6234, 6555, 6383,
6127, 6507, 6513, 6486, 6080, 6101, 6007, 6023, 6516, 6001, 6198,
6510, 6530, 6351, 6181), label = "Paciente", format.spss = "F6.0"),
edad_s1 = structure(c(70, 63, 61, 71, 67, 59, 63, 69, 67,
67, 67, 72, 65, 72, 63, 65, 60, 64, 56, 63, 57, 62, 72, 60,
72, 63, 72, 68, 66, 71), label = "Edad", format.spss = "F3.0"),
sexo_s1 = structure(c(1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 2L), .Label = c("Hombre", "Mujer"), label = "Sexo", class = "factor"),
time = c(2, 1, 2, 1, 0, 0, 1, 0, 2, 1, 1, 0, 1, 2, 1, 2,
1, 2, 0, 1, 1, 0, 2, 1, 0, 2, 1, 2, 2, 0), grupo_int_v00 = structure(c(1L,
1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L), .Label = c("A",
"B"), label = "Grupo de intervención", class = "factor"),
peso1 = c(108, 80.4, 95, 75, 92.6, 90, 82.2, 94.4, 78, 71.3,
75.1, 83.5, 87.1, 63, 73, 98.5, 90.2, 81.3, 93.4, 79.8, 114.3,
110.9, 81.5, 88.5, 82.4, 88.3, 90, 73, 79, 94.8), cintura1 = c(127,
100.5, 103.5, 108, 115, 114.5, 95.5, 115, 101, 98, 99, 108.5,
105, 99, 104, 126, 114.2, 99, 110, 104.5, 120, 126, 111.5,
102, 117, 110, 125, 100, 104, 123), tasis2_e = c(156, 129,
131, 138, 167, 138, 115, 146, 119, 148, 130, 144, 115, 122,
135, 139, 128, 119, 138, 115, 138, 151, 151, NA, 137, 147,
124, 168, 152, 156), tadias2_e = c(70, 63, 80, 67, 76, 81,
57, 68, 69, 69, 68, 78, 61, 71, 54, 77, 63, 63, 92, 73, 80,
88, 84, NA, 79, 76, 62, 90, 87, 89), p17_total = c(10, 10,
5, 9, 9, 7, 15, 11, 6, 12, 11, 4, 9, 14, 9, 9, 11, 14, 6,
5, 10, 10, 9, 13, 12, 7, 11, 12, 7, 4), geaf_tot = c(1986.01,
1286.71, 1230.77, 1510.49, 839.16, 2144.52, 5361.31, 1678.32,
4055.94, 2601.4, 3363.64, 3076.92, 5342.66, 2769.23, 2601.4,
1693.24, 4055.94, 3146.85, 3916.08, 6405.59, 2442.89, 671.33,
867.13, 1585.08, 3153.85, 3188.81, 7986.01, 839.16, 7552.45,
2937.06), glucosa = c(127, 97, 95, 102, 119, 113, 109, 105,
93, 167, 85, 108, 122, 112, 113, 120, 100, 108, 100, 86,
129, 136, 98, 97, 130, 125, 109, 102, NA, 181), albumi = c(4.47,
4.82, 4.78, 4.22, 4.59, 4.5, 4.33, 4.87, 4.83, 4.98, 4.23,
4.77, 4.76, 4.98, 4.18, 4.51, 4.72, 4.87, 4.77, 4.61, 4.55,
4.77, 4.6, 4.59, 4.25, 4.71, 4.47, 4.54, NA, 4.63), coltot = c(157,
191, 276, 248, 248, 217, 187, 301, 173, 230, 258, 238, 231,
181, 183, 243, 223, 195, 237, 245, 164, 145, 199, 234, 178,
192, 201, 198, NA, 159), hdl = c(39, 50, 57, 59, 49, 44,
60, 98, 52, 73, 58, 44, 58, 60, 48, 46, 73, 58, 39, 47, 38,
45, 59, 56, 72, 34, 78, 62, NA, 54), ldl_calc = c(91, 124,
204, 133, 155, 140, 105, 162, 91, 141, 182, 173, 155, 107,
83, 150, 132, 124, NA, 167, 101, 88, 121, 160, 84, 130, 112,
120, NA, NA), trigli = c(137, 87, 74, 282, 219, 165, 112,
203, 149, 78, 89, 105, 91, 71, 259, 236, 92, 63, 447, 157,
123, 58, 94, 90, 112, 139, 53, 80, NA, 429), hba1c = c(6.57,
5.82, 5.68, 5.96, 6.11, 5.73, 5.48, 5.8, 5.6, 7.8, 5.21,
5.73, 6.1, 5.86, 6.37, 6.27, 5.22, 5.59, 5.47, 5.95, 6.96,
NA, 5.47, 4.99, NA, 6.25, 5.79, 5.79, NA, 6.54), i_hucpeptide = c(NA,
NA, 466.64, 838.61, 847.89, 1481.03, 819.65, NA, 1298.6,
NA, 564.59, 544.2, 755.73, 1057.83, 957.43, NA, 957.33, 1002.34,
1104, NA, NA, NA, NA, 594.6, NA, 815.82, 922.08, 628.54,
NA, 1591.01), i_hughrelin = c(NA, NA, 410.97, 553.65, 453,
352.44, 527.01, NA, 328.27, NA, 1668.41, 460.06, 1072.27,
260.24, 749.03, NA, 1327.91, 363.79, 524.53, NA, NA, NA,
NA, 1051.1, NA, 143.32, 1076.49, 1565.85, NA, 607.31), i_hugip = c(NA,
NA, 2.67, 2.67, 2.67, 2.67, 2.67, NA, 2.67, NA, 2.67, 2.67,
690.74, 1165.16, 2.67, NA, 2.67, 2.67, 2.67, NA, NA, NA,
NA, 2.67, NA, 2.67, 2.67, 2.67, NA, 2.67), i_huglp1 = c(NA,
NA, 127.66, 284.34, 200.13, 59.3, 234.84, NA, 503.42, NA,
103.9, 14.14, 71.6, 56.41, 75.13, NA, 161.36, 124.19, 220.52,
NA, NA, NA, NA, 14.14, NA, 112.57, 100.52, 237.55, NA, 470.91
), i_huglucagon = c(NA, NA, 333.79, 649.94, 726.99, 395.38,
610.5, NA, 434.42, NA, 502.4, 127.62, 268.23, 10.48, 428.15,
NA, 716.02, 238.95, 320.32, NA, NA, NA, NA, 10.48, NA, 238,
487.42, 297.6, NA, 495.16), i_huinsulin = c(NA, NA, 129.24,
270.98, 299.75, 730.82, 267.54, NA, 616.91, NA, 121.26, 85.34,
224.96, 247.48, 220.75, NA, 181.85, 341.25, 551.46, NA, NA,
NA, NA, 133.42, NA, 263.87, 279.45, 94.78, NA, 573.14), i_huleptin = c(NA,
NA, 3992.49, 17806.43, 8409.76, 11511.43, 2965.92, NA, 3223.08,
NA, 9018.79, 1039.45, 2613.33, 2128.98, 7307.89, NA, 13492.13,
2883.77, 4775.98, NA, NA, NA, NA, 2602.96, NA, 2829.59, 8511.92,
3528.77, NA, 11487.15), i_hupai1 = c(NA, NA, 997.29, 2499.25,
3085.25, 1909.44, 1730.55, NA, 3333.37, NA, 1424.3, 1857.71,
2578.46, 2268.52, 2222.97, NA, 2722.92, 1300.69, 2732.11,
NA, NA, NA, NA, 1204.36, NA, 2483.08, 2289.67, 1791.79, NA,
6595.54), i_huresistin = c(NA, NA, 3044.48, 5774.77, 3221.72,
4925.57, 5170.95, NA, 3683.64, NA, 4041.32, 6771.31, 5119.11,
9521.7, 3328.41, NA, 5061.65, 3773.39, 3039.39, NA, NA, NA,
NA, 4405.17, NA, 2577.84, 3433.82, 6802.94, NA, 6461.67),
i_huvisfatin = c(NA, NA, 302.3, 2083.46, 2989.72, 1118.7,
8.64, NA, 96.03, NA, 2209.51, 8.64, 1944.37, 1415.55, 678.33,
NA, 4349.56, 8.64, 410.1, NA, NA, NA, NA, 117, NA, 8.64,
2308.8, 228.53, NA, 1766.64), col_rema = c(27, 17, 15, 56,
44, 33, 22, 41, 30, 16, 18, 21, 18, 14, 52, 47, 18, 13, NA,
31, 25, 12, 19, 18, 22, 28, 11, 16, NA, NA), homa = c(NA,
NA, 5.053, 11.374, 14.679, 33.985, 12.001, NA, 23.61, NA,
4.242, 3.793, 11.294, 11.406, 10.265, NA, 7.484, 15.167,
22.694, NA, NA, NA, NA, 5.326, NA, 13.574, 12.535, 3.978,
NA, 42.691), i_pcr = c(NA, NA, 0.41, 0.82, NA, 2.08, 0.08,
NA, 0.1, NA, 0.38, 0.05, 0.04, 0.35, 0.2, NA, 0.98, 0.02,
NA, NA, NA, NA, NA, 0.2, NA, 0.1, 0.16, 0.16, NA, 2.93)), row.names = c(NA,
-30L), class = c("tbl_df", "tbl", "data.frame"))
Afterwards I am defining my iteration and my variables database
ex<- subset(data[, 6:30])
for (i in 1:length(ex)) {
var_1 <- ex[,i]
var_1 <- unlist(var_1)
lme_1 <- lme(var_1 ~ sexo_s1*peso1 + edad_s1 + p17_total + poly(time, 2)*grupo_int_v00,
random = ~ poly(time, 2)|paciente, control=lmeControl(opt="optim"),
data = dat_longer, subset = !is.na(var_1))
Error in model.frame.default(formula = ~time + var_1 + sexo_s1 + peso1 + :
invalid type (list) for variable 'var_1'
I have tried unlisting/as.data.frame in before running the loop
for (i in 1:length(data)) {
var_1 <- data[,i]
var_1 <- unlist(var_1) #or as.data.frame(var_1)
lme_1 <- lme(var_1 ~ sexo_s1*peso1 + edad_s1 + p17_total + poly(time, 2)*grupo_int_v00,
random = ~ poly(time, 2)|paciente, control=lmeControl(opt="optim"),
data = dat_longer, subset = !is.na(var_1))
}
Error in model.frame.default(formula = ~time + var_1 + sexo_s1 + peso1 + :
variable lengths differ (found for 'var_1')
I have also tried to develop a new function to iterate over
lme_z <- function(z){
out <- lme(z ~ sexo_s1*peso1 + edad_s1 + p17_total + poly(time, 2)*grupo_int_v00,
random = ~ poly(time, 2)|paciente, control=lmeControl(opt="optim"),
data = dat_longer, subset = !is.na(z))
}
Error
If there is some contribution to iterate in the response variable (I know Ben Bolker is an expert)
Thanks in advance
If data is a data frame containing all of the variables that you use in your formula, including all of the responses that you want to consider, then you can do:
# Fit one mixed model with the column named by `resp` as the response.
#   resp: a single string, the name of a column of `data`.
# Returns the value of the lme() call.
# The data frame `data` is not an argument; it is looked up outside the function.
f <- function(resp) {
# Formula template: the `.` on the left-hand side is only a placeholder.
fixed <- . ~ sexo_s1 * peso1 + edad_s1 + p17_total + poly(time, 2) * grupo_int_v00
# Element 2 of a two-sided formula is its left-hand side; replace the
# placeholder with the response name converted to a symbol.
fixed[[2L]] <- as.name(resp)
lme(fixed = fixed,
random = ~poly(time, 2) | paciente,
data = data,
# Logical vector over the rows of `data`: keep rows where the response is not NA.
subset = !is.na(data[[resp]]),
control = lmeControl(opt = "optim"))
}
# Apply f to every response name, collecting the results in a list.
# `names_of_response_variables` is not defined in this snippet; it is assumed
# to be a character vector of column names of `data`.
list_of_lme_objects <- lapply(names_of_response_variables, f)
An important piece is:
fixed <- . ~ sexo_s1 * peso1 + edad_s1 + p17_total + poly(time, 2) * grupo_int_v00
fixed[[2L]] <- as.name(resp)
The second statement injects the response named resp into the left hand side of the formula template. A more transparent example:
fixed <- . ~ world
fixed[[2L]] <- as.name("hello")
fixed
## hello ~ world
Another important piece is:
subset = !is.na(data[[resp]])
Here, the right hand side actually evaluates to a logical vector of length equal to the number of rows of data. You might consider passing na.action = na.omit instead of subset, though that will also omit rows where the independent variables have missing values, so the semantics are slightly different.
The variable grupo_int_v00 is missing from your data frame. You'll have to fix that on your end in order to test the code...
I was going to suggest:
# Right-hand-side terms of the fixed-effects formula, as strings for reformulate().
formvars <- c("sexo_s1*peso1",
"edad_s1",
"p17_total",
"poly(time, 2)")
## excluded *grupo_int_v00 since not in example data frame
# Response names: columns 7 through 30 of df, selected by position.
# NOTE(review): `df` is presumably the question's data frame -- confirm.
respvars <- names(df)[7:30]
# Fitted models, stored under the name of their response variable.
result <- list()
for (r in respvars) {
# reformulate() builds the formula `r ~ <formvars joined by +>`.
result[[r]] <- lme(reformulate(formvars, response = r),
random = ~ poly(time, 2)|paciente,
control=lmeControl(opt="optim"),
data = df,
# na.exclude is used here in place of a subset argument.
na.action = na.exclude)
}
Many of @MikaelJagan's points are well taken. In particular:
grupo_int_v00 excluded since it wasn't in your example data set
this code doesn't work for your example since there are only two complete cases (i.e., observations with no missing predictors/responses) in the data set, so we can't fit a quadratic polynomial ("degree must be less than the number of unique points")
I used na.exclude, which obviates your subset argument; it excludes NA values when fitting but will re-introduce them e.g. in calculating predictions or residuals

How to merge multiple rows in R with multiple columns in a dataset

I want to merge the rows for each record_id into one row based on the type column except from the volunteers in the record_id column which have two repeats in the repeat column. I would like a second row for these. Each record_id corresponds to one person that has either come in for a test once (repeat=1) or twice and therefore has two entries in the repeat column.
Here is what my data look like
structure(list(record_id = c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4,
4, 4, 4), type = c(NA, "data_collection", "test", NA, "data_collection",
"test", NA, "data_collection", "test", "test", NA, "cata_collection",
"test", "test"), `repeat` = c(NA, 1, 1, NA, 1, 1, NA, 1, 1, 2,
NA, 1, 1, 2), dt_volunteer_reg = structure(c(1597246320, NA,
NA, 1599217080, NA, NA, 1596184500, NA, NA, NA, 1598192280, NA,
NA, NA), class = c("POSIXct", "POSIXt"), tzone = "UTC"), age = c(26,
NA, NA, 64, NA, NA, 51, NA, NA, NA, 39, NA, NA, NA), gender = c(0,
NA, NA, 1, NA, NA, 0, NA, NA, NA, 1, NA, NA, NA), case_type = c(NA,
1, NA, NA, 2, NA, NA, 1, NA, NA, NA, 1, NA, NA), test_dis_dt = structure(c(NA,
NA, 1597250220, NA, NA, 1600012980, NA, NA, 1596382080, 1601980740,
NA, NA, 1598284020, 1603118700), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), test_dis_res = c(NA, NA, 1, NA, NA, 1, NA,
NA, 2, 2, NA, NA, 2, 2), test_dis_in = c(NA, NA, NA, NA, NA,
0.02, NA, NA, 6.13, 4.75, NA, NA, 7.23, 3.85), test_cont_dt = structure(c(NA,
NA, 1597250280, NA, NA, 1608636120, NA, NA, NA, 1601980740, NA,
NA, 1605704940, 1603205340), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
test_cont_res = c(NA, NA, 2, NA, NA, 1, NA, NA, NA, 2, NA,
NA, 2, 2), test_cont_val = c(NA, NA, 123, NA, NA, 0, NA,
NA, NA, 40000, NA, NA, 471.6, 306.5)), row.names = c(NA,
-14L), class = c("tbl_df", "tbl", "data.frame"))
And this is what I'm hoping to get
structure(list(record_id = c(1, 2, 3, 3, 4, 4), `repeat` = c(1,
1, 1, 2, 1, 2), dt_volunteer_reg = structure(c(1597246320, 1599217080,
1596184500, 1596184500, 1598192280, 1598192280), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), age = c(26, 64, 51, 51, 39, 39), gender = c(0,
1, 0, 0, 1, 1), case_type = c(1, 2, 1, 1, 1, 1), test_dis_dt = structure(c(1597250220,
1600012980, 1596382080, 1601980740, 1598284020, 1603118700), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), test_dis_res = c(1, 1, 2, 2, 2, 2),
test_dis_in = c(NA, 0.02, 6.13, 4.75, 7.23, 3.85), test_cont_dt = structure(c(1597250280,
1608636120, NA, 1601980740, 1605704940, 1603205340), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), test_cont_res = c(2, 1, NA, 2,
2, 2), test_cont_val = c(123, 0, NA, 40000, 471.6, 306.5)), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
Assuming the first dataframe is called input and you are happy using the tidyverse you can do it like this.
# Collapse the rows of each record_id into one row per test repeat.
# Each record is nested into its own data frame, NAs are filled downwards in
# two passes from the group's current first row, and only "test" rows are kept.
input %>%
  nest(data = c(-record_id)) %>%
  mutate(
    # Pass 1: fill NAs from the first row of the record (the row without a
    # `type`, which carries dt_volunteer_reg, age and gender in the sample data).
    data = map(data, ~replace_na(., as.list(head(., 1)))),
    # Drop that first row; it is the only one whose `repeat` is still NA.
    data = map(data, filter, !is.na(`repeat`)),
    # Pass 2: the data-collection row is now first; fill the test rows from it.
    data = map(data, ~replace_na(., as.list(head(., 1)))),
    # Keep only the test rows (one per repeat).
    data = map(data, filter, type == "test")
  ) %>%
  unnest(data) %>%
  # `type` is constant ("test") at this point, so drop it.
  select(-type)
There are ways to do this more concisely and/or faster but this may be more readable.

Using for-loops in R to process several columns in a data frame

I am trying to edit 50 columns in my data frame into dummy variables depending on an exact match with a given vector of 50 values using a for-loop function.
I never used loop functions before and can't figure out how to do it.
I first wanted to code this "by hand" for each of the 50 columns like that:
dBGK1a <- as.numeric(BGK1a == BGKright[1])
dBGK2a <- as.numeric(BGK2a == BGKright[2])
dBGK3a <- as.numeric(BGK3a == BGKright[3])
....
dBGK50a <- as.numeric(BGK50a == BGKright[50])
As this is very tedious i tried to come up with a for-loop, that can handle this.
for(i in 1:50) {
for (j in seq(from = 348, to = 448, by = 2)){
data1[j] <- as.numeric(data1[j] == BGKright[i])
}
}
Somehow this doesn't work since i get the value "0" in every column over every observation.
data1 is my data frame. Here is a shorter version of the data frame:
dput(head(data1[348:354], 20))
structure(list(BGK1a = c(NA, NA, NA, NA, NA, NA, NA, NA, 2, NA,
NA, NA, NA, NA, 2, 2, 2, 2, 1, 2), BGK1b = c(NA, NA, NA, NA,
NA, NA, NA, NA, 50, NA, NA, NA, NA, NA, 100, 100, 100, 99, 89,
50), BGK2a = c(NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA,
NA, NA, 1, 2, 1, 2, 1, 1), BGK2b = c(NA, NA, NA, NA, NA, NA,
NA, NA, 50, NA, NA, NA, NA, NA, 100, 50, 96, 62, 93, 50), BGK3a = c(NA,
NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA, 2, 1, 1, 1,
1, 2), BGK3b = c(NA, NA, NA, NA, NA, NA, NA, NA, 50, NA, NA,
NA, NA, NA, 100, 100, 50, 85, 82, 74), BGK4a = c(NA, NA, NA,
NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA, 1, 2, 2, 2, 1, 1)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
What the loop should do is select the respective value of "BGKright" with "i" and the column to process with "j". Note that "j" needs to jump 2 steps every loop because i only need to process every second column (from column 348 to column 448).
I would appreciate any help regarding this loop and other solutions that are possible for this task without loops.
Thank you in advance.
Ok i used BGKa=select(data1[348:448],ends_with("a")) to make a new data frame with only the relevant columns.
Then i used the for-loop to create the dummies.
# Recode each answer column of BGKa into a 0/1 dummy: 1 where the answer equals
# the matching entry of BGKright, 0 where it differs; NA answers stay NA.
# Iterating over seq_along(BGKright) ties the loop length to the answer key
# (described as 50 values) instead of a hard-coded 50.
for (i in seq_along(BGKright)) {
  # [[i]] extracts the column as a vector, so the comparison is a plain
  # element-wise test rather than a one-column data-frame comparison.
  BGKa[[i]] <- as.numeric(BGKa[[i]] == BGKright[i])
}
Seems to work. Ty for help.

"grouping variable must not contain purely numeric items"

I am doing an analysis with groups and as such, need to make a grouping variable, for which I wanted to use gender (0=male, 1=female). What I first did was create a vector of this variable (the manual told me to do this), but then I got an error: "grouping variable must not contain purely numeric items". Then I transformed my vector into a logical (TRUE/FALSE), but somehow I still get this error.
So my question is, does anyone know, in general terms, what may be the problem when I get this error?
Attached below is the code to the head of my dataset:
structure(c(7, 8, 7, 5, 6, 6, 4.9, NA, 6.9, 5.1, 5.8, NA, NA,
NA, 7, 3, 7, NA, NA, NA, 6.7, 4.1, 5.9, NA, NA, NA, 5, 6, 7,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 8, NA, NA, NA, 6.2,
4.3, 6.3, NA, NA, NA, 7, 5, 7, NA, NA, NA, 6.5, NA, NA, NA, NA,
NA, 6, NA, 7, NA, NA, NA, NA, NA, 5, NA, NA, NA, NA, NA, 7, NA,
NA, NA, NA, NA, 6.1, NA, NA, NA, NA, NA, 7, NA, NA, NA, NA, NA,
NA, NA, 16, 0.001, 12, 11, 11, 0.001, 0.001, 0.001, 12, 12, 12,
0.001, 0.001, 0.001, 12, 12, 12, 0.001, 0.001, 0.001, 15, 12,
12, 0.001, 0.001, 0.001, 16, 0.001, 12, 0.001, 0.001, 0.001,
0.001, 0.001, 15, 0.001, 0.001, 0.001, 0.001, 0.001, 16, 0.001,
0, 1, 0, 0, 1, 0), .Dim = c(6L, 24L), .Dimnames = list(c("800009",
"800012", "800015", "800033", "800042", "800045"), c("gener_sat_T0",
"sel_T0", "gener_sat_T1", "sel_T1", "gener_sat_T2", "sel_T2",
"gener_sat_T3", "sel_T3", "gener_sat_T4", "sel_T4", "gener_sat_T5",
"sel_T5", "gener_sat_T6", "sel_T6", "gener_sat_T7", "sel_T7",
"dT1", "dT2", "dT3", "dT4", "dT5", "dT6", "dT7", "female")))
Then what I am trying to do is fit a CT model (have used it before on non-group data and that worked fine).
CTMODEL <- ctModel(n.latent = 2, n.manifest = 2, Tpoints = 8,
manifestNames = c("gener_sat", "sel"),
latentNames = c("gener_sat", "sel"), LAMBDA = diag(2))
fit_CTMODEL <- ctMultigroupFit(datawide = data_wide, groupings=female, ctmodelobj = CTMODEL)
Thanks a bunch!
Ok, I redid your computations, albeit your code was not reproducible directly, I made some changes and now it works:
# create the structure object (data_wide), and change it to remove the
# grouping:
data_wide = structure(c(7, 8, 7, 5, 6, 6, 4.9, NA, 6.9, 5.1, 5.8, NA, NA,
NA, 7, 3, 7, NA, NA, NA, 6.7, 4.1, 5.9, NA, NA, NA, 5, 6, 7,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 8, NA, NA, NA, 6.2,
4.3, 6.3, NA, NA, NA, 7, 5, 7, NA, NA, NA, 6.5, NA, NA, NA, NA,
NA, 6, NA, 7, NA, NA, NA, NA, NA, 5, NA, NA, NA, NA, NA, 7, NA,
NA, NA, NA, NA, 6.1, NA, NA, NA, NA, NA, 7, NA, NA, NA, NA, NA,
NA, NA, 16, 0.001, 12, 11, 11, 0.001, 0.001, 0.001, 12, 12, 12,
0.001, 0.001, 0.001, 12, 12, 12, 0.001, 0.001, 0.001, 15, 12,
12, 0.001, 0.001, 0.001, 16, 0.001, 12, 0.001, 0.001, 0.001,
0.001, 0.001, 15, 0.001, 0.001, 0.001, 0.001, 0.001, 16, 0.001), .Dim =
c(6L, 23L),
.Dimnames = list(c("800009", "800012", "800015", "800033", "800042",
"800045"),
c("gener_sat_T0", "sel_T0", "gener_sat_T1", "sel_T1",
"gener_sat_T2",
"sel_T2", "gener_sat_T3", "sel_T3", "gener_sat_T4",
"sel_T4", "gener_sat_T5",
"sel_T5", "gener_sat_T6", "sel_T6", "gener_sat_T7",
"sel_T7",
"dT1", "dT2", "dT3", "dT4", "dT5", "dT6", "dT7")))
CTMODEL <- ctModel(n.latent = 2, n.manifest = 2, Tpoints = 8,
manifestNames = c("gener_sat", "sel"),
latentNames = c("gener_sat", "sel"), LAMBDA = diag(2))
# Grouping variable for ctMultigroupFit(): it needs to be a character vector
# (a purely numeric grouping raises the "must not contain purely numeric
# items" error). The labels follow the question's coding of the `female`
# column (0 = male, 1 = female), whose values were 0, 1, 0, 0, 1, 0.
fem <- c("m", "f", "m", "m", "f", "m")
# The data are passed as `dat` (not `datawide`).
fit_CTMODEL <- ctMultigroupFit(dat = data_wide, groupings = fem,
                               ctmodelobj = CTMODEL)
So in the end it's just a matter of making the grouping variable character vector.
Add: the code runs but gives various errors:
Not all eigenvalues of Hessian are greater than 0
Fit attempt generated errors
Retry limit reached
I guess that's because of the model and leave the solution to you :)

Resources