Related
I have the following data set:
structure(list(Age = c(83L, 26L, 26L, 20L, 20L, 77L, 32L, 21L,
15L, 75L, 27L, 81L, 81L, 15L, 24L, 16L, 35L, 27L, 30L, 31L, 24L,
24L, 31L, 79L, 30L, 19L, 20L, 42L, 62L, 83L, 79L, 18L, 26L, 66L,
23L, 83L, 77L, 80L, 57L, 42L, 32L, 76L, 85L, 29L, 65L, 79L, 9L,
34L, 20L, 16L, 34L, 22L, 19L, 23L, 25L, 14L, 53L, 28L, 79L, 22L,
22L, 21L, 82L, 81L, 16L, 19L, 77L, 15L, 18L, 15L, 78L, 24L, 16L,
14L, 29L, 18L, 50L, 17L, 43L, 8L, 14L, 85L, 31L, 20L, 30L, 23L,
78L, 29L, 6L, 61L, 14L, 22L, 10L, 83L, 15L, 13L, 15L, 15L, 29L,
8L, 9L, 15L, 8L, 9L, 15L, 9L, 34L, 8L, 9L, 9L, 16L, 8L, 25L,
21L, 23L, 13L, 56L, 10L, 7L, 27L, 8L, 8L, 8L, 8L, 80L, 80L, 6L,
15L, 42L, 25L, 23L, 21L, 8L, 11L, 43L, 69L, 34L, 34L, 14L, 12L,
10L, 22L, 78L, 16L, 76L, 12L, 10L, 16L, 6L, 13L, 66L, 11L, 26L,
12L, 16L, 13L, 24L, 76L, 10L, 65L, 20L, 13L, 25L, 14L, 12L, 15L,
43L, 51L, 27L, 15L, 24L, 34L, 63L, 17L, 15L, 9L, 12L, 17L, 82L,
75L, 24L, 44L, 69L, 11L, 10L, 12L, 10L, 10L, 70L, 54L, 45L, 42L,
84L, 54L, 23L, 23L, 14L, 81L, 17L, 42L, 44L, 16L, 15L, 43L, 45L,
50L, 53L, 23L, 53L, 49L, 13L, 69L, 14L, 65L, 14L, 13L, 22L, 67L,
59L, 52L, 54L, 44L, 78L, 62L, 69L, 10L, 63L, 57L, 22L, 12L, 62L,
9L, 82L, 53L, 54L, 66L, 49L, 63L, 51L, 9L, 45L, 49L, 77L, 49L,
61L, 62L, 57L, 67L, 16L, 65L, 75L, 45L, 16L, 55L, 17L, 64L, 67L,
56L, 52L, 63L, 10L, 62L, 14L, 66L, 68L, 15L, 13L, 43L, 47L, 55L,
69L, 21L, 67L, 34L, 52L, 15L, 31L, 64L, 55L, 13L, 48L, 71L, 64L,
13L, 25L, 34L, 50L, 61L, 70L, 33L, 57L, 51L, 46L, 57L, 69L, 46L,
8L, 11L, 46L, 71L, 33L, 38L, 56L, 17L, 29L, 28L, 6L, 8L), Sex = structure(c(1L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L,
2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L,
2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L,
1L, 2L, 2L), .Label = c("Male", "Female"), class = "factor"),
mean_AD_scaled = c(3.15891332561581, -0.0551328105526693,
0.582747640515478, 1.94179165777054, 1.7064645993306, 2.37250948563045,
1.015775832203, 1.36189033704266, -1.05640048650493, 0.184814975542474,
-0.143366705302007, 1.81560178585347, 2.06325078470728, -0.473088628698217,
0.414641167726219, 0.199887349084444, -0.60620959209809,
-0.17879228399189, -1.03483709078065, -1.43497010225613,
-0.958595084469815, 1.0203965598582, -1.44731404613503, -1.17191867788498,
-2.02547709312595, -1.22395687266857, -1.09952727795348,
-1.0830246791849, 1.21072653232248, 1.69997357714829, 1.53648783201423,
0.208688735094353, 0.0862394522314924, 1.08662698958276,
-0.731299290763917, 2.29307697689102, -0.660008064083659,
-1.21425334459264, 1.10191939777498, -2.0957781638801, -1.14947514355972,
0.248845058764562, 2.6526135953958, 0.197907037232212, -0.222469162066061,
1.92880961340592, 1.23328008397287, -1.17288683034607, -0.308282675662673,
-1.02603570477074, -1.32647101621898, -1.58316343919798,
-0.0440210607151585, -0.388375288352846, -0.935491446193807,
-0.63789458173376, 0.454577456746182, -1.77391147749773,
0.709267564407921, 0.125735671950958, -0.821073428064989,
-0.126534054558056, 0.519597695894384, 0.188005477971066,
0.212319306823438, -1.45807374053215, 1.5856655763446, -1.25641198358011,
-0.910847565366061, -1.1191763722206, 0.25300371365424, -0.750772357310844,
0.37932560636146, -0.871791414947088, -1.92771569802088,
-1.1752191976387, 0.210449012296334, -0.347778895382139,
-0.132254955464496, 0.953616043508016, -0.0862677135627232,
0.838977990728951, -1.8993092246739, -0.0254281327692267,
0.298022803094927, -1.21559555595915, 0.0134079829994995,
-0.763094297724715, 0.334768589686298, -1.12568939786794,
-2.11786964276497, -0.0434709740895377, 0.388237009696492,
1.30050066962355, -0.260645173884043, -0.60620959209809,
1.05945271027717, -0.275717547426008, -0.0238878902174922,
0.496604074943496, 0.534009965485611, -0.692903244295693,
-0.566933407028871, 0.125625654625835, -0.518305749324122,
1.79381835547894, -0.790708646330802, -0.227860010997131,
0.347420582075538, 0.784189362817269, -0.660118081408782,
1.29962053102256, -0.561652575422924, -0.710395998990384,
-1.29315777017148, -0.457356151205503, -1.01756437073621,
0.146528946399368, -1.07136284272178, -1.42968927065019,
0.798601632408495, -0.799730066990963, -0.431348055546223,
0.569545561500617, 2.32168148142323, 0.472070211440872, 1.65145593676866,
-0.814142336582189, -0.544489872703603, -0.315433801795725,
0.382626126115175, -0.623812364117908, 0.216279930527897,
-0.606099574772967, -0.367207954999011, 0.719829227619811,
-0.749122097433987, 0.934693063586709, -0.79026857703031,
-0.371872689584264, 0.0769979969210905, -0.793899148759394,
1.50414273842782, 0.730280873506577, -0.290569886317732,
0.303743704001367, 0.390877425499463, -1.00359217044547,
-0.534918365417827, 0.325967203676389, 0.129036191704673,
0.34434009697207, -0.141386393449775, -0.363401355549725,
-0.395416397160769, -0.0235578382421178, -1.13583299524436,
1.16781977552417, -1.31890182425046, 0.139377820266317, 0.0160483988024708,
0.481311666751279, -1.05475022662807, 0.839858129329941,
0.652498624644007, -0.350199276534864, -0.262075399110649,
0.178543988010412, -1.13198238886502, -0.05117218684821,
-1.29678834190056, 0.429603523943066, 1.05098137624263, -0.956504755292464,
0.502765045150433, -0.81678275238516, -1.50263075720731,
-0.826684311646306, 2.40100397283753, 2.06633126981075, -0.470558230220369,
0.484942238480364, 0.822035322659877, 0.143888530596397,
0.384056351341786, -0.63580425255641, 0.358422314587926,
-0.372422776209885, 0.0607154328027556, -0.113221958218067,
1.02710761669075, -0.349649189909243, 2.27195365046724, -0.507634068787109,
-0.326105482332738, -1.0396778530861, 1.06484355920824, 1.32151397872221,
-0.185173288849074, -0.651888785489516, -0.171311105883464,
-0.104200537557911, -0.693673365571561, -1.26609350819101,
0.411230630647381, -0.929770545287362, -0.481009876107135,
0.386146680519137, 0.0482834750637615, -0.198265350538812,
0.790020281048832, 0.926001694901924, -1.08918564939184,
0.50298507980068, -0.0694350628187722, 1.04966116834114,
0.00878725534429612, 1.48742010500899, 0.750194009353997,
0.423772605711498, -0.596418050162068, -0.652636903300361,
-0.308942779613417, 0.314437388003408, 0.679562886624478,
-1.24312189070515, -0.432712270377761, 0.00427654501421597,
-0.197935298563442, 0.228821905592019, 1.06957430418856,
-1.61612462980509, 1.9499329398297, -0.263285589687014, 0.156430505660519,
-0.322254875953402, -0.451085163673446, -0.35526007349056,
0.10780284795577, 0.408700232169533, -0.957604928543701,
-1.05662052115517, 1.00345389178912, -0.238751726184391,
0.300003114947154, -0.397946795638617, -0.0802167606809086,
0.943714484246865, 1.10973062785877, 1.76279346979401, 1.62087112038423,
0.25533608094687, 0.226841593739787, 0.869672824438507, -1.44960240649761,
-0.450315042397579, -0.199629565370345, 0.29813282042005,
0.760425620590513, 1.87391096816911, -0.454275666102039,
-0.0559029318285365, -0.343048150401812, -1.01371376435687,
0.68880434193488, -0.29222014619459, 1.16132875334186, -1.95715633422403,
-0.534368278792206, -0.560112332871189, 1.84508642898666,
-1.19150176175703, -0.772203732244971, -0.3443683583033,
-1.45684154649076, -0.633823940704178, -1.77454957798344,
0.279539892474118, -0.875532004001301, 1.26001429397797,
-0.536590628759707, 2.1869102581465, 0.211109116247078, 0.130246382281038,
-0.355810160116181, -0.898085555651692, -0.429741802599415,
1.13360438741065, 1.61338994227581, 0.588688576072169, 0.454137387445685,
0.747113524250528, 0.460848444278238, -0.38177424884541,
-0.169990897981981, -0.747361820232001, -0.760123829946369,
0.208028631143609, -1.28748087619509, 2.33950428809329, -0.973029357526068,
-1.06091119683501, 0.917530360867389, -0.35041931118511,
-1.90613029883158, -1.15057531681095, 0.65348878057012, 0.43147381847017
)), row.names = c(NA, -308L), class = c("tbl_df", "tbl",
"data.frame"))
I am using this gam model:
# Gaussian GAM fitted by REML: a smooth of Age (bs = "ad"), a parametric
# Sex term, and a by-Sex ti() term on Age.
m1 <- gam(
  mean_AD_scaled ~ s(Age, bs = "ad", k = -1) + Sex + ti(Age, by = Sex, bs = "fs"),
  data = DF,
  method = "REML",
  family = gaussian
)
Output:
Family: gaussian
Link function: identity
Formula:
mean_AD_scaled ~ s(Age, bs = "ad", k = -1) + Sex + ti(Age,
by = Sex, bs = "fs")
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.04691 0.06976 0.672 0.502
SexFemale -0.12950 0.09428 -1.374 0.171
Approximate significance of smooth terms:
edf Ref.df F p-value
s(Age) 2.980 3.959 8.72 2.24e-06 ***
ti(Age):SexMale 2.391 2.873 23.47 < 2e-16 ***
ti(Age):SexFemale 1.000 1.000 43.40 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Rank: 48/49
R-sq.(adj) = 0.34 Deviance explained = 35.6%
-REML = 375.4 Scale est. = 0.63867 n = 308
But when I use gtsummary, I get a repeated value for each gender 'interaction':
tbl_regression(m1, tidy_fun = tidy_gam)
I see the following in a publication, which I am trying to replicate with gender and age:
I am not sure how to fix this. My goal is to print a table for a manuscript, so it would also be helpful to include any other GAM-related information, such as the edf and R^2.
I think you've found a bug in the handling of these types of interactions. While we work on a fix to the bug, this code should get you what you need. Thanks
# Reprex: fit the same kind of GAM on gtsummary's bundled trial data, then
# remove the repeated rows from the regression table before rendering it.
library(gtsummary)
#> #BlackLivesMatter
library(mgcv)
packageVersion("gtsummary")
#> [1] ‘1.5.2’
m1 <- gam(
  marker ~ s(age, bs = "ad", k = -1) + grade + ti(age, by = grade, bs = "fs"),
  data = gtsummary::trial,
  method = "REML",
  family = gaussian
)
reg_tbl <- tbl_regression(m1, tidy_fun = gtsummary::tidy_gam)
# Drop n_obs and keep distinct rows only (presumably n_obs is what differs
# between the otherwise identical rows -- confirm against the table body).
deduped_tbl <- modify_table_body(
  reg_tbl,
  ~ dplyr::distinct(dplyr::select(.x, -n_obs))
)
as_kable(deduped_tbl) # convert to kable to display on SO
Characteristic
Beta
95% CI
p-value
Grade
I
—
—
II
-0.39
-0.70, -0.08
0.014
III
-0.13
-0.43, 0.18
0.4
s(age)
>0.9
ti(age):gradeI
0.6
ti(age):gradeII
>0.9
ti(age):gradeIII
0.6
Created on 2022-02-21 by the reprex package (v2.0.1)
I am trying to use the lme4 package in R. I have 10 Lines in total, with four plants per line in each of the two replications. Some of the plants died, so there are some missing values. Weight is the response variable. Here are some rows from the data:
Line Rep Weight PLANT
Line 1 1 NA 1
Line 1 1 NA 2
Line 1 1 NA 3
Line 1 1 NA 4
Line 2 1 26 1
Line 2 1 26 2
Line 2 1 26 3
Line 2 1 27 4
Line 1 2 26 1
Line 1 2 28 2
Line 1 2 26 3
Line 1 2 25 4
Line 2 2 24 1
Line 2 2 26 2
Line 2 2 25 3
Line 2 2 NA 4
I want to fit a linear mixed model using the lme4 package, so I tried running:
# Intercept-only mixed model with a random intercept for each Rep-by-plant
# combination. The grouping column is spelled PLANT in Data, and R names are
# case-sensitive, so the formula must use that exact spelling.
lme4 <- lmer(Weight ~ 1 + (1 | Rep:PLANT), data = Data)
But I got an error:
boundary (singular) fit: see ?isSingular
> dput(Data)
structure(list(Line = c("Line 1", "Line 1", "Line 1", "Line 1",
"Line 2", "Line 2", "Line 2", "Line 2", "Line 1", "Line 1", "Line 1",
"Line 1", "Line 2", "Line 2", "Line 2", "Line 2"), Rep = c(1,
1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2), Weight = c(NA,
NA, NA, NA, 26, 26, 26, 27, 26, 28, 26, 25, 24, 26, 25, NA),
PLANT = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4)), row.names = c(NA,
-16L), class = c("tbl_df", "tbl", "data.frame"))
I am using lme4 for the first time and am not sure what this error means. I would appreciate any help!
Your model did fit, but it generated that warning because the estimated random effects are very small. You can read more about this in this post or on the help page (?isSingular).
Let us look at your data:
# Jittered points plus translucent boxplots of Weight against PLANT,
# coloured by Rep and drawn in one panel per Rep.
ggplot(Data, aes(x = PLANT, y = Weight, col = Rep)) +
  geom_jitter() +
  geom_boxplot(alpha = 0.2) +
  facet_wrap(~Rep)
The effect of PLANT, on its own or in combination with Rep, is extremely small. Let's look at the fitted model:
# Intercept-only model with a random intercept per PLANT:Rep combination.
fit <- lmer(Weight ~ 1 + (1 | PLANT:Rep), data = Data)
boundary (singular) fit: see ?isSingular
ranef(fit)
$`PLANT:Rep`
(Intercept)
1:1 0
1:2 0
2:1 0
2:2 0
3:1 0
3:2 0
4:1 0
4:2 0
This is exactly what happened: every estimated random intercept is zero. We can try to account for some other effects, but we still see very small coefficients:
# Add Line as a fixed effect, keep the Rep:PLANT random intercept, and
# print the estimated random effects.
fit <- lmer(Weight ~ Line + (1 | Rep:PLANT), data = Data)
ranef(fit)
$`Rep:PLANT`
(Intercept)
1:1 1.397563e-19
1:2 2.811371e-19
1:3 8.112169e-20
1:4 1.813251e-19
2:1 -1.725964e-19
2:2 -2.463986e-20
2:3 -2.027357e-19
2:4 -2.833681e-19
The take-home message is that there is no real systematic effect coming from PLANT, so you don't need to specify a highly complicated model; do something like:
# Simpler model: Line as a fixed effect with a random intercept per Rep only.
fit <- lmer(Weight ~ Line + (1 | Rep), data = Data)
The data in case anyone is interested:
Data = structure(list(Line = structure(c(1L, 1L, 1L, 1L, 12L, 12L, 12L,
12L, 23L, 23L, 23L, 23L, 34L, 34L, 34L, 34L, 45L, 45L, 45L, 45L,
56L, 56L, 56L, 56L, 65L, 65L, 65L, 65L, 66L, 66L, 66L, 66L, 67L,
67L, 67L, 67L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L,
9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 13L,
13L, 13L, 13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 16L,
16L, 16L, 8L, 8L, 8L, 8L, 66L, 66L, 66L, 66L, 17L, 17L, 17L,
17L, 18L, 18L, 18L, 18L, 9L, 9L, 9L, 9L, 19L, 19L, 19L, 19L,
20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L, 24L,
24L, 24L, 24L, 25L, 25L, 25L, 25L, 2L, 2L, 2L, 2L, 26L, 26L,
26L, 26L, 27L, 27L, 27L, 27L, 10L, 10L, 10L, 10L, 28L, 28L, 28L,
28L, 29L, 29L, 29L, 29L, 30L, 30L, 30L, 30L, 31L, 31L, 31L, 31L,
67L, 67L, 67L, 67L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 33L,
33L, 33L, 33L, 35L, 35L, 35L, 35L, 36L, 36L, 36L, 36L, 37L, 37L,
37L, 37L, 38L, 38L, 38L, 38L, 39L, 39L, 39L, 39L, 40L, 40L, 40L,
40L, 25L, 25L, 25L, 25L, 19L, 19L, 19L, 19L, 24L, 24L, 24L, 24L,
41L, 41L, 41L, 41L, 42L, 42L, 42L, 42L, 30L, 30L, 30L, 30L, 43L,
43L, 43L, 43L, 44L, 44L, 44L, 44L, 22L, 22L, 22L, 22L, 46L, 46L,
46L, 46L, 47L, 47L, 47L, 47L, 17L, 17L, 17L, 17L, 48L, 48L, 48L,
48L, 49L, 49L, 49L, 49L, 27L, 27L, 27L, 27L, 23L, 23L, 23L, 23L,
50L, 50L, 50L, 50L, 51L, 51L, 51L, 51L, 52L, 52L, 52L, 52L, 41L,
41L, 41L, 41L, 7L, 7L, 7L, 7L, 46L, 46L, 46L, 46L, 11L, 11L,
11L, 11L, 33L, 33L, 33L, 33L, 53L, 53L, 53L, 53L, 54L, 54L, 54L,
54L, 13L, 13L, 13L, 13L, 38L, 38L, 38L, 38L, 4L, 4L, 4L, 4L,
37L, 37L, 37L, 37L, 55L, 55L, 55L, 55L, 57L, 57L, 57L, 57L, 44L,
44L, 44L, 44L, 58L, 58L, 58L, 58L, 59L, 59L, 59L, 59L, 12L, 12L,
12L, 12L, 47L, 47L, 47L, 47L, 48L, 48L, 48L, 48L, 60L, 60L, 60L,
60L, 21L, 21L, 21L, 21L, 18L, 18L, 18L, 18L, 28L, 28L, 28L, 28L,
26L, 26L, 26L, 26L, 61L, 61L, 61L, 61L, 31L, 31L, 31L, 31L, 59L,
59L, 59L, 59L, 52L, 52L, 52L, 52L, 29L, 29L, 29L, 29L, 62L, 62L,
62L, 62L, 63L, 63L, 63L, 63L, 54L, 54L, 54L, 54L, 55L, 55L, 55L,
55L, 53L, 53L, 53L, 53L, 51L, 51L, 51L, 51L, 50L, 50L, 50L, 50L,
64L, 64L, 64L, 64L, 20L, 20L, 20L, 20L, 58L, 58L, 58L, 58L, 16L,
16L, 16L, 16L, 57L, 57L, 57L, 57L, 14L, 14L, 14L, 14L, 63L, 63L,
63L, 63L, 64L, 64L, 64L, 64L, 61L, 61L, 61L, 61L, 36L, 36L, 36L,
36L, 40L, 40L, 40L, 40L, 6L, 6L, 6L, 6L, 39L, 39L, 39L, 39L,
45L, 45L, 45L, 45L, 15L, 15L, 15L, 15L, 1L, 1L, 1L, 1L, 42L,
42L, 42L, 42L, 43L, 43L, 43L, 43L, 65L, 65L, 65L, 65L, 49L, 49L,
49L, 49L, 56L, 56L, 56L, 56L, 3L, 3L, 3L, 3L, 62L, 62L, 62L,
62L, 35L, 35L, 35L, 35L, 5L, 5L, 5L, 5L, 60L, 60L, 60L, 60L,
34L, 34L, 34L, 34L), .Label = c("Line1", "Line10", "Line11",
"Line12", "Line13", "Line14", "Line15", "Line16", "Line17", "Line18",
"Line19", "Line2", "Line20", "Line21", "Line22", "Line23", "Line24",
"Line25", "Line26", "Line27", "Line28", "Line29", "Line3", "Line30",
"Line31", "Line32", "Line33", "Line34", "Line35", "Line36", "Line37",
"Line38", "Line39", "Line4", "Line40", "Line41", "Line42", "Line43",
"Line44", "Line45", "Line46", "Line47", "Line48", "Line49", "Line5",
"Line50", "Line51", "Line52", "Line53", "Line54", "Line55", "Line56",
"Line57", "Line58", "Line59", "Line6", "Line60", "Line61", "Line62",
"Line63", "Line64", "Line65", "Line66", "Line67", "Line7", "Line8",
"Line9"), class = "factor"), Rep = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L), .Label = c("1", "2"), class = "factor"), Weight = c(NA,
NA, NA, NA, 26L, 26L, 26L, 27L, NA, NA, NA, NA, 26L, 28L, 26L,
25L, 22L, 17L, 20L, 20L, 28L, 20L, 27L, 26L, 22L, 25L, 21L, 25L,
18L, 18L, 19L, 18L, 24L, 28L, 23L, 30L, 29L, 25L, 26L, 27L, NA,
NA, NA, NA, 29L, 30L, 29L, 30L, NA, NA, NA, NA, 33L, NA, NA,
NA, 21L, 23L, 18L, 23L, 32L, 29L, 30L, 30L, 18L, 19L, 21L, 21L,
25L, 25L, 25L, 26L, 26L, 27L, NA, NA, 29L, 29L, 27L, 29L, 26L,
NA, NA, NA, 26L, 20L, 23L, 27L, NA, NA, NA, NA, 32L, 32L, 30L,
30L, 20L, 20L, 20L, 19L, 22L, 21L, 22L, 22L, 24L, 23L, 23L, 25L,
20L, 25L, NA, NA, 27L, 26L, NA, NA, NA, NA, NA, NA, 30L, 28L,
NA, NA, 25L, 26L, 27L, 26L, NA, NA, NA, NA, 20L, 19L, NA, NA,
19L, 27L, 26L, 29L, 26L, 29L, 31L, 29L, 25L, 25L, 24L, 25L, 26L,
25L, 26L, 26L, 25L, 24L, 24L, 28L, 22L, 26L, 24L, 28L, 29L, 30L,
26L, NA, NA, NA, NA, NA, 26L, 24L, 24L, 24L, NA, NA, NA, NA,
NA, NA, NA, NA, 30L, 30L, 30L, 31L, 24L, 25L, 28L, 22L, 28L,
31L, 30L, NA, 31L, 30L, 29L, 25L, 25L, 22L, 24L, 20L, 30L, 30L,
30L, 29L, 26L, 32L, 28L, 29L, 20L, 15L, 15L, 11L, 25L, 24L, 24L,
24L, 26L, 29L, 31L, 30L, 24L, 28L, 20L, 22L, 29L, 26L, 26L, 28L,
27L, 27L, 27L, 26L, 21L, 22L, 21L, NA, 28L, 29L, 24L, 24L, 28L,
29L, 28L, 27L, 28L, 29L, 27L, 29L, NA, NA, NA, NA, 22L, 26L,
21L, 21L, 26L, 30L, 28L, 30L, 27L, 26L, 28L, 26L, 25L, 25L, 26L,
26L, 27L, 26L, 23L, 29L, NA, NA, NA, NA, 27L, 23L, 29L, 23L,
28L, 29L, 28L, 26L, 20L, NA, NA, NA, 28L, 23L, 26L, 21L, 28L,
26L, 26L, 29L, 20L, 27L, 20L, 26L, 29L, 26L, 28L, 28L, 30L, 27L,
NA, NA, 26L, 21L, 26L, 25L, 27L, 26L, 27L, 24L, 25L, 20L, 21L,
20L, 25L, 25L, 31L, 24L, 29L, 28L, 31L, 27L, 25L, 28L, 26L, 26L,
NA, NA, NA, NA, 24L, 25L, 23L, 27L, 20L, 26L, 25L, 25L, 29L,
28L, 29L, 29L, 26L, 27L, 25L, 28L, NA, NA, NA, NA, 26L, 28L,
NA, NA, 21L, 20L, 31L, 25L, 31L, 28L, 30L, 29L, 23L, 25L, 24L,
28L, 25L, 22L, 25L, 25L, 28L, 29L, 28L, 29L, 26L, 24L, 25L, 26L,
29L, 27L, NA, NA, 26L, 29L, 29L, 30L, 25L, 24L, 25L, 24L, 28L,
25L, 29L, 28L, 24L, 24L, 24L, 24L, 28L, 30L, 27L, 27L, 26L, 25L,
25L, 25L, 25L, 25L, 28L, 25L, 25L, 30L, 28L, 25L, 22L, 24L, 25L,
24L, NA, NA, NA, NA, 5L, 7L, 4L, 5L, 21L, 20L, 22L, 24L, 25L,
27L, 25L, 28L, 32L, 31L, NA, NA, 19L, 26L, 20L, NA, 26L, 26L,
30L, 25L, 28L, 31L, 30L, 26L, 5L, 8L, 4L, 8L, 25L, 25L, 28L,
25L, 28L, 28L, 27L, 26L, 30L, 27L, 27L, 24L, 32L, 29L, 31L, 25L,
30L, 30L, 27L, 28L, 16L, 20L, 16L, 21L, 25L, 22L, 25L, 20L, 24L,
25L, 18L, 25L, 25L, 26L, 29L, 29L, 21L, 20L, 22L, 21L, 19L, 22L,
19L, 21L, 28L, 25L, 26L, 24L, 28L, 26L, 24L, 25L, NA, NA, NA,
NA, 25L, NA, NA, NA, 23L, 21L, 19L, 23L, 25L, 24L, 25L, NA, 22L,
30L, 29L, 26L, 25L, 25L, 24L, 24L), PLANT = structure(c(1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L), .Label = c("1", "2", "3", "4"), class = "factor"),
X = structure(c(4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L,
6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L,
5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L,
2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L,
4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L,
6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L,
5L, 6L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L,
7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L,
3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L,
1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L,
8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L,
7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L,
4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L,
6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L,
5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L,
2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L,
4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L,
6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L,
5L, 6L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L,
7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L,
3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L,
1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L,
8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L,
7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L,
3L, 7L, 8L, 1L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L,
6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L,
5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L,
2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L,
4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L,
6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L,
5L, 6L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L,
7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L,
3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L,
1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L,
8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L,
7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L
), .Label = c("24", "12", "21", "11", "13", "14", "22", "23"
), class = "factor")), row.names = c(NA, -536L), class = "data.frame")
How can you use rbind in a for loop that runs through a list of dataframes? I tried to follow "Looping through list of data frames in R" but receive the following error:
Error in apply(dataFramesList, 2, function(x) { :
dim(X) must have a positive length
I have two dataframes, dfTraining and dfAccuracy (code to reproduce dataframes is below), and need to add a row for any of the crop types missing from either of two columns, CROP or CROP_LABEL. I believe my problem is in my last line of code.
My code block is:
# Collect both data frames so the same fix-up can be applied to each of them.
dataFramesList <- list(dfTraining, dfAccuracy)
# NOTE(review): apply() works on objects that have dimensions (matrices/arrays).
# dataFramesList is a plain list, which matches the reported
# "dim(X) must have a positive length" error; lapply() iterates over list elements.
apply(dataFramesList, 2, function(x){
# Crop codes to check for: 1 to 23 with the third entry (3) removed, plus 34.
cropNumbers <- seq(1,23, by = 1)
cropNumbers <- cropNumbers[-c(3)]
cropNumbers <- append(cropNumbers, 34)
# NOTE(review): `dataFrameList` (no "s") is not defined in the code shown, and the
# function argument `x` is never used -- presumably `x` was intended here and below.
listofCROPandCROP_LABELColumns <- list(dataFrameList$CROP, dataFrameList$CROP_LABEL)
# NOTE(review): `missingCROP` is initialised here, but the loop assigns `missingCrop`
# (different capitalisation), so this variable is never used afterwards.
missingCROP <- NULL
# Check every crop code against each of the two columns in turn.
for (i in listofCROPandCROP_LABELColumns){
for (j in cropNumbers){
if (!j %in% i){
# Crop code j is absent from the column being checked: build a placeholder
# row for it (zero frequency and zero acres) and append it.
missingCrop <- list(FREQUENCY = 0, AA = 1, CROP = j, CROP_LABEL = j, ACRES = 0)
dataFrameList <- rbind(dataFrameList, missingCrop)
}
}
}
# NOTE(review): the function body ends with the for loop, so the extended data frame
# is not returned to the caller -- confirm whether a final return value is needed.
})
My dfAccuracy dataframe:
structure(list(FREQUENCY = c(4L, 2L, 1L, 1L, 1L, 1L, 65L, 1L,
1L, 4L, 1L, 5L, 5L, 2L, 4L, 1L, 1L, 1L, 1L, 4L, 9L, 2L, 1L, 1L,
1L, 2L, 4L, 1L, 2L, 18L, 1L, 10L, 3L, 1L, 7L, 1L, 1L, 1L, 3L,
1L, 7L, 1L), AA = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
CROP = c(1L, 4L, 12L, 13L, 14L, 18L, 1L, 1L, 1L, 1L, 1L,
4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 13L,
13L, 13L, 13L, 14L, 14L, 14L, 18L, 18L, 18L, 18L, 18L, 19L,
19L, 21L, 21L, 21L, 21L), CROP_LABEL = c(1L, 4L, 14L, 13L,
12L, 18L, 1L, 4L, 5L, 6L, 18L, 1L, 4L, 6L, 14L, 18L, 12L,
14L, 18L, 1L, 6L, 14L, 18L, 18L, 4L, 6L, 13L, 21L, 12L, 14L,
18L, 1L, 6L, 14L, 18L, 21L, 1L, 19L, 6L, 13L, 21L, 34L),
ACRES = c(331.737184484, 193.772138572, 26.48543619, 73.2696289437,
112.470306056, 66.6556450342, 3905.71121736, 24.9581079934,
39.9287379709, 259.662359273, 85.2786247851, 306.051491303,
368.342995232, 154.82030835, 265.754349805, 70.3722566979,
35.4066607701, 139.336463432, 58.4307705147, 251.070357093,
471.031628349, 150.965736858, 28.2780117926, 35.3426930108,
34.5730542194, 67.7383953308, 144.442123948, 33.2746560126,
69.4072817311, 1219.65459596, 92.4840910734, 582.983473317,
191.957841327, 35.708775262, 319.638682538, 60.6889287642,
82.6244195055, 36.2898952104, 267.422844756, 72.8352758659,
489.746546145, 65.5392893502)), row.names = c(25L, 26L, 27L,
29L, 30L, 31L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L,
70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L,
83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L
), class = "data.frame")
and my dfTraining dataframe is:
structure(list(FREQUENCY = c(7L, 1L, 1L, 4L, 2L, 6L, 1L, 107L,
1L, 21L, 1L, 1L, 1L, 2L, 1L, 19L, 3L, 1L, 1L, 12L, 1L, 2L, 32L,
2L, 2L, 29L, 2L, 18L, 1L), AA = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), CROP = c(1L, 1L, 4L, 4L, 12L, 13L, 21L,
1L, 1L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 7L, 12L, 13L, 14L, 14L,
14L, 18L, 18L, 18L, 19L, 21L, 34L), CROP_LABEL = c(1L, 4L, 1L,
4L, 12L, 13L, 21L, 1L, 6L, 4L, 6L, 1L, 5L, 14L, 18L, 6L, 14L,
1L, 12L, 13L, 1L, 6L, 14L, 6L, 14L, 18L, 19L, 21L, 34L), ACRES = c(624.940370218,
26.9188766351, 37.8773839813, 291.79294767, 140.949264214, 391.571023675,
44.5217011939, 6806.02216989, 72.7500299887, 1676.12121152, 14.8739557721,
67.0700291739, 59.7438207953, 82.6713019474, 75.62666152, 1370.78710769,
145.215281276, 41.7380537313, 66.5236760194, 679.91208779, 70.9661875374,
38.8514254734, 1749.63365551, 109.917242057, 79.7758083723, 1660.85759895,
96.8771921798, 1428.71888481, 69.473161379)), row.names = c(18L,
19L, 20L, 21L, 22L, 23L, 24L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L,
58L, 59L), class = "data.frame")
Hi, I have animal abundance data collected from quadrats, with 4 quadrats per station.
In the field, some quadrats were missed.
For example:
St/ Q /Anim1 abundance /Anim 2 abundance/....etc
1 /1 /
1 /2 /
1 /3 /
1 /4 /
2 /1 /
2 /2 /
2 /4 /
3 /1 /
3 /2 /
3 /3 /
3 /4 /
Station 2 is missing quadrat 3. I would like to remove all rows (including animal abundance data) associated with station 2 from further analysis. I would like to do this in a function as I have multiple large csv files I need to clean up.
I tried subset() and for loops, but I am struggling with both.
Thank you for your time
******update I'm working with this qc_Large29 <- Large29[Large29[, 5]>=4,]
which gives me all the 4th quadrats from each station. Is there a way to add a length() to it so that the new dataframe will only be the data associated with stations that have 4 quadrats?
**********update
dput(Large29[1:30,1:5])
structure(list(FID = 652:681, areaContro = c(29L, 29L, 29L, 29L,
29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L,
29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L
), areaShortN = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "CAIIN", class = "factor"), station = c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L), quadrat = c(1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L)), .Names = c("FID",
"areaContro", "areaShortN", "station", "quadrat"), row.names = c(NA,
30L), class = "data.frame")
>
This selects every row except those belonging to station 2:
# Keep every row whose station is not 2.
Large29[Large29$station != 2, ]
For your second question (the edit), I would suggest using dplyr, which lets you group by station:
# Keep only the stations that have at least four rows (one per quadrat),
# then convert the grouped result back to a plain data frame.
library(dplyr)
Large29 %>%
  group_by(station) %>%
  filter(n() >= 4) %>%
  as.data.frame()
I have a dataframe df
df<-structure(list(subject = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L,
36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L,
49L, 50L, 51L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L), sex = c(1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L,
2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L,
2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L), age = c(29L, 54L, 67L,
36L, 48L, 37L, 25L, 46L, 37L, 33L, 25L, 26L, 28L, 59L, 46L, 50L,
55L, 56L, 37L, 30L, 38L, 30L, 50L, 39L, 29L, 46L, 48L, 46L, 55L,
32L, 66L, 35L, 48L, 54L, 38L, 31L, 42L, 36L, 27L, 63L, 45L, 31L,
26L, 38L, 43L, 52L, 36L, 43L, 65L, 46L, 42L, 29L, 54L, 67L, 36L,
48L, 37L, 25L, 46L, 37L, 33L, 25L, 26L, 28L, 59L, 46L, 50L, 55L,
56L, 37L, 30L, 38L, 30L, 50L, 39L, 29L, 46L, 48L, 46L, 55L, 32L,
66L, 35L, 48L, 54L, 38L, 31L, 42L, 36L, 27L, 63L, 45L, 31L, 26L,
38L, 43L, 52L, 36L, 43L, 65L, 46L, 42L), edu = c(4L, 3L, 3L,
3L, 4L, 2L, 3L, 3L, 1L, 3L, 4L, 4L, 5L, 1L, 1L, 2L, 2L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 2L, 2L, 1L, 2L, 2L, 4L, 2L, 4L, 4L, 3L, 3L,
4L, 5L, 3L, 3L, 4L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 6L, 1L, 3L,
4L, 3L, 3L, 3L, 4L, 2L, 3L, 3L, 1L, 3L, 4L, 4L, 5L, 1L, 1L, 2L,
2L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 1L, 2L, 2L, 4L, 2L, 4L,
4L, 3L, 3L, 4L, 5L, 3L, 3L, 4L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 1L,
6L, 1L, 3L), biz_exp = c(5L, 15L, 3L, 4L, 10L, 6L, 0L, 5L, 8L,
5L, 0L, 8L, 3L, 23L, 5L, 7L, 5L, 11L, 4L, 4L, 11L, 3L, 15L, 4L,
4L, 6L, 6L, 5L, 13L, 2L, 13L, 6L, 8L, 27L, 7L, 3L, 11L, 5L, 1L,
4L, 8L, 8L, 4L, 15L, 18L, 30L, 9L, 14L, 18L, 21L, 16L, 5L, 15L,
3L, 4L, 10L, 6L, 0L, 5L, 8L, 5L, 0L, 8L, 3L, 23L, 5L, 7L, 5L,
11L, 4L, 4L, 11L, 3L, 15L, 4L, 4L, 6L, 6L, 5L, 13L, 2L, 13L,
6L, 8L, 27L, 7L, 3L, 11L, 5L, 1L, 4L, 8L, 8L, 4L, 15L, 18L, 30L,
9L, 14L, 18L, 21L, 16L), turnov = c(36L, NA, 12L, 9L, 48L, 9L,
8L, 24L, 4L, 250L, NA, 600L, 6L, 6L, 10L, 10L, 5L, 4L, 250L,
200L, 50L, 150L, 48L, NA, 9L, 6L, 2L, NA, NA, 3L, 7L, 23L, 75L,
7L, 5L, NA, 20L, 450L, 5L, 32L, 21L, 12L, 6L, 4L, 24L, 7L, 10L,
12L, 12L, 14L, 18L, 36L, NA, 12L, 9L, 48L, 9L, 8L, 24L, 4L, 250L,
NA, 600L, 6L, 6L, 10L, 10L, 5L, 4L, 250L, 200L, 50L, 150L, 48L,
NA, 9L, 6L, 2L, NA, NA, 3L, 7L, 23L, 75L, 7L, 5L, NA, 20L, 450L,
5L, 32L, 21L, 12L, 6L, 4L, 24L, 7L, 10L, 12L, 12L, 14L, 18L),
loc_pr = c(1L, 1L, 1L, 6L, 1L, 6L, 4L, 1L, 8L, 5L, 1L, 3L,
1L, 1L, 1L, 1L, 5L, 8L, 2L, 1L, 1L, 1L, 1L, 2L, 8L, 2L, 4L,
4L, 2L, 2L, 2L, 1L, 4L, 5L, 4L, 4L, 4L, 4L, NA, 4L, 5L, 5L,
5L, 8L, 1L, 2L, 4L, 3L, 3L, 4L, 3L, 1L, 1L, 1L, 6L, 1L, 6L,
4L, 1L, 8L, 5L, 1L, 3L, 1L, 1L, 1L, 1L, 5L, 8L, 2L, 1L, 1L,
1L, 1L, 2L, 8L, 2L, 4L, 4L, 2L, 2L, 2L, 1L, 4L, 5L, 4L, 4L,
4L, 4L, NA, 4L, 5L, 5L, 5L, 8L, 1L, 2L, 4L, 3L, 3L, 4L, 3L
), type = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 4L, 2L, 1L, 1L, 2L, 4L, 1L, 2L, 1L,
1L, 4L, 1L, 3L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 4L, 2L, 1L,
1L, 2L, 4L, 1L, 2L, 1L, 1L, 4L, 1L, 3L, 1L, 1L, 1L, 3L, 2L
), age_rec = c(2L, 4L, 4L, 100L, 4L, 100L, 100L, 4L, 100L,
2L, 1L, 2L, 2L, 4L, 4L, 4L, 4L, 100L, 3L, 2L, 3L, 2L, 4L,
3L, 100L, 27L, 100L, 100L, 4L, 2L, 100L, 2L, 4L, 30L, 3L,
2L, 59L, 8L, 100L, 27L, 3L, 59L, 2L, 59L, 3L, 59L, 3L, 3L,
4L, 64L, 3L, 2L, 4L, 4L, 100L, 4L, 100L, 100L, 4L, 100L,
2L, 1L, 2L, 2L, 4L, 4L, 4L, 4L, 100L, 3L, 2L, 3L, 2L, 4L,
3L, 100L, 27L, 100L, 100L, 4L, 2L, 100L, 2L, 4L, 30L, 3L,
2L, 59L, 8L, 100L, 27L, 3L, 59L, 2L, 59L, 3L, 59L, 3L, 3L,
4L, 64L, 3L), biz_exp_rec = c(2L, 4L, 2L, 3L, 3L, 3L, 1L,
2L, 3L, 2L, 1L, 3L, 2L, 4L, 2L, 3L, 2L, 4L, 2L, 2L, 4L, 2L,
4L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 4L, 3L, 3L, 4L, 3L, 2L, 3L,
3L, 2L, 4L, 3L, 2L, 2L, 3L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 2L,
4L, 2L, 3L, 3L, 3L, 1L, 2L, 3L, 2L, 1L, 3L, 2L, 4L, 2L, 3L,
2L, 4L, 2L, 2L, 4L, 2L, 4L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 4L,
3L, 3L, 4L, 3L, 2L, 3L, 3L, 2L, 4L, 3L, 2L, 2L, 3L, 4L, 4L,
3L, 4L, 4L, 4L, 4L), turnov_rec = structure(c(3L, NA, 3L,
2L, 3L, 3L, 1L, 3L, 3L, 4L, NA, 4L, 2L, 2L, 2L, 2L, 2L, 4L,
4L, 4L, 3L, 4L, 3L, 5L, 2L, 3L, 3L, 2L, NA, 2L, 4L, 3L, 4L,
4L, 2L, NA, 4L, 2L, 1L, 2L, 3L, 3L, 2L, 4L, 3L, 4L, 2L, 3L,
3L, 4L, 3L, 3L, NA, 3L, 2L, 3L, 3L, 1L, 3L, 3L, 4L, NA, 4L,
2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 3L, 4L, 3L, NA, 2L, 3L, 3L,
2L, NA, 2L, 4L, 3L, 4L, 4L, 2L, NA, 4L, 2L, 1L, 2L, 3L, 3L,
2L, 4L, 3L, 4L, 2L, 3L, 3L, 4L, 3L), .Label = c("1", "2",
"3", "4", "MA"), class = "factor"), bundle = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), investment = c(86L,
100L, 100L, 75L, 100L, 59L, 68L, 86L, 80L, 100L, 86L, 100L,
100L, 100L, 100L, 100L, 100L, 93L, 64L, 100L, 24L, 18L, 89L,
75L, 80L, 29L, 54L, 65L, 100L, 27L, 59L, 30L, 59L, 43L, 59L,
59L, 5L, 26L, 100L, 75L, 59L, 5L, 59L, 74L, 59L, 79L, 75L,
75L, 86L, 66L, 86L, 55L, 100L, 68L, 1L, 75L, 1L, 1L, 79L,
1L, 54L, 48L, 33L, 55L, 90L, 85L, 39L, 70L, 1L, 45L, 54L,
33L, 3L, 44L, 75L, 1L, 1L, 1L, 1L, 96L, 26L, 1L, 23L, 66L,
1L, 89L, 83L, 52L, 61L, 1L, 88L, 45L, 72L, 60L, 1L, 60L,
2L, 86L, 10L, 63L, 1L, 88L)), .Names = c("subject", "sex",
"age", "edu", "biz_exp", "turnov", "loc_pr", "type", "age_rec",
"biz_exp_rec", "turnov_rec", "bundle", "investment"), class = "data.frame", row.names = c(NA,
-102L))
In this data frame, investment is my dependent variable and the other variables are my independent variables. My subjects are crossed within type of bundle. First of all, I would like to know whether bundling (bundle = 1 means that people bundle and bundle = 0 means that people do not bundle) has an effect on the investment.
I have fitted this linear mixed-effects model, but I am not sure whether it is correct, as my p-values are equal to zero.
library(nlme)
model <- lme(investment~bundle, random = ~1|subject/bundle, data=df)
I have also tried a repeated-measures ANOVA, as follows:
aov(investment~bundle+ Error(subject/bundle), data=df)
It runs, but I am not sure whether the model formula is right.
Could anyone help me with that?