Related
I need to add a column named "type" whose value changes every 16 rows: "type 1" for the first 16 rows, "type 2" for the next 16, and so on.
I tried book1$ID %/% 16, but the result is not quite right.
This is the original data:
book1 <- structure(list(ID = 1:34, per_section = c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L,
2L)), class = "data.frame", row.names = c(NA, -34L))
This is my desired output:
output <- structure(list(ID = 1:34, Type = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L), per_section = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 1L, 2L)), class = "data.frame", row.names = c(NA,
-34L))
With %/% we need to subtract 1 before the integer division and add 1 afterwards, so that IDs 1-16 map to 1, IDs 17-32 map to 2, and so on
# Shift ID to start at 0 so that IDs 1-16 fall in block 0, 17-32 in block 1,
# and so on, then add 1 for 1-based labels. Integer literals keep the result
# an integer vector, matching the Type column of the desired output.
book1$Type <- with(book1, (ID - 1L) %/% 16L + 1L)
Or it may be easier with gl
library(dplyr)
# gl() generates factor levels in runs of 16; converting the factor to integer
# gives the block number. `.after = 1` puts Type right after the first column.
book1 <- mutate(
  book1,
  Type = as.integer(gl(n = n(), k = 16, length = n())),
  .after = 1
)
Try this
# Number consecutive blocks of 16 rows 1, 2, 3, ...: shifting the row index to
# start at 0 puts rows 1-16 in block 0, rows 17-32 in block 1, and so on;
# adding 1 gives 1-based labels. (The modulo `%% 16` would instead give the
# position *within* each block, i.e. it would just reproduce `per_section`.)
book1$type <- (seq_len(nrow(book1)) - 1L) %/% 16L + 1L
I have two countries with different starting and ending years, and the mean earnings of different social classes for each. I would like to index the earnings of each social class so that the first year equals 100, and then see how they progressed from 100 in subsequent years. In general this should be easy to do, but it does not seem to work, apparently because my countries have different numbers of observations.
Here is the code that I have tried, but I only got missing values:
df=df %>%
group_by(cntry, year,class_m) %>%
mutate(base_year = (mean[first(year)]/mean)*100)
Here is the data:
df= structure(list(cntry = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), .Label = c("at", "be"), class = "factor"),
year = structure(c(4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L,
13L, 13L, 13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L,
16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L,
19L, 19L, 19L, 20L, 20L, 20L, 20L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L,
10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L,
13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L,
16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L
), .Label = c("1995", "1997", "2000", "2003", "2004", "2005",
"2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013",
"2014", "2015", "2016", "2017", "2018", "2019"), class = "factor"),
class_m = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("Low-skilled working class",
"Skilled working class", "Middle class", "Upper class"), class = "factor"),
mean = c(21667.3165297756, 31141.2479100646, 38694.5317839067,
48897.5586114381, 21893.6782367936, 29866.0796003899, 36846.1057208349,
46115.8225807015, 19914.101136956, 30201.1848571751, 36688.5006276306,
44334.4349912073, 20505.9102212244, 30071.1070352498, 37093.4347815202,
44630.7476325564, 20265.9465807599, 29827.9369893851, 40549.4855257344,
48107.2865241041, 22378.7756708627, 31334.7756747725, 39981.9785570756,
50347.8600052063, 23101.010596959, 31412.9240693068, 40458.6454333296,
51740.898756006, 19805.2965921531, 30817.6682795387, 41165.6041754244,
52782.5026014194, 19078.5626059941, 30499.5262897878, 41177.4423103889,
51240.6014436097, 20393.1169949116, 29796.8273849528, 39234.5103600113,
50494.5284121857, 20786.560760249, 31306.6058474771, 40854.36428628,
50339.5860855376, 20033.5844477617, 30424.7651611075, 39659.447696875,
49191.4195426966, 18851.261369003, 30412.4669765863, 41857.2930659497,
51097.4975692186, 22333.7894908968, 30863.010648668, 41852.0093099513,
54112.6228115753, 21709.19921875, 30039.5068801246, 41097.4541047158,
49862.44140625, 20113.5586718618, 30733.8952367545, 41658.1716627373,
51754.4018503782, 21818.4311173551, 33225.8409123812, 43882.2512500977,
51037.5228976151, 15858.5028150308, 18782.8272745439, 22871.4020551682,
26288.6154497508, 26599.1650213236, 31720.3186300543, 41940.5016413888,
51187.0060567118, 18564.6510736198, 21526.72898147, 24807.2116933588,
29207.4658820585, 23058.5603825146, 31862.7097532934, 37588.62928007,
45160.9518839946, 25495.8949453907, 31851.1999874662, 38276.6899334939,
46318.331560595, 23165.6350767837, 32586.7829065825, 37256.5740814167,
45285.0662561028, 23975.7581116063, 30787.3910726117, 37346.8507982085,
45180.6091420909, 23786.1529599028, 32413.707905246, 38596.3467614532,
47026.6344280445, 24272.92088131, 31167.7104944988, 37745.6268718255,
46128.4799968946, 24583.9968164343, 29819.2298432657, 40053.8477213667,
48223.1556254353, 23227.04705051, 29611.9190298389, 39086.0012315702,
46742.9511396314, 20980.1647228858, 29627.2417955117, 38648.6829503705,
45677.0658477392, 21397.8125304146, 30675.2233482807, 40735.634479222,
46355.3748374436, 22836.5595055445, 29859.0336509053, 40335.3885497182,
47934.8837121327, 21465.185981748, 30436.1330929852, 40091.5582937488,
48743.3268548605, 21375.6534656544, 31060.2359133816, 40006.7183770635,
47618.8685730448, 20901.803025412, 29971.1886677767, 39526.0725185188,
46793.098588355, 21710.1246251194, 30894.12481284, 39699.3077814615,
47179.3071888513)), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -140L), groups = structure(list(
cntry = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("at",
"be"), class = "factor"), year = structure(c(4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L), .Label = c("1995", "1997",
"2000", "2003", "2004", "2005", "2006", "2007", "2008", "2009",
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017",
"2018", "2019"), class = "factor"), .rows = structure(list(
1:4, 5:8, 9:12, 13:16, 17:20, 21:24, 25:28, 29:32, 33:36,
37:40, 41:44, 45:48, 49:52, 53:56, 57:60, 61:64, 65:68,
69:72, 73:76, 77:80, 81:84, 85:88, 89:92, 93:96, 97:100,
101:104, 105:108, 109:112, 113:116, 117:120, 121:124,
125:128, 129:132, 133:136, 137:140), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -35L), .drop = TRUE))
Is this what you want?
# Index each class's mean earnings to the first row of its country/class group
# (= 100). `first(mean)` is the base-year value because the rows of the sample
# data are already in chronological order within each country.
df %>%
  group_by(cntry, class_m) %>%
  mutate(base_year = 100 * mean / first(mean))
# # A tibble: 140 x 5
# # Groups: cntry, class_m [8]
# cntry year class_m mean base_year
# <fct> <fct> <fct> <dbl> <dbl>
# 1 at 2003 Low-skilled working class 21667. 100
# 2 at 2003 Skilled working class 31141. 100
# 3 at 2003 Middle class 38695. 100
# 4 at 2003 Upper class 48898. 100
# 5 at 2004 Low-skilled working class 21894. 101.
# 6 at 2004 Skilled working class 29866. 95.9
# 7 at 2004 Middle class 36846. 95.2
# 8 at 2004 Upper class 46116. 94.3
# 9 at 2005 Low-skilled working class 19914. 91.9
# 10 at 2005 Skilled working class 30201. 97.0
# # ... with 130 more rows
In an experiment, female fish were exposed to two levels of photoperiod (Ambient & Compressed), two levels of temperature (4 & 7). They were in four tanks (two tanks for each photoperiod, one tank for each temperature within photoperiod). There were nine samplings denoted by time_date in the data. Among other responses is "k". My interest is on the effects of photoperiod, temperature and time_date on "k".
Challenges faced: an unbalanced design (one photoperiod or temperature level was not sampled at some samplings) and pseudo-replication (each tank is a treatment (temperature masked within photoperiod)). With some reading, I came across mixed models. I have tried lmer (more importantly, I am not sure whether I am doing it right) and ran into warnings and output with no p-values. I appreciate your help. Thank you in advance.
Here is the sample data
fem.fish <- structure(list(time_date = structure(c(8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), .Label = c("30-Jan-18",
"11-Apr-18", "13-Jun-18", "07-Aug-18", "19-Sep-18", "30-Oct-18",
"28-Nov-18", "03-Jan-19", "17-Jan-19", "31-Jan-19", "14-Feb-19",
"28-Feb-19", "14-Mar-19", "27-Mar-19", "10-Apr-19", "24-Apr-19"
), class = "factor"), photo = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Ambient",
"Compress"), class = "factor"), temp = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("4",
"7"), class = "factor"), tank = structure(c(2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("T1",
"T2", "T3", "T4"), class = "factor"), k = c(5.041791145, 5.408503999,
5.535282299, 5.346402317, 5.376649977, 5.072021484, 6.097412109,
4.390658006, 5.13676712, 4.472827193, 5.381892125, 4.882544582,
4.655393586, 5.435528121, 4.985185185, 4.548431822, 5.041791145,
5.408503999, 5.535282299, 5.346402317, 5.376649977, 5.072021484,
6.097412109, 4.390658006, 5.13676712, 4.472827193, 5.381892125,
4.882544582, 4.655393586, 5.435528121, 4.985185185, 4.548431822,
5.517125816, 4.772205603, 5.928149807, 4.152323266, 4.666037968,
4.638984928, 4.044444444, 4.720296599, 5.315500686, 4.967790359,
3.520804755, 4.722326417, 5.051895044, 4.807450844, 5.096461818,
5.28703008, 5.653368614, 6.357164944, 3.979492188, 3.928861374,
5.632685221, 5.264668498, 5.281464786, 5.387205387, 4.332381668,
5.250388878, 4.580237638, 4.650926114, 5.65951009, 4.401587625,
5.194587481, 4.184813255, 4.44738449, 5.829977261, 4.331985587,
4.827988338, 4.022222222, 3.672891297, 5.148148148, 4.068381688,
5.71922963, 4.566763848, 5.330442907, 2.422536369, 5.346580575,
4.971865289, 5.018922289, 5.513702624, 4.432146456, 5.692296224,
4.738120151, 4.896057489, 5.50365439, 5.249023438, 5.737818961,
4.260276996, 5.242507722, 4.580758017, 5.021888504, 5.013662642,
4.308286338, 5.50840192, 4.732342764, 4.672289386, 5.715557782,
3.827088497, 4.632069971, 4.935541824, 4.008746356, 4.963859809,
4.836806618, 4.46244856, 4.839677641, 4.498269896, 4.88357943,
4.984069185, 4.596844478, 5.196200195, 5.165529005, 14.74622771,
5.397084548, 7.983198678, 5.691090246, 5.707491082, 5.187172012,
6.297376093, 4.647178889, 4.282407407, 4.333496094, 4.773656052,
4.770999725, 4.092207407, 3.917638484, 5.193905817, 3.704833984,
5.571239611, 4.226680384, 3.65230095, 4.78515625, 5.603027344,
4.159218067, 4.719370009, 4.437016946, 4.407713499, 4.284050303,
4.676783265, 4.311689337, 4.540625, 4.864470022, 4.668176455,
5.221193416, 4.997084123, 4.112752873, 5.587217586, 6.045051626,
4.605417744, 4.35030714, 5.185252617, 4.752696927, 4.446670562,
4.268256569, 4.30372087, 4.025205761, 5.696474074, 4.068342788,
3.5212701, 4.544646911, 5.212620027, 5.31978738, 4.879910442,
4.606482493, 4.33502906, 5.294067215, 5.770262391, 4.264308136,
4.501028807, 2.944958848, 4.180638577, 4.120435057, 3.833076111,
4.496793003, 4.232167131, 3.783896334, 5.070553936, 4.825776352,
4.643534043, 6.318587106, 5.66205358, 5.194631597, 4.72557037,
4.195096521, 4.956238551, 3.503093444, 5.24857851, 4.792524005,
4.44229595, 5.285131195, 4.335878892, 4.170953361, 4.045779268
)), row.names = c(NA, -192L), class = "data.frame")
What I tried and the first warning
fit1 <- lmer(k ~ 0 + photo*temp*time_date + (1|tank), data = fem.fish, REML = FALSE)
fixed-effect model matrix is rank deficient so dropping 12 columns / coefficients
boundary (singular) fit: see ?isSingular
My summary and another warning on correlation matrix
summary(fit1)
Linear mixed model fit by maximum likelihood ['lmerMod']
Formula: k ~ 0 + photo * temp * time_date + (1 | tank)
Data: fem.fish
AIC BIC logLik deviance df.resid
551.2 635.9 -249.6 499.2 166
Scaled residuals:
Min 1Q Median 3Q Max
-2.7467 -0.4380 -0.0447 0.3663 9.7226
Random effects:
Groups Name Variance Std.Dev.
tank (Intercept) 0.0000 0.0000
Residual 0.7883 0.8879
Number of obs: 192, groups: tank, 4
Fixed effects:
Estimate Std. Error t value
photoAmbient 5.284e+00 3.139e-01 16.832
photoCompress 4.937e+00 3.139e-01 15.728
temp7 -1.218e-14 4.439e-01 0.000
time_date17-Jan-19 -9.116e-02 4.439e-01 -0.205
time_date31-Jan-19 -9.798e-02 4.439e-01 -0.221
time_date14-Feb-19 1.264e-01 4.439e-01 0.285
time_date28-Feb-19 -3.986e-01 4.439e-01 -0.898
time_date14-Mar-19 3.655e-01 4.439e-01 0.823
time_date27-Mar-19 -3.979e-01 4.439e-01 -0.896
time_date10-Apr-19 -4.122e-01 4.439e-01 -0.929
time_date24-Apr-19 -2.184e-01 4.439e-01 -0.492
photoCompress:temp7 8.874e-15 6.278e-01 0.000
photoCompress:time_date31-Jan-19 -2.957e-01 6.278e-01 -0.471
photoCompress:time_date28-Feb-19 1.575e+00 6.278e-01 2.509
photoCompress:time_date14-Mar-19 -6.073e-01 6.278e-01 -0.967
temp7:time_date17-Jan-19 -4.121e-02 6.278e-01 -0.066
temp7:time_date31-Jan-19 2.382e-01 6.278e-01 0.379
temp7:time_date14-Feb-19 -2.024e-01 6.278e-01 -0.322
temp7:time_date28-Feb-19 -1.441e+00 6.278e-01 -2.295
temp7:time_date14-Mar-19 -1.104e+00 6.278e-01 -1.759
temp7:time_date27-Mar-19 -4.306e-01 6.278e-01 -0.686
temp7:time_date10-Apr-19 -7.885e-01 6.278e-01 -1.256
temp7:time_date24-Apr-19 -5.872e-01 6.278e-01 -0.935
photoCompress:temp7:time_date14-Mar-19 9.077e-01 8.879e-01 1.022
Correlation matrix not shown by default, as p = 24 > 12.
Use print(x, correlation=TRUE) or
vcov(x) if you need it
fit warnings:
fixed-effect model matrix is rank deficient so dropping 12 columns / coefficients
convergence code: 0
boundary (singular) fit: see ?isSingular
My understanding on t-values is not good at all, so I cannot establish whether there are significant effects or even whether the interactions are significant or not.
I will appreciate your suggestions on the modelling (Fitting the right model?) and more of what you find useful
Thank you so much all.
Try loading the "lmerTest" package.
Load it before fitting your model; the summary will then show the p-values and the "*" significance markers:
library("lme4")
library("lmerTest")
I have used your data for the following example. I think due to the fact that all your terms are categorical, you get the rank deficient model. I'd suggest you use time as continuous predictor, thereby you get rid of the rank-deficiency warning.
library(lme4)
library(parameters)
library(performance)
# as.numeric() on a factor returns its level codes, so the sampling dates become
# the continuous predictor 8, 9, ..., 16 (their positions among the 16 levels).
fem.fish$time_date <- as.numeric(fem.fish$time_date)
# Random intercept per tank; fitted by maximum likelihood (REML = FALSE).
fit1 <- lmer(k ~ 1 + photo * temp * time_date + (1 | tank),
  data = fem.fish, REML = FALSE
)
#> boundary (singular) fit: see ?isSingular
The second warning about the singular fit (now the first, and only warning) is because you literally have no variability in your outcome across the different groups (indicated by tank). This means that the random effects model here gives you not much more benefit than a simple linear model.
ranef(fit1)
#> $tank
#> (Intercept)
#> T1 0
#> T2 0
#> T3 0
#> T4 0
#>
#> with conditional variances for "tank"
Finally, you could use the packages parameters and performance to get comprehensive model summaries (including different p-value approximations like Satterthwaite or Kenward-Roger, standardized parameters or (cluster) robust standard errors) or model fit indices (like r2).
parameters::model_parameters(fit1)
#> Parameter | Coefficient | SE | 95% CI | t | df | p
#> -----------------------------------------------------------------------------------------------------
#> (Intercept) | 5.56 | 0.61 | [ 4.36, 6.76] | 9.06 | 182 | < .001
#> photo [Compress] | -1.46 | 1.04 | [-3.50, 0.58] | -1.41 | 182 | 0.160
#> temp [7] | 0.69 | 0.94 | [-1.16, 2.53] | 0.73 | 182 | 0.467
#> time_date | -0.04 | 0.05 | [-0.13, 0.06] | -0.74 | 182 | 0.461
#> photo [Compress] * temp [7] | 0.73 | 1.52 | [-2.24, 3.70] | 0.48 | 182 | 0.631
#> photo [Compress] * time_date | 0.12 | 0.09 | [-0.06, 0.31] | 1.35 | 182 | 0.178
#> temp [7] * time_date | -0.09 | 0.07 | [-0.23, 0.05] | -1.29 | 182 | 0.198
#> (photo [Compress] * temp [7]) * time_date | -0.07 | 0.13 | [-0.33, 0.19] | -0.52 | 182 | 0.604
performance::r2(fit1)
#> Warning: Can't compute random effect variances. Some variance components equal zero.
#> Solution: Respecify random structure!
#> Random effect variances not available. Returned R2 does not account for random effects.
#> # R2 for Mixed Models
#>
#> Conditional R2: NA
#> Marginal R2: 0.088
Now that your time variable is continuous, you may think about a non-linear relationship of the time trend. You could use the splines package to model this, and ggeffects to get effects plots. This works, of course, for the model with a linear time trend as well as for other curvilinear time trends.
library(ggeffects)
pr <- ggpredict(fit1, c("time_date", "photo", "temp"))
plot(pr)
library(splines)
fit2 <- lmer(
k ~ 1 + photo * temp * bs(time_date) + (1 | tank),
data = fem.fish,
REML = FALSE
)
#> boundary (singular) fit: see ?isSingular
pr <- ggpredict(fit2, c("time_date [all]", "photo", "temp"))
plot(pr)
Hope that helps!
Here is my dataframe:
structure(list(replicate = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L,
10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L), press_id = c(1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), start_time = c(164429106370979,
164429411618825, 164429837271940, 164430399454285, 164429106370980,
164429411618826, 164429837271941, 164430399454286, 164429106370981,
164429411618827, 164429837271942, 164430399454287, 164429106370982,
164429411618828, 164429837271943, 164430399454288, 164429106370983,
164429411618829, 164429837271944, 164430399454289, 164429106370984,
164429411618830, 164429837271945, 164430399454290, 164429106370985,
164429411618831, 164429837271946, 164430399454291, 164429106370986,
164429411618832, 164429837271947, 164430399454292, 164429106370987,
164429411618833, 164429837271948, 164430399454293, 164429106370988,
164429411618834, 164429837271949, 164430399454294, 164429106370989,
164429411618835, 164429837271950, 164430399454295, 164429106370990,
164429411618836, 164429837271951, 164430399454296, 164429106370991,
164429411618837, 164429837271952, 164430399454297, 164429106370992,
164429411618838, 164429837271953, 164430399454298, 164429106370993,
164429411618839, 164429837271954, 164430399454299), end_time = c(164429182443825,
164429512525748, 164429903243170, 164430465927555, 164429182443826,
164429512525749, 164429903243171, 164430465927556, 164429182443827,
164429512525750, 164429903243172, 164430465927557, 164429182443828,
164429512525751, 164429903243173, 164430465927558, 164429182443829,
164429512525752, 164429903243174, 164430465927559, 164429182443830,
164429512525753, 164429903243175, 164430465927560, 164429182443831,
164429512525754, 164429903243176, 164430465927561, 164429182443832,
164429512525755, 164429903243177, 164430465927562, 164429182443833,
164429512525756, 164429903243178, 164430465927563, 164429182443834,
164429512525757, 164429903243179, 164430465927564, 164429182443835,
164429512525758, 164429903243180, 164430465927565, 164429182443836,
164429512525759, 164429903243181, 164430465927566, 164429182443837,
164429512525760, 164429903243182, 164430465927567, 164429182443838,
164429512525761, 164429903243183, 164430465927568, 164429182443839,
164429512525762, 164429903243184, 164430465927569)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -60L), vars = c("replicate",
"press_id"), drop = TRUE, indices = list(0L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L,
54L, 55L, 56L, 57L, 58L, 59L), group_sizes = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
replicate = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L,
11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L), press_id = c(1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L)), class = "data.frame", row.names = c(NA,
-60L), vars = c("replicate", "press_id"), drop = TRUE, indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L,
50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L), group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
replicate = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L,
11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L), press_id = c(1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L)), class = "data.frame", row.names = c(NA,
-60L), vars = c("replicate", "press_id"), drop = TRUE, .Names = c("replicate",
"press_id")), .Names = c("replicate", "press_id")), .Names = c("replicate",
"press_id", "start_time", "end_time"))
I want to get the inter press_id time diff for example:
replicate press_id start_time end_time time_diff
1 1 1.644291e+14 1.644292e+14 0 (it's a first row)
1 2 1.644294e+14 1.644295e+14 1.644294e+14 - 1.644292e+14
1 3 1.644298e+14 1.644299e+14 1.644298e+14 - 1.644295e+14
1 4 1.644304e+14 1.644305e+14 .....
2 1 1.644291e+14 1.644292e+14
2 2 1.644294e+14 1.644295e+14
2 3 1.644298e+14 1.644299e+14
2 4 1.644304e+14 1.644305e+14
I am trying to do this using mutate, lag, lead and diff but without any luck. I have grouped, and ungrouped the dataset, nothing helped me.
df %>%
group_by(replicate) %>%
mutate(d = ifelse(row_number() == 1, 0, lead(start_time) - end_time))
df %>%
group_by(replicate) %>%
mutate(d = start_time - lag(end_time))
And if you want zeroes instead of NAs for the first row of each unique value in the replicate column, you could do:
# Gap between this press's start and the previous press's end within each
# replicate; the NA produced by lag() on the first row is replaced with 0.
df %>%
  group_by(replicate) %>%
  mutate(d = coalesce(start_time - lag(end_time), 0))
Or just:
# Same gap, but the first row of every replicate is set to 0 explicitly.
df %>%
  group_by(replicate) %>%
  mutate(
    d = if_else(row_number() == 1, 0, start_time - lag(end_time))
  )
Searched for a solution for two days to no avail so far.
I have bird observations from different observation points. The observers write down the species, where they have seen them, and for how long.
Now it happens that from different points, observations are taken from the same area, but we only want to process the maximum value per species in an area.
So first, i aggregated the data by observation point, species and area, and summed up the time.
dt.agg <- aggregate(time ~ observp + species + time, dt, sum)
OOPS: completely wrong command;
should have been:
dt.agg <- aggregate(time ~ observp + species + area, dt, sum)
observp species area time
1 1a Rm A1 43.878488
2 1c Rm A1 296.152707
3 2 Rm A1 29.546790
4 1a Swm A1 34.127713
5 1b Swm A1 11.076880
6 2 Swm A1 8.771703
This worked ok. But now, I only need the maximum value for time for a species in an area, BUT i also need to know from which observation point these numbers were taken.
In my example, row 2 should be kept for Rm in A1, while rows 1 and 3 should be dropped. The same applies to row 4 (keep) and 5 + 6 (drop)
When i just do another aggregate with species and area over time and max, the info for the observation point is lost.
Can someone please show me a way to achieve this?
Cheers
Bernd
(now with a new account and no reputation .. thank you ... google!)
p.s. Please feel free to give this question a better headline
UPDATE:
trying to post the dput(head(dt,100))-sample as suggested. The original dataset has over 1300 rows. Hope thats what you want to have.
structure(list(species = structure(c(3L, 3L, 3L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L,
5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L,
5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 5L, 5L, 5L,
3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), .Label = c("Bf",
"Gr", "Rm", "Row", "Swm", "Wf", "Wsb", "Wst", "Ww"), class = "factor"),
area = structure(c(35L, 19L, 34L, 34L, 32L, 19L, 34L, 35L,
10L, 36L, 10L, 14L, 13L, 25L, 27L, 28L, 34L, 19L, 14L, 14L,
34L, 1L, 12L, 13L, 15L, 3L, 3L, 34L, 34L, 34L, 14L, 14L,
13L, 13L, 1L, 1L, 1L, 11L, 1L, 8L, 21L, 22L, 22L, 9L, 9L,
9L, 5L, 9L, 3L, 22L, 27L, 26L, 21L, 26L, 21L, 27L, 3L, 9L,
20L, 20L, 9L, 26L, 34L, 30L, 3L, 2L, 3L, 4L, 20L, 3L, 37L,
16L, 17L, 18L, 14L, 35L, 34L, 34L, 34L, 36L, 4L, 4L, 3L,
3L, 17L, 17L, 38L, 36L, 10L, 38L, 36L, 10L, 38L, 37L, 35L,
30L, 16L, 15L, 17L, 5L), .Label = c("A1", "A10", "A11", "A12",
"A13", "A14", "A15", "A16", "A17", "A18", "A2", "A3", "A4",
"A5", "A6", "A7", "A8", "A9", "O1", "O10", "O11", "O12",
"O13", "O14", "O15", "O16", "O17", "O18", "O19", "O2", "O20",
"O21", "O22", "O3", "O4", "O5", "O7", "O8", "O9"), class = "factor"),
observp = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("1a", "1b", "1c", "2", "3", "4"), class = "factor"),
time = c(36.37086972, 2.730715967, 1.891286914, 3.782573827,
4.496276059, 5.461431934, 18.91286914, 13.22577081, 5.823001976,
5.392743201, 3.882001317, 16.97305991, 6.094384821, 5.274262222,
5.462035947, 2.089427691, 7.565147654, 21.84572774, 25.45958986,
16.97305991, 7.565147654, 4.875387532, 8.885792099, 4.062923214,
6.636122805, 7.038317277, 10.55747592, 7.565147654, 7.565147654,
3.782573827, 25.45958986, 25.45958986, 12.18876964, 12.18876964,
19.50155013, 19.50155013, 9.750775065, 39.20627398, 4.875387532,
6.423076843, 2.436283538, 1.823249104, 1.823249104, 16.72889022,
41.82222555, 33.45778044, 12.30932064, 117.1022315, 3.519158639,
1.823249104, 27.31017974, 11.11346598, 4.872567077, 11.11346598,
4.872567077, 5.462035947, 3.519158639, 16.72889022, 14.86012871,
8.916077225, 25.09333533, 22.22693195, 3.782573827, 5.184879322,
10.55747592, 8.509038411, 10.55747592, 17.70988435, 5.944051483,
3.519158639, 17.69229328, 34.70586347, 5.966017168, 3.092236431,
2.828843318, 6.612885403, 3.782573827, 3.782573827, 7.565147654,
5.392743201, 17.70988435, 17.70988435, 3.519158639, 2.346105759,
11.93203434, 11.93203434, 2.386548395, 0.898790534, 0.64700022,
2.386548395, 0.898790534, 0.64700022, 2.684866944, 6.634609979,
1.239916013, 1.944329746, 3.2536747, 3.732819078, 6.711769315,
2.307997621)), .Names = c("species", "area", "observp", "time"
), row.names = c(NA, 100L), class = "data.frame")
You may also have a look at another base function, by. The output is a list where each element is the result for a different combination of INDICES.
# For every species/area cell, keep the row with the largest time
# (which.max() returns the first one if several rows tie).
bb <- by(
  data = df,
  INDICES = list(df$species, df$area),
  FUN = function(cell) cell[which.max(cell$time), ]
)
bb
# : Rm
# : A1
# observp species area time
# 2 1c Rm A1 296.1527
# --------------------------------------------------------------------
# : Swm
# : A1
# observp species area time
# 4 1a Swm A1 34.12771
If you want to convert the list to a data.frame:
df2 <- do.call(rbind, bb)
df2
# observp species area time
# 2 1c Rm A1 296.15271
# 4 1a Swm A1 34.12771
Another alternative:
library(plyr)
# Split df by species and area, then keep the rows whose time equals the group
# maximum. Rows that tie for the maximum are all kept, unlike which.max(),
# which returns only the first.
ddply(.data = df, .variables = .(species, area), subset,
time == max(time))
An example is
# Per grade: a1 is the mean of ability with NAs removed, a2 is the last school
# value in the group. TRUE is spelled out because T is an ordinary variable
# that can be reassigned.
stulevel_agg_2 <- stulevel[,
  list(a1 = mean(ability, na.rm = TRUE), a2 = last(school, na.rm = TRUE)),
  by = grade
]
a1, a2 are new column names. last can take the last element within the group, but you need to load xts first.