I need to display 3 clusters in a 3D plot using the plotly package in R. The clusters are generated with the k-means function included in R. I searched, but I could only find examples that use the ggplot package.
How can I do this, please?
This is a part of my data set to give reproducible example.
> dput(DATAFINALE[1:50,])
structure(list(YEAR_SALES = c(2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L), CREATION_YEAR_SALES = c(2L,
1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
1L), TYPE_PEAU = c(2L, 3L, 4L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 1L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L,
3L, 4L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 4L, 2L, 2L, 3L,
3L, 2L, 2L, 2L, 2L, 2L, 4L), SENSIBILITE = c(3L, 3L, 3L, 2L,
1L, 3L, 3L, 2L, 2L, 2L, 3L, 1L, 3L, 1L, 2L, 3L, 3L, 2L, 3L, 3L,
3L, 3L, 3L, 2L, 1L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 2L, 1L, 3L,
3L, 3L, 3L, 1L, 2L, 2L, 3L, 2L, 3L, 3L, 3L, 1L, 2L, 3L), IMPERFECTIONS = c(2L,
3L, 2L, 1L, 3L, 2L, 2L, 1L, 2L, 1L, 2L, 3L, 2L, 2L, 1L, 3L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 3L, 2L,
1L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 1L, 2L, 3L, 1L,
2L), BRILLANCE = c(3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L,
3L, 1L, 3L, 3L, 1L, 3L, 2L, 1L, 3L, 3L, 1L, 3L, 1L, 3L, 3L, 3L,
1L, 3L, 3L, 3L, 3L, 3L, 3L), GRAIN_PEAU = c(3L, 3L, 3L, 3L, 1L,
3L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 1L, 1L, 2L, 1L, 1L, 3L, 3L, 1L,
1L, 1L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 2L, 3L, 3L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 2L), RIDES_VISAGE = c(1L,
1L, 1L, 3L, 3L, 3L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 3L,
1L, 3L, 1L, 1L, 1L, 3L, 1L, 3L, 2L, 1L, 3L, 3L, 3L, 3L, 1L, 3L,
3L, 3L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 3L, 2L, 3L, 3L, 1L,
1L), ALLERGIES = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), MAINS = c(2L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 1L, 3L, 3L, 2L, 3L, 3L, 2L, 2L,
2L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L,
2L, 3L, 1L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 3L), PEAU_CORPS = c(1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 2L, 3L, 2L, 2L, 1L, 2L, 1L, 3L, 2L, 2L, 2L, 3L, 2L,
2L, 2L, 2L, 3L, 3L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
3L), INTERET_ALIM_NATURELLE = c(1L, 3L, 3L, 1L, 3L, 1L, 1L, 1L,
3L, 1L, 1L, 3L, 1L, 1L, 1L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), INTERET_ORIGINE_GEO = c(1L,
2L, 1L, 1L, 3L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 2L, 1L,
1L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 2L, 1L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L,
1L), INTERET_VACANCES = c(2L, 3L, 1L, 2L, 1L, 2L, 1L, 1L, 2L,
3L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), INTERET_ENVIRONNEMENT = c(1L,
3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 1L,
1L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), INTERET_COMPOSITION = c(1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 1L,
3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), MONTH_SALES = c(9, 9,
2, 9, 3, 3, 11, 12, 3, 6, 3, 3, 8, 9, 5, 1, 10, 5, 4, 9, 2, 3,
4, 5, 6, 7, 7, 9, 7, 7, 11, 6, 4, 4, 4, 8, 9, 8, 9, 12, 4, 4,
3, 11, 5, 12, 11, 2, 6, 3), DAY_SALES = c(13, 3, 10, 23, 12,
10, 26, 4, 18, 9, 9, 9, 4, 10, 17, 28, 22, 4, 14, 22, 2, 10,
1, 20, 7, 12, 1, 3, 13, 3, 9, 5, 13, 27, 1, 28, 18, 10, 3, 2,
15, 6, 25, 4, 8, 23, 16, 19, 21, 14), HOURS_INS = c(17, 14, 18,
16, 23, 18, 16, 12, 17, 16, 21, 18, 22, 14, 10, 15, 13, 13, 21,
16, 23, 22, 17, 12, 15, 23, 17, 14, 8, 10, 12, 14, 13, 10, 17,
3, 19, 22, 17, 18, 23, 18, 8, 16, 12, 19, 21, 14, 11, 22), CREATION_MONTH_SALES = c(9,
9, 2, 10, 12, 3, 11, 2, 3, 6, 10, 3, 3, 9, 7, 11, 11, 5, 4, 9,
2, 3, 4, 8, 6, 7, 10, 5, 7, 8, 11, 6, 4, 4, 11, 8, 9, 8, 12,
12, 4, 8, 2, 11, 11, 1, 11, 10, 8, 3), CREATION_DAY_SALES = c(13,
11, 15, 31, 5, 10, 27, 7, 18, 9, 8, 18, 6, 26, 4, 24, 16, 12,
15, 22, 10, 10, 25, 5, 28, 20, 10, 18, 14, 31, 9, 5, 22, 27,
6, 29, 18, 11, 6, 2, 16, 17, 1, 4, 23, 23, 16, 1, 25, 16), VALIDATION_YEAR_SALES = c(2,
1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 1, 2, 1, 2, 1,
1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2,
1, 1, 2, 1, 2, 1, 1), VALIDATION_MONTH_SALES = c(9, 9, 2, 11,
12, 3, 12, 2, 3, 6, 10, 3, 3, 10, 7, 11, 11, 5, 4, 9, 2, 3, 4,
8, 6, 7, 10, 5, 7, 9, 11, 6, 4, 4, 11, 8, 9, 8, 12, 12, 4, 8,
2, 11, 11, 1, 11, 10, 8, 3), VALIDATION_DAY_SALES = c(15, 14,
16, 3, 6, 19, 1, 8, 21, 10, 9, 21, 7, 1, 6, 25, 17, 13, 20, 29,
11, 20, 29, 6, 30, 22, 12, 20, 16, 1, 10, 7, 25, 28, 14, 30,
19, 13, 8, 4, 28, 24, 2, 7, 25, 25, 19, 3, 27, 21), AGE_CUSTUMER = c(32,
37, 24, 32, 44, 33, 29, 30, 56, 48, 44, 43, 37, 43, 35, 62, 60,
33, 51, 32, 35, 33, 28, 24, 32, 38, 33, 36, 54, 45, 39, 41, 55,
34, 54, 51, 45, 57, 24, 47, 35, 51, 45, 39, 31, 40, 42, 42, 39,
58), MEAN_Sales = c(0, 71.75, 50.7142857142857, 0, 0.666666666666667,
83.3333333333333, 0.333333333333333, 25.7777777777778, 23.3846153846154,
35.5294117647059, 21.6363636363636, 46.8461538461538, 18.4, 15.0666666666667,
110.25, 8.85714285714286, 0, 21.5, 18.5714285714286, 28.125,
101.333333333333, 69.1428571428571, 48.25, 20.5833333333333,
12, 20.3333333333333, 23, 15.1428571428571, 12.3913043478261,
30.3076923076923, 24.625, 23.375, 20.0833333333333, 32.75, 0,
1.5, 0, 50.6, 32.3846153846154, 33, 28.6818181818182, 19.8076923076923,
25.6666666666667, 9.83333333333333, 33, 55.3333333333333, 42.7,
0, 31.375, 11.625), NBR_GIFTS = c(1, 1, 1, 1, 1, 1, 1, 1, 4,
3, 4, 2, 1, 4, 1, 1, 1, 1, 3, 2, 1, 2, 2, 1, 3, 5, 4, 1, 9, 2,
5, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 1, 3, 2, 1, 1, 4, 4),
OUTCOME = c(3, 4, 7, 3, 3, 6, 3, 9, 26, 17, 22, 13, 10, 30,
4, 7, 7, 6, 14, 16, 3, 7, 12, 12, 15, 24, 21, 7, 46, 13,
16, 8, 12, 8, 3, 8, 3, 10, 13, 13, 22, 26, 12, 6, 13, 6,
10, 4, 16, 24)), .Names = c("YEAR_SALES", "CREATION_YEAR_SALES",
"TYPE_PEAU", "SENSIBILITE", "IMPERFECTIONS", "BRILLANCE", "GRAIN_PEAU",
"RIDES_VISAGE", "ALLERGIES", "MAINS", "PEAU_CORPS", "INTERET_ALIM_NATURELLE",
"INTERET_ORIGINE_GEO", "INTERET_VACANCES", "INTERET_ENVIRONNEMENT",
"INTERET_COMPOSITION", "MONTH_SALES", "DAY_SALES", "HOURS_INS",
"CREATION_MONTH_SALES", "CREATION_DAY_SALES", "VALIDATION_YEAR_SALES",
"VALIDATION_MONTH_SALES", "VALIDATION_DAY_SALES", "AGE_CUSTUMER",
"MEAN_Sales", "NBR_GIFTS", "OUTCOME"), row.names = c(1L, 2L,
3L, 5L, 9L, 13L, 14L, 16L, 18L, 19L, 20L, 24L, 27L, 29L, 30L,
32L, 33L, 35L, 36L, 37L, 39L, 44L, 49L, 51L, 52L, 53L, 55L, 56L,
61L, 62L, 63L, 65L, 66L, 67L, 71L, 74L, 75L, 80L, 81L, 84L, 86L,
90L, 92L, 95L, 96L, 99L, 100L, 103L, 104L, 107L), class = "data.frame")
My model of clustering is given by this code:
# Partition the rows of DATAFINALE into 3 k-means clusters, trying 20 random starts
Model <- kmeans(x = DATAFINALE, nstart = 20, centers = 3)
Then I need a plot like the one titled "Basic 3D Scatter Plot" at https://plot.ly/r/3d-scatter-plots/.
Thank you in advance
First of all, you have to add the cluster vector to the dataset.
# Store the cluster labels as a factor so they are treated as discrete groups when plotted
DATAFINALE[["cluster"]] <- as.factor(Model[["cluster"]])
Then you have to decide which variables to plot as x, y, and z (I picked three at random):
# Names of the DATAFINALE columns shown on the x, y and z axes;
# they are reused below as the axis titles.
x <-'MONTH_SALES'
y <-'DAY_SALES'
z <- 'HOURS_INS'
Lastly you can plot it, using the cluster as colors:
library(plotly)
# 3D scatter plot of the clustered data, one colour per cluster.
# The axis mappings are built from the column names stored in x, y and z, so
# the plotted columns and the axis titles always agree: changing x, y or z
# above is enough to plot different variables.
# (Column names that are not syntactic R names would need backticks in the formula.)
p <- plot_ly(DATAFINALE,
             x = as.formula(paste0("~", x)),
             y = as.formula(paste0("~", y)),
             z = as.formula(paste0("~", z)),
             color = ~cluster) %>%
  add_markers() %>%
  layout(scene = list(xaxis = list(title = x),
                      yaxis = list(title = y),
                      zaxis = list(title = z)))
# Print the plot object to render it
p
Here the result:
Related
I am fitting a logistic regression model to complex survey data using the survey package in R. After fitting the model, I performed regression diagnostics using the car package. I noticed outlying and influential observations that I would like to remove, and then refit the model to check their effect on the regression coefficients, but my current approach is not giving me what I expect.
My dataset has about 10,000 observations. Here is sample data and code I have tried using:
library(car); library(survey)
dat <- structure(list(id = c(1009918, 1012826, 1029625, 1000926, 1027525,
1000115, 1000201, 1000202, 1000214, 1000219, 1000313, 1000324,
1000510, 1000521, 1000624, 1000708, 1000811, 1000817, 1000818,
1000906, 1000922, 1001002, 1001005, 1001401, 1001411, 1001413,
1001420, 1001424, 1001501, 1001510, 1001518, 1001526, 1001621,
1001807, 1001922, 1001926, 1002106, 1002217, 1002406, 1002416,
1002618, 1002709, 1003004, 1003017, 1003103, 1003108, 1003304,
1003319, 1003723, 1003804, 1003811, 1003819, 1004014, 1008902,
1008913, 1009011, 1009022, 1009123, 1009212, 1009215), strata = c(1,
2, 6, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), cluster = c(785,
938, 2337, 28, 2122, 3, 6, 6, 6, 6, 10, 10, 16, 16, 19, 22, 24,
24, 24, 28, 28, 33, 33, 45, 45, 45, 45, 45, 50, 50, 50, 50, 53,
60, 63, 63, 69, 74, 96, 96, 100, 102, 111, 111, 115, 115, 122,
122, 178, 193, 193, 193, 210, 755, 755, 759, 759, 762, 765, 765
), weights = c(621.921704979739, 5440.9107594311, 8450.49341643626,
2457.37241774248, 7174.79930450487, 930.492019594546, 443.253676607562,
443.253676607562, 886.507353215123, 443.253676607562, 1552.30979801343,
517.436599337811, 403.146111343943, 806.292222687886, 439.775494378883,
839.561001668328, 1210.77101540146, 403.590338467152, 403.590338467152,
457.23211170669, 914.464223413381, 584.557580338056, 584.557580338056,
233.135312658304, 233.135312658304, 233.135312658304, 466.270625316608,
233.135312658304, 287.94933168791, 287.94933168791, 287.94933168791,
287.94933168791, 2354.32022397843, 213.628591090648, 300.596873749779,
300.596873749779, 1121.27419052962, 528.482361549292, 1936.60489456861,
1291.06992971241, 282.360930726457, 3526.73915258957, 337.531162185852,
337.531162185852, 2183.63202546241, 2729.54003182802, 1035.32340123929,
1552.98510185893, 1400.62601417017, 717.92144006312, 358.96072003156,
1435.84288012624, 275.058410167952, 557.874242565598, 278.937121282799,
1687.48015279064, 1012.48809167438, 424.663883556537, 227.805527040477,
227.805527040477), age = c(20, 19, 93, 24, 18, 23, 22, 23, 24,
19, 18, 24, 20, 19, 18, 17, 19, 23, 19, 19, 21, 22, 21, 20, 23,
24, 24, 19, 21, 22, 20, 23, 21, 23, 20, 22, 23, 15, 20, 23, 24,
18, 24, 24, 15, 21, 24, 16, 22, 20, 20, 18, 21, 20, 21, 21, 24,
22, 24, 18), gender = structure(c(1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), levels = c("Male", "Female"), class = "factor"),
educ = structure(c(4L, 2L, 1L, 3L, 2L, 3L, 2L, 2L, 2L, 2L,
3L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 4L, 2L, 3L, 2L,
4L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L,
4L, 4L, 4L, 3L, 3L), levels = c("No formal education", "Primary",
"Secondary", "Tertiary"), class = "factor"), employ = structure(c(4L,
3L, 4L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 4L, 3L, 2L, 2L, 2L, 4L,
3L, 3L, 3L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 4L,
4L, 4L, 2L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 4L, 3L, 2L, 4L, 4L,
2L, 3L, 1L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 1L, 4L, 3L, 4L), levels = c("Unemployed",
"Employed", "Self-employed", "Other"), class = "factor"),
know = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), levels = c("No/Don't know", "Yes"), class = "factor"),
status = structure(c(2L, 4L, 2L, 1L, 5L, 3L, 2L, 2L, 2L,
4L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 5L, 4L, 2L, 5L, 5L, 4L, 3L,
2L, 3L, 2L, 2L, 2L, 2L, 4L, 3L, 3L, 4L, 3L, 2L, 2L, 3L, 1L,
2L, 1L, 1L, 2L, 2L, 2L), levels = c("1", "2", "3", "4", "5"
), class = "factor"), smoker = structure(c(2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), levels = c("no",
"yes"), class = "factor")), row.names = c(81L, 4174L, 6722L,
1255L, 2712L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L,
52L, 53L, 54L, 55L), class = "data.frame")
## Create survey design object: PSUs are given by `cluster`, strata by `strata`
## and sampling weights by `weights`; nest = TRUE treats cluster ids as nested
## within strata. The `strata` argument is spelled out in full instead of
## relying on partial argument matching.
dat_svy <- survey::svydesign(ids = ~cluster, strata = ~strata, weights = ~weights, data = dat, nest = TRUE)
## Set the survey package's handling of strata with a single PSU to "adjust"
options(survey.lonely.psu = "adjust")
## fit logistic regression model (each predictor is listed exactly once)
mod <- survey::svyglm(formula = smoker ~ age + educ + gender + employ + know + status, design = dat_svy, family = "quasibinomial")
I have tried the following:
update(mod, subset = !(rownames(dat_svy) %in% c(2, 5, 9, 13, 21))) # returns an error
update(mod, subset = -c(2, 5, 9, 13, 21)) # only removes one (first specified) observation
I am not sure why I am still receiving this message when running a base model with all variables in my dataset:
My data, with anonymized variables:
set.seed(1234)
#dput(df)
structure(list(outcome_1= structure(c(2L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
1L, 2L, 1L, NA, 2L, 1L), .Label = c("0", "1"), class = "factor"),
outcome_2= structure(c(2L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, NA, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, NA, 2L, 1L), .Label = c("0", "1"), class = "factor"),
outcome_3= structure(c(2L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, NA, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, NA, 1L, 1L), .Label = c("0", "1"), class = "factor"),
bl_ep = c(16, 92, 10, 40, 19, 1, 16, 10, 22, 28, 8, 11, 6,
47, 12, 1, 9, 20, 2, 14, 72, 28, 5, 16, 61, 12, 24, 22, 44,
44, 16, 36, 62, 10, 16, 10, 89, 22, 5, 38, 8, 11), bl_days = c(12,
28, 10, 25, 19, 1, 10, 9, 13, 28, 4, 11, 6, 20, 12, 1, 8,
16, 2, 12, 27, 28, 5, 13, 24, 10, 18, 18, 16, 16, 10, 28,
22, 5, 15, 8, 28, 15, 5, 22, 7, 11), score_1 = c(11,
19, 17, 17, 12, 14, 8, 12, 14, 15, 14, 13, 12, 14, 15, 5,
11, 14, 14, 13, 16, 11, 11, 14, 20, 14, 12, 11, 17, 15, 14,
18, 15, 14, 12, 10, 17, 16, 11, 13, 18, 17), score_2 = c(1.1,
1.6, 1.6, 2.8, 1.9, 3.3, 4, 3.8, 1.8, 1.4, 2, 3.55, 1.6,
1.8, 2.4, 3.7, 1.4, 2.9, 3.55, 2.5, 1.6, 3.2, 3.5, 2.4, 3.1,
2.3, 3.8, 3.9, 1.1, 1.7, 2.3, 1.5, 1.9, 3.3, 3, 2.9, 1.6,
3.1, 3.7, 2.8, 1.2, 1.9), score_3 = c(1,
1.22222222222222, 1.11111111111111, 1.88888888888889, 1.44444444444444,
1.44444444444444, 3.22222222222222, 2.77777777777778, 1.11111111111111,
1, 1, 2.83333333333333, 1.22222222222222, 1.875, 1.55555555555556,
2.66666666666667, 1, 2.25, 1.72222222222222, 2.05555555555556,
1.22222222222222, 2, 2, 1.77777777777778, 1.33333333333333,
1.11111111111111, 2.5, 2.55555555555556, 1, 1.22222222222222,
1.77777777777778, 1.22222222222222, 2.44444444444444, 1.55555555555556,
1.77777777777778, 1.66666666666667, 1.11111111111111, 2.33333333333333,
2.88888888888889, 1.55555555555556, 1, 1.25), score_4 = c(1.31428571428571,
1.37142857142857, 1.08571428571429, 1.83809523809524, 1.37142857142857,
1.8952380952381, 4, 3.88571428571429, 3.02857142857143, 2.12222222222222,
1.43333333333333, 3.39047619047619, 1.74285714285714, 1.67619047619048,
2.02857142857143, 3.48571428571429, 1.24761904761905, 3.73333333333333,
3.08571428571429, 2.56666666666667, 1.74285714285714, 2.6952380952381,
3.45714285714286, 2.27619047619048, 1.9047619047619, 2.62857142857143,
3.74285714285714, 3.74285714285714, 1.24761904761905, 1.39047619047619,
1.83809523809524, 2.74285714285714, 4, 1.77142857142857,
3.42857142857143, 3.2, 1.65714285714286, 2.55238095238095,
2.38095238095238, 2.40952380952381, 2.07619047619048, 2.56666666666667
), score_5 = c(1, 1, 1, 1, 1.33333333333333,
1, 3.33333333333333, 3.66666666666667, 1.66666666666667,
1.66666666666667, 2, 2.5, 1.66666666666667, 1, 1.33333333333333,
3, 1, 1.66666666666667, 2.16666666666667, 2.16666666666667,
1.33333333333333, 2.66666666666667, 3, 2.66666666666667,
1.33333333333333, 2.66666666666667, 3, 1.33333333333333,
1, 1, 1, 1, 1, 1.33333333333333, 3, 3.66666666666667, 1.66666666666667,
1.33333333333333, 2.33333333333333, 1.66666666666667, 2,
2), sex = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("F", "M"), class = "factor"), age = c(64,
66, 51, 69, 60, 65, 65, 69, 50, 78, 75, 78, 35, 77, 69, 48,
65, 72, 60, 64, 78, 71, 58, 55, 55, 57, 81, 76, 56, 71, 56,
73, 69, 51, 43, 77, 31, 64, 69, 63, 38, 71), childbirth = structure(c(2L,
2L, 2L, 1L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 2L, NA, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, NA, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L), .Label = c("N",
"Y"), class = "factor"), x1= c(3, 2, 2, NA,
3, 2, 3, NA, 3, 3, 2, 2, NA, 2, 5, 2, 2, 2, 4, 3, 2, 2, 3,
NA, 2, 3, NA, NA, 2, 2, 2, 2, 2, 2, 3, 2, 1, NA, 2, 2, 1,
3), x2= c(0, 0, 0, NA, 1, 0, 0, NA, 0, 0,
0, 0, NA, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, NA, NA,
0, 0, 0, 0, 0, 0, 0, 0, 1, NA, 0, 0, 0, 0), x3= structure(c(4L,
1L, 1L, 2L, 1L, 1L, 1L, NA, 4L, 1L, 1L, 4L, NA, 4L, 1L, 4L,
4L, 4L, 4L, 3L, 1L, 1L, 1L, 2L, 4L, 1L, NA, 2L, 1L, 4L, 1L,
1L, 4L, 4L, 1L, 4L, 4L, 2L, 4L, 4L, 4L, 1L), .Label = c("N",
"NA", "UNK", "Y"), class = "factor"), x4= structure(c(4L,
1L, 1L, 2L, 1L, 1L, 1L, NA, 1L, 1L, 4L, 1L, NA, 1L, 1L, 4L,
3L, 1L, 4L, 4L, 1L, 4L, 4L, 2L, 1L, 4L, NA, 2L, 4L, 1L, 4L,
1L, 1L, 4L, 4L, 1L, 4L, 2L, 4L, 1L, 4L, 4L), .Label = c("N",
"NA", "UNK", "Y"), class = "factor"), x5= structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 2L, NA, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L), .Label = c("N",
"Y"), class = "factor"), x6= structure(c(2L, 2L, 2L, 1L,
1L, 2L, 2L, NA, 1L, 1L, 1L, 2L, NA, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 2L, 2L, 1L, 1L, 2L, 2L, NA, 2L, 2L, 2L, 1L, 2L, 1L, 1L,
1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L), .Label = c("N", "Y"), class = "factor"),
x7= structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, NA, 1L, 1L, 1L, 1L, NA, 1L, 3L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 3L, 1L, NA, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 1L, 1L, 1L, 1L, 2L, 3L), .Label = c("N", "NA", "Y"), class = "factor"),
x8= structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, 1L,
2L, 2L, 2L, NA, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, NA, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
2L, 2L, 2L), .Label = c("N", "Y"), class = "factor"), x9= structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("N",
"Y"), class = "factor"), x10= structure(c(1L, 2L, 2L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x11= structure(c(1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L,
2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x12= structure(c(1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x13= structure(c(2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x14= c(41, 7, 8, 9, 7, 2, 1, 5, 9, 6, 6, 8,
14, 2, 4, NA, 11, 9, 31, 13, 8, 2, 11, 20, 8, 7, 6, 8, 2,
12, 32, 1, 2, 38, 10, 17, 5, 28, 31, 10, 3, 6), x15= structure(c(3L,
4L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 5L, 1L, 3L, 3L,
3L, 3L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 1L, 2L, 2L, 3L, 3L, 3L,
2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L), .Label = c("IATRO",
"IDIO", "OBST", "OBST/IDIO", "TRAUM"), class = "factor"),
x16= structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x17= structure(c(2L, 2L, 1L, 2L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x18= c(31.8, 20, 30.9, 23.3, 22.5, 23.1, 23.6, 25.9, 22.8,
25.2, 30.2, 23.4, 22.2, 29, 24.8, 32.7, 20.8, 28.5, 24.6,
23, 23.4, 21.1, 24.9, 18, 21.7, 27.6, 27, 29, 32.9, 26, 29.3,
27.1, 22.7, 19.7, 25, 22.3, 21.3, 17.5, 20.9, 20.1, 25.1,
22.1), x19= structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"),
x20 = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 2L), .Label = c("NO", "YES"), class = "factor"),
x21= structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L,
1L, 1L, 1L, 1L, 1L), .Label = c("NO", "YES"), class = "factor")), row.names = c(NA,
-42L), class = c("tbl_df", "tbl", "data.frame"))
logit1 <-glm(outcome_1~., data = df, family = "binomial")
Which yielded the classic error message for a logit model:
#Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) :
# contrasts can be applied only to factors with 2 or more levels
Ok, so I went to double check that all factor variables indeed have more than 1 unique value, and can verify:
sapply(lapply(df, unique), length)
returned all variables showing 2 or more unique values. Still same error message when I ran the model again.
I even attempted to run one solution I found online:
values_count <- sapply(lapply(df, unique), length)
logit1 <-
lm(outcome_1~ ., df[ , values_count > 1])
What's going on? Am I blind in seeing some variable that is secretly saying it has more than one unique value and does not?
Thank you!
The regression works on the supplied data for simple models, such as
logit1 <-glm(outcome_1~ sex + age, data = df, family = "binomial")
It's a small data set with lots of variables, so the computer is not going to be able to pull out the meaningful relationships even if they are there. Start with some exploratory data plots, and think about the (biological) relationships between your outcomes and the other variables in order to come up with hypotheses you can test with your data. Realistically, which measurements do you think actually affect patient outcomes?
I am trying to make a stacked barplot with two variables. My desired outcome looks like this:
This is the first part of my data. There are 220 more rows:
Type Week Stage
<chr> <dbl> <dbl>
1 Captured 1 2
2 Captured 1 1
3 Captured 1 1
4 Captured 1 2
5 Captured 1 1
6 Captured 1 3
7 Captured 1 NA
8 Captured 1 3
9 Captured 1 2
10 Captured 1 1
So far I'm not getting anywhere, this is my code so far
library(data.table)
dat.m <- melt(newrstudio2, id.vars="Type")
dat.m
library(ggplot2)
ggplot(dat.m, aes(x=Type, y=value, fill=variable)) +
geom_bar(stat="identity")
I guess I need to calculate the number of observations of each stage in each week of each type? I've tried both long and wide data, but I somehow need to combine week with type? I don't know, I'm at a loss.
Alternative way:
# Fix the random seed so the simulated data are reproducible
set.seed(123)
# sample data: 220 rows with a randomly drawn Type, Week and Stage
my_data <- data.frame(Type = sample(c("W", "C"), 220, replace = TRUE),
Week = sample(paste0("Week ", 1:4), 220, replace = TRUE),
Stage = sample(paste0('S', 1:4), 220, replace = TRUE))
head(my_data)
library(ggplot2)
# One bar per Type, filled by Stage
ggplot(my_data, aes(x = Type, fill = Stage)) +
# position = "fill" stacks the stages and rescales every bar to the same height
geom_bar(aes(y = (..count..)/sum(..count..)), position = "fill") +
# one panel per Week, laid out side by side, with the strip labels switched
facet_grid(. ~ Week, switch="both") +
# show the y axis as percentages
scale_y_continuous(labels = scales::percent) +
ylab("Stage [%]") +
# blank strip background, strips placed outside the axes, no gap between panels
theme(strip.background = element_blank(),
strip.placement = "outside",
panel.spacing = unit(0, "lines"))
Alternatively we could use base graphics. First, what you're probably most interested in, we should reshape the data.
For this we could split the data per week and run a dcast() over it.
# Split the data by week and, within each week, count the rows for every
# type (rows) by stage (columns) combination.
# NOTE(review): `fun=` relies on partial matching of dcast's `fun.aggregate`
# argument, and `d` is a plain data.frame here — confirm that the installed
# data.table version still accepts both.
L <- lapply(split(d, d$week), function(x)
data.table::dcast(x, type ~ stage, value.var="stage", fun=length))
d2 <- do.call(rbind, L) # transform back into a data frame
Now – with credit to @alemol – we want the proportions.
d2[-1] <- t(apply(d2[-1], 1, prop.table))
Then we are able to plot relatively simply. Note, that barplot() additionally gives us a vector of bar coordinates which we can use later for the axis() labels.
cols <- c("#ed1c24", "#ff7f27", "#00a2e8", "#fff200") # define stage colors
par(mar=c(5, 5, 3, 5) + .1, xpd=TRUE) # set plot margins
p <- barplot(t(d2[-1]), col=cols, border="white", space=rep(c(.2, 0), 5),
font.axis=2, xaxt="n", yaxt="n", xlab="Week")
axis(1, at=p, labels=rep(c("C", "W"), 5), tick=FALSE, line=0)
axis(1, at=apply(matrix(p, , 2, byrow=TRUE), 1, mean), labels=1:5, tick=FALSE, line=1)
axis(2, at=0:10/10, labels=paste0(seq(0, 100, 10), "%"), line=0, las=2)
legend(12, .5, legend=rev(names(d2[-1])), col=rev(cols), pch=15, title="Stage")
Result:
Data:
d <- structure(list(type = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L), .Label = c("C", "W"), class = "factor"), week = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), stage = c(3L,
1L, 1L, 2L, 2L, 2L, 1L, 3L, 2L, 4L, 1L, 1L, 2L, 2L, 3L, 4L, 3L,
2L, 4L, 1L, 1L, 3L, 1L, 2L, 3L, 1L, 4L, 1L, 2L, 4L, 2L, 3L, 4L,
4L, 2L, 4L, 4L, 2L, 3L, 1L, 1L, 4L, 4L, 1L, 4L, 3L, 3L, 3L, 2L,
1L, 3L, 4L, 2L, 4L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 2L, 1L, 3L, 2L,
1L, 1L, 1L, 4L, 2L, 4L, 1L, 4L, 3L, 4L, 4L, 4L, 2L, 2L, 2L, 2L,
2L, 1L, 3L, 4L, 2L, 4L, 4L, 2L, 2L, 3L, 4L, 4L, 3L, 3L, 1L, 1L,
1L, 2L, 4L, 3L, 1L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 4L, 2L, 1L,
2L, 1L, 3L, 3L, 2L, 4L, 3L, 1L, 1L, 4L, 1L, 4L, 4L, 1L, 2L, 2L,
2L, 1L, 3L, 4L, 3L, 4L, 3L, 4L, 4L, 3L, 1L, 1L, 2L, 1L, 2L, 3L,
2L, 2L, 1L, 4L, 3L, 4L, 2L, 2L, 3L, 1L, 2L, 3L, 3L, 3L, 3L, 2L,
1L, 2L, 2L, 1L, 1L, 3L, 4L, 3L, 4L, 2L, 4L, 1L, 1L, 2L, 1L, 3L,
2L, 1L, 3L, 3L, 2L, 2L, 1L, 3L, 2L, 2L, 2L, 1L, 4L, 2L, 4L, 2L,
4L, 3L, 3L, 1L, 3L, 4L, 3L, 2L, 1L, 2L, 4L, 1L, 2L, 4L, 2L, 1L,
2L, 1L, 2L, 2L, 3L, 1L, 3L, 3L, 3L, 2L, 2L, 1L, 2L, 3L, 2L, 2L,
1L, 2L, 1L, 3L, 3L, 2L, 1L, 3L, 4L, 2L, 1L, 2L, 4L, 3L, 4L, 2L,
3L, 2L, 4L, 1L, 4L, 4L, 2L, 1L, 2L)), row.names = c(NA, -250L
), class = "data.frame")
Is this what you're looking for:
library(ggplot2)
# Fix the random seed so the simulated data are reproducible
set.seed(123)
# sample data: 220 rows with a randomly drawn Type, Week and Stage
my_data <- data.frame(Type = sample(paste0('T', 1:4), 220, replace = TRUE),
                      Week = sample(paste0('W', 1:4), 220, replace = TRUE),
                      Stage = sample(paste0('S', 1:4), 220, replace = TRUE))
# One stacked bar per Week/Type combination, filled by Stage.
# `Week:Type` only builds an interaction when both columns are factors; since
# R 4.0.0 data.frame() keeps strings as character, for which `:` fails.
# interaction() works for character and factor columns alike, and
# lex.order = TRUE reproduces the level order of `Week:Type` (Week varies slowest).
ggplot(my_data, aes(x = interaction(Week, Type, sep = ":", lex.order = TRUE), fill = Stage)) +
  geom_bar() +
  xlab("Week:Type")
I am using the PCA function in R to perform a principal component analysis.
This is to make the question reproducible:
> dput(DATA_FINAL[1:50,])
structure(list(DataCRMSanoflore.Year_Sales = c(2, 1, 2, 1, 2,
1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
1, 2, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1,
1, 1, 1), DataCRMSanoflore.Month_Sales = c(9, 9, 2, 5, 9, 4,
7, 9, 3, 9, 7, 12, 3, 11, 3, 12, 3, 3, 6, 3, 4, 7, 5, 3, 5, 8,
8, 1, 9, 5, 4, 1, 10, 9, 5, 4, 9, 3, 2, 12, 9, 4, 4, 3, 6, 8,
6, 4, 4, 12), DataCRMSanoflore.Date_Sales = c(13, 3, 10, 22,
23, 26, 13, 1, 12, 2, 25, 11, 10, 26, 9, 4, 10, 18, 9, 9, 1,
7, 30, 9, 14, 24, 4, 2, 10, 17, 2, 28, 22, 17, 4, 14, 22, 30,
2, 5, 29, 13, 2, 10, 25, 5, 10, 23, 1, 6), DataCRMSanoflore.HOURS_INSCR = c(17,
14, 18, 17, 16, 11, 22, 14, 23, 17, 9, 21, 18, 16, 19, 12, 11,
17, 16, 21, 20, 11, 16, 18, 14, 19, 22, 17, 14, 10, 22, 15, 13,
19, 13, 21, 16, 19, 23, 19, 11, 21, 11, 22, 20, 13, 11, 15, 17,
15), DataCRMSanoflore.Year_Creation_Sales = c(2, 1, 2, 1, 2,
1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
2, 2, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1,
1, 1, 1), DataCRMSanoflore.Month_Creation_Sales = c(9, 9, 2,
10, 10, 9, 7, 9, 12, 9, 7, 12, 3, 11, 4, 2, 6, 3, 6, 10, 4, 7,
6, 3, 5, 8, 3, 1, 9, 7, 4, 11, 11, 9, 5, 4, 9, 3, 2, 12, 10,
4, 4, 3, 10, 8, 6, 4, 4, 12), DataCRMSanoflore.Day_Creation_Sales = c(13,
11, 15, 2, 31, 26, 23, 1, 5, 2, 25, 16, 10, 27, 13, 7, 3, 18,
9, 8, 27, 7, 8, 18, 18, 24, 6, 2, 26, 4, 4, 24, 16, 17, 12, 15,
22, 30, 10, 5, 1, 14, 2, 10, 5, 5, 10, 27, 25, 6), DataCRMSanoflore.Year_Validation_Sales = c(2,
1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
2, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1,
1, 1, 1, 1, 1, 1, 1), DataCRMSanoflore.Month_Validation_Sales = c(9,
9, 2, 10, 11, 10, 7, 9, 12, 9, 7, 12, 3, 12, 4, 2, 6, 3, 6, 10,
4, 7, 6, 3, 5, 8, 3, 1, 10, 7, 4, 11, 11, 9, 5, 4, 9, 4, 2, 12,
10, 4, 4, 3, 10, 8, 6, 4, 4, 12), DataCRMSanoflore.Day_Validation_Sales = c(15,
14, 16, 3, 3, 1, 27, 2, 6, 5, 27, 21, 19, 1, 27, 8, 5, 21, 10,
9, 30, 9, 9, 21, 26, 27, 7, 4, 1, 6, 15, 25, 17, 18, 13, 20,
29, 1, 11, 7, 2, 16, 3, 20, 6, 6, 13, 29, 29, 8), DataCRMSanoflore.AGE_CUSTUMER = c(33,
37, 24, 34, 32, 46, 52, 60, 44, 55, 37, 29, 34, 30, 30, 31, 37,
57, 48, 44, 42, 28, 34, 43, 45, 33, 37, 53, 43, 35, 55, 62, 60,
57, 33, 51, 32, 51, 35, 54, 42, 47, 59, 33, 45, 35, 36, 54, 28,
42), DataCRMSanoflore.MEAN_PURCHASE = c(0, 71.75, 50.7142857142857,
18.6666666666667, 0, 0, 54.7, 22, 0.666666666666667, 38, 6.5,
0, 83.3333333333333, 0.333333333333333, 44.3333333333333, 25.7777777777778,
24.1818181818182, 23.3846153846154, 35.5294117647059, 21.6363636363636,
1.125, 40.6428571428571, 0, 46.8461538461538, 6, 8.66666666666667,
18.4, 16.9285714285714, 15.0666666666667, 110.25, 0, 8.85714285714286,
0, 36.5, 21.5, 18.5714285714286, 28.125, 8.38888888888889, 101.333333333333,
0, 2, 0, 20.9166666666667, 69.1428571428571, 16.6666666666667,
1.5, 87.1666666666667, 0, 48.25, 13.3333333333333), DataCRMSanoflore.NUMBER_GIFTS = c(1,
1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 3, 4, 3, 4, 2, 2,
1, 2, 1, 1, 1, 2, 4, 1, 1, 1, 1, 3, 1, 3, 2, 4, 1, 1, 1, 1, 2,
2, 1, 1, 1, 1, 2, 3), DataCRMSanoflore.Year_Sales = c(2L, 1L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), DataCRMSanoflore.Month_Sales = c(9L, 9L, 2L, 5L, 9L, 4L, 7L,
9L, 3L, 9L, 7L, 12L, 3L, 11L, 3L, 12L, 3L, 3L, 6L, 3L, 4L, 7L,
5L, 3L, 5L, 8L, 8L, 1L, 9L, 5L, 4L, 1L, 10L, 9L, 5L, 4L, 9L,
3L, 2L, 12L, 9L, 4L, 4L, 3L, 6L, 8L, 6L, 4L, 4L, 12L), DataCRMSanoflore.Date_Sales = c(13L,
3L, 10L, 22L, 23L, 26L, 13L, 1L, 12L, 2L, 25L, 11L, 10L, 26L,
9L, 4L, 10L, 18L, 9L, 9L, 1L, 7L, 30L, 9L, 14L, 24L, 4L, 2L,
10L, 17L, 2L, 28L, 22L, 17L, 4L, 14L, 22L, 30L, 2L, 5L, 29L,
13L, 2L, 10L, 25L, 5L, 10L, 23L, 1L, 6L), DataCRMSanoflore.Year_Creation_Sales = c(2L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), DataCRMSanoflore.Month_Creation_Sales = c(9L, 9L, 2L, 10L,
10L, 9L, 7L, 9L, 12L, 9L, 7L, 12L, 3L, 11L, 4L, 2L, 6L, 3L, 6L,
10L, 4L, 7L, 6L, 3L, 5L, 8L, 3L, 1L, 9L, 7L, 4L, 11L, 11L, 9L,
5L, 4L, 9L, 3L, 2L, 12L, 10L, 4L, 4L, 3L, 10L, 8L, 6L, 4L, 4L,
12L), DataCRMSanoflore.Day_Creation_Sales = c(13L, 11L, 15L,
2L, 31L, 26L, 23L, 1L, 5L, 2L, 25L, 16L, 10L, 27L, 13L, 7L, 3L,
18L, 9L, 8L, 27L, 7L, 8L, 18L, 18L, 24L, 6L, 2L, 26L, 4L, 4L,
24L, 16L, 17L, 12L, 15L, 22L, 30L, 10L, 5L, 1L, 14L, 2L, 10L,
5L, 5L, 10L, 27L, 25L, 6L), DataCRMSanoflore.Year_Validation_Sales = c(2L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), DataCRMSanoflore.Month_Validation_Sales = c(9L, 9L, 2L,
10L, 11L, 10L, 7L, 9L, 12L, 9L, 7L, 12L, 3L, 12L, 4L, 2L, 6L,
3L, 6L, 10L, 4L, 7L, 6L, 3L, 5L, 8L, 3L, 1L, 10L, 7L, 4L, 11L,
11L, 9L, 5L, 4L, 9L, 4L, 2L, 12L, 10L, 4L, 4L, 3L, 10L, 8L, 6L,
4L, 4L, 12L), DataCRMSanoflore.Day_Validation_Sales = c(15L,
14L, 16L, 3L, 3L, 1L, 27L, 2L, 6L, 5L, 27L, 21L, 19L, 1L, 27L,
8L, 5L, 21L, 10L, 9L, 30L, 9L, 9L, 21L, 26L, 27L, 7L, 4L, 1L,
6L, 15L, 25L, 17L, 18L, 13L, 20L, 29L, 1L, 11L, 7L, 2L, 16L,
3L, 20L, 6L, 6L, 13L, 29L, 29L, 8L), TYPE_PEAU = c(3L, 4L, 5L,
1L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 4L, 3L, 1L, 3L, 1L, 3L, 3L,
3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 3L, 5L, 1L, 5L, 2L, 1L, 5L,
5L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L),
SENSIBILITE = c(4L, 4L, 4L, 1L, 3L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 4L, 4L, 1L, 3L, 1L, 3L, 3L, 4L, 1L, 1L, 1L, 2L, 1L, 1L,
4L, 1L, 2L, 3L, 1L, 4L, 4L, 1L, 3L, 4L, 4L, 1L, 4L, 1L, 1L,
1L, 1L, 4L, 1L, 1L, 1L, 1L, 4L, 1L), IMPERFECTIONS = c(3L,
4L, 3L, 1L, 2L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 3L, 3L, 1L, 2L,
1L, 3L, 2L, 3L, 1L, 1L, 1L, 4L, 1L, 1L, 3L, 1L, 3L, 2L, 1L,
4L, 3L, 1L, 3L, 3L, 3L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 3L, 1L), BRILLANCE = c(4L, 2L, 2L, 1L, 4L, 1L, 1L,
1L, 4L, 1L, 1L, 1L, 4L, 4L, 1L, 4L, 1L, 4L, 4L, 4L, 1L, 1L,
1L, 4L, 1L, 1L, 4L, 1L, 4L, 4L, 1L, 2L, 3L, 1L, 4L, 4L, 4L,
1L, 4L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 4L, 1L), GRAIN_PEAU = c(4L,
4L, 4L, 1L, 4L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 4L, 2L, 1L, 2L,
1L, 2L, 4L, 4L, 1L, 1L, 1L, 4L, 1L, 1L, 3L, 1L, 2L, 2L, 1L,
3L, 2L, 1L, 2L, 4L, 4L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 2L, 1L), RIDES_VISAGE = c(2L, 2L, 2L, 1L, 4L, 1L,
1L, 1L, 4L, 1L, 1L, 1L, 4L, 4L, 1L, 2L, 1L, 4L, 2L, 4L, 1L,
1L, 1L, 2L, 1L, 1L, 4L, 1L, 4L, 4L, 1L, 4L, 4L, 1L, 2L, 4L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 4L, 1L),
ALLERGIES = c(2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L), MAINS = c(3L, 4L, 4L,
1L, 4L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 3L, 1L, 3L, 1L, 3L,
3L, 3L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 3L, 2L, 1L, 4L, 4L,
1L, 3L, 4L, 4L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
3L, 1L), PEAU_CORPS = c(2L, 3L, 3L, 1L, 2L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 3L, 1L, 1L, 1L, 2L,
1L, 1L, 2L, 1L, 2L, 2L, 1L, 3L, 3L, 1L, 3L, 3L, 2L, 1L, 3L,
1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 3L, 1L), INTERET_ALIM_NATURELLE = c(2L,
4L, 4L, 1L, 2L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 2L, 2L, 1L, 2L,
1L, 4L, 2L, 2L, 1L, 1L, 1L, 4L, 1L, 1L, 2L, 1L, 2L, 2L, 1L,
3L, 4L, 1L, 4L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 2L, 1L), INTERET_ORIGINE_GEO = c(2L, 4L, 2L, 1L,
2L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 2L, 5L, 1L, 2L, 1L, 2L, 5L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 5L, 2L, 1L, 4L, 2L, 1L,
2L, 5L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L,
1L), INTERET_VACANCES = c(3L, 4L, 2L, 1L, 3L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 3L, 2L, 1L, 2L, 1L, 3L, 4L, 3L, 1L, 1L, 1L,
2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 3L, 3L, 1L, 4L, 3L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L), INTERET_ENVIRONNEMENT = c(3L,
5L, 5L, 1L, 5L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 3L, 3L, 1L, 3L,
1L, 3L, 3L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 3L, 3L, 1L,
5L, 3L, 1L, 3L, 3L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L,
1L, 1L, 3L, 1L), INTERET_COMPOSITION = c(2L, 2L, 2L, 1L,
4L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 3L, 4L, 1L,
4L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 4L,
1L)), .Names = c("DataCRMSanoflore.Year_Sales", "DataCRMSanoflore.Month_Sales",
"DataCRMSanoflore.Date_Sales", "DataCRMSanoflore.HOURS_INSCR",
"DataCRMSanoflore.Year_Creation_Sales", "DataCRMSanoflore.Month_Creation_Sales",
"DataCRMSanoflore.Day_Creation_Sales", "DataCRMSanoflore.Year_Validation_Sales",
"DataCRMSanoflore.Month_Validation_Sales", "DataCRMSanoflore.Day_Validation_Sales",
"DataCRMSanoflore.AGE_CUSTUMER", "DataCRMSanoflore.MEAN_PURCHASE",
"DataCRMSanoflore.NUMBER_GIFTS", "DataCRMSanoflore.Year_Sales",
"DataCRMSanoflore.Month_Sales", "DataCRMSanoflore.Date_Sales",
"DataCRMSanoflore.Year_Creation_Sales", "DataCRMSanoflore.Month_Creation_Sales",
"DataCRMSanoflore.Day_Creation_Sales", "DataCRMSanoflore.Year_Validation_Sales",
"DataCRMSanoflore.Month_Validation_Sales", "DataCRMSanoflore.Day_Validation_Sales",
"TYPE_PEAU", "SENSIBILITE", "IMPERFECTIONS", "BRILLANCE", "GRAIN_PEAU",
"RIDES_VISAGE", "ALLERGIES", "MAINS", "PEAU_CORPS", "INTERET_ALIM_NATURELLE",
"INTERET_ORIGINE_GEO", "INTERET_VACANCES", "INTERET_ENVIRONNEMENT",
"INTERET_COMPOSITION"), row.names = c(NA, 50L), class = "data.frame")
The first step is to create a PCA object with the following code:
# Attach FactoMineR (provides PCA) and factoextra (provides the fviz_* plots).
library("FactoMineR")
library(factoextra)
# Fit the PCA on DATA_FINAL coerced to a data frame; graph = FALSE turns off
# the plots that PCA() would otherwise draw itself.
res.pca <- PCA(
  X = as.data.frame(DATA_FINAL),
  graph = FALSE
)
Then, to plot variables, I used the fviz_pca_var function like this:
# Plot the PCA variables from res.pca, drawing every variable in black.
fviz_pca_var(res.pca, col.var = "black")
I get this error:
Error in row.names<-.data.frame(*tmp*, value = value) :
duplicate 'row.names' are not allowed In addition: Warning messages:
1: In data.row.names(row.names, rowsi, i) : some row.names
duplicated: 14,15,16,17,18,19,20,21,22 --> row.names NOT used 2:
non-unique values when setting 'row.names':
‘DataCRMSanoflore.Date_Sales’, ‘DataCRMSanoflore.Day_Creation_Sales’,
‘DataCRMSanoflore.Day_Validation_Sales’,
‘DataCRMSanoflore.Month_Creation_Sales’,
‘DataCRMSanoflore.Month_Sales’,
‘DataCRMSanoflore.Month_Validation_Sales’,
‘DataCRMSanoflore.Year_Creation_Sales’, ‘DataCRMSanoflore.Year_Sales’,
‘DataCRMSanoflore.Year_Validation_Sales’
How can I resolve this issue, please?
You have duplicated column names in your input data (the names of columns 1-3 and 5-10 appear again at columns 14-22), so simply removing the duplicated columns should fix the error.
# Drop the earlier copy of every column whose name occurs more than once,
# keeping the last occurrence. For DATA_FINAL this removes columns 1:3 and
# 5:10 (their names are repeated at positions 14:22), without hard-coding
# the column positions. drop = FALSE keeps the result a data frame even if
# only one column survives.
df <- DATA_FINAL[, !duplicated(names(DATA_FINAL), fromLast = TRUE), drop = FALSE]
and then run PCA
# FactoMineR provides PCA(); factoextra provides fviz_pca_var().
library(FactoMineR)
library(factoextra)
# Fit the PCA on the de-duplicated data frame without drawing PCA()'s own plots.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
res.pca <- PCA(df, graph = FALSE)
# Plot the PCA variables, all drawn in black.
fviz_pca_var(res.pca, col.var = "black")
I would like to know whether a plant development score depends on a plant treatment. So I have the following experimental setup:
Treatment: "Control" or "Treated"
Plantpart: the part of the plant that was followed. Either "Root", "Stem" or "Leaf".
Score: the development score of the plant part. Explained variable, numeric (continuous).
I also have two factors treated as random effects:
Block: 4 blocks (places where plants were grown)
Biological_Replicate: each plant was used to gather the 3 plant parts (root, stem, leaf). Thus the scores of the plant parts of a given plant are not independent. There are 3 biological replicates per Block for treated and control plants.
I defined the variables then implemented the model:
# Fit a linear mixed model of Score on Treatment with two random intercepts.
library(lmerTest)
# Copy the model variables out of Data into stand-alone vectors.
Score=Data$Score
Treatment=Data$Treatment
# Block and Biological_Replicate are numeric in Data; convert them to factors
# so they can serve as grouping variables.
Biological_Replicate=as.factor(Data$Biological_Replicate)
Block=as.factor(Data$Block)
# Fixed effect: Treatment. Random intercepts: Biological_Replicate and Block.
# REML=FALSE requests a maximum-likelihood fit. No data= argument is given,
# so the formula's variables are the vectors defined above.
# NOTE(review): Plantpart is described in the setup but is not in the formula - confirm this is intended.
# NOTE(review): passing data = Data instead of free-standing vectors may avoid the lmerTest error - unverified.
model<-lmer(Score~Treatment + (1|Biological_Replicate) + (1|Block), REML=FALSE)
Trying to retrieve the approximated p-value with coef(summary(model))
yielded the error:
Model is not identifiable...
summary from lme4 is returned
some computational error has occurred in lmerTest
The full data is below. The question is: what is wrong with the code, and/or the data?
# Full example data set as dput() output: a data frame with 72 rows.
# Columns:
#   Treatment            - factor with levels "Control", "Treated"
#   Plantpart            - factor with levels "Leaf", "Root", "Stem"
#   Block                - numeric, values 1 to 4
#   Biological_Replicate - numeric, values 1 to 24
#   Score                - numeric
Data<-structure(list(Treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("Control", "Treated"), class = "factor"),
Plantpart = structure(c(1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L,
1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 3L, 3L, 3L,
2L, 2L, 2L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L,
3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L,
1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 3L, 3L, 3L,
2L, 2L, 2L), .Label = c("Leaf", "Root", "Stem"), class = "factor"),
Block = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4), Biological_Replicate = c(1,
2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6, 4, 5, 6, 7, 8,
9, 7, 8, 9, 7, 8, 9, 10, 11, 12, 10, 11, 12, 10, 11, 12,
13, 14, 15, 13, 14, 15, 13, 14, 15, 16, 17, 18, 16, 17, 18,
16, 17, 18, 19, 20, 21, 19, 20, 21, 19, 20, 21, 22, 23, 24,
22, 23, 24, 22, 23, 24), Score = c(20628, 26610, 11410, 18755,
17366, 13228, 27011, 17558, 16512, 30945, 28606, 29092, 23262,
18306, 23034, 9627, 16193, 24391, 35197, 26092, 23789, 29900,
22649, 23548, 23868, 18495, 17204, 31750, 27496, 24687, 24115,
25911, 25076, 12472, 12267, 13120, 21580, 20697, 14854, 7190,
55734, 12194, 23853, 16762, 18322, 27582, 28056, 28497, 16156,
17680, 21789, 10137, 18122, 9786, 23866, 30878, 23101, 18104,
22276, 23694, 18534, 20743, 15460, 31997, 32559, 28969, 20408,
24503, 21395, 9925, 15407, 14717)), .Names = c("Treatment",
"Plantpart", "Block", "Biological_Replicate", "Score"), row.names = c(NA,
-72L), class = "data.frame")