ddply function based on two variables - r

How can I modify the code below to calculate the average of Time per ID within each level of type?
df <- structure(list(ID = structure(c(1L, 2L, 2L, 3L, 3L, 4L, 4L, 4L,
4L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 5L, 6L), .Label = c("R1", "R2",
"R3", "R4", "R5", "R6"), class = "factor"), cost = c(123L, 12L,
14L, 15L, 16L, 17L, 18L, 9L, 6L, 6L, 7L, 8L, 1L, 111L, 121L,
141L, 181L, 1611L), Time = c(123L, -12L, NA, -15L, NA, 17L, 18L,
-9L, 88L, 666L, 7L, 78L, 188L, 111L, 121L, 141L, 189L, 161L),
number = c(14L, 12L, 15L, 17L, NA, 17L, 22L, 95L, NA, 67L,
7L, 8L, 7L, 118L, NA, 140L, 180L, 1611L),type = c("A", "A", "B", "B", "B", "C", "C",
"M", "A", "M", "D", "D", "C", "A", "D", "B", "B", "M")), .Names = c("ID",
"cost", "Time", "number","type"), row.names = c(NA, -18L), class = "data.frame")
library(dplyr)
# Summarise Time for every ID within each level of `type`.
# Grouping uses the real column names (`ID`, not `id`) and both grouping
# variables, so there is one output row per ID/type combination.
# N.Time is the number of non-missing Time values in the group; Time_Average is
# the mean of the non-negative, non-missing Time values rounded to 2 decimals
# (NaN when a group has no such value).
df %>%
  group_by(ID, type) %>%
  summarise(N.Time = sum(!is.na(Time)),
            Time_Average = round(mean(Time[Time >= 0 & !is.na(Time)]), 2)) %>%
  # Drop the remaining grouping so later steps work on a plain table.
  ungroup()

Related

Grouping by multiple columns for ggplot

I have data that look like this (I'm only including the first twenty rows to show the spread of data but there are about 135 rows):
> dput(id)
# First 20 rows of the data. row.names is set to the 20 rows actually listed
# here; the full data set has more rows, but a row count that disagrees with
# the column lengths produces a corrupt data frame.
structure(list(date = c("7/27/1992", "7/27/1992", "7/27/1992",
"8/1/1992", "7/1/1994", "7/1/1994", "7/1/1994", "8/7/2003", "8/7/2003",
"8/7/2003", "8/7/2003", "7/21/2004", "7/21/2004", "7/26/2004",
"7/26/2004", "7/5/2005", "7/5/2005", "7/9/2005", "7/9/2005",
"7/9/2005"), event.id = c(8L, 8L, 8L, 10L, 11L, 11L, 11L, 14L,
14L, 15L, 15L, 17L, 17L, 18L, 18L, 20L, 20L, 21L, 21L, 21L), id = c("L5", "L58",
"L73", "L21", "L5", "L58", "L73", "L5", "L73", "L7", "L57", "L21",
"L47", "L54", "L100", "J27", "J31", "J16", "J26", "J36"), sex = structure(c(1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 1L), .Label = c("0", "1"), class = "factor"),
age = c(28L, 12L, 6L, 42L, 30L, 14L, 8L, 39L, 17L, 42L, 26L,
54L, 30L, 27L, 3L, 14L, 10L, 33L, 14L, 6L), matr = c("L9",
"L9", "L9", "L21", "L9", "L9", "L9", "L9", "L9", "L37", "L45",
"L21", "L21", "L35", "L35", "J4", "J4", "J7", "J7", "J7"),
matralive = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), pod = c("L", "L", "L", "L", "L",
"L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "J", "J",
"J", "J", "J")), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame"))
My goal is to create a graph that looks something like this, using ggplot:
I'm stuck as to how to generate this - I imagine I will have to group by pod and matriline, but am unsure how to go about this.
Any help is super appreciated - thank you so much!!
Here is one possibility using geom_mark_ellipse from ggforce for the ellipses. To have multiple groups (i.e., matr and date) to draw the ellipses, we can use interaction to combine the two columns into a new factor.
library(tidyverse)
library(ggforce)
# Plot age against sighting date: one point per animal and one labelled
# ellipse for every date/matriline combination.
df %>%
  ggplot(aes(x = date, y = age)) +
  # size is a fixed value, so it is set outside aes(); inside aes() it would be
  # treated as a data mapping and add a spurious "size" legend.
  # sex is already a factor and pod is a character column, so both can be
  # mapped directly; using `pod` here matches the ellipse layer's colour mapping.
  geom_point(aes(shape = sex, colour = pod), size = 5) +
  # interaction(date, matr) makes each date/matriline pair its own group, so
  # every pair gets a separate ellipse labelled with its matriline.
  geom_mark_ellipse(aes(colour = pod, group = interaction(date, matr), label = matr)) +
  # Extra padding on both axes (values kept from the original call).
  scale_y_continuous(expand = c(0, 25)) +
  scale_x_discrete(expand = c(0, 1.35))
Output
Data
df <- structure(list(date = c("7/27/1992", "7/27/1992", "7/27/1992",
"8/1/1992", "7/1/1994", "7/1/1994", "7/1/1994", "8/7/2003", "8/7/2003",
"8/7/2003", "8/7/2003", "7/21/2004", "7/21/2004", "7/26/2004",
"7/26/2004", "7/5/2005", "7/5/2005", "7/9/2005", "7/9/2005",
"7/9/2005"), event.id = c(8L, 8L, 8L, 10L, 11L, 11L, 11L, 14L,
14L, 15L, 15L, 17L, 17L, 18L, 18L, 20L, 20L, 21L, 21L, 21L),
id = c("L5", "L58", "L73", "L21", "L5", "L58", "L73", "L5",
"L73", "L7", "L57", "L21", "L47", "L54", "L100", "J27", "J31",
"J16", "J26", "J36"), sex = structure(c(1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L
), .Label = c("0", "1"), class = "factor"), age = c(28L,
12L, 6L, 42L, 30L, 14L, 8L, 39L, 17L, 42L, 26L, 54L, 30L,
27L, 3L, 14L, 10L, 33L, 14L, 6L), matr = c("L9", "L9", "L9",
"L21", "L9", "L9", "L9", "L9", "L9", "L37", "L45", "L21",
"L21", "L35", "L35", "J4", "J4", "J7", "J7", "J7"), matralive = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), pod = c("L", "L", "L", "L", "L", "L", "L",
"L", "L", "L", "L", "L", "L", "L", "L", "J", "J", "J", "J",
"J")), row.names = c(NA, -20L), class = c("tbl_df", "tbl",
"data.frame"))

How to remove observations from multiple dataframes and keep as multiple dataframes

I have many data frames - Here is a simplified version of two of them.
flows <- structure(list(Student = c("Adam", "Char", "Fred", "Greg", "Ed", "Mick", "Dave", "Nick", "Tim", "George", "Tom"),
Class = c(1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), Jan_18_score = c(NA, 5L, -7L, 2L, 1L, NA, 5L, 8L, -2L, 5L, NA),
Feb_18_score = c(2L, 0, 8L, NA, 2L, 6L, NA, 8L, 7L, 3L, 8L), Jan_18_Weight = c(150L, 30L, NA, 80L, 60L, 80L, 40L, 12L, 23L, 65L, 78L),
Feb_18_Weight = c(153L, 60L, 80L, 40L, 80L, 30L, 25L, 45L, 40L, NA, 50L)), class = "data.frame", row.names = c(NA, -11L))
returns <- structure(list(Student = c("Adam", "Char", "Fred", "Greg", "Ed", "Mick", "Dave", "Nick", "Tim", "George", "Tom"),
Class = c(1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), Jan_20_score = c(NA, 5L, -7L, 2L, 1L, NA, 5L, 8L, -2L, 5L, NA),
Feb_20_score = c(2L, 0, 8L, NA, 2L, 6L, NA, 8L, 7L, 3L, 8L), Jan_20_Weight = c(150L, 30L, NA, 80L, 60L, 80L, 40L, 12L, 23L, 65L, 78L),
Feb_20_Weight = c(153L, 60L, 80L, 40L, 80L, 30L, 25L, 45L, 40L, NA, 50L)), class = "data.frame", row.names = c(NA, -11L))
I am using lapply to remove some observations, I would like to do this across all my dataframes and keep the output as dataframes, basically update the existing dataframes and remove the observations I select.
Here is my current code.
# Filter every data frame in the list, keeping the rows whose Class does not
# contain the character "1". The filtered list is only returned (and printed);
# it is not stored, and `flows` / `returns` themselves are left unchanged.
df.list <- list(flows, returns)
lapply(df.list, function(d) {
  keep <- !grepl("1", d$Class)
  d[keep, ]
})
However, when I do this the output is not updating the original dataframes and is outputting as a list in the global environment. Any help is appreciated.
Another solution:
flows <- structure(list(Student = c("Adam", "Char", "Fred", "Greg", "Ed", "Mick", "Dave", "Nick", "Tim", "George", "Tom"),
Class = c(1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), Jan_18_score = c(NA, 5L, -7L, 2L, 1L, NA, 5L, 8L, -2L, 5L, NA),
Feb_18_score = c(2L, 0, 8L, NA, 2L, 6L, NA, 8L, 7L, 3L, 8L), Jan_18_Weight = c(150L, 30L, NA, 80L, 60L, 80L, 40L, 12L, 23L, 65L, 78L),
Feb_18_Weight = c(153L, 60L, 80L, 40L, 80L, 30L, 25L, 45L, 40L, NA, 50L)), class = "data.frame", row.names = c(NA, -11L))
returns <- structure(list(Student = c("Adam", "Char", "Fred", "Greg", "Ed", "Mick", "Dave", "Nick", "Tim", "George", "Tom"),
Class = c(1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), Jan_20_score = c(NA, 5L, -7L, 2L, 1L, NA, 5L, 8L, -2L, 5L, NA),
Feb_20_score = c(2L, 0, 8L, NA, 2L, 6L, NA, 8L, 7L, 3L, 8L), Jan_20_Weight = c(150L, 30L, NA, 80L, 60L, 80L, 40L, 12L, 23L, 65L, 78L),
Feb_20_Weight = c(153L, 60L, 80L, 40L, 80L, 30L, 25L, 45L, 40L, NA, 50L)), class = "data.frame", row.names = c(NA, -11L))
# Collect the data frames to filter in one (unnamed) list: flows first, returns second.
df.list <- list(flows, returns)
Now, we need to assign lapply to some value and name it:
# Drop the Class 1 rows from every data frame and keep the filtered list in `a`.
# The comparison is exact (`%in% 1`): a grepl("1", ...) test would also drop any
# Class whose digits merely contain a 1 (10, 11, 21, ...). Rows with a missing
# Class are kept, exactly as grepl() kept them. drop = FALSE guarantees a data
# frame comes back even if an input has a single column.
a <- lapply(df.list, function(df) df[!(df$Class %in% 1), , drop = FALSE])
# Name the elements after the objects they should replace (same order as df.list).
names(a) <- c("flows", "returns")
After this, we call list2env function:
# Copy every named element of `a` into the global environment; an existing
# object with the same name is replaced by the list element.
list2env(x = a, envir = globalenv())
Output:
> flows
Student Class Jan_18_score Feb_18_score Jan_18_Weight Feb_18_Weight
5 Ed 2 1 2 60 80
6 Mick 2 NA 6 80 30
7 Dave 3 5 NA 40 25
8 Nick 3 8 8 12 45
9 Tim 3 -2 7 23 40
10 George 3 5 3 65 NA
11 Tom 3 NA 8 78 50
> returns
Student Class Jan_20_score Feb_20_score Jan_20_Weight Feb_20_Weight
5 Ed 2 1 2 60 80
6 Mick 2 NA 6 80 30
7 Dave 3 5 NA 40 25
8 Nick 3 8 8 12 45
9 Tim 3 -2 7 23 40
10 George 3 5 3 65 NA
11 Tom 3 NA 8 78 50
Checking classes of the outputs:
> class(returns)
[1] "data.frame"
> class(flows)
[1] "data.frame"
I'm not sure about using lapply but you can work with lists of variables by name using get and assign.
# Example data: two score tables that share the Student and Class columns.
flows <- structure(list(Student = c("Adam", "Char", "Fred", "Greg", "Ed", "Mick", "Dave", "Nick", "Tim", "George", "Tom"),
Class = c(1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), Jan_18_score = c(NA, 5L, -7L, 2L, 1L, NA, 5L, 8L, -2L, 5L, NA),
Feb_18_score = c(2L, 0, 8L, NA, 2L, 6L, NA, 8L, 7L, 3L, 8L), Jan_18_Weight = c(150L, 30L, NA, 80L, 60L, 80L, 40L, 12L, 23L, 65L, 78L),
Feb_18_Weight = c(153L, 60L, 80L, 40L, 80L, 30L, 25L, 45L, 40L, NA, 50L)), class = "data.frame", row.names = c(NA, -11L))
returns <- structure(list(Student = c("Adam", "Char", "Fred", "Greg", "Ed", "Mick", "Dave", "Nick", "Tim", "George", "Tom"),
Class = c(1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), Jan_20_score = c(NA, 5L, -7L, 2L, 1L, NA, 5L, 8L, -2L, 5L, NA),
Feb_20_score = c(2L, 0, 8L, NA, 2L, 6L, NA, 8L, 7L, 3L, 8L), Jan_20_Weight = c(150L, 30L, NA, 80L, 60L, 80L, 40L, 12L, 23L, 65L, 78L),
Feb_20_Weight = c(153L, 60L, 80L, 40L, 80L, 30L, 25L, 45L, 40L, NA, 50L)), class = "data.frame", row.names = c(NA, -11L))
# Refer to the data frames by name so each result can be written back to a variable.
df.list <- list("flows", "returns")
for (df.name in df.list) {
  temp <- get(df.name)
  # Exact comparison: grepl("1", ...) would also drop Class 10, 11, 21, ...
  # Rows with a missing Class are kept, as they were with the grepl() test.
  temp <- temp[!(temp$Class %in% 1), , drop = FALSE]
  # Store the result as "<name>_new"; the original objects stay untouched.
  assign(paste0(df.name, "_new"), temp)
}
Remove "_new" to overwrite the original variables.

change data type by indexing

I want to change some columns to integers (from the 4th column of my table to the last one). How can I do that?
My data
I already tried
# Attempt 1: convert every factor column by going through its character labels.
# NOTE: this also touches the factor columns SEXO, GRUPOS and SALAS, and labels
# such as "49,3" or "F" are not valid integers, so as.integer() returns NA for
# them and raises the "NAs introduced by coercion" warning.
databianca %>%
mutate_if(is.factor, ~as.integer(as.character(.)))
and
# Attempt 2: convert only the columns from columbia through escrita.POS.
# The labels in these columns use a decimal comma (e.g. "55,38"); as.integer()
# cannot parse such text, so each value containing a comma becomes NA with a
# coercion warning.
databianca %>%
mutate_at(vars(columbia:escrita.POS), ~as.integer(as.character(.)))
and with both of them I get these warnings:
Warning messages:
1: In (function (..., .x = ..1, .y = ..2, . = ..1) :
NAs introduced by coercion
2: In (function (..., .x = ..1, .y = ..2, . = ..1) :
NAs introduced by coercion
3: In (function (..., .x = ..1, .y = ..2, . = ..1) :
NAs introduced by coercion
4: In (function (..., .x = ..1, .y = ..2, . = ..1) :
NAs introduced by coercion
5: In (function (..., .x = ..1, .y = ..2, . = ..1) :
NAs introduced by coercion
6: In (function (..., .x = ..1, .y = ..2, . = ..1) :
NAs introduced by coercion
7: In (function (..., .x = ..1, .y = ..2, . = ..1) :
NAs introduced by coercion
8: In (function (..., .x = ..1, .y = ..2, . = ..1) :
NAs introduced by coercion
Isn't there an option to treat all columns from columbia to the last column as integers (in MATLAB I would use 4:end to apply an instruction from the 4th column through the last element)?
my data via dpasta:
databianca <- data.frame(
SEXO = as.factor(c("F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "M", "M", "M", "M",
"M", "M", "M", "M", "M", "M", "M", "M", "M", "M",
"M", "M", "M", "M", "M", "M", "M", "M", "M",
"M", "M", "M", "M")),
GRUPOS = as.factor(c("C", "E", "E", "C", "C", "C", "E", "E", "E",
"C", "E", "E", "E", "C", "C", "C", "E", "C",
"E", "E", "C", "E", "C", "C", "C", "E", "E",
"E", "C", "E", "C", "C", "C", "E", "C", "E", "C",
"E", "E", "C", "C", "C", "E", "E", "E", "E",
"C", "C", "C", "C")),
SALAS = as.factor(c("B", "A", "C", "A", "A", "C", "A", "B", "C",
"C", "C", "B", "A", "C", "A", "C", "A", "C",
"C", "C", "B", "B", "B", "A", "B", "C", "A",
"B", "A", "A", "C", "B", "B", "A", "A", "B", "B",
"B", "C", "B", "B", "B", "B", "A", "A", "B",
"C", "C", "A", "B")),
token = c(100L, 95L, 106L, 112L, 84L, 125L, 108L, 114L, 112L,
117L, 84L, 96L, 124L, 118L, 99L, 84L, 95L, 113L, 123L,
135L, 100L, 92L, 101L, 129L, 93L, 106L, 101L, 84L, 84L,
128L, 113L, 95L, 109L, 127L, 112L, 108L, 108L, 118L, 97L,
112L, 116L, 88L, 92L, 111L, 123L, 112L, 116L, 104L, 90L,
89L),
hab.visuo = c(3L, 3L, 2L, 0L, 0L, 1L, 3L, 3L, 2L, 1L, 1L, 1L, 0L, 2L,
1L, 1L, 2L, 2L, 0L, 7L, 3L, 2L, 0L, 2L, 0L, 3L, 1L, 0L,
1L, 1L, 1L, 1L, 0L, 2L, 2L, 1L, 1L, 0L, 0L, 0L, 1L, 2L,
1L, 1L, 0L, 2L, 0L, 0L, 3L, 3L),
tamanho = c(7L, 4L, 8L, 7L, 7L, 8L, 8L, 8L, 8L, 7L, 7L, 7L, 8L, 8L,
8L, 6L, 7L, 7L, 8L, 8L, 8L, 7L, 7L, 8L, 8L, 8L, 7L, 7L,
7L, 7L, 8L, 8L, 7L, 8L, 7L, 8L, 8L, 7L, 7L, 7L, 8L, 8L,
8L, 8L, 8L, 7L, 8L, 8L, 7L, 7L),
forma = c(6L, 6L, 7L, 4L, 6L, 6L, 6L, 7L, 6L, 6L, 6L, 6L, 6L, 5L,
6L, 7L, 5L, 6L, 7L, 7L, 6L, 7L, 0L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 2L, 2L, 6L, 7L, 7L, 6L, 7L, 7L,
6L, 7L, 6L, 6L, 6L, 6L, 6L, 7L),
nomeou.cor = c(67L, 82L, 85L, 83L, 68L, 72L, 74L, 74L, 80L, 80L, 63L,
70L, 80L, 81L, 68L, 72L, 81L, 76L, 78L, 81L, 81L, 80L,
65L, 78L, 74L, 75L, 61L, 63L, 69L, 79L, 70L, 75L, 77L,
79L, 74L, 78L, 70L, 79L, 71L, 70L, 78L, 53L, 67L, 75L, 83L,
82L, 84L, 71L, 53L, 69L),
nomeou.incor = c(23L, 8L, 5L, 7L, 22L, 18L, 16L, 16L, 10L, 10L, 27L,
20L, 10L, 9L, 22L, 18L, 9L, 14L, 12L, 9L, 8L, 10L, 25L,
12L, 16L, 15L, 29L, 27L, 21L, 11L, 20L, 15L, 13L, 11L,
16L, 12L, 20L, 11L, 19L, 20L, 12L, 37L, 23L, 15L, 15L, 8L,
6L, 19L, 38L, 21L),
token.POS = c(120L, 120L, 136L, 117L, 122L, 118L, 135L, 122L, 124L,
112L, 98L, 127L, 130L, 119L, 116L, 94L, 122L, 125L,
130L, 123L, 113L, 116L, 105L, 109L, 113L, 120L, 99L, 87L,
0L, 123L, 0L, 94L, 136L, 125L, 118L, 124L, 123L, 128L,
101L, 124L, 131L, 87L, 116L, 120L, 117L, 119L, 123L, 127L,
91L, 105L),
hab.visuo.POS = c(1L, 2L, 2L, 1L, 2L, 5L, 2L, 1L, 3L, 0L, 2L, 2L, 1L, 2L,
5L, 1L, 0L, 0L, 0L, 5L, 0L, 2L, 0L, 1L, 0L, 2L, 3L, 1L,
0L, 1L, 0L, 1L, 0L, 3L, 1L, 4L, 1L, 0L, 2L, 0L, 1L, 0L,
0L, 2L, 3L, 2L, 7L, 1L, 2L, 3L),
tamanho.POS = c(8L, 8L, 8L, 7L, 8L, 7L, 7L, 8L, 8L, 8L, 7L, 8L, 8L, 8L,
8L, 7L, 8L, 8L, 8L, 7L, 8L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
0L, 7L, 0L, 7L, 8L, 8L, 7L, 7L, 7L, 8L, 7L, 8L, 7L, 7L,
8L, 6L, 8L, 7L, 8L, 7L, 8L, 7L),
forma.POS = c(6L, 7L, 7L, 7L, 6L, 7L, 7L, 7L, 6L, 5L, 7L, 6L, 6L, 7L,
5L, 5L, 7L, 5L, 7L, 8L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 6L,
0L, 7L, 0L, 5L, 7L, 7L, 7L, 7L, 2L, 7L, 5L, 5L, 5L, 7L,
6L, 7L, 7L, 6L, 6L, 7L, 2L, 6L),
nomeou.cor.POS = c(78L, 78L, 85L, 73L, 70L, 76L, 77L, 78L, 82L, 74L, 56L,
70L, 79L, 78L, 65L, 67L, 87L, 78L, 82L, 82L, 79L, 84L,
74L, 78L, 71L, 79L, 61L, 70L, 0L, 75L, 0L, 74L, 83L, 77L,
76L, 77L, 75L, 77L, 68L, 79L, 79L, 61L, 67L, 76L, 79L,
78L, 81L, 68L, 53L, 71L),
nomeou.incor.POS = c(12L, 12L, 5L, 17L, 20L, 14L, 13L, 12L, 8L, 16L, 34L,
20L, 11L, 12L, 25L, 23L, 3L, 12L, 8L, 8L, 11L, 6L, 16L,
12L, 19L, 11L, 29L, 20L, 0L, 15L, 0L, 16L, 7L, 13L, 14L,
13L, 15L, 13L, 22L, 11L, 11L, 29L, 23L, 14L, 11L, 12L,
9L, 22L, 37L, 19L),
columbia = as.factor(c("49,3", "55,38", "67,69", "53,33", "38,46",
"53,84", "63,07", "61,53", "63,07", "35,38",
"43,33", "38,33", "55,38", "67,69", "47,69",
"29,23", "60", "53,84", "61,66", "73,33", "47,
69", "46,66", "41,66", "56,66", "47,69", "61,66",
"33,33", "40", "60", "36,66", "51,66", "43,33",
"58,46", "60", "56,92", "38,33", "40", "60",
"53,33", "56,92", "61,53", "53,84", "44,61",
"47,69", "44,61", "66,15", "56,66", "58,33", "65",
"51,66")),
escrita = as.factor(c("2", "2", "2", "2", "2", "1,5", "2,25", "2",
"2", "2", "2", "2", "2", "2,75", "2", "2,5",
"1,75", "2", "3", "4,5", "1,75", "1,75", "2",
"2", "2", "2", "1", "1,5", "2", "2", "2", "2,5",
"2", "2", "2", "1,75", "1,75", "2", "2", "2,
25", "2", "2", "1,75", "1", "2", "0", "2,5", "2",
"1,25", "1,5")),
columbia.POS = as.factor(c("53,8", "66,7", "76,7", "31,7", "43,1", "58,
5", "56,7", "55", "70", "35,4", "28,3", "43,
3", "61,7", "68,3", "55,4", "28,3", "55", "56,7",
"68,3", "66,7", "41,5", "56,7", "50", "65",
"58,5", "75", "48,3", "31,7", "0", "53,3", "0",
"50", "65", "66,7", "73,8", "51,7", "45", "66,
7", "48,3", "69,2", "73,8", "43,3", "56,7", "47,
7", "51,7", "73,3", "66,7", "38,3", "43,3", "33,
3")),
escrita.POS = as.factor(c("2", "3", "3", "1,5", "1,5", "2", "3", "3",
"4", "2", "1,5", "3", "2", "4", "1,5", "3",
"2", "3", "4", "4", "1,5", "2", "3", "1,5", "2",
"2", "1,5", "1,5", "0", "1,5", "0", "2", "3",
"1,5", "1,5", "1,5", "1,5", "2", "1,5", "3",
"2", "2", "2", "1,5", "1,5", "1,5", "5", "3", "1,
5", "1,5"))
)
This happens when you use as.integer on non-integer variables.
It seems your numbers contain "," as the decimal mark (e.g. 51,66) and, in some cases, a newline character, which is what causes the problem. So first clean the values with gsub() and then convert them:
# Convert columns 4..last to numbers. The text columns use a decimal comma and
# some labels contain a stray line break (e.g. "47,\n69"), so all whitespace is
# stripped and the comma is turned into a decimal point before parsing.
# as.numeric() is used rather than as.integer(): simply deleting the comma would
# turn "49,3" into 493, and as.integer() would truncate the decimals.
# lapply() works column by column, so columns that are already numeric are
# returned untouched instead of being coerced through a character matrix.
value_cols <- seq_len(ncol(databianca))[-(1:3)]
databianca[value_cols] <- lapply(databianca[value_cols], function(x) {
  if (is.numeric(x)) {
    return(x)
  }
  cleaned <- gsub("[[:space:]]", "", as.character(x))
  as.numeric(sub(",", ".", cleaned, fixed = TRUE))
})
This should also work
# Same conversion in one pass: view columns 4..last as a character matrix,
# strip all whitespace (line breaks inside labels and any padding added by
# as.matrix()), replace the decimal comma with a point and parse the values.
# The parsed vector is in column-major order, so it fills the selected columns
# one after another. Every selected column ends up as double; as.numeric() is
# used because deleting the comma would turn "49,3" into 493.
value_cols <- seq_len(ncol(databianca))[-(1:3)]
raw_values <- gsub("[[:space:]]", "", as.matrix(databianca[, value_cols]))
databianca[, value_cols] <- as.numeric(sub(",", ".", raw_values, fixed = TRUE))

Scatter plot with small pie charts with R

I have the data below, called test1.melted. I also have code to plot it using the scatterpie package, but because of an inherent limitation of scatterpie (the coordinates must be Cartesian, i.e. with equal horizontal and vertical distances), the plot is not formatted properly. Is there a better way to plot this data without using scatterpie?
Data:
test1.melted<-structure(list(Wet_lab_dilution_A = structure(c(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L), .Label = c("A", "B", "C", "D", "E", "F",
"G", "H", "I", "J", "K", "L"), class = "factor"), TypeA = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("I", "II"), class = "factor"),
NA12878 = c(100L, 50L, 25L, 20L, 10L, 0L, 100L, 50L, 25L,
20L, 10L, 0L, 100L, 50L, 25L, 20L, 10L, 0L, 100L, 50L, 25L,
20L, 10L, 0L), NA12877 = c(0L, 50L, 75L, 80L, 90L, 100L,
0L, 50L, 75L, 80L, 90L, 100L, 0L, 50L, 75L, 80L, 90L, 100L,
0L, 50L, 75L, 80L, 90L, 100L), IBD = c(1.02, 0.619, 0.294,
0.244, 0.134, 0.003, 0.003, 0.697, 0.964, 0.978, 1, 1, 1.02,
0.619, 0.294, 0.244, 0.134, 0.003, 0.003, 0.697, 0.964, 0.978,
1, 1), variableA = c("tEst", "tEst", "tEst", "tEst", "tEst",
"tEst", "tEst", "tEst", "tEst", "tEst", "tEst", "tEst", "pair",
"pair", "pair", "pair", "pair", "pair", "pair", "pair", "pair",
"pair", "pair", "pair"), valueA = c(0.1, 59.8, 84.6, 89.2,
97.4, 100, 99.6, 56.4, 29.9, 24, 12.1, 0.1, 0.1, 51.08, 75.28,
80.09, 90.16, 100, 100, 48.09, 23.97, 18.81, 9.24, 0.08)), row.names = c(NA,
-24L), .Names = c("Wet_lab_dilution_A", "TypeA", "NA12878", "NA12877",
"IBD", "variableA", "valueA"), class = "data.frame")
code:
# Scatterpie plot: pies placed at (valueA, IBD) with slices taken from the
# NA12878 and NA12877 columns, drawn on equally scaled axes and faceted by
# TypeA (rows) and variableA (columns).
p <- ggplot() +
  geom_scatterpie(
    mapping = aes(x = valueA, y = IBD, group = TypeA),
    data = test1.melted,
    cols = c("NA12878", "NA12877")
  ) +
  coord_equal() +
  facet_grid(TypeA ~ variableA)
p
Do you have to use a pie chart? (And you might; there's nothing wrong with them.)
Cause something like this could illustrate literally every variable in the dataset:
library(ggplot2)
# Turn NA12877 and NA12878 into factors (in place) so they map to discrete
# colour and fill scales.
test1.melted[c("NA12877", "NA12878")] <-
  lapply(test1.melted[c("NA12877", "NA12878")], as.factor)
# Base plot: shape 21 is a circle with a separate outline (colour) and
# interior (fill), so one point carries both columns; the dilution label is
# printed on top of each point.
p <- ggplot(data = test1.melted, aes(x = valueA, y = IBD, group = TypeA)) +
  geom_point(aes(colour = NA12877, fill = NA12878), stroke = 3, size = 3, shape = 21) +
  geom_text(aes(label = Wet_lab_dilution_A), size = 2)
# Draw the plot faceted by TypeA (rows) and variableA (columns); `p` itself
# keeps the unfaceted version.
p + facet_grid(TypeA ~ variableA) + theme_minimal()

Emmeans continuous independent variable

I want to explain Type_f using the Type_space of the experiment, the rate of Exhaustion_product, and the quantitative variable Age.
Here is my data :
res=structure(list(Type_space = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("",
"29-v1", "29-v2", "88-v1", "88-v2"), class = "factor"), Id = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L,
68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L,
81L, 82L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L,
52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L,
65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L,
78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L,
91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L,
103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L,
114L, 115L, 116L, 117L, 118L, 119L, 120L, 121L, 122L, 123L, 124L,
125L, 126L, 127L, 128L, 129L, 130L, 131L, 132L, 133L, 134L, 135L,
136L, 137L, 138L, 139L, 140L, 141L, 142L, 143L, 144L, 145L, 146L,
147L, 148L, 149L, 150L, 151L, 152L, 153L, 154L, 155L, 156L, 157L,
158L, 159L, 160L, 161L, 162L, 163L, 164L, 165L, 166L, 167L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L,
68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L,
81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L,
94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L,
43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L,
56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L,
69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L,
82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L,
95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L, 105L,
106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L, 115L, 116L,
117L, 118L, 119L, 120L, 121L, 122L, 123L, 124L, 125L, 126L, 127L,
128L, 129L, 130L, 131L, 132L, 133L, 134L, 135L, 136L, 137L, 138L,
139L, 140L, 141L, 142L, 143L, 144L, 145L, 146L, 147L, 148L, 149L,
150L, 151L, 152L, 153L, 154L, 155L, 156L, 157L, 158L, 159L, 160L,
161L, 162L, 163L, 164L), Age = c(3, 10, 1, 5, 4, 2, 1, 8, 2,
13, 1, 6, 3, 5, 2, 1, 3, 8, 3, 6, 1, 3, 7, 1, 2, 2, 2, 1, 2,
5, 4, 1, 6, 3, 6, 8, 2, 3, 4, 7, 3, 2, 6, 2, 3, 7, 1, 5, 4, 1,
4, 3, 2, 3, 5, 5, 2, 1, 1, 5, 8, 7, 2, 2, 4, 3, 4, 4, 2, 2, 10,
7, 5, 3, 3, 5, 7, 5, 3, 4, 5, 4, 1, 8, 6, 1, 12, 1, 6, 3, 4,
4, 13, 5, 2, 7, 7, 20, 1, 1, 1, 7, 1, 4, 3, 8, 2, 2, 4, 1, 1,
2, 3, 2, 2, 6, 11, 2, 5, 5, 9, 4, 4, 2, 7, 2, 7, 10, 6, 9, 2,
2, 5, 11, 1, 8, 8, 4, 1, 2, 14, 11, 13, 20, 3, 3, 4, 16, 2, 6,
11, 9, 11, 4, 5, 6, 19, 5, 2, 6, 1, 7, 11, 3, 9, 2, 3, 6, 20,
8, 6, 2, 11, 18, 9, 3, 7, 3, 2, 1, 8, 3, 5, 6, 2, 5, 8, 11, 4,
9, 7, 2, 12, 8, 2, 9, 5, 4, 15, 5, 13, 5, 10, 13, 7, 6, 1, 12,
12, 10, 4, 2, 16, 7, 17, 11, 18, 4, 3, 12, 1, 3, 7, 3, 6, 5,
11, 10, 12, 6, 14, 8, 6, 7, 8, 5, 10, 12, 6, 13, 3, 11, 14, 7,
9, 9, 4, 13, 4, 2, 1, 2, 2, 1, 7, 9, 3, 10, 3, 2, 1, 3, 1, 4,
2, 4, 5, 4, 2, 13, 4, 1, 3, 1, 11, 4, 1, 3, 3, 7, 5, 4, 5, 6,
1, 2, 1, 2, 1, 6, 1, 7, 6, 9, 5, 1, 6, 3, 2, 3, 3, 8, 8, 3, 2,
2, 4, 2, 5, 2, 6, 8, 11, 1, 6, 3, 3, 4, 5, 5, 7, 4, 2, 7, 3,
3, 1, 3, 9, 5, 2, 4, 12, 1, 4, 5, 2, 7, 6, 1, 2, 6, 4, 2, 7,
3, 5, 5, 3, 7, 1, 5, 2, 1, 15, 3, 5, 2, 5, 13, 6, 2, 3, 5, 2,
8, 4, 2, 6, 7, 2, 4, 1, 13, 8, 2, 1, 2, 1, 1, 5, 2, 1, 6, 11,
4, 1, 7, 7, 4, 3, 5, 1, 4, 10, 1, 2, 6, 1, 11, 3, 8, 9, 2, 6,
8, 11, 14, 16, 4, 1, 4, 2, 1, 10, 4, 9, 3, 12, 8, 11, 8, 8, 5,
1, 4, 13, 3, 8, 5, 14, 3, 5, 5, 12, 1, 3, 4, 5, 2, 7, 6, 9, 6,
10, 5, 2, 3, 2, 10, 10, 10, 10, 10, 1, 14, 3, 5, 9, 6, 2, 2,
2, 4, 4, 11, 14, 2, 2, 2, 8, 7, 2, 10, 12, 1, 6, 10, 2, 3, 5,
10, 6, 1, 8, 4, 11, 5, 4, 3, 6, 2, 4, 6, 9, 3, 9, 11, 7, 3, 15,
3, 7, 3, 5, 4, 6, 9, 13, 8, 5, 7, 8, 8, 5, 10), Type_product = c("f",
"s", "f", "f", "f", "f", "s", "c", "s", "f", "c", "f", "f", "f",
"s", "s", "f", "f", "c", "f", "s", "f", "f", "s", "f", "c", "f",
"f", "s", "f", "f", "c", "f", "c", "f", "f", "f", "f", "f", "c",
"c", "c", "f", "f", "c", "c", "f", "c", "c", "c", "c", "c", "s",
"f", "c", "c", "c", "s", "f", "c", "f", "f", "c", "c", "f", "c",
"c", "c", "f", "c", "c", "c", "c", "c", "c", "c", "f", "c", "c",
"c", "c", "f", "c", "f", "f", "s", "f", "c", "f", "f", "f", "c",
"f", "f", "f", "f", "f", "s", "c", "c", "f", "f", "c", "c", "f",
"f", "c", "c", "f", "f", "s", "f", "c", "c", "f", "f", "f", "c",
"f", "f", "f", "c", "f", "f", "f", "f", "f", "f", "c", "f", "f",
"f", "f", "c", "s", "f", "c", "f", "f", "c", "f", "f", "f", "c",
"f", "c", "c", "c", "f", "f", "f", "f", "c", "c", "c", "f", "f",
"c", "c", "f", "c", "f", "f", "c", "c", "c", "c", "f", "f", "f",
"c", "c", "c", "f", "c", "f", "c", "f", "f", "f", "c", "f", "c",
"c", "c", "c", "c", "f", "c", "c", "c", "c", "c", "c", "c", "f",
"f", "f", "c", "f", "c", "f", "f", "c", "c", "f", "f", "f", "c",
"c", "c", "f", "c", "c", "c", "c", "c", "f", "c", "f", "f", "c",
"c", "f", "c", "f", "c", "f", "c", "c", "c", "f", "c", "c", "c",
"c", "c", "c", "c", "f", "c", "c", "f", "c", "c", "f", "f", "c",
"f", "f", "s", "c", "s", "c", "f", "c", "c", "s", "c", "c", "s",
"c", "m", "c", "c", "f", "f", "f", "f", "f", "f", "s", "f", "f",
"c", "c", "f", "c", "f", "f", "f", "c", "f", "f", "f", "s", "f",
"f", "c", "f", "c", "f", "m", "c", "c", "c", "f", "s", "f", "f",
"f", "c", "s", "c", "m", "f", "c", "m", "c", "f", "c", "f", "f",
"f", "c", "m", "f", "c", "c", "f", "c", "f", "c", "c", "c", "c",
"c", "f", "f", "f", "c", "m", "f", "m", "m", "c", "c", "c", "c",
"m", "m", "c", "f", "m", "m", "m", "m", "m", "m", "m", "m", "m",
"c", "c", "f", "f", "f", "f", "c", "f", "m", "f", "f", "f", "c",
"f", "f", "f", "c", "f", "f", "c", "c", "f", "c", "f", "c", "m",
"f", "c", "f", "c", "f", "f", "f", "f", "c", "c", "f", "f", "c",
"c", "f", "f", "f", "f", "f", "f", "c", "f", "c", "c", "f", "c",
"f", "f", "f", "f", "f", "f", "f", "c", "f", "c", "f", "c", "f",
"c", "f", "c", "f", "f", "c", "c", "c", "c", "c", "f", "f", "f",
"c", "f", "c", "f", "f", "c", "c", "f", "f", "c", "f", "c", "f",
"c", "c", "c", "f", "f", "c", "f", "c", "c", "f", "c", "f", "c",
"f", "c", "f", "c", "m", "c", "c", "m", "c", "c", "f", "c", "c",
"f", "c", "c", "c", "f", "c", "c", "m", "c", "m", "m", "c", "c",
"f", "c", "c", "c", "c", "m", "c", "c", "c", "m", "m", "m", "c",
"c", "c", "c", "m", "m", "f", "m", "m", "m", "m", "m", "m", "m",
"m", "m", "m", "m", "m", "m", "m", "m"), Exhaustion_product = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L), .Label = c("(0,10]", "(10,20]", "(20,30]", "(30,40]", "(40,50]",
"(50,60]", "(60,70]", "(70,80]", "(80,90]", "(90,100]"), class = "factor"),
Type_f = c(1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0,
1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1,
1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0,
1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0,
1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1,
1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1,
1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1,
1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1,
1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0,
1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0,
1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1,
0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1,
0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0,
0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1,
1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1,
1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0)), .Names = c("Type_space", "Id", "Age",
"Type_product", "Exhaustion_product", "Type_f"), row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L,
68L, 69L, 70L, 71L, 73L, 75L, 76L, 79L, 80L, 81L, 82L, 84L, 85L,
86L, 91L, 102L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L,
111L, 112L, 113L, 114L, 115L, 116L, 117L, 118L, 119L, 120L, 121L,
122L, 123L, 124L, 125L, 126L, 127L, 128L, 129L, 130L, 131L, 132L,
133L, 134L, 135L, 136L, 137L, 138L, 139L, 140L, 141L, 142L, 143L,
144L, 145L, 146L, 147L, 148L, 149L, 150L, 151L, 152L, 153L, 154L,
155L, 156L, 157L, 158L, 159L, 160L, 161L, 162L, 163L, 164L, 165L,
166L, 167L, 168L, 169L, 170L, 171L, 172L, 173L, 174L, 175L, 176L,
177L, 178L, 179L, 180L, 181L, 182L, 183L, 184L, 185L, 186L, 187L,
188L, 189L, 190L, 191L, 192L, 193L, 194L, 195L, 197L, 198L, 199L,
201L, 202L, 203L, 204L, 206L, 207L, 208L, 209L, 210L, 212L, 213L,
214L, 215L, 217L, 218L, 219L, 220L, 221L, 222L, 223L, 225L, 227L,
229L, 230L, 231L, 232L, 233L, 234L, 235L, 236L, 237L, 238L, 239L,
242L, 243L, 244L, 246L, 247L, 248L, 249L, 250L, 251L, 253L, 254L,
256L, 259L, 260L, 261L, 262L, 263L, 264L, 265L, 266L, 269L, 270L,
272L, 273L, 274L, 276L, 277L, 278L, 279L, 280L, 281L, 282L, 283L,
284L, 285L, 287L, 289L, 290L, 291L, 292L, 293L, 294L, 295L, 296L,
297L, 298L, 300L, 301L, 302L, 303L, 306L, 308L, 309L, 311L, 312L,
313L, 314L, 315L, 316L, 317L, 318L, 319L, 320L, 322L, 323L, 325L,
326L, 327L, 328L, 329L, 331L, 332L, 334L, 335L, 336L, 338L, 339L,
340L, 341L, 342L, 343L, 344L, 345L, 346L, 347L, 348L, 349L, 350L,
352L, 353L, 354L, 356L, 357L, 358L, 359L, 360L, 361L, 363L, 364L,
365L, 366L, 367L, 368L, 369L, 370L, 372L, 373L, 374L, 375L, 376L,
377L, 378L, 379L, 380L, 381L, 382L, 384L, 385L, 387L, 388L, 389L,
391L, 393L, 394L, 395L, 396L, 397L, 398L, 399L, 400L, 401L, 402L,
404L, 407L, 408L, 409L, 411L, 412L, 413L, 414L, 415L, 416L, 417L,
418L, 419L, 420L, 421L, 422L, 423L, 424L, 425L, 426L, 427L, 428L,
429L, 430L, 431L, 432L, 433L, 434L, 435L, 436L, 437L, 438L, 439L,
440L, 442L, 443L, 444L, 445L, 446L, 447L, 448L, 449L, 450L, 451L,
452L, 453L, 454L, 455L, 456L, 457L, 458L, 459L, 460L, 461L, 462L,
463L, 464L, 465L, 466L, 467L, 468L, 469L, 470L, 471L, 472L, 473L,
474L, 476L, 477L, 478L, 479L, 480L, 481L, 482L, 483L, 484L, 486L,
487L, 488L, 489L, 490L, 491L, 492L, 493L, 494L, 495L, 496L, 497L,
498L, 500L, 501L, 502L, 503L, 504L, 505L, 506L, 507L, 508L, 509L,
510L, 511L, 512L, 513L, 514L, 515L, 516L, 517L, 518L, 519L, 520L,
521L, 522L, 523L, 524L, 525L, 526L, 527L, 528L, 529L, 530L, 531L,
532L, 534L, 535L, 536L, 537L, 538L, 539L, 540L, 541L, 542L, 543L,
547L, 548L, 550L, 551L, 552L, 553L, 554L, 555L, 556L, 557L, 558L,
559L, 560L, 561L, 562L, 563L, 565L, 566L, 567L, 568L, 569L, 570L,
571L, 572L, 573L, 575L, 577L, 579L, 580L, 581L, 582L, 583L, 585L,
586L, 587L, 590L, 592L, 599L, 606L, 608L), class = "data.frame")
# Logistic regression (binomial family) of Type_f on Type_space,
# Exhaustion_product and Age. The model is fitted once and reused, so the
# Anova table and the emmeans results always come from the same specification.
library(emmeans)

gl <- glm(
  Type_f ~ Type_space + Exhaustion_product + Age,
  family = binomial,
  data = res
)

# Analysis-of-deviance table for the model terms.
# NOTE(review): Anova() is not base R (presumably car::Anova) -- confirm the
# package is attached earlier in the session.
an <- Anova(gl)

# Estimated marginal means and pairwise contrasts over the
# Exhaustion_product x Age combinations of the reference grid.
emmp <- emmeans(gl, pairwise ~ Exhaustion_product + Age)

# infer = TRUE requests both confidence intervals and tests in the summary.
summary(emmp, infer = TRUE)
(1) In the case of the categorical variable the results are clear. But in the case of Age, which is significant in the GLM, what is the value generated by emmeans (5.455426)? Is that the mean? How can I interpret this?
(0,10] 5.455426 0.36901411 0.2935894 Inf -0.20641061 0.94443883 1.257 0.2088
(2) I want to generate a graphical representation of the interaction between Age and Exhaustion_product, but the result of the following also does not make sense.
# Plot of model predictions with one trace per Exhaustion_product level and
# Age on the x-axis. NOTE(review): Age appears to be a numeric covariate, so
# the default reference grid uses only its mean; cov.reduce = range may be
# needed to draw lines rather than single points -- confirm.
emmip(gl, Exhaustion_product ~ Age)
Edit 1
Contrast result
$contrasts
contrast estimate SE df asymp.LCL asymp.UCL z.ratio p.value
(0,10],5.45542635658915 - (10,20],5.45542635658915 0.33231353 0.4078967 Inf -0.95814279 1.6227698 0.815 0.9984
(0,10],5.45542635658915 - (20,30],5.45542635658915 -0.53694399 0.4194460 Inf -1.86393835 0.7900504 -1.280 0.9582
(0,10],5.45542635658915 - (30,40],5.45542635658915 -0.16100309 0.4139472 Inf -1.47060101 1.1485948 -0.389 1.0000
(0,10],5.45542635658915 - (40,50],5.45542635658915 0.40113723 0.4021403 Inf -0.87110757 1.6733820 0.998 0.9925
(0,10],5.45542635658915 - (50,60],5.45542635658915 0.60576562 0.4106536 Inf -0.69341247 1.9049437 1.475 0.9022
(0,10],5.45542635658915 - (60,70],5.45542635658915 1.38800301 0.4319258 Inf 0.02152631 2.7544797 3.214 0.0430
(0,10],5.45542635658915 - (70,80],5.45542635658915 1.01677522 0.4147441 Inf -0.29534399 2.3288944 2.452 0.2952
(0,10],5.45542635658915 - (80,90],5.45542635658915 1.99085692 0.4747929 Inf 0.48876247 3.4929514 4.193 0.0011
(0,10],5.45542635658915 - (90,100],5.45542635658915 2.03923289 0.4745872 Inf 0.53778910 3.5406767 4.297 0.0007
Because this question seems like a self-learning one, I am going to do a similar example, not the same data. But the structure is the same, with one factor and one covariate as predictors.
The example is the emmeans::fiber dataset. Its response variable is fiber strength, the continuous predictor is the diameter, and the factor is the machine it was made on.
Model:
> mod = glm(log(strength) ~ machine + diameter, data = fiber)
> summary(mod)
... (output has been abbreviated) ...
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.124387 0.068374 45.695 6.74e-14
machineB 0.026025 0.023388 1.113 0.290
machineC -0.044593 0.025564 -1.744 0.109
diameter 0.023557 0.002633 8.946 2.22e-06
(Dispersion parameter for gaussian family taken to be 0.001356412)
Analysis with emmeans is based on the reference grid, which by default consists of all levels of the factor and the mean of the covariate:
> ref_grid(mod)
'emmGrid' object with variables:
machine = A, B, C
diameter = 24.133
Transformation: “log”
You can confirm in R that mean(fiber$diameter) is 24.133. I emphasize this is the mean of the diameter values, not of anything in the model.
> summary(.Last.value)
machine diameter prediction SE df
A 24.13333 3.692901 0.01670845 Inf
B 24.13333 3.718925 0.01718853 Inf
C 24.13333 3.648307 0.01819206 Inf
Results are given on the log (not the response) scale.
Those summary values are the predictions from mod at each combination of machine and diameter. Now look at EMMs for machine
> emmeans(mod, "machine")
machine emmean SE df asymp.LCL asymp.UCL
A 3.692901 0.01670845 Inf 3.660153 3.725649
B 3.718925 0.01718853 Inf 3.685237 3.752614
C 3.648307 0.01819206 Inf 3.612652 3.683963
Results are given on the log (not the response) scale.
Confidence level used: 0.95
... we get exactly the same three predictions. But if we look at diameter:
> emmeans(mod, "diameter")
diameter emmean SE df asymp.LCL asymp.UCL
24.13333 3.686711 0.009509334 Inf 3.668073 3.705349
Results are averaged over the levels of: machine
Results are given on the log (not the response) scale.
Confidence level used: 0.95
... we get an EMM that is equal to the average of the three predicted values in the reference grid. Note that the annotations say the results were averaged over machine, so it is worth reading them.
To get a graphical representation of the model results, we can do
> emmip(mod, machine ~ diameter, cov.reduce = range)
The argument cov.reduce = range is added to cause the reference grid to use the min and max diameter, rather than its average. Without that, we'd have gotten three dots instead of three lines. This plot still shows the model predictions, just over a more detailed grid of values. Notice that all three lines have the same slope. That is because the model was specified that way: the diameter effect is added to the machine effect. Each line thus has the common slope of 0.023557 (see the output from summary(mod)).
There is no post hoc test needed for diameter, since its one effect is already tested in summary(mod).
One last thing. The model used log(strength) as the response. If we want the EMMs on the same scale as strength, just add type = "response":
> emmeans(mod, "machine", type = "response")
machine response SE df asymp.LCL asymp.UCL
A 40.16118 0.6710311 Inf 38.86728 41.49815
B 41.22008 0.7085126 Inf 39.85455 42.63239
C 38.40960 0.6987496 Inf 37.06421 39.80384
Confidence level used: 0.95
Intervals are back-transformed from the log scale
Again, the annotations below the results help explain the output.

Resources