How to rearrange output from dplyr - r

When I write the following code
# Mean milk yield for every dim_cat x lact_cat combination (one row per pair).
ddply(milkers, .(dim_cat, lact_cat), function(x) mean(x$milkyield))
I get the following output
The mean calculations regarding milk production by class of stock (1 vs 2) are correct. I would like to end up with a table more like the one below.
Effectively I am trying to get the number of animals in each time period and calculate their mean milk production. The problem is that it is calculating the total number of animals for all time periods and mean milk production for all time periods.
The code I used to generate this data is below.
# Split the herd by lactation category (lact_cat 1 and lact_cat 2).
heiferdat <- subset(milkers, lact_cat== 1)
cowdat <- subset(milkers, lact_cat== 2)
# NOTE: the anonymous function never reads its argument `x` (the rows of the
# current dim_cat group). It refers to the full `milkers`, `heiferdat` and
# `cowdat` objects instead, so every dim_cat row receives the same values.
ddply(milkers, .(dim_cat), function(x) c(Heifers = sum(milkers$lact_cat==1), H_Milk= mean(heiferdat$milkyield), Cows = sum(milkers$lact_cat==2), C_Milk= mean(cowdat$milkyield)))
I had anticipated that in this code the .(dim_cat) variable would be applied to the function to restrict the sum and mean functions to only include animals in the correct time period.
I am looking for advice as to how I can get the output with one row per time period with the number of animals for each class lact_cat and the mean milk production for each lact_cat
Thank you
The following is a subset of the data that I am working with.
# Print a reproducible definition of a slice of each data frame. Columns 11, 25
# and 26 are milkyield, dim_cat and lact_cat in the output that follows.
dput(milkers[180:200, c(11, 25, 26)])
dput(heiferdat[1:20, c(11, 25, 26)])
dput(cowdat[1:20, c(11, 25, 26)])
> dput(milkers[180:200, c(11, 25, 26)])
structure(list(milkyield = structure(c(8.42, 38.32, 14.27, 7.68,
16.59, 17.19, 24.45, 33.47, 36.16, 25.88, 11.61, 18.96, 11.27,
33.6, 21.57, 20.87, 9.62, 7.93, 21.02, 17.75, 22.01), label = "Milk (L)", class = c("labelled",
"numeric")), dim_cat = structure(c(5L, 3L, 7L, 7L, 2L, 7L, 2L,
2L, 2L, 3L, 6L, 6L, 2L, 3L, 6L, 6L, 6L, 6L, 6L, 7L, 6L), .Label = c("<31",
"31-90", "91-150", "151-210", "211-270", "271-330", ">330"), class = c("labelled",
"factor"), label = "Days in Milk"), lact_cat = structure(c(2L,
2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("1", "2"), class = "factor")), row.names = 180:200, class = "data.frame")
> dput(heiferdat[1:20, c(11, 25, 26)])
structure(list(milkyield = structure(c(14.27, 17.19, 11.61, 18.96,
11.27, 21.57, 20.87, 9.62, 7.93, 21.02, 17.75, 22.01, 25.15,
11.75, 12.6, 15.62, 19.29, 8.85, 15.52, 11.62), label = "Milk (L)", class = c("labelled",
"numeric")), dim_cat = structure(c(7L, 7L, 6L, 6L, 2L, 6L, 6L,
6L, 6L, 6L, 7L, 6L, 6L, 6L, 6L, 7L, 6L, 6L, 6L, 6L), .Label = c("<31",
"31-90", "91-150", "151-210", "211-270", "271-330", ">330"), class = c("labelled",
"factor"), label = "Days in Milk"), lact_cat = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("1", "2"), class = "factor")), row.names = c(182L,
185L, 190L, 191L, 192L, 194L, 195L, 196L, 197L, 198L, 199L, 200L,
201L, 202L, 203L, 204L, 205L, 206L, 207L, 208L), class = "data.frame")
> dput(cowdat[1:20, c(11, 25, 26)])
structure(list(milkyield = structure(c(15.73, 14.56, 16.94, 16.25,
39.09, 9.79, 8.41, 3.05, 38.89, 11.7, 29.89, 19.73, 18.2, 20.63,
20.32, 52.99, 10.11, 8.08, 10.84, 33.75), label = "Milk (L)", class = c("labelled",
"numeric")), dim_cat = structure(c(3L, 6L, 6L, 2L, 3L, 7L, 6L,
7L, 3L, 7L, 3L, 6L, 3L, 6L, 2L, 2L, 7L, 6L, 7L, 7L), .Label = c("<31",
"31-90", "91-150", "151-210", "211-270", "271-330", ">330"), class = c("labelled",
"factor"), label = "Days in Milk"), lact_cat = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("1", "2"), class = "factor")), row.names = c(NA,
20L), class = "data.frame")

Following @DanChaltiel's advice to use dplyr, here is a dplyr approach:
library(dplyr)
# Count the animals and average the milk yield for every
# dim_cat x lact_cat combination (one row per pair).
all_summary <- milkers %>%
  group_by(dim_cat, lact_cat) %>%
  summarise(
    avg = mean(milkyield),
    num = n()
  ) %>%
  # summarise() only peels off the last grouping variable, so the result would
  # still be grouped by dim_cat; drop that so later verbs act on plain rows.
  ungroup()
At this point you have all the summary information calculated. The following code is just formatting/presentation.
# Presentation only: place the lact_cat == 1 and lact_cat == 2 summaries side
# by side, one row per dim_cat.
heifer_summary <- all_summary %>%
  filter(lact_cat == 1) %>%
  select(dim_cat, Heifers = num, H_Milk = avg)

cow_summary <- all_summary %>%
  filter(lact_cat == 2) %>%
  select(dim_cat, Cows = num, C_Milk = avg)

# full_join keeps a dim_cat even when it appears in only one of the two tables.
arranged_summary <- heifer_summary %>%
  full_join(cow_summary, by = "dim_cat") %>%
  select(dim_cat, Heifers, H_Milk, Cows, C_Milk) %>%
  arrange(dim_cat)

Related

Percentages in the wrong position in ggplot2

I'm trying to plot a graph for a Likert test using ggplot2, and I would like the percentage values to appear on the graph. I've created a data frame with all the averages and percentages so I can write them on the graph. It all seems to be working well, except that the values are plotted in the wrong positions, as if they were upside down.
This is the code I'm using
example <- structure(list(grupo = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("EJA",
"REG"), class = "factor"), nivel = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("CINCO", "DOZE", "NOVE"), class = "factor"), tipo = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L), .Label = c("COR", "PAD", "RES"), class = "factor"),
likert = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L), .Label = c("0",
"1", "2", "3"), class = c("ordered", "factor")), cnt = c(3L,
1L, 3L, 5L, 3L, 1L, 3L, 6L, 2L, 1L, 10L, 5L, 5L, 9L, 11L,
6L, 4L, 10L, 10L, 10L), freq = c(0.25, 0.083, 0.25, 0.417,
0.231, 0.077, 0.231, 0.462, 0.154, 0.077, 0.769, 0.167, 0.167,
0.3, 0.367, 0.2, 0.133, 0.333, 0.333, 0.333), prop = c(25,
8.3, 25, 41.7, 23.1, 7.7, 23.1, 46.2, 15.4, 7.7, 76.9, 16.7,
16.7, 30, 36.7, 20, 13.3, 33.3, 33.3, 33.3), proptext = c("25",
"8.3", "25", "41.7", "23.1", "7.7", "23.1", "46.2", "15.4",
"7.7", "76.9", "16.7", "16.7", "30", "36.7", "20", "13.3",
"33.3", "33.3", "33.3")), row.names = c(NA, -20L), groups = structure(list(
grupo = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("EJA",
"REG"), class = "factor"), nivel = structure(c(1L, 1L, 1L,
2L, 2L, 2L), .Label = c("CINCO", "DOZE", "NOVE"), class = "factor"),
tipo = structure(c(1L, 2L, 3L, 1L, 2L, 3L), .Label = c("COR",
"PAD", "RES"), class = "factor"), .rows = structure(list(
1:4, 5:8, 9:11, 12:15, 16:19, 20L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
# Flipped bar chart: one bar per grupo/nivel/tipo interaction, filled by likert,
# with y taken from prop and proptext drawn as text labels.
ggplot(example, aes(x=(interaction(grupo, nivel, tipo)),y=prop, fill=likert))+
geom_col()+
#scale_y_continuous(labels = percent)+
coord_flip() +
ggtitle("Testing")+
xlab("A, B, and C")+
ylab("%")+
# NOTE(review): no `position` is passed to geom_text(); presumably the labels
# therefore sit at y = prop rather than on the stacked bar segments - confirm.
geom_text(aes(label = proptext), size = 2, colour = "black")
Would someone have an idea of how I could solve it?
geom_text() may also need the x and y aesthetics, together with a stacked position so that each label sits inside its own bar segment:
library(dplyr)
library(tidyr)
library(ggplot2)
# Collapse the three grouping columns into a single axis label, then draw the
# bars with each percentage label centred (vjust = .5) in its stacked segment.
example %>%
  unite(new, grupo, nivel, tipo, sep = ".") %>%
  ggplot(aes(x = new, fill = likert)) +
  geom_col(aes(y = prop)) +
  geom_text(
    aes(x = new, y = prop, label = proptext),
    position = position_stack(vjust = .5)
  ) +
  coord_flip() +
  # scale_y_continuous(labels = percent) +
  labs(title = "Testing", x = "A, B, and C", y = "%")
-output

How can I sort a file with a lot of variables by one of them

If I have a file like the one in the picture and I want to sort it based on a variable out of the 15 I have in total, how can I sort all the columns without spoiling the proper match?
> dput(head(data1))
structure(list(PRICE = c(47, 113, 165, 104.3, 62.5, 70), NROOM = c(4L,
7L, 7L, 7L, 7L, 6L), NBATH = c(1, 2.5, 2.5, 2.5, 1.5, 2.5), PATIO = c(0L,
1L, 1L, 1L, 1L, 1L), FIREPL = c(0L, 1L, 1L, 1L, 1L, 1L), AC = c(0L,
1L, 0L, 1L, 0L, 0L), BMENT = c(2L, 2L, 3L, 2L, 2L, 3L), NSTOR = c(3,
2, 2, 2, 2, 3), GAR = c(0L, 2L, 2L, 2L, 0L, 1L), AGE = c(148L,
9L, 23L, 5L, 19L, 20L), SQFT = c(11.25, 28.92, 30.62, 26.12,
22.04, 39.42), SQMT = c(104.51592, 268.67559168, 284.46910848,
242.66274048, 204.75830016, 366.22378368), SIZE = c("Medium",
"Large", "Large", "Large", "Large", "Large"), HC = structure(c(5L,
2L, 2L, 2L, 2L, 2L), .Label = c("New", "Neither New nor Old",
"Relatively Old", "Old", "Very Old-Antique"), class = "factor"),
PRSQMT = c(0.449692257409206, 0.42058156192538, 0.580027831076781,
0.429814646425277, 0.305237931508329, 0.191139961737616)), row.names = c(NA,
6L), class = "data.frame")

How to remove duplicate x-axis labels in R

I am trying to obtain a barplot representing mean percentage of coloration (valores) grouped both by sex and size intervals (class). However, labels in the x-axis appear duplicated. I would like to get one single label ("50-55" for the first and second columns together, "55-60" for the third and fourth columns together, and so on) for each class level. How could I do this?
Here is my code:
# Enlarge the first (bottom) margin; presumably to make room for the x labels
# that are drawn by hand below.
par(mar=c(7,4,4,2)+0.1)
# Size class and sex as factors with an explicit level order.
class<-factor(coloration$clase.2,levels=c("50-55","55-60","60-65","65-70","70-75","75-80"))
sex<-factor(coloration$sexo,levels=c("M","H"))
valores<-coloration$perc.greenblue
# Mean of valores for each sex x class cell, drawn as side-by-side bars. The
# default axes and bar names are switched off so they can be added manually.
graf<-barplot(tapply(valores,list(sex,class),mean),beside=T,axes=F,ylim=c(0,50),col=c(grey.colors(2)),axisnames=F ,xlab=("Sex and size"),ylab=("% mean coloration"),las=1)
# Left-hand axis with a tick every 5 units from 0 to 50.
axis(2,at=c(0,5,10,15,20,25,30,35,40,45,50),labels=c(0,5,10,15,20,25,30,35,40,45,50),las=1)
# NOTE(review): labs holds one entry per row of coloration (one per
# observation), not one per class level, and is drawn at the positions in graf;
# presumably this is where the repeated labels come from - confirm.
labs<-as.character(class)
text(graf,par("usr")[3]-0.25,srt=0,adj = c(0,2),labels=labs,xpd=T,cex=1)
# The legend position is picked interactively with a mouse click (locator(1)).
legend(locator(1),c("Adult males","Adult females"),fill=c(grey.colors(2)),bty="n")
EDIT: here's some reproducible code:
structure(list(edad = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "ADU", class = "factor"),
sexo = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("H", "M"), class = "factor"),
clase.2 = structure(c(2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L,
6L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L), .Label = c("50-55",
"55-60", "60-65", "65-70", "70-75", "75-80"), class = "factor"),
perc.greenblue = c(0.09, 0.32, 12.8, 94.32, 34.83, 0.04,
45.83, 12.34, 0.75, 34.82, 0.5, 0.05, 3.46, 0, 1.72, 0.07,
0.09, 0.2)), row.names = c(9L, 10L, 12L, 13L, 48L, 49L, 109L,
110L, 194L, 195L, 263L, 264L, 266L, 267L, 332L, 333L, 408L, 409L
), class = "data.frame")

R how to use sjPlot::tab_model() to put lmer, glmer, and gamlss models into a table

I am looking to make a summary table for a set of linear models. The models are either lmer(), glmer(), or gamlss(). I am trying to print the results of 6 of these models into a single table. However, when I try to do this with sjPlot::tab_model, I receive the following error message, "Error: $ operator is invalid for atomic vectors".
I removed the two gamlss() models to see if the table would print and it did. It seems to me that the issue is that sjPlot::tab_model cannot work with gamlss() models.
Databases
recruitment_data <- structure(list(Site_long = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Hanauma Bay",
"Waikiki"), class = "factor"), Shelter = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("High",
"Low"), class = "factor"), `Module #` = structure(c(7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L), .Label = c("111", "112", "113", "114", "115",
"116", "211", "212", "213", "214", "215", "216"), class = "factor"),
TimeStep = c(4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 4L, 4L, 5L,
5L, 6L, 6L, 7L, 7L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 4L, 4L,
5L, 5L, 6L, 6L, 7L, 7L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 4L,
4L, 5L, 5L, 6L, 6L, 7L, 7L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L,
4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 4L, 4L, 5L, 5L, 6L, 6L, 7L,
7L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 4L, 4L, 5L, 5L, 6L, 6L,
7L, 7L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L), Side = structure(c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L), .Label = c("N", "S"), class = "factor"),
recruits = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 2, 1,
0, 0, 3, 0, 0, 1, 1, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2,
3, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
0, 2, 2, 0, 1, 1, 0, 1, 3, 1, 0, 1, 1, 0, 1, 5, 2, 0, 1,
1, 2, 0, 3, 1, 2, 2, 3, 6, 5, 2, 0, 1, 2, 0, 4, 1, 4, 1,
0, 0, 4, 0, 1)), row.names = c(NA, -96L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = c("Site_long", "Shelter",
"Module #", "TimeStep"), drop = TRUE)
survival_data <- structure(list(Date = structure(c(17288, 17288, 17288, 17288,
17288, 17288, 17292, 17299, 17299, 17304, 17306, 17386, 17386,
17386, 17386, 17386, 17386, 17387, 17387, 17387, 17389, 17389,
17389, 17390, 17398, 17404, 17475, 17475, 17477, 17480, 17482,
17484, 17484, 17484, 17484, 17484, 17484, 17489, 17575, 17575,
17575, 17575, 17575, 17582, 17586, 17594, 17600, 17601, 17603,
17603), class = "Date"), Year = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("17",
"18"), class = "factor"), Site = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("HAN",
"WAI"), class = "factor"), Treatment = c("CLO", "CLO", "CLO",
"OPE", "OPE", "OPE", "CLO", "CLO", "OPE", "OPE", "CLO", "CLO",
"CLO", "CLO", "OPE", "OPE", "OPE", "CLO", "OPE", "OPE", "CLO",
"CLO", "OPE", "OPE", "CLO", "CLO", "CLO", "OPE", "CLO", "OPE",
"CLO", "CLO", "CLO", "CLO", "OPE", "OPE", "OPE", "OPE", "CLO",
"CLO", "CLO", "OPE", "OPE", "OPE", "CLO", "OPE", "OPE", "CLO",
"CLO", "OPE"), `Module #` = c(212L, 214L, 216L, 211L, 213L, 215L,
116L, 114L, 115L, 113L, 112L, 212L, 214L, 216L, 211L, 213L, 215L,
116L, 111L, 115L, 112L, 114L, 115L, 113L, 114L, 114L, 112L, 115L,
116L, 113L, 114L, 212L, 214L, 216L, 211L, 213L, 215L, 111L, 212L,
214L, 216L, 213L, 215L, 211L, 116L, 115L, 113L, 114L, 112L, 111L
), n.x = c(1L, 1L, 2L, 2L, 3L, 3L, 6L, 7L, 5L, 4L, 2L, 2L, 2L,
1L, 2L, 3L, 5L, 10L, 1L, 4L, 10L, 13L, 6L, 5L, 2L, 2L, 8L, 6L,
10L, 8L, 12L, 2L, 6L, 2L, 2L, 3L, 5L, 2L, 2L, 5L, 1L, 8L, 9L,
2L, 10L, 15L, 10L, 16L, 12L, 4L), n.y = c(1, 1, 0, 2, 3, 3, 6,
7, 4, 4, 2, 2, 2, 1, 2, 3, 5, 9, 1, 3, 10, 11, 5, 5, 1, 1, 7,
6, 7, 8, 11, 2, 5, 2, 2, 3, 5, 2, 2, 5, 1, 7, 7, 0, 8, 14, 9,
9, 9, 4), `%_Survival` = c(100, 100, 0, 100, 100, 100, 100, 100,
80, 100, 100, 100, 100, 100, 100, 100, 100, 90, 100, 75, 100,
84.6153846153846, 83.3333333333333, 100, 50, 50, 87.5, 100, 70,
100, 91.6666666666667, 100, 83.3333333333333, 100, 100, 100,
100, 100, 100, 100, 100, 87.5, 77.7777777777778, 0, 80, 93.3333333333333,
90, 56.25, 75, 100), Quarter = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4), Mortality = c(0,
0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 2,
1, 0, 1, 1, 1, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
2, 2, 1, 1, 7, 3, 0), Survival = c(100, 100, 0, 100, 100, 100,
100, 100, 80, 100, 100, 100, 100, 100, 100, 100, 100, 90, 100,
75, 100, 84.6153846153846, 83.3333333333333, 100, 50, 50, 87.5,
100, 70, 100, 91.6666666666667, 100, 83.3333333333333, 100, 100,
100, 100, 100, 100, 100, 100, 87.5, 77.7777777777778, 0, 80,
93.3333333333333, 90, 56.25, 75, 100), Module = c(212L, 214L,
216L, 211L, 213L, 215L, 116L, 114L, 115L, 113L, 112L, 212L, 214L,
216L, 211L, 213L, 215L, 116L, 111L, 115L, 112L, 114L, 115L, 113L,
114L, 114L, 112L, 115L, 116L, 113L, 114L, 212L, 214L, 216L, 211L,
213L, 215L, 111L, 212L, 214L, 216L, 213L, 215L, 211L, 116L, 115L,
113L, 114L, 112L, 111L), Survival_prop = c(1, 1, 0, 1, 1, 1,
1, 1, 0.8, 1, 1, 1, 1, 1, 1, 1, 1, 0.9, 1, 0.75, 1, 0.846153846153846,
0.833333333333333, 1, 0.5, 0.5, 0.875, 1, 0.7, 1, 0.916666666666667,
1, 0.833333333333333, 1, 1, 1, 1, 1, 1, 1, 1, 0.875, 0.777777777777778,
0, 0.8, 0.933333333333333, 0.9, 0.5625, 0.75, 1), Date_new = structure(c(17378,
17378, 17378, 17378, 17378, 17378, 17382, 17389, 17389, 17394,
17396, 17476, 17476, 17476, 17476, 17476, 17476, 17477, 17477,
17477, 17479, 17479, 17479, 17480, 17488, 17494, 17565, 17565,
17567, 17570, 17572, 17574, 17574, 17574, 17574, 17574, 17574,
17579, 17665, 17665, 17665, 17665, 17665, 17672, 17676, 17684,
17690, 17691, 17693, 17693), class = "Date"), Site_long = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("Hanauma Bay", "Waikiki"), class = "factor"),
Treatment_long = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L), .Label = c("Closed",
"Open"), class = "factor"), Shelter = c("Low", "Low", "Low",
"High", "High", "High", "Low", "Low", "High", "High", "Low",
"Low", "Low", "Low", "High", "High", "High", "Low", "High",
"High", "Low", "Low", "High", "High", "Low", "Low", "Low",
"High", "Low", "High", "Low", "Low", "Low", "Low", "High",
"High", "High", "High", "Low", "Low", "Low", "High", "High",
"High", "Low", "High", "High", "Low", "Low", "High")), row.names = c(NA,
-50L), vars = c("Date", "Year", "Site", "Treatment", "Module #"
), labels = structure(list(Date = structure(c(17288, 17288, 17288,
17288, 17288, 17288, 17292, 17299, 17299, 17304, 17306, 17386,
17386, 17386, 17386, 17386, 17386, 17387, 17387, 17387, 17389,
17389, 17389, 17390, 17398, 17404, 17475, 17475, 17477, 17480,
17482, 17484, 17484, 17484, 17484, 17484, 17484, 17489, 17575,
17575, 17575, 17575, 17575, 17582, 17586, 17594, 17600, 17601,
17603, 17603), class = "Date"), Year = c(17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18), Site = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("HAN", "WAI"), class = "factor"), Treatment = c("CLO",
"CLO", "CLO", "OPE", "OPE", "OPE", "CLO", "CLO", "OPE", "OPE",
"CLO", "CLO", "CLO", "CLO", "OPE", "OPE", "OPE", "CLO", "OPE",
"OPE", "CLO", "CLO", "OPE", "OPE", "CLO", "CLO", "CLO", "OPE",
"CLO", "OPE", "CLO", "CLO", "CLO", "CLO", "OPE", "OPE", "OPE",
"OPE", "CLO", "CLO", "CLO", "OPE", "OPE", "OPE", "CLO", "OPE",
"OPE", "CLO", "CLO", "OPE"), `Module #` = c(212L, 214L, 216L,
211L, 213L, 215L, 116L, 114L, 115L, 113L, 112L, 212L, 214L, 216L,
211L, 213L, 215L, 116L, 111L, 115L, 112L, 114L, 115L, 113L, 114L,
114L, 112L, 115L, 116L, 113L, 114L, 212L, 214L, 216L, 211L, 213L,
215L, 111L, 212L, 214L, 216L, 213L, 215L, 211L, 116L, 115L, 113L,
114L, 112L, 111L)), row.names = c(NA, -50L), class = "data.frame", vars = c("Date",
"Year", "Site", "Treatment", "Module #"), drop = TRUE), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L), drop = TRUE, group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), biggest_group_size = 1L, class = c("grouped_df", "tbl_df",
"tbl", "data.frame"))
Analyses
library(lme4)
library(gamlss)
# Poisson mixed model: recruits by site, shelter and their interaction, with a
# random intercept for module_recruit.
# NOTE(review): `n3` and `module_recruit` are not defined in the code shown here
# (the data are posted as `recruitment_data`) - confirm how they are built.
recruitment_glmer_n3 <- glmer(recruits ~ Site_long*Shelter + (1|module_recruit), data = n3, family = poisson, na.action = "na.fail")
summary(recruitment_glmer_n3)
# gamlss fit of Survival_prop on site, treatment and their interaction, using
# the BEINF() family.
# NOTE(review): `survival_results_long_2` and `module_survival` are likewise not
# defined in the code shown here (the data are posted as `survival_data`).
survival_gamlss <- gamlss(Survival_prop ~ Site_long*Treatment_long + (1|module_survival), data = survival_results_long_2, family = BEINF())
summary(survival_gamlss)
Table Code
library(sjPlot)
# Put both fitted models into one table; this is the kind of call reported to
# fail with "$ operator is invalid for atomic vectors".
tab_model(recruitment_glmer_n3, survival_gamlss)
Is there a way to use sjPlot::tab_model to get html table outputs with gamlss model objects or is there another package that you would recommend for making publication-quality tables for linear model objects of gamlss as well as lmer and glmer? Thank you!
I have added (better) support for gamlss-models, so now you should be able to plot both models in one table with tab_model() (at least it worked for me). However, you need to update sjPlot and insight from GitHub first.
Then, it should work.

How to use multiple symbols in plots based on different variables in R?

I have run a PCA on measurements collected from individuals from four locations placed on four substrates with three replicates. For each individual I have the sex (male or female), the "karyotype" (a factor with three possible categories), and the calculated scores for the first two PCs.
I would like to make a plot where males and females have different symbols and the colour of the symbols depends on the karyotype. With the code below I have created a plot that gives me one symbol, colour-coded for the three karyotypes, and puts 95% confidence ellipses around the males and females.
How can I change the symbol for each sex while keeping the colouring dependent on the karyotype? I would also like this to be reflected in the legend.
One last question. Is it possible to add an arrow for each PC (not each individual) from the origin similar to those found in ordination plots?
Sample Data:
test <- structure(list(Location = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("Kampinge", "Kaseberga", "Molle", "Steninge"
), class = "factor"), Substrate = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L), .Label = c("Kampinge", "Kaseberga", "Molle",
"Steninge"), class = "factor"), Replicate = structure(c(1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 1L, 1L), .Label = c("1", "2", "3"), class = "factor"),
Sex = structure(c(2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L
), .Label = c("Female", "Male"), class = "factor"), Karyotype = structure(c(3L,
4L, 3L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 2L, 4L,
3L, 3L, 4L, 4L, 3L, 4L, 3L, 4L, 3L), .Label = c("", "BB",
"BD", "DD"), class = "factor"), Wing_Length = c(1439L, 1224L,
1558L, 1508L, 1286L, 1560L, 1377L, 1486L, 1638L, 1475L, 1703L,
1726L, 1668L, 1405L, 1737L, 1419L, 1530L, 1508L, 1525L, 1326L,
1609L, 1357L, 1830L, 1476L, 1661L), Leg_Length = c(465L,
357L, 610L, 415L, 343L, 560L, 435L, 390L, 425L, 514L, 693L,
695L, 657L, 454L, 661L, 382L, 431L, 531L, 435L, 387L, 407L,
414L, 752L, 524L, 650L), Development_Time = c(15, 15, 12,
12, 12, 12, 12, 12, 12, 15, 15, 15, 15, 15, 15, 15, 11, 12,
14, 12, 14, 14, 14, 11, 11), PC1 = c(-281.031806232855, -515.247908786317,
-96.7283446465637, -260.171340782501, -476.664849753781,
-127.267190895631, -347.839240839062, -293.08530374415, -154.026702195308,
-221.98257463847, 67.7504074590983, 86.6778734586525, 17.8073498265326,
-314.171132928964, 73.3068216627556, -349.616320093329, -233.030545551831,
-185.761623361004, -234.30046275676, -417.754317941649, -187.820500930148,
-376.653043663908, 203.025275308178, -214.80078992031, 7.94703091626344
), PC2 = c(-78.3082792875783, -133.370219905995, -113.211488986839,
4.31036861466361, -82.8593541869054, -73.5708675263244, -95.0643731443612,
9.37702847686542, 80.0290301136235, -92.8061497557789, -83.8731164047719,
-70.6537733486393, -78.706783632851, -91.6793310834752, -37.5144466525303,
-27.4637667171696, 6.14809390611532, -84.6794844768708, -0.127837123829732,
-90.9556028004192, 75.2353710655562, -91.7834027435658, -47.669385541585,
-99.8362257341741, -77.8269478596591)), .Names = c("Location",
"Substrate", "Replicate", "Sex", "Karyotype", "Wing_Length",
"Leg_Length", "Development_Time", "PC1", "PC2"), row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 12L, 13L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 30L, 31L), class = "data.frame")
## Plot: PC1 against PC2, coloured by karyotype, with a 95% ellipse per sex
par(mfrow = c(1, 1), mar = c(4, 4, 2, 1), pty = "s")
plot(
  test$PC1, test$PC2,
  xlab = "PC1", ylab = "PC2",
  pch = 16, col = as.numeric(test$Karyotype),
  xlim = c(-1000, 1000), ylim = c(-250, 250),
  las = 1, cex.lab = 1.5, cex.axis = 1.25, main = NULL
)
# PC1 and PC2 are columns 9 and 10 of `test`; select them by name.
ordiellipse(
  test[, c("PC1", "PC2")], test$Sex,
  conf = 0.95, col = "black", cex = 1.75, label = TRUE
)
legend(
  "bottomright",
  pch = 16,
  col = unique(as.numeric(test$Karyotype)),
  legend = unique(test$Karyotype),
  cex = 1.75
)
Replace the pch argument in your plot() call with something like:
pch=ifelse(test$Sex=='Male',15,19)
Try with ggplot:
library(ggplot2)
# Scatter of the first two PCs: colour encodes karyotype, point shape encodes
# sex, and stat_ellipse() draws one ellipse per sex group.
ggplot(
  test,
  aes(x = PC1, y = PC2, color = Karyotype, shape = Sex, group = Sex)
) +
  geom_point(size = 5) +
  stat_ellipse()

Resources