R novice and trying to improve the appearance of grouped box plots - r

I found some online datasets and managed to make some complex box plots that had most of the features I was looking for. I'd appreciate the community's help in making these plots look better, such as:
removing axes lines,
adding tick marks and making them point inwards,
changing the background color or font of facet_wrap,
and removing "Label" in my attached plots.
The program Veusz allows you to change the whisker mode (e.g. 1.5 IQR, 9/91 percentile, 1 stddev), and it would be nice to have that option as well. I also don't understand why the data points in my first box plot (linked below) are off center.
Linked below are screen shots of some grouped box plots that I made from my own data. I learn best by breaking and fixing things, and if someone has the time to write out the code for a box plot with lots of features, I will deconstruct it to see what each part does and search for the code online to get a better understanding of how it works.
Box plot of my data 1
Box plot of my data 2
Box plot of my data 3
structure(list(X. = structure(c(1L, 12L, 23L, 34L, 45L, 56L,
67L, 71L, 72L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L, 25L, 26L, 27L,
28L, 29L, 30L, 31L, 32L, 33L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L,
57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 68L, 69L, 70L
), .Label = c("# 1", "# 10", "# 11", "# 12", "# 13", "# 14",
"# 15", "# 16", "# 17", "# 18", "# 19", "# 2", "# 20", "# 21",
"# 22", "# 23", "# 24", "# 25", "# 26", "# 27", "# 28", "# 29",
"# 3", "# 30", "# 31", "# 32", "# 33", "# 34", "# 35", "# 36",
"# 37", "# 38", "# 39", "# 4", "# 40", "# 41", "# 42", "# 43",
"# 44", "# 45", "# 46", "# 47", "# 48", "# 49", "# 5", "# 50",
"# 51", "# 52", "# 53", "# 54", "# 55", "# 56", "# 57", "# 58",
"# 59", "# 6", "# 60", "# 61", "# 62", "# 63", "# 64", "# 65",
"# 66", "# 67", "# 68", "# 69", "# 7", "# 70", "# 71", "# 72",
"# 8", "# 9"), class = "factor"), Label = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Sample 1", "Sample 2", "Sample 3"
), class = "factor"), Rescan = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L), .Label = c("Rescan 0", "Rescan 1", "Rescan 2", "Rescan 3"
), class = "factor"), Porosity = c(15.19, 15.72, 15.83, 15.57,
15.11, 14.15, 17.24, 17.53, 18.21, 18.8, 18.32, 19.59, 17.4,
17.98, 19.33, 18.94, 18.32, 18.17, 19.67, 20.55, 19.04, 18.18,
19.59, 18.19, 18.97, 18.64, 18.83, 17.24, 18.09, 17.74, 22.28,
22.29, 21.35, 21.96, 23.12, 22.9, 22.9, 21.06, 23.34, 22.82,
21.42, 20.48, 21.22, 22.75, 21.62, 22.24, 24.28, 20.48, 14.79,
13.69, 13.4, 14.46, 14.13, 13.55, 20.67, 19.81, 21.2, 20.77,
22.29, 21.94, 19.49, 19.29, 19.43, 20.31, 21.77, 19.39, 22.37,
21.46, 21.86, 21.58, 21.82, 23.02)), class = "data.frame", row.names = c(NA,
-72L))

Here's an example of your plot with the things you wanted to do. I suppose you can adjust the code to your needs from here:
# Grouped box plot of Porosity per sample (Label), one facet per Rescan.
# Expects `mydata` to hold the data frame posted in the question
# (columns Label, Rescan, Porosity) and ggplot2 to be attached.
ggplot(mydata, aes(Label, Porosity, fill = Label)) +
  geom_boxplot() +
  # shift strips down
  facet_wrap(~Rescan, strip.position = "bottom") +
  # add exact points
  geom_point(alpha = 0.2) +
  # add your preferred colors here (hexcode also works fine)
  scale_fill_manual(values = c("red", "blue", "green")) +
  # appearance
  theme_classic() +
  # legend options
  theme(
    legend.title = element_blank(),
    legend.text = element_text(color = "black", size = 8),
    legend.position = "top", # "bottom" or "right"
    legend.key.size = unit(1, "cm"),
    legend.spacing.x = unit(5, "mm"),
    legend.direction = "horizontal", # or "vertical"
    legend.background = element_blank()
  ) +
  # reverse legend keys
  guides(fill = guide_legend(reverse = FALSE)) + # set TRUE to take action
  # scaling y-axis: start at 0 without padding and place a break every
  # 10 units (the step is an argument of seq(), not of max())
  scale_y_continuous(
    expand = c(0, 0),
    limits = c(0, max(mydata$Porosity)),
    breaks = seq(0, max(mydata$Porosity), by = 10)
  ) +
  # parameters of the axes
  theme(
    axis.text = element_text(
      color = "black", angle = 0, hjust = 0.5, vjust = 0.5, size = 8
    ),
    # axis.title = element_blank(), # activate for no axis titles
    # use element_blank() for no lines
    # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for
    # lines; `size` is kept here so older versions still work.
    axis.line = element_line(color = "black", size = 0.5),
    axis.ticks.length = unit(-0.1, "cm"), # negative values turn them inside
    plot.background = element_blank(),
    text = element_text(family = "Arial"),
    strip.background = element_blank(),
    strip.placement = "outside" # or "inside"
  ) +
  # name your axes
  ylab("your y lab") +
  xlab("your x lab") +
  # add x-axis in each facet
  annotate("segment", y = 0, yend = 0, x = 0, xend = Inf)

Hopefully this answers some of your questions, but to be honest it's hard to pull out the individual questions you are asking. Perhaps put them in dot points?
overall look:
There are some pre-made themes you can try, I like:
theme_void() - removes most things
theme_classic() - makes the background nicer to look at
theme_minimal() - removes the outer border and makes the background prettier
You can add them as a layer with + theme_void() on the end of the plotting code.
For all other specific customisations look at ?theme() as there are a whole bunch of things you can do.
labels:
To remove the legend title 'Label', add labs(fill = NULL) as a layer (use the name of the aesthetic that produces the legend, e.g. fill or colour). You can also use labs() to modify the x, y, caption and title text.
If you want to drop a layer from the legend, add show.legend = FALSE inside that layer (e.g. geom_jitter(show.legend = FALSE)). The layer is still drawn on the graph, but it no longer appears in the legend. To remove the legend entirely, add theme(legend.position = 'none').
facets:
To change the colour of the strip background in facet_wrap, use theme(strip.background = element_rect(fill = 'desired_colour')) (the color argument only changes the strip border)
To change the colour of the strip text in facet_wrap, use theme(strip.text = element_text(color = 'desired_colour'))
axis lines:
add theme(axis.line = element_blank())
points:
Your points are off center due to the geom_jitter(). Try geom_point() instead.

Related

ggplot plotting vertical lines only?

When entering the following code, I get a weird ggplot where it plots vertical lines.
# Scatter of pct_.below by subject, coloured by subgroup.
ggplot(
  data = otherdata,
  mapping = aes(x = subject, y = pct_.below)
) +
  geom_point(mapping = aes(colour = subgroup))
When doing geom_point rather than geom_line, I get the other graph. I have no idea why this happens. There are more points than there are subgroups but that's not the solution to the issue. What do I do to fix this ggplot?
# Reproducible toy data: 9 categories, each observed at x = 1..5, with
# 45 values drawn without replacement from 1..100.
set.seed(45)
df <- data.frame(
  x = rep(1:5, 9),
  val = sample(1:100, 45),
  variable = rep(paste0("category", 1:9), each = 5)
)
# One line per category, coloured by category.
ggplot(data = df, mapping = aes(x = x, y = val)) +
  geom_line(mapping = aes(colour = variable))
That code that I just posted works but I have no idea what the difference is between the two codes.
First 20 rows of the data:
structure(list(subject = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Algebra II",
"Biology I", "Chemistry", "English I", "English II", "English III",
"Geometry", "Int Math I", "Int Math II", "Int Math III", "US History"
), class = "factor"), pct_.below = c(0, 12.5, 12.4, 12.5, 0,
0, 12.5, 8.4, 11.1, 12.8, 11.9, 0, 11.5, 9, 100, 66.7, 100, 100,
100, 50), subgroup = structure(c(2L, 3L, 4L, 5L, 7L, 10L, 11L,
12L, 13L, 15L, 16L, 17L, 18L, 19L, 3L, 4L, 5L, 8L, 10L, 11L), .Label = c("All Students",
"Asian", "Black or African Amer", "Black/Hispanic/Native Amer",
"ED", "English Learner T 1-2", "English Learner T 1-4", "English Learners",
"English Learners with T 1-2", "English Learners with T 1-4",
"Hispanic", "Non-Black/Hispanic/Native Amer", "Non-ED", "Non-English Learners/T 1-2",
"Non-English Learners/T 1-4", "Non-Students with Disabilities",
"Students with Disabilities", "Super Subgroup", "White"), class = "factor")), row.names = c(2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 32L,
33L, 34L, 35L, 36L, 37L), class = "data.frame")

R ggplot2 & gganimate: animation changes at end

I am trying to create this figure that animates over time using the gganimate library, going from the 'baseline' timepoint to the 'late' timepoint. However, for some reason the image changes between frames 22-24 and again between frames 42-44, which throws off the visualization, and I am not sure how to fix it. Many thanks!
# Animated treemap that tweens baseline -> late -> baseline and writes a GIF.
# Expects the data frames PID50baseline and PID50late (columns Cluster.Name,
# Number, Type) to exist already.
# NOTE(review): gganimate() and the `frame` aesthetic look like the legacy
# (pre-1.0) gganimate API - confirm against the installed version.
library(ggplot2)
library(tweenr)
library(gganimate)
library(treemapify)

set.seed(1)

# Manual fill palette, one entry per tile colour.
colors <- c(
  "turquoise", "gold", "yellowgreen", "dodgerblue", "firebrick", "orchid4",
  "grey74", "forestgreen", "deeppink2", "grey0", "slateblue", "sienna2",
  "khaki2", "steelblue", "darksalmon", "darksalmon"
)

# Interpolate between the three states; the result carries a .frame column
# that is mapped to the `frame` aesthetic below.
tweened <- tween_states(
  list(PID50baseline, PID50late, PID50baseline),
  tweenlength = 8,
  statelength = 8,
  ease = "cubic-in-out",
  nframes = 50
)

# TRUE is spelled out instead of the reassignable shorthand T.
animated_plot <- ggplot(
  tweened,
  aes(
    area = Number, fill = Cluster.Name,
    subgroup = Type, frame = .frame
  )
) +
  geom_treemap(fixed = TRUE) +
  geom_treemap_subgroup_border(fixed = TRUE) +
  geom_treemap_subgroup_text(
    place = "centre", grow = TRUE, alpha = 0.5,
    colour = "black", fontface = "italic",
    min.size = 0, fixed = TRUE
  ) +
  scale_fill_manual(values = colors) +
  theme(legend.position = "bottom")

# 1/10 s between frames.
animation::ani.options(interval = 1 / 10)
gganimate(
  animated_plot, "animated_treemap_PID50.gif",
  title_frame = TRUE,
  ani.width = 200, ani.height = 200
)
The data I used for this:
dput(PID50baseline)
# PID50baseline as printed by dput(): 16 rows with Cluster.Name (factor),
# Number (integer) and Type (factor). The level "Cluster 13" is kept on a
# single line so the string holds no embedded newline and matches the
# corresponding level in PID50late.
structure(list(Cluster.Name = structure(c(13L, 14L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 15L, 15L), .Label = c("Cluster 13",
"Cluster 14", "Cluster 17", "Cluster 18", "Cluster 19", "Cluster 20",
"Cluster 27", "Cluster 35", "Cluster 36", "Cluster 40", "Cluster 41",
"Cluster 42", "Cluster 5", "Cluster 6", "Non-clonal"), class = "factor"),
Number = c(5L, 9L, 0L, 0L, 1L, 2L, 0L, 2L, 3L, 2L, 1L, 0L,
0L, 0L, 1L, 28L), Type = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L), .Label = c("Defective",
"Intact"), class = "factor")), .Names = c("Cluster.Name",
"Number", "Type"), class = "data.frame", row.names = c(NA, -16L))
dput(PID50late)
structure(list(Cluster.Name = structure(c(13L, 14L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 15L, 15L), .Label = c("Cluster 13",
"Cluster 14", "Cluster 17", "Cluster 18", "Cluster 19", "Cluster 20",
"Cluster 27", "Cluster 35", "Cluster 36", "Cluster 40", "Cluster 41",
"Cluster 42", "Cluster 5", "Cluster 6", "Non-clonal"), class = "factor"),
Number = c(2L, 10L, 2L, 2L, 1L, 0L, 5L, 0L, 5L, 0L, 3L, 3L,
2L, 2L, 18L, 59L), Type = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L), .Label = c("Defective",
"Intact"), class = "factor")), .Names = c("Cluster.Name",
"Number", "Type"), class = "data.frame", row.names = c(NA, -16L))
I believe treemapify omits areas with a size of 0. This could be the reason for your problem. In other words, replacing 0 with a small positive value greater than 0 (and using 16 distinct colors) gives you something like this:
# Swap every zero area for a tiny positive value, then build a palette with
# one distinct colour per level of Cluster.Name.
tweened$Number <- replace(tweened$Number, tweened$Number == 0, 1e-10)
colors <- unname(
  randomcoloR::distinctColorPalette(
    nlevels(tweened$Cluster.Name)
  )
)

GLMER warning: variance-covariance matrix [...] is not positive definite or contains NA values

I sometimes find that my GLMMs from glmer, package lme4, show the following warning messages, when their summary is called:
Warning messages:
1: In vcov.merMod(object, use.hessian = use.hessian) :
variance-covariance matrix computed from finite-difference Hessian is
not positive definite or contains NA values: falling back to var-cov estimated from RX
2: In vcov.merMod(object, correlation = correlation, sigm = sig) :
variance-covariance matrix computed from finite-difference Hessian is
not positive definite or contains NA values: falling back to var-cov estimated from RX
Similar questions I found here on Stackoverflow refer to other functions, not glmer, and the LME4 Wiki does not elaborate on that either. In this question, the problem was solved before that kind of error messages were tackled, and here the discussion focuses on a particular model rather than on the meaning of the warning message.
So the question is: should I worry about that message, or is it OK because it is simply a warning and not an error and, as it says, it is "falling back to var-cov estimated from RX" (whatever RX is) anyway?
Interestingly, although the summary states that the model failed to converge, I do not get the usual convergence warnings in red.
Here comes a (minimal) dataset:
testdata=structure(list(Site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("EO1", "EO2",
"EO3", "EO4", "EO5", "EO6"), class = "factor"), Treatment = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("control",
"no ants", "no birds", "no birds no ants"), class = "factor"),
Tree = structure(c(2L, 3L, 4L, 16L, 12L, 13L, 14L, 15L, 5L,
6L, 7L, 8L, 1L, 9L, 10L, 11L, 28L, 29L, 30L, 31L, 17L, 25L,
26L, 27L, 18L, 19L, 20L, 32L, 21L, 22L, 23L, 24L, 33L, 41L,
42L, 43L, 37L, 38L, 39L, 40L, 44L, 45L, 46L, 47L, 34L, 35L,
36L, 48L, 49L, 57L, 58L, 59L, 50L, 51L, 52L, 64L, 53L, 54L,
55L, 56L, 60L, 61L, 62L, 63L, 66L, 67L, 68L, 80L, 69L, 70L,
71L, 72L, 76L, 77L, 78L, 79L, 65L, 73L, 74L, 75L, 82L, 83L,
84L, 96L, 92L, 93L, 94L, 95L, 85L, 86L, 87L, 88L, 81L, 89L,
90L, 91L), .Label = c("EO1 1", "EO1 10", "EO1 11", "EO1 12",
"EO1 13", "EO1 14", "EO1 15", "EO1 16", "EO1 2", "EO1 3",
"EO1 4", "EO1 5", "EO1 6", "EO1 7", "EO1 8", "EO1 9", "EO2 1",
"EO2 10", "EO2 11", "EO2 12", "EO2 13", "EO2 14", "EO2 15",
"EO2 16", "EO2 2", "EO2 3", "EO2 4", "EO2 5", "EO2 6", "EO2 7",
"EO2 8", "EO2 9", "EO3 1", "EO3 10", "EO3 11", "EO3 12",
"EO3 13", "EO3 14", "EO3 15", "EO3 16", "EO3 2", "EO3 3",
"EO3 4", "EO3 5", "EO3 6", "EO3 7", "EO3 8", "EO3 9", "EO4 1",
"EO4 10", "EO4 11", "EO4 12", "EO4 13", "EO4 14", "EO4 15",
"EO4 16", "EO4 2", "EO4 3", "EO4 4", "EO4 5", "EO4 6", "EO4 7",
"EO4 8", "EO4 9", "EO5 1", "EO5 10", "EO5 11", "EO5 12",
"EO5 13", "EO5 14", "EO5 15", "EO5 16", "EO5 2", "EO5 3",
"EO5 4", "EO5 5", "EO5 6", "EO5 7", "EO5 8", "EO5 9", "EO6 1",
"EO6 10", "EO6 11", "EO6 12", "EO6 13", "EO6 14", "EO6 15",
"EO6 16", "EO6 2", "EO6 3", "EO6 4", "EO6 5", "EO6 6", "EO6 7",
"EO6 8", "EO6 9"), class = "factor"), predators_trunk = c(7L,
10L, 9L, 15L, 18L, 11L, 5L, 7L, 15L, 12L, 6L, 12L, 7L, 13L,
24L, 17L, 3L, 0L, 0L, 2L, 4L, 3L, 0L, 6L, 2L, 3L, 5L, 1L,
5L, 12L, 18L, 15L, 7L, 0L, 5L, 1L, 17L, 7L, 13L, 19L, 7L,
3L, 5L, 10L, 11L, 7L, 13L, 7L, 7L, 0L, 4L, 2L, 5L, 7L, 4L,
7L, 8L, 7L, 9L, 20L, 13L, 2L, 12L, 7L, 0L, 7L, 2L, 2L, 2L,
4L, 17L, 2L, 3L, 1L, 1L, 1L, 11L, 1L, 1L, 8L, 8L, 18L, 5L,
6L, 6L, 5L, 6L, 5L, 9L, 2L, 8L, 13L, 13L, 5L, 3L, 5L), pH_H2O = c(4.145,
4.145, 4.145, 4.145, 4.1825, 4.1825, 4.1825, 4.1825, 4.1325,
4.1325, 4.1325, 4.1325, 4.14125, 4.14125, 4.14125, 4.14125,
4.265, 4.265, 4.265, 4.265, 4.21, 4.21, 4.21, 4.21, 4.18375,
4.18375, 4.18375, 4.18375, 4.09625, 4.09625, 4.09625, 4.09625,
4.1575, 4.1575, 4.1575, 4.1575, 4.1125, 4.1125, 4.1125, 4.1125,
4.20875, 4.20875, 4.20875, 4.20875, 3.97125, 3.97125, 3.97125,
3.97125, 4.025, 4.025, 4.025, 4.025, 4.005, 4.005, 4.005,
4.005, 4.04, 4.04, 4.04, 4.04, 4.03125, 4.03125, 4.03125,
4.03125, 4.4575, 4.4575, 4.4575, 4.4575, 4.52, 4.52, 4.52,
4.52, 4.505, 4.505, 4.505, 4.505, 4.34875, 4.34875, 4.34875,
4.34875, 4.305, 4.305, 4.305, 4.305, 4.32, 4.32, 4.32, 4.32,
4.35, 4.35, 4.35, 4.35, 4.445, 4.445, 4.445, 4.445), ant_mean_abundance = c(53.85714,
53.85714, 53.85714, 53.85714, 24.28571, 24.28571, 24.28571,
24.28571, 45.5, 45.5, 45.5, 45.5, 51.14286, 51.14286, 51.14286,
51.14286, 66.28571, 66.28571, 66.28571, 66.28571, 76.5, 76.5,
76.5, 76.5, 65.71429, 65.71429, 65.71429, 65.71429, 8.642857,
8.642857, 8.642857, 8.642857, 109.3571, 109.3571, 109.3571,
109.3571, 25.14286, 25.14286, 25.14286, 25.14286, 101.3571,
101.3571, 101.3571, 101.3571, 31.78571, 31.78571, 31.78571,
31.78571, 78.64286, 78.64286, 78.64286, 78.64286, 93.28571,
93.28571, 93.28571, 93.28571, 63.14286, 63.14286, 63.14286,
63.14286, 67.14286, 67.14286, 67.14286, 67.14286, 44.0625,
44.0625, 44.0625, 44.0625, 23.875, 23.875, 23.875, 23.875,
95.8125, 95.8125, 95.8125, 95.8125, 49.125, 49.125, 49.125,
49.125, 57, 57, 57, 57, 38.125, 38.125, 38.125, 38.125, 40.6875,
40.6875, 40.6875, 40.6875, 22, 22, 22, 22), bird_activity = c(153.24,
153.24, 153.24, 153.24, 153.24, 153.24, 153.24, 153.24, 0,
0, 0, 0, 0, 0, 0, 0, 240.96, 240.96, 240.96, 240.96, 240.96,
240.96, 240.96, 240.96, 0, 0, 0, 0, 0, 0, 0, 0, 154.54, 154.54,
154.54, 154.54, 154.54, 154.54, 154.54, 154.54, 0, 0, 0,
0, 0, 0, 0, 0, 107.68, 107.68, 107.68, 107.68, 107.68, 107.68,
107.68, 107.68, 0, 0, 0, 0, 0, 0, 0, 0, 172.42, 172.42, 172.42,
172.42, 172.42, 172.42, 172.42, 172.42, 0, 0, 0, 0, 0, 0,
0, 0, 113.8, 113.8, 113.8, 113.8, 113.8, 113.8, 113.8, 113.8,
0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("Site", "Treatment",
"Tree", "predators_trunk", "pH_H2O", "ant_mean_abundance", "bird_activity"
), class = "data.frame", row.names = c(NA, -96L))
And here is the code leading to the warnings:
library(lme4)

# Negative-binomial GLMM with theta estimated by glmer.nb().
summary(
  glmer.nb(
    predators_trunk ~ scale(ant_mean_abundance) + scale(bird_activity) +
      scale(pH_H2O) + (1 | Site/Treatment),
    testdata,
    na.action = na.fail
  )
)

# Same model through glmer() with theta fixed at a given value.
summary(
  glmer(
    predators_trunk ~ scale(ant_mean_abundance) + scale(bird_activity) +
      scale(pH_H2O) + (1 | Site/Treatment),
    testdata,
    family = negative.binomial(theta = 4.06643400243645),
    na.action = na.fail
  )
)
Interestingly to me, the summary of the glmer.nb does not yield any warnings, but the call to glmer, using the theta that was estimated by glmer.nb, does give me the warnings. The latter is the model call that is generated by using dredge (MuMIn) on the corresponding glmer.nb full model.
This warning suggests that your standard error estimates might be less accurate. But as with all warnings, it's hard to know for sure and the best thing is to try to cross-check if you can.
In this case I saved your two fits, from glmer.nb and glmer, as g1 and g2. You can see that the estimates (point estimates, SEs, Z values ...) have changed a little bit, but not very much, so at the very least that should reassure you.
printCoefmat(coef(summary(g1)),digits=2)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.844 0.111 16.7 <2e-16 ***
scale(ant_mean_abundance) -0.347 0.077 -4.5 7e-06 ***
scale(bird_activity) -0.122 0.076 -1.6 0.107
scale(pH_H2O) -0.275 0.104 -2.6 0.008 **
> printCoefmat(coef(summary(g2)),digits=2)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.846 0.108 17.1 <2e-16 ***
scale(ant_mean_abundance) -0.347 0.077 -4.5 6e-06 ***
scale(bird_activity) -0.122 0.075 -1.6 0.102
scale(pH_H2O) -0.275 0.102 -2.7 0.007 **
I have a development version of lme4 on Github (the test_mods branch, hopefully integrated into the master branch soon: if you want to install it, you can use devtools::install_github("lme4/lme4",ref="test_mods")) which allows you to pick a more accurate (but slower) calculation for the standard errors: this gets us back to (nearly) the same standard errors as glmer.nb.
g3 <- update(g2, control=glmerControl(deriv.method="Richardson"))
printCoefmat(coef(summary(g3)),digits=2)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.846 0.111 16.7 <2e-16 ***
scale(ant_mean_abundance) -0.347 0.077 -4.5 6e-06 ***
scale(bird_activity) -0.122 0.076 -1.6 0.106
scale(pH_H2O) -0.275 0.104 -2.6 0.008 **
all.equal(coef(summary(g1))[,"Std. Error"],
coef(summary(g3))[,"Std. Error"])
[1] "Mean relative difference: 0.001597978"
The glmmTMB package (on Github) also gives almost the same results:
library(glmmTMB)
g5 <- glmmTMB(predators_trunk ~ scale(ant_mean_abundance) +
scale(bird_activity) + scale(pH_H2O) +
(1 | Site/Treatment), testdata,
family=nbinom2)
printCoefmat(coef(summary(g5))[["cond"]],digits=2)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.852 0.110 16.8 <2e-16 ***
scale(ant_mean_abundance) -0.348 0.077 -4.5 7e-06 ***
scale(bird_activity) -0.123 0.076 -1.6 0.106
scale(pH_H2O) -0.276 0.105 -2.6 0.008 **

R ggplot2 - errorbars layering over eachother

I have a data set in R which I want to get an error bar on, however it isn't plotting correctly (see photo). I have also included my data set.
ant.d<-structure(list(group.name = structure(c(1L, 18L, 20L, 24L, 8L,
13L, 15L, 17L, 12L, 19L, 21L, 22L, 23L, 9L, 11L, 16L, 2L, 3L,
4L, 5L, 6L, 7L, 10L, 14L), .Label = c("group 1", "group 10",
"group 11", "group 12", "group 13", "group 14", "group 15 ",
"group 16 ", "group 17", "group 18", "group 19", "group 2", "group 20",
"group 21", "group 22", "group 23", "group 24", "group 3", "group 4 ",
"group 5 ", "group 6", "group 7 ", "group 8 ", "group 9 "), class = "factor"),
habitat.type = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("edge", "forest", "Pasture"), class = "factor"),
species.richness = c(3L, 5L, 2L, 3L, 1L, 2L, 4L, 3L, 9L,
5L, 5L, 4L, 4L, 4L, 8L, 7L, 4L, 3L, 5L, 2L, 3L, 6L, 2L, 1L
), X = c(2.875, 2.875, 2.875, 2.875, 2.875, 2.875, 2.875,
2.875, 5.75, 5.75, 5.75, 5.75, 5.75, 5.75, 5.75, 5.75, 3.25,
3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25), se = c(2.32340059786604,
1.7996983644207, 2.84557296642458, 2.32340059786604, 4.02424788183988,
2.84557296642458, 2.01212394091994, 2.32340059786604, 1.34141596061329,
1.7996983644207, 1.7996983644207, 2.01212394091994, 2.01212394091994,
2.01212394091994, 1.42278648321229, 1.52102272991811, 2.01212394091994,
2.32340059786604, 1.7996983644207, 2.84557296642458, 2.32340059786604,
1.64289231816395, 2.84557296642458, 4.02424788183988)), .Names = c("group.name",
"habitat.type", "species.richness", "X", "se"), row.names = c(NA,
-24L), class = "data.frame")
What am I doing wrong? I've spent some time reading about error bars in R and I've not been successful.
# Asker's attempt, kept exactly as posted.
# Overwrites `se` with 1.96 * sd(all species.richness values) divided by the
# square root of each row's own species.richness.
ant.d$se <- 1.96*(sd(ant.d$species.richness, na.rm=T)/sqrt(ant.d$species.richness))
# Bar layer over the raw rows: x = habitat.type, y = species.richness.
p<-ggplot(data = ant.d, aes(y = species.richness, x = habitat.type)) +
geom_bar(stat="identity",position="dodge")
p
# NOTE(review): `dodge` is not defined anywhere in the code shown (e.g. via
# position_dodge()), and this second geom_bar() has no stat = "identity" -
# confirm against the asker's session.
p + geom_bar(position=dodge) + geom_errorbar(aes(ymax = species.richness + se, ymin=species.richness - se), position=dodge, width=0.25)
If I understand you correctly about what you are trying to achieve, then it's probably best to aggregate your data before plotting:
# Collapse to one row per habitat.type (mean species.richness and mean se),
# then draw one grey bar per habitat with an error bar of +/- se.
df <- aggregate(
  cbind(species.richness, se) ~ habitat.type,
  data = ant.d,
  FUN = mean
)
ggplot(data = df, mapping = aes(x = habitat.type, y = species.richness)) +
  geom_bar(stat = "identity", fill = "grey") +
  geom_errorbar(
    mapping = aes(
      ymax = species.richness + se,
      ymin = species.richness - se
    ),
    stat = "identity",
    width = 0.25
  )
which gives:
If you want groups within each habitat.type, you could do something like this:
# One bar per group.name inside each habitat.type; bars and error bars share
# the same dodge width so they line up. The legend is laid out in two columns.
ggplot(
  data = ant.d,
  mapping = aes(x = habitat.type, y = species.richness, fill = group.name)
) +
  geom_bar(stat = "identity", position = position_dodge(0.8)) +
  geom_errorbar(
    mapping = aes(
      ymax = species.richness + se,
      ymin = species.richness - se
    ),
    stat = "identity",
    width = 0.25,
    position = position_dodge(0.8)
  ) +
  scale_fill_discrete(guide = guide_legend(ncol = 2))
which gives:

Reverse fill order of stacked bars with faceting

I can't figure out how to get the fill order to reverse. Basically, I'm trying to get the guide and the fill to match an intrinsic order of the words from positive to negative:
The guide, and the fill order, from top to bottom should be:
"Far better than I expected", (Filled at very top, at top of legend)
"A little better than I expected",
"About what I expected",
"A little worse than I expected",
"Far worse than I expected" (Filled at very bottom, at bottom of legend)
You'll need sample data:
dat <- structure(list(Banner = structure(c(2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("Other", "Some Company"
), class = "factor"), Response = structure(c(1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L),
.Label = c(
"Far better than I expected",
"A little better than I expected",
"About what I expected",
"A little worse than I expected",
"Far worse than I expected"), class = "factor"), Frequency = c(1L,
6L, 9L, 0L, 0L, 29L, 71L, 149L, 32L, 6L, 1L, 7L, 16L, 1L, 0L,
38L, 90L, 211L, 24L, 6L, 0L, 0L, 8L, 1L, 1L, 6L, 13L, 109L, 35L,
9L), Proportion = c(6, 38, 56, 0, 0, 10, 25, 52, 11, 2, 4, 28,
64, 4, 0, 10, 24, 57, 7, 2, 0, 0, 80, 10, 10, 3, 8, 63, 20, 5
), Phase = c("Phase 1", "Phase 1", "Phase 1", "Phase 1", "Phase 1",
"Phase 1", "Phase 1", "Phase 1", "Phase 1", "Phase 1", "Phase 2",
"Phase 2", "Phase 2", "Phase 2", "Phase 2", "Phase 2", "Phase 2",
"Phase 2", "Phase 2", "Phase 2", "Phase 3", "Phase 3", "Phase 3",
"Phase 3", "Phase 3", "Phase 3", "Phase 3", "Phase 3", "Phase 3",
"Phase 3")), .Names = c("Banner", "Response", "Frequency", "Proportion",
"Phase"),
row.names = c(NA, 30L),
sig = character(0),
comment = "Overall, my experience was... by Company", q1 = "", q2 = "",
class = c("survcsub", "data.frame"))
Position labels
# Within each Banner x Phase group, put each label at the midpoint of its
# segment: running total of Proportion minus half of the segment itself.
dat <- ddply(dat, .(Banner, Phase), function(grp) {
  running_total <- cumsum(grp$Proportion)
  grp$Pos <- running_total - 0.5 * grp$Proportion
  grp
})
Plot
# Filled stacked bars of Proportion per Banner, one panel per Phase.
# Segments above 5 % are labelled with their percentage at Pos / 100.
ggplot(
  dat,
  aes(
    Banner, Proportion / 100,
    fill = Response,
    label = ifelse(Proportion > 5, percent(Proportion / 100), "")
  )
) +
  geom_bar(position = "fill", stat = "identity") +
  geom_text(aes(Banner, Pos / 100)) +
  facet_grid(~Phase) +
  scale_y_continuous(labels = percent) +
  labs(x = "\nCompany", y = "\nProportion")
What I've tried:
# Asker's attempt to flip the fill/legend order, kept exactly as posted.
# NOTE(review): `levels` receives the reversed data values (30 entries with
# repeats) rather than the reversed level set, rev(levels(dat$Response)) -
# confirm intent.
dat$Response <- factor(dat$Response, levels=rev(dat$Response))
# No dice, reverses the colour of the scale but not the position of the fill
To change the order of the segments in a stacked bar plot, use the order= argument inside aes() of geom_bar() and give it the column to order by (in this case Response). Wrapping that column in desc() reverses the order of the bars.
Using your original data frame (without last line of factor()).
# Same filled stacked bar plot, with the stacking order of the segments
# reversed through the `order` aesthetic on the bar layer.
# NOTE(review): the `order` aesthetic was deprecated in ggplot2 2.0.0; on
# current versions position_fill(reverse = TRUE) may be the replacement -
# confirm against the installed version.
ggplot(
  dat,
  aes(
    Banner, Proportion / 100,
    fill = Response,
    label = ifelse(Proportion > 5, percent(Proportion / 100), "")
  )
) +
  geom_bar(
    aes(order = desc(Response)),
    position = "fill",
    stat = "identity"
  ) +
  geom_text(aes(Banner, Pos / 100)) +
  facet_grid(~Phase) +
  scale_y_continuous(labels = percent) +
  labs(x = "\nCompany", y = "\nProportion")
To get the labels placed correctly, the calculation of their positions has to change as well:
# Label midpoints for the reversed stack: measure from the top (100) down,
# i.e. 100 minus the running total plus half of the segment itself.
dat <- ddply(dat, .(Banner, Phase), function(grp) {
  running_total <- cumsum(grp$Proportion)
  grp$Pos <- 100 - running_total + 0.5 * grp$Proportion
  grp
})

Resources