Running into something strange as I try to use dplyr's select command to reduce the number of columns I have. I name three columns but I keep getting 4. Aside from the star trek chain of command flashbacks I find this behaviour odd and not sure how to get around it. Also, why is this happening?
Here is my data frame, giving the number of occurrences in each block of time in a day. The dump below is pretty verbose for just 6 rows of data.
library(dplyr)
library(tidyr)
test <- structure(list(Day = c("Dec 10", "Dec 10", "Dec 10", "Dec 10",
"Dec 11", "Dec 11"), Number = c(10L, 10L, 10L, 10L, 11L, 11L),
time = c("08:30", "12:00", "15:30", "19:00", "08:30", "12:00"
), Start = structure(c(1544430600, 1544443200, 1544455800,
1544468400, 1544517000, 1544529600), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), n = c(29L, 74L, 20L, 26L, 29L,
32L)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -6L), vars = c("Day", "Number", "time"), drop = TRUE, indices = list(
0L, 1L, 2L, 3L, 4L, 5L), group_sizes = c(1L, 1L, 1L, 1L,
1L, 1L), biggest_group_size = 1L, labels = structure(list(Day = c("Dec 10",
"Dec 10", "Dec 10", "Dec 10", "Dec 11", "Dec 11"), Number = c(10L,
10L, 10L, 10L, 11L, 11L), time = c("08:30", "12:00", "15:30",
"19:00", "08:30", "12:00")), class = "data.frame", row.names = c(NA,
-6L), vars = c("Day", "Number", "time"), drop = TRUE, indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L,
50L, 51L, 52L), group_sizes = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
Day = c("Dec 10", "Dec 10", "Dec 10", "Dec 10", "Dec 11",
"Dec 11", "Dec 11", "Dec 11", "Dec 12", "Dec 12", "Dec 12",
"Dec 12", "Dec 13", "Dec 13", "Dec 13", "Dec 13", "Dec 14",
"Dec 14", "Dec 14", "Dec 14", "Dec 15", "Dec 15", "Dec 15",
"Dec 17", "Dec 17", "Dec 17", "Dec 17", "Dec 18", "Dec 18",
"Dec 18", "Dec 18", "Dec 19", "Dec 19", "Dec 19", "Dec 4",
"Dec 4", "Dec 4", "Dec 4", "Dec 5", "Dec 5", "Dec 5", "Dec 5",
"Dec 6", "Dec 6", "Dec 6", "Dec 6", "Dec 7", "Dec 7", "Dec 7",
"Dec 7", "Dec 8", "Dec 8", "Dec 8"), Number = c(10L, 10L,
10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L,
13L, 13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 17L, 17L, 17L,
17L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L),
time = c("08:30", "12:00", "15:30", "19:00", "08:30", "12:00",
"15:30", "19:00", "08:30", "12:00", "15:30", "19:00", "08:30",
"12:00", "15:30", "19:00", "08:30", "12:00", "15:30", "19:00",
"08:30", "12:00", "15:30", "08:30", "12:00", "15:30", "19:00",
"08:30", "12:00", "15:30", "19:00", "08:30", "12:00", "15:30",
"08:30", "12:00", "15:30", "19:00", "08:30", "12:00", "15:30",
"19:00", "08:30", "12:00", "15:30", "19:00", "08:30", "12:00",
"15:30", "19:00", "08:30", "12:00", "15:30")), class = "data.frame", row.names = c(NA,
-53L), vars = c("Day", "Number", "time"), drop = TRUE)))
You can see in the output that there's only 3 variables listed but oddly shows more. And when I select for specific variables or subtract others it won't work.
# Ask for three columns only; `test` is still a grouped_df at this point.
select(test, Day, time, n)
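The tibble should be ungrouped before selecting variables, as described in the question '"Adding missing grouping variables" message in dplyr in R'. On a grouped tibble, select() keeps the grouping variables, so `Number` is added back even though it was not requested: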
Without ungrouping:
# Grouped input: the grouping column left out of select() is added back.
test %>% select(Day, time, n)
> test %>%
+ select(Day, time, n)
Adding missing grouping variables: `Number`
# A tibble: 6 x 4
# Groups: Day, Number, time [6]
Number Day time n
<int> <chr> <chr> <int>
1 10 Dec 10 08:30 29
2 10 Dec 10 12:00 74
3 10 Dec 10 15:30 20
4 10 Dec 10 19:00 26
5 11 Dec 11 08:30 29
6 11 Dec 11 12:00 32
With ungrouping
# Drop the grouping first so that select() returns exactly the named columns.
select(ungroup(test), Day, time, n)
> test %>%
+ ungroup() %>%
+ select(Day, time, n)
# A tibble: 6 x 3
Day time n
<chr> <chr> <int>
1 Dec 10 08:30 29
2 Dec 10 12:00 74
3 Dec 10 15:30 20
4 Dec 10 19:00 26
5 Dec 11 08:30 29
6 Dec 11 12:00 32
Ungrouping the dataframe fixes it all.
Related
With the following data frame:
dta <- structure(list(sociodemographic_var = structure(c(3L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 18L, 19L, 20L, 21L, 22L,
23L, 24L, 26L, 18L, 20L, 21L, 26L, 13L, 16L, 21L, 22L, 26L, 26L,
9L, 13L, 17L, 18L, 20L, 21L, 23L, 26L, 20L, 26L), levels = c("1st grade",
"2nd grade", "3rd grade", "4th grade", "5th grade", "6th grade",
"7th grade", "8th grade", "9th grade", "10th grade", "11th grade",
"12th grade, no diploma", "High school graduate", "GED or equivalent",
"Some college, no degree", "Less than 1 year of college credit/post-secondary education (or less than 10 classes)",
"One year or more of college credit, no degree", "Associate degree: Occupational, Technical, or Vocational",
"Associate degree: Academic Program", "Bachelor's degree (ex. BA, AB, BS, BBS)",
"Master's degree (ex. MA, MS, MEng, MEd, MBA)", "Professional School degree (ex. MD, DDS, DVN, JD)",
"Doctoral degree (ex. PhD, EdD)", "Refused to answer", "Don't Know",
"unknown"), class = "factor"), event = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 7L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 11L, 11L), levels = c("Baseline", "0.5 Year", "1 Year",
"1.5 Year", "2 Year", "2.5 Year", "3 Year", "3.5 Year", "4 Year",
"4.5 Year", "5 Year", "5.5 Year", "6 Year", "Screener"), class = "factor"),
visit_type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), levels = c("on-site", "hybrid", "remote", "unknown"), class = "factor"),
n = c(2L, 13L, 5L, 9L, 15L, 18L, 26L, 25L, 192L, 27L, 485L,
224L, 183L, 1011L, 666L, 55L, 78L, 3L, 9L, 1L, 1L, 2L, 208L,
1L, 1L, 1L, 1L, 126L, 28L, 1L, 1L, 2L, 2L, 3L, 4L, 1L, 543L,
1L, 300L)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-39L))
I would assume that, generating a highcharter bar plot with:
library(highcharter) # v0.9.4
# Percent-stacked column chart: one column per `event`, split into series by
# `sociodemographic_var`, with stack totals labelled on the y axis.
dta |>
  hchart(
    type = "column",
    hcaes(x = "event", y = "n", group = "sociodemographic_var")
  ) |>
  hc_yAxis(
    title = list(text = "%"),
    max = 115,
    endOnTick = FALSE,
    stackLabels = list(enabled = TRUE)
  ) |>
  hc_xAxis(title = "") |>
  hc_plotOptions(series = list(stacking = "percent"))
the xAxis would be ordered by levels(dta$event):
levels(dta$event)
[1] "Baseline" "0.5 Year" "1 Year" "1.5 Year" "2 Year" "2.5 Year" "3 Year" "3.5 Year" "4 Year" "4.5 Year" "5 Year" "5.5 Year"
[13] "6 Year" "Screener"
But the ordering is different and neither alphabetical nor based on the total number of values:
I am interested to understand why it's the case and how to set the order right.
You can add categories to your hc_xAxis to make an order like this:
library(highcharter)
# Same percent-stacked column chart, but the x-axis categories are supplied
# explicitly from the factor levels of `dta$event` to control their order.
dta |>
  hchart(
    type = "column",
    hcaes(x = "event", y = "n", group = "sociodemographic_var")
  ) |>
  hc_yAxis(
    title = list(text = "%"),
    max = 115,
    endOnTick = FALSE,
    stackLabels = list(enabled = TRUE)
  ) |>
  hc_xAxis(
    title = "",
    categories = levels(dta$event)
  ) |>
  hc_plotOptions(series = list(stacking = "percent"))
Output:
I am trying to create a figure that animates over time using the gganimate library, going from the 'baseline' timepoint to the 'late' timepoint. However, for some reason the image changes between frames 22-24 and again between frames 42-44, which throws off the visualization. I am not sure how to fix it. Many thanks!
library(ggplot2)
library(tweenr)
library(gganimate)
library(treemapify)
set.seed(1)

# Fill colours handed to scale_fill_manual() below (16 entries).
# NOTE(review): "darksalmon" appears twice -- confirm whether a distinct
# final colour was intended.
colors <- c("turquoise", "gold", "yellowgreen", "dodgerblue", "firebrick", "orchid4",
            "grey74", "forestgreen", "deeppink2", "grey0", "slateblue", "sienna2",
            "khaki2", "steelblue", "darksalmon", "darksalmon")

# Interpolate baseline -> late -> baseline into 50 frames.
tweened <- tween_states(list(PID50baseline, PID50late, PID50baseline),
                        tweenlength = 8, statelength = 8,
                        ease = "cubic-in-out", nframes = 50)

# One treemap per frame: tile area from Number, fill by cluster, tiles grouped
# by Type. Logical flags are spelled TRUE rather than T, because T is an
# ordinary variable that can be reassigned.
# NOTE(review): the `frame` aesthetic and the gganimate() call below look like
# the legacy (pre-1.0) gganimate API -- confirm against the installed version.
animated_plot <- ggplot(tweened,
                        aes(area = Number, fill = Cluster.Name,
                            subgroup = Type, frame = .frame)) +
  geom_treemap(fixed = TRUE) +
  geom_treemap_subgroup_border(fixed = TRUE) +
  geom_treemap_subgroup_text(place = "centre", grow = TRUE, alpha = 0.5,
                             colour = "black", fontface = "italic",
                             min.size = 0, fixed = TRUE) +
  scale_fill_manual(values = colors) +
  theme(legend.position = "bottom")

# 0.1 s between frames, then render the animation to a GIF file.
animation::ani.options(interval = 1/10)
gganimate(animated_plot, "animated_treemap_PID50.gif", title_frame = TRUE,
          ani.width = 200, ani.height = 200)
The data I used for this:
dput(PID50baseline)
# Baseline state: 16 rows with columns Cluster.Name, Number and Type.
# The first factor level is "Cluster 13" on a single line; a line break inside
# the string would create the level "Cluster\n13", which does not match the
# levels of PID50late.
structure(list(Cluster.Name = structure(c(13L, 14L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 15L, 15L), .Label = c("Cluster 13",
"Cluster 14", "Cluster 17", "Cluster 18", "Cluster 19", "Cluster 20",
"Cluster 27", "Cluster 35", "Cluster 36", "Cluster 40", "Cluster 41",
"Cluster 42", "Cluster 5", "Cluster 6", "Non-clonal"), class = "factor"),
Number = c(5L, 9L, 0L, 0L, 1L, 2L, 0L, 2L, 3L, 2L, 1L, 0L,
0L, 0L, 1L, 28L), Type = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L), .Label = c("Defective",
"Intact"), class = "factor")), .Names = c("Cluster.Name",
"Number", "Type"), class = "data.frame", row.names = c(NA, -16L))
dput(PID50late)
structure(list(Cluster.Name = structure(c(13L, 14L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 15L, 15L), .Label = c("Cluster 13",
"Cluster 14", "Cluster 17", "Cluster 18", "Cluster 19", "Cluster 20",
"Cluster 27", "Cluster 35", "Cluster 36", "Cluster 40", "Cluster 41",
"Cluster 42", "Cluster 5", "Cluster 6", "Non-clonal"), class = "factor"),
Number = c(2L, 10L, 2L, 2L, 1L, 0L, 5L, 0L, 5L, 0L, 3L, 3L,
2L, 2L, 18L, 59L), Type = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L), .Label = c("Defective",
"Intact"), class = "factor")), .Names = c("Cluster.Name",
"Number", "Type"), class = "data.frame", row.names = c(NA, -16L))
I believe treemapify omits areas with a size of 0. This could be the reason for your problem. In other words, replacing 0 with a small positive value greater than 0 (and using 16 distinct colors) gives you something like this:
# Swap exact zeros for a tiny positive area (the accompanying text suggests
# treemapify omits zero-sized tiles).
tweened$Number <- replace(tweened$Number, tweened$Number == 0, 1e-10)

# One distinct colour per Cluster.Name level, with the names stripped.
colors <- unname(
  randomcoloR::distinctColorPalette(nlevels(tweened$Cluster.Name))
)
I sometimes find that my GLMMs from glmer, package lme4, show the following warning messages, when their summary is called:
Warning messages:
1: In vcov.merMod(object, use.hessian = use.hessian) :
variance-covariance matrix computed from finite-difference Hessian is
not positive definite or contains NA values: falling back to var-cov estimated from RX
2: In vcov.merMod(object, correlation = correlation, sigm = sig) :
variance-covariance matrix computed from finite-difference Hessian is
not positive definite or contains NA values: falling back to var-cov estimated from RX
Similar questions I found here on Stackoverflow refer to other functions, not glmer, and the LME4 Wiki does not elaborate on that either. In this question, the problem was solved before that kind of error messages were tackled, and here the discussion focuses on a particular model rather than on the meaning of the warning message.
So the question is: should I worry about that message, or is it OK because it is simply a warning and not an error, and as it says, it is "falling back to var-cov estimated from RX" (whatever RX is) anyway.
Interestingly, although the summary states that the model failed to converge, I do not get the usual convergence warnings in red.
Here comes a (minimal) dataset:
testdata=structure(list(Site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("EO1", "EO2",
"EO3", "EO4", "EO5", "EO6"), class = "factor"), Treatment = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("control",
"no ants", "no birds", "no birds no ants"), class = "factor"),
Tree = structure(c(2L, 3L, 4L, 16L, 12L, 13L, 14L, 15L, 5L,
6L, 7L, 8L, 1L, 9L, 10L, 11L, 28L, 29L, 30L, 31L, 17L, 25L,
26L, 27L, 18L, 19L, 20L, 32L, 21L, 22L, 23L, 24L, 33L, 41L,
42L, 43L, 37L, 38L, 39L, 40L, 44L, 45L, 46L, 47L, 34L, 35L,
36L, 48L, 49L, 57L, 58L, 59L, 50L, 51L, 52L, 64L, 53L, 54L,
55L, 56L, 60L, 61L, 62L, 63L, 66L, 67L, 68L, 80L, 69L, 70L,
71L, 72L, 76L, 77L, 78L, 79L, 65L, 73L, 74L, 75L, 82L, 83L,
84L, 96L, 92L, 93L, 94L, 95L, 85L, 86L, 87L, 88L, 81L, 89L,
90L, 91L), .Label = c("EO1 1", "EO1 10", "EO1 11", "EO1 12",
"EO1 13", "EO1 14", "EO1 15", "EO1 16", "EO1 2", "EO1 3",
"EO1 4", "EO1 5", "EO1 6", "EO1 7", "EO1 8", "EO1 9", "EO2 1",
"EO2 10", "EO2 11", "EO2 12", "EO2 13", "EO2 14", "EO2 15",
"EO2 16", "EO2 2", "EO2 3", "EO2 4", "EO2 5", "EO2 6", "EO2 7",
"EO2 8", "EO2 9", "EO3 1", "EO3 10", "EO3 11", "EO3 12",
"EO3 13", "EO3 14", "EO3 15", "EO3 16", "EO3 2", "EO3 3",
"EO3 4", "EO3 5", "EO3 6", "EO3 7", "EO3 8", "EO3 9", "EO4 1",
"EO4 10", "EO4 11", "EO4 12", "EO4 13", "EO4 14", "EO4 15",
"EO4 16", "EO4 2", "EO4 3", "EO4 4", "EO4 5", "EO4 6", "EO4 7",
"EO4 8", "EO4 9", "EO5 1", "EO5 10", "EO5 11", "EO5 12",
"EO5 13", "EO5 14", "EO5 15", "EO5 16", "EO5 2", "EO5 3",
"EO5 4", "EO5 5", "EO5 6", "EO5 7", "EO5 8", "EO5 9", "EO6 1",
"EO6 10", "EO6 11", "EO6 12", "EO6 13", "EO6 14", "EO6 15",
"EO6 16", "EO6 2", "EO6 3", "EO6 4", "EO6 5", "EO6 6", "EO6 7",
"EO6 8", "EO6 9"), class = "factor"), predators_trunk = c(7L,
10L, 9L, 15L, 18L, 11L, 5L, 7L, 15L, 12L, 6L, 12L, 7L, 13L,
24L, 17L, 3L, 0L, 0L, 2L, 4L, 3L, 0L, 6L, 2L, 3L, 5L, 1L,
5L, 12L, 18L, 15L, 7L, 0L, 5L, 1L, 17L, 7L, 13L, 19L, 7L,
3L, 5L, 10L, 11L, 7L, 13L, 7L, 7L, 0L, 4L, 2L, 5L, 7L, 4L,
7L, 8L, 7L, 9L, 20L, 13L, 2L, 12L, 7L, 0L, 7L, 2L, 2L, 2L,
4L, 17L, 2L, 3L, 1L, 1L, 1L, 11L, 1L, 1L, 8L, 8L, 18L, 5L,
6L, 6L, 5L, 6L, 5L, 9L, 2L, 8L, 13L, 13L, 5L, 3L, 5L), pH_H2O = c(4.145,
4.145, 4.145, 4.145, 4.1825, 4.1825, 4.1825, 4.1825, 4.1325,
4.1325, 4.1325, 4.1325, 4.14125, 4.14125, 4.14125, 4.14125,
4.265, 4.265, 4.265, 4.265, 4.21, 4.21, 4.21, 4.21, 4.18375,
4.18375, 4.18375, 4.18375, 4.09625, 4.09625, 4.09625, 4.09625,
4.1575, 4.1575, 4.1575, 4.1575, 4.1125, 4.1125, 4.1125, 4.1125,
4.20875, 4.20875, 4.20875, 4.20875, 3.97125, 3.97125, 3.97125,
3.97125, 4.025, 4.025, 4.025, 4.025, 4.005, 4.005, 4.005,
4.005, 4.04, 4.04, 4.04, 4.04, 4.03125, 4.03125, 4.03125,
4.03125, 4.4575, 4.4575, 4.4575, 4.4575, 4.52, 4.52, 4.52,
4.52, 4.505, 4.505, 4.505, 4.505, 4.34875, 4.34875, 4.34875,
4.34875, 4.305, 4.305, 4.305, 4.305, 4.32, 4.32, 4.32, 4.32,
4.35, 4.35, 4.35, 4.35, 4.445, 4.445, 4.445, 4.445), ant_mean_abundance = c(53.85714,
53.85714, 53.85714, 53.85714, 24.28571, 24.28571, 24.28571,
24.28571, 45.5, 45.5, 45.5, 45.5, 51.14286, 51.14286, 51.14286,
51.14286, 66.28571, 66.28571, 66.28571, 66.28571, 76.5, 76.5,
76.5, 76.5, 65.71429, 65.71429, 65.71429, 65.71429, 8.642857,
8.642857, 8.642857, 8.642857, 109.3571, 109.3571, 109.3571,
109.3571, 25.14286, 25.14286, 25.14286, 25.14286, 101.3571,
101.3571, 101.3571, 101.3571, 31.78571, 31.78571, 31.78571,
31.78571, 78.64286, 78.64286, 78.64286, 78.64286, 93.28571,
93.28571, 93.28571, 93.28571, 63.14286, 63.14286, 63.14286,
63.14286, 67.14286, 67.14286, 67.14286, 67.14286, 44.0625,
44.0625, 44.0625, 44.0625, 23.875, 23.875, 23.875, 23.875,
95.8125, 95.8125, 95.8125, 95.8125, 49.125, 49.125, 49.125,
49.125, 57, 57, 57, 57, 38.125, 38.125, 38.125, 38.125, 40.6875,
40.6875, 40.6875, 40.6875, 22, 22, 22, 22), bird_activity = c(153.24,
153.24, 153.24, 153.24, 153.24, 153.24, 153.24, 153.24, 0,
0, 0, 0, 0, 0, 0, 0, 240.96, 240.96, 240.96, 240.96, 240.96,
240.96, 240.96, 240.96, 0, 0, 0, 0, 0, 0, 0, 0, 154.54, 154.54,
154.54, 154.54, 154.54, 154.54, 154.54, 154.54, 0, 0, 0,
0, 0, 0, 0, 0, 107.68, 107.68, 107.68, 107.68, 107.68, 107.68,
107.68, 107.68, 0, 0, 0, 0, 0, 0, 0, 0, 172.42, 172.42, 172.42,
172.42, 172.42, 172.42, 172.42, 172.42, 0, 0, 0, 0, 0, 0,
0, 0, 113.8, 113.8, 113.8, 113.8, 113.8, 113.8, 113.8, 113.8,
0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("Site", "Treatment",
"Tree", "predators_trunk", "pH_H2O", "ant_mean_abundance", "bird_activity"
), class = "data.frame", row.names = c(NA, -96L))
And here is the code leading to the warnings:
library(lme4)
# Negative-binomial GLMM fitted with glmer.nb().
summary(
  glmer.nb(
    predators_trunk ~ scale(ant_mean_abundance) + scale(bird_activity) +
      scale(pH_H2O) + (1 | Site/Treatment),
    testdata,
    na.action = na.fail
  )
)

# Same model formula fitted through glmer() with theta fixed at a literal value.
summary(
  glmer(
    predators_trunk ~ scale(ant_mean_abundance) + scale(bird_activity) +
      scale(pH_H2O) + (1 | Site/Treatment),
    testdata,
    family = negative.binomial(theta = 4.06643400243645),
    na.action = na.fail
  )
)
Interestingly to me, the summary of the glmer.nb does not yield any warnings, but the call to glmer, using the theta that was estimated by glmer.nb, does give me the warnings. The latter is the model call that is generated by using dredge (MuMIn) on the corresponding glmer.nb full model.
This warning suggests that your standard error estimates might be less accurate. But as with all warnings, it's hard to know for sure and the best thing is to try to cross-check if you can.
In this case I saved your two fits, from glmer.nb and glmer, as g1 and g2. You can see that the estimates (point estimates, SEs, Z values ...) have changed a little bit, but not very much, so at the very least that should reassure you.
printCoefmat(coef(summary(g1)),digits=2)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.844 0.111 16.7 <2e-16 ***
scale(ant_mean_abundance) -0.347 0.077 -4.5 7e-06 ***
scale(bird_activity) -0.122 0.076 -1.6 0.107
scale(pH_H2O) -0.275 0.104 -2.6 0.008 **
> printCoefmat(coef(summary(g2)),digits=2)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.846 0.108 17.1 <2e-16 ***
scale(ant_mean_abundance) -0.347 0.077 -4.5 6e-06 ***
scale(bird_activity) -0.122 0.075 -1.6 0.102
scale(pH_H2O) -0.275 0.102 -2.7 0.007 **
I have a development version of lme4 on Github (the test_mods branch, hopefully integrated into the master branch soon: if you want to install it, you can use devtools::install_github("lme4/lme4",ref="test_mods")) which allows you to pick a more accurate (but slower) calculation for the standard errors: this gets us back to (nearly) the same standard errors as glmer.nb.
# Refit g2 with the Richardson derivative method, then print its coefficient
# table. Per the accompanying text, this control option comes from a
# development branch of lme4.
g3 <- update(
  g2,
  control = glmerControl(deriv.method = "Richardson")
)
printCoefmat(coef(summary(g3)), digits = 2)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.846 0.111 16.7 <2e-16 ***
scale(ant_mean_abundance) -0.347 0.077 -4.5 6e-06 ***
scale(bird_activity) -0.122 0.076 -1.6 0.106
scale(pH_H2O) -0.275 0.104 -2.6 0.008 **
# Compare the "Std. Error" columns of the g1 and g3 coefficient tables.
all.equal(
  coef(summary(g1))[, "Std. Error"],
  coef(summary(g3))[, "Std. Error"]
)
[1] "Mean relative difference: 0.001597978"
The glmmTMB package (on Github) also gives almost the same results:
library(glmmTMB)
# Fit the same formula with glmmTMB (nbinom2 family) and print the
# conditional-model coefficient table.
g5 <- glmmTMB(
  predators_trunk ~ scale(ant_mean_abundance) + scale(bird_activity) +
    scale(pH_H2O) + (1 | Site/Treatment),
  testdata,
  family = nbinom2
)
printCoefmat(coef(summary(g5))[["cond"]], digits = 2)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.852 0.110 16.8 <2e-16 ***
scale(ant_mean_abundance) -0.348 0.077 -4.5 7e-06 ***
scale(bird_activity) -0.123 0.076 -1.6 0.106
scale(pH_H2O) -0.276 0.105 -2.6 0.008 **
I am looking for a solution for the following problem:
Currently I am producing a wind rose plot with ggplot2 following the script provided by Andy Clifton over here.
This works perfectly and I am very thankful for the script provided. However I am having one problem. The labels for the X-Axis get truncated by the edge of the plot, especially when plotting in a facet.
Is there a way to expand the plot so that the text is fully shown?
(an example picture can be found at the link above)
To practice with a windrose, using the data frame at the end, I created the following plot, which has no issues of x-axis labels being truncated. It admittedly doesn't concern wind speeds or a 24-hour framework, as it is intended for amounts paid to two kinds of vendors, but the plot and the data may help others respond more easily.
> dput(lfvendrose)
structure(list(Time = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), Payee = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Firm", "Vendor"), class = "factor"),
Data1 = structure(c(12L, 10L, 8L, 13L, 7L, 9L, 14L, 3L, 5L,
17L, 6L, 11L, 2L, 15L, 18L, 2L, 4L, 19L, 1L, 16L, 18L, 2L,
15L, 18L), .Label = c("0, 1", "0, 13", "0, 15", "0, 2", "0, 20",
"0, 34", "0, 39", "0, 40", "0, 41", "0, 45", "0, 48", "0, 50",
"0, 64", "0, 68", "0, 9", "0, 90", "0, 94", "0,11", "0,16"
), class = "factor"), Month = structure(c(5L, 4L, 8L, 1L,
9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L,
6L, 2L, 12L, 11L, 10L, 3L), .Label = c("Apr", "Aug", "Dec",
"Feb", "Jan", "Jul", "Jun", "Mar", "May", "Nov", "Oct", "Sep"
), class = "factor"), Month.1 = structure(c(5L, 4L, 8L, 1L,
9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L,
6L, 2L, 12L, 11L, 10L, 3L), .Label = c("Apr", "Aug", "Dec",
"Feb", "Jan", "Jul", "Jun", "Mar", "May", "Nov", "Oct", "Sep"
), class = "factor"), Data2 = structure(c(11L, 10L, 6L, 12L,
8L, 4L, 13L, 3L, 7L, 15L, 9L, 5L, 2L, 14L, 16L, 1L, 11L,
10L, 6L, 12L, 8L, 4L, 13L, 3L), .Label = c("0, 13,1", "0, 13,50",
"0, 15, 30", "0, 40, 100", "0, 40,1", "0, 40,3", "0, 40,5",
"0, 45, 5", "0, 45,15", "0, 45,2", "0, 50,1", "0, 64, 4",
"0, 64,200", "0, 9,150", "0, 94,10", "0,11, 400"), class = "factor")), .Names = c("Time",
"Payee", "Data1", "Month", "Month.1", "Data2"), class = "data.frame", row.names = c(NA,
-24L))
Hi everybody, I am working with a data frame in R to build a nice graph. I have built the graph, but I have some problems with legends. My data frame DF has the following form (the dput() version is included at the end):
Mes Estado Numero Label
1 2 X 7 22 (1.19%)
2 2 A 13 22 (1.19%)
3 2 Z 2 22 (1.19%)
4 3 X 19 30 (1.62%)
5 3 A 10 30 (1.62%)
6 3 Z 1 30 (1.62%)
7 4 X 19 31 (1.68%)
8 4 A 11 31 (1.68%)
9 4 Z 1 31 (1.68%)
10 5 X 17 28 (1.52%)
11 5 A 7 28 (1.52%)
12 5 Z 4 28 (1.52%)
It has 4 variables: Mes, Estado, Numero and Label. I want to show the distribution of Estado by Mes according to the number of cases (Numero), so I built this graphic with the following code:
# Stacked bars of Numero per Mes, filled by Estado, with each count drawn as a
# bold text label at its own Numero value.
AAA <- ggplot(DF, aes(x = Mes, y = Numero, fill = Estado)) +
  geom_bar(stat = "identity") +
  # Namespace-qualified so the snippet does not rely on library(scales)
  # having been attached beforehand.
  scale_y_continuous(labels = scales::comma) +
  geom_text(aes(label = Numero), fontface = "bold", size = 6)
print(AAA)
As you can see, the distribution of Estado for each value of Mes according to Numero works fine, but the problem is with the labels. I would like to place each label in the middle of its respective coloured segment. For example, in the first bar, 2 should be located in the blue area, 7 in the green area and 13 in the rose area. But the labels are not in the right places. Moreover, DF has a variable named Label whose values I am trying to add at the top of each bar. For example, for Mes=2 the label is 22 (1.19%): 22 is the sum of all values in the bar, and the value in parentheses is the ratio of that sum to 1848 (22/1848). I would like to add those values at the top of each bar, but when I tried to use another geom_text() with unique(PPP$Label) I got an error. The dput version of DF is as follows:
DF<-structure(list(Mes = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L,
9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 13L,
13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L, 17L,
18L, 18L, 19L, 20L), .Label = c("2", "3", "4", "5", "6", "7",
"8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18",
"19", "20", "21"), class = "factor"), Estado = structure(c(2L,
1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L,
2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L,
1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 1L, 3L,
1L, 3L, 1L, 3L), .Label = c("A", "X", "Z"), class = "factor"),
Numero = c(7L, 13L, 2L, 19L, 10L, 1L, 19L, 11L, 1L, 17L,
7L, 4L, 19L, 8L, 7L, 11L, 13L, 15L, 8L, 3L, 13L, 13L, 8L,
6L, 14L, 4L, 11L, 14L, 5L, 3L, 4L, 3L, 5L, 12L, 6L, 2L, 9L,
4L, 2L, 6L, 5L, 1L, 5L, 2L, 1L, 2L, 3L, 5L, 2L, 3L, 2L, 1L,
1L), Label = c("22 (1.19%)", "22 (1.19%)", "22 (1.19%)",
"30 (1.62%)", "30 (1.62%)", "30 (1.62%)", "31 (1.68%)", "31 (1.68%)",
"31 (1.68%)", "28 (1.52%)", "28 (1.52%)", "28 (1.52%)", "34 (1.84%)",
"34 (1.84%)", "34 (1.84%)", "24 (1.3%)", "24 (1.3%)", "26 (1.41%)",
"26 (1.41%)", "26 (1.41%)", "34 (1.84%)", "34 (1.84%)", "34 (1.84%)",
"24 (1.3%)", "24 (1.3%)", "24 (1.3%)", "30 (1.62%)", "30 (1.62%)",
"30 (1.62%)", "10 (0.54%)", "10 (0.54%)", "10 (0.54%)", "23 (1.24%)",
"23 (1.24%)", "23 (1.24%)", "15 (0.81%)", "15 (0.81%)", "15 (0.81%)",
"13 (0.7%)", "13 (0.7%)", "13 (0.7%)", "8 (0.43%)", "8 (0.43%)",
"8 (0.43%)", "6 (0.32%)", "6 (0.32%)", "6 (0.32%)", "7 (0.38%)",
"7 (0.38%)", "5 (0.27%)", "5 (0.27%)", "1 (0.05%)", "1 (0.05%)"
)), .Names = c("Mes", "Estado", "Numero", "Label"), row.names = c(NA,
-53L), class = "data.frame")
Many thanks for your help.
First, we calculate the positions of the midpoints (NumeroPos) and the heights of the stacked bars (NumeroSum).
# Midpoint of each segment: running total within Mes minus half of the
# segment's own height.
DF$NumeroPos <- ave(DF$Numero, DF$Mes, FUN = cumsum) - DF$Numero / 2
# Total bar height per Mes, repeated on every row of that Mes.
DF$NumeroSum <- ave(DF$Numero, DF$Mes, FUN = sum)
Now, the new variables can be used for creating the labels. Note that we use a subset of the data frame for the labels on top of the bars since we need exactly one label for each bar.
library(ggplot2)
# Stacked bars of Numero per Mes, filled by Estado, with two label layers.
ggplot(DF, aes(x = Mes, y = Numero, fill = Estado)) +
  geom_bar(stat = "identity") +
  # Segment labels: position_stack(vjust = 0.5) stacks the text exactly as
  # the bars are stacked and centres it in each segment. Hand-computed
  # cumulative midpoints depend on row order and no longer line up since
  # ggplot2 2.2.0 reversed the stacking order.
  geom_text(aes(label = Numero), position = position_stack(vjust = 0.5),
            fontface = "bold", size = 6) +
  # Bar totals: one row per Mes, placed at the bar height and nudged upwards.
  geom_text(data = DF[!duplicated(DF$Mes), ],
            aes(y = NumeroSum, label = Label), vjust = -0.5, size = 4)