R: Trying to remove NAs from a boxplot - r

I am trying to do a basic boxplot, and I can't get NA values away from it. I have tried many tricks for this issue. For example this one.
ggplot(df=subset(df, !is.na(sum_variable)), aes(x = gender, y = sum_variable, fill = gender)) +
stat_boxplot(geom ="errorbar", width = 0.5) +
geom_boxplot(fill = "light blue") +
stat_summary(fun.y=mean, geom="point", shape=10, size=3.5, color="black") +
ggtitle("Title") +
theme_bw() + theme(legend.position="none")
And this ggplot(na.omit(data), aes(x=luse, y=rich)) +
And none of these solve the issue. What would you recommend?
Data↓
structure(list(gender = structure(c(2L, 2L, NA, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 2L, 1L, 1L), .Label = c("1", "2"), class = "factor"),
sum_variable = c(9, 6, 13, 3, 4, 3, 12, 2, 7, 8, 7, 4, 5,
10, 2, 5, 4, NA, 14, 9, 2, 5, 7, 3, NA, 3, 5, 7, 3, 8, 3,
3, 4, 8, 10, 9, 5, 7, 8, 4, 9, NA, 10, 14, 10, 3, 4, 10,
3, NA, 5, 3, 4, 4, NA, 5, 4, 6, 6, 9, 6, 2, 3, NA, 4, NA,
2, 2, 6, 5, 5, 3, 5, NA, 4, 4)), class = c("rowwise_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -76L), groups = structure(list(
.rows = structure(list(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L,
43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L,
54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L,
65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L,
76L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -76L), class = c("tbl_df",
"tbl", "data.frame")))

You can filter your NA values before creating the plot:
# Drop rows where either the grouping factor or the response is missing before
# plotting, so that no "NA" box appears on the x axis and ggplot2 has no
# missing values left to warn about.
df %>%
  filter(!is.na(gender), !is.na(sum_variable)) %>%
  ggplot(aes(x = gender, y = sum_variable, fill = gender)) +
  # Whisker end caps, drawn from the boxplot statistics.
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(fill = "light blue") +
  # `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0.
  stat_summary(
    fun = mean, geom = "point", shape = 10, size = 3.5, color = "black"
  ) +
  ggtitle("Title") +
  theme_bw() +
  theme(legend.position = "none")

Try removing NAs first before passing in the dataset.
# Keep only complete rows. `df` is the data frame from the question; the
# original snippet referred to an undefined object `dta`.
sub_dta <- na.omit(df)

ggplot(data = sub_dta, aes(x = gender, y = sum_variable, fill = gender)) +
  # Whisker end caps, drawn from the boxplot statistics.
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(fill = "light blue") +
  # Mark the group mean on top of each box.
  stat_summary(
    fun = mean, geom = "point", shape = 10, size = 3.5, color = "black"
  ) +
  ggtitle("Title") +
  theme_bw() +
  theme(legend.position = "none")

One solution is to use complete.cases
# complete.cases() flags the rows that have no NA in any column; subsetting
# with it removes the incomplete rows before they reach ggplot().
ggplot(
  df[complete.cases(df), ],
  aes(x = gender, y = sum_variable, fill = gender)
) +
  # Whisker end caps, drawn from the boxplot statistics.
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(fill = "light blue") +
  # `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0.
  stat_summary(
    fun = mean, geom = "point", shape = 10, size = 3.5, color = "black"
  ) +
  ggtitle("Title") +
  theme_bw() +
  theme(legend.position = "none")
Data
df <- structure(list(gender = structure(c(2L, 2L, NA, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 2L, 1L, 1L), levels = c("1", "2"), class = "factor"),
sum_variable = c(9, 6, 13, 3, 4, 3, 12, 2, 7, 8, 7, 4, 5,
10, 2, 5, 4, NA, 14, 9, 2, 5, 7, 3, NA, 3, 5, 7, 3, 8, 3,
3, 4, 8, 10, 9, 5, 7, 8, 4, 9, NA, 10, 14, 10, 3, 4, 10,
3, NA, 5, 3, 4, 4, NA, 5, 4, 6, 6, 9, 6, 2, 3, NA, 4, NA,
2, 2, 6, 5, 5, 3, 5, NA, 4, 4)), class = c("rowwise_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -76L), groups = structure(list(
.rows = structure(list(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L,
43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L,
54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L,
65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L,
76L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -76L), class = c("tbl_df",
"tbl", "data.frame")))

Related

Obtaining intercept and slope per patient (as diagnostics in a repeated measurements study) using the lmList and intervals functions

I have a repeated measurements dataset of 24 stroke patients in which I want to assess the effect of three different types of rehabilitation (Group) on functional recovery scores (Barthel_index). Each patient's functional ability was measured weekly (Time_num) for 8 weeks.
The data looks as follows:
library(dplyr)
library(magrittr)
library(nlme)
# lmer() is a function in the lme4 package; there is no package called "lmer".
# Both nlme and lme4 define lmList(), so the calls below qualify it with `::`.
library(lme4)
mydata <-
structure(list(Subject = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 19L,
19L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L,
21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 23L,
23L, 23L, 23L, 23L, 23L, 23L, 23L, 24L, 24L, 24L, 24L, 24L, 24L,
24L, 24L), Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("A", "B", "C"), class = "factor"),
Time_num = c(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7,
8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2,
3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5,
6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8,
1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3,
4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6,
7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1,
2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4,
5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7,
8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2,
3, 4, 5, 6, 7, 8), Barthel_index = c(45L, 45L, 45L, 45L,
80L, 80L, 80L, 90L, 20L, 25L, 25L, 25L, 30L, 35L, 30L, 50L,
50L, 50L, 55L, 70L, 70L, 75L, 90L, 90L, 25L, 25L, 35L, 40L,
60L, 60L, 70L, 80L, 100L, 100L, 100L, 100L, 100L, 100L, 100L,
100L, 20L, 20L, 30L, 50L, 50L, 60L, 85L, 95L, 30L, 35L, 35L,
40L, 50L, 60L, 75L, 85L, 30L, 35L, 45L, 50L, 55L, 65L, 65L,
70L, 40L, 55L, 60L, 70L, 80L, 85L, 90L, 90L, 65L, 65L, 70L,
70L, 80L, 80L, 80L, 80L, 30L, 30L, 40L, 45L, 65L, 85L, 85L,
85L, 25L, 35L, 35L, 35L, 40L, 45L, 45L, 45L, 45L, 45L, 80L,
80L, 80L, 80L, 80L, 80L, 15L, 15L, 10L, 10L, 10L, 20L, 20L,
20L, 35L, 35L, 35L, 45L, 45L, 45L, 50L, 50L, 40L, 40L, 40L,
55L, 55L, 55L, 60L, 65L, 20L, 20L, 30L, 30L, 30L, 30L, 30L,
30L, 35L, 35L, 35L, 40L, 40L, 40L, 40L, 40L, 35L, 35L, 35L,
40L, 40L, 40L, 45L, 45L, 45L, 65L, 65L, 65L, 80L, 85L, 95L,
100L, 45L, 65L, 70L, 90L, 90L, 95L, 95L, 100L, 25L, 30L,
30L, 35L, 40L, 40L, 40L, 40L, 25L, 25L, 30L, 30L, 30L, 30L,
35L, 40L, 15L, 35L, 35L, 35L, 40L, 50L, 65L, 65L)), row.names = c(NA,
-192L), class = c("tbl_df", "tbl", "data.frame"))
head(mydata)
# A tibble: 6 x 4
Subject Group Time_num Barthel_index
<int> <fct> <dbl> <int>
1 1 A 1 45
2 1 A 2 45
3 1 A 3 45
4 1 A 4 45
5 1 A 5 80
6 1 A 6 80
To see if and how intercepts and slopes vary per patient, I want to plot the intercepts and slopes using the lmList and intervals functions.
Question 1 I don't understand why calling the lmList() function in lme4 gives me 48 warnings while the same function in nlme does not:
lmlist <-
lme4::lmList(Barthel_index ~ Time_num | Subject,
data=mydata)
> There were 48 warnings (use warnings() to see them)
lmlist <-
nlme::lmList(Barthel_index ~ Time_num | Subject,
data=mydata)
# Works fine
Question 2 I am trying to extract the confidence intervals for each regression slope, but this gives a warning and NaN for certain values:
lmlist <-
nlme::lmList(Barthel_index ~ Time_num | Subject,
data=mydata)
coefs <- coef(lmlist)
names(coefs) <- c("Intercepts", "Slopes")
intervals(lmlist)
> Warning message:
In summary.lm(el) : essentially perfect fit: summary may be unreliable
Question 3 Now that I have my new list of coefficients with confidence intervals, I'd like to plot them to see if and how much intercepts and slopes vary amongst patients. I'm trying to achieve something like the following:
Any help? Thanks.
Q1. The warnings are occurring in lme4::lmList because you're using a tibble as input: no warnings from
lme4::lmList(Barthel_index ~ Time_num | Subject,
data=as.data.frame(mydata))
(this is a harmless "infelicity" or buglet in lme4 ...)
Q2. If you look at the list of coefficients, you'll see that subject 5 is the problematic one. The data for this subject all have the same response value: thus it's not surprising that we can't compute confidence intervals on a linear regression fit ...
mydata[mydata$Subject=="5",]
# A tibble: 8 × 4
Subject Group Time_num Barthel_index
<int> <fct> <dbl> <int>
1 5 A 1 100
2 5 A 2 100
3 5 A 3 100
4 5 A 4 100
5 5 A 5 100
6 5 A 6 100
7 5 A 7 100
8 5 A 8 100
Q3 plot(intervals(lmlist))
For Q3, you could use the dotplot function in the lattice package:
# library() stops with an error if the package is missing, whereas require()
# only returns FALSE and lets the script fail later.
library(lattice)
# lmer() and the sleepstudy example data set both come from lme4.
library(lme4)

m0 <- lmer(Reaction ~ Days + (Days | Subject), data = sleepstudy)
# Dot plot of the per-subject random effects; condVar = TRUE asks ranef() to
# return their conditional variances as well.
dotplot(ranef(m0, condVar = TRUE))

Plotting Piecewise growth curves

I am trying to plot a piecewise growth curve similar to this first plot. I used the separate slopes coding scheme and placed a breakpoint at time 2
| time | 0 | 1 | 2 | 5 | 10 | 15 | 20|
| time1 | 0 | 1 | 2 | 2 | 2 | 2 | 2 |
| time2 | 0 | 0 | 0 | 1 | 2 | 3 | 4 |
I used the following code to create my growth model
m1 <- lmer(sdmtwr ~ time1 + time2 + (time1 | id) + (0 + time2 | id), data = SDMT, REML = FALSE)
I'm also exploring an interaction with a 2-level categorical predictor with the following code
m2 <- lmer(sdmtwr ~ (time1 + time2)*edu + (time1 | id) + (0 + time2 | id), data = SDMT, REML = FALSE)
I've attempted to create the plots with the ggplot2, sjPlot, and effects packages to no avail, and I am at a loss due to limited programming experience. I have only ever been able to plot segments separately for both the baseline and interaction models.
If anyone could provide assistance on the appropriate code, I would appreciate it!
Edit: Here is the dput summary (edited for length to show edu, time1, and time2)
> dput(sdmt)
structure(list(id = c(3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 6L,
6L, 6L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 62L, 62L, 62L, 62L,
108L, 108L, 108L, 108L, 119L, 119L, 120L, 120L, 120L, 120L, 132L,
132L, 132L, 132L, 132L, 148L, 148L, 148L, 148L, 148L, 148L, 175L,
175L, 175L, 178L, 178L, 178L, 178L, 201L, 201L, 201L, 201L, 201L,
201L, 201L, 253L, 253L, 253L, 253L, 327L, 327L, 327L, 327L, 336L,
336L, 336L, 336L, 336L, 336L, 343L, 343L, 360L, 360L, 360L, 366L,
366L, 366L), time = c(0L, 2L, 10L, 15L, 20L, 5L, 10L, 15L, 2L,
2L, 15L, 20L, 0L, 1L, 2L, 5L, 10L, 15L, 20L, 5L, 10L, 15L, 20L,
0L, 2L, 15L, 20L, 0L, 2L, 0L, 10L, 15L, 20L, 0L, 1L, 5L, 10L,
20L, 1L, 2L, 5L, 10L, 15L, 20L, 0L, 1L, 2L, 0L, 1L, 2L, 5L, 0L,
1L, 2L, 5L, 10L, 15L, 20L, 0L, 1L, 5L, 15L, 0L, 1L, 10L, 20L,
0L, 1L, 5L, 10L, 15L, 20L, 0L, 10L, 1L, 5L, 10L, 0L, 10L, 15L
), sdmtwr = c(20L, 24L, 18L, 19L, 9L, 17L, 24L, 17L, 41L, 33L,
27L, 29L, 31L, 29L, 26L, 29L, 32L, 20L, 19L, 40L, 42L, 46L, 38L,
14L, 25L, 24L, 29L, 46L, 45L, 29L, 26L, 34L, 38L, 30L, 33L, 71L,
52L, 51L, 29L, 33L, 50L, 55L, 40L, 39L, 32L, 34L, 35L, 28L, 37L,
37L, 36L, 37L, 29L, 52L, 51L, 50L, 44L, 42L, 30L, 43L, 43L, 41L,
33L, 46L, 49L, 38L, 52L, 50L, 48L, 49L, 49L, 50L, 40L, 39L, 18L,
NA, 3L, 31L, 43L, 47L), time_seg1 = c(0, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2,
0, 2, 2, 2, 0, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 0, 1, 2, 0, 1, 2,
2, 0, 1, 2, 2, 2, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 2,
2, 0, 2, 1, 2, 2, 0, 2, 2), time_seg2 = c(0, 0, 2, 3, 4, 1, 2,
3, 0, 0, 3, 4, 0, 0, 0, 1, 2, 3, 4, 1, 2, 3, 4, 0, 0, 3, 4, 0,
0, 0, 2, 3, 4, 0, 0, 1, 2, 4, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 1, 2, 3, 4, 0, 0, 1, 3, 0, 0, 2, 4, 0, 0, 1, 2,
3, 4, 0, 2, 0, 1, 2, 0, 2, 3), ed_dich = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, NA, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L), .Label = c("< HS",
">= HS"), class = "factor")), row.names = c(NA, -80L), class = "data.frame")
What I think you want is a piecewise linear spline. You can do this with a truncated power basis function. In your model, you would include time and a function that is time-2 if time is greater than 2 and 0 otherwise. This makes a piecewise linear function whose two segments meet at time=2. You can do this in the model as follows:
library(lme4)
# Piecewise-linear growth model with a knot at time = 2. The second fixed-effect
# term equals time - 2 when time > 2 and 0 otherwise, so its coefficient is the
# change in slope after the knot. (1 | id) adds a random intercept per id.
# NOTE(review): `tmp` is presumably the poster's data frame (called SDMT in the
# question) - confirm before running.
mod <- lmer(sdmtwr ~ time + I(ifelse(time > 2, time-2, 0)) +
(1 |id), data=tmp, REML=TRUE)
Then, you could use the ggpredict() function from the ggeffects package to produce the plot:
library(ggeffects)
# Predictions from the fitted model `mod` for the "time" term.
g <- ggpredict(mod, "time")
# Plot the object returned by ggpredict().
plot(g)
Note: I couldn't get it to run with random effects on the time variables, but with more data perhaps you'll be able to get it to work.

How can I produce this specific boxplot that combines data on multiple levels from different data sources in ggplot or tidyverse/ R?

I am producing a plot that consists of several different boxplots. Please find my data sample below.
I have located data from three different studies: p$studie==1,2,3
Data comprise different tumor samples from a certain cancer that has four stages: p$ny_stadie=1,2,3,4.
Each tumor patient had a number of lymph nodes removed (ranging from 3 to 124); this count is a continuous covariate: p$n_fjernet.
Therefore
head(p)
studie ny_stadie n_fjernet
1 1 1 25
2 1 4 10
3 1 1 3
4 1 4 27
5 1 3 13
6 1 4 9
Data from all three studies have all four levels of p$ny_stadie==1,2,3,4 and a variety of different numbers of lymph nodes removed (p$n_fjernet).
I want to produce this plot (going up to p$ny_stadie==3,4 too)
Simply, I want to show the spread of resected lymph nodes per p$ny_stadie and per p$studie.
I use ggplot and tidyverse.
# My Data
p <- structure(list(studie = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), ny_stadie = structure(c(1,
4, 1, 4, 3, 4, 4, 4, 4, 4, 4, 3, 1, 3, 4, 3, 1, 1, 1, 4, 4, 3,
4, 4, 2, 2, 2, 2, 4, 3, 2, 1, 4, 1, 4, 3, 2, 1, 1, 1, 1, 4, 3,
4, 2, 4, 4, 4, 4, 3, 3, 4, 3, 4, 2, 4, 4, 4, 1, 4, 4, 2, 4, 3,
3, 4, 4, 4, 4, 3, 2, 4, 4, 3, 3, 3, 2, 1, 3, 4, 4, 3, 4, 4, 4,
4, 4, 4, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2), class = "AsIs"),
n_fjernet = c(25L, 10L, 3L, 27L, 13L, 9L, 7L, 7L, 7L, 6L,
6L, 5L, 4L, 3L, 37L, 26L, 19L, 17L, 15L, 9L, 57L, 55L, 33L,
33L, 33L, 28L, 27L, 27L, 26L, 23L, 23L, 23L, 22L, 22L, 21L,
21L, 20L, 20L, 19L, 18L, 18L, 18L, 18L, 17L, 17L, 16L, 16L,
16L, 15L, 15L, 67L, 35L, 56L, 15L, 37L, 44L, 124L, 41L, 30L,
31L, 35L, 36L, 28L, 39L, 54L, 25L, 27L, 69L, 53L, 24L, 33L,
52L, 77L, 51L, 7L, 22L, 53L, 26L, 58L, 28L, 83L, 39L, 15L,
37L, 27L, 9L, 17L, 32L, 26L, 22L, 37L, 28L, 52L, 27L, 15L,
11L, 7L, 24L, 11L, 56L, 47L, 27L, 14L)), row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 380L, 381L, 382L,
383L, 384L, 385L, 386L, 387L, 388L, 389L, 390L, 391L, 392L, 393L,
394L, 395L, 396L, 397L, 398L, 399L, 400L, 401L, 402L, 403L, 404L,
405L, 406L, 407L, 408L, 409L, 410L, 411L, 412L, 413L, 414L, 415L,
416L, 417L, 418L, 620L, 621L, 622L, 623L, 624L, 625L, 626L, 627L,
628L, 629L, 630L, 631L, 632L, 633L), class = "data.frame")
I'm not sure if this was your intention; if not, please correct me so that I can edit the answer.
doing the following on the data:
# Stage and study are stored as numbers; add factor versions of both so that
# ggplot2 treats them as discrete groups.
p[["ny_stadie_f"]] <- factor(p[["ny_stadie"]])
p[["studie_f"]] <- factor(p[["studie"]])

# One box per study within each stage, filled by study.
q <- ggplot(
  data = p,
  mapping = aes(x = ny_stadie_f, y = n_fjernet, fill = studie_f)
) +
  geom_boxplot()
q
I get the following output:
Is this the desired output? You can see that there are no boxes for ny_stadie = 3 and 4 in the case where studie = 3.

Remove unused nodes in ggraph

I have the following data.frame.
library(tidyverse)
library(ggraph)
library(tidygraph)
df <- structure(list(from = c(3L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 2L,
2L, 1L, 3L, 1L, 3L, 3L, 3L, 1L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L,
1L, 3L, 1L, 1L, 2L, 1L, 2L, 3L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 2L,
3L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 2L, 1L,
3L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 3L, 3L, 2L, 3L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 3L,
3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L), to = c(31L, 21L, 5L,
97L, 68L, 49L, 3L, 84L, 17L, 335L, 1L, 6L, 207L, 2L, 3L, 457L,
3L, 149L, 17L, 3L, 41L, 126L, 89L, 150L, 42L, 262L, 235L, 79L,
335L, 2L, 104L, 445L, 10L, 79L, 5L, 7L, 3L, 39L, 6L, 402L, 123L,
104L, 246L, 448L, 261L, 44L, 47L, 170L, 158L, 435L, 39L, 47L,
11L, 31L, 18L, 236L, 144L, 237L, 106L, 236L, 19L, 393L, 104L,
6L, 440L, 191L, 171L, 302L, 255L, 134L, 197L, 373L, 68L, 10L,
1L, 6L, 461L, 28L, 216L, 473L, 108L, 238L, 79L, 1L, 11L, 178L,
432L, 5L, 3L, 91L, 449L, 26L, 123L, 456L, 73L, 1L, 105L, 432L,
160L, 10L), time = c(2, 1, 3, 3, 1, 3, 1, 1, 1, 2, 3, 1, 1, 1,
1, 3, 3, 2, 1, 1, 1, 3, 1, 1, 2, 2, 2, 3, 2, 1, 3, 3, 1, 3, 3,
1, 2, 1, 1, 3, 3, 2, 2, 3, 3, 1, 1, 3, 1, 3, 1, 1, 1, 3, 3, 2,
1, 3, 3, 2, 3, 3, 1, 1, 3, 1, 1, 3, 2, 3, 3, 3, 1, 2, 2, 2, 3,
1, 1, 3, 1, 2, 3, 1, 1, 3, 3, 1, 3, 1, 3, 2, 3, 3, 2, 3, 1, 3,
3, 1)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-100L))
g <- df %>% graph_from_data_frame()
And this code to produce a ggraph
# Convert to a tidygraph object, store each node's degree, and draw one facet
# per value of the edge attribute `time`.
g %>%
  as_tbl_graph() %>%
  activate(nodes) %>%
  mutate(degree = centrality_degree()) %>%
  ggraph() +
  geom_edge_fan(aes(color = time)) +
  # TRUE/FALSE are reserved words; T and F are ordinary variables that can be
  # reassigned, so the full names are used here.
  geom_node_point(aes(size = degree), show.legend = FALSE) +
  coord_equal() +
  facet_edges(~time, drop = TRUE) +
  theme_graph()
Which produces this faceted plot:
My question is: How can I remove the edgeless nodes in the facets of the plot, so that only nodes that have edges appear in each facet?
Thanks!

Error in migrating a ggplot graph to ver. 0.9.3

Trying to migrate a ggplot graph to ver 0.9.3, the commented out line in the code below produces a Discrete value supplied to continuous scale error (which was not a problem before).
Can you help me correct it?
If you need the data to experiment with, I provide it below.
ggplot(mdfr, aes(as.Date(value, "%d/%m/%Y"), name, colour = factor(stadio))) +
geom_line(aes(size=rating)) +
labs(colour="Baseline/Actual :", x = "", y = "") +
scale_colour_brewer(palette="BrBG",breaks = c("1", "3", "6","8"),
labels = c("Label 1", "Label 2", "Label 3","Label 4")) +
scale_size_manual(breaks = levels(mdfr$rating), values = as.integer(levels(mdfr$rating)), guide = "none") +
theme_bw() +
#geom_vline(data=dfrDataDate, aes(xintercept= as.Date(data.date, "%Y-%m-%d")),colour=rgb(215, 25, 28, max = 255),size=1) +
geom_text(data=dfrDataDate, aes(x= as.Date(data.date, "%Y-%m-%d"), label = format(as.Date(data.date),"%d/%m/%Y")), hjust = -0.05, vjust = 1.5, colour = "darkred", size = 3 ) +
geom_text(data=dfrLabels, aes(x= as.Date(diag_date, "%d/%m/%Y"), label = format(as.Date(diag_date, "%d/%m/%Y"),"%d/%m/%Y")), hjust = 0.5, vjust = -1, colour = "black", size = 3 ) +
geom_text(data=dfrYpogr, aes(x= as.Date(ypogr_date, "%d/%m/%Y"), label = format(as.Date(ypogr_date, "%d/%m/%Y"),"%d/%m/%Y")), hjust = 0.5, vjust = -1, colour = "black", size = 3 ) +
scale_y_discrete(breaks=names, labels=new.names) +
ggtitle('New plot title') +
theme(plot.title = element_text(size=16),
legend.position = "top",
legend.title = element_text(size=12),
legend.text = element_text(size=10),
legend.key = element_rect(colour='white'),
axis.text.x = element_text(angle=0,size = 10),
axis.title.x = element_text(size=14),
axis.text.y = element_text(size=10),
axis.title.y = element_text(angle=90,size=14),
axis.ticks.length = unit(.05, "cm")
)
DATA needed to produce the graph:
mdfr <- structure(list(name = structure(c(22L, 22L, 22L, 22L, 20L, 20L,
20L, 20L, 18L, 18L, 18L, 18L, 16L, 16L, 16L, 16L, 14L, 14L, 14L,
14L, 12L, 12L, 12L, 12L, 10L, 10L, 10L, 10L, 8L, 8L, 8L, 8L,
6L, 6L, 6L, 6L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 21L, 21L, 21L,
21L, 19L, 19L, 19L, 19L, 17L, 17L, 17L, 17L, 15L, 15L, 15L, 15L,
13L, 13L, 13L, 13L, 11L, 11L, 11L, 11L, 9L, 9L, 9L, 9L, 7L, 7L,
7L, 7L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 22L,
22L, 22L, 22L, 20L, 20L, 20L, 20L, 18L, 18L, 18L, 18L, 16L, 16L,
16L, 16L, 14L, 14L, 14L, 14L, 12L, 12L, 12L, 12L, 10L, 10L, 10L,
10L, 8L, 8L, 8L, 8L, 6L, 6L, 6L, 6L, 4L, 4L, 4L, 4L, 2L, 2L,
2L, 2L, 21L, 21L, 21L, 21L, 19L, 19L, 19L, 19L, 17L, 17L, 17L,
17L, 15L, 15L, 15L, 15L, 13L, 13L, 13L, 13L, 11L, 11L, 11L, 11L,
9L, 9L, 9L, 9L, 7L, 7L, 7L, 7L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L), .Label = c("733 A", "733 B", "725 A", "725 B",
"727 A", "727 B", "558 A", "558 B", "705 A", "705 B", "635 A",
"635 B", "737 A", "737 B", "719 A", "719 B", "700 A", "700 B",
"579 A", "579 B", "541 A", "541 B"), class = "factor"), stadio = c(2,
4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4,
5, 7, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5,
7, 1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8,
1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8, 1,
3, 6, 8, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4,
5, 7, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5,
7, 2, 4, 5, 7, 1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8,
1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8, 1,
3, 6, 8, 1, 3, 6, 8), variable = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("start_date",
"end_date"), class = "factor"), value = structure(c(3L, 18L,
20L, 36L, 2L, 14L, 24L, 38L, 7L, 7L, 7L, 31L, 9L, 15L, 27L, 34L,
4L, 19L, 21L, 37L, 1L, 9L, 23L, 33L, 8L, 13L, 25L, 32L, 10L,
16L, 28L, 29L, 5L, 12L, 26L, 35L, 6L, 17L, 22L, 39L, 11L, 17L,
22L, 30L, 3L, 18L, 20L, 36L, 2L, 14L, 24L, 31L, 50L, 50L, 50L,
56L, 15L, 52L, 55L, 32L, 48L, 49L, 55L, 34L, 1L, 53L, 53L, 57L,
49L, 51L, 54L, 58L, 10L, 16L, 28L, 29L, 5L, 12L, 26L, 35L, 6L,
17L, 22L, 39L, 11L, 17L, 22L, 30L, 18L, 20L, 36L, 45L, 14L, 24L,
38L, 46L, 7L, 7L, 31L, 42L, 15L, 27L, 34L, 44L, 19L, 21L, 37L,
44L, 9L, 23L, 33L, 40L, 13L, 25L, 32L, 44L, 16L, 28L, 29L, 41L,
12L, 26L, 35L, 43L, 17L, 22L, 39L, 47L, 17L, 22L, 30L, 47L, 18L,
20L, 36L, 59L, 14L, 24L, 31L, 64L, 50L, 50L, 56L, 66L, 52L, 55L,
32L, 61L, 49L, 55L, 34L, 63L, 53L, 53L, 57L, 65L, 51L, 54L, 58L,
60L, 16L, 28L, 29L, 63L, 12L, 26L, 35L, 62L, 17L, 22L, 39L, 67L,
17L, 22L, 30L, 61L), .Label = c("03/05/2012", "07/06/2011", "22/02/2011",
"22/06/2012", "23/12/2011", "28/12/2011", "29/02/2012", "29/03/2012",
"29/06/2012", "30/05/2011", "30/12/2011", "03/02/2012", "04/07/2012",
"08/02/2012", "10/07/2012", "17/10/2011", "23/02/2012", "24/05/2011",
"25/07/2012", "01/06/2011", "01/08/2012", "02/03/2012", "05/07/2012",
"09/02/2012", "12/07/2012", "16/02/2012", "19/07/2012", "27/01/2012",
"06/04/2012", "07/06/2012", "08/05/2012", "09/10/2012", "11/09/2012",
"16/10/2012", "18/05/2012", "20/09/2011", "23/10/2012", "24/04/2012",
"31/05/2012", "04/02/2013", "09/09/2012", "12/11/2012", "19/11/2012",
"28/02/2013", "28/09/2012", "29/10/2012", "30/11/2012", "01/07/2012",
"06/07/2012", "22/03/2012", "02/08/2012", "17/07/2012", "31/07/2012",
"06/09/2012", "26/07/2012", "12/06/2012", "13/11/2012", "20/11/2012",
"17/01/2013", "21/05/2013", "21/12/2012", "22/07/2012", "28/12/2012",
"30/03/2013", "30/04/2013", "31/01/2013", "31/12/2012"), class = "factor"),
rating = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("3", "5"), class = "factor")), row.names = c(NA,
-176L), .Names = c("name", "stadio", "variable", "value", "rating"
), class = "data.frame")
dfrDataDate <- structure(list(name = structure(1L, .Label = "733 A", class = "factor"),
data.date = structure(1L, .Label = "2013-01-02", class = "factor")), .Names = c("name",
"data.date"), row.names = c(NA, -1L), class = "data.frame")
dfrLabels <- structure(list(name = c("541 A", "579 A", "700 A", "719 A", "737 A",
"635 A", "705 A", "558 A", "727 A", "725 A", "733 A"), diag_date = c("20/09/2011",
"08/05/2012", "12/06/2012", "09/10/2012", "16/10/2012", "13/11/2012",
"20/11/2012", "06/04/2012", "18/05/2012", "31/05/2012", "07/06/2012"
)), .Names = c("name", "diag_date"), row.names = c(135L, 139L,
143L, 147L, 151L, 155L, 159L, 163L, 167L, 171L, 175L), class = "data.frame")
dfrYpogr <- structure(list(name = c("541 A", "579 A", "700 A", "719 A", "737 A",
"635 A", "705 A", "558 A", "727 A", "725 A", "733 A"), ypogr_date = c("17/01/2013",
"30/03/2013", "31/01/2013", "21/12/2012", "28/12/2012", "30/04/2013",
"21/05/2013", "28/12/2012", "22/07/2012", "31/12/2012", "21/12/2012"
)), .Names = c("name", "ypogr_date"), row.names = c(136L, 140L,
144L, 148L, 152L, 156L, 160L, 164L, 168L, 172L, 176L), class = "data.frame")
names <- as.character(unique(mdfr$name))
new.names <- c("No.541", "No.579", "No.700", "No.719", "No.737", "No.635",
"No.705", "No.558", "No.727", "No.725", "No.733", "", "",
"", "", "", "", "", "", "", "", "")
Wrapping as.Date() in as.numeric() in the call to geom_vline() makes the red line appear.
geom_vline(data=dfrDataDate, aes(xintercept= as.numeric(as.Date(data.date, "%Y-%m-%d"))),colour=rgb(215, 25, 28, max = 255),size=1)

Resources