Set the thickness of geom_line based on frequency (like geom_count) - r

I would like to set the thickness of geom_line to the proportion of data that follows that path, in the same way that geom_count sets the size of points based on the proportion of data that overlap at that point, or find a function that will allow me to do this.
I would also be happy if I could do this as a count rather than a proportion - either would work. I have attached the graph; the grey lines represent connections between the same ID (i.e. the same individual in different categories). If I could set the thickness of the lines, I could show the most common connection pathways.
My current code is:
# Per-individual paths (grey lines), points sized by the proportion of
# overlapping observations, and one overall linear trend line.
# The individual identifier column in the posted `dat` is named `IDD`.
ggplot(dat, aes(x = Category, y = Metric, group = IDD)) +
  geom_line(aes(group = IDD), colour = "gray59") +
  # group = 1 so that ..prop.. is computed over all rows, not per individual
  geom_count(aes(size = ..prop.., group = 1), colour = "gray59") +
  scale_size_area(max_size = 5) +
  theme_bw() +
  geom_smooth(
    method = "lm", se = FALSE, colour = "black",
    aes(group = 1), linetype = "dotdash"
  ) +
  xlab("Category") +
  ylab("Metric") +
  theme(text = element_text(size = 16))
This is the resulting graph, point size shows the proportion of data that overlaps at that point, I would like to do the same with line thickness if possible:
My searching has so far turned up nothing helpful but maybe I am searching the wrong terms. Any help would be much appreciated!
Here is the data - unsure how to upload it as a file
dat <- structure(list(IDD = structure(c(1L, 1L, 1L, 1L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 2L, 2L, 2L, 2L, 7L, 7L, 7L,
8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 12L,
12L, 13L, 13L, 13L, 13L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 16L,
16L, 16L, 17L, 17L, 18L, 18L, 18L, 18L, 19L, 19L, 20L, 20L, 21L,
21L, 21L, 22L, 22L, 23L, 23L, 24L, 24L, 25L, 25L, 25L, 26L, 26L,
26L, 26L, 27L, 27L, 28L, 28L, 29L, 29L, 29L, 30L, 30L, 30L, 31L,
31L, 31L, 31L, 32L, 32L, 33L, 33L, 33L, 34L, 34L, 34L, 34L, 35L,
35L, 36L, 36L, 36L, 37L, 37L, 37L, 37L, 38L, 38L, 38L, 39L, 39L,
39L, 40L, 40L, 40L, 41L, 41L, 42L, 42L, 43L, 43L, 44L, 44L, 44L,
44L, 45L, 45L, 45L, 46L, 46L, 46L, 47L, 47L, 47L, 48L, 48L, 49L,
49L, 50L, 50L, 51L, 51L, 51L, 51L, 52L, 52L, 53L, 53L, 54L, 54L,
55L, 55L, 56L, 56L, 57L, 57L, 57L, 58L, 58L, 59L, 59L, 59L, 59L
), .Label = c("ID005", "ID040", "ID128", "ID131", "ID133", "ID134",
"ID147", "ID149", "ID166", "ID167", "ID175", "ID181", "ID191",
"ID198", "ID213", "ID235", "ID254", "ID257", "ID259", "ID273",
"ID279", "ID287", "ID292", "ID299", "ID300", "ID321", "ID334",
"ID348", "ID349", "ID354", "ID359", "ID377", "ID379", "ID383",
"ID390", "ID395", "ID409", "ID445", "ID467", "ID469", "ID482",
"ID492", "ID496", "ID524", "ID526", "ID527", "ID534", "ID535",
"ID538", "ID545", "ID564", "ID576", "ID578", "ID579", "ID600",
"ID610", "ID622", "ID631", "ID728"), class = "factor"), Category = c(2L,
4L, 5L, 5L, 2L, 4L, 1L, 3L, 3L, 4L, 4L, 2L, 4L, 5L, 5L, 5L, 2L,
5L, 5L, 5L, 3L, 2L, 5L, 4L, 5L, 5L, 4L, 4L, 5L, 5L, 3L, 4L, 5L,
5L, 2L, 4L, 2L, 5L, 3L, 4L, 5L, 5L, 4L, 5L, 3L, 4L, 5L, 5L, 3L,
4L, 5L, 5L, 5L, 5L, 2L, 3L, 4L, 4L, 5L, 5L, 5L, 5L, 4L, 4L, 5L,
5L, 5L, 3L, 4L, 5L, 5L, 4L, 5L, 5L, 1L, 3L, 4L, 4L, 3L, 5L, 3L,
5L, 2L, 3L, 4L, 3L, 4L, 4L, 3L, 3L, 4L, 4L, 3L, 5L, 3L, 4L, 4L,
3L, 3L, 4L, 5L, 2L, 3L, 2L, 3L, 4L, 2L, 2L, 3L, 4L, 4L, 5L, 5L,
2L, 3L, 4L, 2L, 3L, 4L, 3L, 4L, 4L, 5L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 3L, 4L, 1L, 3L, 4L, 1L, 3L, 4L, 3L, 4L, 3L, 3L, 2L, 3L, 2L,
2L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 4L, 3L, 4L, 3L, 4L, 1L, 2L, 3L,
2L, 3L, 1L, 3L, 4L, 4L), Metric = c(2, 2, 3.5, 4, 2, 1.5, 2,
2, 3, 3, 2, 2, 2, 2, 3.5, 3.5, 2, 3, 3.5, 4, 2, 2, 3, 2, 3, 3,
2, 3, 3, 2.5, 1.5, 3, 3.5, 4, 2, 2, 1.5, 2, 1.5, 2, 2, 2, 2.5,
3, 2.5, 3.5, 3.5, 3.5, 1.5, 2, 2.5, 2.5, 3.5, 4, 2, 2, 1.5, 3,
3.5, 3, 3, 3, 3.5, 2.5, 3, 3, 3, 2, 3, 2.5, 2.5, 2, 2, 2, 2,
2, 2, 2, 2.5, 2.5, 2, 3, 2.5, 2, 2.5, 2, 2.5, 2.5, 2, 2, 2.5,
3.5, 2, 2.5, 2.5, 2.5, 2.5, 2, 2, 2, 2.5, 2, 2, 1.5, 2, 2, 2.5,
2, 2, 2.5, 2, 2, 2.5, 2.5, 2.5, 3, 2.5, 2.5, 2.5, 2, 2, 2.5,
2.5, 2, 2, 2, 2, 1.5, 2, 1.5, 2, 2, 2, 1.5, 2, 2, 2.5, 2.5, 1.5,
1.5, 2, 2.5, 2, 2, 2, 2, 2.5, 2, 1.5, 2, 2.5, 2, 1.5, 1.5, 1.5,
2, 2, 2, 2, 2, 1.5, 2, 2.5, 2, 2, 2.5, 2.5)), .Names = c("IDD",
"Category", "Metric"), class = "data.frame", row.names = c(NA,
-167L))

I am a bit confused about how you want to scale different line segments, but I was able to create a proportional variable within dat and then plot that as an argument to geom_line():
# thickness = Category / sum(Category) within each distinct Metric value
# (ave() applies prop.table() to Category separately per Metric group).
# NOTE(review): this is a per-row share of Category within a Metric level,
# not a count of individuals following a given segment.
dat$thickness <- with(dat, ave(Category, Metric, FUN = prop.table))
# The individual identifier column in the posted `dat` is named `IDD`.
ggplot(dat, aes(x = Category, y = Metric, group = IDD)) +
  # size is set (not mapped), one value per row of `dat`
  geom_line(aes(group = IDD), colour = "gray59", size = dat$thickness) +
  geom_count(aes(size = ..prop.., group = 1), colour = "gray59") +
  scale_size_area(max_size = 5) +
  theme_bw() +
  geom_smooth(
    method = "lm", se = FALSE, colour = "black",
    aes(group = 1), linetype = "dotdash"
  ) +
  xlab("Category") +
  ylab("Metric") +
  theme(text = element_text(size = 16))
Which yields this plot:

Related

Obtaining intercept and slope per patient (as diagnostics in a repeated measurements study) using the lmList and intervals functions

I have a repeated measurements dataset of 24 stroke patients in which I want to assess the effect of three different types of rehabilitation (Group) on functional recovery scores (Barthel_index). Each patient's functional ability was measured weekly (Time_num) for 8 weeks.
The data looks as follows:
library(dplyr)
library(magrittr)
library(nlme)
# lmer() is a function from lme4, not a package of its own; lme4 is the
# package that the lme4::lmList() call below needs.
library(lme4)
mydata <-
structure(list(Subject = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 19L,
19L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L,
21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 23L,
23L, 23L, 23L, 23L, 23L, 23L, 23L, 24L, 24L, 24L, 24L, 24L, 24L,
24L, 24L), Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("A", "B", "C"), class = "factor"),
Time_num = c(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7,
8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2,
3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5,
6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8,
1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3,
4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6,
7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1,
2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4,
5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7,
8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2,
3, 4, 5, 6, 7, 8), Barthel_index = c(45L, 45L, 45L, 45L,
80L, 80L, 80L, 90L, 20L, 25L, 25L, 25L, 30L, 35L, 30L, 50L,
50L, 50L, 55L, 70L, 70L, 75L, 90L, 90L, 25L, 25L, 35L, 40L,
60L, 60L, 70L, 80L, 100L, 100L, 100L, 100L, 100L, 100L, 100L,
100L, 20L, 20L, 30L, 50L, 50L, 60L, 85L, 95L, 30L, 35L, 35L,
40L, 50L, 60L, 75L, 85L, 30L, 35L, 45L, 50L, 55L, 65L, 65L,
70L, 40L, 55L, 60L, 70L, 80L, 85L, 90L, 90L, 65L, 65L, 70L,
70L, 80L, 80L, 80L, 80L, 30L, 30L, 40L, 45L, 65L, 85L, 85L,
85L, 25L, 35L, 35L, 35L, 40L, 45L, 45L, 45L, 45L, 45L, 80L,
80L, 80L, 80L, 80L, 80L, 15L, 15L, 10L, 10L, 10L, 20L, 20L,
20L, 35L, 35L, 35L, 45L, 45L, 45L, 50L, 50L, 40L, 40L, 40L,
55L, 55L, 55L, 60L, 65L, 20L, 20L, 30L, 30L, 30L, 30L, 30L,
30L, 35L, 35L, 35L, 40L, 40L, 40L, 40L, 40L, 35L, 35L, 35L,
40L, 40L, 40L, 45L, 45L, 45L, 65L, 65L, 65L, 80L, 85L, 95L,
100L, 45L, 65L, 70L, 90L, 90L, 95L, 95L, 100L, 25L, 30L,
30L, 35L, 40L, 40L, 40L, 40L, 25L, 25L, 30L, 30L, 30L, 30L,
35L, 40L, 15L, 35L, 35L, 35L, 40L, 50L, 65L, 65L)), row.names = c(NA,
-192L), class = c("tbl_df", "tbl", "data.frame"))
head(mydata)
# A tibble: 6 x 4
Subject Group Time_num Barthel_index
<int> <fct> <dbl> <int>
1 1 A 1 45
2 1 A 2 45
3 1 A 3 45
4 1 A 4 45
5 1 A 5 80
6 1 A 6 80
To see if and how intercepts and slopes vary per patient I want to plot the intercepts and slopes using the lmList and intervals functions.
Question 1 I don't understand why calling the lmList() function in lme4 gives me 48 warnings while the same function in nlme does not:
lmlist <-
lme4::lmList(Barthel_index ~ Time_num | Subject,
data=mydata)
> There were 48 warnings (use warnings() to see them)
lmlist <-
nlme::lmList(Barthel_index ~ Time_num | Subject,
data=mydata)
# Works fine
Question 2 I am trying to extract the confidence intervals for each regression slope, but this gives a warning and NaN for certain values:
lmlist <-
nlme::lmList(Barthel_index ~ Time_num | Subject,
data=mydata)
coefs <- coef(lmlist)
names(coefs) <- c("Intercepts", "Slopes")
intervals(lmlist)
> Warning message:
In summary.lm(el) : essentially perfect fit: summary may be unreliable
Question 3 Now that I have my new list of coefficients with confidence intervals, I'd like to plot them to see if and how much intercepts and slopes vary amongst patients. I'm trying to achieve something like the following:
Any help? Thanks.
Q1. The warnings are occurring in lme4::lmList because you're using a tibble as input: no warnings from
lme4::lmList(Barthel_index ~ Time_num | Subject,
data=as.data.frame(mydata))
(this is a harmless "infelicity" or buglet in lme4 ...)
Q2. If you look at the list of coefficients, you'll see that subject 5 is the problematic one. The data for this subject all have the same response value: thus it's not surprising that we can't compute confidence intervals on a linear regression fit ...
mydata[mydata$Subject=="5",]
# A tibble: 8 × 4
Subject Group Time_num Barthel_index
<int> <fct> <dbl> <int>
1 5 A 1 100
2 5 A 2 100
3 5 A 3 100
4 5 A 4 100
5 5 A 5 100
6 5 A 6 100
7 5 A 7 100
8 5 A 8 100
Q3 plot(intervals(lmlist))
For Q3, you could use the dotplot function in the lattice package:
# library() errors if a package is missing, whereas require() only returns
# FALSE and lets the script fail later with a less helpful message.
library(lattice)
# lmer() and the sleepstudy example data both come from lme4.
library(lme4)
m0 <- lmer(Reaction ~ Days + (Days | Subject), data = sleepstudy)
# Dot plot of the per-subject random effects with their conditional variances.
dotplot(ranef(m0, condVar = TRUE))

Adding points to persp 3D plot - hide or obscure points when behind surface

Background:
I'm attempting to add a 3D plot to a Shiny application. I've added a button to rotate the plot ~ 90 degrees. I'd also like to include radio buttons to plot points on the surface.
Problem:
When points are plotted they simply appear on top of the image, even when they should be behind the surface.
Question:
Is there a way to plot the surface so that it's transparent and points appear either behind or in front? Or hide the points if they land out of eyesight?
Data:
d <- list(x = c(0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6,
6.5, 7, 7.5, 8, 8.5, 9, 9.5, 10), y = c(0, 0.5, 1, 1.5, 2, 2.5,
3, 3.5, 4, 4.5, 5, 5.5, 6, 6.5, 7, 7.5, 8, 8.5, 9, 9.5, 10),
z = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0.000147818839413345, 0.00112553487724733,
0.00210325091508131, 0.00308096695291529, 0.00405868299074927,
0.00503639902858325, 0.00601411506641723, 0.00699183110425121,
0.00796954714208519, 0.00894726317991917, 0.00992497921775315,
0.0109026952555871, 0.0118804112934211, 0.0128581273312551,
0.0138358433690891, 0.0148135594069231, 0.015791275444757,
0.016768991482591, 0.017746707520425, 0.018724423558259,
0.019702139596093, 0.00332663525507192, 0.0253299512993333,
0.0473332673435947, 0.0693365833878561, 0.0913398994321175,
0.113343215476379, 0.13534653152064, 0.157349847564902, 0.179353163609163,
0.201356479653424, 0.223359795697686, 0.245363111741947,
0.267366427786209, 0.28936974383047, 0.311373059874731, 0.333376375918993,
0.355379691963254, 0.377383008007516, 0.399386324051777,
0.421389640096038, 0.4433929561403, 0.0185048854236584, 0.140901484725856,
0.263298084028054, 0.385694683330252, 0.50809128263245, 0.630487881934648,
0.752884481236846, 0.875281080539044, 0.997677679841242,
1.12007427914344, 1.24247087844564, 1.36486747774784, 1.48726407705003,
1.60966067635223, 1.73205727565443, 1.85445387495663, 1.97685047425883,
2.09924707356102, 2.22164367286322, 2.34404027216542, 2.46643687146762,
0.0575583422570596, 0.438265663185897, 0.818972984114734,
1.19968030504357, 1.58038762597241, 1.96109494690124, 2.34180226783008,
2.72250958875892, 3.10321690968776, 3.48392423061659, 3.86463155154543,
4.24533887247427, 4.6260461934031, 5.00675351433194, 5.38746083526078,
5.76816815618962, 6.14887547711845, 6.52958279804729, 6.91029011897613,
7.29099743990496, 7.6717047608338, 0.129117933403967, 0.98314083577592,
1.83716373814787, 2.69118664051983, 3.54520954289178, 4.39923244526373,
5.25325534763568, 6.10727825000764, 6.96130115237959, 7.81532405475154,
8.6693469571235, 9.52336985949545, 10.3773927618674, 11.2314156642394,
12.0854385666113, 12.9394614689833, 13.7934843713552, 14.6475072737272,
15.5015301760991, 16.3555530784711, 17.209575980843, 0.23363441995763,
1.77895922624881, 3.32428403254, 4.86960883883118, 6.41493364512237,
7.96025845141355, 9.50558325770473, 11.0509080639959, 12.5962328702871,
14.1415576765783, 15.6868824828695, 17.2322072891607, 18.7775320954518,
20.322856901743, 21.8681817080342, 23.4135065143254, 24.9588313206166,
26.5041561269078, 28.0494809331989, 29.5948057394901, 31.1401305457813,
0.36143039040365, 2.75203425835922, 5.14263812631479, 7.53324199427035,
9.92384586222592, 12.3144497301815, 14.7050535981371, 17.0956574660926,
19.4862613340482, 21.8768652020038, 24.2674690699593, 26.6580729379149,
29.0486768058705, 31.439280673826, 33.8298845417816, 36.2204884097372,
38.6110922776927, 41.0016961456483, 43.3923000136039, 45.7829038815594,
48.173507749515, 0.494048345421132, 3.76182525870662, 7.02960217199211,
10.2973790852776, 13.5651559985631, 16.8329329118486, 20.1007098251341,
23.3684867384196, 26.636263651705, 29.9040405649905, 33.171817478276,
36.4395943915615, 39.707371304847, 42.9751482181325, 46.242925131418,
49.5107020447035, 52.778478957989, 56.0462558712744, 59.3140327845599,
62.5818096978454, 65.8495866111309, 0.608277972936286, 4.63160227964344,
8.65492658635059, 12.6782508930577, 16.7015751997649, 20.724899506472,
24.7482238131792, 28.7715481198863, 32.7948724265935, 36.8181967333006,
40.8415210400078, 44.8648453467149, 48.8881696534221, 52.9114939601292,
56.9348182668364, 60.9581425735435, 64.9814668802507, 69.0047911869578,
73.028115493665, 77.0514398003722, 81.0747641070793, 0.68169864474794,
5.19064825215217, 9.6995978595564, 14.2085474669606, 18.7174970743649,
23.2264466817691, 27.7353962891733, 32.2443458965776, 36.7532955039818,
41.262245111386, 45.7711947187903, 50.2801443261945, 54.7890939335987,
59.298043541003, 63.8069931484072, 68.3159427558114, 72.8248923632157,
77.3338419706199, 81.8427915780241, 86.3517411854284, 90.8606907928326,
0.698331143785818, 5.31729285196915, 9.93625456015249, 14.5552162683358,
19.1741779765192, 23.7931396847025, 28.4121013928858, 33.0310631010692,
37.6500248092525, 42.2689865174358, 46.8879482256192, 51.5069099338025,
56.1258716419859, 60.7448333501692, 65.3637950583525, 69.9827567665359,
74.6017184747192, 79.2206801829025, 83.8396418910859, 88.4586035992692,
93.0775653074525, 0.653010606586468, 4.9722093330084, 9.29140805943032,
13.6106067858523, 17.9298055122742, 22.2490042386961, 26.568202965118,
30.88740169154, 35.2066004179619, 39.5257991443838, 43.8449978708057,
48.1641965972277, 52.4833953236496, 56.8025940500715, 61.1217927764935,
65.4409915029154, 69.7601902293373, 74.0793889557592, 78.3985876821812,
82.7177864086031, 87.036985135025, 0.553337675961259, 4.21327116124787,
7.87320464653448, 11.5331381318211, 15.1930716171077, 18.8530051023943,
22.5129385876809, 26.1728720729675, 29.8328055582542, 33.4927390435408,
37.1526725288274, 40.812606014114, 44.4725394994006, 48.1324729846872,
51.7924064699738, 55.4523399552604, 59.112273440547, 62.7722069258337,
66.4321404111203, 70.0920738964069, 73.7520073816935, 0.418509049668882,
3.18664747819306, 5.95478590671724, 8.72292433524142, 11.4910627637656,
14.2592011922898, 17.027339620814, 19.7954780493381, 22.5636164778623,
25.3317549063865, 28.0998933349107, 30.8680317634349, 33.636170191959,
36.4043086204832, 39.1724470490074, 41.9405854775316, 44.7087239060558,
47.4768623345799, 50.2450007631041, 53.0131391916283, 55.7812776201525,
0.274945103406177, 2.09351057307846, 3.91207604275075, 5.73064151242304,
7.54920698209532, 9.36777245176761, 11.1863379214399, 13.0049033911122,
14.8234688607845, 16.6420343304568, 18.460599800129, 20.2791652698013,
22.0977307394736, 23.9162962091459, 25.7348616788182, 27.5534271484905,
29.3719926181628, 31.1905580878351, 33.0091235575073, 34.8276890271796,
36.6462544968519, 0.14939138421548, 1.1375086826693, 2.12562598112311,
3.11374327957693, 4.10186057803075, 5.08997787648456, 6.07809517493838,
7.06621247339219, 8.05432977184601, 9.04244707029983, 10.0305643687536,
11.0186816672075, 12.0067989656613, 12.9949162641151, 13.9830335625689,
14.9711508610227, 15.9592681594765, 16.9473854579304, 17.9355027563842,
18.923620054838, 19.9117373532918, 0.0610345623904979, 0.464734596487648,
0.868434630584799, 1.27213466468195, 1.6758346987791, 2.07953473287625,
2.4832347669734, 2.88693480107055, 3.2906348351677, 3.69433486926485,
4.098034903362, 4.50173493745915, 4.9054349715563, 5.30913500565345,
5.7128350397506, 6.11653507384775, 6.52023510794491, 6.92393514204206,
7.32763517613921, 7.73133521023636, 8.13503524433351, 0.0150842607904164,
0.114855871447028, 0.214627482103639, 0.31439909276025, 0.414170703416861,
0.513942314073472, 0.613713924730083, 0.713485535386694,
0.813257146043305, 0.913028756699917, 1.01280036735653, 1.11257197801314,
1.21234358866975, 1.31211519932636, 1.41188680998297, 1.51165842063958,
1.61143003129619, 1.71120164195281, 1.81097325260942, 1.91074486326603,
2.01051647392264, 0.00112075907879118, 0.00853377984279572,
0.0159468006068003, 0.0233598213708048, 0.0307728421348093,
0.0381858628988139, 0.0455988836628184, 0.0530119044268229,
0.0604249251908275, 0.067837945954832, 0.0752509667188366,
0.0826639874828411, 0.0900770082468456, 0.0974900290108502,
0.104903049774855, 0.112316070538859, 0.119729091302864,
0.127142112066868, 0.134555132830873, 0.141968153594877,
0.149381174358882, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), .Dim = c(21L, 21L)), facetcol = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 6L, 6L, 1L, 2L, 2L, 3L, 4L, 4L, 5L, 6L, 6L, 7L, 8L,
9L, 9L, 10L, 11L, 11L, 12L, 13L, 13L, 14L, 1L, 3L, 4L, 5L,
7L, 8L, 9L, 11L, 12L, 13L, 15L, 16L, 17L, 19L, 20L, 21L,
23L, 24L, 25L, 27L, 2L, 4L, 6L, 9L, 11L, 13L, 15L, 17L, 19L,
22L, 24L, 26L, 28L, 30L, 33L, 35L, 37L, 39L, 41L, 44L, 3L,
6L, 9L, 12L, 15L, 18L, 21L, 25L, 28L, 31L, 34L, 37L, 40L,
44L, 47L, 50L, 53L, 56L, 59L, 62L, 3L, 7L, 11L, 15L, 19L,
23L, 28L, 32L, 36L, 40L, 44L, 48L, 52L, 56L, 60L, 64L, 68L,
72L, 76L, 80L, 4L, 8L, 13L, 18L, 23L, 27L, 32L, 37L, 42L,
46L, 51L, 56L, 61L, 65L, 70L, 75L, 80L, 84L, 89L, 94L, 4L,
9L, 14L, 19L, 24L, 29L, 34L, 39L, 45L, 50L, 55L, 60L, 65L,
70L, 75L, 80L, 85L, 90L, 95L, 100L, 4L, 9L, 14L, 19L, 24L,
29L, 34L, 39L, 44L, 49L, 54L, 59L, 64L, 69L, 74L, 78L, 83L,
88L, 93L, 98L, 3L, 8L, 12L, 17L, 21L, 26L, 30L, 35L, 39L,
43L, 48L, 52L, 57L, 61L, 66L, 70L, 75L, 79L, 83L, 88L, 3L,
6L, 10L, 14L, 17L, 21L, 24L, 28L, 32L, 35L, 39L, 42L, 46L,
49L, 53L, 57L, 60L, 64L, 67L, 71L, 2L, 5L, 7L, 10L, 12L,
15L, 18L, 20L, 23L, 25L, 28L, 30L, 33L, 35L, 38L, 41L, 43L,
46L, 48L, 51L, 2L, 3L, 5L, 6L, 8L, 9L, 11L, 12L, 14L, 16L,
17L, 19L, 20L, 22L, 23L, 25L, 27L, 28L, 30L, 31L, 1L, 2L,
3L, 3L, 4L, 5L, 6L, 6L, 7L, 8L, 9L, 10L, 10L, 11L, 12L, 13L,
13L, 14L, 15L, 16L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("(-0.357,3.59]", "(3.59,7.18]",
"(7.18,10.8]", "(10.8,14.4]", "(14.4,17.9]", "(17.9,21.5]",
"(21.5,25.1]", "(25.1,28.7]", "(28.7,32.3]", "(32.3,35.9]",
"(35.9,39.5]", "(39.5,43.1]", "(43.1,46.6]", "(46.6,50.2]",
"(50.2,53.8]", "(53.8,57.4]", "(57.4,61]", "(61,64.6]", "(64.6,68.2]",
"(68.2,71.8]", "(71.8,75.3]", "(75.3,78.9]", "(78.9,82.5]",
"(82.5,86.1]", "(86.1,89.7]", "(89.7,93.3]", "(93.3,96.9]",
"(96.9,100]", "(100,104]", "(104,108]", "(108,111]", "(111,115]",
"(115,118]", "(118,122]", "(122,126]", "(126,129]", "(129,133]",
"(133,136]", "(136,140]", "(140,144]", "(144,147]", "(147,151]",
"(151,154]", "(154,158]", "(158,161]", "(161,165]", "(165,169]",
"(169,172]", "(172,176]", "(176,179]", "(179,183]", "(183,187]",
"(187,190]", "(190,194]", "(194,197]", "(197,201]", "(201,204]",
"(204,208]", "(208,212]", "(212,215]", "(215,219]", "(219,222]",
"(222,226]", "(226,230]", "(230,233]", "(233,237]", "(237,240]",
"(240,244]", "(244,248]", "(248,251]", "(251,255]", "(255,258]",
"(258,262]", "(262,265]", "(265,269]", "(269,273]", "(273,276]",
"(276,280]", "(280,283]", "(283,287]", "(287,291]", "(291,294]",
"(294,298]", "(298,301]", "(301,305]", "(305,309]", "(309,312]",
"(312,316]", "(316,319]", "(319,323]", "(323,326]", "(326,330]",
"(330,334]", "(334,337]", "(337,341]", "(341,344]", "(344,348]",
"(348,352]", "(352,355]", "(355,359]"), class = "factor"))
Code
# Draw the surface with persp() and overlay two marker points projected
# through the viewing matrix that persp() returns.
flip <- 1 # 1 or 2: selects which of the two viewing angles to use
theta <- c(-300, 120)[flip]
# NOTE(review): `color` is not defined in this snippet; presumably a vector of
# colours indexed by the levels of d$facetcol -- confirm where it comes from.
pmat <- persp(
  d$x, d$y, d$z,
  asp = 1, col = color[d$facetcol], phi = 30, theta = theta,
  border = "grey10", d = .8, r = 2.8, expand = .6, shade = .2,
  axes = FALSE, box = TRUE, cex = .1
)
xx <- c(7.76245335753423, 6.73123147037805)
yy <- c(4.88402435072353, 4.20867046100364)
zz <- c(68.727, 48.558)
# trans3d() maps the 3D coordinates to 2D coordinates for this view; points()
# then draws on top of the finished surface regardless of depth.
mypoints <- trans3d(xx, yy, zz, pmat = pmat)
points(mypoints, pch = 16, col = 2)
The image below is correct, but when the plot is rotated (set flip to 2) the points do not jive. In other words, when the plot is rotated the points should be hidden from view, or seen through a semi-transparent surface. Help is appreciated!
In case this is helpful to anyone. I ended up using the persp3D() function from the plot3D package. All my custom axes labels and tick marks transferred seamlessly from the base persp() with the added bonus of a transparency argument (alpha =) and proper point plotting (points3D).

how to assign groupings based on attributes?

Imagine, I have a list of 51 personas, each of them has a standardized value inherent to their 6 skills.
Now, I am wondering if there is a programmable way to accurately and equally assign those individuals into equal teams, with the skill levels in mind. I wasn't sure which format of the data is more suitable, but intuitively I decided long dataset will make it easier:
df <- structure(list(unique_id = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L,
12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L,
14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L,
18L, 19L, 19L, 19L, 19L, 19L, 19L, 20L, 20L, 20L, 20L, 20L, 20L,
21L, 21L, 21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L, 22L, 22L, 23L,
23L, 23L, 23L, 23L, 23L, 24L, 24L, 24L, 24L, 24L, 24L, 25L, 25L,
25L, 25L, 25L, 25L, 26L, 26L, 26L, 26L, 26L, 26L, 27L, 27L, 27L,
27L, 27L, 27L, 28L, 28L, 28L, 28L, 28L, 28L, 29L, 29L, 29L, 29L,
29L, 29L, 30L, 30L, 30L, 30L, 30L, 30L, 31L, 31L, 31L, 31L, 31L,
31L, 32L, 32L, 32L, 32L, 32L, 32L, 33L, 33L, 33L, 33L, 33L, 33L,
34L, 34L, 34L, 34L, 34L, 34L, 35L, 35L, 35L, 35L, 35L, 35L, 36L,
36L, 36L, 36L, 36L, 36L, 37L, 37L, 37L, 37L, 37L, 37L, 38L, 38L,
38L, 38L, 38L, 38L, 39L, 39L, 39L, 39L, 39L, 39L, 40L, 40L, 40L,
40L, 40L, 40L, 41L, 41L, 41L, 41L, 41L, 41L, 42L, 42L, 42L, 42L,
42L, 42L, 43L, 43L, 43L, 43L, 43L, 43L, 44L, 44L, 44L, 44L, 44L,
44L, 45L, 45L, 45L, 45L, 45L, 45L, 46L, 46L, 46L, 46L, 46L, 46L,
47L, 47L, 47L, 47L, 47L, 47L, 48L, 48L, 48L, 48L, 48L, 48L, 49L,
49L, 49L, 49L, 49L, 49L, 50L, 50L, 50L, 50L, 50L, 50L, 51L, 51L,
51L, 51L, 51L, 51L), attribute = structure(c(2L, 1L, 3L, 4L,
5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L,
3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L,
2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L,
5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L,
3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L,
2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L,
5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L,
3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L,
2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L,
5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L,
3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L,
2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L,
5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L,
3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L,
2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L,
5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L,
3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L,
2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L,
5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 5L, 6L), .Label = c("Analytics",
"Communication", "Creativity", "Problem solving", "Programming",
"Project management"), class = "factor"), skill_level = c(1,
1, 2, 1, 0, 0, 1, 2, 1, 1, 1, 1, 4, 2, 2, 3, 2, 4, 2, 1, 1, 2,
2, 2, 2, 0, 0, 3, 0, 0, 2, 3, 3, 2, 2, 1, 2, 1, 1, 2, 2, 2, 2,
3, 3, 3, 3, 1, 3, 3, 3, 3, 1, 3, 1, 1, 1, 2, 2, 2, 4, 0, 0, 2,
0, 0, 3, 2, 3, 3, 2, 1, 1, 3, 4, 4, 4, 3, 3, 2, 3, 3, 3, 1, 2,
2, 1, 3, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 0, 2, 2, 2, 2, 3,
1, 2, 1, 1, 1, 0, 0, 0, 3, 2, 2, 3, 4, 3, 2, 2, 2, 2, 0, 2, 2,
2, 1, 2, 0, 0, 3, 3, 4, 3, 2, 3, 2, 1, 0, 3, 0, 2, 2, 1, 1, 2,
1, 1, 2, 1, 1, 2, 0, 1, 2, 3, 3, 3, 2, 2, 2, 2, 1, 2, 1, 1, 2,
1, 1, 2, 1, 1, 0, 1, 2, 2, 0, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1, 2,
1, 2, 1, 1, 1, 1, 1, 0, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 1, 3,
2, 2, 3, 0, 1, 3, 2, 3, 2, 3, 2, 1, 1, 1, 2, 0, 2, 2, 2, 2, 2,
2, 1, 2, 2, 2, 2, 2, 0, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2,
2, 2, 3, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 0, 2, 1, 2, 2,
2, 1, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 1, 4, 3, 3, 3, 2, 3, 2,
2, 2, 3, 1, 2, 2, 3, 2, 3, 1, 3)), class = c("spec_tbl_df", "tbl_df",
"tbl", "data.frame"), row.names = c(NA, -306L))
My idea was to somehow focus on running averages in each skill group, but I have no clue where to start.
Perhaps, I am over complicating the problem, and it may be achieved through a specific set of grouping and sorting operations. Frankly, I am not even sure how to search for some existing assignment problems like that, which is slowing me down.
Thank you.
What you describe sounds like you want to do cluster analysis. Here is one using kmeans clustering and 4 groups (finding the right number of cluster is a longer story, I'm just guessing here):
library(tidyr)
library(dplyr)
library(broom)
# kmeans needs wide format
mat <- df %>%
pivot_wider(id_cols = unique_id, names_from = attribute, values_from = skill_level)
# for the clustering we remove the id as it would be seen as a variable
clust <- mat %>%
select(-unique_id) %>%
kmeans(4)
# we can attach group membership back to the data
df_new <- mat %>%
mutate(group = clust$cluster)
df_new %>%
select(unique_id, group)
#> # A tibble: 51 x 2
#> unique_id group
#> <int> <int>
#> 1 1 3
#> 2 2 3
#> 3 3 4
#> 4 4 2
#> 5 5 2
#> 6 6 1
#> 7 7 2
#> 8 8 1
#> 9 9 4
#> 10 10 2
#> # ... with 41 more rows
# and also obtain group averages
group_average <- clust %>%
tidy() %>%
rename(Communication = x1,
Analytics = x2,
Creativity = x3,
"Problem solving" = x4,
Programming = x5,
"Project management" = x6)
group_average
#> # A tibble: 4 x 9
#> Communication Analytics Creativity `Problem solvin~ Programming
#> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2.11 1.94 2.22 2.33 1.94
#> 2 2 1.22 0.944 2.22 0.667
#> 3 0.833 1.33 1.5 1 0.5
#> 4 2.78 2.67 2.89 3 2.33
#> # ... with 4 more variables: `Project management` <dbl>, size <int>,
#> # withinss <dbl>, cluster <fct>
Now the groups are pretty homogeneous, meaning people in each group have relatively similar skill values. If your intention is to get groups that are equally strong, you could randomly select people from the different clusters so that each group has the same number of people from cluster 1,2,3 and 4.

How can I produce this specific boxplot that combines data on multiple levels from different data sources in ggplot or tidyverse/ R?

I am producing a plot that consists of several different boxplots. Please find my data sample below.
I have located data from three different studies: p$studie==1,2,3
Data comprise different tumor samples from a certain cancer that has four stages: p$ny_stadie=1,2,3,4.
Each tumor patient had lymph nodes removed (ranging from 3 to 124) and is a continuous covariate: p$n_fjernet.
Therefore
head(p)
studie ny_stadie n_fjernet
1 1 1 25
2 1 4 10
3 1 1 3
4 1 4 27
5 1 3 13
6 1 4 9
Data from all three studies have all four levels of p$ny_stadie==1,2,3,4 and a varying number of lymph nodes removed (p$n_fjernet).
I want to produce this plot (going up to p$ny_stadie==3,4 too)
Simply, I want to show the spread of resected lymph nodes per p$ny_stadie and per p$studie.
I use ggplot and tidyverse.
# My Data
p <- structure(list(studie = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), ny_stadie = structure(c(1,
4, 1, 4, 3, 4, 4, 4, 4, 4, 4, 3, 1, 3, 4, 3, 1, 1, 1, 4, 4, 3,
4, 4, 2, 2, 2, 2, 4, 3, 2, 1, 4, 1, 4, 3, 2, 1, 1, 1, 1, 4, 3,
4, 2, 4, 4, 4, 4, 3, 3, 4, 3, 4, 2, 4, 4, 4, 1, 4, 4, 2, 4, 3,
3, 4, 4, 4, 4, 3, 2, 4, 4, 3, 3, 3, 2, 1, 3, 4, 4, 3, 4, 4, 4,
4, 4, 4, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2), class = "AsIs"),
n_fjernet = c(25L, 10L, 3L, 27L, 13L, 9L, 7L, 7L, 7L, 6L,
6L, 5L, 4L, 3L, 37L, 26L, 19L, 17L, 15L, 9L, 57L, 55L, 33L,
33L, 33L, 28L, 27L, 27L, 26L, 23L, 23L, 23L, 22L, 22L, 21L,
21L, 20L, 20L, 19L, 18L, 18L, 18L, 18L, 17L, 17L, 16L, 16L,
16L, 15L, 15L, 67L, 35L, 56L, 15L, 37L, 44L, 124L, 41L, 30L,
31L, 35L, 36L, 28L, 39L, 54L, 25L, 27L, 69L, 53L, 24L, 33L,
52L, 77L, 51L, 7L, 22L, 53L, 26L, 58L, 28L, 83L, 39L, 15L,
37L, 27L, 9L, 17L, 32L, 26L, 22L, 37L, 28L, 52L, 27L, 15L,
11L, 7L, 24L, 11L, 56L, 47L, 27L, 14L)), row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 380L, 381L, 382L,
383L, 384L, 385L, 386L, 387L, 388L, 389L, 390L, 391L, 392L, 393L,
394L, 395L, 396L, 397L, 398L, 399L, 400L, 401L, 402L, 403L, 404L,
405L, 406L, 407L, 408L, 409L, 410L, 411L, 412L, 413L, 414L, 415L,
416L, 417L, 418L, 620L, 621L, 622L, 623L, 624L, 625L, 626L, 627L,
628L, 629L, 630L, 631L, 632L, 633L), class = "data.frame")
I'm not sure if this is what you intended; if not, correct me so that I can edit the answer.
Doing the following on the data:
p$ny_stadie_f <- factor(p$ny_stadie)
p$studie_f <- factor(p$studie)
q <- ggplot(p, aes(x = ny_stadie_f, y = n_fjernet, fill= studie_f)) + geom_boxplot()
q
I get the following output:
Is this the desired output? You can see that there are no boxes for ny_stadie = 3 and 4 where studie = 3, because the sample data contain no such observations.

ggmap with ggsubplot creates blank map

I am trying to place some plots on a map but nothing appears on the map. Here is a reproducible example. The first plot shows how each subplot should look. The second excludes the map but the subplot sizes are too large. The last is one attempt at the final product. I have tried many permutations but this has me stuck. Thanks in advance.
library(ggplot2)
library(ggmap)
library(ggsubplot)
pDat <- structure(list(Location = structure(c(13L, 12L, 14L, 14L, 15L, 15L, 16L, 16L, 17L, 17L, 18L, 19L, 32L, 19L, 19L, 20L, 20L, 20L, 21L, 21L, 21L, 22L, 22L, 22L, 23L, 23L, 24L, 25L, 25L, 26L, 27L, 28L, 28L, 29L, 30L, 30L, 31L), .Label = c("PW-29", "PW-31", "PW-32", "PW-33", "PW-35", "PW-36", "PW-37", "PW-38", "PW-39", "PW-40", "PW29", "SD-03", "SD-03a", "SD-12", "SD-18", "SD-19", "SD-27", "SD-29", "SD-30", "SD-31", "SD-32", "SD-33", "SD-35", "SD-36", "SD-37", "SD-38", "SD-40", "SD-41", "SD-42", "SD-43", "SD-44", "SD30"), class = "factor"), Lat = c(47.292351, 47.292351, 47.289376, 47.289376, 47.288299, 47.288299, 47.288014, 47.288014, 47.287338, 47.287338, 47.29476, 47.293246, 47.293246, 47.293246, 47.293246, 47.293259, 47.293259, 47.293259, 47.292206, 47.292206, 47.292206, 47.291523, 47.291523, 47.291523, 47.290496, 47.290496, 47.289826, 47.288262, 47.288262, 47.287735, 47.286672, 47.290059, 47.290059, 47.290482, 47.28852, 47.28852, 47.288377), Long = c(-73.098418, -73.098418, -73.101282, -73.101282, -73.102558, -73.102558, -73.102178, -73.102178, -73.103016, -73.103016, -73.096432, -73.096412, -73.096412, -73.096412, -73.096412, -73.098245, -73.098245, -73.098245, -73.097552, -73.097552, -73.097552, -73.100022, -73.100022, -73.100022, -73.099395, -73.099395, -73.100051, -73.101199, -73.101199, -73.101895, -73.102629, -73.100954, -73.100954, -73.100184, -73.102246, -73.102246, -73.101477), SBD_ft = c(0, 2, 0, 7, 0, 10, 0, 6, 2, 5, 0, 0.5, 0.5, 0, 2.5, 0.5, 0, 3, 0.5, 0, 2.5, 0.5, 0, 2.5, 0.5, 0, 0, 0.5, 0, 0, 0, 2, 5, 3, 0, 6, 0), SED_ft = c(20, 4, 2, 9, 2, 12, 2, 8, 4, 7, 0.5, 2.5, 2.5, 0.5, 4.5, 2.5, 0.5, 5, 2.5, 0.5, 4.5, 2.5, 0.5, 3.5, 2.5, 0.5, 0.5, 2.5, 0.5, 0.5, 0.5, 4, 7, 5, 2, 8, 2), Cluster = structure(c(3L, 3L, 3L, 4L, 5L, 5L, 2L, 2L, 4L, 5L, 1L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 6L, 1L, 6L, 4L, 1L, 6L, 1L, 1L, 1L, 5L, 1L, 4L, 1L, 3L, 4L, 3L, 4L, 4L, 4L), .Label = c("1", "2", "3", "4", "5", "6"), class = "factor")), .Names = c("Location", "Lat", "Long", 
"SBD_ft", "SED_ft", "Cluster"), row.names = 5:41, class = "data.frame")
# Bounding box in the order left, bottom, right, top (lon/lat). The sample
# points span roughly -73.103 to -73.096 in longitude and 47.287 to 47.295 in
# latitude, so left must be the smaller (more negative) longitude; with the
# original -73.01 the box was centred well east of the data.
BBox <- c(-73.11, 47.28, -73.09, 47.30)
#Base <-get_map(BBox,zoom=13,source='google',maptype = 'hybrid')
Base_z <-get_map(BBox,zoom=15,source='google',maptype = 'hybrid')
fm0<-ggmap(Base_z,legend = "none",
base_layer=ggplot(aes(x=Long,y=Lat),data=pDat))
# Example subplots
ggplot(pDat,aes(ymin=SBD_ft,ymax=SED_ft,xmin=0,xmax=1,fill=Cluster))+
facet_wrap(~Location)+
geom_rect() +
scale_y_reverse()
# TEST 1, need to control size of subplots
ggplot(pDat)+
geom_subplot(aes(x=Long,y=Lat,group=Location,
subplot=geom_rect(data=pDat,aes(ymin=SBD_ft,ymax=SED_ft,xmin=0,xmax=1,fill=Cluster))))
# Final , does not work
fm0+
geom_subplot(aes(x=Long,y=Lat,group=Location,
subplot=geom_rect(data=pDat,aes(ymin=SBD_ft,ymax=SED_ft,xmin=0,xmax=1,fill=Cluster))))

Resources