Ggplot2 stairtep graph won't join points - r

I have a sequence of Surfaces that were touched in a room during patient care.
dput(sequence_contacts)
structure(list(value = structure(c(1L, 1L, 2L, 3L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 1L, 4L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L), .Label = c("FarPatient", "Patient", "Equipment", "HygieneArea",
"Out"), class = "factor"), x = 1:25), row.names = c(NA, -25L), class = c("tbl_df",
"tbl", "data.frame"))
I'm trying to plot the sequence of contacts on along the x axis with the surface name on the y axis using a connected stair-step graph.
But I can't get the points to join up. Why not?
ggplot()+
geom_step(data= sequence_contacts, mapping=aes(x=x, y=value)) +
geom_step(data= sequence_contacts, mapping=aes(x=x, y=value), direction="vh", linetype=3) +
geom_point(data= sequence_contacts, mapping=aes(x=x, y=value, colour=x)) +
scale_color_gradient(low="blue", high="red")+
scale_x_continuous("Contact Number")+
scale_y_discrete("Surface Category")

Related

Remove three sides of border around ggplot facet strip label

I have the following graph:
And would like to make what I thought would be a very simple change: I would like to remove the top, right and bottom sides of the left facet label border lines.
How do I do I remove those lines, or draw the equivalent of the right hand lines? I would rather not muck about with grobs, if possible, but won't say no to any solution that works.
Graph code:
library(ggplot2)
library(dplyr)
library(forcats)
posthoc1 %>%
mutate(ordering = -as.numeric(Dataset) + Test.stat,
Species2 = fct_reorder(Species2, ordering, .desc = F)) %>%
ggplot(aes(x=Coef, y=Species2, reorder(Coef, Taxa), group=Species2, colour=Taxa)) +
geom_point(size=posthoc1$Test.stat*.25, show.legend = FALSE) +
ylab("") +
theme_classic(base_size = 20) +
facet_grid(Taxa~Dataset, scales = "free_y", space = "free_y", switch = "y") +
geom_vline(xintercept = 0) +
theme(axis.text.x=element_text(colour = "black"),
strip.placement = "outside",
strip.background.x=element_rect(color = NA, fill=NA),
strip.background.y=element_rect(color = "black", fill=NA)) +
coord_cartesian(clip = "off") +
scale_x_continuous(limits=NULL)
Data:
structure(list(Dataset = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 5L, 5L, 5L, 5L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L), .Label = c("All.habitat", "Aut.habitat", "Habitat.season",
"Lit.season", "Spr.habitat"), class = "factor"), Species = structure(c(1L,
2L, 3L, 5L, 6L, 10L, 11L, 12L, 13L, 1L, 3L, 5L, 6L, 13L, 1L,
2L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L), .Label = c("Ar.sp1",
"Ar.sp2", "Arc.sp1", "B.pus", "Dal.sp1.bumps", "Dip.unID", "I.palladium",
"Pale", "Ph.sp3", "Port", "Somethus", "sty", "Sty.sp1"), class = "factor"),
Species2 = structure(c(2L, 9L, 1L, 4L, 5L, 7L, 11L, 12L,
13L, 2L, 1L, 4L, 5L, 13L, 2L, 9L, 4L, 5L, 6L, 10L, 8L, 7L,
11L, 13L), .Label = c("Arcitalitrus sp1", "Armadillidae sp1 ",
"Brachyiulus pusillus ", "Dalodesmidae sp1", "Diplopoda",
"Isocladosoma pallidulum ", "Ommatoiulus moreleti ", "Philosciidae sp2",
"Porcellionidae sp1", "Siphonotidae sp2", "Somethus sp1",
"Styloniscidae ", "Styloniscidae sp1"), class = "factor"),
Taxa = structure(c(3L, 3L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
1L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 3L), .Label = c("Amphipoda",
"Diplopoda", "Isopoda"), class = "factor"), Variable = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Autumn", "Litter",
"Spring", "Summer"), class = "factor"), Coef = c(1.911502938,
2.086917154, 1.571872993, 12.61184801, 15.6161116, -1.430032837,
-12.51944478, 12.33934516, -8.040249562, 8.08258816, 1.780142396,
12.88982576, 16.78107544, -13.22641153, 1.68810887, 2.093965381,
12.27209197, 15.08328526, -6.334640911, -11.29985948, -11.62658947,
-1.676293808, -6.246555908, -3.470297147), SE = c(0.403497472,
2.21607562, 0.348600794, 2.423896379, 0.509468128, 3.423013791,
2.382857733, 1.775086895, 2.087788334, 2.23631504, 0.33402261,
2.518562443, 0.459720131, 1.950974996, 0.2476205, 0.235648095,
1.815155489, 0.325804415, 2.564680067, 2.437104984, 2.212583358,
2.677618401, 2.324019051, 0.420436743), Test.stat = c(18.36532749,
13.27324683, 13.29039037, 20.50277493, 44.06097153, 10.55234932,
14.64951518, 13.22575401, 20.16415411, 16.55627107, 11.81407568,
15.15213717, 40.67205188, 12.62233207, 37.60085488, 16.90879258,
20.20215107, 80.30520371, 13.35250626, 13.01692428, 17.52987519,
20.03658771, 12.02467914, 53.5052683)), row.names = 10:33, class = "data.frame")
This solution is based on grobs: find positions of "strip-l" (left strips) and then substitute the rect grobs with line grobs.
p <- posthoc1 %>%
mutate(ordering = -as.numeric(Dataset) + Test.stat,
Species2 = fct_reorder(Species2, ordering, .desc = F)) %>%
ggplot(aes(x=Coef, y=Species2, reorder(Coef, Taxa), group=Species2, colour=Taxa)) +
geom_point(size=posthoc1$Test.stat*.25, show.legend = FALSE) +
ylab("") +
theme_classic(base_size = 20) +
facet_grid(Taxa~Dataset, scales = "free_y", space = "free_y", switch = "y") +
geom_vline(xintercept = 0) +
theme(axis.text.x=element_text(colour = "black"),
strip.placement = "outside",
#strip.background.x=element_rect(color = "white", fill=NULL),
strip.background.y=element_rect(color = NA)
) +
coord_cartesian(clip = "off") +
scale_x_continuous(limits=NULL)
library(grid)
q <- ggplotGrob(p)
lg <- linesGrob(x=unit(c(0,0),"npc"), y=unit(c(0,1),"npc"),
gp=gpar(col="red", lwd=4))
for (k in grep("strip-l",q$layout$name)) {
q$grobs[[k]]$grobs[[1]]$children[[1]] <- lg
}
grid.draw(q)

Why isn't my barplot rearranging properly when faceting with ggplot?

So I have made this barplot with this code, bars organised in descending order, great!
na.omit(insect_tally_native_ranges)%>%
group_by(native_ranges)%>%
dplyr::summarise(freq=sum(n))%>%
ggplot(aes(x=reorder(native_ranges,freq),y=freq))+
geom_col(color="#CD4F39",fill="#CD4F39",alpha=0.8)+
coord_flip()+
labs(x="Native ranges",
y="Number of invasive insect arrivals",
title="Species by native ranges")+
theme_minimal()
And now I wanted to do the same but faceting by a variable called Period, here's the code:
ggplot(native_freq_period,
aes(y=reorder(native_ranges,freq),x=freq))+
geom_barh(stat= "identity",
color="#CD4F39",
fill="#CD4F39",
alpha=0.8)+
labs(x="Native ranges",
y="Number of invasive insect arrivals",
title="Species by native ranges")+
theme_minimal()+
facet_wrap(~Period)
But the plot came out like this:
Which is pretty annoying because it is the same code as above and the levels for the variable native_ranges should be organised again. But instead it gives me this lumpy order that isn't even the alphabetic order. So the reorder part is reordering but not by freq! Don't understand.
Here is the data:
structure(list(native_ranges = structure(c(6L, 10L, 11L, 7L,
3L, 5L, 1L, 1L, 8L, 6L, 3L, 5L, 2L, 4L, 5L, 7L, 7L, 7L, 8L, 9L,
11L), .Label = c("Afrotropic", "Afrotropic/Neotropic", "Australasia",
"Australasia/Neotropic", "Indomalaya", "Nearctic", "Neotropic",
"Neotropic/Nearctic", "Neotropic/Nearctic/Australasia", "Palearctic",
"Palearctic/Indomalaya"), class = "factor"), Period = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 3L, 3L, 3L, 4L, 4L, 2L, 1L, 2L,
3L, 2L, 4L, 3L), .Label = c("1896-1925", "1926-1955", "1956-1985",
"1986-2018"), class = "factor"), freq = c(21L, 13L, 12L, 11L,
10L, 10L, 4L, 4L, 4L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L)), row.names = c(NA, -21L), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), vars = "native_ranges", drop = TRUE, indices = list(
6:7, 12L, c(4L, 10L), 13L, c(5L, 11L, 14L), c(0L, 9L), c(3L,
15L, 16L, 17L), c(8L, 18L), 19L, 1L, c(2L, 20L)), group_sizes = c(2L,
1L, 2L, 1L, 3L, 2L, 4L, 2L, 1L, 1L, 2L), biggest_group_size = 4L, labels = structure(list(
native_ranges = structure(1:11, .Label = c("Afrotropic",
"Afrotropic/Neotropic", "Australasia", "Australasia/Neotropic",
"Indomalaya", "Nearctic", "Neotropic", "Neotropic/Nearctic",
"Neotropic/Nearctic/Australasia", "Palearctic", "Palearctic/Indomalaya"
), class = "factor")), row.names = c(NA, -11L), class = "data.frame", vars = "native_ranges", drop = TRUE))
You have to arrange the order of the variable first before plotting. Since you didn't provide any reproducible data I am using the following data
drugs <- data.frame(drug = c("a", "b", "c"), effect = c(4.2, 9.7, 6.1))
ggplot(drugs, aes(drug, effect)) +
geom_col()
Now to change the order of the variable use factor
drugs$drug <- factor(drugs$drug,levels = c("b","a","c")) #This is the order I want
ggplot(drugs, aes(drug, effect)) +
geom_col()
Here I provided the levels in factor manually. You can either provide them manually or sort the order of the variable first separately and provide. See below,
drugs$drug <- factor(drugs$drug,levels = drugs[order(drugs$effect),]$drug)
ggplot(drugs, aes(drug, effect)) +
geom_col()
This should work with facet_wrap as well.
OK, finally figured it out with help from the other answer. You need to create another column that summarizes the total frequency so you can then reorder by that column. There may be a more efficient way to do it, but I create a new summary data.frame and then join it back to the original and then reorder based on the new column.
summary_data <- data %>%
ungroup() %>%
group_by(native_ranges) %>%
summarize(total = sum(freq))
data <- data %>%
left_join(summary_data)
ggplot(data, aes(y = reorder(native_ranges, total),x = freq)) +
geom_barh(stat= "identity",
color="#CD4F39",
fill="#CD4F39",
alpha=0.8) +
labs(x="Native ranges",
y="Number of invasive insect arrivals",
title="Species by native ranges") +
theme_minimal()+
facet_wrap(~Period)

ggplot: how to change colors of vertical lines according to group id (in polar plot)

I have the following dataframe (output of dput(df2)):
structure(list(angles = c(-0.701916320805404, 2.33367948606366,
0.364313791379516, -0.228918909875176, -2.77064550417737, 2.97776037032614,
-3.03604124258522, 2.10507549390108, 2.07708771915781, -0.0646656487453258,
-0.701916320805404, 2.33367948606366, 0.364313791379516, -0.228918909875176,
-2.77064550417737, 2.97776037032614, -3.03604124258522, 2.10507549390108,
2.07708771915781, -0.0646656487453258, -0.701916320805404, 2.33367948606366,
0.364313791379516, -0.228918909875176, -2.77064550417737, 2.97776037032614,
-3.03604124258522, 2.10507549390108, 2.07708771915781, -0.0646656487453258
), id = c(9L, 4L, 5L, 6L, 3L, 10L, 3L, 4L, 4L, 6L, 1L, 4L, 5L,
6L, 2L, 1L, 3L, 4L, 4L, 6L, 1L, 7L, 5L, 6L, 2L, 1L, 3L, 4L, 4L,
6L), method = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("kd-clips", "QT-Clust", "True"
), class = "factor"), truid = structure(c(1L, 4L, 5L, 6L, 2L,
1L, 3L, 4L, 4L, 6L, 1L, 4L, 5L, 6L, 2L, 1L, 3L, 4L, 4L, 6L, 1L,
4L, 5L, 6L, 2L, 1L, 3L, 4L, 4L, 6L), .Label = c("1", "2", "3",
"4", "5", "6"), class = "factor")), .Names = c("angles", "id",
"method", "truid"), row.names = c(940L, 474L, 889L, 298L, 222L,
932L, 87L, 695L, 261L, 832L, 1940L, 1474L, 1889L, 1298L, 1222L,
1932L, 1087L, 1695L, 1261L, 1832L, 2940L, 2474L, 2889L, 2298L,
2222L, 2932L, 2087L, 2695L, 2261L, 2832L), class = "data.frame")
I run the following code to make the plot that follows:
df2$y <- as.numeric(as.factor(df2$method)) + 3
df2$yend <- df2$y + 1
library(ggplot2)
library(RColorBrewer)
cx <- ggplot(df2, aes(y = y, x = angles))
cx + geom_point(aes(color = as.factor(id))) + ylim(0,6) + theme_light() + scale_colour_brewer(palette = "Paired") +
scale_x_continuous(labels = NULL, breaks = df2$angles)+coord_polar() +
theme(legend.position="none", panel.border=element_blank(), axis.title =
element_blank(), axis.text = element_blank())
I get the following figure:
Almost there, but what I would like to get are two more things:
The radial lines to be colored according to the last column (true.id) of the df2 (which is the same color as the points in the third concentric circle -- same as that for id == "True").
I would like a radial scale also, marked at intervals of 30 (like at angles of 0, 30, 60, 90, ... 330. 0). However, I do not want the scale at the left (of the y's).
The above has 30 points, three replicates of each method at each angle. However, the figures only appears to plot 9 replicates, i.e. 27 points in total. (It is possible that two angles -- the one with 2.077 and the one with 2.105) are very close, so that they are really perhaps both there, but I can not tell because then what are the two points that are close to each other?
I have tried all day but could not get either of these to work, so I was wondering if anyone can help.
Thanks in advance!
I think this is probably what you wanted (I know, long time ago now...). Some of what you were getting wrong I actually couldn't quite tell, but it's nearly always a mistake to try to use the coordinate gridlines to show data - use geom_line or geom_segment for the data, and leave the gridlines to show the coordinates. This solves both your need to have the lines coloured, and makes it easier for the x gridlines (ie the radial ones) to have the labels you want (whether I understood you correctly on this, re degrees v. radians, I'm not sure).
library(ggplot2)
library(RColorBrewer)
# your "angle" looks to be in radians, not sure how you want these converted to degrees?
# but let's set where we want the axis marks to be
br <- seq(from = -pi, to = pi, length.out = 7)
ggplot(df2, aes(y = y, x = angles)) +
geom_point(aes(color = as.factor(id))) +
theme_light() +
scale_colour_brewer(palette = "Paired") +
geom_vline(aes(xintercept = angles, colour = truid)) +
coord_polar() +
scale_x_continuous(lim = c(-pi, pi), breaks = br, labels = 0:6 * 60) +
theme(legend.position="none",
panel.border=element_blank(),
axis.title = element_blank(),
axis.text.y = element_blank())

Create balloon plot with ggplot2: use ..count.. to adjust size of geom_point?

I want to essentially create a balloon plot with ggplot2 where the size of the points are the frequency of data at a given coordinate.
Given the data.frame d:
d = structure(list(value.x = structure(c(2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L), .Label = c("Not at all Knowledgeable", "Somewhat Knowledgeable", "Very Knowledgeable"), class = c("ordered", "factor")), value.y = structure(c(5L, 5L, 3L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 5L, 4L, 4L, 4L, 5L, 4L, 5L, 5L, 4L, 4L), .Label = c("Much less knowledgeable", "Less knowledgeable", "Same as before workshop", "More knowledgeable", "Much more knowledgeable"), class = c("ordered", "factor"))), .Names = c("value.x", "value.y"), row.names = c(NA, 20L), class = "data.frame")
I want to do something like:
ggplot(d,aes(value.x,value.y,size=..count..))+geom_point()
where the data points are proportional to how many times data occur, but I cannot figure out how to properly set the size of the points for what I want.
Importantly, I would like to avoid creating a new column in d just for counts of data as has been done with other datasets (e.g. http://www.r-bloggers.com/balloon-plot-using-ggplot2/). This seems messy and I would like to utilize ggplot2's power if I can.
Per #BenBolker's suggestion, I found a solution using stat_sum():
ggplot(d, aes(value.x, value.y, size = ..n..)) + stat_sum()

Using panel.linejoin with missing data

This question is very much related to the question and answers received here, where #Mr. Flick helped me with a question I had regarding the xyplot in the lattice package. But seeing that I'm now trouble-shooting some code I thought I'd ask to the "broader public" for some help.
I've been asked by the reviewers of our paper, to present patient body mass index follow-up data similarly to the way we presented their intraoperative data in the link I provided above.
When I plot the data in an analog fashion, the black line representing "mean" stops at three months, but I want it to go through all time points. See image below.
Here's my data called bmi_data
dput(bmi_data)
structure(list(StudyID = structure(c(1L, 2L, 3L, 4L, 5L, 6L,
7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), .Label = c("P1",
"P2", "P3", "P4", "P5", "P6", "P7"), class = "factor"), BMI = c(37.5,
43.82794785, 48.87848306, 39.93293705, 42.76788399, 39.44207394,
50.78043704, 25.61728395, 37.91099773, 39.02185224, 36.00823045,
37.75602259, 34.06360931, 39.12591051, 25.98765432, 34.89937642,
32.95178633, 35.62719098, 35.75127802, 32.27078777, NA, 23.61111111,
32.34835601, NA, 34.33165676, NA, 26.53375883, 35.79604579, 23.20987654,
31.71060091, NA, 34.29355281, NA, NA, NA), BMITIME2 = structure(c(5L,
5L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L,
4L, 4L), .Label = c("12 months FU", "3 months FU", "6 months FU",
"Over 12 months FU", "Preoperative BMI"), class = "factor"),
TIME2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("Preoperative BMI",
"3 months FU", "6 months FU", "12 months FU", "Over 12 months FU"
), class = "factor")), .Names = c("StudyID", "BMI", "BMITIME2",
"TIME2"), class = "data.frame", row.names = c(NA, -35L))
Some data.frame manipulation to get the right order of my time-points.
bmi_data$TIME2 <- factor(bmi_data$BMITIME2, unique(bmi_data$BMITIME2))
And now my code that doesn't seem to be working properly.
require(lattice)
stderr <- function(x) sqrt(var(x,na.rm=TRUE)/length(na.omit(x)))
panel.sem <- function(x, y, col.se=plot.line$col, alpha.se=.10, ...) {
plot.line <- trellis.par.get("plot.line")
xs <- if(is.factor(x)) {
factor(c(levels(x) , rev(levels(x))), levels=levels(x))
} else {
xx <- sort(unique(x))
c(xx, rev(xx))
}
means <- tapply(y,x, mean, na.rm=T)
stderr <- tapply(y,x, stderr)
panel.polygon(xs, c(means+stderr, rev(means-stderr)), col=col.se, alpha=alpha.se)}
xyplot(BMI~bmi_data$TIME2, groups=StudyID, data=bmi_data, ty=c("l", "p"),
panel = function(x, y, ...) {
panel.sem(x,y, col.se="grey")
panel.xyplot(x, y, ...)
panel.linejoin(x, y, horizontal = FALSE ,..., col="black", lty=1, lwd=4)}
,xlab="Measurement Time Point",
ylab=expression("BMI"~"(kg/m^2)"))
Which results in this plot:
Any help for solving this question is greatly appreciated!!!
The problem is that you have missing data (NA) values in this data set. The panel.linejoin() calls mean() over the observations at each x and if there are NA vales, by default the mean will be NA and then a line won't be drawn. To change that, you can specify a function wrapper to panel.linejoin. Try
xyplot(BMI~bmi_data$TIME2, groups=StudyID, data=bmi_data, ty=c("l", "p"),
panel = function(x, y, ...) {
panel.sem(x,y, col.se="grey")
panel.xyplot(x, y, ...)
panel.linejoin(x, y, horizontal = FALSE ,..., col="black",
lty=1, lwd=4, na.rm=T,
fun=function(x) mean(x, na.rm=T))
},
xlab="Measurement Time Point",
ylab=expression("BMI"~"(kg/m^2)")
)
Here's an approach using ggplot + dplyr but don't know lattice:
if (!require("pacman")) install.packages("pacman")
pacman::p_load(ggplot2, dplyr)
ave_data <- bmi_data %>%
group_by(TIME2) %>%
summarize(BMI = mean(BMI, na.rm=TRUE)) %>%
mutate(ave = TRUE)
ggplot(bmi_data, aes(y=BMI, x=TIME2)) +
geom_point(aes(color = StudyID), shape=21) +
geom_smooth(aes(group=1), alpha=.1) +
geom_line(size=.8, aes(group=StudyID, color = StudyID)) +
geom_path(data=ave_data, color="black", size=1.2, aes(group=ave)) +
xlab("Measurement Time Point") + theme_bw() +
ylab(expression("BMI"~"(kg/m^2)")) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position=c(.87, .70)
) +
guides(fill=guide_legend(title="ID"))

Resources