I have a plot that looks like below. I want to change the order so that the larger value comes first (so cyan would precede red). But I can't seem to do this. What am I doing wrong?
This is my current code block so far:
# Asker's original plot: points, repelled P.value labels and CI ranges, each
# dodged by Method with width 0.5. In the dput below, Method's levels are
# ordered "MAGMA", "Pairwise".
ggplot(df, aes(x = Gene.Set.Size, y = OR, label =P.value, color = Method, group = Method)) +
geom_point(position=position_dodge(width=0.5)) +
ggrepel::geom_text_repel(size = 6, box.padding = 1, segment.angle = 20, position=position_dodge(width=0.5))+
geom_pointrange(aes(ymax = UpperCI, ymin = LowerCI),position=position_dodge(width=0.5)) +
theme_bw() +
theme(text=element_text(size=25),axis.text.x = element_text(angle = 45, hjust = 1)) +
ylab("Odds ratio") +
xlab("Gene set size") +
theme(plot.margin = unit(c(2,2,2,2), "cm"))
> dput(df)
structure(list(Method = structure(c(1L, 1L, 1L, 2L, 2L, 2L), .Label = c("MAGMA",
"Pairwise"), class = "factor"), P.value = c(8.74e-28, 1.33e-56,
5.57e-92, 1.63e-44, 4.23e-71, 2.78e-95), OR = c(1.39, 1.424668,
1.4, 1.513, 1.478208, 1.409563), UpperCI = c(1.481491, 1.487065,
1.446039, 1.601557, 1.417117, 1.455425), LowerCI = c(1.316829,
1.364601, 1.356358, 1.42, 1.541768, 1.365056), Gene.Set.Size = structure(c(1L,
2L, 3L, 1L, 2L, 3L), .Label = c("500", "1000", "2000"), class = "factor")), row.names = c(NA,
-6L), class = "data.frame")
You must set the factor order.
library(ggplot2)
df <- structure(list(Method = structure(c(1L, 1L, 1L, 2L, 2L, 2L), .Label = c("MAGMA",
"Pairwise"), class = "factor"), P.value = c(8.74e-28, 1.33e-56,
5.57e-92, 1.63e-44, 4.23e-71, 2.78e-95), OR = c(1.39, 1.424668,
1.4, 1.513, 1.478208, 1.409563), UpperCI = c(1.481491, 1.487065,
1.446039, 1.601557, 1.417117, 1.455425), LowerCI = c(1.316829,
1.364601, 1.356358, 1.42, 1.541768, 1.365056), Gene.Set.Size = structure(c(1L,
2L, 3L, 1L, 2L, 3L), .Label = c("500", "1000", "2000"), class = "factor")), row.names = c(NA,
-6L), class = "data.frame")
# Reorder the factor so that "Pairwise" is the first level and "MAGMA" the
# second; the dodge order follows the level order.
df$Method <- factor(df$Method, levels = c("Pairwise", "MAGMA"))

ggplot(
  df,
  aes(x = Gene.Set.Size, y = OR, label = P.value, group = Method, color = Method)
) +
  geom_point(position = position_dodge(width = 0.5)) +
  ggrepel::geom_text_repel(
    size = 6, box.padding = 1, segment.angle = 20,
    position = position_dodge(width = 0.5)
  ) +
  # Point plus confidence-interval bar, dodged by the same width as the points.
  geom_pointrange(
    aes(ymin = LowerCI, ymax = UpperCI),
    position = position_dodge(width = 0.5)
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 25),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.margin = unit(c(2, 2, 2, 2), "cm")
  ) +
  labs(x = "Gene set size", y = "Odds ratio")
df %>% mutate(Method = fct_relevel(Method, 'Pairwise')) %>% <your ggplot2 code>
should do the job (drop the data argument from ggplot(), since the pipe supplies it), assuming you have loaded the pipe operator %>%, dplyr and the forcats package, which you can do with library(tidyverse).
You can simply reverse the ordering of the Method factor with forcats::fct_rev.
df$Method <- fct_rev(df$Method)
Alternatively, you can specify the level order when you initially convert that column to a factor, e.g. factor(x, levels = c("Pairwise", "MAGMA")).
Related
I am having difficulty customising the ggplot (geom_point) output.
Points to have black outline but with different fill colors by group (white and black)
Is there a way to combine the legend into one?
Data
library(dplyr)
library(ggplot2)
dat <- structure(list(q = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L),
is_female = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L), levels = c("Male", "Female"), class = "factor"), mean_exp = c(19.3095475534936,
19.2351713991988, 21.6718804471611, 21.69517120871, 23.4144749290445,
23.4191383190372, 25.2817658487443, 25.2772340355605, 28.3982168611512,
28.7869521340185), mean_fi = c(0.0915386254018914, 0.0912295567094683,
0.0771055282779973, 0.0790597510143077, 0.0859508568981647,
0.088489590940481, 0.109848283385112, 0.11358904634185, 0.128425331060705,
0.136830729164909), b_fi.frail = c(1, 1, 1, 1, 1, 1, 1, 1,
1, 1), pct = c(47.5830407777478, 52.4169592222522, 37.567084078712,
62.432915921288, 36.9897959183673, 63.0102040816327, 34.0960360941025,
65.9039639058975, 29.0891283055828, 70.9108716944172)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -10L), groups = structure(list(
q = 1:5, .rows = structure(list(1:2, 3:4, 5:6, 7:8, 9:10), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -5L), .drop = TRUE))
My attempt:
# Asker's attempt, kept as posted: square points and lines per is_female group,
# with manual colour and fill scales.
ggplot(dat, aes(x=mean_exp, y=mean_fi, linetype=is_female)) +
geom_point(aes(color=is_female, fill=is_female), size=2, shape="square") +
geom_line() +
scale_x_continuous(limits = c(18, 30), breaks = seq(20, 30, 5), expand = c(0,0)) +
scale_y_continuous(limits = c(0.05, 0.15), breaks = seq(0.05, 0.15, 0.02), expand = c(0,0)) +
# NOTE(review): the value names "M"/"F" do not match the is_female levels
# "Male"/"Female" in the dput above -- presumably why the manual colours are
# not applied; confirm.
scale_color_manual(values = c("M" = "black", "F" = "black")) +
scale_fill_manual(values = c("M" = "black", "F" = "white")) +
labs(x= expression(Body ~ mass ~ index ~ (kg/m^2)), y= "Mean baseline FI score", title = "BMI") +
theme_classic() +
theme(plot.title = element_text(hjust=0.5),
legend.position = "bottom")
My code outputs a graph with grey points and lines for some reason. I would like black lines and outlines (but points to be filled with black or white depending on the group) and to combine the legend if possible.
I realise there are many similar examples out there but I can't seem to figure out why my code is not working... Thanks for your help in advance!
Use shape = 22 for a fillable square, i.e. one which can have a different outline.
Colour can be outside the aes if both groups require a black outline for the points.
library(dplyr)
library(ggplot2)
dat <- structure(list(
q = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L),
is_female = structure(c(
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L
), levels = c("Male", "Female"), class = "factor"), mean_exp = c(
19.3095475534936,
19.2351713991988, 21.6718804471611, 21.69517120871, 23.4144749290445,
23.4191383190372, 25.2817658487443, 25.2772340355605, 28.3982168611512,
28.7869521340185
), mean_fi = c(
0.0915386254018914, 0.0912295567094683,
0.0771055282779973, 0.0790597510143077, 0.0859508568981647,
0.088489590940481, 0.109848283385112, 0.11358904634185, 0.128425331060705,
0.136830729164909
), b_fi.frail = c(
1, 1, 1, 1, 1, 1, 1, 1,
1, 1
), pct = c(
47.5830407777478, 52.4169592222522, 37.567084078712,
62.432915921288, 36.9897959183673, 63.0102040816327, 34.0960360941025,
65.9039639058975, 29.0891283055828, 70.9108716944172
)
), class = c(
"grouped_df",
"tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), groups = structure(list(
q = 1:5, .rows = structure(list(1:2, 3:4, 5:6, 7:8, 9:10), ptype = integer(0), class = c(
"vctrs_list_of",
"vctrs_vctr", "list"
))
), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -5L), .drop = TRUE))
# Line-and-point plot of mean_fi against mean_exp, one line type per sex.
ggplot(dat, aes(x = mean_exp, y = mean_fi, linetype = is_female)) +
  # Shape 22 is a fillable square: the outline is fixed to black and only the
  # fill is mapped to the group.
  geom_point(aes(fill = is_female), shape = 22, colour = "black", size = 2) +
  geom_line() +
  scale_x_continuous(
    limits = c(18, 30),
    breaks = seq(20, 30, by = 5),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    limits = c(0.05, 0.15),
    breaks = seq(0.05, 0.15, by = 0.02),
    expand = c(0, 0)
  ) +
  scale_fill_manual(values = c("black", "white")) +
  labs(
    title = "BMI",
    x = expression(Body ~ mass ~ index ~ (kg / m^2)),
    y = "Mean baseline FI score",
    # Both legends get the same title so that ggplot2 can combine them.
    fill = "Sex",
    linetype = "Sex"
  ) +
  theme_classic() +
  theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5))
Created on 2022-07-07 by the reprex package (v2.0.1)
The error bars in my bar plot are misplaced even though I use position_dodge. I used the same approach for a less complicated plot and it worked there. Any suggestions are welcome.
tgc <- structure(list(Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L), .Label = c("Visible", "Remembered"), class = "factor"),
Condition = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L,
2L, 3L, 3L), .Label = c("CEN", "IPS", "CTR"), class = "factor"),
test = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L), .Label = c("Pretest", "Posttest"), class = "factor"),
N = c(12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), EE = c(7.33336873483333,
6.80361546108333, 7.09299323975, 7.85694358425, 7.03401583625,
6.98843623408333, 6.64690423166667, 6.76286593966667, 6.53229116175,
6.597801229, 5.87755052541667, 6.29406939166667), sd = c(1.10209636894719,
0.640166385925573, 0.829194321849813, 0.815786383997366,
1.13990647741682, 0.831699837406531, 0.894188346279884, 0.836594325568601,
0.762116322320573, 0.810426854086389, 0.986911196860133,
0.835898962602035), se = c(0.318147817642284, 0.18480011762014,
0.239367782465249, 0.235497244201055, 0.329062655793799,
0.240091062505814, 0.258129941215459, 0.241503979534773,
0.220004031922795, 0.233950081182639, 0.284896722586727,
0.241303245536807), ci = c(0.700238625346207, 0.406742316465117,
0.526844937010058, 0.518325939729, 0.724262022142493, 0.528436865645932,
0.568140169990552, 0.531546675059109, 0.484225609422399,
0.514920656885223, 0.62705345857213, 0.531104862508461)), row.names = c(NA,
-12L), class = "data.frame")
# NOTE(review): summarySE() and data10 are not defined in the visible snippet;
# this call overwrites the tgc built from the dput above.
tgc <- summarySE(data10, measurevar="EE", groupvars=c("Group", "Condition", "test"))
# NOTE(review): fill = test sits outside aes() here, so it is not a plot-level
# mapping; only geom_bar() below maps fill.
ggplot(tgc, aes(x = Condition, y = EE), fill = test) +
# In this version the error bars are added before the bars.
geom_errorbar(aes(ymin=EE-se, ymax=EE+se), position = position_dodge(0.5), width=.1) +
geom_bar(aes(fill = test), stat = "identity", width = 0.5, color = "black", position='dodge') + ylim(0,9) + theme_bw() +
# NOTE(review): "CTL" does not match the Condition level "CTR" in the dput
# above -- confirm.
geom_signif(data = data.frame(Condition = c("CEN","IPS", "CTL")),
aes(y_position=c(8.5, 8.5, 8.5, 8.5, 8.5, 8.5), xmin=c(0.8, 1.8, 2.8, 0.8, 1.8, 2.8), xmax=c(1.2, 2.2, 3.2, 1.2, 2.2,3.2),
annotations=c("**", "*", "NS", "*", "**", "NS")), tip_length=0, manual = T) +
scale_fill_manual(values = c("grey80", "grey20")) +
facet_grid(~ Group, scales = "free")
You need to let ggplot know which variable to use as the grouping variable on which to apply the dodge (in your case this would be test). Also, you should draw the error bars after the actual bars so that the lower portion remains visible:
# Dodged bar chart of EE by Condition and test, with +/- se error bars and
# manually placed significance brackets, faceted by Group.
ggplot(tgc, aes(x = Condition, y = EE, fill = test)) +
  geom_bar(stat = "identity", width = 0.5, color = "black", position = "dodge") +
  # Error bars are drawn after the bars so their lower halves stay visible;
  # group = test tells position_dodge() which variable to dodge on.
  geom_errorbar(aes(ymin = EE - se, ymax = EE + se, group = test),
                position = position_dodge(width = 0.5), width = 0.25) +
  # Bracket positions and labels are supplied by hand (manual = TRUE). The
  # Condition column is not part of the mapping; "CTR" matches the level name
  # used in tgc.
  geom_signif(data = data.frame(Condition = c("CEN", "IPS", "CTR"),
                                ypos = c(8.5, 8.5, 8.5, 8.5, 8.5, 8.5),
                                xmin = c(0.8, 1.8, 2.8, 0.8, 1.8, 2.8),
                                xmax = c(1.2, 2.2, 3.2, 1.2, 2.2, 3.2),
                                annot = c("**", "*", "NS", "*", "**", "NS")),
              mapping = aes(y_position = ypos,
                            xmin = xmin,
                            xmax = xmax,
                            annotations = annot),
              tip_length = 0, manual = TRUE, inherit.aes = FALSE) +
  # The trailing `+` above is required: without it the scale, facets, limits
  # and theme below are never added to the plot.
  scale_fill_manual(values = c("grey80", "grey20")) +
  facet_grid(~ Group, scales = "free") +
  ylim(0, 9) +
  theme_bw()
Despite having tried many types of lines, I just cannot get the same result.
Here is how I need the lines to look:
And this is how I got it so far (and am stuck at):
Here is my code:
myData <- read.csv(file.choose(), header = TRUE)
require(ggplot2)
# Asker's attempt: scatter of resp against speed by padlen, plus a per-group
# lm fit on a quadratic B-spline basis (df = 4, degree = 2), drawn without a
# confidence band.
g <- ggplot(myData, aes(speed, resp))
g + geom_point(aes(color = padlen, shape = padlen)) +
geom_smooth(method = "lm", formula = y ~ splines::bs(x, df = 4, degree = 2), se = FALSE, aes(color = padlen), linetype = "solid", size = 1) +
scale_color_manual(values = c("red", "black")) +
scale_shape_manual(values = c(2, 1))
And here is the database (dput):
myData <- structure(list(resp = c(0, 0.125, 0.583333333, 1, 0.958333333,
1, 0, 0.041666667, 0.25, 0.916666667, 1, 1), padlen = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("big",
"small"), class = "factor"), speed = c(2L, 3L, 4L, 5L, 6L, 7L,
2L, 3L, 4L, 5L, 6L, 7L)), .Names = c("resp", "padlen", "speed"
), class = "data.frame", row.names = c(NA, -12L))
I have also tried all these polynomial models (and others), but none works:
# Candidate polynomial fits tried by the asker.
# NOTE(review): fpeg, x and y are not defined in the visible snippet (the
# posted data is myData with columns resp, padlen and speed) -- confirm.
## Quadratic model
lmQuadratic <- lm(formula = y ~ x + I(x^2),
data = fpeg)
## Cubic model
lmCubic <- lm(formula = y ~ x + I(x^2) + I(x^3),
data = fpeg)
## Fractional polynomial model
lmFractional <- lm(formula = y ~ x + I(x^2) + I(x^(1/2)),
data = fpeg)
So, what should I do/not do to get my lines the same as the original ones? Thanks.
Instead of using method = "lm" in the geom_smooth-function use the glm with the binomial family. The glm-smooth gives you only values between 0 and 1 (what you want to have, because you're dealing with proportion).
library(ggplot2)
# Scatter of resp against speed by padlen with a logistic-type smooth per
# group, so the fitted curves stay within [0, 1].
ggplot(myData, aes(speed, resp)) +
  geom_point(aes(color = padlen, shape = padlen)) +
  # resp holds proportions rather than 0/1 outcomes, so the quasibinomial
  # family is used: it fits the same curve as "binomial" but does not raise
  # the "non-integer #successes in a binomial glm!" warnings.
  geom_smooth(
    method = "glm", method.args = list(family = "quasibinomial"),
    se = FALSE, aes(color = padlen), linetype = "solid", size = 1
  ) +
  scale_color_manual(values = c("red", "black")) +
  scale_shape_manual(values = c(2, 1)) +
  theme_classic()
Data
myData <-
structure(list(resp = c(0, 0.125, 0.583333333, 1, 0.958333333, 1, 0,
0.041666667, 0.25, 0.916666667, 1, 1),
padlen = c("small", "small", "small", "small", "small",
"small", "big", "big", "big", "big", "big", "big"),
speed = c(2L, 3L, 4L, 5L, 6L, 7L, 2L, 3L, 4L, 5L, 6L, 7L)),
.Names = c("resp", "padlen", "speed"), class = "data.frame",
row.names = c(NA, -12L))
I am working on the dataset reported here below (pre.sss)
pre.sss <- pre.sss <- structure(list(Pretest.num = c(63, 62, 61, 60, 59, 58, 57, 4,2, 1), stress = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,1L), .Label = c("[0,6]", "(6,9]"), class = "factor"), time = c(1L,1L, 1L, 1L, 1L, 1L, 1L, 8L, 8L, 8L), after = structure(c(2L,2L, 2L, 2L, 2L, 2L, 1L, 1L, NA, 1L), .Label = c("no", "yes"), class = "factor"),id = c("call_fam", "call_fam", "call_fam", "call_fam", "call_fam","call_fam", "call_fam", "counselor", "counselor", "counselor")), .Names = c("Pretest.num", "stress", "time", "after","id"), reshapeLong = structure(list(varying = structure(list(after = c("after.call.fam", "after.speak", "after.send.email","after.send.card", "after.attend", "after.fam.mtg", "after.sup.grp","after.counselor")), .Names = "after", v.names = "after", times = 1:8),v.names = "after", idvar = "Pretest.num", timevar = "time"), .Names = c("varying","v.names", "idvar", "timevar")), row.names = c("63.1", "62.1","61.1", "60.1", "59.1", "58.1", "57.1", "4.8", "2.8", "1.8"), class = "data.frame")
and I need to plot the counts of several categorical variables according to a specific level of another categorical variable ('stress'), so a faceted bubble plot would do the job in my case.
So what I do is the following:
# Display labels passed to scale_y_discrete() below (y is mapped to id).
ylabels = c('call_fam' = "call fam.member for condolences",
'speak' = "speak to fam.member in person",
'send.email' = "send condolence email to fam.member",
'send.card' = "send condolence card/letter to fam.member",
'attend' = "attend funeral/wake",
'fam.mtg' = "provide fam.meeting",
'sup.grp' = "suggest attending support grp.",
'counselor' = "make referral to bereavement counselor" )
# Bubble plot of id against after, one facet column per stress level.
p = ggplot(pre.sss, aes(x = after, y = id)) +
geom_count(alpha = 0.5, col = 'darkblue') +
scale_size(range = c(1,30)) +
theme(legend.position = 'none') +
xlab("Response") +
ylab("What did you do after learning about death?") +
scale_y_discrete(labels = ylabels) +
# NOTE(review): stress.labels is not defined in the visible snippet, and the
# facet formula refers to pre.sss$stress rather than the bare column name
# stress -- confirm.
facet_grid(.~ pre.sss$stress, labeller = as_labeller(stress.labels))
and I obtain the following image, exactly as I want.
Now I would like to label each bubble with the count with which the corresponding data appear in the dataset.
# Pull x, y, PANEL and the count n out of the first layer of the built plot,
# then add them back as text labels.
dat = data.frame(ggplot_build(p)$data[[1]][, c('x', 'y', 'PANEL', 'n')])
# NOTE(review): "(6-9]" differs from the stress level "(6,9]" in the dput
# above -- confirm.
dat$PANEL = ifelse(dat$PANEL==1, "[0,6]", "(6-9]")
colnames(dat) = c('x', 'y', 'stress', 'n')
p + geom_text(aes(x, y, label = n, group = NULL), data = dat)
This gives me the following error I really can't understand.
> p + geom_text(aes(x, y, label=n, group=NULL), data=dat)
Error in `$<-.data.frame`(`*tmp*`, "PANEL", value = c(1L, 1L, 1L, 1L, :
replacement has 504 rows, data has 46
Can anybody help me with this?
Thanks!
EM
The function you refer to as your labeller function is missing from this example still. geom_count uses stat_sum, which calculates a parameter n, the number of observations at that point. Because you can use this calculated parameter, you don't actually have to assign the plot to a variable and pull out its data, as you did with ggplot_build.
This should do what you're looking for:
# Bubble plot of id against after, faceted by stress, with each bubble
# labelled by its count.
ggplot(pre.sss, aes(x = after, y = id)) +
  geom_count(alpha = 0.5, col = "darkblue") +
  # note the following line: the same stat as geom_count(), drawn as text.
  # after_stat(n) is the computed count; it replaces the deprecated ..n..
  # notation.
  stat_sum(mapping = aes(label = after_stat(n)), geom = "text") +
  scale_size(range = c(1, 30)) +
  theme(legend.position = "none") +
  xlab("Response") +
  ylab("What did you do after learning about death?") +
  scale_y_discrete(labels = ylabels) +
  facet_grid(. ~ stress)
The line I added computes the same thing as what's behind the scenes in geom_count, but gives it a text geom instead, with the label mapped to that computed parameter n.
I'm working with ggplot2 for the first time, and I'm having trouble making the colors of the labels I created with ggrepel change dynamically. Currently, my code looks like this:
# Asker's current plot: black points at (Mu, Sigma) with repelled VarNames
# labels whose fill is mapped to Hemisphere; no manual fill scale is set.
ggplot(tstat) +
geom_point(aes(Mu, Sigma),size = 5, color = 'black') +
geom_label_repel(aes(Mu, Sigma, label = VarNames, fill = factor(Hemisphere)), fontface = 'bold', color = 'white',
box.padding = unit(0.25, 'lines'),point.padding = unit(0.5, 'lines')) +
geom_rangeframe() +
theme_tufte() +
xlab(expression(paste(mu, "*"))) +
ylab(expression(sigma)) +
theme(axis.title.x = element_text(vjust=-0.5), axis.title.y = element_text(vjust=1.5)) +
ggtitle("Model Sensitivity by Hemisphere")
In general, this works pretty well, except I strongly dislike the toothpaste green color it gives me for one of the two factors plotted. I want to dictate the specific colors of that fill = factor(Hemisphere)) line, but I don't know how.
I have already tried using the scale_colour_manual function, but when I include it within the geom_label_repel(.....) parentheses in line 3, the program complains that "ggplot2 doesn't know how to deal with data of class ScaleDiscrete/Scale/ggproto", and when I place the scale_colour_manual line outside of line 3, it has no effect at all, as in this example, which produced an identical plot to the one above:
# Asker's second attempt, identical to the first plot apart from the added
# colour scale.
ggplot(tstat) +
geom_point(aes(Mu, Sigma),size = 5, color = 'black') +
# NOTE(review): this sets the colour scale, but the labels below map
# Hemisphere to fill while colour is a fixed constant -- presumably why it
# has no visible effect.
scale_colour_manual(values = c('blue', 'red')) +
geom_label_repel(aes(Mu, Sigma, label = VarNames, fill = factor(Hemisphere)), fontface = 'bold', color = 'white',
box.padding = unit(0.25, 'lines'),point.padding = unit(0.5, 'lines')) +
geom_rangeframe() +
theme_tufte() +
xlab(expression(paste(mu, "*"))) +
ylab(expression(sigma)) +
theme(axis.title.x = element_text(vjust=-0.5), axis.title.y = element_text(vjust=1.5)) +
ggtitle("Model Sensitivity by Hemisphere")
I know there has to be a way to do this, but I'm at a loss. Thanks for any help you've got!
EDIT: At request, I've attached a dput() of tstat. Not a big data frame.
structure(list(VarNames = structure(c(4L, 1L, 3L, 2L, 5L, 6L,
4L, 1L, 3L, 2L, 5L, 6L), .Label = c("Dry Deposition", "MEGAN Acetone",
"MEGAN Terpenes", "Monoterpene Yield", "Ocean", "Photolysis"), class = "factor"),
Mu = c(2703.09, 8066.01, 6566.6, 19741.7, 5809.6, 14231.8, 1493.56, 3067.54, 3631.32, 9951.06, 8748.95, 7967.93),
Sigma = c(3478.28, 8883.23, 7276.49, 18454.4, 6218.8, 14989.7, 1925.14, 3410.27, 4017.64, 9289.57, 9354.64, 8403.1),
Hemisphere = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L),
.Label = c("Northern", "Southern"), class = "factor")),
.Names = c("VarNames", "Mu", "Sigma", "Hemisphere"),
class = "data.frame", row.names = c(NA, -12L))
You can use scale_fill_manual:
tstat <- structure(list(VarNames = structure(c(4L, 1L, 3L, 2L, 5L, 6L,
4L, 1L, 3L, 2L, 5L, 6L), .Label = c("Dry Deposition", "MEGAN Acetone",
"MEGAN Terpenes", "Monoterpene Yield", "Ocean", "Photolysis"), class = "factor"),
Mu = c(2703.09, 8066.01, 6566.6, 19741.7, 5809.6, 14231.8, 1493.56, 3067.54, 3631.32, 9951.06, 8748.95, 7967.93),
Sigma = c(3478.28, 8883.23, 7276.49, 18454.4, 6218.8, 14989.7, 1925.14, 3410.27, 4017.64, 9289.57, 9354.64, 8403.1),
Hemisphere = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L),
.Label = c("Northern", "Southern"), class = "factor")),
.Names = c("VarNames", "Mu", "Sigma", "Hemisphere"),
class = "data.frame", row.names = c(NA, -12L))
library(ggplot2)
library(ggrepel)
library(ggthemes)
# Points at (Mu, Sigma) with repelled labels; the label fill is mapped to
# Hemisphere and given explicit colours through a manual fill scale.
ggplot(tstat) +
  geom_point(aes(Mu, Sigma), size = 5, color = "black") +
  geom_label_repel(
    aes(Mu, Sigma, label = VarNames, fill = factor(Hemisphere)),
    fontface = "bold",
    color = "white",
    box.padding = unit(0.25, "lines"),
    point.padding = unit(0.5, "lines")
  ) +
  geom_rangeframe() +
  theme_tufte() +
  labs(
    x = expression(paste(mu, "*")),
    y = expression(sigma),
    title = "Model Sensitivity by Hemisphere"
  ) +
  theme(
    axis.title.x = element_text(vjust = -0.5),
    axis.title.y = element_text(vjust = 1.5)
  ) +
  # Name each colour by its Hemisphere level so the assignment does not
  # depend on level order.
  scale_fill_manual(
    values = setNames(c("lightblue", "darkgreen"), levels(tstat$Hemisphere))
  )