Related
I have a plot that looks like the one below. I want to change the order so that the larger value comes first (so cyan would precede red), but I can't seem to do this. What am I doing wrong?
This is my current code block so far:
# Question code (kept verbatim): odds ratio per gene-set size, with colour and
# group both mapped to Method and P.value used as the text label.
# Method is a factor with levels c("MAGMA", "Pairwise") in the dput() output,
# so MAGMA is currently the first level.
ggplot(df, aes(x = Gene.Set.Size, y = OR, label =P.value, color = Method, group = Method)) +
# Every layer uses the same position_dodge(width = 0.5).
geom_point(position=position_dodge(width=0.5)) +
ggrepel::geom_text_repel(size = 6, box.padding = 1, segment.angle = 20, position=position_dodge(width=0.5))+
# Interval drawn from LowerCI to UpperCI around OR.
geom_pointrange(aes(ymax = UpperCI, ymin = LowerCI),position=position_dodge(width=0.5)) +
theme_bw() +
theme(text=element_text(size=25),axis.text.x = element_text(angle = 45, hjust = 1)) +
ylab("Odds ratio") +
xlab("Gene set size") +
# 2 cm margin on all four sides of the plot.
theme(plot.margin = unit(c(2,2,2,2), "cm"))
> dput(df)
structure(list(Method = structure(c(1L, 1L, 1L, 2L, 2L, 2L), .Label = c("MAGMA",
"Pairwise"), class = "factor"), P.value = c(8.74e-28, 1.33e-56,
5.57e-92, 1.63e-44, 4.23e-71, 2.78e-95), OR = c(1.39, 1.424668,
1.4, 1.513, 1.478208, 1.409563), UpperCI = c(1.481491, 1.487065,
1.446039, 1.601557, 1.417117, 1.455425), LowerCI = c(1.316829,
1.364601, 1.356358, 1.42, 1.541768, 1.365056), Gene.Set.Size = structure(c(1L,
2L, 3L, 1L, 2L, 3L), .Label = c("500", "1000", "2000"), class = "factor")), row.names = c(NA,
-6L), class = "data.frame")
You must set the factor order.
library(ggplot2)
df <- structure(list(Method = structure(c(1L, 1L, 1L, 2L, 2L, 2L), .Label = c("MAGMA",
"Pairwise"), class = "factor"), P.value = c(8.74e-28, 1.33e-56,
5.57e-92, 1.63e-44, 4.23e-71, 2.78e-95), OR = c(1.39, 1.424668,
1.4, 1.513, 1.478208, 1.409563), UpperCI = c(1.481491, 1.487065,
1.446039, 1.601557, 1.417117, 1.455425), LowerCI = c(1.316829,
1.364601, 1.356358, 1.42, 1.541768, 1.365056), Gene.Set.Size = structure(c(1L,
2L, 3L, 1L, 2L, 3L), .Label = c("500", "1000", "2000"), class = "factor")), row.names = c(NA,
-6L), class = "data.frame")
# Make "Pairwise" the first level of Method so it is placed (and listed in the
# legend) ahead of "MAGMA".
df$Method <- factor(df$Method, levels = c("Pairwise", "MAGMA"))

# Same plot as in the question; only the factor level order above differs.
ggplot(
  df,
  aes(
    x = Gene.Set.Size,
    y = OR,
    label = P.value,
    group = Method,
    color = Method
  )
) +
  geom_point(position = position_dodge(width = 0.5)) +
  ggrepel::geom_text_repel(
    size = 6,
    box.padding = 1,
    segment.angle = 20,
    position = position_dodge(width = 0.5)
  ) +
  geom_pointrange(
    aes(ymax = UpperCI, ymin = LowerCI),
    position = position_dodge(width = 0.5)
  ) +
  theme_bw() +
  # Theme overrides go after theme_bw() so they are not reset by it.
  theme(
    text = element_text(size = 25),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.margin = unit(c(2, 2, 2, 2), "cm")
  ) +
  labs(x = "Gene set size", y = "Odds ratio")
df %>% mutate(Method = fct_relevel(Method, 'Pairwise')) %>% <<your ggplot2 code>
should do the job, assuming you have loaded the tidyverse pipe operator %>% and the forcats package, which you can do with library(tidyverse) (library() fails loudly if the package is missing, whereas require() only returns FALSE).
You can simply reverse the ordering of the Method factor with forcats::fct_rev.
# Reverse the level order of Method. The call is namespaced so the line works
# without attaching forcats first.
df$Method <- forcats::fct_rev(df$Method)
Alternatively, you can specify the first level when you initially converted that column to a factor.
I would like to plot both the raw data from df_plotting and the summary statistics from table_max (the mean values per condition).
This is my code so far
Edit: Add apa <- c("#68246d", "#a53d55", "#b87246", "#afa961")
# Question code (kept verbatim): per-condition means from table_max overlaid
# on raw traces from df_plotting, faceted by sample.
ggplot(table_max, mapping= aes(x = phase_bins, y = Mean)) +
# Mean lines: colour is mapped to cond_f here, but linetype is not.
geom_line(aes(group=as.numeric(cond_f),
colour = cond_f), size = 1.2)+
# Raw traces: one line per ID x cond_f combination, with both colour and
# linetype mapped to cond_f.
geom_line(df_plotting, mapping = aes(x=phase_bins, y = max_change_to_base,
group=interaction(ID, cond_f),
colour = cond_f, linetype =cond_f),
alpha = 0.3, size =0.3)+
# Error bars spanning Mean - SD to Mean + SD.
geom_errorbar(table_max, mapping=aes(ymin=Mean-SD, ymax=Mean+SD, colour = cond_f),
width=.2, size = 0.7)+
facet_wrap(factor(sample, levels = c("UGi", "UKi", "UKa"))~.)+
labs(title="Max. temperature change by condition and sample", colour = "Condition:", linetype = "Condition:")+
# NOTE(review): `theme(` is opened on the next line and never closed (four
# opening parentheses, three closing), so the snippet does not parse as posted.
# Presumably a bare guides(...) call was intended -- confirm with the author.
theme(guides(linetype=guide_legend(override.aes = list(alpha = 1)))+
scale_x_discrete(labels=
c("base", "stim1", "stim2", "recovery", "break"), drop = T)+
scale_y_continuous(limits = c(-2,3))+
# Colour and linetype scales are given the same name and labels.
scale_colour_manual(name = "Condition:",
labels = c("artificial", "cry", "laugh", "babble"), values=apa) +
scale_linetype_manual(name = "Condition:",
labels = c("artificial", "cry", "laugh", "babble"),
values = c(1,2,3,4)) # also tried "solid", "dotted", "dashed", "twodashed"
The problem is, that this displays only solid lines for the legend, and I would like a combination of linetype and colour as one joint legend. I think this does not work because colour is assigned once to solid lines and then to the combination of linetype and colour and I can't override only one of the colour aesthetics.
Here is my data:
df_plotting
structure(list(ID = structure(c(35L, 35L, 35L, 35L, 35L), .Label = c("UG201",
"UG208", "UG209", "UG211", "UG215", "UG217", "UG219", "UG220",
"UG221", "UG222", "UG228", "UG243", "UG247", "UG254", "UG268",
"UG271", "UG272", "UG273", "UG274", "UG275", "UG280", "UG283",
"UG284", "UG286", "UG297", "UG299", "UG308", "UG310", "UG315",
"UG316", "UG330", "UG331", "UG334", "UG335", "UK103", "UK104",
"UK105", "UK106", "UK107", "UK108", "UK110", "UK111", "UK112",
"UK113", "UK115", "UK116", "UK117", "UK119", "UK122", "UK123",
"UK130", "UK132", "UK135", "UK136", "UK138", "UK139", "UK140",
"UK142", "UK145", "UK147", "UK150", "UK153", "UK155", "UK156",
"UK159", "UK160", "UK162", "UK164", "UKA102", "UKA103", "UKA104",
"UKA105", "UKA106", "UKA107", "UKA108", "UKA109", "UKA110", "UKA111",
"UKA112", "UKA113", "UKA114", "UKA115", "UKA116", "UKA117", "UKA119",
"UKA120", "UKA121", "UKA122"), class = "factor"), sex = structure(c(1L,
1L, 1L, 1L, 1L), .Label = c("f", "m"), class = "factor"), trial = structure(c(1L,
1L, 1L, 1L, 2L), .Label = c("1", "2", "3", "4"), class = "factor"),
cond_f = structure(c(3L, 3L, 3L, 3L, 2L), .Label = c("artificial",
"cry", "laugh", "babble"), class = "factor"), stimulus = structure(c(16L,
16L, 16L, 16L, 12L), .Label = c("a1", "a2", "a3", "a4", "b1",
"b2", "b3", "b4", "c1", "c2", "c3", "c4", "l1", "l2", "l3",
"l4"), class = "factor"), phase_bins = structure(c(2L, 3L,
4L, 5L, 2L), .Label = c("pre", "baseline", "stim_bin1", "stim_bin2",
"recovery", "break"), class = "factor"), mean_change_to_base = c(0,
-0.516666666666667, -0.0333333333333336, 0.433333333333333,
0), max_change_to_base = c(0, -0.933333333333334, 0.166666666666668,
0.566666666666666, 0), sample = structure(c(2L, 2L, 2L, 2L,
2L), .Label = c("UGi", "UKi", "UKa"), class = "factor")), row.names = c(NA,
-5L), groups = structure(list(ID = structure(c(35L, 35L), .Label = c("UG201",
"UG208", "UG209", "UG211", "UG215", "UG217", "UG219", "UG220",
"UG221", "UG222", "UG228", "UG243", "UG247", "UG254", "UG268",
"UG271", "UG272", "UG273", "UG274", "UG275", "UG280", "UG283",
"UG284", "UG286", "UG297", "UG299", "UG308", "UG310", "UG315",
"UG316", "UG330", "UG331", "UG334", "UG335", "UK103", "UK104",
"UK105", "UK106", "UK107", "UK108", "UK110", "UK111", "UK112",
"UK113", "UK115", "UK116", "UK117", "UK119", "UK122", "UK123",
"UK130", "UK132", "UK135", "UK136", "UK138", "UK139", "UK140",
"UK142", "UK145", "UK147", "UK150", "UK153", "UK155", "UK156",
"UK159", "UK160", "UK162", "UK164", "UKA102", "UKA103", "UKA104",
"UKA105", "UKA106", "UKA107", "UKA108", "UKA109", "UKA110", "UKA111",
"UKA112", "UKA113", "UKA114", "UKA115", "UKA116", "UKA117", "UKA119",
"UKA120", "UKA121", "UKA122"), class = "factor"), trial = structure(1:2, .Label = c("1",
"2", "3", "4"), class = "factor"), cond_f = structure(3:2, .Label = c("artificial",
"cry", "laugh", "babble"), class = "factor"), .rows = structure(list(
1:4, 5L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
table_max
structure(list(cond_f = structure(c(1L, 1L, 1L, 1L, 1L), .Label = c("artificial",
"cry", "laugh", "babble"), class = "factor"), phase_bins = structure(c(2L,
2L, 2L, 3L, 3L), .Label = c("pre", "baseline", "stim_bin1", "stim_bin2",
"recovery", "break"), class = "factor"), sample = structure(c(1L,
2L, 3L, 1L, 2L), .Label = c("UGi", "UKi", "UKa"), class = "factor"),
Mean = c(0, 0, 0, 0.244444444444444, 0.711111111111109),
SD = c(0, 0, 0, 0.760260897979524, 0.474268442504406), Min. = c(0,
0, 0, -0.899999999999999, 0.133333333333333), Max. = c(0,
0, 0, 1.8, 1.4), count = c(12L, 9L, 20L, 12L, 9L)), row.names = c(NA,
-5L), groups = structure(list(cond_f = structure(c(1L, 1L), .Label = c("artificial",
"cry", "laugh", "babble"), class = "factor"), phase_bins = structure(2:3, .Label = c("pre",
"baseline", "stim_bin1", "stim_bin2", "recovery", "break"), class = "factor"),
.rows = structure(list(1:3, 4:5), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Thanks for any ideas on how to fix this!
The problem was in cond_f's factor levels. Note that I define both the vector of colors apa and the vector of line types lty. And assign names to each of these colors and line types from the factor cond_f levels.
First the plot without the scale_*'s.
library(ggplot2)
# Colours and line types for the conditions, each named after the levels of
# cond_f so the manual scales can look values up by level name.
apa <- setNames(
  c("#68246d", "#a53d55", "#b87246", "#afa961"),
  levels(df_plotting$cond_f)
)
lty <- setNames(
  c("solid", "dotted", "dashed", "twodash"),
  levels(df_plotting$cond_f)
)

# Base plot without scales. colour and linetype are mapped to cond_f at the
# top level, so the mean lines and error bars inherit both.
p <- ggplot(
  table_max,
  aes(
    x = phase_bins,
    y = Mean,
    group = cond_f,
    colour = cond_f,
    linetype = cond_f
  )
) +
  # Thick lines: summary means from table_max.
  geom_line(size = 1.2) +
  # Thin translucent lines: raw data from df_plotting, one line per
  # ID x condition. inherit.aes = FALSE because this layer supplies its own
  # complete mapping (y is max_change_to_base, not Mean).
  geom_line(
    aes(
      x = phase_bins,
      y = max_change_to_base,
      group = interaction(ID, cond_f),
      colour = cond_f,
      linetype = cond_f
    ),
    data = df_plotting,
    inherit.aes = FALSE,
    alpha = 0.3,
    size = 0.3
  ) +
  # Error bars spanning Mean - SD to Mean + SD.
  geom_errorbar(
    aes(ymin = Mean - SD, ymax = Mean + SD),
    width = 0.2,
    size = 0.7
  )
Created on 2022-07-25 by the reprex package (v2.0.1)
Now the problem. The default drop = TRUE drops the unused factor levels, set it to FALSE on both scale_colour_manual and scale_linetype_manual.
# Add facets, titles and scales to the base plot. The colour and linetype
# scales share the same name and labels, and both set drop = FALSE so unused
# cond_f levels are kept in the legend.
p2 <- p +
  facet_wrap(~sample) +
  labs(
    title = "Max. temperature change by condition and sample",
    colour = "Condition:",
    linetype = "Condition:"
  ) +
  scale_colour_manual(
    name = "Condition:",
    values = apa,
    labels = c("artificial", "cry", "laugh", "babble"),
    drop = FALSE
  ) +
  scale_linetype_manual(
    name = "Condition:",
    values = lty,
    labels = c("artificial", "cry", "laugh", "babble"),
    drop = FALSE
  ) +
  # On the x axis unused levels are still dropped.
  scale_x_discrete(
    labels = c("base", "stim1", "stim2", "recovery", "break"),
    drop = TRUE
  ) +
  scale_y_continuous(limits = c(-2, 3))

p2
#> geom_path: Each group consists of only one observation. Do you need to adjust
#> the group aesthetic?
Created on 2022-07-25 by the reprex package (v2.0.1)
I don't believe this makes a difference.
p2 + guides(linetype = guide_legend(override.aes = list(alpha = 1)))
I am having difficulty customising the ggplot (geom_point) output.
I would like the points to have a black outline but different fill colours by group (white and black).
Is there a way to combine the legend into one?
Data
library(dplyr)
library(ggplot2)
dat <- structure(list(q = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L),
is_female = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L), levels = c("Male", "Female"), class = "factor"), mean_exp = c(19.3095475534936,
19.2351713991988, 21.6718804471611, 21.69517120871, 23.4144749290445,
23.4191383190372, 25.2817658487443, 25.2772340355605, 28.3982168611512,
28.7869521340185), mean_fi = c(0.0915386254018914, 0.0912295567094683,
0.0771055282779973, 0.0790597510143077, 0.0859508568981647,
0.088489590940481, 0.109848283385112, 0.11358904634185, 0.128425331060705,
0.136830729164909), b_fi.frail = c(1, 1, 1, 1, 1, 1, 1, 1,
1, 1), pct = c(47.5830407777478, 52.4169592222522, 37.567084078712,
62.432915921288, 36.9897959183673, 63.0102040816327, 34.0960360941025,
65.9039639058975, 29.0891283055828, 70.9108716944172)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -10L), groups = structure(list(
q = 1:5, .rows = structure(list(1:2, 3:4, 5:6, 7:8, 9:10), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -5L), .drop = TRUE))
My attempt:
# Question code (kept verbatim): mean_fi against mean_exp, one line per
# is_female level, with square points.
ggplot(dat, aes(x=mean_exp, y=mean_fi, linetype=is_female)) +
# Both color and fill are mapped to is_female for the points.
geom_point(aes(color=is_female, fill=is_female), size=2, shape="square") +
geom_line() +
scale_x_continuous(limits = c(18, 30), breaks = seq(20, 30, 5), expand = c(0,0)) +
scale_y_continuous(limits = c(0.05, 0.15), breaks = seq(0.05, 0.15, 0.02), expand = c(0,0)) +
# NOTE(review): the values below are named "M"/"F", but the is_female factor
# in dat has levels "Male"/"Female", so the names do not match any level --
# presumably the reason the points come out grey; verify.
scale_color_manual(values = c("M" = "black", "F" = "black")) +
scale_fill_manual(values = c("M" = "black", "F" = "white")) +
labs(x= expression(Body ~ mass ~ index ~ (kg/m^2)), y= "Mean baseline FI score", title = "BMI") +
theme_classic() +
# Centre the title and place the legend below the plot.
theme(plot.title = element_text(hjust=0.5),
legend.position = "bottom")
My code outputs a graph with grey points and lines for some reason. I would like black lines and outlines (but points to be filled with black or white depending on the group) and to combine the legend if possible.
I realise there are many similar examples out there but I can't seem to figure out why my code is not working... Thanks for your help in advance!
Use shape = 22 for a fillable square, i.e. one which can have a different outline.
Colour can be outside the aes if both groups require a black outline for the points.
library(dplyr)
library(ggplot2)
dat <- structure(list(
q = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L),
is_female = structure(c(
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L
), levels = c("Male", "Female"), class = "factor"), mean_exp = c(
19.3095475534936,
19.2351713991988, 21.6718804471611, 21.69517120871, 23.4144749290445,
23.4191383190372, 25.2817658487443, 25.2772340355605, 28.3982168611512,
28.7869521340185
), mean_fi = c(
0.0915386254018914, 0.0912295567094683,
0.0771055282779973, 0.0790597510143077, 0.0859508568981647,
0.088489590940481, 0.109848283385112, 0.11358904634185, 0.128425331060705,
0.136830729164909
), b_fi.frail = c(
1, 1, 1, 1, 1, 1, 1, 1,
1, 1
), pct = c(
47.5830407777478, 52.4169592222522, 37.567084078712,
62.432915921288, 36.9897959183673, 63.0102040816327, 34.0960360941025,
65.9039639058975, 29.0891283055828, 70.9108716944172
)
), class = c(
"grouped_df",
"tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), groups = structure(list(
q = 1:5, .rows = structure(list(1:2, 3:4, 5:6, 7:8, 9:10), ptype = integer(0), class = c(
"vctrs_list_of",
"vctrs_vctr", "list"
))
), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -5L), .drop = TRUE))
# Black-outlined squares filled by sex, joined by one line per sex. fill and
# linetype are both titled "Sex" in labs().
ggplot(dat, aes(x = mean_exp, y = mean_fi, linetype = is_female)) +
  # shape = 22 is the fillable square: the outline comes from `colour` (set as
  # a constant, outside aes()) and the interior from the mapped `fill`.
  geom_point(aes(fill = is_female), size = 2, shape = 22, colour = "black") +
  geom_line() +
  scale_x_continuous(
    limits = c(18, 30),
    breaks = seq(20, 30, 5),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    limits = c(0.05, 0.15),
    breaks = seq(0.05, 0.15, 0.02),
    expand = c(0, 0)
  ) +
  # Name the fill values by factor level so the colours do not depend on the
  # order of levels(dat$is_female). The names must match the levels exactly
  # ("Male", "Female").
  scale_fill_manual(values = c(Male = "black", Female = "white")) +
  labs(
    x = expression(Body ~ mass ~ index ~ (kg / m^2)),
    y = "Mean baseline FI score",
    title = "BMI",
    fill = "Sex",
    linetype = "Sex"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom"
  )
Created on 2022-07-07 by the reprex package (v2.0.1)
I have used ggplot2 to create a line graph for a soil water release curve. However, because I only have one data point at each pressure value (the x axis), the lines are connected directly from point to point. I would like to keep the points but have a curve that shows the trend of the points. This is the typical style for soil water release curves.
Data:
> dput(head(sub2018))
structure(list(Year = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label =
c("2018",
"2019"), class = "factor"), Pressure = structure(1:6, .Label = c("-1",
"-0.5", "-0.25", "-0.2", "-0.1", "-0.05", "-0.02", "-0.01", "0"
), class = "factor"), meanVWC = c(0.291819594, 0.308328767666667,
0.318496127666667, 0.323671866333333, 0.349356212666667,
0.374201803666667
)), row.names = c(NA, -6L), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), vars = "Year", drop = TRUE, indices = list(
0:5), group_sizes = 6L, biggest_group_size = 6L, labels = structure(list(
Year = structure(1L, .Label = c("2018", "2019"), class = "factor")),
row.names = c(NA,
-1L), class = "data.frame", vars = "Year", drop = TRUE))
ggplot:
# Question code (kept verbatim): meanVWC against Pressure with points joined
# by straight segments. Pressure is a factor in the dput() output; group = 1
# puts every observation in a single group.
GGplot2018 <- ggplot(sub2018, aes(x=Pressure, y=meanVWC, group=1)) +
geom_line() +
geom_point() + labs(y= "Volumetric Water Content")
# Print the stored plot.
GGplot2018
Does anyone know if/how I can add this curve?
Thanks very much for any help!
I believe this is what you are looking for:
# Points, connecting line and a fitted trend curve for the water release data.
# group = 1 puts all observations in a single group, so the line and the
# smoother run across the discrete Pressure axis.
GGplot2018 <- ggplot(sub2018, aes(x = Pressure, y = meanVWC, group = 1)) +
  geom_line() +
  geom_point() +
  # method = "lm" with the default formula y ~ x fits a straight line. A
  # quadratic term gives a curved trend instead and still works with only a
  # handful of points. For a more flexible curve on larger data,
  # method = "loess" is an alternative.
  geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = FALSE) +
  labs(y = "Volumetric Water Content")
GGplot2018
I am getting the below error msg when running ggplot pie chart...any idea what issue could be?
code is :
# Question code (kept verbatim): stacked bar of prop filled by ProductDetails,
# meant to be turned into a pie with coord_polar().
ggplot(pie_unrated, aes(x = "FEBRUARY IBG UNRATED Book COMPOSITION", y = prop,
fill = ProductDetails)) + geom_bar(width = 1,
stat = "identity", color = "white")
# NOTE(review): the expression above is already complete at its closing
# parenthesis, so each of the next two lines begins a new statement with a
# leading `+`. This is consistent with the "invalid argument to unary operator"
# error quoted in the post.
+ coord_polar(theta = "y", start = 0) + ggpubr::fill_palette("jco")
+theme_void()
My error Msg :
Error in +coord_polar(theta = "y", start = 0) :
invalid argument to unary operator
>
dput(head(pie_unrated))
structure(list(RatingStatus = c("UNRATED", "UNRATED", "UNRATED",
"UNRATED", "UNRATED", "UNRATED"), ProductDetails = structure(c(1L,
2L, 6L, 7L, 9L, 10L), .Label = c("ACB", "Bonds", "Cash and Short Term",
"Deposit with Banks", "LBD", "LC", "LG", "loan", "Loan", "OD",
"Treasury Bonds"), class = "factor"), counts = c(10L, 1L, 21L,
102L, 758L, 126L), prop = c(1, 0.1, 2.1, 10, 74.5, 12.4), lab.ybos = c(0.5,
1.05, 2.15, 8.2, 50.45, 93.9)), .Names = c("RatingStatus", "ProductDetails",
"counts", "prop", "lab.ybos"), row.names = c(NA, -6L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = "RatingStatus", drop = TRUE, indices = list(
0:5), group_sizes = 6L, biggest_group_size = 6L, labels = structure(list(
RatingStatus = "UNRATED"), row.names = c(NA, -1L), class = "data.frame", vars = "RatingStatus", drop = TRUE, .Names = "RatingStatus"))
library(ggpubr)
# Pie chart of prop by ProductDetails. Every `+` sits at the END of a line so
# R keeps reading the next line as part of the same expression.
ggplot(
  pie_unrated,
  aes(
    x = "FEBRUARY IBG UNRATED Book COMPOSITION",
    y = prop,
    fill = ProductDetails
  )
) +
  # geom_col() is geom_bar(stat = "identity"): bar heights come from y.
  geom_col(width = 1, color = "white") +
  # Polar coordinates on y turn the single stacked bar into a pie.
  coord_polar(theta = "y", start = 0) +
  ggpubr::fill_palette("jco") +
  theme_void()