Center facets in ggplot2 - r

I'm plotting geom_smooths in facets grouped by size:
library(ggplot2)
ggplot(df,
aes(x = pos, y = mean_ratio_f ))+
geom_smooth(aes(group = factor(size)), method = "lm", se = FALSE, linewidth = 0.5) +
# facets:
facet_wrap(. ~ size, scales = 'free_x')+
labs(x ="X",
y = "Y")
Unfortunately the last three facets (for size groups 23, 24, and 25) are aligned to the left margin so that there is a gap to their right (which also creates the impression of the whole plot being tilted to the right!):
It appears to me that the issue can be solved by centering the three facets in question (but maybe there are other solutions as well). How can the facets be rearranged so that the last three facets are centered?
Data:
df <- structure(list(size = c(3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L,
8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L, 13L, 13L, 14L,
14L, 15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L,
21L, 21L, 22L, 22L, 23L, 23L, 24L, 24L, 25L, 25L), pos = c(1.5,
2, 1.5, 2, 1.5, 2, 1.5, 2, 1.5, 2, 1.5, 2, 1.5, 2, 1.5, 2, 1.5,
2, 1.5, 2, 1.5, 2, 1.5, 2, 1.5, 2, 1.5, 2, 1.5, 2, 1.5, 2, 1.5,
2, 1.5, 2, 1.5, 2, 1.5, 2, 1.5, 2, 1.5, 2, 1.5, 2), mean_ratio_f = c(527.899043866778,
1223.75592041265, 26.7055556681507, 1014.99764633205, 6.47082070497567,
863.659744048962, 3.81972089409093, 777.045156006896, 2.46140197567771,
745.040410893806, 2.22400421369641, 759.114492391129, 2.13729390098214,
687.177457687369, 1.98034033753045, 778.931235189388, 1.90373974226176,
718.311850673966, 1.80384197110368, 825.996874022512, 1.81708729221153,
784.264857079573, 1.7777262939807, 752.39972151211, 1.76691331278538,
860.318640599953, 1.75527539730966, 869.777351603508, 1.74520729149527,
880.417441527199, 1.73611317639682, 780.755824759386, 1.78837402005967,
868.750440691095, 1.70425949150671, 804.161284483241, 1.70130414461642,
827.894751207786, 1.6956455656474, 846.217696086233, 1.6805039077424,
796.011388849723, 1.65481637360088, 811.918292989823, 1.67084107927763,
920.002748174406), mean_ratio_f_log = c(6773.00321795844, 17499.3396876788,
91.9407050566451, 14640.9637434847, 0.961390621510839, 12925.4581530315,
0.962105282138507, 11965.2882380283, 0.959588977914962, 11925.034356026,
0.95894420256844, 12389.1090131876, 0.962673236418588, 10291.5804363065,
0.961229361905838, 13564.6305043359, 0.959542208244426, 11807.7801051298,
0.958279155901719, 13222.8288829741, 0.960694717000605, 14050.9037663119,
0.959865919899295, 13196.4602878018, 0.960818003520457, 18197.3072369647,
0.959524692210418, 16167.0124087112, 0.962044614557777, 19156.2703675997,
0.958319770694746, 12192.6024672023, 0.96568915213801, 14355.9254483709,
0.957678872168589, 12384.5704259404, 0.956930859337691, 15515.5122785017,
0.964217350399733, 14886.2543318109, 0.958708854899801, 12105.1755086371,
0.959842413426268, 12265.603237096, 0.954623252993519, 15048.4316206923
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA,
-46L), groups = structure(list(size = 3:25, .rows = structure(list(
1:2, 3:4, 5:6, 7:8, 9:10, 11:12, 13:14, 15:16, 17:18, 19:20,
21:22, 23:24, 25:26, 27:28, 29:30, 31:32, 33:34, 35:36, 37:38,
39:40, 41:42, 43:44, 45:46), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -23L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))

You can set the layout with ggh4x::facet_manual() and then manually adjust any spacing in the gtable that seems superfluous to you.
library(ggplot2)
library(ggh4x)
# Data omitted from reprex for brevity, but taken from question
# df <- structure(...)
# Create layout: a 5 x 5 grid filled row by row. Panels 1-20 occupy the first
# four rows; the last row holds panels 21-23 with an empty (NA) cell on each
# side, which centres them.
design <- matrix(
  data = c(seq_len(20), NA, 21:23, NA),
  nrow = 5,
  ncol = 5,
  byrow = TRUE
)
# Make plot: one fitted straight line per size, with the panels placed
# according to the `design` matrix
p <- ggplot(data = df, mapping = aes(x = pos, y = mean_ratio_f)) +
  geom_smooth(
    mapping = aes(group = factor(size)),
    method = "lm",
    se = FALSE,
    linewidth = 0.5
  ) +
  facet_manual(facets = vars(size), design = design, scales = "free_x") +
  labs(x = "X", y = "Y")
# Convert to gtable
gt <- ggplotGrob(p)
#> `geom_smooth()` using formula = 'y ~ x'
# Set some widths to zero
# I don't know of a programmatic way to get the right indices
gt$widths[8] <- unit(0, "cm")
# Plotting
grid::grid.newpage(); grid::grid.draw(gt)
Created on 2023-01-10 with reprex v2.0.2
Disclaimer: I'm the author of ggh4x

Related

Combining two types of labellers in facet_grid

I have the follow facet_grid + ggplot2 code:
ggplot(output,aes(x=as.factor(X_and_Color),y=Y_values,fill=as.factor(X_and_Color)))+
geom_bar(stat="identity",position='dodge')+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())+
theme(text = element_text(size=18),legend.position = "bottom")+
facet_grid(rows=vars(facet_rows),cols=vars(facet_columns),scales="free",labeller = label_bquote(cols=N[t0]==.(facet_columns)))
The resulting plots look fine, but the labels for the rows need to be text wrapped.
I looked at using label_wrap_gen() but have been unable to make it fit into the label_bquote() correctly for just the rows.
Edit 1:
output<-structure(list(X_and_Color = c(100, 1000, 5000, 100, 1000, 5000,
100, 1000, 5000, 100, 1000), Y_values = c(0.867583920521508,
0.124953765675481, 344.093547640026, 0.0134418163074479, 0.0890165925565165,
155.778207495509, 86.4571212256961, 13.2165726083926, 0.395814237374253,
33.928067237654, 175.145881111351), facet_rows = structure(c(2L,
2L, 1L, 7L, 7L, 4L, 5L, 5L, 3L, 6L, 6L), .Label = c("Effective Number of Haplotypes",
"Gene Diversity", "Nucleotide Diversity", "Number of Haplotypes",
"Number of Heterozygotes", "Number of Polymorphic Sites", "Site by Site Heterozygosity"
), class = "factor"), Not_Needed = c(1e-04, 1e-04, 1e-04, 1e-04,
1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04), facet_columns = c(2,
100, 2, 2, 100, 2, 2, 100, 2, 2, 100)), .Names = c("X_and_Color",
"Y_values", "facet_rows", "Not_Needed", "facet_columns"), row.names = c(1L,
7L, 13L, 19L, 25L, 31L, 37L, 43L, 49L, 55L, 61L), class = "data.frame")
Using this data and the above code I get this for the row labels:
We can use str_wrap function from stringr for rows of facet_grid separately. Also adjust your size parameter based on your preference/screen size so the labels are visible.
library(ggplot2)
library(stringr)
ggplot(output,aes(x=as.factor(X_and_Color),y=Y_values,fill=as.factor(X_and_Color)))+
geom_bar(stat="identity",position='dodge')+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())+
theme(text = element_text(size=10),legend.position = "bottom")+
facet_grid(rows= vars(str_wrap(facet_rows, 2)),
cols=vars(facet_columns),scales="free",
labeller = label_bquote(cols=N[t0]==.(facet_columns)))

Adding a ggtree object to already existing ggplot with shared y-axis

I have the following data and plot:
Data:
structure(list(type = c("mut", "mut", "mut", "mut", "mut", "mut",
"mut", "mut", "gene", "gene", "gene", "gene"), gene = c("gyrA",
"gyrA", "gyrB", "gyrB", "parC", "parC", "parE", "parE", "qnrA1",
"qnrA1", "sul3", "sul3"), type2 = c(1, 1, 1, 1, 1, 1, 1, 1, 2,
2, 2, 2), id = c("2014-01-7234-1-S", "2015-01-3004-1-S", "2014-01-2992-1-S",
"2016-17-299-1-S", "2015-01-2166-1-S", "2014-01-4651-1-S", "2016-02-514-2-S",
"2016-02-402-2-S", "2016-02-425-2-S", "2015-01-5140-1-S", "2016-02-522-2-S",
"2016-02-739-2-S"), result = c("1", "0", "0", "0", "0", "0",
"1", "1", "0", "0", "0", "1"), species = c("Broiler", "Pig",
"Broiler", "Red fox", "Pig", "Broiler", "Wild bird", "Wild bird",
"Wild bird", "Pig", "Wild bird", "Wild bird"), fillcol = c("Broiler_1",
"Pig_0", "Broiler_0", "Red fox_0", "Pig_0", "Broiler_0", "Wild bird_1",
"Wild bird_1", "Wild bird_0", "Pig_0", "Wild bird_0", "Wild bird_1"
)), row.names = c(NA, -12L), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), vars = "gene", drop = TRUE, indices = list(
0:1, 2:3, 4:5, 6:7, 8:9, 10:11), group_sizes = c(2L, 2L,
2L, 2L, 2L, 2L), biggest_group_size = 2L, labels = structure(list(
gene = c("gyrA", "gyrB", "parC", "parE", "qnrA1", "sul3")), row.names = c(NA,
-6L), class = "data.frame", vars = "gene", drop = TRUE, indices = list(
0:1, 2:3, 4:5, 6:7, 8:9, 10:11), group_sizes = c(2L, 2L,
2L, 2L, 2L, 2L), biggest_group_size = 2L, labels = structure(list(
gene = c("gyrA", "gyrB", "parC", "parE", "qnrA1", "sul3")), row.names = c(NA,
-6L), class = "data.frame", vars = "gene", drop = TRUE)))
Plot:
library(ggplot2)
p1 <- ggplot(test_df, aes(fct_reorder(gene, type2),
factor(id),
fill = fillcol,
alpha = result)) +
geom_tile(color = "white")+
theme_minimal()+
labs(fill = NULL)+
theme(axis.text.x = element_text(angle = 90,
hjust = 1,
vjust = 0.3,
size = 7),
axis.title = element_blank(),
panel.grid = element_blank(),
legend.position = "right")+
guides(alpha = FALSE)+
coord_fixed()
Additionally, I have the following tree object:
structure(list(edge = structure(c(23L, 23L, 22L, 22L, 21L, 21L,
20L, 20L, 19L, 19L, 18L, 18L, 17L, 17L, 16L, 16L, 15L, 15L, 14L,
14L, 13L, 13L, 1L, 3L, 2L, 9L, 22L, 23L, 4L, 5L, 20L, 21L, 11L,
12L, 18L, 19L, 10L, 17L, 8L, 16L, 6L, 7L, 14L, 15L), .Dim = c(22L,
2L)), edge.length = c(2, 2, 0, 0, 2.5, 0.5, 2, 2, 0.75, 0.25,
0.5, 0.5, 2.41666666666667, 0.166666666666667, 3.0625, 0.145833333333333,
3.38888888888889, 0.326388888888889, 3, 3, 0.5, 0.111111111111111
), tip.label = c("2016-02-425-2-S", "2016-02-522-2-S", "2015-01-2166-1-S",
"2016-02-402-2-S", "2016-02-514-2-S", "2016-17-299-1-S", "2016-02-739-2-S",
"2015-01-5140-1-S", "2014-01-2992-1-S", "2014-01-7234-1-S", "2014-01-4651-1-S",
"2015-01-3004-1-S"), Nnode = 11L), class = "phylo", order = "postorder")
Which is plotted like this:
library(ggtree)
p2 <- ggtree(tree)+
geom_treescale()+
geom_tiplab(align = TRUE, linesize = 0, size = 1)+
xlim(0, 4.2)
What I want to do is to combine the tree and the first plot, and order the first plot y-axis after the order in the tree, so that they match. I have tried to use some of the solutions here, but I can't seem to produce the same plot with the facet_plot function. Is there a way to identify matching values on the y-axis on both plots, and then combine them?
This is how I want it to look (approximately):
We need to arrange the tile plot in the same order as the tree plot and then we need to lay the two plots out so they correspond. The first task is relatively straightforward, but I'm not sure how to do the second without some manual tweaking of the layout.
library(tidyverse)
library(ggtree)
library(grid)
library(gridExtra)
p2 <- ggtree(tree)+
geom_treescale()+
geom_tiplab(align = TRUE, linesize = 0, size = 3)+
xlim(0, 4.2)
Now that we've created the tree plot, let's get the ordering of the y axis programmatically. We can do that using ggplot_build to get the plot structure.
p2b = ggplot_build(p2)
We can look at the data for the plot layout by running p2b$data in the console. This outputs a list with the various data frames that represent the plot structure. Looking these over, we can see that the fifth and sixth data frames have the node labels. We'll use the fifth one (p2b$data[[5]]) and order its rows by the y column to get a vector of node labels (p2b$data[[5]] %>% arrange(y) %>% pull(label)). Then we'll convert test_df$id to a factor variable with this node ordering.
test_df = test_df %>%
mutate(id = factor(id, levels=p2b$data[[5]] %>% arrange(y) %>% pull(label)))
(As another option, you can get the ordering of the nodes directly from p2 with p2$data %>% filter(isTip) %>% arrange(parent) %>% pull(label))
Now we can generate the tile plot p1 with a node order that corresponds to that of the tree plot.
# Tile plot of the result per gene (x axis) and isolate id (y axis).
# Genes are ordered by `type2`; the y axis follows the factor levels of `id`.
p1 <- ggplot(test_df, aes(fct_reorder(gene, type2),
                          factor(id),
                          fill = fillcol,
                          alpha = result)) +
  geom_tile(color = "white") +
  theme_minimal() +
  labs(fill = NULL) +
  theme(axis.text.x = element_text(angle = 90,
                                   hjust = 1,
                                   vjust = 0.3,
                                   size = 7),
        axis.title = element_blank(),
        panel.grid = element_blank(),
        legend.position = "right") +
  # Hide the alpha legend. `guides(alpha = FALSE)` is deprecated in current
  # ggplot2 releases; "none" is the supported spelling.
  guides(alpha = "none") +
  coord_fixed()
We can see in the plot below that the labels correspond.
grid.arrange(p2, p1, ncol=2)
Now we need to lay out the two plots with only one set of labels and with the node lines matching up vertically with the tiles. I've done this with some manual tweaking below by creating a nullGrob() (basically a blank space below p1) and adjusting the heights argument to get the alignment. The layout can probably be done programmatically, but that would take some additional grob (graphical object) manipulation.
grid.arrange(p2 + theme(plot.margin=margin(0,-20,0,0)),
arrangeGrob(p1 + theme(axis.text.y=element_blank()),
nullGrob(),
heights=c(0.98,0.02)),
ncol=2)

Plotting multiple lines in R

I'm pretty new to R so I don't really know what I'm doing. Anyway, I have data in this format in excel (as a csv file):
dt <- data.frame(species = rep(c("a", "b", "c"), each = 4),
cover = rep(1:3, times = 4),
depth = rep(c(15, 30, 60, 90), times = 3),
stringsAsFactors = FALSE)
I want to plot a graph of cover against depth, with a different coloured line for each species, and a key for which species is which colour. I don't even know where to start.
Sorry if something similar has been asked before. Any help would be much appreciated!
Don't know if this is in a helpful format but here's some of the actual data, I need to read more about dput I think:
structure(list(species = structure(c(1L, 1L, 2L, 2L, 3L, 3L,
4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L,
11L), .Label = c("Agaricia fragilis", "bryozoan", "Dichocoenia stokesi",
"Diploria labyrinthiformis", "Diploria strigosa", "Madracis decactis",
"Manicina", "Montastrea cavernosa", "Orbicella franksi", "Porites asteroides",
"Siderastrea radians"), class = "factor"), cover = c(0.021212121,
0.04047619, 0, 0, 0, 0, 1.266666667, 4.269047619, 3.587878788,
3.25, 0.118181818, 0.152380952, 0, 0.007142857, 3.806060606,
2.983333333, 14.13030303, 15.76190476, 0.415151515, 0.2, 0.26969697,
0.135714286), depth = c(30L, 15L, 30L, 15L, 30L, 15L, 30L, 15L,
30L, 15L, 30L, 15L, 30L, 15L, 30L, 15L, 30L, 15L, 30L, 15L, 30L,
15L)), .Names = c("species", "cover", "depth"), row.names = c(NA,
22L), class = "data.frame")
Here is a solution using the ggplot2 package.
# Load packages
library(ggplot2)
# Create example data frame based on the original example the OP provided
dt <- data.frame(species = rep(c("a", "b", "c"), each = 4),
cover = rep(1:3, times = 4),
depth = rep(c(15, 30, 60, 90), times = 3),
stringsAsFactors = FALSE)
# Plot the data
ggplot(dt, aes(x = depth, y = cover, group = species, colour = species)) +
geom_line()
This should get you going!
# Load packages
library(dplyr)
library(ggplot2)

# Read the raw data; `header = TRUE` treats the first row as column names
# (the argument is `header`, and `TRUE` is safer than the reassignable `T`)
df1 <- read.csv("//file_location.csv", header = TRUE)

# Average depth per species. Naming the summary column `depth` keeps the
# column that the plot below maps to the x aesthetic.
df1 <- df1 %>%
  select(species, depth) %>%
  group_by(species) %>%
  summarise(depth = mean(depth))

# NOTE(review): after summarising there is one row per species, so
# geom_line() has no second point to connect within a group; geom_point()
# may be what was intended - confirm.
ggplot(df1, aes(x = depth, y = species, group = species, color = species)) +
  geom_line()

ggplot histogram code issues

Below there is a code producing a histogram using ggplot:
ggplot(res, aes(x=TOPIC,y=count)) +
scale_y_continuous(expand=c(0,0)) +
geom_bar(stat='identity') +
theme(axis.title.x=element_blank()) +
theme_bw(base_size = 16) +
ggtitle("Peer-reviewed papers per topic")
res<-structure(list(TOPIC = structure(c(20L, 18L, 21L, 3L, 9L, 4L,
7L, 8L, 17L, 27L, 29L, 2L, 24L, 30L, 16L, 28L, 10L, 22L, 12L,
5L, 1L, 19L, 6L, 25L, 11L, 23L, 14L, 15L, 13L, 26L), .Label = c("ANA",
"BEH", "BOUND", "CC", "DIS", "EVO", "HAB", "HABP", "HARV", "HWC",
"ISSUE", "METH", "MINOR", "PA", "PEFF", "PHYS", "POLL", "POPABU",
"POPGEN", "POPSTAT", "POPTR", "PRED", "PROT", "REPEC", "RESIMP",
"REV", "SHIP", "TEK", "TOUR", "ZOO"), class = "factor"), count = c(9,
7, 13, 5, 23, 35, 27, 5, 118, 0, 9, 22, 29, 46, 27, 12, 9, 44,
70, 40, 24, 19, 26, 2, 4, 17, 4, 10, 86, 31)), .Names = c("TOPIC",
"count"), row.names = c(NA, -30L), class = "data.frame")
The topics have names like "Population status" (POPSTAT), "Population trend" (POPTREND) etc.
There are issues I need help to solve:
1. The theme(axis.title.x=element_blank()) does not remove the x axis title.
2. The scale_y_continuous(expand=c(0,0)) puts the bar on the level of
the x-axis, which I need, but I need some expansion on the upper part
of the y-axis. How do I achieve that?
3. My labels from the data frame are abbreviations. How do I replace
these with new tick labels?
4. I need to turn the x-axis labels 90°.
Use labs(x = NULL)
Do you mean ylim? Using this, you can set the limits of the y-axis. Or use scale_y_continuous(limits = c(0, <upperLimit>), expand = c(0, 0))
Use a discrete scale with labels: scale_x_discrete(labels = <axis-labels>, limits = c(1:length(<axis-labels>)))
theme(axis.text.x = element_text(angle = 90))

Stack Points in ggplot

I am making a dotplot using ggplot with the code and data that is below which produces the following the graph.
ggplot(data=holder, aes(x=Coef, y=CoefShort, colour=factor(Name))) + geom_point() + labs(x="Value", y="Coefficient") + scale_colour_discrete("Model")
There is a significant amount of overplotting and I would like to create some vertical separation between dots. But it has to be systematic, i.e. the order is always red-green-blue.
geom_stack only seems to work when the x value is the same, not when the y value is the same. I tried using the opposite axes (swapping the x and y), doing a position_dodge() then doing a coord_flip(). This only works with bars, as seen below.
ggplot(data=holder, aes(y=Coef, x=CoefShort, fill=factor(Name))) + geom_bar(position="dodge", aes(ymax=Coef)) + labs(y="Value", x="Coefficient") + scale_fill_discrete("Model") + coord_flip()
Any ideas how to achieve that stacking with geom_point()? Thanks.
The data:
structure(list(Coef = c(-3875.46969970703, 7871.08213392282,
1120.33185255098, 1510.13540851347, 1439.07714113149, 1800.92398445336,
-3760.05411752962, 8183.74295221482, 1126.98290537184, 1517.99524139857,
1442.73063836897, 1808.03721179571, -90.6507661872817, -71.7225864185226,
-103.615416254984, -732.167583256825, -1075.67574987664, -1908.56266462926,
-7362.80215630299, 8886.12888250011, 655.767448263926, 848.716877683527,
869.395903077767, 998.254438325812, -211.682481369473, -303.310032581644,
-506.1995360406, -978.697664841985, -1440.30190190734, -2325.22236024601,
2625.94998656519, 3573.68798735398, 4217.82910198788, 4534.8789695778,
4967.19941000705, 5072.02764498623, 5419.64684461491), Name = c(1,
1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3), CoefShort = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L), .Label = c("(Intercept)",
"carat", "Good", "Very Good", "Premium", "Ideal", "E", "F", "G",
"H", "I", "J", "SI2", "SI1", "VS2", "VS1", "VVS2", "VVS1", "IF"
), class = "factor")), .Names = c("Coef", "Name", "CoefShort"
), class = "data.frame", row.names = c(NA, -37L))
Points drawn by geom_point have zero width, so you need to set the width of the dodging explicitly:
ggplot(
  data = holder,
  mapping = aes(x = CoefShort, y = Coef, colour = factor(Name))
) +
  # The dodge width is given explicitly so the points are spread apart
  geom_point(position = position_dodge(width = 0.8)) +
  labs(x = "Coefficient", y = "Value", colour = "Model") +
  # Flip the axes so the coefficients run down the y axis
  coord_flip()

Resources