Add vertical offset to stacked column plot with ggplot2 - r

I have several stacked column charts representing drilling profiles. I want to offset the y-position of each Borehole to represent the actual height on the ground.
My Data looks like this:
x layer.thickness layer.depth Petrography BSCategory Offset
0 0.2 0.2 silt Drilling1 0
0 1.0 1.2 gravel Drilling1 0
0 3.0 4.2 silt Drilling1 0
4 0.4 0.4 silt Drilling2 -1
4 0.8 1.2 gravel Drilling2 -1
4 2.0 3.2 sand Drilling2 -1
My minimum working code so far is this:
# Two boreholes with three layers each. `Offset` is the vertical shift that
# should be applied to the whole borehole.
df <- data.frame(
  x = rep(c(0, 4), each = 3),
  layer.thickness = c(0.2, 1.0, 3.0, 0.4, 0.8, 2.0),
  layer.depth = c(0.2, 1.2, 4.2, 0.4, 1.2, 3.2),
  Petrography = c("silt", "gravel", "silt", "silt", "gravel", "sand"),
  BSCategory = rep(c("Drilling1", "Drilling2"), each = 3),
  Offset = rep(c(0, -1), each = 3)
)
# Numeric borehole ID used as the grouping variable, so that layers are
# grouped per borehole rather than per petrography.
df$ix <- as.numeric(factor(df$BSCategory))
# Stacked column chart: one column per borehole position `x`, layers stacked
# top-down on a reversed depth axis, borehole labels along the top.
drilling <- ggplot(
  data = df,
  mapping = aes(x = x, y = layer.thickness, group = ix, fill = Petrography)
) +
  geom_col(
    position = position_stack(reverse = TRUE),
    width = 0.15,
    color = "black"
  ) +
  scale_x_continuous(
    name = "",
    position = "top",
    breaks = df$x,
    labels = paste(df$BSCategory, "\n", df$x, "m")
  ) +
  scale_y_reverse(name = "Depth [m]", expand = c(0, 0)) +
  scale_fill_manual(
    values = c("gravel" = "#f3e03a", "sand" = "#e09637", "silt" = "#aba77d")
  ) +
  theme_minimal() +
  # Strip grid, border and background; keep only a black y axis with ticks.
  theme(
    axis.line = element_line(colour = "black"),
    axis.line.x = element_blank(),
    axis.ticks.y = element_line(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    aspect.ratio = 1
  )
print(drilling)
This is my output so far (with red indicating what it should look like):

It may be easier to work with geom_rect here (bar charts are/should be anchored at zero). First we need to calculate y start and end positions for each sample:
library(data.table)
# Convert `df` to a data.table in place so columns can be added by reference.
setDT(df)
# Top of each layer: the borehole's offset plus the summed thickness of all
# layers above it (0 for the first layer), computed per borehole.
df[, start := Offset[1] + c(0, cumsum(head(layer.thickness, -1))), by = BSCategory]
# Bottom of each layer. Done as a separate step so it reads the `start` column
# instead of depending on an assignment hidden inside the previous expression.
df[, end := start + layer.thickness]
geom_rect has both fill and color aesthetics, which makes it easy to add a border to each individual sample.
w <- 0.5 # adjust to desired width
# One rectangle per layer: centred on the borehole position `x`, spanning
# `start`..`end` on the reversed depth axis.
ggplot(
  data = df,
  mapping = aes(
    xmin = x - w / 2, xmax = x + w / 2,
    ymin = start, ymax = end,
    fill = Petrography, group = Petrography
  )
) +
  geom_rect(color = "black") +
  scale_x_continuous(
    name = "",
    position = "top",
    breaks = df$x,
    labels = paste(df$BSCategory, "\n", df$x, "m")
  ) +
  scale_y_reverse(name = "Depth [m]", expand = c(0, 0)) +
  scale_fill_manual(
    values = c("gravel" = "#f3e03a", "sand" = "#e09637", "silt" = "#aba77d")
  ) +
  theme_classic() +
  theme(
    axis.ticks.x = element_blank(),
    axis.line.x = element_blank()
  )
Alternatively, geom_segment could be used. geom_segment doesn't have a fill aes, so we need to change to color:
# Same layout drawn with thick vertical segments. A segment has no fill, so
# the petrography is mapped to `color` instead.
ggplot(
  data = df,
  mapping = aes(
    x = x, xend = x,
    y = start, yend = end,
    color = Petrography, group = ix
  )
) +
  geom_segment(size = 5) +
  scale_x_continuous(
    name = "",
    position = "top",
    breaks = df$x,
    labels = paste(df$BSCategory, "\n", df$x, "m")
  ) +
  scale_y_reverse(name = "Depth [m]", expand = c(0, 0)) +
  scale_color_manual(
    values = c("gravel" = "#f3e03a", "sand" = "#e09637", "silt" = "#aba77d")
  ) +
  theme_classic() +
  theme(
    axis.ticks.x = element_blank(),
    axis.line.x = element_blank()
  )
To add borders, see Add border to segments in geom_segment.

Related

How to customize Horizontal dots plot?

I want to plot customized Horizontal dots using my data and the code given here
data:
# Example data: one row per origin (A-J) with a percentage, a rate and a
# change value.
df <- data.frame(
  origin = LETTERS[1:10],
  Percentage = c(23, 16, 32, 71, 3, 60, 15, 21, 44, 60),
  rate = c(10, 12, 20, 200, -25, 12, 13, 90, -105, 23),
  change = c(10, 12, -5, 12, 6, 8, 0.5, -2, 5, -2)
)
.
origin Percentage rate change
1 A 23 10 10.0
2 B 16 12 12.0
3 C 32 20 -5.0
4 D 71 200 12.0
5 E 3 -25 6.0
6 F 60 12 8.0
7 G 15 13 0.5
8 H 21 90 -2.0
9 I 44 -105 5.0
10 J 60 23 -2.0
The observations from the 'origin' column need to be put on the y-axis. The corresponding values in the 'change' and 'rate' columns must be shown in boxes instead of circles and be distinguishable by colour, for example values from the 'change' column in light blue and values from the 'rate' column in blue. In addition, I want to add a second vertical axis on the right and put circles on it whose size is defined by the corresponding value in the 'Percentage' column.
Output of code from the link:
Expected outcome (something like this):
Try this.
First, reshaping so that both rate and change are in one column better supports ggplot's general preference towards "long" data.
# Stack the remaining columns (rate, change) into long format, keeping
# `origin` and `Percentage` as identifier columns.
df2 <- reshape2::melt(
  df,
  id.vars = c("origin", "Percentage")
)
(That can also be done using tidyr::pivot_longer.)
The plot:
# One boxed label per (origin, variable) value, plus a column of circles
# placed 20 units right of the largest value and sized by `Percentage`.
ggplot(df2, aes(x = value, y = origin)) +
  geom_label(aes(label = value, fill = variable, color = variable)) +
  geom_point(
    aes(size = Percentage),
    x = max(df2$value) + 20,
    shape = 21
  ) +
  # Extra room on both sides so labels and circles are not clipped.
  scale_x_continuous(expand = expansion(add = c(15, 25))) +
  scale_fill_manual(values = c(change = "lightblue", rate = "blue")) +
  scale_color_manual(values = c(change = "black", rate = "white")) +
  labs(x = NULL, y = NULL) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
The legend and labels can be adjusted in the usual ggplot methods. Overlapping of labels is an issue with which you will need to contend.
Update on OP request: See comments:
# Adds value labels (with a "%" suffix) and a header row to `gg_dot`, the base
# dot plot built in the "First answer" snippet.
gg_dot +
  # rate values: black text on the light "#9DBEBB" dots
  geom_text(aes(x = rate, y = origin,
                label = paste0(round(rate, 1), "%")),
            col = "black") +
  # change values: white text on the dark "#468189" dots
  geom_text(aes(x = change, y = origin,
                label = paste0(round(change, 1), "%")),
            col = "white") +
  # header labels placed above the top row (y = 11 is one past the 10 origins)
  geom_text(aes(x = x, y = y, label = label, col = label),
            data.frame(x = c(40 - 1.1, 180 + 0.6), y = 11,
                       label = c("change", "rate")), size = 6) +
  # Named values so each header takes the colour of its own dots. An unnamed
  # vector is matched in alphabetical order of the labels, which swapped them.
  scale_color_manual(values = c(change = "#468189", rate = "#9DBEBB"),
                     guide = "none") +
  scale_y_discrete(expand = c(0.2, 0))
First answer:
Something like this?
library(tidyverse)
library(dslabs)
# Base dot plot: origins on the y axis (ordered by `rate`), one large dot per
# origin for `rate` and one for `change`.
gg_dot <- df %>%
  arrange(rate) %>%
  mutate(origin = fct_inorder(origin)) %>%
  ggplot() +
  # remove axes and superfluous grids
  theme_classic() +
  theme(axis.title = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line = element_blank()) +
  # add an invisible dummy point for scaling purposes
  geom_point(aes(x = 12, y = origin),
             size = 0, col = "white") +
  # add one horizontal guide line per origin
  geom_hline(yintercept = 1:10, col = "grey80") +
  # add a point for each `rate` value
  geom_point(aes(x = rate, y = origin),
             size = 11, col = "#9DBEBB") +
  # add a point for each `change` value
  geom_point(aes(x = change, y = origin),
             size = 11, col = "#468189")
gg_dot +
  # rate values: black text on the light "#9DBEBB" dots
  geom_text(aes(x = rate, y = origin,
                label = paste0(round(rate, 1))),
            col = "black") +
  # change values: white text on the dark "#468189" dots
  geom_text(aes(x = change, y = origin,
                label = paste0(round(change, 1))),
            col = "white") +
  # header labels placed above the top row (y = 11 is one past the 10 origins)
  geom_text(aes(x = x, y = y, label = label, col = label),
            data.frame(x = c(40 - 1.1, 180 + 0.6), y = 11,
                       label = c("change", "rate")), size = 6) +
  # Named values so each header takes the colour of its own dots. An unnamed
  # vector is matched in alphabetical order of the labels, which swapped them.
  scale_color_manual(values = c(change = "#468189", rate = "#9DBEBB"),
                     guide = "none") +
  scale_y_discrete(expand = c(0.2, 0))

Bar graphs that don't start at 0 in R

I'm trying to show the available range of travel of a machine that has 360 degrees of rotation and that has another axis of motion with a range of -5 to 152 that is independent of yaw. All of the bar graph drawing functions I can find assume that data start at 0 and this leaves a hole in the middle of the graph between -5 and 0. Is it possible to tell geom_bar() or geom_col() to start drawing at -5 instead of 0?
Here is the code that I'm using and an example graph.
# One row per yaw degree (0..360) with a random maximum in [-5, 152].
df <- data.frame(Yaw = 0:360)
df$Max.Static <- runif(361) * 157 - 5
library(ggplot2)
# Polar bar chart; geom_col anchors every bar at y = 0.
ggplot(df, aes(x = Yaw, y = Max.Static)) +
  geom_col(width = 1, alpha = 0.5, fill = "#69B600") +
  scale_x_continuous(
    limits = c(-1, 361),
    breaks = seq(0, 360, 45),
    minor_breaks = seq(0, 360, 15)
  ) +
  scale_y_continuous(
    limits = c(-5, 152),
    breaks = seq(0, 140, 20)
  ) +
  coord_polar(theta = "x") +
  labs(x = NULL, y = NULL) +
  theme(
    axis.ticks = element_blank(),
    axis.text.y = element_blank()
  )
Bit of a weird hack, but the following works if you can tolerate a warning:
# One row per yaw degree (0..360) with a random maximum in [-5, 152].
df <- data.frame(Yaw = 0:360)
df$Max.Static <- runif(361) * 157 - 5
library(ggplot2)
ggplot(df, aes(x = Yaw, y = Max.Static)) +
  geom_col(
    # The changed line: override the bar's lower edge after scaling so it sits
    # at the bottom of the y scale instead of at 0. ggplot2 warns that `ymin`
    # is an unknown aesthetic for this geom.
    mapping = aes(ymin = after_scale(-Inf)),
    width = 1, alpha = 0.5, fill = "#69B600"
  ) +
  scale_x_continuous(
    limits = c(-1, 361),
    breaks = seq(0, 360, 45),
    minor_breaks = seq(0, 360, 15)
  ) +
  scale_y_continuous(
    limits = c(-5, 152),
    breaks = seq(0, 140, 20)
  ) +
  coord_polar(theta = "x") +
  labs(x = NULL, y = NULL) +
  theme(
    axis.ticks = element_blank(),
    axis.text.y = element_blank()
  )
#> Warning: Ignoring unknown aesthetics: ymin
Created on 2021-01-20 by the reprex package (v0.3.0)
It works because we can access the rectangle parameters (xmin, xmax, ymin, ymax) after the bar parametrisation has been converted to rectangles with after_scale().
The -Inf instructs ggplot that the variable should be at the minimum of the scale (even after expansion), hence closing the gap.
Technically, you should be looking at geom_segment or geom_rect, rather than geom_bar, as conceptually bar graphs represent count, therefore always starting at 0
Here with geom_segment. No hack needed.
# One row per yaw degree (0..360) with a random maximum in [-5, 152].
df <- data.frame(Yaw = 0:360)
df$Max.Static <- runif(361) * 157 - 5
# Common lower end of every segment: the bottom of the travel range.
df$y <- -5
library(ggplot2)
# One thin radial segment per yaw value, from -5 up to Max.Static.
ggplot(df, aes(x = Yaw, y = y, xend = Yaw, yend = Max.Static)) +
  geom_segment(color = "#69B600", size = 0.2) +
  scale_x_continuous(
    limits = c(-1, 361),
    breaks = seq(0, 360, 45),
    minor_breaks = seq(0, 360, 15)
  ) +
  scale_y_continuous(
    limits = c(-5, 152),
    breaks = seq(0, 140, 20)
  ) +
  coord_polar(theta = "x") +
  labs(x = NULL, y = NULL) +
  theme(
    axis.ticks = element_blank(),
    axis.text.y = element_blank()
  )
or, with geom_rect, to get the windmill style
# Rectangle version: each blade is 0.8 degrees wide, centred on its yaw value,
# and runs from `y` up to Max.Static.
ggplot(
  df,
  aes(xmin = Yaw - 0.4, xmax = Yaw + 0.4, ymin = y, ymax = Max.Static)
) +
  geom_rect(fill = "#69B600") +
  scale_x_continuous(
    limits = c(-1, 361),
    breaks = seq(0, 360, 45),
    minor_breaks = seq(0, 360, 15)
  ) +
  scale_y_continuous(
    limits = c(-5, 152),
    breaks = seq(0, 140, 20)
  ) +
  coord_polar(theta = "x") +
  labs(x = NULL, y = NULL) +
  theme(
    axis.ticks = element_blank(),
    axis.text.y = element_blank()
  )

Plot odds ratio by groups with ggplot for many variables

I am making a plot for 17 symptoms by age group. So far I get what I want when I use my code for just one symptom (code and plot below), but when I run the code on all the variables I get a wrong plot, and I still have no idea where I went wrong.
This is my data:
# Odds-ratio table: 68 rows = 17 symptoms x 4 age groups.
#   symptoms - "<symptom>: <age range>" label for each row (the separator is
#              not fully consistent, e.g. "symptom2: : 10 to 19", "symptom4:50+")
#   OR       - odds ratio
#   Lower    - lower bound of the interval (contains NA)
#   Upper    - upper bound of the interval (contains NA and a few extremely
#              large values such as 5.463E+98)
#   group    - age-group label, the four groups repeated 17 times
x <- data.frame(symptoms=c("symptom1: 0 to 9","symptom1: 10 to 19","symptom1: 20 to 49","symptom1: 50+","symptom2: 0 to 9","symptom2: : 10 to 19",
"symptom2: : 20 to 49","symptom2: 50+","symptom3: 0 to 9","symptom3: 10 to 19","symptom3: 20 to 49","symptom3: 50+",
"symptom4: 0 to 9",
"symptom4: 10 to 19","symptom4: 20 to 49","symptom4:50+","symptom5: 0 to 9","symptom5: 10 to 19","symptom5: 20 to 49",
"symptom5: 50+",
"symptom6: 0 to 9","symptom6: 10 to 19","symptom6: 20 to 49","symptom6: 50+","symptom7: 0 to 9","symptom7: 10 to 19","symptom7: 20 to 49",
"symptom7: 50+", "symptom8: 0 to 9","symptom8: 10 to 19","symptom8: 20 to 49","symptom8: 50+",
"symptom9: 0 to 9","symptom9: 10 to 19","symptom9: 20 to 49","symptom9: 50+","symptom10: 0 to 9","symptom10: 10 to 19",
"symptom10: 20 to 49","symptom10: 50+","symptom11: 0 to 9","symptom11: 10 to 19","symptom11: 20 to 49",
"symptom11: 50+","symptom12: 0 to 9","symptom12: 10 to 19","symptom12: 20 to 49","symptom12: 50+","symptom13: 0 to 9",
"symptom13: 10 to 19","symptom13: 20 to 49","symptom13: 50+","symptom14: 0 to 9","symptom14: 10 to 19",
"symptom14: 20 to 49","symptom14: 50+","symptom15: 0 to 9","symptom15: 10 to 19","symptom15: 20 to 49","symptom15: 50+",
"symptom16:0 to 9","symptom16:10 to 19","symptom16:20 to 49","symptom16:50+","symptom17: 0 to 9","symptom17: 10 to 19",
"symptom17: 20 to 49","symptom17: 50+"),
OR=c(3.1,3,0.6,0.2,2,2.5,5,1.8,7.4,4.2,6.9,2.3,3.7,2.7,3.7,5.1,6.8,3.4,4.4,8.3,14540102.8,1036435.3,8070307.6,565044.8,2.9,1.7,2.6,4.2,3.4,1.3,2.5,2.9,1,1.6,48.4,2.6,1.3,1.9,2.6,4.5,0.8,0.7,3.6,0,7.5,14.8,2.7,3.8,1.5,3.2,3.1,0.8,2.4,12,4.5,1.7,2.8,1.8,3.1,1.9,3.3,25,5,1.4,430072.7,5.8,2.8,1.5),
Lower=c(1.3,1.6,0.2,0,1.6,1.7,1.6,0.7,2.2,1.3,2.6,0.3,1.9,1.8,1.4,2,3.3,2.2,2.2,3.2,0,0,0,0,1.5,1.2,1.3,1.5,1.8,0.9,1.3,1.2,0.3,0.6,1.3,0.4,0.9,1.2,1.3,1.7,0.2,0.3,0.4,NA,3.8,8,1.4,1.5,0.7,1.6,1.3,0.3,1.2,9.1,2.2,0.7,0.7,0.6,1.1,0.3,1.3,9,1.5,0.4,0,2.5,0.9,0.1),
Upper=c(8.7,6.3,2.2,4.2,6.1,3.8,8,4.7,26,7.9,19,14.7,7.6,4,6,15.1,14.1,5.3,8.8,22.8,NA,5.463E+98,NA,NA,5.5,4.6,5.2,15.5,6.6,2,5,7.5,3.2,4.2,165.4,22.4,3.5,2.8,5,12.3,2.6,1.6,76.8,2.0619295829016E+205,15.1,30.1,5.4,10.2,3,6.7,9.4,2.1,4.6,28,9.7,7.3,9.9,4.8,8,4.7,11,46.4,23.1,5.6,NA,16,9.1,38.8),
group=rep(c("0-9 years", "10-19 years", "20-49 years", "50+ years"), 17))
This is the code for just the first symptom:
# Odds ratios with interval bars for the first symptom only (rows 1-4, one row
# per age group). Rows are placed at y = 4..1 so the first row is on top.
ggplot(x[1:4, ], aes(x = OR, y = 4:1, group = group)) +
  # reference line at OR = 1
  geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
  geom_errorbarh(aes(xmax = Upper, xmin = Lower), size = 1, height = .1, color = "blue") +
  # same square shape for every group; groups differ by colour only
  geom_point(aes(shape = group, color = group), size = 5) +
  scale_shape_manual(values = c(15, 15, 15, 15)) +
  scale_color_manual(values = c("red", "green", "orange", "grey")) +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  scale_y_continuous(breaks = 4:1, labels = x$symptoms[1:4]) +
  scale_x_continuous(breaks = seq(0, 20, 1)) +
  ylab("") +
  xlab("Odds ratio") +
  # Title typo fixed ("COnfidence"); now matches the all-symptoms plot title.
  ggtitle("Odd ratios (OR) with 95% Confidence Interval")
and this is the plot that I got with just the first symptom by age group:
When I repeat this for all symptoms so I can have everything in one plot, the plot is a mess. See below code:
# Same plot for all 68 rows (17 symptoms x 4 age groups), placed at y = 68..1.
ggplot(data = x, mapping = aes(x = OR, y = 68:1, group = group)) +
  # reference line at OR = 1
  geom_vline(aes(xintercept = 1), linetype = "dashed", size = 0.25) +
  geom_errorbarh(
    aes(xmin = Lower, xmax = Upper),
    color = "blue", size = 1, height = 0.1
  ) +
  geom_point(aes(color = group, shape = group), size = 5) +
  scale_x_continuous(breaks = seq(0, 20, 1)) +
  scale_y_continuous(breaks = 68:1, labels = x$symptoms) +
  scale_color_manual(values = c("red", "green", "orange", "grey")) +
  scale_shape_manual(values = c(15, 15, 15, 15)) +
  ggtitle("Odd ratios (OR) with 95% Confidence Interval") +
  xlab("Odds ratio") +
  ylab("") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )
This is the ugly plot:
In the end, I should have something like the figure below, with the frequency tables on the left and the values for the OR with 95% CI. I haven't tried that part yet (adding all the numbers etc.), but suggestions are more than welcome.
Thanks a lot for helping me to debug my code
This is a forest plot. Your main problem is that a couple of your values are several orders of magnitude greater than the rest. Typically with a forest plot, you want a log scale for the odds ratio to make it symmetrical around one. However, even that won't be enough here to resolve the details on your plot, so I have simply filtered out the outliers (which appear nonsensical)
Since you effectively have nested factor levels, I have "silently" faceted the plot.
library(dplyr)
x %>%
  mutate(
    # Blank out the extreme values (anything beyond 100 in magnitude).
    Upper = replace(Upper, abs(Upper) > 100, NA),
    Lower = replace(Lower, abs(Lower) > 100, NA),
    OR = replace(OR, abs(OR) > 100, NA),
    # Keep only the part before the first ":" so every row carries its symptom
    # name, ordered symptom1..symptom17.
    symptoms = factor(
      gsub(":.*$", "", symptoms),
      levels = paste0("symptom", 1:17)
    )
  ) %>%
  ggplot(aes(x = OR, y = group)) +
  # background band per facet, filled by symptom (alternating fills below)
  geom_rect(
    aes(xmin = 0.001, xmax = 1000, ymin = -Inf, ymax = Inf, fill = symptoms)
  ) +
  geom_errorbarh(aes(xmin = Lower, xmax = Upper)) +
  geom_point(aes(colour = group, shape = group), size = 5) +
  geom_vline(aes(xintercept = 1), linetype = 2) +
  scale_x_log10() +
  scale_fill_manual(
    values = rep(c("#ffffff00", "#f0f0f090"), 9)[-1],
    guide = "none"
  ) +
  scale_shape_manual(values = rep(15, 5)) +
  coord_cartesian(xlim = c(0.01, 100)) +
  # one facet row per symptom, strip on the left acting as the row label
  facet_grid(symptoms ~ ., switch = "y") +
  theme_bw() +
  theme(
    axis.line = element_line(),
    axis.text.y = element_blank(),
    axis.ticks.length.y = unit(0, "points"),
    panel.border = element_blank(),
    panel.spacing.y = unit(0, "points"),
    strip.background.y = element_blank(),
    strip.placement = "outside",
    strip.text.y.left = element_text(angle = 0)
  )
You may also wish to check out the ggforest package.

r - column wise heatmap using ggplot2

I would really appreciate if anyone could guide me with the following challenge.
I am trying to build column wise heatmap. For each column, I want the lowest value to be green and highest value to be red. The current solution takes a matrix wide approach.
I saw the solution on Heat map per column with ggplot2. As you can see, I implemented the same code but I am not getting the desired result [picture below]
# Row labels: one configuration name per row of the metrics table below.
combo <- data.frame(
  combo = c(
    "baseline_120", "baseline_20", "baseline_60",
    "baseline_288", "baseline_5760"
  )
)
# Seven metrics (columns) for the five configurations (rows).
df <- data.frame(
  F1 = c(
    0.66610194649319, 0.666123551800434, 0.666100611954119,
    0.665991102703081, 0.665979885730484
  ),
  acc_of_pred = c(
    0.499541627510021, 0.49960260221954, 0.499646067768102,
    0.499447308828986, 0.499379552967265
  ),
  expected_mean_return = c(
    2.59756065316356e-07, 2.59799087404167e-07, 2.86466725381146e-07,
    2.37977452007967e-07, 2.94242908573705e-07
  ),
  win_loss_ratio = c(
    0.998168189343307, 0.998411671274781, 0.998585272507726,
    0.997791676357902, 0.997521287688458
  ),
  corr_pearson = c(
    0.00161443345430616, -0.00248811119331013, -0.00203407575954095,
    -0.00496817102369628, -0.000140531627184482
  ),
  corr_spearman = c(
    0.00214838517340878, -0.000308343671725617, 0.00228492127281917,
    -0.000359577740835049, 0.000608090759428587
  ),
  roc_vec = c(
    0.517972308828151, 0.51743161463546, 0.518033230192484,
    0.518033294993802, 0.517931553535524
  )
)
# Centre and scale every metric column, then append the labels; binding the
# scaled matrix to a data frame yields a data frame.
df.scaled <- cbind(scale(df), combo)
# Long format: one row per (combo, metric) pair.
df.melt <- melt(df.scaled, id.vars = "combo")
# Tile per (combo, metric), coloured by the scaled value on a single
# green-to-red gradient shared by all tiles.
ggplot(data = df.melt, mapping = aes(x = combo, y = variable)) +
  geom_tile(aes(fill = value), colour = "white") +
  geom_text(aes(label = value)) +
  scale_fill_gradient(low = "green", high = "red") +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  labs(x = "", y = "") +
  theme_grey(base_size = 9) +
  theme(
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.text.x = element_text(
      size = 9 * 0.8, angle = 0, hjust = 0, colour = "grey50"
    )
  )
You are nearly correct. The code you implemented is the same for plotting. But the person who asked the question did one step in data preparation, he added a scaling variable.
If you scale your variable before plotting it and using the scaled factor as fill argument it works (i just added the rescale in scale_fill_gradient in ggplot after calculating it):
# Long format: one row per (combo, metric) pair.
df.melt <- melt(df.scaled, id.vars = "combo")
# Add a `rescale` column computed separately within each `combo` group.
# NOTE(review): presumably needs plyr (ddply, .) and scales (rescale) attached;
# if the gradient should run per metric rather than per combo, group by
# `variable` instead. Confirm against the intended plot.
df.melt <- ddply(
  df.melt,
  .(combo),
  transform,
  rescale = rescale(value)
)
# Tiles are coloured by the rescaled value; labels show the scaled value
# rounded to four decimals.
ggplot(data = df.melt, mapping = aes(x = combo, y = variable)) +
  geom_tile(aes(fill = rescale), colour = "white") +
  geom_text(aes(label = round(value, 4))) +
  scale_fill_gradient(low = "green", high = "red") +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  labs(x = "", y = "") +
  theme_grey(base_size = 9) +
  theme(
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.text.x = element_text(
      size = 9 * 0.8, angle = 0, hjust = 0, colour = "grey50"
    )
  )
giving the plot:

How to add axis text in this negative and positive bars differently using ggplot2?

I've drawn a bar graph with negative and positive bars, a kind that is common in this research field. However, my code, which uses graphics::plot() and graphics::text() as shown below, seems extremely inconvenient and verbose. Try as I may, I could not find a way to do this with element_text in ggplot2. Please help, or give me some ideas on how to achieve this in ggplot2. Thanks in advance.
# my data: 31 genera, 18 in groupA (positive scores) followed by 13 in groupB
# (negative scores)
df <- data.frame(
  genus = c(
    "Prevotella", "Streptococcus", "YRC22", "Phascolarctobacterium",
    "SMB53", "Epulopiscium", "CF231", "Anaerovibrio", "Paludibacter",
    "Parabacteroides", "Desulfovibrio", "Sutterella", "Roseburia",
    "Others__0_5_", "Akkermansia", "Bifidobacterium", "Campylobacter",
    "Fibrobacter", "Coprobacillus", "Bulleidia", "f_02d06", "Dorea",
    "Blautia", "Enterococcus", "Eubacterium", "p_75_a5", "Clostridium",
    "Coprococcus", "Oscillospira", "Escherichia", "Lactobacillus"
  ),
  class = rep(c("groupA", "groupB"), times = c(18, 13)),
  value = c(
    4.497311, 4.082377, 3.578472, 3.567310, 3.410453, 3.390026,
    3.363542, 3.354532, 3.335634, 3.284165, 3.280838, 3.218053,
    3.071454, 3.026663, 3.021749, 3.004152, 2.917656, 2.811455,
    -2.997631, -3.074314, -3.117659, -3.151276, -3.170631, -3.194323,
    -3.225207, -3.274281, -3.299712, -3.299875, -3.689051, -3.692055,
    -4.733154
  )
)
# bar graph: horizontal bars of the score, written to lefse.tiff
tiff(file = "lefse.tiff", width = 2000, height = 2000, res = 400)
par(mar = c(5, 2, 1, 1))
# barplot() returns the midpoint of every bar; these are reused below to place
# the labels, so no coordinates have to be typed in by hand.
bar_mid <- barplot(
  df[, 3],
  horiz = TRUE,
  xlim = c(-6, 6),
  xlab = "LDA score (log 10)",
  # colour per row from its class, independent of the row order
  col = ifelse(df[, 2] == "groupA", "forestgreen", "goldenrod")
)
axis(1, at = seq(-6, 6, by = 1))
# add text: each genus name sits next to the zero line on the side opposite
# its bar (pos = 2 puts the text left of x = 0, pos = 4 right of it)
text(
  x = 0,
  y = bar_mid,
  labels = as.character(df[, 1]),
  pos = ifelse(df[, 3] > 0, 2, 4),
  cex = 0.6
)
# add lines: dotted vertical guides at every second unit from -6 to 6
abline(v = seq(-6, 6, by = 2), lty = 3, col = "grey")
legend("topleft", bty = "n", cex = 0.65, inset = c(0.01, -0.02), ncol = 2,
       legend = c("groupA", "groupB"),
       col = c("forestgreen", "goldenrod"), pch = c(15, 15))
dev.off()
Here's a solution using dplyr to create some extra columns for the label position and the justification, and then theming the plot to match reasonably closely what you originally had:
library(dplyr)
library(ggplot2)
# Order the genera by decreasing value and precompute where each label goes:
# just across the zero line from its bar, justified towards the zero line.
df <- df %>%
  mutate(
    genus = factor(genus, levels = genus[order(value, decreasing = TRUE)]),
    label_y = ifelse(value < 0, 0.2, -0.2),
    label_hjust = ifelse(value < 0, 0, 1)
  )
my_plot <- ggplot(df, aes(x = genus, y = value, fill = class)) +
  geom_col(colour = "black") +
  geom_text(aes(y = label_y, label = genus, hjust = label_hjust)) +
  # flip so the bars run horizontally
  coord_flip() +
  scale_y_continuous(
    expression(log[10](italic("LDA score"))),
    breaks = -6:6,
    limits = c(-6, 6)
  ) +
  scale_fill_manual(values = c(groupA = "forestgreen", groupB = "goldenrod")) +
  theme_minimal() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank(),
    legend.justification = 0.05,
    legend.position = "top",
    legend.title = element_blank(),
    panel.grid.major.x = element_line(colour = "grey80", linetype = "dashed"),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank()
  )
print(my_plot)
ggsave("lefse.tiff", plot = my_plot, width = 5, height = 5, dpi = 400)
I would try this:
library(ggplot2)
# Fix the factor levels to the current row order so the bars are drawn in
# that order.
df$genus <- factor(df$genus, levels = as.character(df$genus))
ggplot(data = df, mapping = aes(x = genus, y = value)) +
  # bars coloured by class
  geom_col(aes(fill = class)) +
  # horizontal bars
  coord_flip() +
  # labels anchored at zero: hjust is 1 for positive values and 0 otherwise,
  # so the text extends away from its bar
  geom_text(aes(y = 0, label = genus, hjust = as.numeric(value > 0))) +
  theme(axis.text.y = element_blank())
In the above, hjust changes the direction of the text relative to its y position (which is flipped to x here), similar to the pos parameter in base R plotting. So your code could also be simplified by passing a vector to the pos argument of the text function.
Two options:
library(ggplot2)
# my data: 31 genera, 18 in groupA (positive scores) followed by 13 in groupB
# (negative scores)
df <- data.frame(
  genus = c(
    "Prevotella", "Streptococcus", "YRC22", "Phascolarctobacterium",
    "SMB53", "Epulopiscium", "CF231", "Anaerovibrio", "Paludibacter",
    "Parabacteroides", "Desulfovibrio", "Sutterella", "Roseburia",
    "Others__0_5_", "Akkermansia", "Bifidobacterium", "Campylobacter",
    "Fibrobacter", "Coprobacillus", "Bulleidia", "f_02d06", "Dorea",
    "Blautia", "Enterococcus", "Eubacterium", "p_75_a5", "Clostridium",
    "Coprococcus", "Oscillospira", "Escherichia", "Lactobacillus"
  ),
  class = rep(c("groupA", "groupB"), times = c(18, 13)),
  value = c(
    4.497311, 4.082377, 3.578472, 3.567310, 3.410453, 3.390026,
    3.363542, 3.354532, 3.335634, 3.284165, 3.280838, 3.218053,
    3.071454, 3.026663, 3.021749, 3.004152, 2.917656, 2.811455,
    -2.997631, -3.074314, -3.117659, -3.151276, -3.170631, -3.194323,
    -3.225207, -3.274281, -3.299712, -3.299875, -3.689051, -3.692055,
    -4.733154
  )
)
# Horizontal bars ordered by decreasing value, with each genus name printed on
# the empty side of the zero line.
ggplot(df, aes(reorder(genus, -value), value, fill = class)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  # Negative bars get their label at +1.5, all others at -1.5. The side is
  # chosen by the sign of the value (threshold 0, not 1), so a small positive
  # bar is not labelled on top of itself.
  geom_text(aes(label = genus,
                y = ifelse(value < 0, 1.5, -1.5)), size = 2.5) +
  theme(axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())
Or this:
library(ggplot2)
# my data: 31 genera, 18 in groupA (positive scores) followed by 13 in groupB
# (negative scores)
df <- data.frame(
  genus = c(
    "Prevotella", "Streptococcus", "YRC22", "Phascolarctobacterium",
    "SMB53", "Epulopiscium", "CF231", "Anaerovibrio", "Paludibacter",
    "Parabacteroides", "Desulfovibrio", "Sutterella", "Roseburia",
    "Others__0_5_", "Akkermansia", "Bifidobacterium", "Campylobacter",
    "Fibrobacter", "Coprobacillus", "Bulleidia", "f_02d06", "Dorea",
    "Blautia", "Enterococcus", "Eubacterium", "p_75_a5", "Clostridium",
    "Coprococcus", "Oscillospira", "Escherichia", "Lactobacillus"
  ),
  class = rep(c("groupA", "groupB"), times = c(18, 13)),
  value = c(
    4.497311, 4.082377, 3.578472, 3.567310, 3.410453, 3.390026,
    3.363542, 3.354532, 3.335634, 3.284165, 3.280838, 3.218053,
    3.071454, 3.026663, 3.021749, 3.004152, 2.917656, 2.811455,
    -2.997631, -3.074314, -3.117659, -3.151276, -3.170631, -3.194323,
    -3.225207, -3.274281, -3.299712, -3.299875, -3.689051, -3.692055,
    -4.733154
  )
)
# Simplest variant: keep the genus names as regular axis labels on the flipped
# axis instead of drawing them as text.
ggplot(
  data = df,
  mapping = aes(x = reorder(genus, -value), y = value, fill = class)
) +
  geom_col() +
  coord_flip() +
  xlab("genus")

Resources