Position geom_text on dodged barplot - r

I tried to make the title self-explanatory, but here goes - data first:
dtf <- structure(list(variable = structure(c(1L, 1L, 2L, 2L, 3L, 3L,
4L, 4L, 5L, 5L), .Label = c("vma", "vla", "ia", "fma", "fla"), class = "factor"),
ustanova = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L), .Label = c("srednja škola", "fakultet"), class = "factor"),
`(all)` = c(42.9542857142857, 38.7803203661327, 37.8996138996139,
33.7672811059908, 29.591439688716, 26.1890660592255, 27.9557692307692,
23.9426605504587, 33.2200772200772, 26.9493087557604)), .Names = c("variable",
"ustanova", "(all)"), row.names = c(NA, 10L), class = c("cast_df",
"data.frame"), idvars = c("variable", "ustanova"), rdimnames = list(
structure(list(variable = structure(c(1L, 1L, 2L, 2L, 3L,
3L, 4L, 4L, 5L, 5L), .Label = c("vma", "vla", "ia", "fma",
"fla"), class = "factor"), ustanova = structure(c(1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("srednja škola",
"fakultet"), class = "factor")), .Names = c("variable", "ustanova"
), row.names = c("vma_srednja škola", "vma_fakultet", "vla_srednja škola",
"vla_fakultet", "ia_srednja škola", "ia_fakultet", "fma_srednja škola",
"fma_fakultet", "fla_srednja škola", "fla_fakultet"), class = "data.frame"),
structure(list(value = structure(1L, .Label = "(all)", class = "factor")), .Names = "value", row.names = "(all)", class = "data.frame")))
And I'd like to create a dodged barplot, do the coord_flip and put some text labels inside the bars:
ggplot(bar) + geom_bar(aes(variable, `(all)`, fill = ustanova), position = "dodge") +
geom_text(aes(variable, `(all)`, label = sprintf("%2.1f", `(all)`)), position = "dodge") +
coord_flip()
you can see output here.
I reckon I'm asking for something trivial. I want the text labels to "follow" stacked bars. Labels are placed correctly on the y-axis, but how to position them correctly on x-axis?

Is this what you want?
library(ggplot2)
ggplot(bar) +
geom_col(aes(variable, `(all)`, fill = ustanova), position = "dodge") +
geom_text(aes(variable, `(all)`, label = sprintf("%2.1f", `(all)`), group = ustanova),
position = position_dodge(width = .9)) +
coord_flip()
The key is to position = position_dodge(width = .9) (where .9 is the default width of the bars) instead of position = "dodge", which is just a shortcut without any parameter. Additionally you have to set the group=ustanova aesthetic in geom_text to dodge the labels by ustanova (A second option would be to make fill = ustanova a global aesthetic via ggplot(bar, aes(fill = ustanova))
In ggplot2_2.0.0 you find several examples in ?geom_text on how to position geom_text on dodged or stacked bars (the code chunk named "# Aligning labels and bars"). The Q&A What is the width argument in position_dodge? provides a more thorough description of the topic.

Related

How can I set median crossbars to align within factors?

I have a data frame like so:
my_df <- structure(list(SampleID = c("sample01", "sample02", "sample03",
"sample04", "sample05", "sample06", "sample07", "sample08", "sample09",
"sample10", "sample11", "sample12", "sample13", "sample14", "sample15",
"sample16", "sample17", "sample18", "sample19", "sample20"),
y = c(1.68547922357333, 0.717650914301956, 1.18156420566867,
1.31643130248052, 1.2021341615705, 0.946937741954258, 1.75576099871947,
0.952670480793451, 2.00921185693852, 0.968642950473789, 1.65243482711174,
2.14332269635055, 0.30556964944383, 0.860605616591314, 0.933339331803171,
1.31797519903504, 0.857873539291964, -0.328227710452388,
-0.22023346428776, 1.6600566728651), week = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 3L, 1L, 2L,
3L, 1L, 2L, 3L), .Label = c("0", "3", "6"), class = "factor"),
grumpy = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), week_grumpy = structure(c(2L,
4L, 6L, 2L, 4L, 6L, 1L, 3L, 5L, 2L, 4L, 6L, 1L, 5L, 2L, 4L,
6L, 1L, 3L, 5L), .Label = c("0 No", "0 Yes", "3 No", "3 Yes",
"6 No", "6 Yes"), class = "factor")), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L))
#packages needed if you don't have
install.packages("ggbeeswarm")
install.packages("ggplot2")
This is typically how I graph:
library(ggplot2)
library(ggbeeswarm)
ggplot(data = my_df, aes(x=week, y=y, color=grumpy)) +
geom_quasirandom(dodge.width = 0.75)
Which is nice because it separates the colors rather nicely. Nowadays, I like to add a median crossbars to further show the differences between groups. Like so:
ggplot(data = my_df, aes(x=week, y=y, color=grumpy)) +
geom_quasirandom(dodge.width = 0.75) +
stat_summary(aes(group = grumpy), fun = median, fun.min = median, fun.max = median, geom = "crossbar", color = "black", width = 0.7, lwd = 0.2)
Now, what I would love to have is the median crossbars to align with the colors within each factor on the x-axis. Is there a way to do this within R? Or am I relegated to manually editing the crossbars to line up?
Here's is one thing I have tried:
ggplot(data = my_df, aes(x=week_grumpy, y=y, color=grumpy)) +
geom_jitter(width = 0.1) +
stat_summary(aes(group = grumpy), fun = median, fun.min = median, fun.max = median, geom = "crossbar", color = "black", width = 0.7, lwd = 0.2)
But now the x-axis is not the way I want it (However, it would be easier to manually edit in something like Inkscape than the previous example).
I've found some hints here and here but have yet to arrive at a satisfactory solution.
What you are looking for is to dodge the crossbar geom. For example:
ggplot(data = my_df, aes(x=week, y=y, color=grumpy)) +
geom_quasirandom(dodge.width = 0.75) +
stat_summary(
aes(group = grumpy), fun = median, fun.min = median, fun.max = median,
geom = "crossbar", color = "black", width = 0.7, lwd = 0.2,
# add this bit here to your stat_summary function
position=position_dodge(width=0.75)
)
It seems that geom_quasirandom() is acting here very similarly to geom_point(position=position_jitterdodge(dodge.width=0.75)). In this case, since dodge.width is specified in geom_quasirandom(), you use the same width for position_dodge in the crossbar geom.
Note: you may want to play around with aesthetic formatting to be able to make the distinction a bit more clear what the crossbars are telling you, but this should answer your question.

ggplot add aggregated summaries to a bar plot

I have the following data frame:
structure(list(StepsGroup = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
3L, 3L, 3L), .Label = c("(-Inf,3e+03]", "(3e+03,1.2e+04]", "(1.2e+04, Inf]"
), class = "factor"), GlucoseGroup = structure(c(1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L), .Label = c("<100", "100-180", ">180"
), class = "factor"), n = c(396L, 1600L, 229L, 787L, 4182L, 375L,
110L, 534L, 55L), freq = c(0.177977528089888, 0.719101123595506,
0.102921348314607, 0.147267964071856, 0.782559880239521, 0.0701721556886228,
0.157367668097282, 0.763948497854077, 0.0786838340486409)), class =
c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -9L), vars = "StepsGroup",
labels = structure(list(
StepsGroup = structure(1:3, .Label = c("(-Inf,3e+03]", "(3e+03,1.2e+04]",
"(1.2e+04, Inf]"), class = "factor")), class = "data.frame", row.names =
c(NA, -3L), vars = "StepsGroup", drop = TRUE), indices = list(0:2,
3:5, 6:8), drop = TRUE, group_sizes = c(3L, 3L, 3L), biggest_group_size =
3L)
I would like to create a stacked bar plot, and add a summary of each StepsGroup on top of each bar. So the first group will have 2225, the second 5344 and the third 699.
I am using the following script:
ggplot(d_stepsFastingSummary , aes(y = freq, x = StepsGroup, fill =
GlucoseGroup)) + geom_bar(stat = "identity") +
geom_text(aes(label = sum(n()), vjust = 0))
The part until before the geom_text works, but for the last bit I get the following error:
Error: This function should not be called directly
Any idea how to add the aggregated quantity?
We could create a new dataframe stacked_df which would have sum for each StepsGroup
stacked_df <- df %>% group_by(StepsGroup) %>% summarise(nsum = sum(n))
ggplot(df) +
geom_bar(aes(y = freq, x = StepsGroup, fill= GlucoseGroup),stat = "identity") +
geom_text(data = stacked_df, aes(label = nsum, StepsGroup,y = 1.1))

creating a factor-based in dendrogram with R and ggplot2

This is not so much a coding as general approach call for help ;-) I prepared a table containing taxonomic information about organisms. But I want to use the "names" of these organisms, so no values or anything where you could compute a distance or clustering with (this is also all the information I have). I just want to use these factors to create a plot that shows the relationship. My data looks like this:
test2<-structure(list(genus = structure(c(4L, 2L, 7L, 8L, 6L, 1L, 3L,
5L, 5L), .Label = c("Aminobacter", "Bradyrhizobium", "Hoeflea",
"Hyphomonas", "Mesorhizobium", "Methylosinus", "Ochrobactrum",
"uncultured"), class = "factor"), family = structure(c(4L, 1L,
2L, 3L, 5L, 6L, 6L, 6L, 6L), .Label = c("Bradyrhizobiaceae",
"Brucellaceae", "Hyphomicrobiaceae", "Hyphomonadaceae", "Methylocystaceae",
"Phyllobacteriaceae"), class = "factor"), order = structure(c(1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Caulobacterales",
"Rhizobiales"), class = "factor"), class = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Alphaproteobacteria", class = "factor"),
phylum = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Proteobacteria", class = "factor")), .Names = c("genus",
"family", "order", "class", "phylum"), class = "data.frame", row.names = c(NA,
9L))
is it necessary to set up artificial values to describe a distance between the levels?
Here is an attempt using data.tree library
First create a string variable in the form:
Proteobacteria/Alphaproteobacteria/Caulobacterales/Hyphomonadaceae/Hyphomonas
library(data.tree)
test2$pathString <- with(test2,
paste(phylum,
class,
order,
family,
genus, sep = "/"))
tree_test2 = as.Node(test2)
plot(tree_test2)
many things can be done after like:
Interactive network:
library(networkD3)
test2_Network <- ToDataFrameNetwork(tree_test2, "name")
simpleNetwork(test2_Network)
or graph styled
library(igraph)
plot(as.igraph(tree_test2, directed = TRUE, direction = "climb"))
check out the vignette
using ggplot2:
library(ggraph)
graph = as.igraph(tree_test2, directed = TRUE, direction = "climb")
ggraph(graph, layout = 'kk') +
geom_node_text(aes(label = name))+
geom_edge_link(arrow = arrow(type = "closed", ends = "first",
length = unit(0.20, "inches"),
angle = 15)) +
geom_node_point() +
theme_graph()+
coord_cartesian(xlim = c(-3,3), expand = TRUE)
or perhaps:
ggraph(graph, layout = 'kk') +
geom_node_text(aes(label = name), repel = T)+
geom_edge_link(angle_calc = 'along',
end_cap = circle(3, 'mm'))+
geom_node_point(size = 5) +
theme_graph()+
coord_cartesian(xlim = c(-3,3), expand = TRUE)

r- ggplot decrease number of intervals of axis or spacing the axe tiks

I've made a group plot of time series with ggplot with this syntax:
ggplot(Tur_flow, aes(x=time, group=parameter, colour=parameter))
+ geom_point(aes(y=value), size=1)
+ stat_smooth(aes(y=value), method=lm)
+ facet_grid(parameter ~ Section, scale="free_y")
+ theme_minimal()
+ theme(text = element_text(size=16))
dput(head(Tur_flow))
structure(list(Section = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("S-5", "S-50", "S+5", "S+50"), class = "factor"), parameter = structure(c(3L,
3L, 3L, 3L, 3L, 3L), .Label = c("Discharge", "Mean_Velocity",
"T_15", "T_25", "T_65", "Water_Depth"), class = "factor"), time = structure(c(6L, 13L, 20L, 27L, 34L, 41L), .Label = c("11:59:55", "11:59:56",
"11:59:58", "11:59:59", "12:00:00", "12:00:02", "12:00:05", "12:00:55",
"12:00:56", "12:00:58", "12:00:59", "12:01:00", "12:01:01", "12:01:05",
"12:01:55", "12:01:56............. "8.30", "8.31", "8.41", "8.54", "8.94", "800.31", "822.01", "828.77", "839.30", "846.11", "847.60", "8497.25", "894.21", "91.66", "91.67", "91.68", "91.90", "92.08", "92.23", "92.54", "93.23", "974.50", "N/A"), class = "factor")), .Names = c("Section", "parameter",
"time", "value"), row.names = c(NA, 6L), class = "data.frame")
How can I reduce the interval of both x and y axis? I mean spacing the axes? The x_axis data is time?
On y-axis how can I reduce decimal numbers?

asterix R bar ggplot [duplicate]

I tried to make the title self-explanatory, but here goes - data first:
dtf <- structure(list(variable = structure(c(1L, 1L, 2L, 2L, 3L, 3L,
4L, 4L, 5L, 5L), .Label = c("vma", "vla", "ia", "fma", "fla"), class = "factor"),
ustanova = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L), .Label = c("srednja škola", "fakultet"), class = "factor"),
`(all)` = c(42.9542857142857, 38.7803203661327, 37.8996138996139,
33.7672811059908, 29.591439688716, 26.1890660592255, 27.9557692307692,
23.9426605504587, 33.2200772200772, 26.9493087557604)), .Names = c("variable",
"ustanova", "(all)"), row.names = c(NA, 10L), class = c("cast_df",
"data.frame"), idvars = c("variable", "ustanova"), rdimnames = list(
structure(list(variable = structure(c(1L, 1L, 2L, 2L, 3L,
3L, 4L, 4L, 5L, 5L), .Label = c("vma", "vla", "ia", "fma",
"fla"), class = "factor"), ustanova = structure(c(1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("srednja škola",
"fakultet"), class = "factor")), .Names = c("variable", "ustanova"
), row.names = c("vma_srednja škola", "vma_fakultet", "vla_srednja škola",
"vla_fakultet", "ia_srednja škola", "ia_fakultet", "fma_srednja škola",
"fma_fakultet", "fla_srednja škola", "fla_fakultet"), class = "data.frame"),
structure(list(value = structure(1L, .Label = "(all)", class = "factor")), .Names = "value", row.names = "(all)", class = "data.frame")))
And I'd like to create a dodged barplot, do the coord_flip and put some text labels inside the bars:
ggplot(bar) + geom_bar(aes(variable, `(all)`, fill = ustanova), position = "dodge") +
geom_text(aes(variable, `(all)`, label = sprintf("%2.1f", `(all)`)), position = "dodge") +
coord_flip()
you can see output here.
I reckon I'm asking for something trivial. I want the text labels to "follow" stacked bars. Labels are placed correctly on the y-axis, but how to position them correctly on x-axis?
Is this what you want?
library(ggplot2)
ggplot(bar) +
geom_col(aes(variable, `(all)`, fill = ustanova), position = "dodge") +
geom_text(aes(variable, `(all)`, label = sprintf("%2.1f", `(all)`), group = ustanova),
position = position_dodge(width = .9)) +
coord_flip()
The key is to position = position_dodge(width = .9) (where .9 is the default width of the bars) instead of position = "dodge", which is just a shortcut without any parameter. Additionally you have to set the group=ustanova aesthetic in geom_text to dodge the labels by ustanova (A second option would be to make fill = ustanova a global aesthetic via ggplot(bar, aes(fill = ustanova))
In ggplot2_2.0.0 you find several examples in ?geom_text on how to position geom_text on dodged or stacked bars (the code chunk named "# Aligning labels and bars"). The Q&A What is the width argument in position_dodge? provides a more thorough description of the topic.

Resources