I'd like to set the minimum bounds for a violin plot, similar to this question: set only lower bound of a limit for ggplot
For this:
# Base plot: user1 (as a factor) on x and pq on y; ymin is mapped to the
# constant -50 in an attempt to pull the lower y limit down to -50.
p <- ggplot(somedf, aes(factor(user1), pq)) + aes(ymin = -50)
# Add the violins, filled by user1; the same ymin mapping is added a second time.
p + geom_violin(aes(fill = user1))+ aes(ymin=-50)
I've tried adding
+ expand_limits(y=-50)
and
+ aes(ymin = -50)
to set lower bounds with no effect.
Here's a sample dataframe that results in the same problem:
structure(list(pq = c(-20L, -12L, 10L, -13L, 11L, -16L), time = c(1214.1333,
1214.1833, 1214.2667, 1214.2833, 1214.35, 1214.5167), pq.1 = c(-20L,
-12L, 10L, -13L, 11L, -16L), time.1 = c(1214.1333, 1214.1833,
1214.2667, 1214.2833, 1214.35, 1214.5167), time.2 = c(1214.1333,
1214.1833, 1214.2667, 1214.2833, 1214.35, 1214.5167), pq.2 = c(-20L,
-12L, 10L, -13L, 11L, -16L), user1 = structure(c(1L, 1L, 2L,
1L, 2L, 1L), .Label = c("someguy3", "someguy4", "someguy6", "someguy4",
"someguy5", "someguy6"), class = "factor"), pq.3 = c(-20L, -12L, 10L,
-13L, 11L, -16L), time.3 = c(1214.1333, 1214.1833, 1214.2667,
1214.2833, 1214.35, 1214.5167), user1.1 = structure(c(1L, 1L,
2L, 1L, 2L, 1L), .Label = c("someguy3", "someguy4", "someguy6",
"someguy4", "someguy5", "someguy6"), class = "factor")), .Names = c("pq",
"time", "pq.1", "time.1", "time.2", "pq.2", "user1", "pq.3",
"time.3", "user1.1"), row.names = c(565L, 566L, 568L, 569L, 570L,
574L), class = "data.frame")
ggplot will pay attention to the aes() directive if you add a call to geom_blank().
## A reproducible example using the mtcars data:
## one violin of mpg per cyl level.
library(ggplot2)
p <- ggplot(mtcars, aes(factor(cyl), mpg))
## This doesn't work: with geom_violin() as the only layer, the
## ymin = -10 mapping has no effect on the y-axis limits.
p + aes(ymin = -10) + geom_violin()
## But this does: once a geom_blank() layer is added, ggplot pays
## attention to the ymin mapping and the lower bound is honoured.
p + aes(ymin = -10) + geom_violin() + geom_blank()
(Note: For this example at least, expand_limits(y = -10) works with or without an accompanying call to geom_blank().)
Related
I've made a bar chart using ggplot with grouped data, and faceted it with facet_grid. The column widths are inconsistent, so I want to make them all the same. I've read this can be done with preserve = "single", but it seems to mess up the position dodging. Any idea how to prevent this from happening?
Here is a small sample of the data:
data <- structure(list(grp2 = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
7L, 7L, 7L, 7L, 7L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L,
7L), .Label = c("CSF1", "CSF2", "PC", "NC", "GPC", "GNC", "standard"
), class = "factor"), label2 = structure(c(7L, 8L, 9L, 7L, 8L,
9L, 7L, 15L, 15L, 15L, 15L, 15L, 7L, 8L, 9L, 7L, 8L, 9L, 7L,
15L, 15L, 15L, 15L, 15L), .Label = c("CSF1_raw", "CSF1_supernatant",
"CSF1_pellet", "CSF2_raw", "CSF2_supernatant", "CSF2_pellet",
"PC_raw", "PC_supernatant", "PC_pellet", "NC_raw", "NC_supernatant",
"NC_pellet", "GPC", "GNC", "standard", "NC"), class = "factor"),
mda_label = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 1L, 1L, 1L, 1L
), .Label = c("none", "mda_20", "mda_200"), class = "factor"),
conc = c(`7` = 0, `8` = 0, `9` = 0.324886127298521, `55` = 4.14765656994934,
`56` = 1.16840050032707, `57` = 8.33529714053568, `76` = 10.6220645144775,
`77` = 48.9241552191721, `78` = 4.51513315624087, `79` = 1.03887911533275,
`80` = 0.0445944796011582, `81` = 0.00484116548901831, `89` = 0,
`90` = 0, `91` = 0.322922569348207, `137` = 6.38488684568018,
`138` = 1.68909814271646, `139` = 7.61828609738757, `158` = 15.3082130743032,
`159` = 41.3127531345335, `160` = 4.64193087683391, `161` = 0.411672491030815,
`162` = 0.0568193835425769, `163` = 0.00439419098560105)), row.names = c(NA,
-24L), class = c("tbl_df", "tbl", "data.frame"))
Here's the initial plot:
# Initial plot: conc against label2, with outline colour and fill mapped to mda_label.
ggplot(data, aes(x=label2, y=conc, colour=mda_label, fill=mda_label)) +
# one panel per grp2, with free x scales and free panel widths
facet_grid(. ~ grp2, scales="free_x", space="free") +
# bars: mean of conc, dodged side by side
stat_summary(fun = mean, geom = "bar", position = position_dodge()) +
# black error bars computed by mean_se, dodged with width 0.9
stat_summary(fun.data = mean_se, geom = "errorbar", colour="black", width=0.5,
position = position_dodge(width=0.9)) +
# individual observations as pch 21 points with a black outline, same dodge width
geom_point(position = position_dodge(width=0.9), pch=21, colour="black") +
# pseudo-log y axis with labels formatted to an accuracy of 0.01
scale_y_continuous(trans='pseudo_log',
labels = scales::number_format(accuracy=0.01)) +
# x tick labels rotated by 45 degrees, hjust = 1
theme(axis.text.x = element_text(angle = 45, hjust = 1))
But when I try to standardise the column widths with preserve="single", it gets messed up:
# Same plot as before, but every position_dodge() now gets preserve="single"
# in an attempt to give all bars the same width.
ggplot(data, aes(x=label2, y=conc, colour=mda_label, fill=mda_label)) +
# one panel per grp2, with free x scales and free panel widths
facet_grid(. ~ grp2, scales="free_x", space="free") +
# bars: mean of conc; dodge uses its default width here
stat_summary(fun = mean, geom = "bar", position = position_dodge(preserve="single")) +
# black error bars computed by mean_se, dodged with width 0.9
stat_summary(fun.data = mean_se, geom = "errorbar", colour="black", width=0.5,
position = position_dodge(width=0.9, preserve="single")) +
# individual observations as pch 21 points with a black outline, same dodge width
geom_point(position = position_dodge(width=0.9, preserve="single"), pch=21, colour="black") +
# pseudo-log y axis with labels formatted to an accuracy of 0.01
scale_y_continuous(trans='pseudo_log',
labels = scales::number_format(accuracy=0.01)) +
# x tick labels rotated by 45 degrees, hjust = 1
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Since your data already has 0 values, you could add rows with a conc of 0 for the other 'mda_label' levels in the "standard" grp2/label2 category, so that every 'mda_label' level is present there.
# Pad the "standard" category with zero-concentration rows for the two
# mda_label levels it does not contain ("mda_20" and "mda_200").
data <- rbind(
  data,
  data.frame(
    grp2 = rep("standard", 2),
    label2 = rep("standard", 2),
    mda_label = c("mda_20", "mda_200"),
    conc = numeric(2)
  )
)
Also you never actually make the bar plot
# Dodged bars of conc per label2, filled by mda_label, one facet per grp2
# with free scales and free panel widths.
ggplot(data, aes(x = label2, y = conc, fill = mda_label)) +
  geom_col(position = position_dodge(width = 1)) +
  facet_grid(. ~ grp2, scales = "free", space = "free")
This might seem like a really stupid mistake on my part, but whenever I map the shape of geom_point to a factor, choosing a hollow point and a solid point (shapes 1 and 19), and also plot error bars, the error bar crosses through the point.
Here are my data frames:
> dput(head(allbins.sum))
structure(list(T = c(0L, 0L, 10L, 10L, 20L, 20L), treatment = structure(c(1L,
2L, 1L, 2L, 1L, 2L), .Label = c("control bead", "dP bead"), class = "factor"),
N = c(3, 3, 3, 3, 3, 3), cellsBase = c(0, 0, 0.013028995209506,
0.135599858885737, -0.0130289952095061, 0.759359209760127
), sd = c(0, 0, 0.0597063567767786, 0.0469731690178533, 0.0983667566897066,
0.183436089048999), se = c(0, 0, 0.034471481157405, 0.0271199717771474,
0.0567920734541125, 0.105906875391532), ci = c(0, 0, 0.148318812500416,
0.116687820597672, 0.244356569875469, 0.455680506502609),
bin = c("BinA", "BinA", "BinA", "BinA", "BinA", "BinA")), .Names = c("T",
"treatment", "N", "cellsBase", "sd", "se", "ci", "bin"), row.names = c(NA,
6L), class = "data.frame")
> dput(head(allbins.fitdata))
structure(list(wellvidbin = structure(c(1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("A1-002-BinA", "A1-002-BinB", "A1-002-BinC", "A1-031-BinA",
"A1-031-BinB", "A1-031-BinC", "A3-004-BinA", "A3-004-BinB", "A3-004-BinC",
"B1-032-BinA", "B1-032-BinB", "B1-032-BinC", "B4-026-BinA", "B4-026-BinB",
"B4-026-BinC", "C4-027-BinA", "C4-027-BinB", "C4-027-BinC"), class = "factor"),
treatment = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("control bead",
"dP bead"), class = "factor"), wellvid = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("A1-002", "A1-031", "A3-004",
"B1-032", "B4-026", "C4-027"), class = "factor"), bin = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("BinA", "BinB", "BinC"), class = "factor"),
T = c(0L, 10L, 20L, 30L, 40L, 50L), T.factor = structure(1:6, .Label = c("0",
"10", "20", "30", "40", "50", "60"), class = "factor"), cells = c(7L,
11L, 26L, 27L, 28L, 36L), cellsS = c(-1.36568429306349, -1.20296446240061,
-0.592765097414793, -0.552085139749072, -0.511405182083351,
-0.185965520757582), cellsBase = c(0, 0.162719830662884,
0.772919195648701, 0.813599153314422, 0.854279110980143,
1.17971877230591), treatT = structure(c(2L, 4L, 6L, 8L, 10L,
12L), .Label = c("control bead.0", "P bead.0", "control bead.10",
"P bead.10", "control bead.20", "P bead.20", "control bead.30",
"P bead.30", "control bead.40", "P bead.40", "control bead.50",
"P bead.50", "control bead.60", "P bead.60"), class = "factor"),
fit = c(0.0285939715820639, 0.304399288764407, 0.58020460594675,
0.856009923129092, 1.13181524031144, 1.40762055749378), se.fit = c(0.157415367032567,
0.132348142293459, 0.114707848741265, 0.108190467052118,
0.114707848741265, 0.132348142293459), upr = c(0.337128090965895,
0.563801647659587, 0.805031989479629, 1.06806323855124, 1.35664262384431,
1.66702291638896), lwr = c(-0.279940147801767, 0.0449969298692267,
0.35537722241387, 0.643956607706942, 0.906987856778556, 1.1482181985986
)), .Names = c("wellvidbin", "treatment", "wellvid", "bin",
"T", "T.factor", "cells", "cellsS", "cellsBase", "treatT", "fit",
"se.fit", "upr", "lwr"), class = c("data.table", "data.frame"
), row.names = c(NA, -6L), .internal.selfref = <pointer: 0x0000000000100788>)
And the code:
# cellsBase against T from allbins.sum, with point shape mapped to treatment
# (the shape mapping is given both in ggplot() and again in geom_point()).
ggplot(data=allbins.sum, aes(x=T, y=cellsBase, shape=treatment)) + geom_point(size=5, aes(shape=treatment))+
# error bars of cellsBase +/- se; this layer is added after the points
geom_errorbar(aes(ymin=cellsBase-se, ymax=cellsBase+se), width=2, size=1) +
# fit with its lwr/upr band, taken from the separate allbins.fitdata table
geom_smooth(data=allbins.fitdata, size=1, aes(y=fit, ymin=lwr, ymax=upr),
color="black", method="lm", stat="identity", alpha=0.2)+
# one panel row per bin
facet_grid(bin~.) +
# shapes 1 and 19 for the two treatment levels
scale_shape_manual(values=c(1, 19))
This gives me this plot:
Any hints on how to make the hollow circles actually appear hollow?
I also tried specifying geom_shape(aes(fill=treatment)) and then scale_fill_manual, but then the fill is also applied to my geom_smooth.
Thanks for the help!
If you mean that you don't want the line of the error bar to be visible through the 'hollow' points, then plot geom_errorbar first, then plot geom_point second, with solid fill, so it will overlay the error bar.
ggplot(data = allbins.sum, aes(x = T, y = cellsBase)) +
  # Draw the error bars first, so that the points added afterwards sit on
  # top of them and hide the part of the bar that would cross the marker.
  geom_errorbar(aes(ymin = cellsBase - se, ymax = cellsBase + se),
                width = 2, size = 1) +
  # Shape 21 is a circle with a black outline and a separate fill colour,
  # so "hollow" points can be drawn as opaque white discs.
  geom_point(aes(fill = treatment), size = 5, shape = 21, color = "black") +
  # Name the values so each treatment gets the intended fill regardless of
  # level order: white ("hollow") for the control beads and black (solid)
  # for the dP beads, matching shapes 1 and 19 of the original plot.
  scale_fill_manual(
    values = c("control bead" = "white", "dP bead" = "black")
  ) +
  theme_bw()
(The data you posted only has these points for allbins.sum, and the code for allbins.fitdata has an error, so no error bars on this plot)
I've made a group plot of time series with ggplot with this syntax:
# One panel per parameter (rows) and Section (columns), with a free y scale;
# each panel shows the points plus a linear-model trend line.
# Every `+` has to end the line it continues: a line that starts with `+`
# is parsed as a new expression, so the layers are never added to the plot.
# NOTE(review): the dput() output shows `time` stored as a factor, and
# `value` appears to be one as well (its labels include "N/A") - confirm
# and convert them before plotting, otherwise the axes are discrete.
ggplot(Tur_flow, aes(x = time, group = parameter, colour = parameter)) +
  geom_point(aes(y = value), size = 1) +
  stat_smooth(aes(y = value), method = lm) +
  facet_grid(parameter ~ Section, scales = "free_y") +
  theme_minimal() +
  theme(text = element_text(size = 16))
dput(head(Tur_flow))
structure(list(Section = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("S-5", "S-50", "S+5", "S+50"), class = "factor"), parameter = structure(c(3L,
3L, 3L, 3L, 3L, 3L), .Label = c("Discharge", "Mean_Velocity",
"T_15", "T_25", "T_65", "Water_Depth"), class = "factor"), time = structure(c(6L, 13L, 20L, 27L, 34L, 41L), .Label = c("11:59:55", "11:59:56",
"11:59:58", "11:59:59", "12:00:00", "12:00:02", "12:00:05", "12:00:55",
"12:00:56", "12:00:58", "12:00:59", "12:01:00", "12:01:01", "12:01:05",
"12:01:55", "12:01:56............. "8.30", "8.31", "8.41", "8.54", "8.94", "800.31", "822.01", "828.77", "839.30", "846.11", "847.60", "8497.25", "894.21", "91.66", "91.67", "91.68", "91.90", "92.08", "92.23", "92.54", "93.23", "974.50", "N/A"), class = "factor")), .Names = c("Section", "parameter",
"time", "value"), row.names = c(NA, 6L), class = "data.frame")
How can I reduce the number of tick marks on both the x and y axes, i.e. space the axis labels further apart? Note that the x-axis data is time.
On the y-axis, how can I reduce the number of decimal places?
I don't know what I'm missing but I cannot figure out a very simple task. This is a small piece of my dataframe:
dput(df)
structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = "SOU55", class = "factor"), Depth = c(2L, 4L,
6L, 8L, 10L, 12L, 14L, 16L, 18L, 20L), Value = c(211.8329815,
278.9603866, 255.6111086, 212.6163368, 193.7281895, 200.9584658,
160.9289157, 192.0664419, 174.5951019, 7.162682425)), .Names = c("ID",
"Depth", "Value"), class = "data.frame", row.names = c(NA, -10L
))
What I'm trying to do is simply plotting Depth versus Value with ggplot, this is the simple code:
# Depth against Value as points joined by a line.
# geom_line() sorts the data by the x aesthetic (Value) before connecting
# the points, so they are not joined in the order of the rows.
ggplot(df, aes(Value, Depth))+
geom_point()+
geom_line()
and this the result:
But it is pretty different from what I really want. This is the plot made with Libreoffice:
It seems that ggplot doesn't connect the values in the correct order. What am I doing wrong?
Thanks to all!
You need geom_path() to connect the observations in the original order. geom_line() sorts the data according to the x-aesthetic before plotting:
# geom_path() joins the observations in the order of the rows (increasing
# Depth in this data) instead of sorting them by Value first.
ggplot(data = df, mapping = aes(x = Value, y = Depth)) +
  geom_point() +
  geom_path()
I am graphing some data with ggplot. However, I don't understand the error I'm getting with slightly different data than data that I can graph successfully. For example, this data graphs successfully:
to_graph <- structure(list(Teacher = c("BS", "BS", "FA"
), Level = structure(c(2L, 1L, 1L), .Label = c("BE", "AE", "ME",
"EE"), class = "factor"), Count = c(2L, 25L, 28L)), .Names = c("Teacher",
"Level", "Count"), row.names = c(NA, 3L), class = "data.frame")
# Filled bars of Count per Teacher, split by Level, titled "Score Distribution".
# Teacher is a character column here, so the x axis is discrete.
ggplot(data=to_graph, aes(x=Teacher, y=Count, fill=Level), ordered=TRUE) +
geom_bar(aes(fill = Level), position = 'fill') +
# y axis without a title, formatted as percentages
scale_y_continuous("",formatter="percent") +
# one colour per Level (BE, AE, ME, EE)
scale_fill_manual(values = c("#FF0000", "#FFFF00","#00CC00", "#0000FF")) +
opts(axis.text.x=theme_text(angle=45)) +
opts(title = "Score Distribution")
But this does not:
to_graph <- structure(list(School = c(84351L, 84384L, 84385L, 84386L, 84387L,
84388L, 84389L, 84397L, 84398L, 84351L, 84384L, 84385L, 84386L,
84387L, 84388L, 84389L, 84397L, 84398L, 84351L, 84386L), Level = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 3L, 3L), .Label = c("BE", "AE", "ME", "EE"), class = "factor"),
Count = c(3L, 7L, 5L, 4L, 3L, 4L, 4L, 6L, 2L, 116L, 138L,
147L, 83L, 76L, 81L, 83L, 85L, 53L, 1L, 1L)), .Names = c("School",
"Level", "Count"), row.names = c(NA, 20L), class = "data.frame")
# Same plot as for Teacher, but with School on the x axis.
# School is an integer column, so x is a numeric variable here rather than
# a discrete one; this is the call that raises the error quoted below.
ggplot(data=to_graph, aes(x=School, y=Count, fill=Level), ordered=TRUE) +
geom_bar(aes(fill = Level), position = 'fill') +
# y axis without a title, formatted as percentages
scale_y_continuous("",formatter="percent") +
# one colour per Level (BE, AE, ME, EE)
scale_fill_manual(values = c("#FF0000", "#FFFF00","#00CC00", "#0000FF")) +
opts(axis.text.x=theme_text(angle=90)) +
opts(title = "Score Distribution")
With the latter code, I get this error:
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this. Error in if (!all(data$ymin == 0)) warning("Filling not well
defined when ymin != 0") : missing value where TRUE/FALSE needed
Anyone know what's going on here? Thank you!
The error occurs because your x variable has numerical values, when in reality you want them to be discrete, i.e. use x=factor(School).
The reason for this is that stat_bin, the default stat for geom_bar, will try to summarise for each unique value of x. When your x-variable is numeric, it tries to summarise at each integer in the range. This is clearly not what you need.
# Wrapping School in factor() makes the x variable discrete, so the bars
# are summarised per school instead of per integer in the numeric range.
# NOTE(review): opts(), theme_text() and the `formatter` argument look like
# an older ggplot2 API - confirm the installed version still supports them
# (other snippets in this file use theme(), element_text() and `labels =`).
ggplot(data=to_graph, aes(x=factor(School), y=Count, fill=Level), ordered=TRUE) +
geom_bar(aes(fill = Level), position='fill') +
opts(axis.text.x=theme_text(angle=90)) +
# y axis without a title, formatted as percentages
scale_y_continuous("",formatter="percent") +
opts(title = "Score Distribution") +
# one colour per Level (BE, AE, ME, EE)
scale_fill_manual(values = c("#FF0000", "#FFFF00","#00CC00", "#0000FF"))