Adding space between bars in ggplot2 - r

I'd like to add spaces between bars in ggplot2. This page offers one solution: http://www.streamreader.org/stats/questions/6204/how-to-increase-the-space-between-the-bars-in-a-bar-plot-in-ggplot2. Instead of using factor levels for the x-axis groupings, however, this solution creates a numeric sequence, x.seq, to manually place the bars and then scales them using the width argument. The width argument doesn't work, however, when I use factor level groupings for the x-axis, as in the example below.
library(ggplot2)
# Toy data: one Response value per Treatment x Gender combination.
Treatment <- rep(c("T", "C"), each = 2)
Gender <- rep(c("M", "F"), 2)
Response <- sample(1:100, 4)
df <- data.frame(Treatment, Gender, Response)

# `stat` is a layer argument, not an aesthetic, so it has no effect inside
# aes(). geom_bar() defaults to counting rows, which conflicts with a mapped
# y; pass stat = "identity" to the layer so bar heights come from Response.
hist <- ggplot(df, aes(x = Gender, y = Response, fill = Treatment))
hist + geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(limits = c(0, 100), name = "")
Does anyone know how to get the same effect as in the linked example, but while using factor level groupings?

Is this what you want?
# geom_bar() counts rows by default; stat = "identity" makes it use the
# mapped y value. `width` sets the bar width, while position_dodge(width)
# sets how far apart the dodged bars are placed.
hist + geom_bar(
  stat = "identity",
  width = 0.4,
  position = position_dodge(width = 0.5)
)
width in geom_bar determines the width of the bar.
width in position_dodge determines the position of each bar.
Probably you can easily understand their behavior after you play with them for a while.

Related

Ggplot2 Boxplot width setting changes x-axis

I have produced a boxplot with a continuous x-axis using geom_boxplot() in ggplot2. However, as there are many boxes they appear as skinny lines. Another Stack Overflow thread (see here) suggested using the width= argument to make all the boxes the same width. However, when I use this argument it changes the x-axis and some of the boxes just disappear!
For example, take this example dataframe. I apologise for the number of observations this has but I think the quantity has to do with the problem as I couldn't reproduce it with a more simple boxplot:
Lat<- c(50.70228,50.70228,50.70228,51.82067,51.82067,51.82067,52.45893,52.45893,52.45893,52.76478,52.76478,52.76478,52.78354,52.78354,52.78354,53.56102,53.56102,53.56102,53.65364,53.65364,53.65364,53.63130,53.63130,53.63130,54.19035,54.19035,54.19035,54.25751,54.25751,54.25751,54.23526,54.23526,54.23526,54.62469,54.62469,54.62469,54.67831,54.67831,54.67831,54.67900,54.67900,54.67900,54.94908,54.94908,54.94908,55.19456,55.19456,55.19456,54.79198,54.79198,54.79198,55.34981,55.34981,55.34981,55.85655,55.85655,55.85655,56.06078,56.06078,56.06078,55.84553,55.84553,55.84553,56.00197,56.00197,56.00197,56.71842,56.71842,56.71842,57.00116,57.00116,57.00116,57.06942,57.06942,57.06942,57.26815,57.26815,57.26815,57.45532,57.45532,57.45532,57.88596,57.88596,57.88596,51.07711,51.07711,51.07711,51.07801,51.07621,51.11159,51.11159,51.11159,52.02484,52.02484,52.02484,52.02581,52.02581,52.02581,52.02685,52.02685,52.02685,52.05353,52.05353,52.05626,52.05353,52.05353,52.05353,52.05353,52.05353,52.05353,51.93541,51.93541,51.93541,51.93541,51.93541,51.93541,51.93541,51.93541,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.90810,52.90810,52.90810,52.90810,52.90810,52.90810,52.78968,52.78778,52.78968,52.78968,52.78881,52.78883,52.78883,52.78883,52.78970,52.78970,52.79506,52.79506,52.79506,53.77270,53.77276,53.77109,53.77109,53.77276,53.76845,53.76845,53.77109,53.76845,53.77109,53.87020,53.87020,53.87020,53.87103,53.88205,53.88205,53.88205,53.88205,53.87701,53.87701,53.87098,53.87098,53.87098,53.86932,53.86932,53.86932,56.51869,56.51869,56.51869,56.55870,56.55870,56.55870,56.55964,56.55964,56.55964,57.51056,57.49542,57.49542,57.50878,57.50878,57.50878,57.45201,57.45477,57.45192,57.45192,57.45192)
y <- c(33.45407,21.40954,27.73487,20.38318,26.65483,31.68201,23.95467,20.77363,32.94192,22.71228,25.78824,28.39449,35.60615,24.29325,22.95047,25.65343,30.23262,22.05534,37.20565,35.53812,38.20211,39.38034,35.16619,38.82336,29.72370,38.25754,26.51339,39.38283,29.57483,31.80111,24.52967,34.83037,21.75038,35.50868,39.41830,21.96971,22.82504,32.69746,35.10747,27.75669,34.96690,37.61921,37.17226,20.50448,39.26582,22.08668,28.41502,36.69530,23.69404,23.18052,33.27420,23.04157,33.17285,32.00579,21.83845,22.97143,32.27190,21.53771,38.65481,20.14341,33.62718,39.86755,39.77881,30.59810,27.65909,24.11646,34.56981,29.30249,34.99361,32.39553,28.90443,34.88775,22.77049,36.44468,30.64496,35.81501,31.77673,24.19058,39.36298,21.47219,23.02268,31.37647,27.28457,33.14749,23.20842,39.73427,39.81399,35.51515,24.55080,39.41190,29.59987,38.46791,20.94479,37.22109,26.36060,30.91641,39.25975,39.88288,22.59061,30.24439,21.66110,30.36878,28.76901,38.75561,33.80408,31.05842,26.18921,21.30804,35.02966,33.85981,30.84373,31.67341,35.07605,37.93820,31.30481,21.45117,37.13626,25.70964,25.64736,38.58381,31.24448,26.55902,23.90817,33.70300,26.48909,37.73200,32.52413,22.44440,28.19878,32.46415,25.13711,26.66075,28.16254,20.40673,39.89327,30.83327,32.40196,39.81218,39.80391,21.87316,34.95792,33.38958,38.18441,22.03114,35.64410,34.90643,24.23056,36.66581,29.35813,20.86880,30.02044,36.13727,24.65558,39.43175,29.00154,29.78185,22.89196,37.15204,35.88188,28.73920,28.04934,37.50701,30.36306,28.39842,35.20973,26.54260,29.57763,26.03163,26.90440,27.60110,25.80086,39.98019,21.59970,28.83825,32.01711,20.50812,38.43331,32.41898,27.68722,32.59905,24.18150,29.05701,22.38512,32.93342,37.66694,37.65391,34.19613,23.89985,36.90012,20.74244,27.08511,29.21433,35.83771,35.59557,33.74533,27.08854,38.38994)
V3 <-c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2)
# Build the data frame directly. cbind() would first coerce everything to a
# numeric matrix, turning the factor into integer codes and leaving the third
# column to be auto-named by as.data.frame().
df <- data.frame(Lat, y, V3 = as.factor(V3))
head(df)
I plot it on a continuous x-axis as so:
df_plot <- ggplot(df, aes(x=Lat, y=y, group=Lat))+
geom_boxplot(aes(colour=as.factor(V3)))+
theme_classic()
df_plot
Which produces:
As you can see the boxes are represented as skinny lines.
Therefore I tried to use the width= argument as so:
df_plot2 <- ggplot(df, aes(x=Lat, y=y, group=Lat))+
geom_boxplot(aes(colour=as.factor(V3)), width=1)+
theme_classic()
df_plot2
The output is:
The main thing to notice here is that the x-axis range has suddenly changed! Some of the boxes are no longer plotted whilst others seem to be placed at different values of the x-axis.
The range of the x-axis should be:
range(df$Lat)
[1] 50.70228 57.88596
I am completely perplexed as to why the x-axis would change by simply adding the width= argument in geom_boxplot(). I therefore tried to force the limits of the x-axis scale as so:
df_plot3 <- ggplot(df, aes(x=Lat, y=y, group=Lat))+
geom_boxplot(aes(colour=as.factor(V3)), width=1)+
xlim(50,58)+
theme_classic()
df_plot3
Output:
Please send help!
I think the strange behaviour comes from ggplot trying to automatically dodge your boxplots apart. By setting position = position_dodge(width = 0) the plot seems to be created as expected without changing the placement of boxes along the x-axis. (But gives a warning about overlapping x intervals)
Lat<- c(50.70228,50.70228,50.70228,51.82067,51.82067,51.82067,52.45893,52.45893,52.45893,52.76478,52.76478,52.76478,52.78354,52.78354,52.78354,53.56102,53.56102,53.56102,53.65364,53.65364,53.65364,53.63130,53.63130,53.63130,54.19035,54.19035,54.19035,54.25751,54.25751,54.25751,54.23526,54.23526,54.23526,54.62469,54.62469,54.62469,54.67831,54.67831,54.67831,54.67900,54.67900,54.67900,54.94908,54.94908,54.94908,55.19456,55.19456,55.19456,54.79198,54.79198,54.79198,55.34981,55.34981,55.34981,55.85655,55.85655,55.85655,56.06078,56.06078,56.06078,55.84553,55.84553,55.84553,56.00197,56.00197,56.00197,56.71842,56.71842,56.71842,57.00116,57.00116,57.00116,57.06942,57.06942,57.06942,57.26815,57.26815,57.26815,57.45532,57.45532,57.45532,57.88596,57.88596,57.88596,51.07711,51.07711,51.07711,51.07801,51.07621,51.11159,51.11159,51.11159,52.02484,52.02484,52.02484,52.02581,52.02581,52.02581,52.02685,52.02685,52.02685,52.05353,52.05353,52.05626,52.05353,52.05353,52.05353,52.05353,52.05353,52.05353,51.93541,51.93541,51.93541,51.93541,51.93541,51.93541,51.93541,51.93541,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.92425,52.90810,52.90810,52.90810,52.90810,52.90810,52.90810,52.78968,52.78778,52.78968,52.78968,52.78881,52.78883,52.78883,52.78883,52.78970,52.78970,52.79506,52.79506,52.79506,53.77270,53.77276,53.77109,53.77109,53.77276,53.76845,53.76845,53.77109,53.76845,53.77109,53.87020,53.87020,53.87020,53.87103,53.88205,53.88205,53.88205,53.88205,53.87701,53.87701,53.87098,53.87098,53.87098,53.86932,53.86932,53.86932,56.51869,56.51869,56.51869,56.55870,56.55870,56.55870,56.55964,56.55964,56.55964,57.51056,57.49542,57.49542,57.50878,57.50878,57.50878,57.45201,57.45477,57.45192,57.45192,57.45192)
y <- c(33.45407,21.40954,27.73487,20.38318,26.65483,31.68201,23.95467,20.77363,32.94192,22.71228,25.78824,28.39449,35.60615,24.29325,22.95047,25.65343,30.23262,22.05534,37.20565,35.53812,38.20211,39.38034,35.16619,38.82336,29.72370,38.25754,26.51339,39.38283,29.57483,31.80111,24.52967,34.83037,21.75038,35.50868,39.41830,21.96971,22.82504,32.69746,35.10747,27.75669,34.96690,37.61921,37.17226,20.50448,39.26582,22.08668,28.41502,36.69530,23.69404,23.18052,33.27420,23.04157,33.17285,32.00579,21.83845,22.97143,32.27190,21.53771,38.65481,20.14341,33.62718,39.86755,39.77881,30.59810,27.65909,24.11646,34.56981,29.30249,34.99361,32.39553,28.90443,34.88775,22.77049,36.44468,30.64496,35.81501,31.77673,24.19058,39.36298,21.47219,23.02268,31.37647,27.28457,33.14749,23.20842,39.73427,39.81399,35.51515,24.55080,39.41190,29.59987,38.46791,20.94479,37.22109,26.36060,30.91641,39.25975,39.88288,22.59061,30.24439,21.66110,30.36878,28.76901,38.75561,33.80408,31.05842,26.18921,21.30804,35.02966,33.85981,30.84373,31.67341,35.07605,37.93820,31.30481,21.45117,37.13626,25.70964,25.64736,38.58381,31.24448,26.55902,23.90817,33.70300,26.48909,37.73200,32.52413,22.44440,28.19878,32.46415,25.13711,26.66075,28.16254,20.40673,39.89327,30.83327,32.40196,39.81218,39.80391,21.87316,34.95792,33.38958,38.18441,22.03114,35.64410,34.90643,24.23056,36.66581,29.35813,20.86880,30.02044,36.13727,24.65558,39.43175,29.00154,29.78185,22.89196,37.15204,35.88188,28.73920,28.04934,37.50701,30.36306,28.39842,35.20973,26.54260,29.57763,26.03163,26.90440,27.60110,25.80086,39.98019,21.59970,28.83825,32.01711,20.50812,38.43331,32.41898,27.68722,32.59905,24.18150,29.05701,22.38512,32.93342,37.66694,37.65391,34.19613,23.89985,36.90012,20.74244,27.08511,29.21433,35.83771,35.59557,33.74533,27.08854,38.38994)
V3 <-c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2)
library(ggplot2)
# Build the data frame directly. cbind() would first coerce everything to a
# numeric matrix, turning the factor into integer codes and leaving the third
# column to be auto-named by as.data.frame().
df <- data.frame(Lat, y, V3 = as.factor(V3))

# One box per distinct Lat value. position_dodge(width = 0) disables the
# automatic dodging so each box stays at its own Lat position, and width = 1
# gives every box the same width on the continuous x-axis.
df_plot <- ggplot(df) +
  geom_boxplot(
    aes(colour = as.factor(V3), x = Lat, y = y, group = as.factor(Lat)),
    position = position_dodge(width = 0),
    width = 1
  ) +
  theme_classic()

How to reduce binwidth in geom_bar for one single bar?

I'm trying to get a side-by-side bar plot using ggplot's geom_bar(). Here's some sample data I made up for replication purposes:
dat <- data.frame("x"=c(rep(c(1,2,3,4,5),5)),
"by"=c(NA,0,0,0,0,NA,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1))
I want to plot "x" grouped by "by". Now, because I don't need to plot NA values, I filtered for !is.na(by))
library(dplyr)
dat <- filter(dat, !is.na(by))
Now for the plot:
library(ggplot2)
library(ggthemes)  # theme_tufte() is provided by ggthemes, not ggplot2

# Count of rows per x value, with one dodged bar per level of `by`.
ggplot(dat, aes(x = x, fill = as.factor(by))) +
  geom_bar(position = "dodge") +
  theme_tufte()
This returns almost what I need. Unfortunately, the first bar looks really weird, because its binwidth is twice as large (due to the fact that there are no zeros in "by" for "x"==1).
Is there a way to reduce the binwidth for the first bar back to "normal"?
You could also do it like this. Precalculate the table and use geom_col.
ggplot(as.data.frame(table(dat)), aes(x = x, y = Freq, fill = by)) +
theme_bw() +
geom_col(position = "dodge")
Never mind, I just figured out that you can manipulate the binwidth argument using an ifelse statement.
...geom_bar(..., binwidth = ifelse("by"==1 & is.na("x"), .5, 1)))
So if you play around with this, it will work. At least it worked for me.

Horizontal geom_bar with no overlaps, equal bar widths, and customized axis tick labels

Probably a simple ggplot2 question.
I have a data.frame with a numeric value, a categorical (factor) value, and a character value:
library(dplyr)
set.seed(1)
# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned.
df <- data.frame(
  log10.p.value = c(-2.5, -2.5, -2.5, -2.39, -2, -1.85, -1.6, -1.3, -1.3, -1),
  direction = sample(c("up", "down"), 10, replace = TRUE),
  label = paste0("label", 1:10),
  stringsAsFactors = FALSE
) %>%
  dplyr::arrange(log10.p.value)
# Fix the level order so "up" comes before "down".
df$direction <- factor(df$direction, levels = c("up", "down"))
I want to plot these data as a barplot using geom_bar, where the bars are horizontal and their lengths are determined by df$log10.p.value, their color by df$direction, and the y-axis tick labels are df$label, where the bars are vertically ordered by df$log10.p.value.
As you can see df$log10.p.value are not unique, hence:
ggplot(df,aes(log10.p.value))+geom_bar(aes(fill=direction))+theme_minimal()+coord_flip()+ylab("log10(p-value)")+xlab("")
Gives me:
How do I:
Make the bars not overlap each other.
Have the same width.
Be separated by a small margin?
Have the y-axis tick labels be df$label?
Thanks
Here is one possible solution. Please note that, by default, geom_bar determines the bar length using frequency/count. So, you need to specify stat = "identity" for value mapping.
# since all of your values are negative the graph is on the left side
ggplot(df, aes(x = label, y = log10.p.value, fill = direction)) +
geom_bar(stat = "identity") +
theme_minimal() +
coord_flip() +
ylab("log10(p-value)") +
xlab("")

Remove space between bars in ggplot2 geom_bar

I am looking to "dodge" the bars of a barplot together. The following R code leaves white space between the bars. Other answers like this one show how to accomplish this for the bars part of a group, but that does not seem to apply for distinct bars per factor on the x axis.
# library() stops with an error if the package is missing, whereas require()
# only warns and returns FALSE, letting the script fail later.
library(ggplot2)
dat <- data.frame(
  a = c("A", "B", "C"),
  b = c(0.71, 0.94, 0.85),
  d = c(32, 99, 18)
)
# Each bar's width is its share of the total of d.
ggplot(dat, aes(x = a, y = b, fill = d, width = d / sum(d))) +
  geom_bar(position = position_dodge(width = 0.1), stat = "identity")
Playing with the width variable changes the appearance, but it does not seem possible to get the bars to sit side by side while still retaining their meaningful difference in width (in this graph redundantly represented by the fill colour too).
I would generate my x-positions and widths first, then pass them in to the aesthetics and override to make your factor labels:
First, store the width
dat$width <-
dat$d / sum(dat$d)
Then, assuming that your data.frame is in the order you want it plotted, you can set the location as the cumulative sum of the widths. Note, however, that that cumulative sum is where you want the right edge of the bar to be, so to get the center you need to subtract half of the width:
dat$loc <-
cumsum(dat$width) - dat$width/2
Then, pass it all in to the ggplot call, setting your labels explicitly:
ggplot(dat, aes(x= loc, y = b, fill=d, width = width)) +
geom_bar(stat="identity") +
scale_x_continuous(breaks = dat$loc
, labels = dat$a)
gives
I am not sure about the advisability of this approach, but this should get the job done.
It is possible by using a continuous x axis and relabeling it.
# Bar centres on a continuous axis: the cumulative share of d gives each
# bar's right edge, so subtract half of the bar's own share to get its
# centre. The stray ")" that closed aes() too early has been removed.
ggplot(dat, aes(x = cumsum(d / sum(d)) - d / sum(d) / 2, y = b, fill = d,
                width = d / sum(d))) +
  geom_bar(stat = "identity", position = position_dodge()) +
  # Relabel the numeric bar centres with the original factor labels.
  scale_x_continuous(
    breaks = cumsum(dat$d / sum(dat$d)) - dat$d / sum(dat$d) / 2,
    labels = dat$a
  )
Or isn't this what you were looking for?

ggplot2 histogram of factors showing the probability mass instead of count

I am trying to use the excellent ggplot2 using the bar geom to plot the probability mass rather than the count. However, using aes(y=..density..) the distribution does not sum to one (but is close). I think the problem might be due to the default binwidth for factors. Here is an example of the problem,
age <- c(rep(0,4), rep(1,4))
mppf <- c(1,1,1,0,1,1,0,0)
data.test <- as.data.frame(cbind(age,mppf))
data.test$age <- as.factor(data.test$age)
data.test$mppf <- as.factor(data.test$mppf)
p.test.density <- ggplot(data.test, aes(mppf, group=age, fill=age)) +
geom_bar(aes(y=..density..), position='dodge') +
scale_y_continuous(limits=c(0,1))
dev.new()
print(p.test.density)
I can get around this problem by keeping the x-variable as continuous and setting binwidth=1, but it doesn't seem very elegant.
data.test$mppf.numeric <- as.numeric(data.test$mppf)
p.test.density.numeric <- ggplot(data.test, aes(mppf.numeric, group=age, fill=age)) +
geom_histogram(aes(y=..density..), position='dodge', binwidth=1)+
scale_y_continuous(limits=c(0,1))
dev.new()
print(p.test.density.numeric)
I think you almost have it figured out, and would have once you realized you needed a bar plot and not a histogram.
The default width for bars with categorical data is .9 (See ?stat_bin. The help page for geom_bar doesn't give the default bar width but does send you to stat_bin for further reading.). Given that, your plots show the correct density for a bar width of .9. Simply change to a width of 1 and you will see the density values you expected to see.
ggplot(data.test, aes(x = mppf, group = age, fill = age)) +
geom_bar(aes(y=..density..), position = "dodge", width = 1) +
scale_y_continuous(limits=c(0,1))

Resources