Drawing a bar chart using bin sizes - r

Arrival_Frequency Total_Arrival
0-1 2633586
2-4 223079
4-7 5281
7+ 1718
How do I get a bar plot for this? If I use the normal geom_bar(), it plots the row count of each category, not the Total_Arrival value.

Do you want this?
library(scales)
library(tidyverse)
# Bar heights are taken directly from Total_Arrival (no row counting).
ggplot(df, aes(x = Arrival_Frequency, y = Total_Arrival)) +
  geom_col(position = position_dodge()) +
  scale_y_continuous(labels = label_number()) +
  labs(x = "Arrival Frequency", y = "Total Arrival")
Your values span several orders of magnitude, so the smaller bars are barely visible. You could plot them on a transformed (for example, log) scale instead:
# Same bars, drawn on a log-transformed y axis so the small totals stay
# visible next to the large ones.
ggplot(df, aes(x = Arrival_Frequency, y = Total_Arrival)) +
  geom_col() +
  scale_y_continuous(trans = "log", labels = label_number()) +
  labs(x = "Arrival Frequency", y = "log (Total Arrival)")

Related

How do I sort the bars when the barchart represents the number of occurrences?

I draw a barchart in R:
# One horizontal bar per rating; bar length is the number of rows that have
# that rating (geom_bar() counts rows by default).
ggplot(data, aes(x = rating, fill = rating)) +
  geom_bar() +
  coord_flip() +
  labs(title = "Rating in stories", x = "rating", y = "number of stories") +
  theme(legend.position = "none")
The result is here.
The bars represent the number of times each specific value (M, T, K or K+) occurs in the rating variable.
How do I sort the bars decreasingly?
OK, I found what I was looking for. I needed to use fct_rev(fct_infreq()) on the variable.
# fct_infreq() orders the rating levels from most to least frequent and
# fct_rev() reverses that order, so after coord_flip() the most frequent
# rating is drawn at the top. Both helpers are namespace-qualified so the
# snippet runs even when forcats is not attached.
ggplot(
  data,
  aes(x = forcats::fct_rev(forcats::fct_infreq(rating)), fill = rating)
) +
  geom_bar(stat = "count") +
  ggtitle("Rating in stories") +
  coord_flip() +
  xlab("rating") + # otherwise the axis title would be the fct_rev(...) call
  ylab("number of stories") +
  theme(legend.position = "none")

ggplot histogram: present both overall count in addition to group count in each bin

I am trying to generate a histogram using ggplot which on the x axis has speeds and on the y axis has the counts. In addition, each bin shows how many of those were during the day and night.
I need to present the counts themselves on the plot. I managed to add the counts within each bar but now I would like to present another number, the total count, on top of each bar. Is that possible?
This is my code:
# Stacked histogram of speedmh filled by DayActv. The stat_bin() text layer
# uses the same 15 bins and prints each segment's count at its midpoint.
ggplot(data = GPSdataset1hDFDS48, mapping = aes(x = speedmh)) +
  geom_histogram(
    aes(fill = DayActv),
    bins = 15, colour = "grey20", lwd = 0.2
  ) +
  ylim(0, 400) +
  xlim(0, 500) +
  stat_bin(
    aes(label = ..count.., group = DayActv),
    bins = 15, geom = "text", colour = "white", size = 3.5,
    position = position_stack(vjust = 0.5)
  )
and this is the result I get:
How do I add the total count of speeds within each bin to the top of every bar?
Ideally I would like to make this histogram of proportions of speeds instead of counts, but I think that is too complicated for me at the moment.
Thank you!!
Mia
One way is to add another stat_bin command without the grouping:
library(ggplot2)
# Stacked histogram of speedmh filled by DayActv, with two text layers that
# share the histogram's 15 bins:
#   * white labels: per-DayActv counts, centred inside each stacked segment;
#   * black labels: ungrouped counts (the bin total), nudged above the bar.
ggplot(data = GPSdataset1hDFDS48, mapping = aes(x = speedmh)) +
  geom_histogram(
    aes(fill = DayActv),
    bins = 15, colour = "grey20", lwd = 0.2
  ) +
  ylim(0, 400) +
  xlim(0, 500) +
  stat_bin(
    aes(label = ..count.., group = DayActv),
    bins = 15, geom = "text", colour = "white", size = 3.5,
    position = position_stack(vjust = 0.5)
  ) +
  stat_bin(
    aes(label = ..count..),
    bins = 15, geom = "text", colour = "black", size = 3.5,
    vjust = -0.5
  )
Data:
# Simulated stand-in for the real data: 1000 exponentially distributed
# speeds and a randomly assigned two-level (0/1) DayActv factor.
GPSdataset1hDFDS48 <- data.frame(
  speedmh = rexp(1000, rate = 0.015),
  DayActv = factor(sample(0:1, size = 1000, replace = TRUE))
)

Change tickmark labels in ggplot2 [duplicate]

I would like to show a short time series showing the heterogeneity of heroin seizures in Europe over a span of 22 years. However, a different number of countries is included in some of the years. I would like to display this in the graph by putting "n=xx" for each year on the x-axis. Does anyone know how I should do this?
# Plot value against year with error bars at value +/- se, joined by a line.
# The ggplot() call was missing its closing parenthesis, so the original
# snippet did not parse.
across_time <- ggplot(by_year, aes(year, value)) +
  geom_errorbar(aes(ymin = value - se, ymax = value + se), width = .4) +
  geom_line(colour = "black", size = 2) +
  geom_point(size = 4, shape = 21, fill = "white") + # 21 is filled circle
  xlab("Year") +
  ylab("Siezures") +
  ggtitle("Hetrogeniety Across Time") +
  # A tick every second year from 1990 to 2012.
  scale_x_continuous(breaks = seq(1990, 2012, by = 2))
across_time
Here is a link to what the graph looks like:
http://imgur.com/XWhBqqi
I found this as a solution:
# Build one axis label per year: the year and, on a second line, "n=<count>".
# Assumes by_year has one row per year from 1990 to 2012, in that order.
n_reporting <- c(
  26L, 29L, 30L, 32L, 36L, 35L, 33L, 38L, 36L, 39L, 39L, 40L,
  38L, 40L, 39L, 41L, 42L, 43L, 44L, 41L, 41L, 41L, 42L
)
lab <- paste0(1990:2012, "\nn=", n_reporting)
lab <- factor(lab)
# Attach the labels to the table as its first column.
by_year <- cbind(lab, by_year)
# Constant grouping column: with a discrete x axis, geom_line() needs an
# explicit group to join the points into a single line.
by_year$g <- 0
# Error bars span value +/- se.
across_time <- ggplot(by_year, aes(x = lab, y = value)) +
  geom_errorbar(aes(ymin = value - se, ymax = value + se), width = .4) +
  geom_line(aes(group = g), colour = "black", size = 2) +
  geom_point(size = 4, shape = 21, fill = "white") + # 21 is filled circle
  scale_x_discrete(labels = by_year$lab) + # discrete, not continuous
  labs(
    title = "Heterogeneity of Heroin Seizures in Europe",
    x = "Year & Number of Reporting Countries",
    y = "Total Annual Seizures"
  )
across_time
Here is the final result:
Have you tried using the label argument in scale_x_continuous? If you have a vector with the "xx" you want as labels this should work.

Equal geom point size in legend in multiple plots with ggplot

Is there a way to equalise the size of geom_points throughout multiple plots, so that they are easily comparable?
ie. I want the size of a 100 value to be equal throughout the plots, regardless of the minimum and maximum value that makes up the size values. As seen below, the size of geom_points are the same, but they represent different values.
# Base plot of the points in `mar` by longitude/latitude.
graph <- ggplot(mar, aes(x = long, y = lat)) +
  xlab("Longitude") +
  ylab("Latitude")
# Draw the points with their area scaled by distance$NEAR_DIST.
graph +
  theme_grey() +
  geom_point(aes(size = distance$NEAR_DIST)) +
  scale_size_area() +
  labs(size = "Distance from predicted LCP Roman\nroad to known Roman road (m)")
Thanks!
You could achieve that as follows:
# Two toy data sets whose `size` columns cover different ranges.
df1 <- data.frame(x = 1:20, y = runif(20, 1, 10), size = runif(20, 1, 10))
df2 <- data.frame(x = 1:20, y = runif(20, 1, 10), size = runif(20, 31, 40))
# Largest size value across both data sets; used as the common upper limit
# so both plots share the same size scale.
maximum <- max(df1$size, df2$size)
graph <- ggplot(df1, aes(x = x, y = y, size = size)) +
  geom_point() +
  scale_size_area(limits = c(1, maximum))
graph2 <- ggplot(df2, aes(x = x, y = y, size = size)) +
  geom_point() +
  scale_size_area(limits = c(1, maximum))
Hope this helps!

ggplot2: Shift the baseline of barplot (geom_bar) to the minimum data value

I'm trying to generate a bar plot using geom_bar. My bars have both negative and positive values:
# Reproducible example data: 12 rows, the first six with negative y (log of
# values in (0, 1), group "x") and the last six with positive y (log of
# values in (1, 10), group "y").
set.seed(1)
df <- data.frame(
  y = log(c(runif(6, 0, 1), runif(6, 1, 10))),
  se = runif(12, 0.05, 0.1),
  name = factor(
    rep(c("a", "a", "b", "b", "c", "c"), 2),
    levels = c("a", "b", "c")
  ),
  side = factor(rep(1:2, 6), levels = 1:2),
  group = factor(c(rep("x", 6), rep("y", 6)), levels = c("x", "y")),
  # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
This plot command plots the positive bars to face up and the negative ones to face down:
library(ggplot2)
# Shared dodge position and error-bar mapping, reused by the layers below.
dodge <- position_dodge(width = 0.9)
limits <- aes(ymax = y + se, ymin = y - se)
# Bars grow from zero: positive y values point up, negative ones down.
ggplot(
  df,
  aes(
    x = name, y = y, group = interaction(side, name),
    colour = group, fill = group
  )
) +
  facet_wrap(~group) +
  geom_bar(
    width = 0.6, position = position_dodge(width = 1), stat = "identity"
  ) +
  geom_bar(position = dodge, stat = "identity") +
  geom_errorbar(limits, position = dodge, width = 0.25)
My question is: how do I set the baseline to the minimum of all bars instead of 0, and therefore have the red bars facing up?
You can subtract min(df$y) from each value so that the data are shifted to a baseline of zero, but then relabel the y-axis to the actual values of the points. The code to do it is below, but I wouldn't recommend this. It seems confusing to have bars emanating from a non-zero baseline, as the lengths of the bars no longer encode the magnitudes of the y values.
# Shift every y value by min(y) so all bars start at zero, then relabel the
# y axis ticks with the original (unshifted) values.
ggplot(
  df,
  aes(
    x = name, y = y - min(y), group = interaction(side, name),
    colour = group, fill = group
  )
) +
  facet_wrap(~group) +
  geom_bar(position = dodge, stat = "identity", width = 0.8) +
  geom_errorbar(
    aes(ymin = y - se - min(y), ymax = y + se - min(y)),
    position = dodge, width = 0.25, colour = "black"
  ) +
  # Ticks sit at 0..4 on the shifted scale but show the unshifted values.
  scale_y_continuous(breaks = 0:4, labels = round(0:4 + min(df$y), 1)) +
  geom_hline(aes(yintercept = 0))
Another option is to use geom_linerange which avoids having to shift the y-values and relabel the y-axis. But this suffers from the same distortions as the bar plot above:
# Draw each "bar" as a thick vertical line range from min(y) up to y, so no
# shifting or axis relabelling is needed.
ggplot(
  df,
  aes(x = name, group = interaction(side, name), col = group, fill = group)
) +
  facet_wrap(~group) +
  # Only ymin/ymax are mapped here: x is inherited from the plot, and `xend`
  # is not a geom_linerange aesthetic (mapping it only produces an
  # "unknown aesthetics" warning).
  geom_linerange(aes(ymin = min(y), ymax = y), position = dodge, size = 10) +
  geom_errorbar(
    aes(ymin = y - se, ymax = y + se),
    position = dodge, width = 0.25, colour = "black"
  ) +
  # Baseline at the smallest y value.
  geom_hline(aes(yintercept = min(y)))
Instead, it seems to me points would be more intuitive and natural than bars here:
# Points with error bars around a grey zero reference line. Reuses the
# `limits` mapping and `dodge` position defined earlier.
ggplot(
  df,
  aes(
    x = name, y = y, group = interaction(side, name),
    colour = group, fill = group
  )
) +
  facet_wrap(~group) +
  geom_hline(yintercept = 0, lwd = 0.4, colour = "grey50") +
  geom_errorbar(limits, position = dodge, width = 0.25) +
  geom_point(position = dodge)
This simple hack also works:
# Shift y in place so the smallest value becomes zero, plot the shifted
# values, and relabel the y axis with the original values.
# NOTE: this overwrites df$y; `m` holds the offset needed to undo the shift.
m <- min(df$y) # find min
df$y <- df$y - m
ggplot(
  df,
  aes(x = name, y = y, group = interaction(side, name), col = group, fill = group)
) +
  facet_wrap(~group) +
  geom_bar(
    width = 0.6, position = position_dodge(width = 1), stat = "identity"
  ) +
  geom_bar(position = dodge, stat = "identity") +
  geom_errorbar(limits, position = dodge, width = 0.25) +
  # Five evenly spaced ticks on the shifted scale, labelled with the matching
  # unshifted values. `length.out` is spelled out rather than relying on
  # partial matching of `length`.
  scale_y_continuous(
    breaks = seq(min(df$y), max(df$y), length.out = 5),
    labels = as.character(round(seq(m, max(df$y + m), length.out = 5), 2))
  ) # relabel
I ran into the same problem and discovered you can also easily do this using geom_crossbar.
As long as color and fill are the same you don't see the break in the crossbar (set with y aesthetic) so they look exactly like bars.
library(ggplot2)
# Shared dodge position and error-bar mapping.
dodge <- position_dodge(width = 0.9)
limits <- aes(ymax = y + se, ymin = y - se)
# Every crossbar starts at the overall minimum of y.
df$ymin <- min(df$y)
# Each crossbar spans ymin..y; the middle line (the y aesthetic) sits at the
# top edge, and outline and fill are both mapped to `group`.
ggplot(
  df,
  aes(
    x = name, y = y, ymin = ymin, ymax = y,
    group = interaction(side, name), colour = group, fill = group
  )
) +
  facet_wrap(~group) +
  geom_crossbar(
    width = 0.6, position = position_dodge(width = 1), stat = "identity"
  ) +
  geom_errorbar(limits, colour = "black", position = dodge, width = 0.25)
ggplot output

Resources