Annotate group on stacked bar graph - r

How could I add "Division" label on top of the bars themselves in this example of a stacked bar chart?
ggplot2 and a Stacked Bar Chart with Negative Values
I only want to show it for values with space (don't want to overcrowd the figure), so maybe this could be implemented by a minimum bar height. How could I do it for only bars with that minimum height?
Thanks!

You can use geom_text() which comes with a check_overlap parameter -- see ?geom_text():
dat <- read.table(text = " Division Year OperatingIncome
1 A 2012 11460
2 B 2012 7431
3 C 2012 -8121
4 D 2012 15719
5 E 2012 364
6 A 2011 12211
7 B 2011 6290
8 C 2011 -2657
9 D 2011 14657
10 E 2011 1257
11 A 2010 12895
12 B 2010 5381
13 C 2010 -2408
14 D 2010 11849
15 E 2010 517",header = TRUE,sep = "",row.names = 1)
ggplot(dat, aes(x = Year, y = OperatingIncome, fill = Division)) +
geom_col() +
geom_text(aes(label = Division),
position = position_stack(vjust = 0.5),
check_overlap = TRUE)
In the example, however, you will see that the labels do not overlap.

Related

ggplot2 stacked bar with negative values not working with Plotly

I tried the code in the answer in this previous thread, ggplot2 and a Stacked Bar Chart with Negative Values.
dat <- read.table(text = " Division Year OperatingIncome
1 A 2012 11460
2 B 2012 7431
3 C 2012 -8121
4 D 2012 15719
5 E 2012 364
6 A 2011 12211
7 B 2011 6290
8 C 2011 -2657
9 D 2011 14657
10 E 2011 1257
11 A 2010 12895
12 B 2010 5381
13 C 2010 -2408
14 D 2010 11849
15 E 2010 517",header = TRUE,sep = "",row.names = 1)
dat1 <- subset(dat,OperatingIncome >= 0)
dat2 <- subset(dat,OperatingIncome < 0)
plot <- ggplot() +
geom_bar(data = dat1, aes(x=Year, y=OperatingIncome, fill=Division),stat = "identity") +
geom_bar(data = dat2, aes(x=Year, y=OperatingIncome, fill=Division),stat = "identity") +
scale_fill_brewer(type = "seq", palette = 1)
ggplotly(plot)
Here is what I'm getting:
If I run plot(plot) then it works fine:
How do I fix the issue in Plotly?
For future readers
Nowadays, plotly (I am using 4.8.0) supports stacked barcharts with negative values. In the layout you have to set barmode=relative. Moreover, you can also use the ggplotly functionality posted in the question.
plot_ly(dat, y=~OperatingIncome, x=~Year, type='bar', name=~Division, color =~Division,
colors='Blues', marker=list(line=list(width=1, color='lightgray'))) %>%
layout(barmode = 'relative')
Will return:

Ordering a 2 bar plot in R

I have a data set as below and I have created a graph with below code as suggested in a previous question. What I want to do is order the bars by rankings rather than team names. Is that possible to do in ggplot?
Team Names PLRankingsReverse Grreserve
Liverpool 20 20
Chelsea 19 19
Manchester City 15 18
Arsenal 16 17
Tottenham 18 16
Manchester United 8 15
Everton 10 14
Watford 13 13
Burnley 17 12
Southampton 9 11
WBA 11 10
Stoke 4 9
Bournemouth 12 8
Leicester 7 7
Middlesbrough 14 6
C. Palace 6 5
West Ham 1 4
Hull 3 3
Swansea 5 2
Sunderland 2 1
And here is the code:
alldata <- read.csv("premierleague.csv")
library(ggplot2)
library(reshape2)
alldata <- melt(alldata)
ggplot(alldata, aes(x = Team.Names, y= value, fill = variable), xlab="Team Names") +
geom_bar(stat="identity", width=.5, position = "dodge")
Thanks for the help!
In this case you need to sort your data frame prior to melting and capture the order. You can then use this to set the limit order on scale_x_discrete, or you can factor Team Name in your aes string.
Using factor:
ordr <- order(alldata$`Team Names`, alldata$PLRankingsReverse, decreasing = TRUE)
alldata <- melt(alldata)
ggplot(alldata, aes(x = factor(`Team Name`, ordr), y = value, fill = variable) +
labs(x = "Team Name") +
geom_bar(stat = "identity", width = .5, position = "dodge")
Using scale_x_discrete:
ordr <- alldata$`Team Name`[order(alldata$PLRankingsReverse, decreasing = TRUE)]
alldata <- melt(alldata)
ggplot(alldata, aes(x = `Team Name`, y = value, fill = variable) +
labs(x = "Team Name") +
geom_bar(stat = "identity", width =. 5, position = "dodge") +
scale_x_discrete(limits = ordr)

How to plot the availability of a variable by year?

year <- c(2000:2014)
group <- c("A","A","A","A","A","A","A","A","A","A","A","A","A","A","A",
"B","B","B","B","B","B","B","B","B","B","B","B","B","B","B",
"C","C","C","C","C","C","C","C","C","C","C","C","C","C","C")
value <- sample(1:5, 45, replace=TRUE)
df <- data.frame(year,group,value)
df$value[df$value==1] <- NA
year group value
1 2000 A NA
2 2001 A 2
3 2002 A 2
...
11 2010 A 2
12 2011 A 3
13 2012 A 5
14 2013 A NA
15 2014 A 3
16 2000 B 2
17 2001 B 3
...
26 2010 B NA
27 2011 B 5
28 2012 B 4
29 2013 B 3
30 2014 B 5
31 2000 C 5
32 2001 C 4
33 2002 C 3
34 2003 C 4
...
44 2013 C 5
45 2014 C 3
Above is the sample dataframe for my question.
Each group (A,B or C), has value from 2000 to 2014, but in some years, the value might be missing for some of the groups.
The graph I would like to plot is as below:
x-axis is year
y-axis is group (i.e. A, B & C should be showed on y-lab)
the bar or line represent the value availability of each group
If the value is NA, then the bar would not show at that time point.
ggplot2 is preferred if possible.
Can anyone help?
Thank you.
I think my description is confusing. I am expecting a graph like below, BUT the x-axis would be year. And the bar or line represents the availability of the value for a given group across the year.
In the sample dataframe of group A, we have
2012 A 5
2013 A NA
2014 A 3
Then there should be nothing at the point of group A in 2013, and then a dot would be presented at the point of group A in 2014.
You can use the geom_errorbar, with no range (geom_errorbarh for horizontal). Then just subset for complete.cases (or !is.na(df$value))
library(ggplot2)
set.seed(10)
year <- c(2000:2014)
group <- c("A","A","A","A","A","A","A","A","A","A","A","A","A","A","A",
"B","B","B","B","B","B","B","B","B","B","B","B","B","B","B",
"C","C","C","C","C","C","C","C","C","C","C","C","C","C","C")
value <- sample(1:5, 45, replace=TRUE)
df <- data.frame(year,group,value)
df$value[df$value==1] <- NA
no_na_df <- df[complete.cases(df), ]
ggplot(no_na_df, aes(x=year, y = group)) +
geom_errorbarh(aes(xmax = year, xmin = year), size = 2)
Edit:
To get a countious bar, you can use this slightly unappealing method. It is nesessary to make a numeric representation of the group data, to give the bars a width. Thereafter, we can make the scale represent the variables as discrete again.
df$group_n <- as.numeric(df$group)
no_na_df <- df[complete.cases(df), ]
ggplot(no_na_df, aes(xmin=year-0.5, xmax=year+0.5, y = group_n)) +
geom_rect(aes(ymin = group_n-0.1, ymax = group_n+0.1)) +
scale_y_discrete(limits = levels(df$group))

Grouped barplot in ggplot2 in R

I would like to make a grouped bar plot. An example of my data is as follows:
site code year month gear total value
678490 2012 3 GL 13882
678490 2012 4 GL 50942
678490 2012 5 GL 54973
678490 2012 6 GL 63938
678490 2012 7 GL 23825
678490 2012 8 GL 8195
678490 2012 9 GL 14859
678490 2012 9 RT 3225
678490 2012 10 GL 981
678490 2012 10 RT 19074
678490 2012 11 SD 106384
678490 2012 11 RT 2828
678490 2012 12 GL 107167
678490 2012 12 RT 4514
There are 17 site code options, four year options, twelve month options, and four gear options.
What I would to produce is a plot per site, per year, showing the 'total value' for each gear, for each month, as a bar.
So far I have managed to produce a plot, specific to site and year, but with the total values displayed in one bar per month, not separated into separate bars per month (can not include image in first post!)
But for months 9, 10, 11 and 12 there were two gears used so I want there to be two bars for these months.
I am using the following piece of code:
ggplot(subset(cdata, year %in% c("2012") & site code %in% c("678490")),
aes(x = factor(month), y = total value)) +
geom_bar(stat = "identity") +
labs(x = "Month", y = "Total value")
Any help on this would be greatly appreciated.
If you want separate bars for each gear, then you should add fill=gear to the aes in geom_bar:
ggplot(cdata[cdata$year==2012 & cdata$sitecode==678490,],
aes(x = factor(month), y = totalvalue, fill=gear)) +
geom_bar(stat = "identity", position="dodge") +
labs(x = "Month", y = "Total value")
this gives:
When you want to make a plot per site, per year, showing the 'total value' for each gear, for each month, as a bar, you can use facet_grid. For example:
ggplot(cdata, aes(x = factor(month), y = totalvalue, fill=gear)) +
geom_bar(stat = "identity", position="dodge") +
labs(x = "Month", y = "Total value") +
facet_grid(sitecode ~ year)
this gives:
Some additional comments:
It's probably better not to use spaces in your column names (in the code above I removed the spaces)
Add an example to your question which illustrative for the problem you are facing. In this case, it's better to give an example dataset that includes several sitecodes and several years.
I therefore made up some data:
df1 <- read.table(text="sitecode year month gear totalvalue
678490 2012 3 GL 13882
678490 2012 4 GL 50942
678490 2012 5 GL 54973
678490 2012 6 GL 63938
678490 2012 7 GL 23825
678490 2012 8 GL 8195
678490 2012 9 GL 14859
678490 2012 9 RT 3225
678490 2012 10 GL 981
678490 2012 10 RT 19074
678490 2012 11 SD 106384
678490 2012 11 RT 2828
678490 2012 12 GL 107167
678490 2012 12 RT 4514", header= TRUE)
df2 <- df1
df2$sitecode <- 7849
df2$year <- 2013
df3 <- df1
df3$sitecode <- 7849
df4 <- df1
df4$year <- 2013
cdata <- rbind(df1,df2,df3,df4)

Changing X-axis values in Time Series plot with R

I'm a newer R user and I need help with a time series plot. I created a time series plot, and cannot figure out how to change my x-axis values to correspond to my sample dates. My data is as follows:
Year Month Level
2009 8 350
2009 9 210
2009 10 173
2009 11 166
2009 12 153
2010 1 141
2010 2 129
2010 3 124
2010 4 103
2010 5 69
2010 6 51
2010 7 49
2010 8 51
2010 9 51
Let's say this data is saved as the name "data.csv"
data = read.table("data.csv", sep = ",", header = T)
data.ts = ts(data, frequency = 1)
plot(dat.mission.ts[, 3], ylab = "level", main = "main", axes = T)
I've also tried inputing the start = c(2009, 8) into the ts function but I still get wrong values
When I plot this my x axis does not correlate to August 2009 through Sept. 2010. It will either increase by year or just by decimal. I've looked up many examples online and also through the ? help on R, but cannot find a way to relabel my axis values. Any help would be appreciated.
Using base coding, you can accomplish this in a few steps. As described in this SO answer, you can identify your "Month" and "Year" data as a date if you use as.Date and paste functions together and incorporate a day (i.e., first day of the month; "1"). For the purposes of this answer, I will simply refer to the data you provided as df:
df$date<-with(df,as.Date(paste(Year,Month,'1',sep='-'),format='%Y-%m-%d'))
df
Year Month Level date
1 2009 8 350 2009-08-01
2 2009 9 210 2009-09-01
3 2009 10 173 2009-10-01
4 2009 11 166 2009-11-01
5 2009 12 153 2009-12-01
6 2010 1 141 2010-01-01
7 2010 2 129 2010-02-01
8 2010 3 124 2010-03-01
9 2010 4 103 2010-04-01
10 2010 5 69 2010-05-01
11 2010 6 51 2010-06-01
12 2010 7 49 2010-07-01
13 2010 8 51 2010-08-01
14 2010 9 51 2010-09-01
Then you can use your basic plot, axis, and mtext functions to control how you want to visualize the data and your axes. For instance:
xmin<-min(df$date,na.rm=T);xmax<-max(df$date,na.rm=T) #ESTABLISH X-VALUES (MIN & MAX)
ymin<-min(df$Level,na.rm=T);ymax<-max(df$Level,na.rm=T) #ESTABLISH Y-VALUES (MIN & MAX)
xseq<-seq.Date(xmin,xmax,by='1 month') #CREATE DATE SEQUENCE THAT INCREASES BY MONTH FROM DATE MINIMUM TO MAXIMUM
yseq<-round(seq(0,ymax,by=50),0) # CREATE SEQUENCE FROM 0-350 BY 50
par(mar=c(1,1,0,0),oma=c(6,5,3,2)) #CONTROLS YOUR IMAGE MARGINS
plot(Level~date,data=df,type='b',ylim=c(0,ymax),axes=F,xlab='',ylab='');box() #PLOT LEVEL AS A FUNCTION OF DATE, REMOVE AXES FOR FUTURE CUSTOMIZATION
axis.Date(side=1,at=xseq,format='%Y-%m',labels=T,las=3) #ADD X-AXIS LABELS WITH "YEAR-MONTH" FORMAT
axis(side=2,at=yseq,las=2) #ADD Y-AXIS LABELS
mtext('Date (Year-Month)',side=1,line=5) #X-AXIS LABEL
mtext('Level',side=2,line=4) #Y-AXIS LABEL
library(data.table)
library(ggplot2)
library(scales)
data<-data.table(datetime=seq(as.POSIXct("2009/08/01",format="%Y/%m/%d"),
as.POSIXct("2010/09/01",format="%Y/%m/%d"),by="1 month"),
Level=c(350,210,173,166,153,141,129,124,103,69,51,49,51,51))
ggplot(data)+
geom_point(aes(x=datetime,y=Level),col="brown1",size=1)+
scale_x_datetime(labels = date_format("%Y/%m"),breaks = "1 month")+
theme(axis.text.x = element_text(angle = 90, hjust = 1,vjust=0.3))
Example using xts package:
library(xts)
ts1 <- xts(data$Level, as.POSIXct(sprintf("%d-%d-01", data$Year, data$Month)))
# or ts1 <- xts(data$Level, as.yearmon(data$Year + (data$Month-1)/12))
plot(ts1)
If you are using ggplot2:
library(ggplot2)
autoplot(ts1)

Resources