Multiple colour scales in one stacked bar plot using ggplot - r

I created a stacked bar plot which depicts the distribution of council seats (= y-axis) among parties within a municipality over several years (= x-axis). The used code and some data is below. Unfortunately, I don't have yet sufficient points to post the graph.
The different parties are also associated with a variable called "ideology" as a category for different political orientations ("progressive", "moderate", "conservative").
I would like to modify the colors in such a way that all e.g. conservative parties have different kinds of blues; all progressive parties different kinds of green; and all moderate parties e.g. different kinds of red;
The variable on the ideology is in the same dataframe (y).
Any hint how to get this modification? I already tried color=factor(ideology) and group=ideology but to no avail. I am also aware of this related entry Using a pre-defined color palette in ggplot however it doesn't pertain specifically to my problem.
Many thanks.
Used command:
municipality.plot <- ggplot(y, aes(x=as.factor(year), y=seats, fill=party, color=party)) +
geom_bar(bandwidth=1, stat="identity", group="party", position="fill") +
labs(x="year", y="% of seats for municipality")
Sample data:
year district.id party seats ideology
1 2012 127 Stranka Pravde I Razvoja Bosne I Hercegovine 1 p
2 2012 127 Savez Za Bolju Buducnost (SBB) 3 p
3 2008 127 Stranka Demokratske Akcije (SDA) 13 p
4 2004 127 Stranka Demokratske Akcije (SDA) 14 p
5 2008 127 Hrvatska Demokratska Zajednica (HDZ) 1 c
6 2008 127 Stranka Pravde I Razvoja Bosne I Hercegovine 1 p
7 2012 127 Stranka Za Bosnu I Hercegovinu (SzBiH) 4 p
8 2000 127 Socijaldemokratska Partija (SDP) 8 m
9 2012 127 Narodna Stranka Radom Za Boljitak (NSRzB) 2 m
10 2012 127 Socijaldemokratska Unija Bih (SDU) 1 p
11 2000 127 Koalicija - SDA, SBiH 15 p
12 2008 127 Socijaldemokratska Partija (SDP) 5 m
13 2008 127 Narodna Stranka Radom Za Boljitak (NSRzB) 1 m
14 2008 127 Koalicija - LDS, SDU 2 m
15 2000 127 Lgk-liberalno-gradanska Koalicija Bih (liberali Bih, Gds Bih) 1 m
16 2000 127 Nova Hrvatska Inicijativa (NHI) 1 c
17 1997 127 Socijaldemokratska Partija (SDP) 3 m
18 2012 127 Socijaldemokratska Partija (SDP) 6 m
19 2004 127 Stranka Za Bosnu I Hercegovinu (SzBiH) 5 p
20 1997 127 Bosanskohercegovacka Patriotska Stranka (BPS) 9 p
21 2000 127 Bosanskohercegovacka Patriotska Stranka (BPS) 3 p
22 2008 127 Stranka Za Bosnu I Hercegovinu (SzBiH) 4 p
23 1997 127 Hrvatska Demokratska Zajednica (HDZ) 5 c
24 2000 127 Hrvatska Demokratska Zajednica (HDZ) 2 c
25 2012 127 Stranka Demokratske Akcije (SDA) 10 p
26 2004 127 Socijaldemokratska Partija (SDP) 6 m
27 1997 127 Koalicija - SDA, SBiH, Liberali, GDS 13 p

# load relevant packages
library(scales)
library(grid)
library(ggplot2)
library(plyr)
# assume your data is called df
# order data by year, ideology and party
df2 <- arrange(df, year, ideology, party)
########################################
# create one colour palette per ideology
# count number of parties per ideology
tt <- with(df2[!duplicated(df2$party), ], table(ideology))
# conservative parties blues
# progressive parties green
# moderate parties red
blue <- brewer_pal(pal = "Blues")(tt[names(tt) == "c"])
green <- brewer_pal(pal = "Greens")(tt[names(tt) == "p"])
red <- brewer_pal(pal = "Reds")(tt[names(tt) == "m"])
# create data on party and ideology
party_df <- df2[!duplicated(df2$party), c("party", "ideology")]
# set levels of ideologies; c, p, m
party_df$ideology <- factor(party_df$ideology, levels = c("c", "p", "m"))
# order by ideology and party
party_df <- arrange(party_df, ideology, party)
# add fill colours
party_df$fill <- c(blue, green, red)
# set levels of parties based on the order of parties in party_df
party_df$party <- factor(party_df$party, levels = party_df$party)
# use same factor levels for parties in df2
df2$party <- factor(df2$party, levels = party_df$party)
##################################
# Alternative 1. Plot with one legend
g1 <- ggplot(data = df2, aes(x = as.factor(year),
y = seats,
fill = party)) +
geom_bar(stat = "identity", position = "fill") +
labs(x = "year", y = "% of seats for municipality") +
coord_cartesian(ylim = c(0, 1)) +
scale_fill_manual(values = party_df$fill, name = "Parties") +
theme_classic()
g1
#####################################3
# alt 2. Plot with separate legends for each ideology
# create separate plots for each ideology to get legends
# conservative parties blue
cons <- ggplot(data = df2[df2$ideology == "c", ],
aes(x = as.factor(year),
y = seats,
fill = party)) +
geom_bar(stat = "identity", position = "fill") +
scale_fill_manual(values = blue, name = "Conservative parties" )
# extract 'cons' legend
tmp <- ggplot_gtable(ggplot_build(cons))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend_cons <- tmp$grobs[[leg]]
# progressive parties green
prog <- ggplot(data = df2[df2$ideology == "p", ],
aes(x = as.factor(year),
y = seats,
fill = party)) +
geom_bar(stat = "identity", position = "fill") +
scale_fill_manual(values = green, name = "Progressive parties" )
# extract 'prog' legend
tmp <- ggplot_gtable(ggplot_build(prog))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend_prog <- tmp$grobs[[leg]]
# moderate parties red
mod <- ggplot(data = df2[df2$ideology == "m", ],
aes(x = as.factor(year),
y = seats,
fill = party)) +
geom_bar(stat = "identity", position = "fill") +
scale_fill_manual(values = red, name = "Moderate parties" )
# extract 'mod' legend
tmp <- ggplot_gtable(ggplot_build(mod))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend_mod <- tmp$grobs[[leg]]
#######################################
# arrange plot and legends
# define plotting regions (viewports) for plot and legends
vp_plot <- viewport(x = 0.25, y = 0.5,
width = 0.5, height = 1)
vp_legend_cons <- viewport(x = 0.66, y = 0.87,
width = 0.5, height = 0.15)
vp_legend_prog <- viewport(x = 0.7, y = 0.55,
width = 0.5, height = 0.60)
vp_legend_mod <- viewport(x = 0.75, y = 0.2,
width = 0.5, height = 0.30)
# clear current device
grid.newpage()
# add objects to the viewports
# plot without legend
print(g1 + theme(legend.position = "none"), vp = vp_plot)
upViewport(0)
# legends
pushViewport(vp_legend_cons)
grid.draw(legend_cons)
upViewport(0)
pushViewport(vp_legend_prog)
grid.draw(legend_prog)
upViewport(0)
pushViewport(vp_legend_mod)
grid.draw(legend_mod)

Related

geom_text() to label two separate points from different plots in ggplot

I am trying to create individual plots facetted by 'iid' using 'facet_multiple', in the following dataset (first 3 rows of data)
iid Age iop al baseIOP baseAGE baseAL agesurg
1 1 1189 20 27.9 21 336 24.9 336
2 2 877 11 21.5 16 98 20.3 98
3 2 1198 15 21.7 16 98 20.3 98
and wrote the following code:
# Install gg_plus from GitHub
remotes::install_github("guiastrennec/ggplus")
# Load libraries
library(ggplot2)
library(ggplus)
# Generate ggplot object
p <- ggplot(data_longF1, aes(x = Age, y = al)) +
geom_point(alpha = 0.5) +
geom_point(aes(x= baseAGE, y=baseAL)) +
labs(x = 'Age (days)',
y = 'Axial length (mm)',
title = 'Individual plots of Axial length v time')
p1 <- p+geom_vline(aes(xintercept = agesurg),
linetype = "dotted",
colour = "red",
size =1.0)
p2<- p1 + geom_text(aes(label=iop ,hjust=-1, vjust=-1))
p3 <- p2 + geom_text(aes(label = baseIOP, hjust=-1, vjust=-1))
# Plot on multiple pages (output plot to R/Rstudio)
facet_multiple(plot = p3,
facets = 'iid',
ncol = 1,
nrow = 1,
scales = 'free')
The main issue I am having is labeling the points. The points corresponding to (x=age, y=axl) get labelled fine, but labels for the second group of points (x=baseIOP, y=baseAL) gets put in the wrong place.individual plot sample
I have had a look at similar issues in Stack Overflow e.g. ggplot combining two plots from different data.frames
But not been able to correct my code.
Thanks for your help
You need to define the x and y coordinates for the labels or they will default to the last ones specified.
Thus the geom_text() definitions should look something like:
data_longF1 <-read.table(header=TRUE, text="iid Age iop al baseIOP baseAGE baseAL agesurg
1 1 1189 20 27.9 21 336 24.9 336
2 2 877 11 21.5 16 98 20.3 98
3 2 1198 15 21.7 16 98 20.3 98")
# Generate ggplot object
p <- ggplot(data_longF1, aes(x = Age, y = al)) +
geom_point(alpha = 0.5) +
geom_point(aes(x= baseAGE, y=baseAL)) +
labs(x = 'Age (days)',
y = 'Axial length (mm)',
title = 'Individual plots of Axial length v time')
p1 <- p+geom_vline(aes(xintercept = agesurg),
linetype = "dotted",
colour = "red",
size =1.0)
#Need to specify the x and y coordinates or will default to the last ones defined
p2<- p1 + geom_text(aes(x=Age, y= al, label=iop ,hjust=-1, vjust=-1))
p3 <- p2 + geom_text(aes(x=baseAGE, y= baseAL, label = baseIOP, hjust=-1, vjust=-1))
print(p3)

how to colour a funnel plot in ggplot R

I have drawn the attached funnel plot in ggplot, But I have 2 questions:
Is there any way to make the coloured green dot bigger (only that one);
is there any way to colour the upper and lower part of the confidence intervals?
This is what I am able to make so far:
Thank you!
The data set I am working on:
df <-
read.table(text = "
school_id year sdq_emotional
1060 7 4
1060 7 5
1060 7 7
1060 7 6
1060 7 4
1060 7 7
1060 7 8
1115 7 5
1115 7 9
1115 7 3
1136 7 1
1136 7 8
1136 7 5
1136 7 9
1135 7 4
1139 7 7
1139 7 3
2371 7 6
2371 7 3
2372 7 4
2372 7 1
2378 7 6
2378 7 7
2378 7 5", header=TRUE)
My code as follows:
# Format the data
df1 <- plyr::count(df, c('school_id'))
df2 <- merge(df,df1, by= c("school_id"))
df <- df2
M3 <- aggregate(df$sdq_emotional[df$freq > 10], by=list(df$school_id[df$freq > 10]),mean,na.rm=T)
S3 <- aggregate(df$sdq_emotional[df$freq > 10], by=list(df$school_id[df$freq > 10]),nona)
CG_PLOT1 <- merge(M3,S3,by="Group.1")
names(CG_PLOT1) <- c("School","Mean","Size")
LINE3 <- data.frame(M3=rep(mean(df$sdq_emotional,na.rm=T),max(CG_PLOT1$Size)+25),
SD3=rep(sd(df$sdq_emotional,na.rm=T),max(CG_PLOT1$Size)+25),
N3=sqrt(1:(max(CG_PLOT1$Size)+25)))
ID <- 1060
filling3 <- rep("white",nrow(CG_PLOT1))
filling3[CG_PLOT1$School ==ID]<-"green"
# Build the graph
ggplot(data = CG_PLOT1) +
geom_line(data = LINE3, aes(x = 1:(max(CG_PLOT1$Size) + 25),
y = M3 + qnorm(0.975) * SD3 / N3), size = 1, colour = "steelblue2",
linetype = 5) +
geom_line(data = LINE3, aes(x = 1:(max(CG_PLOT1$Size) + 25),
y = M3 - qnorm(0.975) * SD3 / N3), size = 1, colour = "steelblue2",
linetype = 5) +
geom_segment(xend = max(CG_PLOT1$Size)+25,yend=mean(LINE3$M3,na.rm=T)),
aes(x = 1, y = mean(LINE3$M3,na.rm=T), size=1, colour="steelblue2") +
geom_point(data = CG_PLOT1, aes(x = Size, y = Mean), size = 2,
colour = "black", shape = 21,fill = filling3) +
ylim(0, 8)
thank you very much!
As you didn't provide a reproducible example, I have used this question as a template for your problem:
Creating a dataset here:
library(ggplot2)
set.seed(101)
x <- runif(100, min=1, max=10)
y <- rnorm(length(x), mean=5, sd=0.1*x)
df <- data.frame(x=x*70, y=y)
m <- lm(y ~ x, data=df)
fit95 <- predict(m, interval="conf", level=.95)
fit99 <- predict(m, interval="conf", level=.999)
df <- cbind.data.frame(df,
lwr95=fit95[,"lwr"], upr95=fit95[,"upr"],
lwr99=fit99[,"lwr"], upr99=fit99[,"upr"])
To add a colour background to the funnel plot, we can use the geom_ribbon function within ggplot to fill the area between a ymin and ymax. In this case, we will use the data used to construct each of the lines:
ggplot(df, aes(x, y)) +
# Add background
geom_ribbon(ymin= df$upr99, ymax = Inf, fill = "#e2a49a", alpha = 0.5) +
geom_ribbon(ymin = df$lwr99, ymax = df$upr99, fill = "#e0ba9d", alpha = 0.5 ) +
geom_ribbon(ymin = 0, ymax = df$lwr99, fill = "#8fd6c9", alpha = 0.5 ) +
# Overlay points and lines
geom_point() +
geom_smooth(method="lm", colour="black", lwd=1.1, se=FALSE) +
geom_line(aes(y = upr95), color="black", linetype=2) +
geom_line(aes(y = lwr95), color="black", linetype=2) +
geom_line(aes(y = upr99), color="red", linetype=3) +
geom_line(aes(y = lwr99), color="red", linetype=3)
labs(x="No. admissions...", y="Percentage of patients...")
As for changing the size of one point, you can check out the answer here. I would recommend subsetting the data to extract the one point, and then add another layer for the geom_point and then changing the size and colour argument of the new layer`

reorder ggplot with geom_bar(stat="identity")

I have prepare a dataframe and use a ggplot on him. But the initial order is not respected. How i can respect this order ?
Patient Nb_peptides Type_affinite
1 22 563 a
2 22 1040 b
3 22 11139 c
4 24 489 a
5 24 1120 b
6 24 11779 c
7 13 467 a
8 13 1239 b
9 13 14600 c
g_plot <- ggplot(data = nb_peptides_type,
aes(x = reorder(Patient, True_order),
y = Nb_peptides,
fill = Type_affinite)) +
geom_bar(stat = "identity")
print(g_plot)
Please provide stand-alone code to make it easier.
I would use levels outside of your plot to reorder factor levels : Is it what you're looking for ?
## fake dataframe
df <- data.frame(patient = as.factor(rep((21:30), each=3)),
nb = rpois(30, 1000),
type=sample(letters[1:3], replace =T, size =30))
## initial plot
ggplot(data = df,
aes(x = patient,
y = nb,
fill = type)) +
geom_bar(stat = "identity")
## adjust factors levels
True_order <- sample((21:30), 10)
levels(df$patient) <- True_order
## re-plot
ggplot(data = df,
aes(x = patient,
y = nb,
fill = type)) +
geom_bar(stat = "identity")

ggplot2: reorder bars from highest to lowest in each facet [duplicate]

This question already has answers here:
ggplot bar plot with facet-dependent order of categories
(4 answers)
Closed 5 years ago.
In the df below, I want to reorder bars from highest to lowest in each facet
I tried
df <- df %>% tidyr::gather("var", "value", 2:4)
ggplot(df, aes (x = reorder(id, -value), y = value, fill = id))+
geom_bar(stat="identity")+facet_wrap(~var, ncol =3)
It gave me
It didn't order the bars from highest to lowest in each facet.
I figured out another way to get what I want. I had to plot each variable at a time, then combine all plots using grid.arrange()
#I got this function from #eipi10's answer
#http://stackoverflow.com/questions/38637261/perfectly-align-several-plots/38640937#38640937
#Function to extract legend
# https://github.com/hadley/ggplot2/wiki/Share-a-legend-between-two-ggplot2-graphs
g_legend<-function(a.gplot) {
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
return(legend)
}
p1 <- ggplot(df[df$var== "A", ], aes (x = reorder(id, -value), y = value, fill = id))+
geom_bar(stat="identity") + facet_wrap(~var, ncol =3)
fin_legend <- g_legend(p1)
p1 <- p1 + guides(fill= F)
p2 <- ggplot(df[df$var== "B", ], aes (x = reorder(id, -value), y = value, fill = id))+
geom_bar(stat="identity") + facet_wrap(~var, ncol =3)+guides(fill=FALSE)
p3 <- ggplot(df[df$var== "C", ], aes (x = reorder(id, -value), y = value, fill = id))+
geom_bar(stat="identity") + facet_wrap(~var, ncol =3)+guides(fill=FALSE)
grid.arrange(p1, p2, p3, fin_legend, ncol =4, widths = c(1.5, 1.5, 1.5, 0.5))
The result is what I want
I wonder if there is a straightforward way that can help me order the bars from highest to lowest in all facets without having to plot each variable separtely and then combine them. Any suggestions will be much appreciated.
DATA
df <- read.table(text = c("
id A B C
site1 10 15 20
site2 20 10 30
site3 30 20 25
site4 40 35 40
site5 50 30 35"), header = T)
The approach below uses a specially prepared variable for the x-axis with facet_wrap() but uses the labels parameter to scale_x_discrete() to display the correct x-axis labels:
Prepare data
I'm more fluent in data.table, so this is used here. Feel free to use what ever package you prefer for data manipulation.
Edit: Removed second dummy variable, only ord is required
library(data.table)
# reshape from wide to long
molten <- melt(setDT(df), id.vars = "id")
# create dummy var which reflects order when sorted alphabetically
molten[, ord := sprintf("%02i", frank(molten, variable, -value, ties.method = "first"))]
molten
# id variable value ord
# 1: site1 A 10 05
# 2: site2 A 20 04
# 3: site3 A 30 03
# 4: site4 A 40 02
# 5: site5 A 50 01
# 6: site1 B 15 09
# 7: site2 B 10 10
# 8: site3 B 20 08
# 9: site4 B 35 06
#10: site5 B 30 07
#11: site1 C 20 15
#12: site2 C 30 13
#13: site3 C 25 14
#14: site4 C 40 11
#15: site5 C 35 12
Create plot
library(ggplot2)
# `ord` is plotted on x-axis instead of `id`
ggplot(molten, aes(x = ord, y = value, fill = id)) +
# geom_col() is replacement for geom_bar(stat = "identity")
geom_col() +
# independent x-axis scale in each facet,
# drop absent factor levels (not the case here)
facet_wrap(~ variable, scales = "free_x", drop = TRUE) +
# use named character vector to replace x-axis labels
scale_x_discrete(labels = molten[, setNames(as.character(id), ord)]) +
# replace x-axis title
xlab("id")
Data
df <- read.table(text = "
id A B C
site1 10 15 20
site2 20 10 30
site3 30 20 25
site4 40 35 40
site5 50 30 35", header = T)
If you're willing to lose the X axis labels, you can do this by using the actual y values as the x aesthetic, then dropping unused factor levels in each facet:
ggplot(df, aes (x = factor(-value), y = value, fill = id))+
geom_bar(stat="identity", na.rm = TRUE)+
facet_wrap(~var, ncol =3, scales = "free_x", drop = TRUE) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
Result:
The loss of x-axis labels is probably not too bad here as you still have the colours to go on (and the x-axis is confusing anyway since it's not consistent across facets).

Show loess smoothed trend-line in line plot, #ggplot2.

How to show loess smoothed trend-line in the plot? Please help to handle the warning message: "Removed 19 rows containing non-finite values (stat_smooth)".
My data:
yrcnt<-read.table(header = TRUE, text = "year outcome pop rate pred.SC
1 1995 2306 87592001 2.632660 0.9626214
2 1996 2221 87628543 2.534562 0.9599941
3 1997 2202 81872629 2.689544 0.9573667
4 1998 2316 88200076 2.625848 0.9547394
5 1999 2456 96200312 2.553006 0.9521121
6 2000 2526 99565063 2.537035 0.9494848
7 2001 2511 95951330 2.616952 0.9468575
8 2002 2537 96976191 2.616106 0.9442302
9 2003 2618 101673130 2.574918 0.9416028
10 2004 2644 104554479 2.528825 0.9389755
11 2005 2594 100522055 2.580528 0.9363482
12 2006 2620 105787278 2.476668 0.9337209
13 2007 2722 108946407 2.498476 0.9310936
14 2008 2788 112200567 2.484836 0.9284663
15 2009 2706 104491560 2.589683 0.9258389
16 2010 2773 108651896 2.552187 0.9232116
17 2011 2764 109632577 2.521148 0.9205843
18 2012 2694 107594922 2.503836 0.9179570
19 2013 2673 107553219 2.485281 0.9153297")
http://tutorials.iq.harvard.edu/R/Rgraphics/Rgraphics.html
My code:
p1 <- ggplot(yrcnt, aes(y = log(rate), x = year))
yrcnt$pred.SC <- predict(lm(year ~ log(rate), data = yrcnt))
p1 + geom_line(aes(color = rate)) +geom_line(aes(y = pred.SC))
p1 + geom_line(aes(color = rate)) + geom_smooth()
p4 <- p1 + geom_line(aes(color = rate)) + geom_smooth(color="red")
p4 + scale_x_continuous(name = "Years",limits = c(1995, 2013),breaks = 1994:2014) +
scale_y_continuous(name = "Pancreatic Cancer Hospitalization Rate, 1995-2013",limits = c(2.4, 2.7),breaks = seq(2.4, 2.7, by = 0.1)) +
ggtitle("Long Term Trend in Pancreatic Cancer Hospitalizations")
>`geom_smooth()` using method = 'loess'
p4=Base plot with trendline
Scale plot that was failed to get incorporated to base plot=p4
In the function call to scale_y_continuous(), remove the arguments
limits = c(2.4, 2.7),
breaks = seq(2.4, 2.7,
by = 0.1))
Because the true limits of the y-axis are between 0.9 and 1, and you are setting them to a range between 2.4 and 2.7 . I don't know if you need the rate or the log(rate) here.
An alternative would be
library('ggplot2')
p1 <- ggplot(yrcnt, aes(y = rate, x = year))
######### lm() args flipped, then
######### wrapped in exp() function.
yrcnt$pred.SC <- exp(predict(lm( log(rate) ~ year, data = yrcnt)))
p1 + geom_line(aes(color = rate)) +geom_line(aes(y = pred.SC))
p4 <- p1 + geom_line(aes(color = rate)) + geom_smooth(color="red", method="loess")
p4 + scale_x_continuous(name = "Years",limits = c(1995, 2013),breaks = 1994:2014) +
scale_y_continuous(name = "Pancreatic Cancer Hospitalization Rate, 1995-2013",limits = c(2.4, 2.7),breaks = seq(2.4, 2.7, by = 0.1)) +
theme(legend.position ="none") +
ggtitle("Long Term Trend in Pancreatic Cancer Hospitalizations")

Resources