change the order of a common legend, in a superimposed graph - r

I would like to change the order of the entries in my legend so that they are not displayed in alphabetical order, as they are in the graph below. I would like the order to be
"NONE","LIGHT","MEDIUM","HEAVY","V_COLD","COLD","MEDIUM","HOT".
Is it possible? I tried with several arguments but without success.
Below, my table :
structure(list(SOUNAME = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "BALLYSHANNON (CATHLEENS FALL)", class = "factor"),
year_month = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L), .Label = c("2013-03",
"2013-04", "2013-05", "2013-06", "2013-07", "2013-08", "2013-09",
"2013-10", "2013-12"), class = "factor"), pre_type = structure(c(4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L), .Label = c("HEAVY", "LIGHT", "MEDIUM",
"NONE"), class = "factor"), pre_value = c(13L, 2L, 11L, 5L,
9L, 3L, 10L, 7L, 2L, 6L, 13L, 10L, 10L, 1L, 15L, 4L, 16L,
2L, 7L, 5L, 2L, 2L, 17L, 9L, 7L, 3L, 13L, 6L, 5L, 2L, 10L,
14L, 1L, 5L, 19L, 6L), tem_type = structure(c(4L, 3L, 2L,
1L, 4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L, 4L, 3L,
2L, 1L, 4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L, 4L,
3L, 2L, 1L), .Label = c("COLD", "HOT", "MEDIUM", "V_COLD"
), class = "factor"), tem_value = c(0L, 7L, 0L, 23L, 0L,
29L, 0L, 1L, 0L, 29L, 2L, 0L, 0L, 21L, 9L, 0L, 0L, 5L, 25L,
0L, 0L, 18L, 13L, 0L, 0L, 21L, 9L, 0L, 0L, 26L, 5L, 0L, 0L,
24L, 0L, 7L), cnt_vehicle = c(NA, 2754406, NA, NA, NA, 2846039,
NA, NA, NA, 3149377, NA, NA, NA, 3058810, NA, NA, NA, 3362614,
NA, NA, NA, 3415716, NA, NA, NA, 3020812, NA, NA, NA, 3076665,
NA, NA, NA, 2775306, NA, NA), x = c(1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L
)), .Names = c("SOUNAME", "year_month", "pre_type", "pre_value",
"tem_type", "tem_value", "cnt_vehicle", "x"), row.names = c(NA,
-36L), class = "data.frame")
Below my graph:
# Monthly weather-class day counts (paired bars) with the vehicle count
# overlaid as a line. Needs ggplot2 and the data frame `b_complet_2013`.
# Day counts are multiplied by 100000 so they share the vehicle-count axis;
# the secondary axis divides by the same factor so it reads in days again.
ggplot(
  data = b_complet_2013,
  aes(x = x, y = pre_value * 100000, fill = pre_type)
) +
  scale_x_continuous(
    breaks = (1:9) + 0.2,
    labels = unique(b_complet_2013$year_month)
  ) +
  # Precipitation classes: first bar of each monthly pair.
  geom_bar(stat = "identity", width = 0.3) +
  # Temperature classes: second bar, shifted 0.4 along x.
  geom_bar(
    aes(x = x + 0.4, y = tem_value * 100000, fill = tem_type),
    stat = "identity", width = 0.3
  ) +
  # Vehicle counts, centred between the two bars (x + 0.2). The constant
  # colour is set as a layer parameter rather than mapped inside aes().
  geom_line(
    aes(x = x + 0.2, y = as.numeric(cnt_vehicle)),
    colour = "blue", size = 0.8
  ) +
  geom_point(
    aes(x = x + 0.2, y = as.numeric(cnt_vehicle)),
    colour = "blue", show.legend = FALSE
  ) +
  scale_y_continuous(
    sec.axis = sec_axis(
      ~ . / 100000,
      name = "Number of days of precipitations(left) and temperatures (right)"
    )
  ) +
  # Only the labels that were actually in effect are kept; the earlier
  # title and y label were overridden by later calls.
  labs(
    title = "Impact of weather on road traffics",
    x = "date",
    y = "Number of vehicles"
  ) +
  theme(
    plot.title = element_text(size = 17),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(color = "blue", face = "bold", size = 12),
    axis.text.y = element_text(color = "blue", face = "bold", size = 9),
    axis.title.y.right = element_text(color = "black", face = "bold"),
    axis.text.y.right = element_text(color = "black", size = 9, face = "bold"),
    panel.background = element_rect(linetype = "dashed", fill = "white"),
    plot.background = element_rect(linetype = "dashed", fill = "grey90")
  )

Related

Need plot labels from separate column in ggplot

I am plotting the number of people against the number of certain incidents per month, and need to plot each month's label beside each point in the plot. The labels are in a separate column (column 'month'), and I need to find the syntax that puts the abbreviated 3-letter month label beside each associated point in the plot. I have done this in base plot previously but can't get it done in ggplot.
My script:
library(dplyr)
library(ggplot2)
# Strip labels for the three study periods, in the order the periods occur.
new_labels <- c("1995-\n2001", "2002-\n2011", "2012-\n2019")

# Incidents against people per month, one facet row per period, with a
# Poisson GLM fit drawn over the points.
df %>%
  mutate(
    period = factor(
      period,
      levels = unique(period),
      labels = new_labels,
      ordered = TRUE
    )
  ) %>%
  ggplot(aes(people, inc)) +
  geom_point(size = 3.5) +
  scale_y_continuous(
    breaks = seq(0, 12, by = 2),
    limits = c(0, 12),
    expand = c(0, 1)
  ) +
  scale_x_continuous(
    breaks = seq(0, 75000, by = 10000),
    limits = c(0, 75000),
    expand = c(0, 0)
  ) +
  theme_bw(base_size = 20) +
  # A single facet specification: a plot holds only one, so an earlier
  # facet_grid() call would simply be replaced by this one.
  facet_grid(rows = vars(period)) +
  stat_smooth(
    method = "glm",
    method.args = list(family = "poisson"),
    colour = "black"
  ) +
  theme(
    strip.background = element_rect(fill = "lightgrey", size = 1, color = "black"),
    strip.text.y = element_text(size = 19, color = "black", angle = 0)
  ) +
  labs(x = "Number of people per month", y = "Incidents per month")
My dataframe:
dput(df)
structure(list(period = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("1",
"2", "3"), class = "factor"), month = structure(c(5L, 4L, 8L,
1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L,
6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L,
11L, 10L, 3L), .Label = c("APR", "AUG", "DEC", "FEB", "JAN",
"JUL", "JUN", "MAR", "MAY", "NOV", "OCT", "SEP"), class = "factor"),
people = c(4068L, 7251L, 14384L, 20513L, 18748L, 17760L,
23433L, 22878L, 12815L, 8101L, 7477L, 5018L, 6830L, 16278L,
30244L, 45747L, 31807L, 41184L, 54124L, 52565L, 24365L, 12759L,
8307L, 6038L, 16711L, 32187L, 45810L, 53932L, 40082L, 58506L,
71259L, 67564L, 33556L, 22818L, 16508L, 15848L), inc = c(2L,
1L, 3L, 5L, 3L, 0L, 2L, 5L, 1L, 1L, 0L, 0L, 0L, 2L, 1L, 5L,
5L, 2L, 7L, 6L, 1L, 0L, 0L, 2L, 0L, 0L, 2L, 2L, 0L, 1L, 1L,
1L, 0L, 0L, 0L, 0L)), row.names = c(NA, -36L), class = "data.frame")

ggarrange() function overwrites the color of my boxplots

I am making two boxplots and want to arrange them beside each other. I have made each of them look like I want when displaying them separately but when I use ggarrange() the colors disappear. This is my code for the plots:
# Panel A: variants per diagnosis. The data are read from the clipboard as
# tab-separated text with a header row.
BOX1_data <- read.table(file = "clipboard", sep = "\t", header = TRUE)
BOX1_data[["Diagnosis"]] <- as.factor(BOX1_data[["Diagnosis"]])
BOX1plot <- ggplot(
  BOX1_data,
  aes(x = Diagnosis, y = No.Variants, fill = Diagnosis)
) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Dark2") +
  scale_x_discrete(
    labels = c("AC\nN=38", "SqCC\nN=15", "SCLC\nN=8", "BL disease\nN=16")
  )

# Panel B: variants per stage, read the same way (the clipboard must hold
# the second table at this point).
BOX2_data <- read.table(file = "clipboard", sep = "\t", header = TRUE)
BOX2_data[["Stage"]] <- as.factor(BOX2_data[["Stage"]])
BOX2plot <- ggplot(
  BOX2_data,
  aes(x = Stage, y = No.Variants, fill = Stage)
) +
  geom_boxplot(width = 0.4) +
  scale_fill_brewer(palette = "Dark2") +
  scale_x_discrete(labels = c("Stage I-III\nN=24", "Stage IV\nN=37"))
To arrange the plots I then write:
# Place the two boxplots side by side, tagged A and B.
BOX_list <- list(BOX1plot, BOX2plot)
ggarrange(
  plotlist = BOX_list,
  labels = c("A", "B"),
  ncol = 2
)
The easiest way of getting rid of gridlines etc I thought was by using theme_set() and I think that this might be my problem.
My code is:
# Session-wide default theme: theme_bw() with the panel border, grid lines
# and panel background blanked, and grey axis lines.
theme_set(
  theme_bw() +
    theme(
      panel.border = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.background = element_blank(),
      axis.line = element_line(colour = "grey")
    )
)
I realize that theme_bw() overwrites my colors in the boxes. But I have tried removing it, switching it for theme_transparent() (this removes all my labels) and neither works. I have searched for a way of just adding a transparency to my boxes in the theme so that my colors will shine through. I am also suspicious that maybe the palette that I chose might give me the same colors in the two plots which I also do not want. To add, if it matters, I have 4 groups in the first plot and 2 in the second.
dput(BOX1_data)
structure(list(Diagnosis = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("1", "2", "3", "4"), class = "factor"),
No.Variants = c(3L, 4L, 6L, 14L, 3L, 3L, 4L, 3L, 3L, 3L,
8L, 6L, 22L, 10L, 6L, 9L, 1L, 9L, 3L, 4L, 8L, 2L, 13L, 3L,
11L, 19L, 5L, 5L, 3L, 12L, 4L, 2L, 4L, 18L, 8L, 7L, 7L, 12L,
4L, 1L, 6L, 3L, 2L, 8L, 10L, 3L, 15L, 9L, 13L, 13L, 15L,
10L, 10L, 12L, 6L, 3L, 12L, 9L, 15L, 10L, 18L, 3L, 6L, 3L,
6L, 1L, 3L, 3L, 7L, 1L, 2L, 10L, 7L, 7L, 1L, 0L, 2L)), row.names = c(NA,
-77L), class = "data.frame")
dput(BOX2_data)
structure(list(No.Variants = c(3L, 4L, 6L, 14L, 3L, 3L, 4L, 3L,
3L, 3L, 8L, 6L, 22L, 10L, 6L, 9L, 1L, 9L, 3L, 4L, 8L, 2L, 13L,
3L, 11L, 19L, 5L, 5L, 3L, 12L, 4L, 2L, 4L, 18L, 8L, 7L, 7L, 12L,
4L, 1L, 6L, 3L, 2L, 8L, 10L, 3L, 15L, 9L, 13L, 13L, 15L, 10L,
10L, 12L, 6L, 3L, 12L, 9L, 15L, 10L, 18L), Stage = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1",
"2"), class = "factor")), row.names = c(NA, -61L), class = "data.frame")
Grateful for any tips!
As already pointed out, it seems the OP's issue with theme_set() removing the fill colors set in your two plots was solved by updating to a new version of ggplot2. Herein, I have a solution for the second part of OP's question (that was clarified in the comments). Represented here for convenience:
Now it is just the problem that I want the palette to continue on the second plot's boxes and not restart so that I will get different colors on all boxes.
In order to do this, one has to realize that there are 4 fill colors for the first plot BOX1plot, and 2 fill colors for BOX2plot. For BOX1plot, we want the color palette to begin at the first color, but for BOX2plot, we want the palette to start at the 5th color in the palette. There's no way to do this through the scale_*_brewer() functions, so the approach here will be to access the Brewer palette from RColorBrewer::brewer.pal(), and then assign where to begin and end in that sequence based on the number of levels of each factor, using scale_fill_manual() to set the color values from the extracted Brewer color palette.
You can just "know" that you need to "use colors 1-4" for BOX1plot and "use color 5 and 6" for BOX2plot; however, it is much more elegant to just calculate this automatically based on the number of levels (in case you want to run this again). The code below does this:
library(ggplot2)
library(ggpubr)
library(RColorBrewer)
# ... read in your data as before
# create factors (as OP did before)
BOX1_data$Diagnosis <- as.factor(BOX1_data$Diagnosis)
BOX2_data$Stage <- as.factor(BOX2_data$Stage)
# make color palette based on Brewer "Dark2" palette
lev_diag <- nlevels(BOX1_data$Diagnosis)
lev_stage <- nlevels(BOX2_data$Stage)
lev_total <- lev_diag + lev_stage
# brewer.pal() needs n >= 3, so request at least three colors even when
# fewer levels exist; only the first lev_total entries are used below.
# NOTE(review): "Dark2" is a small qualitative palette - confirm that
# lev_total does not exceed the number of colors it provides.
my_colors <- brewer.pal(max(3L, lev_total), "Dark2")
# Index with seq_len() so an empty factor selects no colors instead of
# producing a descending sequence.
diag_colors <- my_colors[seq_len(lev_diag)]
stage_colors <- my_colors[lev_diag + seq_len(lev_stage)]
BOX1plot <- ggplot(BOX1_data, aes(x = Diagnosis, y = No.Variants, fill = Diagnosis)) +
  geom_boxplot() +
  scale_fill_manual(values = diag_colors) +
  scale_x_discrete(labels = c("AC\nN=38", "SqCC\nN=15", "SCLC\nN=8", "BL disease\nN=16"))
BOX2plot <- ggplot(BOX2_data, aes(x = Stage, y = No.Variants, fill = Stage)) +
  geom_boxplot(width = 0.4) +
  scale_fill_manual(values = stage_colors) +
  scale_x_discrete(labels = c("Stage I-III\nN=24", "Stage IV\nN=37"))
BOX_list <- list(BOX1plot, BOX2plot)
ggarrange(plotlist = BOX_list, labels = c('A', 'B'), ncol = 2)
If you have issues with ggarrange(), I would suggest the following approach using patchwork:
library(ggplot2)
library(patchwork)
# Convert both grouping columns to factors before plotting.
BOX1_data[["Diagnosis"]] <- as.factor(BOX1_data[["Diagnosis"]])
BOX2_data[["Stage"]] <- as.factor(BOX2_data[["Stage"]])

# Plot 1: variants per diagnosis.
BOX1plot <- ggplot(
  BOX1_data,
  aes(x = Diagnosis, y = No.Variants, fill = Diagnosis)
) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Dark2") +
  scale_x_discrete(
    labels = c("AC\nN=38", "SqCC\nN=15", "SCLC\nN=8", "BL disease\nN=16")
  )

# Plot 2: variants per stage.
BOX2plot <- ggplot(
  BOX2_data,
  aes(x = Stage, y = No.Variants, fill = Stage)
) +
  geom_boxplot(width = 0.4) +
  scale_fill_brewer(palette = "Dark2") +
  scale_x_discrete(labels = c("Stage I-III\nN=24", "Stage IV\nN=37"))

# Arrange the plots next to each other; patchwork adds the A/B tags.
BOX1plot + BOX2plot + plot_annotation(tag_levels = "A")
The output:

adding geom_text from different dataset to geom_bar

I have a bar plot with a facet grid, and I would like to add the number of observations per sub-plot which are stored in a separate dataframe.
The bar plot is produced with
# Counts of score differences, filled by incompatibility type and faceted
# by years since peace (rows) and mediation strategy (columns).
# geom_histogram() is used because geom_bar() does not take `binwidth`;
# unit-wide bins are requested explicitly.
bar.plot <- ggplot(BarDiff.m.s, aes(x = value.change, fill = incompatibility)) +
  geom_histogram(binwidth = 1) +
  labs(x = "score differences", y = "count / years since start of PSA") +
  # Dotted reference line at "no change".
  geom_vline(aes(xintercept = 0), linetype = "dotted") +
  theme(
    plot.title = element_text(face = "bold", size = 10),
    legend.position = "bottom"
  ) +
  scale_fill_brewer(palette = "Set1") +
  facet_grid(years.since.peace ~ strategy.cm6.YP, space = "free")
I tried to add the geom_text by adding after the geom_bar line
# value.change contains NA, so min() needs na.rm = TRUE to give a usable x.
geom_text(data=num.obs, aes(label=paste("obs=",num.obs),y=4,x=min(BarDiff.m.s$value.change, na.rm=TRUE)))
however, I obtain the error message
Error in eval(expr, envir, enclos) : object 'incompatibility' not found
Apparently, for some reason, I have to consider the "fill" variable in geom_text; I tried to add group=BarDiff.m.s$incompatibility to geom_text, but to no avail.
I have seen How to add custom labels from a dataset on top of bars using ggplot/geom_bar in R? , but if possible I would like to keep the two data.frames separate and understand how to solve the "fill" issue. Any suggestion would be very much welcome! thx.
The pertaining data for the plot is
BarDiff.m.s <- structure(list(value.change = c(-1, -1, -2, -2, 1, NA, 0, -2,
-1, -2, NA, 2, -3, NA, NA, -3, -2, -1, -4, -1, -3, -1, 2, 2,
NA, 1, -1, 0, 0, -2, -2, -2, -1, 1, NA, -1, -1, 0, -2, NA, 0,
-4, NA, NA, NA, -3, -1, -4, -2, -3, -2, -1, 0, NA, NA, 0, -4,
NA, -2, -2, -3, -1, NA, NA, -1, -1, 0, -2, NA, 0, NA, NA, NA,
NA, -4, NA, -4, -2, -3, -2, -2, 2, NA, NA, 0, -4, -2, NA, NA,
NA, NA, NA, NA, -1, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA, NA,
-4, NA, -2, -1, -2, NA, NA, NA, NA, -3, 1), incompatibility = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L,
2L, 1L, 1L), .Label = c("territory", "government"), class = "factor"),
years.since.peace = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L), .Label = c("y0", "y10", "y15", "y20", "diff.y5",
"diff.y10", "diff.y15", "diff.y20"), class = "factor"), strategy.cm6.YP = structure(c(4L,
4L, 5L, 1L, 1L, 4L, 3L, 4L, 3L, 1L, 1L, 1L, 3L, 4L, 4L, 4L,
4L, 3L, 3L, 4L, 4L, 4L, 1L, 4L, 5L, 1L, 4L, 1L, 4L, 4L, 4L,
5L, 1L, 1L, 4L, 3L, 4L, 3L, 1L, 1L, 1L, 3L, 4L, 4L, 4L, 4L,
3L, 3L, 4L, 4L, 4L, 1L, 4L, 5L, 1L, 4L, 1L, 4L, 4L, 4L, 5L,
1L, 1L, 4L, 3L, 4L, 3L, 1L, 1L, 1L, 3L, 4L, 4L, 4L, 4L, 3L,
3L, 4L, 4L, 4L, 1L, 4L, 5L, 1L, 4L, 1L, 4L, 4L, 4L, 5L, 1L,
1L, 4L, 3L, 4L, 3L, 1L, 1L, 1L, 3L, 4L, 4L, 4L, 4L, 3L, 3L,
4L, 4L, 4L, 1L, 4L, 5L, 1L, 4L, 1L, 4L), .Label = c("none",
"only offered", "communication/\nfacilitation", "procedural",
"directive", "unspecified"), class = "factor")), .Names = c("value.change",
"incompatibility", "years.since.peace", "strategy.cm6.YP"), class = "data.frame", row.names = c(1298L,
1299L, 1335L, 1339L, 1340L, 1341L, 1344L, 1372L, 1379L, 1386L,
1387L, 1402L, 1415L, 1439L, 1449L, 1454L, 1455L, 1456L, 1463L,
1466L, 1470L, 1496L, 1497L, 1498L, 1525L, 1536L, 1542L, 1546L,
1563L, 1617L, 1618L, 1654L, 1658L, 1659L, 1660L, 1663L, 1691L,
1698L, 1705L, 1706L, 1721L, 1734L, 1758L, 1768L, 1773L, 1774L,
1775L, 1782L, 1785L, 1789L, 1815L, 1816L, 1817L, 1844L, 1855L,
1861L, 1865L, 1882L, 1936L, 1937L, 1973L, 1977L, 1978L, 1979L,
1982L, 2010L, 2017L, 2024L, 2025L, 2040L, 2053L, 2077L, 2087L,
2092L, 2093L, 2094L, 2101L, 2104L, 2108L, 2134L, 2135L, 2136L,
2163L, 2174L, 2180L, 2184L, 2201L, 2255L, 2256L, 2292L, 2296L,
2297L, 2298L, 2301L, 2329L, 2336L, 2343L, 2344L, 2359L, 2372L,
2396L, 2406L, 2411L, 2412L, 2413L, 2420L, 2423L, 2427L, 2453L,
2454L, 2455L, 2482L, 2493L, 2499L, 2503L, 2520L))
The data for the number of observations is:
num.obs <- structure(list(years.since.peace = structure(c(5L, 5L, 5L, 5L,
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L), .Label = c("y0",
"y10", "y15", "y20", "diff.y5", "diff.y10", "diff.y15", "diff.y20"
), class = "factor"), strategy.cm6.YP = structure(c(1L, 3L, 4L,
5L, 1L, 3L, 4L, 5L, 1L, 3L, 4L, 5L, 1L, 3L, 4L, 5L), .Label = c("none",
"only offered", "communication/\nfacilitation", "procedural",
"directive", "unspecified"), class = "factor"), num.obs = c(8L,
5L, 14L, 2L, 8L, 5L, 14L, 2L, 8L, 5L, 14L, 2L, 8L, 5L, 14L, 2L
)), .Names = c("years.since.peace", "strategy.cm6.YP", "num.obs"
), row.names = c(NA, -16L), class = "data.frame")
Moving the fill aesthetic to geom_bar and changing the y position for geom_text should get you what you want.
# Same plot with `fill` mapped only in the bar layer, so the text layer
# (drawn from num.obs, which has no `incompatibility` column) does not
# inherit it.
# geom_histogram() is used because geom_bar() does not take `binwidth`.
bar.plot <- ggplot(BarDiff.m.s, aes(x = value.change)) +
  geom_histogram(aes(fill = incompatibility), binwidth = 1) +
  # One label per facet: num.obs carries the same faceting columns.
  geom_text(data = num.obs, aes(label = paste("obs=", num.obs), y = 4, x = -4)) +
  labs(x = "score differences", y = "count / years since start of PSA") +
  geom_vline(aes(xintercept = 0), linetype = "dotted") +
  theme(
    plot.title = element_text(face = "bold", size = 10),
    legend.position = "bottom"
  ) +
  scale_fill_brewer(palette = "Set1") +
  facet_grid(years.since.peace ~ strategy.cm6.YP, space = "free")
bar.plot
If you want the text labels to be positioned by value.change in the first data set, probably the easiest way to get that is to merge the relevant column into the second data set.

Overlay ggplot grouped tiles with polygon border depending on extra factor

I have a data frame with x and y positions and two factor columns blocknr and cat:
dput(testData)
structure(list(xpos = c(2L, 8L, 5L, 8L, 1L, 4L, 5L, 1L, 8L, 4L,
3L, 2L, 6L, 5L, 1L, 7L, 3L, 4L, 3L, 7L, 1L, 6L, 7L, 7L, 2L, 5L,
3L, 4L, 6L, 7L, 1L, 5L, 1L, 6L, 4L, 5L, 3L, 6L, 4L, 8L, 1L, 3L,
4L, 6L, 7L, 3L, 2L, 6L, 4L, 2L, 1L, 7L, 4L, 8L, 2L, 3L, 2L, 5L,
8L, 2L, 8L, 3L, 3L, 5L, 6L, 7L, 1L, 5L, 6L, 4L, 2L, 6L, 7L, 1L,
5L, 7L, 2L), ypos = c(1L, 2L, 8L, 1L, 6L, 7L, 1L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 7L, 8L, 10L, 2L, 6L, 9L, 1L, 2L, 10L, 4L, 5L,
6L, 3L, 5L, 9L, 3L, 9L, 10L, 3L, 7L, 8L, 2L, 5L, 6L, 3L, 4L,
10L, 1L, 4L, 10L, 2L, 8L, 9L, 3L, 6L, 8L, 5L, 7L, 10L, 3L, 4L,
7L, 2L, 4L, 5L, 6L, 7L, 9L, 4L, 7L, 8L, 1L, 2L, 9L, 5L, 9L, 10L,
1L, 6L, 8L, 3L, 5L, 7L), blocknr = c(1L, 3L, 2L, 3L, 1L, 2L,
2L, 1L, 3L, 2L, 1L, 1L, 3L, 2L, 1L, 3L, 2L, 2L, 1L, 3L, 1L, 2L,
3L, 3L, 1L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 1L, 3L, 2L, 2L, 1L, 3L,
2L, 3L, 1L, 1L, 2L, 3L, 3L, 2L, 1L, 3L, 2L, 1L, 1L, 3L, 2L, 3L,
1L, 2L, 1L, 2L, 3L, 1L, 3L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 3L, 2L,
1L, 2L, 3L, 1L, 2L, 3L, 1L), cat = structure(c(2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("A", "B", "C"
), class = "factor")), .Names = c("xpos", "ypos", "blocknr",
"cat"), row.names = c(NA, -77L), class = "data.frame")
I've made the following ggplot code to make 2D overview:
# Grid extents used for the axis limits and breaks below.
xmax <- max(testData$xpos)
ymax <- max(testData$ypos)
# 2D overview: one tile per (xpos, ypos) cell, filled by category and
# labelled with its block number.
ggplot(data = testData, aes(x = xpos, y = ypos)) +
  geom_tile(aes(fill = cat), colour = "white") +
  scale_fill_manual(values = c('A' = '#F8766D', 'C' = '#8ABF54', 'B' = '#C1DDA5')) +
  geom_text(aes(x = xpos, y = ypos, label = blocknr), size = 3) +
  # Both limits go in one call: a plot has a single coordinate system, so a
  # second coord_cartesian() would replace the first and drop its limits.
  coord_cartesian(xlim = c(0.5, xmax + 0.5), ylim = c(0.5, ymax + 0.5)) +
  scale_x_continuous(breaks = seq(1, xmax, 1)) +
  scale_y_continuous(breaks = seq(1, ymax, 1)) +
  #geom_polygon(aes(group=blocknr))+
  theme(
    axis.line = element_line(colour = "white"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank()
  )
which produces the following result:
Now I would like to highlight each group of blocknrs by drawing a border around them as shown below:
I've played around with geom_polygon, geom_path, but I can't quite find a way to do this. Is there a general way to achieve this in ggplot without constructing an algorithm to compute where each line should be and add those lines as a geom_segment?
As far as I know, there is no way to do this with standard ggplot2 tile options. But it's not too much trouble to construct them if you do it as segments. For example
# Grid extents: largest row (ypos) and column (xpos) index in the data.
xmax <- max(testData$xpos)
ymax <- max(testData$ypos)
# Block-number matrix laid out like the plot: row = ypos, column = xpos.
# Cells without a tile keep the filler value 0.
m <- matrix(0, nrow = ymax, ncol = xmax)
m[cbind(testData$ypos, testData$xpos)] <- testData$blocknr
Here we are basically taking all the row/col assignment data and creating a matrix that essentially looks like the plot, but filled with the block numbers. Now, we will scan for the locations where we need to add a "wall" by looking for changes in the block numbers as we go across each row and column separately.
# TRUE when `x` is a two-column table with at least one row, i.e. a scan
# that actually found wall positions. Always returns a single TRUE/FALSE
# (scalar `&&`; objects without dimensions give FALSE rather than a
# zero-length logical).
has.breaks <- function(x) {
  !is.null(dim(x)) && ncol(x) == 2L && nrow(x) > 0L
}
# Wall positions as data frames of grid coordinates. For each scanned line,
# diff(c(0, x, 0)) != 0 marks every position where the block number differs
# from its predecessor; the padding zeros stand in for the cells beyond
# either end of the line. Scans rejected by has.breaks() are dropped.
# seq_len() keeps the index vectors empty for a zero-extent matrix.

# hw: one scan per matrix row (y fixed), x = positions of changes.
hw <- do.call(rbind.data.frame, Filter(has.breaks, Map(
  function(i, x) cbind(y = i, x = which(diff(c(0, x, 0)) != 0)),
  seq_len(nrow(m)), split(m, seq_len(nrow(m)))
)))
# vw: one scan per matrix column (x fixed), y = positions of changes.
vw <- do.call(rbind.data.frame, Filter(has.breaks, Map(
  function(i, x) cbind(x = i, y = which(diff(c(0, x, 0)) != 0)),
  seq_len(ncol(m)), as.data.frame(m)
)))
And you can add calls to geom_segment() to add the horizontal and vertical walls to the plot.
# Tile overview with block borders drawn from the wall tables hw and vw.
ggplot(data = testData, aes(x = xpos, y = ypos)) +
  geom_tile(aes(fill = cat), colour = "white") +
  scale_fill_manual(values = c('A' = '#F8766D', 'C' = '#8ABF54', 'B' = '#C1DDA5')) +
  geom_text(aes(x = xpos, y = ypos, label = blocknr), size = 3) +
  # hw rows become vertical segments on the left edge of cell (x, y).
  geom_segment(data = hw, aes(x = x - .5, xend = x - .5, y = y - .5, yend = y + .5)) +
  # vw rows become horizontal segments on the bottom edge of cell (x, y).
  geom_segment(data = vw, aes(x = x - .5, xend = x + .5, y = y - .5, yend = y - .5)) +
  # Both limits go in one call: a plot has a single coordinate system, so a
  # second coord_cartesian() would replace the first and drop its limits.
  coord_cartesian(xlim = c(0.4, xmax + 0.6), ylim = c(0.4, ymax + 0.6)) +
  scale_x_continuous(breaks = seq(1, xmax, 1)) +
  scale_y_continuous(breaks = seq(1, ymax, 1)) +
  theme(
    axis.line = element_line(colour = "white"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank()
  )
which gives
The desplot package will do this for you (using lattice):
library(desplot)
# Data first, formula second, matching the current desplot() interface.
# Tiles are filled by `cat`; `out1` outlines and `text` labels each block.
desplot(testData, cat ~ xpos*ypos, out1 = blocknr, text = blocknr, main = "testData")

format color and legend in ggplot geom_tile of p-values

I am currently trying to make a 'heat map' using ggplot2 to display a series of p-values, but can't figure out how to tailor the actual color assignments and legend.
# Long format, then turn var2 and var1 into ordered factors whose levels
# follow order of first appearance, so the plot keeps the data's order
# instead of sorting alphabetically.
sampledata.m <- melt(sampledata)
sampledata.m[c("var2", "var1")] <- lapply(
  sampledata.m[c("var2", "var1")],
  function(column) {
    as_text <- as.character(column)
    factor(as_text, levels = unique(as_text), ordered = TRUE)
  }
)
This was done so that I could maintain the order of my variables.
# Heat map of p-values: one tile per (var2, var1) pair, filled by value.
# The stray closing parenthesis after limits= has been removed so the
# expression parses.
# NOTE(review): `values` lists five stops but `colours` has three entries;
# `values` is documented as one position per colour - confirm which stops
# are intended.
p <- ggplot(sampledata.m, aes(var2, var1)) +
  geom_tile(aes(fill = value), colour = "transparent") +
  scale_fill_gradientn(
    colours = c("light green", "dark green", "black"),
    values = rescale(c(0, 0.0003, 0.05, 0.5, 1)),
    limits = c(0, 1)
  )
# Presentation: black-and-white theme, no axis titles, no padding on the
# x axis, legend underneath, and rotated x tick labels.
# `base_size` must already be defined in the session.
p +
  theme_bw(base_size = base_size) +
  labs(x = "", y = "") +
  scale_x_discrete(expand = c(0, 0)) +
  theme(
    legend.position = "bottom",
    axis.ticks = element_blank(),
    axis.text.x = element_text(
      size = base_size * 0.8,
      angle = 310,
      hjust = 0,
      colour = "black"
    )
  )
This creates a nice looking plot, however my legend and my color gradient don't represent the rescale that I assigned. Forgive my ignorance if this is a simple fix, but I've only been coding R for about 2 weeks now. Ideally, I would love my plot and legend to mimic the color scheme and legend labeling similar to this paper: http://www.ncbi.nlm.nih.gov/pubmed/22496159
structure(list(var1 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L
), .Label = c("A", "B", "C",
"D", "E"), class = "factor"), var2 = structure(c(1L,
5L, 23L, 18L, 9L, 8L, 14L, 12L, 20L, 6L, 21L, 11L, 2L, 22L, 10L,
3L, 19L, 16L, 4L, 7L, 15L, 17L, 13L, 24L, 1L, 5L, 23L, 18L, 9L,
8L, 14L, 12L, 20L, 6L, 21L, 11L, 2L, 22L, 10L, 3L, 19L, 16L,
4L, 7L, 15L, 17L, 13L, 24L, 1L, 5L, 23L, 18L, 9L, 8L, 14L, 12L,
20L, 6L, 21L, 11L, 2L, 22L, 10L, 3L, 19L, 16L, 4L, 7L, 15L, 17L,
13L, 24L, 1L, 5L, 23L, 18L, 9L, 8L, 14L, 12L, 20L, 6L, 21L, 11L,
2L, 22L, 10L, 3L, 19L, 16L, 4L, 7L, 15L, 17L, 13L, 24L, 1L, 5L,
23L, 18L, 9L, 8L, 14L, 12L, 20L, 6L, 21L, 11L, 2L, 22L, 10L,
3L, 19L, 16L, 4L, 7L, 15L, 17L, 13L), .Label = c("1", "2",
"3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20", "21",
"22", "23", "24"), class = "factor"), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = "pvalue", class = "factor"),
value = c(0.810172671, 0.596026338, 0.076550169, 0.908670635,
0.300418653, 0.051553286, 0.124196482, 0.601568833, 0.058431468,
0.341726981, 0.876674726, 0.002698295, 0.812059425, 0.068199656,
0.758383287, 0.60362134, 0.89265723, 0.246111936, 0.156348035,
0.909574522, 0.020202377, 0.388843992, 0.769441835, 0.102272916,
0.38895717, 0.882296525, 0.792438683, 0.000491393, 0.004233434,
0.202424095, 0.426941568, 0.08520186, 0.763036306, 0.602828564,
0.037278697, 0.121642743, 0.669123606, 0.974328438, 0.834329923,
0.050413697, 0.078476666, 0.387647156, 0.000540422, 0.379576632,
0.361428444, 0.502439758, 0.001326035, 0.027652693, 0.188885638,
0.579244445, 0.471985778, 0.677458228, 0.119307242, 0.364857868,
0.238260538, 0.53472206, 0.204344281, 0.291888993, 0.295809688,
0.00029, 0.005476157, 0.960975822, 0.00029, 0.055915429,
0.618284682, 0.040605253, 0.521649682, 0.421086546, 0.164333061,
0.755528982, 0.306854182, 0.012832628, 0.270393143, 0.946675764,
0.59227376, 0.112658388, 0.429091426, 0.01662083, 0.017342483,
0.065817234, 0.012140224, 0.359828816, 0.031969725, 0.00029,
0.14555102, 0.18865081, 0.00029, 0.064107531, 0.505257768,
0.070224536, 0.017082975, 0.375864198, 0.00029, 0.104103689,
0.898979883, 0.004879605, 0.003597954, 0.036722932, 0.849058218,
0.00029, 0.003739938, 0.00029, 0.00029, 0.00029, 0.008179017,
0.193870353, 0.460181712, 0.389475522, 0.00029, 0.8785017,
0.070414642, 0.584977921, 0.990764677, 0.767253318, 0.002234906,
0.051331823, 0.00446149, 0.234477639, 0.275139791)), .Names = c("var1", "var2", "variable", "value"), row.names = c(NA, -119L), class = "data.frame")
I'm not going to get into all of the theme settings you've got - as I understand it the key of your problem is the scale of the fill gradient. You can set this in scale_fill_gradient() with a log transformation:
# Heat map with a log-transformed fill scale so small p-values spread out
# across the gradient.
p <- ggplot(sampledata.m, aes(var2, var1)) +
  geom_tile(aes(fill = value), colour = "transparent") +
  scale_fill_gradient(
    trans = "log", low = "light green", high = "black",
    # No break at 0: log(0) is -Inf, so it has no position on a log scale.
    breaks = c(0.001, 0.05, 0.5)
  )
# Small example: two value columns spanning several orders of magnitude,
# drawn as labelled tiles with a diverging fill on a log scale.
dt <- data.frame(
  N = letters[5:11],
  a = c(0.01, 0.05, 0.1, 0.5, 1, 5, 10),
  b = c(10, 20, 50, 100, 200, 1000, 2000)
)
# Name the id column explicitly instead of relying on melt()'s guess.
dt.mlt <- melt(dt, id.vars = "N", variable.name = "Cls", value.name = "Val")
ggplot(dt.mlt, aes(x = N, y = Cls, fill = Val)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "green", high = "red", mid = "black", trans = "log",
    # No break at 0: log(0) is -Inf, so it has no position on a log scale.
    breaks = c(0.01, 0.1, 1, 10, 100, 1000)
  ) +
  # x and y are inherited from the plot; only the label is added here.
  geom_text(aes(label = Val))
But if I add the midpoint=10 to the scale_fill_gradient2, the picture will become:

Resources