Arrange graph, table and legend in a single grid - r

I need to print in Word a graph with a table at the bottom which indicates the legend used in the graph. I have the 3 elements separated and I need to put it together in a single grid.
I use grid.arrange to try but the size of each element differs, like here
There is a way to put them all together so years matches in x-axis graph and column table and each row of the table match each label from the legend.?
As a second question, it is possible to put the Total line in the legend?
Any help would be appreciated.
The code I'm using:
VARIABLE<-c("OFERT_PLP","OFERT_TP","OFERT_LIC")
X2007<-as.vector(matrix1) #matrices A-D contain randomly generated NA and numerical values
X2008<-as.vector(matrix2)
X2009<-as.vector(matrix3)
X2010<-as.vector(matrix4)
X2011<-as.vector(matrix5)
X2012<-as.vector(matrix6)
X2013<-as.vector(matrix7)
X2014<-as.vector(matrix8)
X2015<-as.vector(matrix9)
X2016<-as.vector(matrix10)
orden<-c(1,2,3)
ETIQUETA<-c("Pregrado","Postitulo","Postgrado")
df<-as.data.frame(cbind(VARIABLE,X2007,X2008,X2009,X2010,X2011,X2012,X2013,X2014,X2015,X2016,orden,ETIQUETA))
df<-df[,colnames(df)!="ETIQUETA"]
dfm1<-melt(df,id.vars = c("VARIABLE","orden"), variable.name = "ANO", value.name = "VALOR")
dfm <- dfm1 %>% group_by(ANO,VARIABLE,VALOR) %>%
group_by(ANO) %>%
mutate(pct = (VALOR / sum(VALOR,na.rm=TRUE)),
total=sum(VALOR,na.rm=TRUE),
cumsum=cumsum(VALOR),
npos = total-cumsum+0.5*VALOR)
d<-as.data.frame(rbind(df[,paste0("X",anos)],colSums(df[,paste0("X",anos)],na.rm=TRUE)))
p1<-ggplot(dfm, aes(x=ANO, y=VALOR, fill=orden)) +
geom_bar(stat="identity")+ geom_line(aes(y=total,group=1,linetype="Total"))+ scale_fill_brewer()
none <- element_blank()
p2 <- p1 + theme_bw() +
scale_x_discrete(labels=anos) +
labs(panel.grid.major = none, panel.grid.minor = none) +
labs(panel.background = none) +
labs(panel.border = none) +
xlab(NULL) + ylab(NULL)+
background_grid(major = "y", minor = "none" )+
geom_text(aes(label=ifelse(pct==0,"",paste0(sprintf("%1.0f",pct*100),"%")),y=npos), colour="black",size=3)
p3<-p2 + theme(legend.title=none,legend.position = "left",legend.text=element_text(size=5),
legend.key.height=unit(0.8,"line"), axis.text = element_text(size = 8)) + guides(linetype=FALSE)
legend <- get_legend(p3)
p3<-p3+theme(legend.position = "none")
pg <- ggplotGrob(p3)
mythemegra <- gridExtra::ttheme_default(
core = list(fg_params=list(cex = 0.8)),
colhead = list(fg_params=list(cex = 0.8)),
rowhead = list(fg_params=list(cex = 0.8)),parse = "TRUE")
table<-tableGrob(d,rows = NULL,col=NULL,
theme = mythemegra)
blankPlot <- ggplot()+geom_blank(aes(1,1)) + cowplot::theme_nothing()
grid.arrange(arrangeGrob(blankPlot,pg,top=textGrob("Title",gp=gpar(fontsize=12,font=2,col="black"),just = "centre"),
nrow=1,ncol=2,widths = c(1,5)),
arrangeGrob(legend,table,nrow=1,ncol=2,widths = c(1,4.5)),heights=c(6.5,6.5))

Related

How can I show empty bins on the plot in r?

I have a dataframe df with Genderand Results columns
x <- df$Results
cuts <- cut(x, breaks = seq(min(x),max(x),(max(x)-min(x))/15) , include.lowest = TRUE, right = TRUE,dig.lab = 5)
df$cuts <- mgsub::mgsub(as.character(cuts),pattern =c("\\[","\\]","\\(","\\)",",") ,replacement = c("","","",""," - "))
df$cuts <- factor(df$cuts,levels = unique(df$cuts))
df$Gender <- factor(df$Gender,levels = c("Men","Women"))
hist.plot <- function(df) {ggplot(df,aes(cuts,fill =Gender)) +
geom_bar(position = position_dodge(preserve = "single"),alpha=0.8,color="black",width = 0.5) +
theme(axis.text.x = element_text(face="bold",color="#993333", size=8, angle=45,vjust=0.6),
axis.text.y = element_text(face="bold", color="#993333", size=8, angle=45)) +
theme(plot.title = element_text(hjust = 0.5))
}
hist.plot(df)
There is no data in 918.8 - 989.9 bin and as you can see I can't show that it has 0 count on the plot
And in 1061 - 1132.1 bin the data belongs to women but appear on the left hand side instead right hand side.
The thing that I should change is about cut function or ggplot ?
Thank you for your help in advance.

Annotation label won't show when passed as a grob in annotation_custom in r (also trying to put it at the bottom right corner)

I am trying to automatically put a custom annotation in the bottom right corner of a plot no matter the actual axes range.
I have tried to do so with annotate from ggplot2 but it just didn't work.
I am trying to work with annotation_custom from the grid package instead.
My code is long so I won't post all of it here, but rather the main problematic lines imo:
EDIT: I am adding a small dataframe for reproducibility
df <- data.frame(col.a = c(1:5), col.b = c(23.3,5.2,61.0,9.0,3.25))
# correlation calculation
cor.result = df %>% cor.test(col.a, col.b,
method = "spearman",
na.action=na.omit,
exact = FALSE)
corr.label <- sprintf("r = %.3f\np = %g\n%s", cor.result$estimate,
cor.result$p.value, "spearman")
The result is something like:
"r = -0.853\np = 0.003\nspearman"
Then I create a plot:
ttl = "Scatter Plot" # The title and subtitles are different in my code.
sub.ttl = "sample id: patient zero"
p <- df %>% ggplot(aes(x = col.a, y = col.b) +
geom_smooth(color = "steelblue3", method = lm, formula = y ~ x) +
geom_abline(aes(intercept=0, slope=1), color = 'grey45') +
geom_point(color = "steelblue4", alpha = 0.5, size = 3) +
labs(x = "HUMANnN2", y = "HUMAnN3",
title = ttl,
subtitle = sub.ttl) +
theme(text = element_text(size = 12),
plot.title = element_text(hjust = 0.5, size = 16),
plot.subtitle = element_text(hjust = 0.5, size = 14))
And try to add an annotation:
grob <- grobTree(textGrob(label = corr.label, x = 0.8, y = 0.3))
p <- p + annotation_custom(grob)
The result is as follows:
I did manage to add an annotation at the upper left corner with:
p <- p + annotation_custom(corr.label)
Which gives:
Yes, it has to be at the bottom right corner.
The annotation does show up when I switch corr.label with just a string of "hello". My guess is that grob doesn't pass newline characters accordingly.

how do i combine multiple data sources in ggplot using split and sapply?

this question is linked to a previous one answered by #Rui Barradas and #Duck, but i need more help. Previous link here:
how do i vectorise (automate) plot creation in R
Basically, I need to combine 3 datasets into one plot with a secondary y axis. All datasets need to be split by SITENAME and will facet wrap by Sampling.Year. I am using split and sapply. Being facet wrap the plots look something like this:
However, i'm now trying to add the two other data sources into the plots, to look something like this:
But i am struggling to add the two other data sources and get them to split by SITENAME. Her is my code so far...
Record plot format as a function to be applied to a split list df (ideally 'df' would be added as geom_line with a secondary y axis, and 'FF_start_dates' will be added as a vertical dashed line):
SITENAME_plot <- function(AllDates_TPAF){
ggplot(AllDates_TPAF, aes(DATE, Daily.Ave.PAF)) +
geom_point(aes(colour = Risk), size = 3) +
scale_colour_manual(values=c("Very Low" = "dark green","Low" = "light green",
"Moderate" = "yellow", "High" = "orange", "Very High" = "red"), drop = FALSE) +
labs(x = "Month", y = "Total PAF (% affected)") +
scale_x_date(breaks = "1 month", labels = scales::date_format("%B")) +
facet_wrap(~Sampling.Year, ncol = 1, scales = "free")+
scale_y_continuous(limits = c(0, 100), sec.axis = sec_axis(~., name = "Water level (m)")) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
theme(legend.text=element_text(size=15)) +
theme(axis.text=element_text(size=15),
axis.title=element_text(size=15,face="bold")) +
guides(color = guide_legend(reverse = TRUE))+
theme_bw() +
ggtitle(unique(AllDates_TPAF$SITENAME))
}
plot write function:
SITENAME_plot_write <- function(name, g, dir = "N:/abc/"){
flname <- file.path(dir, name)
flname <- paste0(flname, ".jpg")
png(filename = flname, width = 1500, height = 1000)
print(g)
dev.off()
flname
}
Apply function to list split by SITENAME:
sp1 <- split(AllDates_TPAF, AllDates_TPAF$SITENAME)
gg_list <- sapply(sp1, SITENAME_plot, simplify = FALSE)
mapply(SITENAME_plot_write, names(gg_list), gg_list, MoreArgs = list(dir = getwd()))
dev.off()
I have uploaded samples of all 3 datasets here: Sample Data
Apologies for not using gsub but there was too much data and I couldn't get it to work properly
thanks in advance for any help you can give, even if it is just to point me towards a web tutorial of some kind.
You can try next code. I used the data you shared. Just be careful with names of all datasets. Ideally, the key columns as DATE and Sampling.Year should be present in all dataframes before making the split. Also some variables as Risk was absent so I added an example var with same name. Here the code, I added a function for the plot you want:
library(tidyverse)
library(readxl)
#Data
df1 <- read_excel('Sample data.xlsx',1)
#Create var
df1$Risk <- c(rep(c("Very Low","Low","Moderate","High","Very High"),67),"Very High")
#Other data
df2 <- read_excel('Sample data.xlsx',2)
df3 <- read_excel('Sample data.xlsx',3)
#Split 1
L1 <- split(df1,df1$SITENAME)
L2 <- split(df2,df2$SITENAME)
L3 <- split(df3,df3$`Site Name`)
#Function to create plots
myplot <- function(x,y,z)
{
#Merge x and y
#Check for duplicates and avoid column
y <- y[!duplicated(paste(y$DATE,y$Sampling.Year)),]
y$SITENAME <- NULL
xy <- merge(x,y,by.x = c('Sampling.Year','DATE'),by.y = c('Sampling.Year','DATE'),all.x=T)
#Format to dates
xy$DATE <- as.Date(xy$DATE)
#Scale factor
scaleFactor <- max(xy$Daily.Ave.PAF) / max(xy$Height)
#Rename for consistency in names
names(z)[4] <- 'DATE'
#Format date
z$DATE <- as.Date(z$DATE)
#Plot
#Plot
G <- ggplot(xy, aes(DATE, Daily.Ave.PAF)) +
geom_point(aes(colour = Risk), size = 3) +
scale_colour_manual(values=c("Very Low" = "dark green","Low" = "light green",
"Moderate" = "yellow", "High" = "orange", "Very High" = "red"), drop = FALSE) +
scale_x_date(breaks = "1 month", labels = scales::date_format("%b %Y")) +
geom_line(aes(x=DATE,y=Height*scaleFactor))+
scale_y_continuous(name="Total PAF (% affected)", sec.axis=sec_axis(~./scaleFactor, name="Water level (m)"))+
labs(x = "Month") +
geom_vline(data = z,aes(xintercept = DATE),linetype="dashed")+
facet_wrap(~Sampling.Year, ncol = 1, scales = "free")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
theme(legend.text=element_text(size=15)) +
theme(axis.text=element_text(size=15),
axis.title=element_text(size=15,face="bold")) +
guides(color = guide_legend(reverse = TRUE))+
theme_bw() +
ggtitle(unique(xy$SITENAME))
return(G)
}
#Create a list of plots
Lplots <- mapply(FUN = myplot,x=L1,y=L2,z=L3,SIMPLIFY = FALSE)
#Now format names
vnames <- paste0(names(Lplots),'.png')
mapply(ggsave, Lplots,filename = vnames,width = 30,units = 'cm')
You will end up with plots like these saved in your dir:
Some dashed lines do not appear in plots because they were not present in the data you provided.

how to remove label in facet_wrap

I made a ggplot with my data. than I wanted to label some interactions between my boxplot-data (via anova/Tukey) to show, if there are signifikant differences between special Groups or not. I'm working with one csv-sheet called "test", that I have imported and with one tabel called "final"
[
[
but when I plot my data WITH the label from my TukeyHSD, I get every label triple with empty spaces.
how can i remove the empty spaces. there must be any solution to just show the first tree lines with label in the first plot, the middle tree lines in the middle plot with their specific label and the last tree lines in the third spot with label. How can I remove those empty lines with their alphateic labels?
Here my code:
test$int<-interaction(test$geno,test$n)
generate_label_df <- function(TUKEY, variable){
Tukey.levels <- TUKEY[[variable]][,4]
Tukey.labels <- data.frame(multcompLetters(Tukey.levels)['Letters'])
Tukey.labels$int=rownames(Tukey.labels)
Tukey.labels=Tukey.labels[order(Tukey.labels$int) , ]
return(Tukey.labels)
}
model=lm(test$amino~test$int ) ###
ANOVA=aov(model)
TUKEY <- TukeyHSD(x=ANOVA, 'test$int', conf.level=0.95)
labels<-generate_label_df(TUKEY , "test$int")
names(labels)<-c('Letters','int')
yvalue<-aggregate(amino~int, data=test, max)
final<-merge(labels,yvalue)
ggplot(test, aes(x=int, y=amino)) +
stat_boxplot(geom="errorbar") +
geom_boxplot(aes(fill=CO2)) +
geom_text(data = final, aes(label = Letters, vjust = -0.6)) +
labs(title = "Aminosäuren Erdkultur", x = "Genotyp", y = "Aminosäurekonz. [µmol/gFW]") +
scale_x_discrete(labels = c("col.Ammonium" = "col", "sps.Ammonium" = "sps",
"swe.Ammonium" = "swe", "col.Nitrat" = "col",
"sps.Nitrat" = "sps", "swe.Nitrat" = "swe",
"col.NON" = "col", "sps.NON" = "sps","swe.NON" = "swe"),
limits = c("col.Ammonium", "sps.Ammonium", "swe.Ammonium",
"col.Nitrat", "sps.Nitrat","swe.Nitrat", "col.NON",
"sps.NON", "swe.NON") )+
guides(fill = guide_legend(title = "CO2-Behandlung")) +
theme(plot.title = element_text(face = "bold", size="17", hjust = "0.5")) +
scale_fill_manual(values = c("violetred3", "steelblue1")) +
facet_wrap( ~ n) +
theme(legend.title = element_text(face = "bold"))
my first opinion was, to remove "facet_wrap(~n)". The Problem is solved than, but obviously I'm missing the subdivision on this way

R: Pie chart with percentage as labels using ggplot2

From a data frame I want to plot a pie chart for five categories with their percentages as labels in the same graph in order from highest to lowest, going clockwise.
My code is:
League<-c("A","B","A","C","D","E","A","E","D","A","D")
data<-data.frame(League) # I have more variables
p<-ggplot(data,aes(x="",fill=League))
p<-p+geom_bar(width=1)
p<-p+coord_polar(theta="y")
p<-p+geom_text(data,aes(y=cumsum(sort(table(data)))-0.5*sort(table(data)),label=paste(as.character(round(sort(table(data))/sum(table(data)),2)),rep("%",5),sep="")))
p
I use
cumsum(sort(table(data)))-0.5*sort(table(data))
to place the label in the corresponding portion and
label=paste(as.character(round(sort(table(data))/sum(table(data)),2)),rep("%",5),sep="")
for the labels which is the percentages.
I get the following output:
Error: ggplot2 doesn't know how to deal with data of class uneval
I've preserved most of your code. I found this pretty easy to debug by leaving out the coord_polar... easier to see what's going on as a bar graph.
The main thing was to reorder the factor from highest to lowest to get the plotting order correct, then just playing with the label positions to get them right. I also simplified your code for the labels (you don't need the as.character or the rep, and paste0 is a shortcut for sep = "".)
League<-c("A","B","A","C","D","E","A","E","D","A","D")
data<-data.frame(League) # I have more variables
data$League <- reorder(data$League, X = data$League, FUN = function(x) -length(x))
at <- nrow(data) - as.numeric(cumsum(sort(table(data)))-0.5*sort(table(data)))
label=paste0(round(sort(table(data))/sum(table(data)),2) * 100,"%")
p <- ggplot(data,aes(x="", fill = League,fill=League)) +
geom_bar(width = 1) +
coord_polar(theta="y") +
annotate(geom = "text", y = at, x = 1, label = label)
p
The at calculation is finding the centers of the wedges. (It's easier to think of them as the centers of bars in a stacked bar plot, just run the above plot without the coord_polar line to see.) The at calculation can be broken out as follows:
table(data) is the number of rows in each group, and sort(table(data)) puts them in the order they'll be plotted. Taking the cumsum() of that gives us the edges of each bar when stacked on top of each other, and multiplying by 0.5 gives us the half the heights of each bar in the stack (or half the widths of the wedges of the pie).
as.numeric() simply ensures we have a numeric vector rather than an object of class table.
Subtracting the half-widths from the cumulative heights gives the centers each bar when stacked up. But ggplot will stack the bars with the biggest on the bottom, whereas all our sort()ing puts the smallest first, so we need to do nrow - everything because what we've actually calculate are the label positions relative to the top of the bar, not the bottom. (And, with the original disaggregated data, nrow() is the total number of rows hence the total height of the bar.)
Preface: I did not make pie charts of my own free will.
Here's a modification of the ggpie function that includes percentages:
library(ggplot2)
library(dplyr)
#
# df$main should contain observations of interest
# df$condition can optionally be used to facet wrap
#
# labels should be a character vector of same length as group_by(df, main) or
# group_by(df, condition, main) if facet wrapping
#
pie_chart <- function(df, main, labels = NULL, condition = NULL) {
# convert the data into percentages. group by conditional variable if needed
df <- group_by_(df, .dots = c(condition, main)) %>%
summarize(counts = n()) %>%
mutate(perc = counts / sum(counts)) %>%
arrange(desc(perc)) %>%
mutate(label_pos = cumsum(perc) - perc / 2,
perc_text = paste0(round(perc * 100), "%"))
# reorder the category factor levels to order the legend
df[[main]] <- factor(df[[main]], levels = unique(df[[main]]))
# if labels haven't been specified, use what's already there
if (is.null(labels)) labels <- as.character(df[[main]])
p <- ggplot(data = df, aes_string(x = factor(1), y = "perc", fill = main)) +
# make stacked bar chart with black border
geom_bar(stat = "identity", color = "black", width = 1) +
# add the percents to the interior of the chart
geom_text(aes(x = 1.25, y = label_pos, label = perc_text), size = 4) +
# add the category labels to the chart
# increase x / play with label strings if labels aren't pretty
geom_text(aes(x = 1.82, y = label_pos, label = labels), size = 4) +
# convert to polar coordinates
coord_polar(theta = "y") +
# formatting
scale_y_continuous(breaks = NULL) +
scale_fill_discrete(name = "", labels = unique(labels)) +
theme(text = element_text(size = 22),
axis.ticks = element_blank(),
axis.text = element_blank(),
axis.title = element_blank())
# facet wrap if that's happening
if (!is.null(condition)) p <- p + facet_wrap(condition)
return(p)
}
Example:
# sample data
resps <- c("A", "A", "A", "F", "C", "C", "D", "D", "E")
cond <- c(rep("cat A", 5), rep("cat B", 4))
example <- data.frame(resps, cond)
Just like a typical ggplot call:
ex_labs <- c("alpha", "charlie", "delta", "echo", "foxtrot")
pie_chart(example, main = "resps", labels = ex_labs) +
labs(title = "unfacetted example")
ex_labs2 <- c("alpha", "charlie", "foxtrot", "delta", "charlie", "echo")
pie_chart(example, main = "resps", labels = ex_labs2, condition = "cond") +
labs(title = "facetted example")
It worked on all included function greatly inspired from here
ggpie <- function (data)
{
# prepare name
deparse( substitute(data) ) -> name ;
# prepare percents for legend
table( factor(data) ) -> tmp.count1
prop.table( tmp.count1 ) * 100 -> tmp.percent1 ;
paste( tmp.percent1, " %", sep = "" ) -> tmp.percent2 ;
as.vector(tmp.count1) -> tmp.count1 ;
# find breaks for legend
rev( tmp.count1 ) -> tmp.count2 ;
rev( cumsum( tmp.count2 ) - (tmp.count2 / 2) ) -> tmp.breaks1 ;
# prepare data
data.frame( vector1 = tmp.count1, names1 = names(tmp.percent1) ) -> tmp.df1 ;
# plot data
tmp.graph1 <- ggplot(tmp.df1, aes(x = 1, y = vector1, fill = names1 ) ) +
geom_bar(stat = "identity", color = "black" ) +
guides( fill = guide_legend(override.aes = list( colour = NA ) ) ) +
coord_polar( theta = "y" ) +
theme(axis.ticks = element_blank(),
axis.text.y = element_blank(),
axis.text.x = element_text( colour = "black"),
axis.title = element_blank(),
plot.title = element_text( hjust = 0.5, vjust = 0.5) ) +
scale_y_continuous( breaks = tmp.breaks1, labels = tmp.percent2 ) +
ggtitle( name ) +
scale_fill_grey( name = "") ;
return( tmp.graph1 )
} ;
An example :
sample( LETTERS[1:6], 200, replace = TRUE) -> vector1 ;
ggpie(vector1)
Output

Resources