How can I show empty bins on the plot in r? - r

I have a dataframe df with Genderand Results columns
x <- df$Results
cuts <- cut(x, breaks = seq(min(x),max(x),(max(x)-min(x))/15) , include.lowest = TRUE, right = TRUE,dig.lab = 5)
df$cuts <- mgsub::mgsub(as.character(cuts),pattern =c("\\[","\\]","\\(","\\)",",") ,replacement = c("","","",""," - "))
df$cuts <- factor(df$cuts,levels = unique(df$cuts))
df$Gender <- factor(df$Gender,levels = c("Men","Women"))
hist.plot <- function(df) {ggplot(df,aes(cuts,fill =Gender)) +
geom_bar(position = position_dodge(preserve = "single"),alpha=0.8,color="black",width = 0.5) +
theme(axis.text.x = element_text(face="bold",color="#993333", size=8, angle=45,vjust=0.6),
axis.text.y = element_text(face="bold", color="#993333", size=8, angle=45)) +
theme(plot.title = element_text(hjust = 0.5))
}
hist.plot(df)
There is no data in 918.8 - 989.9 bin and as you can see I can't show that it has 0 count on the plot
And in 1061 - 1132.1 bin the data belongs to women but appear on the left hand side instead right hand side.
The thing that I should change is about cut function or ggplot ?
Thank you for your help in advance.

Related

Annotation label won't show when passed as a grob in annotation_custom in r (also trying to put it at the bottom right corner)

I am trying to automatically put a custom annotation in the bottom right corner of a plot no matter the actual axes range.
I have tried to do so with annotate from ggplot2 but it just didn't work.
I am trying to work with annotation_custom from the grid package instead.
My code is long so I won't post all of it here, but rather the main problematic lines imo:
EDIT: I am adding a small dataframe for reproducibility
df <- data.frame(col.a = c(1:5), col.b = c(23.3,5.2,61.0,9.0,3.25))
# correlation calculation
cor.result = df %>% cor.test(col.a, col.b,
method = "spearman",
na.action=na.omit,
exact = FALSE)
corr.label <- sprintf("r = %.3f\np = %g\n%s", cor.result$estimate,
cor.result$p.value, "spearman")
The result is something like:
"r = -0.853\np = 0.003\nspearman"
Then I create a plot:
ttl = "Scatter Plot" # The title and subtitles are different in my code.
sub.ttl = "sample id: patient zero"
p <- df %>% ggplot(aes(x = col.a, y = col.b) +
geom_smooth(color = "steelblue3", method = lm, formula = y ~ x) +
geom_abline(aes(intercept=0, slope=1), color = 'grey45') +
geom_point(color = "steelblue4", alpha = 0.5, size = 3) +
labs(x = "HUMANnN2", y = "HUMAnN3",
title = ttl,
subtitle = sub.ttl) +
theme(text = element_text(size = 12),
plot.title = element_text(hjust = 0.5, size = 16),
plot.subtitle = element_text(hjust = 0.5, size = 14))
And try to add an annotation:
grob <- grobTree(textGrob(label = corr.label, x = 0.8, y = 0.3))
p <- p + annotation_custom(grob)
The result is as follows:
I did manage to add an annotation at the upper left corner with:
p <- p + annotation_custom(corr.label)
Which gives:
Yes, it has to be at the bottom right corner.
The annotation does show up when I switch corr.label with just a string of "hello". My guess is that grob doesn't pass newline characters accordingly.

How to set in order a secondary x-axis in ggraph / ggplot2?

I think I have lost my weekend dealing with this error for this I decided to post my code
headache.
I have this matrix
library(tidyverse)
library(tidygraph)
library(ggraph)
library(scales)
mat <- matrix(c(10,10,0,2,5,0,1,
10,9,1,2,6,0,1,
10,9,1,1,5,1,1,
10,10,1,2,6,1,1,
10,10,2,6,7,1,1,
10,9,7,6,7,3,2,
10,10,2,6,7,1,1,
10,7,2,5,7,1,1,
10,10,2,5,7,1,1,
10,9,2,5,7,1,1,
10,10,2,6,7,1,1), nrow=11, ncol=7, byrow = TRUE)
colnames(mat) <- c(0,12,40,50,60,70,90)
rownames(mat) <- c(12,20,30,40,50,60,70,80,90,100,120)
and I plot it as bipartite network with ggraph and then I am taking a completely distorted/crazy x-axis.
(layout <- create_layout(mat, "bipartite"))
ggraph(layout) +
geom_edge_link0(aes(edge_width = weight),edge_colour = "black", alpha=0.9) +
scale_edge_width(range = c(1, 10)) +
geom_node_point(aes(shape = type, colour = type), size = 10) +
scale_colour_manual(values = c("#02BEC4", "#45BF44")) +
scale_shape_manual(values = c(15, 19)) +
scale_x_continuous(
breaks = layout$x[!layout$type],
labels = layout$name[!layout$type],
position = "top",
sec.axis = dup_axis(
breaks = layout$x[layout$type],
labels = layout$name[layout$type],
)
) +
theme(axis.text = element_text())
Why my x-axis can not start from 0?
I want to see in the x-axis at the bottom the following order,
0,12,40,50,60,70,90 and not 90,12,60,50,0,40,70
I would highly appreciate any help in the topic.
footnote: Many thanks to Martin Gal for his comment

how do i combine multiple data sources in ggplot using split and sapply?

this question is linked to a previous one answered by #Rui Barradas and #Duck, but i need more help. Previous link here:
how do i vectorise (automate) plot creation in R
Basically, I need to combine 3 datasets into one plot with a secondary y axis. All datasets need to be split by SITENAME and will facet wrap by Sampling.Year. I am using split and sapply. Being facet wrap the plots look something like this:
However, i'm now trying to add the two other data sources into the plots, to look something like this:
But i am struggling to add the two other data sources and get them to split by SITENAME. Her is my code so far...
Record plot format as a function to be applied to a split list df (ideally 'df' would be added as geom_line with a secondary y axis, and 'FF_start_dates' will be added as a vertical dashed line):
SITENAME_plot <- function(AllDates_TPAF){
ggplot(AllDates_TPAF, aes(DATE, Daily.Ave.PAF)) +
geom_point(aes(colour = Risk), size = 3) +
scale_colour_manual(values=c("Very Low" = "dark green","Low" = "light green",
"Moderate" = "yellow", "High" = "orange", "Very High" = "red"), drop = FALSE) +
labs(x = "Month", y = "Total PAF (% affected)") +
scale_x_date(breaks = "1 month", labels = scales::date_format("%B")) +
facet_wrap(~Sampling.Year, ncol = 1, scales = "free")+
scale_y_continuous(limits = c(0, 100), sec.axis = sec_axis(~., name = "Water level (m)")) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
theme(legend.text=element_text(size=15)) +
theme(axis.text=element_text(size=15),
axis.title=element_text(size=15,face="bold")) +
guides(color = guide_legend(reverse = TRUE))+
theme_bw() +
ggtitle(unique(AllDates_TPAF$SITENAME))
}
plot write function:
SITENAME_plot_write <- function(name, g, dir = "N:/abc/"){
flname <- file.path(dir, name)
flname <- paste0(flname, ".jpg")
png(filename = flname, width = 1500, height = 1000)
print(g)
dev.off()
flname
}
Apply function to list split by SITENAME:
sp1 <- split(AllDates_TPAF, AllDates_TPAF$SITENAME)
gg_list <- sapply(sp1, SITENAME_plot, simplify = FALSE)
mapply(SITENAME_plot_write, names(gg_list), gg_list, MoreArgs = list(dir = getwd()))
dev.off()
I have uploaded samples of all 3 datasets here: Sample Data
Apologies for not using gsub but there was too much data and I couldn't get it to work properly
thanks in advance for any help you can give, even if it is just to point me towards a web tutorial of some kind.
You can try next code. I used the data you shared. Just be careful with names of all datasets. Ideally, the key columns as DATE and Sampling.Year should be present in all dataframes before making the split. Also some variables as Risk was absent so I added an example var with same name. Here the code, I added a function for the plot you want:
library(tidyverse)
library(readxl)
#Data
df1 <- read_excel('Sample data.xlsx',1)
#Create var
df1$Risk <- c(rep(c("Very Low","Low","Moderate","High","Very High"),67),"Very High")
#Other data
df2 <- read_excel('Sample data.xlsx',2)
df3 <- read_excel('Sample data.xlsx',3)
#Split 1
L1 <- split(df1,df1$SITENAME)
L2 <- split(df2,df2$SITENAME)
L3 <- split(df3,df3$`Site Name`)
#Function to create plots
myplot <- function(x,y,z)
{
#Merge x and y
#Check for duplicates and avoid column
y <- y[!duplicated(paste(y$DATE,y$Sampling.Year)),]
y$SITENAME <- NULL
xy <- merge(x,y,by.x = c('Sampling.Year','DATE'),by.y = c('Sampling.Year','DATE'),all.x=T)
#Format to dates
xy$DATE <- as.Date(xy$DATE)
#Scale factor
scaleFactor <- max(xy$Daily.Ave.PAF) / max(xy$Height)
#Rename for consistency in names
names(z)[4] <- 'DATE'
#Format date
z$DATE <- as.Date(z$DATE)
#Plot
#Plot
G <- ggplot(xy, aes(DATE, Daily.Ave.PAF)) +
geom_point(aes(colour = Risk), size = 3) +
scale_colour_manual(values=c("Very Low" = "dark green","Low" = "light green",
"Moderate" = "yellow", "High" = "orange", "Very High" = "red"), drop = FALSE) +
scale_x_date(breaks = "1 month", labels = scales::date_format("%b %Y")) +
geom_line(aes(x=DATE,y=Height*scaleFactor))+
scale_y_continuous(name="Total PAF (% affected)", sec.axis=sec_axis(~./scaleFactor, name="Water level (m)"))+
labs(x = "Month") +
geom_vline(data = z,aes(xintercept = DATE),linetype="dashed")+
facet_wrap(~Sampling.Year, ncol = 1, scales = "free")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
theme(legend.text=element_text(size=15)) +
theme(axis.text=element_text(size=15),
axis.title=element_text(size=15,face="bold")) +
guides(color = guide_legend(reverse = TRUE))+
theme_bw() +
ggtitle(unique(xy$SITENAME))
return(G)
}
#Create a list of plots
Lplots <- mapply(FUN = myplot,x=L1,y=L2,z=L3,SIMPLIFY = FALSE)
#Now format names
vnames <- paste0(names(Lplots),'.png')
mapply(ggsave, Lplots,filename = vnames,width = 30,units = 'cm')
You will end up with plots like these saved in your dir:
Some dashed lines do not appear in plots because they were not present in the data you provided.

Need help on ggplot in R, I am getting an error: 'argument "x" is missing, with no default'

[enter image description here][1]I am trying to create a lowry plot in R but am having difficulty debugging the errors returned. I am using the following code to create the plot:
library(ggplot2)
library(reshape)
m_xylene_data <- data.frame(
Parameter = c(
"BW", "CRE", "DS", "KM", "MPY", "Pba", "Pfaa",
"Plia", "Prpda", "Pspda", "QCC", "QfaC", "QliC",
"QPC", "QspdC", "Rurine", "Vfac", "VliC", "Vmax"),
"Main Effect" = c(
1.03E-01, 9.91E-02, 9.18E-07, 3.42E-02, 9.27E-3, 2.82E-2, 2.58E-05,
1.37E-05, 5.73E-4, 2.76E-3, 6.77E-3, 8.67E-05, 1.30E-02,
1.19E-01, 4.75E-04, 5.25E-01, 2.07E-04, 1.73E-03, 1.08E-03),
Interaction = c(
1.49E-02, 1.43E-02, 1.25E-04, 6.84E-03, 3.25E-03, 7.67E-03, 8.34E-05,
1.17E-04, 2.04E-04, 7.64E-04, 2.84E-03, 8.72E-05, 2.37E-03,
2.61E-02, 6.68E-04, 4.57E-02, 1.32E-04, 6.96E-04, 6.55E-04
)
)
fortify_lowry_data <- function(data,
param_var = "Parameter",
main_var = "Main.Effect",
inter_var = "Interaction")
{
#Convert wide to long format
mdata <- melt(data, id.vars = param_var)
#Order columns by main effect and reorder parameter levels
o <- order(data[, main_var], decreasing = TRUE)
data <- data[o, ]
data[, param_var] <- factor(
data[, param_var], levels = data[, param_var]
)
#Force main effect, interaction to be numeric
data[, main_var] <- as.numeric(data[, main_var])
data[, inter_var] <- as.numeric(data[, inter_var])
#total effect is main effect + interaction
data$.total.effect <- rowSums(data[, c(main_var, inter_var)])
#Get cumulative totals for the ribbon
data$.cumulative.main.effect <- cumsum(data[, main_var])
data$.cumulative.total.effect <- cumsum(data$.total.effect)
#A quirk of ggplot2 means we need x coords of bars
data$.numeric.param <- as.numeric(data[, param_var])
#The other upper bound
#.maximum = 1 - main effects not included
data$.maximum <- c(1 - rev(cumsum(rev(data[, main_var])))[-1], 1)
data$.valid.ymax <- with(data,
pmin(.maximum, .cumulative.total.effect)
)
mdata[, param_var] <- factor(
mdata[, param_var], levels = data[, param_var]
)
list(data = data, mdata = mdata)
}
lowry_plot <- function(data,
param_var = "Parameter",
main_var = "Main.Effect",
inter_var = "Interaction",
x_lab = "Parameters",
y_lab = "Total Effects (= Main Effects + Interactions)",
ribbon_alpha = 0.5,
x_text_angle = 25)
{
#Fortify data and dump contents into plot function environment
data_list <- fortify_lowry_data(data, param_var, main_var, inter_var)
list2env(data_list, envir = sys.frame(sys.nframe()))
p <- ggplot(data) +
geom_bar(aes_string(x = param_var, y = "value", fill = "variable"),
data = mdata) +
geom_ribbon(
aes(x = .numeric.param, ymin = .cumulative.main.effect, ymax =
.valid.ymax),
data = data,
alpha = ribbon_alpha) +
xlab(x_lab) +
ylab(y_lab) +
scale_y_continuous(labels = "percent") +
theme(axis.text.x = text(angle = x_text_angle, hjust = 1)) +
scale_fill_grey(end = 0.5) +
theme(legend.position = "top",
legend.title =blank(),
legend.direction = "horizontal"
)
p
}
m_xylene_lowry <- lowry_plot(m_xylene_data)
When I run the code, it is giving me the following error:
Error: argument "x" is missing, with no default
It is not specific enough for me to know what the issue is. What is causing the error to be displayed and how can I make error statements more verbose?
Lowry PLOT
It seems that you have more than one faulty element in your code than just the error it throws. In my experience it always helps to first check whether the code works as expected before putting it into a function. The plotting-part below should work:
p <- ggplot(data) + # no need to give data here, if you overwrite it anyway blow, but does not affect outcome...
# geom_bar does the counting but does not take y-value. Use geom_col:
geom_col(aes_string(x = param_var, y = "value", fill = "variable"),
data = mdata,
position = position_stack(reverse = TRUE)) +
geom_ribbon(
aes(x = .numeric.param, ymin = .cumulative.main.effect, ymax =
.valid.ymax),
data = data,
alpha = ribbon_alpha) +
xlab(x_lab) +
ylab(y_lab) +
# use scales::percent_format():
scale_y_continuous(labels = scales::percent_format()) +
# text is not an element you can use here, use element_text():
theme(axis.text.x = element_text(angle = x_text_angle, hjust = 1)) +
scale_fill_grey(end = 0.5) +
# use element_blank(), not just blank()
theme(legend.position = "top",
legend.title = element_blank(),
legend.direction = "horizontal"
)
This at least plots something, but I'm not sure whether it is what you expect it to do. It would help if you could show the desired output.
Edit:
Added position = position_stack(reverse = TRUE) to order according to sample plot.

Arrange graph, table and legend in a single grid

I need to print in Word a graph with a table at the bottom which indicates the legend used in the graph. I have the 3 elements separated and I need to put it together in a single grid.
I use grid.arrange to try but the size of each element differs, like here
There is a way to put them all together so years matches in x-axis graph and column table and each row of the table match each label from the legend.?
As a second question, it is possible to put the Total line in the legend?
Any help would be appreciated.
The code I'm using:
VARIABLE<-c("OFERT_PLP","OFERT_TP","OFERT_LIC")
X2007<-as.vector(matrix1) #matrices A-D contain randomly generated NA and numerical values
X2008<-as.vector(matrix2)
X2009<-as.vector(matrix3)
X2010<-as.vector(matrix4)
X2011<-as.vector(matrix5)
X2012<-as.vector(matrix6)
X2013<-as.vector(matrix7)
X2014<-as.vector(matrix8)
X2015<-as.vector(matrix9)
X2016<-as.vector(matrix10)
orden<-c(1,2,3)
ETIQUETA<-c("Pregrado","Postitulo","Postgrado")
df<-as.data.frame(cbind(VARIABLE,X2007,X2008,X2009,X2010,X2011,X2012,X2013,X2014,X2015,X2016,orden,ETIQUETA))
df<-df[,colnames(df)!="ETIQUETA"]
dfm1<-melt(df,id.vars = c("VARIABLE","orden"), variable.name = "ANO", value.name = "VALOR")
dfm <- dfm1 %>% group_by(ANO,VARIABLE,VALOR) %>%
group_by(ANO) %>%
mutate(pct = (VALOR / sum(VALOR,na.rm=TRUE)),
total=sum(VALOR,na.rm=TRUE),
cumsum=cumsum(VALOR),
npos = total-cumsum+0.5*VALOR)
d<-as.data.frame(rbind(df[,paste0("X",anos)],colSums(df[,paste0("X",anos)],na.rm=TRUE)))
p1<-ggplot(dfm, aes(x=ANO, y=VALOR, fill=orden)) +
geom_bar(stat="identity")+ geom_line(aes(y=total,group=1,linetype="Total"))+ scale_fill_brewer()
none <- element_blank()
p2 <- p1 + theme_bw() +
scale_x_discrete(labels=anos) +
labs(panel.grid.major = none, panel.grid.minor = none) +
labs(panel.background = none) +
labs(panel.border = none) +
xlab(NULL) + ylab(NULL)+
background_grid(major = "y", minor = "none" )+
geom_text(aes(label=ifelse(pct==0,"",paste0(sprintf("%1.0f",pct*100),"%")),y=npos), colour="black",size=3)
p3<-p2 + theme(legend.title=none,legend.position = "left",legend.text=element_text(size=5),
legend.key.height=unit(0.8,"line"), axis.text = element_text(size = 8)) + guides(linetype=FALSE)
legend <- get_legend(p3)
p3<-p3+theme(legend.position = "none")
pg <- ggplotGrob(p3)
mythemegra <- gridExtra::ttheme_default(
core = list(fg_params=list(cex = 0.8)),
colhead = list(fg_params=list(cex = 0.8)),
rowhead = list(fg_params=list(cex = 0.8)),parse = "TRUE")
table<-tableGrob(d,rows = NULL,col=NULL,
theme = mythemegra)
blankPlot <- ggplot()+geom_blank(aes(1,1)) + cowplot::theme_nothing()
grid.arrange(arrangeGrob(blankPlot,pg,top=textGrob("Title",gp=gpar(fontsize=12,font=2,col="black"),just = "centre"),
nrow=1,ncol=2,widths = c(1,5)),
arrangeGrob(legend,table,nrow=1,ncol=2,widths = c(1,4.5)),heights=c(6.5,6.5))

Resources