Showing median value in grouped boxplot in R - r

I have created boxplots using ggplot2 with this code.
plotgraph <- function(x, y, colour, min, max)
{
plot1 <- ggplot(dims, aes(x = x, y = y, fill = Region)) +
geom_boxplot()
#plot1 <- plot1 + scale_x_discrete(name = "Blog Type")
plot1 <- plot1 + labs(color='Region') + geom_hline(yintercept = 0, alpha = 0.4)
plot1 <- plot1 + scale_y_continuous(breaks=c(seq(min,max,5)), limits = c(min, max))
plot1 <- plot1 + labs(x="Blog Type", y="Dimension Score") + scale_fill_grey(start = 0.3, end = 0.7) + theme_grey()
plot1 <- plot1 + theme(legend.justification = c(1, 1), legend.position = c(1, 1))
return(plot1)
}
plot1 <- plotgraph (Blog, Dim1, Region, -30, 25)
A part of data I use is reproduced here.
Blog,Region,Dim1,Dim2,Dim3,Dim4
BlogsInd.,PK,-4.75,13.47,8.47,-1.29
BlogsInd.,PK,-5.69,6.08,1.51,-1.65
BlogsInd.,PK,-0.27,6.09,0.03,1.65
BlogsInd.,PK,-2.76,7.35,5.62,3.13
BlogsInd.,PK,-8.24,12.75,3.71,3.78
BlogsInd.,PK,-12.51,9.95,2.01,0.21
BlogsInd.,PK,-1.28,7.46,7.56,2.16
BlogsInd.,PK,0.95,13.63,3.01,3.35
BlogsNews,PK,-5.96,12.3,6.5,1.49
BlogsNews,PK,-8.81,7.47,4.76,1.98
BlogsNews,PK,-8.46,8.24,-1.07,5.09
BlogsNews,PK,-6.15,0.9,-3.09,4.94
BlogsNews,PK,-13.98,10.6,4.75,1.26
BlogsNews,PK,-16.43,14.49,4.08,9.91
BlogsNews,PK,-4.09,9.88,-2.79,5.58
BlogsNews,PK,-11.06,16.21,4.27,8.66
BlogsNews,PK,-9.04,6.63,-0.18,5.95
BlogsNews,PK,-8.56,7.7,0.71,4.69
BlogsNews,PK,-8.13,7.26,-1.13,0.26
BlogsNews,PK,-14.46,-1.34,-1.17,14.57
BlogsNews,PK,-4.21,2.18,3.79,1.26
BlogsNews,PK,-4.96,-2.99,3.39,2.47
BlogsNews,PK,-5.48,0.65,5.31,6.08
BlogsNews,PK,-4.53,-2.95,-7.79,-0.81
BlogsNews,PK,6.31,-9.89,-5.78,-5.13
BlogsTech,PK,-11.16,8.72,-5.53,8.86
BlogsTech,PK,-1.27,5.56,-3.92,-2.72
BlogsTech,PK,-11.49,0.26,-1.48,7.09
BlogsTech,PK,-0.9,-1.2,-2.03,-7.02
BlogsTech,PK,-12.27,-0.07,5.04,8.8
BlogsTech,PK,6.85,1.27,-11.95,-10.79
BlogsTech,PK,-5.21,-0.89,-6,-2.4
BlogsTech,PK,-1.06,-4.8,-8.62,-2.42
BlogsTech,PK,-2.6,-4.58,-2.07,-3.25
BlogsTech,PK,-0.95,2,-2.2,-3.46
BlogsTech,PK,-0.82,7.94,-4.95,-5.63
BlogsTech,PK,-7.65,-5.59,-3.28,-0.54
BlogsTech,PK,0.64,-1.65,-2.36,-2.68
BlogsTech,PK,-2.25,-3,-3.92,-4.87
BlogsTech,PK,-1.58,-1.42,-0.38,-5.15
Columns,PK,-5.73,3.26,0.81,-0.55
Columns,PK,0.37,-0.37,-0.28,-1.56
Columns,PK,-5.46,-4.28,2.61,1.29
Columns,PK,-3.48,2.38,12.87,3.73
Columns,PK,0.88,-2.24,-1.74,3.65
Columns,PK,-2.11,4.51,8.95,2.47
Columns,PK,-10.13,10.73,9.47,-0.47
Columns,PK,-2.08,1.04,0.11,0.6
Columns,PK,-4.33,5.65,2,-0.77
Columns,PK,1.09,-0.24,-0.92,-0.17
Columns,PK,-4.23,-4.01,-2.32,6.26
Columns,PK,-1.46,-1.53,9.83,5.73
Columns,PK,9.37,-1.32,1.27,-4.12
Columns,PK,5.84,-2.42,-5.21,1.07
Columns,PK,8.21,-9.36,-5.87,-3.21
Columns,PK,7.34,-7.3,-2.94,-5.86
Columns,PK,1.83,-2.77,1.47,-4.02
BlogsInd.,PK,14.39,-0.55,-5.42,-4.7
BlogsInd.,US,22.02,-1.39,2.5,-3.12
BlogsInd.,US,4.83,-3.58,5.34,9.22
BlogsInd.,US,-3.24,2.83,-5.3,-2.07
BlogsInd.,US,-5.69,15.17,-14.27,-1.62
BlogsInd.,US,-22.92,4.1,5.79,-3.88
BlogsNews,US,0.41,-2.03,-6.5,2.81
BlogsNews,US,-4.42,8.49,-8.04,2.04
BlogsNews,US,-10.72,-4.3,3.75,11.74
BlogsNews,US,-11.29,2.01,0.67,8.9
BlogsNews,US,-2.89,0.08,-1.59,7.06
BlogsNews,US,-7.59,8.51,3.02,12.33
BlogsNews,US,-7.45,23.51,2.79,0.48
BlogsNews,US,-12.49,15.79,-9.86,18.29
BlogsTech,US,-11.59,6.38,11.79,-7.28
BlogsTech,US,-4.6,4.12,7.46,3.36
BlogsTech,US,-22.83,2.54,10.7,5.09
BlogsTech,US,-4.83,3.37,-8.12,-0.9
BlogsTech,US,-14.76,29.21,6.23,9.33
Columns,US,-15.93,12.85,19.47,-0.88
Columns,US,-2.78,-1.52,8.16,0.24
Columns,US,-16.39,13.08,11.07,7.56
Even though I have tried to add detailed scale on y-axis, it is hard for me to pinpoint exact median score for each boxplot. So I need to print median value within each boxplot. There was another answer available (for faceted boxplot) which does not work for me as the printed values are not within the boxes but jammed together in the middle. It will be great to be able to print them within (middle and above the median line of) boxplots.
Thanks for your help.
Edit: I make a grouped graph as below.
Add

library(dplyr)
dims=dims%>%
group_by(Blog,Region)%>%
mutate(med=median(Dim1))
plotgraph <- function(x, y, colour, min, max)
{
plot1 <- ggplot(dims, aes(x = x, y = y, fill = Region)) +
geom_boxplot()+
labs(color='Region') +
geom_hline(yintercept = 0, alpha = 0.4)+
scale_y_continuous(breaks=c(seq(min,max,5)), limits = c(min, max))+
labs(x="Blog Type", y="Dimension Score") + scale_fill_grey(start = 0.3, end = 0.7) +
theme_grey()+
theme(legend.justification = c(1, 1), legend.position = c(1, 1))+
geom_text(aes(y = med,x=x, label = round(med,2)),position=position_dodge(width = 0.8),size = 3, vjust = -0.5,colour="blue")
return(plot1)
}
plot1 <- plotgraph (Blog, Dim1, Region, -30, 25)
Which gives (the text colour can be tweaked to something less tacky):
Note: You should consider using non-standard evaluation in your function rather than having it require the use of attach()
Edit:
One liner, not as clean I wanted it to be since I ran into problems with dplyr not properly aggregating the data even though it says the grouping was performed.
This function assume the dataframe is always called dims
library(ggplot2)
library(reshape2)
plotgraph <- function(x, y, colour, min, max)
{
plot1 <- ggplot(dims, aes_string(x = x, y = y, fill = colour)) +
geom_boxplot()+
labs(color=colour) +
geom_hline(yintercept = 0, alpha = 0.4)+
scale_y_continuous(breaks=c(seq(min,max,5)), limits = c(min, max))+
labs(x="Blog Type", y="Dimension Score") +
scale_fill_grey(start = 0.3, end = 0.7) +
theme_grey()+
theme(legend.justification = c(1, 1), legend.position = c(1, 1))+
geom_text(data= melt(with(dims, tapply(eval(parse(text=y)),list(eval(parse(text=x)),eval(parse(text=colour))), median)),varnames=c("Blog","Region"),value.name="med"),
aes_string(y = "med",x=x, label = "med"),position=position_dodge(width = 0.8),size = 3, vjust = -0.5,colour="blue")
return(plot1)
}
plot1 <- plotgraph ("Blog", "Dim1", "Region", -30, 25)

Assuming that Blog is your dataframe, the following should work:
min <- -30
max <- 25
meds <- aggregate(Dim1~Region, Blog, median)
plot1 <- ggplot(Blog, aes(x = Region, y = Dim1, fill = Region)) +
geom_boxplot()
plot1 <- plot1 + labs(color='Region') + geom_hline(yintercept = 0, alpha = 0.4)
plot1 <- plot1 + scale_y_continuous(breaks=c(seq(min,max,5)), limits = c(min, max))
plot1 <- plot1 + labs(x="Blog Type", y="Dimension Score") + scale_fill_grey(start = 0.3, end = 0.7) + theme_grey()
plot1 + theme(legend.justification = c(1, 1), legend.position = c(1, 1)) +
geom_text(data = meds, aes(y = Dim1, label = round(Dim1,2)),size = 5, vjust = -0.5, color='white')

Related

ggplot trying to make a Cleveland plot but I cannot get a legend

library(ggplot2)
library(ggthemes)
data <- read.csv('/Users/zbhay/Documents/r-data.csv', header = 1)
zb <- ggplot(data) +
geom_segment( aes(x=x, xend=x, y=value1, yend=value2), color="black")+
geom_point( aes(x=x, y=value1), color=rgb(0.2,0.7,0.1,1), size=4 )+
geom_point( aes(x=x, y=value2), color=rgb(0.7,0.2,0.1,1), size=4 )+
coord_flip() +
theme_solarized() +
scale_y_continuous(breaks = seq(0, 10000, by = 500)
)
zb + labs(title = "Title",
subtitle = "subtitle") +
xlab("Business Functions") +
ylab("# of hours")
legend("left", c("Starting", "Ending"),
box.col = "darkgreen"
)
Hello, here is the code. The CSV file is structured as follows; column A = names, column b = starting number, column c = final number.
I am trying to set up a legend that calls out the final number vs starting number. I have tried and tried but cannot seem to be able to crack it. If anyone knows a fix, I would appreciate it if you could let me know.
As a general rule when using ggplot2 you have to map on aesthetics if you want to get a legend, i.e. instead of setting the colors for your points as arguments map a value on the color aes, e.g. in my code below I map the constant value or category start on the color aes inside aes() for the first geom_point. Afterwards you could use scale_color_manual to assign your desired colors and labels to these "categories" or "values". Finally, the color of the legend box could be set via the theme option legend.background. However, the legend keys themselves have a background color too, which I set to NA via legend.key.
Using some fake random example data:
library(ggplot2)
library(ggthemes)
set.seed(123)
data <- data.frame(x = letters[1:5], value1 = runif(5, 0, 10000), value2 = runif(5, 0, 10000))
ggplot(data) +
geom_segment(aes(x = x, xend = x, y = value1, yend = value2), color = "black") +
geom_point(aes(x = x, y = value1, color = "start"), size = 4) +
geom_point(aes(x = x, y = value2, color = "end"), size = 4) +
coord_flip() +
theme_solarized() +
scale_y_continuous(breaks = seq(0, 10000, by = 500)) +
scale_color_manual(values = c(start = rgb(0.2, 0.7, 0.1, 1), end = rgb(0.7, 0.2, 0.1, 1)), labels = c(start = "Starting", end = "Ending")) +
labs(title = "Title", subtitle = "subtitle", x = "Business Functions", y = "# of hours", color = NULL) +
theme(
legend.key = element_rect(fill = NA),
legend.background = element_rect(fill = "darkgreen")
)

Polygon disappears when Plotly labels are defined - R

I am trying to plot a polygon hull using ggplot and plotly.
While without label polygons are shown in the plot, when I add extra labels in aesthetics the polygons disappear.
library(data.table)
library(ggplot2)
library(dplyr)
library(plotly)
df <- data.table(continent = c(rep("America",3), rep("Europe",4)),
state = c("USA", "Brasil", "Chile", "Italy", "Swiss", "Spain", "Greece"),
X = rnorm(7, 5, 1),
Y = rnorm(7, -13, 1)
)
df$X_sd = sd(df$X)
df$Y_sd = sd(df$Y)
hull2 <- df %>%
group_by(continent) %>%
slice(chull(X,Y))
p <- df %>%
ggplot( aes(x=X,
y=Y,
fill = continent,
color = continent,
label=state))+
geom_polygon(data = hull2,
lwd = 1,
alpha = 0.1,
linetype = "dashed")+
geom_errorbarh(aes(xmin = X - X_sd,
xmax = X + X_sd),
size = 0.5,
alpha = 0.3) +
geom_errorbar(aes(ymin = Y - Y_sd,
ymax = Y + Y_sd),
size = 0.5,
alpha = 0.3) +
geom_point(shape=21,
color="black",
size=3)+
theme_bw()+
theme(legend.position = "none")
ggplotly(p)
How odd! If you most label = state to the aes for the last geom_ you'll get the standard warning, but it works and the state shows up in the tooltip.
The designation of color = continent shows up, as well. I am going to guess that you're not interested in having that in your tooltip, so I've added how you could change that at the end. There is a tooltip with the continent listed two times, but with the information about how to remove the color, you'll see how you might make further adjustments depending on the trace.
p <- df %>%
ggplot(aes(x = X, y = Y,
fill = continent,
color = continent #,
# label = state)
)) +
geom_polygon(data = hull2, lwd = 1,
alpha = 0.1, linetype = "dashed") +
geom_errorbarh(aes(xmin = X - X_sd,
xmax = X + X_sd),
size = 0.5, alpha = 0.3) +
geom_errorbar(aes(ymin = Y - Y_sd,
ymax = Y + Y_sd),
size = 0.5, alpha = 0.3) +
geom_point(shape = 21,
color = "black",
size = 3, aes(label = state)) +
theme_bw() + theme(legend.position = "none")
p
ggplotly(p)
To remove the color from the tooltip, assign ggplotly to an object. Then you can remove the string from the 7th and 8th trace.
p1 = ggplotly(p)
lapply(7:8,
function(i){
p1$x$data[[i]]$text <<- stringr::str_replace(p1$x$data[[i]]$text,
"continent: black<br />",
"")
})
p1
FYI, there are 8 traces that make up your plot. The first trace has the double continent text.

Add legend for line plot with confidence interval (geom_ribbon) and density plot

I have a plot containing a line plot with confidence interval (geom_ribbon()) and an additional density plot of the underlying data (from a different data frame).
I would like to add a legend with three entries: for the line, the confidence interval, and the density.
Consider the following MWE: (in t1, ylo and yup mark the CI, y the line, and t2 contains the data for the density plot.
(I tried incorporating code from this question to no avail.)
library(tidyverse)
t1 <- tibble(x = c(0:22)) %>%
mutate(ylo = c(seq(from = .25, to = 1.35, length.out=10), seq(from = 1.35, to = -1.22, length.out=13)),
y = .25 * x + 1,
yup = c(seq(from = 2.75, to = 4.5, length.out=10), seq(from = 4.75, to = 12, length.out=13)))
t2 <- tibble(x = rnorm(100000, 10, 1))
ggplot() +
geom_line(data = t1, aes(x=x, y=y)) +
geom_ribbon(data = t1, aes(x=x, y=y, ymin=ylo, ymax=yup), linetype=2, alpha=.15) +
geom_hline(linetype='dotted', yintercept = 0) + labs(y = "Y", x = "X") + theme_bw() +
geom_density(data = t2, aes(x = x), color="darkblue", fill="lightblue", linetype="dashed") +
scale_color_manual(values = c("#000000", "grey60", "lightblue"), name = "Title") + # from other answer; couldn't get it to work
coord_cartesian(ylim = c(-0.0125, 12.5), xlim = c(-0.5, 22))
Many thanks for any pointers! :)
Legends appear only for aesthetics. You need to pass color, fill, linetype, alpha, etc. etc. etc into your call to aes.
You can use either columns programmatically, i.e. pass unquoted column names (usually preferred), but you can also just pass a character string, which will then create a discrete aesthetic, which you can then scale as usual with scale_?aes_...
Below I just added title and label to the character vectors just to make it clear where the names come from.
library(tidyverse)
t1 <- tibble(x = c(0:22)) %>%
mutate(ylo = c(seq(from = .25, to = 1.35, length.out=10), seq(from = 1.35, to = -1.22, length.out=13)),
y = .25 * x + 1,
yup = c(seq(from = 2.75, to = 4.5, length.out=10), seq(from = 4.75, to = 12, length.out=13)))
t2 <- tibble(x = rnorm(100000, 10, 1))
ggplot() +
geom_line(data = t1, aes(x=x, y=y, lty = "myline-label")) +
geom_ribbon(data = t1, aes(x=x, y=y, ymin=ylo, ymax=yup, fill = "MyCI-label"), linetype=2, alpha=.15) +
geom_hline(linetype='dotted', yintercept = 0) + labs(y = "Y", x = "X") +
geom_density(data = t2, aes(x = x, color="MyDens-label"), fill="lightblue", linetype="dashed") +
scale_linetype("Myline-title") +
scale_fill_discrete("MyCI-title") +
scale_color_manual("MyDens-title", values = c("Darkblue"))
Created on 2021-04-07 by the reprex package (v1.0.0)

How to customize a boxplot legend indicating mean, outliers, median, etc?

I have a boxplot and by my supervisor's advice I have to indicate the mean, outliers and median in the legend, like this image:
How can I do this using ggplot2?
library(ggplot2)
A <- 1:20
DF <- data.frame(A)
ggplot(data = DF) +
geom_boxplot(aes(x = "", y = A))
There is no straightforward way. But you could make a custom legend using another plot:
p <- ggplot(mtcars) +
geom_boxplot(aes(x = factor(cyl), y = mpg))
d1 <- data.frame(x = 1, y = c(1:1000, 1502))
d2 <- data.frame(
y = c(boxplot.stats(d1$y)$stats, 1502),
x = 1,
label = c('min', '1st quartile', 'median', '3rd quartile', 'max', 'outlier')
)
leg <- ggplot(d1, aes(x, y)) + geom_boxplot(width = 0.2) +
geom_text(aes(x = 1.15, label = label), d2, hjust = 0) +
xlim(0.9, 1.5) +
theme_void() + theme(panel.background = element_rect(fill = 'white', color = 1))
p + annotation_custom(ggplotGrob(leg), xmin = 3, xmax = 3.5, ymin = 25, ymax = 35)

How do I remove the _printed_ output warnings using ggplot2 with knitr

GGplot2 prints out a warning when using scale_colour_gradient twice in a plot, which I cannot suppress in knitr. Here is a screenshot of my browser after knitting an RHTML file:
I need to colour gradients, one for the line (cheap / dear of yield curve) and one for the instruments which is similar but subtly different (coloured dots).
Here is my ggplot code:
ggp <- ggplot(polys, aes(x = xvals, y = yvals)) +
#geom_polygon(aes(fill = - value, group = id, alpha = value)) + # lovely blue
geom_polygon(aes(fill = value, group = id, alpha = value)) + # lovely shiny light blue middle draw me in
scale_x_log10(breaks = xaxtickpos, minor_breaks = NULL) +
theme(legend.position = "none", panel.background = element_rect(fill = "grey85", colour = NA)) +
xlab("maturity") + ylab("bps")
ggp <- ggp + geom_line(data = quanmelt[quanmelt[, "percentile"] %in% outerthresh, ],
aes(x = mat, y = value, group = percentile), colour = "white", size = 2)
ggp <- ggp + geom_line(data = quanmelt[quanmelt[, "percentile"] %in% innerthresh, ],
aes(x = mat, y = value, group = percentile), colour = "white", size = 1,
linetype = "dotted")
#add last few days line/today (this doesn't work very well hence commented out)
todayback <- todayline[todayline$daysback == 2, ] # get this historic lines
ggp <- ggp + geom_smooth(data = todayback, aes(x = mat, y = value, group = daysback),
colour = "darkred", linetype = "dashed",
se = FALSE, size = 1, method = "loess", span = (ifelse(smooth, 0.3, 0.1)))
#add boxplot
ggp <- ggp + geom_boxplot(data = meltcdlong, aes(x = mat, y = value, group = bond), outlier.size = NA,
colour = "grey30", alpha = 0.5, size = 0.2, width = 0.025)
# add the latest point
ggp <- ggp + geom_point(data = latestcdpoint, aes(x = mat, y = value, group = bond))
# now do labels (twice - one for above, one for below)
ggp <- ggp + geom_text(data = latestcdpoint[latestcdpoint$adjustvertvec == 1, ], aes(x = mat, y = labelposies, label = label),
angle = 90, colour = "grey20", size = 3, hjust = 0, alpha = 0.5)
ggp <- ggp + geom_text(data = latestcdpoint[latestcdpoint$adjustvertvec == 0, ], aes(x = mat, y = labelposies, label = label),
angle = 90, colour = "grey20", size = 3, hjust = 1, alpha = 0.5)
#now print a nice z-score graded colour line for the curve
todaytoday <- todayline[todayline$daysback == 0, ]
minz <- min(rescale(todaytoday[, "zscore"])) # for scaling of z-score line gradient colours
maxz <- max(rescale(todaytoday[, "zscore"]))
bpspline <- smooth.spline(todaytoday$mat, todaytoday$value, spar = 0.4) # Smooth out the curve with lots of points
zscorespline <- smooth.spline(todaytoday$mat, todaytoday$zscore) # and smooth out the zscores too
xplot <- seq(2, maxmat, by = 0.1)
todayplotter <- data.frame(mat = xplot, value = predict(bpspline, xplot)$y,
zscore = rescale(c(-5, 5, predict(zscorespline, xplot)$y))[-1:-2]) # build the plotter
ggp <- ggp + geom_path(data = todayplotter, aes(x = mat, y = value, colour = zscore), size = 2, linejoin = "bevel") +
scale_colour_gradientn(colours = gradientcolours, values = gradientscale, limits = c(minz, maxz))
#and the title
ggp <- ggp + ggtitle(cCode)
# now the test chart
mm <<- meltcdrecent[meltcdrecent$daysback == 0, ]
ggp <- ggp + geom_point(data = mm, aes(x = mat, y = value, colour = rescale(c(-5, 5, zscore))[-1:-2]), size = 6) +
scale_colour_gradientn(colours = gradientcolours, values = gradientscale, limits = c(0, 1))
ggp <- ggp + geom_point(data = mm, aes(x = mat, y = value), colour = "black", size = 4.5)
ggp <- ggp + geom_text(data = mm, aes(x = mat, y = value), label = round(mm$zscore, 1), colour = "white", size = 2, alpha = 0.7)
It's quite complex, but you can see I have two scale_colour_gradient(s).
Here is my knitr code:
<!--begin.rcode changer, echo=FALSE, fig.height=4.5, fig.width=8
for(x in ac) {
g <- ggCD(x, plotit = FALSE)
suppressWarnings(plot(g$cdChart))
}
end.rcode-->
I would like either to get rid of these warnings (they're not actual real warnings, so suppressWarnings doesn't work), or else, use scale_colour_gradient in a way which does not produce this text in the first place.
Change
<!--begin.rcode changer, echo=FALSE, fig.height=4.5, fig.width=8
into
<!--begin.rcode changer, echo=FALSE, fig.height=4.5, fig.width=8, message=FALSE

Resources