Use alpha values provided in data - r

I would like to use the explicit values for the alpha level.
head(D)
x y group alpha
1 1 18 A 0.40 <~~~~
2 2 18 A 0.44
3 3 18 A 0.47
4 1 18 A 0.51
5 2 21 B 0.55
6 3 21 B 0.58
...
However, ggplot is scaling the alpha levels. I can override this using scale_alpha_continuous(range = range(D$alpha)), but this becomes a nuisance when creating the graph programmatically.
Is there a direct way to tell ggplot NOT to scale alpha? (instead of telling it what range to scale to)
Reproducible Example
# The plotting functions used below (facet_grid, scale_alpha_continuous,
# ggtitle) come from ggplot2, so that is the package to attach.
library(ggplot2)
library(gridExtra)
# Example data: the `alpha` column holds the literal opacity wanted per bar.
(D <- data.frame(x=rep(1:3, 4), y=rep((6:8)*3, each=4), group=rep(c("A","B", "C"), each=4), alpha=round(seq(.4, .8, length.out=12), 2)))
# Base plot: alpha is mapped as an aesthetic, so ggplot rescales it by default.
P <- ggplot(data=D, aes(x=x, y=y, alpha=alpha)) + geom_bar(stat="identity", fill="blue") + theme(legend.position="bottom") + facet_grid(group ~. )
### Adding scale_alpha_continuous
# Workaround: force the output range of the alpha scale to the data's range.
P.manually_scaled <- P + scale_alpha_continuous(range=range(D$alpha))
# Show the default and the manually scaled version side by side.
grid.arrange( P + ggtitle("INCORRECT")
, P.manually_scaled + ggtitle("CORRECT")
, ncol=2)

If you have actual alpha, color, ..., values then you should use ..identity() scales. This will tell ggplot() to assign alpha values as they are in your data frame and not to scale them.
ggplot(data=D, aes(x=x, y=y, alpha=alpha)) +
geom_bar(stat="identity", fill="blue") +
facet_grid(group ~. ) +
scale_alpha_identity()

Related

R: ggarrange increasing distance between plot title of the multiplot and titles of each plot

Assume I have the following data to generate two bar plots as a multiplot:
quarter1
variable value merge1
1 h=4 3 no
2 h=7 2 yes
3 h=8 3 no
4 h=21 2 no
quarter2
variable value merge2
1 h=6 1 no
2 h=7 2 yes
3 h=10 1 no
4 h=12 3 no
5 h=13 1 no
6 h=16 1 no
7 h=17 1 no
Code for the two plots:
# First bar chart (quarter1); grp_colors is defined elsewhere by the asker.
bar_q <- ggplot(quarter1, aes(x=variable, y=value, fill=merge1)) + geom_bar(stat="identity")
bar_qf <- bar_q + ggtitle("k = 0") +
theme(axis.text=element_text(size=24, color="gray0"), axis.title=element_blank()) +
scale_y_continuous(breaks= function(x) unique(floor(pretty(seq(0, (max(x) + 1) * 1.1))))) +
# guide="none" hides the fill legend; it replaces the deprecated guide=F
# (and avoids the reassignable shorthand F).
scale_fill_manual(values= grp_colors, guide="none") +
theme(plot.title = element_text(hjust = 0.5, size=24))
# Second bar chart (quarter2), with a plotmath title and a fixed y range.
bar2_q <- ggplot(quarter2, aes(x=variable, y=value, fill=merge2)) + geom_bar(stat="identity")
bar2_qf <- bar2_q + ggtitle(expression(k %in% group("[", "1;4", "]"))) + theme(axis.text=element_text(size=24 , color="gray0"), axis.title=element_blank()) +
scale_y_continuous(limits=c(0,3), breaks=seq(3)) +
scale_fill_manual(values= grp_colors, guide="none") +
theme(plot.title = element_text(hjust = 0.5, size=24))
Generating multiplot with:
plot_quarter <- egg::ggarrange(bar_qf,bar2_qf, ncol=2, top=textGrob("Quartalsdaten:Häufigkeiten", gp=gpar(fontsize=28,font=2)))
Resulting plot looks like this:
If you look at the "g" in the plot title, it is cut off slightly. I tried to increase the distance between the title of the multiplot and the titles of each plot, but I can't find a way to do it in ggarrange. Does someone know how I can do this in ggarrange/textGrob?
grid has always been a bit peculiar in estimating textGrob heights. ggplot2 recently introduced a titleGrob to deal with this more consistently; unfortunately it's a private function not meant to be used externally.
The two easiest workarounds I can think of are:
library(ggplot2)
p1 <- p2 <- ggplot()
library(grid)
# create a new class and give it a more generous height
tg <- grobTree(textGrob("Quartalsdaten:Häufigkeiten", gp=gpar(fontsize=28,font=2)), cl='title')
heightDetails.title <- function(x) grobHeight(x$children[[1]]) + unit(2,"line")
egg::ggarrange(p1,p2, ncol=2, top=tg)
library(gridExtra)
# wrap the text in a dummy gtable
tg <- gridExtra::tableGrob("Quartalsdaten:Häufigkeiten",
theme = ttheme_minimal(base_size = 28, padding = unit(c(0, 2), "line"),
core = list(fg_params=list(font=2))))
egg::ggarrange(p1,p2, ncol=2, top=tg)
Edit: actually, as pointed out in the comments, ggarrange has a padding argument for this purpose,
egg::ggarrange(p1,p2, ncol=2, top=textGrob("Quartalsdaten:Häufigkeiten", gp=gpar(fontsize=28,font=2)), padding = unit(1,"line"))

Exploded 180 degree pie chart in R ggplot or ggvis (image included)?

Given a dataset with a factor column (X1) and a subtotal column (X2)
X1 X2
1 1 12
2 2 200
3 3 23
4 4 86
5 5 141
I would like to create a graphic like this:
which gives x2 as a percentage of the X2 total, divided by X1.
Edit: clarity and adding dataset for reproducibility
For example
set.seed(1234)
df <- data.frame(x = 1:6)
df$y <- runif(nrow(df))
df$type <- sample(letters, nrow(df))
ggplot(df, aes(x+-.5, y, fill=type)) +
geom_bar(stat="identity", width=1) +
coord_polar(start = pi/2) +
scale_x_continuous(limits = c(0, nrow(df)*2)) +
geom_text(aes(label=scales::percent(y))) +
ggthemes::theme_map() + theme(legend.position = c(0,.15))
gives you

what does ..level.. mean in ggplot::stat_density2d

I've seen some examples when constructing a heatmap of having the fill variable set to ..level...
Such as in this example:
library(MASS)
ggplot(geyser, aes(x = duration, y = waiting)) +
geom_point() +
geom_density2d() +
stat_density2d(aes(fill = ..level..), geom = "polygon")
I suspect that the ..level.. means that the fill is set to the relative amount of layers present? Also could someone link me a good example of how to interpret these 2D-density plots, what does each contour represent etc.? I have searched online but couldn't find any suitable guide.
The stat_ functions compute new values and create new data frames. This one creates a data frame with a level variable. You can see it if you use ggplot_build instead of plotting the graph:
library(ggplot2)
library(MASS)
gg <- ggplot(geyser, aes(x = duration, y = waiting)) +
geom_point() +
geom_density2d() +
stat_density2d(aes(fill = ..level..), geom = "polygon")
gb <- ggplot_build(gg)
head(gb$data[[3]])
## fill level x y piece group PANEL
## 1 #132B43 0.002 3.876502 43.00000 1 1-001 1
## 2 #132B43 0.002 3.864478 43.09492 1 1-001 1
## 3 #132B43 0.002 3.817845 43.50833 1 1-001 1
## 4 #132B43 0.002 3.802885 43.65657 1 1-001 1
## 5 #132B43 0.002 3.771212 43.97583 1 1-001 1
## 6 #132B43 0.002 3.741335 44.31313 1 1-001 1
The ..level.. tells ggplot to reference that column in the newly built data frame.
Under the hood, ggplot is doing something similar to (this is not a replication of it 100% as it uses different plot limits, etc):
n <- 100
h <- c(bandwidth.nrd(geyser$duration), bandwidth.nrd(geyser$waiting))
dens <- kde2d(geyser$duration, geyser$waiting, n=n, h=h)
df <- data.frame(expand.grid(x = dens$x, y = dens$y), z = as.vector(dens$z))
head(df)
## x y z
## 1 0.8333333 43 9.068691e-13
## 2 0.8799663 43 1.287684e-12
## 3 0.9265993 43 1.802768e-12
## 4 0.9732323 43 2.488479e-12
## 5 1.0198653 43 3.386816e-12
## 6 1.0664983 43 4.544811e-12
And also calling contourLines to get the polygons.
This is a decent introduction to the topic. Also look at ?kde2d in R help.
Expanding on the answer provided by @hrbrmstr -- first, the call to geom_density2d() is redundant. That is, you can achieve the same results with:
library(ggplot2)
library(MASS)
gg <- ggplot(geyser, aes(x = duration, y = waiting)) +
geom_point() +
stat_density2d(aes(fill = ..level..), geom = "polygon")
Let's consider some other ways to visualize this density estimate that may help clarify what is going on:
base_plot <- ggplot(geyser, aes(x = duration, y = waiting)) +
geom_point()
base_plot +
stat_density2d(aes(color = ..level..))
base_plot +
stat_density2d(aes(fill = ..density..), geom = "raster", contour = FALSE)
base_plot +
stat_density2d(aes(alpha = ..density..), geom = "tile", contour = FALSE)
Notice, however, we can no longer see the points generated from geom_point().
Finally, note that you can control the bandwidth of the density estimate. To do this, we pass x and y bandwidth arguments to h (see ?kde2d):
base_plot +
stat_density2d(aes(fill = ..density..), geom = "raster", contour = FALSE,
h = c(2, 5))
Again, the points from geom_point() are hidden as they are behind the call to stat_density2d().

ggplot: colouring areas between density lines according to relative position

I have this plot
set.seed(28100)
df <- data.frame(value = sample(1:10000,1000,replace=TRUE),
gender = sample(c("male","female"),1000,replace=TRUE))
ggplot(df, aes(value)) +
geom_density() +
geom_density(data=subset(df, gender=='male'), aes(value), colour="blue") +
geom_density(data=subset(df, gender=='female'), aes(value), colour="red")
I wonder if it's conceivable to fill the areas between the red and blue density lines with two colours: one colour when the blue line is above the red line and a different colour when the blue line is below.
There's no easy way to color in different overlapping regions unless you explicitly calculate the regions yourself. Here's a function that can help calculate regions where densities swap places
# Compute, on a common grid, the lower and upper envelope of the per-group
# kernel density curves and mark the stretches where one group stays on top.
#
# Arguments:
#   val  numeric vector of observations.
#   grp  grouping vector (coerced to factor), same length as `val`.
#   N    number of grid points used for the default `x`.
#   x    grid at which the densities are evaluated; defaults to N evenly
#        spaced points spanning the range of `val`.
#
# Returns a data.frame with one row per grid point:
#   x      the grid value,
#   ymin   the smallest group density at x,
#   ymax   the largest group density at x,
#   top    the level of `grp` whose density is largest at x,
#   group  a run id that increases each time `top` changes, suitable for
#          aes(group = group) in geom_ribbon().
densitysplit <- function(val, grp, N=200, x=seq(min(val), max(val), length.out=N)) {
  grp <- factor(grp)
  den <- Map(function(z) {
    dx <- density(val[grp == z])
    # density() only covers this group's own range (plus a few bandwidths).
    # Elsewhere on the shared grid the estimate is taken as 0 rather than NA,
    # so well-separated groups no longer produce all-NA rows (which made
    # which.max() return integer(0) and the function fail).
    approxfun(dx$x, dx$y, yleft = 0, yright = 0)(x)
  }, levels(grp))
  # Drop the level names so that do.call() passes the curves positionally and
  # a level can never be matched as a named argument of pmin()/pmax().
  den <- unname(den)
  # Column index (= level index) of the largest density at each grid point.
  maxcat <- apply(do.call("cbind", den), 1, which.max)
  data.frame(x = x, ymin = do.call("pmin", den), ymax = do.call("pmax", den),
    top = levels(grp)[maxcat],
    # Start a new run whenever the top-ranked level changes.
    group = cumsum(c(1, diff(maxcat) != 0))
  )
}
For your data, you would do something like this
head(densitysplit(df$value, df$gender))
# x ymin ymax top group
# 1 8.00000 4.214081e-05 5.198326e-05 male 1
# 2 58.17085 4.485596e-05 5.433638e-05 male 1
# 3 108.34171 4.760983e-05 5.665547e-05 male 1
# 4 158.51256 5.039037e-05 5.893143e-05 male 1
# 5 208.68342 5.318724e-05 6.115595e-05 male 1
# 6 258.85427 5.598707e-05 6.332672e-05 male 1
This gives you the data you need to use geom_ribbon to plot the data. You can do
ggplot(df, aes(value)) +
geom_ribbon(data=densitysplit(df$value, df$gender), aes(x, ymin=ymin, ymax=ymax, fill=top, group=group)) +
geom_density() +
geom_density(data=subset(df, gender=='male'), aes(value), colour="blue") +
geom_density(data=subset(df, gender=='female'), aes(value), colour="red")
You can use fill and alpha to generate the (maybe) desired effect.
set.seed(28100)
df <- data.frame(value = sample(1:10000,1000,replace=TRUE),
gender = sample(c("male","female"),1000,replace=TRUE))
ggplot(df, aes(value, colour=gender, fill=gender, alpha=0.5)) +
geom_density() +theme(legend.position="none")
I hope this helps. Cheers

R ggplot barplot; Fill based on two separate variables

A picture says more than a thousand words. As you can see, my fill is based on the variable variable.
Within each bar there are, however, multiple data entities (black borders), since the discrete variable Complexity makes them unique. What I am trying to find is something that makes each section of the bar more distinguishable than the current look. Preferably it would be something like shading.
Here's an example (not the same dataset, since the original was imported):
dat <- read.table(text = "Complexity Method Sens Spec MMC
1 L Alpha 50 20 10
2 M Alpha 40 30 80
3 H Alpha 10 10 5
4 L Beta 70 50 60
5 M Beta 49 10 80
6 H Beta 90 17 48
7 L Gamma 19 5 93
8 M Gamma 18 39 4
9 H Gamma 10 84 74", sep = "", header=T)
library(ggplot2)
library(reshape)
short.m <- melt(dat)
ggplot(short.m, aes(x=Method, y= value/100 , fill=variable)) +
geom_bar(stat="identity",position="dodge", colour="black") +
coord_flip()
This is far from perfect, but hopefully a step in the right direction, as it's dodged by variable, but still manages to represent Complexity in some way:
ggplot(short.m, aes(x=Method, y=value/100, group=variable, fill=variable, alpha=Complexity,)) +
geom_bar(stat="identity",position="dodge", colour="black") +
scale_alpha_manual(values=c(0.1, 0.5, 1)) +
coord_flip()
Adding alpha=Complexity might work:
# Map bar opacity to the Complexity column (R names are case-sensitive; the
# melted data has `Complexity`, not `complexity`).
ggplot(short.m, aes(x=Method, y= value/100 , fill=variable, alpha=Complexity)) +
geom_bar(stat="identity",position="dodge", colour="black") + coord_flip()
You might need to separate your Method and variable factors. Here are two ways to do that:
Use facet_wrap():
# One panel per Method; bars are stacked by Complexity within each variable.
# stat="identity" is required because a y aesthetic is supplied: the default
# count stat of geom_bar() does not accept y.
ggplot(short.m, aes(x=variable, y=value/100, fill=Complexity)) +
facet_wrap(~ Method) + geom_bar(stat="identity", position="stack", colour="black") +
scale_alpha_manual(values=c(0.1, 0.5, 1)) + coord_flip()
Use both on the x-axis:
ggplot(short.m, aes(x=Method:variable, y=value/100, group=Method, fill=variable, alpha=Complexity,)) +
geom_bar(stat="identity", position="stack", colour="black") +
scale_alpha_manual(values=c(0.1, 0.5, 1)) + coord_flip()

Resources