conditional probability is not displaying properly in facet label in ggplot2 - r

I am trying to display the expression p(\theta|y) in a facet label but it displays as p(|(\theta, y))
I have the following code:
library(tidyverse)
X = seq(0, 1, by = 0.01)
prior = dbeta(X, 3, 2)
posterior = dbeta(X, 14, 6)
plot_df =
tibble(X = X, `p(theta)` = prior, `p(theta|y)` = posterior) %>%
gather(type, Y, -X) %>%
mutate(type = factor(type, levels = c("p(theta)", "p(theta|y)")))
ggplot(plot_df, aes(X, Y)) +
geom_line() +
facet_wrap(~type, ncol = 1, labeller = label_parsed, strip.position = "left") +
xlab(expr(theta)) +
ylab("")
So how do I get that to display as p(\theta | y)?

The following works. The trick is to add spacing characters, asterisks, and to put the vertical bar between single quotes. This must match the new factor levels.
plot_df =
tibble(X = X, `p(theta)` = prior, `p(theta*'|'*y)` = posterior) %>%
gather(type, Y, -X) %>%
mutate(type = factor(type, levels = c("p(theta)", "p(theta*'|'*y)")))
The rest of the code is exactly the same.
ggplot(plot_df, aes(X, Y)) +
geom_line() +
facet_wrap(~type, ncol = 1, labeller = label_parsed, strip.position = "left") +
xlab(expr(theta)) +
ylab("")

Related

R ggplot label number of observations per ordered violin with facet wrap

I've got a plot that looks like the output of the following code using the iris data
require(tidyverse)
require(purrr)
require(forcats) # Useful for ordering facets found at [here][1]
# Make some long data and set a custom sorting order using some of t
tbl <- iris %>%
pivot_longer(., cols = 1:4, names_to = "Msr", values_to = "Vls") %>%
mutate(Msr = factor(Msr)) %>%
mutate(plot_fct = fct_cross(Species, Msr)) %>%
mutate(plot_fct = fct_reorder(plot_fct, Vls))
# A functioning factory for minor log breaks found [here][1] (very helpful)
minor_breaks_log <- function(base) {
# Prevents lazy evaluation
force(base)
# Wrap calculation in a function that the outer function returns
function(limits) {
ggplot2:::calc_logticks(
base = base,
minpow = floor(log(limits[1], base = base)),
maxpow = ceiling(log(limits[2], base = base))
)$value
}
}
# Plot the images
ggplot(data = tbl, aes(x =plot_fct, y = Vls, fill = Species)) +
geom_violin() +
coord_flip() + # swap coords
scale_y_log10(labels = function(x) sprintf("%g", x),
minor_breaks = minor_breaks_log(10)) + # format for labels # box fills
theme_bw(base_size = 12) +
annotation_logticks(base = 10, sides = "b") +
facet_wrap(~Species, nrow = 1, scales = "free")
I would now like to list the number of observations per violin on the right side of each facet just inside the maximum border, which I'm sure is possible but cannot seem to find an example that does this sort of labeling, with violins and facets.
ggplot(data = tbl, aes(y = plot_fct, fill = Species)) +
geom_violin(aes(x = Vls)) +
geom_text(aes(label = after_stat(count)), hjust = 1,
stat = "count", position = "fill") +
scale_x_log10(labels = function(x) sprintf("%g", x),
minor_breaks = minor_breaks_log(10)) + # format for labels # box fills
theme_bw(base_size = 12) +
annotation_logticks(base = 10, sides = "b") +
facet_wrap(~Species, nrow = 1, scales = "free")

How to put two mean plots on a single plane with different scale?

I obtained the two separate mean plots. Is there any simple way to combine them on a single plane with different line colours? Tricky part is each has a different scale, so I want to put one (lshare) scale on left hand side of y-axis and the other (va) on right side of y-axis.
p1 <- ggplot(df, aes(x = year, y = lshare)) + stat_summary(geom = "line", fun.y = mean)
p2 <- ggplot(df, aes(x = year, y = va)) + stat_summary(geom = "line", fun.y = mean)
grid.arrange(p1, p2, ncol = 2)
Update2:
Combining all:
library(tidyverse)
mtcars %>%
select(mpg, disp) %>%
mutate(year = 1900:1931) %>%
pivot_longer(
c(mpg, disp)
) %>%
ggplot(aes(x=year, y=value, group=name, color=name))+
stat_summary(fun =mean, geom="line", size=1) +
scale_y_continuous(
name = "my first y axis",
sec.axis = sec_axis(~./10, name="my second y axis")
)
Update: How to add secodn y axis as requested:
library(tidyverse)
mtcars %>%
select(mpg, disp) %>%
mutate(year = 1900:1931) %>%
ggplot(aes(x=year))+
geom_line(aes(y=mpg*10), size=1, color="red")+
geom_line(aes(y=disp), size=1, color="blue") +
scale_y_continuous(
name = "my first y axis",
sec.axis = sec_axis(~./10, name="my second y axis")
)
First answer:
Here is a reproducible example with the mtcars dataset:
library(tidyverse)
mtcars %>%
select(mpg, disp) %>%
mutate(year = 1900:1931) %>%
pivot_longer(
c(mpg, disp)
) %>%
ggplot(aes(x=year, y=value, group=name, color=name))+
stat_summary(fun =mean, geom="line", size=1)
As #jdobres commented, you can use facet_wrap(), like in the following example. Simply introduce a grouping factor to your data.frame.
set.seed(1)
# sample data
year <- 1:20
lshare <- 0.50 - 0.02 * year + rnorm(length(year), sd = 3)
df <- data.frame(year = c(year, year), lshare = c(lshare, lshare))
df$group <- factor(gl(2, length(year)))
# plot
ggplot(df, aes(x = year, y = lshare, colour = group)) +
stat_summary(geom = "line", fun.y = mean, size = 1) +
facet_wrap(~ group)
Addition
As per your edit, which I saw after I posted this answer, facet_wrap() also works when you want to have two different y-axes. You just have to play a bit with the function that is specified within sec_axis().
set.seed(1)
# sample data
year <- 1:20
lshare <- 0.50 - 0.02 * year + rnorm(length(year), sd = 3)
noise <- abs(rnorm(length(lshare), mean = 150, sd = 100))
df <- data.frame(year = c(year, year), lshare = c(lshare, lshare + noise))
df$group <- factor(gl(2, length(year)))
# set two limits
ylim_left <- with(subset(df, group == 1), c(min(lshare), max(lshare)))
ylim_right <- with(subset(df, group == 2), c(min(lshare), max(lshare)))
axis_right <- diff(ylim_left)/diff(ylim_right)
axis_left <- ylim_left[1] - axis_right * ylim_right[1]
# plot
ggplot(df, aes(x = year, y = lshare, colour = group)) +
stat_summary(geom = "line", fun = mean, size = 1) +
facet_wrap(~ group) +
scale_y_continuous(sec.axis = sec_axis(~ (. - axis_left)/axis_right))
Addition 2
If you would like to have the two lines in the same pane, you can use something along the following lines of code. Note, I use the same data as in the first addition (see above).
# set two limits
ylim_left <- with(subset(df, group == 1), c(min(lshare), max(lshare)))
ylim_right <- with(subset(df, group == 2), c(min(lshare), max(lshare)))
axis_right <- diff(ylim_left)/diff(ylim_right)
axis_left <- ylim_left[1] - axis_right * ylim_right[1]
# plot
ggplot(df, aes(colour = group)) +
stat_summary(data = subset(df, group == 1),
mapping = aes(x = year, y = lshare),
geom = "line", fun = mean, size = 1) +
stat_summary(data = subset(df, group == 2),
mapping = aes(x = year, y = lshare),
geom = "line", fun = mean, size = 1) +
scale_y_continuous(sec.axis = sec_axis(~ (. - axis_left)/axis_right)) +
scale_colour_manual(name = 'My_groups',
values = c('1' = "blue4", '2' = "darkorange"),
labels = c('Group 1', 'Group 2'))

ggplot2 add sum to chart

Using mtcars as an example, I've produced some violin plots. I wanted to add two things to this chart:
for each group, list n
for each group, sum a third variable (e.g. wt)
I can do (1) with the geom_text code below although (n) is actually plotted on the x axis rather than off to the side.
But I can't work out how to do (2).
Any help much appreciated!
library(ggplot2)
library(gridExtra)
library(ggthemes)
result <- mtcars
ggplot(result, aes(x = gear, y = drat, , group=gear)) +
theme_tufte(base_size = 15) + theme(line=element_blank()) +
geom_violin(fill = "white") +
geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
ylab("drat") +
xlab("gear") +
coord_flip()+
geom_text(stat = "count", aes(label = ..count.., y = ..count..))
You can add both of these annotations by creating them in your dataframe temporarily prior to graphing. Using the dplyr package, you can create two new columns, one with the count for each group, and one with the sum of wt for each group. This can then be piped directly into your ggplot using %>% (alternatively, you could save the new dataset and insert it into ggplot the way you have it). Then with some minor edits to your geom_text call and adding a second one, we can create the plot you want. The code looks like this:
library(ggplot2)
library(gridExtra)
library(ggthemes)
library(magrittr)
library(dplyr)
result <- mtcars
result %>%
group_by(gear) %>%
mutate(count = n(), sum_wt = sum(wt)) %>%
ggplot(aes(x = gear, y = drat, , group=gear)) +
theme_tufte(base_size = 15) + theme(line=element_blank()) +
geom_violin(fill = "white") +
geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
ylab("drat") +
xlab("gear") +
coord_flip()+
geom_text(aes(label = paste0("n = ", count),
x = (gear + 0.25),
y = 4.75)) +
geom_text(aes(label = paste0("sum wt = ", sum_wt),
x = (gear - 0.25),
y = 4.75))
The new graph looks like this:
Alternatively, if you create a summary data frame named result_sum, then you can manually add that into the geom_text calls.
result <- mtcars %>%
mutate(gear = factor(as.character(gear)))
result_sum <- result %>%
group_by(gear) %>%
summarise(count = n(), sum_wt = sum(wt))
ggplot(result, aes(x = gear, y = drat, , group=gear)) +
theme_tufte(base_size = 15) +
theme(line=element_blank()) +
geom_violin(fill = "white") +
geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
ylab("drat") +
xlab("gear") +
coord_flip()+
geom_text(data = result_sum, aes(label = paste0("n = ", count),
x = (as.numeric(gear) + 0.25),
y = 4.75)) +
geom_text(data = result_sum, aes(label = paste0("sum wt = ", sum_wt),
x = (as.numeric(gear) - 0.25),
y = 4.75))
This gives you this:
The benefit to this second method is that the text isn't bold like in the first graph. The bold effect occurs in the first graph due to the text being printed over itself for all observations in the dataframe.
Thanks to those who helped.... I used this in the end which plots the calculated values, one set of classes being text based so using vjust to position the vertical offset.
thanks again!
library(ggplot2)
library(gridExtra)
library(ggthemes)
results <- mtcars
results$gear <- as.factor(as.character(results$gear)) #Turn 'gear' to text to simulate classes, then factorise
result_sum <- results %>%
group_by(gear) %>%
summarise(count = n(), sum_wt = sum(wt))
ggplot(results, aes(x = gear, y = drat, group=gear)) +
theme_tufte(base_size = 15) + theme(line=element_blank()) +
geom_violin(fill = "white") +
geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
ylab("drat") +
xlab("gear") +
coord_flip()+
geom_text(data = result_sum, aes(label = paste0("n = ", count), x = (gear), vjust= 0, y = 5.25)) +
geom_text(data = result_sum, aes(label = paste0("sum wt = ", round(sum_wt,0)), x = (gear), vjust= -2, y = 5.25))

Position ggplot text in each corner with facet

I have a data frame with 10 values in each of the 5 kinds and have 2 types.
df <- data.frame(x2=rnorm(100),y2=rnorm(100), type = c(rep("type a", 50), rep("type b", 50)), kind = rep(LETTERS[1:5],10))
I want to print the labels of percent of values in each quadrants. My current code is:
ggplot(df, aes(x2, y2)) + geom_point() +
geom_vline(xintercept = 0) +
geom_hline(yintercept = 0) +
geom_text(data = df, aes(x2, y2, label = "")) +
facet_grid(type~kind)
Current output:
Expected output (for example I showed percentage of kind A and B of type a, i want to plot percent values for all kinds and types):
Any suggestion would be great. Thanks!
You probably need to calculate the proportions outside ggplot2,
library(dplyr)
numbers <- df %>% group_by(type,kind) %>% mutate(cases = n()) %>%
add_count(x2>0,y2>0) %>% mutate(label=paste(round(n/cases*100),"%"),
x = ifelse(`x2 > 0`, Inf, -Inf),
y = ifelse(`y2 > 0`, Inf, -Inf),
hjust = ifelse(`x2 > 0`, 1, 0),
vjust = ifelse(`y2 > 0`, 1, 0))
ggplot(df, aes(x2, y2)) + geom_point() +
geom_vline(xintercept = 0) +
geom_hline(yintercept = 0) +
facet_grid(type~kind) +
geom_label(data=numbers, aes(label = label, x=x, y=y, vjust=vjust, hjust = hjust))

ggplot2: merge two legends

I'm trying to plot an area with two different set of points with ggplot2 but I get always two different legends. I've read this and this but I still have two legends.
Below the code and the chart.
Thank you very much
library(ggplot2)
library(dplyr)
set.seed(1)
df <- data.frame(x = letters,
y = 1:26 +runif(26),
z = 2*(1:26) + runif(26),
jj = 1:26,
hh = 1:26*2,
x1 = 1:26)
some_names <- df %>%
filter(row_number() %% 10 == 1) %>%
select(x,x1)
p <- df %>%
ggplot(aes(x1)) +
geom_ribbon(aes(ymin = y, ymax = z, fill = "area")) +
geom_point(aes(y = jj, colour = "points1")) +
geom_point(aes(y = hh, colour = "points2")) +
scale_x_continuous(breaks = some_names %>% select(x1) %>% unlist %>% unname,
labels = some_names %>% select(x) %>% unlist %>% unname )
p + scale_fill_manual(name = "legend",
values = c("area" = "red","points1" = NA,"points2" = NA)) +
scale_colour_manual(name = "legend",
values = c("area" = NA ,"points1" = "blue","points2" = "purple"))
You could do something in the veins of
library(tidyverse)
packageVersion("ggplot2")
# [1] ‘2.2.1’
df %>%
gather(var, val, jj, hh) %>%
ggplot(aes(x1, val, ymin=y, ymax=z, color=var, fill=var)) +
geom_ribbon(color=NA) +
geom_point() +
scale_color_manual(values=c("blue","purple"), name="leg", labels = c("lab1","lab2")) +
scale_fill_manual(values = rep("red", 2), name="leg", labels= c("lab1","lab2"))
or
library(tidyverse)
df %>%
gather(var, val, jj, hh) %>%
bind_rows(data.frame(x=NA,y=NA,z=NA,x1=NA,var="_dummy",val=NA)) %>%
ggplot(aes(x1, val, ymin=y, ymax=z, color=var, fill=var)) +
geom_ribbon(color=NA) +
geom_ribbon(color=NA, fill="red") +
geom_point() +
scale_color_manual(
values=c("#FFFFFF00", "blue","purple"), name="leg", labels = c("lab1","lab2","lab3")) +
scale_fill_manual(
values = c("red", rep(NA, 2)), name="leg", labels= c("lab1","lab2","lab3"))
One option is to use an interior fill for each element. There may be a way to use override.aes to get the points to be a point in the legend, but I wasn't able to get that with any quick experimentation.
p <- df %>%
ggplot(aes(x1)) +
geom_ribbon(aes(ymin = y, ymax = z, fill = "area")) +
geom_point(aes(y = jj, fill = "points1"), shape=21, colour="blue") +
geom_point(aes(y = hh, fill = "points2"), shape=21, colour="purple") +
scale_x_continuous(breaks = some_names %>% select(x1) %>% unlist %>% unname,
labels = some_names %>% select(x) %>% unlist %>% unname ) +
scale_fill_manual(name = "legend",
values = c("area" = "red","points1" = "blue","points2" = "purple"),
guide = guide_legend(override.aes=aes(colour=NA)))
p

Resources