I have a data frame like the following:
df = data.frame(x = runif(100, 0, 1),
y = runif(100, 1, 2),
var1 = runif(100, 0, 1),
var2 = runif(100, 0, 1),
var3 = rep(c("a", "b"), 50))
I want to make a faceted plot in ggplot2 that plots the same x vs y in each facet (scatterplot), but colors by the values of var1, var2, and var3. In this case, there would only be 3 facets, one for each of the coloring columns.
How could this be done?
plots = lapply(3:5, function(i){
dt = df[,c(1, 2, i)]
ggplot(data = dt, aes_string(x = names(dt)[1],
y = names(dt)[2],
color = names(dt[3]))) +
geom_point()
})
library(gridExtra)
do.call(function(...){
grid.arrange (..., ncol = 3)},
plots)
Related
I would like to display a scale_color_gradient scale bar that (i) only has a set number of decimal points and (ii) also always displays "0" not "0.00". What is the best way to do this?
library(ggplot2)
dat <- data.frame(x = rnorm(10, 30, .2), y = runif(10, 3, 5),z = rnorm(10, 30, .2))
scaled.dat <- data.frame(scale(dat))
ggplot(scaled.dat, aes(x, y, colour = z)) + geom_point()+
# Modify the number of decimal points
scale_color_gradient(labels = function(x) sprintf("%.5f", x))
# Make zero value display "0" only
#scale_color_gradient(labels = ~sub("0.0", "0", sprintf("%.1f", .x)))
Using an ifelse you could do:
library(ggplot2)
set.seed(123)
dat <- data.frame(x = rnorm(10, 30, .2), y = runif(10, 3, 5), z = rnorm(10, 30, .2))
scaled.dat <- data.frame(scale(dat))
ggplot(scaled.dat, aes(x, y, colour = z)) +
geom_point() +
scale_color_gradient(
labels = ~ ifelse(.x != 0, sprintf("%.5f", .x), sprintf("%.0f", .x))
)
Here's my dataframe:
df <- data.frame(state = sample(c(0, 1), replace = TRUE, size = 100),
X1 = rnorm(100, 0, 1),
X2 = rnorm(100, 1, 2),
X3 = rnorm(100, 2, 3))
What I would like to do is to plot for each variable X1, X2, X3 two densities/histograms (given the value of state) on the same plot BUT in such a way that all of the plots are on the same facet. I've done these things separately:
ggplot() +
geom_density(data = df, aes(x = X1, group = state, fill = state), alpha = 0.5, adjust = 2) +
xlab("X1") +
ylab("Density")
ggplot(gather(df[df$state == 0, 2:4]), aes(value)) +
geom_density() +
facet_wrap(~key, scales = 'free_x')
but I struggle to make it work together.
I'm assuming that you want the three facets for variables X1, X2 and X3, each with two curves filled by state.
You'll need to convert state to a factor, to make it a categorical variable, using dplyr::mutate(). I would also use the newer tidyr::pivot_longer() instead of gather: this will generate columns name + value by default.
Your data but with a seed to make it reproducible and named df1:
set.seed(1001)
df1 <- data.frame(state = sample(c(0, 1), replace = TRUE, size = 100),
X1 = rnorm(100, 0, 1),
X2 = rnorm(100, 1, 2),
X3 = rnorm(100, 2, 3))
The plot:
library(dplyr)
library(tidyr)
library(ggplot2)
df1 %>%
pivot_longer(-state) %>%
mutate(state = as.factor(state)) %>%
ggplot(aes(value)) +
geom_density(aes(fill = state), alpha = 0.5) +
facet_wrap(~name)
Result:
set.seed(3)
df <- data.frame(lambda = c(rep(0, 6), rep(1, 6), rep(1.5, 6)),
approach = rep(c(rep("A", 3), rep("B", 3)), 3),
value = rnorm(18, 0, 1))
ggplot(data = df, aes(x = lambda, y = value)) + geom_boxplot(aes(fill = approach))
I want to plot 3 sets of boxplots at lambda = 0, 1, and 1.5, respectively. Within each set are 2 boxplots, one corresponds to approach A and the other to approach B. However, the current code is only plotting two boxplots, whereas I'm looking for a total of six.
I think you want "lambda" to be a factor, e.g.
library(tidyverse)
set.seed(3)
df <- data.frame(lambda = c(rep(0, 6), rep(1, 6), rep(1.5, 6)),
approach = rep(c(rep("A", 3), rep("B", 3)), 3),
value = rnorm(18, 0, 1))
ggplot(data = df, aes(x = factor(lambda), y = value)) +
geom_boxplot(aes(fill = approach))
I would like to plot an angle between two lines using ggplot2, meaning something similar to the bold red line in the plot below. Is there an easy solution to this?
Data and code to make the plot without the red line:
library(tidyverse)
df <- tibble(
line = c("A", "A", "B", "B"),
x = c(1, 5, 1, 3),
y = c(1, 3, 1, 5))
ggplot(
df, aes(x, y, group = line))+
geom_path()
have a look at geom_curve, e.g. :
ggplot( df, aes(x, y, group = line))+
geom_path() +
geom_curve(aes(x = 1.5, y = 2, xend = 2, yend = 1.5), curvature = -0.5, color = "red", size = 3)
You will have to tweak it a bit to use it in a more robust, automatic way, for example:
red_curve <- df %>%
group_by(line) %>%
summarise( avg_x = mean(x),
avg_y = mean(y))
ggplot( df, aes(x, y, group = line))+
geom_path() +
geom_curve( data = red_curve, aes(x = avg_x[1], y = avg_y[1], xend = avg_x[2], yend = avg_y[2]), curvature = 0.5, color = "red", size = 3)
Here is a solution with geom_arc of the ggforce package.
library(ggplot2)
library(ggforce)
angle <- function(p, c){
M <- p - c
Arg(complex(real = M[1], imaginary = M[2]))
}
O <- c(1,1)
P1 <- c(5,3)
P2 <- c(3,5)
a1 <- angle(P1, O)
a2 <- angle(P2, O)
df <- data.frame(
line = c("A", "A", "B", "B"),
x = c(1, 5, 1, 3),
y = c(1, 3, 1, 5)
)
ggplot(df, aes(x, y, group = line)) +
geom_path() +
geom_arc(aes(x0 = 1, y0 = 1, r = 1, start = a1, end = a2),
color="red", size = 2, inherit.aes = FALSE)
The arc does not look like a true arc circle. That's because the aspect ratio is not set to 1. To set the aspect ratio to 1:
ggplot(df, aes(x, y, group = line)) +
geom_path() +
geom_arc(aes(x0 = 1, y0 = 1, r = 1, start = a1, end = a2),
color="red", size = 2, inherit.aes = FALSE) +
coord_fixed()
My question is similar to this but the answers there will not work for me. Basically, I'm trying to produce a regression discontinuity plot with a "fuzzy" design that uses all the data for the treatment and control groups, but only plots the regression line within the "range" of the treatment and control groups.
Below, I've simulated some data and produced the fuzzy RD plot with base graphics. I'm hoping to replicate this plot with ggplot2. Note that the most important part of this is that the light blue regression line is fit using all the blue points, while the peach colored regression line is fit using all the red points, despite only being plotted over the ranges in which individuals were intended to receive treatment. That's the part I'm having a hard time replicating in ggplot.
I'd like to move to ggplot because I'd like to use faceting to produce this same plot across various units in which participants were nested. In the code below, I show a non-example using geom_smooth. When there's no fuzziness within a group, it works fine, but otherwise it fails. If I could get geom_smooth to be limited to only specific ranges, I think I'd be set. Any and all help is appreciated.
Simulate data
library(MASS)
mu <- c(0, 0)
sigma <- matrix(c(1, 0.7, 0.7, 1), ncol = 2)
set.seed(100)
d <- as.data.frame(mvrnorm(1e3, mu, sigma))
# Create treatment variable
d$treat <- ifelse(d$V1 <= 0, 1, 0)
# Introduce fuzziness
d$treat[d$treat == 1][sample(100)] <- 0
d$treat[d$treat == 0][sample(100)] <- 1
# Treatment effect
d$V2[d$treat == 1] <- d$V2[d$treat == 1] + 0.5
# Add grouping factor
d$group <- gl(9, 1e3/9)
Produce regression discontinuity plot with base
library(RColorBrewer)
pal <- brewer.pal(5, "RdBu")
color <- d$treat
color[color == 0] <- pal[1]
color[color == 1] <- pal[5]
plot(V2 ~ V1,
data = d,
col = color,
bty = "n")
abline(v = 0, col = "gray", lwd = 3, lty = 2)
# Fit model
m <- lm(V2 ~ V1 + treat, data = d)
# predicted achievement for treatment group
pred_treat <- predict(m,
newdata = data.frame(V1 = seq(-3, 0, 0.1),
treat = 1))
# predicted achievement for control group
pred_no_treat <- predict(m,
newdata = data.frame(V1 = seq(0, 4, 0.1),
treat = 0))
# Add predicted achievement lines
lines(seq(-3, 0, 0.1), pred_treat, col = pal[4], lwd = 3)
lines(seq(0, 4, 0.1), pred_no_treat, col = pal[2], lwd = 3)
# Add legend
legend("bottomright",
legend = c("Treatment", "Control"),
lty = 1,
lwd = 2,
col = c(pal[4], pal[2]),
box.lwd = 0)
non-example with ggplot
d$treat <- factor(d$treat, labels = c("Control", "Treatment"))
library(ggplot2)
ggplot(d, aes(V1, V2, group = treat)) +
geom_point(aes(color = treat)) +
geom_smooth(method = "lm", aes(color = treat)) +
facet_wrap(~group)
Notice the regression lines extending past the treatment range for groups 1 and 2.
There's probably a more graceful way to make the lines with geom_smooth, but it can be hacked together with geom_segment. Munge the data.frames outside of the plotting call if you like.
ggplot(d, aes(x = V1, y = V2, color = factor(treat, labels = c('Control', 'Treatment')))) +
geom_point(shape = 21) +
scale_color_brewer(NULL, type = 'qual', palette = 6) +
geom_vline(aes(xintercept = 0), color = 'grey', size = 1, linetype = 'dashed') +
geom_segment(data = data.frame(t(predict(m, data.frame(V1 = c(-3, 0), treat = 1)))),
aes(x = -3, xend = 0, y = X1, yend = X2), color = pal[4], size = 1) +
geom_segment(data = data.frame(t(predict(m, data.frame(V1 = c(0, 4), treat = 0)))),
aes(x = 0, xend = 4, y = X1, yend = X2), color = pal[2], size = 1)
Another option is geom_path:
df <- data.frame(V1 = c(-3, 0, 0, 4), treat = c(1, 1, 0, 0))
df <- cbind(df, V2 = predict(m, df))
ggplot(d, aes(x = V1, y = V2, color = factor(treat, labels = c('Control', 'Treatment')))) +
geom_point(shape = 21) +
geom_vline(aes(xintercept = 0), color = 'grey', size = 1, linetype = 'dashed') +
scale_color_brewer(NULL, type = 'qual', palette = 6) +
geom_path(data = df, size = 1)
For the edit with facets, if I understand what you want correctly, you can calculate a model for each group with lapply and predict for each group. Here I'm recombine with dplyr::bind_rows instead of do.call(rbind, ...) for the .id parameter to insert the group number from the list element name, though there are other ways to do the same thing.
df <- data.frame(V1 = c(-3, 0, 0, 4), treat = c('Treatment', 'Treatment', 'Control', 'Control'))
m_list <- lapply(split(d, d$group), function(x){lm(V2 ~ V1 + treat, data = x)})
df <- dplyr::bind_rows(lapply(m_list, function(x){cbind(df, V2 = predict(x, df))}), .id = 'group')
ggplot(d, aes(x = V1, y = V2, color = treat)) +
geom_point(shape = 21) +
geom_vline(aes(xintercept = 0), color = 'grey', size = 1, linetype = 'dashed') +
geom_path(data = df, size = 1) +
scale_color_brewer(NULL, type = 'qual', palette = 6) +
facet_wrap(~group)