How to plot several boxplots by group in r? - r

# Example data from the question: ten observations assigned to three groups.
ID <- 1:10
group <- c(1,1,1,2,2,2,3,3,3,3)
var1 <- c(6:15)
var2 <- c(7:16)
# var3 and var4 are missing (NA) for the last four observations
var3 <- c(6:11, NA, NA, NA, NA)
var4 <- c(4:9, NA, NA, NA, NA)
data <- data.frame(ID, group, var1, var2, var3, var4)
library(dplyr)
# Attempt from the question; it does not produce the intended per-group boxplots
data %>% group_by(group) %>% boxplot(var1, var2)
The last line does not work as I wish. The idea is to get 4 boxplots in one graphic, two for each variable. Maybe I need to use ggplot2?

You need to reorganize the data if you want to get both variables in the same plot. Here is a ggplot2 solution:
# load libraries
library(ggplot2)
library(tidyr)
library(ggthemes)
# reorganize data: keep ID and group as identifier columns and stack every
# other column (var1..var4) into a name column `var` and a value column `value`.
# (gather(data, "ID", "group") would stack *all* columns into just two, so the
# four-name rename that followed could not work.)
df <- pivot_longer(data, cols = -c(ID, group),
                   names_to = "var", values_to = "value")
# plot: one dodged box per variable inside each group
ggplot(data = df) +
  geom_boxplot(aes(x = factor(group), y = value, fill = factor(var)),
               position = position_dodge(1)) +
  # relabel the group codes 1/2/3 as A/B/C on the x axis
  scale_x_discrete(breaks = c("1", "2", "3"), labels = c("A", "B", "C")) +
  theme_minimal() +
  scale_fill_grey()
Making boxplots with the same width is a whole different question (solution here), but one simple alternative would be like this:
# Replace the numeric group codes in `df` by letters: 1 -> A, 2 -> B, anything else -> C.
df <- transform(
  df,
  group = ifelse(group == 1, 'A', ifelse(group == 2, 'B', "C"))
)
# One panel per group; inside each panel one box per variable with the raw
# observations jittered on top. The shared x/y mapping lives in ggplot().
ggplot(data = df, mapping = aes(x = factor(var), y = value)) +
  geom_boxplot(aes(fill = factor((var))), position = position_dodge(1)) +
  geom_jitter(aes(color = factor((var)))) +
  facet_grid(. ~ group, scales = "free_x") +
  theme_minimal() +
  scale_fill_grey() +
  # the x axis carries no extra information (the legend identifies the variables)
  theme(
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.ticks = element_blank()
  )

You might try melting the data frame first (as mentioned in a comment by @lukeA) and then sticking to base graphics. ggplot2 or lattice are other good options.
library(reshape2)
# Long format: ID and group stay as identifiers, var1 and var2 are stacked into
# the columns `variable` (name) and `value`.
DF <- melt(
  data,
  id.vars = c("ID", "group"),
  measure.vars = paste0("var", 1:2)
)
# Base-graphics boxplot with one box per group/variable combination
boxplot(value ~ group + variable, data = DF)
Alternate lattice code, also using DF:
# bwplot() comes from lattice, which has to be attached first
library(lattice)
# One panel per variable/group combination
bwplot(~ value | variable + group, data = DF)
Alternate ggplot2 code, also using DF:
# ggplot() needs ggplot2 attached
library(ggplot2)
# One box per variable within each group
ggplot(DF, aes(x = factor(group), y = value, fill = variable)) +
  geom_boxplot()

Although quite late, I found a great base-R solution here:
# Create some data, e.g. from https://en.wikipedia.org/wiki/One-way_analysis_of_variance#Example
# The matrix has six rows (observations) and three columns V1..V3 (treatments).
df <- as.data.frame(matrix(c(6, 8, 13, 8, 12, 9, 4, 9, 11, 5, 11, 8, 3, 6, 7, 4, 8, 12),
                           ncol = 3, byrow = TRUE))
# Wide -> long: sep = "" splits the names V1..V3 into the value column `V` and
# the treatment number. The default row-number `id` identifies the records, so
# the reshape does not rely on every row of the matrix being unique.
df <- reshape(data = df, direction = "long", varying = 1:3, sep = "",
              timevar = "Treatment")
df$Treatment <- as.factor(df$Treatment)
rownames(df) <- NULL

# Two stacked panels. par() returns the previous values of the settings it
# changes; keep them so that both the layout and the margins can be restored.
old_par <- par(mfrow = c(2, 1),
               mar = c(1, 4, 4, 2) + 0.1) # mar=c(b,l,t,r)

# Top panel: boxplots with jittered points, no x axis
boxplot(V ~ Treatment, data = df, xlab = NULL, xaxt = "n",
        ylab = "V", main = "One-way anova with 3 different levels of one factor")
stripchart(V ~ Treatment,     # Points
           data = df,         # Data
           method = "jitter", # Random noise
           pch = 19,          # Pch symbols
           col = 4,           # Color of the symbol
           vertical = TRUE,   # Vertical mode
           add = TRUE)        # Add it over

# Bottom panel: same boxplots, points drawn without jitter
par(mar = c(5, 4, 0, 2) + 0.1)
boxplot(V ~ Treatment, data = df, xlab = "Treatment",
        ylab = "V", main = NULL)
stripchart(V ~ Treatment,       # Points
           data = df,           # Data
           method = "overplot", # Points are drawn at their exact position (no noise)
           pch = 19,            # Pch symbols
           col = 4,             # Color of the symbol
           vertical = TRUE,     # Vertical mode
           add = TRUE)          # Add it over

# Restore the layout and margins that were active before this snippet
par(old_par)
Result:

Related

Function to graph multiple X's at once

I'm trying to check the correlation of a bunch of variables and wanted to create a graph(s) of all the dependent variables on my response.
# Example data from the question: a Price response and three candidate
# predictors (Var1..Var3), five observations each.
Price <- c(10,11,22,15,15)
Var1 <- c(2,3,12,5,17)
Var2 <- c(3,3,12,16,7)
Var3 <- c(2,5,2,5,18)
data <- data.frame(Var1,Var2,Var3,Price)
I was thinking something like this would work ;
# Attempt from the question: one scatter plot with a fitted line per predictor.
variables <- c('Var1', 'Var2', 'Var3')
for (i in variables){
# NOTE: `i` holds a column name as a character string, so aes(x = i) does not
# refer to the column of that name
plot <- ggplot(data=data, aes(x = i, y=Price))+
geom_point(shape=16, color="dodgerblue")+
geom_smooth(method=lm, color='Black')
print(plot)
}
But it only prints out the response for variable 3 without any values of x.
As i in your loop is a character, you need to call it with get(i) in your ggplot:
# One scatter plot with a fitted linear-model line per predictor named in `variables`.
for (i in variables) {
  # `.data[[i]]` looks the column up by its name inside the plot data; unlike
  # get(i) it cannot accidentally pick up an object of the same name from the
  # calling environment.
  plot <- ggplot(data = data, aes(x = .data[[i]], y = Price)) +
    geom_point(shape = 16, color = "dodgerblue") +
    geom_smooth(method = "lm", color = "Black") +
    # label the x axis with the predictor name instead of the mapping expression
    labs(x = i)
  # plots created inside a loop are only drawn when printed explicitly
  print(plot)
}
will work.
Two alternatives to have the 3 graphs together:
alternative 1
Long format, and facet_wrap:
library(tidyr)
# Stack Var1..Var3 into the default long-format columns `name` and `value`,
# then draw Price against value in one panel per original column.
data %>%
  pivot_longer(all_of(paste0("Var", 1:3))) %>%
  ggplot(mapping = aes(x = value, y = Price)) +
  geom_point(shape = 16, color = "dodgerblue") +
  geom_smooth(method = lm, color = 'Black') +
  facet_wrap(vars(name))
second alternative
You could try to use the wonderful {patchwork} package also:
# Build one plot per predictor name and keep them in a list.
plot_list <- lapply(variables, function(i) {
  # `.data[[i]]` selects the column called `i` from the plot data (safer than
  # get(i), which searches the enclosing environments as well)
  ggplot(data = data, aes(x = .data[[i]], y = Price)) +
    geom_point(shape = 16, color = "dodgerblue") +
    geom_smooth(method = "lm", color = "Black") +
    labs(x = i)
})
library(patchwork)
# Arrange all plots of the list in a single figure
wrap_plots(plot_list)
par works in low-level plotting.
# Three base-graphics panels side by side: Price against each of the first
# three columns of `data`, each with its least-squares line.
# par() returns the previous layout so that it can be restored afterwards.
old_par <- par(mfrow = c(1, 3))
for (x in names(data)[1:3]) {
  plot(data[c(x, "Price")])
  # reformulate() builds the formula Price ~ <x> from the column name, so no
  # get()/with() lookup is needed
  abline(lm(reformulate(x, response = "Price"), data = data))
}
par(old_par)
Data:
# Reproducible definition of the example data frame: five rows with the
# columns Var1, Var2, Var3 and Price.
data <- structure(list(Var1 = c(2, 3, 12, 5, 17), Var2 = c(3, 3, 12,
16, 7), Var3 = c(2, 5, 2, 5, 18), Price = c(10, 11, 22, 15, 15
)), class = "data.frame", row.names = c(NA, -5L))

How to specify multiple xlims for facetted data in ggplot2 R?

The data is faceted by two variables (see graph). Each variable has a different range. I want to specify the x-axis range so that all plots for var1 and for var2 are bounded by the min and max values of the respective variable. See the sample code attached. I don't want to set scales = "free" on facet_wrap.
# Example from the question. gather() and ggplot() are called without a
# library() call in this snippet -- presumably tidyr and ggplot2 are attached
# beforehand.
var1 <- rnorm(100, 6, 2)
var2 <- rnorm(100,15,2)
spp.val <- rnorm(100,10,2)
spp <- rep(c("A","B","C","D"), 25)
df <- data.frame(var1, var2,spp, spp.val)
# Stack var1 and var2 into a key column `var` and a value column `var.val`
df <- gather(df,
key = "var",
value = "var.val",
var1,var2)
df$var <- as.factor(as.character(df$var))
df$spp <- as.factor(as.character(df$spp))
# Scatter plots in a grid: one row per spp, one column per var
ggplot(aes(x = var.val, y = spp.val), data = df) +
geom_point() +
facet_grid(spp~var)
#I want the limits for each facet_grid to be set as follows
# (sketch of the desired behaviour; the two lines below have unbalanced
# parentheses and are not runnable as written)
xlim(min(df[df$var == "var1",]), max(df[df$var == "var1",])
xlim(min(df[df$var == "var2",]), max(df[df$var == "var2",])
Is this what you want?
library(tidyverse)
# Simulate the data, stack var1/var2 into `var` / `var.val`, turn the two
# facetting columns into factors and plot with an x axis that is free per column.
tibble(
  var1 = rnorm(100, 6, 2),
  var2 = rnorm(100, 15, 2),
  spp.val = rnorm(100, 10, 2),
  spp = rep(c("A", "B", "C", "D"), 25)
) |>
  pivot_longer(
    cols = starts_with("var"),
    names_to = "var",
    values_to = "var.val"
  ) |>
  mutate(across(c(spp, var), factor)) |>
  ggplot(mapping = aes(x = var.val, y = spp.val)) +
  geom_point() +
  # scales = "free_x": every facet column gets its own x range
  facet_grid(spp ~ var, scales = "free_x")
Created on 2022-04-23 by the reprex package (v2.0.1)

How to label lines obtained using split with ggrepel

I am trying to label 4 lines grouped by the value of variable cc. To label the lines I use ggrepel but I get all the 4 labels instead of 2 for each graph. How to correct this error?
The location of the labels is in this example at the last date but I want something more flexible: I want to locate each of the 4 labels in specific points that I chose (e.g. b at date 1, a at date 2, etc.). How to do that?
# Example from the question: one line chart per level of `cc`, stacked in a
# single column, with country labels placed at date 4.
library(tidyverse)
library(ggrepel)
library(cowplot)
set.seed(1234)
df <- tibble(date = c(rep(1,4), rep(2,4), rep(3,4), rep(4,4)),
country = rep(c('a','b','c','d'),4),
value = runif(16),
cc = rep(c(1,1,2,2),4))
df$cc <- as.factor(df$cc)
# make list of plots
ggList <- lapply(split(df, df$cc), function(i) {
ggplot(i, aes(x = date, y = value, color = country)) +
geom_line(lwd = 1.1) +
# NOTE: the label data is taken from the full `df`, not from the per-plot
# subset `i`, so each plot receives the labels of all four countries
geom_text_repel(data = subset(df, date == 4),
aes(label = country)) +
theme(legend.position = "none")
})
# plot as grid in 1 columns
cowplot::plot_grid(plotlist = ggList, ncol = 1,
align = 'v', labels = levels(df$cc))
Created on 2021-08-18 by the reprex package (v2.0.0)
Here I make a tibble to hold color and position preferences, and join that to df.
The geom_text_repel line should probably use i instead of df so that it's split the same way as the line. The only trouble is this forces us to specify that we want four colors up front, since otherwise each chart would just use the two it needs.
set.seed(1234)
df <- tibble(date = c(rep(1,4), rep(2,4), rep(3,4), rep(4,4)),
             country = rep(c('a','b','c','d'),4),
             value = runif(16),
             cc = rep(c(1,1,2,2),4))
# Per-country preferences: the date at which the label is placed and the
# colour of the line. brewer.pal(4, ...) already returns exactly four colours.
label_pos <- tibble(country = letters[1:4],
                    label_pos = c(2, 1, 3, 2),
                    color = RColorBrewer::brewer.pal(4, "Set2"))
# Name the join key explicitly so that the join neither prints a
# "Joining with by = ..." message nor changes if a shared column is added later
df <- df %>% left_join(label_pos, by = "country")
df$cc <- as.factor(df$cc)
# make list of plots
ggList <- lapply(split(df, df$cc), function(i) {
  ggplot(i, aes(x = date, y = value, color = color)) +
    geom_line(lwd = 1.1) +
    # label only the rows of this subset whose date equals the preferred
    # label position of the country
    geom_text_repel(data = subset(i, date == label_pos),
                    aes(label = country),
                    box.padding = unit(0.02, "npc"), direction = "y") +
    # the `color` column already contains colour codes, so use them as they are
    scale_color_identity() +
    theme(legend.position = "none")
})
# plot as grid in 1 columns
cowplot::plot_grid(plotlist = ggList, ncol = 1,
                   align = 'v', labels = levels(df$cc))

Replace strip labels with list of values

I am plotting some data which needs to be labelled with LaTeX expressions, see this small reproducible example. I have a separate list which contains the LaTeX labels for treatment1 and treatment2, to avoid changing the underlying data:
## Required packages
library(tidyverse)
library(latex2exp)
## LaTeX labels
labs <- list(treatment1 = c(unname(TeX("$\\textit{Avo}cado$")), unname(TeX("$Ban_{ana}")) ),
treatment2 = c(unname(TeX("$\\textit{C}at$")), unname(TeX("$D_{og}$")) ) )
## Dummy data frame
df <- data.frame(treatment1 = factor(c(rep("A", 5), rep("B", 5))),
treatment2 = factor(c(rep(c("C", "D"), 5))),
var1 = c(1, 4, 5, 7, 2, 8, 9, 1, 4, 7),
var2 = c(2, 8, 11, 13, 4, 10, 11, 2, 6, 10))
To apply the LaTeX labels to treatment2, the colour variable, I use the label argument in scale_colour_manual():
## Scatter plot with colour varying by treatment2
p <- ggplot(df, aes(x = var1, y = var2, colour = treatment2)) +
geom_point() +
scale_colour_manual(values = c("Black", "Blue"),
labels = labs$treatment2)
## Add facet by treatment1
p + facet_grid(treatment1 ~ .)
I've tried using the labeller argument in facet_grid() but both of these options result in an error:
p + facet_grid(treatment1 ~ ., labeller = labs$treatment1)
p + facet_grid(treatment1 ~ ., labeller = label_value(labs$treatment1))
## > Error in cbind(labels = list(), list(`{`, if (!is.null(.rows) || !is.null(.cols)) { :
## number of rows of matrices must match (see arg 2)
Using the as_labeller() function, on the other hand, produces the plot, but with no change to the facet labels:
p + facet_grid(treatment1 ~ ., labeller = as_labeller(labs$treatment1))
I have also tried to change the labels manually (although I would prefer to refer to a separate object due to the size of my actual data frame), which has no observable effect:
p + facet_grid(treatment1 ~ .,
labeller = labeller(treatment1 = c("A" = unname(TeX("$\\textit{Avo}cado$")),
"B" = unname(TeX("$Ban_{ana}")))))
I assume I have to write a new labeller function but I don't really know where to begin. Or am I going about this all wrong?
This works:
# Look up the strip label for each facet level of treatment1.
#
# level: character vector of factor levels ("A", "B").
# Returns the matching plotmath expressions, in the order of `level`.
flabels <- function(level){
  labels <- c(
    A = unname(TeX("$\\textit{Avo}cado$")),
    # the math string is closed by `$`, like all other labels
    B = unname(TeX("$Ban_{ana}$"))
  )
  labels[level]
}
# Base scatter plot, coloured by treatment2
p <- ggplot(data = df, mapping = aes(x = var1, y = var2, colour = treatment2)) +
  geom_point()
# Facet rows by treatment1; the strip text is looked up through flabels() and
# parsed as a plotmath expression
p + facet_grid(
  rows = vars(treatment1),
  labeller = labeller(
    treatment1 = as_labeller(flabels, default = label_parsed)
  )
)

Create heatmap with values from matrix in ggplot2

I've seen heatmaps with values made in various R graphics systems including lattice and base like this:
I tend to use ggplot2 a bit and would like to be able to make a heatmap with the corresponding cell values plotted. Here's the heat map and an attempt using geom_text:
# library() attaches a single package per call: in library(reshape2, ggplot2)
# the second argument is taken as `help`, so ggplot2 would never be attached.
library(reshape2)
library(ggplot2)
dat <- matrix(rnorm(100, 3, 1), ncol=10)
# NOTE(review): names() on a matrix labels individual cells, not columns;
# melt() below therefore still numbers rows and columns as Var1 / Var2
names(dat) <- paste("X", 1:10)
dat2 <- melt(dat, id.var = "X1")
# heat map: one tile per matrix cell, filled by its value
p1 <- ggplot(dat2, aes(as.factor(Var1), Var2, group=Var2)) +
  geom_tile(aes(fill = value)) +
  scale_fill_gradient(low = "white", high = "red")
p1
#attempt
labs <- c(apply(round(dat[, -2], 1), 2, as.character))
p1 + geom_text(aes(label=labs), size=1)
Normally I can figure out the x and y values to pass but I don't know in this case since this info isn't stored in the data set. How can I place the text on the heatmap?
Key is to add a row identifier to the data and shape it "longer".
edit Dec 2022 to make code reproducible with R 4.2.2 / ggplot2 3.4.0 and reflect changes in tidyverse semantics
library(ggplot2)
library(tidyverse)
dat <- matrix(rnorm(100, 3, 1), ncol = 10)
## the matrix needs column names; they are set with colnames() -- names()
## would label single cells and leave the columns unnamed, which makes
## as_tibble() warn and fall back to its compatibility name repair
colnames(dat) <- paste0("V", 1:10)
## convert to tibble, add row identifier, and shape "long"
dat2 <-
  dat %>%
  as_tibble() %>%
  rownames_to_column("Var1") %>%
  pivot_longer(-Var1, names_to = "Var2", values_to = "value") %>%
  mutate(
    ## keep rows and columns in numeric order 1..10 on the axes
    Var1 = factor(Var1, levels = 1:10),
    Var2 = factor(gsub("V", "", Var2), levels = 1:10)
  )
## heat map with the rounded cell value printed on every tile
ggplot(dat2, aes(Var1, Var2)) +
  geom_tile(aes(fill = value)) +
  geom_text(aes(label = round(value, 1))) +
  scale_fill_gradient(low = "white", high = "red")
Created on 2022-12-31 with reprex v2.0.2
There is another simpler way to make heatmaps with values. You can use pheatmap to do this.
dat <- matrix(rnorm(100, 3, 1), ncol = 10)
# column labels of a matrix are set with colnames(), not names()
colnames(dat) <- paste("X", 1:10)
# install pheatmap only if it is not installed already
if (!requireNamespace("pheatmap", quietly = TRUE)) {
  install.packages("pheatmap")
}
library(pheatmap)
# heat map with the cell values printed inside the cells
pheatmap(dat, display_numbers = TRUE)
This will give you a plot like this
If you want to remove clustering and use your color scheme you can do
# Same heat map without row/column clustering, with a white-to-red palette and
# larger numbers. TRUE/FALSE are spelled out because T and F can be reassigned.
pheatmap(dat,
         display_numbers = TRUE,
         color = colorRampPalette(c("white", "red"))(100),
         cluster_rows = FALSE,
         cluster_cols = FALSE,
         fontsize_number = 15)
You can also change the fontsize, format, and color of the displayed numbers.

Resources