How to specify multiple xlims for facetted data in ggplot2 R? - r

The data is facetted by two variables (see graph). Each variable has a different range. I want to specify the range so that all plots in var1 and vae2 are bound by the min and max values of those variables. See sample code attached. I don't want to use setscales = "free" on facet_wrap.
var1 <- rnorm(100, 6, 2)
var2 <- rnorm(100,15,2)
spp.val <- rnorm(100,10,2)
spp <- rep(c("A","B","C","D"), 25)
df <- data.frame(var1, var2,spp, spp.val)
df <- gather(df,
key = "var",
value = "var.val",
var1,var2)
df$var <- as.factor(as.character(df$var))
df$spp <- as.factor(as.character(df$spp))
ggplot(aes(x = var.val, y = spp.val), data = df) +
geom_point() +
facet_grid(spp~var)
#I want the limits for each facet_grid to be set as follows
xlim(min(df[df$var == "var1",]), max(df[df$var == "var1",])
xlim(min(df[df$var == "var2",]), max(df[df$var == "var2",])

Is this what you want?
library(tidyverse)
tibble(
var1 = rnorm(100, 6, 2),
var2 = rnorm(100, 15, 2),
spp.val = rnorm(100, 10, 2),
spp = rep(c("A", "B", "C", "D"), 25)
) |>
pivot_longer(starts_with("var"), names_to = "var", values_to = "var.val") |>
mutate(across(c(spp, var), factor)) |>
ggplot(aes(var.val, spp.val)) +
geom_point() +
facet_grid(spp ~var, scales = "free_x")
Created on 2022-04-23 by the reprex package (v2.0.1)

Related

Normalize data per row in R

How can I scale/normalize my data per row (Observations)? Something like [-1:1] like a z score?
I have seen previous post which involve normalization of the whole dataset like this https://stats.stackexchange.com/questions/178626/how-to-normalize-data-between-1-and-1
, but id like to normalise per row so they can be plotted in a same box plot as they all show same pattern across x-axis.
Obs <- c("A", "B", "C")
count1 <- c(100,15,3)
count2 <- c(250, 30, 5)
count3 <- c(290, 20, 8)
count4<- c(80,12, 2 )
df <- data.frame(Obs, count1, count2, count3, count4)
dff<- df %>% pivot_longer(cols = !Obs, names_to = 'count', values_to = 'Value')
ggplot(dff, aes(x = count, y = Value)) +
geom_jitter(alpha = 0.1, color = "tomato") +
geom_boxplot()
Based on the link you shared, you can use apply to use the corresponding function to rescale dataframe over [-1,1].
library(scales)
library(ggplot2)
library(tidyr)
Obs <- c("A", "B", "C")
count1 <- c(100,15,3)
count2 <- c(250, 30, 5)
count3 <- c(290, 20, 8)
count4<- c(80,12, 2 )
df <- data.frame(count1, count2, count3, count4)
df <- as.data.frame(t(apply(df, 1, function(x)(2*(x-min(x))/(max(x)-min(x)))- 1)))
df <- cbind(Obs, df)
dff<- df %>%
tidyr::pivot_longer(cols = !Obs, names_to = 'count', values_to = 'Value')
ggplot(dff, aes(x = count, y = Value)) +
geom_jitter(alpha = 0.1, color = "tomato") +
geom_boxplot()
Console output:
If you pivot it longer, you can group by your observations and scale:
df %>%
pivot_longer(cols = !Obs, names_to = 'count', values_to = 'Value') %>% group_by(Obs) %>%
mutate(z=as.numeric(scale(Value))) %>%
ggplot(aes(x=count,y=z))+geom_boxplot()
Or in base R, just do:
boxplot(t(scale(t(df[,-1]))))

How to pass an expression to a geom_text label in ggplot?

I am attempting to pass an expression with subscript to a single geom_text() label in ggplot. Here is my code right now:
my_exp <- expression('my_exp'[s][u][b])
my_data <-
data.frame(
var_1 = c("a", "b", "c"),
var_2 = c(1, 2, 3)
)
my_data %>%
ggplot(aes(x = var_1, y = var_2))+
geom_text(aes(label = var_1))
Here is the resulting plot:
What I would like to do is replace the var_1 value of "a" with the expression specified by my_exp and then have geom_text() evaluate that value as an expression, resulting in the subscript appearing on the ggplot.
I would suggest this approach. You can build another variable for your labels and then enable the option parse=T from geom_text() in order to have the desired plot. Here the code:
library(ggplot2)
library(tidyverse)
#Data
my_exp <- as.character(expression('my_exp'[s][u][b]))
my_data <-
data.frame(
var_1 = c("a", "b", "c"),
var_2 = c(1, 2, 3),stringsAsFactors = F
)
#Mutate
my_data$label <- ifelse(my_data$var_1=='a',my_exp,my_data$var_1)
#Plot
my_data %>%
ggplot(aes(x = var_1, y = var_2))+
geom_text(aes(label = label),parse = T)
Output:
Update: If there are issues with labels here a code for that:
#Label
my_exp <- "14~M~my_exp[s][u][b]"
#Code
my_data <-
data.frame(
var_1 = c("a", "b", "c"),
var_2 = c(1, 2, 3),stringsAsFactors = F
)
#Mutate
my_data$label <- ifelse(my_data$var_1=='a',my_exp,my_data$var_1)
#Plot
my_data %>%
ggplot(aes(x = var_1, y = var_2))+
geom_text(aes(label = label),parse = T)
Output:

Mapping function by groups in ggplot

The same regression model has been estimated on several groups using dplyr::group_by() and broom::tidy(). The estimates should be used to plot the regression function for each group in ggplot.
The following code works for base r curve().
library(tidyverse)
my_tbl <- tibble::tribble(
~Col_1, ~Col_2, ~Col_3,
"A", "(Intercept)", 30,
"A", "x", 10,
"A", "x2", -2,
"B", "(Intercept)", 40,
"B", "x", 20,
"B", "x2", -1
)
my_tbl %>%
split(.$Col_1) %>%
map( ~curve(.$Col_3[1] + .$Col_3[2] * x + .$Col_3[3] * x^2,
1,
30,
main = paste(.$Col_1[1]),
ylab = "y"))
The stat_function() is not able to find the parameter values in my_tbl.
my_tbl %>%
nest(-Col_1) %>%
mutate(plot = map(data, ~ggplot(data = data.frame(x = c(1, 30)),
mapping = aes(x = x)) +
stat_function()))
Is there any particular reason why you went with nest instead of split (as per the base R approach)? Because that works fine with ggplot, and the map logic matches that of the base R solution.
With nest, since the Col_1 column isn't part of the nested data, I used map2 instead of map in order to pass both data & Col_1 to ggplot.
result1 <- my_tbl %>%
split(.$Col_1) %>%
map(~ ggplot(data.frame(x = seq(0, 30)), aes(x)) +
ggtitle(.$Col_1[1]) +
stat_function(fun = function(x) .x$Col_3[1] + .x$Col_3[2] * x + .x$Col_3[3] * x^2))
result2 <- my_tbl %>%
nest(-Col_1) %>%
mutate(plot = map2(data, Col_1,
~ ggplot(data.frame(x = seq(0, 30)), aes(x)) +
ggtitle(.y) +
stat_function(fun = function(x) .x$Col_3[1] + .x$Col_3[2] * x + .x$Col_3[3] * x^2)))
# resulting plots are the same
cowplot::plot_grid(plotlist = result1)
cowplot::plot_grid(plotlist = result2$plot)

Can you facet on a indicator variable in ggplot2?

types = c("A", "B", "C")
df = data.frame(n = rnorm(100), type=sample(types, 100, replace = TRUE))
ggplot(data=df, aes(n)) + geom_histogram() + facet_grid(~type)
Above is how I normally used facetting. But can I use it when instead of a categorical variable I have a set of columns that are indicator variables such as:
df = data.frame(n = rnorm(100), A=rbinom(100, 1, .5), B=rbinom(100, 1, .5), C=rbinom(100, 1, .5))
Now the "Type" variable from my previous example isn't mutually exclusive. An observation can be "A and B" or "A and B and C" for example. However, I'd still like an individual histogram for any observation that has the presence of A, B, or C?
I would reshape the data with tidyr so that data in more that one category are duplicated. filter to remove unwanted cases.
df <- data.frame(
n = rnorm(100),
A = rbinom(100, 1, .5),
B = rbinom(100, 1, .5),
C = rbinom(100, 1, .5)
)
library("tidyr")
library("dplyr")
library("ggplot2")
df %>% gather(key = "type", value = "value", -n) %>%
filter(value == 1) %>%
ggplot(aes(x = n)) +
geom_histogram() +
facet_wrap(~type)
I've always despised gather, so I'll add another method and one for the data.table fans.
library(data.table)
DT <- melt(setDT(df), id= "n", variable = "type")[value > 0]
ggplot(DT,aes(n)) + geom_histogram() + facet_grid(~type)
#tidyland
library(reshape2)
library(dplyr)
library(ggplot2)
df %>%
melt(id = "n", variable = "type") %>%
filter(value > 0) %>%
ggplot(aes(n)) + geom_histogram() + facet_grid(~type)

How to plot several boxplots by group in r?

ID <- 1:10
group <- c(1,1,1,2,2,2,3,3,3,3)
var1 <- c(6:15)
var2 <- c(7:16)
var3 <- c(6:11, NA, NA, NA, NA)
var4 <- c(4:9, NA, NA, NA, NA)
data <- data.frame(ID, group, var1, var2, var3, var4)
library(dplyr)
data %>% group_by(group) %>% boxplot(var1, var2)
The last line does not work as i wish. The idea is to get 4 boxplots in one graphic. Two for each variable. Maybe i need to use ggplot2?
You need to reorganize the data if you want to get both variables in the same plot. Here is a ggplot2 solution:
# load library
library(ggplot2)
library(tidyr)
library(ggthemes)
# reorganize data
df <- gather(data, "ID","group")
#rename columns
colnames(df) <- c("ID","group","var","value")
# plot
ggplot(data=df) +
geom_boxplot( aes(x=factor(group), y=value, fill=factor(var)), position=position_dodge(1)) +
scale_x_discrete(breaks=c(1, 2, 3), labels=c("A", "B", "C")) +
theme_minimal() +
scale_fill_grey()
Making boxplots with the same width is a whole different question (solution here), but one simple alternative would be like this:
# recode column `group` in the `data.frame`.
df <- transform(df, group = ifelse(group==1, 'A', ifelse(group==2, 'B', "C")))
# plot
ggplot(data=df) +
geom_boxplot( aes(x=factor(var), y=value, fill=factor((var))), position=position_dodge(1)) +
geom_jitter(aes(x=factor(var), y=value, color=factor((var)))) +
facet_grid(.~group, scales = "free_x") +
theme_minimal()+
scale_fill_grey() +
theme(axis.text.x=element_blank(),
axis.title.x=element_blank(),
axis.ticks=element_blank())
You might try melting the data frame (mentioned in comment by #lukeA) first and then sticking to base graphics. ggplot2 or lattice are other good options.
library(reshape2)
DF <- melt(data, id.vars = c("ID", "group"), measure.vars = c("var1", "var2"))
boxplot(value ~ group + variable, DF)
Alternate lattice code, also using DF:
bwplot(~ value | variable + group, data = DF)
Alternate ggplot2 code, also using DF:
ggplot(DF, aes(x = factor(group), y = value, fill = variable)) + geom_boxplot()
Although quite late, a found a great base-R solution here
# Create some data, e.g. from https://en.wikipedia.org/wiki/One-way_analysis_of_variance#Example
df <- as.data.frame(matrix(c(6, 8, 13, 8, 12, 9, 4, 9, 11, 5, 11, 8, 3, 6, 7, 4, 8, 12),ncol = 3, byrow = TRUE))
df <- reshape(data = df, direction = "long", idvar=1:3, varying=1:3, sep = "", timevar = "Treatment")
df$Treatment <- as.factor(df$Treatment)
rownames(df) <- NULL
par(mfrow = c(2, 1))
par(mar=c(1,4,4,2) + 0.1) # mar=c(b,l,t,r)
boxplot(V ~ Treatment, data = df, xlab = NULL, xaxt = "n",
ylab = "V", main = "One-way anova with 3 different levels of one factor")
stripchart(V ~ Treatment, # Points
data = df, # Data
method = "jitter", # Random noise
pch = 19, # Pch symbols
col = 4, # Color of the symbol
vertical = TRUE, # Vertical mode
add = TRUE) # Add it over
par(mar=c(5,4,0,2) + 0.1)
boxplot(V ~ Treatment, data = df, xlab = "Treatment",
ylab = "V", main = NULL)
stripchart(V ~ Treatment, # Points
data = df, # Data
method = "overplot", # Random noise
pch = 19, # Pch symbols
col = 4, # Color of the symbol
vertical = TRUE, # Vertical mode
add = TRUE) # Add it over
par(mfrow = c(1, 1))
Result:

Resources