The same regression model has been estimated on several groups using dplyr::group_by() and broom::tidy(). The estimates should be used to plot the regression function for each group in ggplot.
The following code works for base r curve().
library(tidyverse)
# Coefficient table in long format: Col_1 is the group label, Col_2 the term
# name and Col_3 the estimate (three terms per group).
my_tbl <- tibble::tribble(
~Col_1, ~Col_2, ~Col_3,
"A", "(Intercept)", 30,
"A", "x", 10,
"A", "x2", -2,
"B", "(Intercept)", 40,
"B", "x", 20,
"B", "x2", -1
)
# Draw one base-graphics curve per group. Coefficients are picked by row
# position inside each split piece (1 = intercept, 2 = x, 3 = x2), so the row
# order of my_tbl matters.
my_tbl %>%
split(.$Col_1) %>%
map( ~curve(.$Col_3[1] + .$Col_3[2] * x + .$Col_3[3] * x^2,
# second and third arguments of curve(): lower and upper end of the x range
1,
30,
# plot title = the group label of this piece
main = paste(.$Col_1[1]),
ylab = "y"))
The stat_function() is not able to find the parameter values in my_tbl.
# Asker's non-working attempt: build one ggplot per group from a nested tibble.
# NOTE(review): stat_function() is called without a `fun` argument, so the
# per-group coefficients held in `data` are never handed to it.
my_tbl %>%
nest(-Col_1) %>%
mutate(plot = map(data, ~ggplot(data = data.frame(x = c(1, 30)),
mapping = aes(x = x)) +
stat_function()))
Is there any particular reason why you went with nest instead of split (as per the base R approach)? Because that works fine with ggplot, and the map logic matches that of the base R solution.
With nest, since the Col_1 column isn't part of the nested data, I used map2 instead of map in order to pass both data & Col_1 to ggplot.
# Approach 1: split() keeps Col_1 inside every piece, so a one-argument map()
# is enough. Coefficients are taken by row position within the piece:
# 1 = (Intercept), 2 = x, 3 = x2.
result1 <- my_tbl %>%
  split(.$Col_1) %>%
  map(
    ~ ggplot(data.frame(x = seq(0, 30)), aes(x)) +
      ggtitle(.x$Col_1[1]) +
      stat_function(
        fun = function(x) .x$Col_3[1] + .x$Col_3[2] * x + .x$Col_3[3] * x^2
      )
  )

# Approach 2: nest() moves Col_1 out of the nested data, so map2() passes the
# nested coefficients (.x) and the group label (.y) together.
# `data = -Col_1` is the named form expected since tidyr 1.0.0; the bare
# nest(-Col_1) spelling is deprecated and emits a warning.
result2 <- my_tbl %>%
  nest(data = -Col_1) %>%
  mutate(plot = map2(
    data, Col_1,
    ~ ggplot(data.frame(x = seq(0, 30)), aes(x)) +
      ggtitle(.y) +
      stat_function(
        fun = function(x) .x$Col_3[1] + .x$Col_3[2] * x + .x$Col_3[3] * x^2
      )
  ))

# resulting plots are the same
cowplot::plot_grid(plotlist = result1)
cowplot::plot_grid(plotlist = result2$plot)
Related
The data is facetted by two variables (see graph). Each variable has a different range. I want to specify the range so that all plots for var1 and var2 are bounded by the min and max values of the respective variable. See the sample code attached. I don't want to use scales = "free" on facet_wrap.
# Simulated data: two predictors on different scales, one response, and a
# four-level species label repeated 25 times.
var1 <- rnorm(n = 100, mean = 6, sd = 2)
var2 <- rnorm(n = 100, mean = 15, sd = 2)
spp.val <- rnorm(n = 100, mean = 10, sd = 2)
spp <- rep(c("A", "B", "C", "D"), times = 25)

# Stack var1/var2 into a name column (`var`) and a value column (`var.val`).
df <- gather(
  data.frame(var1, var2, spp, spp.val),
  key = "var", value = "var.val",
  var1, var2
)

# Store both facetting columns as factors.
df[c("var", "spp")] <- lapply(
  df[c("var", "spp")],
  function(column) as.factor(as.character(column))
)

# One panel per species (rows) and predictor (columns).
ggplot(df, aes(x = var.val, y = spp.val)) +
  geom_point() +
  facet_grid(spp ~ var)
# I want the x-axis limits of each facet_grid() column to be set as follows,
# i.e. to the range of the plotted values (var.val) within each level of `var`.
# (min()/max() are taken over the var.val column only; taking them over whole
# data-frame rows would also include the factor columns.)
xlim(min(df$var.val[df$var == "var1"]), max(df$var.val[df$var == "var1"]))
xlim(min(df$var.val[df$var == "var2"]), max(df$var.val[df$var == "var2"]))
Is this what you want?
library(tidyverse)
# Simulate the data, reshape both predictors into long format and draw one
# panel per species/predictor pair. scales = "free_x" lets each predictor
# column use its own x-axis range.
tibble(
  var1 = rnorm(n = 100, mean = 6, sd = 2),
  var2 = rnorm(n = 100, mean = 15, sd = 2),
  spp.val = rnorm(n = 100, mean = 10, sd = 2),
  spp = rep(c("A", "B", "C", "D"), times = 25)
) |>
  pivot_longer(
    cols = c(var1, var2),
    names_to = "var",
    values_to = "var.val"
  ) |>
  mutate(spp = factor(spp), var = factor(var)) |>
  ggplot(mapping = aes(x = var.val, y = spp.val)) +
  geom_point() +
  facet_grid(rows = vars(spp), cols = vars(var), scales = "free_x")
Created on 2022-04-23 by the reprex package (v2.0.1)
How can I scale/normalize my data per row (per observation), e.g. to the range [-1, 1] or as a z-score?
I have seen previous posts that normalize the whole dataset, like this one: https://stats.stackexchange.com/questions/178626/how-to-normalize-data-between-1-and-1
, but I'd like to normalise per row so that the observations can be plotted in the same box plot, as they all show the same pattern across the x-axis.
# Three observations (rows) measured in four count columns.
Obs <- c("A", "B", "C")
count1 <- c(100, 15, 3)
count2 <- c(250, 30, 5)
count3 <- c(290, 20, 8)
count4 <- c(80, 12, 2)
df <- data.frame(Obs, count1, count2, count3, count4)

# Long format: one row per observation/count column.
dff <- pivot_longer(
  df,
  cols = -Obs,
  names_to = "count",
  values_to = "Value"
)

# One box per count column, raw points jittered underneath.
ggplot(data = dff, mapping = aes(x = count, y = Value)) +
  geom_jitter(alpha = 0.1, color = "tomato") +
  geom_boxplot()
Based on the link you shared, you can use apply to use the corresponding function to rescale dataframe over [-1,1].
library(scales)
library(ggplot2)
library(tidyr)
#' Linearly rescale a numeric vector to the interval [-1, 1]
#'
#' The minimum maps to -1 and the maximum to 1. A vector whose values are all
#' equal has zero range, which would give 0/0 = NaN; it is mapped to 0 (the
#' midpoint of the target interval) instead. Names are preserved.
#'
#' @param x Numeric vector.
#' @return Numeric vector of the same length (and names) as `x`.
rescale_pm1 <- function(x) {
  rng <- range(x)
  span <- rng[2] - rng[1]
  if (isTRUE(span == 0)) {
    x[] <- 0
    return(x)
  }
  2 * (x - rng[1]) / span - 1
}

Obs <- c("A", "B", "C")
count1 <- c(100, 15, 3)
count2 <- c(250, 30, 5)
count3 <- c(290, 20, 8)
count4 <- c(80, 12, 2)
df <- data.frame(count1, count2, count3, count4)

# apply() over MARGIN = 1 hands each row to rescale_pm1() and returns the
# results as columns, hence the t() to get one row per observation again.
# The explicit as.matrix() makes the data.frame -> matrix coercion visible.
df <- as.data.frame(t(apply(as.matrix(df), 1, rescale_pm1)))
df <- cbind(Obs, df)
# Long format: one row per observation/count column.
dff <- tidyr::pivot_longer(
  df,
  cols = -Obs,
  names_to = "count",
  values_to = "Value"
)

# One box per count column, points jittered underneath.
ggplot(data = dff, mapping = aes(x = count, y = Value)) +
  geom_jitter(alpha = 0.1, color = "tomato") +
  geom_boxplot()
Console output:
If you pivot it longer, you can group by your observations and scale:
# Reshape to long format, standardise Value within each observation with
# scale(), then draw one box per count column.
df %>%
  pivot_longer(
    cols = -Obs,
    names_to = "count",
    values_to = "Value"
  ) %>%
  group_by(Obs) %>%
  mutate(z = as.numeric(scale(Value))) %>%
  ggplot(mapping = aes(x = count, y = z)) +
  geom_boxplot()
Or in base R, just do:
# scale() works column-wise, so transpose to scale each row, transpose back,
# and draw one box per count column.
df[, -1] |>
  t() |>
  scale() |>
  t() |>
  boxplot()
I would like to be able to plot each of "X1 by grpA", "X2 by grpA", "X3 by grpA", "X1 by grpB", "X2 by grpB", and "X3 by grpB" using ggplot2::ggplot() in conjunction with a for loop.
So far, I can get it to almost work, but the argument for the column of the grouping variable in the facet_grid() function does not resolve correctly when I try to use tidy_eval properties. It does work, however, when I type the column name explicitly, but of course, having to type the name explicitly would make it so I would not be able to dynamically change the grouping variable.
I provide the following data-set returned by the following code snippet to give context to my question:
# Reproducible example data: three 7-level factors (x1..x3) and two 2-level
# grouping factors (grpA, grpB), 50 rows each.
set.seed(1)
dfr <- tibble(
  x1 = factor(sample(letters[1:7], 50, replace = TRUE), levels = letters[1:7]),
  x2 = factor(sample(letters[1:7], 50, replace = TRUE), levels = letters[1:7]),
  x3 = factor(sample(letters[1:7], 50, replace = TRUE), levels = letters[1:7]),
  grpA = factor(
    sample(c("grp1", "grp2"), 50, prob = c(0.3, 0.7), replace = TRUE),
    levels = c("grp1", "grp2")
  ),
  grpB = factor(
    sample(c("grp1", "grp2"), 50, prob = c(0.6, 0.4), replace = TRUE),
    levels = c("grp1", "grp2")
  )
)
# Inspect the object that was just created (`dfr`, not `df`).
head(dfr)
I also provide a function that creates the plotting data I need to make the grouped plots. It accepts strings as arguments for the parameters 'groupvar' and 'mainvar':
#' Build the per-group percentage table used for the bar charts
#'
#' Counts the levels of `mainvar` within each level of `groupvar` (empty
#' levels are kept via `.drop = FALSE`), drops rows containing `NA`, and adds
#' the columns needed for plotting.
#'
#' @param dat Data frame containing both columns.
#' @param groupvar String naming the grouping column.
#' @param mainvar String naming the column whose levels are counted.
#' @return The count table with added columns `pct` (within-group share),
#'   `pct2` (`pct`, but 0.005 where the count is 0), `grp_tot` (group total),
#'   `pct_lab` (percentage label) and `pct_pos` (`pct2 + .02`).
plot_data_prepr <- function(dat, groupvar, mainvar) {
  # Fail early with a readable message instead of a tidy-eval lookup error.
  missing_cols <- setdiff(
    c(as.character(groupvar), as.character(mainvar)),
    names(dat)
  )
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `dat`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  groupvar <- sym(groupvar)
  mainvar <- sym(mainvar)

  dat %>%
    group_by(!!groupvar) %>%
    count(!!mainvar, .drop = FALSE) %>%
    # Qualified so the function does not depend on tidyr being attached.
    tidyr::drop_na() %>%
    mutate(
      pct = n / sum(n),
      pct2 = ifelse(n == 0, 0.005, n / sum(n)),
      grp_tot = sum(n),
      pct_lab = paste0(format(pct * 100, digits = 1), "%"),
      pct_pos = pct2 + .02
    )
}
here is normal usage of the function:
# Percentage table for x1 within the levels of grpA.
plot_data_prepr(
  dfr,
  groupvar = "grpA",
  mainvar = "x1"
)
Now I share my for loop that fails when I try to use tidy_eval in the facet_grid() function in the context of ggplot(); the returned error = "Error in !sgvar : invalid argument type"
"FAILING EXAMPLE:"
# Loops over the first three columns of dfr (the main variables) and plots
# each against the grouping column in position 4.
for (i in seq_along(names(dfr)[1:3])){
mvar <- names(dfr)[i]
print(mvar)
gvar <- names(dfr[4])
print(gvar)
# Symbols built from the column-name strings, for unquoting with !!.
smvar <- sym(mvar)
sgvar <- sym(gvar)
plot <- ggplot(data=plot_data_prepr(dfr, gvar, mvar),
mapping = aes(x=!!smvar, y = pct2, fill = !!smvar)) +
geom_bar(stat = 'identity') +
ylim(0,1) +
geom_text(aes(x=!!smvar, label=pct_lab, y = pct_pos + .02)) +
# This is the line that fails with "Error in !sgvar : invalid argument type".
# NOTE(review): the message suggests `!!` inside the formula is evaluated as
# plain logical negation of the symbol rather than as an unquote - confirm.
facet_grid(. ~ !!sgvar) +
ggtitle(paste0(mvar," by ",gvar))
print(plot)
}
When I run the loop by explicitly typing grpA in place of !!sgvar in the facet_grid() function, it works for some reason:
"FUNCTIONING BUT NOT WHAT I WANT EXAMPLE:"
# Same loop as the failing example, but with the facet column (grpA) typed
# literally in the facet_grid() formula.
for (i in seq_len(3)) {
  mvar <- names(dfr)[[i]]
  print(mvar)
  gvar <- names(dfr)[[4]]
  print(gvar)

  smvar <- sym(mvar)
  sgvar <- sym(gvar)

  plot <- ggplot(
    plot_data_prepr(dfr, gvar, mvar),
    aes(x = !!smvar, y = pct2, fill = !!smvar)
  ) +
    geom_bar(stat = "identity") +
    ylim(0, 1) +
    geom_text(aes(x = !!smvar, label = pct_lab, y = pct_pos + .02)) +
    facet_grid(. ~ grpA) +
    ggtitle(paste0(mvar, " by ", gvar))

  print(plot)
}
Of course, if I wanted to loop through a set of grouping variables, then needing to explicitly type each one would not allow for looping. Could someone explain why my code with the 'bang bang' operator inside facet_grid() doesn't work properly in the 'FAILING EXAMPLE' and also suggest how to remedy this error?
Thank you.
It's difficult to piece together exactly what you're looking for, since your example code has errors, unassigned variable names and pieces of code missing. However, I think you're wanting the loop to print all of the pairs of grouping variables and main variables by cycling through the names of your data frame.
So that there is no dubiety, here is a full reprex:
Load packages and create reproducible data:
library(dplyr)
library(ggplot2)
# Reproducible example data: three letter-valued factors and two two-level
# grouping factors, 50 rows each.
set.seed(1)
df <- tibble(
  x1 = factor(sample(letters[1:7], size = 50, replace = TRUE)),
  x2 = factor(sample(letters[1:7], size = 50, replace = TRUE)),
  x3 = factor(sample(letters[1:7], size = 50, replace = TRUE)),
  grpA = factor(
    sample(c("grp1", "grp2"), size = 50, replace = TRUE, prob = c(0.3, 0.7))
  ),
  grpB = factor(
    sample(c("grp1", "grp2"), size = 50, replace = TRUE, prob = c(0.6, 0.4))
  )
)
Define data preparation function
#' Build the per-group percentage table used for the bar charts
#'
#' Counts the levels of `mainvar` within each level of `groupvar` (empty
#' levels are kept via `.drop = FALSE`), drops rows containing `NA`, and adds
#' the columns needed for plotting.
#'
#' @param dat Data frame containing both columns.
#' @param groupvar String naming the grouping column.
#' @param mainvar String naming the column whose levels are counted.
#' @return The count table with added columns `pct` (within-group share),
#'   `pct2` (`pct`, but 0.005 where the count is 0), `grp_tot` (group total),
#'   `pct_lab` (percentage label) and `pct_pos` (`pct2 + .02`).
plot_data_prepr <- function(dat, groupvar, mainvar) {
  # Fail early with a readable message instead of a tidy-eval lookup error.
  missing_cols <- setdiff(
    c(as.character(groupvar), as.character(mainvar)),
    names(dat)
  )
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `dat`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  groupvar <- sym(groupvar)
  mainvar <- sym(mainvar)

  dat %>%
    group_by(!!groupvar) %>%
    # FALSE spelled out: the shorthand F is an ordinary, reassignable variable.
    count(!!mainvar, .drop = FALSE) %>%
    tidyr::drop_na() %>%
    mutate(
      pct = n / sum(n),
      pct2 = ifelse(n == 0, 0.005, n / sum(n)),
      grp_tot = sum(n),
      pct_lab = paste0(format(pct * 100, digits = 1), "%"),
      pct_pos = pct2 + .02
    )
}
Loop to create all 6 plots
# Outer loop: grouping columns (positions 4-5); inner loop: main variables
# (positions 1-3). The facet formula is assembled from the column-name string,
# so no column has to be typed literally.
for (gvar in names(df)[4:5]) {
  for (mvar in names(df)[1:3]) {
    print(
      ggplot(
        data = plot_data_prepr(df, gvar, mvar),
        mapping = aes(x = !!sym(mvar), y = pct2, fill = !!sym(mvar))
      ) +
        geom_bar(stat = "identity") +
        ylim(0, 1) +
        geom_text(mapping = aes(label = pct_lab, y = pct_pos + .02)) +
        facet_grid(as.formula(paste0(".~", gvar))) +
        ggtitle(paste0(mvar, " by ", gvar))
    )
  }
}
Output:
Created on 2020-06-30 by the reprex package (v0.3.0)
# One histogram panel per (mutually exclusive) type.
types <- c("A", "B", "C")
df <- data.frame(
  n = rnorm(100),
  type = sample(types, size = 100, replace = TRUE)
)
ggplot(df, aes(x = n)) +
  geom_histogram() +
  facet_grid(cols = vars(type))
Above is how I normally used facetting. But can I use it when instead of a categorical variable I have a set of columns that are indicator variables such as:
# Same measurements, but type membership is now three independent 0/1
# indicator columns.
df <- data.frame(
  n = rnorm(100),
  A = rbinom(100, size = 1, prob = .5),
  B = rbinom(100, size = 1, prob = .5),
  C = rbinom(100, size = 1, prob = .5)
)
Now the "Type" variable from my previous example isn't mutually exclusive. An observation can be "A and B" or "A and B and C" for example. However, I'd still like an individual histogram for any observation that has the presence of A, B, or C?
I would reshape the data with tidyr so that observations belonging to more than one category are duplicated, then use filter to remove the unwanted cases.
library("tidyr")
library("dplyr")
library("ggplot2")

# Measurements with three 0/1 indicator columns.
df <- data.frame(
  n = rnorm(100),
  A = rbinom(100, size = 1, prob = .5),
  B = rbinom(100, size = 1, prob = .5),
  C = rbinom(100, size = 1, prob = .5)
)

# Stack the indicators (each row of `df` appears once per indicator), keep
# only the rows where the indicator is set, then facet by indicator name.
df %>%
  gather(key = "type", value = "value", A, B, C) %>%
  filter(value == 1) %>%
  ggplot(mapping = aes(x = n)) +
  geom_histogram() +
  facet_wrap(vars(type))
I've always despised gather, so I'll add another method and one for the data.table fans.
library(data.table)
# setDT() converts `df` to a data.table in place; melt() stacks the indicator
# columns into `type`/`value`, and the trailing [value > 0] keeps only the rows
# whose indicator is set.
# Argument names are written out in full (id.vars, variable.name) instead of
# relying on partial matching of `id` and `variable`.
DT <- melt(setDT(df), id.vars = "n", variable.name = "type")[value > 0]
ggplot(DT, aes(n)) +
  geom_histogram() +
  facet_grid(~type)
#tidyland
library(reshape2)
library(dplyr)
library(ggplot2)
# Same idea as a pipeline: stack the indicator columns, keep the rows whose
# indicator is set, facet by indicator name.
# Argument names are written out in full (id.vars, variable.name) instead of
# relying on partial matching of `id` and `variable`.
df %>%
  melt(id.vars = "n", variable.name = "type") %>%
  filter(value > 0) %>%
  ggplot(aes(n)) +
  geom_histogram() +
  facet_grid(~type)
# Ten subjects in three groups (sizes 3, 3 and 4); var3 and var4 are missing
# for the last four subjects.
ID <- seq_len(10)
group <- rep(c(1, 2, 3), times = c(3, 3, 4))
var1 <- 6:15
var2 <- 7:16
var3 <- c(6:11, rep(NA, 4))
var4 <- c(4:9, rep(NA, 4))
data <- data.frame(ID, group, var1, var2, var3, var4)
library(dplyr)
# Asker's attempt, reported as not producing the intended boxplots.
# NOTE(review): graphics::boxplot() is not a dplyr verb, so it presumably
# ignores the grouping added by group_by() - confirm.
data %>% group_by(group) %>% boxplot(var1, var2)
The last line does not work as i wish. The idea is to get 4 boxplots in one graphic. Two for each variable. Maybe i need to use ggplot2?
You need to reorganize the data if you want to get both variables in the same plot. Here is a ggplot2 solution:
# load library
library(ggplot2)
library(tidyr)
library(ggthemes)
# reorganize data: keep ID and group as identifier columns and stack
# var1..var4 into a name column (`var`) and a value column (`value`).
# (gather(data, "ID", "group") would instead stack *every* column into two
# columns called ID and group, and the four-name rename would then fail.)
df <- pivot_longer(
  data,
  cols = -c(ID, group),
  names_to = "var",
  values_to = "value"
)
# keep a plain data.frame with columns ID, group, var, value
df <- as.data.frame(df)

# plot: one dodged box per variable within each group
ggplot(data = df) +
  geom_boxplot(
    aes(x = factor(group), y = value, fill = factor(var)),
    position = position_dodge(1)
  ) +
  scale_x_discrete(breaks = c(1, 2, 3), labels = c("A", "B", "C")) +
  theme_minimal() +
  scale_fill_grey()
Making boxplots with the same width is a whole different question (solution here), but one simple alternative would be like this:
# recode column `group` in the `data.frame`: 1 -> "A", 2 -> "B", else "C".
df <- transform(
  df,
  group = ifelse(group == 1, "A", ifelse(group == 2, "B", "C"))
)

# plot: one panel per group, one box (plus jittered points) per variable;
# the x axis carries no text, title or ticks because the fill/colour legends
# already identify the variables.
ggplot(df, aes(x = factor(var), y = value)) +
  geom_boxplot(aes(fill = factor((var))), position = position_dodge(1)) +
  geom_jitter(aes(color = factor((var)))) +
  facet_grid(. ~ group, scales = "free_x") +
  theme_minimal() +
  scale_fill_grey() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank()
  )
You might try melting the data frame first (as mentioned in the comment by @lukeA) and then sticking to base graphics. ggplot2 or lattice are other good options.
library(reshape2)
# Long format restricted to var1 and var2, then one base-graphics box per
# group/variable combination.
DF <- melt(
  data,
  id.vars = c("ID", "group"),
  measure.vars = c("var1", "var2")
)
boxplot(value ~ group + variable, data = DF)
Alternate lattice code, also using DF:
# bwplot() lives in the lattice package, which is not attached anywhere in
# this code, so it is called with an explicit namespace.
lattice::bwplot(~ value | variable + group, data = DF)
Alternate ggplot2 code, also using DF:
# One box per group on the x axis, filled by variable.
ggplot(
  data = DF,
  mapping = aes(x = factor(group), y = value, fill = variable)
) +
  geom_boxplot()
Although quite late, I found a great base-R solution here
# Create some data, e.g. from https://en.wikipedia.org/wiki/One-way_analysis_of_variance#Example
df <- as.data.frame(matrix(c(6, 8, 13, 8, 12, 9, 4, 9, 11, 5, 11, 8, 3, 6, 7, 4, 8, 12),
                           ncol = 3, byrow = TRUE))
# Long format: one row per value, with the source column index in `Treatment`.
df <- reshape(data = df, direction = "long", idvar = 1:3, varying = 1:3,
              sep = "", timevar = "Treatment")
df$Treatment <- as.factor(df$Treatment)
rownames(df) <- NULL

# Two stacked panels. par() returns the previous values of the settings it
# changes; they are kept so that *all* of them (mfrow and mar) can be put back
# at the end, not just mfrow.
old_par <- par(mfrow = c(2, 1),
               mar = c(1, 4, 4, 2) + 0.1) # mar = c(b, l, t, r)

# Top panel: boxes with jittered points, no x axis.
boxplot(V ~ Treatment, data = df, xlab = NULL, xaxt = "n",
        ylab = "V", main = "One-way anova with 3 different levels of one factor")
stripchart(V ~ Treatment, # Points
           data = df,           # Data
           method = "jitter",   # Random noise
           pch = 19,            # Pch symbols
           col = 4,             # Color of the symbol
           vertical = TRUE,     # Vertical mode
           add = TRUE)          # Add it over

# Bottom panel: same boxes with the x axis, no title, points not jittered.
par(mar = c(5, 4, 0, 2) + 0.1)
boxplot(V ~ Treatment, data = df, xlab = "Treatment",
        ylab = "V", main = NULL)
stripchart(V ~ Treatment, # Points
           data = df,           # Data
           method = "overplot", # Points drawn on top of each other (no noise)
           pch = 19,            # Pch symbols
           col = 4,             # Color of the symbol
           vertical = TRUE,     # Vertical mode
           add = TRUE)          # Add it over

# Restore the graphics settings that were in effect before this block.
par(old_par)
Result: