Creating stimuli in R with ggplot - r

I am trying to generate my own stimuli for an experiment using R. Below is the code that creates my (x,y) coordinates using the rnorm() with different a sample size of 100, different means and sd. I also create another variable to represent the size of the circles, which are determined by the runif().
dt <- data.frame(x = NA,
y = NA,
size = NA,
M = NA,
sd = NA,
col = NA,
iter = NA)
sa<-0
mySD<-c(5, 15)
myMeans<-c(35, 45)
colors<-c("Blues", "Reds")
for(i in 1:10){
for(s in mySD){
for(m in myMeans){
x = abs(rnorm(n=1, mean=m, sd=s))
y = abs(rnorm(n=1, mean=m, sd=s))
size = runif(1, 1, 25) #select a random x speed between [25,35]
sa<-sa+1
dt[sa,] <- NA
dt$x[sa]<-x
dt$y[sa]<-y
dt$M[sa]<-m
dt$sd[sa]<-s
dt$size[sa]<-size
dt$iter[sa]<-i
}
}
}
}
Next, I want to use ggplot(dt, aes(x, y, size=size) to plot. I want to randomly select 4 (x,y) values to plot for one graph, then 8 for another, then 16 for another, etc. Basically, I want to plot different graphs with a different number of data points. For example, some graphs that you would see would have 4 data points that vary by size and color, others would have 32 data points that vary in size and color. I m not sure how to select a set of unique data points from the data frame that I created. Any help would be great. I'm pretty new to R.

Here are two ways - depending if you wanted each group to not contain points from any other group.
I'll just use a dummy data frame that just has columns x, y, and size.
library(tidyverse)
dt <- tibble(x = runif(100), y = runif(100), size = runif(100))
Allowing groups to share the same points
Create a vector for the size of each group.
sample_sizes <- 2^(seq_len(4) + 1)
sample_sizes
#> [1] 4 8 16 32
Randomly sample the data frame and add a group column.
sampled <- map_dfr(
sample_sizes,
~sample_n(dt, .),
.id = "group"
)
Plot using facets.
ggplot(sampled, aes(x, y, size = size)) +
geom_point() +
facet_wrap(~group)
Requiring groups to have different points
First, we need a way to generate four 1s, eight 2s etc. This can be done using log2 and some tricks.
groups <- floor(log2(seq_len(nrow(dt)) + 3)) - 1
groups
#> [1] 1 1 1 1 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4
#> [36] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5
#> [71] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
Shuffle this vector and add it as a column.
dt$group <- sample(groups)
Facet using this new column to generate the desired plots.
ggplot(dt, aes(x, y, size = size)) +
geom_point() +
facet_wrap(~group)

First of all, the question's data creation code can be greatly simplified, rewritten with no loops at all. R is a vectorized language and the following will create a data frame with the same structure.
Don't forget to set the RNG seed, in order to make the results reproducible.
library(ggplot2)
set.seed(2020) # make the results reproducible
sd <- rep(rep(mySD, each = 2), 10)
M <- rep(myMeans, 2*10)
x <- abs(rnorm(n = 40, mean = M, sd = sd))
y <- abs(rnorm(n = 40, mean = M, sd = sd))
size <- runif(40, 1, 25)
iter <- seq_along(x)
dt2 <- data.frame(x, y, size, M, sd, iter)
dt2$col <- c("blue", "red")
Now the plots. The following function accepts a data frame X as its first argument and a number of points to draw as the second one. Then plots n points chosen at random with color col and size (a continuous variable) size.
plot_fun <- function(X, n){
Colors <- unique(X[["col"]])
Colors <- setNames(Colors, Colors)
i <- sample(nrow(X), n)
g <- ggplot(X[i,], aes(x, y, size = size, color = col)) +
geom_point() +
scale_color_manual(values = Colors) +
theme_bw()
g
}
plot_fun(dt2, 8)
To plot several values for n, produce the plots with lapply then use grid.arrange from package gridExtra.
plot_list <- lapply(c(4,8,16,32), function(n) plot_fun(dt2, n))
gridExtra::grid.arrange(grobs = plot_list)
Individual plots are still possible with
plot_list[[1]]
plot_list[[2]]
and so on.
Another way is to use faceting. Write another function, plot_fun_facets assigning the number of points to a new variable in the sample data frames, n, and use that variable as a faceting variable.
plot_fun_facets <- function(X, n){
Colors <- unique(X[["col"]])
Colors <- setNames(Colors, Colors)
X_list <- lapply(n, function(.n){
i <- sample(nrow(X), .n)
Y <- X[i,]
Y$n <- .n
Y
})
X <- do.call(rbind, X_list)
g <- ggplot(X, aes(x, y, size = size, color = col)) +
geom_point() +
scale_color_manual(values = Colors) +
facet_wrap(~ n) +
theme_bw()
g
}
plot_fun_facets(dt2, c(4,8,16,32))

Related

Explain the code underlying a linear model in R visualised with ggplot

I am trying to understand how linear modelling can be used to as an alternative to the t-test when analysing gene expression data. For a single gene, I have a dataframe of 20 gene expression values altogether in group 1 (n=10) and group 2 (n=10).
gexp = data.frame(expression = c(2.7,0.4,1.8,0.8,1.9,5.4,5.7,2.8,2.0,4.0,3.9,2.8,3.1,2.1,1.9,6.4,7.5,3.6,6.6,5.4),
group = c(rep(1, 10), rep(2, 10)))
The data can be (box)plotted using ggplot as shown below:
plot <- gexp %>%
ggplot(aes(x = group, y = expression)) +
geom_boxplot() +
geom_point()
plot
I wish to model the expression in groups 1 and 2 using the regression formula:
Y = Beta0 + (Beta1 x X) + e where Y is the expression I want to model and X represents the two groups that are encoded as 0 and 1 respectively. Therefore, the expression in group 1 (when x = 0) is equal to Beta0; and the expression in group 2 (when x = 1) is equal to Beta0 + Beta1.
If this is modelled with:
mod1 <- lm(expression ~ group, data = gexp)
mod1
The above code outputs an intercept of 2.75 and a slope of 1.58. It is the visualisation of the linear model that I don't understand. I would be grateful for a clear explanation of the below code:
plot +
geom_point(data = data.frame(x = c(1, 2), y = c(2.75, 4.33)),
aes(x = x, y = y),
colour = "red", size = 5) +
geom_abline(intercept = coefficients(mod1)[1] - coefficients(mod1)[2],
slope = coefficients(mod1)[2])
I get why the data.frame values are the ones chosen (the value of 4.33 is the sum of the intercept, Beta0 and the slope, Beta1) , but it is the geom_abline arguments I do not understand. Why is the intercept calculation as shown? In the text I am using it states, '...we need to subtract the slope from the intercept when plotting the linear model because groups 1 and 2 are encoded as 0 and 1 in the model, but plotted as 1 and 2 on the figure.' I don't follow this point and would be grateful for an explanation, without getting too technical.
I believe your code is correct if the group variable was encoded as a factor.
library(ggplot2)
gexp = data.frame(expression = c(2.7,0.4,1.8,0.8,1.9,5.4,5.7,2.8,2.0,4.0,3.9,2.8,3.1,2.1,1.9,6.4,7.5,3.6,6.6,5.4),
group = factor(c(rep(1, 10), rep(2, 10))))
plot <-
ggplot(gexp, aes(x = group, y = expression)) +
geom_boxplot() +
geom_point()
mod1 <- lm(expression ~ group, data = gexp)
plot +
geom_point(data = data.frame(x = c(1, 2), y = c(2.75, 4.33)),
aes(x = x, y = y),
colour = "red", size = 5) +
geom_abline(intercept = coefficients(mod1)[1] - coefficients(mod1)[2],
slope = coefficients(mod1)[2])
Created on 2022-03-30 by the reprex package (v2.0.1)
To understand the difference between factors and integers in specifying linear models, you can have a look at the model matrix.
model.matrix(y ~ f, data = data.frame(f = 1:3, y = 1))
#> (Intercept) f
#> 1 1 1
#> 2 1 2
#> 3 1 3
#> attr(,"assign")
#> [1] 0 1
model.matrix(y ~ f, data = data.frame(f = factor(1:3), y = 1))
#> (Intercept) f2 f3
#> 1 1 0 0
#> 2 1 1 0
#> 3 1 0 1
#> attr(,"assign")
#> [1] 0 1 1
#> attr(,"contrasts")
#> attr(,"contrasts")$f
#> [1] "contr.treatment"
Created on 2022-03-30 by the reprex package (v2.0.1)
In the first model matrix, what you specify is what you get: you're modelling something as a function of the intercept and the f variable. In this model, you account for that f = 2 is twice as much as f = 1.
This works a little bit differently when f is a factor. A k-level factor gets split up in k-1 dummy variables, where each dummy variable encodes with 1 or 0 whether it deviates from the reference level (the first factor level). By modelling it in this way, you don't consider that the 2nd factor level might be twice the 1st factor level.
Because in ggplot2, the first factor level is displayed at position = 1 and not at position = 0 (how it is modelled), your calculated intercept is off. You need to subtract 1 * slope from the calculated intercept to get it to display right in ggplot2.

How calculate probabillity of density plot?

I have following question: Is it possible to calculate a probabillity of a density plot?
So for example, I have following data frame
test<- data.frame(
Gruppe = rep(c("Aktien","Aktien"),
times=c(136, 37)),
Zufriedenheit = c(f_keineErf, f_Erf))
and i plot a density plot, with de ggplot function:
ggplot(test, aes(x=Zufriedenheit)) +geom_density()
How can I calculate the probability for example getting a value above 70?
Thank you!
Your data is not included in the question, so let's make up a small random sample:
library(ggplot2)
set.seed(69)
df <- data.frame(x = rnorm(10))
Now we can create a density plot as per your example:
p <- ggplot(df, aes(x)) +
geom_density() +
xlim(c(-5, 5))
p
Now, we can actually find the x and y coordinates of this line using the base R function density and extracting its x and y components into a data frame:
dens <- density(df$x)
d <- data.frame(x = dens$x, y = dens$y)
head(d)
#> x y
#> 1 -3.157056 0.0009453767
#> 2 -3.144949 0.0010145927
#> 3 -3.132841 0.0010870523
#> 4 -3.120733 0.0011665920
#> 5 -3.108625 0.0012488375
#> 6 -3.096517 0.0013382316
We can see plotting this as a red dashed geom_line it is the same as geom_density:
p + geom_line(data = d, aes(x, y), col = "red", linetype = 2, size = 2)
Now suppose we want to know the probability of having a value of more than one. We can show the area we are interested in like this:
p + geom_area(data = d[d$x >= 1,], aes(x, y), fill = "red")
Since the x values are all equally spaced in our data frame d, then the red area's proportion of the area under the line is a simple ratio of the sum of all y values at x values greater than one to the grand sum of y:
sum(d$y[d$x > 1])/sum(d$y)
#> [1] 0.1599931
So the probability of getting an x value of > 1 is 0.15999, or 16%
Created on 2020-08-17 by the reprex package (v0.3.0)

How to plot two Lorenz curve on same plot?

I want to plot two lorenz curves on one graph (data1 and data2), so please help me with the code?
I have one code which work perfectly with one curve (data1), but I want to make comparison with tho curves (data1 and data2) on same plot.
#Packages
library(ineq)
library(ggplot2)
library(scales)
library(grid)
#DATA SETS
set.seed(1)
data1<-sample(1000)
data2<-c(1000:2000) # I want to put this data set into second lorenz curve
# compute lorenz curve
lcolc <- Lc(data1)
# bring lorenz curve in another format easily readable by ggplot2
# namely reverse the L column so that lorenz curve is mirrored on diagonal
# p stays p (the diagonal)
# Uprob contains the indices of the L's, but we need percentiles
lcdf <- data.frame(L = rev(1-lcolc$L), p = lcolc$p, Uprob = c(1:length(lcolc$L)/length(lcolc$L)))
# basic plot with the diagonal line and the L line
p <- ggplot(lcdf, aes(y = Uprob, x = L)) + geom_line(colour = hcl(h=15, l=65, c=100)) + geom_line(aes(y = p, x = p))
# compute annotation lines at 50 percent L (uses a heuristic)
index <- which(lcdf$L >= 0.499 & lcdf$L <= 0.501)[1]
ypos <- lcdf$L[index]
yposs <- c(0,ypos)
xpos <- index/length(lcdf$L)
xposs <- c(0,xpos)
ypositions <- data.frame(y = xposs, x = c(ypos,ypos))
xpositions <- data.frame(y = c(xpos,xpos), x = yposs)
# add annotation line
p <- p + geom_line(data = ypositions, aes(x = x, y = y),
linetype="dashed") + geom_line(data = xpositions, aes(x = x, y = y),
linetype="dashed")
# set axes and labels (namely insert custom breaks in scales)
p <- p + scale_x_continuous(breaks=c(0, xpos,0.25,0.5,0.75,1),
labels = percent_format()) + scale_y_continuous(
labels = percent_format())
# add minimal theme
p <- p + theme_minimal() + xlab("Percent of Population") + ylab("Percent of Income")
# customize theme
p <- p + theme(plot.margin = unit(c(0.5,1,1,1), "cm"),
axis.title.x = element_text(vjust=-1),
axis.title.y = element_text(angle=90, vjust=0),
panel.grid.minor = element_blank(),
plot.background = element_rect(fill = rgb(0.99,0.99,0.99), linetype=0))
# print plot
p
Here is a minimal, reproducible example of how to display multiple curves on the same plot using ggplot2. The key point here is to pass data in "long-form" to the ggplot() function.
library(ggplot2)
# Construct example data.frame with separate columns for each curve.
x = seq(from=0, to=3, by=1)
dat = data.frame(x=x,
y1=9 * x,
y2=3 * x^2,
y3=x^3)
dat
# x y1 y2 y3
# 1 0 0 0 0
# 2 1 9 3 1
# 3 2 18 12 8
# 4 3 27 27 27
# Convert data to long form, putting all y-values in a single column.
mdat = reshape2::melt(dat, id.vars="x", measure.vars=c("y1", "y2", "y3"))
mdat
# x variable value
# 1 0 y1 0
# 2 1 y1 9
# 3 2 y1 18
# 4 3 y1 27
# 5 0 y2 0
# 6 1 y2 3
# 7 2 y2 12
# 8 3 y2 27
# 9 0 y3 0
# 10 1 y3 1
# 11 2 y3 8
# 12 3 y3 27
p = ggplot(data=mdat, aes(x=x, y=value, colour=variable, group=variable)) +
geom_point() +
geom_line()

Convert 3 Matrices to 1 plot using ggplot2

I'm trying to combine 3 matrices to one plot.
I'm trying to simulate a mark-recapture scenario. However, instead of having 1 population, there are 3 (which are contained in each of their matrices).
Because I want to sample from each population once, the x-axis will range from 0-300. However, 1-100 on the x-axis will correspond to the samples collected from population:
101-200 from population 2
201-300 from population 3. The only deviation from the picture is that I'd like a continuous line, from 0-300.
I have the code to create these matrices and made each matrix the same size, but I don't know how to 1) convert and plot them using ggplot2 2) put all three on one graph
## Population size
N <- 400
N
## Vector labeling each item in the population
pop <- c(1:N)
pop
## Lower and upper bounds of sample size
lower.bound <- round(x = .05 * N, digits = 0)
lower.bound ## Smallest possible sample size
upper.bound <- round(x = .15 * N, digits = 0)
upper.bound ## Largest possible sample size
## Length of sample size interval
length.ss.interval <- length(c(lower.bound:upper.bound))
length.ss.interval ## total possible sample sizes, ranging form lower.bound to upper.bound
## Determine a sample size randomly (not a global variable...simply for test purposes)
## Between lower and upper bounds set previously
## Give equal weight to each possible sample size in this interval
sample(x = c(lower.bound:upper.bound),
size = 1,
prob = c(rep(1/length.ss.interval, length.ss.interval)))
## Specify number of samples to take
n.samples <- 100
## Initiate empty matrix
## 1st column is population (item 1 thorugh item 400)
## 2nd through nth column are all rounds of sampling
dat <- matrix(data = NA,
nrow = length(pop),
ncol = n.samples + 1)
dat[,1] <- pop
## Take samples of random sizes
## Record results in columns 2 through n
## 1 = sampled (marked)
## 0 = not sampled (not marked)
for(i in 2:ncol(dat)) {
a.sample <- sample(x = pop,
size = sample(x = c(lower.bound:upper.bound),
size = 1,
prob = c(rep(1/length.ss.interval, length.ss.interval))),
replace = FALSE)
dat[,i] <- dat[,1] %in% a.sample
}
## How large was each sample size?
apply(X = dat, MARGIN = 2, FUN = sum)
## 1st element is irrelevant
## 2nd element through nth element: sample size for each of the 100 samples
schnabel.comp <- data.frame(sample = 1:n.samples,
n.sampled = apply(X = dat, MARGIN = 2, FUN = sum)[2:length(apply(X = dat, MARGIN = 2, FUN = sum))]
)
## First column: which sample, 1-100
## Second column: number selected in that sample
## How many items were previously sampled?
## For 1st sample, it's 0
## For 2nd sample, code is different than for remaning samples
n.prev.sampled <- c(0, rep(NA, n.samples-1))
n.prev.sampled
n.prev.sampled[2] <- sum(ifelse(test = dat[,3] == 1 & dat[,2] == 1,
yes = 1,
no = 0))
n.prev.sampled
for(i in 4:ncol(dat)) {
n.prev.sampled[i-1] <- sum(ifelse(test = dat[,i] == 1 & rowSums(dat[,2:(i-1)]) > 0,
yes = 1,
no = 0))
}
schnabel.comp$n.prev.sampled <- n.prev.sampled
## n.newly.sampled: in each sample, how many items were newly sampled?
## i.e., never seen before?
schnabel.comp$n.newly.sampled <- with(schnabel.comp,
n.sampled - n.prev.sampled)
## cum.sampled: how many total items have you seen?
schnabel.comp$cum.sampled <- c(0, cumsum(schnabel.comp$n.newly.sampled)[2:n.samples-1])
## numerator of schnabel formula
schnabel.comp$numerator <- with(schnabel.comp,
n.sampled * cum.sampled)
## denominator of schnable formula is n.prev.sampled
## pop.estimate -- after each sample (starting with 2nd -- need at least two samples)
schnabel.comp$pop.estimate <- NA
for(i in 1:length(schnabel.comp$pop.estimate)) {
schnabel.comp$pop.estimate[i] <- sum(schnabel.comp$numerator[1:i]) / sum(schnabel.comp$n.prev.sampled[1:i])
}
## Plot population estimate after each sample
if (!require("ggplot2")) {install.packages("ggplot2"); require("ggplot2")}
if (!require("scales")) {install.packages("scales"); require("scales")}
small.sample.dat <- schnabel.comp
small.sample <- ggplot(data = small.sample.dat,
mapping = aes(x = sample, y = pop.estimate)) +
geom_point(size = 2) +
geom_line() +
geom_hline(yintercept = N, col = "red", lwd = 1) +
coord_cartesian(xlim = c(0:100), ylim = c(300:500)) +
scale_x_continuous(breaks = pretty_breaks(11)) +
scale_y_continuous(breaks = pretty_breaks(11)) +
labs(x = "\nSample", y = "Population estimate\n",
title = "Sample sizes are between 5% and 15%\nof the population") +
theme_bw(base_size = 12) +
theme(aspect.ratio = 1)
small.sample
It seems that what you want to do is...
Given three data frames like this:
> d1
x y
1 1 0.899683096
2 2 0.604513234
3 3 0.005824789
4 4 0.442692758
5 5 0.103125175
> d2
x y
1 1 0.35260029
2 2 0.06248654
3 3 0.79272047
> d3
x y
1 1 0.4791399
2 2 0.2583674
3 3 0.1283629
4 4 0.7133847
Construct d:
> d = rbind(d1,d2,d3)
> d$x = 1:nrow(d)
> d
x y
1 1 0.899683096
2 2 0.604513234
3 3 0.005824789
4 4 0.442692758
5 5 0.103125175
6 6 0.352600287
7 7 0.062486543
8 8 0.792720473
9 9 0.479139947
10 10 0.258367356
11 11 0.128362933
12 12 0.713384651
And then plot x against y as normal.

How to mimic geom_boxplot() with outliers using geom_boxplot(stat = "identity")

I would like to pre-compute by-variable summaries of data (with plyr and passing a quantile function) and then plot with geom_boxplot(stat = "identity"). This works great except it (a) does not plot outliers as points and (b) extends the "whiskers" to the max and min of the data being plotted.
Example:
library(plyr)
library(ggplot2)
set.seed(4)
df <- data.frame(fact = sample(letters[1:2], 12, replace = TRUE),
val = c(1:10, 100, 101))
df
# fact val
# 1 b 1
# 2 a 2
# 3 a 3
# 4 a 4
# 5 b 5
# 6 a 6
# 7 b 7
# 8 b 8
# 9 b 9
# 10 a 10
# 11 b 100
# 12 a 101
by.fact.df <- ddply(df, c("fact"), function(x) quantile(x$val))
by.fact.df
# fact 0% 25% 50% 75% 100%
# 1 a 2 3.25 5.0 9.00 101
# 2 b 1 5.50 7.5 8.75 100
# What I can do...with faults (a) and (b) above
ggplot(by.fact.df,
aes(x = fact, ymin = `0%`, lower = `25%`, middle = `50%`,
upper = `75%`, ymax = `100%`)) +
geom_boxplot(stat = "identity")
# What I want...
ggplot(df, aes(x = fact, y = val)) +
geom_boxplot()
What I can do...with faults (a) and (b) mentioned above:
What I would like to obtain, but still leverage pre-computation via plyr (or other method):
Initial Thoughts: Perhaps there is some way to pre-compute the true end-points of the whiskers without the outliers? Then, subset the data for outliers and pass them as geom_point()?
Motivation: When working with larger datasets, I have found it faster and more practical to leverage plyr, dplyr, and/or data.table to pre-compute the stats and then plot them rather than having ggplot2 to the calculations.
UPDATE
I am able to extract what I need with the following mix of dplyr and plyr code, but I'm not sure if this is the most efficient way:
df %>%
group_by(fact) %>%
do(ldply(boxplot.stats(.$val), data.frame))
Source: local data frame [6 x 3]
Groups: fact
fact .id X..i..
1 a stats 2
2 a stats 4
3 a stats 10
4 a stats 13
5 a stats 16
6 a n 9
Here's my answer, using built-in functions quantile and boxplot.stats.
geom_boxplot does the calcualtions for boxplot slightly differently than boxplot.stats. Read ?geom_boxplot and ?boxplot.stats to understand my implementation below
#Function to calculate boxplot stats to match ggplot's implemention as in geom_boxplot.
my_boxplot.stats <-function(x){
quantiles <-quantile(x, c(0, 0.25, 0.5, 0.75, 1))
labels <-names(quantile(x))
#replacing the upper whisker to geom_boxplot
quantiles[5] <-boxplot.stats(x)$stats[5]
res <-data.frame(rbind(quantiles))
names(res) <-labels
res$out <-boxplot.stats(x)$out
return(res)
}
Code to calculate the stats and plot it
library(dplyr)
df %>% group_by(fact) %>% do(my_boxplot.stats(.$val)) %>%
ggplot(aes(x=fact, y=out, ymin = `0%`, lower = `25%`, middle = `50%`,
upper = `75%`, ymax = `100%`)) +
geom_boxplot(stat = "identity") + geom_point()
To get the correct statistics, you have to do some more calculations than just finding the quantiles. The geom_boxplot function with stat = "identity" does not draw the outliers. So you have to calculate the statistics without the outliers and then use geom_point to draw the outliers seperately. The following function (basically a simplified version of stat_boxplot) is probably not the most efficient, but it gives the desired result:
box.df <- df %>% group_by(fact) %>% do({
stats <- as.numeric(quantile(.$val, c(0, 0.25, 0.5, 0.75, 1)))
iqr <- diff(stats[c(2, 4)])
coef <- 1.5
outliers <- .$val < (stats[2] - coef * iqr) | .$val > (stats[4] + coef * iqr)
if (any(outliers)) {
stats[c(1, 5)] <- range(c(stats[2:4], .$val[!outliers]), na.rm=TRUE)
}
outlier_values = .$val[outliers]
if (length(outlier_values) == 0) outlier_values <- NA_real_
res <- as.list(t(stats))
names(res) <- c("lower.whisker", "lower.hinge", "median", "upper.hinge", "upper.whisker")
res$out <- outlier_values
as.data.frame(res)
})
box.df
## Source: local data frame [2 x 7]
## Groups: fact
##
## fact lower.whisker lower.hinge median upper.hinge upper.whisker out
## 1 a 2 3.25 5.0 9.00 10 101
## 2 b 1 5.50 7.5 8.75 9 100
ggplot(box.df, aes(x = fact, y = out, middle = median,
ymin = lower.whisker, ymax = upper.whisker,
lower = lower.hinge, upper = upper.hinge)) +
geom_boxplot(stat = "identity") +
geom_point()

Resources