I would like to plot box plots for a data set that including four categorical such as: Good, Bad, VeryGood, and VeryBad and four normal distribution.
My question how to make plots the four categorical with four different normal distribution in one plot separate from each other, I have tried (see below) but there look a mess.
I have used an example that I found it here and did some changes on it.
I added another edited plot which I would like each plot of box plot look like this one which more cleat and each four categorical (blue, yellow, red and green) are clear .
par(mfrow=c(2,2))
df <- data.frame(id = c(rep("Good",200), rep("Bad", 200),
rep("VeryGood",200), rep("VeryBad",200)),
F1 = c(rnorm(200,10,2), rnorm(200,8,1), rnorm(200,5,2),rnorm(200,7,3)),
F2 = c(rnorm(200,7,1), rnorm(200,6,1), rnorm(200,8,1),rnorm(200,12,4)),
F3 = c(rnorm(200,6,2), rnorm(200,9,3),rnorm(200,12,3),rnorm(200,15,2)),
F4 = c(rnorm(200,12,3), rnorm(200,8,2),rnorm(200,8,5),rnorm(200,5,1)))
boxplot(df[,-1], xlim = c(0.5, ncol(df[,-1])+0.9),
boxfill=rgb(1, 1, 1, alpha=1), border=rgb(1, 1, 1, alpha=1)) #invisible boxes
boxplot(df[which(df$id=="Good"), -1], xaxt = "n", add = TRUE, boxfill="red", boxwex=0.25,
at = 1:ncol(df[,-1]) - 0.15) #shift these left by -0.15
boxplot(df[which(df$id=="Bad"), -1], xaxt = "n", add = TRUE, boxfill="blue", boxwex=0.25,
at = 1:ncol(df[,-1]) + 0.15) #shift these right by +0.15
boxplot(df[which(df$id=="VeryBad"), -1], xaxt = "n", add = TRUE, boxfill="green", boxwex=0.25,
at = 1:ncol(df[,-1]) + 0.25) #shift these right by +0.15
boxplot(df[which(df$id=="VeryGood"), -1], xaxt = "n", add = TRUE, boxfill="yellow", boxwex=0.25,
at = 1:ncol(df[,-1]) + 0.45) #shift these right by +0.15
If you aren't set on using Base R graphics, and looking at the new plot you added to the question, I believe this is what you are looking for:
library(dplyr)
library(tidyr)
library(ggplot2)
df <- data.frame(id = c(rep("Good",200), rep("Bad", 200),
rep("VeryGood",200), rep("VeryBad",200)),
F1 = c(rnorm(200,10,2), rnorm(200,8,1), rnorm(200,5,2),rnorm(200,7,3)),
F2 = c(rnorm(200,7,1), rnorm(200,6,1), rnorm(200,8,1),rnorm(200,12,4)),
F3 = c(rnorm(200,6,2), rnorm(200,9,3),rnorm(200,12,3),rnorm(200,15,2)),
F4 = c(rnorm(200,12,3), rnorm(200,8,2),rnorm(200,8,5),rnorm(200,5,1)))
df2 <- tidyr::gather(df, key = "FVar", value = "value", F1:F4)
df2 %>%
ggplot(aes(id, value, fill = id)) +
geom_boxplot() +
facet_grid(. ~ FVar) +
theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5))
Related
I have continuous data that I'd like to plot using R's plotly with a box or violin plot without the outliers and whiskers:
set.seed(1)
df <- data.frame(group=c(rep("g1",500),rep("g2",700),rep("g3",600)),
value=c(c(rep(0,490),runif(10,10,15)),abs(rnorm(700,1,10)),c(rep(0,590),runif(10,10,15))),
stringsAsFactors = F)
df$group <- factor(df$group, levels = c("g1","g2","g3"))
I know how to remove outliers in plotly:
plotly::plot_ly(x = df$group, y =df$value, type = 'box', color = df$group, boxpoints = F, showlegend = F)
But I'm still left with the whiskers.
I tried using ggplot2 for that (also limiting the height of the y-axis to that of the 75 percentile):
library(ggplot2)
gp <- ggplot(df, aes(group, value, color = group, fill = group)) + geom_boxplot(outlier.shape = NA, coef = 0) +
scale_y_continuous(limits = c(0, ceiling(max(dplyr::summarise(dplyr::group_by(df, group), tile = quantile(value, probs = 0.75))$tile)))) +
theme_minimal() + theme(legend.position = "none",axis.title = element_blank())
But then trying to convert that to a plotly object doesn't maintain that:
plotly::ggplotly(gp)
Any idea?
This is a workaround.
I changed your plot a bit, first.
# box without outliers
p <- plot_ly(df, x = ~group, y = ~value, type = 'box',
color = ~group, boxpoints = F, showlegend = F,
whiskerwidth = 0, line = list(width = 0)) # no whisker, max or min line
Then I add the medians back to the graph. This requires calculating the medians, matching the colors, and creating the shape lists for Plotly.
For the colors, it's odd, the first three default colors are used, but the order is g3, g2, g1...
# the medians
res = df %>% group_by(group) %>%
summarise(med = median(value))
# default color list: https://community.plotly.com/t/plotly-colours-list/11730/2
col = rev(c('#1f77b4', '#ff7f0e', '#2ca02c')) # the plot is colored 3, 2, 1
# discrete x-axis; domain default [0, 1]
# default box margin = .08, three groups, each get 1/3 of space
details <- function(col){ # need everytime basics
list(type = 'line',
line = list(color = col, width = 4),
xref = "paper", yref = "y")
}
# horizontal segments/ median
segs = lapply(1:nrow(res),
function(k){
x1 <- k/3 - .08 # if the domain is [0, 1]
x0 <- (k - 1)/3 + .08
y0 <- y1 <- res[k, ]$med
line = list("x0" = x0, "x1" = x1,
"y0" = y0, "y1" = y1)
deets = details(col[k])
c(deets, line)
})
Finally, I added them back onto the plot.
p %>% layout(shapes = segs)
I made the lines obnoxiously wide, but you get the idea.
If you wanted the IQR outline back, you could do this, as well. I used functions here, as well. I figured that the data you've provided is not the actual data, so the function will serve a purpose.
# include IQR outline
res2 = df %>% group_by(group) %>%
summarise(q1 = setNames(quantile(value, type = 7, 1/4), NULL),
q3 = setNames(quantile(value, type = 7, 3/4), NULL),
med = median(value))
# IQR segments
rects = lapply(1:nrow(res2), # if the domain is [0, 1]
function(k){
x1 <- k/3 - .08
x0 <- (k - 1)/3 + .08
y0 <- res2[k, ]$q1
y1 <- res2[k, ]$q3
line = list(color = col[k], width = 4)
rect = list("x0" = x0, "x1" = x1,
"y0" = y0, "y1" = y1,
type = "rect", xref = "paper",
yref = "y", "line" = line)
rect
})
rects = append(segs, rects)
p %>% layout(shapes = rects)
I've been trying to plot different exponential decay curves on to one graph. Initially I thought this would be rather be easy but it is turning out to be rather frustrating.
What I want to get:
nlsplot(k_data_nls, model = 6, start = c(a= 603.3, b= -0.03812), xlab = "hours", ylab = "copies")
nlsplot(r4, model=6, start=c(a=25.5487,b=-0.5723), xlab = "hours", ylab = "copies")
Here is some additional code for the data:
df4 <- data.frame(hours=c(0,1,3,5,12,24,48,96,168,336,504,720), copies=c(603.3,406,588,393.27,458.47,501.67,767.53,444.13,340.6,298.47,61.42,51.6))
nlsfit(df4, model=6, start=c(a=603.3,b=-0.009955831526))
d4plot <- nlsplot(df4, model=6, start=c(a=603.3,b=-0.009955831526))
r4 <- data.frame(hours=c(0,1,3,5,12,24,48,96,168,336,504,720), copies=c(26,13.44,4.57,3.12,6.89,0.71,0.47,0.47,0,0,0.24,0.48))
nlsLM(copies ~ a*exp(b*hours), data=r4, start=list(a=26,b=-0.65986))
r4plot <- nlsplot(r4, model=6, start=c(a=25.5487,b=-0.5723))
Essentially I want to be able to get both of these plots on one graph. I'm new to R so I'm not too sure where I can go from here. Thank you !
I don't know if this is actually helpful because it's so specific, but this is how I would do it (with ggplot2). First, you need data for the function you want to plot. Take the x for all the values you want to display and apply your function with your coefficients to the data. You need to have data points, not just a function, to plot data.
df_simulated <- data.frame("x" = rep(1:100, 2),
"class"= rep(c("DNA", "RNA"), each = 100))
df_simulated$y <- c(1683.7 * exp(-0.103 * 1:100), # DNA
578.7455 * exp(-0.156 * 1:100)) # RNA
However, since I never used the packages you used, I don't know how to extract the values from the models, so I took the values in your example plot. It's important that the "simulated" values for both groups are within one dataframe, and that you have a column which attributes the points to the respective group (RNA or DNA). At least it's easier if you do it like this. Then you need a data frame with your actual observations for the dots. I invented data again:
df_observed <- data.frame("x" = c(12, 13, 25, 26, 50, 51),
"y" = c(500, 50, 250, 25, 0, 5),
"class" = rep(c("DNA", "RNA"), 3))
Then you can create the plot. With color=class you specify that the data points will be grouped by "class" and will be colored accordingly. ("apple" and "banana" are just dummy words to demonstrate linebreaks)
ggplot() +
geom_line(data = df_simulated, aes(x = x, y = y, color = class), size = 1, linetype = "dashed") +
geom_point(data = df_observed, aes(x = x, y = y, color = class), size = 4, pch = 1) +
annotate("text", x = 50, y = 1250, label = "DNA\napple", color = "tomato", hjust = 0) +
annotate("text", x = 50, y = 750, label ="RNA\nbanana", color = "steelblue", hjust = 0) +
ggtitle(expression(~italic("Styela clava")~"(isolated)")) +
ylab("COI copies per 1ml") +
xlab("Time since removal of organisms (hours)") +
theme_classic() +
theme(legend.position = "none") +
scale_color_manual(values = c("DNA" = "tomato", "RNA" = "steelblue"))
This is the output:
First note that R squared is normally used for linear models and not for nonlinear models so the use of this statistic is suspect here; however, below we show it anyways since it seems that is what was asked for. A different goodness of fit measurement that is often used is residual standard error. If fm is the fitted model from nls then sigma(fm) is the residual standard error. Smaller values are more favorable. summary(fm) also reports this value.
For each of df4 and r4 we use lm to get starting values (taking log of both sides we get a model that is linear in log(a) and b), run nls fits and get the coefficients.
Now plot the points and add the fitted lines and legend. (Note that in setting up the graph we use rbind which assumes that df4 and r4 have the same column names, which they do.)
Note that the data provided in the question is much different than that shown in the question's image.
The code below does not need starting values since it uses lm to get them, runs nls and automatically extracts whatever information is needed for the graph.
1) Classic graphics In this alternative no packages are used.
r2 <- function(fm, digits = 3) {
y <- fitted(fm) + resid(fm)
r2 <- 1 - deviance(fm) / sum((y - mean(y))^2)
if (is.numeric(digits)) r2 <- round(r2, digits)
r2
}
fo <- copies ~ a * exp(b * hours) # formula used in nls
# get nls fitted model and coefficients for df4
co_d0 <- coef(lm(log(copies) ~ hours, df4, subset = copies > 0))
fmd <- nls(fo, df4, start = list(a = exp(co_d0[[1]]), b = co_d0[[2]]))
co_d <- round(coef(fmd), 4)
# get nls fitted model and coefficients for r4
co_r0 <- coef(lm(log(copies) ~ hours, r4, subset = copies > 0))
fmr <- nls(fo, r4, start = list(a = exp(co_r0[[1]]), b = co_r0[[2]]))
co_r <- round(coef(fmr), 4)
both <- rbind(cbind(df4, col = "red"), cbind(r4, col = "blue"))
plot(both[1:2], col = both$col,
xlab = "Time since removal of organisms", ylab = "COI copies per 1ml",
main = "C)" ~ italic("Styela clava") ~ "(isolated)", adj = 0)
lines(fitted(fmd) ~ hours, df4, col = "red", lty = 2)
lines(fitted(fmr) ~ hours, r4, col = "blue", lty = 2)
legend <- c(bquote(DNA),
bquote(y == .(co_d[[1]]) * e ^ {.(co_d[[2]])*x}),
bquote(R^2 == .(r2(fmd))),
bquote(),
bquote(RNA),
bquote(y == .(co_r[[1]]) * e ^ {.(co_r[[2]])*x}),
bquote(R^2 == .(r2(fmr))))
legend("right", legend = as.expression(legend), bty = "n",
text.col = c("red", "red", "red", NA, "blue", "blue", "blue"))
2) ggplot2 This uses ggplot2 and gridtext. r2, fmd, fmr, co_d and co_r are all taken from (1). We use richtest_grob from gridtext to create a custom grob for the legend and pass it using annotate_custom.
library(gridtext)
library(ggplot2)
txt <- sprintf(
"<span style='color:red'>DNA
<br>y = %.3f*e<sup>%.3fx</sup>
<br>R<sup>2</sup> = %.3f</span>
<br><br><span style='color:blue'>RNA
<br>y = %.3f*e<sup>%.3fx</sup>
<br>R<sup>2</sup> = %.3f</span>",
co_d[[1]], co_d[[2]], r2(fmd), co_r[[1]], co_r[[2]], r2(fmr))
both2 <- rbind(cbind(df4, col = "red", fitted = fitted(fmd)),
cbind(r4, col = "blue", fitted = fitted(fmr)))
ggplot(both2, aes(hours, copies, col = I(col))) +
geom_point() +
geom_line(aes(y = fitted), linetype = 2) +
annotation_custom(richtext_grob(txt, hjust = 0)) +
theme(legend.position = "none") +
labs(x = "Time since removal of organisms", y = "COI copies per 1ml") +
ggtitle(("C)" ~ italic("Styela clava") ~ "(isolated)"))
3) lattice
This uses legend from (1) and both2 from (2). First create a plot for the data points. It will also contain the legend, axes and labels. Then add a layer for the fitted lines. main.settings specifies that the main title should be left justified and bold and is adapted from this page.
library(latticeExtra)
main.settings <- list(par.main.text = list(font = 2, just = "left",
x = grid::unit(25, "mm")))
xyplot(copies ~ hours, both2, col = both2$col,
xlab = "Time since removal of organisms", ylab = "COI copies per 1ml",
main = "C)" ~ italic("Styela clava") ~ "(isolated)", adj = 0,
key = list(text = list(as.expression(legend),
col = c("red", "red", "red", NA, "blue", "blue", "blue")),
x = 0.65, y = 0.65, columns = 1),
par.settings = main.settings) +
as.layer(xyplot(fitted ~ hours, both2, groups = col, type = "l", lty = 2))
I'm trying to plot multiple simple Random Walks in R, but am having problems doing so.
Please be aware that by simple Random Walk I mean the Sum of Random Variables that can either be {-1} or {1} with each values having the same probability, not some Random Walk absed on white Noise. (see the definition on https://en.wikipedia.org/wiki/Random_walk#One-dimensional_random_walk )
I use the following code to plot the Random Walks:
set.seed(1)
n <- 200
Random_Walk<- cumsum(sample(c(-1, 1), n, TRUE))
n <- 200
Random_Walk_2 <- cumsum(sample(c(-1, 1), n, TRUE))
ts.plot(Random_Walk, gpars=list(xlab="Length of Random Walk", ylab="Distance from origin",lty=c(1:1)))
This code works fine, but once I try to plot both Random Walks in the same Graph it breaks.
Can someone explain how i could plot both of them or even multiple Random Walks in one Graph?
Additionally I was wondering whether there is some tools that could give me the variance or the standard deviation of all those Random Walks
Thank you all in advance!!
This is a possible solution in R-base
plot(Random_Walk, type = "l", xlim = c(0, 200), ylim = c(-15, 15),
col = "blue", xlab = "n", ylab = "Rw")
par(new=T)
plot(Random_Walk_2, type = "l", xlim = c(0, 200), ylim = c(-15, 15),
col = "red", xlab = "n", ylab = "Rw")
This is a possible solution with ggplot2:
library(ggplot2)
df_rw <- data.frame(n = 1:200, r1 = Random_Walk, r2 = Random_Walk_2)
ggplot(df_rw) +
geom_line(aes(n, r1), col = "blue") +
geom_line(aes(n, r2), col = "red") +
labs(x = "n", y = "Rw")
This is another possibile solution with ggplot2
library(ggplot2)
df_rw2 <- data.frame(n = c(1:200, 1:200),
rw = c(Random_Walk, Random_Walk_2),
lab = rep(c("Random Walk 1", "Random Walk 2"), each = 200))
ggplot(df_rw2) +
geom_line(aes(x = n, y = rw, color = lab)) +
scale_color_manual(values = c("red", "blue"))
Here is a simple base R solution with the many times forgotten function matplot.
RW <- cbind(Random_Walk, Random_Walk_2)
matplot(RW, type = "l", lty = "solid")
A ggplot2 solution could be the following. But the data format should be the long format and the data is in wide format. See this post on how to reshape the data from wide to long format.
library(tidyverse)
as.data.frame(RW) %>%
mutate(x = row_number()) %>%
pivot_longer(-x) %>%
ggplot(aes(x, value, color = name)) +
geom_line()
As for the 2 first moments of your random walk, see this post of Mathematics Stack Exchange.
I am attempting to use grid.arrange to plot several graphs in one column, as the x axis is the same for all graphs. However the different graphs have different number of discrete values, resulting in Samples in the top graph more distanced than the graph below. Is there a way to set the distance between discrete values on an axis so the distance between Sample1 and Sample2 lines is the same for both graphs? Thanks!
Here is an example:
library(reshape2)
library(tidyverse)
library(gridExtra)
#Data frame 1
a <- c(1,2,3,4,5)
b <- c(10,20,30,40,50)
Species <- factor(c("Species1","Species2","Species3","Species4","Species5"))
bubba <- data.frame(Sample1=a,Sample2=b,Species=Species)
bubba$Species=factor(bubba$Species, levels=bubba$Species)
xm=melt(bubba,id.vars = "Species", variable.name="Samples", value.name = "Size")
#Data frame 2
c <- c(1,2,3,4,5)
d <- c(10,20,30,40,50)
e <- c(1,2,3,4,5)
f <- c(10,20,30,40,50)
bubban <- data.frame(Sample1=c,Sample2=d,Sample3=e,Sample4=f,Species=Species)
xn=melt(bubban,id.vars = "Species", variable.name="Samples", value.name = "Size")
#Not related, but part of my original script i am using
shrink_10s_trans = trans_new("shrink_10s",
transform = function(y){
yt = ifelse(y >= 10, y*0.1, y)
return(yt)
},
inverse = function(yt){
return(yt) # Not 1-to-1 function, picking one possibility
}
)
#Make plot 1
p1=ggplot(xm,aes(x= Species,y= fct_rev(Samples), fill = Size < 10))+
geom_point(aes(size=Size), shape = 21)+
scale_size_area(trans = shrink_10s_trans, max_size = 10,
breaks = c(1,3,5,10,20,30,40,50),
labels = c(1,3,5,10,20,30,40,50)) +
scale_fill_manual(values = c(rgb(136,93,100, maxColorValue = 255),
rgb(236,160,172, maxColorValue = 255))) +
theme_bw()+theme(axis.text.x = element_text(angle = -45, hjust = 1))+scale_x_discrete(position = "top")
#Make plot 2
p2=ggplot(xn,aes(x= Species,y= fct_rev(Samples), fill = Size < 10))+
geom_point(aes(size=Size), shape = 21)+
scale_size_area(trans = shrink_10s_trans, max_size = 10,
breaks = c(1,3,5,10,20,30,40,50),
labels = c(1,3,5,10,20,30,40,50)) +
scale_fill_manual(values = c(rgb(136,93,100, maxColorValue = 255),
rgb(236,160,172, maxColorValue = 255))) +
theme_bw()+theme(axis.text.x = element_blank())
#arrange the plots
grid.arrange(p1,p2,nrow=2)
Instead of using grid.extra use ggpubr::ggarrange function. It lets you specify heights of each plot and set shared legend.
# Using plots generated with OPs code
ggpubr::ggarrange(p1, p2, nrow = 2, heights = c(1.3, 2),
common.legend = TRUE, legend = "right")
With argument heights you can set relative heights of each provided plot.
# I am trying to combine a horizontal beside barplot with the table
# with the values in it.
# E.g. original table, including sample_ids
df = data.frame(
sample_id=c("s01","s02","s03","s04","s05","s06","s07","s08","s09","s10"),
one=runif(10,0,10),
two=runif(10,0,10),
three=runif(10,0,10),
four=runif(10,0,10)
)
# I created a mydata that I then do barplot as matrix
mydata = data.frame(
one=df$one,
two=df$two,
three=df$three,
four=df$four
)
# Plotted, using rainbow colouring, with a legend in the top right
barplot(as.matrix(mydata),horiz=TRUE,beside=TRUE,col=rainbow(length(df$sample_id)), legend=paste(df$sample_id), args.legend = list(x = "topright", bty = "n"),xlim=c(0,20))
# Now I would like the grid.table to be on the bottom right, ideally with the same order and colouring as the legend
library(gridExtra)
grid.table(df)
# Any ideas?
# EDIT: also tried addtable2plot from plotrix, with no much success
bp = barplot(as.matrix(mydata),horiz=TRUE,beside=TRUE,col=rainbow(length(df$sample_id)), legend=paste(df$sample_id), args.legend = list(x = "topright", bty = "n"),xlim=c(0,20))
library(plotrix)
addtable2plot(bp, y=0, df,cex=0.3)
The other option would be to turn the barplot into a ggplot geom_bar, but I struggled to do it for more than 2 columns.
Here's one way to do it using addtable2plot of plotrix package. It allows you to use the legend positions such as "bottomright"
df = data.frame(
sample_id=c("s01","s02","s03","s04","s05","s06","s07","s08","s09","s10"),
one=runif(10,0,10),
two=runif(10,0,10),
three=runif(10,0,10),
four=runif(10,0,10)
)
mydata = data.frame(
one=df$one,
two=df$two,
three=df$three,
four=df$four
)
library(plotrix)
dev.off()
windows(width = 8, height = 6)
df$one = round(df$one,2)
df$two = round(df$two,2)
df$three = round(df$three,2)
df$four = round(df$four,2)
barplot(as.matrix(mydata),horiz=TRUE,beside=TRUE,col=rainbow(length(df$sample_id)),
legend=paste(df$sample_id),
args.legend = list(x = "topright", bty = "n", cex = 1),
xlim=c(0,20))
addtable2plot("bottomright",table = df, cex = .9, bty = "o",
bg = c("white","grey"), vlines = TRUE, xpad = .25)
If you want to make the barplot in ggplot2, you need to reshape your data into long format. Based on your example data, the following code:
library(ggplot2)
library(gridExtra)
library(reshape2)
bp <- ggplot(melt(df, id.vars = 1),
aes(x = variable, y = value, fill = sample_id)) +
geom_bar(stat = 'identity', position = 'dodge') +
scale_fill_manual(values = rainbow(10)) +
labs(x = NULL, y = NULL) +
coord_flip() +
theme_minimal(base_size = 14)
gt <- tableGrob(df, rows = NULL, theme = ttheme_minimal())
grid.arrange(bp, gt, ncol = 2, widths = c(2.5,2))
which gives the following result: