adding different curves to plots in R - r

I know I can add the same curve to each plot pane as a layer using lattice and latticeExtra R packages (see blow).
But suppose we wanted to add different curves to each plot pane.
For example, in the below example, I wonder how to add v1 only to the top plot, v2 to the bottom-left plot, and v3 to the bottom-right plot?
library(lattice)
library(latticeExtra)
set.seed(24)
v1 <- density(rnorm(1e3, 3.5))
v2 <- density(rnorm(1e3, 3))
v3 <- density(rnorm(1e3, 2.75))
foo <- xyplot((1:32*.01)~wt|gear , data = mtcars)
foo + layer(panel.polygon(v1, col = 2, alpha = 0.3))

Here is one option
foo +
layer(if(panel.number() == 3) {
panel.polygon(v1, col = 2, alpha = 0.3)
} else if(panel.number() == 1) {
panel.polygon(v2, col = 2, alpha = 0.3)
} else{
panel.polygon(v3, col = 2, alpha = 0.3)
})
data
library(lattice)
library(latticeExtra)
set.seed(24)
v1 <- density(rnorm(1e3, 3.5))
v2 <- density(rnorm(1e3, 3))
v3 <- density(rnorm(1e3, 2.75))
foo <- xyplot((1:32*.01)~wt|gear , data = mtcars)

You can do it with ggplot2 and facet_grid:
library(ggplot2)
library(dplyr)
# user data
v1 <- (rnorm(1e3, 3.5))
v2 <- (rnorm(1e3, 3))
v3 <- (rnorm(1e3, 2.75))
# Make df1 from user data and associate with add gear value in `mtcars`
df1 <- data.frame(wt = c(v1, v2, v3),
gear = as.factor(c(rep(3, 1000),
rep(4, 1000),
rep(5, 1000))))
# select data from mtcars and add user defined values (`val`)
df2 <- mtcars %>%
mutate(val = 1:32 * 0.01) %>%
remove_rownames() %>%
mutate(gear = as.factor(gear)) %>%
select(c(val, gear, wt))
ggplot(df2, aes(x = wt, y = val, #set up mapping with df2
fill = gear)) +
geom_density(data = df1, #make density plots of df1
aes(x = wt,
fill = gear),
inherit.aes = FALSE) + #next add points from df1
geom_point(data = df2, aes(x = wt, y = val), inherit.aes = FALSE) +
facet_grid(cols = vars(gear))
You can make the fill color more transparent by adding an alpha value to geom_density.

Related

How to insert specific IDs in the scatterplot in R

Here an example of my data:
mydf <- tibble(
ID = c(1, 2, 9,4,5,15),
M1 = c(60,50,40,20,30,45),
M2 = c(90,80, 30, 33,70,40)
)
I want to insert only IDs in the plot in which M1<M2.
Here are the codes that I have used:
library(ggrepel)
res1<- mydf %>%
filter(M1< M2)
p <- ggplot(data = mydf, aes(x = M1, y = M2,label = ID))+
geom_text_repel() +
geom_point(data=res1,
aes(x=M1,y= M2),
color='red',
size=3)
I want to remove IDs 9,15 as M1>M2.
Does this give you the desired plot?
mydf <- mydf %>%
mutate(M1lower = M1 < M2)
res1<- mydf %>%
filter(M1lower == T)
ggplot(data = mydf, aes(x = M1, y =M2,label = ID, color = M1lower))+
geom_point(size=3) +
geom_text_repel(data=res1,
aes(x=M1,y= M2))

Scaling geom_point size on heatmap to fit correctly in R?

Basically, I have a heatmap that contains some points. What Im trying to do is automatically rescale the size of the points in a sensible way for different sized heatmaps. For example, if I have a heatmap that looks like so:
library(reshape)
library(ggplot2)
library(ggnewscale)
# Create matrix
set.seed(1701)
a <- sample(1:10,100, replace=TRUE)
s <- matrix(a, nrow = 5, ncol=5)
s[upper.tri(s)] = t(s)[upper.tri(s)]
rownames(s) <- colnames(s) <- paste0("x", 1:5)
diag(s) <- 0
sDf <- melt(s)
# create diagonal values
diagDf <- data.frame(
var1 = c(paste0("x", 1:5)),
var2 = c(paste0("x", 1:5)),
val = c(2,5,3,1,5)
)
# make plot
ggplot(sDf, aes(X1,X2)) +
geom_tile(aes(fill = value)) +
scale_fill_gradientn(colors = rev(colorspace::sequential_hcl(palette = "Blues 3", n = 100))) +
new_scale_fill() +
geom_point(data = diagDf, aes(var1, var2, col = val), size = 20) +
theme(aspect.ratio = 1)
So in the image above, the diagonal contains geom_points and their size is manually set to size = 20.... This works for this example, but the issue is:
If the heatmap dimensions were changed to say 20x20, then having the size hardcoded to equal 20 won't work due to overlapping & the points being too big etc.
So what Im trying to do is come up with a method that will automatically resize the points to effectively fill square they are contained in without overlapping, being too big or too small.
Any suggestions as to how I could do this?
I would do something like this:
library(reshape)
library(ggplot2)
library(ggnewscale)
n <- 5
# Create matrix
set.seed(1701)
a <- sample(1:10,100, replace=TRUE)
s <- matrix(a, nrow = n, ncol=n)
s[upper.tri(s)] = t(s)[upper.tri(s)]
rownames(s) <- colnames(s) <- paste0("x", 1:n)
diag(s) <- 0
sDf <- melt(s)
# create diagonal values
diagDf <- data.frame(
var1 = c(paste0("x", 1:n)),
var2 = c(paste0("x", 1:n)),
val = sample(1:5,n,replace = T)
)
# make plot
ggplot(sDf, aes(X1,X2)) +
geom_tile(aes(fill = value)) +
scale_fill_gradientn(colors = rev(colorspace::sequential_hcl(palette = "Blues 3", n = 100))) +
new_scale_fill() +
geom_point(data = diagDf, aes(var1, var2, col = val), size = 1/sqrt(nrow(sDf))*80) +
theme(aspect.ratio = 1)
here the size of the points depends on the dimension of the matrix.
an example of the output with a 3x3, 5x5, and 10x10 matrix
You can modify diagDf to contain the co-ordinates of the circles you want to plot using some basic trigonometry, then plot them as filled polygons. This ensures they will always scale exactly with your plot.
library(dplyr)
diagDf <- diagDf %>%
mutate(var1 = as.numeric(as.factor(var1)),
var2 = as.numeric(as.factor(var2))) %>%
split.data.frame(diagDf$var1) %>%
lapply(function(x) {
deg <- seq(0, 2 * pi, length = 100)
var1 <- cos(deg)/2.2
var2 <- sin(deg)/2.2
val <- rep(x$val, 100)
data.frame(var1 = var1 + x$var1, var2 = var2 + x$var2, val = val)}) %>%
{do.call(rbind, .)}
Now with slightly modified plot code, we get:
ggplot(sDf, aes(X1,X2)) +
geom_tile(aes(fill = value)) +
scale_fill_gradientn(colors=rev(colorspace::sequential_hcl(palette = "Blues 3", n=100))) +
new_scale_fill() +
geom_polygon(data = diagDf, aes(var1, var2, fill = val, group = val)) +
theme(aspect.ratio = 1)
Created on 2021-09-27 by the reprex package (v2.0.0)

Create a three panel plot with one panel spanning 2 columns using ggplot2

I have following data:
df <- data.frame("Stat" = c("Var1","Var1","Var1","Var1","Var1","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var3","Var3","Var3","Var3","Var3","Var3","Var3","Var3","Var3","Var3"),
"Value" = c(0,1,2,3,4,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,1,2,3,4,5,6,7,8,9,10),
"n" = c(33,120,223,63,20,17,28,33,22, 35,41,53,44,55,59,39,33, 46,30,29,23,21,14,6,18,7,29,50,80,86,91,83,35,34, 20))
What I wanted to do is to plot the above data as bar plot in one canvas but in three rows (1 columns x 3 rows) and each panel should contain plot for only one variable (Stat) eg. Var1 in first panel, Var2 in second and Var3 in the third panel, using the following code:
library(multipanelfigure)
fig1 <- multi_panel_figure(columns = 2, rows = 2, panel_label_type = "none")
# fit the plots on the panels
fig1 %<>%
fill_panel(Var1Plot, column = 1, row = 1) %<>%
fill_panel(Var2Plot, column = 2, row = 1) %<>%
fill_panel(Var3Plot, column = 1:2, row = 2)
fig1
Issue is how to get the Var1Plot, Var2Plot and Var3Plot so that these can be placed in respective panels above. I used the below code, but not able to get the results into above panels:
library(tidyverse)
df %>% ggplot(aes(x = Value, y = n)) +
geom_bar(stat='identity') + facet_wrap(~ Stat)
Expected plot should look something like this :
Here's an approach with cowplot.
library(cowplot)
figure.list <- map(unique(df$Stat), ~
ggplot(data = subset(df, df$Stat == .x), aes(x = Value, y = n)) +
geom_bar(stat='identity') +
ggtitle(.x))
top <- plot_grid(figure.list[[1]], figure.list[[2]], ncol = 2)
bottom <- plot_grid(figure.list[[3]], ncol = 1)
plot_grid(top, bottom,
ncol=1, rel_heights=c(1,1))
If you really want some to be coord_flip-ed, you could make the list manually:
figure.list <- list()
figure.list[[1]] <- ggplot(data = subset(df, df$Stat == "Var1"), aes(x = Value, y = n)) +
geom_bar(stat='identity') + coord_flip()
figure.list[[2]] <- ggplot(data = subset(df, df$Stat == "Var2"), aes(x = Value, y = n)) +
geom_bar(stat='identity') + coord_flip()
figure.list[[3]] <- ggplot(data = subset(df, df$Stat == "Var3"), aes(x = Value, y = n)) +
geom_bar(stat='identity')
top <- plot_grid(figure.list[[1]], figure.list[[2]], ncol = 2)
bottom <- plot_grid(figure.list[[3]], ncol = 1)
plot_grid(top, bottom,
ncol=1, rel_heights=c(1,1))

ggplot: Drawing tiles / rectangles with discrete variables

I'm attempting to draw tiles / rectangles to get the following result:
library(tidyverse)
library(plotly)
set.seed(0)
df <- tibble(
a = runif(5),
b = runif(5),
c = runif(5),
d = runif(5),
case_id = 1:5
) %>% tidyr::pivot_longer(cols = -case_id)
plot <- ggplot2::ggplot(
data = df,
mapping = aes(
x = name,
y = value,
group = case_id
)
) + geom_point()
plot_boxes_y <- seq(from = 0, to = 1, by = .2)
plot_boxes_x <- unique(df$name) %>% length()
for (x in 1:plot_boxes_x) {
for (y in plot_boxes_y) {
plot <- plot + geom_rect(
mapping = aes_(
xmin = x - .5,
xmax = x + .5,
ymin = y - .5,
ymax = y + .5
),
color = "red",
fill = NA
)
}
}
plotly::ggplotly(plot)
As you can see, I currently do this by looping through coordinates and drawing each rectangle individually. The problem is, that this generates many layers which makes plotly::ggplotly() really slow on large datasets.
Therefore, I'm looking for a more efficient way. Please note, that I cannot use the panel.grid, since I intend to visualize z-data by filling rectangles later on.
My approach was to draw geom_tile() on top of the scatter plot:
# my attempt
df$z <- rep(0, nrow(df))
plot2 <- ggplot2::ggplot(
data = df,
mapping = aes(
x = name,
y = value,
color = z,
group = case_id
)
) + geom_point() + geom_tile()
I assume that this fails because of the fact that name is a discrete variable? So, how can i efficiently draw tiles in addition to my scatterplot?
Thanks
Here is a solution using the geom_tile option. The key here creating a data frame to hold the coordinates of the grid and then specifying the aesthetics individually in each of the function calls.
library(ggplot2)
library(tidyr)
set.seed(0)
df <- tibble(
a = runif(5),
b = runif(5),
c = runif(5),
d = runif(5),
case_id = 1:5
) %>% pivot_longer(cols = -case_id)
df$z <- rep(0, nrow(df))
#make data frame for the grid corrdinates
grid<-data.frame(x=factor( ordered( 1:4), labels = c("a", "b", "c", "d" )),
y=rep(seq(0, 1, .1), each=4))
#plot using geom_tile & geom_point
plot2 <- ggplot2::ggplot() + geom_tile(data=grid, aes(x=x, y=y), fill=NA, col="red") +
geom_point(data = df,
mapping = aes(
x = name,
y = value,
color = z,
group = case_id))
print(plot2)
if you don't mind them going beyond the axis
ggplot(df,aes(x=name,y=value)) + geom_point() +
geom_vline(xintercept=seq(0.5,4.5,by=1)) +
geom_hline(yintercept=seq(0,2,by=.2))
else:
#make a new data frame
GRIDS = rbind(
# the vertical lines
data.frame(x=seq(0.5,4.5,by=1),xend=seq(0.5,4.5,by=1),y=0,yend=2),
# the horizontal lines
data.frame(x=0.5,xend=4.5,y=seq(0,2,by=.2),yend=seq(0,2,by=.2))
)
ggplot(df,aes(x=name,y=value)) + geom_point() +
geom_segment(data=GRIDS,aes(x=x,y=y,xend=xend,yend=yend),col="red")

plot rownames based on condition in ggplot

I have a data set like this:
df <- data.frame(v1 = rnorm(10), col = rbinom(10, size=1,prob= 0.5))
rownames(df) <- letters[1:10]
> head(df)
v1 col
a -0.1806868 1
b 0.6934783 0
c -0.4658297 1
d 1.6760829 0
e -0.8475840 0
f -1.3499387 1
I plot it like this:
ggplot(df, aes(x = v1, y=rownames(df), group = col, color= col)) + geom_point()
Now I want to show only the rownames on the y-axis where col == 1.
The other names should not be displayed (but the points should be)
To add some context, I have a plot with many overlapping variable names on the y-axis, but I only want to display the names of the ones outside the dashed line
You could use scale_y_discrete:
set.seed(2017);
df <- data.frame(v1 = rnorm(10), col = rbinom(10, size=1,prob= 0.5))
rownames(df) <- letters[1:10]
library(ggplot2);
ggplot(df, aes(x = v1, y = rownames(df), group = col, color = col)) +
geom_point() +
scale_y_discrete(
limits = rownames(df),
labels = ifelse(df$col == 1, rownames(df), ""))
There is not much to add to the answer given by #MauritsEvers, I just had the idea that for your plot it might be desirable to have fewer horizontal lines that guide your eye.
We can use the breaks argument in scale_y_discrete for that.
set.seed(1); df <- data.frame(v1 = rnorm(10), col = rbinom(10, size=1,prob= 0.5))
rownames(df) <- letters[1:10]
axis_labels <- which(df$col == 1)
ggplot(df, aes(x = v1, y=rownames(df), group = col, color= col)) +
geom_point() +
scale_y_discrete(breaks = rownames(df)[axis_labels])

Resources