I have the following data frame as an example:
sites <- c('s1','s1','s2', "s2", "s3", "s3")
conc <- c(15, 12, 0.5, 0.05, 3, 0.005)
trop <- c("pp", "pt")
df <- data.frame(sites, conc, trop)
df$trop<- factor(df$trop, levels = c("pp", "pt"))
ggplot(df, aes(x= sites, y= conc))+
geom_bar(stat = "identity", colour="black")+
scale_y_log10()+
facet_grid(.~trop)+
theme_bw()
This produces the following figure, which is quite helpful for my data analysis, since I want to highlight sites with values above 1.
However, under another assumption, I need to highlight sites above 1 in one facet and above 0.1 in the other using facet_grid, ending up with something like this (I edited this figure to show the desired output):
Is there any option in scale_y_log10 that would produce the second figure with facet_grid?
One option is to reparameterise the bars as rectangles and plot that instead.
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.0.3
sites <- c('s1','s1','s2', "s2", "s3", "s3")
conc <- c(15, 12, 0.5, 0.05, 3, 0.005)
trop <- c("pp", "pt")
df <- data.frame(sites, conc, trop)
df$trop<- factor(df$trop, levels = c("pp", "pt"))
# Map each element of x to the position of its value among the sorted unique
# values of x, e.g. c("s2", "s1", "s2") -> c(2L, 1L, 2L). This lets discrete
# categories be placed on a continuous axis.
char2num <- function(x) {
  ordered_levels <- sort(unique(x))
  match(x, ordered_levels)
}
# Draw the bars as rectangles so that each facet can use its own baseline on
# the log-scaled y axis (0.1 for "pt", 1 otherwise).
ggplot(df) +
  geom_rect(
    aes(
      xmin = char2num(sites) - 0.4,
      xmax = char2num(sites) + 0.4,
      ymin = ifelse(trop == "pt", 0.1, 1),
      ymax = conc
    ),
    colour = 'black'
  ) +
  scale_y_log10() +
  # Fake discrete axis: breaks are derived from the same sorted unique sites
  # as the labels, so both always have the same length regardless of how
  # many sites the data contains.
  scale_x_continuous(
    labels = sort(unique(df$sites)),
    breaks = seq_along(sort(unique(df$sites)))
  ) +
  facet_grid(. ~ trop) +
  theme_bw()
Created on 2021-02-26 by the reprex package (v1.0.0)
Related
I have been struggling with this for hours now. I have the following script:
library(ggplot2)
sims = replicate(1000, sample(c(0,0,0,0,1,1,1,2,2,2), size=3, replace=FALSE))
df = data.frame(x=colSums(sims == 0),
y=colSums(sims == 1))
df$count <- 1
total_counts = aggregate(count ~ ., df, FUN = sum)
min_count = min(total_counts$count)
max_count = max(total_counts$count)
p = (ggplot(df, aes(x=x, y=y))
+ geom_count(aes(color=..n.., size=..n..), alpha=0.8)
+ guides(color = 'legend', size=FALSE)
+ labs(color='Count')
+ scale_colour_gradient(limits = c(min_count, max_count),
breaks = round(seq(min_count, max_count, length.out=5)),
labels = round(seq(min_count, max_count, length.out=5)))
+ scale_size_continuous(range = c(3, 7.5))
)
So far so good. The problem is that I want to add two additional sets of points:
df2 = data.frame(x=c(0, 1, 2, 3),
y=c(1.5253165, 1.0291262, 0.4529617, 0))
df3 = data.frame(x=c(0, 1, 2, 3),
y=c(1.5, 1, 0.5, 0))
To get something like this:
p2 = (p
+ geom_point(data=df2, aes(x=x, y=y), alpha=0.4, color="red", size = 2.5)
+ geom_point(data=df3, aes(x=x, y=y), alpha=0.4, color="green", size = 2.5)
)
The problem is that I have not been able to add these new points to the legend. I would like them to appear in a separate "section" of the legend: namely, with an empty-string title (to differentiate these points from the "Count" title) and with strings instead of numbers as labels ("Simulated means" and "Theoretical means", for example).
Is there any way to achieve this?
A trick I learned from @tjebo is that you can use the ggnewscale package to spawn additional legends. The point in plot construction at which you call the new scale is important, so you first want to make a geom/stat layer and add the desired scale. Once these are declared, you can use new_scale_colour() and all subsequent geom/stat layers will use a new colour scale.
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.0.5
library(ggnewscale)
#> Warning: package 'ggnewscale' was built under R version 4.0.3
sims = replicate(1000, sample(c(0,0,0,0,1,1,1,2,2,2), size=3, replace=FALSE))
df = data.frame(x=colSums(sims == 0),
y=colSums(sims == 1))
df$count <- 1
total_counts = aggregate(count ~ ., df, FUN = sum)
min_count = min(total_counts$count)
max_count = max(total_counts$count)
df2 = data.frame(x=c(0, 1, 2, 3),
y=c(1.5253165, 1.0291262, 0.4529617, 0))
df3 = data.frame(x=c(0, 1, 2, 3),
y=c(1.5, 1, 0.5, 0))
ggplot(df, aes(x, y)) +
geom_count(aes(colour = after_stat(n), size = after_stat(n)),
alpha = 0.5) +
scale_colour_gradient(
limits = c(min_count, max_count),
breaks = round(seq(min_count, max_count, length.out = 5)),
labels = round(seq(min_count, max_count, length.out = 5)),
guide = "legend"
) +
new_scale_colour() +
geom_point(aes(colour = "Simulated means"),
data = df2, alpha = 0.4) +
geom_point(aes(colour = "Theoretical means"),
data = df3, alpha = 0.4) +
scale_colour_discrete(
name = ""
) +
scale_size_continuous(range = c(3, 7.5), guide = "none")
Created on 2021-04-22 by the reprex package (v1.0.0)
(P.S. sorry for reformatting your code, it just read more easily for myself this way)
My data looks like this:
df <- data.frame(Year = as.factor(c(rep(2015, 3), rep(2016, 3), rep(2017,3))),
Tax = as.factor(c(rep(c("A", "B", "C"), 3))),
Depth = as.factor(c(10, 30, 50, 20,30,50,10,30,40)),
values= c(0.5, 0.25, 0.25, 0.1, 0.4, 0.5, 0.2, 0.6, 0.2))
I want to plot it with gaps for missing data and individual axis labels.
library(ggplot2)
The scales argument of facet_wrap gives individual axes, but it does not behave as desired, because the missing data is not reflected:
ggplot(df, aes(Depth, values, fill=Tax)) + geom_bar(stat="identity")+
facet_wrap(~Year, scale="free") +
coord_flip()
Without scales:
ggplot(df, aes(Depth, values, fill=Tax)) + geom_bar(stat="identity")+
facet_wrap(~Year) +
coord_flip()
The missing data is represented (which I want!), but it lacks axis labels (which I need).
Is there anything I can do?
It looks like this can be done using the lemon package:
library(tidyverse)
library(lemon)
df <- data.frame(Year = as.factor(c(rep(2015, 3), rep(2016, 3), rep(2017,3))),
Tax = as.factor(c(rep(c("A", "B", "C"), 3))),
Depth = as.factor(c(10, 30, 50, 20,30,50,10,30,40)),
values= c(0.5, 0.25, 0.25, 0.1, 0.4, 0.5, 0.2, 0.6, 0.2))
# facet_rep_wrap() from lemon is used here so that the tick labels are
# repeated on every panel. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
ggplot(df, aes(Depth, values, fill = Tax)) +
  geom_bar(stat = "identity") +
  facet_rep_wrap(~Year, repeat.tick.labels = TRUE) +
  coord_flip()
I'm creating a visualization of missing data by slightly tweaking some of the code from the missmap function in the Amelia package. I want to draw borders around my rectangles, but I can't figure out a way to do that in ggplot2.
I found the function "borders()" but that appears to be related to map work. I also tried using geom_rect, but it seems like that would require me to specify mins and maxes. Geom_raster seems to be doing exactly what I need, but I can't figure out how to specify borders.
This example code creates the visualization that I'm imagining, but I have more variables in the "real" version and I'd like to be able to outline each variable (var1, var2, etc.) with a line (border).
#Dataset
missmap_data_test <- data.frame(var1 = c(11, 26, NA, NA, 15),
var2 = c(NA, NA, 0, NA, 1))
#Create Function
ggplot_missing <-
function(x){
x %>%
is.na %>%
melt %>%
ggplot(data = .,
aes(x = Var2,
y = Var1)) +
geom_raster(aes(fill = value)) +
scale_fill_grey(name = "",
labels = c("Present","Missing")) +
theme_minimal() +
theme(axis.text.x = element_text(angle=90, hjust=1)) +
labs(x = "Variables in Dataset",
y = "Observations")
}
#Feed the function my new data
ggplot_missing(missmap_data_test)
As @Axeman suggests, geom_tile does the job. I've updated your code to give an example below. Here, colour defines the colour of the border, while size defines its thickness.
#Dataset
missmap_data_test <- data.frame(var1 = c(11, 26, NA, NA, 15),
var2 = c(NA, NA, 0, NA, 1))
# Load libraries
library(dplyr)
library(ggplot2)
library(reshape2)
# Plot a missingness map of data frame x: one tile per cell, filled according
# to whether the cell is NA, with a coloured border around every tile.
# Expects ggplot2 and reshape2 to be attached.
ggplot_missing <- function(x) {
  # Long form of the logical NA matrix; the plot below reads its Var1, Var2
  # and value columns.
  na_long <- melt(is.na(x))

  ggplot(data = na_long, aes(x = Var2, y = Var1)) +
    geom_tile(aes(fill = value), colour = "#FF3300", size = 2) +
    scale_fill_grey(name = "", labels = c("Present", "Missing")) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(x = "Variables in Dataset", y = "Observations")
}
#Feed the function my new data
ggplot_missing(missmap_data_test)
Created on 2019-05-30 by the reprex package (v0.3.0)
If you're getting notches in the top left corner (discussed here and apparent in the plot above), you may want to update to the development version of ggplot2. That is, devtools::install_github("tidyverse/ggplot2"). For example, compare the plot above with the plot below:
Update
I assume this is a toy example, so I've tried to come up with a generic solution. Here, I've created a function called boxy that will make a data frame for geom_rect based on the original data frame.
#Dataset
missmap_data_test <- data.frame(var1 = c(11, 26, NA, NA, 15),
var2 = c(NA, NA, 0, NA, 1))
# Build the data frame of outline rectangles for geom_rect: one row per
# column of df. Each box is one unit wide, centred on the column's index,
# and spans all rows (from 0.5 to nrow(df) + 0.5).
#
# df: a data frame (or matrix) whose columns should each be outlined.
# Returns a data frame with numeric columns xmin, xmax, ymin, ymax; it has
# zero rows when df has no columns.
boxy <- function(df) {
  # seq_len() is used instead of seq(0.5, ncol(df) - 0.5) because the latter
  # counts downwards (0.5, -0.5) when df has no columns.
  col_idx <- seq_len(ncol(df))
  n_box <- length(col_idx)
  data.frame(
    xmin = col_idx - 0.5,
    xmax = col_idx + 0.5,
    # Explicit rep() because data.frame() does not recycle a scalar to
    # zero rows.
    ymin = rep(0.5, n_box),
    ymax = rep(nrow(df) + 0.5, n_box)
  )
}
# Load libraries
library(dplyr)
library(ggplot2)
library(reshape2)
# Plot a missingness map of data frame x as a raster and draw one outline
# rectangle around each variable (column). Expects ggplot2 and reshape2 to be
# attached and boxy() to be defined.
ggplot_missing <- function(x) {
  # Long form of the logical NA matrix, used for the raster layer.
  raster_data <- melt(is.na(x))
  # One rectangle per column of x, used for the outline layer.
  outline_data <- boxy(x)

  ggplot() +
    geom_raster(
      data = raster_data,
      aes(x = Var2, y = Var1, fill = value)
    ) +
    geom_rect(
      data = outline_data,
      aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
      colour = "#FF3300", fill = NA, size = 3
    ) +
    scale_fill_grey(name = "", labels = c("Present", "Missing")) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(x = "Variables in Dataset", y = "Observations")
}
#Feed the function my new data
ggplot_missing(missmap_data_test)
Created on 2019-05-30 by the reprex package (v0.3.0)
If you add a third variable (i.e., column) to your data frame, you get something like this:
What I have here are two graphs, "PlotA" and "PlotB"; however, I want a single combined graph with geom_pointrange showing the points, geom_line showing the line and geom_ribbon showing the standard deviation.
water <- c(35,40,42,46,48,50)
depth <- c(1,2,3,4,5,6)
sd <- c(10,10,10,10,10,10)
dataA <- data.frame(depth, water, sd)
from <- c(0.5, 1.5, 2.5, 3.5, 4.5, 5.5)
to <- c(1.5, 2.5, 3.5, 4.5, 5.5, 6.5)
depth1 <- c(1,2,3,4,5,6)
water1 <- c(40,32,50,55,62,30)
dataB <- data.frame(from,to,depth1, water1)
# Load necessary packages
require(ggplot2)
# Plotting Started
#PlotA
ggplot(data=dataA, aes(x = water, y = depth), na.rm=T) +
geom_path(size=0.4, color="black")+
geom_pointrange(data=dataB, aes(water1, depth1, ymin=from, ymax=to), size=0.1, color='black') +
scale_y_reverse(lim = c(10,0), breaks = seq(0,10,1)) +
theme_bw(12) +
scale_x_continuous(lim =c(0,100), breaks = seq(0,100,20))
#PlotB
ggplot() + geom_ribbon(data=dataA, aes(x=depth, y=water, ymin = water - sd, ymax = water + sd), alpha=0.3, fill='grey12') + coord_flip() +
scale_x_reverse(lim = c(10,0), breaks = seq(0,10,1)) + theme_bw(12) +
scale_y_continuous(lim =c(0,100), breaks = seq(0,100,20))
coord_flip is difficult to use well in the middle of a plot. I strongly recommend debugging plots without it and then adding it as the last step.
I think this is what you're looking for. If not, please describe your desired result in more detail.
ggplot(data = dataA, aes(x = depth, y = water)) +
geom_ribbon(
data = dataA,
aes(
x = depth,
ymin = water - sd,
ymax = water + sd
),
alpha = 0.3,
fill = 'grey12'
) +
geom_path(size = 0.4, color = "black") +
geom_point(
data = dataB,
aes(x = depth1, y = water1),
size = 0.1,
color = 'black'
) +
geom_errorbarh(
data = dataB,
aes(
x = depth1,
xmin = from,
xmax = to,
y = water1
),
size = 0.1,
height = 0
) +
theme_bw(12) +
scale_x_reverse(lim = c(10, 0), breaks = seq(0, 10, 1)) +
scale_y_continuous(lim = c(0, 100), breaks = seq(0, 100, 20)) +
coord_flip()
I have a data frame that looks like this:
genotype DIV3 DIV4 ...
WT 12.4 15.2
WT 35.4 35.3
HET 1.3 1.2
HET 1.5 5.2
I calculate the means and sd by the following functions:
means = aggregate(. ~ genotype, data=dat, FUN=mean)
errors = aggregate(. ~ genotype, data=dat, FUN=sd)
I am using ggplot2 to plot the means as a scatter plot. I want to use the errors dataframe for error bars, but I am having trouble calculating ymin and ymax since I have two dataframes.
Is there a better way to do this?
EDIT:
ggplot2 code:
# Melt the per-genotype means to long format and plot one line per genotype.
# The melted data must be stored under the name the ggplot() call uses
# (x_melt); the plot reads its genotype, variable and value columns.
x_melt <- melt(means)
ggplot(x_melt, aes(group = genotype, variable, value, col = genotype, shape = genotype)) +
  geom_line() +
  geom_point(size = 3) +
  theme(axis.text = element_text(size = 14),
        axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 0.8, hjust = .9, color = "black"),
        axis.text.y = element_text(color = "black"))
You can do this by creating a single dataset from the aggregate step and then reshaping it before plotting.
# Compute mean and SD per genotype in one aggregate() call. do.call() with
# data.frame flattens the resulting matrix columns into separate
# <variable>.Mean and <variable>.SD columns.
dat2 <- do.call(
  `data.frame`,
  aggregate(. ~ genotype, dat, FUN = function(x) c(Mean = mean(x), SD = sd(x)))
)
# Measurement names with the trailing .Mean/.SD suffix removed. Anchoring the
# pattern to the suffix keeps variable names that themselves contain dots.
nm1 <- unique(sub("\\.(Mean|SD)$", "", colnames(dat2)[-1]))
# Column 1 is genotype; after it, Mean and SD columns alternate. Deriving the
# indices from ncol(dat2) makes this work for any number of measurement
# columns rather than exactly two.
datN <- reshape(dat2, direction = "long", idvar = "genotype",
                varying = list(seq(2, ncol(dat2), by = 2),
                               seq(3, ncol(dat2), by = 2)),
                sep = ".")
# reshape() numbers the measurements 1, 2, ...; map them back to their names.
datN$time <- nm1[datN$time]
colnames(datN)[3:4] <- c("Mean", "SD")
library(ggplot2)
ggplot(datN, aes(group=genotype, time, Mean, col=genotype,
shape=genotype))+
geom_line()+
geom_point(size=3)+
geom_errorbar(aes(ymin=Mean-SD, ymax=Mean+SD), width=0.1)+
theme(axis.text=element_text(size=14),
axis.title.x=element_blank(),
axis.text.x=element_text(angle = 45, vjust = 0.8, hjust = .9, color = "black"),
axis.text.y=element_text(color="black"))
Or you can merge the melted means and errors datasets:
library(reshape2)
x_melt <- melt(means, value.name="Mean")
y_melt <- melt(errors, value.name="SD")
datN1 <- merge(x_melt, y_melt)
ggplot(datN1, aes(group=genotype, variable, Mean, col=genotype,
shape=genotype))+
geom_line()+
geom_point(size=3)+
geom_errorbar(aes(ymin=Mean-SD, ymax=Mean+SD), width=0.1)+
theme(axis.text=element_text(size=14),
axis.title.x=element_blank(),
axis.text.x=element_text(angle = 45, vjust = 0.8, hjust = .9, color = "black"),
axis.text.y=element_text(color="black"))
data
dat <- structure(list(genotype = c("WT", "WT", "HET", "HET"), DIV3 = c(12.4,
35.4, 1.3, 1.5), DIV4 = c(15.2, 35.3, 1.2, 5.2)), .Names = c("genotype",
"DIV3", "DIV4"), class = "data.frame", row.names = c(NA, -4L))