Related
I have data with large degrees of separation between "clusters/groups" of values that I hope to make a histogram with, but dividing the bins into equal sized groups has been difficult. I'd like for zero (0) to have it's own bin, the total number of equally spaced bins be < 8 (ideally, to avoid crowding the plot) with an extra empty bin for "..." signifying the large gaps in-between the data values. The actual dataset has 800+ zeros with maybe 5% data >0. Naturally the zeros will over-shadow the rest of the data, but a log transform will fix that. I just can't figure out the best way to break-up the data...
Data looks like this:
set.seed(123)
zero <- runif(50, min=0, max=0)
small <- runif(7, min=0, max=0.1)
medium <- runif(5, min=0, max=0.5)
high <- runif(3, min=1.5, max=2.5)
f <- function(x){
return(data.frame(ID=deparse(substitute(x)), value=x))
}
all <- bind_rows(f(zero), f(small), f(medium), f(high))
all <- as.data.frame(all[,-1])
names(all)[1] <- "value"
My attempt:
bins <- all %>% mutate(bin = cut(all$value, breaks = c(0, seq(0.01:0.4), Inf), right = FALSE)) %>%
count(bin, name = "freq") %>%
add_row(bin = "...", freq = NA_integer_) %>%
mutate(bin = fct_relevel(bin, "...", after = 0.4))
But I get this error:
Error in `mutate()`:
! Problem while computing `bin = fct_relevel(bin, "...", after = 0.5)`.
Caused by error:
! `idx` must contain one integer for each level of `f`
This is not equally spaced, but I'm looking for something like this as labels for my plot:
levels(bins$bin) <- c("0", "0.01-0.05", "0.05-0.1", "0.1-0.2", "0.2-0.3", "0.3-0.4", "...", "2.0+")
ggplot(bins, aes(x = bin, y = freq, fill = bin)) +
geom_histogram(stat = "identity", colour = "black")
You can use cut directly inside ggplot
ggplot(all, aes(cut(value, breaks = c(0, 0.25, 0.5, 3), inc = TRUE))) +
geom_bar() +
scale_y_log10() +
labs(x = "value")
This worked for me (using my own data):
bins <- WET %>% mutate(bin = cut(den, breaks = c(0, seq(0.001, 0.225, 0.15), 0.255, 0.3, Inf), right = FALSE)) %>%
count(bin, name = "freq") %>% # build frequency table, frequency = freq
add_row(bin = "...", freq = NA_integer_) %>% # add empty row for NA
mutate(bin = fct_relevel(bin, "...", after = 3)) # Put factor level "..." after 3! (the 3rd position)
levels(bins$bin) <- c("0", "0.001-0.15", "0.15-0.255", "...", "0.3+")
# fct_relevel(f, "a", after = 2), "..., after = x, x must be an integer! (2nd position)
ggplot(bins, aes(x = bin, y = freq, fill = bin)) +
geom_bar(stat = "identity", colour = "black") +
geom_text(aes(label = freq), vjust = -0.5) +
scale_y_continuous(limits = c(0, 800), expand = expansion(mult = c(0, 0.05))) +
scale_fill_brewer(name = "Density", palette="Greys", breaks = c("0", "0.001-0.15", "0.15-0.255", "0.3+")) +
# Only show these legend values (exclude "...")
labs(title = "Wet seasons - Pink shrimp density (no./m2)",x = "Density range", y = "Frequency") +
theme(plot.title = element_text(hjust = 0.5)) +
theme(axis.text = element_text(size = 9, face = "bold")) +
theme(axis.title = element_text(size = 13, face = "bold")) + # Axis titles
theme(axis.title.x = element_text(vjust = -3)) +
theme(panel.border = element_rect(color = "black", fill = NA, size = 1)) +
# Adjust distance of x-axis title from plot
theme(plot.margin = margin(t = 20, # Top margin
r = 50, # Right margin
b = 40, # Bottom margin
l = 10)) # Left margin
I am using ggplot/usmap libararies to plot highly skewed data onto a map.
Because the data is so skewed, I created uneven interval brackets. See below;
My Code:
library(dplyr)
library(tidyverse)
library(usmap)
library(ggplot2)
library(readxl)
library(rgdal)
plot_usmap(regions = "states",
# fill = 'orange',
labels = TRUE) +
geom_point(data = grant_sh,
size = 5,
aes(x = x,
y = y,
color = funding_cat)) +
theme(
legend.title = element_text(size = 16),
#change legend title font size
legend.text = element_text(size = 14),
#change legend text font size
legend.position = 'left',
plot.title = element_text(size = 22),
plot.subtitle = element_text(size = 16)
) + #+
scale_color_manual(
values = c('#D4148C', # pink muesaum
'#049CFC', #library,blue
'#1C8474',
'#7703fC'),
name = "Map Key",
labels = c(
'$1,500 - $4,000 (n = 7)',
'$4,001 - $6,000 (n = 12)',
'$6,001 - $20,000 (n = 6)',
'$20,001 - $40,000 (n = 25)'
)
) +
guides(colour = guide_legend(override.aes = list(size = 3)))
Current Output
Desired Output
I would like to adjust the legend key to reflect the size of each interval. So, for example 1500-400 would be the smallest icon, and 20,001-40,000 would be the largest.
I want to do this so that the viewer immediately knows that the intervals are not even. Any solution to achieve this outcome is greatly appreciated!
See how the sign/oval next to each interval represents the range of the interval in my example below.
One option to create this kind of legend would be to make it as a second plot and glue it to the main plot using e.g. patchwork.
Note: Especially with a map as the main plot and the export size if any, this approach requires some fiddling to position the legend, e.g. in my code below a added a helper row to the patchwork design to shift the legend upwards.
UPDATE: Update the code to include the counts in the labels. Added a second approach to make the legend using geom_col and a separate dataframe.
library(dplyr, warn = FALSE)
library(usmap)
library(ggplot2)
library(patchwork)
# Make example data
set.seed(123)
cat1 <- c(1500, 4001, 6001, 20001)
cat2 <- c(4000, 6000, 2000, 40000)
n = c(7, 12, 6, 25)
funding_cat <- paste0("$", cat1, " - $", cat2, " (n=", n, ")")
funding_cat <- factor(funding_cat, levels = rev(funding_cat))
grant_sh <- utils::read.csv(system.file("extdata", "us_states_centroids.csv", package = "usmapdata"))
grant_sh$funding_cat = sample(funding_cat, 51, replace = TRUE, prob = n / sum(n))
# Make legend plot
grant_sh_legend <- data.frame(
funding_cat = funding_cat,
n = c(7, 12, 6, 25)
)
legend <- ggplot(grant_sh, aes(y = funding_cat, fill = funding_cat)) +
geom_bar(width = .6) +
scale_y_discrete(position = "right") +
scale_fill_manual(
values = c('#D4148C',
'#049CFC',
'#1C8474',
'#7703fC')
) +
theme_void() +
theme(axis.text.y = element_text(hjust = 0),
plot.title = element_text(size = rel(1))) +
guides(fill = "none") +
labs(title = "Map Key")
map <- plot_usmap(regions = "states",
labels = TRUE) +
geom_point(data = grant_sh,
size = 5,
aes(x = x,
y = y,
color = funding_cat)) +
theme(
legend.position = 'none',
plot.title = element_text(size = 22),
plot.subtitle = element_text(size = 16)
) + #+
scale_color_manual(
values = c('#D4148C', # pink muesaum
'#049CFC', #library,blue
'#1C8474',
'#7703fC'),
name = "Map Key",
labels = c(
'$1,500 - $4,000 (n = 7)',
'$4,001 - $6,000 (n = 12)',
'$6,001 - $20,000 (n = 6)',
'$20,001 - $40,000 (n = 25)'
)
) +
guides(colour = guide_legend(override.aes = list(size = 3)))
# Glue together
design <- "
#B
AB
#B
"
legend + map + plot_layout(design = design, heights = c(5, 1, 1), widths = c(1, 10))
Using geom_bar the counts are computed from your dataset grant_sh. A second option would be to compute the counts manually or use a manually created dataframe and then use geom_col for the legend plot:
grant_sh_legend <- data.frame(
funding_cat = funding_cat,
n = c(7, 12, 6, 25)
)
legend <- ggplot(grant_sh, aes(y = funding_cat, n = n, fill = funding_cat)) +
geom_col(width = .6) +
scale_y_discrete(position = "right") +
scale_fill_manual(
values = c('#D4148C',
'#049CFC',
'#1C8474',
'#7703fC')
) +
theme_void() +
theme(axis.text.y = element_text(hjust = 0),
plot.title = element_text(size = rel(1))) +
guides(fill = "none") +
labs(title = "Map Key")
I am trying to create a function on Rthat creates and saves a dotplot with facets on my wd. Code for the function below:
get_dotplot <- function(df, xvalue, avgvalue, sdvalue, svalue, gvalue, main, xaxis, yaxis, glegend, figure_title)
{
dp <- ggplot(df, aes(x = xvalue, y = avgvalue, color = gvalue)) +
geom_point(stat = 'identity', aes(shape=svalue, color=gvalue))+
geom_errorbar(aes(ymin=avgvalue-sdvalue, ymax=avgvalue+sdvalue))+
facet_grid(cols = vars(svalue), scales = "fixed")+
labs(x = xaxis, y = yaxis, title = main, color=glegend)+
theme(axis.title.x.bottom = element_text(hjust = 0.5, vjust = 1),
axis.title.y = element_text(hjust = 0.5, vjust = 1),
axis.ticks.x = element_line(),
axis.text.x = element_text(angle = 0, hjust = 1, vjust = 0.5, size = 7),
axis.ticks.x.bottom = element_line(colour = "grey", size = (0.5)),
axis.ticks.y.left = element_line(colour = "black", size = (0.4)),
panel.background=element_rect(colour = "black", size = 0.5, fill=NA),
panel.grid = element_blank())
print(dp)
ggsave(paste(figure_title, "png", sep = "."), plot = dp, scale = 1, dpi = 600)
}
get_dotplot(df, xvalue, avgvalue, sdvalue, svalue, gvalue, main, xaxis, yaxis, glegend, figure_title)
However, I always get this error message:
Error in `combine_vars()`:
! At least one layer must contain all faceting variables: `svalue`.
* Plot is missing `svalue`
* Layer 1 is missing `svalue`
* Layer 2 is missing `svalue`
Backtrace:
1. global get_dotplot_errorbar_yaxis(...)
3. ggplot2:::print.ggplot(dp)
5. ggplot2:::ggplot_build.ggplot(x)
6. layout$setup(data, plot$data, plot$plot_env)
7. ggplot2 f(..., self = self)
8. self$facet$compute_layout(data, self$facet_params)
9. ggplot2 f(...)
10. ggplot2::combine_vars(data, params$plot_env, cols, drop = params$drop)
I suspect it's because of the facetting so I played around between facet_wrap() and facet_grid() with no result. Could someone please help me with that ?
I checked and I have the svalue variable in my dataframe, and it is spelled correctly. I also consulted previous questions about the topic but they were not helpful.
the dataset looks something like this, but with a larger number of individuals and numbers of days:
set.seed(108)
n <- 1:12
treatment <- factor(paste("trt", 1:2))
individuals <- sample(LETTERS, 2)
days <- c("12", "20", "25")
avg_var1 <- sample(1:100, 12)
sd_var1 <- sample(1:50, 12)
avg_var2 <- sample(1:100, 12)
sd_var2 <- sample(1:50, 12)
avg_var3 <- sample(1:100, 12)
sd_var3 <- sample(1:50, 12)
test <- data.frame(n, treatment, individuals, days,avg_var1, sd_var1, avg_var2, sd_var2, avg_var3, sd_var3)
I define the variables for the function as follows on R:
df=test
xvalue=test$days
avgvalue=test$avg_var1
sdvalue = test$sd_var1
svalue=test$treatment
gvalue=test$individuals
main= "var1 in function of days"
xaxis="days"
yaxis="var1"
glegend="individuals"
figure_title ="var1_days"
As written, your code passes columns into the function repeating the data in the dataframe. This doesn't seem to "play nicely" with the non-standard evaluation used in ggplot. Essentially ggplot is looking for a column in df called "svalue" to use for faceting (it doesn't find it). Once this has been fixed, the same sort of problem occurs with the error bars.
One way round this is to just pass in the column names, and use aes_string for the variables. This doesn't work for the faceting or the calculated values, so those are calculated at the start of the function. This would give:
get_dotplot <- function(df, xvalue, avgvalue, sdvalue, svalue, gvalue, main, xaxis, yaxis, glegend, figure_title)
{
df$ymin <- df[[avgvalue]] - df[[sdvalue]]
df$ymax <- df[[avgvalue]] + df[[sdvalue]]
df$facets <- df[[svalue]]
dp <- ggplot(df, aes_string(x = xvalue, y = avgvalue, color = gvalue)) +
geom_point(stat = 'identity', aes_string(shape=svalue, color=gvalue)) +
geom_errorbar(aes(ymin=ymin, ymax=ymax))+
facet_grid(cols = vars(facets), scales = "fixed")+
labs(x = xaxis, y = yaxis, title = main, color=glegend)+
theme(axis.title.x.bottom = element_text(hjust = 0.5, vjust = 1),
axis.title.y = element_text(hjust = 0.5, vjust = 1),
axis.ticks.x = element_line(),
axis.text.x = element_text(angle = 0, hjust = 1, vjust = 0.5, size = 7),
axis.ticks.x.bottom = element_line(colour = "grey", size = (0.5)),
axis.ticks.y.left = element_line(colour = "black", size = (0.4)),
panel.background=element_rect(colour = "black", size = 0.5, fill=NA),
panel.grid = element_blank())
print(dp)
ggsave(paste(figure_title, "png", sep = "."), plot = dp, scale = 1, dpi = 600)
}
get_dotplot(df=test,
xvalue="days",
avgvalue="avg_var1",
sdvalue = "sd_var1",
svalue="treatment",
gvalue="individuals",
main= "var1 in function of days",
xaxis="days",
yaxis="var1",
glegend="individuals",
figure_title ="var1_days")
I generated a heatmap with ggplot, and order the samples by using hclust, However, I still need more reordering to get all the similar values corespondent with one of the samples in the ordered cluster. Here I generate a samples data to explain better.
set.seed(99)
M <- data.frame(names = paste0("g", seq(1,30)), S1 = runif(30, 0 , 8), S2 = runif(30, -4, 5), S3 = runif(30, -5, 5))
M.mat <- M %>%
tibble::column_to_rownames('names') %>%
as.matrix()
M.dendro <- as.dendrogram(hclust(d = dist(x = M.mat)))
dendro.plot <- ggdendrogram(data = M.dendro, rotate = TRUE) +
theme(axis.text.y = element_text(size = 6))
print(dendro.plot)
str(M.dendro)
dend.order <- order.dendrogram(M.dendro)
df <- melt(M, id.vars = "names")
df$names <- factor(x = df$names,
levels = M$names[dend.order],
ordered = TRUE)
ggplot(df, aes(x = names, y = variable, fill = value)) +
geom_tile(color = "black") +
scale_fill_gradient2(low = muted("steelblue"), mid = "white", high = muted("red3"),
midpoint = 0, space = "Lab", na.value = "grey50",
guide = "colourbar", aesthetics = "fill"
) +
theme(axis.text.x = element_text(angle = 90, hjust=1), legend.key.size = unit(0.4, "cm")) +
coord_fixed()
For the generated heatmap, I need reorder it such that all the dark blue be on the bottom, the middle color and then the red on the top based on samples S3. Thank you
This question already has an answer here:
passing unquoted variables to curly curly {{}} ggplot function
(1 answer)
Closed 2 years ago.
I wrote a code to create a plot
#Create a plot mean Calories in each Category
p<-mean_table %>%
ggplot(aes(mean.Calories,reorder(Category, mean.Calories))) +
geom_col(aes(fill = mean.Calories)) +
scale_fill_gradient2(low = "forestgreen",
high = 'firebrick1',
mid = "gold", midpoint = 283.8947)+
scale_x_continuous(breaks = seq(0, 600, length.out = 7),
limits = c(0, 600),
labels = seq(0, 600, length.out = 7))+
xlab("Calories")+
ylab("Category")+
ggtitle("Mean of Calories in each Category")+
theme_minimal()
#Theme for plots
theme<-theme(axis.title.x=element_text(size=16),
axis.title.y = element_text(size = 16),
axis.text.x = element_text(size = 12, color = "black"),
axis.text.y = element_text(size = 12, color = "black"),
panel.background = element_rect(color = "black"),
plot.title = element_text(size = 20,hjust = 0.5),
legend.position = "none")
#Calories+theme
p+theme
It works well. We can see this plot
But I'd like to write a function because I need several similar graphs.
It's my variant:
p3<-function(data, x_data, breaks, xlab, title){
my_plot<-data %>%
ggplot(aes(x_data,reorder(Category, x_data))) +
geom_col(aes(fill = x_data)) +
scale_fill_gradient2(low = "forestgreen",
high = 'firebrick1',
mid = "gold", midpoint = median(data$x_data))+
scale_x_continuous(breaks = round(seq(0, breaks, length.out = 7)),
limits = c(0, breaks),
labels = round(seq(0, breaks, length.out = 7)))+
xlab(xlab)+
ylab("Category")+
ggtitle(title)+
theme_minimal()
return(my_plot)
}
p3(mean_table, mean_table$mean.Cholesterol, 155, "Gram", "Mean of Fat in each Cholesterol")
I use data.frame like this(it's small version):
mean_table<-data.frame(Category=c("Beef & Pork","Beverages","Breakfast"), mean.Cholesterol=c(87.3333333, 0.5555556, 152.8571429))
Error:Elements must equal the number of rows or 1
Run rlang::last_error() to see where the error occurred.
1: Unknown or uninitialised column: x_data.
What is wrong with my function?
You need to use non-standard evaluation (NSE) while passing the column names to the function.
There are two ways in which you can pass the column names, unquoted and quoted column names. In this answer, we pass unquoted column names. We use curly-curly ({{}}) operator.
library(ggplot2)
library(rlang)
p3<-function(data, x_data, breaks, xlab, title){
my_plot<-data %>%
ggplot(aes({{x_data}},reorder(Category, {{x_data}}))) +
geom_col(aes(fill = {{x_data}})) +
scale_fill_gradient2(low = "forestgreen",
high = 'firebrick1',
mid = "gold",
midpoint = median(data %>% pull({{x_data}}))) +
scale_x_continuous(breaks = round(seq(0, breaks, length.out = 7)),
limits = c(0, breaks),
labels = round(seq(0, breaks, length.out = 7)))+
xlab(xlab)+
ylab("Category")+
ggtitle(title) +
theme_minimal()
return(my_plot)
}
and call p3 as :
p3(mean_table, mean.Cholesterol, 155, "Gram", "Mean of Fat in each Cholesterol")
If we want to pass column names as quoted variables as "mean.Cholesterol" we can use sym and !! to evaluate it.