I am trying to generate a heatmap via ggplot using the following code I found online (https://simplystatistics.org/2019/08/28/you-can-replicate-almost-any-plot-with-ggplot2/):
library(dslabs)
data(us_contagious_diseases)
the_disease <- "Measles"
dat <- us_contagious_diseases %>%
filter(!state%in%c("Hawaii","Alaska") & disease == the_disease) %>%
mutate(rate = count / population * 10000 * 52 / weeks_reporting)
jet.colors <- colorRampPalette(c("#F0FFFF", "cyan", "#007FFF", "yellow", "#FFBF00", "orange", "red", "#7F0000"), bias = 2.25)
dat %>% mutate(state = reorder(state, desc(state))) %>%
ggplot(aes(year, state, fill = rate)) +
geom_tile(color = "white", size = 0.35) +
scale_x_continuous(expand = c(0,0)) +
scale_fill_gradientn(colors = jet.colors(16), na.value = 'white') +
geom_vline(xintercept = 1963, col = "black") +
theme_minimal() +
theme(panel.grid = element_blank()) +
coord_cartesian(clip = 'off') +
ggtitle(the_disease) +
ylab("") +
xlab("") +
theme(legend.position = "bottom", text = element_text(size = 8)) +
annotate(geom = "text", x = 1963, y = 50.5, label = "Vaccine introduced", size = 3, hjust = 0)
Result:
This all works fine but in my dataset I have 320 units instead of 50. I have created a similar scenario using the data from the example above:
library(dslabs)
data(us_contagious_diseases)
the_disease <- "Measles"
dat <- us_contagious_diseases %>%
filter(!state%in%c("Hawaii","Alaska") & disease == the_disease) %>%
mutate(rate = count / population * 10000 * 52 / weeks_reporting)
dat1 <- dat
levels(dat1$state) <- c("State1_1","State1_2","State1_3","State1_4","State1_5","State1_6","State1_7","State1_8","State1_9","State1_10",
"State1_11","State1_12","State1_13","State1_14","State1_15","State1_16","State1_17","State1_18","State1_19","State1_20",
"State1_21","State1_22","State1_23","State1_24","State1_25","State1_26","State1_27","State1_28","State1_29","State1_30",
"State1_31","State1_32","State1_33","State1_34","State1_35","State1_36","State1_37","State1_38","State1_39","State1_40",
"State1_41","State1_42","State1_43","State1_44","State1_45","State1_46","State1_47","State1_48","State1_49","State1_50","State1_51")
dat2 <- dat
levels(dat2$state) <- c("State2_1","State2_2","State2_3","State2_4","State2_5","State2_6","State2_7","State2_8","State2_9","State2_10",
"State2_11","State2_12","State2_13","State2_14","State2_15","State2_16","State2_17","State2_18","State2_19","State2_20",
"State2_21","State2_22","State2_23","State2_24","State2_25","State2_26","State2_27","State2_28","State2_29","State2_30",
"State2_31","State2_32","State2_33","State2_34","State2_35","State2_36","State2_37","State2_38","State2_39","State2_40",
"State2_41","State2_42","State2_43","State2_44","State2_45","State2_46","State2_47","State2_48","State2_49","State2_50","State2_51")
dat3 <- dat
levels(dat3$state) <- c("State3_1","State3_2","State3_3","State3_4","State3_5","State3_6","State3_7","State3_8","State3_9","State3_10",
"State3_11","State3_12","State3_13","State3_14","State3_15","State3_16","State3_17","State3_18","State3_19","State3_20",
"State3_21","State3_22","State3_23","State3_24","State3_25","State3_26","State3_27","State3_28","State3_29","State3_30",
"State3_31","State3_32","State3_33","State3_34","State3_35","State3_36","State3_37","State3_38","State3_39","State3_40",
"State3_41","State3_42","State3_43","State3_44","State3_45","State3_46","State3_47","State3_48","State3_49","State3_50","State3_51")
dat4 <- dat
levels(dat4$state) <- c("State4_1","State4_2","State4_3","State4_4","State4_5","State4_6","State4_7","State4_8","State4_9","State4_10",
"State4_11","State4_12","State4_13","State4_14","State4_15","State4_16","State4_17","State4_18","State4_19","State4_20",
"State4_21","State4_22","State4_23","State4_24","State4_25","State4_26","State4_27","State4_28","State4_29","State4_30",
"State4_31","State4_32","State4_33","State4_34","State4_35","State4_36","State4_37","State4_38","State4_39","State4_40",
"State4_41","State4_42","State4_43","State4_44","State4_45","State4_46","State4_47","State4_48","State4_49","State4_50","State4_51")
dat5 <- dat
levels(dat5$state) <- c("State5_1","State5_2","State5_3","State5_4","State5_5","State5_6","State5_7","State5_8","State5_9","State5_10",
"State5_11","State5_12","State5_13","State5_14","State5_15","State5_16","State5_17","State5_18","State5_19","State5_20",
"State5_21","State5_22","State5_23","State5_24","State5_25","State5_26","State5_27","State5_28","State5_29","State5_30",
"State5_31","State5_32","State5_33","State5_34","State5_35","State5_36","State5_37","State5_38","State5_39","State5_40",
"State5_41","State5_42","State5_43","State5_44","State5_45","State5_46","State5_47","State5_48","State5_49","State5_50","State5_51")
dat <- rbind(dat,dat1,dat2,dat3,dat4,dat5)
jet.colors <- colorRampPalette(c("#F0FFFF", "cyan", "#007FFF", "yellow", "#FFBF00", "orange", "red", "#7F0000"), bias = 2.25)
dat %>% mutate(state = reorder(state, desc(state))) %>%
ggplot(aes(year, state, fill = rate)) +
geom_tile(color = "white", size = 0.35) +
scale_x_continuous(expand = c(0,0)) +
scale_fill_gradientn(colors = jet.colors(16), na.value = 'white') +
geom_vline(xintercept = 1963, col = "black") +
theme_minimal() +
theme(panel.grid = element_blank()) +
coord_cartesian(clip = 'off') +
ggtitle(the_disease) +
ylab("") +
xlab("") +
theme(legend.position = "bottom", text = element_text(size = 8)) +
annotate(geom = "text", x = 1963, y = 50.5, label = "Vaccine introduced", size = 3, hjust = 0)
Result:
In this case, the heatmap is too long and no longer can be seen on the screen without scrolling down. Any ideas on how to fit all these 320 units on the screen without making the squares too small?
There is a practical limit to how useful this is, based on screen (or paper size). It looks like you are coming up against that limit.
You could rotate the plot, since screens tend to be wider than they are high, but that upsets the obvious left-to-right chronology of your plot.
I think your only other realistic options would be to either split the plot up by some other variable, or summarise the categories on the y axis in some way, and hide most of the labels on the y axis. In the example you've created, you could group them as geographic regions rather than individual states e.g. East coast, West coast, South, etc. Whatever works for your real data. If all the labels on the y axis are important, then the plot has to be split up, because they are essentially unreadable as it currently stands.
I am using the windrose function posted here: Wind rose with ggplot (R)?
I need to have the percents on the figure showing on the individual lines (rather than on the left side), but so far I have not been able to figure out how. (see figure below for depiction of goal)
Here is the code that makes the figure:
p.windrose <- ggplot(data = data,
aes(x = dir.binned,y = (..count..)/sum(..count..),
fill = spd.binned)) +
geom_bar()+
scale_y_continuous(breaks = ybreaks.prct,labels=percent)+
ylab("")+
scale_x_discrete(drop = FALSE,
labels = waiver()) +
xlab("")+
coord_polar(start = -((dirres/2)/360) * 2*pi) +
scale_fill_manual(name = "Wind Speed (m/s)",
values = spd.colors,
drop = FALSE)+
theme_bw(base_size = 12, base_family = "Helvetica")
I marked up the figure I have so far with what I am trying to do! It'd be neat if the labels either auto-picked the location with the least wind in that direction, or if it had a tag for the placement so that it could be changed.
I tried using geom_text, but I get an error saying that "aesthetics must be valid data columns".
Thanks for your help!
One of the things you could do is to make an extra data.frame that you use for the labels. Since the data isn't available from your question, I'll illustrate with mock data below:
library(ggplot2)
# Mock data
df <- data.frame(
x = 1:360,
y = runif(360, 0, 0.20)
)
labels <- data.frame(
x = 90,
y = scales::extended_breaks()(range(df$y))
)
ggplot(data = df,
aes(x = as.factor(x), y = y)) +
geom_point() +
geom_text(data = labels,
aes(label = scales::percent(y, 1))) +
scale_x_discrete(breaks = seq(0, 1, length.out = 9) * 360) +
coord_polar() +
theme(axis.ticks.y = element_blank(), # Disables default y-axis
axis.text.y = element_blank())
#teunbrand answer got me very close! I wanted to add the code I used to get everything just right in case anyone in the future has a similar problem.
# Create the labels:
x_location <- pi # x location of the labels
# Get the percentage
T_data <- data %>%
dplyr::group_by(dir.binned) %>%
dplyr::summarise(count= n()) %>%
dplyr::mutate(y = count/sum(count))
labels <- data.frame(x = x_location,
y = scales::extended_breaks()(range(T_data$y)))
# Create figure
p.windrose <- ggplot() +
geom_bar(data = data,
aes(x = dir.binned, y = (..count..)/sum(..count..),
fill = spd.binned))+
geom_text(data = labels,
aes(x=x, y=y, label = scales::percent(y, 1))) +
scale_y_continuous(breaks = waiver(),labels=NULL)+
scale_x_discrete(drop = FALSE,
labels = waiver()) +
ylab("")+xlab("")+
coord_polar(start = -((dirres/2)/360) * 2*pi) +
scale_fill_manual(name = "Wind Speed (m/s)",
values = spd.colors,
drop = FALSE)+
theme_bw(base_size = 12, base_family = "Helvetica") +
theme(axis.ticks.y = element_blank(), # Disables default y-axis
axis.text.y = element_blank())
I want to create bubble grid charts with ggplot.
somthing like this:
I couldnt find any code or exampe online.
Thanks
Using geom_point with discrete x and y scales will get you started. Here's an example with some quick toy data:
library(tidyverse)
offenses <- c("robbery", "violence", "drugs")
actions <- c("formal", "informal", "considered")
counts <- sample(10:100, 9, replace = TRUE)
data <- expand.grid(offenses = offenses, actions = actions) %>% bind_cols(counts = counts)
ggplot(data,
aes(x = str_to_title(offenses),
y = str_to_title(actions),
colour = str_to_title(offenses),
size = counts)) +
geom_point() +
geom_text(aes(label = counts),
colour = "white",
size = 3) +
scale_x_discrete(position = "top") +
scale_size_continuous(range = c(10, 30)) + # Adjust as required.
scale_color_brewer(palette = "Set2") +
labs(x = NULL, y = NULL) +
theme(legend.position = "none",
panel.background = element_blank(),
panel.grid = element_blank(),
axis.ticks = element_blank())
Play around with the range parameter of scale_size_continouous as needed to get bubbles of a reasonable size for your data set.
Oh, I also tried meanwhile. It looks very similar ...
require(ggplot2)
require(ggsci) # for the scale_fill_npg()
grid.bubble.plot <- function(df,
axis_labels_size=16,
aspect_ratio=1/1,
values_text_size=3,
values_text_color="white",
x_axis_position="top", # or "bottom",
bubble_size_range=c(5, 24),
bubble_alpha=0.5,
bubble_shape=21,
bubble_edge_stroke=0) {
col_names <- colnames(df)
row_names <- rownames(df)
values <- as.vector(as.matrix(df))
values_x <- as.vector(sapply(col_names, function(i) rep(i, nrow(df))))
values_y <- as.vector(rep(row_names, dim(df)[2]))
res_df <- data.frame(values = values, values_x = values_x, values_y)
gg <- ggplot(res_df, aes(x=values_x, y=values_y, size = values, fill=factor(values_x))) +
geom_point(alpha=bubble_alpha, shape=bubble_shape, stroke=bubble_edge_stroke) +
scale_size(range = bubble_size_range) +
scale_fill_npg() +
scale_x_discrete(position = x_axis_position) +
geom_text(aes(label=values), size=values_text_size, color=values_text_color) +
theme(line=element_blank(),
panel.background=element_blank(),
legend.position="none",
axis.title=element_blank(),
axis.text=element_text(size=axis_labels_size),
aspect.ratio=aspect_ratio)
gg
}
grid.bubble.plot(df)
Play around with the values.
e.g. you can make also the text size vary with the values:
# thanks to #MSR for example dataset
require(tidyverse)
offenses <- c("robbery", "violence", "drugs")
actions <- c("formal", "informal", "considered")
counts <- sample(10:100, 9, replace = TRUE)
df <- expand.grid(offenses = offenses, actions = actions) %>% bind_cols(counts = counts)
grid.bubble.plot(df, values_text_size=as.vector(as.matrix(df)))
For color, you can try other scale_fill_ variants like:
_aaas() _lancet() _jco() _tron()
Below is some R code that generates a bar plot using ggplot, where the bars go off to the left and right, centered at x = 0. I would like to take the text on the y axis (the stage names), and place them in-between the left and the right bars. Here is the R code creating the graph:
library(dplyr)
libary(ggplot2)
# Read data
email_campaign_funnel <- read.csv("https://raw.githubusercontent.com/selva86/datasets/master/email_campaign_funnel.csv")
# X Axis Breaks and Labels
brks <- seq(-15000000, 15000000, 5000000)
lbls = paste0(as.character(c(seq(15, 0, -5), seq(5, 15, 5))), "m")
# Shorten Names
email_campaign_funnel <- email_campaign_funnel %>%
dplyr::mutate(Stage = gsub('Stage ', '', Stage)) %>%
dplyr::mutate(Stage = gsub(' Page', '', Stage)) %>%
dplyr::mutate(Stage = gsub('Campaign-', '', Stage))
# Plot
ggplot(email_campaign_funnel, aes(x = Stage, y = Users, fill = Gender)) + # Fill column
geom_bar(stat = "identity", width = .6) + # draw the bars
scale_y_continuous(breaks = brks, # Breaks
labels = lbls) + # Labels
coord_flip() + # Flip axes
labs(title="Email Campaign Funnel") +
theme(plot.title = element_text(hjust = .5),
axis.ticks = element_blank()) + # Centre plot title
scale_fill_brewer(palette = "Dark2") # Color palette
Below is a screenshot of a different graph that highlights sort of how I'd like the text to be split in between the bars (I prefer the vertical style of the ggplot() graph more so than the horizontal nature of the imaged graph below).
Any ideas on how to do this in R would be greatly appreciated, thanks!
How about something like this using ggarrange from the ggpubr package:
gg1 <- email_campaign_funnel %>%
mutate(Users = if_else(Gender == "Male", Users, 0)) %>%
ggplot(aes(Stage, Users, fill = Gender)) +
geom_col(width = 0.6) +
scale_y_continuous(breaks = brks, labels = lbls) +
coord_flip() +
labs(title="Email Campaign Funnel") +
theme_minimal() +
scale_fill_manual(values = c("Male" = "Red", "Female" = "Blue")) +
theme(
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())
gg2 <- email_campaign_funnel %>%
filter(Gender == "Male") %>%
ggplot(aes(Stage, 0, label = Stage)) +
geom_text() +
coord_flip() +
theme_void()
gg3 <- email_campaign_funnel %>%
mutate(Users = if_else(Gender == "Female", Users, 0)) %>%
ggplot(aes(Stage, Users, fill = Gender)) +
geom_col(width = 0.6) +
scale_y_continuous(breaks = brks, labels = lbls) +
coord_flip() +
labs(title="Email Campaign Funnel") +
theme_minimal() +
scale_fill_manual(values = c("Male" = "Red", "Female" = "Blue")) +
theme(
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())
library(ggpubr)
ggarrange(gg1, gg2, gg3, ncol = 3, common.legend = TRUE, align = "h")
Explanation: The idea is to build the plot separately from the left and right pyramid bar charts and the labels in the middle. We then use ggpubr::ggarrange to arrange all three ggplot2 plot objects in a single row and ensure that axes are properly aligned.
Split horizontal bar chart with labels in the middle
I was interested in seeing how close we can get to the horizontal pyramid bar chart plot you link to. Here is my attempt:
# Sample data
df <- read.table(text =
"Category Group Value
REB Red 39
REB Blue 35
OREB Red 8
OREB Blue 4
DREB Red 31
DREB Blue 31
AST Red 25
AST Blue 21
STL Red 5
STL Blue 5
BLK Red 1
BLK Blue 0
TOV Red 9
TOV Blue 11", header = T)
# Set factor order
df <- df %>% mutate(Category = factor(Category, unique(Category)))
# Build ggplot2 plot objects
library(tidyverse)
gg1 <- df %>%
filter(Group == "Red") %>%
ggplot(aes(Category, Value, fill = Group, label = Value)) +
geom_col() +
geom_text(colour = "red3", fontface = "bold", nudge_y = 10) +
theme_void() +
scale_fill_manual(values = c("Red" = "red3", "Blue" = "navyblue"), drop = FALSE) +
ylim(c(0, round(1.5 * max(df$Value))))
gg2 <- df %>%
filter(Group == "Red") %>%
ggplot(aes(Category, 0, label = Category)) +
geom_text(fontface = "bold") +
theme_void()
gg3 <- df %>%
filter(Group == "Blue") %>%
ggplot(aes(Category, -Value, fill = Group, label = Value)) +
geom_col() +
geom_text(colour = "navyblue", fontface = "bold", nudge_y = -10) +
theme_void() +
scale_fill_manual(values = c("Red" = "red3", "Blue" = "navyblue"), drop = FALSE) +
ylim(c(round(-1.5 * max(df$Value)), 0))
# Arrange plot objects in 1 column with horizontal scales aligned
library(ggpubr)
ggarrange(gg1, gg2, gg3, nrow = 3, common.legend = TRUE, align = "h", heights = c(1, 0.5, 1))
I'm having trouble with the sizing and aligning of one my plots using the plot_grid function in the cowplot package. The bottom left plot always seems to be a tad bit smaller then the others. I did some researching and couldn't seem to find anything that works. I'm new to R, so any help would be greatly appreciated! Thanks!
Attached is my code as well as what the plot is looking like and what I want it to look like
'#Data frame with huc results for each parameter
parameter_results <- readRDS("param_results_2014.RDS") %>% select(1:84)
#list of parameter names
parameters <- sort(readRDS("parameters.RDS"))
blank_theme <- theme_minimal()+ theme(
axis.title.x = element_blank(),
plot.margin = unit(c(0,0,0,0), "pt"),
axis.title.y = element_blank(),
panel.border = element_blank(),
legend.position=c(.5,.02),
legend.direction="horizontal",
legend.key=element_rect(colour="black",size=0.5,linetype="solid"),
panel.grid=element_blank(),
axis.ticks = element_blank(),
plot.title= element_text(size=8, vjust=-4.0, face="bold")
)
#Function for creating poroportions table for parameters
parameter_summary <-function(parameter) {
parameter_df <- parameter_results %>%
select(results = parameter) %>% #keep only column for the parameter you want to plot
filter(results != "Not Applicable") %>% #filters out 'not applicable' results
count(results) %>% #
mutate(prop = prop.table(n), perc = paste0(round(prop * 100),"%"))
return(parameter_df)
}
parameter_pie_chart <- function(parameter,title="",nudgex=5,nudgey=-10) {
# parameter: the parameter you want to create a pie chart for, example: 'DO'
# title: plot title, default is the name of the parameter
parameter_df <- parameter_summary(parameter)
#data frame of proportions less than 10%. necessary because for these values, labels are implemented with an arrow
small_perc <- parameter_df %>% filter(prop < .10)
#dataframe of proportions greater than 10%
signif_perc <- parameter_df %>% filter(prop >= .10)
pie_chart <- ggplot(parameter_df, aes(x = "", y = n, fill = fct_inorder(results))) +
geom_bar(stat = "identity", width = 1,colour='black') +
coord_polar(theta = "y") +
blank_theme +
theme(axis.text.x=element_blank()) +
theme(legend.title=element_blank()) +
#ggtitle(title)+
theme(plot.title = element_text(hjust = 0.5)) +
geom_text(data = signif_perc, aes(label = perc),
position = position_stack(vjust = .5), size = 5, show.legend = F) +
scale_fill_manual(values = c("Attaining" = "#99FF99","Insufficient Information" = "#FFFF99", "Non Attaining" = "#FF9999", "Not Applicable" = "orange"),labels=c("Attaining ",
"Insufficient Information ",
"Non Attaining "))
if (sum(parameter_df$prop < .10) > 0) {
pie_chart <- pie_chart + geom_text_repel(data = small_perc, aes(label = perc), size= 5, show.legend = F, nudge_x = nudgex,nudge_y = nudgey)
}
pie_chart
}
#Indivdual pie charts to create combined pie charts
pie_do <- parameter_pie_chart('DO')
pie_TP<-parameter_pie_chart('Total Phosphorus')
pie_temp<-parameter_pie_chart('Temperature')
pie_pH<-parameter_pie_chart('pH')
pie_arcs<-parameter_pie_chart('Arsenic-HH')
pie_TDS<-parameter_pie_chart('Total Dissolved Solids')
pie_causebio<-parameter_pie_chart('Biological (Cause Unknown)')
pie_human_lead<-parameter_pie_chart('Lead-HH - DWS')
pie_mercury<-parameter_pie_chart('Mercury-HH')
pie_nitrate<-parameter_pie_chart('Nitrate')
pie_aluminum <- parameter_pie_chart("Aluminum")
pie_temp_trout<-parameter_pie_chart('Temperature Trout')
pie_do_trout<-parameter_pie_chart('DO Trout')
pie_fish_merc<-parameter_pie_chart('Fish-Mercury')
pie_fish_ddt<-parameter_pie_chart('Fish-DDx')
pie_fish_dioxin<-parameter_pie_chart('Fish-Dioxin')
pie_fish_chlordane<-parameter_pie_chart('Fish-Chlordane')
pie_fish_pcb<-parameter_pie_chart('Fish-PCB')
pie_human_arsenic<-parameter_pie_chart('Arsenic-HH')
pie_TDS<-parameter_pie_chart('Total Dissolved Solids')
pie_arsenic_dws<-parameter_pie_chart('Arsenic HH - DWS')
pie_trout_do<-parameter_pie_chart('DO Trout')
pie_unknown_trout<-parameter_pie_chart('Biological Trout (Cause Unknown)')
pie_ecoli<-parameter_pie_chart('e.Coli')
pie_enterococcus<-parameter_pie_chart('Enterococcus')
pie_beach_enterococcus<-parameter_pie_chart('Beach Closing (Enterococcus)')
##Figure 2.10
combined_plot1 <- plot_grid(pie_human_arsenic + theme(legend.position="none"),
pie_TDS + theme(legend.position = "none"),
pie_human_lead + theme(legend.position = "none"),
pie_mercury + theme(legend.position = "none"),
pie_nitrate + theme(legend.position = "bottom"),
nrow = 2,ncol=3,align="hv",labels=c("Arsenic,human health","TDS","Lead,human health","Mercury,human health","Nitrate"),label_fontface="bold",label_size=10,hjust=-0.3,vjust=9)+
draw_label("Figure 2.10:Assessment Results for Key Parameters Associated with Water Supply Use,\nPercent(%) of 826 AUs",fontface="bold",hjust=0.5,vjust=-14.5)
ggsave(filename="Figure2.10-Water Supply Use.pdf",path="V:/lum/WM&S/BEAR (Bureau of Environmental Analysis and Restoration)/Envpln/Hourly Employees/KevinZolea/Rwork/2014IR/PieCharts",width=11.5,height=11)
`
Plot that I have:
Plot I Want: