Heatmap using ggplot for 300+ units - r

I am trying to generate a heatmap via ggplot using the following code I found online (https://simplystatistics.org/2019/08/28/you-can-replicate-almost-any-plot-with-ggplot2/):
library(dslabs)
data(us_contagious_diseases)
the_disease <- "Measles"
dat <- us_contagious_diseases %>%
filter(!state%in%c("Hawaii","Alaska") & disease == the_disease) %>%
mutate(rate = count / population * 10000 * 52 / weeks_reporting)
jet.colors <- colorRampPalette(c("#F0FFFF", "cyan", "#007FFF", "yellow", "#FFBF00", "orange", "red", "#7F0000"), bias = 2.25)
dat %>% mutate(state = reorder(state, desc(state))) %>%
ggplot(aes(year, state, fill = rate)) +
geom_tile(color = "white", size = 0.35) +
scale_x_continuous(expand = c(0,0)) +
scale_fill_gradientn(colors = jet.colors(16), na.value = 'white') +
geom_vline(xintercept = 1963, col = "black") +
theme_minimal() +
theme(panel.grid = element_blank()) +
coord_cartesian(clip = 'off') +
ggtitle(the_disease) +
ylab("") +
xlab("") +
theme(legend.position = "bottom", text = element_text(size = 8)) +
annotate(geom = "text", x = 1963, y = 50.5, label = "Vaccine introduced", size = 3, hjust = 0)
Result:
This all works fine but in my dataset I have 320 units instead of 50. I have created a similar scenario using the data from the example above:
library(dslabs)
data(us_contagious_diseases)
the_disease <- "Measles"
dat <- us_contagious_diseases %>%
filter(!state%in%c("Hawaii","Alaska") & disease == the_disease) %>%
mutate(rate = count / population * 10000 * 52 / weeks_reporting)
dat1 <- dat
levels(dat1$state) <- c("State1_1","State1_2","State1_3","State1_4","State1_5","State1_6","State1_7","State1_8","State1_9","State1_10",
"State1_11","State1_12","State1_13","State1_14","State1_15","State1_16","State1_17","State1_18","State1_19","State1_20",
"State1_21","State1_22","State1_23","State1_24","State1_25","State1_26","State1_27","State1_28","State1_29","State1_30",
"State1_31","State1_32","State1_33","State1_34","State1_35","State1_36","State1_37","State1_38","State1_39","State1_40",
"State1_41","State1_42","State1_43","State1_44","State1_45","State1_46","State1_47","State1_48","State1_49","State1_50","State1_51")
dat2 <- dat
levels(dat2$state) <- c("State2_1","State2_2","State2_3","State2_4","State2_5","State2_6","State2_7","State2_8","State2_9","State2_10",
"State2_11","State2_12","State2_13","State2_14","State2_15","State2_16","State2_17","State2_18","State2_19","State2_20",
"State2_21","State2_22","State2_23","State2_24","State2_25","State2_26","State2_27","State2_28","State2_29","State2_30",
"State2_31","State2_32","State2_33","State2_34","State2_35","State2_36","State2_37","State2_38","State2_39","State2_40",
"State2_41","State2_42","State2_43","State2_44","State2_45","State2_46","State2_47","State2_48","State2_49","State2_50","State2_51")
dat3 <- dat
levels(dat3$state) <- c("State3_1","State3_2","State3_3","State3_4","State3_5","State3_6","State3_7","State3_8","State3_9","State3_10",
"State3_11","State3_12","State3_13","State3_14","State3_15","State3_16","State3_17","State3_18","State3_19","State3_20",
"State3_21","State3_22","State3_23","State3_24","State3_25","State3_26","State3_27","State3_28","State3_29","State3_30",
"State3_31","State3_32","State3_33","State3_34","State3_35","State3_36","State3_37","State3_38","State3_39","State3_40",
"State3_41","State3_42","State3_43","State3_44","State3_45","State3_46","State3_47","State3_48","State3_49","State3_50","State3_51")
dat4 <- dat
levels(dat4$state) <- c("State4_1","State4_2","State4_3","State4_4","State4_5","State4_6","State4_7","State4_8","State4_9","State4_10",
"State4_11","State4_12","State4_13","State4_14","State4_15","State4_16","State4_17","State4_18","State4_19","State4_20",
"State4_21","State4_22","State4_23","State4_24","State4_25","State4_26","State4_27","State4_28","State4_29","State4_30",
"State4_31","State4_32","State4_33","State4_34","State4_35","State4_36","State4_37","State4_38","State4_39","State4_40",
"State4_41","State4_42","State4_43","State4_44","State4_45","State4_46","State4_47","State4_48","State4_49","State4_50","State4_51")
dat5 <- dat
levels(dat5$state) <- c("State5_1","State5_2","State5_3","State5_4","State5_5","State5_6","State5_7","State5_8","State5_9","State5_10",
"State5_11","State5_12","State5_13","State5_14","State5_15","State5_16","State5_17","State5_18","State5_19","State5_20",
"State5_21","State5_22","State5_23","State5_24","State5_25","State5_26","State5_27","State5_28","State5_29","State5_30",
"State5_31","State5_32","State5_33","State5_34","State5_35","State5_36","State5_37","State5_38","State5_39","State5_40",
"State5_41","State5_42","State5_43","State5_44","State5_45","State5_46","State5_47","State5_48","State5_49","State5_50","State5_51")
dat <- rbind(dat,dat1,dat2,dat3,dat4,dat5)
jet.colors <- colorRampPalette(c("#F0FFFF", "cyan", "#007FFF", "yellow", "#FFBF00", "orange", "red", "#7F0000"), bias = 2.25)
dat %>% mutate(state = reorder(state, desc(state))) %>%
ggplot(aes(year, state, fill = rate)) +
geom_tile(color = "white", size = 0.35) +
scale_x_continuous(expand = c(0,0)) +
scale_fill_gradientn(colors = jet.colors(16), na.value = 'white') +
geom_vline(xintercept = 1963, col = "black") +
theme_minimal() +
theme(panel.grid = element_blank()) +
coord_cartesian(clip = 'off') +
ggtitle(the_disease) +
ylab("") +
xlab("") +
theme(legend.position = "bottom", text = element_text(size = 8)) +
annotate(geom = "text", x = 1963, y = 50.5, label = "Vaccine introduced", size = 3, hjust = 0)
Result:
In this case, the heatmap is too long and no longer can be seen on the screen without scrolling down. Any ideas on how to fit all these 320 units on the screen without making the squares too small?

There is a practical limit to how useful this is, based on screen (or paper size). It looks like you are coming up against that limit.
You could rotate the plot, since screens tend to be wider than they are high, but that upsets the obvious left-to-right chronology of your plot.
I think your only other realistic options would be to either split the plot up by some other variable, or summarise the categories on the y axis in some way, and hide most of the labels on the y axis. In the example you've created, you could group them as geographic regions rather than individual states e.g. East coast, West coast, South, etc. Whatever works for your real data. If all the labels on the y axis are important, then the plot has to be split up, because they are essentially unreadable as it currently stands.

Related

How To Create Pyramid Bar Chart in R with y-axis labels between the bars

Below is some R code that generates a bar plot using ggplot, where the bars go off to the left and right, centered at x = 0. I would like to take the text on the y axis (the stage names), and place them in-between the left and the right bars. Here is the R code creating the graph:
library(dplyr)
libary(ggplot2)
# Read data
email_campaign_funnel <- read.csv("https://raw.githubusercontent.com/selva86/datasets/master/email_campaign_funnel.csv")
# X Axis Breaks and Labels
brks <- seq(-15000000, 15000000, 5000000)
lbls = paste0(as.character(c(seq(15, 0, -5), seq(5, 15, 5))), "m")
# Shorten Names
email_campaign_funnel <- email_campaign_funnel %>%
dplyr::mutate(Stage = gsub('Stage ', '', Stage)) %>%
dplyr::mutate(Stage = gsub(' Page', '', Stage)) %>%
dplyr::mutate(Stage = gsub('Campaign-', '', Stage))
# Plot
ggplot(email_campaign_funnel, aes(x = Stage, y = Users, fill = Gender)) + # Fill column
geom_bar(stat = "identity", width = .6) + # draw the bars
scale_y_continuous(breaks = brks, # Breaks
labels = lbls) + # Labels
coord_flip() + # Flip axes
labs(title="Email Campaign Funnel") +
theme(plot.title = element_text(hjust = .5),
axis.ticks = element_blank()) + # Centre plot title
scale_fill_brewer(palette = "Dark2") # Color palette
Below is a screenshot of a different graph that highlights sort of how I'd like the text to be split in between the bars (I prefer the vertical style of the ggplot() graph more so than the horizontal nature of the imaged graph below).
Any ideas on how to do this in R would be greatly appreciated, thanks!
How about something like this using ggarrange from the ggpubr package:
gg1 <- email_campaign_funnel %>%
mutate(Users = if_else(Gender == "Male", Users, 0)) %>%
ggplot(aes(Stage, Users, fill = Gender)) +
geom_col(width = 0.6) +
scale_y_continuous(breaks = brks, labels = lbls) +
coord_flip() +
labs(title="Email Campaign Funnel") +
theme_minimal() +
scale_fill_manual(values = c("Male" = "Red", "Female" = "Blue")) +
theme(
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())
gg2 <- email_campaign_funnel %>%
filter(Gender == "Male") %>%
ggplot(aes(Stage, 0, label = Stage)) +
geom_text() +
coord_flip() +
theme_void()
gg3 <- email_campaign_funnel %>%
mutate(Users = if_else(Gender == "Female", Users, 0)) %>%
ggplot(aes(Stage, Users, fill = Gender)) +
geom_col(width = 0.6) +
scale_y_continuous(breaks = brks, labels = lbls) +
coord_flip() +
labs(title="Email Campaign Funnel") +
theme_minimal() +
scale_fill_manual(values = c("Male" = "Red", "Female" = "Blue")) +
theme(
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())
library(ggpubr)
ggarrange(gg1, gg2, gg3, ncol = 3, common.legend = TRUE, align = "h")
Explanation: The idea is to build the plot separately from the left and right pyramid bar charts and the labels in the middle. We then use ggpubr::ggarrange to arrange all three ggplot2 plot objects in a single row and ensure that axes are properly aligned.
Split horizontal bar chart with labels in the middle
I was interested in seeing how close we can get to the horizontal pyramid bar chart plot you link to. Here is my attempt:
# Sample data
df <- read.table(text =
"Category Group Value
REB Red 39
REB Blue 35
OREB Red 8
OREB Blue 4
DREB Red 31
DREB Blue 31
AST Red 25
AST Blue 21
STL Red 5
STL Blue 5
BLK Red 1
BLK Blue 0
TOV Red 9
TOV Blue 11", header = T)
# Set factor order
df <- df %>% mutate(Category = factor(Category, unique(Category)))
# Build ggplot2 plot objects
library(tidyverse)
gg1 <- df %>%
filter(Group == "Red") %>%
ggplot(aes(Category, Value, fill = Group, label = Value)) +
geom_col() +
geom_text(colour = "red3", fontface = "bold", nudge_y = 10) +
theme_void() +
scale_fill_manual(values = c("Red" = "red3", "Blue" = "navyblue"), drop = FALSE) +
ylim(c(0, round(1.5 * max(df$Value))))
gg2 <- df %>%
filter(Group == "Red") %>%
ggplot(aes(Category, 0, label = Category)) +
geom_text(fontface = "bold") +
theme_void()
gg3 <- df %>%
filter(Group == "Blue") %>%
ggplot(aes(Category, -Value, fill = Group, label = Value)) +
geom_col() +
geom_text(colour = "navyblue", fontface = "bold", nudge_y = -10) +
theme_void() +
scale_fill_manual(values = c("Red" = "red3", "Blue" = "navyblue"), drop = FALSE) +
ylim(c(round(-1.5 * max(df$Value)), 0))
# Arrange plot objects in 1 column with horizontal scales aligned
library(ggpubr)
ggarrange(gg1, gg2, gg3, nrow = 3, common.legend = TRUE, align = "h", heights = c(1, 0.5, 1))

heatmap in ggplot, different color for each group

I am trying to produce a heatmap in ggplot. I want each group to have different color gradient, but don't know how to do that. My current code looks like this:
## dummy data -----------------------------------------------------------------------------
data <- data.frame(
group = sample(c("Direct Patient Care", "Indirect Patient Care", "Education", "Rounds", "Handoff", "Misce"), 30, replace = T),
pct = rnorm(30, mean = 50, sd = 8)
)
## generate group id
data <- data %>%
group_by(group) %>%
mutate(id = row_number())
data$grpid <- with(data, ifelse(group == "Direct Patient Care", 1, ifelse(group == "Indirect Patient Care", 2,
ifelse(group == "Education", 3,
ifelse(group == "Rounds", 4,
ifelse(group == "Handoff", 5,6 ))))))
## draw graph ------------------------------------------------------------------------------
library(ggplot2)
p <- ggplot(data, aes(x=id, y=group, fill = pct)) +
theme(panel.background = element_rect(fill = "white", colour = "grey50"), aspect.ratio = 0.4) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)+
# guides(fill = guide_legend("Time, %")) +
geom_tile() +
scale_x_continuous (name = " ", breaks = seq(1, 8, by = 1)) +
scale_y_discrete(name = " ") +
theme(axis.text.x = element_text(angle = 0,hjust = 1,vjust = 1), plot.title = element_text(hjust = 0.5) ) +
ggtitle("Heatmap of time spent doing activities across 194 shifts")
p + scale_fill_gradient2(low = "white", high = "red", limits = c(0, 80), breaks = c(0, 10, 20, 30, 40, 50, 60, 70), guide = guide_legend("Time, %")) ## change the color theme ##
And the resulting figure looks like this:
How can I change the color theme for each group, like red for 'Rounds', blue for 'Misce', green for 'Handoff' etc...
Many thanks!
You can do this by creating your own rescaled value in your data and then slightly "hacking" the alpha aesthetic combined with the fill aesthetic:
library(tidyverse)
data %>%
group_by(group) %>%
mutate(rescale = scales::rescale(pct)) %>%
ggplot(., aes(x = factor(id), y = group)) +
geom_tile(aes(alpha = rescale, fill = group), color = "white") +
scale_alpha(range = c(0.1, 1))
First we create a new column called rescale which rescales the pct from 0 to 1 then you force the scale_alpha(range = c(0, 1)) [note, in this case I used c(0.1, 1) so that you can still "see" the zero points.
Finally, you probably want to remove the guides:
data %>%
group_by(group) %>%
mutate(rescale = scales::rescale(pct)) %>%
ggplot(., aes(x = factor(id), y = group)) +
geom_tile(aes(alpha = rescale, fill = group), color = "white") +
scale_alpha(range = c(0.1, 1)) +
theme(legend.position = "none")
N.B. by using aes(x = factor(id)... you can get around manually setting your x-axis since in this case it appears you want to treat it as a factor not a numeric scale.
Finally, if you really want to get fancy, you could double-encode the axis.text.y colors to that of the levels of your factor (i.e., data$group) variable:
data %>%
group_by(group) %>%
mutate(rescale = scales::rescale(pct)) %>%
ggplot(., aes(x = factor(id), y = group)) +
geom_tile(aes(alpha = rescale, fill = group), color = "white") +
scale_alpha(range = c(0.1, 1)) +
theme(legend.position = "none",
axis.text.y = element_text(color = scales::hue_pal()(length(levels(data$group)))),
axis.ticks = element_blank()) +
labs(x = "", y = "")

Add smooth line to R pyramid plots

Please help me to add smooth lines(thick black lines shown in the figure) to a R pyramid plot as shown in the attached image. Appreciate your help.This plot shows the population distribution according to the age and gender.
xy.pop<-c(3.2,3.5,3.6,3.6,3.5,3.5,3.9,3.7,3.9,3.5,3.2,2.8,2.2,1.8,1.5,1.3,0.7,0.4)
xx.pop<-c(3.2,3.4,3.5,3.5,3.5,3.7,4,3.8,3.9,3.6,3.2,2.5,2,1.7,1.5,1.3,1,0.8)
agelabels<-c("0-4","5-9","10-14","15-19","20-24","25-29","30-34",
"35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74",
"75-79","80-44","85+")
mcol<-color.gradient(c(0,0,0.5,1),c(0,0,0.5,1),c(1,1,0.5,1),18)
fcol<-color.gradient(c(1,1,0.5,1),c(0.5,0.5,0.5,1),c(0.5,0.5,0.5,1),18)
par(mar=pyramid.plot(xy.pop,xx.pop,labels=agelabels,main="Australian population pyramid 2002",lxcol=mcol,rxcol=fcol,))
How about the following (using ggplot rather than base R graphics).
# Your data
xy.pop<-c(3.2,3.5,3.6,3.6,3.5,3.5,3.9,3.7,3.9,3.5,3.2,2.8,2.2,1.8,1.5,1.3,0.7,0.4)
xx.pop<-c(3.2,3.4,3.5,3.5,3.5,3.7,4,3.8,3.9,3.6,3.2,2.5,2,1.7,1.5,1.3,1,0.8)
agelabels<-c("0-4","5-9","10-14","15-19","20-24","25-29","30-34",
"35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74",
"75-79","80-44","85+")
# Collect data in dataframe
df <- rbind.data.frame(
cbind.data.frame(Percentage = -xy.pop, Group = agelabels, Gender = "male"),
cbind.data.frame(Percentage = +xx.pop, Group = agelabels, Gender = "female"));
# Make sure agelabels have the right order
df$Group <- factor(df$Group, levels = agelabels);
# (gg)plot
gg <- ggplot(
data = df,
aes(x = Group, y = Percentage, fill = Gender, group = Gender));
gg <- gg + geom_bar(data = subset(df, Gender == "female"), stat = "identity");
gg <- gg + geom_bar(data = subset(df, Gender == "male"), stat = "identity");
gg <- gg + coord_flip();
gg <- gg + geom_smooth(
colour = "black", method = "loess", se = FALSE, show.legend = FALSE, size = 0.5);
gg <- gg + labs(
x = "Age",
y = "Percentage",
title = "Australian population pyramid 2012");
gg <- gg + scale_y_continuous(
breaks = seq(-4, 4, by = 2),
labels = c(rev(seq(0, 4, by = 2)), seq(2, 4, by = 2)));
print(gg);
I'm here fitting a LOESS curve separately to both the male and female pyramid halves (through the group aesthetic).
It's not quite the same plot as the one you show, but there is still room for improvement/tweaking. For example, you can change the fill aesthetic to achieve a percentage-dependent fill of the bars.
Credit where credit is due: This solution is based on this post on SO by #DidzisElferts.
Update (nearly a year later)
I've always wanted to review this answer to increase the aesthetic similarity of a ggplot2 solution with the plot generated from plotrix::pyramid.plot. Here is an update that gets pretty close.
# Define function to draw the left/right half of an age pyramid
ggpyramidhalf <- function(df, pos = "left", title) {
gg <- ggplot(df, aes(Group, Percentage, group = Gender)) +
geom_col(aes(fill = Group), colour = "black") +
geom_smooth(
colour = "black",
method = "loess",
se = F,
show.legend = F, size = 0.5) +
theme_minimal() +
labs(y = "%", title = title) +
coord_flip(expand = FALSE) +
theme(
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
if (pos == "left") {
gg <- gg +
ylim(c(min(range(pretty(df$Percentage))), 0)) +
scale_fill_manual(
values = colorRampPalette(c("blue", "white"))(length(agelabels)),
guide = F) +
theme(
plot.title = element_text(hjust = 1),
axis.text.y = element_blank())
} else {
gg <- gg +
ylim(c(0, max(range(pretty(df$Percentage))))) +
scale_fill_manual(
values = colorRampPalette(c("red", "white"))(length(agelabels)),
guide = F) +
theme(
plot.title = element_text(hjust = 0),
axis.title.y = element_blank(),
axis.text.y = element_text(hjust = 0.5, margin = margin(r = 10)))
}
gg
}
# Draw left (male) half of age pyramid
gg1 <- df %>%
filter(Gender == "male") %>%
mutate(Group = factor(Group, agelabels)) %>%
ggpyramidhalf(pos = "left", title = "Male")
# Draw right (female) half of age pyramid
gg2 <- df %>%
filter(Gender == "female") %>%
mutate(Group = factor(Group, agelabels)) %>%
ggpyramidhalf(pos = "right", title = "Female")
# Use gridExtra to draw both halfs in one plot
library(gridExtra)
library(grid)
grid.arrange(
gg1, gg2,
ncol = 2,
widths = c(1, 1.15),
top = textGrob("Australian population period 2002", gp = gpar(font = 2)))
Here is a solution using the pyramid.plot function of plotrix:
library(plotrix)
pyramid.plot(xy.pop,xx.pop,labels=agelabels,
main="Australian population pyramid 2002",lxcol=mcol,rxcol=fcol)
male.smline <- loess.smooth(x=1:18, y=xy.pop, degree=2)
lines(-1-male.smline$y, male.smline$x, col="red", lwd=3)
female.smline <- loess.smooth(x=1:18, y=xx.pop, degree=2)
lines(1+female.smline$y, female.smline$x, col="black", lwd=3)

Aesthetics must be either length 1 or the same as the data (1): x, y, label

I'm working on some data on party polarization (something like this) and used geom_dumbbell from ggalt and ggplot2. I keep getting the same aes error and other solutions in the forum did not address this as effectively. This is my sample data.
df <- data_frame(policy=c("Not enough restrictions on gun ownership", "Climate change is an immediate threat", "Abortion should be illegal"),
Democrats=c(0.54, 0.82, 0.30),
Republicans=c(0.23, 0.38, 0.40),
diff=sprintf("+%d", as.integer((Democrats-Republicans)*100)))
I wanted to keep order of the plot, so converted policy to factor and wanted % to be shown only on the first line.
df <- arrange(df, desc(diff))
df$policy <- factor(df$policy, levels=rev(df$policy))
percent_first <- function(x) {
x <- sprintf("%d%%", round(x*100))
x[2:length(x)] <- sub("%$", "", x[2:length(x)])
x
}
Then I used ggplot that rendered something close to what I wanted.
gg2 <- ggplot()
gg2 <- gg + geom_segment(data = df, aes(y=country, yend=country, x=0, xend=1), color = "#b2b2b2", size = 0.15)
# making the dumbbell
gg2 <- gg + geom_dumbbell(data=df, aes(y=country, x=Democrats, xend=Republicans),
size=1.5, color = "#B2B2B2", point.size.l=3, point.size.r=3,
point.color.l = "#9FB059", point.color.r = "#EDAE52")
I then wanted the dumbbell to read Democrat and Republican on top to label the two points (like this). This is where I get the error.
gg2 <- gg + geom_text(data=filter(df, country=="Government will not control gun violence"),
aes(x=Democrats, y=country, label="Democrats"),
color="#9fb059", size=3, vjust=-2, fontface="bold", family="Calibri")
gg2 <- gg + geom_text(data=filter(df, country=="Government will not control gun violence"),
aes(x=Republicans, y=country, label="Republicans"),
color="#edae52", size=3, vjust=-2, fontface="bold", family="Calibri")
Any thoughts on what I might be doing wrong?
I think it would be easier to build your own "dumbbells" with geom_segment() and geom_point(). Working with your df and changing the variable refences "country" to "policy":
library(tidyverse)
# gather data into long form to make ggplot happy
df2 <- gather(df,"party", "value", Democrats:Republicans)
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
# our dumbell
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
# the text labels
geom_text(aes(label = party), vjust = -1.5) + # use vjust to shift text up to no overlap
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red")) + # named vector to map colors to values in df2
scale_x_continuous(limits = c(0,1), labels = scales::percent) # use library(scales) nice math instead of pasting
Produces this plot:
Which has some overlapping labels. I think you could avoid that if you use just the first letter of party like this:
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
geom_text(aes(label = gsub("^(\\D).*", "\\1", party)), vjust = -1.5) + # just the first letter instead
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red"),
guide = "none") +
scale_x_continuous(limits = c(0,1), labels = scales::percent)
Only label the top issue with names:
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
geom_text(data = filter(df2, policy == "Not enough restrictions on gun ownership"),
aes(label = party), vjust = -1.5) +
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red")) +
scale_x_continuous(limits = c(0,1), labels = scales::percent)

No legend shows up using multiple geom_point and geom_line functions in 1 graph

I'm struggling to learn the ins and outs of R, ggplot2, etc - being more used to being taught in an A to Z manner an entire (fixed) coding language (not used to open source - I learned to code when dinosaurs roamed the earth). So I have kluged together the following code to create one graph. Only ... I don't have the dupe legends problem -- I have no legend a'tall!
erc <- ggplot(usedcarval, aes(x = usedcarval$age)) +
geom_line(aes(y = usedcarval$dealer), colour = "orange", size = .5) +
geom_point(aes(y = usedcarval$dealer),
show.legend = TRUE, colour = "orange", size = 1) +
geom_line(aes(y = usedcarval$pvtsell), colour = "green", size = .5) +
geom_point(aes(y = usedcarval$pvtsell), colour = "green", size = 1) +
geom_line(aes(y = usedcarval$tradein), colour = "blue", size = .5) +
geom_point(aes(y = usedcarval$tradein), colour = "blue", size = 1) +
geom_line(aes(y = as.integer(predvalt)), colour = "gray", size = 1) +
geom_line(aes(y = as.integer(predvalp)), colour = "gray", size = 1) +
geom_line(aes(y = as.integer(predvald)), colour = "gray", size = 1) +
labs(x = "Value of a Used Car as it Ages (Years)", y = "Dollars") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5)) +
theme(axis.text.x = element_text(angle = 60, vjust = .6))
erc
I can't figure out how to put an image in this text since I have no link except to my dropbox...
I would appreciate any help. Sincerely, Stephanie
Ok, I felt like doing some ggplot, and it was an interesting task to contrast the way ggplot-beginners (I was one not so long ago) approach it compared to the way you need to do it to get things like legends.
Here is the code:
library(ggplot2)
library(gridExtra)
library(tidyr)
# fake up some data
n <- 100
dealer <- 12000 + rnorm(n,0,100)
age <- 10 + rnorm(n,3)
pvtsell <- 10000 + rnorm(n,0,300)
tradein <- 5000 + rnorm(n,0,100)
predvalt <- 6000 + rnorm(n,0,120)
predvalp <- 7000 + rnorm(n,0,100)
predvald <- 8000 + rnorm(n,0,100)
usedcarval <- data.frame(dealer=dealer,age=age,pvtsell=pvtsell,tradein=tradein,
predvalt=predvalt,predvalp=predvalp,predvald=predvald)
# The ggplot-naive way
erc <- ggplot(usedcarval, aes(x = usedcarval$age)) +
geom_line(aes(y = usedcarval$dealer), colour = "orange", size = .5) +
geom_point(aes(y = usedcarval$dealer),
show.legend = TRUE, colour = "orange", size = 1) +
geom_line(aes(y = usedcarval$pvtsell), colour = "green", size = .5) +
geom_point(aes(y = usedcarval$pvtsell), colour = "green", size = 1) +
geom_line(aes(y = usedcarval$tradein), colour = "blue", size = .5) +
geom_point(aes(y = usedcarval$tradein), colour = "blue", size = 1) +
geom_line(aes(y = as.integer(predvalt)), colour = "gray", size = 1) +
geom_line(aes(y = as.integer(predvalp)), colour = "gray", size = 1) +
geom_line(aes(y = as.integer(predvald)), colour = "gray", size = 1) +
labs(x = "ggplot naive way - Value of a Used Car as it Ages (Years)", y = "Dollars") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5)) +
theme(axis.text.x = element_text(angle = 60, vjust = .6))
# The tidyverse way
# ggplot needs long data, not wide data.
# Also we have two different sets of data for points and lines
gdf <- usedcarval %>% gather(series,value,-age)
pdf <- gdf %>% filter( series %in% c("dealer","pvtsell","tradein"))
# our color and size lookup tables
clrs = c("dealer"="orange","pvtsell"="green","tradein"="blue","predvalt"="gray","predvalp"="gray","predvald"="gray")
szes = c("dealer"=0.5,"pvtsell"=0.0,"tradein"=0.5,"predvalt"=1,"predvalp"=1,"predvald"=1)
trc <- ggplot(gdf,aes(x=age)) + geom_line(aes(y=value,color=series,size=series)) +
scale_color_manual(values=clrs) +
scale_size_manual(values=szes) +
geom_point(data=pdf,aes(x=age,y=value,color=series),size=1) +
labs(x = "tidyverse way - Value of a Used Car as it Ages (Years)", y = "Dollars") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5)) +
theme(axis.text.x = element_text(angle = 60, vjust = .6))
grid.arrange(erc, trc, ncol=1)
Study it, espeically look at gdf,pdf and gather. You just can't get legends without using "long data".
If you want more information on the "tidyverse", start here: Hadley Wickham's tidyverse
If you are looking for a short example of how to take some series data that comes in wide format, convert it to long format (using gather), and then plot it with a ggplot (with a legend), here is a nice short example I cooked up for someone recently:
library(ggplot2)
library(tidyr)
# womp up some fake news (uhh... data)
x <- seq(-pi,pi,by=0.25)
y <- sin(x)
yhat <- sin(x) + 0.4*rnorm(length(x))
# This is the data in wide form
# you will never get ggplot to make a legend for it
# it simply hates wide data
df1 <- data.frame(x=x,y=y,yhat=yhat)
# So we use gather from tidyr to make it into long data
# creates two new colums, throws y and yhat in them, and replicates x as needed
# you have to look at the data frame to understand gather,
# and read the docs a few times
df2 <- gather(df1,series,value,-x)
# it is now in long form and we can plot it
ggplot(df2) + geom_line(aes(x,value,color=series))
So here is the plot:

Resources