This question already has answers here:
Reverse stacked bar order
(2 answers)
Closed 5 years ago.
I'm a biology graduate student learning R. I was hoping someone could help me have the bars go horizontally in the opposite direction (the blue portion should start at 0 and the red at the 100 end of the scale).
Graph with bars in the wrong direction
Here is the data
my_species <- c('apomict_2-17-17_compreh', 'apomict_2-17-17_compreh', 'apomict_2-17-17_compreh', 'apomict_2-17-17_compreh', 'parthenogen_2-17-17_compreh', 'parthenogen_2-17-17_compreh', 'parthenogen_2-17-17_compreh', 'parthenogen_2-17-17_compreh', 'sexual_2-9-17', 'sexual_2-9-17', 'sexual_2-9-17', 'sexual_2-9-17')
my_species <- factor(my_species)
my_species <- factor(my_species,levels(my_species)[c(length(levels(my_species)):1)]) # reorder your species here just by changing the values in the vector :
my_percentage <- c(36.3, 56.3, 2.6, 4.8, 42.2, 50.6, 2.4, 4.8, 56.0, 19.9, 6.7, 17.4)
my_values <- c(522, 811, 38, 69, 608, 729, 35, 68, 806, 286, 96, 252)
category <- c(rep(c("S","D","F","M"),c(1)))
category <-factor(category)
category = factor(category,levels(category)[c(4,1,2,3)])
df = data.frame(my_species,my_percentage,my_values,category)
Here is the code:
# Load the required libraries
library(ggplot2)
library("grid")
# !!! CONFIGURE YOUR PLOT HERE !!!
# Output
#my_output <- paste("/home/loki/","busco_figure.png",sep="/")
my_width <- 20
my_height <- 15
my_unit <- "cm"
# Colors
my_colors <- c("#56B4E9", "#3492C7", "#F0E442", "#F04442")
# Bar height ratio
my_bar_height <- 0.75
# Legend
my_title <- "BUSCO Assessment Results"
# Font
my_family <- "sans"
my_size_ratio <- 1
# Code to produce the graph
labsize = 1
if (length(levels(my_species)) > 10){
labsize = 0.66
}
print("Plotting the figure ...")
figure <- ggplot() +
geom_bar(aes(y = my_percentage, x = my_species, fill = category), data = df, stat="identity", width=my_bar_height) +
coord_flip() +
theme_gray(base_size = 8) +
#scale_y_continuous(labels = c("100","80","60","40","20","0"), breaks = c(100,80,60,40,20,0)) +
scale_y_continuous(labels = c("100","80","60","40","20","0"), breaks = c(100,80,60,40,20,0)) +
#scale_y_continuous(labels = c("100","80","60","40","20","0"), breaks = c(0,20,40,60,80,100)) +
scale_fill_manual(values = my_colors,labels =c(" Complete (C) and single-copy (S) ",
" Complete (C) and duplicated (D)",
" Fragmented (F) ",
" Missing (M)")) +
ggtitle(my_title) +
xlab("") +
ylab("\n%BUSCOs") +
theme(plot.title = element_text(family=my_family, colour = "black", size = rel(2.2)*my_size_ratio, face = "bold")) +
theme(legend.position="top",legend.title = element_blank()) +
theme(legend.text = element_text(family=my_family, size = rel(1.2)*my_size_ratio)) +
theme(panel.background = element_rect(color="#FFFFFF", fill="white")) +
theme(panel.grid.minor = element_blank()) +
theme(panel.grid.major = element_blank()) +
theme(axis.text.y = element_text(family=my_family, colour = "black", size = rel(1.66)*my_size_ratio)) +
theme(axis.text.x = element_text(family=my_family, colour = "black", size = rel(1.66)*my_size_ratio)) +
theme(axis.line = element_line(size=1*my_size_ratio, colour = "black")) +
theme(axis.ticks.length = unit(.85, "cm")) +
theme(axis.ticks.y = element_line(colour="white", size = 0)) +
theme(axis.ticks.x = element_line(colour="#222222")) +
theme(axis.ticks.length = unit(0.4, "cm")) +
theme(axis.title.x = element_text(family=my_family, size=rel(1.2)*my_size_ratio)) +
guides(fill = guide_legend(override.aes = list(colour = NULL))) +
guides(fill=guide_legend(nrow=2,byrow=TRUE))
for(i in rev(c(1:length(levels(my_species))))){
detailed_values <- my_values[my_species==my_species[my_species==levels(my_species)[i]]]
total_buscos <- sum(detailed_values)
figure <- figure +
annotate("text", label=paste("C:", detailed_values[1] + detailed_values[2], " [S:", detailed_values[1], ", D:", detailed_values[2], "], F:", detailed_values[3], ", M:", detailed_values[4], ", n:", total_buscos, sep=""),
y=3, x = i, size = labsize*4*my_size_ratio, colour = "black", hjust=0, family=my_family)
}
my_output="~/temp.png"
ggsave(figure, file=my_output, width = my_width, height = my_height, unit = my_unit)
print("Done")
see ?position_stack:
position_fill() and position_stack() automatically stack values in
reverse order of the group aesthetic, which for bar charts is usually
defined by the fill aesthetic (the default group aesthetic is formed
by the combination of all discrete aesthetics except for x and y).
This default ensures that bar colours align with the default legend.
In order to change the stacking direction, you simply need to add position = position_stack(reverse = TRUE) to geom_bar:
figure <- ggplot() +
geom_bar(
aes(y = my_percentage, x = my_species, fill = category),
data = df, stat="identity", width=my_bar_height,
position = position_stack(reverse = TRUE)) +
coord_flip() +
...
If you don't want to use position_stack, you would have to change factor level and You also have to set filling color breaks to maintain the same legend order.
You need to reorder the factor levels in order for ggplot2 to know what to do. Here is an example of that (note I had to reorder the labels and colors as well):
...
# Colors
my_colors <- c( "#F04442", "#F0E442", "#3492C7", "#56B4E9")
...
df$category = ordered(df$category, levels = c("M", "F", "D", "S"))
figure <- ggplot(data = df[order(df$category, decreasing = F),]) +
geom_bar(aes(y = my_percentage, x = my_species, fill = category), stat="identity", width=my_bar_height) +
coord_flip() +
theme_gray(base_size = 8) +
scale_y_continuous(labels = c("100","80","60","40","20","0"), breaks = c(100,80,60,40,20,0)) +
scale_fill_manual(values = my_colors,labels =c(" Missing (M)",
" Fragmented (F) ",
" Complete (C) and duplicated (D)",
" Complete (C) and single-copy (S) ")) +
...
Related
I would like to create series of pie charts in ggplot2.
Each plot is showing percentage of two categories ('Yes', 'No'), but I want to show only 'Yes' percentage values and the value should be centred relatively to whole plot not only the 'Yes' part itself. The problem is that I'm able to change the position of the value locally i.e. within the category slice but not in the context of whole pie chart.
Dataset:
df <- data.frame(Perc = c(78, 94, 99, 22, 6, 1),
Source = as.factor(rep(c("Oil", "Solar", "Wind"), 2)),
Agree = as.factor(c(rep("Yes", 3), rep("No", 3))))
Plotting:
ggplot(df, aes(x=" ", y=Perc, group=rev(Agree), fill=Agree)) +
geom_bar(size = .5, stat = "identity", color = "black") +
scale_fill_manual(values = c("grey", "lightgreen")) +
coord_polar("y", start=0) +
geom_text(aes(label = ifelse(Agree=="Yes", paste0(Perc, "%"),""))) +
facet_grid(~Source) + theme_void() + theme(legend.position = "none", strip.text.x = element_text(size = 9))
Now I'm getting plot that looks like this:
And I would like to create this plot:
One option would be to set the y value to 50 for the labels:
library(ggplot2)
ggplot(df, aes(x = " ", y = Perc, group = rev(Agree), fill = Agree)) +
geom_bar(size = .5, stat = "identity", color = "black") +
scale_fill_manual(values = c("grey", "lightgreen")) +
coord_polar("y", start = 0) +
geom_text(aes(y = 50, label = ifelse(Agree == "Yes", paste0(Perc, "%"), ""))) +
facet_grid(~Source) +
theme_void() +
theme(legend.position = "none", strip.text.x = element_text(size = 9))
How can I add labels to the groups of bars on the x- axis (which is the left side of the graph)? It is easy to label the entire axis, or to allow the labels to be generated based on the data, but I am unable to figure out how to label each group of bars, if that makes sense.
I know I could recode the item data into complete sentences, but that seems inelegant relative to making some change to the ggplot code.
I have tried using the code from a similar question on this site (Customize axis labels) scale_x_discrete(breaks = 1:5, labels=c("foo","bar","baz","phi","fum")) + but it simply causes all of the labels to disappear from my graph, and I'm not sure why. That result is worse than using scale_x_discrete(waiver()) +
First, load libraries + color palette:
library(ggplot2)
library(tidyverse)
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
And given the following data:
item <- c("none","none","none",
"low", "low", "low",
"moderatelow","moderatelow","moderatelow",
"moderate","moderate","moderate",
"greatest","greatest","greatest")
mean <- c(2.566, 2.873, 3.286, # none - Scenario A
3.911, 4.123, 4.519, # low - Scenario B
4.113, 4.169, 4.174, # moderatelow - Scenario C
3.88, 3.589, 3.2, # moderate - Scenario D
3.065, 2.544, 2.107) # greatest - Scenario E
reg <- c("GP", "hunt", "trap",
"GP", "hunt", "trap",
"GP", "hunt", "trap",
"GP", "hunt", "trap",
"GP", "hunt", "trap")
mydata <- data.frame(item, mean, reg)
I used the following code to generate the figure
ggplot(mydata, aes(x = item, y = mean, fill = reg)) +
ggtitle("How acceptable are each of the following scenarios to you?")+
coord_flip() +
geom_bar(position = "dodge", stat = "identity") +
# facet_wrap(~item, scales = "free_x") + # changed
scale_fill_manual(values=cbPalette) +
# scale_fill_grey(start = 0.8, end = 0.2) +
ylab("1 = highly unacceptable, 7 = highly acceptable") +
xlab("") +
theme_bw() +
#theme(legend.position="bottom")+
scale_x_discrete(waiver()) +
labs(fill="reg")
Here is the resulting figure:
ETA something for the millionth time - I figured out what works for me, which is using
ggtitle("How acceptable are each of the following scenarios to you?")+
coord_flip() +
geom_bar(position = "dodge", stat = "identity") +
# facet_wrap(~item, scales = "free_x") + # changed
scale_fill_manual(values=cbPalette) +
# scale_fill_grey(start = 0.8, end = 0.2) +
ylab("1 = highly unacceptable, 7 = highly acceptable") +
xlab("") +
theme_bw() +
#theme(legend.position="bottom")+
scale_x_discrete(breaks=c("none", "low", "moderatelow", "moderate", "greatest"),
labels=c("Control", "Treat 1", "Treat 2", "slkdj", "adkljf")) +
labs(fill="reg")
Thank you so much to those of you who commented! Your help led me to the answer.
ETA - okay here I am, back again. It was pointed out to me by #Gregor Thomas that my scale limits are set incorrectly, along with the unnecessary nature of some of my code. This feedback is much appreciated. Using the guidance of commentors, I was able to resolve the labeling issue that existed.
But, now I cannot figure out how to adjust the limits of the axis in the new code format. Given the following, how can I set the scale from 1-7 to reflect the nature of the likert scale people responded to? See code below.
ggplot(mydata, aes(y = item, x = mean, fill = reg)) +
geom_col(position = "dodge") +
scale_fill_manual(values = cbPalette) +
scale_y_discrete(breaks=c("none", "low", "moderatelow", "moderate", "greatest"),
labels=c("No wolves", "Very low numbers of wolves", "Moderately low numbers of wolves", "Moderate numbers of wolves", "Greatest numbers of wolves that can be sustained")
) +
scale_x_continuous(expand = expansion(mult = c(0, .05))) +
labs(
title = "How acceptable are each of the following scenarios to you?",
x = "1 = highly unacceptable, 7 = highly acceptable",
y = "",
fill = "population"
) +
theme_bw() +
theme(
legend.position = "bottom",
panel.grid.major.y = element_blank()
)
Here's how I'd clean up your code. I skip the coord_flip, just mapping the x and y variables as desired. I consolidate all the labels into labs(), and I use scale_y_discrete(labels = ) for the labels.
my_labels = rev(paste("Scenario", LETTERS[1:5]))
ggplot(mydata, aes(y = item, x = mean, fill = reg)) +
geom_col(position = "dodge") +
scale_fill_manual(values = cbPalette) +
scale_y_discrete(
labels = my_labels
) +
labs(
title = "How acceptable are each of the following scenarios to you?",
x = "1 = highly unacceptable, 7 = highly acceptable",
y = "",
fill = "population"
) +
theme_bw() +
theme(legend.position = "bottom")
If this were my plot, I'd adjust the x-scale to remove the padding below 0, and I'd remove the y gridlines, like this:
ggplot(mydata, aes(y = item, x = mean, fill = reg)) +
geom_col(position = "dodge") +
scale_fill_manual(values = cbPalette) +
scale_y_discrete(
labels = my_labels
) +
scale_x_continuous(expand = expansion(mult = c(0, .05))) +
labs(
title = "How acceptable are each of the following scenarios to you?",
x = "1 = highly unacceptable, 7 = highly acceptable",
y = "",
fill = "population"
) +
theme_bw() +
theme(
legend.position = "bottom",
panel.grid.major.y = element_blank()
)
Though if 1 is "highly unacceptable", I don't know how to interpret 0... the whole x scale is seems confusing. Maybe you should set the x limits to be from 1 to 7, not 0 to max of data (which is 5)? If so, use scale_x_continuous(expand = expansion(mult = c(0, .05)), limits = c(1, 7)).
We could add facet_grid(item ~ ., scales="free_y", space="free_y", switch="y")
ggplot(mydata, aes(x = item, y = mean, fill = reg)) +
ggtitle("How acceptable are each of the following scenarios to you?")+
coord_flip() +
geom_bar(position = "dodge", stat = "identity") +
# facet_wrap(~item, scales = "free_x") + # changed
scale_fill_manual(values=cbPalette) +
# scale_fill_grey(start = 0.8, end = 0.2) +
ylab("1 = highly unacceptable, 7 = highly acceptable") +
xlab("") +
theme_bw() +
theme(legend.position="bottom")+
scale_x_discrete(breaks = NULL) +
labs(fill="population") +
facet_grid(item ~ ., scales="free_y", space="free_y", switch="y") +
guides(fill=FALSE)
I'm using ggplot() to build two scatter plots that plot health assessment score for (1) male and (2) female patients vs. # weeks in treatment, plus I'm using geom_line() to plot regression line overlays for both the males and females on each graph.
My question: How do I match the colors of the line overlays with the colors of the scatter plot points ('steelblue2' and 'pink3') while still retaining the legend?
I've found if I move color outside of the aes() argument in geom_line(), the colors of the lines and scatterplot points match, but then the legend disappears.
My code & a sample from my data:
mean_behav_by_numweeks = data.frame(
numweeks_round = c(1:10),
Mean_Behavior_Score_Male = c(3.32,4.18,3.82,4.06,3.33, 3.80,3.64,3.66,3.37,3.82),
nrow_male = c(396,323,293,259,226,217,202,190,170,167),
lm_results_predict_male = c(3.82,3.80,3.78,3.76,3.74, 3.72,3.70,3.68,3.66,3.64),
Mean_Behavior_Score_Female = c(2.91,3.79,3.65,3.41, 2.88,2.88,3.78,2.98,3.67,3.93),
nrow_female = c(109,82,72,74,66,60,58,56,52,50),
lm_results_predict_female=c(3.44,3.44,3.45,3.45, 3.46,3.47,3.47,3.48,3.48,3.49))
gg_plot1 <- ggplot(mean_behav_by_numweeks,
aes(numweeks_round,
Mean_Behavior_Score_Male,
size = mean_behav_by_numweeks$nrow_male)) +
geom_point(colour='steelblue2') +
ggtitle(paste("Scatter plot of mean behavior assessment score by member by # weeks \n since 1st assessment for",
as.character(var),
"among Male Medi-Cal plan members")) +
theme(plot.title = element_text(size=10.9, hjust = 0.5)) +
theme(axis.text = element_text(size=8)) +
scale_size_continuous(range = c(1, 7)) +
xlab("Number of weeks since 1st assessment") +
ylab("Mean behavior assessment score") +
theme(legend.position="bottom") +
labs(size="# members") +
geom_line(data=mean_behav_by_numweeks,
aes(numweeks_round, lm_results_predict_male, color='steelblue2'),
size=1) +
geom_line(data=mean_behav_by_numweeks,
aes(numweeks_round, lm_results_predict_female, color='pink3'),
size=1) +
scale_color_discrete(name = "GenderCode", labels = c("Female", "Male")) +
theme(legend.position="bottom") +
guides(color = guide_legend(order=1, direction="vertical"))
gg_plot1
gg_plot2 <- ggplot(mean_behav_by_numweeks,
aes(numweeks_round,
Mean_Behavior_Score_Female,
size = mean_behav_by_numweeks$nrow_female)) +
geom_point(colour='pink3') +
ggtitle(paste("Scatter plot of mean behavior assessment score by member by # weeks \n since 1st assessment for",
as.character(var),
"among Female Medi-Cal plan members")) +
theme(plot.title = element_text(size=10.9, hjust = 0.5)) +
theme(axis.text = element_text(size=8)) +
scale_size_continuous(range = c(1, 7)) +
xlab("Number of weeks since 1st assessment") +
ylab("Mean behavior assessment score") +
theme(legend.position="bottom") +
labs(size="# members") +
geom_line(data=mean_behav_by_numweeks,
aes(numweeks_round, lm_results_predict_male, color='steelblue2'),
size=1) +
geom_line(data=mean_behav_by_numweeks,
aes(numweeks_round, lm_results_predict_female, color='pink3'), size=1) +
scale_color_discrete(name = "GenderCode", labels = c("Female", "Male")) +
theme(legend.position="bottom") +
guides(color = guide_legend(order=1, direction="vertical"))
windows()
gg_plot2
You will want to reshape your data into long format, although you don't have to use melt or gather if you don't want to -- you can stack your data manually, like
library(dplyr)
library(ggplot2)
new_df <- bind_rows(
male = select(mean_behav_by_numweeks,
numweeks_round,
Mean_Behavior_Score = Mean_Behavior_Score_Male,
nrow = nrow_male,
lm_results_predict = lm_results_predict_male),
female = select(mean_behav_by_numweeks,
numweeks_round,
Mean_Behavior_Score = Mean_Behavior_Score_Female,
nrow = nrow_female,
lm_results_predict = lm_results_predict_female),
.id = "gender"
)
Then you can just do
ggplot(new_df, aes(numweeks_round, Mean_Behavior_Score, size = nrow, colour = gender)) +
geom_point() +
theme(plot.title = element_text(size=10.9, hjust = 0.5),
axis.text = element_text(size=8),
legend.position="bottom") +
scale_size_continuous(range = c(1, 7)) +
labs(x = "Number of weeks since 1st assessment",
y = "Mean behavior assessment score",
size="# members") +
geom_line(aes(y = lm_results_predict), size = 1) +
scale_color_manual(name = "GenderCode", labels = c("Female", "Male"), values = c("pink3", "steelblue2")) +
guides(color = guide_legend(order=1, direction="vertical")) +
facet_wrap("gender")
which gives you
One can use gather/separate to first convert data in long format and then plot.
# A simple capitalization function to convert first letter in Caps
# This function is used to convert male/female to Male/Female
.simpleCap <- function(x) {
s <- strsplit(x, " ")[[1]]
paste(toupper(substring(s, 1, 1)), substring(s, 2),
sep = "", collapse = " ")
}
library(tidyverse)
df <- mean_behav_by_numweeks %>%
gather(key, value, - numweeks_round) %>%
separate(key, c("key", "GenderCode"), sep = "_(?=[^_]*?$)") %>% #separates on last _
mutate(GenderCode = mapply(.simpleCap,GenderCode)) %>%
spread(key, value)
Plot the graph:
ggplot(df, aes(numweeks_round, Mean_Behavior_Score, size = nrow, color = GenderCode )) +
geom_point() +
geom_line(aes(y = lm_results_predict, color = GenderCode), size = 1) +
theme(plot.title = element_text(size=10.9, hjust = 0.5),
axis.text = element_text(size=8),
legend.position="bottom") +
labs(x = "Number of weeks since 1st assessment",
y = "Mean behavior assessment score",
size="# members") +
guides(color = guide_legend(order=1, direction="vertical"))
Data:
mean_behav_by_numweeks = data.frame(
numweeks_round = c(1:10),
Mean_Behavior_Score_Male = c(3.32,4.18,3.82,4.06,3.33, 3.80,3.64,3.66,3.37,3.82),
nrow_male = c(396,323,293,259,226,217,202,190,170,167),
lm_results_predict_male = c(3.82,3.80,3.78,3.76,3.74, 3.72,3.70,3.68,3.66,3.64),
Mean_Behavior_Score_Female = c(2.91,3.79,3.65,3.41, 2.88,2.88,3.78,2.98,3.67,3.93),
nrow_female = c(109,82,72,74,66,60,58,56,52,50),
lm_results_predict_female=c(3.44,3.44,3.45,3.45, 3.46,3.47,3.47,3.48,3.48,3.49))
I'm working on some data on party polarization (something like this) and used geom_dumbbell from ggalt and ggplot2. I keep getting the same aes error and other solutions in the forum did not address this as effectively. This is my sample data.
df <- data_frame(policy=c("Not enough restrictions on gun ownership", "Climate change is an immediate threat", "Abortion should be illegal"),
Democrats=c(0.54, 0.82, 0.30),
Republicans=c(0.23, 0.38, 0.40),
diff=sprintf("+%d", as.integer((Democrats-Republicans)*100)))
I wanted to keep order of the plot, so converted policy to factor and wanted % to be shown only on the first line.
df <- arrange(df, desc(diff))
df$policy <- factor(df$policy, levels=rev(df$policy))
percent_first <- function(x) {
x <- sprintf("%d%%", round(x*100))
x[2:length(x)] <- sub("%$", "", x[2:length(x)])
x
}
Then I used ggplot that rendered something close to what I wanted.
gg2 <- ggplot()
gg2 <- gg + geom_segment(data = df, aes(y=country, yend=country, x=0, xend=1), color = "#b2b2b2", size = 0.15)
# making the dumbbell
gg2 <- gg + geom_dumbbell(data=df, aes(y=country, x=Democrats, xend=Republicans),
size=1.5, color = "#B2B2B2", point.size.l=3, point.size.r=3,
point.color.l = "#9FB059", point.color.r = "#EDAE52")
I then wanted the dumbbell to read Democrat and Republican on top to label the two points (like this). This is where I get the error.
gg2 <- gg + geom_text(data=filter(df, country=="Government will not control gun violence"),
aes(x=Democrats, y=country, label="Democrats"),
color="#9fb059", size=3, vjust=-2, fontface="bold", family="Calibri")
gg2 <- gg + geom_text(data=filter(df, country=="Government will not control gun violence"),
aes(x=Republicans, y=country, label="Republicans"),
color="#edae52", size=3, vjust=-2, fontface="bold", family="Calibri")
Any thoughts on what I might be doing wrong?
I think it would be easier to build your own "dumbbells" with geom_segment() and geom_point(). Working with your df and changing the variable refences "country" to "policy":
library(tidyverse)
# gather data into long form to make ggplot happy
df2 <- gather(df,"party", "value", Democrats:Republicans)
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
# our dumbell
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
# the text labels
geom_text(aes(label = party), vjust = -1.5) + # use vjust to shift text up to no overlap
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red")) + # named vector to map colors to values in df2
scale_x_continuous(limits = c(0,1), labels = scales::percent) # use library(scales) nice math instead of pasting
Produces this plot:
Which has some overlapping labels. I think you could avoid that if you use just the first letter of party like this:
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
geom_text(aes(label = gsub("^(\\D).*", "\\1", party)), vjust = -1.5) + # just the first letter instead
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red"),
guide = "none") +
scale_x_continuous(limits = c(0,1), labels = scales::percent)
Only label the top issue with names:
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
geom_text(data = filter(df2, policy == "Not enough restrictions on gun ownership"),
aes(label = party), vjust = -1.5) +
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red")) +
scale_x_continuous(limits = c(0,1), labels = scales::percent)
I'm struggling to learn the ins and outs of R, ggplot2, etc - being more used to being taught in an A to Z manner an entire (fixed) coding language (not used to open source - I learned to code when dinosaurs roamed the earth). So I have kluged together the following code to create one graph. Only ... I don't have the dupe legends problem -- I have no legend a'tall!
erc <- ggplot(usedcarval, aes(x = usedcarval$age)) +
geom_line(aes(y = usedcarval$dealer), colour = "orange", size = .5) +
geom_point(aes(y = usedcarval$dealer),
show.legend = TRUE, colour = "orange", size = 1) +
geom_line(aes(y = usedcarval$pvtsell), colour = "green", size = .5) +
geom_point(aes(y = usedcarval$pvtsell), colour = "green", size = 1) +
geom_line(aes(y = usedcarval$tradein), colour = "blue", size = .5) +
geom_point(aes(y = usedcarval$tradein), colour = "blue", size = 1) +
geom_line(aes(y = as.integer(predvalt)), colour = "gray", size = 1) +
geom_line(aes(y = as.integer(predvalp)), colour = "gray", size = 1) +
geom_line(aes(y = as.integer(predvald)), colour = "gray", size = 1) +
labs(x = "Value of a Used Car as it Ages (Years)", y = "Dollars") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5)) +
theme(axis.text.x = element_text(angle = 60, vjust = .6))
erc
I can't figure out how to put an image in this text since I have no link except to my dropbox...
I would appreciate any help. Sincerely, Stephanie
Ok, I felt like doing some ggplot, and it was an interesting task to contrast the way ggplot-beginners (I was one not so long ago) approach it compared to the way you need to do it to get things like legends.
Here is the code:
library(ggplot2)
library(gridExtra)
library(tidyr)
# fake up some data
n <- 100
dealer <- 12000 + rnorm(n,0,100)
age <- 10 + rnorm(n,3)
pvtsell <- 10000 + rnorm(n,0,300)
tradein <- 5000 + rnorm(n,0,100)
predvalt <- 6000 + rnorm(n,0,120)
predvalp <- 7000 + rnorm(n,0,100)
predvald <- 8000 + rnorm(n,0,100)
usedcarval <- data.frame(dealer=dealer,age=age,pvtsell=pvtsell,tradein=tradein,
predvalt=predvalt,predvalp=predvalp,predvald=predvald)
# The ggplot-naive way
erc <- ggplot(usedcarval, aes(x = usedcarval$age)) +
geom_line(aes(y = usedcarval$dealer), colour = "orange", size = .5) +
geom_point(aes(y = usedcarval$dealer),
show.legend = TRUE, colour = "orange", size = 1) +
geom_line(aes(y = usedcarval$pvtsell), colour = "green", size = .5) +
geom_point(aes(y = usedcarval$pvtsell), colour = "green", size = 1) +
geom_line(aes(y = usedcarval$tradein), colour = "blue", size = .5) +
geom_point(aes(y = usedcarval$tradein), colour = "blue", size = 1) +
geom_line(aes(y = as.integer(predvalt)), colour = "gray", size = 1) +
geom_line(aes(y = as.integer(predvalp)), colour = "gray", size = 1) +
geom_line(aes(y = as.integer(predvald)), colour = "gray", size = 1) +
labs(x = "ggplot naive way - Value of a Used Car as it Ages (Years)", y = "Dollars") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5)) +
theme(axis.text.x = element_text(angle = 60, vjust = .6))
# The tidyverse way
# ggplot needs long data, not wide data.
# Also we have two different sets of data for points and lines
gdf <- usedcarval %>% gather(series,value,-age)
pdf <- gdf %>% filter( series %in% c("dealer","pvtsell","tradein"))
# our color and size lookup tables
clrs = c("dealer"="orange","pvtsell"="green","tradein"="blue","predvalt"="gray","predvalp"="gray","predvald"="gray")
szes = c("dealer"=0.5,"pvtsell"=0.0,"tradein"=0.5,"predvalt"=1,"predvalp"=1,"predvald"=1)
trc <- ggplot(gdf,aes(x=age)) + geom_line(aes(y=value,color=series,size=series)) +
scale_color_manual(values=clrs) +
scale_size_manual(values=szes) +
geom_point(data=pdf,aes(x=age,y=value,color=series),size=1) +
labs(x = "tidyverse way - Value of a Used Car as it Ages (Years)", y = "Dollars") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5)) +
theme(axis.text.x = element_text(angle = 60, vjust = .6))
grid.arrange(erc, trc, ncol=1)
Study it, espeically look at gdf,pdf and gather. You just can't get legends without using "long data".
If you want more information on the "tidyverse", start here: Hadley Wickham's tidyverse
If you are looking for a short example of how to take some series data that comes in wide format, convert it to long format (using gather), and then plot it with a ggplot (with a legend), here is a nice short example I cooked up for someone recently:
library(ggplot2)
library(tidyr)
# womp up some fake news (uhh... data)
x <- seq(-pi,pi,by=0.25)
y <- sin(x)
yhat <- sin(x) + 0.4*rnorm(length(x))
# This is the data in wide form
# you will never get ggplot to make a legend for it
# it simply hates wide data
df1 <- data.frame(x=x,y=y,yhat=yhat)
# So we use gather from tidyr to make it into long data
# creates two new colums, throws y and yhat in them, and replicates x as needed
# you have to look at the data frame to understand gather,
# and read the docs a few times
df2 <- gather(df1,series,value,-x)
# it is now in long form and we can plot it
ggplot(df2) + geom_line(aes(x,value,color=series))
So here is the plot: