Add in legend to ggplot - r

I know this question is similar to ones that have been asked before, but the suggested solutions don't seem to apply.
I set up the problem as follows
mat1 <- NULL
mat2 <- NULL
mat1 <- data.frame(matrix(nrow =16, ncol =2, data = rnorm(32, 0, 1)))
mat2 <- data.frame(matrix(nrow =16, ncol =2, data = rnorm(32, 0, 1)))
mat1[,1] = mat2[,1] = 1:16
colnames(mat1) = c("Window", "CM")
colnames(mat2) = c("Window", "FM")
ggplot() +
geom_line(data = mat1, aes(x = mat1$Window, y= mat1$CM), linetype ="twodash", color ="steelblue") +
geom_line(data = mat2, aes(x = mat2$Window, y= mat2$FM), color = "black") +
theme_classic() + xlab("Quater after alpha assessment") + ylab("Estimated Coefficient") + labs(fill = "cohort model")
I want to add a legend. Specifically, I want the blue line to be labelled as CM and the black line to be labelled as FM.

In this kind of scenario I think it is often easiest to bring your data into the appropriate (long) format for ggplot. Then you can properly use all of the ggplot toolset.
library(tidyverse)
# Combine both series into one long data frame with columns Window, type
# ("CM"/"FM") and value. Joining on Window avoids the duplicated Window column
# that bind_cols() would create; its repaired name differs between dplyr
# versions (`Window1` vs `Window...3`), which made select(-Window1) fragile.
mat3 <- inner_join(mat1, mat2, by = "Window") %>%
  pivot_longer(-Window, names_to = "type", values_to = "value")
mat3 %>%
ggplot(aes(x = Window, y = value, group = type, color = type, linetype = type)) +
geom_line() +
scale_color_manual("cohort model",
values = c("CM" = "steelblue","FM" = "black"),
breaks = c("CM", "FM")) +
scale_linetype_manual("cohort model",
values = c("twodash", "solid"),
breaks = c("CM", "FM")) +
labs(x = "Quater after alpha assessment", y = "Estimated Coefficient") +
theme_classic()

I assume the simplest way to do this would be to use annotate():
ggplot() +
geom_line(data = mat1, aes(x = mat1$Window, y= mat1$CM), linetype ="twodash", color ="steelblue") +
geom_line(data = mat2, aes(x = mat2$Window, y= mat2$FM), color = "black") +
theme_classic() + xlab("Quater after alpha assessment") + ylab("Estimated Coefficient") + labs(fill = "cohort model") +
xlim(NA,18) +
annotate(geom="text", x=16.5, y=1.51232841, label="CM", color="blue", size=3) +
annotate(geom="text", x=16.5, y=-0.487350382, label="FM", color="black", size=3)
You can easily change and adjust the position with x= and y=. I also slightly extended the upper limit of x-scale so that the text fits in.
Of course, I don't know if that's enough for you. Otherwise, you could also add a text field as legend. But this would be the easiest and fastest way.

Related

adding a label in geom_line in R

I have two very similar plots, which have two y-axis - a bar plot and a line plot:
code:
sec_plot <- ggplot(data, aes_string (x = year, group = 1)) +
geom_col(aes_string(y = frequency), fill = "orange", alpha = 0.5) +
geom_line(aes(y = severity))
However, there are no labels. I want to get a label for the barplot as well as a label for the line plot, something like:
How can I add the labels to the plot if there is only one single group? Is there a way to specify this manually? Until now I have only found options where the labels are added by specifying them in the aes.
EXTENSION (added a posteriori):
# Build a dual-axis plot: dodged bars for `yvar` on the primary axis and lines
# for `yvarsec`, rescaled onto the primary axis and labelled on a secondary
# axis.
#
# Arguments:
#   data     - a data.table; the code reads a `claims` column plus the columns
#              named by `yvar`, `yvarsec`, `xvar` and `groupvar`.
#   xvar     - name (string) of the x-axis column.
#   yvar     - name (string) of the column drawn as bars.
#   yvarsec  - name (string) of the column drawn as lines.
#   groupvar - name (string) of the grouping column (group, fill and colour).
#
# Returns the ggplot object, invisibly (the last expression is an assignment).
getSecPlot <- function(data, xvar, yvar, yvarsec, groupvar){
  # NOTE(review): this replaces xvar with the value of an object named
  # `agegroup` looked up from the function's environment -- confirm that such
  # an object exists and that this substitution is intended.
  if ("agegroup" %in% xvar) xvar <- get("agegroup")
  # data <- data[, startYear:= as.numeric(startYear)]
  # Drop rows where claims is 0, then add the rescaled secondary series and the
  # scaling factor (max of yvar over max of yvarsec).
  data <- data[!claims == 0][, ':=' (scaled = get(yvarsec) * max(get(yvar))/max(get(yvarsec)),
                                     param = max(get(yvar))/max(get(yvarsec)))]
  param <- data[1, param] # important, otherwise not found in ggplot
  sec_plot <- ggplot(data, aes_string (x = xvar, group = groupvar)) +
    geom_col(aes_string(y = yvar, fill = groupvar, alpha = 0.5), position = "dodge") +
    # Colour the lines by `groupvar` (previously hard-coded to `gender`), so the
    # lines follow the same grouping as the bars.
    geom_line(aes_string(y = "scaled", color = groupvar)) +
    # Dividing by param converts the primary-axis units back to yvarsec units.
    scale_y_continuous(sec.axis = sec_axis(~./(param), name = paste0("average ", yvarsec),labels = function(x) format(x, big.mark = " ", scientific = FALSE))) +
    labs(y = paste0("total ", yvar)) +
    # alpha is mapped inside aes_string above, so hide the legend it creates.
    scale_alpha(guide = 'none') +
    theme_pubclean() +
    theme(legend.title=element_blank(), legend.background = element_rect(fill = "white"))
}
plot.ExposureYearly <- getSecPlot(freqSevDataAge, xvar = "agegroup", yvar = "exposure", yvarsec = "frequency", groupvar = "gender")
plot.ExposureYearly
How can the same be done on a plot where both the line plot as well as the bar plot are separated by gender?
Here is a possible solution. The method I used was to move the color and fill inside the aes and then use scale_*_identity to create and format the legends.
Also, I needed to add a scaling factor for severity axis since ggplot does not handle the secondary axis well.
data<-data.frame(year= 2000:2005, frequency=3:8, severity=as.integer(runif(6, 4000, 8000)))
library(ggplot2)
library(scales)
sec_plot <- ggplot(data, aes(x = year)) +
geom_col(aes(y = frequency, fill = "orange"), alpha = 0.6) +
geom_line(aes(y = severity/1000, color = "black")) +
scale_fill_identity(guide = "legend", label="Claim frequency (Number of paid claims per 100 Insured exposure)", name=NULL) +
scale_color_identity(guide = "legend", label="Claim Severity (Average insurance payment per claim)", name=NULL) +
theme(legend.position = "bottom") +
scale_y_continuous(sec.axis =sec_axis( ~ . *1, labels = label_dollar(scale=1000), name="Severity") ) + #formats the 2nd axis
guides(fill = guide_legend(order = 1), color = guide_legend(order = 2)) #control which scale plots first
sec_plot

How to draw graph with dodged position?

I am an R novice. I will try to be as brief and simple as possible. Currently, I am trying to connect points between two conditions based on another condition all over a single discrete x-axis.
Below is some test data and my attempt to plot some data.
set.seed(42)
# Test case data
mydf1 <- tibble(
xx = rep('myLabel', 8),
yy = rnorm(8),
grp = rep(c(1, 2), each = 4),
cond = rep(c('a', 'b', 'c', 'd'), length.out = 8)
)
ggplot(mydf1, aes(x = xx, y = yy, col = factor(grp))) +
geom_point(position = position_dodge(width = 0.9)) +
geom_path(position = position_dodge(width = 0.9), aes(group = cond), col = "black") +
theme_bw() +
ggtitle("Test Case for geom_path and position_dodge")
From what I can tell, it seems that position_dodge is applied after the draw. Is there a way to change this behavior? or to achieve the overall goal of connecting these points in this type of way?
Thank you for your time.
EDIT: details.
EDIT2:
I would like to capture a before and after relationship between grp based on 4 conditions in one big main conditions.
Probably you want this.
set.seed(42)
library(ggplot2)
ggplot(mydf1, aes(x = grp, y = yy, col = factor(grp))) +
geom_point() +
geom_path(aes(group = cond), col = "black") +
theme_bw() +
ggtitle("Test Case for geom_path and position_dodge") +
xlim(c(.5, 2.5)) +
labs(color = "Group", x = "myLabel", y = "yy") +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank())
You could plot a categorical x axis.
ggplot(mydf1, aes(x = cond, y = yy, col = factor(grp))) +
geom_point() +
geom_path(aes(group = cond), col = "black") +
theme_bw() +
ggtitle("Test Case for categorical X-axis")
Alternatively, if you need comparison across multiple categorical dimensions mapped to the x axis, you can try facets.
ggplot(mydf1, aes(x = cond, y = yy, col = factor(grp))) +
geom_point() +
geom_path(aes(group = cond), col = "black") +
theme_bw() +
ggtitle("Test Case for Categorical X-axis and Facets") +
facet_wrap(~cond)

Aesthetics must be either length 1 or the same as the data (1): x, y, label

I'm working on some data on party polarization (something like this) and used geom_dumbbell from ggalt and ggplot2. I keep getting the same aes error and other solutions in the forum did not address this as effectively. This is my sample data.
df <- data_frame(policy=c("Not enough restrictions on gun ownership", "Climate change is an immediate threat", "Abortion should be illegal"),
Democrats=c(0.54, 0.82, 0.30),
Republicans=c(0.23, 0.38, 0.40),
diff=sprintf("+%d", as.integer((Democrats-Republicans)*100)))
I wanted to keep order of the plot, so converted policy to factor and wanted % to be shown only on the first line.
df <- arrange(df, desc(diff))
df$policy <- factor(df$policy, levels=rev(df$policy))
# Format proportions as whole-number percentages, keeping the "%" sign only on
# the first element (e.g. c(0.5, 0.25) -> c("50%", "25")).
#
# x: numeric vector of proportions.
# Returns a character vector of the same length as x.
percent_first <- function(x) {
  x <- sprintf("%d%%", round(x * 100))
  # x[-1] selects "everything after the first element" and is empty for inputs
  # of length 0 or 1; 2:length(x) would count backwards there and inject NAs.
  x[-1] <- sub("%$", "", x[-1])
  x
}
Then I used ggplot that rendered something close to what I wanted.
gg2 <- ggplot()
gg2 <- gg + geom_segment(data = df, aes(y=country, yend=country, x=0, xend=1), color = "#b2b2b2", size = 0.15)
# making the dumbbell
gg2 <- gg + geom_dumbbell(data=df, aes(y=country, x=Democrats, xend=Republicans),
size=1.5, color = "#B2B2B2", point.size.l=3, point.size.r=3,
point.color.l = "#9FB059", point.color.r = "#EDAE52")
I then wanted the dumbbell to read Democrat and Republican on top to label the two points (like this). This is where I get the error.
gg2 <- gg + geom_text(data=filter(df, country=="Government will not control gun violence"),
aes(x=Democrats, y=country, label="Democrats"),
color="#9fb059", size=3, vjust=-2, fontface="bold", family="Calibri")
gg2 <- gg + geom_text(data=filter(df, country=="Government will not control gun violence"),
aes(x=Republicans, y=country, label="Republicans"),
color="#edae52", size=3, vjust=-2, fontface="bold", family="Calibri")
Any thoughts on what I might be doing wrong?
I think it would be easier to build your own "dumbbells" with geom_segment() and geom_point(). Working with your df and changing the variable references from "country" to "policy":
library(tidyverse)
# gather data into long form to make ggplot happy
df2 <- gather(df,"party", "value", Democrats:Republicans)
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
# our dumbell
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
# the text labels
geom_text(aes(label = party), vjust = -1.5) + # use vjust to shift text up to no overlap
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red")) + # named vector to map colors to values in df2
scale_x_continuous(limits = c(0,1), labels = scales::percent) # use library(scales) nice math instead of pasting
Produces this plot:
Which has some overlapping labels. I think you could avoid that if you use just the first letter of party like this:
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
geom_text(aes(label = gsub("^(\\D).*", "\\1", party)), vjust = -1.5) + # just the first letter instead
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red"),
guide = "none") +
scale_x_continuous(limits = c(0,1), labels = scales::percent)
Only label the top issue with names:
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
geom_text(data = filter(df2, policy == "Not enough restrictions on gun ownership"),
aes(label = party), vjust = -1.5) +
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red")) +
scale_x_continuous(limits = c(0,1), labels = scales::percent)

Add item to legend by theme options

I have a data frame d like this:
d <- data.frame("name" = c("pippo","pluto","paperino"),
"id" = c(1,2,3),"count" = c(10,20,30),
"pvalue"=c(0.01,0.02,0.05),
geneRatio=c(0.5,0.8,0.2),
type=c("KEGG","Reactome","Reactome"))
and I plot a dotplot using the library ggplot:
ggplot(data = d,aes(geneRatio,name,size=count,colour = pvalue)) +
geom_point()+
ggtitle("Significantly Pathways") +
xlab("Gene Ratio") +
ylab("Pathways")+
theme(axis.text.y = element_text(color=d$type))
This is the plot at the moment
I would like to add to legend the information of "type" contained in dataframe d.
I would like to have a new item in the legend with color red = Reactome and color black= KEGG
Not saying that this is a good idea, but you can add a nonsensical geom to force the adding of a guide:
# Example data for the dot plot. The p-value column must be called `pvalue`,
# because the ggplot call that follows maps `colour = pvalue`.
d <- data.frame(
  "name" = c("pippo", "pluto", "paperino"),
  "id" = c(1, 2, 3),
  "count" = c(10, 20, 30),
  "pvalue" = c(0.01, 0.02, 0.05),
  geneRatio = c(0.5, 0.8, 0.2),
  type = c("KEGG", "Reactome", "Reactome")
)
library(ggplot2)
ggplot(data = d, aes(geneRatio,name,colour = pvalue)) +
geom_point(aes(size=count))+
geom_polygon(aes(geneRatio,name,fill = type)) +
ggtitle("Significantly Pathways") +
xlab("Gene Ratio") +
ylab("Pathways") +
scale_fill_manual(values = c('Reactome'='red', 'KEGG'='black')) +
theme(axis.text.y = element_text(color=d$type))
geom_polygon may not work with your actual data, and you may not find a suitable 'nonsensical' geom. I agree with @zx8754, a facet would be clearer:
ggplot(data = d, aes(geneRatio,name,colour = pvalue)) +
geom_point(aes(size=count)) +
ggtitle("Significantly Pathways") +
xlab("Gene Ratio") +
ylab("Pathways") +
facet_grid(type ~ ., scales = 'free_y', switch = 'y')
You could accomplish this using annotate, but it is a bit manual.
ggplot(data = d, aes(geneRatio, name, size = count, colour = pvalue)) +
geom_point() +
ggtitle("Significantly Pathways") +
xlab("Gene Ratio") +
ylab("Pathways")+
theme(axis.text.y = element_text(color=d$type)) +
annotate("text", x = 0.25, y = 3.5, label = "Reactome", color = "red") +
annotate("text", x = 0.25, y = 3.4, label = "KEGG", color = "black")

Colour average lines in ggplot

I would like to colour the dashed lines, which are the average values of the two respective categories, with the same colour of the default palette used by ggplot to fill the distributions:
Click here to view the distribution
This is the code used:
library(ggplot2)
print(ggplot(dati, aes(x=ECU_fuel_consumption_L_100Km_CF, fill=Model))
+ ggtitle("Fuel Consumption density histogram, by Model")
+ ylab("Density")
+ geom_density(alpha=.3)
+ scale_x_continuous(breaks=pretty(dati$ECU_fuel_consumption_L_100Km_CF, n=10))
+ geom_vline(aes(xintercept = mean(ECU_fuel_consumption_L_100Km_CF[dati$Model == "500X"])), linetype="dashed", size=1)
+ geom_vline(aes(xintercept = mean(ECU_fuel_consumption_L_100Km_CF[dati$Model == "Renegade"])), linetype="dashed", size=1)
)
Thank you all in advance!
No reproducible example, but you probably want to do something like this:
library(dplyr)
# make up some data
d <- data.frame(x = c(mtcars$mpg, mtcars$hp),
var = rep(c('mpg', 'hp'), each = nrow(mtcars)))
means <- d %>% group_by(var) %>% summarize(m = mean(x))
ggplot(d, aes(x, fill = var)) +
geom_density(alpha = 0.3) +
geom_vline(data = means, aes(xintercept = m, col = var),
linetype = "dashed", size = 1)
This approach is extendable to any number of groups.
An option that doesn't require pre-calculation, but is also a bit more hacky, is:
ggplot(d, aes(x, fill = var)) +
geom_density(alpha = 0.3) +
geom_vline(aes(col = 'hp', xintercept = x), linetype = "dashed", size = 1,
data = data.frame(x = mean(d$x[d$var == 'hp']))) +
geom_vline(aes(col = 'mpg', xintercept = x), linetype = "dashed", size = 1,
data = data.frame(x = mean(d$x[d$var == 'mpg'])))

Resources