ggplot2 legend with two different geom_point - r

I have the following ggplot graph with circles representing the observed data and the crosses the mean for each treatment :
d <- data.frame(Number = rnorm(12,100,20),
Treatment = rep(c("A","B","C", "D"), each = 3))
av <- aggregate(d["Number"], d["Treatment"], mean)
ggplot(data = d, aes(y = Number, x = Treatment)) +
geom_point(shape = 1, size = 6, color = "grey50") +
geom_point(data=av, shape = 4) +
theme_bw()
I would like to add a legend with the exact same symbols on top of the graphs but I'm a bit lost... I use aes to force the creation of legend and then try to modify it with manual scales but the result is not convincing. I would like to have one grey circle of size 6. That sounds also quite complicated for such a basic thing ... There is probably an easyier solution.
ggplot(data = d, aes(y = Number, x = Treatment)) +
geom_point(aes(shape = "1", size = "6", color = "grey50")) +
geom_point(data=av, aes(shape = "4")) +
theme_bw() +
scale_shape_manual(name = "", values = c(1,4), labels = c("observed values", "mean")) +
scale_size_manual(name = "", values = c(6,1), labels = c("observed values", "mean")) +
scale_color_manual(name = "", values = c("grey50","black"),
labels = c("observed values", "mean")) +
theme(legend.position = "top",
legend.key = element_rect(color = NA))
http://imagizer.imageshack.us/v2/320x240q90/842/4pgj.png

The ggplot2 way would be combining everything into a single data.frame like this:
av$Aggregated <- "mean"
d$Aggregated <- "observed value"
d <- rbind(d, av)
ggplot(data = d, aes(y = Number, x = Treatment,
shape=Aggregated, size=Aggregated, colour=Aggregated)) +
geom_point()
And than customize using manual scales and themes.

Related

How to make log10 ONLY first y-axis (not secondary y-axis) in ggplot in R

I would like to plot ONLY y-axis1 DATA (left axis, Var1, dotted line) as a log10 scale. The dotted line would therefore look higher on the y-axis and differences between 1 and 2 would be noticeable.
I have tried several things, but does not work ( I believe I am using them in the wrong order/place) such as:
+coord_trans(y='log10')--> empty plot
scale_y_continuous(trans = log10_trans(),... --> makes both Var1 and Var 2 log10
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),labels = trans_format("log10", math_format(10^.x)))--> makes both y axis log10 and removes y-axis2 (Var2)
data<- data.frame(
Day=c(1,2,3,1,2,3,1,2,3),
Name=rep(c(rep("a",3),rep("b",3),rep("c",3))),
Var1=c(1090,484,64010,1090,484,64010,1090,484,64010),
Var2= c(4,16,39,2,22,39,41,10,3))
ggplot(data) +
geom_bar(aes(fill=Name, y=Var2*1000, x=Day),stat="identity", colour="black", position= position_stack(reverse = TRUE))+
geom_line(aes(x=Day, y=Var1),stat="identity",color="black", linetype="dotted", size=0.8)+
geom_point(aes(Day, Var1), shape=8)+
labs(title= "",
x="",y=expression('Var1'))+
scale_y_continuous(
sec.axis=sec_axis(~./1000, name= expression(paste("Var2"))))+
theme_classic()+
scale_fill_grey(start = 1, end=0.1,name = "", labels = c("a", "b", "c"))
I think the easiest way is to have the primary axis be the linear one, but put it on the right side of the plot. Then, you can have the secondary one be your log-transformed axis.
library(ggplot2)
data<- data.frame(
Day=c(1,2,3,1,2,3,1,2,3),
Name=rep(c(rep("a",3),rep("b",3),rep("c",3))),
Var1=c(1090,484,64010,1090,484,64010,1090,484,64010),
Var2= c(4,16,39,2,22,39,41,10,3))
# Max of secondary divided by max of primary
upper <- log10(3e6) / 80
breakfun <- function(x) {
10^scales::extended_breaks()(log10(x))
}
ggplot(data) +
geom_bar(aes(fill=Name, y=Var2, x=Day),
stat="identity", colour="black", position= position_stack(reverse = TRUE))+
geom_line(aes(x=Day, y=log10(Var1) / upper),
stat="identity",color="black", linetype="dotted", size=0.8)+
geom_point(aes(Day, log10(Var1) / upper), shape=8)+
labs(title= "",
x="",y=expression('Var1'))+
scale_y_continuous(
position = "right",
name = "Var2",
sec.axis = sec_axis(~10^ (. * upper), name= expression(paste("Var1")),
breaks = breakfun)
)+
theme_classic() +
scale_fill_grey(start = 1, end=0.1,name = "", labels = c("a", "b", "c"))
Created on 2022-02-09 by the reprex package (v2.0.1)
Here is a custom breaks function:
br <- function(limits) {
10^(seq(ifelse(limits[1] <= 0,
0,
trunc(log10(limits[1]))),
trunc(log10(limits[2])),
by = 1))}
ggplot(data) +
geom_bar(aes(fill = Name, y = Var2 * 1000, x = Day),
stat = "identity",
colour = "black",
position = position_stack(reverse = TRUE))+
geom_line(aes(x=Day, y=Var1),
stat = "identity",
color = "black",
linetype = "dotted",
size = 0.8)+
geom_point(aes(Day, Var1),
shape = 8)+
labs(title = "",
x = "",
y = expression('Var1'))+
scale_y_continuous(
breaks = br,
sec.axis = sec_axis(~./1000, name= expression(paste("Var2"))))+
theme_classic()+
scale_fill_grey(start = 1,
end = 0.1,
name = "",
labels = c("a", "b", "c"))
Results aren't so pretty but you can customize the breaks as you wish.
You absolutely should read the answer #teunbrand linked to in the comment to your question. But for the matter of displaying log values on the left and original values on the right, you can use:
tibble(Day = 1:10,
Val1 =10*Day) %>%
ggplot(aes(x = Day, y = log10(Val1))) +
geom_col() +
scale_y_log10(name = "log(Val1)",
sec.axis = sec_axis(~ 10^., name = "Val1"))

Jitter categorical variable and keep variable name in graph

I had to jitter points along a catagorical axis to avoid data overlay. Unfortunately, to do this, I needed to make my categorical variable a factor and then numerical. When I plot it, it remains numerical without the categorical labels. Is there a way I can get the labels to show up?
Here is the code:
levels(factor(All_VARs$Dataset))
[1] "Data1" "Data2" "Data3"
df$Dataset_jit <- jitter(as.numeric(factor(df$Dataset)))
ggplot(df, aes(x = POS_start, y = Dataset_jit, color = Type)) +
geom_point() +
scale_color_manual(values = annotation_color_associations) +
theme_classic()
I would like the y axis to be categorical, while maintaining the jitter.
You can use position = position_jitter():
ggplot(df, aes(x = POS_start, y = as.factor(Set), color = as.factor(Type))) +
geom_point(position = position_jitter(height = 0.2), show.legend = FALSE) +
theme_classic() +
scale_color_manual(values = colorRampPalette(c("pink", "purple"))(5)) +
labs(x = "CDS Position", y = "Dataset")
Edit:
OP says they need to be able to do other things, so another approach is to manually control the y-axis with scale_y_continuous:
df$Dataset_jit <- jitter(as.numeric(factor(df$Set)))
ggplot(df, aes(x = POS_start, y = Dataset_jit, color = as.factor(Type))) +
geom_point(show.legend = FALSE) +
theme_classic() +
scale_color_manual(values = colorRampPalette(c("pink", "purple"))(5)) +
scale_y_continuous(breaks = 1:3, labels = c("Data 1", "Data 2", "Data 3")) +
labs(x = "CDS Position", y = "Dataset")
Sample Data
set.seed(3)
df <- data.frame(POS_start = round(runif(n = 100,1,1500),0),
Set = sample(1:3,100, prob = c(0.45,0.1,0.45), replace = TRUE),
Type = sample(1:5,100,replace = TRUE))

adding a label in geom_line in R

I have two very similar plots, which have two y-axis - a bar plot and a line plot:
code:
sec_plot <- ggplot(data, aes_string (x = year, group = 1)) +
geom_col(aes_string(y = frequency), fill = "orange", alpha = 0.5) +
geom_line(aes(y = severity))
However, there are no labels. I want to get a label for the barplot as well as a label for the line plot, something like:
How can I add the labels to the plot, if there is only pone single group? is there a way to specify this manually? Until know I have only found option where the labels can be added by specifying them in the aes
EXTENSION (added a posterior):
getSecPlot <- function(data, xvar, yvar, yvarsec, groupvar){
if ("agegroup" %in% xvar) xvar <- get("agegroup")
# data <- data[, startYear:= as.numeric(startYear)]
data <- data[!claims == 0][, ':=' (scaled = get(yvarsec) * max(get(yvar))/max(get(yvarsec)),
param = max(get(yvar))/max(get(yvarsec)))]
param <- data[1, param] # important, otherwise not found in ggplot
sec_plot <- ggplot(data, aes_string (x = xvar, group = groupvar)) +
geom_col(aes_string(y = yvar, fill = groupvar, alpha = 0.5), position = "dodge") +
geom_line(aes(y = scaled, color = gender)) +
scale_y_continuous(sec.axis = sec_axis(~./(param), name = paste0("average ", yvarsec),labels = function(x) format(x, big.mark = " ", scientific = FALSE))) +
labs(y = paste0("total ", yvar)) +
scale_alpha(guide = 'none') +
theme_pubclean() +
theme(legend.title=element_blank(), legend.background = element_rect(fill = "white"))
}
plot.ExposureYearly <- getSecPlot(freqSevDataAge, xvar = "agegroup", yvar = "exposure", yvarsec = "frequency", groupvar = "gender")
plot.ExposureYearly
How can the same be done on a plot where both the line plot as well as the bar plot are separated by gender?
Here is a possible solution. The method I used was to move the color and fill inside the aes and then use scale_*_identity to create and format the legends.
Also, I needed to add a scaling factor for severity axis since ggplot does not handle the secondary axis well.
data<-data.frame(year= 2000:2005, frequency=3:8, severity=as.integer(runif(6, 4000, 8000)))
library(ggplot2)
library(scales)
sec_plot <- ggplot(data, aes(x = year)) +
geom_col(aes(y = frequency, fill = "orange"), alpha = 0.6) +
geom_line(aes(y = severity/1000, color = "black")) +
scale_fill_identity(guide = "legend", label="Claim frequency (Number of paid claims per 100 Insured exposure)", name=NULL) +
scale_color_identity(guide = "legend", label="Claim Severity (Average insurance payment per claim)", name=NULL) +
theme(legend.position = "bottom") +
scale_y_continuous(sec.axis =sec_axis( ~ . *1, labels = label_dollar(scale=1000), name="Severity") ) + #formats the 2nd axis
guides(fill = guide_legend(order = 1), color = guide_legend(order = 2)) #control which scale plots first
sec_plot

Aesthetics must be either length 1 or the same as the data (1): x, y, label

I'm working on some data on party polarization (something like this) and used geom_dumbbell from ggalt and ggplot2. I keep getting the same aes error and other solutions in the forum did not address this as effectively. This is my sample data.
df <- data_frame(policy=c("Not enough restrictions on gun ownership", "Climate change is an immediate threat", "Abortion should be illegal"),
Democrats=c(0.54, 0.82, 0.30),
Republicans=c(0.23, 0.38, 0.40),
diff=sprintf("+%d", as.integer((Democrats-Republicans)*100)))
I wanted to keep order of the plot, so converted policy to factor and wanted % to be shown only on the first line.
df <- arrange(df, desc(diff))
df$policy <- factor(df$policy, levels=rev(df$policy))
percent_first <- function(x) {
x <- sprintf("%d%%", round(x*100))
x[2:length(x)] <- sub("%$", "", x[2:length(x)])
x
}
Then I used ggplot that rendered something close to what I wanted.
gg2 <- ggplot()
gg2 <- gg + geom_segment(data = df, aes(y=country, yend=country, x=0, xend=1), color = "#b2b2b2", size = 0.15)
# making the dumbbell
gg2 <- gg + geom_dumbbell(data=df, aes(y=country, x=Democrats, xend=Republicans),
size=1.5, color = "#B2B2B2", point.size.l=3, point.size.r=3,
point.color.l = "#9FB059", point.color.r = "#EDAE52")
I then wanted the dumbbell to read Democrat and Republican on top to label the two points (like this). This is where I get the error.
gg2 <- gg + geom_text(data=filter(df, country=="Government will not control gun violence"),
aes(x=Democrats, y=country, label="Democrats"),
color="#9fb059", size=3, vjust=-2, fontface="bold", family="Calibri")
gg2 <- gg + geom_text(data=filter(df, country=="Government will not control gun violence"),
aes(x=Republicans, y=country, label="Republicans"),
color="#edae52", size=3, vjust=-2, fontface="bold", family="Calibri")
Any thoughts on what I might be doing wrong?
I think it would be easier to build your own "dumbbells" with geom_segment() and geom_point(). Working with your df and changing the variable refences "country" to "policy":
library(tidyverse)
# gather data into long form to make ggplot happy
df2 <- gather(df,"party", "value", Democrats:Republicans)
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
# our dumbell
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
# the text labels
geom_text(aes(label = party), vjust = -1.5) + # use vjust to shift text up to no overlap
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red")) + # named vector to map colors to values in df2
scale_x_continuous(limits = c(0,1), labels = scales::percent) # use library(scales) nice math instead of pasting
Produces this plot:
Which has some overlapping labels. I think you could avoid that if you use just the first letter of party like this:
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
geom_text(aes(label = gsub("^(\\D).*", "\\1", party)), vjust = -1.5) + # just the first letter instead
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red"),
guide = "none") +
scale_x_continuous(limits = c(0,1), labels = scales::percent)
Only label the top issue with names:
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
geom_text(data = filter(df2, policy == "Not enough restrictions on gun ownership"),
aes(label = party), vjust = -1.5) +
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red")) +
scale_x_continuous(limits = c(0,1), labels = scales::percent)

Set specific color in ggplot2 using scale_fill

I am visualizing missing data in R using this method which uses ggplot2:
library(reshape2)
library(ggplot2)
ggplot_missing <- function(x){
x %>%
is.na %>%
melt %>%
ggplot(data = .,
aes(x = Var2,
y = Var1)) +
geom_raster(aes(fill = value)) +
scale_fill_grey(name = "", labels = c("Present","Missing")) +
theme_minimal() +
theme(axis.text.x = element_text(angle=45, vjust=0.5)) +
labs(x = "Columns / Attributes",
y = "Rows / Observations")
}
The scale_fill_grey method uses black and grey. How can I change the color of the cells to a specific color, say "red"?
I have tried:
scale_fill_brewer(name = "", labels = c("Present","Missing"), na.val="red")
Also,
scale_fill_gradient(name = "", labels = c("Present","Missing"), low = "#FF69B4", high = "#FF0000")
But I get the error:
Error: Discrete value supplied to continuous scale
I got it to work by replacing scale_fill_grey with the following:
scale_fill_manual(name = "", values = c('my_color_1', 'my_color_2'), labels = c("Present","Missing")) +

Resources