ggplot does not show legend in geom_histogram - r

I have this code
ggplot()
+ geom_histogram(aes(x=V1, y=(..count..)/sum(..count..)), fill="red", alpha=.4, colour="red", data=coding, stat = "bin", binwidth = 30)
+ geom_histogram(aes(x=V1,y=(..count..)/sum(..count..)), fill="blue", alpha=.4, colour="blue", data=lncrna, stat = "bin", binwidth = 30)
+ coord_cartesian(xlim = c(0, 2000))
+ xlab("Size (nt)")
+ ylab("Percentage (%)")
+ geom_vline(data=cdf, aes(xintercept=rating.mean, colour=Labels), linetype="dashed", size=1)
that produces a beautiful histogram without legend:
In every post I visit with the same problem, they say to put color inside aes. nevertheless, this does not give any legend.
I tried:
ggplot() + geom_histogram(aes(x=V1, y=(..count..)/sum(..count..),color="red", fill="red"), fill="red", alpha=.4, colour="red", data=coding, stat = "bin", binwidth = 30)
+ geom_histogram(aes(x=V1,y=(..count..)/sum(..count..), color="blue", fill="blue"), fill="blue", alpha=.4, colour="blue", data=lncrna, stat = "bin", binwidth = 30)
+ coord_cartesian(xlim = c(0, 2000))
+ xlab("Size (nt)")
+ ylab("Percentage (%)")
+ geom_vline(data=cdf, aes(xintercept=rating.mean, colour=Labels), linetype="dashed", size=1)
without success.
How can I put a legend in my graph?

If you don't want to put the data in one data.frame, you can do this:
set.seed(42)
coding <- data.frame(V1=rnorm(1000))
lncrna <- data.frame(V1=rlnorm(1000))
library(ggplot2)
ggplot() +
geom_histogram(aes(x=V1, y=(..count..)/sum(..count..), fill="r", colour="r"), alpha=.4, data=coding, stat = "bin") +
geom_histogram(aes(x=V1,y=(..count..)/sum(..count..), fill="b", colour="b"), alpha=.4, data=lncrna, stat = "bin") +
scale_colour_manual(name="group", values=c("r" = "red", "b"="blue"), labels=c("b"="blue values", "r"="red values")) +
scale_fill_manual(name="group", values=c("r" = "red", "b"="blue"), labels=c("b"="blue values", "r"="red values"))

The problem is that you can't map your color into aes because you've got two separete sets of data. An idea is to bind them, then to apply the "melt" function of package reshape2 so you create a dummy categorical variable that you can pass into aes. the code:
require(reshape2)
df=cbind(blue=mtcars$mpg, red=mtcars$mpg*0.8)
df=melt(df, id.vars=1:2)
ggplot()+geom_histogram(aes(y=(..count..)/sum(..count..),x=value, fill=Var2, color=Var2), alpha=.4, data=df, stat = "bin")
There you've got your legend

Related

Merge legend in ggplot when the geoms are different

I have the following code which yields the figure below:
ggplot(data=data.frame(x=x, y=y, mass=mass)) +
geom_line(mapping = aes(x=x, y=y, linetype='Gompertz predicted mass', col='Gompertz predicted mass')) +
geom_point(mapping = aes(x=x, y=mass, shape='Actual mass',col='Actual mass')) +
theme_bw() +
ylab('Mass') +
xlab('t') +
scale_color_manual(name='',values = c("black",'red')) +
scale_linetype_manual(name='',values = c("solid")) +
scale_shape_manual(name='', values = c(19)) +
scale_x_continuous(breaks=seq(4,26,2)) +
ylim(c(0, 20000)) +
ggtitle('Problem 3: Plot of tumor mass with time')
Notice how the legend is separated. I'd like to merge it for shape and color. When the geoms are the same, the technique of using scale_something_manual works perfectly fine to merge the legends. However, I'm having trouble with it here since I have two different geoms.
The problem is similar to the one described in https://github.com/tidyverse/ggplot2/issues/3648. There is no elegant solution at the moment. Because you haven't included any data, I've presumed that your problem is conceptually similar to the plot below:
library(ggplot2)
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(shape = "Point", colour = "Point")) +
geom_smooth(aes(linetype = "Line", colour = "Line"),
formula = y ~ x, se = FALSE, method = "loess") +
scale_colour_manual(values = c("red", "black")) +
scale_linetype_manual(values = "solid") +
scale_shape_manual(values = 19)
The way to fix the problem is to get rid of the linetype and shape aesthetics and scales, and instead override aesthetics at the level of the legend.
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(colour = "Point")) +
geom_smooth(aes(colour = "Line"),
formula = y ~ x, se = FALSE, method = "loess") +
scale_colour_manual(
values = c("red", "black"),
guide = guide_legend(override.aes = list(shape = c(NA, 19),
linetype = c(1, NA)))
)
Created on 2021-09-04 by the reprex package (v2.0.1)

ggplot2: doughnuts, how to conditional color fill with if_else

Following guides like ggplot Donut chart I am trying to draw small gauges, doughnuts with a label in the middle, with the intention to put them later on on a map.
If the value reaches a certain threshold I would like the fill of the doughnut to change to red. Is it possible to achieve with if_else (it would be most natural but it does not work).
library(tidyverse)
df <- tibble(ID=c("A","B"),value=c(0.7,0.5)) %>% gather(key = cat,value = val,-ID)
ggplot(df, aes(x = val, fill = cat)) + scale_fill_manual(aes,values = c("red", "yellow"))+
geom_bar(position="fill") + coord_polar(start = 0, theta="y")
ymax <- max(df$val)
ymin <- min(df$val)
p2 = ggplot(df, aes(fill=cat, y=0, ymax=1, ymin=val, xmax=4, xmin=3)) +
geom_rect(colour="black",stat = "identity") +
scale_fill_manual(values = if_else (val > 0.5, "red", "black")) +
geom_text( aes(x=0, y=0, label= scales::percent (1-val)), position = position_dodge(0.9))+
coord_polar(theta="y") +
xlim(c(0, 4)) +
theme_void() +
theme(legend.position="none") +
scale_y_reverse() + facet_wrap(facets = "ID")
Scale fill manual values= if else.... this part does not work, the error says: Error in if_else(val > 0.5, "red", "black") : object 'val' not found. Is it my error, or some other solution exists?
I also realize my code is not optimal, initially gather waited for more variables to be included in the plot, but I failed to stack one variable on top of the other. Now one variable should be enough to indicate the percentage of completion. I realise my code is redundant for the purpose. Can you help me out?
A solution for the color problem is to first create a variable in the data and then use that to map the color in the plot:
df <- tibble(ID=c("A","B"),value=c(0.7,0.5)) %>% gather(key = cat,value = val,-ID) %>%
mutate(color = if_else(val > 0.5, "red", "black"))
p2 = ggplot(df, aes(fill=color, y=0, ymax=1, ymin=val, xmax=4, xmin=3)) +
geom_rect(colour="black",stat = "identity") +
scale_fill_manual(values = c(`red` = "red", `black` = "black")) +
geom_text( aes(x=0, y=0, label= scales::percent (1-val)), position = position_dodge(0.9))+
coord_polar(theta="y") +
xlim(c(0, 4)) +
theme_void() +
theme(legend.position="none") +
scale_y_reverse() + facet_wrap(facets = "ID")
The result would be:

R: Combine pie charts with ggplot2

EDITED
I have the following example where I create 3 pie charts , but I would like to have them 3 combined into 1 pie + donuts pie.
Besides, it would be really useful to have the numbers as well, how can this be accomplished? Thanks a lot.
df.mut <- data.frame(Avrg.muts.HLA.A11.A24=c(20.20000,37.39286,11.85714,50.26087,20.20000,37.39286,11.85714,50.26087,20.20000,37.39286,11.85714,50.26087), Avrg.muts.HLA.A11=c(32.86842,32.86842,35.72973,35.72973,32.86842,32.86842,35.72973,35.72973,32.86842,32.86842,35.72973,35.72973), Avrg.muts.HLA.A24=c(15.33333,43.19608,15.33333,43.19608,15.33333,43.19608,15.33333,43.19608,15.33333,43.19608,15.33333,43.19608), variable=c("HLA.A11.A24","HLA.A11.A24","HLA.A11.A24","HLA.A11.A24","HLA.A11","HLA.A11","HLA.A11","HLA.A11","HLA.A24","HLA.A24","HLA.A24","HLA.A24"), value=c("+/+","+/-","-/+","-/-","+","+","-","-","+","-","+","-"))
df.mut$variable <- factor(df.mut$variable, levels=unique(df.mut$variable))
png(file="IMAGES/test1.png")
print(
ggplot(df.mut, aes(x="")) +
facet_grid(variable~., scales="free_y") +
geom_bar(data=subset(df.mut, variable=='HLA.A11.A24'),
aes(x='0', y=Avrg.muts.HLA.A11.A24, fill=value), width = 1, stat = "identity") +
geom_bar(data=subset(df.mut, variable=='HLA.A11'),
aes(x='1', y=Avrg.muts.HLA.A11, fill=value), width = 1, stat = "identity") +
geom_bar(data=subset(df.mut, variable=='HLA.A24'),
aes(x='2', y=Avrg.muts.HLA.A24, fill=value), width = 1, stat = "identity") +
ggtitle("TEST1") +
theme(axis.text.x=element_blank(), legend.title=element_blank(), legend.position="right", legend.background=element_blank(), legend.box.just="left", plot.title=element_text(size=15, face="bold", colour="black", vjust=1.5)) +
scale_y_continuous(name="") +
scale_x_discrete(name="") +
coord_polar(theta="y")
)
dev.off()
This produces the following image:
However, when I try to having the 3 of them together, the best I get is this mess:
How can I combine the pie charts above? And include numbers.
This should get you started:
df.test <- data.frame(genotype.1=c("+","+","-","-"), genotype.2=c("+","-","+","-"), count=c(345,547,678,987))
require(ggplot2)
require(grid)
ggplot(df.test, aes(y = count)) +
geom_bar(aes(x='0', fill = paste(genotype.1, genotype.2, sep="/")), color='black', width = 1, stat = "identity") +
geom_bar(aes(x='1', fill = genotype.1), width = 1, color='black', stat = "identity") +
geom_bar(aes(x='2', fill = genotype.2), width = 1, color='black', stat = "identity") +
coord_polar(theta="y") +
scale_x_discrete(name='', breaks=c('0', '1', '2'), labels=rep('', 3)) +
theme(axis.ticks.length = unit(0, "npc")) +
scale_fill_discrete(name='genotype', breaks = c('-', '+', '-/-', '-/+', '+/-', '+/+')) +
scale_y_continuous(breaks=0)
EDIT: Part of the reason, you get something different with faceting than without is because you use scales="free_y". To get the same thing without the facets, you can do scale the variables yourself.
p <- ggplot(df.mut, aes(x="")) +
geom_bar(data=subset(df.mut, variable=='HLA.A11.A24'),
aes(x='0', y=Avrg.muts.HLA.A11.A24/sum(Avrg.muts.HLA.A11.A24), fill=value), color='black', width = 1, stat = "identity") +
geom_bar(data=subset(df.mut, variable=='HLA.A11'),
aes(x='1', y=Avrg.muts.HLA.A11/sum(Avrg.muts.HLA.A11), fill=value), color='black', width = 1, stat = "identity") +
geom_bar(data=subset(df.mut, variable=='HLA.A24'),
aes(x='2', y=Avrg.muts.HLA.A24/sum(Avrg.muts.HLA.A24), fill=value), color='black', width = 1, stat = "identity") +
ggtitle("TEST1") +
theme(axis.text.x=element_blank(), legend.title=element_blank(), legend.position="right", legend.background=element_blank(), legend.box.just="left", plot.title=element_text(size=15, face="bold", colour="black", vjust=1.5)) +
scale_y_continuous(name="") +
scale_x_discrete(name="") +
coord_polar(theta="y")
# now look at the faceted and unfaceted plots...
p
p + facet_grid(variable~., scales="free_y")
However, your faceted plots also don't line up as nicely as your previous test data did. That just appears to be because the data is actually not exactly lined up (there are really only 2 unique values for the HLA.A11 and HLA.A24, so it's impossible to get 4 different sizes).

Draw lines between two facets in ggplot2

How can I draw several lines between two facets?
I attempted this by plotting points at the min value of the top graph but they are not between the two facets. See picture below.
This is my code so far:
t <- seq(1:1000)
y1 <- rexp(1000)
y2 <- cumsum(y1)
z <- rep(NA, length(t))
z[100:200] <- 1
df <- data.frame(t=t, values=c(y2,y1), type=rep(c("Bytes","Changes"), each=1000))
points <- data.frame(x=c(10:200,300:350), y=min(y2), type=rep("Bytes",242))
vline.data <- data.frame(type = c("Bytes","Bytes","Changes","Changes"), vl=c(1,5,20,5))
g <- ggplot(data=df, aes(x=t, y=values)) +
geom_line(colour=I("black")) +
facet_grid(type ~ ., scales="free") +
scale_y_continuous(trans="log10") +
ylab("Log values") +
theme(axis.text.x = element_text(angle = 90, hjust = 1), panel.margin = unit(0, "lines"))+
geom_point(data=points, aes(x = x, y = y), colour="green")
g
In order to achieve that, you have to set the margins inside the plot to zero. You can do that with expand=c(0,0). The changes I made to your code:
When you use scale_y_continuous, you can define the axis label inside that part and you don't need a seperarate ylab.
Changed colour=I("black") to colour="black" inside geom_line.
Added expand=c(0,0) to scale_x_continuous and scale_y_continuous.
The complete code:
ggplot(data=df, aes(x=t, y=values)) +
geom_line(colour="black") +
geom_point(data=points, aes(x = x, y = y), colour="green") +
facet_grid(type ~ ., scales="free") +
scale_x_continuous("t", expand=c(0,0)) +
scale_y_continuous("Log values", trans="log10", expand=c(0,0)) +
theme(axis.text.x=element_text(angle=90, vjust=0.5), panel.margin=unit(0, "lines"))
which gives:
Adding lines can also be done with geom_segment. Normally the lines (segments) will appear in both facets. If you want them to appear between the two facets, you will have to restrict that in data parameter:
ggplot(data=df, aes(x=t, y=values)) +
geom_line(colour="black") +
geom_segment(data=df[df$type=="Bytes",], aes(x=10, y=0, xend=200, yend=0), colour="green", size=2) +
geom_segment(data=df[df$type=="Bytes",], aes(x=300, y=0, xend=350, yend=0), colour="green", size=1) +
facet_grid(type ~ ., scales="free") +
scale_x_continuous("t", expand=c(0,0)) +
scale_y_continuous("Log values", trans="log10", expand=c(0,0)) +
theme(axis.text.x=element_text(angle=90, vjust=0.5), panel.margin=unit(0, "lines"))
which gives:

to show mean value in ggplot box plot

I need to be able to show the mean value in ggplot box plot. Below works for a point but I need the white dashed lines? Any body help?
x
Team Value
A 10
B 5
C 29
D 35
ggplot(aes(x = Team , y = Value), data = x)
+ geom_boxplot (aes(fill=Team), alpha=.25, width=0.5, position = position_dodge(width = .9))
+ stat_summary(fun.y=mean, colour="red", geom="point")
Here's my way of adding mean to boxplots:
ggplot(RQA, aes(x = Type, y = engagementPercent)) +
geom_boxplot(aes(fill = Type),alpha = .6,size = 1) +
scale_fill_brewer(palette = "Set2") +
stat_summary(fun.y = "mean", geom = "text", label="----", size= 10, color= "white") +
ggtitle("Participation distribution by type") +
theme(axis.title.y=element_blank()) + theme(axis.title.x=element_blank())
ggplot(df, aes(x = Type, y = scorepercent)) +
geom_boxplot(aes(fill = Type),alpha = .6,size = 1) +
scale_fill_brewer(palette = "Set2") +
stat_summary(fun.y = "mean", geom = "point", shape= 23, size= 3, fill= "white") +
ggtitle("score distribution by type") +
theme(axis.title.y=element_blank()) + theme(axis.title.x=element_blank())
I would caution against using text to this and do geom_line instead as text is offset slightly and gives the wrong portrayal of the mean.
Hey user1471980, I think people are more inclined to help if you have a unique user name but then again you have a lot of points :)
this is a hack but does this help:
Value<-c(1,2,3,4,5,6)
Team<-c("a","a","a","b","b","b")
x<-data.frame(Team,Value) #note means for a=2, mean for b=5
ggplot(aes(x = Team , y = Value), data = x) + geom_boxplot (aes(fill=Team), alpha=.25, width=0.5, position = position_dodge(width = .9)) +
annotate(geom="text", x=1, y=2, label="----", colour="white", size=7, fontface="bold", angle=0) +
annotate(geom="text", x=2, y=5, label="----", colour="white", size=7, fontface="bold", angle=0)

Resources