Delete the symbol a from legend - r

I need to delete that symbol 'a' that is coming in the legend, plus I would like to know if there is a possibility to place the label on the top of the bars.
This my example file:
Residue,Position,Weight,SVM Count,Odd,Ttest,lower,upper,Resistance
G163R,163,0.357,49,19.9453848,6.978518E-82,5.6628402,70.2925768,Accessory
V165I,165,0.268,49,2.98167788,1.60934E-80,1.25797484,7.06728692,Novel
N155H,155,0.253,50,38.6089584,1.089188E-83,9.5815554,155.7070612,Major
library(ggplot2)
m <- read.csv('example.csv', header=T, row.names=1)
boxOdds = m$Odd
df <- data.frame(
yAxis = length(boxOdds):1,
boxnucleotide = m$Position,
boxCILow = m$lower,
boxCIHigh = m$upper,
Mutation = m$Resistance)
ticksy<-c(seq(0,0.3,by=.1), seq(0, 1, by =.5), seq(0, 20, by =5), seq(0, 150, by =50))
ticksx<-c(seq(0,300,by=25))
p <- ggplot(df, aes(x = boxnucleotide, y = boxOdds, colour=Mutation,label=rownames(m)))
p1 <- p + geom_errorbar(aes(ymax = boxCIHigh, ymin = boxCILow), size = .5, height = .01) +
geom_point(size = 1) +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
scale_y_continuous(breaks=ticksy, labels = ticksy) +
scale_x_continuous(breaks=ticksx, labels = ticksx) +
coord_trans(y = "log10") +
ylab("Odds ratio (log scale)") +
scale_color_manual(values=c("#00BFC4","#F8766D","#619CFF")) +
xlab("Integrase nucleotide position") +
geom_text(size=4,hjust=0, vjust=0)+
theme(legend.position = c(0.9, 0.9))
p1
I already tried all possible solutions from Remove 'a' from legend when using aesthetics and geom_text but none worked out

Related

ggplot trying to make a Cleveland plot but I cannot get a legend

library(ggplot2)
library(ggthemes)
data <- read.csv('/Users/zbhay/Documents/r-data.csv', header = 1)
zb <- ggplot(data) +
geom_segment( aes(x=x, xend=x, y=value1, yend=value2), color="black")+
geom_point( aes(x=x, y=value1), color=rgb(0.2,0.7,0.1,1), size=4 )+
geom_point( aes(x=x, y=value2), color=rgb(0.7,0.2,0.1,1), size=4 )+
coord_flip() +
theme_solarized() +
scale_y_continuous(breaks = seq(0, 10000, by = 500)
)
zb + labs(title = "Title",
subtitle = "subtitle") +
xlab("Business Functions") +
ylab("# of hours")
legend("left", c("Starting", "Ending"),
box.col = "darkgreen"
)
Hello, here is the code. The CSV file is structured as follows; column A = names, column b = starting number, column c = final number.
I am trying to set up a legend that calls out the final number vs starting number. I have tried and tried but cannot seem to be able to crack it. If anyone knows a fix, I would appreciate it if you could let me know.
As a general rule when using ggplot2 you have to map on aesthetics if you want to get a legend, i.e. instead of setting the colors for your points as arguments map a value on the color aes, e.g. in my code below I map the constant value or category start on the color aes inside aes() for the first geom_point. Afterwards you could use scale_color_manual to assign your desired colors and labels to these "categories" or "values". Finally, the color of the legend box could be set via the theme option legend.background. However, the legend keys themselves have a background color too, which I set to NA via legend.key.
Using some fake random example data:
library(ggplot2)
library(ggthemes)
set.seed(123)
data <- data.frame(x = letters[1:5], value1 = runif(5, 0, 10000), value2 = runif(5, 0, 10000))
ggplot(data) +
geom_segment(aes(x = x, xend = x, y = value1, yend = value2), color = "black") +
geom_point(aes(x = x, y = value1, color = "start"), size = 4) +
geom_point(aes(x = x, y = value2, color = "end"), size = 4) +
coord_flip() +
theme_solarized() +
scale_y_continuous(breaks = seq(0, 10000, by = 500)) +
scale_color_manual(values = c(start = rgb(0.2, 0.7, 0.1, 1), end = rgb(0.7, 0.2, 0.1, 1)), labels = c(start = "Starting", end = "Ending")) +
labs(title = "Title", subtitle = "subtitle", x = "Business Functions", y = "# of hours", color = NULL) +
theme(
legend.key = element_rect(fill = NA),
legend.background = element_rect(fill = "darkgreen")
)

sorting the labels alphabetically when they appear on the y bar

For the example below, I wonder to know how can I have the labels sorted alphabetically when they appear on the y bar.
Thanks,
NM
# Create labels for plot
boxLabels = c("Package recommendation", "Breeder’s recommendations", "Vet’s
recommendation", "Measuring cup", "Weigh on scales", "Certain number of
cans", "Ad lib feeding", "Adjusted for body weight")
df <- data.frame(yAxis = length(boxLabels):1,
boxOdds =
c(2.23189,1.315737,1.22866,.8197413,.9802449,.9786673,.6559005,.5929812),
boxCILow =
c(.7543566,1.016,.9674772,.6463458,.9643047,.864922,.4965308,.3572142),
boxCIHigh =
c(6.603418,1.703902,1.560353,1.039654,.9964486,1.107371,.8664225,.9843584)
)
(p <- ggplot(df, aes(x = boxOdds, y = boxLabels)) +
geom_vline(aes(xintercept = 1), size = .25, linetype = 'dashed') +
geom_errorbarh(aes(xmax = boxCIHigh, xmin = boxCILow), size = .5, height =
.2, color = 'gray50') +
geom_point(size = 3.5, color = 'orange') +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
scale_x_continuous(breaks = seq(0,7,1) ) +
coord_trans(x = 'log10') +
ylab('') +
xlab('Odds ratio (log scale)') +
annotate(geom = 'text', y =1.1, x = 3.5, label ='Model p < 0.001\nPseudo
R^2 = 0.10', size = 3.5, hjust = 0) + ggtitle('Intention to remove box
turtles from the road')
)
the values are shown in alphabetical order but from bottom to top. to reverse, you could try using fct_rev from the forcats package:
replace y = boxLabels with
y = forcats::fct_rev(boxLabels)

Adding odds ratios values and different colors in a ggplot graph in R

I am trying to insert the Odds ratio values in the graph.
I'm also trying to give different color to the shapes with significant Odds ratio. Significant Odds ratio are those whose confidence intervals does not include a value of 1.
Can someone, please, help me with this?
My code exemple is:
df <- data.frame(yAxis = length(boxLabels):1,
boxOdds = c(2.23189,1.315737,1.22866,.8197413,.9802449,.9786673,.6559005,.5929812),
boxCILow = c(.7543566,1.016,.9674772,.6463458,.9643047,.864922,.4965308,.3572142),
boxCIHigh = c(6.603418,1.703902,1.560353,1.039654,.9964486,1.107371,.8664225,.9843584))
Ploting the graph
ggplot(df, aes(x = boxOdds, y = boxLabels)) +
geom_vline(aes(xintercept = 1), size = .25, linetype = 'dashed') +
geom_errorbarh(aes(xmax = boxCIHigh, xmin = boxCILow), size = .5, height =
.2, color = 'gray50') +
geom_point(size = 3.5, color = 'orange') +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
scale_x_continuous(breaks = seq(0,7,1) ) +
coord_trans(x = 'log10') +
ylab('') +
xlab('Odds ratio (log scale)') +
annotate(geom = 'text', y =1.1, x = 3.5, label ='Model p < 0.001\nPseudo
R^2 = 0.10', size = 3.5, hjust = 0) + ggtitle('My title')
I made a dummy variable for boxLabels since your code was missing it.
One thing to note for your future plots :
define everything you need for the plot beforehand. Such as the a variable detailing whether or not your test is signifcant.
boxLabels <- letters[1:8] # dummy boxlabels variable
df <- data.frame(yAxis = length(boxLabels):1,
boxOdds = c(2.23189,1.315737,1.22866,.8197413,.9802449,.9786673,.6559005,.5929812),
boxCILow = c(.7543566,1.016,.9674772,.6463458,.9643047,.864922,.4965308,.3572142),
boxCIHigh = c(6.603418,1.703902,1.560353,1.039654,.9964486,1.107371,.8664225,.9843584),
boxLabels = boxLabels # Don't forget to actually put it in the dataframe if you use it in the plot.
)
# Make a variable for whether your test is significant
df$isSignif <- df$boxCILow > 1 | df$boxCIHigh < 1
g <- ggplot(df, aes(x = boxOdds, y = boxLabels)) +
geom_vline(aes(xintercept = 1), size = .25, linetype = 'dashed') +
geom_errorbarh(aes(xmax = boxCIHigh, xmin = boxCILow), size = .5, height =
.2, color = 'gray50') +
geom_point(aes(color = isSignif), size = 3.5) + ## Use the variable to define the color of your plot.
theme_bw() +
theme(panel.grid.minor = element_blank()) +
scale_x_continuous(breaks = seq(0,7,1) ) +
coord_trans(x = 'log10') +
ylab('') +
xlab('Odds ratio (log scale)') +
annotate(geom = 'text', y =1.1, x = 3.5, label ='Model p < 0.001\nPseudo
R^2 = 0.10', size = 3.5, hjust = 0) + ggtitle('My title')
# add the oddsRatio values to the plot
g <- g + geom_text(label=round(df$boxOdds, 2), nudge_y=0.25)
g
The only lines I changed from your code are those with a comment.
This outputs the following plot :

Generate two plots with same x axis breaks

I have two plots I want the x axes being broken by the same way.
This is the code for plot 1:
m <- read.csv('Finalfor1lowergreaterthan1.csv', header=T, row.names=1)
m <- m[m$SVM.Count >= 40,]
boxOdds = m$Odd
df <- data.frame(
yAxis = length(boxOdds):1,
boxnucleotide = m$Position,
boxCILow = m$lower,
boxCIHigh = m$upper,
Mutation = m$Resistance)
ticksy <- c(seq(0,0.3,by=.1), seq(0, 1, by =.5), seq(0, 20, by =5), seq(0, 150, by =50))
ticksx <- c(seq(0,300,by=25))
p <- ggplot(df,
aes(x = boxnucleotide, y = boxOdds, colour=Mutation, label=rownames(m)))
p1 <- p +
geom_errorbar(aes(ymax = boxCIHigh, ymin = boxCILow), size = .5, height = .01) +
geom_point(size = 1) +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
scale_y_continuous(breaks=ticksy, labels = ticksy) +
scale_x_continuous(breaks=ticksx, labels = ticksx) +
coord_trans(y = "log10") +
ylab("Odds ratio (log scale)") +
scale_color_manual(values=c("#00BFC4","#F8766D","#619CFF")) +
xlab("Integrase nucleotide position") +
geom_text(size=2,hjust=0, vjust=0)
Then I have another plot:
m <- read.csv('Finalfor20lowergreaterthan1.csv', header=T, row.names=1)
#m <- m[m$SVM.Count >= 40, ]
boxOdds = m$Odd
df <- data.frame(
yAxis = length(boxOdds):1,
boxnucleotide = m$Position,
boxCILow = m$lower,
boxCIHigh = m$upper,
Mutation = m$Resistance)
ticksy <- c(seq(0,0.3,by=.1), seq(0, 1, by =.5), seq(0, 20, by =5), seq(0, 150, by =50))
ticksx <- c(seq(0,300,by=25))
p <- ggplot(df,
aes(x = boxnucleotide, y = boxOdds, colour=Mutation, label=rownames(m)))
p1 <- p +
geom_errorbar(aes(ymax = boxCIHigh, ymin = boxCILow), size = .5, height = .01) +
geom_point(size = 1) +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
scale_y_continuous(breaks=ticksy, labels = ticksy) +
scale_x_continuous(breaks=ticksx, labels = ticksx) +
coord_trans(y = "log10") +
ylab("Odds ratio (log scale)") +
scale_color_manual(values=c("#00BFC4","#F8766D","#619CFF")) +
xlab("Integrase nucleotide position") +
geom_text(size=2,hjust=0, vjust=0)
Why is plot 1 starting from 75 on x axis and plot 2 starting at 100...how can plot2 start at 75 as well and being scaled like plot 1.
The two codes get the same piece of: ticksx <- c(seq(0, 300, by=25))
A good technique to align the axis range on different plots is to use expand_limits.
You can simply use p1 + expand_limits(x=c(0, 300)). This will ensure the x-axis contains at least 0 and 300 on all your plots. You can also control the y-axis range by using the y argument.
From ?expand_limits:
Sometimes you may want to ensure limits include a single value, for all panels or all plots. This function is a thin wrapper around geom_blank() that makes it easy to add such values.

Problems adding legend to ggplot2 + ggfortify

I'm having troubles using
scale_colour_manual
function of ggplot. I tried
guide = "legend"
to force legend appears, but it doesn't work. Rep code:
library(ggfortify)
library(ggplot2)
p <- ggdistribution(pgamma, seq(0, 100, 0.1), shape = 0.92, scale = 22,
colour = 'red')
p2 <- ggdistribution(pgamma, seq(0, 100, 0.1), shape = 0.9, scale = 5,
colour = 'blue', p=p)
p2 +
theme_bw(base_size = 14) +
theme(legend.position ="top") +
xlab("Precipitación") +
ylab("F(x)") +
scale_colour_manual("Legend title", guide = "legend",
values = c("red", "blue"), labels = c("Observado","Reforecast")) +
ggtitle("Ajuste Gamma")
A solution with stat_function:
library(ggplot2)
library(scales)
cols <- c("LINE1"="red","LINE2"="blue")
df <- data.frame(x=seq(0, 100, 0.1))
ggplot(data=df, aes(x=x)) +
stat_function(aes(colour = "LINE1"), fun=pgamma, args=list(shape = 0.92, scale = 22)) +
stat_function(aes(colour = "LINE2"), fun=pgamma, args=list(shape = 0.9, scale = 5)) +
theme_bw(base_size = 14) +
theme(legend.position ="top") +
xlab("Precipitación") +
ylab("F(x)") +
scale_colour_manual("Legend title", values=c(LINE1="red",LINE2="blue"),
labels = c("Observado","Reforecast")) +
scale_y_continuous(labels=percent) +
ggtitle("Ajuste Gamma")
This appears to be a bug with ggfortify.* You can achieve identical results simply using geom_line() from ggplot2 though:
library(ggplot2)
# Sequence of values to draw from dist(s) for plotting
x = seq(0, 100, 0.1)
# Defining dists
d1 = pgamma(x, shape=0.92, scale=22)
d2 = pgamma(x, shape=0.90, scale=5)
# Plotting
p1 = ggplot() +
geom_line(aes(x,d1,colour='red')) +
geom_line(aes(x,d2,colour='blue')) +
theme_bw(base_size = 14) +
theme(legend.position="top") +
ggtitle("Ajuste Gamma") +
xlab("Precipitación") +
ylab("F(x)") +
scale_colour_manual("Legend title",
guide = "legend",
values = c("red", "blue"),
labels=c("Observado", "Reforecast"))
* Related question: Plotting multiple density distributions on one plot

Resources