I have two plots, and I want their x axes to use the same breaks and cover the same range.
This is the code for plot 1:
# Plot 1: odds ratios with confidence intervals by integrase nucleotide
# position, restricted to residues whose SVM count is at least 40.
m <- read.csv("Finalfor1lowergreaterthan1.csv", header = TRUE, row.names = 1)
m <- m[m$SVM.Count >= 40, ]
boxOdds <- m$Odd
df <- data.frame(
  # rev(seq_along()) rather than length(x):1, which would yield c(0, 1)
  # (and a row-count mismatch) if the filter above leaves no rows.
  yAxis = rev(seq_along(boxOdds)),
  boxnucleotide = m$Position,
  boxCILow = m$lower,
  boxCIHigh = m$upper,
  Mutation = m$Resistance
)
# Axis breaks; the same vectors are reused as the tick labels.
ticksy <- c(
  seq(0, 0.3, by = .1), seq(0, 1, by = .5),
  seq(0, 20, by = 5), seq(0, 150, by = 50)
)
ticksx <- seq(0, 300, by = 25)
# Points are labelled with the row names of m (first CSV column).
p <- ggplot(
  df,
  aes(x = boxnucleotide, y = boxOdds, colour = Mutation, label = rownames(m))
)
p1 <- p +
  # `height` is not a geom_errorbar() parameter (ggplot2 ignores it with a
  # warning), so it is dropped; the rendered bars are unchanged.
  geom_errorbar(aes(ymax = boxCIHigh, ymin = boxCILow), size = .5) +
  geom_point(size = 1) +
  theme_bw() +
  theme(panel.grid.minor = element_blank()) +
  scale_y_continuous(breaks = ticksy, labels = ticksy) +
  scale_x_continuous(breaks = ticksx, labels = ticksx) +
  # The y axis is log-transformed at the coordinate level.
  coord_trans(y = "log10") +
  ylab("Odds ratio (log scale)") +
  scale_color_manual(values = c("#00BFC4", "#F8766D", "#619CFF")) +
  xlab("Integrase nucleotide position") +
  geom_text(size = 2, hjust = 0, vjust = 0)
Then I have another plot:
# Plot 2: same layout as plot 1, built from a different input file and
# without the SVM.Count filter.
m <- read.csv("Finalfor20lowergreaterthan1.csv", header = TRUE, row.names = 1)
#m <- m[m$SVM.Count >= 40, ]
boxOdds <- m$Odd
df <- data.frame(
  # rev(seq_along()) rather than length(x):1, which would yield c(0, 1)
  # (and a row-count mismatch) if the input has no rows.
  yAxis = rev(seq_along(boxOdds)),
  boxnucleotide = m$Position,
  boxCILow = m$lower,
  boxCIHigh = m$upper,
  Mutation = m$Resistance
)
# Axis breaks; the same vectors are reused as the tick labels.
ticksy <- c(
  seq(0, 0.3, by = .1), seq(0, 1, by = .5),
  seq(0, 20, by = 5), seq(0, 150, by = 50)
)
ticksx <- seq(0, 300, by = 25)
# Points are labelled with the row names of m (first CSV column).
p <- ggplot(
  df,
  aes(x = boxnucleotide, y = boxOdds, colour = Mutation, label = rownames(m))
)
p1 <- p +
  # `height` is not a geom_errorbar() parameter (ggplot2 ignores it with a
  # warning), so it is dropped; the rendered bars are unchanged.
  geom_errorbar(aes(ymax = boxCIHigh, ymin = boxCILow), size = .5) +
  geom_point(size = 1) +
  theme_bw() +
  theme(panel.grid.minor = element_blank()) +
  scale_y_continuous(breaks = ticksy, labels = ticksy) +
  scale_x_continuous(breaks = ticksx, labels = ticksx) +
  # The y axis is log-transformed at the coordinate level.
  coord_trans(y = "log10") +
  ylab("Odds ratio (log scale)") +
  scale_color_manual(values = c("#00BFC4", "#F8766D", "#619CFF")) +
  xlab("Integrase nucleotide position") +
  geom_text(size = 2, hjust = 0, vjust = 0)
Why does the x axis of plot 1 start at 75 while that of plot 2 starts at 100? How can I make plot 2 start at 75 as well, so that it is scaled like plot 1?
The two codes get the same piece of: ticksx <- c(seq(0, 300, by=25))
A good technique to align the axis range on different plots is to use expand_limits.
You can simply use p1 + expand_limits(x=c(0, 300)). This will ensure the x-axis contains at least 0 and 300 on all your plots. You can also control the y-axis range by using the y argument.
From ?expand_limits:
Sometimes you may want to ensure limits include a single value, for all panels or all plots. This function is a thin wrapper around geom_blank() that makes it easy to add such values.
Related
The colors are added to the ggplot scatter plot based on interaction of two variables : choice and flag (each has two values, therefore, total four combinations). I used faceting based on z value.
library(tidyverse)
# Simulated data: 10,000 uniform points. `z` splits them into two facets;
# `choice` and `flag` together define the four colour groups.
x <- runif(10000)
y <- runif(10000)
z <- rep(c(0, 1), each = 5000)
flag <- rep(c(0, 1, 0, 1), times = c(500, 4500, 4500, 500))
choice <- rep(c(0, 1), times = 5000)
tbl <- tibble(x = x, y = y, z = z, flag = flag, choice = choice)

# One panel per value of z, coloured by the choice/flag combination.
scatterplot <- ggplot(
  data = tbl,
  mapping = aes(x = x, y = y, color = factor(interaction(choice, flag)))
) +
  geom_point(size = 2, alpha = 0.7) +
  scale_color_manual(values = c("blue3", "cyan1", "red3", "orange")) +
  facet_grid(z ~ .) +
  theme_bw() +
  theme(legend.position = "right", aspect.ratio = 1) +
  labs(title = "Scatter plot")
scatterplot
But I have the following requirement -
z is used for facetting. For z = i, I want points with flag = i to be above, i.e. in the figure below,
for z = 0, blue points (flag = 0) should be over red/orange points.
for z = 1, red/orange points (flag = 1) should be over blue points (as shown)
If I understand you correctly, you are happy with the lower panel, but you need the blue dots in the top panel to be overlaid on the orange dots (at the moment the orange dots are overlaid on the blue dots in both panels).
If this is the case, then calling geom_point a second time with a subsetted data frame where z == 0 & flag == 0 will overlay the appropriate blue points on the top panel without affecting the lower panel.
# Store the choice/flag combination as its own column so every layer can
# map colour to it.
tbl <- mutate(tbl, col = interaction(choice, flag))

ggplot(data = tbl, mapping = aes(x = x, y = y, color = col)) +
  # First pass: all points, in row order.
  geom_point(size = 2, alpha = 0.7) +
  # Second pass: redraw only the z == 0 / flag == 0 points so they end up
  # on top in the upper panel; the lower panel gets nothing from this layer.
  geom_point(
    data = subset(tbl, z == 0 & flag == 0),
    size = 2, alpha = 0.7
  ) +
  scale_color_manual(values = c("blue3", "cyan1", "red3", "orange")) +
  facet_grid(z ~ .) +
  theme_bw() +
  theme(legend.position = "right", aspect.ratio = 1) +
  labs(title = "Scatter plot")
Consider this as an option for you. With facets it was complex to set specific order but you can do the same plot using patchwork:
library(tidyverse)
library(patchwork)
# Data: 10,000 uniform points; z marks the two panels, and choice/flag
# define the four colour groups.
x <- runif(10000)
y <- runif(10000)
z <- rep(c(0, 1), each = 5000)
flag <- rep(c(0, 1, 0, 1), times = c(500, 4500, 4500, 500))
choice <- rep(c(0, 1), times = 5000)
tbl <- tibble(x = x, y = y, z = z, flag = flag, choice = choice)
Plots:
# Plot 1 (z == 0): the flag == 0 points must end up on top.
# geom_point() draws rows in data order, so the rows are sorted to put
# flag == 0 last. Reversing the factor levels (as before) does not change the
# draw order; it only reassigns the manual colours, which made this panel
# disagree with the legend collected from the z == 1 plot.
top_panel <- subset(tbl, z == 0)
top_panel <- top_panel[order(top_panel$flag, decreasing = TRUE), ]
G1 <- ggplot(
  top_panel,
  aes(x = x, y = y, color = factor(interaction(choice, flag)))
) +
  geom_point(alpha = 0.7, size = 2) +
  # Same level-to-colour mapping as the z == 1 plot.
  scale_color_manual(values = c("blue3", "cyan1", "red3", "orange")) +
  facet_grid(z ~ .) +
  theme_bw() +
  # Legend hidden here; the combined figure shows a single shared legend.
  theme(aspect.ratio = 1, legend.position = "none") +
  ggtitle("Scatter plot") +
  labs(color = "Color", x = "")
# Plot 2 (z == 1): default row order and default level order.
G2 <- ggplot(
  data = subset(tbl, z == 1),
  mapping = aes(x = x, y = y, color = factor(interaction(choice, flag)))
) +
  geom_point(size = 2, alpha = 0.7) +
  scale_color_manual(values = c("blue3", "cyan1", "red3", "orange")) +
  facet_grid(z ~ .) +
  theme_bw() +
  theme(legend.position = "right", aspect.ratio = 1) +
  labs(color = "Color")
Final arrange:
# Final plot: stack G1 over G2 and gather the legends into one.
G <- (G1 / G2) + plot_layout(guides = "collect")
Output:
I need to delete that symbol 'a' that is coming in the legend, plus I would like to know if there is a possibility to place the label on the top of the bars.
This is my example file:
Residue,Position,Weight,SVM Count,Odd,Ttest,lower,upper,Resistance
G163R,163,0.357,49,19.9453848,6.978518E-82,5.6628402,70.2925768,Accessory
V165I,165,0.268,49,2.98167788,1.60934E-80,1.25797484,7.06728692,Novel
N155H,155,0.253,50,38.6089584,1.089188E-83,9.5815554,155.7070612,Major
library(ggplot2)
# Odds ratios with confidence intervals by integrase nucleotide position,
# read from the example file; the first CSV column becomes the row names.
m <- read.csv("example.csv", header = TRUE, row.names = 1)
boxOdds <- m$Odd
df <- data.frame(
  # rev(seq_along()) rather than length(x):1, which would yield c(0, 1)
  # (and a row-count mismatch) if the file has no data rows.
  yAxis = rev(seq_along(boxOdds)),
  boxnucleotide = m$Position,
  boxCILow = m$lower,
  boxCIHigh = m$upper,
  Mutation = m$Resistance
)
# Axis breaks; the same vectors are reused as the tick labels.
ticksy <- c(
  seq(0, 0.3, by = .1), seq(0, 1, by = .5),
  seq(0, 20, by = 5), seq(0, 150, by = 50)
)
ticksx <- seq(0, 300, by = 25)
p <- ggplot(
  df,
  aes(x = boxnucleotide, y = boxOdds, colour = Mutation, label = rownames(m))
)
p1 <- p +
  # `height` is not a geom_errorbar() parameter (ggplot2 ignores it with a
  # warning), so it is dropped; the rendered bars are unchanged.
  geom_errorbar(aes(ymax = boxCIHigh, ymin = boxCILow), size = .5) +
  geom_point(size = 1) +
  theme_bw() +
  theme(panel.grid.minor = element_blank()) +
  scale_y_continuous(breaks = ticksy, labels = ticksy) +
  scale_x_continuous(breaks = ticksx, labels = ticksx) +
  coord_trans(y = "log10") +
  ylab("Odds ratio (log scale)") +
  scale_color_manual(values = c("#00BFC4", "#F8766D", "#619CFF")) +
  xlab("Integrase nucleotide position") +
  # NOTE: this text layer is what contributes the letter "a" to the colour
  # legend; passing show.legend = FALSE to geom_text() is the usual way to
  # suppress it.
  geom_text(size = 4, hjust = 0, vjust = 0) +
  theme(legend.position = c(0.9, 0.9))
p1
I already tried all possible solutions from Remove 'a' from legend when using aesthetics and geom_text but none worked out
I'm having trouble using the scale_colour_manual function of ggplot. I tried guide = "legend" to force the legend to appear, but it doesn't work. Reproducible code:
library(ggfortify)
library(ggplot2)
# Two gamma CDFs drawn with ggfortify; the second call adds its curve to
# the plot returned by the first via `p = p`.
p <- ggdistribution(
  pgamma, seq(0, 100, 0.1),
  shape = 0.92, scale = 22, colour = "red"
)
p2 <- ggdistribution(
  pgamma, seq(0, 100, 0.1),
  shape = 0.9, scale = 5, colour = "blue", p = p
)

p2 +
  theme_bw(base_size = 14) +
  theme(legend.position = "top") +
  labs(x = "Precipitación", y = "F(x)") +
  scale_colour_manual(
    "Legend title",
    guide = "legend",
    values = c("red", "blue"),
    labels = c("Observado", "Reforecast")
  ) +
  labs(title = "Ajuste Gamma")
A solution with stat_function:
library(ggplot2)
library(scales)
# Colour for each series key. The keys are the constant strings mapped to
# `colour` in the stat_function() layers below.
cols <- c(LINE1 = "red", LINE2 = "blue")
df <- data.frame(x = seq(0, 100, 0.1))
ggplot(data = df, aes(x = x)) +
  # Mapping a constant string inside aes() is what creates a legend entry.
  stat_function(aes(colour = "LINE1"), fun = pgamma,
                args = list(shape = 0.92, scale = 22)) +
  stat_function(aes(colour = "LINE2"), fun = pgamma,
                args = list(shape = 0.9, scale = 5)) +
  theme_bw(base_size = 14) +
  theme(legend.position = "top") +
  xlab("Precipitación") +
  ylab("F(x)") +
  # Reuse `cols` instead of repeating the literal, and pin the break order
  # so each label stays attached to its key.
  scale_colour_manual("Legend title", values = cols, breaks = names(cols),
                      labels = c("Observado", "Reforecast")) +
  scale_y_continuous(labels = percent) +
  ggtitle("Ajuste Gamma")
This appears to be a bug with ggfortify.* You can achieve identical results simply using geom_line() from ggplot2 though:
library(ggplot2)
# Sequence of values at which both distribution functions are evaluated
x <- seq(0, 100, 0.1)
# Gamma CDFs for the two series
d1 <- pgamma(x, shape = 0.92, scale = 22)
d2 <- pgamma(x, shape = 0.90, scale = 5)
# Colour per series, keyed by the legend label. Discrete levels are sorted
# alphabetically, so the earlier unnamed values/labels ("red", "blue") were
# matched to the keys "blue", "red" and the curves came out swapped.
series_cols <- c(Observado = "red", Reforecast = "blue")
# Plotting
p1 <- ggplot() +
  geom_line(aes(x, d1, colour = "Observado")) +
  geom_line(aes(x, d2, colour = "Reforecast")) +
  theme_bw(base_size = 14) +
  theme(legend.position = "top") +
  ggtitle("Ajuste Gamma") +
  xlab("Precipitación") +
  ylab("F(x)") +
  scale_colour_manual("Legend title",
                      guide = "legend",
                      values = series_cols,
                      breaks = names(series_cols))
* Related question: Plotting multiple density distributions on one plot
I've built up a faceted plot using ggplot2, but when I try to add text to each of the facets, I get an error message which I can't seem to get around ('Error in eval(expr, envir, enclos) : object 'width' not found'). I would appreciate another pair of eyes on this. The line I'm trying to add is 3/4 of the way down and commented out ( # p1 = p1 + geom_text(ae ..... ).
library(ggplot2)
library(dplyr)
library(tidyr)
# Example data: one row per organization.
rownum <- 1:6
orgs <- c("A", "B", "C", "D", "E", "F")
level <- c(0, 1, 1, 1, 1, 1)
qtd <- c(1216.146, 743.482, 276.105, 135.089, 52.703, 8.767)
qtd_ref <- c(0.53, 0.529, 0.56, 0.556, 0.499, 0.421)
qtd_vs_qc <- c(0.574, 0.646, 0.656, 0.508, 0.215, 0.249)
qtd_vs_qf <- c(0.566, 0.627, 0.656, 0.507, 0.217, 0.249)
qtd_vs_qp <- c(0.536, 0.622, 0.52, 0.458, 0.25, 0.233)
yl1_ref <- c(0.526, 0.502, 0.563, 0.472, 0.629, 0.418)
yl2_ref <- c(0.534, 0.544, 0.62, 0.422, 0.344, 0.478)
yl3_ref <- c(0.53, 0.54, 0.498, 0.772, 0.525, 0.368)
ql1_ref <- c(0.548, 0.557, 0.56, 0.595, 0.319, 0.594)
qc_vs_ref <- c(0.044, 0.118, 0.096, -0.048, -0.284, -0.172)
colors <- c("#98abc5", "#8a89a6", "#7b6888", "#6b486b", "#a05d56", "#d0743c", "#ff8c00")
class(colors)
## plot: p1
## yAxis: Target QTD Attainment (%)
##
##
df <- data.frame(rownum, orgs, level, qtd, qtd_ref, qtd_vs_qc, qtd_vs_qf, qtd_vs_qp)
# Bar width grows with each organization's share of the total qtd.
df1 <- data.frame(
  df[, c("rownum", "orgs", "level")],
  width = 0.1 + 0.9 * ((df$qtd * 2) / sum(df$qtd)),
  df[, c("qtd_ref", "qtd_vs_qc", "qtd_vs_qf", "qtd_vs_qp")]
)
# Long format: one row per organization and target. The three target
# columns are selected by name (previously positions 6:ncol(df1)), and
# pivot_longer() replaces the deprecated tbl_df() + superseded gather().
tg1 <- pivot_longer(
  df1,
  cols = c("qtd_vs_qc", "qtd_vs_qf", "qtd_vs_qp"),
  names_to = "target",
  values_to = "percent"
)
# Keep the facets in the original organization order.
tg1$orgs <- factor(tg1$orgs, levels = orgs)
tg1
# One row per organization: reference value (in percent) and its label.
hldata <- data.frame(x = 0, y = qtd_ref * 100, lab = qtd_ref * 100, orgs = orgs)
p1 <- ggplot(data = tg1, aes(x = target, y = percent * 100, width = width, fill = factor(target)))
p1 <- p1 + geom_bar(stat = "identity", position = "identity")
p1 <- p1 + facet_wrap(~orgs)
# Read the reference value from hldata's own column rather than a global.
p1 <- p1 + geom_hline(aes(yintercept = y), data = hldata)
# The line below is the one that fails: the layer inherits width = width from
# the ggplot() mapping above, and hldata has no `width` column.
# p1 = p1 + geom_text(aes(x=x,y=y,label=lab, vjust = -0.5, hjust = -0.5), data=hldata)
p1 <- p1 + scale_fill_manual(values = c(colors[1], colors[3], colors[5]), labels = c("Commit", "Forecast", "Plan"))
p1 <- p1 + xlab("organization")
p1 <- p1 + ylab("Target QTD Attainment (%)")
p1 <- p1 + labs(fill = "Target")
p1 <- p1 + theme(
  axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5, colour = "gray50"),
  strip.text.y = element_text(size = 18),
  strip.text = element_text(size = 12)
)
p1 <- p1 + ggtitle("Percent of Target Attained QTD")
p1
Not sure why (maybe a bug?), but I can only get this to work if I name the x column of hldata target, matching the name of the x column in tg1. However, as I said in the comments, if you start by mapping tg1$target to the x axis, then anything else you map to the x axis (hldata$x) needs to be of the same data type: in this case a factor with the same levels as tg1$target.
# One row per organization for the reference line and its label.
# `target` takes a value from tg1$target so the text layer shares the x
# scale of the bars, and there's no need for duplicate qtd_ref columns.
hldata <- data.frame(target = tg1$target[1],
                     y = qtd_ref, orgs = orgs)
p1 <-
  ggplot(data = tg1, aes(x = target, y = percent, fill = target)) +
  # width is mapped only on the bar layer, so other layers don't inherit it
  geom_bar(aes(width = width), stat = "identity", position = "identity") +
  facet_wrap(~ orgs) +
  # read the reference value from hldata's own column
  geom_hline(aes(yintercept = y), data = hldata) +
  # vjust and hjust are constants, so they sit outside aes();
  # y and label both come from the same column
  geom_text(aes(x = target, y = y,
                # shown as a percent, like the axis
                label = scales::percent(y)),
            vjust = -0.5, hjust = 1, data = hldata) +
  # put percent labels on the y axis
  scale_y_continuous(labels = scales::percent) +
  # `values` and `labels` are separate arguments: the earlier version left
  # the c() around the colours unclosed, so the statement did not parse
  scale_fill_manual(values = colors[c(1, 3, 5)],
                    labels = c("Commit", "Forecast", "Plan")) +
  # a single labs call is easier than xlab(), ylab(), ggtitle()...
  # and with the percent in the tick labels maybe don't need (%)
  labs(x = "organization",
       y = "Target QTD Attainment",
       fill = "Target",
       title = "Percent of Target Attained QTD") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1,
                                   vjust = .5, colour = "gray50"),
        strip.text.y = element_text(size = 18),
        strip.text = element_text(size = 12))
p1
I cleaned up the code a little bit, removed the multiplying by 100 and replaced it with a call to scales::percent. And there's no need to re-assign the plot every line.
Is there a method to overlay something analogous to a density curve when the vertical axis is frequency or relative frequency? (Not an actual density function, since the area need not integrate to 1.) The following question is similar:
ggplot2: histogram with normal curve, and the user self-answers with the idea to scale ..count.. inside of geom_density(). However this seems unusual.
The following code produces an overinflated "density" line.
# 164 normal draws, binned from 4.5 to 12 in steps of 0.1.
df1 <- data.frame(v = rnorm(164, mean = 9, sd = 1.5))
b1 <- seq(4.5, 12, by = 0.1)

# Count histogram with a density layer that is also mapped to ..count..
hist.1a <- ggplot(data = df1, mapping = aes(v)) +
  stat_bin(
    mapping = aes(y = ..count..),
    breaks = b1,
    fill = "blue",
    color = "black"
  ) +
  geom_density(mapping = aes(y = ..count..))
hist.1a
@joran's response/comment got me thinking about what the appropriate scaling factor would be. For posterity's sake, here's the result.
When Vertical Axis is Frequency (aka Count)
Thus, the scaling factor for a vertical axis measured in bin counts is $N \times \text{bin width}$, where $N$ is the number of observations.
In this case, with N = 164 and the bin width as 0.1, the aesthetic for y in the smoothed line should be:
y = ..density..*(164 * 0.1)
Thus the following code produces a "density" line scaled for a histogram measured in frequency (aka count).
# Sample size and bin width are named once, so the density scaling factor
# below cannot drift from the data and the breaks it is meant to match.
n_obs <- 164
bin_width <- 0.1
df1 <- data.frame(v = rnorm(n_obs, mean = 9, sd = 1.5))
b1 <- seq(4.5, 12, by = bin_width)
# Count histogram plus a density curve rescaled to counts:
# density * (number of observations * bin width).
hist.1a <- ggplot(df1, aes(x = v)) +
  geom_histogram(aes(y = ..count..), breaks = b1,
                 fill = "blue", color = "black") +
  geom_density(aes(y = ..density.. * (n_obs * bin_width)))
hist.1a
When Vertical Axis is Relative Frequency
Using the above, we could write
# Relative-frequency histogram (counts divided by the 164 observations);
# the density curve is scaled by the bin width of 0.1 alone.
hist.1b <- ggplot(data = df1, mapping = aes(x = v)) +
  geom_histogram(
    mapping = aes(y = ..count.. / 164),
    color = "black",
    fill = "blue",
    breaks = b1
  ) +
  geom_density(mapping = aes(y = ..density.. * (0.1)))
hist.1b
When Vertical Axis is Density
# Density histogram: bars and curve are both on the density scale, so no
# rescaling is applied.
hist.1c <- ggplot(data = df1, mapping = aes(x = v)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    color = "black",
    fill = "blue",
    breaks = b1
  ) +
  geom_density(mapping = aes(y = ..density..))
hist.1c
Try this instead:
# Bars use ..ncount.. and the curve uses ..scaled.., the normalised
# variants of count and density.
ggplot(data = df1, mapping = aes(x = v)) +
  geom_histogram(mapping = aes(y = ..ncount..)) +
  geom_density(mapping = aes(y = ..scaled..))
library(ggplot2)
#' Count histogram with a density curve rescaled to the same axis
#'
#' Builds the histogram first, measures the total area of its bars, and
#' multiplies the kernel density by that area so both share one y axis.
#'
#' @param dat Data frame holding the variable to plot.
#' @param y Name of the variable, as a string (passed to `aes_string()`).
#' @param bins Number of histogram bins.
#' @param xlabel X-axis label; defaults to `y`.
#' @param ... Further arguments forwarded to `stat_density()`.
#' @return A ggplot object.
smoothedHistogram <- function(dat, y, bins=30, xlabel = y, ...){
  hist_plot <- ggplot(dat, aes_string(y)) +
    geom_histogram(
      stat = "bin", bins = bins, center = 0.5,
      color = "#E07102", fill = I("midnightblue"), alpha = 0.8
    )

  # Total bar area (bar height times bar width), read from the computed
  # data of the histogram layer.
  bars <- ggplot_build(hist_plot)[["data"]][[1]]
  area <- sum(bars$y * (bars$xmax - bars$xmin))

  hist_plot <- hist_plot +
    stat_density(
      aes(y = ..density.. * area),
      geom = "line", color = "#BCBD22", size = 2, ...
    )

  # Swap the two layers so the density line becomes the first layer and
  # the histogram the second.
  hist_plot$layers <- rev(hist_plot$layers)

  hist_plot +
    xlab(xlabel) +
    theme_bw() +
    theme(
      axis.title = element_text(size = 16),
      axis.text = element_text(size = 12)
    )
}

# Example: 10,000 standard-normal draws.
dat <- data.frame(x = rnorm(10000))
smoothedHistogram(dat, "x")