plotting modified point and line plot - variability as "spike" plot in r - r

Before explaining details, here is my data:
set.seed (1234)
datas <- data.frame (Indv = 1:20, Xvar = rnorm (20, 50, 10),
Yvar = rnorm (20, 30,5), Yvar1 = rnorm (20, 10, 2),
Yvar2 = rnorm (20, 5, 1), Yvar3 = rnorm (20, 100, 20),
Yvar4 = rnorm (20, 15, 3))
I want to prepare a graph (Metroglymph ) which is essentially point plot however points (of Xvar and Yvar) with spikes (lines) orignated from the point scaled to rest of variables (Yvar1, Yvar2, Yvar3, Yvar4).
Each spike are ordered and preferably color coded.
require(ggplot2)
ggplot(datas, aes(x=Xvar, y=Yvar)) +
geom_point(shape=1, size = 10) + theme_bw()

Here is one possible approach that may be helpful to you. It uses stat_spoke() from ggplot2. Each of your y-variables is mapped to the spoke radius in 4 separate calls to stat_spoke.
plot_1 = ggplot(datas, aes(x=Xvar, y=Yvar)) +
stat_spoke(aes(angle=(1/8)*pi, radius=Yvar1), colour="#E41A1C",size=1) +
stat_spoke(aes(angle=(3/8)*pi, radius=Yvar2), colour="#377EB8",size=1) +
stat_spoke(aes(angle=(5/8)*pi, radius=Yvar3), colour="#4DAF4A",size=1) +
stat_spoke(aes(angle=(7/8)*pi, radius=Yvar4), colour="#984EA3",size=1) +
geom_point(shape=1, size = 10)
ggsave("plot_1.png", plot_1)
Depending on your data and specific needs, it may make sense to transform the variables so they fit better on the plot.
normalize = function(x) {
new_x = (x - mean(x)) / sd(x)
new_x = new_x + abs(min(new_x))
return(new_x)
}
plot_2 = ggplot(datas, aes(x=Xvar, y=Yvar)) +
stat_spoke(aes(angle=(1/8)*pi, radius=normalize(Yvar1)), colour="#E41A1C", size=1) +
stat_spoke(aes(angle=(3/8)*pi, radius=normalize(Yvar2)), colour="#377EB8", size=1) +
stat_spoke(aes(angle=(5/8)*pi, radius=normalize(Yvar3)), colour="#4DAF4A", size=1) +
stat_spoke(aes(angle=(7/8)*pi, radius=normalize(Yvar4)), colour="#984EA3", size=1) +
geom_point(shape=1, size = 10)
ggsave("plot_2.png", plot_2)
Important caveat: For the same spoke radius value, the magnitude of the plotted line will be greater if the line is more vertical, and less if the line is more horizontal. This is because the range of x is around twice the range of y for your data set. The plotted angles also become distorted as the x-to-y axis ratio changes. Adding coord_equal(ratio=1) solves this issue, but may introduce other problems.
Edit: Plotting without a loop
This was fun and educational to figure out. Possibly it would have been more time-efficient to type the repetitive code! If anyone can offer advice to improve this code, please comment.
library(reshape2)
dat2 = melt(datas, id.vars=c("Indv", "Xvar", "Yvar"),
variable.name="spoke_var", value.name="spoke_value")
# Apply normalization in a loop. Can plyr do this more gracefully?.
for (var_name in levels(dat2$spoke_var)) {
select_rows = dat2$spoke_var == var_name
norm_dat = normalize(dat2[select_rows, "spoke_value"])
dat2[select_rows, "spoke_value"] = norm_dat
}
# Pick an angle for each Yvar, then add angle column to dat2.
tmp = data.frame(spoke_var=unique(dat2$spoke_var))
tmp$spoke_angle = seq(from=pi/8, by=pi/4, length.out=nrow(tmp))
dat2 = merge(dat2, tmp)
plot_4 = ggplot(dat2, aes(x=Xvar, y=Yvar)) +
stat_spoke(data=dat2, size=1,
aes(colour=spoke_var, angle=spoke_angle, radius=spoke_value)) +
geom_point(data=datas, aes(x=Xvar, y=Yvar), shape=1, size=7) +
coord_equal(ratio=1) +
scale_colour_brewer(palette="Set1")

Here is more manual approach:
set.seed (1234)
datas <- data.frame (Indv = 1:20, Xvar = rnorm (20, 50, 10),
Yvar = rnorm (20, 30,5), Yvar1 = rnorm (20, 10, 2),
Yvar2 = rnorm (20, 5, 1), Yvar3 = rnorm (20, 100, 20),
Yvar4 = rnorm (20, 15, 3))
datas$SYvar1 <- 2 + scale (datas$Yvar1)
datas$SYvar2 <- 2 + scale (datas$Yvar2)
datas$SYvar3 <- 2 + scale (datas$Yvar3)
datas$SYvar4 <- 2 + scale (datas$Yvar4)
require(ggplot2)
p <- ggplot(datas, aes(x=Xvar, y=Yvar)) +
geom_point(size = 10, pch = 19, col = "yellow2")
p + geom_segment(aes(x = Xvar, y = Yvar, xend = Xvar + SYvar1,
yend = Yvar), col = "red4", size = 1) +
geom_segment(aes(x = Xvar, y = Yvar, xend = Xvar,
yend = Yvar + SYvar2), col = "green4", size = 1) +
geom_segment(aes(x = Xvar, y = Yvar, xend = Xvar-2.5,
yend = Yvar + SYvar3), col = "darkblue", size = 1) +
geom_segment(aes(x = Xvar, y = Yvar, xend =
Xvar - SYvar4, yend = Yvar ), col = "red", size = 1) +
theme_bw()

Related

Overlaying histogram and density estimate [duplicate]

I am trying to make a histogram of density values and overlay that with the curve of a density function (not the density estimate).
Using a simple standard normal example, here is some data:
x <- rnorm(1000)
I can do:
q <- qplot( x, geom="histogram")
q + stat_function( fun = dnorm )
but this gives the scale of the histogram in frequencies and not densities. with ..density.. I can get the proper scale on the histogram:
q <- qplot( x,..density.., geom="histogram")
q
But now this gives an error:
q + stat_function( fun = dnorm )
Is there something I am not seeing?
Another question, is there a way to plot the curve of a function, like curve(), but then not as layer?
Here you go!
# create some data to work with
x = rnorm(1000);
# overlay histogram, empirical density and normal density
p0 = qplot(x, geom = 'blank') +
geom_line(aes(y = ..density.., colour = 'Empirical'), stat = 'density') +
stat_function(fun = dnorm, aes(colour = 'Normal')) +
geom_histogram(aes(y = ..density..), alpha = 0.4) +
scale_colour_manual(name = 'Density', values = c('red', 'blue')) +
theme(legend.position = c(0.85, 0.85))
print(p0)
A more bare-bones alternative to Ramnath's answer, passing the observed mean and standard deviation, and using ggplot instead of qplot:
df <- data.frame(x = rnorm(1000, 2, 2))
# overlay histogram and normal density
ggplot(df, aes(x)) +
geom_histogram(aes(y = after_stat(density))) +
stat_function(
fun = dnorm,
args = list(mean = mean(df$x), sd = sd(df$x)),
lwd = 2,
col = 'red'
)
What about using geom_density() from ggplot2? Like so:
df <- data.frame(x = rnorm(1000, 2, 2))
ggplot(df, aes(x)) +
geom_histogram(aes(y=..density..)) + # scale histogram y
geom_density(col = "red")
This also works for multimodal distributions, for example:
df <- data.frame(x = c(rnorm(1000, 2, 2), rnorm(1000, 12, 2), rnorm(500, -8, 2)))
ggplot(df, aes(x)) +
geom_histogram(aes(y=..density..)) + # scale histogram y
geom_density(col = "red")
I'm trying for iris data set. You should be able to see graph you need in these simple code:
ker_graph <- ggplot(iris, aes(x = Sepal.Length)) +
geom_histogram(aes(y = ..density..),
colour = 1, fill = "white") +
geom_density(lwd = 1.2,
linetype = 2,
colour = 2)

ggplot histogram and distribution same levels [duplicate]

I am trying to make a histogram of density values and overlay that with the curve of a density function (not the density estimate).
Using a simple standard normal example, here is some data:
x <- rnorm(1000)
I can do:
q <- qplot( x, geom="histogram")
q + stat_function( fun = dnorm )
but this gives the scale of the histogram in frequencies and not densities. with ..density.. I can get the proper scale on the histogram:
q <- qplot( x,..density.., geom="histogram")
q
But now this gives an error:
q + stat_function( fun = dnorm )
Is there something I am not seeing?
Another question, is there a way to plot the curve of a function, like curve(), but then not as layer?
Here you go!
# create some data to work with
x = rnorm(1000);
# overlay histogram, empirical density and normal density
p0 = qplot(x, geom = 'blank') +
geom_line(aes(y = ..density.., colour = 'Empirical'), stat = 'density') +
stat_function(fun = dnorm, aes(colour = 'Normal')) +
geom_histogram(aes(y = ..density..), alpha = 0.4) +
scale_colour_manual(name = 'Density', values = c('red', 'blue')) +
theme(legend.position = c(0.85, 0.85))
print(p0)
A more bare-bones alternative to Ramnath's answer, passing the observed mean and standard deviation, and using ggplot instead of qplot:
df <- data.frame(x = rnorm(1000, 2, 2))
# overlay histogram and normal density
ggplot(df, aes(x)) +
geom_histogram(aes(y = after_stat(density))) +
stat_function(
fun = dnorm,
args = list(mean = mean(df$x), sd = sd(df$x)),
lwd = 2,
col = 'red'
)
What about using geom_density() from ggplot2? Like so:
df <- data.frame(x = rnorm(1000, 2, 2))
ggplot(df, aes(x)) +
geom_histogram(aes(y=..density..)) + # scale histogram y
geom_density(col = "red")
This also works for multimodal distributions, for example:
df <- data.frame(x = c(rnorm(1000, 2, 2), rnorm(1000, 12, 2), rnorm(500, -8, 2)))
ggplot(df, aes(x)) +
geom_histogram(aes(y=..density..)) + # scale histogram y
geom_density(col = "red")
I'm trying for iris data set. You should be able to see graph you need in these simple code:
ker_graph <- ggplot(iris, aes(x = Sepal.Length)) +
geom_histogram(aes(y = ..density..),
colour = 1, fill = "white") +
geom_density(lwd = 1.2,
linetype = 2,
colour = 2)

Generate two plots with same x axis breaks

I have two plots I want the x axes being broken by the same way.
This is the code for plot 1:
m <- read.csv('Finalfor1lowergreaterthan1.csv', header=T, row.names=1)
m <- m[m$SVM.Count >= 40,]
boxOdds = m$Odd
df <- data.frame(
yAxis = length(boxOdds):1,
boxnucleotide = m$Position,
boxCILow = m$lower,
boxCIHigh = m$upper,
Mutation = m$Resistance)
ticksy <- c(seq(0,0.3,by=.1), seq(0, 1, by =.5), seq(0, 20, by =5), seq(0, 150, by =50))
ticksx <- c(seq(0,300,by=25))
p <- ggplot(df,
aes(x = boxnucleotide, y = boxOdds, colour=Mutation, label=rownames(m)))
p1 <- p +
geom_errorbar(aes(ymax = boxCIHigh, ymin = boxCILow), size = .5, height = .01) +
geom_point(size = 1) +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
scale_y_continuous(breaks=ticksy, labels = ticksy) +
scale_x_continuous(breaks=ticksx, labels = ticksx) +
coord_trans(y = "log10") +
ylab("Odds ratio (log scale)") +
scale_color_manual(values=c("#00BFC4","#F8766D","#619CFF")) +
xlab("Integrase nucleotide position") +
geom_text(size=2,hjust=0, vjust=0)
Then I have another plot:
m <- read.csv('Finalfor20lowergreaterthan1.csv', header=T, row.names=1)
#m <- m[m$SVM.Count >= 40, ]
boxOdds = m$Odd
df <- data.frame(
yAxis = length(boxOdds):1,
boxnucleotide = m$Position,
boxCILow = m$lower,
boxCIHigh = m$upper,
Mutation = m$Resistance)
ticksy <- c(seq(0,0.3,by=.1), seq(0, 1, by =.5), seq(0, 20, by =5), seq(0, 150, by =50))
ticksx <- c(seq(0,300,by=25))
p <- ggplot(df,
aes(x = boxnucleotide, y = boxOdds, colour=Mutation, label=rownames(m)))
p1 <- p +
geom_errorbar(aes(ymax = boxCIHigh, ymin = boxCILow), size = .5, height = .01) +
geom_point(size = 1) +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
scale_y_continuous(breaks=ticksy, labels = ticksy) +
scale_x_continuous(breaks=ticksx, labels = ticksx) +
coord_trans(y = "log10") +
ylab("Odds ratio (log scale)") +
scale_color_manual(values=c("#00BFC4","#F8766D","#619CFF")) +
xlab("Integrase nucleotide position") +
geom_text(size=2,hjust=0, vjust=0)
Why is plot 1 starting from 75 on x axis and plot 2 starting at 100...how can plot2 start at 75 as well and being scaled like plot 1.
The two codes get the same piece of: ticksx <- c(seq(0, 300, by=25))
A good technique to align the axis range on different plots is to use expand_limits.
You can simply use p1 + expand_limits(x=c(0, 300)). This will ensure the x-axis contains at least 0 and 300 on all your plots. You can also control the y-axis range by using the y argument.
From ?expand_limits:
Sometimes you may want to ensure limits include a single value, for all panels or all plots. This function is a thin wrapper around geom_blank() that makes it easy to add such values.

R: plotting pdf in a certain interval

I want to plot a probability function in a certain interval. The x-axes has to be longer than the interval because there are more pdf's in one plot.
But with the growing of the x-axes, the pdf generating data too, although the code implied the certain interval.
Code:
p1 <- ggplot() +
stat_function(data=data.frame(x=c(2,30)),aes(x),fun = dnorm, n = 101,
args= list(mean=5,sd=1),color="black")+
xlim(-5,80)+
scale_y_continuous(breaks = NULL)
the pdf in p1 generate data until x=80. But x values in the code are in a vector until x=30.
How could I prevent that the pdf produces values until 80 or how have to be the code that the distribution stops at x=30?
We can construct a dataframe with the only the x-values you want plotted on the fly using dplyr::data_frame. I added another p.d.f. to demonstrate that the way you want it presented will work.
library(dplyr)
# to use data_frame
ggplot() +
geom_line(data=data_frame(x=seq(2,30, 0.25), y = dnorm(x, mean = 5, sd = 1)),
aes(x, y), color = "black") +
geom_line(data=data_frame(x=seq(40,70, 0.25), y = dnorm(x, mean = 60, sd = 5)),
aes(x, y), color = "red") +
xlim(-5,80)+
scale_y_continuous(breaks = NULL)
Update
You can also label them like so, by moving the color= inside the aes(...):
ggplot() +
geom_line(data=data_frame(x=seq(2,30, 0.25), y = dnorm(x, mean = 5, sd = 1)),
aes(x, y, color = "mean:5, sd:1")) +
geom_line(data=data_frame(x=seq(40,70, 0.25), y = dnorm(x, mean = 60, sd = 5)),
aes(x, y, color = "mean:60, sd:5")) +
xlim(-5,80)+
scale_y_continuous(breaks = NULL) +
scale_color_manual(values = c("mean:5, sd:1" = "black",
"mean:60, sd:5" = "red"))
Update2
Your density function works for me.
dTDF<-function(x,g,a,b,k){
exp(-exp(-(x/a)+((a*k)/(g-x))-b))*(exp(-(x/a)+((a*k)/(g-x))-b))*((1/a)-((a*k)/((g-x)^2)))
}
df1 <- data_frame(x=seq(2500,11300, 100),
y = dTDF(x,g=11263,a=1185, b=-4, k=-0.5))
df2 <- data_frame(x=seq(7000,14300, 100),
y = dTDF(x,g=15263,a=1105, b=-10, k=-0.5))
ggplot() +
geom_line(data = df1, aes(x, y))+
geom_line(data = df2, aes(x, y)) +
xlim(1000, 15000)

Normal Curve Overlay in ggplot2 [duplicate]

I am trying to make a histogram of density values and overlay that with the curve of a density function (not the density estimate).
Using a simple standard normal example, here is some data:
x <- rnorm(1000)
I can do:
q <- qplot( x, geom="histogram")
q + stat_function( fun = dnorm )
but this gives the scale of the histogram in frequencies and not densities. with ..density.. I can get the proper scale on the histogram:
q <- qplot( x,..density.., geom="histogram")
q
But now this gives an error:
q + stat_function( fun = dnorm )
Is there something I am not seeing?
Another question, is there a way to plot the curve of a function, like curve(), but then not as layer?
Here you go!
# create some data to work with
x = rnorm(1000);
# overlay histogram, empirical density and normal density
p0 = qplot(x, geom = 'blank') +
geom_line(aes(y = ..density.., colour = 'Empirical'), stat = 'density') +
stat_function(fun = dnorm, aes(colour = 'Normal')) +
geom_histogram(aes(y = ..density..), alpha = 0.4) +
scale_colour_manual(name = 'Density', values = c('red', 'blue')) +
theme(legend.position = c(0.85, 0.85))
print(p0)
A more bare-bones alternative to Ramnath's answer, passing the observed mean and standard deviation, and using ggplot instead of qplot:
df <- data.frame(x = rnorm(1000, 2, 2))
# overlay histogram and normal density
ggplot(df, aes(x)) +
geom_histogram(aes(y = after_stat(density))) +
stat_function(
fun = dnorm,
args = list(mean = mean(df$x), sd = sd(df$x)),
lwd = 2,
col = 'red'
)
What about using geom_density() from ggplot2? Like so:
df <- data.frame(x = rnorm(1000, 2, 2))
ggplot(df, aes(x)) +
geom_histogram(aes(y=..density..)) + # scale histogram y
geom_density(col = "red")
This also works for multimodal distributions, for example:
df <- data.frame(x = c(rnorm(1000, 2, 2), rnorm(1000, 12, 2), rnorm(500, -8, 2)))
ggplot(df, aes(x)) +
geom_histogram(aes(y=..density..)) + # scale histogram y
geom_density(col = "red")
I'm trying for iris data set. You should be able to see graph you need in these simple code:
ker_graph <- ggplot(iris, aes(x = Sepal.Length)) +
geom_histogram(aes(y = ..density..),
colour = 1, fill = "white") +
geom_density(lwd = 1.2,
linetype = 2,
colour = 2)

Resources