how to overlay a line plot with a density plot? (R, ggplot2)

how to overlay a line plot with a density plot? (R, ggplot2) - r

Hi how do I overlap the following curves in one graph? Any help's appreciated. Thank you!
library(ggplot2)
x = -10:10
y = dnorm(x, mean=0, sd=3)
df.norm = data.frame('x'=x, 'y'=y)
ggplot(data=df.norm, aes(x=x, y=y)) +
geom_line() +
geom_point()
random = data.frame('x'=rnorm(1000, mean = 0, sd = 3))
ggplot(random, aes(x=x)) +
geom_density(size=1)
I tried the following and it didn't work
ggplot(data=df.norm, aes(x=x, y=y)) +
geom_line() +
geom_point() +
geom_density(random, aes(x=x), size=1)

library(ggplot2)
x = -10:10
y = dnorm(x, mean=0, sd=3)
df.norm = data.frame('x'=x, 'y'=y)
random = data.frame('x'=rnorm(1000, mean = 0, sd = 3))
ggplot() +
geom_line(data=df.norm, aes(x=x, y=y)) +
geom_point(data=df.norm, aes(x=x, y=y)) +
geom_density(data=random, aes(x=x), size=1)

ggplot2
A more concise version in ggplot2 using the argument inherit.aes = FALSE inside geom_density to override the default aesthetics used in the previous two layers.
library(ggplot2)
set.seed(2017)
x = -10:10
y = dnorm(x, mean = 0, sd = 3)
df.norm = data.frame('x' = x, 'y' = y)
random = data.frame('x' = rnorm(1000, mean = 0, sd = 3))
ggplot(data = df.norm, aes(x = x, y = y)) +
geom_line() +
geom_point() +
geom_density(data = random,
aes(x = x),
inherit.aes = FALSE,
size = 1)
Base
Adapting the solution provided by scoa to the base package:
plot(df.norm, type = "l", bty = "n", las = 1)
points(df.norm, pch= 19)
lines(density(random$x), lwd = 2)
Adding a legend, and a different colour for the density curve:
plot(df.norm, type = "l", bty="n", las = 1)
points(df.norm, pch= 19)
lines(density(random$x), lwd =2, col = 'orange')
legend(x = "topleft",
c("df.norm", "Density plot"),
col = c("black", "orange"),
lwd = c(2, 2),
bty = "n")

Related

Controlling ggplot2 legend order with new_scale_color() of ggnewscale

I can't seem to arrange the order of legend items when using the new_scale_color() feature of the ggnewscale package. Here's a minimal example:
n <- 10
sd_res <- 1
beta0 <- 0
beta1 <- 1
x <- runif(n, 0, 10)
y <- beta0 + beta1*x + rnorm(n, 0, sd_res)
dat <- data.frame(x,y)
ggplot() + theme_minimal() + labs(x = '', y = '') +
geom_abline(aes(color = 'x', linetype = 'x', slope = 1, intercept = 0)) +
geom_abline(aes(color = 'y', linetype = 'y', slope = 2, intercept = 0)) +
scale_color_manual(name = '', values = c('x' = 'orange', 'y' = 'black')) +
scale_linetype_manual(name = '', values = c('x' = 1, 'y' = 2)) +
new_scale_color() +
geom_point(data=dat, aes(x=x, y=y, shape = 'z', size = 'z', color = 'z'), alpha = 1) +
scale_size_manual(name = '', values = c('z' = 1.5)) +
scale_shape_manual(name = '', values = c('z' = 16)) +
scale_color_manual(name = '', values = c('z' = 'black')) +
theme(legend.position = 'bottom')
What I'd like is for the z-item to appear before the x- and y-items in the legend.

You could use guides with guide_legend and specify the order of your aes. Here I set the order of your "z" to 1 which will set it as the first legend like this:
n <- 10
sd_res <- 1
beta0 <- 0
beta1 <- 1
x <- runif(n, 0, 10)
y <- beta0 + beta1*x + rnorm(n, 0, sd_res)
dat <- data.frame(x,y)
library(ggplot2)
library(ggnewscale)
p <- ggplot() + theme_minimal() + labs(x = '', y = '') +
geom_abline(aes(color = 'x', linetype = 'x', slope = 1, intercept = 0)) +
geom_abline(aes(color = 'y', linetype = 'y', slope = 2, intercept = 0)) +
scale_color_manual(name = '', values = c('x' = 'orange', 'y' = 'black')) +
scale_linetype_manual(name = '', values = c('x' = 1, 'y' = 2)) +
new_scale_color() +
geom_point(data=dat, aes(x=x, y=y, shape = 'z', size = 'z', color = 'z'), alpha = 1) +
scale_size_manual(name = '', values = c('z' = 1.5)) +
scale_shape_manual(name = '', values = c('z' = 16)) +
scale_color_manual(name = '', values = c('z' = 'black')) +
theme(legend.position = 'bottom') +
guides(size = guide_legend(order = 1),
color = guide_legend(order = 1),
shape = guide_legend(order = 1))
p
Created on 2022-08-26 with reprex v2.0.2

When using ggnewscale, I don't recommend using guides() to define guides, since the names of the aesthetics change internally. It's better to use the guide argument of the scale_ function.
Example from ggnewscale documentation:
library(ggplot2)
library(ggnewscale)
# Equivalent to melt(volcano)
topography <- expand.grid(x = 1:nrow(volcano),
y = 1:ncol(volcano))
topography$z <- c(volcano)
# point measurements of something at a few locations
set.seed(42)
measurements <- data.frame(x = runif(30, 1, 80),
y = runif(30, 1, 60),
thing = rnorm(30))
# Two independent colour scales in one plot. The legend position of each
# scale is set through the `guide` argument of its own scale_ function;
# the guide with the lower `order` value is drawn first.
ggplot(mapping = aes(x, y)) +
  geom_contour(data = topography, aes(z = z, color = stat(level))) +
  # Color scale for topography (legend drawn second)
  scale_color_viridis_c(option = "D",
                        guide = guide_colorbar(order = 2)) +
  # geoms below will use another color scale
  new_scale_color() +
  geom_point(data = measurements, size = 3, aes(color = thing)) +
  # Color scale applied to geoms added after new_scale_color()
  # (order = 1 puts this legend ahead of the topography legend)
  scale_color_viridis_c(option = "A",
                        guide = guide_colorbar(order = 1))
Created on 2022-08-26 by the reprex package (v2.0.1)

Adding horizontal lines in the plot of a step function

I attach graphic, image.jpg, in which I want to draw the line y=0 for x<3 and the line y=1 for x>=8, i.e. the result would be image2.jpg.
These are the instructions for image.jpg.
df <- data.frame(x=Asignaturas, y=solF)
df$xend <- c(df$x[2:nrow(df)],NA)
df$yend <- df$y
p <- (ggplot(df, aes(x=x, y=y, xend=xend, yend=yend)) +
geom_vline(aes(xintercept=x), linetype=2,color="grey") +
geom_point() + # Solid points to left
geom_point(aes(x=xend, y=y), shape=1) + # Open points to right
geom_segment() + # Horizontal line
geom_text(aes(label = paste0(solF,''),vjust = -0.5), color = "black") +
ylab("Función de distribucción") +
xlab("Asignaturas"))
p
Does anyone know how to do it?
Thanks

An option using base R.
plot(df$x, df$y, xlim=c(2.5, max(df$x) + .5), ylim=c(0, 1.075), pch=19)
points(df$x + 1, df$y, pch=1)
segments(df$x, df$y, df$x + 1)
text(df$x, df$y + .05, df$y)
sapply(0:1, \(x) lines(2:3 + x*6, rep(x, 2), col='red'))
Or just
plot(df$x, df$y, type='s', xlim=c(2.5, max(df$x) + .5), ylim=c(0, 1.075))
text(df$x, df$y + .05, df$y)
sapply(0:1, \(x) lines(2:3 + x*6, rep(x, 2), col='red'))
Data:
df <- structure(list(y = c(0, 0.14, 0.31, 0.48, 0.67, 0.82, 1), x = c(2,
3, 4, 5, 6, 7, 8)), class = "data.frame", row.names = c(NA, -7L
))

If you want to stick with ggplot2, the geom_segment() will give what you're looking for. Make a data.frame of the parameters and then add the geom_segment().
# Extra red segments extending the step function beyond the data:
#   row 1: from -Inf up to the first x, at y = 0
#   row 2: from the last x out to +Inf, at the maximum y
extraLines <- data.frame(
  x = c(-Inf, max(df$x)),
  xend = c(min(df$x), Inf),
  y = c(0, max(df$y)),
  yend = c(0, max(df$y))
)
p +
  geom_segment(data = extraLines, aes(x = x, xend = xend, y = y, yend = yend), colour = "red") +
  # Solid point at the start of the right-hand segment.
  # subset() is used instead of filter() so the code does not silently
  # pick up stats::filter when dplyr is not attached.
  geom_point(data = subset(extraLines, x > 0), aes(x = x, y = y), colour = "red") +
  # Open point at the end of the left-hand segment
  geom_point(data = subset(extraLines, x < max(df$x)), aes(x = xend, y = y), shape = 1, colour = "red")

I'd like to paint an area but i don't know how to

I mean, I'd want to paint only the square area P1 X (Q1-Q2).
Not the trapezoid (P2+P1) X (Q1-Q2/2).
Here's code that I used. I used ggplot and dplyr. How can I solve this problem?
How can I paint only the square area, not the trapezoid area!!!!
library(ggplot2)
library(dplyr)
supply <- Hmisc::bezier(x = c(1, 8, 9),
y = c(1, 5, 9)) %>%
as_data_frame()
demand <- Hmisc::bezier(c(1, 3, 9),
c(9, 3, 1)) %>%
as_data_frame()
fun_supply <- approxfun(supply$x, supply$y, rule = 2)
fun_supply(c(2, 6, 8))
fun_demand <- approxfun(demand$x, demand$y, rule = 2)
intersection_funs <- uniroot(function(x) fun_supply(x) - fun_demand(x), c(1, 9))
intersection_funs
y_root <- fun_demand(intersection_funs$root)
curve_intersect <- function(curve1, curve2) {
  # Find the crossing point of two curves, each given as an object with
  # `x` and `y` components. Returns list(x = ..., y = ...).

  # Interpolating functions for each curve; with rule = 2, approxfun uses
  # the value at the closest data extreme outside the observed x range.
  interp_first <- approxfun(curve1$x, curve1$y, rule = 2)
  interp_second <- approxfun(curve2$x, curve2$y, rule = 2)

  # The crossing is the root of the vertical gap between the curves,
  # searched over the x range of curve1.
  gap <- function(x) interp_first(x) - interp_second(x)
  search_range <- c(min(curve1$x), max(curve1$x))
  point_x <- uniroot(gap, search_range)$root

  # The y coordinate is read off curve 2 at the crossing
  list(x = point_x, y = interp_second(point_x))
}
intersection_xy <- curve_intersect(supply, demand)
intersection_xy
intersection_xy_df <- intersection_xy %>% as_data_frame()
demand2 <- Hmisc::bezier(c(1.5, 3.5, 9.5),
c(9.5, 3.5, 1.5)) %>%
as_data_frame()
supply2 <- Hmisc::bezier(c(1,7,8),
c(3,7,11)) %>%
as_data_frame()
#Make a data frame of the intersections of the supply curve and both demand curves
intersections <- bind_rows(curve_intersect(supply, demand),
curve_intersect(supply2, demand2))
plot_labels <- data_frame(label = c("S", "D","S[1]","D[1]"),
x = c(9, 1, 6.5, 3),
y = c(8, 8, 8, 8))
ggplot(mapping = aes(x = x, y = y)) +
geom_path(data = supply, color = "#0073D9", size = 1, linetype = "dashed") +
geom_path(data = demand, color = "#FF4036", size = 1, linetype = "dashed") +
geom_path(data = demand2, color = "#FF4036", size = 1) +
geom_path(data = supply2, color = "#0073D9", size = 1) +
geom_segment(data = intersections,
aes(x = x, y = 0, xend = x, yend = y), lty = "dotted") +
geom_segment(data = intersections,
aes(x = 0, y = y, xend = x, yend = y), lty = "dotted") +
geom_segment(data = intersections,
aes(x = x, y = y, xend = x, yend= y), lty = "dotted") +
geom_point(data = intersections, size = 3) +
geom_text(data = plot_labels,
aes(x = x, y = y, label = label), parse = TRUE) +
scale_x_continuous(expand = c(0, 0), breaks = intersections$x,
labels = expression(Q[1], Q[2])) +
scale_y_continuous(expand = c(0, 0), breaks = intersections$y,
labels = expression(P[1], P[2]))+
labs(x = "Quantity", y = "Price") +
geom_area(data =intersections, fill="#9999FF", alpha=0.5) +
theme_classic() +
coord_equal()
Could you help me to paint the area that I mentioned.

You might try adding geom_rect(data=intersections[1,], aes(xmin=0, xmax=x, ymin=0, ymax=y),fill='green', alpha=0.5) to your plot call.
So we have:
ggplot(mapping = aes(x = x, y = y)) +
geom_path(data = supply, color = "#0073D9", size = 1, linetype = "dashed") +
geom_path(data = demand, color = "#FF4036", size = 1, linetype = "dashed") +
geom_path(data = demand2, color = "#FF4036", size = 1) +
geom_path(data = supply2, color = "#0073D9", size = 1) +
geom_segment(data = intersections,
aes(x = x, y = 0, xend = x, yend = y), lty = "dotted") +
geom_segment(data = intersections,
aes(x = 0, y = y, xend = x, yend = y), lty = "dotted") +
geom_segment(data = intersections,
aes(x = x, y = y, xend = x, yend= y), lty = "dotted") +
geom_point(data = intersections, size = 3) +
geom_text(data = plot_labels,
aes(x = x, y = y, label = label), parse = TRUE) +
scale_x_continuous(expand = c(0, 0), breaks = intersections$x,
labels = expression(Q[1], Q[2])) +
scale_y_continuous(expand = c(0, 0), breaks = intersections$y,
labels = expression(P[1], P[2]))+
labs(x = "Quantity", y = "Price") +
geom_area(data =intersections, fill="#9999FF", alpha=0.5) +
theme_classic() +
coord_equal()+
geom_rect(data=intersections[1,], aes(xmin=0, xmax=x, ymin=0, ymax=y),fill='green', alpha=0.5)
Edit based on comment:
geom_rect(data=intersections, aes(xmin=x[2], xmax=x[1], ymin=0, ymax=y[1]),fill='green', alpha=0.5)

Though the answer from J Con is in depth and does provide a solution, a cleaner approach in ggplot2 may be to use the annotate function, with geom and other arguments set appropriately. (See link for help page.)
This is because using something like geom_rect involves passing positions and so on as a data.frame, which is a bit more of a hack as, conceptually, from a grammar of graphics perspective, the data layer and the annotation layer are distinct: the act of mapping data variables to graphical aesthetics in a systematic and objective way, and of marking up features within the dataset in a piecemeal and subjective way, are separate activities, and using annotate explicitly for the latter purpose makes this divide clearer in terms of the code and concepts.
Edit
To be more specific, the annotate equivalent of the following:
geom_rect(data=intersections, aes(xmin=x[2], xmax=x[1], ymin=0, ymax=y[1]),fill='green', alpha=0.5)
Would likely be as follows
annotate(
  geom = "rect",
  # a rect is defined by its four edges: xmin/xmax and ymin/ymax
  xmin = intersections$x[2], xmax = intersections$x[1],
  ymin = 0, ymax = intersections$y[1],
  fill = 'green', alpha = 0.5
)
Functionally this is exactly the same, but conceptually it makes the separation between the data layer and the annotation layer much clearer in the code expressed.
Note: Annotate could also be used for the points and text.

#What causes different behaviour between stats and ggplot2 when writing histograms, normal curves and qqplots to .pdf?

I need to produce plots for statistical analyses and I am stumped by a difference in behaviour between stats and ggplot. Who can help out?
I am trying to produce a pdf with histograms, including normal curves, side-by-side with qqplots, with the next plot continuing on the same page. Preferably using ggplot (because prettier plots). I have a large number of variables in my real dataset, so I am using a 'for' loop.
library(ggplot2)
library(stats)
library(datasets)
This piece of ggplot code does what I want it to do.
ggplot(airquality, aes(Wind)) +
geom_histogram(aes(y = ..density..),colour = "black", fill = "white") +
stat_function(fun = dnorm, args = list(mean = mean(airquality$Wind), sd = sd(airquality$Wind)), colour = "red", size = 1) +
xlab("Wind")
qplot(sample = airquality$Wind, stat = "qq")
I am fine with the binwidth warning, I want that picked automatically, and I will build in a suppression for that message later on. I am not sure what to do though with: '"stat" is deprecated' Anyone?
If I try to work this into a 'for' loop, I cannot get it to work. It keeps putting every plot on a new page and it leaves out the normal curves:
Variablesairquality<-c("Wind", "Temp", "Month", "Day")
pdf(file = "Normality.pdf", 4, 5)
par(mfrow = c(2,2))
for(i in Variablesairquality){
plot(ggplot(airquality, aes(airquality[,i])) +
geom_histogram(aes(y = ..density..),colour = "black", fill = "white") +
stat_function(fun = dnorm, args = list(mean = mean(airquality[,i]), sd = sd(airquality[,i])), colour = "red", size = 1) +
xlab(i)
)
plot(qplot(sample = airquality[,i], stat = "qq" )
)
}
dev.off()
Which I don’t get, because if I try it using stats, it does exactly what I want:
pdf(file = "Normality2.pdf", 4, 5)
par(mfrow = c(2,2))
for(i in Variablesairquality){
h <- hist(airquality[,i], col = "white", cex.axis=0.50, xlab = i, cex.lab=0.75, main = paste("Distribution"), cex.main= 0.75)
xfit<-seq(min(airquality[,i]),max(airquality[,i]),length=length(airquality[,i]))
yfit<-dnorm(xfit,mean=mean(airquality[,i]),sd=sd(airquality[,i]))
yfit <- yfit*diff(h$mids[1:2])*length(airquality[,i])
lines(xfit, yfit, col="red", lwd=1)
qqnorm(airquality[,i], cex = 0.5, cex.axis=0.50, cex.lab=0.75, main = expression("Q-Q plot for"~paste(i)), cex.main= 0.75)
qqline(airquality[,i], col = "red")
}
dev.off()
(Except for the thing with the main label, which I still need to figure out. Anyone any tips?)
I would be most grateful if someone could point out the mistake in my ggplot code or otherwise explain this behaviour. Thanks!
I use R-programming V3.2.3 and R-studio v0.99.891. (And yes, I read every similar item here, scoured the internet and I read the help files; that did not get me where I need to go.)

On `stat` is deprecated, see Deprecated features in the ggplot2 2.0.0 release notes. Use instead:
ggplot(airquality, aes(sample = Wind)) +
stat_qq()
If you don't wish to use gridExtra::grid.arrange, here's an approach that uses facets. Begin by wrangling the data into a new dataframe with the values we want for x, y, plot type, and geom variables:
# Build one long data frame holding everything both facets need. The
# `plot` column selects the facet and `geom` selects the layer.

# Q-Q plot points: theoretical (x) vs sample (y) quantiles, computed
# without drawing anything
d <- as.data.frame(qqnorm(airquality$Wind, plot.it = FALSE))
d$plot <- "QQ plot"
d$geom <- "point"

# Raw values for the histogram (y is filled in by the density stat)
d <- rbind(d, data.frame(x = airquality$Wind, y = NA,
                         plot = "Histogram", geom = "bar"))

# Normal density curve evaluated on a 100-point grid over the data range
d <- rbind(d, with(airquality, {
  grid <- seq(min(Wind), max(Wind), length.out = 100)
  data.frame(x = grid,
             y = dnorm(grid, mean = mean(Wind), sd = sd(Wind)),
             plot = "Histogram", geom = "line")
}))
Then call ggplot, subsetting the data as appropriate for each geom:
ggplot(d, aes(x = x, y = y)) + facet_wrap(~plot, scales = "free") +
geom_histogram(data = subset(d, plot == "Histogram" & geom == "bar"),
aes(y = ..density..),
colour = "black", fill = "white") +
geom_line(data = subset(d, plot == "Histogram" & geom == "line"),
colour = "red", size = 1) +
geom_point(data = subset(d, plot == "QQ plot")) +
labs(x = "Wind")
Output:
To do multiple plots, you can wrap the code above into a for loop, making sure to wrap ggplot inside print:
# Write one PDF page per variable: a histogram with a normal curve in one
# facet and a normal Q-Q plot in the other.
pdf("path/to/pdf/out.pdf")
Variablesairquality <- c("Wind", "Temp", "Month", "Day")
# Iterate in the listed order so pages follow the variable list
for (i in Variablesairquality) {
  x <- airquality[[i]]

  # Q-Q plot points, computed without drawing
  d <- as.data.frame(qqnorm(x, plot.it = FALSE))
  d$plot <- "QQ plot"
  d$geom <- "point"

  # Raw values for the histogram (y is filled in by the density stat)
  d <- rbind(d, data.frame(x = x, y = NA, plot = "Histogram", geom = "bar"))

  # Normal density curve on a 100-point grid over the data range
  grid <- seq(min(x), max(x), length.out = 100)
  d <- rbind(d, data.frame(x = grid,
                           y = dnorm(grid, mean = mean(x), sd = sd(x)),
                           plot = "Histogram", geom = "line"))

  # ggplot objects are not auto-printed inside a loop, hence print()
  print(
    ggplot(d, aes(x = x, y = y)) + facet_wrap(~plot, scales = "free") +
      geom_histogram(data = subset(d, plot == "Histogram" & geom == "bar"),
                     aes(y = ..density..),
                     colour = "black", fill = "white") +
      geom_line(data = subset(d, plot == "Histogram" & geom == "line"),
                colour = "red", size = 1) +
      geom_point(data = subset(d, plot == "QQ plot")) +
      labs(x = i)
  )
}
dev.off()

Can't label multi-panel figure in r while using ggplot

Sorry for the possibly simple question. I'm a programmer, though I rarely deal with graphics, and after tearing my hair out for hours with this problem, it's time to get some help. I'm creating a multi-panel plot in r using ggplot, but I cannot find a way to display figure labels, outside of the figure, when using ggplot.
Here is what I want my code to do:
par(mfrow = c(1, 2), pty = "s", las = 1, mgp = c(2, 0.4, 0), tcl = -0.3)
qqnorm(rnorm(100), main = "")
mtext("a", side = 3, line = 1, adj = 0, cex = 1.1)
qqnorm(rnorm(100), main = "")
mtext("b", side = 3, line = 1, adj = 0, cex = 1.1)
How would I get those "a" and "b" labels, in the location that they are in for the figure created by the above code, into this type of code:
df = data.frame(gp = factor(rep(letters[1:3], each = 10)), y = rnorm(30))
p = ggplot(df) + geom_point(aes(x = gp, y = y))
p2 = ggplot(df) + geom_point(aes(x = y, y = gp))
grid.arrange(p, p2, ncol = 2)
Thank you in advance for your help!

You could use ggtitle and theme:
df = data.frame(gp = factor(rep(letters[1:3], each = 10)), y = rnorm(30))
p = ggplot(df) + geom_point(aes(x = gp, y = y)) + ggtitle('a') + theme(plot.title=element_text(hjust=0))
p2 = ggplot(df) + geom_point(aes(x = y, y = gp)) + ggtitle('b') + theme(plot.title=element_text(hjust=0))
grid.arrange(p, p2, ncol = 2)

Two (less than ideal) options:
#Use faceting, similar to Matthew's ggtitle option
df = data.frame(gp = factor(rep(letters[1:3], each = 10)), y = rnorm(30))
df$lab1 <- 'a'
df$lab2 <- 'b'
p = ggplot(df) + geom_point(aes(x = gp, y = y)) + facet_wrap(~lab1)
p2 = ggplot(df) + geom_point(aes(x = y, y = gp)) + facet_wrap(~lab2)
j <- theme(strip.text = element_text(hjust = 0.05))
grid.arrange(p + j, p2 + j, ncol = 2)
#Use grid.text
grid.text(letters[1:2],x = c(0.09,0.59),y = 0.99)
For the grid.text option, if you delve into the ggplot object you can probably avoid having to tinker to get those values right manually.

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

how to overlay a line plot with a density plot? (R, ggplot2) - r

library(ggplot2) x = -10:10 y = dnorm(x, mean=0, sd=3) df.norm = data.frame('x'=x, 'y'=y) random = data.frame('x'=rnorm(1000, mean = 0, sd = 3)) ggplot() + geom_line(data=df.norm, aes(x=x, y=y)) + geom_point(data=df.norm, aes(x=x, y=y)) + geom_density(data=random, aes(x=x), size=1)

Related

Controlling ggplot2 legend order with new_scale_color() of ggnewscale

Adding horizontal lines in the plot of a step function

I'd like to paint an area but i don't know how to

#What causes different behaviour between stats and ggplot2 when writing histograms, normal curves and qqplots to .pdf?

Can't label multi-panel figure in r while using ggplot

Categories

Resources