I would like to add a mean of valuus to windows in a scatter plot I have. I created the scatter plot with ggplot2
p <- ggplot(mtcars, aes(wt, mpg))
p + geom_point()
This will give the scatter plot but I woudl like to add add the mean of a window (say size equals 1) and plot this points of the mean as a line. Additionally I woudl like to have vertical bars at each point to indicate the variance.
Mtcars is the data set standard available in ggplot 2
This uses the new dplyr library.
library(dplyr)
forLines <- mtcars %.%
group_by(cut(wt, breaks = 6)) %.%
summarise(mean_mpg = mean(mpg), mean_wt = mean(wt))
p +
geom_point(size=5) +
geom_boxplot(aes(group = cut(wt, breaks = 6))) +
geom_line(data=forLines,aes(x=mean_wt,y=mean_mpg))
Maybe this is what you're looking for:
library(ggplot2)
s <- seq(0, ceiling(max(mtcars$wt)), 1)
ind <- as.integer(cut(mtcars$wt, s))
myfun <- function(i)
c(y = mean(i), ymin = mean(i) - var(i), ymax = mean(i) + var(i))
ggplot(mtcars, aes(wt, mpg)) +
geom_point() +
stat_summary(fun.data = myfun, aes(group = ind, x = ind - .5),
colour = "red") +
stat_summary(fun.y = mean, aes(x = ind - .5), geom = "line",
colour = "red")
Is this what you want?
p <- ggplot(mtcars, aes(wt, mpg))
p + geom_point() + geom_smooth(aes(wt, mpg, group=1), method = "lm")
Related
I am trying to highlight a specific point in a distribution. I am using geom_vline which works fine except that it extends beyond the curve drawn by geom_density. Is there any way to cut that vertical line off a the point it meets the curve drawn by geom_density?
library(ggplot2)
mtcars$car <- rownames(mtcars)
javelin <- mtcars[mtcars$car == "AMC Javelin", ]
ggplot(mtcars) +
geom_density(aes(x = mpg)) +
geom_vline(data = javelin, aes(xintercept = mpg))
You can't use geom_vline here. You'll have to calculate the correct point and use geom_segment, but this really isn't that complex:
ggplot(mtcars) +
geom_density(aes(x = mpg)) +
geom_segment(data = as.data.frame(density(mtcars$mpg)[1:2]) %>%
filter(seq(nrow(.)) == which.min(abs(x - javelin$mpg))),
aes(x, 0, xend = x, yend = y))
If you want this to be easily generalised, you can define a little helper function:
geom_vdensity <- function(data, at, ...) {
ggplot2::geom_segment(
data = dplyr::filter(as.data.frame(density(data)[1:2]),
seq_along(x) == which.min(abs(x - at))),
ggplot2::aes(x, 0, xend = x, yend = y), ...)
}
This allows:
ggplot(mtcars) +
geom_density(aes(x = mpg)) +
geom_vdensity(data = mtcars$mpg, at = javelin$mpg, color = "red")
and
ggplot(iris) +
geom_density(aes(x = Petal.Length)) +
geom_vdensity(data = iris$Petal.Length, at = 4)
I am trying to plot a graph which involved both a geom_point() function and a geom_smooth() function.
I would like to crop the geom_smooth() function at a certain x-value, BUT only after all its values have been used to calculate the smoothed curve (i.e. I do NOT want to use xlim(), which will remove the values from being used for plotting).
Reproducible example:
library(dplyr)
library(ggplot2)
set.seed(42)
test <- data.frame(replicate(2,sample(0:10,100,rep=TRUE)))
g <- ggplot() + geom_point(data = test, aes(x = X1, y = X2))
t_i <- test
t_i$group <- as.factor(as.numeric(cut(t_i$X1, 25)))
summar_t <- t_i %>%
group_by(group) %>%
summarise(y_mean=mean(X2),
y_sd=sd(X2),
c_mean =mean(X1,na.rm=T),
n =n()
)
summar_t$t_2sd <- summar_t$y_mean + summar_t$y_sd*2
g2 <- g + geom_smooth(data = summar_t, aes(x=c_mean, y = t_2sd), se=FALSE, method = lm, formula=y~poly(x,2), color = "black", linetype=3)
You can use the argument xseq – which is passed to StatSmooth$compute_group – as follows :
library(ggplot2)
ggplot(mtcars, aes(hp, mpg)) +
geom_point() +
geom_smooth(se = FALSE) +
geom_smooth(se = FALSE, xseq = 100:200, col = "red")
Result
I would appreciate any help to apply the transparent background colours below to
divide into two parts the plot area based on x-values as illustrated in the plot below (vertical division).
Here are my sample data and code:
mtcars$cyl <- as.factor(mtcars$cyl)
ggplot(mtcars, aes(x=wt, y=mpg, color=cyl)) +
geom_point() +
theme(legend.position="none")+
geom_smooth(method=lm, se=FALSE, fullrange=TRUE)
Here is the plot I would like to replicate, and the legend illustrates the change I want to implement:
Thank you in advance.
I think you want something like this. You'll have to designate groups and fill by that group in your geom_ribbon, and set your ymin and ymax as you like.
library(tidyverse)
mtcars$group <- ifelse(mtcars$wt <= 3.5, "<= 3.5", "> 3.5")
mtcars <- arrange(mtcars, wt)
mtcars$group2 <- rleid(mtcars$group)
mtcars_plot <- head(do.call(rbind, by(mtcars, mtcars$group2, rbind, NA)), -1)
mtcars_plot[,c("group2","group")] <- lapply(mtcars_plot[,c("group2","group")], na.locf)
mtcars_plot[] <- lapply(mtcars_plot, na.locf, fromLast = TRUE)
ggplot(mtcars_plot, aes(x = wt, y = mpg)) +
geom_point() +
geom_smooth(aes(), method=lm, se=F, fullrange=TRUE) +
geom_ribbon(aes(ymin = mpg *.75, ymax = mpg * 1.25, fill = group), alpha = .25) +
labs(fill = "Weight Class")
Edit:
To map confidence intervals using geom_ribbon you'll have to calculate them beforehand using lm and predict.
mtmodel <- lm(mpg ~ wt, data = mtcars)
mtcars$Low <- predict(mtmodel, newdata = mtcars, interval = "confidence")[,2]
mtcars$High <- predict(mtmodel, newdata = mtcars, interval = "confidence")[,3]
Followed by the previous code to modify mtcars. Then plot with the calculated bounds.
ggplot(mtcars_plot, aes(x = wt, y = mpg)) +
geom_point() +
geom_smooth(aes(), method=lm, se=F, fullrange=TRUE) +
geom_ribbon(aes(ymin = Low, ymax = High, fill = group), alpha = .25) +
labs(fill = "Weight Class") +
scale_fill_manual(values = c("red", "orange"), name = "fill")
I would like to plot a "combined" bar plot with points.
Consider to following dummy data:
library(ggplot2)
library(gridExtra)
library(dplyr)
se <- function(x){sd(x)/sqrt(length(x))}
p1 <- ggplot(mtcars, aes(y=disp, x=cyl, fill=cyl))
p1 <- p1 + geom_point() + theme_classic() + ylim(c(0,500))
my_dat <- summarise(group_by(mtcars, cyl), my_mean=mean(disp),my_se=se(disp))
p2 <- ggplot(my_dat, aes(y=my_mean,x=cyl,ymin=my_mean-my_se,ymax=my_mean+my_se))
p2 <- p2 + geom_bar(stat="identity",width=0.75) + geom_errorbar(stat="identity",width=0.75) + theme_classic() + ylim(c(0,500))
The final plot should look like that:
You can add layers together, but if they have different data and/or aesthetics you'll want to include the data and aes arguments in each graphical layer.
p3 <- ggplot() +
geom_bar(data=my_dat, aes(y=my_mean,x=cyl,ymin=my_mean-my_se,ymax=my_mean+my_se), stat="identity", width = 0.75) +
geom_errorbar(data=my_dat, aes(y=my_mean,x=cyl,ymin=my_mean-my_se,ymax=my_mean+my_se), width = 0.75) +
geom_point(data=mtcars, aes(y=disp, x=cyl, fill=cyl)) +
ylim(c(0,500)) +
theme_classic()
If you want to make it so that the the points are off to the side of the bars, you could subtract an offset from the cyl values to move over the points. Like #LukeA mentioned, by changing the geom_point to geom_point(data=mtcars, aes(y=disp, x=cyl-.5, fill=cyl)).
You can specify each layer individually to ggplot2. Often you are using the same data frame and options for each geom, so it makes sense to set defaults in ggplot(). In your case you should specify each geom separately:
library(ggplot2)
library(gridExtra)
library(dplyr)
se <- function(x){sd(x)/sqrt(length(x))}
my_dat <- summarise(group_by(mtcars, cyl),
my_mean = mean(disp),
my_se = se(disp))
p1 <- ggplot() +
geom_bar(data = my_dat,
aes(y = my_mean, x = cyl,
ymin = my_mean - my_se,
ymax = my_mean + my_se), stat="identity", width=0.75) +
geom_errorbar(data = my_dat,
aes(y = my_mean, x = cyl,
ymin = my_mean - my_se,
ymax = my_mean + my_se), stat="identity", width=0.75) +
geom_point(data = mtcars, aes(y = disp, x = cyl, fill = cyl)) +
theme_classic() + ylim(c(0,500))
p1
When using stat_smooth() with geom_point is there a way to remove the shaded fit region, but only draw its outer bounds? I know I can remove the shaded region with something like:
geom_point(aes(x=x, y=y)) + geom_stat(aes(x=x, y=y), alpha=0)
but how can I make the outer bounds of it (outer curves) still visible as faint black lines?
You can also use geom_ribbon with fill = NA.
gg <- ggplot(mtcars, aes(qsec, wt))+
geom_point() +
stat_smooth( alpha=0,method='loess')
rib_data <- ggplot_build(gg)$data[[2]]
ggplot(mtcars)+
stat_smooth(aes(qsec, wt), alpha=0,method='loess')+
geom_point(aes(qsec, wt)) +
geom_ribbon(data=rib_data,aes(x=x,ymin=ymin,ymax=ymax,col='blue'),
fill=NA,linetype=1)
...and if for some reason you don't want the vertical bars, you can just use two geom_line layers:
ggplot(mtcars)+
stat_smooth(aes(qsec, wt), alpha=0,method='loess')+
geom_point(aes(qsec, wt)) +
geom_line(data = rib_data,aes(x = x,y = ymax)) +
geom_line(data = rib_data,aes(x = x,y = ymin))
There are most likely easier ways, but you may try this as a start. I grab data for the confidence interval with ggbuild, which I then use in geom_line
# create a ggplot object with a linear smoother and a CI
library(ggplot2)
gg <- ggplot(data = mtcars, aes(x = wt, y = mpg)) +
geom_point() +
geom_smooth(method = "lm")
gg
# grab the data from the plot object
gg_data <- ggplot_build(gg)
str(gg_data)
head(gg_data$data[[2]])
gg2 <- gg_data$data[[2]]
# plot with 'CI-lines' and the shaded confidence area
ggplot(data = mtcars, aes(x = wt, y = mpg)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE, size = 1) +
geom_line(data = gg2, aes(x = x, y = ymin), size = 0.02) +
geom_line(data = gg2, aes(x = x, y = ymax), size = 0.02)
# plot with 'CI-lines' but without confidence area
ggplot(data = mtcars, aes(x = wt, y = mpg)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, size = 1) +
geom_line(data = gg2, aes(x = x, y = ymin), size = 0.02) +
geom_line(data = gg2, aes(x = x, y = ymax), size = 0.02)