I've coded this:
# Scatter plot of y against X with two fixed reference lines: the fitted
# least-squares line (red) and the population line (blue). Both colours are
# constants set outside aes(), not mapped aesthetics.
ggplot() +
  geom_point(aes(X, y)) +
  geom_abline(slope = 0.4866284, intercept = -0.9930872, color = "red") +
  geom_abline(slope = 0.5, intercept = -1, color = "blue")
but cannot seem to get a working legend for my least-squares and population regression lines. I've tried various Stack Overflow answers, but nothing seems to give me what I need.
Add a legend to a ggplot2 scatter plot including additional lines
This looked like the best answer, but I can't get it to work!
Any suggestions?
# Reproducible toy data: 20 draws of X, and y scattered around the
# population line y = -1 + 0.5 * X.
set.seed(1234)
X <- rnorm(n = 20, mean = 0, sd = 2.5)
y <- -1 + 0.5 * X + rnorm(n = 20, mean = 0, sd = 0.4)
library(ggplot2)
# Scatter plot where each reference line maps a constant label to `colour`
# inside aes(); the manual scale then assigns red / blue to those labels.
ggplot() +
  geom_point(aes(X, y)) +
  geom_abline(
    aes(slope = 0.4866284, intercept = -0.9930872, colour = "line1"),
    lwd = 1
  ) +
  geom_abline(
    aes(slope = 0.5, intercept = -1, colour = "line2"),
    lwd = 1
  ) +
  scale_colour_manual(values = c(line1 = "red", line2 = "blue"))
With slight modification your code works just fine:
# Scatter plot with two reference lines that appear in a legend because a
# constant label is mapped to `colour` inside aes().
ggplot() +
  geom_point(mapping = aes(x = X, y = y)) +
  geom_abline(
    aes(colour = "line_1", intercept = -0.9930872, slope = 0.4866284)
  ) +
  geom_abline(aes(colour = "line_2", intercept = -1, slope = 0.5)) +
  # Name the colours so each label keeps its colour even if the labels are
  # renamed or their sort order changes.
  scale_colour_manual(
    name = "lines",
    values = c(line_1 = "red", line_2 = "blue")
  ) +
  theme(legend.position = "bottom")
I also added the legend position in case you want to change that as well.
Related
I have 3 columns in a data frame from which I want to create a visualisation with geom_smooth() :
# Poll intentions per candidate: faint raw points plus three smoothed curves
# (lower error series, upper error series, and the main series).
ggplot(my_data_frame) +
  aes(
    x = fin_enquete,
    y = intentions,
    colour = candidat
  ) +
  # Sizes are plain doubles: a literal such as `.5L` is not a valid integer
  # and is only accepted by the parser with a warning.
  geom_point(
    alpha = 1 / 6,
    shape = "circle",
    size = 0.5
  ) +
  geom_smooth(
    mapping = aes(y = erreur_inf),
    size = 0.5,
    span = 0.42,
    se = FALSE
  ) +
  geom_smooth(
    mapping = aes(y = erreur_sup),
    size = 0.5,
    span = 0.42,
    se = FALSE
  ) +
  geom_smooth(
    method = "loess",
    size = 1.5,
    span = 0.42,
    se = FALSE
  ) +
  labs(
    x = "Date de fin d'enquête",
    y = "Pourcentage d'intentions de vote"
  ) +
  theme_minimal() +
  theme(text = element_text(family = "DIN Pro")) +
  coord_cartesian(expand = FALSE) +
  easy_remove_legend()
3 lines with geom_smooth
I would like to color the area between the upper and the lower line. I know the geom_ribbon() function but I am not sure I can use it in this situation.
Does anybody have a solution?
Have a nice day!
You could use geom_ribbon and calculate the loess model yourself within the geom_ribbon call?
Toy random data
# Toy data: an index column and three uniform series offset by 0, 1 and 2.
dat <- data.frame(
  x = seq_len(100),
  y = runif(100),
  y2 = 1 + runif(100),
  y3 = 2 + runif(100)
)
Now suppose we want a smoothed ribbon between y and y3, with y2 drawn as a line between them:
# Shade the band between the loess fits of y (lower edge) and y3 (upper
# edge), then draw the smoothed y2 on top of it.
ggplot(dat, aes(x, y2)) +
  geom_ribbon(
    aes(
      ymin = predict(loess(y ~ x)),
      ymax = predict(loess(y3 ~ x))
    ),
    alpha = 0.3
  ) +
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  geom_smooth(se = FALSE)
You could use lapply() to fit smooths over a range of df values, such as (5, 11, 13), and plot only the two edges of the standard-error band.
Sample code:
library(ggplot2)
# mpg against wt for mtcars: raw points, one B-spline regression curve per
# df value, and the dotted outline of a loess standard-error band.
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg)
) +
  geom_point(size = 2) +
  # Adding a list of layers to a plot adds each layer in turn.
  lapply(c(5, 11, 13), function(i) {
    geom_smooth(
      data = ~ cbind(., facet_plots = i),
      method = lm,
      se = FALSE, # spelled out: `F` can be reassigned
      formula = y ~ splines::bs(x, i)
    )
  }) +
  # facet_wrap(vars(facet_plots))
  geom_ribbon(
    stat = "smooth",
    method = "loess",
    se = TRUE,
    alpha = 0, # or, use fill = NA
    colour = "black",
    linetype = "dotted"
  ) +
  theme_minimal()
Plot:
I want to add some legend to this figure.
There is one set of scatter points and two lines.
Below is my code.
# Simulate a binary response from a noisy logistic model and fit a logistic
# regression to it.
# NOTE: the original `rm(list = ls())` was dropped; it silently wiped every
# object in the caller's workspace and does not give a clean session anyway.
n <- 500
set.seed(100)
x1 <- seq(from = -3, to = 3, length.out = n)
a <- rnorm(n, mean = 0, sd = 0.2)
z <- 1 + 2 * x1 + a
# True success probability on the logistic scale.
p <- 1 / (1 + exp(-z))
# Label is 1 when p reaches a per-observation uniform cut-off in [0.4, 0.6].
y <- (p >= runif(n, min = 0.4, max = 0.6)) * 1
X <- data.frame(x1 = x1, y = y)
K <- glm(formula = y ~ x1, family = binomial(link = "logit"), data = X)
# Fitted probabilities rebuilt from the estimated intercept and slope.
p_fit <- 1 / (1 + exp(-(coef(K)[1] + coef(K)[2] * x1)))
# Observed 0/1 points with the true probability curve (blue) and the fitted
# curve (red). Colours are constants outside aes().
ggplot() +
  xlab("x1") +
  ylab("y") +
  facet_grid() +
  geom_point(
    aes(x = x1, y = y),
    data = data.frame(x1, y),
    size = 4
  ) +
  geom_line(
    aes(x = x1, y = p),
    data = data.frame(x1, p),
    size = 1.2,
    colour = "blue"
  ) +
  geom_line(
    aes(x = x1, y = p_fit),
    data = data.frame(x1, p_fit),
    size = 1.5,
    colour = "red"
  ) +
  theme(legend.position = c(0.8, 0.5))
In ggplot, legends appear when you map a variable or a constant to an aesthetic. In your case, try the following.
# Observed points plus the true and fitted probability curves. Each curve
# maps a constant label to `color` inside aes(), which creates the legend.
ggplot() +
  geom_point(
    data = data.frame(x1, y),
    aes(x = x1, y = y),
    size = 3,
    alpha = 0.5
  ) +
  geom_line(
    data = data.frame(x1, p),
    aes(x = x1, y = p, color = "p"),
    size = 1.2
  ) +
  geom_line(
    data = data.frame(x1, p_fit),
    aes(x = x1, y = p_fit, color = "p_fit"),
    size = 1.5
  ) +
  xlab("x1") +
  ylab("y") +
  theme(legend.position = c(0.8, 0.5)) +
  # Named values tie each colour to its label instead of relying on the
  # alphabetical order of the labels.
  scale_color_manual(values = c(p = "blue", p_fit = "red"))
I am an R novice. I will try to be as brief and simple as possible. Currently, I am trying to connect points between two conditions based on another condition all over a single discrete x-axis.
Below is some test data and my attempt to plot some data.
set.seed(42)

# Test case data: eight rows sharing one x label, split into two groups of
# four, with the four conditions repeated once per group.
mydf1 <- tibble(
  xx = rep("myLabel", times = 8),
  yy = rnorm(n = 8),
  grp = rep(c(1, 2), each = 4),
  cond = rep(c("a", "b", "c", "d"), length.out = 8)
)

# Dodged points coloured by group, with a black path per condition that is
# dodged by the same width.
ggplot(mydf1, aes(xx, yy, colour = factor(grp))) +
  geom_point(position = position_dodge(width = 0.9)) +
  geom_path(
    aes(group = cond),
    colour = "black",
    position = position_dodge(width = 0.9)
  ) +
  theme_bw() +
  labs(title = "Test Case for geom_path and position_dodge")
From what I can tell, it seems that position_dodge is applied after the draw. Is there a way to change this behavior? or to achieve the overall goal of connecting these points in this type of way?
Thank you for your time.
EDIT: details.
EDIT2:
I would like to capture a before-and-after relationship between the grp levels for each of the 4 conditions within one main condition.
Probably you want this.
library(ggplot2)
set.seed(42)

# Use the numeric group as the x position so each black segment joins the
# two groups' points of one condition; the x axis text and ticks are hidden
# and the axis is relabelled "myLabel".
ggplot(mydf1, aes(grp, yy, colour = factor(grp))) +
  geom_point() +
  geom_path(aes(group = cond), colour = "black") +
  theme_bw() +
  labs(title = "Test Case for geom_path and position_dodge") +
  xlim(0.5, 2.5) +
  labs(colour = "Group", x = "myLabel", y = "yy") +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
You could plot a categorical x axis.
# Put the condition on the x axis; the black path groups rows by condition.
ggplot(mydf1, aes(cond, yy, colour = factor(grp))) +
  geom_point() +
  geom_path(aes(group = cond), colour = "black") +
  theme_bw() +
  labs(title = "Test Case for categorical X-axis")
Alternatively, if you need comparison across multiple categorical dimensions mapped to the x axis, you can try facets.
# Same categorical-x plot, with one facet panel per condition.
ggplot(mydf1, aes(cond, yy, colour = factor(grp))) +
  geom_point() +
  geom_path(aes(group = cond), colour = "black") +
  theme_bw() +
  labs(title = "Test Case for Categorical X-axis and Facets") +
  facet_wrap(~cond)
I want to plot a data set where the size of the points are proportional to the x-variable and have a regression line with a 95% prediction interval. The "sample" code I have written is as follows:
# Create random data and run regression
x <- rnorm(40)
y <- 0.5 * x + rnorm(40)
plot.dta <- data.frame(y, x)
mod <- lm(y ~ x, data = plot.dta)

# Create values for prediction interval
# `length.out` is spelled out rather than relying on partial matching.
x.new <- data.frame(x = seq(-2.5, 2.5, length.out = 1000))
# The stray empty argument (`predict(mod,, ...)`) was removed; predict()
# returns a matrix with columns fit, lwr and upr.
pred <- predict(mod, newdata = x.new, interval = "prediction")
# cbind() with a data frame already yields a data frame: x, fit, lwr, upr.
pred <- cbind(x.new, pred)
# Plot the data with the regression line and the prediction-interval
# bounds: both bounds are grey dashed lines, the fit is the default solid
# line, and the raw points are sized by x.
p <- ggplot(pred, aes(x = x, y = upr)) +
  geom_line(aes(y = lwr), linetype = "dashed", colour = "#666666") +
  geom_line(aes(y = upr), linetype = "dashed", colour = "#666666") +
  geom_line(aes(y = fit)) +
  geom_point(aes(y = y, size = x), data = plot.dta)
p
This produces the following plot:
Obviously, the legend is not too helpful here. I would like to have one entry in the legend for the points, say, labeled "data", one grey, dashed line labeled "95% PI" and one entry with a black line labeled "Regression line."
As Hack-R alluded in the provided link, you can set the breaks and labels for scale_size() to make that legend more meaningful.
You can also construct a legend for all your geom_line() calls by adding linetype into your aes() and use a scale_linetype_manual() to set the values, breaks and labels.
# Regression line with prediction-interval bounds and sized points, with
# readable legends for both point size and line type.
ggplot(pred, aes(x = x, y = upr)) +
  geom_line(aes(y = lwr, linetype = "dashed"), color = "#666666") +
  geom_line(aes(y = upr, linetype = "dashed"), color = "#666666") +
  geom_line(aes(y = fit, linetype = "solid")) +
  geom_point(data = plot.dta, aes(y = y, size = x)) +
  # Give explicit breaks: `labels` must be the same length as the breaks,
  # and the number of automatic breaks depends on the random data.
  scale_size(
    breaks = -2:2,
    labels = c("Eensy-weensy", "Teeny", "Small", "Medium", "Large")
  ) +
  # Explicit breaks pin each label to its line type.
  scale_linetype_manual(
    values = c("dashed" = 2, "solid" = 1),
    breaks = c("dashed", "solid"),
    labels = c("95% PI", "Regression Line")
  )
I'm trying to add a normal distribution line to my chart. But it simply becomes flat at the bottom for some reason.
My code
# Build a density-scaled histogram of max.DrawD with Symbol labels placed at
# (max.DrawD, cum.Return) and a normal density curve drawn with dnorm's
# default parameters.
#
# x: data frame with columns max.DrawD, cum.Return and Symbol.
# Returns the ggplot object.
MyChart <- function(x) {
  base_plot <- ggplot(
    x,
    aes(x = max.DrawD, y = cum.Return, label = Symbol)
  )
  base_plot +
    scale_y_continuous(
      breaks = seq(0, 10, 1),
      limits = c(0, 10)
    ) + # outliers excluded
    scale_x_continuous(limits = c(0, 0.5)) +
    geom_histogram(aes(y = ..density..), binwidth = 0.02) +
    geom_text(size = 3) +
    stat_function(fun = dnorm, colour = "firebrick") +
    theme_classic()
}
As you can see, the red line (my stat_function() code) is right at the bottom of the graph. How can I resolve this?
UPDATE: So I solved it. But I don't know why it now works. Just added a manual spec on the mean and standard deviation.
Updated code
# Build a density-scaled histogram of max.DrawD with Symbol labels placed at
# (max.DrawD, cum.Return) and a normal density curve whose mean and standard
# deviation are estimated from max.DrawD.
#
# x: data frame with columns max.DrawD, cum.Return and Symbol.
# Returns the ggplot object.
MyChart <- function(x) {
  # Drop missing values so a single NA does not turn the whole curve into
  # NaN and make it disappear from the plot.
  draw_mean <- mean(x$max.DrawD, na.rm = TRUE)
  draw_sd <- sd(x$max.DrawD, na.rm = TRUE)

  ggplot(x, aes(x = max.DrawD, y = cum.Return, label = Symbol)) +
    scale_y_continuous(
      breaks = seq(0, 10, 1),
      limits = c(0, 10)
    ) + # outliers excluded
    # `limits` is spelled out rather than relying on partial matching.
    scale_x_continuous(limits = c(0, 0.5)) +
    geom_histogram(aes(y = ..density..), binwidth = 0.02) +
    geom_text(size = 3) +
    # dnorm() defaults to mean = 0, sd = 1, so pass the sample estimates.
    stat_function(
      fun = dnorm,
      args = list(mean = draw_mean, sd = draw_sd),
      colour = "firebrick"
    ) +
    theme_classic()
}
From #user20650: it works because the function dnorm needs the parameters mean and sd. If you don't specify them, they are assumed to be zero and one, respectively.