Mathematical expression in tick mark labels, ggplot2 - r

I want to make tick mark labels using mathematical expressions.
See the next example:
library(tidyverse)
# Degrees of freedom used by the qt()/dt() calls below.
gl<-30
# Base plot: x only supplies the range [-5, 5] over which the curve is drawn.
ggplot(data = data.frame(x = c(-5, 5)), aes(x)) +
# Density curve of the t distribution (df is written as 30 here, the same value as gl).
stat_function(fun = dt, args = list(df = 30))+ylab("f(t)")+
# Vertical segment from the axis up to the curve at the 0.975 quantile.
geom_segment(aes(x=qt(.975,gl),xend=qt(.975,gl),y=0,yend=dt(qt(.975,gl),gl)))+
# Breaks at -5, the 0.025 quantile, 0, the 0.975 quantile and 5. The labels are a
# plain character vector; the fourth entry is written in plotmath syntax, and
# getting it rendered as a mathematical expression is what the question asks for.
scale_x_continuous("t", round(c(-5,qt(1-.975,gl),0,qt(.975,gl),5),3), limits=c(-5,5),labels=c("-5.000", "-2.042", "0" ,"list(q[0.95]==0.025)", "5.000"))+
# Two arrows drawn as segment annotations (one red, one in colour 1).
annotate("segment", x = c(2.2), xend = c(3.8),
y = c(0.02), yend = c(.16), colour = "red", size=1, alpha=0.6, arrow=arrow())+
annotate("segment", x = c(-1), xend = c(-3),
y = c(0.02), yend = c(.16), colour = 1, size=1, alpha=0.6, arrow=arrow())+
# Shaded area under the curve from -5 up to the 0.975 quantile.
stat_function(fun = dt, args = list(df = gl),
xlim = c(-5,qt(.975,gl)),
geom = "area",fill="red",alpha=0.5)+
# Text annotations; parse=T makes annotate() interpret the labels as plotmath.
annotate("text", x = c(-3.8,3.8,4), y = c(0.18,0.18,.3),
label = c("1-alpha","alpha/2","list(q[0.95]==0.025)"),parse=T , size=4 , fontface="bold")+
theme_bw()
If the line
scale_x_continuous("t", round(c(-5,qt(1-.975,gl),0,qt(.975,gl),5),3), limits=c(-5,5),labels=c("-5.000", "-2.042", "0" ,"list(q[0.95]==0.025)", "5.000"))
is replaced by
scale_x_continuous("t", round(c(-5,qt(1-.975,gl),0,qt(.975,gl),5),3), limits=c(-5,5),labels=c("-5.000", "-2.042", "0" ,"list(q[0.95]==0.025)", "5.000"),parse=T)
An error is obtained:
Error in scale_x_continuous("t", round(c(-5, qt(1 - 0.975, gl), 0,
qt(0.975, : unused argument (parse = T)
How to achieve mathematical expressions in scale_x_continuous as is achieved in annotate?

You can use plotmath expressions in expression vectors to make this happen.
library(tidyverse)
# Parameters of the normal density and the x-axis range to draw.
mf <- 55
sdf <- 8
weight_lim <- c(30, 110)

# Tick labels as an expression vector: quoted entries are shown as plain text,
# the unquoted entry is rendered as a plotmath expression.
xlabels <- expression("-5.000", "-2.042", "0", q[0.95] == 0.025, "5.000")

ggplot(data.frame(weight = weight_lim), aes(weight)) +
  # Density curve.
  stat_function(
    fun = dnorm,
    args = list(mean = mf, sd = sdf),
    n = 101,
    color = 2
  ) +
  # Dashed vertical segment from the axis up to the curve at weight = 50.
  geom_segment(
    aes(x = 50, xend = 50, y = 0, yend = dnorm(50, mf, sdf)),
    linetype = 2,
    col = 2
  ) +
  # Shaded area under the curve from the lower limit up to 50.
  stat_function(
    fun = dnorm,
    args = list(mean = mf, sd = sdf),
    geom = "area",
    xlim = c(weight_lim[1], 50),
    fill = "red",
    alpha = 0.5
  ) +
  ylab("f(weight)") +
  # Five evenly spaced breaks, labelled with the expression vector above.
  scale_x_continuous(
    name = "t",
    breaks = seq(weight_lim[1], weight_lim[2], length.out = 5),
    labels = xlabels,
    limits = weight_lim
  ) +
  theme_bw()
Created on 2020-07-15 by the reprex package (v0.3.0)

xlabels <- c(
~ "-5.000",
~ "-2.042",
~ "0",
~ list(q[0.95]==0.025),
~ "5.000"
)
ggplot(......) + ...... +
scale_x_continuous("t", round(c(-5,qt(1-.975,gl),0,qt(.975,gl),5),3),
limits=c(-5,5),
labels= xlabels)

Related

How to manually change line size and alpha values for ggplot2 lines (separated by factor)?

I want to create a graph where I can change the line size for each line c(1,2,3) and the alpha values for each line c(0.5,0.6,0.7). I tried to use scale_size_manual but it didn't make any difference. Any ideas on how to proceed?
# Example data: three groups ("T", "M", "A") with four observations each, so
# that var, val and x all have 12 elements. With only nine group labels,
# data.frame() stops with "arguments imply differing number of rows: 9, 12".
var <- rep(c("T", "M", "A"), each = 4)
val <- rnorm(12, 4, 5)
x <- 1:12
df <- data.frame(var, val, x)
# One loess-smoothed line per level of var, with manually chosen colours.
# size = 1 is given as a constant outside aes(), so no variable is mapped to the
# size aesthetic and the trailing scale_size_manual() has nothing to act on.
ggplot(aes(x= x , y = val, color = var, group = var), data = df) +
scale_color_manual(values = c("grey","blue","black")) + geom_smooth(aes(x = x, y = val), formula = "y ~ x", method = "loess",se = FALSE, size = 1) + scale_x_continuous(breaks=seq(1, 12, 1), limits=c(1, 12)) + scale_size_manual(values = c(1,2,3))
To set the size and alpha values for your lines, you have to map a variable to the size and alpha aesthetics inside aes(). Otherwise scale_size_manual() will have no effect:
library(ggplot2)
# One loess line per level of var. Colour, line size and transparency are all
# mapped to var, so each manual scale below has an aesthetic to act on.
ggplot(data = df, mapping = aes(x = x, y = val, color = var, group = var)) +
  geom_smooth(
    # x, y, colour and group are inherited from the plot-level mapping.
    aes(size = var, alpha = var),
    method = "loess",
    formula = "y ~ x",
    se = FALSE
  ) +
  scale_x_continuous(breaks = seq(1, 12, 1), limits = c(1, 12)) +
  scale_color_manual(values = c("grey", "blue", "black")) +
  scale_size_manual(values = c(1, 2, 3)) +
  scale_alpha_manual(values = c(0.5, 0.6, 0.7))

How can I add a legend to indicate a color code instead of data?

I am struggling to add a color code (legend) to the plot created with the PwrPlot() function shown below. This is for a teaching demonstration and I just need to replace the labels (alpha, beta, 1-alpha, 1-beta) by a legend indicating what each color refers to (indeed, changing the parameters of the curves could result in some 'ugly' label positions in the graph!). And running this function also gives out warnings that I can't interpret:
Warning messages:
1: In is.na(x) :
is.na() applied to non-(list or vector) of type 'expression'
Any help would be greatly appreciated! Thanks
# Draw two normal densities, dnorm with (mu0, sig0) and dnorm with (mu1, sig1),
# and shade four regions split at the critical value
# qnorm(1 - alpha/tail, mu0, sig0). Text labels are placed at fixed coordinates.
#
# Arguments:
#   mu0, sig0  passed to dnorm()/qnorm() for the first curve.
#   mu1, sig1  passed to dnorm() for the second curve.
#   alpha      level used for the critical value; it is divided by tail.
#   tail       divisor applied to alpha.
#
# Returns the ggplot object built by the chain below.
PwrPlot <- function(mu0=0, mu1=1.9, sig0=1, sig1=1, alpha=0.05, tail=1){
# x = -4:5 only provides the range over which the curves are evaluated.
ggplot(data.frame(x = -4:5), aes(x)) +
# First curve, right of the critical value (red).
stat_function(fun = dnorm, args = c(mu0, sig0), geom = 'area',
xlim = c(qnorm(1-alpha/tail, mu0, sig0), 5), fill = 'red') +
stat_function(fun = dnorm, args = c(mu0, sig0)) +
# First curve, left of the critical value (deepskyblue3).
stat_function(fun = dnorm, args = c(mu0, sig0), geom = 'area',
xlim = c(-4, qnorm(1-alpha/tail, mu0, sig0)), fill = 'deepskyblue3') +
# The outline of the first curve is drawn again after the filled area.
stat_function(fun = dnorm, args = c(mu0, sig0)) +
stat_function(fun = dnorm, args = c(mu1, sig1)) +
# Second curve, right of the critical value (cyan4, semi-transparent).
stat_function(fun = dnorm, args = c(mu1, sig1), geom = 'area',
xlim = c(qnorm(1-alpha/tail, mu0, sig0), 5), fill = 'cyan4', alpha=0.2) +
# Second curve, left of the critical value (chocolate3, semi-transparent).
stat_function(fun = dnorm, args = c(mu1, sig1), geom = 'area',
xlim = c(-4, qnorm(1-alpha/tail, mu0, sig0)), fill = 'chocolate3', alpha=0.5) +
# Labels at hard-coded positions; they do not move when the parameters change.
# NOTE(review): passing expression() objects as label is presumably what
# triggers the reported is.na() warning - confirm.
geom_text(x=-0.4, y=0.18, label=expression(1-alpha), size=10, col="white") +
geom_text(x=2, y=0.018, label=expression(alpha), size=10, col="white") +
geom_text(x=1, y=0.1, label=expression(beta), size=10, col="white") +
geom_text(x=2.5, y=0.1, label=expression(1-beta), size=10, col="black") +
geom_text(x=-1.7, y=0.35, label="H0", size=10, col="black") +
geom_text(x=3.5, y=0.35, label="H1", size=10, col="black") +
labs(y="Densité")
}
PwrPlot()
You can put the colours inside aes() and combined with scale_fill_identity() you can construct a legend.
library(ggplot2)
# Draw two normal densities, dnorm with (mu0, sig0) and dnorm with (mu1, sig1),
# shade four regions split at the critical value, and add a legend that names
# the region each fill colour stands for.
#
# Arguments:
#   mu0, sig0  passed to dnorm()/qnorm() for the first (H0) curve.
#   mu1, sig1  passed to dnorm() for the second (H1) curve.
#   alpha      level used for the critical value; it is divided by tail.
#   tail       divisor applied to alpha.
#
# Returns the ggplot object built by the chain below.
PwrPlot <- function(mu0 = 0, mu1 = 1.9, sig0 = 1, sig1 = 1, alpha = 0.05, tail = 1) {
  # Critical value under the first curve; every shaded region starts or ends here.
  crit <- qnorm(1 - alpha / tail, mu0, sig0)

  ggplot(data.frame(x = -4:5), aes(x)) +
    # Fill colours are given inside aes() so that the identity scale below can
    # build a legend from them.
    stat_function(fun = dnorm, args = c(mu0, sig0), geom = 'area',
                  xlim = c(crit, 5),
                  aes(fill = 'red')) +
    stat_function(fun = dnorm, args = c(mu0, sig0)) +
    stat_function(fun = dnorm, args = c(mu0, sig0), geom = 'area',
                  xlim = c(-4, crit),
                  aes(fill = 'deepskyblue3')) +
    # The outline of the first curve is drawn again on top of the filled area.
    stat_function(fun = dnorm, args = c(mu0, sig0)) +
    stat_function(fun = dnorm, args = c(mu1, sig1)) +
    stat_function(fun = dnorm, args = c(mu1, sig1), geom = 'area',
                  xlim = c(crit, 5),
                  aes(fill = 'cyan4'), alpha = 0.2) +
    stat_function(fun = dnorm, args = c(mu1, sig1), geom = 'area',
                  xlim = c(-4, crit),
                  aes(fill = 'chocolate3'), alpha = 0.5) +
    # Explicit breaks tie every label to its colour. Without them the labels are
    # matched positionally to the alphabetically sorted colour names
    # (chocolate3, cyan4, deepskyblue3, red), which labels cyan4 as 1 - alpha and
    # deepskyblue3 as 1 - beta.
    scale_fill_identity(
      breaks = c('red', 'deepskyblue3', 'cyan4', 'chocolate3'),
      labels = expression(alpha, 1 - alpha, 1 - beta, beta),
      guide = guide_legend()
    ) +
    # In-plot labels at fixed positions: the first three are white, the rest black.
    annotate(
      "text", size = 10,
      x = c(-0.4, 2, 1, 2.5, -1.7, 3.5),
      y = c(0.18, 0.018, 0.1, 0.1, 0.35, 0.35),
      label = expression(1-alpha, alpha, beta, 1-beta, "H0", "H1"),
      colour = rep(c("white", "black"), each = 3)
    ) +
    labs(y = "Densité")
}
PwrPlot()
#> Warning in is.na(x): is.na() applied to non-(list or vector) of type
#> 'expression'
Created on 2021-10-14 by the reprex package (v2.0.1)

Wrong area on normal curve plot

I'm trying to learn R from scratch and I just delivered a college assignment for hypothesis testing a binomial distribution (proportion test for one sample) that I used R to solve and plot. But I ran into some problems.
My sample size is 130, success cases are 68.
H0: π = 50%
H1: π > 50%
This is the code I used (plenty of copy-paste and trial and error):
library(ggplot2)
library(ggthemes)
library(scales)
# Inputs for the one-sample proportion test.
n <- 130       # sample size
p <- 1 / 2     # proportion under H0
cases <- 68    # observed successes

# Mean and standard deviation of the success count under H0.
mean_binon <- n * p
stdev <- sqrt(n * p * (1 - p))

# Standardised test statistic, its lower-tail probability at -|z|, and the
# 0.975 standard normal quantile used as critical value.
ztest <- (cases - mean_binon) / stdev
pvalor <- pnorm(-abs(ztest))
zcrit <- qnorm(0.975)
# Range of z values over which the standard normal curve is drawn.
xvalues <- data.frame(x = c(-4, 4))
# Base plot plus the density curve and two vertical reference lines.
# NOTE(review): aes(x = xvalues) maps the whole data frame rather than its x
# column - presumably aes(x = x) was intended; confirm.
p1 <- ggplot(xvalues, aes(x = xvalues))
# Blue line at the test statistic, red line at the critical value.
p2 <- p1 + stat_function(fun = dnorm) + xlim(c(-4, 4)) +
geom_vline(xintercept = ztest, linetype="solid", color="blue",
size=1) +
geom_vline(xintercept = zcrit, linetype="solid", color="red",
size=1)
# Standard normal density with every value of x below ztest replaced by NA,
# so that only the part from ztest upwards is drawn. ztest is read from the
# enclosing environment.
area_z <- function(x) {
  replace(dnorm(x), x < ztest, NA)
}
# Standard normal density with every value of x below zcrit replaced by NA,
# so that only the part from zcrit upwards is drawn. zcrit is read from the
# enclosing environment.
area_zc <- function(x) {
  replace(dnorm(x), x < zcrit, NA)
}
# Probabilities between each threshold and z = 4, rounded to three decimals;
# they are shown as percentage labels in the plot.
valor_area_z <- round(pnorm(4) - pnorm(ztest), 3)
valor_area_zc <- round(pnorm(4) - pnorm(zcrit), 3)
# Final plot: shaded areas from area_z()/area_zc() plus their percentage labels.
# NOTE(review): the gap between the vertical lines and the shaded areas
# presumably arises because stat_function() evaluates area_z()/area_zc() on a
# fixed grid of x values, so the first non-NA point lies to the right of the
# threshold - confirm.
p3 <- p2 + stat_function(fun = dnorm) +
stat_function(fun = area_z, geom = "area", fill = "blue", alpha = 0.3) +
geom_text(x = 1.13, y = 0.1, size = 5, fontface = "bold",
label = paste0(valor_area_z * 100, "%")) +
stat_function(fun = area_zc, geom = "area", fill = "red", alpha = 0.5) +
geom_text(x = 2.27, y = 0.015, size = 3, fontface = "bold",
label = paste0(valor_area_zc * 100, "%")) +
scale_x_continuous(breaks = c(-3:3)) +
labs(x = "\n z", y = "f(z) \n", title = "Distribuição Normal \n") +
theme_fivethirtyeight()
p3
Here's the plot
There is a gap between my geom_vline's and the shaded area. I'm not sure if I'm doing the wrong steps with my statistics or if this is an R-related problem. Maybe both? Sorry if this is elementary. I'm not good at either, but I'm trying to improve.
A solution is to use the option xlim inside stat_function which defines the range of the function. You can also replace area_z and area_zc with dnorm.
# Final plot: the shaded regions are bounded with xlim inside stat_function(),
# so each area starts exactly at its threshold.
p3 <- p2 +
  stat_function(fun = dnorm) +
  # Blue area between the test statistic and the critical value.
  stat_function(
    fun = dnorm,
    geom = "area",
    xlim = c(ztest, zcrit),
    fill = "blue",
    alpha = 0.3
  ) +
  geom_text(
    x = 1.13, y = 0.1,
    label = paste0(valor_area_z * 100, "%"),
    size = 5, fontface = "bold"
  ) +
  # Red area from the critical value to the upper end of the plotted range.
  stat_function(
    fun = dnorm,
    geom = "area",
    xlim = c(zcrit, xvalues$x[2]),
    fill = "red",
    alpha = 0.5
  ) +
  geom_text(
    x = 2.27, y = 0.015,
    label = paste0(valor_area_zc * 100, "%"),
    size = 3, fontface = "bold"
  ) +
  scale_x_continuous(breaks = -3:3) +
  labs(x = "\n z", y = "f(z) \n", title = "Distribuição Normal \n") +
  theme_fivethirtyeight()
p3

How do I plot constraints in R

I have to plot constraints in R and it's very new to me. Please could someone give me some help.
x1 + 2*x2 <= 100
3*x1 + x2 <= 75
Many thanks
Update:
You can use geom_polygon to shade the constraint sets. With linear constraints, the edges of the constraint set are easy to compute.
library(ggplot2)
# Plot both constraint boundaries as lines and shade the region below each one.
# data.frame() is used for the small helper tables: data_frame() is not
# provided by ggplot2 (it is a deprecated tibble function), so the original
# call fails when only library(ggplot2) is loaded.
ggplot(data.frame(x = c(0, 100)), aes(x = x)) +
  # Boundary of x1 + 2*x2 <= 100, solved for x2.
  stat_function(fun = function(x) {(100 - x)/2}, aes(color = "Function 1")) +
  # Boundary of 3*x1 + x2 <= 75, solved for x2.
  stat_function(fun = function(x) {(75 - 3*x) }, aes(color = "Function 2")) +
  theme_bw() +
  scale_color_discrete(name = "Function") +
  # Constraint set 1: polygon through (0, 0), (0, 50) and (100, 0).
  geom_polygon(
    data = data.frame(
      x = c(0, 0, 100, Inf),
      y = c(0, 50, 0, 0)
    ),
    aes(
      x = x, y = y, fill = "Constraint 1"
    ),
    inherit.aes = FALSE, alpha = 0.4
  ) +
  # Constraint set 2: polygon through (0, 0), (0, 75) and (25, 0).
  geom_polygon(
    data = data.frame(
      x = c(0, 0, 25, Inf),
      y = c(0, 75, 0, 0)
    ),
    aes(
      x = x, y = y, fill = "Constraint 2"
    ),
    inherit.aes = FALSE, alpha = 0.4
  ) +
  scale_fill_discrete(name = "Constraint Set") +
  scale_y_continuous(limits = c(0, 100))
This gives:
Use stat_function. Here is a simple example:
# Draw the boundary line of each constraint over x in [0, 100].
# data.frame() replaces data_frame(), which ggplot2 does not provide (it is a
# deprecated tibble function).
ggplot(data.frame(x = c(0, 100)), aes(x = x)) +
  # Boundary of x1 + 2*x2 <= 100, solved for x2.
  stat_function(fun = function(x) {(100 - x)/2}, aes(color = "Function 1")) +
  # Boundary of 3*x1 + x2 <= 75, solved for x2.
  stat_function(fun = function(x) {(75 - 3*x) }, aes(color = "Function 2")) +
  theme_bw() +
  scale_color_discrete(name = "Function")
There are some enhancements that you can make, like shading the area under the constraint, and to add labels to the constraint lines.

how to overlay a line plot with a density plot? (R, ggplot2)

Hi how do I overlap the following curves in one graph? Any help's appreciated. Thank you!
library(ggplot2)
# Normal density (mean 0, sd 3) evaluated at the integers -10..10.
x <- -10:10
y <- dnorm(x, mean = 0, sd = 3)
df.norm <- data.frame(x = x, y = y)

# First plot: the evaluated density as a line with points.
ggplot(df.norm, aes(x = x, y = y)) +
  geom_line() +
  geom_point()

# 1000 random draws from the same distribution.
random <- data.frame(x = rnorm(1000, mean = 0, sd = 3))

# Second plot: kernel density estimate of the random draws.
ggplot(random, aes(x = x)) +
  geom_density(size = 1)
I tried the following and it didn't work
# Attempt to add the density of `random` on top of the line plot.
# NOTE: `random` is passed to geom_density() positionally here, whereas the
# working versions further down pass it by name as `data = random`.
ggplot(data=df.norm, aes(x=x, y=y)) +
geom_line() +
geom_point() +
geom_density(random, aes(x=x), size=1)
library(ggplot2)
# Normal density (mean 0, sd 3) evaluated at the integers -10..10.
x <- -10:10
y <- dnorm(x, mean = 0, sd = 3)
df.norm <- data.frame(x = x, y = y)

# 1000 random draws from the same distribution.
random <- data.frame(x = rnorm(1000, mean = 0, sd = 3))

# Empty base plot; each layer brings its own data and mapping, so the density
# layer can use a different data frame from the line and point layers.
ggplot() +
  geom_line(aes(x = x, y = y), data = df.norm) +
  geom_point(aes(x = x, y = y), data = df.norm) +
  geom_density(aes(x = x), data = random, size = 1)
ggplot2
A more concise version in ggplot2 using the argument inherit.aes = FALSE inside geom_density to override the default aesthetics used in the previous two layers.
library(ggplot2)
# Fix the random number generator so the density estimate is reproducible.
set.seed(2017)

# Normal density (mean 0, sd 3) evaluated at the integers -10..10.
x <- -10:10
y <- dnorm(x, mean = 0, sd = 3)
df.norm <- data.frame(x = x, y = y)

# 1000 random draws from the same distribution.
random <- data.frame(x = rnorm(1000, mean = 0, sd = 3))

# Line and points use the plot-level data and mapping; the density layer
# supplies its own data and switches off inheritance of the plot-level aes().
ggplot(df.norm, aes(x = x, y = y)) +
  geom_line() +
  geom_point() +
  geom_density(
    mapping = aes(x = x),
    data = random,
    size = 1,
    inherit.aes = FALSE
  )
Base
Adapting the solution provided by scoa to the base package:
# Base graphics version: draw df.norm as a line, add its points as filled
# circles, then overlay the kernel density estimate of random$x.
plot(df.norm, type = "l", bty = "n", las = 1)
points(df.norm, pch= 19)
lines(density(random$x), lwd = 2)
Adding a legend, and a different colour for the density curve:
# Line and points for df.norm.
plot(x = df.norm, type = "l", bty = "n", las = 1)
points(x = df.norm, pch = 19)

# Kernel density estimate of the random draws, in orange.
lines(density(random$x), col = "orange", lwd = 2)

# Legend in the top-left corner, without a surrounding box.
legend(
  "topleft",
  legend = c("df.norm", "Density plot"),
  col = c("black", "orange"),
  lwd = c(2, 2),
  bty = "n"
)

Resources