Mark start point and end point in ggplot - r

I'm making a plot of 2D random walk using R and ggplot2 library. It works, but I would like to show where the starting point and ending point are in my random walk plot.
I tried to create another geom_point and append it to the existing ggplot but it did not work. Any suggestions? Thanks!
x = 0
y = 0
vec1 <- vector()
xcor <- vector()
ycor <- vector()
number = 1000
list_num = c(1,2,3,4)
move = sample(list_num, size = number, replace = TRUE)
for (i in 1:number) {
if (move[i] == 1) {
x = x + 1
}
else if (move[i] == 2) {
x = x - 1
}
else if (move[i] == 3) {
y = y + 1
}
else if (move[i] == 4) {
y = y - 1
}
vec1 <- c(vec1, i)
xcor <- c(xcor, x)
ycor <- c(ycor, y)
}
df_randomwalk = data.frame(vec1, xcor, ycor)
ggplot(df_randomwalk, aes(x = xcor, y = ycor)) +
geom_point(alpha = 0.1, size = 0.3) + geom_path() +
theme_minimal()

This should do it.
start <- df_randomwalk %>% filter(vec1 == min(df_randomwalk$vec1))
end <- df_randomwalk %>% filter(vec1 == max(df_randomwalk$vec1))
ggplot(df_randomwalk, aes(x = xcor, y = ycor)) +
geom_point(alpha = 0.1, size = 0.3) + geom_path() +
geom_point(alpha = 0.1, size = 0.3) +
theme_minimal() +
geom_point(start, mapping=aes(x=xcor,y=ycor), colour="red", size=1) +
geom_point(end, mapping=aes(x=xcor,y=ycor), colour="blue", size=1)

Related

Why is geom_density plotting my data differently from the expected image?

I have the following task statement:
In this task we want to simulate random variables with density
To do this, write a function r_density(n) that simulates n of such random variables.
Then use this function to simulate N = 1000 of such random variables. Using geom_density() you can now estimate the density from the simulated random variables. We can compare this estimate with the real density. To do this, create a graph that looks like this:
Problem is, however, that I don't understand why my output looks like this:
Why is the raked density plotted in such a weird way? Can someone explain to me why it looks like that and how to get the estimated density from the expected image?
This is the corresponding code I wrote for the above plot:
library(tidyverse)
N <- 1000
r_density <- function(n){
exp(-abs(n))/2
}
x <- runif(N)
tb <- tibble(
x = x,
density_fkt = r_density(x)
)
ggplot() +
geom_density(
data = tb,
mapping = aes(
x = density_fkt,
y = ..scaled..
)
) +
geom_function(
fun = r_density,
xlim = c(-6,6),
color = "red",
size = 1
) +
theme_minimal() +
labs(
x = "x",
y = "Dichtefunktion f(x)",
title = "Geschätzte (schwarz) vs echte (rot) Dichte"
)
You may use inverse transform sampling or rejection sampling. I choose rejection sampling.
library(tidyverse)
N <- 1000
r_density <- function(n){
exp(-abs(n))/2
}
x = c()
while (length(x) < N) {
y = rnorm(1)
while (y > 6 | y < -6) {
y = rnorm(1)
}
u = runif(1)
if (u < r_density(y)/(dnorm(y) * 3)) {
x=append(x, y)
}
}
tb <- tibble(
x = x,
density_fkt = r_density(x)
)
ggplot() +
geom_density(
data = tb,
mapping = aes(
x = x,
y = ..density..
)
) +
geom_function(
fun = r_density,
xlim = c(-6,6),
color = "red",
size = 1
) +
theme_minimal() +
labs(
x = "x",
y = "Dichtefunktion f(x)",
title = "Geschätzte (schwarz) vs echte (rot) Dichte"
)
Here's the inverse transform sampling method (this involves some difficult integration, so perhaps not what your teacher intended)
r_density <- function(n) {
cdf <- function(x) {
1/4 * exp(-x) * (-1 + 2 * exp(x) + exp(2*x) - (-1 + exp(x))^2 * sign(x))
}
sapply(runif(n), function(i) {
uniroot(function(x) cdf(x) - i, c(-30, 20))$root
})
}
Plotting gives:
ggplot() +
geom_density(aes(r_density(1000))) +
geom_function(
fun = function(x) exp(-abs(x))/2,
xlim = c(-6,6),
color = "red",
size = 1
) +
theme_minimal() +
labs(
x = "x",
y = "Dichtefunktion f(x)",
title = "Geschätzte (schwarz) vs echte (rot) Dichte"
)

Plot a discontinuous function in R without connecting a "jump"

I'd like to plot a discontinuous function without connecting a jump. For example, in the following plot, I'd like to delete the line connecting (0.5, 0.5) and (0.5, 1.5).
f <- function(x){
(x < .5) * (x) + (x >= .5) * (x + 1)
}
ggplot()+
geom_function(fun = f)
Edit: I'm looking for a solution that works even if the discountinuous point is not a round number, say pi/10.
You could write a little wrapper function which finds discontinuities in the given function and plots them as separate groups:
plot_fun <- function(fun, from = 0, to = 1, by = 0.001) {
x <- seq(from, to, by)
groups <- cut(x, c(-Inf, x[which(abs(diff(fun(x))) > 0.1)], Inf))
df <- data.frame(x, groups, y = fun(x))
ggplot(df, aes(x, y, group = groups)) +
geom_line()
}
This allows
plot_fun(f)
plot_fun(floor, 0, 10)
This answer is based on Allan Cameron's answer, but depicts the jump using open and closed circles. Whether the function is right or left continuous is controlled by an argument.
library("ggplot2")
plot_fun <- function(fun, from = 0, to = 1, by = 0.001, right_continuous = TRUE) {
x <- seq(from, to, by)
tol_vertical <- 0.1
y <- fun(x)
idx_break <- which(abs(diff(y)) > tol_vertical)
x_break <- x[idx_break]
y_break_l <- y[idx_break]
y_break_r <- y[idx_break + 1]
groups <- cut(x, c(-Inf, x_break, Inf))
df <- data.frame(x, groups, y = fun(x))
plot_ <- ggplot(df, aes(x, y, group = groups)) +
geom_line()
# add open and closed points showing jump
dataf_l <- data.frame(x = x_break, y = y_break_l)
dataf_r <- data.frame(x = x_break, y = y_break_r)
shape_open_circle <- 1
# this is the default of shape, but might as well specify.
shape_closed_circle <- 19
shape_size <- 4
if (right_continuous) {
shape_l <- shape_open_circle
shape_r <- shape_closed_circle
} else {
shape_l <- shape_closed_circle
shape_r <- shape_open_circle
}
plot_ <- plot_ +
geom_point(data = dataf_l, aes(x = x, y = y), group = NA, shape = shape_l, size = shape_size) +
geom_point(data = dataf_r, aes(x = x, y = y), group = NA, shape = shape_r, size = shape_size)
return(plot_)
}
Here's the OP's original example:
f <- function(x){
(x < .5) * (x) + (x >= .5) * (x + 1)
}
plot_fun(f)
Here's Allan's additional example using floor, which shows multiple discontinuities:
plot_fun(floor, from = 0, to = 10)
And here's an example showing that the function does not need to be piecewise linear:
f_curved <- function(x) ifelse(x > 0, yes = 0.5*(2-exp(-x)), no = 0)
plot_fun(f_curved, from = -1, to = 5)
You can insert everything inside an ifelse:
f <- function(x){
ifelse(x==0.5,
NA,
(x < .5) * (x) + (x >= .5) * (x + 1))
}
ggplot()+
geom_function(fun = f)

Error: Aesthetics must be either length 1 or the same as the data (121): yintercept

I have the following data (from 1 to 1032) and I am trying to plot correlogram for autocorrelation and partial autocorrelation:
prp: data frame
prp$Log.prp.Standardized: the column that I am trying to plot
Data:
prp$Log.prp.Standardized (Name of the column - I have 1 column with 1032 values)
1 1.7923928339
2 0.7792383013
3 -0.2033400303
4 -1.7016479357
5 0.8002357419
6 0.3575677621
7 1.0209246410
8 0.7188631605
9 -0.5320108464
10 -0.2190886401
.
.
.
.
(till 1032)
The function that I am using:
correlogram <- function(x, type = "correlation"){
gacf = acf(x, plot=FALSE, lag.max=120, type = type)
gacf.df = with(gacf, data.frame(lag, acf))
gacf.df$sig = qnorm((1 + 0.95)/2)/sqrt(length(x))
q <- ggplot(data = gacf.df, mapping = aes(x = lag, y = acf))
q <- q + xlim(c(0,120)) + theme_bw()
q <- q + geom_hline(aes(yintercept = 0))
q <- q + geom_segment(mapping = aes(xend = lag), yend = 0, lwd = 1)
q <- q + geom_hline(aes(yintercept = c(sig, -1*sig)), linetype = 2, colour = "#e51843")
if(type == "partial"){
q <- q + ylab(expression(alpha[k]))
} else {
q <- q + ylab(expression(rho[k]))
}
q <- q + xlab("lag k")
}
Then the code I am running:
require(gridExtra)
library(gridExtra)
library(ggplot2)
library(grid)
q1 <- correlogram(prp$Log.prp.Standardized) + xlab(" ") + ggtitle("Total and Partial Correlograms")
q2 <- correlogram(prp$Log.prp.Standardized, type = "partial")
grid.arrange (q1, q2, nrow = 2)
grid
But I am getting the following error:
Error: Aesthetics must be either length 1 or the same as the data (121): yintercept
Any help will be appreciated!
The issue is that you map c(sig, -1*sig) on yintercept which will not work as the length of c(sig, -1*sig) is two times the length of your df gacf.df. That's what the error message is telling you. There are two options to achieve your desired result:
If you add sig as a variable you have to add the horizontal lines via two calls of geom_hline.
The approach below instead makes sig a scalar. In that case you don't have to wrap yintercept = c(sig, -1*sig) inside aes() :
correlogram <- function(x, type = "correlation"){
gacf = acf(x, plot=FALSE, lag.max=120, type = type)
gacf.df = with(gacf, data.frame(lag, acf))
#gacf.df$sig = qnorm((1 + 0.95)/2)/sqrt(length(x))
sig = qnorm((1 + 0.95)/2)/sqrt(length(x))
q <- ggplot(data = gacf.df, mapping = aes(x = lag, y = acf))
q <- q + xlim(c(0,120)) + theme_bw()
q <- q + geom_hline(aes(yintercept = 0))
q <- q + geom_segment(mapping = aes(xend = lag), yend = 0, lwd = 1)
# q <- q + geom_hline(aes(yintercept = sig), linetype = 2, colour = "#e51843")
# q <- q + geom_hline(aes(yintercept = -1*sig), linetype = 2, colour = "#e51843")
q <- q + geom_hline(yintercept = c(sig, -1*sig), linetype = 2, colour = "#e51843")
if(type == "partial"){
q <- q + ylab(expression(alpha[k]))
} else {
q <- q + ylab(expression(rho[k]))
}
q <- q + xlab("lag k")
}
library(gridExtra)
library(ggplot2)
library(grid)
set.seed(42)
prp <- data.frame(Log.prp.Standardized = rnorm(100))
q1 <- correlogram(prp$Log.prp.Standardized) + xlab(" ") + ggtitle("Total and Partial Correlograms")
q2 <- correlogram(prp$Log.prp.Standardized, type = "partial")
grid.arrange (q1, q2, nrow = 2)
Created on 2021-02-18 by the reprex package (v1.0.0)

Setting color levels in contourplots in ggplot R

I am plotting contour plots using ggplot in loop. I have few concerns -
the color levels are different in all iterations, how do it keep it steady iterations?
the number and range of levels are also changing with iteration, how to keep it constant across iterations ?
the length occupied by color scale is much longer than actual figure. How do I adjust that ?
How do I manually set the levels of colors in contours?
I have attached a sample below. Can someone please edit in the same code with comments
library(tidyverse)
library(gridExtra)
library(grid)
# data generation
x <- seq(-10, 10, 0.2)
y <- seq(-10, 10, 0.2)
tbl <- crossing(x, y)
for (i in seq(1, 2)) # to create two sample plots
{
# initialize list to store subplots
p <- list()
for (j in seq(1, 3)) # to create 3 subplots
{
# for randomness
a <- runif(1)
b <- runif(1)
# add z
tbl <- tbl %>%
mutate(z = a*(x - a)^2 + b*(y - b)^2)
# plot contours
p[[j]] <- ggplot(data = tbl,
aes(x = x,
y = y,
z = z)) +
geom_contour_filled(alpha = 0.8) +
theme_bw() +
theme(legend.position = "right") +
theme(aspect.ratio = 1) +
ggtitle("Sample")
}
p <- grid.arrange(p[[1]], p[[2]], p[[3]],
ncol = 3)
ggsave(paste0("iteration - ", i, ".png"),
p,
width = 8,
height = 3)
}
The actual plots are subplot for another plot, so I can increase its size. Therefore, width and height cannot be increased in ggsave.
Thanks
You can set breaks in geom_contour_filled. You can change your pngs by doubling their size but halfing their resolution. They will remain the same in terms of pixel dimensions.
for (i in seq(1, 2)) # to create two sample plots
{
p <- list()
for (j in seq(1, 3)) # to create 3 subplots
{
# for randomness
a <- runif(1)
b <- runif(1)
tbl <- tbl %>%
mutate(z = a*(x - a)^2 + b*(y - b)^2)
p[[j]] <- ggplot(data = tbl,
aes(x = x,
y = y,
z = z)) +
geom_contour_filled(alpha = 0.8, breaks = 0:9 * 20) +
scale_fill_viridis_d(drop = FALSE) +
theme_bw() +
theme(legend.position = "right") +
theme(aspect.ratio = 1) +
ggtitle("Sample")
}
p <- grid.arrange(p[[1]], p[[2]], p[[3]],
ncol = 3)
ggsave(paste0("iteration - ", i, ".png"),
p,
width = 16,
height = 6,
dpi = 150)
}
iteration-1.png
iteration-2.png

Different colours for values above / below a linear trend line

I'm using ggplot to plot a time series with a linear regression line. I would like to have different colours for my time series depending on whether it is above or below the trend line.
Here is a code example to plot the series and the corresponding trend line with different colours for the series and the line:
x <- seq(as.Date("2000/1/1"), as.Date("2010/1/1"), "years")
y <- rnorm(length(x),0,10)
df <- data.frame(x,y)
ggplot(df, aes(x, y)) +
stat_smooth(method = 'lm', aes(colour = 'Trend'), se = FALSE) +
geom_line(aes(colour = 'Observation') ) +
theme_bw() +
xlab("x") +
ylab("y") +
scale_colour_manual(values = c("blue","red"))
Have a nice day!
I got rid of the dates, since they were driving me nuts. Perhaps someone can add a solution for that. Otherwise it seems quite doable, with some basic high school maths.
df <- data.frame(x = 2000:2010,
y = rnorm(11, 0, 10))
fm <- lm(y ~ x, data = df)
co <- coef(fm)
df$under_over <- sign(fm$residuals)
for (i in 1:(nrow(df) - 1)) {
# Get slope and intercept for line segment
slope <- (df$y[i + 1] - df$y[i]) / (df$x[i + 1] - df$x[i])
int <- df$y[i] - slope * df$x[i]
# find where they would cross
x <- (co[1] - int) / (slope - co[2])
y <- slope * x + int
# if that is in the range of the segment it is a crossing, add to the data
if (x > df$x[i] & x < df$x[i + 1])
df <- rbind(df, c(x = x, y = y, under_over = NA))
}
#order by x
df <- df[order(df$x), ]
# find color for intersections
for (i in 1:nrow(df))
if (is.na(df$under_over[i]))
df$under_over[i] <- df$under_over[i + 1]
ggplot(df) +
geom_abline(intercept = co[1], slope = co[2]) +
geom_path(aes(x, y, col = as.factor(under_over), group = 1)) +
theme_bw()

Resources