R function to find suitable values for fitting constants - r

library(ggplot2)
set.seed(1)
dataset <- data.frame(X = rnorm(1000))
dfun <- function(x, a, b) 1/(sqrt(2*pi)*b)*exp(-0.5*((x-a)^2/(2*b^2)))
ggplot(dataset, aes(x = X)) +
geom_histogram(aes(y = ..density..), binwidth = 0.5)+
stat_function(fun = dfun,
args = list(a = , b = ))
How can I calculate suitable values of a and b in case like this?

You can compute values for the arguments a and b with nls. Something like the following.
dens <- density(dataset$X, n = nrow(dataset))
df_dens <- data.frame(x = dens$x, y = dens$y)
a0 <- mean(dataset$X)
b0 <- sd(dataset$X)
fit <- nls(y ~ dfun(x, a, b), data = df_dens, start = list(a = a0, b = b0))
coef(fit)
# a b
#-0.007006625 0.97518478
Now plot the histogram and the function with these values for a and b.
ggplot(dataset, aes(x = X)) +
geom_histogram(aes(y = ..density..), binwidth = 0.5)+
stat_function(fun = dfun,
args = list(a = coef(fit)[1], b = coef(fit)[2]))

Related

Add a fitting function to histogram

library(ggplot2)
library(fitdistrplus)
set.seed(1)
dat <- data.frame(n = rlnorm(1000))
# binwidth
bw = 0.2
# fit a lognormal distribution
fit_params <- fitdistr(dat$n,"lognormal")
ggplot(dat, aes(n)) +
geom_histogram(aes(y = ..density..), binwidth = bw, colour = "black") +
stat_function(fun = dlnorm, size = 1, color = 'gray',
args = list(mean = fit_params$estimate[1], sd = fit_params$estimate[2]))
# my defined function
myfun <- function(x, a, b) 1/(sqrt(2*pi*b(x-1)))*exp(-0.5*((log(x-a)/b)^2)) # a and b are meanlog and sdlog resp.
I'd like to fit a modified lognormal defined by myfun to a density histogram. How do I add this function?
Maybe you are looking for this. Some values can not appear because of the domain of your myfun:
library(ggplot2)
library(fitdistrplus)
set.seed(1)
dat <- data.frame(n = rlnorm(1000))
# binwidth
bw = 0.2
# fit a lognormal distribution
fit_params <- fitdistr(dat$n,"lognormal")
# my defined function
myfun <- function(x, a, b) 1/(sqrt(2*pi*b*(x-1)))*exp(-0.5*((log(x-a)/b)^2))
# a and b are meanlog and sdlog resp.
#Plot
ggplot(dat, aes(n)) +
geom_histogram(aes(y = ..density..), binwidth = bw, colour = "black") +
stat_function(fun = myfun, size = 1, color = 'gray',
args = list(a = fit_params$estimate[1], b = fit_params$estimate[2]))
Output:

How to draw multiple contours in the same

I am trying to get two contours in the same plot using ggplot2 in R.
Here is a reproducible example:
library(MASS)
library(ggplot2)
# first contour
m <- c(.0, -.0)
sigma <- matrix(c(1,.5,.5,1), nrow=2)
data.grid <- expand.grid(s.1 = seq(-3, 3, length.out=200), s.2 = seq(-3, 3, length.out=200))
q.samp <- cbind(data.grid, prob = mvtnorm::dmvnorm(data.grid, mean = m, sigma = sigma))
plot1 <- ggplot(q.samp, aes(x = s.1, y = s.2, z = prob)) +
stat_contour(color = 'green')
# second contour
m1 <- c(1, 1)
sigma1 <- matrix(c(1,-.5,-.5,1), nrow=2)
set.seed(10)
data.grid1 <- expand.grid(s.1 = seq(-3, 3, length.out=200), s.2 = seq(-3, 3, length.out=200))
q.samp1 <- cbind(data.grid1, prob = mvtnorm::dmvnorm(data.grid1, mean = m1, sigma = sigma1))
plot2 <- ggplot(q.samp1, aes(x = s.1, y = s.2, z = prob)) +
stat_contour(color = 'red')
However, trying plot1 + plot2 also does not work. Is there a way to get the two contours on the same plot.
What about including another stat_contour with different data?
ggplot(q.samp1, aes(x = s.1, y = s.2, z = prob)) +
stat_contour(color = 'red') +
stat_contour(data = q.samp, aes(x = s.1, y = s.2, z = prob), color = 'green')

Generating multiple plots containing functions in ggplot2

I am trying to make a composite plot in R using the packages ggplot2 and ggpubr.
I have no problem in making the composite plots except each plot has a normal distribution curve specific to that dataset. When I generate the composite plot, both plots have the same curve, that of the last dataset.
How can I generate the composite plot with each plot having its own specific normal distribution curve?
CODE AND OUTPUT PLOTS
## PLOT 1 ##
results_matrix_C <- data.frame(matrix(rnorm(20), nrow=20))
colnames(results_matrix_C) <- c("X")
m <- mean(results_matrix_C$X)
sd <- sd(results_matrix_C$X)
dnorm_C <- function(x){
norm_C <- dnorm(x, m, sd)
return(norm_C)
}
e = 1
dnorm_one_sd_C <- function(x){
norm_one_sd_C <- dnorm(x, m, sd)
# Have NA values outside interval x in [e]:
norm_one_sd_C[x <= e] <- NA
return(norm_one_sd_C)
}
C <- ggplot(results_matrix_C, aes(x = results_matrix_C$X)) +
geom_histogram(aes(y=..density..), bins = 10, colour = "black", fill = "white") +
stat_function(fun = dnorm_one_sd_C, geom = "area", fill = "#CE9A05", color = "#CE9A05", alpha = 0.25, size = 1) +
stat_function(fun = dnorm_C, colour = "#CE0539", size = 1) +
theme_classic()
## PLOT 2 ##
results_matrix_U <- data.frame(matrix(rnorm(20)+1, nrow=20))
colnames(results_matrix_U) <- c("X")
m <- mean(results_matrix_U$X)
sd <- sd(results_matrix_U$X)
dnorm_U <- function(x){
norm_U <- dnorm(x, m, sd)
return(norm_U)
}
e = 2
dnorm_one_sd_U <- function(x){
norm_one_sd_U <- dnorm(x, m, sd)
# Have NA values outside interval x in [e]:
norm_one_sd_U[x <= e] <- NA
return(norm_one_sd_U)
}
U <- ggplot(results_matrix_U, aes(x = results_matrix_U$X)) +
geom_histogram(aes(y=..density..), bins = 10, colour = "black", fill = "white") +
stat_function(fun = dnorm_one_sd_U, geom = "area", fill = "#CE9A05", color = "#CE9A05", alpha = 0.25, size = 1) +
stat_function(fun = dnorm_U, colour = "#CE0539", size = 1) +
theme_classic()
library(ggpubr)
ggarrange(C, U,
nrow = 1, ncol = 2)
As you can see in the composite plot, the first one has taken the normal distribution curve of the second plot rather than its own one from my initial plot (Plot 1).
UPDATE
Variable "e" refers to the shaded area which is related to the distribution curve.
m = mean of the dataset
sd = standard deviation of the dataset
m and sd are used to generate the normal distribution curves
SOLVED
By inserting the function in full into the stat_function section of the ggplot2 code, this has worked
i.e:
## PLOT 1 ##
results_matrix_C <- data.frame(matrix(rnorm(20), nrow=20))
colnames(results_matrix_C) <- c("X")
mean <- mean(results_matrix_C$X)
sd <- sd(results_matrix_C$X)
e = 1
C <- ggplot(results_matrix_C, aes(x = results_matrix_C$X)) +
geom_histogram(aes(y=..density..), bins = 10, colour = "black", fill = "white") +
stat_function(
fun = function(x, mean, sd, e){
norm_one_sd_C <- dnorm(x, mean, sd)
norm_one_sd_C[x <= e] <- NA
return(norm_one_sd_C)},
args = c(mean = mean, sd = sd, e = e), geom = "area", fill = "#CE9A05", color = "#CE9A05", alpha = 0.25, size = 1) +
stat_function(
fun = function(x, mean, sd){
dnorm(x = x, mean = mean, sd = sd)},
args = c(mean = mean, sd = sd), colour = "#CE0539", size = 1) +
theme_classic()
## PLOT 2 ##
results_matrix_U <- data.frame(matrix(rnorm(20)+1, nrow=20))
colnames(results_matrix_U) <- c("X")
mean <- mean(results_matrix_U$X)
sd <- sd(results_matrix_U$X)
e = 2
U <- ggplot(results_matrix_U, aes(x = results_matrix_U$X)) +
geom_histogram(aes(y=..density..), bins = 10, colour = "black", fill = "white") +
stat_function(
fun = function(x, mean, sd, e){
norm_one_sd_U <- dnorm(x, mean, sd)
norm_one_sd_U[x <= e] <- NA
return(norm_one_sd_U)},
args = c(mean = mean, sd = sd, e = e), geom = "area", fill = "#CE9A05", color = "#CE9A05", alpha = 0.25, size = 1) +
stat_function(
fun = function(x, mean, sd){
dnorm(x = x, mean = mean, sd = sd)},
args = c(mean = mean, sd = sd), colour = "#CE0539", size = 1) +
theme_classic()
library(ggpubr)
ggarrange(C, U,
nrow = 1, ncol = 2)

operation between stat_summary_hex plots made in ggplot2

I have two populations A and B distributed spatially with one character Z, I want to be able to make an hexbin substracting the proportion of the character in each hexbin. Here I have the code for two theoretical populations A and B
library(hexbin)
library(ggplot2)
set.seed(2)
xA <- rnorm(1000)
set.seed(3)
yA <- rnorm(1000)
set.seed(4)
zA <- sample(c(1, 0), 20, replace = TRUE, prob = c(0.2, 0.8))
hbinA <- hexbin(xA, yA, xbins = 40, IDs = TRUE)
A <- data.frame(x = xA, y = yA, z = zA)
set.seed(5)
xB <- rnorm(1000)
set.seed(6)
yB <- rnorm(1000)
set.seed(7)
zB <- sample(c(1, 0), 20, replace = TRUE, prob = c(0.4, 0.6))
hbinB <- hexbin(xB, yB, xbins = 40, IDs = TRUE)
B <- data.frame(x = xB, y = yB, z = zB)
ggplot(A, aes(x, y, z = z)) + stat_summary_hex(fun = function(z) sum(z)/length(z), alpha = 0.8) +
scale_fill_gradientn(colours = c("blue","red")) +
guides(alpha = FALSE, size = FALSE)
ggplot(B, aes(x, y, z = z)) + stat_summary_hex(fun = function(z) sum(z)/length(z), alpha = 0.8) +
scale_fill_gradientn (colours = c("blue","red")) +
guides(alpha = FALSE, size = FALSE)
here is the two resulting graphs
My goal is to make a third graph with hexbins with the values of the difference between hexbins at the same coordinates but I don't even know how to start to do it, I have done something similar in the raster Package, but I need it as hexbins
Thanks a lot
You need to make sure that both plots use the exact same binning. In order to achieve this, I think it is best to do the binning beforehand and then plot the results with stat_identity / geom_hex. With the variables from your code sample you ca do:
## find the bounds for the complete data
xbnds <- range(c(A$x, B$x))
ybnds <- range(c(A$y, B$y))
nbins <- 30
# function to make a data.frame for geom_hex that can be used with stat_identity
makeHexData <- function(df) {
h <- hexbin(df$x, df$y, nbins, xbnds = xbnds, ybnds = ybnds, IDs = TRUE)
data.frame(hcell2xy(h),
z = tapply(df$z, h#cID, FUN = function(z) sum(z)/length(z)),
cid = h#cell)
}
Ahex <- makeHexData(A)
Bhex <- makeHexData(B)
## not all cells are present in each binning, we need to merge by cellID
byCell <- merge(Ahex, Bhex, by = "cid", all = T)
## when calculating the difference empty cells should count as 0
byCell$z.x[is.na(byCell$z.x)] <- 0
byCell$z.y[is.na(byCell$z.y)] <- 0
## make a "difference" data.frame
Diff <- data.frame(x = ifelse(is.na(byCell$x.x), byCell$x.y, byCell$x.x),
y = ifelse(is.na(byCell$y.x), byCell$y.y, byCell$y.x),
z = byCell$z.x - byCell$z.y)
## plot the results
ggplot(Ahex) +
geom_hex(aes(x = x, y = y, fill = z),
stat = "identity", alpha = 0.8) +
scale_fill_gradientn (colours = c("blue","red")) +
guides(alpha = FALSE, size = FALSE)
ggplot(Bhex) +
geom_hex(aes(x = x, y = y, fill = z),
stat = "identity", alpha = 0.8) +
scale_fill_gradientn (colours = c("blue","red")) +
guides(alpha = FALSE, size = FALSE)
ggplot(Diff) +
geom_hex(aes(x = x, y = y, fill = z),
stat = "identity", alpha = 0.8) +
scale_fill_gradientn (colours = c("blue","red")) +
guides(alpha = FALSE, size = FALSE)

Filling alphahull with ggplot2

I used alphahull package to delineate dots on a map.
I plot the contours with a geom_segment.
My question is : how to fill the delineation given by the segment with a color ?
Here is a reproducible example :
set.seed(2)
dat <- data.frame(x = rnorm(20, 10, 5), y = rnorm(20, 20, 5), z = c(rep(1, 6), rep(2, 4)))
library(ggplot2)
library(alphahull)
alpha <- 100
alphashape1 <- ashape(dat[which(dat$z==1), c("x", "y")], alpha = alpha)
alphashape2 <- ashape(dat[which(dat$z==2), c("x", "y")], alpha = alpha)
map <- ggplot(dat, aes(x = x, y = y)) +
geom_point(data = dat, aes(x = x, y = y, colour = as.factor(dat$z))) +
geom_segment(data = data.frame(alphashape1$edges), aes(x = x1, y = y1, xend = x2, yend = y2, colour = levels(as.factor(dat$z))[1])) +
geom_segment(data = data.frame(alphashape2$edges), aes(x = x1, y = y1, xend = x2, yend = y2, colour = levels(as.factor(dat$z))[2]))
map
I believe this works w/o the need for graph ops:
fortify.ashape <- function(ashape_res) {
xdf <- data.frame(ashape_res$edges)
xdf <- do.call(
rbind,
lapply(1:nrow(xdf), function(i) {
rbind(
data.frame(x=xdf$x1[i], y=xdf$y1[i]),
data.frame(x=xdf$x2[i], y=xdf$y2[i])
)
})
)
xdf <- xdf[order(-1 * atan2(
xdf$y - mean(range(xdf$y)),
xdf$x - mean(range(xdf$x)))), c("x", "y")]
xdf <- rbind.data.frame(xdf[nrow(xdf),], xdf[1:(nrow(xdf)-1),])
xdf
}
alphashape1 <- ashape(dat[which(dat$z == 1), c("x", "y")], alpha = 15)
alphashape2 <- ashape(dat[which(dat$z == 2), c("x", "y")], alpha = 15)
ggplot() +
geom_point(data = dat, aes(x = x, y = y, colour = as.factor(dat$z))) +
geom_polygon(data=alphashape1, aes(x, y), fill="red", alpha=2/3) +
geom_polygon(data=alphashape2, aes(x, y), fill="blue", alpha=2/3)
This is because the ashape function only returns segments, not in any order.
The only way I found to reconstruct the order was by using the node information to form a graph and then find the shortest path along that graph.
A detailed example is here: https://rpubs.com/geospacedman/alphasimple - the code needs wrapping into a single function, which should be fairly easy to do. Once you have that order sorted, geom_polygon will draw it with filled shading in ggplot2.
Based on Spacedman's answer, I ordered separately the two sets of points and came up with this solution.
It could be optimized with a function that does it for each group automatically.
set.seed(2)
dat <- data.frame(x = rnorm(20, 10, 5), y = rnorm(20, 20, 5), z = c(rep(1, 6), rep(2, 4)))
library(ggplot2)
library(alphahull)
alpha <- 100
alphashape1 <- ashape(dat[which(dat$z==1), c("x", "y")], alpha = alpha)
alphashape2 <- ashape(dat[which(dat$z==2), c("x", "y")], alpha = alpha)
map <- ggplot(dat, aes(x = x, y = y)) +
geom_point(data = dat, aes(x = x, y = y, colour = as.factor(dat$z))) +
geom_segment(data = data.frame(alphashape1$edges), aes(x = x1, y = y1, xend = x2, yend = y2, colour = levels(as.factor(dat$z))[1])) +
geom_segment(data = data.frame(alphashape2$edges), aes(x = x1, y = y1, xend = x2, yend = y2, colour = levels(as.factor(dat$z))[2]))
map
alpha <- 15 # transparency argument
# First contour
alphashape1 <- ashape(dat[which(dat$z == 1), c("x", "y")], alpha = alpha)
alphashape1_ind <- alphashape1$edges[, c("ind1", "ind2")]
class(alphashape1_ind) = "character"
alphashape1_graph <- graph.edgelist(alphashape1_ind, directed = FALSE)
cut_graph1 <- alphashape1_graph - E(alphashape1_graph)[1] # Cut the first edge
ends1 <- names(which(degree(cut_graph1) == 1)) # Get two nodes with degree = 1
path1 <- get.shortest.paths(cut_graph1, ends1[1], ends1[2])$vpath[[1]]
path_nodes1 <- as.numeric(V(alphashape1_graph)[path1]$name)
# Second contour
alphashape2 <- ashape(dat[which(dat$z == 2), c("x", "y")], alpha = alpha)
alphashape2_ind <- alphashape2$edges[, c("ind1", "ind2")]
class(alphashape2_ind) = "character"
alphashape2_graph <- graph.edgelist(alphashape2_ind, directed = FALSE)
cut_graph2 <- alphashape2_graph - E(alphashape2_graph)[1] # Cut the first edge
ends2 <- names(which(degree(cut_graph2) == 1)) # Get two nodes with degree = 1
path2 <- get.shortest.paths(cut_graph2, ends2[1], ends2[2])$vpath[[1]]
path_nodes2 <- as.numeric(V(alphashape2_graph)[path2]$name)
# Updating of previous plot (see question)
map +
geom_polygon(data = dat[which(dat$z == 1), c("x", "y")][path_nodes1, ], aes(x = x, y = y),
fill = "red", colour = "red", size = 0.5, alpha = 0.3) +
geom_polygon(data = dat[which(dat$z == 2), c("x", "y")][path_nodes2, ],
aes(x = x, y = y), colour = "blue", fill = "blue", size = 0.5, alpha = 0.3)

Resources