R correction of plot - r

I have a code to plot histogram in R that works fine:
fun_hist <- function(c, E, HS, log_EC_50) {
df <- data.frame(log_c = c, response = V({{c}}, {{E}}, {{HS}}, {{log_EC50}}))
ggplot2::ggplot(df, aes( response)) + geom_histogram(binwidth=0.03)
}
I want to correct my histogram so that its bars have a graphic design as in the attached photo.

This looks pretty similar to my eye:
library(ggplot2)
set.seed(1)
df <- data.frame(response = rnorm(25000, 15, 2))
ggplot(df, aes(response)) +
geom_histogram(binwidth = 0.5, color = 'black', fill = "#547fbf") +
xlim(c(0, 24)) +
labs(x = NULL, y = NULL) +
theme_bw(base_size = 16)
Created on 2022-11-03 with reprex v2.0.2

Related

How to smooth out a time-series geom_area with fill in ggplot?

I have the following graph and code:
Graph
ggplot(long2, aes(x = DATA, y = value, fill = variable)) + geom_area(position="fill", alpha=0.75) +
scale_y_continuous(labels = scales::comma,n.breaks = 5,breaks = waiver()) +
scale_fill_viridis_d() +
scale_x_date(date_labels = "%b/%Y",date_breaks = "6 months") +
ggtitle("Proporcions de les visites, només 9T i 9C") +
xlab("Data") + ylab("% visites") +
theme_minimal() + theme(legend.position="bottom") + guides(fill=guide_legend(title=NULL)) +
annotate("rect", fill = "white", alpha = 0.3,
xmin = as.Date.character("2020-03-16"), xmax = as.Date.character("2020-06-22"),
ymin = 0, ymax = 1)
But it has some sawtooth, how am I supposed to smooth it out?
I believe your situation is roughly analogous to the following, wherein we have missing x-positions for one group, but not the other at the same position. This causes spikes if you set position = "fill".
library(ggplot2)
x <- seq_len(100)
df <- data.frame(
x = c(x[-c(25, 75)], x[-50]),
y = c(cos(x[-c(25, 75)]), sin(x[-50])) + 5,
group = rep(c("A", "B"), c(98, 99))
)
ggplot(df, aes(x, y, fill = group)) +
geom_area(position = "fill")
To smooth out these spikes, it has been suggested to linearly interpolate the data at the missing positions.
# Find all used x-positions
ux <- unique(df$x)
# Split data by group, interpolate data groupwise
df <- lapply(split(df, df$group), function(xy) {
approxed <- approx(xy$x, xy$y, xout = ux)
data.frame(x = ux, y = approxed$y, group = xy$group[1])
})
# Recombine data
df <- do.call(rbind, df)
# Now without spikes :)
ggplot(df, aes(x, y, fill = group)) +
geom_area(position = "fill")
Created on 2022-06-17 by the reprex package (v2.0.1)
P.S. I would also have expected a red spike at x=50, but for some reason this didn't happen.

Plot a 2D Canopy Height Model (CHM) line

I'm looking for some help creating a "Canopy Height Model (CHM)-Zmax-curve/line" on my las-plots. I extracted a transect with the lidR package
las_tr <- clip_transect(las, p1, p2, width = 3, xz=T)
And I plotted it with ggplot
ggplot(las_tr#data, aes(X,Z, color = Z)) +
geom_point(size = 0.5) +
coord_equal() +
theme_minimal() +
scale_color_gradientn(colours = height.colors(50)) +
xlim(-80,0) + ylim(0,45) +
labs(title = "2D profile", subtitle= paste(round(x[i]),round(y[i])))
I have this
But I would like something like this
I hope someone can help me.
library(lidR)
library(dplyr)
library(ggplot2)
LASfile <- system.file("extdata", "Megaplot.laz", package="lidR")
las = readLAS(LASfile, select = "xyz")
p1 <- c(684800, y = 5017800)
p2 <- c(684900, y = 5017900)
tr <- clip_transect(las, p1, p2, width = 4, xz = T)
DSM <- tr#data[ , .(Z = max(Z)), by = list(X = plyr::round_any(X, 1))]
ggplot(tr#data) +
aes(X,Z, color = Z) +
geom_point(size = 0.5) +
geom_line(data = DSM, color = "black") +
coord_equal() +
theme_minimal() +
scale_color_gradientn(colours = height.colors(50))
Created on 2021-05-27 by the reprex package (v2.0.0)

Two density plots in ggplot

This is really basic. Still hope I can get your help. I need to superimpose two density plots. The first is a generated normal density plot given mean and sd of AAPL. >
x <- seq(-20, 20, length.out = 5113)
normAAPL<-data.frame(x, f = dnorm(x,mean = meanAAPL, sd = sdAAPL)) %>%
ggplot(aes(x, f)) +
geom_line() +
stat_function(fun=dnorm, geom="line", col=2, lty=2)+
ylim(0,0.2)
> meanAAPL
[1] 0.101133
> sdAAPL
[1] 2.461525
The next is the actual distribution
dAAPL <-density(oldandnew$AAPL)
Where the 20 first AAPL data is
c(-8.810021, 1.45281, -9.051401, 4.628075, -1.774445, -5.25055,
-6.181806, 10.40407, 3.74302, 3.425328, 2.48944, 6.309463, -1.948374,
-4.652429, 5.493372, -1.852238, -0.1725783, -7.924, 2.074379,
-3.431709)
Do I need to combine the data in one data frame to plot them in the same ggplot?
Hope you can help me out.
df <- data.frame(x = seq(-20, 20, length.out = 5113),
f = dnorm(x))
df2 <- data.frame(x = c(-8.810021, 1.45281, -9.051401, 4.628075, -1.774445, -5.25055,
-6.181806, 10.40407, 3.74302, 3.425328, 2.48944, 6.309463, -1.948374,
-4.652429, 5.493372, -1.852238, -0.1725783, -7.924, 2.074379,
-3.431709))
ggplot() +
geom_line(data = df, aes(x, f, colour = "Normal")) +
geom_density(data = df2, aes(x, colour = "Actual")) +
ylim(0,0.2) +
scale_color_manual(name = "Distribution", values = c("Normal" = "Blue", "Actual" = "Red")) +
theme_minimal() + theme(legend.position = "top", aspect.ratio = 1)
Produces:

ggplot2 confusion matrix geom_text labeling

I've plotted a confusion matrix (predicting 5 outcomes) in R using ggplot and scales for geom_text labeling.
The way geom_text(aes(label = percent(Freq/sum(Freq))) is written in code, it's showing Frequency of each box divided by sum of all observations, but what I want to do is get Frequency of each box divided by sum Frequency for each Reference.
In other words, instead of A,A = 15.8%,
it should be A,A = 15.8%/(0.0%+0.0%+0.0%+0.0%+15.8%%) = 100.0%
library(ggplot2)
library(scales)
valid_actual <- as.factor(c("A","B","B","C","C","C","E","E","D","D","A","A","A","E","E","D","D","C","B"))
valid_pred <- as.factor(c("A","B","C","C","E","C","E","E","D","B","A","B","A","E","D","E","D","C","B"))
cfm <- confusionMatrix(valid_actual, valid_pred)
ggplotConfusionMatrix <- function(m){
mytitle <- paste("Accuracy", percent_format()(m$overall[1]),
"Kappa", percent_format()(m$overall[2]))
p <-
ggplot(data = as.data.frame(m$table) ,
aes(x = Reference, y = Prediction)) +
geom_tile(aes(fill = log(Freq)), colour = "white") +
scale_fill_gradient(low = "white", high = "green") +
geom_text(aes(x = Reference, y = Prediction, label = percent(Freq/sum(Freq)))) +
theme(legend.position = "none") +
ggtitle(mytitle)
return(p)
}
ggplotConfusionMatrix(cfm)
The problem is that, as far as I know, ggplot is not able to do group calculation. See this recent post for similar question.
To solve your problem you should take advantage of the dplyrpackage.
This should work
library(ggplot2)
library(scales)
library(caret)
library(dplyr)
valid_actual <- as.factor(c("A","B","B","C","C","C","E","E","D","D","A","A","A","E","E","D","D","C","B"))
valid_pred <- as.factor(c("A","B","C","C","E","C","E","E","D","B","A","B","A","E","D","E","D","C","B"))
cfm <- confusionMatrix(valid_actual, valid_pred)
ggplotConfusionMatrix <- function(m){
mytitle <- paste("Accuracy", percent_format()(m$overall[1]),
"Kappa", percent_format()(m$overall[2]))
data_c <- mutate(group_by(as.data.frame(m$table), Reference ), percentage =
percent(Freq/sum(Freq)))
p <-
ggplot(data = data_c,
aes(x = Reference, y = Prediction)) +
geom_tile(aes(fill = log(Freq)), colour = "white") +
scale_fill_gradient(low = "white", high = "green") +
geom_text(aes(x = Reference, y = Prediction, label = percentage)) +
theme(legend.position = "none") +
ggtitle(mytitle)
return(p)
}
ggplotConfusionMatrix(cfm)
And the result:

ggplot2: Stat_function misbehaviour with log scales

I am trying to plot a point histogram (a histogram that shows the values with a point instead of bars) that is log-scaled. The result should look like this:
MWE:
Lets simulate some Data:
set.seed(123)
d <- data.frame(x = rnorm(1000))
To get the point histogram I need to calculate the histogram data (hdata) first
hdata <- hist(d$x, plot = FALSE)
tmp <- data.frame(mids = hdata$mids,
density = hdata$density,
counts = hdata$counts)
which we can plot like this
p <- ggplot(tmp, aes(x = mids, y = density)) + geom_point() +
stat_function(fun = dnorm, col = "red")
p
to get this graph:
In theory we should be able to apply the log scales (and set the y-limits to be above 0) and we should have a similar picture to the target graph.
However, if I apply it I get the following graph:
p + scale_y_log10(limits = c(0.001, 10))
The stat_function clearly shows non-scaled values instead of producing a figure closer to the solid line in the first picture.
Any ideas?
Bonus
Are there any ways to graph the histogram with dots without using the hist(..., plot = FALSE) function?
EDIT Workaround
One possible solution is to calculate the dnorm-data outside of ggplot and then insert it as a line. For example
tmp2 <- data.frame(mids = seq(from = min(tmp$mids), to = max(tmp$mids),
by = (max(tmp$mids) - min(tmp$mids))/10000))
tmp2$dnorm <- dnorm(tmp2$mids)
# Plot it
ggplot() +
geom_point(data = tmp, aes(x = mids, y = density)) +
geom_line(data = tmp2, aes(x = mids, y = dnorm), col = "red") +
scale_y_log10()
This returns a graph like the following. This is basically the graph, but it doesn't resolve the stat_function issue.
library(ggplot2)
set.seed(123)
d <- data.frame(x = rnorm(1000))
ggplot(d, aes(x)) +
stat_bin(geom = "point",
aes(y = ..density..),
#same breaks as function hist's default:
breaks = pretty(range(d$x), n = nclass.Sturges(d$x), min.n = 1),
position = "identity") +
stat_function(fun = dnorm, col = "red") +
scale_y_log10(limits = c(0.001, 10))
Another possible solution that I found while revisiting this issue is to apply the log10 to the stat_function-call.
library(ggplot2)
set.seed(123)
d <- data.frame(x = rnorm(1000))
hdata <- hist(d$x, plot = FALSE)
tmp <- data.frame(mids = hdata$mids,
density = hdata$density,
counts = hdata$counts)
ggplot(tmp, aes(x = mids, y = density)) + geom_point() +
stat_function(fun = function(x) log10(dnorm(x)), col = "red") +
scale_y_log10()
Created on 2018-07-25 by the reprex package (v0.2.0).

Resources