ggplot2 - how to limit panel and axis? - r

I want to know how to turn this plot:
Into this plot:
As you can see the panel and axis on the 2nd plot are limited to the data extent. I made the second graph using design software but want to know the code.
Ive already limited the x and y axis using
xlim and ylim but no difference.
Please see my code below, sorry its so messy, first time using r studio. Thanks!
ggplot() +
geom_errorbar(data = U1483_Coiling_B_M_Removed_R, mapping = aes(x = `Age (Ma) Linear Age Model`, ymin = `Lower interval*100`, ymax = `Upper interval*100`), width = 0.025, colour = 'grey') +
geom_line(data = U1483_Coiling_B_M_Removed_R, aes(x = `Age (Ma) Linear Age Model`, y = `Percent Dextral`)) +
geom_point(data = U1483_Coiling_B_M_Removed_R, aes(x = `Age (Ma) Linear Age Model`, y = `Percent Dextral`), colour = 'red') +
geom_point(data = U1483_Coiling_B_M_Removed_R, aes(x = `Age (Ma) Linear Age Model`, y = `Lab?`)) +
theme(axis.text.x=element_text(angle=90, size=10, vjust=0.5)) +
theme(axis.text.y=element_text(angle=90, size=10, vjust=0.5)) +
theme_classic() +
theme(panel.background = element_rect(colour = 'black', size = 1)) +
xlim(0, 2.85) +
ylim(0, 100)

You can use expand when specifying axis scales, like so:
# Load library
library(ggplot2)
# Set RNG
set.seed(0)
# Create dummy data
df <- data.frame(x = seq(0, 3, by = 0.1))
df$y <- 100 - abs(rnorm(nrow(df), 0, 10))
# Plot results
# Original
ggplot(df, aes(x, y)) +
geom_line() +
geom_point(colour = "#FF3300", size = 5)
# With expand
ggplot(df, aes(x, y)) +
geom_line() +
geom_point(colour = "#FF3300", size = 5) +
scale_y_continuous(expand = c(0, 0))

Related

How to smooth out a time-series geom_area with fill in ggplot?

I have the following graph and code:
Graph
ggplot(long2, aes(x = DATA, y = value, fill = variable)) + geom_area(position="fill", alpha=0.75) +
scale_y_continuous(labels = scales::comma,n.breaks = 5,breaks = waiver()) +
scale_fill_viridis_d() +
scale_x_date(date_labels = "%b/%Y",date_breaks = "6 months") +
ggtitle("Proporcions de les visites, només 9T i 9C") +
xlab("Data") + ylab("% visites") +
theme_minimal() + theme(legend.position="bottom") + guides(fill=guide_legend(title=NULL)) +
annotate("rect", fill = "white", alpha = 0.3,
xmin = as.Date.character("2020-03-16"), xmax = as.Date.character("2020-06-22"),
ymin = 0, ymax = 1)
But it has some sawtooth, how am I supposed to smooth it out?
I believe your situation is roughly analogous to the following, wherein we have missing x-positions for one group, but not the other at the same position. This causes spikes if you set position = "fill".
library(ggplot2)
x <- seq_len(100)
df <- data.frame(
x = c(x[-c(25, 75)], x[-50]),
y = c(cos(x[-c(25, 75)]), sin(x[-50])) + 5,
group = rep(c("A", "B"), c(98, 99))
)
ggplot(df, aes(x, y, fill = group)) +
geom_area(position = "fill")
To smooth out these spikes, it has been suggested to linearly interpolate the data at the missing positions.
# Find all used x-positions
ux <- unique(df$x)
# Split data by group, interpolate data groupwise
df <- lapply(split(df, df$group), function(xy) {
approxed <- approx(xy$x, xy$y, xout = ux)
data.frame(x = ux, y = approxed$y, group = xy$group[1])
})
# Recombine data
df <- do.call(rbind, df)
# Now without spikes :)
ggplot(df, aes(x, y, fill = group)) +
geom_area(position = "fill")
Created on 2022-06-17 by the reprex package (v2.0.1)
P.S. I would also have expected a red spike at x=50, but for some reason this didn't happen.

Overlaying histogram with different y-scales

I'm struggling with the following issue:
I want to plot two histograms, but since the statistics of one of the two classes is much less than the other I need to add a second y-axis to allow a direct comparison of the values.
I report below the code I used at the moment and the result.
Thank you in advance!
ggplot(data,aes(x= x ,group=class,fill=class)) + geom_histogram(position="identity",
alpha=0.5, bins = 20)+ theme_bw()
Consider the following situation where you have 800 versus 200 observations:
library(ggplot2)
df <- data.frame(
x = rnorm(1000, rep(c(1, 2), c(800, 200))),
class = rep(c("A", "B"), c(800, 200))
)
ggplot(df, aes(x, fill = class)) +
geom_histogram(bins = 20, position = "identity", alpha = 0.5,
# Note that y = stat(count) is the default behaviour
mapping = aes(y = stat(count)))
You could scale the counts for each group to a maximum of 1 by using y = stat(ncount):
ggplot(df, aes(x, fill = class)) +
geom_histogram(bins = 20, position = "identity", alpha = 0.5,
mapping = aes(y = stat(ncount)))
Alternatively, you can set y = stat(density) to have the total area integrate to 1.
ggplot(df, aes(x, fill = class)) +
geom_histogram(bins = 20, position = "identity", alpha = 0.5,
mapping = aes(y = stat(density)))
Note that after ggplot 3.3.0 stat() probably will get replaced by after_stat().
How about comparing them side by side with facets?
ggplot(data,aes(x= x ,group=class,fill=class)) +
geom_histogram(position="identity",
alpha=0.5,
bins = 20) +
theme_bw() +
facet_wrap(~class, scales = "free_y")

Preventing wrong density plots when coloring histograms according to groups

based on some dummy data I created a histogram with desity plot
set.seed(1234)
wdata = data.frame(
sex = factor(rep(c("F", "M"), each=200)),
weight = c(rnorm(200, 55), rnorm(200, 58))
)
a <- ggplot(wdata, aes(x = weight))
a + geom_histogram(aes(y = ..density..,
# color = sex
),
colour="black",
fill="white",
position = "identity") +
geom_density(alpha = 0.2,
# aes(color = sex)
) +
scale_color_manual(values = c("#868686FF", "#EFC000FF"))
The histogram of weight shall be colored corresponding to sex, so I use aes(y = ..density.., color = sex) for geom_histogram():
a + geom_histogram(aes(y = ..density..,
color = sex
),
colour="black",
fill="white",
position = "identity") +
geom_density(alpha = 0.2,
# aes(color = sex)
) +
scale_color_manual(values = c("#868686FF", "#EFC000FF"))
As I want it to, the density plot stays the same (overall for both groups), but the histograms jump scale up (and seem to be treated individually now):
How do I prevent this from happening? I need individually colored histogram bars but a joint density plot for all coloring groups.
P.S.
Using aes(color = sex) for geom_density() gets everything back to original scales - but I don't want individual density plots (like below):
a + geom_histogram(aes(y = ..density..,
color = sex
),
colour="black",
fill="white",
position = "identity") +
geom_density(alpha = 0.2,
aes(color = sex)
) +
scale_color_manual(values = c("#868686FF", "#EFC000FF"))
EDIT:
As it has been suggested, dividing by the number of groups in geom_histogram()'s aesthetics with y = ..density../2 may approximate the solution. Nevertheless, this only works with symmetric distributions like in the first output below:
a + geom_histogram(aes(y = ..density../2,
color = sex
),
colour="black",
fill="white",
position = "identity") +
geom_density(alpha = 0.2,
) +
scale_color_manual(values = c("#868686FF", "#EFC000FF"))
which yields
Less symmetric distributions, however, may cause trouble using this approach. See those below, where for 5 groups, y = ..density../5 was used. First original, then manipulation (with position = "stack"):
Since the distribution is heavy on the left, dividing by 5 underestimates on the left and overestimates on the right.
EDIT 2: SOLUTION
As suggested by Andrew, the below (complete) code solves the problem:
library(ggplot2)
set.seed(1234)
wdata = data.frame(
sex = factor(rep(c("F", "M"), each = 200)),
weight = c(rnorm(200, 55), rnorm(200, 58))
)
binwidth <- 0.25
a <- ggplot(wdata,
aes(x = weight,
# Pass binwidth to aes() so it will be found in
# geom_histogram()'s aes() later
binwidth = binwidth))
# Basic plot w/o colouring according to 'sex'
a + geom_histogram(aes(y = ..density..),
binwidth = binwidth,
colour = "black",
fill = "white",
position = "stack") +
geom_density(alpha = 0.2) +
scale_color_manual(values = c("#868686FF", "#EFC000FF")) +
# Use fixed scale for sake of comparability
scale_x_continuous(limits = c(52, 61)) +
scale_y_continuous(limits = c(0, 0.25))
# Plot w/ colouring according to 'sex'
a + geom_histogram(aes(x = weight,
# binwidth will only be found if passed to
# ggplot()'s aes() (as above)
y = ..count.. / (sum(..count..) * binwidth),
color = sex),
binwidth = binwidth,
fill="white",
position = "stack") +
geom_density(alpha = 0.2) +
scale_color_manual(values = c("#868686FF", "#EFC000FF")) +
# Use fixed scale for sake of comparability
scale_x_continuous(limits = c(52, 61)) +
scale_y_continuous(limits = c(0, 0.25)) +
guides(color = FALSE)
Note:
binwidth = binwidth needed to be passed to ggplot()'s aes(), otherwise the pre-specified binwidth would not be found by geom_histogram()'s aes(). Further, position = "stack" is specified, so that both versions of the histogram are comparable. Plots for dummy data and the more complex distribution below:
Solved - Thanks for your help!
I don't think you can do it using y=..density.., but you can recreate the same thing like this...
binwidth <- 0.25 #easiest to set this manually so that you know what it is
a + geom_histogram(aes(y = ..count.. / (sum(..count..) * binwidth),
color = sex),
binwidth = binwidth,
fill="white",
position = "identity") +
geom_density(alpha = 0.2) +
scale_color_manual(values = c("#868686FF", "#EFC000FF"))

How to draw graph with dodged position?

I am an R novice. I will try to be as brief and simple as possible. Currently, I am trying to connect points between two conditions based on another condition all over a single discrete x-axis.
Below is some test data and my attempt to plot some data.
set.seed(42)
# Test case data
mydf1 <- tibble(
xx = rep('myLabel', 8),
yy = rnorm(8),
grp = rep(c(1, 2), each = 4),
cond = rep(c('a', 'b', 'c', 'd'), length.out = 8)
)
ggplot(mydf1, aes(x = xx, y = yy, col = factor(grp))) +
geom_point(position = position_dodge(width = 0.9)) +
geom_path(position = position_dodge(width = 0.9), aes(group = cond), col = "black") +
theme_bw() +
ggtitle("Test Case for geom_path and position_dodge")
From what I can tell, it seems that position_dodge is applied after the draw. Is there a way to change this behavior? or to achieve the overall goal of connecting these points in this type of way?
Thank you for your time.
EDIT: details.
EDIT2:
I would like to capture a before and after relationship between grp based on 4 conditions in one big main conditions.
Probably you want this.
set.seed(42)
library(ggplot2)
ggplot(mydf1, aes(x = grp, y = yy, col = factor(grp))) +
geom_point() +
geom_path(aes(group = cond), col = "black") +
theme_bw() +
ggtitle("Test Case for geom_path and position_dodge") +
xlim(c(.5, 2.5)) +
labs(color = "Group", x = "myLabel", y = "yy") +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank())
You could plot a categorical x axis.
ggplot(mydf1, aes(x = cond, y = yy, col = factor(grp))) +
geom_point() +
geom_path(aes(group = cond), col = "black") +
theme_bw() +
ggtitle("Test Case for categorical X-axis")
Alternatively, if you need comparison across multiple categorical dimensions mapped to the x axis, you can try facets.
ggplot(mydf1, aes(x = cond, y = yy, col = factor(grp))) +
geom_point() +
geom_path(aes(group = cond), col = "black") +
theme_bw() +
ggtitle("Test Case for Categorical X-axis and Facets") +
facet_wrap(~cond)

"Density" curve overlay on histogram where vertical axis is frequency (aka count) or relative frequency?

Is there a method to overlay something analogous to a density curve when the vertical axis is frequency or relative frequency? (Not an actual density function, since the area need not integrate to 1.) The following question is similar:
ggplot2: histogram with normal curve, and the user self-answers with the idea to scale ..count.. inside of geom_density(). However this seems unusual.
The following code produces an overinflated "density" line.
df1 <- data.frame(v = rnorm(164, mean = 9, sd = 1.5))
b1 <- seq(4.5, 12, by = 0.1)
hist.1a <- ggplot(df1, aes(v)) +
stat_bin(aes(y = ..count..), color = "black", fill = "blue",
breaks = b1) +
geom_density(aes(y = ..count..))
hist.1a
#joran's response/comment got me thinking about what the appropriate scaling factor would be. For posterity's sake, here's the result.
When Vertical Axis is Frequency (aka Count)
Thus, the scaling factor for a vertical axis measured in bin counts is
In this case, with N = 164 and the bin width as 0.1, the aesthetic for y in the smoothed line should be:
y = ..density..*(164 * 0.1)
Thus the following code produces a "density" line scaled for a histogram measured in frequency (aka count).
df1 <- data.frame(v = rnorm(164, mean = 9, sd = 1.5))
b1 <- seq(4.5, 12, by = 0.1)
hist.1a <- ggplot(df1, aes(x = v)) +
geom_histogram(aes(y = ..count..), breaks = b1,
fill = "blue", color = "black") +
geom_density(aes(y = ..density..*(164*0.1)))
hist.1a
When Vertical Axis is Relative Frequency
Using the above, we could write
hist.1b <- ggplot(df1, aes(x = v)) +
geom_histogram(aes(y = ..count../164), breaks = b1,
fill = "blue", color = "black") +
geom_density(aes(y = ..density..*(0.1)))
hist.1b
When Vertical Axis is Density
hist.1c <- ggplot(df1, aes(x = v)) +
geom_histogram(aes(y = ..density..), breaks = b1,
fill = "blue", color = "black") +
geom_density(aes(y = ..density..))
hist.1c
Try this instead:
ggplot(df1,aes(x = v)) +
geom_histogram(aes(y = ..ncount..)) +
geom_density(aes(y = ..scaled..))
library(ggplot2)
smoothedHistogram <- function(dat, y, bins=30, xlabel = y, ...){
gg <- ggplot(dat, aes_string(y)) +
geom_histogram(bins=bins, center = 0.5, stat="bin",
fill = I("midnightblue"), color = "#E07102", alpha=0.8)
gg_build <- ggplot_build(gg)
area <- sum(with(gg_build[["data"]][[1]], y*(xmax - xmin)))
gg <- gg +
stat_density(aes(y=..density..*area),
color="#BCBD22", size=2, geom="line", ...)
gg$layers <- gg$layers[2:1]
gg + xlab(xlabel) +
theme_bw() + theme(axis.title = element_text(size = 16),
axis.text = element_text(size = 12))
}
dat <- data.frame(x = rnorm(10000))
smoothedHistogram(dat, "x")

Resources