Multiple histograms with non-integer frequencies in R using ggplot - r

I'm trying to find a way to plot multiple histograms of non-integer frequencies in R. For example:
# Two example series: observed values and their non-integer frequencies.
a <- c(1, 2, 3, 4, 5)
a_freq <- c(1.5, 2.5, 3.5, 4.5, 5.5)
b <- c(2, 4, 6, 8, 10)
b_freq <- c(2.5, 5, 6, 7, 8)
using something like
# qplot() is deprecated in recent ggplot2 releases, so use ggplot() directly.
# `weight` makes each value of `a` count a_freq times in its bin.
ggplot(data.frame(a, a_freq), aes(x = a, weight = a_freq)) +
  geom_histogram()
works, but how do I superimpose b (and b_freq) onto this? Any ideas?
This is what we would do if the frequencies are integer values:
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a confusing message.
library(ggplot2)
library(reshape2)

set.seed(1) # fix the RNG so the simulated draws are reproducible
df <- data.frame(
  x = rnorm(n = 1000, mean = 5, sd = 2),
  y = rnorm(n = 1000, mean = 2),
  z = rnorm(n = 1000, mean = 10)
)

# melt() reshapes the three columns into long form; the plot then bins
# `value` and draws one dodged set of bars per `variable`.
ggplot(melt(df), aes(value, fill = variable)) +
  geom_histogram(position = "dodge")
Is there something similar for the case where the frequencies are non-integer values?
Thanks,
Karan

I'm still not entirely sure what you're trying to do, so here are four options:
library(ggplot2)
# Two example series: x positions and their non-integer frequencies.
a <- c(1, 2, 3, 4, 5)
a_freq <- c(1.5, 2.5, 3.5, 4.5, 5.5)
b <- c(2, 4, 6, 8, 10)
b_freq <- c(2.5, 5, 6, 7, 8)

# Stack both series into one long data frame; `grp` records which series
# ("a" or "b") each row came from.
dat <- data.frame(
  x = c(a, b),
  freq = c(a_freq, b_freq),
  grp = rep(c("a", "b"), each = 5)
)
# Option 1: histogram of x weighted by freq, groups drawn side by side.
ggplot(data = dat, mapping = aes(x = x, weight = freq, fill = grp)) +
  geom_histogram(position = "dodge")
# Option 2: the heights are already computed, so draw freq directly as
# side-by-side bars (geom_col() is geom_bar() with stat = "identity").
ggplot(data = dat, mapping = aes(x = x, y = freq, fill = grp)) +
  geom_col(position = "dodge", width = 0.5)
# Option 3: same bars, but one panel per group instead of dodging.
ggplot(data = dat, mapping = aes(x = x, y = freq, fill = grp)) +
  geom_col(width = 0.5) +
  facet_wrap(~grp)
# Option 4: overlay the two groups as separate half-transparent layers, each
# with its own fixed fill colour.
ggplot() +
  geom_col(
    data = subset(dat, grp == "a"),
    mapping = aes(x = x, y = freq),
    fill = "blue", alpha = 0.5, width = 0.5
  ) +
  geom_col(
    data = subset(dat, grp == "b"),
    mapping = aes(x = x, y = freq),
    fill = "red", alpha = 0.5, width = 0.5
  )
If you have discrete x values and precomputed "heights", that is not a histogram; it is a bar plot, so I would opt for one of the bar-plot options.

Related

create a line graph using ggplot2 of the change in mean over time with 95% CI

My dataset is something like this
# Mean and 95% CI bounds per group at three numeric time points; rows
# alternate intervention / control within each time point.
df <- data.frame(
  time = rep(c(0, 1, 2), each = 2),
  mean = c(8, 6, 7, 6, 6, 5),
  Group = rep(c("intervention", "control"), times = 3),
  lower = c(7, 5, 5, 4, 4, 4),
  upper = c(12, 9, 10, 8, 8, 8)
)
I want to create a plot of the changes in mean at the 3-time points by group, each of which has the corresponding 95% CI (lower, upper), something like this, the y-axis is mean, and the x-axis is time.
# Mean over time per group with CI error bars. Every layer uses the same
# 0.1 dodge so lines, bars and points of one group stay aligned.
ggplot(data = df, mapping = aes(x = time, y = mean, color = Group)) +
  geom_line(
    mapping = aes(linetype = as.character(Group)),
    position = position_dodge(width = 0.1)
  ) +
  geom_errorbar(
    mapping = aes(ymax = upper, ymin = lower),
    width = 0.1,
    position = position_dodge(width = 0.1)
  ) +
  geom_point(
    mapping = aes(shape = as.character(Group)),
    position = position_dodge(width = 0.1)
  ) +
  # Keep only the colour legend; drop the linetype and shape legends.
  guides(linetype = "none", shape = "none") +
  theme_classic()
But now, instead of using a continuous variable, I need to change the time variable to a categorical variable
# Same measurements as before, but with time as category labels.
df1 <- data.frame(
  time = rep(c("baseline", "week 1", "week 2"), each = 2),
  mean = c(8, 6, 7, 6, 6, 5),
  Group = rep(c("intervention", "control"), times = 3),
  lower = c(7, 5, 5, 4, 4, 4),
  upper = c(12, 9, 10, 8, 8, 8)
)
and the line disappeared. How can I fix it?
Perhaps like this?
# Plot the categorical-time data (df1). With a discrete x axis the rows are
# no longer grouped per arm, so geom_line() has nothing to connect; mapping
# `group = Group` tells ggplot2 which rows belong to the same line.
# The column is named `Group` (capital G), so it must be spelled that way.
ggplot(df1, aes(x = time, y = mean, group = Group)) +
  geom_line(
    aes(lty = as.character(Group)),
    position = position_dodge(width = 0.1)
  ) +
  geom_errorbar(
    aes(ymax = upper, ymin = lower),
    width = 0.1,
    position = position_dodge(width = 0.1)
  ) +
  geom_point(
    aes(shape = as.character(Group)),
    position = position_dodge(width = 0.1)
  ) +
  # Hide the linetype and shape legends.
  guides(lty = "none", shape = "none")

How can I add a mean bar and a jitter to my dot plots?

I am trying to compare data from three groups and I would like to have a mean bar on every group and some jitter.
library(ggplot2)

# Raw measurements for the three groups.
first <- c(1, 1.2, 2, 3, 4)
second <- c(5, 6, 7, 8, 9)
third <- c(10, 16, 17, 18, 19)

# First attempt: only the first two groups, one point per value.
df <- data.frame(
  Value = c(first, second),
  Cat = rep(c("first", "second"), times = c(length(first), length(second))),
  xseq = c(seq_along(first), seq_along(second))
)
ggplot(df, aes(x = Cat, y = Value, color = Cat)) +
  geom_point() +
  xlab("")

# Second attempt: rebuild df with all three groups and plot it the same way.
df <- data.frame(
  Value = c(first, second, third),
  Cat = rep(
    c("first", "second", "third"),
    times = c(length(first), length(second), length(third))
  ),
  xseq = c(seq_along(first), seq_along(second), seq_along(third))
)
ggplot(df, aes(x = Cat, y = Value, color = Cat)) +
  geom_point() +
  xlab("")
Something like this?
library(ggplot2)
# Per-category summary (error bar plus mean point) drawn in black underneath
# the raw values, which are jittered horizontally and coloured by category.
ggplot(data = df, mapping = aes(x = Cat, y = Value, color = Cat)) +
  stat_summary(geom = "errorbar", width = 0.1, color = "black", alpha = 0.5) +
  stat_summary(geom = "point", fun = mean, color = "black") +
  geom_point(
    position = position_jitter(width = 0.1),
    shape = 18,
    size = 4
  ) +
  scale_color_brewer(palette = "Set2") +
  theme_light(base_size = 16)

How to animate the axis label using `gganimate`?

I am actually very amazed to see I cannot quickly find a guide to how to do this. Here is an example:
library(ggplot2)
library(gganimate)
library(data.table)
library(magrittr)
# Build one log-normal density curve per frame (i = 1..10) and stack them.
# The unused `label` local and the `mean` local that shadowed base::mean()
# have been removed; `i` is used directly instead.
dt <- lapply(seq(10), function(i) {
  frame_dt <- data.table(x = seq(0, 50, length.out = 100))
  # The curve's location follows the frame number: meanlog = log(i).
  set(
    frame_dt,
    j = "y",
    value = dlnorm(frame_dt$x, meanlog = log(i), sdlog = 0.2)
  )
  # Tag every row with its frame so transition_manual() can split on it.
  set(frame_dt, j = "frameN", value = i)
  frame_dt
}) %>% rbindlist()
print(dt)
# One density curve per animation frame; the x-axis breaks are fixed at 0
# and 1 for every frame.
p <- ggplot(data = dt, mapping = aes(x = x, y = y)) +
  geom_line() +
  scale_x_continuous(name = "x", breaks = c(0, 1)) +
  transition_manual(frameN)
animate(p)
I want the breaks and labels of scale_x_continuous to follow my own definitions:
# Desired break position and label for each of the 10 frames.
arr_breaks <- c(1, 3, 2, 4, 3, 5, 4, 6, 5, 7)
arr_labels <- paste(seq(10, 100, 10), "kg")
And then
breaks = arr_breaks[1], labels = arr_labels[1] for frame 1
breaks = arr_breaks[2], labels = arr_labels[2] for frame 2
...
breaks = arr_breaks[10], labels = arr_labels[10] for frame 10
No matter how I do it I got errors. Any idea?
As @z-lin noted, gganimate is not currently set up (to my knowledge) to animate scales with different breaks. The effect could be closely approximated using geoms, and with some more work you could probably make an exact visual match to a changing scale.
# One row per animation frame: where the fake axis break sits and its label.
breaks_df <- data.frame(
  frameN = seq_len(10),
  arr_breaks = c(1, 3, 2, 4, 3, 5, 4, 6, 5, 7),
  arr_labels = paste(seq(10, 100, 10), "kg")
)
# Imitate a per-frame axis break with ordinary geoms that animate with frameN.
p <- ggplot(data = dt, mapping = aes(x = x, y = y)) +
  # Full-height white line at the break position (presumably standing in for
  # a gridline on the default panel background).
  geom_segment(
    data = breaks_df,
    mapping = aes(x = arr_breaks, xend = arr_breaks, y = -Inf, yend = Inf),
    color = "white"
  ) +
  # Label anchored at y = 0 and pushed down with vjust; clip = "off" below
  # lets it draw outside the panel.
  geom_text(
    data = breaks_df,
    mapping = aes(x = arr_breaks, y = 0, label = arr_labels),
    vjust = 3, size = 3.5, color = "gray30"
  ) +
  geom_line() +
  scale_x_continuous(name = "x", breaks = 0) +
  coord_cartesian(clip = "off") +
  transition_manual(frameN)
animate(p, width = 600, height = 250)

How to define individual legend elements in ggplot2?

I need to create sensible legends for this chart.
I am using R version 3.5.2 and ggplot2 version 3.1.0.9000.
What I have so far:
# Point estimate (dot), range (segment) and an "asterisk" marker per person,
# with the axes flipped. Colour and shape are both mapped to the constant
# labels "main" / "asterisk" so each gets a legend entry.
data.frame(
  name = c("alice", "bob", "charlie"),
  y = c(2, 3, 3.5),
  y_min = c(1, 1.5, 1.25),
  y_max = c(4, 3.5, 7),
  asterisk = c(6, 3.75, 9)
) %>%
  ggplot(aes(y = y, x = name)) +
  geom_point(aes(color = "main", shape = "main"), size = 4) +
  geom_point(
    aes(y = asterisk, color = "asterisk", shape = "asterisk"),
    size = 6
  ) +
  scale_color_manual(values = list("main" = "black", "asterisk" = "red")) +
  scale_shape_manual(values = list("main" = 16, "asterisk" = 42)) +
  geom_segment(aes(y = y_min, yend = y_max, xend = name)) +
  coord_flip()
I am hoping to get the legend to show just a large-black-dot and a small-red-asterisk, preferably under one header. There is no large-red-dot or small-black-asterisk, and it is confusing that those symbols appear in the legend.
You need to give both legends the same name so they become one
# Giving the colour and shape scales the same name makes ggplot2 draw them
# as a single combined legend.
ggplot(data = dat, mapping = aes(y = y, x = name)) +
  geom_point(aes(color = "main", shape = "main"), size = 4) +
  geom_point(
    aes(y = asterisk, color = "asterisk", shape = "asterisk"),
    size = 6
  ) +
  scale_color_manual(
    name = "legend_title", # changed name here
    values = c("main" = "black", "asterisk" = "red")
  ) +
  scale_shape_manual(
    name = "legend_title", # and here
    values = c("main" = 16, "asterisk" = 42)
  ) +
  geom_segment(aes(y = y_min, yend = y_max, xend = name)) +
  coord_flip()
data
# Example data for the plot: point value, range and asterisk position per
# person. `name` is kept as character rather than factor.
dat <- data.frame(
  name = c("alice", "bob", "charlie"),
  y = c(2, 3, 3.5),
  y_min = c(1, 1.5, 1.25),
  y_max = c(4, 3.5, 7),
  asterisk = c(6, 3.75, 9),
  stringsAsFactors = FALSE
)

Why do geom_density and stat_density(geom = "line") give different results?

In the following illustration, why do geom_density and stat_density(geom = "line") give different results?
library(ggplot2)
# Four samples of 100 normal draws each (sd 1) centred on 1, 4, 7 and 10;
# `mean.values` records the centre each draw came from.
df <- data.frame(
  x.values = c(
    rnorm(100, mean = 1, sd = 1),
    rnorm(100, mean = 4, sd = 1),
    rnorm(100, mean = 7, sd = 1),
    rnorm(100, mean = 10, sd = 1)
  ),
  mean.values = rep(c(1, 4, 7, 10), each = 100)
)
# Shared base plot: one density per centre, coloured and grouped by it.
p <- ggplot(
  data = df,
  mapping = aes(x = x.values, color = mean.values, group = mean.values)
)

# The two layers being compared.
p + geom_density()
p + stat_density(geom = "line")
It's a difference in the position argument. The default in stat_density is position = "stack", whilst with geom_density() it is position = "identity".
If you call p + stat_density(geom = "line", position = "identity") you get the same as geom_density():

Resources