Labels in geom_bar having only x variable and fill factor - r

I have created a DF based on the following code.
sex <- c("m","f","m","m","m","m","m","f","f","f")
age <- c(">10",">20",">30",">10",">20",">30",">10",">20",">30",">10")
df1 <- data.frame(sex,age)
ggplot (df1, aes(sex, fill = factor(age))) + geom_bar()
I want to individually label the counts of combination of age and sex
sex="f" and age = ">10" = 1, sex="f" and age = ">20" = 2, sex="f" and
age = ">30" = 1, sex="m" and age = ">10" = 3, sex="m" and age = ">20"
= 1, sex="m" and age = ">30" = 2

I think you want something like this:
ggplot(df1, aes(sex, fill = factor(age))) + geom_bar() +
geom_text(stat = "count", aes(y = ..count.., label = ..count..), position = "stack", vjust = 3)

Not sure if I understood your question correctly, but do you mean something like this:
df2 <- as.data.frame(table(df1))
df2$sex_age <- paste(df2$sex, df2$age, sep = "_")
ggplot(df2, aes(x = sex_age, y = Freq)) + geom_bar(stat = "identity")

Related

R ggplot2 : geom_jitter and fill, problem to have the dots on the right boxplot

Here's my R code
ggplot(dat = Table, aes(x = Group, y = value, fill = Type)) +
geom_boxplot(alpha=0.08)+
geom_jitter()+
scale_fill_brewer(palette="Spectral")+
theme_minimal()
Like you can see the dots are in the middle of the boxplots. What can I add in geom_jitter to have each point in the righ boxplot and not in the middle like this ? I also tried geom_point, it gave the same result !
Thanks to the help now It works, but I wanted to add a line to connect the dots and I got this.. can someone tell how to really connect the dots with lines
I think if you group by interaction(Group, Type) and use position_jitterdodge() you should get what you're looking for.
ggplot(mtcars, aes(as.character(am), mpg, color = as.character(vs),
group = interaction(as.character(vs), as.character(am)))) +
geom_boxplot() +
geom_jitter(position = position_jitterdodge()) # same output with geom_point()
Edit - here's an example with manual jittering applied to data where the each subject appears once in each Group.
I looked for a built-in way to do this, and this answer comes close, but I couldn't get it to work in terms of using position_jitterdodge with position defined by the groups of Group/Type, but line grouping defined by id alone and not by Group/Type. Both aesthetics (position adjustment and series identification) rely on the same group parameter, but they each need a different value for it.
Table = data.frame(id = 1:4,
value = rnorm(8),
Group = rep(c("a","b"), each = 4),
Type = c("1", "2"))
library(dplyr)
Table %>%
mutate(x = as.numeric(as.factor(Group)) +
0.2 * scale(as.numeric(as.factor(Type))) +
rnorm(n(), sd = 0.06)) %>%
ggplot(aes(x = Group, y = value, fill = Type, group = interaction(Group, Type))) +
geom_boxplot(alpha=0.2)+
geom_point(aes(x = x)) +
geom_line(aes(x = x, group = id), alpha = 0.1) +
scale_fill_brewer(palette="Spectral")+
theme_minimal()
Best to use position_dodge instead if you want them to line up:
library(ggplot2)
Table <- tibble::tibble(
Group = rep(c("A", "B"), each = 20),
Type = factor(rep(c(1:2, 1:2), each = 10)),
value = rnorm(40, mean = 10)
)
ggplot(dat = Table, aes(x = Group, y = value, fill = Type)) +
geom_boxplot(alpha=0.08)+
geom_point(position = position_dodge(width = 0.75))+
scale_fill_brewer(palette="Spectral")+
theme_minimal()
To add a line, make sure group = ID goes in both the geom_point and geom_line calls:
library(ggplot2)
Table <- tibble::tibble(
Group = rep(c("A", "B"), each = 20),
Type = factor(rep(c(1:2, 1:2), each = 10)),
ID = factor(rep(1:20, times = 2)),
value = rnorm(40, mean = 10)
)
ggplot(dat = Table, aes(x = Group, y = value, fill = Type)) +
geom_boxplot(alpha = 0.08) +
geom_point(aes(group = ID), position = position_dodge(width = 0.75))+
geom_line(aes(group = ID), position = position_dodge(width = 0.75), colour = "grey")+
scale_fill_brewer(palette = "Spectral") +
theme_minimal()

Is there a way for adding labels with number of observations per value in violin plot in ggplot?

Image you want to create a violin plot and have data like this:
set.seed(123)
Bodytype <- sample(LETTERS[1:3], 500, replace = T)
Weight <- rnorm(500,40,1)
df <- data.frame(Bodytype, Weight)
ggplot(data = df,
aes(x = Bodytype, y = Weight, fill = Bodytype)) +
geom_violin(scale = "count", trim = F, adjust = 0.75) +
scale_y_continuous(breaks = seq(34, 46, 1)) +
theme_gray()
Now I would like to add text label or something for each bodytype at each kg level to see how many observations in each bodytype category that weigh 36 kg, 37kg, etc. Is there are way for achieving this, or am I better off using another plot?
This can be done in many ways, here is one:
library(dplyr)
library(ggplot2)
summ <- df %>%
group_by(Bodytype) %>%
summarize(n = n(), Weight = mean(Weight))
ggplot(data = df, aes(x = Bodytype, y = Weight, fill = Bodytype)) +
geom_violin(scale = "count", trim = F, adjust = 0.75) +
scale_y_continuous(breaks = seq(34, 46, 1)) +
theme_gray() +
geom_text(aes(label = n), data = summ)
Okay, so you want multiple weight counts:
weightcounts <- df %>%
mutate(Weight = as.integer(round(Weight, 0))) %>%
group_by(Bodytype, Weight) %>%
count()
ggplot(data = df, aes(x = Bodytype, y = Weight, fill = Bodytype)) +
geom_violin(scale = "count", trim = F, adjust = 0.75) +
scale_y_continuous(breaks = seq(34, 46, 1)) +
theme_gray() +
geom_text(aes(label = n), data = weightcounts)
Either way, the premise is that you can generate a summary frame with the associated labels you need, and then add geom_text (or geom_label) with the new dataset as an argument.
Another way computing labels outside in base R:
set.seed(123)
Bodytype <- sample(LETTERS[1:3], 500, replace = T)
Weight <- rnorm(500,40,1)
df <- data.frame(Bodytype, Weight)
#Labels
df$i <- 1
labs <- aggregate(i~Bodytype,df,sum)
labs$Weight<-NA
#Plot
ggplot(data = df,
aes(x = Bodytype, y = Weight, fill = Bodytype)) +
geom_violin(scale = "count", trim = F, adjust = 0.75) +
geom_text(data=labs,aes(x=Bodytype,y=45,label=i))
scale_y_continuous(breaks = seq(34, 46, 1)) +
theme_gray()
Output:

R: show values in geom_bar(position = 'fill')

I have the following fake data:
n <- 100
set.seed(1)
df <- data.frame(grp = sample(c("A", "B", "C"), size = n, replace = TRUE),
values = sample(1:10, n, replace = TRUE) )
df
My goal is to have a "filled" barplot as follow, but I don't know how to use geom_text() in order to add the values of the percentages for each segment of the bars.
ggplot(df, aes(x = values, fill = grp)) +
geom_bar(position = 'fill') +
geom_text(??)
Can anyone help me please?
Are you looking for something like this?
df2 <- as.data.frame(apply(table(df), 2, function(x) x/sum(x)))
df2$grp <- rownames(df2)
df2 <- reshape2::melt(df2)
ggplot(df2, aes(x = variable, y = value, fill = grp)) +
geom_col(position = "fill") +
geom_text(aes(label = ifelse(value == 0, "", scales::percent(value))),
position = position_fill(vjust = 0.5)) +
scale_y_continuous(labels = scales::percent, name = "Percent") +
labs(x = "Value")

ggplot2:: Facetting plot with the same reference plot in all panels

I would like to facet a plot, but with a reference plot in each panel. Let me try to show with pictures what I want to achieve: My example data_frame:
require(dplyr)
df <- data_frame( id = c(rep('ctr',40), rep('pat',80)),
class = c(rep('ctr',40), rep(c('a','b'), each = 40)),
rank = rep (1:20,6),
mean = c(rep(seq(3,-3, length.out = 20),2),
rep(seq(1,-4, length.out = 20),2),
rep(seq(-2,-8, length.out = 20),2)),
sd = rep(seq(1.2,0.8, length.out = 20), times = 6),
exam = rep(c('blue','red'), each = 20, times = 3))
My plot:
# first, create reference plot of the 'controls'
require(ggplot2)
p_ctr <- ggplot() +
geom_line(data = filter(df, id == 'ctr'),
aes(x=rank, y=mean, color=exam), linetype=1) +
geom_ribbon(data = filter(df, id == 'ctr'),
aes(x = rank, ymax = mean+sd, ymin = mean-sd,
fill = exam), alpha = .1) +
scale_colour_manual(values = c("#00b6eb","#eb0041")) +
scale_fill_manual(values = c("#00b6eb","#eb0041"))
# then, overlay with plot of 'patients'
p_ctr + geom_line(data = filter(df, id == 'pat'),
aes(x=rank, y=mean, linetype = class)) +
geom_ribbon(data = filter(df, id == 'pat'),
aes(x = rank, ymax = mean+sd, ymin = mean-sd,
group = class),
alpha = .1) +
facet_wrap(~exam)
That is halfway there:
Ideally, however, I would like to plot the different "classes" in separate panels, but with the control plot as a reference in each panel:
Expected result:
I have tried different combinations of facetting, without good result. I guess, there must be a simple solution?
Maybe like so.
library(dplyr)
library(ggplot2)
df1 <- filter(df, id == 'ctr')
df2 <- filter(df, id == 'pat')
df2 <- dplyr::rename(df2, class_2 = class)
p_ctr <- ggplot() +
geom_line(data = df1, aes(x=rank, y=mean, color=exam)) +
geom_ribbon(data = df1,
aes(x = rank, ymax = mean+sd, ymin = mean-sd, fill = exam),
alpha = .1) +
scale_colour_manual(values = c("#00b6eb","#eb0041")) +
scale_fill_manual(values = c("#00b6eb","#eb0041")) +
geom_line(data = df2,
aes(x=rank, y=mean)) +
geom_ribbon(data = df2,
aes(x = rank, ymax = mean+sd, ymin = mean-sd),
alpha = .1) +
facet_grid(class_2 ~ exam)
p_ctr
Using facet_wrap gives me the following error:
error in gList(list(x = 0.5, y = 0.5, width = 1, height = 1, just = "centre", :
only 'grobs' allowed in "gList"
You probably came across this plot while looking for the solution.
p_ctr + geom_line(data = filter(df, id == 'pat'),
aes(x=rank, y=mean)) +
geom_ribbon(data = filter(df, id == 'pat'),
aes(x = rank, ymax = mean+sd, ymin = mean-sd),
alpha = .1) +
# facet_wrap(~exam) +
facet_grid(class ~ exam)
This is basically your reference plot and its overlay, without the linetype and group arguments. Additionally I faceted by class ~ exam. From this plot you see that 'the problem' is that class contains three unique elements: a, b and ctr. That's why I renamed the variable class in df2 to be class_2 which has only two unique elements: a and b. Faceting by class_2 ~ exam then gives the desired output.
I hope this helps.

ggplot2 coord_polar preserve order when using fill

Specifying the fill argument for aes results in a reverse order of the pie chart, so the breaks/labels wont match with pie pieces anymore. Please see the example and resulting plots below.
df = data.frame(Var1 = letters[1:5], Var2 = c(6, 31, 34, 66, 77))
df$Var1 = factor(df$Var1, levels = df$Var1, ordered = T)
# just fine, but no colors
ggplot(df, aes(x = 1,
y = Var2)) +
geom_bar(width = 1, stat = "identity") +
coord_polar(theta = "y") +
scale_fill_manual(values = c("red","green","yellow","black","white"),
guide_legend(title = "My_Title")) +
scale_y_continuous(breaks = (cumsum(df$Var2) -
df$Var2 / 2),
labels = df$Var1)
# reverse order appears
ggplot(df, aes(x = 1,
y = Var2,
fill = Var1)) +
geom_bar(width = 1, stat = "identity") +
coord_polar(theta = "y") +
scale_fill_manual(values = c("red","green","yellow","black","white"),
guide_legend(title = "My_Title")) +
scale_y_continuous(breaks = (cumsum(df$Var2) -
df$Var2 / 2),
labels = df$Var1)
Stacking will occur in reversed factor order (per v2.2.0), and therefore we can use the following code to stack in original order:
ggplot(df, aes(x = 1,
y = Var2,
fill = forcats::fct_rev(Var1))) +
geom_bar(width = 1, stat = "identity", col = 1) +
coord_polar(theta = "y") +
scale_y_continuous(breaks = (cumsum(df$Var2) -
df$Var2 / 2),
labels = df$Var1)
Also, you may use geom_col instead of geom_bar(stat = "identity").
Another option to reverse the order of the stack would be to make use of position_stack(reverse=TRUE):
df = data.frame(Var1 = letters[1:5], Var2 = c(6, 31, 34, 66, 77))
df$Var1 = factor(df$Var1, levels = df$Var1, ordered = T)
library(ggplot2)
# reverse order appears
ggplot(df, aes(x = 1,
y = Var2,
fill = Var1)) +
geom_bar(width = 1,
stat = "identity",
position = position_stack(reverse = TRUE)) +
coord_polar(theta = "y") +
scale_fill_manual(values = c("red","green","yellow","black","white"),
guide_legend(title = "My_Title")) +
scale_y_continuous(breaks = (cumsum(df$Var2) - df$Var2 / 2), labels = df$Var1)

Resources