Write group labels in first bar or above instead of legend - r

I have a stacked barplot with the following data
# Every combination of tree species, sample and life stage (3 x 2 x 3 rows).
df <- expand.grid(
  name = c("oak", "birch", "cedar"),
  sample = c("one", "two"),
  type = c("sapling", "adult", "dead")
)

# One random count between 5 and 200 per row, drawn with replacement.
df[["count"]] <- sample(5:200, size = nrow(df), replace = TRUE)
I generate a barplot and try to add the group labels to it:
# Stacked bar chart of count per tree name, filled by type, with flipped axes.
ggplot(df, aes(x = name, y = count, fill = type)) +
geom_bar(stat = "identity") +
coord_flip() +
# The legend is hidden; the text layer below draws the type names instead.
theme(legend.position="none") +
# NOTE(review): position = "stack" is written inside aes() here rather than
# as an argument of geom_text() - confirm whether that is intended.
geom_text(aes(label = type, position = "stack"))
It produces:
Two to three questions arise:
How can I make the labels appear in the top bar only?
How can I make the labels appear in the center of the bar section?
Optionally: How can I make the labels appear on top of the top bar being connected to their sections by arrows?

There is a link suggested above. That will help you. Here, I have another suggestion.
library(dplyr)
library(ggplot2)

# Reproducible example data: one random count per name/sample/type combination.
set.seed(123)
df <- expand.grid(
  name = c("oak", "birch", "cedar"),
  sample = c("one", "two"),
  type = c("sapling", "adult", "dead")
)
df$count <- sample(5:200, size = nrow(df), replace = TRUE)

### Arrange a data frame (summing up sample one and two)
ana <- df %>%
  group_by(name, type) %>%
  summarise(total = sum(count))

# Draw a figure once
bob <- ggplot(ana, aes(x = name, y = total, fill = type)) +
  geom_bar(stat = "identity", position = "stack")

# Get the data frame ggplot computed for the bar layer
cathy <- ggplot_build(bob)$data[[1]]

# Calculate text position (middle of each bar segment) & add text labels.
# The labels must spell the levels of `type` exactly.
# NOTE(review): the label order and the row subset below both rely on the row
# order of the built layer data - confirm against your ggplot2 version.
cathy$y_pos <- (cathy$ymin + cathy$ymax) / 2
cathy$label <- rep(c("sapling", "adult", "dead"), times = 3)

# Subset the data for labeling for the top bar
dan <- cathy[7:9, ]

# Draw a figure again
bob +
  annotate(
    geom = "text",
    x = dan$x,
    y = dan$y_pos,
    label = dan$label,
    size = 3
  ) +
  coord_flip()

Related

How create a box plot + line plot in a single plot using ggplot2

I want to create a box plot + line plot in a single plot using ggplot2
This is my code so far:
library(ggplot2)
# Six scores per measurement day; the treatment pattern repeats on every day.
dat <- data.frame(
  day = rep(c(0, 10, 14, 21, 28, 35, 42), each = 6),
  group = rep(c("Saline", "RP", "Saline", "Saline", "RP", "RP"), times = 7),
  score = c(
    37.5, 43, 7, 63, 26, 15,
    17, 16, 43, 26, 53, 26,
    26, 26, 43, 10, 6, 15,
    18, 9, 10, 4, 8, 18,
    60, 26, 20, 12.5, 9, 43,
    43, 43, 11, 10, 7, 60,
    43, 43, 32, 10.5, 8, 57.5
  )
)

# One box per treatment group within each day (day is treated as discrete).
g1 <- ggplot(data = dat, mapping = aes(x = factor(day), y = score)) +
  geom_boxplot(mapping = aes(fill = group))
g1
When doing box plot, I want scores of different treatments(groups) to be represented separately, so I let x = factor(day).
But for line plot, I want each day's score to be the average of the two treatments(group) of the day.
This is how my plot look like now
This is how I want my plot to look
How can I do this? Thank you so much!
#Libraries
library(tidyverse)
# Data: six scores per measurement day; the treatment pattern repeats each day.
dat <- data.frame(
  day = rep(c(0, 10, 14, 21, 28, 35, 42), each = 6),
  group = rep(c("Saline", "RP", "Saline", "Saline", "RP", "RP"), times = 7),
  score = c(
    37.5, 43, 7, 63, 26, 15,
    17, 16, 43, 26, 53, 26,
    26, 26, 43, 10, 6, 15,
    18, 9, 10, 4, 8, 18,
    60, 26, 20, 12.5, 9, 43,
    43, 43, 11, 10, 7, 60,
    43, 43, 32, 10.5, 8, 57.5
  )
)

# How to: box plots per group, plus one line through the per-day medians
# (both groups pooled).
ggplot(dat, aes(x = factor(day), y = score)) +
  geom_boxplot(aes(fill = group)) +
  geom_line(
    data = summarise(group_by(dat, day), score = median(score, na.rm = TRUE)),
    # a single group joins the discrete x positions into one line
    mapping = aes(group = 1),
    size = 1,
    colour = "red"
  )

Barplot side by side and line charts in the same plot

I want to create in R a plot which contains side by side bars and line charts as follows:
I tried:
# Daily case counts for the twelve days.
Total <- c(584, 605, 664, 711, 759, 795, 863, 954, 1008, 1061, 1117, 1150)
Infected <- c(366, 359, 388, 402, 427, 422, 462, 524, 570, 560, 578, 577)
Recovered <- c(212, 240, 269, 301, 320, 359, 385, 413, 421, 483, 516, 548)
Death <- c(6, 6, 7, 8, 12, 14, 16, 17, 17, 18, 23, 25)
day <- itemizeDates(startDate = "01.04.20", endDate = "12.04.20")
df <- data.frame(
  Day = day,
  Infected = Infected,
  Recovered = Recovered,
  Death = Death,
  Total = Total
)

# 2 x 12 matrix for barplot(): row 1 = recovered, row 2 = deaths.
value_matrix <- rbind(df$Recovered, df$Death)

# Line charts first, then the side-by-side bars added to the same device.
plot(
  seq_len(12), df$Total,
  ylim = c(0, 1200), xlim = c(1, 12),
  type = "b", col = "peachpuff",
  xaxt = "n", xlab = "", ylab = ""
)
points(seq_len(12), df$Infected, type = "b", col = "red")
barplot(
  value_matrix,
  beside = TRUE,
  col = c("green", "black"),
  width = 0.35,
  add = TRUE
)
But the bar chart does not fit the line chart. I guess it would be easier to use ggplot2, but don't know how. Could anyone help me? Thanks a lot in advance!
With ggplot2, the margins are handled nicely for you, but you'll need the data in two separate long forms. Reshape from wide to long with tidyr::gather, tidyr::pivot_longer, reshape2::melt, reshape, or whatever you prefer.
library(tidyr)
library(ggplot2)

# Wide data: one row per day, one column per series.
df <- data.frame(
  Total = c(584, 605, 664, 711, 759, 795, 863, 954, 1008, 1061, 1117, 1150),
  Infected = c(366, 359, 388, 402, 427, 422, 462, 524, 570, 560, 578, 577),
  Recovered = c(212, 240, 269, 301, 320, 359, 385, 413, 421, 483, 516, 548),
  Death = c(6, 6, 7, 8, 12, 14, 16, 17, 17, 18, 23, 25),
  day = seq(as.Date("2020-04-01"), as.Date("2020-04-12"), by = "day")
)

# Long form of the two series drawn as lines and points.
line_data <- pivot_longer(
  df,
  cols = Total:Infected,
  names_to = "Population",
  values_to = "count"
)

# Long form of the two series drawn as side-by-side bars. The key column has
# the same name so the bar layer can reuse the plot-level aesthetics.
bar_data <- pivot_longer(
  df,
  cols = Recovered:Death,
  names_to = "Population",
  values_to = "count"
)

ggplot(line_data, aes(day, count, color = Population, fill = Population)) +
  geom_line() +
  geom_point() +
  geom_col(data = bar_data, position = "dodge", show.legend = FALSE)
Another way to do it is to reshape twice before plotting: once for the line series and once for the bar series. I'm not sure whether this is easier or harder to understand, but you get the same plot.
# Reshape the line series and the bar series into two separate key/value
# pairs of the same data frame, then plot each pair in its own layer.
df %>%
  tidyr::pivot_longer(
    Total:Infected,
    names_to = "Population",
    values_to = "count"
  ) %>%
  tidyr::pivot_longer(
    Recovered:Death,
    names_to = "Resolution",
    values_to = "count2"
  ) %>%
  ggplot(aes(x = day, y = count, color = Population)) +
  geom_line() +
  geom_point() +
  geom_col(
    # the bar layer swaps in the second key/value pair
    aes(y = count2, color = Resolution, fill = Resolution),
    position = "dodge", show.legend = FALSE
  )
You can actually plot the lines and points without reshaping by making separate calls for each, but to dodge bars (or get legends), you'll definitely need to reshape.

plotting stacked points using ggplot

I have a data frame and I would like to stack the points that have overlaps exactly on top of each other.
here is my example data:
# Fourteen values: seven for group "AA" followed by seven for group "BB".
value <- c(
  1.080251e-04, 1.708859e-01, 1.232473e-05, 4.519876e-03, 2.914256e-01,
  5.869711e-03, 2.196347e-01, 4.124873e-01, 5.914052e-03, 2.305623e-03,
  1.439013e-01, 5.407597e-03, 7.530298e-02, 7.746897e-03
)
names <- letters[1:7]

# `group` has to exist before data.frame() refers to it.
group <- rep(c("AA", "BB"), each = 7)

# The seven names are repeated once per group so all columns have 14 entries.
data <- data.frame(
  names = rep(names, times = 2),
  group = group,
  value = value,
  stringsAsFactors = TRUE
)
I am using the following command:
# Point layer: size encodes -log(value); points sharing an x are stacked.
stacked_points <- geom_point(
  mapping = aes(size = -log(value)),
  position = "stack"
)

# All points share the single constant y value "".
p <- ggplot(data = data, mapping = aes(x = names, y = "", color = group)) +
  stacked_points
plot(p)
But the stacked circle is drawn outside the grid. I want it close to, or exactly next to, the bottom circle. Do you have any idea how I can fix this?
Thanks,
The y-axis has no numeric value, so use the group instead. And we don't need the color legend now since the group labels are shown on the y-axis.
# Put each group on its own row of the y-axis; point size encodes -log(value).
ggplot(data, aes(x = names, y = group, color = group)) +
  geom_point(aes(size = -log(value))) +
  # the y-axis already names the groups, so the colour legend is dropped
  guides(color = "none")

Box plot with multiple groups + Dots + Counts

I have a boxplot with multiple groups in R.
When i add the dots within the boxplots, they are not in the center.
Since each week has a different number of boxplots, the dots are not centered within the box.
The problem is in the geom_point part.
I uploaded my data of df.m in a text file and a figure of what i get.
I am using ggplot, and here is my code:
# Set the working directory; the relative file name below is resolved against it.
setwd("/home/usuario")
# NOTE(review): dput() receives the string "df.m" here, not an object named
# df.m - confirm what was meant to be printed.
dput("df.m")
# Read the table from df.m.txt using read.table()'s default settings.
df.m = read.table("df.m.txt")
# Convert the `variable` column to a factor.
df.m$variable <- as.factor(df.m$variable)
# Summary helper: for a numeric vector `elita`, returns a named vector with
# `y` = the median scaled by -0.1 and `label` = the number of elements.
give.n <- function(elita) {
  scaled_median <- median(elita) * -0.1
  c(y = scaled_median, label = length(elita))
}
# Box plots per Label within each variable, the raw points dodged alongside
# them (no jitter), and a text layer built from give.n().
p <- ggplot(data = df.m, aes(x = variable, y = value)) +
  geom_boxplot(aes(fill = Label)) +
  geom_point(
    aes(fill = Label),
    shape = 21,
    position = position_jitterdodge(jitter.width = 0)
  ) +
  # fun.data alone defines the summary; the deprecated fun.y argument is not
  # needed alongside it
  stat_summary(fun.data = give.n, geom = "text")
p
Here is my data in a text file:
https://drive.google.com/file/d/1kpMx7Ao01bAol5eUC6BZUiulLBKV_rtH/view?usp=sharing
Only in variable 12 are the dots in the center, because there are 3 groups there (the maximum possible).
I would also like to show the counting of observations. If I use the code shown, I can only get the number of observations for all the groups. I would like to add the counting for EACH GROUP.
Thank you in advance
enter image description here
Here's a solution using boxplot and dotplot and an example dataset:
library(tidyverse)
# Example data: nine observations per week, split unevenly between donor types.
dt <- data.frame(
  week = rep(c(1, 2), each = 9),
  value = c(
    6.40, 6.75, 6.11, 6.33, 5.50, 5.40, 5.83, 4.57, 5.80,
    6.00, 6.11, 6.40, 7.00, 3, 5.44, 6.00, 5, 6.00
  ),
  donor_type = rep(c("A", "CB", "A"), times = c(4, 10, 4))
)

# Create the plot: box plots with the dots stacked around the centre of each
# box, dodged by the same 0.75 width.
ggplot(data = dt, mapping = aes(x = factor(week), y = value, fill = donor_type)) +
  geom_boxplot() +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    position = position_dodge(0.75)
  )
You should be able to adjust my code to your real dataset easily.
Edited answer with OP's dataset:
Using some generated data and geom_point():
library(tidyverse)
# Turn `variable` into a factor first, then drop rows without a value; doing
# it in this order fixes the factor levels before any rows are removed.
df.m <- filter(
  mutate(df.m, variable = as.factor(variable)),
  !is.na(value)
)

# Dodge the points like the boxes, without any jitter.
dodge_only <- position_jitterdodge(jitter.width = 0)

ggplot(data = df.m, mapping = aes(x = variable, y = value, fill = Label)) +
  geom_boxplot() +
  geom_point(shape = 21, position = dodge_only) +
  # drop = FALSE keeps factor levels that have no data on the axis
  scale_x_discrete(name = "variable", drop = FALSE)

Ggplot Heatmap - customized colors for customized count ranges

I want to make a heatmap that creates a group of clarity & color combinations as the X axis and cut as the Y axis. The heatmap would color based upon the counts of clarity+color and its intersection with the cut.
library(ggplot2)
library(dplyr)
## rename diamonds df
# 1. Generate a count for the frequency of cut+clarity
# 2. Make a heatmap of this using the following bins
# 3. Red    <= 100 Frequency
#    Yellow =  between (100 and 500)
#    Green  >  500
# place counts inside the cell:
df <- diamonds %>%
  select(cut, clarity) %>%
  group_by(cut, clarity) %>%
  mutate(count = n())

# The text layer is chained with `+` so it becomes part of myplot.
myplot <- ggplot(df, aes(x = clarity, y = cut)) +
  geom_bin2d(bins = c(100, 500, 50000), col = "orange") +
  geom_text(aes(label = count), col = "red")
myplot
Try this:
# Bin the per-cell counts into three ranges: <=100, (100, 500] and >500.
df$col <- cut(df$count, breaks = c(-Inf, 100, 500, Inf), right = TRUE)
df$color <- df$col
levels(df$color) <- c("<=100", "100<#<=500", ">500")

# Keep one row per cut/clarity cell so each tile and label is drawn once
# instead of once per diamond.
cells <- dplyr::distinct(dplyr::ungroup(df), cut, clarity, .keep_all = TRUE)

ggplot(data = cells, aes(x = clarity, y = cut)) +
  # refer to the column by name; aes() looks it up in the layer data
  geom_tile(aes(fill = color), colour = "white") +
  scale_fill_brewer("Count", palette = "Set1") +
  geom_text(aes(label = count), col = "yellow", size = 3)

Resources