ggplot bar plot comparing multiple groups with a reference group - r

# Toy data: four keys (A-D) observed once in each of four groups, with a
# random value (drawn without replacement from 1..100) per row.
dat <- data.frame(
  keyId = rep(LETTERS[1:4], times = 4),
  group = rep(seq_len(4), each = 4),
  value = sample.int(100, 16)
)
# One panel per group, one bar per keyId, bar height = value.
# `value` is mapped as a number: wrapping it in as.factor() would make the
# y axis discrete, so bars would be placed at factor-level positions instead
# of being drawn to scale.
ggplot(dat, aes(x = as.factor(keyId), y = value)) +
  geom_bar(position = "dodge", stat = "identity") +
  facet_wrap(~group)
What I want is:
# Pair the reference group (4) with each of the other groups in turn.
temp1 <- dplyr::filter(dat, group %in% c(1, 4))
temp2 <- dplyr::filter(dat, group %in% c(2, 4))
temp3 <- dplyr::filter(dat, group %in% c(3, 4))

# One dodged bar chart per pairing, filled by group.
ggplot(
  temp1,
  aes(x = as.factor(keyId), y = value, fill = as.factor(group))
) +
  geom_col(position = "dodge")

ggplot(
  temp2,
  aes(x = as.factor(keyId), y = value, fill = as.factor(group))
) +
  geom_col(position = "dodge")

ggplot(
  temp3,
  aes(x = as.factor(keyId), y = value, fill = as.factor(group))
) +
  geom_col(position = "dodge")
That is, I want bar plots comparing group 4 against each of groups 1, 2, and 3. I am trying to do this in a single figure using facet_wrap(~group). How can I do it?

Consider adding a new indicator and then expand the data frame by group where each subset concatenates the 4th group with a new_group indicator.
Below uses base R methods: ifelse (for conditional assignment of indicator column), by (for grouping), rbind + do.call (for concatenating data frames), transform (to add new_group column), subset (to remove redundant row).
# Flag every row as belonging to the control group (4) or a treatment group.
dat$indicator <- factor(ifelse(dat$group == 4, "control", "treatment"))

# For each group, stack its rows on top of the control rows and tag the
# result with that group's id (new_group), which becomes the facet variable.
df_list <- by(dat, dat$group, function(grp_rows) {
  paired <- rbind(grp_rows, dat[dat$group == 4, ])
  paired$new_group <- grp_rows$group[[1]]
  paired
})

# Combine all pairings, then drop the redundant control-vs-control pairing.
graph_df <- do.call(rbind, df_list)
graph_df <- graph_df[graph_df$new_group != 4, ]

# One facet per treatment group; fill separates treatment from control.
ggplot(graph_df, aes(x = as.factor(keyId), y = value, fill = indicator)) +
  geom_col(position = "dodge") +
  labs(title = "Treatments vs Control Group", x = "keyId") +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom"
  ) +
  facet_wrap(~new_group)

Maybe I'm missing something that makes this more complicated, but you can forgo the temporary data frames with a list, and just make one long data frame of all those subsets. If you give the list names, you can use that with the .id parameter of dplyr::bind_rows, and that becomes your faceting variable.
library(dplyr)
library(ggplot2)
# Example data: keys A-D repeated across four groups, with random values
# sampled without replacement from 1..100.
dat <- data.frame(
  keyId = rep(LETTERS[1:4], times = 4),
  group = rep(seq_len(4), each = 4),
  value = sample.int(100, 16)
)
# Build one long data frame in which each "set" pairs the reference group (4)
# with one comparison group. The list names (set1..set3) become the `set`
# column through `.id`, and that column is the faceting variable.
dat_paired <- bind_rows(
  lapply(
    c(set1 = 1, set2 = 2, set3 = 3),
    function(g) filter(dat, group %in% c(g, 4))
  ),
  .id = "set"
)
head(dat_paired)
#> set keyId group value
#> 1 set1 A 1 21
#> 2 set1 B 1 57
#> 3 set1 C 1 66
#> 4 set1 D 1 33
#> 5 set1 A 4 1
#> 6 set1 B 4 32

# One facet per pairing, bars dodged by group.
ggplot(
  dat_paired,
  aes(x = as.factor(keyId), y = value, fill = as.factor(group))
) +
  geom_col(position = "dodge") +
  facet_wrap(~set)
Also an aside: geom_bar(stat = "identity") is the same as geom_col().

Here is an alternate solution using dplyr and ggplot2, where the values of group 4 are subtracted from the other 3 groups, then the differences are plotted.
# Plot, per keyId, how far each group's value is from the reference group (4).
# The join key is given explicitly and only the needed columns are kept from
# the reference rows: a natural join matches on every shared column, so it
# would silently produce NAs if `dat` carried any extra column (for example an
# `indicator` column), and it prints a "Joining with" message.
dat %>%
  filter(group == 4) %>%
  select(keyId, value.4 = value) %>% # reference value for each keyId
  left_join(dat, ., by = "keyId") %>%
  mutate(diff = value - value.4) %>%
  filter(group != 4) %>% # the reference group minus itself is always 0
  ggplot(aes(keyId, diff, fill = keyId)) +
  geom_bar(stat = "identity") +
  facet_wrap(~group) +
  theme(legend.position = "none") +
  ylab("Difference of Group 4")

Related

Obtain two barplots in the same graph ggplot2

I have two dataframes:
# Two example data frames: five names (A-E), five ages per name.
df1 <- data.frame(
  name = rep(LETTERS[1:5], each = 5),
  age = seq_len(25)
)
df2 <- data.frame(
  name = rep(LETTERS[1:5], each = 5),
  age = c(rep(c(1, 5), each = 5), 1, 12, 3, 2, 1, 1:10)
)
And I want to produce horizontal barplots like these:
# Horizontal bar chart for df1 (green), names ordered with fct_reorder() by age.
ggplot(
  mutate(df1, name = fct_reorder(name, age)),
  aes(x = name, y = age)
) +
  geom_col(fill = "#74D055FF", alpha = .6, width = .6) +
  coord_flip() +
  theme_bw()

# Same chart for df2 (violet).
ggplot(
  mutate(df2, name = fct_reorder(name, age)),
  aes(x = name, y = age)
) +
  geom_col(fill = "#481568FF", alpha = .6, width = .6) +
  coord_flip() +
  theme_bw()
I would like to show them in the same figure: there should be a vertical line at age = 0, with the violet bars on one side and the green bars on the other (of course, the bars can then only be sorted by the age of either df1 or df2, as the descending order of age is not the same in both data frames). I don't know what this type of plot is called or how to approach it.
One option would be to bind your datasets into one dataframe and add an identifier column for which I use dplyr::bind_rows. The identifier could then be mapped on the fill aes and the colors set via scale_fill_manual. Also I aggregated the data using count instead of relying on stacking:
library(dplyr)
library(ggplot2)
# Diverging bar chart: df1 totals extend to the right of zero, df2 totals to
# the left. bind_rows(.id = "id") labels the rows "1" (df1) and "2" (df2) as
# character strings, so `id` is compared against strings rather than relying
# on implicit number-to-character coercion. A single pipe operator is used
# throughout the chain.
dplyr::bind_rows(df1, df2, .id = "id") |>
  count(id, name, wt = age, name = "age") |> # total age per id and name
  mutate(
    # order the names by the df1 totals only
    name = reorder(name, (id == "1") * age, sum),
    # flip df2 to the negative side of the axis
    age = if_else(id == "2", -age, age)
  ) |>
  ggplot(aes(y = name, x = age, fill = id, group = 1)) +
  geom_col(alpha = .6, width = .6) +
  geom_vline(xintercept = 0) +
  scale_fill_manual(values = c("#74D055FF", "#481568FF")) +
  theme_bw()

Comparing specific rows and whole rows in boxplot

I have a data frame ("Date", "A", "B"). I'm trying to use boxplots (by month) to analyze the data in "A" both for the rows filtered by "B" and for all rows. So far I can only create two separate plots: one for the filtered rows and one for the whole data set.
I tried to put two geom_boxplot layers under one ggplot(), but the two boxplots just overlap each other. Here is the code I used. Does anyone know how I can combine those two boxplots into one plot, so that they share the same x axis and each month on the x axis has two boxes?
# Asker's attempt: two boxplot layers (rows with B == 1, then all rows) drawn
# on the same axes, followed by saving the figure to disk.
ggplot() +
  geom_boxplot(
    data = filter(df, B == 1),
    mapping = aes(x = Month, y = A, group = Month, fill = "Chamber_no fire"),
    outlier.shape = TRUE
  ) +
  geom_boxplot(
    data = df,
    mapping = aes(x = Month, y = A, group = Month, fill = "Chamber"),
    outlier.shape = TRUE
  ) +
  theme_bw() +
  theme(panel.grid.major = element_blank()) +
  scale_x_continuous(breaks = seq(2, 12, 1), minor_breaks = FALSE) +
  geom_hline(yintercept = 0, linetype = "dotted")

ggsave(
  "sate_meas_O3_NOx_5km_nofire.png",
  width = 6, height = 4, units = "in"
)
One approach to achieve your desired result is to
Bind the filtered dataset and the total dataset by row and add an identifier id for each dataset which could easily be done via dplyr::bind_rows.
Make a boxplot where you map id on the fill aesthetic and group by both id and Month using interaction
Set the legend labels via scale_fill_discrete
As you provided no data I make use of a random example data set:
# Reproducible random example data: 100 rows with a month (2..12), a
# normally distributed measurement A and a binary flag B (1 or 2).
# `replace` is spelled out in full; the abbreviation `rep` only works through
# partial argument matching.
set.seed(42)
df <- data.frame(
  Month = sample(2:12, 100, replace = TRUE),
  A = rnorm(100),
  B = sample(1:2, 100, replace = TRUE)
)
library(ggplot2)
library(dplyr)

# Stack the filtered rows (B == 1) and the full data set, labelling each with
# an `id` taken from the list names ("b1" / "all").
d <- bind_rows(
  list(b1 = filter(df, B == 1), all = df),
  .id = "id"
)

# Grouping by Month x id gives two boxes per month, dodged side by side.
ggplot(
  data = d,
  mapping = aes(x = Month, y = A, group = interaction(Month, id), fill = id)
) +
  # NOTE(review): outlier.shape normally takes a shape code or NA; TRUE is
  # kept from the original (only the T shorthand is spelled out) - confirm.
  geom_boxplot(outlier.shape = TRUE, position = "dodge") +
  scale_fill_discrete(labels = c(b1 = "Chamber_no fire", all = "Chamber")) +
  theme_bw() +
  theme(panel.grid.major = element_blank()) +
  # NULL is the documented way to request no minor breaks
  scale_x_continuous(breaks = seq(2, 12, 1), minor_breaks = NULL) +
  geom_hline(yintercept = 0, linetype = "dotted")

R ggplot with percentages

I'm trying to make a bar plot with percentages instead of counts, and I've tried this:
# Asker's attempt: bars of the computed proportion (..prop..) per key, filled
# by value, with the y axis formatted as percentages.
ggplot(
  newdf3,
  aes(x = key, y = ..prop.., fill = value, group = 1)
) +
  geom_bar(position = "dodge") + # geom_bar() counts rows by default
  scale_y_continuous(labels = scales::percent_format())
but apparently "group=1" is not working because this is what it returns:
and if I don't use "group=1" it returns:
here's a sample of data I'm using:
key value
1 Before
1 After
1 During
1 Before
2 Before
2 After
3 During
...
Can someone help me with this?
Consider using geom_col() instead of geom_bar().
However, you should be able to get around your problem with stat="identity".
library(ggplot2)
# Sample data frame: two pre-computed values per group.
df <- data.frame(
  group = c("A", "A", "B", "B", "C", "C"),
  value = c(0.1, 0.5, 0.3, 0.1, 0.2, 0.6)
)
# Plain function calls are used instead of `%>%`: only ggplot2 is attached in
# this snippet, and ggplot2 does not provide the pipe operator.
head(df)

# Bar chart of counts: with only x mapped, geom_bar() counts the rows per group.
ggplot(df, aes(x = group)) +
  geom_bar()

# Bar chart of the supplied values: stat = "identity" uses `value` as the bar
# height (values within a group are stacked).
ggplot(df, aes(x = group, y = value)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = scales::percent_format())
One solution would be to calculate the relative frequencies from your input data and pass the results directly to ggplot, using the stat = "identity" parameter in geom_bar (see this post):
library(tidyverse)
# Sample of the asker's data, written column-wise.
df <- tibble::tibble(
  key = c(1, 1, 1, 1, 2, 2, 3),
  value = c("Before", "After", "During", "Before", "Before", "After", "During")
)

# Count each key/value combination, convert the counts to within-key
# proportions, and plot those proportions directly as dodged bars.
df %>%
  dplyr::count(key, value) %>%
  dplyr::group_by(key) %>%
  dplyr::mutate(p = n / sum(n)) %>%
  ggplot(aes(x = key, y = p, fill = value)) +
  geom_col(position = position_dodge()) +
  scale_y_continuous(labels = scales::percent_format())
Created on 2019-10-28 by the reprex package (v0.3.0)

How to connect points of two dataframes to each other using ggplot in R?

I have two dataframes df1 and df2 as follows:
> df1
time value
1 1 6
2 2 2
3 3 3
4 4 1
> df2
time value
1 2 3
2 3 8
3 4 4
4 5 5
I want to plot these dataframes in just one diagram, show their name on their plots with a colour, and connect each value of df1 to the corresponding value of df2. Actually, here is the diagram which I want:
The code which I wrote to try to get the above diagram is:
# Asker's original attempt, kept verbatim; the comments mark the lines behind
# the problems described in the question.
ggplot() +
# geom_point() is given no data or aesthetics, and ggplot() supplies none,
# so this layer draws no points.
geom_point() +
geom_line(data=df1, aes(x=time, y=value), color='green') +
geom_line(data=df2, aes(x=time, y=value), color='red') +
xlab("time") +
# color and size are set inside aes(), so "green"/"red" and 18 are treated as
# data values mapped through scales, not as a literal colour and text size.
geom_text(aes(x = df1$time[1], y = 6.2, label = "df1", color = "green", size = 18)) +
geom_text(aes(x = df2$time[1], y = 2.8, label = "df2", color = "red", size = 18)) +
theme(axis.text=element_text(size = 14), axis.title=element_text(size = 14))
But the result is:
As you can see in plot 2, there are no points even though I used geom_point(), the colours of the names are wrong, there is no connection between each value of df1 and the corresponding value of df2, and I cannot increase the text size of the names even though I set size = 18 in my code.
A very similar solution to zx8754’s answer but with more explicit data wrangling. In theory my solution should be more general as the dataframes could be unsorted, they would just need a common variable to join.
library(magrittr)
library(ggplot2)
# The two series from the question, each with an `index` column that
# identifies which rows of df1 and df2 correspond to each other.
df1 <- data.frame(time = 1:4, value = c(6, 2, 3, 1), index = seq_len(4))
df2 <- data.frame(time = 2:5, value = c(3, 8, 4, 5), index = seq_len(4))
# Wide table of matched points (time.x/value.x from df1, time.y/value.y from
# df2); built before `type` is added so that `index` is the only shared key.
df3 <- dplyr::inner_join(df1, df2, by = "index")

# Long table for the two coloured lines.
df1[["type"]] <- "1"
df2[["type"]] <- "2"
plot_df <- dplyr::bind_rows(df1, df2)

ggplot(plot_df, aes(x = time, y = value, color = type)) +
  geom_point(color = "black") +
  geom_line() +
  # dashed connectors between corresponding points of the two series
  geom_segment(
    data = df3,
    mapping = aes(x = time.x, y = value.x, xend = time.y, yend = value.y),
    linetype = "dashed",
    inherit.aes = FALSE
  ) +
  scale_color_manual(values = c("1" = "green", "2" = "red"))
Created on 2019-04-25 by the reprex package (v0.2.0).
Combine (cbind) dataframes then use geom_segment:
# Draw both series, then connect the i-th point of df1 to the i-th point of
# df2 with a dashed segment. The segment endpoints are taken by column name:
# renaming the columns of cbind(df1, df2) by position only works when each
# data frame has exactly the two columns time and value, in that order.
ggplot() +
  geom_line(data = df1, aes(x = time, y = value), color = "green") +
  geom_line(data = df2, aes(x = time, y = value), color = "red") +
  geom_segment(
    data = data.frame(
      x1 = df1$time, y1 = df1$value,
      x2 = df2$time, y2 = df2$value
    ),
    aes(x = x1, y = y1, xend = x2, yend = y2),
    linetype = "dashed"
  )
There is a very simple solution (from here):
# Give corresponding rows of the two series the same group id, so that
# geom_line() draws one line per matched pair of points.
plot_df[["Kukulkan"]] <- rep(1:4, times = 2)
ggplot(plot_df, aes(x = time, y = value, color = type)) +
  geom_point(size = 3) +
  geom_line(aes(group = Kukulkan))

R ggplot // Multiple Grouping in X-axis

I am trying to implement the diagram 1 from Excel to Shiny. So far I got this code with the resulting diagram 2.
# Bars of `sum` per month/year combination, one facet column per X.
# Each `+` ends its line: a line that starts with `+` is parsed as a new
# expression, so the layers after it would never be added to the plot.
ggplot(filteredData(), aes(x = interaction(month, year), y = sum)) +
  geom_bar(stat = "identity") +
  facet_grid(. ~ X) +
  theme(legend.position = "none")
I want to group month and year like in the Excel example, so that you have only the month counter ("1", "2", ...) in the first row of the legend and the year ("2016", "2017", ...) in the second. The number of months can vary.
The data set looks like:
X year month sum
10 2016 1 450
10 2016 2 670
... ... ... ...
10 2017 1 200
11 2016 1 460
I slightly changed the data set, this is the closest I got to your specs:
# Slightly modified version of the asker's data, read from inline text.
df <- read.table(
  header = TRUE,
  text = "X year month sum
10 2016 1 450
10 2016 2 670
10 2017 1 200
11 2016 1 460
11 2017 2 500"
)
# Note the variable types: month, year and X are converted to factors.
df[c("month", "year", "X")] <- lapply(df[c("month", "year", "X")], as.factor)
# One facet column per X/year combination, with the month on the x axis.
ggplot(df, aes(x = month, y = sum)) +
  geom_col() +
  facet_grid(
    . ~ X + year,
    switch = "x", # moves the facet labels from the top to the bottom
    labeller = label_both # prefixes each label with its variable name
  ) +
  labs(x = "") # removes the month axis title
Result:
Or if you want to drop unused levels:
# Same plot, but each facet only shows the months that occur in it.
ggplot(df, aes(x = month, y = sum)) +
  geom_col() +
  facet_grid(
    . ~ X + year,
    switch = "x", # moves the facet labels from the top to the bottom
    labeller = label_both, # prefixes each label with its variable name
    scales = "free_x" # lets each facet use its own x axis
  ) +
  labs(x = "") # removes the month axis title
You can get a little more complex and use cowplot to merge the plots together. You could automate this using lapply to loop through your unique values, though that is probably overkill for just two groups.
library(ggplot2)
library(cowplot)
library(dplyr)
# cowplot sets its own theme, so switch back to the ggplot2 default.
theme_set(theme_gray())

# Shared y limits (rounded up to the next hundred) so both plots use the
# same scale.
myYlims <- c(0, 100 * ceiling(max(df$sum) / 100))

# One plot per X value, faceted by year with the strips at the bottom.
x10 <- ggplot(filter(df, X == 10), aes(x = month, y = sum)) +
  geom_col() +
  facet_grid(~year, switch = "x") +
  panel_border() +
  coord_cartesian(ylim = myYlims) +
  xlab("X = 10")

x11 <- ggplot(filter(df, X == 11), aes(x = month, y = sum)) +
  geom_col() +
  facet_grid(~year, switch = "x") +
  panel_border() +
  coord_cartesian(ylim = myYlims) +
  xlab("X = 11")

# Put the plots side by side; the second one drops its y axis.
plot_grid(
  x10,
  x11 +
    theme(
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank()
    ),
  rel_widths = c(1.1, 1)
)
Here is an approach to automate this, including more complex data to justify the automation. Note that you will need to play with the aspect ratio of your output and with the rel_widths option to make it look decent:
# Larger example: 6 values of X, each with 3 years x 3 months of random sums.
df <- data.frame(
  X = rep(1:6, each = 9),
  year = rep(rep(2016:2018, each = 3), times = 6),
  month = rep(1:3, times = 18),
  sum = rnorm(9 * 6, mean = 700, sd = 100)
)
# Note the variable types: month, year and X are converted to factors.
df[c("month", "year", "X")] <- lapply(df[c("month", "year", "X")], as.factor)
# Shared y limits (rounded up to the next hundred) so every plot uses the
# same scale.
myYlims <- c(0, 100 * ceiling(max(df$sum) / 100))

# Build one plot per level of X, faceted by year with the strips at the bottom.
eachPlot <- lapply(levels(df$X), function(x_level) {
  ggplot(filter(df, X == x_level), aes(x = month, y = sum)) +
    geom_col() +
    facet_grid(~year, switch = "x") +
    panel_border() +
    coord_cartesian(ylim = myYlims) +
    xlab(paste("X =", x_level))
})

# Strip the y axis from every plot except the first.
eachPlot[-1] <- lapply(eachPlot[-1], function(p) {
  p +
    theme(
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank()
    )
})

# Lay the plots out in a single row; the first is wider because it keeps
# its y axis.
plot_grid(
  plotlist = eachPlot,
  rel_widths = c(1.4, rep(1, length(eachPlot) - 1)),
  nrow = 1
)

Resources