How can I organize my legend into subgroups? - r

The legend for my bar graph currently lists all the items in the graph in one long list. I would like to have the legend group itself by each column.
The number of columns is dynamic so the legend must be able to adjust accordingly.
library("phyloseq"); packageVersion("phyloseq")
library(ggplot2)
library(scales)
data("GlobalPatterns")
TopNOTUs <- names(sort(taxa_sums(GlobalPatterns), TRUE)[1:50])
gp.ch <- prune_species(TopNOTUs, GlobalPatterns)
gp.ch = subset_taxa(gp.ch, Genus != "NA")
mdf = psmelt(gp.ch)
# Create a ggplot similar to
library("ggplot2")
mdf$group <- paste0(mdf$Phylum, "-", mdf$Genus, sep = "")
colours <-ColourPalleteMulti(mdf, "Phylum", "Genus")
# Plot resultss
ggplot(mdf, aes(Phylum)) +
geom_bar(aes(fill = group), colour = "grey", position = "stack")
Right now the legend prints the items:
Actinobacteria-Bifidobacterium
Actinobacteria-Rothia
Bacteriodetes-Alistipes
Bacteriodetes-Bacteroides
...
I would like it to print:
Actinobacteria
-Bifidobacterium
-Rothia
Bacteriodetes
-Alistipes
-Bacteroides
...

This is hacky but might work for you. First, using mtcars dataset, I add dummy rows to the data representing the groupings, then assign a factor level to each of the groupings and component categories. Finally, I hack the alpha in the legend so that grouping headers have transparent colors and look hidden.
# Fake data sample
library(tidyverse)
cars_sample <- mtcars %>%
rownames_to_column(var = "name") %>%
mutate(make = word(name, end = 1),
model = word(name, start = 2, end = -1)) %>%
filter(make %in% c("Mazda", "Merc", "Hornet")) %>%
select(name, make, model, mpg, wt)
# Add rows for groups and make a factor for each group and each component
cars_sample_fct <- cars_sample %>%
bind_rows( cars_sample %>% count(make) %>% mutate(model = make, name = "")) %>%
arrange(make, name) %>%
mutate(name_fct = fct_inorder(if_else(name == "", make, paste0("- ", model))))
# Plot with transparent grouping legend labels
ggplot(cars_sample_fct, aes(wt, mpg, color = name_fct)) +
geom_point() +
scale_color_discrete(name = "Car") +
guides(color = guide_legend(
override.aes = list(size = 5,
alpha = cars_sample_fct$name != "")))

Related

In R/ggplot2 how to combine labeller (renaming plots) and factor (changing order of plots) in a facet_wrap?

I have a ggplot that I use facet_wrap on. I changed the lables of the individual plots with labeller according to this guide:
# New facet label names for dose variable
dose.labs <- c("D0.5", "D1", "D2")
names(dose.labs) <- c("0.5", "1", "2")
# New facet label names for supp variable
supp.labs <- c("Orange Juice", "Vitamin C")
names(supp.labs) <- c("OJ", "VC")
# Create the plot
p + facet_grid(
dose ~ supp,
labeller = labeller(dose = dose.labs, supp = supp.labs)
)
I now also want to change the order of the plots. For this I referred to this post.
But the solution for the renaming and relabelling provided in the comment by glenn_in_boston didn't work for me. I have tried their solution only, I have tried playing around with the labeller I used before I had the desire to reorder my plots.
I can apply whatever solution someone has with a dummy data frame, no need to use my (very extensive) data frame. But if it helps, this is what I have without reordering the plots:
v1.labs <- c("T/trans", "T/transmann, T/trans-Frau", "T/transgendermann, T/transgender-Frau", "...transsexue...", "...Transsexue...", "transgender", "Transgender", "transident...", "...Transident...", "...T/transgeschlechtlich...", "...T/transvestit...", "...T/transe(n)", "...T/tranny/ies, ...S/shemale...", "...T/travo...")
names(v1.labs) <- c("d_trans", "d_transX", "d_transgenderX", "d_transsexue", "d_Transsexue", "d_transgender", "d_Transgender", "d_transident", "d_Transident", "d_transgeschlechtlich", "d_transvestit", "d_transe", "d_tranny", "d_travo")
qy %>%
pivot_longer(c(d_trans, d_transX, d_transgenderX , d_transsexue, d_Transsexue, d_transgender, d_Transgender, d_transident, d_Transident, d_transgeschlechtlich, d_transvestit, d_transe, d_tranny, d_travo), names_to ="words", values_to = "d") %>%
ggplot(aes(x = year, y = d)) +
geom_line(aes(color = words), show.legend = FALSE) +
labs(title = "Sprachwandel: Vergleich 2",
subtitle = "Personenbezeichnungen",
x = "Jahr",
y = "Wortdichte pro Artikel in Prozent",
linetype = "Worte")+
facet_wrap(~words, labeller = labeller(words = v1.labs))
And what I would like the order to be:
levels = c("d_Transgender", "d_transgender", "d_transgenderX", "d_Transident", "d_Transsexue", "d_transsexue", "d_transX", "d_transident", "d_transvestit", "d_travo", "d_trans", "d_transgeschlechtlich", "d_transe", "d_tranny")
You can do this in ggplot2 using the labeller function as you've specified, by using factor(levels=) to reorder the variable in your desired order:
library(tidyverse)
cyls = c('four','six','eight')
names(cyls) = c('4','6','8')
mtcars %>%
mutate(cyl = factor(cyl, levels = c(6,4,8))) %>%
ggplot(aes(x = hp, y = mpg)) +
geom_point() +
facet_wrap(~cyl, labeller = as_labeller(cyls))
Note you can also avoid all the labeller difficulties by just bringing in a properly-labeled version of the variable instead:
newnames <- data.frame(cyl = c(4,6,8), cyl_label = c('four','six','eight'))
mtcars <- mtcars %>%
inner_join(newnames, by = 'cyl')
and then facet using cyl_label instead of cyl

Plotting multiple plots with two discrete variables - how to include all discrete variables in both axes

I have a dataset that looks like this:
test<-data.frame("M"=c("a","b","c","a","b","b","c","a","b","c"),
"N"=c(1,3,4,6,6,7,7,8,8,8),
"X"=c(0,1,0,1,1,0,1,0,1,1),
"Y"=c(1,1,0,0,1,0,1,1,1,0))
I'm making a simple plot where I want X and Y on the y axis, M on the x axis, each grid colored if the value of X or Y is 1 and empty if the value of X or Y is 0. I'm repeating this for each categories in N (the categories of N are 1 to 5, 6, 7, 8), then stacking all plots together. Right now, I'm doing this with the following code.
test <- test[order(test$N),]
test1 <- test[c(1:3),]
test2 <- test[c(4:5),]
test3 <- test[c(6:7),]
test4 <- test[c(8:10),] # I'm doing this to "separate" categories of `N` manually
p1 <- test1[,c(1,3:4)] %>%
gather(col_name, value, -M) %>%
ggplot(aes(factor(M), col_name, fill = value == 1))+
geom_tile(colour = 'black')+
scale_fill_manual(values = c('FALSE' = 'white', 'TRUE' = 'red'))
p2 <- test2[,c(1,3:4)] %>%
gather(col_name, value, -M) %>%
ggplot(aes(factor(M), col_name, fill = value == 1))+
geom_tile(colour = 'black')+
scale_fill_manual(values = c('FALSE' = 'white', 'TRUE' = 'yellow'))
p3 <- test3[,c(1,3:4)] %>%
gather(col_name, value, -M) %>%
ggplot(aes(factor(M), col_name, fill = value == 1))+
geom_tile(colour = 'black')+
scale_fill_manual(values = c('FALSE' = 'white', 'TRUE' = 'green'))
p4 <- test4[,c(1,3:4)] %>%
gather(col_name, value, -M) %>%
ggplot(aes(factor(M), col_name, fill = value == 1))+
geom_tile(colour = 'black')+
scale_fill_manual(values = c('FALSE' = 'white', 'TRUE' = 'blue'))
grid.arrange(p1, p2, p3, p4, ncol = 1)
I'm attaching an image of what I have right now. I want to fix these plots so that I would have the same factors of M for all four plots (right now, only p1 and p4 have all three factors (a, b and c) in the x axis but I want to add factor c to p2 and a to p3 so that all x axes are identical to each other. Can anyone give me suggestions on how to do this?
(Also, I'm suspecting that the current way I'm plotting things is probably not the most quickest/easiest way to go, if anyone has suggestions on how to improve things it'd be really helpful!)
To continue using grid.arrange(), instead of facet_wrap(), do the following:
Make M a factor:
test$M <- factor(test$M)
Add the following to each of your plots:
scale_x_discrete(limits = levels(test$M))
Maybe one approach I can suggest you is using facets after applying a smart trick to group your values and avoid splitting in different dataframes. Here the code as an option for you (The colors will be the same across the facets in base of TRUE/FALSE values):
library(tidyverse)
#Code
test %>% mutate(Var=lead(N)) %>%
mutate(Diff=Var-N,Diff=ifelse(row_number()==1,0,Diff)) %>%
mutate(Group=ifelse(Diff==0,N,NA)) %>%
fill(Group) %>% select(-c(N,Var,Diff)) %>%
group_by(Group) %>% mutate(NG=paste0('p',cur_group_id())) %>% ungroup() %>%
select(-Group) %>%
pivot_longer(cols = -c(NG,M)) %>%
ggplot(aes(factor(M), name, fill = value == 1,group=value))+
geom_tile(colour = 'black')+
facet_wrap(.~NG,ncol = 1)+
scale_fill_manual('value',values=c('tomato','cyan3'))+
xlab('M')
Output:
The othe option would be patchwork with a customized function:
library(tidyverse)
library(patchwork)
#Code
data <- test %>% mutate(Var=lead(N)) %>%
mutate(Diff=Var-N,Diff=ifelse(row_number()==1,0,Diff)) %>%
mutate(Group=ifelse(Diff==0,N,NA)) %>%
fill(Group) %>% select(-c(N,Var,Diff)) %>%
group_by(Group) %>% mutate(NG=paste0('p',cur_group_id())) %>% ungroup() %>%
select(-Group) %>%
mutate(M=factor(M,levels = unique(M),ordered = T)) %>%
pivot_longer(cols = -c(NG,M))
#List
List <- split(data,data$NG)
#Function
myfun <- function(x)
{
#Test for color
val <- unique(x$NG)
#Conditioning for color
if(val=='p1') {vcolor=c('FALSE' = 'white', 'TRUE' = 'red')} else
if(val=='p2') {vcolor=c('FALSE' = 'white', 'TRUE' = 'yellow')} else
if(val=='p3') {vcolor=c('FALSE' = 'white', 'TRUE' = 'green')} else
{vcolor=c('FALSE' = 'white', 'TRUE' = 'blue')}
#Update data
x <- x %>% mutate(M=factor(M,levels = c('a','b','c'),ordered = T)) %>% complete(M=M)
#Plot
G <- ggplot(x,aes(factor(M), name, fill = (value == 1 & !is.na(value))))+
geom_tile(colour = 'black')+
scale_fill_manual('value',values=vcolor)+
xlab('M')+
scale_y_discrete(limits=c('X','Y'))+
theme_bw()+
ggtitle(val)
return(G)
}
#Apply
Lplot <- lapply(List,myfun)
#Wrap
GF <- wrap_plots(Lplot,ncol = 1)
Output:
Something like this?
test<-data.frame("M"=c("a","b","c","a","b","b","c","a","b","c"),
"N"=c(1,3,4,6,6,7,7,8,8,8),
"X"=c(0,1,0,1,1,0,1,0,1,1),
"Y"=c(1,1,0,0,1,0,1,1,1,0))
library(tidyverse)
test = mutate(test, N2 = cut(N, breaks = c(0,5:100)))
m = pivot_longer(test, c(X, Y))
ggplot(m, aes(M, name,fill=factor(value))) +
geom_tile(colour = 'black') +
facet_wrap(~N2, scales = 'free') +
scale_fill_manual(values = c(`0` = 'white', `1` = 'red'))

ggpubr not creating multiple bars in ggdotchart

Utilizing the example package code in ggpubr, the ggdotchart function does not create separate segments as is shown in the example, instead there is only a single segment, though the dots seem to be placed in the correct orientation. Does anyone have any tips on what the problem may be? I've thought it may be due to factors, tibbles vs. df, but I haven't been able to determine the problem.
Code:
df <- diamonds %>%
filter(color %in% c("J", "D")) %>%
group_by(cut, color) %>%
summarise(counts = n())
ggdotchart(df, x = "cut", y ="counts",
color = "color", palette = "jco", size = 3,
add = "segment",
add.params = list(color = "lightgray", size = 1.5),
position = position_dodge(0.3),
ggtheme = theme_pubclean()
)
With the expected output of:
But instead I am getting:
Here is a way to get your desired plot without ggpubr::ggdotchart. The issue seems to be that geom_segment does not allow dodging, as discussed here: R - ggplot dodging geom_lines and here: how to jitter/dodge geom_segments so they remain parallel?.
# your data
df <- diamonds %>%
filter(color %in% c("J", "D")) %>%
group_by(cut, color) %>%
summarise(counts = n())
The first step is to expand your data. We will need this when we call geom_line which allows for dodging. I took this idea from #Stibu's answer. We create a copy of df and change the counts column to be 0 in df2. Finally we use bind_rows to create a single data frame from df and df2.
df2 <- df
df2$counts <- 0
df_out <- purrr::bind_rows(df, df2)
df_out
Then I use ggplot to create / replicate your desired output.
ggplot(df_out, aes(x = cut, y = counts)) +
geom_line(
aes(col = color), # needed for dodging, we'll later change colors to "lightgrey"
position = position_dodge(width = 0.3),
show.legend = FALSE,
size = 1.5
) +
geom_point(
aes(fill = color),
data = subset(df_out, counts > 0),
col = "transparent",
shape = 21,
size = 3,
position = position_dodge(width = 0.3)
) +
scale_color_manual(values = c("lightgray", "lightgray")) + #change line colors
ggpubr::fill_palette(palette = "jco") +
ggpubr::theme_pubclean()
There is an extra "group" argument you need!
df <- diamonds %>%
dplyr::filter(color %in% c("J", "D")) %>%
dplyr::group_by(cut, color) %>%
dplyr::summarise(counts = n())
ggdotchart(df, x = "cut", y ="counts",
color = "color", group="color", # here it is
palette = "jco", size = 3,
add = "segment",
add.params = list(color = "lightgray", size = 1.5),
position = position_dodge(0.3),
ggtheme = theme_pubclean()
)

Stacked bar graphs in plotly: how to control the order of bars in each stack

I'm trying to order a stacked bar chart in plotly, but it is not respecting the order I pass it in the data frame.
It is best shown using some mock data:
library(dplyr)
library(plotly)
cars <- sapply(strsplit(rownames(mtcars), split = " "), "[", i = 1)
dat <- mtcars
dat <- cbind(dat, cars, stringsAsFactors = FALSE)
dat <- dat %>%
mutate(carb = factor(carb)) %>%
distinct(cars, carb) %>%
select(cars, carb, mpg) %>%
arrange(carb, desc(mpg))
plot_ly(dat) %>%
add_trace(data = dat, type = "bar", x = carb, y = mpg, color = cars) %>%
layout(barmode = "stack")
The resulting plot doesn't respect the ordering, I want the cars with the largest mpg stacked at the bottom of each cylinder group. Any ideas?
As already pointed out here, the issue is caused by having duplicate values in the column used for color grouping (in this example, cars). As indicated already, the ordering of the bars can be remedied by grouping your colors by a column of unique names. However, doing so will have a couple of undesired side-effects:
different model cars from the same manufacturer would be shown with different colors (not what you are after - you want to color by manufacturer)
the legend will have more entries in it than you want i.e. one per model of car rather than one per manufacturer.
We can hack our way around this by a) creating the legend from a dummy trace that never gets displayed (add_trace(type = "bar", x = 0, y = 0... in the code below), and b) setting the colors for each category manually using the colors= argument. I use a rainbow pallette below to show the principle. You may like to select sme more attractive colours yourself.
dat$unique.car <- make.unique(as.character(dat$cars))
dat2 <- data.frame(cars=levels(as.factor(dat$cars)),color=rainbow(nlevels(as.factor(dat$cars))))
dat2[] <- lapply(dat2, as.character)
dat$color <- dat2$color[match(dat$cars,dat2$cars)]
plot_ly() %>%
add_trace(data=dat2, type = "bar", x = 0, y = 0, color = cars, colors=color, showlegend=T) %>%
add_trace(data=dat, type = "bar", x = carb, y = mpg, color = unique.car, colors=color, showlegend=F, marker=list(line=list(color="black", width=1))) %>%
layout(barmode = "stack", xaxis = list(range=c(0.4,8.5)))
One way to address this is to give unique names to all models of car and use that in plotly, but it's going to make the legend messier and impact the color mapping. Here are a few options:
dat$carsID <- make.unique(as.character(dat$cars))
# dat$carsID <- apply(dat, 1, paste0, collapse = " ") # alternative
plot_ly(dat) %>%
add_trace(data = dat, type = "bar", x = carb, y = mpg, color = carsID) %>%
layout(barmode = "stack")
plot_ly(dat) %>%
add_trace(data = dat, type = "bar", x = carb, y = mpg, color = carsID,
colors = rainbow(length(unique(carsID)))) %>%
layout(barmode = "stack")
I'll look more tomorrow to see if I can improve the legend and color mapping.

ggplot/GGally - Parallel Coordinates - y-axis labels

Does anyone know if there is a way to add variable labels to the ggparcoord function in GGally? I've tried numerous ways with geom_text, but nothing is yielding results.
To be more explicit, I am looking to pass the row.names(mtcars) through geom_text. The only way that I can distinguish the car is passing row.names(mtcars) through the groupColumn argument, but I don't like the way this looks.
Doesn't work:
mtcars$carName <- row.names(mtcars) # This becomes column 12
library(GGally)
# Attempt 1
ggparcoord(mtcars,
columns = c(12, 1, 6),
groupColumn = 1) +
geom_text(aes(label = carName))
# Attempt 2
ggparcoord(mtcars,
columns = c(12, 1, 6),
groupColumn = 1,
mapping = aes(label = carName))
Any ideas would be appreciated!
Solution 1: If you want to stick close to your original attempt, you can calculate the appropriate y coordinates for the car names, & add that as a separate data source. Use inherit.aes = FALSE so that this geom_text layer doesn't inherit anything from the ggplot object created using ggparcoord():
library(dplyr)
p1 <- ggparcoord(mtcars,
columns = c(12, 1, 6),
groupColumn = 1) +
geom_text(data = mtcars %>%
select(carName) %>%
mutate(x = 1,
y = scale(as.integer(factor(carName)))),
aes(x = x, y = y, label = carName),
hjust = 1.1,
inherit.aes = FALSE) +
# optional: remove "carName" from x-axis labels
scale_x_discrete(labels = function(x) c("", x[-1])) +
# also optional: hide legend, which doesn't really seem relevant here
theme(legend.position = "none")
p1
Solution 2: This alternative uses carName as the group column, & doesn't pass it as one of the parallel coordinate columns. (which I think this might be closer to the use cases intended by this function...) Specifying carName as the group column allows the car name values to be captured in the data slot of the ggplot object created by ggparcoord() this time, so our geom_text label can inherit it directly, & even filter only for rows corresponding to variable == "mpg" (or whatever the first of the parallel coordinate columns is named, in the actual use case). The y coordinates are not as evenly spread out as above, but geom_text_repel from the ggrepel package does a decent job at shifting overlapping text labels away from one another.
library(dplyr)
library(ggrepel)
p2 <- ggparcoord(mtcars,
columns = c(1, 6),
groupColumn = "carName") +
geom_text_repel(data = . %>%
filter(variable == "mpg"),
aes(x = variable, y = value, label = carName),
xlim = c(NA, 1)) + # limit repel region to the left of the 1st column
theme(legend.position = "none") # as before, hide legend since the labels
# are already in the plot
p2
Solution 3 / 4: You can actually plot the same with ggplot(), without relying on extensions that may do unexpected stuff behind the scenes:
library(dplyr)
library(tidyr)
library(ggrepel)
# similar output to solution 1
p3 <- mtcars %>%
select(carName, mpg, wt) %>%
mutate(carName.column = as.integer(factor(carName))) %>%
gather(variable, value, -carName) %>%
group_by(variable) %>%
mutate(value = scale(value)) %>%
ungroup() %>%
ggplot(aes(x = variable, y = value, label = carName, group = carName)) +
geom_line() +
geom_text(data = . %>% filter(variable == "carName.column"),
hjust = 1.1) +
scale_x_discrete(labels = function(x) c("", x[-1]))
p3
# similar output to solution 2
p4 <- mtcars %>%
select(carName, mpg, wt) %>%
gather(variable, value, -carName) %>%
group_by(variable) %>%
mutate(value = scale(value)) %>%
ungroup() %>%
ggplot(aes(x = variable, y = value, label = carName, group = carName)) +
geom_line() +
geom_text_repel(data = . %>% filter(variable == "mpg"),
xlim = c(NA, 1))
p4
Edit
You can add text labels on the right as well, for each of the above. Do note that the location for labels may not be nicely spaced out, since they are positioned according to wt's scaled values:
p1 +
geom_text(data = mtcars %>%
select(carName, wt) %>%
mutate(x = 3,
y = scale(wt)),
aes(x = x, y = y, label = carName),
hjust = -0.1,
inherit.aes = FALSE)
p2 +
geom_text_repel(data = . %>%
filter(variable == "wt"),
aes(x = variable, y = value, label = carName),
xlim = c(2, NA))
p3 +
geom_text(data = . %>% filter(variable == "wt"),
hjust = -0.1)
p4 +
geom_text_repel(data = . %>% filter(variable == "wt"),
xlim = c(2, NA))

Resources