Is there a way to order factors with facet wrap? - r

I am trying to draw a graph but want to order the factor variable based on given values. The plot does not show what I want: I would like the languages to be ordered based on the meanscore. Any ideas?
# Question code: simulate 40 scores (10 per language), average them per
# gender/language pair, and draw one flipped bar chart per gender.
library(tidyverse)
set.seed(200) # reproducibility
df <- tibble(
# gl(4, 10, ...) yields each of the four labels ten times in a row.
language = gl(4, 10, labels = c("Python", "R", "Javascipt", "Excel")),
# Negative normal draws are coded 0, all others 1; the labels are then
# attached to the sorted levels (0 -> "Male", 1 -> "Female").
gender = factor(ifelse(sign(rnorm(40))==-1, 0, 1), labels = c("Male", "Female")),
# Uniform draws on [25, 80), rounded down to whole numbers.
score = floor(runif(40, 25, 80))
)
# One mean score per gender/language combination.
df <- df %>% group_by(gender, language) %>%
summarise(meanscore = mean(score))
df %>%
# `language` is one factor column and therefore has one level order, while
# facet_wrap() below uses its default fixed scales, so both panels share
# the same axis order. This is the behavior the question is asking about.
mutate(language = fct_reorder(language, meanscore)) %>%
ggplot(aes(language, meanscore, fill = gender)) +
geom_col() +
facet_wrap(~gender) +
coord_flip()

I believe this is what you want: use reorder_within() (together with scale_x_reordered()) from the tidytext package.
# Order the bars inside each facet by mean score, using tidytext's
# reorder_within() / scale_x_reordered() pair.
library(tidyverse) # tibble(), %>%, group_by(), summarise(), ggplot()
library(tidytext)  # reorder_within(), scale_x_reordered()

set.seed(200) # same seed as the question, so the simulated data match

df <- tibble(
  language = gl(4, 10, labels = c("Python", "R", "Javascipt", "Excel")),
  gender = factor(ifelse(sign(rnorm(40)) == -1, 0, 1), labels = c("Male", "Female")),
  score = floor(runif(40, 25, 80))
)

# One row per gender/language pair. `.groups = "drop"` returns an ungrouped
# tibble instead of one that is still grouped by gender.
df <- df %>%
  group_by(gender, language) %>%
  summarise(meanscore = mean(score), .groups = "drop")

# reorder_within() orders `language` by `meanscore` separately within each
# `gender`; scale_x_reordered() restores the plain language names on the axis.
# Free scales let every panel keep its own ordering.
ggplot(df, aes(reorder_within(language, meanscore, gender), meanscore, fill = gender)) +
  geom_col() +
  coord_flip() +
  scale_x_reordered() +
  facet_wrap(gender ~ ., scales = "free")

Related

Combining two heatmaps with the variables next to each other

I'm trying to combine two heatmaps. I want var_a and var_x on the y axis with for example: var_a first and then var_x. I don't know if I should do this by changing the dataframe or combining them, or if I can do this in ggplot.
Below I have some example code and a drawing of what I want (since I don't know if I explained it right).
I hope someone has ideas how I can do this either in the dataframe or in ggplot!
Example code:
# Wide table: one row per variable, its partner variable, and one expression
# column per organ.
df_one <- data.frame(
  vars = c("var_a", "var_b", "var_c"),
  corresponding_vars = c("var_x", "var_y", "var_z"),
  expression_organ_1_vars = c(5, 10, 20),
  expression_organ_2_vars = c(50, 2, 10),
  expression_organ_3_vars = c(5, 10, 3)
)

# Stack the three expression columns: their names go to `tissueType`, their
# values to `Expression`.
df_one_long <- pivot_longer(
  data = df_one,
  cols = starts_with("expression"),
  names_to = "tissueType",
  values_to = "Expression"
)

# Heatmap: variables on x, organs on y, tile colour = expression.
expression.df_one <- ggplot(
  data = df_one_long,
  mapping = aes(x = vars, y = tissueType, fill = Expression)
) +
  geom_tile()
expression.df_one
# Second wide table: expression of the corresponding variables per organ.
df_two <- data.frame(
corresponding_vars = c("var_x", "var_y", "var_z"),
expression_organ_1_corresponding_vars = c(100, 320, 120),
expression_organ_2_corresponding_vars = c(23, 30, 150),
expression_organ_3_corresponding_vars = c(89, 7, 200)
)
# NOTE(review): this pivots df_one, not df_two, so df_two_long holds the same
# data as df_one_long and df_two's values are never used. Pivoting df_two
# would need cols = 2:4 and `corresponding_vars` on the x axis below; the
# full_join() further down joins on "vars", which only exists because df_one
# is used here, so both places would have to change together.
df_two_long <- pivot_longer(df_one,
cols=3:5,
names_to = "tissueType",
values_to = "Expression")
# Heatmap with the same mapping as expression.df_one.
expression.df_two <- ggplot(df_two_long,
mapping = aes(y=tissueType, x=vars, fill = Expression)) +
geom_tile()
expression.df_two
Drawing:
You can bind your data frames together and pivot into a longer format so that vars and corresponding vars are in the same column, but retain a grouping variable to facet by:
# Combine both wide tables into one long table, label every
# variable / corresponding-variable pair, and draw one heatmap panel per pair.
df_two %>%
  # Keep a copy of the pairing key before the columns are renamed.
  mutate(cor = corresponding_vars) %>%
  # Drop the first "corresponding_" from each name so df_two's columns get the
  # same names as df_one's (e.g. corresponding_vars -> vars).
  rename_with(~ sub("corresponding_", "", .x)) %>%
  bind_rows(df_one %>% rename(cor = corresponding_vars)) %>%
  pivot_longer(contains("expression"), names_to = "organ") %>%
  # "expression_organ_1_vars" -> "organ_1"
  mutate(organ = gsub("expression_|_vars", "", organ)) %>%
  # Build the facet label per pair, e.g. "var_a cor var_x". A grouped mutate()
  # keeps every row; summarize() returning several rows per group is
  # deprecated since dplyr 1.1.0.
  group_by(cor) %>%
  mutate(pair = paste(sort(unique(vars)), collapse = " cor ")) %>%
  ungroup() %>%
  ggplot(aes(vars, organ, fill = value)) +
  geom_tile(color = "white", linewidth = 1) +
  # One panel per pair, with the strip moved below the panel to act as the
  # x-axis label.
  facet_grid(. ~ pair, scales = "free_x", switch = "x") +
  scale_fill_viridis_c() +
  coord_cartesian(clip = "off") +
  scale_x_discrete(expand = c(0, 0)) +
  theme_minimal(base_size = 16) +
  theme(
    strip.placement = "outside",
    axis.text.x = element_blank(),
    axis.ticks.x.bottom = element_line(),
    panel.spacing.x = unit(3, "mm")
  )
Okay, so I solved the issue for my own project, which is to convert it to a scatter plot. I combined both datasets and then used a simple scatterplot.
# Merge the two long tables on their shared keys. Both have an `Expression`
# column, so the join suffixes them: Expression.x comes from df_two_long and
# Expression.y from df_one_long.
df.combined <- dplyr::full_join(
  x = df_two_long,
  y = df_one_long,
  by = c("vars", "corresponding_vars", "tissueType")
)

# One point per variable/organ cell: colour shows Expression.x, size shows
# Expression.y.
ggplot(
  data = df.combined,
  mapping = aes(x = vars, y = tissueType, colour = Expression.x, size = Expression.y)
) +
  geom_point()
It's not a solution with heatmaps, but I don't know how to do that at the moment.

How to plot two overlapping histograms like this?

The data is

|   | y0  | y1  |
|---|-----|-----|
| M | 100 | 200 |
| F | 50  | 250 |
How to plot the histogram like this? Note that M and F do not block each other, so this is not the case in How to plot two histograms together in R. Thanks in advance.
First, convert your data to long format with pivot_longer().
library(ggplot2)
library(tidyr)

# Reshape to long form: the y0/y1 column names go to `name`, the counts to
# `value` (pivot_longer()'s default output names).
df_long <- pivot_longer(df, cols = c("y0", "y1"))

# Stacked columns, one bar per original column, coloured by sex.
ggplot(data = df_long, mapping = aes(x = name, y = value, fill = sex)) +
  geom_col() +
  scale_fill_manual(values = c("M" = "blue", "F" = "darkorange")) +
  theme(legend.position = "bottom")
data:
# Example data: one row per sex, with the two counts in y0 and y1.
df <- data.frame(
  sex = c("M", "F"),
  y0 = c(100, 50),
  y1 = c(200, 250)
)
Here's a straight solution:
library(tidyverse)

# Row-wise definition of the example data.
my_df <- tribble(
  ~sex, ~y0, ~y1,
  "M", 100, 200,
  "F", 50, 250
)

# Long form (columns `name` and `value`), then stacked columns filled by sex.
my_df %>%
  pivot_longer(cols = starts_with("y")) %>%
  ggplot(mapping = aes(x = name, y = value, fill = sex)) +
  geom_col(position = position_stack())
If your data is like df below:
library(tidyverse)

# Example data with the group label in V1 and integer counts in y0/y1.
df <- tibble::tribble(
  ~V1, ~y0, ~y1,
  "M", 100L, 200L,
  "F", 50L, 250L
)

# Pivot every column except V1, then draw stacked columns filled by V1.
# geom_col() is geom_bar() with stat = "identity".
df %>%
  pivot_longer(cols = -V1) %>%
  ggplot(mapping = aes(x = name, y = value, fill = V1)) +
  geom_col()
Which gives:

Making multi-group line plot with many observations more readable

I have created the following plot:
From a bigger version (5 rows, 58 columns) of this df:
# Small stand-in for the real data: one row per person, one column per
# episode. check.names = FALSE keeps the column names as "1", "2", "3"
# instead of converting them to syntactic names.
df <- data.frame(
  `1` = c(31, 87, 22, 12),
  `2` = c(37, 74, 33, 20),
  `3` = c(35, 32, 44, 14),
  row.names = c("ROBERT", "FRANK", "MICHELLE", "KATE"),
  check.names = FALSE
)
In the following manner:
# Move the row names into a regular `Name` column.
df <- as.data.frame(rownames_to_column(df, "Name"))

# Long form: one row per Name/episode, episode label in `ep`.
df <- melt(df, id.vars = "Name", variable.name = "ep")

# One line per person across episodes.
ggplot(df, aes(x = ep, y = value, colour = Name, group = Name)) +
  geom_line()
The plot kind of shows what I'd like it to, but it really is a mess. Does anyone have a suggestion that would help me increase its readability?
Any help is very much appreciated!
Here are a few options for visualizing lots of datapoints across a smallish number of cases. These are illustrated with a subset of the txhousing data included with ggplot2.
Solution 1: Faceting
As @rdelrossi suggested, one solution is to facet by Name:
library(ggplot2)

# Solution 1: one panel per Name, stacked vertically, so the lines no longer
# overlap. The legend is dropped because the strip labels identify each panel.
ggplot(df, aes(ep, value)) +
  geom_line(aes(colour = Name, group = Name), show.legend = FALSE) +
  # `ep` is a Date in the illustrated data (see the data prep below), so use
  # the date scale, as Solution 2 does.
  scale_x_date(expand = c(0, 0)) +
  facet_wrap(vars(Name), ncol = 1, scales = "free_x") +
  theme_bw()
Solution 2: Smoothing
Use geom_smooth() to smooth out local fluctuations to see larger longer-term trends:
# Solution 2: replace the raw lines with one smoothed trend per Name.
ggplot(df, aes(ep, value)) +
  geom_smooth(
    aes(colour = Name, group = Name),
    se = FALSE, # no confidence ribbon
    span = 1, # higher number = smoother
    # Line thickness is set with `linewidth`; `size` for lines is deprecated
    # since ggplot2 3.4.0.
    linewidth = 1.25
  ) +
  scale_x_date(expand = c(0, 0)) +
  theme_bw()
Solution 3: Lasagna
Sometimes called a "lasagna plot," this is a heatmap with cases on the y axis, time (or whatever) on the x axis, and values mapped to color. It's a different way of comparing changes within (left to right) and between (up and down) individuals.
# Solution 3: "lasagna" heatmap with time on x, one row per Name, and the
# value mapped to both the tile fill and the tile outline.
ggplot(df, aes(ep, Name, colour = value, fill = value)) +
  # Outline thickness is set with `linewidth`; `size` for lines is deprecated
  # since ggplot2 3.4.0.
  geom_tile(linewidth = .5) +
  # One viridis scale (option "B") drives both aesthetics.
  scale_fill_viridis_c(option = "B", aesthetics = c("colour", "fill")) +
  coord_cartesian(expand = FALSE) +
  theme(
    axis.text.y = element_text(size = 12, face = "bold"),
    axis.title.y = element_blank()
  )
(may want to click through to larger image)
Data prep:
library(dplyr)
library(ggplot2) # provides the txhousing dataset
library(lubridate)

# Build the illustration data: monthly sales for five cities, 2004-2012,
# standardized within each city.
df <- txhousing %>%
  filter(
    city %in% c("Beaumont", "Amarillo", "Arlington", "Corpus Christi", "El Paso"),
    between(year, 2004, 2012)
  ) %>%
  group_by(city) %>%
  mutate(
    Name = city,
    # scale() returns a one-column matrix; as.numeric() stores a plain
    # numeric vector instead.
    value = as.numeric(scale(sales)),
    # First day of each month, built from the numeric year and month. This
    # avoids pasting them into a string, which needed stringr::str_c()
    # although stringr is not loaded here.
    ep = make_date(year, month)
  ) %>%
  ungroup()
If your readability concern is just the x axis labels, then I think the main issue is that when you use reshape2::melt() the result is that the column ep is a factor which means that the x axis of your plot will show all the levels and get crowded. The solution is to convert it to numeric and then it will adjust the labels in a sensible way.
I replace your use of reshape2::melt() with tidyr::pivot_longer() which has superseded it within the {tidyverse} but your original code would still work.
library(tidyverse)

# The question's example data, one row per person.
df <- structure(
  list(
    `1` = c(31, 87, 22, 12),
    `2` = c(37, 74, 33, 20),
    `3` = c(35, 32, 44, 14)
  ),
  class = "data.frame",
  row.names = c("ROBERT", "FRANK", "MICHELLE", "KATE")
)

# Long form with a numeric `ep`, so the x axis is continuous and does not
# print every level as a discrete axis would.
df %>%
  rownames_to_column(var = "Name") %>%
  pivot_longer(cols = -Name, names_to = "ep") %>%
  mutate(ep = as.numeric(ep)) %>%
  ggplot(mapping = aes(x = ep, y = value, colour = Name)) +
  geom_line()
Created on 2022-03-07 by the reprex package (v2.0.1)
Another solution could be the use of a geom_bar()
Sample code:
# Dodged bars: one bar per Name within each ep.
# All mappings sit at the plot level so that layers added later (such as the
# geom_text() suggested below) inherit x and y instead of failing on missing
# aesthetics.
ggplot(df, aes(x = ep, y = value, fill = Name, group = Name)) +
  geom_col(position = position_dodge(width = 0.9)) +
  labs(x = "ep", y = "count") +
  scale_y_continuous(expand = c(0, 0)) +
  theme_bw()
Plot:
Also you can add facet_grid(~Name)+
Also you can add
geom_text(aes(label=value), position = position_stack(vjust = .5))+

Fail to change the legend title and label with ggplot2 in R

I was trying to change the legend title from group to the Greek letter "sigma" and the label "power.1, power.2, power.3" to "35, 40, 45" but it did not appear and still shows the default name and label. Could you please help me with it? Thanks so much.
# Load the library and input the data
library(ggplot2)
library(tidyr)
# n: the values 2..10 over which power is evaluated.
n <- 2:10
control <- rep(150, 4)
infected <- c(150, 170, 200, 250)
# Eight values in total (4 control + 4 infected).
# NOTE(review): `all` masks base::all() for the rest of the script.
all <- c(control, infected)
# The three sigma values being compared.
sigma <- c(35, 40, 45)
# Compute the population mean
mu <- mean(all)
# Compute the sum of the tau squared
tau2 <- sum((all-mu)^2)
# Compute the gamma
# One vector per sigma, with one entry per n. These are passed below as the
# non-centrality parameter of pf().
gamma.1 <- (n*tau2)/(sigma[1]^2)
gamma.2 <- (n*tau2)/(sigma[2]^2)
gamma.3 <- (n*tau2)/(sigma[3]^2)
# Compute the power
# Power = 1 - P(F <= 95% critical value) under the non-central F distribution.
# NOTE(review): the degrees of freedom (7, 16) are the same for every n in
# 2:10. If n is the per-group sample size, the denominator df would be
# expected to change with n -- confirm this is intended.
power.1 <- 1-pf(qf(.95, 7, 16), 7, 16, gamma.1)
power.2 <- 1-pf(qf(.95, 7, 16), 7, 16, gamma.2)
power.3 <- 1-pf(qf(.95, 7, 16), 7, 16, gamma.3)
# One row per n, one power column per sigma.
data <- data.frame(n, power.1, power.2, power.3)
# Question code: plot power against n, one coloured line per power column.
data %>%
# Long form: the power.* column names go to `group`, their values to `power`.
pivot_longer(cols = contains("power"), names_to = "group", values_to = "power") %>%
ggplot(aes(n, power)) +
geom_line(aes(color = group)) +
geom_point(aes(color = group), size = 4) +
# Both layers map `color`, but this scale is for the `fill` aesthetic, so its
# name and labels are not applied to the colour legend. This is the problem
# the question asks about.
scale_fill_discrete(name = expression(sigma), labels = c("35","40","45"))
Try this in the final part of your code. The lesson here is that fill and color are different aesthetics: because your layers map color, you must use a scale_color_*() function (here scale_color_discrete()) rather than a scale_fill_*() one. Here is the code:
# Code
# The colour mapping is declared once at the plot level and inherited by both
# layers; the colour scale then sets the legend title (sigma) and labels.
data %>%
  pivot_longer(
    cols = contains("power"),
    names_to = "group",
    values_to = "power"
  ) %>%
  ggplot(mapping = aes(x = n, y = power, colour = group)) +
  geom_line() +
  geom_point(size = 4) +
  scale_colour_discrete(name = expression(sigma), labels = c("35", "40", "45"))
Output:
Or you can also try with guides() which will produce the same output (But first option is more direct):
# Code 2
# Same plot, but the labels come from the colour scale and the legend title
# is set separately through guides().
data %>%
  pivot_longer(
    cols = contains("power"),
    names_to = "group",
    values_to = "power"
  ) %>%
  ggplot(mapping = aes(x = n, y = power, colour = group)) +
  geom_line() +
  geom_point(size = 4) +
  scale_colour_discrete(labels = c("35", "40", "45")) +
  guides(colour = guide_legend(title = expression(sigma)))
You should use:
scale_colour_discrete(name = expression(sigma), labels = c("35","40","45"))

How can I create this chart in R using ggplot2?

I am using R in RStudio and I have the following data frame.
# One row per company, with its value in Q2 2018 and in Q2 2019.
df1 <- data.frame(
  comp = c("A", "B", "C", "D", "E", "F"),
  Q2_2018 = c(27, 10, 6, 4, 3, 2),
  Q2_2019 = c(31, 12, 8, 6, 5, 4)
)
I would like to create a chart (from the above data) like the one shown below (excluding the Amazon logo).
I am mostly stuck at drawing the circles with the % changes.
So far,
library(ggplot2)
library(reshape2)
library(magrittr)

# Long form (columns `variable` and `value`), then one dodged bar per quarter
# within each company. geom_col() is geom_bar() with stat = "identity".
df1 %>%
  melt(id.vars = "comp") %>%
  ggplot(mapping = aes(x = comp, y = value, fill = variable)) +
  geom_col(position = "dodge")
Can it be done with ggplot2?
Most of the way:
library(tidyverse)

# Dodged bars per company and quarter, plus a circle above the later quarter
# that is sized and labelled by the relative change.
df1 %>%
  # pivot_longer() replaces the superseded gather(). Rows stay in column
  # order within each company, so lag() below compares 2019 with 2018.
  pivot_longer(-comp, names_to = "year", values_to = "val") %>%
  group_by(comp) %>%
  mutate(
    # Relative change versus the previous quarter; NA for the first row of
    # each company.
    change = val / lag(val) - 1,
    change_lab = if_else(
      !is.na(change),
      scales::percent(
        change,
        accuracy = 1,
        # Only positive values need an explicit sign. Negative values already
        # carry "-", so a "-" prefix would print it twice.
        prefix = if_else(change > 0, "+", "")
      ),
      NA_character_
    )
  ) %>%
  ungroup() %>%
  ggplot(aes(comp, val, fill = year, label = val)) +
  geom_col(position = position_dodge()) +
  geom_text(position = position_dodge(width = 1), vjust = -0.5) +
  # Rows without a change (the first quarter) are dropped silently.
  geom_point(aes(comp, val + 5, size = change), color = "lightgreen", na.rm = TRUE) +
  geom_text(aes(comp, val + 5, label = change_lab), na.rm = TRUE) +
  scale_size_area(max_size = 30) +
  # "none" replaces the deprecated FALSE (written as the reassignable `F`).
  guides(size = "none") +
  theme_classic()

Resources