Fail to change the legend title and label with ggplot2 in R - r

I was trying to change the legend title from group to the Greek letter "sigma" and the label "power.1, power.2, power.3" to "35, 40, 45" but it did not appear and still shows the default name and label. Could you please help me with it? Thanks so much.
# Load the library and input the data
library(ggplot2)
library(tidyr)
n <- 2:10
control <- rep(150, 4)
infected <- c(150, 170, 200, 250)
all <- c(control, infected)
sigma <- c(35, 40, 45)
# Compute the population mean
mu <- mean(all)
# Compute the sum of the tau squared
tau2 <- sum((all-mu)^2)
# Compute the gamma
gamma.1 <- (n*tau2)/(sigma[1]^2)
gamma.2 <- (n*tau2)/(sigma[2]^2)
gamma.3 <- (n*tau2)/(sigma[3]^2)
# Compute the power
power.1 <- 1-pf(qf(.95, 7, 16), 7, 16, gamma.1)
power.2 <- 1-pf(qf(.95, 7, 16), 7, 16, gamma.2)
power.3 <- 1-pf(qf(.95, 7, 16), 7, 16, gamma.3)
data <- data.frame(n, power.1, power.2, power.3)
data %>%
pivot_longer(cols = contains("power"), names_to = "group", values_to = "power") %>%
ggplot(aes(n, power)) +
geom_line(aes(color = group)) +
geom_point(aes(color = group), size = 4) +
scale_fill_discrete(name = expression(sigma), labels = c("35","40","45"))

Try this in the final part of your code. One lesson you can learn is that fill and color are different aesthetics. So, if you set color you must use scale_color_manual. Here the code:
#Code
data %>%
pivot_longer(cols = contains("power"), names_to = "group", values_to = "power") %>%
ggplot(aes(n, power)) +
geom_line(aes(color = group)) +
geom_point(aes(color = group), size = 4) +
scale_color_discrete(name = expression(sigma), labels = c("35","40","45"))
Output:
Or you can also try with guides() which will produce the same output (But first option is more direct):
#Code 2
data %>%
pivot_longer(cols = contains("power"), names_to = "group", values_to = "power") %>%
ggplot(aes(n, power)) +
geom_line(aes(color = group)) +
geom_point(aes(color = group), size = 4) +
scale_color_discrete(labels = c("35","40","45"))+
guides(color=guide_legend(title=expression(sigma)))

You should used:
scale_colour_discrete(name = expression(sigma), labels = c("35","40","45"))

Related

Combining two heatmaps with the variables next to each other

I'm trying to combine two heatmaps. I want var_a and var_x on the y axis with for example: var_a first and then var_x. I don't know if I should do this by changing the dataframe or combining them, or if I can do this in ggplot.
Below I have some example code and a drawing of what I want (since I don't know if I explained it right).
I hope someone has ideas how I can do this either in the dataframe or in ggplot!
Example code:
df_one <- data.frame(
vars = c("var_a", "var_b", "var_c"),
corresponding_vars = c("var_x", "var_y", "var_z"),
expression_organ_1_vars = c(5, 10, 20),
expression_organ_2_vars = c(50, 2, 10),
expression_organ_3_vars = c(5, 10, 3)
)
df_one_long <- pivot_longer(df_one,
cols=3:5,
names_to = "tissueType",
values_to = "Expression")
expression.df_one <- ggplot(df_one_long,
mapping = aes(y=tissueType, x=vars, fill = Expression)) +
geom_tile()
expression.df_one
df_two <- data.frame(
corresponding_vars = c("var_x", "var_y", "var_z"),
expression_organ_1_corresponding_vars = c(100, 320, 120),
expression_organ_2_corresponding_vars = c(23, 30, 150),
expression_organ_3_corresponding_vars = c(89, 7, 200)
)
df_two_long <- pivot_longer(df_one,
cols=3:5,
names_to = "tissueType",
values_to = "Expression")
expression.df_two <- ggplot(df_two_long,
mapping = aes(y=tissueType, x=vars, fill = Expression)) +
geom_tile()
expression.df_two
Drawing:
You can bind your data frames together and pivot into a longer format so that vars and corresponding vars are in the same column, but retain a grouping variable to facet by:
df_two %>%
mutate(cor = corresponding_vars) %>%
rename_with(~sub('corresponding_', '', .x)) %>%
bind_rows(df_one %>% rename(cor = corresponding_vars)) %>%
pivot_longer(contains('expression'), names_to = 'organ') %>%
mutate(organ = gsub('expression_|_vars', '', organ)) %>%
group_by(cor) %>%
summarize(vars = vars, organ = organ, value = value,
cor = paste(sort(unique(vars)), collapse = ' cor ')) %>%
ggplot(aes(vars, organ, fill = value)) +
geom_tile(color = 'white', linewidth = 1) +
facet_grid(.~cor, scales = 'free_x', switch = 'x') +
scale_fill_viridis_c() +
coord_cartesian(clip = 'off') +
scale_x_discrete(expand = c(0, 0)) +
theme_minimal(base_size = 16) +
theme(strip.placement = 'outside',
axis.text.x = element_blank(),
axis.ticks.x.bottom = element_line(),
panel.spacing.x = unit(3, 'mm'))
Okay, so I solved the issue for my own project, which is to convert it to a scatter plot. I combined both datasets and then used a simple scatterplot.
df.combined <- dplyr::full_join(df_two_long, df_one_long,
by = c("vars", "corresponding_vars", "tissueType"))
ggplot(df.combined,
aes(x=vars, y=tissueType, colour=Expression.x, size = Expression.y)) +
geom_point()
It's not a solution with heatmaps, but I don't know how to do that at the moment.

ggplot2 + ggrepel adding legend with label colours changes the colours itself

Assume the following data:
library(tidyverse)
library(ggrepel)
df <- data.frame(name = rep(letters[1:3], 3),
points = c(5, 3, 7, 12, 13, 14, 20, 30, 40),
time = rep(c("day 1", "day 2", "day 3"), each = 3))
df2 <- df %>%
group_by(name) %>%
mutate(points_sum = cumsum(points)) %>%
group_by(time) %>%
mutate(rank = rank(desc(points_sum), ties.method = "min")) %>%
ungroup() %>%
mutate(name_colour = case_when(rank == 1 ~ "#336600",
rank == 2 ~ "#339900",
rank == 3 ~ "#66ff33"))
I now want to draw th following plot, i.e. give the names/labels the colour specified in the name_colour column:
df2 %>%
ggplot(aes(x = time,
y = points_sum,
group = name,
label = name)) +
geom_point() +
geom_text_repel(direction = "y", size = 10, colour = df2$name_colour) +
theme_minimal()
However, this plot is missing a legend for these colours, i.e. I want to add a legend that has the ranks next to the according colour.
I'm not sure how I could manually add such a legend here. I tried to change my code above by the one below (only chenge in the second to last line), but this completely changes the colours of the labels:
df2 %>%
ggplot(aes(x = time,
y = points_sum,
group = name,
label = name)) +
geom_point() +
geom_text_repel(direction = "y", size = 10, aes(colour = name_colour)) +
theme_minimal()
Any ideas?
If you want to use the color codes from your dataframe then make use of scale_color_identity. By default this will not give you a legend so you have to add guide = guide_legend():
library(tidyverse)
library(ggrepel)
df <- data.frame(
name = rep(letters[1:3], 3),
points = c(5, 3, 7, 12, 13, 14, 20, 30, 40),
time = rep(c("day 1", "day 2", "day 3"), each = 3)
)
df2 <- df %>%
group_by(name) %>%
mutate(points_sum = cumsum(points)) %>%
group_by(time) %>%
mutate(rank = rank(desc(points_sum), ties.method = "min")) %>%
ungroup() %>%
mutate(name_colour = case_when(
rank == 1 ~ "#336600",
rank == 2 ~ "#339900",
rank == 3 ~ "#66ff33"
))
df2 %>%
ggplot(aes(
x = time,
y = points_sum,
group = name,
label = name
)) +
geom_point() +
geom_text_repel(aes(color = name_colour), direction = "y", size = 10) +
scale_color_identity(labels = c("A", "B", "C"), guide = guide_legend()) +
theme_minimal()
In general, I think a more typical ggplot approach would be to specify the colours in scale_colour_manual or equivalent, rather than coding them into the data frame itself. For example:
library(ggplot2)
library(dplyr)
library(ggrepel)
data.frame(
name = rep(letters[1:3], 3),
points = c(5, 3, 7, 12, 13, 14, 20, 30, 40),
time = rep(c("day 1", "day 2", "day 3"), each = 3)
) %>%
group_by(name) %>%
mutate(points_sum = cumsum(points)) %>%
group_by(time) %>%
mutate(rank = factor(rank(desc(points_sum), ties.method = "min"))) %>%
ungroup() %>%
ggplot(aes(
x = time,
y = points_sum,
group = name,
label = name)) +
geom_point() +
geom_text_repel(direction = "y", size = 10, aes(colour = rank)) +
theme_minimal() +
scale_colour_manual(
values = c("1" = "#336600", "2" = "#339900", "3" = "#66ff33")
)

Add regression line with geom_smooth to plot with discrete x-axis in R

I would like to add a regression line from a linear model to a plot in R.
I have created the following sample dataset:
# Load libraries
library(tidyverse)
library(ggbeeswarm)
# Set seed
set.seed(123)
# Create dataset
ID <- sprintf("ID-%s",seq(1:30))
baseline <- rnorm(30, mean = 50, sd = 3)
df <- data.frame(ID, baseline) %>%
mutate(`1` = baseline - rnorm(1, mean = 5, sd = 4),
`2` = `1` - rnorm(1, mean = 7, sd = 5),
`3` = `2` - rnorm(1, mean = 10, sd = 9)) %>%
pivot_longer(-ID) %>%
rename(time = name) %>%
mutate(time = as.factor(time))
Which creates the following plot:
# Plot
ggplot(data = df, aes(x=time, y = value)) +
geom_quasirandom() +
theme_classic() +
scale_x_discrete(limits = c("baseline", "1", "2", "3") ) +
labs(x = "Time", y = "Value")
If I add geom_smooth(method = 'lm') to the plot, nothing happens and I think it has something to do with the fact that time is a factor. However, adding geom_smooth(method = 'lm', formula = y~as.numeric(x) also doesn't work.
How do I plot a regression line on top of this graph?
EDIT1.0:
I have been able to use geom_smooth to plot a regression line with geom_smooth(aes(x = as.numeric(time), y = value), method = "lm", formula = y~x), but sadly, the regression line is incorrect...
How about using ordered factor to enable overlay with aes(as.numeric(time), value)?
# Load libraries
library(tidyverse)
library(ggbeeswarm)
# Set seed
set.seed(123)
# Create dataset
ID <- sprintf("ID-%s",seq(1:30))
baseline <- rnorm(30, mean = 50, sd = 3)
df <- data.frame(ID, baseline) %>%
mutate(`1` = baseline - rnorm(1, mean = 5, sd = 4),
`2` = `1` - rnorm(1, mean = 7, sd = 5),
`3` = `2` - rnorm(1, mean = 10, sd = 9)) %>%
pivot_longer(-ID) %>%
rename(time = name) %>%
# create ordered factor to allow synchronized order of x after as.numeric
mutate(time = factor(time, ordered = T, c("baseline", "1", "2", "3")))
## rendered results
ggplot(data = df, aes(x=time, y = value)) +
geom_quasirandom() +
theme_classic() +
labs(x = "Time", y = "Value") +
geom_smooth(aes(as.numeric(time), value), method = "lm")
## verify with this
ggplot(data = df, aes(x=time, y = value)) +
geom_point() +
theme_classic() +
labs(x = "Time", y = "Value") +
geom_smooth(aes(as.numeric(time), value), method = "lm")
Created on 2020-04-15 by the reprex package (v0.3.0)
Try this. One option to add a regression line is to compute it manually and add it to the plot via geom_line and mapping ID on the group aesthetic. The red line is the regression line for value ~ time while in case of the blue line I recoded time as a numeric, setting "baseline" to 0.
# Load libraries
library(tidyverse)
library(ggbeeswarm)
# Set seed
set.seed(123)
# Create dataset
ID <- sprintf("ID-%s",seq(1:30))
baseline <- rnorm(30, mean = 50, sd = 3)
df <- data.frame(ID, baseline) %>%
mutate(`1` = baseline - rnorm(1, mean = 5, sd = 4),
`2` = `1` - rnorm(1, mean = 7, sd = 5),
`3` = `2` - rnorm(1, mean = 10, sd = 9)) %>%
pivot_longer(-ID) %>%
rename(time = name) %>%
mutate(time = as.factor(time),
time1 = ifelse(time == "baseline", 0, as.numeric(time)),
smooth = predict(lm(value ~ time, data = .)),
smooth1 = predict(lm(value ~ time1, data = .)))
# Plot
ggplot(data = df, aes(x=time, y = value)) +
geom_quasirandom() +
# Regression line
geom_line(aes(x = time, y = smooth, group = ID), color = "red") +
geom_line(aes(x = time, y = smooth1, group = ID), color = "blue") +
theme_classic() +
scale_x_discrete(limits = c("baseline", "1", "2", "3") ) +
labs(x = "Time", y = "Value")
Created on 2020-04-15 by the reprex package (v0.3.0)

How to fix 'Error in summarise_impl(.data, dots) : Evaluation error: zero non-NA points' error in R?

I'm trying to replicate this visualization (for practice).
The issue I am facing is this error that says
Error in summarise_impl(.data, dots) : Evaluation error: zero non-NA points.
I can't seem to figure out what's going on.
I've tried removing the est_alive_today variable, and it generated a plot, but the data looks quite off (so it appears that the variable is important in the accurate generation of the plot).
library(babynames)
library(Hmisc)
library(ggplot2)
BabynamesDist <- make_babynames_dist()
data("babynames")
com_fem <- na.omit(com_fem)
com_fem <- BabynamesDist %>%
filter(sex == "F") %>%
group_by(name) %>%
summarise(N = n(),
est_num_alive = sum(est_alive_today),
q1_age = wtd.quantile(age_today, est_alive_today, probs = 0.25),
median_age = wtd.quantile(age_today, est_alive_today, probs = 0.5),
q3_age = wtd.quantile(age_today, est_alive_today, probs = 0.75)) %>%
arrange(desc(est_num_alive)) %>%
head(25)
w_plot <- ggplot(data = com_fem,
aes(x = reorder(name, -median_age), y = median_age)) +
xlab(NULL) + ylab("Age (in years)") +
ggtitle("Median ages for females with the 25 most common names")
w_plot <- w_plot +
geom_linerange(aes(ymin = q1_age, ymax = q3_age),
color = "#f3d478", size = 10, alpha = 0.8)
w_plot <- w_plot +
geom_point(fill = "#ed3324", colour = "white", size = 4, shape = 21)
w_plot +
geom_point(aes(y = 55, x = 24),
fill = "#ed3324", colour = "white", size = 4, shape = 21) +
geom_text(aes(y = 58, x = 24, label = "median")) +
geom_text(aes(y = 26, x = 16, label = "25th")) +
geom_text(aes(y = 51, x = 16, label = "75th percentile")) +
geom_point(aes(y = 24, x = 16), shape = 17) +
geom_point(aes(y = 56, x = 16), shape = 17) +
coord_flip()
I should be getting a plot (not quite a clone of the one I put a picture link of above), but somewhat close enough.
My actual result is the error:
Error in summarise_impl(.data, dots) : Evaluation error: zero non-NA points.
What can I do?
Use this code for the com_fem assigment
com_fem <- BabynamesDist %>%
filter(sex == "F") %>%
group_by(name) %>%
summarise(
N = n(),
est_num_alive = sum(est_alive_today)
) %>%
arrange(desc(est_num_alive)) %>%
head(25) %>%
select(name) %>%
left_join(., BabynamesDist, by = "name") %>%
group_by(name) %>%
summarise(
N = n(),
est_num_alive = sum(est_alive_today),
q1_age = wtd.quantile(age_today, weight = est_alive_today, probs = 0.25),
median_age = wtd.quantile(age_today, weight = est_alive_today, probs = 0.5),
q3_age = wtd.quantile(age_today, weight = est_alive_today, probs = 0.75)
)
Props to this site: http://www.sci.csueastbay.edu/~esuess/classes/Statistics_651/Presentations/03_ggplot2_02/ggplot2_02_examples.Rmd

ggplot2: geom_violin(), geom_text() with facet_grid() to print total number of rows in each factor excluding NA

In the violin plot below, I want to add total number of rows used to draw each plot excluding NA values.
Input:
df <- cbindX(as.data.frame(rep(c(rep("trt", 4*500), rep("trt2",4*500)),2)),
as.data.frame(rnorm(15*500,2)),
as.data.frame(c(rep("A", 8*500), rep("B", 8*500))))
colnames(df) <- c("variable", "value", "mark")
code:
ggplot(df,aes(x=variable,y=value)) + geom_violin(trim = T) + geom_text(aes(x = variable, y = -2, label=nrow(df)),color="red")
Output:
Expected output:
This should help you:
library(dplyr)
count<-df %>% filter(!is.na(value)) %>%
group_by(variable) %>%
summarise(n=n()) %>%
as.data.frame
# variable n
# 1 trt 4000
# 2 trt2 3500
ggplot(df,aes(x=variable,y=value)) + geom_violin(trim = T) +
geom_text(data=count,aes(x = variable, y = -2, label=n),color="red")
would this workout for you
ggplot(df,aes(x=variable,y=value)) + geom_violin(trim = T) + annotate("text", label = "4000", x =1, y = -3, size = 10, colour = "black") + annotate("text", label = "3500", x =2, y = -3, size = 10, colour = "black")

Resources