ggplot2 + ggrepel adding legend with label colours changes the colours itself - r

Assume the following data:
library(tidyverse)
library(ggrepel)

# Example data: points scored by three names (a, b, c) on three days.
df <- data.frame(
  name = rep(letters[1:3], 3),
  points = c(5, 3, 7, 12, 13, 14, 20, 30, 40),
  time = rep(c("day 1", "day 2", "day 3"), each = 3)
)

# Running total per name, rank of that total within each day, and a hex
# colour chosen by rank position (1st, 2nd, 3rd).
df2 <- df %>%
  group_by(name) %>%
  mutate(points_sum = cumsum(points)) %>%
  group_by(time) %>%
  mutate(rank = rank(desc(points_sum), ties.method = "min")) %>%
  ungroup() %>%
  mutate(name_colour = c("#336600", "#339900", "#66ff33")[rank])
I now want to draw the following plot, i.e. give the names/labels the colour specified in the name_colour column:
# Cumulative points per day, with each name drawn as a repelled text label.
df2 %>%
ggplot(aes(x = time,
y = points_sum,
group = name,
label = name)) +
geom_point() +
# colour is passed outside aes(), so the hex codes in name_colour are used
# literally; no scale is involved and therefore no legend is drawn.
geom_text_repel(direction = "y", size = 10, colour = df2$name_colour) +
theme_minimal()
However, this plot is missing a legend for these colours, i.e. I want to add a legend that has the ranks next to the according colour.
I'm not sure how I could manually add such a legend here. I tried to change my code above to the one below (the only change is in the second to last line), but this completely changes the colours of the labels:
# Same plot as before, but with name_colour mapped as an aesthetic.
df2 %>%
ggplot(aes(x = time,
y = points_sum,
group = name,
label = name)) +
geom_point() +
# colour is now mapped inside aes(): a legend appears, but the hex strings are
# treated as category names for the default discrete colour scale rather than
# as the colours to draw with.
geom_text_repel(direction = "y", size = 10, aes(colour = name_colour)) +
theme_minimal()
Any ideas?

If you want to use the color codes from your dataframe then make use of scale_color_identity. By default this will not give you a legend so you have to add guide = guide_legend():
library(tidyverse)
library(ggrepel)

# Single definition of the rank -> colour mapping. It is used both to fill the
# name_colour column and to build the legend, so the two cannot drift apart.
rank_colours <- c("1" = "#336600", "2" = "#339900", "3" = "#66ff33")

df <- data.frame(
  name = rep(letters[1:3], 3),
  points = c(5, 3, 7, 12, 13, 14, 20, 30, 40),
  time = rep(c("day 1", "day 2", "day 3"), each = 3)
)

# Running total per name, rank of that total within each day, and the hex
# colour that belongs to the rank.
df2 <- df %>%
  group_by(name) %>%
  mutate(points_sum = cumsum(points)) %>%
  group_by(time) %>%
  mutate(rank = rank(desc(points_sum), ties.method = "min")) %>%
  ungroup() %>%
  mutate(name_colour = unname(rank_colours[as.character(rank)]))

df2 %>%
  ggplot(aes(
    x = time,
    y = points_sum,
    group = name,
    label = name
  )) +
  geom_point() +
  geom_text_repel(aes(color = name_colour), direction = "y", size = 10) +
  # The identity scale draws the hex codes as-is. It shows no legend by
  # default, so one is requested explicitly. breaks/labels are given as a
  # matched pair so each colour is labelled with its rank instead of relying
  # on the sort order of the hex strings.
  scale_color_identity(
    name = "rank",
    breaks = unname(rank_colours),
    labels = names(rank_colours),
    guide = guide_legend()
  ) +
  theme_minimal()

In general, I think a more typical ggplot approach would be to specify the colours in scale_colour_manual or equivalent, rather than coding them into the data frame itself. For example:
library(ggplot2)
library(dplyr)
library(ggrepel)

# Build the data and plot in one pipeline: rank is kept as a factor and the
# colours are attached by the scale, not stored in the data frame.
data.frame(
  name = rep(letters[1:3], 3),
  points = c(5, 3, 7, 12, 13, 14, 20, 30, 40),
  time = rep(c("day 1", "day 2", "day 3"), each = 3)
) %>%
  group_by(name) %>%
  mutate(points_sum = cumsum(points)) %>%
  group_by(time) %>%
  mutate(rank = factor(rank(desc(points_sum), ties.method = "min"))) %>%
  ungroup() %>%
  ggplot(aes(x = time, y = points_sum, group = name, label = name)) +
  geom_point() +
  geom_text_repel(aes(colour = rank), direction = "y", size = 10) +
  # Named values tie each factor level of rank to its colour.
  scale_colour_manual(
    values = c("1" = "#336600", "2" = "#339900", "3" = "#66ff33")
  ) +
  theme_minimal()

Related

How to plot a grouped geom_bar plot having a dataframe?

I have a dataframe like this one:
and want to plot in R something like this:
But for some reason I am really struggling with the grouped geom_bar code...
Can you help me please?
We may use barplot from base R
# Grouped bars in base graphics: after transposing, each column of the matrix
# (one original row of df1) becomes a group of side-by-side bars.
barplot(
  t(as.matrix(df1)),
  beside = TRUE,
  col = c("blue", "orange", "grey", "yellow", "lightblue")
)
-output
Or if we need a ggplot/plotly
library(ggplot2)
library(dplyr)
library(tidyr)
library(plotly)
library(tibble)

# Row names become column "rn"; every other column is stacked into the
# default name/value pair so that `name` can drive the fill colour.
p <- df1 %>%
  rownames_to_column(var = "rn") %>%
  pivot_longer(cols = -rn) %>%
  ggplot(mapping = aes(x = rn, y = value, fill = name)) +
  geom_col(position = position_dodge()) +
  theme_bw()

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
-output
data
# Example data: five numeric columns (A-E) and five named rows (AAA-EEE).
df1 <- data.frame(
  A = c(65, 9, 7, 70, 9),
  B = c(23, 4, 5, 53, 2),
  C = c(42, 5, 2, 17, 7),
  D = c(51, 7, 5, 57, 5),
  E = c(14, 2, 2, 13, 4),
  row.names = c("AAA", "BBB", "CCC", "DDD", "EEE")
)
Here's a solution using the tidyverse package that contains ggplot2 and tidyr packages. Additionally, this answer includes plotting the numbers as text on top of the bars.
library(tidyverse)

df1 %>%
  # Move the row names into a regular column (default name: "rowname")
  rownames_to_column() %>%
  # Reshape to long format: one row per rowname/letter combination
  pivot_longer(cols = -rowname, names_to = "letter") %>%
  ggplot(aes(x = rowname, y = value, fill = letter)) +
  # Side-by-side bars, each starting at 0
  geom_col(position = position_dodge(width = 0.9), width = 0.9) +
  # Value labels, shifted up by 2 units so they sit above their bar; the same
  # dodge width as the bars keeps each label over its own bar
  geom_text(
    aes(y = value + 2, label = value),
    position = position_dodge(width = 0.9)
  ) +
  # Black-and-white theme; any other theme works too
  theme_bw()

Is there a way to order factors with facet wrap?

I am trying to draw a graph but want to order the factor variable based on given values. The plot does not show what I want: I would like the languages to be ordered by meanscore. Any ideas?
library(tidyverse)
set.seed(200) # reproducibility

# 40 simulated scores: 4 languages x 10 observations, with a random gender.
df <- tibble(
  language = gl(4, 10, labels = c("Python", "R", "Javascipt", "Excel")),
  gender = factor(ifelse(rnorm(40) < 0, 0, 1), labels = c("Male", "Female")),
  score = floor(runif(40, 25, 80))
)

# Mean score for every gender/language combination.
df <- df %>%
  group_by(gender, language) %>%
  summarise(meanscore = mean(score))

# Attempted ordering: reorder language by meanscore, then facet by gender.
df %>%
  mutate(language = fct_reorder(language, meanscore)) %>%
  ggplot(aes(x = language, y = meanscore, fill = gender)) +
  geom_col() +
  facet_wrap(vars(gender)) +
  coord_flip()
I believe this is what you want? Utilizing the reorder_within from the package tidytext.
# tidyverse supplies tibble(), %>%, the dplyr verbs and ggplot2; it is attached
# here so the snippet runs on its own instead of relying on an earlier session.
library(tidyverse)
library(tidytext)

set.seed(200)

# 40 simulated scores: 4 languages x 10 observations, with a random gender.
df <- tibble(
  language = gl(4, 10, labels = c("Python", "R", "Javascipt", "Excel")),
  gender = factor(ifelse(sign(rnorm(40)) == -1, 0, 1), labels = c("Male", "Female")),
  score = floor(runif(40, 25, 80))
)

# Mean score per gender/language. The result is explicitly ungrouped so no
# leftover grouping (or the accompanying message) leaks into later steps.
df <- df %>%
  group_by(gender, language) %>%
  summarise(meanscore = mean(score), .groups = "drop")

# reorder_within() orders language by meanscore separately inside each gender;
# scale_x_reordered() strips the suffix it appends to the axis labels, and
# free scales let every facet keep its own ordering.
ggplot(df, aes(reorder_within(language, meanscore, gender), meanscore, fill = gender)) +
  geom_col() +
  coord_flip() +
  scale_x_reordered() +
  facet_wrap(~gender, scales = "free")

Fail to change the legend title and label with ggplot2 in R

I was trying to change the legend title from group to the Greek letter "sigma" and the label "power.1, power.2, power.3" to "35, 40, 45" but it did not appear and still shows the default name and label. Could you please help me with it? Thanks so much.
# Load the library and input the data
# Load the libraries and set up the inputs
library(ggplot2)
library(tidyr)

n <- 2:10
control <- rep(150, 4)
infected <- c(150, 170, 200, 250)
all <- c(control, infected)
sigma <- c(35, 40, 45)

# Population mean and the sum of squared deviations from it (tau squared)
mu <- mean(all)
tau2 <- sum((all - mu)^2)

# Non-centrality parameter for each value of sigma
gamma.1 <- n * tau2 / sigma[1]^2
gamma.2 <- n * tau2 / sigma[2]^2
gamma.3 <- n * tau2 / sigma[3]^2

# Power: upper-tail probability of the non-central F beyond the 95% quantile
# of the central F with (7, 16) degrees of freedom
power.1 <- 1 - pf(qf(.95, 7, 16), 7, 16, ncp = gamma.1)
power.2 <- 1 - pf(qf(.95, 7, 16), 7, 16, ncp = gamma.2)
power.3 <- 1 - pf(qf(.95, 7, 16), 7, 16, ncp = gamma.3)

data <- data.frame(n, power.1, power.2, power.3)

# One line per power column. The layers map `color`, while the scale at the
# end targets `fill`; this mismatch is what the question is asking about.
data %>%
  pivot_longer(
    cols = contains("power"),
    names_to = "group",
    values_to = "power"
  ) %>%
  ggplot(aes(n, power)) +
  geom_line(aes(color = group)) +
  geom_point(aes(color = group), size = 4) +
  scale_fill_discrete(name = expression(sigma), labels = c("35", "40", "45"))
Try this in the final part of your code. One lesson you can learn is that fill and color are different aesthetics. So, if you map color you must use a colour scale such as scale_color_discrete (or scale_color_manual), not a fill scale. Here is the code:
#Code
# Colour is mapped once for both layers; the colour scale sets the legend
# title (Greek sigma) and the labels.
data %>%
  pivot_longer(
    cols = contains("power"),
    names_to = "group",
    values_to = "power"
  ) %>%
  ggplot(aes(x = n, y = power, color = group)) +
  geom_line() +
  geom_point(size = 4) +
  scale_color_discrete(name = expression(sigma), labels = c("35", "40", "45"))
Output:
Or you can also try with guides() which will produce the same output (But first option is more direct):
#Code 2
# Variant: labels come from the colour scale, the legend title from guides().
data %>%
  pivot_longer(
    cols = contains("power"),
    names_to = "group",
    values_to = "power"
  ) %>%
  ggplot(aes(x = n, y = power, color = group)) +
  geom_line() +
  geom_point(size = 4) +
  scale_color_discrete(labels = c("35", "40", "45")) +
  guides(color = guide_legend(title = expression(sigma)))
You should use:
# Colour (not fill) scale: sets the legend title to sigma and relabels the keys.
scale_colour_discrete(
  name = expression(sigma),
  labels = c("35", "40", "45")
)

Time series connected scatter plot in R (as image attached)

I want to plot these data as connected temporal scatter kind of plot in R. Any hint/code would be greatly helpful. I can do with single but not with time series one.
See if you can apply this to your data.
I've assumed that it is not important that the years are treated as 'time' data for the purposes of graphical representation.
library(tidyr)
library(dplyr)
library(stringr)
library(ggplot2)
library(ggrepel)

# Wide example data: per city, three year columns plus three km2_<year>
# columns holding a second measurement for the same years.
pop <- tibble(
  city = c("Delhi", "Madurai", "Cape Town", "Jo Burg"),
  `1990` = c(10, 12, 5, 6),
  `2000` = c(13, 17, 7, 8),
  `2015` = c(20, 24, 7.5, 9),
  km2_1990 = c(12, 15, 2, 4),
  km2_2000 = c(13, 14, 3, 4),
  km2_2015 = c(15, 16, 5, 6)
)

# Lookup table: continent of each city.
cont <- tibble(
  city = c("Delhi", "Madurai", "Cape Town", "Jo Burg"),
  cont = c("Asia", "Asia", "Africa", "Africa")
)

tib <- pop %>%
  # Stack every column except city into year/val pairs
  pivot_longer(cols = -city, names_to = "year", values_to = "val") %>%
  mutate(
    # Columns whose name contains "km" hold area; the rest hold population.
    # type must be derived before the prefix is stripped from year.
    type = if_else(str_detect(year, "km"), "area", "pop"),
    year = as.numeric(str_remove(year, "km2_"))
  ) %>%
  # One row per city/year with separate area and pop columns
  pivot_wider(names_from = type, values_from = val) %>%
  left_join(cont) %>%
  # Totals per continent and year
  group_by(year, cont) %>%
  summarise(area = sum(area), pop = sum(pop)) %>%
  # Label only the point with the largest population in each continent
  group_by(cont) %>%
  mutate(lab_cont = if_else(pop == max(pop), cont, ""))

ggplot(tib, aes(x = area, y = pop, label = lab_cont)) +
  geom_line(aes(group = cont), colour = "red") +
  geom_point(shape = 22, size = 4, colour = "red", fill = "red") +
  # Year next to every point
  geom_text_repel(aes(label = year)) +
  # Continent name (inherited label aesthetic), nudged to the right
  geom_text_repel(nudge_x = 1, size = 5)
This gives you the following graph, you can adjust arguments as required to pretty up the appearance to suit your purposes:
Transform your data like this
Then, plot using ggplot2 package.
# Connected scatter plot: one coloured line per city, each point labelled with
# its year.
# NOTE(review): mydata is not defined in this snippet. It is assumed to have
# the columns bsurf (built-up surface), pop (population), city and year;
# confirm against the transformed data.
mydata %>%
  # label (not "dlabel") is the aesthetic geom_text() reads, so the year label
  # comes from the piped data instead of reaching back into mydata$year.
  ggplot(aes(x = bsurf, y = pop, group = city, color = city, label = year)) +
  geom_point(size = 2) +
  geom_line(size = 1) +
  geom_text(vjust = 1.2, nudge_y = 0.5) +
  ggtitle("Dummy Title", subtitle = "Dummy") +
  # Axis titles follow the mapped variables: x is bsurf, y is pop.
  xlab("Total Built-up Surface") +
  ylab("Population")

How can I create this chart in R using ggplot2?

I am using R in RStudio and I have the following data frame.
# Example data: one value per company for Q2 2018 and for Q2 2019.
df1 <- data.frame(
  comp = LETTERS[1:6],
  Q2_2018 = c(27, 10, 6, 4, 3, 2),
  Q2_2019 = c(31, 12, 8, 6, 5, 4)
)
I would like to create a chart (from the above data) like the one shown below (excluding the Amazon logo).
I am mostly stuck at drawing the circles with the % changes.
So far,
library(ggplot2)
library(reshape2)
library(magrittr)

# Long format (comp, variable, value), then one dodged bar per quarter.
df1 %>%
  melt(id.vars = "comp") %>%
  ggplot(aes(x = comp, y = value, fill = variable)) +
  geom_col(position = "dodge")
Can it be done with ggplot2?
Most of the way:
library(tidyverse)

df1 %>%
  # Long format: one row per company and quarter (pivot_longer supersedes
  # gather)
  pivot_longer(cols = -comp, names_to = "year", values_to = "val") %>%
  group_by(comp) %>%
  mutate(
    # Relative change versus the previous quarter of the same company; NA for
    # the first quarter
    change = val / lag(val) - 1,
    # Percent label with an explicit "+" for increases. Negative values already
    # carry their own minus sign, so no "-" prefix is added.
    change_lab = if_else(
      !is.na(change),
      scales::percent(
        change,
        accuracy = 1,
        prefix = if_else(change > 0, "+", "")
      ),
      NA_character_
    )
  ) %>%
  ungroup() %>%
  ggplot(aes(comp, val, fill = year, label = val)) +
  # Bars and their value labels share one dodge width so every label is
  # centred over its own bar
  geom_col(position = position_dodge(width = 0.9)) +
  geom_text(position = position_dodge(width = 0.9), vjust = -0.5) +
  # Bubble sized by the change, drawn 5 units above the later quarter's bar;
  # rows without a change (first quarter) are dropped by ggplot2
  geom_point(aes(comp, val + 5, size = change), color = "lightgreen") +
  geom_text(aes(comp, val + 5, label = change_lab)) +
  scale_size_area(max_size = 30) +
  # Hide the size legend ("none" replaces the deprecated logical FALSE)
  guides(size = "none") +
  theme_classic()

Resources