I am using R in RStudio and I have the following data frame.
# Example data: one row per company with its value in Q2 2018 and Q2 2019.
df1 <- data.frame(
  comp = LETTERS[1:6],
  Q2_2018 = c(27, 10, 6, 4, 3, 2),
  Q2_2019 = c(31, 12, 8, 6, 5, 4)
)
I would like to create a chart (from the above data) like the one shown below (excluding the Amazon logo).
I am mostly stuck at drawing the circles with the % changes.
So far,
library(ggplot2)
library(reshape2)
library(magrittr)
# Reshape to long format (columns: comp, variable, value) and draw one
# dodged bar per company and quarter.
melt(df1, id.vars = "comp") %>%
  ggplot(aes(x = comp, y = value, fill = variable)) +
  geom_col(position = "dodge")
Can it be done with ggplot2?
Most of the way:
library(tidyverse)
df1 %>%
  # Long format: one row per company and quarter.
  pivot_longer(-comp, names_to = "year", values_to = "val") %>%
  group_by(comp) %>%
  # Relative change versus the previous quarter of the same company;
  # NA for the first quarter, which has nothing to compare against.
  mutate(change = val / lag(val) - 1) %>%
  mutate(
    change_lab = if_else(
      !is.na(change),
      scales::percent(
        change,
        accuracy = 1,
        # Negative values already carry their own minus sign, so only a
        # "+" has to be added for increases (a "-" prefix would print "--5%").
        prefix = if_else(change > 0, "+", "")
      ),
      NA_character_
    )
  ) %>%
  ungroup() %>%
  ggplot(aes(comp, val, fill = year, label = val)) +
  geom_col(position = position_dodge(width = 0.9)) +
  # Same dodge width as the bars so each value sits above its own bar.
  geom_text(position = position_dodge(width = 0.9), vjust = -0.5) +
  # Circles sized by the change, with the percentage printed on top.
  # Rows without a change (first quarter) are dropped silently.
  geom_point(
    aes(comp, val + 5, size = change),
    color = "lightgreen",
    na.rm = TRUE
  ) +
  geom_text(aes(comp, val + 5, label = change_lab), na.rm = TRUE) +
  scale_size_area(max_size = 30) +
  # Hide the size legend; "none" replaces the deprecated logical form.
  guides(size = "none") +
  theme_classic()
Related
I have the df below, which has 2 labels, A and B. I want the bar for A to span from 0 to 2 and the bar for B to span from 3 to 6. How can I do that? If the df needs to be wrangled to do this, that's fine as well.
# Two values per label; stacked bars of the raw values, coloured by label.
df <- data.frame(
  labels = c("A", "A", "B", "B"),
  values = c(0, 2, 3, 6)
)
ggplot(df, aes(x = labels, y = values, fill = labels, colour = labels)) +
  geom_col()
One option to achieve your desired result would be to make use of geom_rect which involves some data wrangling to get the data into the right shape:
library(ggplot2)
library(dplyr)
library(tidyr)
df <- data.frame(labels = c("A", "A", "B", "B"), values = c(0, 2, 3, 6))

# One rectangle per label: it spans from the smallest to the largest value
# of that label on the y axis and is centred on the label's position on the
# x axis (1, 2, ... in factor-level order) with a half-width of 0.45.
df <- df %>%
  group_by(labels) %>%
  summarise(ymin = min(values), ymax = max(values), .groups = "drop") %>%
  mutate(
    xmin = as.numeric(factor(labels)) - .45,
    xmax = as.numeric(factor(labels)) + .45
  )

# Axis breaks and labels are derived from the data so that the plot keeps
# working when labels are added or renamed.
label_levels <- levels(factor(df$labels))

ggplot(
  df,
  aes(
    xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
    fill = labels, colour = labels
  )
) +
  geom_rect() +
  scale_x_continuous(breaks = seq_along(label_levels), labels = label_levels)
Assume the following data:
library(tidyverse)
library(ggrepel)
# Points scored by three names on three days.
df <- data.frame(
  name = rep(letters[1:3], 3),
  points = c(5, 3, 7, 12, 13, 14, 20, 30, 40),
  time = rep(c("day 1", "day 2", "day 3"), each = 3)
)

# Running total per name, rank of that total within each day (ties share
# the smaller rank), and a hex colour per rank (NA for ranks above 3).
df2 <- df %>%
  group_by(name) %>%
  mutate(points_sum = cumsum(points)) %>%
  group_by(time) %>%
  mutate(rank = rank(desc(points_sum), ties.method = "min")) %>%
  ungroup() %>%
  mutate(
    name_colour = case_when(
      rank == 1 ~ "#336600",
      rank == 2 ~ "#339900",
      rank == 3 ~ "#66ff33"
    )
  )
I now want to draw the following plot, i.e. give the names/labels the colour specified in the name_colour column:
# The label colours are passed as a plain vector outside aes(), so they are
# used as-is and no colour scale (and therefore no legend) is created.
ggplot(df2, aes(x = time, y = points_sum, group = name)) +
  geom_point() +
  geom_text_repel(
    aes(label = name),
    colour = df2$name_colour,
    direction = "y",
    size = 10
  ) +
  theme_minimal()
However, this plot is missing a legend for these colours, i.e. I want to add a legend that has the ranks next to the according colour.
I'm not sure how I could manually add such a legend here. I tried to change my code above to the one below (the only change is in the second-to-last line), but this completely changes the colours of the labels:
# Here name_colour is mapped inside aes(), so it goes through the default
# discrete colour scale: a legend appears, but the hex codes are treated as
# category names rather than as colours.
ggplot(df2, aes(x = time, y = points_sum, group = name)) +
  geom_point() +
  geom_text_repel(
    aes(label = name, colour = name_colour),
    direction = "y",
    size = 10
  ) +
  theme_minimal()
Any ideas?
If you want to use the color codes from your dataframe then make use of scale_color_identity. By default this will not give you a legend so you have to add guide = guide_legend():
library(tidyverse)
library(ggrepel)
df <- data.frame(
  name = rep(letters[1:3], 3),
  points = c(5, 3, 7, 12, 13, 14, 20, 30, 40),
  time = rep(c("day 1", "day 2", "day 3"), each = 3)
)

# Running total per name, its rank within each day, and the hex colour
# assigned to that rank.
df2 <- df %>%
  group_by(name) %>%
  mutate(points_sum = cumsum(points)) %>%
  group_by(time) %>%
  mutate(rank = rank(desc(points_sum), ties.method = "min")) %>%
  ungroup() %>%
  mutate(name_colour = case_when(
    rank == 1 ~ "#336600",
    rank == 2 ~ "#339900",
    rank == 3 ~ "#66ff33"
  ))

df2 %>%
  ggplot(aes(
    x = time,
    y = points_sum,
    group = name,
    label = name
  )) +
  geom_point() +
  geom_text_repel(aes(color = name_colour), direction = "y", size = 10) +
  # The identity scale uses the hex codes as colours. It draws no legend by
  # default, hence guide_legend(). Breaks are listed explicitly so that each
  # colour is paired with its rank regardless of the scale's default order.
  scale_color_identity(
    name = "rank",
    breaks = c("#336600", "#339900", "#66ff33"),
    labels = c("1", "2", "3"),
    guide = guide_legend()
  ) +
  theme_minimal()
In general, I think a more typical ggplot approach would be to specify the colours in scale_colour_manual or equivalent, rather than coding them into the data frame itself. For example:
library(ggplot2)
library(dplyr)
library(ggrepel)
# Same plot, but the rank itself is mapped to colour and the hex codes live
# in the scale rather than in the data.
data.frame(
  name = rep(letters[1:3], 3),
  points = c(5, 3, 7, 12, 13, 14, 20, 30, 40),
  time = rep(c("day 1", "day 2", "day 3"), each = 3)
) %>%
  group_by(name) %>%
  mutate(points_sum = cumsum(points)) %>%
  group_by(time) %>%
  # Rank as a factor so that it is treated as a discrete variable.
  mutate(rank = factor(rank(desc(points_sum), ties.method = "min"))) %>%
  ungroup() %>%
  ggplot(aes(x = time, y = points_sum, group = name)) +
  geom_point() +
  geom_text_repel(
    aes(label = name, colour = rank),
    direction = "y",
    size = 10
  ) +
  scale_colour_manual(
    values = c("1" = "#336600", "2" = "#339900", "3" = "#66ff33")
  ) +
  theme_minimal()
I was trying to change the legend title from group to the Greek letter "sigma" and the label "power.1, power.2, power.3" to "35, 40, 45" but it did not appear and still shows the default name and label. Could you please help me with it? Thanks so much.
# Load the library and input the data
library(ggplot2)
library(tidyr)
# Inputs: candidate values of n, the control and infected values, and the
# three candidate sigmas.
n <- 2:10
control <- rep(150, 4)
infected <- c(150, 170, 200, 250)
all <- c(control, infected)
sigma <- c(35, 40, 45)

# Overall mean of the eight values.
mu <- mean(all)

# Sum of squared deviations from the overall mean.
tau2 <- sum((all - mu)^2)

# Non-centrality parameter for every n, one vector per sigma.
gamma.1 <- n * tau2 / sigma[1]^2
gamma.2 <- n * tau2 / sigma[2]^2
gamma.3 <- n * tau2 / sigma[3]^2

# Power: probability that a non-central F(7, 16) variable exceeds the 95%
# quantile of the central F(7, 16) distribution.
# NOTE(review): the degrees of freedom are fixed at (7, 16) for every n;
# confirm this is intended.
power.1 <- 1 - pf(qf(.95, df1 = 7, df2 = 16), df1 = 7, df2 = 16, ncp = gamma.1)
power.2 <- 1 - pf(qf(.95, df1 = 7, df2 = 16), df1 = 7, df2 = 16, ncp = gamma.2)
power.3 <- 1 - pf(qf(.95, df1 = 7, df2 = 16), df1 = 7, df2 = 16, ncp = gamma.3)

# One row per n, one power column per sigma.
data <- data.frame(n, power.1, power.2, power.3)
# Question's attempt: reshape the power columns to long format and draw one
# coloured line (with points) per group.
data %>%
pivot_longer(cols = contains("power"), names_to = "group", values_to = "power") %>%
ggplot(aes(n, power)) +
geom_line(aes(color = group)) +
geom_point(aes(color = group), size = 4) +
# Both layers map `color`, but this scale is for the `fill` aesthetic.
scale_fill_discrete(name = expression(sigma), labels = c("35","40","45"))
Try this in the final part of your code. One lesson you can learn is that fill and color are different aesthetics. So, if you map color you must use a color scale (here scale_color_discrete), not a fill scale. Here is the code:
# Code
# Long format, colour mapped once for both layers; the legend title and
# labels are set on the colour scale.
data %>%
  pivot_longer(
    cols = contains("power"),
    names_to = "group",
    values_to = "power"
  ) %>%
  ggplot(aes(x = n, y = power, color = group)) +
  geom_line() +
  geom_point(size = 4) +
  scale_color_discrete(name = expression(sigma), labels = c("35", "40", "45"))
Output:
Or you can also try with guides() which will produce the same output (But first option is more direct):
# Code 2
# Same plot; the labels come from the colour scale and the legend title is
# set separately through guides().
data %>%
  pivot_longer(
    cols = contains("power"),
    names_to = "group",
    values_to = "power"
  ) %>%
  ggplot(aes(x = n, y = power, color = group)) +
  geom_line() +
  geom_point(size = 4) +
  scale_color_discrete(labels = c("35", "40", "45")) +
  guides(color = guide_legend(title = expression(sigma)))
You should use:
# Colour scale that sets the legend title to sigma and the labels to 35/40/45.
scale_colour_discrete(name = expression(sigma), labels = c("35","40","45"))
I am trying to overlay two plots with different variable treatment (from the same dataset) :
- the first one with a mutate and reorder (to make a geom_boxplot + geom_jitter) ;
- the second one with a group_by and summarize (to make a geom_line). Both of them have to be overlayed.
When I try the following code, this Error is given : Aesthetics must be either length 1 or the same as the data (4): label
# Example data: two observations of Div per Local, each Local tied to a Case.
Local <- c("A", "B", "C", "D", "A", "B", "C", "D")
Case <- c("QQ", "DD", "GG", "PP", "QQ", "DD", "GG", "PP")
Div <- c(2, 4, 5, 1, 3, 5, 6, 7)
dat <- data.frame(Local, Case, Div)
# First plot: box plot with jittered points and repelled labels; the x axis
# is ordered by the median Div of each Local. `label = Case` is set in the
# plot-level aes().
p1 <- dat %>%
mutate(Loc = reorder(Local, Div, FUN = median)) %>%
ggplot(aes(Loc, Div, label = Case)) +
geom_boxplot(outlier.size = -1) +
geom_jitter(width = 0.1, alpha = 1, aes(color = Case, size = Div)) +
geom_text_repel()
# Second plot: line through the mean Div of each Local.
dat %>%
group_by(Local) %>%
summarise(Div = mean(Div)) %>%
mutate(Loc = reorder(Local, Div, FUN = median)) %>%
ggplot(aes(Loc, Div, group = 1)) +
geom_line()
# Attempted overlay: the summarised data has no Case column.
# NOTE(review): presumably the plot-level `label = Case` mapping is what
# triggers the reported `label` error — confirm.
p1 + geom_line (data = dat %>%
group_by(Local) %>%
summarise(Div = mean(Div)) %>%
mutate(Loc = reorder(Local, Div, FUN = median)), aes(Loc, Div, group = 1))
The first plot gives :
And the second one :
But how to overlay them ?
# Box-plot data: order the x axis by the median Div of each Local.
datBox <- dat %>%
  mutate(Loc = reorder(Local, Div, FUN = median))

# Line data: mean Div per Local. LocNum is the numeric x position of each
# Local on the box plot's discrete axis. It is derived from the factor
# levels of datBox$Loc rather than from a hand-written recode, so it stays
# correct when the data (and therefore the ordering) changes.
datLine <- dat %>%
  group_by(Local) %>%
  summarise(Div = mean(Div)) %>%
  mutate(
    Loc = factor(Local, levels = levels(datBox$Loc)),
    LocNum = as.integer(Loc)
  )

ggplot(data = datBox, aes(Loc, Div)) +
  geom_boxplot(outlier.size = -1) +
  geom_jitter(width = 0.1, alpha = 1, aes(color = Case, size = Div)) +
  # label is mapped here only: datLine has no Case column, so the mapping
  # must not sit in the plot-level aes().
  geom_text_repel(aes(label = Case)) +
  geom_line(data = datLine, aes(LocNum, Div))
I made a secondary axis with numeric values corresponding to the order of the Loc factor. geom_line does not appreciate the factor on the axis. There are probably more elegant solutions. Also, I moved label into the geom_text_repel aes.
# Three groups (A, B, C) observed at three time points.
Data <- data.frame(
  Time = rep(c(1, 2, 3), each = 3),
  Group = rep(c("A", "B", "C"), times = 3),
  Value = c(20, 10, 15, 20, 20, 20, 30, 25, 35)
)
I have three Groups with Values at three different points in Time.
library(ggplot2)
library(gganimate)
# Animated bar chart: one bar per group with its value printed above it,
# transitioning over Time.
p <- ggplot(data = Data, mapping = aes(x = Group, y = Value)) +
  geom_col(position = "identity") +
  geom_text(mapping = aes(label = Value), vjust = -1) +
  coord_cartesian(ylim = c(0, 40)) +
  transition_time(Time)

# Render the animation.
p
The above code produces the animation for the transformation of the bars quite well, but the change in the geom_text leaves much to be desired, as the geom_text tweens/transitions with >10 decimal places. Ideally I want the geom_text numeric values to remain as an integer whilst transitioning, or some way to control the degree of rounding.
Edit: Changing Value to an integer type doesn't help.
You can try to calculate the transitions by your own beforehand...
library(gganimate)
library(tidyverse)
# Change from each time point to the next within a group (0 at the last
# time point, where there is nothing left to interpolate towards).
Data2 <- Data %>%
  group_by(Group) %>%
  arrange(Group, Time) %>%
  mutate(diff = lead(Value, default = last(Value)) - Value) %>%
  ungroup()

# Frame times: 100 steps between consecutive time points.
Seq <- seq(1, 3, 0.01)
library(gganimate)

tibble(Time_new = rep(Seq, 3), Group = rep(LETTERS[1:3], each = length(Seq))) %>%
  # Time point each frame starts from. round() guards against floating-point
  # noise in Seq; floor() also works for times with more than one digit.
  mutate(Time = floor(round(Time_new, 2))) %>%
  left_join(Data2, by = c("Group", "Time")) %>%
  # Linear interpolation: exactly Value at the time point itself, moving
  # towards the next value as Time_new advances.
  mutate(Value2 = Value + diff * (Time_new - Time)) %>%
  ggplot(aes(Group, Value2)) +
  geom_col(position = "identity") +
  # The label is formatted here, so the number of decimals is under control.
  geom_text(aes(label = sprintf("%1.2f", Value2)), vjust = -1) +
  coord_cartesian(ylim = c(0, 40)) +
  transition_manual(Time_new)
or try geom_text(aes(label = round(Value2,2)), vjust = -1)
There is a very elegant, general solution provided by the package author, which is
..just put "Year: {as.integer(frame_time)}" as your title
From here