How to overlay two plots with different variable treatment? - r

I am trying to overlay two plots with different variable treatment (from the same dataset):
- the first one with a mutate and reorder (to make a geom_boxplot + geom_jitter) ;
- the second one with a group_by and summarize (to make a geom_line). Both of them have to be overlayed.
When I try the following code, this Error is given : Aesthetics must be either length 1 or the same as the data (4): label
# Example data: two Div observations for each Local, each row tagged with a Case.
Local <- c("A", "B", "C", "D", "A", "B", "C", "D")
Case <- c("QQ", "DD", "GG", "PP", "QQ", "DD", "GG", "PP")
Div <- c(2, 4, 5, 1, 3, 5, 6, 7)
dat <- data.frame(Local, Case, Div)
# First plot: boxplot plus jittered, labelled points. Loc is Local reordered by
# the median of Div. Note that label = Case is set in the plot-level aes().
p1 <- dat %>%
mutate(Loc = reorder(Local, Div, FUN = median)) %>%
ggplot(aes(Loc, Div, label = Case)) +
geom_boxplot(outlier.size = -1) +
geom_jitter(width = 0.1, alpha = 1, aes(color = Case, size = Div)) +
geom_text_repel()
# Second plot: a single line (group = 1) through the mean Div of each Local,
# drawn from a summarised copy of dat with one row per Local.
dat %>%
group_by(Local) %>%
summarise(Div = mean(Div)) %>%
mutate(Loc = reorder(Local, Div, FUN = median)) %>%
ggplot(aes(Loc, Div, group = 1)) +
geom_line()
# Overlay attempt: the summarised data is passed as the data of an extra
# geom_line layer on top of p1. This is the code reported to fail with the
# "Aesthetics must be either length 1 or the same as the data (4): label" error.
p1 + geom_line (data = dat %>%
group_by(Local) %>%
summarise(Div = mean(Div)) %>%
mutate(Loc = reorder(Local, Div, FUN = median)), aes(Loc, Div, group = 1))
The first plot gives :
And the second one :
But how to overlay them ?

# Boxplot data: order the x-axis categories by the median Div of each Local.
datBox <- dat %>%
  mutate(Loc = reorder(Local, Div, FUN = median))

# Line data: one mean Div per Local. LocNum is the position of each Local on
# the boxplot's x-axis, looked up from the levels of datBox$Loc instead of
# being typed in by hand, so the line stays aligned with the boxes when the
# data (and therefore the ordering) changes.
datLine <- dat %>%
  group_by(Local) %>%
  summarise(Div = mean(Div)) %>%
  mutate(Loc = reorder(Local, Div, FUN = median)) %>%
  mutate(LocNum = match(Local, levels(datBox$Loc)))

ggplot(data = datBox, aes(Loc, Div)) +
  geom_boxplot(outlier.size = -1) +
  geom_jitter(width = 0.1, alpha = 1, aes(color = Case, size = Div)) +
  # label is mapped in this layer only, so the line layer (which uses other
  # data) does not inherit it.
  geom_text_repel(aes(label = Case)) +
  geom_line(data = datLine, aes(LocNum, Div))
I added a numeric column whose values correspond to the order of the levels of the Loc factor, because geom_line does not appreciate a factor on the axis. There are probably more elegant solutions. I also moved label into the aes of geom_text_repel.

Related

ggplot bar chart (geom_bar) to use start and end point from df

I have below df that I have 2 labels, A and B. And I want the bar chart for A to start from 0 to 2, for B to start from 3 to 6. How can I do that? If the df needs to be wrangled to do this, it's fine as well.
# Two rows per label; `values` holds the two points each bar should span.
df <- data.frame(
  labels = rep(c("A", "B"), each = 2),
  values = c(0, 2, 3, 6)
)
# Bars drawn directly from `values`, filled and outlined by label.
ggplot(
  data = df,
  mapping = aes(x = labels, y = values, fill = labels, colour = labels)
) +
  geom_col()
One option to achieve your desired result would be to make use of geom_rect which involves some data wrangling to get the data into the right shape:
library(ggplot2)
library(dplyr)
library(tidyr)
df <- data.frame(labels = c("A", "A", "B", "B"), values = c(0, 2, 3, 6))

# One row per label describing its rectangle: ymin/ymax are the smallest and
# largest value of that label (this works for any number of rows per label),
# and xmin/xmax centre a 0.9-wide bar on the label's integer position.
df <- df %>%
  group_by(labels) %>%
  summarise(ymin = min(values), ymax = max(values)) %>%
  ungroup() %>%
  mutate(
    labels = factor(labels),
    xmin = as.numeric(labels) - .45,
    xmax = as.numeric(labels) + .45
  )

ggplot(
  df,
  aes(
    xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
    fill = labels, colour = labels
  )
) +
  geom_rect() +
  # The x-axis is continuous, so label the integer positions with the factor
  # levels rather than a fixed c("A", "B").
  scale_x_continuous(
    breaks = seq_along(levels(df$labels)),
    labels = levels(df$labels)
  )

How can I create this chart in R using ggplot2?

I am using R in RStudio and I have the following data frame.
# One row per company (A-F) with its value in each of the two quarters.
df1 <- data.frame(
  comp = LETTERS[1:6],
  Q2_2018 = c(27, 10, 6, 4, 3, 2),
  Q2_2019 = c(31, 12, 8, 6, 5, 4)
)
I would like to create a chart (from the above data) like the one shown below (excluding the Amazon logo).
I am mostly stuck at drawing the circles with the % changes.
So far,
library(ggplot2)
library(reshape2)
library(magrittr)
# Reshape to long format (one row per company and quarter), then draw the two
# quarters as side-by-side bars for each company.
df1 %>%
  melt(id.vars = "comp") %>%
  ggplot(mapping = aes(x = comp, y = value, fill = variable)) +
  geom_col(position = "dodge")
Can it be done with ggplot2?
Most of the way:
library(tidyverse)
df1 %>%
  # Long format: one row per company and quarter (columns `year` and `val`).
  pivot_longer(-comp, names_to = "year", values_to = "val") %>%
  group_by(comp) %>%
  # Relative change versus the previous quarter of the same company; NA for
  # the first quarter, which has nothing to compare against.
  mutate(change = val / lag(val) - 1) %>%
  mutate(change_lab = if_else(
    !is.na(change),
    # percent() already prints the minus sign of negative values, so only a
    # "+" has to be added for increases.
    scales::percent(
      change,
      accuracy = 1,
      prefix = if_else(change > 0, "+", "")
    ),
    NA_character_
  )) %>%
  ungroup() %>%
  ggplot(aes(comp, val, fill = year, label = val)) +
  geom_col(position = position_dodge()) +
  # Dodge the value labels by the bars' default width (0.9) so each label is
  # centred over its own bar.
  geom_text(position = position_dodge(width = 0.9), vjust = -0.5) +
  # Circle sized by the change, drawn 5 units above the bar, with the
  # percentage label on top of it.
  geom_point(aes(comp, val + 5, size = change), color = "lightgreen") +
  geom_text(aes(comp, val + 5, label = change_lab)) +
  scale_size_area(max_size = 30) +
  guides(size = "none") +
  theme_classic()

GGanimate: geom_text with numeric values animates at decimal numbers instead of integers

# Three groups (A, B, C) observed at three time points (1, 2, 3).
Data <- data.frame(
  Time = rep(c(1, 2, 3), each = 3),
  Group = rep(c("A", "B", "C"), times = 3),
  Value = c(20, 10, 15, 20, 20, 20, 30, 25, 35)
)
I have three Groups with Values at three different points in Time.
library(ggplot2)
library(gganimate)
# Animated bar chart: one bar per Group with its Value printed above it,
# transitioning over Time.
p <- ggplot(Data, aes(Group, Value)) +
geom_col(position = "identity") +
# The label is mapped to the numeric Value column; this is the text reported
# below to show many decimal places while transitioning.
geom_text(aes(label = Value), vjust = -1) +
# Fixed y-range of 0 to 40 for the whole animation.
coord_cartesian(ylim = c(0, 40)) +
transition_time(Time)
p
The above code produces the animation for the transformation of the bars quite well, but the change in the geom_text leaves much to be desired, as the geom_text tweens/transitions with >10 decimal places. Ideally I want the geom_text numeric values to remain as an integer whilst transitioning, or some way to control the degree of rounding.
Edit: Changing Value to an integer type doesn't help.
You can try to calculate the transitions on your own beforehand...
library(gganimate)
library(tidyverse)
# Change in Value from each time point to the next within a group
# (0 after the last time point, which has nothing to move towards).
# Rows are sorted by Time explicitly so the result does not depend on the
# row order of Data.
Data2 <- Data %>%
  arrange(Group, Time) %>%
  group_by(Group) %>%
  mutate(diff = lead(Value, default = last(Value)) - Value) %>%
  ungroup()

# Animation frames: 100 steps between consecutive time points.
Seq <- seq(1, 3, 0.01)

tibble(
  Time_new = rep(Seq, 3),
  Group = rep(LETTERS[1:3], each = length(Seq))
) %>%
  # Observed time point each frame starts from. The small offset guards
  # against floating-point error in seq(); unlike taking the first character
  # of the number, this also works for times of 10 and above.
  mutate(Time = floor(Time_new + 1e-9)) %>%
  left_join(Data2, by = c("Group", "Time")) %>%
  group_by(Group, Time) %>%
  # Linear interpolation within a segment: the first frame shows the observed
  # Value exactly, and every later frame adds one more n-th of the change.
  mutate(Value2 = Value + (row_number() - 1) * diff / n()) %>%
  ungroup() %>%
  ggplot(aes(Group, Value2)) +
  geom_col(position = "identity") +
  # The label is formatted explicitly, so the number of decimals is controlled.
  geom_text(aes(label = sprintf("%1.2f", Value2)), vjust = -1) +
  coord_cartesian(ylim = c(0, 40)) +
  transition_manual(Time_new)
or try geom_text(aes(label = round(Value2,2)), vjust = -1)
There is a very elegant, general solution provided by the package author, which is
..just put "Year: {as.integer(frame_time)}" as your title
From here

Add count as label to points in geom_count

I used geom_count to visualise overlaying points as sized groups, but I also want to add the actual count as a label to the plotted points, like this:
However, to achieve this, I had to create a new data frame containing the counts and use these data in geom_text as shown here:
# Raw points (several share the same coordinates) and one summary row per id
# holding the mean position and the number of points.
data <- data.frame(
  x = rep(c(2, 3, 4), times = c(4, 5, 1)),
  y = rep(c(1, 2, 3), times = c(1, 5, 4)),
  id = rep(c("a", "b", "c", "d", "e"), times = c(1, 3, 2, 3, 1))
)
data2 <- summarise(
  group_by(data, id),
  x = mean(x),
  y = mean(y),
  count = n()
)
# Sized points from the raw data, with the count from the summary as a label.
ggplot(data, aes(x = x, y = y)) +
  geom_count() +
  scale_size_continuous(range = c(10, 15)) +
  geom_text(
    data = data2,
    mapping = aes(label = count),
    color = "#ffffff"
  )
Is there any way to achieve this in a more elegant way (i.e. without the need for the second data frame)? I know that you can access the count in geom_count using ..n.., yet if I try to access this in geom_text, this is not working.
Are you expecting this:
# Summarise first (mean position and number of rows per id), then plot the
# summary itself: point size and label both come from `count`.
data %>%
  group_by(id) %>%
  summarise(x = mean(x), y = mean(y), count = n()) %>%
  ggplot(aes(x = x, y = y)) +
  geom_point(aes(size = count)) +
  scale_size_continuous(range = c(10, 15)) +
  geom_text(aes(label = count), color = "#ffffff")
update:
If using geom_count is a must, then the expected output can be achieved with:
p <- ggplot(data, aes(x = x, y = y)) +
  geom_count() +
  scale_size_continuous(range = c(10, 15))
# The computed data of the plot's first layer (geom_count) carries the count
# in column `n`; it is reused as the data of the label layer.
p + geom_text(
  data = layer_data(p, 1),
  mapping = aes(x, y, label = n),
  color = "#ffffff"
)
here would be a solution for a code with discrete values
# Same approach with discrete axes. STest is the poster's data and is not
# shown here. Both axes use the same set of answer labels.
opinion_labels <- c(
  "strong decrease", "decrease", "no change",
  "increase", "strong increase", "no opinion"
)
f <- ggplot(data = STest, aes(x = x, y = y)) +
  geom_count() +
  scale_x_discrete(labels = opinion_labels) +
  scale_y_discrete(labels = opinion_labels)
# Build f itself (not some other plot object) so the counts belong to the
# points that are actually drawn; vjust lifts the label above its point.
f + geom_text(
  data = ggplot_build(f)$data[[1]],
  aes(x, y, label = n),
  vjust = -2
)
Thank you so much!
A much easier way to change this is to use the labs() function so in this case it would be ...labs(size = "Count") + ....
That should be all you need.

“for” loop only adding one of the layers in ggplot

I recognize that this has been an issue that's been asked in many other instances, but none of the solutions provided worked for my particular problem.
Here, I have the following data:
library(tidyverse)
library(scales)
# One row per category with its result; columns A-D flag which categories are
# mentioned for that row (NA where none is).
mydata <- tibble(
  Category = factor(c("A", "B", "C", "D")),
  Result = c(0.442, 0.537, 0.426, 0.387),
  A = c(NA, "A", NA, NA),
  B = rep(NA, 4),
  C = c(NA, "C", NA, NA),
  D = c("D", "D", NA, NA)
)
And I have the following vector for the colors:
# Colour for each category, named by category.
colors_vct <- setNames(
  c("#0079c0", "#cc9900", "#252525", "#c5120e"),
  c("A", "B", "C", "D")
)
With this information, I can create the following plot:
# Horizontal bars per category with the result printed as a percentage next
# to each bar; bars and labels share the per-category colours.
p <- ggplot(mydata, aes(x = Category, y = Result, fill = Category)) +
  geom_col() +
  geom_text(aes(label = percent(Result), color = Category), hjust = -.25) +
  coord_flip() +
  scale_y_continuous(limits = c(0, 1), labels = percent) +
  scale_colour_manual(values = colors_vct) +
  scale_fill_manual(values = colors_vct)
p
And I'd like to have little triangles appear after the labels based on whether a certain category is mentioned in the last 4 columns of mydata, colored by that category's color, as so:
# One geom_text layer per flag column (columns 3 to 6 of mydata are A to D):
# the rows where that column equals its category get a triangle ("\u25b2") in
# the category's colour. Each layer uses a different hjust (-4, -6, -7).
p <- p + geom_text(data = filter(mydata, mydata[,3] == "A"), aes(label = sprintf("\u25b2")), colour = colors_vct["A"], hjust = -4)
#p <- p + geom_text(data = filter(mydata, mydata[,4] == "B"), aes(label = sprintf("\u25b2")), colour = colors_vct["B"], hjust = -5) #This is commented out because there are no instances where the layer ends up being applied.
p <- p + geom_text(data = filter(mydata, mydata[,5] == "C"), aes(label = sprintf("\u25b2")), colour = colors_vct["C"], hjust = -6)
p <- p + geom_text(data = filter(mydata, mydata[,6] == "D"), aes(label = sprintf("\u25b2")), colour = colors_vct["D"], hjust = -7)
p
This is what I want the final chart to look like (more or less, see bonus question below). Now, I'd like to iterate the last bit of code using a for loop. And this is where I'm running into trouble. It just ends up adding one layer only. How do I make this work? Here is my attempt:
# Lookup table pairing each category level with its colour, used for matching
# inside the loop.
colors_tbl <- tibble(Category = levels(mydata$Category),
colors = c("#0079c0", "#cc9900", "#252525", "#c5120e"))
# Attempt to add one triangle layer per category; i runs over the rows of
# mydata (1 to 4).
for (i in seq_along(mydata$Category)) {
# Intended to skip categories with nothing to apply, as with category B.
# NOTE(review): this tests column i of mydata (Category, Result, A, B), while
# the filter below reads column i + 2 (A, B, C, D). Of the first four columns
# only A is a character vector, so the body looks like it runs for i = 3 only,
# which would explain why a single layer is added. Confirm before relying on it.
if (is_character(mydata[[i]])) {
# Keep only the rows whose flag column (i + 2) names the i-th category.
triangles <- filter(mydata, mydata[,(i+2)] == levels(mydata$Category)[i])
# Look up the colour of the i-th category in colors_tbl.
triangles <- mutate(triangles, colors = colors_tbl$colors[match(levels(triangles$Category)[i], colors_tbl$Category)])
# hjust value for this category's triangle: -(i + 3).
pos <- -(i+3)
# Add the layer to the plot object.
p <- p + geom_text(data = triangles, aes(label = sprintf("\u25b2")), color = triangles$colors, hjust = pos)
}
}
p
:(
Bonus question: Is there a way I can avoid gaps in between the triangles, as per the 2nd chart?
EDIT: As per @baptiste's suggestion, I re-processed the data as such:
# Long format: one row per (category, flagged category) pair that is actually
# present, with the colour and hjust offset of the flagged category attached.
mydata2 <- mydata %>%
  gather(key = comp, value = Present, -Result, -Category) %>%
  mutate(colors = colors_tbl$colors[match(Present, colors_tbl$Category)]) %>%
  filter(!is.na(Present)) %>%
  select(-comp) %>%
  mutate(pos = case_when(
    Present == "A" ~ -4,
    Present == "B" ~ -5,
    Present == "C" ~ -6,
    TRUE ~ -7
  ))
# A single layer draws every triangle; colour and hjust are passed per row.
p <- p + geom_text(
  data = mydata2,
  aes(x = Category, label = sprintf("\u25b2")),
  colour = mydata2$colors,
  hjust = mydata2$pos
)
p
Ok, I got it to work. my bonus question still stands.

Resources