Set the Axis values (in an animation) - r

How do I stop the Y-axis changing during an animation?
The graph I made is at http://i.imgur.com/EKx6Tw8.gif
The idea is to make an animated heatmap of population and income for each year. The problem is that the y-axis sometimes jumps to include 0, or to exclude the highest value. How do you fix the axis values so they stay the same in every frame? I know this must be a common issue, but I can't find the answer.
The code to recreate it is
library(gapminder)
library(ggplot2)
library(devtools)
install_github("dgrtwo/gganimate")
library(gganimate)
library(dplyr)
# Prepare the heatmap data: bin life expectancy and income, then compute the
# share of each year's total population that falls into every
# (life expectancy, income) cell.
mydata <- dplyr::select(gapminder, country, continent, year, lifeExp, pop, gdpPercap)

# Bin life expectancy into 5-year bins.
mydata$lifeExp2 <- as.integer(round((mydata$lifeExp - 2) / 5) * 5)

# Each income bin is labelled by its lower bound, so the labels are the
# breaks without the final upper limit.
income_breaks <- c(
  0, 250, 500, 750, 1000, 1500, 2000, 2500, 3000, 3500, 4500, 5500, 6500,
  7500, 9000, 11000, 21000, 31000, 41000, 191000
)
mydata$income <- cut(
  mydata$gdpPercap,
  breaks = income_breaks,
  labels = head(income_breaks, -1)
)

sizePer <- mydata %>%
  group_by(lifeExp2, income, year) %>%
  # pop is converted to double before summing, as for totalPop below, so a
  # large cell cannot overflow the integer range and turn into NA.
  mutate(popLikeThis = sum(as.numeric(pop))) %>%
  group_by(year) %>%
  mutate(totalPop = sum(as.numeric(pop))) %>%
  mutate(per = (popLikeThis / totalPop) * 100) %>%
  # Drop the grouping so later steps work on a plain table.
  ungroup()

# Percentage bins are labelled by their lower bound as well.
percent_breaks <- c(0, .1, .3, 1, 2, 3, 5, 10, 20, Inf)
sizePer$percent <- cut(
  sizePer$per,
  breaks = percent_breaks,
  labels = head(percent_breaks, -1)
)
# Draw one heatmap frame per year and hand the printed frames to saveGIF().
# NOTE(review): saveGIF() is not defined or attached in this snippet;
# presumably it comes from the animation package - confirm it is loaded.
saveGIF({
# One frame for each of the listed years.
for(i in c(1997,2002,2007)){
print(ggplot(sizePer %>% filter(year == i),
aes(x = lifeExp2, y = income)) +
geom_tile(aes(fill = percent)) +
theme_bw()+
theme(legend.position="top", plot.title = element_text(size=30, face="bold",hjust = 0.5))+
coord_cartesian(xlim = c(20,85), ylim = c(0,21)) +
scale_fill_manual("%",values = c("#ffffcc","#ffeda0","#fed976","#feb24c","#fd8d3c","#fc4e2a","#e31a1c","#bd0026","#800026"),drop=FALSE)+
annotate(x=80, y=3, geom="text", label=i, size = 6) +
annotate(x=80, y=1, geom="text", label="#iamreddave", size = 5) +
ylab("Income") + # y-axis label
xlab("Life Expenctancy")+ # x-axis label
ggtitle("Worldwide Life Expectancy and Income")
)}
}, interval=0.7,ani.width = 900, ani.height = 600)

Solution:
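Add scale_y_discrete(drop = FALSE) to the ggplot call. The y variable is a factor, and by default each frame drops the factor levels that do not occur in that year's subset, which is why the axis changes between frames; drop = FALSE keeps every level in every frame. Answered by #bdemarest in comments.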

Related

How to add percentages on top of a histogram when data is grouped

This is not my data (for confidentiality reasons), but I have tried to create a reproducible example using a dataset included in the ggplot2 library. I have a histogram summarizing the value of some variable by group (a factor with 2 levels). First, I did not want the counts but the proportions of the total, so I used this code:
library(ggplot2)
library(dplyr)
df_example <- diamonds %>% as.data.frame() %>% filter(cut=="Premium" | cut=="Ideal")
ggplot(df_example,aes(x=z,fill=cut)) +
geom_histogram(aes(y=after_stat(width*density)),binwidth=1,center=0.5,col="black") +
facet_wrap(~cut) +
scale_x_continuous(breaks=seq(0,9,by=1)) +
scale_y_continuous(labels=scales::percent_format(accuracy=2,suffix="")) +
scale_fill_manual(values=c("#CC79A7","#009E73")) +
labs(x="Depth (mm)",y="Count") +
theme_bw() + theme(legend.position="none")
It gave me this as a result.
enter image description here
The issue is that I would like to print the numeric percentages on top of the bins and haven't found a way to do so.
As I saw it done for printing counts elsewhere, I attempted to print them using stat_bin(), including the same y and label values as the y in geom_histogram, thinking it would print the right numbers:
ggplot(df_example,aes(x=z,fill=cut)) +
geom_histogram(aes(y=after_stat(width*density)),binwidth=1,center=0.5,col="black") +
stat_bin(aes(y=after_stat(width*density),label=after_stat(width*density*100)),geom="text",vjust=-.5) +
facet_wrap(~cut) +
scale_x_continuous(breaks=seq(0,9,by=1)) +
scale_y_continuous(labels=scales::percent_format(accuracy=2,suffix="")) +
scale_fill_manual(values=c("#CC79A7","#009E73")) +
labs(x="Depth (mm)",y="%") +
theme_bw() + theme(legend.position="none")
However, it does print way more values than there are bins, these values do not appear consistent with what is portrayed by the bar heights and they do not print in respect to vjust=-.5 which would make them appear slightly above the bars.
enter image description here
What am I missing here? I know that if there was no grouping variable/facet_wrap, I could use after_stat(count/sum(count)) instead of after_stat(width*density) and it seems that it would have fixed my issue. But I need the histograms for both groups to appear next to each other. Thanks in advance!
You have to use the same arguments in stat_bin as for the histogram when adding your labels to get same binning for both layers and to align the labels with the bars:
library(ggplot2)
library(dplyr)
df_example <- diamonds %>%
as.data.frame() %>%
filter(cut == "Premium" | cut == "Ideal")
ggplot(df_example, aes(x = z, fill = cut)) +
geom_histogram(aes(y = after_stat(width * density)),
binwidth = 1, center = 0.5, col = "black"
) +
stat_bin(
aes(
y = after_stat(width * density),
label = scales::number(after_stat(width * density), scale = 100, accuracy = 1)
),
geom = "text", binwidth = 1, center = 0.5, vjust = -.25
) +
facet_wrap(~cut) +
scale_x_continuous(breaks = seq(0, 9, by = 1)) +
scale_y_continuous(labels = scales::number_format(scale = 100)) +
scale_fill_manual(values = c("#CC79A7", "#009E73")) +
labs(x = "Depth (mm)", y = "%") +
theme_bw() +
theme(legend.position = "none")

How can I get the real scale from a facet_grid plot in R?

I am trying to add captions as it appears in this post.
For that reason, I need the real scale of the plot (x and y axis) when I am using facet_grid. I know that I can use layer_data, since it saves everything from the plot... However, it is not really accurate, because when I try to establish the limits using min and max from that output, the plot changes.
Here you have an example:
library(ggplot2)
library(dplyr)
val1 <- c(2.1490626,2.2035281,1.5927854,3.1399245,2.3967338,3.7915825,4.6691277,3.0727319,2.9230937,2.6239759,3.7664386,4.0160378,1.2500835,4.7648343,0.0000000,5.6740227,2.7510256,3.0709322,2.7998003,4.0809085,2.5178086,5.9713330,2.7779843,3.6724801,4.2648527,3.6841084,2.5597235,3.8477471,2.6587736,2.2742209,4.5862788,6.1989269,4.1167091,3.1769325,4.2404515,5.3627032,4.1576810,4.3387921,1.4024381,0.0000000,4.3999099,3.4381837,4.8269218,2.6308474,5.3481382,4.9549753,4.5389650,1.3002293,2.8648220,2.4015338,2.0962332,2.6774765,3.0581759,2.5786137,5.0539080,3.8545796,4.3429043,4.2233248,2.0434363,4.5980727)
val2 <- c(3.7691229,3.6478055,0.5435826,1.9665861,3.0802654,1.2248374,1.7311236,2.2492826,2.2365337,1.5726119,2.0147144,2.3550348,1.9527204,3.3689502,1.7847986,3.5901329,1.6833872,3.4240479,1.8372175,0.0000000,2.5701453,3.6551315,4.0327091,3.8781182)
# Stack the two samples into one table, tagging each row with the name of
# the data frame it came from ("df1" / "df2") in column `id`.
df1 <- data.frame(value = val1)
df2 <- data.frame(value = val2)
data <- bind_rows(lst(df1, df2), .id = "id")

# Alternate "Male"/"Female" over however many rows there are, instead of
# hard-coding the row count.
data$Sex <- rep(c("Male", "Female"), length.out = nrow(data))

# Density curve plus density-scaled histogram, faceted by sample (rows) and
# sex (columns).
p <- data %>%
  ggplot(aes(value)) +
  geom_density(lwd = 1.2, colour = "red", show.legend = FALSE) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(
    aes(y = after_stat(density), fill = id),
    bins = 10, col = "black", alpha = 0.2
  ) +
  facet_grid(id ~ Sex) +
  xlab("type_data") +
  ylab("Density") +
  ggtitle("title") +
  guides(fill = guide_legend(title = "legend_title")) +
  theme(strip.text.y = element_blank())
p
plot_info <- layer_data(p)
> min(plot_info$density)
[1] 7.166349e-09
> max(plot_info$density)
[1] 0.5738021
As you can see in the plot, the y-axis starts at 0 and finishes at around 0.7, more or less. However, the maximum density is 0.57.
If I try to use the info from layer_data:
p + coord_cartesian(clip="off", ylim=c(min(plot_info$density), max(plot_info$density)),
xlim = c(min(plot_info$x), max(plot_info$x)))
The plot changes completely.
Does anyone know how can I get the scales that ggplot2 and facet_grid are using? I need the information of the density (y_axis) and the info from the x_axis.
Yes, to get the scales directly, use layer_scales(p), which gives you the range of the axes rather than just the range of the data, which is what you get from layer_data(p)
p + coord_cartesian(clip = "off",
ylim = layer_scales(p)$y$range$range,
xlim = layer_scales(p)$x$range$range)
Or, to combine this question with your last, where you add the text labels outside of the plotting panels, your result might be something like:
p + coord_cartesian(clip = "off",
ylim = layer_scales(p)$y$range$range,
xlim = layer_scales(p)$x$range$range) +
geom_text(data = data.frame(value = c(0, 6), id = c("df2", "df2"),
Sex = c('Female', 'Male')),
aes(y = -0.15, label = c('Female', 'Male')))
Does this help?
?layer_data
summary(layer_data(p, i = 2))
Here i is the index of the layer whose data you want returned.
You can then take the min of xmin, the max of xmax, and so on, to get the plotted ranges.

Edit hover label text in ggplot to show percentage

I'm trying to update the hover labels in my plot to show "Percentage: XX%" (where XX is the value of the percentage of each bar).
Here is some reproducible code:
## Data from https://data.melbourne.vic.gov.au/People/Indicators-of-quality-of-life-and-city-services-by/e6er-4cb3
library(dplyr)
library(ggplot2)
library(plotly)
data <- read.csv("Indicators_of_quality_of_life_and_city_services_by_year.csv")
head(data)
#data$Indicator.Theme
#data$Type
data <- data[,c("Indicator.Theme", "Type")]
data
que_code <- data %>% mutate(newcat = Indicator.Theme)
response <- que_code$newcat
category <- factor(que_code$Type)
textfill= "Type"
plott <- ggplot(que_code, aes(x=response, fill=category)) +
geom_bar(position="dodge", width = 0.5, aes(y = (..count..)*100/sum(..count..), label="Percentage")) + labs(fill= textfill) + xlab("Response to survey questions")+ylab("Percentage")+
scale_fill_manual(values = c("#ae3cc6", "#9630a8", "#842791", "#6d1b73", "#780e55",
"#7e0643", "#820036", "#a11635", "#bb2835", "#d93d35",
"#e74735", "#fd5634", "#fe7c5b", "#ffa182"), drop=FALSE) + scale_x_discrete(drop=FALSE)+
theme(axis.text.x = element_text(size = 10, angle = 30))
plott <- ggplotly(plott, tooltip="y")
What my plot looks like
I would like to change the variable name in the hover label from (..count..)*100/sum(..count..) to "Percentage".
Any help would be greatly appreciated, I've been struggling with this for a while ahaha
One way would be to pre-calculate the values to plot instead of using (..count..)*100/sum(..count..). This would also need to change geom_bar to geom_col.
library(dplyr)
library(ggplot2)
library(plotly)
plott <- que_code %>%
count(Type, Indicator.Theme, name = 'Percentage') %>%
mutate(Percentage = prop.table(Percentage) * 100) %>%
ggplot(aes(x=Indicator.Theme, fill=Type)) +
geom_col(position="dodge", width = 0.5, aes(y = Percentage)) +
labs(fill= textfill) +
xlab("Response to survey questions")+
ylab("Percentage") +
scale_fill_manual(values = c("#ae3cc6", "#9630a8", "#842791", "#6d1b73", "#780e55",
"#7e0643", "#820036", "#a11635", "#bb2835", "#d93d35",
"#e74735", "#fd5634", "#fe7c5b", "#ffa182"), drop=FALSE) +
scale_x_discrete(drop=FALSE)+
theme(axis.text.x = element_text(size = 10, angle = 30))
plott <- ggplotly(plott, tooltip="y")
plott

Add a “fake” discrete or continuous legend to a plot with values that are not in the data

I have data (percentage changes) for several months for different states of a country that I want to plot as a map (each month as a separate png) and animate it as a GIF with magick.
The percentage changes (discrete values), however, do not have the same maximum and minimum value in each month. If I would simply plot each month the specified red color for the highest value would stand for different maximum values in each month (for example +240% - 245% in January and +260% - 265% in February). To tackle this issue I gathered all occurring percentage changes of all months in a vector. These discrete values got assigned colors (from light red - "0% - 5%" - to dark red - "260% - 265%") so that e.g. "240% - 245%" would show as the same red in January as well as in February.
The problem is: the legends that are plotted with each map differ since not every percentage change is present in each month and of course only values that exist in each subset for each month are shown in the legend.
Is it possible to (1) show the same legend for all maps (with all discrete values from "0-5%" to "260% - 265%" even though not all the values are plotted each month) or (2) can I simply add a "fake" continuous legend ranging from light red to dark red that ranges from 0% to 265%? (I found geom_blank() might be helpful for that, however, I have not managed to make it work.)
Here is a minimal reproducible example:
install.packages("sf")
install.packages("ggplot2")
install.packages("magick")
install.packages("tidyverse")
install.packages("maps")
library(sf)
library(ggplot2)
library(magick)
library(tidyverse)
library(maps)
states <- st_as_sf(map("state",
plot = FALSE,
fill = TRUE))
# Build range labels such as "0% – 5%" for every 5-point step from `start`
# to `end` (inclusive); each label runs from a step value to that value + 5.
labels <- function(start, end) {
  lower <- seq(start, end, 5)
  upper <- lower + 5
  paste0(lower, "%", " – ", upper, "%")
}
lab_jan <- labels(0, (length(states$ID) - 1) * 5)
lab_feb <- labels(20, (length(states$ID) + 3) * 5)
colfun <- colorRampPalette(c("#EE7F74", "#86372E"))
col <- colfun(length(unique(c(lab_jan, lab_feb))))
lab_col <- tibble(label = unique(c(lab_jan, lab_feb)),
color = col)
states_jan <- bind_cols(states,
lab_jan = factor(lab_jan,
levels = lab_jan))
states_feb <- bind_cols(states,
lab_feb = factor(lab_feb,
levels = lab_feb))
jan_01 <- ggplot() +
geom_sf(data = states_jan,
aes(fill = lab_jan)) +
theme_void() +
scale_fill_manual(values = lab_col %>%
filter(label %in% states_jan$lab_jan) %>%
pull(color)) +
#theme(legend.position = "none") +
ggsave("01_jan.png", width = 10)
feb_02 <- ggplot() +
geom_sf(data = states_feb,
aes(fill = lab_feb)) +
theme_void() +
scale_fill_manual(values = lab_col %>%
filter(label %in% states_feb$lab_feb) %>%
pull(color)) +
#theme(legend.position = "none") +
ggsave("02_feb.png", width = 10)
list.files(pattern = '*.png', full.names = TRUE) %>%
image_read() %>%
image_join() %>%
image_animate(fps = 1) %>%
image_write("states.gif")
How about this approach:
# Give every month the same legend: use the union of all months' labels as
# the factor levels, then map the level index to one continuous colour scale
# whose limits cover every level, whether or not it occurs in a given month.
lab_jan <- labels(0, (length(states$ID) - 1) * 5)
lab_feb <- labels(20, (length(states$ID) + 3) * 5)
lab_all <- union(lab_jan, lab_feb)

states_jan <- bind_cols(states, lab_jan = lab_jan)
states_feb <- bind_cols(states, lab_feb = lab_feb)
states_jan <- states_jan %>%
  mutate(lab_jan = factor(lab_jan, levels = lab_all))
states_feb <- states_feb %>%
  mutate(lab_feb = factor(lab_feb, levels = lab_all))

# Derive the legend range from the labels instead of hard-coding the number
# of levels, and keep only the breaks that actually exist.
n_levels <- length(lab_all)
legend_breaks <- c(1, 10, 20, 30, 40, 50)
legend_breaks <- legend_breaks[legend_breaks <= n_levels]

# Returns a fresh copy of the fill scale shared by all monthly maps.
month_fill_scale <- function() {
  scale_fill_gradient(
    low = "#EE7F74", high = "#86372E",
    limits = c(1, n_levels),
    breaks = legend_breaks,
    labels = lab_all[legend_breaks]
  )
}

jan_01 <- ggplot() +
  geom_sf(data = states_jan, aes(fill = as.numeric(lab_jan))) +
  theme_void() +
  month_fill_scale() +
  labs(fill = "")
# To write the frame to disk, call ggsave() as its own statement; it cannot
# be added to the plot with `+`:
# ggsave("01_jan.png", plot = jan_01, width = 10)

feb_02 <- ggplot() +
  geom_sf(data = states_feb, aes(fill = as.numeric(lab_feb))) +
  theme_void() +
  month_fill_scale() +
  labs(fill = "")

gridExtra::grid.arrange(jan_01, feb_02, nrow = 1)

Format axis and label for line graph using ggplot2

Here is my sample data:
# Sample data: one row per singer and episode, with the singer's rank and
# share of votes in that episode.
Singer <- c("A", "B", "C", "A", "B", "C")
Rank <- c(1, 2, 3, 3, 2, 1)
Episode <- c(1, 1, 1, 2, 2, 2)
Votes <- c(0.3, 0.28, 0.11, 0.14, 0.29, 0.38)
# data.frame() is base R; the deprecated data_frame() needs a package that
# this snippet never loads.
data <- data.frame(Episode, Singer, Rank, Votes)
data$Episode <- as.character(data$Episode)
I would like to make a line graph to show the performance of each singer.
I tried to use ggplot2 like below:
ggplot(data,aes(x=Episode,y=Votes,group = Singer)) + geom_line()
I have two questions:
How can I format the y-axis as percentage?
How can I label each dot in this line graph as the values of "Rank", which allows me to show rank and votes in the same graph?
To label each point use:
geom_label(aes(label = Rank))
# or
geom_text(aes(label = Rank), nudge_y = .01, nudge_x = 0)
To format the axis labels use:
scale_y_continuous(labels = scales::percent_format())
# or without package(scales):
scale_y_continuous(breaks = (seq(0, .4, .2)), labels = sprintf("%1.f%%", 100 * seq(0, .4, .2)), limits = c(0,.4))
Complete code:
library(ggplot2)
library(scales)
ggplot(data, aes(x = factor(Episode), y = Votes, group = Singer)) +
geom_line() +
geom_label(aes(label = Rank)) +
scale_y_continuous(labels = scales::percent_format())
Data:
# Sample data: one row per singer and episode, with the singer's rank and
# share of votes in that episode.
Singer <- c("A", "B", "C", "A", "B", "C")
Rank <- c(1, 2, 3, 3, 2, 1)
Episode <- c(1, 1, 1, 2, 2, 2)
Votes <- c(0.3, 0.28, 0.11, 0.14, 0.29, 0.38)
# data.frame() is base R; the deprecated data_frame() needs a package that
# this snippet never loads.
data <- data.frame(Episode, Singer, Rank, Votes)
# no need to transform to character bc we use factor(Episode) in aes(x=..)

Resources