Create a 'flyover' map animation using ggmap and gganimate - r

I have a dataset of people arriving in a location, how long they stayed, and their home locations. I want to create an animated chart which 'flies' them to their destination, and returns them to their original point once their trip is over. But I'm not sure if this is possible with gganimate or not. At the moment I only seem to be able to do a "start" and "end" frame, though it's a little hard to tell whether it just doesn't have enough frames to do the intended action.
Here's something like what I have so far:
library(dplyr)
library(ggplot2)
library(ggmap)
library(gganimate)
#Coordinates
europecoords <- c(left = -23, bottom = 36, right = 27.87, top = 70.7)
londonareacoords <- c(left = -.7, bottom = 51, right = 0.2, top = 52)
londonpointcoords <- as.data.frame(list(lon = -.14, lat = 51.49))
#Get the map we'll use as the background
europe <- get_stamenmap(europecoords, zoom = 4, maptype = "toner-lite")
#Sample dataset configuration
numberofpoints <- 10
balance <- 0.1
#Set up an example dataset
ids <- seq(1:numberofpoints)
arrivalday <- sample(x = 30, size = numberofpoints, replace = TRUE)
staylength <- sample(x = 7, size = numberofpoints, replace = TRUE)
startlocationlondonarealon <- sample(x = seq(londonareacoords['left'] * 10, londonareacoords['right'] * 10), size = numberofpoints * balance, replace = TRUE) / 10
startlocationlondonarealat <- sample(x = seq(londonareacoords['bottom'] * 10, londonareacoords['top'] * 10), size = numberofpoints * balance, replace = TRUE) / 10
startlocationeuropelon <- sample(x = seq(europecoords['left'] * 10, europecoords['right'] * 10), size = (numberofpoints * (1 - balance)), replace = TRUE) / 10
startlocationeuropelat <- sample(x = seq(europecoords['bottom'] * 10, europecoords['top'] * 10), size = (numberofpoints * (1 - balance)), replace = TRUE) / 10
startlocationlon <- c(startlocationlondonarealon, startlocationeuropelon)
startlocationlat <- c(startlocationlondonarealat, startlocationeuropelat)
points <- as.data.frame(cbind(ID = ids, arrivalday, staylength, departureday = arrivalday + staylength, startlocationlon, startlocationlat))
#Map the sample dataset to check it looks reasonable
ggmap(europe) +
geom_point(data = points, aes(x = startlocationlon, y = startlocationlat), col = "blue", size = 2) +
geom_point(data = londonpointcoords, aes(x = lon, y = lat), col = "red")
#Separate the events out to rearrange, then glue them back together
event1 <- points %>%
mutate(Event = "Day Before Arrival", Date = arrivalday - 1) %>%
mutate(Lon = startlocationlon,
Lat = startlocationlat) %>%
select(ID, Event, Date, Lon, Lat)
event2 <- points %>%
mutate(Event = "Arrival Date", Date = arrivalday) %>%
mutate(Lon = londonpointcoords$lon[1],
Lat = londonpointcoords$lat[1]) %>%
select(ID, Event, Date, Lon, Lat)
event3 <- points %>%
mutate(Event = "Departure Date", Date = departureday) %>%
mutate(Lon = londonpointcoords$lon[1],
Lat = londonpointcoords$lat[1]) %>%
select(ID, Event, Date, Lon, Lat)
event4 <- points %>%
mutate(Event = "Day After Departure", Date = departureday + 1) %>%
mutate(Lon = startlocationlon,
Lat = startlocationlat) %>%
select(ID, Event, Date, Lon, Lat)
events <- rbind(event1, event2, event3, event4) %>%
mutate(Event = factor(Event, ordered = TRUE, levels = c("Day Before Arrival", "Arrival Date", "Departure Date", "Day After Departure"))) %>%
mutate(ID = factor(ID))
#Make an animation
ggmap(europe) +
geom_point(data = events, aes(x = Lon, y = Lat, group = ID, col = ID), size = 2) +
#geom_point(data = londonpointcoords, aes(x = lon, y = lat), col = "red") +
transition_manual(Date) +
labs(title = "Date: {frame}") +
NULL
But as I said, the points don't seem to be 'flying' as much as just appearing and disappearing. Should I be using a different data format? Transition type? Number of frames? (I'm having trouble finding documentation on any of the above, which is part of why I'm stuck...)

Final result
Code
library(ggplot2)
library(ggmap)
library(gganimate)
ggm <- ggmap(europe) +
geom_point(data = events,
aes(x = Lon, y = Lat,
colour = ID, group = ID, shape = Event),
size = 3, alpha = 0.8) +
transition_time(Date) +
labs(title = paste("Day", "{round(frame_time,0)}")) +
shadow_wake(wake_length = 0.1)
animate(ggm, fps = 24, duration = 16)
========================================================
Step-by-step
You have lots of moving parts there. Let's break it down a bit:
0. Load libraries
library(ggplot2)
library(ggmap)
library(gganimate)
library(ggrepel) # will be useful for data exploration in step 1
1. Data exploration
ggplot(data = events, aes(x = ID, y = Date, colour = Event)) +
geom_point()
We see, that the arrival and departure events are each quite close together for each plane. Also, there is always a gap of a couple of days inbetween. That seems reasonable.
Let's check the Date variable:
> length(unique(events$Date))
[1] 24
> min(events$Date)
[1] 2
> max(events$Date)
[1] 33
Okay, this means two things:
Our data points are unevenly spaced.
We don't have data for all Dates.
Both things will make the animation part quite challenging.
ggplot(data = unique(events[, 4:5]), aes(x = Lon, y = Lat)) +
geom_point()
Furthermore, we only have 11 unique locations (== airports). This will probaly lead to overlapping data. Let's plot it by day:
ggplot(data = unique(events[, 3:5]), aes(x = Lon, y = Lat, label = Date)) +
geom_point() +
geom_text_repel()
Yup, this will be fun... Lots of things happening at that airport in the middle.
2. Basic animation
gga <- ggplot(data = events, aes(x = Lon, y = Lat)) +
geom_point() +
transition_time(Date)
animate(gga)
We used transition_time() and not transition_states(), because the former is used for linear time variables (e.g., second, day, year) and automatic interpolation, while the latter gives more manual control to the user.
3. Let's add colour
gga <- ggplot(data = events, aes(x = Lon, y = Lat, colour = ID)) +
geom_point() +
transition_time(Date)
animate(gga)
It's starting to look like something!
4. Add title, transparency, increase size
gga <- ggplot(data = events, aes(x = Lon, y = Lat, col = ID)) +
geom_point(size = 3, alpha = 0.5) +
transition_time(Date) +
labs(title = paste("Day", "{round(frame_time, 0)}"))
Note the rounded {round(frame_time, 0)}. Try using {frame_time} and see what happens!
5. Add some pizzaz
gga <- ggplot(data = events, aes(x = Lon, y = Lat, col = ID, group = ID,
shape = Event)) +
geom_point(size = 3, alpha = 0.5) +
transition_time(Date) +
labs(title = paste("Day", "{round(frame_time, 0)}")) +
shadow_wake(wake_length = 0.05)
animate(gga)
Looks good, let's finish it up!
6. Add the map, make animation slower, tweak some details
ggm <- ggmap(europe) +
geom_point(data = events,
aes(x = Lon, y = Lat,
colour = ID, group = ID, shape = Event),
size = 3, alpha = 0.8) +
transition_time(Date) +
labs(title = paste("Day", "{round(frame_time,0)}")) +
shadow_wake(wake_length = 0.1)
animate(ggm, fps = 24, duration = 16)
Not too shabby, eh? As a side note: animate(ggm, nframes = 384) would have had the same effect on the animation as fps = 24 with duration = 16.
If you have any question please do not hesitate to shoot me a comment.
I will try my best to help or clarify things.

Related

Arrows showing direction of path on every nth data point in R ggplot2 plot

I want to use ggplot2 to create a path with arrows in a plot. However, I have a lot of data points and so I only want the arrow on every nth datapoint. I adapted this answer for every nth label to put an observation point every nth data point, but if I try to use this with path I get straight lines between these points. I just want the arrow head.
The MWE below shows my attempt to get the two paths working together (I do want the full path as a line), and what worked for points (that I want to be directional arrows). In my real data set the arrows will point in different directions (so I can't just use a static arrow head as the observation symbol). I am also working with other filtering within the plots, and so creating new data frames that only keep some points is not a convenient solution.
MWE
library(tidyverse)
library(tidyr)
library(dplyr)
x <- seq(from = -100, to = 100, by = 0.01)
y <- x^3 - 2 * x + x
df<- data.frame(x,y)
df$t<- seq(1:nrow(df))
ggplot(data = df, aes(x = x, y = y)) +
geom_path(size = 0.1, aes(colour = t)) +
geom_path(aes(colour = t),data = . %>% filter(row_number() %% 2000 == 0), arrow = arrow(type = 'open', angle = 30, length = unit(0.1, "inches")))
ggplot(data = df, aes(x = x, y = y)) +
geom_path(size = 0.1, aes(colour = t)) +
geom_point(aes(colour = t),data = . %>% filter(row_number() %% 2000 == 0))
You can try to add a grouping variable.
ggplot(data = df, aes(x = x, y = y)) +
geom_path(aes(colour = t, group =factor(gr)),data = . %>% filter(row_number() %% 2000 == 0) %>%
mutate(gr = gl(n()/2, 2)),
arrow = arrow(type = 'open', angle = 30, length = unit(0.1, "inches")))

R: ggplot2 let the characters of geom_text exactly cover one X unit

I want to highlight text based on the position in a string, for example if we have this text:
this is a really nice informative piece of text
Then I want to say let's draw a rectangle around positions 2 till 4:
t[his] is a really nice informative piece of text
I tried to do so in ggplot2 using the following code:
library(ggplot2)
library(dplyr)
box.data <- data.frame(
start = c(4,6,5,7,10,7),
type = c('BOX1.start', 'BOX1.start', 'BOX1.start','BOX1.end', 'BOX1.end', 'BOX1.end'),
text.id = c(1,2,3,1,2,3)
)
text.data <- data.frame(
x = rep(1,3),
text.id = c(1,2,3),
text = c('Thisissomerandomrandomrandomrandomtext1',
'Thisissomerandomrandomrandomrandomtext2',
'Thisissomerandomrandomrandomrandomtext3')
)
ggplot(data = text.data, aes(x = x, y = text.id)) +
scale_x_continuous(limits = c(1, nchar(as.character(text.data$text[1])))) +
geom_text(label = text.data$text, hjust = 0, size = 3) +
geom_line(data = box.data, aes(x = start, y = text.id, group = text.id, size = 3, alpha = 0.5, colour = 'red'))
This produces the following graph:
My method fails as a letter does not cover exactly one unit of the x-axis, is there any way to achieve this?
I just figured out that I can split the string in characters and plot these, perhaps it is useful for someone else.
library(ggplot2)
library(dplyr)
library(splitstackshape)
# First remember the plotting window, which equals the text length
text.size = nchar(as.character(text.data$text[1]))
# Split the string into single characters, and adjust the X-position to the string position
text.data <- cSplit(text.data, 'text', sep = '', direction = 'long', stripWhite = FALSE) %>%
group_by(text.id) %>%
mutate(x1 = seq(1,n()))
# Plot each character and add highlights
ggplot(data = text.data, aes(x = x1, y = text.id)) +
scale_x_continuous(limits = c(1, text.size)) +
geom_text(aes(x = text.data$x1, y = text.data$text.id, group = text.id, label = text)) +
geom_line(data = box.data, aes(x = start, y = text.id, group = text.id, size = 3, alpha = 0.5, colour = 'red'))
Which produces this plot:
Perhaps the marking should extend a little but upwards and downwards, but that's an easy fix.

Connect multiple points across multiple facets with a dashed line

I have following data. Each observation is a genomic coordinate with copy number changes (copy.number.type) which is found in some percentage of samples (per.found).
chr<-c('1','12','2','12','12','4','2','X','12','12','16','16','16','5'
,'4','16','X','16','16','4','1','5','2','4','5','X','X','X','4',
'1','16','16','1','4','4','12','2','X','1','16','16','2','1','12',
'2','2','4','4','2','1','5','X','4','2','12','16','2','X','4','5',
'4','X','5','5')
start <- c(247123880,91884413,88886155,9403011,40503634,10667741,88914884,
100632615,25804205,25803542,18925987,21501823,21501855,115902990,
26120955,22008406,432498,22008406,22008406,69306802,4144380,73083197,
47743372,34836043,16525257,315832,1558229,51048657,49635818,239952709,
69727769,27941625,80328938,49136485,49136654,96076105,133702693,315823,
16725215,69728318,88520557,89832606,202205081,124379013,16045662,89836880,
49657307,97117994,76547133,35051701,344973,1770075,49139874,77426085,
9406416,69727781,108238962,151006944,49121333,6669602,89419843,74214551,
91203955,115395615)
type <- c('Inversions','Deletions','Deletions','Deletions','Deletions','Duplications','Deletions','Deletions',
'Duplications','Deletions','Duplications','Inversions','Inversions','Deletions','Duplications',
'Deletions','Deletions','Deletions','Deletions','Inversions','Duplications','Inversions','Inversions',
'Inversions','Deletions','Deletions','Deletions','Insertions','Deletions','Inversions','Inversions',
'Inversions','Inversions','Deletions','Deletions','Inversions','Deletions','Deletions','Inversions',
'Inversions','Deletions','Deletions','Deletions','Insertions','Inversions','Deletions','Deletions',
'Deletions','Inversions','Deletions','Duplications','Inversions','Deletions','Deletions','Deletions',
'Inversions','Deletions','Inversions','Deletions','Inversions','Inversions','Inversions','Deletions','Deletions')
per.found <- c(-0.040,0.080,0.080,0.040,0.080,0.040,0.080,0.040,0.040,0.120,0.040,-0.080,-0.080,0.040,0.040,0.120,
0.040,0.120,0.120,-0.040,0.011,-0.011,-0.023,-0.023,0.011,0.023,0.011,0.011,0.011,-0.011,-0.034,
-0.011,-0.023,0.011,0.011,-0.011,0.023,0.023,-0.023,-0.034,0.011,0.023,0.011,0.011,-0.023,0.023,
0.011,0.011,-0.011,0.011,0.011,-0.023,0.011,0.057,0.011,-0.034,0.023,-0.011,0.011,-0.011,-0.023,
-0.023,0.011,0.011)
df <- data.frame(chromosome = chr, start.coordinate = start, copy.number.type = type, per.found = per.found )
I would like to create a line plot. I created a plot using ggplot (facets), but the problem is I can not connect the points between two facets. Is there any way to do that. I do not necessarily need to use facets if there is a way to annotate x axis scales by chromosome. In the following image the dotted line shows what I would like to have for all copy.number.type lines.
EDIT: Looking for simplified approach.
library(ggplot2)
ggplot(df, aes(x=start.coordinate,y=per.found, group=copy.number.type, color=copy.number.type))+
geom_line()+
geom_point()+
facet_grid(.~chromosome,scales = "free_x", space = "free_x")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Desired output: As shown by the red dashed lines. I want to connect all the border points with a dashed line across facets.
Note: it may not make sense to connect the lines between the chromosomes.
But here is one way, by avoiding facets:
library(dplyr)
df2 <- df %>%
mutate(chromosome = factor(chromosome, c(1, 2, 4, 5, 12, 16, 'X'))) %>%
arrange(chromosome, start.coordinate)
chromosome_positions <- df2 %>%
group_by(chromosome) %>%
summarise(start = first(start.coordinate), end = last(start.coordinate)) %>%
mutate(
size = end - start,
new_start = cumsum(lag(size, default = 0)),
new_end = new_start + size
)
df3 <- df2 %>%
left_join(chromosome_positions, 'chromosome') %>%
mutate(new_x = start.coordinate + (new_start - start))
ggplot(df3, aes(x=new_x,y=per.found, group=copy.number.type, color=copy.number.type))+
geom_rect(
aes(xmin = new_start, xmax = new_end, ymin = -Inf, ymax = Inf, fill = chromosome),
chromosome_positions, inherit.aes = FALSE, alpha = 0.3
) +
geom_line() +
geom_point() +
geom_text(
aes(x = new_start + 0.5 * size, y = Inf, label = chromosome),
chromosome_positions, inherit.aes = FALSE, vjust = 1
) +
scale_fill_manual(values = rep(c('grey60', 'grey90'), 10), guide = 'none') +
scale_x_continuous(expand = c(0, 0))

smoothed grouped proportion plot

I have the following data set:
set.seed(10)
start_date <- as.Date('2000-01-01')
end_date <- as.Date('2000-01-10')
Data <- data.frame(
id = rep((1:1000),10),
group = rep(c("A","B"), 25),
x = sample(1:100),
y = sample(c("1", "0"), 10, replace = TRUE),
date = as.Date(
sample(as.numeric(start_date):
as.numeric(end_date), 1000,
replace = T), origin = '2000-01-01'))
With that, I create the following plot:
Data %>% mutate(treated = factor(group)) %>%
mutate(date = as.POSIXct(date)) %>% #convert date to date
group_by(treated, date) %>% #group
summarise(prop = sum(y=="1")/n()) %>% #calculate proportion
ggplot()+ theme_classic() +
geom_line(aes(x = date, y = prop, color = treated)) +
geom_point(aes(x = date, y = prop, color = treated)) +
geom_vline(xintercept = as.POSIXct("2000-01-05 12:00 GMT"), color = 'black', lwd = 1)
Unfortunately the plot is pretty 'jumpy' and I would like to smooth it. I tried geom_smooth() but can't get it to work. Other questions regarding smoothing didn't help me because they missed the grouping aspect and therefore had a different structure. However, the example data set is in reality part of a larger data set so I need to stick to that code.
[Edit: the geom_smooth() code I tried is geom_smooth(method = 'auto', formula = y ~ x)]
Can someone point me into the right direction?
Many thanks and all the best.
Is this what you want by a smoothed line? You call geom_smooth with aesthetics, not in combination with geom_line. You can choose different smoothing methods, though the default loess with low observations is usually what people want. As an aside, I don't think this is necessarily nicer to look at than the geom_line version, and in fact is slightly less readable. geom_smooth is best used when there are many y observations for every x which makes patterns hard to see, geom_line is good for 1-1.
EDIT: After looking at what you're doing more closely, I added a second plot that doesn't directly calculate the treatment-date means and just uses geom_smooth directly. That lets you get a more reasonable confidence interval instead of having to remove it as before.
set.seed(10)
start_date <- as.Date('2000-01-01')
end_date <- as.Date('2000-01-10')
Data <- data.frame(
id = rep((1:1000),10),
group = rep(c("A","B"), 25),
x = sample(1:100),
y = sample(c("1", "0"), 10, replace = TRUE),
date = as.Date(
sample(as.numeric(start_date):
as.numeric(end_date), 1000,
replace = T), origin = '2000-01-01'))
library(tidyverse)
Data %>%
mutate(treated = factor(group)) %>%
mutate(date = as.POSIXct(date)) %>% #convert date to date
group_by(treated, date) %>% #group
summarise(prop = sum(y=="1")/n()) %>% #calculate proportion
ggplot() +
theme_classic() +
geom_smooth(aes(x = date, y = prop, color = treated), se = F) +
geom_point(aes(x = date, y = prop, color = treated)) +
geom_vline(xintercept = as.POSIXct("2000-01-05 12:00 GMT"), color = 'black', lwd = 1)
#> `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Data %>%
mutate(treated = factor(group)) %>%
mutate(y = ifelse(y == "0", 0, 1)) %>%
mutate(date = as.POSIXct(date)) %>% #convert date to date
ggplot() +
theme_classic() +
geom_smooth(aes(x = date, y = y, color = treated), method = "loess") +
geom_vline(xintercept = as.POSIXct("2000-01-05 12:00 GMT"), color = 'black', lwd = 1)
Created on 2018-03-27 by the reprex package (v0.2.0).

Select random data with ggmap

This code only works for the center coordinates (x=-95.36, y=29.76). I want to change them and select random data from that area.
Does anyone knows what is happening?
#Select random data and connect it
get_googlemap(urlonly = TRUE)
ggmap(get_googlemap())
# markers and paths are easy to access
d <- function(x=-95.36, y=29.76, n,r,a){
round(data.frame(lon = jitter(rep(x,n), amount = a),
lat = jitter(rep(y,n), amount = a)),
digits = r)
}
df <- d(n=50,r=3,a=.3)
map <- get_googlemap(markers = df, path = df,, scale = 2)
ggmap(map)
ggmap(map, extent = "device") +
geom_point(aes(x = lon, y = lat), data = df, size = 3, colour = "black") +
geom_path(aes(x = lon, y = lat), data = df)

Resources