Related
Here is the codes and the present outplot
df <- data.frame(state = c('0','1'),
male = c(26287942,9134784),
female = c(16234000,4406645))
#output
> df
state male female
1 0 26287942 16234000
2 1 9134784 4406645
library(ggplot2)
library(tidyr)
df_long <- pivot_longer(df, cols = c("female","male"))
names(df_long) <- c('state','sex','observations')
ggplot(data = df_long) +
geom_col(aes(x = sex, y =observations, fill = state)) +
theme(legend.position = c(0.1,0.9),
legend.background = element_rect(fill='lightgrey') )
I want to adjust the plots like this. (I marked what I want to change.)
Simplify the scientific records in y-axis.
Count the ratio (the number of state 1)/(the number of state 0 + state 1) and plot like this.
It may be a little complicated, and I don't know which functions to use. If possible, can anyone tell me some related functions or examples?
You can set options(scipen = 99) to disable scientific notation on y-axis. We can create a separate dataset for label data.
library(tidyverse)
options(scipen = 99)
long_data <- df %>%
pivot_longer(cols = c(male, female),
names_to = "sex",
values_to = "observations")
label_data <- long_data %>%
group_by(sex) %>%
summarise(perc = observations[match(1, state)]/sum(observations),
total = sum(observations), .groups = "drop")
ggplot(long_data) +
geom_col(aes(x = sex, y = observations, fill = state)) +
geom_text(data = label_data,
aes(label = round(perc, 2), x = sex, y = total),
vjust = -0.5) +
theme(legend.position = c(0.1,0.9),
legend.background = element_rect(fill='lightgrey'))
By searching the Internet for about two days, I have finished the work!
sex <- c('M','F')
y0 <- c(26287942,16234000)
y1 <- c(9134784, 4406645)
y0 <- y0*10^{-7}
y1 <- y1*10^{-7}
ratio <- y1/(y0+y1)
ratio <- round(ratio,2)
m <- t(matrix(c(y0,y1),ncol=2))
colnames(m) <- c(as.character(sex))
df <- as.data.frame(m)
df <- cbind(c('0','1'),df)
colnames(df)[1] <- 'observations'
df
df_long <- pivot_longer(df, cols = as.character(sex))
names(df_long) <- c('state','sex','observations')
df_r <- as.data.frame(df_long)
df_r <- data.frame(df_r,ratio=rep(ratio,2))
ggplot(data = df_r) +
geom_col(aes(x =sex, y = observations, fill = state))+
theme(legend.position = c(0.1,0.9),
legend.background = element_rect(fill=NULL) )+
geom_line(aes(x=sex,y=ratio*10),group=1)+
geom_point(aes(x=sex,y=ratio*10))+
geom_text(aes(x=sex,y=ratio*10+0.35),label=rep(ratio,2))+
scale_y_continuous(name =expression(paste('observations(','\u00D7', 10^7,')')),
sec.axis = sec_axis(~./10,name='ratio'))
The output:
Basically, I have a heatmap that contains some points. What Im trying to do is automatically rescale the size of the points in a sensible way for different sized heatmaps. For example, if I have a heatmap that looks like so:
library(reshape)
library(ggplot2)
library(ggnewscale)
# Create matrix
set.seed(1701)
a <- sample(1:10,100, replace=TRUE)
s <- matrix(a, nrow = 5, ncol=5)
s[upper.tri(s)] = t(s)[upper.tri(s)]
rownames(s) <- colnames(s) <- paste0("x", 1:5)
diag(s) <- 0
sDf <- melt(s)
# create diagonal values
diagDf <- data.frame(
var1 = c(paste0("x", 1:5)),
var2 = c(paste0("x", 1:5)),
val = c(2,5,3,1,5)
)
# make plot
ggplot(sDf, aes(X1,X2)) +
geom_tile(aes(fill = value)) +
scale_fill_gradientn(colors = rev(colorspace::sequential_hcl(palette = "Blues 3", n = 100))) +
new_scale_fill() +
geom_point(data = diagDf, aes(var1, var2, col = val), size = 20) +
theme(aspect.ratio = 1)
So in the image above, the diagonal contains geom_points and their size is manually set to size = 20.... This works for this example, but the issue is:
If the heatmap dimensions were changed to say 20x20, then having the size hardcoded to equal 20 won't work due to overlapping & the points being too big etc.
So what Im trying to do is come up with a method that will automatically resize the points to effectively fill square they are contained in without overlapping, being too big or too small.
Any suggestions as to how I could do this?
I would do something like this:
library(reshape)
library(ggplot2)
library(ggnewscale)
n <- 5
# Create matrix
set.seed(1701)
a <- sample(1:10,100, replace=TRUE)
s <- matrix(a, nrow = n, ncol=n)
s[upper.tri(s)] = t(s)[upper.tri(s)]
rownames(s) <- colnames(s) <- paste0("x", 1:n)
diag(s) <- 0
sDf <- melt(s)
# create diagonal values
diagDf <- data.frame(
var1 = c(paste0("x", 1:n)),
var2 = c(paste0("x", 1:n)),
val = sample(1:5,n,replace = T)
)
# make plot
ggplot(sDf, aes(X1,X2)) +
geom_tile(aes(fill = value)) +
scale_fill_gradientn(colors = rev(colorspace::sequential_hcl(palette = "Blues 3", n = 100))) +
new_scale_fill() +
geom_point(data = diagDf, aes(var1, var2, col = val), size = 1/sqrt(nrow(sDf))*80) +
theme(aspect.ratio = 1)
here the size of the points depends on the dimension of the matrix.
an example of the output with a 3x3, 5x5, and 10x10 matrix
You can modify diagDf to contain the co-ordinates of the circles you want to plot using some basic trigonometry, then plot them as filled polygons. This ensures they will always scale exactly with your plot.
library(dplyr)
diagDf <- diagDf %>%
mutate(var1 = as.numeric(as.factor(var1)),
var2 = as.numeric(as.factor(var2))) %>%
split.data.frame(diagDf$var1) %>%
lapply(function(x) {
deg <- seq(0, 2 * pi, length = 100)
var1 <- cos(deg)/2.2
var2 <- sin(deg)/2.2
val <- rep(x$val, 100)
data.frame(var1 = var1 + x$var1, var2 = var2 + x$var2, val = val)}) %>%
{do.call(rbind, .)}
Now with slightly modified plot code, we get:
ggplot(sDf, aes(X1,X2)) +
geom_tile(aes(fill = value)) +
scale_fill_gradientn(colors=rev(colorspace::sequential_hcl(palette = "Blues 3", n=100))) +
new_scale_fill() +
geom_polygon(data = diagDf, aes(var1, var2, fill = val, group = val)) +
theme(aspect.ratio = 1)
Created on 2021-09-27 by the reprex package (v2.0.0)
I want to dissolve a polygon so I get only a lines for the outline of the whole region instead of it being broken up by county.
install.packages (c("tidyverse","mapdata","maps","stringr","viridis"))
library(tidyverse)
library(mapdata)
library(maps)
library(stringr)
library(viridis)
california <- map_data("state", region="california")
california1 <- ggplot() +
geom_polygon(data = california,
aes(x = long, y = lat, group = group),
color="black", fill="NA") +
coord_quickmap()
#california county lines
uscounties <-map_data("county")
ca_county <- uscounties %>% filter(region == "california")
central<- ca_county %>%
filter(subregion %in% c("alpline", "kings", "tulare", "fresno", "inyo", "kern", "madera"))
ca2 <- california1 +
theme_void() +
geom_polygon(data = central,
aes(x = long, y = lat, group = group),
fill = "white", color = "black") +
geom_polygon(color = "black", fill = NA) +
annotate("text", x = -119, y = 46.5, label="Central", colour="black")
ca2
Thanks in advance for the help!
I've answered a similar question before. Reworked it slightly for your use case, with explanations in annotated code below:
library(tidyverse)
library(maps)
# get map (as map object)
county_map <- map("county", regions = "california",
fill = T, plot = FALSE)
# convert to SpatialPolygonsDataFrame object (using maptools & sp packages)
county_map_match <- data.frame(name = county_map$names) %>%
separate(name, c("region", "subregion"), sep = ",", remove = FALSE) %>%
mutate(central = subregion %in% c("alpline", "kings", "tulare",
"fresno", "inyo", "kern", "madera")) %>%
column_to_rownames("name")
county_map <- maptools::map2SpatialPolygons(county_map, ID = county_map$names)
county_map <- sp::SpatialPolygonsDataFrame(county_map, county_map_match)
rm(county_map_match)
# remove any invalidity (using rgeos package) before dissolving
rgeos::gIsValid(county_map) # check
county_map <- rgeos::gBuffer(county_map, byid = TRUE, width = 0)
rgeos::gIsValid(county_map) # check again (invalidities removed)
# dissolve by whether each polygon is part of central area
county_map <- maptools::unionSpatialPolygons(county_map, IDs = county_map$central)
county_map <- fortify(county_map)
county_map <- county_map %>% filter(group == "TRUE.1")
# plot all the central counties as one polygon
ggplot() +
geom_polygon(data = county_map,
aes(x = long, y = lat, group = group),
fill = "white", colour = "black") +
coord_map()
I have a dataset of people arriving in a location, how long they stayed, and their home locations. I want to create an animated chart which 'flies' them to their destination, and returns them to their original point once their trip is over. But I'm not sure if this is possible with gganimate or not. At the moment I only seem to be able to do a "start" and "end" frame, though it's a little hard to tell whether it just doesn't have enough frames to do the intended action.
Here's something like what I have so far:
library(dplyr)
library(ggplot2)
library(ggmap)
library(gganimate)
#Coordinates
europecoords <- c(left = -23, bottom = 36, right = 27.87, top = 70.7)
londonareacoords <- c(left = -.7, bottom = 51, right = 0.2, top = 52)
londonpointcoords <- as.data.frame(list(lon = -.14, lat = 51.49))
#Get the map we'll use as the background
europe <- get_stamenmap(europecoords, zoom = 4, maptype = "toner-lite")
#Sample dataset configuration
numberofpoints <- 10
balance <- 0.1
#Set up an example dataset
ids <- seq(1:numberofpoints)
arrivalday <- sample(x = 30, size = numberofpoints, replace = TRUE)
staylength <- sample(x = 7, size = numberofpoints, replace = TRUE)
startlocationlondonarealon <- sample(x = seq(londonareacoords['left'] * 10, londonareacoords['right'] * 10), size = numberofpoints * balance, replace = TRUE) / 10
startlocationlondonarealat <- sample(x = seq(londonareacoords['bottom'] * 10, londonareacoords['top'] * 10), size = numberofpoints * balance, replace = TRUE) / 10
startlocationeuropelon <- sample(x = seq(europecoords['left'] * 10, europecoords['right'] * 10), size = (numberofpoints * (1 - balance)), replace = TRUE) / 10
startlocationeuropelat <- sample(x = seq(europecoords['bottom'] * 10, europecoords['top'] * 10), size = (numberofpoints * (1 - balance)), replace = TRUE) / 10
startlocationlon <- c(startlocationlondonarealon, startlocationeuropelon)
startlocationlat <- c(startlocationlondonarealat, startlocationeuropelat)
points <- as.data.frame(cbind(ID = ids, arrivalday, staylength, departureday = arrivalday + staylength, startlocationlon, startlocationlat))
#Map the sample dataset to check it looks reasonable
ggmap(europe) +
geom_point(data = points, aes(x = startlocationlon, y = startlocationlat), col = "blue", size = 2) +
geom_point(data = londonpointcoords, aes(x = lon, y = lat), col = "red")
#Separate the events out to rearrange, then glue them back together
event1 <- points %>%
mutate(Event = "Day Before Arrival", Date = arrivalday - 1) %>%
mutate(Lon = startlocationlon,
Lat = startlocationlat) %>%
select(ID, Event, Date, Lon, Lat)
event2 <- points %>%
mutate(Event = "Arrival Date", Date = arrivalday) %>%
mutate(Lon = londonpointcoords$lon[1],
Lat = londonpointcoords$lat[1]) %>%
select(ID, Event, Date, Lon, Lat)
event3 <- points %>%
mutate(Event = "Departure Date", Date = departureday) %>%
mutate(Lon = londonpointcoords$lon[1],
Lat = londonpointcoords$lat[1]) %>%
select(ID, Event, Date, Lon, Lat)
event4 <- points %>%
mutate(Event = "Day After Departure", Date = departureday + 1) %>%
mutate(Lon = startlocationlon,
Lat = startlocationlat) %>%
select(ID, Event, Date, Lon, Lat)
events <- rbind(event1, event2, event3, event4) %>%
mutate(Event = factor(Event, ordered = TRUE, levels = c("Day Before Arrival", "Arrival Date", "Departure Date", "Day After Departure"))) %>%
mutate(ID = factor(ID))
#Make an animation
ggmap(europe) +
geom_point(data = events, aes(x = Lon, y = Lat, group = ID, col = ID), size = 2) +
#geom_point(data = londonpointcoords, aes(x = lon, y = lat), col = "red") +
transition_manual(Date) +
labs(title = "Date: {frame}") +
NULL
But as I said, the points don't seem to be 'flying' as much as just appearing and disappearing. Should I be using a different data format? Transition type? Number of frames? (I'm having trouble finding documentation on any of the above, which is part of why I'm stuck...)
Final result
Code
library(ggplot2)
library(ggmap)
library(gganimate)
ggm <- ggmap(europe) +
geom_point(data = events,
aes(x = Lon, y = Lat,
colour = ID, group = ID, shape = Event),
size = 3, alpha = 0.8) +
transition_time(Date) +
labs(title = paste("Day", "{round(frame_time,0)}")) +
shadow_wake(wake_length = 0.1)
animate(ggm, fps = 24, duration = 16)
========================================================
Step-by-step
You have lots of moving parts there. Let's break it down a bit:
0. Load libraries
library(ggplot2)
library(ggmap)
library(gganimate)
library(ggrepel) # will be useful for data exploration in step 1
1. Data exploration
ggplot(data = events, aes(x = ID, y = Date, colour = Event)) +
geom_point()
We see, that the arrival and departure events are each quite close together for each plane. Also, there is always a gap of a couple of days inbetween. That seems reasonable.
Let's check the Date variable:
> length(unique(events$Date))
[1] 24
> min(events$Date)
[1] 2
> max(events$Date)
[1] 33
Okay, this means two things:
Our data points are unevenly spaced.
We don't have data for all Dates.
Both things will make the animation part quite challenging.
ggplot(data = unique(events[, 4:5]), aes(x = Lon, y = Lat)) +
geom_point()
Furthermore, we only have 11 unique locations (== airports). This will probaly lead to overlapping data. Let's plot it by day:
ggplot(data = unique(events[, 3:5]), aes(x = Lon, y = Lat, label = Date)) +
geom_point() +
geom_text_repel()
Yup, this will be fun... Lots of things happening at that airport in the middle.
2. Basic animation
gga <- ggplot(data = events, aes(x = Lon, y = Lat)) +
geom_point() +
transition_time(Date)
animate(gga)
We used transition_time() and not transition_states(), because the former is used for linear time variables (e.g., second, day, year) and automatic interpolation, while the latter gives more manual control to the user.
3. Let's add colour
gga <- ggplot(data = events, aes(x = Lon, y = Lat, colour = ID)) +
geom_point() +
transition_time(Date)
animate(gga)
It's starting to look like something!
4. Add title, transparency, increase size
gga <- ggplot(data = events, aes(x = Lon, y = Lat, col = ID)) +
geom_point(size = 3, alpha = 0.5) +
transition_time(Date) +
labs(title = paste("Day", "{round(frame_time, 0)}"))
Note the rounded {round(frame_time, 0)}. Try using {frame_time} and see what happens!
5. Add some pizzaz
gga <- ggplot(data = events, aes(x = Lon, y = Lat, col = ID, group = ID,
shape = Event)) +
geom_point(size = 3, alpha = 0.5) +
transition_time(Date) +
labs(title = paste("Day", "{round(frame_time, 0)}")) +
shadow_wake(wake_length = 0.05)
animate(gga)
Looks good, let's finish it up!
6. Add the map, make animation slower, tweak some details
ggm <- ggmap(europe) +
geom_point(data = events,
aes(x = Lon, y = Lat,
colour = ID, group = ID, shape = Event),
size = 3, alpha = 0.8) +
transition_time(Date) +
labs(title = paste("Day", "{round(frame_time,0)}")) +
shadow_wake(wake_length = 0.1)
animate(ggm, fps = 24, duration = 16)
Not too shabby, eh? As a side note: animate(ggm, nframes = 384) would have had the same effect on the animation as fps = 24 with duration = 16.
If you have any question please do not hesitate to shoot me a comment.
I will try my best to help or clarify things.
I saw yesterday this beautiful map of McDonalds restaurants in USA. I wanted to replicate it for France (I found some data that can be downloaded here).
I have no problem plotting the dots:
library(readxl)
library(ggplot2)
library(raster)
#open data
mac_do_FR <- read_excel("./mcdo_france.xlsx")
mac_do_FR_df <- as.data.frame(mac_do_FR)
#get a map of France
mapaFR <- getData("GADM", country="France", level=0)
#plot dots on the map
ggplot() +
geom_polygon(data = mapaFR, aes(x = long, y = lat, group = group),
fill = "transparent", size = 0.1, color="black") +
geom_point(data = mac_do_FR_df, aes(x = lon, y = lat),
colour = "orange", size = 1)
I tried several methods (Thiessen polygons, heat maps, buffers), but the results I get are very poor. I can't figure out how the shaded polygons were plotted on the American map. Any pointers?
Here's my result, but it did take some manual data wrangling.
Step 1: Get geospatial data.
library(sp)
# generate a map of France, along with a fortified dataframe version for ease of
# referencing lat / long ranges
mapaFR <- raster::getData("GADM", country="France", level=0)
map.FR <- fortify(mapaFR)
# generate a spatial point version of the same map, defining your own grid size
# (a smaller size yields a higher resolution heatmap in the final product, but will
# take longer to calculate)
grid.size = 0.01
points.FR <- expand.grid(
x = seq(min(map.FR$long), max(map.FR$long), by = grid.size),
y = seq(min(map.FR$lat), max(map.FR$lat), by = grid.size)
)
points.FR <- SpatialPoints(coords = points.FR, proj4string = mapaFR#proj4string)
Step 2: Generate a voronoi diagram based on store locations, & obtain the corresponding polygons as a SpatialPolygonsDataFrame object.
library(deldir)
library(dplyr)
voronoi.tiles <- deldir(mac_do_FR_df$lon, mac_do_FR_df$lat,
rw = c(min(map.FR$long), max(map.FR$long),
min(map.FR$lat), max(map.FR$lat)))
voronoi.tiles <- tile.list(voronoi.tiles)
voronoi.center <- lapply(voronoi.tiles,
function(l) data.frame(x.center = l$pt[1],
y.center = l$pt[2],
ptNum = l$ptNum)) %>%
data.table::rbindlist()
voronoi.polygons <- lapply(voronoi.tiles,
function(l) Polygon(coords = matrix(c(l$x, l$y),
ncol = 2),
hole = FALSE) %>%
list() %>%
Polygons(ID = l$ptNum)) %>%
SpatialPolygons(proj4string = mapaFR#proj4string) %>%
SpatialPolygonsDataFrame(data = voronoi.center,
match.ID = "ptNum")
rm(voronoi.tiles, voronoi.center)
Step 3. Check which voronoi polygon each point on the map overlaps with, & calculate its distance to the corresponding nearest store.
which.voronoi <- over(points.FR, voronoi.polygons)
points.FR <- cbind(as.data.frame(points.FR), which.voronoi)
rm(which.voronoi)
points.FR <- points.FR %>%
rowwise() %>%
mutate(dist = geosphere::distm(x = c(x, y), y = c(x.center, y.center))) %>%
ungroup() %>%
mutate(dist = ifelse(is.na(dist), max(dist, na.rm = TRUE), dist)) %>%
mutate(dist = dist / 1000) # convert from m to km for easier reading
Step 4. Plot, adjusting the fill gradient parameters as needed. I felt the result of a square root transformation looks quite good for emphasizing distances close to a store, while a log transformation is rather too exaggerated, but your mileage may vary.
ggplot() +
geom_raster(data = points.FR %>%
mutate(dist = pmin(dist, 100)),
aes(x = x, y = y, fill = dist)) +
# optional. shows outline of France for reference
geom_polygon(data = map.FR,
aes(x = long, y = lat, group = group),
fill = NA, colour = "white") +
# define colour range, mid point, & transformation (if desired) for fill
scale_fill_gradient2(low = "yellow", mid = "red", high = "black",
midpoint = 4, trans = "sqrt") +
labs(x = "longitude",
y = "latitude",
fill = "Distance in km") +
coord_quickmap()