I am looking for a way to plot some table data in facets using ggplot2.
Below are sample data and some code that sort of produces a plot of what I am looking for.
However, using geom_text() makes it difficult to align the lines, and I have problems with text being cropped when I combine the facetted table-plot with other plots.
Thanks in advance
Libray and sample data text_data
library(tidyverse)
text_data <- data.frame(
rep = 1:5,
time = rnorm(5, 2, 0.2),
x = sample(100:130, 5),
y = sample(40:50, 5),
z = rnorm(5, -1, 0.3)
)
# Text data in wide format
text_data
#> rep time x y z
#> 1 1 2.192189 129 47 -1.308432
#> 2 2 2.161335 105 46 -1.186042
#> 3 3 2.340631 106 48 -1.270763
#> 4 4 2.136504 124 44 -1.332719
#> 5 5 2.148028 108 42 -1.249902
Text data in the table format I would like to plot, each rep should be a facet
text_data %>%
pivot_longer(cols = -rep) %>%
knitr::kable(digits = 1)
rep
name
value
1
time
2.2
1
x
129.0
1
y
47.0
1
z
-1.3
2
time
2.2
2
x
105.0
2
y
46.0
2
z
-1.2
3
time
2.3
3
x
106.0
3
y
48.0
3
z
-1.3
4
time
2.1
4
x
124.0
4
y
44.0
4
z
-1.3
5
time
2.1
5
x
108.0
5
y
42.0
5
z
-1.2
Some NOT OPTIMAL to code that produces a facetted plot to show kind of what I am looking for
ggplot(data = text_data) +
geom_text(
y = 0.9,
x = 0.5,
aes(label = paste("Time:", round(time, 1), "seconds"))
) +
geom_text(
y = 0.7,
x = 0.5,
aes(label = paste("X: ", x))
) +
geom_text(
y = 0.5,
x = 0.5,
aes(label = paste("y: ", y))
) +
geom_text(
y = 0.3,
x = 0.5,
aes(label = paste("z: ", round(z, 1)))
) +
facet_grid(rows = vars(rep)) +
theme_minimal()
Created on 2022-09-26 by the reprex package (v2.0.1)
With ggpp::geom_table:
library(ggplot2)
library(ggpp)
library(dplyr)
library(tidyr)
library(tibble)
text_data <- data.frame(
rep = 1:5,
time = rnorm(5, 2, 0.2),
x = sample(100:130, 5),
y = sample(40:50, 5),
z = rnorm(5, -1, 0.3)
)
dat <- text_data %>%
pivot_longer(cols = -rep)
tbs <- lapply(split(dat, dat$rep), "[", -1L)
df <- tibble(x = rep(-Inf, length(tbs)),
y = rep(Inf, length(tbs)),
rep = levels(as.factor(dat$rep)),
tbl = tbs)
ggplot(text_data) +
geom_point(aes(x = x, y = y)) +
geom_table(data = df, aes(x = x, y = y, label = tbl),
hjust = 0, vjust = 1) +
facet_wrap(~ rep)
Here's an approach using the gridExtra package.
library(tidyverse)
library(gridExtra)
text_data <- data.frame(
time = rnorm(5, 2, 0.2) ,
x = sample(100:130, 5),
y = sample(40:50, 5),
z = rnorm(5, -1, 0.3)
) %>%
mutate(
across(.f = ~ as.character(round(., 2))),
time = paste(time, "seconds")
)
grob_list <- map(1:nrow(text_data), ~ {
text_data[.x,] %>%
as_vector() %>%
as.matrix(ncol = 1) %>%
tableGrob(theme = ttheme_minimal())
})
grid.arrange(grobs = grob_list, ncol = 1)
Related
I am trying to plot multiple paths in a gganimate plot. I want the lines to fade out over the last N frames (e.g. N=5 in this example).
The data look like this:
set.seed(27)
df <- data.frame(Frame = rep(1:10, 3),
id = factor(rep(1:3, each = 10)),
x = runif(30),
y = runif(30))
head(df)
Frame id x y
1 1 1 0.97175023 0.14257923
2 2 1 0.08375751 0.47864658
3 3 1 0.87386992 0.05182206
4 4 1 0.32923136 0.25514379
5 5 1 0.22227551 0.14262912
6 6 1 0.40164822 0.48288482
I tried to make the plot using shadow_mark, but this doesn't appear to have the lines fade out over time.
df %>%
ggplot(aes(x = x, y = y, group = id, color = id)) +
geom_path() +
geom_point()+
scale_color_manual(values=c("red","blue","green")) +
transition_reveal(along = Frame) +
shadow_mark(size = 0.75) +
theme_void()
This just produces the below:
Is there a way to make these lines fade. Ideally, I'm just plotting a rolling path of N frames.
Is this something like what you're looking for? Adapted from the post mentioned in the comments. You don't need to use transition_reveal() if you use geom_segment().
library(gganimate)
library(dplyr)
library(tidyr)
set.seed(27)
n <- 10
df <- data.frame(Frame = rep(1:n, 3),
id = factor(rep(1:3, each = n)),
x = runif(3*n),
y = runif(3*n))
newdf <- df %>%
uncount(n, .id = "newframe") %>%
filter(Frame <= newframe) %>%
arrange(newframe, Frame) %>%
group_by(newframe, id) %>%
mutate(x_lag = lag(x),
y_lag = lag(y),
tail = last(Frame) - Frame,
# Make the points solid for 1 frame then alpha 0
point_alpha = if_else(tail == 0, 1, 0),
# Make the lines fade out over 3 frames
segment_alpha = pmax(0, (3-tail)/3)) %>%
ungroup()
ggplot(newdf,
aes(x = y, y = x, xend = y_lag, yend = x_lag, group = Frame, color = id)) +
geom_segment(aes(alpha = segment_alpha)) +
geom_point(aes(alpha = point_alpha)) +
scale_alpha(range = c(0,1)) +
guides(alpha = F) +
transition_manual(newframe) +
theme_void() +
scale_color_manual(values = c("red","blue","green"))
I'm projecting a variable for the next 120 months. I'm having trouble with the following when using ggplot:
In the intervals I'm creating I want to display the last value of each one. Ideally, I want some label that says -for example- for the interval 0.8: "80%:(here would go the last value of that interval)". If this is too difficult, then just the value would be perfect.
Here is a reproducible example
#libraries
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggfan)
library(gridExtra)
library(stringr)
library(scales)
#Create a dataframe
month <- 1:120
price_a <- 5000
demand <- 10
data <- data.frame(month, price_a, demand)
#Create 100 simulations to project price_a and demand for the future
simulations <- 100
intervalo <- seq_len(120)
set.seed(96)
lista_meses <- lapply(setNames(intervalo, paste0("data", intervalo)), function(i) {
cbind(
data[rep(i, simulations),],
growth_pricea = as.numeric(runif(simulations, min = -0.02, max = 0.05)),
growth_demand = as.numeric(runif(simulations, min = -0.03, max = 0.03)),
revenue = demand*price_a
)
})
#Calculate the growth of each variable and revenue
for (i in 2:length(lista_meses)){
lista_meses[[i]][["price_a"]] <- lista_meses[[i-1]][["price_a"]]*(1+lista_meses[[i]][["growth_pricea"]])
lista_meses[[i]][["demand"]] <- lista_meses[[i-1]][["demand"]]*(1+lista_meses[[i]][["growth_demand"]])
lista_meses[[i]][["revenue"]] <- lista_meses[[i]][["price_a"]]*lista_meses[[i]][["demand"]]
}
#Extract revenue columns from all dataframes in list
time <- 1:120 #10 years.
extract_column <- lapply(lista_meses, function(x) x["revenue"])
fandataq <- do.call("cbind", extract_column)
mandataq <- as.matrix.data.frame(fandataq)
pdataq <- data.frame(x=time, t(fandataq)) %>% gather(key=sim, value=y, -x)
#Graph: I WANT TO SHOW THE LAST VALUES OF EACH INTERVAL IN GEOM_INTERVAL
ggplot(pdataq, aes(x=x, y= y)) + geom_fan(intervals =c(80)/100, show.legend = FALSE) +
scale_fill_gradient(low="steelblue1", high="steelblue")+scale_y_continuous(labels = scales::comma)+
geom_interval(intervals = c(0.80,1), show.legend = FALSE) + scale_linetype_manual(values=c("dotted", "dotted")) +
theme_bw()
Does anybody knows how to achieve this? Thanks in advance!
This could be accomplished by pre-calculating the labels and feeding those in as text:
probs = c(0, 0.1, 0.9, 1) # 80% interval from 0.1 to 0.9
label_table <- tibble(x = max(pdataq$x),
probs,
y = quantile(pdataq[pdataq$x == max(pdataq$x), "y"],
probs = probs),
y_label = scales::comma(y))
# OR, using ggfan::calc_quantiles:
#label_table <- calc_quantiles(pdataq, intervals = c(0.8, 1), x_var = "x", y_var = "y") %>%
# ungroup() %>%
# filter(x == max(x)) %>%
# mutate(y_label = scales::comma(y))
## A tibble: 4 x 4
# x probs y y_label
# <int> <dbl> <dbl> <chr>
#1 120 0 124311. 124,311
#2 120 0.1 198339. 198,339
#3 120 0.9 434814. 434,814
#4 120 1 520464. 520,464
ggplot(pdataq, aes(x=x, y= y)) +
geom_fan(intervals =c(80)/100, show.legend = FALSE) +
scale_fill_gradient(low="steelblue1", high="steelblue")+
scale_y_continuous(labels = scales::comma)+
geom_interval(intervals = c(0.80,1), show.legend = FALSE) +
geom_text(data = label_table,
aes(label = y_label), hjust = -0.1, size = 3) +
coord_cartesian(clip = "off") +
scale_x_continuous(expand = expansion(add = c(5, 20))) +
scale_linetype_manual(values=c("dotted", "dotted")) +
theme_bw()
Say I have the following data frame:
# Set seed for RNG
set.seed(33550336)
# Create toy data frame
loc_x <- c(a = 1, b = 2, c = 3)
loc_y <- c(a = 3, b = 2, c = 1)
scaling <- c(temp = 100, sal = 10, chl = 1)
df <- expand.grid(loc_name = letters[1:3],
variables = c("temp", "sal", "chl"),
season = c("spring", "autumn")) %>%
mutate(loc_x = loc_x[loc_name],
loc_y = loc_y[loc_name],
value = runif(nrow(.)),
value = value * scaling[variables])
which looks like,
# > head(df)
# loc_name variables season loc_x loc_y value
# 1 a temp spring 1 3 86.364697
# 2 b temp spring 2 2 35.222573
# 3 c temp spring 3 1 52.574082
# 4 a sal spring 1 3 0.667227
# 5 b sal spring 2 2 3.751383
# 6 c sal spring 3 1 9.197086
I want to plot these data in a facet grid using variables and season to define panels, like this:
g <- ggplot(df) + geom_point(aes(x = loc_name, y = value), size = 5)
g <- g + facet_grid(variables ~ season)
g
As you can see, different variables have very different scales. So, I use scales = "free" to account for this.
g <- ggplot(df) + geom_point(aes(x = loc_name, y = value), size = 5)
g <- g + facet_grid(variables ~ season, scales = "free")
g
Mucho convenient. Now, say I want to do this, but plot the points by loc_x and loc_y and have value represented by colour instead of y position:
g <- ggplot(df) + geom_point(aes(x = loc_x, y = loc_y, colour = value),
size = 5)
g <- g + facet_grid(variables ~ season, scales = "free")
g <- g + scale_colour_gradient2(low = "#3366CC",
mid = "white",
high = "#FF3300",
midpoint = 50)
g
Notice that the colour scales are not free and, like the first figure, values for sal and chl cannot be read easily.
My question: is it possible to do an equivalent of scales = "free" but for colour, so that each row (in this case) has a separate colour bar? Or, do I have to plot each variable (i.e., row in the figure) and patch them together using something like cowplot?
Using the development version of dplyr:
library(dplyr)
library(purrr)
library(ggplot2)
library(cowplot)
df %>%
group_split(variables, season) %>%
map(
~ggplot(., aes(loc_x, loc_y, color = value)) +
geom_point(size = 5) +
scale_colour_gradient2(
low = "#3366CC",
mid = "white",
high = "#FF3300",
midpoint = median(.$value)
) +
facet_grid(~ variables + season, labeller = function(x) label_value(x, multi_line = FALSE))
) %>%
plot_grid(plotlist = ., align = 'hv', ncol = 2)
I've got a dataset similar to this:
x <- 100 - abs(rnorm(1e6, 0, 5))
y <- 50 + rnorm(1e6, 0, 3)
dist <- sqrt((x - 100)^2 + (y - 50)^2)
z <- exp(-(dist / 8)^2)
which can be visualised as follows:
data.frame(x, y, z) %>%
ggplot() + geom_point(aes(x, y, color = z))
What I would like to do is a stacked half-circle plot with averaged value of z in subsequent layers. I think it can be done with the combination of geom_col and coord_polar(), although the farthest I can get is
data.frame(x, y, z, dist) %>%
mutate(dist_fct = cut(dist, seq(0, max(dist), by = 5))) %>%
ggplot() + geom_bar(aes(x = 1, y = 1, fill = dist_fct), stat = 'identity', position = 'fill') +
coord_polar()
which is obviously far from the expectation (layers should be of equal size, plot should be clipped on the right half).
The problem is that I can't really use coord_polar() due to further use of annotate_custom(). So my question are:
can plot like this can be done without coord_polar()?
If not, how can it be done with coord_polar()?
The result should be similar to a graphic below, except from plotting layers constructed from points I would like to plot only layers as a whole with color defined as an average value of z inside a layer.
If you want simple radius bands, perhaps something like this would work as you pictured it in your question:
# your original sample data
x <- 100 - abs(rnorm(1e6, 0, 5))
y <- 50 + rnorm(1e6, 0, 3)
dist <- sqrt((x - 100)^2 + (y - 50)^2)
nbr_bands <- 6 # set nbr of bands to plot
# calculate width of bands
band_width <- max(dist)/(nbr_bands-1)
# dist div band_width yields an integer 0 to nbr bands
# as.factor makes it categorical, which is what you want for the plot
band = as.factor(dist %/% (band_width))
library(dplyr)
library(ggplot2)
data.frame(x, y, band) %>%
ggplot() + geom_point(aes(x, y, color = band)) + coord_fixed() +
theme_dark() # dark theme
Edit to elaborate:
As you first attempted, it would be nice to use the very handy cut() function to calculate the radius color categories.
One way to get categorical (discrete) colors, rather than continuous shading, for your plot color groups is to set your aes color= to a factor column.
To directly get a factor from cut() you may use option ordered_result=TRUE:
band <- cut(dist, nbr_bands, ordered_result=TRUE, labels=1:nbr_bands) # also use `labels=` to specify your own labels
data.frame(x, y, band) %>%
ggplot() + geom_point(aes(x, y, color = band)) + coord_fixed()
Or more simply you may use cut() without options and convert to a factor using as.factor():
band <- as.factor( cut(dist, nbr_bands, labels=FALSE) )
data.frame(x, y, band) %>%
ggplot() + geom_point(aes(x, y, color = band)) + coord_fixed()
Sounds like you may find the circle & arc plotting functions from the ggforce package useful:
# data
set.seed(1234)
df <- data.frame(x = 100 - abs(rnorm(1e6, 0, 5)),
y = 50 + rnorm(1e6, 0, 3)) %>%
mutate(dist = sqrt((x - 100)^2 + (y - 50)^2)) %>%
mutate(z = exp(-(dist / 8)^2))
# define cut-off values
cutoff.values <- seq(0, ceiling(max(df$dist)), by = 5)
df %>%
# calculate the mean z for each distance band
mutate(dist_fct = cut(dist, cutoff.values)) %>%
group_by(dist_fct) %>%
summarise(z = mean(z)) %>%
ungroup() %>%
# add the cutoff values to the dataframe for inner & outer radius
arrange(dist_fct) %>%
mutate(r0 = cutoff.values[-length(cutoff.values)],
r = cutoff.values[-1]) %>%
# add coordinates for circle centre
mutate(x = 100, y = 50) %>%
# plot
ggplot(aes(x0 = x, y0 = y,
r0 = r0, r = r,
fill = z)) +
geom_arc_bar(aes(start = 0, end = 2 * pi),
color = NA) + # hide outline
# force equal aspect ratio in order to get true circle
coord_equal(xlim = c(70, 100), expand = FALSE)
Plot generation took <1s on my machine. Yours may differ.
I'm not sure this satisfies everything, but it should be a start. To cut down on the time for plotting, I'm summarizing the data into a grid, which lets you use geom_raster. I don't entirely understand the breaks and everything you're using, so you might want to tweak some of how I divided the data for making the distinct bands. I tried out a couple ways with cut_interval and cut_width--this would be a good place to plug in different options, such as the number or width of bands.
Since you mentioned getting the average z for each band, I'm grouping by the gridded x and y and the cut dist, then using mean of z for setting bands. I threw in a step to make labels like in the example--you probably want to reverse them or adjust their positioning--but that comes from getting the number of each band's factor level.
library(tidyverse)
set.seed(555)
n <- 1e6
df <- data_frame(
x = 100 - abs(rnorm(n, 0, 5)),
y = 50 + rnorm(n, 0, 3),
dist = sqrt((x - 100)^2 + (y - 50)^2),
z = exp(-(dist / 8)^2)
) %>%
mutate(brk = cut(dist, seq(0, max(dist), by = 5), include.lowest = T))
summarized <- df %>%
filter(!is.na(brk)) %>%
mutate(x_grid = floor(x), y_grid = floor(y)) %>%
group_by(x_grid, y_grid, brk) %>%
summarise(avg_z = mean(z)) %>%
ungroup() %>%
# mutate(z_brk = cut_width(avg_z, width = 0.15)) %>%
mutate(z_brk = cut_interval(avg_z, n = 9)) %>%
mutate(brk_num = as.numeric(z_brk))
head(summarized)
#> # A tibble: 6 x 6
#> x_grid y_grid brk avg_z z_brk brk_num
#> <dbl> <dbl> <fct> <dbl> <fct> <dbl>
#> 1 75 46 (20,25] 0.0000697 [6.97e-05,0.11] 1
#> 2 75 47 (20,25] 0.000101 [6.97e-05,0.11] 1
#> 3 75 49 (20,25] 0.0000926 [6.97e-05,0.11] 1
#> 4 75 50 (20,25] 0.0000858 [6.97e-05,0.11] 1
#> 5 75 52 (20,25] 0.0000800 [6.97e-05,0.11] 1
#> 6 76 51 (20,25] 0.000209 [6.97e-05,0.11] 1
To make the labels, summarize that data to have a single row per band--I did this by taking the minimum of the gridded x, then using the average of y so they'll show up in the middle of the plot.
labels <- summarized %>%
group_by(brk_num) %>%
summarise(min_x = min(x_grid)) %>%
ungroup() %>%
mutate(y_grid = mean(summarized$y_grid))
head(labels)
#> # A tibble: 6 x 3
#> brk_num min_x y_grid
#> <dbl> <dbl> <dbl>
#> 1 1 75 49.7
#> 2 2 88 49.7
#> 3 3 90 49.7
#> 4 4 92 49.7
#> 5 5 93 49.7
#> 6 6 94 49.7
geom_raster is great for these situations where you have data in an evenly spaced grid that just needs uniform tiles at each position. At this point, the summarized data has 595 rows, instead of the original 1 million, so the time to plot shouldn't be an issue.
ggplot(summarized) +
geom_raster(aes(x = x_grid, y = y_grid, fill = z_brk)) +
geom_label(aes(x = min_x, y = y_grid, label = brk_num), data = labels, size = 3, hjust = 0.5) +
theme_void() +
theme(legend.position = "none", panel.background = element_rect(fill = "gray40")) +
coord_fixed() +
scale_fill_brewer(palette = "PuBu")
Created on 2018-11-04 by the reprex package (v0.2.1)
I have data consisting of x,y-coordinates and heading angle that I'd like to divide into 2D bins in order to calculate mean heading for each bin and plot with ggplot's geom_spoke.
Here's an example of what I want to do, with bins created manually:
# data
set.seed(1)
dat <- data.frame(x = runif(100,0,100), y = runif(100,0,100), angle = runif(100, 0, 2*pi))
# manual binning
bins <- rbind(
#bottom left
dat %>%
filter(x < 50 & y < 50) %>%
summarise(x = 25, y = 25, angle = mean(angle), n = n()),
#bottom right
dat %>%
filter(x > 50 & y < 50) %>%
summarise(x = 75, y = 25, angle = mean(angle), n = n()),
#top left
dat %>%
filter(x < 50 & y > 50) %>%
summarise(x = 25, y = 75, angle = mean(angle), n = n()),
#top right
dat %>%
filter(x > 50 & y > 50) %>%
summarise(x = 75, y = 75, angle = mean(angle), n = n())
)
# plot
ggplot(bins, aes(x, y)) +
geom_point() +
coord_equal() +
scale_x_continuous(limits = c(0,100)) +
scale_y_continuous(limits = c(0,100)) +
geom_spoke(aes(angle = angle, radius = n/2), arrow=arrow(length = unit(0.2,"cm")))
I know how to create 2D bins containing count data for each bin, e.g.:
# heatmap of x,y counts
p <- ggplot(dat, aes(x, y)) +
geom_bin2d(binwidth = c(50, 50)) +
coord_equal()
#ggplot_build(p)$data[[1]] #access binned data
But I can't seem to find a way to summarise other variables such as heading for each bin before passing to geom_spoke. Without first binning, my plot looks like this instead:
Here's one approach. You'll need to determine the number / range of bins in each dimension (x & y) once, & everything else should be covered by code:
# adjust range & number of bins here
x.range <- pretty(dat$x, n = 3)
y.range <- pretty(dat$y, n = 3)
> x.range
[1] 0 50 100
> y.range
[1] 0 50 100
Automatically assign each row to a bin based on which x & y intervals it falls into:
dat <- dat %>%
rowwise() %>%
mutate(x.bin = max(which(x > x.range)),
y.bin = max(which(y > y.range)),
bin = paste(x.bin, y.bin, sep = "_")) %>%
ungroup()
> head(dat)
# A tibble: 6 x 6
x y angle x.bin y.bin bin
<dbl> <dbl> <dbl> <int> <int> <chr>
1 26.55087 65.47239 1.680804 1 2 1_2
2 37.21239 35.31973 1.373789 1 1 1_1
3 57.28534 27.02601 3.247130 2 1 2_1
4 90.82078 99.26841 1.689866 2 2 2_2
5 20.16819 63.34933 1.138314 1 2 1_2
6 89.83897 21.32081 3.258310 2 1 2_1
Calculate the mean values for each bin:
dat <- dat %>%
group_by(bin) %>%
mutate(x.mean = mean(x),
y.mean = mean(y),
angle.mean = mean(angle),
n = n()) %>%
ungroup()
> head(dat)
# A tibble: 6 x 10
x y angle x.bin y.bin bin x.mean y.mean angle.mean n
<dbl> <dbl> <dbl> <int> <int> <chr> <dbl> <dbl> <dbl> <int>
1 26.55087 65.47239 1.680804 1 2 1_2 26.66662 68.56461 2.672454 29
2 37.21239 35.31973 1.373789 1 1 1_1 33.05887 28.86027 2.173177 23
3 57.28534 27.02601 3.247130 2 1 2_1 74.71214 24.99131 3.071629 23
4 90.82078 99.26841 1.689866 2 2 2_2 77.05622 77.91031 3.007859 25
5 20.16819 63.34933 1.138314 1 2 1_2 26.66662 68.56461 2.672454 29
6 89.83897 21.32081 3.258310 2 1 2_1 74.71214 24.99131 3.071629 23
Plot without hard-coding any bin number / bin width:
ggplot(dat,
aes(x, y, fill = bin)) +
geom_bin2d(binwidth = c(diff(x.range)[1],
diff(y.range)[1])) +
geom_point(aes(x = x.mean, y = y.mean)) +
geom_spoke(aes(x = x.mean, y = y.mean, angle = angle.mean, radius = n/2),
arrow=arrow(length = unit(0.2,"cm"))) +
coord_equal()
Other details such as the choice of fill palette, legend label, plot title, etc can be tweaked subsequently.
Just to expand on #Z.Lin's answer, here's a modification which lets one plot points at the centre of each bin rather than the mean x,y-coordinates. I'd be happy to hear if there are more eloquent solutions than using left_join.
# data
set.seed(1)
dat <- data.frame(x = runif(100,0,100),
y = runif(100,0,100),
angle = runif(100, 0, 2*pi))
# set parameters
n <- 2 #n bins
x.max #maximum x value
y.max #maximum y value
x.range <- seq(0, x.max, length.out = n+1)
y.range <- seq(0, y.max, length.out = n+1)
# bin data
dat <- dat %>%
rowwise() %>%
mutate(x.bin = max(which(x > x.range)),
y.bin = max(which(y > y.range)),
bin = paste(x.bin, y.bin, sep = "_")) %>%
ungroup()
# summarise values for each bin
dat <- dat %>%
group_by(bin) %>%
select(bin, x.bin, y.bin, x, y, angle) %>%
mutate(angle.mean = mean(angle),
n = n()) %>%
ungroup()
# add x,y-coords for centre points of each bin
x.bin.coords <- data.frame(x.bin = 1:n,
x.bin.coord = (x.range + (x.max / n / 2))[1:n])
y.bin.coords <- data.frame(y.bin = 1:n,
y.bin.coord = (y.range + (y.max / n / 2))[1:n])
dat <- left_join(dat, x.bin.coords, by = "x.bin")
dat <- left_join(dat, y.bin.coords, by = "y.bin")
# plot
ggplot(data = dat, aes(x, y)) +
geom_bin2d(binwidth = c(diff(x.range)[1], diff(y.range)[1])) +
geom_point(data = dat, aes(x = x.bin.coord, y = y.bin.coord)) +
geom_spoke(data = dat, aes(x = x.bin.coord, y = y.bin.coord, angle = angle.mean, radius = n/2), arrow=arrow(length = unit(0.2,"cm"))) +
coord_equal()