geom_text in the right of a heatmap in ggplot - r

I created a heatmap with this dataframe:
# Sample panel: eight countries observed in 1961 and 1962, the first four of
# them also in 1963; `crec` is a 0/1 flag per country-year.
datos <- data.frame(
  stringsAsFactors = FALSE,
  country_name = rep_len(
    c("Argentina", "Bolivia", "Brazil", "Chile",
      "Colombia", "Paraguay", "Peru", "Uruguay"),
    length.out = 20
  ),
  year = rep(c("1961", "1962", "1963"), times = c(8, 8, 4)),
  crec = c(rep(1, 8), 0, rep(1, 6), 0, 0, rep(1, 3))
)
# Two-colour palette for the binary `crec` flag. The values are unnamed, so
# they are assigned to the factor levels in order: "0" -> red, "1" -> blue.
colors <- c("red", "blue")

# Heatmap with one tile per country/year, filled by `crec`.
# NOTE(review): `gris` (tile border colour) and `crisis` (data frame supplying
# the y-axis order) are not defined in this snippet; they must already exist in
# the calling environment.
chart <- ggplot(datos, aes(x = year, y = country_name, fill = factor(crec))) +
  geom_tile(color = gris, size = 0.01) +
  scale_fill_manual(values = colors) +
  scale_y_discrete(limits = crisis$country_name) +
  # hide the fill legend; "none" replaces the deprecated `fill = FALSE`
  guides(fill = "none")
I would like to add a geom_text to the right of the last year of each country, so that I can show how many red squares each country has. I think geom_text would be a good fit, but I am not sure how to create one label for each country.
# One label per country, to be drawn next to that country's row of tiles
text <- data.frame(
  stringsAsFactors = FALSE,
  country_name = c(
    "Colombia", "Bolivia", "Chile", "Peru",
    "Brazil", "Paraguay", "Uruguay", "Argentina"
  ),
  label = paste(c(0, 0, 0, 0, 0, 0, 1, 2), "years")
)

library(dplyr)
# Get the maximum year per country so each label can be anchored to the
# right-most tile of its row. `year` is stored as character, so max() compares
# strings; that matches numeric order here because every year has four digits.
text <- text %>%
  left_join(
    datos %>%
      group_by(country_name) %>%
      summarize(year = max(year)),
    # explicit key: avoids the "Joining, by = ..." message and an accidental
    # match on any other column the two tables might share
    by = "country_name"
  )

chart +
  geom_text(
    data = text,
    aes(label = label, x = year, y = country_name),
    # left justified, but nudged to the right
    hjust = 0, nudge_x = 0.55,
    # the label data has no `crec` column, so do not inherit the fill mapping
    inherit.aes = FALSE
  ) +
  # give a little extra room for the text
  scale_x_discrete(expand = expansion(mult = 0, add = c(0, 1)))

Related

ggplot mirrored geom_bars customized colour

I am plotting max_temperature (mean_tmax) against rainfall (mean_rain) in a mirrored barplot: max temp displayed upwards, rain values downwards on the negative scale. These two are stored in the "name" variable.
To highlight the highest values out of the 32 years plotted, I created two vectors colVecTmax, colVecRain. They return a color vector of length 32 each, with the index of max values marked differently.
But when adding these two vectors to fill within geom_bar(), it turns out that ggplot stops counting the top after 16 bars, and moves down to the negative scale to continue. So it does not count by the name (mean_tmax, or mean_rain) variable.
This messes up the plot, and I am not sure how to get ggplot count through on the top bars for max_temperature first, coloring by colVecTmax, and then move down to do the same for rain on the negative scale with colVecRain.
Can anyone give a hint on how to solve this?
# Per-bar colour vectors: 32 entries each, all "orange" / "grey", with the
# position of the largest value in column 4 of the matching Sheffield rows
# switched to "blue".
# NOTE(review): column 4 is presumably `value` -- confirm against df.long.
colVecTmax <- rep("orange",32)
colVecTmax[which.max(as.numeric(unlist(df.long[df.long$place=="sheffield" & df.long$name == "mean_tmax",4])))] <- "blue"
colVecRain <- rep("grey",32)
colVecRain[which.max(as.numeric(unlist(df.long[df.long$place=="sheffield" & df.long$name == "mean_rain",4])))] <- "blue"
# Mirrored bar chart: rainfall is divided by 10 and negated so that it is drawn
# below zero, while max temperature keeps its sign.
# NOTE(review): fill=c(colVecTmax,colVecRain) is a constant set outside aes();
# it is presumably applied to the bars in row order rather than by `name`,
# which would explain the miscoloured bars -- confirm.
# NOTE(review): mutate(place==str_to_sentence(placenames)) uses `==`, so it adds
# a logical column instead of overwriting `place`; `placenames` is not defined
# in this snippet.
# NOTE(review): `res=96` is passed to guides(); it looks like a leftover from a
# graphics-device call -- confirm.
ggplot(df.long[df.long$name %in% c('mean_rain', 'mean_tmax'), ] %>% filter(place== "sheffield")%>%
group_by(name) %>% mutate(value = case_when(
name == 'mean_rain' ~ value/10 * -1,
TRUE ~ value)) %>% mutate(place==str_to_sentence(placenames)) %>%
mutate(name = recode(name,'mean_rain' = "rainfall" , "mean_tmax" = "max temp"))
, aes(x = yyyy, y = value, fill=name))+
geom_bar(stat="identity", position="identity", fill=c(colVecTmax,colVecRain))+
labs(x="Year", y=expression("Rain in cm, temperature in ("*~degree *C*")"))+
geom_smooth(colour="black", lwd=0.5,se=F)+
scale_y_continuous(breaks = seq(-30, 30 , 5))+
scale_x_continuous(breaks = seq(1990, 2025, 5))+
guides(fill= guide_legend(title=NULL))+
scale_fill_discrete(labels=c("Max temperature", "Rainfall"))+
guides(fill=guide_legend(reverse=T), res=96)
Using ggplot2 there are much easier and less error prone ways to assign colors. Instead of creating color vectors which you pass to the color or fill argument you could simply map on aesthetics (which you basically already have done) and assign your desired colors using a manual scale, e.g. scale_fill_manual. The same approach works fine when you want to highlight some values. To this end you could create additional categories, e.g. in the code below I add "_max" to the name for the observations with the max temperature or rainfall and assign your desired "blue" color to these categories. As doing so will add additional categories I use the breaks argument of scale_fill_manual so that these max categories will not show up in the legend.
Using some fake random example data:
# Fake example data: 30 years (1991-2020) of rainfall and max temperature for
# a single place, drawn from uniform distributions with a fixed seed.
set.seed(123)
df.long <- data.frame(
  name = rep(c("mean_rain", "mean_tmax"), times = c(30, 30)),
  place = rep("sheffield", 60),
  yyyy = c(1991:2020, 1991:2020),
  value = c(
    runif(30, min = 40, max = 100),
    runif(30, min = 12, max = 16)
  )
)
library(ggplot2)
library(dplyr)
# Prepare the plotting data: keep the two series for Sheffield, mirror rainfall
# below zero (scaled by 1/10), and tag each series' extreme observation by
# appending "_max" to its name so it can receive its own fill colour.
df_plot <- df.long |>
  filter(name %in% c("mean_rain", "mean_tmax")) |>
  filter(place == "sheffield") |>
  mutate(value = case_when(
    name == "mean_rain" ~ -value / 10,
    TRUE ~ value
  )) |>
  # Maximum values
  group_by(name) |>
  mutate(
    # keep the untouched series name for grouping the trend lines
    series = name,
    name = ifelse(abs(value) >= max(abs(value)), paste(name, "max", sep = "_"), name)
  ) |>
  # drop the grouping so it does not leak into later steps
  ungroup()

ggplot(df_plot, aes(x = yyyy, y = value, fill = name)) +
  geom_col(position = "identity") +
  # Fit one trend line per original series. Grouping by the fill variable
  # would split off the single "_max" observations and leave them out of the
  # fit.
  geom_smooth(
    aes(x = yyyy, y = value, group = series),
    inherit.aes = FALSE,
    colour = "black", lwd = 0.5, se = FALSE
  ) +
  # rainfall is plotted as negative values; show magnitudes on the axis
  scale_y_continuous(breaks = seq(-30, 30, 5), labels = abs) +
  scale_x_continuous(breaks = seq(1990, 2025, 5)) +
  scale_fill_manual(
    # base colours follow the question: orange for max temperature, grey for
    # rainfall; the highlighted maxima are blue
    values = c(
      mean_tmax = "orange", mean_rain = "grey",
      mean_rain_max = "blue", mean_tmax_max = "blue"
    ),
    labels = c(mean_tmax = "Max temperature", mean_rain = "Rainfall"),
    # only the two base categories appear in the legend
    breaks = c("mean_rain", "mean_tmax")
  ) +
  labs(x = "Year", y = expression("Rain in cm, temperature in (" * ~ degree * C * ")"), fill = NULL) +
  guides(fill = guide_legend(reverse = TRUE))

Using different data for positioning and display of labels in plots

TL;DR: when adding plot labels with geom_label etc., is it possible to use different data for calculating the positions (with position_stack or similar functions) than for displaying the labels themselves? Or, less generally, is it possible to subset the label data after the positions have been calculated?
I have some time series data for many different subjects. Observations took place at multiple time points, which are the same for each subject. I would like to plot this data as a stacked area plot, where the height of a subject's curve at each time point corresponds to the observed value for that subject at that time point. Crucially, I also need to add labels to identify each subject.
However, the trivial solution of adding one label at each observation makes the plot unreadable, so I would like to limit the displayed labels to the "most important" subjects (the ones that have the highest peak), as well as only display a label at the respective peak. This subsetting of the labels themselves is not a problem either, but I cannot figure out how to then position the (subset of) labels correctly so they match with the stacked area chart.
Here is some example code, which should work out of the box with tidyverse installed, to illustrate my issue. First, we generate some data which has the same structure as mine:
library(tidyverse)
set.seed(0)

# Generate some data: one clipped random walk per subject, in long format
num_subjects <- 50
num_timepoints <- 10
labels <- paste(sample(words, num_subjects), sample(fruit, num_subjects), sep = "_")
col_names <- c("name", paste0("timepoint_", seq_len(num_timepoints)))
df <- labels %>%
  # one named character vector per subject: its name followed by the walk
  map(function(subject) {
    set_names(c(subject, cumsum(rnorm(num_timepoints))), col_names)
  }) %>%
  bind_rows() %>%
  pivot_longer(
    starts_with("timepoint_"),
    names_to = "timepoint",
    names_prefix = "timepoint_"
  ) %>%
  # everything was carried as character so far; restore the numeric columns
  mutate(across(all_of(c("timepoint", "value")), as.numeric)) %>%
  # negative values are clipped to zero
  mutate(value = if_else(value < 0, 0, value)) %>%
  group_by(name) %>%
  mutate(peak = max(value)) %>%
  ungroup()
Now, we can trivially make a simple stacked area plot without labels:
# Stacked area chart, one band per subject, no labels
ggplot(
  data = df,
  mapping = aes(x = factor(timepoint), y = value, group = name, fill = factor(peak))
) +
  geom_area(position = position_stack(), colour = "gray25", show.legend = FALSE) +
  scale_fill_viridis_d()
Plot without labels (it appears that I currently cannot embed images, which is very unfortunate as they are extremely illustrative here...)
It is also not too hard to add non-specific labels to this data. They can easily be made to appear at the correct position — so the center of the label is at the middle of the area for each time point and subject — using position_stack:
# Stacked area chart with a label on every observation: the labels sit in the
# middle of their band, but there are far too many of them to read the plot
ggplot(
  data = df,
  mapping = aes(x = factor(timepoint), y = value, group = name, fill = factor(peak))
) +
  geom_area(position = position_stack(), colour = "gray25", show.legend = FALSE) +
  geom_label(
    mapping = aes(label = name),
    position = position_stack(vjust = 0.5),
    show.legend = FALSE
  ) +
  scale_fill_viridis_d()
Plot with a label at each observation
However, as noted before, the labels almost entirely obscure the plot itself. So my approach would be to only show labels at the peaks, and only for the 10 subjects with the highest peaks:
# Labels only at the peak of the subjects with the highest peaks. The plot is
# readable, but the labels are stacked using the subset alone, so their
# positions are wrong.
max_labels <- 10 # how many labels to show
df_labels <- df %>%
  group_by(name) %>%
  slice_max(value, n = 1) %>%
  ungroup() %>%
  slice_max(value, n = max_labels)

ggplot(
  data = df,
  mapping = aes(x = factor(timepoint), y = value, group = name, fill = factor(peak))
) +
  geom_area(position = position_stack(), colour = "gray25", show.legend = FALSE) +
  geom_label(
    data = df_labels,
    mapping = aes(label = name),
    position = position_stack(vjust = 0.5),
    show.legend = FALSE
  ) +
  scale_fill_viridis_d()
Plot with only a subset of labels
This code also works fine, but it is apparent that the labels no longer show up at the correct positions, but are instead too low on the plot, especially for the subjects which would otherwise be higher up. (The only subject where the position is correct is work_eggplant.) This makes perfect sense, as the data used for calculation of position_stack are now only a subset of the original data, so the observations which would receive no labels are not considered when stacking. This can be illustrated by zeroing out all the observations which would not receive a label:
# Zero out every observation that does not receive a label, then append the
# labelled rows unchanged; the areas now stack exactly like the label subset.
df_zeroed <- df %>%
  mutate(value = 0) %>%
  anti_join(df_labels, by = c("name", "timepoint")) %>%
  bind_rows(df_labels)

ggplot(
  data = df_zeroed,
  mapping = aes(x = factor(timepoint), y = value, group = name, fill = factor(peak))
) +
  geom_area(position = position_stack(), colour = "gray25", show.legend = FALSE) +
  geom_label(
    data = df_labels,
    mapping = aes(label = name),
    position = position_stack(vjust = 0.5),
    show.legend = FALSE
  ) +
  scale_fill_viridis_d()
Plot with unlabeled observations zeroed out
So now my question is, how can this problem be solved? Is there a way to use the original data for the positioning, but the subset data for the actual display of the labels?
Maybe this is what you are looking for. To achieve the desired result you could:
- use the whole dataset for plotting the labels, to get the right positions,
- use an empty string "" for the non-desired labels,
- set the fill and color of the non-desired labels to "transparent".
# Plot (labels only at the peak of the 10 highest-peaking subjects). The full
# data set is handed to geom_label() so that position_stack() sees every
# observation and the labels land in the middle of their band.
max_labels <- 10 # how many labels to show
df_labels <- df %>%
  group_by(name) %>%
  slice_max(value, n = 1) %>%
  ungroup() %>%
  slice_max(value, n = max_labels) %>%
  mutate(label = name)

# Keep every row of df; rows that should not show a label get "".
df1 <- df %>%
  # explicit key: the same columns a natural join would pick, without the
  # "Joining, by = ..." message
  left_join(df_labels, by = c("name", "timepoint", "value", "peak")) %>%
  replace_na(list(label = ""))

ggplot(df1,
  mapping = aes(x = factor(timepoint), y = value, group = name, fill = as.character(peak))) +
  geom_area(show.legend = FALSE, position = "stack", colour = "gray25") +
  geom_label(mapping = aes(
    label = label,
    # NA fill / colour for the empty labels; both scales below render NA as
    # "transparent", which hides the unwanted label boxes
    fill = ifelse(label != "", as.character(peak), NA_character_),
    color = ifelse(label != "", "black", NA_character_)),
    show.legend = FALSE, position = position_stack(vjust = 0.5)) +
  scale_fill_viridis_d(na.value = "transparent") +
  scale_color_manual(values = c("black" = "black"), na.value = "transparent")
EDIT: If you want the fill colors to correspond to the value of peak, then:
- A simple solution would be to map peak on fill instead of factor(peak) and to use fill = ifelse(label != "", peak, NA_real_) in geom_label. However, in that case you have to switch to a continuous fill scale.
- As I guess that you had a good reason to use a discrete scale, another option would be to make peak an ordered factor. This approach, however, is not that simple. To make it work I first reorder factor(peak) according to peak, add an additional NA level, and use an auxiliary variable peak1 to fill the labels. However, as we now have two different variables to be mapped on fill, I would suggest using a second fill scale via ggnewscale::new_scale_fill to achieve the desired result:
library(tidyverse)
set.seed(0)

# Generate some data: one clipped random walk per subject, with every step
# multiplied by 3
num_subjects <- 50
num_timepoints <- 10
labels <- paste(sample(words, num_subjects), sample(fruit, num_subjects), sep = "_")
col_names <- c("name", paste0("timepoint_", c(1:num_timepoints)))
df <- bind_rows(map(labels,
  ~c(., cumsum(rnorm(num_timepoints)) * 3) %>%
    set_names(col_names))) %>%
  pivot_longer(starts_with("timepoint_"), names_to = "timepoint", names_prefix = "timepoint_") %>%
  mutate(across(all_of(c("timepoint", "value")), as.numeric)) %>%
  mutate(value = if_else(value < 0, 0, value)) %>%
  group_by(name) %>%
  mutate(peak = max(value)) %>%
  ungroup()

# Labels only at the peak of the 10 highest-peaking subjects
max_labels <- 10 # how many labels to show
df_labels <- df %>%
  group_by(name) %>%
  slice_max(value, n = 1) %>%
  ungroup() %>%
  slice_max(value, n = max_labels) %>%
  mutate(label = name)

# Keep every row of df so position_stack() sees all observations; rows that
# should not show a label get "".
df1 <- df %>%
  # explicit key: the same columns a natural join would pick, without the
  # "Joining, by = ..." message
  left_join(df_labels, by = c("name", "timepoint", "value", "peak")) %>%
  replace_na(list(label = ""))

df2 <- df1 %>%
  mutate(
    # Make ordered factor
    peak = fct_reorder(factor(peak), peak),
    # Add NA level to peak
    peak = fct_expand(peak, NA),
    # Auxiliary variable to set the fill to NA for non-desired labels
    peak1 = if_else(label != "", peak, factor(NA)))

ggplot(df2, mapping = aes(x = factor(timepoint), y = value, group = name, fill = peak)) +
  geom_area(show.legend = TRUE, position = "stack", colour = "gray25") +
  scale_fill_viridis_d(na.value = "transparent") +
  # Add a second fill scale: the areas use `peak`, the labels use `peak1`
  ggnewscale::new_scale_fill() +
  geom_label(mapping = aes(
    label = label,
    fill = peak1,
    color = ifelse(label != "", "black", NA_character_)),
    show.legend = FALSE, position = position_stack(vjust = 0.5)) +
  scale_fill_viridis_d(na.value = "transparent") +
  scale_color_manual(values = c("black" = "black"), na.value = "transparent")

Combine text and image in a geom_label_repel in ggplot

I'm trying to do a line graph and have the last point of each series be labelled by a combination of text and image. I usually use ggrepel package for this and have no problem doing this with text only. My problem is I can't figure out how to add an image in the label.
I thought that a label like Country <img src='https://link.com/to/flag.png' width='20'/> would work and so this is what I've tried to do:
library(dplyr)
library(ggplot2)
library(ggrepel)
# example df
df <- data.frame(
  Country = c(rep("France", 5), rep("United Kingdom", 5)),
  Ratio = rnorm(10),
  Days = c(seq(1, 5, 1), seq(4, 8, 1)),
  abbr = c(rep("FR", 5), rep("GB", 5))) %>%
  group_by(Country) %>%
  # add "label" only to last point of the graph
  mutate(label = if_else(Days == max(Days),
    # combine text and img of country's flag
    true = paste0(Country, " <img src='https://raw.githubusercontent.com/behdad/region-flags/gh-pages/png/", abbr, ".png' width='20'/>"),
    false = NA_character_)
  ) %>%
  # the grouping was only needed for the per-country max(Days)
  ungroup()

# line graph
ggplot(data = df, aes(x = Days, y = Ratio, color = Country)) +
  geom_line(size = 1) +
  theme(legend.position = "none") +
  # rows without a label are NA; na.rm drops them without a warning
  geom_label_repel(aes(label = label),
    nudge_x = 1,
    na.rm = TRUE)
But this produces the raw label and not the country's name with its flag, as intended:
This is obviously not the way to go, can anyone please help me?
Try this approach using ggtext function geom_richtext(). You can customize other elements if you wish. Here the code:
library(dplyr)
library(ggplot2)
library(ggrepel)
library(ggtext)
# example df
df <- data.frame(
  Country = c(rep("France", 5), rep("United Kingdom", 5)),
  Ratio = rnorm(10),
  Days = c(seq(1, 5, 1), seq(4, 8, 1)),
  abbr = c(rep("FR", 5), rep("GB", 5))) %>%
  group_by(Country) %>%
  # add "label" only to last point of the graph
  mutate(label = if_else(Days == max(Days),
    # combine text and img of country's flag
    true = paste0(Country, " <img src='https://raw.githubusercontent.com/behdad/region-flags/gh-pages/png/", abbr, ".png' width='20'/>"),
    false = NA_character_)
  ) %>%
  # the grouping was only needed for the per-country max(Days)
  ungroup()

# line graph
ggplot(data = df, aes(x = Days, y = Ratio, color = Country, label = label)) +
  geom_line(size = 1) +
  theme(legend.position = "none") +
  # geom_richtext() renders the <img> markup in the label; rows whose label is
  # NA are dropped silently by na.rm
  geom_richtext(na.rm = TRUE, nudge_x = -0.1, nudge_y = -0.1)
Output:

Ordering of items within a stacked geom_bar

I want, for reasons which seem good to me, to plot a stacked bar chart with the bars in a specific, data-dependent order. For reasons which are obscure to me, it does not seem to work. Specifically, I can readily arrange the rows of my dataframe in the right order and make the column of names identifying the bars an ordered factor, which gets the bars in the order I desire; however, the graph does not stack the columns of the dataframe in the order I desire.
An example
tab <- structure(list(Item = c("Personal", "Peripheral", "Communication", "Multimedia", "Office", "Social Media"), `Not at all` = c(3.205128, 18.709677, 5.844156, 31.578947, 20.666667, 25.827815), Somewhat = c(30.76923, 23.87097, 24.67532, 18.42105, 30, 16.55629), `Don't know` = c(0.6410256, 2.5806452, 1.9480519, 11.1842105, 2.6666667, 5.9602649), Confident = c(32.69231, 29.67742, 33.11688, 17.10526, 23.33333, 27.15232), `Very confident` = c(32.69231, 25.16129, 34.41558, 21.71053, 23.33333, 24.50331)), .Names = c("Item", "Not at all", "Somewhat", "Don't know", "Confident", "Very confident"), row.names = c(NA, -6L), class = "data.frame")
# NOTE(review): this snippet relies on ggplot2, dplyr (%>%, arrange),
# reshape2 (melt) and RColorBrewer (brewer.pal) being attached; none of them
# is loaded here.
Title <- "Plot title"
ResponseLevels <- c("Not at all", "Somewhat", "Don't know", "Confident", "Very confident") # Labels for bars
# one colour per response level
pal.1 <- brewer.pal(length(ResponseLevels), "BrBG") # Colours
tab <- tab %>% arrange(.[, 2]) # Sort by the first column of responses
tab$Item <- factor(tab$Item, levels = tab$Item[order(tab[, 2])], ordered = TRUE) # Reorder factor levels
tab.m <- melt(tab, id = "Item")
# melt() stacks the response columns one after another, so each colour is
# repeated once per item (row of tab)
tab.m$col <- rep(pal.1, each = nrow(tab)) # Set colours
g <- ggplot(data = tab.m, aes(x = Item, y = value, fill = col)) +
  geom_bar(position = "stack", stat = "identity", aes(group = variable)) +
  coord_flip() +
  # the fill column already holds colour values; the identity scale uses them
  # as-is and the legend maps them back to the response labels
  scale_fill_identity("Percent", labels = ResponseLevels,
    breaks = pal.1, guide = "legend") +
  labs(title = Title, y = "", x = "") +
  theme(plot.title = element_text(size = 14, hjust = 0.5)) +
  theme(axis.text.y = element_text(size = 16, hjust = 0)) +
  theme(legend.position = "bottom")
g
The stacked pieces of the bars run from right to left, from 'Not at all' to 'Very confident'. The items are in the correct order, from 'Multimedia' to 'Personal', ordered by the proportion of those who said 'Not at all' to each item.
What I want to get is this graph with the responses ordered the other way, the same way as the legend, that is from 'Not at all' on the left, to 'Very confident' on the right. I cannot figure out how this ordering is set, nor how to change it.
I've read through the 'similar questions', but can see no answer to this specific query. Suggestions, using ggplot, not base R graphics, welcome.
Ok, building on the useful, and much appreciated answer from allstaire, I try the following
library(tidyverse)
tab <- structure(list(Item = c("Personal", "Peripheral", "Communication", "Multimedia", "Office", "Social Media"), `Not at all` = c(3.205128, 18.709677, 5.844156, 31.578947, 20.666667, 25.827815), Somewhat = c(30.76923, 23.87097, 24.67532, 18.42105, 30, 16.55629), `Don't know` = c(0.6410256, 2.5806452, 1.9480519, 11.1842105, 2.6666667, 5.9602649), Confident = c(32.69231, 29.67742, 33.11688, 17.10526, 23.33333, 27.15232), `Very confident` = c(32.69231, 25.16129, 34.41558, 21.71053, 23.33333, 24.50331)), .Names = c("Item", "Not at all", "Somewhat", "Don't know", "Confident", "Very confident"), row.names = c(NA, -6L), class = "data.frame")
# Keep Item first and reverse the five response columns
tab <- tab %>% select(1, 6:2) ## Re-order the columns of tab

# Order the items by their "Not at all" share, then reshape to long format with
# the response name as a factor whose levels follow the column order
tab.m <- tab %>%
  arrange(`Not at all`) %>%
  mutate(Item = factor(Item, levels = Item[order(`Not at all`)])) %>%
  gather(key = variable, value = value, -Item, factor_key = TRUE)

ggplot(data = tab.m, mapping = aes(x = Item, y = value, fill = variable)) +
  geom_col() +
  scale_fill_brewer(
    name = "Percent",
    type = "cat",
    palette = "BrBG",
    guide = guide_legend(reverse = TRUE)
  ) +
  coord_flip() +
  labs(title = "Plot title", x = NULL, y = NULL) +
  theme(legend.position = "bottom")
And this is exactly the graph I want, so my pressing problem is solved.
However, if I say instead
# Same chart, but with the legend left in its default (non-reversed) order
ggplot(data = tab.m, mapping = aes(x = Item, y = value, fill = variable)) +
  geom_col() +
  scale_fill_brewer(
    name = "Percent",
    type = "cat",
    palette = "BrBG",
    guide = guide_legend(reverse = FALSE)
  ) +
  coord_flip() +
  labs(title = "Plot title", x = NULL, y = NULL) +
  theme(legend.position = "bottom")
The picture I get is this
Here the body of the chart is correct, but the legend is going in the wrong direction.
This solves my problem, but does not quite answer my question. I start with a dataframe, and to get what I want I have to reverse the order of the data columns, and reverse the guide legend. This evidently works, but it's perverse.
So, how does a stacked bar chart decide in what order to present the stacked items? It's clearly related to their order in the melted dataset, but simply changing the order leaves the legend going in the wrong direction. Looking at the melted dataset, tab.m, from top to bottom, the responses are in the order 'Very confident' to 'Not at all', but the default legend is the reverse order 'Not at all' to 'Very confident'.
If you pass guide_legend instead of just a string, you can set its reverse parameter to TRUE. Simplifying a bit,
library(tidyverse)
tab <- structure(list(Item = c("Personal", "Peripheral", "Communication", "Multimedia", "Office", "Social Media"), `Not at all` = c(3.205128, 18.709677, 5.844156, 31.578947, 20.666667, 25.827815), Somewhat = c(30.76923, 23.87097, 24.67532, 18.42105, 30, 16.55629), `Don't know` = c(0.6410256, 2.5806452, 1.9480519, 11.1842105, 2.6666667, 5.9602649), Confident = c(32.69231, 29.67742, 33.11688, 17.10526, 23.33333, 27.15232), `Very confident` = c(32.69231, 25.16129, 34.41558, 21.71053, 23.33333, 24.50331)), .Names = c("Item", "Not at all", "Somewhat", "Don't know", "Confident", "Very confident"), row.names = c(NA, -6L), class = "data.frame")
# Order the items by their "Not at all" share, then reshape to long format with
# the response name as a factor whose levels follow the column order
tab.m <- tab %>%
  arrange(`Not at all`) %>%
  mutate(Item = factor(Item, levels = Item[order(`Not at all`)])) %>%
  gather(key = variable, value = value, -Item, factor_key = TRUE)

# Stacked horizontal bars; only the legend order is reversed
ggplot(data = tab.m, mapping = aes(x = Item, y = value, fill = variable)) +
  geom_col() +
  scale_fill_brewer(
    name = "Percent",
    palette = "BrBG",
    guide = guide_legend(reverse = TRUE)
  ) +
  coord_flip() +
  labs(title = "Plot title", x = NULL, y = NULL) +
  theme(legend.position = "bottom")
For the edit:
Bar order is determined by factor level order, which in the code above is determined by column order because gather is used to create the factor, though coord_flip makes this less obvious. It's easy to reverse the level order with levels<- or by reassembling the factor, though. To keep the colors with the same levels, pass direction = -1 to scale_fill_brewer to reverse their order as well.
# As before, but with the response factor's level order reversed so that the
# stacking order of the bars flips
tab.m <- tab %>%
  arrange(`Not at all`) %>%
  mutate(Item = factor(Item, levels = Item[order(`Not at all`)])) %>%
  gather(key = variable, value = value, -Item, factor_key = TRUE) %>%
  mutate(variable = ordered(variable, levels = rev(levels(variable))))

# direction = -1 reverses the palette too, so each response keeps its colour
ggplot(data = tab.m, mapping = aes(x = Item, y = value, fill = variable)) +
  geom_col() +
  scale_fill_brewer(
    name = "Percent",
    palette = "BrBG",
    direction = -1,
    guide = guide_legend(reverse = TRUE)
  ) +
  coord_flip() +
  labs(title = "Plot title", x = NULL, y = NULL) +
  theme(legend.position = "bottom")

How do I create bar charts in R where the starting point of the bar is greater than zero?

I'm trying to create a series of bar charts (to be replicated for multiple sites) that highlight the difference between the main site and the satellite locations. I can come somewhat close using geom_point, but I'd like to have them represented as bar charts, where the bar starts at the lowest point, there are labels for the main site and satellite locations, as well as the difference between them. Here is some sample code and screenshots of what I have, and an idea of what I'd like it to look like.
library(ggplot2)
library(dplyr)
# Sample data: one value per site (satellite and main) for each of three years
site <- rep(c("Site A", "Main Site"), times = 3)
year <- rep(c("2013", "2014", "2015"), each = 2)
value <- c(57, 74, 60, 50, 60, 68)
df <- data.frame(site, year, value)
# Point-and-label version: one labelled point per site and year
labelled <- mutate(df, label = paste0(site, " (", value, ")"))
ggplot(labelled, aes(x = year, y = value, group = site, colour = site)) +
  geom_point(size = 0.5) +
  geom_text(aes(label = label)) +
  scale_y_continuous(limits = c(0, 100))
Using the comment from @Gregor, I managed to come up with something that works. It probably isn't the most elegant solution, but it will do for now.
# Floating bars: reshape to one row per year with one column per site, then
# draw a rectangle spanning the two values, coloured by whether Site A lies
# above or below the main site, with text labels for both values and the gap.
df %>%
  # year is stored as text; the rectangle edges below need arithmetic on it
  mutate(year = as.numeric(as.character(year))) %>%
  tidyr::spread(site, value) %>%
  # the spread columns are named "Site A" / "Main Site"; give them syntactic
  # names for the expressions below
  rename(SiteA = `Site A`, MainSite = `Main Site`) %>%
  mutate(diff = SiteA - MainSite) %>%
  mutate(AboveBelow = case_when(
    diff < 0 ~ "Below",
    diff == 0 ~ "No Difference",
    TRUE ~ "Above"
  )) %>%
  ggplot() +
  scale_x_continuous(name = "Year", breaks = c(2013, 2014, 2015)) +
  scale_y_continuous(name = "Percentage", limits = c(0, 100)) +
  geom_rect(aes(xmin = year - 0.33, xmax = year + 0.33, ymin = SiteA, ymax = MainSite, fill = AboveBelow)) +
  # each value label goes just outside its end of the bar
  geom_text(aes(x = year, y = ifelse(diff < 0, MainSite + 5, MainSite - 3), label = paste0("MainSite - ", MainSite))) +
  geom_text(aes(x = year, y = ifelse(diff < 0, SiteA - 3, SiteA + 5), label = paste0("SiteA - ", SiteA))) +
  # the difference is printed at the midpoint of the bar
  geom_text(aes(x = year, y = MainSite + (diff / 2), label = diff)) +
  # named values, so the colours do not depend on which categories occur
  scale_fill_manual(values = c("Above" = "green", "Below" = "red", "No Difference" = "white"))
Gives me this:
Following up on the comment from @gregor, you can try the code below (note that dcast is from reshape2, and note the heavy use of dplyr):
# Floating bars drawn as tiles: each tile is centred on the midpoint of the two
# sites' values and its height is their absolute difference; the fill shows
# whether the main site is above ("Positive") or not ("Negative").
df %>%
  # one row per year, one column per site; value.var is named explicitly so
  # dcast() does not have to guess the value column
  reshape2::dcast(year ~ site, value.var = "value") %>%
  mutate(
    midpt = (`Main Site` + `Site A`) / 2,
    dir = factor(
      (`Main Site` - `Site A`) > 0,
      levels = c(FALSE, TRUE),
      labels = c("Negative", "Positive")
    ),
    diff = abs(`Main Site` - `Site A`)
  ) %>%
  ggplot(aes(x = year, y = midpt, fill = dir, height = diff)) +
  geom_tile() +
  scale_fill_manual(values = c("Positive" = "darkgreen", "Negative" = "red3"))
If you have more than 2 sites, you would likely want a more flexible solution, probably using dplyr directly.

Resources