Label of last value in geom_interval - r

I'm projecting a variable for the next 120 months. I'm having trouble with the following when using ggplot:
In the intervals I'm creating I want to display the last value of each one. Ideally, I want some label that says -for example- for the interval 0.8: "80%:(here would go the last value of that interval)". If this is too difficult, then just the value would be perfect.
Here is a reproducible example
#libraries
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggfan)
library(gridExtra)
library(stringr)
library(scales)
#Create a dataframe
month <- 1:120
price_a <- 5000
demand <- 10
data <- data.frame(month, price_a, demand)
#Create 100 simulations to project price_a and demand for the future
simulations <- 100
intervalo <- seq_len(120)
set.seed(96)
lista_meses <- lapply(setNames(intervalo, paste0("data", intervalo)), function(i) {
cbind(
data[rep(i, simulations),],
growth_pricea = as.numeric(runif(simulations, min = -0.02, max = 0.05)),
growth_demand = as.numeric(runif(simulations, min = -0.03, max = 0.03)),
revenue = demand*price_a
)
})
#Calculate the growth of each variable and revenue
for (i in 2:length(lista_meses)){
lista_meses[[i]][["price_a"]] <- lista_meses[[i-1]][["price_a"]]*(1+lista_meses[[i]][["growth_pricea"]])
lista_meses[[i]][["demand"]] <- lista_meses[[i-1]][["demand"]]*(1+lista_meses[[i]][["growth_demand"]])
lista_meses[[i]][["revenue"]] <- lista_meses[[i]][["price_a"]]*lista_meses[[i]][["demand"]]
}
#Extract revenue columns from all dataframes in list
time <- 1:120 #10 years.
extract_column <- lapply(lista_meses, function(x) x["revenue"])
fandataq <- do.call("cbind", extract_column)
mandataq <- as.matrix.data.frame(fandataq)
pdataq <- data.frame(x=time, t(fandataq)) %>% gather(key=sim, value=y, -x)
#Graph: I WANT TO SHOW THE LAST VALUES OF EACH INTERVAL IN GEOM_INTERVAL
ggplot(pdataq, aes(x=x, y= y)) + geom_fan(intervals =c(80)/100, show.legend = FALSE) +
scale_fill_gradient(low="steelblue1", high="steelblue")+scale_y_continuous(labels = scales::comma)+
geom_interval(intervals = c(0.80,1), show.legend = FALSE) + scale_linetype_manual(values=c("dotted", "dotted")) +
theme_bw()
Does anybody knows how to achieve this? Thanks in advance!

This could be accomplished by pre-calculating the labels and feeding those in as text:
probs = c(0, 0.1, 0.9, 1) # 80% interval from 0.1 to 0.9
label_table <- tibble(x = max(pdataq$x),
probs,
y = quantile(pdataq[pdataq$x == max(pdataq$x), "y"],
probs = probs),
y_label = scales::comma(y))
# OR, using ggfan::calc_quantiles:
#label_table <- calc_quantiles(pdataq, intervals = c(0.8, 1), x_var = "x", y_var = "y") %>%
# ungroup() %>%
# filter(x == max(x)) %>%
# mutate(y_label = scales::comma(y))
## A tibble: 4 x 4
# x probs y y_label
# <int> <dbl> <dbl> <chr>
#1 120 0 124311. 124,311
#2 120 0.1 198339. 198,339
#3 120 0.9 434814. 434,814
#4 120 1 520464. 520,464
ggplot(pdataq, aes(x=x, y= y)) +
geom_fan(intervals =c(80)/100, show.legend = FALSE) +
scale_fill_gradient(low="steelblue1", high="steelblue")+
scale_y_continuous(labels = scales::comma)+
geom_interval(intervals = c(0.80,1), show.legend = FALSE) +
geom_text(data = label_table,
aes(label = y_label), hjust = -0.1, size = 3) +
coord_cartesian(clip = "off") +
scale_x_continuous(expand = expansion(add = c(5, 20))) +
scale_linetype_manual(values=c("dotted", "dotted")) +
theme_bw()

Related

Plotting tables in facets using ggplot2

I am looking for a way to plot some table data in facets using ggplot2.
Below are sample data and some code that sort of produces a plot of what I am looking for.
However, using geom_text() makes it difficult to align the lines, and I have problems with text being cropped when I combine the facetted table-plot with other plots.
Thanks in advance
Libray and sample data text_data
library(tidyverse)
text_data <- data.frame(
rep = 1:5,
time = rnorm(5, 2, 0.2),
x = sample(100:130, 5),
y = sample(40:50, 5),
z = rnorm(5, -1, 0.3)
)
# Text data in wide format
text_data
#> rep time x y z
#> 1 1 2.192189 129 47 -1.308432
#> 2 2 2.161335 105 46 -1.186042
#> 3 3 2.340631 106 48 -1.270763
#> 4 4 2.136504 124 44 -1.332719
#> 5 5 2.148028 108 42 -1.249902
Text data in the table format I would like to plot, each rep should be a facet
text_data %>%
pivot_longer(cols = -rep) %>%
knitr::kable(digits = 1)
rep
name
value
1
time
2.2
1
x
129.0
1
y
47.0
1
z
-1.3
2
time
2.2
2
x
105.0
2
y
46.0
2
z
-1.2
3
time
2.3
3
x
106.0
3
y
48.0
3
z
-1.3
4
time
2.1
4
x
124.0
4
y
44.0
4
z
-1.3
5
time
2.1
5
x
108.0
5
y
42.0
5
z
-1.2
Some NOT OPTIMAL to code that produces a facetted plot to show kind of what I am looking for
ggplot(data = text_data) +
geom_text(
y = 0.9,
x = 0.5,
aes(label = paste("Time:", round(time, 1), "seconds"))
) +
geom_text(
y = 0.7,
x = 0.5,
aes(label = paste("X: ", x))
) +
geom_text(
y = 0.5,
x = 0.5,
aes(label = paste("y: ", y))
) +
geom_text(
y = 0.3,
x = 0.5,
aes(label = paste("z: ", round(z, 1)))
) +
facet_grid(rows = vars(rep)) +
theme_minimal()
Created on 2022-09-26 by the reprex package (v2.0.1)
With ggpp::geom_table:
library(ggplot2)
library(ggpp)
library(dplyr)
library(tidyr)
library(tibble)
text_data <- data.frame(
rep = 1:5,
time = rnorm(5, 2, 0.2),
x = sample(100:130, 5),
y = sample(40:50, 5),
z = rnorm(5, -1, 0.3)
)
dat <- text_data %>%
pivot_longer(cols = -rep)
tbs <- lapply(split(dat, dat$rep), "[", -1L)
df <- tibble(x = rep(-Inf, length(tbs)),
y = rep(Inf, length(tbs)),
rep = levels(as.factor(dat$rep)),
tbl = tbs)
ggplot(text_data) +
geom_point(aes(x = x, y = y)) +
geom_table(data = df, aes(x = x, y = y, label = tbl),
hjust = 0, vjust = 1) +
facet_wrap(~ rep)
Here's an approach using the gridExtra package.
library(tidyverse)
library(gridExtra)
text_data <- data.frame(
time = rnorm(5, 2, 0.2) ,
x = sample(100:130, 5),
y = sample(40:50, 5),
z = rnorm(5, -1, 0.3)
) %>%
mutate(
across(.f = ~ as.character(round(., 2))),
time = paste(time, "seconds")
)
grob_list <- map(1:nrow(text_data), ~ {
text_data[.x,] %>%
as_vector() %>%
as.matrix(ncol = 1) %>%
tableGrob(theme = ttheme_minimal())
})
grid.arrange(grobs = grob_list, ncol = 1)

How to adjust the the y axis like this in ggplot2?

Here is the codes and the present outplot
df <- data.frame(state = c('0','1'),
male = c(26287942,9134784),
female = c(16234000,4406645))
#output
> df
state male female
1 0 26287942 16234000
2 1 9134784 4406645
library(ggplot2)
library(tidyr)
df_long <- pivot_longer(df, cols = c("female","male"))
names(df_long) <- c('state','sex','observations')
ggplot(data = df_long) +
geom_col(aes(x = sex, y =observations, fill = state)) +
theme(legend.position = c(0.1,0.9),
legend.background = element_rect(fill='lightgrey') )
I want to adjust the plots like this. (I marked what I want to change.)
Simplify the scientific records in y-axis.
Count the ratio (the number of state 1)/(the number of state 0 + state 1) and plot like this.
It may be a little complicated, and I don't know which functions to use. If possible, can anyone tell me some related functions or examples?
You can set options(scipen = 99) to disable scientific notation on y-axis. We can create a separate dataset for label data.
library(tidyverse)
options(scipen = 99)
long_data <- df %>%
pivot_longer(cols = c(male, female),
names_to = "sex",
values_to = "observations")
label_data <- long_data %>%
group_by(sex) %>%
summarise(perc = observations[match(1, state)]/sum(observations),
total = sum(observations), .groups = "drop")
ggplot(long_data) +
geom_col(aes(x = sex, y = observations, fill = state)) +
geom_text(data = label_data,
aes(label = round(perc, 2), x = sex, y = total),
vjust = -0.5) +
theme(legend.position = c(0.1,0.9),
legend.background = element_rect(fill='lightgrey'))
By searching the Internet for about two days, I have finished the work!
sex <- c('M','F')
y0 <- c(26287942,16234000)
y1 <- c(9134784, 4406645)
y0 <- y0*10^{-7}
y1 <- y1*10^{-7}
ratio <- y1/(y0+y1)
ratio <- round(ratio,2)
m <- t(matrix(c(y0,y1),ncol=2))
colnames(m) <- c(as.character(sex))
df <- as.data.frame(m)
df <- cbind(c('0','1'),df)
colnames(df)[1] <- 'observations'
df
df_long <- pivot_longer(df, cols = as.character(sex))
names(df_long) <- c('state','sex','observations')
df_r <- as.data.frame(df_long)
df_r <- data.frame(df_r,ratio=rep(ratio,2))
ggplot(data = df_r) +
geom_col(aes(x =sex, y = observations, fill = state))+
theme(legend.position = c(0.1,0.9),
legend.background = element_rect(fill=NULL) )+
geom_line(aes(x=sex,y=ratio*10),group=1)+
geom_point(aes(x=sex,y=ratio*10))+
geom_text(aes(x=sex,y=ratio*10+0.35),label=rep(ratio,2))+
scale_y_continuous(name =expression(paste('observations(','\u00D7', 10^7,')')),
sec.axis = sec_axis(~./10,name='ratio'))
The output:

R – Replicate/Automate scatterplots for each string group in column

I have a dataframe (170 observation and 3 columns) like so:
site_names <- c("WS1", "WS1", "WS2", "WS2", "WS3", "WS3")
x <- c(.15, .20, .17, .16, .20, .22)
y <- c(.026, .031, .045, .087, .09, .033)
df <- data.frame(site_names, x, y)
and I have a simple ggplot formula I can run for each site_name where I plot x and y accordingly:
df %>%
filter(site_names == "") %>%
ggplot(aes(x = x, y = y)) +
geom_point() +
labs(x = "Discharge", y = "Stage")
What I would like is to automate these plots and generate 26 separate plots by site_name and save them all to a PDF
Here is what I have tried:
scatter_expl = function(x, y) {
ggplot(df, aes(x = x, y = y) ) +
geom_point() +
theme_bw() +
labs(x = "Discharge", y = "Stage")
}
plots = map(df, ~scatter_expl())
plots
But this runs through the whole dataframe, returns a list and produces all scatters on the same plot: https://i.stack.imgur.com/gVBhf.png! How do I group by site and return individual graphs?
We may need to group_by
library(dplyr)
library(ggplot2)
library(gridExtra)
out <- df %>%
group_by(site_names) %>%
summarise(plot = list(ggplot(cur_data(), aes(x = x, y = y)) +
geom_point() +
labs(x = "Discharge", y = "Stage")+
ggtitle(sprintf('Plot for %s', cur_group()$site_names))))
-output
> out
# A tibble: 3 × 2
site_names plot
<chr> <list>
1 WS1 <gg>
2 WS2 <gg>
3 WS3 <gg>
-save the output as pdf
ggsave(file.path(getwd(), "plot.pdf"),
marrangeGrob(out$plot, nrow = 1, ncol = 1), device = "pdf")
data
df <- structure(list(site_names = c("WS1", "WS1", "WS2", "WS2", "WS3",
"WS3"), x = c(0.15, 0.2, 0.17, 0.16, 0.2, 0.22), y = c(0.026,
0.031, 0.045, 0.087, 0.09, 0.033)), class = "data.frame", row.names = c(NA,
-6L))
Updated with the help of dear #akrun:
library(tidyverse)
library(purrr)
library(gridExtra)
# create list of df by site_names
dflist <- df %>%
group_split(site_names)
# create a function to plot
scatter_fun = function(dat) {
ggplot(dat, aes(x = x, y = y)) +
geom_point() +
theme_bw() +
labs(x = "Discharge", y = "Stage")
}
# iterate through each list element and apply function (many thanks to #akrun for his help)
p <- map(dflist, ~scatter_fun(.x))
# save all in one pdf
ggsave(
filename = "plots.pdf",
plot = marrangeGrob(p, nrow=2, ncol=2),
width = 7.5, height = 4.5
)

Scaling geom_point size on heatmap to fit correctly in R?

Basically, I have a heatmap that contains some points. What Im trying to do is automatically rescale the size of the points in a sensible way for different sized heatmaps. For example, if I have a heatmap that looks like so:
library(reshape)
library(ggplot2)
library(ggnewscale)
# Create matrix
set.seed(1701)
a <- sample(1:10,100, replace=TRUE)
s <- matrix(a, nrow = 5, ncol=5)
s[upper.tri(s)] = t(s)[upper.tri(s)]
rownames(s) <- colnames(s) <- paste0("x", 1:5)
diag(s) <- 0
sDf <- melt(s)
# create diagonal values
diagDf <- data.frame(
var1 = c(paste0("x", 1:5)),
var2 = c(paste0("x", 1:5)),
val = c(2,5,3,1,5)
)
# make plot
ggplot(sDf, aes(X1,X2)) +
geom_tile(aes(fill = value)) +
scale_fill_gradientn(colors = rev(colorspace::sequential_hcl(palette = "Blues 3", n = 100))) +
new_scale_fill() +
geom_point(data = diagDf, aes(var1, var2, col = val), size = 20) +
theme(aspect.ratio = 1)
So in the image above, the diagonal contains geom_points and their size is manually set to size = 20.... This works for this example, but the issue is:
If the heatmap dimensions were changed to say 20x20, then having the size hardcoded to equal 20 won't work due to overlapping & the points being too big etc.
So what Im trying to do is come up with a method that will automatically resize the points to effectively fill square they are contained in without overlapping, being too big or too small.
Any suggestions as to how I could do this?
I would do something like this:
library(reshape)
library(ggplot2)
library(ggnewscale)
n <- 5
# Create matrix
set.seed(1701)
a <- sample(1:10,100, replace=TRUE)
s <- matrix(a, nrow = n, ncol=n)
s[upper.tri(s)] = t(s)[upper.tri(s)]
rownames(s) <- colnames(s) <- paste0("x", 1:n)
diag(s) <- 0
sDf <- melt(s)
# create diagonal values
diagDf <- data.frame(
var1 = c(paste0("x", 1:n)),
var2 = c(paste0("x", 1:n)),
val = sample(1:5,n,replace = T)
)
# make plot
ggplot(sDf, aes(X1,X2)) +
geom_tile(aes(fill = value)) +
scale_fill_gradientn(colors = rev(colorspace::sequential_hcl(palette = "Blues 3", n = 100))) +
new_scale_fill() +
geom_point(data = diagDf, aes(var1, var2, col = val), size = 1/sqrt(nrow(sDf))*80) +
theme(aspect.ratio = 1)
here the size of the points depends on the dimension of the matrix.
an example of the output with a 3x3, 5x5, and 10x10 matrix
You can modify diagDf to contain the co-ordinates of the circles you want to plot using some basic trigonometry, then plot them as filled polygons. This ensures they will always scale exactly with your plot.
library(dplyr)
diagDf <- diagDf %>%
mutate(var1 = as.numeric(as.factor(var1)),
var2 = as.numeric(as.factor(var2))) %>%
split.data.frame(diagDf$var1) %>%
lapply(function(x) {
deg <- seq(0, 2 * pi, length = 100)
var1 <- cos(deg)/2.2
var2 <- sin(deg)/2.2
val <- rep(x$val, 100)
data.frame(var1 = var1 + x$var1, var2 = var2 + x$var2, val = val)}) %>%
{do.call(rbind, .)}
Now with slightly modified plot code, we get:
ggplot(sDf, aes(X1,X2)) +
geom_tile(aes(fill = value)) +
scale_fill_gradientn(colors=rev(colorspace::sequential_hcl(palette = "Blues 3", n=100))) +
new_scale_fill() +
geom_polygon(data = diagDf, aes(var1, var2, fill = val, group = val)) +
theme(aspect.ratio = 1)
Created on 2021-09-27 by the reprex package (v2.0.0)

ggplot not drawing boxplots as expected

Consider the MWE below. I would like to generate boxplots with these ideas in mind:
Food on the y-axix ordered according to Amot for Home, while Amt (1:40) on x-axis
show mean points overlaying the boxes
boxplots of Food to be ordered based on median of Home Site from dfsummary data
text annotations of N of observations (to be taken from dfsummary data)
MWE
df <- data.frame(
Site = sample(rep(c("Home", "Office"), size = 884)),
Food = sample(rep(c("Banana","Apple","Egg","Berry","Tomato","Potato","Bean","Pea","Nuts","Onion","Carrot","Cabbage","Eggplant"), size=884)),
Amt = sample(seq(1, 40, by = 0.25), size = 884, replace = TRUE)
)
random <- sample(seq(1, 884, by = 1), size = 100, replace = TRUE) # to randomly introduce 100 NAs to Amt vector
df$Amt[random] <- NA
Summary code
dfsummary <- df %>%
dplyr::group_by(Food, Site) %>%
dplyr::summarise(Median = round(median(Amt, na.rm=TRUE), digits=2), N = sum(!is.na(Amt))) %>%
ungroup()
ggplot code
p1 <- ggplot(df, aes(Amt, Food)) +
geom_boxplot() +
facet_grid(facets = . ~ Site)
Graph
I was expecting to see boxplots here.
Adding annotation
p2 <- p1 + geom_text(aes(y = 42, Food, label = paste("n=", N)), data = dfsummary, size = 3, nudge_x = 0.1) +
facet_grid(facets = . ~ Site)
Unfortunately, this doesn't work either.
Note
tidyverse version is 1.3.0
R version 3.6.2 (2019-12-12) -- "Dark and Stormy Night"
To work out the problem, you may want to generate a scatter plot, first:
library(ggplot2)
p1 <- ggplot(df, aes(Amt, Food)) +
geom_point() +
facet_grid(facets = . ~ Site)
p1
As you can see it is impossible to generate a boxplot.
However, if you switch x and y
ggplot(df, aes(Food, Amt)) +
geom_boxplot() +
facet_grid(facets = . ~ Site)
You get:
This works just fine in the current development version of ggplot2, to be released in January 2020.
# If your ggplot2 version is <= 3.2.1, do:
# remotes::install_github("tidyverse/ggplot2")
library(tidyverse)
df <- data.frame(
Site = sample(rep(c("Home", "Office"), size = 884)),
Food = sample(rep(c("Banana","Apple","Egg","Berry","Tomato","Potato","Bean","Pea","Nuts","Onion","Carrot","Cabbage","Eggplant"), size=884)),
Amt = sample(seq(1, 40, by = 0.25), size = 884, replace = TRUE)
)
random <- sample(seq(1, 884, by = 1), size = 100, replace = TRUE) # to randomly introduce 100 NAs to Amt vector
df$Amt[random] <- NA
ggplot(df, aes(Amt, Food)) +
geom_boxplot() +
facet_grid(facets = . ~ Site)
#> Warning: Removed 98 rows containing non-finite values (stat_boxplot).
Created on 2020-01-01 by the reprex package (v0.3.0)
With annotations:
library(tidyverse)
df <- data.frame(
Site = sample(rep(c("Home", "Office"), size = 884)),
Food = sample(rep(c("Banana","Apple","Egg","Berry","Tomato","Potato","Bean","Pea","Nuts","Onion","Carrot","Cabbage","Eggplant"), size=884)),
Amt = sample(seq(1, 40, by = 0.25), size = 884, replace = TRUE)
)
random <- sample(seq(1, 884, by = 1), size = 100, replace = TRUE) # to randomly introduce 100 NAs to Amt vector
df$Amt[random] <- NA
dfsummary <- df %>%
dplyr::group_by(Food, Site) %>%
dplyr::summarise(Median = round(median(Amt, na.rm=TRUE), digits=2), N = sum(!is.na(Amt))) %>%
ungroup()
ggplot(df, aes(Amt, Food)) +
geom_boxplot() +
geom_text(
aes(x = 42, Food, label = paste("n=", N)),
data = dfsummary,
size = 3, nudge_x = 0.1
) +
facet_grid(facets = . ~ Site)
#> Warning: Removed 95 rows containing non-finite values (stat_boxplot).
Created on 2020-01-01 by the reprex package (v0.3.0)

Resources