I'm plotting a Scatterplot with ggplot() as follows:
library(data.table)
library(plotly)
library(ggplot2)
library(lubridate)
dt.allData <- data.table(date = seq(as.Date('2020-01-01'), by = '1 day', length.out = 365),
DE = rnorm(365, 4, 1), Austria = rnorm(365, 10, 2),
Czechia = rnorm(365, 1, 2), check.names = FALSE)
## Calculate Pearson correlation coefficient: ##
corrCoeff <- cor(dt.allData$Austria, dt.allData$DE, method = "pearson", use = "complete.obs")
corrCoeff <- round(corrCoeff, digits = 2)
## Linear regression function extraction by creating linear model: ##
regLine <- lm(DE ~ Austria, data = dt.allData)
## Extract k and d values for the linear function f(x) = kx+d: ##
k <- round(regLine$coef[2], digits = 5)
d <- round(regLine$coef[1], digits = 2)
linRegFunction <- paste0("y = ", d, " + (", k, ")x")
## PLOT: ##
p1 <- ggplot(data = dt.allData, aes(x = Austria, y = DE,
text = paste("Date: ", date, '\n',
"Austria: ", Austria, "MWh/h", '\n',
"DE: ", DE, "\u20ac/MWh"),
group = 1)
) +
geom_point(aes(color = ifelse(date >= now()-weeks(5), "#419F44", "#F07D00"))) +
scale_color_manual(values = c("#F07D00", "#419F44")) +
geom_smooth(method = "lm", se = FALSE, color = "#007d3c") +
annotate("text", x = 10, y = 10,
label = paste("\u03c1 =", corrCoeff, '\n',
linRegFunction), parse = TRUE) +
theme_classic() +
theme(legend.position = "none") +
theme(panel.background = element_blank()) +
xlab("Austria") +
ylab("DE")+
ggtitle("DE vs Austria") +
theme(plot.title = element_text(hjust = 0.5, face = "bold"))
# Correlation plot converting from ggplot to plotly: #
plot <- plotly::ggplotly(p1, tooltip = "text")
which gives the following plot here:
I use annotate() to represent the correlation coefficient and the regression function. I define the x and y coordinates manually so that the text output is displayed in the middle at the top. Since I have some of such data tables dt.allData that have different axis scalings, I would like to define in the plot that the text should always be displayed in the middle at the top, depending on the axis scaling without defining x and y coordinate manually before.
I'd suggest using ggtitle and hjust = 0.5:
Edit: using plotly::layout and a span tag to create the title:
library(data.table)
library(ggplot2)
library(plotly)
library(lubridate)
dt.allData <- data.table(date = seq(as.Date('2020-01-01'), by = '1 day', length.out = 365),
DE = rnorm(365, 4, 1), Austria = rnorm(365, 10, 2),
Czechia = rnorm(365, 1, 2), check.names = FALSE)
## Calculate Pearson correlation coefficient: ##
corrCoeff <- cor(dt.allData$Austria, dt.allData$DE, method = "pearson", use = "complete.obs")
corrCoeff <- round(corrCoeff, digits = 2)
## Linear regression function extraction by creating linear model: ##
regLine <- lm(DE ~ Austria, data = dt.allData)
## Extract k and d values for the linear function f(x) = kx+d: ##
k <- round(regLine$coef[2], digits = 5)
d <- round(regLine$coef[1], digits = 2)
linRegFunction <- paste0("y = ", d, " + (", k, ")x")
## PLOT: ##
p1 <- ggplot(data = dt.allData, aes(x = Austria, y = DE,
text = paste("Date: ", date, '\n',
"Austria: ", Austria, "MWh/h", '\n',
"DE: ", DE, "\u20ac/MWh"),
group = 1)
) +
geom_point(aes(color = ifelse(date >= now()-weeks(5), "#419F44", "#F07D00"))) +
scale_color_manual(values = c("#F07D00", "#419F44")) +
geom_smooth(method = "lm", formula = 'y ~ x', se = FALSE, color = "#007d3c") +
# ggtitle(label = paste("My pretty useful title", '\n', "\u03c1 =", corrCoeff, '\n', linRegFunction)) +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5)) +
theme(legend.position = "none") +
theme(panel.background = element_blank()) +
xlab("Austria") +
ylab("DE")
# Correlation plot converting from ggplot to plotly: #
# using span tag (directly in control of font-size):
span_plot <- plotly::ggplotly(p1, tooltip = "text") %>% layout(
title = paste(
'<b>My pretty useful title</b>',
'<br><span style="font-size: 15px;">',
'\u03c1 =<i>',
corrCoeff,
'</i><br>',
linRegFunction,
'</span>'
),
margin = list(t = 100)
)
span_plot
Edit: added the sup alternative as per this answer
# using sup tag:
sup_plot <- plotly::ggplotly(p1, tooltip = "text") %>% layout(
title = paste(
'<b>My pretty useful title</b>',
'<br><sup>',
"\u03c1 =<i>",
corrCoeff,
'</i><br>',
linRegFunction,
'</sup>'
),
margin = list(t = 100)
)
sup_plot
Here you can find some related information in the plotly docs.
First I would start by seeing if something like this could help you:
annotate("text",
x = mean(dt.allData$Austria, na.rm = TRUE),
y = max(dt.allData$DE, na.rm = TRUE),
label = paste("\u03c1 =",
corrCoeff, '\n',
linRegFunction),
parse = TRUE,
hjust = .5)
and then, in the case where you want to go through a list of x,y pairs, you'd eventually you'd want to move towards functional programming where you are passing x columns x1, x2, x3 and ycolumns y1, y2, y3 to a map function which then pulls out the relevant information from each pair and plots them.
Related
i used iris data for an example
`
iris %>%
ggplot(aes(Sepal.Length,
fill = Species))+
geom_density(alpha = .6,
bw = 0.5)+
theme_classic()+
annotate("text",
x = 7,
y = c(.55, .60),
size = 4,
label = c(
paste0("Mean = ", round(mean(iris$Sepal.Length),4), " cm"),
paste0("r = ", round(cor(iris$Sepal.Length, iris$Sepal.Width),2), "")))
`
I try use force italic using Expression function, but does't work.
Adding parse=TRUE and using ?plotmath notation you could do:
EDIT: Getting a "," as decimal mark is a bit tricky. In the code below I use gsub to replace the "." by "*','*".
library(ggplot2)
mean <- round(mean(iris$Sepal.Length), 4)
mean <- gsub("\\.", "*','*", mean)
cor <- round(cor(iris$Sepal.Length, iris$Sepal.Width), 2)
cor <- gsub("\\.", "*','*", cor)
ggplot(iris, aes(Sepal.Length,
fill = Species
)) +
geom_density(
alpha = .6,
bw = 0.5
) +
theme_classic() +
annotate("text",
x = 7,
y = c(.55, .60),
size = 4,
label = c(
paste0("Mean == ", mean, "~cm"),
paste0("italic(r) == ", cor, "")
),
parse = TRUE
)
I'm trying to make a labeled scatterplot in ggplot and the specifics of the labels are causing me fits. Basically, among other things, I want to annotate my facet_wrapped 2-panel ggplot with the R^2 and Mean Bias. Notably, I want to label the mean bias with the appropriate units.
A simple version of my data might look as follows:
library(tidyverse)
Demo_Df <- tibble(Modeled = rnorm(50,0,1), Observed = rnorm(50, 0.5, 1),
Scheme = c(rep("Scheme1", 25), rep("Scheme2", 25)))
Demo_Annotation <- tibble(r.squared = c(0.589, 0.573), Mean_Bias = c(-2.038, -1.049), Scheme = c("Scheme1", "Scheme2"))
Demo_Scatter <- Demo_Df %>%
ggplot(aes(x = Observed, y = Modeled, color = Scheme)) +
geom_point(size = 1.5) +
facet_wrap(~Scheme) +
theme_tufte() +
xlab(expression(paste("Observed Aerosol (", mu, "g m" ^ "-3", ")"), sep = "")) +
ylab(expression(paste("Modeled Aerosol (", mu, "g m" ^ "-3", ")"), sep = "")) +
ylim(-3, 4) +
theme(legend.position = "none")
Demo_Labeled <- Demo_Scatter +
geom_text(data = Demo_Annotation, aes(-2, 3,
label = paste(
"R2 = ", sprintf("%.2f", signif(r.squared, 3)), "\n",
"Mean Bias = ", sprintf("%.2f", signif(Mean_Bias, 3))
)),
size = 5, hjust = 0, color = "black")
This produces almost the right figure, but I would like the R2 to have a superscript 2 and I need to add micrograms per cubic meter (ug/m3) to the end of the "Mean Bias = " label, as it is on the x and y-axes.
To date, I've completely failed at this. I cannot find a solution that supports multiple lines, facet_wrap, variable inputs, AND expressions. There has to be a way to do this. Please help me, tidyverse gods!
One option to achieve your desired result is to add you multiple lines via multiple geom_text layers. To parse the labels as math notation add parse=TRUE to geom_text. Finally I added the labels to you annotations df where I made use of ?plotmath for the math notation.
library(tidyverse)
library(ggthemes)
Demo_Annotation <- Demo_Annotation %>%
mutate(r.squared = paste0("R^{2} == ", sprintf("%.2f", signif(r.squared, 3))),
Mean_Bias = paste0("Mean~Bias == ", sprintf("%.2f", signif(Mean_Bias, 3)), "~mu*g~m^{-3}"))
Demo_Scatter +
geom_text(data = Demo_Annotation, aes(x = -2, y = 4, label = r.squared),
size = 5, hjust = 0, color = "black", parse = TRUE, family = "serif") +
geom_text(data = Demo_Annotation, aes(x = -2, y = 3.5, label = Mean_Bias),
size = 5, hjust = 0, color = "black", parse = TRUE, family = "serif")
DATA
set.seed(42)
Demo_Df <- tibble(Modeled = rnorm(50,0,1), Observed = rnorm(50, 0.5, 1),
Scheme = c(rep("Scheme1", 25), rep("Scheme2", 25)))
I have a large number of variables and would like to create scatterplots comparing all variables to a single variable. I have been able to do this in base R using lapply, but I cannot complete the same task in ggplot2 using lapply.
Below is an example dataset.
df <- data.frame("ID" = 1:16)
df$A <- c(1,2,3,4,5,6,7,8,9,10,11,12,12,14,15,16)
df$B <- c(5,6,7,8,9,10,13,15,14,15,16,17,18,18,19,20)
df$C <- c(11,12,14,16,10,12,14,16,10,12,14,16,10,12,14,16)
I define the variables I would like to generate scatterplots with, using the code below:
df_col_names <- df %>% select(A:C) %>% colnames(.)
Below is how I have been able to successfully complete the task of plotting all variables against variable A, using lapply in base R:
lapply(df_col_names, function(x) {
tiff(filename=sprintf("C:\\Documents\\%s.tiff", x),
width = 1000, height = 1000, res=200)
plot(df$A, df[[x]],
pch=19,
cex = 1.5,
ylab = x,
ylim = c(0, 20),
xlim = c(0, 20))
dev.off()
})
Below is my attempt at completing the task in ggplot2 without any success. It generates the tiff images, although they are empty.
lapply(df_col_names, function(x) {
tiff(filename=sprintf("C:\\Documents\\%s.tiff", x),
width = 1000, height = 1000, res=200)
ggplot(df) +
geom_point(data = df,
aes(x = A, y = df_col_names[[x]], size = 3)) +
geom_smooth(aes(x = A, y = df_col_names[[x]], size = 0), method = "lm", size=0.5) +
coord_fixed(ratio = 1, xlim = c(0, 20), ylim = c(0, 20)) +
guides(size = FALSE, color = FALSE) +
theme_bw(base_size = 14)
dev.off()
})
It works for me with ggsave. Also note that you are passing string column names to ggplot so use .data to refer to actual column values.
library(ggplot2)
lapply(df_col_names, function(x) {
ggplot(df) +
geom_point( aes(x = A, y = .data[[x]], size = 3)) +
geom_smooth(aes(x = A, y = .data[[x]], size = 0), method = "lm", size=0.5) +
coord_fixed(ratio = 1, xlim = c(0, 20), ylim = c(0, 20)) +
guides(size = FALSE, color = FALSE) +
theme_bw(base_size = 14) -> plt
ggsave(sprintf("%s.tiff", x), plt)
})
I want to generate a number of plots of linear regressions (bacterial OTUs plotted against temperature) using ggplot. I want the titles of the plots to be the linear regression equation, which I am determining with a function. The code works when I make the plots individually but not when I use a for loop.
I keep getting the following error:
Error in model.frame.default(formula = taxa_list[i] ~ Temperature, data = dataframe, :
variable lengths differ (found for 'Temperature')
See below for my code. Do I need a nested for loop to make this work?
taxa_list <- c("Vibrio","Salmonella","Campylobacter","Listeria","Streptococcus","Legionella")
taxa_list <- sort(taxa_list)
for (i in seq_along(taxa_list)) {
lm_eqn <- function(dataframe) {
m <- lm(taxa_list[i] ~ Temperature, dataframe)
p <- summary(m)
eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2 %.% italic(x)*","~~italic(p)~"="~p0,
list(a = format(unname(coef(m)[1]), digits = 2),
b = format(unname(coef(m)[2]), digits = 2),
r2 = format(summary(m)$r.squared, digits = 3),
p0 = format(p$coefficients[8], digits = 3)))
as.expression(eq);
}
plot <- ggplot(data = all_data, aes(x = Temperature, y = taxa_list[i], fill = taxa_list[i])) +
geom_point(data = all_data, aes(x = Temperature, y = taxa_list[i]), color = "black", size = 3) +
geom_smooth(method = "lm", size = 1, color = "black", fill = "gray") +
labs(title = lm_eqn(dataframe = all_data), subtitle = "") + xlab("Temperature") + ylab("Number of OTUs")
print(plot)
}
I tried to rewrite your code to make it more readable, efficient and maintainable. I used tidyverse choices. I believe there was an extra * x in your original eq function that I removed.
library(dplyr)
library(ggplot2)
library(purrr)
library(broom)
taxa_list <- c("Vibrio","Salmonella","Campylobacter","Listeria","Streptococcus","Legionella")
taxa_list <- sort(taxa_list)
MyFunctionNew <- function(data, bacteria, temperature)
{
my_lm <- lm(as.formula(paste(bacteria, "~", temperature)), data = data)
terms_info <- broom::tidy(my_lm)
model_info <- broom::glance(my_lm)
eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2 *","~~italic(p)~"="~p0,
list(a = format(terms_info$estimate[1], digits = 2),
b = format(terms_info$estimate[2], digits = 2),
r2 = format(model_info$r.squared, digits = 3),
p0 = format(model_info$p.value, digits = 3)))
plot <- ggplot(data = data, aes_string(x = temperature, y = bacteria, fill = bacteria)) +
geom_point(size = 3, show.legend = TRUE) +
geom_smooth(method = "lm", size = 1, color = "black", fill = "gray") +
labs(title = eq, subtitle = "") + xlab("Temperature") + ylab("Number of OTUs")
return(plot)
}
MyFunctionNew(dat1, "Vibrio", "Temperature")
#> `geom_smooth()` using formula 'y ~ x'
purrr::map(taxa_list, ~ MyFunctionNew(dat1, .x, "Temperature"))
#> [[1]]
#> `geom_smooth()` using formula 'y ~ x'
Here's some made up data that should more or less be close enough
set.seed(1111)
dat1 <- data.frame(Temperature = runif(200, min = 32, max = 100),
Vibrio = rnorm(200),
Salmonella = rnorm(200),
Campylobacter = rnorm(200),
Listeria = rnorm(200),
Streptococcus = rnorm(200),
Legionella = rnorm(200)
)
I had the same issue described in this question:
R: ggplot and plotly axis margin won't change
but when I implemented the solution, I got the following error:
Warning: Ignoring unknown aesthetics: text We recommend that you use the dev version of ggplot2 with ggplotly() Install it with: devtools::install_github('hadley/ggplot2') Error in tmp[[2]] : subscript out of bounds
This code will produce this error on my machine:
library(gapminder)
library(plotly)
library(ggplot2)
lead <- rep("Fred Smith", 30)
lead <- append(lead, rep("Terry Jones", 30))
lead <- append(lead, rep("Henry Sarduci", 30))
proj_date <- seq(as.Date('2017-11-01'), as.Date('2017-11-30'), by = 'day')
proj_date <- append(proj_date, rep(proj_date, 2))
set.seed(1237)
actHrs <- runif(90, 1, 100)
cummActHrs <- cumsum(actHrs)
forHrs <- runif(90, 1, 100)
cummForHrs <- cumsum(forHrs)
df <- data.frame(Lead = lead, date_seq = proj_date,
cActHrs = cummActHrs,
cForHrs = cummForHrs)
makePlot <- function(dat=df, man_level = 'Lead') {
p <- ggplot(dat, aes_string(x='date_seq', y='cActHrs',
group = man_level,
color = man_level),
linetype = 1) +
geom_line() +
geom_line(data=df,
aes_string(x='date_seq', y = 'cForHrs',
group = man_level,
color = man_level),
linetype = 2)
p <- p + geom_point(aes(text=sprintf('%s\nManager: %s\n MTD Actual Hrs: %s\nMTD Forecasted Hrs: %s',
date_seq, Lead, round(cActHrs, 2), round(cForHrs, 2))))
p <- p + theme_classic() + ylab('Hours') + xlab('Date')
gp <- ggplotly(p, tooltip = "text") %>% layout(hovermode = "compare")
### FIX IMPLEMENTED HERE ###
gp[['x']][['layout']][['annotations']][[2]][['x']] <- -0.1
gp %>% layout(margin = list(l = 75))
return(gp)
}
## run the example
p1 <- makePlot()
Try this:
makePlot <- function(dat=df, man_level = "Lead") {
dat$var <- dat[,man_level]
dat$grp <- ""
p <- ggplot(dat, aes(x=date_seq, y=cActHrs,
group = var, color = var,
text=paste0("Manager:", date_seq,"<br>MTD Actual Hrs:", round(cActHrs, 2),
"<br>MTD Forecasted Hrs:", round(cForHrs, 2))),
linetype = 1) +
geom_line() +
geom_line(data=dat,
aes(x=date_seq, y = cForHrs,
group = var, color = var),
linetype = 2) +
geom_point() +
theme_classic() + ylab("Hours") + xlab("Date") +
scale_color_discrete(name=man_level) +
facet_wrap(~grp)
gp <- ggplotly(p, tooltip = "text")
# Set y-axis label position
gp[["x"]][["layout"]][["annotations"]][[2]][["x"]] <- -0.06
# Set legend label position
gp[["x"]][["layout"]][["annotations"]][[3]][["y"]] <- 0.93
gp <- gp %>% layout(margin = list(l = 120, b=70), hovermode = "compare")
return(gp)
}
The problem in your case is the opposite of the linked question. Your axis title is a real axis title, not an annotation. Currently there is no solution to move axis titles in any direction (see https://github.com/lleslie84/plotly.js/pull/1).
Workarounds like adding line breaks to the axis title or adding spaces to the tick labels don't work in your case.
One possible workaround would be to add an annotation with your axis title. The annotation can then be freely moved.
gp <- layout(gp, yaxis = list(title = ""),
margin = list(l = 100),
annotations = c(list(text = "Hours",
x = -0.15,
xref = "paper",
showarrow = F,
textangle = -90))
)
Complete code
library(gapminder)
library(plotly)
library(ggplot2)
lead <- rep("Fred Smith", 30)
lead <- append(lead, rep("Terry Jones", 30))
lead <- append(lead, rep("Henry Sarduci", 30))
proj_date <- seq(as.Date('2017-11-01'), as.Date('2017-11-30'), by = 'day')
proj_date <- append(proj_date, rep(proj_date, 2))
set.seed(1237)
actHrs <- runif(90, 1, 100)
cummActHrs <- cumsum(actHrs)
forHrs <- runif(90, 1, 100)
cummForHrs <- cumsum(forHrs)
df <- data.frame(Lead = lead, date_seq = proj_date,
cActHrs = cummActHrs,
cForHrs = cummForHrs)
makePlot <- function(dat=df, man_level = 'Lead') {
p <- ggplot(dat, aes_string(x='date_seq', y='cActHrs',
group = man_level,
color = man_level),
linetype = 1) +
geom_line() +
geom_line(data=df,
aes_string(x='date_seq', y = 'cForHrs',
group = man_level,
color = man_level),
linetype = 2)
p <- p + geom_point(aes(text=sprintf('%s\nManager: %s\n MTD Actual Hrs: %s\nMTD Forecasted Hrs: %s',
date_seq, Lead, round(cActHrs, 2), round(cForHrs, 2))))
p <- p + theme_classic() + ylab('Hours') + xlab('Date')
gp <- ggplotly(p, tooltip = "text") %>% layout(hovermode = "compare")
### FIX IMPLEMENTED HERE ###
gp <- layout(gp,
yaxis = list(title = ""),
margin = list(l = 100),
annotations = c(list(text = "Hours",
x = -0.15,
xref = "paper",
showarrow = F,
textangle = -90))
)
return(gp)
}
## run the example
p1 <- makePlot()
p1