I have an R script that creates multiple trend plots from tabular data. I need to export each plot as a png file. I have searched and tried (to no avail) using png(). It seems like this should be a relatively easy fix. My code is below. Could someone please offer some suggestions as to how I might solve this dilemma?
library(dplyr)
library(tidyr)
library(readr)
library(ggplot2)
library(magrittr)
library(stringi)
library(lubridate)
library(stats)
#load in datafiles
c_data <- read_csv ("C:/Projects/A_AX_tech_memo/data_analysis/AAX_data_2017_dtcts.csv")
C_data_out <-
c_data %>%
group_by(METHOD_NAME, STD_CON_LONG_NAME, SAMP_SITE_NAME, FILTERED_FLAG) %>%
ungroup() %>%
select(METHOD_NAME, STD_CON_LONG_NAME, SAMP_SITE_NAME, SAMP_DATE, STD_VALUE_RPTD, STD_ANAL_UNITS_RPTD, FILTERED_FLAG, STD_REPORTING_LIMIT, STD_REQUIRED_DETECTION_LIMIT, LAB_QUALIFIER) %>%
rename(Method = METHOD_NAME, Constit = STD_CON_LONG_NAME, Well = SAMP_SITE_NAME, Date = SAMP_DATE, Value = STD_VALUE_RPTD, Unit = STD_ANAL_UNITS_RPTD, Filtered = FILTERED_FLAG, MDL = STD_REPORTING_LIMIT, RDL = STD_REQUIRED_DETECTION_LIMIT, Flag =LAB_QUALIFIER) %>%
mutate(Date = mdy(Date))
dfs <- split(C_data_out, with(C_data_out, interaction(Well, Constit, Filtered)), drop = TRUE)
plotz <- lapply(dfs, function(x){
ggplot(data = x, aes(Date, Value)) +
geom_point(data = x, aes(color = Flag), size = 3) +
ggtitle(paste(x$Well, x$Constit, x$Filtered, sep =".")) +
ylab("ug/L or Pci/L") +
geom_smooth(method = "lm", se = FALSE, rm.na = TRUE) +
geom_hline(aes(yintercept=x$MDL, linetype="MDL"), color ="dark green", lwd=1, lty=2) +
geom_hline(aes(yintercept=x$RDL, linetype="RDL"), color ="purple", lwd=1, lty=2)
})
Here is an example of what I have tried with png:
names <- lapply(dfs, function(x){
ggtitle(paste(x$Well, x$Constit, x$Filtered, sep ="."))
})
plotz <- lapply(dfs, function(x){
mypath <- file.path("C:","plots", paste(names[i], ".png", sep = ""))
png(file=mypath)
ggplot(data = x, aes(Date, Value)) +
geom_point(data = x, aes(color = Flag), size = 3) +
ggtitle(paste(x$Well, x$Constit, x$Filtered, sep =".")) +
ylab("ug/L or Pci/L") +
geom_smooth(method = "lm", se = FALSE, rm.na = TRUE) +
geom_hline(aes(yintercept=x$MDL, linetype="MDL"), color ="dark green", lwd=1, lty=2) +
geom_hline(aes(yintercept=x$RDL, linetype="RDL"), color ="purple", lwd=1, lty=2)
dev.off()
})
This gives me this error:
Error in paste(names[i], ".png", sep = "") : object 'i' not found
Any and all help would be much appreciated.
This snippet works using the map and walk functions from purrr.
mtcars_split <-
mtcars %>%
split(.$cyl)
paths <-
paste0(names(mtcars_split),".png")
plots <-
mtcars_split %>%
map(~ ggplot(data=.,mapping = aes(y=mpg,x=wt)) + geom_point())
pwalk(list(filename=paths,plot=plots),ggsave)
Related
Given two monthly time series data sample from this link.
I will need to create one plot containing 3 subplots: plot1 for the original values, plot2 for month over month changes, and plot3 for year over year changes.
I'm able to draw the plot with code below, but the code is too redundant. So my question is how could achieve that in a concise way? Thanks.
library(xlsx)
library(ggplot2)
library(reshape)
library(dplyr)
library(tidyverse)
library(lubridate)
library(cowplot)
library(patchwork)
df <- read.xlsx('./sample_data.xlsx', 'Sheet1')
colnames(df)
# df
cols <- c('food_index', 'energy_index')
df <- df %>% mutate(date=as.Date(date)) %>%
mutate(across(-contains('date'), as.numeric)) %>%
mutate(date= floor_date(date, 'month')) %>%
group_by(date) %>%
summarise_at(vars(cols), funs(mean(., na.rm=TRUE))) %>%
mutate(across(cols, list(yoy = ~(. - lag(., 12))/lag(., 12)))*100) %>%
mutate(across(cols, list(mom = ~(. - lag(., 1))/lag(., 1)))*100) %>%
filter(date >= '2018-01-01' & date <= '2021-12-31') %>%
as.data.frame()
df1 <- df %>%
select(!grep('mom|yoy', names(df)))
df1_long <- melt(df1, id.vars = 'date')
plot1 <- ggplot(df1_long[!is.na(df1_long$value), ],
aes(x = date,
y = value,
col = variable)) +
geom_line(size=0.6, alpha=0.5) +
geom_point(size=1, alpha=0.8) +
labs(
x='',
y='Unit: $'
)
# MoM changes
df2 <- df %>%
select(grep('date|mom', names(df)))
df2_long <- melt(df2, id.vars = 'date')
plot2 <- ggplot(df2_long[!is.na(df2_long$value), ],
aes(x = date,
y = value,
col = variable)) +
geom_line(size=0.6, alpha=0.5) +
geom_point(size=1, alpha=0.8) +
labs(
x='',
y='Unit: %'
)
# YoY changes
df3 <- df %>%
select(grep('date|yoy', names(df)))
df3_long <- melt(df3, id.vars = 'date')
plot3 <- ggplot(df3_long[!is.na(df3_long$value), ],
aes(x = date,
y = value,
col = variable)) +
geom_line(size=0.6, alpha=0.5) +
geom_point(size=1, alpha=0.8) +
labs(
x='',
y='Unit: %'
)
plot <- plot1 + plot2 + plot3 + plot_layout(ncol=1)
# plot <- plot_grid(plot1, plot2, plot3, labels = c('Value', 'MoM', 'YoY'), label_size = 12)
plot
Out:
The expected result will be similar to the plot below (the upper plot will display the original data, the middle plot will display the mom changes data, and the lower plot will display the yoy changes data):
References:
https://waterdata.usgs.gov/blog/beyond-basic-plotting/
http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/81-ggplot2-easy-way-to-mix-multiple-graphs-on-the-same-page/
Side-by-side plots with ggplot2
Maybe this is what you are looking for? By reshaping your data to the right shape, using a plot function and e.g. purrr::map2 you could achieve your desired result without duplicating your code like so.
Using some fake random example data to mimic your true data:
library(tidyr)
library(dplyr)
library(ggplot2)
df_long <- df |>
rename(food_index_raw = food_index, energy_index_raw = energy_index) |>
pivot_longer(-date, names_to = c("variable", ".value"), names_pattern = "^(.*?_index)_(.*)$")
plot_fun <- function(x, y, ylab) {
x <- x |>
select(date, variable, value = .data[[y]]) |>
filter(!is.na(value))
ggplot(
x,
aes(
x = date,
y = value,
col = variable
)
) +
geom_line(size = 0.6, alpha = 0.5) +
geom_point(size = 1, alpha = 0.8) +
labs(
x = "",
y = ylab
)
}
yvars <- c("raw", "mom", "yoy")
ylabs <- paste0("Unit: ", c("$", "%", "%"))
plots <- purrr::map2(yvars, ylabs, plot_fun, x = df_long)
library(patchwork)
wrap_plots(plots) + plot_layout(ncol = 1)
DATA
set.seed(123)
date <- seq.POSIXt(as.POSIXct("2017-01-31"), as.POSIXct("2022-12-31"), by = "month")
food_index <- runif(length(date))
energy_index <- runif(length(date))
df <- data.frame(date, food_index, energy_index)
EDIT Adding subtitles to each plot when using patchwork is (as of the moment) a bit tricky. What I would do in this case would be to use a faceting "hack". To this end I slightly adjusted the function to take a subtitle argument and switched to purrr::pmap:
library(tidyr)
library(dplyr)
library(ggplot2)
df_long <- df |>
rename(food_index_raw = food_index, energy_index_raw = energy_index) |>
pivot_longer(-date, names_to = c("variable", ".value"), names_pattern = "^(.*?_index)_(.*)$")
plot_fun <- function(x, y, ylab, subtitle) {
x <- x |>
select(date, variable, value = .data[[y]]) |>
filter(!is.na(value))
ggplot(
x,
aes(
x = date,
y = value,
col = variable
)
) +
geom_line(size = 0.6, alpha = 0.5) +
geom_point(size = 1, alpha = 0.8) +
facet_wrap(~.env$subtitle) +
labs(
x = "",
y = ylab
) +
theme(strip.background = element_blank(), strip.text.x = element_text(hjust = 0))
}
yvars <- c("raw", "mom", "yoy")
ylabs <- paste0("Unit: ", c("$", "%", "%"))
subtitle <- c("Original", "Month-to-Month", "Year-to-Year")
plots <- purrr::pmap(list(y = yvars, ylab = ylabs, subtitle = subtitle), plot_fun, x = df_long)
library(patchwork)
wrap_plots(plots) + plot_layout(ncol = 1)
The target output is done with facets rather than stitching plots together. You could do this too if you like, but it requires reshaping your data in a different way. Which approach you take is really a matter of taste.
library(ggplot2)
library(dplyr)
yoy <- function(x) 100 * (x - lag(x, 13)) / lag(x, 12)
mom <- function(x) 100 * (x - lag(x)) / lag(x)
df %>%
mutate(date = as.Date(date, origin = "1899-12-30"),
`Actual value (Dollars).Food Index` = food_index,
`Month-on-month change (%).Food Index` = mom(food_index),
`Year-on-year change (%).Food Index` = yoy(food_index),
`Actual value (Dollars).Energy Index` = energy_index,
`Month-on-month change (%).Energy Index` = mom(energy_index),
`Year-on-year change (%).Energy Index` = yoy(energy_index)) %>%
select(-food_index, -energy_index) %>%
tidyr::pivot_longer(-1) %>%
filter(date > as.Date("2018-01-01")) %>%
tidyr::separate(name, into = c("series", "index"), sep = "\\.") %>%
ggplot(aes(date, value, color = index)) +
geom_point(na.rm = TRUE) +
geom_line() +
facet_grid(series~., scales = "free_y") +
theme_bw(base_size = 16)
Reproducible data taken from link in question
df <- structure(list(date = c(42766, 42794, 42825, 42855, 42886, 42916,
42947, 42978, 43008, 43039, 43069, 43100, 43131, 43159, 43190,
43220, 43251, 43281, 43312, 43343, 43373, 43404, 43434, 43465,
43496, 43524, 43555, 43585, 43616, 43646, 43677, 43708, 43738,
43769, 43799, 43830, 43861, 43890, 43921, 43951, 43982, 44012,
44043, 44074, 44104, 44135, 44165, 44196, 44227, 44255, 44286,
44316, 44347, 44377, 44408, 44439, 44469, 44500, 44530, 44561
), food_index = c(58.53, 61.23, 55.32, 55.34, 61.73, 56.91, 54.27,
59.08, 60.11, 66.01, 60.11, 63.41, 69.8, 72.45, 81.11, 89.64,
88.64, 88.62, 98.27, 111.11, 129.39, 140.14, 143.44, 169.21,
177.39, 163.88, 135.07, 151.28, 172.81, 143.82, 162.13, 172.22,
176.67, 179.3, 157.27, 169.12, 192.51, 194.2, 179.4, 169.1, 193.17,
174.92, 181.92, 188.41, 192.14, 203.41, 194.19, 174.3, 174.86,
182.33, 182.82, 185.36, 192.41, 195.59, 202.6, 201.51, 225.01,
243.78, 270.67, 304.57), energy_index = c(127.36, 119.87, 120.96,
112.09, 112.19, 109.24, 109.56, 106.89, 109.35, 108.35, 112.39,
117.77, 119.52, 122.24, 120.91, 125.41, 129.72, 135.25, 139.33,
148.6, 169.62, 184.23, 204.38, 198.55, 189.29, 202.47, 220.23,
240.67, 263.12, 249.74, 240.84, 243.42, 261.2, 256.76, 258.69,
277.98, 289.63, 293.46, 310.81, 318.68, 310.04, 302.17, 298.62,
260.92, 269.29, 258.84, 241.68, 224.18, 216.36, 226.57, 235.98,
253.86, 267.37, 261.99, 273.37, 280.91, 291.84, 297.88, 292.78,
289.79)), row.names = c(NA, 60L), class = "data.frame")
I would like to align the area of several plots, each of them created by separate chunks in an RMarkdown document (preferably .html) "nicely". My problem: Because of the different lengths of the y-axis texts. The plotted area doesn't overlap perfectly (A pity because my actual x-axis is months).
Setting the fig.width= and out.width= don't help here as they consider the axis text lengths.
Dummy Data chunk:
require(ggplot2)
df = expand.grid(y = LETTERS,
x = paste0('A', 1:10),
stringsAsFactors = FALSE)
set.seed(42)
df$fill = rnorm(nrow(df))
df2 = df
df2$y = unlist(lapply(lapply(df2$y, function(x) rep(x, 10)), paste0, collapse = ''))
Plot-Chunk1:
gg1 = ggplot(df, aes(y = y, x = x, fill = fill)) +
geom_tile()
gg1
Plot-Chunk2:
gg2 = ggplot(df2, aes(y = y, x = x, fill = fill)) +
geom_tile()
gg2
The plots in the RMarkdown document should look like that (red lines highlight the desired alignment):
I achieved this with the patchwork package. However, like this I can only use one chunk and not multiple.
Patchwork-Plot-Chunk:
require(patchwork)
gg1 / gg2 +
plot_annotation(tag_levels = 'A')
Edited (tidier?) solution: cowplot::align_plots
Having a bit of a play around with cowplot::align_plots, it would be possible to set a standard panel width to use across all graphs. But to do this across chunks when you're constructing each graph 'blind' to the forthcoming ones, you could create a 'template' plot with labels as wide as needed (gg_set below). Each subsequent graph would then adopt the sizing of this unused plot:
require(ggplot2)
df <- expand.grid(y = LETTERS,
x = paste0('A', 1:10),
stringsAsFactors = FALSE)
set.seed(42)
df$fill = rnorm(nrow(df))
df2 <- df
df2$y <-
unlist(lapply(lapply(df2$y, function(x)
rep(x, 5)), paste0, collapse = ''))
# df for setting max size needed - might need experimented with
dfset <- df
dfset$y <-
unlist(lapply(lapply(df$y, function(x)
rep(x, 10)), paste0, collapse = ''))
# 'template' plot
gg_set <- ggplot(dfset, aes(y = y, x = x, fill = fill)) +
geom_tile()
require(cowplot)
# Chunk 1
gg1 <- ggplot(df, aes(y = y, x = x, fill = fill)) +
geom_tile()
ggs <- align_plots(gg_set, gg1, align = "v")
# Only extracting relevant graph.
ggdraw(ggs[[2]])
# Chunk 2
gg2 <- ggplot(df2, aes(y = y, x = x, fill = fill)) +
geom_tile()
ggs <- align_plots(gg_set, gg2, align = "v")
ggdraw(ggs[[2]])
Created on 2021-12-17 by the reprex package (v2.0.1)
Untidy former solution
I've previously used an admittedly messy solution, which really just involves padding all labels with blank rows above and below to greater than the max length:
require(ggplot2)
#> Loading required package: ggplot2
df <- expand.grid(y = LETTERS,
x = paste0('A', 1:10),
stringsAsFactors = FALSE)
set.seed(42)
df$fill = rnorm(nrow(df))
df2 <- df
df2$y <-
unlist(lapply(lapply(df2$y, function(x)
rep(x, 10)), paste0, collapse = ''))
df$y <-
paste0(paste0(rep(" ", 40), collapse = ""), "\n", df$y, "\n", paste0(rep(" ", 40)))
df2$y <-
paste0(paste0(rep(" ", 40), collapse = ""), "\n", df2$y, "\n", paste0(rep(" ", 40)))
gg1 <- ggplot(df, aes(y = y, x = x, fill = fill)) +
geom_tile()
gg1
gg2 <- ggplot(df2, aes(y = y, x = x, fill = fill)) +
geom_tile()
gg2
I would hope their is a more formal solution which allows a static panel sizing, and I look forward to hearing other answers. But had used this as a quick fix!
Created on 2021-12-17 by the reprex package (v2.0.1)
The patchwork package also includes the function align_patches() which works similar to cowplot::align_plots().
gg_l = patchwork::align_patches(gg1,
gg2)
Plot-Chunk1:
gg_l[[1]]
Plot-Chunk2:
gg_l[[2]]
Data from question.
I would like to be able to plot each of "X1 by grpA", "X2 by grpA", "X3 by grpB", "X1 by grpB", "X2 by grpB", and "x3 by grpB" using ggplot2::ggplot() in conjunction with a for loop.
So far, I can get it to almost work, but the argument for the column of the grouping variable in the facet_grid() function does not resolve correctly when I try to use tidy_eval properties. It does work, however, when I type the column name explicitly, but of course, having to type the name explicitly would make it so I would not be able to dynamically change the grouping variable.
I provide the following data-set returned by the following code snippet to give context to my question:
set.seed(1)
dfr <- tibble(x1 = factor(sample(letters[1:7], 50, replace = T), levels=letters[1:7]),
x2 = factor(sample(letters[1:7], 50, replace = T), levels=letters[1:7]),
x3 = factor(sample(letters[1:7], 50, replace = T), levels=letters[1:7]),
grpA = factor(sample(c("grp1","grp2"),50, prob=c(0.3, 0.7) ,replace=T), levels = c("grp1", "grp2")),
grpB = factor(sample(c("grp1","grp2"),50, prob=c(0.6, 0.4) ,replace=T), levels = c("grp1", "grp2"))
)
head(df)
I also provide a function that creates the plotting data I need to make the grouped plots. It accepts strings as arguments for the parameters 'groupvar' and 'mainvar':
plot_data_prepr <- function(dat, groupvar, mainvar){
groupvar <- sym(groupvar)
mainvar <- sym(mainvar)
plot_data <- dat %>%
group_by(!!groupvar) %>%
count(!!mainvar, .drop = F) %>% drop_na() %>%
mutate(pct = n/sum(n),
pct2 = ifelse(n == 0, 0.005, n/sum(n)),
grp_tot = sum(n),
pct_lab = paste0(format(pct*100, digits = 1),'%'),
pct_pos = pct2 + .02)
return(plot_data)
}
here is normal usage of the function:
plot_data_prepr(dat = dfr, groupvar = "grpA", mainvar = "x1")
Now I share my for loop that fails when I try to use tidy_eval in the facet_grid() function in the context of ggplot(); the returned error = "Error in !sgvar : invalid argument type"
"FAILING EXAMPLE:"
for (i in seq_along(names(dfr)[1:3])){
mvar <- names(dfr)[i]
print(mvar)
gvar <- names(dfr[4])
print(gvar)
smvar <- sym(mvar)
sgvar <- sym(gvar)
plot <- ggplot(data=plot_data_prepr(dfr, gvar, mvar),
mapping = aes(x=!!smvar, y = pct2, fill = !!smvar)) +
geom_bar(stat = 'identity') +
ylim(0,1) +
geom_text(aes(x=!!smvar, label=pct_lab, y = pct_pos + .02)) +
facet_grid(. ~ !!sgvar) +
ggtitle(paste0(mvar," by ",gvar))
print(plot)
}
When I run the loop by explicitly typing grpA in place of !!sgvar in the facet_grid() function, it works for some reason:
"FUNCTIONING BUT NOT WHAT I WANT EXAMPLE:"
for (i in seq_along(names(dfr)[1:3])){
mvar <- names(dfr)[i]
print(mvar)
gvar <- names(dfr[4])
print(gvar)
smvar <- sym(mvar)
sgvar <- sym(gvar)
plot <- ggplot(data=plot_data_prepr(dfr, gvar, mvar),
mapping = aes(x=!!smvar, y = pct2, fill = !!smvar)) +
geom_bar(stat = 'identity') +
ylim(0,1) +
geom_text(aes(x=!!smvar, label=pct_lab, y = pct_pos + .02)) +
facet_grid(. ~ grpA) +
ggtitle(paste0(mvar," by ",gvar))
print(plot)
}
Of course, if I wanted to loop through a set of grouping variables, then needing to explicitly type each one would not allow for looping. Could someone explain why my code with the 'bang bang' operator inside facet_gric() doesn't work properly in the 'FAILING EXAMPLE' and also suggest how to remedy this error?
Thank you.
It's difficult to piece together exactly what you're looking for, since your example code has errors, unassigned variable names and pieces of code missing. However, I think you're wanting the loop to print all of the pairs of grouping variables and main variables by cycling through the names of your data frame.
So that there is no dubiety, here is a full reprex:
Load packages and create reproducible data:
library(dplyr)
library(ggplot2)
set.seed(1)
df <- tibble(x1 = factor(sample(letters[1:7], 50, replace = TRUE)),
x2 = factor(sample(letters[1:7], 50, replace = TRUE)),
x3 = factor(sample(letters[1:7], 50, replace = TRUE)),
grpA = factor(sample(c("grp1", "grp2"), 50,
prob = c(0.3, 0.7), replace=TRUE)),
grpB = factor(sample(c("grp1", "grp2"), 50,
prob = c(0.6, 0.4), replace=TRUE)))
Define data preparation function
plot_data_prepr <- function(dat, groupvar, mainvar)
{
groupvar <- sym(groupvar)
mainvar <- sym(mainvar)
plot_data <- dat %>%
group_by(!!groupvar) %>%
count(!!mainvar, .drop = F) %>% tidyr::drop_na() %>%
mutate(pct = n/sum(n),
pct2 = ifelse(n == 0, 0.005, n/sum(n)),
grp_tot = sum(n),
pct_lab = paste0(format(pct*100, digits = 1),'%'),
pct_pos = pct2 + .02)
return(plot_data)
}
Loop to create all 6 plots
for(gvar in names(df)[4:5]){
for(mvar in names(df)[1:3])
{
print(ggplot(plot_data_prepr(df, gvar, mvar),
aes(x = !!sym(mvar), y = pct2, fill = !!sym(mvar))) +
geom_bar(stat = 'identity') +
ylim(0,1) +
geom_text(aes(label=pct_lab, y = pct_pos + .02)) +
facet_grid(as.formula(paste0(".~", gvar))) +
ggtitle(paste0(mvar, " by ", gvar))
)
}
}
Output:
Created on 2020-06-30 by the reprex package (v0.3.0)
So I have been making this scatterplot using ggplot in R.
By using this code as listed below in a regular r script in RStudio I am able to produce the plot that I want to without any errors.
The problem is when I am trying to use the same code in a chunk using rmarkdown to knit to PDF.
I get an error saying: Error in check_breaks_labels(breaks, labels): object percent not found.
Any suggestions? Hope the reproducable example is ok.
library(tidyquant)
library(timetk)
library(ggplot2)
SPY <- tq_get("SPY", from = '2010-01-01',
to = "2020-04-04",
get = "stock.prices")
FXI <- tq_get("FXI", from = '2010-01-01',
to = "2020-04-04",
get = "stock.prices")
QQQ <- tq_get("QQQ", from = '2010-01-01',
to = "2020-04-04",
get = "stock.prices")
SPY_monthly_returns <- SPY %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = "monthly",
col_rename = "SPY_ret")
FXI_monthly_returns <- FXI %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = "monthly",
col_rename = "FXI_ret")
QQQ_monthly_returns <- QQQ %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = "monthly",
col_rename = "QQQ_ret")
SPY_monthly_mean_ret <- SPY_monthly_returns %>%
select(SPY_ret) %>%
.[[1]] %>%
mean(na.rm = TRUE)
FXI_monthly_mean_ret <- FXI_monthly_returns %>%
select(FXI_ret) %>%
.[[1]] %>%
mean(na.rm = TRUE)
QQQ_monthly_mean_ret <- QQQ_monthly_returns %>%
select(QQQ_ret) %>%
.[[1]] %>%
mean(na.rm = TRUE)
SPY_monthly_sd_ret <- SPY_monthly_returns %>%
select(SPY_ret) %>%
.[[1]] %>%
sd()
FXI_monthly_sd_ret <- FXI_monthly_returns %>%
select(FXI_ret) %>%
.[[1]] %>%
sd()
QQQ_monthly_sd_ret <- QQQ_monthly_returns %>%
select(QQQ_ret) %>%
.[[1]] %>%
sd()
d <- data.frame(meanret = c(SPY_monthly_mean_ret,FXI_monthly_mean_ret,QQQ_monthly_mean_ret), sd = c(SPY_monthly_sd_ret,FXI_monthly_sd_ret,QQQ_monthly_sd_ret), names = c("SPY","FXI","QQQ"))
ggplot(d, aes(sd,meanret, color= ticker)) +
geom_point(size=1) + geom_text(aes(label=names)) +
ggtitle("Monthly Risk-Return Plot") + xlab("Volatility") +
ylab("Mean Return") + theme_bw() +
scale_y_continuous(label = percent, limits = c(0, 0.02)) +
scale_x_continuous(label = percent, limits = c(0, 0.08))
The solution was to require(scales) and after that specify "ticker" as I had not done that in my code chunk.
Both were suggested really quickly which is very helpful to me who is learning while writing my bachelor thesis. Thanks a lot!
You forgot to quote percent.
ggplot(d, aes(sd,meanret, color= ticker)) +
geom_point(size=1) + geom_text(aes(label=names)) +
ggtitle("Monthly Risk-Return Plot") + xlab("Volatility") +
ylab("Mean Return") + theme_bw() +
scale_y_continuous(label = "percent", limits = c(0, 0.02)) +
scale_x_continuous(label = "percent", limits = c(0, 0.08))
When not using quotes, ggplot is looking for an object named percent that should hold a string. Because of this, you could also do
p <- ggplot(...)
mylabel = "This is my label"
p + scale_x_continuous(label = "mylabel", limits = c(0, 0.08))
I have the following data in R:
id <- factor(seq(1:72))
initial.e <- rnorm(n=72, mean = 21.51, sd = 6.58)
initial.f <- rnorm(n = 72, mean = 20.75, sd = 3.378)
final.e <- rnorm(n = 72, mean = 19.81, sd = 7.48)
final.f <- rnorm(n = 72, mean = 19.77, sd = 5.389)
data <- data.frame(id,initial.e, initial.f, final.e, final.f)
I need to create a scatter plot with two straight trendlines for e and f, but I'm lost on how to create that. I found this article: https://sakaluk.wordpress.com/2015/08/27/6-make-it-pretty-plotting-2-way-interactions-with-ggplot2/ which I tried following, but didn't work the way I wanted.
I also tried using melt from reshape2 package, but I can't get the plots to show the way I want to - with two trendlines for e and f in the scatter plot.
datamelt <- melt(data, id = 'id')
datamelt <- datamelt %>% mutate(names = ifelse(datamelt$variable %in% c('initial.e', 'initial.f'), 'Before', 'After'))
datamelt <- datamelt %>% mutate(types = ifelse(datamelt$variable %in% c('final.e', 'final.f'), 'e', 'f'))
After this things went downhill. All the codes I tried either have some basic scatter plot with geom_smooth() or just some generic error.
EDIT
The plot should contain scatterplot containing relationship between intial.e and initial.f with a trend line, and another relationship between final.e and final.f with a trend line in the same plot.
I think what you're looking for is something like this: I haven't tested the code, but it should give you an idea
ggplot(data) +
geom_point(aes(x=initial.e, y=initial.f)) +
geom_smooth(method = "lm", se = FALSE, aes(initial.e, final.e)) +
geom_point(aes(x=final.e, y = final.f)) +
geom_smooth(method = "lm", se = FALSE, aes(final.e, final.f))
How about something like this?
data %>%
gather(k, value, -id) %>%
mutate(
state = gsub("(\\.e$|\\.f$)", "", k),
what = gsub("(initial\\.|final\\.)", "", k)) %>%
ggplot(aes(id, value, colour = what)) +
geom_line() +
facet_wrap(~ state)
Or with points
data %>%
gather(k, value, -id) %>%
mutate(
state = gsub("(\\.e$|\\.f$)", "", k),
what = gsub("(initial\\.|final\\.)", "", k)) %>%
ggplot(aes(id, value, colour = what)) +
geom_line() +
geom_point() +
facet_wrap(~ state)
Update
data %>%
gather(k, value, -id) %>%
mutate(
state = gsub("(\\.e$|\\.f$)", "", k),
what = gsub("(initial\\.|final\\.)", "", k)) %>%
select(-k) %>%
spread(state, value) %>%
ggplot(aes(x = initial, y = final, colour = what, fill = what)) +
geom_smooth(fullrange = T, method = "lm") +
geom_point()
We're showing a trend-line based on a simple linear regression lm, including confidence band (disable with se = F inside geom_smooth). You could also show a LOESS trend with method = loess inside geom_smooth. See ?geom_smooth for more details.