exporting to powerpoint using a function to create graphs - r

I’m currently trying to export multiple graphs into the same Powerpoint presentation in R. The multiple graphs are created using a function.
However, when I run the code below it produces a separate PowerPoint for each of the variables (Age_Banded, InstalmentsRequestedInd and NetPrem_Banded), whereas I want them in the same one for each of CalcCommission and CalcCommission_Perc. I’m assuming this is because ggsave just looks at the last plot.
Any ideas?
Also, the CreateGraph function is just producing the graph for CalcCommission_Perc. Both CalcCommission and CalcCommission_Perc work independently when the other is removed…
require(ggplot2)
require(RDCOMClient)
require(R2PPT)
date <- "20160401"
# Plot actual vs. predicted average commission for each level of `Variable`
# and place the plot in a PowerPoint presentation via R2PPT.
#
# Arguments:
#   Variable  - column selector used to index Converted_A and Final_cut
#               (the calls in this script pass a column name as a string)
#   FName     - title shown on the x axis
#   AxisAngle - rotation angle applied to the x-axis tick labels
#
# Converted_A, Final_cut and date are not arguments; they are looked up
# outside the function.
# The last expression is unlink(), so its result (not mypres) is returned.
CalcCommission <- function(Variable,FName,AxisAngle){
# Mean observed commission per level of Variable
Actual_Commission <- tapply(Converted_A$Commission,Converted_A[Variable],mean)
# Origination.Demand-weighted mean of Commission_Response per level, divided by 100
Predicted_Commission <- tapply(Final_cut$Commission_Response*Final_cut$Origination.Demand,Final_cut[Variable],sum)/tapply(Final_cut$Origination.Demand,Final_cut[Variable],sum)/100
# One row per level; x holds the level names
Data <- data.frame(x=names(Actual_Commission),Actual_Commission,Predicted_Commission)
# Levels are drawn at positions 1..n; scale_x_continuous() relabels those
# positions with the level names
Commission_Plot <- ggplot(Data,aes(x=seq(length(unique(x))))) +
geom_line(aes(y=Actual_Commission, colour = "Actual Commission")) +
geom_line(aes(y=Predicted_Commission, colour = "Predicted Commission")) +
scale_x_continuous(name = FName,
breaks = seq(length(unique(Data$x))),
labels = unique(Data$x)) +
scale_y_continuous(name = "Commission £") +
ggtitle("Commission £") +
theme(legend.title=element_blank(),axis.text.x = element_text(angle = AxisAngle, hjust = 1))
# NOTE(review): PPT.Init() runs on every call and mypres is local to this
# function, so each call presumably starts its own presentation - confirm
mypres <- PPT.Init(method="RDCOMClient")
mypres<-PPT.AddTitleSlide(mypres,title="Commission £",subtitle=date)
# Render the plot to a temporary .wmf file so it can be inserted into a slide
ggsave(my_temp_file<-paste(tempfile(),".wmf",sep=""), plot=Commission_Plot)
mypres <- PPT.AddBlankSlide(mypres)
mypres <- PPT.AddGraphicstoSlide(mypres,file=my_temp_file)
# Delete the temporary image file
unlink(my_temp_file)
}
# Plot actual vs. predicted commission as a percentage of net premium for each
# level of `Variable` and place the plot in a PowerPoint presentation via R2PPT.
#
# Arguments:
#   Variable  - column selector used to index Converted_A and Final_cut
#               (the calls in this script pass a column name as a string)
#   FName     - title shown on the x axis
#   AxisAngle - rotation angle applied to the x-axis tick labels
#
# Converted_A, Final_cut and date are not arguments; they are looked up
# outside the function.
# The last expression is unlink(), so its result (not mypres) is returned.
CalcCommission_Perc <- function(Variable,FName,AxisAngle){
  # Mean of Commission / NetPremium (as a percentage) per level of Variable
  Actual_Commission_Perc <- tapply((Converted_A$Commission/Converted_A$NetPremium)*100,Converted_A[Variable],mean)
  # Demand-weighted mean commission (divided by 100) over demand-weighted mean
  # net premium, expressed as a percentage
  Predicted_Commission_Perc <- (((tapply(Final_cut$Commission_Response*Final_cut$Origination.Demand,Final_cut[Variable],sum)/tapply(Final_cut$Origination.Demand,Final_cut[Variable],sum))/100)/
    (tapply(Final_cut$Prem_Net*Final_cut$Origination.Demand,Final_cut[Variable],sum)/tapply(Final_cut$Origination.Demand,Final_cut[Variable],sum)))*100
  # One row per level; x holds the level names
  Data <- data.frame(x=names(Actual_Commission_Perc),Actual_Commission_Perc,Predicted_Commission_Perc)
  # Levels are drawn at positions 1..n; scale_x_continuous() relabels those
  # positions with the level names. The y axis is labelled as a percentage to
  # match the plotted values and the title (it previously read "Commission £").
  Commission_Perc_Plot <- ggplot(Data,aes(x=seq(length(unique(x))))) +
    geom_line(aes(y=Actual_Commission_Perc, colour = "Actual Commission %")) +
    geom_line(aes(y=Predicted_Commission_Perc, colour = "Predicted Commission %")) +
    scale_x_continuous(name = FName,
                       breaks = seq(length(unique(Data$x))),
                       labels = unique(Data$x)) +
    scale_y_continuous(name = "Commission %") +
    ggtitle("Commission %") +
    theme(legend.title=element_blank(),axis.text.x = element_text(angle = AxisAngle, hjust = 1))
  # NOTE(review): PPT.Init() runs on every call and mypres is local to this
  # function, so each call presumably starts its own presentation - confirm
  mypres <- PPT.Init(method="RDCOMClient")
  mypres<-PPT.AddTitleSlide(mypres,title="Commission %",subtitle=date)
  # Render the plot to a temporary .wmf file so it can be inserted into a slide
  ggsave(my_temp_file<-paste(tempfile(),".wmf",sep=""), plot=Commission_Perc_Plot)
  mypres <- PPT.AddBlankSlide(mypres)
  mypres <- PPT.AddGraphicstoSlide(mypres,file=my_temp_file)
  # Delete the temporary image file
  unlink(my_temp_file)
}
# Run both commission plots (£ and %) for one grouping variable.
# The three arguments are passed through unchanged to CalcCommission() and
# CalcCommission_Perc(); the value of the second call is what gets returned.
CreateGraph <- function(Variable,FName,AxisAngle){
CalcCommission(Variable,FName,AxisAngle)
CalcCommission_Perc(Variable,FName,AxisAngle)
}
# One call per grouping variable: column name, x-axis title, tick-label angle
CreateGraph("Age_Banded","Age",0)
CreateGraph("InstalmentsRequestedInd","DD Payment",0)
CreateGraph("NetPrem_Banded","Net Premium",45)

Here's one way to save two plots in one pptx file:
library(ReporteRs)
library(ggplot2)
library(magrittr)
# Build a two-slide deck in a single pptx object and write it to disk once.
# The output path is created up front so that `ppfn` is an ordinary variable
# instead of a `<<-` side effect hidden inside the last pipeline step.
ppfn <- tempfile(fileext = ".pptx")
pptx() %>%
  # Slide 1: base-graphics plot, passed as a function that draws it
  addSlide("Title and Content") %>%
  addTitle("plot #1") %>%
  addPlot(function() barplot( 1:8, col = 1:8 )) %>%
  # Slide 2: ggplot2 plot, drawn by calling print() on the plot object
  addSlide("Title and Content") %>%
  addTitle("plot #2") %>%
  addPlot(fun = print, x = qplot(Sepal.Length, Petal.Length, data = iris, color = Species, size = Petal.Width, alpha = I(0.7) )) %>%
  writeDoc(ppfn)
ppfn contains the PowerPoint file name including its path. Check out the package documentation here.

Answer above is outdated, as ReporteRs has been removed from CRAN and is superseded by officer. I just made a new package export built on top of officer that easily allows one to export several graphs to a single Powerpoint presentation using the graph2ppt() command and the append=TRUE option, e.g. to produce a presentation with 2 slides :
# One-off installation of the export package
install.packages("export")
library(export)
library(ggplot2)
# First plot; graph2ppt() is called without a plot argument, so it presumably
# exports the plot that was just drawn - confirm in the export documentation
qplot(Sepal.Length, Petal.Length, data = iris, color = Species,
size = Petal.Width, alpha = I(0.7))
graph2ppt(file="plots.pptx", width=6, height=5)
# Second plot; append=TRUE adds it to the same plots.pptx file
qplot(Sepal.Width, Petal.Length, data = iris, color = Species,
size = Petal.Width, alpha = I(0.7))
graph2ppt(file="plots.pptx", width=6, height=5, append=TRUE)

Related

Retaining editable text in exported vector formats in R/ggplot

It seems like there has to be a way to retain editable text when exporting graphics from R into a vector format (e.g., pdf, eps, svg), but I have not been able to locate it. My graphics are primarily created using ggplot2, and I am running R via RStudio on Windows.
# Generate a reproducible example plot
set.seed(1)
# 30 observations in three groups (a, b, c) of 10
df <- data.frame(
gp = factor(rep(letters[1:3], each = 10)),
y = rnorm(30)
)
# Per-group mean and sd; gp = d$gp repeats the summary for every row of the group
ds <- do.call(rbind, lapply(split(df, df$gp), function(d) {
data.frame(mean = mean(d$y), sd = sd(d$y), gp = d$gp)
}))
# Raw points plus the group means in red
ggplot(df, aes(gp, y)) +
geom_point() +
geom_point(data = ds, aes(y = mean), colour = 'red', size = 3)
# Export to several vector formats; no plot is passed to ggsave(), so each
# call relies on its default plot argument
ggsave("plot.eps")
ggsave("plot.pdf")
ggsave("plot_cairo.pdf", device=cairo_pdf)
ggsave("plot.svg")
All of these options generate a vector file with text (axis labels, etc) converted to outlines, which are no longer editable as text - which defeats a major point of the vector format, at least for my use case.
OK, so in typical use cases, the svglite library will retain text - see the plot 1 export below. If you put two plots together using the patchwork library, the text is converted to outlines and is no longer retained as editable text.
# Reproducible example data: 30 observations in three groups of 10
set.seed(1)
df <- data.frame(
gp = factor(rep(letters[1:3], each = 10)),
y = rnorm(30)
)
# Per-group mean and sd; gp = d$gp repeats the summary for every row of the group
ds <- do.call(rbind, lapply(split(df, df$gp), function(d) {
data.frame(mean = mean(d$y), sd = sd(d$y), gp = d$gp)
}))
# p1 and p2 differ only in the colour of the group-mean points
p1<-ggplot(df, aes(gp, y)) +
geom_point() +
geom_point(data = ds, aes(y = mean), colour = 'red', size = 3)
p2<-ggplot(df, aes(gp, y)) +
geom_point() +
geom_point(data = ds, aes(y = mean), colour = 'green', size = 3)
library(patchwork)
# Combine the two plots with patchwork's `|` operator
p3 <- p1|p2
# NOTE(review): `device = svglite` passes the svglite function itself, so the
# svglite package presumably has to be attached before these calls - confirm
ggsave(plot = p1, "p1.svg", device = svglite)
ggsave(plot = p3, "p3.svg", device = svglite)

how do i combine multiple data sources in ggplot using split and sapply?

This question is linked to a previous one answered by @Rui Barradas and @Duck, but I need more help. Previous link here:
how do i vectorise (automate) plot creation in R
Basically, I need to combine 3 datasets into one plot with a secondary y axis. All datasets need to be split by SITENAME and will facet wrap by Sampling.Year. I am using split and sapply. Being facet wrap the plots look something like this:
However, i'm now trying to add the two other data sources into the plots, to look something like this:
But I am struggling to add the two other data sources and get them to split by SITENAME. Here is my code so far...
Record plot format as a function to be applied to a split list df (ideally 'df' would be added as geom_line with a secondary y axis, and 'FF_start_dates' will be added as a vertical dashed line):
# Build the plot for one site: Daily.Ave.PAF against DATE, points coloured by
# Risk, one facet per Sampling.Year.
#
# AllDates_TPAF - data frame for a single site; the code reads its DATE,
#                 Daily.Ave.PAF, Risk, Sampling.Year and SITENAME columns.
# Returns the ggplot object (nothing is drawn here).
#
# theme_bw() is a complete theme: adding it replaces every theme setting made
# before it. It is therefore applied first, so that the axis-text rotation and
# the font sizes set by the theme() calls after it actually take effect.
SITENAME_plot <- function(AllDates_TPAF){
  ggplot(AllDates_TPAF, aes(DATE, Daily.Ave.PAF)) +
    geom_point(aes(colour = Risk), size = 3) +
    scale_colour_manual(values=c("Very Low" = "dark green","Low" = "light green",
                                 "Moderate" = "yellow", "High" = "orange", "Very High" = "red"), drop = FALSE) +
    labs(x = "Month", y = "Total PAF (% affected)") +
    scale_x_date(breaks = "1 month", labels = scales::date_format("%B")) +
    facet_wrap(~Sampling.Year, ncol = 1, scales = "free")+
    scale_y_continuous(limits = c(0, 100), sec.axis = sec_axis(~., name = "Water level (m)")) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    theme(legend.text=element_text(size=15)) +
    theme(axis.text=element_text(size=15),
          axis.title=element_text(size=15,face="bold")) +
    guides(color = guide_legend(reverse = TRUE))+
    ggtitle(unique(AllDates_TPAF$SITENAME))
}
plot write function:
# Write one plot to "<dir>/<name>.jpg" and return that path.
#
# Arguments:
#   name - file name without extension
#   g    - object to draw; it is rendered by calling print(g)
#   dir  - output directory
#
# The file keeps its ".jpg" name, so the jpeg() device is used to make the
# file contents match the extension (the png() device would write PNG data
# into a file called .jpg).
SITENAME_plot_write <- function(name, g, dir = "N:/abc/"){
  flname <- file.path(dir, name)
  flname <- paste0(flname, ".jpg")
  jpeg(filename = flname, width = 1500, height = 1000)
  # Close the device even if print(g) fails, so no device is left open
  on.exit(dev.off(), add = TRUE)
  print(g)
  flname
}
Apply function to list split by SITENAME:
# One data frame per site
sp1 <- split(AllDates_TPAF, AllDates_TPAF$SITENAME)
# One plot per site, kept as a named list
gg_list <- lapply(sp1, SITENAME_plot)
# Write each plot to the working directory, using the site name as file name
mapply(SITENAME_plot_write, names(gg_list), gg_list, MoreArgs = list(dir = getwd()))
# No trailing dev.off(): SITENAME_plot_write() closes the device it opens, so
# an extra call here would have no device of its own left to close.
I have uploaded samples of all 3 datasets here: Sample Data
Apologies for not using gsub but there was too much data and I couldn't get it to work properly
thanks in advance for any help you can give, even if it is just to point me towards a web tutorial of some kind.
You can try the following code. I used the data you shared. Just be careful with the names in all datasets. Ideally, key columns such as DATE and Sampling.Year should be present in all data frames before making the split. Also, some variables such as Risk were absent, so I added an example variable with the same name. Here is the code; I added a function for the plot you want:
library(tidyverse)
library(readxl)
# Main data set: sheet 1 of the workbook
df1 <- read_excel('Sample data.xlsx',1)
# Create a placeholder Risk column: the five levels cycled 67 times plus one
# extra value (336 values in total), so this assumes df1 has that many rows
df1$Risk <- c(rep(c("Very Low","Low","Moderate","High","Very High"),67),"Very High")
# The two other data sets: sheets 2 and 3 of the same workbook
df2 <- read_excel('Sample data.xlsx',2)
df3 <- read_excel('Sample data.xlsx',3)
# Split each data set by site; note the site column of df3 is called `Site Name`
L1 <- split(df1,df1$SITENAME)
L2 <- split(df2,df2$SITENAME)
L3 <- split(df3,df3$`Site Name`)
#Function to create plots
# Build the combined plot for one site.
#
# Arguments:
#   x - main data; the code reads Sampling.Year, DATE, Daily.Ave.PAF, Risk
#       and SITENAME
#   y - second data set, merged onto x by Sampling.Year and DATE; supplies
#       Height, drawn as a line against the secondary y axis
#   z - third data set; its 4th column is renamed to DATE and drawn as dashed
#       vertical lines
# Returns the ggplot object.
myplot <- function(x,y,z)
{
  # Keep one row per DATE / Sampling.Year in y, and drop its SITENAME column so
  # the merge does not create a duplicate
  y <- y[!duplicated(paste(y$DATE,y$Sampling.Year)),]
  y$SITENAME <- NULL
  # Left join: every row of x is kept, so Height is NA where y has no match
  xy <- merge(x,y,by = c('Sampling.Year','DATE'),all.x=TRUE)
  # Format to dates
  xy$DATE <- as.Date(xy$DATE)
  # Factor that maps Height onto the primary axis. na.rm = TRUE is required
  # because the left join above can introduce NAs; without it a single NA
  # would turn the factor, and with it the whole line and secondary axis, into NA
  scaleFactor <- max(xy$Daily.Ave.PAF, na.rm = TRUE) / max(xy$Height, na.rm = TRUE)
  # Rename for consistency in names
  # NOTE(review): relies on the date being the 4th column of z - confirm
  names(z)[4] <- 'DATE'
  # Format date
  z$DATE <- as.Date(z$DATE)
  # theme_bw() is a complete theme that replaces all earlier theme settings,
  # so it is added before the theme() tweaks rather than after them
  G <- ggplot(xy, aes(DATE, Daily.Ave.PAF)) +
    geom_point(aes(colour = Risk), size = 3) +
    scale_colour_manual(values=c("Very Low" = "dark green","Low" = "light green",
                                 "Moderate" = "yellow", "High" = "orange", "Very High" = "red"), drop = FALSE) +
    scale_x_date(breaks = "1 month", labels = scales::date_format("%b %Y")) +
    geom_line(aes(x=DATE,y=Height*scaleFactor))+
    scale_y_continuous(name="Total PAF (% affected)", sec.axis=sec_axis(~./scaleFactor, name="Water level (m)"))+
    labs(x = "Month") +
    geom_vline(data = z,aes(xintercept = DATE),linetype="dashed")+
    facet_wrap(~Sampling.Year, ncol = 1, scales = "free")+
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    theme(legend.text=element_text(size=15)) +
    theme(axis.text=element_text(size=15),
          axis.title=element_text(size=15,face="bold")) +
    guides(color = guide_legend(reverse = TRUE))+
    ggtitle(unique(xy$SITENAME))
  G
}
# Create a list of plots: myplot() is called once per position across L1, L2
# and L3, so this assumes the three lists hold the sites in the same order
Lplots <- mapply(FUN = myplot,x=L1,y=L2,z=L3,SIMPLIFY = FALSE)
# File names: one .png per element name of Lplots
vnames <- paste0(names(Lplots),'.png')
# Save each plot under its file name, 30 cm wide
mapply(ggsave, Lplots,filename = vnames,width = 30,units = 'cm')
You will end up with plots like these saved in your dir:
Some dashed lines do not appear in plots because they were not present in the data you provided.

The mpg dataset in R

I am trying to figure out a way to color my point on a geom_point plot based upon the type of transmission, but in the mpg dataset, the trans column has different names for auto and manual trans. How can I rename the values in the trans column to be either Auto for automatic and Manual for manual transmissions? I also attached a picture of the desired graph for reference.
Here is my main plot code:
# Copy of mpg; not used again in this snippet
data <- mpg
n <- nrow(mpg)
# Rows whose trans value starts with "a" / with "m"
mpg_auto <- subset(mpg, substring(trans[1:n],1,1)=="a")
mpg_manual <- subset(mpg, substring(trans[1:n],1,1)=="m")
# NOTE(review): levels= receives two whole data frames combined with c(),
# not the trans values to be recoded - presumably the source of the problem
# described in the question
mpg$trans <- factor(mpg$trans, levels=c(mpg_auto,mpg_manual),
labels = c("0","1"))
# Subset to be encircled: hwy above 30 and displ below 3
mpg_select <- subset(mpg, mpg$hwy > 30 & mpg$displ < 3)
mpg_select <- as.data.frame(mpg_select)
# geom_encircle(), annotate_textp() and ggMarginal() are not defined in this
# snippet; they presumably come from add-on packages loaded elsewhere
gg<- ggplot(mpg) + aes(x = displ, y = hwy) +
geom_point(aes(col = trans))+
geom_encircle(aes(x = displ, y = hwy),
data = mpg_select,
color= "darkgreen",
expand = .05,
size = 2) +
annotate_textp(x = .2, y = .9, size = 15,
label = "Efficient Vehicle", color = "darkgreen")+
labs(y = "Hwy MPG",
x = "Displacement")
# Add marginal density plots to gg
ggMarginal(gg, type= "density", alpha = 0.5,
groupColour = TRUE, groupFill = TRUE)
Picture of the plot with the above code: https://ibb.co/fGMSXdn
Here's a good way to relabel the transmission (I create a new column named transmission, but you could just as easily overwrite the existing column).
# Label each row by the first letter of trans: "a" means automatic, anything
# else is treated as manual
first_letter <- substr(mpg$trans, 1, 1)
mpg$transmission <- ifelse(first_letter == "a", "automatic", "manual")
Now that's done, coloring is easy:
# Scatter plot of hwy against displ, coloured by the new transmission column.
# The ggplot() call is now closed before the first `+`; the missing ")" made
# the original statement a syntax error.
gg <- ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = transmission))+
  labs(y = "Hwy MPG",
       x = "Displacement")
I've left out all your non-standard ggplot stuff because I'm not sure what package(s) it's from. It doesn't seem related to your issue anyway, so you should be able to just add it back in.

How do I export multiple plots as png files from R?

I have an R script that creates multiple trend plots from tabular data. I need to export each plot as a png file. I have searched and tried (to no avail) using png(). It seems like this should be a relatively easy fix. My code is below. Could someone please offer some suggestions as to how I might solve this dilemma?
library(dplyr)
library(tidyr)
library(readr)
library(ggplot2)
library(magrittr)
library(stringi)
library(lubridate)
library(stats)
# Load the raw data file
c_data <- read_csv ("C:/Projects/A_AX_tech_memo/data_analysis/AAX_data_2017_dtcts.csv")
# Keep the columns needed for plotting, give them short names and parse the
# sample date (month-day-year). The earlier group_by() immediately followed by
# ungroup() had no effect on the result and has been dropped.
C_data_out <-
  c_data %>%
  select(METHOD_NAME, STD_CON_LONG_NAME, SAMP_SITE_NAME, SAMP_DATE, STD_VALUE_RPTD, STD_ANAL_UNITS_RPTD, FILTERED_FLAG, STD_REPORTING_LIMIT, STD_REQUIRED_DETECTION_LIMIT, LAB_QUALIFIER) %>%
  rename(Method = METHOD_NAME, Constit = STD_CON_LONG_NAME, Well = SAMP_SITE_NAME, Date = SAMP_DATE, Value = STD_VALUE_RPTD, Unit = STD_ANAL_UNITS_RPTD, Filtered = FILTERED_FLAG, MDL = STD_REPORTING_LIMIT, RDL = STD_REQUIRED_DETECTION_LIMIT, Flag =LAB_QUALIFIER) %>%
  mutate(Date = mdy(Date))
# One data frame per Well x Constit x Filtered combination that occurs in the
# data (drop = TRUE discards the empty combinations)
dfs <- split(C_data_out, with(C_data_out, interaction(Well, Constit, Filtered)), drop = TRUE)
# Build one trend plot per element of dfs (Value against Date, points coloured
# by Flag, linear trend line, dashed reference lines at MDL and RDL).
# Returns a list of ggplot objects with the same names as dfs.
plotz <- lapply(dfs, function(x){
  # Every row of x belongs to the same Well / Constit / Filtered combination,
  # so the first row is enough to build a single title string (pasting the
  # full columns produced one title per row)
  plot_title <- paste(x$Well[1], x$Constit[1], x$Filtered[1], sep =".")
  ggplot(data = x, aes(Date, Value)) +
    geom_point(aes(color = Flag), size = 3) +
    ggtitle(plot_title) +
    ylab("ug/L or Pci/L") +
    # na.rm (not rm.na) is the argument that silently drops missing values
    geom_smooth(method = "lm", se = FALSE, na.rm = TRUE) +
    # Columns are referenced by name inside aes() instead of through x$
    geom_hline(aes(yintercept=MDL, linetype="MDL"), color ="dark green", lwd=1, lty=2) +
    geom_hline(aes(yintercept=RDL, linetype="RDL"), color ="purple", lwd=1, lty=2)
})
Here is an example of what I have tried with png:
# Attempt to build one name per data frame. Each element is the object
# returned by ggtitle(), not a plain character string. The variable also
# shadows base::names.
names <- lapply(dfs, function(x){
ggtitle(paste(x$Well, x$Constit, x$Filtered, sep ="."))
})
plotz <- lapply(dfs, function(x){
# `i` is never defined inside this function (lapply passes the element as x,
# not an index), which matches the "object 'i' not found" error
mypath <- file.path("C:","plots", paste(names[i], ".png", sep = ""))
png(file=mypath)
# The plot object is built here but never print()ed while the device is open
ggplot(data = x, aes(Date, Value)) +
geom_point(data = x, aes(color = Flag), size = 3) +
ggtitle(paste(x$Well, x$Constit, x$Filtered, sep =".")) +
ylab("ug/L or Pci/L") +
geom_smooth(method = "lm", se = FALSE, rm.na = TRUE) +
geom_hline(aes(yintercept=x$MDL, linetype="MDL"), color ="dark green", lwd=1, lty=2) +
geom_hline(aes(yintercept=x$RDL, linetype="RDL"), color ="purple", lwd=1, lty=2)
# dev.off() is the last expression, so plotz collects its return values
dev.off()
})
This gives me this error:
Error in paste(names[i], ".png", sep = "") : object 'i' not found
Any and all help would be much appreciated.
This snippet works using the map and walk functions from purrr.
# Split the data, build one plot per group and save each plot to its own file.
# map() and pwalk() are called as purrr::map() / purrr::pwalk() because purrr
# is not attached by any library() call shown with this snippet; ggplot2 and
# the %>% pipe are assumed to be attached already.
mtcars_split <-
  mtcars %>%
  split(.$cyl)
# One file name per group, e.g. "4.png"
paths <-
  paste0(names(mtcars_split),".png")
# One scatter plot (mpg against wt) per group
plots <-
  mtcars_split %>%
  purrr::map(~ ggplot(data=.,mapping = aes(y=mpg,x=wt)) + geom_point())
# Call ggsave(filename = ..., plot = ...) once per (path, plot) pair
purrr::pwalk(list(filename=paths,plot=plots),ggsave)

How can I add annotation in ggplotly animation?

I am creating an animated plotly graph for my assignment in R, where I am comparing several models with various numbers of observations. I would like to add an annotation showing the RMSE of the current model - that is, text that changes together with the slider. Is there an easy way to do that?
Here is my dataset stored on GitHub. There already is created variable with RMSE: data
The base ggplot graphic is as follows:
library(tidyverse)
library(plotly)
# Base ggplot: BLR and RLS predictions, the targets and the true sine curve
# against x. The `frame = n` aesthetic marks the variable used for the
# animation frames once the plot is converted with ggplotly().
# The title string is now terminated; the missing closing quote made the
# original statement unparseable.
p <- ggplot(values_predictions, aes(x = x)) +
  geom_line(aes(y = preds_BLR, frame = n, colour = "BLR")) +
  geom_line(aes(y = preds_RLS, frame = n, colour = "RLS")) +
  geom_point(aes(x = x, y = target, frame = n, colour = "target"), alpha = 0.3) +
  geom_line(aes(x = x, y = sin(2 * pi * x), colour = "sin(2*pi*x)"), alpha = 0.3) +
  ggtitle("Comparison of performance") +
  labs(y = "predictions and targets", colour = "colours")
This is converted to plotly, and I have added an animation to the Plotly graph:
# Convert the ggplot to plotly and set the animation options.
# Note: the result is stored in a variable called `plot`, which shadows
# base R's plot() function for the rest of the session.
plot <- ggplotly(p) %>%
animation_opts(easing = "linear",redraw = FALSE)
# Display the animated plot
plot
Thanks!
You can add annotations to a ggplot graph using the annotate function: http://ggplot2.tidyverse.org/reference/annotate.html
# Example data: 100 random normal draws with mean 10 for each of x and y
df <- data.frame(x = rnorm(100, mean = 10), y = rnorm(100, mean = 10))
# Build model: linear regression of x on y
fit <- lm(x ~ y, data = df)
# Root-mean-square error: the square root of the mean of the squared values.
#   error - numeric vector of errors (for example model residuals)
RMSE <- function(error) {
  mean_squared <- mean(error^2)
  sqrt(mean_squared)
}
library(ggplot2)
# Scatter plot with the RMSE of the fitted model written as text.
# x = Inf / y = Inf anchor the label at the top-right corner of the panel;
# hjust and vjust then shift it back inside the plotting area.
ggplot(df, aes(x, y)) +
geom_point() +
annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 2,
label = paste("RMSE", RMSE(fit$residuals)) )
There seems to be a bit of a problem converting between ggplot and plotly. However, the following workaround can be used:
# Add the RMSE text as a plotly layout annotation at data coordinates (12, 13).
# NOTE(review): `plot` is not assigned in this snippet - it presumably refers
# to a plot object created earlier; confirm before running.
# FALSE is spelled out because the shorthand F is an ordinary variable that
# can be reassigned.
ggplotly(plot) %>%
  layout(annotations = list(x = 12, y = 13, text = paste("RMSE",
                            RMSE(fit$residuals)), showarrow = FALSE))
Here's an example of adding data dependent text using the built in iris dataset with correlation as text to ggplotly.
library(plotly)
library(ggplot2)
library(dplyr)
# Example: place a data-dependent label (the correlation of the two variables)
# on a scatter plot and convert the result to plotly.
mydata <- iris %>% rename(variable1=Sepal.Length, variable2= Sepal.Width)
shift_right <- 0.1 # number from 0-1 where higher = more right
shift_down <- 0.02 # number from 0-1 where higher = more down
# Signed spans (min - max) of both variables; abs() is applied when they are used
x_span <- min(mydata$variable1)-max(mydata$variable1)
y_span <- min(mydata$variable2)-max(mydata$variable2)
# Label position: offset from the left edge and from the top edge of the data
label_x <- min(mydata$variable1)+abs(shift_right*x_span)
label_y <- max(mydata$variable2)-abs(shift_down*y_span)
# Label text: correlation estimate rounded to two decimals
cor_estimate <- cor.test(mydata$variable1,mydata$variable2)$estimate
cor_label <- paste0("Cor = ",as.character(round(cor_estimate,2)))
p <- ggplot(mydata, aes(variable1,variable2))+
  annotate(geom = "text", label = cor_label, x = label_x, y = label_y, size=4)+
  geom_point()
ggplotly(p) %>% style(hoverinfo = "none", traces = 1) # remove hover on text

Resources