I've plotted multiple ROC curves with pROC and now I want to add a common legend horizontally across the bottom-centre of the plot space. I typically use ggplot and now I'm lost with base R plotting.
Here's my code:
# Set plotting parameters
par(mfrow=c(1,2), pty = "s", oma = c(4,1,1,1))
# ROC plot 1
roc(logit_A$y, logit_A$fitted.values, plot=TRUE, legacy.axes=FALSE, percent=TRUE, col="salmon", lwd=2, print.auc=TRUE)
plot.roc(logit_B$y, logit_B$fitted.values, percent=TRUE, col="goldenrod", lwd=2, print.auc=TRUE, add=TRUE, print.auc.y=40)
plot.roc(logit_C$y, logit_C$fitted.values, percent=TRUE, col="lightsteelblue", lwd=2, print.auc=TRUE, add=TRUE, print.auc.y=30)
title(main = "Model 1", line = 2.5)
# ROC plot 2
roc(logit_A2$y, logit_A2$fitted.values, plot=TRUE, legacy.axes=FALSE, percent=TRUE, col="salmon", lwd=2, print.auc=TRUE)
plot.roc(logit_B2$y, logit_B2$fitted.values, percent=TRUE, col="goldenrod", lwd=2, print.auc=TRUE, add=TRUE, print.auc.y=40)
plot.roc(logit_C2$y, logit_C2$fitted.values, percent=TRUE, col="lightsteelblue", lwd=2, print.auc=TRUE, add=TRUE, print.auc.y=30)
title(main = "Model 2", line = 2.5)
# Add legend
legend("bottom",
legend=c("A", "B", "C"),
col=c("salmon", "goldenrod", "lightsteelblue"),
lwd=4, cex =0.4, xpd = TRUE, horiz = TRUE)
How do I make it so the legend is centered along the bottom between both plots?
Do you mean something like this :
m <- matrix(c(1,2,3,3),nrow = 2,ncol = 2,byrow = TRUE)
layout(mat = m,heights = c(0.4,0.4))
for (i in 1:2){
par(mar = c(2,2,1,1))
plot(runif(5),runif(5),xlab = "",ylab = "")
}
par(mar=c(0,0,1,0))
plot(1, type = "n", axes=FALSE, xlab="", ylab="")
plot_colors <- c("blue","green","pink")
legend(x = "top",inset = 0,
legend = c("AUC:72.9%", "AUC: 71.0%", "AUC:80.0%"),
col=plot_colors, lwd=7, cex=.7, horiz = TRUE)
Related
I need help with the legend, when I run the code, it doesn't appear.
v_Treasury <- c("DGS5", "DGS10", "DGS30")
getSymbols(Symbols = v_Treasury,
src = "FRED")
I am cleaning the dataset from NA
US5Y<-DGS5["2021/2021"]
US5Y<-na.omit(US5Y)
US10Y<-DGS10["2021/2021"]
US10Y<-na.omit(US10Y)
US30Y<-DGS30["2021/2021"]
US30Y<-na.omit(US30Y)
But the plot has no legend in the graph
plot(US5Y, lwd=2, col="blue",lty = "solid" ,ylab="", xlab="",ylim=c(0.4,3),
main = "US Treasury Yields")
lines(US10Y, col="black", lwd=2, lty="dotted")
lines(US30Y, col="red", lwd=2, lty="dashed")
legend(x="topleft", legend=c("US5Y", "US10Y","US30Y"), col=c("blue", "black", "red"),
lwd=2, lty=c("solid", "dotted","dashed"))
I want to know what is wrong with the code, because the legend is not in the graph
Here is a potential solution:
library(quantmod)
# Data from https://fred.stlouisfed.org/series/DGS5
DGS5 <- read.csv("DGS5.csv")
DGS10 <- read.csv("DGS10.csv")
DGS30 <- read.csv("DGS30.csv")
# the "getSymbols" function from quantmod specifies that
# this is not an 'ordinary' plot - it is an xts 'time-series' plot
# this has implications for e.g. the legend
v_Treasury <- c("DGS5", "DGS10", "DGS30")
getSymbols(Symbols = v_Treasury,
src = "FRED")
US5Y<-DGS5["2021/2021"]
US5Y<-na.omit(US5Y)
US10Y<-DGS10["2021/2021"]
US10Y<-na.omit(US10Y)
US30Y<-DGS30["2021/2021"]
US30Y<-na.omit(US30Y)
# If you turn off clipping ("xpd=NA"), you can see where
# "legend()" is trying to put the legend ("topleft")
par(xpd=NA)
plot(US5Y, lwd=2, col="blue",lty = "solid" ,ylab="", xlab="",ylim=c(0.4,3),
main = "US Treasury Yields")
lines(US10Y, col="black", lwd=2, lty="dotted")
lines(US30Y, col="red", lwd=2, lty="dashed")
legend(x = "topleft", y = NULL, legend=c("US5Y", "US10Y","US30Y"),
col=c("blue", "black", "red"),
lwd=2, lty=c("solid", "dotted","dashed"))
# If you use the "addLegend()" function from the xts package
# then your legend goes where you want it to go
par(xpd=TRUE)
plot(US5Y, lwd=2, col="blue",lty = "solid" ,ylab="", xlab="",ylim=c(0.4,3),
main = "US Treasury Yields")
lines(US10Y, col="black", lwd=2, lty="dotted")
lines(US30Y, col="red", lwd=2, lty="dashed")
addLegend(legend.loc = "topleft", legend.names = c("US5Y", "US10Y","US30Y"),
col=c("blue", "black", "red"), fill=c("blue", "black", "red"), ncol = 1)
Here is a different solution (see the legend.loc option of plot.xts).
library(quantmod)
v_Treasury <- c("DGS5", "DGS10", "DGS30")
getSymbols(Symbols = v_Treasury, src = "FRED")
US5Y <- DGS5["2021/2021"]
US5Y <- na.omit(US5Y)
US10Y <- DGS10["2021/2021"]
US10Y <- na.omit(US10Y)
US30Y <- DGS30["2021/2021"]
US30Y <- na.omit(US30Y)
# Merge the 3 xts objects into one
USdat <- merge(US5Y, US10Y, US30Y, all=TRUE)
# Add a legend using the legend.loc option of plot.xts
plot(USdat, lwd=2,
ylab="", xlab="",ylim=c(0.4,3),
col=c("blue", "black", "red"),
lty=c("solid", "dotted","dashed"),
main = "US Treasury Yields", legend.loc="topright")
I have this line-and-dots plot:
#generate fake data
xLab <- seq(0, 50, by=5);
yLab <- c(0, sort(runif(10, 0, 1)));
#this value is fixed
fixedVal <- 27.3
#new window
dev.new();
#generate the plot
paste0(plot(xLab, yLab, col=rgb(50/255, 205/255, 50/255, 1), type="o", lwd=3,
main="a line-and-dots plot", xlab="some values", ylab="a percentage",
pch=20, xlim=c(0, 50), ylim=c(0, 1), xaxt="n", cex.lab=1.5, cex.axis=1.5,
cex.main=1.5, cex.sub=1.5));
#set axis
axis(side = 1, at=c(seq(min(xLab), max(xLab), by=5)))
#plot line
abline(v=fixedVal, col="firebrick", lwd=3, lty=1);
now, I would like to find the y coordinate of the intersection point between the green and the red lines.
Can I achieve the goal without the need of a regression line? Is there a simple way of getting the coordinates of that unknown point?
You can use approxfun to do the interpolation:
> approxfun(xLab,yLab)(fixedVal)
[1] 0.3924427
Alternatively, just use approx:
> approx(xLab,yLab,fixedVal)
$x
[1] 27.3
$y
[1] 0.3924427
Quick fix like #JohnColeman said:
# find the two points flanking your value
idx <- findInterval(fixedVal,xLab)
# calculate the deltas
y_delta <- diff(yLab[idx:(idx+1)])
x_delta <- diff(xLab[idx:(idx+1)])
# interpolate...
ycut = (y_delta/x_delta) * (fixedVal-xLab[idx]) + yLab[idx]
ycut
[1] 0.4046399
So we try it on the plot..
paste0(plot(xLab, yLab, col=rgb(50/255, 205/255, 50/255, 1), type="o", lwd=3,
main="a line-and-dots plot", xlab="some values", ylab="a percentage",
pch=20, xlim=c(0, 50), ylim=c(0, 1), xaxt="n", cex.lab=1.5, cex.axis=1.5,
cex.main=1.5, cex.sub=1.5));
#set axis
axis(side = 1, at=c(seq(min(xLab), max(xLab), by=5)))
#plot line
abline(v=fixedVal, col="firebrick", lwd=3, lty=1);
abline(h=ycut, col="lightblue", lwd=3, lty=1);
I am trying to pull charts (.png) from the data from SQL Server using ODBC in R. The code is running fine but label on y-axis is in decimals. eg. 10.0%, 20.0%, 30.0%, 40.0% and 50.0%.
I do not want this in decimal place on y-axis. Please help me to get rid of this issue.
Please see my below code and help me with this issue.
Output Chart
# produce plots for all the pids
for (i in 1:n_pids){
print(paste0('working on ', i, ' pid ', def_dates$pid_names[i]))
pidx <- def_dates[i,'pid']
sub <- res[pid == pidx]
png(file = paste0(pidx, '-misrated.png'), width = 750, height = 350, units = "px", pointsize = 12 )
par(oma = c(5, 1,1,1), mar=c(0,3,3,3))
with(sub, plot(date, edf1/100,
type="l", col="blue",
ylim=c(0,.5), bty='n',
xlab=NA, ylab=NA, cex=0.9,lwd=2,
axes=F,xaxs = "i",yaxs="i",
panel.first =
c(abline(h = seq(0,1,.1), lty = 2, col = 'grey')
,NULL)
))
abline(v=as.Date(def_dates[def_dates$pid == pidx,'def'],'%m-%d-%Y'),
col="red",lwd=2)
axis.Date(1, at = seq(min(sub$date), max(sub$date), length.out = 10),
format= "%b %Y") # bottom dates
var1= seq(0,0.5,.1)#pretty_breaks(n=5)(sub$edf1/100)
axis(2, lwd=0,
at=var1,
label=percent(var1),
las=1) # EDF values
par(new=T)
with(sub, plot(date, displaystring, type="l",
col="darkgreen", lwd=2,
axes=F, xlab=NA, ylab=NA))
axis(side = 4,
at=seq(1,NROW(unique(sub$displaystring))),
labels=unique(sub$displaystring),
lwd=0,cex=0.5,las=1,
col="darkgreen"
)
par(oma = c(0,0,0,0))
legend("bottom", legend=c("DENBURY RESOURCES INC [1-Yr EDF9]","MIS RATING"), xpd = TRUE,
horiz = TRUE,
inset = c(0,0), bty = "n",
col=c("blue", "darkgreen"), lty=1, cex = 1)
dev.off()
#ggsave(paste0(pidx, '-rated.png'), p, scale=3, height=3, width=6, unit="in")
}
I can't vouch for the rest of the code, but the only portion that has affect on the y-axis here is:
axis(2, lwd=0,
at=var1,
label=percent(var1),
las=1) # EDF values
When you find scales::percent, the help page does not appear to give an easy way to control the decimal places. I suggest we use good-ole sprintf for this.
scales::percent(var1)
# [1] "0.0%" "10.0%" "20.0%" "30.0%" "40.0%" "50.0%"
sprintf("%0.0f%%", 100*var1)
# [1] "0%" "10%" "20%" "30%" "40%" "50%"
So your resulting code will be:
axis(2, lwd=0,
at=var1,
label=sprintf("%0.0f%%", 100*var1),
las=1) # EDF values
I have several multiplots to create, where the difference between them it's just the x and y values of 3 of the four curves of the multiplot.
So one multiplot looks like this:
png(file=paste("~/Documents/plot1.png", sep=""), width=800, height=800 )
plot(xplot1, yplot1, xlab="x1", ylab="y1", type="p", pch=20, cex=0.5, col="yellow", main = "Plot1")
legend("topleft", c("Original curve","Cut curve", "Shifted curve" ), pch=20, cex=0.7, col=c("yellow", "black", "green"))
points(xcutplot1, ycutplot1, type="p", pch=20, cex=0.5, col="black")
points(xsampleplot, ysampleplot, type="p", pch=20, cex=0.05, col="red")
lm.r = lm(ysampleplot ~ xsampleplot)
abline(lm.r)
points(xcutplot1 + shiftX1, ycutplot1 + shiftY1, col="green", type = "p", pch = 20, cex = 0.5)
dev.off()
That works perfectly. My goal now is to create a function where to put the structure of the multiplot above, and then call the function depending on the x and y values of each. So something like this:
getPlot = function(xplot, yplot, xcutplot, ycutplot, shiftX, shiftY){
plot(xplot, yplot, xlab="x", ylab="y", type="p", pch=20, cex=0.5, col="yellow", main = "Plot1")
legend("topleft", c("Original curve","Cut curve", "Shifted curve" ), pch=20, cex=0.7, col=c("yellow", "black", "green"))
points(xcutplot, ycutplot, type="p", pch=20, cex=0.5, col="black")
points(xsampleplot, ysampleplot, type="p", pch=20, cex=0.05, col="red")
lm.r = lm(ysampleplot ~ xsampleplot)
abline(lm.r)
points(xcutplot + shiftX, ycutplot + shiftY, col="green", type = "p", pch = 20, cex = 0.5)
}
And then call the function for each multiplot as:
png(file=paste("~/Documents/plot1.png", sep=""), width=800, height=800 )
getPlot(xplot1, yplot1, xcutplot1, ycutplot1, shiftX1, shiftY1)
dev.off()
png(file=paste("~/Documents/plot2.png", sep=""), width=800, height=800 )
getPlot(xplot2, yplot2, xcutplot2, ycutplot2, shiftX2, shiftY2)
dev.off()
...and so on.
You see that my problem is what to put in return(), since I don't know how to put the plot within a kind of "variable" and then add the other curves to this "variable"...
If I don't put anything in return(), it returns me a plot with only the first curve.
I've not found any solution on the internet, that's why I put the question here to get more ideas...
Thank you in advanced for your help.
Here is the full solution.
Function to create several multiplots:
getPlot = function(xplot, yplot, xcutplot, ycutplot, shiftX, shiftY){
plot(xplot, yplot, xlab="x", ylab="y", type="p", pch=20, cex=0.5, col="yellow", main = "Plot1")
legend("topleft", c("Original curve","Cut curve", "Shifted curve" ), pch=20, cex=0.7, col=c("yellow", "black", "green"))
points(xcutplot, ycutplot, type="p", pch=20, cex=0.5, col="black")
points(xsampleplot, ysampleplot, type="p", pch=20, cex=0.05, col="red")
lm.r = lm(ysampleplot ~ xsampleplot)
abline(lm.r)
points(xcutplot + shiftX, ycutplot + shiftY, col="green", type = "p", pch = 20, cex = 0.5)
}
Some data to call the function:
xplot1 = c(1,2,13,4,15)
yplot1 = c(2,5,14,8,19)
xcutplot1 = c(4,5,6)
ycutplot1 = c(5,8,9)
xsampleplot = c(3,7,4,4,0,4,7,2,11,5,8)
ysampleplot = c(5,7,9,3,2,4,6,7,5,8,10)
shiftX1 = 0.5
shiftY1 = 0.8
Function call:
png(file=paste("~/Documents/plot1.png", sep=""), width=800, height=800 )
getPlot(xplot1, yplot1, xcutplot1, ycutplot1, shiftX1, shiftY1)
dev.off()
I have 3 sets of data that I am trying to plot on a single plot. The first data set x values range from ~ 1 to 1700 whereas the other two data sets x values are less than 20. Therefore I want to plot them on a log axis to show variations in all the data sets. However I do not want to transform the data as I want to be able to read the values off the graph. The x axis labels I would like are 1, 10, 100 and 1000 all equally spaced. Does anyone know how to do this? I can only find examples where the data is log as well as the axis. I have attached the code I am currently using below:
Thanks in advance for any help given.
Holly
Stats_nineteen<-read.csv('C:/Users/Holly/Documents/Software Manuals/R Stuff/Stats_nineteen.csv')
attach(Stats_nineteen)
x<-Max
x1<-Min
x2<-Max
y1<-Depth
y2<-Depth
par(bg="white")
par(xlog=TRUE)
plot(x2,y1, type="n", ylim=c(555,0), log="x", axes=FALSE, ann=FALSE)
box()
axis(3, at=c(1,10,100,1000), label=c(1,10,100,1000), pos=0, cex.axis=0.6)
axis(1, at=c(1,10,100,1000), label=c(1,10,100,1000), cex.axis=0.6)
axis(2, at=c(600,550,500,450,400,350,300,250,200,150,100,50,0), label=c
(600,"",500,"",400,"",300,"",200,"",100,"",0), cex.axis=0.6)
mtext("CLAST SIZE / mm", side=3, line=1, cex=0.6, las=0, col="black")
mtext("DEPTH / m", side=2, line=2, cex=0.6, las=0, col="black")
grid(nx = NULL, ny = NULL, col = "lightgray", lty = "solid",
lwd = par("lwd"), equilogs = TRUE)
par(new=TRUE)
lines(x1,y1, col="black", lty="solid", lwd=1)
lines(x2,y2, col="black", lty="solid", lwd=1)
polygon(c(x1,rev(x2)), c(y1,rev(y2)), col="grey", border="black")
par(new=TRUE)
plot(x=Average,y=Depth, type="o",
bg="red", cex=0.5, pch=21,
col="red", lty="solid",
axes=FALSE, xlim=c(0,1670), ylim=c(555,0),
ylab = "",xlab = "")
par(new=TRUE)
plot(x=Mode,y=Depth, type="o",
bg="blue", cex=0.5, pch=21,
col="blue", lty="solid",
axes=FALSE, xlim=c(0,1670), ylim=c(555,0),
ylab = "",xlab = "")
You can do this in ggplot using scale_x_log
so something like:
myplot <- ggplot( StatsNinetee,
aes (x = myResponse,
y = myPredictor,
groups = myGroupingVariable) ) +
geom_point() +
scale_x_log()
myplot
also, avoid attach() it can give odd behavior.