Plot multipoints and a best fit line - r

I want to create one plot graph with the Roundrobin and Prediction points, without colors, where the Roundrobin and Prediction type of points are different, and it has a legend. I was want to add a best fit line for the results.
I am having trouble in adding all these features into one graph that has 2 points. I am used to Gnuplot, but I don't know how to do this with R. How I do this with R?
[1] Input data
Inputdata,Roundrobin,Prediction
1,178,188
2,159,185
3,140,175
[2] Script to generate data
no_faults_data <- read.csv("testresults.csv", header=TRUE, sep=",")
# Graph 1
plot(no_faults_data$Inputdata, no_faults_data$Roundrobin,ylim = range(c(no_faults_data$Roundrobin,no_faults_data$Prediction)),xlab="Input data size (MB)", ylab="Makespan (seconds)")
points(no_faults_data$Inputdata, no_faults_data$Prediction)
abline(no_faults_data$Inputdata, no_faults_data$Roundrobin, untf = FALSE, \dots)
abline(no_faults_data$Inputdata, no_faults_data$Prediction, untf = FALSE, \dots)
legend("top", notitle, c("Round-robin","Prediction"), fill=terrain.colors(2), horiz=TRUE)

In base R you will have to create a fitted model first:
robin <- lm(Roundrobin ~ Inputdata, data = no_faults_data)
pred <- lm(Prediction ~ Inputdata, data = no_faults_data)
plot(no_faults_data$Inputdata, no_faults_data$Roundrobin,
ylim = range(c(no_faults_data$Roundrobin,no_faults_data$Prediction)),
xlab = "Input data size (MB)", ylab = "Makespan (seconds)",
col = "green", pch = 19, cex = 1.5)
points(no_faults_data$Inputdata, no_faults_data$Prediction, pch = 22, cex = 1.5)
abline(robin, lty = 1)
abline(pred, lty = 5)
legend(1.1, 155, legend = c("Round-robin","Prediction"), pch = c(19,22), col = c("green","black"),
bty = "n", cex = 1.2)
which gives:
For further customization of the base R plot, see ?par and ?legend.
With ggplot2 you will need to reshape your data into long format:
library(reshape2)
library(ggplot2)
ggplot(melt(no_faults_data, id="Inputdata"),
aes(x=Inputdata, y=value, shape=variable, color=variable)) +
geom_point(size=4) +
geom_smooth(method = "lm", se = FALSE) +
theme_minimal()
which gives:
Used data:
no_faults_data <- read.csv(text="Inputdata,Roundrobin,Prediction
1,178,188
2,159,185
3,140,175", header=TRUE)

You should look into the ggplot2 package for plotting. Maybe not needed for the 3 points data you provided but it makes much nicer plots than the default.
df <- data.frame("Inputdata" = c(1,2,3,1,2,3), "score" = c(178,159,140,188,185,175), "scoreType" = c(rep("Roundrobin",3), rep("Prediction",3)))
p <- ggplot(data=df, aes(x=Inputdata, y=score, group=scoreType, shape = scoreType)) + geom_point(size=5)
p <- p + ggtitle("My Title")
p+stat_smooth(method="lm",se = FALSE)
Here you group by the type of score and let GG plot make the legend for you. stat_smooth is using lm here.

Related

Customizing a competing risks plot in R with package "cmprsk"

I am trying to customize a plot for competing risks using R and the package cmprsk. Specifically, I want to overwrite the default that for competing events colors are used and for different groups linetypes are used.
Here is my reproducible example:
library(ggplot2)
library(cmprsk)
library(survminer)
# some simulated data to get started
comp.risk.data <- data.frame("tfs.days" = rweibull(n = 100, shape = 1, scale = 1)*100,
"status.tfs" = c(sample(c(0,1,1,1,1,2), size=50, replace=T)),
"Typing" = sample(c("A","B","C","D"), size=50, replace=T))
# fitting a competing risks model
CR <- cuminc(ftime = comp.risk.data$tfs.days,
fstatus = comp.risk.data$status.tfs,
cencode = 0,
group = comp.risk.data$Typing)
# the default plot makes it impossible to identify the groups
ggcompetingrisks(fit = CR, multiple_panels = F, xlab = "Days", ylab = "Cumulative incidence of event",title = "Competing Risks Analysis")+
scale_color_manual(name="", values=c("blue","red"), labels=c("Tumor", "Death without tumor"))
Using ggplot_build() I managed to change the default regarding linetype and color, but I cannot find a way to add a legend.
p2 <- ggcompetingrisks(fit = CR, multiple_panels = FALSE, xlab = "Days", ylab = "Cumulative incidence of event",title = "Death by TCR", ylim = c(0, 1)) +
scale_color_manual(name="", values=c("blue","red"), labels=c("Tumor", "Death without tumor"))
q <- ggplot_build(p2)
q$data[[1]]$colour2 <- ifelse(q$data[[1]]$linetype=="solid","blue", ifelse(q$data[[1]]$linetype==22,"red", ifelse(q$data[[1]]$linetype==42,"green", ifelse(q$data[[1]]$linetype==44,"black", NA))))
q$data[[1]]$linetype <- ifelse(q$data[[1]]$colour=="blue","solid", ifelse(q$data[[1]]$colour=="red","dashed", NA))
q$data[[1]]$colour <- q$data[[1]]$colour2
q$plot <- q$plot + ggtitle("Competing Risks Analysis") + guides(col = guide_legend()) + theme(legend.position = "right")
p2 <- ggplot_gtable(q)
plot(p2)
Does anyone know how to add the legend to a plot manipulated by ggplot_build()? Or an alternative way to plot the competing risks such that color indicated group and linetype indicates event?
You don't need to go down the ggplot_build route. The function ggcompetingrisks returns a ggplot object, which itself contains the aesthetic mappings. You can overwrite these with aes:
p <- ggcompetingrisks(fit = CR,
multiple_panels = F,
xlab = "Days",
ylab = "Cumulative incidence of event",
title = "Competing Risks Analysis")
p$mapping <- aes(x = time, y = est, colour = group, linetype = event)
Now we have reversed the linetype and color aesthetic mappings, we just need to swap the legend labels and we're good to go:
p + labs(linetype = "event", colour = "group")
Note that you can also add color scales, themes, coordinate transforms to p like any other ggplot object.

R - Contour plot from raster dataset with country borders overlaid

I have a fairly simple and probably common task, plotting a raster dataset with countour lines and adding country borders together in one plot, however I did not find a solution anywhere. There are a a few hints available (such as this one), but no raster dataset is used there and I can't get it to work.
The dataset I am using is actually in netcdf format and available here (15mb in size) and contains about 40 years of gridded precipitation data.
Here is my line of code:
setwd("...netcdf Data/GPCP")
library("raster")
library("maps")
nc_brick79_17 <- brick("precip.mon.mean.nc") # load in the ncdf data as a
raster brick
newextent <- c(85, 125, -20, 20) # specify region of interest
SEA_brick <- crop(nc_brick79_17, newextent) # crop the region
day1 <- SEA_brick[[1]] # select very first day as example
colfunc<-colorRampPalette(c("white","lightblue","yellow","red","purple")) # colorscale for plotting
So it works of course when I just plot the raster data together with a map overlaid:
plot(day1, col=(colfunc(100)), interpolate=F, main="day1",legend.args=list(text='mm/hr', side=4,font=1, line=2.5, cex=1.1))
map("world", add=TRUE, lwd=0.5, interior = FALSE, col = "black")
We get this plot (Raster Plot with country borders added)
Now the code I use to generate the contour plot is the following:
filledContour(day1,zlim=c(0,20),color=colorRampPalette(c("white","lightblue","yellow","red","purple")),
xlab = "Longitude (°)", ylab = "Latitude (°)")
map("world", add=TRUE, lwd=0.5, interior = FALSE, col = "black") # add map overlay
I end up with a plot where obviously the country borders do not align and are even covering the colorbar.
Contour plot with map overlay shifted
In this last part I am trying to add the country boundaries to the contour plot, but it does not work, even though it should I assume. The map is simply not there, no error though:
filledContour(day1, zlim=c(0,20),
color.palette = colorRampPalette(c("white","lightblue","yellow","red","purple")),
xlab = "Longitude (°)", ylab = "Latitude (°)",
xlim = c(90, 120), ylim = c(-20, 20), nlevels = 25,
plot.axes = {axis(1); axis(2);
map('world', xlim = c(90, 120), ylim = c(-20, 20), add = TRUE, lwd=0.5, col = "black")})
From that line of code I get this plot.
Contour plot but no country borders added
What could I improve or is there any mistake somewhere? Thank you!
I chose to use ggplot here. I leave two maps for you. The first one is the one you created. This is a replication with ggplot. The second one is the one you could not produce. There are many things to explain. But I am afraid I do not have enough time to write all. But I left some comments in my code below. Please check this question to learn more about the second graphic. Finally, I'd like to give credit to hrbrmstr who wrote a great answer in the linked question.
library(maptools)
library(akima)
library(raster)
library(ggplot2)
# This is a data set from the maptools package
data(wrld_simpl)
# Create a data.frame object for ggplot. ggplot requires a data frame.
mymap <- fortify(wrld_simpl)
# This part is your code.
nc_brick79_17 <- brick("precip.mon.mean.nc")
newextent <- c(85, 125, -20, 20)
SEA_brick <- crop(nc_brick79_17, newextent)
day1 <- SEA_brick[[1]]
# Create a data frame with a raster object. This is a spatial class
# data frame, not a regular data frame. Then, convert it to a data frame.
spdf <- as(day1, "SpatialPixelsDataFrame")
mydf <- as.data.frame(spdf)
colnames(mydf) <- c("value", "x", "y")
# This part creates the first graphic that you drew. You draw a map.
# Then, you add tiles on it. Then, you add colors as you wish.
# Since we have a world map data set, we trim it at the end.
ggplot() +
geom_map(data = mymap, map = mymap, aes(x = long, y = lat, map_id = id), fill = "white", color = "black") +
geom_tile(data = mydf, aes(x = x, y = y, fill = value), alpha = 0.4) +
scale_fill_gradientn(colors = c("white", "lightblue", "yellow", "red", "purple")) +
scale_x_continuous(limits = c(85, 125), expand = c(0, 0)) +
scale_y_continuous(limits = c( -20, 20), expand = c(0, 0)) +
coord_equal()
ggplot version of filled.contour()
# As I mentioned above, you want to study the linked question for this part.
mydf2 <- with(mydf, interp(x = x,
y = y,
z = value,
xo = seq(min(x), max(x), length = 400),
duplicate = "mean"))
gdat <- interp2xyz(mydf2, data.frame = TRUE)
# You need to draw countries as lines here. You gotta do that after you draw
# the contours. Otherwise, you will not see the map.
ggplot(data = gdat, aes(x = x, y = y, z = z)) +
geom_tile(aes(fill = z)) +
stat_contour(aes(fill = ..level..), geom = "polygon", binwidth = 0.007) +
geom_contour(color = "white") +
geom_path(data = mymap, aes(x = long, y = lat, group = group), inherit.aes = FALSE) +
scale_x_continuous(limits = c(85, 125), expand = c(0, 0)) +
scale_y_continuous(limits = c(-20, 20), expand = c(0, 0)) +
scale_fill_gradientn(colors = c("white", "lightblue", "yellow", "red", "purple")) +
coord_equal() +
theme_bw()

Change loadings (arrows) length in PCA plot using ggplot2/ggfortify?

I have been struggling with rescaling the loadings (arrows) length in a ggplot2/ggfortify PCA. I have looked around extensively for an answer to this, and the only information I have found either code new biplot functions or refer to other entirely different packages for PCA (ggbiplot, factoextra), neither of which address the question I would like to answer:
Is it possible to scale/change size of PCA loadings in ggfortify?
Below is the code I have to plot a PCA using stock R functions as well as the code to plot a PCA using autoplot/ggfortify. You'll notice in the stock R plots I can scale the loads by simply multiplying by a scalar (*20 here) so my arrows aren't cramped in the middle of the PCA plot. Using autoplot...not so much. What am I missing? I'll move to another package if necessary but would really like to have a better understanding of ggfortify.
On other sites I have found, the graph axes limits never seem to exceed +/- 2. My graph goes +/- 20, and the loadings sit staunchly near 0, presumably at the same scale as graphs with smaller axes. I would still like to plot PCA using ggplot2, but if ggfortify won't do it then I need to find another package that will.
#load data geology rocks frame
georoc <- read.csv("http://people.ucsc.edu/~mclapham/earth125/data/georoc.csv")
#load libraries
library(ggplot2)
library(ggfortify)
geo.na <- na.omit(georoc) #remove NA values
geo_matrix <- as.matrix(geo.na[,3:29]) #create matrix of continuous data in data frame
pca.res <- prcomp(geo_matrix, scale = T) #perform PCA using correlation matrix (scale = T)
summary(pca.res) #return summary of PCA
#plotting in stock R
plot(pca.res$x, col = c("salmon","olivedrab","cadetblue3","purple")[geo.na$rock.type], pch = 16, cex = 0.2)
#make legend
legend("topleft", c("Andesite","Basalt","Dacite","Rhyolite"),
col = c("salmon","olivedrab","cadetblue3","purple"), pch = 16, bty = "n")
#add loadings and text
arrows(0, 0, pca.res$rotation[,1]*20, pca.res$rotation[,2]*20, length = 0.1)
text(pca.res$rotation[,1]*22, pca.res$rotation[,2]*22, rownames(pca.res$rotation), cex = 0.7)
#plotting PCA
autoplot(pca.res, data = geo.na, colour = "rock.type", #plot results, name using original data frame
loadings = T, loadings.colour = "black", loadings.label = T,
loadings.label.colour = "black")
The data comes from an online file from a class I'm taking, so you could just copy this if you have the ggplot2 and ggfortify packages installed. Graphs below.
R plot of what I want ggplot to look like
What ggplot actually looks like
Edit:
Adding reproducible code below.
iris.res <-
iris %>%
select(Sepal.Length:Petal.Width) %>%
as.matrix(.) %>%
prcomp(., scale = F)
autoplot(iris.res, data = iris, size = 4, col = "Species", shape = "Species",
x = 1, y = 2, #components 1 and 2
loadings = T, loadings.colour = "grey50", loadings.label = T,
loadings.label.colour = "grey50", loadings.label.repel = T) + #loadings are arrows
geom_vline(xintercept = 0, lty = 2) +
geom_hline(yintercept = 0, lty = 2) +
theme(aspect.ratio = 1) +
theme_bw()
This answer is probably long after the OP needs it, but I'm offering it because I have been wrestling with the same issue for a while, and maybe I can save someone else the same effort.
# Load data
iris <- data.frame(iris)
# Do PCA
PCA <- prcomp(iris[,1:4])
# Extract PC axes for plotting
PCAvalues <- data.frame(Species = iris$Species, PCA$x)
# Extract loadings of the variables
PCAloadings <- data.frame(Variables = rownames(PCA$rotation), PCA$rotation)
# Plot
ggplot(PCAvalues, aes(x = PC1, y = PC2, colour = Species)) +
geom_segment(data = PCAloadings, aes(x = 0, y = 0, xend = (PC1*5),
yend = (PC2*5)), arrow = arrow(length = unit(1/2, "picas")),
color = "black") +
geom_point(size = 3) +
annotate("text", x = (PCAloadings$PC1*5), y = (PCAloadings$PC2*5),
label = PCAloadings$Variables)
In order to increase the arrow length, multiply the loadings for the xend and yend in the geom_segment call. With a bit of trial and effort, can work out what number to use.
To place the labels in the correct place, multiply the PC axes by the same value in the annotate call.

How to superimpose bar plots in R?

I'm trying to create a figure similar to the one below (taken from Ro, Russell, & Lavie, 2001). In their graph, they are plotting bars for the errors (i.e., accuracy) within the reaction time bars. Basically, what I am looking for is a way to plot bars within bars.
I know there are several challenges with creating a graph like this. First, Hadley points out that it is not possible to create a graph with two scales in ggplot2 because those graphs are fundamentally flawed (see Plot with 2 y axes, one y axis on the left, and another y axis on the right)
Nonetheless, the graph with superimposed bars seems to solve this dual sclaing problem, and I'm trying to figure out a way to create it in R. Any help would be appreciated.
It's fairly easy in base R, by using par(new = T) to add to an existing graph
set.seed(54321) # for reproducibility
data.1 <- sample(1000:2000, 10)
data.2 <- sample(seq(0, 5, 0.1), 10)
# Use xpd = F to avoid plotting the bars below the axis
barplot(data.1, las = 1, col = "black", ylim = c(500, 3000), xpd = F)
par(new = T)
# Plot the new data with a different ylim, but don't plot the axis
barplot(data.2, las = 1, col = "white", ylim = c(0, 30), yaxt = "n")
# Add the axis on the right
axis(4, las = 1)
It is pretty easy to make the bars in ggplot. Here is some example code. No two y-axes though (although look here for a way to do that too).
library(ggplot2)
data.1 <- sample(1000:2000, 10)
data.2 <- sample(500:1000, 10)
library(ggplot2)
ggplot(mapping = aes(x, y)) +
geom_bar(data = data.frame(x = 1:10, y = data.1), width = 0.8, stat = 'identity') +
geom_bar(data = data.frame(x = 1:10, y = data.2), width = 0.4, stat = 'identity', fill = 'white') +
theme_classic() + scale_y_continuous(expand = c(0, 0))

Global legend using grid.arrange (gridExtra) and lattice based plots

I am producing four plots using xyplot (lattice) and further combine them with grid.arrange (gridExtra).
I would like to obtain a graph with a common global legend. The closest that I have reached is the following. They have to be in a matrix layout, otherwise an option would be to put them in a column and include only a legend for the top or bottom one.
# Load packages
require(lattice)
require(gridExtra)
# Generate some values
x1<-rnorm(100,10,4)
x2<-rnorm(100,10,4)
x3<-rnorm(100,10,4)
x4<-rnorm(100,10,4)
y<-rnorm(100,10,1)
cond<-rbinom(100,1,0.5)
groups<-sample(c(0:10),100,replace=TRUE)
dataa<-data.frame(y,x1,x2,x3,x4,cond,groups)
# ploting function
plott<-function(x){
xyplot(y~x|cond,groups=groups,
col = gray(seq(0.01,0.7,length=length(levels(as.factor(groups))))),
pch = 1:length(levels(as.factor(groups))),
key = list(space="top",
text = list(as.character(levels(as.factor(groups)))),
points = TRUE, lines = TRUE, columns = 3,
pch = 1:length(levels(as.factor(groups))),
col = gray(seq(0.01,0.7,length=length(levels(as.factor(groups))))),
cex=1))
}
plot1<-plott(x=x1)
plot2<-plott(x=x2)
plot3<-plott(x=x3)
plot4<-plott(x=x4)
grid.arrange(plot1,plot2,plot2,plot4,ncol=2)
In a similar post, I have seen that it can be performed with the use of ggplot2 e.g. here and here but is there a way to include a global common legend using gridExtra and a lattice based plot e.g. xyplot?
Thank you.
One possible solution is to use ggplot, hinted here.
my.cols <- 1:3
my.grid.layout <- rbind(c(1,2),
c(3,3))
g_legend<-function(a.gplot){
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
return(legend)
}
legend.plot <- ggplot(iris, aes(x=Petal.Length, y=Sepal.Width,colour=Species)) +
geom_line(size=1) + # legend should show lines, not points or rects ...
theme(legend.position="right", legend.background = element_rect(colour = "black"),
legend.key = element_rect(fill = "white")) + # position, box and background colour of legend
scale_color_manual(values=my.cols, name = "Categories") + # manually insert colours as used in corresponding xyplot
guides(colour = guide_legend(reverse=T)) # inverts order of colours in legend
mylegend <- g_legend(legend.plot)
plot1 <- xyplot(Sepal.Width ~ Petal.Length, groups = Species, data = iris, type = 'l',
par.settings = simpleTheme(col=my.cols))
plot2 <- xyplot(Sepal.Length ~ Petal.Length, groups = Species, data = iris, type = 'l',
par.settings = simpleTheme(col=my.cols))
grid.arrange(plot1,plot2,mylegend,layout_matrix=my.grid.layout,
top=textGrob(gp=gpar(col='black',fontsize=20),"Some useless example"))
I managed to produce something more close to what I first imagined. For that I am including an extra graphical element and I am using the layout_matrix option in grid.arrange to minimize its effect. That way I am keeping the legend and almost exclude the plot.
# Load packages
require(lattice)
require(gridExtra)
# Generate some values
x1<-rnorm(100,10,4)
x2<-rnorm(100,10,4)
x3<-rnorm(100,10,4)
x4<-rnorm(100,10,4)
y<-rnorm(100,10,1)
cond<-rbinom(100,1,0.5)
groups<-sample(c(0:10),100,replace=TRUE)
dataa<-data.frame(y,x1,x2,x3,x4,cond,groups)
# ploting function
plottNolegend<-function(x){
xyplot(y~x|cond,groups=groups,
col = gray(seq(0.01,0.7,length=length(levels(as.factor(groups))))),
pch = 1:length(levels(as.factor(groups)))
)
}
plott<-function(x){
xyplot(y~x|cond,groups=groups,
col = gray(seq(0.01,0.7,length=length(levels(as.factor(groups))))),
pch = 1:length(levels(as.factor(groups))),
key = list(space="top",
text = list(as.character(levels(as.factor(groups)))),
points = TRUE, lines = TRUE, columns = 3,
pch = 1:length(levels(as.factor(groups))),
col = gray(seq(0.01,0.7,length=length(levels(as.factor(groups))))),
cex=1))
}
plot1<-plottNolegend(x=x1)
plot2<-plottNolegend(x=x2)
plot3<-plottNolegend(x=x3)
plot4<-plottNolegend(x=x4)
legend<-plott(x=x4)
lay <- rbind(c(1,2),
c(1,2),
c(3,4),
c(3,4),
c(5,5))
grid.arrange(plot1,plot2,plot2,plot4,legend, layout_matrix = lay)
Updated: The answer was much simpler than I expected. Thank you all for your help.
# Load packages
require(lattice)
require(gridExtra)
require(grid)
# Generate some values
x1<-rnorm(100,10,4)
x2<-rnorm(100,10,4)
x3<-rnorm(100,10,4)
x4<-rnorm(100,10,4)
y<-rnorm(100,10,1)
cond<-rbinom(100,1,0.5)
groups<-sample(c(0:10),100,replace=TRUE)
dataa<-data.frame(y,x1,x2,x3,x4,cond,groups)
# ploting function
plott<-function(x){
xyplot(y~x|cond,groups=groups,
col = gray(seq(0.01,0.7,length=length(levels(as.factor(groups))))),
pch = 1:length(levels(as.factor(groups))),
key = NULL)
}
plot1<-plott(x=x1)
plot2<-plott(x=x2)
plot3<-plott(x=x3)
plot4<-plott(x=x4)
grid.arrange(plot1,plot2,plot2,plot4,ncol=2)
KeyA<-list(space="top",
text = list(as.character(levels(as.factor(groups)))),
points = TRUE, lines = TRUE, columns = 11,
pch = 1:length(levels(as.factor(groups))),
col = gray(seq(0.01,0.7,length=length(levels(as.factor(groups))))),
cex=1)
draw.key(KeyA, draw = TRUE, vp =
viewport(.50, .99))
I think the better solution is to use c.trellis from latticeExtra:
library(latticeExtra)
c(plot1, plot2, plot3, plot4)

Resources