Could using grid.arrange affect plot output in R? - r

For a function that accepts a data frame, a plot switch value, and bin size vector, I'm trying to create plots for each count-bin and each density-bin size combination for every numerical variable column when a plot switch value of 'grid' is inputted. I have it working that I get individual plots for each combination, but when I tried to put the plots for each numerical variable on a grid and I'm seeing variations in the actual plots even though as far as I can tell I didn't alter the calculations nor the plot commands.
For instance, notice that, even accounting for scale variations, y-density plots are different in the images below. Tried looking at the documentation for grid.arrange and list just to make sure I wasn't missing something, but I still can't figure it out. Did I somehow change the plot command without realizing it?
Tested with:
data(diamonds, package = "ggplot2")
test <- diamonds[1:100,]
hstPlts(test,ps='grid') #without grid
hst1(test,ps='grid') #with grid
hstPlts <- function(df,ps = 'off',bnSize=c(30)){
#hstPlts() accepts any data frame and if the ps parameter is “on” or
#“grid”, then plot a pair of blue histograms with a vertical red line at the
#mean
#for every numerical variable at each number of bins integer specified in
#the bin
#vector parameter. if the plot switch is set to “grid”, there should be a
#grid for each count-bin combination and a separate grid for
#each density-bin size combination.
#parameters:
#df : data frame
#ps : plot switch defaulted to 'off' with two additional value options -
#'on' and 'grid'
#bnSize : vector containing numeric values representing number of bins for
histograms
num_var <- df[sapply(df,is.numeric)] #extract numeric columns
for(i in 1:length(bnSize)){ #iterate through each bin size
for(j in 1:(ncol(num_var))){ #iterate through each numeric variable
if(ps=='on' | ps =='grid'){ #conditional for both 'on' and 'grid'
#values
bnWid <- (max(num_var[,j]) - min(num_var[,j]))/bnSize[i] # compute
#bin widths
vrMn <- mean(num_var[[j]]) #compute column means for red line
mean = sprintf("%8.3f ", vrMn) #set up label for mean line with
#formatted decimals
cntPlt <- ggplot(num_var, aes(x=num_var[,j])) + #plot count
#histogram with numeric variable on x-axis
geom_histogram(colour = "blue", fill = "blue", binwidth =
bnWid) +
#detail bar fill colors and histogram bin widths
geom_vline(xintercept = mean(num_var[,j]),colour="red") +
#mean line
abs(x=colnames(num_var)[j]) #label x-axis
densPlt <- cntPlt + aes(y = ..density..) + labs(y = "density")
#create corresponding density histogram
print(cntPlt)
print(densPlt)
}
if(ps == 'grid'){ #conditional for just 'grid' value
grdPlt <- ggplot(num_var, aes(x=num_var[,j])) + #create plot for
#each count-bin combination and a separate grid for
#each density-bin size combination
geom_histogram(colour = "blue", fill = "blue", binwidth =
bnWid) +
#detail bar fill colors and histogram bin widths
labs(x=colnames(num_var)[j]) #label x-axis
print(grdPlt)
print(grdPlt + aes(y = ..density..) + labs(y = "density"))
}
}
}
}
hst1 <- function(df,ps = 'off',bsizes=c(30)){
numvar <- df[sapply(df,is.numeric)]
if(ps=='on')
for(i in 1:length(bsizes)){
for(j in 1:(ncol(numvar))){
bwidth <- (max(numvar[,j]) - min(numvar[,j]))/bsizes[i]
var_mean <- mean(numvar[[j]])
mean = sprintf("%8.3f ", var_mean)
counts <- ggplot(numvar, aes(x=numvar[,j])) +
geom_histogram(colour = "blue", fill = "blue", binwidth
= bwidth) +
geom_vline(xintercept = mean(numvar[,j]),colour="red") +
labs(x=colnames(numvar)[j]) # Labeling the x axis
dense <- counts + aes(y = ..density..) + labs(y = "density")
print(counts)
print(dense)
}
}
}
else if(ps == 'grid'){
for(i in 1:length(bsizes)){
cntLst <- list()
denseLst <- list()
for(j in 1:(ncol(numvar))){
bwidth <- (max(numvar[,j]) - min(numvar[,j]))/bsizes[i]
cntGrPlt <- ggplot(numvar, aes(x=numvar[,j])) +
geom_histogram(colour = "blue", fill = "blue", binwidth =
bwidth) +
labs(x=colnames(numvar)[j])
cntLst[[length(cntLst)+1]] <- cntGrPlt
denseLst[[j]] <- cntGrPlt + aes(y = ..density..) + labs(y =
"density")
}
grid.arrange(grobs=cntLst,ncol=2)
grid.arrange(grobs=denseLst,ncol=2)
}
}
}

Related

How to overlap R histograms

Reproduced from this code:
library(haven)
library(survey)
library(dplyr)
nhanesDemo <- read_xpt(url("https://wwwn.cdc.gov/Nchs/Nhanes/2015-2016/DEMO_I.XPT"))
# Rename variables into something more readable
nhanesDemo$fpl <- nhanesDemo$INDFMPIR
nhanesDemo$age <- nhanesDemo$RIDAGEYR
nhanesDemo$gender <- nhanesDemo$RIAGENDR
nhanesDemo$persWeight <- nhanesDemo$WTINT2YR
nhanesDemo$psu <- nhanesDemo$SDMVPSU
nhanesDemo$strata <- nhanesDemo$SDMVSTRA
nhanesAnalysis <- nhanesDemo %>%
mutate(LowIncome = case_when(
INDFMIN2 < 40 ~ T,
T ~ F
)) %>%
# Select the necessary columns
select(INDFMIN2, LowIncome, persWeight, psu, strata)
# Set up the design
nhanesDesign <- svydesign(id = ~psu,
strata = ~strata,
weights = ~persWeight,
nest = TRUE,
data = nhanesAnalysis)
svyhist(~log10(INDFMIN2), design=nhanesDesign, main = '')
How do I color the histogram by independent variable, say, LowIncome? I want to have two separate histograms, one for each value of LowIncome. Unfortunately I picked a bad example, but I want them to be see-through in case their values overlap.
If you want to plot a histogram from your model, you can get its data from model.frame (this is what svyhist does under the hood). To get the histogram filled by group, you could use this data frame inside ggplot:
library(ggplot2)
ggplot(model.frame(nhanesDesign), aes(log10(INDFMIN2), fill = LowIncome)) +
geom_histogram(alpha = 0.5, color = "gray60", breaks = 0:20 / 10) +
theme_classic()
Edit
As Thomas Lumley points out, this does not incorporate sampling weights, so if you wanted this you could do:
ggplot(model.frame(nhanesDesign), aes(log10(INDFMIN2), fill = LowIncome)) +
geom_histogram(aes(weight = persWeight), alpha = 0.5,
color = "gray60", breaks = 0:20 / 10) +
theme_classic()
To demonstrate this approach works, we can replicate Thomas's approach in ggplot using the data example from svyhist. To get the uneven bin sizes (if this is desired), we need two histogram layers, though I'm guessing this would not be required for most use-cases.
ggplot(model.frame(dstrat), aes(enroll)) +
geom_histogram(aes(fill = "E", weight = pw, y = after_stat(density)),
data = subset(model.frame(dstrat), stype == "E"),
breaks = 0:35 * 100,
position = "identity", col = "gray50") +
geom_histogram(aes(fill = "Not E", weight = pw, y = after_stat(density)),
data = subset(model.frame(dstrat), stype != "E"),
position = "identity", col = "gray50",
breaks = 0:7 * 500) +
scale_fill_manual(NULL, values = c("#00880020", "#88000020")) +
theme_classic()
You can't just extract the data and use ggplot, because that won't use the weights and so misses the whole point of svyhist. You can use the add=TRUE argument, though. You do need to set the x and y axis ranges correctly to make sure the whole plot is visible
Using the data example from ?svyhist
svyhist(~enroll, subset(dstrat,stype=="E"), col="#00880020",ylim=c(0,0.003),xlim=c(0,3500))
svyhist(~enroll, subset(dstrat,stype!="E"), col="#88000020",add=TRUE)

Automatic Highlighting Outliers in ggplots

I have a dataframe df. While plotting this in ggplot. Can we also highlight outliers. Below is the sample code
df <- data.frame(col=runif(100, min=0, max=100000))
df$D <- c(1:100)
ggplot(df,aes(x=D,y=col))+geom_line()
Is there the way to highlight outliers here
We can define a function for this. The line_outlier_plot has four arguments. df has the same format as your example data frame. outlier_color and normal_color are to specify the color for the points.drop indicates if we want to drop the category in the legend.
We have to define how to determine an outlier. Here, I decided that an outlier is a value larger or smaller than the mean plus or minus 3 times of the standard deviation. You can define your own approach to determine the outlier by modifying the code in the ifelse statement.
library(ggplot2)
line_outlier_plot <- function(df, outlier_color = "red", normal_color = "black", drop = FALSE){
# Assign a label to show if it is an outlier or not
df$label <- ifelse(df$col > mean(df$col) + 3 * sd(df$col) |
df$col < mean(df$col) - 3 * sd(df$col), "Outlier", "Normal")
df$label <- factor(df$label, levels = c("Normal", "Outlier"))
# Set the color palette
pal <- c("Outlier" = outlier_color, "Normal" = normal_color)
p <- ggplot(df, aes(x = D, y = col)) +
geom_line() +
geom_point(aes(color = label)) +
scale_color_manual(values = pal, drop = drop)
return(p)
}
Below is an example of the plot using this function.
set.seed(155)
df <- data.frame(col=rnorm(1000))
df$D <- c(1:1000)
line_outlier_plot(df)

R: Color in CreateRadialPlot source

I am using CreateRadialPlot.R source and trying to plot this:
var.names <- c("All Flats", "No central heating", "Rooms per\nhousehold", "People per room",
"HE Qualification", "Routine/Semi-Routine\nOccupation", "2+ Car household",
"Public Transport\nto work", "Work from home")
values.a <- c(-0.1145725, -0.1824095, -0.01153078, -0.0202474, 0.05138737, -0.1557234,
0.1099018, -0.05310315, 0.0182626)
values.b <- c(0.2808439, -0.2936949, -0.1925846, 0.08910815, -0.03468011, 0.07385727,
-0.07228813, 0.1501105, -0.06800127)
group.names <- c("Blue Collar Communities", "Prospering Suburbs")
m2 <- matrix(c(values.a, values.b), nrow = 2, ncol = 9, byrow = TRUE)
group.names <- c(group.names)
df2 <- data.frame(group = group.names, m2)
colnames(df2)[2:10] <- var.names
source("http://pcwww.liv.ac.uk/~william/Geodemographic%20Classifiability/func%20CreateRadialPlot.r")
CreateRadialPlot(df2, plot.extent.x = 1.5)
I want to change the color of the lines for "Blue Collar Communities" and for "Prospering Suburbs", but the function does not have an argument for that. Would I need to create a new function? And what if I would like to set a title for the plot?
You need to adapt the function to accept colors for geom_path. Here, I added the third line in this part of the function (and the "+" the line before):
base <- base + geom_path(data=group$path,aes(x=x,y=y,group=group,colour=group),
size=group.line.width) +
scale_color_brewer(palette = "Set1",levels(group)) #added
This is the result:
CreateRadialPlot(df2, plot.extent.x = 1.5,background.circle.colour="white")
Here's the modified function if you want it:
CreateRadialPlot <- function(plot.data,
axis.labels=colnames(plot.data)[-1],
grid.min=-0.5, #10,
grid.mid=0, #50,
grid.max=0.5, #100,
centre.y=grid.min - ((1/9)*(grid.max-grid.min)),
plot.extent.x.sf=1.2,
plot.extent.y.sf=1.2,
x.centre.range=0.02*(grid.max-centre.y),
label.centre.y=FALSE,
grid.line.width=0.5,
gridline.min.linetype="longdash",
gridline.mid.linetype="longdash",
gridline.max.linetype="longdash",
gridline.min.colour="grey",
gridline.mid.colour="blue",
gridline.max.colour="grey",
grid.label.size=4,
gridline.label.offset=-0.02*(grid.max-centre.y),
label.gridline.min=TRUE,
axis.label.offset=1.15,
axis.label.size=3,
axis.line.colour="grey",
group.line.width=1,
group.point.size=4,
background.circle.colour="yellow",
background.circle.transparency=0.2,
plot.legend=if (nrow(plot.data)>1) TRUE else FALSE,
legend.title="Cluster",
legend.text.size=grid.label.size ) {
var.names <- colnames(plot.data)[-1] #'Short version of variable names
#axis.labels [if supplied] is designed to hold 'long version' of variable names
#with line-breaks indicated using \n
#caclulate total plot extent as radius of outer circle x a user-specifiable scaling factor
plot.extent.x=(grid.max+abs(centre.y))*plot.extent.x.sf
plot.extent.y=(grid.max+abs(centre.y))*plot.extent.y.sf
#Check supplied data makes sense
if (length(axis.labels) != ncol(plot.data)-1)
return("Error: 'axis.labels' contains the wrong number of axis labels")
if(min(plot.data[,-1])<centre.y)
return("Error: plot.data' contains value(s) < centre.y")
if(max(plot.data[,-1])>grid.max)
return("Error: 'plot.data' contains value(s) > grid.max")
#Declare required internal functions
CalculateGroupPath <- function(df) {
#Converts variable values into a set of radial x-y coordinates
#Code adapted from a solution posted by Tony M to
#http://stackoverflow.com/questions/9614433/creating-radar-chart-a-k-a-star-plot-spider-plot-using-ggplot2-in-r
#Args:
# df: Col 1 - group ('unique' cluster / group ID of entity)
# Col 2-n: v1.value to vn.value - values (e.g. group/cluser mean or median) of variables v1 to v.n
path <- as.factor(as.character(df[,1]))
##find increment
angles = seq(from=0, to=2*pi, by=(2*pi)/(ncol(df)-1))
##create graph data frame
graphData= data.frame(seg="", x=0,y=0)
graphData=graphData[-1,]
for(i in levels(path)){
pathData = subset(df, df[,1]==i)
for(j in c(2:ncol(df))){
#pathData[,j]= pathData[,j]
graphData=rbind(graphData, data.frame(group=i,
x=pathData[,j]*sin(angles[j-1]),
y=pathData[,j]*cos(angles[j-1])))
}
##complete the path by repeating first pair of coords in the path
graphData=rbind(graphData, data.frame(group=i,
x=pathData[,2]*sin(angles[1]),
y=pathData[,2]*cos(angles[1])))
}
#Make sure that name of first column matches that of input data (in case !="group")
colnames(graphData)[1] <- colnames(df)[1]
graphData #data frame returned by function
}
CaclulateAxisPath = function(var.names,min,max) {
#Caculates x-y coordinates for a set of radial axes (one per variable being plotted in radar plot)
#Args:
#var.names - list of variables to be plotted on radar plot
#min - MININUM value required for the plotted axes (same value will be applied to all axes)
#max - MAXIMUM value required for the plotted axes (same value will be applied to all axes)
#var.names <- c("v1","v2","v3","v4","v5")
n.vars <- length(var.names) # number of vars (axes) required
#Cacluate required number of angles (in radians)
angles <- seq(from=0, to=2*pi, by=(2*pi)/n.vars)
#calculate vectors of min and max x+y coords
min.x <- min*sin(angles)
min.y <- min*cos(angles)
max.x <- max*sin(angles)
max.y <- max*cos(angles)
#Combine into a set of uniquely numbered paths (one per variable)
axisData <- NULL
for (i in 1:n.vars) {
a <- c(i,min.x[i],min.y[i])
b <- c(i,max.x[i],max.y[i])
axisData <- rbind(axisData,a,b)
}
#Add column names + set row names = row no. to allow conversion into a data frame
colnames(axisData) <- c("axis.no","x","y")
rownames(axisData) <- seq(1:nrow(axisData))
#Return calculated axis paths
as.data.frame(axisData)
}
funcCircleCoords <- function(center = c(0,0), r = 1, npoints = 100){
#Adapted from Joran's response to http://stackoverflow.com/questions/6862742/draw-a-circle-with-ggplot2
tt <- seq(0,2*pi,length.out = npoints)
xx <- center[1] + r * cos(tt)
yy <- center[2] + r * sin(tt)
return(data.frame(x = xx, y = yy))
}
### Convert supplied data into plottable format
# (a) add abs(centre.y) to supplied plot data
#[creates plot centroid of 0,0 for internal use, regardless of min. value of y
# in user-supplied data]
plot.data.offset <- plot.data
plot.data.offset[,2:ncol(plot.data)]<- plot.data[,2:ncol(plot.data)]+abs(centre.y)
#print(plot.data.offset)
# (b) convert into radial coords
group <-NULL
group$path <- CalculateGroupPath(plot.data.offset)
#print(group$path)
# (c) Calculate coordinates required to plot radial variable axes
axis <- NULL
axis$path <- CaclulateAxisPath(var.names,grid.min+abs(centre.y),grid.max+abs(centre.y))
#print(axis$path)
# (d) Create file containing axis labels + associated plotting coordinates
#Labels
axis$label <- data.frame(
text=axis.labels,
x=NA,
y=NA )
#print(axis$label)
#axis label coordinates
n.vars <- length(var.names)
angles = seq(from=0, to=2*pi, by=(2*pi)/n.vars)
axis$label$x <- sapply(1:n.vars, function(i, x) {((grid.max+abs(centre.y))*axis.label.offset)*sin(angles[i])})
axis$label$y <- sapply(1:n.vars, function(i, x) {((grid.max+abs(centre.y))*axis.label.offset)*cos(angles[i])})
#print(axis$label)
# (e) Create Circular grid-lines + labels
#caclulate the cooridinates required to plot circular grid-lines for three user-specified
#y-axis values: min, mid and max [grid.min; grid.mid; grid.max]
gridline <- NULL
gridline$min$path <- funcCircleCoords(c(0,0),grid.min+abs(centre.y),npoints = 360)
gridline$mid$path <- funcCircleCoords(c(0,0),grid.mid+abs(centre.y),npoints = 360)
gridline$max$path <- funcCircleCoords(c(0,0),grid.max+abs(centre.y),npoints = 360)
#print(head(gridline$max$path))
#gridline labels
gridline$min$label <- data.frame(x=gridline.label.offset,y=grid.min+abs(centre.y),
text=as.character(grid.min))
gridline$max$label <- data.frame(x=gridline.label.offset,y=grid.max+abs(centre.y),
text=as.character(grid.max))
gridline$mid$label <- data.frame(x=gridline.label.offset,y=grid.mid+abs(centre.y),
text=as.character(grid.mid))
#print(gridline$min$label)
#print(gridline$max$label)
#print(gridline$mid$label)
### Start building up the radar plot
# Delcare 'theme_clear', with or without a plot legend as required by user
#[default = no legend if only 1 group [path] being plotted]
theme_clear <- theme_bw() +
theme(axis.text.y=element_blank(),
axis.text.x=element_blank(),
axis.ticks=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
panel.border=element_blank(),
legend.key=element_rect(linetype="blank"))
if (plot.legend==FALSE) theme_clear <- theme_clear + theme(legend.position="none")
#Base-layer = axis labels + plot extent
# [need to declare plot extent as well, since the axis labels don't always
# fit within the plot area automatically calculated by ggplot, even if all
# included in first plot; and in any case the strategy followed here is to first
# plot right-justified labels for axis labels to left of Y axis for x< (-x.centre.range)],
# then centred labels for axis labels almost immediately above/below x= 0
# [abs(x) < x.centre.range]; then left-justified axis labels to right of Y axis [x>0].
# This building up the plot in layers doesn't allow ggplot to correctly
# identify plot extent when plotting first (base) layer]
#base layer = axis labels for axes to left of central y-axis [x< -(x.centre.range)]
base <- ggplot(axis$label) + xlab(NULL) + ylab(NULL) + coord_equal() +
geom_text(data=subset(axis$label,axis$label$x < (-x.centre.range)),
aes(x=x,y=y,label=text),size=axis.label.size,hjust=1) +
scale_x_continuous(limits=c(-plot.extent.x,plot.extent.x)) +
scale_y_continuous(limits=c(-plot.extent.y,plot.extent.y))
# + axis labels for any vertical axes [abs(x)<=x.centre.range]
base <- base + geom_text(data=subset(axis$label,abs(axis$label$x)<=x.centre.range),
aes(x=x,y=y,label=text),size=axis.label.size,hjust=0.5)
# + axis labels for any vertical axes [x>x.centre.range]
base <- base + geom_text(data=subset(axis$label,axis$label$x>x.centre.range),
aes(x=x,y=y,label=text),size=axis.label.size,hjust=0)
# + theme_clear [to remove grey plot background, grid lines, axis tick marks and axis text]
base <- base + theme_clear
# + background circle against which to plot radar data
base <- base + geom_polygon(data=gridline$max$path,aes(x,y),
fill=background.circle.colour,
alpha=background.circle.transparency)
# + radial axes
base <- base + geom_path(data=axis$path,aes(x=x,y=y,group=axis.no),
colour=axis.line.colour)
# ... + group (cluster) 'paths'
base <- base + geom_path(data=group$path,aes(x=x,y=y,group=group,colour=group),
size=group.line.width) +
scale_color_brewer(palette = "Set1",levels(group))
# ... + group points (cluster data)
base <- base + geom_point(data=group$path,aes(x=x,y=y,group=group,colour=group),size=group.point.size)
#... + amend Legend title
if (plot.legend==TRUE) base <- base + labs(colour=legend.title,size=legend.text.size)
# ... + circular grid-lines at 'min', 'mid' and 'max' y-axis values
base <- base + geom_path(data=gridline$min$path,aes(x=x,y=y),
lty=gridline.min.linetype,colour=gridline.min.colour,size=grid.line.width)
base <- base + geom_path(data=gridline$mid$path,aes(x=x,y=y),
lty=gridline.mid.linetype,colour=gridline.mid.colour,size=grid.line.width)
base <- base + geom_path(data=gridline$max$path,aes(x=x,y=y),
lty=gridline.max.linetype,colour=gridline.max.colour,size=grid.line.width)
# ... + grid-line labels (max; ave; min) [only add min. gridline label if required]
if (label.gridline.min==TRUE) {
base <- base + geom_text(aes(x=x,y=y,label=text),data=gridline$min$label,face="bold",size=grid.label.size, hjust=1) }
base <- base + geom_text(aes(x=x,y=y,label=text),data=gridline$mid$label,face="bold",size=grid.label.size, hjust=1)
base <- base + geom_text(aes(x=x,y=y,label=text),data=gridline$max$label,face="bold",size=grid.label.size, hjust=1)
# ... + centre.y label if required [i.e. value of y at centre of plot circle]
if (label.centre.y==TRUE) {
centre.y.label <- data.frame(x=0, y=0, text=as.character(centre.y))
base <- base + geom_text(aes(x=x,y=y,label=text),data=centre.y.label,face="bold",size=grid.label.size, hjust=0.5) }
return(base)
}

Custom scatterplot matrix using facet_grid in ggplot2

I'm trying to write a custom scatterplot matrix function in ggplot2 using facet_grid. My data have two categorical variables and one numeric variable.
I'd like to facet (make the scatterplot rows/cols) according to one of the categorical variables and change the plotting symbol according to the other categorical.
I do so by first constructing a larger dataset that includes all combinations (combs) of the categorical variable from which I'm creating the scatterplot panels.
My questions are:
How to use geom_rect to white-out the diagonal and upper panels in facet_grid (I can only make the middle ones black so far)?
How can you move the titles of the facets to the bottom and left hand sides respectively?
How does one remove tick axes and labels for the top left and bottom right facets?
Thanks in advance.
require(ggplot2)
# Data
nC <- 5
nM <- 4
dat <- data.frame(
Control = rep(LETTERS[1:nC], nM),
measure = rep(letters[1:nM], each = nC),
value = runif(nC*nM))
# Change factors to characters
dat <- within(dat, {
Control <- as.character(Control)
measure <- as.character(measure)
})
# Check, lapply(dat, class)
# Define scatterplot() function
scatterplotmatrix <- function(data,...){
controls <- with(data, unique(Control))
measures <- with(data, unique(measure))
combs <- expand.grid(1:length(controls), 1:length(measures), 1:length(measures))
# Add columns for values
combs$value1 = 1
combs$value2 = 0
for ( i in 1:NROW(combs)){
combs[i, "value1"] <- subset(data, subset = Control==controls[combs[i,1]] & measure == measures[combs[i,2]], select = value)
combs[i, "value2"] <- subset(data, subset = Control==controls[combs[i,1]] & measure == measures[combs[i,3]], select = value)
}
for ( i in 1:NROW(combs)){
combs[i,"Control"] <- controls[combs[i,1]]
combs[i,"Measure1"] <- measures[combs[i,2]]
combs[i,"Measure2"] <- measures[combs[i,3]]
}
# Final pairs plot
plt <- ggplot(combs, aes(x = value1, y = value2, shape = Control)) +
geom_point(size = 8, colour = "#F8766D") +
facet_grid(Measure2 ~ Measure1) +
ylab("") +
xlab("") +
scale_x_continuous(breaks = c(0,0.5,1), labels = c("0", "0.5", "1"), limits = c(-0.05, 1.05)) +
scale_y_continuous(breaks = c(0,0.5,1), labels = c("0", "0.5", "1"), limits = c(-0.05, 1.05)) +
geom_rect(data = subset(combs, subset = Measure1 == Measure2), colour='white', xmin = -Inf, xmax = Inf,ymin = -Inf,ymax = Inf)
return(plt)
}
# Call
plt1 <- scatterplotmatrix(dat)
plt1
I'm not aware of a way to move the panel strips (the labels) to the bottom or left. Also, it's not possible to format the individual panels separately (e.g., turn off the tick marks for just one facet). So if you really need these features, you will probably have to use something other than, or in addition to ggplot. You should really look into GGally, although I've never had much success with it.
As far as leaving some of the panels blank, here is a way.
nC <- 5; nM <- 4
set.seed(1) # for reproducible example
dat <- data.frame(Control = rep(LETTERS[1:nC], nM),
measure = rep(letters[1:nM], each = nC),
value = runif(nC*nM))
scatterplotmatrix <- function(data,...){
require(ggplot2)
require(data.table)
require(plyr) # for .(...)
DT <- data.table(data,key="Control")
gg <- DT[DT,allow.cartesian=T]
setnames(gg,c("Control","H","x","V","y"))
fmt <- function(x) format(x,nsmall=1)
plt <- ggplot(gg, aes(x,y,shape = Control)) +
geom_point(subset=.(as.numeric(H)<as.numeric(V)),size=5, colour="#F8766D") +
facet_grid(V ~ H) +
ylab("") + xlab("") +
scale_x_continuous(breaks=c(0,0.5,1), labels=fmt, limits=c(-0.05, 1.05)) +
scale_y_continuous(breaks=c(0,0.5,1), labels=fmt, limits=c(-0.05, 1.05))
return(plt)
}
scatterplotmatrix(dat)
The main feature of this is the use of subset=.(as.numeric(H)<as.numeric(V)) in the call to geom_point(...). This subsets the dataset so you only get a point layer when the condition is met, e.g. in facets where is.numeric(H)<is.numeric(V). This works because I've left the H and V columns as factors and is.numeric(...) operating on a factor returns the levels, not the names.
The rest is just a more compact (and much faster) way of creating what you called comb.

Using ggplot2 how can I represent a dot and a line in the legend

Using ggplot2 I am plotting several functions and a series of points. I cannot figure out how to represent the points on the legend. I realize I need to use an aes() function, but I don't fully understand how to do this. I apologize that the example is so long, but I don't know how else to illustrate it.
## add ggplot2
library(ggplot2)
# Declare Chart values
y_label = expression("y_axis"~~bgroup("(",val / km^{2},")"))
x_label = "x_axis"
#############################
## Define functions
# Create a list to hold the functions
funcs <- list()
funcs[]
# loop through to define functions
for(k in 1:21){
# Make function name
funcName <- paste('func', k, sep = '' )
# make function
func = paste('function(x){exp(', k, ') * exp(x*0.01)}', sep = '')
funcs[[funcName]] = eval(parse(text=func))
}
# Specify values
yval = c(1:20)
xval = c(1:20)
# make a dataframe
d = data.frame(xval,yval)
# Specify Range
x_range <- range(1,51)
# make plot
p <-qplot(data = d,
x=xval,y=yval,
xlab = x_label,
ylab = y_label,
xlim = x_range
)+ geom_point(colour="green")
for(j in 1:length(funcs)){
p <- p + stat_function(aes(y=0),fun = funcs[[j]], colour="blue", alpha=I(1/5))
}
# make one function red
p <- p + stat_function(fun = funcs[[i]], aes(color="red"), size = 1) +
scale_colour_identity("", breaks=c("red", "green","blue"),
labels=c("Fitted Values", "Measured values","All values"))
# position legend and make remove frame
p <- p + opts(legend.position = c(0.85,0.7), legend.background = theme_rect(col = 0))
print(p)
Thank you in advance - I have learned I a lot from this community over the last few days.
See below for a solution. The main idea is the following: imagine the points having an invisible line under them, and the lines having invisible points. So each "series" gets color and shape and linetype attributes, and at the end we will manually set them to invisible values (0 for lines, NA for points) as necessary. ggplot2 will merge the legends for the three attributes automatically.
# make plot
p <- qplot(data = d, x=xval, y=yval, colour="Measured", shape="Measured",
linetype="Measured", xlab = x_label, ylab = y_label, xlim = x_range,
geom="point")
#add lines for functions
for(j in 1:length(funcs)){
p <- p + stat_function(aes(colour="All", shape="All", linetype="All"),
fun = funcs[[j]], alpha=I(1/5), geom="line")
}
# make one function special
p <- p + stat_function(fun = funcs[[1]], aes(colour="Fitted", shape="Fitted",
linetype="Fitted"), size = 1, geom="line")
# modify look
p <- p + scale_colour_manual("", values=c("green", "blue", "red")) +
scale_shape_manual("", values=c(19,NA,NA)) +
scale_linetype_manual("", values=c(0,1,1))
print(p)
Setting the colour aesthetic for each geom to a constant may help. Here is a small example:
require(ggplot2)
set.seed(666)
N<-20
foo<-data.frame(x=1:N,y=runif(N),z=runif(N))
p<-ggplot(foo)
p<-p+geom_line(aes(x,y,colour="Theory"))
p<-p+geom_point(aes(x,z,colour="Practice"))
#Optional, if you want your own colours
p<-p+scale_colour_manual("Source",c('blue','red'))
print(p)
This isn't supported natively in ggplot2, but I'm hoping I'll figure out how for a future version.

Resources