Add legend inside each panel with Lattice in R - r

I wanna make a scatter plot with connecting lines for different groups and different individuals. I make panels conditioned by my group variable and groups conditioned by my individual variables. Now, I would like to add legend inside each panels(see the code below). In the plots, I would like to have legends of individuals for GRP==1 in the first panel, GRP==2 in the second panel, so on so forth. All the legends are located in the upper left corner of the panel they belong to. How shall I code?
library(lattice)
mydata <- data.frame(ID = rep(1: 20, each = 10),
GRP = rep(1: 4, each = 50),
x = rep(0: 9, 20))
mydata$y <- 1.2 * mydata$GRP * mydata$x +
rnorm(nrow(mydata), sd = mydata$GRP)
xyplot(y~ x | factor(GRP), data = mydata,
groups = ID,
type = "b",
as.table = T,
layout = c(2, 2),
panel = panel.superpose,
panel.groups = function (x, y, ...) {
panel.xyplot(x, y, ...)
}
)

Try something like this. Note that the subset command comes in the data statement in xyplot. This is on purpose. If you call subset as an xyplot argument, then the plots would have shown all 20 labels in each plot.
library(lattice)
mydata <- data.frame(ID = rep(1:20, each = 10), GRP = rep(1:4, each = 50), x = rep(0:9, 20))
mydata$y <- 1.2 * mydata$GRP * mydata$x + rnorm(nrow(mydata), sd = mydata$GRP)
i=1; j=1
for(grp in 1:4) {
a <- xyplot(y~x|factor(GRP), data=subset(mydata, GRP==grp),
groups = factor(ID),
type = "b",
auto.key=list(columns=4,space="inside")
)
print(a, split=c(i,j,2,2), more=T)
i=i+1; if(i>2){i=1;j=j+1} # basically, tell the plots which quadrant to go in
}

Related

How to add text on a levelplot at specific coordinates?

Here is a reproducible example to work with:
library(lattice)
myimage<-matrix(c(1,1,2,3,3,4), nrow=3, ncol=2)
mytable<-data.frame(Xcoord=c(1.5, 1.5, 3,3), Ycoord=c(1,2,1,2), Labels=c("A","B","C","D"))
mycolors<-colorRampPalette(c("red","yellow","green","cyan","blue"))
windows()
levelplot(myimage, aspect="iso", col.regions = mycolors)
which produce the graph below.
Now I want to add (as text) the Labels in mytable at the specified coordinates indicated by Xcoord & Ycoord (which correspond to the rows and columns of the images). How can I do this ?
Only solution I could find is following user20650's link above and converting the matrix image to a data.frame with x, y coordinates (which I would have preferred to avoid):
dat <- data.frame(expand.grid(x = 1:3, y = 1:2), value = c(myimage))
Obj <-
levelplot(value ~ x+y, data = dat, aspect="iso", col.regions = mycolors) +
xyplot(y ~ x, data = dat,
panel = function(y, x, ...) {
ltext(x = mytable$Xcoord, y = mytable$Ycoord, labels = mytable$Labels, cex = 1, font = 2)
})
print({Obj})

Use plot_grid to arrange plots where plot information is stored in an R data frame

I generate a series of plots stored in a matrix as part of a for loop much like in the MWE below. This same matrix also stores two other columns of information (Colour and Animal in this example). I then want to be able to create a grid of plots, where I identify the plot based on the corresponding Colour and Animal.
I tried creating a data frame and then using row names to call out the plots I needed, but had the common error of Cannot convert object of class list into a grob.. If I call from the matrix directly this works - however I want a way not have to do this in case the order of the data changes in the input files. Is it possible to work directly from the data frame? I've seen similar examples, but couldn't apply to my case. I want to stick with cow plot and change as little as possible in the data generation stage.
MWE
library(cowplot)
p <- vector('list', 15)
p <-
matrix(
p,
nrow = 5,
ncol = 3
)
myColours = c("Yellow", "Red", "Blue", "Green", "Orange")
myAnimals = c("Kangaroo", "Emu", "Echidna", "Platypus", "Cassowary")
x = seq(1,10)
it = 1
for (i in seq(0,4)){ # generate example data and plots
y = x^i
t = runif(5)
df <- data.frame("X" = x, "Y" = y, "T" = t)
theanimal = myAnimals[i+1]
thecolour = myColours[i+1]
p[[it,1]] = thecolour
p[[it,2]] = theanimal
p[[it,3]] = ggplot(data = df, mapping = aes(x = X, y = Y)) +
geom_point(aes(color = T)) +
ggtitle(paste(thecolour, theanimal, sep = " "))
it = it+ 1
}
# turn into df
pltdf<- as.data.frame(p)
colnames(pltdf) <- c("Colour", "Animal", "plot")
rownames(pltdf) <- do.call(paste, c(pltdf[c("Colour", "Animal")], sep="-"))
pltdf[[1,3]] # this is what I expect for a single plot
plot1 = vector('list', 4)
plot1 <-
matrix(
plot1,
nrow = 2,
ncol = 2
)
plot1[[1,1]] = pltdf["Red-Emu", "plot"]. # also tried with just plot[[1]] = etc.
plot1[[1,2]] = pltdf["Blue-Echidna", "plot"]
plot1[[2,1]] = pltdf["Orange-Cassowary", "plot"]
plot1[[2,2]] = pltdf["Green-Platypus", "plot"]
plot_grid(plotlist = t(plot1), ncol = 2)
plot_grid(plotlist = list(plot1), ncol = 2) # suggested solution on a dif problem
plot2 = vector('list', 4) # what I want plots to look like in the end
plot2[[1]] = p[[1,3]]
plot2[[2]] = p[[4,3]]
plot2[[3]] = p[[2, 3]]
plot2[[4]] = p[[5, 3]]
plot_grid(plotlist = t(plot2), ncol = 2)
You can specify the order that you want the plots to be in and subset the dataframe accordingly which can be used in plot_grid.
library(cowplot)
order <- c("Red-Emu", "Blue-Echidna", "Orange-Cassowary", "Green-Platypus")
plot_grid(plotlist = pltdf[order, 'plot'], ncol = 2)

How do I represent an enormous bar plot with very long axis labels in R and ggplot?

I have a ggplot of bar graphs for each cluster.
The axis labels ("Path") for the bar plot are all unique and long, but they are grouped by "PathTypes" and "Cluster" - info which I want to represent on the bar graphs. I use a texture (stripes, dots, etc) from the ggpattern package to represent the "PathType" and I use colors to represent the "Cluster".
The resulting graph I produce is illegible bc it's just too large. I've been butting heads with facet_grid and facet_wrap. I am fine with using 2-3 pages to represent all the clusters, but I'm unsure about how to split the data smartly to accomplish that.
Example code follows:
library(data.table)
library(ggpattern)
library(gridExtra)
library(ggpubr)
library(truncnorm)
library(ggplot2)
library(stringi)
# generating sample data for data table called all.cluster.dt
PathType <- sample(x = c("Type1", "Type2", "Type3", "Type4", "Type5"), # create the PathType column
size = 400,
replace = T)
Score <- rtruncnorm(n = 400, a = 15, b = 90, mean = 55, sd = 15) # create the Score Column
Path <- NA # initialize the Path column
Path.generator <- function() { # function to write unique Paths
a <- do.call(paste0, replicate(10, sample(LETTERS, 15, TRUE), FALSE))
single.Name <- paste(a, collapse = ' ')
return(single.Name)
}
cluster <- sample(x = c(1:14), # create the Cluster column
size = 400,
replace = T)
all.cluster.dt <- data.table( # create the data table with desired columns
PathType,
Score,
Path,
cluster
)
for(i in 1:length(all.cluster.dt$Path)){ # loop down Path column calling function to generate unique Path name for each row
all.cluster.dt$Path[i] <- Path.generator()
}
wrap.it <- function(x, len) # function to try and wrap long Path label text
{
sapply(x, function(y) paste(strwrap(y, len),
collapse = "\n"),
USE.NAMES = FALSE)
}
# Call this function with a list or vector
wrap.labels <- function(x, len)
{
if (is.list(x))
{
lapply(x, wrap.it, len)
} else {
wrap.it(x, len)
}
}
wr.lap <- wrap.labels(all.cluster.dt$Path, 40) # wrap Path labels to 40 characters long
all.cluster.dt$Path <- wr.lap
all.cluster.dt$Path <- factor(all.cluster.dt$Path, # group and factorize the data by PathType and Score
levels = unique(all.cluster.dt$Path[order(all.cluster.dt$PathType, all.cluster.dt$Score)]))
cluster.color.df <- data.frame("cluster" = c(1:14), # add custom colors to represent which Cluster the Path belongs to
"color" = c("#F5F2D4", "#CAD8F2", "#8FB6FF", "#FFFDD7", "#DADADA", "#DAEB9B", "#EED1F2", "#C9E2D0", "#FFDFA2", "#DFFFD6", "#F6DFDE", "#E2DEF5", "#F0B8BC", "#CAF3EF"))
setDT(all.cluster.dt)[cluster.color.df, color := i.color, on = .(cluster)] # match color to cluster in all.cluster.dt
bar.plots <- ggplot(all.cluster.dt, aes(x=Score, y=Path)) +
ggpattern::geom_col_pattern( # adds texture/patterns to the bars based on the PathType column
aes(pattern = `PathType`),
fill = all.cluster.dt$color,
colour = "black",
pattern_density = 0.2, # how dense the pattern should be
pattern_fill = "black",
pattern_spacing = 0.1) +
scale_x_continuous(expand = c(0, 0), limits = c(0, 90)) +
theme_bw() +
theme(axis.title.y = element_blank()) +
theme(legend.position = "none",
text = element_text(size = 8))
bar.plots + facet_grid(rows = vars(cluster), scales = "fixed") # draw the bar graph
I now attempt to use grid.arrange and ggsave to arrange the plots by Cluster on a page, but get an error: "replacement has 17 rows, data has 400"...
pdf("bar_graphs.pdf", wi=8.1,hei=10.6)
do.call(grid.arrange, bar.plots)
ggsave("bar_graphs.pdf", marrangeGrob(bar.plots, nrow=4, ncol=2))
dev.off()
Any answers that provide the overall solution to my goal (getting my bar graphs into a legible figure) or elucidate why I get an error arranging grobs is much appreciated.

How to plotmeans of a group conditional on a variable

Basically right now I have a graph that displays the mean of one of my variables by year. I want to get two mean lines of that variable: one for when another variable in my frame = 0 and one for when it = 1 on the same plot.
Right now this is what I have:
library(gplots)
plotmeans(x ~ year, data = df, frame = FALSE,
mean.labels = TRUE)
This currently works for just giving me the mean of x by year with no conditional. However, I want two lines one for the graphed mean of x when for example y=0 and one for the graphed mean of x when y=1 all by year still on the x axis.
Function gplots::plotmeans accepts only one variable on the RHS of the formula argument. The trick is to use interaction between the variables of interest.
First, make up a data set.
set.seed(1234) # Reproducible results
df <- data.frame(x = rnorm(210),
y = rbinom(210, 1, 0.5),
year = rep(2017:2019, 70))
Now the graphs.
library(gplots)
plotmeans(x ~ interaction(year, y, sep = " y = "), data = df,
mean.labels = TRUE, digits = 2,
connect = list(1:3, 4:6))
plotmeans(x ~ interaction(y, year, sep = " "), data = df,
mean.labels = TRUE, digits = 2,
connect = list(1:2, 3:4, 5:6))

Find coordinates for overlapping hexagonal bins between hexbin objects

I have two spatial datasets with coordinates indicating observations of a species and want to estimate the area of overlap among these datasets. Since point coordinates cannot represent an area, one has to bin the coordinates using similar x (longitude) and y (latitude) categories for both datasets.
For this task, I found the practical hexbin package, which does hexagonal binning. The package is great, but at least I fail to find a function that directly outputs the coordinates / IDs of overlapping bins among hexbin objects. For example, the hdiffplot returns a nice graphical overview of overlapping bins, but how to extract this information for further analysis?
library(hexbin)
set.seed(1); df1 <- data.frame(x = rnorm(10, 0, 5), y = rnorm(10, 0, 5))
set.seed(2); df2 <- data.frame(x = rnorm(10, 0, 5), y = rnorm(10, 0, 5))
xrange <- c(floor(min(c(df1$x, df2$x))-1), ceiling(max(c(df1$x, df2$x))+1))
#-/+1 just to make the plot nicer
yrange <- c(floor(min(c(df1$y, df2$y))-1), ceiling(max(c(df1$y, df2$y)))+1)
hb1 <- hexbin(df1$x, df1$y, xbins = 10, xbnds = xrange, ybnds = yrange)
hb2 <- hexbin(df2$x, df2$y, xbins = 10, xbnds = xrange, ybnds = yrange)
hdiffplot(hb1,hb2, xbnds = xrange, ybnds = yrange)
I figured out a solution to this problem while making the question. Will post it here in hopes that it will help someone one day.
You can extract the coordinates using the hcell2xy function. Here is a little function to find the unique and overlapping coordinates for bin centroids:
#' #title Print overlapping and unique bin centroid coordinates for two hexbin objects
#' #param bin1,bin2 two objects of class hexbin.
#' #details The hexbin objects for comparison, bin1 and bin2, must have the same plotting limits and cell size.
#' #return Returns a list of data frames with unique coordinates for \code{bin1} and \code{bin2} as well as overlapping coordinates among bins.
hdiffcoords <- function(bin1, bin2) {
## Checks modified from: https://github.com/edzer/hexbin/blob/master/R/hdiffplot.R
if(is.null(bin1) | is.null(bin1)) {
stop("Need 2 hex bin objects")
} else {
if(bin1#shape != bin2#shape)
stop("Bin objects must have same shape parameter")
if(all(bin1#xbnds == bin2#xbnds) & all(bin1#ybnds == bin2#ybnds))
equal.bounds <- TRUE
else stop("Bin objects need the same xbnds and ybnds")
if(bin1#xbins != bin2#xbins)
stop("Bin objects need the same number of bins")
}
## Find overlapping and unique bins
hd1 <- data.frame(hcell2xy(bin1), count_bin1 = bin1#count, cell_bin1 = bin1#cell)
hd2 <- data.frame(hcell2xy(bin2), count_bin2 = bin2#count, cell_bin2 = bin2#cell)
overlapping_hd1 <- apply(hd1, 1, function(r, A){ sum(A$x==r[1] & A$y==r[2]) }, hd2)
overlapping_hd2 <- apply(hd2, 1, function(r, A){ sum(A$x==r[1] & A$y==r[2]) }, hd1)
overlaps <- merge(hd1[as.logical(overlapping_hd1),], hd2[as.logical(overlapping_hd2),])
unique_hd1 <- hd1[!as.logical(overlapping_hd1),]
unique_hd2 <- hd2[!as.logical(overlapping_hd2),]
## Return list of data.frames
list(unique_bin1 = unique_hd1, unique_bin2 = unique_hd2, overlapping = overlaps)
}
This information should be the same than returned by hdiffplot in graphical format:
df <- hdiffcoords(hb1, hb2)
library(ggplot2)
ggplot() +
geom_point(data = df$unique_bin1, aes(x = x, y = y), color = "red", size = 10) +
geom_point(data = df$unique_bin2, aes(x = x, y = y), color = "cyan", size = 10) +
geom_point(data = df$overlapping, aes(x = x, y = y), color = "green", size = 10) + theme_bw()
Any comments/corrections are appreciated.

Resources