plot rectangle given 4 points in ggplot heatmap - r

I have this data in a txt file (fruits2612e.txt)
"people","1","2","3","4","5","6","7","8","9","10","11","12"
"Ej1",0,0,0,0,0,0,0,0,0,0,0,0
"Ej2",0,0,1,1,1,1,1,1,0,0,0,0
"Ej3",0,0,0,0,0,0,1,0,0,0,0,0
"Ej4",0,1,1,1,0,0,1,1,0,0,0,1
"Ej5",1,1,1,1,1,1,1,1,0,0,0,1
"Ej6",1,1,1,1,0,1,1,1,0,0,1,1
"Ej7",1,1,1,1,1,1,1,1,0,0,0,0
"Ej8",0,0,0,1,1,1,0,0,0,0,0,0
"Ej9",0,0,1,1,1,1,1,1,0,0,0,0
"Ej10",0,0,0,1,1,1,1,0,0,0,0,0
"Ej11",0,0,0,0,1,0,0,0,0,1,1,0
"Ej12",0,0,1,1,1,0,0,0,0,1,1,1
"Ej13",0,1,1,1,0,0,0,1,1,1,1,1
"Ej14",1,1,0,0,0,0,0,1,1,1,0,1
"Ej15",0,0,0,0,0,0,0,1,1,1,1,1
"Ej16",0,0,0,0,0,0,0,1,1,1,1,1
I built the heatmap (without the black rectangle) using this code
library(reshape2)
library(ggplot2)
library(scales)
library(plyr)
# Read the 0/1 table. The numeric column headers come back as X1 ... X12
# (the later snippets that filter on paste0("X", ...) rely on this).
data <- read.csv("fruits2612e.txt", header = TRUE, sep = ",")
# Reverse the level order of `people`.
data$people <- factor(data$people, levels = rev(data$people))
# Long format: one row per (people, variable) cell. Naming the id column
# explicitly avoids relying on melt() guessing it.
data.m <- melt(data, id.vars = "people")
# data.m <- ddply(data.m, .(variable), transform, rescale = rescale(value))
data.m[, "rescale"] <- rescale(data.m[, "value"], to = c(0, 1))
fewer.labels <- c(
  "Ej16", "Ej15", "Ej14", "Ej13", "Ej12", "Ej11", "Ej10", "Ej9",
  "Ej8", "Ej7", "Ej6", "Ej5", "Ej4", "Ej3", "Ej2", "Ej1"
)
# Heatmap: one white-bordered tile per cell, red (low) to green (high).
p <- ggplot(data.m, aes(variable, people)) +
  geom_tile(aes(fill = rescale), colour = "white") +
  scale_y_discrete(labels = fewer.labels) +
  scale_fill_gradient(low = "red", high = "green") +
  theme(axis.text = element_text(size = 8))
Now I'm trying to add the black rectangle to the heatmap, but can't find out how, the coordinates are
maxR<-c(topLeftx,topLefty,botRightX,botRightY)
[1] 5 1 7 8

Following my own advice, I searched SO for "annotate geom rect" and found one hit: Draw multiple squares with ggplot
# Corners of the rectangle, kept as a named numeric vector.
maxR <- c(topLeftx = 5, topLefty = 1, botRightX = 7, botRightY = 8)
# Overlay an unfilled black rectangle on the existing plot `p`.
p +
  annotate(
    geom = "rect",
    xmin = maxR["botRightX"],
    xmax = maxR["topLeftx"],
    ymin = maxR["botRightY"],
    ymax = maxR["topLefty"],
    fill = "transparent",
    col = "black",
    lwd = 3
  )
So apparently we have different notions about how to specify the indices of a rectangle but you should be able to take it from here.

ggplot manipulates factors internally using their codes, which are accessible using, e.g. as.integer(data.m$people), etc. The corners of the tiles are the codes +/- 0.5. So, assuming you really are using factors for both the x- and y-direction, then this will draw the box you want
maxR <- c(topLeftx = 5, topLefty = 1, botRightX = 7, botRightY = 8)
# Keep only the cells whose row label ("Ej<n>") and column label ("X<n>")
# fall inside the requested ranges.
sub.data <- local({
  people_ids <- paste0("Ej", maxR[["topLeftx"]]:maxR[["botRightX"]])
  column_ids <- paste0("X", maxR[["topLefty"]]:maxR[["botRightY"]])
  data.m[data.m$people %in% people_ids & data.m$variable %in% column_ids, ]
})
# Tile edges sit half a unit either side of the integer factor codes, so the
# box spans min(code) - 0.5 to max(code) + 0.5 in each direction.
p +
  annotate(
    geom = "rect",
    fill = "transparent",
    color = "black",
    size = 1.5,
    xmin = min(as.integer(sub.data$variable)) - 0.5,
    ymin = min(as.integer(sub.data$people)) - 0.5,
    xmax = max(as.integer(sub.data$variable)) + 0.5,
    ymax = max(as.integer(sub.data$people)) + 0.5
  )
The tortured code at the beginning is needed because of the bizarre way you've chosen to specify the corners of the box.

Related

ggplot2 : Bind 2 heatmaps and add a dataframe box in the corner

I made two heatmaps with the code:
I create the first heatmap
# First heatmap: one tile per (Categorie, label) cell, filled by Value.
heatmap1 <- ggplot(mod_mat_constraint, aes(x = Categorie, y = label)) +
  geom_tile(aes(fill = Value)) +
  scale_fill_manual(
    values = c(
      "#86d65e", "#404040", "#86d65e", "#40c5e8",
      "#e84a4a", "#86d65e", "#404040", "#e2e2e2"
    ),
    breaks = label_text
  )
I create the second heatmap
# Second heatmap: same layout and palette, drawn from mod_mat_gen_env.
heatmap2 <- ggplot(mod_mat_gen_env, aes(x = Categorie, y = label)) +
  geom_tile(aes(fill = Value)) +
  scale_fill_manual(
    values = c(
      "#86d65e", "#404040", "#86d65e", "#40c5e8",
      "#e84a4a", "#86d65e", "#404040", "#e2e2e2"
    ),
    breaks = label_text
  )
and I add them with a tree with:
# Attach `tree` on the left of heatmap2 and heatmap1 on its right.
heatmap2 %>%
  insert_left(tree) %>%
  insert_right(heatmap1, width = 0.5)
which gives me:
and I wondered if there were a way with ggplot2 to add an additional df box at the right corner such as:
from a dataframe called DF1
COL1 COL2 COL3
0.1 Peter USA
Hard to help precisely without a dataset, but to get a table overlaid on your plot, probably the best way would be to use annotation_custom() with a tableGrob() from the gridExtra package.
Here's an example heatmap pulled right from the R Graph Gallery which I used to add in your table as a grob.
# Library
library(ggplot2)
library(gridExtra)
# Dummy data
x <- LETTERS[1:20]
y <- paste0("var", seq_len(20))
data <- expand.grid(X = x, Y = y)
data$Z <- runif(400, min = 0, max = 5)
# Heatmap
p <- ggplot(data, aes(x = X, y = Y, fill = Z)) +
  geom_tile()
df <- data.frame(COL1 = 0.1, COL2 = "Peter", COL3 = "USA")
# adding the table: clipping is switched off and the right margin widened so
# the grob placed beyond x = 20 remains visible
p +
  coord_cartesian(clip = "off") +
  theme(plot.margin = margin(r = 140)) +
  annotation_custom(
    grob = tableGrob(df, theme = ttheme_default(base_size = 7)),
    xmin = 20,
    xmax = 27,
    ymin = 1,
    ymax = 5
  )
You can probably use a similar approach in your case. Note the few things that I had to do here to get this to work:
add the grob as annotation_custom(). You will need to play around with the positioning... really just play with those numbers. Also note you may want to play with the base size to ensure the table is the right aspect ratio compared to your plot.
Extend the plot margin so that you have the real estate on that side to include the table.
Turn clipping off so that you can see things outside the plot area properly.

How to clip an interpolated layer in R so it does not extend past data boundaries

I am trying to display a cross-section of conductivity in a lagoon environment using isolines. I have applied interp() and stat_contour() to my data, but I would like to clip the interpolated output so that it doesn't extend past my data points. This way the bathymetry of the lagoon in the cross-section is clear. Here is the code I have used so far:
cond_df <- read_csv("salinity_profile.csv")
# Interpolate conductivity onto a regular 200 x 200 grid spanning the
# observed stop and depth ranges.
di <- interp(
  cond_df$stop,
  cond_df$depth,
  cond_df$conductivity,
  xo = seq(min(cond_df$stop), max(cond_df$stop), length.out = 200),
  yo = seq(min(cond_df$depth), max(cond_df$depth), length.out = 200)
)
# Flatten the grid into long format: one row per (x, y) node.
dat_interp <- data.frame(
  expand.grid(x = di$x, y = di$y),
  z = as.vector(di$z)
)
ggplot(dat_interp, aes(x = x, y = y, z = z, fill = z)) +
  scale_y_reverse() +
  geom_tile() +
  stat_contour(colour = "white", size = 0.25) +
  scale_fill_viridis_c() +
  theme_tufte(base_family = "Helvetica")
Here is the output:
interpolated plot
To help clarify, here is the data just as a geom_point() graph, and I do not want the interpolated layer going past the lower points of the graph:
# Raw observations as points, with depth increasing downwards.
ggplot(
  cond_df,
  aes(x = stop, y = depth, z = conductivity, fill = conductivity)
) +
  geom_point(aes(colour = conductivity), size = 3) +
  scale_y_reverse()
point plot
You can mask the unwanted region of the plot by using geom_ribbon.
You will need to generate a data.frame with values for the max depth at each stop. Here's one somewhat inelegant way to do that:
# One row per distinct stop, holding the maximum depth recorded there
stops <- unique(cond_df$stop)
bathymetry <- data.frame(
  depth = vapply(
    stops,
    function(s) max(cond_df$depth[cond_df$stop == s]),
    numeric(1),
    USE.NAMES = FALSE
  ),
  stop = stops
)
Then, you can add the geom_ribbon as the last geom of your plot, like so
# Ribbon from each stop's max depth down to the overall max depth; it uses its
# own data and aesthetics rather than the plot's.
geom_ribbon(
  data = bathymetry,
  mapping = aes(x = stop, ymin = depth, ymax = max(cond_df$depth)),
  inherit.aes = FALSE
)

3-variables plotting heatmap ggplot2

I'm currently working on a very simple data.frame, containing three columns:
x contains x-coordinates of a set of points,
y contains y-coordinates of the set of points, and
weight contains a value associated to each point;
Now, working in ggplot2, I seem to be able to plot contour levels for these data, but I can't manage to find a way to fill the plot according to the variable weight. Here's the code that I used:
# Contour lines of the 2D point density, drawn with a 1:1 aspect ratio.
ggplot(data = df, mapping = aes(x = x, y = y, fill = weight)) +
  geom_density_2d() +
  coord_fixed(ratio = 1)
You can see that there's no filling whatsoever, sadly.
I've been trying for three days now, and I'm starting to get depressed.
Specifying fill=weight and/or color = weight in the general ggplot call, resulted in nothing. I've tried to use different geoms (tile, raster, polygon...), still nothing. Tried to specify the aes directly into the geom layer, also didn't work.
Tried to convert the object as a ppp but ggplot can't handle them, and also using base-R plotting didn't work. I have honestly no idea of what's wrong!
I'm attaching the first 10 points' data, which is spaced on an irregular grid:
# First ten points of the data set: coordinates and the associated weight.
x <- c(
  -0.13397460, -0.31698730, -0.13397460, 0.13397460, -0.28867513,
  -0.13397460, -0.31698730, -0.13397460, -0.28867513, -0.26794919
)
y <- c(
  -0.5000000, -0.6830127, -0.5000000, -0.2320508, -0.6547005,
  -0.5000000, -0.6830127, -0.5000000, -0.6547005, 0.0000000
)
weight <- c(
  4.799250e-01, 5.500250e-01, 4.799250e-01, -2.130287e+12, 5.798250e-01,
  4.799250e-01, 5.500250e-01, 4.799250e-01, 5.798250e-01, 6.618956e-01
)
Any advice? The desired output would be something along these lines:
click
Thank you in advance.
From your description geom_density doesn't sound right.
You could try geom_raster:
# Fill each (x, y) cell directly by `weight`, using a reversed rainbow palette.
ggplot(df, aes(x, y, fill = weight)) +
  geom_raster() +
  coord_fixed(ratio = 1) +
  scale_fill_gradientn(colours = rev(rainbow(7))) # colourmap
Here is a second-best using fill=..level... There is a good explanation on ..level.. here.
# load libraries
library(ggplot2)
library(RColorBrewer)
library(ggthemes)
# assemble the three vectors into a data.frame
df <- data.frame(x = x, y = y, weight = weight)
# palette function: reversed 11-colour "Spectral" ramp, interpolated in Lab
myPalette <- colorRampPalette(
  rev(brewer.pal(11, "Spectral")),
  space = "Lab"
)
# filled density polygons, coloured by contour level
ggplot(df, aes(x = x, y = y, fill = ..level..)) +
  stat_density_2d(bins = 11, geom = "polygon") +
  scale_fill_gradientn(colours = myPalette(11)) +
  theme_minimal() +
  coord_fixed(ratio = 1)

Making adjustments to a forest plot using ggplot2

I'm trying to create a forest plot in R from meta-analysis results. However, I'm having difficulties adjusting the line thickness & the center points as well as getting rid of the automatic legend and creating my own legend.
# d is a data frame with 4 columns:
#   d$x    variable names
#   d$y    center point
#   d$ylo  lower limits
#   d$yhi  upper limits
# data
d <- data.frame(
  x = LETTERS[1:10],
  y = rnorm(10, mean = 0, sd = 0.1)
)
d$ylo <- d$y - 1 / 10
d$yhi <- d$y + 1 / 10
# Reverse the level order of x before it is passed to the plotting function
d$x <- factor(d$x, levels = rev(d$x))
#' Forest plot of point estimates with intervals
#'
#' @param d A data frame with columns `x` (label), `y` (center point),
#'   `ylo` (lower limit) and `yhi` (upper limit).
#' @return A ggplot object; one point range per row of `d`, coloured by `x`,
#'   with flipped coordinates and a red reference line at y = 0.
credplot.gg <- function(d) {
  # library() fails loudly if ggplot2 is missing; require() would only
  # return FALSE and let the next call fail with a less helpful error.
  library(ggplot2)
  ggplot(
    d,
    aes(x = x, y = y, ymin = ylo, ymax = yhi, group = x, colour = x)
  ) +
    geom_pointrange() +
    theme_bw() +
    coord_flip() +
    guides(color = guide_legend(title = "Cohort")) +
    # geom_hline() is positioned by `yintercept`; mapping `x` does not place it.
    geom_hline(yintercept = 0, colour = "red", linetype = 1) +
    xlab("Cohort") +
    ylab("Beta") +
    ggtitle("rs6467890_CACNA2D1")
}
# Build the forest plot for the example data frame `d`.
credplot.gg(d)
The issues that I'm having are:
When I insert "size" into ggplot(d, aes(x=x, y=y, ymin=ylo, ymax=yhi, group=x,colour=x), size=1.5), the lines and points are extremely large.
How do I get rid of the legend that is automatically generated with the plot and how do I create my own legend?
I'm fairly new to R, so any help is greatly appreciated.

R - Smoothing color and adding a legend to a scatterplot

I have a scatterplot in R. Each (x,y) point is colored according to its z value. So you can think of each point as (x,y,z), where (x,y) determines its position and z determines its color along a color gradient. I would like to add two things
A legend on the right side showing the color gradient and what z values correspond to what colors
I would like to smooth all the color using some type of interpolation, I assume. In other words, the entire plotting region (or at least most of it) should become colored so that it looks like a huge heatmap instead of a scatterplot. So, in the example below, there would be lots of orange/yellow around and then some patches of purple throughout. I'm happy to further clarify what I'm trying to explain here, if need be.
Here is the code I have currently, and the image it makes.
# Simulated data: 150 points; z (two normal clusters) drives the colour.
x <- seq(1, 150)
y <- runif(150)
z <- c(rnorm(100, mean = 1), rnorm(50, mean = 20))
# Rescale z to [0, 1] and map it onto a rainbow colour ramp.
colorFunction <- colorRamp(rainbow(100))
zScaled <- (z - min(z)) / (max(z) - min(z))
zMatrix <- colorFunction(zScaled)
zColors <- rgb(zMatrix[, 1], zMatrix[, 2], zMatrix[, 3], maxColorValue = 255)
df <- data.frame(x, y)
# densCols() returns one colour per point on a black-to-white ramp; its red
# channel (+1) is stored as `dens`. Note that this overwrites `x`.
x <- densCols(x, y, colramp = colorRampPalette(c("black", "white")))
df$dens <- col2rgb(x)[1, ] + 1L
# The rows are plotted in order of `dens`, so the colours must be reordered
# the same way; otherwise each point gets another point's colour.
ord <- order(df$dens)
plot(y ~ x, data = df[ord, ], pch = 20, col = zColors[ord], cex = 1)
Here are some solutions using the ggplot2 package.
# Load library
library(ggplot2)
# Recreate the scatterplot from the example with default colours
ggplot(df) +
  geom_point(aes(x = x, y = y, col = dens))
# Recreate the scatterplot with a custom set of colours. I use rainbow(100)
ggplot(df) +
  geom_point(aes(x = x, y = y, col = dens)) +
  scale_color_gradientn(colours = rainbow(100))
# A 2d density plot, using default colours
ggplot(df) +
  stat_density2d(
    aes(x = x, y = y, z = dens, fill = ..level..),
    geom = "polygon"
  ) +
  ylim(-0.2, 1.2) +
  xlim(-30, 180) # I had to twiddle with the ranges to get a nicer plot
# A better density plot, in my opinion. Tiles across your range of data
ggplot(df) +
  stat_density2d(
    aes(x = x, y = y, z = dens, fill = ..density..),
    geom = "tile",
    contour = FALSE
  )
# Using custom colours. I use rainbow(100) again.
ggplot(df) +
  stat_density2d(
    aes(x = x, y = y, z = dens, fill = ..density..),
    geom = "tile",
    contour = FALSE
  ) +
  scale_fill_gradientn(colours = rainbow(100))
# You can also plot the points on top, if you want
ggplot(df) +
  stat_density2d(
    aes(x = x, y = y, z = dens, fill = ..density..),
    geom = "tile",
    contour = FALSE
  ) +
  geom_point(aes(x = x, y = y, col = dens)) +
  # guide = "none" removes the extra colour legend (guide = FALSE is deprecated)
  scale_colour_continuous(guide = "none")
I attach the plots as well:
Also, using ggplot2, you can use color and size together, as in:
# Encode dens with both point colour and point size; only the colour legend
# is shown.
ggplot(df, aes(x = x, y = y, size = dens, color = dens)) +
  geom_point() +
  scale_color_gradientn(
    name = "Density",
    colours = rev(rainbow(100))
  ) +
  scale_size_continuous(
    range = c(1, 15),
    guide = "none"
  )
which might make it a little clearer.
Notes:
The expression rev(rainbow(100)) reverses the rainbow color scale,
so that red goes with the larger values of dens.
Unfortunately, you cannot combine a continuous legend (color) and a
discrete legend (size), so you would normally get two legends. The
expression guide="none" hides the size legend.
Here's the plot:

Resources