Average line for 2D Histogram? - r

I am a very new user of R and have a question. I am currently working on making 2D histograms in R. The subject of the data does not really matter; what I want to know is how to plot an average line on the 2D histogram. The code I am running is this:
# Load the saved workspace; the code below expects it to provide `mydata`
load("mydatabin.RData")
# Color housekeeping: reversed 11-colour 'Spectral' ramp, sampled at 32 colours
library(RColorBrewer)
rf <- colorRampPalette(rev(brewer.pal(11,'Spectral')))
r <- rf(32)
# Pull the two columns to plot out of mydata (AGE goes on x, BP on y)
x <- mydata$AGE
y <- mydata$BP
df <- data.frame(x,y)
# Plot: base-graphics scatter plot with small black points
plot(df, pch=16, col='black', cex=0.5)
This gives me a scatter plot and then to turn it into a 2D Histogram I do:
library(ggplot2)
# Default call (as object): base plot `p` is reused by the later snippets
p <- ggplot(df, aes(x,y))
# Add 2D binning with the default number of bins
h3 <- p + stat_bin2d()
h3
# Default call (using qplot): same kind of plot through the qplot() shorthand
qplot(x,y,data=df, geom='bin2d')
After this I do:
# Rebuild h3 with bins=25 and fill the tiles using the 32-colour ramp `r`
h3 <- p + stat_bin2d(bins=25) + scale_fill_gradientn(colours=r)
h3
to add color.
From here, how do I plot an average line of the data on top of the 2D histogram?
And can anyone tell me how to plot a heat map that looks like this using mydatabin.RData:
Thanks.

You can use geom_hline or geom_vline in ggplot2, passing yintercept or xintercept respectively to draw a line. In your case, the intercept can be the mean of one of your columns, which draws an average line. See the code below for an example.
I also played around and tried two different ways to draw 2D histograms. Yours seems better and more precise, though I removed colorBrewer.
library(ggplot2)

# Simulate two independent standard-normal variables for plotting.
# Fixing the seed makes the figures reproducible from run to run.
set.seed(42)
x <- rnorm(10000)
y <- rnorm(10000)
df <- data.frame(x, y)

# Layers shared by both plots: fill scale, average lines and theme.
# Adding a list to a ggplot adds its elements in order, so the layer order
# is the same as writing them out after each stat.
average_layers <- list(
  scale_fill_gradient(low = "navy", high = "yellow"),
  # Here go average lines: horizontal at mean(y), vertical at mean(x).
  # na.rm = TRUE keeps the lines when real data contains missing values.
  geom_hline(yintercept = mean(df$y, na.rm = TRUE), color = "red"),
  geom_vline(xintercept = mean(df$x, na.rm = TRUE), color = "red"),
  # Just to remove grid and set background color
  theme(
    line = element_blank(),
    panel.background = element_rect(fill = "navy")
  )
)

# stat_density2d way, with average lines.
# after_stat(level) replaces the deprecated ..level.. notation.
p1 <- ggplot(df, aes(x = x, y = y)) +
  stat_density2d(aes(fill = after_stat(level)), geom = "polygon") +
  average_layers
p1

# stat_bin2d way, with average lines
p2 <- ggplot(df, aes(x, y)) +
  stat_bin2d(bins = 50) +
  average_layers
p2

Related

ggplot2: Set custom color, shape, and size for points added to a map

I want to set a custom shape, size and color for points added to a map based upon a variable called 'Dataset'. I'm able to set the color of the points if I set the shape to the same type for all the points, but I'm hoping to have a map with a little more information. When I run this code, all the points are circles colored black. What am I missing?
Thanks everyone for your help & time!!
Here's a reproducible example:
# Read in libraries
library(ggplot2)
library(maps)
library(maptools)
library(ggmap)
# Create mapping objects
world <- map_data("world2")
# NOTE(review): this assignment is a no-op as written (long is assigned to itself)
world$long <- world$long
state_dat <- map_data("state")
# Keep only the rows of the world map whose region is "Canada"
canada <- world[world$region==c("Canada"),]
# Stack the US state outlines and the Canada outline into one data frame
map_dat <- rbind(state_dat, canada)
# Create custom shapes, sizes, colors
# (four values each; Dataset below has four distinct values: DNA, Flower, GBIF, Leaf)
pt_colors=c("red", "blue", "grey", "green")
shapes = c(120, 22, 24, 21)
shape_size = c(1.1, 0.8, 1, 1)
# Create lat/long dataframe: 20 points, each tagged with its Dataset
xy <- data.frame(Dataset=c("GBIF","Flower","GBIF","Leaf","DNA","GBIF","GBIF","Leaf","GBIF","GBIF","DNA","GBIF","DNA","GBIF","GBIF","Leaf","GBIF","GBIF","GBIF","DNA"),
lat=c(38.89450,34.45300,39.86556,30.38818,28.74590,33.78527,41.23439,30.37935,41.38250,40.60648,30.87580,40.56425,28.75000,41.52666,35.46451,30.73621,38.50221,33.70335,38.98000,29.61100),
long=c(-77.06292,-84.22643,-79.50248,-84.64519,-81.47860,-84.37109,-81.46374,-86.17667,-72.10861,-74.53538,-84.41520,-74.86654,-81.47750,-73.15833,-78.89952,-86.73095,-78.40308,-86.70289,-77.03917,-81.78740)
)
# Create base map: white polygons with black outlines, no legend
# (the constant fill="white" is given alongside the mapped fill=region)
p0 <- ggplot() +
geom_polygon(data=map_dat,aes(x=long,y=lat,group=group, fill=region),fill="white",color="black", show.legend=FALSE)+
coord_map("gilbert",xlim=c(-60,-97),ylim=c(15,47.5)) +#mollweide is pretty good
labs(x=expression("Longitude"*~degree*W), y=expression("Latitude"*~degree*N)) +
theme(panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.margin=unit(c(0.25,0.25,0.25,0.25),'inches'),
legend.position='none') +
theme(rect = element_blank())
# Add points to the map
# Only fill is mapped to Dataset here; colour, shape and size are not mapped,
# so the three manual scales below have no aesthetic to act on.
p1 <- p0 +
geom_point(data=xy,aes(x=long,y=lat,fill=Dataset)) +
scale_color_manual(values=pt_colors) +
scale_shape_manual(values=shapes) +
scale_size_manual(values=shape_size)
You need to map colour, shape, and size inside aes() in your geom_point call. For most point shapes geom_point uses colour rather than fill; fill only affects the filled shapes 21–25, where it sets the interior while colour sets the outline.
Simply fixing that will generate what you want.
# Add the points, letting Dataset drive colour, shape and size together;
# each manual scale then supplies the values for its own aesthetic.
p1 <- p0 +
  geom_point(
    data = xy,
    mapping = aes(
      x = long, y = lat,
      colour = Dataset, shape = Dataset, size = Dataset
    )
  ) +
  scale_size_manual(values = shape_size) +
  scale_shape_manual(values = shapes) +
  scale_color_manual(values = pt_colors)

R adding legend and directlabels to ggplot2 contour plot

I have a raster map that I want to plot using ggplot2 using a continuous scale and labeled isolines on top of that.
For that I'm using the directlabels package and am close to getting what I want but I can't get both the legend and the labeled isolines on the same map
The following code reproduces my problem:
# One-time setup: installs the two packages used below
install.packages(c('ggplot2', 'directlabels'))
library('ggplot2')
library('directlabels')
# 100 x 100 grid with z = x * y
df <- expand.grid(x=1:100, y=1:100)
df$z <- df$x * df$y
# Plot 1: this plot is fine but without contours
p <- ggplot(aes(x=x, y=y, z=z), data = df) +
geom_raster(data=df, aes(fill=z)) +
scale_fill_gradient(limits=range(df$z), high = 'white', low = 'red')
p
# Plot 2: This plot adds the isolines but no labels and it also adds a second legend for level which I don't want
# Note: colour is both mapped to ..level.. and set to the constant 'gray30' in this call
p <- p + geom_contour(aes(colour = ..level..), color='gray30', na.rm=T, show.legend=T)
p
# Plot 3: This plot has the labeled isolines but it removes the z legend that I want to show
direct.label(p, list("bottom.pieces", colour='black'))
Plot 1
Plot 2
Plot 3
I would like to have the coloured raster in the background, with its color legend on the side and the labeled isolines on top. Is there a way to do this?
Also is there a way to get the labels placed in the middle of the isolines instead of the bottom or top?
Thanks in advance
Pablo
First, fixing the issue to do with the legends.
library(ggplot2)
library(directlabels)

# 100 x 100 grid with z = x * y
df <- expand.grid(x = 1:100, y = 1:100)
df$z <- df$x * df$y

# The raster keeps its fill legend; the contour colour guide is switched off
p <- ggplot(data = df, mapping = aes(x = x, y = y, z = z)) +
  geom_raster(data = df, mapping = aes(fill = z), show.legend = TRUE) +
  scale_fill_gradient(low = "red", high = "white", limits = range(df$z)) +
  geom_contour(mapping = aes(colour = ..level..)) +
  scale_colour_gradient(guide = "none")

# Label the contour lines in black with the "bottom.pieces" method
p1 <- direct.label(p, method = list("bottom.pieces", colour = "black"))
p1
There aren't too many options for positioning the labels. One possibility is angled.boxes, but the fill colour might not be too nice.
# Same plot, labelled with the "angled.boxes" positioning method
p2 <- direct.label(p, method = list("angled.boxes"))
p2
To change the fill colour to transparent (using code from here):
# Boxed labels with no border and a transparent fill
p3 <- direct.label(
  p,
  method = list(
    "far.from.others.borders", "calc.boxes", "enlarge.box",
    box.color = NA, fill = "transparent", "draw.rects"
  )
)
p3
And to move the labels off the contour lines:
# As above, with hjust/vjust set to 1 to shift the labels off the lines
p4 <- direct.label(
  p,
  method = list(
    "far.from.others.borders", "calc.boxes", "enlarge.box",
    hjust = 1, vjust = 1, box.color = NA, fill = "transparent", "draw.rects"
  )
)
p4

Cannot remove grey area behind legend symbol when using smooth

I'm using ggplot2 with a GAM smooth to look at the relationship between two variables. When plotting I'd like to remove the grey area behind the symbol for the two types of variables. For that I would use theme(legend.key = element_blank()), but that doesn't seem to work when using a smooth.
Can anyone tell me how to remove the grey area behind the two black lines in the legend?
I have a MWE below.
library(ggplot2)
# Build 10000 points on a decreasing line from 1 towards 0, plus normal noise (sd 0.1)
len <- 10000
x <- seq(0, len-1)
df <- as.data.frame(x)
df$y <- 1 - df$x*(1/len)
df$y <- df$y + rnorm(len,sd=0.1)
# Label every row 'method 1', then relabel rows with y > 0.5 as 'method 2'
df$type <- 'method 1'
df$type[df$y>0.5] <- 'method 2'
# One smooth per type, distinguished by line type, with the se band enabled
p <- ggplot(df, aes(x=x, y=y)) + stat_smooth(aes(lty=type), col="black", method = "auto", size=1, se=TRUE)
p <- p + theme_classic()
p <- p + theme(legend.title=element_blank())
p <- p + theme(legend.key = element_blank()) # <--- this doesn't work?
p
Here is a very hacky workaround, based on the notion that if you map things to aesthetics in ggplot, they appear in the legend. geom_smooth has a fill aesthetic which allows for different colourings of different groups if one so desires. If it's hard to fix that downstream, sometimes it's easier to keep those unwanted items out of the legend altogether. In your case, the colour of the standard-error band appeared in the legend. As such, I've created two geom_smooths: one without a line colour (but grouped by type) to draw the standard-error bands, and one with linetype mapped in aes but se set to FALSE.
# Two smooths over the same data: the first draws only the se bands (its line
# colour is NA and nothing is mapped to the legend), the second draws only the
# lines, with linetype mapped to `type` and the se band switched off.
p <- ggplot(df, aes(x = x, y = y)) +
  # first smooth; se only
  stat_smooth(
    aes(group = type),
    colour = NA, method = "auto", linewidth = 1, se = TRUE
  ) +
  # second smooth: line only
  stat_smooth(
    aes(linetype = type),
    colour = "black", method = "auto", linewidth = 1, se = FALSE
  ) +
  theme_classic() +
  theme(
    legend.title = element_blank(),
    legend.key = element_rect(fill = NA, color = NA) # thank you #alko989
  )
# Print the plot; assigning it alone does not display it
p

HeatMap not displaying correctly using ggplot()

I am having a strange situation when I am trying to plot a heatmap on a dataset that I have which can be found here.
I am using the following code to plot the heat map:
# Column to plot; `red` is the data set from the question and is not defined in this snippet
xaxis<-c('density')
# Observed minimum and maximum of that column
midrange<-range(red[,xaxis])
xaxis <- c(xaxis,'quality')
# Reshape to long format keeping 'quality' as the id column
# (melt() presumably comes from reshape2 -- no library() call is shown; confirm)
molten<-melt(red[,xaxis],'quality')
p <- ggplot(molten, aes(x = value, y = quality))
# One tile per row, filled by value and outlined in white
p <- p + geom_tile(aes(fill = value), colour = "white")
p <- p + theme_minimal()
# turn y-axis text 90 degrees (optional, saves space)
p <- p + theme(axis.text.y = element_text(angle = 90, hjust = 0.5))
# remove axis titles, tick marks, and grid
p <- p + theme(axis.title = element_blank())
p <- p + theme(axis.ticks = element_blank())
p <- p + theme(panel.grid = element_blank())
p <- p + scale_y_discrete(expand = c(0, 0))
# optionally remove row labels (not useful depending on molten)
p <- p + theme(axis.text.x = element_blank())
# get diverging color scale from colorbrewer
# #008837 is green, #7b3294 is purple
palette <- c("#008837", "#b7f7f4", "#b7f7f4", "#7b3294")
# Degenerate case: the column has a single distinct value (min equals max)
if(midrange[1] == midrange[2]) {
# use a 3 color gradient instead
p <- p + scale_fill_gradient2(low = palette[1], mid = palette[2], high = palette[4], midpoint = midrange[1]) +
xlim(midrange[1],midrange[2])
}else{
# use a 4 color gradient (both middle stops are #b7f7f4)
# NOTE(review): `values` receives the raw data range here; scale_fill_gradientn()
# documents `values` as positions between 0 and 1 -- confirm and rescale if needed
p <- p + scale_fill_gradientn(colours = palette, values = c(0, midrange[1], midrange[2], 1)) +
xlim(midrange[1],midrange[2])
}
p
I am trying to plot the heat map on the variable Density and would like to use the variable quality as separation in my heat map. When I use the above code, I get the following plot:
It can be clearly seen that it is a blank image. This is happening because the range of the variable Density is very low, it doesn't happen if I change the variable to the one having a wider range (pH for example).
Should ggplot automatically adjust to this? If not, how can I get ggplot to show the real plot?
Any help in this regard will be much appreciated.
So there are (at least) two problems here.
First, you have almost 1600 tiles in the x-direction, so specifying color="white" for the outline means that all you see is the outline, hence, white. Try taking this out.
Second, in your values=c(...) argument to scale_fill_gradientn(...) you seem to expect the midrange[1] and midrange[2] to be between (0,1), but midrange[2] = 1.003.
After taking out color="white" from the call to geom_tile(...), I get this:

overlaying plots in ggplot2

How to overlay one plot on top of the other in ggplot2 as explained in the following sentences? I want to draw the grey time series on top of the red one using ggplot2 in R (now the red one is above the grey one and I want my graph to be the other way around). Here is my code (I generate some data in order to show you my problem, the real dataset is much more complex):
# One-time setup: installs ggplot2
install.packages("ggplot2")
library(ggplot2)
# Two series of 100 time steps each, stacked in long format
time <- rep(1:100,2)
# First series is constant at 0.5; second alternates between 0 and 1
timeseries <- c(rep(0.5,100),rep(c(0,1),50))
# Band limits: 0.3-0.7 for the first series, 0-0 (no band) for the second
upper <- c(rep(0.7,100),rep(0,100))
lower <- c(rep(0.3,100),rep(0,100))
# Grouping label, also used as the legend text
legend <- c(rep("red should be under",100),rep("grey should be above",100))
dataset <- data.frame(timeseries,upper,lower,time,legend)
ggplot(dataset, aes(x=time, y=timeseries)) +
# Lines coloured and sized by group
geom_line(aes(colour=legend, size=legend)) +
# Band between lower and upper, filled by group
geom_ribbon(aes(ymax=upper, ymin=lower, fill=legend), alpha = 0.2) +
scale_colour_manual(limits=c("grey should be above","red should be under"),values = c("grey50","red")) +
scale_fill_manual(values = c(NA, "red")) +
scale_size_manual(values=c(0.5, 1.5)) +
theme(legend.position="top", legend.direction="horizontal",legend.title = element_blank())
Convert the data you are grouping on into a factor and explicitly set the order of the levels. ggplot draws the layers according to this order. Also, it is a good idea to group the scale_manual codes to the geom it is being applied to for readability.
# Fix the level order explicitly: "red should be under" becomes the first
# level and "grey should be above" the second.
legend <- factor(
  legend,
  levels = c("red should be under", "grey should be above")
)
# Named ordered_dataset rather than `c`, which would shadow base::c()
ordered_dataset <- data.frame(timeseries, upper, lower, time, legend)

# Each manual scale lists its values in factor-level order (red group first,
# grey group second) and sits next to the geom it applies to.
ggplot(ordered_dataset, aes(x = time, y = timeseries)) +
  geom_ribbon(aes(ymax = upper, ymin = lower, fill = legend), alpha = 0.2) +
  scale_fill_manual(values = c("red", NA)) +
  # linewidth is the line-thickness aesthetic; `size` is deprecated for lines
  geom_line(aes(colour = legend, linewidth = legend)) +
  scale_colour_manual(values = c("red", "grey50")) +
  scale_linewidth_manual(values = c(1.5, 0.5)) +
  theme(
    legend.position = "top",
    legend.direction = "horizontal",
    legend.title = element_blank()
  )
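Note that the order of the values in each scale_*_manual() call now follows the factor levels: the value for "red should be under" comes first and the value for "grey should be above" second.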

Resources