Considering the value of each coordinate in the heatmap - r

I'm trying to make a heatmap that takes into account the value of each point (variable 'x'). But when I run my code I only get a heatmap based on the locations of the points, not their values.
Here is my code:
head(dengue)
lat long x
1 7791000 598157.0 156
2 7790677 598520.0 307
3 7790795 598520.0 153
4 7790153 598808.0 135
5 7790935 598813.0 1888
6 7790765 598881.7 1169
library(ggplot2)
library(ggsn)
library(RColorBrewer)  # for brewer.pal(), used below

hmap <- ggplot(dengue, aes(x = long, y = lat)) +
  stat_density2d(aes(fill = ..level..), alpha = 0.8, geom = "polygon") +
  geom_point(colour = "red") +
  geom_path(data = map.df, aes(x = long, y = lat, group = group), colour = "grey50") +
  scale_fill_gradientn(colours = rev(brewer.pal(5, "Spectral"))) +
  coord_fixed() +
  scalebar(location = "bottomright", y.min = 7781600.0, y.max = 7812898.0,
           x.min = 597998.4, x.max = 619721.2,
           dist = 2, transform = F,
           st.dist = .04, dist_unit = "km") +
  blank() +
  guides(fill = guide_legend(title = ""))
north2(hmap, x = .7, y = .9, symbol = 16)
And here is the map that I got:
Any hint on how I can make a heatmap that considers the values of the points (variable 'x'), and not just their coordinates?

There is a post here that describes adapting the MASS package's kde2d function to take point weights into account.
library(MASS)
kde2d.weighted <- function(x, y, w, h, n = 25, lims = c(range(x), range(y))) {
  nx <- length(x)
  if (length(y) != nx)
    stop("data vectors must be the same length")
  gx <- seq(lims[1], lims[2], length = n)  # gridpoints x
  gy <- seq(lims[3], lims[4], length = n)  # gridpoints y
  if (missing(h))
    h <- c(bandwidth.nrd(x), bandwidth.nrd(y))
  if (missing(w))
    w <- numeric(nx) + 1
  h <- h/4
  ax <- outer(gx, x, "-")/h[1]  # distance of each point to each grid point in x-direction
  ay <- outer(gy, y, "-")/h[2]  # distance of each point to each grid point in y-direction
  z <- (matrix(rep(w, n), nrow = n, ncol = nx, byrow = TRUE) *
          matrix(dnorm(ax), n, nx)) %*%
       t(matrix(dnorm(ay), n, nx)) / (sum(w) * h[1] * h[2])  # z is the weighted density
  return(list(x = gx, y = gy, z = z))
}
This is not natively available in ggplot2 as far as I'm aware, but you can preprocess your data outside ggplot to get something you can pass to stat_contour:
# Reading in your example data
zz <- " lat long x
1 7791000 598157.0 156
2 7790677 598520.0 307
3 7790795 598520.0 153
4 7790153 598808.0 135
5 7790935 598813.0 1888
6 7790765 598881.7 1169"
df <- read.table(text = zz)

# Doing the weighted 2d kde
wdf <- kde2d.weighted(df$lat, df$long, df$x)
wdf <- data.frame(lat = wdf$x[row(wdf$z)],
                  long = wdf$y[col(wdf$z)],
                  value = wdf$z[T])
# Plotting the result:
ggplot(df, aes(lat, long)) +
  stat_contour(data = wdf, aes(z = value, fill = stat(level)), geom = "polygon") +
  geom_text(aes(label = x))  # to show the weights
As you can see, the contours are cut off at somewhat awkward points, but I suppose this could be amended by playing around with the lims argument of kde2d.weighted().
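For instance, a minimal sketch of widening the evaluation window so the contours can close (using the same df as above; the 0.5 padding factor is just an assumption, adjust to taste):
# Widen the grid limits beyond the data range so contours are not clipped
pad_lat  <- 0.5 * diff(range(df$lat))
pad_long <- 0.5 * diff(range(df$long))
wdf <- kde2d.weighted(df$lat, df$long, df$x,
                      lims = c(range(df$lat)  + c(-1, 1) * pad_lat,
                               range(df$long) + c(-1, 1) * pad_long))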

Related

How to set a logarithmic scale across multiple ggplot2 contour plots?

I am attempting to create three contour plots, each illustrating the following function applied to two input vectors and a fixed alpha:
alphas <- c(1, 5, 25)
x_vals <- seq(0, 25, length.out = 100)
y_vals <- seq(0, 50, length.out = 100)
my_function <- function(x, y, alpha) {
  z <- (1 / (x + alpha)) * (1 / (y + alpha))
}
For each alpha in the vector alphas, I am creating a contour plot of z values (relative to the minimal z value) over the x and y axes.
I do so with the following code (probably not best practices; I'm still learning the basics with R):
library(ggplot2)
library(purrr)  # flatten() comes from purrr

plots <- list()
for (i in seq_along(alphas)) {
  z_table <- sapply(x_vals, my_function, y = y_vals, alpha = alphas[i])
  x <- rep(x_vals, each = 100)
  y <- rep(y_vals, 100)
  z <- unlist(flatten(list(z_table)))
  z_rel <- z / min(z)
  d <- data.frame(cbind(x, y, z_rel))
  plots[[i]] <- ggplot(data = d, aes(x = x, y = y, z = z_rel)) +
    geom_contour_filled()
}
When alpha = 1:
When alpha = 25:
I want to display these plots in one grouping using ggarrange(), with one logarithmic color scale (as relative z varies so much from plot to plot). Is there a way to do this?
You can build a data frame with all the data for all alphas combined, with a column indicating the alpha, so you can facet your graph:
I basically removed the plots[[i]] part and stacked up the d's created in the original loop:
d <- numeric()
for (i in seq_along(alphas)) {
  z_table <- sapply(x_vals, my_function, y = y_vals, alpha = alphas[i])
  x <- rep(x_vals, each = 100)
  y <- rep(y_vals, 100)
  z <- unlist(flatten(list(z_table)))
  z_rel <- z / min(z)
  d <- rbind(d, cbind(x, y, z_rel))
}
d <- as.data.frame(d)
Then we create the alphas column:
d$alpha <- factor(paste("alpha =", alphas[rep(1:3, each = nrow(d)/length(alphas))]),
                  levels = paste("alpha =", alphas[1:3]))
Then build the log scale inside the contour:
ggplot(data = d, aes(x = x, y = y, z = z_rel)) +
  geom_contour_filled(breaks = round(exp(seq(log(1), log(1400), length = 14)), 1)) +
  facet_wrap(~alpha)
Output:
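The upper break of 1400 above is hard-coded for this example; a small sketch (an assumption, not part of the original answer) of deriving the breaks from the data instead:
# Log-spaced breaks derived from the data rather than hard-coded
brks <- round(exp(seq(log(1), log(max(d$z_rel)), length.out = 14)), 1)
ggplot(data = d, aes(x = x, y = y, z = z_rel)) +
  geom_contour_filled(breaks = brks) +
  facet_wrap(~alpha)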

Custom +/- 10% Band from geom_abline?

I'm trying to create a plot that compares measured to predicted values where the line going through the plot is of the form y = 0 + x. I would like to then shade the region +/- 10% from the line. Is there a way to do this without adding a column of data to the data.frame?
Code
library(tidyverse)

# Fake Data - Not my real data
N <- 1e3
x <- runif(N, 200, 600)
a <- 0
b <- 1
y_true <- a + b * x
sigma <- 50
y <- rnorm(N, y_true, sigma)
d <- tibble(y, x)

ggplot(d, aes(x, y)) +
  geom_point() +
  geom_abline(slope = 1)
  # geom_ribbon(aes(x = ? * .9, y = ? * 1.1))
Created on 2019-07-29 by the reprex package (v0.3.0)
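One possible approach (just a sketch, not from the original post) is to build the band from a tiny separate data frame spanning the x-range, so nothing is added to d itself:
# Sketch: a two-row data frame spans the +/- 10% band around the y = x line
band <- tibble(x = range(d$x), ymin = 0.9 * range(d$x), ymax = 1.1 * range(d$x))

ggplot(d, aes(x, y)) +
  geom_ribbon(data = band, aes(x = x, ymin = ymin, ymax = ymax),
              inherit.aes = FALSE, fill = "grey70", alpha = 0.5) +
  geom_point() +
  geom_abline(slope = 1)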

Add jittered points to a ggnetwork plot

I have a graph of vertices and edges which I'd like to plot using a Fruchterman-Reingold layout.
Here's the graph edges matrix:
edge.mat <- matrix(as.numeric(strsplit("3651,0,0,1,0,0,0,0,2,0,11,2,0,0,0,300,0,1,0,0,66,0,78,9,0,0,0,0,0,0,11690,0,1,0,0,0,0,0,0,0,0,493,1,1,0,4288,5,0,0,36,0,9,7,3,0,6,1,0,1,7,490,0,0,0,6,0,0,628,6,12,0,0,0,0,0,641,0,0,4,0,0,0,0,0,0,66,0,0,0,0,3165,0,281,0,0,0,0,0,0,0,0,45,1,0,0,35248,0,1698,2,0,1,0,2,99,0,0,6,29,286,0,31987,0,1,10,0,8,0,16,0,21,1,0,0,1718,0,51234,0,0,17,3,12,0,0,7,0,0,0,1,0,2,16736,0,0,0,3,0,0,4,630,0,0,0,9,0,0,29495,53,6,0,0,0,0,5,0,0,0,0,3,0,19,186,0,0,0,482,8,12,0,1,0,7,1,0,6,0,26338",
split = ",")[[1]]),
nrow = 14,
dimnames = list(LETTERS[1:14], LETTERS[1:14]))
I then create an igraph object from that using:
gr <- igraph::graph_from_adjacency_matrix(edge.mat, mode="undirected", weighted=T, diag=F)
And then use ggnetwork to convert gr to a data.frame, with specified vertex colors:
set.seed(1)
gr.df <- ggnetwork::ggnetwork(gr,
                              layout = "fruchtermanreingold",
                              weights = "weight",
                              niter = 50000,
                              arrow.gap = 0)
And then I plot it using ggplot2 and ggnetwork:
vertex.colors <- strsplit("#00BE6B,#DC2D00,#F57962,#EE8044,#A6A400,#62B200,#FF6C91,#F77769,#EA8332,#DA8E00,#C59900,#00ACFC,#C49A00,#DC8D00",
split=",")[[1]]
library(ggplot2)
library(ggnetwork)

ggplot(gr.df, aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_edges(color = "gray", aes(size = weight)) +
  geom_nodes(color = "black") +
  geom_nodelabel(aes(label = vertex.names),
                 color = vertex.colors, fontface = "bold") +
  theme_minimal() +
  theme(axis.text = element_blank(),
        axis.title = element_blank(),
        legend.position = "none")
In my case each vertex actually represents many points, where each vertex has a different number of points. Adding that information to gr.df:
gr.df$n <- NA
gr.df$n[which(is.na(gr.df$weight))] <- as.integer(runif(length(which(is.na(gr.df$weight))), 100, 500))
What I'd like to do is add to the plot gr.df$n radially jittered points around each vertex (i.e., each vertex with its corresponding n), using the same vertex.colors coding. Any idea how to do that?
I think sampling and then plotting with geom_point is a reasonable strategy (otherwise, you could create your own geom). Here is some rough code, starting from the relevant bit of your question:
gr.df$n <- 1
gr.df$n[which(is.na(gr.df$weight))] <- as.integer(runif(length(which(is.na(gr.df$weight))), 100, 500))
# Function to sample points uniformly within a circle of radius R around (x, y)
# https://stackoverflow.com/questions/5837572/generate-a-random-point-within-a-circle-uniformly
circSamp <- function(x, y, R = 0.1) {
  n <- length(x)
  A <- a <- runif(n, 0, 1)
  b <- runif(n, 0, 1)
  ind <- b < a          # swap a and b where needed so that b >= a
  a[ind] <- b[ind]
  b[ind] <- A[ind]
  xn <- x + b * R * cos(2 * pi * a / b)
  yn <- y + b * R * sin(2 * pi * a / b)
  cbind(x = xn, y = yn)
}
# Sample n jittered points around each vertex
d <- with(gr.df, data.frame(vertex.names = rep(vertex.names, n),
                            circSamp(rep(x, n), rep(y, n))))
# p is your plot
p + geom_point(data = d, aes(x, y, color = vertex.names),
               alpha = 0.1, inherit.aes = FALSE) +
  scale_color_manual(values = vertex.colors)
Giving:
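For completeness, the p referenced above is assumed to be the question's ggnetwork plot saved to a variable, roughly:
# Assumed definition of p: the question's plot stored in a variable
p <- ggplot(gr.df, aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_edges(color = "gray", aes(size = weight)) +
  geom_nodes(color = "black") +
  geom_nodelabel(aes(label = vertex.names),
                 color = vertex.colors, fontface = "bold") +
  theme_minimal() +
  theme(axis.text = element_blank(),
        axis.title = element_blank(),
        legend.position = "none")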

A ggplot2 equivalent of the lines() function in base plot

For reasons I won't go into, I need to plot a vertical normal curve on a blank ggplot2 graph. The following code gets it done as a series of points with x, y coordinates:
dfBlank <- data.frame()
g <- ggplot(dfBlank) + xlim(0.58,1) + ylim(-0.2,113.2)
hdiLo <- 31.88
hdiHi <- 73.43
yComb <- seq(hdiLo, hdiHi, length = 75)
xVals <- 0.79 - (0.06*dnorm(yComb, 52.65, 10.67))/0.05
dfVertCurve <- data.frame(x = xVals, y = yComb)
g + geom_point(data = dfVertCurve, aes(x = x, y = y), size = 0.01)
The curve is clearly discernible, but it is drawn as a series of points. The lines() function in base plot would turn these points into a smooth line.
Is there a ggplot2 equivalent?
I see two different ways to do it.
geom_segment
The first uses geom_segment to 'link' each point with its next one.
hdiLo <- 31.88
hdiHi <- 73.43
yComb <- seq(hdiLo, hdiHi, length = 75)
xVals <- 0.79 - (0.06*dnorm(yComb, 52.65, 10.67))/0.05
dfVertCurve <- data.frame(x = xVals, y = yComb)
library(ggplot2)
ggplot() +
  xlim(0.58, 1) +
  ylim(-0.2, 113.2) +
  geom_segment(data = dfVertCurve,
               aes(x = x, xend = dplyr::lead(x), y = y, yend = dplyr::lead(y)),
               size = 0.01)
#> Warning: Removed 1 rows containing missing values (geom_segment).
As you can see, it just links the points you created. The last point does not have a next one, so the last segment is removed (see the warning).
stat_function
The second one, which I think is better and more ggplotish, utilizes stat_function().
library(ggplot2)
f = function(x) .79 - (.06 * dnorm(x, 52.65, 10.67)) / .05
hdiLo <- 31.88
hdiHi <- 73.43
yComb <- seq(hdiLo, hdiHi, length = 75)
ggplot() +
  xlim(-0.2, 113.2) +
  ylim(0.58, 1) +
  stat_function(data = data.frame(yComb), fun = f) +
  coord_flip()
This builds a proper function (y = f(x)) and plots it. Note that it is built on the x axis and then flipped; because of this, the xlim and ylim are swapped.
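A further option (not from the original answer, just a sketch) is geom_path, which connects the existing points in data order, much like lines() in base graphics:
library(ggplot2)
# dfVertCurve as built above; geom_path joins the points in the order they appear
ggplot() +
  xlim(0.58, 1) +
  ylim(-0.2, 113.2) +
  geom_path(data = dfVertCurve, aes(x = x, y = y))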

Three factor plotting using xyplot

I have a problem with ggplot that I am not able to solve, so maybe someone here can point out the reason. Sorry that I am not able to upload my dataset, but a description of the data can be found below. The output of the ggplot call is shown below; everything is OK except that NO line is drawn.
> all.data<-read.table("D:/PAM/data/Rural_Recovery_Edit.csv",head=T,sep=",")
> all.data$Water<-factor(all.data$Water,labels=c("W30","W60","W90"))
> all.data$Polymer<-factor(all.data$Polymer,labels=c("PAM-0 ","PAM-10 ","PAM-40 "))
> all.data$Group<-factor(all.data$Group,labels=c("Day20","Day25","Day30"))
> dat<-data.frame(Waterconsump=all.data[,9],Water=all.data$Water,Polymer=all.data$Polymer,Age=all.data$Group)
> ggplot(dat,aes(x=Water,y=Waterconsump,colour=Polymer))+
+ stat_summary(fun.y=mean, geom="line",size=2)+
+ stat_summary(fun.ymin=min,fun.ymax=max,geom="errorbar")+#,position="dodge"
+ facet_grid(~Age)
> dim(dat)
[1] 108 4
> head(dat)
Waterconsump Water Polymer Age
1 10.5 W30 PAM-10 Day20
2 10.3 W30 PAM-10 Day20
3 10.1 W30 PAM-10 Day20
4 7.7 W30 PAM-10 Day20
5 8.6 W60 PAM-10 Day20
6 8.4 W60 PAM-10 Day20
> table(dat$Water)
W30 W60 W90
36 36 36
> table(dat$Polymer)
PAM-0 PAM-10 PAM-40
36 36 36
> table(dat$Age)
Day20 Day25 Day30
36 36 36
And if I change the geom to "bar", the output is OK.
Below is the background for this question:
I would like to plot several variables that were subjected to the same 3 factors. Using xyplot, I am able to plot 2 of them within one figure. However, I have no idea how to include the third and arrange the figure into N subplots (N being the number of levels of the third factor).
So, my aims would be:
Plot the 3rd factor, and split the plot into N subplots, where N is the number of levels of the 3rd factor.
Preferably work as a function, as I need to plot several variables.
Below is the example figure with only two factors, and my working example to plot 2 factors.
Thanks in advance~
Marco
library(reshape)
library(agricolae)
library(lattice)
yr <- gl(10, 3, 300, labels = 90:99)
trt <- gl(4, 75, labels = c("A", "B", "C", "D"))
third <- gl(3, 100, labels = c("T", "P", "Q"))  ### The third factor used to split the figure into subplots
dat<-cbind(runif(300),runif(300,min=1,max=10),runif(300,min=100,max=200),runif(300,min=1000,max=1500))
colnames(dat)<-paste("Item",1:4,sep="-")
fac<-factor(paste(trt,yr,sep="-"))
dataov<-aov(dat[,1]~fac)
dathsd<-sort_df(HSD.test(dataov,'fac'),'trt')
trtplt<-gl(3,10,30,labels=c("A","B","C"))
yrplt<-factor(substr(dathsd$trt,3,4))
prepanel.ci <- function(x, y, ly, uy, subscripts, ...) {
  x <- as.numeric(x)
  ly <- as.numeric(ly[subscripts])
  uy <- as.numeric(uy[subscripts])
  list(ylim = range(y, uy, ly, finite = TRUE))
}

panel.ci <- function(x, y, ly, uy, subscripts, pch = 16, ...) {
  x <- as.numeric(x)
  y <- as.numeric(y)
  ly <- as.numeric(ly[subscripts])
  uy <- as.numeric(uy[subscripts])
  panel.arrows(x, ly, x, uy, col = "black",
               length = 0.25, unit = "native",
               angle = 90, code = 3)
  panel.xyplot(x, y, pch = pch, ...)
}
xyplot(dathsd$means ~ yrplt, group = trtplt, type = list("l", "p"),
       ly = dathsd$means - dathsd$std.err,
       uy = dathsd$means + dathsd$std.err,
       prepanel = prepanel.ci,
       panel = panel.superpose,
       panel.groups = panel.ci)
Here is another way of doing it, using the magic of ggplot. Because ggplot will calculate summaries for you, I suspect you can skip the aov step entirely.
The key is that your data should be in a single data.frame that you can pass to ggplot. Note that I have created new sample data to demonstrate.
library(ggplot2)
df <- data.frame(
  value = runif(300),
  yr = rep(1:10, each = 3),
  trt = rep(LETTERS[1:4], each = 75),
  third = rep(c("T", "P", "Q"), each = 100)
)
ggplot(df, aes(x = yr, y = value, colour = trt)) +
  stat_summary(fun.y = mean, geom = "line", size = 2) +
  stat_summary(fun.ymin = min, fun.ymax = max, geom = "errorbar") +
  facet_grid(~third)
You can go one step further and produce facets in two dimensions:
ggplot(df, aes(x = yr, y = value, colour = trt)) +
  stat_summary(fun.y = mean, geom = "line", size = 2) +
  stat_summary(fun.ymin = min, fun.ymax = max, geom = "errorbar") +
  facet_grid(trt ~ third)
This gets pretty close, but I forget how to colour the error lines by the grouping variable in lattice, and Deepayan's book is at work.
## Format a new data structure with all variables we want
dat <- data.frame(dathsd[, c(2, 5)], treat = trtplt, yrplt = yrplt,
                  upr = dathsd$means + 2 * dathsd$std.err,
                  lwr = dathsd$means - 2 * dathsd$std.err)

## Compute ylims
ylims <- range(dat$lwr, dat$upr)
ylims <- ylims + (c(-1, 1) * (0.05 * diff(ylims)))

## Plot
xyplot(means ~ yrplt, data = dat, group = treat, lwr = dat$lwr, upr = dat$upr,
       type = c("p", "l"), ylim = ylims,
       panel = function(x, y, lwr, upr, ...) {
         panel.arrows(x0 = x, y0 = lwr, x1 = x, y1 = upr,
                      angle = 90, code = 3, length = 0.05)
         panel.xyplot(x, y, ...)
       })
And produces:
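As for colouring the error lines by group, one approach that might work (an assumption, mirroring the panel.superpose/panel.groups pattern from the question rather than something from the original answer) is to let panel.superpose hand each group's colour to the arrows:
xyplot(means ~ yrplt, data = dat, groups = treat, lwr = dat$lwr, upr = dat$upr,
       type = c("p", "l"), ylim = ylims,
       panel = panel.superpose,
       panel.groups = function(x, y, subscripts, lwr, upr, col.line, ...) {
         # col.line is the per-group colour supplied by panel.superpose
         panel.arrows(x0 = x, y0 = lwr[subscripts], x1 = x, y1 = upr[subscripts],
                      angle = 90, code = 3, length = 0.05, col = col.line)
         panel.xyplot(x, y, col.line = col.line, ...)
       })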
