Approximate area size by counting pixels in ggplot2 - r

Is it possible to extract the number of pixels of some arbitrary filled area to approximate the size of the area in ggplot2?
Specifically, I am using geom_ribbon to highlight some area in a graph and was wondering if there is some generic way to approximate the corresponding area size (without directly referring to the input data). I was thinking about hidden functions or intermediate data which ggplot2 might produce to color each pixel of the blue area on the screen.
library(ggplot2)
yint <- 5
x <- 1:100
df <- data.frame(x=x, y=x^0.5)
rib <- df[df$y>=yint,]
ggplot(df, aes(x=x)) + geom_line(aes(y=y)) + geom_hline(aes(yintercept=yint)) + geom_ribbon(data=rib, aes(x=x, ymin=yint, ymax=y), fill='lightblue')
EDIT:
Adopting lukeA's first solution, I can simply complete my previous ribbon data to form a closed polygon and then use the Polygon() function on it which automatically also calculates the corresponding area.
library(sp)
pol <- rbind(rib, c(100,5), c(25,5))
Polygon(pol)#area

Here are two approaches - one is polygon/vector based, the other one is pixel/raster based:
library(ggplot2)
library(rgeos)
library(sp)
library(png)
f <- function(x) Polygons(list(Polygon(map_data("world", region = x)[, 1:2])), x)
(tab1 <- gArea(SpatialPolygons(list(f("Germany"), f("France"))), byid=T))
# Germany France
# 41.21485 84.34209
unname(tab1["France"]/tab1["Germany"])
# [1] 2.046401
map <- map_data("world", region=c("Germany", "France"))
p <- ggplot(map, aes(long, lat, group=group, fill=region)) +
geom_polygon() + coord_map() +
theme_minimal() +
scale_fill_manual(values=c("Germany"="#00BFC4", "France"="#F8766D"), guide="none")
ggsave(tf <- tempfile(fileext = ".png"), p, dpi = 90)
r <- as.matrix(as.raster(readPNG(tf)))
(tab2 <- table(r[r %in% c("#F8766D", "#00BFC4")]))
unname(tab2["#F8766D"]/tab2["#00BFC4"])
# 1.2781

Related

How to clip an interpolated layer in R so it does not extend past data boundaries

I am trying to display a cross-section of conductivity in a lagoon environment using isolines. I have applied interp() and stat_contour() to my data, but I would like to clip the interpolated output so that it doesn't extend past my data points. This way the bathymetry of the lagoon in the cross-section is clear. Here is the code I have used so far:
cond_df <- read_csv("salinity_profile.csv")
di <- interp(cond_df$stop, cond_df$depth, cond_df$conductivity,
xo = seq(min(cond_df$stop), max(cond_df$stop), length = 200),
yo = seq(min(cond_df$depth), max(cond_df$depth), length = 200))
dat_interp <- data.frame(expand.grid(x=di$x, y=di$y), z=c(di$z))
ggplot(dat_interp) +
aes(x=x, y=y, z=z, fill=z)+
scale_y_reverse() +
geom_tile()+
stat_contour(colour="white", size=0.25) +
scale_fill_viridis_c() +
theme_tufte(base_family="Helvetica")
Here is the output:
interpolated plot
To help clarify, here is the data just as a geom_point() graph, and I do not want the interpolated layer going past the lower points of the graph:
cond_df%>%
ggplot(mapping=aes(x=stop, y=depth, z=conductivity, fill=conductivity)) +
geom_point(aes(colour = conductivity), size = 3) +
scale_y_reverse()
point plot
You can mask the unwanted region of the plot by using geom_ribbon.
You will need to generate a data.frame with values for the max depth at each stop. Here's one somewhat inelegant way to do that:
# Create the empty data frame for all stops
bathymetry <- data.frame(depth = as.numeric(NA),
stop = unique(cond_df$stop))
# Find the max depth for each stop
for(thisStop in bathymetry$stop){
bathymetry[bathymetry$stop==thisStop, "depth"] <- max(cond_df[cond_df$stop==thisStop, "depth"])
}
Then, you can add the geom_ribbon as the last geom of your plot, like so
geom_ribbon(data=bathymetry, aes(x=stop, ymin=depth, ymax=max(cond_df$depth)), inherit.aes = FALSE)

Continuous colour gradient that applies to a single geom_polygon element with ggplot2 in R

I am creating a choropleth map for CO2 emissions in Europe using ggplot2 and ggmap in R. The fill colour for each country corresponds to its CO2 emissions. The colours and continuous legend are implemented with scale_fill_gradient. However, I would also like for a single country to have a colour that is not from the continuous legend (for later use in a Shiny application). I have not worked out how to have scale_fill_gradient only apply to one of multiple geom_polygon layers.
Here is my initial map:
To reproduce this map, download the map data from http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/50m/cultural/ne_50m_admin_0_countries.zip
Here is the code:
library(maptools)
library(ggplot2)
library(ggmap)
# read in a world map
WorldMap <- readShapePoly(fn="ne_50m_admin_0_countries/ne_50m_admin_0_countries")
# reduce shape file to a filtered data frame
WorldMapDf <- fortify(WorldMap,region='iso_a2')
# read in CO2 emissions
GEO <- c('FR','AT','BE','DE','DK','FI','GB','IE','NL','NO','SE')
CO2 <- c(59.5,86.9,137.4,425.8,303.9,353.2,380.3,427.0,476.4,1.8,47.6)
CO2_df <- data.frame(GEO,CO2)
# the range of values that the colour scale should cover
colorbar_range <- range(CO2_df$CO2)
mean_price <- mean(colorbar_range)
# merge map polygons with electricity CO2 emissions
CO2Map <- merge(WorldMapDf, CO2_df, by.x="id", by.y="GEO", all.x = T)
CO2Map <- CO2Map[order(CO2Map$order),]
#limit data to main Europe
europe.limits <- data.frame(matrix(c(35.50,-11.43,70,31.11),nrow=2,ncol=2,byrow = T))
names(europe.limits) <- c('lat','long')
CO2MapSubset <- subset(CO2Map, long > min(europe.limits$lon) & long < max(europe.limits$lon) & lat > min(europe.limits$lat) & lat < max(europe.limits$lat))
# create x and y limits
xrange <- c(min(CO2MapSubset$long),max(CO2MapSubset$long))
yrange <- c(min(CO2MapSubset$lat),max(CO2MapSubset$lat))
initial_map <- ggplot(data=CO2MapSubset) + # data layer
geom_polygon(aes(x=long, y=lat, group=group, fill=CO2)) +
coord_map(projection = "mercator",xlim=xrange,ylim=yrange) +
scale_fill_gradient2(low='gold',mid = "white",high='firebrick2',na.value = "lightgrey",midpoint=mean_price,limits=colorbar_range, name='CO2 (g/kWh)') +
geom_path(aes(x=long, y=lat, group=group), color='black',size=0.2)
# display map
initial_map
Now, I would like to make one country a different colour (for example, blue) that is not on the continuous colour scale as shown in the legend.
I thought I could do this by adding an additional geom_polygon layer to the initial map. To make Denmark blue, I tried this:
map_with_selected_country <- initial_map +
geom_polygon(data = CO2MapSubset[CO2MapSubset$id == 'DK',], aes(x=long, y=lat, group=group, fill='blue'))
but I get the error message: 'Error: Discrete value supplied to continuous scale' because the fill 'blue' conflicts with scale_fill_gradient2. Is there a way to make scale_fill_gradient2 only point to one dataset? Or is there another way to tackle this problem?
Here's an example:
library(ggplot2)
map <- map_data("world")
map$value <- setNames(sample(1:50, 252, T), unique(map$region))[map$region]
p <- ggplot(map, aes(long, lat, group=group, fill=value)) +
geom_polygon() +
coord_quickmap(xlim = c(-50,50), ylim=c(25,75))
p + geom_polygon(data = subset(map, region=="Germany"), fill = "red")
Germany is overplotted using a red fill color:
You can adapt this example to fit your needs.

How to get the points inside of the ellipse in ggplot2?

I'm trying to identify the densest region in the plot. And I do this using stat_ellipse() in ggplot2. But I can not get the information (sum total, order number of each point and so on) of the points inside of the ellipse.
Seldom see the discussion about this problem. Is this possible?
For example:
ggplot(faithful, aes(waiting, eruptions))+
geom_point()+
stat_ellipse()
Here is Roman's suggestion implemented. The help for stat_ellipse says it uses a modified version of car::ellipse, so therefore I chose to extract the ellipse points from the ggplot object. That way it should always be correct (also if you change options in stat_ellipse).
# Load packages
library(ggplot2)
library(sp)
# Build the plot first
p <- ggplot(faithful, aes(waiting, eruptions)) +
geom_point() +
stat_ellipse()
# Extract components
build <- ggplot_build(p)$data
points <- build[[1]]
ell <- build[[2]]
# Find which points are inside the ellipse, and add this to the data
dat <- data.frame(
points[1:2],
in.ell = as.logical(point.in.polygon(points$x, points$y, ell$x, ell$y))
)
# Plot the result
ggplot(dat, aes(x, y)) +
geom_point(aes(col = in.ell)) +
stat_ellipse()

plot rectangle given 4 points in ggplot heatmap

I have this data in a txt file (fruits2612e.txt)
"people","1","2","3","4","5","6","7","8","9","10","11","12"
"Ej1",0,0,0,0,0,0,0,0,0,0,0,0
"Ej2",0,0,1,1,1,1,1,1,0,0,0,0
"Ej3",0,0,0,0,0,0,1,0,0,0,0,0
"Ej4",0,1,1,1,0,0,1,1,0,0,0,1
"Ej5",1,1,1,1,1,1,1,1,0,0,0,1
"Ej6",1,1,1,1,0,1,1,1,0,0,1,1
"Ej7",1,1,1,1,1,1,1,1,0,0,0,0
"Ej8",0,0,0,1,1,1,0,0,0,0,0,0
"Ej9",0,0,1,1,1,1,1,1,0,0,0,0
"Ej10",0,0,0,1,1,1,1,0,0,0,0,0
"Ej11",0,0,0,0,1,0,0,0,0,1,1,0
"Ej12",0,0,1,1,1,0,0,0,0,1,1,1
"Ej13",0,1,1,1,0,0,0,1,1,1,1,1
"Ej14",1,1,0,0,0,0,0,1,1,1,0,1
"Ej15",0,0,0,0,0,0,0,1,1,1,1,1
"Ej16",0,0,0,0,0,0,0,1,1,1,1,1
I built the heatmap (without the black rectangle) using this code
library(reshape2)
library(ggplot2)
library(scales)
library(plyr)
data <- read.csv("fruits2612e.txt", head=TRUE, sep=",")
data$people <- factor(data$people,levels=rev(data$people))
data.m = melt(data)
#data.m <- ddply(data.m, .(variable), transform, rescale = rescale(value))
data.m[,"rescale"]<-rescale(data.m[,"value"],to=c(0,1))
fewer.labels <- c("Ej16","Ej15","Ej14","Ej13","Ej12","Ej11","Ej10","Ej9","Ej8","Ej7","Ej6","Ej5","Ej4","Ej3","Ej2","Ej1")
p <- ggplot(data.m, aes(variable, people)) +
geom_tile(aes(fill = rescale), colour = "white") +
scale_y_discrete(labels=fewer.labels) +
scale_fill_gradient(low = "red", high = "green") +
theme(axis.text=element_text(size=8))
Now I'm trying to add the black rectangle to the heatmap, but can't find out how, the coordinates are
maxR<-c(topLeftx,topLefty,botRightX,botRightY)
[1] 5 1 7 8
Following my own advice, I searched SO for "annotate geom rect" and found one hit: Draw multiple squares with ggplot
maxR<-c(topLeftx=5,topLefty=1,botRightX=7,botRightY=8)
p + annotate(geom='rect', xmin= maxR['botRightX'], ymin= maxR['botRightY'],
xmax=maxR['topLeftx'], ymax= maxR['topLefty'],
fill="transparent", col="black", lwd=3)
So apparently we have different notions about how to specify the indices of a rectangle but you should be able to take it from here.
ggplot manipulates factors internally using their codes, which are accessible using, e.g. as.integer(data.m$people), etc. The corners of the tiles are the codes +/- 0.5. So, assuming you really are using factors for both the x- and y-direction, then this will draw the box you want
maxR <-c(topLeftx=5,topLefty=1,botRightX=7,botRightY=8)
sub.data <- with(data.m,
with(as.list(maxR),
data.m[people %in% paste0("Ej",topLeftx:botRightX)
& variable %in% paste0("X",topLefty:botRightY),]))
p+with(sub.data,annotate(geom="rect", fill="transparent",color="black", size=1.5,
xmin=min(as.integer(variable))-0.5,ymin=min(as.integer(people))-0.5,
xmax=max(as.integer(variable))+0.5,ymax=max(as.integer(people))+0.5))
The tortured code at the beginning is needed because of the bizarre way you've chosen to specify the corners of the box.

How to modify the legend of a heat map generated with ggplot's geom_tile?

Is there a way to modify the legend of a heat map that was generated with geom_tile from the ggplot2 package? I would like to increase the number of tiles in the legend and to set the minimum and maximum of the shown value there.
In this example from the manual page the legend contains five colored tiles representing values from -0.4 to 0.4. How could I let e.g. 9 tile be displayed instead?
library (ggplot2)
pp <- function (n,r=4) {
x <- seq(-r*pi, r*pi, len=n)
df <- expand.grid(x=x, y=x)
df$r <- sqrt(df$x^2 + df$y^2)
df$z <- cos(df$r^2)*exp(-df$r/6)
df
}
p <- ggplot(pp(20), aes(x=x,y=y))
p + geom_tile(aes(fill=z))
I guess there are several possible ways to archive this. One solution would be to specify the breaks for the legend manually.
d = pp(20)
ggplot(d, aes(x=x,y=y,fill=z)) + geom_tile() +
scale_fill_continuous( breaks = round( seq(-.4, .4, length.out = 10 ), 1) )

Resources