ggplot from two tibbles; scatterplot with contours in background - r

I have two tibbles:
tbl1 contains the real data: X, Y and choice.
tbl2 is a synthetic tibble used to calculate contours of the predicted choice P.
library(tidyverse)

# Real data: three observations, each with a 0/1 choice.
X <- c(1, 3, 5)
Y <- c(1, 5, 3)
choice <- c(0, 1, 1)
tbl1 <- tibble(X = X, Y = Y, choice = choice)

# Synthetic data: every combination of X and Y on a 0..5 grid (step 0.1),
# with V computed from X and Y and P as the logistic transform of V.
X <- seq(from = 0, to = 5, by = 0.1)
Y <- seq(from = 0, to = 5, by = 0.1)
tbl2 <- crossing(X = X, Y = Y) %>%
  mutate(
    V = (X + Y - 4) / 2,
    P = 1 / (1 + exp(-V))
  )
I wish to create a single ggplot with:
- a scatterplot of X vs Y from tbl1 (with color = choice), and
- filled contours of P from tbl2 in the background.
Thanks

Perhaps this?
library(ggplot2)

# Background layer: filled contours of P from the synthetic grid (tbl2).
# Foreground layer: the observed points (tbl1), coloured by choice.
ggplot(data = tbl2, mapping = aes(x = X, y = Y)) +
  geom_contour_filled(mapping = aes(z = P), alpha = 0.3) +
  geom_point(
    data = tbl1,
    mapping = aes(color = factor(choice)),
    size = 5
  ) +
  guides(fill = guide_none()) + # no legend for the fill scale
  labs(color = "Choice")

Related

Set / Link point and shape options for variables in ggplot2

I would like to link variables I have in a dataframe i.e. ('prop1', 'prop2', 'prop3') to specific colours and shapes in the plot. However, I also want to exclude data (using dplyr::filter) to customise the plot display WITHOUT changing the points and shapes used for a specific variable. A minimal example is given below.
library(ggplot2)
library(dplyr)
library(magrittr)
# Example data: one element per plotted point.
obj <- c("cmpd 1","cmpd 1","cmpd 1","cmpd 2","cmpd 2")
x <- c(1, 2, 4, 7, 3)
var <- c("prop1","prop2","prop3","prop2","prop3")
y <- c(1, 2, 3, 2.5, 4)
# Colour and shape codes are given per ROW (unnamed vectors, one entry per
# point), not per level of `var`.
col <- c("#E69F00","#9E0142","#56B4E9","#9E0142","#56B4E9")
shp <- c(0,1,2,1,2)
df2 <- cbind.data.frame(obj,x,var,y,col,shp)
# Plot only the compounds listed in the filter; comment out an entry below to
# exclude that compound from the plot.
plot <- ggplot(data = df2 %>%
filter(obj %in% c(
"cmpd 1",
"cmpd 2"
)),
aes(x = x,
y = y,
colour = as.factor(var),
shape = as.factor(var))) +
geom_point(size=2) +
# The two manual scales below are disabled; if enabled they would receive the
# unnamed per-row `shp` / `col` vectors defined above.
#scale_shape_manual(values=shp) +
#scale_color_manual(values=col) +
# One facet column per compound.
facet_grid(.~obj)
plot
However, when I exclude cmpd 1 (by commenting out its line in the filter), the colour and shape of prop2 and prop3 for cmpd 2 change (please see plot2).
To address this, I tried adding scale_shape_manual and scale_color_manual to the code (currently commented out) and linking them to the col and shp variables in the dataframe (df2), but the same problem arises: both the shape and the colour of these variables change when one of the conditions is excluded.
Any and all help appreciated.
Try something like this:
library(tidyverse)

# Example data: one element per plotted point.
obj <- c("cmpd 1", "cmpd 1", "cmpd 1", "cmpd 2", "cmpd 2")
x <- c(1, 2, 4, 7, 3)
var <- c("prop1", "prop2", "prop3", "prop2", "prop3")
y <- c(1, 2, 3, 2.5, 4)
df2 <- data.frame(obj, x, var, y)

# Named lookup vectors: each value of `var` is tied to one colour and one
# shape by name.
col <- c(prop1 = "#E69F00", prop2 = "#9E0142", prop3 = "#56B4E9")
shp <- c(prop1 = 0, prop2 = 1, prop3 = 2)

# Keep only the listed compounds, then plot with the named manual scales.
plot <- df2 %>%
  filter(obj %in% c("cmpd 1", "cmpd 2")) %>%
  ggplot(aes(x = x, y = y, colour = var, shape = var)) +
  geom_point(size = 2) +
  scale_shape_manual(values = shp) +
  scale_color_manual(values = col) +
  facet_grid(. ~ obj)
plot

ggplot: Mask Circles inside a non Geographic Shape

Is there a way within ggplot, to plot circles within a defined, non geographic shape, defined through a series of points, or alternatively an imported SVG?
The circles would be placed in rows and columns, similar to the simple example below. But then any circles, either with their circumference, or centre if that is more achievable, outside the shape would be excluded from the plot. So a kind of mask.
I know I could do this by comparing the coordinates, but I'm interested to know if there is a more sophisticated masking function.
library(tidyverse)

maxX <- 12
maxY <- 9

# Number the circles from 0 and lay them out row by row:
# x is the position within a row, y is the row index.
circles <- data.frame(circleNo = seq_len(maxX * maxY) - 1) %>%
  mutate(
    x = circleNo %% maxX,
    y = circleNo %/% maxX
  )

# Polygon vertices; each segment ends at the next vertex.
shape <- data.frame(
  x = c(1, 1, 7, 7, 11, 11, 6, 5, 3, 2, 1),
  y = c(1, 8, 7, 5, 5, 1, 3, 3, 3, 1, 1)
) %>%
  mutate(xend = lead(x), yend = lead(y))

# The last vertex has no successor, so its segment ends at the first vertex.
last_row <- nrow(shape)
shape$xend[last_row] <- shape$x[1]
shape$yend[last_row] <- shape$y[1]

ggplot(circles, aes(x = x, y = y)) +
  geom_point(shape = 1, size = 9, fill = NA) +
  geom_segment(
    data = shape,
    aes(x = x, xend = xend, y = y, yend = yend)
  ) +
  theme_void() +
  coord_fixed(ratio = 1)
Here's one approach that is based on manipulating the pixels as a last step. It is not sophisticated enough to identify which circles are entirely within the polygon, though. For that, the sf package and this approach sound like what you want:
How to mark points by whether or not they are within a polygon
library(ggfx)

# The polygon layer is registered under the id "mask_layer"; the circle layer
# is then drawn through with_mask() using that id as its mask.
polygon_reference <- as_reference(
  geom_polygon(data = shape),
  id = "mask_layer"
)
masked_circles <- with_mask(
  geom_point(shape = 1, size = 9, fill = NA),
  mask = "mask_layer"
)

ggplot(circles, aes(x = x, y = y)) +
  polygon_reference +
  masked_circles +
  theme_void() +
  coord_fixed(ratio = 1)
My thanks to @Jon above for the pointers. This is what I came up with. Note that I added a hole in the middle of the polygon for good measure.
library(tidyverse)
# The plotting package is called "ggplot2"; library(ggplot) stops with a
# "there is no package called 'ggplot'" error.
library(ggplot2)

# Create grid of circles: numbered from 0 and laid out row by row, with x the
# position within a row and y the row index.
maxX <- 24
maxY <- 18
circles <- data.frame(circleNo = seq(1, maxX * maxY, 1) - 1)
circles <- circles %>%
  mutate(x = circleNo %% maxX, y = floor(circleNo / maxX))

# Create polygon, with each segment ending at the next vertex
shape <- data.frame(
  x = c(2, 2, 14, 14, 22, 22, 12, 10, 6, 4, 2),
  y = c(2, 16, 14, 10, 10, 2, 6, 6, 6, 2, 2)
) %>%
  mutate(xend = lead(x), yend = lead(y))

# Except for the last vertex, whose segment needs to end at the first vertex
# (columns 3 and 4 are xend and yend; columns 1 and 2 are x and y)
shape[nrow(shape), 3] <- shape[1, 1]
shape[nrow(shape), 4] <- shape[1, 2]

# Plot the circles and polygon without any masking
ggplot(circles, aes(x = x, y = y)) +
  geom_point(shape = 1, size = 5, fill = NA) +
  geom_segment(data = shape, aes(x = x, xend = xend, y = y, yend = yend)) +
  theme_void() +
  coord_fixed(ratio = 1)
# Now do similar with SF which allows masking using the helpful posts below
# Create simple feature from a numeric vector, matrix or list
# https://r-spatial.github.io/sf/reference/st.html
# How to mark points by whether or not they are within a polygon
# https://stackoverflow.com/questions/50144222/how-to-mark-points-by-whether-or-not-they-are-within-a-polygon
library(sf)

# Outer ring of the polygon: one vertex per row, first and last rows equal
outer <- matrix(
  c(2, 2, 2, 16, 14, 14, 14, 10, 22, 10, 22, 2, 12, 6, 10, 6, 6, 6, 4, 2, 2, 2),
  ncol = 2, byrow = TRUE
)
# And for good measure, a second ring that acts as a hole
hole1 <- matrix(
  c(10, 10, 10, 12, 12, 12, 12, 10, 10, 10),
  ncol = 2, byrow = TRUE
)
polygonList <- list(outer, hole1)

# Append a constant third coordinate (0) to every ring, then build a single
# simple-feature polygon from the list of rings
combinedPoints <- lapply(polygonList, function(ring) cbind(ring, 0))
polygons <- st_polygon(combinedPoints)

# Plot the new polygon
ggplot(polygons) +
  geom_sf(aes())

# Cast the geometry to POLYGON and turn the circle grid into point features
# using its x / y columns as coordinates.
# NOTE(review): polygons already comes from st_polygon(), so the cast may be
# redundant - confirm before removing it.
polygonCast <- polygons %>% st_cast("POLYGON")
circlesSF <- st_as_sf(circles, coords = c("x", "y"))

# For every point st_within() lists the geometries that contain it; a
# non-empty entry means the point lies inside the polygon (and outside the
# hole). The flag was previously a count stored in a column named `outside`,
# which read as the opposite of what it meant.
circlesSF <- circlesSF %>%
  mutate(inside = lengths(st_within(circlesSF, polygonCast)) > 0)

# Convert to a plain data frame, add the X / Y coordinates as columns and
# keep only the circles that are inside
circleCoords <- as.data.frame(st_coordinates(circlesSF))
circles2 <- circlesSF %>%
  as.data.frame() %>%
  cbind(circleCoords) %>%
  select(-geometry) %>%
  filter(inside)

ggplot(circles2, aes(x = X, y = Y)) +
  geom_point(shape = 1, size = 5, fill = NA) +
  geom_segment(data = shape, aes(x = x, xend = xend, y = y, yend = yend)) +
  theme_void() +
  coord_fixed(ratio = 1)

Colouring of datapoint with ggplot2 and RColorBrewer

I have a df with over 400 datapoints and want to colour them according to the RColorBrewer package with palette = "Blues".
I plotted my data and even expanded the number of colours in the palette to the number of my data points to avoid the error message below, but the colours in the plot aren't changing (only a black line).
Error: Aesthetics must be either length 1 or the
same as the data (427): fill
I've created a dummy df to make my problem reproducible:
library(ggplot2)
library(RColorBrewer)
# 300 colours interpolated from the reversed 9-colour "Blues" palette
color = colorRampPalette(rev(brewer.pal(n = 9, name = "Blues")))(300)
# curve() evaluates 3*x^2 + x at 300 points on [1, 10]; its return value is
# stored in df and its x and y components are read below
df = (curve(3*x^2 + x, from=1, to=10, n=300, xlab="xvalue", ylab="yvalue",
col="blue", lwd=2, main="Plot of (3x^2 + x)"))
# x and y as single-column matrices, bound side by side with cbind()
dfx = matrix(data = df$x, ncol = 1)
dfy = matrix(data = df$y, ncol = 1)
dfa = cbind(dfx,dfy)
# NOTE(review): dfa is built by cbind() on matrices rather than as a data
# frame, and aes() names the global dfx / dfy objects instead of columns of
# dfa - confirm this is what ggplot() should receive.
# `fill = color` is passed outside aes(), i.e. as a fixed vector of 300 colours.
# The plot is only assigned to DF; nothing in this snippet prints it.
DF = ggplot(dfa, aes(x = dfx, y = dfy)) +
geom_point(fill = color)
I expect the curve to start light blue at (1,4) and become progressively darker towards the end (dark blue at (10,310)).
Thanks in advance!
# 300 colours interpolated along the 9-colour "Blues" palette, one per point
color_seq <- colorRampPalette(brewer.pal(n = 9, name = "Blues"))(300)

# curve() evaluates 3*x^2 + x at 300 points on [1, 10]; its x and y components
# are used as the plot coordinates
df <- curve(3 * x^2 + x,
  from = 1, to = 10, n = 300, xlab = "xvalue", ylab = "yvalue",
  col = "blue", lwd = 2, main = "Plot of (3x^2 + x)"
)

# Build the data frame directly so V1 / V2 stay numeric. cbind()-ing them with
# the character colour vector would coerce every column to character and
# force a conversion back with as.numeric().
dfa <- data.frame(
  V1 = df$x,
  V2 = df$y,
  color_seq = color_seq,
  stringsAsFactors = FALSE
)

# scale_color_identity() uses the colour strings in color_seq as they are
ggplot(dfa, aes(x = V1, y = V2, color = color_seq)) +
  geom_point() +
  scale_color_identity()
# x values from 1 to 10 in steps of 0.1; y is computed with vectorised
# arithmetic on that same column instead of sapply() over a second copy of
# the sequence
exDF <- data.frame(dataX = seq(1, 10, 0.1))
exDF$dataY <- 3 * exDF$dataX^2 + exDF$dataX

# One colour per row, interpolated along the 9-colour "Blues" palette
exDFcolors <- colorRampPalette(brewer.pal(9, "Blues"))(nrow(exDF))

# The colour vector is passed outside aes(), with one entry per row of exDF
ggplot(exDF, aes(dataX, dataY)) +
  geom_line(size = 2, color = exDFcolors)

expanding table with gridExtra

How do I increase the size of the table so that it takes up all of the available space, i.e. so there's no white space?
Also, how do you remove the row names of the table?
Thank you
# Example data and a point-and-line plot of y against x
dat <- data.frame(
  x = c(1, 2, 4),
  y = c(12, 3, 5),
  z = c(5, 6, 7)
)
p <- ggplot(dat, aes(x = x, y = y)) +
  geom_point() +
  geom_line()

library(gridExtra)

# Table grob built from the data, followed by an attempt to drop its row names
t <- tableGrob(dat)
rownames(t) <- NULL

# Every table column gets the same fraction (1 / number of columns), in "npc"
t$widths <- unit(rep(1 / ncol(t), times = ncol(t)), "npc")

# Table followed by two copies of the plot, arranged in one row
grid.arrange(t, p, p, nrow = 1)
I updated your code. The important parts are the rows = NULL option to tableGrob and the setting of t$heights. You probably need to tweak this to get something to your taste.
library(gridExtra)
library(ggplot2)

dat <- data.frame(
  x = c(1, 2, 4),
  y = c(12, 3, 5),
  z = c(5, 6, 7)
)
p <- ggplot(dat, aes(x = x, y = y)) + geom_point() + geom_line()

# n equal fractions (1 / n each), expressed in "npc" units
equal_npc <- function(n) unit(rep(1 / n, n), "npc")

# rows = NULL is passed so that the table is built without row names
t <- tableGrob(dat, rows = NULL)
t$widths <- equal_npc(ncol(t))
t$heights <- equal_npc(nrow(t)) # same treatment for the row heights
grid.arrange(t, p, p, nrow = 1)

How to specify size for geom_point and geom_line separately and get separate legends in ggplot2

I'm trying to plot igraph network with ggplot2. I use geom_point() for the vertex and geom_line() for the edges. The point size and line width in ggplot2 are both specified by size. When I specified them separately, I hope to get two separate legends but they turned out to be merged. Below is an example.
# An example graph built from a from/to edge list.
# NOTE(review): no library() calls are shown in this snippet; graph.data.frame,
# V, E, get.edgelist and layout.fruchterman.reingold are presumably from
# igraph, melt from reshape2, and ggplot from ggplot2 - confirm.
df <- data.frame(from=letters[c(1, 1, 2:4, 4)], to=letters[c(2, 3, 5:7, 1)])
g <- graph.data.frame(df, directed=FALSE)
Vname <- V(g)$name
# Fixed seed so the random vertex / edge attributes below are reproducible
set.seed(100)
V(g)$varVsize <- rnorm(length(Vname))
E(g)$varEwidth <- rnorm(nrow(df))
E(g)$varEcol <- rnorm(nrow(df), 1, 1)
Vcord <- layout.fruchterman.reingold(g) # get coordinates for the vertices
rownames(Vcord) <- V(g)$name
# 2-column edge list, with the edge attributes added as extra columns
el <- get.edgelist(g)
el_df <- data.frame(as.data.frame(el), Ewidth=E(g)$varEwidth, Ecol=E(g)$varEcol)
# assigning edge id so that it can be used as group
el_df$id <- 1:nrow(el_df)
# Reshape with columns 3:5 (Ewidth, Ecol, id) as id variables, so the two
# edge-list columns are stacked into a single `value` column
el_dfm <- melt(el_df, id=3:5)
# Per-vertex table keyed by vertex name (`value`), used for the merge below
xy_s <- data.frame(value = rownames(Vcord),
Vsize=V(g)$varVsize, # vertex attribute, mapped to point size in the plots below
x = Vcord[, 1],
y = Vcord[, 2])
# Attach vertex coordinates and Vsize to the reshaped edge table by vertex name
g_df2 <- merge(el_dfm, xy_s, by = "value")
# fig 1: only the points map `size`; lines are coloured by Ecol and grouped by edge id
ggplot(g_df2, aes(x, y)) +
geom_point(data=g_df2, aes(x, y, size=Vsize), alpha=0.4) +
geom_line(aes(color=Ecol, group = id)) +
geom_text(size = 4, aes(label = value), colour='blue')
# fig 2: Vsize and Ewidth legend are merged: transparency in geom_line is used so it will not overlap geom_point
# (both geom_point and geom_line map the `size` aesthetic here)
ggplot(g_df2, aes(x, y)) +
geom_point(data=g_df2, aes(x, y, size=Vsize), alpha=0.4) +
geom_line(aes(color=Ecol, group = id, size=Ewidth), alpha=0.3) +
geom_text(size = 4, aes(label = value), colour='blue')

Resources