I'm trying to produce a scatter plot with geom_point where the points are circumscribed by a smoothed polygon, with geom_polygon.
Here's my point data:
set.seed(1)
df <- data.frame(x=c(rnorm(30,-0.1,0.1),rnorm(30,0,0.1),rnorm(30,0.1,0.1)),y=c(rnorm(30,-1,0.1),rnorm(30,0,0.1),rnorm(30,1,0.1)),val=rnorm(90),cluster=c(rep(1,30),rep(2,30),rep(3,30)),stringsAsFactors=F)
I color each point according the an interval that df$val is in. Here's the interval data:
intervals.df <- data.frame(interval=c("(-3,-2]","(-2,-0.999]","(-0.999,0]","(0,1.96]","(1.96,3.91]","(3.91,5.87]","not expressed"),
start=c(-3,-2,-0.999,0,1.96,3.91,NA),end=c(-2,-0.999,0,1.96,3.91,5.87,NA),
col=c("#2f3b61","#436CE8","#E0E0FF","#7d4343","#C74747","#EBCCD6","#D3D3D3"),stringsAsFactors=F)
Assigning colors and intervals to the points:
df <- cbind(df,do.call(rbind,lapply(df$val,function(x){
if(is.na(x)){
return(data.frame(col=intervals.df$col[nrow(intervals.df)],interval=intervals.df$interval[nrow(intervals.df)],stringsAsFactors=F))
} else{
idx <- which(intervals.df$start <= x & intervals.df$end >= x)
return(data.frame(col=intervals.df$col[idx],interval=intervals.df$interval[idx],stringsAsFactors=F))
}
})))
Preparing the colors for the leged which will show each interval:
df$interval <- factor(df$interval,levels=intervals.df$interval)
colors <- intervals.df$col
names(colors) <- intervals.df$interval
Here's where I constructed the smoothed polygons (using a function courtesy of this link):
clusters <- sort(unique(df$cluster))
cluster.cols <- c("#ff00ff","#088163","#ccbfa5")
splinePolygon <- function(xy,vertices,k=3, ...)
{
# Assert: xy is an n by 2 matrix with n >= k.
# Wrap k vertices around each end.
n <- dim(xy)[1]
if (k >= 1) {
data <- rbind(xy[(n-k+1):n,], xy, xy[1:k, ])
} else {
data <- xy
}
# Spline the x and y coordinates.
data.spline <- spline(1:(n+2*k), data[,1], n=vertices, ...)
x <- data.spline$x
x1 <- data.spline$y
x2 <- spline(1:(n+2*k), data[,2], n=vertices, ...)$y
# Retain only the middle part.
cbind(x1, x2)[k < x & x <= n+k, ]
}
library(data.table)
hulls.df <- do.call(rbind,lapply(1:length(clusters),function(l){
dt <- data.table(df[which(df$cluster==clusters[l]),])
hull <- dt[, .SD[chull(x,y)]]
spline.hull <- splinePolygon(cbind(hull$x,hull$y),100)
return(data.frame(x=spline.hull[,1],y=spline.hull[,2],val=NA,cluster=clusters[l],col=cluster.cols[l],interval=NA,stringsAsFactors=F))
}))
hulls.df$cluster <- factor(hulls.df$cluster,levels=clusters)
And here's my ggplot command:
library(ggplot2)
p <- ggplot(df,aes(x=x,y=y,colour=interval))+geom_point(cex=2,shape=1,stroke=1)+labs(x="X", y="Y")+theme_bw()+theme(legend.key=element_blank(),panel.border=element_blank(),strip.background=element_blank())+scale_color_manual(drop=FALSE,values=colors,name="DE")
p <- p+geom_polygon(data=hulls.df,aes(x=x,y=y,group=cluster),color=hulls.df$col,fill=NA)
which produces:
My question is how do I add a legend for the polygon under the legend for the points? I want it to a legend with 3 lines colored according to the cluster colors and the corresponding cluster number beside each line?
Slightly different output, only changing the last line of your code, it may solve your purpose:
p+geom_polygon(data=hulls.df,aes(x=x,y=y,group=cluster, fill=cluster),alpha=0.1)
Say, you want to add a legend of the_factor. My basic idea is,
(1) put the_factor into mapping by using unused aes arguments; aes(xx = the_factor)
(2) if (1) affects something, delete the effect by using scale_xx_manual()
(3) modify the legend by using guides(xx = guide_legend(override.aes = list()))
In your case, aes(fill) and aes(alpha) are unused. The former is better to do it because of no effect. So I used aes(fill=as.factor(cluster)).
p <- ggplot(df,aes(x=x,y=y,colour=interval, fill=as.factor(cluster))) + # add aes(fill=...)
geom_point(cex=2, shape=1, stroke=1) +
labs(x="X", y="Y",fill="cluster") + # add fill="cluster"
theme_bw() + theme(legend.key=element_blank(),panel.border=element_blank(),strip.background=element_blank()) + scale_color_manual(drop=FALSE,values=colors,name="DE") +
guides(fill = guide_legend(override.aes = list(colour = cluster.cols, pch=0))) # add
p <- p+geom_polygon(data=hulls.df,aes(x=x,y=y,group=cluster), color=hulls.df$col,fill=NA)
Of course, you can make the same graph by using aes(alpha = the_factor)). Because it has influence, you need to control it by using scale_alpha_manual().
g <- ggplot(df, aes(x=x,y=y,colour=interval)) +
geom_point(cex=2, shape=1, stroke=1, aes(alpha=as.factor(cluster))) + # add aes(alpha)
labs(x="X", y="Y",alpha="cluster") + # add alpha="cluster"
theme_bw() + theme(legend.key=element_blank(),panel.border=element_blank(),strip.background=element_blank()) + scale_color_manual(drop=FALSE,values=colors,name="DE") +
scale_alpha_manual(values=c(1,1,1)) + # add
guides(alpha = guide_legend(override.aes = list(colour = cluster.cols, pch=0))) # add
g <- p+geom_polygon(data=hulls.df,aes(x=x,y=y,group=cluster), color=hulls.df$col,fill=NA)
What you are asking for is two colour scales. My understanding is that this is not possible. But you can give the impression of having two colour scales with a bit of a cheat and using the filled symbols (shapes 21 to 25).
p <- ggplot(df, aes(x = x, y = y, fill = interval)) +
geom_point(cex = 2, shape = 21, stroke = 1, colour = NA)+
labs(x = "X", y = "Y") +
theme_bw() +
theme(legend.key = element_blank(), panel.border = element_blank(), strip.background = element_blank()) +
scale_fill_manual(drop=FALSE, values=colors, name="DE") +
geom_polygon(data = hulls.df, aes(x = x, y = y, colour = cluster), fill = NA) +
scale_colour_manual(values = cluster.cols)
p
Alternatively, use a filled polygon with a low alpha
p <- ggplot(df,aes(x=x,y=y,colour=interval))+
geom_point(cex=2,shape=1,stroke=1)+
labs(x="X", y="Y")+
theme_bw() +
theme(legend.key = element_blank(),panel.border=element_blank(), strip.background=element_blank()) +
scale_color_manual(drop=FALSE,values=colors,name="DE", guide = guide_legend(override.aes = list(fill = NA))) +
geom_polygon(data=hulls.df,aes(x=x,y=y,group=cluster, fill = cluster), alpha = 0.2, show.legend = TRUE) +
scale_fill_manual(values = cluster.cols)
p
But this might make the point colours difficult to see.
Related
I am working with the R programming language. I made the following graph that shows a scatterplot between points of two different colors :
library(ggplot2)
a = rnorm(10000,10,10)
b = rnorm(10000, 10, 10)
c = as.factor("red")
data_1 = data.frame(a,b,c)
a = rnorm(10000,7,5)
b = rnorm(10000, 7, 5)
c = as.factor("blue")
data_2 = data.frame(a,b,c)
final = rbind(data_1, data_2)
my_plot = ggplot(final, aes(x=a, y=b, col = c)) + geom_point() + theme(legend.position="top") + ggtitle("My Plot")
My Question: Is there a way to "change the colors of overlapping points"?
Here is what I tried so far:
1) I found the following question (Visualizing two or more data points where they overlap (ggplot R)) and tried the strategy suggested:
linecolors <- c("#714C02", "#01587A", "#024E37")
fillcolors <- c("#9D6C06", "#077DAA", "#026D4E")
# partially transparent points by setting `alpha = 0.5`
ggplot(final, aes(a,b, colour = c, fill = c)) +
geom_point(alpha = 0.5) +
scale_color_manual(values=linecolors) +
scale_fill_manual(values=fillcolors) +
theme_bw()
This shows the two different colors along with the overlap, but it is quite dark and still not clear. Is there a way to pick better colors/resolutions for this?
2) I found the following link which shows how to make color gradients for continuous variables : https://drsimonj.svbtle.com/pretty-scatter-plots-with-ggplot2 - but I have discrete colors and I do not know how to apply this
3) I found this question over here (Any way to make plot points in scatterplot more transparent in R?) which shows to do this with the base R plot, but not with ggplot2:
addTrans <- function(color,trans)
{
# This function adds transparancy to a color.
# Define transparancy with an integer between 0 and 255
# 0 being fully transparant and 255 being fully visable
# Works with either color and trans a vector of equal length,
# or one of the two of length 1.
if (length(color)!=length(trans)&!any(c(length(color),length(trans))==1)) stop("Vector lengths not correct")
if (length(color)==1 & length(trans)>1) color <- rep(color,length(trans))
if (length(trans)==1 & length(color)>1) trans <- rep(trans,length(color))
num2hex <- function(x)
{
hex <- unlist(strsplit("0123456789ABCDEF",split=""))
return(paste(hex[(x-x%%16)/16+1],hex[x%%16+1],sep=""))
}
rgb <- rbind(col2rgb(color),trans)
res <- paste("#",apply(apply(rgb,2,num2hex),2,paste,collapse=""),sep="")
return(res)
}
cols <- sample(c("red","green","pink"),100,TRUE)
# Very transparant:
plot(final$a , final$b ,col=addTrans(cols,100),pch=16,cex=1)
But this is also not able to differentiate between the two color classes that I have.
Problem: Can someone please suggest how to fix the problem with overlapping points, such that the overlap appear more visible?
Thanks!
I would use a density heatmap
ggplot(final, aes(x=a, y=b, col = c))+
stat_density_2d(aes(fill = stat(density)), geom = 'raster', contour = FALSE) +
scale_fill_viridis_c() +
coord_cartesian(expand = FALSE) +
geom_point(shape = '.', col = 'white')
or
ggplot(final, aes(x=a, y=b, col = c))+
stat_density_2d(aes(fill = stat(level)), geom = 'polygon') +
scale_fill_viridis_c(name = "density") +
geom_point(shape = '.')
or
ggplot(final, aes(x=a, y=b, col = c))+
geom_point(alpha = 0.1) +
geom_rug(alpha = 0.01)
I want to plot the gradient plot of intensities, something like this:
I though myself about creating a gradient grid whose distribution was my "I" function, but I have no idea how to do it or if there is an explicit package in R to accomplish this task.
Thank you so much for even thinking about this.
a <- 5*10^(-6)
d <- 0.5*0.005
l <- 500*10^(-9)
n <- pi
theta <- seq(-n,n,length=3500)
I <- function(x){(cos((pi*d*sin(x))/l))^2*(sin((pi*a*sin(x))/l)/((pi*a*sin(x))/l))^2}
y1 <- lapply(theta,I)
y <- unlist(y1)
df <- data.frame(theta,y)
I2 <- function(x){(sin((pi*a*sin(x))/l)/((pi*a*sin(x))/l))^2}
y12 <- lapply(theta,I2)
y2 <- unlist(y12)
df2 <- data.frame(theta,y2)
p = ggplot()
p +
geom_line(data = df, aes(theta,y)) +
xlim(-0.3,0.3) +
geom_line(data = df2, aes(theta,y2))
Making use of patchwork this could be achieved like so:
For the gradient make a second ggplot of rectangles using e.g. geom_rect where you map intensity on color and/or fill
This gradient plot could then be glued to the main plot via patchwork
To get a nice gradient plot
I tripled the number of grid points for the gradient plot,
mapped the cubic root of intensity on color and
get rid of all unnecessary elemnts like y-axis, color guide, ...
BTW:
As your functions are vectorized you don't need lapply to compute the intensities.
Instead of adjusting the limits via xlim() (which removes rows falling outside of the range), set them using coord_cartesian.
library(ggplot2)
library(tibble)
library(patchwork)
a <- 5*10^(-6)
d <- 0.5*0.005
l <- 500*10^(-9)
n <- pi
theta <- seq(-n,n,length=3500)
I <- function(x){(cos((pi*d*sin(x))/l))^2*(sin((pi*a*sin(x))/l)/((pi*a*sin(x))/l))^2}
y <- I(theta)
df <- data.frame(theta,y)
I2 <- function(x){(sin((pi*a*sin(x))/l)/((pi*a*sin(x))/l))^2}
y2 <- I2(theta)
df2 <- data.frame(theta,y2)
p1 = ggplot() +
geom_line(data = df, aes(theta,y)) +
geom_line(data = df2, aes(theta,y2)) +
coord_cartesian(xlim = c(-0.3,0.3))
g <- tibble(
xmin = seq(-n, n, length = 3 * 3500),
xmax = dplyr::lead(xmin),
y = I(xmin)
)
p2 <- ggplot(g, aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = 1, color = y^(1/3))) +
geom_rect() +
coord_cartesian(xlim = c(-0.3,0.3)) +
guides(color = FALSE) +
theme_minimal() +
theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())
p1 / p2 + plot_layout(heights = c(10, 1))
#> Warning: Removed 1 rows containing missing values (geom_rect).
I want to plot some point estimates with a couple of interval estimates around them, and then to superimpose the true point values using a different color and size, with a legend for the color.
I've tried lots of things. If I just use a new call to geom_point, I can't figure out how to add a legend. Therefore, my current approach resorts to stacking the data on top of itself, which is clumsy. Even then, the graph comes out wrong with big blue points for the True values, with the desired orange points on top of them.
I'd appreciate any help I can get.
nms <- c("2.5%","25%","50%","75%","97.5%","dose","truep")
a <- c(9.00614679684893e- 44,0.000123271800672435,0.0339603711049475,0.187721170170911,0.67452033450121,5,0.040752445325937)
b <- c(1.59502878028266e-25,0.00328588588499889,0.0738203422543555,0.25210200886225,0.714843425007051,10,0.0885844107052267)
cc <- c(1.41975723605948e-14,0.0184599181547097,0.118284929584256,0.311068595276067,0.74339745948793,15,0.141941915501108)
d <- c(0.0311851190805834,0.154722028150561,0.299318020818234,0.50887634580605,0.838779816278485,25,0.359181624981881)
e <- c(0.0529617924263383,0.289588386297245,0.566777817134668,0.883959271416755,0.999999999999317,40,0.680133380561602)
f <- c(0.0598904847882839,0.327655201251564,0.640100529843672,0.950060245074853,1,50,0.768120635812406)
g <- c(0.0641613025760661,0.355626055560067,0.686504841650593,0.978023943968809,1,60,0.823805809980712)
p <- as.data.frame(t(data.frame(a, b, cc, d, e, f, g)))
names(p) <- nms
# Faff duplicating data
p$truep <- 1.2 * p$truep
p2 <- p
p2[, 1:5] <- p$truep # truep is known, so there are no intervals
p3 <- rbind(p2, p)
p3$wh <- rep((c(2, 3)), each=nrow(p))
p3$col <- rep(c("orange", "blue"), each=nrow(p))
ggplot(p3, aes(dose, `50%`)) +
geom_point(aes(size=wh, color=col)) +
scale_size(range=c(5, 7), guide="none") +
scale_color_manual(name="", labels=c("Prior", "True"), values=c("blue", "orange")) +
geom_pointrange(aes(ymin=`2.5%`, ymax=`97.5%`, x=dose), color="blue") +
geom_pointrange(aes(ymin=`25%`, ymax=`75%`, x=dose), color="blue", size=2) +
geom_point(aes(dose, truep), color="orange") +
theme(axis.text.x=element_text(size=12), axis.title.x=element_text(size=14),
axis.text.y=element_text(size=12), axis.title.y=element_text(size=14),
legend.text=element_text(size=12))
R 3.3.1, ggplot2_2.1.1
Thanks,
Harry
I found a solution by splitting the dataset in two parts:
library(dplyr)
priors <- p%>%
mutate(datatype = 'Prior')
truevals <- p%>%
select(dose, truep)%>%
mutate(datatype = 'True')
ggplot(truevals, aes(x = dose, y = truep, colour = datatype))+
geom_pointrange(data = priors, aes(ymin=`25%`, ymax=`75%`, y = `50%`), size=1.5) +
geom_pointrange(data = priors, aes(ymin=`2.5%`, ymax=`97.5%`, y = `50%`))+
geom_point()+
scale_color_manual(name="", values=c("Prior" = "blue", "True" = "orange")) +
theme(axis.text.x=element_text(size=12), axis.title.x=element_text(size=14),
axis.text.y=element_text(size=12), axis.title.y=element_text(size=14),
legend.text=element_text(size=12))
First we plot the two pointranges based on the dataset with priors. Then the actual values. By adding a row with the datatype to both datasets we can add the legend. The result is this graph:
For the method ggplot2::geom_point() there is a show.legend attribute which is NA by default so setting this to TRUE should help.
You can add a legend using the labels attribute as follows:
ggplot2::scale_fill_manual(values = c("red", "black",
labels = c("Number of people",
"Number of birds"))
You are already doing this with labels=c("Prior", "True")
You can also change the look of the legend with:
ggplot2::theme(legend.position = "bottom",
legend.text = ggplot2::element_text(size = 22),
legend.box = "horizontal",
legend.key = ggplot2::element_blank())
I would like to create a colour blind test, similar to that below, using ggplot.
The basic idea is to use geom_hex (or perhaps a voronoi diagram, or possibly even circles as in the figure above) as the starting point, and define a dataframe that, when plotted in ggplot, produces the image.
We would start by creating a dataset, such as:
df <- data.frame(x = rnorm(10000), y = rnorm(10000))
then plot this:
ggplot(df, aes(x, y)) +
geom_hex() +
coord_equal() +
scale_fill_gradient(low = "red", high = "green", guide = FALSE) +
theme_void()
which gives the image below:
The main missing step is to create a dataset that actually plots a meaningful symbol (letter or number), and I'm not sure how best to go about this without painstakingly mapping the coordinates. Ideally one would be able to read in the coordinates perhaps from an image file.
Finally, a bit of tidying up could round the plot edges by removing the outlying points.
All suggestions are very welcome!
EDIT
Getting a little closer to what I'm after, we can use the image below of the letter 'e':
Using the imager package, we can read this in and convert it to a dataframe:
img <- imager::load.image("e.png")
df <- as.data.frame(img)
then plot that dataframe using geom_raster:
ggplot(df, aes(x, y)) +
geom_raster(aes(fill = value)) +
coord_equal() +
scale_y_continuous(trans = scales::reverse_trans()) +
scale_fill_gradient(low = "red", high = "green", guide = FALSE) +
theme_void()
If we use geom_hex instead of geom_raster, we can get the following plot:
ggplot(df %>% filter(value %in% 1), aes(x, y)) +
geom_hex() +
coord_equal() +
scale_y_continuous(trans = scales::reverse_trans()) +
scale_fill_gradient(low = "red", high = "green", guide = FALSE) +
theme_void()
so, getting there but clearly still a long way off...
Here's an approach for creating this plot:
Packages you need:
library(tidyverse)
library(packcircles)
Get image into a 2D matrix (x and y coordinates) of values. To do this, I downloaded the .png file of the e as "e.png" and saved in my working directory. Then some processing:
img <- png::readPNG("e.png")
# From http://stackoverflow.com/questions/16496210/rotate-a-matrix-in-r
rotate <- function(x) t(apply(x, 2, rev))
# Convert to one colour layer and rotate it to be in right direction
img <- rotate(img[,,1])
# Check that matrix makes sense:
image(img)
Next, create a whole lot of circles! I did this based on this post.
# Create random "circles"
# *** THESE VALUES WAY NEED ADJUSTING
ncircles <- 1200
offset <- 100
rmax <- 80
x_limits <- c(-offset, ncol(img) + offset)
y_limits <- c(-offset, nrow(img) + offset)
xyr <- data.frame(
x = runif(ncircles, min(x_limits), max(x_limits)),
y = runif(ncircles, min(y_limits), max(y_limits)),
r = rbeta(ncircles, 1, 10) * rmax)
# Find non-overlapping arrangement
res <- circleLayout(xyr, x_limits, y_limits, maxiter = 1000)
cat(res$niter, "iterations performed")
#> 1000 iterations performed
# Convert to data for plotting (just circles for now)
plot_d <- circlePlotData(res$layout)
# Check circle arrangement
ggplot(plot_d) +
geom_polygon(aes(x, y, group=id), colour = "white", fill = "skyblue") +
coord_fixed() +
theme_minimal()
Finally, interpolate the image pixel values for the centre of each circle. This will indicate whether a circle is centered over the shape or not. Add some noise to get variance in colour and plot.
# Get x,y positions of centre of each circle
circle_positions <- plot_d %>%
group_by(id) %>%
summarise(x = min(x) + (diff(range(x)) / 2),
y = min(y) + (diff(range(y)) / 2))
# Interpolate on original image to get z value for each circle
circle_positions <- circle_positions %>%
mutate(
z = fields::interp.surface(
list(x = seq(nrow(img)), y = seq(ncol(img)), z = img),
as.matrix(.[, c("x", "y")])),
z = ifelse(is.na(z), 1, round(z)) # 1 is the "empty" area shown earlier
)
# Add a little noise to the z values
set.seed(070516)
circle_positions <- circle_positions %>%
mutate(z = z + rnorm(n(), sd = .1))
# Bind z value to data for plotting and use as fill
plot_d %>%
left_join(select(circle_positions, id, z)) %>%
ggplot(aes(x, y, group = id, fill = z)) +
geom_polygon(colour = "white", show.legend = FALSE) +
scale_fill_gradient(low = "#008000", high = "#ff4040") +
coord_fixed() +
theme_void()
#> Joining, by = "id"
To get colours right, tweak them in scale_fill_gradient
I would like to have grouped boxplots which whiskers is defined by stat_summary. With help of changing-whisker-definition I wrote the following code:
# Data
xdf2 <- data.frame(month = rep(1:6,each=100)
, grp = rep(c('A','B'), 50*6)
)
xdf2$m <- rpois(n=nrow(xdf2),10)
# Definition of whiskers
f <- function(x) {
r <- quantile(x, probs = c(0.10, 0.25, 0.5, 0.75, 0.90))
names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
r
}
# Add points outside of whiskers
o <- function(x) {
subset(x, x < quantile(x,probs=0.1) | quantile(x,probs=0.9) < x)
}
# Plot
ggplot(data = xdf2
, aes(factor(month),m, color=grp)
) +
stat_summary(fun.data = f
, geom="boxplot"
, position=position_dodge(width=1)
, size=1
) +
stat_summary(fun.y = o, geom="point", position=position_dodge(width=1)) +
scale_color_manual(values = c("gray30","darkgrey"),labels = c("AAA","BBB")) +
theme_bw()
which gives the following graphs:
There are some changes I would like to perform:
How can I change the width of the boxes?
How can I fill the boxes with the same color of the border?
I would be happy for any help. Thanks a lot.
Map fill aesthetic to grp and add a similar scale for it. I'm using slightly different colours to make the mean visible.
To change boxplot widths, use ggsave with various width parameters, boxplots will be adjusted automatically. If you would like to add some space in between, you'll have to cheat a bit, see below.
It is not easy to modify width in conjunction with stat_summary: though there is a width parameter for geom_bar and geom_boxplot, I couldn't make it work properly with stat_summary. Instead, I'm using some dirty tricks with scale_x.
K <- length(unique(xdf2$month))
lev <- seq_len(1 + 2 * K)
xdf2$month2 <- factor(2 * xdf2$month,
levels = lev)
ggplot(data = xdf2, aes(month2, m, color = grp, fill = grp)) +
stat_summary(fun.data = f, geom="boxplot",
position=position_dodge(width=1.5), size=1) +
stat_summary(fun.y = o, geom="point", position=position_dodge(width=1.5)) +
scale_color_manual(values = c("gray30","darkgrey"),labels = c("AAA","BBB")) +
scale_fill_manual(values = c("gray20","grey75"),labels = c("AAA","BBB")) +
theme_bw() +
scale_x_discrete(limits = lev, breaks = 1:K*2, labels = 1:K)
Play around width in position_dodge for additional adjustment.