Adjust hexbin legend breaks - r

In this example of a hexbin plot, the legend on the right has 10 levels/classes/breaks. Does anyone know how to change the number of levels? Say I want to change it to 5 or something.
library(hexbin)

# Simulate 1000 points scattered around (50, 30).
x <- rnorm(n = 1000, mean = 50, sd = 1)
y <- rnorm(n = 1000, mean = 30, sd = 0.5)
df <- data.frame(x = x, y = y)
# plot(df)

# Bin the points into hexagons using hexbin()'s default settings.
hb <- hexbin(x = df[["x"]], y = df[["y"]])
# hb <- hexbin(x = df[["x"]], y = df[["y"]], xbins = 30)
# plot(hb)

# Draw the binned counts together with the legend.
gplot.hexbin(hb)

Like this?
gplot.hexbin(hb,colorcut=5)
And here's approximately the same thing using ggplot.
library(ggplot2)

# Cut the per-hexagon counts into roughly five classes and fill by class.
# `after_stat(count)` is the count computed by the hex-binning stat;
# include.lowest keeps a hexagon whose count equals the lowest break.
ggplot(df, aes(x, y)) +
  geom_hex(
    aes(fill = cut(after_stat(count),
                   breaks = pretty(after_stat(count), n = 5),
                   include.lowest = TRUE)),
    bins = 15
  ) +
  # Grey ramp from light to black that adapts to however many classes
  # pretty() actually produced (it only aims for about n intervals).
  scale_fill_grey("Count", start = 5 / 6, end = 0)

Related

Can I draw a horizontal line at specific number of range of values using ggplot2?

I have data (from excel) with the y-axis as ranges (also calculated in excel) and the x-axis as cell counts and I would like to draw a horizontal line at a specific value in the range, like a reference line. I tried using geom_hline(yintercept = 450) but I am sure it is quite naive and does not work that way for a number in range. I wonder if there are any better suggestions for it :)
# Advance to a new base-graphics frame.
plot.new()
library(ggplot2)
# Read the tab-separated file into a data frame and preview it.
d <- read.delim("C:/Users/35389/Desktop/R.txt", sep = "\t")
head(d)
# Prepend the row names as an ordinary first column.
d <- cbind(row.names(d), data.frame(d), row.names=NULL)
d
# Bar chart of Bin.range against CTRL; the columns are referenced as d$...
# inside aes().
g <- ggplot(d, aes(d$CTRL,d$Bin.range))+ geom_col()
# Attempted reference line at y = 450.
g + geom_hline(yintercept = 450)
First of all, have a look at my comments.
Second, this is how I suggest you proceed: don't calculate those ranges in Excel. Let ggplot do it for you.
Say, your data is like this:
df <- data.frame(x = runif(100, 0, 500))
head(df)
#> x
#>1 322.76123
#>2 57.46708
#>3 223.31943
#>4 498.91870
#>5 155.05416
#>6 107.27830
Then you can make a plot like this:
library(ggplot2)

# Let ggplot2 do the binning: 50-wide bins with a bin edge at 0, a dashed red
# reference line at 450, and the axes flipped so the bins run vertically.
ggplot(data = df) +
  geom_histogram(
    mapping = aes(x = x),
    binwidth = 50, boundary = 0,
    colour = "white", fill = "steelblue"
  ) +
  geom_vline(xintercept = 450, linetype = 2, size = 1, colour = "red") +
  coord_flip()
We don't have your data, but the following data frame is of a similar structure:
d <- data.frame(CTRL = sample(100, 10),
Bin.range = paste(0:9 * 50, 0:9 * 50 + 49.9, sep = "-"))
The first thing to note is that your y axis does not have your ranges ordered correctly. You have 50-99.9 at the top of the y axis. This is because your ranges are stored as characters and ggplot will automatically arrange these alphabetically, not numerically. So you need to reorder the factor levels of your ranges:
d$Bin.range <- factor(d$Bin.range, d$Bin.range)
When you create your plot, don't use d$Bin.range, but instead just use Bin.range. ggplot knows to look for this variable in the data frame you have passed.
g <- ggplot(d, aes(CTRL, Bin.range)) + geom_col()
If you want to draw a horizontal line, your two options are to specify the y axis label at which you want to draw the line (i.e. yintercept = "400-449.9") or, which is what I suspect you want, use a numeric value of 9.5 which will put it between the top two values:
g + geom_hline(yintercept = 9.5, linetype = 2)

quarter circles ggplot-s with nonexisting points

I am trying to use ggplot to plot quarter circles to visualize contour plots, but I get a misconfigured plot when using geom_area (following this tutorial on stacked area with ggplot2).
The code I tried reads
library(ggplot2)
library(dplyr)

N <- 1E2                 # number of points per arc
r <- rev(c(1, 2, 4, 7))  # radii, largest first
maxXY <- max(r) + .25 * max(r)
grupp <- c("0", "0.25", "0.5", "0.75")

# One data frame per radius; preallocated instead of grown inside the loop.
datalist <- vector("list", length(r))

plot(0, 0, xlim = c(0, maxXY), ylim = c(0, maxXY))
# seq_along() is safe even if `r` is empty, unlike 1:length(r).
for (i in seq_along(r)) {
  quadX <- seq(from = 0, to = r[i], length.out = N)  # calculate x coords
  quadY <- sqrt(r[i]^2 - quadX^2)                    # calculate y coords
  lines(quadX, quadY)
  # data for ggplot
  dat <- data.frame(X = quadX, Y = quadY)
  dat$group <- grupp[i]
  datalist[[i]] <- dat  # add it to your list
}
DF <- do.call(rbind, datalist)

# stacked area chart
# NOTE: geom_area() uses position = "stack" by default, so each group's Y is
# drawn on top of the groups before it rather than measured from the x axis.
p1 <- ggplot(DF, aes(x = X, y = Y, fill = group)) +
  geom_area(alpha = 0.6, size = 1, colour = "black")
plot(p1)
and I get quarter circles plotted correctly with basic plot
but a weird one with geom_area
Any help would be very appreciated. MJS
EDIT: using Z.Lin's suggestions I get the correct plot, thanks!

ggplot2 geom_tile: how to have no spacing between lines when plotting non-continuous data

I'm trying to do a levelplot using ggplot2 for a (meteorological) variable. The variable is measured continuously in time (my x-axis), but in non-continuous heights (y-axis) at every time step.
The produced plot therefore shows data at the heights (y-coordinates) specified, but nothing in between.
Here's an example:
library(ggplot2)
# 400 random values between 0 and 10, one per index/height combination.
data <- runif(400, min=0, max=10)
# 20 positions along the x axis.
index <- c(1:20)
# 20 unevenly spaced heights for the y axis; the gaps widen with height.
heights <- c(1,2,3,4,5,7,9,12,15,19,23,28,33,39,45,52,59,67,75,83)
# All index/height combinations (columns X and Y) plus the value column `data`.
dat <- as.data.frame(cbind(expand.grid(X=index,Y=heights),data))
# Columns are referenced by position inside aes(): 1 = X, 2 = Y, 3 = data.
ggplot(dat, aes(x=dat[,1], y=dat[,2],z=dat[,3])) +geom_tile(aes(fill=dat[,3]))
This produces the following plot:
Is there an easy way to fill the plot fully, i.e. make the lines in the upper part of the plot broader?
Thank you!
OK one more solution.. you could interpolate using the approx function. Although maybe 2D kriging would be more appropriate for your application???
library(purrr)

# For every X, interpolate the measured values onto each integer height from
# 1 to 83. approx() returns a list with `x` (the requested heights) and `y`
# (the interpolated values); `.id` records which X each piece came from.
dat2 <- dat %>%
  split(.$X) %>%
  map_dfr(~ approx(.$Y, .$data, xout = 1:83), .id = "X")

# Refer to columns by bare name inside aes() rather than dat2$...;
# `.id` stores X as character, so convert it back for a continuous axis.
ggplot(dat2, aes(x = as.integer(X), y = x, fill = y)) +
  geom_tile()
That will give you :
You can use the height and width aesthetics in geom_tile or, alternatively, use geom_rect:
library(tidyverse)

data <- runif(400, min = 0, max = 10)
index <- c(1:20)
heights <- c(1, 2, 3, 4, 5, 7, 9, 12, 15, 19, 23, 28, 33, 39, 45, 52, 59, 67, 75, 83)

# Lower edge of every level: the top of the level below it, starting from 0.
# tibble() replaces the deprecated data_frame().
bounds <- tibble(heights, heights0 = c(0, heights[-length(heights)]))

# One row per index/height cell, with its value and its left/lower edges.
dat <- crossing(index = index, heights = heights) %>%
  mutate(
    Z = data,
    index0 = index - 1
  ) %>%
  # Explicit key avoids the "Joining with `by = ...`" guess.
  left_join(bounds, by = "heights")

ggplot(dat, aes(xmin = index0, xmax = index, ymin = heights0, ymax = heights, fill = Z)) +
  geom_rect()
This assumes that your heights are the top of each level and that they start at zero.
You could convert the y axis to be factor in order to eliminate the dead space. This will, however, not make the upper lines broader.
# Same plot as in the question, but with the second column wrapped in factor()
# so that each height becomes a discrete level on the y axis.
ggplot(dat, aes(x=dat[,1], y=factor(dat[,2]),z=dat[,3])) +geom_tile(aes(fill=dat[,3]))

colored points in R

I have a table with 3 numeric columns. Two of them are coordinates and the third one represents the color. There are hundreds of rows in my text file.
I want to make a picture where the first two numbers are the coordinates of each point and the third one is the color of the point: the bigger the number, the darker the point.
How could i do this?
The example of the row in my file:
99.421875 48.921875 0.000362286050144
Will this do?
library(ggplot2)

# sample data (defined before the plot so the snippet runs top to bottom)
set.seed(45)
df <- data.frame(
  x = runif(100) * sample(1:10, 100, replace = TRUE),
  y = runif(100) * sample(1:50, 100, replace = TRUE),
  col = runif(100) / sample(1:100)
)

# assuming your data is in df and x, y, and col are the column names.
# Larger `col` values are drawn more opaque, i.e. darker.
ggplot(data = df, aes(x = x, y = y)) +
  geom_point(colour = "red", size = 3, aes(alpha = col))
Plot:
A lattice solution:
library(lattice)

# Columns: x position, y position, colour value.
mydata <- cbind(c(1, 2, 3), c(1, 1, 1), c(2, 5, 10))

# Large solid discs; colour index and opacity both follow the third column.
xyplot(
  mydata[, 2] ~ mydata[, 1],
  pch = 19, cex = 15,
  col = mydata[, 3],
  alpha = mydata[, 3] / 10
)
alpha here controls the transparency.
Here is a base R solution:
##Generate data
##Here z lies between 0 and 10
dd = data.frame(x = runif(100), y= runif(100), z= runif(100, 0, 10))
First normalise z:
dd$z = dd$z- min(dd$z)
dd$z = dd$z/max(dd$z)
Then plot as normal using the size of z for the shading:
##See ?gray for other colour combinations
##pch=19 gives solid points. See ?points for other shapes
##gray(0) is black and gray(1) is white, so z is inverted to make the
##largest values the darkest points
plot(dd$x, dd$y, col=gray(1 - dd$z), pch=19)
Another solution using base R. To get a colour ramp instead of grey, set some of the channel arguments inside rgb() to 0.
n <- 1000
data <- data.frame(x = runif(n), y = runif(n), col = runif(n))

# Scale the third column to [0, 1] and invert it: rgb(0, 0, 0) is black, so
# the largest values become the darkest points.
shade <- 1 - data[, 3] / max(data[, 3])

# Replace one or two of the `shade` arguments with 0 for a coloured ramp.
plot(data[, 1:2], col = rgb(shade, shade, shade), pch = 20)

How to change color of scatterplot in ggplot2

In ggplot2, how can I change the colors used for the points in a scatter plot?
Here's a small dataset:
dat <- data.frame(x=1:20,
y=rnorm(20,0,10),
v=20:1)
Suppose I want my points colored using the value v. I can change the way in which the coloring is performed using the scale_colour_gradient() function.
library(ggplot2)

# Colour the points by the column `v` of `dat`. I(4) sets a fixed point size
# instead of mapping the constant 4 to a size legend.
qplot(x, y, data = dat, colour = v, size = I(4)) +
  scale_colour_gradient(low = "black", high = "white")
This example should just get you started. For more, check out scale_colour_brewer(), mentioned in the other post.
Check out the ggplot documentation for scale_colour_brewer: http://www.had.co.nz/ggplot2/scale_brewer.html
some examples:
# see available palettes:
library(RColorBrewer)
display.brewer.all(5)
# scatter plot of a random sample of 1000 rows from `diamonds`,
# coloured by clarity
dsamp <- diamonds[sample(nrow(diamonds), 1000), ]
d <- qplot(carat, price, data=dsamp, colour=clarity)
# open a new graphics device before each variant so the plots can be compared
dev.new()
d
dev.new()
d + scale_colour_brewer(palette="Set1")
dev.new()
d + scale_colour_brewer(palette="Blues")
If your data have discrete categories that you wish to colour, then your task is a bit easier. For example, if your data look like this, with each row representing a transaction,
# Simulated transactions: one row per sale, each belonging to one of five
# customers. Console prompts are removed so the snippet can be pasted and run;
# the printed output is kept as #> comments.
d <- data.frame(customer = sample(letters[1:5], size = 20, replace = TRUE),
                sales = rnorm(20, 8000, 2000),
                profit = rnorm(20, 40, 15))
head(d, 6)
#> customer sales profit
#> a 8414.617 15.33714
#> a 8759.878 61.54778
#> e 8737.289 56.85504
#> d 9516.348 24.60046
#> c 8693.642 67.23576
#> e 7291.325 26.12234
and you want to make a scatter plot of transactions coloured by customer, then you can do this
p <- ggplot(d, aes(sales,profit))
p + geom_point(aes(colour = customer))
to get....

Resources