How to increase the hexbin legend in ggplot - r

I have the following hexbin plot:
I would like the count to start from the lowest possible count, for example 10, and show that with different colour. Note that the lowest count differs with different datasets. Therefore, it is difficult to set it to a specific number. The script that I have written to generate the plot is:
d <- ggplot(selectedDF, aes(BEC_Agg, AC)) + geom_hex(bins = 30) + theme_bw() +
theme(text = element_text(face = "bold", size = 16)) + xlab("\nNormalized BEC") + ylab("AC\n") + scale_fill_gradientn(colors = brewer.pal(3,"Dark2"))
I tried the solution here:
d <- ggplot(selectedDF, aes(BEC_Agg, AC)) + geom_hex(aes(fill=cut(..value..,breaks=pretty(..value..,n=5))),bins = 30) + theme_bw() +
theme(text = element_text(face = "bold", size = 16)) + xlab("\nNormalized BEC") + ylab("AC\n") + scale_fill_gradientn(colors = brewer.pal(3,"Dark2"))
But I got the following error:
Error in cut(value, breaks = pretty(value, n = 5)) :
object 'value' not found
How can I fix that?

You should define the variable value before running ggplot. Since lowest count differs among datasets, you might want to try something like value <- min(count(yourDF)).

Since your focus is tweaking the legend, here is a method. A sample data is generated as you didn't provide any.
# sample dataframe
set.seed(77)
x=rnorm(1000, mean = 4, sd = 1)
y=rnorm(1000, mean = 2, sd = 0.5)
df <- data.frame(x,y)
# -------------------------------------------------------------------------
# The following is from your script
base <- ggplot(df, aes(x, y)) + geom_hex(bins = 30) + theme_bw() +
theme(text = element_text(face = "bold", size = 16)) + xlab("\nNormalized BEC") + ylab("AC\n")
# -------------------------------------------------------------------------
base_limit_break <- base + scale_fill_continuous(limits = c(1,20), breaks = c(1:20))
# -------------------------------------------------------------------------
# This is the part relevant to your question
base_limit_break + guides(fill = guide_colorbar(barheight = unit(10, "cm"), reverse = TRUE))
Output

Related

How to remove zig-zag pattern in marginal distribution plot of integer values in R?

I am including marginal distribution plots on a scatterplot of a continuous and integer variable. However, in the integer variable marginal distribution plot (y-axis) there is this zig-zag pattern that shows up because the y-values are all integers. Is there any way to increase the "width" (not sure that's the right term) of the bins/values the function calculates the distribution density over?
The goal is to get rid of that zig-zag pattern that develops because the y-values are integers.
library(GlmSimulatoR)
library(ggplot2)
library(patchwork)
### Create right-skewed dataset that has one continous variable and one integer variable
set.seed(123)
df1 <- data.frame(matrix(ncol = 2, nrow = 1000))
x <- c("int","cont")
colnames(df1) <- x
df1$int <- round(rgamma(1000, shape = 1, scale = 1),0)
df1$cont <- round(rgamma(1000, shape = 1, scale = 1),1)
p1 <- ggplot(data = df1, aes(x = cont, y = int)) +
geom_point(shape = 21, size = 2, color = "black", fill = "black", stroke = 1, alpha = 0.4) +
xlab("Continuous Value") +
ylab("Integer Value") +
theme_bw() +
theme(panel.grid = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 16, color = "black"),
axis.text.y = element_text(size = 16, color = "black"))
dens1 <- ggplot(df1, aes(x = cont)) +
geom_density(alpha = 0.4) +
theme_void() +
theme(legend.position = "none")
dens2 <- ggplot(df1, aes(x = int)) +
geom_density(alpha = 0.4) +
theme_void() +
theme(legend.position = "none") +
coord_flip()
dens1 + plot_spacer() + p1 + dens2 +
plot_layout(ncol = 2, nrow = 2, widths = c(6,1), heights = c(1,6))
From ?geom_density:
adjust: A multiplicate [sic] bandwidth adjustment. This makes it possible
to adjust the bandwidth while still using the a bandwidth
estimator. For example, ‘adjust = 1/2’ means use half of the
default bandwidth.
So as a start try e.g. geom_density(..., adjust = 2) (bandwidth twice as wide as default) and go from there.

R Windrose percent label on figure

I am using the windrose function posted here: Wind rose with ggplot (R)?
I need to have the percents on the figure showing on the individual lines (rather than on the left side), but so far I have not been able to figure out how. (see figure below for depiction of goal)
Here is the code that makes the figure:
p.windrose <- ggplot(data = data,
aes(x = dir.binned,y = (..count..)/sum(..count..),
fill = spd.binned)) +
geom_bar()+
scale_y_continuous(breaks = ybreaks.prct,labels=percent)+
ylab("")+
scale_x_discrete(drop = FALSE,
labels = waiver()) +
xlab("")+
coord_polar(start = -((dirres/2)/360) * 2*pi) +
scale_fill_manual(name = "Wind Speed (m/s)",
values = spd.colors,
drop = FALSE)+
theme_bw(base_size = 12, base_family = "Helvetica")
I marked up the figure I have so far with what I am trying to do! It'd be neat if the labels either auto-picked the location with the least wind in that direction, or if it had a tag for the placement so that it could be changed.
I tried using geom_text, but I get an error saying that "aesthetics must be valid data columns".
Thanks for your help!
One of the things you could do is to make an extra data.frame that you use for the labels. Since the data isn't available from your question, I'll illustrate with mock data below:
library(ggplot2)
# Mock data
df <- data.frame(
x = 1:360,
y = runif(360, 0, 0.20)
)
labels <- data.frame(
x = 90,
y = scales::extended_breaks()(range(df$y))
)
ggplot(data = df,
aes(x = as.factor(x), y = y)) +
geom_point() +
geom_text(data = labels,
aes(label = scales::percent(y, 1))) +
scale_x_discrete(breaks = seq(0, 1, length.out = 9) * 360) +
coord_polar() +
theme(axis.ticks.y = element_blank(), # Disables default y-axis
axis.text.y = element_blank())
@teunbrand's answer got me very close! I wanted to add the code I used to get everything just right in case anyone in the future has a similar problem.
# Create the labels:
x_location <- pi # x location of the labels
# Get the percentage
T_data <- data %>%
dplyr::group_by(dir.binned) %>%
dplyr::summarise(count= n()) %>%
dplyr::mutate(y = count/sum(count))
labels <- data.frame(x = x_location,
y = scales::extended_breaks()(range(T_data$y)))
# Create figure
p.windrose <- ggplot() +
geom_bar(data = data,
aes(x = dir.binned, y = (..count..)/sum(..count..),
fill = spd.binned))+
geom_text(data = labels,
aes(x=x, y=y, label = scales::percent(y, 1))) +
scale_y_continuous(breaks = waiver(),labels=NULL)+
scale_x_discrete(drop = FALSE,
labels = waiver()) +
ylab("")+xlab("")+
coord_polar(start = -((dirres/2)/360) * 2*pi) +
scale_fill_manual(name = "Wind Speed (m/s)",
values = spd.colors,
drop = FALSE)+
theme_bw(base_size = 12, base_family = "Helvetica") +
theme(axis.ticks.y = element_blank(), # Disables default y-axis
axis.text.y = element_blank())

No legend shows up using multiple geom_point and geom_line functions in 1 graph

I'm struggling to learn the ins and outs of R, ggplot2, etc - being more used to being taught in an A to Z manner an entire (fixed) coding language (not used to open source - I learned to code when dinosaurs roamed the earth). So I have kluged together the following code to create one graph. Only ... I don't have the dupe legends problem -- I have no legend a'tall!
erc <- ggplot(usedcarval, aes(x = usedcarval$age)) +
geom_line(aes(y = usedcarval$dealer), colour = "orange", size = .5) +
geom_point(aes(y = usedcarval$dealer),
show.legend = TRUE, colour = "orange", size = 1) +
geom_line(aes(y = usedcarval$pvtsell), colour = "green", size = .5) +
geom_point(aes(y = usedcarval$pvtsell), colour = "green", size = 1) +
geom_line(aes(y = usedcarval$tradein), colour = "blue", size = .5) +
geom_point(aes(y = usedcarval$tradein), colour = "blue", size = 1) +
geom_line(aes(y = as.integer(predvalt)), colour = "gray", size = 1) +
geom_line(aes(y = as.integer(predvalp)), colour = "gray", size = 1) +
geom_line(aes(y = as.integer(predvald)), colour = "gray", size = 1) +
labs(x = "Value of a Used Car as it Ages (Years)", y = "Dollars") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5)) +
theme(axis.text.x = element_text(angle = 60, vjust = .6))
erc
I can't figure out how to put an image in this text since I have no link except to my dropbox...
I would appreciate any help. Sincerely, Stephanie
Ok, I felt like doing some ggplot, and it was an interesting task to contrast the way ggplot-beginners (I was one not so long ago) approach it compared to the way you need to do it to get things like legends.
Here is the code:
library(ggplot2)
library(gridExtra)
library(tidyr)
# fake up some data
n <- 100
dealer <- 12000 + rnorm(n,0,100)
age <- 10 + rnorm(n,3)
pvtsell <- 10000 + rnorm(n,0,300)
tradein <- 5000 + rnorm(n,0,100)
predvalt <- 6000 + rnorm(n,0,120)
predvalp <- 7000 + rnorm(n,0,100)
predvald <- 8000 + rnorm(n,0,100)
usedcarval <- data.frame(dealer=dealer,age=age,pvtsell=pvtsell,tradein=tradein,
predvalt=predvalt,predvalp=predvalp,predvald=predvald)
# The ggplot-naive way
erc <- ggplot(usedcarval, aes(x = usedcarval$age)) +
geom_line(aes(y = usedcarval$dealer), colour = "orange", size = .5) +
geom_point(aes(y = usedcarval$dealer),
show.legend = TRUE, colour = "orange", size = 1) +
geom_line(aes(y = usedcarval$pvtsell), colour = "green", size = .5) +
geom_point(aes(y = usedcarval$pvtsell), colour = "green", size = 1) +
geom_line(aes(y = usedcarval$tradein), colour = "blue", size = .5) +
geom_point(aes(y = usedcarval$tradein), colour = "blue", size = 1) +
geom_line(aes(y = as.integer(predvalt)), colour = "gray", size = 1) +
geom_line(aes(y = as.integer(predvalp)), colour = "gray", size = 1) +
geom_line(aes(y = as.integer(predvald)), colour = "gray", size = 1) +
labs(x = "ggplot naive way - Value of a Used Car as it Ages (Years)", y = "Dollars") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5)) +
theme(axis.text.x = element_text(angle = 60, vjust = .6))
# The tidyverse way
# ggplot needs long data, not wide data.
# Also we have two different sets of data for points and lines
# Reshape to long form: every column except `age` is stacked into a
# `series` (name) / `value` pair, so `series` can be mapped to colour and
# size and ggplot can build a legend from it.
gdf <- usedcarval %>% gather(series, value, -age)
# Only the three observed price series get point markers.
# Base subset() is used because dplyr is not loaded in this script; a bare
# filter() would resolve to stats::filter() and fail.
pdf <- subset(gdf, series %in% c("dealer", "pvtsell", "tradein"))
# our color and size lookup tables, keyed by series name
clrs <- c("dealer" = "orange", "pvtsell" = "green", "tradein" = "blue",
          "predvalt" = "gray", "predvalp" = "gray", "predvald" = "gray")
# Line widths mirror the naive plot: 0.5 for the observed series, 1 for the
# predicted ones.
szes <- c("dealer" = 0.5, "pvtsell" = 0.5, "tradein" = 0.5,
          "predvalt" = 1, "predvalp" = 1, "predvald" = 1)
trc <- ggplot(gdf, aes(x = age)) +
  # one line per series; colour and width are looked up by series name
  geom_line(aes(y = value, color = series, size = series)) +
  scale_color_manual(values = clrs) +
  scale_size_manual(values = szes) +
  # points come from the reduced data frame and use a fixed size
  geom_point(data = pdf, aes(x = age, y = value, color = series), size = 1) +
  labs(x = "tidyverse way - Value of a Used Car as it Ages (Years)", y = "Dollars") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(axis.text.x = element_text(angle = 60, vjust = .6))
# Stack the naive plot above the long-data plot for comparison.
grid.arrange(erc, trc, ncol = 1)
Study it, especially look at gdf, pdf and gather. You just can't get legends without using "long data".
If you want more information on the "tidyverse", start here: Hadley Wickham's tidyverse
If you are looking for a short example of how to take some series data that comes in wide format, convert it to long format (using gather), and then plot it with a ggplot (with a legend), here is a nice short example I cooked up for someone recently:
library(ggplot2)
library(tidyr)
# womp up some fake news (uhh... data)
x <- seq(-pi,pi,by=0.25)
y <- sin(x)
yhat <- sin(x) + 0.4*rnorm(length(x))
# This is the data in wide form
# you will never get ggplot to make a legend for it
# it simply hates wide data
df1 <- data.frame(x=x,y=y,yhat=yhat)
# So we use gather from tidyr to make it into long data
# creates two new colums, throws y and yhat in them, and replicates x as needed
# you have to look at the data frame to understand gather,
# and read the docs a few times
df2 <- gather(df1,series,value,-x)
# it is now in long form and we can plot it
ggplot(df2) + geom_line(aes(x,value,color=series))
So here is the plot:

R highlight a point on a line

Here is my code that produces a plot. You can run it:
library(ggplot2)
library(grid)
time <- c(87,87.5, 88,87,87.5,88)
value <- c(10.25,10.12,9.9,8,7,6)
variable <-c("a","a","a","b","b","b")
PointSize <-c(5,5,5,5,5,5)
ShapeType <-c(10,10,10,10,10,10)
stacked <- data.frame(time, value, variable, PointSize, ShapeType)
stacked$PointSize <- ifelse(stacked$time==88, 8, 5)
stacked$ShapeType <- ifelse(stacked$time==88, 16,10)
MyPlot <- ggplot(stacked, aes(x=time, y=value, colour=variable, group=variable)) + geom_line() + xlab("Strike") + geom_point(aes(shape = ShapeType, size = PointSize)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), axis.text = element_text(size = 10), axis.title=element_text(size=14), plot.title = element_text(size = rel(2)) , legend.position = "bottom", legend.text = element_text(size = 10), legend.key.size = unit(1, "cm") ) + scale_shape_identity(guide="none")+scale_size_identity(guide="none")
MyPlot
The plot that is produced highlights the point on each line where time = 88.
I also want to highlight the point on each line where time = 87.925.
Is this possible? The thing is that I do not have a corresponding value for that time. Is there a way to just put the point on the lines where time = 87.925, or does some interpolation need to take place so I can get a value for that time?
Thank you!
You can use ggplot_build to pull out an interpolated value for each line . . .
## Build a throwaway plot whose only purpose is to fit a straight line (lm)
## through each variable's points ##
tmp.plot <- ggplot(stacked, aes(x = time, y = value, colour = variable)) +
  stat_smooth(method = "lm")
## use ggplot_build to pull out the layer data computed by stat_smooth ##
tmp.dat <- ggplot_build(tmp.plot)$data[[1]]
## For each group, take the fitted row whose x is nearest the target time.
## Choosing the nearest row (rather than a fixed tolerance) yields exactly
## one row per group regardless of how finely the fit was evaluated ##
target.time <- 87.925
tmp.ids <- vapply(
  split(seq_len(nrow(tmp.dat)), tmp.dat$group),
  function(rows) rows[which.min(abs(tmp.dat$x[rows] - target.time))],
  integer(1)
)
## Select the fitted coordinates by column name, not by position ##
new.points <- tmp.dat[tmp.ids, c("x", "y")]
## Translate ggplot's integer group ids back to the labels in `variable`
## (NOTE(review): assumes groups are numbered in sorted/level order of
## `variable` -- true for the "a"/"b" data used here) ##
group.names <- levels(factor(stacked$variable))[tmp.dat$group[tmp.ids]]
## create a data frame with the new points, styled like the time == 88 ones ##
newpts <- data.frame(
  time = new.points$x,
  value = new.points$y,
  variable = group.names,
  PointSize = 8,
  ShapeType = 16
)
## add the new points to your original data frame ##
stacked <- rbind(stacked, newpts)
## plot: MyPlot stored the data it was built with, so swap in the updated
## data frame before drawing, otherwise the new points never appear ##
MyPlot %+% stacked
Instead of using a point for highlighting the 87.925 value for time, you can also use a vertical line:
ggplot(stacked, aes(x=time, y=value, colour=variable, group=variable)) +
geom_line() +
geom_point(aes(shape = ShapeType, size = PointSize)) +
geom_vline(aes(xintercept=87.925)) +
xlab("Strike") +
theme(axis.text.x = element_text(angle = 90, hjust = 1), axis.text = element_text(size = 10),
axis.title=element_text(size=14), plot.title = element_text(size = rel(2)), legend.position = "bottom",
legend.text = element_text(size = 10), legend.key.size = unit(1, "cm")) +
scale_shape_identity(guide="none") +
scale_size_identity(guide="none")
the result:
Update: you can add short lines with geom_segment. Replace geom_vline with
geom_segment(aes(x = 87.925, y = 6, xend = 87.925, yend = 6.3), color="black") +
geom_segment(aes(x = 87.925, y = 9.8, xend = 87.925, yend = 10.05), color="black") +
which results in:

ggplot2: A mean row in heatmaps

Say I created a heatmap using the function geom_raster() (from ggplot2).
What's a smart way to add a row at the bottom of the table showing (in my case) the 'Mean return' for each month on the period considered ?
It would be nice if there were some space left between the 1985-2013 period and the row for the average, and maybe the font color and the cells could be customized.
The core of my code is as follows (the object molten contains my data, originally a matrix passed through the melt() function of reshape2):
# Heatmap of monthly returns: months on the x axis (calendar order via
# month.abb), years on the y axis, tile colour driven by `value`.
# The aesthetic has to be spelled `fill`: geom_raster() and
# scale_fill_gradient2() only act on `fill`, so a misspelt name leaves the
# tiles uncoloured by value.
hm <- ggplot(data = molten, aes(x = factor(Var2, levels = month.abb), y=Var1, fill=value)) + geom_raster()
# Diverging palette; LtoM, Mid and MtoH are defined outside this snippet.
hm <- hm + scale_fill_gradient2(low=LtoM(100), mid=Mid, high=MtoH(100))
hm <- hm + labs(fill='% Return')
# Print each cell's value as a percentage with one decimal place.
hm <- hm + geom_text(aes(label=paste(sprintf("%.1f %%", value))), size = 4)
# One y-axis tick per year.
hm <- hm + scale_y_continuous(breaks = 1985:2013)
hm <- hm + xlab(label = NULL) + ylab(label = NULL)
hm <- hm + theme_bw()
hm <- hm + theme(axis.text.x = element_text(size = 10, hjust = 0, vjust = 0.4, angle=90))
It's not very concise, but I think this should do what you need.
You didn't provide a data set, so I just made some up. Also, the LtoM and MtoH functions are not included in any R package I could find, so I did a quick Google search and found them here
The following code produces a plot hm2 with facets to make the "Mean Return" row at the bottom:
require(reshape2)
require(ggplot2)
# Random data
set.seed(100)
casted = data.frame(Var1 = rep(1985:2013, times=12), Var2 = rep(month.abb, each=29), return = rnorm(12*29, 0, 9))
molten = melt(casted, id.vars = c("Var1", "Var2"))
LtoM <-colorRampPalette(c('red', 'yellow' ))
Mid <- "snow3"
MtoH <-colorRampPalette(c('lightgreen', 'darkgreen'))
# Averages
monthly.avg = cbind(Var1 = rep("Mean", 12), dcast(molten, Var2 ~ ., mean))
colnames(monthly.avg)[3] = "Mean"
molten2 = merge(molten, melt(monthly.avg), all.x = TRUE, all.y = TRUE)
# New plot
hm2 =
ggplot(data = molten2, aes(x = factor(Var2, levels = month.abb), y=Var1, fill=value)) +
geom_raster() +
scale_fill_gradient2(low=LtoM(100), mid=Mid, high=MtoH(100)) +
labs(fill='% Return') +
geom_text(aes(label=paste(sprintf("%.1f %%", value))), size = 4) +
xlab(label = NULL) + ylab(label = NULL) +
theme_bw() +
theme(axis.text.x = element_text(size = 10, hjust = 0, vjust = 0.4, angle=90)) +
facet_grid(variable ~ ., scales = "free_y", space = "free_y") + # grid layout
theme(strip.background = element_rect(colour = 'NA', fill = 'NA'), strip.text.y = element_text(colour = 'white')) # remove facet labels
which gives the following plot:
How about this:
I created a grid to mock up your data
Main changes, are to precalculate the aggregate and "spacer" data rows, and add to molten,
then add scale_y_discrete so you can label the rows,
then make sure the format works for the grey spacer bar with no % label (comments in code)
Easier in future if you include the data (or a sample) in the question
# library() stops with an error if ggplot2 is missing; require() would only
# return FALSE and let the script fail later.
library(ggplot2)
# Mock data: one row per (month, year) with a random return in [-30, 30).
molten <- expand.grid(c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"), 1985:2013, 0)
colnames(molten) <- c("Var2", "Var1", "value")
molten$value <- (runif(nrow(molten)) * 60) - 30
# create means: average `value` per month. The formula interface aggregates
# only the numeric column, so no attempt is made to average the month factor.
means <- aggregate(value ~ Var2, data = molten, FUN = mean)
means$Var1 <- "MEANS"
means <- means[, c("Var2", "Var1", "value")]
# create spacer bar: same months, blank row label, NA value (drawn grey below)
spacer <- means
spacer$Var1 <- " "
spacer$value <- NA
# append them to the data
molten <- rbind(molten, spacer, means)
hm <- ggplot(data = molten, aes(x = Var2, y = Var1, fill = value)) +
  geom_raster() +
  # replaced your functions for ease of use
  scale_fill_gradient2(low = "red", mid = "yellow", high = "green", na.value = "grey") +
  labs(fill = '% Return') +
  # don't format the NA vals with %, return blank
  geom_text(aes(label = ifelse((is.na(value)), "", paste(sprintf("%.1f %%", value)))), size = 4) +
  # make the scale discrete to add labels and enforce order (use a blank space for the spacer)
  scale_y_discrete(limits = c("MEANS", " ", 1985:2013)) +
  xlab(label = NULL) + ylab(label = NULL) +
  theme_bw() +
  theme(axis.text.x = element_text(size = 10, hjust = 0, vjust = 0.4, angle = 90))
hm

Resources