I am using an example from here. My question is: how can I add a specific bounding box to this heatmap, for example a red outlined box around the top-left four tiles?
# Load packages with library(): unlike require(), it stops with an error when
# a package is missing instead of returning FALSE and failing later on.
library(ggplot2)
library(reshape)
library(scales)
# Wide table: one row per industry, one column per region.
# check.names = FALSE keeps the column name 'all regions' exactly as written.
mydf <- data.frame(industry = c('all industries','steel','cars'),
                   'all regions' = c(250,150,100), americas = c(150,90,60),
                   europe = c(150,60,40), check.names = FALSE)
mydf
# Long format: one row per (industry, region) pair, with the region name in
# `variable` and the number in `value`. The argument is spelled out in full
# (id.vars) rather than relying on partial matching.
mymelt <- melt(mydf, id.vars = 'industry')
mymelt
# Heatmap with the value printed on each tile. The label is mapped to the bare
# column name instead of mymelt$value, and no `fill` is mapped in geom_text()
# because the text layer has no fill aesthetic.
ggplot(mymelt, aes(x = industry, y = variable, fill = value)) +
  geom_tile() +
  geom_text(aes(label = value))
A quick and dirty (some hard-coding) possibility is to use geom_rect, where the positions are given by the numerical values of the levels of x and y variables to be bound with a box, plus/minus an offset.
# Heatmap with a red box around the tiles at x positions 1-2 and y positions
# 2-3. Each tile is centred on its level's integer position, so going half a
# unit beyond the outer positions puts the box on the tile edges.
ggplot(mymelt, aes(x = industry, y = variable, fill = value, label = value)) +
  geom_tile() +
  geom_text() +
  # annotate() draws the rectangle once; geom_rect(aes(<constants>)) would
  # inherit mymelt and draw the same rectangle again for every row.
  annotate("rect",
           xmin = 1 - 0.5, xmax = 2 + 0.5, ymin = 2 - 0.5, ymax = 3 + 0.5,
           fill = "transparent", color = "red", size = 1.5)
A less hard-coded version:
# Work out the box from level names instead of hard-coded positions.
# convert x and y variables to factors
ind <- as.factor(mymelt$industry)
# NOTE: this object shares its name with ggplot2::vars(); calls like vars(x)
# still reach the function, because R skips non-function objects in a call.
vars <- as.factor(mymelt$variable)
# position of each bounding level within its factor's levels; match() gives
# that index directly (NA if the level does not exist)
xmin <- match("all industries", levels(ind))
xmax <- match("cars", levels(ind))
ymin <- match("americas", levels(vars))
ymax <- match("europe", levels(vars))
# half a tile on each side, so the box sits on the tile edges
offset <- 0.5
ggplot(mymelt, aes(x = industry, y = variable, fill = value, label = value)) +
  geom_tile() +
  geom_text() +
  # annotate() draws the rectangle once; geom_rect(aes(...)) would inherit
  # mymelt and repeat the same rectangle for every row.
  annotate("rect",
           xmin = xmin - offset,
           xmax = xmax + offset,
           ymin = ymin - offset,
           ymax = ymax + offset,
           fill = "transparent", color = "red", size = 1.5)
Related
I want to shade part of the background in each facet of a simple plot. If I omit faceting and run geom_rect + geom_point, the expected results appear, as shown in the MRE below. If I omit the rectangle and run geom_point + facet_grid, the expected 4 panels have each point in the correct facet. But when I combine geom_rect + geom_point + facet_grid, the points in the first category, and only those, get plotted in every facet. What is going on?
# Minimal reproducible example: 16 points, with two levels in each of
# category.1 and category.2, shown with a shaded band and 2 x 2 facets.
library(ggplot2)
# fixed seed so the runif() jitter below is reproducible
set.seed(42)
syn.dat <- data.frame(
category.1 = as.factor(rep(c("1A", "1B"), each = 8)),
category.2 = as.factor(rep(rep(c("2A", "2B"), times = 2), each = 4)),
# the base values have length 16 while runif() returns 8 numbers, so the
# jitter is recycled across the two halves
x = rep(-1:2, each = 4) + runif(8, max = .4),
y = rep(-1:2, each = 4) + runif(8, max = .4))
# ggplot() is called with no default data: the rectangle layer is built from
# constants only, and only geom_point() is given syn.dat.
ggplot() +
geom_rect(aes(xmin = -Inf, xmax = Inf, ymin = .5,
ymax = Inf), fill = "lightyellow") +
geom_point(data = syn.dat, aes(x = x, y = y)) +
# one panel per category.1 (columns) x category.2 (rows) combination
facet_grid(cols = vars(category.1),
rows = vars(category.2))
I'm not totally sure about this, but it may be that you need to explicitly provide the data argument to ggplot itself, in order for facet_grid to correctly pick up all the values?
# Same plot, but with syn.dat supplied to ggplot() so that every layer,
# including the shaded rectangle, carries the faceting columns.
ggplot(syn.dat) +
  geom_rect(aes(xmin = -Inf, xmax = Inf, ymin = 0.5, ymax = Inf), fill = "lightyellow") +
  geom_point(aes(x = x, y = y)) +
  # cols must be given as cols = vars(...); vars(cols = category.1) would
  # instead create a faceting variable that is merely named "cols".
  facet_grid(rows = vars(category.2), cols = vars(category.1))
I have been trying to build a bar chart of GDP growth in the UK and overlay it with recession bands. The bar plot works on its own, but the moment I overlay the recession bands, I get an error saying that a variable cannot be found.
# UK recession periods, one row per recession, with the start (Peak) and end
# (Trough) dates parsed as Date columns.
# The table is passed through `text =` so read.table() opens and closes its
# own connection; a bare textConnection() would be left open.
# strip.white = TRUE removes the space that follows each comma before the
# fields are converted to Date.
uk.recessions.df <- read.table(text =
"Peak, Trough
1857-06-01, 1858-12-01
1867-06-01, 1869-12-01
1873-10-01, 1879-03-01
1882-03-01, 1885-05-01
1887-03-01, 1888-04-01
1890-07-01, 1891-05-01
1893-01-01, 1894-06-01
1895-12-01, 1897-06-01
1919-03-01, 1921-07-01
1930-01-01, 1931-12-01
1956-04-01, 1956-08-01
1961-07-01, 1962-01-01
1973-09-01, 1974-04-01
1975-04-01, 1975-10-01
1980-01-01, 1981-04-01
1990-07-01, 1991-09-01
2008-04-01, 2009-07-01
2020-01-01, 2020-07-01", sep = ',', strip.white = TRUE,
colClasses = c('Date', 'Date'), header = TRUE)
# Keep only the recessions whose Peak is on or after the earliest Date in the
# GDP table.
# NOTE(review): tbl.QQGDP is not defined in the code shown here.
uk.recessions.trim.df <- subset(uk.recessions.df, Peak >= min(tbl.QQGDP$Date))
# Add a logical column that is TRUE where growth is negative; it is used as
# the bar fill below.
# NOTE(review): %>%, mutate() and theme_tq() come from packages that are not
# loaded in the code shown here (presumably dplyr and tidyquant) - confirm.
tbl.data <- tbl.QQGDP %>%
mutate(Value = GDPGrowth < 0)
# Bar chart of growth over time; x, y and fill are set at the top level of the
# plot, so later layers inherit them by default.
p <- ggplot(data = tbl.data, aes(x = Date, y = GDPGrowth, fill = Value)) +
geom_col(position = "identity", colour = "black", size = 0.25) +
scale_fill_manual(values = c("#85225f","#dbab01"), guide = FALSE) +
theme_tq()
# Recession bands from a different data frame. This layer still inherits the
# top-level x / y / fill mappings, and uk.recessions.trim.df has no GDPGrowth
# column - this is the layer behind "object 'GDPGrowth' not found".
p <- p +
geom_rect(data = uk.recessions.trim.df,
aes(xmin = Peak, xmax = Trough, ymin = -Inf, ymax = Inf),
fill = "grey", alpha = 0.5)
p
The error I get is:
Error in FUN(X[[i]], ...) : object 'GDPGrowth' not found
I cannot figure out what I am doing wrong. Any help (even if it is to tell me off for a silly mistake!) will be greatly appreciated.
By default, geom_*() elements inherit the aesthetic mappings from the top level of the plot (ggplot()). In your case, the geom_rect() call is inheriting aes(x = Date, y = GDPGrowth, fill = Value) but can't find those objects as you have a different data source (uk.recessions.trim.df instead of tbl.data).
If you add the option inherit.aes = FALSE to geom_rect() you'll get the desired plot.
# Bar chart of growth over time, filled by the logical Value column.
# guide = "none" hides the fill legend; guide = FALSE is deprecated for this.
p <- ggplot(data = tbl.data, aes(x = Date, y = GDPGrowth, fill = Value)) +
  geom_col(position = "identity", colour = "black", size = 0.25) +
  scale_fill_manual(values = c("#85225f","#dbab01"), guide = "none")
# Recession bands come from their own data frame. inherit.aes = FALSE stops
# this layer from picking up x / y / fill from ggplot(), which refer to
# columns that uk.recessions.trim.df does not have.
p <- p +
  geom_rect(data = uk.recessions.trim.df,
            aes(xmin = Peak, xmax = Trough, ymin = -Inf, ymax = Inf),
            fill = "grey", alpha = 0.5,
            inherit.aes = FALSE)
p
An alternative (and probably better method) is to define data and aes in each geom separately, instead of in the initial ggplot() call. Eg:
# Start from an empty ggplot() and give each layer its own data and mappings,
# so no layer inherits columns it does not have.
# guide = "none" hides the fill legend; guide = FALSE is deprecated for this.
p <- ggplot() +
  geom_col(data = tbl.data,
           aes(x = Date, y = GDPGrowth, fill = Value),
           position = "identity", colour = "black", size = 0.25) +
  scale_fill_manual(values = c("#85225f","#dbab01"), guide = "none")
# Recession bands spanning the full height of the panel.
p <- p +
  geom_rect(data = uk.recessions.trim.df,
            aes(xmin = Peak, xmax = Trough, ymin = -Inf, ymax = Inf),
            fill = "grey", alpha = 0.5)
p
This is my dataset example:
# Toy data: six groups of three rows each, a letter label in X ("a".."r")
# and the running count 1..18 in Y.
df <- data.frame(
  group = rep(paste0("group", 1:6), each = 3),
  X = letters[1:18],
  Y = 1:18
)
As you can see, there are three variables, two of them categorical (group and X). I have constructed a line chart using ggplot2 where the X axis is X and Y axis is Y.
I want to shade the background using the group variable, so that 6 different colors must appear.
I tried this code:
# Scatter of Y against X with one rectangle whose corners are fixed values
# (x from 0 to 3, y from -0.5 upwards), not taken from the data.
ggplot(data = df, mapping = aes(x = X, y = Y)) +
  geom_rect(
    fill = 'blue', alpha = 0.05,
    xmin = 0, xmax = 3,
    ymin = -0.5, ymax = Inf
  ) +
  geom_point(size = 2.5)
But geom_rect() only colors the area between 0 and 3 on the X axis.
I guess I could do it manually by repeating geom_rect() as many times as I have groups. But I am sure there must be a more elegant way that uses the variable itself. Any ideas?
To get shading for the entire graph, geom_rect needs the xmin and xmax locations for all the rectangles, so these need to be provided by mapping xmin and xmax to columns in the data, rather than hard-coding them.
# One rectangle per row, running from that row's X to the next row's X and
# filled by group, with the points drawn on top.
ggplot(data = df, mapping = aes(x = X, y = Y)) +
  geom_rect(
    mapping = aes(
      xmin = X,
      xmax = dplyr::lead(X),
      ymin = -0.5,
      ymax = Inf,
      fill = group
    ),
    alpha = 0.5
  ) +
  geom_point(size = 2.5) +
  theme_classic()
Here is one way:
# Give each X label a numeric position so a rectangle can extend half a unit
# on either side of it. factor() is applied first because as.numeric() on a
# character column of labels such as "a" returns NA with a warning; going
# through a factor yields the label's position among the sorted labels.
df2 <- df %>% mutate(Xn = as.numeric(factor(X)))
ggplot(df2) +
  geom_rect(aes(xmin = Xn - .5, xmax = Xn + .5, ymin = -Inf, ymax = Inf, fill = group),
            alpha = 0.5) +
  geom_point(aes(x = Xn, y = Y), size = 2.5) +
  # the axis is continuous now, so put the original labels back on the breaks
  scale_x_continuous(breaks = df2$Xn, labels = df2$X)
This will get you close - need to add a couple columns to your data frame. Using dplyr here.
# Add, for every row, the first and last X value of its group; these become
# the left and right edges of that group's shaded rectangle.
# sort(...)[1] is kept (rather than min/max) so that it also works when X is
# an unordered factor. TRUE is spelled out because T can be reassigned, and
# ungroup() makes sure df is not left grouped for later code.
df <- df %>%
  group_by(group) %>%
  mutate(xmin = sort(X)[1],
         xmax = sort(X, decreasing = TRUE)[1]) %>%
  ungroup()
ggplot(df, aes(x = X, y = Y)) +
  geom_point(size = 2.5) +
  # one (identical) rectangle is drawn per row of a group, so the low alpha
  # values stack up
  geom_rect(aes(xmin = xmin, xmax = xmax, fill = group), ymin = -0.5, ymax = Inf,
            alpha = 0.05)
I have a data frame in this kind of format:
# Example data: two series ("A" and "B") sampled once per second over one
# hour (3601 timestamps each), plus a `shading` label that splits the hour
# into three consecutive stretches ("x", "y", "z").
df <- data.frame(
  time = rep(seq(from = as.POSIXct("2016-08-10 11:00:00"),
                 to = as.POSIXct("2016-08-10 12:00:00"), by = "sec"), 2),
  # Each series must contribute exactly 3601 values. diffinv() returns one
  # value more than its input and diff() one value fewer, so the inputs are
  # sized 3600 and 3602; with 3601 for both, the last random-walk value would
  # spill over into facet "B".
  value = c(diffinv(rnorm(3600)), diff(rnorm(3602))),
  facets = c(rep("A", 3601), rep("B", 3601)),
  shading = rep(c(rep("x", 1500), rep("y", 750), rep("z", 1351)), 2),
  stringsAsFactors = FALSE
)
I can plot the value time series on separate graphs sharing the x-axis using ggplot2's facet_grid function. I also want to include another dimension in my plot - the variable shading to shade the background.
I know I can do this by specifying the ranges of the x-axis the shaded regions will cover:
# Earliest and latest timestamp carrying each shading label, and the overall
# span of the plotted values.
xRange1 <- with(df, range(time[shading == "x"]))
xRange2 <- with(df, range(time[shading == "y"]))
xRange3 <- with(df, range(time[shading == "z"]))
yRange <- range(df[["value"]])
When I first set this up I include alpha in each of my geom_rect
# Line plot of value over time, one panel per `facets` value with its own
# y scale, followed by three shaded rectangles (one per shading range).
# NOTE(review): each geom_rect() uses aes() and inherits df from ggplot(), so
# it presumably draws one rectangle per row of df; thousands of stacked
# alpha = 0.3 rectangles would look opaque - confirm.
ggplot(df, aes(x = time, y = value)) +
geom_line() +
facet_grid(facets ~ ., scales = "free_y") +
# x limits come from the precomputed ranges; y limits, alpha and fill are
# fixed parameters of the layer
geom_rect(aes(xmin = xRange1[1], xmax = xRange1[2]),
ymin = yRange[1], ymax = yRange[2],
alpha = 0.3, fill = "#EEF2BF") +
geom_rect(aes(xmin = xRange2[1], xmax = xRange2[2]),
ymin = yRange[1], ymax = yRange[2],
# NOTE(review): the trailing comma after the fill value passes an empty
# argument; it looks unintentional
alpha = 0.3, fill = "#A3BAB6",) +
geom_rect(aes(xmin = xRange3[1], xmax = xRange3[2]),
ymin = yRange[1], ymax = yRange[2],
alpha = 0.3, fill = "#BFA67E")
Obviously the alpha didn't work.
One way to get around this is to put geom_line() at the end:
# Same plot with the layers reordered: the three shaded rectangles are added
# first and geom_line() last, so the line is drawn on top of the shading.
# NOTE(review): the rectangles still use aes() with df inherited from
# ggplot(), so they are presumably still drawn once per row - confirm.
ggplot(df, aes(x = time, y = value)) +
facet_grid(facets ~ ., scales = "free_y") +
geom_rect(aes(xmin = xRange1[1], xmax = xRange1[2]),
ymin = yRange[1], ymax = yRange[2],
alpha = 0.3, fill = "#EEF2BF") +
geom_rect(aes(xmin = xRange2[1], xmax = xRange2[2]),
ymin = yRange[1], ymax = yRange[2],
# NOTE(review): the trailing comma after the fill value passes an empty
# argument; it looks unintentional
alpha = 0.3, fill = "#A3BAB6",) +
geom_rect(aes(xmin = xRange3[1], xmax = xRange3[2]),
ymin = yRange[1], ymax = yRange[2],
alpha = 0.3, fill = "#BFA67E") +
geom_line()
But that hides the grid and doesn't solve the underlying problem.
I have looked at several posts and none of them address this directly. I have looked at using other functions in my plot including scale_fill_manual
(last example on page) and scale_alpha
Edit: I suspect the best solution also involves setting up the geom_rect in a less manual way. My actual data frame has more than 3 character values I want to shade with.
I am using an example from here. My question is: how can I add a specific bounding box to this heatmap, for example a red outlined box around the top-left four tiles?
# Load packages with library(): unlike require(), it stops with an error when
# a package is missing instead of returning FALSE and failing later on.
library(ggplot2)
library(reshape)
library(scales)
# Wide table: one row per industry, one column per region.
# check.names = FALSE keeps the column name 'all regions' exactly as written.
mydf <- data.frame(industry = c('all industries','steel','cars'),
                   'all regions' = c(250,150,100), americas = c(150,90,60),
                   europe = c(150,60,40), check.names = FALSE)
mydf
# Long format: one row per (industry, region) pair, with the region name in
# `variable` and the number in `value`. The argument is spelled out in full
# (id.vars) rather than relying on partial matching.
mymelt <- melt(mydf, id.vars = 'industry')
mymelt
# Heatmap with the value printed on each tile. The label is mapped to the bare
# column name instead of mymelt$value, and no `fill` is mapped in geom_text()
# because the text layer has no fill aesthetic.
ggplot(mymelt, aes(x = industry, y = variable, fill = value)) +
  geom_tile() +
  geom_text(aes(label = value))
A quick and dirty (some hard-coding) possibility is to use geom_rect, where the positions are given by the numerical values of the levels of x and y variables to be bound with a box, plus/minus an offset.
# Heatmap with a red box around the tiles at x positions 1-2 and y positions
# 2-3. Each tile is centred on its level's integer position, so going half a
# unit beyond the outer positions puts the box on the tile edges.
ggplot(mymelt, aes(x = industry, y = variable, fill = value, label = value)) +
  geom_tile() +
  geom_text() +
  # annotate() draws the rectangle once; geom_rect(aes(<constants>)) would
  # inherit mymelt and draw the same rectangle again for every row.
  annotate("rect",
           xmin = 1 - 0.5, xmax = 2 + 0.5, ymin = 2 - 0.5, ymax = 3 + 0.5,
           fill = "transparent", color = "red", size = 1.5)
A less hard-coded version:
# Work out the box from level names instead of hard-coded positions.
# convert x and y variables to factors
ind <- as.factor(mymelt$industry)
# NOTE: this object shares its name with ggplot2::vars(); calls like vars(x)
# still reach the function, because R skips non-function objects in a call.
vars <- as.factor(mymelt$variable)
# position of each bounding level within its factor's levels; match() gives
# that index directly (NA if the level does not exist)
xmin <- match("all industries", levels(ind))
xmax <- match("cars", levels(ind))
ymin <- match("americas", levels(vars))
ymax <- match("europe", levels(vars))
# half a tile on each side, so the box sits on the tile edges
offset <- 0.5
ggplot(mymelt, aes(x = industry, y = variable, fill = value, label = value)) +
  geom_tile() +
  geom_text() +
  # annotate() draws the rectangle once; geom_rect(aes(...)) would inherit
  # mymelt and repeat the same rectangle for every row.
  annotate("rect",
           xmin = xmin - offset,
           xmax = xmax + offset,
           ymin = ymin - offset,
           ymax = ymax + offset,
           fill = "transparent", color = "red", size = 1.5)