overlay lexis_grid with heatmap - r

I am trying to make a lexis_grid for a series of events for a synthetic cohort of people aged 0 to 80 over the period 1900-2021. What I'd like to get is something that looks a little like this:
Which I have taken from this article.
I have some dummy code created below:
library('dplyr')
library('LexisPlotR')
library('lubridate')
library('ggplot2')
# Synthetic cohort: 1000 random events, each with a calendar year
# (1900-2021), an age (0-80) and an event code (0-5).
# Columns are named with `=` inside data.frame(). Using `<-` there would
# create stray `year`, `age` and `event` variables in the calling
# environment and yield mangled column names that then have to be patched
# with colnames().
df <- data.frame(
  year = sample(1900:2021, 1000, replace = TRUE),
  age = sample(0:80, 1000, replace = TRUE),
  event = sample(0:5, 1000, replace = TRUE)
)
# Empty Lexis grid spanning years 1900-2021 and ages 0-80; `delta = 10`
# is handed to lexis_grid() as given.
mylexis <- lexis_grid(
  year_start = 1900, year_end = 2021,
  age_start = 0, age_end = 80,
  delta = 10
)
And I can create a heatmap in ggplot:
# Stand-alone heatmap: one tile per (year, age) pair, filled by event code.
ggplot(data = df, mapping = aes(x = year, y = age, fill = event)) +
  geom_tile()
But I have been unsuccessful at combining them. These were my best guesses:
# Attempt 1: add the tiles to the grid with x mapped to the numeric year.
# NOTE(review): presumably this does not line up because the grid's x axis
# is date-based (the suggested fix converts year to a Date) — confirm.
mylexis + geom_tile(df, mapping = aes(x = year(year), y = age, fill = event))
# Attempt 2: add a whole second ggplot() call to the grid object.
# NOTE(review): a complete ggplot object normally cannot be added to
# another plot with `+` (only layers can) — confirm the exact error.
mylexis + ggplot(df, aes(x = year, y = age, fill = event)) + geom_tile()
Any advice on where to go from here?

One option would be to convert your year variable to a proper date:
library(ggplot2)
# Overlay the tiles on the Lexis grid. Each year is converted to the Date
# of its 1 January before being mapped to x.
mylexis +
  geom_tile(
    data = df,
    mapping = aes(
      x = as.Date(paste0(year, "-01-01")),
      y = age,
      fill = event
    )
  )
EDIT: A bit hacky, but a quick way to change the order of the layers is to manipulate the layers of the ggplot2 object directly, i.e. move the geom_tile (layer 3) to the first position. (I have to admit that, at least for your example data, the difference is hardly visible.)
library(ggplot2)
# Build the overlay first, with years converted to 1 January dates.
p <- mylexis +
  geom_tile(
    data = df,
    mapping = aes(
      x = as.Date(paste0(year, "-01-01")),
      y = age,
      fill = event
    )
  )

# Reorder the layer list so the tile layer (currently third) is drawn
# first and the two existing layers are drawn on top of it.
p$layers <- p$layers[c(3, 1, 2)]
p

Related

How to group variable by quality number?

Here's a madeup dataset that demonstrates the general idea of what I'm working with.
# Made-up ratings: readers 1-3 with 100 rows each, every row scored 1-4
# at random.
reader_ID <- rep(seq_len(3), each = 100)
Quality <- sample(seq_len(4), size = 300, replace = TRUE)
df <- data.frame(Quality, reader_ID)
df
# Proportional stacked bars per reader; geom_col() is the shorthand for
# geom_bar(stat = "identity").
quality_percentage <- ggplot(
  df,
  aes(x = reader_ID, y = Quality, fill = Quality)
) +
  geom_col(position = "fill")
quality_percentage
Here is the graph it produced. I'm trying to have each quality grouped together instead of having them all separate.
You can simply sort your data frame by Quality before plotting:
# Order the rows by Quality before plotting so that equal Quality values
# end up stacked next to each other within every bar.
df_by_quality <- df[order(df$Quality), ]
ggplot(df_by_quality, aes(x = reader_ID, y = Quality, fill = Quality)) +
  geom_col(position = "fill")

How create a box plot + line plot in a single plot using ggplot2

I want to create a box plot + line plot in a single plot using ggplot2
This is my code so far:
library(ggplot2)
# Example data: 7 measurement days with 6 scores each, from two treatment
# groups (Saline, RP). One row of values below corresponds to one day.
dat <- data.frame(
  day = rep(c(0, 10, 14, 21, 28, 35, 42), each = 6),
  group = c(
    'Saline', 'RP', 'Saline', 'Saline', 'RP', 'RP',
    'Saline', 'RP', 'Saline', 'Saline', 'RP', 'RP',
    'Saline', 'RP', 'Saline', 'Saline', 'RP', 'RP',
    'Saline', 'RP', 'Saline', 'Saline', 'RP', 'RP',
    'Saline', 'RP', 'Saline', 'Saline', 'RP', 'RP',
    'Saline', 'RP', 'Saline', 'Saline', 'RP', 'RP',
    'Saline', 'RP', 'Saline', 'Saline', 'RP', 'RP'
  ),
  score = c(
    37.5, 43, 7, 63, 26, 15,
    17, 16, 43, 26, 53, 26,
    26, 26, 43, 10, 6, 15,
    18, 9, 10, 4, 8, 18,
    60, 26, 20, 12.5, 9, 43,
    43, 43, 11, 10, 7, 60,
    43, 43, 32, 10.5, 8, 57.5
  )
)
# Box plot of score per day; `day` is turned into a factor so each day
# gets its own position, and boxes are filled by treatment group.
g1 <- ggplot(dat, aes(x = factor(day), y = score)) +
  geom_boxplot(mapping = aes(fill = group))
g1
When doing box plot, I want scores of different treatments(groups) to be represented separately, so I let x = factor(day).
But for line plot, I want each day's score to be the average of the two treatments(group) of the day.
This is how my plot look like now
This is how I want my plot to look
How can I do this? Thank you so much!
#Libraries
library(tidyverse)
#Data
# Same example data as in the question: 7 measurement days with 6 scores
# each, from two treatment groups. One row of values below is one day.
dat <- data.frame(
  day = rep(c(0, 10, 14, 21, 28, 35, 42), each = 6),
  group = c(
    'Saline', 'RP', 'Saline', 'Saline', 'RP', 'RP',
    'Saline', 'RP', 'Saline', 'Saline', 'RP', 'RP',
    'Saline', 'RP', 'Saline', 'Saline', 'RP', 'RP',
    'Saline', 'RP', 'Saline', 'Saline', 'RP', 'RP',
    'Saline', 'RP', 'Saline', 'Saline', 'RP', 'RP',
    'Saline', 'RP', 'Saline', 'Saline', 'RP', 'RP',
    'Saline', 'RP', 'Saline', 'Saline', 'RP', 'RP'
  ),
  score = c(
    37.5, 43, 7, 63, 26, 15,
    17, 16, 43, 26, 53, 26,
    26, 26, 43, 10, 6, 15,
    18, 9, 10, 4, 8, 18,
    60, 26, 20, 12.5, 9, 43,
    43, 43, 11, 10, 7, 60,
    43, 43, 32, 10.5, 8, 57.5
  )
)
#How to
# Per-day median score across both groups; the red line follows these.
# NOTE(review): the question asks for the per-day average, whereas
# median() is used here — confirm which statistic is wanted.
daily_score <- dat %>%
  group_by(day) %>%
  summarise(score = median(score, na.rm = TRUE))

ggplot(dat, aes(x = factor(day), y = score)) +
  geom_boxplot(aes(fill = group)) +
  # `group = 1` puts all days into a single group so that one line is
  # drawn across the factor x axis.
  geom_line(
    data = daily_score,
    mapping = aes(group = 1),
    size = 1,
    col = "red"
  )

plot multiple lines in ggplot

I need to plot hourly data for different days using ggplot, and here is my dataset:
The data consists of hourly observations, and I want to plot each day's observation into one separate line.
Here is my code
# Split the hourly table into one 24-row slice per day, keeping columns
# 1 and 6. The slices must not overlap: the previous ranges 24:47 and
# 48:71 reused row 24 and shifted days 2 and 3 by one hour.
# NOTE(review): assumes `bj` holds exactly 24 consecutive rows per day
# starting at row 1 — confirm against the real data.
xbj1 <- bj[1:24, c(1, 6)]
xbj2 <- bj[25:48, c(1, 6)]
xbj3 <- bj[49:72, c(1, 6)]

# One line per day, each drawn from its own slice with a fixed colour.
ggplot() +
  geom_line(data = xbj1, aes(x = Date, y = Value), colour = "blue") +
  geom_line(data = xbj2, aes(x = Date, y = Value), colour = "grey") +
  geom_line(data = xbj3, aes(x = Date, y = Value), colour = "green") +
  xlab('Hour') +
  ylab('PM2.5')
Please advise on this.
I'll make some fake data (I won't try to transcribe yours) first:
# Reproducible fake data: two consecutive days (today and tomorrow),
# 24 hourly rows each, with a random integer Value between 1 and 100.
set.seed(2)
x <- data.frame(
  Date = rep(Sys.Date() + c(0L, 1L), each = 24),
  # Year, Month, Day ... are not used here
  Hour = rep(0:23, 2),
  Value = sample(100, size = 48, replace = TRUE)
)
This is a straight-forward ggplot2 plot:
library(ggplot2)
# Variant 1: all days in one panel, one coloured line per Date.
ggplot(x, aes(Hour, Value)) +
  geom_line(aes(color = as.factor(Date))) +
  scale_color_discrete(name = "Date")

# Variant 2: one panel (facet row) per Date.
ggplot(x, aes(Hour, Value)) +
  geom_line() +
  facet_grid(Date ~ .)
I highly recommend you find good tutorials for ggplot2, such as http://www.cookbook-r.com/Graphs/. Others exist, many quite good.

R ggplot2 - Add a ribbon for only part of the x axis

Say I have two datasets. One that contains two months of data:
# Daily series from 2017-05-01 to 2017-07-01 (62 days), constant 20 units.
units_sold <- data.frame(
  date = seq(as.Date("2017-05-01"), as.Date("2017-07-01"), by = 1),
  units = rep(20, times = 62),
  category = "units_sold"
)
And one that contains just a week:
# One week of forecast values (2017-06-12 to 2017-06-18), constant 5 units.
forecast <- data.frame(
  date = seq(as.Date("2017-06-12"), as.Date("2017-06-18"), by = 1),
  units = 5,
  category = "forecast"
)
I can put them on the same plot. I.e.,
# Stack both series into one long table and draw one line per category.
joined <- rbind(units_sold, forecast)
ggplot(joined, aes(x = date, y = units, colour = category)) +
  geom_line()
However, I can't seem to figure out how to put a ribbon between the two lines.
This is what I'm trying:
library(dplyr)
# Wide table for the ribbon: one row per forecast date, carrying both the
# forecast value and the units sold on that date. The two `category`
# columns produced by the join are dropped.
ribbon_dat <- forecast %>%
  left_join(units_sold, by = "date") %>%
  rename(forecast = units.x, units_sold = units.y) %>%
  select(-c(category.x, category.y))
# Failing attempt: the ribbon layer still draws from `joined`, while its
# aesthetics are given as whole columns of `ribbon_dat`, which has a
# different number of rows; this produces the length error quoted below.
ggplot(data = joined, aes(x=date, y=units, colour = category)) +
geom_line() +
geom_ribbon(aes(x=ribbon_dat$date, ymin=ribbon_dat$forecast, ymax=ribbon_dat$units_sold))
I get this error: Error: Aesthetics must be either length 1 or the same as the data (69): x, ymin, ymax, y, colour
You are very close, you need to pass the second dataset to the data argument in geom_ribbon().
# Only `x = date` is shared by all layers. The lines take y and colour
# from `joined`, while the ribbon gets its own data set and bounds.
ggplot(joined, aes(x = date)) +
  geom_line(aes(y = units, colour = category)) +
  geom_ribbon(aes(ymin = forecast, ymax = units_sold), data = ribbon_dat)

Simple ?heatmap? of temperature in R (ggplot2)

I'm trying to make a simple Date * temperature heatmap (?raster graph?) that shows temperature over time based on binned temperature groups. Something like this, but only along the date axis (no time variable). I'd prefer to use ggplot, but keep running astray. The graph the data produces is headed in the right direction, but I can't figure out how to get rid of the y-axis. I'd appreciate any help.
# 100 consecutive days starting 2011-07-01, each with a random integer
# temperature between 1 and 20. `length.out` is spelled out instead of
# relying on partial matching of `len`.
dat <- data.frame(
  temp = sample(20, 100, replace = TRUE),
  date = seq(as.Date("2011-07-01"), by = 1, length.out = 100)
)
# Tile plot with date on x and temp on y; tiles are coloured by temp on a
# blue-to-red gradient.
p <- ggplot(dat, aes(date, temp)) +
  geom_tile(aes(fill = temp)) +
  scale_fill_gradient(low = "blue", high = "red")
Thanks!
So you don't want to map temp on the y axis?
Well then you could use a fixed value for y and remove the rest of the y-axis:
# 100 consecutive days starting 2011-07-01, each with a random integer
# temperature between 1 and 20. `length.out` is spelled out instead of
# relying on partial matching of `len`.
dat <- data.frame(
  temp = sample(20, 100, replace = TRUE),
  date = seq(as.Date("2011-07-01"), by = 1, length.out = 100)
)
# library() stops with an error if ggplot2 is missing, whereas require()
# would only return FALSE and let the script fail later.
library(ggplot2)

# Single-row heatmap: every tile sits at the constant y = 1, so only the
# fill colour carries the temperature. The y-axis title and breaks are
# removed because the axis has no meaning here.
ggplot(dat, aes(x = date, y = 1)) +
  geom_tile(aes(fill = temp)) +
  scale_fill_gradient(low = "blue", high = "red") +
  labs(y = NULL) +
  scale_y_continuous(breaks = NULL)
You could also try doing something like the plot below with the metvurst package.
http://i.imgur.com/8Js1Uz7.png
# One year of hourly timestamps (365 days x 24 hours) with one random
# temperature per timestamp. Previously only 60 temperatures were drawn
# and silently recycled across all 8760 rows.
n_hours <- 8760
dat <- data.frame(
  temp = sample(20, n_hours, replace = TRUE),
  date = seq(as.POSIXct("2011-01-01 00:00"), by = 3600, length.out = n_hours)
)
dat$year <- as.numeric(format(dat$date, "%Y"))
dat$month <- as.numeric(format(dat$date, "%m"))
# Install and load metvurst library
# install_github() takes the repository as "user/repo"; the older
# two-argument form (repo, username) is no longer accepted.
devtools::install_github("tim-salabim/metvurst")
library(metvurst)
# NOTE(review): brewer.pal() comes from RColorBrewer, which is not loaded
# explicitly here — confirm it is attached before running.
plot.air.temp <- strip(
  x = dat$temp,
  date = dat$date,
  cond = dat$year,
  arrange = "long",
  colour = colorRampPalette(rev(brewer.pal(11, "Spectral"))),
  main = "Daily Air Temperatures\n\nTemperature [°C]"
)
plot.air.temp

Resources