I'd love to make a timeline density graph like the New York Times does for COVID-19 cases (screenshot below). I am trying to do it with crime data instead of COVID data. Any ideas on how to use R (ggplot2, plotly, etc) to make a graph like this that shows density by day?
So far, I haven't found a similar style in the R-graph Gallery. Thanks.
update: Here is the closest approximation I have come up with so far:
dat_c_grp <- dat_c %>%
group_by(report_date, month) %>%
summarize(count = n())
p <- ggplot(dat_c_grp, aes(report_date, month, fill = count))+
geom_tile(color= "white",size=0.1) +
scale_fill_viridis(name="Daily",option ="C")
output:
p
I'd like the months collapsed the months into one row. I can't figure out how to make it all one row.
Here's a full reprex of one way to do this which emulates the look of the original fairly well (obviously I've had to make the data up):
library(ggplot2)
set.seed(1)
Dates <- rep(seq(as.Date("2020-03-01"), by = "1 week", length.out = 36), 3)
Places <- rep(c("Conneticut", "Fairfield", "New Haven"), each = 36)
Cases <- as.numeric(replicate(3, rpois(36, dgamma((1:36)/3, 2.5) * 100))) +
as.numeric(replicate(3, rpois(36, 0.0002 * exp(1:36)^(1/3))))
df <- data.frame(Dates, Places, Cases)
ggplot(df, aes(Dates, Places, fill = Cases)) +
geom_tile(color = "gray92") +
facet_grid(Places~., scales = "free_y") +
scale_fill_gradientn(colors = c("#f3df8e", "#fdad45", "#ff700a", "#cc0a06")) +
theme_minimal() +
scale_x_date(date_breaks = "month", labels = scales::date_format("%b")) +
theme(panel.spacing = unit(50, "points"),
legend.position = "top",
axis.title.y = element_blank(),
strip.text = element_blank(),
panel.grid = element_blank())
Thanks to both Allan Cameron and Konrad Rudolph.
Here is the code to answer my question:
dat_c_grp <- dat_c %>%
count(report_date, month, name = 'count')
p <- ggplot(dat_c_grp, aes(report_date, 0, fill = count))+
geom_tile(color= "white",size=0.1) +
scale_fill_gradientn(colors = c("#f3df8e", "#fdad45", "#ff700a", "#cc0a06"))+
removeGrid()+
theme(panel.spacing = unit(50, "points"),
legend.position = "top",
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y=element_blank(),
strip.text = element_blank(),
panel.grid = element_blank())
p
And output:
Related
I have a sample data frame as follows:
set.seed(87)
df = data.frame(
sample = rep(c("PB","PB","BM","BM"),8),
status = rep(c("healthy","myeloma"),16),
family = c(rep("CD4",16),rep("CD8",16)),
phenotype = rep(c("Tn","Tn","Tn","Tn","Tcm","Tcm","Tcm","Tcm","Tem","Tem","Tem","Tem","Temra","Temra","Temra","Temra"),2),
percent = sample(20:30,32,replace=T)
)
I want to plot the data in pie chart format also with faceting in both the x and y axis formatted like this nice diagram I found online:
However I only know how to facet by grouping the two variables (CD4/CD8 and healthy/myeloma) together first with the following code which can't give me the nice separation of the x axis variables into two rows:
df %>%
mutate(group = paste(family,status)) %>%
ggplot(aes(x = "",y=percent,fill = phenotype)) +
geom_bar(width = 1, stat = "identity") +
coord_polar("y", start=0) +
facet_grid(sample~group) +
theme(axis.text.x=element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.border = element_blank(),
panel.grid=element_blank(),
axis.ticks = element_blank())
Appreciate any ideas on how to make the whole diagram look more like the one online, thanks!
Maybe this is closer -
library(dplyr)
library(ggplot2)
df %>%
mutate(group = paste(family,status)) %>%
ggplot(aes(x = "",y=percent,fill = phenotype)) +
geom_bar(width = 1, stat = "identity") +
coord_polar("y", start=0) +
facet_grid(sample~group) +
theme_classic() +
theme(axis.text.x=element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.border = element_blank(),
panel.grid=element_blank(),
axis.ticks = element_blank(),
strip.background = element_blank(),
strip.text = element_text(size = 15, face = 'bold'),
legend.position="bottom")
I need to make a graph (using ggplot2) that organizes bars by the value of different categories. If you see below, I was able to sort by my first level ("Very Important"), but I have not been able to get the second level ("Important) to get organized correctly - example: 'Passion for farming' should end up above 'Cultivating a healthy workplace'.
library(ggplot2)
library(dplyr)
library(forcats)
library(reshape)
data<- data.frame(Category=c("Open attitude","Flexible","Interest in learning","Passion for farming",
"Dependable business networks","Community ties", "Reliable crew",
"Family support", "Responsive government", "Protect natural resources and biodiversity",
"Build healthy soil", "Diversify farm products", "Minimize external inputs",
"Water-use efficiency", "Effective planning and monitoring", "Cultivating a healthy workplace",
"Diversifying markets and venues", "Focusing on recurrent customers",
"Appropriate equipment and infrastructure", "Financial leeway and capacity"),
Very.important=c(78.57,85.71,85.71,92.86,100.00,85.71,78.57,64.29,50.00,57.14,
100.00,64.29,57.14,57.14,78.57,92.86,71.43,71.43,64.29,71.43),
Important=c(21.43,14.29,14.29,7.14,0.00,14.29,21.43,35.71,21.43,35.71,
0.00,35.71,28.57,35.71,21.43,0.00,14.29,7.14,28.57,21.43),
Slightly.important=c(0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,28.57,7.14,
0.00,0.00,14.29,7.14,0.00,7.14,14.29,21.43,7.14,7.14),
Not.important=c(0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,
0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00))
data
alldata <- melt(data)
First, to organize the variables in a new data.frame, I am using fct_relevel() from forcats package. However, even though I am adding 'Important' as a second level in the arrange function, it is not being recognized. The graph turns out the same as if I only include 'Very Important' in the function.
alldata1 = alldata %>%
ungroup() %>%
arrange(fct_relevel(variable, "Very.important"), value) %>%
mutate(Category= fct_inorder(Category))
I am including my code for the graph for your reference.
mycolors <- c('#0570b0','#74a9cf','#bdc9e1','#f1eef6')
ALLres <- ggplot(data = alldata1, aes(x =Category, y = value, fill = variable)) +
labs(y="Percentage", x = "") +
geom_col(width = 0.7, position = position_stack(reverse = T)) +
coord_flip() +
theme_bw() +
theme(text = element_text(size = rel(3), colour = "black"), # x-label
axis.text.y = element_text(size = rel(3.5), colour = "black"),
axis.text.x = element_text(size = rel(3), colour = "black")) +
theme(legend.text = element_text(size = rel(3))) + #legend size
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(legend.position="bottom") +
scale_fill_manual (values=mycolors,
name = "Response",
labels = c("Very Important", "Important",
"Slightly Important", "Not Important"))
ALLres
Thank you in advance!
You can arrange the data first on Very.important and then Important and assign the factor levels of Category column.
library(tidyverse)
mycolors <- rev(c('#0570b0','#74a9cf','#bdc9e1','#f1eef6'))
data %>%
arrange(Very.important, Important) %>%
mutate(Category = factor(Category, Category)) %>%
pivot_longer(cols = -Category) %>%
ggplot(aes(x=Category, y = value, fill = name)) +
labs(y="Percentage", x = "") +
geom_col(width = 0.7) +
coord_flip() +
theme_bw() +
theme(text = element_text(size = rel(3), colour = "black"), # x-label
axis.text.y = element_text(size = rel(3.5), colour = "black"),
axis.text.x = element_text(size = rel(3), colour = "black")) +
theme(legend.text = element_text(size = rel(3))) + #legend size
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(legend.position="bottom") +
scale_fill_manual (values=mycolors,
name = "Response")
I am using geom_tile to visualize random draws
Generate data:
set.seed(1)
df= crossing(sim=1:10,part= 1:10)
df$result = sample(c(1,0),size = nrow(df), replace=T)
df = df %>%
group_by(sim)%>%
# find out how many successful (1) pilots there were in the first 4 participants
summarize(good_pilots = sum(result[1:4])) %>%
arrange(good_pilots) %>%
ungroup() %>%
# add this back into full dataframe
full_join(df)
# plot data
plot = ggplot(df, aes( y=factor(sim), x=part)) +
geom_tile(aes(fill = factor(result)), colour = "black",
show.legend = T)+
scale_fill_manual(values=c("lightgrey", "darkblue"))+# c(0,1)
theme(panel.border = element_rect(size = 2),
plot.title = element_text(size = rel(1.2)),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
legend.title = element_blank(),
legend.position = "right")+ theme_classic()+ coord_fixed(ratio=1)
This results in:
What I actually want is the y axis to be ordered by the # of blue (ie 1's) in the first four columns of the block (which is calculated in good_pilots).
I tried scale_y_discrete but that cannot be what is intended:
plot + scale_y_discrete(limits=df$sim[order(df$good_pilots)])
resulting in:
From what I can tell it seems like the ordering worked correctly, but using scale_y_discrete caused the plot to be messed up.
You can use reorder here
ggplot(df, aes(y = reorder(sim, good_pilots), x = part)) +
...
This question already has answers here:
Multirow axis labels with nested grouping variables
(7 answers)
Closed 6 years ago.
variable <- c("PM10","SO2","NO","NO2","PM10","SO2","NO","NO2","PM10","SO2","NO","NO2","PM10","SO2","NO","NO2","PM10","SO2","NO","NO2","PM10","SO2","NO","NO2")
sex <- c("male","male","male","male","female","female","female","female",
"male","male","male","male","female","female","female","female",
"male","male","male","male","female","female","female","female")
exposureperiod <- c("P1","P1","P1","P1","P1","P1","P1","P1",
"P2","P2","P2","P2","P2","P2","P2","P2",
"P3","P3","P3","P3","P3","P3","P3","P3")
set.seed(100)
coef <- runif(24, -2, 2)
coef_lb <- coef - 0.3
coef_ub <- coef + 0.3
df <- data.frame(variable,sex,exposureperiod,coef,coef_lb,coef_ub)
df$variable <- factor(df$variable,levels=c("PM10","SO2","NO","NO2"))
levels(df$variable) <- c("PM[10]","SO[2]", "NO", "NO[2]")
df$exposureperiod <- factor(df$exposureperiod,levels=c("P1","P2","P3"))
df$sex <- factor(df$sex,levels=c("male","female"))
df <- df[order(df$variable,df$sex),]
df$aux <- c(1,2,3,
5,6,7,
11,12,13,
15,16,17,
21,22,23,
25,26,27,
31,32,33,
35,36,37)
library(ggplot2)
plot <- ggplot(data = df, aes(x = aux, y = coef)) +
geom_pointrange(aes(ymin=coef_lb,ymax=coef_ub),shape="none") +
geom_point(aes(shape = exposureperiod)) +
scale_shape_discrete(name ="Exposure period",
breaks=c("P1", "P2","P3"),
labels=c("P1","P2","P3")) +
scale_x_continuous("Sex and Pollutant",breaks=c(2,6,12,16,22,26,32,36),
labels=c("Boys","Girls","Boys","Girls","Boys","Girls","Boys","Girls")) +
scale_y_continuous("Mean Difference in Tanner Stage",
limits=c(-3, 3),
breaks=round(seq(-3, 3, by = 0.5),1)) +
geom_hline(yintercept=0,alpha=1,linetype="dashed") +
theme(axis.text.x = element_text()) +
theme_bw(base_size = 16,base_family="Arial") +
theme(legend.text.align = 0,
legend.title = element_text(face="plain"),
legend.key = element_blank(),
legend.position = "bottom") +
guides(shape= guide_legend(nrow = 3,byrow = TRUE)) +
theme(text = element_text(colour = "black",face="plain"),
axis.title.y = element_text(face="plain"),
axis.title.x = element_text(face="plain"),
axis.text.x = element_text(face="plain",hjust = 0),
axis.text.y = element_text(face="plain")) +
theme(panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "black"))+
theme(axis.ticks = element_line(size = 1))
plot
With the above script, I got the graph as below.
But I want to add another level of x-axis, which indicate the PM10, SO2, NO, and NO2, like the below graph. (To illustrate, I added those pollutants manually.) And of course, the x-axis title and legend should move down accordingly.
I used facet before, but I want to avoid the gap between pollutants generate by facet.
Thank you.
You could try faceting the plot
plot <- plot + facet_wrap(~variable)
This is currently my code for the figure above
ggplot(AllData, aes(Year, AGResiduals, fill=Type)) +
geom_boxplot(outlier.size=0) +
scale_fill_manual(values=c("skyblue4", "skyblue"),
name="Male Type",
labels=c("Guarders","Sneakers")) +
labs(x=NULL, y = "Residual of Accessory Gland Mass x Total Mass") +
scale_x_discrete(limits=c("2007","2008","2010","2011","2013","2014","2015"),
labels=str_wrap(c("2007 (nG=37, nS=8)","2008 (nG=4, nS=6)","2010 (nG=31, nS=6)","2011 (nG=55, nS=5)","2013 (nG=202, nS=24)","2014 (nG=63)","2015 (nG=59, nS=3)"),
width=6)) +
theme(plot.title = element_text(size = rel(1.4)),
axis.title = element_text(size = rel(1.2)),
axis.text.x = element_text(size = rel(1.5)),
axis.text.y = element_text(size = rel(1.5)),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"))
I want to make the "G" and "S"'s in each x-axis tick labels subscript (they designate sample size for two different groups, G and S).
Writing
expression(2007 (n[G]=37, n[S]=8)
works, but only if I remove the preceding
str_wrap
code for some reason.
I need to constrain the width of the text for each x-axis tick label, so I need to retain str_wrap or use line breaks within the expression function somehow.
I also can't replace my list of labels with a factor since I have to set limits on the years I want to show.
Can someone please help on how to make a 3-line x-axis tick label that allows for subscript?
I couldn't find a way to display expressions on multiple lines, but you could try rotating the labels:
library(stringr)
library(ggplot2)
library(scales)
library(dplyr)
n <- 100
y <- as.character(sample(2007:2015,n,replace=T))
t <- sample(c("Guarders","Guarders","Sneakers"),n,replace=T)
r <- rnorm(n,10,20)
nsk <- sum(t=="Sneakers")
r[ t=="Sneakers" ] <- rnorm(nsk,1,5)
AllData <- data.frame(Year=y,AGResiduals=r,Type=t)
sdf <- AllData %>% group_by( Year ) %>%
summarize( n=n(), ng=sum(Type=="Guarders") )
fmts <- rep("%s (n[G]==%d) ~~ (n[S]==%d)",nrow(sdf))
labs2 <- do.call(sprintf,list(fmts,sdf$Year,sdf$ng, sdf$n-sdf$ng ) )
ex2 <- parse(text=labs2)
ggplot(AllData, aes(Year, AGResiduals, fill=Type)) +
geom_boxplot(outlier.size=0) +
scale_fill_manual(values=c("skyblue4", "skyblue"),
name="Male Type",
labels=c("Guarders","Sneakers")) +
labs(x=NULL, y = "Residual of Accessory Gland Mass x Total Mass") +
scale_x_discrete(limits=c("2007","2008","2010","2011","2013","2014","2015"),
labels=ex2) +
theme(plot.title = element_text(size = rel(1.4)),
axis.title = element_text(size = rel(1.2)),
axis.text.x = element_text(size = rel(1.0),angle=-30,hjust=0),
axis.text.y = element_text(size = rel(1.5)),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"))
Yields this:
Rawr made a suggestion that allows you to get two, but not three lines. Since it doesn't require rotation, I am entering it as a second solution.
This:
library(stringr)
library(ggplot2)
library(scales)
library(dplyr)
set.seed(23456)
n <- 100
y <- as.character(sample(2007:2015,n,replace=T))
t <- sample(c("Guarders","Guarders","Sneakers"),n,replace=T)
r <- rnorm(n,10,20)
nsk <- sum(t=="Sneakers")
r[ t=="Sneakers" ] <- rnorm(nsk,1,5)
AllData <- data.frame(Year=y,AGResiduals=r,Type=t)
sdf <- AllData %>% group_by( Year ) %>%
summarize( n=n(), ng=sum(Type=="Guarders") )
fmts <- rep("atop(%s, n[G]==%d ~~ n[S]==%d)",nrow(sdf)) # two rows
labs2 <- do.call(sprintf,list(fmts,sdf$Year,sdf$ng, sdf$n-sdf$ng ) )
ex2 <- parse(text=labs2)
ggplot(AllData, aes(Year, AGResiduals, fill=Type)) +
geom_boxplot(outlier.size=0) +
scale_fill_manual(values=c("skyblue4", "skyblue"),
name="Male Type",
labels=c("Guarders","Sneakers")) +
labs(x=NULL, y = "Residual of Accessory Gland Mass x Total Mass") +
scale_x_discrete(limits=c("2007","2008","2010","2011","2013","2014","2015"),
labels=ex2) +
theme(plot.title = element_text(size = rel(1.4)),
axis.title = element_text(size = rel(1.2)),
axis.text.x = element_text(size = rel(1.0),angle=0,hjust=0),
axis.text.y = element_text(size = rel(1.5)),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"))
yields this: