Facet by year, with overlapping data - r

I'm trying to visualize places where I've been over the last few years.
See the code below: I want to facet g1 by year (g2). Do I need to add year-end / year-beginning points to the data frame, or am I missing something obvious?
I've tried setting group = 1, but I'm not entirely sure what it's attempting to do.
My vision is for the final output to look something like this:
Or alternatively, like this, with some tweaking with mapping the "status" column to geom_line
Also, please point out any bad habits / inefficient code.
library("reshape2")
library("ggplot2")
library("scales")
travel <- structure(list(place = c("Brighton", "Madrid", "Home", "Berlin",
"Geneva", "Home", "New York & Canada", "Home", "Isle of Wight",
"Home", "Copenhagen", "Home"), day.out = c("2009-09-06", "2012-07-23",
"2012-07-27", "2012-11-21", "2012-11-23", "2012-11-26", "2013-04-04",
"2013-04-15", "2013-08-26", "2013-08-29", "2014-03-14", "2014-03-17"
), day.back = c("2012-07-22", "2012-07-26", "2012-11-20", "2012-11-22",
"2012-11-25", "2013-04-03", "2013-04-14", "2013-08-25", "2013-08-28",
"2014-03-13", "2014-03-16", "2014-03-30"), status = c("Live",
"Travel", "Live", "Travel", "Travel", "Live", "Travel", "Live",
"Travel", "Live", "Travel", "Live")), .Names = c("place", "day.out",
"day.back", "status"), class = "data.frame", row.names = c(NA,
-12L))
travel$day.out <- as.Date(travel$day.out)
travel$day.back <- as.Date(travel$day.back)
travel$midpoint <- travel$day.out + floor((travel$day.back-travel$day.out)/2)
travel$day.out <- as.character(travel$day.out)
travel$day.back <- as.character(travel$day.back)
travel <- melt(travel, measure.vars = c("day.out", "day.back"))
travel <- travel[order(travel$value, decreasing = TRUE), ]
travel$status <- factor(travel$status)
travel$value <- as.Date(travel$value)
travel$label <- travel$place
travel$label[seq(2, dim(travel)[1], 2)] <- ""
travel$year <- as.numeric(format(travel$value, "%Y"))
pos <- c(-2.5, 2.5)
travel$vjust <- rep(pos, times = dim(travel)[1]/4 , each = 2)
rm(pos)
g1 <- ggplot(travel, aes(y = 0, colour = place)) +
geom_line(aes(x = value, alpha = status), size = 8) +
geom_text(aes(x = midpoint, label = label, vjust = vjust), size = 4) +
scale_y_continuous(breaks = NULL) +
scale_x_date(breaks = date_breaks("1 month"), labels = date_format("%b")) +
labs(list(title = "g1", y = "", x = "")) +
theme_bw() +
theme(axis.text.y = element_blank(),
legend.position = "none")
g2 <- ggplot(travel, aes(y = 0, colour = place)) +
geom_line(aes(x = value, alpha = status), size = 8) +
geom_text(aes(x = midpoint, label = label, vjust = vjust), size = 4) +
scale_y_continuous(breaks = NULL) +
scale_x_date(breaks = date_breaks("1 month"), labels = date_format("%b")) +
labs(list(title = "g2", y = "", x = "")) +
theme_bw() +
theme(axis.text.y = element_blank(),
legend.position = "none") +
facet_grid(year ~ .)
g1
g2
UPDATE 1
I've (manually) added year-beginning / year-end points, which is not ideal (is there a programmatic way to do this?), which has brought a new problem. The dates are of class Date, so faceting by year half-works (see code). I've played with space = "free_x", scales = "free-x", with no success. Also, the code to set the vjust value for geom_text to either 2.5 or 2.5 has gone out of whack, is there a better way to set this?
library("reshape2")
library("ggplot2")
library("scales")
travel <- structure(list(place = c("Brighton", "Brighton", "Brighton",
"Brighton", "Madrid", "Home", "Berlin", "Geneva", "Home", "Home",
"New York & Canada", "Home", "Isle of Wight", "Home", "Home",
"Copenhagen", "Home"), day.out = c("2009-09-06", "2010-01-01",
"2011-01-01", "2012-01-01", "2012-07-23", "2012-07-27", "2012-11-21",
"2012-11-23", "2012-11-26", "2013-01-01", "2013-04-04", "2013-04-15",
"2013-08-26", "2013-08-29", "2014-01-01", "2014-03-14", "2014-03-17"
), day.back = c("2009-12-31", "2010-12-31", "2011-12-31", "2012-07-22",
"2012-07-26", "2012-11-20", "2012-11-22", "2012-11-25", "2012-12-31",
"2013-04-03", "2013-04-14", "2013-08-25", "2013-08-28", "2013-12-31",
"2014-03-13", "2014-03-16", "2014-03-30"), status = c("Live",
"Live", "Live", "Live", "Travel", "Live", "Travel", "Travel",
"Live", "Live", "Travel", "Live", "Travel", "Live", "Live", "Travel",
"Live")), .Names = c("place", "day.out", "day.back", "status"
), class = "data.frame", row.names = c(NA, -17L))
travel$day.out <- as.Date(travel$day.out)
travel$day.back <- as.Date(travel$day.back)
travel$midpoint <- travel$day.out + floor((travel$day.back-travel$day.out)/2)
travel$day.out <- as.character(travel$day.out)
travel$day.back <- as.character(travel$day.back)
travel <- melt(travel, measure.vars = c("day.out", "day.back"))
travel <- travel[order(travel$value, decreasing = TRUE), ]
travel$status <- factor(travel$status)
travel$value <- as.Date(travel$value)
travel$label <- travel$place
travel$label[seq(2, dim(travel)[1], 2)] <- ""
travel$year <- as.numeric(format(travel$value, "%Y"))
# pos <- c(-2.5, -2.5, 2.5, 2.5)
# travel$vjust <- rep(pos, times = dim(travel)[1]/4)
# rm(pos)
g1 <- ggplot(travel, aes(y = 0, colour = place)) +
geom_line(aes(x = value, alpha = status), size = 8) +
geom_text(aes(x = midpoint, label = label), size = 4) +
scale_y_continuous(breaks = NULL) +
scale_x_date(breaks = date_breaks("1 month"), labels = date_format("%b")) +
labs(list(title = "g1", y = "", x = "")) +
theme_bw() +
theme(axis.text.y = element_blank(),
legend.position = "none") +
facet_grid(year ~ .)
g1
UPDATE 2
Hi all,
Haven't had any success with this problem, maybe I over-elaborated in the initial question?
I thought I saw that someone had replied suggesting something with facet_wrap, but the problem from UPDATE 1 (Date class) remains.

You were almost there actually!
The value and midpoint values are specifying the year that you also want to facet over. You need to remove the "year" information from these values if you want the facets to work properly. I simply changed the values to all be this year instead:
travel$value <- as.Date(format(travel$value, "%m-%d"), "%m-%d")
travel$midpoint <- as.Date(format(travel$midpoint, "%m-%d"), "%m-%d")
The only problem I see with your vjust values is that you are trying to replicate them 34/4=8.5 times each, which will round down to 8 and give you an error. You can instead dynamically set the number of replicates using length.out
travel$vjust <- rep(pos, length.out=nrow(travel))
So you can add the vjust back in to your code now:
g1 <- ggplot(travel, aes(y = 0, colour = place)) +
geom_line(aes(x = value, alpha = status), size = 8) +
geom_text(aes(x = midpoint, label = label, vjust=vjust), size = 4) +
scale_y_continuous(breaks = NULL) +
scale_x_date(breaks = date_breaks("1 month"), labels = date_format("%b")) +
labs(list(title = "g1", y = "", x = "")) +
theme_bw() +
theme(axis.text.y = element_blank(),
legend.position = "none") +
facet_grid(year ~ .)
Hope this helps!

Related

Change the flow colours of a diagram [duplicate]

This question already has an answer here:
Custom colors for groups using ggplot2
(1 answer)
Closed 2 months ago.
Is there any command to set your own pallet?
In a plot like this:
library(ggalluvial)
library(ggplot2)
library(dplyr)
df <- data.frame(status = c("open", "close", "close", "open/close", "close"),
stock = c("google", "amazon", "amazon", "yahoo", "amazon"),
newspaper = c("times", "newyork", "london", "times", "times"))
# Count the number of occurance for each alluvial
df <- df %>% dplyr::group_by(stock, newspaper, status) %>%
summarise(n = n())
# Define the factors
df$status <- factor(df$status, levels = c("open", "open/close", "close"))
df$stock <- factor(df$stock, levels = c("google", "amazon", "yahoo"))
df$newspaper <- factor(df$newspaper, levels = c("times", "newyork", "london"))
# Plot the alluvial as in https://cran.r-project.org/web/packages/ggalluvial/vignettes/ggalluvial.html#alluvia-wide-format
ggplot2::ggplot(df.2, aes(y = n, axis1 = stock, axis2 = newspaper)) +
ggalluvial::geom_alluvium(aes(fill = status), width = 1/12) +
ggalluvial::geom_stratum(width = 1/12, fill = "black", color = "grey") +
ggplot2::geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
ggplot2::scale_x_discrete(limits = c("stock", "newspaper"), expand = c(.05, .05)) +
ggplot2::scale_fill_brewer(type = "qual", palette = "Set1")
How is it possible to set our own colours?
You already are setting your own palette (you have chosen the "Set1" palette from the RColorBrewer package via scale_fill_brewer). If you want to set the colors manually, use scale_fill_manual
ggplot(df, aes(y = n, axis1 = stock, axis2 = newspaper)) +
geom_alluvium(aes(fill = status), width = 1/12) +
geom_stratum(width = 1/12, fill = "gray30", color = "white", size = 2) +
geom_text(stat = "stratum", aes(label = after_stat(stratum), angle = 90),
color = "white", size = 6) +
scale_x_discrete(limits = c("stock", "newspaper"), expand = c(.05, .05)) +
scale_fill_manual(values = c(open = 'red4',
`open/close` = 'orange3',
close ='navy')) +
ggtitle("Alluvial-Test") +
theme_void(base_size = 16) +
theme(plot.margin = margin(20, 20, 20, 20))

How to update to custom tool tip for ggbarplot when converting to ggplotly / plotly?

I am creating a bar plot using ggbarplot. I am converting the ggbarplot to plotly so that the graph is interactive in the Shiny app. I want the tooltip to not only show the x and y axis but additional detail (i.e. Frequency).
I know in ggplot you can add the text parameter and include that in the ggplotly function. I am not sure how I can achieve the same result when I am using ggbarplot.
Here is my data and code:
data <- structure(list(`concept name` = structure(4:1, .Label = c("NERVOUS SYSTEM",
"ANTIBACTERIALS FOR SYSTEMIC USE", "ANTIINFECTIVES FOR SYSTEMIC USE",
"CARDIOVASCULAR SYSTEM"), class = "factor", scores = structure(c(`ANTIBACTERIALS FOR SYSTEMIC USE` = 189734,
`ANTIINFECTIVES FOR SYSTEMIC USE` = 200931, `CARDIOVASCULAR SYSTEM` = 201684,
`NERVOUS SYSTEM` = 188122), .Dim = 4L, .Dimnames = list(c("ANTIBACTERIALS FOR SYSTEMIC USE",
"ANTIINFECTIVES FOR SYSTEMIC USE", "CARDIOVASCULAR SYSTEM", "NERVOUS SYSTEM"
)))), `# of Patients` = c(201684, 200931, 189734, 188122), w_cond_rate = c(0.8921,
0.8888, 0.8392, 0.8321), w_exp_rate = c(85.26, 83.92, 73.55,
69.24), freq = c(89.21, 88.88, 83.93, 83.21)), class = c("data.table",
"data.frame"), row.names = c(NA, -4L), .internal.selfref = <pointer: 0x55b1b7cd6e90>)
p <- ggbarplot(
data = data,
y = "# of Patients",
x = "concept name",
orientation = "horiz",
fill = "#D91E49",
color = "#D91E49",
ylab = "Cohort Population",
xlab = "",
width = .5,
text = paste("Freq:", data$freq)
) + theme(legend.title = element_blank()) +
theme(plot.title = element_text(vjust = 1)) +
theme_bw() +
ggtitle("Distribution of Drug Treatments in US population") +
theme(plot.title = element_text(size = 10, face = "bold")) +
theme(plot.caption = element_text(size = 7, color = "red")) +
theme(legend.title = element_blank())
ggplotly(p)
I want to add values from column 'freq' displayed in the hovertext.
Link Shows the solution for ggplot with ggplolty. I am looking to do the same with ggbarplot.
You you achieve your desired result via + aes(text = paste("Freq:", freq)) which adds your tooltip to the set of global aesthetics:
library(ggpubr)
library(plotly)
p <- ggbarplot(
data = data,
y = "# of Patients",
x = "concept name",
orientation = "horiz",
fill = "#D91E49",
color = "#D91E49",
ylab = "Cohort Population",
xlab = "",
width = .5
) +
aes(text = paste("Freq:", freq)) +
theme(legend.title = element_blank()) +
theme(plot.title = element_text(vjust = 1)) +
theme_bw() +
ggtitle("Distribution of Drug Treatments in US population") +
theme(plot.title = element_text(size = 10, face = "bold")) +
theme(plot.caption = element_text(size = 7, color = "red")) +
theme(legend.title = element_blank())
ggplotly(p)

Positioning labels and color coding in sunburst - R

This is what is the output.I have a data set which contains unit, weight of each unit and compliance score for each unit in year 2016.
I was not able to add the table but here is the screenshot for the data in csv
I have named the columns in the data as unit, weight and year(which is compliance score) .
I want to create a sunburst chart where the first ring will be the unit divided based on weight and the second ring will be the same but will have labels compliance score.
The colour for each ring will be different.
I was able to do some code with the help from an online blog and the output I have gotten is similar to what I want but I am facing difficulty in positioning of the labels and also the colour coding for each ring
#using ggplot
library(ggplot2) # Visualisation
library(dplyr) # data wrangling
library(scales) # formatting
#read file
weight.eg = read.csv("Dummy Data.csv", header = FALSE, sep =
";",encoding = "UTF-8")
#change column names
colnames(weight.eg) <- c ("unit","weight","year")
#as weight column is factor change into integer
weight.eg$weight = as.numeric(levels(weight.eg$weight))
[as.integer(weight.eg$weight)]
weight.eg$year = as.numeric(levels(weight.eg$year))
[as.integer(weight.eg$year)]
#Nas are introduced, remove
weight.eg <- na.omit(weight.eg)
#Sum of the total weight
sum_total_weight = sum(weight.eg$weight)
#First layer
firstLevel = weight.eg %>% summarize(total_weight=sum(weight))
sunburst_0 = ggplot(firstLevel) # Just a foundation
#this will generate a bar chart
sunburst_1 =
sunburst_0 +
geom_bar(data=firstLevel, aes(x=1, y=total_weight),
fill='darkgrey', stat='identity') +
geom_text(aes(x=1, y=sum_total_weight/2, label=paste("Total
Weight", comma(total_weight))), color='black')
#View
sunburst_1
#this argument is used to rotate the plot around the y-axis which
the total weight
sunburst_1 + coord_polar(theta = "y")
sunburst_2=
sunburst_1 +
geom_bar(data=weight.eg,
aes(x=2, y=weight.eg$weight, fill=weight.eg$weight),
color='white', position='stack', stat='identity', size=0.6)
+
geom_text(data=weight.eg, aes(label=paste(weight.eg$unit,
weight.eg$weight), x=2, y=weight.eg$weight), position='stack')
sunburst_2 + coord_polar(theta = "y")
sunburst_3 =
sunburst_2 +
geom_bar(data=weight.eg,
aes(x=3, y=weight.eg$weight,fill=weight.eg$weight),
color='white', position='stack', stat='identity',
size=0.6)+
geom_text(data = weight.eg,
aes(label=paste(weight.eg$year),x=3,y=weight.eg$weight),position =
'stack')
sunburst_3 + coord_polar(theta = "y")
sunburst_3 + scale_y_continuous(labels=comma) +
scale_fill_continuous(low='white', high='darkred') +
coord_polar('y') + theme_minimal()
Output for dput(weight.eg)
structure(list(unit = structure(2:7, .Label = c("", "A", "B",
"C", "D", "E", "F", "Unit"), class = "factor"), weight = c(30,
25, 10, 17, 5, 13), year = c(70, 80, 50, 30, 60, 40)), .Names =
c("unit",
"weight", "year"), row.names = 2:7, class = "data.frame", na.action
= structure(c(1L,
8L), .Names = c("1", "8"), class = "omit"))
output for dput(firstLevel)
structure(list(total_weight = 100), .Names = "total_weight", row.names
= c(NA,
-1L), na.action = structure(c(1L, 8L), .Names = c("1", "8"), class =
"omit"), class = "data.frame")
So I think I might have some sort of solution for you. I wasn't sure what you wanted to color-code on the outer ring; from your code it seems you wanted it to be the weight again, but it was not obvious to me. For different colour scales per ring, you could use the ggnewscale package:
library(ggnewscale)
For the centering of the labels you could write a function:
cs_fun <- function(x){(cumsum(x) + c(0, cumsum(head(x , -1))))/ 2}
Now the plotting code could look something like this:
ggplot(weight.eg) +
# Note: geom_col is equivalent to geom_bar(stat = "identity")
geom_col(data = firstLevel,
aes(x = 1, y = total_weight)) +
geom_text(data = firstLevel,
aes(x = 1, y = total_weight / 2,
label = paste("Total Weight:", total_weight)),
colour = "black") +
geom_col(aes(x = 2,
y = weight, fill = weight),
colour = "white", size = 0.6) +
scale_fill_gradient(name = "Weight",
low = "white", high = "darkred") +
# Open up new fill scale for next ring
new_scale_fill() +
geom_text(aes(x = 2, y = cs_fun(weight),
label = paste(unit, weight))) +
geom_col(aes(x = 3, y = weight, fill = weight),
size = 0.6, colour = "white") +
scale_fill_gradient(name = "Another Weight?",
low = "forestgreen", high = "white") +
geom_text(aes(label = paste0(year), x = 3,
y = cs_fun(weight))) +
coord_polar(theta = "y")
Which looks like this:

How do I send arrows which cover other labels to the back in geom_label_repel?

This should seem fairly straight forward but I can't find any argument to do this with ggrepel::geom_label_repel().
Sample of data:
df <- structure(list(Athletename = c("Aries Merritt", "Damian Warner"
), Score = c(12.8, 13.44), Event = c("110m hurdles", "110m hurdles"
), Points = c(1135, 1048), Record = c("World Record", "Decathlon Record"
), score_and_points = c("12.8s, 1135pts", "13.44s, 1048pts")), row.names = c(NA,
-2L), class = c("tbl_df", "tbl", "data.frame"), .Names = c("Athletename",
"Score", "Event", "Points", "Record", "score_and_points"))
ggplot2 code:
ggplot(data = data.frame(x = 0), mapping = aes(x = x)) +
geom_point(data = df, aes(x=Score, y=Points, colour=Record)) +
geom_label_repel(data = df,
aes(x=Score, y=Points, label = Athletename),
direction = "x",
nudge_x = -10) +
geom_label_repel(data = df,
aes(x=Score, y=Points, label = score_and_points),
direction = "y",
nudge_y = -200) +
scale_y_continuous(name = "Points",
breaks = seq(0,1500,100),
limits = c(0,1500)) +
scale_x_reverse(name = "110m hurdles time (m)",
breaks = seq(29,12,-1),
limits=c(29,12)) +
theme(legend.title = element_blank(), legend.position = "top")
Hacky but works: add a copy of the geom_label_repel call, but with the addition of segment.alpha = 0. Then all the labels will be on top of all the arrows.
library(ggrepel)
ggplot(data = data.frame(x = 0), mapping = aes(x = x)) +
geom_point(data = df, aes(x=Score, y=Points, colour=Record)) +
geom_label_repel(data = df,
aes(x=Score, y=Points, label = Athletename),
direction = "x",
nudge_x = -10) +
geom_label_repel(data = df,
aes(x=Score, y=Points, label = score_and_points),
direction = "y",
nudge_y = -200, ) +
geom_label_repel(data = df,
aes(x=Score, y=Points, label = score_and_points),
direction = "y", segment.alpha = 0,
nudge_y = -200, ) +
scale_y_continuous(name = "Points",
breaks = seq(0,1500,100),
limits = c(0,1500)) +
scale_x_reverse(name = "110m hurdles time (m)",
breaks = seq(29,12,-1),
limits=c(29,12)) +
theme(legend.title = element_blank(), legend.position = "top")

Adding points on a geom_line plot

Using an idea from a previous question I have created a gantt-like chart using ggplot2. Here is the example code:
tasks <- c("Review literature", "Mung data", "Stats analysis", "Write Report")
dfr <- data.frame(
name = tasks[c(1,2,3,4,2,3)],
start.date = c("24/08/2010", "01/10/2010", "01/11/2010", "14/02/2011","15/12/2010","1/9/2010"),
end.date = c("31/10/2010", "14/12/2010", "28/02/2011", "30/04/2011","05/02/2011","1/11/2010"),
type = c(TRUE, FALSE, TRUE, TRUE,TRUE,FALSE)
)
mdfr <- melt(dfr, measure.vars = c("start.date", "end.date"))
ggplot(mdfr, aes(as.Date(value, "%d/%m/%Y"), name, colour = type)) +
geom_line(size = 6) +
xlab("") + ylab("") +
theme_bw()
Now, I need to indicate one (or maybe more, some other day) specific critical date for each task, using a bullet or a star or anything, which maybe inside or outside the bar and also a textual annotation of that date. Can it be achieved using the above procedure. If not, is there another (not ggplot) way of doing this?
Thank you!
Here you go:
require(ggplot2)
tasks <- c("Review literature", "Mung data", "Stats analysis", "Write Report")
dfr <- data.frame(
name = tasks[c(1,2,3,4,2,3)],
start.date = c("24/08/2010", "01/10/2010", "01/11/2010", "14/02/2011","15/12/2010","1/9/2010"),
end.date = c("31/10/2010", "14/12/2010", "28/02/2011", "30/04/2011","05/02/2011","1/11/2010"),
type = c(TRUE, FALSE, TRUE, TRUE,TRUE,FALSE)
)
dfrLabels <- data.frame(
name = tasks[c(1,2,3,4)],
date = c("16/10/2010", "07/12/2010", "14/02/2011", "15/04/2011"),
event = c("Something", "Other", "Whatever", "Deadline")
)
mdfr <- melt(dfr, measure.vars = c("start.date", "end.date"))
ggplot(mdfr, aes(as.Date(value, "%d/%m/%Y"), name, colour = type)) +
geom_line(size = 6) +
xlab("") + ylab("") +
theme_bw() +
geom_text( data=dfrLabels, aes(x= as.Date(date, "%d/%m/%Y"), label = event), hjust = 0, vjust = 1, colour = "red", size = 5.0 ) +
geom_point( data=dfrLabels, aes(x= as.Date(date, "%d/%m/%Y")), size=3.0, colour="black" )

Resources