Adding points on a geom_line plot - r

Using an idea from a previous question I have created a gantt-like chart using ggplot2. Here is the example code:
tasks <- c("Review literature", "Mung data", "Stats analysis", "Write Report")
dfr <- data.frame(
name = tasks[c(1,2,3,4,2,3)],
start.date = c("24/08/2010", "01/10/2010", "01/11/2010", "14/02/2011","15/12/2010","1/9/2010"),
end.date = c("31/10/2010", "14/12/2010", "28/02/2011", "30/04/2011","05/02/2011","1/11/2010"),
type = c(TRUE, FALSE, TRUE, TRUE,TRUE,FALSE)
)
mdfr <- melt(dfr, measure.vars = c("start.date", "end.date"))
ggplot(mdfr, aes(as.Date(value, "%d/%m/%Y"), name, colour = type)) +
geom_line(size = 6) +
xlab("") + ylab("") +
theme_bw()
Now, I need to indicate one (or maybe more, some other day) specific critical date for each task, using a bullet or a star or anything, which maybe inside or outside the bar and also a textual annotation of that date. Can it be achieved using the above procedure. If not, is there another (not ggplot) way of doing this?
Thank you!

Here you go:
require(ggplot2)
tasks <- c("Review literature", "Mung data", "Stats analysis", "Write Report")
dfr <- data.frame(
name = tasks[c(1,2,3,4,2,3)],
start.date = c("24/08/2010", "01/10/2010", "01/11/2010", "14/02/2011","15/12/2010","1/9/2010"),
end.date = c("31/10/2010", "14/12/2010", "28/02/2011", "30/04/2011","05/02/2011","1/11/2010"),
type = c(TRUE, FALSE, TRUE, TRUE,TRUE,FALSE)
)
dfrLabels <- data.frame(
name = tasks[c(1,2,3,4)],
date = c("16/10/2010", "07/12/2010", "14/02/2011", "15/04/2011"),
event = c("Something", "Other", "Whatever", "Deadline")
)
mdfr <- melt(dfr, measure.vars = c("start.date", "end.date"))
ggplot(mdfr, aes(as.Date(value, "%d/%m/%Y"), name, colour = type)) +
geom_line(size = 6) +
xlab("") + ylab("") +
theme_bw() +
geom_text( data=dfrLabels, aes(x= as.Date(date, "%d/%m/%Y"), label = event), hjust = 0, vjust = 1, colour = "red", size = 5.0 ) +
geom_point( data=dfrLabels, aes(x= as.Date(date, "%d/%m/%Y")), size=3.0, colour="black" )

Related

Using crosstalk filter_select() in r for line plot but doesn't display lines in the graph

I am trying to get interactivity in the rmarkdown document by using crosstalk.
Issue: When I plot using cross talk it doesn't display lines in the graph but it gives values on hovering over the plot. And is there a way to have default option in filter ?
Code:
library(tidyverse)
library(plotly)
library(crosstalk)
library(glue)
library(scales)
library(tidytext)
data loading:
file_url <- "https://raw.githubusercontent.com/johnsnow09/covid19-df_stack-code/main/ts_all_long2.csv"
ts_all_long <- read.csv(url(file_url))
shared data
confirm_col = "#32a4ba"
death_col = "#f08080"
Country_selected = c("United Kingdom")
dual_axis_plt_data <- ts_all_long %>%
group_by(Country.Region) %>%
mutate(scaleFactor = max(Confirmed_daily) / max(Death_daily)) %>%
ungroup() %>%
SharedData$new()
ggplotly()
dual_axis_plt <- ggplotly(
ggplot(data = dual_axis_plt_data, aes(x = date)) +
geom_area(aes(y = Confirmed_daily), fill = confirm_col, alpha = .7) +
geom_line(aes(y = Death_daily * scaleFactor), col = death_col,
size = 0.8, alpha = 0.8) +
scale_y_continuous(name = "Daily Cases", sec.axis = sec_axis(~./scaleFactor, name = "Daily Deaths"),
labels = scales::comma_format()) +
scale_x_date(date_breaks = "1 month", date_labels = "%b") +
theme_excel_new() +
theme(
axis.title.y.left=element_text(color=confirm_col),
axis.text.y.left=element_text(color=confirm_col),
axis.title.y.right=element_text(color=death_col),
axis.text.y.right=element_text(color=death_col),
plot.title = element_markdown(face = "plain", family = "serif", size = 14),
panel.grid.major = element_blank()
) +
labs(title = glue("<i>{Country_selected}</i>: Daily confirmed & death cases as of: {max(ts_all_long$date)}"),
caption = "Data source: covid19.analytics)
)
bscols(widths = c(3, 9),
list(
filter_select(id = "country", label = "Country",
sharedData = dual_axis_plt_data, group = ~Country.Region)
),
dual_axis_plt)
Desired result:
Without crosstalk interactivity it looks like this (Its a dual axis plot):

How do I fill a bar plot with a specific colour palette according to the variables?

Trying to assing each variable colour by creating my own colour palette, but some of the colours get mixed up. Any ideas on how I should fix this?
cor.partidos <- c(
`ps` = "#f71b75",
`psd` = "#ef6438",
`pcp-pev` = "#ff001d",
`pan` = "#71af64",
`outros` = "#f71b75",
`nulos` = "#565557",
`brancos` = "#aaa8ad",
`l` = "#f71b75",
`il` = "#f71b75",
`ch` = "#393195",
`cds-pp` = "#1192d8",
`be` = "#b40020",
`a` = "#f71b75")
#test graph
bars <- ggplot(leg19, aes(x = partido, y = votos)) +
geom_bar(stat="identity",
position="identity",
fill = cor.partidos) +
geom_hline(yintercept = 0, size = 1, colour="#333333") +
bbc_style() +
theme(axis.text=element_text(size=10))+
labs(subtitle = "Resultados Legislativas 2019",
ylab = "votos")
update with a mwe
It will work if the variables in the pallet are in the same order as the dataframe but if you mix it around a bit it won't work. Changing it to aes(fill = cor.partidos) won't work :(
test.pallet <- c(
`pink` = "#f71b75",
`orange` = "#ef6438",
`green` = "#71af64",
`red` = "#ff001d",
`other pink` = "#f71b72")
test.datafrane <- data_frame(
name = c("pink","orange","red","green","other pink"),
value = c(1,2,3,4,5)
)
test.datafrane$value <- as.numeric(test.datafrane$value)
test.graph <- ggplot(test.datafrane, aes(x = name, y = value)) +
geom_bar(stat="identity",
position="identity",
fill = test.pallet)
test.graph
As I suggested in my comment you could achieve your result by mapping your categorical var on fill inside aes() and make use of scale_fill_manual:
test.pallet <- c(
`pink` = "#f71b75",
`orange` = "#ef6438",
`green` = "#71af64",
`red` = "#ff001d",
`other pink` = "#f71b72")
test.datafrane <- data.frame(
name = c("pink","orange","red","green","other pink"),
value = c(1,2,3,4,5)
)
test.datafrane$value <- as.numeric(test.datafrane$value)
library(ggplot2)
test.graph <- ggplot(test.datafrane, aes(x = name, y = value, fill = name)) +
geom_bar(stat="identity",
position="identity") +
scale_fill_manual(values = test.pallet)
test.graph

Difficulty Plotting Time Series in R

I have been trying to plot time series data in R. I consulted several different sources online and somehow I am still having problems creating this plots. I have simulated some data below that represents daily information a fictitious company received from 2014 to 2016:
#create data
date_decision_made = seq(as.Date("2014/1/1"), as.Date("2016/1/1"),by="day")
date_decision_made <- format(as.Date(date_decision_made), "%Y/%m/%d")
property_damages_in_dollars <- rnorm(731,100,10)
car_damages_in_dollars <- rnorm(731,105,8)
other_damages_in_dollars <- rnorm(731,104,9)
location <- c("canada","usa")
location <- sample(location, 731, replace=TRUE, prob=c(0.3, 0.7))
type_of_house <- c("single","townhome", "rental" )
type_of_house<- sample(type_of_house , 731, replace=TRUE, prob=c(0.5, 0.3, 0.2))
response_variable <- c("claim_approved","claim_rejected")
response_variable<- sample(response_variable, 731, replace=TRUE, prob=c(0.4, 0.6))
final_dataset <- cbind(date_decision_made, property_damages_in_dollars, car_damages_in_dollars, other_damages_in_dollars, location, type_of_house, response_variable)
final_dataset <- as.data.frame(final_dataset)
final_dataset$other_damages_in_dollars = as.numeric(final_dataset$other_damages_in_dollars)
final_dataset$property_damages_in_dollars = as.numeric(final_dataset$property_damages_in_dollars)
final_dataset$car_damages_in_dollars = as.numeric(final_dataset$car_damages_in_dollars)
prop_damage <-subset(final_dataset, select = c(date_decision_made, property_damages_in_dollars))
car_damage <-subset(final_dataset, select = c(date_decision_made, car_damages_in_dollars))
other_damage <-subset(final_dataset, select = c(date_decision_made, other_damages_in_dollars))
new <-subset(final_dataset, select = c(date_decision_made, property_damages_in_dollars, car_damages_in_dollars, other_damages_in_dollars))
Based on this data, I have tried to plot this data as a time series in R. I tried several methods and all of them are producing errors. I have tried to resolve these problems but I just can't seem to figure them out. Could someone please help me?
#first way (error)
library(ggplot2)
library(reshape2) library(dplyr)
ggplot() + geom_line(data = prop_damage, aes(x = date_decision_made, y = property_damages_in_dollars, group = 1), color = "red")
+ scale_x_date(date_breaks = "days" , date_labels = "%b %d %a")+
geom_line(data = car_damage, aes(x = date_decision_made, y = car_damages_in_dollars, group =1 ), color = "blue")
+ geom_line(data = other_damage, aes(x = date_decision_made, y = other_damages_in_dollars, group =1), color = "green")
+ xlab('data_date') + ylab('percent.change')
#second way (error)
ggplot(data = new, aes(x = date_decision_made)) +
geom_line(aes(y = property_damages_in_dollars, colour = "property_damages_in_dollars")) +
geom_line(aes(y = car_damages_in_dollars, colour = "car_damages_in_dollars")) +
geom_line(aes(y =other_damages_in_dollars, colour = "other_damages_in_dollars")) +
scale_colour_manual("",
breaks = c("property_damages_in_dollars", "car_damages_in_dollars", "other_damages_in_dollars"),
values = c("red", "green", "blue")) +
xlab(" ") +
scale_y_continuous("Dollars", limits = c(0,10000)) +
labs(title="demo graph")
#3rd way error
##Subset the necessary columns
dd_sub = new[,c(1,2,3,4)]
##Then rearrange your data frame
library(reshape2)
dd = melt(dd_sub, id=c("date_decision_made"))
ggplot(dd) + geom_line(aes(x=date_decision_made, y=value, colour=variable, group=1)) + scale_x_date(date_breaks = "days" , date_labels = "%b %d %a")+ scale_colour_manual(values=c("red","green","blue"))
#4th error
mymts = ts(new,
frequency = 1,
start = c(2014, 1))
autoplot(mymts) +
ggtitle("Time Series Plot") +
theme(plot.title = element_text(hjust = 0.5))
#5th Method error
x1 = ts(prop_damage, frequency = 1, start = c(2014,1))
x2 = ts(other_damage, frequency = 1, start = c(2014,1))
ts.plot(x, y, gpars = list(col = c("black", "red")))
ts.plot(date_decision_made,gpars= list(col=rainbow(10)))
#6th method error
##Subset the necessary columns
dd_sub = new[,c(1,2,3,4)]
##Then rearrange your data frame
library(reshape2)
dd = melt(dd_sub, id=c("date_decision_made"))
qplot(date_decision_made,value,data=dd,geom='line',color=variable)
#7th way error
x1 = ts(prop_damage, frequency = 1, start = c(2014,1))
x2 = ts(other_damage, frequency = 1, start = c(2014,1))
comb_ts <- cbind(x1, x2)
plot.ts(comb_ts, plot.type = "single")
Could someone please show me what I am doing wrong in these codes? Thanks
Try this approach and be careful with the dates. There are plenty of dates because of years:
#Data
##Then rearrange your data frame
library(reshape2)
dd = melt(dd_sub, id=c("date_decision_made"))
dd$date_decision_made <- as.Date(as.character(dd$date_decision_made),'%Y/%m/%d')
#Plot
ggplot(dd) +
geom_line(aes(x=date_decision_made, y=value, colour=variable, group=1))+
scale_x_date(date_breaks = "months" ,breaks = '12 months', date_labels = "%b %d %a")+
scale_colour_manual(values=c("red","green","blue"))+
theme(axis.text.x = element_text(angle=90))
Output:
Another option is to use pivot_longer from tidyr
library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)
dd_sub %>%
pivot_longer(cols = -date_decision_made) %>%
mutate(date_decision_made = ymd(date_decision_made)) %>%
ggplot() +
geom_line(aes(x = date_decision_made, y=value, colour = name, group = 1)) +
scale_x_date(date_breaks = "months", breaks = '12 months',
date_labels = "%b %d %a")+
scale_colour_manual(values=c("red","green","blue"))+
theme(axis.text.x = element_text(angle = 90))

Ribbon chart in R

I search in R implementation (may be html widget on java script) a stacked bar chart in ribbon style, which allows you to see the rating change for each category in the dynamics.
It's look like ribbon chart in power bi desktop
Search rseek.org gave no results.
First off: Not a fan of that ribbon-styled stacked bar chart at all; while colourful and stylish, it's difficult to synthesise the relevant information. But that's just my opinion.
You could try building a similar plot in ggplot2 using geom_ribbon. See below for a minimal example:
# Sample data
set.seed(2017);
one <- sample(5:15, 10);
two <- rev(one);
df <- cbind.data.frame(
x = rep(1:10, 2),
y = c(one, two),
l = c(one - 1, two - 1),
h = c(one + 1, two + 1),
id = rep(c("one", "two"), each = 10));
require(ggplot2);
ggplot(df, aes(x = x, y = y)) +
geom_ribbon(aes(ymin = l, ymax = h, fill = id), alpha = 0.4) +
scale_fill_manual(values = c("#E69F00", "#56B4E9"));
If you need interactivity, you could wrap it inside plotly::ggplotly.
Using ggsankey package.
In the following you can make use of smooth argument geom_sankey_bump to control the look/feel of the chart as in ribbon chart of Power BI.
df <- data.frame (model = c("A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J"),
Year = c(2015,2015,2015,2015,2015,2015,2015,2015,2015,2015,2016,2016,2016,2016,2016,2016,2016,2016,2016,2016,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018),
sales = c(450,678,456,344,984,456,234,244,655,789,234,567,234,567,232,900,1005,1900,450,345,567,235,456,345,144,333,555,777,111,444,222,223,445,776,331,788,980,1003,456,434))
#install.packages("remotes")
#remotes::install_github("davidsjoberg/ggsankey")
library(ggsankey)
library(tidyverse)
ggplot(df, aes(x = Year,
node = model,
fill = model,
value = sales)) +
geom_sankey_bump(space = 0, type = "alluvial", color = "transparent", smooth = 15) +
scale_fill_viridis_d(option = "A", alpha = .8) +
theme_sankey_bump(base_size = 16) +
labs(x = NULL,
y = "Sales ($ ths)",
fill = "Model",
color = NULL) +
theme(legend.position = "bottom") +
labs(title = "Sales per model per year")
On suggestion in comments, I tried replicating some of the features of power BI chart.
# Prepare some data
set.seed(1)
df <- data.frame(
occupation = rep(c("Clerical", "Management", "Manual", "Professional", "Skilled"), 12),
Month = factor(rep(month.abb, 5), levels = month.abb, ordered = TRUE),
Sales = sample(200:1000, 60, replace = TRUE)
)
df %>%
group_by(Month) %>%
mutate(Max = sum(Sales)) %>%
ungroup() %>%
mutate(Max = max(Sales)) %>%
ggplot(aes(x = Month,
node = occupation,
fill = occupation,
value = Sales)) +
geom_col(aes(x = Month, y = Max/1.2),
alpha = 0.5,
fill = 'grey',
width = 0.4) +
geom_sankey_bump(space = 15,
type = "alluvial",
color = "transparent",
smooth = 8,
alpha = 0.8) +
scale_fill_brewer(palette = "Set3") +
theme_minimal() +
labs(x = NULL,
y = "Sales ($ ths)",
fill = "Occupation",
color = NULL) +
theme(legend.position = "top") +
labs(title = "Sales per occupation per month")
Created on 2022-07-07 by the reprex package (v2.0.1)
You may find your answers with ggalluvial package.
https://cran.r-project.org/web/packages/ggalluvial/vignettes/ggalluvial.html

Facet by year, with overlapping data

I'm trying to visualize places where I've been over the last few years.
See the code below: I want to facet g1 by year (g2). Do I need to add year-end / year-beginning points to the data frame, or am I missing something obvious?
I've tried setting group = 1, but I'm not entirely sure what it's attempting to do.
My vision is for the final output to look something like this:
Or alternatively, like this, with some tweaking with mapping the "status" column to geom_line
Also, please point out any bad habits / inefficient code.
library("reshape2")
library("ggplot2")
library("scales")
travel <- structure(list(place = c("Brighton", "Madrid", "Home", "Berlin",
"Geneva", "Home", "New York & Canada", "Home", "Isle of Wight",
"Home", "Copenhagen", "Home"), day.out = c("2009-09-06", "2012-07-23",
"2012-07-27", "2012-11-21", "2012-11-23", "2012-11-26", "2013-04-04",
"2013-04-15", "2013-08-26", "2013-08-29", "2014-03-14", "2014-03-17"
), day.back = c("2012-07-22", "2012-07-26", "2012-11-20", "2012-11-22",
"2012-11-25", "2013-04-03", "2013-04-14", "2013-08-25", "2013-08-28",
"2014-03-13", "2014-03-16", "2014-03-30"), status = c("Live",
"Travel", "Live", "Travel", "Travel", "Live", "Travel", "Live",
"Travel", "Live", "Travel", "Live")), .Names = c("place", "day.out",
"day.back", "status"), class = "data.frame", row.names = c(NA,
-12L))
travel$day.out <- as.Date(travel$day.out)
travel$day.back <- as.Date(travel$day.back)
travel$midpoint <- travel$day.out + floor((travel$day.back-travel$day.out)/2)
travel$day.out <- as.character(travel$day.out)
travel$day.back <- as.character(travel$day.back)
travel <- melt(travel, measure.vars = c("day.out", "day.back"))
travel <- travel[order(travel$value, decreasing = TRUE), ]
travel$status <- factor(travel$status)
travel$value <- as.Date(travel$value)
travel$label <- travel$place
travel$label[seq(2, dim(travel)[1], 2)] <- ""
travel$year <- as.numeric(format(travel$value, "%Y"))
pos <- c(-2.5, 2.5)
travel$vjust <- rep(pos, times = dim(travel)[1]/4 , each = 2)
rm(pos)
g1 <- ggplot(travel, aes(y = 0, colour = place)) +
geom_line(aes(x = value, alpha = status), size = 8) +
geom_text(aes(x = midpoint, label = label, vjust = vjust), size = 4) +
scale_y_continuous(breaks = NULL) +
scale_x_date(breaks = date_breaks("1 month"), labels = date_format("%b")) +
labs(list(title = "g1", y = "", x = "")) +
theme_bw() +
theme(axis.text.y = element_blank(),
legend.position = "none")
g2 <- ggplot(travel, aes(y = 0, colour = place)) +
geom_line(aes(x = value, alpha = status), size = 8) +
geom_text(aes(x = midpoint, label = label, vjust = vjust), size = 4) +
scale_y_continuous(breaks = NULL) +
scale_x_date(breaks = date_breaks("1 month"), labels = date_format("%b")) +
labs(list(title = "g2", y = "", x = "")) +
theme_bw() +
theme(axis.text.y = element_blank(),
legend.position = "none") +
facet_grid(year ~ .)
g1
g2
UPDATE 1
I've (manually) added year-beginning / year-end points, which is not ideal (is there a programmatic way to do this?), which has brought a new problem. The dates are of class Date, so faceting by year half-works (see code). I've played with space = "free_x", scales = "free-x", with no success. Also, the code to set the vjust value for geom_text to either 2.5 or 2.5 has gone out of whack, is there a better way to set this?
library("reshape2")
library("ggplot2")
library("scales")
travel <- structure(list(place = c("Brighton", "Brighton", "Brighton",
"Brighton", "Madrid", "Home", "Berlin", "Geneva", "Home", "Home",
"New York & Canada", "Home", "Isle of Wight", "Home", "Home",
"Copenhagen", "Home"), day.out = c("2009-09-06", "2010-01-01",
"2011-01-01", "2012-01-01", "2012-07-23", "2012-07-27", "2012-11-21",
"2012-11-23", "2012-11-26", "2013-01-01", "2013-04-04", "2013-04-15",
"2013-08-26", "2013-08-29", "2014-01-01", "2014-03-14", "2014-03-17"
), day.back = c("2009-12-31", "2010-12-31", "2011-12-31", "2012-07-22",
"2012-07-26", "2012-11-20", "2012-11-22", "2012-11-25", "2012-12-31",
"2013-04-03", "2013-04-14", "2013-08-25", "2013-08-28", "2013-12-31",
"2014-03-13", "2014-03-16", "2014-03-30"), status = c("Live",
"Live", "Live", "Live", "Travel", "Live", "Travel", "Travel",
"Live", "Live", "Travel", "Live", "Travel", "Live", "Live", "Travel",
"Live")), .Names = c("place", "day.out", "day.back", "status"
), class = "data.frame", row.names = c(NA, -17L))
travel$day.out <- as.Date(travel$day.out)
travel$day.back <- as.Date(travel$day.back)
travel$midpoint <- travel$day.out + floor((travel$day.back-travel$day.out)/2)
travel$day.out <- as.character(travel$day.out)
travel$day.back <- as.character(travel$day.back)
travel <- melt(travel, measure.vars = c("day.out", "day.back"))
travel <- travel[order(travel$value, decreasing = TRUE), ]
travel$status <- factor(travel$status)
travel$value <- as.Date(travel$value)
travel$label <- travel$place
travel$label[seq(2, dim(travel)[1], 2)] <- ""
travel$year <- as.numeric(format(travel$value, "%Y"))
# pos <- c(-2.5, -2.5, 2.5, 2.5)
# travel$vjust <- rep(pos, times = dim(travel)[1]/4)
# rm(pos)
g1 <- ggplot(travel, aes(y = 0, colour = place)) +
geom_line(aes(x = value, alpha = status), size = 8) +
geom_text(aes(x = midpoint, label = label), size = 4) +
scale_y_continuous(breaks = NULL) +
scale_x_date(breaks = date_breaks("1 month"), labels = date_format("%b")) +
labs(list(title = "g1", y = "", x = "")) +
theme_bw() +
theme(axis.text.y = element_blank(),
legend.position = "none") +
facet_grid(year ~ .)
g1
UPDATE 2
Hi all,
Haven't had any success with this problem, maybe I over-elaborated in the initial question?
I thought I saw that someone had replied suggesting something with facet_wrap, but the problem from UPDATE 1 (Date class) remains.
You were almost there actually!
The value and midpoint values are specifying the year that you also want to facet over. You need to remove the "year" information from these values if you want the facets to work properly. I simply changed the values to all be this year instead:
travel$value <- as.Date(format(travel$value, "%m-%d"), "%m-%d")
travel$midpoint <- as.Date(format(travel$midpoint, "%m-%d"), "%m-%d")
The only problem I see with your vjust values is that you are trying to replicate them 34/4=8.5 times each, which will round down to 8 and give you an error. You can instead dynamically set the number of replicates using length.out
travel$vjust <- rep(pos, length.out=nrow(travel))
So you can add the vjust back in to your code now:
g1 <- ggplot(travel, aes(y = 0, colour = place)) +
geom_line(aes(x = value, alpha = status), size = 8) +
geom_text(aes(x = midpoint, label = label, vjust=vjust), size = 4) +
scale_y_continuous(breaks = NULL) +
scale_x_date(breaks = date_breaks("1 month"), labels = date_format("%b")) +
labs(list(title = "g1", y = "", x = "")) +
theme_bw() +
theme(axis.text.y = element_blank(),
legend.position = "none") +
facet_grid(year ~ .)
Hope this helps!

Resources