I apologize, I am a beginner in R. I am trying to make a graph like the one in the picture below.
This is what I did in code. But it does not work :
# Load the unemployment series; stringsAsFactors = FALSE keeps text columns as
# character vectors.
# NOTE(review): presumably the date column therefore arrives as character and
# needs as.Date() before it can share an axis with the Date columns of pres -
# confirm against the real file.
unemp <- read.csv("unemployment.csv", stringsAsFactors = FALSE)
# Lookup table of presidential terms (name, start, end, party), used to add
# background colours for the different presidents.
name <- c("Truman", "Eisenhower", "Kennedy", "Johnson", "Nixon",
"Ford", "Carter", "Reagan", "Bush I", "Clinton", "Bush II",
"Obama")
start <- as.Date(c("1948-01-01", "1953-01-20", "1961-01-20", "1963-11-22",
"1969-01-20", "1974-08-09", "1977-01-20", "1981-01-20",
"1989-01-20", "1993-01-20", "2001-01-20", "2009-01-20"))
# Each term ends on the start date of the following term; the last term is
# closed at 2016-10-01.
end <- c(start[-1], as.Date("2016-10-01"))
party <- c("D", "R", "D", "D", "R", "R", "D", "R", "R", "D", "R", "D")
pres <- data.frame(name, start, end, party, stringsAsFactors = FALSE)
head(unemp)
# NOTE(review): no library(ggplot2) call appears in this snippet, so ggplot2
# has to be attached beforehand for ggplot() and the geoms to be found.
p <- ggplot(unemp) +
# One shaded band per presidential term, spanning the full panel height.
geom_rect(data = pres,
aes(xmin = start, xmax = end, fill = party),
ymin = -Inf, ymax = Inf, alpha = 0.2) +
# NOTE(review): `data = pres` is written inside aes() here, so it is treated as
# an aesthetic mapping instead of the layer's data argument - presumably it was
# meant to be geom_vline(data = pres, aes(...)).
geom_vline(aes(data = pres, xintercept = as.numeric(start)), colour = "grey50", alpha = 0.5) +
# NOTE(review): y = 2500 is far above the 4.5-4.9 values shown in the CSV
# preview, so the labels would stretch the y axis - confirm the intended height.
geom_text(data = pres, aes(x = start, y = 2500, label = name), size = 3, vjust = 0, hjust = 0, nudge_x = 50, check_overlap = TRUE) +
# NOTE(review): the next line does not parse - a comma is missing between
# `data = pres` and `aes(...)`. In addition, pres only has the columns name,
# start, end and party, while the CSV preview names the value column `uempmed`;
# the line layer presumably should use unemp with aes(date, uempmed).
# The trailing geom_rect() uses ymin = 10000, which is likewise far outside the
# previewed data range.
geom_line(data = pres aes(date, unemp)) + geom_rect(data = pres, aes(xmin = start, xmax = end),
ymin = 10000, ymax = Inf, alpha = 0.4, fill = "chartreuse")
Also, the CSV file I used ("unemployment.csv") looks like this:
date uempmed
<date> <dbl>
1 1948-01-01 4.5
2 1948-02-01 4.7
3 1948-03-01 4.6
4 1948-04-01 4.9
5 1948-05-01 4.7
6 1948-06-01 4.8
What should I do to make the picture above?
Okay, here's a shot.
I slightly rewrote your pres data to fit a tidyverse style, and I created some random unemp data, since you didn't give us any (please do, in the future, as noted in the comments). I got HEX codes from here, which appear to match the ones you show.
Also, note that I'm using scales::label_percent(), which is from the newest scales 1.1 release, so you may have to update your scales package. Likewise, I don't know what scale your percentage data is on, so you may have to change the scale argument of label_percent().
With that said, here goes:
library(glue)
library(lubridate)
library(tidyverse)
# Presidential terms: one row per president with term start, term end and
# party ("D" or "R"). Used for the shaded bands, the dashed term boundaries
# and the rotated name labels.
name <- c(
  "Truman", "Eisenhower", "Kennedy", "Johnson", "Nixon", "Ford",
  "Carter", "Reagan", "Bush I", "Clinton", "Bush II", "Obama"
)
start <- as_date(c(
  "1948-01-01", "1953-01-20", "1961-01-20", "1963-11-22",
  "1969-01-20", "1974-08-09", "1977-01-20", "1981-01-20",
  "1989-01-20", "1993-01-20", "2001-01-20", "2009-01-20"
))
# A term ends where the next one starts; the last term is closed at 2016-10-01.
end <- c(start[-1], as_date("2016-10-01"))
party <- c("D", "R", "D", "D", "R", "R", "D", "R", "R", "D", "R", "D")
pres <- tibble(name, start, end, party)

# Stand-in data: one random value per month from 1948 to 2016 (noise around 5
# plus a repeating 1/2/3 step), because the real CSV was not provided.
unemp <- expand_grid(year = 1948:2016, month = 1:12) %>%
  transmute(
    date = as_date(glue("{year}-{month}-01")),
    unemployment = rnorm(n(), 5, 0.1) +
      rep(1:3, each = 100, length.out = n())
  )

# Data range, used to size the shaded bands and to place the labels.
min_unemp <- min(unemp$unemployment)
max_unemp <- max(unemp$unemployment)

ggplot(unemp, aes(x = date, y = unemployment)) +
  geom_line() +
  # Dashed line at the start of every term.
  geom_vline(
    data = pres,
    mapping = aes(xintercept = start),
    colour = "grey50",
    linetype = "dashed"
  ) +
  # President names, rotated and anchored just above the data.
  geom_text(
    data = pres,
    mapping = aes(x = start, y = max_unemp + 0.25, label = name),
    angle = 90,
    vjust = 1
  ) +
  # Party-coloured band per term; inherit.aes = FALSE keeps the global x/y
  # mapping (columns of unemp) away from the pres data.
  geom_rect(
    data = pres,
    mapping = aes(
      xmin = start,
      xmax = end,
      ymin = min_unemp,
      ymax = max_unemp + 0.75,
      fill = party
    ),
    inherit.aes = FALSE,
    alpha = 0.25
  ) +
  coord_cartesian(expand = FALSE) +
  # The values are already in percent units, hence scale = 1.
  scale_y_continuous(labels = scales::label_percent(scale = 1)) +
  # Breaks and named values tie each colour and label to its party code
  # explicitly instead of relying on the alphabetical order of the codes.
  scale_fill_manual(
    name = "Party of President",
    breaks = c("D", "R"),
    labels = c("Democratic", "Republican"),
    values = c(D = "#0015bc", R = "#ff0000")
  ) +
  labs(
    x = "Date",
    y = "Unemployment Rate"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")
Created on 2019-11-30 by the reprex package (v0.3.0)
Related
I am using ggarrange to produce a figure with three plots. One of these plots includes fill and a pattern created using ggpattern. I am able to create a combined figure when I combine one standard ggplot and the ggpattern plot, but I get an error (Error in seq.default(from, to, by) : invalid '(to - from)/by') when I try to combine all 3. I've included a simplified example below.
# Fake data: ten rows, two per Group (A-E), alternating Condition no/yes.
# Built as a bare structure() carrying the tibble class vector.
data <- structure(
  list(
    Level = c(0.2, 0.3, 0.25, 0.35, 0.4, 0.5, 0.5, 0.6, 0.15, 0.35),
    Group = rep(c("A", "B", "C", "D", "E"), each = 2),
    Condition = rep(c("no", "yes"), times = 5),
    Hx = c(0, 1, 1, 1, 0, 1, 0, 0, 0, 0),
    Type = rep(c("T", "F"), each = 5)
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)
# Create the three bar charts that are combined with ggarrange() afterwards.

# Chart 1: dodged bars filled by Hx and hatched by Condition, one facet column
# per Type. The guides() overrides give the pattern legend white keys and
# remove the hatching from the fill legend keys.
pattern_plot <-
  ggplot(data = data, aes(x = Group, y = Level)) +
  facet_grid(cols = vars(Type)) +
  geom_bar_pattern(
    aes(pattern = Condition, fill = as.factor(Hx)),
    stat = "identity",
    position = "dodge",
    color = "black",
    pattern_fill = "black",
    pattern_angle = 45,
    pattern_density = 0.1,
    pattern_spacing = 0.025,
    pattern_key_scale_factor = 0.6
  ) +
  scale_pattern_manual(values = c("none", "stripe")) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(
    x = "",
    y = "",
    pattern = "Condition",
    fill = "History",
    subtitle = "Percentage of people with condition"
  ) +
  guides(
    pattern = guide_legend(override.aes = list(fill = "white")),
    fill = guide_legend(override.aes = list(pattern = "none"))
  ) +
  theme_bw() +
  theme(legend.position = "left")

# Chart 2: plain dodged bars filled by Condition.
no_pattern_plot <-
  ggplot(data = data, aes(x = Group, y = Level, fill = Condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(legend.position = "left")

# Chart 3: plain dodged bars filled by Hx.
no_pattern_plot2 <-
  ggplot(data = data, aes(x = Group, y = Level, fill = as.factor(Hx))) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(legend.position = "left")
This works:
# Combine the patterned chart and the Hx-filled chart in a two-row layout.
ggarrange(pattern_plot, no_pattern_plot2, nrow = 2)
To create this
plot
However ggarrange(pattern_plot, no_pattern_plot, no_pattern_plot2, nrow = 3)
produces Error in seq.default(from, to, by) : invalid '(to - from)/by'
Any ideas?
This question builds off of enter link description here but is in the context of faceted boxplots.
So, I have the following code:
# Simulated input: two 180-row data frames that share the same design and
# differ only in the constant column n (10 vs 5), stacked into dd.
set.seed(20210714)
dd <- data.frame(
  Method = rep(c("A", "B", "C"), each = 60),
  Pattern = rep(c("X", "Y", "Z"), times = 30),
  X1 = runif(180),
  Complexity = rep(c("High", "Low"), times = 90),
  nsim = rep(rep(1:10, times = 9), each = 2),
  n = 10
)
dd1 <- data.frame(
  Method = rep(c("A", "B", "C"), each = 60),
  Pattern = rep(c("X", "Y", "Z"), times = 30),
  X1 = runif(180),
  Complexity = rep(c("High", "Low"), times = 90),
  nsim = rep(rep(1:10, times = 9), each = 2),
  n = 5
)
dd <- rbind(dd, dd1)
library(ggplot2)
# Create the dummy data frame: a copy of dd with two placeholder rows appended.
# The placeholders use the Method "ZZZ", the first two Pattern values and one
# row per Complexity; every other column of those rows stays NA.
dummy.df <- dd
dummy.df[nrow(dd) + seq_len(2), "Pattern"] <- unique(dd$Pattern)[-3]
dummy.df[nrow(dd) + seq_len(2), "Method"] <- "ZZZ"
dummy.df[nrow(dd) + seq_len(2), "Complexity"] <- c("High", "Low")

# x-axis variable: one level per Method x Pattern combination.
dummy.df$dummy <- with(dummy.df, interaction(Method, Pattern))

# Box plots per Method/Pattern with thin lines and points linking the
# observations that share a Pattern and simulation number.
ggplot(dummy.df, aes(x = dummy, y = X1, fill = Method)) +
  geom_boxplot(aes(fill = Method)) +
  geom_line(
    aes(x = dummy, group = interaction(Pattern, nsim)),
    size = 0.35,
    alpha = 0.35,
    colour = I("#525252")
  ) +
  geom_point(
    aes(x = dummy, group = interaction(Pattern, nsim)),
    size = 0.35,
    alpha = 0.25,
    colour = I("#525252")
  ) +
  facet_grid(~Complexity) +
  scale_x_discrete(
    labels = c("", "X", "", "", "", "Y", "", "", "", "Z", "", "")
  ) +
  scale_fill_brewer(
    breaks = c("A", "B", "C"),
    type = "qual",
    palette = "Paired"
  ) +
  xlab("Pattern") +
  guides(fill = guide_legend(nrow = 1)) +
  theme_light() +
  theme(legend.position = "bottom")
# Rebuild the dummy data frame: dd plus two placeholder rows (Method "ZZZ").
dummy.df <- dd
dummy.df[nrow(dd) + 1:2, "Pattern"] <- unique(dd$Pattern)[-3]
dummy.df[nrow(dd) + 1:2, "Method"] <- "ZZZ"
dummy.df[nrow(dd) + 1:2, "Complexity"] <- c("High", "Low")

# Fill variable: one level per Method x n combination.
dummy.df$fill <- interaction(dummy.df$Method, dummy.df$n)

# x-axis variable: one level per fill x Pattern combination. The earlier
# Method x Pattern version of `dummy` was overwritten immediately and is no
# longer computed.
dummy.df$dummy <- interaction(dummy.df$fill, dummy.df$Pattern)

# Drop four of the combined levels by position.
# NOTE(review): presumably these are the "ZZZ" spacer combinations that are not
# wanted on the axis - confirm against levels(dummy.df$dummy).
dummy.df$dummy <- factor(
  dummy.df$dummy,
  levels = levels(dummy.df$dummy)[-c(4, 12, 20, 24)]
)

# The placeholder rows have no n, so their `dummy` is NA; give them a valid
# level. Indexing via nrow(dd) keeps this tied to the rows appended above
# instead of the literal row numbers 361:362.
dummy.df$dummy[nrow(dd) + 1:2] <- "A.10.Z" ## dummy variables to get rid of NAs

theme_set(theme_bw(base_size = 14))

ggplot(dummy.df, aes(x = dummy, y = X1, fill = fill)) +
  geom_boxplot(aes(fill = fill), lwd = 0.1, outlier.size = 0.01) +
  facet_grid(~Complexity) +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 1)) +
  # Thin lines/points linking observations with the same Pattern, nsim and n.
  geom_line(
    aes(x = dummy, group = interaction(Pattern, nsim, n)),
    size = 0.35, alpha = 0.35, colour = I("#525252")
  ) +
  geom_point(
    aes(x = dummy, group = interaction(Pattern, nsim, n)),
    size = 0.35, alpha = 0.25, colour = I("#525252")
  ) +
  # Only label one position per Pattern; drop = FALSE keeps unused levels.
  scale_x_discrete(
    labels = c("X", "Y", "Z"),
    breaks = paste0("A.10.", c("X", "Y", "Z")),
    drop = FALSE
  ) +
  xlab("Pattern") +
  # Legend entries for every fill level except the two "ZZZ" placeholders.
  scale_fill_brewer(
    breaks = levels(dummy.df$fill)[-c(4, 8)],
    type = "qual",
    palette = "Paired"
  )
This yields the following plot.
All is well, except with the legend. I would like the following: the dark colors to be in the First group titled "n=5" on the left, with "A", "B", "C" for the three dark colors, and the light colors to be to the right, in a Second group titled "n=10" on the right, with "A", "B", "C" for the three light colors. Sort of like in the link enter link description here above.
What I can not figure out is how to call the boxplot twice to mimic the solution there.
Is there a way to do this? Please feel free to let me know if the question is not clear.
Thanks again, in advance, for any help!
Adapting my answer on your former question this could be achieved like so:
library(ggplot2)
library(ggnewscale)

# Fill levels to show in the legends (positions 4 and 8 are left out), sorted.
fill <- sort(levels(dummy.df$fill)[-c(4, 8)])

# Legend labels: the fill level with its ".<digits>" suffix removed, named by
# the full level.
labels <- setNames(gsub("\\.\\d+", "", fill), fill)

# Six colours from the "Paired" palette, named by the fill levels.
colors <- setNames(
  scales::brewer_pal(type = "qual", palette = "Paired")(6),
  fill
)

# The box plot layer is drawn twice, once per fill scale, so that each scale
# contributes its own legend: first the levels ending in 5, then those ending
# in 10.
ggplot(dummy.df, aes(x = dummy, y = X1, fill = fill)) +
  geom_boxplot(aes(fill = fill), lwd = 0.1, outlier.size = 0.01) +
  scale_fill_manual(
    name = "n = 5",
    breaks = fill[grepl("5$", fill)],
    labels = labels[grepl("5$", fill)],
    values = colors,
    guide = guide_legend(title.position = "left", order = 1)
  ) +
  new_scale_fill() +
  geom_boxplot(aes(fill = fill), lwd = 0.1, outlier.size = 0.01) +
  scale_fill_manual(
    name = "n = 10",
    breaks = fill[grepl("10$", fill)],
    labels = labels[grepl("10$", fill)],
    values = colors,
    guide = guide_legend(title.position = "left", order = 2)
  ) +
  facet_grid(~Complexity) +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 1)) +
  geom_line(
    aes(x = dummy, group = interaction(Pattern, nsim, n)),
    size = 0.35,
    alpha = 0.35,
    colour = I("#525252")
  ) +
  geom_point(
    aes(x = dummy, group = interaction(Pattern, nsim, n)),
    size = 0.35,
    alpha = 0.25,
    colour = I("#525252")
  ) +
  scale_x_discrete(
    labels = c("X", "Y", "Z"),
    breaks = paste("A.10.", c("X", "Y", "Z"), sep = ""),
    drop = FALSE
  ) +
  xlab("Pattern")
#> Warning: Removed 2 rows containing non-finite values (new_stat_boxplot).
This is the output.
I have a data set that contains the unit, the weight of each unit, and the compliance score for each unit in the year 2016.
I was not able to add the table but here is the screenshot for the data in csv
I have named the columns in the data as unit, weight and year(which is compliance score) .
I want to create a sunburst chart where the first ring will be the unit divided based on weight and the second ring will be the same but will have labels compliance score.
The colour for each ring will be different.
I was able to do some code with the help from an online blog and the output I have gotten is similar to what I want but I am facing difficulty in positioning of the labels and also the colour coding for each ring
# Sunburst-style chart with ggplot2: stacked bars are drawn at x = 1, 2 and 3
# and turned into rings with coord_polar(theta = "y").
library(ggplot2) # Visualisation
library(dplyr)   # data wrangling
library(scales)  # formatting

# Read the file: ';'-separated, no header row declared.
weight.eg <- read.csv("Dummy Data.csv", header = FALSE, sep = ";",
                      encoding = "UTF-8")

# Change column names
colnames(weight.eg) <- c("unit", "weight", "year")

# Convert weight and year to numeric. Going through as.character() works both
# when read.csv() returns factors and when it returns character columns; the
# levels()-based idiom only works for factors and yields NA for character
# input. Cells that are not numbers become NA (with a coercion warning).
weight.eg$weight <- as.numeric(as.character(weight.eg$weight))
weight.eg$year <- as.numeric(as.character(weight.eg$year))

# NAs are introduced by the conversion; remove those rows
weight.eg <- na.omit(weight.eg)

# Sum of the total weight
sum_total_weight <- sum(weight.eg$weight)

# First layer: a single bar holding the total weight
firstLevel <- weight.eg %>% summarize(total_weight = sum(weight))
sunburst_0 <- ggplot(firstLevel) # Just a foundation

# This will generate a bar chart
sunburst_1 <-
  sunburst_0 +
  geom_bar(data = firstLevel, aes(x = 1, y = total_weight),
           fill = "darkgrey", stat = "identity") +
  geom_text(aes(x = 1, y = sum_total_weight / 2,
                label = paste("Total Weight", comma(total_weight))),
            color = "black")

# View
sunburst_1

# coord_polar(theta = "y") rotates the plot around the y axis, which carries
# the total weight
sunburst_1 + coord_polar(theta = "y")

# Second ring: one stacked segment per unit, sized and filled by weight.
# Columns are referenced by bare name inside aes() because the layer's data
# argument already supplies weight.eg.
sunburst_2 <-
  sunburst_1 +
  geom_bar(data = weight.eg,
           aes(x = 2, y = weight, fill = weight),
           color = "white", position = "stack", stat = "identity",
           size = 0.6) +
  geom_text(data = weight.eg,
            aes(label = paste(unit, weight), x = 2, y = weight),
            position = "stack")
sunburst_2 + coord_polar(theta = "y")

# Third ring: same segments, labelled with the compliance score (year)
sunburst_3 <-
  sunburst_2 +
  geom_bar(data = weight.eg,
           aes(x = 3, y = weight, fill = weight),
           color = "white", position = "stack", stat = "identity",
           size = 0.6) +
  geom_text(data = weight.eg,
            aes(label = paste(year), x = 3, y = weight),
            position = "stack")
sunburst_3 + coord_polar(theta = "y")

sunburst_3 + scale_y_continuous(labels = comma) +
  scale_fill_continuous(low = "white", high = "darkred") +
  coord_polar("y") + theme_minimal()
Output for dput(weight.eg)
# dput() dump of weight.eg after na.omit(): six rows (row names 2:7) with the
# columns unit (factor), weight and year; the na.action attribute records that
# rows 1 and 8 were omitted.
structure(list(unit = structure(2:7, .Label = c("", "A", "B",
"C", "D", "E", "F", "Unit"), class = "factor"), weight = c(30,
25, 10, 17, 5, 13), year = c(70, 80, 50, 30, 60, 40)), .Names =
c("unit",
"weight", "year"), row.names = 2:7, class = "data.frame", na.action
= structure(c(1L,
8L), .Names = c("1", "8"), class = "omit"))
output for dput(firstLevel)
# dput() dump of firstLevel: a one-row data frame whose only column,
# total_weight, is 100.
structure(list(total_weight = 100), .Names = "total_weight", row.names
= c(NA,
-1L), na.action = structure(c(1L, 8L), .Names = c("1", "8"), class =
"omit"), class = "data.frame")
So I think I might have some sort of solution for you. I wasn't sure what you wanted to color-code on the outer ring; from your code it seems you wanted it to be the weight again, but it was not obvious to me. For different colour scales per ring, you could use the ggnewscale package:
library(ggnewscale)
For the centering of the labels you could write a function:
# Midpoint of each segment when the values in x are stacked end to end:
# the average of every running total and the running total before it.
cs_fun <- function(x) {
  segment_end <- cumsum(x)
  segment_start <- c(0, head(segment_end, -1))
  (segment_end + segment_start) / 2
}
Now the plotting code could look something like this:
# Three concentric rings: the total (x = 1), the per-unit weights (x = 2) and
# a second per-unit ring labelled with the year value (x = 3). Each per-unit
# ring gets its own fill gradient via new_scale_fill(); labels are centred on
# their segments with cs_fun().
ggplot(weight.eg) +
  # Note: geom_col is equivalent to geom_bar(stat = "identity")
  geom_col(
    data = firstLevel,
    aes(x = 1, y = total_weight)
  ) +
  geom_text(
    data = firstLevel,
    aes(
      x = 1,
      y = total_weight / 2,
      label = paste("Total Weight:", total_weight)
    ),
    colour = "black"
  ) +
  geom_col(
    aes(x = 2, y = weight, fill = weight),
    colour = "white",
    size = 0.6
  ) +
  scale_fill_gradient(
    name = "Weight",
    low = "white",
    high = "darkred"
  ) +
  # Open up new fill scale for next ring
  new_scale_fill() +
  geom_text(
    aes(
      x = 2,
      y = cs_fun(weight),
      label = paste(unit, weight)
    )
  ) +
  geom_col(
    aes(x = 3, y = weight, fill = weight),
    size = 0.6,
    colour = "white"
  ) +
  scale_fill_gradient(
    name = "Another Weight?",
    low = "forestgreen",
    high = "white"
  ) +
  geom_text(
    aes(
      label = paste0(year),
      x = 3,
      y = cs_fun(weight)
    )
  ) +
  coord_polar(theta = "y")
Which looks like this:
I have got this data set and want to generate a sunburst plot. The data is of 4 columns which are unit, weight, year16 and year17. The sunburst is based on the values in the weight column. The code is there and when adding the coding for the third layer it is giving me an error. I think the error is coming when I am adding the third layer.
library("ggnewscale")
library(ggplot2)
library(dplyr) # provides %>% and summarize(), used for firstLevel below

# Read the file: ';'-separated, no header row declared.
weight.eg <- read.csv("Dummy Data.csv", header = FALSE, sep = ";",
                      encoding = "UTF-8")

# Change column names
colnames(weight.eg) <- c("unit", "weight", "year16", "year17")

# Check the class of every column
sapply(weight.eg, class)
# View(weight.eg)

# Convert weight and the year columns to numeric. Going through as.character()
# works both when read.csv() returns factors and when it returns character
# columns; the levels()-based idiom only works for factors. Cells that are not
# numbers become NA (with a coercion warning).
weight.eg$weight <- as.numeric(as.character(weight.eg$weight))
weight.eg$year16 <- as.numeric(as.character(weight.eg$year16))
weight.eg$year17 <- as.numeric(as.character(weight.eg$year17))

# NAs are introduced by the conversion; remove those rows
weight.eg <- na.omit(weight.eg)

# Sum of the total weight
sum_total_weight <- sum(weight.eg$weight)

# First layer: a single bar holding the total weight
firstLevel <- weight.eg %>% summarize(total_weight = sum(weight))

# Midpoint of each stacked segment, used to centre the labels.
cs_fun <- function(x) {
  (cumsum(x) + c(0, cumsum(head(x, -1)))) / 2
}

# Rings: total (x = 1), weight (x = 2), year16 (x = 3), year17 (x = 4).
# Every ring after the first opens its own fill scale with new_scale_fill().
ggplot(weight.eg) +
  geom_col(data = firstLevel,
           aes(x = 1, y = total_weight)) +
  geom_text(data = firstLevel,
            aes(x = 1, y = total_weight / 2,
                label = paste("Total Weight:", total_weight)),
            colour = "black") +
  geom_col(aes(x = 2, y = weight, fill = weight),
           colour = "black", size = 0.6) +
  scale_fill_gradient(name = "Weight",
                      low = "white", high = "lightblue") +
  # Open up new fill scale for next ring
  new_scale_fill() +
  geom_text(aes(x = 2, y = cs_fun(weight),
                label = paste(unit, weight))) +
  geom_col(aes(x = 3, y = weight, fill = year16),
           size = 0.6, colour = "black") +
  scale_fill_gradient(name = "Year16",
                      low = "red", high = "green") +
  geom_text(aes(label = paste0(unit, year16), x = 3,
                y = cs_fun(weight))) +
  # Next ring. The x = 2 labels are already drawn above, so they are not
  # added a second time here.
  new_scale_fill() +
  geom_col(aes(x = 4, y = weight, fill = year17),
           size = 0.6, colour = "black") +
  scale_fill_gradient(name = "Year17",
                      low = "red", high = "green") +
  geom_text(aes(label = paste0(unit, year17), x = 4,
                y = cs_fun(weight))) +
  coord_polar(theta = "y")
The output for dput(weight.eg) is
# dput() dump of weight.eg after na.omit(): six rows (row names 2:7) with the
# columns unit (factor), weight, year16 and year17; the na.action attribute
# records that row 1 was omitted.
structure(list(unit = structure(1:6, .Label = c("A", "B", "C",
"D", "E", "F", "Unit"), class = "factor"), weight = c(30, 25,
10, 17, 5, 13), year16 = c(70, 80, 50, 30, 60, 40), year17 = c(50,
100, 20, 30, 70, 60)), .Names = c("unit", "weight", "year16",
"year17"), row.names = 2:7, class = "data.frame", na.action =
structure(1L, .Names = "1", class = "omit"))
I want to include year17 as well and in the future there will be
columns, so that has to be added as well. Because of the error I
am not able to figure out what is wrong.
This is my df :
# Example data: three species (a, b, c) with counts 1-3, repeated for the
# years 2003 and 2004.
df <- data.frame(
  annee = rep(c(2003, 2004), each = 3),
  sps = c("a", "b", "c"),
  nb = 1:3
)
I create a column containing my labels :
# One label per row, e.g. "nb a = 1".
df$labels <- with(df, paste("nb", sps, "=", nb))
Then I do my plot :
# Density of nb per species, one facet per year, with every row's label drawn
# at the same fixed position (8, 2.5).
ggplot(df, aes(nb)) +
  geom_density(
    aes(fill = sps, colour = sps),
    alpha = 0.1
  ) +
  facet_wrap(~ annee) +
  geom_text(
    data = df,
    aes(x = 8, y = 2.5, label = labels),
    colour = "black",
    inherit.aes = FALSE,
    parse = FALSE
  )
But I have a problem with my text in each facet : I would like to have 3 lines (one for each sps).
I tried with the symbol "\n" but I failed in trying to obtain :
"nb a = 1 \n nb b = 2 \n nb c = 3" for each year
Thanks for help
You will have to concatenate what you want broken into several lines into one single string.
# Collapse the labels of each year into one newline-separated string so that
# every facet receives a single multi-line label.
newdf <- aggregate(
  labels ~ annee,
  data = df,
  FUN = function(lbl) paste(lbl, collapse = "\n")
)

ggplot(df, aes(nb)) +
  geom_density(
    aes(fill = sps, colour = sps),
    alpha = 0.1
  ) +
  facet_wrap(~ annee) +
  geom_text(
    data = newdf,
    aes(x = 8, y = 2, label = labels),
    colour = "black"
  ) +
  scale_x_continuous(limits = c(0, 11)) +
  scale_y_continuous(limits = c(0, 2.25))
You can achieve what you want by creating a separate data.frame for your labels:
library(tidyverse)
# Example data: three species with counts 1-3 for each of two years.
df <- data.frame(
  annee = rep(c(2003, 2004), each = 3),
  sps = c("a", "b", "c"),
  nb = 1:3
)

# Create the labels in a separate data frame: one row per year holding the
# newline-joined labels plus the x/y position at which to draw them.
label_df <- df %>%
  mutate(labels = paste("nb", sps, "=", nb)) %>%
  group_by(annee) %>%
  summarise(labels = paste(labels, collapse = "\n")) %>%
  mutate(
    x = 6.5,
    y = 2.2
  )

ggplot(df, aes(nb)) +
  geom_density(
    aes(fill = sps, colour = sps),
    alpha = 0.1
  ) +
  facet_wrap(~annee) +
  geom_text(
    data = label_df,
    aes(x = x, y = y, label = labels)
  ) +
  coord_cartesian(
    ylim = c(0, 2.4),
    xlim = c(1, 8)
  )