Related
I'm trying to recreate a bar graph found on page 4 of the following report:
The figure has three bars with the first two stacked and the third dodged next to it. I've seen iterations of this question but none that recreate the figure in this exact way.
Here is the data:
# Example data: one row per Fund/Condition pair, Value is the plotted amount.
# The columns are named directly in data.frame(), so no temporary vectors are
# left behind (the original temporary named `c` shadowed base::c).
df <- data.frame(
  Fund = rep(c("RHB", "FERS", "CSRS"), times = 3),
  Condition = rep(
    c("Assets", "Amount Past Due", "Actuarial Liability"),
    each = 3
  ),
  Value = c(45.0, 122.5, 152.3, 47.2, 3.4, 4.8, 114.4, 143.4, 181.3)
)
And what I've managed so far:
# Horizontal stacked bars: one bar per Fund, segments filled by Condition.
# Uses `df`, the data frame built above; the original referenced
# `subset_data`, which is never defined in this example.
p <- ggplot(df, aes(x = Fund, y = Value, fill = Condition)) +
  geom_col(position = "stack") +
  coord_flip()
I'm not partial to ggplot so if there's another tool that works better I'm ok using another package.
Taking some ideas from the link @aosmith posted.
You can call geom_bar twice, once with Assets and Amount Past Due stacked, and again with just Actuarial Liability.
You can use width to make the bars thinner, then nudge one set of bars so the two geom_bar calls are not overlapping. I chose to make the width 0.3 and nudge by 0.3 so the edges just line up. If you nudge by more you will see a gap between the two bars.
Edit: add some more formatting and numeric labels
library(tidyverse)
library(scales)
# Split the data so the liability bar can be drawn beside the stacked bar.
df_al <- filter(df, Condition == "Actuarial Liability")
df_xal <- filter(df, Condition != "Actuarial Liability")

bar_width <- 0.3
hjust_lab <- 1.1
hjust_lab_small <- -0.2 # hjust for labels on small bars

ggplot() +
  theme_classic() +
  # Liability bar, nudged by one bar width so its edge meets the stacked bar.
  geom_col(
    data = df_al,
    aes(x = Fund, y = Value, fill = Condition),
    position = position_nudge(x = -bar_width),
    width = bar_width
  ) +
  # Remaining conditions stacked in a single bar per Fund.
  geom_col(
    data = df_xal,
    aes(x = Fund, y = Value, fill = Condition),
    position = "stack",
    width = bar_width
  ) +
  geom_text(
    data = df_al,
    aes(x = Fund, y = Value, label = dollar(Value, drop0trailing = TRUE)),
    position = position_nudge(x = -bar_width),
    hjust = hjust_lab
  ) +
  # hjust is mapped inside aes() so each label keeps its own justification
  # even if the layer data is reordered; group = Condition makes the labels
  # stack in the same order as the bars they annotate.
  geom_text(
    data = df_xal,
    aes(
      x = Fund,
      y = Value,
      label = dollar(Value, drop0trailing = TRUE),
      group = Condition,
      hjust = ifelse(Value < 5, hjust_lab_small, hjust_lab)
    ),
    position = "stack"
  ) +
  scale_fill_manual(values = c('firebrick3', 'lightsalmon', 'dodgerblue')) +
  scale_y_continuous(breaks = seq(0, 180, by = 20), labels = dollar) +
  coord_flip() +
  labs(x = NULL, y = NULL, fill = NULL) +
  theme(legend.position = "bottom")
I think I would use the "sneaky facet" method, after adding a dummy variable to dodge the columns and making Fund a factor with the correct order:
# Flag every row that is not the liability; the flag is used to separate the
# liability bar from the stacked bar.
df[["not_liability"]] <- df[["Condition"]] != "Actuarial Liability"
# Fix the display order of the funds.
df[["Fund"]] <- factor(df[["Fund"]], levels = c("RHB", "FERS", "CSRS"))
Most of the plotting code is then an attempt to copy the look of the supplied plot:
# One facet row per Fund; inside each facet the not_liability flag puts the
# liability bar and the stacked bar at separate positions.
ggplot(df, aes(x = not_liability, y = Value, fill = Condition)) +
  geom_col(position = "stack") +
  facet_grid(rows = vars(Fund), switch = "y") +
  coord_flip() +
  scale_x_discrete(expand = c(0.5, 0.5)) +
  scale_y_continuous(breaks = seq(0, 200, by = 20), labels = scales::dollar) +
  scale_fill_manual(name = "", values = c("#c00000", "#f7c290", "#0071bf")) +
  theme_classic() +
  theme(
    # Facets touch each other and the strips act as the axis labels.
    panel.spacing = unit(0, "points"),
    strip.background = element_blank(),
    strip.placement = "outside",
    strip.text = element_text(),
    axis.text.y = element_blank(),
    axis.ticks.length.y = unit(0, "points"),
    axis.title = element_blank(),
    panel.grid.major.x = element_line(),
    legend.position = "bottom"
  )
I created a stacked bar chart using ggplot and the following code:
# Bars stacked to 100% by Taxa, one facet column per group.
ggplot(group, aes(x = variable, y = value, fill = Taxa)) +
  geom_col(position = "fill") +
  facet_grid(cols = vars(group), scales = "free_x", space = "free_x") +
  scale_fill_manual(values = Cb64k) +
  scale_y_continuous(labels = percent_format()) +
  guides(fill = guide_legend(ncol = 6)) +
  labs(title = opt$gtitle, x = "Patient ID", y = "Relative Activity") +
  theme(
    legend.position = "bottom",
    text = element_text(size = 10.5),
    axis.text.x = element_text(angle = 0, vjust = 1)
  )
To get this output:
Is there a way to reorder the "stacks" in each bar so that they go from largest to smallest, starting at the bottom? As you can see, in the current output the order seems to be random.
I am trying to create a barplot with two x-axis (grouped x-axis):
# read data
# CNV_Type is built as a factor with "Loss" as the first level. read.table()
# returns character columns by default since R 4.0 and relevel() only accepts
# factors, so the level order is set explicitly instead of via relevel().
tmp <- read.table(
  text = "label CNV_x CNV_Type
17p -1 Loss
9p -1 Loss
16q 1 Gain
10p 1 Gain
8q 1 Gain
13q 1 Gain",
  header = TRUE
)
tmp$CNV_Type <- factor(tmp$CNV_Type, levels = c("Loss", "Gain"))
# plot
# Flipped bars of CNV_x per label, one panel per CNV_Type; the strips are
# placed outside the panels at the bottom and the panels have no gap.
ggplot(tmp, aes(x = label, y = CNV_x)) +
  geom_col() +
  geom_hline(yintercept = 0) +
  coord_flip() +
  facet_wrap(vars(CNV_Type), strip.position = "bottom", scales = "free_x") +
  theme_bw() +
  theme(
    panel.border = element_rect(colour = NA),
    panel.spacing = unit(0, "lines"),
    strip.background = element_blank(),
    strip.placement = "outside"
  )
This creates a plot like this:
This plot shows 0.00 twice on x-axis and I can't figure out a way to remove the spacing between the two vertical lines separating the strips (one is Gain and other is Loss).
Any help would be much appreciated. Thanks!
UPDATE: I added scale_y_continuous(expand = c(0, 0)) as suggested below:
# Same faceted plot, with the expansion of the CNV_x scale set to zero.
ggplot(tmp, aes(x = label, y = CNV_x)) +
  geom_col() +
  geom_hline(yintercept = 0) +
  scale_y_continuous(expand = c(0, 0)) +
  coord_flip() +
  facet_wrap(vars(CNV_Type), strip.position = "bottom", scales = "free_x") +
  theme_bw() +
  theme(
    panel.border = element_rect(colour = NA),
    panel.spacing = unit(0, "lines"),
    strip.background = element_blank(),
    strip.placement = "outside"
  )
This creates a plot like this:
The only issue now is there is no spacing between the bars and the left and right margins of the plot - not sure why that happened.
I would not use facets here. A couple of options. You could indicate the type by colour:
# Single panel: the CNV type is shown by the bar fill instead of by facets.
ggplot(tmp, aes(x = label, y = CNV_x)) +
  geom_col(mapping = aes(fill = CNV_Type)) +
  geom_hline(yintercept = 0) +
  scale_fill_manual(values = c("darkorange", "skyblue3")) +
  coord_flip()
And/or add the labels for type to the plot using annotate. That requires some manual fiddling with x, y and expand to get it right:
# Single panel with the type names written onto the plot as text.
ggplot(tmp, aes(x = label, y = CNV_x)) +
  geom_col() +
  geom_hline(yintercept = 0) +
  # Both labels sit at x = 7, one on each side of zero.
  annotate(
    geom = "text",
    x = rep(7, times = 2),
    y = c(-0.5, 0.5),
    label = c("Loss", "Gain")
  ) +
  scale_x_discrete(expand = c(0.1, 0.1)) +
  coord_flip()
I am working with a dataset that I found on wikipedia regarding the nutritional content of staple grains. I scraped the data table using the rvest package and created the graphic shown below
It was pointed out to me that perhaps it might be better to represent the "Recommended Dietary Allowance"(RDA) with a vertical line as opposed to a bar.
1) How do I create a separate vertical line representing the "Recommended Dietary Allowance"?
The code used to create the graphic is below: I am not sure on whether I should include the code used to gather and wrangle the data. Please let me know if that would help.
# Dodged, flipped bars of nutrient amounts per grain, one free-scaled panel
# per nutrient component. All theme tweaks are collected in one theme() call.
ggplot(grain.nut, aes(x = grain, y = nutrients, fill = grain)) +
  geom_col(position = "dodge") +
  facet_wrap(vars(nutrient.component.), scales = "free") +
  coord_flip() +
  scale_fill_manual(
    values = c(
      "#e70000", "#204bcc", "#68ca3b", "#fe9bff",
      "#518901", "#de0890", "#fcba4c", "#292c7a",
      "#e69067", "#79b5ff", "#68272d", "#c9cb6c"
    )
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  labs(
    title = "Nutrient Content of Major Staple Foods per 100 gram Portion",
    caption = "https://en.wikipedia.org/wiki/Staple_food#Nutritional_content"
  ) +
  theme(
    plot.title = element_text(size = 30, face = "bold"),
    plot.caption = element_text(hjust = 0.84),
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    legend.title = element_blank(),
    legend.position = c(0.80, 0.05),
    legend.direction = "horizontal"
  )
I have tried using geom_vline as well as geom_hline. But I think my problem is the way I am trying to call the value for RDA via levels(grain.nut$grain)[1], the output of which is "Recommended Dietary Allowance".
# Attempted reference line: xintercept is set to the first level of
# grain.nut$grain.
geom_vline(aes(xintercept = levels(grain.nut$grain)[1]))
Any help would be appreciated!
Here is an approach using geom_linerange or geom_pointrange.
First the data:
library("rvest")
library(tidyverse)
# Download the Staple food article and parse the table matched by
# div/table[2] under the element whose id is "mw-content-text".
# XPath selects attributes with `@`; the original predicate used `#id`,
# which is not valid XPath.
url <- "https://en.wikipedia.org/wiki/Staple_food"
nutrient <- url %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/div/table[2]') %>%
  html_table()
Get the correct order of levels for the discrete scale:
# Level order for the discrete axis: the fifth level is moved to the end.
lev <- levels(as.factor(z$grain))[c(setdiff(1:12, 5), 5)]
The plot:
# Reshape the scraped table once: one row per nutrient/grain pair with a
# numeric value. Both layers below filter this long table, replacing the
# duplicated pipeline (which also used the deprecated as.tibble()).
nutrient_long <- nutrient[[1]] %>%
  as_tibble() %>%
  gather(grain, value, 2:ncol(.)) %>%
  mutate(
    nutrient = `Nutrient component:`,
    value = as.numeric(value)
  )

ggplot() +
  # Bars for every column except RDA.
  geom_col(
    data = filter(nutrient_long, grain != "RDA"),
    aes(grain, value, fill = grain),
    position = "dodge"
  ) +
  # RDA drawn as a range from 0 to its value, with a point at the value.
  geom_pointrange(
    data = filter(nutrient_long, grain == "RDA"),
    aes(x = grain, ymin = 0, ymax = value, y = value, color = grain),
    size = 0.3,
    show.legend = FALSE
  ) +
  facet_wrap(~ nutrient, scales = "free") +
  scale_x_discrete(limits = lev) +
  coord_flip() +
  labs(
    title = "Nutrient Content of Major Staple Foods per 100 gram Portion",
    caption = "https://en.wikipedia.org/wiki/Staple_food#Nutritional_content"
  ) +
  theme(
    plot.title = element_text(size = 30, face = "bold"),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.title = element_blank(),
    legend.position = c(0.80, 0.05),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    plot.caption = element_text(hjust = 0.84)
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_manual(
    values = c(
      "#e70000", "#204bcc", "#68ca3b", "#fe9bff",
      "#518901", "#de0890", "#fcba4c", "#292c7a",
      "#e69067", "#79b5ff", "#68272d", "#c9cb6c"
    )
  )
Basically two layers are used with different data: geom_col with data without RDA and geom_pointrange for data with only RDA. And the order is changed in scale_x_discrete to match the lev object.
If you do not like the points, use geom_linerange and omit the y in the aes call.
Or did you mean this?
# Reshape the scraped table once: one row per nutrient/grain pair with a
# numeric value. Both layers below filter this long table, replacing the
# duplicated pipeline (which also used the deprecated as.tibble()).
nutrient_long <- nutrient[[1]] %>%
  as_tibble() %>%
  gather(grain, value, 2:ncol(.)) %>%
  mutate(
    nutrient = `Nutrient component:`,
    value = as.numeric(value)
  )

ggplot() +
  # Bars for every column except RDA.
  geom_col(
    data = filter(nutrient_long, grain != "RDA"),
    aes(grain, value, fill = grain),
    position = "dodge"
  ) +
  # RDA drawn as a reference line across each panel at its value.
  geom_hline(
    data = filter(nutrient_long, grain == "RDA"),
    aes(yintercept = value),
    show.legend = FALSE
  ) +
  facet_wrap(~ nutrient, scales = "free") +
  coord_flip() +
  labs(
    title = "Nutrient Content of Major Staple Foods per 100 gram Portion",
    caption = "https://en.wikipedia.org/wiki/Staple_food#Nutritional_content"
  ) +
  theme(
    plot.title = element_text(size = 30, face = "bold"),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.title = element_blank(),
    legend.position = c(0.80, 0.05),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    plot.caption = element_text(hjust = 0.84)
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_manual(
    values = c(
      "#e70000", "#204bcc", "#68ca3b", "#fe9bff",
      "#518901", "#de0890", "#fcba4c", "#292c7a",
      "#e69067", "#79b5ff", "#68272d", "#c9cb6c"
    )
  )
I'm using ggplot2 with both + geom_line() + geom_point(). I have the colors/shapes worked out, but I can't scale the legend appropriately. If I do nothing it's tiny, and if I enlarge it, the color blocks the shape.
For example:
You can see that the shapes and colors are both in the legend, but the shapes are being drawn over by the colors. I would like to have shapes of the appropriate color drawn in the legend, but can't figure out how to do it.
My plot is being drawn as follows:
# Line-and-point plot of value per gene, coloured and shaped by variable.
# Every `+` now ends its line: R treats a line that begins with `+` as a new
# statement, so in the original form the layers were never added to the plot.
ggplot(
  data = melted,
  aes(
    x = gene, y = value, colour = variable, shape = variable,
    group = variable, stroke = 3, reorder(gene, value)
  )
) +
  theme_solarized() +
  scale_colour_solarized("blue") +
  geom_line() +
  geom_point() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.title = element_blank(),
    legend.text = element_text(size = 20)
  ) +
  ggtitle('Signiture Profiles') +
  # NOTE(review): `title =` sits inside expression(), not labs() - confirm
  # whether it was meant to be an argument of labs().
  labs(x = "Gene", y = expression(paste("Expression"), title = "Expression")) +
  scale_colour_manual(
    name = "Virus / Time",
    labels = c(
      "Mock",
      "ACali09_day1", "ACali09_day3", "ACali09_day8",
      "AShng113_day1", "AShng113_day3", "AShng113_day8",
      "AChkShng113_day1", "AChkShng113_day3", "AChkShng113_day8"
    ),
    values = c(
      "#ff420e",
      "#89da59", "#89da59", "#89da59",
      "#376467", "#376467", "#376467",
      "#00293c", "#00293c", "#00293c"
    )
  ) +
  scale_shape_manual(
    name = "Virus / Time",
    labels = c(
      "Mock",
      "ACali09_day1", "ACali09_day3", "ACali09_day8",
      "AShng113_day1", "AShng113_day3", "AShng113_day8",
      "AChkShng113_day1", "AChkShng113_day3", "AChkShng113_day8"
    ),
    values = c(0, 1, 2, 3, 1, 2, 3, 1, 2, 3)
  ) +
  guides(colour = guide_legend(override.aes = list(size = 12)))
Here is some example data as requested:Example Data
Thanks in advance for any help you can provide.
You could perhaps rethink how you are differentiating your variables.
You could do something like the following. Note the changes in the first line, where I have separated the component parts of variable rather than setting colours and shapes via your scale statements. (I haven't got your theme, so I left that out).
# Colour comes from the part of `variable` before the first underscore and
# shape from the part after the last one, so the two legends are separate.
ggplot(
  data = melted,
  mapping = aes(
    x = gene,
    y = value,
    colour = gsub("_.*","",variable),
    shape = gsub(".*_","",variable),
    group = variable,
    stroke = 3,
    reorder(gene, value)
  )
) +
  geom_line() +
  geom_point() +
  ggtitle("Signiture Profiles") +
  labs(x = "Gene", y = expression(paste("Expression"), title = "Expression")) +
  # Both legends draw their keys at size 5.
  guides(
    shape = guide_legend(override.aes = list(size = 5)),
    colour = guide_legend(override.aes = list(size = 5))
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.title = element_blank(),
    legend.text = element_text(size = 20)
  )