Related
My project partner and I are trying to create one stacked, horizontal bar chart from multiple vertical bar charts, as illustrated on the right-hand side of the second picture. So far, we have been able to flip our graphs to a horizontal orientation with the help of this forum: Horizontal Barplot in ggplot2. However, we are currently stuck at merging multiple horizontal bars into one.
We have currently been trying to implement ways to merge our bars into only one bar in our graphs (and to eventually merge all the graphs into a single one).
However, we could not find examples with datasets similar to ours, nor were we able to implement the posted answers/codes to our dataset/code.
This is our dataset, simplified thanks to an answer from this forum: How to make a great R reproducible example (with libraries included)
library(pdftools)
library(forcats)
library(stringr)
library(showtext)
library(ggplot2)
library(jpeg)
library(png)
library(gridExtra)
library(grid)
library(cowplot)
library(rgdal)
library(rgeos)
library(magick)
library(tidyverse)
# Example data (dput output): 10 rows in three segments -- "Allgemein" (3 rows),
# "Bundesland" (4 rows) and "Sektor" (3 rows). `group` is a factor that keeps
# all 32 levels even though only 10 of them occur in these rows.
# `abs_change` / `rel_change` are numeric; `abs_change2` / `rel_change2` hold the
# same values as preformatted strings ("." as thousands separator, "," as
# decimal mark). `color` / `colorvj` hold hex colour codes, one per row.
# NOTE(review): row names 1-7 and 30-32 suggest this is a subset of a larger
# table -- confirm against the full data.
ams <- structure(list(segment = c("Allgemein", "Allgemein", "Allgemein","Bundesland", "Bundesland", "Bundesland", "Bundesland", "Sektor","Sektor", "Sektor"),
group = structure(c(9L, 7L, 18L, 6L, 15L,20L, 21L, 8L, 11L, 17L), levels = c("25 bis 49", "Akad. Ausbildung","Ausländer_innen", "Bau", "Behinderung",
"Burgenland", "Frauen","Gastronomie", "Gesamt", "Gesundh. Einschr.", "Gesundheitswesen","Handel", "Höhere Ausbildung", "Inländer_innen",
"Kärnten","Lehrausbildung", "Leiharbeit", "Männer", "Mittlere Ausbildung","Niederösterreich", "Oberösterreich", "Ohne gesundh. Einschr.","Pflichtschule", "Salzburg", "Steiermark", "Tirol", "Über 49","Unter 25", "Verkehr", "Vorarlberg", "Warenherstellung", "Wien"), class = "factor"),
value = c(319232, 152957, 166275, 8247,18128, 44807, 35270, 42128, 10236, 26847),
abs_change = c(-17860,-10570, -7290, -544, -3944, -8778, -3329, -4019, 879, -5528),rel_change = c(-5.3, -6.5, -4.2, -6.2, -17.9, -16.4, -8.6,-8.7, 9.4, -17.1),
rel_change2 = c("-5,3%", "-6,5%", "-4,2%","-6,2%", "-17,9%", "-16,4%", "-8,6%", "-8,7%", "+9,4%", "-17,1%"),
abs_change_vj = c(-21910, -13342, -8568, -757, -1185,-5927, -2205, -471, -934, -4421), rel_change_vj = c(-6.4,-8, -4.9, -8.4, -6.1, -11.7, -5.9, -1.1, -8.4, -14.1),
rel_change2vj = c("-6,4%","-8%", "-4,9%", "-8,4%", "-6,1%", "-11,7%", "-5,9%", "-1,1%","-8,4%", "-14,1%"),
color = c("#CEDE8C", "#CEDE8C", "#CEDE8C","#CEDE8C", "#CEDE8C", "#CEDE8C", "#CEDE8C", "#CEDE8C", "#EE7F74","#CEDE8C"),
colorvj = c("#CEDE8C", "#CEDE8C", "#CEDE8C","#CEDE8C", "#CEDE8C", "#CEDE8C", "#CEDE8C", "#CEDE8C", "#CEDE8C","#CEDE8C"),
abs_change2 = c("-17.860", "-10.570", "-7.290","-544", "-3.944", "-8.778", "-3.329", "-4.019", "879", "-5.528"),
abs_change2vj = c("-21.910", "-13.342", "-8.568", "-757","-1.185", "-5.927", "-2.205", "-471", "-934", "-4.421")), row.names = c(1L,2L, 3L, 4L, 5L, 6L, 7L, 30L, 31L, 32L), class = "data.frame")
And here is an example of the code which produces one of our current bar graphs:
# Horizontal bar chart of the absolute change for the "Allgemein" (general)
# segment: one bar per group, filled with the hex code stored in `color`.
allgemein3 <-
  ggplot(
    data = ams %>%
      # keep only the rows of the "Allgemein" segment
      filter(segment == "Allgemein") %>%
      # move these levels to the front so the bars follow this order
      mutate(group = fct_relevel(group, "Männer", "Frauen", "Gesamt")),
    aes(x = group, y = abs_change, fill = color)
  ) +
  # geom_col() is geom_bar(stat = "identity"); stacking is its default position
  geom_col(width = 0.55) +
  # use the values of `color` literally as fill colours; guide = "none"
  # replaces the deprecated guide = FALSE and suppresses the legend
  scale_fill_identity(guide = "none") +
  geom_hline(yintercept = 0) +
  # flip the axes so the bars run horizontally
  coord_flip()
allgemein3
And this is the resulting bar graph: https://drive.google.com/file/d/1Uj_vWtuRtxiuIny6WT4_2s_V_07LGvaD/view?usp=sharing
And this illustrates the graphs in their original form (left-hand side) and the graph we would like to produce (right-hand side): https://drive.google.com/file/d/1P_2411HvTYB1OlXQCFbW_48_XzyHfILO/view?usp=share_link
This is our session info:
─ Session info ────────────────────────────────────────────────────────────────────────────────────────────
setting value
version R version 4.2.1 (2022-06-23)
os macOS Ventura 13.0
system x86_64, darwin17.0
ui RStudio
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz Europe/Vienna
date 2022-11-10
rstudio 2021.09.2+382 Ghost Orchid (desktop)
pandoc 2.14.0.3 # /Applications/RStudio.app/Contents/MacOS/pandoc/ (via rmarkdown)
Any solution or advice will be greatly appreciated! Please let us know any questions you have.
The quick and easy option would be to map your segments on the y axis and fill by group. The drawback is that you end up with one legend for all segments, even if there are some options like ggnewscale to get separate legends:
library(tidyverse)
# Drop the "Gesamt" rows, then fix the factor orders used by the plot:
# `group` keeps only the remaining levels in order of first appearance,
# `segment` is reversed.
ams_filtered <- ams %>%
  filter(group != "Gesamt") %>%
  mutate(
    group = fct_inorder(factor(group)),
    segment = fct_rev(segment)
  )

# One stacked horizontal bar per segment, filled by group, with a reference
# line at zero and no axis or legend titles.
ggplot(ams_filtered, aes(abs_change, segment, fill = group)) +
  geom_col(width = 0.55) +
  geom_vline(xintercept = 0) +
  labs(x = NULL, y = NULL, fill = NULL)
A second option would be to create separate plots which you could glue together using e.g. patchwork. To make my life easier I use a plotting function. However the same approach works if you create your plots one by one:
# Draw one horizontal stacked bar chart of `abs_change` per `segment`,
# filled by `group`.
#
# .data   data frame with the columns abs_change, segment and group
# limits  optional numeric vector of length 2 handed to scale_x_continuous();
#         NULL (the default) lets ggplot2 pick the limits
#
# Returns a ggplot object.
plot_fun <- function(.data, limits = NULL) {
  ggplot(data = .data, aes(x = abs_change, y = segment, fill = group)) +
    geom_col(width = 0.55) +
    geom_vline(xintercept = 0) +
    scale_x_continuous(limits = limits) +
    labs(x = NULL, y = NULL, fill = NULL)
}

# Smallest and largest x position reached by the stacked bars.
# Negative and positive values are stacked separately on either side of zero,
# so the extent of a bar is the sum of its negative (resp. positive) values,
# not the range of the individual rows.
stack_extent <- function(.data) {
  lower <- tapply(pmin(.data$abs_change, 0), .data$segment, sum)
  upper <- tapply(pmax(.data$abs_change, 0), .data$segment, sum)
  # na.rm drops segments (unused factor levels) that have no rows
  c(min(lower, na.rm = TRUE), max(upper, na.rm = TRUE))
}

# Shared limits so every panel uses the same x scale. They are computed from
# the data that is actually plotted; bars reaching beyond the limits would be
# dropped by scale_x_continuous().
limits <- stack_extent(ams_filtered)
ams_split <- split(ams_filtered, ams_filtered$segment)
plot_list <- purrr::map(ams_split, plot_fun, limits = limits)

# Glue the per-segment plots together in a single column
library(patchwork)
wrap_plots(plot_list, ncol = 1)
I have a list of data frames,
>head(df.list.xyg[["archae.list"]])
motif obs pred prop stat pval stdres
AAB 1189 760.1757 0.05556028 11811.94 0 16.00425
CDD 1058 249.7147 0.01825133 11811.94 0 51.62291
DDE 771 415.1314 0.03034143 11811.94 0 17.73730
FBB 544 226.3529 0.01654385 11811.94 0 21.28994
>head(df.list.xyg[["eukaryote.list"]])
motif obs pred prop stat pval stdres
ABG 82015 48922.33 0.08773749 321891.7 0 156.64562
GBC 51601 64768.42 0.11615591 321891.7 0 -55.03402
AGG 41922 30136.56 0.05404701 321891.7 0 69.80141
CGG 25545 14757.24 0.02646569 321891.7 0 90.00215
BTT 15795 12433.58 0.02229843 321891.7 0 30.48747
I would like to
barplot motif versus stdres such that the plots are displayed in single column but two rows, and
label each plot corresponding to part of the file name "archae", eukaryote and so on.
After searching around, I found the following code snippet, which does the job but only partly: it only plots the last dataset. I assume it is "overwriting" the earlier dataset. How do I fix this? Can this be achieved with facet_wrap or facet_grid?
# Build one bar chart (motif vs. stdres) per data frame in the list and
# print the resulting list of plots.
myplots <- lapply(df.list.xyg, function(x) {
  ggplot(x, aes(x = motif, y = stdres)) +
    geom_bar(stat = "identity", width = 0.5, color = "blue", fill = "gray")
})
print(myplots)
If you just want the plots to appear together in a single plotting window, you can use facets. You can bind the rows of your data frames together with dplyr::bind_rows, which will create an id column to label which data frame each row belongs to. We facet by this ID variable.
library(ggplot2)
# Stack all data frames into one; the list names end up in the "Kingdom"
# column, which then defines one facet column per data frame.
ggplot(
  dplyr::bind_rows(df.list.xyg, .id = "Kingdom"),
  aes(x = motif, y = stdres)
) +
  geom_col(width = 0.5, fill = "deepskyblue4") +
  geom_hline(yintercept = 0, color = "gray75") +
  facet_grid(cols = vars(Kingdom)) +
  theme_bw(base_size = 16)
If you have lots of different data frames in your list, facet_wrap may be better than facet_grid, because it wraps the panels over several rows instead of squeezing them all into a single row.
Depending on how you wish to present the data, you may prefer to save individual plots to files using ggsave inside your lapply
Reproducible data taken from question
# Reproducible example data: a named list of two data frames with the columns
# motif, obs, pred, prop, stat, pval and stdres (4 and 5 rows respectively).
# NOTE(review): the elements are named `archae` and `eukaryote.list`, whereas
# the question indexes `df.list.xyg[["archae.list"]]` -- confirm which naming
# is intended.
df.list.xyg <- list(archae = structure(list(motif = c("AAB",
"CDD", "DDE", "FBB"), obs = c(1189L, 1058L, 771L, 544L),
pred = c(760.1757, 249.7147,
415.1314, 226.3529), prop = c(0.05556028, 0.01825133, 0.03034143,
0.01654385), stat = c(11811.94, 11811.94, 11811.94, 11811.94),
pval = c(0L, 0L, 0L, 0L), stdres = c(16.00425, 51.62291,
17.7373, 21.28994)), class = "data.frame", row.names = c(NA,
-4L)), eukaryote.list = structure(list(motif = c("ABG", "GBC",
"AGG", "CGG", "BTT"), obs = c(82015L, 51601L, 41922L, 25545L,
15795L), pred = c(48922.33, 64768.42, 30136.56, 14757.24, 12433.58
), prop = c(0.08773749, 0.11615591, 0.05404701, 0.02646569, 0.02229843
), stat = c(321891.7, 321891.7, 321891.7, 321891.7, 321891.7),
pval = c(0L, 0L, 0L, 0L, 0L), stdres = c(156.64562, -55.03402,
69.80141, 90.00215, 30.48747)), class = "data.frame", row.names = c(NA,
-5L)))
Created on 2022-05-23 by the reprex package (v2.0.1)
#allancameron: Elegant solution! Thanks. The key then is to row-bind so that the data can be facet-wrapped according to kingdom. I modified the code as below
plot in a single column-two rows
scale the y-axis according to the range of data
turn the y-axis labels 90 degrees for better readability.
library(ggplot2)
# Same plot as a single column: one facet row per data frame, with axis
# scales free to follow each panel's own data range.
ggplot(
  dplyr::bind_rows(df.list.xyg, .id = "Kingdom"),
  aes(x = motif, y = stdres)
) +
  geom_col(width = 0.5, fill = "deepskyblue4") +
  geom_hline(yintercept = 0, color = "gray75") +
  facet_grid(rows = vars(Kingdom), scales = "free") +
  theme_bw(base_size = 16)
I have a data frame built like this:
Id Client data
1 5 25
2 8 63
3 13 42
4 5 87
5 8 35
and an array: clients <- c(5,8)
I need to plot a different histogram (of the data column) for each client that is in the "clients" array. In this example I would plot a histogram for client 5 with two bars (25, 87) and one for client 8, also with two bars (63, 35). I think that I need to use the facet_wrap function to plot a histogram for each client. I also tried to use a for loop to plot each client, but it didn't work. I'm not sure how I can do it, so any help would be great!
Seems like you just didn't do enough data-wrangling. Also, from your description, you need barplot, not a histogram (which would report counts of particular values in data, not their value).
This is a solution in base.
# Example data: several observations (`data`) per client
dt <- data.frame(
  id = 1:5,
  client = c(5, 8, 13, 5, 8),
  data = c(25, 63, 42, 87, 35)
)
clients <- c(5, 8, 13) # for particular clients, or unique(dt$client) for all clients

# get the data values of every requested client (one vector per client)
lst <- lapply(clients, function(x) dt[dt$client == x, "data"])

# unify length and transform into a matrix: indexing past the end of a
# shorter vector pads it with NA, so every column has `len` rows
len <- max(lengths(lst))
mat <- do.call(cbind, lapply(lst, `[`, seq_len(len)))

# Put some nice legend
colnames(mat) <- paste("Client", clients)

# plot this matrix with barplot: one group of bars per client
barplot(mat, beside = TRUE, las = 1)
You can plot on the same graph if there is a limited number of clients.
library(dplyr)
library(ggplot2)
# Keep the requested clients, number each client's rows (Id = 1, 2, ...)
# and draw the values as side-by-side bars per client.
df %>%
  filter(Client %in% clients) %>%
  group_by(Client) %>%
  mutate(Id = factor(row_number())) %>%
  ggplot() +
  aes(x = Client, y = data, fill = Id) +
  geom_col(position = "dodge")
With facets :
# Same bars as above, but one facet per client, each with its own x scale.
df %>%
  filter(Client %in% clients) %>%
  group_by(Client) %>%
  mutate(Id = factor(row_number())) %>%
  ggplot() +
  aes(x = Client, y = data, fill = Id) +
  geom_col(position = "dodge") +
  facet_wrap(vars(Client), scales = "free_x")
data
# Example data: five observations spread over three clients
df <- data.frame(
  Id = 1:5,
  Client = c(5L, 8L, 13L, 5L, 8L),
  data = c(25L, 63L, 42L, 87L, 35L)
)
# Clients to plot
clients <- c(5, 8)
(image) I have the following data. Could anyone please help me plot it? I have tried a lot of different commands, but none of them has given me the graph I want.
year x y
2012 4 5
2014 7 9
2017 4 3
enter image description here
This picture shows the plot I need to reproduce.
Based on your comments you might be looking for:
library(tidyverse)
# Reshape to long format (one row per year and measure), then draw one
# point-and-line panel per measure.
# pivot_longer() is the current replacement for the superseded gather();
# it plays the same role here: every column except `year` is stacked into
# the `measure` / `value` pair.
plot1 <- df %>%
  pivot_longer(cols = -year, names_to = "measure", values_to = "value") %>%
  ggplot(aes(x = year, y = value, color = measure)) +
  geom_point() +
  geom_line() +
  facet_wrap(~measure)
plot1
The biggest points here are gather and facet_wrap. I recommend the following two links:
https://ggplot2.tidyverse.org/reference/facet_grid.html
https://ggplot2.tidyverse.org/reference/facet_wrap.html
You need to convert year column type to Date.
This is a tidyverse style solution
library(tidyverse)
# Rename the value columns, turn the numeric year into a Date
# (1 January of that year) and draw both series as thick lines.
mydf %>%
  rename(col1 = x, col2 = y) %>%
  mutate(year = as.Date(paste0(year, "-01-01"))) %>%
  ggplot() +
  geom_line(aes(x = year, y = col1), color = "red", size = 2) +
  geom_line(aes(x = year, y = col2), color = "blue", size = 2) +
  theme_minimal()
which returns this
Using the data shown reproducibly in the Note below use matplot. No packages are used.
# Plot every column except the first against the first column, with
# overplotted points and lines; the points are drawn as the letters x and y.
matplot(
  dd[[1]],
  dd[-1],
  type = "o",
  pch = c("x", "y"),
  xlab = "year",
  ylab = "value"
)
Note
# Example data: two integer series (x, y) observed in three years
dd <- data.frame(
  year = c(2012L, 2014L, 2017L),
  x = c(4L, 7L, 4L),
  y = c(5L, 9L, 3L)
)
# Example data: one value per month, each month handled by one adjuster
df <- data.frame(
  adjuster = c("Mary", "Mary", "Bob", "Bob"),
  date = as.Date(c("2012-1-1", "2012-2-1", "2012-3-1", "2012-4-1")),
  value = c(10, 15, 25, 15)
)
df
adjuster date value
1 Mary 2012-01-01 10
2 Mary 2012-02-01 15
3 Bob 2012-03-01 25
4 Bob 2012-04-01 15
# Lines and points of value over time, coloured (and therefore grouped)
# by adjuster
ggplot(df, aes(x = date, y = value, color = adjuster)) +
  geom_line() +
  geom_point()
In the above graph, notice the disconnect between the February and March points. How do I connect those points with a blue line, leaving the actual March point red? In other words, Mary should be associated with the value from [Jan - Mar) and Bob from [Mar-Apr].
EDIT: Turns out my example was overly simple. The answers listed don't generalize to the case where the adjuster changes between two people on more than one occasion. For example, consider
# Extended example: the adjuster changes more than once
df <- data.frame(
  adjuster = c("Mary", "Mary", "Bob", "Bob", "Mary"),
  date = as.Date(c("2012-1-1", "2012-2-1", "2012-3-1", "2012-4-1", "2012-5-1")),
  value = c(10, 15, 25, 15, 20)
)
adjuster date value
1 Mary 2012-01-01 10
2 Mary 2012-02-01 15
3 Bob 2012-03-01 25
4 Bob 2012-04-01 15
5 Mary 2012-05-01 20
Since I didn't mention this in my original question, I'll pick an answer that simply worked for my original data.
Updated to minimise tinkering with data.frame, added the group = 1 argument
Tinkered around with your data.frame a little. You should be able to automate the tinkering around, I guess. Let me know if you aren't. Also, your ggplot command wasn't working as per the chart you've posted in the question
# Example data: one value per month, each month handled by one adjuster
df <- data.frame(
  adjuster = c("Mary", "Mary", "Bob", "Bob"),
  date = as.Date(c("2012-1-1", "2012-2-1", "2012-3-1", "2012-4-1")),
  value = c(10, 15, 25, 15)
)

library(data.table)
library(ggplot2)

dt <- data.table(df)
dt[, adjuster := as.character(adjuster)]
# adjuster of the previous row; the first row has none, so it falls back
# to its own adjuster
dt[, prevadjuster := c(NA, head(adjuster, -1))]
dt[is.na(prevadjuster), prevadjuster := adjuster]

# group = 1 joins all observations into a single path, so the line is not
# broken where the colour variable changes
ggplot(dt, aes(x = date, y = value, group = 1)) +
  geom_line(aes(color = prevadjuster)) +
  geom_line(aes(color = adjuster)) +
  geom_point(aes(color = adjuster))
I'd like to put forward a solution that does not require modifying the dataframe, that is intuitive (once you think about how the layers are drawn), and does not involve lines overwriting one another. It does, however, have one problem: it does not allow you to modify the linetype. I do not know why that is, so if someone could chime in to enlighten us, it would be great.
Quick answer to the OP:
# The colour mapping is inherited from ggplot(); only the grouping differs
# per layer: one group for the whole line, one group per adjuster for the
# points (which also get a shape per adjuster).
ggplot(df, aes(x = date, y = value, color = adjuster)) +
  geom_line(aes(group = 1)) +
  geom_point(aes(group = adjuster, shape = adjuster))
In the OP's dataframe, one can use group=1 to create a group spanning the whole period.
An example illustrated with figures:
# Create data: six decades of one variable, the first three rows flagged
# "Observed" and the last three "Projected"
df <- structure(
  list(
    year = c(1990, 2000, 2010, 2020, 2030, 2040),
    variable = factor(rep("Something", 6L)),
    value = c(4, 5, 6, 7, 8, 9),
    category = factor(rep(c("Observed", "Projected"), each = 3L))
  ),
  row.names = c(NA, 6L),
  class = "data.frame"
)
# Load library
library(ggplot2)
The basic plot, similar to the OP, groups data by category both inside geom_point(aes()) and inside geom_line(aes()), with the undesirable result, in this application, that the line does not 'bridge' the two points across the two categories.
# Basic ggplot with geom_point() and geom_line()
# Grouping by `category` for the whole plot draws one separate line per
# category, so the line is not connected across the two categories.
p <- ggplot(data = df, aes(x = year, y = value, group = category)) +
  geom_point(aes(colour = category, shape = category), size = 4) +
  geom_line(aes(colour = category), size = 1)
# Name both arguments explicitly instead of relying on partial matching of
# `file` to `filename` and on positional matching of the plot
ggsave(filename = "ggplot-points-connect_p1.png", plot = p, width = 10, height = 10)
The key to my solution is to group by variable but to colour by category inside geom_line(aes())
# Modified version to connect the dots "continuously" while preserving color grouping
# The line is grouped by `variable` (a single level, hence one continuous
# path) but still coloured by `category`; the points keep the per-category
# grouping.
p <- ggplot(data = df, aes(x = year, y = value)) +
  geom_point(aes(group = category, colour = category, shape = category), size = 4) +
  geom_line(aes(group = variable, colour = category), size = 1)
# Name both arguments explicitly instead of relying on partial matching of
# `file` to `filename` and on positional matching of the plot
ggsave(filename = "ggplot-points-connect_p2.png", plot = p, width = 10, height = 10)
However, sadly, with this approach it is not currently possible to control the linetype, as far as I can make out:
# Same plot as before, but requesting a dotted line; this is the call that
# triggers the error quoted below.
ggplot(df, aes(x = year, y = value)) +
  geom_point(
    aes(group = category, colour = category, shape = category),
    size = 4
  ) +
  geom_line(
    aes(group = variable, colour = category),
    linetype = "dotted",
    size = 1
  )
## Error: geom_path: If you are using dotted or dashed lines, colour, size and linetype must be constant over the line
Remark: I'm using another dataframe because I'm copy-pasting from something I was doing and that made me visit this question -- this way I can upload my images.
Here is a simple solution. No need to change the original data.frame.
# Three layers drawn on top of each other:
#   1. a dashed line through all points (no colour mapping, hence one path),
#   2. the points, coloured by adjuster,
#   3. solid lines per adjuster, which cover the dashed line wherever two
#      consecutive points share an adjuster.
# aes() replaces the deprecated aes_string(); the column names are fixed,
# so there is no need to pass them as strings.
ggplot() +
  geom_line(aes(x = date, y = value), data = df, lty = 2) +
  geom_point(aes(x = date, y = value, color = adjuster), data = df) +
  geom_line(aes(x = date, y = value, color = adjuster), data = df)
That's one of my favorite features of ggplot. You can layer your plots one on top of the other pretty cleanly.
Here is the result:
I came up with a solution that combines ideas from Codoremifa and JAponte.
# Example data: one value per month, each month handled by one adjuster
df <- data.frame(
  adjuster = c("Mary", "Mary", "Bob", "Bob"),
  date = as.Date(c("2012-1-1", "2012-2-1", "2012-3-1", "2012-4-1")),
  value = c(10, 15, 25, 15)
)
# AdjusterLine is the adjuster of the previous row; the first row keeps its
# own adjuster. The index vector is 1, 1, 2, ..., n - 1, which (unlike
# 2:nrow(df)) is also correct for data frames with one or zero rows.
df$AdjusterLine <- df$adjuster[pmax(seq_len(nrow(df)) - 1L, 1L)]
# Dashed line coloured by the previous row's adjuster, then the solid
# per-adjuster lines and the points on top of it.
ggplot(df) +
  geom_line(aes(x = date, y = value, color = AdjusterLine), lty = 2) +
  geom_line(aes(x = date, y = value, color = adjuster)) +
  geom_point(aes(x = date, y = value, color = adjuster))