Single error bar for stacked graph equalling 100 - r

I have a stacked bar graph that shows the differences in classes between skeleton and tissue. The total of the two will always be 100 and their standard errors are the same. As such, the top error bar is superfluous and adds confusion.
Is there a way to only have the standard error for the bottom group? This link shows how to get a single bar for the top of the stack but isn't quite what I need: Single error bar on stacked bar plot ggplot Thanks.
Code:
library(reshape2)
library(Rmisc)
library(ggplot2)
melt <- melt(file, id=c("TREATMENT", "Species"),
value.name="Amount", variable.name = "Class")
x1 <- summarySE(melt, measurevar = "Amount",
groupvars = c("Species", "TREATMENT", "Class"), na.rm=TRUE)
x2 <- within(x1,lit2 <- ave(Amount, Class, Species, FUN = cumsum))
p10 <- ggplot(x2, aes(y = Amount, x = Class, fill = TREATMENT)) +
geom_bar(stat = "identity", colour = "black") +
geom_errorbar(aes(ymin = lit2-se, ymax = lit2+se), size = .5, width = .25)
p10
Data:
structure(list(TREATMENT = c("SKELETON", "SKELETON", "SKELETON",
"SKELETON", "TISSUE", "TISSUE", "TISSUE", "TISSUE"), Species = c("A",
"A", "A", "A", "A", "A", "A", "A"), `1` = c(42.1958615095789,
73.6083881998577, 62.1025409404354, 21.5264243794993, 57.8041384904211,
26.3916118001423, 37.8974590595646, 78.4735756205007), `2` = c(46.9398719372755,
89.6865089817669, 55.9907366318623, 18.1145895471236, 53.0601280627245,
10.3134910182331, 44.0092633681377, 81.8854104528764), `3` = c(55.4637732254405,
75.0933095632366, 20, 18.402199079204, 44.5362267745594, 24.9066904367634,
80, 81.597800920796)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -8L), .Names = c("TREATMENT", "Species",
"1", "2", "3"))

Related

R ggplot legend with Waffle chart

library(tidyverse)
library(waffle)
df_2 <- structure(list(group = c(2, 2, 2, 1, 1, 1),
parts = c("A", "B", "C", "A", "B", "C"),
values = c(1, 39, 60, 14, 15, 71)), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
df_2 %>% ggplot(aes(label = parts)) +
geom_pictogram(
n_rows = 10, aes(color = parts, values = values),
family = "fontawesome-webfont",
flip = TRUE
) +
scale_label_pictogram(
name = "Case",
values = c("male"),
breaks = c("A", "B", "C"),
labels = c("A", "B", "C")
) +
scale_color_manual(
name = "Case",
values = c("A" = "red", "B" = "green", "C" = "grey85"),
breaks = c("A", "B", "C"),
labels = c("A", "B", "C")
) +
facet_grid(~group)
With the above code, I got the legend what I expected:
However, when I replaced df_2 with the following df_1 dataframe, I was unable to combine two legends.
df_1 <- structure(list(group = c(2, 2, 2, 1, 1, 1),
parts = c("A", "B", "C", "A", "B", "C"),
values = c(0, 0, 100, 0, 0, 100)),
row.names = c(NA,-6L), class = c("tbl_df", "tbl", "data.frame"))
I kind of know the cause of the problem (0 values) but I would like to keep the legend the same as the graph above. Any suggestions would be appreciated.
To make it clear, the package "waffle" referred to here is not the CRAN package "waffle", but the GitHub-only package:
remotes::install_github("hrbrmstr/waffle")
library(waffle)
You will also need a way of displaying the pictograms, such as:
library(emojifont)
load.fontawesome()
Now, as with any other discrete scale, if you want to add values that are not present in the (post-stat) data, you need to use the limits argument:
df_1 %>% ggplot(aes(label = parts)) +
geom_pictogram(
n_rows = 10, aes(color = parts, values = values),
family = "fontawesome-webfont",
flip = TRUE
) +
scale_label_pictogram(
name = "Case",
values = c("male"),
breaks = c("A", "B", "C"),
labels = c("A", "B", "C"),
limits = c("A", "B", "C")
) +
scale_color_manual(
name = "Case",
values = c("A" = "red", "B" = "green", "C" = "grey85"),
breaks = c("A", "B", "C"),
labels = c("A", "B", "C")
) +
facet_grid(~group)
It is a bit tricky, but what you could do is say let's add 1 to all values so it will plot it like before. But using ggplot_build to remove from each case one row to get it in the right amount like this:
library(tidyverse)
library(waffle)
library(ggplot2)
library(dplyr)
library(emojifont)
library(waffle)
library(extrafont)
p <- df_1 %>% ggplot(aes(label = parts)) +
geom_pictogram(
n_rows = 10, aes(color = parts, values = values+1),
family = "fontawesome-webfont",
flip = TRUE
) +
scale_label_pictogram(
name = "Case",
values = c("male"),
breaks = c("A", "B", "C"),
labels = c("A", "B", "C")
) +
scale_color_manual(
name = "Case",
values = c("A" = "red", "B" = "green", "C" = "grey85"),
breaks = c("A", "B", "C"),
labels = c("A", "B", "C")
) +
facet_grid(~group)
q <- ggplot_build(p)
q$data[[1]] <- q$data[[1]] %>%
group_by(PANEL) %>%
slice(4:n())
q <- ggplot_gtable(q)
plot(q)
Created on 2022-10-20 with reprex v2.0.2

Adding line with specific df value to each bar in ggplot

My df is organized this way for example:
OCCURED_COUNTRY_DESC | a | b | c | d | flagged | type | MedDRA_PT| **E** |
__________________________________________________________________________
UNITED STATES |403|1243|473|4077| yes | disp | Seizure |144.208|
__________________________________________________________________________
My data:
structure(list(OCCURED_COUNTRY_DESC = c("AUSTRALIA", "AUSTRIA",
"BELGIUM", "BRAZIL", "CANADA"), a = c(4L, 7L, 20L, 5L, 11L),
b = c(31, 27, 100, 51, 125), c = c(872, 869, 856, 871, 865
), d = c(5289, 5293, 5220, 5269, 5195), w = c(876, 876, 876,
876, 876), x = c(5320, 5320, 5320, 5320, 5320), y = c(35L,
34L, 120L, 56L, 136L), z = c(6161, 6162, 6076, 6140, 6060
), N = c(6196, 6196, 6196, 6196, 6196), k = c("0.5", "0.5",
"0.5", "0.5", "0.5"), SOR = c(0.80199821407511, 1.52042360060514,
1.21312776329214, 0.615857869962066, 0.539569644832803),
log = c(-0.318329070860348, 0.604473324558599, 0.278731499148795,
-0.699330656240263, -0.890118907611227), LL99 = c(-0.695969674426877,
0.382102954188229, 0.198127619344382, -1.00534117464748,
-1.03425468471322), UL99 = c(-0.0544058884186467, 0.763880731966007,
0.337239065783058, -0.482651467660248, -0.785935460582379
), flagged = c("no", "no", "no", "no", "yes"), type = c(NA,
NA, NA, NA, "under"), MedDRA_PT = c("Seizure", "Seizure",
"Seizure", "Seizure", "Seizure"), E = c(5.11098506333901,
4.43283582089552, 16.3984674329502, 8.43063199848168, 20.8132820019249
)), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"
))
I am using ggplot2 to create a bar chart using the following piece of code:
test2 %>% #using test2 as the df
ggplot(aes(a, OCCURED_COUNTRY_DESC, fill=type)) +
geom_bar(stat="identity")+
scale_fill_manual(values = c("disp" = "#FF8C00",
"under" = "#7EC0EE",
"NA"="#EEE9E9"))+
theme_classic()+
labs(title = "Seizure",
x = "Count",
y = "")
What I would like to do is to add a black line in each bar correspondent to the E value, from the dataframe, for that country. However I haven't been successful. Can someone kindly guide me on how to achieve this?
Thanks!
One option to achieve your desired result would be via a geom_segment, where you map your E column on both the x and the xend position. "Tricky" part are the y positions. However, as a categorical axis is still a numeric axis we could add a helper column to your data which contains the numeric positions of your categorical OCCURED_COUNTRY_DESC column. This helper column could then be mapped on the y and the yend aes needed by geom_segment where we also take into account the width of the bars:
library(ggplot2)
test2$OCCURED_COUNTRY_DESC_num <- as.numeric(factor(test2$OCCURED_COUNTRY_DESC))
width <- .9 # defautlt width of bars
ggplot(test2, aes(a, OCCURED_COUNTRY_DESC, fill = type)) +
geom_bar(stat = "identity") +
geom_segment(aes(x = E, xend = E,
y = OCCURED_COUNTRY_DESC_num - width / 2,
yend = OCCURED_COUNTRY_DESC_num + width / 2),
color = "black", size = 1) +
scale_fill_manual(values = c(
"disp" = "#FF8C00",
"under" = "#7EC0EE",
"NA" = "#EEE9E9"
)) +
theme_classic() +
labs(
title = "Seizure",
x = "Count",
y = ""
)

How to ggplot partial values with only max in whisker plot?

I am thinking how to present partial values in whisker plot/...
M2 has only the max.
Both measurements do not hav
Code which output in Fig. 1
library("reshape2")
library("ggplot2")
ds <- structure(list(Vars = c("M1", "M2", "M1", "M2", "M1", "M2"),
variable = structure(c(1L, 1L, 2L, 2L, 3L, 3L), .Label = c("Max",
"Ave", "Min"), class = "factor"), value = c("150",
"61", " 60", NA, " 41", NA)), row.names = c(NA, -6L), .Names = c("Vars",
"variable", "value"), class = "data.frame")
# http://stackoverflow.com/q/44100187/54964 eipi10
ds$value = as.numeric(ds$value)
# http://stackoverflow.com/a/44090815/54964
minmax <- ds[ds$variable %in% c("Min","Max"), ]
absol <- ds[ds$variable %in% c("Ave"), ]
# absol <- ds[ds$variable %in% c("Ave", "Absolute"), ]
minm <- dcast(minmax, Vars ~ variable)
absol <- merge(absol, minm, by = "Vars", all.x = T)
absol
ggplot(absol, aes(x = Vars, y = value, fill = variable)) +
geom_bar(stat = "identity") +
geom_errorbar(aes(ymin = Min, ymax = Max), width = .25)
Values at start
Max Ave Min Vars
M1 150 60 41 M1
M2 61 <NA> <NA> M2
Fig. 1 Output where no visualisations when only max value exists
The presentation of M1 is also weird in the barplot becuase no absolute values in data, designed initially in absol.
Expected output: mark maximum value in M2 presentation
OS: Debian 8.7
R: 3.4 (backports)
Add a column to absol, call it yMin, that will set the min value to the max value if the min value is missing.
absol$yMin <- ifelse(is.na(absol$Min), absol$Max, absol$Min)
Then, when plotting, have the geom_errorbar use yMin in the aesthetics.
ggplot(absol, aes(x = Vars, y = value, fill = variable)) +
geom_bar(stat = "identity") +
geom_errorbar(aes(ymin = yMin, ymax = Max), width = .25)

Create new column with percentages in data frame

I have the following dataframe:
dput(df1)
structure(list(month = c(1, 1, 2, 2, 3, 4), transaction_type = c("AAA",
"BBB", "BBB", "CCC",
"DDD", "AAA"), max_wt_per_month = c(54.9,
51.6833333333333, 52.3333333333333, 49.4666666666667, 49.85,
48.5833333333333), min_wt_per_month = c(0, 0, 0, 0, 0, 0), avg_wt_per_month = c(8.41701333107861,
7.65211141060198, 6.44184012508551, 7.74798927613941, 7.4360566888844,
7.50611319574734), prop = c(Inf, Inf, Inf, Inf, Inf, Inf)), .Names = c("month",
"transaction_type", "max_wt_per_month", "min_wt_per_month", "avg_wt_per_month",
"prop"), row.names = c(NA, -6L), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), vars = list(month), drop = TRUE, indices = list(
0:5), group_sizes = 6L, biggest_group_size = 6L, labels = structure(list(
month = 1), row.names = c(NA, -1L), class = "data.frame", vars = list(
month), drop = TRUE, .Names = "month"))
I want to create column prop that would contain the percentage of maximum waiting time with respect to each month. If I run this code, then I get Inf values in most of the rows... (especially it is evident in the real dataset):
my_fun=function(vec){
100*as.numeric(vec[3]) /
sum(with(data_merged_transactions, ifelse(month == vec[1], max_wt_per_month, 0))) }
data_merged_transactions$prop=apply(data_merged_transactions , 1 , my_fun)
I then finally need to create the filled area chart so that each area would be a percentage out of 100%:
ggplot(data_merged_transactions, aes(x=month, y=prop, fill=transaction_type)) +
geom_area(alpha=0.6 , size=1, colour="black")
Why do I get Inf if the sum is not equal to 0?
Moreover, is it possible to create filled area chart with months being factors (Jan, Feb,etc.), not numbers? I tried to substitute month id's by month names, but then I got very thin bars instead of a filled area.
Is this what you were looking for?
library(tidyverse)
df1_tidy <- df1 %>%
group_by(month) %>%
summarise(SUM = sum(max_wt_per_month)) %>%
full_join(df1) %>%
mutate(prop = max_wt_per_month / SUM)
ggplot(data = df1_tidy,
aes(x = month,
y = prop,
fill = transaction_type)) +
geom_area(alpha = 0.6,
size = 1,
colour = "black") +
scale_x_continuous(labels = c("Jan", "Feb", "Mar", "Apr"))

Line plot with factor variables in R

How can I make R draw lines between two observations according with factor variables?
I have two 'time' points, early and late, coded as categorical
plotdata <- structure(list(
x = structure(1:2, .Label = c("early", "late"), class = "factor"),
y = 1:2
),
.Names = c("x", "y"), row.names = c(NA, -2L), class = "data.frame"
)
I only get kind of a bar plot:
plot(plotdata)
I also tried coding the variables as 0 and 1, but then I get a continuous axis with.
Let's say your data is
d <- structure(list(x = structure(1:2, .Label = c("early", "late"), class = "factor"),
y = 1:2), .Names = c("x", "y"), row.names = c(NA, -2L), class = "data.frame")
d
# x y
# early 1
# late 2
With base R
plot(as.numeric(d$x), d$y, type = "l", xaxt = "n")
axis(1, labels = as.character(d$x), at = as.numeric(d$x))
With ggplot2
library(ggplot2)
ggplot(d, aes(x = x, y = y)) + geom_line(aes(group = 1))

Resources