R ggplot legend with Waffle chart - r

library(tidyverse)
library(waffle)
df_2 <- structure(list(group = c(2, 2, 2, 1, 1, 1),
parts = c("A", "B", "C", "A", "B", "C"),
values = c(1, 39, 60, 14, 15, 71)), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
df_2 %>% ggplot(aes(label = parts)) +
geom_pictogram(
n_rows = 10, aes(color = parts, values = values),
family = "fontawesome-webfont",
flip = TRUE
) +
scale_label_pictogram(
name = "Case",
values = c("male"),
breaks = c("A", "B", "C"),
labels = c("A", "B", "C")
) +
scale_color_manual(
name = "Case",
values = c("A" = "red", "B" = "green", "C" = "grey85"),
breaks = c("A", "B", "C"),
labels = c("A", "B", "C")
) +
facet_grid(~group)
With the above code, I got the legend what I expected:
However, when I replaced df_2 with the following df_1 dataframe, I was unable to combine two legends.
df_1 <- structure(list(group = c(2, 2, 2, 1, 1, 1),
parts = c("A", "B", "C", "A", "B", "C"),
values = c(0, 0, 100, 0, 0, 100)),
row.names = c(NA,-6L), class = c("tbl_df", "tbl", "data.frame"))
I kind of know the cause of the problem (0 values) but I would like to keep the legend the same as the graph above. Any suggestions would be appreciated.

To make it clear, the package "waffle" referred to here is not the CRAN package "waffle", but the GitHub-only package:
remotes::install_github("hrbrmstr/waffle")
library(waffle)
You will also need a way of displaying the pictograms, such as:
library(emojifont)
load.fontawesome()
Now, as with any other discrete scale, if you want to add values that are not present in the (post-stat) data, you need to use the limits argument:
df_1 %>% ggplot(aes(label = parts)) +
geom_pictogram(
n_rows = 10, aes(color = parts, values = values),
family = "fontawesome-webfont",
flip = TRUE
) +
scale_label_pictogram(
name = "Case",
values = c("male"),
breaks = c("A", "B", "C"),
labels = c("A", "B", "C"),
limits = c("A", "B", "C")
) +
scale_color_manual(
name = "Case",
values = c("A" = "red", "B" = "green", "C" = "grey85"),
breaks = c("A", "B", "C"),
labels = c("A", "B", "C")
) +
facet_grid(~group)

It is a bit tricky, but what you could do is say let's add 1 to all values so it will plot it like before. But using ggplot_build to remove from each case one row to get it in the right amount like this:
library(tidyverse)
library(waffle)
library(ggplot2)
library(dplyr)
library(emojifont)
library(waffle)
library(extrafont)
p <- df_1 %>% ggplot(aes(label = parts)) +
geom_pictogram(
n_rows = 10, aes(color = parts, values = values+1),
family = "fontawesome-webfont",
flip = TRUE
) +
scale_label_pictogram(
name = "Case",
values = c("male"),
breaks = c("A", "B", "C"),
labels = c("A", "B", "C")
) +
scale_color_manual(
name = "Case",
values = c("A" = "red", "B" = "green", "C" = "grey85"),
breaks = c("A", "B", "C"),
labels = c("A", "B", "C")
) +
facet_grid(~group)
q <- ggplot_build(p)
q$data[[1]] <- q$data[[1]] %>%
group_by(PANEL) %>%
slice(4:n())
q <- ggplot_gtable(q)
plot(q)
Created on 2022-10-20 with reprex v2.0.2

Related

Plotly R: Hovertext for values which have 0 y-axis values

Problem: In plotly, is there a possibility to get the hoverinfo also for values which have zero y-value? So, in the example below, I want to have hovertext for xaxis value a, d and e.
Any suggestions?
library(data.table)
library(plotly)
dt <- data.table(
x = c("a", "b", "c", "d", "e"),
y = c(0 , 5, 2, 0, 0),
z = c(12, 14, 19, 23, 0)
)
plot_ly(dt,
x = ~x) %>%
add_bars(y = ~y,
text = ~paste("y-Values:", y, " z-Values:", z),
hoverinfo = "text")
I found the solution (add in layout hovermode = 'x'):
library(data.table)
library(plotly)
dt <- data.table(
x = c("a", "b", "c", "d", "e"),
y = c(0 , 5, 2, 0, 0),
z = c(12, 14, 19, 23, 0)
)
plot_ly(dt,
x = ~x) %>%
add_bars(y = ~y,
text = ~paste("y-Values:", y, " z-Values:", z),
hoverinfo = "text") %>%
layout(hovermode = 'x')

How to assign levels to nodes in multi-level sankey diagram?

I am trying to build interactive multi-level sankey diagram using R. I can't find the solution how to assign the levels to nodes. For example, a1 node should be on the second level in chart but not in the fifth. It seems that the package assigns to the last node in the chain the rightmost position, which is not preferable in my case.
I tried different packages like echarts4r, networkD3, ggvis but it seems that these packages doesn't provide the functionality to manage levels in graph.
If you know how to solve this issue, please, share.
library('networkD3')
library('echarts4r')
library('googleVis')
sankey <- data.frame(
source = c("a", "a", "b", "c", "d", "c"),
target = c("b", "a1", "c", "d", "e", "e"),
value = ceiling(rnorm(6, 10, 1)),
stringsAsFactors = FALSE
)
# googleVis solution
plot(gvisSankey(sankey, from = 'source', to = 'target', weight = 'value'))
# echarts4r solution
sankey %>%
e_charts() %>%
e_sankey(source, target, value, focusNodeAdjacency = 'allEdges')
# networkD3 solution
nodes <- data.frame(name = c("a", "a1", "b", "c", "d", "e"))
links <- data.frame(
source = c(0, 0, 2, 3, 4, 3),
target = c(2, 1, 3, 4, 5, 5),
value = ceiling(rnorm(6, 10, 1))
)
sankeyNetwork(Links = links,
Nodes = nodes,
Source = "source",
Target = "target", Value = "value", NodeID = "name",
fontSize = 12, nodeWidth = 30, sinksRight = TRUE)
Using networkd3, change sinksRight = TRUE to sinksRight = FALSE
library('networkD3')
sankey <- data.frame(
source = c("a", "a", "b", "c", "d", "c"),
target = c("b", "a1", "c", "d", "e", "e"),
value = ceiling(rnorm(6, 10, 1)),
stringsAsFactors = FALSE
)
sankeyNetwork(Links = links,
Nodes = nodes,
Source = "source",
Target = "target", Value = "value", NodeID = "name",
fontSize = 12, nodeWidth = 30, sinksRight = FALSE)

Plotting based on occurrence in group

I would to make a bar chart that plots the bar as a proportion of the total group rather than the usual percentage. For a var to "count" it only needs to occur once in a group. For example in this df where id is the grouping variable
df <-
tibble(id = c(rep(1, 3), rep(2, 3), rep(3, 3)),
vars = c("a", NA, "b", "c", "d", "e", "a", "a", "a"))
The a bars would be:
a = 2/3 # since a occurs in 2 out of 3 groups
b = 1/3
c = 1/3
d = 1/3
e = 1/3
If I understand you correctly, a one-liner would suffice:
ggplot(distinct(df)) + geom_bar(aes(vars, stat(count) / n_distinct(df$id)))
Working answer:
tibble(id = c(rep(1, 3), rep(2, 3), rep(3, 3)),
vars = c("a", "a", "b", "c", "d", "e", "a", "a", "a")) %>%
group_by(id) %>%
distinct(vars) %>%
ungroup() %>%
add_count(vars) %>%
mutate(prop = n / n_distinct(id)) %>%
distinct(vars, .keep_all = T) %>%
ggplot(aes(vars, prop)) +
geom_col()

Plot observation number (label) in outlier points

I have this boxplot with outliers, i need to plot the number of the line that contain the outlier observation, to make it easy to go in the data set and find where the value, somebody can help me?
set.seed(1)
a <- runif(10,1,100)
b <-c("A","A","A","A","A","B","B","B","B","B")
t <- cbind(a,b)
bp <- boxplot(a~b)
text(x = 1, y = bp$stats[,1] + 2, labels = round(bp$stats[,1], 2))
text(x = 2, y = bp$stats[,2] + 2, labels = round(bp$stats[,2], 2))
What is the point of t <- cbind(a, b)? That makes a character matrix and converts your numbers to character strings? You don't use it anyway. If you want a single data structure use data.frame(a, b) which will make a a factor and leave b numeric. I do not get the plot you do with set.seed(1) so I'll provide slightly different data. Note the use of the pos= and offset= arguments in text(). Be sure to read the manual page to see what they are doing:
a <- c(99.19, 59.48, 48.95, 18.17, 75.73, 45.94, 51.61, 21.55, 37.41,
59.98, 57.91, 35.54, 4.52, 64.64, 75.03, 60.21, 56.53, 53.08,
98.52, 51.26)
b <- c("A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B")
bp <- boxplot(a~b)
text(x = 1, y = bp$stats[,1], labels = round(bp$stats[, 1], 2),
pos=c(1, 3, 3, 1, 3), offset=.2)
text(x = 2, y = bp$stats[, 2], labels = round(bp$stats[, 2], 2),
pos=c(1, 3, 3, 1, 3), offset=.2)
obs <- which(a %in% bp$out)
text(bp$group, bp$out, obs, pos=4)

Single error bar for stacked graph equalling 100

I have a stacked bar graph that shows the differences in classes between skeleton and tissue. The total of the two will always be 100 and their standard errors are the same. As such, the top error bar is superfluous and adds confusion.
Is there a way to only have the standard error for the bottom group? This link shows how to get a single bar for the top of the stack but isn't quite what I need: Single error bar on stacked bar plot ggplot Thanks.
Code:
library(reshape2)
library(Rmisc)
library(ggplot2)
melt <- melt(file, id=c("TREATMENT", "Species"),
value.name="Amount", variable.name = "Class")
x1 <- summarySE(melt, measurevar = "Amount",
groupvars = c("Species", "TREATMENT", "Class"), na.rm=TRUE)
x2 <- within(x1,lit2 <- ave(Amount, Class, Species, FUN = cumsum))
p10 <- ggplot(x2, aes(y = Amount, x = Class, fill = TREATMENT)) +
geom_bar(stat = "identity", colour = "black") +
geom_errorbar(aes(ymin = lit2-se, ymax = lit2+se), size = .5, width = .25)
p10
Data:
structure(list(TREATMENT = c("SKELETON", "SKELETON", "SKELETON",
"SKELETON", "TISSUE", "TISSUE", "TISSUE", "TISSUE"), Species = c("A",
"A", "A", "A", "A", "A", "A", "A"), `1` = c(42.1958615095789,
73.6083881998577, 62.1025409404354, 21.5264243794993, 57.8041384904211,
26.3916118001423, 37.8974590595646, 78.4735756205007), `2` = c(46.9398719372755,
89.6865089817669, 55.9907366318623, 18.1145895471236, 53.0601280627245,
10.3134910182331, 44.0092633681377, 81.8854104528764), `3` = c(55.4637732254405,
75.0933095632366, 20, 18.402199079204, 44.5362267745594, 24.9066904367634,
80, 81.597800920796)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -8L), .Names = c("TREATMENT", "Species",
"1", "2", "3"))

Resources