Group columns and fit in one plot - r

I have the following data-frame:
structure(list(inst = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("A8",
"b7", "X1"), class = "factor"), steps = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("5",
"10", "20"), class = "factor"), family = structure(c(6L,
1L, 4L, 5L, 7L, 2L, 3L, 6L, 7L, 5L, 4L, 1L, 2L, 3L, 3L, 6L, 4L,
1L, 2L, 7L, 5L, 6L, 5L, 4L, 1L, 7L, 2L, 3L, 6L, 5L, 4L, 7L, 1L,
2L, 3L, 6L, 1L, 3L, 5L, 4L, 2L, 7L, 6L, 4L, 1L, 5L, 7L, 2L, 3L,
6L, 4L, 1L, 5L, 7L, 2L, 3L, 6L, 3L, 5L, 4L, 7L, 1L, 2L), .Label = c("Bay",
"Grad", "none", "Upp", "base", "new", "tuna"
), class = "factor"), mean_error = c(5.930259, 6.0611, 6.241703,
6.270109, 6.277435, 6.587473, 6.865757, 5.936106, 6.084044, 6.140153,
6.142072, 6.146425, 6.364658, 6.621481, 6.759502, 7.02175, 7.16422,
7.19518, 7.36932, 7.395606, 7.44191, 5.113961, 5.123312, 5.289946,
5.292267, 5.455671, 5.768393, 5.840368, 5.140513, 5.346728, 5.371491,
5.463127, 5.475944, 5.602034, 5.995647, 5.784786, 6.00454, 6.121524,
6.22509, 6.24901, 6.37396, 6.41903, 4.0439, 4.223119, 4.260518,
4.31062, 4.500065, 4.822419, 5.107085, 4.221596, 4.371242, 4.505292,
4.524415, 4.681877, 4.703846, 5.14499, 4.944005, 5.007325, 5.0561975,
5.1926225, 5.3353825, 5.34204, 5.63557)), row.names = c(64L,
3L, 38L, 55L, 73L, 12L, 21L, 67L, 76L, 58L, 41L, 6L, 15L, 24L,
27L, 70L, 44L, 9L, 18L, 79L, 61L, 63L, 54L, 37L, 2L, 72L, 11L,
20L, 66L, 57L, 40L, 75L, 5L, 14L, 23L, 69L, 8L, 26L, 60L, 43L,
17L, 78L, 62L, 36L, 1L, 53L, 71L, 10L, 19L, 65L, 39L, 4L, 56L,
74L, 13L, 22L, 68L, 25L, 59L, 42L, 77L, 7L, 16L), class = "data.frame")
I am trying to create groups of three steps per inst in the x-axis and fit everything in one plot. The outcome should resemble this
So far I tried:
# Attempted plot: one box per `steps` level on the x-axis.
# NOTE(review): `inst` is selected but never mapped to an aesthetic, so the
# boxes cannot be split by inst; `fill` is mapped to the numeric y variable
# `mean_error` rather than to a grouping factor.
df_bri %>% select(steps, inst, family, mean_error) %>%
ggplot(aes(x = steps, y = mean_error, fill = mean_error)) +
geom_boxplot()
and I get this:
I don't know how to separate the groups into 3 steps per inst.

fill=steps is what you need:
# One group per `inst` on the x-axis; mapping the factor `steps` to `fill`
# draws one dodged box per steps level inside each inst group.
# `df_bri` is the data frame used in the question's own code.
ggplot(df_bri, aes(x = inst, y = mean_error, fill = steps)) +
  geom_boxplot()

Related

Overlay of forest plot from ZINB model

I want to overlay forest plots from ZINB models fitted to the full data and to a subset of the data, using the sjPlot package. As you may know, a ZINB model has two components: a count model and a zero-inflation model. plot_model works fine for a single ZINB model fitted to either the full data or a subset, i.e. it produces a plot for both components (count and zero). But when I overlay models using plot_models, only one plot is produced, for the count model. I am looking for both the count and the zero-inflation plots, each overlaying the model fitted to the full data and the model fitted to the subset. Any help would be much appreciated.
library(sjPlot)
library(sjlabelled)
library(sjmisc)
library(ggplot2)
library(MASS)
library(pscl)
library(boot)
# ---- Models fitted to the full data ----
# Unadjusted zero-inflated negative binomial (ZINB) model: ivdays ~ age.
zinb_all_uni <- zeroinfl(ivdays ~ age,
                         link = "logit",
                         dist = "negbin",
                         data = caterpillor)
summary(zinb_all_uni)
plot_model(zinb_all_uni, type = "est")

# Adjusted model: age plus sex and edu; only the age terms are plotted.
zinb_full_adj <- zeroinfl(ivdays ~ age + sex + edu,
                          link = "logit",
                          dist = "negbin",
                          data = caterpillor)
summary(zinb_full_adj)
plot_model(zinb_full_adj, type = "est",
           terms = c("count_ageb", "count_agec", "zero_ageb", "zero_agec"))

# ---- Same two models fitted to the subset country == "eng" ----
# The object name starts with a capital "Z"; R is case-sensitive, so every
# later reference must use exactly `Zinb_uni_sub`.
Zinb_uni_sub <- zeroinfl(ivdays ~ age,
                         link = "logit",
                         dist = "negbin",
                         data = subset(caterpillor, country == "eng"))
summary(Zinb_uni_sub)
plot_model(Zinb_uni_sub, type = "est")

zinb_adj_sub <- zeroinfl(ivdays ~ age + sex + edu,
                         link = "logit",
                         dist = "negbin",
                         data = subset(caterpillor, country == "eng"))
summary(zinb_adj_sub)
plot_model(zinb_adj_sub, type = "est",
           terms = c("count_ageb", "count_agec", "zero_ageb", "zero_agec"))

# ---- Overlay the full-data and subset models in one plot ----
plot_models(zinb_all_uni, Zinb_uni_sub)
plot_models(zinb_full_adj, zinb_adj_sub)
DATA:
caterpillor=structure(list(id = 1:100,
age = structure(c(1L, 1L, 2L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L,
1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L,
2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L,
3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L),
.Label = c("a", "b", "c"), class = "factor"),
sex = structure(c(2L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 1L),
.Label = c("F", "M"), class = "factor"),
country = structure(c(1L,
1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L,
2L, 2L, 2L),
.Label = c("eng", "scot", "wale"), class = "factor"),
edu = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L),
.Label = c("x", "y", "z"), class = "factor"),
lungfunction = c(45L,
23L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 45L, 23L, 25L, 45L,
70L, 69L, 90L, 50L, 62L, 45L, 23L, 25L, 45L, 70L, 69L, 90L,
50L, 62L, 45L, 23L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 45L,
23L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 45L, 23L, 25L, 45L,
70L, 69L, 90L, 50L, 62L, 45L, 23L, 25L, 45L, 70L, 69L, 90L,
50L, 62L, 45L, 23L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 45L,
23L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 25L, 45L, 70L, 69L,
90L, 50L, 62L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 25L, 45L,
70L, 69L, 90L),
ivdays = c(15L, 26L, 36L, 34L, 2L, 4L, 5L,
8L, 9L, 15L, 26L, 36L, 34L, 2L, 4L, 5L, 8L, 9L, 15L, 26L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 5L, 8L, 9L, 36L, 34L, 2L, 4L, 5L, 8L,
9L, 36L, 34L, 2L, 4L, 5L),
no2_quintile = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L),
.Label = c("q1", "q2",
"q3", "q4", "q5"), class = "factor")),
class = "data.frame", row.names = c(NA,
-100L))
But when I overlay the plots, I get only one plot.
Code below, basic points:
when I run into trouble with automated machinery like plot_model I usually prefer to use machinery like broom::tidy() (for coefficients) or the ggeffects or emmeans packages (for predictions) and build my own ggplot — for me, it's easier than trying to figure out what the more automated tool is doing
broom doesn't have a tidy() method for zeroinfl models, but a little googling finds one in the poissonreg package ...
... however, that tidy() method doesn't have machinery for constructing confidence intervals or back-transforming coefficients to a count-ratio or odds-ratio scale, so I had to implement my own below ...
library(broom)
library(poissonreg)
library(tidyverse) ## purrr::map_dfr, ggplot ...
theme_set(theme_bw())
library(colorspace)
# Label each fitted model; the label becomes the `model` column below.
mod_list <- list(
  all_uni  = zinb_all_uni,
  uni_sub  = Zinb_uni_sub,
  full_adj = zinb_full_adj,
  adj_sub  = zinb_adj_sub
)

# Quick look at the tidied coefficient table of a single model.
tidy(zinb_all_uni, type = "all")

coefs <- mod_list |>
  # one tidy table per model, row-bound with the list name stored in `model`
  map_dfr(tidy, type = "all", .id = "model") |>
  # 95% limits: normal quantiles centred on the estimate, sd = std.error
  mutate(
    conf.low = qnorm(0.025, estimate, std.error),
    conf.high = qnorm(0.975, estimate, std.error)
  ) |>
  # intercepts are usually not wanted in the plot
  filter(term != "(Intercept)") |>
  # keep only the columns the plot needs
  select(model, term, type, estimate, conf.low, conf.high) |>
  # back-transform estimate and limits by exponentiating
  mutate(across(c(estimate, conf.low, conf.high), exp))
# Forest plot of the back-transformed estimates with their intervals,
# one colour per model, dodged so that overlapping models stay readable.
ggplot(coefs, aes(x = estimate, y = term, colour = model)) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high),
                  position = position_dodge(width = 0.5)) +
  ## separate count-ratio and odds-ratio (conditional/zero) panels;
  ## the argument is spelled out as `scales` instead of relying on
  ## partial matching of `scale`
  facet_wrap(~type, scales = "free") +
  scale_color_discrete_qualitative() ## cosmetic
If you only want to see the age-related coefficients you can add
|> filter(stringr::str_detect(term, "^age"))
to the end of the pipeline that defines coefs.

ggalluvial assign different color for each node

I was following this post, but I don't understand how to apply it to my data.
My plot looks like:
I would like the "strings" (the flows) to have the same color as the 2nd column, i.e. an orange string for ESR1 and a green one for PIK3CA.
Any idea about how can I manage with scale_fill_manual or any other argument?
Thanks!
My code:
# Fill colours handed positionally to geom_stratum() below (outside aes()),
# so they follow the order in which the strata are drawn, not a variable.
colorfill <- c("white", "white", "darkgreen", "orange", "white", "white", "white", "white", "white", "white", "white", "white", "white", "white", "white", "white", "white")
# Alluvial plot with Gene_mut on the first axis and Metastasis_Location on
# the second, weighted by Freq; the flows are filled by Gene_mut.
ggplot(data = Allu,
aes(axis1 = Gene_mut, axis2 = Metastasis_Location, y = Freq)) +
geom_alluvium(aes(fill = Gene_mut),
curve_type = "quintic") +
geom_stratum(width = 1/4, fill = colorfill) +
geom_text(stat = "stratum", size = 3,
aes(label = after_stat(stratum))) +
# NOTE(review): the limits are listed in the opposite order to axis1/axis2
# above; theme_void() presumably hides the axis labels — confirm.
scale_x_discrete(limits = c("Metastasis_Location", "Gene_mut"),
expand = c(0.05, .05)) +
theme_void()
My data:
structure(list(Metastasis_Location = structure(c(1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L,
11L, 11L), .Label = c("adrenal", "bone", "breast", "liver", "lung",
"muscle", "node", "pancreatic", "peritoneum", "pleural", "skin"
), class = "factor"), T0_T2_THERAPY_COD = structure(c(2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("A",
"F"), class = "factor"), T0_T2_PD_event = structure(c(2L, 2L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("No Progression",
"Progression"), class = "factor"), Gene_mut = structure(c(4L,
5L, 1L, 3L, 4L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 6L, 3L, 6L, 6L, 6L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L,
5L, 6L, 2L, 3L, 4L, 4L, 3L, 3L, 3L, 4L, 5L, 6L, 3L, 6L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 3L, 4L, 4L, 5L, 6L,
1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 5L, 3L, 4L, 3L, 4L, 5L, 6L, 3L, 3L, 4L, 5L, 6L, 6L, 6L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 3L, 4L, 3L, 4L, 5L,
6L, 3L, 4L, 5L, 6L, 3L, 4L, 5L, 6L, 1L, 6L, 3L, 3L, 4L, 4L, 5L
), .Label = c("AKT1", "ERBB2", "ESR1", "PIK3CA", "TP53", "WT"
), class = "factor"), LABO_ID = structure(c(45L, 8L, 13L, 11L,
11L, 26L, 7L, 15L, 23L, 26L, 35L, 39L, 7L, 19L, 26L, 32L, 33L,
35L, 39L, 15L, 19L, 35L, 1L, 37L, 34L, 43L, 47L, 3L, 10L, 18L,
20L, 28L, 31L, 36L, 42L, 9L, 10L, 14L, 18L, 20L, 28L, 31L, 36L,
44L, 45L, 8L, 10L, 18L, 28L, 42L, 2L, 7L, 39L, 7L, 39L, 3L, 4L,
42L, 5L, 42L, 6L, 21L, 1L, 10L, 22L, 28L, 46L, 9L, 10L, 14L,
28L, 46L, 10L, 28L, 48L, 25L, 23L, 32L, 33L, 40L, 43L, 24L, 3L,
18L, 24L, 28L, 31L, 36L, 42L, 18L, 27L, 28L, 31L, 36L, 45L, 18L,
24L, 27L, 28L, 42L, 16L, 16L, 18L, 18L, 18L, 29L, 23L, 39L, 39L,
40L, 1L, 12L, 47L, 3L, 18L, 20L, 28L, 31L, 36L, 38L, 42L, 5L,
18L, 20L, 27L, 28L, 31L, 36L, 38L, 41L, 45L, 8L, 18L, 27L, 28L,
42L, 48L, 6L, 17L, 30L, 31L, 31L, 18L, 18L, 18L, 29L, 39L, 39L,
40L, 43L, 31L, 31L, 48L, 30L, 13L, 34L, 18L, 36L, 18L, 36L, 18L
), .Label = c("ER-11", "ER-19", "ER-21", "ER-22", "ER-29", "ER-30",
"ER-31", "ER-32", "ER-33", "ER-38", "ER-40", "ER-43", "ER-49",
"ER-8", "ER-AZ-04", "ER-AZ-05", "ER-AZ-06", "ER-AZ-07", "ER-AZ-08",
"ER-AZ-10", "ER-AZ-11", "ER-AZ-11=ER-47", "ER-AZ-13", "ER-AZ-14",
"ER-AZ-15", "ER-AZ-16", "ER-AZ-17", "ER-AZ-18", "ER-AZ-20", "ER-AZ-20=ER-27",
"ER-AZ-21", "ER-AZ-23", "ER-AZ-23=ER-52", "ER-AZ-24", "ER-AZ-29",
"ER-AZ-31", "ER-AZ-33", "ER-AZ-35", "ER-AZ-37", "ER-AZ-38", "ER-AZ-39",
"ER-AZ-40", "ER-AZ-43", "ER-AZ-44", "ER-AZ-45", "ER-AZ-49", "ER-AZ-51",
"ER-AZ-53"), class = "factor"), Freq = c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -161L), groups = structure(list(
Metastasis_Location = structure(c(1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L), .Label = c("adrenal",
"bone", "breast", "liver", "lung", "muscle", "node", "pancreatic",
"peritoneum", "pleural", "skin"), class = "factor"), T0_T2_THERAPY_COD = structure(c(2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L), .Label = c("A",
"F"), class = "factor"), T0_T2_PD_event = structure(c(2L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L), .Label = c("No Progression",
"Progression"), class = "factor"), Gene_mut = structure(c(4L,
5L, 1L, 3L, 4L, 1L, 2L, 3L, 4L, 5L, 6L, 3L, 6L, 3L, 4L, 5L,
6L, 2L, 3L, 4L, 3L, 4L, 5L, 6L, 3L, 6L, 3L, 4L, 5L, 6L, 3L,
4L, 5L, 6L, 1L, 3L, 4L, 5L, 3L, 4L, 3L, 4L, 5L, 6L, 3L, 4L,
5L, 6L, 6L, 3L, 4L, 5L, 6L, 3L, 4L, 3L, 4L, 5L, 6L, 3L, 4L,
5L, 6L, 3L, 4L, 5L, 6L, 1L, 6L, 3L, 4L, 5L), .Label = c("AKT1",
"ERBB2", "ESR1", "PIK3CA", "TP53", "WT"), class = "factor"),
.rows = structure(list(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8:12,
13:19, 20:22, 23L, 24L, 25:27, 28:35, 36:45, 46:50, 51L,
52L, 53L, 54:55, 56:58, 59L, 60L, 61L, 62L, 63L, 64:67,
68:72, 73:75, 76L, 77L, 78:79, 80L, 81L, 82L, 83:89,
90:95, 96:100, 101L, 102L, 103L, 104L, 105L, 106L, 107:108,
109L, 110L, 111:112, 113L, 114:121, 122:131, 132:137,
138:140, 141L, 142L, 143L, 144L, 145L, 146L, 147L, 148L,
149L, 150L, 151L, 152L, 153L, 154L, 155L, 156L, 157:158,
159:160, 161L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -72L), .drop = TRUE))
You're right to think of scale_fill_manual(). I think this is the more programmable alternative to passing a vector like colorfill to an aesthetic outside aes(). The following plot uses your data and color vector to control how the fill aesthetic is coded throughout the plot, and notice that fill is passed the same variable, Gene_mut, in both layers (alluvium and stratum):
# Alluvial plot in which both the flows and the strata are filled by Gene_mut,
# so a single fill scale controls every colour in the plot.
ggplot(data = Allu,
       aes(axis1 = Gene_mut, axis2 = Metastasis_Location, y = Freq)) +
  geom_alluvium(aes(fill = Gene_mut),
                curve_type = "quintic") +
  geom_stratum(aes(fill = Gene_mut), width = 1/4) +
  # Colours are matched to the Gene_mut levels BY NAME. An unnamed vector
  # would be matched in level order (AKT1, ERBB2, ESR1, PIK3CA, ...), which
  # would give ESR1 the green and PIK3CA the orange -- the opposite of the
  # requested orange ESR1 / green PIK3CA.
  scale_fill_manual(values = c(AKT1 = "white", ERBB2 = "white",
                               ESR1 = "orange", PIK3CA = "darkgreen",
                               TP53 = "white", WT = "white")) +
  geom_text(stat = "stratum", size = 3,
            aes(label = after_stat(stratum))) +
  scale_x_discrete(limits = c("Metastasis_Location", "Gene_mut"),
                   expand = c(0.05, .05)) +
  theme_void()
Since Metastasis_Location takes different values than Gene_mut, fill treats those strata as having missing values, which by default are colored grey. You can change that behavior by passing a color string to the na.value parameter of scale_fill_manual().

Assigning 4 different players to a station by round with no repeats

I am building an R script that uses all possible combinations of 4 different variables: Rounds, Stations, and Players (Male and Female). Below is the dput of my dataset:
structure(list(x = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("round1", "round2", "round3"
), class = "factor"), x.x = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("station1", "station2",
"station3"), class = "factor"), x.y = structure(c(1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
6L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L), .Label = c("male1",
"male2", "male3", "male4", "male5", "male6"), class = "factor"),
y = structure(c(2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L), .Label = c("female1", "female2", "female3",
"female4", "female5", "female6"), class = "factor")), .Names = c("x",
"x.x", "x.y", "y"), row.names = c(55L, 109L, 163L, 217L, 271L,
10L, 118L, 172L, 226L, 280L, 19L, 73L, 181L, 235L, 289L, 28L,
82L, 136L, 244L, 298L, 37L, 91L, 145L, 199L, 307L, 46L, 100L,
154L, 208L, 262L, 58L, 112L, 166L, 220L, 274L, 13L, 121L, 175L,
229L, 283L, 22L, 76L, 184L, 238L, 292L, 31L, 85L, 139L, 247L,
301L, 40L, 94L, 148L, 202L, 310L, 49L, 103L, 157L, 211L, 265L,
61L, 115L, 169L, 223L, 277L, 16L, 124L, 178L, 232L, 286L, 25L,
79L, 187L, 241L, 295L, 34L, 88L, 142L, 250L, 304L, 43L, 97L,
151L, 205L, 313L, 52L, 106L, 160L, 214L, 268L, 56L, 110L, 164L,
218L, 272L, 11L, 119L, 173L, 227L, 281L, 20L, 74L, 182L, 236L,
290L, 29L, 83L, 137L, 245L, 299L, 38L, 92L, 146L, 200L, 308L,
47L, 101L, 155L, 209L, 263L, 59L, 113L, 167L, 221L, 275L, 14L,
122L, 176L, 230L, 284L, 23L, 77L, 185L, 239L, 293L, 32L, 86L,
140L, 248L, 302L, 41L, 95L, 149L, 203L, 311L, 50L, 104L, 158L,
212L, 266L, 62L, 116L, 170L, 224L, 278L, 17L, 125L, 179L, 233L,
287L, 26L, 80L, 188L, 242L, 296L, 35L, 89L, 143L, 251L, 305L,
44L, 98L, 152L, 206L, 314L, 53L, 107L, 161L, 215L, 269L, 57L,
111L, 165L, 219L, 273L, 12L, 120L, 174L, 228L, 282L, 21L, 75L,
183L, 237L, 291L, 30L, 84L, 138L, 246L, 300L, 39L, 93L, 147L,
201L, 309L, 48L, 102L, 156L, 210L, 264L, 60L, 114L, 168L, 222L,
276L, 15L, 123L, 177L, 231L, 285L, 24L, 78L, 186L, 240L, 294L,
33L, 87L, 141L, 249L, 303L, 42L, 96L, 150L, 204L, 312L, 51L,
105L, 159L, 213L, 267L, 63L, 117L, 171L, 225L, 279L, 18L, 126L,
180L, 234L, 288L, 27L, 81L, 189L, 243L, 297L, 36L, 90L, 144L,
252L, 306L, 45L, 99L, 153L, 207L, 315L, 54L, 108L, 162L, 216L,
270L), class = "data.frame")
Below are the first 10 rows:
Round Station Partner1 Partner2
55 round1 station1 male1 female2
109 round1 station1 male1 female3
163 round1 station1 male1 female4
217 round1 station1 male1 female5
271 round1 station1 male1 female6
10 round1 station1 male2 female1
118 round1 station1 male2 female3
172 round1 station1 male2 female4
226 round1 station1 male2 female5
280 round1 station1 male2 female6
In this dataset there are the following:
3 Rounds, 3 Stations, 6 Male, 6 Female
How do I do the following:
1) No person should be in the same station twice
2) No person should be in the same round twice
3) No person should be partnered with the same person twice
4) Each station requires 2 male and 2 female
5) Ideally, each person should be with people in a round that have not been with them in previous rounds.
6) A player with the same number at the end (male1, female1) should never be partnered
So the final output should look something like this for one of the rounds:
Round Station Partner1 Partner2
55 round1 station1 male1 female5
109 round1 station1 male3 female4
163 round1 station2 male2 female3
217 round1 station2 male4 female1
271 round1 station3 male5 female6
10 round1 station3 male6 female2
For round2, all 3 stations should still be present but the male and female partners need to change and should follow the rules described above.
Also, let me know if this is mathematically impossible without adding additional stations.
Any help would be great!
Here's a simple solution. I ignore (5) because it does not seem to be a strict requirement (and I don't think it is possible as a strict rule).
This function takes a valid first round as input, and outputs a good configuration for the second round (and the third, if applied again).
I modified your data to have numbers which are much easier to manipulate than strings. Strictly speaking, only the Partner columns (renamed to Male and Female) need to be numbers for the function to work.
# data: a valid first round, typed in as text. The header has one field
# fewer than the data rows, so read.table() uses the first column as row names.
r1 <- read.table(text = " Round Station Partner1 Partner2
55 round1 station1 male1 female5
109 round1 station1 male3 female4
163 round1 station2 male2 female3
217 round1 station2 male4 female1
271 round1 station3 male5 female6
10 round1 station3 male6 female2", header = TRUE)

# numericize: strip every non-digit so that "round1" -> 1, "female6" -> 6.
# Unlike a fixed-position substr(), this also handles multi-digit labels.
r1$Round <- as.numeric(gsub("\\D", "", r1$Round))
r1$Station <- as.numeric(gsub("\\D", "", r1$Station))
r1$Male <- as.numeric(gsub("\\D", "", r1$Partner1))
r1$Female <- as.numeric(gsub("\\D", "", r1$Partner2))

# drop the original string columns; only the numeric ones are needed
r1[c("Partner1", "Partner2")] <- list(NULL)
# function
#' Derive the next round's pairing from a valid current round.
#'
#' @param r A data frame with exactly 6 rows (3 stations x 2 pairs, two
#'   consecutive rows per station) and numeric columns `Round`, `Male` and
#'   `Female`.
#' @return `r` with `Male` and `Female` rotated to new rows, same-number
#'   collisions removed, and `Round` incremented by 1.
next_round <- function(r) {
  # The rotations below are hard-coded for 6 rows; fail with a clear message
  # instead of an obscure replacement-length error.
  stopifnot(length(r$Male) == 6L, length(r$Female) == 6L)

  # Rotate males and females by two rows in opposite directions, so each
  # same-sex pair stays together but lands on a different station.
  r$Male <- r$Male[c(3, 4, 5, 6, 1, 2)]
  r$Female <- r$Female[c(5, 6, 1, 2, 3, 4)]

  # Rows where a male meets the female with the same number (not allowed).
  problems <- which(r$Male == r$Female)

  # switch male problems: the other row of the same station is the row
  # above for even rows and the row below for odd rows.
  switch_with <- ifelse(problems %% 2 == 0, problems - 1, problems + 1)
  # Swap the two males of each affected station; the right-hand side is read
  # from the old values before anything is overwritten.
  r$Male[c(rbind(switch_with, problems))] <-
    r$Male[c(rbind(problems, switch_with))]

  # increment round
  r$Round <- r$Round + 1
  r
}
# demonstration
# The commented tables show the printed value of each expression; wrapping an
# assignment in parentheses makes R print the assigned value.
r1
# Round Station Male Female
# 55 1 1 1 5
# 109 1 1 3 4
# 163 1 2 2 3
# 217 1 2 4 1
# 271 1 3 5 6
# 10 1 3 6 2
(r2 = next_round(r1))
# Round Station Male Female
# 55 2 1 2 6
# 109 2 1 4 2
# 163 2 2 6 5
# 217 2 2 5 4
# 271 2 3 1 3
# 10 2 3 3 1
(r3 = next_round(r2))
# Round Station Male Female
# 55 3 1 6 3
# 109 3 1 5 1
# 163 3 2 1 6
# 217 3 2 3 2
# 271 3 3 4 5
# 10 3 3 2 4
Essentially what this does is that the males and females stay with the same-sex partners from their starting stations. Males move down one station number each round, females move up one station number each round, and if there is ever a Male = Female collision the males at that station switch to correct it. I was inspired by square dancing, where males and females moving in different directions to rotate partners is common.

Negative Binomial in R: glm.nb: In sqrt(1/i) : NaNs produced, and other problems

I am running a negative binomial regression.
I would like to know why I have the following errors:
In sqrt(1/i) : NaNs produced
It appears that there are some negative values in "i", but how do I avoid that?
Another one is:
In loglik(n, th, mu, Y, w) : value out of range in 'lgamma'
It is probably a consequence of the first error, so if I fix the first one, the second might go away. Or maybe not.
In some other cases I am able to calculate the regression but the following output seems strange for me:
(Dispersion parameter for Negative Binomial(10684331573) family taken
to be 1)
Null deviance: 8779.49 on 359 degrees of freedom
Residual deviance: 270.32 on 200 degrees of freedom
AIC: 2074.7
Number of Fisher Scoring iterations: 1
Theta: 10684331573
Std. Err.: 615849693813
2 x log-likelihood: -1752.749
Do these numbers seem okay? I mean the dispersion parameter, theta and standard error. They look enormously big to me and therefore I am not sure if the results are okay.
I never had problems like this with Poisson regression, but then I realized that my data are overdispersed, which is why I am using a negative binomial model. However, I am having a lot of trouble with it.
Here is the code:
# Negative binomial GLM for Freq; the `*` operators expand to all main effects
# and every interaction of cluster, gender, agecombined and educ.
# NOTE(review): no `data =` argument is given, so these variables must be
# visible in the calling environment — confirm how they are made available.
negbin <- glm.nb(Freq ~ cluster*gender*agecombined*educ, maxit=100)
# Stepwise model selection in both directions, starting from the full model.
# NOTE(review): `maxit` is not a named argument of step(); presumably it is
# forwarded through `...` — verify it has the intended effect.
mod.good <- step(negbin, direction='both', maxit=100)
And here is the dput of the whole dataset:
structure(list(gender = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("1",
"2"), class = "factor"), agecombined = structure(c(1L, 1L, 2L, 2L,
3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L,
5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 1L, 1L,
2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L,
4L, 5L, 5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L,
1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L,
3L, 4L, 4L, 5L, 5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L,
6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 1L, 1L, 2L,
2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L,
5L, 5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 1L,
1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L,
4L, 4L, 5L, 5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L,
6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 1L, 1L, 2L, 2L,
3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L,
5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 1L, 1L,
2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L,
4L, 5L, 5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L,
1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L,
3L, 4L, 4L, 5L, 5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L,
6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 1L, 1L, 2L,
2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L,
5L, 5L, 6L, 6L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L),
.Label = c("18-24", "25-34", "35-44", "45-54", "55-64", "65 and
older"), class = "factor"), educ = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label =
c("2-year college", "BA", "Illiterate", "MA or higher", "Primary",
"Secondary"), class = "factor"),
cluster = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L), .Label = c("E", "A", "B", "C", "D"
), class = "factor"), Freq = c(27L, 18L, 48L, 29L, 18L, 19L,
14L, 10L, 2L, 1L, 2L, 0L, 48L, 36L, 69L, 54L, 33L, 15L, 12L,
4L, 5L, 1L, 0L, 0L, 2L, 4L, 12L, 14L, 17L, 17L, 23L, 32L,
16L, 17L, 18L, 6L, 4L, 2L, 17L, 7L, 8L, 4L, 5L, 0L, 1L, 0L,
0L, 0L, 53L, 42L, 82L, 58L, 81L, 60L, 42L, 35L, 16L, 14L,
22L, 6L, 83L, 40L, 62L, 54L, 43L, 46L, 26L, 12L, 15L, 3L,
3L, 3L, 11L, 13L, 11L, 23L, 16L, 18L, 11L, 5L, 1L, 3L, 1L,
1L, 26L, 44L, 34L, 54L, 25L, 41L, 19L, 17L, 10L, 3L, 3L,
0L, 4L, 4L, 7L, 14L, 22L, 31L, 14L, 34L, 14L, 33L, 14L, 20L,
7L, 11L, 22L, 11L, 14L, 8L, 8L, 1L, 2L, 0L, 1L, 2L, 29L,
65L, 34L, 84L, 36L, 65L, 28L, 39L, 16L, 15L, 16L, 9L, 25L,
51L, 12L, 38L, 23L, 29L, 22L, 19L, 7L, 5L, 5L, 1L, 7L, 16L,
14L, 35L, 6L, 27L, 8L, 5L, 1L, 1L, 1L, 0L, 24L, 57L, 29L,
53L, 24L, 28L, 11L, 9L, 7L, 2L, 0L, 0L, 3L, 7L, 1L, 8L, 2L,
18L, 5L, 13L, 10L, 11L, 5L, 10L, 3L, 1L, 5L, 13L, 4L, 2L,
2L, 1L, 1L, 0L, 0L, 0L, 14L, 51L, 21L, 77L, 23L, 50L, 25L,
31L, 17L, 16L, 13L, 13L, 19L, 52L, 24L, 59L, 18L, 44L, 9L,
20L, 6L, 3L, 7L, 2L, 14L, 28L, 34L, 47L, 29L, 47L, 15L, 13L,
9L, 3L, 2L, 0L, 46L, 75L, 124L, 81L, 67L, 45L, 33L, 15L,
9L, 4L, 5L, 3L, 0L, 10L, 6L, 19L, 12L, 28L, 22L, 37L, 31L,
41L, 26L, 31L, 7L, 6L, 21L, 13L, 6L, 7L, 8L, 2L, 2L, 1L,
0L, 0L, 67L, 89L, 116L, 159L, 99L, 102L, 64L, 80L, 42L, 25L,
25L, 8L, 108L, 123L, 60L, 97L, 68L, 66L, 44L, 35L, 12L, 5L,
9L, 2L, 7L, 3L, 53L, 15L, 33L, 3L, 8L, 3L, 4L, 0L, 0L, 0L,
48L, 19L, 76L, 40L, 55L, 11L, 16L, 1L, 4L, 1L, 2L, 0L, 6L,
7L, 21L, 22L, 18L, 23L, 32L, 37L, 40L, 13L, 23L, 10L, 4L,
2L, 19L, 2L, 8L, 3L, 6L, 0L, 1L, 0L, 1L, 0L, 68L, 37L, 90L,
42L, 76L, 38L, 47L, 16L, 29L, 5L, 18L, 2L, 82L, 32L, 62L,
27L, 44L, 22L, 20L, 8L, 8L, 2L, 1L, 0L)), .Names = c("gender", "agecombined", "educ", "cluster", "Freq"), row.names = c(NA,
-360L), class = "data.frame")

reshape/remould data frame to create normalized bar chart and pie chart

I have the following data_frame structure which has been read from a csv file (appended). Basically, this summarises for each Operator (A M D L J) whether their score is Excellent, Good, Ok, Poor or Terrible. The other fields, Date and Scorer, I plan to use later, but they are not required at the moment.
What I am struggling with is how to reduce this data to a format that allows me to plot a bar chart (normalized by dividing by the total count for each operator) and a pie chart. How do I reduce this data frame to something like the following, which will allow me to create a geom_bar plot?
Operator Score Count
A Good 11
A Poor 5
A Ok 3
A Terrible 0
A Excellent 0
D Good 36
D Poor 50
D Ok 10
D Terrible 1
D Excellent 0
I know I can subset the initial data frame according to operator and then get the numbers from summary
# Keep only the rows for operator A, then summarise every column.
dfA <- subset(df, Operator == "A")
summary(dfA)
but I would like to automate this process (i.e. automatically remould the data frame into the above structure, from which I can use ggplot2 to visualise the results). However, I have no idea where to start with this problem.
structure(list(Operator = structure(c(5L, 5L, 5L, 5L, 5L, 5L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L,
3L, 5L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 5L, 2L, 2L, 2L,
2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 1L, 1L, 1L, 5L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 5L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 4L, 4L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 2L,
2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L,
3L, 3L, 1L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 2L, 4L, 4L, 4L, 4L,
3L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 2L,
2L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L,
3L, 5L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L,
3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 4L, 4L, 4L,
4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 5L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L), .Label = c("A", "D", "J", "L", "M"), class = "factor"),
ROI_Score = structure(c(3L, 1L, 1L, 2L, 1L, 3L, 1L, 3L, 3L,
2L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 3L,
3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 3L, 3L, 1L, 3L, 3L, 3L,
1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 1L, 1L, 3L, 3L,
3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L,
3L, 1L, 1L, 1L, 3L, 1L, 3L, 2L, 3L, 3L, 2L, 1L, 1L, 3L, 3L,
1L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 3L,
1L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L,
3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 3L,
1L, 3L, 3L, 1L, 3L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L,
1L, 3L, 1L, 3L, 1L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 3L, 1L, 2L,
1L, 3L, 2L, 3L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 3L,
3L, 1L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L,
3L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 3L,
1L, 1L, 1L, 1L, 2L, 3L, 1L, 1L, 3L, 3L, 1L, 3L, 1L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 1L, 1L, 3L, 1L, 3L, 2L, 3L, 3L, 2L,
1L, 1L, 3L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L, 3L,
1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 1L, 3L, 1L, 3L, 3L, 1L, 1L, 1L, 3L, 1L, 2L, 3L, 1L,
3L, 3L, 2L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 3L, 3L,
3L, 2L, 3L, 2L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L,
4L, 3L, 1L, 1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L,
3L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 1L, 3L, 1L, 1L, 2L,
3L, 1L, 1L, 1L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L,
1L, 1L, 2L, 3L, 3L, 3L, 1L, 3L, 3L, 2L, 1L, 3L, 3L, 3L, 1L,
2L, 3L, 3L, 1L, 1L, 3L, 1L, 3L, 1L, 1L, 3L, 1L, 3L, 3L, 2L,
2L, 3L, 1L, 3L, 1L, 3L, 2L, 1L, 1L, 3L, 3L, 1L, 3L, 3L, 2L,
3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L), .Label = c("Good",
"OK", "Poor", "Terrible"), class = "factor"), Date = structure(c(3L,
3L, 5L, 5L, 5L, 7L, 3L, 3L, 9L, 9L, 9L, 11L, 11L, 3L, 3L,
5L, 5L, 5L, 7L, 7L, 7L, 11L, 11L, 11L, 3L, 15L, 15L, 21L,
13L, 17L, 17L, 19L, 21L, 13L, 13L, 13L, 15L, 15L, 17L, 17L,
17L, 19L, 19L, 19L, 21L, 21L, 30L, 30L, 23L, 25L, 25L, 25L,
27L, 27L, 27L, 29L, 29L, 29L, 23L, 23L, 25L, 25L, 25L, 27L,
27L, 27L, 30L, 30L, 30L, 30L, 30L, 32L, 32L, 36L, 2L, 36L,
36L, 36L, 39L, 39L, 34L, 34L, 34L, 36L, 36L, 36L, 39L, 39L,
2L, 2L, 32L, 34L, 34L, 36L, 41L, 41L, 41L, 43L, 1L, 38L,
38L, 41L, 42L, 43L, 38L, 38L, 41L, 41L, 41L, 42L, 42L, 42L,
43L, 43L, 1L, 1L, 1L, 38L, 42L, 42L, 42L, 42L, 1L, 1L, 1L,
3L, 3L, 7L, 3L, 3L, 3L, 5L, 7L, 11L, 3L, 3L, 3L, 3L, 5L,
5L, 5L, 7L, 7L, 7L, 9L, 9L, 11L, 11L, 11L, 13L, 15L, 17L,
19L, 19L, 21L, 21L, 13L, 21L, 13L, 13L, 13L, 15L, 17L, 17L,
17L, 19L, 19L, 21L, 21L, 21L, 29L, 29L, 29L, 30L, 23L, 25L,
29L, 29L, 23L, 23L, 23L, 25L, 25L, 25L, 27L, 27L, 30L, 30L,
30L, 32L, 32L, 32L, 2L, 2L, 39L, 39L, 32L, 32L, 32L, 34L,
34L, 34L, 36L, 36L, 2L, 2L, 2L, 43L, 1L, 38L, 41L, 41L, 42L,
42L, 42L, 43L, 43L, 1L, 1L, 43L, 1L, 42L, 1L, 1L, 1L, 32L,
32L, 36L, 2L, 36L, 36L, 36L, 39L, 39L, 34L, 34L, 34L, 36L,
36L, 36L, 39L, 39L, 2L, 2L, 32L, 34L, 34L, 36L, 10L, 4L,
6L, 6L, 10L, 10L, 10L, 12L, 4L, 4L, 12L, 12L, 6L, 6L, 6L,
8L, 8L, 8L, 12L, 12L, 14L, 16L, 14L, 14L, 18L, 20L, 14L,
18L, 18L, 18L, 14L, 14L, 14L, 16L, 16L, 16L, 22L, 22L, 22L,
28L, 28L, 31L, 28L, 28L, 28L, 31L, 31L, 31L, 33L, 33L, 33L,
35L, 35L, 35L, 37L, 37L, 37L, 33L, 33L, 33L, 35L, 37L, 37L,
40L, 40L, 32L, 32L, 32L, 2L, 2L, 39L, 39L, 32L, 32L, 32L,
34L, 34L, 34L, 36L, 36L, 2L, 2L, 2L, 6L, 6L, 10L, 10L, 10L,
10L, 4L, 4L, 6L, 6L, 8L, 8L, 8L, 10L, 10L, 12L, 4L, 8L, 8L,
8L, 8L, 12L, 4L, 4L, 4L, 4L, 8L, 12L, 16L, 16L, 14L, 16L,
18L, 18L, 20L, 20L, 20L, 14L, 14L, 20L, 20L, 22L, 22L, 14L,
16L, 18L, 18L, 18L, 18L, 24L, 24L, 24L, 26L, 26L, 31L, 31L,
24L, 26L, 26L, 26L, 26L, 24L, 24L, 24L, 24L, 31L, 31L, 40L,
37L, 33L, 33L, 33L, 33L, 35L, 35L, 35L, 37L, 37L, 37L, 37L,
40L), .Label = c("01/02/2013", "01/03/2013", "04/02/2013",
"04/03/2013", "05/02/2013", "05/03/2013", "06/02/2013", "06/03/2013",
"07/02/2013", "07/03/2013", "08/02/2013", "08/03/2013", "11/02/2013",
"11/03/2013", "12/02/2013", "12/03/2013", "13/02/2013", "13/03/2013",
"14/02/2013", "14/03/2013", "15/02/2013", "15/03/2013", "18/02/2013",
"18/03/2013", "19/02/2013", "19/03/2013", "20/02/2013", "20/03/2013",
"21/02/2013", "22/02/2013", "22/03/2013", "25/02/2013", "25/03/2013",
"26/02/2013", "26/03/2013", "27/02/2013", "27/03/2013", "28/01/2013",
"28/02/2013", "28/03/2013", "29/01/2013", "30/01/2013", "31/01/2013"
), class = "factor"), Scorer = structure(c(2L, 2L, 3L, 3L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L,
2L, 2L, 1L, 1L, 2L, 1L, 2L, 3L, 1L, 3L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 2L, 1L, 1L, 1L, 2L,
1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 1L, 3L, 2L, 2L,
3L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 1L,
3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 3L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L,
3L, 1L, 3L, 1L, 3L, 3L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 3L, 3L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 1L, 1L, 3L, 2L, 1L,
1L, 1L, 1L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 1L, 2L, 3L, 1L, 3L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L,
2L, 2L, 3L, 2L, 2L, 1L, 1L, 3L, 2L, 2L, 2L, 3L, 3L, 2L, 1L,
2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
1L, 3L, 2L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L,
1L, 3L, 3L, 1L, 2L, 3L, 2L, 2L, 1L, 1L, 2L, 2L, 3L, 1L, 2L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L,
2L, 3L, 3L, 1L, 1L, 1L, 2L, 3L, 1L, 3L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 2L, 2L, 3L, 2L, 2L, 1L, 1L, 3L, 2L, 2L, 1L,
3L, 2L, 1L, 3L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 3L, 2L, 1L, 1L, 3L, 1L, 3L, 2L, 2L, 1L, 3L, 2L, 1L, 3L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 3L, 1L, 3L, 3L,
1L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 3L, 3L, 2L, 1L,
2L, 2L, 1L, 1L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L), .Label = c("", "B", "G"), class = "factor")), .Names = c("Operator",
"ROI_Score", "Date", "Scorer"), row.names = c(NA, -412L), class = "data.frame")
Here's how to prepare your data using data.table:
# Load with library() so a missing package stops the script right here;
# require() would only return FALSE and let the next line fail instead.
library(data.table)
dt <- data.table(df)
# Every operator and score that occurs in the data, as character vectors.
ops <- as.character(unique(dt$Operator))
scr <- as.character(unique(dt$ROI_Score))
# Count rows per (Operator, ROI_Score), key the counts on both columns, then
# join against the full cross product of operators and scores so that
# combinations which never occur show up with N = 0 instead of being absent.
oo <- setkey(
  dt[, .N, by = "Operator,ROI_Score"],
  Operator, ROI_Score
)[CJ(ops, scr)][is.na(N), N := 0L]
And here's how you can get a normalised bar-chart with this data:
# Add N.norm by reference: each count divided by its operator's total count.
oo[, N.norm := N / sum(N), by = Operator]
One way to plot this would be with x = Operator:
# Load with library() so a missing package fails immediately.
library(ggplot2)
# One bar per operator; segments are the normalised counts, stacked and
# coloured by score. stat = "identity" plots N.norm as given (no counting).
ggplot(data = oo, aes(x = Operator, y = N.norm)) +
  geom_bar(aes(fill = ROI_Score), stat = "identity", position = "stack")
You can simply do something like this to prepare your data :
# Cross-tabulate operator by score, then flatten the table to long form
# with one row per (Operator, Score) pair and the count in Freq.
data.frame(with(df, table(Operator = Operator, Score = ROI_Score)))
Which gives :
Operator Score Freq
1 A Good 11
2 D Good 36
3 J Good 54
4 L Good 44
5 M Good 28
6 A OK 3
7 D OK 10
8 J OK 9
9 L OK 4
10 M OK 7
11 A Poor 5
12 D Poor 50
13 J Poor 56
14 L Poor 67
15 M Poor 27
16 A Terrible 0
17 D Terrible 1
18 J Terrible 0
19 L Terrible 0
20 M Terrible 0

Resources