How to add color in emmeans graph? - r

I would like to have one color for each level in x axis. I have tried different ways to enter the colour via col argument, but it doesn't seem to work. So far:
library(emmeans)
library(ggplot2)
df <- structure(list(Scanner = structure(c(4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("a", "b", "c", "d", "e",
"f"), class = "factor"), Reta = structure(c(1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("A", "B", "C", "D", "E",
"F"), class = "factor"), erro = c(0.0120000000000005, 0.0289999999999999,
0.088000000000001, 0.00600000000000023, -0.0289999999999964,
0.106999999999999, 0.0850000000000009, 0.172999999999998, 0.183999999999997,
0.208000000000006, 0.192, 0.0869999999999997, -0.0140000000000029,
-0.0420000000000016, -0.0350000000000037, 0.00600000000000023,
0, -0.0100000000000016, 0.167000000000002, 0.276, 0.262, 0.0790000000000006,
0.426000000000002, 0.202999999999999, -0.181000000000004, 0.0560000000000009,
-0.0219999999999985, -0.264999999999993, 0.106000000000002, 0.154999999999998,
-0.0420000000000016, 0.0670000000000002, 0.176000000000002, -0.18,
0.215000000000003, 0.189, -0.036999999999999, 0.169, 0.103000000000002,
-0.622999999999998, 0.268999999999998, 0.106999999999999, -0.0140000000000029,
0.169999999999998, 0.115000000000002, -0.622, 0.276000000000003,
0.0969999999999978, -0.0320000000000036, 0.155999999999999, 0.116,
-0.290999999999997, 0.283000000000001, 0.0439999999999969, 0.0940000000000012,
-0.117000000000001, 0.0249999999999986, 0.00900000000000034,
0.0760000000000005, 0.109999999999999, 0.0549999999999997, 0.0470000000000006,
-0.027000000000001, 0.0130000000000052, 0.036999999999999, 0.0139999999999993,
0.0420000000000016, 0.0459999999999994, -0.109999999999999, 0.007000000000005,
0.0339999999999989, 0.104999999999997, -0.240000000000002, 0.0940000000000012,
-0.0570000000000022, -0.352999999999994, 0.0129999999999981,
0.113, -0.251000000000005, 0.0760000000000005, -0.00200000000000244,
NA, 0.112000000000002, 0.0839999999999996, -0.242000000000004,
0.0530000000000008, -0.134999999999998, -0.446999999999996, 0.118000000000002,
0.075999999999997, -0.0769999999999982, -0.0590000000000011,
-0.0870000000000033, -0.445999999999998, 0.158999999999999, 0.0829999999999984,
-0.270000000000003, -0.0210000000000008, -0.0840000000000032,
-0.189999999999998, 0.116999999999997, 0.0519999999999996, -0.0960000000000036,
-0.0859999999999985, -0.177, -0.271999999999998, 0.0679999999999978,
0.0439999999999969)), row.names = c(NA, -108L), class = c("tbl_df",
"tbl", "data.frame"))
# Linear model of `erro` on Scanner, Reta and their interaction.
m1 <- lm(erro ~ Scanner*Reta, data = df)
# Marginal means from m1; "Scanner" and "Reta" are passed positionally
# (presumably as `specs` and `by` -- confirm against the emmeans() signature).
l1 <- emmeans(m1, "Scanner", "Reta")
# Interaction plot of l1 with confidence intervals requested (CIs = TRUE),
# plus a reference line at y = 0, x tick labels rotated 45 degrees and a
# custom y-axis label.
# NOTE(review): `cols` is not defined anywhere before this call in the snippet,
# so the call fails unless `cols` already exists in the workspace.
emmip(l1, ~ Reta |Scanner,
col = rep(cols, each = 108/6),
CIs = TRUE) +
geom_hline(yintercept = 0, linetype = 2) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(y = "Erro absoluto (mm)")

For now, I think one solution is something like this:
# Six colours from the "Paired" palette, handed to the manual colour scale.
cols <- RColorBrewer::brewer.pal(n = 6, name = "Paired")

# Plot the model's marginal means with confidence intervals, facet by
# Scanner, and colour the traces from `cols`.
emmip(m1, Scanner ~ Reta, CIs = TRUE) +
  facet_wrap(~ Scanner) +
  geom_hline(yintercept = 0, linetype = 2) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(y = "Erro absoluto (mm)") +
  scale_color_manual(values = cols)

Related

ggplot: why does the x-axis not follow the factor level order instead of printing alphabetically?

I have this plot
With
> str(a)
'data.frame': 150 obs. of 2 variables:
$ study: Factor w/ 7 levels "A","S","H","D",..: 7 2 4 5 3 1 7 2 2 4 ...
$ n : Factor w/ 6 levels "N0","N1","N2a",..: 1 1 2 4 1 1 2 1 1 1 ...
I would like the x-axis to arrange by sample size, i.e. level = c("all", "S", "H", "B", "C", "A", "K", "D")
As you can see, the order is printed alphabetically.
I have tried specifying as ... aes(x=factor(nystudie, level=c(...), but that does not work. What am I doing wrong? I followed this post
library(tidyverse)
# Colour vector used by the fill and colour scales below.
colsze <- c("#E1B930", "#2C77BF", "#E38072", "#6DBCC3", "grey40", "black", "#8B3A62")
# Count observations per study and stage. Every row is duplicated with
# nystudie = "all" so that a pooled group is counted alongside the studies,
# and the counts are drawn as dodged bars with the count printed above each.
a %>%
as_tibble() %>%
mutate(nystudie=as.factor(study),
n.seven=as.factor(n)) %>%
bind_rows(., mutate(., nystudie="all")) %>%
count(nystudie, n.seven, .drop=F) %>%
ggplot(aes(x = factor(nystudie, level = c("all", "S", "H", "B", "C", "A", "K", "D")),
n, color = n.seven, fill= n.seven, label=n)) +
geom_col(position = position_dodge2(preserve = "single", padding = 0.1))+
geom_text(aes(label=n),position = position_dodge2(0.9), vjust=-0.25, fontface=2, cex=4.5, show.legend = F) +
scale_fill_manual(values = alpha(colsze, .2),
name="Stage", label=c("N0", "N1", "N2a", "N2b", "N2c", "N3")) +
scale_color_manual(values = colsze,
name="Stage", label=c("N0", "N1", "N2a", "N2b", "N2c", "N3")) +
# NOTE(review): these labels are listed alphabetically (All, A, B, C, D, H, K,
# S), whereas the x factor above is ordered all, S, H, B, C, A, K, D.
# Presumably the labels are applied to the axis positions in order, so the
# text under a bar would no longer name the study it shows -- confirm.
scale_x_discrete(name = "", label=c("All\n(n=1,905)",
"A\n(n=221)",
"B\n(n=234)",
"C\n(n=232)",
"D\n(n=108)",
"H\n(n=427)",
"K\n(n=221)",
"S\n(n=462)")) +
scale_y_continuous(name="",
breaks=seq(0,950,100)) +
coord_cartesian(ylim = c(0,950)) +
guides(fill = guide_legend(nrow = 1)) + theme(axis.text.x = element_text(color = "grey20", size =15),
legend.text=element_text(size=16), legend.title=element_text(size=16, face="bold"),
legend.position="top")
Data sample
a <- structure(list(study = structure(c(7L, 2L, 4L, 5L, 3L, 1L, 7L,
2L, 2L, 4L, 4L, 6L, 2L, 5L, 3L, 7L, 1L, 1L, 2L, 6L, 1L, 3L, 2L,
7L, 2L, 2L, 6L, 6L, 6L, 2L, 1L, 2L, 6L, 1L, 2L, 2L, 3L, 4L, 2L,
3L, 2L, 5L, 2L, 3L, 6L, 5L, 3L, 2L, 4L, 3L, 5L, 6L, 2L, 7L, 2L,
3L, 3L, 3L, 7L, 7L, 3L, 4L, 1L, 1L, 2L, 2L, 6L, 2L, 3L, 2L, 3L,
2L, 1L, 2L, 3L, 5L, 3L, 1L, 1L, 1L, 7L, 4L, 3L, 2L, 4L, 3L, 3L,
3L, 2L, 6L, 7L, 3L, 2L, 2L, 6L, 2L, 2L, 6L, 7L, 3L, 3L, 3L, 6L,
2L, 2L, 7L, 7L, 1L, 1L, 6L, 3L, 3L, 7L, 1L, 2L, 7L, 1L, 1L, 7L,
4L, 4L, 4L, 2L, 3L, 3L, 6L, 1L, 4L, 6L, 3L, 5L, 5L, 3L, 3L, 7L,
5L, 3L, 6L, 3L, 5L, 2L, 3L, 7L, 6L, 2L, 1L, 6L, 5L, 1L, 6L), .Label = c("A",
"S", "H", "D", "K", "C", "B"), class = "factor"), n = structure(c(1L,
1L, 2L, 4L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 4L, 2L, 1L, 2L,
3L, 2L, 2L, 4L, 4L, 4L, 2L, 4L, 1L, 2L, 4L, 1L, 1L, 4L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 4L, 1L, 1L, 4L, 2L, 1L, 1L, 4L, 1L, 1L, 2L,
1L, 5L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 2L, 1L,
4L, 1L, 1L, 1L, 1L, 6L, 1L, 2L, 5L, 4L, 2L, 6L, 1L, 4L, 2L, 4L,
2L, 1L, 1L, 4L, 1L, 2L, 1L, 1L, 4L, 4L, 4L, 1L, 4L, 2L, 1L, 1L,
4L, 2L, 1L, 2L, 1L, 5L, 5L, 1L, 4L, 1L, 2L, 2L, 4L, 1L, 1L, 1L,
2L, 4L, 4L, 1L, 5L, 2L, 1L, 5L, 2L, 4L, 1L, 1L, 1L, 4L, 4L, 1L,
1L, 4L, 4L, 4L, 1L, 4L, 4L, 1L, 4L, 5L, 4L, 5L, 1L, 5L, 1L, 1L,
4L, 2L, 1L, 2L, 4L), .Label = c("N0", "N1", "N2a", "N2b", "N2c",
"N3"), class = "factor")), row.names = c(NA, -150L), class = "data.frame")
The levels are being changed again at scale_x_discrete step. Try :
library(dplyr)
library(ggplot2)
# Pool all studies into an extra "all" group, count observations per study and
# stage, then recode `nystudie` as a factor whose `levels` (bar order) and
# `labels` (axis text) are given together. They are matched by position, so the
# axis text cannot drift away from the bar order the way it does when the
# labels are supplied separately to scale_x_discrete().
# `colsze` is the colour vector defined in the question's code.
# Arguments are spelled out in full (`levels`, `labels`, `FALSE`) rather than
# relying on partial matching or the reassignable `F` shorthand.
a %>%
  mutate(
    nystudie = as.factor(study),
    n.seven = as.factor(n)
  ) %>%
  bind_rows(., mutate(., nystudie = "all")) %>%
  count(nystudie, n.seven, .drop = FALSE) %>%
  mutate(
    nystudie = factor(
      nystudie,
      levels = c("all", "S", "H", "B", "C", "A", "K", "D"),
      labels = c(
        "All\n(n=1,905)", "S\n(n=462)", "H\n(n=427)", "B\n(n=234)",
        "C\n(n=232)", "A\n(n=221)", "K\n(n=221)", "D\n(n=108)"
      )
    )
  ) %>%
  ggplot(aes(x = nystudie, y = n, color = n.seven, fill = n.seven, label = n)) +
  geom_col(position = position_dodge2(preserve = "single", padding = 0.1)) +
  geom_text(
    aes(label = n),
    position = position_dodge2(0.9),
    vjust = -0.25, fontface = 2, cex = 4.5, show.legend = FALSE
  ) +
  scale_fill_manual(
    values = alpha(colsze, .2),
    name = "Stage",
    labels = c("N0", "N1", "N2a", "N2b", "N2c", "N3")
  ) +
  scale_color_manual(
    values = colsze,
    name = "Stage",
    labels = c("N0", "N1", "N2a", "N2b", "N2c", "N3")
  ) +
  # Only the axis title is set here; the tick labels come from the factor.
  scale_x_discrete(name = "") +
  scale_y_continuous(name = "", breaks = seq(0, 950, 100)) +
  coord_cartesian(ylim = c(0, 950)) +
  guides(fill = guide_legend(nrow = 1)) +
  theme(
    axis.text.x = element_text(color = "grey20", size = 15),
    legend.text = element_text(size = 16),
    legend.title = element_text(size = 16, face = "bold"),
    legend.position = "top"
  )

Error in contrasts, supervised classification

I was trying to follow this tutorial using the following dataset: Mushroom Classification. I was looking for a supervised classification problem, and I think I got it.
After running the following code...
library(caret)
# Read the mushroom data and inspect its size, column classes, first rows
# and the levels of the outcome column.
dataset <- read.csv("mushrooms.csv")
dim(dataset)
sapply(dataset, class)
head(dataset)
levels(dataset$class)

# Seeded split on the outcome: p = 0.75 of the rows are indexed for training,
# the remaining rows form the test set.
set.seed(100)
inTrain <- createDataPartition(y = dataset$class, p = 0.75, list = FALSE)
str(inTrain)
training <- dataset[inTrain, ]
testing <- dataset[-inTrain, ]
nrow(training)
nrow(testing)

# Fit an LDA classifier on all predictors using 10-fold cross-validation.
control <- trainControl(method = "cv", number = 10)
metric <- "Accuracy"
train.lda <- train(
  class ~ .,
  data = training,
  method = "lda",
  trControl = control
)
... I saw the dataset had 8124 rows and 22 variables —plus the classifier—.
dim(dataset)
[1] 8124 23
However when executing train I get the following error:
Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) :
contrasts can be applied only to factors with 2 or more levels
Looking around the web, and even here in Stack Overflow, the explanation I found was that my predictor has only one factor level. Like if the class variable only took one value? Nonetheless, previously in the code I check the level of that variable, and I get its level is 2, as it takes two values.
levels(dataset$class)
[1] "e" "p"
Therefore, I do not understand why I am getting the error. What's wrong with my reasoning? What am I doing wrong?
Thank you.
Sample request:
structure(list(class = structure(c(2L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("e",
"p"), class = "factor"), cap.shape = structure(c(6L, 6L, 1L,
6L, 6L, 6L, 1L, 1L, 6L, 1L, 6L, 6L, 1L, 6L, 6L, 5L, 3L, 6L, 6L,
6L, 1L, 6L, 1L, 1L, 1L, 3L, 6L, 6L, 3L, 6L, 1L, 6L, 6L, 6L, 1L,
6L, 5L, 6L, 6L, 1L, 1L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 3L, 6L,
6L, 1L, 6L, 6L, 1L, 3L, 1L, 6L, 6L, 5L, 1L, 1L, 1L, 1L, 3L, 6L,
3L, 6L, 6L, 3L, 1L, 3L, 6L, 1L, 3L, 6L, 3L, 6L, 3L, 6L, 6L, 3L,
6L, 6L, 6L, 1L, 6L, 3L, 5L, 6L, 1L, 6L, 6L, 6L, 6L, 3L, 6L, 1L,
6L), .Label = c("b", "c", "f", "k", "s", "x"), class = "factor"),
cap.surface = structure(c(3L, 3L, 3L, 4L, 3L, 4L, 3L, 4L,
4L, 3L, 4L, 4L, 3L, 4L, 1L, 1L, 1L, 3L, 4L, 3L, 3L, 4L, 4L,
4L, 3L, 3L, 4L, 4L, 1L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 4L,
1L, 3L, 4L, 4L, 1L, 4L, 3L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 3L,
4L, 3L, 4L, 1L, 3L, 3L, 4L, 1L, 4L, 3L, 4L, 4L, 3L, 3L, 4L,
4L, 1L, 1L, 4L, 1L, 4L, 3L, 3L, 3L, 4L, 4L, 4L, 3L, 4L, 1L,
1L, 4L, 3L, 3L, 3L, 4L, 1L, 1L, 3L, 4L, 4L, 3L, 3L, 4L, 3L,
3L, 4L), .Label = c("f", "g", "s", "y"), class = "factor"),
cap.color = structure(c(5L, 10L, 9L, 9L, 4L, 10L, 9L, 9L,
9L, 10L, 10L, 10L, 10L, 9L, 5L, 4L, 9L, 5L, 9L, 5L, 10L,
5L, 10L, 9L, 9L, 9L, 10L, 9L, 5L, 10L, 10L, 9L, 10L, 5L,
10L, 10L, 4L, 5L, 10L, 10L, 10L, 10L, 5L, 9L, 10L, 9L, 10L,
9L, 10L, 10L, 5L, 9L, 9L, 5L, 9L, 10L, 4L, 9L, 10L, 5L, 4L,
10L, 10L, 10L, 9L, 5L, 9L, 10L, 10L, 4L, 10L, 9L, 10L, 5L,
10L, 10L, 9L, 5L, 5L, 5L, 5L, 9L, 4L, 4L, 10L, 5L, 9L, 9L,
5L, 5L, 5L, 9L, 10L, 10L, 5L, 9L, 5L, 10L, 9L, 9L), .Label = c("b",
"c", "e", "g", "n", "p", "r", "u", "w", "y"), class = "factor"),
bruises = structure(c(2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
2L), .Label = c("f", "t"), class = "factor"), odor = structure(c(7L,
1L, 4L, 7L, 6L, 1L, 1L, 4L, 7L, 1L, 4L, 1L, 1L, 7L, 6L, 6L,
6L, 7L, 7L, 7L, 1L, 7L, 4L, 1L, 4L, 7L, 1L, 4L, 6L, 1L, 4L,
7L, 4L, 4L, 4L, 4L, 6L, 7L, 1L, 4L, 1L, 4L, 6L, 7L, 1L, 1L,
4L, 4L, 4L, 4L, 1L, 4L, 4L, 7L, 7L, 1L, 6L, 1L, 4L, 1L, 6L,
1L, 4L, 4L, 4L, 6L, 4L, 1L, 1L, 6L, 4L, 4L, 4L, 1L, 1L, 4L,
4L, 4L, 7L, 1L, 6L, 7L, 6L, 6L, 4L, 6L, 1L, 4L, 4L, 6L, 6L,
4L, 1L, 4L, 6L, 1L, 4L, 1L, 1L, 1L), .Label = c("a", "c",
"f", "l", "m", "n", "p", "s", "y"), class = "factor"), gill.attachment = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("a", "f"), class = "factor"),
gill.spacing = structure(c(1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L), .Label = c("c", "w"), class = "factor"), gill.size = structure(c(2L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L,
1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L,
2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("b", "n"), class = "factor"),
gill.color = structure(c(5L, 5L, 6L, 6L, 5L, 6L, 3L, 6L,
8L, 3L, 3L, 6L, 11L, 5L, 6L, 5L, 5L, 6L, 6L, 5L, 5L, 6L,
5L, 11L, 3L, 6L, 6L, 11L, 5L, 6L, 3L, 5L, 6L, 8L, 6L, 11L,
5L, 11L, 8L, 5L, 6L, 6L, 3L, 8L, 11L, 6L, 5L, 11L, 6L, 11L,
11L, 5L, 5L, 5L, 5L, 11L, 6L, 11L, 5L, 8L, 5L, 5L, 3L, 3L,
6L, 5L, 6L, 11L, 11L, 8L, 8L, 3L, 11L, 8L, 5L, 8L, 6L, 8L,
11L, 6L, 5L, 11L, 6L, 6L, 11L, 5L, 11L, 6L, 11L, 6L, 6L,
5L, 3L, 3L, 6L, 3L, 8L, 6L, 3L, 3L), .Label = c("b", "e",
"g", "h", "k", "n", "o", "p", "r", "u", "w", "y"), class = "factor"),
stalk.shape = structure(c(1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L), .Label = c("e", "t"), class = "factor"), stalk.root = structure(c(4L,
3L, 3L, 4L, 4L, 3L, 3L, 3L, 4L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 3L, 4L, 3L, 3L, 4L, 2L, 3L,
4L, 3L, 5L, 3L, 2L, 4L, 4L, 2L, 3L, 3L, 5L, 4L, 4L, 3L, 3L,
3L, 3L, 5L, 5L, 5L, 3L, 3L, 4L, 4L, 3L, 4L, 3L, 3L, 5L, 4L,
3L, 3L, 3L, 3L, 4L, 3L, 5L, 3L, 4L, 2L, 3L, 2L, 5L, 3L, 2L,
2L, 5L, 4L, 5L, 4L, 4L, 4L, 4L, 5L, 4L, 3L, 3L, 5L, 4L, 4L,
3L, 3L, 3L, 4L, 3L, 5L, 3L, 3L, 3L), .Label = c("?", "b",
"c", "e", "r"), class = "factor"), stalk.surface.above.ring = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("f", "k",
"s", "y"), class = "factor"), stalk.surface.below.ring = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 3L,
3L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 3L, 4L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 3L), .Label = c("f", "k",
"s", "y"), class = "factor"), stalk.color.above.ring = structure(c(8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L), .Label = c("b", "c",
"e", "g", "n", "o", "p", "w", "y"), class = "factor"), stalk.color.below.ring = structure(c(8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L), .Label = c("b", "c",
"e", "g", "n", "o", "p", "w", "y"), class = "factor"), veil.type = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "p", class = "factor"),
veil.color = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L), .Label = c("n", "o", "w", "y"), class = "factor"),
ring.number = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("n", "o", "t"), class = "factor"), ring.type = structure(c(5L,
5L, 5L, 5L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 5L,
1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 1L, 5L, 5L, 1L, 5L, 1L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 1L, 5L, 5L, 5L, 5L, 5L), .Label = c("e", "f",
"l", "n", "p"), class = "factor"), spore.print.color = structure(c(3L,
4L, 4L, 3L, 4L, 3L, 3L, 4L, 3L, 3L, 4L, 3L, 4L, 4L, 3L, 4L,
4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 3L, 3L, 4L, 3L, 4L,
3L, 4L, 4L, 3L, 3L, 3L, 4L, 4L, 3L, 3L, 4L, 4L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 3L, 3L, 4L, 3L, 4L, 4L, 4L, 4L, 3L, 3L, 4L,
7L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 3L, 3L, 3L, 4L, 3L, 4L, 4L,
3L, 3L, 3L, 4L, 4L, 4L, 3L, 4L, 3L), .Label = c("b", "h",
"k", "n", "o", "r", "u", "w", "y"), class = "factor"), population = structure(c(4L,
3L, 3L, 4L, 1L, 3L, 3L, 4L, 5L, 4L, 3L, 4L, 4L, 5L, 1L, 6L,
1L, 4L, 4L, 4L, 4L, 5L, 4L, 3L, 4L, 5L, 3L, 3L, 6L, 5L, 3L,
4L, 3L, 6L, 4L, 5L, 5L, 4L, 5L, 4L, 4L, 6L, 6L, 5L, 3L, 3L,
4L, 3L, 4L, 4L, 4L, 4L, 3L, 5L, 5L, 4L, 1L, 3L, 3L, 6L, 5L,
4L, 4L, 3L, 4L, 1L, 4L, 4L, 3L, 5L, 5L, 4L, 5L, 4L, 4L, 5L,
5L, 6L, 5L, 6L, 4L, 4L, 6L, 4L, 4L, 4L, 4L, 4L, 6L, 5L, 6L,
4L, 4L, 3L, 1L, 4L, 4L, 3L, 4L, 4L), .Label = c("a", "c",
"n", "s", "v", "y"), class = "factor"), habitat = structure(c(6L,
2L, 4L, 6L, 2L, 2L, 4L, 4L, 2L, 4L, 2L, 4L, 2L, 6L, 2L, 6L,
2L, 2L, 6L, 6L, 4L, 2L, 4L, 4L, 4L, 2L, 4L, 4L, 6L, 1L, 4L,
6L, 4L, 5L, 4L, 1L, 6L, 6L, 1L, 4L, 2L, 5L, 6L, 2L, 4L, 2L,
4L, 4L, 5L, 5L, 2L, 2L, 4L, 6L, 6L, 4L, 2L, 2L, 2L, 5L, 6L,
4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 6L, 1L, 4L, 1L, 5L, 2L, 1L,
1L, 5L, 6L, 2L, 2L, 2L, 6L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 6L,
2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("d", "g",
"l", "m", "p", "u", "w"), class = "factor")), .Names = c("class",
"cap.shape", "cap.surface", "cap.color", "bruises", "odor", "gill.attachment",
"gill.spacing", "gill.size", "gill.color", "stalk.shape", "stalk.root",
"stalk.surface.above.ring", "stalk.surface.below.ring", "stalk.color.above.ring",
"stalk.color.below.ring", "veil.type", "veil.color", "ring.number",
"ring.type", "spore.print.color", "population", "habitat"), row.names = c(NA,
100L), class = "data.frame")
First five rows of data of the .csv file plus the headers
class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
p,x,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,s,u
e,x,s,y,t,a,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,n,g
e,b,s,w,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,n,m
p,x,y,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,s,u
e,x,s,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,a,g
It is possible that your data set is not randomized and that what I am about to say is only true of your samples, not the full data set BUT many of your variables have only one value that is used. Type summary(dataset) and you will quickly see some examples. For example, part of the display is:
stalk.color.below.ring veil.type veil.color ring.number
w :100 p:100 n: 0 n: 0
b : 0 o: 0 o:100
c : 0 w:100 t: 0
e : 0 y: 0
g : 0
n : 0
(Other): 0
Notice that for veil.type, there is only one possible value.
levels(dataset$veil.type)
[1] "p"
I expect that is the source of your error message.
# Positions of the factor columns. is.factor() is used instead of comparing
# class() with "factor" because an ordered factor has a two-element class
# vector, which breaks the comparison.
Factors <- which(vapply(dataset, is.factor, logical(1)))
# Number of levels per factor column. List-style indexing keeps a data frame
# even when a single column is selected, and vapply() guarantees an integer
# result whatever the input.
vapply(dataset[Factors], nlevels, integer(1))
Shows that veil.type is the only attribute with only one possible level.

boxplot with multiple factor labels using base R functions

How can one possibly reproduce the ggplot-based boxplot shown in this answer but using base R boxplot function?
Sample data from the above link:
# 1500 standard-normal draws with two grouping factors:
# f1 cycles through 1..20 (exactly 20 levels), f2 cycles through a, b, c.
d <- data.frame(
  x = rnorm(1500),
  f1 = factor(rep(1:20, times = 75)),
  f2 = factor(rep(letters[1:3], times = 500))
)
# One box per f2-within-f1 combination, coloured by f2; axes are suppressed.
boxplot(
  x ~ f2 * f1,
  data = d,
  col = c("red", "blue", "green"),
  frame.plot = TRUE,
  axes = FALSE
)
It would be great if the groups on the x-axis are spaced from each other.
I have limited knowledge about ggplot2.
EDIT
While waiting for more suggestions using base R functions, I am making some progress with ggplot2.
Using this sample data how can I produce a plot with well aligned x-axis as the one in the link above?
The following does not give me the correct alignment (I want the numbers 1:8 aligned at the center of each group):
library(ggplot2)
# Boxplots of `values` for each ID, grouped by the obs/ID combination and
# filled by `obs`. The y aesthetic must be `values`: dat3's columns are
# values, ind, ID and obs, so a column called `value` does not exist.
ggplot(dat3, aes(x = ID, y = values, group = interaction(obs, ID), fill = obs)) +
  geom_boxplot() +
  scale_fill_manual(values = c("yellow", "orange"))
dat3=structure(list(values = c(0, 0, 0, 0, 0, 0, 0, 0, -0.0169491525423729,
0, 0, 0, 0, 1, 1, 0.64367816091954, 0.64367816091954, 0, 0, -0.0163934426229508,
-0.021978021978022, 0.109195402298851, 0, 0, 0, 0, 0.207650273224044,
0.4375, 0, 0, 0, 0, 0.302325581395349, 0.303370786516854, 0.270588235294118,
-0.0188679245283019, 0.156462585034014, 0.092436974789916, 0.69,
-0.021978021978022, 0.64367816091954, 0.614906832298137, 0.612903225806452,
0.274853801169591, 0, 0.303370786516854, 0, 0, -0.03125, 0.229813664596273,
0.557142857142857, 0, 0.109195402298851, 0.0746268656716418,
0.180616740088106, 0.210526315789474, 0.310344827586207, 1, 1,
0.0825688073394495, 0.294117647058824, 0, 0.4375, 0, 0.230769230769231,
0.347826086956522, -0.0163934426229508, 0.156462585034014, 0,
0, 0, 1, 0, 0, 0, 0.483333333333333, 0.483333333333333, 0, 0,
0, 0, 0, -0.0169491525423729, 0, 0.310344827586207, 0, 0.296875,
0.302325581395349, 0, 0, 0, 0, 0, 0, 0.482758620689655, 0, 0,
0, 0, 0, 0, 0, 0, 0.150684931506849, 0.150684931506849, 0, 0,
-0.021978021978022, -0.021978021978022, 0.270588235294118, 0,
0, 0.482758620689655, 0.482758620689655, 0.272727272727273, 0.272727272727273,
0, 1, 0, 0, 0.642857142857143, 0.211864406779661, 0.156462585034014,
-0.0449438202247191, -0.0449438202247191, 0.389763779527559,
0.389763779527559, -0.021978021978022, 0.211864406779661, 0.213197969543147,
0.213197969543147, 0.358620689655172, -0.0163934426229508, 0.483333333333333,
0, 0, 0.362139917695473, 0.362139917695473, 0.261904761904762,
0.483333333333333, 1, 1, 0.236453201970443, 0.302325581395349,
0.310344827586207, 1, 1, 0.358974358974359, 0.358974358974359,
-0.0606060606060606, 0.0721649484536082, 0.615384615384615, 0.615384615384615,
0.347826086956522, 1, 0, 0, 0, -0.0273972602739726, -0.0273972602739726,
-0.0169491525423729, -0.0256410256410256, 0.107142857142857,
0.107142857142857, 0.302325581395349, -0.0163934426229508, -0.0264900662251656,
0.311111111111111, 0.311111111111111, 0.156462585034014, 0.156462585034014,
-0.0483091787439614, 0.311111111111111, -0.0333333333333333,
-0.0333333333333333, 0.311111111111111), ind = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L), .Label = c("ETS",
"ETS.1", "ETS.2", "ETS.3", "ETS.4", "ETS.5", "ETS.6", "ETS.7"
), class = "factor"), ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L), .Label = c("4", "5",
"6", "7", "8", "9", "10", "11"), class = "factor"), obs = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("obs",
"capa"), class = "factor")), .Names = c("values", "ind", "ID",
"obs"), row.names = c(NA, 176L), class = "data.frame")
You can specify the location of the boxes using at option.
# Reproducible sample: 1500 standard-normal draws, f1 cycling through 1..20
# (20 levels) and f2 cycling through a, b, c.
set.seed(1)
d <- data.frame(
  x = rnorm(1500),
  f1 = factor(rep(1:20, times = 75)),
  f2 = factor(rep(letters[1:3], times = 500))
)
# Place the 60 boxes at positions 1..80 with every 4th slot left empty, which
# opens a gap after each group of three (a, b, c) boxes.
boxplot(
  x ~ f2 * f1,
  data = d,
  at = seq_len(80)[seq_len(80) %% 4 != 0],
  col = c("red", "blue", "green"),
  frame.plot = TRUE,
  axes = FALSE
)
# One tick under the middle box of each group, labelled with the f1 level.
axis(1, at = seq(2, 80, 4), labels = 1:20, cex.axis = 0.7)

Overlay ggplot grouped tiles with polygon border depending on extra factor

I have a data frame with x and y positions and two factor columns blocknr and cat:
dput(testData)
structure(list(xpos = c(2L, 8L, 5L, 8L, 1L, 4L, 5L, 1L, 8L, 4L,
3L, 2L, 6L, 5L, 1L, 7L, 3L, 4L, 3L, 7L, 1L, 6L, 7L, 7L, 2L, 5L,
3L, 4L, 6L, 7L, 1L, 5L, 1L, 6L, 4L, 5L, 3L, 6L, 4L, 8L, 1L, 3L,
4L, 6L, 7L, 3L, 2L, 6L, 4L, 2L, 1L, 7L, 4L, 8L, 2L, 3L, 2L, 5L,
8L, 2L, 8L, 3L, 3L, 5L, 6L, 7L, 1L, 5L, 6L, 4L, 2L, 6L, 7L, 1L,
5L, 7L, 2L), ypos = c(1L, 2L, 8L, 1L, 6L, 7L, 1L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 7L, 8L, 10L, 2L, 6L, 9L, 1L, 2L, 10L, 4L, 5L,
6L, 3L, 5L, 9L, 3L, 9L, 10L, 3L, 7L, 8L, 2L, 5L, 6L, 3L, 4L,
10L, 1L, 4L, 10L, 2L, 8L, 9L, 3L, 6L, 8L, 5L, 7L, 10L, 3L, 4L,
7L, 2L, 4L, 5L, 6L, 7L, 9L, 4L, 7L, 8L, 1L, 2L, 9L, 5L, 9L, 10L,
1L, 6L, 8L, 3L, 5L, 7L), blocknr = c(1L, 3L, 2L, 3L, 1L, 2L,
2L, 1L, 3L, 2L, 1L, 1L, 3L, 2L, 1L, 3L, 2L, 2L, 1L, 3L, 1L, 2L,
3L, 3L, 1L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 1L, 3L, 2L, 2L, 1L, 3L,
2L, 3L, 1L, 1L, 2L, 3L, 3L, 2L, 1L, 3L, 2L, 1L, 1L, 3L, 2L, 3L,
1L, 2L, 1L, 2L, 3L, 1L, 3L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 3L, 2L,
1L, 2L, 3L, 1L, 2L, 3L, 1L), cat = structure(c(2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("A", "B", "C"
), class = "factor")), .Names = c("xpos", "ypos", "blocknr",
"cat"), row.names = c(NA, -77L), class = "data.frame")
I've made the following ggplot code to make 2D overview:
# Plot extent: the largest x and y grid positions present in the data.
xmax <- max(testData$xpos)
ymax <- max(testData$ypos)

# Tile overview: one tile per (xpos, ypos) cell, filled by `cat` and labelled
# with its block number.
ggplot(data = testData, aes(x = xpos, y = ypos)) +
  geom_tile(aes(fill = cat), colour = "white") +
  scale_fill_manual(values = c('A' = '#F8766D', 'C' = '#8ABF54', 'B' = '#C1DDA5')) +
  geom_text(aes(x = xpos, y = ypos, label = blocknr), size = 3) +
  # A plot holds a single coordinate system, so both limits are set in one
  # call; a second coord_cartesian() would replace the first and drop its ylim.
  coord_cartesian(xlim = c(0.5, xmax + 0.5), ylim = c(0.5, ymax + 0.5)) +
  scale_x_continuous(breaks = seq(1, xmax, 1)) +
  scale_y_continuous(breaks = seq(1, ymax, 1)) +
  # geom_polygon(aes(group = blocknr)) +
  theme(
    axis.line = element_line(colour = "white"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank()
  )
which produces the following result:
Now I would like to highlight each group of blocknrs by drawing a border around them as shown below:
I've played around with geom_polygon, geom_path, but I can't quite find a way to do this. Is there a general way to achieve this in ggplot without constructing an algorithm to compute where each line should be and add those lines as a geom_segment?
As far as I know, there is no way to do this with standard ggplot2 tile options. But it's not too much trouble to construct them if you do it as segments. For example:
# Grid extents: the largest coordinates that occur in the data.
xmax <- max(testData[["xpos"]])
ymax <- max(testData[["ypos"]])

# Start from an all-zero ymax-by-xmax grid, then write each record's block
# number into its (row = ypos, column = xpos) cell. Indexing a matrix with a
# two-column matrix addresses one cell per index row.
m <- matrix(0, nrow = ymax, ncol = xmax)
m[as.matrix(testData[, c("ypos", "xpos")])] <- testData[["blocknr"]]
Here we are basically taking all the row/col assignment data and creating a matrix that essentially looks like the plot, but filled with the block numbers. Now we will scan for the locations where we need to add a "wall" by looking for changes in the block numbers as we go across each row and each column separately.
# TRUE when `x` is a two-column matrix with at least one row, i.e. a cbind()
# result below that really holds break positions. When which() finds nothing,
# cbind() ignores the zero-length vector and returns a one-column matrix,
# which this predicate filters out. Both tests are scalar, hence `&&`.
has.breaks <- function(x) ncol(x) == 2 && nrow(x) > 0

# Scan each row of `m`. The row is padded with 0 on both ends so that the
# outer edges count as changes, and which(diff(...) != 0) gives the positions
# where the value changes. Result: one (y, x) record per break, where y is
# the row index. seq_len() keeps the iteration empty for a zero-row matrix,
# where 1:nrow(m) would yield c(1, 0).
hw <- do.call(
  rbind.data.frame,
  Filter(
    has.breaks,
    Map(
      function(i, x) cbind(y = i, x = which(diff(c(0, x, 0)) != 0)),
      seq_len(nrow(m)),
      split(m, row(m))
    )
  )
)

# Same scan down each column of `m` (as.data.frame() yields one list element
# per column). Result: one (x, y) record per break, where x is the column
# index.
vw <- do.call(
  rbind.data.frame,
  Filter(
    has.breaks,
    Map(
      function(i, x) cbind(x = i, y = which(diff(c(0, x, 0)) != 0)),
      seq_len(ncol(m)),
      as.data.frame(m)
    )
  )
)
And you can add calls to geom_segment to add the horizontal and vertical walls to the plot.
# Tile overview with block borders drawn as line segments taken from the
# break tables `hw` and `vw`.
ggplot(data = testData, aes(x = xpos, y = ypos)) +
  geom_tile(aes(fill = cat), colour = "white") +
  scale_fill_manual(
    values = c("A" = "#F8766D", "C" = "#8ABF54", "B" = "#C1DDA5")
  ) +
  # x and y are inherited from the plot-level aes(); only the label is new.
  geom_text(aes(label = blocknr), size = 3) +
  # Vertical walls: x is constant, the segment spans one tile height.
  geom_segment(
    data = hw,
    aes(x = x - 0.5, xend = x - 0.5, y = y - 0.5, yend = y + 0.5)
  ) +
  # Horizontal walls: y is constant, the segment spans one tile width.
  geom_segment(
    data = vw,
    aes(x = x - 0.5, xend = x + 0.5, y = y - 0.5, yend = y - 0.5)
  ) +
  # A plot has exactly one coordinate system, so both limits must be set in
  # the same call: adding a second coord_cartesian() replaces the first one
  # and discards its limits.
  coord_cartesian(xlim = c(0.4, xmax + 0.6), ylim = c(0.4, ymax + 0.6)) +
  scale_x_continuous(breaks = seq(1, xmax, 1)) +
  scale_y_continuous(breaks = seq(1, ymax, 1)) +
  theme(
    axis.line = element_line(colour = "white"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank()
  )
which gives
The desplot package will do this for you (using lattice):
library(desplot)
# Lattice-based alternative: `cat` is plotted over the xpos/ypos grid, with
# `blocknr` passed both as the outline factor (out1) and as the cell text.
desplot(
  form = cat ~ xpos * ypos,
  data = testData,
  out1 = blocknr,
  text = blocknr,
  main = "testData"
)

Plotting a stacked bar plot?

I have the following data:
structure(list(Time = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L), Type = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L), Value = c(848565.34,
1463110.61, 626673.64, 277708.41, 841422.11, 956238.14, 461092.16,
208703.75, 800837.48, 1356164.25, 549509.34, 300241.53, 851247.9714,
1353358.318, 598536.5948, 307485.0918, 332042.2275, 666157.8721,
194981.1566, 79344.50328, 831003.6952, 1111311.517, 521632.3074,
274384.1731, 1174671.569, 1070301.745, 454876.1589, 351973.2418,
5631710.101, 279394.6061, 119034.4969, 39693.31587, 1166869.32,
1156855.09, 369816.8152, 274092.5751, 924474.1129, 975028.0207,
449213.7419, 213855.3067, 1967188.317, 178841.604, 43692.69319,
12493.90538, 835142.6168, 876273.4462, 354154.644, 182794.3813,
1158096.251, 998647.6908, 566726.9865, 195099.4295, 1798902.332,
171519.4741, 81644.02724, 12221.41779, 1301775.314, 920464.9992,
294140.4882, 175626.9677, 2179780.499, 1838687.535, 978775.2674,
366668.3462, 5385970.324, 177527.1577, 65310.32674, 5986.871716,
2250834.171, 1547858.632, 666444.2992, 251767.3006, 1786086.335,
1597055.451, 563976.9719, 309186.1626, 487105.824, 279712.1658,
86471.46603, 24434.05486, 1563940.414, 1409428.038, 531425.682,
257056.5524, 1685501.271, 1371943.438, 881348.5022, 313355.8284,
170771.9118, 155596.7479, 59881.60825, 12090.57989, 1668571.543,
1150257.058, 563054.758, 306767.0344, 2214849.859, 1724719.891,
822092.2031, 443194.4609, 8897796.235, 87491.42925, 10699.30103,
18131.89738, 2137240.993, 1476873.778, 741685.9913, 549539.9735,
1362085.657, 1266106.09, 448653.8889, 278236.8416, 1671665.39,
95239.07396, 54173.57043, 10125.82011, 1335200.152, 1167824.903,
426738.1845, 261255.2092)), .Names = c("Time", "Type", "Value"
), row.names = c(NA, -120L), class = "data.frame")
I am trying to plot a stacked bar graph that looks like this:
I know that adding position="identity" or position="dodge" produces different types of bar plots but am not sure how to produce the above chart with both types. Any suggestions?
# Stacked version: the bars for every Type are piled up within each Time.
ggplot(
  data = df,
  mapping = aes(factor(Time), Value, fill = factor(Type))
) +
  geom_bar(position = "stack", stat = "identity")

# Dodged version: the bars for each Type are placed side by side within
# each Time.
ggplot(
  data = df,
  mapping = aes(factor(Time), Value, fill = factor(Type))
) +
  geom_bar(position = "dodge", stat = "identity")
You can do one or the other but not both. When they are dodged, the different values of type are being used. By adding a color outline, you can see that.
# Dodged bars with a black outline, so every individual bar drawn for a
# Time/Type combination becomes visible.
ggplot(
  data = df,
  mapping = aes(factor(Time), Value, fill = factor(Type))
) +
  geom_bar(position = "dodge", stat = "identity", colour = "black")

Resources