Related
I am trying to implement a series of box plots which demonstrate the area, radius and concavity of human cells. The variable I am trying to plot is 'characters' split into two subsets 'Malignant' or 'Benign'.
I keep receiving the following error messages:
> Error in FUN(X[[i]], ...) : object 'Class_mean' not found
> Error in FUN(X[[i]], ...) : object 'Class_radius' not found
Please see my code:
ggplot(wisconsin, aes(x= Class, y=Class_mean, fill="pink")) +
geom_boxplot(fill= "yellow")+
ggtitle("radius of benign and malignant stage")
ggplot(wisconsin, aes(x= Class, y=Class_radius))+
geom_boxplot()+
ggtitle("area of benign and malignant stage")
ggplot(wisconsin, aes(x= Class, y=concavity_mean))+
geom_boxplot()+
ggtitle("concavity of benign and malignant stage")
Any ideas on how I could figure out the radius, mean and concavity object to Y variable?
All suggestions welcome
Please see head of data:
structure(list(Cl.thickness = c(5L, 5L, 3L, 6L, 4L, 8L, 1L, 2L,
2L, 4L, 1L, 2L, 5L, 1L, 8L, 7L, 4L, 4L, 10L, 6L), Cell.size = c(1L,
4L, 1L, 8L, 1L, 10L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 7L, 4L,
1L, 1L, 7L, 1L), Cell.shape = c(1L, 4L, 1L, 8L, 1L, 10L, 1L,
2L, 1L, 1L, 1L, 1L, 3L, 1L, 5L, 6L, 1L, 1L, 7L, 1L), Marg.adhesion = c(1L,
5L, 1L, 1L, 3L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 10L, 4L,
1L, 1L, 6L, 1L), Epith.c.size = c(2L, 7L, 2L, 3L, 2L, 7L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 7L, 6L, 2L, 2L, 4L, 2L), Bare.nuclei = c(1L,
10L, 2L, 4L, 1L, 10L, 10L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 9L, 1L,
1L, 1L, 10L, 1L), Bl.cromatin = c(3L, 3L, 3L, 3L, 3L, 9L, 3L,
3L, 1L, 2L, 3L, 2L, 4L, 3L, 5L, 4L, 2L, 3L, 4L, 3L), Normal.nucleoli = c(1L,
2L, 1L, 7L, 1L, 7L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 5L, 3L, 1L,
1L, 1L, 1L), Mitoses = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L,
1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 2L, 1L), Class = c("benign",
"benign", "benign", "benign", "benign", "malignant", "benign",
"benign", "benign", "benign", "benign", "benign", "malignant",
"benign", "malignant", "malignant", "benign", "benign", "malignant",
"benign")), row.names = c(NA, 20L), class = "data.frame")
> dput(head(wisconsin, 20))
structure(list(Cl.thickness = c(5L, 5L, 3L, 6L, 4L, 8L, 1L, 2L,
2L, 4L, 1L, 2L, 5L, 1L, 8L, 7L, 4L, 4L, 10L, 6L), Cell.size = c(1L,
4L, 1L, 8L, 1L, 10L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 7L, 4L,
1L, 1L, 7L, 1L), Cell.shape = c(1L, 4L, 1L, 8L, 1L, 10L, 1L,
2L, 1L, 1L, 1L, 1L, 3L, 1L, 5L, 6L, 1L, 1L, 7L, 1L), Marg.adhesion = c(1L,
5L, 1L, 1L, 3L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 10L, 4L,
1L, 1L, 6L, 1L), Epith.c.size = c(2L, 7L, 2L, 3L, 2L, 7L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 7L, 6L, 2L, 2L, 4L, 2L), Bare.nuclei = c(1L,
10L, 2L, 4L, 1L, 10L, 10L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 9L, 1L,
1L, 1L, 10L, 1L), Bl.cromatin = c(3L, 3L, 3L, 3L, 3L, 9L, 3L,
3L, 1L, 2L, 3L, 2L, 4L, 3L, 5L, 4L, 2L, 3L, 4L, 3L), Normal.nucleoli = c(1L,
2L, 1L, 7L, 1L, 7L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 5L, 3L, 1L,
1L, 1L, 1L), Mitoses = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L,
1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 2L, 1L), Class = c("benign",
"benign", "benign", "benign", "benign", "malignant", "benign",
"benign", "benign", "benign", "benign", "benign", "malignant",
"benign", "malignant", "malignant", "benign", "benign", "malignant",
"benign")), row.names = c(NA, 20L), class = "data.frame")
If you want to plot each character per class, then the code below might solve the problem.
This type of problem usually comes down to reshaping the data: the data is in wide format, but ggplot2 needs it in long format. See this post on how to reshape the data from wide to long format. I will use the function pivot_longer from package tidyr.
library(ggplot2)
# One box plot panel per measured character, comparing the two classes.
# pivot_longer() stacks every column except Class into a name/value pair
# ("characters" / "value"), which is the long layout faceting needs.
ggplot(
  tidyr::pivot_longer(wisconsin, cols = -Class, names_to = "characters"),
  aes(x = Class, y = value)
) +
  geom_boxplot(fill = "lightblue") +
  facet_wrap(vars(characters)) +
  theme_bw()
Created on 2022-10-19 with reprex v2.0.2
I have this plot
With
> str(a)
'data.frame': 150 obs. of 2 variables:
$ study: Factor w/ 7 levels "A","S","H","D",..: 7 2 4 5 3 1 7 2 2 4 ...
$ n : Factor w/ 6 levels "N0","N1","N2a",..: 1 1 2 4 1 1 2 1 1 1 ...
I would like the x-axis to arrange by sample size, i.e. level = c("all", "S", "H", "B", "C", "A", "K", "D")
As you can see, the order is printed alphabetically.
I have tried specifying as ... aes(x=factor(nystudie, level=c(...), but that does not work. What am I doing wrong? I followed this post
library(tidyverse)
colsze <- c("#E1B930", "#2C77BF", "#E38072", "#6DBCC3", "grey40", "black", "#8B3A62")
a %>%
as_tibble() %>%
mutate(nystudie=as.factor(study),
n.seven=as.factor(n)) %>%
bind_rows(., mutate(., nystudie="all")) %>%
count(nystudie, n.seven, .drop=F) %>%
ggplot(aes(x = factor(nystudie, level = c("all", "S", "H", "B", "C", "A", "K", "D")),
n, color = n.seven, fill= n.seven, label=n)) +
geom_col(position = position_dodge2(preserve = "single", padding = 0.1))+
geom_text(aes(label=n),position = position_dodge2(0.9), vjust=-0.25, fontface=2, cex=4.5, show.legend = F) +
scale_fill_manual(values = alpha(colsze, .2),
name="Stage", label=c("N0", "N1", "N2a", "N2b", "N2c", "N3")) +
scale_color_manual(values = colsze,
name="Stage", label=c("N0", "N1", "N2a", "N2b", "N2c", "N3")) +
scale_x_discrete(name = "", label=c("All\n(n=1,905)",
"A\n(n=221)",
"B\n(n=234)",
"C\n(n=232)",
"D\n(n=108)",
"H\n(n=427)",
"K\n(n=221)",
"S\n(n=462)")) +
scale_y_continuous(name="",
breaks=seq(0,950,100)) +
coord_cartesian(ylim = c(0,950)) +
guides(fill = guide_legend(nrow = 1)) + theme(axis.text.x = element_text(color = "grey20", size =15),
legend.text=element_text(size=16), legend.title=element_text(size=16, face="bold"),
legend.position="top")
Data sample
a <- structure(list(study = structure(c(7L, 2L, 4L, 5L, 3L, 1L, 7L,
2L, 2L, 4L, 4L, 6L, 2L, 5L, 3L, 7L, 1L, 1L, 2L, 6L, 1L, 3L, 2L,
7L, 2L, 2L, 6L, 6L, 6L, 2L, 1L, 2L, 6L, 1L, 2L, 2L, 3L, 4L, 2L,
3L, 2L, 5L, 2L, 3L, 6L, 5L, 3L, 2L, 4L, 3L, 5L, 6L, 2L, 7L, 2L,
3L, 3L, 3L, 7L, 7L, 3L, 4L, 1L, 1L, 2L, 2L, 6L, 2L, 3L, 2L, 3L,
2L, 1L, 2L, 3L, 5L, 3L, 1L, 1L, 1L, 7L, 4L, 3L, 2L, 4L, 3L, 3L,
3L, 2L, 6L, 7L, 3L, 2L, 2L, 6L, 2L, 2L, 6L, 7L, 3L, 3L, 3L, 6L,
2L, 2L, 7L, 7L, 1L, 1L, 6L, 3L, 3L, 7L, 1L, 2L, 7L, 1L, 1L, 7L,
4L, 4L, 4L, 2L, 3L, 3L, 6L, 1L, 4L, 6L, 3L, 5L, 5L, 3L, 3L, 7L,
5L, 3L, 6L, 3L, 5L, 2L, 3L, 7L, 6L, 2L, 1L, 6L, 5L, 1L, 6L), .Label = c("A",
"S", "H", "D", "K", "C", "B"), class = "factor"), n = structure(c(1L,
1L, 2L, 4L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 4L, 2L, 1L, 2L,
3L, 2L, 2L, 4L, 4L, 4L, 2L, 4L, 1L, 2L, 4L, 1L, 1L, 4L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 4L, 1L, 1L, 4L, 2L, 1L, 1L, 4L, 1L, 1L, 2L,
1L, 5L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 2L, 1L,
4L, 1L, 1L, 1L, 1L, 6L, 1L, 2L, 5L, 4L, 2L, 6L, 1L, 4L, 2L, 4L,
2L, 1L, 1L, 4L, 1L, 2L, 1L, 1L, 4L, 4L, 4L, 1L, 4L, 2L, 1L, 1L,
4L, 2L, 1L, 2L, 1L, 5L, 5L, 1L, 4L, 1L, 2L, 2L, 4L, 1L, 1L, 1L,
2L, 4L, 4L, 1L, 5L, 2L, 1L, 5L, 2L, 4L, 1L, 1L, 1L, 4L, 4L, 1L,
1L, 4L, 4L, 4L, 1L, 4L, 4L, 1L, 4L, 5L, 4L, 5L, 1L, 5L, 1L, 1L,
4L, 2L, 1L, 2L, 4L), .Label = c("N0", "N1", "N2a", "N2b", "N2c",
"N3"), class = "factor")), row.names = c(NA, -150L), class = "data.frame")
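The x-axis labels are being overridden at the scale_x_discrete step: the labels given there are applied positionally, so they no longer match the reordered factor levels. Attach the labels to the factor itself instead. Try: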
library(dplyr)
library(ggplot2)
# Dodged bar chart of patient counts per study and N stage, with a pooled
# "all" group, bars ordered by sample size.
a %>%
  mutate(nystudie = as.factor(study),
         n.seven = as.factor(n)) %>%
  # Append a copy of every row under the pooled label "all" so the total is
  # counted as its own group.
  bind_rows(., mutate(., nystudie = "all")) %>%
  # .drop = FALSE keeps stage levels that have zero observations in a study.
  count(nystudie, n.seven, .drop = FALSE) %>%
  # Fix the bar order and attach the display labels to the factor itself, so
  # order and labels cannot get out of sync in a later scale call.
  mutate(nystudie = factor(nystudie,
                           levels = c("all", "S", "H", "B", "C", "A", "K", "D"),
                           labels = c("All\n(n=1,905)", "S\n(n=462)", "H\n(n=427)", "B\n(n=234)",
                                      "C\n(n=232)", "A\n(n=221)", "K\n(n=221)", "D\n(n=108)"))) %>%
  ggplot(aes(x = nystudie, y = n, color = n.seven, fill = n.seven, label = n)) +
  geom_col(position = position_dodge2(preserve = "single", padding = 0.1)) +
  geom_text(aes(label = n), position = position_dodge2(0.9), vjust = -0.25,
            fontface = 2, size = 4.5, show.legend = FALSE) +
  # Same palette for outline and fill; the fill is made mostly transparent.
  scale_fill_manual(values = alpha(colsze, .2),
                    name = "Stage", labels = c("N0", "N1", "N2a", "N2b", "N2c", "N3")) +
  scale_color_manual(values = colsze,
                     name = "Stage", labels = c("N0", "N1", "N2a", "N2b", "N2c", "N3")) +
  scale_x_discrete(name = "") +
  scale_y_continuous(name = "",
                     breaks = seq(0, 950, 100)) +
  coord_cartesian(ylim = c(0, 950)) +
  guides(fill = guide_legend(nrow = 1)) +
  theme(axis.text.x = element_text(color = "grey20", size = 15),
        legend.text = element_text(size = 16),
        legend.title = element_text(size = 16, face = "bold"),
        legend.position = "top")
I have a histogram illustrating a specific cell count on the x-axis (1 = 1 cell counted, 2 = 2 cells counted, etc.) and, on the y-axis, the total number of patients that have been diagnosed with this specific cell count.
However, the base of each bar does not align with the corresponding value on the x-axis. Preferably, I want each x-axis cell count to be centered under the base of its bar.
I have attached a photo of how it looks ATM:
My data
# Sample data: one row per patient with the WHO grade (WHO.Grade) and the
# Ki-67 value (ki67pro) that is plotted on the x-axis below.
p <- structure(list(WHO.Grade = c(1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), ki67pro = c(4L, 3L, 4L, 4L, 7L, 6L, 4L, 1L, 4L, 12L, 4L, 3L, 1L, 2L, 20L, 10L, 3L, 3L, 3L, 5L, 3L, 3L, 4L, 5L, 2L, 4L, 5L, 3L, 15L, 4L, 4L, 4L, 4L, 2L, 4L, 2L, 2L, 3L, 7L, 5L, 4L, 2L, 4L, 6L, 4L, 3L, 5L, 4L, 2L, 3L, 3L, 5L, 8L, 0L, 3L, 2L, 20L, 4L, 4L, 4L, 3L, 5L, 5L, 12L, 3L, 2L, 2L, 3L, 3L, NA, 4L, 3L, 3L, 12L, 4L, 1L, 3L, 8L, 7L, 4L, 5L, 3L, 3L, 3L, 3L, 1L, 4L, 5L, 2L, 3L, 3L, 5L, 7L, NA, 2L, 12L, 4L, 0L, 4L, 3L, 10L, 5L, 4L, 3L, 20L, 10L, 10L, 3L, 2L, 10L, 4L, 5L, 3L, 2L, 4L, 2L, 5L, 2L, 4L, 25L, 3L, 5L, 3L, 4L, 7L, 0L, 5L, 7L, 1L, 1L, 1L, 4L, 4L, 6L, 5L, 7L, 3L, 3L, NA, 3L, 4L, 3L, 5L, 10L, 1L, 2L, 3L, 2L, 4L, 5L, 4L, 3L, 4L, 3L, 3L, 7L, 3L, 4L, 3L, 4L, 5L, 6L, 3L, 2L, 3L, 4L, 5L, 3L, 4L, 2L, 4L, 5L, 5L, 12L, 12L, 7L)), .Names = c("WHO.Grade", "ki67pro"), class = "data.frame", row.names = c(NA, -176L))
# One subset per WHO grade so each grade can be drawn as its own layer.
# subset() evaluates the condition inside p, so no p$ prefix is needed.
p1 <- subset(p, WHO.Grade == 1)
p2 <- subset(p, WHO.Grade == 2)
p3 <- subset(p, WHO.Grade == 3)
I have used the following script:
q <- ggplot() + theme_grey() +
scale_x_continuous(name="The Ki-67/MIB-1 LI percetage", limits=c(-1, 26), seq(-1,26,by=1)) +
scale_y_continuous(name="Patients diagnosed", limits=c(0, 44), seq(0,44,by=2)) +
geom_histogram(aes(x=p1$ki67pro), colour="#222a37", fill="#222a37", bins=40, alpha=0.30) +
geom_histogram(aes(x=p2$ki67pro), colour="dodgerblue2", fill="dodgerblue2", bins=40, alpha=0.35) +
geom_histogram(aes(x=p3$ki67pro), colour="darkred", fill="darkred", bins=40, alpha=0.35) +
geom_density(aes(x=p$ki67pro, y=..count..), colour="orange", fill="white", alpha=0) +
geom_hline(yintercept=0, colour="white", size=0.9)
q
Thanks, C.
Going off of my comment, replacing geom_histogram with geom_bar and removing the bins argument gets you:
I've got a list with over 50.000 lines of Tweets. Now I've already exported the hashtags from that list but now I'm stuck with several thousand lines of hashtags which look like this
hashtag1;hashtag2;hashtag3;hashtag4
Since I want to do a co-hashtag-analysis I'm looking for a way to connect these multiple hashtags with each other without having to manually transform these lines into undirected edges. Example:
hashtag1;hashtag2
hashtag1;hashtag3
hashtag1;hashtag4
hashtag2;hashtag3
hashtag2;hashtag4
hashtag3;hashtag4
So, do you have an idea on how to accomplish this task (e.g. via R)? I'm an R-noob and even less "well versed" with other languages but I'm eager to learn.
structure(list(V1 = structure(c(1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 7L, 8L, 8L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 13L,
13L, 13L, 13L, 14L, 14L), .Label = c("profitkapital", "resupply",
"robotik", "rudidutschke", "russland", "sanktionen", "sanktionieren",
"schiller", "siegertyp", "snowden", "sockeleinkommen", "solidarity",
"sozialismus", "sozialphilosoph"), class = "factor"), V2 = structure(c(4L,
3L, 2L, 7L, 7L, 7L, 7L, 17L, 6L, 8L, 9L, 10L, 10L, 11L, 12L,
13L, 18L, 18L, 1L, 15L, 15L, 14L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 4L, 4L, 4L, 4L, 16L, 16L), .Label = c("alltag",
"arbeit", "bbq", "bge", "blockupy", "deutschland", "digitalisierung",
"griechenland", "grundeinkommen", "hartziv", "kenfm", "kirche",
"kopf", "kraft", "marx", "negt", "piraten", "sanktion"), class = "factor"),
V3 = structure(c(1L, 3L, 2L, 4L, 4L, 4L, 4L, 4L, 5L, 4L,
4L, 4L, 13L, 10L, 13L, 4L, 14L, 14L, 7L, 6L, 6L, 15L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 1L, 1L, 1L, 1L, 12L, 12L, 11L, 11L,
11L, 11L, 9L, 9L), .Label = c("", "abitur", "bbqrub", "bge",
"brd", "brecht", "deutschen", "fsa", "grundeinkommen", "hartziv",
"linkezukunft", "ows", "vatikan", "widerspruch", "würde"
), class = "factor"), V4 = structure(c(1L, 3L, 6L, 1L, 1L,
1L, 1L, 1L, 8L, 1L, 2L, 1L, 9L, 5L, 9L, 10L, 4L, 4L, 7L,
3L, 3L, 11L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
12L, 12L, 1L, 1L, 1L, 1L, 3L, 3L), .Label = c("", "bank",
"bge", "eilantrag", "haarp", "job", "jobcentern", "merkel",
"pastor", "probleme", "super", "unibrennt"), class = "factor"),
V5 = structure(c(1L, 3L, 5L, 1L, 1L, 1L, 1L, 1L, 7L, 1L,
10L, 1L, 2L, 9L, 2L, 4L, 8L, 8L, 6L, 1L, 1L, 6L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("", "bge", "bgenation", "fliegen", "geld",
"hartziv", "hitler", "sg", "ttip", "vorbild"), class = "factor"),
V6 = structure(c(1L, 5L, 2L, 1L, 1L, 1L, 1L, 1L, 6L, 1L,
1L, 1L, 8L, 4L, 8L, 7L, 4L, 4L, 4L, 1L, 1L, 4L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "altersarmut", "antifa", "bge", "deeznuts",
"holocaust", "klatsch", "sex"), class = "factor"), V7 = structure(c(1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 6L, 1L, 1L, 1L, 1L, 3L, 1L, 1L,
4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "bge",
"cia", "hartz", "spanishrevolution", "wahre"), class = "factor"),
V8 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "cityoflondon", "grund", "peace"), class = "factor"),
V9 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "bge", "occupy", "rothschild"), class = "factor"),
V10 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "ard", "gezi"), class = "factor"), V11 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "refugeeswelcome",
"zdf"), class = "factor"), V12 = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "nolegida",
"wdr"), class = "factor"), V13 = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "nopegida",
"swr"), class = "factor"), V14 = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "nocastor",
"zukunft"), class = "factor")), .Names = c("V1", "V2", "V3",
"V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11", "V12", "V13",
"V14"), class = "data.frame", row.names = c(NA, -41L))
You can try combn (provided by the combinat package, and also available in base R's utils package), which will generate all pairwise combinations:
library(combinat)
# combn(x, 2) enumerates every unordered pair of elements of x and returns
# them as a 2-row matrix with one pair per column (see the output below).
combn(c("hashtag1", "hashtag2", "hashtag3", "hashtag4"), 2)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] "hashtag1" "hashtag1" "hashtag1" "hashtag2" "hashtag2" "hashtag3"
[2,] "hashtag2" "hashtag3" "hashtag4" "hashtag3" "hashtag4" "hashtag4"
I have a data frame with x and y positions and two factor columns blocknr and cat:
dput(testData)
structure(list(xpos = c(2L, 8L, 5L, 8L, 1L, 4L, 5L, 1L, 8L, 4L,
3L, 2L, 6L, 5L, 1L, 7L, 3L, 4L, 3L, 7L, 1L, 6L, 7L, 7L, 2L, 5L,
3L, 4L, 6L, 7L, 1L, 5L, 1L, 6L, 4L, 5L, 3L, 6L, 4L, 8L, 1L, 3L,
4L, 6L, 7L, 3L, 2L, 6L, 4L, 2L, 1L, 7L, 4L, 8L, 2L, 3L, 2L, 5L,
8L, 2L, 8L, 3L, 3L, 5L, 6L, 7L, 1L, 5L, 6L, 4L, 2L, 6L, 7L, 1L,
5L, 7L, 2L), ypos = c(1L, 2L, 8L, 1L, 6L, 7L, 1L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 7L, 8L, 10L, 2L, 6L, 9L, 1L, 2L, 10L, 4L, 5L,
6L, 3L, 5L, 9L, 3L, 9L, 10L, 3L, 7L, 8L, 2L, 5L, 6L, 3L, 4L,
10L, 1L, 4L, 10L, 2L, 8L, 9L, 3L, 6L, 8L, 5L, 7L, 10L, 3L, 4L,
7L, 2L, 4L, 5L, 6L, 7L, 9L, 4L, 7L, 8L, 1L, 2L, 9L, 5L, 9L, 10L,
1L, 6L, 8L, 3L, 5L, 7L), blocknr = c(1L, 3L, 2L, 3L, 1L, 2L,
2L, 1L, 3L, 2L, 1L, 1L, 3L, 2L, 1L, 3L, 2L, 2L, 1L, 3L, 1L, 2L,
3L, 3L, 1L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 1L, 3L, 2L, 2L, 1L, 3L,
2L, 3L, 1L, 1L, 2L, 3L, 3L, 2L, 1L, 3L, 2L, 1L, 1L, 3L, 2L, 3L,
1L, 2L, 1L, 2L, 3L, 1L, 3L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 3L, 2L,
1L, 2L, 3L, 1L, 2L, 3L, 1L), cat = structure(c(2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("A", "B", "C"
), class = "factor")), .Names = c("xpos", "ypos", "blocknr",
"cat"), row.names = c(NA, -77L), class = "data.frame")
I've made the following ggplot code to make 2D overview:
# 2D overview: one tile per (xpos, ypos) cell, filled by category and
# labelled with its block number. xmax / ymax are the largest x and y
# positions in testData.
ggplot(data = testData, aes(x = xpos, y = ypos)) +
  geom_tile(aes(fill = cat), colour = "white") +
  scale_fill_manual(values = c('A' = '#F8766D', 'C' = '#8ABF54', 'B' = '#C1DDA5')) +
  geom_text(aes(label = blocknr), size = 3) +
  # A plot has a single coordinate system, and adding a second
  # coord_cartesian() replaces the first, so both limits are set in one call.
  coord_cartesian(xlim = c(0.5, xmax + 0.5), ylim = c(0.5, ymax + 0.5)) +
  scale_x_continuous(breaks = seq_len(xmax)) +
  scale_y_continuous(breaks = seq_len(ymax)) +
  #geom_polygon(aes(group=blocknr))+
  theme(axis.line = element_line(colour = "white"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank())
which produces the following result:
Now I would like to highlight each group of blocknrs by drawing a border around them as shown below:
I've played around with geom_polygon, geom_path, but I can't quite find a way to do this. Is there a general way to achieve this in ggplot without constructing an algorithm to compute where each line should be and add those lines as a geom_segment?
As far as I know, there is no way to do this with standard ggplot2 tile options. But it's not too much trouble to construct the borders if you draw them as segments. For example:
# Grid extent: the largest column (x) and row (y) index present in the data.
xmax <- max(testData$xpos)
ymax <- max(testData$ypos)
# Block-number lookup grid indexed as m[ypos, xpos]; cells without a tile
# keep the initial value 0.
m <- matrix(0, nrow = ymax, ncol = xmax)
m[cbind(testData$ypos, testData$xpos)] <- testData$blocknr
Here we are basically taking all the row/column assignment data and creating a matrix that essentially looks like the plot, but filled with the block numbers. Now we scan for the locations where we need to add a "wall" by looking for changes in the block numbers as we go across each row and each column separately.
# TRUE when x is a two-dimensional object with exactly two columns and at
# least one row, i.e. a non-empty table of break coordinates.
# Always returns a single TRUE/FALSE (also for input without dimensions),
# so it is safe to use as a Filter() predicate.
has.breaks <- function(x) {
  length(dim(x)) == 2L && ncol(x) == 2L && nrow(x) > 0L
}
# hw: scan each row i of m from left to right. diff(c(0, x, 0)) != 0 marks
# every position where the block number differs from the previous cell; the
# zero padding on both ends makes the outer edge of a non-zero cell count as
# a change too. Each hit is recorded as a (y = i, x = position) pair, results
# rejected by has.breaks are dropped, and the rest are stacked into one data
# frame.
hw<-do.call(rbind.data.frame, Filter(has.breaks, Map(function(i,x)
cbind(y=i,x=which(diff(c(0,x,0))!=0)), 1:nrow(m), split(m, 1:nrow(m)))))
# vw: the same scan applied to each column i of m (as.data.frame(m) yields
# one list element per column), recording (x = i, y = position) pairs.
vw<-do.call(rbind.data.frame, Filter(has.breaks, Map(function(i,x)
cbind(x=i,y=which(diff(c(0,x,0))!=0)), 1:ncol(m), as.data.frame(m))))
And you can add calls to geom_segment to add the horizontal and vertical walls to the plot.
# Tile overview with block borders: tiles are filled by category and
# labelled with the block number; the wall positions in hw and vw are drawn
# on top as line segments.
ggplot(data = testData, aes(x = xpos, y = ypos)) +
  geom_tile(aes(fill = cat), colour = "white") +
  scale_fill_manual(values = c('A' = '#F8766D', 'C' = '#8ABF54', 'B' = '#C1DDA5')) +
  geom_text(aes(label = blocknr), size = 3) +
  # hw rows become vertical segments on the left edge of cell (x, y) ...
  geom_segment(data = hw, aes(x = x - .5, xend = x - .5, y = y - .5, yend = y + .5)) +
  # ... and vw rows become horizontal segments on its bottom edge.
  geom_segment(data = vw, aes(x = x - .5, xend = x + .5, y = y - .5, yend = y - .5)) +
  # A plot has a single coordinate system, and adding a second
  # coord_cartesian() replaces the first, so both limits are set in one call.
  # The extra 0.1 of margin keeps the outermost walls from being clipped.
  coord_cartesian(xlim = c(0.4, xmax + 0.6), ylim = c(0.4, ymax + 0.6)) +
  scale_x_continuous(breaks = seq_len(xmax)) +
  scale_y_continuous(breaks = seq_len(ymax)) +
  theme(axis.line = element_line(colour = "white"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank())
which gives
The desplot package will do this for you (using lattice):
library(desplot)
# Field-layout plot of cat over the xpos/ypos grid; blocknr is passed both
# as out1 (presumably the grouping that gets outlined) and as the cell text.
# NOTE(review): newer desplot releases appear to expect the data frame as
# the first argument, i.e. desplot(data, form, ...) - confirm against the
# installed version.
desplot(cat ~ xpos*ypos, testData, out1=blocknr, text=blocknr, main="testData")