Modifying y-axis for character values in ggplot2

Modifying y-axis for character values in ggplot2 - r

structure(list(Team = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = "Union", class = "factor"), Date = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 4L, 3L, 3L, 4L, 3L, 3L, 5L, 3L, 3L, 6L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 6L, 3L, 3L, 3L, 3L, 3L, 3L, 6L, 6L, 6L,
6L, 3L, 7L, 8L, 9L, 10L, 10L), .Label = c("2012-01-06", "2012-02-06",
"2012-03-06", "2012-04-06", "2012-05-06", "2012-07-06", "2012-09-06",
"2012-10-06", "2012-11-06", "2012-12-06"), class = "factor"),
STime = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = "07:03", class = "factor"), ETime = structure(c(6L,
7L, 8L, 5L, 5L, 1L, 2L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 11L,
10L, 9L, 8L, 10L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 4L, 5L, 5L, 5L, 5L), .Label = c("01:13",
"03:13", "06:13", "09:13", "10:13", "11:13", "12:13", "13:13",
"15:13", "16:13", "18:13"), class = "factor")), .Names = c("Team",
"Date", "STime", "ETime"), class = "data.frame", row.names = c(NA,
-40L))
I am doing this:
ggplot(df, aes(Date, ETime, group="Team")) + geom_point(size=0.3) + facet_wrap(~ Team)
I would like the y-axis to run from 00:00 to 23:29 in 2-hour increments. I tried scale_y_continuous, which is not working. Any suggestions?

I suggest changing your date and time columns into POSIXt-formatted data. Then changing the axis breaks and labels becomes easier. Currently, your dates and times are stored as factors.
library(ggplot2)
library(gridExtra)

# Convert the factor columns to date-times. strptime() returns POSIXlt, a
# list-based class that is awkward inside a data frame, so the result is
# converted to POSIXct, the class the ggplot2 datetime scales are documented for.
# "%H:%M" carries no date, so strptime() fills in the current date; the limits
# and breaks below are built the same way and therefore line up with the data
# as long as everything runs in the same session on the same day.
df$ETime <- as.POSIXct(strptime(as.character(df$ETime), "%H:%M"))
df$Date <- as.POSIXct(strptime(as.character(df$Date), "%Y-%m-%d"))

# Plot 1: default axis breaks and labels.
plot_1 <- ggplot(df, aes(x = Date, y = ETime)) +
  geom_point() +
  labs(title = "Plot 1")

# Manually set datetime limits and breaks: 00:00 to 23:29 in 2-hour steps.
day_start <- as.POSIXct(strptime("00:00", "%H:%M"))
day_end <- as.POSIXct(strptime("23:29", "%H:%M"))
y_limits <- c(day_start, day_end)
y_breaks <- seq(from = day_start, to = day_end, by = "2 hours")
# Show only the time of day on the axis, hiding the filled-in date.
y_labels <- format(y_breaks, "%H:%M")

# Plot 2: same data with the custom y-axis.
plot_2 <- ggplot(df, aes(x = Date, y = ETime)) +
  geom_point() +
  scale_y_datetime(limits = y_limits, breaks = y_breaks, labels = y_labels) +
  labs(title = "Plot 2")

# Write both plots side by side into a single PNG file.
png("plots.png", width = 8, height = 4, units = "in", res = 120)
grid.arrange(plot_1, plot_2, nrow = 1)
dev.off()

Related

Comparing the mean, radius and concavity of benign and malignant stage cancer

I am trying to implement a series of box plots which demonstrate the area, radius and concavity of human cells. The variable I am trying to plot is 'characters' split into two subsets 'Malignant' or 'Benign'.
I keep receiving the following error messages:
> Error in FUN(X[[i]], ...) : object 'Class_mean' not found
> Error in FUN(X[[i]], ...) : object 'Class_radius' not found
Please see my code:
ggplot(wisconsin, aes(x= Class, y=Class_mean, fill="pink")) +
geom_boxplot(fill= "yellow")+
ggtitle("radius of benign and malignant stage")
ggplot(wisconsin, aes(x= Class, y=Class_radius))+
geom_boxplot()+
ggtitle("area of benign and malignant stage")
ggplot(wisconsin, aes(x= Class, y=concavity_mean))+
geom_boxplot()+
ggtitle("concavity of benign and malignant stage")
Any ideas on how I could figure out the radius, mean and concavity object to Y variable?
All suggestions welcome
Please see head of data:
structure(list(Cl.thickness = c(5L, 5L, 3L, 6L, 4L, 8L, 1L, 2L,
2L, 4L, 1L, 2L, 5L, 1L, 8L, 7L, 4L, 4L, 10L, 6L), Cell.size = c(1L,
4L, 1L, 8L, 1L, 10L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 7L, 4L,
1L, 1L, 7L, 1L), Cell.shape = c(1L, 4L, 1L, 8L, 1L, 10L, 1L,
2L, 1L, 1L, 1L, 1L, 3L, 1L, 5L, 6L, 1L, 1L, 7L, 1L), Marg.adhesion = c(1L,
5L, 1L, 1L, 3L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 10L, 4L,
1L, 1L, 6L, 1L), Epith.c.size = c(2L, 7L, 2L, 3L, 2L, 7L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 7L, 6L, 2L, 2L, 4L, 2L), Bare.nuclei = c(1L,
10L, 2L, 4L, 1L, 10L, 10L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 9L, 1L,
1L, 1L, 10L, 1L), Bl.cromatin = c(3L, 3L, 3L, 3L, 3L, 9L, 3L,
3L, 1L, 2L, 3L, 2L, 4L, 3L, 5L, 4L, 2L, 3L, 4L, 3L), Normal.nucleoli = c(1L,
2L, 1L, 7L, 1L, 7L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 5L, 3L, 1L,
1L, 1L, 1L), Mitoses = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L,
1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 2L, 1L), Class = c("benign",
"benign", "benign", "benign", "benign", "malignant", "benign",
"benign", "benign", "benign", "benign", "benign", "malignant",
"benign", "malignant", "malignant", "benign", "benign", "malignant",
"benign")), row.names = c(NA, 20L), class = "data.frame")
> dput(head(wisconsin, 20))
structure(list(Cl.thickness = c(5L, 5L, 3L, 6L, 4L, 8L, 1L, 2L,
2L, 4L, 1L, 2L, 5L, 1L, 8L, 7L, 4L, 4L, 10L, 6L), Cell.size = c(1L,
4L, 1L, 8L, 1L, 10L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 7L, 4L,
1L, 1L, 7L, 1L), Cell.shape = c(1L, 4L, 1L, 8L, 1L, 10L, 1L,
2L, 1L, 1L, 1L, 1L, 3L, 1L, 5L, 6L, 1L, 1L, 7L, 1L), Marg.adhesion = c(1L,
5L, 1L, 1L, 3L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 10L, 4L,
1L, 1L, 6L, 1L), Epith.c.size = c(2L, 7L, 2L, 3L, 2L, 7L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 7L, 6L, 2L, 2L, 4L, 2L), Bare.nuclei = c(1L,
10L, 2L, 4L, 1L, 10L, 10L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 9L, 1L,
1L, 1L, 10L, 1L), Bl.cromatin = c(3L, 3L, 3L, 3L, 3L, 9L, 3L,
3L, 1L, 2L, 3L, 2L, 4L, 3L, 5L, 4L, 2L, 3L, 4L, 3L), Normal.nucleoli = c(1L,
2L, 1L, 7L, 1L, 7L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 5L, 3L, 1L,
1L, 1L, 1L), Mitoses = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L,
1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 2L, 1L), Class = c("benign",
"benign", "benign", "benign", "benign", "malignant", "benign",
"benign", "benign", "benign", "benign", "benign", "malignant",
"benign", "malignant", "malignant", "benign", "benign", "malignant",
"benign")), row.names = c(NA, 20L), class = "data.frame")

If you want to plot each character per class, then the code below might solve the problem.
This type of problem generally has to do with reshaping the data. The data should be in long format, but it is currently in wide format. See this post on how to reshape the data from wide to long format. I will use the function pivot_longer from the package tidyr.
library(ggplot2)
# Stack every measurement column into name/value pairs (all columns except
# Class), then draw one boxplot panel per measurement, split by Class.
wisconsin |>
  tidyr::pivot_longer(cols = !Class, names_to = "characters") |>
  ggplot(mapping = aes(Class, value)) +
  geom_boxplot(fill = "lightblue") +
  facet_wrap(vars(characters)) +
  theme_bw()
Created on 2022-10-19 with reprex v2.0.2

ggplot: why does order on x-axis not level instead of printing alphabetically?

I have this plot
With
> str(a)
'data.frame': 150 obs. of 2 variables:
$ study: Factor w/ 7 levels "A","S","H","D",..: 7 2 4 5 3 1 7 2 2 4 ...
$ n : Factor w/ 6 levels "N0","N1","N2a",..: 1 1 2 4 1 1 2 1 1 1 ...
I would like the x-axis to arrange by sample size, i.e. level = c("all", "S", "H", "B", "C", "A", "K", "D")
As you can see, the order is printed alphabetically.
I have tried specifying as ... aes(x=factor(nystudie, level=c(...), but that does not work. What am I doing wrong? I followed this post
library(tidyverse)
colsze <- c("#E1B930", "#2C77BF", "#E38072", "#6DBCC3", "grey40", "black", "#8B3A62")
a %>%
as_tibble() %>%
mutate(nystudie=as.factor(study),
n.seven=as.factor(n)) %>%
bind_rows(., mutate(., nystudie="all")) %>%
count(nystudie, n.seven, .drop=F) %>%
ggplot(aes(x = factor(nystudie, level = c("all", "S", "H", "B", "C", "A", "K", "D")),
n, color = n.seven, fill= n.seven, label=n)) +
geom_col(position = position_dodge2(preserve = "single", padding = 0.1))+
geom_text(aes(label=n),position = position_dodge2(0.9), vjust=-0.25, fontface=2, cex=4.5, show.legend = F) +
scale_fill_manual(values = alpha(colsze, .2),
name="Stage", label=c("N0", "N1", "N2a", "N2b", "N2c", "N3")) +
scale_color_manual(values = colsze,
name="Stage", label=c("N0", "N1", "N2a", "N2b", "N2c", "N3")) +
scale_x_discrete(name = "", label=c("All\n(n=1,905)",
"A\n(n=221)",
"B\n(n=234)",
"C\n(n=232)",
"D\n(n=108)",
"H\n(n=427)",
"K\n(n=221)",
"S\n(n=462)")) +
scale_y_continuous(name="",
breaks=seq(0,950,100)) +
coord_cartesian(ylim = c(0,950)) +
guides(fill = guide_legend(nrow = 1)) + theme(axis.text.x = element_text(color = "grey20", size =15),
legend.text=element_text(size=16), legend.title=element_text(size=16, face="bold"),
legend.position="top")
Data sample
a <- structure(list(study = structure(c(7L, 2L, 4L, 5L, 3L, 1L, 7L,
2L, 2L, 4L, 4L, 6L, 2L, 5L, 3L, 7L, 1L, 1L, 2L, 6L, 1L, 3L, 2L,
7L, 2L, 2L, 6L, 6L, 6L, 2L, 1L, 2L, 6L, 1L, 2L, 2L, 3L, 4L, 2L,
3L, 2L, 5L, 2L, 3L, 6L, 5L, 3L, 2L, 4L, 3L, 5L, 6L, 2L, 7L, 2L,
3L, 3L, 3L, 7L, 7L, 3L, 4L, 1L, 1L, 2L, 2L, 6L, 2L, 3L, 2L, 3L,
2L, 1L, 2L, 3L, 5L, 3L, 1L, 1L, 1L, 7L, 4L, 3L, 2L, 4L, 3L, 3L,
3L, 2L, 6L, 7L, 3L, 2L, 2L, 6L, 2L, 2L, 6L, 7L, 3L, 3L, 3L, 6L,
2L, 2L, 7L, 7L, 1L, 1L, 6L, 3L, 3L, 7L, 1L, 2L, 7L, 1L, 1L, 7L,
4L, 4L, 4L, 2L, 3L, 3L, 6L, 1L, 4L, 6L, 3L, 5L, 5L, 3L, 3L, 7L,
5L, 3L, 6L, 3L, 5L, 2L, 3L, 7L, 6L, 2L, 1L, 6L, 5L, 1L, 6L), .Label = c("A",
"S", "H", "D", "K", "C", "B"), class = "factor"), n = structure(c(1L,
1L, 2L, 4L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 4L, 2L, 1L, 2L,
3L, 2L, 2L, 4L, 4L, 4L, 2L, 4L, 1L, 2L, 4L, 1L, 1L, 4L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 4L, 1L, 1L, 4L, 2L, 1L, 1L, 4L, 1L, 1L, 2L,
1L, 5L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 2L, 1L,
4L, 1L, 1L, 1L, 1L, 6L, 1L, 2L, 5L, 4L, 2L, 6L, 1L, 4L, 2L, 4L,
2L, 1L, 1L, 4L, 1L, 2L, 1L, 1L, 4L, 4L, 4L, 1L, 4L, 2L, 1L, 1L,
4L, 2L, 1L, 2L, 1L, 5L, 5L, 1L, 4L, 1L, 2L, 2L, 4L, 1L, 1L, 1L,
2L, 4L, 4L, 1L, 5L, 2L, 1L, 5L, 2L, 4L, 1L, 1L, 1L, 4L, 4L, 1L,
1L, 4L, 4L, 4L, 1L, 4L, 4L, 1L, 4L, 5L, 4L, 5L, 1L, 5L, 1L, 1L,
4L, 2L, 1L, 2L, 4L), .Label = c("N0", "N1", "N2a", "N2b", "N2c",
"N3"), class = "factor")), row.names = c(NA, -150L), class = "data.frame")

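The bars are already drawn in your custom level order, but the labels passed to scale_x_discrete() are applied to the axis positions in the order given, so the alphabetically ordered label vector makes the axis look alphabetical. Set the labels together with the levels instead. Try: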
library(dplyr)
library(ggplot2)
# Palette from the question, repeated here so the snippet runs on its own.
colsze <- c("#E1B930", "#2C77BF", "#E38072", "#6DBCC3", "grey40", "black", "#8B3A62")
# Legend labels shared by the fill and colour scales.
stage_labels <- c("N0", "N1", "N2a", "N2b", "N2c", "N3")

a %>%
  mutate(nystudie = as.factor(study),
         n.seven = as.factor(n)) %>%
  # Append a copy of every row tagged "all" to get an overall group.
  bind_rows(., mutate(., nystudie = "all")) %>%
  count(nystudie, n.seven, .drop = FALSE) %>%
  # Fix the display order and the axis labels in one place, so the two can
  # never get out of sync (labels[i] belongs to levels[i]).
  mutate(nystudie = factor(nystudie,
                           levels = c("all", "S", "H", "B", "C", "A", "K", "D"),
                           labels = c("All\n(n=1,905)", "S\n(n=462)", "H\n(n=427)", "B\n(n=234)",
                                      "C\n(n=232)", "A\n(n=221)", "K\n(n=221)", "D\n(n=108)"))) %>%
  ggplot(aes(x = nystudie, y = n, color = n.seven, fill = n.seven, label = n)) +
  geom_col(position = position_dodge2(preserve = "single", padding = 0.1)) +
  geom_text(aes(label = n), position = position_dodge2(0.9), vjust = -0.25,
            fontface = 2, size = 4.5, show.legend = FALSE) +
  scale_fill_manual(values = alpha(colsze, .2),
                    name = "Stage", labels = stage_labels) +
  scale_color_manual(values = colsze,
                     name = "Stage", labels = stage_labels) +
  # No labels here: the axis text comes from the factor labels set above.
  scale_x_discrete(name = "") +
  scale_y_continuous(name = "",
                     breaks = seq(0, 950, 100)) +
  coord_cartesian(ylim = c(0, 950)) +
  guides(fill = guide_legend(nrow = 1)) +
  theme(axis.text.x = element_text(color = "grey20", size = 15),
        legend.text = element_text(size = 16),
        legend.title = element_text(size = 16, face = "bold"),
        legend.position = "top")

Histogram does not align/correspond to x-axis in ggplot/R

I have a histogram illustrating a specific cell count on the x-axis (1 = 1 cell counted, 2 = 2 cells counted etc), and how many patients that cumulative have been diagnosed with this specific cell count on the y-axis.
However, the "base" of the histogram does not align with the specific value on the x-axis. Preferably, I want the x-axis cell count to be centered in the base of the histogram.
I have attached a photo of how it looks ATM:
My data
p <– structure(list(WHO.Grade = c(1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), ki67pro = c(4L, 3L, 4L, 4L, 7L, 6L, 4L, 1L, 4L, 12L, 4L, 3L, 1L, 2L, 20L, 10L, 3L, 3L, 3L, 5L, 3L, 3L, 4L, 5L, 2L, 4L, 5L, 3L, 15L, 4L, 4L, 4L, 4L, 2L, 4L, 2L, 2L, 3L, 7L, 5L, 4L, 2L, 4L, 6L, 4L, 3L, 5L, 4L, 2L, 3L, 3L, 5L, 8L, 0L, 3L, 2L, 20L, 4L, 4L, 4L, 3L, 5L, 5L, 12L, 3L, 2L, 2L, 3L, 3L, NA, 4L, 3L, 3L, 12L, 4L, 1L, 3L, 8L, 7L, 4L, 5L, 3L, 3L, 3L, 3L, 1L, 4L, 5L, 2L, 3L, 3L, 5L, 7L, NA, 2L, 12L, 4L, 0L, 4L, 3L, 10L, 5L, 4L, 3L, 20L, 10L, 10L, 3L, 2L, 10L, 4L, 5L, 3L, 2L, 4L, 2L, 5L, 2L, 4L, 25L, 3L, 5L, 3L, 4L, 7L, 0L, 5L, 7L, 1L, 1L, 1L, 4L, 4L, 6L, 5L, 7L, 3L, 3L, NA, 3L, 4L, 3L, 5L, 10L, 1L, 2L, 3L, 2L, 4L, 5L, 4L, 3L, 4L, 3L, 3L, 7L, 3L, 4L, 3L, 4L, 5L, 6L, 3L, 2L, 3L, 4L, 5L, 3L, 4L, 2L, 4L, 5L, 5L, 12L, 12L, 7L)), .Names = c("WHO.Grade", "ki67pro"), class = "data.frame", row.names = c(NA, -176L))
p1 <- subset(p, p$WHO.Grade==1)
p2 <- subset(p, p$WHO.Grade==2)
p3 <- subset(p, p$WHO.Grade==3)
I have used the following script:
q <- ggplot() + theme_grey() +
scale_x_continuous(name="The Ki-67/MIB-1 LI percetage", limits=c(-1, 26), seq(-1,26,by=1)) +
scale_y_continuous(name="Patients diagnosed", limits=c(0, 44), seq(0,44,by=2)) +
geom_histogram(aes(x=p1$ki67pro), colour="#222a37", fill="#222a37", bins=40, alpha=0.30) +
geom_histogram(aes(x=p2$ki67pro), colour="dodgerblue2", fill="dodgerblue2", bins=40, alpha=0.35) +
geom_histogram(aes(x=p3$ki67pro), colour="darkred", fill="darkred", bins=40, alpha=0.35) +
geom_density(aes(x=p$ki67pro, y=..count..), colour="orange", fill="white", alpha=0) +
geom_hline(yintercept=0, colour="white", size=0.9)
q
Thanks, C.

Going off of my comment, replacing geom_histogram with geom_bar and removing the bins argument gets you:

Connect multiple hashtags from one line with each other

I've got a list with over 50.000 lines of Tweets. Now I've already exported the hashtags from that list but now I'm stuck with several thousand lines of hashtags which look like this
hashtag1;hashtag2;hashtag3;hashtag4
Since I want to do a co-hashtag-analysis I'm looking for a way to connect these multiple hashtags with each other without having to manually transform these lines into undirected edges. Example:
hashtag1;hashtag2
hashtag1;hashtag3
hashtag1;hashtag4
hashtag2;hashtag3
hashtag2;hashtag4
hashtag3;hashtag4
So, do you have an idea on how to accomplish this task (e.g. via R)? I'm an R-noob and even less "well versed" with other languages but I'm eager to learn.
structure(list(V1 = structure(c(1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 7L, 8L, 8L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 13L,
13L, 13L, 13L, 14L, 14L), .Label = c("profitkapital", "resupply",
"robotik", "rudidutschke", "russland", "sanktionen", "sanktionieren",
"schiller", "siegertyp", "snowden", "sockeleinkommen", "solidarity",
"sozialismus", "sozialphilosoph"), class = "factor"), V2 = structure(c(4L,
3L, 2L, 7L, 7L, 7L, 7L, 17L, 6L, 8L, 9L, 10L, 10L, 11L, 12L,
13L, 18L, 18L, 1L, 15L, 15L, 14L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 4L, 4L, 4L, 4L, 16L, 16L), .Label = c("alltag",
"arbeit", "bbq", "bge", "blockupy", "deutschland", "digitalisierung",
"griechenland", "grundeinkommen", "hartziv", "kenfm", "kirche",
"kopf", "kraft", "marx", "negt", "piraten", "sanktion"), class = "factor"),
V3 = structure(c(1L, 3L, 2L, 4L, 4L, 4L, 4L, 4L, 5L, 4L,
4L, 4L, 13L, 10L, 13L, 4L, 14L, 14L, 7L, 6L, 6L, 15L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 1L, 1L, 1L, 1L, 12L, 12L, 11L, 11L,
11L, 11L, 9L, 9L), .Label = c("", "abitur", "bbqrub", "bge",
"brd", "brecht", "deutschen", "fsa", "grundeinkommen", "hartziv",
"linkezukunft", "ows", "vatikan", "widerspruch", "würde"
), class = "factor"), V4 = structure(c(1L, 3L, 6L, 1L, 1L,
1L, 1L, 1L, 8L, 1L, 2L, 1L, 9L, 5L, 9L, 10L, 4L, 4L, 7L,
3L, 3L, 11L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
12L, 12L, 1L, 1L, 1L, 1L, 3L, 3L), .Label = c("", "bank",
"bge", "eilantrag", "haarp", "job", "jobcentern", "merkel",
"pastor", "probleme", "super", "unibrennt"), class = "factor"),
V5 = structure(c(1L, 3L, 5L, 1L, 1L, 1L, 1L, 1L, 7L, 1L,
10L, 1L, 2L, 9L, 2L, 4L, 8L, 8L, 6L, 1L, 1L, 6L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("", "bge", "bgenation", "fliegen", "geld",
"hartziv", "hitler", "sg", "ttip", "vorbild"), class = "factor"),
V6 = structure(c(1L, 5L, 2L, 1L, 1L, 1L, 1L, 1L, 6L, 1L,
1L, 1L, 8L, 4L, 8L, 7L, 4L, 4L, 4L, 1L, 1L, 4L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "altersarmut", "antifa", "bge", "deeznuts",
"holocaust", "klatsch", "sex"), class = "factor"), V7 = structure(c(1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 6L, 1L, 1L, 1L, 1L, 3L, 1L, 1L,
4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "bge",
"cia", "hartz", "spanishrevolution", "wahre"), class = "factor"),
V8 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "cityoflondon", "grund", "peace"), class = "factor"),
V9 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "bge", "occupy", "rothschild"), class = "factor"),
V10 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "ard", "gezi"), class = "factor"), V11 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "refugeeswelcome",
"zdf"), class = "factor"), V12 = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "nolegida",
"wdr"), class = "factor"), V13 = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "nopegida",
"swr"), class = "factor"), V14 = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "nocastor",
"zukunft"), class = "factor")), .Names = c("V1", "V2", "V3",
"V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11", "V12", "V13",
"V14"), class = "data.frame", row.names = c(NA, -41L))

You can try the package combinat with combn, which will generate all pairwise combinations:
library(combinat)
combn(c("hashtag1", "hashtag2", "hashtag3", "hashtag4"), 2)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] "hashtag1" "hashtag1" "hashtag1" "hashtag2" "hashtag2" "hashtag3"
[2,] "hashtag2" "hashtag3" "hashtag4" "hashtag3" "hashtag4" "hashtag4"

Overlay ggplot grouped tiles with polygon border depending on extra factor

I have a data frame with x and y positions and two factor columns blocknr and cat:
dput(testData)
structure(list(xpos = c(2L, 8L, 5L, 8L, 1L, 4L, 5L, 1L, 8L, 4L,
3L, 2L, 6L, 5L, 1L, 7L, 3L, 4L, 3L, 7L, 1L, 6L, 7L, 7L, 2L, 5L,
3L, 4L, 6L, 7L, 1L, 5L, 1L, 6L, 4L, 5L, 3L, 6L, 4L, 8L, 1L, 3L,
4L, 6L, 7L, 3L, 2L, 6L, 4L, 2L, 1L, 7L, 4L, 8L, 2L, 3L, 2L, 5L,
8L, 2L, 8L, 3L, 3L, 5L, 6L, 7L, 1L, 5L, 6L, 4L, 2L, 6L, 7L, 1L,
5L, 7L, 2L), ypos = c(1L, 2L, 8L, 1L, 6L, 7L, 1L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 7L, 8L, 10L, 2L, 6L, 9L, 1L, 2L, 10L, 4L, 5L,
6L, 3L, 5L, 9L, 3L, 9L, 10L, 3L, 7L, 8L, 2L, 5L, 6L, 3L, 4L,
10L, 1L, 4L, 10L, 2L, 8L, 9L, 3L, 6L, 8L, 5L, 7L, 10L, 3L, 4L,
7L, 2L, 4L, 5L, 6L, 7L, 9L, 4L, 7L, 8L, 1L, 2L, 9L, 5L, 9L, 10L,
1L, 6L, 8L, 3L, 5L, 7L), blocknr = c(1L, 3L, 2L, 3L, 1L, 2L,
2L, 1L, 3L, 2L, 1L, 1L, 3L, 2L, 1L, 3L, 2L, 2L, 1L, 3L, 1L, 2L,
3L, 3L, 1L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 1L, 3L, 2L, 2L, 1L, 3L,
2L, 3L, 1L, 1L, 2L, 3L, 3L, 2L, 1L, 3L, 2L, 1L, 1L, 3L, 2L, 3L,
1L, 2L, 1L, 2L, 3L, 1L, 3L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 3L, 2L,
1L, 2L, 3L, 1L, 2L, 3L, 1L), cat = structure(c(2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("A", "B", "C"
), class = "factor")), .Names = c("xpos", "ypos", "blocknr",
"cat"), row.names = c(NA, -77L), class = "data.frame")
I've made the following ggplot code to make 2D overview:
ggplot(data=testData, aes(x=xpos,y=ypos))+
geom_tile(aes(fill=cat), colour = "white")+
scale_fill_manual(values = c('A' = '#F8766D','C' = '#8ABF54','B' = '#C1DDA5'))+
geom_text(aes(x=xpos,y=ypos,label=blocknr),size=3)+
coord_cartesian(ylim = c(0.5, ymax + 0.5)) +
coord_cartesian(xlim = c(0.5, xmax + 0.5)) +
scale_x_continuous(breaks=seq(1,xmax,1))+
scale_y_continuous(breaks=seq(1,ymax,1))+
#geom_polygon(aes(group=blocknr))+
theme(axis.line = element_line(colour = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())
which produces the following result:
Now I would like to highlight each group of blocknrs by drawing a border around them as shown below:
I've played around with geom_polygon, geom_path, but I can't quite find a way to do this. Is there a general way to achieve this in ggplot without constructing an algorithm to compute where each line should be and add those lines as a geom_segment?

As far as I know, there is no way to do this with standard ggplot2 tile options. But it's not too much trouble to construct them if you do it as segments. For example:
# Extent of the grid in each direction.
xmax <- max(testData$xpos)
ymax <- max(testData$ypos)
# Grid laid out like the plot: one row per ypos, one column per xpos. Cells
# without a tile stay 0; every other cell holds its block number, written via
# (row, column) matrix indexing.
m <- matrix(0, nrow = ymax, ncol = xmax)
m[cbind(testData$ypos, testData$xpos)] <- testData$blocknr
Here we are basically taking all the row/col assignment data and creating a matrix that essentially looks like the plot, but filled with the block numbers. Now, we will scan for the locations where we need to add a "wall" by looking for changes in the block numbers as we go across each row and column separately.
# Predicate for Filter(): TRUE when `x` is a two-column matrix with at least
# one row, i.e. a scan result that actually contains wall positions.
# cbind() ignores a zero-length argument, so a row or column without any block
# change produces a one-column matrix and is rejected here.
# `&&` is used because both operands are single values; it returns exactly one
# TRUE/FALSE and short-circuits.
has.breaks <- function(x) {
  ncol(x) == 2 && nrow(x) > 0
}
# hw: scan every row of m from left to right. split(m, 1:nrow(m)) yields one
# vector per row; padding it with 0 on both sides makes the outer edges of the
# grid count as changes too. Each result row is a pair (y = row, x = column
# index at which the block number differs from the cell to its left).
# Rows without any change are dropped by has.breaks; the rest are stacked
# into one data frame.
hw<-do.call(rbind.data.frame, Filter(has.breaks, Map(function(i,x)
cbind(y=i,x=which(diff(c(0,x,0))!=0)), 1:nrow(m), split(m, 1:nrow(m)))))
# vw: the same scan down every column of m (as.data.frame(m) yields one
# vector per column). Each result row is a pair (x = column, y = row index at
# which the block number differs from the cell below it).
vw<-do.call(rbind.data.frame, Filter(has.breaks, Map(function(i,x)
cbind(x=i,y=which(diff(c(0,x,0))!=0)), 1:ncol(m), as.data.frame(m))))
And you can add calls to geom_segment to add the horizontal and vertical walls to the plot.
# Tile overview with a border drawn around every group of equal block numbers.
ggplot(data = testData, aes(x = xpos, y = ypos)) +
  geom_tile(aes(fill = cat), colour = "white") +
  scale_fill_manual(values = c("A" = "#F8766D", "C" = "#8ABF54", "B" = "#C1DDA5")) +
  # x and y are inherited from the plot-level mapping.
  geom_text(aes(label = blocknr), size = 3) +
  # Walls found by the row scan: vertical segments on the left edge of the cell.
  geom_segment(data = hw, aes(x = x - .5, xend = x - .5, y = y - .5, yend = y + .5)) +
  # Walls found by the column scan: horizontal segments on the lower edge of the cell.
  geom_segment(data = vw, aes(x = x - .5, xend = x + .5, y = y - .5, yend = y - .5)) +
  # A plot has a single coordinate system: a second coord_cartesian() replaces
  # the first one, so both limits must be set in the same call.
  coord_cartesian(xlim = c(0.4, xmax + 0.6), ylim = c(0.4, ymax + 0.6)) +
  scale_x_continuous(breaks = seq(1, xmax, 1)) +
  scale_y_continuous(breaks = seq(1, ymax, 1)) +
  theme(axis.line = element_line(colour = "white"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank())
which gives

The desplot package will do this for you (using lattice):
library(desplot)
# Tiles filled by `cat`, labelled with and outlined by `blocknr`.
# The formula and data arguments are passed by name rather than by position.
desplot(
  form = cat ~ xpos * ypos,
  data = testData,
  out1 = blocknr,
  text = blocknr,
  main = "testData"
)

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Modifying y-axis for character values in ggplot2 - r

Related

Comparing the mean, radius and concavity of benign and malignant stage cancer

ggplot: why does order on x-axis not level instead of printing alphabetically?

Histogram does not align/correspond to x-axis in ggplot/R

Connect multiple hashtags from one line with each other

Overlay ggplot grouped tiles with polygon border depending on extra factor

Categories

Resources