plotting mean of variable versus matrix of conditions in R using ggplot2 - r

I have a data.frame X with column X and a data.frame C with M binary values (0/1). Both data.frames have N rows (examples).
I would like to average X on each case 0/1 of each m out of M column of C.
When I plot this, I accept to get M*2 bars where x axis are the column names of each column in C and red/blue is for when catergory m (out of M) is either 0/1.
Can this be done using ggplot2?
Any other quick way to do that without for loops?
Result sketch:
*
* * *
* * * *
m1=0, m1=1, m2=0, m2=1 ,....
Thanks,
Hanan
data sample below:
aggregate(X, by = as.list(C), FUN=mean) will aggregate to any combination of C. This is not what I want. I want X aggregated for every value of each column of C INDEPENDENTLY .
X<-structure(list(V1 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)),
.Names = "V1", class = "data.frame", row.names = c(NA, -100L))
C<-structure(list(V1 = c(1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L,
0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L,
1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L), V2 = c(1L, 0L, 1L, 0L,
1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L,
1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L,
0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L,
0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L
), V3 = c(1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L,
1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L,
0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 1L)),
.Names = c("V1", "V2", "V3"), class = "data.frame", row.names = c(NA, -100L))

Here is a way to transform your data broken down by incremental steps
dd <- do.call(rbind,
Map(function(a,b) cbind(C=a, b), names(C),
lapply(
lapply(
lapply(C, table, X[[1]], dnn=c("CV","X")),
as.data.frame),
subset, X==1)
))
So here we use table() to get the counts of each X value for each C value. Then we turn that into a data.frame and take only the counts for X=1. Finally we add the correct name of the C column and merge all the data.frames into one large data.frame.
Then we can plot that with
ggplot(dd, aes(x=C, y=Freq, fill=CV)) +
geom_bar(position="dodge", stat="identity")
So the columns of C are listed along the x-axis and the values of C are represented by the color of the bar. The counts of X=1 in each of the groups are the heights of the bars.

Related

How to label entire groups in r Heatmap on the right side of the y-axis?

I am just getting started learning how to use RStudio and am trying to build a Heatmap. I understand the basics and how to set up the data in excel and convert it into a matrix. However, I would like to add labels on the side to show treatment groups rather than just having all the individual samples labeled.
Here is the code
vac_v1 <- read.csv('~/Desktop/Seaborn trials/Heatmap of oral treatment.csv')
rownames <- vac_v1[,1]
mat_vac <- data.matrix(vac_v1[,2:ncol(vac_v1)])
heatmap(mat_vac, Rowv = NA, Colv = NA, col = c("black", "green"), scale = 'none', cexRow = 0.6, cexCol = 0.8)
#From the advice of commenters I have included the output from dput(vac_v1)
dput(vac_v1)
structure(list(Group = c("O Oral", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "E Oral",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "Combo Oral",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "Control Oral", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""
), gene.1 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), trait.1 = c(0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L,
0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L,
1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 1L,
1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L), trait.2 = c(0L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), trait.3 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), trait.4 = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L), trait.5 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L,
1L), trait.6 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-162L))
I have also attached an image of the Heatmap.
What I would like to do is instead of having numbers on the right side of the y-axis, I would like to have my treatment groups labeled. The treatment groups are labeled in a separate column in the same dataset. multiple samples are from the same treatment group and I would like to label multiple samples with the same group label. Is there anyway to do this? Any help is appreciated enter image description here
In the past I have just added the labels using powerpoint, it was very clunky. I have attached an example below
A potential solution (using publicly available data):
#install.packages("tidyverse")
library(tidyverse)
# install DESeq if necessary
#if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#BiocManager::install("DESeq")
library(DESeq)
#install.packages("viridis")
library(viridis)
# load example dataset from the DESeq package
example_file <- system.file ("extra/TagSeqExample.tab", package="DESeq")
data <- read.delim(example_file, header=T, row.names="gene")
# subset the dataset to get a reasonable number of rows to plot (top 50 rows)
data_subset <- data %>% top_n(50)
# add a dataframe (tibble: https://tibble.tidyverse.org/) called "group" and fill it with labels and colors
group <- tibble("colours" = sample(size = 50, c("#31688EFF", "#35B779FF"), replace = TRUE),
"labels" = ifelse(colours == "#31688EFF", "group_1", "group_2"))
# create the heatmap
heatmap(as.matrix(data_subset), RowSideColors = group$colours,
labRow = group$labels)
EDIT
Thanks for editing your question to include the dput() output - here is another potential solution:
# Load the data using the dput() oputput
# data <- structure(...
# Fill in the blank cells in "Group"
while(length(ind <- which(data$Group == "")) > 0){
data$Group[ind] <- data$Group[ind -1]
}
# Specify colours for each group
rc <- ifelse(data$Group == "O Oral", "#440154FF",
ifelse(data$Group == "E Oral", "#31688EFF",
ifelse(data$Group == "Combo Oral",
"#35B779FF", "#FDE725FF")))
# Plot the heatmap
heatmap(as.matrix(data[-1]), labRow = data$Group,
RowSideColors = rc, Rowv = NA, revC = TRUE,
margins = c(6, 16), col = c("black", "green"))
# Add a legend for the colours
par(lend = 1)
legend("topright", lwd = 10,
legend = c("O Oral", "E Oral", "Combo Oral", "Control Oral"),
col = c("#440154FF", "#31688EFF", "#35B779FF", "#FDE725FF"))
Is this more along the lines of what you're trying to achieve?

SomersDelta: Error in as.table.default(x) : cannot coerce to a tabl in R

data example
sommer=structure(list(tub = c(1L, 2L, 0L, 2L, 0L, 0L, 0L, 0L, 0L,
1L, 1L, 2L, 1L, 0L, 0L, 1L, 0L, 2L, 1L, 1L, 0L, 0L, 1L, 0L, 0L,
2L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 3L, 0L, 1L, 1L, 1L, 1L,
0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 3L,
2L, 0L, 1L, 0L, 3L, 2L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 3L, 1L, 1L,
3L, 1L), fq = c(1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 1L, 3L, 6L, 1L, 0L, 0L, 1L, 0L, 2L, 4L, 2L, 0L, 0L, 2L, 0L,
0L, 5L, 0L, 1L, 2L, 0L, 0L, 1L, 0L, 0L, 0L, 14L, 0L, 1L, 1L,
1L, 4L, 0L, 1L, 0L, 4L, 0L, 0L, 0L, 6L, 1L, 0L, 0L, 1L, 1L, 0L,
1L, 3L, 4L, 0L, 1L, 0L, 8L, 1L, 2L, 0L, 0L, 0L, 2L, 0L, 0L, 3L,
2L, 2L, 1L, 1L)), .Names = c("tub", "fq"), class = "data.frame", row.names = c(NA,
-75L))
i want calculate SomersDelta
library("DescTools")
SomersDelta(sommer, direction = c("row", "column"))
and i get the error
Error in as.table.default(x) : cannot coerce to a table
also i want get spine plot
library("coin")
spineplot(sommer)
but i get the error
Error in margin.table(tab, 1) : 'x' is not an array
are there two errors interconnection?
How to calculate SomersDelta and get spine plot like this
spine plot
It requires a matrix as input. According to ?SomersDelta
x - a numeric vector or a table. A matrix will be treated as table.
Here, the dataset is a data.frame class. We can convert it to matrix (as.matrix) and it should work fine
SomersDelta(as.matrix(sommer), direction = c("row", "column"))
#[1] -0.06137931

Heatmap with multiple/stacked variables per individual

I am trying to create a heatmap. Each individual has three binary variables (ecz, whz, rhi) across 6 time points. The attached heatmap isn't at all informative as I want to see how the variables develop concurrently.
For each individual rows), I would like 3 rows per individual at each time point with 3 different colours to represent each symptom. The columns would represent each of the 6 time points.
I would be most grateful for any help!
Here is my data:
structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 0L,
1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L,
1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L,
1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L,
0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L,
1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L,
1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L,
1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L,
0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L,
0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L,
1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L,
0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L,
0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L,
1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L,
1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L,
1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L,
1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L,
1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L,
1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L,
0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L,
1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L,
1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L,
0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L,
0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L,
0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L,
0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 0L, 1L), .Dim = c(50L, 19L), .Dimnames = list(NULL, c("idno",
"eczms1", "whzms1", "rhims1", "eczms3", "whzms3", "rhims3", "eczms5",
"whzms5", "rhims5", "eczms8", "whzms8", "rhims8", "eczms11",
"whzms11", "rhims11", "eczms16", "whzms16", "rhims16")))
Here is the code for the attached heatmap:
library(pheatmap)
am=as.matrix(am)
col = c("darkgreen","red")
breaks <- c(-1, 0, 1)
c1=pheatmap(am, show_rownames=FALSE, col=col, cluster_rows = FALSE,
cluster_cols=FALSE, legend =TRUE,
legend_breaks = 0:1, legend_labels = c("No","Yes"))
I have a solution using the tidyverse packages:
EDIT:
Reordered idno so that patients are sorted by appearance of ecz.
library(tidyverse)
input <- structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 0L,
1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L,
1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L,
1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L,
0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L,
1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L,
1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L,
1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L,
0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L,
0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L,
1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L,
0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L,
0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L,
1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L,
1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L,
1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L,
1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L,
1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L,
1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L,
0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L,
1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L,
1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L,
0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L,
0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L,
0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L,
0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 0L, 1L), .Dim = c(50L, 19L), .Dimnames = list(NULL, c("idno",
"eczms1", "whzms1", "rhims1", "eczms3", "whzms3", "rhims3", "eczms5",
"whzms5", "rhims5", "eczms8", "whzms8", "rhims8", "eczms11",
"whzms11", "rhims11", "eczms16", "whzms16", "rhims16")))
tidy <- tibble::as.tibble(input) %>%
tidyr::gather(key = "key", value = "value", -idno) %>%
tidyr::separate(key, into = c("symptom", "time"), sep = "(?<=[A-Za-z])(?=[0-9])") %>%
dplyr::mutate(
time = as.factor(as.numeric(time)),
value = ifelse(value, symptom, NA),
symptom = factor(symptom, levels = c("eczms", "whzms", "rhims"), labels = c("ecz", "whz", "rhi")),
idno = factor(idno, levels = unique(idno[order(value)]))
)
ggplot(tidy, aes(x = time, y = symptom, fill = value)) +
geom_raster() +
facet_wrap(~idno) +
scale_fill_brewer(guide = FALSE, palette = "Set2")
#> Warning: Removed 474 rows containing missing values (geom_raster).
ggplot(tidy, aes(x = time, y = reorder(idno, dplyr::desc(idno)), fill = value)) +
geom_raster() +
facet_wrap(~symptom) +
scale_fill_brewer(guide = FALSE, palette = "Set2")
#> Warning: Removed 474 rows containing missing values (geom_raster).
Created on 2018-11-09 by the reprex package (v0.2.1)

Express relations between three variables using ggplot2 in R

I have a data frame like this
structure(list(cli_exp = c(1L, 1L, 2L, 1L, 1L, 0L, 2L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 2L, 2L, 0L, 1L, 0L,
1L, 1L, 2L, 0L, 1L), vcs_exp = c(0L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 2L, 1L,
1L, 0L, 0L, 0L, 2L, 1L, 0L), web_exp = c(2L, 2L, 2L, 1L, 0L,
0L, 1L, 2L, 0L, 0L, 3L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 2L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 2L, 0L, 0L)), .Names = c("cli_exp", "vcs_exp",
"web_exp"), row.names = c(NA, 30L), class = "data.frame")
I want to use ggplot2 to express the relation between these three variables and tried the simple point plot
ggplot(data = data) +
geom_point(mapping = aes(x = web_exp, y = vcs_exp, color = cli_exp))
But apparently, there are many overlapping data points, which are not suitable for point display. Are there any better ways?
I would use ggpairs from GGally package
tmp_df <- structure(list(cli_exp = c(1L, 1L, 2L, 1L, 1L, 0L, 2L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 2L, 2L, 0L, 1L, 0L,
1L, 1L, 2L, 0L, 1L), vcs_exp = c(0L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 2L, 1L,
1L, 0L, 0L, 0L, 2L, 1L, 0L), web_exp = c(2L, 2L, 2L, 1L, 0L,
0L, 1L, 2L, 0L, 0L, 3L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 2L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 2L, 0L, 0L)), .Names = c("cli_exp", "vcs_exp",
"web_exp"), row.names = c(NA, 30L), class = "data.frame")
library(GGally)
ggpairs(tmp_df,
upper = list(continuous = wrap("cor", size = 10)),
lower = list(continuous = "smooth"))
Edit: use pairs from base R
pairs(tmp_df)
Use pairs.panels from psych package
library(psych)
pairs.panels(tmp_df,
method = "pearson",
density = TRUE,
ellipses = TRUE
)
As you mentioned, the points overlap, so some points aren't visible when using geom_point.
ggplot(data = df, aes(x = web_exp, y = vcs_exp, color = cli_exp)) +
geom_point()
This can be solved by adding a small amount of jitter. Also, making the points slightly transparent will make any overlaps more clear.
ggplot(data = df, aes(x = web_exp, y = vcs_exp, color = cli_exp)) +
geom_jitter(width = 0.05, height = 0.05, alpha = 0.8)

subset a data frame by group [duplicate]

This question already has answers here:
Split a large dataframe into a list of data frames based on common value in column
(3 answers)
Closed 5 years ago.
I have a dataframe:
dput(test)
test <- structure(list(Blocking = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L), Treatment = structure(c(1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L), .Label = c("A", "B", "C", "D"), class = "factor"),
ID69 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), ID70 = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), ID71 = c(0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L,
1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L), ID72 = c(0L,
0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L), ID73 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
ID74 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), ID75 = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), ID77 = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L)), .Names = c("Blocking",
"Treatment", "ID69", "ID70", "ID71", "ID72", "ID73", "ID74",
"ID75", "ID77"), class = "data.frame", row.names = c(NA, -36L
))
I would like to split it based on treatment into four data frames. I tried the following code:
treatments <- c("A", "B", "C", "D")
subset_list <- lapply(treatments, function(x, input_df = test){
subset(input_dt, treatment=treatments)
})
names(subset_list) <- treatments
I get a list but individual data frames are the full test df's and do not contain rows only corresponding to the treatment. This seems like a really easy question, but I am missing a crucial detail here and I am new to the apply family. Please let me know, thanks!
You can use split():
split(test, treatments)
res <- split(x, f) divides the data into the groups defined by f into a list.
unsplit(res, f) does the opposite.

Resources