Trouble making an object in phyloseq - r

I'm trying to make an object that I can use in the package phyloseq, but I can't seem to get it to work. Below is a small subset of my data. First is an otu table, the second has the taxa.
OTUs <- dput(OTU_table[1:5,])
structure(list(OTU_ID = c("OTU_1", "OTU_6", "OTU_16", "OTU_2",
"OTU_216"), V2T4r5Croot = c(3505L, 5L, 124L, 0L, 8L), V2T4r5Broot = c(18880L,
18390L, 1L, 10233L, 0L), R6T2r5Croot = c(82973L, 5195L, 444L,
93L, 7L), V2T2r2Broot = c(13747L, 79L, 1603L, 33L, 0L), R3T2r5Broot = c(11212L,
2L, 462L, 33977L, 0L), V2T2r2Croot = c(63779L, 354L, 5204L, 374L,
0L), R3T4r5Croot = c(60109L, 1518L, 4067L, 875L, 2L), R3T1r5Aroot = c(28412L,
3161L, 626L, 3465L, 131L), R3T4r2Croot = c(40569L, 110L, 575L,
8642L, 0L), V2T2r5Aroot = c(22800L, 2225L, 1334L, 12185L, 4L),
R6T2r5Broot = c(50017L, 5739L, 4199L, 0L, 0L), R6T1r1Broot = c(52756L,
0L, 35L, 490L, 0L), R6T1r2Croot = c(14828L, 10227L, 180L,
3973L, 10L), V2T1r6Aroot = c(40317L, 146L, 543L, 5975L, 36L
), R6T2r1Broot = c(13801L, 524L, 189L, 6121L, 0L), V2T4r1Broot = c(58001L,
21L, 247L, 2359L, 5L), R6T2r6Croot = c(79608L, 715L, 384L,
13121L, 0L), R3T2r1Aroot = c(7938L, 187L, 2305L, 212L, 0L
), R6T2r6Aroot = c(20243L, 1098L, 320L, 10632L, 9L), V2T1r5Broot = c(11102L,
156L, 200L, 8205L, 0L), V2T4r2Aroot = c(7641L, 393L, 53L,
366L, 27L), R6T4r5Croot = c(5L, 68L, 7192L, 4L, 0L), R6T4r5Broot = c(40122L,
92L, 29L, 64631L, 59L), R3T4r6Croot = c(49960L, 101L, 97L,
18846L, 0L), R6T2r2Aroot = c(81204L, 7801L, 1499L, 13245L,
6L), R3T4r5Aroot = c(108839L, 5072L, 1894L, 1957L, 55L),
R3T2r1Broot = c(624L, 0L, 62L, 3687L, 0L), R6T1r5Croot = c(67805L,
0L, 238L, 2L, 0L), V2T4r6Croot = c(43210L, 24L, 0L, 33L,
0L), R3T1r6Aroot = c(6419L, 274L, 1062L, 2411L, 79L), R3T2r2Croot = c(53908L,
34726L, 3497L, 82L, 144L), R3T4r2Aroot = c(10503L, 48L, 23L,
27764L, 0L), R3T2r5Aroot = c(2386L, 79L, 39L, 1805L, 0L),
V2T2r1Croot = c(20324L, 318L, 14L, 1192L, 0L), V2T1r5Aroot = c(3933L,
33L, 6L, 3785L, 0L), V2T1r1Broot = c(99803L, 7377L, 203L,
1098L, 75L), R3T4r6Aroot = c(16601L, 1113L, 217L, 309L, 33L
), R3T2r1Croot = c(135822L, 24308L, 4986L, 219L, 230L), V2T1r5Croot = c(12444L,
139L, 32L, 211L, 0L), R3T1r6Croot = c(3957L, 9L, 117L, 293426L,
0L), R6T1r2Broot = c(92870L, 43L, 625L, 616L, 0L), V2T2r2Aroot = c(26697L,
654L, 130L, 31056L, 0L), R3T2r6Broot = c(82471L, 22990L,
3253L, 955L, 4L), R6T1r1Aroot = c(11187L, 0L, 5L, 0L, 0L),
R6T1r6Broot = c(6016L, 72L, 386L, 3368L, 0L), R3T1r1Aroot = c(55133L,
5854L, 494L, 1694L, 45L), V2T1r2Aroot = c(9346L, 139L, 17L,
64L, 0L), R3T4r1Aroot = c(84510L, 4049L, 1441L, 1193L, 5L
), R6T2r5Aroot = c(38997L, 33L, 273L, 967L, 0L), R3T4r2Broot = c(54402L,
565L, 567L, 9L, 0L), R3T1r2Broot = c(42977L, 24L, 132L, 3L,
7L), R6T1r5Aroot = c(5433L, 39L, 16L, 2L, 0L), R3T1r1Croot = c(4356L,
0L, 0L, 24719L, 0L), R3T4r5Broot = c(39402L, 6424L, 151L,
0L, 0L), R6T1r2Aroot = c(67639L, 14L, 16L, 1L, 0L), R3T2r5Croot = c(12136L,
3420L, 193L, 98L, 0L), R3T1r5Croot = c(21358L, 2876L, 347L,
9850L, 0L), V2T1r6Broot = c(16975L, 2L, 273L, 1397L, 98L),
R6T1r1Croot = c(7403L, 18L, 36L, 2112L, 0L), R3T1r1Broot = c(18301L,
1122L, 276L, 6921L, 7L), V2T2r6Croot = c(59794L, 2560L, 92L,
12437L, 0L), R6T1r5Broot = c(58396L, 1284L, 119L, 21078L,
0L), V2T4r6Broot = c(10496L, 773L, 1603L, 1950L, 19L), V2T1r6Croot = c(34687L,
9560L, 38L, 19L, 1L), R3T4r1Broot = c(23020L, 78L, 276L,
160L, 66L), V2T4r1Aroot = c(32591L, 91L, 197L, 308L, 0L),
V2T4r6Aroot = c(35314L, 3L, 1550L, 1775L, 0L), V2T2r6Aroot = c(12424L,
884L, 149L, 481L, 0L), V2T2r1Aroot = c(6721L, 52L, 203L,
4286L, 0L), R3T2r2Broot = c(26251L, 447L, 326L, 4834L, 0L
), V2T4r2Broot = c(27830L, 2404L, 1131L, 98L, 18L), R6T1r6Aroot = c(11818L,
14L, 34L, 1L, 0L), V2T1r1Croot = c(7961L, 5L, 376L, 2802L,
0L), R6T2r2Croot = c(25329L, 15L, 63L, 76L, 0L), R6T2r2Broot = c(1002L,
0L, 153L, 26L, 0L), R6T2r1Aroot = c(38869L, 11953L, 1987L,
2639L, 0L), V2T4r5Aroot = c(1838L, 18L, 1L, 47981L, 5L),
R6T4r1Aroot = c(3323L, 16L, 3L, 7212L, 0L), V2T2r5Croot = c(22124L,
1037L, 395L, 1515L, 296L), R3T4r6Broot = c(4112L, 0L, 0L,
1L, 0L), R3T1r5Broot = c(4443L, 120L, 528L, 1176L, 0L), V2T2r6Broot = c(2068L,
55L, 11L, 7180L, 0L), R3T2r2Aroot = c(4962L, 277L, 35L, 1L,
7L), V2T1r1Aroot = c(18506L, 0L, 44L, 93L, 0L), R3T1r2Aroot = c(19779L,
2L, 162L, 51355L, 8L), R6T2r1Croot = c(913L, 4L, 26L, 10L,
0L), V2T2r5Broot = c(7309L, 69L, 63L, 38L, 15L), V2T4r1Croot = c(8043L,
231L, 1351L, 787L, 18L), R3T1r6Broot = c(1973L, 1L, 0L, 40482L,
0L), R3T4r1Croot = c(4004L, 326L, 12L, 2020L, 11L), V2T1r2Croot = c(2712L,
21L, 43L, 2127L, 0L), V2T4r2Croot = c(3711L, 118L, 0L, 1487L,
50L), R3T2r6Croot = c(1491L, 290L, 55L, 33L, 0L), R6T1r6Croot = c(8541L,
0L, 0L, 0L, 0L), R6T2r6Broot = c(2561L, 6L, 2L, 387L, 0L),
V2T2r1Broot = c(2128L, 315L, 180L, 1483L, 0L), V2T1r2Broot = c(2363L,
78L, 260L, 2182L, 0L), R3T2r6Aroot = c(486L, 0L, 191L, 1209L,
0L), R3T1r2Croot = c(6014L, 18L, 126L, 587L, 0L), NC1root = c(5L,
0L, 0L, 0L, 0L), R6T4r6Aroot = c(246L, 3L, 7L, 83L, 0L)), row.names = c(NA,
5L), class = "data.frame")
and
taxa <- dput(taxa[c(1,2,6,16,216),])
structure(list(OTU_ID = c("OTU_1", "OTU_2", "OTU_6", "OTU_16",
"OTU_216"), Kingdom = c("Fungi_1", "Fungi_1", "Fungi_1", "Fungi_1",
"Fungi_1"), Phylum = c("Ascomycota_1", "Ascomycota_1", "", "Ascomycota_1",
"Basidiomycota_1"), Class = c("Sordariomycetes_1", "Dothideomycetes_1",
"", "Dothideomycetes_1", "Tremellomycetes_1"), Order = c("Hypocreales_1",
"Pleosporales_1", "", "Pleosporales_1", "Tremellales_1"), Family = c("Nectriaceae_1",
"Corynesporascaceae_1", "", "Pleosporaceae_1", "Trimorphomycetaceae"
), Genus = c("Fusarium_1", "Corynespora_1", "", "Alternaria_1",
"Saitozyma"), Species = c("", "Corynespora cassiicola ", "",
"", "")), row.names = c(1L, 2L, 6L, 16L, 216L), class = "data.frame")
Here's some code I've tried:
# Attempt from the question; this is the code that raises the error quoted below.
# Both data frames still carry OTU_ID as an ordinary column and keep their
# integer row names (OTUs: 1-5; taxa: 1, 2, 6, 16, 216), so the matrices built
# here are not labelled by OTU ID.
OTUs1 <- as.matrix(OTUs)
taxa1 <- as.matrix(taxa)
OTUs2 = otu_table(OTUs1, taxa_are_rows = TRUE)
# NOTE(review): taxa1 is a plain matrix, not wrapped with tax_table() --
# presumably phyloseq() expects a tax_table object here; confirm against the
# phyloseq documentation.
physeq <- phyloseq(OTUs2, taxa1)
I get this error when I run the code to try and make the object:
Error in phyloseq(OTUs2, taxa1) :
Problem with OTU/taxa indices among those you provided.
Check using intersect() and taxa_names()

I assume that you are getting this error because the row.names between both matrices do not match. The following steps should resolve your error:
# Build a phyloseq object from the `taxa` and `OTUs` data frames.
# Assumes phyloseq is already attached (tax_table(), otu_table(), phyloseq()).

# Use the OTU IDs as row names so both tables are labelled the same way
row.names(taxa) <- taxa[["OTU_ID"]]
row.names(OTUs) <- OTUs[["OTU_ID"]]
# Remove the OTU_ID column; drop = FALSE keeps a data frame (and its row names)
# even if only a single column is left
taxa <- taxa[, colnames(taxa) != "OTU_ID", drop = FALSE]
OTUs <- OTUs[, colnames(OTUs) != "OTU_ID", drop = FALSE]
# Keep only the OTU IDs present in both tables, in the same order.
# Base intersect() is sufficient for two character vectors, so no extra
# package has to be loaded for this step.
ids <- base::intersect(row.names(taxa), row.names(OTUs))
taxa <- taxa[ids, , drop = FALSE]
OTUs <- OTUs[ids, , drop = FALSE]
# Convert to phyloseq components and combine them
taxa <- tax_table(as.matrix(taxa))
OTUs <- otu_table(as.matrix(OTUs), taxa_are_rows = TRUE)
physeq <- phyloseq(taxa, OTUs)

Related

How to use Predict in rms package for a multiple values?

I fitted a Cox model with the rms package. When I call Predict with a single value for each predictor it returns the correct result, whereas when I give it multiple values it gives me weird results.
library(data.table)
library(survival)
library(survminer)
library(rms)
# dput() output for the example data frame `df`
# (36 rows; columns ID, Time, Event, Risk1, Risk2, Risk3).
# The closing parenthesis of structure() was missing and has been restored.
dput(df)
structure(list(ID = c(1001L, 1002L, 1003L, 1004L, 1006L, 1014L,
1015L, 1016L, 1018L, 1022L, 1024L, 1032L, 1040L, 1042L, 1049L,
1056L, 1059L, 1060L, 1066L, 1084L, 1087L, 1090L, 1093L, 1096L,
1097L, 1098L, 1099L, 1200L, 1205L, 1216L, 1221L, 1222L, 1225L,
1226L, 1233L, 1239L), Time = c(9L, 8L, 69L, 104L, 104L, 100L,
24L, 85L, 100L, 99L, 67L, 58L, 7L, 94L, 93L, 90L, 91L, 90L, 89L,
72L, 84L, 84L, 11L, 82L, 39L, 46L, 82L, 82L, 9L, 34L, 75L, 76L,
52L, 20L, 29L, 70L), Event = c(1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L), Risk1 = c(0.1,
0.03, 0.02, 0.05, 0.01, 0.04, 0.03, 0.06, 0.02, 0.03, 0, 0, 0.11,
0.01, 0.03, 0, 0.01, 0.01, 0.01, 0, 0, 0, 0.05, 0.01, 0, 0, 0,
0, 0.04, 0, 0.07, 0.01, 0.01, 0, 0, 0), Risk2 = c(88L, 49L, 60L,
46L, 50L, 60L, 38L, 74L, 39L, 65L, 80L, 35L, 54L, 40L, 54L, 55L,
60L, 38L, 64L, 74L, 71L, 57L, 55L, 49L, 42L, 30L, 63L, 46L, 47L,
58L, 34L, 72L, 50L, 60L, 73L, 51L), Risk3 = c(2L, 2L, 2L, 3L,
3L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 3L, 1L, 3L, NA, 2L, 3L, 2L, 2L,
2L, NA, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L
)), class = "data.frame", row.names = c(NA, -36L))
followed by
# Summarise the predictor distributions and register the summary through the
# `datadist` option so that rms functions can look it up
ddist <- datadist(df)
options(datadist = "ddist")
# Fit the Cox model; x, y and surv ask cph() to keep the design matrix, the
# response and the survival estimates in the fit object. Spelled TRUE rather
# than T, because T is an ordinary variable that can be reassigned.
cox_model <- cph(
  Surv(Time, Event == 1) ~ Risk1 + Risk2 + Risk3,
  data = df, x = TRUE, y = TRUE, surv = TRUE
)
# Prediction at a single setting of each predictor
Predict(cox_model, Risk1 = 3.2, Risk2 = 1, Risk3 = 0.5)
but when I give multiple values as follows:
# Attempt from the question: 36 values are supplied for each predictor.
# NOTE(review): Predict() appears to evaluate every combination of the supplied
# values (36 * 36 * 36 = 46656, close to the row count reported below) rather
# than pairing them row by row -- confirm against the rms documentation.
# For one prediction per observation, predict(cox_model, newdata = <data frame>)
# is presumably the intended tool.
Predict(cox_model,
Risk1=c(5,3,2,1.5,1.5,2,3,2.5,4,2,5.5,6,3,3.5,4,5,4.5,3,2,6,3,5,4,1.8,3,3.5,1.5,2.5,3.5,5,6,4,1.5,5,4,2.5),
Risk2=c(1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,1,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,0),
Risk3=c(0,0.07,0,0.03,0.01,0.02,0.01,0,0.05,0,0.04,0.03,0.01,0.01,0.01,0,0.11,0.03,0,0.05,0,0,0.02,0.04,0.01,0,0,0.01,0.03,0,0.01,0,0.06,0,0,0.1))
It gives me an output with 46566 rows, whereas I only have 36 rows to predict.

magrittr pipe to righthand side in braces gives error

I am using the magrittr pipe %>% to pipe an object into multiple arguments of the right-hand-side function, using curly braces to stop the object from also being inserted as the first argument:
# Failing version from the question. A pipeline whose left-hand side is the bare
# dot (`. %>% f()`) does not pipe the current value: magrittr turns it into a
# function (a "functional sequence"), so each argument below receives a
# function instead of data.
data_bec <- dds_tmp %>%
{sva::ComBat_seq(
counts = . %>% SummarizedExperiment::assay(),
batch = . %>% magrittr::extract2("batch") %>% droplevels(),
covar_mod = cbind(
time = . %>% magrittr::extract2("time") %>% droplevels(),
treatment = . %>% magrittr::extract2("treatment") %>% droplevels()
)
)}
Which gives this error:
Error in unique.default(x, nmax = nmax) :
unique() applies only to vectors
However, the code below works:
# Working version from the question: the dot is passed as an explicit argument
# to each call, so it is never the bare left-hand side of a pipe and is
# evaluated as the piped object.
data_bec <- dds_tmp %>%
{sva::ComBat_seq(
counts = SummarizedExperiment::assay(.),
batch = magrittr::extract2(., "batch") %>% droplevels(),
covar_mod = cbind(
time = magrittr::extract2(., "time") %>% droplevels(),
treatment = magrittr::extract2(., "treatment") %>% droplevels()
)
)}
I can't wrap my head around why magrittr::extract2(., "batch") %>% droplevels() works inside a function inside braces, but . %>% magrittr::extract2("batch") %>% droplevels() doesn't work. Is there an easy way to understand this behavior?
NB: My dds_tmp object is large and produced by DESeq2::sample_table. Here is a small example data set:
dds_tmp <- new(
"DESeqDataSet",
design = ~ 1,
dispersionFunction = function ()
NULL,
rowRanges = new(
"CompressedGRangesList",
unlistData = new(
"GRanges",
seqnames = new(
"Rle",
values = structure(integer(0), .Label = character(0), class = "factor"),
lengths = integer(0),
elementMetadata = NULL,
metadata = list()
),
ranges = new(
"IRanges",
start = integer(0),
width = integer(0),
NAMES = NULL,
elementType = "ANY",
elementMetadata = NULL,
metadata = list()
),
strand = new(
"Rle",
values = structure(integer(0), .Label = c("+",
"-", "*"), class = "factor"),
lengths = integer(0),
elementMetadata = NULL,
metadata = list()
),
seqinfo = new(
"Seqinfo",
seqnames = character(0),
seqlengths = integer(0),
is_circular = logical(0),
genome = character(0)
),
elementMetadata = new(
"DFrame",
rownames = NULL,
nrows = 0L,
listData = structure(list(), .Names = character(0)),
elementType = "ANY",
elementMetadata = NULL,
metadata = list()
),
elementType = "ANY",
metadata = list()
),
elementMetadata = new(
"DFrame",
rownames = NULL,
nrows = 50L,
listData = structure(list(), .Names = character(0)),
elementType = "ANY",
elementMetadata = new(
"DFrame",
rownames = NULL,
nrows = 0L,
listData = list(type = character(0), description = character(0)),
elementType = "ANY",
elementMetadata = NULL,
metadata = list()
),
metadata = list()
),
elementType = "GRanges",
metadata = list(),
partitioning = new(
"PartitioningByEnd",
end = c(
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
),
NAMES = c(
"0610005C13Rik", "0610007C21Rik", "0610007L01Rik", "0610007N19Rik",
"0610007P08Rik", "0610007P14Rik", "0610007P22Rik", "0610008F07Rik",
"0610009B14Rik", "0610009B22Rik", "0610009D07Rik", "0610009L18Rik",
"0610009O20Rik", "0610010B08Rik", "0610010F05Rik", "0610010K14Rik",
"0610010O12Rik", "0610011F06Rik", "0610011L14Rik", "0610012G03Rik",
"0610012H03Rik", "0610030E20Rik", "0610031J06Rik", "0610031O16Rik",
"0610037L13Rik", "0610037P05Rik", "0610038B21Rik", "0610038L08Rik",
"0610039K10Rik", "0610040B10Rik", "0610040F04Rik", "0610040J01Rik",
"0610043K17Rik", "0910001L09Rik", "100043387", "1100001G20Rik",
"1110001A16Rik", "1110001J03Rik", "1110002B05Rik", "1110002L01Rik",
"1110002N22Rik", "1110003E01Rik", "1110004E09Rik", "1110004F10Rik",
"1110005A03Rik", "1110006O24Rik", "1110007C09Rik", "1110008F13Rik",
"1110008J03Rik", "1110008L16Rik"
),
elementType = "ANY",
elementMetadata = NULL,
metadata = list()
)
),
colData = new(
"DFrame",
rownames = c(
"IM_MR30_S30_L001", "IM_MR31_S31_L001", "IM_MR32_S32_L001",
"IM_MR33_S33_L001", "IM_MR34_S34_L001", "IM_MR35_S35_L001",
"IM_MR36_S36_L001", "IM_AR_30", "IM_AR_31", "IM_AR_32", "IM_AR_33"
),
nrows = 11L,
listData = list(
batch = structure(
c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L),
.Label = c(
"190701_K00242_0579_AH7GFLBBXY",
"180810_K00242_0453_AHVJTJBBXX-IM-AR-RS21+180814_K00242_0456_AHW5LYBBXX-IM-AR-RS21+180817_K00242_0458_AHW57TBBXX-IM-AR-RS21",
"180406_K00242_0385_AHTCCHBBXX-IM-AR-RS8+180412_K00242_0388_AHT7NYBBXX-IM-AR-RS8",
"180814_K00242_0456_AHW5LYBBXX-IM-AR-RS21+180814_K00242_0456_AHW5LYBBXX-IM-AR-RS21+180817_K00242_0458_AHW57TBBXX-IM-AR-RS21",
"190322_K00242_0534_AHYLVWBBXX_IM-AM-RS8-dT"
),
class = "factor"
),
time_label = c( "D8", "D10", "D10", "D10", "D15", "D15", "D15", "D6",
"D6", "D6", "D6"
),
time_value = c(8L, 10L,
10L, 10L, 15L, 15L, 15L, 6L, 6L, 6L, 6L),
user = c("MR",
"MR", "MR", "MR", "MR", "MR", "MR", "AR", "AR", "AR",
"AR"),
treatment = structure(
c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 1L),
.Label = c("ctrl", "OE"),
class = "factor"
),
group = structure(
c(10L, 11L, 11L, 11L, 12L, 12L, 12L,
13L, 13L, 13L, 14L),
.Label = c(
"M_2i_ctrl", "M_D0_ctrl", "M_D1_ctrl", "M_D2_ctrl", "M_D3_ctrl",
"M_D4_ctrl", "M_D5_ctrl", "M_D6_ctrl", "M_D7_ctrl", "M_D8_ctrl",
"M_D10_ctrl", "M_D15_ctrl", "A_D6_OE", "A_D6_ctrl", "A_D7_ctrl",
"A_D7_OE", "A_D8_ctrl", "A_D8_OE", "A_D11_ctrl", "A_D11_OE"
),
class = "factor"
),
group_pca_label = c(
NA, "M_D10_ctrl", NA, NA, "M_D15_ctrl", NA, NA, "A_D6_OE", NA, NA,
"A_D6_ctrl"
),
condition = structure(
c(10L,
11L, 11L, 11L, 13L, 13L, 13L, 14L, 14L, 14L, 8L),
.Label = c(
"2i_ctrl", "D0_ctrl", "D1_ctrl", "D2_ctrl", "D3_ctrl", "D4_ctrl",
"D5_ctrl", "D6_ctrl", "D7_ctrl", "D8_ctrl", "D10_ctrl", "D11_ctrl",
"D15_ctrl", "D6_OE", "D7_OE", "D8_OE", "D11_OE"
),
class = "factor"
),
time = structure(
c(10L, 11L, 11L,
11L, 13L, 13L, 13L, 8L, 8L, 8L, 8L),
.Label = c(
"2i", "D0", "D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D10",
"D11", "D15"
),
class = "factor"
)
),
elementType = "ANY",
elementMetadata = new(
"DFrame",
rownames = NULL,
nrows = 9L,
listData = list(
type = c(
"input", "input", "input", "input", "input", "input", "input",
"input", "input"
),
description = c("",
"", "", "", "", "", "", "", "")
),
elementType = "ANY",
elementMetadata = NULL,
metadata = list()
),
metadata = list()
),
assays = new(
"SimpleAssays",
data = new(
"SimpleList",
listData = list(counts = structure(
c(
0L, 221L, 582L, 60L, 392L, 521L, 88L, 0L, 0L, 99L, 405L, 3L, 345L,
0L, 221L, 151L, 4L, 63L, 227L, 117L, 11L, 376L, 406L, 0L, 143L,
255L, 5L, 0L, 0L, 7L, 5L, 1L, 0L, 182L, 8L, 0L, 87L, 20L, 381L, 71L,
58L, 331L, 56L, 589L, 36L, 2L, 73L, 254L, 55L, 95L, 0L, 381L, 691L,
80L, 353L, 397L, 126L, 0L, 0L, 151L, 434L, 6L, 374L, 0L, 242L, 134L,
43L, 113L, 232L, 130L, 23L, 325L, 458L, 0L, 165L, 358L, 4L, 0L, 0L,
10L, 0L, 0L, 4L, 225L, 7L, 0L, 100L, 52L, 486L, 115L, 73L, 512L,
86L, 639L, 50L, 3L, 108L, 273L, 64L, 105L, 0L, 261L, 588L, 65L,
334L, 367L, 88L, 0L, 0L, 102L, 389L, 8L, 333L, 0L, 216L, 158L, 38L,
87L, 254L, 87L, 24L, 302L, 434L, 0L, 159L, 292L, 4L, 0L, 2L, 7L, 1L,
1L, 0L, 197L, 12L, 0L, 86L, 39L, 482L, 99L, 58L, 408L, 67L, 657L,
43L, 1L, 73L, 242L, 63L, 68L, 0L, 271L, 579L, 88L, 298L, 383L, 80L,
0L, 0L, 95L, 358L, 3L, 271L, 0L, 195L, 143L, 23L, 64L, 200L, 82L,
27L, 334L, 382L, 0L, 170L, 296L, 4L, 0L, 0L, 4L, 0L, 1L, 1L, 171L,
14L, 0L, 122L, 26L, 365L, 86L, 62L, 343L, 67L, 589L, 35L, 1L, 59L,
251L, 39L, 71L, 0L, 332L, 566L, 112L, 330L, 282L, 82L, 0L, 0L, 107L,
318L, 1L, 349L, 0L, 218L, 119L, 122L, 68L, 143L, 111L, 26L, 413L,
540L, 0L, 124L, 315L, 0L, 0L, 1L, 6L, 0L, 1L, 3L, 235L, 13L, 0L,
88L, 66L, 508L, 99L, 71L, 510L, 73L, 532L, 46L, 1L, 145L, 342L, 49L,
60L, 0L, 297L, 598L, 100L, 283L, 278L, 58L, 0L, 0L, 115L, 306L, 4L,
368L, 0L, 208L, 109L, 105L, 81L, 190L, 113L, 13L, 406L, 563L, 0L,
115L, 280L, 5L, 0L, 0L, 4L, 0L, 1L, 2L, 238L, 22L, 0L, 78L, 75L,
532L, 54L, 53L, 484L, 79L, 510L, 35L, 1L, 154L, 361L, 46L, 45L, 0L,
413L, 724L, 102L, 352L, 325L, 77L, 0L, 0L, 87L, 364L, 8L, 398L, 0L,
201L, 130L, 94L, 63L, 227L, 116L, 16L, 398L, 594L, 0L, 127L, 312L,
4L, 0L, 0L, 8L, 0L, 0L, 0L, 285L, 12L, 0L, 99L, 61L, 638L, 56L, 44L,
562L, 80L, 604L, 32L, 3L, 178L, 394L, 51L, 81L, 0L, 342L, 1147L,
136L, 887L, 568L, 197L, 0L, 0L, 192L, 644L, 13L, 653L, 0L, 601L,
286L, 36L, 98L, 478L, 190L, 8L, 706L, 694L, 0L, 229L, 306L, 11L, 0L,
1L, 9L, 0L, 6L, 10L, 221L, 39L, 0L, 132L, 109L, 1132L, 350L, 133L,
567L, 115L, 931L, 61L, 5L, 70L, 561L, 130L, 133L, 0L, 337L, 1162L,
135L, 927L, 578L, 182L, 0L, 0L, 242L, 665L, 21L, 702L, 0L, 674L,
307L, 35L, 122L, 475L, 176L, 8L, 653L, 714L, 0L, 251L, 312L, 11L,
0L, 3L, 15L, 0L, 9L, 10L, 209L, 31L, 0L, 168L, 111L, 1231L, 383L,
150L, 552L, 129L, 980L, 72L, 4L, 52L, 686L, 142L, 130L, 1L, 284L,
1159L, 145L, 861L, 529L, 168L, 0L, 0L, 202L, 688L, 3L, 670L, 0L,
654L, 298L, 27L, 122L, 465L, 197L, 15L, 707L, 657L, 0L, 223L, 284L,
6L, 0L, 1L, 13L, 0L, 3L, 10L, 199L, 31L, 0L, 125L, 88L, 1142L, 397L,
121L, 593L, 128L, 888L, 60L, 7L, 54L, 595L, 117L, 150L, 0L, 224L,
1228L, 68L, 894L, 674L, 213L, 0L, 0L, 197L, 798L, 12L, 758L, 0L,
446L, 285L, 21L, 75L, 458L, 185L, 5L, 738L, 456L, 0L, 261L, 339L,
7L, 0L, 0L, 9L, 0L, 0L, 1L, 180L, 32L, 0L, 110L, 98L, 919L, 249L,
134L, 387L, 188L, 1055L, 64L, 3L, 52L, 630L, 110L, 175L
),
.Dim = c(50L, 11L),
.Dimnames = list(
c(
"0610005C13Rik", "0610007C21Rik", "0610007L01Rik",
"0610007N19Rik", "0610007P08Rik", "0610007P14Rik",
"0610007P22Rik", "0610008F07Rik", "0610009B14Rik",
"0610009B22Rik", "0610009D07Rik", "0610009L18Rik",
"0610009O20Rik", "0610010B08Rik", "0610010F05Rik",
"0610010K14Rik", "0610010O12Rik", "0610011F06Rik",
"0610011L14Rik", "0610012G03Rik", "0610012H03Rik",
"0610030E20Rik", "0610031J06Rik", "0610031O16Rik",
"0610037L13Rik", "0610037P05Rik", "0610038B21Rik",
"0610038L08Rik", "0610039K10Rik", "0610040B10Rik",
"0610040F04Rik", "0610040J01Rik", "0610043K17Rik",
"0910001L09Rik", "100043387", "1100001G20Rik", "1110001A16Rik",
"1110001J03Rik", "1110002B05Rik", "1110002L01Rik",
"1110002N22Rik", "1110003E01Rik", "1110004E09Rik",
"1110004F10Rik", "1110005A03Rik", "1110006O24Rik",
"1110007C09Rik", "1110008F13Rik", "1110008J03Rik", "1110008L16Rik"
),
c(
"IM_MR30_S30_L001", "IM_MR31_S31_L001", "IM_MR32_S32_L001",
"IM_MR33_S33_L001", "IM_MR34_S34_L001", "IM_MR35_S35_L001",
"IM_MR36_S36_L001", "IM_AR_30", "IM_AR_31", "IM_AR_32", "IM_AR_33"
)
)
)),
elementType = "ANY",
elementMetadata = NULL,
metadata = list()
)
),
NAMES = NULL,
elementMetadata = new(
"DFrame",
rownames = NULL,
nrows = 50L,
listData = structure(list(), .Names = character(0)),
elementType = "ANY",
elementMetadata = NULL,
metadata = list()
),
metadata = list(version = structure(
list(c(1L, 30L, 1L)), class = c("package_version",
"numeric_version")
))
)
We can use
# Wrapping the dot in braces, `{.}`, means the left-hand side of each inner pipe
# is an expression rather than the bare placeholder, so magrittr evaluates it to
# the piped object and passes that value on instead of building a function.
data_bec <- dds_tmp %>%
{sva::ComBat_seq(
counts = {.} %>% SummarizedExperiment::assay(),
batch = {.} %>% magrittr::extract2("batch") %>% droplevels(),
covar_mod = cbind(
time = {.} %>% magrittr::extract2("time") %>% droplevels(),
treatment = {.} %>% magrittr::extract2("treatment") %>% droplevels()
)
)}

Widening Data and Changing Columns

I have managed to delete a little bit of code that did the below task and can't for the life of me figure out how I did it before.
I want to widen the data that has two factors spread over 8 different 'waves'. There are four 'Paper' factors, each with the same four internal factors 'Response'. The output from a previously required function gives the following dataframe:
[image of the input tibble not preserved; see the first dput below]
And I would like to make it look like this:
The single column of the first tibble has become the single row of the second tibble.
As you can see, the second tibble has extra factors of Paper but these can just be joined row wise.
I really wasn't sure how to attack this, but thought it would be done using the pivot_wider function. When I tried
# Attempt from the question; it fails with the "can't subset columns that
# don't exist" error described below.
# NOTE(review): the first id_cols entry has a stray backtick before its closing
# quote, so that string cannot match the column `Stay/remain in the EU`.
times_correct <- times_19 %>%
pivot_wider( id_cols = c('Stay/remain in the EU`', 'Leave the EU', 'I would/will not vote', 'Don\'t know'), names_from = eurrefcolnames)
I got the error that I can't subset columns that don't exist which makes sense: I need to manually add the correct 'Waves'. I think this is relatively simple, but can't for the life of me figure out how I did it!
Here is the dput of the various tibbles:
structure(list(resp = structure(c(3L, 2L, 4L, 1L, NA, NA, NA,
NA), .Label = c("Don't Know", "Leave", "Remain", "Will Not Vote"
), class = "factor"), `Stay/remain in the EU` = c(316L, 290L,
313L, 324L, 338L, 320L, 325L, 335L), `Leave the EU` = c(157L,
123L, 159L, 154L, 134L, 189L, 187L, 181L), `I would/will not vote` = c(2L,
3L, 3L, 3L, 2L, 2L, 2L, 0L), `Don't know` = c(56L, 51L, 55L,
50L, 57L, 20L, 17L, 0L), Paper = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = "Times", class = "factor")), row.names = c(NA,
-8L), class = c("tbl_df", "tbl", "data.frame"))
structure(list(resp = structure(c(3L, 2L, 4L, 1L, 3L, 2L, 4L,
1L, 3L, 2L, 4L, 1L, 3L, 2L, 4L, 1L, 3L, 2L, 4L, 1L), .Label = c("Don't Know",
"Leave", "Remain", "Will Not Vote"), class = "factor"), euRefVoteW1 = c(316L,
157L, 2L, 56L, 190L, 339L, 4L, 70L, 819L, 79L, 9L, 71L, 1294L,
1311L, 150L, 523L, 1715L, 2587L, 133L, 630L), euRefVoteW2 = c(290L,
123L, 3L, 51L, 175L, 282L, 3L, 62L, 777L, 74L, 5L, 62L, 1091L,
925L, 80L, 371L, 1528L, 2044L, 83L, 517L), euRefVoteW3 = c(313L,
159L, 3L, 55L, 199L, 334L, 4L, 69L, 835L, 81L, 10L, 57L, 1348L,
1289L, 139L, 508L, 1766L, 2563L, 156L, 586L), euRefVoteW4 = c(324L,
154L, 3L, 50L, 215L, 328L, 2L, 61L, 848L, 70L, 10L, 55L, 1397L,
1267L, 128L, 492L, 1853L, 2494L, 143L, 583L), euRefVoteW6 = c(338L,
134L, 2L, 57L, 241L, 286L, 2L, 77L, 853L, 68L, 5L, 57L, 1519L,
1133L, 112L, 520L, 2017L, 2284L, 106L, 667L), euRefVoteW7 = c(320L,
189L, 2L, 20L, 186L, 384L, 2L, 34L, 832L, 109L, 8L, 34L, 1449L,
1456L, 87L, 292L, 1906L, 2785L, 55L, 328L), euRefVoteW8 = c(325L,
187L, 2L, 17L, 187L, 384L, 1L, 34L, 836L, 118L, 5L, 24L, 1462L,
1522L, 72L, 228L, 1898L, 2852L, 56L, 268L), euRefVoteW9 = c(335L,
181L, 0L, 0L, 206L, 385L, 0L, 6L, 844L, 102L, 0L, 4L, 1572L,
1462L, 0L, 21L, 2018L, 2827L, 0L, 20L), Paper = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L), .Label = c("Times", "Telegraph", "Control", "No_Paper",
"Rest"), class = "factor")), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
# Names of the wave columns in the wide tibble shown above (W1-W4 and W6-W9).
# The sixth entry previously carried a leading space (" euRefVoteW7"), which
# could never match the real column name.
eurrefcolnames <- c(
  "euRefVoteW1", "euRefVoteW2", "euRefVoteW3", "euRefVoteW4",
  "euRefVoteW6", "euRefVoteW7", "euRefVoteW8", "euRefVoteW9"
)
EDIT:
Here is the function that creates the initial data frames; is there an edit I could make here, perhaps?
tally_reader_number <- function(input_dataframe, newspaper_name) {
  # Tally the EU-referendum responses of one newspaper's readers.
  #
  # input_dataframe: table with a `Paper` column plus the columns to tally
  #   (the original comment describes it as the output of in_all_waves).
  # newspaper_name: value of `Paper` whose rows are kept.
  #
  # Returns the tallies with `resp` as the first column and `Paper` as the
  # last one, both stored as factors.
  #
  # NOTE(review): the sample output shown with the question has one row per
  # wave and one column per answer, so map_df(table) seems to bind each
  # column's table as a row -- confirm with the installed purrr/dplyr.

  # Rows of the requested paper only; ungroup() is kept because the original
  # author found that the function does not work without it.
  paper_rows <- input_dataframe %>%
    filter(Paper == newspaper_name) %>%
    ungroup()
  paper_rows <- select(paper_rows, -Paper)

  # table() every remaining column and bind the results with purrr::map_df(),
  # then expose the row names as a `response` column
  tallies <- map_df(paper_rows, table)
  tallies <- rownames_to_column(tallies, "response")

  # Translate the row name into the response label
  tallies <- mutate(
    tallies,
    resp = case_when(
      response == 1 ~ "Remain",
      response == 2 ~ "Leave",
      response == 3 ~ "Will Not Vote",
      response == 4 ~ "Don't Know"
    )
  )

  # resp first, helper column dropped, then tag every row with the paper
  tallies <- select(tallies, resp, everything(), -response)
  tallies <- mutate(tallies, Paper = newspaper_name)

  tallies$Paper <- as.factor(tallies$Paper)
  tallies$resp <- as.factor(tallies$resp)
  tallies
}

Need help in ggplot doing multiple factor barplot with error bar

I have data for which I would like to draw a bar plot with error bars.
My data is as below:
dput(level6.top35)
structure(list(patient = structure(c(3L, 3L, 3L, 1L, 1L, 1L,
4L, 4L, 4L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L,
9L, 10L, 10L, 10L, 11L, 11L, 11L, 2L, 2L, 2L), .Label = c("P1",
"P10", "P11", "P2", "P3", "P4", "P5", "P6", "P7", "P8", "P9"), class = "factor"),
visit = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("V1", "V2", "V3"), class = "factor"),
Bacteroides = c(11095L, 9981L, 2426L, 6107L, 14806L, 785L,
34127L, 27590L, 4699L, 42464L, 32146L, 321L, 611L, 402L,
455L, 5597L, 475L, 2842L, 481L, 11508L, 2125L, 842L, 960L,
3215L, 12118L, 10526L, 517L, 67434L, 82449L, 419L, 25643L,
4455L), Clostridium = c(53693L, 51961L, 89862L, 1122L, 3987L,
3095L, 3083L, 372L, 1628L, 4L, 13L, 11346L, 47803L, 10120L,
939L, 2280L, 11355L, 18642L, 4358L, 53L, 47L, 22L, 44L, 1897L,
9328L, 4394L, 4886L, 7025L, 175L, 1522L, 14776L, 30405L),
Turicibacter = c(25L, 0L, 10L, 9L, 0L, 0L, 4428L, 382L, 827L,
18L, 0L, 370L, 106L, 2180L, 5789L, 422L, 4355L, 1585L, 21205L,
567L, 131028L, 32389L, 14953L, 50692L, 3666L, 9811L, 1694L,
123L, 103L, 475L, 1038L, 0L), Haemophilus = c(31L, 27L, 13L,
2693L, 530L, 908L, 103L, 217L, 22L, 21743L, 7413L, 40763L,
1303L, 40182L, 52L, 67L, 18501L, 7547L, 28384L, 756L, 19L,
43928L, 19930L, 433L, 70L, 952L, 16796L, 4415L, 88L, 0L,
4607L, 507L), Streptococcus = c(303L, 160L, 168L, 1205L,
8360L, 12927L, 8380L, 1341L, 306L, 865L, 3490L, 137L, 428L,
427L, 5215L, 861L, 11635L, 15341L, 7306L, 12963L, 192L, 1646L,
2311L, 645L, 9880L, 9314L, 9091L, 6649L, 7283L, 26253L, 21089L,
39463L), Intestinibacter = c(14L, 16L, 0L, 17L, 11L, 32L,
4991L, 17L, 76L, 13L, 0L, 8182L, 14976L, 8062L, 7529L, 917L,
6612L, 14714L, 23287L, 26558L, 32L, 10L, 46L, 18307L, 7201L,
11970L, 6983L, 2963L, 2172L, 1812L, 0L, 1115L), Ruminococcus = c(3237L,
7853L, 95L, 4209L, 380L, 105L, 4141L, 18344L, 16L, 4000L,
2374L, 17L, 690L, 33L, 3393L, 7285L, 259L, 11344L, 69L, 5175L,
46L, 13L, 64L, 156L, 8923L, 19573L, 60L, 6626L, 7614L, 188L,
998L, 109L), Veillonella = c(630L, 318L, 512L, 302L, 1739L,
420L, 779L, 495L, 11L, 538L, 2857L, 338L, 466L, 1777L, 37L,
423L, 2597L, 1330L, 457L, 1720L, 239L, 4659L, 1864L, 188L,
1062L, 4061L, 279L, 723L, 291L, 11009L, 14337L, 7129L), Sutterella = c(65L,
46L, 25L, 27L, 0L, 62L, 20L, 16L, 38L, 8499L, 7987L, 35L,
78L, 37L, 21L, 84L, 12L, 238L, 39L, 1746L, 26L, 31L, 65L,
383L, 11200L, 565L, 50L, 40L, 17L, 14L, 1407L, 353L), Epulopiscium = c(0L,
0L, 0L, 0L, 0L, 12L, 0L, 0L, 0L, 0L, 0L, 14447L, 8925L, 7733L,
0L, 6L, 20L, 823L, 158L, 84L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), Faecalibacterium = c(184L, 203L, 154L,
113L, 92L, 135L, 111L, 144L, 102L, 1297L, 21410L, 132L, 185L,
138L, 127L, 151L, 135L, 204L, 173L, 128L, 203L, 148L, 191L,
177L, 169L, 171L, 193L, 150L, 133L, 169L, 4444L, 404L), Bifidobacterium = c(2288L,
8161L, 63L, 605L, 169L, 95L, 46L, 71L, 72L, 876L, 2540L,
60L, 467L, 73L, 578L, 1537L, 79L, 5413L, 73L, 543L, 127L,
86L, 144L, 76L, 775L, 71L, 84L, 80L, 64L, 47L, 49L, 70L),
Tyzzerella = c(18L, 0L, 0L, 559L, 0L, 0L, 1408L, 1666L, 0L,
86L, 373L, 0L, 373L, 0L, 439L, 235L, 107L, 21L, 0L, 0L, 0L,
0L, 25L, 134L, 4126L, 12034L, 4L, 0L, 0L, 0L, 47L, 0L), Lactobacillus = c(0L,
0L, 0L, 0L, 0L, 14L, 0L, 0L, 0L, 0L, 0L, 5L, 11L, 4L, 39L,
25L, 321L, 56L, 0L, 36L, 0L, 5L, 0L, 5L, 848L, 63L, 0L, 138L,
538L, 3801L, 122L, 4373L), Serratia = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 128L, 69L, 0L, 0L, 0L, 0L, 0L, 226L, 0L,
0L, 0L, 0L, 0L, 7828L, 0L, 0L, 0L, 0L, 70L, 0L, 0L, 0L, 0L
), Rothia = c(0L, 0L, 11L, 6L, 16L, 24L, 0L, 0L, 5L, 0L,
0L, 0L, 0L, 10L, 0L, 9L, 11L, 140L, 267L, 175L, 0L, 190L,
4617L, 0L, 0L, 0L, 1362L, 19L, 47L, 518L, 21L, 34L), Anaerosporobacter = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 256L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 31L, 3239L, 3546L, 0L, 0L,
0L, 0L, 0L, 0L), Erysipelatoclostridium = c(19L, 0L, 7L,
184L, 194L, 23L, 320L, 129L, 7L, 1151L, 436L, 20L, 52L, 0L,
862L, 1365L, 88L, 20L, 0L, 263L, 9L, 6L, 71L, 46L, 1175L,
217L, 0L, 190L, 98L, 0L, 72L, 26L), Paeniclostridium = c(0L,
0L, 0L, 0L, 303L, 0L, 0L, 0L, 0L, 0L, 0L, 129L, 9L, 339L,
0L, 0L, 66L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 5167L, 228L,
221L, 0L, 0L, 0L), Blautia = c(526L, 132L, 101L, 87L, 19L,
97L, 93L, 118L, 71L, 204L, 1356L, 70L, 105L, 84L, 71L, 144L,
88L, 649L, 136L, 627L, 156L, 88L, 142L, 83L, 139L, 138L,
134L, 122L, 81L, 99L, 98L, 125L), Anaerostipes = c(27L, 38L,
25L, 20L, 10L, 24L, 17L, 21L, 0L, 709L, 4603L, 23L, 24L,
20L, 0L, 178L, 18L, 30L, 42L, 24L, 29L, 16L, 37L, 23L, 57L,
39L, 29L, 29L, 16L, 26L, 25L, 27L), Enterococcus = c(31L,
32L, 26L, 126L, 68L, 2498L, 70L, 31L, 26L, 0L, 15L, 59L,
57L, 23L, 395L, 758L, 133L, 0L, 0L, 27L, 50L, 36L, 56L, 21L,
39L, 0L, 422L, 159L, 20L, 24L, 96L, 95L), Citrobacter = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 3583L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 37L, 0L, 0L, 0L, 1088L, 0L, 0L, 0L, 0L, 0L, 144L,
0L, 0L, 0L, 0L), Prevotella = c(155L, 168L, 87L, 153L, 95L,
121L, 100L, 125L, 152L, 307L, 124L, 100L, 84L, 117L, 91L,
168L, 128L, 137L, 130L, 98L, 139L, 114L, 252L, 84L, 159L,
106L, 140L, 201L, 114L, 126L, 160L, 125L), Roseburia = c(621L,
19L, 0L, 0L, 0L, 0L, 0L, 18L, 0L, 46L, 32L, 17L, 13L, 0L,
0L, 36L, 17L, 160L, 0L, 109L, 18L, 15L, 22L, 77L, 1505L,
559L, 38L, 26L, 12L, 22L, 849L, 90L), Parabacteroides = c(60L,
18L, 12L, 114L, 9L, 49L, 349L, 593L, 60L, 158L, 162L, 46L,
53L, 42L, 17L, 33L, 29L, 197L, 49L, 458L, 42L, 45L, 83L,
271L, 479L, 429L, 51L, 63L, 76L, 0L, 85L, 47L), Neisseria = c(0L,
0L, 0L, 77L, 0L, 0L, 0L, 12L, 0L, 9L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 454L, 2L, 0L, 0L, 771L, 2662L, 4L, 0L, 11L, 10L,
0L, 0L, 0L, 0L, 0L), Actinobacillus = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 2670L, 149L, 0L, 0L, 0L, 0L, 0L, 0L, 130L,
0L, 0L, 0L, 0L, 0L, 10L, 0L, 60L, 0L, 0L, 0L, 0L, 0L, 0L),
Granulicatella = c(59L, 27L, 22L, 18L, 22L, 14L, 19L, 36L,
76L, 37L, 0L, 0L, 0L, 61L, 60L, 24L, 93L, 90L, 457L, 60L,
52L, 42L, 215L, 0L, 40L, 45L, 665L, 14L, 27L, 260L, 34L,
46L), Actinomyces = c(52L, 27L, 12L, 8L, 8L, 16L, 36L, 16L,
89L, 12L, 23L, 13L, 0L, 53L, 18L, 0L, 30L, 112L, 624L, 89L,
12L, 45L, 116L, 11L, 58L, 12L, 587L, 65L, 47L, 135L, 18L,
35L), Lachnoclostridium = c(21L, 19L, 17L, 37L, 0L, 0L, 211L,
337L, 13L, 361L, 184L, 0L, 12L, 12L, 19L, 91L, 0L, 66L, 0L,
228L, 44L, 9L, 0L, 77L, 293L, 257L, 0L, 0L, 0L, 0L, 28L,
20L), Pediococcus = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2101L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 9L, 0L, 0L, 0L, 0L, 0L, 0L), Fusobacterium = c(84L,
51L, 55L, 551L, 12L, 19L, 22L, 54L, 23L, 41L, 40L, 21L, 17L,
14L, 14L, 78L, 18L, 228L, 88L, 35L, 75L, 43L, 162L, 24L,
39L, 25L, 90L, 15L, 21L, 56L, 24L, 36L), Alistipes = c(68L,
81L, 24L, 69L, 35L, 66L, 40L, 57L, 60L, 86L, 72L, 48L, 47L,
60L, 51L, 92L, 48L, 67L, 72L, 36L, 40L, 65L, 137L, 21L, 31L,
65L, 84L, 100L, 93L, 42L, 81L, 41L), Eubacterium = c(0L,
7L, 0L, 0L, 0L, 0L, 0L, 0L, 8L, 12L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L)), .Names = c("patient", "visit", "Bacteroides", "Clostridium",
"Turicibacter", "Haemophilus", "Streptococcus", "Intestinibacter",
"Ruminococcus", "Veillonella", "Sutterella", "Epulopiscium",
"Faecalibacterium", "Bifidobacterium", "Tyzzerella", "Lactobacillus",
"Serratia", "Rothia", "Anaerosporobacter", "Erysipelatoclostridium",
"Paeniclostridium", "Blautia", "Anaerostipes", "Enterococcus",
"Citrobacter", "Prevotella", "Roseburia", "Parabacteroides",
"Neisseria", "Actinobacillus", "Granulicatella", "Actinomyces",
"Lachnoclostridium", "Pediococcus", "Fusobacterium", "Alistipes",
"Eubacterium"), class = "data.frame", row.names = c("AA_001_20-4-16",
"AA_001-V2", "AA_001_19-5-16", "AA_ISS-01-V1", "AA_ISS-01-V2",
"AA_ISS-01-V3", "AA_ISS-02-V1", "AA_ISS-02-V2", "AA_ISS-02-V3",
"AA_ISS-03-V1", "AA_ISS-03-V2", "AA_ISS-04-V1", "AA_ISS-04-V2",
"AA_ISS-04-V3", "AA_ISS-05-V1", "AA_ISS-05-V2", "AA_ISS-05-V3",
"AA_ISS-06-V1", "AA_ISS-06-V2", "AA_ISS-06-V3", "AA_ISS-07-V1",
"AA_ISS-07-V2", "AA_ISS-07-V3", "AA_ISS-08-V1", "AA_ISS-08-V2",
"AA_ISS-08-V3", "AA_ISS-09-V1", "AA_ISS-09-V2", "AA_ISS-09-V3",
"AA_ISS-10-V1", "AA_ISS-10-V2", "AA_ISS-10-V3"))
So far I have tried to reshape the data and made a few attempts to plot it properly, but in vain.
# Asker's attempt: reshape the wide table to long format and draw dodged bars per visit.
library(reshape2)
# melt() keeps patient/visit as id columns; the remaining columns become variable/value pairs.
df1<-melt(level6.top35, id.vars = c("patient","visit"))
ggplot(data=df1,aes(x=variable,y=value, fill=visit))+geom_bar(position="dodge",stat="identity")
# NOTE(review): the line above is already a complete expression, so the line below
# starts a new statement with a leading `+` and is not added to the plot above.
# NOTE(review): `sd` is not a column created by melt() here, so value-sd presumably
# resolves to the function stats::sd -- confirm; a per-group SD has to be computed first.
+geom_errorbar( aes(x=variable, ymin=value-sd, ymax=value+sd), width=0.4, colour="orange", alpha=0.9, size=1.3)
I have managed to draw the bar plot but not the error bars. Ideally, I would like a bar plot with error bars.
You need to summarise the data and then add the error bars, e.g. like this:
# Summarise the counts per visit and taxon, then plot mean bars with +/- 1 SD
# error bars. Expects `level6.top35`: a wide data frame with `patient`,
# `visit` and one count column per taxon.
library(reshape2)
library(dplyr)
library(ggplot2)

# Wide -> long: one row per patient/visit/taxon, with the count in `value`.
df1 <- melt(level6.top35, id.vars = c("patient", "visit"))

df1 %>%
  group_by(visit, variable) %>%
  # SD is computed first because `value` is overwritten by the group mean.
  summarise(SD = sd(value), value = mean(value)) %>%
  ungroup() %>%
  ggplot(aes(x = variable, y = value, fill = visit)) +
  geom_bar(stat = "identity", position = "dodge") +
  # `width` is a fixed setting rather than a data mapping, so it is set
  # outside aes(). The dodge width of 0.9 matches the default bar width so
  # that every error bar is centred on its own bar.
  geom_errorbar(aes(ymin = value - SD, ymax = value + SD),
                width = 0.2,
                position = position_dodge(width = 0.90)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = "Species")
It is still up to you to decide whether you want to plot only the positive part of the error bars, limit the y-axis to avoid negative values, etc.

Plot visreg over a boxplot (GLM with binomial predictor)

I fitted some GLMs with a binomial predictor and would like to plot them with visreg. I usually plot the raw data with par(new=T) as well for better clarity. I don't really like the default output here (x-axis from 0 to 1 in steps of 0.2, with a lot of data points piled up at 0 and 1), so I was thinking about plotting the visreg fit over a boxplot, since boxplots look much better with binomial data. However, I can't get the two plots to align, because they always have different "starts" and "ends". How can I make the visreg line start at the "No" and end at the "Yes" of the boxplot?
# Asker's attempt: draw a base-graphics boxplot, then overlay the visreg fit on the same device.
# Gaussian GLM of herb cover on the Fire indicator (coded 0/1 in the data below).
fit <- glm (Cov.herb ~ Fire, family=gaussian, data=data)
# Boxplot drawn without axes (axes=F); the axes are added by hand below.
boxplot(data$Cov.herb ~ data$Fire, ylim=c(0,100), axes=F, ylab="Herb cover [%]", xlab="Fire")
# NOTE(review): xaxt="n" appears to suppress this x axis entirely; the group labels come from mtext().
axis(1, xaxp=c(1,2,1), xaxt="n")
# Labels are placed at x = 1 and x = 2 (presumably where boxplot() centres the two boxes -- confirm).
mtext(text=c("No","Yes"),side=1,line=0.5,at=c(1,2))
axis(2, las=1)
box()
# par(new=T) lets the next high-level plot draw over the existing one instead of clearing it.
par(new=T)
# NOTE(review): this overlay uses xlim=c(0,1) while the labels above sit at 1 and 2, so the two
# plots do not share an x coordinate system -- likely the cause of the misalignment described.
visreg(fit, scale = "response", type="conditional",line=list(col="red", lwd=1), ylim=c(0,100), xlim=c(0,1), rug=F, axes=F, ann=F)
example plot
Cheers,
Alex
data:
structure(list(Cov.herb = c(40L, 80L, 30L, 2L, 40L, 8L, 5L, 5L,
20L, 45L, 55L, 55L, 35L, 40L, 65L, 70L, 2L, 15L, 1L, 1L, 1L,
25L, 10L, 1L, 10L, 5L, 5L, 15L, 10L, 5L, 15L, 5L, 5L, 35L, 1L,
1L, 35L, 1L, 10L, 5L, 5L, 10L, 5L, 10L, 10L, 20L, 10L, 0L, 3L,
1L, 2L, 4L, 1L, 10L, 30L, 10L, 1L, 2L, 0L, 15L, 25L, 50L, 15L,
35L, 30L, 5L, 5L, 1L, 1L, 1L, 10L, 0L, 0L, 5L, 2L, 1L, 10L, 0L,
2L, 1L, 1L, 5L, 1L, 15L, 1L, 1L, 1L, 0L, 5L, 25L, 3L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 3L, 1L, 1L, 0L, 5L, 1L, 1L, 1L, 1L, 7L, 1L,
1L, 1L, 1L, 5L, 0L, 2L, 3L, 5L, 3L, 1L, 1L, 2L, 0L, 2L, 0L, 10L,
1L, 20L, 3L, 5L, 20L, 3L, 20L, 5L, 10L, 15L, 30L, 0L, 20L, 45L,
1L, 1L, 2L, 1L, 3L, 0L, 5L, 0L, 35L, 1L, 5L, 25L, 0L, 0L, 40L,
3L, 15L, 10L, 3L, 50L, 30L, 10L, 1L, 0L, 5L, 10L, 10L, 2L, 2L,
5L, 1L, 2L, 1L, 1L, 0L, 0L, 1L, 2L, 5L, 15L, 0L, 1L, 1L, 1L,
1L, 0L, 1L, 5L, 1L, 5L, 35L, 1L, 0L, 1L, 0L, 5L, 1L, 1L, 3L,
15L, 1L, 3L, 1L, 0L, 0L, 0L, 15L, 0L, 1L, 1L, 3L, 35L, 80L, 10L,
2L, 10L, 3L, 3L, 2L, 10L, 50L, 20L, 40L, 2L, 40L, 45L, 25L, 5L,
25L, 50L, 35L, 15L, 45L, 10L, 5L, 15L, 2L, 30L, 2L, 3L, 15L,
5L, 45L, 35L, 20L, 70L, 20L, 10L, 30L, 25L, 8L, 4L, 45L, 60L,
35L, 5L, 40L, 30L, 0L, 30L, 3L, 4L, 25L, 15L, 10L, 15L, 25L,
20L, 7L, 25L, 25L, 40L, 35L, 30L, 40L, 25L, 50L, 30L, 25L, 60L,
15L, 25L, 25L, 50L, 30L, 20L, 2L, 3L, 20L, 25L, 35L, 30L, 10L,
15L, 65L, 10L, 20L, 20L, 2L, 7L, 20L, 25L, 30L, 30L, 9L, 20L,
40L, 7L, 20L, 15L, 15L, 30L, 20L, 35L, 8L, 40L, 20L, 3L, 55L,
35L, 10L, 10L, 65L, 20L, 35L, 60L, 45L, 20L, 10L, 35L, 15L, 20L,
15L, 40L, 10L, 10L, 60L, 60L, 40L, 10L, 10L, 25L, 8L, 20L, 40L,
15L, 25L, 5L, 20L, 20L, 20L, 25L, 30L, 35L, 20L, 110L, 50L, 20L,
20L, 10L, 45L, 25L, 20L, 55L, 10L, 5L, 15L, 15L, 1L, 10L, 15L,
15L, 10L, 30L, 20L, 40L, 55L, 55L, 20L, 30L, 10L, 50L, 40L, 5L,
15L, 10L, 30L, 15L, 20L, 5L, 45L, 50L, 25L, 45L, 30L, 7L, 25L,
30L, 5L, 7L, 50L, 60L, 50L, 10L, 30L, 50L, 15L, 15L, 30L, 15L,
25L, 40L, 10L, 2L, 60L, 20L, 65L, 5L, 15L, 3L, 15L, 40L, 50L,
45L, 30L, 5L, 45L, 15L, 25L, 65L, 15L, 50L, 55L, 30L, 10L, 35L,
15L, 20L, 20L, 10L, 20L, 15L, 45L, 40L, 10L, 7L, 25L, 20L, 60L,
4L, 7L, 40L, 60L, 50L, 50L, 10L, 50L, 5L, 10L, 50L, 20L, 40L,
20L, 25L, 25L, 35L, 10L, 2L, 15L, 60L, 25L, 30L, 20L, 25L, 10L,
10L, 20L, 40L, 40L, 45L, 10L, 35L, 60L, 50L, 10L, 40L, 50L, 25L,
20L, 25L, 25L, 45L, 20L, 30L, 65L, 30L, 35L, 40L, 25L, 15L, 10L,
50L, 25L, 45L, 40L, 20L, 5L, 65L, 5L, 10L, 15L, 7L, 20L, 45L,
15L, 5L, 20L, 20L, 20L, 50L, 15L, 20L, 30L, 25L, 45L, 45L, 35L,
40L, 45L, 4L, 10L, 20L, 20L, 30L, 15L, 30L, 50L, 35L, 45L, 25L,
25L, 10L, 5L, 30L, 30L, 10L, 70L, 25L, 25L, 7L, 20L, 5L, 20L,
8L, 15L, 10L, 20L, 10L, 7L, 15L, 15L, 40L, 50L, 15L, 20L, 8L,
45L, 40L, 15L, 25L, 40L, 20L, 35L, 40L, 70L, 20L, 20L, 40L, 5L,
20L, 7L, 40L, 10L, 5L, 45L, 20L, 10L, 20L, 20L, 45L, 15L, 7L,
30L, 30L, 35L, 10L, 20L, 5L, 15L, 35L, 40L, 40L, 10L, 5L, 15L,
70L, 20L, 85L, 15L, 7L, 55L, 55L, 5L, 20L, 25L, 5L, 30L, 20L,
8L, 30L, 40L, 25L, 10L, 5L, 30L, 10L, 5L, 10L, 35L, 2L, 10L,
10L, 10L, 90L, 45L, 60L, 7L, 1L, 15L), Fire = c(0L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L,
1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L,
1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("Cov.herb",
"Fire"), class = "data.frame", row.names = c(2L, 3L, 4L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 29L, 30L, 31L, 32L, 33L,
34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 43L, 44L, 45L, 46L, 47L,
48L, 49L, 50L, 51L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L,
63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 74L, 75L, 76L,
77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 87L, 88L, 89L, 90L,
91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L,
103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L,
114L, 115L, 116L, 117L, 118L, 119L, 120L, 121L, 122L, 123L, 124L,
125L, 126L, 153L, 154L, 155L, 161L, 162L, 163L, 164L, 165L, 166L,
167L, 169L, 170L, 171L, 173L, 174L, 175L, 176L, 177L, 178L, 179L,
180L, 181L, 182L, 183L, 184L, 185L, 186L, 187L, 188L, 189L, 190L,
191L, 192L, 193L, 194L, 195L, 196L, 197L, 198L, 199L, 200L, 201L,
202L, 203L, 204L, 205L, 206L, 207L, 209L, 211L, 213L, 214L, 215L,
216L, 217L, 218L, 219L, 220L, 221L, 222L, 223L, 224L, 225L, 226L,
227L, 228L, 229L, 230L, 231L, 232L, 233L, 234L, 235L, 236L, 237L,
238L, 239L, 240L, 241L, 242L, 243L, 244L, 245L, 246L, 247L, 248L,
249L, 250L, 251L, 252L, 253L, 254L, 255L, 256L, 257L, 258L, 259L,
260L, 261L, 262L, 263L, 269L, 270L, 274L, 275L, 276L, 277L, 279L,
280L, 281L, 282L, 283L, 284L, 285L, 286L, 287L, 288L, 289L, 290L,
291L, 292L, 293L, 294L, 295L, 296L, 297L, 298L, 299L, 300L, 301L,
302L, 303L, 304L, 305L, 306L, 307L, 308L, 309L, 310L, 311L, 312L,
313L, 314L, 315L, 316L, 317L, 318L, 319L, 320L, 321L, 322L, 323L,
324L, 325L, 326L, 327L, 328L, 329L, 330L, 331L, 332L, 333L, 334L,
335L, 336L, 337L, 338L, 339L, 340L, 341L, 342L, 343L, 344L, 345L,
346L, 347L, 349L, 350L, 351L, 352L, 353L, 354L, 355L, 356L, 357L,
358L, 359L, 360L, 361L, 362L, 363L, 364L, 365L, 366L, 367L, 368L,
369L, 370L, 371L, 372L, 373L, 374L, 375L, 376L, 377L, 378L, 380L,
381L, 382L, 383L, 384L, 385L, 386L, 387L, 388L, 389L, 390L, 391L,
392L, 393L, 394L, 395L, 396L, 397L, 398L, 399L, 400L, 401L, 402L,
403L, 404L, 405L, 406L, 407L, 408L, 409L, 410L, 411L, 412L, 413L,
414L, 415L, 416L, 417L, 418L, 419L, 420L, 421L, 422L, 423L, 424L,
425L, 426L, 427L, 428L, 429L, 430L, 431L, 432L, 433L, 434L, 435L,
436L, 437L, 438L, 439L, 440L, 441L, 443L, 444L, 445L, 446L, 447L,
448L, 449L, 450L, 451L, 453L, 454L, 455L, 457L, 458L, 459L, 460L,
461L, 463L, 464L, 465L, 466L, 467L, 468L, 469L, 470L, 471L, 472L,
473L, 474L, 475L, 476L, 477L, 478L, 479L, 480L, 481L, 482L, 483L,
484L, 485L, 486L, 487L, 488L, 489L, 490L, 491L, 492L, 493L, 494L,
495L, 496L, 497L, 498L, 499L, 500L, 501L, 502L, 503L, 504L, 505L,
506L, 507L, 508L, 509L, 510L, 511L, 512L, 513L, 514L, 515L, 516L,
517L, 518L, 519L, 520L, 521L, 522L, 523L, 524L, 525L, 526L, 527L,
528L, 529L, 530L, 531L, 532L, 533L, 534L, 535L, 536L, 537L, 538L,
539L, 540L, 541L, 542L, 543L, 544L, 545L, 546L, 547L, 548L, 549L,
551L, 552L, 553L, 554L, 555L, 556L, 557L, 558L, 559L, 560L, 561L,
562L, 563L, 564L, 565L, 566L, 567L, 568L, 569L, 570L, 571L, 572L,
573L, 574L, 575L, 576L, 577L, 578L, 579L, 580L, 581L, 582L, 583L,
584L, 585L, 587L, 588L, 589L, 590L, 591L, 592L, 593L, 594L, 595L,
596L, 597L, 598L, 599L, 600L, 601L, 602L, 603L, 604L, 605L, 606L,
607L, 608L, 609L, 610L, 611L, 612L, 613L, 614L, 615L, 616L, 617L,
618L, 619L, 620L, 621L, 622L, 623L, 624L, 625L, 626L, 628L, 629L,
631L, 632L, 633L, 634L, 635L, 636L, 637L, 638L, 639L, 640L, 641L,
642L, 643L, 644L, 645L, 646L, 648L, 649L, 650L, 651L, 652L, 653L,
654L, 655L, 656L, 657L, 658L, 659L, 660L, 661L, 662L, 663L, 664L,
665L, 666L, 667L, 668L, 669L, 670L, 671L, 672L, 673L, 674L, 675L,
676L, 677L, 678L, 679L, 680L, 682L, 683L, 684L, 685L, 686L, 687L,
689L, 690L, 691L, 692L, 693L, 694L, 697L, 698L, 699L, 700L, 701L,
702L, 704L, 705L, 706L, 707L))
So, my point was that doing it this way would give you more flexibility with your plotting. For example,
# Overlay the visreg fit (line plus confidence band) on a ggplot2 boxplot of
# the raw data. Expects a data frame `data` with columns `Cov.herb` and
# `Fire` (coded 0/1).

# Load libraries (visreg() is used below, so visreg must be attached too)
library(visreg)
library(ggplot2)

# Fit model
fit <- glm(Cov.herb ~ Fire, family = gaussian, data = data)

# Get model data for plotting; plot = FALSE returns the fit without drawing it
vis.out <- visreg(fit, scale = "response", plot = FALSE)

# Create plot. The two boxes sit at positions 1 and 2 on the discrete x axis,
# so the numeric Fire values (0 to 1) from visreg are shifted by + 1 to make
# the fitted line start at the first box and end at the second.
p <- ggplot(data = data) +
  geom_boxplot(
    aes(x = as.factor(Fire), y = Cov.herb, fill = as.factor(Fire)),
    alpha = 0.3, outlier.alpha = 1
  ) +
  xlab("Fire") +
  ylab("Herb cover [%]") +
  geom_ribbon(
    data = vis.out$fit,
    aes(x = Fire + 1, ymin = visregLwr, ymax = visregUpr),
    fill = "lightgrey"
  ) +
  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
  # `size` is kept so the snippet still runs on older ggplot2 versions.
  geom_line(
    data = vis.out$fit,
    aes(x = Fire + 1, y = visregFit),
    colour = "salmon", size = 1.25
  ) +
  # Relabel the factor levels "0" and "1"
  scale_x_discrete(labels = c("No", "Yes")) +
  theme(legend.position = "none")
print(p)
gives,
Is that the sort of thing you're looking for? (You could also add all the data points using geom_point to plot on top of the boxes. I think that usually looks pretty cool.)

Resources