Volcano plot for multiple clusters - r

I am trying to make a volcano plot for different clusters. I have 2 conditions, untreated vs. treated. I have a differential expression excel file that cellranger generated for me but within the file it has multiple clusters each which have a fold change and p value. How do I create a volcano plot that contains all the clusters rather than one? Would I have to do a volcano plot for each cluster and then combine them all somehow?
I used this code to generate the plot for just one of the clusters...
# Load the differential-expression table (comma-separated, with a header row).
macrophage_list <- read.table("differential_expression_macrophage.csv", header = TRUE, sep = ",")
# Volcano plot for a single cluster: log2 fold change on x, adjusted p-value on y.
EnhancedVolcano(
  macrophage_list,
  lab = as.character(macrophage_list$FeatureName),
  x = "Cluster1.Log2.Fold.Change",
  y = "Cluster1.Adjusted.P.Value",
  xlim = c(-8, 8),
  title = "Macrophage",
  pCutoff = 10e-5,
  FCcutoff = 1.5,
  pointSize = 3.0,
  labSize = 3.0
)
How do I merge all the information in the excel file to create a volcano plot?
I uploaded each data cluster one by one and then merged them by using rbind, but is there a simpler/quicker way to do this?
output for dput(gene_list[1:20, 1:14])
structure(list(Feature.ID = structure(1:20, .Label = c("a", "b",
"c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
"p", "q", "r", "s", "t"), class = "factor"), Feature.Name = structure(1:20, .Label = c("A",
"B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
"O", "P", "Q", "R", "S", "T"), class = "factor"), Cluster.1_Mean.Counts = c(0.000960904,
0.000320301, 0.001281205, 0.000320301, 0.000320301, 0.016335362,
0.000960904, 0, 0.001601506, 0.000320301, 0.007046627, 0.026585,
0.017296265, 0.004804518, 0, 0.874742598, 0.017616566, 0.007366928,
0.008327831, 0.001921807), Cluster.1_Log2.fold.change = c(0.291978774,
1.954943787, -2.008530337, -2.482461526, 3.539906287, 0.407455991,
-0.214981215, 1.539906287, 0.802940693, 2.539906287, -1.333136538,
-1.879953595, -0.52422405, -0.877946228, 1.539906287, -0.629373147,
1.118442519, 0.170672478, 1.065975099, 1.099333696), Cluster.1_Adjusted.p.value = c(1,
0.910243711, 0.04672812, 0.080866038, 0.610296549, 0.80063597,
1, 1, 0.951841603, 0.797013021, 0.103401275, 0.000594428, 0.907754993,
0.532689631, 1, 0.480958806, 0.078345008, 1, 0.198557945, 0.668312142
), Cluster.2_Mean.Counts = c(0.000902278, 0.001804555, 0.006315943,
0.004511388, 0, 0.029775159, 0.001804555, 0, 0.002706833, 0,
0.023459216, 0.128123411, 0.030677437, 0.009022775, 0, 2.174488883,
0.018947828, 0.019850106, 0.010827331, 0.000902278), Cluster.2_Log2.fold.change = c(0.792589781,
4.769869705, 0.35201719, 0.839132367, 3.184907204, 1.32985554,
0.962514783, 3.184907204, 1.725475586, 2.599944703, 0.560416339,
0.580736324, 0.407299626, 0.184907204, 3.184907204, 0.816580902,
1.120776867, 1.742684876, 1.409613491, 0.599944703), Cluster.2_Adjusted.p.value = c(1,
0.153573448, 1, 0.737977734, 1, 0.14478935, 0.853816767, 1, 0.47952604,
1, 0.65316285, 0.507251471, 0.776636022, 1, 1, 0.346630571, 0.285006452,
0.060868933, 0.21546202, 1), Cluster.3_Mean.Counts = c(0.001813813,
0, 0.019045032, 0.00725525, 0, 0.022672657, 0.000906906, 0, 0,
0, 0.029927908, 0.043531502, 0.046252221, 0.029021001, 0, 3.146057931,
0.020858845, 0.013603594, 0.008162157, 0), Cluster.3_Log2.fold.change = c(1.455721575,
2.192687169, 2.008262598, 1.504631175, 3.192687169, 0.9044422,
0.334706174, 3.192687169, -0.451169021, 2.607724668, 0.931421856,
-1.032594057, 1.038258504, 1.970294748, 3.192687169, 1.412371018,
1.26985503, 1.14829305, 0.991053308, -0.451169021), Cluster.3_Adjusted.p.value = c(0.757752635,
1, 0.032609935, 0.33316083, 1, 0.441825712, 1, 1, 1, 1, 0.380305075,
0.605158722, 0.339946318, 0.016952505, 1, 0.056529024, 0.259458704,
0.339639234, 0.536765022, 1), Cluster.4_Mean.Counts = c(0.000641899,
0, 0.002567596, 0.004493293, 0, 0.010270384, 0.003209495, 0,
0.000641899, 0, 0.028243557, 0.160474756, 0.012196081, 0.005135192,
0, 1.199709274, 0.005135192, 0.004493293, 0.005777091, 0.001283798
), Cluster.4_Log2.fold.change = c(0.269229783, 1.661547206, -0.886889419,
0.778904157, 2.661547206, -0.289908942, 1.602653517, 2.661547206,
0.076584705, 2.076584705, 0.854192284, 0.961549693, -0.967809414,
-0.644261223, 2.661547206, -0.104384578, -0.790579612, -0.467735811,
0.459913345, 0.722947751), Cluster.4_Adjusted.p.value = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.584036686, 1, 1, 1, 1, 1, 1,
1, 1)), class = "data.frame", row.names = c(NA, 20L))

Based on your dataset, you need to reshape the data into long format. To be able to reshape it with a single pattern, we first rename some columns so that the cluster prefix and the measurement name are separated by an underscore:
# Separate the cluster prefix from the measurement name with "_".
# The dot is escaped so only a literal "." is replaced (an unescaped "." is a
# regex wildcard and would match any character before Mean/Log2/Adjus).
colnames(df) <- gsub("\\.(Mean|Log2|Adjus)", "_\\1", colnames(df))
Now, we can reshape it using the right pattern with pivot_longer function from tidyr package:
library(tidyr)
# Reshape to long format. For every column except the two feature identifiers,
# the part of the name before the last "_" goes to `set` and the part after it
# names the value column.
final_df <- pivot_longer(
  df,
  cols = -c(Feature.ID, Feature.Name),
  names_to = c("set", ".value"),
  names_pattern = "(.+)_(.+)"
)
# A tibble: 80 x 6
Feature.ID Feature.Name set Mean.Counts Log2.fold.change Adjusted.p.value
<fct> <fct> <chr> <dbl> <dbl> <dbl>
1 a A Cluster.1 0.000961 0.292 1
2 a A Cluster.2 0.000902 0.793 1
3 a A Cluster.3 0.00181 1.46 0.758
4 a A Cluster.4 0.000642 0.269 1
5 b B Cluster.1 0.000320 1.95 0.910
6 b B Cluster.2 0.00180 4.77 0.154
7 b B Cluster.3 0 2.19 1
8 b B Cluster.4 0 1.66 1
9 c C Cluster.1 0.00128 -2.01 0.0467
10 c C Cluster.2 0.00632 0.352 1
# … with 70 more rows
Now, we can create the volcano plot by using ggplot2 and ggrepel libraries for the labeling of Feature.Name (if you don't have ggrepel, you have to install it):
library(ggplot2)
library(ggrepel)
# Only points with an adjusted p-value below 0.05 receive a text label.
labelled_points <- subset(final_df, Adjusted.p.value < 0.05)
ggplot(
  final_df,
  aes(x = Log2.fold.change, y = -log10(Adjusted.p.value), label = Feature.Name)
) +
  geom_point() +
  geom_text_repel(data = labelled_points, aes(label = Feature.Name))
And you get your volcano plot with all clusters merged, all points with the same color, and with labeling of Feature.names with an adjusted p value < 0.05

Related

Remove Columns from a table that are 90% one value

Example Data:
# Example data: columns C and D are 90% a single value.
A <- c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2)
# The letters must be quoted; bare names would be looked up as objects and fail.
B <- c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J")
C <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 0)
D <- c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE)
df1 <- data.frame(A, B, C, D)
df1 %>%
select_if(
###column is <90% one value
)
So I have a table that has a few columns that are predominantly one value--like C and D in the above example. I need to get rid of any columns that are 90% or more one unique value. How can I get rid of the columns that fit this criteria?
We may use select with where, get the frequency count with table, convert to proportions, get the max value and check if it is less than .90 to select the particular column
library(dplyr)
# Keep a column only when its most frequent value accounts for less than 90%
# of the entries counted by table().
df1 <- select(
  df1,
  where(function(column) max(proportions(table(column))) < .90)
)
data
df1 <- structure(list(A = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2), B = c("A",
"B", "C", "D", "E", "F", "G", "H", "I", "J"), C = c(1, 1, 1,
1, 1, 1, 1, 1, 1, 0), D = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, FALSE)), class = "data.frame", row.names = c(NA,
-10L))

Removing "unused" nodes in sankey network

I am trying to build a sankey network.
This is my data and code:
library(networkD3)

# Node table with a single character column `name`.
# Note that "D" and "E" each occur twice in this vector.
node_names <- c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "D", "E", "N", "O", "P", "Q", "R")
nodes <- data.frame(name = node_names)
nodes$name <- as.character(nodes$name)

# Link table, one row per flow: source index, target index, value.
link_rows <- rbind(
  c(0, 2, 318.167),
  c(0, 3, 73.85),
  c(0, 4, 51.1262),
  c(0, 5, 6.83333),
  c(0, 6, 5.68571),
  c(0, 7, 27.4167),
  c(0, 8, 4.16667),
  c(0, 9, 27.7381),
  c(1, 10, 627.015),
  c(1, 3, 884.428),
  c(1, 4, 364.211),
  c(1, 13, 12.33333),
  c(1, 14, 9),
  c(1, 15, 37.2833),
  c(1, 16, 9.6),
  c(1, 17, 30.5485)
)
links <- data.frame(link_rows)
colnames(links) <- c("source", "target", "value")
links$source <- as.integer(links$source)
links$target <- as.integer(links$target)
links$value <- as.numeric(links$value)

sankeyNetwork(
  Links = links, Nodes = nodes,
  Source = "source", Target = "target", Value = "value", NodeID = "name",
  fontSize = 12, fontFamily = 'Arial', nodeWidth = 20
)
The problem is that A and B only have common links to D and E.
Although the links are correctly displayed, D and E are also shown at the right-bottom.
How can I avoid this ?
Note: If I specify
nodes <- data.frame(c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "N", "O", "P", "Q", "R"))
no network at all is created.
Nodes must be unique, see the example below. I removed the repeated nodes "D" and "E"; the links then have to be adjusted to match, because we now have only 16 nodes (zero-based, 0:15), while the last 2 rows of your links data frame reference 16 and 17.
Or, as @CJYetman (networkD3 author) comments:
Another way to say it... every node that is in the nodes data frame will be plotted, even if it has the same name as another node, because the index is technically the unique id.
library(networkD3)
# Unique node names; `ix` is the zero-based index that the links refer to.
nodes <- data.frame(name = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "N", "O", "P", "Q", "R"),
ix = 0:15)
# Dropping the duplicated "D" and "E" (old indices 11 and 12) shifts every
# later node down by two, so the old targets 13..17 (N, O, P, Q, R) become
# 11..15. Re-indexing keeps all 16 links and keeps each one attached to the
# node it originally pointed at.
links <- data.frame(matrix(
c(0, 2, 318.167,
0, 3, 73.85,
0, 4, 51.1262,
0, 5, 6.83333,
0, 6, 5.68571,
0, 7, 27.4167,
0, 8, 4.16667,
0, 9, 27.7381,
1, 10, 627.015,
1, 3, 884.428,
1, 4, 364.211,
1, 11, 12.33333,
1, 12, 9,
1, 13, 37.2833,
1, 14, 9.6,
1, 15, 30.5485), nrow=16, ncol=3, byrow = TRUE))
colnames(links) <- c("source", "target", "value")
sankeyNetwork(Links = links, Nodes = nodes, Source = "source",
Target = "target", Value = "value", NodeID = "name",
fontSize = 12, fontFamily = 'Arial', nodeWidth = 20)

Building sequence data for a recommender system- replacing cross-tabular matrix with a variable value

I am trying to build a sequence data for a recommender system. I have built a cross-tabular data (Table 1) and Table 2 as shown below:
enter image description here
I have been trying to replace all the 1's in Table 1 by the "Grade" from the Table 2 in R.
Any insight/suggestion is greatly appreciated.
Instead of replacing the values in the first table with those from the second, the second table can be reshaped directly to 'wide' format with dcast
library(reshape2)
# Spread the grades to one column per course, then put the columns in the
# same order as df1.
wide_grades <- dcast(df2, St.No. ~ Courses, value.var = 'Grade')
res <- wide_grades[names(df1)]
res
# St.No. Math Phys Chem CS
#1 1 A B
#2 2 B B
#3 3 A A C
#4 4 B B D
If we need to replace the blanks with 0
# Replace empty-string cells with "0" (the grade columns are character).
res[res == ""] <- "0"
data
df1 <- data.frame(St.No. = 1:4, Math = c(0, 0, 1, 1), Phys = c(1, 1, 0, 1),
Chem = c(0, 1, 1, 0), CS = c(1, 0, 1, 1))
df2 <- data.frame(St.No. = rep(1:4, each = 4), Courses = rep(c("Math",
"Phys", "Chem", "CS"), 4),
Grade = c("", "A", "", "B", "", "B", "B", "",
"A", "", "A", "C", "B", "B", "", "D"),
stringsAsFactors = FALSE)

Extract the max values for row and column

I need to find the values for which the row max and the column max are at the same position. Test data (the real data doesn't need to be a square matrix):
scores<-structure(c(0.4, 0.6, 0.222222222222222, 0.4, 0.4, 0, 0.25, 0.5,
0.285714285714286), .Dim = c(3L, 3L), .Dimnames = list(c("a",
"b", "c"), c("d", "e", "f")))
I already found which are the columns/rows with the max value for that row/column.
rows<-structure(list(a = c("d", "e"), b = "d", c = "f"), .Names = c("a",
"b", "c"))
cols<-structure(list(d = "b", e = c("a", "b"), f = "b"), .Names = c("d",
"e", "f"))
But I can't manage to get the values from the matrix. The problem arises when the same (max) value appears twice or more. I don't know how to check the indices in that case. I tried using mapply:
mapply(function(x, y) {
cols[x] == rows[y]
}, rows, cols)
But this stops when rows or cols has more than one element.
Expected output: c(0.6, 0.4)
The first is the max value of column 1 and row 2, the second value is the max value of row 1 and column 2.
d e f | Max
a 0.4000000 0.4 0.2500000 0.4
b 0.6000000 0.4 0.5000000 0.6
c 0.2222222 0.0 0.2857143 0.2857
Max: 0.6 0.4 0.5
As you can see, for row 2 and column 1 the max value is the same, and for row 1 and column 2 it is the same value, but for row 3 and column 3 it isn't.
I think , I understood what you are trying to do. Not an optimal solution though.
We find out the indices for maximum value in rows as well as column and then find out the indices which intersect and display the corresponding value from the dataframe.
# Linear indices of the cells that hold their row maximum. apply() over rows
# returns its result transposed (one column per row of `scores`), so t() is
# needed to bring the indices back into the layout of `scores`.
a1 <- which(t(apply(scores, 1, function(x) x == max(x))))
# Linear indices of the cells that hold their column maximum (already laid
# out like `scores`).
a2 <- which(apply(scores, 2, function(x) x == max(x)))
# Cells that are both a row maximum and a column maximum.
scores[intersect(a1, a2)]
#[1] 0.6 0.4
And in one-line
# t() undoes the transposition apply() introduces when iterating over rows, so
# both index sets refer to the same cell layout (needed for non-square input).
scores[intersect(which(t(apply(scores, 1, function(x) x == max(x)))),
which(apply(scores, 2, function(x) x == max(x))))]
This is what you want:
# Compute rows and columns max and rows max positions
row_max <- apply(scores, 1, max)
col_max <- apply(scores, 2, max)
# Flag every cell that attains its row maximum / its column maximum, so that
# tied maxima are all considered (which.max() would only see the first one)
is_row_max <- scores == row_max        # row_max is recycled down each column
is_col_max <- t(t(scores) == col_max)  # col_max is recycled along each row
# Keep a row's maximum when at least one of its maximal cells is also the
# maximum of its column
res <- rowSums(is_row_max & is_col_max) > 0
row_max[res]
It also works with the same max value on multiple rows/columns, for example with this data:
scores <- structure(c(0.4, 0.6, 0.222222222222222, 0.4, 0.4, 0, 0.25, 0.5,
0.285714285714286, 0.13, 0.2, 0.6), .Dim = c(4L, 3L),
.Dimnames = list(c("a", "b", "c", "d"), c("e", "f", "g")))

One-way ANOVA for each sub-group in a melted data frame

I have a very large data set that requires individual one-way ANOVAs for multiple species on 4 different treatments with several measurements. Usually I just make separate excel spread sheets for each species and run ANOVAs on each, looping through each measurement column, but this is very time consuming. Is it possible to use a single spread sheet and run ANOVAs on the melted data? Or perhaps there is another reshape option I could use?
Data:
structure(list(Species = c("A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B",
"B", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "D",
"D", "D", "D", "D", "D", "D", "D", "D", "D"), TREATMENT = c(1,
1, 1, 2, 2, 2, 3, 3, 4, 4, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 1,
1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4),
`CLASS 1` = c(9.3993303987076, 8.14588087483834, 8.44889021858093,
28.2773809415175, 9.49156649568952, 37.406663111623, 8.42458221212166,
35.529904738035, 33.1401135085753, 8.26977630375797, 7.87786527094827,
7.83020300515061, 35.1465417538538, 10.5560853720815, 7.54702433773332,
7.15030081390987, 7.73624654623485, 33.6461639540039, 10.3098164094602,
7.79017325570062, 8.47473750173462, 8.37179798600773, 31.7364310355766,
2.00147496567679, 9.30194886619568, 7.87886829173726, 7.93445694220837,
9.10020522660375, 8.81542855137005, 7.83313314713951, 7.84449591023115,
23.6150030864875, 9.3452854347794, 8.91047098149179, 7.76031738257155,
9.79467065201063, 24.7592334362831, 8.54842834366722, 7.60436112798701,
8.93480758329653, 8.72406315335014, 9.49850179222777), `CLASS 2` = c(10.8069912074565,
4.52426389123869, 8.13120921128287, 52.3870196313339, 7.17369219206721,
45.7325224336886, 17.8345921677786, 38.4579761235057, 37.5916934855387,
6.28803058195647, 3.72788988807285, 4.64744990904241, 29.7689968962103,
7.08515103144071, 6.44277647222835, 5.71017728280462, 6.28290843412007,
45.6123170472575, 6.98431855663527, 8.03809625184267, 4.76656440828616,
6.74640254081232, 31.3243238213156, 45.1287867136276, 7.6308508343969,
4.0127554151831, 9.11910102221636, 6.1658394708941, 10.4617259648364,
6.07502685224869, 8.08373642262043, 48.588633863193, 10.2160085507338,
7.52606530219909, 5.66373884014351, 8.51992766801391, 25.9109062123364,
5.74498954209992, 5.56377323143979, 7.76698847227212, 7.05016373786876,
7.99745310894107), `CLASS 3` = c(3.96856956332584, 2.76052305637364,
6.92715392916015, 0.687821057043984, 2.30154255462355, 2.61089063893911,
11.2199145273738, 10.7058533354417, 1.90691767773411, 3.93488282297868,
1.7034110083142, 1.69310511636903, 1.54005861925764, 50.436990190291,
3.93233520754151, 4.06684782901502, 6.10592204678281, 0.675086986967025,
3.94018776658881, 5.74129993338595, 2.02845185559621, 4.10963382465756,
14.9264019576272, 12.9672579626868, 5.1049208042632, 1.37282635713804,
3.00088572108344, 4.78878116348504, 4.79564218319094, 3.03836532949481,
3.48474205480686, 1.09218910757234, 6.2830307568812, 3.06784943090836,
4.89376208853059, 6.6321148581705, 1.01356027363186, 3.15439940439419,
40.8141653079423, 2.52825000616702, 1.65382018138259, 1.81173455682492
), `CLASS 4` = c(0, 13.4274810838142, 10.9876140536356, 3.15424686759082,
15.2632739415738, 0, 0, 2.39525969535064, 4.19386122886851,
13.15599261724, 14.5421891905919, 14.4542067660843, 0, 0,
12.2276086827261, 12.7527880016103, 12.1436697242409, 3.79216208516423,
0, 12.2283190622827, 16.0271803699645, 14.035876401479, 4.24556176551009,
0, 0, 14.4993393432366, 13.6722412691012, 0, 14.0027443968931,
13.7579074961889, 12.9935353616471, 4.66128854387559, 0,
15.1941922851023, 11.6990009190362, 0, 7.99399142573694,
14.5041748372822, 0, 15.0674109079436, 13.7134908002476,
15.4194201146961), `CLASS 5` = c(7.82638584740367, 6.56112678542475,
6.95253086439919, 2.06445951884762, 7.17086660532553, 2.58627258328855,
7.83400556063298, 1.77053879587063, 2.65292759651742, 6.94701807830366,
6.85309102458439, 6.71505104532983, 4.06818278652755, 5.79906266122279,
6.62064468061089, 6.88365856613044, 7.68403751285005, 2.38479005191691,
9.07405520739349, 7.65785587918449, 7.4385885335047, 7.30144390122309,
2.41680929257195, 4.18258704279641, 7.8906816661241, 6.75678558060943,
6.67150537517493, 8.24794113296791, 7.67443442992891, 6.89357008866252,
6.45444668132533, 2.98342694785768, 8.704729108357, 7.14382850099481,
7.15233553294014, 9.14001781571836, 3.98831954045444, 7.54093786042356,
5.79029360470226, 6.82793163574773, 6.48049736162586, 7.18554914992982
), `CLASS 6` = c(20.6189597026452, 22.8728557858066, 23.0767150659042,
4.99832103176657, 24.1726463550235, 5.56104550736533, 31.8124013284184,
4.31653191057476, 8.4695331411828, 20.63468068931, 19.7369752322083,
19.6902616040991, 11.6648564225744, 0, 25.2321582223958,
22.2981543181678, 29.3198455372777, 5.88723409877159, 30.1474816315191,
28.0835788057802, 24.0430626320328, 25.1446564854412, 5.78713327050339,
30.6155806819949, 23.8853696442419, 20.1783872969561, 17.5935515655693,
23.4169038776536, 21.1986239116884, 19.6931330316831, 22.2658181144794,
7.38944654414811, 32.1897387187698, 23.6398829158785, 25.3561697324352,
30.4118856020653, 12.6822088903071, 26.300118251779, 11.7338836812169,
23.8624555097246, 20.1037712460599, 21.8478004507985), `CLASS 7` = c(15.9129851563051,
15.2250454288061, 13.5577123002506, 2.9902563940573, 15.4408266617369,
2.67511425705514, 8.17164465017573, 2.23047357314211, 4.01010767344732,
13.4046459481448, 15.3008244637288, 15.3885729336047, 7.81496654756214,
17.8194559247092, 12.7823202355514, 13.7684066964868, 15.378473991847,
3.75026919344972, 18.2880822635935, 14.7412162942703, 17.5270089738067,
16.799718650752, 4.33839497916674, 2.21937177530762, 15.0315149187176,
15.3632530721031, 14.1580725482114, 16.4215442147509, 15.5113323256627,
14.3349000132624, 12.8504657216928, 5.06281347160092, 15.5075336560533,
15.9392345541138, 13.3981839319596, 16.6700105346756, 8.10398633871805,
15.958090408468, 16.5733149488757, 15.1802203155931, 14.2236219296677,
16.2095182295187), `CLASS 8` = c(19.9174685533413, 16.6755018156139,
13.9892072522183, 3.35339208579287, 18.98558519396, 3.42749146804023,
10.4801793890691, 2.97802997775506, 5.11270635117451, 17.0372757040089,
18.7865491767228, 18.3992789502607, 9.99639697401416, 0,
15.9270550696003, 17.1615519869107, 15.3488962066467, 4.25197658246908,
21.2560581648095, 15.7194605175531, 19.6944057250743, 17.4904702096271,
5.22494387772846, 2.88494085790995, 19.1038328534942, 19.0183655117756,
17.533290326259, 19.92632149392, 17.5400682364295, 17.664926273487,
16.3075864395099, 6.6071984352649, 17.7536737744256, 18.5784760293114,
14.706720581834, 18.8313728693457, 9.73353207739478, 18.2488613518859,
8.53356517614357, 19.8319355692553, 17.4801581342745, 20.0300225970631
), `CLASS 9` = c(11.5493095708147, 9.80732127808386, 7.92896710456816,
2.08710247204941, 0, 0, 4.22268016442976, 1.61543185032431,
2.92213933696131, 10.3276972542995, 11.4712047448286, 11.1818706700593,
0, 8.30325482025479, 9.28807709161222, 10.2081145049644,
0, 0, 0, 0, 0, 0, 0, 0, 12.0508804125665, 10.9194191312608,
10.316895230176, 11.9324634197247, 0, 10.709037767833, 9.7151732936871,
0, 0, 0, 9.36977099054923, 0, 5.81426180513736, 0, 3.38664292169246,
0, 10.5704134555229, 0), `CLASS 10` = c(44.7938508721352,
51.7310046920715, 57.5715824785637, 89.5047895292528, 58.4027215389776,
91.3111216916161, 69.2914902356924, 91.4055258029079, 85.3021190418994,
52.2833630152431, 47.5883305901355, 48.3152264007455, 78.1204536918961,
68.0782265938132, 55.3819029226251, 51.9782682455077, 61.5885922886562,
89.6129641721643, 51.3818043642034, 61.8814673089921, 55.3399967676143,
58.4083672383978, 88.0198518505328, 90.713100323986, 45.9230901490977,
47.942176704251, 51.3202365201787, 43.4717297386365, 59.2741650079789,
50.3975658567551, 54.6723278637849, 85.3465611452765, 58.0340634611641,
58.33846091558, 55.372988962717, 55.3585987802603, 72.3599002382954,
58.2521103792226, 65.716183348586, 58.1599124794039, 51.2453091189091,
56.5749100234884), `CLASS 11` = c(55.2061491278648, 48.2689953079285,
42.4284175214362, 10.4952104707472, 41.5972784610224, 8.68887830838393,
30.7085097643076, 8.59447419709211, 14.6978809581006, 47.7166369847569,
52.4116694098645, 51.6847735992545, 21.8795463081039, 31.9217734061868,
44.6180970773749, 48.0217317544923, 38.4114077113438, 10.3870358278357,
48.6181956357966, 38.1185326910079, 44.6600032323857, 41.5916327616022,
11.9801481494672, 9.28689967601398, 54.0769098509023, 52.0578232957489,
48.6797634798213, 56.5282702613635, 40.7258349920211, 49.6024341432449,
45.3276721362151, 14.6534388547235, 41.9659365388359, 41.66153908442,
44.627011037283, 44.6414012197397, 27.6400997617046, 41.7478896207774,
34.283816651414, 41.8400875205961, 48.7546908810909, 43.4250899765116
), `CLASS 12` = c(0.811392418775427, 1.07172325344784, 1.35691090645737,
8.52815575054215, 1.40400342762093, 10.5089654211764, 2.25642633809048,
10.6353831202186, 5.80370185913679, 1.09570511081795, 0.907972043744494,
0.934805805194479, 3.57047868323309, 2.13265803649301, 1.24124305047309,
1.08239054166649, 1.60339326148851, 8.62738568129464, 1.05684309531167,
1.62339583767845, 1.23914000811097, 1.40432975000493, 7.34714218491929,
9.76785617252635, 0.849218090969217, 0.920940862853288, 1.05424169822542,
0.769026356858985, 1.45544382379371, 1.01603009463636, 1.20615785649631,
5.82433666195463, 1.38288498357373, 1.40029538508808, 1.24079537651438,
1.24007305478085, 2.61793194894868, 1.3953306600253, 1.91682810629766,
1.39005236188319, 1.05108468934595, 1.30281618424025)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -42L), .Names = c("Species",
"TREATMENT", "CLASS 1", "CLASS 2", "CLASS 3", "CLASS 4", "CLASS 5",
"CLASS 6", "CLASS 7", "CLASS 8", "CLASS 9", "CLASS 10", "CLASS 11",
"CLASS 12"))
# Package names are case-sensitive: the package is "reshape2".
library(reshape2)
# Long format: TREATMENT and Species stay as id columns, every other column is
# stacked into Class (column name) and Percentage (value).
melt <- melt(example, id.vars = c("TREATMENT", "Species"), value.name = "Percentage", variable.name = "Class")
You can use the dplyr package to run an ANOVA on each combination of Species and Class.
library(reshape2)
library(dplyr)
library(broom)
# Long format: TREATMENT and Species stay as id columns, every other column is
# stacked into Class (column name) and Percentage (value). `id.vars` is spelled
# out rather than relying on partial matching of `id`.
datm <- melt(example, id.vars = c("TREATMENT", "Species"), value.name = "Percentage", variable.name = "Class")
Now we run an ANOVA for each combination of Species and Class using the group_by function to break the data up into the desired groupings. This returns a data frame where the first two columns are the Species and Class and each element of the third column is a list with the aov model output.
# Fit one aov model per Species x Class group; do() stores each fitted model
# in the list-column `Model`.
# NOTE(review): TREATMENT is numeric in the example data, so aov() fits it as
# a single 1-df term (see the df column of the printed output below). Wrap it
# in factor() if the four treatments should be compared as separate groups —
# confirm the intent.
res = datm %>% group_by(Species, Class) %>%
do(Model = aov(Percentage ~ TREATMENT, data=.))
Get results in a tidy data frame.
# Summarise every stored model into one flat data frame (one row per term).
# NOTE(review): this relies on broom's tidy() method for the rowwise result of
# do(); presumably not available in recent broom releases — verify against the
# installed version.
tidy(res, Model)
Species Class term df sumsq meansq statistic p.value
1 A CLASS 1 TREATMENT 1 1.660188e+02 166.01875057 0.9697826054 0.35357679
2 A CLASS 1 Residuals 8 1.369534e+03 171.19171827 NA NA
3 A CLASS 2 TREATMENT 1 2.227081e+02 222.70809890 0.6192884560 0.45399125
4 A CLASS 2 Residuals 8 2.876955e+03 359.61932885 NA NA
...
93 D CLASS 11 TREATMENT 1 7.653510e+00 7.65350993 0.1968918980 0.66899748
94 D CLASS 11 Residuals 8 3.109731e+02 38.87163469 NA NA
95 D CLASS 12 TREATMENT 1 2.799650e-02 0.02799650 0.1232651091 0.73458657
96 D CLASS 12 Residuals 8 1.816994e+00 0.22712429 NA NA

Resources