Multiple comparisons for multiple correlations in ggplot, R? - r

I have the following dataset:
structure(list(Age_group = structure(c(4L, 2L, 2L, 2L, 4L, 2L,
2L, 4L, 3L, 1L, 2L, 1L, 1L, 4L, 1L, 2L, 1L, 4L, 3L, 4L, 4L, 1L,
2L, 2L, 1L, 2L, 1L, 3L, 3L, 2L, 2L, 3L, 4L, 3L, 2L, 4L, 2L, 2L,
3L, 4L, 4L, 4L, 1L, 2L, 4L, 2L, 4L, 2L, 4L, 4L, 2L, 3L, 3L, 3L,
4L, 4L, 2L, 4L, 4L, 4L, 1L, 3L, 1L, 1L, 1L, 2L, 1L, 3L, 2L, 2L,
2L, 2L, 1L, 3L, 2L, 4L, 2L, 2L, 2L, 4L, 4L, 1L, 2L, 4L, 1L, 1L,
1L, 4L, 2L, 1L, 1L, 2L, 1L, 3L, 1L, 3L, 1L, 1L, 4L, 3L, 2L, 3L,
2L, 4L, 2L, 1L, 4L, 1L, 2L, 1L, 4L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 3L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 4L, 4L, 1L, 1L, 3L, 2L, 2L, 2L, 1L,
1L, 3L, 4L, 3L, 3L, 1L, 1L, 1L, 2L, 4L, 1L, 4L, 1L, 1L, 1L, 1L,
1L, 4L, 1L, 2L, 1L, 1L, 1L, 2L, 4L, 1L, 4L, 2L, 1L, 2L, 1L, 1L,
1L, 3L, 3L, 2L, 1L, 2L, 3L, 4L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 2L,
3L, 4L, 1L, 1L, 1L, 1L, 1L, 4L, 3L, 3L, 3L, 4L, 3L, 2L, 2L, 1L,
4L, 1L, 3L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 1L, 4L, 1L,
4L, 1L, 1L, 2L, 4L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 4L, 3L, 2L,
1L, 4L, 1L, 4L, 3L, 3L, 4L, 3L, 4L, 3L, 1L, 3L, 3L, 4L, 3L, 4L,
4L, 3L, 4L, 1L, 4L, 4L, 3L, 1L, 3L, 1L, 4L, 4L, 3L, 3L, 4L, 1L,
4L, 1L, 4L, 4L, 1L, 1L, 3L, 3L, 3L, 4L, 2L, 4L, 3L, 3L, 1L, 3L,
4L, 3L, 3L, 1L, 3L, 4L, 4L, 1L, 2L, 3L, 3L, 4L, 4L, 3L, 3L, 3L,
3L, 3L, 4L, 3L, 1L, 1L, 3L, 4L, 3L, 3L, 3L, 1L, 2L, 2L, 1L), .Label = c("Adolescent",
"Young", "Middle", "Older"), class = "factor"), Value = c(0.344845,
0.290967, 0.246231, 0.262066, 0.214854, 0.369023, 0.244076, 0.280915,
0.30564, 0.296507, 0.323117, 0.276703, 0.225361, 0.415376, 0.26803,
0.297092, 0.39287, 0.373648, 0.231434, 0.215282, 0.402466, 0.324974,
0.234958, 0.255247, 0.247927, 0.200748, 0.194252, 0.171439, 0.276834,
0.201723, 0.309028, 0.203337, 0.433123, 0.242758, 0.30205, 0.370564,
0.267963, 0.283591, 0.336721, 0.596052, 0.244396, 0.387599, 0.347128,
0.227341, 0.432896, 0.282985, 0.284935, 0.231549, 0.524341, 0.33092,
0.236906, 0.54037, 0.378644, 0.206526, 0.0978536, 0.252193, 0.332135,
0.315254, 0.280426, 0.217306, 0.23161, 0.240526, 0.446109, 0.41504,
0.28913, 0.269704, 0.317209, 0.433796, 0.392358, 0.299284, 0.338003,
0.311145, 0.426087, 0.339064, 0.506221, 0.519461, 0.464624, 0.422413,
0.311408, 0.384049, 0.30677, 0.316512, 0.274162, 0.426846, 0.437163,
0.350454, 0.406764, 0.502995, 0.330374, 0.299821, 0.43784, 0.329894,
0.48, 0.351307, 0.355426, 0.377012, 0.349693, 0.302153, 0.152044,
0.423236, 0.569011, 0.451337, 0.402483, 0.54266, 0.368916, 0.300246,
0.328711, 0.44537, 0.338924, 0.378004, 0.484292, 0.373512, 0.655633,
0.320122, 0.376306, 0.701183, 0.42354, 0.354544, 0.366982, 0.485444,
0.2711, 0.39679, 0.499632, 0.380856, 0.364726, 0.460057, 0.254963,
0.368593, 0.210968, 0.338162, 0.338745, 0.498087, 0.366381, 0.452842,
0.225168, 0.456962, 0.414057, 0.313421, 0.434526, 0.217877, 0.338147,
0.300099, 0.516165, 0.375086, 0.460186, 0.373398, 0.309855, 0.296928,
0.301164, 0.334937, 0.320049, 0.389919, 0.282245, 0.241675, 0.332736,
0.593453, 0.201379, 0.416399, 0.371206, 0.4048, 0.414817, 0.4947,
0.593219, 0.376317, 0.318016, 0.395748, 0.352561, 0.350144, 0.543684,
0.444405, 0.336287, 0.0667227, 0.325322, 0.379068, 0.391071,
0.37585, 0.476663, 0.464114, 0.461864, 0.415283, 0.458221, 0.400008,
0.38393, 0.285078, 0.237714, 0.361987, 0.426509, 0.317339, 0.294408,
0.619243, 0.34253, 0.329934, 0.355375, 0.46283, 0.407967, 0.242693,
0.51851, 0.317998, 0.323249, 0.448899, 0.360369, 0.459298, 0.484034,
0.27694, 0.487715, 0.434585, 0.605315, 0.494404, 0.256854, 0.351891,
0.231474, 0.413763, 0.410932, 0.365665, 0.511102, 0.365337, 0.527372,
0.400869, 0.24765, 0.369774, 0.350247, 0.530748, 0.461709, 0.428728,
0.303493, 0.573203, 0.498893, 0.280537, 0.387132, 0.594904, 0.425032,
0.370547, 0.535847, 0.397682, 0.372345, 0.305478, 0.193977, 0.362042,
0.453853, 0.383845, 0.359185, 0.349271, 0.248476, 0.404103, 0.333776,
0.433578, 0.317914, 0.36847, 0.394821, 0.254976, 0.436492, 0.596257,
0.331286, 0.299685, 0.063502, 0.469766, 0.403892, 0.447094, 0.471031,
0.458835, 0.248689, 0.479741, 0.277219, 0.294354, 0.450719, 0.32319,
0.481539, 0.489301, 0.301525, 0.310258, 0.415681, 0.42438, 0.320633,
0.441025, 0.0533728, 0.252189, 0.317907, 0.401426, 0.282361,
0.501992, 0.417136, 0.273503, 0.448618, 0.459488, 0.286582, 0.336108,
0.289597, 0.42585, 0.367346, 0.525273, 0.456723, 0.411294, 0.299206,
0.31401, 0.350646, 0.389548, 0.34972, 0.357895, 0.45329, 0.452023,
0.408471, 0.428022, 0.572826, 0.340292, 0.0470799, 0.326013,
0.38702, 0.375492, 0.555507, 0.403654, 0.620388, 0.259259, 0.386142,
0.389715, 0.305789, 0.39022, 0.385585, 0.0526119, 0.379378, 0.411465,
0.376643, 0.0645194, 0.519351, 0.459602, 0.520458), CO2 = c(29L,
28L, 25L, 25L, 28L, NA, 28L, 29L, 32L, NA, 28L, NA, NA, 27L,
28L, 29L, 31L, 31L, NA, 24L, 27L, NA, 27L, 26L, NA, 29L, 24L,
25L, 26L, 29L, NA, 28L, 26L, NA, 22L, 26L, 25L, 22L, NA, 27L,
NA, 26L, 25L, 29L, 26L, NA, NA, 23L, 27L, 26L, 28L, NA, 24L,
22L, 22L, 27L, 23L, 26L, 27L, 28L, 24L, NA, 22L, 21L, NA, 27L,
24L, 24L, NA, 28L, 25L, 26L, 25L, 26L, 27L, 26L, 24L, 25L, 27L,
25L, NA, 25L, NA, 28L, NA, 30L, 27L, NA, 23L, 25L, NA, 27L, NA,
28L, 25L, 26L, NA, NA, 25L, 26L, 29L, 26L, 26L, 29L, 26L, NA,
NA, 24L, NA, NA, NA, NA, NA, 26L, NA, 22L, NA, 25L, 27L, 25L,
29L, 26L, NA, 26L, 21L, NA, 25L, 26L, 25L, 28L, 29L, 26L, NA,
27L, 23L, NA, NA, NA, 23L, NA, 25L, 28L, 28L, NA, 29L, 30L, NA,
27L, 25L, 26L, 24L, NA, NA, NA, 27L, 25L, 25L, 24L, NA, NA, NA,
NA, 25L, NA, 25L, NA, NA, 27L, NA, 26L, 21L, 25L, 26L, 25L, NA,
NA, 27L, 19L, 26L, NA, NA, NA, 24L, 26L, 23L, NA, NA, 29L, 31L,
33L, NA, NA, NA, NA, NA, 27L, 22L, 31L, 25L, 26L, NA, NA, 21L,
23L, 23L, 27L, NA, 26L, 23L, 34L, 28L, 29L, 31L, 24L, 23L, NA,
NA, 25L, 27L, 27L, 25L, 24L, NA, 24L, 26L, 22L, 26L, NA, 26L,
24L, 24L, NA, 24L, 26L, 22L, 29L, 24L, 25L, 24L, 26L, 28L, NA,
NA, 28L, 26L, 22L, NA, 27L, 21L, 27L, NA, 26L, NA, 27L, 24L,
24L, 24L, 25L, NA, 24L, 23L, 21L, 28L, 29L, 25L, 26L, 23L, NA,
26L, 22L, 29L, 23L, 28L, 23L, 26L, 27L, NA, 24L, 27L, 25L, NA,
29L, NA, NA, NA, NA, NA, 25L, 24L, 25L, 21L, NA, 23L, 23L, 21L,
26L, 28L, NA, 22L, 28L, 24L, NA, NA, 24L, 27L, 23L, 27L, 25L,
28L, 26L, 23L, 28L, NA, 26L, NA, NA, 20L, 27L, 23L, NA, NA, 23L,
NA, 21L, 21L)), row.names = c(NA, -325L), class = c("tbl_df",
"tbl", "data.frame"))
I used the following code to calculate correlations between X and Y and facet by age groups:
library(tidyverse)
library(ggpubr)
# Scatter plot of Value against CO2 with a fitted regression line, its
# confidence interval, and the Pearson correlation coefficient on the plot.
p <- ggscatter(
  data = DF,
  x = "CO2",
  y = "Value",
  fill = "Age_group",
  add = "reg.line",
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "pearson"
)

# Split the plot into one panel per age group, each with free axis scales.
facet(p, facet.by = "Age_group", scales = "free")
Which gives me the partly-desired output:
However, I would like to correct for having 4 separate correlations calculated. Is this possible as a quick option within my code? Like stat_compare_means has p.adjust.method options that I can implement?
Or do I need to calculate this separately and paste into the charts?

This is a bit hacky, but you can manually adjust the p-value label with stats::p.adjust:
library(readr)
# Scatter plot of Value against CO2, one panel per age group, with a linear
# fit and a correlation label whose p-value is adjusted for multiple testing.
ggplot(DF, aes(x = CO2, y = Value, fill = Age_group, group = Age_group)) +
geom_point() +
stat_smooth(method = "lm",
color = "black") +
# Build the label by hand: keep stat_cor()'s formatted r (..r.label..), then
# parse the numeric p-value back out of its formatted p label (..p.label..)
# and pass it through p.adjust() with n = 4 comparisons (one per age group).
# No `method` is given here, so p.adjust() falls back to its default method.
stat_cor(aes(label = paste0(..r.label..,
"~`,`~`p=`~",
p.adjust(readr::parse_number(..p.label..), n = 4))),
method = "pearson", label.y = 0.6) +
facet_wrap(~Age_group, scales = "free") +
theme_bw() +
theme(panel.grid = element_blank(),
legend.position = "top")
Note that you will have to manually change the `n =` argument so that it matches the number of correlations you are comparing (here 4, one per age group).

Related

Conditionally replace values of multiple columns, from values of other multiple columns

Suppose I have this dataset:
# Fix the RNG seed so the simulated `b` values are reproducible.
set.seed(1234)

# Example data: `a` and `b` are bound together with cbind() first, then `c`
# is added as a third column; only the first rows are printed.
head(
  data.frame(
    cbind(
      a = rep(c("si", "no"), times = 30),
      b = rnorm(n = 60)
    ),
    c = rep(c("d", "e", "f"), times = 20)
  )
)
Then I want to add many columns (in this example I only added two), to identify distinct cases between each group (in this case, column "a").
# Simulated example data (fixed seed) with per-group distinct-value counts.
set.seed(1234)

data.frame(
  cbind(a = rep(c("si", "no"), times = 30), b = rnorm(n = 60)),
  c = rep(c("d", "e", "f"), times = 20)
) %>%
  group_by(a) %>%
  # For each column listed in vars(), add a `<column>_dups_hash_ing` column
  # holding the number of distinct values of that column within each `a` group.
  dplyr::mutate_at(
    .vars = vars(c(b, c)),
    .funs = list(dups_hash_ing = ~ n_distinct(.))
  )
This code leaves the following dataset:
If I set the dataset with dput, the outcome is
structure(list(a = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L), .Label = c("no", "si"), class = "factor"), b = structure(c(22L,
1L, 51L, 34L, 50L, 57L, 53L, 10L, 47L, 3L, 11L, 23L, 15L, 38L,
58L, 39L, 41L, 17L, 28L, 21L, 37L, 45L, 29L, 46L, 32L, 48L, 56L,
52L, 26L, 19L, 35L, 8L, 55L, 20L, 9L, 36L, 2L, 12L, 6L, 42L,
49L, 43L, 59L, 54L, 31L, 13L, 60L, 44L, 14L, 30L, 7L, 5L, 16L,
27L, 33L, 18L, 24L, 4L, 25L, 40L), .Label = c("-0.0997905884418961",
"-0.151736536534977", "-0.198416273822079", "-0.254874652654534",
"-0.274704218225806", "-0.304721068966714", "-0.324393300483657",
"-0.400235237343163", "-0.415751788401515", "-0.50873701541522",
"-0.538070788884863", "-0.60615111526422", "-0.659770093821306",
"-0.684320344136007", "-0.789646852263761", "-0.933503340589868",
"-0.965903210133575", "-1.07754212275943", "-1.11444896479736",
"-1.60708093984972", "-2.07823754188738", "-2.7322195229558",
"-2.85575865501923", "-3.23315213292314", "0.0295178303214797",
"0.0326639575014441", "0.116845344986082", "0.162654708118265",
"0.185513915583057", "0.186492083080971", "0.287709728313787",
"0.311681028661359", "0.319160238648117", "0.413868915451097",
"0.418057822385083", "0.42200837321742", "0.485226820569252",
"0.487814635163685", "0.500694614280786", "0.594273774110513",
"0.62021020366732", "0.629536099884472", "0.660212631820405",
"0.677415500438328", "0.696768778564913", "0.700733515544461",
"0.704180178465512", "0.760462361967838", "0.895171980275539",
"0.912322161610113", "0.976031734922396", "1.1123628412626",
"1.16910851401363", "1.17349757263239", "1.49349310261748", "1.84246362620766",
"1.98373220068438", "2.16803253951933", "2.27348352044748", "2.91914013071762"
), class = "factor"), c = structure(c(1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L), .Label = c("d", "e", "f"), class = "factor"),
a_dups_hash_ing = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), b_dups_hash_ing = c(30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L), c_dups_hash_ing = c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -60L), groups = structure(list(
a = structure(1:2, .Label = c("no", "si"), class = "factor"),
.rows = list(c(2L, 4L, 6L, 8L, 10L, 12L, 14L, 16L, 18L, 20L,
22L, 24L, 26L, 28L, 30L, 32L, 34L, 36L, 38L, 40L, 42L, 44L,
46L, 48L, 50L, 52L, 54L, 56L, 58L, 60L), c(1L, 3L, 5L, 7L,
9L, 11L, 13L, 15L, 17L, 19L, 21L, 23L, 25L, 27L, 29L, 31L,
33L, 35L, 37L, 39L, 41L, 43L, 45L, 47L, 49L, 51L, 53L, 55L,
57L, 59L))), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
What I need to do, is replace, column by column, if the number of distinct cases is more than one per group, with the value of the original column. I have to do this for more than 50 columns. An example of this will be provided for only one column with mutate:
# Where the group has more than one distinct value (b_dups_hash_ing > 1),
# take the value of the original column `b`; otherwise use 0.
# The comparison needs an explicit left-hand side: `ifelse(>1, ...)` does
# not parse. Presumably used as a pipeline step, with the data piped in.
dplyr::mutate(b_dups_hash_ing = ifelse(b_dups_hash_ing > 1, b, 0))
I need to repeat the code provided above for many variables. This is very similar to a mutate_at (the word in brackets marks what I would like to refer to, i.e. the original column). The following example does not work, but it is what I would write in an ideal world, just to help you better understand my problem.
dplyr::mutate_at(vars(contains('_dups_hash_ing')), .funs = list(~ifelse(.>1,vars([original]),0)))
Is this what you're looking for?
# Applies ifelse() to every column whose name contains "_dups_hash_ing":
# values greater than 1 are kept as they are, everything else becomes 0.
# Note that `.` is the count column itself, not the original `b`/`c` column.
df %>% dplyr::mutate_at(vars(contains('_dups_hash_ing')), ~ ifelse(. > 1, ., 0)) %>% head
#> # A tibble: 6 x 6
#> # Groups: a [2]
#> a b c a_dups_hash_ing b_dups_hash_ing c_dups_hash_ing
#> <fct> <fct> <fct> <dbl> <int> <int>
#> 1 si -2.7322195229558 d 0 30 3
#> 2 no -0.09979058844189… e 0 30 3
#> 3 si 0.976031734922396 f 0 30 3
#> 4 no 0.413868915451097 d 0 30 3
#> 5 si 0.912322161610113 e 0 30 3
#> 6 no 1.98373220068438 f 0 30 3

Shiny App got error Result must have length 37849, not 0

I want to see, for people aged 20 to 35, the percentage who reported each educational status, broken down by race. To do this I built a Shiny app; however, I got this error. ![enter image description here][1]
Please help me how to link this code to shiny app.
My question is: how can I use the sliderInput (ages 20 to 35) so that, for each selected age, the plot shows how many people have a high school diploma, a college degree, or a bachelor's degree, broken down by race?
Below you can see the coding of age, education and race.
# Recode education status (R1205700) into labelled categories; the negative
# codes -5, -3 and -4 become NA. The result is stored as a factor.
nlsy97$educstat1997 <- Recode(
  nlsy97$R1205700,
  recodes = "0='None';1:2='Hischool';3='college';4='bachelor';5:7='mastermore' ;-5=NA;-3=NA;-4=NA",
  as.factor = TRUE
)

# Recode age (R1194100): ages 12-19 map to themselves and -5 becomes NA.
# Each rule must be separated by ';' (the original had "16=16=17=17").
nlsy97$age1 <- Recode(
  nlsy97$R1194100,
  recodes = "12=12;13=13;14=14;15=15;16=16;17=17;18=18;19=19;-5=NA",
  as.factor = FALSE
)

# Recode race (R1482600): 1 = black, 2 = hispanic, 4 = white; any other
# value becomes NA. The result is stored as a factor.
nlsy97$race <- Recode(
  nlsy97$R1482600,
  recodes = "1='black';2='hispanic' ;4='white';else=NA",
  as.factor = TRUE
)

# Quick check of the resulting race counts.
table(nlsy97$race)
In the next step, I have made the transitions.
# Column names for the 17 age variables and the 17 education-status
# variables (1997-2011, then 2013 and 2015). Generating them avoids typos in
# the long hand-written lists and keeps the reshape() call in sync.
age_cols <- paste0("age", 1:17)
educ_cols <- paste0("educstat", c(1997:2011, 2013, 2015))

myvars1 <- c(
  "R0000100", "R0536300", "R0536402", "R1489700", "R1489800",
  "gender", "race",
  age_cols,
  educ_cols
)

# Positions of the requested variables that actually exist in nlsy97.
which(myvars1 %in% names(nlsy97))

sub <- nlsy97[, myvars1]

# Keep only respondents with a non-missing age in every one of the 17 age
# columns (same rows as the chained `is.na(...) == F` tests).
sub <- sub[complete.cases(sub[, age_cols]), ]
head(sub, n = 5)

# Reshape to long format: each of the 16 rows per respondent pairs one
# column with the next one, i.e. (agestart, ageend) and
# (educstat1, educstat2) are consecutive age / education-status columns.
x.vertical <- reshape(
  sub,
  idvar = "R0000100",
  varying = list(
    age1 = age_cols[1:16],
    age2 = age_cols[2:17],
    educstat1 = educ_cols[1:16],
    educstat2 = educ_cols[2:17]
  ),
  times = 1:16,
  direction = "long",
  v.names = c("agestart", "ageend", "educstat1", "educstat2")
)

# Sort by respondent id, then by transition number.
x.vertical <- x.vertical[order(x.vertical$R0000100, x.vertical$time), ]
The last step is making shiny dashboard.
# STEP 1: copy an example Shiny app into app.R (or ui.R and server.R)
library(shiny)
library(tidyverse)
library(gapminder)
# User Interface
ui <- basicPage(
  # Age selector (20 to 35 in steps of 1, starting at 25). `animate = TRUE`
  # turns on the slider's animation; `sep = ""` stops the displayed numbers
  # from being split with a thousands separator.
  sliderInput(
    inputId = "age",
    label = "Select Age:",
    min = 20,
    max = 35,
    value = 25,
    step = 1,
    animate = TRUE,
    sep = ""
  ),
  # Placeholder for the plot that the server assigns to output$myplot.
  tabPanel(title = "Plot", plotOutput(outputId = "myplot"))
)
# Keep only the rows where both race and educstat2 are non-missing.
x.vertical2 <- x.vertical[
  complete.cases(x.vertical[, c("race", "educstat2")]),
]

# Counts for every educstat2 x race combination, as a long data frame.
sums <- as.data.frame(xtabs(~ educstat2 + race, data = x.vertical2))
# Server
# Server: draws, for the age chosen on the slider, the share of each
# education status within every race group as a dodged bar chart.
server <- function(input, output) {
  output$myplot <- renderPlot({
    x.vertical2 %>%
      # The slider's inputId is "age", so the selected value is input$age.
      # input$agestart does not exist (NULL), which makes the comparison
      # zero-length and produces "Result must have length ..., not 0".
      filter(agestart == input$age) %>%
      # Put the lowest education levels first. "c" was dropped from this
      # list: it is not one of the labels created by the recode step.
      mutate(
        educstat2 = fct_relevel(educstat2, "None", "Hischool", "college")
      ) %>%
      filter(!is.na(educstat2)) %>%
      group_by(race, educstat2) %>%
      summarise(n = n()) %>%
      # Share of each education status among the counts it is grouped with.
      mutate(freq = n / sum(n)) %>%
      ggplot(aes(x = factor(educstat2), y = freq, fill = race)) +
      geom_bar(stat = "identity", position = "dodge") +
      theme_bw()
  })
}

shinyApp(ui, server)
dput(head(nlsy97, 10))
structure(list(R0000100 = 1:10, R0536300 = c(2L, 1L, 2L, 2L,
1L, 2L, 1L, 2L, 1L, 1L), R0536401 = c(9L, 7L, 9L, 2L, 10L, 1L,
4L, 6L, 10L, 3L), R0536402 = c(1981L, 1982L, 1983L, 1981L, 1982L,
1982L, 1983L, 1981L, 1982L, 1984L), R1194100 = c(15L, 14L, 13L,
15L, 15L, 15L, 14L, 16L, 15L, 14L), R1205700 = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), R1235800 = c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), R1482600 = c(4L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 4L, 4L), R2553500 = c(17L, 16L, 15L, 17L, 16L, 16L, 15L,
17L, 16L, 14L), R2564101 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), R3876300 = c(18L, 17L, 16L, 18L, 17L, 17L, 16L, 18L,
17L, 15L), R3885701 = c(2L, 0L, 0L, 2L, 0L, 0L, 0L, 2L, 0L, 0L
), R5453700 = c(19L, 18L, 17L, 19L, 18L, 19L, 17L, 19L, 18L,
16L), R5464901 = c(2L, 2L, 0L, 2L, 2L, 2L, 0L, 2L, 2L, 0L), R7216000 = c(20L,
19L, 18L, 20L, 19L, 20L, 18L, 20L, 19L, 17L), R7228601 = c(2L,
2L, 2L, 2L, 2L, 2L, 0L, 2L, 2L, 0L), S1531400 = c(21L, 20L, 19L,
21L, 20L, 20L, 19L, 21L, 20L, 18L), S1542501 = c(2L, 2L, 2L,
2L, 2L, 2L, 0L, 2L, 2L, 2L), S2001000 = c(22L, 21L, 20L, 22L,
21L, 22L, -5L, 22L, 21L, 19L), S2012301 = c(4L, 2L, 3L, 2L, 2L,
2L, -5L, 4L, 2L, 2L), S3801100 = c(23L, 22L, 21L, 23L, 22L, 23L,
-5L, 23L, 22L, 20L), S3813801 = c(4L, 2L, 3L, 2L, 2L, 2L, -5L,
4L, 2L, 2L), S5401000 = c(24L, 23L, -5L, 24L, 23L, 24L, 22L,
24L, 23L, 21L), S5413400 = c(4L, 2L, -5L, 2L, 2L, 2L, 0L, 4L,
4L, 2L), S7501200 = c(25L, -5L, -5L, 25L, 24L, 25L, 23L, 25L,
24L, 22L), S7514300 = c(4L, -5L, -5L, 2L, 2L, 2L, 0L, 4L, 4L,
4L), T0008500 = c(26L, -5L, -5L, 26L, 25L, 25L, -5L, 26L, 25L,
23L), T0014700 = c(4L, -5L, -5L, 2L, 2L, 2L, -5L, 4L, 4L, 4L),
T2011100 = c(27L, 26L, -5L, 27L, 26L, 26L, -5L, -5L, 26L,
24L), T2016800 = c(4L, 2L, -5L, 2L, 2L, 2L, -5L, -5L, 4L,
4L), T3601500 = c(28L, 27L, 26L, 28L, 26L, 27L, 26L, 28L,
27L, 25L), T3607100 = c(4L, 2L, 3L, 2L, 2L, 2L, 1L, 5L, 4L,
4L), T5201400 = c(29L, 28L, -5L, 29L, 28L, 28L, 27L, -5L,
28L, -5L), T5207400 = c(4L, 2L, -5L, 2L, 2L, 2L, 1L, -5L,
4L, -5L), T5207500 = c(4L, 2L, -5L, 2L, 2L, 2L, 1L, -5L,
4L, -5L), T6651300 = c(29L, 29L, 28L, 30L, 29L, 29L, 28L,
30L, 29L, -5L), T6657200 = c(4L, 2L, 3L, 2L, 2L, 2L, 1L,
5L, 5L, -5L), T6657300 = c(4L, 2L, 3L, 2L, 2L, 2L, 1L, 5L,
5L, -5L), T8123600 = c(32L, 31L, 30L, 32L, 31L, 31L, -5L,
-5L, 31L, -5L), T8129600 = c(4L, 2L, 3L, 2L, 2L, 2L, -5L,
-5L, 5L, -5L), T8129700 = c(4L, 2L, 3L, 2L, 2L, 2L, -5L,
-5L, 5L, -5L), U0001800 = c(34L, 33L, -5L, 34L, 33L, 34L,
32L, 34L, 33L, -5L), U0009400 = c(4L, 2L, -5L, 2L, 2L, 2L,
1L, 5L, 5L, -5L), U1838500 = c(-5L, 35L, 34L, 36L, 35L, 35L,
-5L, -5L, 35L, 34L), weight = c(607550L, 0L, 0L, 261156L,
450091L, 367309L, 0L, 0L, 618091L, 0L)), row.names = c(NA,
10L), class = "data.frame")

lme4 error: boundary (singular) fit: see ?isSingular

I am trying to run lme4 package in R. I have 10 Lines in total with four plants for each line in each of the two replications. But some of the plants died and there are some missing values. Weight is the response variable. Here are some lines from the data:
Line Rep Weight PLANT
Line 1 1 NA 1
Line 1 1 NA 2
Line 1 1 NA 3
Line 1 1 NA 4
Line 2 1 26 1
Line 2 1 26 2
Line 2 1 26 3
Line 2 1 27 4
Line 1 2 26 1
Line 1 2 28 2
Line 1 2 26 3
Line 1 2 25 4
Line 2 2 24 1
Line 2 2 26 2
Line 2 2 25 3
Line 2 2 NA 4
I want to run linear mixed model using lme4 package so I tried running:
# Intercept-only mixed model with a random intercept for each Rep x PLANT
# combination. The column is named `PLANT` in the data, so it has to be
# spelled that way in the formula (`Plant` would not be found).
lme4 <- lmer(Weight ~ 1 + (1 | Rep:PLANT), data = Data)
But I got an error:
boundary (singular) fit: see ?isSingular
> dput(Data)
structure(list(Line = c("Line 1", "Line 1", "Line 1", "Line 1",
"Line 2", "Line 2", "Line 2", "Line 2", "Line 1", "Line 1", "Line 1",
"Line 1", "Line 2", "Line 2", "Line 2", "Line 2"), Rep = c(1,
1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2), Weight = c(NA,
NA, NA, NA, 26, 26, 26, 27, 26, 28, 26, 25, 24, 26, 25, NA),
PLANT = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4)), row.names = c(NA,
-16L), class = c("tbl_df", "tbl", "data.frame"))
I am using it for the first time and I am not sure about the error. I will appreciate any help!
Your model did fit, but it generated that warning because your random effects are very small. You can read more about this in this post or the help page.
Let us look at your data:
# Weight by PLANT, coloured by Rep and split into one panel per Rep:
# jittered raw points with semi-transparent boxplots layered over them.
ggplot(data = Data, mapping = aes(x = PLANT, y = Weight, colour = Rep)) +
  geom_jitter() +
  geom_boxplot(alpha = 0.2) +
  facet_wrap(facets = ~Rep)
The effect of PLANT, on its own and in combination with Rep, is extremely small. Let's look at the fitted model:
# Intercept-only model with a random intercept per PLANT x Rep combination.
fit <- lmer(Weight ~ 1 + (1 | PLANT:Rep), data = Data)
boundary (singular) fit: see ?isSingular
ranef(fit)
$`PLANT:Rep`
(Intercept)
1:1 0
1:2 0
2:1 0
2:2 0
3:1 0
3:2 0
4:1 0
4:2 0
This is exactly what happened. So we can try to account for some other effects and we still see very small coefficients:
# Same random-effect structure, now with Line as a fixed effect.
fit <- lmer(Weight ~ Line + (1 | Rep:PLANT), data = Data)
ranef(fit)
$`Rep:PLANT`
(Intercept)
1:1 1.397563e-19
1:2 2.811371e-19
1:3 8.112169e-20
1:4 1.813251e-19
2:1 -1.725964e-19
2:2 -2.463986e-20
2:3 -2.027357e-19
2:4 -2.833681e-19
The take-home message is that there is no real systematic effect coming from PLANT, so you don't need to specify a highly complicated model; do something like:
# Simpler model: Line as a fixed effect and a random intercept per Rep only.
fit <- lmer(Weight ~ Line + (1 | Rep), data = Data)
The data in case anyone is interested:
Data = structure(list(Line = structure(c(1L, 1L, 1L, 1L, 12L, 12L, 12L,
12L, 23L, 23L, 23L, 23L, 34L, 34L, 34L, 34L, 45L, 45L, 45L, 45L,
56L, 56L, 56L, 56L, 65L, 65L, 65L, 65L, 66L, 66L, 66L, 66L, 67L,
67L, 67L, 67L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L,
9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 13L,
13L, 13L, 13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 16L,
16L, 16L, 8L, 8L, 8L, 8L, 66L, 66L, 66L, 66L, 17L, 17L, 17L,
17L, 18L, 18L, 18L, 18L, 9L, 9L, 9L, 9L, 19L, 19L, 19L, 19L,
20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L, 24L,
24L, 24L, 24L, 25L, 25L, 25L, 25L, 2L, 2L, 2L, 2L, 26L, 26L,
26L, 26L, 27L, 27L, 27L, 27L, 10L, 10L, 10L, 10L, 28L, 28L, 28L,
28L, 29L, 29L, 29L, 29L, 30L, 30L, 30L, 30L, 31L, 31L, 31L, 31L,
67L, 67L, 67L, 67L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 33L,
33L, 33L, 33L, 35L, 35L, 35L, 35L, 36L, 36L, 36L, 36L, 37L, 37L,
37L, 37L, 38L, 38L, 38L, 38L, 39L, 39L, 39L, 39L, 40L, 40L, 40L,
40L, 25L, 25L, 25L, 25L, 19L, 19L, 19L, 19L, 24L, 24L, 24L, 24L,
41L, 41L, 41L, 41L, 42L, 42L, 42L, 42L, 30L, 30L, 30L, 30L, 43L,
43L, 43L, 43L, 44L, 44L, 44L, 44L, 22L, 22L, 22L, 22L, 46L, 46L,
46L, 46L, 47L, 47L, 47L, 47L, 17L, 17L, 17L, 17L, 48L, 48L, 48L,
48L, 49L, 49L, 49L, 49L, 27L, 27L, 27L, 27L, 23L, 23L, 23L, 23L,
50L, 50L, 50L, 50L, 51L, 51L, 51L, 51L, 52L, 52L, 52L, 52L, 41L,
41L, 41L, 41L, 7L, 7L, 7L, 7L, 46L, 46L, 46L, 46L, 11L, 11L,
11L, 11L, 33L, 33L, 33L, 33L, 53L, 53L, 53L, 53L, 54L, 54L, 54L,
54L, 13L, 13L, 13L, 13L, 38L, 38L, 38L, 38L, 4L, 4L, 4L, 4L,
37L, 37L, 37L, 37L, 55L, 55L, 55L, 55L, 57L, 57L, 57L, 57L, 44L,
44L, 44L, 44L, 58L, 58L, 58L, 58L, 59L, 59L, 59L, 59L, 12L, 12L,
12L, 12L, 47L, 47L, 47L, 47L, 48L, 48L, 48L, 48L, 60L, 60L, 60L,
60L, 21L, 21L, 21L, 21L, 18L, 18L, 18L, 18L, 28L, 28L, 28L, 28L,
26L, 26L, 26L, 26L, 61L, 61L, 61L, 61L, 31L, 31L, 31L, 31L, 59L,
59L, 59L, 59L, 52L, 52L, 52L, 52L, 29L, 29L, 29L, 29L, 62L, 62L,
62L, 62L, 63L, 63L, 63L, 63L, 54L, 54L, 54L, 54L, 55L, 55L, 55L,
55L, 53L, 53L, 53L, 53L, 51L, 51L, 51L, 51L, 50L, 50L, 50L, 50L,
64L, 64L, 64L, 64L, 20L, 20L, 20L, 20L, 58L, 58L, 58L, 58L, 16L,
16L, 16L, 16L, 57L, 57L, 57L, 57L, 14L, 14L, 14L, 14L, 63L, 63L,
63L, 63L, 64L, 64L, 64L, 64L, 61L, 61L, 61L, 61L, 36L, 36L, 36L,
36L, 40L, 40L, 40L, 40L, 6L, 6L, 6L, 6L, 39L, 39L, 39L, 39L,
45L, 45L, 45L, 45L, 15L, 15L, 15L, 15L, 1L, 1L, 1L, 1L, 42L,
42L, 42L, 42L, 43L, 43L, 43L, 43L, 65L, 65L, 65L, 65L, 49L, 49L,
49L, 49L, 56L, 56L, 56L, 56L, 3L, 3L, 3L, 3L, 62L, 62L, 62L,
62L, 35L, 35L, 35L, 35L, 5L, 5L, 5L, 5L, 60L, 60L, 60L, 60L,
34L, 34L, 34L, 34L), .Label = c("Line1", "Line10", "Line11",
"Line12", "Line13", "Line14", "Line15", "Line16", "Line17", "Line18",
"Line19", "Line2", "Line20", "Line21", "Line22", "Line23", "Line24",
"Line25", "Line26", "Line27", "Line28", "Line29", "Line3", "Line30",
"Line31", "Line32", "Line33", "Line34", "Line35", "Line36", "Line37",
"Line38", "Line39", "Line4", "Line40", "Line41", "Line42", "Line43",
"Line44", "Line45", "Line46", "Line47", "Line48", "Line49", "Line5",
"Line50", "Line51", "Line52", "Line53", "Line54", "Line55", "Line56",
"Line57", "Line58", "Line59", "Line6", "Line60", "Line61", "Line62",
"Line63", "Line64", "Line65", "Line66", "Line67", "Line7", "Line8",
"Line9"), class = "factor"), Rep = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L), .Label = c("1", "2"), class = "factor"), Weight = c(NA,
NA, NA, NA, 26L, 26L, 26L, 27L, NA, NA, NA, NA, 26L, 28L, 26L,
25L, 22L, 17L, 20L, 20L, 28L, 20L, 27L, 26L, 22L, 25L, 21L, 25L,
18L, 18L, 19L, 18L, 24L, 28L, 23L, 30L, 29L, 25L, 26L, 27L, NA,
NA, NA, NA, 29L, 30L, 29L, 30L, NA, NA, NA, NA, 33L, NA, NA,
NA, 21L, 23L, 18L, 23L, 32L, 29L, 30L, 30L, 18L, 19L, 21L, 21L,
25L, 25L, 25L, 26L, 26L, 27L, NA, NA, 29L, 29L, 27L, 29L, 26L,
NA, NA, NA, 26L, 20L, 23L, 27L, NA, NA, NA, NA, 32L, 32L, 30L,
30L, 20L, 20L, 20L, 19L, 22L, 21L, 22L, 22L, 24L, 23L, 23L, 25L,
20L, 25L, NA, NA, 27L, 26L, NA, NA, NA, NA, NA, NA, 30L, 28L,
NA, NA, 25L, 26L, 27L, 26L, NA, NA, NA, NA, 20L, 19L, NA, NA,
19L, 27L, 26L, 29L, 26L, 29L, 31L, 29L, 25L, 25L, 24L, 25L, 26L,
25L, 26L, 26L, 25L, 24L, 24L, 28L, 22L, 26L, 24L, 28L, 29L, 30L,
26L, NA, NA, NA, NA, NA, 26L, 24L, 24L, 24L, NA, NA, NA, NA,
NA, NA, NA, NA, 30L, 30L, 30L, 31L, 24L, 25L, 28L, 22L, 28L,
31L, 30L, NA, 31L, 30L, 29L, 25L, 25L, 22L, 24L, 20L, 30L, 30L,
30L, 29L, 26L, 32L, 28L, 29L, 20L, 15L, 15L, 11L, 25L, 24L, 24L,
24L, 26L, 29L, 31L, 30L, 24L, 28L, 20L, 22L, 29L, 26L, 26L, 28L,
27L, 27L, 27L, 26L, 21L, 22L, 21L, NA, 28L, 29L, 24L, 24L, 28L,
29L, 28L, 27L, 28L, 29L, 27L, 29L, NA, NA, NA, NA, 22L, 26L,
21L, 21L, 26L, 30L, 28L, 30L, 27L, 26L, 28L, 26L, 25L, 25L, 26L,
26L, 27L, 26L, 23L, 29L, NA, NA, NA, NA, 27L, 23L, 29L, 23L,
28L, 29L, 28L, 26L, 20L, NA, NA, NA, 28L, 23L, 26L, 21L, 28L,
26L, 26L, 29L, 20L, 27L, 20L, 26L, 29L, 26L, 28L, 28L, 30L, 27L,
NA, NA, 26L, 21L, 26L, 25L, 27L, 26L, 27L, 24L, 25L, 20L, 21L,
20L, 25L, 25L, 31L, 24L, 29L, 28L, 31L, 27L, 25L, 28L, 26L, 26L,
NA, NA, NA, NA, 24L, 25L, 23L, 27L, 20L, 26L, 25L, 25L, 29L,
28L, 29L, 29L, 26L, 27L, 25L, 28L, NA, NA, NA, NA, 26L, 28L,
NA, NA, 21L, 20L, 31L, 25L, 31L, 28L, 30L, 29L, 23L, 25L, 24L,
28L, 25L, 22L, 25L, 25L, 28L, 29L, 28L, 29L, 26L, 24L, 25L, 26L,
29L, 27L, NA, NA, 26L, 29L, 29L, 30L, 25L, 24L, 25L, 24L, 28L,
25L, 29L, 28L, 24L, 24L, 24L, 24L, 28L, 30L, 27L, 27L, 26L, 25L,
25L, 25L, 25L, 25L, 28L, 25L, 25L, 30L, 28L, 25L, 22L, 24L, 25L,
24L, NA, NA, NA, NA, 5L, 7L, 4L, 5L, 21L, 20L, 22L, 24L, 25L,
27L, 25L, 28L, 32L, 31L, NA, NA, 19L, 26L, 20L, NA, 26L, 26L,
30L, 25L, 28L, 31L, 30L, 26L, 5L, 8L, 4L, 8L, 25L, 25L, 28L,
25L, 28L, 28L, 27L, 26L, 30L, 27L, 27L, 24L, 32L, 29L, 31L, 25L,
30L, 30L, 27L, 28L, 16L, 20L, 16L, 21L, 25L, 22L, 25L, 20L, 24L,
25L, 18L, 25L, 25L, 26L, 29L, 29L, 21L, 20L, 22L, 21L, 19L, 22L,
19L, 21L, 28L, 25L, 26L, 24L, 28L, 26L, 24L, 25L, NA, NA, NA,
NA, 25L, NA, NA, NA, 23L, 21L, 19L, 23L, 25L, 24L, 25L, NA, 22L,
30L, 29L, 26L, 25L, 25L, 24L, 24L), PLANT = structure(c(1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L), .Label = c("1", "2", "3", "4"), class = "factor"),
X = structure(c(4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L,
6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L,
5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L,
2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L,
4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L,
6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L,
5L, 6L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L,
7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L,
3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L,
1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L,
8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L,
7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L,
4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L,
6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L,
5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L,
2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L,
4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L,
6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L,
5L, 6L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L,
7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L,
3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L,
1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L,
8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L,
7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L,
3L, 7L, 8L, 1L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L,
6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L,
5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L,
2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L,
4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L,
6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L, 5L, 6L, 4L, 2L,
5L, 6L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L,
7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L,
3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L,
1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L,
8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L,
7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L, 3L, 7L, 8L, 1L
), .Label = c("24", "12", "21", "11", "13", "14", "22", "23"
), class = "factor")), row.names = c(NA, -536L), class = "data.frame")

adding selected strings from first column to another column of a dataframe

I am trying to copy some of the strings from the first column (Var1) into a new last column (Var2).
The data looks like this
# Example input: 35 rows with a factor column Var1 ("F2" ... "F117") and two
# integer count columns, Freq.x and Freq.y, both containing NAs.
df1<- structure(list(Var1 = structure(c(35L, 34L, 33L, 32L, 31L, 30L,
29L, 28L, 27L, 26L, 25L, 24L, 23L, 22L, 21L, 20L, 19L, 18L, 17L,
16L, 15L, 14L, 13L, 12L, 11L, 10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L,
2L, 1L), .Label = c("F117", "F97", "F87", "F79", "F67", "F61",
"F60", "F58", "F41", "F35", "F31", "F30", "F26", "F25", "F23",
"F22", "F21", "F19", "F18", "F17", "F16", "F15", "F14", "F13",
"F12", "F11", "F10", "F9", "F8", "F7", "F6", "F5", "F4", "F3",
"F2"), class = "factor"), Freq.x = c(252L, 106L, 56L, 32L, 28L,
17L, 10L, 7L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, NA, 2L, 1L, 1L, NA,
NA, 2L, NA, 1L, NA, 1L, NA, 1L, 1L, NA, NA, 1L, NA, 1L, NA),
Freq.y = c(306L, 170L, 69L, 45L, 35L, 20L, 13L, 7L, 12L,
3L, 6L, NA, 7L, NA, 1L, 3L, NA, 2L, 1L, 1L, 2L, 2L, 2L, NA,
1L, NA, 1L, NA, NA, 1L, 1L, NA, 1L, NA, 1L)), .Names = c("Var1",
"Freq.x", "Freq.y"), row.names = c(10L, 13L, 16L, 17L, 19L, 21L,
23L, 24L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 27L, 8L, 9L, 11L, 28L,
29L, 12L, 30L, 14L, 31L, 15L, 32L, 18L, 20L, 33L, 34L, 22L, 35L,
25L, 26L), class = "data.frame")
I want to copy the first string, then every 10th string, and finally the last string
from Var1 to Var2.
So the output will look like this
# Desired result: the same Var1 / Freq.x / Freq.y values plus a Var2 column
# that is "" everywhere except rows 1, 10, 20, 30 and 35, where it repeats
# Var1 ("F2", "F11", "F22", "F61", "F117").
df2<- structure(list(Var1 = structure(c(12L, 18L, 22L, 24L, 26L, 30L,
32L, 34L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L, 14L,
15L, 16L, 17L, 19L, 20L, 21L, 23L, 25L, 27L, 28L, 29L, 31L, 33L,
35L, 3L), .Label = c("F10", "F11", "F117", "F12", "F13", "F14",
"F15", "F16", "F17", "F18", "F19", "F2", "F21", "F22", "F23",
"F25", "F26", "F3", "F30", "F31", "F35", "F4", "F41", "F5", "F58",
"F6", "F60", "F61", "F67", "F7", "F79", "F8", "F87", "F9", "F97"
), class = "factor"), Freq.x = c(252L, 106L, 56L, 32L, 28L, 17L,
10L, 7L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, NA, 2L, 1L, 1L, NA, NA,
2L, NA, 1L, NA, 1L, NA, 1L, 1L, NA, NA, 1L, NA, 1L, NA), Freq.y = c(306L,
170L, 69L, 45L, 35L, 20L, 13L, 7L, 12L, 3L, 6L, NA, 7L, NA, 1L,
3L, NA, 2L, 1L, 1L, 2L, 2L, 2L, NA, 1L, NA, 1L, NA, NA, 1L, 1L,
NA, 1L, NA, 1L), Var2 = structure(c(4L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 6L, 1L, 1L, 1L, 1L, 3L), .Label = c("",
"F11", "F117", "F2", "F22", "F61"), class = "factor")), .Names = c("Var1",
"Freq.x", "Freq.y", "Var2"), class = "data.frame", row.names = c(10L,
13L, 16L, 17L, 19L, 21L, 23L, 24L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
27L, 8L, 9L, 11L, 28L, 29L, 12L, 30L, 14L, 31L, 15L, 32L, 18L,
20L, 33L, 34L, 22L, 35L, 25L, 26L))
We can use data.table
library(data.table)
# Copy Var1 into Var2 for the first row, every 10th row (10, 20, 30, ...) and
# the last row -- the rows filled in the expected df2 (1 10 20 30 35 here).
# seq(1, .N, 9) would pick 1 10 19 28 instead. unique() drops the repeated
# index when the last row is itself a multiple of 10.
# Rows not selected are left NA by the first `:=` and then blanked to "".
setDT(df1)[unique(c(1, tail(seq(0, .N, by = 10), -1), .N)), Var2 := Var1][
  is.na(Var2), Var2 := ""]
Or use .I
# Same selection, with the row numbers looked up through .I in an inner query:
# first row, every 10th row (10, 20, 30, ...) and the last row, matching the
# expected df2. seq(1, .N, 9) would pick 1 10 19 28 instead.
# The trailing [] prints the updated table.
setDT(df1)[
  df1[, .I[unique(c(1, tail(seq(0, .N, by = 10), -1), .N))]],
  Var2 := Var1
][is.na(Var2), Var2 := ""][]
Update
Based on the comments,
# Keep the Var1 label on the first row, on every 100th row and on the last
# row; every other row of Var2 becomes "". The trailing [] prints the result.
setDT(df1)
df1[c(1, tail(seq(0, .N, by = 100), -1), .N), Var2 := Var1]
df1[is.na(Var2), Var2 := ""][]
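Using base R - logic: use c(seq(1, nrow(df1), by = 9), nrow(df1)) to select the locations you want. This returns 1 10 19 28 35. Note that the expected output in the question has Var2 filled at rows 1 10 20 30 35 instead, i.e. c(1, tail(seq(0, nrow(df1), by = 10), -1), nrow(df1)).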
# Start with an empty Var2, then fill in only the selected rows.
df1$Var2 <- ""
# Rows 1, 10, 20, ... plus the last row, as in the expected df2 (1 10 20 30 35
# for 35 rows); seq(1, nrow(df1), by = 9) would pick 1 10 19 28 instead.
# unique() drops the repeated index when the last row is a multiple of 10.
idx <- unique(c(1, tail(seq(0, nrow(df1), by = 10), -1), nrow(df1)))
df1$Var2[idx] <- as.character(df1$Var1[idx])
Using the mod operator: c(TRUE, (seq(nrow(df1))%%10==0)[c(-1, -nrow(df1))], TRUE) puts TRUE at the first and last locations and at every 10th position in between, which you can get from the %% operator
# Logical mask: TRUE for the first row, for every 10th row in between, and
# for the last row. Var1 is a factor, so convert it before copying.
keep <- c(TRUE, (seq(nrow(df1)) %% 10 == 0)[c(-1, -nrow(df1))], TRUE)
df1$Var2[keep] <- as.character(df1$Var1[keep])

Automatically add p-values to facet plot

I have made a facet plot below using the following command:
# Boxplots of logFC per Length, faceted by X (rows) and Modification (columns).
# Layers are chained with a trailing `+`; a comma after a layer or a `+` at
# the start of a line does not parse.
ggplot(data, aes(factor(Length), logFC)) +
  geom_boxplot(fill = "grey90") +
  coord_cartesian(ylim = c(-5, 5)) +
  facet_grid(X ~ Modification)
Is there a way to compute p-values for each boxplot and add them as geom_text above each boxplot? I want to compute a t-test and compare against y=0.
My data looks like this:
X Length logFC Modification
Daub 26 -0.7307060811 NTA
Daub 22 -0.3325621272 NTA
Daub 22 -2.0579390395 NTA
Daub 25 2.7199391457 NTA
Daub 23 -0.0009869389 NTA
Daub 25 -0.3318842493 NTA
...
My error message:
> data <- structure(list(Experiment = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
+ 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
+ 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
+ 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
+ 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
+ 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
+ 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
+ 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
+ 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
+ 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
+ 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
+ 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
+ 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
+ 3L, 3L, 3L, 3L, 3L,
+ 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
+ 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Daub", "Marie",
+ "Meister"), class = "factor"), Length = c(26L, 22L, 22L, 25L,
+ 23L, 25L, 23L, 25L, 24L, 23L, 24L, 26L, 24L, 21L, 20L, 21L, 22L,
+ 22L, 21L, 21L, 21L, 22L, 21L, 22L, 21L, 21L, 20L, 20L, 21L, 25L,
+ 20L, 22L, 24L, 22L, 23L, 24L, 23L, 23L, 22L, 22L, 22L, 22L, 21L,
+ 19L, 21L, 20L, 20L, 20L, 19L, 19L, 19L, 22L, 23L, 23L, 22L, 23L,
+ 22L, 20L, 21L, 24L, 24L, 24L, 25L, 24L, 21L, 20L, 23L, 23L, 20L,
+ 23L, 23L, 24L, 20L, 21L, 22L, 24L, 23L, 22L, 23L, 22L, 23L, 23L,
+ 19L, 21L, 23L, 24L, 22L, 23L, 23L, 21L, 22L, 20L, 22L, 23L, 25L,
+ 22L, 22L, 23L, 22L, 23L, 25L, 25L, 24L, 24L, 23L, 22L, 22L, 25L,
+ 23L, 24L, 23L, 23L, 22L, 22L, 25L, 23L, 22L, 25L, 21L, 19L, 21L,
+ 23L, 22L, 22L, 20L, 20L, 20L, 23L, 22L, 21L, 21L, 23L, 23L, 23L,
+ 21L, 25L, 23L, 24L, 24L, 23L, 23L, 23L, 21L, 22L, 21L, 21L, 23L,
+ 23L, 22L, 22L, 21L, 22L, 22L, 25L, 24L, 24L, 22L, 24L, 24L, 23L,
+ 22L, 21L, 22L, 23L, 20L, 22L, 23L, 24L, 25L, 24L, 25L, 22L, 23L,
+ 24L, 21L, 25L, 23L, 19L, 21L, 21L, 22L, 20L, 21L, 18L, 20L, 20L,
+ 21L, 20L, 23L, 19L, 19L, 22L, 22L, 22L, 22L, 22L, 21L, 22L, 24L,
+ 20L, 21L, 22L, 22L, 21L, 21L, 21L, 21L, 21L, 23L, 23L, 23L, 25L,
+ 25L, 25L, 23L, 24L, 24L, 24L, 24L, 24L, 24L, 25L, 25L), logFC = c(-0.7307060811,
+ -0.3325621272, -2.0579390395, 2.7199391457, -0.0009869389, -0.3318842493,
+ -2.1922199037, -1.8907961065, -1.9059255014, -0.2815081355, -0.2040330335,
+ 3.661469505, 0.6489955587, -0.0261245467, -1.4312409441, -1.1199604078,
+ -1.6528592355, -2.8208936451, -0.7207549269, -1.6528592355, -1.2540377475,
+ -2.1088724443, -2.1088724443, -1.5556550771, -1.5556550771, -0.2899601367,
+ 0.36449851, -1.7787723427, -1.5556550771, -1.5556550771, -1.5556550771,
+ -2.1092566794, 0.0417776477, -3.0768675589, -4.2573082637, -1.5556550771,
+ -1.8493703566, -0.7310899725, -2.8201262449, -0.7203706918, -2.1088724443,
+ -3.5714106365, -1.5556550771, -1.2144625017, 1.6608916211, -0.3147141406,
+ 1.2344697053, 1.2303596917, 1.2138067782, 0.9409846988, 0.5270928206,
+ -1.0435216994, -1.4320081419, -1.1644217165, -1.1478237529, -0.9941196613,
+ 0.0762668692, 1.0076747803, 0.0679302699, -0.4852244221, 0.7792467457,
+ 0.4902414285, 1.6172022872, 0.5270928206, -1.5403877099, -0.3322684844,
+ 0.0965099283, 0.8067662712, -0.3322684844, -1.2928579903, 0.6067208763,
+ 0.0247576412, -0.0291609233, -0.4737578429, 0.0743062433, 0.1126554177,
+ -0.0156954476, 1.1069888258, -0.956482117, -0.2829742145, 0.8511530937,
+ -0.1571780266, -1.2033199926, -1.1883052896, -0.0619556757, -0.7813018565,
+ 2.2467468049, 2.8382841074, 0.5658773933, -0.4461699001, -0.7409548873,
+ -0.992979577, -1.0966445642, -0.8035321174, 0.4586171366, -0.2760821893,
+ 0.0585422656, 0.0328935437, 0.3858231436, -0.4374188039, 1.1166538873,
+ -1.6539303789, 0.2027459981, -0.2193112677, -0.3939953745, -1.6726108643,
+ 1.1518720793, 2.2517568637, -0.561147283, -2.1625509666, -1.65562751,
+ -0.9048469063, -1.0759388341, 0.4938537603, 1.8754485108, -1.5944759871,
+ 1.0688499798, 2.6559945275, -1.908097968, -1.9214219995, -2.9675169126,
+ 0.0365892303, -0.8345258687, -1.0535567925, -2.0036191122, -1.6843791204,
+ -2.5554312825, -1.5778268888, -1.576142107, -0.9398408101, 2.4453250675,
+ -1.5434092122, -0.794414515, -0.6200158513, 0.5556353409, -1.0772272444,
+ -0.8720587283, -0.8082062813, -0.7353916189, 0.1072543637, 0.5658773933,
+ 0.13043531, -0.0154958912, -0.868710614, -0.1922496916, 1.0682890388,
+ -1.673413308, -0.9581901784, -1.9575141988, -1.8973257122, 1.4967046965,
+ -2.456068976, -1.4577030552, -4.2692094743, -1.9124787897, -1.4993411082,
+ -0.6409837734, 0.6369441273, -0.9960964825, -5.9703084924, -1.97960268,
+ -1.2422870608, -1.5170124157, -1.9021683731, 3.4029417731, 0.1812972171,
+ -1.6370149729, -1.749015407, -2.1677341592, -1.4942545905, -1.1137758818,
+ -1.2428452903, -1.3014446584, 0.0287537402, -0.8721416458, -2.4062762035,
+ -4.0278899462, -2.2229120764, -1.5950383235, -3.6098212725, -2.5979636046,
+ 0.3631424981, 1.1377073609, 0.5151459494, 0.0640542096, -0.7715375264,
+ -1.0361077101, -0.2462753448, -2.3058140776, -0.0847179004, -0.518970228,
+ 0.8519432911, 1.9516260022, -0.5706154628, 1.240812729, 0.336736001,
+ 2.2509464232, -0.322918086, -4.4019571741, -0.5618441487, 3.4700721641,
+ -3.9220135953, -2.1968879291, -0.1362995026, 2.164094913, -1.0688563363,
+ 0.4302583643, 2.6411096027, -3.020513717, -1.5395519303, -2.2219591633,
+ -3.8891956255, 0.9602784132, -0.6470571429, 1.853151793, -0.3271268741,
+ -0.9870872828, -2.516770073, -1.2898235194, -1.7246627604, -0.61328192,
+ -3.5457352204, -2.5068717697), Modification = structure(c(1L,
+ 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L,
+ 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
+ 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
+ 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L,
+ 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
+ 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
+ 5L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
+ 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
+ 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
+ 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
+ 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
+ 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L,
+ 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L,
+ 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NTA",
+ "t3-d", "t3-u", "t5-d", "t5-u"), class = "factor")), .Names = c("Experiment",
+ "Length", "logFC", "Modification"), class = "data.frame", row.names = c(NA,
+ -223L))
> library(dplyr)
> pvalues <- data %>% group_by(Experiment, Modification, Length) %>%
+ filter(n() > 1) %>%
+ summarize(p.value = (t.test(logFC, mu = 0)$p.value))
Error in t.test(logFC, mu = 0) : object 'logFC' not found
You can do this by summarizing the data into a table of p-values. This can be done using dplyr:
library(dplyr)
# One-sample t-test of logFC against 0 within each
# Experiment x Modification x Length cell. Cells with a single observation
# are dropped first because no p-value can be computed for them.
by_cell <- group_by(data, Experiment, Modification, Length)
testable <- filter(by_cell, n() > 1)
pvalues <- summarize(testable, p.value = t.test(logFC, mu = 0)$p.value)
(The line filter(n() > 1) is to get rid of any groups of size 1, for which a p-value cannot be calculated). This produces a table that looks like:
# Experiment Modification Length p.value
# 1 Daub NTA 22 0.3980043
# 2 Daub NTA 23 0.3535590
# 3 Daub NTA 24 0.5831962
# 4 Daub NTA 25 0.9137644
# 5 Daub NTA 26 0.6254004
# 6 Daub t3-d 20 0.1493108
Now you can add that text to your plot using a geom_text layer, choosing some y such as y = 3:
library(ggplot2)
# Boxplots of logFC per Length in an Experiment x Modification grid, with the
# p-value of each box (from pvalues) drawn as text at y = 3.
ggplot(data, aes(x = factor(Length), y = logFC)) +
  geom_boxplot(fill = "grey90") +
  coord_cartesian(ylim = c(-5, 5)) +
  facet_grid(Experiment ~ Modification) +
  geom_text(
    mapping = aes(y = 3, label = p.value),
    data = pvalues,
    size = 1
  )
You will probably have to manipulate the size (and possibly angle) of your geom_text to make the plot readable. Note also that since you are performing many tests, you should probably look at the adjusted p-values rather than the raw p-values. You can compute that column with
# summarize() only drops the last grouping variable, so pvalues is still
# grouped by Experiment and Modification. Ungroup first: otherwise p.adjust()
# would apply the Bonferroni correction within each group instead of across
# all tests.
pvalues <- pvalues %>%
  ungroup() %>%
  mutate(p.adjusted = p.adjust(p.value, method = "bonferroni"))
The function format.pval will also come in handy, especially if some of your p-values are close to 0.

Resources