Forecast: incorrect number of dimensions in R [closed] - r

Closed. This question is not reproducible or was caused by typos. It is not currently accepting answers.
This question was caused by a typo or a problem that can no longer be reproduced. While similar questions may be on-topic here, this one was resolved in a way less likely to help future readers.
Closed 4 years ago.
Improve this question
I conduct TS analysis.
My dput().
df=structure(list(month = structure(c(5L, 4L, 8L, 1L, 9L, 7L, 6L,
2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L,
10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L,
4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L,
9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L,
2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L,
10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L,
4L, 8L, 1L), .Label = c("Apr", "Aug", "Dec", "Feb", "Jan", "Jul",
"Jun", "Mar", "May", "Nov", "Oct", "Sep"), class = "factor"),
year = c(1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1980L, 1980L, 1980L, 1980L,
1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1981L,
1981L, 1981L, 1981L, 1981L, 1981L, 1981L, 1981L, 1981L, 1981L,
1981L, 1981L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L,
1982L, 1982L, 1982L, 1982L, 1982L, 1983L, 1983L, 1983L, 1983L,
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1984L,
1984L, 1984L, 1984L, 1984L, 1984L, 1984L, 1984L, 1984L, 1984L,
1984L, 1984L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L,
1985L, 1985L, 1985L, 1985L, 1985L, 1986L, 1986L, 1986L, 1986L,
1986L, 1986L, 1986L, 1986L, 1986L, 1986L, 1986L, 1986L, 1987L,
1987L, 1987L, 1987L), cake = structure(c(54L, 33L, 62L, 70L,
92L, 97L, 3L, 99L, 80L, 88L, 73L, 83L, 46L, 26L, 53L, 61L,
87L, 94L, 2L, 96L, 74L, 82L, 65L, 77L, 38L, 20L, 45L, 52L,
81L, 90L, 1L, 93L, 66L, 76L, 56L, 69L, 30L, 15L, 37L, 44L,
75L, 85L, 100L, 89L, 57L, 68L, 47L, 60L, 23L, 11L, 29L, 36L,
67L, 79L, 98L, 84L, 48L, 59L, 39L, 51L, 18L, 8L, 22L, 28L,
58L, 72L, 95L, 78L, 40L, 50L, 31L, 43L, 14L, 6L, 17L, 21L,
49L, 64L, 91L, 71L, 32L, 42L, 24L, 35L, 10L, 5L, 13L, 16L,
41L, 55L, 86L, 63L, 25L, 34L, 19L, 27L, 7L, 4L, 9L, 12L), .Label = c("10154.696",
"10379.224", "10603.752", "5689.504", "5914.032", "6138.560",
"6238.622", "6363.087", "6442.272", "6463.150", "6587.615",
"6650.772", "6666.799", "6687.677", "6812.142", "6875.300",
"6891.327", "6912.205", "6998.555", "7036.670", "7099.828",
"7115.855", "7136.733", "7223.083", "7248.663", "7261.198",
"7297.533", "7324.355", "7340.382", "7361.260", "7447.610",
"7473.191", "7485.725", "7509.326", "7522.060", "7548.883",
"7564.910", "7585.788", "7672.138", "7697.719", "7705.501",
"7733.854", "7746.588", "7773.411", "7789.438", "7810.315",
"7896.666", "7922.246", "7930.029", "7958.381", "7971.115",
"7997.938", "8013.965", "8034.843", "8106.171", "8121.193",
"8146.774", "8154.557", "8182.909", "8195.643", "8222.466",
"8238.493", "8304.060", "8330.699", "8345.721", "8371.301",
"8379.084", "8407.436", "8420.171", "8446.994", "8528.588",
"8555.227", "8570.249", "8595.829", "8603.612", "8631.964",
"8644.698", "8753.116", "8779.754", "8820.357", "8828.140",
"8856.492", "8869.226", "8977.643", "9004.282", "9032.058",
"9052.667", "9081.019", "9202.171", "9228.809", "9256.586",
"9277.195", "9426.698", "9453.337", "9481.114", "9651.226",
"9677.865", "9705.641", "9875.754", "9930.169"), class = "factor"),
icecream = structure(c(79L, 63L, 81L, 84L, 95L, 97L, 100L,
98L, 88L, 91L, 82L, 87L, 62L, 48L, 67L, 72L, 89L, 93L, 99L,
94L, 76L, 83L, 70L, 74L, 50L, 33L, 52L, 58L, 78L, 85L, 96L,
90L, 65L, 73L, 57L, 64L, 38L, 24L, 41L, 46L, 68L, 75L, 92L,
80L, 54L, 61L, 47L, 53L, 28L, 14L, 30L, 36L, 56L, 66L, 86L,
71L, 42L, 49L, 37L, 43L, 19L, 9L, 21L, 25L, 45L, 55L, 77L,
60L, 31L, 39L, 27L, 32L, 12L, 5L, 13L, 17L, 35L, 44L, 69L,
51L, 22L, 29L, 18L, 23L, 7L, 2L, 8L, 10L, 26L, 34L, 59L,
40L, 15L, 20L, 11L, 16L, 3L, 1L, 4L, 6L), .Label = c("3385.834",
"3778.830", "3948.779", "4124.808", "4178.019", "4319.547",
"4342.230", "4517.359", "4585.426", "4711.662", "4736.627",
"4742.014", "4915.965", "5004.123", "5014.631", "5021.709",
"5109.700", "5131.026", "5150.224", "5261.328", "5322.590",
"5410.013", "5415.633", "5439.154", "5515.562", "5527.707",
"5532.057", "5570.050", "5656.213", "5740.192", "5812.339",
"5816.038", "5899.870", "5914.260", "5925.189", "5932.101",
"5941.954", "6006.774", "6057.884", "6084.031", "6173.592",
"6224.003", "6225.088", "6311.199", "6330.308", "6363.928",
"6364.167", "6407.747", "6468.652", "6470.314", "6479.920",
"6631.617", "6646.099", "6648.738", "6715.593", "6745.815",
"6804.502", "6819.387", "6826.069", "6882.919", "6892.105",
"6984.050", "7046.898", "7084.607", "7092.952", "7130.098",
"7134.058", "7176.124", "7222.477", "7274.188", "7295.503",
"7316.767", "7334.341", "7551.164", "7558.942", "7569.422",
"7626.165", "7629.133", "7648.558", "7721.562", "7752.579",
"7801.250", "7807.326", "7917.988", "8009.607", "8039.695",
"8071.334", "8112.267", "8121.785", "8167.836", "8341.913",
"8467.124", "8497.833", "8647.995", "8708.185", "8915.546",
"9071.327", "9199.960", "9399.616", "9961.726"), class = "factor"),
kex = structure(c(37L, 28L, 46L, 55L, 91L, 4L, 21L, 13L,
73L, 90L, 66L, 82L, 41L, 32L, 50L, 59L, 100L, 8L, 24L, 16L,
78L, 95L, 69L, 85L, 43L, 35L, 53L, 62L, 2L, 10L, 27L, 19L,
81L, 97L, 71L, 87L, 45L, 36L, 54L, 63L, 3L, 11L, 26L, 18L,
80L, 96L, 70L, 86L, 44L, 34L, 52L, 61L, 1L, 9L, 25L, 17L,
77L, 94L, 68L, 84L, 42L, 33L, 51L, 60L, 99L, 7L, 23L, 15L,
76L, 92L, 67L, 83L, 40L, 31L, 49L, 58L, 98L, 6L, 22L, 14L,
74L, 89L, 65L, 79L, 39L, 30L, 48L, 57L, 93L, 5L, 20L, 12L,
72L, 88L, 64L, 75L, 38L, 29L, 47L, 56L), .Label = c("10012.354",
"10027.147", "10031.100", "10284.403", "10298.083", "10350.199",
"10394.860", "10408.841", "10429.411", "10448.012", "10449.622",
"10524.089", "10551.548", "10577.256", "10623.312", "10654.457",
"10659.784", "10682.780", "10685.561", "11238.047", "11245.777",
"11290.695", "11336.062", "11358.832", "11371.587", "11393.214",
"11393.847", "7924.553", "7993.174", "8049.233", "8099.100",
"8114.649", "8140.749", "8171.107", "8173.470", "8185.131",
"8421.128", "8528.465", "8584.070", "8633.341", "8636.581",
"8674.186", "8701.261", "8703.415", "8715.747", "8724.407",
"8759.735", "8816.240", "8866.689", "8893.873", "8909.120",
"8940.573", "8947.258", "8956.228", "8975.999", "8981.998",
"9038.938", "9089.956", "9128.165", "9133.149", "9165.665",
"9176.489", "9182.893", "9260.483", "9315.139", "9339.247",
"9363.164", "9402.322", "9417.254", "9429.165", "9437.884",
"9482.695", "9528.447", "9536.369", "9573.356", "9583.098",
"9620.489", "9622.236", "9628.487", "9644.810", "9649.651",
"9667.118", "9677.137", "9717.143", "9738.233", "9745.187",
"9755.734", "9757.324", "9811.494", "9820.126", "9846.204",
"9858.879", "9883.296", "9897.166", "9905.657", "9922.767",
"9929.587", "9934.869", "9978.806", "9983.549"), class = "factor"),
bread = structure(c(84L, 74L, 85L, 88L, 96L, 97L, 100L, 98L,
91L, 92L, 83L, 89L, 68L, 57L, 71L, 75L, 87L, 93L, 99L, 94L,
79L, 81L, 72L, 77L, 54L, 41L, 58L, 62L, 78L, 82L, 95L, 86L,
66L, 70L, 60L, 64L, 40L, 28L, 46L, 49L, 67L, 73L, 90L, 76L,
53L, 59L, 47L, 52L, 29L, 19L, 34L, 38L, 55L, 63L, 80L, 65L,
43L, 48L, 36L, 42L, 20L, 11L, 23L, 27L, 44L, 51L, 69L, 56L,
32L, 37L, 25L, 31L, 12L, 6L, 15L, 17L, 33L, 39L, 61L, 45L,
22L, 26L, 16L, 21L, 7L, 2L, 8L, 10L, 24L, 30L, 50L, 35L,
14L, 18L, 9L, 13L, 3L, 1L, 4L, 5L), .Label = c("2166.345",
"2648.523", "2736.609", "2898.017", "3085.471", "3140.171",
"3219.482", "3379.514", "3539.234", "3566.302", "3644.388",
"3712.041", "3816.960", "3832.007", "3870.272", "4023.558",
"4056.190", "4071.310", "4165.873", "4217.487", "4300.558",
"4317.832", "4373.292", "4374.852", "4518.024", "4556.376",
"4558.044", "4712.338", "4740.699", "4753.932", "4794.067",
"4814.280", "4863.891", "4893.102", "4908.819", "5026.050",
"5051.821", "5076.228", "5242.140", "5289.752", "5298.084",
"5300.797", "5325.007", "5364.609", "5395.421", "5437.073",
"5552.911", "5561.179", "5617.793", "5658.289", "5741.750",
"5825.819", "5855.726", "5879.818", "5881.214", "5892.896",
"5955.957", "6018.704", "6089.938", "6107.488", "6145.685",
"6195.500", "6256.822", "6377.603", "6405.031", "6416.234",
"6420.458", "6546.651", "6644.214", "6647.421", "6668.263",
"6706.952", "6793.825", "6814.597", "6837.318", "6937.775",
"6972.282", "6994.417", "7026.075", "7157.797", "7251.932",
"7364.200", "7394.167", "7444.071", "7495.351", "7501.435",
"7629.006", "7637.950", "7648.955", "7692.634", "7737.427",
"7950.654", "7992.020", "8116.917", "8259.580", "8406.970",
"8750.245", "8842.216", "8881.043", "9621.858"), class = "factor"),
pie = structure(c(69L, 67L, 72L, 79L, 6L, 25L, 56L, 39L,
95L, 9L, 91L, 2L, 74L, 68L, 81L, 86L, 21L, 41L, 60L, 47L,
7L, 19L, 99L, 12L, 80L, 70L, 84L, 93L, 29L, 46L, 61L, 50L,
14L, 26L, 4L, 20L, 82L, 71L, 88L, 97L, 35L, 48L, 62L, 54L,
17L, 32L, 11L, 23L, 83L, 73L, 92L, 1L, 38L, 49L, 63L, 55L,
22L, 36L, 13L, 28L, 85L, 75L, 94L, 3L, 42L, 51L, 64L, 57L,
24L, 37L, 15L, 31L, 87L, 76L, 96L, 5L, 44L, 52L, 65L, 58L,
27L, 40L, 16L, 33L, 89L, 77L, 98L, 8L, 45L, 53L, 66L, 59L,
30L, 43L, 18L, 34L, 90L, 78L, 100L, 10L), .Label = c("10021.538",
"10089.497", "10090.666", "10134.899", "10143.466", "10147.420",
"10165.583", "10184.298", "10211.385", "10216.074", "10240.421",
"10317.115", "10318.226", "10326.369", "10377.197", "10422.607",
"10437.822", "10457.876", "10461.052", "10462.739", "10476.329",
"10519.485", "10565.467", "10581.157", "10605.485", "10616.507",
"10628.549", "10641.434", "10661.863", "10665.320", "10699.108",
"10724.935", "10743.562", "10778.105", "10786.766", "10804.639",
"10864.942", "10876.955", "10909.292", "10911.331", "10914.655",
"10944.505", "10947.342", "10996.167", "11036.150", "11093.419",
"11185.536", "11214.739", "11302.686", "11351.962", "11368.703",
"11419.258", "11458.410", "11466.567", "11550.255", "11585.645",
"11613.335", "11661.755", "11699.302", "11877.405", "12049.813",
"12167.703", "12253.485", "12318.013", "12367.487", "12405.828",
"8156.854", "8566.438", "8625.615", "8775.256", "8911.947",
"8981.635", "9009.357", "9073.980", "9081.786", "9136.948",
"9179.541", "9212.663", "9256.038", "9291.757", "9359.667",
"9432.769", "9532.766", "9560.171", "9606.922", "9607.613",
"9663.313", "9692.747", "9706.817", "9740.635", "9746.330",
"9787.663", "9800.377", "9858.417", "9903.287", "9912.382",
"9929.028", "9954.085", "9984.490", "9986.527"), class = "factor")), .Names = c("month",
"year", "cake", "icecream", "kex", "bread", "pie"), class = "data.frame", row.names = c(NA,
-100L))
So for the 5 items ("cake", "icecream", "kex", "bread", "pie"), I want to produce a forecast.
library(forecast)
# NOTE(review): lapply() over a data frame hands each column to the function as a plain
# vector, so the two-index subsetting x[, -1] / x[, 1] below has no second dimension to use.
ld <- lapply(df, function(x) {ts(c(t(x[,-1])), start = min(x[,1]), frequency = 12)})
# Fit ets() with model = "ZZZ" to every element of ld.
lts <- lapply(ld, ets, model = "ZZZ")
# Call forecast() on every fitted model; the resulting list is printed, not stored.
lapply(lts, forecast)
After running it, the following error occurs:
Error in x[, -1] : incorrect number of dimensions
I think the problem is with my data structure, but I can't explain what exactly is wrong.
How can I understand why I can't conduct the analysis? Maybe I did the grouping incorrectly?
As the final result, I want
month year cake icecream kex bread
Jan 1988 1226 1019 1149 1220
Feb 1988 1627 1530 1649 1341
(these are the forecasted values)

This is the continuation of this question: link
The code for the analysis is the same, but the structure of the data is different, and that is the problem. If you transform the data so that it has the same structure as in the previous question, the problem is solved:
library(reshape)
# Melt to long form keyed by month and year, then cast back to wide form using the formula
# variable + year ~ month. The trailing index keeps "variable", "year" and the twelve
# month columns, ordered Jan..Dec via the first three letters of month.name.
df <- cast(melt(df, id=c("month", "year"), na.rm=TRUE),
variable + year ~ month)[, c("variable", "year", substring(month.name, 1, 3))]
# Rename the first column ("variable") to "group".
colnames(df)[1] <- "group"
# Convert every month column to numeric by going through character, so the printed value is
# parsed instead of an internal code (presumably needed because the dput() data stores the
# measurements as factors -- confirm against the cast output).
df[, substring(month.name, 1, 3)] <- lapply(df[, substring(month.name, 1, 3)],
function(x) as.numeric(as.character(x)))
And now this works:
library(forecast)
# Split the wide table into one data frame per group; the group column itself is dropped,
# which leaves the year in column 1 followed by the twelve month columns.
ld <- split(df[, -1], df$group)
# Transpose the month columns and flatten them row by row into a single monthly series
# (frequency = 12) that starts at the smallest year of the group.
ld <- lapply(ld, function(x) {
  ts(c(t(x[, -1])), start = min(x[, 1]), frequency = 12)
})
# Fit one ets() model per group and forecast from each fit.
lts <- lapply(ld, ets, model = "ZZZ")
lf <- lapply(lts, forecast)
# Keep only the first column of each forecast table (presumably the point forecast) and
# label it with the group name.
# lapply() over seq_along() always returns a list and iterates zero times for an empty
# `lf`, where 1:length(lf) would visit c(1, 0). `drop = FALSE` is spelled out because `F`
# is an ordinary variable that can be reassigned.
lf <- lapply(seq_along(lf), function(x) {
  d <- as.data.frame(lf[[x]])[, 1, drop = FALSE]
  colnames(d) <- names(lf)[x]
  d
})
# Bind the per-group columns side by side into one table.
do.call(cbind, lf)
cake icecream kex bread pie
May 1987 7480.974 5134.009 9830.727 3903.814 11068.371
Jun 1987 7881.644 5519.485 10254.112 4285.591 11490.782
Jul 1987 8807.531 6429.317 11193.300 5194.330 12436.630
Aug 1987 8079.533 5685.421 10483.109 4451.265 11729.948
Sep 1987 7024.136 4615.184 9446.217 3383.871 10695.142
...

Related

Creating scatter plot class or group wise

I'm using ggstatsplot's ggscatterstats function to calculate the correlation between various clinical parameters and then plot them. For example,
here my variables are age and WBC. This uses all the data points irrespective of the class they belong to. I would like to do the same for each FAB classification that is present in my data.
# Keep the FAB class label together with the clinical parameters listed below.
dat <- merge_clinical_class_TMB %>% select(FAB,AGE,Wbc,Platelet,HB,PB_Blasts,BM_Blasts,TMB_NONSYNONYMOUS)
df2 <- dat
library(ggstatsplot)
# Scatter plot of Wbc against AGE using every row of df2; nothing here splits by FAB.
ggscatterstats(
df2,
x = AGE,
y = Wbc,
type = "np" # try the "robust" correlation too! It might be even better here
#, marginal.type = "boxplot"
)
My dataframe looks like this
head(df2)
FAB AGE Wbc Platelet HB PB_Blasts BM_Blasts TMB_NONSYNONYMOUS
1 M4 50 17 231 10 88 52 0.3000000
2 M3 61 1 90 10 44 0 0.4333333
3 M3 30 6 114 11 82 6 0.2333333
4 M0 77 92 105 9 67 56 0.4000000
5 M1 46 29 90 9 90 81 0.5666667
6 M1 68 3 63 8 91 55 0.9000000
My data
dput(df2)
structure(list(FAB = structure(c(5L, 4L, 4L, 1L, 2L, 2L, 3L,
3L, 3L, 5L, 3L, 5L, 1L, 5L, 5L, 3L, 3L, 3L, 1L, 2L, 1L, 4L, 6L,
6L, 5L, 3L, 5L, 7L, 5L, 1L, 6L, 5L, 5L, 6L, 5L, 6L, 3L, 3L, 4L,
4L, 5L, 7L, 3L, 3L, 5L, 2L, 5L, 1L, 3L, 6L, 2L, 5L, 2L, 5L, 7L,
3L, 3L, 8L, 6L, 4L, 2L, 2L, 2L, 2L, 3L, 8L, 3L, 2L, 2L, 4L, 6L,
3L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 6L, 2L, 1L, 3L, 2L, 5L, 5L,
1L, 2L, 5L, 6L, 6L, 2L, 6L, 4L, 2L, 5L, 2L, 2L, 2L, 1L, 4L, 4L,
1L, 3L, 9L, 6L, 5L, 5L, 1L, 3L, 3L, 5L, 1L, 2L, 2L, 3L, 5L, 1L,
5L, 5L, 6L, 2L, 2L, 2L, 1L, 3L, 3L, 6L, 5L, 2L, 5L, 1L, 2L, 8L,
2L, 3L, 9L, 5L, 2L, 1L, 5L, 3L, 5L, 5L, 1L, 3L, 2L, 5L, 3L, 6L,
5L, 1L, 2L, 2L, 5L, 3L, 5L, 5L, 6L, 5L, 5L, 3L, 5L, 6L, 3L, 2L,
3L, 3L, 2L, 4L, 6L, 4L, 1L, 2L, 6L, 3L, 6L, 2L, 3L, 2L, 4L, 2L,
2L, 4L, 3L, 3L, 4L, 4L, 4L, 3L, 4L, 3L, 6L, 2L, 4L, 2L, 5L, 2L,
4L), .Label = c("M0", "M1", "M2", "M3", "M4", "M5", "M6", "M7",
"nc"), class = "factor"), AGE = c(50L, 61L, 30L, 77L, 46L, 68L,
23L, 64L, 76L, 81L, 25L, 78L, 39L, 49L, 57L, 63L, 62L, 52L, 76L,
64L, 65L, 61L, 44L, 31L, 64L, 33L, 55L, 50L, 64L, 59L, 59L, 77L,
33L, 48L, 35L, 66L, 67L, 51L, 74L, 51L, 64L, 77L, 63L, 37L, 57L,
53L, 62L, 39L, 72L, 66L, 51L, 51L, 18L, 63L, 54L, 75L, 40L, 60L,
76L, 33L, 63L, 53L, 75L, 67L, 66L, 77L, 64L, 76L, 51L, 42L, 51L,
59L, 43L, 45L, 60L, 47L, 68L, 24L, 48L, 73L, 60L, 44L, 71L, 25L,
60L, 57L, 55L, 69L, 42L, 42L, 45L, 50L, 41L, 21L, 50L, 69L, 76L,
70L, 27L, 76L, 65L, 48L, 59L, 69L, 81L, 22L, 61L, 51L, 63L, 61L,
22L, 73L, 49L, 41L, 47L, 54L, 44L, 55L, 83L, 78L, 59L, 57L, 57L,
88L, 43L, 71L, 62L, 75L, 62L, 58L, 65L, 66L, 60L, 35L, 76L, 72L,
35L, 73L, 67L, 70L, 48L, 65L, 41L, 52L, 67L, 58L, 34L, 60L, 55L,
56L, 61L, 31L, 71L, 56L, 57L, 60L, 57L, 58L, 79L, 55L, 34L, 76L,
82L, 67L, 67L, 54L, 53L, 71L, 61L, 30L, 50L, 35L, 29L, 45L, 38L,
81L, 31L, 75L, 67L, 29L, 51L, 40L, 32L, 57L, 25L, 63L, 75L, 25L,
68L, 62L, 25L, 31L, 68L, 45L, 61L, 35L, 22L, 23L, 21L, 53L),
Wbc = c(17L, 1L, 6L, 92L, 29L, 3L, 32L, 117L, 62L, 91L, 34L,
10L, 2L, 57L, 88L, 77L, 75L, 4L, 15L, 1L, 3L, 86L, 9L, 137L,
132L, 3L, 22L, 6L, 3L, 1L, 12L, 40L, 26L, 116L, 53L, 112L,
2L, 42L, 32L, 4L, 2L, 3L, 17L, 19L, 14L, 3L, 119L, 5L, 3L,
79L, 104L, 3L, 35L, 77L, 2L, 8L, 8L, 1L, 4L, 1L, 46L, 2L,
6L, 31L, 3L, 2L, 3L, 34L, 2L, 2L, 15L, 12L, 4L, 29L, 12L,
12L, 60L, 224L, 33L, 2L, 7L, 14L, 5L, 11L, 47L, 5L, 31L,
6L, 11L, 38L, 5L, 7L, 134L, 93L, 3L, 10L, 3L, 48L, 90L, 297L,
1L, 1L, 1L, 2L, 2L, 115L, 35L, 50L, 18L, 62L, 52L, 15L, 12L,
48L, 81L, 13L, 35L, 28L, 78L, 17L, 30L, 99L, 20L, 3L, 172L,
6L, 28L, 98L, 59L, 101L, 68L, 2L, 2L, 43L, 4L, 38L, 34L,
59L, 37L, 1L, 111L, 49L, 43L, 298L, 26L, 47L, 14L, 16L, 114L,
203L, 8L, 133L, 1L, 31L, 3L, 68L, 3L, 20L, 19L, 73L, 20L,
5L, 1L, 15L, 45L, 68L, 88L, 36L, 10L, 23L, 1L, 72L, 1L, 2L,
40L, 12L, 13L, 7L, 46L, 2L, 64L, NA, 5L, 103L, 8L, 1L, 3L,
16L, 29L, 1L, 99L, 2L, 6L, 2L, 3L, 2L, 115L, 27L, 8L, 1L),
Platelet = c(231L, 90L, 114L, 105L, 90L, 63L, 38L, 100L,
32L, 32L, 23L, 98L, 215L, 14L, 56L, 19L, 110L, 22L, 85L,
42L, 16L, 22L, 50L, 42L, 15L, 61L, 65L, 50L, 134L, 102L,
57L, 29L, 111L, 50L, 44L, 34L, 28L, 232L, 42L, 58L, 27L,
86L, 23L, 38L, 76L, 108L, 52L, 175L, 52L, 132L, 23L, 143L,
30L, 41L, 9L, 21L, 95L, 59L, 79L, 38L, 11L, 68L, 22L, 141L,
168L, 70L, 41L, 21L, 25L, 35L, 14L, 20L, 67L, 116L, 45L,
57L, 8L, 34L, 32L, 60L, 93L, 145L, 48L, 33L, 50L, 129L, 9L,
61L, 176L, 12L, 53L, 136L, 40L, 73L, 27L, 12L, 166L, 30L,
87L, 40L, 94L, 52L, 23L, 127L, 39L, 57L, 35L, 21L, 148L,
25L, 149L, 64L, 351L, 71L, 53L, 22L, 35L, 31L, 46L, 85L,
18L, 80L, 62L, 156L, 32L, 50L, 69L, 31L, 20L, 57L, 142L,
37L, 79L, 66L, 21L, 31L, 88L, 11L, 15L, 82L, 53L, 76L, 51L,
68L, 64L, 55L, 40L, 90L, 37L, 45L, 36L, 52L, 86L, 88L, 35L,
174L, 28L, 121L, 131L, 17L, 152L, 52L, 30L, 79L, 79L, 87L,
30L, 44L, 140L, 59L, 58L, 19L, 29L, 156L, 19L, 61L, 36L,
11L, 71L, 13L, 45L, 34L, 39L, 82L, 18L, 43L, 118L, 32L, 73L,
15L, 60L, 208L, 96L, 257L, 61L, 12L, 32L, 23L, 52L, 46L),
HB = c(10L, 10L, 11L, 9L, 9L, 8L, 7L, 10L, 10L, 11L, 11L,
10L, 10L, 8L, 10L, 13L, 11L, 9L, 9L, 8L, 9L, 12L, 8L, 6L,
10L, 7L, 8L, 9L, 11L, 12L, 11L, 10L, 10L, 9L, 8L, 10L, 9L,
13L, 9L, 8L, 12L, 9L, 12L, 9L, 9L, 9L, 11L, 10L, 11L, 12L,
12L, 11L, 9L, 10L, 9L, 9L, 10L, 9L, 10L, 9L, 8L, 9L, 9L,
10L, 12L, 10L, 10L, 8L, 10L, 9L, 11L, 11L, 11L, 8L, 9L, 9L,
9L, 6L, 10L, 10L, 9L, 9L, 8L, 9L, 9L, 7L, 9L, 11L, 12L, 10L,
9L, 10L, 12L, NA, 10L, 7L, 11L, 10L, 9L, 11L, 10L, 9L, 8L,
8L, 10L, 9L, 12L, 11L, 8L, 13L, 11L, 9L, 9L, 12L, 10L, 9L,
10L, 8L, 9L, 9L, 9L, 10L, 9L, 10L, 10L, 9L, 10L, 8L, 7L,
9L, 9L, 8L, 9L, 9L, 8L, 10L, 8L, 9L, 9L, 8L, 9L, 9L, 9L,
9L, 9L, 10L, 9L, 8L, 9L, 10L, 7L, 11L, 11L, 10L, 6L, 8L,
9L, 9L, 10L, 8L, 11L, 10L, 11L, 8L, 9L, 8L, 9L, 8L, 10L,
10L, 10L, 9L, 9L, 12L, 9L, 9L, 11L, 9L, 13L, 9L, 10L, 8L,
9L, 10L, 10L, 11L, 9L, 9L, 10L, 9L, 9L, 11L, 7L, 13L, 14L,
12L, 8L, 12L, 8L, 9L), PB_Blasts = c(88L, 44L, 82L, 67L,
90L, 91L, 59L, 60L, 48L, 98L, 53L, 40L, 75L, 81L, 90L, 57L,
46L, 67L, 74L, 61L, 99L, 73L, 74L, 83L, 72L, 33L, 35L, 70L,
85L, 61L, 95L, 80L, 71L, 83L, 90L, 90L, 50L, 64L, 51L, 93L,
95L, 75L, 80L, 52L, 61L, 72L, 65L, 83L, 45L, 32L, 85L, 73L,
86L, 82L, 30L, 48L, 47L, 58L, 78L, 100L, 81L, 82L, 40L, 89L,
70L, 47L, 80L, 73L, 62L, 88L, 57L, 70L, 40L, 56L, 86L, 37L,
90L, 77L, 75L, 37L, 94L, 86L, 97L, 72L, 87L, 40L, 52L, 60L,
68L, 40L, 95L, 81L, 92L, 90L, 90L, 42L, 37L, 84L, 77L, 99L,
83L, 65L, 79L, 82L, 46L, 94L, 71L, 39L, 62L, 95L, 55L, 11L,
51L, 42L, 77L, 72L, 39L, 69L, 75L, 70L, 75L, 52L, 91L, 33L,
87L, 55L, 72L, 76L, 85L, 79L, 79L, 81L, 50L, 81L, 33L, 88L,
34L, 90L, 69L, 32L, 92L, 90L, 47L, 75L, 30L, 59L, 57L, 62L,
54L, 60L, 89L, 82L, 90L, 90L, 64L, 89L, 43L, 58L, 58L, 97L,
71L, 91L, 53L, 75L, 85L, 67L, 86L, 70L, 43L, 86L, 74L, 87L,
0L, 0L, 86L, 53L, 63L, 41L, 76L, 45L, 85L, 0L, 94L, 6L, 91L,
0L, 2L, 93L, 85L, 82L, 56L, 40L, 48L, 0L, 14L, 90L, 71L,
51L, 91L, 42L), BM_Blasts = c(52L, 0L, 6L, 56L, 81L, 55L,
0L, 0L, 88L, 37L, 87L, 6L, 4L, 48L, 84L, 70L, 53L, 18L, 82L,
5L, 34L, 68L, 5L, 6L, 90L, 0L, 67L, 0L, 22L, 12L, 0L, 2L,
14L, 3L, 18L, 7L, 17L, 79L, 0L, 40L, 0L, 8L, 71L, 33L, 17L,
41L, 65L, 53L, 0L, 11L, 85L, 2L, 90L, 39L, 0L, 54L, 23L,
0L, 0L, 0L, 97L, 42L, 48L, 61L, 6L, 0L, 46L, 55L, 10L, 2L,
0L, 48L, 39L, 37L, 43L, 0L, 91L, 76L, 41L, 16L, 30L, 17L,
54L, 50L, 65L, 0L, 59L, 22L, 51L, 16L, 6L, 10L, 90L, 72L,
0L, 32L, 0L, 49L, 88L, 98L, 0L, 0L, 15L, 0L, 0L, 94L, 55L,
39L, 9L, 86L, 70L, 11L, 5L, 74L, 79L, 90L, 83L, 57L, 74L,
28L, 17L, 4L, 91L, 0L, 91L, 50L, 49L, 80L, 22L, 64L, 84L,
12L, 14L, 86L, 6L, 18L, 40L, 0L, 61L, 6L, 87L, 0L, 62L, 51L,
6L, 72L, 59L, 29L, 24L, 96L, 0L, 53L, 13L, 45L, 61L, 56L,
35L, 10L, 0L, 8L, 58L, 16L, 25L, 10L, 3L, 71L, 52L, 67L,
32L, 88L, 10L, 8L, 0L, 0L, 97L, 7L, 45L, 0L, 49L, 9L, 85L,
0L, 70L, 91L, 7L, 0L, 2L, 0L, 32L, 11L, 71L, 0L, 48L, 0L,
14L, 7L, 90L, 63L, 83L, 29L), TMB_NONSYNONYMOUS = c(0.3,
0.433333333333, 0.233333333333, 0.4, 0.566666666667, 0.9,
0.3, 0.133333333333, 0.4, 0.3, 0.233333333333, 0.5, 0.266666666667,
0, 0.2, 0.4, 0.266666666667, 0.333333333333, 0.4, 0.4, 0.566666666667,
0.0333333333333, 0.166666666667, 0.1, 0.166666666667, 0.266666666667,
0.3, 0.3, 0.466666666667, 0.0666666666667, 0.266666666667,
0.266666666667, 0.0333333333333, 0.1, 0.133333333333, 0.0333333333333,
0.5, 0.6, 0.0333333333333, 0.1, 0.0333333333333, 0.333333333333,
0.433333333333, 0.2, 0.466666666667, 0.2, 0.0333333333333,
0.733333333333, 0.2, 0.233333333333, 0.233333333333, 0.3,
0.133333333333, 0, 0.3, 0.333333333333, 0.333333333333, 0.266666666667,
0.533333333333, 0.2, 0.533333333333, 0.466666666667, 0.533333333333,
0.0333333333333, 0.3, 0.5, 0.333333333333, 0.266666666667,
0.5, 0.333333333333, 0.0666666666667, 0.466666666667, 0.333333333333,
0.266666666667, 0.7, 0.433333333333, 0.166666666667, 0.0666666666667,
0.233333333333, 0.5, 0.0333333333333, 0.2, 0.433333333333,
0.433333333333, 0.4, 0.233333333333, 0.0666666666667, 0.233333333333,
0.466666666667, 0.0666666666667, 0, 0.1, 0.4, 0.1, 0.2, 0.4,
0.433333333333, 0.566666666667, 0.2, 0.0333333333333, 0.533333333333,
0.566666666667, 0.3, 0.466666666667, 0.566666666667, 0.0333333333333,
0.4, 0.0666666666667, 0.633333333333, 0.4, 0.466666666667,
0.466666666667, 0.3, 0.5, 0.0333333333333, 0.333333333333,
0.333333333333, 0.266666666667, 0.366666666667, 0.666666666667,
0.333333333333, 0.533333333333, 0.466666666667, 0.6, 0.333333333333,
0.4, 0.266666666667, 0.366666666667, 0.2, 0.0333333333333,
0.266666666667, 0.3, 0.166666666667, 0.4, 0.566666666667,
0.4, 0.1, 0.1, 0.0666666666667, 0.366666666667, 0, 0.4, 0.0333333333333,
0.1, 0.0666666666667, 0.5, 0.3, 0.466666666667, 0.0333333333333,
0.4, 0.1, 0.0666666666667, 0.766666666667, 0.5, 0.466666666667,
0.333333333333, 0.4, 0.333333333333, 0.4, 0.266666666667,
0.2, 0.3, 0.7, 0.166666666667, 0.2, 0, 0.5, 0.166666666667,
0.533333333333, 0.233333333333, 0.166666666667, 0.133333333333,
0.0666666666667, 0.4, 0.333333333333, 0.133333333333, 0.4,
0.233333333333, 0.466666666667, 0.366666666667, 0.266666666667,
0.266666666667, 0.266666666667, 0.4, 0.2, 0.166666666667,
0.4, 0.333333333333, 0.166666666667, 0.266666666667, 0.1,
0.333333333333, 0.733333333333, 0.466666666667, 0.466666666667,
0.2, 0.1, 1.13333333333, 0.2, 0.3)), class = "data.frame", row.names = c(NA,
-200L))
Objective: I would like to do the same for each FAB class. I have FAB labels from M0 to M7, and I would like to ignore nc.
So for each FAB label I would like to see the correlation. For example, for the M0 class I would like to see its Age vs Wbc correlation, and similarly for the other FAB classes. Is it possible to do this in ggstatsplot? I don't see any such functionality for correlation there.
The simple way is to subset the data and repeat the same call for M0, M1, M2, etc., but that is a long process. Can I split on the FAB column and pass it to the library?
I would like to know other ways to do the above and plot the same
Any help or suggestion would be appreciated
Update: We could also use the built-in function grouped_ggscatterstats (see comments).
Many thanks to @Indrajeet Patil: https://indrajeetpatil.github.io/ggstatsplot/articles/web_only/ggscatterstats.html#grouped-analysis-with-grouped_ggscatterstats
To subset FAB we use filter:
## for reproducibility
set.seed(123)
## plot: one AGE-vs-Wbc panel per FAB class, arranged in a 2 x 2 grid
grouped_ggscatterstats(
  ## arguments relevant for ggscatterstats
  ## The four classes are selected by label instead of by the factor's internal integer
  ## codes, so the subset does not depend on the order of the factor levels.
  data = df2 %>% filter(FAB %in% c("M0", "M1", "M2", "M3")),
  x = AGE,
  y = Wbc,
  grouping.var = FAB,
  type = "r",
  # ggtheme = ggthemes::theme_tufte(),
  ## arguments relevant for combine_plots
  annotation.args = list(
    title = "Relationship between Wbc and Age",
    caption = "Source: stackoverflow"
  ),
  plotgrid.args = list(nrow = 2, ncol = 2)
)
First answer:
We could do something like this:
write a function and pass the data frame + the column FAB value:
library(ggstatsplot)
# Draw the AGE-vs-Wbc ggscatterstats plot for a single FAB class.
#   df: data frame that contains the FAB, AGE and Wbc columns
#   x:  FAB label whose rows should be plotted, e.g. "M0"
my_function <- function(df, x) {
  # Restrict the data to the requested class before plotting.
  fab_rows <- filter(df, FAB == x)
  ggscatterstats(
    fab_rows,
    x = AGE,
    y = Wbc,
    type = "np" # the "robust" correlation is an alternative worth trying here
    # optional extra argument: marginal.type = "boxplot"
  )
}
M0 <- my_function(df2, "M0")
M1 <- my_function(df2, "M1")
M2 <- my_function(df2, "M2")
M3 <- my_function(df2, "M3")
.
.
.
library(patchwork)
(M0 / M1 | M2 / M3)

Loop with multiple subset of data frame

I have a data.frame fish.test0 from which I want to select the rows whose group column matches specific variables (listed in varlist), creating a sub-data.frame that will undergo a statistical test. The results of the test are saved in tests.res.t. I want to loop over varlist so that I get one result for each entry in varlist.
Script:
# For every name in varlist: subset fish.test0, run one fisher.test() per unique Merge
# value, and collect estimate, confidence interval and p-value into tests.res.t.
varlist <- c("Abiotrophia","Alphatorquevirus")
for (i in varlist) {
# NOTE(review): the pattern here is the literal string "i", not the loop variable i, so
# every iteration selects the same rows (those whose group contains the letter i).
fish.test <- fish.test0[grep("i",fish.test0$group),]
# NOTE(review): the test data has a column named ABCD, not ACDC -- presumably a typo; confirm.
column <- c("ACDC")
# tests is created here but not used anywhere else inside the loop.
tests <- list()
# For each column name and each unique Merge value, build a 2 x 2 table and test it:
#   a = (Present - colx, colx)    taken from the second row matching that Merge value
#   b = (NotPresent - colx, colx) taken from the first row matching that Merge value
dat_test <- sapply( column, function(colx)
lapply( unique(fish.test$Merge), function(x)
fisher.test( data.frame(
a=c(( fish.test[ which(fish.test$Merge %in% x)[2],"Present"] -
fish.test[ which(fish.test$Merge %in% x)[2], colx] ),fish.test[ which(fish.test$Merge %in% x)[2], colx]
),
b=c(( fish.test[ which(fish.test$Merge %in% x)[1],"NotPresent"] -
fish.test[ which(fish.test$Merge %in% x)[1], colx] ), fish.test[ which(fish.test$Merge %in% x)[1], colx]))) #,alternative = "greater"
) )
# Label the results: one row per Merge value, one column per tested column name.
rownames(dat_test) <- unique(fish.test$Merge )
colnames(dat_test) <- column
# Pull the first two estimate entries, both confidence bounds and the p-value out of every
# test stored in the first column of dat_test.
tests.res <- sapply(dat_test[1:dim(dat_test)[1],1], function(x) {
c(x$estimate[1],
x$estimate[2],
ci.lower = x$conf.int[1],
ci.upper = x$conf.int[2],
p.value = x$p.value)
})
# Transpose so that each test becomes a row.
# NOTE(review): tests.res.t is reassigned on every pass, so only the result of the last
# iteration is left after the loop.
tests.res.t <- as.data.frame(t(tests.res))
}
test-data:
fish.test0 <- structure(list(Present = c(4L, 4L, 9L, 9L, 57L, 57L, 146L, 146L,
91L, 91L, 26L, 26L, 6L, 6L, 12L, 12L, 33L, 33L, 10L, 10L, 66L,
66L, 4L, 4L, 4L, 4L, 9L, 9L, 18L, 18L, 19L, 19L, 51L, 51L, 50L,
50L, 12L, 12L, 7L, 7L, 14L, 14L, 27L, 27L, 9L, 9L, 5L, 5L, 6L,
6L, 22L, 22L, 3L, 3L, 14L, 14L, 4L, 4L, 15L, 15L, 6L, 6L, 8L,
8L, 4L, 4L), NotPresent = c(11L, 11L, 44L, 44L, 126L, 126L, 532L,
532L, 382L, 382L, 97L, 97L, 14L, 14L, 43L, 43L, 85L, 85L, 41L,
41L, 336L, 336L, 19L, 19L, 27L, 27L, 67L, 67L, 108L, 108L, 81L,
81L, 240L, 240L, 258L, 258L, 47L, 47L, 31L, 31L, 82L, 82L, 110L,
110L, 63L, 63L, 178L, 178L, 672L, 672L, 451L, 451L, 120L, 120L,
104L, 104L, 47L, 47L, 387L, 387L, 94L, 94L, 300L, 300L, 133L,
133L), group = c("G__Abiotrophia_NotPresent_Anus", "G__Abiotrophia_Present_Anus",
"G__Abiotrophia_NotPresent_Bile duct", "G__Abiotrophia_Present_Bile duct",
"G__Abiotrophia_NotPresent_Bone/Soft tissue", "G__Abiotrophia_Present_Bone/Soft tissue",
"G__Abiotrophia_NotPresent_Breast", "G__Abiotrophia_Present_Breast",
"G__Abiotrophia_NotPresent_Colorectum", "G__Abiotrophia_Present_Colorectum",
"G__Abiotrophia_NotPresent_Esophagus", "G__Abiotrophia_Present_Esophagus",
"G__Abiotrophia_NotPresent_Gallbladder", "G__Abiotrophia_Present_Gallbladder",
"G__Abiotrophia_NotPresent_Head and neck", "G__Abiotrophia_Present_Head and neck",
"G__Abiotrophia_NotPresent_Kidney", "G__Abiotrophia_Present_Kidney",
"G__Abiotrophia_NotPresent_Liver", "G__Abiotrophia_Present_Liver",
"G__Abiotrophia_NotPresent_Lung", "G__Abiotrophia_Present_Lung",
"G__Abiotrophia_NotPresent_Lymphoid tissue", "G__Abiotrophia_Present_Lymphoid tissue",
"G__Abiotrophia_NotPresent_Mesothelium", "G__Abiotrophia_Present_Mesothelium",
"G__Abiotrophia_NotPresent_Nervous system", "G__Abiotrophia_Present_Nervous system",
"G__Abiotrophia_NotPresent_Ovary", "G__Abiotrophia_Present_Ovary",
"G__Abiotrophia_NotPresent_Pancreas", "G__Abiotrophia_Present_Pancreas",
"G__Abiotrophia_NotPresent_Prostate", "G__Abiotrophia_Present_Prostate",
"G__Abiotrophia_NotPresent_Skin", "G__Abiotrophia_Present_Skin",
"G__Abiotrophia_NotPresent_Small intestine", "G__Abiotrophia_Present_Small intestine",
"G__Abiotrophia_NotPresent_Stomach", "G__Abiotrophia_Present_Stomach",
"G__Abiotrophia_NotPresent_Unknown", "G__Abiotrophia_Present_Unknown",
"G__Abiotrophia_NotPresent_Urothelial tract", "G__Abiotrophia_Present_Urothelial tract",
"G__Abiotrophia_NotPresent_Uterus", "G__Abiotrophia_Present_Uterus",
"G__Alphatorquevirus_NotPresent_Bone/Soft tissue", "G__Alphatorquevirus_Present_Bone/Soft tissue",
"G__Alphatorquevirus_NotPresent_Breast", "G__Alphatorquevirus_Present_Breast",
"G__Alphatorquevirus_NotPresent_Colorectum", "G__Alphatorquevirus_Present_Colorectum",
"G__Alphatorquevirus_NotPresent_Esophagus", "G__Alphatorquevirus_Present_Esophagus",
"G__Alphatorquevirus_NotPresent_Kidney", "G__Alphatorquevirus_Present_Kidney",
"G__Alphatorquevirus_NotPresent_Liver", "G__Alphatorquevirus_Present_Liver",
"G__Alphatorquevirus_NotPresent_Lung", "G__Alphatorquevirus_Present_Lung",
"G__Alphatorquevirus_NotPresent_Pancreas", "G__Alphatorquevirus_Present_Pancreas",
"G__Alphatorquevirus_NotPresent_Skin", "G__Alphatorquevirus_Present_Skin",
"G__Alphatorquevirus_NotPresent_Urothelial tract", "G__Alphatorquevirus_Present_Urothelial tract"
), ABCD = c(3L, 2L, 17L, 6L, 34L, 18L, 240L, 53L, 321L, 73L,
87L, 25L, 6L, 3L, 20L, 8L, 15L, 7L, 19L, 4L, 265L, 42L, 6L, 1L,
4L, 2L, 22L, 4L, 70L, 13L, 54L, 12L, 116L, 33L, 58L, 11L, 6L,
2L, 26L, 6L, 42L, 8L, 74L, 18L, 19L, 3L, 52L, 0L, 288L, 5L, 377L,
17L, 110L, 2L, 19L, 3L, 21L, 2L, 298L, 9L, 60L, 6L, 68L, 1L,
89L, 3L), Total = c(15L, 15L, 53L, 53L, 183L, 183L, 678L, 678L,
473L, 473L, 123L, 123L, 20L, 20L, 55L, 55L, 118L, 118L, 51L,
51L, 402L, 402L, 23L, 23L, 31L, 31L, 76L, 76L, 126L, 126L, 100L,
100L, 291L, 291L, 308L, 308L, 59L, 59L, 38L, 38L, 96L, 96L, 137L,
137L, 72L, 72L, 183L, 183L, 678L, 678L, 473L, 473L, 123L, 123L,
118L, 118L, 51L, 51L, 402L, 402L, 100L, 100L, 308L, 308L, 137L,
137L), Merge = c("Abiotrophia_Anus", "Abiotrophia_Anus", "Abiotrophia_Bile duct",
"Abiotrophia_Bile duct", "Abiotrophia_Bone/Soft tissue", "Abiotrophia_Bone/Soft tissue",
"Abiotrophia_Breast", "Abiotrophia_Breast", "Abiotrophia_Colorectum",
"Abiotrophia_Colorectum", "Abiotrophia_Esophagus", "Abiotrophia_Esophagus",
"Abiotrophia_Gallbladder", "Abiotrophia_Gallbladder", "Abiotrophia_Head and neck",
"Abiotrophia_Head and neck", "Abiotrophia_Kidney", "Abiotrophia_Kidney",
"Abiotrophia_Liver", "Abiotrophia_Liver", "Abiotrophia_Lung",
"Abiotrophia_Lung", "Abiotrophia_Lymphoid tissue", "Abiotrophia_Lymphoid tissue",
"Abiotrophia_Mesothelium", "Abiotrophia_Mesothelium", "Abiotrophia_Nervous system",
"Abiotrophia_Nervous system", "Abiotrophia_Ovary", "Abiotrophia_Ovary",
"Abiotrophia_Pancreas", "Abiotrophia_Pancreas", "Abiotrophia_Prostate",
"Abiotrophia_Prostate", "Abiotrophia_Skin", "Abiotrophia_Skin",
"Abiotrophia_Small intestine", "Abiotrophia_Small intestine",
"Abiotrophia_Stomach", "Abiotrophia_Stomach", "Abiotrophia_Unknown",
"Abiotrophia_Unknown", "Abiotrophia_Urothelial tract", "Abiotrophia_Urothelial tract",
"Abiotrophia_Uterus", "Abiotrophia_Uterus", "Alphatorquevirus_Bone/Soft tissue",
"Alphatorquevirus_Bone/Soft tissue", "Alphatorquevirus_Breast",
"Alphatorquevirus_Breast", "Alphatorquevirus_Colorectum", "Alphatorquevirus_Colorectum",
"Alphatorquevirus_Esophagus", "Alphatorquevirus_Esophagus", "Alphatorquevirus_Kidney",
"Alphatorquevirus_Kidney", "Alphatorquevirus_Liver", "Alphatorquevirus_Liver",
"Alphatorquevirus_Lung", "Alphatorquevirus_Lung", "Alphatorquevirus_Pancreas",
"Alphatorquevirus_Pancreas", "Alphatorquevirus_Skin", "Alphatorquevirus_Skin",
"Alphatorquevirus_Urothelial tract", "Alphatorquevirus_Urothelial tract"
)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 10L, 9L, 12L,
11L, 13L, 14L, 16L, 15L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 28L, 27L, 29L, 30L, 31L, 32L, 34L, 33L, 35L, 36L, 38L,
37L, 40L, 39L, 42L, 43L, 45L, 44L, 47L, 46L, 1011L, 1012L, 1014L,
1013L, 1015L, 1016L, 1017L, 1018L, 1019L, 1020L, 1022L, 1021L,
1023L, 1024L, 1026L, 1025L, 1027L, 1028L, 1029L, 1030L), class = "data.frame")
This is probably not an answer, but it should help to improve your code. If I'm terribly wrong, I'll remove my answer right away. I have left out the test business, which I don't understand, but your problem seems to be the extraction.
The first thing is that you need to remove the quotation marks in your grep command, try:
varlist <- c("Abiotrophia", "Alphatorquevirus")
for (i in varlist) {
  # flag the rows whose group label matches the current pattern
  is_match <- grepl(i, fish.test0$group)
  fish.test <- fish.test0[is_match, ]
  # show the first few matching rows
  print(head(fish.test))
}
From what I understand, you need to define column and tests outside your loop. Does that give you more of what you want:
# Build one data frame per pattern in `varlist`, each with an extra running
# index column named by `column`, and collect them in the list `tests`.
varlist <- c("Abiotrophia", "Alphatorquevirus")
column <- "ACDC"
# one slot per pattern, filled by position inside the loop
tests <- vector("list", length(varlist))
for (i in seq_along(varlist)) {
  # extract rows which contain the variable
  fish.test <- fish.test0[grep(varlist[i], fish.test0$group), ]
  # add a column with your name of choice; seq_len() yields an empty column
  # when nothing matched, where 1:length(...) would give c(1, 0) and fail
  fish.test[[column]] <- seq_len(nrow(fish.test))
  # write each result into your defined list
  tests[[i]] <- fish.test
}

Replacing missing values with group_by median or mean values [duplicate]

This question already has answers here:
replace NA with groups mean in a non specified number of columns [duplicate]
(2 answers)
Closed 3 years ago.
I have been trying various time series imputation models to impute the missing values in my time series using the imputeTS package, but the test results are quite poor. Since the data looks like a seasonal time series, I thought about simply imputing the missing values with the median or average over a number of months.
The data looks like:
> head(data, 10)
v1 v2 v3 v4
1 1908 1 118 10
2 1908 2 138 33
3 1908 3 128 17
4 1908 4 NA 60
5 1908 5 NA 114
6 1908 6 72 124
7 1908 7 NA 44
8 1908 8 70 134
9 1908 9 58 121
10 1908 10 42 117
I calculated the median values:
# Median of v3 and v4 for each value of v2, ignoring missing observations;
# the result columns are named imp_v3 and imp_v4.
data %>%
  as_tibble() %>%
  group_by(v2) %>%
  summarise(
    across(c(v3, v4), ~ median(.x, na.rm = TRUE), .names = "imp_{.col}")
  )
Which gives:
# A tibble: 12 x 3
v2 imp_v3 imp_v4
<int> <dbl> <dbl>
1 1 126 23
2 2 132 27.5
3 3 138 33
4 4 22.5 76
5 5 42.5 102.
6 6 72 127
7 7 82 50
8 8 80 51
9 9 57 121
10 10 24 98
11 11 145 71
12 12 130. 31
All I want to do now is impute the missing values for each v1 and v2 with the corresponding medians.
That is, all missing values in v3 that have a 1 in v2 will be replaced with 126, and all missing values in v4 that have a 5 in column v2 will be replaced with the corresponding imp_v4 median (printed as 102. above).
The numbers in v1 are years and the numbers in v2 are months.
Data:
data <- structure(list(v1 = c(1908L, 1908L, 1908L, 1908L, 1908L, 1908L,
1908L, 1908L, 1908L, 1908L, 1908L, 1908L, 1909L, 1909L, 1909L,
1909L, 1909L, 1909L, 1909L, 1909L, 1909L, 1909L, 1909L, 1909L,
1910L, 1910L, 1910L, 1910L, 1910L, 1910L, 1910L, 1910L, 1910L,
1910L, 1910L, 1910L, 1911L, 1911L, 1911L, 1911L, 1911L, 1911L,
1911L, 1911L, 1911L, 1911L, 1911L, 1911L, 1912L, 1912L, 1912L,
1912L, 1912L, 1912L, 1912L, 1912L, 1912L, 1912L, 1912L, 1912L,
1913L, 1913L, 1913L, 1913L, 1913L, 1913L, 1913L, 1913L, 1913L,
1913L, 1913L, 1913L, 1914L, 1914L, 1914L, 1914L, 1914L, 1914L,
1914L, 1914L, 1914L, 1914L, 1914L, 1914L, 1915L, 1915L, 1915L,
1915L, 1915L, 1915L, 1915L, 1915L, 1915L, 1915L, 1915L, 1915L,
1916L, 1916L, 1916L, 1916L, 1916L, 1916L, 1916L, 1916L, 1916L,
1916L, 1916L, 1916L, 1917L, 1917L, 1917L, 1917L, 1917L, 1917L,
1917L, 1917L, 1917L, 1917L, 1917L, 1917L, 1918L, 1918L, 1918L,
1918L, 1918L, 1918L, 1918L, 1918L, 1918L, 1918L, 1918L, 1918L,
1919L, 1919L, 1919L, 1919L, 1919L, 1919L, 1919L, 1919L, 1919L,
1919L, 1919L, 1919L, 1920L, 1920L, 1920L, 1920L, 1920L, 1920L,
1920L, 1920L, 1920L, 1920L, 1920L, 1920L, 1921L, 1921L, 1921L,
1921L, 1921L, 1921L, 1921L, 1921L, 1921L, 1921L, 1921L, 1921L,
1922L, 1922L, 1922L, 1922L, 1922L, 1922L, 1922L, 1922L, 1922L,
1922L, 1922L, 1922L, 1923L, 1923L, 1923L, 1923L, 1923L, 1923L,
1923L, 1923L, 1923L, 1923L, 1923L, 1923L, 1924L, 1924L, 1924L,
1924L, 1924L, 1924L, 1924L, 1924L, 1924L, 1924L, 1924L, 1924L,
1925L, 1925L, 1925L, 1925L, 1925L, 1925L, 1925L, 1925L, 1925L,
1925L, 1925L, 1925L, 1926L, 1926L, 1926L, 1926L, 1926L, 1926L,
1926L, 1926L, 1926L, 1926L, 1926L, 1926L, 1927L, 1927L, 1927L,
1927L, 1927L, 1927L, 1927L, 1927L, 1927L, 1927L, 1927L, 1927L,
1928L, 1928L, 1928L, 1928L, 1928L, 1928L, 1928L, 1928L, 1928L,
1928L, 1928L, 1928L, 1929L, 1929L, 1929L, 1930L, 1930L, 1930L,
1930L, 1930L, 1930L, 1930L, 1930L, 1930L, 1930L, 1930L, 1930L,
1931L, 1931L, 1931L, 1931L, 1931L, 1931L, 1931L, 1931L, 1931L,
1931L, 1931L, 1931L, 1932L, 1932L, 1932L, 1932L, 1932L, 1932L,
1932L, 1932L, 1932L, 1932L, 1932L, 1932L, 1933L, 1933L, 1933L,
1933L, 1933L, 1933L, 1933L, 1933L, 1933L, 1933L, 1933L, 1933L,
1934L, 1934L, 1934L, 1934L, 1934L, 1934L, 1934L, 1934L, 1934L,
1934L, 1934L, 1934L, 1935L, 1935L, 1935L, 1935L, 1935L, 1935L,
1935L, 1935L, 1935L, 1935L, 1935L, 1935L, 1936L, 1936L, 1936L,
1936L, 1936L, 1936L, 1936L, 1936L, 1936L, 1936L, 1936L, 1936L,
1937L, 1937L, 1937L, 1937L, 1937L, 1937L, 1937L, 1937L, 1937L,
1937L, 1937L, 1937L, 1938L, 1938L, 1938L, 1938L, 1938L, 1938L,
1938L, 1938L, 1938L, 1938L, 1938L, 1938L, 1939L, 1939L, 1939L,
1939L, 1939L, 1939L, 1939L, 1939L, 1939L, 1939L, 1939L, 1939L,
1940L, 1940L, 1940L, 1940L, 1940L, 1940L, 1940L, 1940L, 1940L,
1940L, 1940L, 1940L, 1941L, 1941L, 1941L, 1941L, 1941L, 1941L,
1941L, 1941L, 1941L, 1941L, 1941L, 1941L, 1942L, 1942L, 1942L,
1942L, 1942L, 1942L, 1942L, 1942L, 1942L, 1942L, 1942L, 1942L,
1943L, 1943L, 1943L, 1943L, 1943L, 1943L, 1943L, 1943L, 1943L,
1943L, 1943L, 1943L, 1944L, 1944L, 1944L, 1944L, 1944L, 1944L,
1944L, 1944L, 1944L, 1944L, 1944L, 1944L, 1945L, 1945L, 1945L,
1945L, 1945L, 1945L, 1945L, 1945L), v2 = c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 10L, 11L, 12L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L), v3 = c(118L, 138L, 128L, NA, NA, 72L,
NA, 70L, 58L, 42L, 159L, 125L, 118L, 122L, 123L, 22L, 43L, 45L,
68L, 82L, 41L, 29L, 140L, 120L, 119L, NA, 156L, 161L, 39L, 72L,
65L, 75L, 45L, 26L, 122L, 145L, 125L, 135L, 130L, 3L, 58L, 73L,
103L, 101L, 68L, 16L, 144L, 147L, NA, 132L, NA, 24L, NA, NA,
77L, 46L, 34L, 18L, 148L, 154L, 121L, NA, 148L, 9L, 43L, 69L,
70L, 79L, 64L, 27L, 4L, 134L, 126L, 158L, NA, 37L, 34L, 78L,
86L, 91L, 65L, NA, 156L, 130L, 121L, 129L, 144L, 12L, 36L, 80L,
NA, 77L, 63L, NA, 125L, NA, 160L, 121L, 112L, 16L, 44L, 40L,
NA, 96L, NA, NA, 153L, 112L, 105L, 107L, 122L, 148L, 58L, NA,
92L, 76L, 68L, 5L, 159L, 113L, 127L, 145L, 147L, NA, 64L, 61L,
NA, NA, NA, 20L, 144L, 153L, NA, 106L, 121L, 4L, 68L, 72L, 67L,
83L, NA, 14L, 117L, 137L, 139L, 148L, 4L, 160L, 44L, 70L, 58L,
59L, 54L, 27L, 152L, 128L, 154L, 130L, 162L, 17L, 47L, 80L, 104L,
NA, 72L, 57L, 127L, 155L, 116L, 127L, 129L, 151L, 59L, 65L, NA,
56L, 44L, NA, 153L, 140L, 147L, 128L, 155L, 161L, 17L, NA, 95L,
70L, 47L, 18L, 122L, NA, 126L, 119L, 132L, 160L, 35L, 62L, 75L,
62L, NA, 24L, 154L, 155L, 138L, 134L, 138L, 6L, 42L, 83L, 98L,
82L, 39L, 30L, 125L, 114L, 133L, 147L, 150L, 23L, NA, 67L, 97L,
87L, 66L, 8L, 142L, 130L, 133L, 132L, 158L, 5L, 36L, NA, 81L,
74L, 45L, 31L, 145L, NA, NA, 142L, 143L, 14L, 36L, 49L, 82L,
80L, 54L, 24L, 161L, 124L, 21L, NA, 137L, 141L, 109L, NA, 162L,
34L, 81L, 70L, 80L, 52L, 26L, 150L, 126L, 122L, 119L, 130L, NA,
41L, 62L, 75L, NA, 39L, 22L, 158L, 138L, 153L, 125L, 141L, 158L,
28L, 73L, 84L, 90L, 54L, 16L, 147L, 138L, 112L, NA, 4L, 23L,
51L, 89L, 100L, 99L, 83L, 23L, 141L, 111L, 138L, 136L, 138L,
10L, 48L, 77L, 104L, 80L, 79L, 26L, NA, 157L, 131L, 140L, 154L,
10L, 33L, 81L, 99L, NA, 60L, 20L, 152L, 113L, 118L, 110L, 153L,
NA, 41L, 76L, 78L, 88L, 66L, 25L, 144L, 142L, 135L, 134L, 118L,
16L, 50L, 66L, 90L, NA, 59L, 28L, 142L, 113L, 143L, 133L, 25L,
15L, 38L, 69L, 74L, 87L, 61L, 24L, 11L, 127L, 121L, 141L, 144L,
NA, 45L, 76L, 78L, 87L, 62L, 12L, 3L, 119L, 1L, NA, 154L, 7L,
64L, 102L, 78L, 79L, 53L, 19L, 151L, 126L, 2L, 115L, NA, 146L,
23L, 85L, 103L, 74L, 71L, NA, 151L, 140L, 94L, 93L, 136L, 22L,
40L, 79L, 82L, 85L, 56L, 28L, 141L, 152L, NA, 150L, 162L, 35L,
56L, 72L, 92L, 75L, 51L, 31L, 149L, 126L, 147L, NA, 146L, 26L,
40L, 55L, 82L, 88L, 47L, 13L, 149L, 131L, 108L, 159L, 17L, 32L,
49L, 72L, 95L, 83L), v4 = c(10L, 33L, 17L, 60L, 114L, 124L, 44L,
134L, 121L, 117L, 73L, NA, 15L, 3L, 3L, 72L, 86L, 112L, 42L,
41L, 118L, 106L, 31L, 18L, 5L, 29L, NA, 63L, NA, 133L, 135L,
52L, 122L, 110L, 4L, 75L, NA, 27L, 33L, 78L, 116L, 127L, 51L,
NA, 123L, 94L, 71L, 72L, 15L, NA, 74L, 73L, 103L, 136L, 51L,
129L, 114L, 81L, NA, 63L, 25L, 26L, 60L, 77L, 105L, 132L, 38L,
NA, 37L, NA, 89L, 62L, 21L, 74L, 65L, 82L, 96L, 128L, 49L, 45L,
121L, 109L, NA, 31L, 27L, 22L, 29L, 74L, 87L, 119L, 37L, 36L,
126L, 89L, 6L, 26L, 82L, 19L, NA, 70L, 100L, 114L, 45L, 54L,
126L, 98L, 80L, 6L, 5L, 12L, NA, 22L, 103L, 131L, 44L, 53L, 37L,
75L, NA, 1L, 16L, 68L, 59L, 68L, 110L, NA, 39L, 48L, 118L, 98L,
31L, 76L, 15L, 10L, NA, NA, 110L, 131L, NA, 39L, 120L, 70L, 16L,
59L, NA, NA, 70L, 79L, 107L, 127L, 38L, 134L, 122L, 105L, 83L,
31L, 77L, 33L, NA, NA, 96L, 123L, 53L, 49L, 130L, 125L, 23L,
72L, NA, 19L, 27L, 21L, NA, 123L, 133L, 136L, 121L, 87L, 72L,
72L, 69L, 33L, 67L, 71L, 85L, 120L, NA, 41L, NA, 102L, 19L, 19L,
25L, NA, 9L, NA, 107L, 131L, 43L, 34L, 36L, 104L, 83L, 86L, 61L,
28L, 32L, 73L, 109L, 129L, 54L, 51L, 112L, 101L, 2L, 8L, 29L,
74L, 70L, 87L, 84L, 120L, NA, 44L, NA, 80L, 61L, 30L, 33L, 20L,
74L, 77L, 93L, 111L, 50L, 51L, 129L, 102L, 67L, 5L, 23L, 62L,
62L, 72L, 94L, 115L, 47L, 44L, 122L, 98L, 86L, 1L, 90L, 64L,
63L, 27L, 5L, 24L, 81L, 95L, 133L, NA, NA, 136L, 107L, NA, 29L,
14L, 24L, 7L, 77L, 105L, 135L, 52L, 136L, 120L, 84L, 88L, 63L,
70L, 18L, 26L, 63L, NA, 125L, 56L, 56L, 124L, 85L, NA, 75L, 3L,
24L, 60L, 86L, 114L, 136L, 56L, 57L, NA, NA, 71L, 1L, 29L, 29L,
25L, 78L, 102L, 132L, 58L, 41L, 34L, 111L, 76L, 90L, 33L, 66L,
72L, 78L, 86L, 37L, 52L, 48L, 129L, 98L, NA, NA, 19L, 9L, 71L,
60L, 101L, 133L, 50L, NA, 46L, 96L, NA, 59L, 31L, 30L, 6L, 88L,
113L, NA, 56L, 51L, 126L, 107L, 61L, 17L, NA, 62L, 91L, 74L,
100L, 35L, 43L, 52L, 134L, 108L, 97L, 26L, 23L, 63L, 66L, 80L,
NA, 125L, 49L, 56L, 40L, 86L, 88L, 28L, 13L, 11L, 33L, 75L, 109L,
40L, 43L, 39L, 119L, 99L, 80L, 29L, 12L, NA, 23L, 65L, NA, 130L,
57L, NA, 46L, 109L, 69L, 66L, 10L, NA, 20L, 78L, 97L, 126L, 43L,
58L, 120L, 107L, 66L, 76L, 23L, 69L, 64L, 98L, 96L, 135L, 41L,
47L, 127L, 97L, 74L, 26L, 65L, 20L, 27L, 92L, 98L, 124L, 55L,
53L, 121L, 96L, NA, 27L, NA, 73L, NA, 85L, 106L, 135L, 54L, NA
)), class = "data.frame", row.names = c(NA, -443L))
We can use replace() to replace the NA elements with the median of the column within each v2 group:
library(dplyr)

# Within each v2 group, replace the NA entries of v3 and v4 with that group's
# median of the same column. across() replaces the superseded mutate_at().
# The result is still grouped by v2.
data %>%
  group_by(v2) %>%
  mutate(
    across(c(v3, v4), ~ replace(.x, is.na(.x), median(.x, na.rm = TRUE)))
  )
# A tibble: 443 x 4
# Groups: v2 [12]
# v1 v2 v3 v4
# <int> <int> <dbl> <dbl>
# 1 1908 1 118 10
# 2 1908 2 138 33
# 3 1908 3 128 17
# 4 1908 4 22.5 60
# 5 1908 5 42.5 114
# 6 1908 6 72 124
# 7 1908 7 82 44
# 8 1908 8 70 134
# 9 1908 9 58 121
#10 1908 10 42 117
# … with 433 more rows
Or another option is na.aggregate from zoo
library(zoo)

# Apply zoo::na.aggregate() to v3 and v4 separately within each v2 group.
# across() replaces the superseded mutate_at().
data %>%
  group_by(v2) %>%
  mutate(across(c(v3, v4), na.aggregate))
By default, FUN is mean. If we want the median instead, specify FUN = median:
# Same as above, but na.aggregate() is told to use the median. The extra
# argument is supplied inside the lambda rather than through `...`.
data %>%
  group_by(v2) %>%
  mutate(across(c(v3, v4), ~ na.aggregate(.x, FUN = median)))

polynomial fitting and plotting regression line in R

I am trying to find orthogonal polynomials of degree 3 for my data. The purpose is to visualise different polynomial fits to my data: degree 3 and degree 7. I am using the same code as our professor used in class; however, I cannot obtain nice results.
# Orthogonal polynomial basis of degree 3 in Temp. poly() has no `order`
# argument, so the degree is passed explicitly as `degree`.
orthpoly <- poly(Air_reduced$Temp, degree = 3)
Air_reduced$xo1 <- orthpoly[, 1]
Air_reduced$xo2 <- orthpoly[, 2]
Air_reduced$xo3 <- orthpoly[, 3]

# Regress Ozone on the three basis columns and keep the fitted values.
polymodel1 <- lm(Ozone ~ xo1 + xo2 + xo3, data = Air_reduced)
Air_reduced$fitted1 <- fitted(polymodel1)

# Scatter plot of the observations with the fitted values drawn on top.
plot(
  Air_reduced$Temp, Air_reduced$Ozone,
  xlab = "x", ylab = "f(x)",
  cex.lab = 1.5, cex.axis = 1.3, col = "red", cex = 1.3,
  main = "Polynomial of degree 3", xlim = c(50, 97), ylim = c(0, 100)
)
# lines() joins the points in the row order of Air_reduced.
lines(Air_reduced$Temp, Air_reduced$fitted1, col = "blue", lwd = 3)
However, this produces an ugly graph: there seem to be numerous regression lines.
What am i doing wrong?
Data:
structure(list(Ozone = c(41L, 36L, 12L, 18L, 23L, 19L, 8L, 16L,
11L, 14L, 18L, 14L, 34L, 6L, 30L, 11L, 1L, 11L, 4L, 32L, 23L,
45L, 37L, 29L, 71L, 39L, 23L, 21L, 37L, 20L, 12L, 13L, 49L, 32L,
64L, 40L, 77L, 97L, 97L, 85L, 10L, 27L, 7L, 48L, 35L, 61L, 79L,
63L, 16L, 80L, 108L, 20L, 52L, 82L, 50L, 64L, 59L, 39L, 9L, 16L,
122L, 89L, 110L, 44L, 28L, 65L, 22L, 59L, 23L, 31L, 44L, 21L,
9L, 45L, 73L, 76L, 118L, 84L, 85L, 96L, 78L, 73L, 91L, 47L, 32L,
20L, 23L, 21L, 24L, 44L, 21L, 28L, 9L, 13L, 46L, 18L, 13L, 24L,
16L, 13L, 23L, 36L, 7L, 14L, 30L, 14L, 18L, 20L), Solar.R = c(190L,
118L, 149L, 313L, 299L, 99L, 19L, 256L, 290L, 274L, 65L, 334L,
307L, 78L, 322L, 44L, 8L, 320L, 25L, 92L, 13L, 252L, 279L, 127L,
291L, 323L, 148L, 191L, 284L, 37L, 120L, 137L, 248L, 236L, 175L,
314L, 276L, 267L, 272L, 175L, 264L, 175L, 48L, 260L, 274L, 285L,
187L, 220L, 7L, 294L, 223L, 81L, 82L, 213L, 275L, 253L, 254L,
83L, 24L, 77L, 255L, 229L, 207L, 192L, 273L, 157L, 71L, 51L,
115L, 244L, 190L, 259L, 36L, 212L, 215L, 203L, 225L, 237L, 188L,
167L, 197L, 183L, 189L, 95L, 92L, 252L, 220L, 230L, 259L, 236L,
259L, 238L, 24L, 112L, 237L, 224L, 27L, 238L, 201L, 238L, 14L,
139L, 49L, 20L, 193L, 191L, 131L, 223L), Wind = c(7.4, 8, 12.6,
11.5, 8.6, 13.8, 20.1, 9.7, 9.2, 10.9, 13.2, 11.5, 12, 18.4,
11.5, 9.7, 9.7, 16.6, 9.7, 12, 12, 14.9, 7.4, 9.7, 13.8, 11.5,
8, 14.9, 20.7, 9.2, 11.5, 10.3, 9.2, 9.2, 4.6, 10.9, 5.1, 6.3,
5.7, 7.4, 14.3, 14.9, 14.3, 6.9, 10.3, 6.3, 5.1, 11.5, 6.9, 8.6,
8, 8.6, 12, 7.4, 7.4, 7.4, 9.2, 6.9, 13.8, 7.4, 4, 10.3, 8, 11.5,
11.5, 9.7, 10.3, 6.3, 7.4, 10.9, 10.3, 15.5, 14.3, 9.7, 8, 9.7,
2.3, 6.3, 6.3, 6.9, 5.1, 2.8, 4.6, 7.4, 15.5, 10.9, 10.3, 10.9,
9.7, 14.9, 15.5, 6.3, 10.9, 11.5, 6.9, 13.8, 10.3, 10.3, 8, 12.6,
9.2, 10.3, 10.3, 16.6, 6.9, 14.3, 8, 11.5), Temp = c(67L, 72L,
74L, 62L, 65L, 59L, 61L, 69L, 66L, 68L, 58L, 64L, 66L, 57L, 68L,
62L, 59L, 73L, 61L, 61L, 67L, 81L, 76L, 82L, 90L, 87L, 82L, 77L,
72L, 65L, 73L, 76L, 85L, 81L, 83L, 83L, 88L, 92L, 92L, 89L, 73L,
81L, 80L, 81L, 82L, 84L, 87L, 85L, 74L, 86L, 85L, 82L, 86L, 88L,
86L, 83L, 81L, 81L, 81L, 82L, 89L, 90L, 90L, 86L, 82L, 80L, 77L,
79L, 76L, 78L, 78L, 77L, 72L, 79L, 86L, 97L, 94L, 96L, 94L, 91L,
92L, 93L, 93L, 87L, 84L, 80L, 78L, 75L, 73L, 81L, 76L, 77L, 71L,
71L, 78L, 67L, 76L, 68L, 82L, 64L, 71L, 81L, 69L, 63L, 70L, 75L,
76L, 68L), Month = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L), Day = c(1L, 2L, 3L, 4L, 7L, 8L, 9L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 28L, 29L, 31L, 7L,
9L, 10L, 13L, 16L, 17L, 18L, 19L, 20L, 2L, 3L, 5L, 6L, 7L, 8L,
9L, 10L, 12L, 13L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 7L, 8L, 9L, 12L, 13L,
14L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L, 26L, 28L, 29L, 30L,
31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
28L, 29L, 30L), ID = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 35L, 36L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L,
52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L,
65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 78L,
79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L,
92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L,
104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L)), .Names = c("Ozone",
"Solar.R", "Wind", "Temp", "Month", "Day", "ID"), row.names = c(NA,
-108L), class = c("tbl_df", "tbl", "data.frame"))
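Order your data by your x-axis variable before plotting, and your plot will be pretty. lines() connects the points in the order in which they appear in the data, so with unsorted Temp values the curve jumps back and forth across the plot: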
# Sort the rows by Temp so that a line drawn through them runs left to right.
Air_reduced <- Air_reduced[order(Air_reduced$Temp), ]
As a side note, I'd encourage you to try out ggplot2 for plotting. It can fit simple models and plot all at once, and it's smart about defaults (default labels, default ordering the points when plotting a line...). In this case, if you just want a plot with both polynomials, it takes just a few lines of code:
library(ggplot2)

# Scatter plot of Ozone against Temp with two lm smoothers (polynomials of
# degree 3 and 7). Each smoother maps colour to a constant label, and the
# manual colour scale assigns a colour to each label.
degree_labels <- c("3rd", "7th")
ggplot(Air_reduced, aes(x = Temp, y = Ozone)) +
  geom_point(color = "red") +
  stat_smooth(aes(color = "3rd"), method = "lm", formula = y ~ poly(x, 3)) +
  stat_smooth(aes(color = "7th"), method = "lm", formula = y ~ poly(x, 7)) +
  scale_color_manual(
    name = "Polynomial Degree",
    breaks = degree_labels,
    values = c("blue", "green4")
  )

Plotting a dendrogram with only a subset of observations in R

From an hclust object, how can I extract only selected observations (to_plot below) and plot a dendrogram from these selected observations? The subset of observations I want to plot as a dendrogram will not correspond to the tree structure of the hclust object, so I can't simply extract branches from the dendrogram.
NB. I do not wish to cluster or calculate the distance matrix using the subset of selected observations
Data
1/ hclust object
structure(list(merge = structure(c(-31L, -62L, -46L, -37L, -55L,
-47L, -75L, -57L, -6L, -2L, -45L, -99L, -51L, -12L, -30L, -4L,
3L, -53L, -61L, -27L, -56L, -83L, -38L, -101L, -69L, -11L, -14L,
-21L, -34L, -48L, -82L, -92L, -15L, -7L, -35L, -65L, -105L, -52L,
-40L, -64L, -23L, -94L, -98L, -1L, -25L, -8L, 8L, -41L, -3L,
-33L, -108L, 23L, -58L, -20L, -5L, -93L, 30L, -68L, -49L, -28L,
-17L, 9L, -32L, 35L, -95L, -67L, 26L, -107L, 17L, -19L, -74L,
-63L, 37L, 20L, -84L, 50L, -10L, -13L, 49L, 34L, 39L, 60L, -16L,
63L, 44L, 29L, 10L, -24L, 75L, 73L, 47L, 61L, 57L, 18L, 66L,
43L, 80L, 83L, -78L, -71L, 90L, 93L, 84L, 94L, 102L, 98L, 100L,
87L, 106L, 108L, -97L, 1L, -100L, -43L, -59L, -106L, 4L, -90L,
5L, 2L, -87L, -103L, -86L, -54L, -89L, -42L, 11L, 13L, 12L, -77L,
7L, 14L, 6L, -110L, 22L, -60L, -44L, -91L, -111L, -102L, -88L,
-104L, -50L, -22L, -36L, -79L, 28L, 24L, -66L, 15L, -29L, 25L,
32L, -109L, -39L, 45L, 42L, -96L, 16L, 33L, 19L, 40L, 27L, 31L,
-9L, 41L, 46L, -80L, -81L, -70L, -26L, 21L, -73L, 48L, 38L, 36L,
53L, 56L, 51L, -72L, -85L, -76L, 52L, 58L, 71L, 59L, 64L, -18L,
68L, 54L, 55L, 65L, 70L, 79L, 72L, 74L, 69L, 78L, 77L, 76L, 62L,
81L, 82L, 67L, 86L, 85L, 95L, 89L, 92L, 88L, 91L, 97L, 96L, 99L,
103L, 104L, 105L, 101L, 107L, 109L), .Dim = c(110L, 2L)), height = c(0,
0.188350217744365, 0.247401000321179, 0.249231910045009, 0.261866742195707,
0.377720124194474, 0.378461142310176, 0.527418629683044, 0.636480697844057,
0.70489556723743, 0.799857388088743, 0.895267189098051, 0.940604516439695,
1, 1, 1.25645841742159, 1.47637080579504, 1.49661353166068, 1.60280854934758,
1.64538982117314, 1.65011076915935, 1.66666666666667, 1.8661900064933,
1.91530600787293, 1.95979930296005, 2, 2, 2, 2, 2, 2, 2, 2.06532735656427,
2.32083831336158, 2.44558763136158, 2.48004395957454, 2.65074432837975,
2.69489799737569, 2.71536352494182, 2.75337988132381, 2.87695888696678,
2.89093184314013, 2.91669905927746, 3, 3.03504556878056, 3.42442760079317,
3.50924315636259, 3.54456009196554, 3.58118052752614, 3.80716728885077,
4.26149878117642, 4.63502500606874, 4.66666666666667, 4.66666666666667,
4.76912295317528, 4.90702353976517, 4.92512811564295, 5, 5.15887380396718,
5.20227981903921, 5.39890417564938, 5.71781232947912, 5.94961450567626,
6.17569787723772, 6.21000141305934, 6.47150288200403, 6.48552894195153,
6.61209720286382, 7.27379923250834, 7.65301130607984, 7.74920607244712,
7.8800745368487, 8.17570945188961, 8.75305138718179, 8.87870428752716,
9.36365055557565, 9.68439736325147, 10, 10.121604958431, 10.2845151775143,
10.7517404855684, 10.8165382868783, 11.4489962313067, 11.5939995243571,
12.8179231278111, 12.3055509866599, 14.1589468158871, 14.6988252554622,
14.7792803434488, 15.276874084329, 16.0150635281041, 17.9467649484583,
21.2687065983256, 21.3844895922187, 24.196270007066, 25.3163200486723,
34.1772731084418, 37.4454933955768, 42.6291683810462, 45.1916356921658,
52.531016897072, 55.6590891226214, 61.0699226448619, 73.7706208334886,
98.5310119994231, 148.608243702477, 150.474954574704, 187.419419688973,
241.610436881262, 487.90491231433), order = c(2L, 62L, 31L, 97L,
46L, 100L, 45L, 87L, 108L, 61L, 99L, 103L, 105L, 21L, 91L, 38L,
47L, 106L, 64L, 30L, 89L, 33L, 15L, 50L, 49L, 81L, 57L, 90L,
94L, 69L, 83L, 12L, 54L, 6L, 55L, 59L, 56L, 75L, 37L, 43L, 16L,
19L, 72L, 84L, 74L, 85L, 10L, 35L, 36L, 41L, 96L, 53L, 51L, 86L,
11L, 60L, 58L, 14L, 44L, 78L, 17L, 26L, 40L, 66L, 5L, 9L, 71L,
24L, 13L, 18L, 48L, 102L, 8L, 25L, 39L, 28L, 70L, 95L, 52L, 101L,
110L, 7L, 22L, 20L, 82L, 88L, 67L, 65L, 79L, 34L, 111L, 27L,
77L, 68L, 80L, 32L, 73L, 3L, 4L, 42L, 107L, 93L, 23L, 29L, 98L,
92L, 104L, 1L, 109L, 63L, 76L), labels = c("DX_100203", "DX_100208",
"DX_30528", "DX_100159", "DX_100211", "DX_100215", "DX_100246", "DX_100253",
"DX_100271", "DX_100212", "DX_100035", "DX_100164", "DX_100249", "DX_100036",
"DX_100165", "DX_100221", "DX_100254", "DX_100262", "DX_100274", "DX_100046",
"DX_100171", "DX_100230", "DX_100255", "DX_100275", "DX_100180", "DX_100269",
"DX_100278", "DX_100161", "DX_100229", "DX_100238", "DX_100093", "DX_100191",
"DX_100241", "DX_100237", "DX_100268", "DX_30515", "DX_90862", "DX_30529",
"DX_100073", "DX_90264", "DX_90221", "DX_30550", "DX_90885", "DX_100028",
"DX_100049", "DX_90257", "DX_90215", "DX_30527", "DX_30526", "DX_90892",
"DX_100051", "DX_90333", "DX_90286", "DX_90217", "DX_90252", "DX_90232",
"DX_30573", "DX_100214", "DX_90769", "DX_90907", "DX_100037", "DX_100054",
"DX_30568", "DX_90230", "DX_90280", "DX_90779", "DX_90959", "DX_100187",
"DX_100081", "DX_90310", "DX_90782", "DX_100023", "DX_90994", "DX_100042",
"DX_90304", "DX_100152", "DX_90272", "DX_90861", "DX_100043", "DX_100068",
"DX_30571", "DX_100085", "DX_90312", "DX_30590", "DX_90413", "DX_30561",
"DX_30548", "DX_90296", "DX_30558", "DX_90243", "DX_90293", "DX_90365",
"DX_30584", "DX_90274", "DX_90332", "DX_30583", "DX_30575", "DX_30523",
"DX_30578", "DX_90377", "DX_90297", "DX_30593", "DX_30555", "DX_30549",
"DX_90292", "DX_30565", "DX_30512", "DX_90285", "DX_90231", "DX_90209",
"DX_30570"), method = "ward", call = hclust(d = distance, method = method.hclust),
dist.method = "maximum"), .Names = c("merge", "height", "order",
"labels", "method", "call", "dist.method"), class = "hclust")
2/ subset of observations to extract for plotting as a dendrogram
# Labels of the observations to keep when plotting the dendrogram.
to_plot <- c("DX_90264", "DX_90221", "DX_30550", "DX_90885", "DX_100028", "DX_100159",
"DX_100049", "DX_90257", "DX_90215", "DX_30527", "DX_30526", "DX_90892",
"DX_100051", "DX_90333", "DX_90286", "DX_90217", "DX_90252", "DX_90232",
"DX_30573", "DX_100214", "DX_90769", "DX_90907", "DX_100037", "DX_100054", "DX_30565")
Based on the comment of @RomanLuštrik, I would suggest something like this:
# Cluster the built-in USArrests data.
hc <- hclust(dist(USArrests), "ave")

# Draw a reproducible random sample of 20 observations to display.
set.seed(1)
toPlot <- sample(rownames(USArrests), size = 20)

# Label vector in row order: the row name for sampled observations and an
# empty string for all others, so only the sampled leaves are annotated.
labels <- ifelse(rownames(USArrests) %in% toPlot, rownames(USArrests), "")
plot(hc, labels = labels)

Resources