geom_smooth not working for trendline, too few points? - r

I am trying to get a trendline for my two sets of averages, in my main graph I will be putting error bars on the points to show the sd's but below is a simplified version:
ggplot(sl, aes(x=Stresslevel, y=Final, color=Treatment)) +
geom_point() +
geom_smooth(method = "lm")
In my output I can see in the legend that it is trying to add it, but it is not showing on the graph:
enter image description here
Here is an image of the data:
enter image description here
Edit: Here is my data, thank you for the advice on how to share it:
dput(sl)
structure(list(Stresslevel = structure(c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 3L, 4L, 5L), .Label = c("0", "1", "2 (30%)", "3 (50%)",
"4 (70%)", "5", "Recovered"), class = "factor"), WL = c(0, 15.5,
32.8, 52.9, 69.8, 89.2, 13.5, 30, 50, 70), WLsd = c(5, 6.5, 8.1,
8.8, 10.6, 4.2, 9.8, 5, 5, 5), Final = c(0.0292, 0.0276, 0.0263,
0.0248, 0.0208, 0.0199, 0.0249, 0.0274, 0.0235, 0.0121), Treatment = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("Stressed", "Treated"
), class = "factor"), Finalsd = c(0.0039, 0.0019, 0.0026, 0.0033,
0.002, 0.0021, 0.0028, 0.0049, 0.0048, 0.0026), Dry = c(0.006,
0.008, 0.0107, 0.0139, 0.0138, 0.0174, 0.0047, 0.008, 0.0116,
0.0105), Drysd = c(0.0015, 0.0015, 0.0017, 0.0024, 0.0011, 0.0022,
0.001, 0.0016, 0.0033, 0.0021), Delta = c(0.0231, 0.0196, 0.0155,
0.0109, 0.007, 0.0025, 0.0201, 0.0194, 0.012, 0.0016), Deltasd = c(0.0034,
0.0015, 0.0019, 0.002, 0.0024, 0.001, 0.0025, 0.0043, 0.0035,
0.0013), WC = c(4.07, 2.54, 1.48, 0.81, 0.52, 0.15, 4.44, 2.48,
1.11, 0.16), WCsd = c(1.22, 0.59, 0.26, 0.21, 0.2, 0.08, 1.06,
0.56, 0.45, 0.12), CD = c(1, 1.33, 1.78, 2.31, 2.29, 2.89, 0.78,
1.33, 1.92, 1.75), CDsd = c(0.24, 0.25, 0.28, 0.4, 0.19, 0.37,
0.16, 0.26, 0.54, 0.35)), class = "data.frame", row.names = c(NA,
-10L))
Any help would be greatly appreciated.

Your x variable is a factor, meaning it is a categorical variable, so it's not clear how to fit a regression line through that:
str(sl)
'data.frame': 10 obs. of 14 variables:
$ Stresslevel: Factor w/ 7 levels "0","1","2 (30%)",..: 1 2 3 4 5 6 7 3 4 5
$ WL : num 0 15.5 32.8 52.9 69.8 89.2 13.5 30 50 70
I am not sure whether it makes sense to convert your categories to numeric values (that is, stress level "0" becomes 1, stress level "1" becomes 2, and so on) and force a line through them, but this is how you would do it:
# Scatter plot of Final against Stresslevel, coloured by Treatment.
# Only the smoothing layer remaps x to the factor's integer codes, so that
# lm() receives a numeric predictor; no confidence band is drawn.
ggplot(
  data = sl,
  mapping = aes(x = Stresslevel, y = Final, color = Treatment)
) +
  geom_point() +
  geom_smooth(
    mapping = aes(x = as.numeric(Stresslevel)),
    method = "lm",
    se = FALSE
  )
I would say it might make sense to connect the lines, if it makes sense to look at the progression of your dependent variable from 0 to 5 stress:
# Same scatter plot, but the points are joined by a dashed line instead of a
# fitted trend. The line layer maps x to the factor's integer codes.
ggplot(
  data = sl,
  mapping = aes(x = Stresslevel, y = Final, color = Treatment)
) +
  geom_point() +
  geom_line(
    mapping = aes(x = as.numeric(Stresslevel)),
    linetype = "dashed"
  )

Related

Divide each column of a dataframe by one row of the dataframe

I would like to divide each column of my dataframe by the values of one row.
I tried to transform my dataframe into a matrix and to extract one row of the dataframe as a vector then divide the matrix by the vector but it did not work. Indeed, only the first row of the matrix got divided by the vector.
Here is my original dataframe.
And this is the code I tried to run :
data <- read_excel("Documents/TFB/xlsx_geochimie/solfatara_maj.xlsx")
View(data)
data.mat <- as.matrix(data[,2:20])
vector <- data[12,2:20]
data.mat/vector
We replicate the vector so that it has the same length as the matrix, with each value lined up with its column, and then do the division
data.mat/unlist(vector)[col(data.mat)]
# FeO Total S SO4 Total N SiO2 Al2O3 Fe2O3 MnO MgO CaO Na2O K2O
#[1,] 0.10 16.5555556 NA NA 0.8908607 0.8987269 0.1835206 0.08333333 0.03680982 0.04175365 0.04823151 0.5738562
#[2,] 0.40 125.8333333 NA NA 0.5510204 0.4456019 0.2359551 0.08333333 0.04294479 0.01878914 0.04501608 0.2588235
#[3,] 0.85 0.6111111 NA NA 1.0021295 1.0162037 0.7715356 1.08333333 0.53987730 0.69728601 1.03858521 1.0457516
#[4,] 0.15 48.0555556 NA NA 1.1027507 0.2569444 NA 0.08333333 0.01840491 0.01878914 0.04180064 0.1647059
#[5,] 0.85 NA NA NA 1.0889086 1.0271991 0.6591760 0.75000000 0.59509202 0.53862213 1.02250804 1.1228758
#[6,] NA NA NA NA 1.3426797 0.6319444 0.0411985 0.08333333 0.03067485 0.11899791 0.65594855 0.7764706
# TiO2 P2O5 LOI LOI2 Total Total 2 Fe2O3(T)
#[1,] 0.7924528 0.3928571 7.0841837 6.6963855 0.9922233 0.9894632 0.14489796
#[2,] 0.5094340 0.3214286 14.5561224 13.7710843 0.9958126 0.9936382 0.31020408
#[3,] 0.8679245 0.6428571 1.5637755 1.5228916 0.9990030 0.9970179 0.80612245
#[4,] 1.4905660 0.2857143 7.4056122 7.0024096 0.9795613 0.9769384 0.05510204
#[5,] 1.0377358 0.2500000 0.3520408 0.3783133 0.9969093 0.9960239 0.74489796
#[6,] 0.3018868 0.2500000 1.2551020 1.1879518 1.0019940 1.0000000 0.04489796
Or use sweep
sweep(data.mat, MARGIN = 2, unlist(vector), FUN = `/`)
Or using mapply with asplit
mapply(`/`, asplit(data.mat, 2), vector)
data
# Sample 6 x 19 matrix for the division examples. It is named `data.mat`
# because that is the name the division snippets refer to.
data.mat <- structure(c(0.2, 0.8, 1.7, 0.3, 1.7, NA, 5.96, 45.3, 0.22, 17.3,
NA, NA, NA, 6.72, NA, 4.08, 0.06, 0.16, NA, NA, NA, NA, NA, NA,
50.2, 31.05, 56.47, 62.14, 61.36, 75.66, 15.53, 7.7, 17.56, 4.44,
17.75, 10.92, 0.49, 0.63, 2.06, NA, 1.76, 0.11, 0.01, 0.01, 0.13,
0.01, 0.09, 0.01, 0.06, 0.07, 0.88, 0.03, 0.97, 0.05, 0.2, 0.09,
3.34, 0.09, 2.58, 0.57, 0.15, 0.14, 3.23, 0.13, 3.18, 2.04, 4.39,
1.98, 8, 1.26, 8.59, 5.94, 0.42, 0.27, 0.46, 0.79, 0.55, 0.16,
0.11, 0.09, 0.18, 0.08, 0.07, 0.07, 27.77, 57.06, 6.13, 29.03,
1.38, 4.92, 27.79, 57.15, 6.32, 29.06, 1.57, 4.93, 99.52, 99.88,
100.2, 98.25, 99.99, 100.5, 99.54, 99.96, 100.3, 98.28, 100.2,
100.6, 0.71, 1.52, 3.95, 0.27, 3.65, 0.22), .Dim = c(6L, 19L), .Dimnames = list(
NULL, c("FeO", "Total S", "SO4", "Total N", "SiO2", "Al2O3",
"Fe2O3", "MnO", "MgO", "CaO", "Na2O", "K2O", "TiO2", "P2O5",
"LOI", "LOI2", "Total", "Total 2", "Fe2O3(T)")))
vector <- structure(list(FeO = 2, `Total S` = 0.36, SO4 = NA_real_, `Total N` = NA_real_,
SiO2 = 56.35, Al2O3 = 17.28, Fe2O3 = 2.67, MnO = 0.12, MgO = 1.63,
CaO = 4.79, Na2O = 3.11, K2O = 7.65, TiO2 = 0.53, P2O5 = 0.28,
LOI = 3.92, LOI2 = 4.15, Total = 100.3, `Total 2` = 100.6,
`Fe2O3(T)` = 4.9), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame"))
To divide data frame, df, by the third row:
df/df[rep(3, nrow(df)), ]

How to iteratively fit a brms regression model and extract means and sigma to the dataframe

Given the sample data sampleDT below, I would appreciate any help to iteratively fit the brms model below n times, and each time extract the means and sigma from the brmsfit object brm.fit.n and add them to the data frame sampleDT.
If n=10, then there should be 10 columns of means and 10 columns of sigma added to the data frame.
My attempt below does not work as intended. It allows me to run the brms model n times and generate the means and sigma n times, but does not add them to the data frame - one column for each means and one column for each sigma from each run - as intended.
#sample data
sampleDT<-structure(list(id = 1:10, N = c(10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L), A = c(62L, 96L, 17L, 41L, 212L, 143L, 143L,
143L, 73L, 73L), B = c(3L, 1L, 0L, 2L, 170L, 21L, 0L, 33L, 62L,
17L), C = c(0.05, 0.01, 0, 0.05, 0.8, 0.15, 0, 0.23, 0.85, 0.23
), employer = c(1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L), F = c(0L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L), G = c(1.94, 1.19, 1.16,
1.16, 1.13, 1.13, 1.13, 1.13, 1.12, 1.12), H = c(0.14, 0.24,
0.28, 0.28, 0.21, 0.12, 0.17, 0.07, 0.14, 0.12), dollar.wage_1 = c(1.94,
1.19, 3.16, 3.16, 1.13, 1.13, 2.13, 1.13, 1.12, 1.12), dollar.wage_2 = c(1.93,
1.18, 3.15, 3.15, 1.12, 1.12, 2.12, 1.12, 1.11, 1.11), dollar.wage_3 = c(1.95,
1.19, 3.16, 3.16, 1.14, 1.13, 2.13, 1.13, 1.13, 1.13), dollar.wage_4 = c(1.94,
1.18, 3.16, 3.16, 1.13, 1.13, 2.13, 1.13, 1.12, 1.12), dollar.wage_5 = c(1.94,
1.19, 3.16, 3.16, 1.14, 1.13, 2.13, 1.13, 1.12, 1.12), dollar.wage_6 = c(1.94,
1.18, 3.16, 3.16, 1.13, 1.13, 2.13, 1.13, 1.12, 1.12), dollar.wage_7 = c(1.94,
1.19, 3.16, 3.16, 1.14, 1.13, 2.13, 1.13, 1.12, 1.12), dollar.wage_8 = c(1.94,
1.19, 3.16, 3.16, 1.13, 1.13, 2.13, 1.13, 1.12, 1.12), dollar.wage_9 = c(1.94,
1.19, 3.16, 3.16, 1.13, 1.13, 2.13, 1.13, 1.12, 1.12), dollar.wage_10 = c(1.94,
1.19, 3.16, 3.16, 1.13, 1.13, 2.13, 1.13, 1.12, 1.12)), row.names = c(NA,
-10L), class = "data.frame")
#my attempt
map_dfc(1:10, function(i) {
brm.fit.n <-brm(dollar.wage_1 ~ A + B + C + employer + F + G + H,
data=sampleDT, iter = 200, family = gaussian())
sampleDT$mean.n<-fitted(brm.fit.n)[, 1]
sampleDT$sd.n<-summary(brm.fit.n)$spec_pars[1]
return(sampleDT)
})
This question has also been posted here. Thanks in advance for any help.
The two things you need to do to adapt your existing code into a function are:
Repeat the fit n times
Save the output in a nice structure
There are lots of ways to do both parts, one option is purrr::map_dfr which can do both, applying the fit multiple times and creating a dataframe.
Instead of a brm model, which takes some time to fit, I've fit a simple linear model to random data instead, you would just have to replace those parts with your fitting code and save the mean and sd instead of the intercept and slope:
library(purrr)
# One simulated fit per element of 1:10, i.e. the fit is repeated 10 times.
# map_dfr() row-binds the one-row data frames; .id stores the run index.
map_dfr(
  1:10,
  function(i) {
    # Fresh random data on every run (x is drawn before y)
    sims <- data.frame(x = rnorm(30), y = rnorm(30))
    coefs <- coef(lm(y ~ x, data = sims))
    # Replace these two values with the mean and sd of your own model
    data.frame(intercept = coefs[1], slope = coefs[2])
  },
  .id = "sim_num"
)
Which gives a dataframe as output:
sim_num intercept slope
1 1 0.274903632 -0.03529736
2 2 -0.005134599 -0.22063748
3 3 -0.134999713 0.12090366
4 4 -0.216886033 0.21265679
5 5 0.261365432 0.02434036
6 6 0.067069791 0.23180334
7 7 -0.235138217 0.28360061
8 8 -0.117489553 0.10781101
9 9 -0.150288480 0.03086797
10 10 -0.031814194 -0.04075479

R: expand data frame columnwise with shifted rows of data

- Example Data to work with:
To create a reduced example, this is the output of dput(df):
df <- structure(list(SubjectID = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L), .Label = c("1", "2", "3"), class = "factor"), EventNumber = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("1", "2"), class = "factor"),
EventType = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L
), .Label = c("A", "B"), class = "factor"), Param1 = c(0.3,
0.21, 0.87, 0.78, 0.9, 1.2, 1.4, 1.3, 0.6, 0.45, 0.45, 0.04,
0, 0.1, 0.03, 0.01, 0.09, 0.06, 0.08, 0.09, 0.03, 0.04, 0.04,
0.02), Param2 = c(45, 38, 76, 32, 67, 23, 27, 784, 623, 54,
54, 1056, 487, 341, 671, 859, 7769, 2219, 4277, 4060, 411,
440, 224, 57), Param3 = c(1.5, 1.7, 1.65, 1.32, 0.6, 0.3,
2.5, 0.4, 1.4, 0.67, 0.67, 0.32, 0.1, 0.15, 0.22, 0.29, 0.3,
0.2, 0.8, 1, 0.9, 0.8, 0.3, 0.1), Param4 = c(0.14, 0, 1,
0.86, 0, 0.6, 1, 1, 0.18, 0, 0, 0.39, 0, 1, 0.29, 0.07, 0.33,
0.53, 0.29, 0.23, 0.84, 0.61, 0.57, 0.59), Param5 = c(0.18,
0, 1, 0, 1, 0, 0.09, 1, 0.78, 0, 0, 1, 0.2, 0, 0.46, 0.72,
0.16, 0.22, 0.77, 0.52, 0.2, 0.68, 0.58, 0.17), Param6 = c(0,
1, 0.75, 0, 0.14, 0, 1, 0, 1, 0.27, 0, 1, 0, 0.23, 0.55,
0.86, 1, 0.33, 1, 1, 0.88, 0.75, 0, 0), AbsoluteTime = structure(c(1522533600,
1522533602, 1522533604, 1522533604, 1525125600, 1525125602,
1525125604, 1519254000, 1519254002, 1519254004, 1519254006,
1521759600, 1521759602, 1521759604, 1521759606, 1521759608,
1517353224, 1517353226, 1517353228, 1517353230, 1517439600,
1517439602, 1517439604, 1517439606), class = c("POSIXct",
"POSIXt"), tzone = "")), row.names = c(NA, -24L), class = "data.frame")
df
The real data has 20 subjects, EventNumbers ranging from 1 to 100, and parameters from Param1 to Param40 (depending on the experiment).
The number of rows is around 60 000 observations.
- What I want to achieve:
For df, create n * 40 new columns. # (40 or any number of parameters that will be chosen later.)
Think of n as "steps into the future".
Name the 40 * n newly created columns:
Param1_2, Param2_2, Param3_2, ..., Param39_2, Param40_2, ...,
Param1_3, Param2_3, Param3_3, ..., Param39_3, Param40_3, ...,
...,
Param1_n, Param2_n, Param3_n, ..., Param39_n, Param40_n
Resulting in columns
Param1_1, Param2_1, Param1_2, Param2_2, Param1_3, Param2_3, Param1_4, Param2_4, ... Param1_n, Param2_n
So every observation of subset df[X, c(4:9)] will get an additional set of variables with values from df[X+1, c(4:9)] to df[X+n, c(4:9)].
This is what the new df.extended should look like for n = 1:
df.extended <- structure(list(SubjectID = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3), EventNumber = c(1, 1,
1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2,
2), EventType = c("A", "A", "A", "A", "B", "B", "B", "A", "A",
"A", "A", "B", "B", "B", "B", "B", "A", "A", "A", "A", "B", "B",
"B", "B"), Param1 = c(0.3, 0.21, 0.87, 0.78, 0.9, 1.2, 1.4, 1.3,
0.6, 0.45, 0.45, 0.04, 0, 0.1, 0.03, 0.01, 0.05, 0.07, 0.06,
0.01, 0.01, 0.01, 0.07, 0.04), Param2 = c(45, 38, 76, 32, 67,
23, 27, 784, 623, 54, 54, 1056, 487, 341, 671, 859, 1858, 640,
8181, 220, 99, 86, 170, 495), Param3 = c(1.5, 1.7, 1.65, 1.32,
0.6, 0.3, 2.5, 0.4, 1.4, 0.67, 0.67, 0.32, 0.1, 0.15, 0.22, 0.29,
1.5, 0.9, 0.8, 0.9, 0.1, 0, 0.8, 0.1), Param4 = c(0.14, 0, 1,
0.86, 0, 0.6, 1, 1, 0.18, 0, 0, 0.39, 0, 1, 0.29, 0.07, 0.64,
0.11, 0.12, 0.32, 0.55, 0.67, 0.83, 0.82), Param5 = c(0.18, 0,
1, 0, 1, 0, 0.09, 1, 0.78, 0, 0, 1, 0.2, 0, 0.46, 0.72, 0.27,
0.14, 0.7, 0.67, 0.23, 0.44, 0.61, 0.76), Param6 = c(0, 1, 0.75,
0, 0.14, 0, 1, 0, 1, 0.27, 0, 1, 0, 0.23, 0.55, 0.86, 1, 0.56,
0.45, 0.5, 0, 0, 0.89, 0.11), AbsoluteTime = c("2018-04-01 00:00:00",
"2018-04-01 00:00:02", "2018-04-01 00:00:04", "2018-04-01 00:00:04",
"2018-05-01 00:00:00", "2018-05-01 00:00:02", "2018-05-01 00:00:04",
"2018-02-22 00:00:00", "2018-02-22 00:00:02", "2018-02-22 00:00:04",
"2018-02-22 00:00:06", "2018-03-23 00:00:00", "2018-03-23 00:00:02",
"2018-03-23 00:00:04", "2018-03-23 00:00:06", "2018-03-23 00:00:08",
"2018-01-31 00:00:24", "2018-01-31 00:00:26", "2018-01-31 00:00:28",
"2018-01-31 00:00:30", "2018-02-01 00:00:00", "2018-02-01 00:00:02",
"2018-02-01 00:00:04", "2018-02-01 00:00:06"), Param1_2 = c(0.21,
0.87, 0.78, NA, 1.2, 1.4, NA, 0.6, 0.45, 0.45, NA, 0, 0.1, 0.03,
0.01, NA, 0.07, 0.07, 0.08, NA, 0.09, 0.06, 0.01, NA), Param2_2 = c(38,
76, 32, NA, 23, 27, NA, 623, 54, 54, NA, 487, 341, 671, 859,
NA, 6941, 4467, 808, NA, 143, 301, 219, NA), Param3_2 = c(1.7,
1.65, 1.32, NA, 0.3, 2.5, NA, 1.4, 0.67, 0.67, NA, 0.1, 0.15,
0.22, 0.29, NA, 1, 1, 0.1, NA, 0.5, 1, 0.3, NA), Param4_2 = c(0,
1, 0.86, NA, 0.6, 1, NA, 0.18, 0, 0, NA, 0, 1, 0.29, 0.07, NA,
0.31, 0.16, 0.68, NA, 0.86, 0.47, 0.47, NA), Param5_2 = c(0,
1, 0, NA, 0, 0.09, NA, 0.78, 0, 0, NA, 0.2, 0, 0.46, 0.72, NA,
0.29, 0.26, 0.1, NA, 0.88, 0.86, 0.95, NA), Param6_2 = c(1, 0,
0, NA, 0, 1, NA, 1, 0.27, 0, NA, 0, 0.23, 0.55, 0.86, NA, 0.68,
0.66, 0, NA, 0.44, 1, 0.22, NA)), row.names = c(NA, 24L), class = "data.frame")
df.extended
How can this be solved without using loops, writing column indexes by hand etc.? Write a function for trial 2 and use doBy?
My thoughts and what I have done so far to solve this:
Trial 1:
Cycle through the SubjectIDs in a for-loop
In an inner for-loop, cycle through the EventNumber
In another inner for-loop, cycle through the rows
Get the first row by grabbing df[1, ] and save into df.temp
Merge df.temp with df[2, parameters] #
Merge df.temp with df[3, parameters] and so on
Save all resulting df.temps into df.final
Problems I ran into: Step 5:
df.temp <- df[1,]
df.temp <- merge(df.temp, df[2, !(colnames(df) == "AbsoluteTime")], by = c("SubjectID", "EventNumber", "EventType"))
df.temp <- merge(df.temp, df[3, !(colnames(df) == "AbsoluteTime")], by = c("SubjectID", "EventNumber", "EventType"))
df.temp <- merge(df.temp, df[4, !(colnames(df) == "AbsoluteTime")], by = c("SubjectID", "EventNumber", "EventType"))
Warning:
In merge.data.frame(df.temp, df[4, ], by = c("SubjectID", "EventNumber", :
column names ‘Param1.x’, ‘Param2.x’, ‘Param3.x’, ‘Param4.x’, ‘Param5.x’, ‘Param6.x’, ‘AbsoluteTime.x’, ‘Param1.y’, ‘Param2.y’,
‘Param3.y’, ‘Param4.y’, ‘Param5.y’, ‘Param6.y’, ‘AbsoluteTime.y’ are
duplicated in the result.
The column names are repeated, see the warning.
I can not figure out how to easily create the column names / rename the new columns based on a given column name and variable.
There must be a better way than this:
# Build suffixed column names (e.g. "Param1_2") for n shifted copies of the
# parameter columns and assign them to df.temp.
n <- 3
names_vector <- c()
# NOTE(review): the loop variable reuses the name `n`. seq() is evaluated once
# before the loop starts, so it still iterates over 1..3, but the outer `n` is
# overwritten on every iteration.
for (n in seq(from = c(1), to = n)) {
# Iterate over the names of columns 4 to 9 of df
for (i in names(df[4:9])) {
# The suffix is n + 1, so the first shifted copy is labelled "_2"
names_vector <- c(names_vector, paste0(i, "_", c(n+1)))
}
}
# NOTE(review): `parameters` is not defined in this snippet — presumably the
# original parameter column names; verify.
names(df.temp)[c(4:9)] <- parameters
# Columns 11 onwards receive the generated names
names(df.temp)[c(11:ncol(df.temp))] <- names_vector
names(df.temp)
Also, how do I prevent the last n-1 rows from breaking the script? This is a lot of work to do by hand and I think quite error prone!?
Trial 2:
Cycle through the SubjectIDs in a for-loop
In an inner for-loop, cycle through the EventNumber
Get all rows of parameters into a new data frame except the first row
Append a row with NAs
use cbind() to merge the rows
Repeat n times.
This is the code for one SubjectID and one EventNumber:
# Select the rows of one SubjectID / EventNumber combination
df.temp <- df[which(df$SubjectID == "1" & df$EventNumber == "1"), ]
# NOTE(review): `parameters` is not defined in this snippet — presumably the
# parameter column names (names(df)[4:9]); verify.
# Step 1: parameter values of rows 2..n, padded with one NA row at the end.
# `2:nrow(df.temp)-1` parses as `(2:nrow(df.temp)) - 1`, i.e. rows 1..(n-1),
# because `:` binds tighter than binary minus, so no "- 1" is used here.
df.temp2 <- df.temp[2:nrow(df.temp), parameters]
df.temp2 <- rbind(df.temp2, NA)
df.temp <- cbind(df.temp, df.temp2)
# Step 2: rows 3..n, padded with two NA rows
df.temp2 <- df.temp[3:nrow(df.temp), parameters]
df.temp2 <- rbind(df.temp2, NA, NA)
df.temp <- cbind(df.temp, df.temp2)
# Step 3: rows 4..n, padded with three NA rows
# (assumes the selected group has at least 4 rows)
df.temp2 <- df.temp[4:nrow(df.temp), parameters]
df.temp2 <- rbind(df.temp2, NA, NA, NA)
df.temp <- cbind(df.temp, df.temp2)
# Build suffixed column names (e.g. "Param1_2") for the n shifted copies of the
# parameter columns and assign them to df.temp.
n <- 3
names_vector <- c()
# NOTE(review): the loop variable reuses the name `n`. seq() is evaluated once
# before the loop starts, so it still iterates over 1..3, but the outer `n` is
# overwritten on every iteration.
for (n in seq(from = c(1), to = n)) {
# Iterate over the names of columns 4 to 9 of df
for (i in names(df[4:9])) {
# Debug output: current column name and step
print(i)
print(n)
# The suffix is n + 1, so the first shifted copy is labelled "_2"
names_vector <- c(names_vector, paste0(i, "_", c(n+1)))
}
}
# NOTE(review): `parameters` is not defined in this snippet — presumably the
# original parameter column names; verify.
names(df.temp)[c(4:9)] <- parameters
# Columns 11 onwards receive the generated names
names(df.temp)[c(11:ncol(df.temp))] <- names_vector
df.temp
That solves the problem with missing rows (NAs are acceptable in my case).
Still lots of work by hand / for loops and error prone!?
What about something like this:
You can use the developer version of the package dplyr to add and rename variables according to various subsets of interest in your data. dplyr also provides the functions lead() and lag(), which can be used to find the "next" or "previous" values in a vector (here, the following or preceding rows). You can use lead() in combination with the function mutate_at() to extract the values from the succeeding "nth" row and use them to create a new set of variables.
Here I use the data you provided in your example:
# load dplyr package (library() stops with an error if it is not installed,
# whereas require() would only return FALSE and fail later)
library(dplyr)
# create new data frame "df.extended"
df.extended <- df
# number of observations per group (e.g., SubjectID)
# or desired number of successions
obs <- 3
# loop until number of successions achieved
# (seq_len() gives an empty sequence for obs = 0, unlike 1:obs)
for (i in seq_len(obs)) {
  # overwrite df.extended with new information
  df.extended <- df.extended %>%
    # group by subjects and events
    group_by(SubjectID, EventNumber) %>%
    # create new variable for each parameter
    mutate_at(
      vars(Param1:Param6),
      # using the lead function; a named list replaces the deprecated funs()
      # and still produces the "<column>_step" names
      .funs = list(step = lead),
      # for the nth following row
      n = i
    ) %>%
    # rename the new variables to show the succession number
    rename_at(
      vars(contains("_step")),
      ~ sub("step", as.character(i), .x)
    )
}
# NOTE: df.extended is still grouped by SubjectID and EventNumber here;
# call ungroup() before doing further work that should ignore the groups.
This should roughly recreate the data you posted as desired result.
# Look at first part of "df.extended"
> head(df.extended)
# A tibble: 6 x 28
# Groups: SubjectID, EventNumber [2]
SubjectID EventNumber EventType Param1 Param2 Param3 Param4 Param5 Param6 AbsoluteTime Param1_1 Param2_1 Param3_1 Param4_1 Param5_1 Param6_1
<fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 A 0.300 45. 1.50 0.140 0.180 0. 2018-04-01 00:00:00 0.210 38. 1.70 0. 0. 1.00
2 1 1 A 0.210 38. 1.70 0. 0. 1.00 2018-04-01 00:00:02 0.870 76. 1.65 1.00 1.00 0.750
3 1 1 A 0.870 76. 1.65 1.00 1.00 0.750 2018-04-01 00:00:04 0.780 32. 1.32 0.860 0. 0.
4 1 1 A 0.780 32. 1.32 0.860 0. 0. 2018-04-01 00:00:04 NA NA NA NA NA NA
5 1 2 B 0.900 67. 0.600 0. 1.00 0.140 2018-05-01 00:00:00 1.20 23. 0.300 0.600 0. 0.
6 1 2 B 1.20 23. 0.300 0.600 0. 0. 2018-05-01 00:00:02 1.40 27. 2.50 1.00 0.0900 1.00
# ... with 12 more variables: Param1_2 <dbl>, Param2_2 <dbl>, Param3_2 <dbl>, Param4_2 <dbl>, Param5_2 <dbl>, Param6_2 <dbl>, Param1_3 <dbl>,
# Param2_3 <dbl>, Param3_3 <dbl>, Param4_3 <dbl>, Param5_3 <dbl>, Param6_3 <dbl>
For base R, consider by to slice by SubjectID, EventNumber, and EventType, and run a merge using a helper column grp_num. To run this across a series of steps, wrap the by process in an lapply to get a list of data frames, chain-merge that list, and finally merge the result with the original data frame:
# Group identifiers plus the within-group row counter used as merge keys
key_cols <- c("SubjectID", "EventNumber", "EventType", "grp_num")

# For every step label i (2 = next row, 3 = row after next) build a data frame
# holding, per row, the parameter values found i - 1 rows later in its group
df_list <- lapply(2:3, function(i) {
  # BUILD LIST OF DATAFRAMES (ONE PER SubjectID / EventNumber / EventType GROUP)
  by_list <- by(df, df[c("SubjectID", "EventNumber", "EventType")],
                FUN = function(sub) {
    # seq_len() instead of 1:nrow(sub): never yields c(1, 0) for an empty slice
    sub$grp_num <- seq_len(nrow(sub))
    # Lowering grp_num by i - 1 lines row k + (i - 1) up with row k in the merge
    row_less_sub <- transform(sub, AbsoluteTime = NULL,
                              grp_num = grp_num - (i - 1))
    # all.x = TRUE keeps rows without a successor; their new columns are NA
    merge(sub, row_less_sub, by = key_cols,
          all.x = TRUE, suffixes = c("", paste0("_", i)))
  })
  # APPEND ALL DATAFRAMES IN LIST
  grp_df <- do.call(rbind, by_list)
  grp_df <- with(grp_df, grp_df[order(SubjectID, EventNumber), ])
  # KEEP NEEDED COLUMNS
  # "[0-9]+" also matches multi-digit parameters such as Param10_2 .. Param40_2,
  # which the single-digit pattern "Param[0-9]_" silently dropped
  grp_df <- grp_df[c(key_cols,
                     grep("^Param[0-9]+_", names(grp_df), value = TRUE))]
  row.names(grp_df) <- NULL
  grp_df
})
# ALL PARAMS_* CHAIN MERGE
params_df <- Reduce(function(x, y) merge(x, y, by = key_cols), df_list)
# ORIGINAL DF AND PARAMS MERGE
# Row counter within each group; built from row positions so it does not
# depend on any particular parameter column being present
df$grp_num <- ave(seq_len(nrow(df)), df$SubjectID, df$EventNumber, df$EventType,
                  FUN = seq_along)
final_df <- transform(merge(df, params_df, by = key_cols), grp_num = NULL)
Output
head(final_df, 10)
# SubjectID EventNumber EventType Param1 Param2 Param3 Param4 Param5 Param6 AbsoluteTime Param1_2 Param2_2 Param3_2 Param4_2 Param5_2 Param6_2 Param1_3 Param2_3 Param3_3 Param4_3 Param5_3 Param6_3
# 1 1 1 A 0.30 45 1.50 0.14 0.18 0.00 2018-03-31 17:00:00 0.21 38 1.70 0.00 0.00 1.00 0.87 76 1.65 1.00 1.00 0.75
# 2 1 1 A 0.21 38 1.70 0.00 0.00 1.00 2018-03-31 17:00:02 0.87 76 1.65 1.00 1.00 0.75 0.78 32 1.32 0.86 0.00 0.00
# 3 1 1 A 0.87 76 1.65 1.00 1.00 0.75 2018-03-31 17:00:04 0.78 32 1.32 0.86 0.00 0.00 NA NA NA NA NA NA
# 4 1 1 A 0.78 32 1.32 0.86 0.00 0.00 2018-03-31 17:00:04 NA NA NA NA NA NA NA NA NA NA NA NA
# 5 1 2 B 0.90 67 0.60 0.00 1.00 0.14 2018-04-30 17:00:00 1.20 23 0.30 0.60 0.00 0.00 1.40 27 2.50 1.00 0.09 1.00
# 6 1 2 B 1.20 23 0.30 0.60 0.00 0.00 2018-04-30 17:00:02 1.40 27 2.50 1.00 0.09 1.00 NA NA NA NA NA NA
# 7 1 2 B 1.40 27 2.50 1.00 0.09 1.00 2018-04-30 17:00:04 NA NA NA NA NA NA NA NA NA NA NA NA
# 8 2 1 A 1.30 784 0.40 1.00 1.00 0.00 2018-02-21 17:00:00 0.60 623 1.40 0.18 0.78 1.00 0.45 54 0.67 0.00 0.00 0.27
# 9 2 1 A 0.60 623 1.40 0.18 0.78 1.00 2018-02-21 17:00:02 0.45 54 0.67 0.00 0.00 0.27 0.45 54 0.67 0.00 0.00 0.00
# 10 2 1 A 0.45 54 0.67 0.00 0.00 0.27 2018-02-21 17:00:04 0.45 54 0.67 0.00 0.00 0.00 NA NA NA NA NA NA

Nested reshape from wide to long

I keep on getting all sort of error messages when trying to reshape an object into long direction. Toy data:
d <- structure(c(0.204, 0.036, 0.015, 0.013, 0.208, 0.037, 0.015,
0.006, 0.186, 0.044, 0.016, 0.023, 0.251, 0.044, 0.02, 0.01,
0.268, 0.04, 0.007, 0.007, 0.208, 0.062, 0.027, 0.036, 0.272,
0.054, 0.006, 0.01, 0.274, 0.05, 0.011, 0.006, 0.28, 0.039, 0.007,
0.019, 1.93, 0.345, 0.087, 0.094, 2.007, 0.341, 0.064, 0.061,
1.733, 0.39, 0.131, 0.201, 0.094, 0.01, 0.004, 0, 0.096, 0.014,
0, 0.001, 0.081, 0.016, 0.002, 0.016, 0.062, 0.007, 0.011, 0.001,
0.07, 0.003, 0.005, 0.002, 0.043, 0.033, 0, 0.007, 0.081, 0.039,
0.007, 0, 0.085, 0.033, 0.008, 0, 0.086, 0.023, 0.007, 0.007,
0.083, 0.015, 0, 0, 0.09, 0.009, 0, 0, 0.049, 0.052, 0, 0.025,
2.779, 0.203, 0.098, 0.016, 2.801, 0.242, 0.135, 0.01, 2.12,
0.466, 0.177, 0.121, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 1,
2, 3, 0, 1, 2, 3, 0, 1, 2, 3), .Dim = c(12L, 11L), .Dimnames = list(
c("0", "1", "2", "3", "0", "1", "2", "3", "0", "1", "2",
"3"), c("age_77", "age_78", "age_79", "age_80", "age_81",
"age_82", "age_83", "age_84", "age_85", "item", "k")))
Basically I have different ages, for which 3 items have been reported with four response categories each. I would like to obtain a long-shaped object with colnames = age, item, k, proportion, like this:
structure(c(77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 78,
78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 1, 1, 1, 1, 2, 2,
2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 1, 2,
3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
0.204, 0.036, 0.015, 0.013, 0.208, 0.037, 0.015, 0.006, 0.186,
0.044, 0.016, 0.023, 0.251, 0.044, 0.02, 0.01, 0.268, 0.04, 0.007,
0.007, 0.208, 0.062, 0.027, 0.036), .Dim = c(24L, 4L), .Dimnames = list(
c("0", "1", "2", "3", "0", "1", "2", "3", "0", "1", "2",
"3", "0", "1", "2", "3", "0", "1", "2", "3", "0", "1", "2",
"3"), c("age", "item", "k", "proportion")))
An example I tried:
reshape(as.data.frame(d), varying =1:9, sep = "_", direction = "long",
times = "k", idvar = "item")
Error in `row.names<-.data.frame`(`*tmp*`, value = paste(ids, times[i], :
duplicate 'row.names' are not allowed
Any clue where's my mistake? Thanks a lot beforehand!
The object d as provided by the OP is not a data.frame but a matrix which is causing the error:
str(d)
num [1:12, 1:11] 0.204 0.036 0.015 0.013 0.208 0.037 0.015 0.006 0.186 0.044 ...
- attr(*, "dimnames")=List of 2
..$ : chr [1:12] "0" "1" "2" "3" ...
..$ : chr [1:11] "age_77" "age_78" "age_79" "age_80" ...
In addition, the row names are not unique, which causes an error as well when coercing d to a data.frame.
With data.table, d can be coerced to a data.table object and reshaped from wide to long format using melt(). Finally, age is extracted from the column names and stored as integer values as requested by the OP.
library(data.table)
# Coerce the matrix to a data.table and melt every column whose name starts
# with "age_" into two columns: "age" (former column name) and "proportion"
# (the value). The remaining columns (item, k) are kept as identifiers.
melt(as.data.table(d), measure.vars = patterns("^age_"),
variable.name = "age", value.name = "proportion")[
# Strip the "age_" prefix and store the remainder as an integer, by reference;
# the trailing [] makes the result print, since := returns invisibly
, age := as.integer(stringr::str_replace(age, "age_", ""))][]
item k age proportion
1: 1 0 77 0.204
2: 1 1 77 0.036
3: 1 2 77 0.015
4: 1 3 77 0.013
5: 2 0 77 0.208
---
104: 2 3 85 0.010
105: 3 0 85 2.120
106: 3 1 85 0.466
107: 3 2 85 0.177
108: 3 3 85 0.121

Undefined columns data frame error

I would like to create a scatter plot of two variables (Disk and Band). For that I am using the function "ggscatter" from the "ggpubr" package. Every time I try to use the ggscatter function I get the following error:
Error in [.data.frame(data, , x) : undefined columns selected
Here is my code
install.packages("ggpubr")
library("ggpubr")
my_data <- All_Data_Summer_17_
head(my_data, 6)
ggscatter(my_data, x = "band", y = "Disk",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = "Band", ylab = "Disk (cm)")
Output of str(my_data)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 24 obs. of 22 variables:
$ Sample ID : chr "NP-A-1" "NP-A-2" "NP-A-3" "NP-A-4" ...
$ Lat : num 36.6 36.6 36.6 36.6 36.6 ...
$ Lon : num -95 -95 -95 -95 -95 ...
$ Temp : num 29.1 30.5 30.6 30.7 31 ...
$ SpCond : num 0.077 0.081 0.082 0.086 0.088 0.09 0.084 0.09 0.084 0.085 ...
$ Cond : int 83 90 90 95 98 99 93 99 93 96 ...
$ Resist : num 12107 11116 11066 10537 10248 ...
$ TDS : num 0.05 0.053 0.053 0.056 0.057 0.058 0.055 0.058 0.055 0.055 ...
$ Sal : num 0.03 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 ...
$ pH : num 8.87 9.41 9.56 9.77 9.61 9.38 9.89 9.67 9.89 9.85 ...
$ Chl : num 62.1 40.1 3.7 1.4 4.2 5.6 41.5 17.8 4.5 7.7 ...
$ ODO : num 5.69 8.76 8.28 8.35 8.75 ...
$ TSS : num 1.111 0.667 2.556 3.333 0.778 ...
$ TP : num 0 1.03 0.01 -0.02 -0.01 -0.03 0.01 -0.01 -0.03 0.01 ...
$ TN : num 0.2 0.3 1.9 0.3 1.1 0.5 1.6 0.9 0.5 0.7 ...
$ NO3-N : num 0.43 0.18 0.71 0.36 0.25 0.42 0.26 0.17 0.24 0.19 ...
$ NH3-N : num 0.3 0.2 -0.3 -0.1 -0.4 -0.3 -0.3 -0.3 -0.2 -0.1 ...
$ Chloro-a : num 8.23 7.19 15.37 12.6 14.22 ...
$ Disk: num 55.5 68 50 50.5 69 65 65 67.7 70 66 ...
$ band : num 0.000093 0.000096 0.000103 0.000152 0.000088 0.000089 0.000096 0.000097 0.000092 0.000101 ...
$ Green Band : num 0.000163 0.000169 0.000154 0.000276 0.00016 0.00013 0.00015 0.000175 0.000171 0.000163 ...
$ Red Band : num 0.00012 0.000145 0.000126 0.000246 0.000117 0.000095 0.000116 0.00011 0.000108 0.000126 ...
Output dput(my_data)
dput(my_data)
structure(list(`Sample ID` = c("NP-A-1", "NP-A-2", "NP-A-3",
"NP-A-4", "NP-A-5", "NP-A-6", "NP-A-7", "NP-A-8", "NP-A-9", "NP-A-10",
"NP-A-11", "NP-A-12", "NP-A-13", "NP-A-14", "NP-A-15", "NP-A-16",
"NP-A-17", "NP-B-1", "NP-B-2", "NP-B-3", "NP-B-4", "NP-B-5",
"NP-B-6", "NP-B-7"), Lat = c(36.568738, 36.569005, 36.569258,
36.569554, 36.569585, 36.569382, 36.56928, 36.568647, 36.568809,
36.569124, 36.569425, 36.569331, 36.56919, 36.569071, 36.568888,
36.568633, 36.568869, 36.568651, 36.568932, 36.56946, 36.569893,
36.570058, 36.569811, 36.56988), Lon = c(-94.96671, -94.966703,
-94.966604, -94.966647, -94.96698, -94.966928, -94.966923, -94.967296,
-94.9677, -94.967761, -94.967911, -94.968069, -94.967358, -94.968107,
-94.968018, -94.968049, -94.968293, -94.968723, -94.968833, -94.968396,
-94.968101, -94.967793, -94.967141, -94.96663), Temp = c(29.12,
30.49, 30.6, 30.71, 30.97, 30.83, 30.82, 30.64, 30.42, 31.62,
31.96, 31.16, 31.16, 32.88, 32.03, 31, 32.41, 31.79, 31.93, 32.17,
32.16, 32.55, 32.61, 32.83), SpCond = c(0.077, 0.081, 0.082,
0.086, 0.088, 0.09, 0.084, 0.09, 0.084, 0.085, 0.08, 0.079, 0.083,
0.079, 0.086, 0.094, 0.078, 0.183, 0.183, 0.183, 0.183, 0.183,
0.183, 0.183), Cond = c(83L, 90L, 90L, 95L, 98L, 99L, 93L, 99L,
93L, 96L, 91L, 88L, 93L, 90L, 97L, 105L, 89L, 206L, 207L, 208L,
208L, 209L, 210L, 210L), Resist = c(12107.2, 11115.7, 11066.2,
10537.1, 10247.7, 10051, 10700.4, 10076.5, 10753.3, 10434.4,
11023, 11304, 10741.8, 11058.1, 10270.4, 9536.35, 11269.8, 4845.53,
4834.38, 4815.44, 4814.59, 4787.82, 4770.86, 4755.86), TDS = c(0.05,
0.053, 0.053, 0.056, 0.057, 0.058, 0.055, 0.058, 0.055, 0.055,
0.052, 0.051, 0.054, 0.051, 0.056, 0.061, 0.051, 0.119, 0.119,
0.119, 0.119, 0.119, 0.119, 0.119), Sal = c(0.03, 0.04, 0.04,
0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04,
0.04, 0.04, 0.03, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08),
pH = c(8.87, 9.41, 9.56, 9.77, 9.61, 9.38, 9.89, 9.67, 9.89,
9.85, 9.46, 9.42, 9.75, 9.19, 10.02, 8.83, 9.65, 7.89, 8.14,
8.21, 8.22, 8.4, 8.21, 8.18), Chl = c(62.1, 40.1, 3.7, 1.4,
4.2, 5.6, 41.5, 17.8, 4.5, 7.7, 8.2, 7.7, 120.3, 3.1, 7.8,
3.6, 3.2, 9.8, 7.6, 6, 10, 8.1, 6.3, 4.3), ODO = c(5.69,
8.76, 8.28, 8.35, 8.75, 8.59, 10.1, 10.06, 9.14, 10.32, 9.1,
8.41, 8.03, 9.63, 9.77, 8.91, 10.16, 7.17, 7.31, 7.41, 7.49,
7.75, 6.98, 7.09), TSS = c(1.1111, 0.6667, 2.5556, 3.3333,
0.7778, -27.3333, 2.1111, -0.3333, 1.2222, -32.6667, -0.2222,
2.3333, -0.2222, 1.1111, 1.4444, 2.6667, 0.1111, 6.3333,
7, 5, 5.4444, 6.4444, 3, 2.7778), TP = c(0, 1.03, 0.01, -0.02,
-0.01, -0.03, 0.01, -0.01, -0.03, 0.01, 0.04, -0.01, -0.03,
0, 0.01, 0.03, 0.04, 0.2, -0.01, 0, -0.03, 0.04, 0.01, -0.01
), TN = c(0.2, 0.3, 1.9, 0.3, 1.1, 0.5, 1.6, 0.9, 0.5, 0.7,
0.6, 1, 0.8, 0.1, 0.4, 1.6, 0.6, 0.8, 0.6, 0.5, 0.9, 1.2,
0.3, 0.6), `NO3-N` = c(0.43, 0.18, 0.71, 0.36, 0.25, 0.42,
0.26, 0.17, 0.24, 0.19, 0.17, 0.41, 0.6, 0.23, 0.3, 0.26,
0.22, 0.32, 0.63, 0.36, 0.24, 0.33, 0.55, 0.36), `NH3-N` = c(0.3,
0.2, -0.3, -0.1, -0.4, -0.3, -0.3, -0.3, -0.2, -0.1, 0.1,
-0.2, 0.2, -0.1, -0.3, -0.1, 0.1, -0.5, 0.2, 0.5, -0.3, 0.2,
-0.4, -0.1), `Chloro-a` = c(8.23, 7.19, 15.37, 12.6, 14.22,
4.56, 7.2, 8.61, 6.31, 8.74, 5.59, 10.92, 5.24, 4.26, 5.48,
6.26, 4.75, 11.45, 10.39, 11.79, 9.59, 9.82, 7.97, 7.92),
`Disk` = c(55.5, 68, 50, 50.5, 69, 65, 65, 67.7, 70,
66, 69, 67, 69, 62, 60, 62, 66, 50, 52, 50, 40, 57, 57, 62
), `band` = c(9.3e-05, 9.6e-05, 0.000103, 0.000152,
8.8e-05, 8.9e-05, 9.6e-05, 9.7e-05, 9.2e-05, 0.000101, 0.000102,
9.6e-05, 0.000106, 8.7e-05, 9.1e-05, 0.000126, 0.000107,
0.000139, 0.000139, 0.000135, 0.000174, 0.000144, 0.000137,
0.000134), `Green Band` = c(0.000163, 0.000169, 0.000154,
0.000276, 0.00016, 0.00013, 0.00015, 0.000175, 0.000171,
0.000163, 0.000177, 0.000188, 0.000131, 0.000162, 0.000166,
0.000233, 0.000204, 0.000265, 0.00023, 0.000254, 0.000325,
0.000262, 0.000263, 0.00028), `Red Band` = c(0.00012, 0.000145,
0.000126, 0.000246, 0.000117, 9.5e-05, 0.000116, 0.00011,
0.000108, 0.000126, 0.000128, 0.000133, 9.3e-05, 0.000114,
0.000113, 0.000176, 0.000136, 0.000215, 0.000198, 0.00019,
0.000218, 0.00021, 0.000205, 0.000223)), .Names = c("Sample ID",
"Lat", "Lon", "Temp", "SpCond", "Cond", "Resist", "TDS", "Sal",
"pH", "Chl", "ODO", "TSS", "TP", "TN", "NO3-N", "NH3-N", "Chloro-a",
"Disk", "band", "Green Band", "Red Band"), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -24L), spec = structure(list(
cols = structure(list(`Sample ID` = structure(list(), class = c("collector_character",
"collector")), Lat = structure(list(), class = c("collector_double",
"collector")), Lon = structure(list(), class = c("collector_double",
"collector")), Temp = structure(list(), class = c("collector_double",
"collector")), SpCond = structure(list(), class = c("collector_double",
"collector")), Cond = structure(list(), class = c("collector_integer",
"collector")), Resist = structure(list(), class = c("collector_double",
"collector")), TDS = structure(list(), class = c("collector_double",
"collector")), Sal = structure(list(), class = c("collector_double",
"collector")), pH = structure(list(), class = c("collector_double",
"collector")), Chl = structure(list(), class = c("collector_double",
"collector")), ODO = structure(list(), class = c("collector_double",
"collector")), TSS = structure(list(), class = c("collector_double",
"collector")), TP = structure(list(), class = c("collector_double",
"collector")), TN = structure(list(), class = c("collector_double",
"collector")), `NO3-N` = structure(list(), class = c("collector_double",
"collector")), `NH3-N` = structure(list(), class = c("collector_double",
"collector")), `Chloro-a` = structure(list(), class = c("collector_double",
"collector")), `Disk` = structure(list(), class = c("collector_double",
"collector")), `band` = structure(list(), class = c("collector_double",
"collector")), `Green Band` = structure(list(), class = c("collector_double",
"collector")), `Red Band` = structure(list(), class = c("collector_double",
"collector"))), .Names = c("Sample ID", "Lat", "Lon", "Temp",
"SpCond", "Cond", "Resist", "TDS", "Sal", "pH", "Chl", "ODO",
"TSS", "TP", "TN", "NO3-N", "NH3-N", "Chloro-a", "Disk",
"band", "Green Band", "Red Band")), default = structure(list(), class = c("collector_guess",
"collector"))), .Names = c("cols", "default"), class = "col_spec"))
Ok, the easy answer is to run the correlation coefficients first, then the CIs.
Perhaps you could report the bug to ggpubr's Maintainer.
ggscatter(my_data, x = "band",
y = "Disk",
add = "reg.line",
cor.coef = FALSE,
cor.method = "pearson",
conf.int = TRUE,
xlab = "Band",
ylab = "Disk (cm)")

Resources