Calculate AUC using sensitivity and specificity values - r

How to calculate AUC, if I have values of sensitivity and specificity for various threshold cutoffs?
I have sensitivity and specificity values for 100 thresholds.
sensitivity: c(0.649193548387097, 0.649193548387097, 0.649193548387097, 0.649193548387097,
0.649193548387097, 0.649193548387097, 0.649193548387097, 0.646586345381526,
0.646586345381526, 0.646586345381526, 0.646586345381526, 0.646586345381526,
0.646586345381526, 0.646586345381526, 0.646586345381526, 0.646586345381526,
0.644, 0.644, 0.644, 0.644, 0.641434262948207, 0.641434262948207,
0.638888888888889, 0.638888888888889, 0.638888888888889, 0.634920634920635,
0.634920634920635, 0.634920634920635, 0.634920634920635, 0.630952380952381,
0.628458498023715, 0.624505928853755, 0.620553359683794, 0.615686274509804,
0.611764705882353, 0.607843137254902, 0.607843137254902, 0.6,
0.6, 0.59765625, 0.59375, 0.5859375, 0.58203125, 0.57421875,
0.57421875, 0.56640625, 0.562015503875969, 0.550387596899225,
0.534883720930233, 0.511627906976744, 0.5, 0.496153846153846,
0.486590038314176, 0.478927203065134, 0.46360153256705, 0.455938697318008,
0.452107279693487, 0.442748091603053, 0.425855513307985, 0.418250950570342,
0.4106463878327, 0.399239543726236, 0.390151515151515, 0.382575757575758,
0.377358490566038, 0.369811320754717, 0.362264150943396, 0.354716981132075,
0.343396226415094, 0.343396226415094, 0.339622641509434, 0.328301886792453,
0.316981132075472, 0.29811320754717, 0.294339622641509, 0.286792452830189,
0.279245283018868, 0.270676691729323, 0.255639097744361, 0.244360902255639,
0.236842105263158, 0.236842105263158, 0.229323308270677, 0.225563909774436,
0.214285714285714, 0.191729323308271, 0.184210526315789, 0.176691729323308,
0.165413533834586, 0.139097744360902, 0.139097744360902, 0.12781954887218,
0.120300751879699, 0.105263157894737, 0.075187969924812, 0.0639097744360902,
0.0601503759398496, 0.0526315789473684, 0.0413533834586466, 0.018796992481203,
0)
specificity : c(0.917961165048544, 0.920581113801453, 0.923708353452438, 0.925337186897881,
0.928743379874819, 0.930288461538462, 0.93371757925072, 0.934772182254197,
0.936272160996646, 0.937739463601533, 0.938872970391595, 0.940867906533143,
0.942435775451951, 0.944893111638955, 0.946969696969697, 0.949881796690307,
0.952290977798772, 0.953235710911667, 0.955209806694955, 0.956235294117647,
0.95815702867889, 0.95868544600939, 0.961556493202063, 0.962043111527648,
0.963951310861423, 0.965420560747664, 0.966449207828518, 0.966930600838379,
0.9674569967457, 0.967951695308871, 0.967951695308871, 0.968474733426055,
0.969401947148818, 0.969401947148818, 0.969907407407407, 0.971322849213691,
0.972735674676525, 0.973684210526316, 0.97372060857538, 0.973756906077348,
0.975598526703499, 0.977000919963201, 0.977512620468105, 0.9780119102153,
0.979405034324943, 0.981235697940503, 0.98124428179323, 0.982167352537723,
0.982632541133455, 0.982648401826484, 0.983135824977211, 0.984069185252617,
0.984993178717599, 0.985467756584923, 0.985934664246824, 0.986406887177164,
0.98733604703754, 0.98869801084991, 0.98961625282167, 0.989625620207488,
0.990081154192967, 0.990085624155025, 0.990540540540541, 0.990540540540541,
0.990995047276002, 0.991449144914491, 0.991899189918992, 0.993252361673414,
0.99370220422852, 0.993707865168539, 0.993713515940727, 0.994616419919246,
0.995513683266039, 0.996410946612831, 0.996859578286227, 0.996860986547085,
0.997311827956989, 0.997315436241611, 0.997316636851521, 0.997763864042934,
0.997763864042934, 0.998211890925346, 0.998212689901698, 0.998212689901698,
0.998212689901698, 0.998214285714286, 0.998661311914324, 0.998661311914324,
0.998661311914324, 0.999107939339875, 0.999107939339875, 0.999108337048596,
0.999108337048596, 0.999108734402852, 0.999109528049866, 0.999554962171785,
1, 1, 1, 1, 1)
threshold:
c(0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1,
0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2, 0.21,
0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31, 0.32,
0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42, 0.43,
0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53, 0.54,
0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64, 0.65,
0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76,
0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87,
0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98,
0.99, 1)
AUC =round(sum(specificity [1:length(threshold)]*diff(c(0, 1 - sensitivity [1:length(threshold)]))),2)
AUC= 0.95
1)Is this the correct way to find AUC?
2)If I want to plot ROC curve is this code fine?
plot((1-specificity),sensitivity ,xlab = "Sensitivity",ylab = "Specificity",type = "l")
3) Is there a formula to calculate the power of this ROC analysis, so that I know the minimum number of samples needed to calculate the AUC?

Related

Is there a way to filter out the row that has the highest of three different columns simultaneously?

Is there a way to filter out the row that has the highest of three different columns simultaneously? I am trying to filter out the row that has the best accuracy, specificity, and sensitivity in a data frame.
Pic of first few rows of data
in the data provided the highest for all 3 should be (aka the desired output)
"thresh_info.59 0.60 83.39 83.27684 83.557047"
data<- structure(list(threshold = c(0.01, 0.02, 0.03, 0.04, 0.05, 0.06,
0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17,
0.18, 0.19, 0.2, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28,
0.29, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39,
0.4, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5,
0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61,
0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72,
0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83,
0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94,
0.95, 0.96, 0.97, 0.98, 0.99), accuracy = c(61.72, 63.67, 65.29,
66.58, 67.86, 69.01, 69.75, 70.83, 71.51, 72.79, 73.87, 74.54,
75.02, 75.29, 75.83, 76.3, 76.7, 77.25, 77.65, 77.92, 78.33,
79, 79.14, 79.07, 79.41, 79.61, 79.68, 80.28, 80.69, 80.82, 80.89,
81.16, 81.3, 81.77, 81.9, 81.97, 82.17, 82.31, 82.44, 82.65,
82.58, 82.92, 82.98, 83.59, 83.52, 83.59, 83.25, 83.46, 83.39,
83.46, 83.66, 83.73, 83.52, 83.66, 83.93, 83.46, 83.25, 83.32,
83.32, 83.39, 83.39, 82.92, 82.24, 82.04, 81.77, 81.3, 81.5,
81.23, 81.03, 80.89, 80.49, 80.35, 80.01, 80.01, 79.2, 79.14,
78.87, 78.93, 78.6, 77.92, 77.25, 76.91, 76.37, 75.56, 74.81,
73.94, 73.13, 72.79, 71.84, 71.51, 69.89, 68.4, 66.44, 64.82,
63.13, 61.44, 59.08, 55.77, 52.8), sensitivity = c(100, 100,
100, 99.8870056497175, 99.8870056497175, 99.6610169491526, 99.6610169491526,
99.5480225988701, 99.3220338983051, 99.2090395480226, 99.0960451977401,
98.7570621468927, 98.6440677966102, 98.5310734463277, 98.1920903954802,
97.9661016949153, 97.7401129943503, 97.6271186440678, 96.8361581920904,
96.3841807909604, 96.045197740113, 95.9322033898305, 95.5932203389831,
95.2542372881356, 94.9152542372881, 94.6892655367232, 94.2372881355932,
94.1242937853107, 94.1242937853107, 93.6723163841808, 93.1073446327684,
92.8813559322034, 92.6553672316384, 92.4293785310735, 92.316384180791,
91.9774011299435, 91.864406779661, 91.5254237288136, 91.2994350282486,
90.8474576271186, 90.0564971751412, 89.9435028248588, 89.7175141242938,
89.2655367231638, 89.0395480225989, 88.8135593220339, 88.135593220339,
87.909604519774, 87.3446327683616, 87.0056497175141, 86.5536723163842,
86.3276836158192, 85.6497175141243, 85.5367231638418, 85.5367231638418,
84.7457627118644, 84.180790960452, 83.8418079096045, 83.6158192090395,
83.2768361581921, 83.0508474576271, 82.0338983050847, 80.6779661016949,
80.225988700565, 79.3220338983051, 78.1920903954802, 77.9661016949153,
76.9491525423729, 76.1581920903955, 75.4802259887006, 74.3502824858757,
73.7853107344633, 72.8813559322034, 72.4293785310735, 70.8474576271186,
70.1694915254237, 69.1525423728814, 68.8135593220339, 68.135593220339,
66.8926553672316, 65.6497175141243, 64.4067796610169, 63.3898305084746,
61.9209039548023, 60.4519774011299, 58.6440677966102, 57.2881355932203,
56.1581920903955, 54.3502824858757, 53.3333333333333, 50.6214689265537,
48.135593220339, 44.8587570621469, 41.9209039548023, 38.6440677966102,
35.7062146892655, 31.638418079096, 26.1016949152542, 21.1299435028249
), specificity = c(4.86577181208054, 9.73154362416107, 13.758389261745,
17.1140939597315, 20.3020134228188, 23.489932885906, 25.3355704697987,
28.1879194630872, 30.2013422818792, 33.5570469798658, 36.4093959731544,
38.5906040268456, 39.9328859060403, 40.7718120805369, 42.6174496644295,
44.1275167785235, 45.4697986577181, 46.9798657718121, 49.1610738255034,
50.503355704698, 52.0134228187919, 53.8590604026846, 54.6979865771812,
55.0335570469799, 56.3758389261745, 57.2147651006711, 58.0536912751678,
59.7315436241611, 60.738255033557, 61.744966442953, 62.751677852349,
63.758389261745, 64.4295302013423, 65.9395973154362, 66.4429530201342,
67.1140939597315, 67.7852348993289, 68.6241610738255, 69.2953020134228,
70.4697986577181, 71.4765100671141, 72.4832214765101, 72.9865771812081,
75.1677852348993, 75.3355704697987, 75.8389261744966, 76.006711409396,
76.8456375838926, 77.5167785234899, 78.1879194630873, 79.3624161073825,
79.8657718120805, 80.3691275167785, 80.8724832214765, 81.5436241610738,
81.5436241610738, 81.8791946308725, 82.5503355704698, 82.8859060402685,
83.5570469798658, 83.8926174496644, 84.2281879194631, 84.5637583892617,
84.7315436241611, 85.4026845637584, 85.9060402684564, 86.744966442953,
87.5838926174497, 88.255033557047, 88.9261744966443, 89.5973154362416,
90.1006711409396, 90.6040268456376, 91.2751677852349, 91.6107382550336,
92.4496644295302, 93.2885906040269, 93.9597315436242, 94.1275167785235,
94.2953020134228, 94.4630872483222, 95.4697986577181, 95.6375838926175,
95.8053691275168, 96.1409395973154, 96.6442953020134, 96.6442953020134,
97.4832214765101, 97.8187919463087, 98.489932885906, 98.489932885906,
98.489932885906, 98.489932885906, 98.8255033557047, 99.496644295302,
99.6644295302013, 99.8322147651007, 99.8322147651007, 99.8322147651007
)), row.names = c("thresh_info", "thresh_info.1", "thresh_info.2",
"thresh_info.3", "thresh_info.4", "thresh_info.5", "thresh_info.6",
"thresh_info.7", "thresh_info.8", "thresh_info.9", "thresh_info.10",
"thresh_info.11", "thresh_info.12", "thresh_info.13", "thresh_info.14",
"thresh_info.15", "thresh_info.16", "thresh_info.17", "thresh_info.18",
"thresh_info.19", "thresh_info.20", "thresh_info.21", "thresh_info.22",
"thresh_info.23", "thresh_info.24", "thresh_info.25", "thresh_info.26",
"thresh_info.27", "thresh_info.28", "thresh_info.29", "thresh_info.30",
"thresh_info.31", "thresh_info.32", "thresh_info.33", "thresh_info.34",
"thresh_info.35", "thresh_info.36", "thresh_info.37", "thresh_info.38",
"thresh_info.39", "thresh_info.40", "thresh_info.41", "thresh_info.42",
"thresh_info.43", "thresh_info.44", "thresh_info.45", "thresh_info.46",
"thresh_info.47", "thresh_info.48", "thresh_info.49", "thresh_info.50",
"thresh_info.51", "thresh_info.52", "thresh_info.53", "thresh_info.54",
"thresh_info.55", "thresh_info.56", "thresh_info.57", "thresh_info.58",
"thresh_info.59", "thresh_info.60", "thresh_info.61", "thresh_info.62",
"thresh_info.63", "thresh_info.64", "thresh_info.65", "thresh_info.66",
"thresh_info.67", "thresh_info.68", "thresh_info.69", "thresh_info.70",
"thresh_info.71", "thresh_info.72", "thresh_info.73", "thresh_info.74",
"thresh_info.75", "thresh_info.76", "thresh_info.77", "thresh_info.78",
"thresh_info.79", "thresh_info.80", "thresh_info.81", "thresh_info.82",
"thresh_info.83", "thresh_info.84", "thresh_info.85", "thresh_info.86",
"thresh_info.87", "thresh_info.88", "thresh_info.89", "thresh_info.90",
"thresh_info.91", "thresh_info.92", "thresh_info.93", "thresh_info.94",
"thresh_info.95", "thresh_info.96", "thresh_info.97", "thresh_info.98"
), class = "data.frame")
You can filter by the minimum variance across the 3 columns:
library(dplyr)
# Score each threshold row by the variance of its three metrics and keep the
# row(s) with the smallest variance.
# NOTE: this picks the row where accuracy, sensitivity and specificity are
# closest to each other; it does not by itself check that they are large.
data |>
# Move the "thresh_info.N" row names into a regular `rowname` column.
tibble::rownames_to_column() |>
# Compute the following mutate() one row at a time.
rowwise() |>
# After adding `rowname`, columns 3:5 are accuracy, sensitivity, specificity.
mutate(var = var(c_across(3:5))) |>
# Drop the row-wise grouping so min() below sees the whole `var` column.
ungroup() |>
filter(var == min(var))
# A tibble: 1 × 6
rowname threshold accuracy sensitivity specificity var
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 thresh_info.59 0.6 83.4 83.3 83.6 0.0199

Creating an editable partial effect plot in R with the gratia::draw() function that also has a rugplot

The question I have has mostly been answered by the following post: Cannot update/edit ggplot2 object exported from a package (`gratia`) in R. When I refer to the mydraw.gam function, it comes from code in that post. What I am trying to do is use the mydraw.gam function with a rugplot that looks like the gratia::draw() function.
This is my data:
dput(LMB.stack)
structure(list(X1 = c(0.0541887294548451, 0.0721473880136936,
0.0175421164050594, 0.0215182766921787, 0.0440735967747106, 0.046669040060852,
0.0526230550013067, 0.112833597945919, 0.063812034754301, 0.0940158338572872,
0.0506721208894938, 0.0127474420783362, 0.0657879523145501, 0.0541887294548451,
0.0721473880136936, 0.0175421164050594, 0.0215182766921787, 0.0440735967747106,
0.046669040060852, 0.0526230550013067, 0.112833597945919, 0.063812034754301,
0.0940158338572872, 0.0506721208894938, 0.0127474420783362, 0.0382272328188603,
0.0541887294548451, 0.0721473880136936, 0.0175421164050594, 0.0215182766921787,
0.0440735967747106, 0.046669040060852, 0.0526230550013067, 0.112833597945919,
0.063812034754301, 0.0940158338572872, 0.0506721208894938, 0.0127474420783362,
0.0657879523145501, 0.0382272328188603, 0.0541887294548451, 0.0721473880136936,
0.0175421164050594, 0.0215182766921787, 0.0440735967747106, 0.046669040060852,
0.0526230550013067, 0.0056727211129064, 0.063812034754301, 0.0940158338572872,
0.106570293080958, 0.116604915677637, 0.0422424508991219, 0.109071218434758,
0.0666150693773212, 0.108073813949563, 0.0394885672397296, 0.0688845434754768,
0.0530021292114909, 0.106570293080958, 0.116604915677637, 0.0422424508991219,
0.109071218434758, 0.0666150693773212, 0.108073813949563, 0.0411444155997384,
0.0394885672397296, 0.0688845434754768, 0.0530021292114909, 0.106570293080958,
0.116604915677637, 0.0422424508991219, 0.109071218434758, 0.0666150693773212,
0.108073813949563, 0.0411444155997384, 0.0394885672397296, 0.0688845434754768,
0.0530021292114909, 0.0578017962016202, 0.106570293080958, 0.116604915677637,
0.0422424508991219, 0.109071218434758, 0.0666150693773212, 0.174633119183298,
0.0645268299068541, 0.0709485215243274, 0.0682173756351461, 0.0643514854635756,
0.014808611175444, 0.163637352944664, 0.0599393459014399, 0.134349635442672,
0.214544784680364, 0.0460287439577173, 0.0692001626120574, 0.0682173756351461,
0.0643514854635756, 0.014808611175444), X2 = c(0.64, 0.47, 0.598,
0.52, 0.41, 1.38, 0.53, 0.73, 0.367, 0.58, 0.75, 0.38, 0.227,
0.39, 0.36, 0.35, 0.41, 0.84, 0.53, 0.55, 0.33, 0.33, 0.356,
0.58, 0.33, 0.52, 0.43, 0.53, 0.45, 0.37, 0.54, 0.98, 0.789,
0.44, 0.23, 0.21, 0.67144, 0.37, 0.38, 0.18, 0.24, 0.36, 0.37,
0.16, 0.58, 0.44, 0.41, 0.16, 0.13, 0.55, 0.99, 2.31, 1.264,
1.005, 1.345, 1.24, 1.665, 1.545, 0.799, 0.736, 1.237, 0.776,
0.742, 1.0259, 0.66, 1.17, 0.864, 1.191, 0.631, 0.745, 0.866,
0.917, 1.105, 1.04, 0.517, 1.236, 1.066, 1.35, 0.947, 0.74, 0.62,
1.572, 0.56, 1.189, 0.645, 0.9, 0.74, 0.568, 1.14, 1.159, 1.325,
1.217, 1.37, 1.147, 1.89, 1.19, 1.3, 0.73, 0.693, 1.06)), row.names = c(NA,
100L), class = "data.frame")
This is what my gam looks like (using mgcv):
LMB.gam<-gam(X2~s(X1), data = LMB.stack)
When I use the draw(LMB.gam) command in the package gratia, this is what the partial effect plot looks like:
When I use the mydraw.gam command (see previous post) while trying to add a rug plot (see code below), this is what my plot looks like:
p<-mydraw.gam(LMB.gam)
p[[1]] + geom_rug(position = "jitter",sides="b")
I need some help figuring out how to properly add a rug plot to an editable gratia::draw ggplot partial effect plot that corresponds to the actual data.
Thanks!
I would just use smooth_estimates() and its draw() method to plot a single smooth from the model. You can then add to it using standard ggplot2 functionality...
# using your data in `df`
# NOTE: `df` stands in for the question's data frame (`LMB.stack`).
m <- gam(X2 ~ s(X1), data = df)
# Evaluate only the s(X1) smooth from the fitted model.
sm <- smooth_estimates(m, smooth = "s(X1)")
# The drawn smooth is extended with ordinary ggplot2 layers via `+`.
draw(sm) +
labs(title = "My title", y = "foo") +
# Rug of the observed X1 values along the bottom axis; the layer supplies its
# own data and mapping instead of inheriting those of the smooth plot.
geom_rug(data = df,
mapping = aes(x = X1),
sides = "b",
inherit.aes = FALSE)
produces

Plot in R with different pch's

This is my data, and I need to plot:
data=structure(c(0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09,
0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2,
0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31,
0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42,
0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53,
0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64,
0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75,
0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86,
0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97,
0.98, 0.99, -4.29168871465397, -3.11699074587972, 1.09152409255126,
1.55755175826356, -0.172913268677486, 0.138305902738217, -0.38707713636532,
0.0638896647028127, 0.838910810102289, 0.943154102106711, 1.10825647675154,
1.26151733689579, 0.95610404139547, 1.13671597066802, 1.06145162449853,
1.22015975232484, 1.47211564748976, 1.43575780356999, 1.84397139393396,
1.76431139003358, 1.59262327273733, 1.74799121927712, 1.60092115463811,
1.91302749514369, 1.69691050471565, 1.73871696181996, 1.70008388736007,
1.62139419455853, 2.03803222390097, 1.95654400666235, 2.14213709053145,
2.20797610828818, 2.43019994960532, 2.43201814098108, 1.80396697393168,
2.22800019319471, 2.07590961781243, 1.93938306553876, 1.95940985069043,
2.01357121475676, 1.97530323680977, 1.80327169854223, 2.36734705989908,
2.44766094824079, 2.75792381459726, 2.77274665368527, 2.49888229303308,
2.31540449224314, 2.6409962540336, 2.43729957198807, 2.63155885389867,
2.53653088267223, 2.36871141172942, 2.54858578120089, 2.69802567434559,
3.09606341962321, 3.08856133175863, 3.18997559061186, 3.36005160648579,
3.56895022380044, 3.73753226001724, 3.74662085372188, 4.01296134301718,
4.07267448537225, 3.88165588983999, 3.7369314477271, 3.23912007937852,
3.31721703890831, 3.21894991022748, 3.48377059081018, 3.32624243338278,
3.31970136033168, 3.33053692253337, 3.34467916673038, 3.236168836409,
2.93429043790414, 2.9303837626847, 3.15769722112212, 3.75496410153913,
3.60526854720219, 3.82913260531081, 4.12105540857576, 4.00407286724511,
3.86329120505831, 4.01282715673454, 4.27078090625557, 3.57982245847814,
3.42938648057264, 3.04047099021105, 3.22396221972667, 4.4317374989557,
4.55399628631069, 4.51384672365535, 5.19575483872483, 4.77975901314362,
3.67143455937258, 4.83321942758713, 5.82353153779422, 5.4721995802281,
0.209205679527393, 0.36810747913542, 0.767214115569449, 0.631134464438132,
0.950471080949761, 0.955883872576242, 0.861939569072133, 0.978322788509546,
0.650739708163536, 0.609454620741533, 0.416316714902356, 0.424390227854642,
0.509471258981771, 0.45111061569788, 0.482703338045896, 0.415503380452312,
0.281397009944395, 0.312633722543431, 0.172403050166603, 0.157569155616774,
0.223315461391016, 0.134712102225702, 0.187843250166637, 0.109294406499708,
0.115163596824693, 0.138462578171918, 0.119131458337016, 0.174760537513378,
0.060100726330413, 0.0724953102167094, 0.0727020992861007, 0.0538763524104828,
0.0305519665256373, 0.0458544145004334, 0.13222239331969, 0.062914362547982,
0.0997526784831062, 0.11462977656091, 0.116582141802293, 0.0986337165111772,
0.136226138825677, 0.168342590268618, 0.0716128991576213, 0.0676036354494944,
0.0357838762803169, 0.0334279079582225, 0.0610644117339305, 0.0616823286482187,
0.0660736255131733, 0.104368782129991, 0.0705141118177286, 0.0778176025258217,
0.108146014569371, 0.125671355892738, 0.0590267483041353, 0.0294699796128093,
0.0338205013760269, 0.0269159737669502, 0.0134643988629253, 0.00867709725404753,
0.00493722923021656, 0.00323813401160211, 0.000497278521965683,
0.000424360028534299, 0.000603507667276793, 0.00192008642195063,
0.00578745302404915, 0.00632637091749721, 0.0036673526900235,
0.00322317560117313, 0.00315464572099522, 0.00890662685249866,
0.00630278028858244, 0.00172069402847441, 0.00297661131713389,
0.00907593497087, 0.00794661797866469, 0.00360198056893646, 0.000913572843050492,
0.000952621690864408, 0.000214234772719202, 4.55598611162067e-05,
2.0600933563486e-05, 0.00014372066333701, 3.00102200614383e-05,
1.97046007623936e-05, 0.000349337120439941, 0.00580915934418336,
0.0186446024343607, 0.0455194395151208, 0.0067650312952201, 0.00903110379061256,
0.0210099376843247, 0.0126330025977033, 0.0735408204027586, 0.158374400655879,
0.0970807294810527, 0.0643407704341705, 0.408677400389109), .Dim = c(99L,
3L), .Dimnames = list(NULL, c("betas.position", "coef", "pvalue"
)))
I need to plot a graph like this: plot(data[,1],data[,2], pch=8)
When the p-value (data[,3]) is bigger than 0.10, pch should be empty(a line).
I believe that I have to construct some rule, but I am not able to do this so far.
Use an ifelse, which returns a vector which here is either 1 or 2 depending on the value of data[,3]:
plot(data[,1],data[,2],pch=ifelse(data[,3]>0.10,1,2))
so pch=1 for data[,3]>0.10 and pch=2 otherwise. Adjust these for whichever symbols you want, or use NA for nothing. You can use similar logic for setting the symbol size with the cex= parameter.
The below will remove the points you don't want from your chart:
# `data` is a matrix; convert it so the p-value column can be reached with `$`.
data <- as.data.frame(data)
# Keep only the points that should be drawn, i.e. p-values of at most 0.10.
# Rows with pvalue > 0.10 are the ones the question wants left blank, so they
# are the rows to drop (the earlier condition `> 0.1` kept them instead).
keep <- data$pvalue <= 0.1
plot(data[keep, 1], data[keep, 2], pch = 8)
I'm not sure what you mean by "empty (a line)". If you want to overlay different plot types you should consider ggplot2. It has far more functionality than the Base R plots.

Understanding and implementing numerical integration with a quantile function in R

I need to calculate this integral below, using R:
The q_theta(x) function I managed to do in R with quantile regression (package: quantreg).
matrix=structure(c(0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09,
0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2,
0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31,
0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42,
0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53,
0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64,
0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75,
0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86,
0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97,
0.98, 0.99, -22.2830664155772, -22.2830664155772, -19.9298291765612,
-18.2066426767652, -15.2657135034479, -14.921522915965, -13.5035945028536,
-13.1557269916064, -12.9495709618481, -11.6168348488161, -11.3999095021713,
-10.6962766764396, -10.0588239375837, -9.12944363439522, -8.15648778610587,
-8.04133299299019, -7.66558386420434, -7.50906566627427, -6.95626096568998,
-6.90630556403136, -6.53374879831376, -6.39324677042686, -6.20705804899049,
-6.09754765999465, -5.91272058217526, -5.75771166206242, -5.3770131257001,
-5.20892464393192, -5.07372162687422, -4.96706814289334, -4.64404095131293,
-4.1567394053577, -4.13209444755342, -3.85483644113723, -3.64855238293205,
-3.53054113507559, -3.46035383338799, -3.03155417364444, -2.93100183005178,
-2.90491824855193, -2.64056616049773, -2.51857727614607, -2.25163805172486,
-2.00934783937474, -1.89925824841417, -1.71405007411747, -1.65905834683964,
-1.47502511311988, -1.42755073292529, -1.20464216637298, -1.08574103345057,
-0.701134735371922, -0.590656010656201, -0.290335898959635, -0.0575062007348038,
0.0778328375033378, 0.165234593185889, 0.230651883848336, 0.316817885358695,
0.34841775605248, 0.516869604496075, 0.59743162507581, 0.857843937404964,
0.939734010162078, 1.12533017928147, 1.27037182428776, 1.52040854525927,
1.76577933448152, 2.07456447851822, 2.17389787235523, 2.27567786362425,
2.3850323163509, 2.55365596853891, 2.61208242890655, 2.77359226593771,
2.93275094039929, 3.07968072488942, 3.0822647851901, 3.26452177629061,
3.46223321951649, 3.66011832966054, 3.85710605543097, 4.05385887531972,
4.83943843494744, 5.05864734149161, 5.25501778319145, 5.38941130574907,
5.88571117751377, 6.5116611852713, 6.98632496342285, 7.21816245728101,
7.73244825971004, 7.80401007592906, 8.34648625541999, 9.83184090479964,
10.8324874884172, 11.3060100107816, 12.3048113953808, 13.1300123358331
), .Dim = c(99L, 2L), .Dimnames = list(NULL, c("Theta", "q(x)_(Theta)"
)))
This is the q_theta(x) function that I estimated in R. The questions I have are:
a> If x is a standard normal distribution this integral is zero; Right?
b> Otherwise, in my case, the integral is not zero. How do I treat q_(1-Theta)(x)? Is it simply sort(matrix[,"q(x)_(Theta)"],decreasing=TRUE)?
And the integration would be:
sintegral(thau[1:50], (matrix[,"q(x)_(Theta)"][1:50] - sort(matrix[,"q(x)_(Theta)"],TRUE)[1:50])[1:50])$value
The median would be a common point of these two functions, right?
Thanks.
Recall your previous post Building a function by defining X and Y and then Integrating in R, we build a linear interpolation function
## note `rule = 2` to enable "extrapolation";
## otherwise `rule = 1` gives `NA` outside [0.01, 0.5]
integrand <- approxfun(mat[, 1], y, rule = 2)
Then we can perform numeric integration on [0, 0.5]:
integrate(integrand, lower = 0, upper = 0.5)
# -5.594405 with absolute error < 4e-04
Now for a>, let's have a proof first.
Note, your quantile function is not for normal distribution, so this result does not hold. You can actually verify this
quant <- approxfun(mat[, 1], mat[, 2], rule = 2)
integrate(quant, lower = 0, upper = 0.5)
# -3.737973 with absolute error < 0.00029
Compared with previous integration result -5.594405, the difference is not a factor of 2.

Building a function by defining X and Y and then Integrating in R

I need to construct a function with x values coming from the first column of this matrix below and y values coming from the second column from the same matrix, with the purpose of later calculating the integral in the desired range.:
matrix=structure(c(0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09,
0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2,
0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31,
0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42,
0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53,
0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64,
0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75,
0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86,
0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97,
0.98, 0.99, -7.38512004893287, -7.38512004893287, -6.4788834441613,
-5.63088940915783, -4.83466644123448, -4.68738146949482, -4.28638930290018,
-4.22411786604579, -3.59136848943044, -3.51706359680799, -3.39972014575003,
-3.28609348968074, -3.08569873266253, -2.99764447889508, -2.89470597729108,
-2.77488515429677, -2.67019029728821, -2.54646363628509, -2.48474483938047,
-2.30542896070156, -2.22485510301423, -2.16689229344011, -2.10316315192181,
-2.05135466960309, -1.90942757945567, -1.87863626704201, -1.82507998490407,
-1.75875817642096, -1.6919717645629, -1.62396997031953, -1.56159595204983,
-1.52152738173419, -1.46478394989911, -1.4590555309334, -1.21744398902807,
-1.21731951113139, -1.15003007559406, -1.07321513324935, -0.993364510081357,
-0.924402354306976, -0.885939210442384, -0.831155619244629, -0.80947326709303,
-0.786842719842383, -0.743834513319968, -0.721194178931262, -0.593033922802471,
-0.514780082129033, -0.50717184901095, -0.44223827942003, -0.403514759789576,
-0.296251921664, -0.204238424399985, -0.1463212643028, -0.0982036017275267,
-0.0705262020944892, 0.0275436976821241, 0.0601977432996216,
0.114959963559268, 0.182222546319913, 0.236503724954577, 0.272244043950984,
0.325188234828891, 0.347862804414816, 0.438932719815686, 0.630570414177834,
0.805087251137292, 0.904903847087405, 0.940702374334727, 0.958351604371838,
1.03920208406121, 1.25808734990267, 1.32634708210007, 1.34458194173569,
1.42693337001189, 1.55016591141652, 1.5710754638668, 1.61795101580197,
1.62472416407376, 1.70223430572367, 1.86164374636379, 1.94317125269006,
2.03941620499986, 2.12071850455654, 2.17753890907921, 2.22227616630581,
2.45586794615095, 2.66160802425205, 2.83084956697756, 2.94669126521054,
3.04536994227142, 3.09217816201639, 3.42405058020625, 3.45140184734503,
3.67343579954061, 4.64233570345934, 4.87075743677502, 5.27924539262207,
5.56822483595709), .Dim = c(99L, 2L), .Dimnames = list(NULL,
c("x", "y")))
So i would have a function like this:
plot(matrix[,1],matrix[,2])
And then, my idea is to calculate the integral of this function using this code in R:
integrating= function(x) return(myfunction(x));
integrate(integrating, lower=0.08, upper=0.15)
Is it possible?
I tried, but it didn't work.
When I looked at the matrix you provided (it is better to name the variable mat rather than matrix, which masks the base function name), I found that your x samples are evenly spaced, and the y values are monotone and smooth against x. So a simple linear interpolation is sufficient to model these data.
## read `?approx`
f <- approxfun(mat[, 1], mat[, 2])
Then you can do
integrate (f, lower = 0.08, upper = 0.15)
# -0.2343698 with absolute error < 1.3e-05

Resources