ggplot2 grouped replicate samples - r

My csv file contains replicate values, e.g., Mab1, Mab1rep, Mab2, Mab2rep, etc.
The data frame Data is as below:
Data <- structure(list(Samples = c("Isotype_L", "Isotype_L", "Isotype_L",
"Isotype_L", "Mab1", "Mab1", "Mab1", "Mab1", "Mab1-GL", "Mab1-GL",
"Mab1-GL", "Mab1-GL", "Mab2", "Mab2", "Mab2", "Mab2", "Mab2-GL",
"Mab2-GL", "Mab2-GL", "Mab2-GL", "Mab3", "Mab3", "Mab3", "Mab3",
"Mab4", "Mab4", "Mab4", "Mab4", "Mab4", "Mab5", "Mab5", "Mab5",
"Mab5", "Mab5", "Isotype_K", "Isotype_K", "Isotype_K", "Isotype_K",
"Isotype_Lrep", "Isotype_Lrep", "Isotype_Lrep", "Isotype_Lrep",
"Mab1rep", "Mab1rep", "Mab1rep", "Mab1rep", "Mab1rep", "Mab1-GLrep",
"Mab1-GLrep", "Mab1-GLrep", "Mab1-GLrep", "Mab2rep", "Mab2rep",
"Mab2rep", "Mab2rep", "Mab2-GLrep", "Mab2-GLrep", "Mab2-GLrep",
"Mab2-GLrep", "Mab3rep", "Mab3rep", "Mab3rep", "Mab3rep", "Mab4rep",
"Mab4rep", "Mab4rep", "Mab4rep", "Mab4rep", "Mab5rep", "Mab5rep",
"Mab5rep", "Mab5rep", "Mab5rep", "Isotype_Krep", "Isotype_Krep",
"Isotype_Krep", "Isotype_Krep", "PosCtrl", "PosCtrl", "PosCtrl",
"PosCtrl", "PosCtrl", "neg-AF488", "neg-AF488", "neg-AF488",
"neg-AF488", "Negative", "Negative", "Negative", "Negative",
"PosCtrl_rep", "PosCtrl_rep", "PosCtrl_rep", "PosCtrl_rep", "neg-AF488rep",
"neg-AF488rep", "neg-AF488rep", "neg-AF488rep", "Negative_rep",
"Negative_rep", "Negative_rep", "Negative_rep"), Blue = c(128.3952818,
120.2831546, 143.243713, 132.0577827, 133.8880534, 133.7664632,
121.0706891, 157.5932623, 182.4168577, 160.3366789, 205.4662033,
194.5710452, 136.0504487, 130.899206, 158.7230946, 146.368408,
152.2359201, 135.2182368, 142.0670308, 117.5533153, 135.8317231,
147.2705529, 130.3724567, 137.7607945, 135.0915241, 114.6307573,
153.3744009, 148.6203231, 141.6522212, 142.7500602, 129.3132835,
133.323963, 161.4505614, 120.3986388, 149.467766, 131.8404767,
121.1891517, 134.1152953, 142.9095762, 148.1782023, 133.1172244,
132.8860874, 124.8857092, 140.9295437, 122.7443303, 142.281986,
148.2327674, 138.7267188, 147.7389215, 157.2358721, 153.455753,
135.239042, 168.5716308, 122.357492, 141.6833326, 125.6991336,
121.3251682, 142.712414, 174.2987679, 140.9524518, 121.1017373,
154.801132, 126.8055734, 145.4754619, 168.1953102, 121.5520058,
137.4914411, 142.5554603, 147.9192906, 123.1908202, 134.2369485,
132.6270733, 143.3067567, 120.2250493, 127.5301465, 142.1151132,
125.718732, 117.2397291, 134.7169574, 120.9030571, 138.0262017,
121.5363059, 140.1157374, 171.9441906, 179.801995, 157.7747676,
135.5647523, 130.947343, 124.0994119, 117.3040363, 120.2912237,
128.9369029, 129.2967454, 134.7686437, 127.5407896, 155.7879164,
134.9068068, 121.4993647, 146.2323789, 131.6257992, 161.208799,
137.8464021), Green = c(204.0416907, 179.8289799, 192.7909809,
185.1904749, 119.5289134, 116.1968717, 119.8961343, 119.3418334,
114.7639073, 113.7169804, 118.3994388, 118.0875025, 120.7343683,
119.6826046, 121.079657, 124.4646777, 118.125646, 114.1900465,
114.0732686, 110.5228171, 115.0555818, 118.7761173, 113.2995208,
118.5396075, 167.6058496, 149.1461499, 189.7257013, 207.9481807,
177.2098519, 118.5133042, 118.6931648, 119.4754029, 128.9372642,
129.7043945, 112.037337, 111.9090535, 110.2099861, 112.2431433,
191.4316539, 201.5396396, 190.3129216, 192.7112734, 114.2036743,
115.6031688, 115.5844771, 115.7509866, 118.9890215, 112.9275697,
115.6021348, 119.0952462, 117.3730964, 113.6875097, 117.9319529,
114.2584918, 182.7833727, 111.9750247, 114.6643268, 117.7445263,
119.7687462, 113.3304581, 146.4097633, 114.3161156, 111.3511068,
200.9120144, 218.8782048, 169.1520322, 161.2219501, 266.5332884,
117.3344686, 117.3277836, 118.1452713, 115.3104536, 127.5856625,
112.5214363, 116.5449408, 115.1459536, 111.7753407, 349.2590405,
385.2193187, 439.0155097, 490.8051766, 394.1068064, 107.9149422,
108.005748, 108.1659999, 109.7366457, 107.8067543, 108.471598,
108.4746003, 108.6726188, 330.5756935, 329.6602842, 243.6285135,
266.0160698, 107.21539, 108.4953225, 104.6257189, 108.7797861,
108.3317481, 107.2107311, 107.6584237, 106.8200559), Green_norm = c(1.59,
1.5, 1.35, 1.4, 0.89, 0.87, 0.99, 0.76, 0.63, 0.71, 0.58, 0.61,
0.89, 0.91, 0.76, 0.85, 0.78, 0.84, 0.8, 0.94, 0.85, 0.81, 0.87,
0.86, 1.24, 1.3, 1.24, 1.4, 1.25, 0.83, 0.92, 0.9, 0.8, 1.08,
0.75, 0.85, 0.91, 0.84, 1.34, 1.36, 1.43, 1.45, 0.91, 0.82, 0.94,
0.81, 0.8, 0.81, 0.78, 0.76, 0.76, 0.84, 0.7, 0.93, 1.29, 0.89,
0.95, 0.83, 0.69, 0.8, 1.21, 0.74, 0.88, 1.38, 1.3, 1.39, 1.17,
1.87, 0.79, 0.95, 0.88, 0.87, 0.89, 0.94, 0.91, 0.81, 0.89, 2.98,
2.86, 3.63, 3.56, 3.24, 0.77, 0.63, 0.6, 0.7, 0.8, 0.83, 0.87,
0.93, 2.75, 2.56, 1.88, 1.97, 0.84, 0.7, 0.78, 0.9, 0.74, 0.81,
0.67, 0.77)), class = "data.frame", row.names = c(NA, -102L))
I plotted box plot using ggplot2 library:
Firstly, I wanted to see the distribution of samples and appreciate the difference between replicates.
ggplot(Data, aes(x = reorder (Samples, -Green_norm), y = Green_norm, fill = Samples)) +
geom_boxplot(alpha = 0.5) + geom_point(aes(colour=Samples))+
theme_bw() +
rotate_x_text(angle = 45)
I obtained the plot as follows:
Secondly, I would like to group the replicates per sample type, for example Mab1 and its replicate Mab1rep, and so on.

You can use regular expression substitution to remove any occurrences of "rep" or "_rep" in the Samples column, and then use your existing plotting code. I don't have your rotate_x_text function, so instead I'm doing the equivalent via theme. I've also modified the plotting code to use a different column name, rather than overwriting Samples.
library(tidyverse)

# Collapse each replicate onto its parent sample: strip a trailing "rep",
# together with any underscores directly before it, from the sample name.
# The result goes in a new column so the original Samples labels are kept.
data_new <- Data %>%
  mutate(Samples_grouped = gsub("_*rep$", "", Samples))

# Same box plot as before, but grouped, ordered and coloured by the collapsed
# sample name, so a sample and its replicate share one box.
ggplot(
  data_new,
  aes(
    x = reorder(Samples_grouped, -Green_norm),
    y = Green_norm,
    fill = Samples_grouped
  )
) +
  geom_boxplot(alpha = 0.5) +
  geom_point(aes(colour = Samples_grouped)) +
  theme_bw() +
  # rotate the x-axis labels by 45 degrees
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

Related

Creating an editable partial effect plot in R with the gratia::draw() function that also has a rugplot

The question I have has mostly been answered by the following post: Cannot update/edit ggplot2 object exported from a package (`gratia`) in R. When I refer to the mydraw.gam function, it comes from code in that post. What I am trying to do is use the mydraw.gam function with a rugplot that looks like the gratia::draw() function.
This is my data:
dput(LMB.stack)
structure(list(X1 = c(0.0541887294548451, 0.0721473880136936,
0.0175421164050594, 0.0215182766921787, 0.0440735967747106, 0.046669040060852,
0.0526230550013067, 0.112833597945919, 0.063812034754301, 0.0940158338572872,
0.0506721208894938, 0.0127474420783362, 0.0657879523145501, 0.0541887294548451,
0.0721473880136936, 0.0175421164050594, 0.0215182766921787, 0.0440735967747106,
0.046669040060852, 0.0526230550013067, 0.112833597945919, 0.063812034754301,
0.0940158338572872, 0.0506721208894938, 0.0127474420783362, 0.0382272328188603,
0.0541887294548451, 0.0721473880136936, 0.0175421164050594, 0.0215182766921787,
0.0440735967747106, 0.046669040060852, 0.0526230550013067, 0.112833597945919,
0.063812034754301, 0.0940158338572872, 0.0506721208894938, 0.0127474420783362,
0.0657879523145501, 0.0382272328188603, 0.0541887294548451, 0.0721473880136936,
0.0175421164050594, 0.0215182766921787, 0.0440735967747106, 0.046669040060852,
0.0526230550013067, 0.0056727211129064, 0.063812034754301, 0.0940158338572872,
0.106570293080958, 0.116604915677637, 0.0422424508991219, 0.109071218434758,
0.0666150693773212, 0.108073813949563, 0.0394885672397296, 0.0688845434754768,
0.0530021292114909, 0.106570293080958, 0.116604915677637, 0.0422424508991219,
0.109071218434758, 0.0666150693773212, 0.108073813949563, 0.0411444155997384,
0.0394885672397296, 0.0688845434754768, 0.0530021292114909, 0.106570293080958,
0.116604915677637, 0.0422424508991219, 0.109071218434758, 0.0666150693773212,
0.108073813949563, 0.0411444155997384, 0.0394885672397296, 0.0688845434754768,
0.0530021292114909, 0.0578017962016202, 0.106570293080958, 0.116604915677637,
0.0422424508991219, 0.109071218434758, 0.0666150693773212, 0.174633119183298,
0.0645268299068541, 0.0709485215243274, 0.0682173756351461, 0.0643514854635756,
0.014808611175444, 0.163637352944664, 0.0599393459014399, 0.134349635442672,
0.214544784680364, 0.0460287439577173, 0.0692001626120574, 0.0682173756351461,
0.0643514854635756, 0.014808611175444), X2 = c(0.64, 0.47, 0.598,
0.52, 0.41, 1.38, 0.53, 0.73, 0.367, 0.58, 0.75, 0.38, 0.227,
0.39, 0.36, 0.35, 0.41, 0.84, 0.53, 0.55, 0.33, 0.33, 0.356,
0.58, 0.33, 0.52, 0.43, 0.53, 0.45, 0.37, 0.54, 0.98, 0.789,
0.44, 0.23, 0.21, 0.67144, 0.37, 0.38, 0.18, 0.24, 0.36, 0.37,
0.16, 0.58, 0.44, 0.41, 0.16, 0.13, 0.55, 0.99, 2.31, 1.264,
1.005, 1.345, 1.24, 1.665, 1.545, 0.799, 0.736, 1.237, 0.776,
0.742, 1.0259, 0.66, 1.17, 0.864, 1.191, 0.631, 0.745, 0.866,
0.917, 1.105, 1.04, 0.517, 1.236, 1.066, 1.35, 0.947, 0.74, 0.62,
1.572, 0.56, 1.189, 0.645, 0.9, 0.74, 0.568, 1.14, 1.159, 1.325,
1.217, 1.37, 1.147, 1.89, 1.19, 1.3, 0.73, 0.693, 1.06)), row.names = c(NA,
100L), class = "data.frame")
This is what my gam looks like (using mgcv):
LMB.gam<-gam(X2~s(X1), data = LMB.stack)
When I use the draw(LMB.gam) command in the package gratia, this is what the partial effect plot looks like:
When I use the mydraw.gam command (see previous post) while trying to add a rug plot (see code below), this is what my plot looks like:
p<-mydraw.gam(LMB.gam)
p[[1]] + geom_rug(position = "jitter",sides="b")
I need some help figuring out how to properly add a rug plot to an editable gratia::draw ggplot partial effect plot that corresponds to the actual data.
Thanks!
I would just use smooth_estimates() and its draw() method to plot a single smooth from the model. You can then add to it using standard ggplot2 functionality...
# Fit the GAM to your data, held here in `df`
m <- gam(X2 ~ s(X1), data = df)

# Evaluate only the smooth of X1 so it can be drawn on its own
sm <- smooth_estimates(m, smooth = "s(X1)")

# The drawn smooth can be extended with ordinary ggplot2 layers via `+`
draw(sm) +
  labs(title = "My title", y = "foo") +
  # rug of the observed X1 values along the bottom axis; the layer gets its
  # own data and mapping rather than inheriting those of the smooth
  geom_rug(
    data = df,
    mapping = aes(x = X1),
    sides = "b",
    inherit.aes = FALSE
  )
produces

Calculate AUC using sensitivity and specificity values

How to calculate AUC, if I have values of sensitivity and specificity for various threshold cutoffs?
I have sensitivity and specificity values for 100 thresholds.
sensitivity: c(0.649193548387097, 0.649193548387097, 0.649193548387097, 0.649193548387097,
0.649193548387097, 0.649193548387097, 0.649193548387097, 0.646586345381526,
0.646586345381526, 0.646586345381526, 0.646586345381526, 0.646586345381526,
0.646586345381526, 0.646586345381526, 0.646586345381526, 0.646586345381526,
0.644, 0.644, 0.644, 0.644, 0.641434262948207, 0.641434262948207,
0.638888888888889, 0.638888888888889, 0.638888888888889, 0.634920634920635,
0.634920634920635, 0.634920634920635, 0.634920634920635, 0.630952380952381,
0.628458498023715, 0.624505928853755, 0.620553359683794, 0.615686274509804,
0.611764705882353, 0.607843137254902, 0.607843137254902, 0.6,
0.6, 0.59765625, 0.59375, 0.5859375, 0.58203125, 0.57421875,
0.57421875, 0.56640625, 0.562015503875969, 0.550387596899225,
0.534883720930233, 0.511627906976744, 0.5, 0.496153846153846,
0.486590038314176, 0.478927203065134, 0.46360153256705, 0.455938697318008,
0.452107279693487, 0.442748091603053, 0.425855513307985, 0.418250950570342,
0.4106463878327, 0.399239543726236, 0.390151515151515, 0.382575757575758,
0.377358490566038, 0.369811320754717, 0.362264150943396, 0.354716981132075,
0.343396226415094, 0.343396226415094, 0.339622641509434, 0.328301886792453,
0.316981132075472, 0.29811320754717, 0.294339622641509, 0.286792452830189,
0.279245283018868, 0.270676691729323, 0.255639097744361, 0.244360902255639,
0.236842105263158, 0.236842105263158, 0.229323308270677, 0.225563909774436,
0.214285714285714, 0.191729323308271, 0.184210526315789, 0.176691729323308,
0.165413533834586, 0.139097744360902, 0.139097744360902, 0.12781954887218,
0.120300751879699, 0.105263157894737, 0.075187969924812, 0.0639097744360902,
0.0601503759398496, 0.0526315789473684, 0.0413533834586466, 0.018796992481203,
0)
specificity : c(0.917961165048544, 0.920581113801453, 0.923708353452438, 0.925337186897881,
0.928743379874819, 0.930288461538462, 0.93371757925072, 0.934772182254197,
0.936272160996646, 0.937739463601533, 0.938872970391595, 0.940867906533143,
0.942435775451951, 0.944893111638955, 0.946969696969697, 0.949881796690307,
0.952290977798772, 0.953235710911667, 0.955209806694955, 0.956235294117647,
0.95815702867889, 0.95868544600939, 0.961556493202063, 0.962043111527648,
0.963951310861423, 0.965420560747664, 0.966449207828518, 0.966930600838379,
0.9674569967457, 0.967951695308871, 0.967951695308871, 0.968474733426055,
0.969401947148818, 0.969401947148818, 0.969907407407407, 0.971322849213691,
0.972735674676525, 0.973684210526316, 0.97372060857538, 0.973756906077348,
0.975598526703499, 0.977000919963201, 0.977512620468105, 0.9780119102153,
0.979405034324943, 0.981235697940503, 0.98124428179323, 0.982167352537723,
0.982632541133455, 0.982648401826484, 0.983135824977211, 0.984069185252617,
0.984993178717599, 0.985467756584923, 0.985934664246824, 0.986406887177164,
0.98733604703754, 0.98869801084991, 0.98961625282167, 0.989625620207488,
0.990081154192967, 0.990085624155025, 0.990540540540541, 0.990540540540541,
0.990995047276002, 0.991449144914491, 0.991899189918992, 0.993252361673414,
0.99370220422852, 0.993707865168539, 0.993713515940727, 0.994616419919246,
0.995513683266039, 0.996410946612831, 0.996859578286227, 0.996860986547085,
0.997311827956989, 0.997315436241611, 0.997316636851521, 0.997763864042934,
0.997763864042934, 0.998211890925346, 0.998212689901698, 0.998212689901698,
0.998212689901698, 0.998214285714286, 0.998661311914324, 0.998661311914324,
0.998661311914324, 0.999107939339875, 0.999107939339875, 0.999108337048596,
0.999108337048596, 0.999108734402852, 0.999109528049866, 0.999554962171785,
1, 1, 1, 1, 1)
threshold:
c(0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1,
0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2, 0.21,
0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31, 0.32,
0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42, 0.43,
0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53, 0.54,
0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64, 0.65,
0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76,
0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87,
0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98,
0.99, 1)
AUC =round(sum(specificity [1:length(threshold)]*diff(c(0, 1 - sensitivity [1:length(threshold)]))),2)
AUC= 0.95
1)Is this the correct way to find AUC?
2)If I want to plot ROC curve is this code fine?
plot((1-specificity),sensitivity ,xlab = "Sensitivity",ylab = "Specificity",type = "l")
3) Is there some formula to calculate the power of this ROC analysis, so that I know the minimum number of samples I need to calculate AUC?

Plot in R with different pch's

This is my data, and I need to plot:
data=structure(c(0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09,
0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2,
0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31,
0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42,
0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53,
0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64,
0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75,
0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86,
0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97,
0.98, 0.99, -4.29168871465397, -3.11699074587972, 1.09152409255126,
1.55755175826356, -0.172913268677486, 0.138305902738217, -0.38707713636532,
0.0638896647028127, 0.838910810102289, 0.943154102106711, 1.10825647675154,
1.26151733689579, 0.95610404139547, 1.13671597066802, 1.06145162449853,
1.22015975232484, 1.47211564748976, 1.43575780356999, 1.84397139393396,
1.76431139003358, 1.59262327273733, 1.74799121927712, 1.60092115463811,
1.91302749514369, 1.69691050471565, 1.73871696181996, 1.70008388736007,
1.62139419455853, 2.03803222390097, 1.95654400666235, 2.14213709053145,
2.20797610828818, 2.43019994960532, 2.43201814098108, 1.80396697393168,
2.22800019319471, 2.07590961781243, 1.93938306553876, 1.95940985069043,
2.01357121475676, 1.97530323680977, 1.80327169854223, 2.36734705989908,
2.44766094824079, 2.75792381459726, 2.77274665368527, 2.49888229303308,
2.31540449224314, 2.6409962540336, 2.43729957198807, 2.63155885389867,
2.53653088267223, 2.36871141172942, 2.54858578120089, 2.69802567434559,
3.09606341962321, 3.08856133175863, 3.18997559061186, 3.36005160648579,
3.56895022380044, 3.73753226001724, 3.74662085372188, 4.01296134301718,
4.07267448537225, 3.88165588983999, 3.7369314477271, 3.23912007937852,
3.31721703890831, 3.21894991022748, 3.48377059081018, 3.32624243338278,
3.31970136033168, 3.33053692253337, 3.34467916673038, 3.236168836409,
2.93429043790414, 2.9303837626847, 3.15769722112212, 3.75496410153913,
3.60526854720219, 3.82913260531081, 4.12105540857576, 4.00407286724511,
3.86329120505831, 4.01282715673454, 4.27078090625557, 3.57982245847814,
3.42938648057264, 3.04047099021105, 3.22396221972667, 4.4317374989557,
4.55399628631069, 4.51384672365535, 5.19575483872483, 4.77975901314362,
3.67143455937258, 4.83321942758713, 5.82353153779422, 5.4721995802281,
0.209205679527393, 0.36810747913542, 0.767214115569449, 0.631134464438132,
0.950471080949761, 0.955883872576242, 0.861939569072133, 0.978322788509546,
0.650739708163536, 0.609454620741533, 0.416316714902356, 0.424390227854642,
0.509471258981771, 0.45111061569788, 0.482703338045896, 0.415503380452312,
0.281397009944395, 0.312633722543431, 0.172403050166603, 0.157569155616774,
0.223315461391016, 0.134712102225702, 0.187843250166637, 0.109294406499708,
0.115163596824693, 0.138462578171918, 0.119131458337016, 0.174760537513378,
0.060100726330413, 0.0724953102167094, 0.0727020992861007, 0.0538763524104828,
0.0305519665256373, 0.0458544145004334, 0.13222239331969, 0.062914362547982,
0.0997526784831062, 0.11462977656091, 0.116582141802293, 0.0986337165111772,
0.136226138825677, 0.168342590268618, 0.0716128991576213, 0.0676036354494944,
0.0357838762803169, 0.0334279079582225, 0.0610644117339305, 0.0616823286482187,
0.0660736255131733, 0.104368782129991, 0.0705141118177286, 0.0778176025258217,
0.108146014569371, 0.125671355892738, 0.0590267483041353, 0.0294699796128093,
0.0338205013760269, 0.0269159737669502, 0.0134643988629253, 0.00867709725404753,
0.00493722923021656, 0.00323813401160211, 0.000497278521965683,
0.000424360028534299, 0.000603507667276793, 0.00192008642195063,
0.00578745302404915, 0.00632637091749721, 0.0036673526900235,
0.00322317560117313, 0.00315464572099522, 0.00890662685249866,
0.00630278028858244, 0.00172069402847441, 0.00297661131713389,
0.00907593497087, 0.00794661797866469, 0.00360198056893646, 0.000913572843050492,
0.000952621690864408, 0.000214234772719202, 4.55598611162067e-05,
2.0600933563486e-05, 0.00014372066333701, 3.00102200614383e-05,
1.97046007623936e-05, 0.000349337120439941, 0.00580915934418336,
0.0186446024343607, 0.0455194395151208, 0.0067650312952201, 0.00903110379061256,
0.0210099376843247, 0.0126330025977033, 0.0735408204027586, 0.158374400655879,
0.0970807294810527, 0.0643407704341705, 0.408677400389109), .Dim = c(99L,
3L), .Dimnames = list(NULL, c("betas.position", "coef", "pvalue"
)))
I need to plot a graph like this: plot(data[,1],data[,2], pch=8)
When the p-value (data[,3]) is bigger than 0.10, pch should be empty(a line).
I believe that I have to construct some rule, but I am not able to do this so far.
Use an ifelse, which returns a vector which here is either 1 or 2 depending on the value of data[,3]:
plot(data[,1],data[,2],pch=ifelse(data[,3]>0.10,1,2))
so pch=1 for data[,3]>0.10 and pch=2 otherwise. Adjust these for whichever symbols you want, or use NA for nothing. You can use similar logic for setting the symbol size with the cex= parameter.
The below will remove the points you don't want from your chart:
# Work on a data frame so the p-value column can be referenced by name.
data <- as.data.frame(data)

# Keep only the points whose p-value is at most 0.10; points with a larger
# p-value are the ones that should not be drawn.
keep <- data$pvalue <= 0.1
plot(data[keep, 1], data[keep, 2], pch = 8)
I'm not sure what you mean by "empty (a line)". If you want to overlay different plot types you should consider ggplot2. It has far more functionality than the Base R plots.

Understanding and implementing numerical integration with a quantile function in R

I need to calculate this integral below, using R:
The q_theta(x) function I managed to do in R with quantile regression (package: quantreg).
matrix=structure(c(0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09,
0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2,
0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31,
0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42,
0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53,
0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64,
0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75,
0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86,
0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97,
0.98, 0.99, -22.2830664155772, -22.2830664155772, -19.9298291765612,
-18.2066426767652, -15.2657135034479, -14.921522915965, -13.5035945028536,
-13.1557269916064, -12.9495709618481, -11.6168348488161, -11.3999095021713,
-10.6962766764396, -10.0588239375837, -9.12944363439522, -8.15648778610587,
-8.04133299299019, -7.66558386420434, -7.50906566627427, -6.95626096568998,
-6.90630556403136, -6.53374879831376, -6.39324677042686, -6.20705804899049,
-6.09754765999465, -5.91272058217526, -5.75771166206242, -5.3770131257001,
-5.20892464393192, -5.07372162687422, -4.96706814289334, -4.64404095131293,
-4.1567394053577, -4.13209444755342, -3.85483644113723, -3.64855238293205,
-3.53054113507559, -3.46035383338799, -3.03155417364444, -2.93100183005178,
-2.90491824855193, -2.64056616049773, -2.51857727614607, -2.25163805172486,
-2.00934783937474, -1.89925824841417, -1.71405007411747, -1.65905834683964,
-1.47502511311988, -1.42755073292529, -1.20464216637298, -1.08574103345057,
-0.701134735371922, -0.590656010656201, -0.290335898959635, -0.0575062007348038,
0.0778328375033378, 0.165234593185889, 0.230651883848336, 0.316817885358695,
0.34841775605248, 0.516869604496075, 0.59743162507581, 0.857843937404964,
0.939734010162078, 1.12533017928147, 1.27037182428776, 1.52040854525927,
1.76577933448152, 2.07456447851822, 2.17389787235523, 2.27567786362425,
2.3850323163509, 2.55365596853891, 2.61208242890655, 2.77359226593771,
2.93275094039929, 3.07968072488942, 3.0822647851901, 3.26452177629061,
3.46223321951649, 3.66011832966054, 3.85710605543097, 4.05385887531972,
4.83943843494744, 5.05864734149161, 5.25501778319145, 5.38941130574907,
5.88571117751377, 6.5116611852713, 6.98632496342285, 7.21816245728101,
7.73244825971004, 7.80401007592906, 8.34648625541999, 9.83184090479964,
10.8324874884172, 11.3060100107816, 12.3048113953808, 13.1300123358331
), .Dim = c(99L, 2L), .Dimnames = list(NULL, c("Theta", "q(x)_(Theta)"
)))
This is my q_theta(x) function that I estimated in R. The questions I have are:
a> If x is a standard normal distribution this integral is zero; Right?
b> Otherwise, in my case, the integral is not zero. How do I treat the q_1-Theta(x)? Is it simply sort(matrix[,"q(x)_(Theta)"],decreasing=TRUE)?
And the integration would be:
sintegral(thau[1:50], (matrix[,"q(x)_(Theta)"][1:50] - sort(matrix[,"q(x)_(Theta)"],TRUE)[1:50])[1:50])$value
The median would be a common point of these two functions. Right?
Thanks.
Recalling your previous post "Building a function by defining X and Y and then Integrating in R", we build a linear interpolation function:
## `mat` is the matrix posted in the question (named `matrix` there)
mat <- matrix
## integrand for Theta in [0.01, 0.5]: q_Theta(x) - q_(1 - Theta)(x);
## rows 1:50 hold q_Theta(x), rows 99:50 the matching q_(1 - Theta)(x)
y <- mat[1:50, 2] - mat[99:50, 2]
## note `rule = 2` to enable "extrapolation";
## otherwise `rule = 1` gives `NA` outside [0.01, 0.5]
integrand <- approxfun(mat[1:50, 1], y, rule = 2)
Then we can perform numeric integration on [0, 0.5]:
integrate(integrand, lower = 0, upper = 0.5)
# -5.594405 with absolute error < 4e-04
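Now for a>, let's have a proof first. If the distribution is symmetric about zero (as the standard normal is), then q_(1-Theta)(x) = -q_Theta(x), so the integrand q_Theta(x) - q_(1-Theta)(x) equals 2 * q_Theta(x), and the integral over [0, 0.5] is twice the integral of q_Theta(x) over [0, 0.5], which is not zero.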
Note, your quantile function is not for normal distribution, so this result does not hold. You can actually verify this
quant <- approxfun(mat[, 1], mat[, 2], rule = 2)
integrate(quant, lower = 0, upper = 0.5)
# -3.737973 with absolute error < 0.00029
Compared with previous integration result -5.594405, the difference is not a factor of 2.

Building a function by defining X and Y and then Integrating in R

I need to construct a function with x values coming from the first column of this matrix below and y values coming from the second column from the same matrix, with the purpose of later calculating the integral in the desired range.:
matrix=structure(c(0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09,
0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2,
0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31,
0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42,
0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53,
0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64,
0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75,
0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86,
0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97,
0.98, 0.99, -7.38512004893287, -7.38512004893287, -6.4788834441613,
-5.63088940915783, -4.83466644123448, -4.68738146949482, -4.28638930290018,
-4.22411786604579, -3.59136848943044, -3.51706359680799, -3.39972014575003,
-3.28609348968074, -3.08569873266253, -2.99764447889508, -2.89470597729108,
-2.77488515429677, -2.67019029728821, -2.54646363628509, -2.48474483938047,
-2.30542896070156, -2.22485510301423, -2.16689229344011, -2.10316315192181,
-2.05135466960309, -1.90942757945567, -1.87863626704201, -1.82507998490407,
-1.75875817642096, -1.6919717645629, -1.62396997031953, -1.56159595204983,
-1.52152738173419, -1.46478394989911, -1.4590555309334, -1.21744398902807,
-1.21731951113139, -1.15003007559406, -1.07321513324935, -0.993364510081357,
-0.924402354306976, -0.885939210442384, -0.831155619244629, -0.80947326709303,
-0.786842719842383, -0.743834513319968, -0.721194178931262, -0.593033922802471,
-0.514780082129033, -0.50717184901095, -0.44223827942003, -0.403514759789576,
-0.296251921664, -0.204238424399985, -0.1463212643028, -0.0982036017275267,
-0.0705262020944892, 0.0275436976821241, 0.0601977432996216,
0.114959963559268, 0.182222546319913, 0.236503724954577, 0.272244043950984,
0.325188234828891, 0.347862804414816, 0.438932719815686, 0.630570414177834,
0.805087251137292, 0.904903847087405, 0.940702374334727, 0.958351604371838,
1.03920208406121, 1.25808734990267, 1.32634708210007, 1.34458194173569,
1.42693337001189, 1.55016591141652, 1.5710754638668, 1.61795101580197,
1.62472416407376, 1.70223430572367, 1.86164374636379, 1.94317125269006,
2.03941620499986, 2.12071850455654, 2.17753890907921, 2.22227616630581,
2.45586794615095, 2.66160802425205, 2.83084956697756, 2.94669126521054,
3.04536994227142, 3.09217816201639, 3.42405058020625, 3.45140184734503,
3.67343579954061, 4.64233570345934, 4.87075743677502, 5.27924539262207,
5.56822483595709), .Dim = c(99L, 2L), .Dimnames = list(NULL,
c("x", "y")))
So I would have a function like this:
plot(matrix[,1],matrix[,2])
And then, my idea is to calculate the integral of this function using this code in R:
integrating= function(x) return(myfunction(x));
integrate(integrating, lower=0.08, upper=0.15)
Is it possible?
I tried, but it didn't work.
When I looked at the matrix you provided (better to use the variable name mat rather than matrix for it), I found that your x samples are evenly spaced, and the y values are monotone and smooth against x. So a simple linear interpolation is good enough to model the data.
## read `?approx`
f <- approxfun(mat[, 1], mat[, 2])
Then you can do
integrate (f, lower = 0.08, upper = 0.15)
# -0.2343698 with absolute error < 1.3e-05

Resources