R fast matrix formulation

R fast matrix formulation - r

I need a fast way in R to insert one matrix into another. My current matrix is block diagonal, like this:
library(Matrix)
block1 <- new("dgCMatrix" , i = c(0L, 1L, 2L, 0L, 1L, 2L, 0L, 1L, 2L, 3L, 4L,
5L, 3L, 4L, 5L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 6L, 7L, 8L,
9L, 6L, 7L, 8L, 9L, 6L, 7L, 8L, 9L) ,
p = c(0L, 3L, 6L, 9L, 12L, 15L, 18L, 22L, 26L, 30L, 34L) ,
Dim = c(10L, 10L) , Dimnames = list(NULL, NULL) ,
x = c(1.002, 0.002, 0, 0.002, 1.002, 0.002, 0, 0.002, 1.002,
1.002, 0.002, 0, 0.002, 1.002, 0.002, 0, 0.002, 1.002,
3.002, 1.924, 0.507, 0.055, 1.924, 3.002, 1.924,
0.507, 0.507, 1.924, 3.002, 1.924, 0.055, 0.507, 1.924, 3.002) ,
factors = list())
I want to insert the matrix below (6 rows by 4 columns) into the elements [1:6, 7:10] of the matrix above.
block2 <-
structure(c(0.779, 0.018, 0, 0.779, 0.018, 0, 0.256, 0.169, 0, 0.256,
0.169, 0, 0.035, 0.641, 0, 0.035, 0.641, 0, 0.002, 1,
0.002, 0.002, 1, 0.002), .Dim = c(6L, 4L))
I am currently doing so using:
block1[1:6,7:10]=block2
However this takes a lot of time if my matrix is huge. Is there any other function or clever method I can use?

Related

Setting legend with Plotly

I am working in R with Plotly. Below you can see my bar chart and data.
t_df3<-data.frame(structure(list(deciles = c(0.001, 0.002, 0.003, 0.004, 0.005,
0.006, 0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004,
0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003,
0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.001, 0.002,
0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.001,
0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01,
0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009,
0.01, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008,
0.009, 0.01, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007,
0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006,
0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004, 0.005,
0.006, 0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004,
0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003,
0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01), variable.x = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), .Label = c("Food",
"Alcoholic Beverages", "Clothing", "Housing Water", "Furnishings",
"Health", "Transport", "Communication", "Recreation", "Education",
"Restaurants", "Miscellaneous Goods"), class = "factor"), value.x = c(958.823102803738,
1292.9, 1575.2, 1807, 1911.8,
2041.2, 2376.83, 2723, 3161.9,
4130.448, 120.0, 304, 246.4,
249.8, 285, 382.1, 494.5,
691.6, 787, 948.8, 11.9,
22.9085309734513, 62, 77.3, 201.184778761062,
239.961132743363, 330, 588.19178761062, 766.9,
2117.2, 46, 91.0239292035398, 166.0,
329.5, 525, 772.3, 1060.3112920354,
1297.9, 1680, 3963.0, 106.8,
175.2, 228, 295.961379310345, 300.2,
404.8, 447, 496.528551724138, 731.2,
1916.6995862069, 68, 71.9, 111.1,
154.9, 201, 253.49, 248.205798165138,
247.0, 421, 850.106642201835, 19.2,
14.9, 40, 96.0269734513274, 200.4,
354.0, 457, 745.2, 929.6,
2654.9, 4.9, 12.6, 50.6,
77.0251034482759, 168., 259.1, 364.2,
571.067586206897, 828.672, 1452.7, 6.75,
4.5, 5.41241379310345, 25.2, 46.1,
68.2, 125.2, 104.550620689655, 258.9,
951.36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15.5, 14.1,
44.5, 79.2, 119.3, 216.31,
316.8, 577.3, 901.7, 1897.1,
70.5037168141593, 97.10, 136.4, 189.5,
250.2, 333.9, 439.5, 583.7,
725.7, 1283.5), color.x = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), .Label = c("blue",
"cyan", "darkgreen", "red", "brown", "chartreuse", "green", "purple",
"gold", "tomato", "darkturquoise", "forestgreen"), class = "factor"),
deciles = c(0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007,
0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006,
0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004, 0.005,
0.006, 0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004,
0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003,
0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.001, 0.002,
0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.001,
0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01,
0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009,
0.01, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008,
0.009, 0.01, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007,
0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006,
0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004, 0.005,
0.006, 0.007, 0.008, 0.009, 0.01), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L), .Label = c("Food", "Alcoholic Beverages",
"Clothing", "Housing Water", "Furnishings", "Health", "Transport",
"Communication", "Recreation", "Education", "Restaurants",
"Miscellaneous Goods"), class = "factor"), value = c(145.111841584158,
195.676277227723, 238.405544554455, 273.52, 289.35,
308.935841584158, 359.719128712871, 412.1, 478.5,
625.117306930693, 96.9022702702703, 245.5, 198.9,
201.624648648649, 230.688432432432, 308.3, 399.0,
558.103135135135, 635.512216216216, 765.6, 11.9,
22.9085309734513, 62.3668672566372, 77.3, 201.1,
239.961132743363, 330.529486725664, 588.1, 766.9,
2117.21543362832, 46.402407079646, 91.0, 166.0,
329.568637168142, 525.05182300885, 772.307681415929, 1060.3112920354,
1297.90619469027, 1680.52311504425, 3963.01847787611, 106.808275862069,
175.229793103448, 228.774620689655, 295.961379310345, 300.238344827586,
404.881655172414, 447.85324137931, 496.528551724138, 731.241931034483,
1916.9, 68.8187889908257, 71.9220550458716, 111.1,
154.5, 201.2, 253.4, 248.2,
247.1, 421.0, 850.106642201835, 19.2,
14.94, 40.8, 96.0269734513274, 200.4,
354.09, 457.8, 745.21614159292, 929.6,
2654.1, 4.97, 12.6554482758621, 50.6,
77.0, 168.119172413793, 259.120551724138, 364.27,
571.07, 828.672, 1452.78786206897, 6.75310344827586,
4.55834482758621, 5.41241379310345, 25.2513103448276, 46.1428965517241,
68.2262068965517, 125.256827586207, 104.550620689655, 258.918620689655,
951.36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15.5453793103448, 14.1020689655172,
44.5, 79.2, 119.3, 216.3,
316.8, 577.3, 901.7, 1897.1,
70.5, 97.10, 136.421309734513, 189.527575221239,
250.2, 333.986336283186, 439.591433628319, 583.765805309735,
725.7, 1283.5), color = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L), .Label = c("blue", "cyan", "darkgreen",
"red", "brown", "chartreuse", "green", "purple", "gold",
"tomato", "darkturquoise", "forestgreen"), class = "factor")), class = "data.frame", row.names = c(NA,
-120L)))
Above is artificial data and below is code for bar chart
library(plotly)
plt <- plot_ly(t_df3) %>%
add_trace(x = ~deciles, y = ~value.x, type = 'bar',name = 'Left-scale',marker = list(color = ~color.x), name = ~variable.x) %>%
add_trace(x = ~deciles, y = ~value, type = 'bar', name = 'Right-scale',marker = list(color = ~color), name = ~variable) %>%
layout(
xaxis = list(title = '',font = t_8),
yaxis = list(title = ''),
legend = list(x = 0.01, y = 0.99,font = t_10),
barmode = 'bar'
)
plt
On the left side you can see a legend, but I am not satisfied with it: I want the legend to reflect the structure of the data (e.g. Food, Alcoholic Beverages, etc.). The structure is the same for the left and the right bars. Can anybody help with this?

Would this be suitable?
If so, then this is how you can make this plot.
First, I melted the data.
# Reshape to long format: `value` (right bars) and `value.x` (left bars) are
# stacked into a single `value` column, and `group` records which side each
# row came from. pivot_longer() is called through tidyr:: because tidyr is
# never attached in the snippets shown.
t_df4 <- tidyr::pivot_longer(t_df3, cols = c(value, value.x),
                             names_to = "group") %>%
  mutate(group = ifelse(group == "value", "right_side", "left_side"))
Then I plotted.
plot_ly(t_df4, x = ~list(deciles, group), y = ~value,
color = ~variable, colors = ~as.character(color), type = "bar") %>%
layout(barmode = "stack",
xaxis = list(title = ''),
yaxis = list(title = ''),
legend = list(x = 0.01, y = 0.99))

Setting bars and legend properly

I am working in R with Plotly. Below you can see my bar chart and data.
t_df3<-data.frame(structure(list(deciles = c(0.001, 0.002, 0.003, 0.004, 0.005,
0.006, 0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004,
0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003,
0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.001, 0.002,
0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.001,
0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01,
0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009,
0.01, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008,
0.009, 0.01, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007,
0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006,
0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004, 0.005,
0.006, 0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004,
0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003,
0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01), variable.x = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), .Label = c("Food",
"Alcoholic Beverages", "Clothing", "Housing Water", "Furnishings",
"Health", "Transport", "Communication", "Recreation", "Education",
"Restaurants", "Miscellaneous Goods"), class = "factor"), value.x = c(958.823102803738,
1292.9, 1575.2, 1807, 1911.8,
2041.2, 2376.83, 2723, 3161.9,
4130.448, 120.0, 304, 246.4,
249.8, 285, 382.1, 494.5,
691.6, 787, 948.8, 11.9,
22.9085309734513, 62, 77.3, 201.184778761062,
239.961132743363, 330, 588.19178761062, 766.9,
2117.2, 46, 91.0239292035398, 166.0,
329.5, 525, 772.3, 1060.3112920354,
1297.9, 1680, 3963.0, 106.8,
175.2, 228, 295.961379310345, 300.2,
404.8, 447, 496.528551724138, 731.2,
1916.6995862069, 68, 71.9, 111.1,
154.9, 201, 253.49, 248.205798165138,
247.0, 421, 850.106642201835, 19.2,
14.9, 40, 96.0269734513274, 200.4,
354.0, 457, 745.2, 929.6,
2654.9, 4.9, 12.6, 50.6,
77.0251034482759, 168., 259.1, 364.2,
571.067586206897, 828.672, 1452.7, 6.75,
4.5, 5.41241379310345, 25.2, 46.1,
68.2, 125.2, 104.550620689655, 258.9,
951.36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15.5, 14.1,
44.5, 79.2, 119.3, 216.31,
316.8, 577.3, 901.7, 1897.1,
70.5037168141593, 97.10, 136.4, 189.5,
250.2, 333.9, 439.5, 583.7,
725.7, 1283.5), color.x = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), .Label = c("blue",
"cyan", "darkgreen", "red", "brown", "chartreuse", "green", "purple",
"gold", "tomato", "darkturquoise", "forestgreen"), class = "factor"),
deciles = c(0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007,
0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006,
0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004, 0.005,
0.006, 0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004,
0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003,
0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.001, 0.002,
0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.001,
0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01,
0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009,
0.01, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008,
0.009, 0.01, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007,
0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006,
0.007, 0.008, 0.009, 0.01, 0.001, 0.002, 0.003, 0.004, 0.005,
0.006, 0.007, 0.008, 0.009, 0.01), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L), .Label = c("Food", "Alcoholic Beverages",
"Clothing", "Housing Water", "Furnishings", "Health", "Transport",
"Communication", "Recreation", "Education", "Restaurants",
"Miscellaneous Goods"), class = "factor"), value = c(145.111841584158,
195.676277227723, 238.405544554455, 273.52, 289.35,
308.935841584158, 359.719128712871, 412.1, 478.5,
625.117306930693, 96.9022702702703, 245.5, 198.9,
201.624648648649, 230.688432432432, 308.3, 399.0,
558.103135135135, 635.512216216216, 765.6, 11.9,
22.9085309734513, 62.3668672566372, 77.3, 201.1,
239.961132743363, 330.529486725664, 588.1, 766.9,
2117.21543362832, 46.402407079646, 91.0, 166.0,
329.568637168142, 525.05182300885, 772.307681415929, 1060.3112920354,
1297.90619469027, 1680.52311504425, 3963.01847787611, 106.808275862069,
175.229793103448, 228.774620689655, 295.961379310345, 300.238344827586,
404.881655172414, 447.85324137931, 496.528551724138, 731.241931034483,
1916.9, 68.8187889908257, 71.9220550458716, 111.1,
154.5, 201.2, 253.4, 248.2,
247.1, 421.0, 850.106642201835, 19.2,
14.94, 40.8, 96.0269734513274, 200.4,
354.09, 457.8, 745.21614159292, 929.6,
2654.1, 4.97, 12.6554482758621, 50.6,
77.0, 168.119172413793, 259.120551724138, 364.27,
571.07, 828.672, 1452.78786206897, 6.75310344827586,
4.55834482758621, 5.41241379310345, 25.2513103448276, 46.1428965517241,
68.2262068965517, 125.256827586207, 104.550620689655, 258.918620689655,
951.36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15.5453793103448, 14.1020689655172,
44.5, 79.2, 119.3, 216.3,
316.8, 577.3, 901.7, 1897.1,
70.5, 97.10, 136.421309734513, 189.527575221239,
250.2, 333.986336283186, 439.591433628319, 583.765805309735,
725.7, 1283.5), color = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L), .Label = c("blue", "cyan", "darkgreen",
"red", "brown", "chartreuse", "green", "purple", "gold",
"tomato", "darkturquoise", "forestgreen"), class = "factor")), class = "data.frame", row.names = c(NA,
-120L)))
Below you can see code in Plotly that produce the chart shown below
library(plotly)
plt <- plot_ly(t_df3) %>%
add_trace(x = ~deciles, y = ~value.x, type = 'bar',name = 'Left-scale',marker = list(color = ~color.x), name = ~variable.x) %>%
add_trace(x = ~deciles, y = ~value, type = 'bar', name = 'Right-scale',marker = list(color = ~color), name = ~variable) %>%
layout(
xaxis = list(title = ''),
yaxis = list(title = ''),
legend = list(x = 0.01, y = 0.99),
barmode = 'bar'
)
Now I want to change the legend on this bar chart that is shown on the left side and to have bar chart as the bar chart shown below.
So can anybody help me how to solve this problem ?

Here is a hack solution. A better one might exist.
library(plotly)
# Left bars sit at the original deciles; the right bars get a small x offset
# so the two sets do not land on the same position. Mapping `name` to the
# variable columns gives the legend one entry per category.
plt <- plot_ly(t_df3) %>%
  add_trace(x = ~deciles, y = ~value.x, type = "bar",
            marker = list(color = ~color.x), name = ~variable.x) %>%
  add_trace(x = ~deciles + 0.0004, y = ~value, type = "bar",
            marker = list(color = ~color), name = ~variable) %>%
  layout(
    xaxis = list(title = ""),
    yaxis = list(title = ""),
    legend = list(x = 0.01, y = 0.99),
    # "bar" is not a plotly barmode; it used to be overridden by "stack" in a
    # second layout() call, so "stack" is now set here directly.
    barmode = "stack"
  )
plt
The second trace has an artificial x offset, and the traces are stacked in the layout (the info shown on hover has to be corrected).

shade a facet.grid of kernel density plots with ggplot2

Consider the following df:
df<-structure(list(Trial = structure(c(1L, 5L, 1L, 5L, 1L, 4L, 3L,
2L, 2L, 4L, 3L, 3L, 2L, 5L, 4L, 1L, 2L, 3L, 5L, 1L, 2L, 1L, 4L,
3L, 1L, 3L, 3L, 2L, 3L, 5L, 1L, 3L, 3L, 5L, 5L, 1L, 4L, 3L, 3L,
1L, 1L, 5L, 5L, 1L, 3L, 5L, 2L, 1L, 5L, 3L, 2L, 1L, 4L, 3L, 5L,
3L, 4L, 1L, 2L, 2L, 2L, 2L, 4L, 1L, 4L, 5L, 3L, 1L, 5L, 3L, 3L,
4L, 2L, 2L, 4L, 4L, 1L, 3L, 4L, 5L, 4L, 2L, 3L, 1L, 1L, 4L, 2L,
3L, 5L, 2L, 2L, 4L, 1L, 4L, 4L, 5L, 2L, 4L, 2L, 4L, 1L, 4L, 3L,
5L, 4L, 5L, 2L, 3L, 2L, 2L, 5L, 1L, 3L, 3L, 3L, 1L, 2L, 4L, 5L,
3L, 1L, 2L, 5L, 1L, 4L, 3L, 2L, 2L, 5L, 1L, 5L, 1L, 4L, 5L, 5L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 3L, 2L, 5L, 2L, 5L, 2L, 4L, 1L), .Label =
c("ES8-13", "ES14-25", "ES26-38", "SA1-12", "SA14-25"), class = "factor"),
MAF = c(-0.022, 0.141, -0.035, 0.076, -0.019, -0.064, -0.044,
0.088, 0.067, 0.049, 0.088, 0.053, -0.052, -0.078, 0.104,
-0.019, -0.075, -0.049, 0.098, -0.145, 0.094, 0.014, 0.016,
0.00599999999999999, 0.027, 0.117, -0.011, 0.055, 0.079,
0.26, -0.049, 0.065, 0.227, 0.141, -0.091, -0.021, -0.306,
0.162, -0.089, -0.068, 0.00700000000000001, 0.034, 0.02,
0.038, 0.129, 0.099, 0.06, -0.09, 0.104, 0.195, 0.165, -0.047,
0.074, -0.01, 0.002, -0.068, 0.054, 0.012, -0.012, 0.222,
0.046, 0.00700000000000001, -0.022, 0.00499999999999995,
-0.051, 0.126, 0.073, 0.094, -0.254, 0.185, 0.238, 0.099,
0.027, 0.044, -0.018, 0.014, -0.058, -0.005, -0.00999999999999998,
-0.002, 0.061, 0.178, 0.001, 0.105, -0.001, -0.088, 0.113,
0.134, 0.175, 0.06, -0.026, 0.048, 0.003, 0.049, 0.0649999999999999,
-0.135, -0.036, -0.069, 0.015, -0.058, 0.024, 0.093, 0.123,
-0.144, 0.011, 0.343, 0.002, -0.018, 0.055, -0.047, -0.317,
-0.033, -0.018, 0.068, -0.044, 0.05, 0.079, 0.122, -0.071,
0.13, 0.078, 0.085, 0.012, -0.02, -0.088, -0.086, -0.026,
0.046, 0.101, -0.026, 0.005, 0.00700000000000001, 0.064,
0.066, -0.085, 0.114, 0.003, 0.004, -0.003, 0.097, 0.055,
-0.063, -0.089, 0.104, -0.199, 0.01, 0.184, 0.183, 0.129,
-0.059)), row.names = c(1146L, 163986L, 34946L, 168682L,
33356L, 152862L, 103827L, 54557L, 68666L, 141066L, 118349L, 93909L,
67299L, 193633L, 129212L, 39273L, 71459L, 102636L, 176655L, 30543L,
46107L, 32608L, 122906L, 100356L, 37635L, 81566L, 116510L, 61803L,
96219L, 187927L, 9211L, 106999L, 88554L, 181316L, 176250L, 32656L,
150472L, 80615L, 111414L, 16038L, 23319L, 185075L, 175803L, 32648L,
106332L, 185991L, 65155L, 32165L, 189972L, 92486L, 44161L, 404L,
123856L, 80513L, 180030L, 101190L, 145315L, 5498L, 75891L, 77358L,
67571L, 72894L, 127763L, 6584L, 139250L, 163126L, 101492L, 22520L,
181276L, 82673L, 94756L, 142750L, 48377L, 59931L, 140900L, 154339L,
2769L, 110265L, 130494L, 186334L, 138079L, 50754L, 82207L, 24578L,
26393L, 128021L, 69283L, 84549L, 187875L, 76775L, 45715L, 138049L,
1972L, 137218L, 158324L, 200014L, 61611L, 147430L, 60938L, 154928L,
22421L, 159532L, 98190L, 166565L, 151667L, 180407L, 55681L, 89127L,
54396L, 65975L, 172695L, 21969L, 80439L, 81202L, 87282L, 35394L,
53137L, 131886L, 163181L, 84221L, 32007L, 57711L, 160393L, 32843L,
157924L, 104820L, 63993L, 55023L, 160342L, 20800L, 167583L, 15849L,
143476L, 172878L, 195659L, 49812L, 4971L, 44583L, 24399L, 77026L,
16862L, 56500L, 113282L, 65688L, 188635L, 75437L, 190601L, 54633L,
137420L, 27389L), class = "data.frame")
Here is a snippet of the df:
Trial MAF
ES8-13 -0.022
SA14-25 0.141
ES8-13 -0.035
SA14-25 0.076
ES8-13 -0.019
SA1-12 -0.064
I have produced the following kernel density plot :
p <- ggplot(df,aes(x=MAF)) +
geom_density(fill='grey') + facet_grid(Trial ~.)
p
I would like to shade both tail regions that fall outside the central 90% of the values. With the following command, for example, I could get the quantiles for the whole df:
qt <- quantile(df$MAF,probs=c(.05,.95))
But I rather need the quantiles for every level of the factor Trial as follows:
# library() fails loudly if dplyr is missing; require() would only return FALSE.
library(dplyr)
# Per-Trial 5% and 95% quantiles of MAF, computed on the example data frame
# `df` (the original referred to `alele_freq_dev`, which is not defined here).
qt05 <- df %>%
  group_by(Trial) %>%
  summarise(quantile(MAF, probs = c(.05)))
qt95 <- df %>%
  group_by(Trial) %>%
  summarise(quantile(MAF, probs = c(.95)))
With those quantiles in mind I would need to shade every level of factor Trial for every facet of the graph. I found solutions for this problem but only for a singular plot case.
Could someone help me to get this done for a facet.grid case ?

I use library(ggridges) for distribution viz like this, because it has a lot of nice features, including the ability to customize quantile shading!
Here is an example without the faceting, because with this strategy you might not need to facet anymore:
library(ggridges)
# Ridge plot of MAF for each Trial. Every ridge is cut at its 5% and 95%
# quantiles and the three resulting regions are filled separately.
ggplot(df, aes(x = MAF, y = Trial, fill = factor(..quantile..))) +
  stat_density_ridges(geom = "density_ridges_gradient", calc_ecdf = TRUE,
                      quantiles = c(0.05, 0.95), scale = 1) +
  scale_fill_manual(values = c("#FDE725FF", "#A0A0A0A0", "#FDE725FF"),
                    name = NULL,
                    # The region above the 0.95 quantile is the upper 5%.
                    labels = c("lower 5%", "middle 90%", "upper 5%"))
If you still want to do the facet route, one drawback is that stat_density_ridges requires a y aesthetic. So I would do something like this to tweak the theme a bit and keep the plot looking pretty and clean (no one will ever know there is a y aes lurking in there!):
# Faceted variant: one panel per Trial. The y aesthetic is still supplied to
# stat_density_ridges(), so the y-axis text and ticks are blanked out.
ggplot(
  df,
  aes(x = MAF, y = Trial, fill = factor(..quantile..))
) +
  stat_density_ridges(
    geom = "density_ridges_gradient",
    calc_ecdf = TRUE,
    quantiles = c(0.05, 0.95),
    scale = 1
  ) +
  scale_fill_manual(
    values = c("#FDE725FF", "#A0A0A0A0", "#FDE725FF"),
    name = NULL,
    labels = c("lower 5%", "middle 90%", "upper 5%")
  ) +
  facet_grid(Trial ~ ., scales = "free_y") +
  theme(
    axis.text.y = element_blank(),  # hide the leftover y-axis labels
    axis.ticks.y = element_blank()
  )
Obviously you can tweak the colors and labels as you see fit, just make sure they make sense with the quantiles you set in the geom layer. Let me know if you have more questions.

How to calculate regression residuals in R for each individual in a longitudinal analysis?

I am working on a longitudinal/repeated measures multilevel model (MLM). Usually, for time-varying covariates (in my case "weekly gross income/1000"), you would calculate a person-mean centered version of the variable (i.e. subtracting the average of the person's weekly income across all of said person's time points from each person-year income response). However, this can lead to bias (see here) and hence a better (more generalisable) approach is to center around a regression line for each individual (as it happens, the residuals from the regression serve this purpose).
Therefore, I need to calculate the following regression, but for each individual (roughly 10,000 individuals with 25,000 observations):
lm(Weekly_Gross_Pay_Main_Job~nYear, data=df)
Then, the really critical part is that I need to extract the residuals to a separate column in my main dataset, matched up with each person. These residuals will take the place of my group-mean centered variable (which will in turn be used in my MLM).
Here is a possible starting point using the function that I have for the group-mean centering. If this could be updated to fit a regression with the residuals output for each person, then that would be ideal (if not, then I am open to other approaches):
# Group mean-centering a variable. Relevant for L1 variables only.
#
# variable: numeric vector to centre.
# group:    grouping vector of the same length as `variable` (e.g. person ID).
# na.rm:    if TRUE, missing values are ignored when each group mean is
#           computed (they remain NA in the result). The default FALSE keeps
#           the original behaviour, where a single NA turns every value in
#           that group into NA.
# Returns a vector as long as `variable`, holding each value minus the mean
# of its group.
gmc <- function(variable, group, na.rm = FALSE) {
  ave(variable, group, FUN = function(x) x - mean(x, na.rm = na.rm))
}
df$Weekly_Gross_Pay_Main_Jobgmc <- gmc(df$Weekly_Gross_Pay_Main_Job, df$Person_ID)
Data extract in long format (where Person_ID is the person, nYear is time, Weekly_Gross_Pay_Main_Job is weekly income/1000 and Weekly_Gross_Pay_Main_Jobgmc is the group-mean centered version):
structure(list(Person_ID = c(100003L, 100003L, 100003L, 100006L,
100006L, 100006L, 100006L, 100010L, 100010L, 100010L, 100010L,
100010L, 100010L, 100011L, 100014L, 100014L, 100014L, 100014L,
100014L, 100016L, 100018L, 100018L, 100018L, 100018L, 100018L,
100018L, 100018L, 100018L, 100018L, 100020L, 100020L, 100020L,
100020L, 100020L, 100020L, 100020L, 100020L, 100020L, 100021L,
100021L, 100024L, 100024L, 100024L, 100024L, 100024L, 100024L,
100024L, 100024L, 100024L, 100024L, 100025L, 100025L, 100025L,
100025L, 100025L, 100025L, 100025L, 100025L, 100027L, 100027L,
100027L, 100027L, 100029L, 100029L, 100029L, 100029L, 100029L,
100031L, 100031L, 100031L, 100032L, 100032L, 100032L, 100033L,
100033L, 100033L, 100033L, 100033L, 100033L, 100034L, 100034L,
100034L, 100037L, 100037L, 100037L, 100037L, 100037L, 100037L,
100037L, 100044L, 100044L, 100044L, 100044L, 100044L, 100044L,
100044L, 100045L, 100045L, 100045L, 100045L), nYear = c(5L, 6L,
7L, 2L, 3L, 4L, 6L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 5L, 6L, 7L,
8L, 9L, 5L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 1L, 2L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 4L, 5L, 6L, 1L, 2L, 3L, 3L, 4L, 5L, 6L, 7L, 8L,
2L, 3L, 5L, 5L, 6L, 7L, 8L, 9L, 11L, 13L, 2L, 3L, 4L, 6L, 7L,
8L, 9L, 4L, 5L, 6L, 7L), Weekly_Gross_Pay_Main_Job = c(0, 0.58,
0.35, 0.035, 0.65, 0.195, 0.43, 0, 0, 0, 0, 0, 0, 0.12, 1.653,
0.967, 1.742, 1.323, 0, 0.709, 0.155, 0.431, 0.235, 0.17, 0.285,
0.357, 0.28, 0.335, 0.375, 0.111, 0.333, 0.582, 0.882, 0.85,
0.944, 1.615, 1.615, 1.35, 0.168, 0.08, 0, 0, 0, 0, 0, 0, 0,
0.134, 0.737, 0, 0.02, 0.372, 0.1, 0.014, 0.307, 0.39, 0.671,
0.5, 0.278, 0.32, 0.425, 0.4, 0.57, 0.917, 0.75, 0.402, 0.437,
0.211, 0.537, 0.54, 0.135, 0.15, 0.65, 0.324, 0.399, 0.497, 0.67,
0.825, 0.825, 0.25, 0.319, 0.35, 0.885, 0.941, 0.975, 0.975,
1.02, 1.096, 1.148, 0.1, 0.11, 0.413, 0.477, 0.578, 0.686, 0.686,
0.511, 0.578, 0.8, 0.75), Weekly_Gross_Pay_Main_Jobgmc = c(-0.31,
0.27, 0.04, -0.2925, 0.3225, -0.1325, 0.1025, 0, 0, 0, 0, 0,
0, 0, 0.516, -0.17, 0.605, 0.186, -1.137, 0, -0.136444444444444,
0.139555555555556, -0.0564444444444445, -0.121444444444444, -0.00644444444444447,
0.0655555555555555, -0.0114444444444444, 0.0435555555555556,
0.0835555555555555, -0.809222222222222, -0.587222222222222, -0.338222222222222,
-0.0382222222222223, -0.0702222222222223, 0.0237777777777777,
0.694777777777778, 0.694777777777778, 0.429777777777778, 0.044,
-0.044, -0.0871, -0.0871, -0.0871, -0.0871, -0.0871, -0.0871,
-0.0871, 0.0469, 0.6499, -0.0871, -0.27675, 0.07525, -0.19675,
-0.28275, 0.01025, 0.09325, 0.37425, 0.20325, -0.07775, -0.03575,
0.06925, 0.04425, -0.0452, 0.3018, 0.1348, -0.2132, -0.1782,
-0.218333333333333, 0.107666666666667, 0.110666666666667, -0.176666666666667,
-0.161666666666667, 0.338333333333333, -0.266, -0.191, -0.093,
0.0800000000000001, 0.235, 0.235, -0.0563333333333333, 0.0126666666666667,
0.0436666666666666, -0.120714285714286, -0.0647142857142858,
-0.0307142857142858, -0.0307142857142858, 0.0142857142857142,
0.0902857142857143, 0.142285714285714, -0.335714285714286, -0.325714285714286,
-0.0227142857142857, 0.0412857142857143, 0.142285714285714, 0.250285714285714,
0.250285714285714, -0.1368, -0.0698000000000001, 0.1522, 0.1022
)), row.names = c(NA, 100L), class = "data.frame")

Not sure if I'm reading you right, and this might be a very naive answer that misses the point, but doesn't "residuals" just work?
Here's a linear mixed effects model with some data i had lying around
some.model<-lme(DV~IV, random=~1|Id, data=df)
head(residuals(some.model))
7 7 24 24 32 32
-0.054135825 -0.054135825 0.064271638 0.064271638 -0.001975424 -0.001975424
If you really want to put it into a column with the ID number next to it, it takes a few more steps. It can probably be done in a single step, but I'm really bad.
# Extract the residuals once; the names of that vector are reused as the ID
# column (they appear to be the Id values of the grouping factor).
extra.column <- residuals(some.model)
extra.column.id <- names(extra.column)
# cbind() of a numeric and a character vector gives a character matrix.
cbind(extra.column, extra.column.id)
extra.column extra.column.id
7 "-0.0541358252373243" "7"
7 "-0.0541358252373243" "7"
24 "0.0642716380035857" "24"
24 "0.0642716380035857" "24"
32 "-0.0019754241828096" "32"
32 "-0.0019754241828096" "32"
Sorry if this is not what you're looking for, but check out the residuals command.

Here is how I ended up doing it:
# Before you begin, time needs to be grand-mean centered.
df$nYearmc <- df$nYear - mean(df$nYear, na.rm = TRUE)
# Now regress the time-varying covariate onto grand-mean centered time and
# complete the process.
# First, build a Person_Year key (Person_ID and nYearmc joined together) to
# merge on later, and group the data by person as `by_Person`.
df <- tidyr::unite(df, Person_Year, c(Person_ID, nYearmc), remove = FALSE)
by_Person <- dplyr::group_by(df, Person_ID)
# Second, fit one regression per person on the grand-mean centered time
# variable and merge the per-observation output back into the main data frame.
# augment() is called through broom:: because, like tidyr and dplyr here,
# broom is never attached in this snippet.
df.Weekly_Gross_Pay_Main_Job <- dplyr::do(
  by_Person,
  broom::augment(lm(Weekly_Gross_Pay_Main_Job ~ nYearmc, data = .))
)
df.Weekly_Gross_Pay_Main_Job <- tidyr::unite(
  df.Weekly_Gross_Pay_Main_Job, Person_Year, c(Person_ID, nYearmc),
  remove = FALSE
)
df <- merge(df, df.Weekly_Gross_Pay_Main_Job, by = "Person_Year")
# Third, copy over the required columns (renaming them would be more
# efficient, but either way).
df$RegResGrossPay <- df$.resid
# Fourth, do an optional tidy up: merge() suffixed the shared columns with
# .x/.y, so restore the original names and drop the duplicates and the
# remaining augment() output columns.
colnames(df)[colnames(df) == "Person_ID.x"] <- "Person_ID"
colnames(df)[colnames(df) == "nYearmc.x"] <- "nYearmc"
colnames(df)[colnames(df) == "Weekly_Gross_Pay_Main_Job.x"] <- "Weekly_Gross_Pay_Main_Job"
df$Person_ID.y <- NULL
df$nYearmc.y <- NULL
df$Weekly_Gross_Pay_Main_Job.y <- NULL
df$.fitted <- NULL
df$.se.fit <- NULL
df$.resid <- NULL
df$.hat <- NULL
df$.sigma <- NULL
df$.cooksd <- NULL
df$.std.resid <- NULL
df.Weekly_Gross_Pay_Main_Job <- NULL
# Fifth, generate plots of the variables you need.
ggplot(df, aes(nYearmc, RegResGrossPay)) +
  geom_line(aes(group = Person_ID), alpha = 1 / 3) +
  geom_smooth(method = "lm", se = FALSE)

how to use ggplot conditional on data

I asked this question and it seems ggplot2 currently has a bug with empty data.frames.
Therefore I am trying to check whether the data frame is empty before I make the plot. But whatever I come up with gets really ugly and doesn't work. So I am asking for your help.
example data:
SOdata <- structure(list(id = 10:55, one = c(7L, 8L, 7L, NA, 7L, 8L, 5L,
7L, 7L, 8L, NA, 10L, 8L, NA, NA, NA, NA, 6L, 5L, 6L, 8L, 4L,
7L, 6L, 9L, 7L, 5L, 6L, 7L, 6L, 5L, 8L, 8L, 7L, 7L, 6L, 6L, 8L,
6L, 8L, 8L, 7L, 7L, 5L, 5L, 8L), two = c(7L, NA, 8L, NA, 10L,
10L, 8L, 9L, 4L, 10L, NA, 10L, 9L, NA, NA, NA, NA, 7L, 8L, 9L,
10L, 9L, 8L, 8L, 8L, 8L, 8L, 9L, 10L, 8L, 8L, 8L, 10L, 9L, 10L,
8L, 9L, 10L, 8L, 8L, 7L, 10L, 8L, 9L, 7L, 9L), three = c(7L,
10L, 7L, NA, 10L, 10L, NA, 10L, NA, NA, NA, NA, 10L, NA, NA,
4L, NA, 7L, 7L, 4L, 10L, 10L, 7L, 4L, 7L, NA, 10L, 4L, 7L, 7L,
7L, 10L, 10L, 7L, 10L, 4L, 10L, 10L, 10L, 4L, 10L, 10L, 10L,
10L, 7L, 10L), four = c(7L, 10L, 4L, NA, 10L, 7L, NA, 7L, NA,
NA, NA, NA, 10L, NA, NA, 4L, NA, 10L, 10L, 7L, 10L, 10L, 7L,
7L, 7L, NA, 10L, 7L, 4L, 10L, 4L, 7L, 10L, 2L, 10L, 4L, 12L,
4L, 7L, 10L, 10L, 12L, 12L, 4L, 7L, 10L), five = c(7L, NA, 6L,
NA, 8L, 8L, 7L, NA, 9L, NA, NA, NA, 9L, NA, NA, NA, NA, 7L, 8L,
NA, NA, 7L, 7L, 4L, NA, NA, NA, NA, 5L, 6L, 5L, 7L, 7L, 6L, 9L,
NA, 10L, 7L, 8L, 5L, 7L, 10L, 7L, 4L, 5L, 10L), six = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("2010-05-25",
"2010-05-27", "2010-06-07"), class = "factor"), seven = c(0.777777777777778,
0.833333333333333, 0.333333333333333, 0.888888888888889, 0.5,
0.888888888888889, 0.777777777777778, 0.722222222222222, 0.277777777777778,
0.611111111111111, 0.722222222222222, 1, 0.888888888888889, 0.722222222222222,
0.555555555555556, NA, 0, 0.666666666666667, 0.666666666666667,
0.833333333333333, 0.833333333333333, 0.833333333333333, 0.833333333333333,
0.722222222222222, 0.833333333333333, 0.888888888888889, 0.666666666666667,
1, 0.777777777777778, 0.722222222222222, 0.5, 0.833333333333333,
0.722222222222222, 0.388888888888889, 0.722222222222222, 1, 0.611111111111111,
0.777777777777778, 0.722222222222222, 0.944444444444444, 0.555555555555556,
0.666666666666667, 0.722222222222222, 0.444444444444444, 0.333333333333333,
0.777777777777778), eight = c(0.666666666666667, 0.333333333333333,
0.833333333333333, 0.666666666666667, 1, 1, 0.833333333333333,
0.166666666666667, 0.833333333333333, 0.833333333333333, 1, 1,
0.666666666666667, 0.666666666666667, 0.333333333333333, 0.5,
0, 0.666666666666667, 0.5, 1, 0.666666666666667, 0.5, 0.666666666666667,
0.666666666666667, 0.666666666666667, 0.333333333333333, 0.333333333333333,
1, 0.666666666666667, 0.833333333333333, 0.666666666666667, 0.666666666666667,
0.5, 0, 0.833333333333333, 1, 0.666666666666667, 0.5, 0.666666666666667,
0.666666666666667, 0.5, 1, 0.833333333333333, 0.666666666666667,
0.833333333333333, 0.666666666666667), nine = c(0.307692307692308,
NA, 0.461538461538462, 0.538461538461538, 1, 0.769230769230769,
0.538461538461538, 0.692307692307692, 0, 0.153846153846154, 0.769230769230769,
NA, 0.461538461538462, NA, NA, NA, NA, 0, 0.615384615384615,
0.615384615384615, 0.769230769230769, 0.384615384615385, 0.846153846153846,
0.923076923076923, 0.615384615384615, 0.692307692307692, 0.0769230769230769,
0.846153846153846, 0.384615384615385, 0.384615384615385, 0.461538461538462,
0.384615384615385, 0.461538461538462, NA, 0.923076923076923,
0.692307692307692, 0.615384615384615, 0.615384615384615, 0.769230769230769,
0.0769230769230769, 0.230769230769231, 0.692307692307692, 0.769230769230769,
0.230769230769231, 0.769230769230769, 0.615384615384615), ten = c(0.875,
0.625, 0.375, 0.75, 0.75, 0.75, 0.625, 0.875, 1, 0.125, 1, NA,
0.625, 0.75, 0.75, 0.375, NA, 0.625, 0.5, 0.75, 0.875, 0.625,
0.875, 0.75, 0.625, 0.875, 0.5, 0.75, 0, 0.5, 0.875, 1, 0.75,
0.125, 0.5, 0.5, 0.5, 0.625, 0.375, 0.625, 0.625, 0.75, 0.875,
0.375, 0, 0.875), elleven = c(1, 0.8, 0.7, 0.9, 0, 1, 0.9, 0.5,
0, 0.8, 0.8, NA, 0.8, NA, NA, 0.8, NA, 0.4, 0.8, 0.5, 1, 0.4,
0.5, 0.9, 0.8, 1, 0.8, 0.5, 0.3, 0.9, 0.2, 1, 0.8, 0.1, 1, 0.8,
0.5, 0.2, 0.7, 0.8, 1, 0.9, 0.6, 0.8, 0.2, 1), twelve = c(0.666666666666667,
NA, 0.133333333333333, 1, 1, 0.8, 0.4, 0.733333333333333, NA,
0.933333333333333, NA, NA, 0.6, 0.533333333333333, NA, 0.533333333333333,
NA, 0, 0.6, 0.533333333333333, 0.733333333333333, 0.6, 0.733333333333333,
0.666666666666667, 0.533333333333333, 0.733333333333333, 0.466666666666667,
0.733333333333333, 1, 0.733333333333333, 0.666666666666667, 0.533333333333333,
NA, 0.533333333333333, 0.6, 0.866666666666667, 0.466666666666667,
0.533333333333333, 0.333333333333333, 0.6, 0.6, 0.866666666666667,
0.666666666666667, 0.6, 0.6, 0.533333333333333)), .Names = c("id",
"one", "two", "three", "four", "five", "six", "seven", "eight",
"nine", "ten", "elleven", "twelve"), class = "data.frame", row.names = c(NA,
-46L))
And the plot
# Summarise a vector by its quartiles, in the shape a ggplot2 summary
# function passed as `fun.data` is expected to return.
#
# x    Values to summarise; coerced with as.numeric() before use.
# ...  Ignored; accepted so callers may pass extra arguments.
#
# Returns a named numeric vector with elements `ymin` (25th percentile),
# `y` (median) and `ymax` (75th percentile). Missing values are dropped.
iqr <- function(x, ...) {
  # TRUE rather than T: `T` is an ordinary variable and can be reassigned.
  qs <- quantile(as.numeric(x), probs = c(0.25, 0.5, 0.75), na.rm = TRUE)
  names(qs) <- c("ymin", "y", "ymax")
  qs
}
# Build a summary plot of column `y` of the global data frame `SOdata`,
# grouped along the x axis by column `six`.
#
# y    Name (character string) of the SOdata column to plot.
# ...  Unused.
#
# Returns the ggplot object (the value of the `+` chain below).
magic <- function(y, ...) {
# Outlier thresholds: median plus/minus 1.5 standard deviations, NAs ignored.
high <- median(SOdata[[y]], na.rm=T)+1.5*sd(SOdata[[y]],na.rm=T)
low <- median(SOdata[[y]], na.rm=T)-1.5*sd(SOdata[[y]],na.rm=T)
ggplot(SOdata, aes_string(x="six", y=y))+
# Grey crossbar per group, computed by the function named "iqr".
stat_summary(fun.data="iqr", geom="crossbar", fill="grey", alpha=0.3)+
# Rows above `high` in green and rows below `low` in red, jittered horizontally only.
# NOTE: where SOdata[[y]] is NA the comparison is NA, so the subset contains
# all-NA rows; when no row passes the test the subset is an empty data frame.
geom_point(data = SOdata[SOdata[[y]] > high,], position=position_jitter(w=0.1, h=0),col="green", alpha=0.5)+
geom_point(data = SOdata[SOdata[[y]] < low,], position=position_jitter(w=0.1, h=0),col="red", alpha=0.5)+
# Orange marker at each group's median.
stat_summary(fun.y=median, geom="point",shape=18 ,size=4, col="orange")
}
# Save one PNG per column of SOdata, skipping column 1 (`id`) and
# column 7 (`six`, the grouping variable used on the x axis).
for (i in names(SOdata)[-c(1, 7)]) {
  out_file <- paste0("magig_plot_", i, ".png")
  p <- magic(i)
  ggsave(out_file, plot = p, height = 3.5, width = 5.5)
}
The problem is that sometimes in the call to geom_point the subset returns an empty dataframe, which sometimes (!) causes ggplot2 to plot all the data instead of none of the data.
geom_point(data = SOdata[SOdata[[y]] > high,], position=position_jitter(w=0.1, h=0),col="green", alpha=0.5)+
This is kind of important to me, and I am really stuck trying to find a solution. Any help that will get me started is much appreciated.
Thanks in advance.

I guess you could replace this
# Original version: both point layers are always added, even when the
# subset passed as `data` has no rows.
magic <- function(y, ...) {
# Outlier thresholds: median plus/minus 1.5 standard deviations, NAs ignored.
high <- median(SOdata[[y]], na.rm=T)+1.5*sd(SOdata[[y]],na.rm=T)
low <- median(SOdata[[y]], na.rm=T)-1.5*sd(SOdata[[y]],na.rm=T)
ggplot(SOdata, aes_string(x="six", y=y))+
stat_summary(fun.data="iqr", geom="crossbar", fill="grey", alpha=0.3)+
# These two subsets may be empty, and may contain all-NA rows where SOdata[[y]] is NA.
geom_point(data = SOdata[SOdata[[y]] > high,], position=position_jitter(w=0.1, h=0),col="green", alpha=0.5)+
geom_point(data = SOdata[SOdata[[y]] < low,], position=position_jitter(w=0.1, h=0),col="red", alpha=0.5)+
stat_summary(fun.y=median, geom="point",shape=18 ,size=4, col="orange")
}
with something like
# Build a summary plot of column `y` of the global data frame `SOdata`,
# grouped along the x axis by column `six`, with outliers highlighted.
#
# A point layer is only added when it has at least one row to draw, so an
# empty data frame is never handed to geom_point().
#
# y    Name (character string) of the SOdata column to plot.
# ...  Unused; kept for interface compatibility.
#
# Returns the ggplot object.
magic <- function(y, ...) {
  values <- SOdata[[y]]

  # Outlier thresholds: median plus/minus 1.5 standard deviations, NAs ignored.
  centre <- median(values, na.rm = TRUE)
  spread <- 1.5 * sd(values, na.rm = TRUE)
  high <- centre + spread
  low <- centre - spread

  # which() drops NA comparisons, so rows with a missing value (or a missing
  # threshold) are never selected as outliers.
  high_rows <- which(values > high)
  low_rows <- which(values < low)

  p <- ggplot(SOdata, aes_string(x = "six", y = y)) +
    stat_summary(fun.data = "iqr", geom = "crossbar", fill = "grey", alpha = 0.3)

  if (length(high_rows) > 0) {
    p <- p + geom_point(
      data = SOdata[high_rows, ],
      position = position_jitter(width = 0.1, height = 0),
      colour = "green", alpha = 0.5
    )
  }
  if (length(low_rows) > 0) {
    p <- p + geom_point(
      data = SOdata[low_rows, ],
      position = position_jitter(width = 0.1, height = 0),
      colour = "red", alpha = 0.5
    )
  }

  # NOTE(review): `fun.y` is deprecated in newer ggplot2 releases in favour of
  # `fun`; it is kept here so the code still runs on the version in use.
  p + stat_summary(fun.y = median, geom = "point", shape = 18, size = 4, colour = "orange")
}

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

R fast matrix formulation - r

Related

Setting legend with Plotly

Setting bars and legend properly

shade a facet.grid of kernel density plots with ggplot2

How to calculate regression residuals in R for each individual in a longitudinal analysis?

how to use ggplot conditional on data

Categories

Resources