Normalize and control for missing data in an extreme dataset - r

I have the following dataset
structure(list(q1 = c(5, 40, 200, 100, 100, 3, 200, 10, 10, 50,
50, 20, 600, 20, 15, 20, 80, 50, 0, 0, 45, 40, 20, 100, 20, 100,
3, 30, 10, 3, 20, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 100, 0, 5, 5,
5, 2, 0, 0, 0, 0, 0, 100, 0, 0, 0, 10, 5, 0, 50), q2 = c(5, 40,
200, 80, 100, 2, 100, 11, 10, 5, 50, 60, 600, 10, 10, 30, 50,
0, 0, 0, 45, 30, 10, 20, 20, 20, 5, 30, 30, 3, 20, 0, 20, 0,
0, 0, 20, 0, 5, 2, 60, 0, 40, 10, 5, 0, 0, 0, 0, 5, 0, 0, 0,
0, 0, 0, 10, 20, 0, 0), q3 = c(2, 70, 400, 160, 350, 100, 500,
20, 100, 500, 300, 20, 1000, 20, 20, 200, 80, 100, 70, 50, 0,
20, 40, 0, 0, 200, 5, 0, 100, 3, 50, 60, 0, 0, 0, 20, 100, 30,
40, 50, 50, 1000, 60, 0, 10, 160, 20, 40, 40, 200, 20, 20, 15,
150, 10, 15, 10, 100, 0, 10), q4 = c(50, 30, 300, 160, 300, 100,
500, 20, 100, 25, 200, 30, 600, 20, 0, 0, 50, 20, 200, 50, 50,
20, 30, 0, 0, 50, 3, 20, 60, 3, 0, 60, 0, 0, 0, 15, 100, 30,
30, 20, 100, 1000, 30, 10, 10, 50, 3, 20, 0, 100, 15, 20, 1510,
0, 10, 20, 0, 50, 0, 0), q5 = c(20, 50, 200, 40, 100, 100, 100,
15, 20, 50, 50, 50, 1000, 20, 15, 30, 50, 30, 15, 15, 25, 20,
20, 20, 20, 150, 3, 50, 30, 10, 30, 30, 50, 20, 20, 15, 20, 30,
8, 20, 100, 500, 30, 10, 30, 20, 3, 20, 20, 15, 30, 0, 45, 20,
0, 15, 30, 40, 20, 15), q6 = c(0, 70, 100, 160, 100, 100, 50,
15, 10, 25, 1000, 50, 1000, 20, 0, 0, 80, 0, 0, 0, 35, 30, 10,
20, 20, 100, 3, 10, 60, 10, 0, 100, 30, 50, 100, 15, 30, 30,
17, 5, 30, 1000, 80, 20, 30, 80, 40, 80, 20, 20, 40, 30, 30,
0, 0, 20, 10, 40, 20, 50), q7 = c(5, 50, 200, 100, 100, 5, 20,
10, 0, 300, 50, 20, 300, 20, 0, 200, 80, 10, 15, 0, 30, 20, 40,
20, 20, 100, 3, 15, 50, 15, 80, 20, 0, 30, 0, 15, 20, 30, 10,
20, 30, 100, 70, 20, 3, 20, 30, 40, 30, 10, 15, 0, 30, 30, 0,
5, 50, 30, 0, 30), q8 = c(0, 30, 50, 100, 20, 5, 5, 8, 10, 5,
30, 20, 100, 20, 0, 0, 50, 20, 0, 0, 35, 20, 20, 0, 30, 20, 5,
6, 30, 15, 10, 10, 30, 0, 0, 0, 20, 30, 6, 5, 50, 100, 10, 10,
5, 35, 20, 80, 20, 20, 15, 0, 15, 0, 0, 5, 10, 40, 0, 15), q9 = c(20,
40, 0, 180, 0, 0, 0, 1, 20, 500, 100, 20, 1000, 0, 20, 0, 80,
50, 0, 15, 45, 20, 20, 0, 20, 200, 3, 80, 50, 15, 30, 30, 30,
0, 20, 0, 50, 0, 45, 200, 0, 0, 5, 20, 10, 180, 50, 90, 20, 50,
20, 0, 15, 0, 0, 30, 50, 40, 0, 30), q10 = c(10, 70, 0, 200,
0, 0, 10, 1, 15, 15, 100, 20, 1000, 0, 0, 0, 80, 30, 0, 10, 30,
30, 10, 0, 15, 20, 5, 30, 40, 15, 10, 30, 100, 0, 0, 5, 50, 30,
20, 15, 30, 0, 5, 10, 10, 90, 25, 90, 15, 25, 20, 0, 15, 0, 0,
35, 10, 20, 0, 15), q11 = c(20, 60, 200, 120, 100, 9, 100, 15,
25, 150, 100, 30, 100, 20, 15, 50, 80, 50, 20, 15, 30, 20, 30,
20, 15, 150, 10, 20, 50, 10, 35, 20, 50, 20, 0, 20, 0, 30, 35,
20, 80, 100, 60, 20, 50, 20, 60, 20, 50, 25, 35, 0, 30, 0, 0,
30, 30, 40, 20, 20), q12 = c(20, 50, 200, 120, 100, 3, 50, 12,
10, 15, 50, 30, 100, 20, 0, 30, 60, 0, 0, 5, 25, 30, 10, 20,
10, 1000, 5, 0, 60, 10, 20, 0, 5, 25, 0, 15, 0, 30, 31, 2, 35,
1000, 10, 10, 15, 20, 25, 80, 50, 20, 35, 0, 20, 0, 0, 10, 20,
30, 0, 15), q13 = c(200, 80, 0, 200, 25, 200, 10, 20, 50, 15,
1000, 70, 1000, 50, 0, 0, 80, 40, 30, 0, 100, 30, 20, 20, 40,
100, 5, 50, 100, 20, 0, 30, 30, 0, 50, 10, 30, 30, 45, 10, 120,
1000, 50, 202, 100, 200, 15, 120, 25, 20, 35, 0, 45, 0, 50, 50,
50, 30, 0, 30), q14 = c(0, 50, 200, 200, 0, 5, 100, 5, 20, 300,
300, 40, 1000, 10020, 20, 0, 80, 30, 0, 15, 50, 50, 20, 0, 40,
300, 3, 20, 100, 5, 0, 50, 100, 0, 0, 0, 30, 100, 20, 100, 40,
100, 5, 10, 10, 10, 50, 120, 0, 50, 15, 50, 50, 0, 50, 15, 100,
40, 0, 50), q15 = c(50, 40, 50, 150, 100, 30, 0, 8, 25, 100,
100, 100, 0, 100, 0, 0, 50, 10, 0, 50, 150, 1000, 10, 0, 120,
0, 5, 100, 20, 10, 10, 0, 100, 0, 0, 5, 100, 30, 45, 200, 100,
200, 20, 5, 0, 0, 50, 100, 50, 100, 10, 0, 0, 0, 50, 30, 100,
50, 0, 50), q16 = c(50, 50, 200, 100, 200, 15, 200, 15, 50, 500,
150, 50, 1000, 20, 0, 100, 100, 30, 0, 50, 60, 30, 50, 100, 100,
100, 10, 100, 100, 15, 200, 50, 30, 0, 0, 15, 30, 30, 5, 50,
15, 1000, 5, 20, 100, 0, 80, 20, 0, 300, 20, 0, 100, 0, 0, 20,
100, 100, 0, 200), q17 = c(0, 30, 100, 140, 100, 5, 100, 15,
15, 15, 100, 60, 1000, 50, 0, 0, 50, 0, 0, 0, 60, 20, 10, 0,
40, 100, 5, 30, 60, 15, 10, 30, 0, 0, 20, 15, 20, 30, 10, 10,
50, 1000, 30, 10, 20, 30, 0, 80, 0, 50, 15, 0, 30, 0, 0, 15,
10, 60, 0, 50), q18 = c(0, 60, 0, 80, 20, 5, 0, 5, 25, 500, 250,
70, 800, 0, 20, 100, 100, 100, 50, 50, 70, 30, 50, 0, 50, 300,
5, 100, 50, 15, 20, 50, 30, 0, 0, 0, 50, 0, 90, 100, 50, 100,
0, 10, 1000, 0, 20, 80, 5, 100, 20, 0, 0, 0, 0, 30, 0, 100, 0,
0), q19 = c(0, 30, 0, 80, 0, 5, 0, 15, 25, 15, 100, 60, 800,
50, 0, 0, 80, 0, 0, 0, 45, 20, 10, 0, 20, 500, 5, 30, 60, 15,
50, 50, 0, 0, 50, 0, 20, 0, 20, 15, 0, 0, 0, 10, 75, 100, 10,
80, 5, 30, 20, 0, 15, 0, 0, 20, 0, 50, 10, 0), q20 = c(100, 60,
200, 150, 200, 30, 200, 100, 50, 1500, 100, 40, 400, 5020, 35,
150, 80, 100, 100, 50, 70, 30, 40, 100, 50, 200, 20, 0, 50, 10,
100, 30, 0, 60, 30, 50, 20, 30, 63, 40, 100, 100, 0, 20, 50,
200, 50, 50, 30, 50, 30, 0, 45, 35, 30, 45, 50, 50, 30, 40),
q21 = c(100, 30, 200, 150, 100, 40, 100, 10, 20, 15, 100,
30, 400, 20, 10, 0, 60, 0, 0, 0, 10, 20, 10, 20, 15, 20,
5, 30, 50, 10, 10, 20, 0, 0, 0, 15, 20, 30, 15, 10, 30, 100,
0, 10, 15, 0, 30, 120, 10, 10, 35, 0, 2525, 35, 50, 40, 10,
30, 20, 15), q22 = c(100, 70, 100, 150, 100, 5, 100, 5, 25,
250, 100, 50, 1000, 20, 15, 70, 80, 100, 10, 20, 30, 30,
20, 50, 50, 200, 10, 40, 40, 15, 100, 20, 50, 60, 20, 15,
30, 30, 10, 30, 100, 100, 25, 20, 10, 100, 80, 50, 25, 20,
35, 0, 30, 20, 0, 20, 50, 50, 0, 50), q23 = c(10, 40, 100,
150, 100, 3, 10, 10, 20, 4, 100, 60, 700, 20, 0, 0, 60, 0,
0, 0, 20, 20, 10, 20, 40, 20, 5, 2, 60, 15, 10, 20, 5, 0,
20, 0, 30, 30, 10, 2, 1010, 0, 10, 1010, 10, 10, 5, 80, 3,
20, 20, 0, 25, 0, 0, 20, 10, 30, 0, 15)), row.names = c(NA,
-60L), class = "data.frame")
Edit: a 0 is not missing data; the values are amounts in dollars ($), so 0 is a genuine observation.
when looking at it graphically it looks far from ideal
# NOTE(review): assumes the data frame shown above has been assigned to `example` — confirm
# Box plot of the data frame coerced to a numeric matrix
boxplot(as.matrix(example))
# Kernel density estimate of all values in the matrix pooled together
plot(density(as.matrix(example)))
I would like to normalize this data by a transformation and control for outliers so I have 2 questions:
QUESTION 1
How would you deal with the outliers in this dataset? I don't want to lose data, so I would like to replace the outliers rather than drop them; however, it is unclear to me which method to use. Is there a package that would help me automate this? I would also like to understand the rationale behind the method used.
QUESTION 2
Having controlled for outliers I want to transform the variables into normality. For this I have two packages I tend to use:
library(rcompanion)
a<- transformTukey(as.matrix(example))
and
library(LambertW)
b<-Gaussianize(example, type = "h")
However, I am not sure how they work mathematically, how to assess whether they are doing a good job, which of the two is better, or whether there is another, more practical solution.

It's not completely clear what you're trying to do with the data, but I would start with the simple things and go from there. For example, what does hist() look like, and what do the usual normality checks show (you can find better descriptions of those online)? For outliers, the tool I usually reach for is a simple lm(): its diagnostic plots flag potential outliers and show where a 'cutoff' would lie. Normally the type of data also gives some insight into which normalization method is appropriate, but in general a log transform is a good default choice (use log1p() here, since the data contain zeros).

Related

fill delaunay triangles with colors of vertex points in R

here is a reprex
data<- structure(list(lanmark_id = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66, 67), V1 = c(0.00291280916742007,
0.00738863171211713, 0.0226678081211574, 0.0475105228945172,
0.0932285720818941, 0.167467706279089, 0.257162845610094, 0.365202733889021,
0.49347857580521, 0.623654594804239, 0.738846221030799, 0.838001377618909,
0.911583795022151, 0.954620025430512, 0.976736039833402, 0.99275439380643,
1.00100526672829, 0.0751484964183746, 0.136267471453466, 0.223219796351563,
0.312829176190895, 0.396253287447153, 0.589077347394549, 0.682150866526948,
0.771279538477539, 0.856242644022999, 0.915433541338973, 0.493665602840245,
0.491283285973581, 0.488913167946858, 0.486968906096063, 0.384707082576335,
0.43516446651127, 0.48730704698643, 0.541730425616146, 0.590794609520034,
0.176234316360877, 0.230353437655898, 0.295908510434122, 0.350673723300921,
0.2927721757992, 0.228392965512228, 0.634474821310078, 0.692554938010577,
0.757884656518485, 0.809961553290539, 0.760324208523177, 0.696892501347341,
0.299062528225204, 0.371899560139738, 0.440183530232855, 0.488448817156316,
0.542120710507391, 0.613931454931259, 0.683122622479693, 0.614367295821043,
0.544516611213321, 0.487065702940653, 0.43466839036949, 0.367662837035504,
0.329392110306872, 0.439192556373207, 0.488617118648197, 0.543288506065858,
0.652131615571443, 0.541622182786469, 0.486664920417254, 0.437126878794749
), V2 = c(0.201088019764115, 0.335422141956174, 0.468591127485112,
0.597955245417373, 0.719502795031081, 0.826191980419368, 0.912263437847338,
0.978932088608654, 0.996572250349122, 0.975164350943783, 0.906204543800476,
0.817791059656974, 0.711167374856116, 0.587462637963028, 0.457981280500493,
0.327526817895531, 0.19652402489511, 0.0832018969548692, 0.0247526745448235,
0.00543973063471442, 0.0169853862992864, 0.0463565705952832,
0.0442986445765913, 0.0151651597693172, 0.00747493463745755,
0.0263496825405166, 0.0805712600069456, 0.160307477500307, 0.24640401358039,
0.332244740019727, 0.420995916418539, 0.486383354389177, 0.505514985155285,
0.521022030162301, 0.5059272511442, 0.48818970795347, 0.184054088286897,
0.153658218058329, 0.153359749238857, 0.186997311695192, 0.20294291755153,
0.204166125257439, 0.186997311695192, 0.153386090373069, 0.155932705636629,
0.184603717976376, 0.203900583330345, 0.202836636618411, 0.670663080116174,
0.635972857244521, 0.619932598923225, 0.632625553953685, 0.620132318139554,
0.637530241507316, 0.668109937001625, 0.718821664744205, 0.73956412947459,
0.744898219300658, 0.74046882628352, 0.720755964662638, 0.672731384920681,
0.666152981987244, 0.670464844757437, 0.664772611108765, 0.671145517468628,
0.673968618595099, 0.67986363963374, 0.675352028351748), coef2 = c(0,
0, 0, 0, 0, 0, 0, 0, 0.565178003460693, 0, 0, 0, 0, 0, 0, 0,
0, 0.0433232019717308, 0.0433232019717308, 0.442833876807268,
0.574211955093656, 0.574211955093656, 0.574211955093656, 0.574211955093656,
0.442833876807268, 0.0433232019717308, 0.0433232019717308, 0.0612451242746323,
0.0612451242746323, 0, 0, 0, 0, 0, 0, 0, 0.343056259557492, 0.701076795777046,
0.674029769391816, 0, 0.538117834886036, 0.990039002564078, 0.451921167678043,
0.701076795777046, 0.701076795777046, 0.316009233172263, 0.990039002564078,
0.990039002564078, 0.878350036859346, 0.343364662128988, 0.282119537854356,
0.282119537854356, 0.282119537854356, 0.343364662128988, 0.384793696241895,
0.608382647917744, 0.608382647917744, 1, 0.608382647917744, 0.608382647917744,
0.384793696241895, 0.501936678206125, 0.501936678206125, 0, 0.878350036859346,
0, 0.501936678206125, 0.501936678206125)), row.names = c(NA,
-68L), class = c("tbl_df", "tbl", "data.frame"))
I used this data to create a Delaunay plot in R
library(tidyverse)
library(ggforce)

# Rescale the coefficient so that its largest value becomes 1
scaled <- mutate(data, coef2 = coef2 / max(coef2))

# Delaunay triangulation of the points: tiles, edges and vertices are all
# mapped to coef2
ggplot(scaled, aes(x = V1, y = V2)) +
  geom_delaunay_tile(aes(colour = coef2, fill = coef2), alpha = 0.5) +
  geom_delaunay_segment2(aes(colour = coef2, fill = coef2)) +
  geom_point(aes(colour = coef2)) +
  ylim(1, 0) +  # limits given high-to-low, which flips the y axis
  scale_colour_viridis_c(option = "magma") +
  scale_fill_viridis_c(option = "magma") +
  theme_minimal()
which gives this
I want to fill each triangle with a blend of colours that matches the colours of its vertex points, just as the lines are coloured.
As you can see, I have tried using fill = coef2 within the geom_delaunay_* layers, but this doesn't achieve what I want.
Is there a way to do this in R?
Many thanks!

How to make a profile plot (principal component analysis) in R?

I'm currently running principal component analysis. For the interpretation I want to create a profile (pattern) plot to visualize the correlation between each principal component and the original variables. Is anyone familiar with a package or code to create this in R? I'm using the prcomp() function in R.
See examples:
https://canadianaudiologist.ca/predicting-speech-perception-from-the-audiogram-and-vice-versa/
https://blogs.sas.com/content/iml/2019/11/04/interpret-graphs-principal-components.html
This is similar data to my db:
db <- structure(list(T025 = c(20, 60, 20, 10, 85, 5, 15, 10, 10, 25,
15, 5, 15, 30, 15, 15, 10, 25, 45, 25, 55, 20, 65, 20, 10, 10,
15, 15, 30, 35, 10, 50, 20, 15, 30, 15, 20, 35, 30, 20, 10, 20,
30, 15, 40, 15, 10, 10, 20, 25, -5, 10, 40, 0, 15, 5, 15, 30,
15, 80, 15, 35, 10, 50, 25, 10, 15, 20, 20, 20, 25, 20, 30, 10,
20, 50, 25, 25, 55, 30, 20, 30, 15, 10, 15, 15, 35, 20, 30, 15,
40, 20, 25, 15, 20, 35, 15, 25, 20, 40, 0, 20, 10, 10, 15, 10,
20, 10, 35, 35, 25, 30, 20, 25, 15, 30, 35, 25, 30, 5, 20, 30,
15, 25, 10), T05 = c(0, 25, 0, 5, 25, 5, 0, 0, 5, 5, 5, -5, 5,
15, 15, 5, 0, 15, 25, 15, 50, 20, 45, 5, 5, 5, 0, 10, 10, 10,
5, 20, 15, 10, 20, 10, -5, 10, 30, -5, 0, 10, 35, 5, 40, 0, 0,
-5, 15, 25, 0, 5, 35, -5, 5, 0, 5, 5, 10, 70, 0, 20, 5, 30, 10,
10, 5, 5, 25, 10, 20, 5, 25, 5, 10, 35, 15, 10, 45, 15, 15, 25,
10, 5, 10, 5, 20, 15, 15, 5, 10, 10, 20, 5, 15, 25, 5, 20, 10,
35, -10, 5, 0, -5, 0, 5, 15, 5, 15, 35, 20, 25, 10, 15, 15, 25,
45, 0, 25, 0, 5, 25, 0, 20, 5), T1 = c(25, 20, 25, 20, 50, 10,
15, 20, 25, 25, 25, 25, 15, 45, 25, 25, 20, 35, 40, 35, 65, 45,
45, 30, 25, 20, 5, 20, 30, 25, 20, 35, 25, 25, 35, 15, 15, 25,
45, 20, 25, 35, 40, 25, 60, 15, 15, 15, 25, 45, 20, 20, 60, 15,
20, 25, 45, 45, 25, 75, 10, 45, 15, 50, 20, 25, 20, 15, 40, 30,
50, 20, 40, 20, 35, 50, 35, 15, 50, 30, 20, 45, 25, 25, 20, 45,
30, 35, 30, 30, 15, 15, 30, 25, 25, 25, 15, 40, 25, 55, 20, 30,
10, 15, 50, 15, 40, 20, 20, 55, 35, 45, 20, 50, 35, 20, 65, 10,
35, 15, 30, 55, 25, 15, 25), T2 = c(20, 20, 15, 25, 70, 10, 15,
45, 50, 30, 20, 25, 10, 40, 20, 40, 30, 40, 25, 30, 45, 25, 50,
20, 20, 20, 10, 10, 45, 10, 5, 40, 20, 15, 50, 25, 15, 20, 25,
30, 20, 30, 35, 15, 65, 20, 25, 10, 10, 60, 25, 20, 70, 5, 15,
15, 15, 25, 15, 60, 25, 55, 5, 50, 30, 35, 5, 10, 30, 10, 55,
25, 40, 35, 40, 45, 25, 20, 35, 40, 5, 40, 10, 25, 10, 40, 30,
20, 25, 25, 10, 25, 30, 45, 20, 25, 10, 55, 40, 60, 5, 10, 10,
5, 20, 0, 40, 20, 35, 80, 25, 40, 15, 55, 25, 15, 65, 5, 25,
5, 35, 45, 10, 5, 10), T4 = c(10, 25, 35, 35, 70, 20, 15, 70,
55, 30, 50, 35, 40, 40, 35, 45, 60, 50, 15, 25, 70, 10, 60, 40,
30, 15, 15, 15, 50, 5, 20, 70, 5, 35, 65, 40, 20, 65, 50, 30,
45, 55, 65, 35, 45, 35, 40, 20, 5, 65, 20, 25, 75, 10, 25, 25,
10, 25, 20, 55, 20, 65, 5, 60, 70, 45, 15, 25, 35, 5, 70, 55,
65, 40, 35, 55, 35, 45, 45, 45, 20, 40, 25, 50, 15, 55, 55, 40,
30, 60, 10, 60, 40, 35, 30, 65, 5, 75, 55, 80, 15, 30, 55, 15,
50, 25, 45, 30, 45, 90, 20, 45, 20, 40, 35, 20, 70, 20, 30, 45,
50, 55, 45, 5, 45), T8 = c(5, 55, 55, 40, 75, 40, 5, 70, 25,
10, 50, 55, 5, 35, 10, 30, 40, 55, 20, 20, 65, -5, 55, 50, -10,
45, 5, 50, 65, 20, 0, 75, 15, 30, 50, 50, 30, 70, 45, 25, 35,
40, 85, 30, 60, 50, 55, 15, 10, 75, 60, 20, 90, 0, 20, 55, -10,
20, 10, 45, 20, 65, 0, 70, 85, 0, -5, 30, 35, 5, 80, 45, 60,
25, 35, 55, 30, 45, 65, 45, -5, 35, 35, 40, 50, 55, 50, 70, 45,
40, 0, 55, 45, 30, 0, 56, 0, 45, 50, 70, 15, 20, 45, -10, 45,
55, 45, 20, 50, 85, 5, 50, 10, 20, 25, 0, 70, 0, 25, 5, 45, 35,
40, -5, 25)), row.names = c("1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20", "21", "22", "23", "24", "25", "26",
"177", "191", "200", "205", "208", "212", "231", "236", "240",
"246", "250", "259", "263", "264", "275", "276", "282", "293",
"303", "304", "307", "309", "315", "316", "320", "322", "324",
"327", "333", "338", "343", "356", "365", "377", "379", "393",
"395", "399", "405", "411", "426", "428", "439", "448", "451",
"459", "490", "495", "498", "513", "515", "521", "524", "528",
"532", "550", "552", "559", "566", "570", "577", "583", "587",
"595", "624", "638", "641", "645", "647", "650", "660", "668",
"677", "683", "688", "691", "702", "704", "710", "719", "730",
"732", "748", "752", "758", "766", "772", "780", "782", "790",
"810", "828", "830", "836", "853", "862", "880", "889", "896"
), class = "data.frame")
library(ggbiplot)  # provides ggbiplot(); no call shown here attaches it

# PCA on the centred, unit-variance variables of `db`
db.pca <- prcomp(db, center = TRUE, scale. = TRUE)
summary(db.pca)                   # summary of the fitted PCA
str(db.pca)                       # structure of the prcomp object
ggbiplot(db.pca)                  # biplot of the PCA
screeplot(db.pca, type = "line")  # scree plot drawn as a line
Here is a way with package FactoMineR to get the correlations. The plot is a base R plot.
library(FactoMineR)

res.pca <- PCA(iris[-5], graph = FALSE)

# Correlations between the original variables (rows) and the principal
# components (columns). `$var$cos2` holds the *squared* correlations: it loses
# the sign, so it cannot show whether a variable loads positively or
# negatively on a component, and it does not match the "correlation" axis label.
var_cor <- res.pca$var$cor

# One colour / line type per component, shared by the lines and the legend
comp_style <- seq_len(ncol(var_cor))

old_par <- par(xpd = TRUE)  # let the legend be drawn outside the plot region
matplot(
  var_cor,
  type = "l",
  col = comp_style,
  lty = comp_style,
  ylim = c(-1, 1),  # correlations are bounded; a fixed range keeps plots comparable
  xlab = "variable",
  ylab = "correlation",
  main = "Component Pattern Profiles",
  xaxt = "n"
)
abline(h = 0, col = "grey", xpd = FALSE)  # zero line separates positive from negative loadings
axis(1, at = seq_len(nrow(var_cor)), labels = rownames(var_cor))
legend(
  x = "bottom",
  inset = c(0, -0.2),
  legend = colnames(var_cor),
  col = comp_style,
  lty = comp_style,
  bty = "n",
  horiz = TRUE
)
par(old_par)  # restore the previous graphical parameters
using your data I did this:
library(ggplot2)

# PCA on the centred and scaled variables (TRUE spelled out: `T` is an
# ordinary variable that can be overwritten)
comp <- prcomp(db, center = TRUE, scale. = TRUE)

# Correlation of every original variable (rows) with every component score
# (columns), computed in one call instead of one cor.test() per pair
cors <- cor(db, comp$x)

# Long format, one row per (component, variable) pair.
#   V1  - component index
#   V2  - original variable, as a factor so the x axis keeps the column order of `db`
#   cor - Pearson correlation between the variable and the component scores
b <- data.frame(
  V1  = factor(rep(seq_len(ncol(cors)), each = nrow(cors))),
  V2  = factor(rep(rownames(cors), times = ncol(cors)), levels = rownames(cors)),
  cor = as.vector(cors)  # column-major: all variables for PC1, then PC2, ...
)

# One profile line per component
ggplot(b, aes(x = V2, y = cor, group = V1, col = V1)) +
  geom_line() +
  theme_classic()
And I obtained this :
did it help?

Discrepancy between ggpmisc and broom packages in lm estimates

I'm trying to extract slope values from a number of linear regression models. I am plotting acetone emission against water content on different days.
I have these graphs and models
I have tried to extract the slope values using this code:
# R is case-sensitive: the function is library(), not Library()
library(broom)
library(tidyverse)

# One linear model per value of `days` (formula as posted: water_content
# regressed on acetone), keeping the adjusted R-squared and the slope row
lm_table <- df %>%
  nest_by(days) %>%
  summarise(mdl = list(lm(water_content ~ acetone, data)), .groups = "drop") %>%
  mutate(adjrsquared = map_dbl(mdl, ~ summary(.)$adj.r.squared),
         mdl = map(mdl, broom::tidy)) %>%
  unnest(mdl) %>%
  filter(term == "acetone")
and also this code:
# Per-day fits again, this time storing the model object and its tidied
# coefficient table next to each other
lm_table2 <- mutate(
  nest_by(df, days),
  model = list(lm(water_content ~ acetone, data)),
  coefficients2 = list(tidy(model))
)

# Expand the nested coefficient tables into ordinary rows
coefficients2 <- unnest(lm_table2, coefficients2)
Both pieces of code, however, give different slope values from those shown on the graphs. Any ideas as to why that is?
Here's the data
df <- structure(list(i.x45.03 = c(22, 17, 11, 1782, 1767, 250, 3568,
79, 219, 855, 12009, 395, 1552, 705, 2282, 84, 3396, 252, 2058,
1480, 5, 745, 2573, 1005, 946, 3320, 5406, 2192, 20, 1207, 9519,
66, 463, 250, 1095, 16556, 88, 2695, 275, 16, 1577, 29, 3221,
25, 6295, 2, 63, 123, 8, 1, 37, 5308, 4546, 994, 4567, 421, 0,
1938, 19480, 1027, 3474, 1982, 2819, 69, 27733, 2152, 15429,
996, 8, 3435, 8748, 17062, 269, 26188, 35823, 2572, 67, 761,
13493, 1, 1, 1, 16, 9, 29, 89, 20, 11, 21644, 3, 37, 13, 0, 0,
0, 0, 3, 30, 19, 0, 0, 242, 7246, 1, 20081, 77, 0, 0, 0, 5878,
0, 0, 22, 2, 4, 1, 93, 12, 2, 73, 0, 19, 0, 0, 2, 48, 3, 0, 0,
0, 0, 22, 4, 0, 0, 0, 0, 0, 0, 1, 87, 0, 0, 3, 0, 0, 4, 1, 0,
82, 7, 0, 0, 0, 7, 22, 34, 17, 0, 0, 0, 0, 0, 2, 19, 3, 0, 990,
0, 0, 0, 0, 84, 9, 0, 5, 1246, 1944, 633, 23640, 262, 5399, 83,
19, 4417, 125, 7801, 69, 6755, 6, 39, 262), i.water_content_percent_es = c(98,
39, 85, 14, 21, 28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8,
17, 10, 75, 52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19,
32, 40, 79, 22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 98, 39,
85, 14, 21, 28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8, 17,
10, 75, 52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 39, 85, 14, 21,
28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 98, 23, 8, 17, 10, 75,
52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19, 32, 40, 79,
22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 98, 39, 85, 14, 21,
28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8, 17, 10, 75, 52,
13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19, 32, 40, 79, 22,
49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 31, 35, 19, 32, 40, 79,
22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91), daysincubated4 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4), days = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4), water_content = c(98,
39, 85, 14, 21, 28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8,
17, 10, 75, 52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19,
32, 40, 79, 22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 98, 39,
85, 14, 21, 28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8, 17,
10, 75, 52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 39, 85, 14, 21,
28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 98, 23, 8, 17, 10, 75,
52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19, 32, 40, 79,
22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 98, 39, 85, 14, 21,
28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8, 17, 10, 75, 52,
13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19, 32, 40, 79, 22,
49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 31, 35, 19, 32, 40, 79,
22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91), acetone = c(22,
17, 11, 1782, 1767, 250, 3568, 79, 219, 855, 12009, 395, 1552,
705, 2282, 84, 3396, 252, 2058, 1480, 5, 745, 2573, 1005, 946,
3320, 5406, 2192, 20, 1207, 9519, 66, 463, 250, 1095, 16556,
88, 2695, 275, 16, 1577, 29, 3221, 25, 6295, 2, 63, 123, 8, 1,
37, 5308, 4546, 994, 4567, 421, 0, 1938, 19480, 1027, 3474, 1982,
2819, 69, 27733, 2152, 15429, 996, 8, 3435, 8748, 17062, 269,
26188, 35823, 2572, 67, 761, 13493, 1, 1, 1, 16, 9, 29, 89, 20,
11, 21644, 3, 37, 13, 0, 0, 0, 0, 3, 30, 19, 0, 0, 242, 7246,
1, 20081, 77, 0, 0, 0, 5878, 0, 0, 22, 2, 4, 1, 93, 12, 2, 73,
0, 19, 0, 0, 2, 48, 3, 0, 0, 0, 0, 22, 4, 0, 0, 0, 0, 0, 0, 1,
87, 0, 0, 3, 0, 0, 4, 1, 0, 82, 7, 0, 0, 0, 7, 22, 34, 17, 0,
0, 0, 0, 0, 2, 19, 3, 0, 990, 0, 0, 0, 0, 84, 9, 0, 5, 1246,
1944, 633, 23640, 262, 5399, 83, 19, 4417, 125, 7801, 69, 6755,
6, 39, 262)), row.names = c(NA, -192L), class = "data.frame")
and the code for the graph I've made is:
library(ggpmisc)
library(tidyverse)

# Single model formula shared by the smoother and the equation label, so the
# drawn line and the printed equation always come from the same model
formula <- y ~ x

ggplot(df, aes(water_content, acetone)) +
  geom_point() +
  geom_smooth(method = "lm", formula = formula) +
  theme_bw() +
  facet_wrap(~days, scales = "free") +
  # after_stat() replaces the deprecated stat() for referring to computed variables
  stat_poly_eq(
    aes(label = paste(after_stat(adj.rr.label), after_stat(eq.label),
                      after_stat(p.value.label), sep = "*\", \"*")),
    formula = formula, parse = TRUE, size = 3)
Any ideas why I don't get the same slope values?
All help is much appreciated!
You swapped x and y. Possibly because of using complex 'tidyverse' coding this was not obvious.
library(nlme)

# Separate acetone ~ water_content regression within each level of `days`
per_day_fits <- lmList(acetone ~ water_content | days, data = df)
print(per_day_fits)
gives
Call:
Model: acetone ~ water_content | days
Data: df
Coefficients:
(Intercept) water_content
0 3314.26811 -31.663431
4 12046.87296 -154.277916
24 3103.13075 -44.368527
116 63.82385 -0.792739
Degrees of freedom: 192 total; 184 residual
Residual standard error: 4538.636

How do I use column index as x axis in R

I have a data frame with 7 columns and 100 observations
I divided observations into two groups
the question I'm working on is: b) Construct two time plots of the mean blood lead levels superimposed on the blood lead levels at each occasion for succimer and placebo groups.
This is my code so far:
# plyr must be attached BEFORE dplyr (which tidyverse also attaches); in the
# reverse order plyr masks dplyr verbs such as summarise() and mutate()
library(plyr)
library(tidyverse)
library(dplyr)
library(haven)
library(foreign)
library(ggplot2)

tlc <- read_dta(file = 'tlc.dta')
head(tlc)

## a)
# Split by treatment arm using the values of `trt` (0 and 1)
placebo  <- subset(tlc, trt == 0)
succimer <- subset(tlc, trt == 1)

# Columns 3:6 are the measurement columns summarised for each arm
summary(placebo[, 3:6])
summary(succimer[, 3:6])

# Per-column mean, standard deviation and variance within each arm
placebo_mean <- colMeans(placebo[, 3:6])
placebo_std  <- apply(placebo[, 3:6], 2, sd)
placebo_var  <- placebo_std^2

succimer_mean <- colMeans(succimer[, 3:6])
succimer_std  <- apply(succimer[, 3:6], 2, sd)
succimer_var  <- succimer_std^2

## b)

## c)
# Correlation and covariance matrices of the measurement columns, rounded to 3 digits
placebo_cor  <- cor(placebo[, 3:6]) %>% round(digits = 3)
succimer_cor <- cor(succimer[, 3:6]) %>% round(digits = 3)
placebo_cov  <- cov(placebo[, 3:6]) %>% round(digits = 3)
succimer_cov <- cov(succimer[, 3:6]) %>% round(digits = 3)
So the purpose is to plot all observation by using values as y axis, and columns y0, y1, y4, y6 (represent to week 0, week 1, week 4, week 6) as x axis, then plot the mean of each group superimposed on the plot. I'm planning to use different colors to distinguish two groups, so the final plot will have a lot of points on each x coordinate, and two short lines to indicate means for each group at each x coordinate.
My question is how to use the column index as the x axis in R, with or without ggplot. I know this question may be too elementary, but it has caused a lot of trouble for me as a beginner.
below is my data:
dput(tlc)
structure(list(id = structure(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 100), format.stata = "%9.0g"),
trt = structure(c(0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1,
0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0,
0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1,
1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1,
1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1), format.stata = "%9.0g", class = "haven_labelled", labels = c(Placebo = 0,
Succimer = 1)), y0 = structure(c(30.7999992370605, 26.5,
25.7999992370605, 24.7000007629395, 20.3999996185303, 20.3999996185303,
28.6000003814697, 33.7000007629395, 19.7000007629395, 31.1000003814697,
19.7999992370605, 24.7999992370605, 21.3999996185303, 27.8999996185303,
21.1000003814697, 20.6000003814697, 24, 37.5999984741211,
35.2999992370605, 28.6000003814697, 31.8999996185303, 29.6000003814697,
21.5, 26.2000007629395, 21.7999992370605, 23, 22.2000007629395,
20.5, 25, 33.2999992370605, 26, 19.7000007629395, 27.8999996185303,
24.7000007629395, 28.7999992370605, 29.6000003814697, 32,
21.7999992370605, 24.3999996185303, 33.7000007629395, 24.8999996185303,
19.7999992370605, 26.7000007629395, 26.7999992370605, 20.2000007629395,
35.4000015258789, 25.2999992370605, 20.2000007629395, 24.5,
20.2999992370605, 20.3999996185303, 24.1000003814697, 27.1000003814697,
34.7000007629395, 28.5, 26.6000003814697, 24.5, 20.5, 25.2000007629395,
34.7000007629395, 30.2999992370605, 26.6000003814697, 20.7000007629395,
27.7000007629395, 24.2999992370605, 36.5999984741211, 28.8999996185303,
34, 32.5999984741211, 29.2000007629395, 26.3999996185303,
21.7999992370605, 27.2000007629395, 22.3999996185303, 32.5,
24.8999996185303, 24.6000003814697, 23.1000003814697, 21.1000003814697,
25.7999992370605, 30, 22.1000003814697, 20, 38.0999984741211,
28.8999996185303, 25.1000003814697, 19.7999992370605, 22.1000003814697,
23.5, 29.1000003814697, 30.2999992370605, 25.3999996185303,
30.6000003814697, 22.3999996185303, 31.2000007629395, 31.3999996185303,
41.0999984741211, 29.3999996185303, 21.8999996185303, 20.7000007629395
), format.stata = "%9.0g"), y1 = structure(c(26.8999996185303,
14.8000001907349, 23, 24.5, 2.79999995231628, 5.40000009536743,
20.7999992370605, 31.6000003814697, 14.8999996185303, 31.2000007629395,
17.5, 23.1000003814697, 26.2999992370605, 6.30000019073486,
20.2999992370605, 23.8999996185303, 16.7000007629395, 33.7000007629395,
25.5, 15.8000001907349, 27.8999996185303, 15.8000001907349,
6.5, 26.7999992370605, 12, 4.19999980926514, 11.5, 21.1000003814697,
3.90000009536743, 26.2000007629395, 21.3999996185303, 13.1999998092651,
21.6000003814697, 21.2000007629395, 26.3999996185303, 17.5,
30.2000007629395, 19.2999992370605, 16.3999996185303, 14.8999996185303,
20.8999996185303, 18.8999996185303, 6.40000009536743, 20.3999996185303,
10.6000003814697, 30.3999996185303, 23.8999996185303, 17.5,
10, 21, 17.2000007629395, 20.1000003814697, 14.8999996185303,
39, 32.5999984741211, 22.3999996185303, 5.09999990463257,
17.5, 25.1000003814697, 39.5, 29.3999996185303, 25.2999992370605,
19.2999992370605, 4, 24.2999992370605, 23.2999992370605,
28.8999996185303, 10.6999998092651, 19, 9.19999980926514,
15.3000001907349, 10.6000003814697, 28.5, 22, 25.1000003814697,
23.6000003814697, 25, 20.8999996185303, 5.59999990463257,
21.8999996185303, 27.6000003814697, 21, 22.7000007629395,
40.7999992370605, 12.5, 28.1000003814697, 11.6000003814697,
21.1000003814697, 7.90000009536743, 16.7999992370605, 3.5,
24.2999992370605, 28.2000007629395, 7.09999990463257, 10.8000001907349,
3.90000009536743, 15.1000003814697, 22.1000003814697, 7.59999990463257,
8.10000038146973), format.stata = "%9.0g"), y4 = structure(c(25.7999992370605,
19.5, 19.1000003814697, 22, 3.20000004768372, 4.5, 19.2000007629395,
28.5, 15.3000001907349, 29.2000007629395, 20.5, 24.6000003814697,
19.5, 18.5, 18.3999996185303, 19, 21.7000007629395, 34.4000015258789,
26.2999992370605, 22.8999996185303, 27.2999992370605, 23.7000007629395,
7.09999990463257, 25.2999992370605, 16.7999992370605, 4,
9.5, 17.3999996185303, 12.8000001907349, 34, 21, 14.6000003814697,
23.6000003814697, 22.8999996185303, 23.7999992370605, 21,
30.2000007629395, 16.3999996185303, 11.6000003814697, 14.5,
22.2000007629395, 18.8999996185303, 5.09999990463257, 19.2999992370605,
9, 26.5, 22.2000007629395, 17.3999996185303, 15.6000003814697,
16.7000007629395, 15.8999996185303, 17.8999996185303, 18.1000003814697,
28.7999992370605, 27.5, 21.7999992370605, 8.19999980926514,
19.6000003814697, 23.3999996185303, 38.5999984741211, 33.0999984741211,
25.1000003814697, 21.8999996185303, 4.19999980926514, 18.3999996185303,
40.4000015258789, 32.7999992370605, 12.6000003814697, 16.2999992370605,
8.30000019073486, 24.6000003814697, 14.3999996185303, 35,
19.1000003814697, 27.7999992370605, 21.2000007629395, 21.7000007629395,
21.7000007629395, 7.30000019073486, 23.6000003814697, 24,
8.60000038146973, 21.2000007629395, 38, 16.7000007629395,
27.5, 13, 21.5, 12.3999996185303, 15.1000003814697, 3, 22.7000007629395,
27, 17.2000007629395, 19.7999992370605, 7, 10.8999996185303,
25.2999992370605, 10.8000001907349, 25.7000007629395), format.stata = "%9.0g"),
y6 = structure(c(23.7999992370605, 21, 23.2000007629395,
22.5, 9.39999961853027, 11.8999996185303, 18.3999996185303,
25.1000003814697, 14.6999998092651, 30.1000003814697, 27.5,
30.8999996185303, 19, 16.2999992370605, 20.7999992370605,
17, 20.2999992370605, 31.3999996185303, 30.2999992370605,
25.8999996185303, 34.2000007629395, 23.3999996185303, 16,
24.7999992370605, 19.2000007629395, 16.2000007629395, 14.5,
21.1000003814697, 12.6999998092651, 28.2000007629395, 22.3999996185303,
11.6000003814697, 27.7000007629395, 21.8999996185303, 22,
24.2000007629395, 27.5, 17.6000003814697, 16.6000003814697,
63.9000015258789, 19.7999992370605, 15.5, 15.1000003814697,
23.7999992370605, 16, 28.1000003814697, 27.2000007629395,
18.6000003814697, 15.1999998092651, 13.5, 17.7000007629395,
18.7000007629395, 21.2999992370605, 34.7000007629395, 22.7999992370605,
21, 23.6000003814697, 18.3999996185303, 22.2000007629395,
43.2999992370605, 28.3999996185303, 27.8999996185303, 21.7999992370605,
11.6999998092651, 27.7999992370605, 39.2999992370605, 31.7999992370605,
21.2000007629395, 18.6000003814697, 18.3999996185303, 32.4000015258789,
18.7000007629395, 30.5, 18.7000007629395, 27.2999992370605,
21.1000003814697, 23.8999996185303, 19.8999996185303, 12.3000001907349,
24.7999992370605, 23.7000007629395, 24.6000003814697, 20.5,
32.7000007629395, 22.2000007629395, 24.7999992370605, 23.1000003814697,
20.6000003814697, 18.8999996185303, 18.7999992370605, 11.5,
20.1000003814697, 25.5, 18.7000007629395, 22.2000007629395,
17.7999992370605, 27.1000003814697, 4.09999990463257, 13,
12.3000001907349), format.stata = "%9.0g")), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))
I have also tried this:
# Attempt at plotting several columns of tlc as points.
# x is mapped to the *names* of columns 3 to 6 of tlc (one string per selected
# column), not to a column of tlc itself.
# Note that y = value sits outside aes(): it is handed to ggplot() directly.
p <- ggplot(
  tlc,
  aes(x = colnames(tlc[, 3:6], do.NULL = TRUE)),
  y = value
)
p <- p + geom_point()
The code itself runs without errors, but R reports an error (Aesthetics must be either length 1 or the same as the data (100): x) when I call 'p' to draw the plot.
I don't have your data, but it sounds like you want something that looks like this:
Here is how I made it:
library(tidyverse)

# Setting up some fake data: 100 observations and 7 variables.
# The seed is fixed so the simulated values are reproducible.
set.seed(123)
some_data <- data.frame(
  y0 = rnorm(100),
  y1 = runif(100),
  y2 = rexp(100, 2),
  y3 = rnorm(100, 2, 1),
  y4 = rexp(100),
  y5 = rnorm(100, 2, 2),
  y6 = runif(100, -5, 5)
)

# Pivoting the data to longer format: the column names go into "variable"
# and the values go into pivot_longer()'s default "value" column.
long_data <- some_data %>%
  pivot_longer(cols = everything(), names_to = "variable")

# Building the base plot: one x position per original column.
p <- ggplot(long_data, aes(x = variable, y = value))

# Adding the points - use position_jitter to give it some width if you want.
p <- p + geom_point(position = position_jitter(width = 0.2))

# Adding the bars at the mean - play around with width, color, and size.
# ymin and ymax are both set to the computed mean, so each error bar
# collapses to a single horizontal line. after_stat(y) replaces the
# deprecated ..y.. notation (available since ggplot2 3.3.0, like `fun`).
# NOTE: on ggplot2 >= 3.4.0 the line thickness argument is `linewidth`;
# `size` still works there but emits a deprecation warning.
p <- p + stat_summary(
  mapping = aes(ymax = after_stat(y), ymin = after_stat(y)),
  geom = "errorbar",
  fun = mean,
  width = 0.4,
  color = "orange",
  size = 1.5
)

p # show plot

Independent alpha for each plot within facet_grid based on density

I am constructing a facet_grid using stat_binhex; however, I would like the alpha value to be independent for each of the facet plots.
I am currently using the following code:
# Hex-binned plot of x against y, with one facet column per value of z.
# Base plot: fill colour is mapped to z.
hex_plot <- ggplot(data = data, mapping = aes(x = x, y = y, fill = z))

# 20 bins; alpha is mapped to the per-hexagon count computed by the stat.
hex_plot <- hex_plot +
  stat_binhex(mapping = aes(alpha = ..count..), bins = 20)

# Lay the panels out in a single row, one per z value.
hex_plot <- hex_plot + facet_grid(. ~ z)

# Hide the alpha legend, fix the aspect ratio, and use the bw theme.
hex_plot <- hex_plot +
  guides(alpha = F) +
  coord_equal() +
  theme_bw()

hex_plot
which produces the following plot:
However, the alpha value, which is defined by ..count.., doesn't work when applied outside of the aes() within stat_binhex.
I would like to show that there is some clustering in the 90 grouping on the right, around the (100,0) region, but the hexes are very pale, since there is such heavy clustering around (0,0) in the 10 grouping (leftmost plot) which skews the alpha.
Main question: How can I make the alpha independent for each facet, but still connected to count/density to better show the clustering in '70' and '90' groups?
Many thanks!
Data:
# rounded x and y, from 2 days of 365
structure(list(x = c(-24, 41, 43, 14, 9, 30, 8, -14, -45, 42,
65, 39, 43, 49, 39, 61, -53, -16, 29, 27, 9, 6, -61, 20, 5, -30,
-10, 75, 94, 28, 70, 44, -11, 26, 29, 33, 26, -35, 20, 40, 7,
4, 14, 4, -41, -7, -21, 95, 20, 50, 63, 31, 47, 19, 20, 19, 23,
-25, 29, -8, -73, 13, -82, 4, -29, 3, 9, 3, 35, 45, 64, -14,
-4, 34, 13, 12, 20, 13, 15, -17, 12, 19, -55, -49, 95, -19, 45,
94, 23, 29, 22, -91, -39, -35, -3, 63, 2, 5, 30, 62, 1, 4, -61,
-6, -2, 5, -26, -23, 5, 6, 8, 45, 104, -7, 8, 44, -43, -8, 9,
12, 29, 30, 69, 90, 12, -28, -10, -9, 49, 60, 32, 43, -11, 12,
28, 91, 11, 13, 43, 61, 11, 12, 28, 31, 47, 12, 13, 30, 46, 66,
98, 11, 12, 29, 31, 44, 64, -11, 14, 48, 62, 96, 10, 11, 12,
29, 67, 30, 93, -10, -9, 44, 101, -28, 34, 46, 10, 27, 30, 61,
8, 24, -7, -2, 52, 65, 5, -43, 41, 45, 91, -24, -23, 37, 73,
97, -61, 63, 57, 52, -37, -35, 19, 24, 110, -91, -5, -17, 95,
13, 85, -52, -50, 78, 30, 37, -8, -27, 19, -78, -75, 52, 42,
-11, -37, 27, 62, 78, -16, -56, 41), y = c(-100, -95, -95, -92,
-88, -86, -84, -82, -81, -78, -73, -72, -71, -70, -69, -68, -67,
-67, -64, -63, -62, -59, -58, -57, -56, -54, -54, -54, -54, -52,
-52, -49, -48, -48, -48, -47, -46, -45, -45, -45, -44, -42, -41,
-40, -39, -39, -38, -38, -37, -36, -36, -35, -35, -34, -34, -33,
-33, -32, -32, -31, -30, -30, -29, -29, -28, -27, -27, -26, -26,
-26, -26, -25, -25, -25, -24, -23, -23, -22, -22, -21, -21, -21,
-20, -20, -19, -18, -18, -18, -17, -17, -16, -14, -14, -14, -13,
-13, -12, -12, -12, -12, -11, -11, -10, -10, -10, -10, -9, -9,
-9, -9, -9, -9, -9, -8, -8, -8, -7, -7, -7, -7, -6, -6, -6, -6,
-5, -4, -4, -4, -4, -4, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1,
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
3, 4, 4, 4, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 8, 9, 9, 9, 9, 10,
10, 11, 11, 11, 11, 12, 13, 14, 14, 14, 15, 15, 15, 16, 16, 18,
19, 20, 21, 23, 23, 24, 24, 24, 26, 27, 28, 28, 29, 30, 32, 32,
32, 36, 36, 41, 42, 44, 48, 48, 50, 51, 57, 60, 62, 76, 76, 85,
89, 93), z = c(90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90,
90, 90, 90, 90, 90, 70, 70, 70, 70, 70, 90, 70, 70, 70, 70, 90,
90, 70, 90, 70, 50, 70, 70, 70, 70, 70, 50, 70, 50, 50, 50, 50,
70, 50, 50, 90, 50, 70, 70, 50, 70, 50, 50, 50, 50, 50, 50, 30,
90, 30, 90, 30, 50, 30, 30, 30, 50, 50, 70, 30, 30, 50, 30, 30,
30, 30, 30, 30, 30, 30, 70, 70, 90, 30, 50, 90, 30, 30, 30, 90,
50, 50, 10, 70, 10, 10, 30, 70, 10, 10, 70, 10, 10, 10, 30, 30,
10, 10, 10, 50, 90, 10, 10, 50, 50, 10, 10, 10, 30, 30, 70, 90,
10, 30, 10, 10, 50, 70, 30, 50, 10, 10, 30, 90, 10, 10, 50, 70,
10, 10, 30, 30, 50, 10, 10, 30, 50, 70, 90, 10, 10, 30, 30, 50,
70, 10, 10, 50, 70, 90, 10, 10, 10, 30, 70, 30, 90, 10, 10, 50,
90, 30, 30, 50, 10, 30, 30, 70, 10, 30, 10, 10, 50, 70, 10, 50,
50, 50, 90, 30, 30, 50, 70, 90, 70, 70, 70, 70, 50, 50, 30, 30,
90, 90, 30, 30, 90, 30, 90, 70, 70, 90, 50, 50, 50, 50, 50, 90,
90, 70, 70, 70, 70, 70, 90, 90, 90, 90, 90)), .Names = c("x",
"y", "z"), row.names = c(NA, -231L), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x0000000000330788>)

Resources