Plotting a multiple linear regression in R using scatter3D() (package plot3D)

Plotting a multiple linear regression in R using scatter3D() (package plot3D) - r

I have the following data in a csv file.
y,x1,x2,x3,x4,x5,x6,x7,x8,x9
10,2113,1985,38.9,64.7,4,868,59.7,2205,1917
11,2003,2855,38.8,61.3,3,615,55,2096,1575
11,2957,1737,40.1,60,14,914,65.6,1847,2175
13,2285,2905,41.6,45.3,-4,957,61.4,1903,2476
10,2971,1666,39.2,53.8,15,836,66.1,1457,1866
11,2309,2927,39.7,74.1,8,786,61,1848,2339
10,2528,2341,38.1,65.4,12,754,66.1,1564,2092
11,2147,2737,37,78.3,-1,761,58,1821,1909
4,1689,1414,42.1,47.6,-3,714,57,2577,2001
2,2566,1838,42.3,54.2,-1,797,58.9,2476,2254
7,2363,1480,37.3,48,19,984,67.5,1984,2217
example = data.frame(x1,x2,x3,x4,y)
How can I graph the variables x1, x2, x3 using scatter3D(x,y,z)?
I have tried:
library("plot3D")
with(example,scatter3D(y ~ x1 + x2 + x3))
But I get error:
Error in min(x,na.rm) : invalid 'type' (list) of argument

Looks like you want to plot a regression plane. The scatter3d function in package car will do that. You need to install car and rgl. First let's make your data more accessible:
dput(example)
structure(list(y = c(10L, 11L, 11L, 13L, 10L, 11L, 10L, 11L,
4L, 2L, 7L), x1 = c(2113L, 2003L, 2957L, 2285L, 2971L, 2309L,
2528L, 2147L, 1689L, 2566L, 2363L), x2 = c(1985L, 2855L, 1737L,
2905L, 1666L, 2927L, 2341L, 2737L, 1414L, 1838L, 1480L), x3 = c(38.9,
38.8, 40.1, 41.6, 39.2, 39.7, 38.1, 37, 42.1, 42.3, 37.3), x4 = c(64.7,
61.3, 60, 45.3, 53.8, 74.1, 65.4, 78.3, 47.6, 54.2, 48), x5 = c(4L,
3L, 14L, -4L, 15L, 8L, 12L, -1L, -3L, -1L, 19L), x6 = c(868L,
615L, 914L, 957L, 836L, 786L, 754L, 761L, 714L, 797L, 984L),
x7 = c(59.7, 55, 65.6, 61.4, 66.1, 61, 66.1, 58, 57, 58.9,
67.5), x8 = c(2205L, 2096L, 1847L, 1903L, 1457L, 1848L, 1564L,
1821L, 2577L, 2476L, 1984L), x9 = c(1917L, 1575L, 2175L,
2476L, 1866L, 2339L, 2092L, 1909L, 2001L, 2254L, 2217L)),
class = "data.frame", row.names = c(NA, -11L))
install.packages("car")
install.packages("rgl")
library(car)
library(rgl)
scatter3d(y~x1+x2, example)
The plot window will be small. Use the mouse to drag the lower right corner to make it bigger. You can drag within the plot to rotate it.

Related

Extracting values for specific lat long from netcdf

I'm trying to read into R a netCDF file. The netcdf chirps-v2.0.1981.days_p05.nc is downloaded from here:
ftp://ftp.chg.ucsb.edu/pub/org/chg/products/CHIRPS-2.0/global_daily/netcdf/p05/
This netCDF file describes daily rainfall globally as a function of longitude, latitude and has size of
1.1 GB
I also have a set of lon lat
dat <- structure(list(locatioID = paste0('ID', 1:16), lon = c(73.73, 86, 73.45, 86.41, 85.36, 81.95, 82.57, 75.66, 82.03,
81.73, 85.66, 85.31, 81.03, 81.70, 87.03, 73.38),
lat = c(24.59, 20.08, 22.61, 23.33, 23.99, 19.09, 18.85, 15.25, 26.78,
16.63, 25.98, 23.28, 24.5, 21.23, 25.08, 21.11)),
row.names = c(1L, 3L, 5L, 8L, 11L, 14L, 17L, 18L, 19L, 21L,
23L, 26L, 29L, 32L, 33L, 35L), class = "data.frame")
library(ncdf4)
library(raster)
temp <- nc_open("chirps-v2.0.1981.days_p05.nc")
precip = list()
precip$x = ncvar_get(temp, "longitude")
precip$y = ncvar_get(temp, "latitude")
precip$z = ncvar_get(temp, "precip", start=c(1, 1, 1), count=c(-1, -1, 1))
precip.r = raster(precip)
plot(precip.r)
I have two questions:
Can anyone explain to me what does start and count argument does? ?ncvar_get does not give me an intuitive feeling. If I want to create a raster of Julian day 252,
which argument do I need to change?
How do I extract the daily rainfall values for all the 365 days for every lat lon in datsuch that I have a matrix/dataframe of 16 * 365 days

You can use the following code for data extraction from .nc files
dat <- structure(list(locatioID = paste0('ID', 1:16), lon = c(73.73, 86, 73.45, 86.41, 85.36, 81.95, 82.57, 75.66, 82.03,
81.73, 85.66, 85.31, 81.03, 81.70, 87.03, 73.38),
lat = c(24.59, 20.08, 22.61, 23.33, 23.99, 19.09, 18.85, 15.25, 26.78,
16.63, 25.98, 23.28, 24.5, 21.23, 25.08, 21.11)),
row.names = c(1L, 3L, 5L, 8L, 11L, 14L, 17L, 18L, 19L, 21L,
23L, 26L, 29L, 32L, 33L, 35L), class = "data.frame")
temp <- brick("chirps-v2.0.1981.days_p05.nc")
xy <- dat[,2:3] #Column 1 is longitude and column 2 is latitude
xy
spts <- SpatialPoints(xy, proj4string=CRS("+proj=longlat +datum=WGS84"))
#Extract data by spatial point
temp2 <- extract(temp, spts)
temp3 <- t(temp2) #transpose raster object
colnames(temp3) <- dat[,1] #It would be better if you have the location names corresponding to the points
head(temp3)
write.csv(temp3, "Rainfall.csv")

plot graphs in R while showing all the x axis values

I'm trying to plot a graph with my data.
My code for that is
plot(birthRate$country_code, birthRate$yr2014, main = "Birth Rate by Countries 2014")
My out put is like this:
But I want to show all values in x axis.
dput(birthRate):
structure(list(series_code = structure(c(21L, 21L, 21L, 21L,
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L), .Label = c("NY.GNP.PCAP.CD",
"SE.PRM.ENRR", "SE.SEC.ENRR", "SE.TER.ENRR", "SE.TER.ENRR.FE",
"SH.ALC.PCAP.LI", "SH.DTH.COMM.ZS", "SH.DTH.INJR.ZS", "SH.DTH.NCOM.ZS",
"SH.IMM.IBCG", "SH.STA.MMRT.NE", "SH.STA.TRAF.P5", "SH.XPD.PCAP",
"SH.XPD.PRIV.ZS", "SH.XPD.PUBL.ZS", "SH.XPD.TOTL.ZS", "SL.UEM.TOTL.FE.ZS",
"SL.UEM.TOTL.MA.ZS", "SL.UEM.TOTL.ZS", "SP.ADO.TFRT", "SP.DYN.CBRT.IN",
"SP.DYN.CDRT.IN", "SP.DYN.LE00.FE.IN", "SP.DYN.LE00.IN",
"SP.DYN.LE00.MA.IN",
"SP.DYN.TFRT.IN"), class = "factor"), country_name = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 14L, 15L, 17L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 28L, 29L), .Label = c("Australia",
"Brunei Darussalam", "Cambodia", "China", "Fiji", "Indonesia",
"Japan", "Kiribati", "Korea, Dem. People’s Rep.", "Korea, Rep.",
"Lao PDR", "Malaysia", "Marshall Islands", "Micronesia, Fed. Sts.",
"Mongolia", "Nauru", "New Zealand", "Palau", "Papua New Guinea",
"Philippines", "Samoa", "Singapore", "Solomon Islands", "Thailand",
"Timor-Leste", "Tonga", "Tuvalu", "Vanuatu", "Vietnam"), class = "factor"),
country_code = structure(c(1L, 2L, 8L, 3L, 4L, 6L, 7L, 9L,
20L, 10L, 11L, 14L, 5L, 13L, 16L, 19L, 17L, 29L, 21L, 22L,
23L, 24L, 25L, 28L, 27L), .Label = c("AUS", "BRN", "CHN",
"FJI", "FSM", "IDN", "JPN", "KHM", "KIR", "KOR", "LAO", "MHL",
"MNG", "MYS", "NRU", "NZL", "PHL", "PLW", "PNG", "PRK", "SGP",
"SLB", "THA", "TLS", "TON", "TUV", "VNM", "VUT", "WSM"), class = "factor"),
yr2001 = c(12.7, 20.913, 27.327, 13.38, 24.41, 21.486, 9.3,
30.228, 17.414, 11.6, 30.999, 21.445, 29.21, 19.035, 14.36,
34.396, 29.301, 30.269, 11.8, 35.403, 14.025, 41.441, 28.365,
31.84, 17.13), yr2002 = c(12.8, 20.137, 26.793, 12.86, 24.103,
21.49, 9.3, 29.965, 16.92, 10.2, 30.287, 20.39, 28.453, 19.001,
13.67, 33.95, 28.892, 29.991, 11.4, 35.226, 13.653, 40.428,
28.468, 31.219, 16.921), yr2003 = c(12.6, 19.522, 26.44,
12.41, 23.804, 21.5, 9.2, 29.775, 16.431, 10.2, 29.753, 19.435,
27.669, 19.209, 13.94, 33.49, 28.404, 29.778, 10.5, 35.061,
13.32, 39.726, 28.565, 30.597, 16.839), yr2004 = c(12.3,
19.065, 26.24, 12.29, 23.508, 21.499, 8.6936, 29.647, 15.961,
9.8, 29.38, 18.62, 26.886, 19.627, 14.2, 33.03, 27.845, 29.624,
10.3, 34.889, 13.025, 39.368, 28.624, 29.993, 16.848), yr2005 = c(12.8,
18.738, 26.145, 12.4, 23.208, 21.476, 8.4133, 29.572, 15.532,
8.9, 29.134, 17.971, 26.139, 20.223, 13.96, 32.575, 27.238,
29.499, 10.2, 34.68, 12.764, 39.326, 28.611, 29.427, 16.919
), yr2006 = c(12.9, 18.499, 26.098, 12.09, 22.901, 21.429,
8.65, 29.537, 15.166, 9.2, 28.966, 17.498, 25.461, 20.959,
14.14, 32.121, 26.619, 29.355, 10.3, 34.409, 12.533, 39.509,
28.499, 28.92, 17.03), yr2007 = c(14.1, 18.292, 26.043, 12.1,
22.586, 21.364, 8.63, 29.528, 14.87, 10, 28.821, 17.171,
24.872, 21.769, 15.15, 31.659, 26.025, 29.148, 10, 34.063,
12.323, 39.752, 28.288, 28.475, 17.163), yr2008 = c(14, 18.07,
25.937, 12.14, 22.263, 21.283, 8.7, 29.526, 14.648, 9.4,
28.651, 16.954, 24.385, 22.576, 15.1, 31.186, 25.489, 28.845,
10.2, 33.637, 12.123, 39.92, 27.982, 28.091, 17.298), yr2009 = c(13.9,
17.809, 25.755, 12.13, 21.929, 21.177, 8.5, 29.513, 14.498,
9, 28.429, 16.828, 24.011, 23.311, 14.53, 30.706, 25.023,
28.442, 9.9, 33.132, 11.927, 39.95, 27.588, 27.766, 17.409
), yr2010 = c(13.7, 17.499, 25.491, 11.9, 21.583, 21.034,
8.5, 29.468, 14.411, 9.4, 28.142, 16.773, 23.751, 23.892,
14.68, 30.229, 24.634, 27.944, 9.3, 32.555, 11.725, 39.8,
27.112, 27.486, 17.473), yr2011 = c(13.6, 17.146, 25.164,
11.93, 21.221, 20.841, 8.3, 29.377, 14.374, 9.4, 27.8, 16.765,
23.598, 24.252, 14, 29.764, 24.315, 27.372, 9.5, 31.918,
11.51, 39.461, 26.57, 27.236, 17.477), yr2012 = c(13.7, 16.774,
24.812, 12.1, 20.846, 20.595, 8.2, 29.235, 14.363, 9.6, 27.43,
16.783, 23.528, 24.378, 13.87, 29.318, 24.041, 26.768, 10.1,
31.25, 11.281, 38.985, 25.992, 26.993, 17.424), yr2013 = c(13.3,
16.405, 24.462, 12.08, 20.463, 20.297, 8.2, 29.044, 14.358,
8.6, 27.051, 16.805, 23.511, 24.275, 13.2, 28.899, 23.79,
26.172, 9.3, 30.578, 11.041, 38.419, 25.409, 26.739, 17.318
), yr2014 = c(12.9, 16.043, 24.119, 12.4, 20.075, 19.955,
8, 28.8, 14.349, 8.6, 26.666, 16.811, 23.531, 23.949, 12.68,
28.51, 23.552, 25.608, 9.8, 29.921, 10.79, 37.783, 24.846,
26.466, 17.157)), .Names = c("series_code", "country_name",
"country_code", "yr2001", "yr2002", "yr2003", "yr2004", "yr2005",
"yr2006", "yr2007", "yr2008", "yr2009", "yr2010", "yr2011", "yr2012",
"yr2013", "yr2014"), row.names = c(30L, 31L, 32L, 33L, 34L, 35L,
36L, 37L, 38L, 39L, 40L, 41L, 43L, 44L, 46L, 48L, 49L, 50L, 51L,
52L, 53L, 54L, 55L, 57L, 58L), class = "data.frame", na.action =
structure(c(13L, 16L, 18L, 27L), .Names = c("42", "45", "47", "56"), class = "omit"))

You could try plotting the x-axis labels horizontally.
Try this out:
plot(birthRate$country_code, birthRate$yr2014, main = "Birth Rate by Countries 2014", las=2)
Edit:
I was able to make this barplot using ggplot2.
Here's my code:
birthRate <- arrange(birthRate, yr2014)
p <- ggplot(birthRate, aes(y=yr2014, x=reorder(country_code, yr2014), fill=country_code)) +
geom_col()
p
Note: Your dput() output has 29 observations for the country_code variable, but only 25 for the yr2014 variable. I didn't know exactly where the missing data was, so I just removed the last four observations from the country_code variable to get things to line up. Your output may look slightly different based on where the NAs are...
I hope this was helpful!

Add an offset in a nls fit in R

I have a dataframe df
df<-structure(list(ID = structure(c(1L, 3L, 5L, 6L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
24L, 28L, 29L, 30L, 33L, 34L, 37L, 38L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 48L, 49L, 50L, 52L, 53L, 54L, 56L, 57L, 60L, 61L, 62L,
63L, 64L, 65L, 66L, 67L, 69L, 71L, 74L, 75L, 86L, 88L, 90L, 92L,
94L, 95L, 100L, 101L, 107L, 108L, 109L, 110L, 111L, 112L), .Label = c("AU-Tum",
"AU-Wac", "BE-Bra", "BE-Jal", "BE-Vie", "BR-Cax", "BR-Sa3", "CA-Ca1",
"CA-Ca2", "CA-Ca3", "CA-Gro", "Ca-Man", "CA-NS1", "CA-NS2", "CA-NS3",
"CA-NS4", "CA-NS5", "CA-NS6", "CA-NS7", "CA-Oas", "CA-Obs", "CA-Ojp",
"CA-Qcu", "CA-Qfo", "CA-SF1", "CA-SF2", "CA-SF3", "CA-SJ1", "CA-SJ2",
"CA-SJ3", "CA-TP1", "CA-TP2", "CA-TP4", "CN-Cha", "CN-Ku1", "CZ-Bk1",
"De-Bay", "DE-Hai", "DE-Har", "DE-Tha", "DE-Wet", "DK-Sor", "ES-Es1",
"FI-Hyy", "FI-Sod", "FR-Fon", "FR-Hes", "FR-Lbr", "FR-Pue", "GF-Guy",
"ID-Pag", "IL-Yat", "IT-Col", "IT-Cpz", "IT-Lav", "IT-Non", "IT-Pt1",
"IT-Ro1", "IT-Ro2", "IT-Sro", "JP-Tak", "JP-Tef", "JP-Tom", "NL-Loo",
"PT-Esp", "RU-Fyo", "RU-Zot", "SE-Abi", "SE-Fla", "SE-Nor", "SE-Sk1",
"SE-Sk2", "SE-St1", "UK-Gri", "UK-Ham", "US-Bar", "US-Blo", "US-Bn1",
"US-Bn2", "Us-Bn3", "US-Dk3", "US-Fmf", "US-Fwf", "US-Ha1", "US-Ha2",
"US-Ho1", "US-Ho2", "US-Lph", "US-Me1", "US-Me3", "US-Nc2", "US-NR1",
"US-Oho", "US-So2", "US-So3", "US-Sp1", "US-Sp2", "US-Sp3", "US-Syv",
"US-Umb", "US-Wcr", "US-Wi0", "US-Wi1", "US-Wi2", "US-Wi4", "US-Wi8",
"VU-Coc", "CA-Cbo", "RU-Ab", "RU-Be", "RU-Mix", "TH-Mae"), class = "factor"),
VarA = c(98.5, 77, 63.2222222222222, 97, 52.5, 3.5, 15.5,
71, 161.833333333333, 153.5, 73, 39, 40, 23, 14, 5.5, 78,
129.5, 73.5, 4, 100, 10, 3, 30, 65.5, 198, 45.5, 20, 111.5,
44, 68.5, 102.5, 39.1111111111111, 83.8, 136, 31.5, 56.5,
101, 39.25, 108.5, 52.1666666666667, 9.5, 13, 52.1428571428571,
66.5, 1, 44.25, 106, 19, 202.571428571429, 200, 36.6, 2,
21.2, 69, 135, 46.5, 17.5, 96, 80.6666666666667, 10.6666666666667,
86.5, 70.5, 19.5, 85, 200, 50, 250, 30.5), Y = c(436.783385497984,
55.1825021383702, 526.4133417369, 560, 391.49284084118, -519.814235572849,
11.5525291214872, 162.441016515717, 39.0395567645998, -70.4910326673707,
17.1155716306239, -106.326129257097, -94.9308303585276, -66.4285516217351,
-144.929052323413, -220.613145695315, 157.129576861289, 44.1257786633602,
46.8326830295943, -146.719591499443, 30.8043649939355, -4.10548956954153,
-108.258462657337, 90.3369144331664, 126.866108251153, 42.9489971246803,
-45.4886732113082, 483.932040393885, 590.754048774834, 82.1480000555981,
76.8863707484328, 404.007940533033, 202.629066249886, -46.9675149230141,
557.939170770813, 300.979565786038, 224.256197650044, 148.719307398695,
201.195892312115, 466.727302447427, 552.762670615377, 481.359543363331,
467.379381521489, 444.812610935212, 308.198167469197, -638.973101716489,
321.395064735785, 181.896345832773, 629.214319321327, -176.181996958815,
214, 59.1716887350485, -186.42650026083, 515.533437888983,
595.091753601562, 255.499246957091, 368.347069109092, 141.97570288631,
39.5917358684237, 105.039591642989, 77.9087587283187, 153.700042322307,
198.276033313996, 358.242634316906, 156.666666666667, 270,
247, 100, -10)), .Names = c("ID", "VarA", "Y"), row.names = c(NA,
-69L), class = "data.frame")
I am performing a non-linear regression analysis to fit the parameters of a model.
library (minpack.lm)
fit1<- nlsLM(Y~A*(1-exp(k*VarA)), data = NEP_Mean_Site,
start = list(A=192.93829, k=-0.08976), control = list(maxiter = 500))
When I plot the output of the non-linear model against my data, I get this output.
coeff(fit1)
f<- function (x) {211.00044*(1-exp(-0.07224*x))}
plot(df$VarA, df$Y)
curve(f, add=T)
However, I would like my model to start below zero. I guess I have to tune-up my model and include an offset in my nls fit but could not find a way on how to do it.
Anyone knows how to do it?

Just add the offset to the formula:
fit1<- nlsLM(Y~offset + A*(1-exp(k*VarA)),
data = df,
start = list(offset = -300, A=192.93829, k=-0.08976),
control = list(maxiter = 500))
To plot the function, you can use slightly more generic code. Then you don't have to manually construct a function every time you have a different fit:
plot(Y~ VarA, data = df)
id <- order(df$VarA)
lines(predict(fit1)[id] ~ df$VarA[id])
gives you:

Missing Tree Tips When Plotting Phenogram

When I generate a phenogram using the phytools package, the tips and tip labels of the trees are not displaying. Does anyone have any ideas on how to fix this, or another way of plotting a phenogram with nodes and tips with a y axis plotted at the value of the trait in question?
Here's what I have:
midpointData <-
structure(list(Species = structure(1:6, .Label = c("Icterus_croconotus",
"Icterus_graceannae", "Icterus_icterus", "Icterus_jamacaii",
"Icterus_mesomelas", "Icterus_pectoralis"), class = "factor"),
bio_1nam = c(243L, 193L, 225L, 209L, 189L, 180L), bio_12nam = c(5127.5,
751.5, 1373, 914.5, 4043.5, 2623.5), bio_16nam = c(1470.5,
442, 656.5, 542, 1392.5, 1074), bio_17nam = c(1094.5, 51.5,
135, 189.5, 768.5, 377.5), bio_2nam = c(97.5, 91.5, 83, 82.5,
81, 102), bio_5nam = c(314, 265.5, 311, 274, 282, 281), bio_6nam = c(167.5,
132.5, 175.5, 154.5, 128, 114)), .Names = c("Species", "bio_1nam",
"bio_12nam", "bio_16nam", "bio_17nam", "bio_2nam", "bio_5nam",
"bio_6nam"), class = "data.frame", row.names = c(NA, -6L))
prunedTargetTree <-
structure(list(edge = structure(c(7L, 7L, 8L, 9L, 9L, 8L, 10L,
11L, 11L, 10L, 1L, 8L, 9L, 2L, 3L, 10L, 11L, 4L, 5L, 6L), .Dim = c(10L,
2L)), Nnode = 5L, tip.label = c("Icterus_mesomelas", "Icterus_pectoralis",
"Icterus_graceannae", "Icterus_croconotus", "Icterus_icterus",
"Icterus_jamacaii"), edge.length = c(0.152443952069696, 0.014866140819964,
0.0311847312922788, 0.106393079957453, 0.106393079957453, 0.0727572150872864,
0.0130293222294024, 0.0517912739330428, 0.0517912739330428, 0.0648205961624452
)), .Names = c("edge", "Nnode", "tip.label", "edge.length"), class = "phylo", order = "cladewise")
library(phytools)
reconBio1 <- ace(midpointData$bio_1nam, prunedTargetTree, type = "continuous", method = "ML")
bio1final <- c(reconBio1$ace, midpointData$bio_1nam)
names(bio1final) <- c(7,8,9,10,11,4,3,5,6,1,2)
plot.new()
phenogram(prunedTargetTree, bio1final, ylim = c(min(bio1final), max(bio1final)))
Here's what the tree looks like:

I have solved the problem, but wanted to share the solution in case others run into the same issue. pheonogram() looks for names in the argument x (aka bio1final) that match prunedTargetTree$tip.label, not the numeric index of the tip. Instead of:
bio1final <- c(reconBio1$ace, midpointData$bio_1nam);
names(bio1final) <- c(7,8,9,10,11,4,3,5,6,1,2)
it should read:
bio1final <- c(reconBio1$ace, midpointData$bio_1nam);
names(bio1final) <- c(7,8,9,10,11,as.character(midpointData$Species))
**as.character is important, because otherwise $Species is read in as a factor, and the tips of the tree still won't plot.

r - Add significance level to correlation heatmap [duplicate]

This question already has answers here:
Significance level added to matrix correlation heatmap using ggplot2
(3 answers)
Closed 9 years ago.
I have the following data frame df (appended)
I have written a short script to plot a correlation heatmap
library(ggplot2)
library(plyr)
library(reshape2)
library(gridExtra)
#Load data frame
df <- data.frame(read.csv("~/Documents/wig_cor.csv",sep="\t"))
c = cor(df[sapply(df,is.numeric)])
#Plot all data
plots <- dlply(df, .(Method), function (x1) {
ggplot(melt(cor(x1[sapply(x1,is.numeric)])),
aes(x=Var1,y=Var2,fill=value)) + geom_tile(aes(fill = value),colour = "white") + geom_text(aes(label = sprintf("%1.2f",value)), vjust = 1) + theme_bw() + theme(legend.position = 'none') +
scale_fill_gradient2(midpoint=0.8,low = "white", high = "steelblue")})
#Plot by EF Analysis Method
plots <- dlply(df, .(Method), function (x1) {
ggplot(subset(melt(cor(x1[sapply(x1,is.numeric)]))[lower.tri(c),],Var1 != Var2),
aes(x=Var1,y=Var2,fill=value)) + geom_tile(aes(fill = value),colour = "white") +
geom_text(aes(label = sprintf("%1.2f",value)), vjust = 1) +
theme_bw() +
scale_fill_gradient2(name="R^2",midpoint=0.7,low = "white", high = "red") + xlab(NULL)+ylab(NULL) + theme(axis.text.x=element_blank(),axis.text.y=element_blank(), axis.ticks=element_blank(),panel.border=element_blank()) + ggtitle(x1$Method) + theme(plot.title = element_text(lineheight=1,face="bold")) + geom_text(data = subset(melt(cor(x1[sapply(x1,is.numeric)])),Var1==Var2),aes(label=Var1),vjust=3 ) })
#Function to grab legend
g_legend<-function(a.gplot){
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
legend
}
legend <- g_legend(plots$WIG_Method)
png(file = "/misc/croc_common/physics/jamie/Portfolio/WesternEF/EFCorrelations.png", width = 1200, height = 400)
grid.arrange(legend,plots$Single_ROI+theme(legend.position='none'), plots$Simple_2_ROI+theme(legend.position='none'),plots$WIG_Method+theme(legend.position='none'), plots$WIG_drawn_bg+theme(legend.position='none'), ncol=5, nrow=1, widths=c(1/17,4/17,4/17,4/17,4/17))
dev.off()
However, I would like to use stars to highlight the statistical significanceas of each correlation as described here but I am completely lost on how to do this. Any guidance
structure(list(Study = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L), .Label = c("WCBP12236", "WCBP12241", "WCBP12242", "WCBP12243",
"WCBP12245", "WCBP13001", "WCBP13002", "WCBP13003", "WCBP13004",
"WCBP13005", "WCBP13006", "WCBP13007", "WCBP13008", "WCBP13009",
"WCBP13010", "WCBP13011", "WCBP13012", "WCBP13013", "WCBP13014"
), class = "factor"), G1 = c(68, 68.6, 66.6, 73.1, 51.6, 50.1,
64.1, 73, 63.7, 43.2, 62.3, 59.2, 67.5, 68.2, 54.6, 67.9, 56.5,
54.2, 67.3, 68, 68.4, 67.9, 73.3, 51.7, 50.3, 63.9, 73.9, 64,
42.9, 62.5, 59.3, 66.7, 68.4, 54, 68.2, 56.8, 54.5, 67, 53.2,
41.4, 53, 52.3, 41, 37.4, 56.9, 65.3, 36.2, 35.3, 36.1, 32.5,
56.5, 47.7, 39.4, 59.6, 38.1, 24.2, 30.2, 68.5, 68.9, 70.7, 74.9,
53.4, 51.6, 65.9, 75.7, 64.7, 42.8, 61.4, 60.8, 69.5, 68.7, 55.9,
70.7, 59.5, 51.1, 69.5), G2 = c(79.8, 72.2, 73.5, 74.4, 50.4,
54.8, 63.1, 70.4, 63.6, 45.1, 65.3, 49.4, 65.3, 76.2, 51, 63.9,
58.7, 57.8, 67, 79.6, 72.1, 73.9, 74.7, 50.5, 55.1, 62.8, 70.5,
63.3, 44.6, 65.5, 48.9, 64.9, 76.3, 50.6, 64.8, 58.6, 58.3, 67.4,
51.2, 37.7, 49.1, 53.7, 44.6, 37.3, 54.9, 64.1, 33.8, 31.9, 34.2,
30.3, 56.2, 44.6, 38.2, 63.2, 35.8, 26.5, 27.6, 80.6, 71.6, 75.4,
77.1, 52.4, 56.3, 66, 72.3, 64.5, 38.2, 64.3, 49.2, 66.9, 77.1,
52.4, 67.5, 59.6, 55.6, 69.9), S1 = c(75.1, 65.9, 72.7, 68.8,
49, 57.5, 66.5, 74.1, 60.9, 51.8, 58, 64.3, 71.1, 71.4, 58.9,
62.2, 58, 57.7, 58.6, 75.2, 66, 73.2, 69.7, 48.9, 57.7, 66.5,
74.7, 60.8, 51.4, 58.9, 65.5, 70.5, 71.4, 58.9, 65.1, 60.8, 57.7,
58.4, 54.3, 40.2, 52.6, 60.5, 42.6, 34.1, 55, 64.7, 36.3, 32.5,
39, 38.8, 58.1, 48, 40.5, 61, 40, 26.4, 28.8, 76.4, 66.5, 73.9,
72, 50.7, 59.2, 69.9, 76.3, 62.4, 50, 58.5, 66.6, 73.7, 72.3,
62.6, 69.6, 62.7, 57.9, 61.1), S2 = c(76.6, 71.6, 71.2, 72.7,
51.6, 56.7, 65.9, 73.5, 63.6, 55.2, 62.6, 62.2, 69.1, 71.1, 56.8,
61, 61.7, 60, 55.7, 76.9, 71.6, 72.3, 73.2, 51.7, 56.8, 64.5,
74.9, 63.6, 51.3, 63, 62.8, 68.7, 71.3, 56.8, 64.2, 62.8, 60.4,
55.8, 53.6, 42.5, 50, 54.4, 42.2, 36.4, 57.7, 64.1, 35.1, 30.8,
39.1, 37.4, 58.7, 47.8, 42, 58.8, 39.4, 24.2, 28.2, 78.2, 73.3,
72.3, 75.6, 53.4, 57.8, 68.3, 76.6, 63.7, 51.7, 63.4, 63.3, 71.5,
72.3, 60.2, 67.1, 65.5, 58.2, 59.1), Method = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Simple_2_ROI",
"Single_ROI", "WIG_drawn_bg", "WIG_Method"), class = "factor")), .Names = c("Study",
"G1", "G2", "S1", "S2", "Method"), row.names = c(NA, -76L), class = "data.frame")

Here's an example of one way to do it with ggplot. You essentially add the significance stars as characters to a dataframe and plot them as text on the heatmap: https://github.com/andrewheiss/Attitudes-in-the-Arab-World/blob/master/figure12.R

A useful function for getting p values out of the correlation matrix is rcorr from Hmisc. Using it, I got this:
In each cell of the correlation matrix, there is a pair of numbers: The upper one represents the coefficient of correlation (as does the color gradient of the cell), while the lower one represents the p value. Is this what you wanted? (See the bottom of the answer for improved response, whereby I convert p values into stars...)
I proceeded as follows:
Since your p values would be VERY small in this data frame, I have used jitter and stripped the amount of observations so as to decrease the statistical significance. The reason for that is that very low p values would be very hard to read in a correlation matrix of this type. Consequently, the result does not make much sense from a statistical point of view but it demonstrates nicely how the significance levels can be added to the matrix.
First, jitter it and limit the number of observations:
mydf=df
mydf[,2:5] = sapply(mydf[,2:5],jitter,amount=20)
mydf=mydf[c(1:5,20:24,39:43,58:62),]
Then calculate r coefficient and p values:
library(Hmisc)
# calculate r
c = rcorr(as.matrix(mydf[sapply(mydf,is.numeric)]))$r
# calculate p values
p = rcorr(as.matrix(mydf[sapply(mydf,is.numeric)]))$P
Make a plot based on both those values:
plots <- dlply(mydf, .(Method), function (x1) {
ggplot(data.frame(subset(melt(rcorr(as.matrix(x1[sapply(x1,is.numeric)]))$r)[lower.tri(c),],Var1 != Var2),
pvalue=subset(melt(rcorr(as.matrix(x1[sapply(x1,is.numeric)]))$P)[lower.tri(p),],Var1 != Var2)$value),
aes(x=Var1,y=Var2,fill=value)) +
geom_tile(aes(fill = value),colour = "white") +
geom_text(aes(label = sprintf("%1.2f",value)), vjust = 0) +
geom_text(aes(label = sprintf("%1.2f",pvalue)), vjust = 1) +
theme_bw() +
scale_fill_gradient2(name="R^2",midpoint=0.25,low = "blue", high = "red") +
xlab(NULL) +
ylab(NULL) +
theme(axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
panel.border=element_blank()) +
ggtitle(x1$Method) + theme(plot.title = element_text(lineheight=1,face="bold")) +
geom_text(data = subset(melt(rcorr(as.matrix(x1[sapply(x1,is.numeric)]))$r),Var1==Var2),
aes(label=Var1),vjust=1 )
})
Display plot.
grid.arrange(plots$Single_ROI + theme(legend.position='none'),
plots$Simple_2_ROI + theme(legend.position='none'),
plots$WIG_Method + theme(legend.position='none'),
plots$WIG_drawn_bg + theme(legend.position='none'),
ncol=2,
nrow=2)
Stars instead of p values:
Modify data frame (I leave a few more observations this time):
library(Hmisc)
library(car)
mydf=df
set.seed(12345)
mydf[,2:5] = sapply(mydf[,2:5],jitter,amount=15)
mydf=mydf[c(1:10,20:29,39:48,58:67),]
Calculate r, p values and recode p values into stars inside the plot function:
# calculate r
c = rcorr(as.matrix(mydf[sapply(mydf,is.numeric)]))$r
# calculate p values
p = rcorr(as.matrix(mydf[sapply(mydf,is.numeric)]))$P
plots <- dlply(mydf, .(Method), function (x1) {
ggplot(data.frame(subset(melt(rcorr(as.matrix(x1[sapply(x1,is.numeric)]))$r)[lower.tri(c),],Var1 != Var2),
pvalue=Recode(subset(melt(rcorr(as.matrix(x1[sapply(x1,is.numeric)]))$P)[lower.tri(p),],Var1 != Var2)$value , "lo:0.01 = '***'; 0.01:0.05 = '*'; else = ' ';")),
aes(x=Var1,y=Var2,fill=value)) +
geom_tile(aes(fill = value),colour = "white") +
geom_text(aes(label = sprintf("%1.2f",value)), vjust = 0) +
geom_text(aes(label = pvalue), vjust = 1) +
theme_bw() +
scale_fill_gradient2(name="R^2",midpoint=0.25,low = "blue", high = "red") +
xlab(NULL) +
ylab(NULL) +
theme(axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
panel.border=element_blank()) +
ggtitle(x1$Method) + theme(plot.title = element_text(lineheight=1,face="bold")) +
geom_text(data = subset(melt(rcorr(as.matrix(x1[sapply(x1,is.numeric)]))$r),Var1==Var2),
aes(label=Var1),vjust=1 )
})
Display plot.
grid.arrange(plots$Single_ROI + theme(legend.position='none'),
plots$Simple_2_ROI + theme(legend.position='none'),
plots$WIG_Method + theme(legend.position='none'),
plots$WIG_drawn_bg + theme(legend.position='none'),
ncol=2,
nrow=2)

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Plotting a multiple linear regression in R using scatter3D() (package plot3D) - r

Related

Extracting values for specific lat long from netcdf

plot graphs in R while showing all the x axis values

Add an offset in a nls fit in R

Missing Tree Tips When Plotting Phenogram

r - Add significance level to correlation heatmap [duplicate]

Categories

Resources