str(coord_mat)
List of 1
 $ :List of 1
  ..$ : num [1:17, 1:2] -122 -122 -122 -122 -122 ...
I have a list of coordinate pairs in coord_mat that I would like to transform into a data frame (or matrix) of coordinate pairs with the same structure (first column lon, second column lat).
> coord_mat
[[1]]
[[1]][[1]]
[,1] [,2]
[1,] -122.3435 47.63787
[2,] -122.3435 47.63787
[3,] -122.3434 47.63787
[4,] -122.3434 47.63787
[5,] -122.3434 47.63787
[6,] -122.3434 47.63787
[7,] -122.3434 47.63787
[8,] -122.3434 47.63784
[9,] -122.3433 47.63777
[10,] -122.3430 47.63772
[11,] -122.3427 47.63778
[12,] -122.3425 47.63776
[13,] -122.3423 47.63749
[14,] -122.3421 47.63718
[15,] -122.3420 47.63700
[16,] -122.3419 47.63698
[17,] -122.3419 47.63698
How is this possible in R while keeping the same two-column structure as the list?
I have tried matrix(unlist(coord_mat)), but this just produces a single 34-row column with the lon values first and then the lat values. Is it because I am working with a list of lists?
> matrix(unlist(coord_mat))
[,1]
[1,] -122.34345
[2,] -122.34345
[3,] -122.34340
[4,] -122.34340
[5,] -122.34340
[6,] -122.34340
[7,] -122.34340
[8,] -122.34338
[9,] -122.34334
[10,] -122.34299
[11,] -122.34273
[12,] -122.34249
[13,] -122.34230
[14,] -122.34208
[15,] -122.34198
[16,] -122.34194
[17,] -122.34194
[18,] 47.63787
[19,] 47.63787
[20,] 47.63787
[21,] 47.63787
[22,] 47.63787
[23,] 47.63787
[24,] 47.63787
[25,] 47.63784
[26,] 47.63777
[27,] 47.63772
[28,] 47.63778
[29,] 47.63776
[30,] 47.63749
[31,] 47.63718
[32,] 47.63700
[33,] 47.63698
[34,] 47.63698
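(For reference, a quick sketch of what matrix() is doing here: it defaults to a single column, and unlist() has already laid the values out in column-major order, lon first then lat, so asking for two columns restores the pairing.)
dim(matrix(unlist(coord_mat)))            # 34  1  (the single long column above)
dim(matrix(unlist(coord_mat), ncol = 2))  # 17  2  (lon in column 1, lat in column 2)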
Here is the data:
dput(coord_mat)
list(list(structure(c(-122.34345, -122.34345, -122.343398333333,
-122.343398333333, -122.343398333333, -122.343398333333, -122.343398333333,
-122.343376666667, -122.34334, -122.342991666667, -122.342731666667,
-122.342491666667, -122.3423, -122.342081666667, -122.341983333333,
-122.341943333333, -122.341943333333, 47.6378716666667, 47.6378716666667,
47.6378683333333, 47.6378683333333, 47.6378683333333, 47.6378683333333,
47.6378683333333, 47.637835, 47.637775, 47.6377183333333, 47.63778,
47.63776, 47.6374916666667, 47.6371816666667, 47.6369966666667,
47.6369783333333, 47.6369783333333), .Dim = c(17L, 2L))))
# The 17 x 2 matrix is simply the first (and only) element of the nested list:
res <- coord_mat[[c(1, 1)]]
# or rebuild it from the flattened vector, keeping two columns:
res <- matrix(unlist(coord_mat), ncol = 2)
colnames(res) <- c("lon", "lat")
res
lon lat
[1,] -122.3435 47.63787
[2,] -122.3435 47.63787
[3,] -122.3434 47.63787
[4,] -122.3434 47.63787
[5,] -122.3434 47.63787
[6,] -122.3434 47.63787
[7,] -122.3434 47.63787
[8,] -122.3434 47.63784
[9,] -122.3433 47.63777
[10,] -122.3430 47.63772
[11,] -122.3427 47.63778
[12,] -122.3425 47.63776
[13,] -122.3423 47.63749
[14,] -122.3421 47.63718
[15,] -122.3420 47.63700
[16,] -122.3419 47.63698
[17,] -122.3419 47.63698
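If you need a data frame rather than a matrix, as the question mentions, a one-line sketch:
coords_df <- as.data.frame(res)  # data frame with columns lon and lat
str(coords_df)                   # 17 obs. of 2 variables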
Am I the only one having a problem extracting the coordinates of a polygon from a SpatialPolygonsDataFrame object? I am able to extract other slots of the object (ID, plotOrder) but not the coordinates (coords). I don't know what I am doing wrong. Please find my R session below, where bdryData is the SpatialPolygonsDataFrame object with two polygons.
> bdryData
An object of class "SpatialPolygonsDataFrame"
Slot "data":
ID GRIDCODE
0 1 0
1 2 0
Slot "polygons":
[[1]]
An object of class "Polygons"
Slot "Polygons":
[[1]]
An object of class "Polygon"
Slot "labpt":
[1] 415499.1 432781.7
Slot "area":
[1] 0.6846572
Slot "hole":
[1] FALSE
Slot "ringDir":
[1] 1
Slot "coords":
[,1] [,2]
[1,] 415499.6 432781.2
[2,] 415498.4 432781.5
[3,] 415499.3 432782.4
[4,] 415499.6 432781.2
Slot "plotOrder":
[1] 1
Slot "labpt":
[1] 415499.1 432781.7
Slot "ID":
[1] "0"
Slot "area":
[1] 0.6846572
[[2]]
An object of class "Polygons"
Slot "Polygons":
[[1]]
An object of class "Polygon"
Slot "labpt":
[1] 415587.3 432779.4
Slot "area":
[1] 20712.98
Slot "hole":
[1] FALSE
Slot "ringDir":
[1] 1
Slot "coords":
[,1] [,2]
[1,] 415499.6 432781.2
[2,] 415505.0 432781.8
[3,] 415506.5 432792.6
[4,] 415508.9 432792.8
[5,] 415515.0 432791.5
[6,] 415517.7 432795.6
[7,] 415528.6 432797.7
[8,] 415538.8 432804.2
[9,] 415543.2 432805.8
[10,] 415545.1 432803.6
[11,] 415547.1 432804.7
[12,] 415551.7 432805.8
[13,] 415557.5 432812.3
[14,] 415564.2 432817.1
[15,] 415568.5 432823.9
[16,] 415571.0 432826.8
[17,] 415573.2 432828.7
[18,] 415574.1 432829.7
[19,] 415576.2 432830.7
[20,] 415580.2 432833.8
[21,] 415589.6 432836.0
[22,] 415593.1 432841.0
[23,] 415592.2 432843.7
[24,] 415590.6 432846.6
[25,] 415589.0 432853.3
[26,] 415584.8 432855.3
[27,] 415579.7 432859.8
[28,] 415577.7 432866.2
[29,] 415575.6 432868.1
[30,] 415566.7 432880.7
[31,] 415562.7 432887.5
[32,] 415559.2 432889.1
[33,] 415561.5 432890.7
[34,] 415586.2 432889.7
[35,] 415587.1 432888.6
[36,] 415588.5 432890.2
[37,] 415598.2 432888.7
[38,] 415599.1 432887.7
[39,] 415601.2 432886.7
[40,] 415603.1 432885.7
[41,] 415605.2 432884.7
[42,] 415606.1 432882.7
[43,] 415607.2 432880.7
[44,] 415608.3 432878.3
[45,] 415612.2 432874.8
[46,] 415614.7 432871.9
[47,] 415617.1 432870.7
[48,] 415622.4 432868.2
[49,] 415622.0 432862.4
[50,] 415624.2 432855.4
[51,] 415633.2 432845.3
[52,] 415639.0 432841.1
[53,] 415642.8 432832.9
[54,] 415647.5 432828.7
[55,] 415654.3 432820.3
[56,] 415654.1 432816.5
[57,] 415658.2 432812.8
[58,] 415661.9 432808.6
[59,] 415663.5 432808.7
[60,] 415668.1 432803.5
[61,] 415676.5 432801.3
[62,] 415679.1 432802.7
[63,] 415680.1 432802.7
[64,] 415681.1 432802.7
[65,] 415682.2 432802.7
[66,] 415685.8 432804.7
[67,] 415691.8 432802.2
[68,] 415693.6 432798.9
[69,] 415696.2 432777.0
[70,] 415689.8 432773.5
[71,] 415683.7 432771.6
[72,] 415680.2 432766.7
[73,] 415679.0 432765.6
[74,] 415676.8 432753.7
[75,] 415671.4 432747.7
[76,] 415662.7 432747.2
[77,] 415658.7 432750.0
[78,] 415657.0 432746.3
[79,] 415654.1 432743.7
[80,] 415652.3 432739.8
[81,] 415649.6 432739.6
[82,] 415648.0 432739.7
[83,] 415641.9 432736.4
[84,] 415633.4 432736.9
[85,] 415630.2 432734.7
[86,] 415622.3 432733.6
[87,] 415614.4 432726.5
[88,] 415617.1 432719.1
[89,] 415612.5 432718.1
[90,] 415610.0 432720.9
[91,] 415606.2 432716.6
[92,] 415603.2 432713.9
[93,] 415601.4 432710.0
[94,] 415580.3 432708.7
[95,] 415545.1 432709.7
[96,] 415543.5 432711.5
[97,] 415534.0 432715.7
[98,] 415527.1 432713.7
[99,] 415521.1 432711.6
[100,] 415505.6 432710.6
[101,] 415501.3 432710.9
[102,] 415499.3 432708.7
[103,] 415495.6 432711.6
[104,] 415482.6 432726.2
[105,] 415477.2 432734.0
[106,] 415478.1 432737.7
[107,] 415479.2 432739.7
[108,] 415480.9 432743.4
[109,] 415486.5 432751.2
[110,] 415493.2 432760.7
[111,] 415494.1 432762.7
[112,] 415498.1 432767.9
[113,] 415497.2 432770.7
[114,] 415490.6 432773.2
[115,] 415493.2 432775.6
[116,] 415496.0 432778.7
[117,] 415499.2 432779.7
[118,] 415499.6 432781.2
Slot "plotOrder":
[1] 1
Slot "labpt":
[1] 415587.3 432779.4
Slot "ID":
[1] "1"
Slot "area":
[1] 20712.98
Slot "plotOrder":
[1] 2 1
Slot "bbox":
min max
x 415477.2 415696.2
y 432708.7 432890.7
Slot "proj4string":
CRS arguments:
+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000
+datum=OSGB36 +units=m +no_defs +ellps=airy
+towgs84=446.448,-125.157,542.060,0.1502,0.2470,0.8421,-20.4894
Subsetting the second polygon from bdryData:
> bdryData@polygons[[2]]
An object of class "Polygons"
Slot "Polygons":
[[1]]
An object of class "Polygon"
Slot "labpt":
[1] 415587.3 432779.4
Slot "area":
[1] 20712.98
Slot "hole":
[1] FALSE
Slot "ringDir":
[1] 1
Slot "coords":
[,1] [,2]
[1,] 415499.6 432781.2
[2,] 415505.0 432781.8
[3,] 415506.5 432792.6
[4,] 415508.9 432792.8
[5,] 415515.0 432791.5
[6,] 415517.7 432795.6
[7,] 415528.6 432797.7
[8,] 415538.8 432804.2
[9,] 415543.2 432805.8
[10,] 415545.1 432803.6
[11,] 415547.1 432804.7
[12,] 415551.7 432805.8
[13,] 415557.5 432812.3
[14,] 415564.2 432817.1
[15,] 415568.5 432823.9
[16,] 415571.0 432826.8
[17,] 415573.2 432828.7
[18,] 415574.1 432829.7
[19,] 415576.2 432830.7
[20,] 415580.2 432833.8
[21,] 415589.6 432836.0
[22,] 415593.1 432841.0
[23,] 415592.2 432843.7
[24,] 415590.6 432846.6
[25,] 415589.0 432853.3
[26,] 415584.8 432855.3
[27,] 415579.7 432859.8
[28,] 415577.7 432866.2
[29,] 415575.6 432868.1
[30,] 415566.7 432880.7
[31,] 415562.7 432887.5
[32,] 415559.2 432889.1
[33,] 415561.5 432890.7
[34,] 415586.2 432889.7
[35,] 415587.1 432888.6
[36,] 415588.5 432890.2
[37,] 415598.2 432888.7
[38,] 415599.1 432887.7
[39,] 415601.2 432886.7
[40,] 415603.1 432885.7
[41,] 415605.2 432884.7
[42,] 415606.1 432882.7
[43,] 415607.2 432880.7
[44,] 415608.3 432878.3
[45,] 415612.2 432874.8
[46,] 415614.7 432871.9
[47,] 415617.1 432870.7
[48,] 415622.4 432868.2
[49,] 415622.0 432862.4
[50,] 415624.2 432855.4
[51,] 415633.2 432845.3
[52,] 415639.0 432841.1
[53,] 415642.8 432832.9
[54,] 415647.5 432828.7
[55,] 415654.3 432820.3
[56,] 415654.1 432816.5
[57,] 415658.2 432812.8
[58,] 415661.9 432808.6
[59,] 415663.5 432808.7
[60,] 415668.1 432803.5
[61,] 415676.5 432801.3
[62,] 415679.1 432802.7
[63,] 415680.1 432802.7
[64,] 415681.1 432802.7
[65,] 415682.2 432802.7
[66,] 415685.8 432804.7
[67,] 415691.8 432802.2
[68,] 415693.6 432798.9
[69,] 415696.2 432777.0
[70,] 415689.8 432773.5
[71,] 415683.7 432771.6
[72,] 415680.2 432766.7
[73,] 415679.0 432765.6
[74,] 415676.8 432753.7
[75,] 415671.4 432747.7
[76,] 415662.7 432747.2
[77,] 415658.7 432750.0
[78,] 415657.0 432746.3
[79,] 415654.1 432743.7
[80,] 415652.3 432739.8
[81,] 415649.6 432739.6
[82,] 415648.0 432739.7
[83,] 415641.9 432736.4
[84,] 415633.4 432736.9
[85,] 415630.2 432734.7
[86,] 415622.3 432733.6
[87,] 415614.4 432726.5
[88,] 415617.1 432719.1
[89,] 415612.5 432718.1
[90,] 415610.0 432720.9
[91,] 415606.2 432716.6
[92,] 415603.2 432713.9
[93,] 415601.4 432710.0
[94,] 415580.3 432708.7
[95,] 415545.1 432709.7
[96,] 415543.5 432711.5
[97,] 415534.0 432715.7
[98,] 415527.1 432713.7
[99,] 415521.1 432711.6
[100,] 415505.6 432710.6
[101,] 415501.3 432710.9
[102,] 415499.3 432708.7
[103,] 415495.6 432711.6
[104,] 415482.6 432726.2
[105,] 415477.2 432734.0
[106,] 415478.1 432737.7
[107,] 415479.2 432739.7
[108,] 415480.9 432743.4
[109,] 415486.5 432751.2
[110,] 415493.2 432760.7
[111,] 415494.1 432762.7
[112,] 415498.1 432767.9
[113,] 415497.2 432770.7
[114,] 415490.6 432773.2
[115,] 415493.2 432775.6
[116,] 415496.0 432778.7
[117,] 415499.2 432779.7
[118,] 415499.6 432781.2
Slot "plotOrder":
[1] 1
Slot "labpt":
[1] 415587.3 432779.4
Slot "ID":
[1] "1"
Slot "area":
[1] 20712.98
Extracting slots:
> bdryData@polygons[[2]]@ID
[1] "1"
> bdryData@polygons[[2]]@plotOrder
[1] 1
But there is a problem with the coordinates:
> bdryData@polygons[[2]]@coords
Error: no slot of name "coords" for this object of class "Polygons"
Any help is really appreciated. Thanks.
Finally, I figured out that I didn't parse the output correctly. The correct way to do it is bdryData@polygons[[2]]@Polygons[[1]]@coords. Mind the difference between the polygons slot and the Polygons slot (capital P); it took me ages to find out.
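In other words (a small sketch using the bdryData object from the question): each element of the polygons slot is a Polygons object, which in turn holds a list of Polygon objects, and coords lives on that innermost class.
p2 <- bdryData@polygons[[2]]  # a "Polygons" object: slots Polygons, plotOrder, labpt, ID, area
ring <- p2@Polygons[[1]]      # a "Polygon" object: slots labpt, area, hole, ringDir, coords
head(ring@coords)             # the vertex matrix shown in the console dump above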
Use the coordinates() function from the sp package. It should give you the values in a list format.
You can also get the Polygon attribute from the shapefile.
library(rgdal)  # for readOGR()
library(sp)     # for coordinates()

mfile = readOGR(dsn = dsn, layer = layername)
polys = attr(mfile, 'polygons')
npolys = length(polys)
for (i in 1:npolys) {
  poly = polys[[i]]
  polys2 = attr(poly, 'Polygons')
  npolys2 = length(polys2)
  for (j in 1:npolys2) {
    # do stuff with these values
    coords = coordinates(polys2[[j]])
  }
}
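A related sketch without the explicit loops, assuming the bdryData object from the question is already loaded: coordinates() applied to an individual Polygon object returns its vertex matrix, so lapply() can collect the outer ring of each feature.
rings <- lapply(bdryData@polygons, function(p) coordinates(p@Polygons[[1]]))  # first ring per feature
str(rings)  # one vertex matrix per feature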
This took me a while to figure out too. The following function I wrote worked for me; sp.df should be a SpatialPolygonsDataFrame.
extractCoords <- function(sp.df) {
  # Collect the coords of every Polygon (ring) in the first feature of sp.df
  results <- list()
  for (i in 1:length(sp.df@polygons[[1]]@Polygons)) {
    results[[i]] <- sp.df@polygons[[1]]@Polygons[[i]]@coords
  }
  results <- Reduce(rbind, results)
  results
}
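Usage might look like this (a sketch; note that, as written, the function only walks the rings of the first feature, sp.df@polygons[[1]]):
xy <- extractCoords(bdryData)  # sketch, using bdryData from the question
head(xy)                       # two-column matrix of vertex coordinates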
This question was also addressed on gis.stackexchange, here. I made an example below testing all the options mentioned there by @mdsumner. Also have a look here.
library(sp)
library(sf)
#> Warning: package 'sf' was built under R version 3.5.3
#> Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
library(raster)
library(spbabel)
#> Warning: package 'spbabel' was built under R version 3.5.3
library(tmap)
library(microbenchmark)
library(ggplot2)
# Prepare data
data(World)
# Convert from sf to sp objects
atf_sf <- World[World$iso_a3 == "ATF", ]
atf_sp <- as(atf_sf, "Spatial")
atf_sp
#> class : SpatialPolygonsDataFrame
#> features : 1
#> extent : 5490427, 5660887, -6048972, -5932855 (xmin, xmax, ymin, ymax)
#> coord. ref. : +proj=eck4 +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0
#> variables : 15
#> # A tibble: 1 x 15
#> iso_a3 name sovereignt continent area pop_est pop_est_dens economy
#> <fct> <fct> <fct> <fct> <S3:> <dbl> <dbl> <fct>
#> 1 ATF Fr. ~ France Seven se~ 7257~ 140 0.0193 6. Dev~
#> # ... with 7 more variables: income_grp <fct>, gdp_cap_est <dbl>,
#> # life_exp <dbl>, well_being <dbl>, footprint <dbl>, inequality <dbl>,
#> # HPI <dbl>
# Try various functions:
raster::geom(atf_sp)
#> object part cump hole x y
#> [1,] 1 1 1 0 5550200 -5932855
#> [2,] 1 1 1 0 5589907 -5964836
#> [3,] 1 1 1 0 5660887 -5977490
#> [4,] 1 1 1 0 5656160 -5996685
#> [5,] 1 1 1 0 5615621 -6042456
#> [6,] 1 1 1 0 5490427 -6048972
#> [7,] 1 1 1 0 5509148 -5995424
#> [8,] 1 1 1 0 5536900 -5953683
#> [9,] 1 1 1 0 5550200 -5932855
ggplot2::fortify(atf_sp)
#> Regions defined for each Polygons
#> long lat order hole piece id group
#> 1 5550200 -5932855 1 FALSE 1 8 8.1
#> 2 5589907 -5964836 2 FALSE 1 8 8.1
#> 3 5660887 -5977490 3 FALSE 1 8 8.1
#> 4 5656160 -5996685 4 FALSE 1 8 8.1
#> 5 5615621 -6042456 5 FALSE 1 8 8.1
#> 6 5490427 -6048972 6 FALSE 1 8 8.1
#> 7 5509148 -5995424 7 FALSE 1 8 8.1
#> 8 5536900 -5953683 8 FALSE 1 8 8.1
#> 9 5550200 -5932855 9 FALSE 1 8 8.1
spbabel::sptable(atf_sp)
#> # A tibble: 9 x 6
#> object_ branch_ island_ order_ x_ y_
#> <int> <int> <lgl> <int> <dbl> <dbl>
#> 1 1 1 TRUE 1 5550200. -5932855.
#> 2 1 1 TRUE 2 5589907. -5964836.
#> 3 1 1 TRUE 3 5660887. -5977490.
#> 4 1 1 TRUE 4 5656160. -5996685.
#> 5 1 1 TRUE 5 5615621. -6042456.
#> 6 1 1 TRUE 6 5490427. -6048972.
#> 7 1 1 TRUE 7 5509148. -5995424.
#> 8 1 1 TRUE 8 5536900. -5953683.
#> 9 1 1 TRUE 9 5550200. -5932855.
as.data.frame(as(as(atf_sp, "SpatialLinesDataFrame"),"SpatialPointsDataFrame"))
#> iso_a3 name sovereignt continent
#> 8 ATF Fr. S. Antarctic Lands France Seven seas (open ocean)
#> 8.1 ATF Fr. S. Antarctic Lands France Seven seas (open ocean)
#> 8.2 ATF Fr. S. Antarctic Lands France Seven seas (open ocean)
#> 8.3 ATF Fr. S. Antarctic Lands France Seven seas (open ocean)
#> 8.4 ATF Fr. S. Antarctic Lands France Seven seas (open ocean)
#> 8.5 ATF Fr. S. Antarctic Lands France Seven seas (open ocean)
#> 8.6 ATF Fr. S. Antarctic Lands France Seven seas (open ocean)
#> 8.7 ATF Fr. S. Antarctic Lands France Seven seas (open ocean)
#> 8.8 ATF Fr. S. Antarctic Lands France Seven seas (open ocean)
#> area pop_est pop_est_dens economy
#> 8 7257.455 [km^2] 140 0.01929051 6. Developing region
#> 8.1 7257.455 [km^2] 140 0.01929051 6. Developing region
#> 8.2 7257.455 [km^2] 140 0.01929051 6. Developing region
#> 8.3 7257.455 [km^2] 140 0.01929051 6. Developing region
#> 8.4 7257.455 [km^2] 140 0.01929051 6. Developing region
#> 8.5 7257.455 [km^2] 140 0.01929051 6. Developing region
#> 8.6 7257.455 [km^2] 140 0.01929051 6. Developing region
#> 8.7 7257.455 [km^2] 140 0.01929051 6. Developing region
#> 8.8 7257.455 [km^2] 140 0.01929051 6. Developing region
#> income_grp gdp_cap_est life_exp well_being footprint
#> 8 2. High income: nonOECD 114285.7 NA NA NA
#> 8.1 2. High income: nonOECD 114285.7 NA NA NA
#> 8.2 2. High income: nonOECD 114285.7 NA NA NA
#> 8.3 2. High income: nonOECD 114285.7 NA NA NA
#> 8.4 2. High income: nonOECD 114285.7 NA NA NA
#> 8.5 2. High income: nonOECD 114285.7 NA NA NA
#> 8.6 2. High income: nonOECD 114285.7 NA NA NA
#> 8.7 2. High income: nonOECD 114285.7 NA NA NA
#> 8.8 2. High income: nonOECD 114285.7 NA NA NA
#> inequality HPI Lines.NR Lines.ID Line.NR coords.x1 coords.x2
#> 8 NA NA 1 8 1 5550200 -5932855
#> 8.1 NA NA 1 8 1 5589907 -5964836
#> 8.2 NA NA 1 8 1 5660887 -5977490
#> 8.3 NA NA 1 8 1 5656160 -5996685
#> 8.4 NA NA 1 8 1 5615621 -6042456
#> 8.5 NA NA 1 8 1 5490427 -6048972
#> 8.6 NA NA 1 8 1 5509148 -5995424
#> 8.7 NA NA 1 8 1 5536900 -5953683
#> 8.8 NA NA 1 8 1 5550200 -5932855
# What about speed? raster::geom is the fastest
res <- microbenchmark(raster::geom(atf_sp),
ggplot2::fortify(atf_sp),
spbabel::sptable(atf_sp),
as.data.frame(as(as(atf_sp, "SpatialLinesDataFrame"),
"SpatialPointsDataFrame")))
ggplot2::autoplot(res)
#> Coordinate system already present. Adding new coordinate system, which will replace the existing one.
Created on 2019-03-23 by the reprex package (v0.2.1)
ggplot2's fortify() function may be deprecated at some point, so the broom package is now suggested:
library(broom)
broom::tidy(atf_sp)
The only valid answer on this posting was provided by the author "repres_package" above. See that author's recommended solutions if you want to get the right answer. If you want to obtain the geometry of a polygon dataset, you are seeking the long and lat of every single vertex in the polygon feature class. The author's suggestion of using raster::geom() or ggplot2::fortify(), for example, will give you every vertex contained in the SpatialPolygonsDataFrame. That's what you want. The other authors' answers fail to do so.
For example, in my SpatialPolygonsDataFrame of North Carolina counties (from the US Census), I have a total of 1259547 vertices. By using raster::geom(NC_counties), I am given a data frame that contains a long and lat for each of those 1259547 vertices. I could also use ggplot2::fortify(NC_counties) to obtain coordinates for those 1259547 vertices. All of the valid options are given in the answer by "repres_package".
When I ran the code recommended in the other answers on this posting, I obtained long and lat coordinates for only 672 vertices, 1041 vertices, or 1721 vertices, which is off by over one million vertices. I'm supposed to get long and lat coordinates for 1259547 vertices. I suspect that that code is computing centroids for the polygons, which is not the geometry of the polygons.
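As a quick check, a sketch (NC_counties stands for the hypothetical SpatialPolygonsDataFrame of county boundaries mentioned above): both functions return one row per vertex, so nrow() gives the vertex count.
nrow(raster::geom(NC_counties))      # sketch: one row per vertex
nrow(ggplot2::fortify(NC_counties))  # should report the same vertex count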
I have used PCA on 2D arrays before, and I use the first PC score vector that best describes the variance of all the other columns in analyses. Below is an R example that shows the Comp.1 vector that would best describe the variance of the 2D array of interest.
data <- array(data=sample(12), c(4,3))
data
[,1] [,2] [,3]
[1,] 11 2 12
[2,] 4 3 10
[3,] 8 7 1
[4,] 6 9 5
output=princomp(data)
output$scores
Comp.1 Comp.2 Comp.3
[1,] 6.422813 2.865390 0.4025040
[2,] 3.251842 -3.617633 -0.9814571
[3,] -5.856500 1.848419 -1.3819379
[4,] -3.818155 -1.096176 1.9608909
My question is: how can I do this same procedure on a 3D array? For example, if I have an array of size 4 x 5 x 3, how could I get the 4 x 5 2D array that is equivalent to the Comp.1 vector found above?
I have provided an R example below with code and outputs. When I look at the scores, it outputs only one component (not 3 as expected), and the length is 60. Does that mean that the first 20 elements correspond to the first PC, the next 20 to the 2nd PC, and the last 20 to the 3rd PC? If so, how does princomp arrange the entries, so that I can get back to the original 4 x 5 2D array using the first 20 elements (1st PC)? Thank you for your assistance.
data=array(data=sample(48), c(4,5,3))
data
, , 1
[,1] [,2] [,3] [,4] [,5]
[1,] 47 21 45 41 34
[2,] 1 16 32 31 37
[3,] 39 8 35 10 6
[4,] 48 14 25 3 11
, , 2
[,1] [,2] [,3] [,4] [,5]
[1,] 12 43 15 36 23
[2,] 17 4 7 26 46
[3,] 2 13 33 20 40
[4,] 18 19 28 44 38
, , 3
[,1] [,2] [,3] [,4] [,5]
[1,] 42 24 47 21 45
[2,] 5 22 1 16 32
[3,] 30 29 39 8 35
[4,] 27 9 48 14 25
output=princomp(data)
output$scores
Comp.1
[1,] 21.8833333
[2,] -24.1166667
[3,] 13.8833333
[4,] 22.8833333
[5,] -4.1166667
[6,] -9.1166667
[7,] -17.1166667
[8,] -11.1166667
[9,] 19.8833333
[10,] 6.8833333
[11,] 9.8833333
[12,] -0.1166667
[13,] 15.8833333
[14,] 5.8833333
[15,] -15.1166667
[16,] -22.1166667
[17,] 8.8833333
[18,] 11.8833333
[19,] -19.1166667
[20,] -14.1166667
[21,] -13.1166667
[22,] -8.1166667
[23,] -23.1166667
[24,] -7.1166667
[25,] 17.8833333
[26,] -21.1166667
[27,] -12.1166667
[28,] -6.1166667
[29,] -10.1166667
[30,] -18.1166667
[31,] 7.8833333
[32,] 2.8833333
[33,] 10.8833333
[34,] 0.8833333
[35,] -5.1166667
[36,] 18.8833333
[37,] -2.1166667
[38,] 20.8833333
[39,] 14.8833333
[40,] 12.8833333
[41,] 16.8833333
[42,] -20.1166667
[43,] 4.8833333
[44,] 1.8833333
[45,] -1.1166667
[46,] -3.1166667
[47,] 3.8833333
[48,] -16.1166667
[49,] 21.8833333
[50,] -24.1166667
[51,] 13.8833333
[52,] 22.8833333
[53,] -4.1166667
[54,] -9.1166667
[55,] -17.1166667
[56,] -11.1166667
[57,] 19.8833333
[58,] 6.8833333
[59,] 9.8833333
[60,] -0.1166667
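For what it is worth, a small sketch of what happens above (reusing the data array just created): princomp() calls as.matrix() on its input, and as.matrix() flattens anything that is not already a matrix into a single column in column-major order, which is why only one component and 60 scores are returned.
flat <- as.matrix(data)                      # 60 x 1: the 3-D array flattened column-major
identical(as.vector(flat), as.vector(data))  # TRUE
matrix(flat[1:20], nrow = 4, ncol = 5)       # the first 20 entries are simply data[, , 1]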
I had a data set with data like this:
value
[1,] 41601325
[2,] 54917632
[3,] 64616616
[4,] 90791277
[5,] 35335221
[6,] .
. .
. .
which I had to scale down to the range [0,1] using
apply(data1, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X)))
as I needed to fit the data with GP_fit() from the GPfit package. The scaled-down values became, say:
value
[1,] .4535
[2,] .56355
[3,] .64616
[4,] .70791
[5,] .35563
[6,] .
. .
. .
After I applied GP_fit() to the scaled data and used predict(), the output values I got were again in the range [0,1], like:
value
[1,] .0135
[2,] .234355
[3,] .6716
[4,] .325079
[5,] .95563
[6,] .
. .
. .
but I want to take these back to the original range. How can I do that?
Basically, I want to revert back to the original scale for showing the output of predict().
NOTE: The original range is not fixed and can vary, but normally the maximum value possible is about 20 million.
UPDATE: I tried to implement the code written by @JustinFletcher. My data was:
value
[1,] 54.2
[2,] 53.8
[3,] 53.9
[4,] 53.8
[5,] 54.9
[6,] 55.0
[7,] 38.5
[8,] 38.0
[9,] 38.1
[10,] 38.0
[11,] 38.8
[12,] 38.9
[13,] 24.3
[14,] 24.1
[15,] 24.3
[16,] 24.1
[17,] 24.4
[18,] 24.4
[19,] 57.3
[20,] 57.2
[21,] 57.6
[22,] 57.7
[23,] 58.1
[24,] 57.9
I wrote this to rescale it to the range [0,1]:
data_new <- apply(data_test, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X)))
and I got
value
[1,] 0.885294118
[2,] 0.873529412
[3,] 0.876470588
[4,] 0.873529412
[5,] 0.905882353
[6,] 0.908823529
[7,] 0.423529412
[8,] 0.408823529
[9,] 0.411764706
[10,] 0.408823529
[11,] 0.432352941
[12,] 0.435294118
[13,] 0.005882353
[14,] 0.000000000
[15,] 0.005882353
[16,] 0.000000000
[17,] 0.008823529
[18,] 0.008823529
[19,] 0.976470588
[20,] 0.973529412
[21,] 0.985294118
[22,] 0.988235294
[23,] 1.000000000
[24,] 0.994117647
then, to revert it back to the original scale, I wrote this:
data_revert <- apply(data_new, MARGIN = 2, FUN = function(X, Y) (X + min(Y))*diff(range(Y)), Y=data_test)
and I got
value
[1,] 849.5
[2,] 849.1
[3,] 849.2
[4,] 849.1
[5,] 850.2
[6,] 850.3
[7,] 833.8
[8,] 833.3
[9,] 833.4
[10,] 833.3
[11,] 834.1
[12,] 834.2
[13,] 819.6
[14,] 819.4
[15,] 819.6
[16,] 819.4
[17,] 819.7
[18,] 819.7
[19,] 852.6
[20,] 852.5
[21,] 852.9
[22,] 853.0
[23,] 853.4
[24,] 853.2
This output is not correct.
This is simple algebra. To scale data, you calculate
n = (e - e_min)/(e_max - e_min)
Now you need e back, given arbitrary e_min and e_max. It is trivial to show that
n(e_max - e_min) + e_min = e
Example:
e <- 1:10
n <- (e - min(e))/(max(e) - min(e))
new.e <- (n*(10-1)) + 1   # here max(e) = 10 and min(e) = 1
> all(e == new.e)
[1] TRUE
You just need to apply the inverse of the function FUN to the output data. This requires that the original data be passed to the function.
apply(dataOutput, MARGIN = 2, FUN = function(X, Y) X*diff(range(Y)) + min(Y), Y = data1)  # multiply by the range first, then add the minimum
For a great description of the 'apply' function, see here.
P.S.: Roman's response is exactly the same idea; I just implemented it with your variables, using apply, because I thought it was interesting.
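Applied to the data_test / data_new example from the question's update, a quick check might look like this (a sketch, assuming data_test is the one-column matrix shown there):
data_revert <- apply(data_new, MARGIN = 2,
                     FUN = function(X, Y) X*diff(range(Y)) + min(Y), Y = data_test)
max(abs(data_revert - data_test))   # effectively zero: the original values are recovered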
I am trying to fit a neural net using nnet, but for some reason I am getting fitted values of 1. Any suggestions?
library(nnet)
traininginput <- as.data.frame(runif(50, min=0, max=100))
trainingoutput <- sqrt(traininginput)
#Column bind the data into one variable
trainingdata <- cbind(traininginput,trainingoutput)
colnames(trainingdata) <- c("Input","Output")
nnet1=nnet(trainingdata$Input,trainingdata$Output,size=10,decay=.2,MaxNWts=100)
# weights: 31
initial value 2398.911170
iter 10 value 1881.721970
iter 20 value 1879.273609
iter 30 value 1879.248746
final value 1879.248003
converged
nnet1$fitted.values
[,1]
[1,] 0.9995635611
[2,] 0.9995572993
[3,] 0.9994755520
[4,] 0.9995623911
[5,] 0.9946006508
[6,] 0.9995635779
[7,] 0.9995108061
[8,] 0.9995635025
[9,] 0.9995634973
[10,] 0.9993213029
[11,] 0.9994652808
[12,] 0.9979116411
[13,] 0.9993242430
[14,] 0.9995635764
[15,] 0.9995632595
[16,] 0.9995583546
[17,] 0.9992778848
[18,] 0.9995635364
[19,] 0.9939526885
[20,] 0.9995635788
[21,] 0.9993010783
[22,] 0.9995597586
[23,] 0.9995635748
[24,] 0.9995635799
[25,] 0.9995634813
[26,] 0.9992898229
[27,] 0.9959834977
[28,] 0.9991941868
[29,] 0.9995632224
[30,] 0.9995486025
[31,] 0.9995608608
[32,] 0.9995635755
[33,] 0.9995635740
[34,] 0.9995491922
[35,] 0.9995635661
[36,] 0.9995629924
[37,] 0.9995634948
[38,] 0.9994889240
[39,] 0.9995633558
[40,] 0.9995559725
[41,] 0.9995563696
[42,] 0.9995564636
[43,] 0.9995602699
[44,] 0.9995635768
[45,] 0.9995612253
[46,] 0.9895069469
[47,] 0.9995271104
[48,] 0.9995635564
[49,] 0.9995635797
[50,] 0.9995609961
Add the linout = TRUE argument to the nnet function:
nnet1=nnet(trainingdata$Input,trainingdata$Output,size=10,decay=.2,MaxNWts=100, linout = TRUE)
That should solve your problem! By default, nnet uses logistic output units, so the fitted values are squashed into (0, 1); see ?nnet.
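A quick way to see the difference (a sketch, reusing the training data built in the question):
# Sketch: with linout = TRUE the output unit is linear, so fitted values are no longer squashed into (0, 1)
nnet2 <- nnet(trainingdata$Input, trainingdata$Output,
              size = 10, decay = 0.2, MaxNWts = 100, linout = TRUE)
plot(trainingdata$Input, trainingdata$Output)             # the sqrt curve
points(trainingdata$Input, nnet2$fitted.values, col = 2)  # fitted values now on the same scale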