Getting a not-so-good neural network in R

I am trying to fit a neural net using the nnet package, but for some reason I am getting fitted values of 1. Any suggestions?
library(nnet)

traininginput <- as.data.frame(runif(50, min = 0, max = 100))
trainingoutput <- sqrt(traininginput)
#Column bind the data into one variable
trainingdata <- cbind(traininginput,trainingoutput)
colnames(trainingdata) <- c("Input","Output")
nnet1 <- nnet(trainingdata$Input, trainingdata$Output, size = 10, decay = 0.2, MaxNWts = 100)
# weights: 31
initial value 2398.911170
iter 10 value 1881.721970
iter 20 value 1879.273609
iter 30 value 1879.248746
final value 1879.248003
converged
nnet1$fitted.values
[,1]
[1,] 0.9995635611
[2,] 0.9995572993
[3,] 0.9994755520
[4,] 0.9995623911
[5,] 0.9946006508
[6,] 0.9995635779
[7,] 0.9995108061
[8,] 0.9995635025
[9,] 0.9995634973
[10,] 0.9993213029
[11,] 0.9994652808
[12,] 0.9979116411
[13,] 0.9993242430
[14,] 0.9995635764
[15,] 0.9995632595
[16,] 0.9995583546
[17,] 0.9992778848
[18,] 0.9995635364
[19,] 0.9939526885
[20,] 0.9995635788
[21,] 0.9993010783
[22,] 0.9995597586
[23,] 0.9995635748
[24,] 0.9995635799
[25,] 0.9995634813
[26,] 0.9992898229
[27,] 0.9959834977
[28,] 0.9991941868
[29,] 0.9995632224
[30,] 0.9995486025
[31,] 0.9995608608
[32,] 0.9995635755
[33,] 0.9995635740
[34,] 0.9995491922
[35,] 0.9995635661
[36,] 0.9995629924
[37,] 0.9995634948
[38,] 0.9994889240
[39,] 0.9995633558
[40,] 0.9995559725
[41,] 0.9995563696
[42,] 0.9995564636
[43,] 0.9995602699
[44,] 0.9995635768
[45,] 0.9995612253
[46,] 0.9895069469
[47,] 0.9995271104
[48,] 0.9995635564
[49,] 0.9995635797
[50,] 0.9995609961

Add the linout = TRUE argument to the nnet function:
nnet1 <- nnet(trainingdata$Input, trainingdata$Output, size = 10, decay = 0.2, MaxNWts = 100, linout = TRUE)
That should solve your problem! By default nnet uses logistic output units, which squash every prediction into (0, 1); linout = TRUE switches to linear output units, which is what you want for regression - see ?nnet.
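For completeness, here is a minimal sketch of the corrected fit (set.seed, maxit and trace are added here for reproducibility and are not in the original post); the fitted values now track sqrt(x) instead of saturating near 1:
library(nnet)

set.seed(42)                        # reproducible training data
x <- runif(50, min = 0, max = 100)
y <- sqrt(x)

# linout = TRUE requests linear output units, suitable for regression
fit <- nnet(as.matrix(x), y, size = 10, decay = 0.2, MaxNWts = 100,
            maxit = 500, linout = TRUE, trace = FALSE)

head(cbind(truth = y, fitted = fit$fitted.values))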

Related

How to convert the data below to a data frame so that I can plot it? (data retrieved from API commands)

I recently retrieved data from one of our remote monitors through an API using the command res <- GET("myurl", authenticate("xxx", "xxx")). Then I got:
> res
Response [https://myurl?dataFormat=json&orient=split&days=1&last=false&valuesOnly=false]
Date: 2021-09-09 17:34
Status: 200
Content-Type: text/html; charset=utf-8
Size: 29.3 kB
Then I used data = fromJSON(rawToChar(res$content)) to parse the JSON response. The result I got:
> data
$columns
[1] "DATETIME" "TIMESTAMP" "RECORD" "RunType"
[5] "Vapor" "Concentration" "PeakHeight" "PeakLocation"
[9] "CalFactor" "ColTemp" "CarrDuty" "Baseline"
[13] "Ver" "RunCount" "Status" "Latitude"
[17] "Longitude" "WS_ms" "WS_avg" "WindDir"
[21] "Battery_Voltage" "EnclosureTemp" "EngineTemp" "AlarmLevel"
$data
[,1] [,2] [,3] [,4] [,5]
[1,] "2021-09-08T17:26:17Z" "2021-09-08 11:26:17-0600" "3776" "MEAS" "BENZENE"
[2,] "2021-09-08T17:36:17Z" "2021-09-08 11:36:17-0600" "3777" "MEAS" "BENZENE"
[3,] "2021-09-08T17:46:17Z" "2021-09-08 11:46:17-0600" "3778" "MEAS" "BENZENE"
[4,] "2021-09-08T17:56:17Z" "2021-09-08 11:56:17-0600" "3779" "MEAS" "BENZENE"
[5,] "2021-09-08T18:06:25Z" "2021-09-08 12:06:25-0600" "3780" "CAL" "BENZENE"
[6,] "2021-09-08T18:16:17Z" "2021-09-08 12:16:17-0600" "3781" "MEAS" "BENZENE"
[7,] "2021-09-08T18:26:17Z" "2021-09-08 12:26:17-0600" "3782" "MEAS" "BENZENE"
[8,] "2021-09-08T18:36:17Z" "2021-09-08 12:36:17-0600" "3783" "MEAS" "BENZENE"
[9,] "2021-09-08T18:46:17Z" "2021-09-08 12:46:17-0600" "3784" "MEAS" "BENZENE"
[10,] "2021-09-08T18:56:17Z" "2021-09-08 12:56:17-0600" "3785" "MEAS" "BENZENE"
[11,] "2021-09-08T19:06:17Z" "2021-09-08 13:06:17-0600" "3786" "MEAS" "BENZENE"
[12,] "2021-09-08T19:16:17Z" "2021-09-08 13:16:17-0600" "3787" "MEAS" "BENZENE"
[13,] "2021-09-08T19:26:17Z" "2021-09-08 13:26:17-0600" "3788" "MEAS" "BENZENE"
[14,] "2021-09-08T19:36:17Z" "2021-09-08 13:36:17-0600" "3789" "MEAS" "BENZENE"
[15,] "2021-09-08T19:46:17Z" "2021-09-08 13:46:17-0600" "3790" "MEAS" "BENZENE"
[16,] "2021-09-08T19:56:16Z" "2021-09-08 13:56:16-0600" "3791" "MEAS" "BENZENE"
[17,] "2021-09-08T20:06:16Z" "2021-09-08 14:06:16-0600" "3792" "MEAS" "BENZENE"
[18,] "2021-09-08T20:16:16Z" "2021-09-08 14:16:16-0600" "3793" "MEAS" "BENZENE"
[19,] "2021-09-08T20:26:16Z" "2021-09-08 14:26:16-0600" "3794" "MEAS" "BENZENE"
[20,] "2021-09-08T20:36:16Z" "2021-09-08 14:36:16-0600" "3795" "MEAS" "BENZENE"
[21,] "2021-09-08T20:46:16Z" "2021-09-08 14:46:16-0600" "3796" "MEAS" "BENZENE"
[22,] "2021-09-08T20:56:15Z" "2021-09-08 14:56:15-0600" "3797" "MEAS" "BENZENE"
[23,] "2021-09-08T21:06:16Z" "2021-09-08 15:06:16-0600" "3798" "MEAS" "BENZENE"
[24,] "2021-09-08T21:16:16Z" "2021-09-08 15:16:16-0600" "3799" "MEAS" "BENZENE"
[25,] "2021-09-08T21:26:16Z" "2021-09-08 15:26:16-0600" "3800" "MEAS" "BENZENE"
[26,] "2021-09-08T21:36:16Z" "2021-09-08 15:36:16-0600" "3801" "MEAS" "BENZENE"
[27,] "2021-09-08T21:46:16Z" "2021-09-08 15:46:16-0600" "3802" "MEAS" "BENZENE"
[28,] "2021-09-08T21:56:16Z" "2021-09-08 15:56:16-0600" "3803" "MEAS" "BENZENE"
[29,] "2021-09-08T22:06:16Z" "2021-09-08 16:06:16-0600" "3804" "MEAS" "BENZENE"
[30,] "2021-09-08T22:16:16Z" "2021-09-08 16:16:16-0600" "3805" "MEAS" "BENZENE"
[31,] "2021-09-08T22:26:16Z" "2021-09-08 16:26:16-0600" "3806" "MEAS" "BENZENE"
[32,] "2021-09-08T22:36:16Z" "2021-09-08 16:36:16-0600" "3807" "MEAS" "BENZENE"
[33,] "2021-09-08T22:46:16Z" "2021-09-08 16:46:16-0600" "3808" "MEAS" "BENZENE"
[34,] "2021-09-08T22:56:17Z" "2021-09-08 16:56:17-0600" "3809" "MEAS" "BENZENE"
[35,] "2021-09-08T23:06:17Z" "2021-09-08 17:06:17-0600" "3810" "MEAS" "BENZENE"
[36,] "2021-09-08T23:16:17Z" "2021-09-08 17:16:17-0600" "3811" "MEAS" "BENZENE"
[37,] "2021-09-08T23:26:17Z" "2021-09-08 17:26:17-0600" "3812" "MEAS" "BENZENE"
[38,] "2021-09-08T23:36:17Z" "2021-09-08 17:36:17-0600" "3813" "MEAS" "BENZENE"
[39,] "2021-09-08T23:46:17Z" "2021-09-08 17:46:17-0600" "3814" "MEAS" "BENZENE"
[40,] "2021-09-08T23:56:17Z" "2021-09-08 17:56:17-0600" "3815" "MEAS" "BENZENE"
[41,] "2021-09-09T00:06:25Z" "2021-09-08 18:06:25-0600" "3816" "CAL" "BENZENE"
[,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
[1,] "0.25" "1767" "185" "1.179" "52.4" "0.08" "130688" "3.04" "834"
[2,] "0.25" "1739" "183" "1.179" "52.2" "0.08" "130976" "3.04" "835"
[3,] "0.26" "2190" "185" "1.179" "51.9" "0.08" "131464" "3.04" "836"
[4,] "0.25" "1837" "185" "1.179" "52.2" "0.08" "131896" "3.04" "837"
[5,] "9.08" "186269" "183" "1.187" "52.4" "0.08" "132304" "3.04" "838"
[6,] "0.31" "3393" "184" "1.187" "51.7" "0.08" "133184" "3.04" "839"
[7,] "0.28" "2653" "185" "1.187" "52.1" "0.08" "133476" "3.04" "840"
[8,] "0.26" "2120" "185" "1.187" "52.4" "0.08" "133900" "3.04" "841"
[9,] "0.27" "2260" "185" "1.187" "51.8" "0.08" "134192" "3.04" "842"
[10,] "0.26" "2178" "185" "1.187" "52" "0.08" "134608" "3.04" "843"
[11,] "0.25" "1868" "185" "1.187" "52.3" "0.08" "135092" "3.04" "844"
[12,] "0.24" "1654" "186" "1.187" "52.4" "0.08" "135496" "3.04" "845"
[13,] "0.27" "2326" "184" "1.187" "51.8" "0.08" "135932" "3.04" "846"
[14,] "0.26" "1963" "185" "1.187" "52.1" "0.08" "136396" "3.04" "847"
[15,] "0.25" "1829" "185" "1.187" "52.5" "0.08" "136764" "3.04" "848"
[16,] "0.26" "2110" "185" "1.187" "51.8" "0.08" "137104" "3.04" "849"
[17,] "0.3" "3114" "184" "1.187" "52" "0.08" "137488" "3.04" "850"
[18,] "0.24" "1557" "186" "1.187" "52.3" "0.08" "137952" "3.04" "851"
[19,] "0.23" "1237" "186" "1.187" "52.4" "0.08" "138312" "3.04" "852"
[20,] "0.27" "2390" "184" "1.187" "51.9" "0.08" "138736" "3.04" "853"
[,15] [,16] [,17] [,18] [,19] [,20]
[1,] "OK " "32.8518183333" "-104.39718" "7.664" "5.334" "23.63"
[2,] "OK " "32.8518233333" "-104.3971816667" "6.664" "4.847" "353.5"
[3,] "OK " "32.851825" "-104.39718" "2.999" "4.805" "352.8"
[4,] "OK " "32.8518133333" "-104.3971783333" "4.165" "4.786" "0.408"
[5,] "OK CAL PASS" "32.8518116667" "-104.397185" "4.831" "4.522" "12.97"
[6,] "OK " "32.8518083333" "-104.397185" "3.165" "4.269" "27.72"
[7,] "OK " "32.8518066667" "-104.3971766667" "3.165" "4.182" "0.085"
[8,] "OK " "32.8518083333" "-104.397175" "2.832" "4.059" "9.05"
[9,] "OK " "32.8518183333" "-104.3971783333" "3.665" "3.597" "53.43"
[10,] "OK " "32.851825" "-104.3971783333" "4.498" "4.291" "40.63"
[11,] "OK " "32.851825" "-104.397175" "2.832" "3.158" "32.96"
[12,] "OK " "32.8518266667" "-104.397175" "4.332" "3.4" "35.28"
[13,] "OK " "32.8518383333" "-104.3971716667" "2.332" "4.121" "343.1"
[14,] "OK " "32.851845" "-104.3971716667" "3.332" "2.911" "25.6"
[15,] "OK " "32.8518416667" "-104.3971683333" "3.499" "1.802" "8.47"
[16,] "OK " "32.85184" "-104.3971683333" "1.333" "3.036" "10.12"
[17,] "OK " "32.8518333333" "-104.3971733333" "2.499" "2.767" "7.565"
[18,] "OK " "32.85183" "-104.397175" "1.499" "2.822" "35.21"
[19,] "OK " "32.851825" "-104.3971716667" "1.999" "2.591" "28"
[20,] "OK " "32.8518216667" "-104.3971716667" "0.666" "1.631" "352.4"
[,21] [,22] [,23] [,24]
[1,] "12.24" "31.5" "33.33" "NONE"
[2,] "12.23" "31.91" "33.25" "NONE"
[3,] "12.25" "32.34" "33.24" "NONE"
[4,] "12.15" "32.87" "33.38" "NONE"
[5,] "12.12" "33.09" "33.42" "NONE"
[6,] "12.22" "33.43" "33.34" "NONE"
[7,] "12.04" "33.92" "33.41" "NONE"
[8,] "12.01" "34.46" "33.48" "NONE"
[9,] "12.02" "34.67" "33.48" "NONE"
[10,] "12.16" "34.88" "33.52" "NONE"
[11,] "12.05" "35.04" "33.46" "NONE"
[12,] "12.21" "35.41" "33.53" "NONE"
[13,] "12.23" "35.6" "33.48" "NONE"
[14,] "12.28" "35.86" "33.45" "NONE"
[15,] "11.89" "36.27" "33.57" "NONE"
[16,] "11.8" "36.47" "33.61" "NONE"
[17,] "11.9" "36.63" "33.66" "NONE"
[18,] "12.05" "36.61" "33.6" "NONE"
[19,] "11.89" "37" "33.61" "NONE"
[20,] "12.09" "37.26" "33.63" "NONE"
[ reached getOption("max.print") -- omitted 104 rows ]
I want to plot the data and do some analysis, but I have not managed to get it into a data frame. Can someone help me out? Thank you in advance!
Since $data is a character matrix and $columns holds the matching column names, you can combine them in one line:
setNames(as.data.frame(XYZ$data), XYZ$columns)
# DATETIME TIMESTAMP RECORD RunType Vapor
# 1 2021-09-08T17:26:17Z 2021-09-08 11:26:17-0600 3776 MEAS BENZENE
# 2 2021-09-08T17:36:17Z 2021-09-08 11:36:17-0600 3777 MEAS BENZENE
# 3 2021-09-08T17:46:17Z 2021-09-08 11:46:17-0600 3778 MEAS BENZENE
Abridged sample data:
XYZ <- list(columns = c("DATETIME", "TIMESTAMP", "RECORD", "RunType", "Vapor"), data = structure(c("2021-09-08T17:26:17Z", "2021-09-08T17:36:17Z", "2021-09-08T17:46:17Z", "2021-09-08 11:26:17-0600", "2021-09-08 11:36:17-0600", "2021-09-08 11:46:17-0600", "3776", "3777", "3778", "MEAS", "MEAS", "MEAS", "BENZENE", "BENZENE", "BENZENE"), .Dim = c(3L, 5L)))
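Everything arrives as character, so the columns will likely need converting before plotting. A sketch, assuming R >= 4.0 (for the data frame method of type.convert) and the column names from the sample above:
df <- setNames(as.data.frame(XYZ$data), XYZ$columns)
df <- type.convert(df, as.is = TRUE)   # numeric-looking columns become numeric
df$DATETIME <- as.POSIXct(df$DATETIME, format = "%Y-%m-%dT%H:%M:%SZ", tz = "UTC")
plot(df$DATETIME, df$RECORD, type = "b")   # example plot once the data is converted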

How do I calculate the perimeter of xy points in R?

I am looking to calculate the perimeter of xy points (including all the points in sequential order), where the first and last point aren't always equal. I can't figure out why the code below for calculating perimeter isn't working.
Ideally, I would like a method which flags when the first and last xy points are different.
Thanks
m
x y
[1,] 606.3 95.4
[2,] 612.4 178.7
[3,] 610.2 222.6
[4,] 610.2 222.8
[5,] 625.8 249.8
[6,] 625.8 250.1
[7,] 633.9 268.9
[8,] 668.7 272.2
[9,] 693.7 222.6
[10,] 723.2 157.0
[11,] 738.6 109.9
[12,] 681.2 90.5
[13,] 606.3 95.4
[14,] 833.3 154.6
[15,] 753.7 267.5
[16,] 747.8 305.1
[17,] 773.8 354.7
[18,] 767.0 393.8
[19,] 763.0 442.0
[20,] 817.4 446.9
[21,] 817.6 446.9
[22,] 840.2 412.3
[23,] 892.1 317.7
[24,] 875.3 218.8
[25,] 833.3 154.6
library(geosphere)
perimeter(m)
Error in perimeter(m) : could not find function "perimeter"
This will do the trick:
sample data
library( data.table )
m <- fread("x y
606.3 95.4
612.4 178.7
610.2 222.6
610.2 222.8
625.8 249.8
625.8 250.1
633.9 268.9
668.7 272.2
693.7 222.6
723.2 157.0
738.6 109.9
681.2 90.5
606.3 95.4
833.3 154.6
753.7 267.5
747.8 305.1
773.8 354.7
767.0 393.8
763.0 442.0
817.4 446.9
817.6 446.9
840.2 412.3
892.1 317.7
875.3 218.8
833.3 154.6")
code
library(grDevices)

hull <- chull(m)                 # indices of the points on the convex hull
coords <- m[c(hull, hull[1]), ]  # close the polygon by repeating the first point

# plot it
plot(m)                          # plot the points
lines(coords, col = "red")       # plot the hull perimeter
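Note that chull gives the convex hull, not the path through the points in the order given. If you want the perimeter length of the points as listed, plus a flag when the ring is not closed, here is a base-R sketch (geosphere is not needed, since these are plain xy coordinates rather than lon/lat):
pts <- as.matrix(m)
if (any(pts[1, ] != pts[nrow(pts), ])) {
  warning("first and last points differ; closing the polygon")
  pts <- rbind(pts, pts[1, ])
}

# sum of euclidean distances between consecutive points
perim <- sum(sqrt(rowSums(diff(pts)^2)))
perim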

List to data frame while keeping structure

str(coord_mat)
List of 1
$ :List of 1
..$ : num [1:17, 1:2] -122 -122 -122 -122 -122 ...
I have a list of coordinate pairs in coord_mat that I would like to transform into a data frame (or matrix) of coordinate pairs with the same structure (the first column being lon, the second being lat).
> coord_mat
[[1]]
[[1]][[1]]
[,1] [,2]
[1,] -122.3435 47.63787
[2,] -122.3435 47.63787
[3,] -122.3434 47.63787
[4,] -122.3434 47.63787
[5,] -122.3434 47.63787
[6,] -122.3434 47.63787
[7,] -122.3434 47.63787
[8,] -122.3434 47.63784
[9,] -122.3433 47.63777
[10,] -122.3430 47.63772
[11,] -122.3427 47.63778
[12,] -122.3425 47.63776
[13,] -122.3423 47.63749
[14,] -122.3421 47.63718
[15,] -122.3420 47.63700
[16,] -122.3419 47.63698
[17,] -122.3419 47.63698
How is this possible in R while keeping the same two-column structure as the list?
I have tried matrix(unlist(coord_mat)), but this just produces a 34 x 1 matrix with the lon values first and then the lat values. Is it because I am working with a list of lists?
> matrix(unlist(coord_mat))
[,1]
[1,] -122.34345
[2,] -122.34345
[3,] -122.34340
[4,] -122.34340
[5,] -122.34340
[6,] -122.34340
[7,] -122.34340
[8,] -122.34338
[9,] -122.34334
[10,] -122.34299
[11,] -122.34273
[12,] -122.34249
[13,] -122.34230
[14,] -122.34208
[15,] -122.34198
[16,] -122.34194
[17,] -122.34194
[18,] 47.63787
[19,] 47.63787
[20,] 47.63787
[21,] 47.63787
[22,] 47.63787
[23,] 47.63787
[24,] 47.63787
[25,] 47.63784
[26,] 47.63777
[27,] 47.63772
[28,] 47.63778
[29,] 47.63776
[30,] 47.63749
[31,] 47.63718
[32,] 47.63700
[33,] 47.63698
[34,] 47.63698
Here is the data:
dput(coord_mat)
list(list(structure(c(-122.34345, -122.34345, -122.343398333333,
-122.343398333333, -122.343398333333, -122.343398333333, -122.343398333333,
-122.343376666667, -122.34334, -122.342991666667, -122.342731666667,
-122.342491666667, -122.3423, -122.342081666667, -122.341983333333,
-122.341943333333, -122.341943333333, 47.6378716666667, 47.6378716666667,
47.6378683333333, 47.6378683333333, 47.6378683333333, 47.6378683333333,
47.6378683333333, 47.637835, 47.637775, 47.6377183333333, 47.63778,
47.63776, 47.6374916666667, 47.6371816666667, 47.6369966666667,
47.6369783333333, 47.6369783333333), .Dim = c(17L, 2L))))
The matrix you want is already the innermost element of the nested list, so you can extract it directly; alternatively, rebuild it from the unlisted vector, since unlist preserves the column-major order (lon values first, then lat):
res <- coord_mat[[c(1, 1)]]                 # recursive indexing: coord_mat[[1]][[1]]
# or
res <- matrix(unlist(coord_mat), ncol = 2)  # refold the 34 values into two columns

colnames(res) <- c("lon", "lat")
res
lon lat
[1,] -122.3435 47.63787
[2,] -122.3435 47.63787
[3,] -122.3434 47.63787
[4,] -122.3434 47.63787
[5,] -122.3434 47.63787
[6,] -122.3434 47.63787
[7,] -122.3434 47.63787
[8,] -122.3434 47.63784
[9,] -122.3433 47.63777
[10,] -122.3430 47.63772
[11,] -122.3427 47.63778
[12,] -122.3425 47.63776
[13,] -122.3423 47.63749
[14,] -122.3421 47.63718
[15,] -122.3420 47.63700
[16,] -122.3419 47.63698
[17,] -122.3419 47.63698
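If you need a data frame rather than a matrix (for example for ggplot2), the same extraction works; a minimal sketch:
df <- setNames(as.data.frame(coord_mat[[c(1, 1)]]), c("lon", "lat"))
str(df)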

Principal component analysis (princomp, principal, etc.) on a 3D array

I have used PCA on 2D arrays before, and in my analyses I use the first PC score vector, the one that best describes the variance of the other columns. Below is an R example that shows the Comp.1 vector that would best describe the variance of the 2D array of interest.
data <- array(data=sample(12), c(4,3))
data
[,1] [,2] [,3]
[1,] 11 2 12
[2,] 4 3 10
[3,] 8 7 1
[4,] 6 9 5
output=princomp(data)
output$scores
Comp.1 Comp.2 Comp.3
[1,] 6.422813 2.865390 0.4025040
[2,] 3.251842 -3.617633 -0.9814571
[3,] -5.856500 1.848419 -1.3819379
[4,] -3.818155 -1.096176 1.9608909
My question is: how can I do this same procedure on a 3D array? For example, if I have an array of size 4 x 5 x 3, how could I get the 4 x 5 2D array that is equivalent to the Comp.1 vector found above?
I have provided an R example below with code and outputs. When I look at the scores it only outputs one component (not 3 as expected), and the length is 60. Does that mean that the first 20 elements correspond to the first PC, the next 20 to the 2nd PC, and the last 20 to the 3rd PC? If so, how does princomp arrange the entries, so that I can get back to the original 4 x 5 2D array using the first 20 elements (1st PC)? Thank you for your assistance.
data=array(data=sample(48), c(4,5,3))
data
, , 1
[,1] [,2] [,3] [,4] [,5]
[1,] 47 21 45 41 34
[2,] 1 16 32 31 37
[3,] 39 8 35 10 6
[4,] 48 14 25 3 11
, , 2
[,1] [,2] [,3] [,4] [,5]
[1,] 12 43 15 36 23
[2,] 17 4 7 26 46
[3,] 2 13 33 20 40
[4,] 18 19 28 44 38
, , 3
[,1] [,2] [,3] [,4] [,5]
[1,] 42 24 47 21 45
[2,] 5 22 1 16 32
[3,] 30 29 39 8 35
[4,] 27 9 48 14 25
output=princomp(data)
output$scores
Comp.1
[1,] 21.8833333
[2,] -24.1166667
[3,] 13.8833333
[4,] 22.8833333
[5,] -4.1166667
[6,] -9.1166667
[7,] -17.1166667
[8,] -11.1166667
[9,] 19.8833333
[10,] 6.8833333
[11,] 9.8833333
[12,] -0.1166667
[13,] 15.8833333
[14,] 5.8833333
[15,] -15.1166667
[16,] -22.1166667
[17,] 8.8833333
[18,] 11.8833333
[19,] -19.1166667
[20,] -14.1166667
[21,] -13.1166667
[22,] -8.1166667
[23,] -23.1166667
[24,] -7.1166667
[25,] 17.8833333
[26,] -21.1166667
[27,] -12.1166667
[28,] -6.1166667
[29,] -10.1166667
[30,] -18.1166667
[31,] 7.8833333
[32,] 2.8833333
[33,] 10.8833333
[34,] 0.8833333
[35,] -5.1166667
[36,] 18.8833333
[37,] -2.1166667
[38,] 20.8833333
[39,] 14.8833333
[40,] 12.8833333
[41,] 16.8833333
[42,] -20.1166667
[43,] 4.8833333
[44,] 1.8833333
[45,] -1.1166667
[46,] -3.1166667
[47,] 3.8833333
[48,] -16.1166667
[49,] 21.8833333
[50,] -24.1166667
[51,] 13.8833333
[52,] 22.8833333
[53,] -4.1166667
[54,] -9.1166667
[55,] -17.1166667
[56,] -11.1166667
[57,] 19.8833333
[58,] 6.8833333
[59,] 9.8833333
[60,] -0.1166667
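What happened here: princomp coerces its input with as.matrix, and a 3D array is not a matrix, so it is flattened into a single 60 x 1 column; the single component you see is just the centered data, and scores 49-60 repeat scores 1-12 because array(data = sample(48), c(4, 5, 3)) recycles 48 values to fill 60 slots. To get a 4 x 5 first-component map, one common layout (an assumption about the intent here) is to treat the third dimension as the variables and each of the 4 x 5 cells as an observation:
data <- array(sample(60), c(4, 5, 3))      # 60 distinct values, no recycling

X <- matrix(data, nrow = 4 * 5, ncol = 3)  # column k is slice data[, , k] (column-major)
output <- princomp(X)                      # 20 observations of 3 variables -> 3 components

pc1 <- matrix(output$scores[, 1], nrow = 4, ncol = 5)  # fold Comp.1 back into 4 x 5
pc1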

How to revert scaling of data in R?

I had a data set with data like this:
value
[1,] 41601325
[2,] 54917632
[3,] 64616616
[4,] 90791277
[5,] 35335221
[6,] .
. .
. .
which I had to scale down to the range [0,1] using
apply(data1, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X)))
as I needed to fit the data with GP_fit() from the GPfit package. The scaled-down values became, say:
value
[1,] .4535
[2,] .56355
[3,] .64616
[4,] .70791
[5,] .35563
[6,] .
. .
. .
After I applied GP_fit() to the scaled data and used predict(), the output values I got are again in the range [0,1], like:
value
[1,] .0135
[2,] .234355
[3,] .6716
[4,] .325079
[5,] .95563
[6,] .
. .
. .
but I want to map these back to the original range. How can I do that?
Basically I want to revert to the original scale when showing the output of predict().
NOTE: The original range is not fixed and can vary, but normally the maximum value possible is about 20 million.
UPDATE: I tried to implement the code written by @JustinFletcher. My data was:
value
[1,] 54.2
[2,] 53.8
[3,] 53.9
[4,] 53.8
[5,] 54.9
[6,] 55.0
[7,] 38.5
[8,] 38.0
[9,] 38.1
[10,] 38.0
[11,] 38.8
[12,] 38.9
[13,] 24.3
[14,] 24.1
[15,] 24.3
[16,] 24.1
[17,] 24.4
[18,] 24.4
[19,] 57.3
[20,] 57.2
[21,] 57.6
[22,] 57.7
[23,] 58.1
[24,] 57.9
I wrote this to rescale it to the range [0,1]:
data_new <- apply(data_test, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X)))
and I got
value
[1,] 0.885294118
[2,] 0.873529412
[3,] 0.876470588
[4,] 0.873529412
[5,] 0.905882353
[6,] 0.908823529
[7,] 0.423529412
[8,] 0.408823529
[9,] 0.411764706
[10,] 0.408823529
[11,] 0.432352941
[12,] 0.435294118
[13,] 0.005882353
[14,] 0.000000000
[15,] 0.005882353
[16,] 0.000000000
[17,] 0.008823529
[18,] 0.008823529
[19,] 0.976470588
[20,] 0.973529412
[21,] 0.985294118
[22,] 0.988235294
[23,] 1.000000000
[24,] 0.994117647
then to revert it to the original scale I wrote this:
data_revert <- apply(data_new, MARGIN = 2, FUN = function(X, Y) (X + min(Y))*diff(range(Y)), Y=data_test)
and I got
value
[1,] 849.5
[2,] 849.1
[3,] 849.2
[4,] 849.1
[5,] 850.2
[6,] 850.3
[7,] 833.8
[8,] 833.3
[9,] 833.4
[10,] 833.3
[11,] 834.1
[12,] 834.2
[13,] 819.6
[14,] 819.4
[15,] 819.6
[16,] 819.4
[17,] 819.7
[18,] 819.7
[19,] 852.6
[20,] 852.5
[21,] 852.9
[22,] 853.0
[23,] 853.4
[24,] 853.2
This output is not correct.
This is simple algebra. To scale the data, you calculate
n = (e - e_min)/(e_max - e_min)
Now you need e back, given the original e_min and e_max. It is trivial to show that
n*(e_max - e_min) + e_min = e
Example:
e <- 1:10
n <- (e - min(e))/(max(e) - min(e))
new.e <- n*(10 - 1) + 1  # i.e. n*(max(e) - min(e)) + min(e)
> all(e == new.e)
[1] TRUE
You just need to apply the inverse of the function FUN to the output data. This requires the original data to be passed to the function. Mind the order of operations: multiply by the range first, then add the minimum (the update above added the minimum before multiplying, which is exactly why its output is wrong):
apply(dataOutput, MARGIN = 2, FUN = function(X, Y) X*diff(range(Y)) + min(Y), Y = data1)
For a description of the apply function, see ?apply.
P.S.: Roman's response is exactly the same idea; I just implemented it with your variables, using apply, because I thought it was interesting.
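A quick round trip with the 24-point series from the update (the helper names rescale and unscale are illustrative, not from the thread) confirms that the corrected inverse recovers the original values:
data_test <- matrix(c(54.2, 53.8, 53.9, 53.8, 54.9, 55.0,
                      38.5, 38.0, 38.1, 38.0, 38.8, 38.9,
                      24.3, 24.1, 24.3, 24.1, 24.4, 24.4,
                      57.3, 57.2, 57.6, 57.7, 58.1, 57.9), ncol = 1)

rescale <- function(X) (X - min(X)) / diff(range(X))   # map to [0, 1]
unscale <- function(X, Y) X * diff(range(Y)) + min(Y)  # inverse, given the original Y

data_new    <- apply(data_test, MARGIN = 2, FUN = rescale)
data_revert <- apply(data_new, MARGIN = 2, FUN = unscale, Y = data_test)

all.equal(data_revert, data_test)   # TRUE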
