Hi, I am working with a table in R. The first column contains the (monthly) date and the following columns contain return data for several portfolios. I am using the PerformanceAnalytics package, so I need this data to be read as a time series.
This is what I tried. It worked with an earlier sheet, but now I always get the error shown below, even though I only changed the return data and nothing else. I don't understand why it won't read the dates correctly.
> library(PerformanceAnalytics)
Loading required package: xts
Loading required package: zoo
Attaching package: ‘zoo’
The following objects are masked from ‘package:base’:
as.Date, as.Date.numeric
Package PerformanceAnalytics (1.4.3541) loaded.
Copyright (c) 2004-2014 Peter Carl and Brian G. Peterson, GPL-2 | GPL-3
http://r-forge.r-project.org/projects/returnanalytics/
> #load file into R
> FactorR <- read.table("~/Desktop/Rfiles/FactorRegression.csv",header=TRUE,sep=";")
>
> #Time Series (first column date)
> FactorR_xts <- xts(x = FactorR[, -1],order.by = as.Date(FactorR$Date))
Error in charToDate(x) :
character string is not in a standard unambiguous format
I attached the dput() output so you can see what kind of data I am talking about. (I did not include all of the data because it would be too much.)
> dput(FactorR)
structure(list(Date = structure(c(203L, 55L, 5L, 142L, 70L, 35L,
85L, 167L, 178L, 102L, 105L, 116L, 204L, 26L, 2L, 143L, 71L,
9L, 145L, 36L, 157L, 169L, 19L, 181L, 107L, 192L, 122L, 7L,
30L, 60L, 146L, 17L, 158L, 90L, 92L, 182L, 49L, 193L, 123L, 8L,
133L, 61L, 72L, 76L, 159L, 41L, 93L, 183L, 22L, 194L, 53L, 3L,
134L, 62L, 147L, 77L, 87L, 170L, 94L, 46L, 108L, 195L, 124L,
9L, 135L, 32L, 148L, 78L, 39L, 171L, 95L, 184L, 109L, 118L, 125L,
10L, 136L, 16L, 149L, 79L, 160L, 172L, 45L, 185L, 110L, 52L,
126L, 11L, 57L, 63L, 150L, 37L, 161L, 173L, 20L, 186L, 111L,
196L, 127L, 28L, 137L, 64L, 73L, 80L, 162L, 42L, 96L, 187L, 23L,
197L, 54L, 4L, 138L, 65L, 34L, 81L, 163L, 174L, 97L, 104L, 112L,
198L, 25L, 1L, 139L, 66L, 151L, 82L, 88L, 175L, 98L, 47L, 113L,
199L, 128L, 12L, 140L, 33L, 152L, 83L, 40L, 176L, 99L, 188L,
114L, 119L, 129L, 29L, 58L, 67L, 153L, 38L, 164L, 177L, 21L,
189L, 115L, 200L, 130L, 13L, 31L, 68L, 154L, 18L, 165L, 91L,
100L, 190L, 50L, 201L, 131L, 14L, 141L, 69L, 74L, 84L, 166L,
43L, 101L, 191L, 24L, 202L), .Label = c("26.02.10", "26.02.99",
"27.02.04", "27.02.09", "27.02.98", "28.02.01", "28.02.02", "28.02.03",
"28.02.05", "28.02.06", "28.02.07", "28.02.11", "28.02.13", "28.02.14",
"28.04.00", "28.04.06", "28.06.02", "28.06.13", "28.09.01", "28.09.07",
"28.09.12", "28.11.03", "28.11.08", "28.11.14", "29.01.10", "29.01.99",
"29.02.00", "29.02.08", "29.02.12", "29.03.02", "29.03.13", "29.04.05",
"29.04.11", "29.05.09", "29.05.98", "29.06.01", "29.06.07", "29.06.12",
"29.07.05", "29.07.11", "29.08.03", "29.08.08", "29.08.14", "29.09.00",
"29.09.06", "29.10.04", "29.10.10", "29.10.99", "29.11.02", "29.11.13",
"29.12.00", "29.12.06", "30.01.04", "30.01.09", "30.01.98", "30.03.01",
"30.03.07", "30.03.12", "30.04.01", "30.04.02", "30.04.03", "30.04.04",
"30.04.07", "30.04.08", "30.04.09", "30.04.10", "30.04.12", "30.04.13",
"30.04.14", "30.04.98", "30.04.99", "30.05.03", "30.05.08", "30.05.14",
"30.06.00", "30.06.03", "30.06.04", "30.06.05", "30.06.06", "30.06.08",
"30.06.09", "30.06.10", "30.06.11", "30.06.14", "30.06.98", "30.06.99",
"30.07.04", "30.07.10", "30.07.99", "30.08.02", "30.08.13", "30.09.02",
"30.09.03", "30.09.04", "30.09.05", "30.09.08", "30.09.09", "30.09.10",
"30.09.11", "30.09.13", "30.09.14", "30.09.98", "30.09.99", "30.10.09",
"30.10.98", "30.11.00", "30.11.01", "30.11.04", "30.11.05", "30.11.06",
"30.11.07", "30.11.09", "30.11.10", "30.11.11", "30.11.12", "30.11.98",
"30.11.99", "30.12.05", "30.12.11", "31.01.00", "31.01.01", "31.01.02",
"31.01.03", "31.01.05", "31.01.06", "31.01.07", "31.01.08", "31.01.11",
"31.01.12", "31.01.13", "31.01.14", "31.03.00", "31.03.03", "31.03.04",
"31.03.05", "31.03.06", "31.03.08", "31.03.09", "31.03.10", "31.03.11",
"31.03.14", "31.03.98", "31.03.99", "31.05.00", "31.05.01", "31.05.02",
"31.05.04", "31.05.05", "31.05.06", "31.05.07", "31.05.10", "31.05.11",
"31.05.12", "31.05.13", "31.05.99", "31.07.00", "31.07.01", "31.07.02",
"31.07.03", "31.07.06", "31.07.07", "31.07.08", "31.07.09", "31.07.12",
"31.07.13", "31.07.14", "31.07.98", "31.08.00", "31.08.01", "31.08.04",
"31.08.05", "31.08.06", "31.08.07", "31.08.09", "31.08.10", "31.08.11",
"31.08.12", "31.08.98", "31.08.99", "31.10.00", "31.10.01", "31.10.02",
"31.10.03", "31.10.05", "31.10.06", "31.10.07", "31.10.08", "31.10.11",
"31.10.12", "31.10.13", "31.10.14", "31.12.01", "31.12.02", "31.12.03",
"31.12.04", "31.12.07", "31.12.08", "31.12.09", "31.12.10", "31.12.12",
"31.12.13", "31.12.14", "31.12.97", "31.12.98", "31.12.99"), class = "factor"),
T1V = c(2.647778077, 2.210168532, 5.184543047, 8.040141376,
1.375197787, 5.254693278, 0.238583717, -0.897572167, -6.812178155,
-4.904778447, 1.445454477, 4.362544312, 0.577758687, -1.049345994,
-0.862978469, 1.496311077, 1.535298083, 0.288034989, 1.002503645,
-0.677737904, 1.148733333, -0.068879397, -0.933636437, 1.952957927,
0.864593373, 0.69587105, 1.566383785, 0.201725025, 0.108433102,
1.121251221, 0.697840536, -0.341798507, 1.750353464, -0.336236355,
-0.173630687, 0.405227621, 0.407442779, 0.301534209, -0.252288427,
-2.197112455, 0.4182172, 2.417270431, -1.777693712, 0.333608117,
-0.963997684, -6.639419411, 0.258711011, 0.186660625, 1.075364953,
-0.260546877, -0.144517713, 2.614703924, 1.592532166, 0.247679225,
-2.45731793, -4.605964615, -0.051317674, -2.162348318, -2.094287999,
1.053871887, 0.775032852, -2.409925349, -1.24731202, 0.20137383,
2.9796142, 1.18379607, 0.530516718, 0.687770774, 2.425813597,
1.070508498, 1.594988715, 2.577337728, 1.735724627, 4.753962343,
1.817757107, 0.287317513, 2.122250222, 0.509726992, 1.623651005,
-0.629218412, 1.413071621, 1.466153048, -0.032322501, 1.570878067,
2.495539535, 4.669928369, 2.540314459, 1.351671444, -0.511289999,
.
..
....
....
1.637709345, 0.949670725, -0.380310863, -1.434786801, 0.546588731,
-1.680930574, -1.497671033, 2.134405674, 0.189844698), T3R = c(0.440505512,
5.325647834, 8.837385281, 21.10071908, 4.5326005, 6.606732343,
-4.488433652, -1.304513421, -27.57526532, -19.22941607, 13.12560656,
10.95535151, -2.960696646, -1.282931055, -4.047714673, 4.325802659,
13.34806221, -3.940632325, 2.668465326, -2.035239493, 2.265868534,
2.901646772, 1.555938816, 8.725598107, 11.1111256, 15.10307892,
10.71764649, -1.860936247, -3.235221339, -0.718662895, 2.928862379,
1.567574208, 0.098434872, -2.639317291, -4.334738565, -7.662240412,
-1.392672778, -0.249440069, -7.519374824, -12.54244192, 3.211494367,
-1.798924417, -9.750103402, -17.47336517, -13.59092267, -30.85835803,
6.627120118, 13.84521564, 1.224167247, -4.282226202, -3.879824851,
11.0002882, -1.633862571, 0.728697276, -15.20216478, -21.43439457,
-9.173494124, -27.72510655, -1.643806123, 15.30080078, -11.42185815,
-10.86780424, -10.08529262, 0.158622664, 19.07560852, 4.410459583,
6.983702045, 9.726738752, 11.96532368, 0.865241128, 10.52710826,
1.824183803, 0.051281172, 7.643560265, 3.857934445, -4.269747269,
0.193491252, -1.127403274, -1.145642636, -4.336023223, -4.750288798,
1.386568693, -3.058304715, 3.87811701, 6.007778471, 6.972611825,
7.139746344, 4.366307305, -4.231872029, 0.465995363, 3.370806119,
6.055047349, 1.589337466, 6.641594709, -5.834167246, 0.500189653,
3.001936466, 5.665564573, 6.219235151, 4.696735739, 3.597032279,
-6.95415108, -2.658694701, 0.700309545, 3.870252718, 4.059903633,
4.129877722, 2.850231626, 6.026897131, 11.42913672, -1.40600749,
4.68987461, 6.138984252, 0.859683472, -0.783511946, -2.061859604,
-7.537614888, -3.971992672, 2.743416779, -13.26388813, 1.902781239,
-21.73358064, 5.433251961, -6.426065721, 5.500056238, 1.813441355,
-11.11515726, -5.234823589, 2.582946217, -16.67855167, -36.66711169,
-12.46637364, -5.211445441, -8.572591139, -17.88276043, 2.956958358,
25.59635755, 9.043196394, -1.052072638, 8.698101054, 11.55426061,
6.544403365, -4.495701412, -3.156245124, -1.293693294, 5.803543849,
-0.762197087, 8.000348105, 2.646959488, -12.09434448, -2.563082034,
1.466128125, -1.863374559, 4.699135454, 3.622459782, -1.706221195,
4.038651722, 2.817603386, 1.027156327, -1.486388335, 0.168641413,
-3.888501653, -9.915080583, -11.88374941, -13.56634471, -10.51374661,
3.846951996, -11.50943308, 2.074359943, 7.548294859, 6.711539857,
1.806850477, -0.576496993, -9.21065397, -4.154519223, 3.525193617,
-0.24777096, 3.601168094, 0.143557195, -6.368196817, 5.231960646,
6.810400741, 3.672507394, -2.556477674, -2.869519924, 4.479135652,
-5.380429829, 1.713023169, 3.396652152, 4.922622663, 4.040155598,
1.512006061, 0.24907751, 4.496251525, 0.92375895, -0.774870584,
-3.784012139, 5.614058853, 5.327086162, -0.706470295, 0.771043886,
-4.377376587, -2.491251246, 3.172560156, -2.082216546)), .Names = c("Date",
"T1V", "T2V", "T3V", "T1MV", "T2MV", "T3MV", "T1BTM", "T2BTM",
"T3BTM", "T1MOM", "T2MOM", "T3MOM", "Rm", "SMB", "HML", "MOM",
"T1R", "T2R", "T3R"), class = "data.frame", row.names = c(NA,
-205L))
I would be very happy if anyone could help me.
You need to specify your date format (see ?as.Date):
# A few of the day.month.two-digit-year strings from the Date column.
dates <- c(
  "26.02.10", "26.02.99", "27.02.04", "27.02.09",
  "27.02.98", "28.02.01", "28.02.02", "28.02.03"
)
# Spell out the layout so as.Date() does not have to guess the format.
as.Date(dates, format = "%d.%m.%y")
Related
I want to replace the row names of meth.kirp.cpg with anno$V1 wherever an existing row name matches anno$V2.
library(tidyverse)
# Attempt to rename the rows of meth.kirp.cpg using the annotation table.
# NOTE(review): the right-hand side is whatever the mutate() pipeline returns
# (a table with an extra `out` column), not a plain vector of names.
rownames(meth.kirp.cpg) <- meth.kirp.cpg %>%
rowwise() %>%
# str_which() is given two whole vectors here: the names of meth.kirp.cpg as
# `string` and all of anno$V2 as `pattern`. Vectors of different lengths
# cannot be recycled against each other, which is the error in the traceback.
# NOTE(review): this line calls colnames(), while the traceback shows
# rownames() -- confirm which version was actually run.
mutate(out = anno$V1[str_which(colnames(meth.kirp.cpg), anno$V2)])
Traceback:
Error in `mutate()`:
ℹ In argument: `out = anno$V1[str_which(rownames(meth.kirp.cpg),
anno$V2)]`.
ℹ In row 1.
Caused by error in `str_detect()`:
! Can't recycle `string` (size 142513) to match `pattern` (size 365860).
Run `rlang::last_error()` to see where the error occurred.
Example data:
meth.kirp.cpg
> dput(meth.kirp.cpg[1:100,1:2])
structure(list(TCGA.2K.A9WE.01A = c(0.461440642939772, 0.143910373119058,
0.847164847154162, 0.737361955793681, 0.716794733144112, 0.351877113536983,
0.248986769373366, 0.0121360989202765, 0.876303885229884, 0.944311384947134,
0.0490407302658151, 0.0200484962577958, 0.0623434271852525, 0.489865398138095,
0.920994933496615, 0.92663932531651, 0.0149191766670711, 0.884749685210921,
0.446591784140497, 0.91113228700911, 0.912199953863369, 0.908167409366654,
0.386721526377863, 0.0386737340626713, 0.0347492896507038, 0.98309370597552,
0.0176080612232509, 0.91878387167279, 0.743683318738873, 0.939148492241393,
0.722471943330892, 0.613143449419421, 0.0111202783577944, 0.843823786705695,
0.836431557867031, 0.390282953982417, 0.027408710286304, 0.0222349236137297,
0.657221610108816, 0.861848830221141, 0.0433751011272091, 0.0281247935879252,
0.938960776959358, 0.919825831744144, 0.922071582222369, 0.874732275907705,
0.0287898761495033, 0.0266947996996682, 0.922915821025777, 0.95009866012662,
0.964858875373814, 0.106451342824246, 0.406100902807456, 0.0421684244823044,
0.0341277368595181, 0.805451068725895, 0.147595746750675, 0.602617067494429,
0.90660866745333, 0.922313274809095, 0.462291286891102, 0.502857899902497,
0.0292904155423265, 0.835117565787527, 0.146789494933407, 0.06805696389495,
0.970563583145203, 0.0379479981289824, 0.058526761439653, 0.938993650169269,
0.44761099556807, 0.558961729061086, 0.939778576056268, 0.0728795533192928,
0.812084345787681, 0.899377654465699, 0.940111049552295, 0.838186810388758,
0.715121288990262, 0.897506380407565, 0.0929678061732199, 0.99024632582796,
0.055583745670494, 0.835146654988372, 0.973309086845447, 0.651216797099359,
0.0218535991986461, 0.0999671036378156, 0.790540668893094, 0.980591855409854,
0.567883806155822, 0.774816434396113, 0.904434807209845, 0.16641097147085,
0.0102686285230525, 0.65243489007093, 0.917594420539083, 0.0147831247626457,
0.844679485594683, 0.65566679452182), TCGA.2Z.A9J1.01A = c(0.595894468074615,
0.0807243779293262, 0.867305510246114, 0.70680600651273, 0.217862460492399,
0.169408257004071, 0.173115013795265, 0.0108902025634162, 0.813866558997356,
0.938576461648791, 0.0426568318037534, 0.0133187057875756, 0.0540543120983417,
0.317547629906197, 0.89911570032979, 0.525131175543627, 0.0152198596492253,
0.586968687135673, 0.49896100615873, 0.946718072906056, 0.859306039060091,
0.91185524112895, 0.28077646371254, 0.0413484993379312, 0.169193526857136,
0.941230054689418, 0.0164701153466769, 0.928402415411224, 0.736184540407898,
0.946288965623826, 0.312150292032857, 0.403171876971832, 0.0091246246912222,
0.535149883791691, 0.801041308364712, 0.171664264695538, 0.022737572168221,
0.0164834707992085, 0.34399568227201, 0.690016503202975, 0.0390842331750004,
0.0270854886242561, 0.888936631403145, 0.911902815624012, 0.858247513475469,
0.877113632682254, 0.0342892379505875, 0.0387268488822914, 0.922299785913074,
0.926130065834329, 0.975692332236198, 0.105415153493416, 0.127593519059119,
0.0540003798276299, 0.030980833881057, 0.914299941557146, 0.0512267439881511,
0.307325891435045, 0.941037265659174, 0.927078967007025, 0.48873418258592,
0.259006924115841, 0.0278764868641079, 0.87768067729952, 0.302640875302654,
0.0706384569300761, 0.968762634771395, 0.0364352674378962, 0.0441231506131831,
0.8307385629478, 0.242575477196221, 0.513439830376976, 0.932449172188782,
0.0526229004254996, 0.81314353054328, 0.778591104943176, 0.95668645045373,
0.453172059602829, 0.250129171963381, 0.863470213940097, 0.0994627135023581,
0.989489689575077, 0.0472116225581592, 0.911407225108748, 0.825189076107663,
0.578029414148402, 0.018058167343065, 0.0855852777154159, 0.819733395638372,
0.988287891473147, 0.255899615791521, 0.643359326354994, 0.491979154678761,
0.0978562004864199, 0.0105671614378101, 0.48897100984416, 0.9024550858788,
0.0131702158217202, 0.81328537816321, 0.85890307119103)), row.names = c("cg00000029",
"cg00000165", "cg00000236", "cg00000289", "cg00000292", "cg00000321",
"cg00000363", "cg00000622", "cg00000658", "cg00000721", "cg00000734",
"cg00000769", "cg00000905", "cg00000924", "cg00000948", "cg00000957",
"cg00001245", "cg00001249", "cg00001261", "cg00001349", "cg00001364",
"cg00001446", "cg00001510", "cg00001582", "cg00001583", "cg00001687",
"cg00001747", "cg00001791", "cg00001809", "cg00001854", "cg00001874",
"cg00002033", "cg00002116", "cg00002145", "cg00002190", "cg00002224",
"cg00002236", "cg00002406", "cg00002426", "cg00002449", "cg00002464",
"cg00002490", "cg00002531", "cg00002591", "cg00002593", "cg00002597",
"cg00002660", "cg00002719", "cg00002769", "cg00002808", "cg00002809",
"cg00002810", "cg00002837", "cg00003091", "cg00003173", "cg00003181",
"cg00003287", "cg00003345", "cg00003513", "cg00003529", "cg00003578",
"cg00003625", "cg00003784", "cg00003969", "cg00003994", "cg00004055",
"cg00004067", "cg00004072", "cg00004082", "cg00004089", "cg00004105",
"cg00004121", "cg00004192", "cg00004207", "cg00004209", "cg00004429",
"cg00004533", "cg00004562", "cg00004608", "cg00004773", "cg00004818",
"cg00004883", "cg00004939", "cg00004963", "cg00004979", "cg00004996",
"cg00005010", "cg00005040", "cg00005072", "cg00005083", "cg00005112",
"cg00005166", "cg00005215", "cg00005297", "cg00005306", "cg00005390",
"cg00005437", "cg00005543", "cg00005617", "cg00005619"), class = "data.frame")
anno
> dput(anno[1:100,])
structure(list(V1 = c("TSPY4", "TTTY14", "TMSB4Y", "TBL1Y", "TMSB4Y",
"TSPY4", "RPS4Y2", "EIF1AY", "PCDH11Y", "TBL1Y", "ZFY", "FAM197Y2",
"TTTY14", "TSPY4", "ZFY", "NLGN4Y", "EIF1AY", "TSPY4", "TBL1Y",
"UTY", "PRKY", "ZFY", "CD24", "PRKY", "TSPY1", "CYorf15A", "TSPY2",
"TTTY15", "RPS4Y2", "UTY", "CYorf15A", "RPS4Y2", "TSPY2", "TBL1Y",
"TSPY3", "DDX3Y", "CYorf15A", "ZFY", "RBMY1F", "DDX3Y", "RPS4Y2",
"ZFY", "DDX3Y", "TTTY15", "BCORL2", "PCDH11Y", "KDM5D", "TTTY14",
"EIF1AY", "DDX3Y", "LOC100101121", "CYorf15A", "TTTY15", "TSPY1",
"TSPY1", "FAM197Y2", "TSPY4", "TMSB4Y", "DDX3Y", "TTTY15", "TTTY20",
"NLGN4Y", "TSPY4", "CYorf15A", "RPS4Y2", "KDM5D", "RBMY1J", "EIF1AY",
"KDM5D", "ZFY", "TGIF2LY", "HMGN5", "EBP", "UBL4A", "WDR13",
"MTM1", "BCOR", "ZCCHC12", "FTHL17", "PORCN", "NAA10", "PCDH11X",
"ARSE", "DOCK11", "PDK3", "LONRF3", "MAGIX", "PCYT1B", "SLC6A8",
"UBE2A", "TAF9B", "STARD8", "BCOR", "ZIC3", "IL1RAPL2", "TMSB4X",
"CLCN5", "LOC100133957", "SCML1", "GNL3L"), V2 = c("cg00050873",
"cg00212031", "cg00214611", "cg01707559", "cg02004872", "cg02011394",
"cg02050847", "cg02233190", "cg02494853", "cg02839557", "cg02842889",
"cg03052502", "cg03244189", "cg03443143", "cg03683899", "cg03706273",
"cg03750315", "cg04016144", "cg04042030", "cg04448376", "cg04689676",
"cg04840163", "cg05230942", "cg05480730", "cg05544622", "cg05621349",
"cg05865243", "cg05890011", "cg06322277", "cg06479204", "cg07731488",
"cg07747963", "cg08242338", "cg08921682", "cg09350919", "cg09856092",
"cg10076560", "cg10213302", "cg10267609", "cg10698069", "cg10841270",
"cg11131351", "cg14180491", "cg14741114", "cg15027426", "cg15295597",
"cg15329860", "cg15345074", "cg15422579", "cg15429127", "cg15682806",
"cg15682993", "cg15746461", "cg15810474", "cg15935877", "cg17834650",
"cg17837162", "cg18032798", "cg18077436", "cg25032547", "cg25071634",
"cg25518695", "cg25705492", "cg25756647", "cg26058907", "cg26517491",
"cg26983430", "cg26983535", "cg27049643", "cg27433982", "cg27539833",
"cg00008945", "cg00011200", "cg00011891", "cg00014152", "cg00016522",
"cg00016934", "cg00018261", "cg00021786", "cg00026186", "cg00072288",
"cg00072839", "cg00074638", "cg00112256", "cg00114625", "cg00114913",
"cg00116709", "cg00139317", "cg00140085", "cg00142683", "cg00192980",
"cg00200463", "cg00206414", "cg00240113", "cg00241296", "cg00241907",
"cg00264378", "cg00265812", "cg00266918", "cg00360365")), row.names = c(1L,
2L, 4L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 16L, 17L, 18L,
20L, 21L, 23L, 25L, 27L, 29L, 31L, 34L, 35L, 36L, 37L, 38L, 39L,
40L, 41L, 42L, 43L, 45L, 47L, 48L, 50L, 51L, 52L, 54L, 56L, 57L,
58L, 61L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 72L, 73L, 74L, 75L,
76L, 77L, 78L, 79L, 80L, 82L, 83L, 85L, 86L, 87L, 88L, 89L, 91L,
92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L,
104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L,
115L, 116L, 117L, 118L, 119L, 120L, 122L, 123L, 124L, 125L), class = "data.frame")
There is no match between your cpg dataframe and annotation dataframe:
# How many row names of meth.kirp.cpg occur among the IDs in anno$V2?
table(is.element(rownames(meth.kirp.cpg), anno$V2))
# FALSE
# 100
The code below should work with your full data, assuming there are no duplicates and all row names are present in the annotation data frame:
# Look up each existing row name in anno$V2 and take the value of anno$V1 at
# the same position. (Indexing anno$V2 with these positions would only hand
# back the original IDs.) Data-frame row names must be unique and non-NA, so
# every row name has to be found in anno$V2 and the selected anno$V1 values
# have to be distinct.
idx <- match(rownames(meth.kirp.cpg), anno$V2)
rownames(meth.kirp.cpg) <- anno$V1[idx]
I have a training data (train.dat) and test data (test.dat). I would like to run my linear regression, elastic, tree and random forest model on the test data after training it on the training data.
From there, I would like to get the RMSE and R2 to observe the predictive accuracy of the model. However, I have 2 issues:
When I run my trained random forest model on the test data, I get the error: Error in predict.randomForest(modelFit, newdata) : missing values in newdata. The other models predict without an error.
I can only extract the RMSE and R2 values for my tree model, while the other models return NAs. A screenshot is below
Can anyone tell me what has gone wrong with my code?
My code:
library(caret)
# Fix the RNG state so the resampling below is reproducible.
set.seed(10345678)

# Resampling scheme passed to every train() call in this script:
# 10-fold cross-validation, repeated 5 times, without class probabilities.
tr.Control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 5,
  classProbs = FALSE
)
# Linear regression of Lifeexp on every other column plus the
# Govthealth x Privhealth interaction, with predictors centred and scaled.
lm7 <- train(
  Lifeexp ~ . + Govthealth * Privhealth,
  data = train.dat,
  method = "lm",
  trControl = tr.Control,
  # Full argument name rather than relying on partial matching of `preProc`.
  preProcess = c("center", "scale")
)
# Tuning grid for the elastic net: every combination of 11 alpha values
# (0 to 1 in steps of 0.1) and 50 lambda values (0 to 30).
alpha.vec <- seq(0, 1, by = 0.1)
lambda.vec2 <- seq(0, 30, length.out = 50)
elastic.grid <- expand.grid(alpha = alpha.vec, lambda = lambda.vec2)

# glmnet model of Lifeexp on all other columns, tuned over elastic.grid.
elastic4 <- train(
  Lifeexp ~ .,
  data = train.dat,
  method = "glmnet",
  trControl = tr.Control,
  verbose = FALSE,
  tuneGrid = elastic.grid,
  # Full argument name rather than relying on partial matching of `preProc`.
  preProcess = c("center", "scale")
)
# Candidate complexity-parameter (cp) values: 50 points from 10^-4 to 0.1.
cp.vec <- seq(10^-4, 0.1, length.out = 50)

# rpart regression tree of Lifeexp on all other columns, tuned over cp.vec.
tree2 <- train(
  Lifeexp ~ .,
  data = train.dat,
  method = "rpart",
  trControl = tr.Control,
  preProcess = c("center", "scale"),
  tuneGrid = expand.grid(cp = cp.vec)
)
# Random forest (1000 trees) of Lifeexp on all other columns.
rf1 <- train(
  Lifeexp ~ .,
  data = train.dat,
  method = "rf",
  trControl = tr.Control,
  # Full argument name rather than relying on partial matching of `preProc`.
  preProcess = c("center", "scale"),
  ntree = 1000,
  # Try every mtry from 1 up to the number of non-outcome columns. seq_len()
  # yields an empty sequence instead of counting down when that number is 0.
  tuneGrid = expand.grid(mtry = seq_len(ncol(train.dat) - 1))
)
# Score every model on the same complete rows of the test set.
# Assumption (confirm with anyNA(test.dat)): test.dat contains missing values.
# That would explain both reported symptoms: predict() for a random forest
# stops on NA in newdata, and RMSE()/R2() return NA as soon as one prediction
# is NA (lm, glmnet), while rpart can still predict for incomplete rows.
test.complete <- test.dat[complete.cases(test.dat), , drop = FALSE]

regression.pred <- predict(lm7, newdata = test.complete, type = "raw")
elastic.pred <- predict(elastic4, newdata = test.complete, type = "raw")
tree.pred <- predict(tree2, newdata = test.complete, type = "raw")
rf.pred <- predict(rf1, newdata = test.complete, type = "raw")

# One row holding the test-set RMSE and R-squared of each model, now
# including the random forest.
test.statistics <- data.frame(
  RMSE.regression = RMSE(regression.pred, test.complete$Lifeexp),
  Rsquare.regression = R2(regression.pred, test.complete$Lifeexp),
  RMSE.el = RMSE(elastic.pred, test.complete$Lifeexp),
  Rsquare.el = R2(elastic.pred, test.complete$Lifeexp),
  RMSE.tree = RMSE(tree.pred, test.complete$Lifeexp),
  Rsquare.tree = R2(tree.pred, test.complete$Lifeexp),
  RMSE.rf = RMSE(rf.pred, test.complete$Lifeexp),
  Rsquare.rf = R2(rf.pred, test.complete$Lifeexp)
)
test.statistics
train.dat:
structure(list(GDP = c(402.1030419, 442.2030419, 543.3030419,
520.8966027, 254.2432569, 124.4608003, 341.5541149, 772.3135303,
478.6685897, 191.8789042, 592.4010975, 1033.912431, 138.4288795,
622.4988457, 642.7767443, 317.3893069, 269.8711377, 709.5819646,
585.07655, 780.190201, 3122.362815, 3893.596078, 1166.610276,
1674.825261, 3690.113268, 4241.788782, 2441.741991, 4043.662051,
9040.566251, 963.8417858, 2234.579866, 10330.61561, 1944.137621,
2136.440243, 567.5286729, 567.930736, 2292.445156, 2028.18197,
371.6785662, 519.5343268, 987.409723, 1482.403063, 1196.586858,
1955.588006, 6941.235848, 1038.90854, 3102.713363, 3139.966054,
3032.427138, 7328.615629, 869.6965166, 2799.648876, 617.2304355,
1126.683318, 4094.362119, 7708.100996, 10385.96443, 11683.94962,
718.1878292, 3243.231125, 3100.280468, 11286.24302, 8920.762105,
201.4671636, 785.5022829, 1510.324871, 1831.001912, 8141.913127,
12027.36588, 6967.24523, 7691.345097, 3233.295943, 367.5566093,
1357.563719, 1489.876911, 977.2736357, 1508.942737, 2007.736363,
5076.342992, 7273.563207, 948.3318545, 2146.996385, 95.18825018,
390.0933261, 2566.59695, 52022.1256, 57373.68668, 19095.467,
28149.87001, 39435.8399, 20600.37525, 23041.53473, 44141.87814,
47518.63604, 24190.24962, 46232.98962, 26891.44645, 61350.34791,
28364.64508, 50152.34014, 22303.96133, 23635.92922, 41531.9342,
47603.02763, 9600.18513, 12042.95373, 26917.75898, 20324.25356,
20087.59199, 36000.52012, 25423.07201, 32018.06325, 43024.92384,
73191.11632, 12663.36453, 30693.59308, 18440.37852, 38577.38166,
33994.40657, 21290.86038, 50950.03434, 53024.05921, 13663.02162,
13641.10272, 41945.33167, 1731.209509, 4492.727604, 11861.75616,
47236.96023, 23509.54339, 26123.97387, 74605.77451), Health = c(22.23474948,
36.44474948, 45.58774948, 46.38774948, 3.333203815, 5.359203815,
16.69390488, 19.46990488, 33.22835541, 5.300580788, 29.97179604,
33.59179604, 5.971383095, 62.66848373, 67.22848373, 8.23568,
14.98141193, 32.6487999, 10.22661548, 16.19961548, 92.18703461,
98.65987461, 143.7665911, 159.7515106, 308.6578979, 402.5568979,
99.5689502, 111.4155502, 292.8907166, 198.2263198, 221.1403198,
705.336568, 176.6524443, 200.7054443, 12.56211728, 17.72411728,
76.7208786, 98.4562786, 9.55682529, 16.01162529, 26.5686245,
33.565445, 69.66563616, 89.45643616, 275.2236792, 32.77552414,
122.5689168, 198.7124574, 221.7829742, 539.567627, 43.70681763,
108.6149597, 33.2254878, 42.36598, 60.2569, 705.1993408, 891.1377563,
992.5689563, 31.84200096, 77.2356478, 277.45864, 891.7641602,
932.325129, 15.23564, 54.30473709, 74.231488, 200.564125, 665.2514038,
755.36985, 384.9183044, 445.20158, 262.5267029, 11.56898, 45.25077438,
109.0749969, 122.02145, 42.568412, 62.25963211, 172.0576935,
200.562134, 91.17743683, 120.236549, 11.23587, 18.82835197, 99.23568,
4952.777344, 5236.3654, 1101.36589, 1674.2854, 3309.480957, 1654.5687,
1845.321045, 4449.542969, 5000.36545, 1998.634277, 6054.23658,
1900.2356, 7025.36987, 1000.5689, 5036.2356, 1233.36545, 2334.651855,
4597.244629, 5698.2547, 1500.3698, 2000.23564, 2573.740234, 3002.36547,
1520.453613, 3214.546387, 1569.3254, 2873.848145, 3644.802734,
4587.235478, 1122.02145, 2211.019043, 462.5890808, 1061.365601,
1256.56897, 1987.2145, 5186.632813, 6547.2356, 990.32658, 1053.891602,
4201.3698, 122.02145, 238.0044861, 712.2356, 1513.565918, 2015.18042,
2985.23, 8021.80957), Govthealth = c(1.25689, 2.032658, 2.495758057,
2.965478, 1.985478, 2.209019899, 2.882325411, 3.21458, 7.3134408,
1.032568, 5.433434963, 7.235478, 1.239725351, 8.535984039, 10.323589,
1.236589, 3.562868595, 4.673761368, 2.32547, 4.648055553, 23.70949936,
33.235687, 51025478, 71.8605423, 205.9026794, 295.2356, 31.2587,
51.99817276, 154.70401, 56.32588, 73.30036926, 399.23568, 66.3265,
99.82849121, 2.23568, 3.246135235, 10.43734169, 15.235478, 3.569877,
5.623521328, 5.849419594, 8.32665, 35.3654457, 44.96020508, 195.3657,
14.55177689, 35.235698, 61.02356, 81.59127045, 284.7705994, 23.43979454,
43.92045593, 22.36587, 30.42416763, 181.3415375, 385.9675598,
576.0806274, 602.3258, 25.36730576, 66.235687, 92.2147, 401.4833984,
502.3698, 2.0214578, 10.70767879, 15.36987, 112.3698, 481.0765686,
502.36987, 226.7909851, 300.65478, 55.95266342, 2.36547, 11.85855961,
35.50076675, 45.235698, 25.36954, 34.36005783, 126.9312592, 156.3257,
23.53768349, 39.235687, 4.235687, 6.570708275, 45.36987, 3399.406006,
4500.321547, 990.36547, 1368.160278, 2804.857178, 1000.365, 1375.334717,
3458.573975, 4120.325, 1456.037842, 4100.368, 1500.36578, 6925.325445,
990.58795, 4125.25658, 998.25998, 1827.566895, 3482.541016, 4800.3256,
989.325, 1254.325, 1756.99939, 1998.23569, 1104.429321, 2521.927002,
1800.3256, 2315.543701, 2931.431641, 331.0256, 548.32, 1388.55896,
351.3133545, 898.4367065, 997.02145, 956.32547, 3488.651855,
4400.23556, 558.36987, 785.0509033, 3000.3658, 100.36987, 162.3498688,
162.365, 543.0645752, 1458.283813, 2000.3694, 2495.23877), Privhealth = c(14.3698,
25.36698, 36.01279831, 49.36875, 1.23569, 2.278559208, 8.061329842,
10.3658, 5.059076786, 3.25698, 20.38587761, 30.65877, 4.726452827,
22.79703331, 32.65878, 6.32589, 10.38636589, 19.33849907, 8.326589,
11.07592678, 67.27728271, 74.23658, 63.235698, 83.74517059, 88.83229828,
96.32568, 49.32658, 59.41738892, 138.1631165, 100.23564, 147.8399658,
300.23568, 71.02584, 90.6206665, 8.365984, 11.47062778, 61.48280716,
74.254785, 7.235647, 10.26313496, 19.40570831, 23.65879, 33.25478,
44.17641068, 189.32658, 17.06592751, 75.325689, 89.32658, 136.7345276,
238.6507721, 19.86775017, 63.43461227, 7.325478, 19.23568, 25.321547,
319.0157471, 311.9694214, 442.03695, 3.889117956, 15.3654, 115.02365,
488.0875244, 552.0325698, 10.3658, 36.04922485, 45.362154, 45.23548,
182.7733917, 202.3654, 142.2067719, 202.325, 197.0276337, 9.32658,
32.95304871, 70.28269196, 90.3256, 15.021457, 27.89465141, 44.9021492,
60.32568, 43.03323364, 60.325845, 8.325698, 11.45799065, 60.32568,
1553.358765, 2330.2354, 201.0214578, 305.5347595, 503.7982178,
301.23565, 469.9864197, 990.9689331, 1200.36987, 542.5964966,
1823.021457, 312.0215478, 1100.32145, 301.02145, 1100.3256, 320.365478,
507.0849609, 1114.720093, 2001.23548, 401.14567, 662.03214, 816.2644653,
998.32546, 416.0243225, 692.6192017, 402.32564, 558.3044434,
713.3709106, 998.32658, 302.0214, 793.8995972, 111.2757187, 162.9289398,
212.3657, 442.32598, 1698.060913, 2226.32568, 145.2365, 268.8859863,
902.32568, 42.36587, 75.64861298, 332.65478, 970.5014648, 556.8964233,
700.32658, 5526.447266), Population = c(12412308L, 20779953L,
29185507L, 37172386L, 47887865L, 66224804L, 87639964L, 109224559L,
14539612L, 18905478L, 27013212L, 28087871L, 6216341L, 32428167L,
42723139L, 8449913L, 10946445L, 15049353L, 181413402L, 211513823L,
241834215L, 267663435L, 3565890L, 5122493L, 7261539L, 9956011L,
18029824L, 23194257L, 28208035L, 223158L, 279398L, 515696L, 1432905L,
1794571L, 95212450L, 122283850L, 158503197L, 195874740L, 107647921L,
142343578L, 179424641L, 212215030L, 22071433L, 26459944L, 31989256L,
77991755L, 106651922L, 36800509L, 44967708L, 51216964L, 18777601L,
20261737L, 3286542L, 3089027L, 2913021L, 36870787L, 40788453L,
44494502L, 591021L, 754394L, 149003223L, 195713635L, 209469333L,
8975597L, 14312212L, 16249798L, 3119433L, 4577378L, 4999441L,
70878L, 71625L, 3786695L, 873277798L, 1234281170L, 34545013L,
41801533L, 56558186L, 62952642L, 67195028L, 69428524L, 12697723L,
14439018L, 67988862L, 79910412L, 95540395L, 22031750L, 24982688L,
57247586L, 58892514L, 62766365L, 9967379L, 10251250L, 10895586L,
11433256L, 30685730L, 37057765L, 5140939L, 5793636L, 4986431L,
5515525L, 79433029L, 82211508L, 81776930L, 82905782L, 10196792L,
10805808L, 11121341L, 10731726L, 56942108L, 59277417L, 254826L,
281205L, 318041L, 352721L, 4660000L, 7623600L, 2045123L, 2991884L,
4137309L, 14951510L, 16615394L, 17231624L, 3329800L, 3857700L,
4841000L, 38110782L, 38258629L, 3047132L, 5076732L, 2048583L,
2073894L, 7824909L), Lifeexp = c(50.331, 55.841, 61.028, 64.486,
47.099, 51.941, 61.627, 66.24, 55.564, 54.404, 67.611, 70.478,
61.974, 57.099, 62.973, 45.746, 48.069, 55.251, 62.32, 65.772,
69.205, 71.509, 69.872, 71.73, 73.428, 74.405, 70.865, 72.594,
74.493, 61.529, 70.173, 78.627, 61.608, 52.192, 45.9, 46.267,
50.896, 54.332, 60.1, 62.82, 65.264, 67.114, 66.165, 71.111,
76.516, 68.793, 71.095, 63.307, 56.048, 57.669, 71.333, 75.439,
71.836, 73.955, 76.562, 73.576, 75.278, 76.52, 60.884, 71.46,
66.343, 73.619, 75.672, 53.595, 66.56, 69.57, 75.654, 78.769,
80.095, 74.619, 77.672, 71.46, 57.865, 66.693, 62.764, 65.095,
70.248, 70.623, 74.184, 76.931, 50.64, 61.195, 70.551, 73.025,
75.317, 81.69512195, 82.74878049, 75.8804878, 77.74146341, 80.40243902,
76.05195122, 77.72195122, 80.18292683, 81.59512195, 79.13658537,
81.94878049, 74.80536585, 81.35121951, 74.81317073, 81.83414634,
75.2277561, 77.92682927, 79.98780488, 80.99268293, 76.93902439,
77.88780488, 80.38780488, 81.28780488, 79.77804878, 82.03658537,
78.03634146, 79.65365854, 81.89756098, 82.66097561, 76.60731707,
81.60243902, 73.142, 74.358, 75.398, 76.87804878, 80.70243902,
81.76097561, 75.37804878, 78.63658537, 81.85853659, 70.8902439,
73.74878049, 75.29512195, 81.54146341, 79.42195122, 81.02926829,
82.24634146), Govted = c(1.23568, 2.31245, 3.47945, 5.32658,
2.365, 3.98311, 4.49659, 6.32547, 3.5398, 1.023568, 3.63172,
5.16365, 2.32871, 2.38901, 2.52076, 1.23568, 2.97156, 3.34389,
0.984578, 1.36589, 2.81228, 4.326587, 1.2365897, 1.9654789, 2.3658,
3.58851, 3.23568, 5.97161, 4.96645, 1.23568, 3.21548, 6.32547,
2.32657, 6.99139, 1.32658, 2.012457, 3.214587, 4.235687, 2.51681,
1.83782, 2.28687, 3.9854587, 2.36587, 3.22803, 3.71993, 3.26766,
5.32568, 5.12579, 5.44358, 5.72174, 2.36578, 1.71774, 2.3265,
3.43017, 2.65897, 4.58031, 5.01971, 6.32658, 5.51379, 6.64043,
2.36587, 5.6488, 6.32658, 1.235687, 1.53379, 2.16286, 3.24578,
6.63445, 7.02824, 2.36578, 3.325478, 3.215487, 3.23568, 3.37769,
2.32657, 3.23654, 3.323568, 5.25346, 3.50844, 5.32658, 1.54406,
4.60449, 3.326589, 4.235478, 4.17277, 5.55006, 6.32365, 4.05552,
4.06533, 5.74164, 4.021547, 5.32658, 6.40799, 6.9874564, 5.442,
6.32658, 7.32658, 8.9854587, 5.33591, 7.32658, 3.0215478, 3.21547,
4.91368, 6.3265, 2.04608, 3.23019, 4.32658, 5.023658, 4.29886,
4.35239, 4.25224, 6.44717, 6.97848, 7.235689, 5.43073, 5.54157,
2.985467, 3.124578, 3.32652, 5.22879, 5.48909, 4.236587, 5.321457,
6.323658, 7.5698745, 3.26587, 4.9936, 2.325647, 3.08044, 5.56251,
5.965871, 4.92605)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 11L, 13L, 15L, 16L, 18L, 23L, 24L, 25L, 26L, 27L, 29L, 30L,
31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 41L, 42L, 44L, 45L,
46L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 60L, 62L,
64L, 65L, 66L, 67L, 70L, 71L, 73L, 74L, 75L, 78L, 79L, 80L, 82L,
84L, 85L, 87L, 88L, 89L, 91L, 92L, 93L, 95L, 96L, 99L, 100L,
103L, 105L, 107L, 111L, 112L, 113L, 114L, 115L, 116L, 119L, 120L,
121L, 122L, 124L, 127L, 128L, 129L, 130L, 131L, 133L, 134L, 135L,
136L, 138L, 140L, 141L, 144L, 145L, 148L, 149L, 150L, 151L, 152L,
153L, 154L, 155L, 156L, 158L, 159L, 161L, 162L, 163L, 164L, 165L,
167L, 170L, 171L, 172L, 173L, 175L, 176L, 177L, 178L, 180L, 181L,
182L, 185L, 187L, 191L, 192L, 195L), class = "data.frame")
Test.dat:
structure(list(GDP = c(199.9863423, 156.3857186, 389.3980332,
229.4902871, 497.6320261, 749.552711, 826.6215305, 248.0293672,
261.8689977, 899.6599081, 11373.233, 7076.662423, 5324.61704,
5931.453886, 5082.354757, 715.9137121, 2124.05677, 6374.028196,
463.6186318, 4102.48135, 5268.848504, 4333.482973, 564.7796095,
2258.183141, 3749.75325, 302.5771636, 3772.870012, 2860.43156,
4787.780171, 1614.640122, 749.9085236, 4717.143026, 443.3141934,
2009.978857, 483.952592, 366.1728076, 841.9729898, 563.0577411,
1317.890706, 18211.27459, 21679.24784, 42943.90227, 21448.36196,
47450.31847, 30743.54768, 58041.39844, 24285.46682, 46459.97325,
20825.78421, 34483.204, 21043.57493, 41715.02928, 8794.631229,
26149.41108, 33692.01083, 12599.53358, 15420.91116, 23852.32703,
64581.94402, 9107.477079, 10201.30354, 38428.3855, 37868.296,
82796.54716), Health = c(6.22435541, 8.909747124, 39.22274712,
8.625580788, 4.22284155, 42.34384155, 47.44484155, 10.74555809,
18.80055809, 45.32365, 324.6654166, 602.659668, 504.5536499,
594.8854499, 239.3392792, 22.55662414, 91.84031677, 624.335527,
30.56891763, 128.3355597, 74.23569, 505.4589408, 22.23569, 69.80043793,
311.6526794, 19.73552704, 251.0935822, 211.589745, 250.7455292,
35.25698, 47.90106964, 292.54782, 18.56432343, 70.5685123, 10.56888,
17.38329887, 50.66987, 75.201547, 78.18682861, 1022.5487, 1632.427612,
4002.325, 1452.369, 5044.135254, 2496.047119, 6011.536621, 1655.866211,
4099.587891, 1125.365, 4400.325, 1496.87854, 3000.23568, 336.2356,
2023.143677, 3216.223633, 809.1994019, 956.21547, 820.6981812,
1989.235, 446.3265, 796.6470337, 2985.12, 3737.802979, 9658.23
), Govthealth = c(2.65987, 3.350677967, 8.32365, 1.337858081,
0.235689, 8.714180946, 11.02365, 2.356894, 4.656533241, 5.958777,
198.23568, 319.1759033, 207.0215302, 302.654789, 123.2336197,
9.32658, 29.2992878, 300.5689, 12.02589, 52.658912, 22.03256,
222.325689, 16.3258, 50.29269791, 129.758316, 3.900079966, 163.0175018,
102.369, 156.8104706, 4.36987, 5.465222836, 75.36987, 3.839128733,
14.32589, 3.25478, 5.880064487, 12.36547, 18.02584, 30.97570801,
990.365478, 1116.231445, 3201.0245, 996.598723, 3721.796387,
2074.39917, 5042.459961, 1229.708252, 3167.418213, 889.32658,
3698.23598, 944.5585938, 1998.02365, 200.365778, 1396.733398,
2517.370117, 577.3640747, 662.32589, 298.1834717, 702.369, 456.325,
568.7339478, 889.36547, 1045.900513, 3987.3654), Privhealth = c(1.36589,
1.832908154, 7.325698, 5.431494236, 2.36589, 29.85413742, 35.3698,
4.23568, 8.9836483, 22.3658, 152.36589, 263.3545532, 225.5363922,
301.325478, 111.575592, 10.23568, 60.89479446, 336.02145, 12.36587,
75.36987, 34.3265, 223.02145, 2.0215478, 11.81901455, 180.9026947,
15.41190529, 85.28456879, 45.321478, 86.49634552, 25.36987, 39.00668716,
220.32145, 14.22738075, 49.326545, 7.02145, 11.50323391, 20.36587,
33.021456, 45.45627975, 400.23568, 516.1798096, NA, 400.32547,
1322.338745, 421.6481018, 969.076416, 426.0691833, 931.8737793,
302.1245, 886.02154, 517.4750366, 889.32547, 90.3256, 626.4102173,
698.8658447, 231.8352966, 301.0324, 522.5147705, 1236.021458,
117.3658, 227.9130707, 1965.3256, 2691.985107, 6600.3256), Population = c(9404500L,
11148758L, 18143315L, 23941110L, 5283814L, 7527394L, 9100837L,
17354392L, 23650172L, 19077690L, 31528585L, 365734L, 2118874L,
2448255L, 29027674L, 61895160L, 93966780L, 57779622L, 17325773L,
21670000L, 2866376L, 32618651L, 530804L, 685503L, 174790340L,
12155239L, 3962372L, 70419L, 69650L, 4802000L, 4077131L, 3726549L,
1056575549L, 1352617328L, 20147590L, 27275015L, 10432421L, 11881477L,
87967651L, 17065100L, 19153000L, 66460344L, 27691138L, 34004889L,
5339616L, 5547683L, 5176209L, 5363352L, 56719240L, 60421760L,
6289000L, 8882800L, 2095344L, 15925513L, 4350700L, 38042794L,
37974750L, 4027887L, 5638676L, 1998161L, 1988925L, 6715519L,
7184250L, 8513227L), Lifeexp = c(46.096, 45.09, 63.798, 62.288,
58.824, 68.736, 70.879, 45.853, 46.229, 58.893, 75.997, 75.905,
56.665, 63.373, 74.41, 66.366, 69.823, 63.857, 69.509, 76.812,
78.458, 71.594, 52.878, 68.384, 70.116, 58.432, 77.452, 66.843,
71.116, 70.386, 69.902, 73.6, 62.505, 69.416, 55.5, 58.472, 58.1,
44.649, 74.837, 76.99463415, 79.23414634, 81.35609756, 77.42195122,
81.24634146, 76.59268293, 79.1, 77.46585366, 79.87073171, 76.97073171,
82.94634146, 78.95365854, 82.80243902, 72.15, 77.98780488, 80.70243902,
76.24634146, 77.75365854, 77.95121951, 83.14634146, 73.20487805,
75.41219512, 77.24243902, 79.6804878, 83.55121951), Govted = c(3.27054,
5.24797, 4.71484, 2.97515, 1.36587, 4.00675, 6.32547, 1.023658,
2.46167, 4.32658, 4.53477, 4.11747, 8.34961, 10.23547, 2.8673,
2.36587, 5.326545, 6.15899, 2.41093, 2.11189, 2.46866, 1.06738,
3.21547, 4.02447, 3.94893, 1.65599, 4.68696, 1.856231, 2.032145,
1.56897, 2.18109, 4.236587, 4.32479, 5.326587, 0.36589, 1.01218,
1.45426, 2.36589, 5.13722, 4.6764, 4.89147, 7.3265, 5.99199,
5.36993, 8.08434, 8.55955, 5.71688, 6.54071, 3.325687, 5.32658,
6.12262, 6.32658, 1.326587, 4.58512, 7.00241, 5.06843, 6.32547,
3.3213, 5.32658, 3.32365, 4.32657, 4.52294, 4.7814, 5.9658745
)), row.names = c(9L, 10L, 12L, 14L, 17L, 19L, 20L, 21L, 22L,
28L, 40L, 43L, 47L, 48L, 59L, 61L, 63L, 68L, 69L, 72L, 76L, 77L,
81L, 83L, 86L, 90L, 94L, 97L, 98L, 101L, 102L, 104L, 106L, 108L,
109L, 110L, 117L, 118L, 123L, 125L, 126L, 132L, 137L, 139L, 142L,
143L, 146L, 147L, 157L, 160L, 166L, 168L, 169L, 174L, 179L, 183L,
184L, 186L, 188L, 189L, 190L, 193L, 194L, 196L), class = "data.frame")
You can use the following code
# Drop every row of the test set that contains an NA
test.dat <- na.omit(test.dat)

# Predictions of each fitted model on the cleaned test set
regression.pred <- predict(lm7, newdata = test.dat, type = "raw")
elastic.pred <- predict(elastic4, newdata = test.dat, type = "raw")
tree.pred <- predict(tree2, newdata = test.dat, type = "raw")
# Note: rf.pred is computed here but is not part of the summary below
rf.pred <- predict(rf1, newdata = test.dat, type = "raw")

# One-row summary: RMSE and R-squared of each prediction against Lifeexp
test.statistics <- with(
  test.dat,
  data.frame(
    RMSE.regression = RMSE(regression.pred, Lifeexp),
    Rsquare.regression = R2(regression.pred, Lifeexp),
    RMSE.el = RMSE(elastic.pred, Lifeexp),
    Rsquare.el = R2(elastic.pred, Lifeexp),
    RMSE.tree = RMSE(tree.pred, Lifeexp),
    Rsquare.tree = R2(tree.pred, Lifeexp)
  )
)
test.statistics
# RMSE.regression Rsquare.regression RMSE.el Rsquare.el RMSE.tree Rsquare.tree
# 1 8.201072 0.3910896 8.115006 0.3984145 6.369994 0.6446954
I have been working on this for a while now, but I can't seem to figure it out. I'm looking for a solution that can: calculate difference between col1 and col2 and create colA based on this; then calculate difference between col2 and col3 and create colB based on this, etc. etc. I have about 70 rows and 42 of these columns so it's not something I want to do by hand (at this point I am almost desperate enough).
To give a note also, some of the cells in the rows are empty (NA). An emergency solution would be to fill these with zeroes, but I'd rather not.
Also, the dataframe I use is a tibble, however, I am not bound to this so much that I can't change it to a real dataframe.
My data looks like this:
testdata
As you can see, the columns have annoyingly long names, which I also did not know how to change :). I usually use the column numbers, which are 77:119. I hope this is complete enough. Sorry for the noob-ness and possibly unclear explanation, this is my first question on here and I'm not that craftsy in R!
Finally, to create the 'user/intermittent_answers/n_length' columns I used the following loop, so I thought it'd be possible to reuse this for the calculations that I need now.
# For each of columns 34 to 76 of testdata, add a "<column>_length" column
# holding the string length of that column's entries
for (col_name in names(testdata[34:76])) {
  testdata[[paste0(col_name, "_length")]] <- str_length(testdata[[col_name]])
}
Then I tried something similar which I found here: FOR loop to calculate difference on dates in R
for (j in 2:length(testdata$`user/intermittant_answers/42_length`))
+ testdata$lag[j] <- as.numeric(difftime(testdata$`user/intermittant_answers/42_length`[j], testdata$`user/intermittant_answers/42_length`[j-1], units=c("difference")), units = "days")
Error in as.POSIXct.numeric(time1) : 'origin' must be supplied
I figured this was because I am not working with anything time-related, but I don't know of (or how to find) another 'diff'-related function that is not bound to matrices, like the one from the matrixStats package.
I hope someone can push me in the right direction!
Thank you!!
EDIT: @Ben, thank you for responding! If I had known this function I would've used it way sooner :'). I tried to keep a representation of NA values inside the df. Also, some people suggested using a double loop; however, I have not managed to figure this out. I hope this helps!
> dput(testdata[1:10, 95:105])
structure(list(`user/intermittant_answers/18_length` = c(NA,
24L, 34L, 33L, NA, NA, 16L, NA, 25L, 28L), `user/intermittant_answers/19_length` = c(NA,
38L, 68L, 34L, NA, 11L, 20L, 12L, 47L, 52L), `user/intermittant_answers/20_length` = c(NA,
59L, 81L, 42L, 2L, 33L, 20L, 26L, 96L, 78L), `user/intermittant_answers/21_length` = c(6L,
90L, 116L, 42L, 14L, 41L, 20L, NA, 127L, 113L), `user/intermittant_answers/22_length` = c(17L,
115L, 131L, 65L, 20L, 70L, 37L, 11L, 170L, 130L), `user/intermittant_answers/23_length` = c(40L,
138L, 188L, 65L, 38L, 113L, 22L, 24L, 200L, 136L), `user/intermittant_answers/24_length` = c(66L,
155L, 210L, 99L, 49L, 133L, 41L, 49L, 242L, 185L), `user/intermittant_answers/25_length` = c(66L,
158L, 233L, 99L, 65L, 156L, 67L, 70L, 296L, 224L), `user/intermittant_answers/26_length` = c(84L,
201L, 250L, 113L, 84L, 164L, 67L, 78L, 334L, 224L), `user/intermittant_answers/27_length` = c(89L,
237L, 285L, 130L, 97L, 167L, 84L, 86L, 412L, 232L), `user/intermittant_answers/28_length` = c(116L,
284L, 315L, 130L, 97L, 184L, 97L, 108L, 445L, 247L)), row.names = c(NA,
-10L), class = c("tbl_df", "tbl", "data.frame"))
I have this data collected on a tripole scale, where respondents were clicking on a point inside a triangle to show how someone responded to a situation:
structure(list(ID = c(24262L, 24263L, 24264L, 24266L, 24267L,
24268L, 24269L, 24270L, 24271L, 24272L, 24273L, 24275L, 24279L,
24282L, 24285L, 24286L, 24287L, 24288L, 24290L, 24292L, 24296L,
24298L, 24299L, 24300L, 24301L, 24302L, 24304L, 24305L, 24309L,
24310L, 24314L, 24328L, 24329L, 24330L, 24331L, 24332L, 24333L,
24339L, 24356L, 24363L, 24370L, 24378L, 24388L, 24390L, 24393L,
24404L, 24406L, 24408L, 24410L, 24420L, 24422L, 24431L, 24435L,
24449L, 24456L, 24457L, 24469L, 24503L, 24535L, 24538L, 24541L,
24543L, 24547L, 24549L, 24555L, 24560L, 24562L, 24564L, 24565L,
24574L, 24693L, 24694L, 24707L, 24711L, 24715L, 24717L, 24719L,
24721L, 24723L, 24725L, 24727L, 24733L, 24735L, 24737L, 24742L,
24750L, 24752L, 24758L, 24761L, 24762L, 24764L, 24770L, 24863L,
24865L, 24866L, 24867L, 24870L, 24885L, 24891L, 24984L, 24995L,
25005L, 25006L, 25010L, 25011L, 25012L, 25014L, 25015L, 25091L,
25092L, 25093L, 25094L, 25106L, 25109L, 25110L, 25111L, 25157L,
25159L, 25162L, 25174L, 25176L, 25180L, 25294L, 25295L, 25298L,
25302L, 25303L, 25304L, 25305L, 25308L, 25339L, 25341L, 25343L,
25345L, 25348L, 25349L, 25559L, 25566L, 25573L, 25575L, 25577L,
25579L, 25581L, 25586L, 25614L, 25622L, 25630L, 25631L, 25635L,
25641L, 25670L, 25671L, 25672L, 25673L, 25674L, 25677L, 25684L,
25688L, 25691L, 25693L, 25695L, 25700L, 24211L, 24212L, 24215L,
24217L, 24218L, 24219L, 24220L, 24222L, 24225L, 24226L, 24227L,
24230L, 24232L, 24234L, 24236L, 24237L, 24238L, 24239L, 24240L,
24243L, 24246L, 24247L, 24250L, 24251L, 24252L), Respectfully = c(0.5385952,
0.672799766, 0.515947104, 0.609299839, 0.600087047, 0.215293989,
0.112566531, 0.631413877, 0.171163484, 0.280788928, 0.895692229,
0.247195691, 0.181995317, 0.163163558, 0.900582135, 0.818431854,
0.795888841, 0.614360929, 0.945623696, 0.922643483, 0.628791392,
0.175074518, 0.619624436, 0.595834434, 0.352946192, 0.531283677,
0.211680189, 0.659169912, 0.526771784, 0.929830313, 0.898694217,
0.613898337, 0.617298901, 0.56617099, 0.554916739, 0.64306879,
0.189266831, 0.920095921, 0.712526262, 0.854605317, 0.913350403,
0.933309317, 1.006667733, 0.987369776, 1.017328858, 0.957674563,
0.90463531, 0.9272874, 0.891221881, 0.884747803, 0.933109701,
1.019063711, 0.916044593, 0.156491563, 0.654910684, 0.517636955,
0.247314185, 0.343438685, 0.337267578, 0.326364845, 0.114466496,
0.090442464, 0.243850961, 0.092173956, 0.235721201, 0.996143162,
0.635637045, 0.970861077, 0.948802829, 0.551817477, 0.912414432,
0.200542375, 0.826407254, 0.071805023, 0.892377079, 0.087980591,
0.918832958, 0.099396825, 1.023749948, 0.102644026, 0.107016437,
0.997948647, 0.110704333, 0.940060258, 0.091438882, 0.055989511,
0.081595875, 0.081419758, 0.770171881, 0.610801637, 0.511512518,
1.070922136, 0.593650937, 0.569419086, 0.873148918, 0.378054291,
0.582714975, 0.60744822, 0.14328903, 0.067492828, 0.315115869,
0.75541079, 0.061788347, 0.087719396, 1.049453616, 0.069038175,
1.044347167, 0.501647294, 0.476157516, 0.110015221, 0.269865036,
0.147203833, 0.961993456, 0.785571694, 0.641585886, 0.638352633,
0.609070599, 0.870874465, 0.864675701, 0.096855976, 0.610836565,
0.627459884, 0.874884486, 0.972632468, 0.164256439, 0.873557031,
0.57596755, 0.565361559, 0.586712956, 0.941195965, 0.446302474,
0.206582263, 0.610695481, 0.638060987, 0.530307591, 1.029941678,
0.607028246, 0.6176126, 0.543566525, 0.519073486, 0.609546781,
0.139241472, 0.901534081, 0.150142923, 0.317818969, 0.189081565,
0.626691282, 0.624533534, 0.612181485, 0.634860277, 0.646151781,
0.633498967, 0.624919891, 0.623312056, 0.631034791, 0.608126938,
0.236088231, 0.323942959, 0.919163823, 0.233712777, 0.276786536,
0.833319068, 0.095358528, 0.812533975, 0.209690139, 0.735989869,
0.596592605, 0.493421763, 0.818909705, 0.805246234, 0.613435805,
0.270724922, 0.366894066, 0.600306869, 0.869067788, 0.145871058,
0.604971766, 0.134385094, 0.588236988, 0.587666631, 1.032822847,
0.623843968, 0.605744064, 0.131348848, 0.588236988, 0.087467365,
0.600683391), Transparently = c(0.820800126, 0.615894616, 0.784985006,
0.606558323, 0.842676938, 0.844404042, 0.916779697, 0.615372658,
0.874791503, 0.814765275, 0.126808345, 0.855662525, 0.846717596,
0.862914324, 0.913444817, 0.251324534, 0.248540372, 0.614360929,
0.936769724, 0.095737927, 0.583792984, 0.858672082, 0.603269815,
0.617806852, 0.728860557, 0.763061166, 0.811132908, 0.599038482,
0.811664104, 0.077664897, 0.134824425, 0.606615484, 0.564655364,
0.618685603, 0.633455515, 0.545877218, 0.855959177, 0.095988706,
0.433236271, 0.697069466, 0.932611644, 0.942195773, 1.008322001,
0.992420793, 1.028732777, 0.969780326, 0.122604199, 0.099307142,
0.138839573, 0.150925994, 0.085792698, 1.020697951, 0.095590822,
0.849863172, 0.647231042, 0.773270965, 0.79933852, 0.781846166,
0.777013123, 0.73322922, 0.914041042, 0.923891008, 0.798273802,
0.938193262, 0.839317203, 0.990858972, 0.590011358, 0.042210646,
0.074093886, 0.548788846, 0.916915476, 0.836126328, 0.575304508,
0.935497701, 0.127815932, 0.920728266, 0.104502067, 0.921889246,
1.03024137, 0.907672346, 0.920933843, 1.002946377, 0.903099537,
0.083944403, 0.922207296, 0.956200302, 0.936974704, 0.937197804,
0.270489872, 0.625058591, 0.496246278, 1.073989391, 0.593650937,
0.592372119, 0.694542348, 0.625950456, 0.619678259, 0.570666313,
0.871415496, 0.946574152, 0.728291929, 0.722327173, 0.946510434,
0.926541567, 1.049453616, 0.943204463, 1.03007555, 0.50816232,
0.835366428, 0.918267071, 0.787079275, 0.868908703, 0.951541662,
0.811538815, 0.61506027, 0.663948357, 0.586418152, 0.898504972,
0.1523799, 0.914196193, 0.583227396, 0.606079459, 0.213126272,
0.986245692, 0.870046079, 0.869732857, 0.604211867, 0.736863017,
0.648767114, 0.939423501, 0.557043076, 0.804438114, 0.532972872,
0.598525584, 0.841363668, 1.029941678, 0.612435043, 0.615830719,
0.509812713, 0.497207224, 0.609743237, 0.897805572, 0.863769054,
0.864284277, 0.756386161, 0.861637115, 0.617861569, 0.612092674,
0.622858763, 0.583585918, 0.614777744, 0.603289545, 0.619621992,
0.586993933, 0.593338847, 0.614418983, 0.779004991, 0.70745641,
0.11726483, 0.775427818, 0.74606353, 0.851781547, 0.919092059,
0.924776435, 0.829707384, 0.580720782, 0.596592605, 0.519732594,
0.421046019, 0.215226546, 0.556450188, 0.759358466, 0.824817002,
0.577669203, 0.169151321, 0.881558836, 0.599436522, 0.90624404,
0.604998171, 0.622988939, 1.034414053, 0.626509905, 0.632660449,
0.89102143, 0.604998171, 0.918262541, 0.55049324), Impartially = c(0.465658277,
0.461714715, 0.497125953, 0.520229161, 0.401690006, 0.802266479,
0.894968808, 0.493858635, 0.84177649, 0.737350881, 0.889409304,
0.759607494, 0.847863555, 0.862213612, 0.109956756, 0.771663547,
0.793201268, 0.509038925, 0.069157727, 0.914556921, 0.524168909,
0.847581744, 0.51422137, 0.522430778, 0.704787254, 0.489853799,
0.838132501, 0.486388475, 0.479211062, 0.93793416, 0.875783682,
0.516177416, 0.552293181, 0.549585342, 0.547982574, 0.549040437,
0.829416931, 0.916274369, 0.618573725, 0.305697709, 0.092083447,
0.073197983, 0.008801615, 0.012722424, 0.028896471, 0.043793023,
0.888742805, 0.90670836, 0.875128031, 0.862690747, 0.92150861,
0.02294177, 0.92108041, 0.896055102, 0.44906044, 0.499450028,
0.791781664, 0.680421829, 0.690234244, 0.738181829, 0.893988132,
0.921715975, 0.798098445, 0.910879731, 0.779454529, 0.009174875,
0.512821853, 0.95919919, 0.92872417, 0.63617301, 0.100211762,
0.828822911, 0.428452343, 0.941970348, 0.890839458, 0.929689884,
0.904637277, 0.909784257, 0.031725951, 0.918267727, 0.899998665,
0.005025526, 0.908758759, 0.920179784, 0.921686828, 0.948506773,
0.924896836, 0.925002098, 0.785541952, 0.503044903, 0.803664029,
0.082803823, 0.545991898, 0.570557058, 0.305972755, 0.83199054,
0.532894731, 0.555229306, 0.887567461, 0.938506544, 0.761560678,
0.323256642, 0.947489619, 0.923677325, 0.0566625, 0.938717306,
0.045118894, 0.795827687, 0.525770962, 0.897508681, 0.769734502,
0.883341014, 0.051330354, 0.244468406, 0.485932499, 0.449341804,
0.538715363, 0.138629705, 0.879033506, 0.920966506, 0.540138841,
0.505164027, 0.791890979, 0.027370578, 0.854349852, 0.15208894,
0.553023458, 0.468633413, 0.50540942, 0.069690846, 0.827210844,
0.861133993, 0.591059625, 0.50319165, 0.470964789, 0.034407794,
0.517081559, 0.504993081, 0.700809896, 0.775536239, 0.517217338,
0.870460451, 0.143105969, 0.883978724, 0.728849649, 0.826070011,
0.495627373, 0.502366483, 0.503692865, 0.519353449, 0.482581139,
0.50265485, 0.495607883, 0.525813699, 0.513563395, 0.514401138,
0.839320302, 0.775061905, 0.887907207, 0.853994012, 0.807586253,
0.188630834, 0.918224454, 0.18752791, 0.820726156, 0.454573005,
0.540567636, 0.784800649, 0.584334373, 0.839739561, 0.563928843,
0.798892915, 0.639075577, 0.554966569, 0.848121166, 0.872286737,
0.530407548, 0.872425079, 0.540660083, 0.525467217, 0.038644876,
0.490734726, 0.501179636, 0.886452258, 0.540660083, 0.93474859,
0.581967294)), row.names = c(2L, 3L, 4L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 16L, 17L, 19L, 20L, 21L, 22L, 24L, 25L, 27L,
28L, 29L, 30L, 31L, 32L, 34L, 35L, 37L, 38L, 39L, 40L, 41L, 42L,
43L, 44L, 45L, 51L, 57L, 61L, 65L, 68L, 73L, 74L, 76L, 81L, 82L,
83L, 84L, 88L, 89L, 92L, 93L, 98L, 100L, 101L, 110L, 116L, 121L,
123L, 125L, 126L, 127L, 129L, 130L, 132L, 133L, 134L, 135L, 143L,
146L, 147L, 154L, 157L, 159L, 160L, 161L, 162L, 163L, 164L, 165L,
168L, 169L, 170L, 173L, 177L, 178L, 180L, 181L, 182L, 183L, 186L,
188L, 190L, 191L, 192L, 195L, 196L, 201L, 209L, 213L, 218L, 219L,
222L, 223L, 224L, 225L, 226L, 233L, 234L, 235L, 236L, 239L, 241L,
242L, 243L, 252L, 253L, 256L, 265L, 267L, 270L, 277L, 278L, 281L,
282L, 283L, 284L, 285L, 288L, 294L, 295L, 296L, 297L, 299L, 300L,
303L, 308L, 313L, 314L, 315L, 316L, 317L, 320L, 333L, 337L, 339L,
340L, 343L, 347L, 351L, 352L, 353L, 354L, 355L, 357L, 358L, 360L,
361L, 362L, 363L, 364L, 367L, 368L, 371L, 373L, 374L, 375L, 376L,
378L, 380L, 381L, 382L, 385L, 387L, 389L, 391L, 392L, 393L, 394L,
395L, 396L, 398L, 399L, 401L, 402L, 403L), class = "data.frame")
and would like to create a ternary plot as this one:
So far, I have this code:
# Ternary scatter plot of the three response scores, drawn with point
# shape 21 using a yellow fill and a red outline
plot <- ggtern(
  data = behavior,
  mapping = aes(x = Respectfully, y = Impartially, z = Transparently)
) +
  geom_point(shape = 21, size = 3, fill = "yellow", color = "red")
plot
which gives me this:
How do I rotate the triangle canvas to fit the plot?
I have searched and could not find any help online.
I don't want to rotate the whole diagram, just the triangle. I want to retain the points as they are.
You can rotate the diagram by an angle (in degrees or radians) using theme_rotate.
library(ggtern)
# Ternary scatter plot with Impartially on x and Respectfully on y; the
# triangle is rotated by theme_rotate() called with its default arguments
plot <- ggtern(
  data = behavior,
  mapping = aes(x = Impartially, y = Respectfully, z = Transparently)
) +
  geom_point(shape = 21, size = 3, fill = "yellow", color = "red") +
  theme_rotate()
plot
I want to create a barplot and my data is in a csv file in the following format
0,22
40,50
80,62
120,70
160,62
200,49
240,52
280,64
320,57
360,50
400,47
440,52
480,73
520,70
560,68
600,71
640,69
680,61
720,59
760,59
800,62
840,62
880,62
920,72
960,81
1000,89
1040,86
1080,76
1120,80
1160,95
The element before the comma should be the position on the x axis and the element after the comma the height of the bar at that position. I can do this in Excel but the data is large.
The graph I want would look like this.
I have tried the following but I think it sums the data in each row.
# Read the header-less, comma-separated file and convert it to a matrix
data <- as.matrix(read.csv("data.csv", header = FALSE, sep = ","))
# The whole two-column matrix is passed as the bar heights here
barplot(height = data)
# Bar heights come from column V2; bars are labelled 1..nrow(x) with the
# axis names drawn at 60% of the default size
barplot(height = x$V2, cex.names = 0.6, names.arg = seq_len(nrow(x)))
Two things: first, if you supply the whole matrix to the height parameter of barplot, it will sum them. Instead, give it only your data.
dput(dat)
structure(c(0L, 40L, 80L, 120L, 160L, 200L, 240L, 280L, 320L,
360L, 400L, 440L, 480L, 520L, 560L, 600L, 640L, 680L, 720L, 760L,
800L, 840L, 880L, 920L, 960L, 1000L, 1040L, 1080L, 1120L, 1160L,
22L, 50L, 62L, 70L, 62L, 49L, 52L, 64L, 57L, 50L, 47L, 52L, 73L,
70L, 68L, 71L, 69L, 61L, 59L, 59L, 62L, 62L, 62L, 72L, 81L, 89L,
86L, 76L, 80L, 95L), .Dim = c(30L, 2L), .Dimnames = list(NULL,
c("V1", "V2")))
# Use only the second column of dat as the bar heights
barplot(dat[, 2])
Second, you need to supply names.arg to barplot to get the labeling:
# Second column gives the bar heights, first column the bar labels
barplot(dat[, 2], names.arg = dat[, 1])
A side note: it's best to avoid naming variables after built-in R functions. ?data is probably the most commonly overwritten! I regularly use dat instead.
Using your method of getting the data into R:
# Read the comma-separated file, treating its first line as data (no header)
myData <- read.csv("data.csv", header = FALSE, sep = ",")
To make sure that the order of the bars follows the order of the values in the first column (although this is not strictly what you asked for in your question)
# Reorder the rows by the values of the first column
myData2 <- myData[order(myData[[1]]), ]
# Second column gives the bar heights, first column the bar labels
barplot(height = myData2[[2]], names.arg = myData2[[1]])
For tweaking the graph, I recommend spending some time reading ?barplot and ?par