I want to replace the row names of meth.kirp.cpg with the corresponding value of anno$V1 wherever an existing row name matches anno$V2.
library(tidyverse)
# Attempted approach (fails - see the traceback below): pipe the data frame
# through rowwise()/mutate() and look up anno$V1 via str_which().
# NOTE(review): str_which() is called with two whole vectors; the traceback
# reports that their sizes cannot be recycled against each other.
rownames(meth.kirp.cpg) <- meth.kirp.cpg %>%
rowwise() %>%
mutate(out = anno$V1[str_which(colnames(meth.kirp.cpg), anno$V2)])
Traceback:
Error in `mutate()`:
ℹ In argument: `out = anno$V1[str_which(rownames(meth.kirp.cpg),
anno$V2)]`.
ℹ In row 1.
Caused by error in `str_detect()`:
! Can't recycle `string` (size 142513) to match `pattern` (size 365860).
Run `rlang::last_error()` to see where the error occurred.
Example data:
meth.kirp.cpg
> dput(meth.kirp.cpg[1:100,1:2])
structure(list(TCGA.2K.A9WE.01A = c(0.461440642939772, 0.143910373119058,
0.847164847154162, 0.737361955793681, 0.716794733144112, 0.351877113536983,
0.248986769373366, 0.0121360989202765, 0.876303885229884, 0.944311384947134,
0.0490407302658151, 0.0200484962577958, 0.0623434271852525, 0.489865398138095,
0.920994933496615, 0.92663932531651, 0.0149191766670711, 0.884749685210921,
0.446591784140497, 0.91113228700911, 0.912199953863369, 0.908167409366654,
0.386721526377863, 0.0386737340626713, 0.0347492896507038, 0.98309370597552,
0.0176080612232509, 0.91878387167279, 0.743683318738873, 0.939148492241393,
0.722471943330892, 0.613143449419421, 0.0111202783577944, 0.843823786705695,
0.836431557867031, 0.390282953982417, 0.027408710286304, 0.0222349236137297,
0.657221610108816, 0.861848830221141, 0.0433751011272091, 0.0281247935879252,
0.938960776959358, 0.919825831744144, 0.922071582222369, 0.874732275907705,
0.0287898761495033, 0.0266947996996682, 0.922915821025777, 0.95009866012662,
0.964858875373814, 0.106451342824246, 0.406100902807456, 0.0421684244823044,
0.0341277368595181, 0.805451068725895, 0.147595746750675, 0.602617067494429,
0.90660866745333, 0.922313274809095, 0.462291286891102, 0.502857899902497,
0.0292904155423265, 0.835117565787527, 0.146789494933407, 0.06805696389495,
0.970563583145203, 0.0379479981289824, 0.058526761439653, 0.938993650169269,
0.44761099556807, 0.558961729061086, 0.939778576056268, 0.0728795533192928,
0.812084345787681, 0.899377654465699, 0.940111049552295, 0.838186810388758,
0.715121288990262, 0.897506380407565, 0.0929678061732199, 0.99024632582796,
0.055583745670494, 0.835146654988372, 0.973309086845447, 0.651216797099359,
0.0218535991986461, 0.0999671036378156, 0.790540668893094, 0.980591855409854,
0.567883806155822, 0.774816434396113, 0.904434807209845, 0.16641097147085,
0.0102686285230525, 0.65243489007093, 0.917594420539083, 0.0147831247626457,
0.844679485594683, 0.65566679452182), TCGA.2Z.A9J1.01A = c(0.595894468074615,
0.0807243779293262, 0.867305510246114, 0.70680600651273, 0.217862460492399,
0.169408257004071, 0.173115013795265, 0.0108902025634162, 0.813866558997356,
0.938576461648791, 0.0426568318037534, 0.0133187057875756, 0.0540543120983417,
0.317547629906197, 0.89911570032979, 0.525131175543627, 0.0152198596492253,
0.586968687135673, 0.49896100615873, 0.946718072906056, 0.859306039060091,
0.91185524112895, 0.28077646371254, 0.0413484993379312, 0.169193526857136,
0.941230054689418, 0.0164701153466769, 0.928402415411224, 0.736184540407898,
0.946288965623826, 0.312150292032857, 0.403171876971832, 0.0091246246912222,
0.535149883791691, 0.801041308364712, 0.171664264695538, 0.022737572168221,
0.0164834707992085, 0.34399568227201, 0.690016503202975, 0.0390842331750004,
0.0270854886242561, 0.888936631403145, 0.911902815624012, 0.858247513475469,
0.877113632682254, 0.0342892379505875, 0.0387268488822914, 0.922299785913074,
0.926130065834329, 0.975692332236198, 0.105415153493416, 0.127593519059119,
0.0540003798276299, 0.030980833881057, 0.914299941557146, 0.0512267439881511,
0.307325891435045, 0.941037265659174, 0.927078967007025, 0.48873418258592,
0.259006924115841, 0.0278764868641079, 0.87768067729952, 0.302640875302654,
0.0706384569300761, 0.968762634771395, 0.0364352674378962, 0.0441231506131831,
0.8307385629478, 0.242575477196221, 0.513439830376976, 0.932449172188782,
0.0526229004254996, 0.81314353054328, 0.778591104943176, 0.95668645045373,
0.453172059602829, 0.250129171963381, 0.863470213940097, 0.0994627135023581,
0.989489689575077, 0.0472116225581592, 0.911407225108748, 0.825189076107663,
0.578029414148402, 0.018058167343065, 0.0855852777154159, 0.819733395638372,
0.988287891473147, 0.255899615791521, 0.643359326354994, 0.491979154678761,
0.0978562004864199, 0.0105671614378101, 0.48897100984416, 0.9024550858788,
0.0131702158217202, 0.81328537816321, 0.85890307119103)), row.names = c("cg00000029",
"cg00000165", "cg00000236", "cg00000289", "cg00000292", "cg00000321",
"cg00000363", "cg00000622", "cg00000658", "cg00000721", "cg00000734",
"cg00000769", "cg00000905", "cg00000924", "cg00000948", "cg00000957",
"cg00001245", "cg00001249", "cg00001261", "cg00001349", "cg00001364",
"cg00001446", "cg00001510", "cg00001582", "cg00001583", "cg00001687",
"cg00001747", "cg00001791", "cg00001809", "cg00001854", "cg00001874",
"cg00002033", "cg00002116", "cg00002145", "cg00002190", "cg00002224",
"cg00002236", "cg00002406", "cg00002426", "cg00002449", "cg00002464",
"cg00002490", "cg00002531", "cg00002591", "cg00002593", "cg00002597",
"cg00002660", "cg00002719", "cg00002769", "cg00002808", "cg00002809",
"cg00002810", "cg00002837", "cg00003091", "cg00003173", "cg00003181",
"cg00003287", "cg00003345", "cg00003513", "cg00003529", "cg00003578",
"cg00003625", "cg00003784", "cg00003969", "cg00003994", "cg00004055",
"cg00004067", "cg00004072", "cg00004082", "cg00004089", "cg00004105",
"cg00004121", "cg00004192", "cg00004207", "cg00004209", "cg00004429",
"cg00004533", "cg00004562", "cg00004608", "cg00004773", "cg00004818",
"cg00004883", "cg00004939", "cg00004963", "cg00004979", "cg00004996",
"cg00005010", "cg00005040", "cg00005072", "cg00005083", "cg00005112",
"cg00005166", "cg00005215", "cg00005297", "cg00005306", "cg00005390",
"cg00005437", "cg00005543", "cg00005617", "cg00005619"), class = "data.frame")
anno
> dput(anno[1:100,])
structure(list(V1 = c("TSPY4", "TTTY14", "TMSB4Y", "TBL1Y", "TMSB4Y",
"TSPY4", "RPS4Y2", "EIF1AY", "PCDH11Y", "TBL1Y", "ZFY", "FAM197Y2",
"TTTY14", "TSPY4", "ZFY", "NLGN4Y", "EIF1AY", "TSPY4", "TBL1Y",
"UTY", "PRKY", "ZFY", "CD24", "PRKY", "TSPY1", "CYorf15A", "TSPY2",
"TTTY15", "RPS4Y2", "UTY", "CYorf15A", "RPS4Y2", "TSPY2", "TBL1Y",
"TSPY3", "DDX3Y", "CYorf15A", "ZFY", "RBMY1F", "DDX3Y", "RPS4Y2",
"ZFY", "DDX3Y", "TTTY15", "BCORL2", "PCDH11Y", "KDM5D", "TTTY14",
"EIF1AY", "DDX3Y", "LOC100101121", "CYorf15A", "TTTY15", "TSPY1",
"TSPY1", "FAM197Y2", "TSPY4", "TMSB4Y", "DDX3Y", "TTTY15", "TTTY20",
"NLGN4Y", "TSPY4", "CYorf15A", "RPS4Y2", "KDM5D", "RBMY1J", "EIF1AY",
"KDM5D", "ZFY", "TGIF2LY", "HMGN5", "EBP", "UBL4A", "WDR13",
"MTM1", "BCOR", "ZCCHC12", "FTHL17", "PORCN", "NAA10", "PCDH11X",
"ARSE", "DOCK11", "PDK3", "LONRF3", "MAGIX", "PCYT1B", "SLC6A8",
"UBE2A", "TAF9B", "STARD8", "BCOR", "ZIC3", "IL1RAPL2", "TMSB4X",
"CLCN5", "LOC100133957", "SCML1", "GNL3L"), V2 = c("cg00050873",
"cg00212031", "cg00214611", "cg01707559", "cg02004872", "cg02011394",
"cg02050847", "cg02233190", "cg02494853", "cg02839557", "cg02842889",
"cg03052502", "cg03244189", "cg03443143", "cg03683899", "cg03706273",
"cg03750315", "cg04016144", "cg04042030", "cg04448376", "cg04689676",
"cg04840163", "cg05230942", "cg05480730", "cg05544622", "cg05621349",
"cg05865243", "cg05890011", "cg06322277", "cg06479204", "cg07731488",
"cg07747963", "cg08242338", "cg08921682", "cg09350919", "cg09856092",
"cg10076560", "cg10213302", "cg10267609", "cg10698069", "cg10841270",
"cg11131351", "cg14180491", "cg14741114", "cg15027426", "cg15295597",
"cg15329860", "cg15345074", "cg15422579", "cg15429127", "cg15682806",
"cg15682993", "cg15746461", "cg15810474", "cg15935877", "cg17834650",
"cg17837162", "cg18032798", "cg18077436", "cg25032547", "cg25071634",
"cg25518695", "cg25705492", "cg25756647", "cg26058907", "cg26517491",
"cg26983430", "cg26983535", "cg27049643", "cg27433982", "cg27539833",
"cg00008945", "cg00011200", "cg00011891", "cg00014152", "cg00016522",
"cg00016934", "cg00018261", "cg00021786", "cg00026186", "cg00072288",
"cg00072839", "cg00074638", "cg00112256", "cg00114625", "cg00114913",
"cg00116709", "cg00139317", "cg00140085", "cg00142683", "cg00192980",
"cg00200463", "cg00206414", "cg00240113", "cg00241296", "cg00241907",
"cg00264378", "cg00265812", "cg00266918", "cg00360365")), row.names = c(1L,
2L, 4L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 16L, 17L, 18L,
20L, 21L, 23L, 25L, 27L, 29L, 31L, 34L, 35L, 36L, 37L, 38L, 39L,
40L, 41L, 42L, 43L, 45L, 47L, 48L, 50L, 51L, 52L, 54L, 56L, 57L,
58L, 61L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 72L, 73L, 74L, 75L,
76L, 77L, 78L, 79L, 80L, 82L, 83L, 85L, 86L, 87L, 88L, 89L, 91L,
92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L,
104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L,
115L, 116L, 117L, 118L, 119L, 120L, 122L, 123L, 124L, 125L), class = "data.frame")
In your example data, none of the row names of the CpG data frame occur in the annotation data frame:
# Tabulate whether each existing row name is present among the annotation IDs
table(is.element(rownames(meth.kirp.cpg), anno$V2))
# FALSE
# 100
The code below should work with your full data, assuming there are no duplicates and all row names are present in the annotation data frame:
# Replace each row name (probe ID) by the value of anno$V1 found at the
# position where that probe ID occurs in anno$V2.
old_names <- rownames(meth.kirp.cpg)
new_names <- anno$V1[match(old_names, anno$V2)]
# Row names without a match come back as NA; keep the original name for those,
# because NA is not allowed as a data.frame row name.
unmatched <- is.na(new_names)
new_names[unmatched] <- old_names[unmatched]
# Row names must be unique, but several IDs in anno$V2 can share one anno$V1
# value; make.unique() appends ".1", ".2", ... to repeated names.
rownames(meth.kirp.cpg) <- make.unique(new_names)
Sorry if this is a duplicate question, but I cannot seem to find the answer anywhere. I have two plots and I would like to overlay plot two on plot one so that they form a single plot. Is this possible? I have attached how both plots look separately. They are both faceted by the same variable (location) and use the same x- and y-axis scales, so in theory it should be possible.
Thank you.
## Plot one: ribbon from LOWER to UPPER plus the RSL line, one facet per SITE
Proxy <- read.csv("ALLRSL.csv", header = TRUE)

# Same four colours are used for the ribbon fill and the line colour
site_colours <- c("#4E193D", "#342955", "#4E617E", "#97B4CB")

p1 <- ggplot() +
  geom_ribbon(
    data = Proxy,
    mapping = aes(x = YEAR, ymin = LOWER, ymax = UPPER, fill = SITE),
    alpha = .5
  ) +
  geom_line(data = Proxy, mapping = aes(x = YEAR, y = RSL, col = SITE)) +
  facet_wrap(~SITE, ncol = 1) +
  scale_fill_manual(values = site_colours) +
  scale_color_manual(values = site_colours) +
  xlim(1900, 2020) +
  theme_classic() +
  # All theme tweaks collected in a single call: no grid lines, no x-axis
  # title/text/ticks, no facet strips and no legend.
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.background = element_blank(),
    strip.text.x = element_blank(),
    legend.position = "none"
  )
p1
## Plot two
# Read the data, drop incomplete rows and reshape to long format:
# one row per Year/Site combination with the measurement in `value`.
tgsm <- na.omit(read.csv("tgsm.csv", header = TRUE))
tglonger <- pivot_longer(
  tgsm,
  cols = -Year,
  names_to = "Site",
  values_to = "value"
)

p2 <- ggplot() +
  geom_point(
    data = tglonger,
    mapping = aes(x = Year, y = value, col = Site),
    alpha = .7,
    size = 1
  ) +
  facet_wrap(~Site, ncol = 1) +
  scale_color_manual(
    values = c("#4E193D", "#342955", "#4E617E", "#97B4CB")
  ) +
  xlim(1900, 2020) +
  theme_classic() +
  # Single theme() call: no grid lines, no x-axis title/text/ticks,
  # no facet strips and no legend.
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.background = element_blank(),
    strip.text.x = element_blank(),
    legend.position = "none"
  )
p2
Data
# Example `Proxy` data (dput() output): six rows for YEAR 1820-1825 with
# columns RSL, UPPER, LOWER, YEAR and SITE. SITE is a four-level factor, but
# every row here carries level 1 ("Little Swanport").
Proxy <- structure(list(RSL = c(-0.305251214, -0.306414006, -0.307194187,
-0.308202139, -0.309150572, -0.309679123), UPPER = c(-0.182716456,
-0.186724068, -0.189331305, -0.193118273, -0.197069799, -0.20118809
), LOWER = c(-0.416725663, -0.413606073, -0.411131729, -0.408930899,
-0.406531588, -0.404478981), YEAR = 1820:1825, SITE = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("Little Swanport", "Lutregala",
"Tarra", "Wapengo"), class = "factor")), row.names = c(NA, 6L
), class = "data.frame")
# Example `tgsm` data (dput() output): six rows (row names 158:163) for
# Year 1993-1998 with one column per series (Lg2002, Wap2002, Tar2002,
# LSP2002). The `na.action` attribute of class "omit" lists the row indices
# recorded as removed (1-157 and 183).
tgsm <- structure(list(Year = 1993:1998, Lg2002 = c(-0.001164223, -0.002229453,
-0.002734792, -0.002977787, -0.002786098, -0.002026994), Wap2002 = c(-0.002531348,
-0.002051993, -0.001468704, -0.001182162, -0.001027132, -0.00020881
), Tar2002 = c(-0.029020612, -0.024330561, -0.019927593, -0.015682528,
-0.012907219, -0.009784772), LSP2002 = c(-0.034514531, -0.030171621,
-0.026095138, -0.021952898, -0.018480702, -0.014531318)), na.action = structure(c(`1` = 1L,
`2` = 2L, `3` = 3L, `4` = 4L, `5` = 5L, `6` = 6L, `7` = 7L, `8` = 8L,
`9` = 9L, `10` = 10L, `11` = 11L, `12` = 12L, `13` = 13L, `14` = 14L,
`15` = 15L, `16` = 16L, `17` = 17L, `18` = 18L, `19` = 19L, `20` = 20L,
`21` = 21L, `22` = 22L, `23` = 23L, `24` = 24L, `25` = 25L, `26` = 26L,
`27` = 27L, `28` = 28L, `29` = 29L, `30` = 30L, `31` = 31L, `32` = 32L,
`33` = 33L, `34` = 34L, `35` = 35L, `36` = 36L, `37` = 37L, `38` = 38L,
`39` = 39L, `40` = 40L, `41` = 41L, `42` = 42L, `43` = 43L, `44` = 44L,
`45` = 45L, `46` = 46L, `47` = 47L, `48` = 48L, `49` = 49L, `50` = 50L,
`51` = 51L, `52` = 52L, `53` = 53L, `54` = 54L, `55` = 55L, `56` = 56L,
`57` = 57L, `58` = 58L, `59` = 59L, `60` = 60L, `61` = 61L, `62` = 62L,
`63` = 63L, `64` = 64L, `65` = 65L, `66` = 66L, `67` = 67L, `68` = 68L,
`69` = 69L, `70` = 70L, `71` = 71L, `72` = 72L, `73` = 73L, `74` = 74L,
`75` = 75L, `76` = 76L, `77` = 77L, `78` = 78L, `79` = 79L, `80` = 80L,
`81` = 81L, `82` = 82L, `83` = 83L, `84` = 84L, `85` = 85L, `86` = 86L,
`87` = 87L, `88` = 88L, `89` = 89L, `90` = 90L, `91` = 91L, `92` = 92L,
`93` = 93L, `94` = 94L, `95` = 95L, `96` = 96L, `97` = 97L, `98` = 98L,
`99` = 99L, `100` = 100L, `101` = 101L, `102` = 102L, `103` = 103L,
`104` = 104L, `105` = 105L, `106` = 106L, `107` = 107L, `108` = 108L,
`109` = 109L, `110` = 110L, `111` = 111L, `112` = 112L, `113` = 113L,
`114` = 114L, `115` = 115L, `116` = 116L, `117` = 117L, `118` = 118L,
`119` = 119L, `120` = 120L, `121` = 121L, `122` = 122L, `123` = 123L,
`124` = 124L, `125` = 125L, `126` = 126L, `127` = 127L, `128` = 128L,
`129` = 129L, `130` = 130L, `131` = 131L, `132` = 132L, `133` = 133L,
`134` = 134L, `135` = 135L, `136` = 136L, `137` = 137L, `138` = 138L,
`139` = 139L, `140` = 140L, `141` = 141L, `142` = 142L, `143` = 143L,
`144` = 144L, `145` = 145L, `146` = 146L, `147` = 147L, `148` = 148L,
`149` = 149L, `150` = 150L, `151` = 151L, `152` = 152L, `153` = 153L,
`154` = 154L, `155` = 155L, `156` = 156L, `157` = 157L, `183` = 183L
), class = "omit"), row.names = 158:163, class = "data.frame")
See "Plot one" below for how you can do that with patchwork.
However, conceptually, I am guessing you want to add some sort of prediction to historic values. I personally would put everything into one data frame and plot that. If the gap between the two time points is too large, you can facet by time point (as in my suggestion).
The plots look a bit different than your plot because you only provided data for one Site in Proxy (so I filtered the other for what I thought is the equivalent, it will work nonetheless, because the faceting remains) - and I removed all those theme elements that are not relevant to the problem.
Plot one - combining plots.
library(tidyverse)
library(patchwork)
# Drop incomplete rows, then reshape to long format using the same column
# names as Proxy (YEAR, SITE, RSL) and keep only the "LSP2002" series.
tgsm <- na.omit(tgsm)
tglonger <- tgsm %>%
  rename(YEAR = Year) %>%
  pivot_longer(cols = -YEAR, names_to = "SITE", values_to = "RSL") %>%
  filter(SITE == "LSP2002")
# First panel: ribbon spanning LOWER to UPPER with the RSL line on top,
# zoomed to 1800-1830 on x and -1 to 0 on y.
p1 <- ggplot(Proxy, aes(x = YEAR)) +
  geom_ribbon(aes(ymin = LOWER, ymax = UPPER, fill = SITE), alpha = .5) +
  geom_line(aes(y = RSL, col = SITE)) +
  facet_wrap(~SITE) +
  coord_cartesian(xlim = c(1800, 1830), ylim = c(-1, 0)) +
  theme_classic() +
  # Hide the legend, the facet strips and all x-axis decoration
  theme(
    legend.position = "none",
    strip.background = element_blank(),
    strip.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
# Second panel: points from tglonger on the same y range as p1
# (ylim -1 to 0), zoomed to 1990-2000 on x.
p2 <- ggplot() +
  geom_point(
    data = tglonger,
    aes(x = YEAR, y = RSL, col = SITE),
    alpha = .7,
    size = 1
  ) +
  facet_wrap(~SITE) +
  coord_cartesian(xlim = c(1990, 2000), ylim = c(-1, 0)) +
  theme_classic() +
  ## only one call to theme!!
  theme(
    ## this is where the theme call is different to above: titles, text and
    ## ticks are removed for both axes, and the y axis line is removed too
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.line.y = element_blank(),
    strip.background = element_blank(),
    strip.text.x = element_blank(),
    # no trailing comma after the last argument: an empty final argument is
    # only accepted where the dots handling tolerates it
    legend.position = "none"
  )
# Combine both plots into one figure with patchwork
p1 + p2
Suggestion for an alternative visualisation
# Stack both data sets; `timevar` records the origin of each row
# ("time1" = Proxy, "time2" = tglonger). SITE is then set to one common value.
df_new <- list(time1 = Proxy, time2 = tglonger) %>%
  bind_rows(.id = "timevar") %>%
  mutate(SITE = "LSP2002")

# One facet column per time period, each with its own x range
ggplot(df_new, aes(x = YEAR, y = RSL)) +
  geom_point() +
  facet_grid(rows = vars(SITE), cols = vars(timevar), scales = "free_x") +
  theme(
    legend.position = "none",
    panel.spacing = unit(.5, "lines")
  )
You can also use this data frame to create a list of plots and then stitch them together with patchwork. This approach doesn't allow you to change individual plots, though.
# Draw one panel for a single `timevar` subset of df_new
make_panel <- function(d) {
  ggplot(d, aes(x = YEAR, y = RSL)) +
    geom_point() +
    coord_cartesian(ylim = c(-0.4, 0)) +
    facet_grid(cols = vars(SITE), scales = "free_x") +
    theme(
      legend.position = "none",
      panel.spacing = unit(.5, "lines")
    )
}

# Split by time period, build the list of plots, and let patchwork arrange it
ls_p <- map(split(df_new, df_new$timevar), make_panel)
library(patchwork)
wrap_plots(ls_p)
I have training data (train.dat) and test data (test.dat). I would like to evaluate my linear regression, elastic net, tree and random forest models on the test data after training them on the training data.
From there, I would like to get the RMSE and R2 to observe the predictive accuracy of the model. However, I have 2 issues:
1. When I run my random forest model on the test data, I get the error: Error in predict.randomForest(modelFit, newdata) : missing values in newdata. The other models run fine.
2. I can only extract the RMSE and R2 values for my tree model, while the other models return NAs. A screenshot is below.
Can anyone tell me what has gone wrong with my code?
My code:
library(caret)
# Resampling scheme shared by all models below:
# 10-fold cross-validation, repeated 5 times.
set.seed(10345678)
tr.Control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 5,
  classProbs = FALSE
)
# Linear regression on all predictors plus the Govthealth x Privhealth
# interaction; predictors are centred and scaled before fitting.
lm7 <- train(
  Lifeexp ~ . + Govthealth*Privhealth,
  data = train.dat,
  method = "lm",
  preProcess = c("center", "scale"),
  trControl = tr.Control
)
# Tuning grid for glmnet: every combination of alpha (0 to 1 in steps of 0.1)
# and lambda (50 evenly spaced values from 0 to 30).
alpha.vec <- seq(from = 0, to = 1, by = 0.1)
lambda.vec2 <- seq(from = 0, to = 30, length.out = 50)
elastic.grid <- expand.grid(alpha = alpha.vec, lambda = lambda.vec2)

elastic4 <- train(
  Lifeexp ~ .,
  data = train.dat,
  method = "glmnet",
  preProcess = c("center", "scale"),
  tuneGrid = elastic.grid,
  trControl = tr.Control,
  verbose = FALSE
)
# Regression tree (rpart), tuned over 50 complexity-parameter values
# evenly spaced between 10^-4 and 0.1.
cp.vec <- seq(from = 10^-4, to = 0.1, length.out = 50)
tree2 <- train(
  Lifeexp ~ .,
  data = train.dat,
  method = "rpart",
  preProcess = c("center", "scale"),
  tuneGrid = expand.grid(cp = cp.vec),
  trControl = tr.Control
)
# Random forest with 1000 trees; mtry is tuned over 1 up to the number of
# columns of train.dat minus one.
rf.grid <- expand.grid(mtry = seq(1, ncol(train.dat) - 1))
rf1 <- train(
  Lifeexp ~ .,
  data = train.dat,
  method = "rf",
  preProcess = c("center", "scale"),
  tuneGrid = rf.grid,
  trControl = tr.Control,
  ntree = 1000
)
# Predict on the test set with each fitted model. na.action = na.pass hands
# rows with missing values to the model unchanged.
regression.pred <- predict(lm7, newdata = test.dat, type = "raw", na.action = na.pass)
elastic.pred <- predict(elastic4, newdata = test.dat, type = "raw", na.action = na.pass)
tree.pred <- predict(tree2, newdata = test.dat, type = "raw", na.action = na.pass)
# NOTE: this is the call reported to stop with "missing values in newdata"
rf.pred <- predict(rf1, newdata = test.dat, type = "raw", na.action = na.pass)

# Test-set RMSE and R-squared per model (rf.pred is not included here)
observed <- test.dat$Lifeexp
test.statistics <- data.frame(
  RMSE.regression = RMSE(regression.pred, observed),
  Rsquare.regression = R2(regression.pred, observed),
  RMSE.el = RMSE(elastic.pred, observed),
  Rsquare.el = R2(elastic.pred, observed),
  RMSE.tree = RMSE(tree.pred, observed),
  Rsquare.tree = R2(tree.pred, observed)
)
test.statistics
train.dat:
structure(list(GDP = c(402.1030419, 442.2030419, 543.3030419,
520.8966027, 254.2432569, 124.4608003, 341.5541149, 772.3135303,
478.6685897, 191.8789042, 592.4010975, 1033.912431, 138.4288795,
622.4988457, 642.7767443, 317.3893069, 269.8711377, 709.5819646,
585.07655, 780.190201, 3122.362815, 3893.596078, 1166.610276,
1674.825261, 3690.113268, 4241.788782, 2441.741991, 4043.662051,
9040.566251, 963.8417858, 2234.579866, 10330.61561, 1944.137621,
2136.440243, 567.5286729, 567.930736, 2292.445156, 2028.18197,
371.6785662, 519.5343268, 987.409723, 1482.403063, 1196.586858,
1955.588006, 6941.235848, 1038.90854, 3102.713363, 3139.966054,
3032.427138, 7328.615629, 869.6965166, 2799.648876, 617.2304355,
1126.683318, 4094.362119, 7708.100996, 10385.96443, 11683.94962,
718.1878292, 3243.231125, 3100.280468, 11286.24302, 8920.762105,
201.4671636, 785.5022829, 1510.324871, 1831.001912, 8141.913127,
12027.36588, 6967.24523, 7691.345097, 3233.295943, 367.5566093,
1357.563719, 1489.876911, 977.2736357, 1508.942737, 2007.736363,
5076.342992, 7273.563207, 948.3318545, 2146.996385, 95.18825018,
390.0933261, 2566.59695, 52022.1256, 57373.68668, 19095.467,
28149.87001, 39435.8399, 20600.37525, 23041.53473, 44141.87814,
47518.63604, 24190.24962, 46232.98962, 26891.44645, 61350.34791,
28364.64508, 50152.34014, 22303.96133, 23635.92922, 41531.9342,
47603.02763, 9600.18513, 12042.95373, 26917.75898, 20324.25356,
20087.59199, 36000.52012, 25423.07201, 32018.06325, 43024.92384,
73191.11632, 12663.36453, 30693.59308, 18440.37852, 38577.38166,
33994.40657, 21290.86038, 50950.03434, 53024.05921, 13663.02162,
13641.10272, 41945.33167, 1731.209509, 4492.727604, 11861.75616,
47236.96023, 23509.54339, 26123.97387, 74605.77451), Health = c(22.23474948,
36.44474948, 45.58774948, 46.38774948, 3.333203815, 5.359203815,
16.69390488, 19.46990488, 33.22835541, 5.300580788, 29.97179604,
33.59179604, 5.971383095, 62.66848373, 67.22848373, 8.23568,
14.98141193, 32.6487999, 10.22661548, 16.19961548, 92.18703461,
98.65987461, 143.7665911, 159.7515106, 308.6578979, 402.5568979,
99.5689502, 111.4155502, 292.8907166, 198.2263198, 221.1403198,
705.336568, 176.6524443, 200.7054443, 12.56211728, 17.72411728,
76.7208786, 98.4562786, 9.55682529, 16.01162529, 26.5686245,
33.565445, 69.66563616, 89.45643616, 275.2236792, 32.77552414,
122.5689168, 198.7124574, 221.7829742, 539.567627, 43.70681763,
108.6149597, 33.2254878, 42.36598, 60.2569, 705.1993408, 891.1377563,
992.5689563, 31.84200096, 77.2356478, 277.45864, 891.7641602,
932.325129, 15.23564, 54.30473709, 74.231488, 200.564125, 665.2514038,
755.36985, 384.9183044, 445.20158, 262.5267029, 11.56898, 45.25077438,
109.0749969, 122.02145, 42.568412, 62.25963211, 172.0576935,
200.562134, 91.17743683, 120.236549, 11.23587, 18.82835197, 99.23568,
4952.777344, 5236.3654, 1101.36589, 1674.2854, 3309.480957, 1654.5687,
1845.321045, 4449.542969, 5000.36545, 1998.634277, 6054.23658,
1900.2356, 7025.36987, 1000.5689, 5036.2356, 1233.36545, 2334.651855,
4597.244629, 5698.2547, 1500.3698, 2000.23564, 2573.740234, 3002.36547,
1520.453613, 3214.546387, 1569.3254, 2873.848145, 3644.802734,
4587.235478, 1122.02145, 2211.019043, 462.5890808, 1061.365601,
1256.56897, 1987.2145, 5186.632813, 6547.2356, 990.32658, 1053.891602,
4201.3698, 122.02145, 238.0044861, 712.2356, 1513.565918, 2015.18042,
2985.23, 8021.80957), Govthealth = c(1.25689, 2.032658, 2.495758057,
2.965478, 1.985478, 2.209019899, 2.882325411, 3.21458, 7.3134408,
1.032568, 5.433434963, 7.235478, 1.239725351, 8.535984039, 10.323589,
1.236589, 3.562868595, 4.673761368, 2.32547, 4.648055553, 23.70949936,
33.235687, 51025478, 71.8605423, 205.9026794, 295.2356, 31.2587,
51.99817276, 154.70401, 56.32588, 73.30036926, 399.23568, 66.3265,
99.82849121, 2.23568, 3.246135235, 10.43734169, 15.235478, 3.569877,
5.623521328, 5.849419594, 8.32665, 35.3654457, 44.96020508, 195.3657,
14.55177689, 35.235698, 61.02356, 81.59127045, 284.7705994, 23.43979454,
43.92045593, 22.36587, 30.42416763, 181.3415375, 385.9675598,
576.0806274, 602.3258, 25.36730576, 66.235687, 92.2147, 401.4833984,
502.3698, 2.0214578, 10.70767879, 15.36987, 112.3698, 481.0765686,
502.36987, 226.7909851, 300.65478, 55.95266342, 2.36547, 11.85855961,
35.50076675, 45.235698, 25.36954, 34.36005783, 126.9312592, 156.3257,
23.53768349, 39.235687, 4.235687, 6.570708275, 45.36987, 3399.406006,
4500.321547, 990.36547, 1368.160278, 2804.857178, 1000.365, 1375.334717,
3458.573975, 4120.325, 1456.037842, 4100.368, 1500.36578, 6925.325445,
990.58795, 4125.25658, 998.25998, 1827.566895, 3482.541016, 4800.3256,
989.325, 1254.325, 1756.99939, 1998.23569, 1104.429321, 2521.927002,
1800.3256, 2315.543701, 2931.431641, 331.0256, 548.32, 1388.55896,
351.3133545, 898.4367065, 997.02145, 956.32547, 3488.651855,
4400.23556, 558.36987, 785.0509033, 3000.3658, 100.36987, 162.3498688,
162.365, 543.0645752, 1458.283813, 2000.3694, 2495.23877), Privhealth = c(14.3698,
25.36698, 36.01279831, 49.36875, 1.23569, 2.278559208, 8.061329842,
10.3658, 5.059076786, 3.25698, 20.38587761, 30.65877, 4.726452827,
22.79703331, 32.65878, 6.32589, 10.38636589, 19.33849907, 8.326589,
11.07592678, 67.27728271, 74.23658, 63.235698, 83.74517059, 88.83229828,
96.32568, 49.32658, 59.41738892, 138.1631165, 100.23564, 147.8399658,
300.23568, 71.02584, 90.6206665, 8.365984, 11.47062778, 61.48280716,
74.254785, 7.235647, 10.26313496, 19.40570831, 23.65879, 33.25478,
44.17641068, 189.32658, 17.06592751, 75.325689, 89.32658, 136.7345276,
238.6507721, 19.86775017, 63.43461227, 7.325478, 19.23568, 25.321547,
319.0157471, 311.9694214, 442.03695, 3.889117956, 15.3654, 115.02365,
488.0875244, 552.0325698, 10.3658, 36.04922485, 45.362154, 45.23548,
182.7733917, 202.3654, 142.2067719, 202.325, 197.0276337, 9.32658,
32.95304871, 70.28269196, 90.3256, 15.021457, 27.89465141, 44.9021492,
60.32568, 43.03323364, 60.325845, 8.325698, 11.45799065, 60.32568,
1553.358765, 2330.2354, 201.0214578, 305.5347595, 503.7982178,
301.23565, 469.9864197, 990.9689331, 1200.36987, 542.5964966,
1823.021457, 312.0215478, 1100.32145, 301.02145, 1100.3256, 320.365478,
507.0849609, 1114.720093, 2001.23548, 401.14567, 662.03214, 816.2644653,
998.32546, 416.0243225, 692.6192017, 402.32564, 558.3044434,
713.3709106, 998.32658, 302.0214, 793.8995972, 111.2757187, 162.9289398,
212.3657, 442.32598, 1698.060913, 2226.32568, 145.2365, 268.8859863,
902.32568, 42.36587, 75.64861298, 332.65478, 970.5014648, 556.8964233,
700.32658, 5526.447266), Population = c(12412308L, 20779953L,
29185507L, 37172386L, 47887865L, 66224804L, 87639964L, 109224559L,
14539612L, 18905478L, 27013212L, 28087871L, 6216341L, 32428167L,
42723139L, 8449913L, 10946445L, 15049353L, 181413402L, 211513823L,
241834215L, 267663435L, 3565890L, 5122493L, 7261539L, 9956011L,
18029824L, 23194257L, 28208035L, 223158L, 279398L, 515696L, 1432905L,
1794571L, 95212450L, 122283850L, 158503197L, 195874740L, 107647921L,
142343578L, 179424641L, 212215030L, 22071433L, 26459944L, 31989256L,
77991755L, 106651922L, 36800509L, 44967708L, 51216964L, 18777601L,
20261737L, 3286542L, 3089027L, 2913021L, 36870787L, 40788453L,
44494502L, 591021L, 754394L, 149003223L, 195713635L, 209469333L,
8975597L, 14312212L, 16249798L, 3119433L, 4577378L, 4999441L,
70878L, 71625L, 3786695L, 873277798L, 1234281170L, 34545013L,
41801533L, 56558186L, 62952642L, 67195028L, 69428524L, 12697723L,
14439018L, 67988862L, 79910412L, 95540395L, 22031750L, 24982688L,
57247586L, 58892514L, 62766365L, 9967379L, 10251250L, 10895586L,
11433256L, 30685730L, 37057765L, 5140939L, 5793636L, 4986431L,
5515525L, 79433029L, 82211508L, 81776930L, 82905782L, 10196792L,
10805808L, 11121341L, 10731726L, 56942108L, 59277417L, 254826L,
281205L, 318041L, 352721L, 4660000L, 7623600L, 2045123L, 2991884L,
4137309L, 14951510L, 16615394L, 17231624L, 3329800L, 3857700L,
4841000L, 38110782L, 38258629L, 3047132L, 5076732L, 2048583L,
2073894L, 7824909L), Lifeexp = c(50.331, 55.841, 61.028, 64.486,
47.099, 51.941, 61.627, 66.24, 55.564, 54.404, 67.611, 70.478,
61.974, 57.099, 62.973, 45.746, 48.069, 55.251, 62.32, 65.772,
69.205, 71.509, 69.872, 71.73, 73.428, 74.405, 70.865, 72.594,
74.493, 61.529, 70.173, 78.627, 61.608, 52.192, 45.9, 46.267,
50.896, 54.332, 60.1, 62.82, 65.264, 67.114, 66.165, 71.111,
76.516, 68.793, 71.095, 63.307, 56.048, 57.669, 71.333, 75.439,
71.836, 73.955, 76.562, 73.576, 75.278, 76.52, 60.884, 71.46,
66.343, 73.619, 75.672, 53.595, 66.56, 69.57, 75.654, 78.769,
80.095, 74.619, 77.672, 71.46, 57.865, 66.693, 62.764, 65.095,
70.248, 70.623, 74.184, 76.931, 50.64, 61.195, 70.551, 73.025,
75.317, 81.69512195, 82.74878049, 75.8804878, 77.74146341, 80.40243902,
76.05195122, 77.72195122, 80.18292683, 81.59512195, 79.13658537,
81.94878049, 74.80536585, 81.35121951, 74.81317073, 81.83414634,
75.2277561, 77.92682927, 79.98780488, 80.99268293, 76.93902439,
77.88780488, 80.38780488, 81.28780488, 79.77804878, 82.03658537,
78.03634146, 79.65365854, 81.89756098, 82.66097561, 76.60731707,
81.60243902, 73.142, 74.358, 75.398, 76.87804878, 80.70243902,
81.76097561, 75.37804878, 78.63658537, 81.85853659, 70.8902439,
73.74878049, 75.29512195, 81.54146341, 79.42195122, 81.02926829,
82.24634146), Govted = c(1.23568, 2.31245, 3.47945, 5.32658,
2.365, 3.98311, 4.49659, 6.32547, 3.5398, 1.023568, 3.63172,
5.16365, 2.32871, 2.38901, 2.52076, 1.23568, 2.97156, 3.34389,
0.984578, 1.36589, 2.81228, 4.326587, 1.2365897, 1.9654789, 2.3658,
3.58851, 3.23568, 5.97161, 4.96645, 1.23568, 3.21548, 6.32547,
2.32657, 6.99139, 1.32658, 2.012457, 3.214587, 4.235687, 2.51681,
1.83782, 2.28687, 3.9854587, 2.36587, 3.22803, 3.71993, 3.26766,
5.32568, 5.12579, 5.44358, 5.72174, 2.36578, 1.71774, 2.3265,
3.43017, 2.65897, 4.58031, 5.01971, 6.32658, 5.51379, 6.64043,
2.36587, 5.6488, 6.32658, 1.235687, 1.53379, 2.16286, 3.24578,
6.63445, 7.02824, 2.36578, 3.325478, 3.215487, 3.23568, 3.37769,
2.32657, 3.23654, 3.323568, 5.25346, 3.50844, 5.32658, 1.54406,
4.60449, 3.326589, 4.235478, 4.17277, 5.55006, 6.32365, 4.05552,
4.06533, 5.74164, 4.021547, 5.32658, 6.40799, 6.9874564, 5.442,
6.32658, 7.32658, 8.9854587, 5.33591, 7.32658, 3.0215478, 3.21547,
4.91368, 6.3265, 2.04608, 3.23019, 4.32658, 5.023658, 4.29886,
4.35239, 4.25224, 6.44717, 6.97848, 7.235689, 5.43073, 5.54157,
2.985467, 3.124578, 3.32652, 5.22879, 5.48909, 4.236587, 5.321457,
6.323658, 7.5698745, 3.26587, 4.9936, 2.325647, 3.08044, 5.56251,
5.965871, 4.92605)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 11L, 13L, 15L, 16L, 18L, 23L, 24L, 25L, 26L, 27L, 29L, 30L,
31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 41L, 42L, 44L, 45L,
46L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 60L, 62L,
64L, 65L, 66L, 67L, 70L, 71L, 73L, 74L, 75L, 78L, 79L, 80L, 82L,
84L, 85L, 87L, 88L, 89L, 91L, 92L, 93L, 95L, 96L, 99L, 100L,
103L, 105L, 107L, 111L, 112L, 113L, 114L, 115L, 116L, 119L, 120L,
121L, 122L, 124L, 127L, 128L, 129L, 130L, 131L, 133L, 134L, 135L,
136L, 138L, 140L, 141L, 144L, 145L, 148L, 149L, 150L, 151L, 152L,
153L, 154L, 155L, 156L, 158L, 159L, 161L, 162L, 163L, 164L, 165L,
167L, 170L, 171L, 172L, 173L, 175L, 176L, 177L, 178L, 180L, 181L,
182L, 185L, 187L, 191L, 192L, 195L), class = "data.frame")
Test.dat:
structure(list(GDP = c(199.9863423, 156.3857186, 389.3980332,
229.4902871, 497.6320261, 749.552711, 826.6215305, 248.0293672,
261.8689977, 899.6599081, 11373.233, 7076.662423, 5324.61704,
5931.453886, 5082.354757, 715.9137121, 2124.05677, 6374.028196,
463.6186318, 4102.48135, 5268.848504, 4333.482973, 564.7796095,
2258.183141, 3749.75325, 302.5771636, 3772.870012, 2860.43156,
4787.780171, 1614.640122, 749.9085236, 4717.143026, 443.3141934,
2009.978857, 483.952592, 366.1728076, 841.9729898, 563.0577411,
1317.890706, 18211.27459, 21679.24784, 42943.90227, 21448.36196,
47450.31847, 30743.54768, 58041.39844, 24285.46682, 46459.97325,
20825.78421, 34483.204, 21043.57493, 41715.02928, 8794.631229,
26149.41108, 33692.01083, 12599.53358, 15420.91116, 23852.32703,
64581.94402, 9107.477079, 10201.30354, 38428.3855, 37868.296,
82796.54716), Health = c(6.22435541, 8.909747124, 39.22274712,
8.625580788, 4.22284155, 42.34384155, 47.44484155, 10.74555809,
18.80055809, 45.32365, 324.6654166, 602.659668, 504.5536499,
594.8854499, 239.3392792, 22.55662414, 91.84031677, 624.335527,
30.56891763, 128.3355597, 74.23569, 505.4589408, 22.23569, 69.80043793,
311.6526794, 19.73552704, 251.0935822, 211.589745, 250.7455292,
35.25698, 47.90106964, 292.54782, 18.56432343, 70.5685123, 10.56888,
17.38329887, 50.66987, 75.201547, 78.18682861, 1022.5487, 1632.427612,
4002.325, 1452.369, 5044.135254, 2496.047119, 6011.536621, 1655.866211,
4099.587891, 1125.365, 4400.325, 1496.87854, 3000.23568, 336.2356,
2023.143677, 3216.223633, 809.1994019, 956.21547, 820.6981812,
1989.235, 446.3265, 796.6470337, 2985.12, 3737.802979, 9658.23
), Govthealth = c(2.65987, 3.350677967, 8.32365, 1.337858081,
0.235689, 8.714180946, 11.02365, 2.356894, 4.656533241, 5.958777,
198.23568, 319.1759033, 207.0215302, 302.654789, 123.2336197,
9.32658, 29.2992878, 300.5689, 12.02589, 52.658912, 22.03256,
222.325689, 16.3258, 50.29269791, 129.758316, 3.900079966, 163.0175018,
102.369, 156.8104706, 4.36987, 5.465222836, 75.36987, 3.839128733,
14.32589, 3.25478, 5.880064487, 12.36547, 18.02584, 30.97570801,
990.365478, 1116.231445, 3201.0245, 996.598723, 3721.796387,
2074.39917, 5042.459961, 1229.708252, 3167.418213, 889.32658,
3698.23598, 944.5585938, 1998.02365, 200.365778, 1396.733398,
2517.370117, 577.3640747, 662.32589, 298.1834717, 702.369, 456.325,
568.7339478, 889.36547, 1045.900513, 3987.3654), Privhealth = c(1.36589,
1.832908154, 7.325698, 5.431494236, 2.36589, 29.85413742, 35.3698,
4.23568, 8.9836483, 22.3658, 152.36589, 263.3545532, 225.5363922,
301.325478, 111.575592, 10.23568, 60.89479446, 336.02145, 12.36587,
75.36987, 34.3265, 223.02145, 2.0215478, 11.81901455, 180.9026947,
15.41190529, 85.28456879, 45.321478, 86.49634552, 25.36987, 39.00668716,
220.32145, 14.22738075, 49.326545, 7.02145, 11.50323391, 20.36587,
33.021456, 45.45627975, 400.23568, 516.1798096, NA, 400.32547,
1322.338745, 421.6481018, 969.076416, 426.0691833, 931.8737793,
302.1245, 886.02154, 517.4750366, 889.32547, 90.3256, 626.4102173,
698.8658447, 231.8352966, 301.0324, 522.5147705, 1236.021458,
117.3658, 227.9130707, 1965.3256, 2691.985107, 6600.3256), Population = c(9404500L,
11148758L, 18143315L, 23941110L, 5283814L, 7527394L, 9100837L,
17354392L, 23650172L, 19077690L, 31528585L, 365734L, 2118874L,
2448255L, 29027674L, 61895160L, 93966780L, 57779622L, 17325773L,
21670000L, 2866376L, 32618651L, 530804L, 685503L, 174790340L,
12155239L, 3962372L, 70419L, 69650L, 4802000L, 4077131L, 3726549L,
1056575549L, 1352617328L, 20147590L, 27275015L, 10432421L, 11881477L,
87967651L, 17065100L, 19153000L, 66460344L, 27691138L, 34004889L,
5339616L, 5547683L, 5176209L, 5363352L, 56719240L, 60421760L,
6289000L, 8882800L, 2095344L, 15925513L, 4350700L, 38042794L,
37974750L, 4027887L, 5638676L, 1998161L, 1988925L, 6715519L,
7184250L, 8513227L), Lifeexp = c(46.096, 45.09, 63.798, 62.288,
58.824, 68.736, 70.879, 45.853, 46.229, 58.893, 75.997, 75.905,
56.665, 63.373, 74.41, 66.366, 69.823, 63.857, 69.509, 76.812,
78.458, 71.594, 52.878, 68.384, 70.116, 58.432, 77.452, 66.843,
71.116, 70.386, 69.902, 73.6, 62.505, 69.416, 55.5, 58.472, 58.1,
44.649, 74.837, 76.99463415, 79.23414634, 81.35609756, 77.42195122,
81.24634146, 76.59268293, 79.1, 77.46585366, 79.87073171, 76.97073171,
82.94634146, 78.95365854, 82.80243902, 72.15, 77.98780488, 80.70243902,
76.24634146, 77.75365854, 77.95121951, 83.14634146, 73.20487805,
75.41219512, 77.24243902, 79.6804878, 83.55121951), Govted = c(3.27054,
5.24797, 4.71484, 2.97515, 1.36587, 4.00675, 6.32547, 1.023658,
2.46167, 4.32658, 4.53477, 4.11747, 8.34961, 10.23547, 2.8673,
2.36587, 5.326545, 6.15899, 2.41093, 2.11189, 2.46866, 1.06738,
3.21547, 4.02447, 3.94893, 1.65599, 4.68696, 1.856231, 2.032145,
1.56897, 2.18109, 4.236587, 4.32479, 5.326587, 0.36589, 1.01218,
1.45426, 2.36589, 5.13722, 4.6764, 4.89147, 7.3265, 5.99199,
5.36993, 8.08434, 8.55955, 5.71688, 6.54071, 3.325687, 5.32658,
6.12262, 6.32658, 1.326587, 4.58512, 7.00241, 5.06843, 6.32547,
3.3213, 5.32658, 3.32365, 4.32657, 4.52294, 4.7814, 5.9658745
)), row.names = c(9L, 10L, 12L, 14L, 17L, 19L, 20L, 21L, 22L,
28L, 40L, 43L, 47L, 48L, 59L, 61L, 63L, 68L, 69L, 72L, 76L, 77L,
81L, 83L, 86L, 90L, 94L, 97L, 98L, 101L, 102L, 104L, 106L, 108L,
109L, 110L, 117L, 118L, 123L, 125L, 126L, 132L, 137L, 139L, 142L,
143L, 146L, 147L, 157L, 160L, 166L, 168L, 169L, 174L, 179L, 183L,
184L, 186L, 188L, 189L, 190L, 193L, 194L, 196L), class = "data.frame")
You can use the following code
# Remove rows containing NA values from the test data frame before predicting.
test.dat <- na.omit(test.dat)

# Predictions of each fitted model on the cleaned test data.
regression.pred <- predict(lm7, newdata = test.dat, type = "raw")
elastic.pred <- predict(elastic4, newdata = test.dat, type = "raw")
tree.pred <- predict(tree2, newdata = test.dat, type = "raw")
# NOTE(review): rf.pred is computed but not scored in test.statistics below.
rf.pred <- predict(rf1, newdata = test.dat, type = "raw")

# RMSE and R-squared of the regression, elastic-net and tree predictions,
# each measured against the observed Lifeexp values, collected in one data frame.
test.statistics <- local({
  observed <- test.dat$Lifeexp
  data.frame(
    RMSE.regression = RMSE(regression.pred, observed),
    Rsquare.regression = R2(regression.pred, observed),
    RMSE.el = RMSE(elastic.pred, observed),
    Rsquare.el = R2(elastic.pred, observed),
    RMSE.tree = RMSE(tree.pred, observed),
    Rsquare.tree = R2(tree.pred, observed)
  )
})
test.statistics
# RMSE.regression Rsquare.regression RMSE.el Rsquare.el RMSE.tree Rsquare.tree
# 1 8.201072 0.3910896 8.115006 0.3984145 6.369994 0.6446954
# Example trips: ten drop-off coordinates keyed by trip_count.
# dput() of a data.table also prints `.internal.selfref = <pointer: ...>`,
# which is not valid R syntax and cannot be restored from text, so that
# attribute is omitted here. Calling data.table::setDT(test) re-creates it.
test <- structure(list(trip_count = 1:10, dropoff_longitude = c(-73.959862,
-73.882202, -73.934113, -73.992203, -74.00563, -73.975189, -73.97448,
-73.974838, -73.981377, -73.955093), dropoff_latitude = c(40.773617,
40.744175, 40.715923, 40.749203, 40.726158, 40.729824, 40.763599,
40.754135, 40.759987, 40.765224)), row.names = c(NA, -10L), class = c("data.table",
"data.frame"))
> dput(zip_codes)
# zip_codes: plain data.frame lookup table with one row per ZIP code.
#   zipcode      - ZIP code as a character string
#   bounds_north, bounds_south, bounds_east, bounds_west - numeric limits;
#                  presumably the latitude (north/south) and longitude
#                  (east/west) edges of each ZIP code's bounding box - confirm
#   zip          - numeric column; appears to hold the same codes as zipcode
# row.names are non-contiguous integers (presumably kept from a filtered
# source table - confirm).
zip_codes <- structure(list(zipcode = c("10001", "10002", "10003", "10004",
"10005", "10006", "10007", "10009", "10010", "10011", "10012",
"10013", "10014", "10016", "10017", "10018", "10019", "10020",
"10021", "10022", "10023", "10024", "10025", "10026", "10027",
"10028", "10029", "10030", "10031", "10032", "10033", "10034",
"10035", "10036", "10037", "10038", "10039", "10040", "10044",
"10065", "10069", "10075", "10103", "10110", "10111", "10112",
"10115", "10119", "10128", "10152", "10153", "10154", "10162",
"10165", "10167", "10168", "10169", "10170", "10171", "10172",
"10173", "10174", "10177", "10199", "10271", "10278", "10279",
"10280", "10282"), bounds_north = c(40.759731, 40.724136, 40.739673,
40.709044, 40.709294, 40.71369, 40.71719, 40.734975, 40.745421,
40.756703, 40.731706, 40.727557, 40.742873, 40.752197, 40.757912,
40.762526, 40.773446, 40.761094, 40.775045, 40.764898, 40.783192,
40.818099, 40.811264, 40.807546, 40.822108, 40.782213, 40.800665,
40.824032, 40.834372, 40.850517, 40.861552, 40.87765, 40.809582,
40.765558, 40.819569, 40.714451, 40.846615, 40.866336, 40.772955,
40.770517, 40.781007, 40.777677, 40.761771, 40.755516, 40.759689,
40.759899, 40.811331, 40.751522, 40.787914, 40.759059, 40.764279,
40.758432, 40.770085, 40.752801, 40.755303, 40.752119, 40.754974,
40.753811, 40.756556, 40.755928, 40.754783, 40.752116, 40.7556,
40.752723, 40.708797, 40.71628, 40.713256, 40.714767, 40.719611
), bounds_south = c(40.743451, 40.708802, 40.722933, 40.683919,
40.702879, 40.705871, 40.709806, 40.718612, 40.73231, 40.731043,
40.719867, 40.713446, 40.72428, 40.73801, 40.747251, 40.749102,
40.758645, 40.757284, 40.758133, 40.751445, 40.768436, 40.778805,
40.788476, 40.79691, 40.803047, 40.770062, 40.782531, 40.812791,
40.817221, 40.829083, 40.842958, 40.849745, 40.781075, 40.752197,
40.806636, 40.701689, 40.817912, 40.851863, 40.749415, 40.759284,
40.771612, 40.769441, 40.759787, 40.753481, 40.758538, 40.758436,
40.810373, 40.749101, 40.773108, 40.757749, 40.762964, 40.757125,
40.768355, 40.75146, 40.753994, 40.750775, 40.753811, 40.751441,
40.755243, 40.754619, 40.753481, 40.750766, 40.754678, 40.750241,
40.707694, 40.714082, 40.711995, 40.700273, 40.713378), bounds_east = c(-73.984076,
-73.973635, -73.979864, -73.995657, -74.004569, -74.009988, -74.000455,
-73.971282, -73.971566, -73.990798, -73.991794, -73.994035, -73.999555,
-73.968192, -73.964271, -73.981822, -73.973015, -73.977201, -73.947973,
-73.958599, -73.974067, -73.960687, -73.954966, -73.944667, -73.940404,
-73.944337, -73.930891, -73.936232, -73.938588, -73.934671, -73.92216,
-73.910587, -73.914228, -73.978116, -73.933219, -73.991772, -73.929107,
-73.924385, -73.940026, -73.952085, -73.986609, -73.947039, -73.975831,
-73.980395, -73.976744, -73.97845, -73.963058, -73.99111, -73.937328,
-73.970993, -73.971411, -73.971451, -73.94827, -73.977677, -73.973735,
-73.976048, -73.975209, -73.974648, -73.97282, -73.973276, -73.978332,
-73.973959, -73.975352, -73.993948, -74.009829, -74.002115, -74.007666,
-74.013754, -74.012441), bounds_west = c(-74.008621, -73.997532,
-73.999604, -74.047285, -74.012508, -74.015905, -74.013754, -73.988643,
-73.994028, -74.012359, -74.004575, -74.016381, -74.01599, -73.987746,
-73.981822, -74.007989, -74.003477, -73.98373, -73.968441, -73.977655,
-73.990149, -73.98814, -73.977092, -73.962475, -73.9659, -73.96323,
-73.955778, -73.948677, -73.960007, -73.950403, -73.944672, -73.947051,
-73.946462, -74.001702, -73.943398, -74.010542, -73.943506, -73.938947,
-73.961583, -73.972553, -73.996142, -73.965148, -73.979513, -73.984118,
-73.97845, -73.980886, -73.964424, -73.994844, -73.959921, -73.973068,
-73.973465, -73.973524, -73.951858, -73.979768, -73.975807, -73.978159,
-73.976974, -73.977107, -73.974897, -73.975352, -73.980395, -73.976048,
-73.976516, -74.00143, -74.011248, -74.00542, -74.009668, -74.019603,
-74.01831), zip = c(10001, 10002, 10003, 10004, 10005, 10006,
10007, 10009, 10010, 10011, 10012, 10013, 10014, 10016, 10017,
10018, 10019, 10020, 10021, 10022, 10023, 10024, 10025, 10026,
10027, 10028, 10029, 10030, 10031, 10032, 10033, 10034, 10035,
10036, 10037, 10038, 10039, 10040, 10044, 10065, 10069, 10075,
10103, 10110, 10111, 10112, 10115, 10119, 10128, 10152, 10153,
10154, 10162, 10165, 10167, 10168, 10169, 10170, 10171, 10172,
10173, 10174, 10177, 10199, 10271, 10278, 10279, 10280, 10282
)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 9L, 10L, 11L, 12L,
13L, 14L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L,
40L, 43L, 50L, 51L, 53L, 67L, 74L, 75L, 76L, 79L, 83L, 91L, 101L,
102L, 103L, 111L, 114L, 116L, 117L, 118L, 119L, 120L, 121L, 122L,
123L, 126L, 133L, 151L, 158L, 159L, 160L, 162L), class = "data.frame")
Hey guys, so I am trying to fuzzy-join lat & lon information to get the zip code of a specific location. I tried:
# Attempted fuzzy join: keep every row of test and attach the zip_codes row
# whose bounds contain the drop-off point.
# NOTE(review): the match_fun entries are quoted strings, not functions.
# fuzzyjoin documents match_fun as a function (or list of functions), e.g.
# backticked `<=` and `>=`; this is probably what triggers the error quoted
# in the text that follows - confirm.
test <- test %>%
  fuzzy_left_join(
    zip_codes,
    by = c(
      dropoff_longitude = "bounds_east",
      dropoff_longitude = "bounds_west",
      dropoff_latitude = "bounds_north",
      dropoff_latitude = "bounds_south"
    ),
    match_fun = list('<=', '>=', '<=', '>=')
  )
But unfortunately, this returns the error message Error: All columns in a tibble must be vectors. x Column "col" is NULL.
I don't know how to solve this; there is no column "col" in either of the data frames. The result should give me the corresponding zip code whenever dropoff_longitude is between bounds_west and bounds_east and dropoff_latitude is between bounds_south and bounds_north.
Thanks a lot in advance!
We could use a non-equi join from data.table, since one of the datasets is already a data.table:
library(data.table)

# Make sure test is a proper data.table (converted by reference).
setDT(test)

# Non-equi join: match each drop-off point to the zip_codes row whose
# bounds enclose it.
# NOTE(review): in X[i, on = ...] the result is driven by the rows of i
# (here zip_codes); confirm this orientation is the one wanted.
test[
  zip_codes,
  on = .(
    dropoff_longitude <= bounds_east,
    dropoff_longitude >= bounds_west,
    dropoff_latitude <= bounds_north,
    dropoff_latitude >= bounds_south
  )
]