Grouping by multiple columns for ggplot - r

I have data that look like this (I'm only including the first twenty rows to show the spread of data but there are about 135 rows):
> dput(id)
structure(list(date = c("7/27/1992", "7/27/1992", "7/27/1992",
"8/1/1992", "7/1/1994", "7/1/1994", "7/1/1994", "8/7/2003", "8/7/2003",
"8/7/2003", "8/7/2003", "7/21/2004", "7/21/2004", "7/26/2004",
"7/26/2004", "7/5/2005", "7/5/2005", "7/9/2005", "7/9/2005",
"7/9/2005"), event.id = c(8L, 8L, 8L, 10L, 11L, 11L, 11L, 14L,
14L, 15L, 15L, 17L, 17L, 18L, 18L, 20L, 20L, 21L, 21L, 21L), id = c("L5", "L58",
"L73", "L21", "L5", "L58", "L73", "L5", "L73", "L7", "L57", "L21",
"L47", "L54", "L100", "J27", "J31", "J16", "J26", "J36"), sex = structure(c(1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 1L), .Label = c("0", "1"), class = "factor"),
age = c(28L, 12L, 6L, 42L, 30L, 14L, 8L, 39L, 17L, 42L, 26L,
54L, 30L, 27L, 3L, 14L, 10L, 33L, 14L, 6L), matr = c("L9",
"L9", "L9", "L21", "L9", "L9", "L9", "L9", "L9", "L37", "L45",
"L21", "L21", "L35", "L35", "J4", "J4", "J7", "J7", "J7"),
matralive = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), pod = c("L", "L", "L", "L", "L",
"L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "J", "J",
"J", "J", "J")), row.names = c(NA, -134L), class = c("tbl_df", "tbl", "data.frame"))
My goal is to create a graph that looks something like this, using ggplot:
I'm stuck as to how to generate this - I imagine I will have to group by pod and matriline, but am unsure how to go about this.
Any help is super appreciated - thank you so much!!

Here is one possibility using geom_mark_ellipse from ggforce for the ellipses. To have multiple groups (i.e., matr and date) to draw the ellipses, we can use interaction to combine the two columns into a new factor.
library(tidyverse)
library(ggforce)
# Scatter of age against sighting date: point shape encodes sex, colour encodes
# pod, and one labelled ellipse is drawn per (date, matr) combination.
df %>%
  ggplot(aes(x = date, y = age)) +
  # `size` is a constant, so it is set outside aes(); inside aes() it would be
  # treated as a mapped variable and add a spurious "size" legend entry.
  geom_point(aes(shape = factor(sex), colour = factor(pod)), size = 5) +
  geom_mark_ellipse(
    aes(colour = pod, group = interaction(date, matr), label = matr)
  ) +
  # Fixed additive padding on both axes -- presumably to leave room for the
  # ellipses and their labels.
  scale_y_continuous(expand = c(0, 25)) +
  scale_x_discrete(expand = c(0, 1.35))
Output
Data
df <- structure(list(date = c("7/27/1992", "7/27/1992", "7/27/1992",
"8/1/1992", "7/1/1994", "7/1/1994", "7/1/1994", "8/7/2003", "8/7/2003",
"8/7/2003", "8/7/2003", "7/21/2004", "7/21/2004", "7/26/2004",
"7/26/2004", "7/5/2005", "7/5/2005", "7/9/2005", "7/9/2005",
"7/9/2005"), event.id = c(8L, 8L, 8L, 10L, 11L, 11L, 11L, 14L,
14L, 15L, 15L, 17L, 17L, 18L, 18L, 20L, 20L, 21L, 21L, 21L),
id = c("L5", "L58", "L73", "L21", "L5", "L58", "L73", "L5",
"L73", "L7", "L57", "L21", "L47", "L54", "L100", "J27", "J31",
"J16", "J26", "J36"), sex = structure(c(1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L
), .Label = c("0", "1"), class = "factor"), age = c(28L,
12L, 6L, 42L, 30L, 14L, 8L, 39L, 17L, 42L, 26L, 54L, 30L,
27L, 3L, 14L, 10L, 33L, 14L, 6L), matr = c("L9", "L9", "L9",
"L21", "L9", "L9", "L9", "L9", "L9", "L37", "L45", "L21",
"L21", "L35", "L35", "J4", "J4", "J7", "J7", "J7"), matralive = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), pod = c("L", "L", "L", "L", "L", "L", "L",
"L", "L", "L", "L", "L", "L", "L", "L", "J", "J", "J", "J",
"J")), row.names = c(NA, -20L), class = c("tbl_df", "tbl",
"data.frame"))

Related

Scatter plot with small pie charts with R

I have the data below, called test1.melted. I also have code to plot it using the scatterpie package, but because of an inherent limitation of scatterpie (it needs Cartesian coordinates with equal scaling, i.e. equal horizontal and vertical distances), the plot does not come out properly formatted. Is there a better way to plot this data without using scatterpie?
Data:
test1.melted<-structure(list(Wet_lab_dilution_A = structure(c(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L), .Label = c("A", "B", "C", "D", "E", "F",
"G", "H", "I", "J", "K", "L"), class = "factor"), TypeA = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("I", "II"), class = "factor"),
NA12878 = c(100L, 50L, 25L, 20L, 10L, 0L, 100L, 50L, 25L,
20L, 10L, 0L, 100L, 50L, 25L, 20L, 10L, 0L, 100L, 50L, 25L,
20L, 10L, 0L), NA12877 = c(0L, 50L, 75L, 80L, 90L, 100L,
0L, 50L, 75L, 80L, 90L, 100L, 0L, 50L, 75L, 80L, 90L, 100L,
0L, 50L, 75L, 80L, 90L, 100L), IBD = c(1.02, 0.619, 0.294,
0.244, 0.134, 0.003, 0.003, 0.697, 0.964, 0.978, 1, 1, 1.02,
0.619, 0.294, 0.244, 0.134, 0.003, 0.003, 0.697, 0.964, 0.978,
1, 1), variableA = c("tEst", "tEst", "tEst", "tEst", "tEst",
"tEst", "tEst", "tEst", "tEst", "tEst", "tEst", "tEst", "pair",
"pair", "pair", "pair", "pair", "pair", "pair", "pair", "pair",
"pair", "pair", "pair"), valueA = c(0.1, 59.8, 84.6, 89.2,
97.4, 100, 99.6, 56.4, 29.9, 24, 12.1, 0.1, 0.1, 51.08, 75.28,
80.09, 90.16, 100, 100, 48.09, 23.97, 18.81, 9.24, 0.08)), row.names = c(NA,
-24L), .Names = c("Wet_lab_dilution_A", "TypeA", "NA12878", "NA12877",
"IBD", "variableA", "valueA"), class = "data.frame")
code:
# One pie per row at (valueA, IBD); the slices come from the NA12878 and
# NA12877 columns. Panels are split by TypeA (rows) and variableA (columns).
p <- ggplot() +
  geom_scatterpie(
    mapping = aes(x = valueA, y = IBD, group = TypeA),
    data = test1.melted,
    cols = c("NA12878", "NA12877")
  ) +
  coord_equal() +
  facet_grid(TypeA ~ variableA)
p
Do you have to use a pie chart? (And you might; there's nothing wrong with them.)
Because something like this could illustrate literally every variable in the dataset:
library(ggplot2)
# Treat the two percentage columns as categories so that every level gets its
# own outline colour (NA12877) and fill colour (NA12878).
test1.melted[c("NA12877", "NA12878")] <-
  lapply(test1.melted[c("NA12877", "NA12878")], as.factor)

# Base layers: outlined points (shape 21 has both colour and fill) labelled
# with the dilution letter.
p <- ggplot(data = test1.melted, aes(x = valueA, y = IBD, group = TypeA)) +
  geom_point(
    aes(colour = NA12877, fill = NA12878),
    stroke = 3, size = 3, shape = 21
  ) +
  geom_text(aes(label = Wet_lab_dilution_A), size = 2)

# Display with one panel per TypeA x variableA combination.
p + facet_grid(TypeA ~ variableA) + theme_minimal()

Speeding up a loop (extracting specific values from a data frame)

My task is to extract all values in column "2" after sorting by factor level in another column "3" (for those interested, I am sorting FASTA sequences by organism). I am using this very simple code to get what I need.
# fill = TRUE pads short rows: the original output file includes many empty cells.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
df <- read.table("outfile.txt", fill = TRUE)
# df is available at the bottom of this post
# splitting by factors
list1 <- split(df, df$V3)
# extract all values in column 2
list2 <- lapply(list1, function(x) as.data.frame(x$V2))
# writing results to file: one "<V3 value>.txt" per group.
# paste0() is used so the file name has no space before ".txt"
# (paste() would insert its default " " separator).
for (x in names(list2)) {
  write.table(
    list2[[x]],
    file = paste0(x, ".txt"),
    quote = FALSE,
    row.names = FALSE,
    col.names = FALSE
  )
}
This works well on a small df. However, the real output file contains several gigabytes of data. I tried a subset (500,000 rows on my local machine with 8GB RAM), but the second command is extremely slow (or R just hangs).
So I am asking the community whether there is a better way to solve this. Thank you.
Here is df:
dput(df)
structure(list(V1 = structure(c(1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L), .Label = c("C", "U"), class = "factor"),
V2 = structure(c(10L, 2L, 27L, 29L, 25L, 32L, 28L, 39L, 40L,
22L, 8L, 7L, 19L, 38L, 15L, 3L, 16L, 26L, 34L, 13L, 17L,
18L, 14L, 41L, 44L, 12L, 45L, 46L, 5L, 1L, 31L, 4L, 37L,
11L, 43L, 20L, 21L, 30L, 23L, 35L, 24L, 42L, 9L, 33L, 36L,
6L), .Label = c("M02978:20:000000000-B8C4P:1:1101:11008:4137",
"M02978:20:000000000-B8C4P:1:1101:14389:3444", "M02978:20:000000000-B8C4P:1:1101:14986:3769",
"M02978:20:000000000-B8C4P:1:1101:15333:4161", "M02978:20:000000000-B8C4P:1:1101:15438:4092",
"M02978:20:000000000-B8C4P:1:1101:15516:4514", "M02978:20:000000000-B8C4P:1:1101:16313:3660",
"M02978:20:000000000-B8C4P:1:1101:16433:3650", "M02978:20:000000000-B8C4P:1:1101:16663:4462",
"M02978:20:000000000-B8C4P:1:1101:17179:3407", "M02978:20:000000000-B8C4P:1:1101:17779:4225",
"M02978:20:000000000-B8C4P:1:1101:18008:3981", "M02978:20:000000000-B8C4P:1:1101:18047:3851",
"M02978:20:000000000-B8C4P:1:1101:18920:3936", "M02978:20:000000000-B8C4P:1:1101:19086:3737",
"M02978:20:000000000-B8C4P:1:1101:19203:3783", "M02978:20:000000000-B8C4P:1:1101:19335:3908",
"M02978:20:000000000-B8C4P:1:1101:19520:3921", "M02978:20:000000000-B8C4P:1:1101:19612:3701",
"M02978:20:000000000-B8C4P:1:1101:19655:4289", "M02978:20:000000000-B8C4P:1:1101:19918:4313",
"M02978:20:000000000-B8C4P:1:1101:20321:3602", "M02978:20:000000000-B8C4P:1:1101:21089:4350",
"M02978:20:000000000-B8C4P:1:1101:22293:4406", "M02978:20:000000000-B8C4P:1:1101:22453:3490",
"M02978:20:000000000-B8C4P:1:1101:23026:3811", "M02978:20:000000000-B8C4P:1:1101:23065:3472",
"M02978:20:000000000-B8C4P:1:1101:23770:3507", "M02978:20:000000000-B8C4P:1:1101:23991:3472",
"M02978:20:000000000-B8C4P:1:1101:24290:4332", "M02978:20:000000000-B8C4P:1:1101:24415:4142",
"M02978:20:000000000-B8C4P:1:1101:25066:3498", "M02978:20:000000000-B8C4P:1:1101:25678:4466",
"M02978:20:000000000-B8C4P:1:1101:25992:3830", "M02978:20:000000000-B8C4P:1:1101:26431:4388",
"M02978:20:000000000-B8C4P:1:1101:26573:4479", "M02978:20:000000000-B8C4P:1:1101:5567:4179",
"M02978:20:000000000-B8C4P:1:1101:6323:3723", "M02978:20:000000000-B8C4P:1:1101:6675:3536",
"M02978:20:000000000-B8C4P:1:1101:6868:3559", "M02978:20:000000000-B8C4P:1:1101:7078:3965",
"M02978:20:000000000-B8C4P:1:1101:8145:4431", "M02978:20:000000000-B8C4P:1:1101:8449:4257",
"M02978:20:000000000-B8C4P:1:1101:8592:3966", "M02978:20:000000000-B8C4P:1:1101:9468:4026",
"M02978:20:000000000-B8C4P:1:1101:9970:4051"), class = "factor"),
V3 = c(926550L, 0L, 1121396L, 406818L, 1265505L, 1167006L,
1121399L, 0L, 177437L, 0L, 1536652L, 0L, 1196029L, 0L, 1178540L,
138119L, 0L, 1536652L, 186802L, 0L, 1322246L, 1232437L, 1196029L,
1121396L, 452637L, 0L, 0L, 0L, 1541959L, 1121403L, 96561L,
1167006L, 767528L, 0L, 0L, 653733L, 1423815L, 857293L, 0L,
0L, 0L, 468059L, 1167006L, 1232437L, 880073L, 761193L), V4 = c(171L,
NA, 264L, 88L, 356L, 257L, 128L, NA, 97L, NA, 243L, NA, 96L,
NA, 80L, 93L, NA, 138L, 155L, NA, 243L, 262L, 77L, 470L,
135L, NA, NA, NA, 124L, 161L, 211L, 202L, 91L, NA, NA, 146L,
98L, 93L, NA, NA, NA, 107L, 382L, 247L, 130L, 157L), V5 = structure(c(25L,
1L, 2L, 17L, 9L, 5L, 3L, 1L, 16L, 1L, 14L, 1L, 7L, 1L, 6L,
11L, 1L, 14L, 24L, 1L, 10L, 8L, 7L, 2L, 18L, 1L, 1L, 1L,
15L, 4L, 26L, 5L, 13L, 1L, 1L, 20L, 12L, 22L, 1L, 1L, 1L,
19L, 5L, 8L, 23L, 21L), .Label = c("", "1121396,", "1121399,",
"1121403,", "1167006,", "1178540,", "1196029,", "1232437,",
"1265505,", "1322246,", "138119,", "1423815,", "1460634,1460635,",
"1536652,", "1541959,", "177437,", "406818,", "452637,",
"468059,", "653733,", "761193,", "857293,", "880073,", "883109,888727,1161902,1230734,1392487,",
"926550,", "96561,"), class = "factor")), .Names = c("V1",
"V2", "V3", "V4", "V5"), class = "data.frame", row.names = c(NA,
-46L))
Using the data.table package combined with write.table:
Order by V3 and then write the V2 values to a separate file for each group in V3.
library('data.table')
# For each V3 group (rows ordered by V3), write that group's V2 values to
# "<V3>.txt". Quotes, row names and the header are switched off so the files
# contain only the V2 values, matching the output format used in the question.
setDT(df)[
  order(V3),
  write.table(
    V2,
    file = paste0(V3, ".txt"),
    quote = FALSE,
    row.names = FALSE,
    col.names = FALSE
  ),
  by = V3
]
This worked for me but I cannot speak for how fast it would be on your machine.
# For each distinct V3 value, write the matching V2 entries to "<value>.txt"
# (no quotes, no row names, no header).
lapply(unique(df$V3), function(group_value) {
  matching_rows <- which(df$V3 == group_value)
  write.table(
    df[matching_rows, ]$V2,
    file = paste0(group_value, ".txt"),
    quote = FALSE,
    row.names = FALSE,
    col.names = FALSE
  )
})

adding selected strings from first column to another column of a dataframe

I am trying to paste some of the strings from the first column, named Var1, into a new last column named Var2.
The data looks like this
df1<- structure(list(Var1 = structure(c(35L, 34L, 33L, 32L, 31L, 30L,
29L, 28L, 27L, 26L, 25L, 24L, 23L, 22L, 21L, 20L, 19L, 18L, 17L,
16L, 15L, 14L, 13L, 12L, 11L, 10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L,
2L, 1L), .Label = c("F117", "F97", "F87", "F79", "F67", "F61",
"F60", "F58", "F41", "F35", "F31", "F30", "F26", "F25", "F23",
"F22", "F21", "F19", "F18", "F17", "F16", "F15", "F14", "F13",
"F12", "F11", "F10", "F9", "F8", "F7", "F6", "F5", "F4", "F3",
"F2"), class = "factor"), Freq.x = c(252L, 106L, 56L, 32L, 28L,
17L, 10L, 7L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, NA, 2L, 1L, 1L, NA,
NA, 2L, NA, 1L, NA, 1L, NA, 1L, 1L, NA, NA, 1L, NA, 1L, NA),
Freq.y = c(306L, 170L, 69L, 45L, 35L, 20L, 13L, 7L, 12L,
3L, 6L, NA, 7L, NA, 1L, 3L, NA, 2L, 1L, 1L, 2L, 2L, 2L, NA,
1L, NA, 1L, NA, NA, 1L, 1L, NA, 1L, NA, 1L)), .Names = c("Var1",
"Freq.x", "Freq.y"), row.names = c(10L, 13L, 16L, 17L, 19L, 21L,
23L, 24L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 27L, 8L, 9L, 11L, 28L,
29L, 12L, 30L, 14L, 31L, 15L, 32L, 18L, 20L, 33L, 34L, 22L, 35L,
25L, 26L), class = "data.frame")
I want to copy the first string, then every 10th string, and finally the last string
from Var1 to Var2.
So the output will look like this
df2<- structure(list(Var1 = structure(c(12L, 18L, 22L, 24L, 26L, 30L,
32L, 34L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L, 14L,
15L, 16L, 17L, 19L, 20L, 21L, 23L, 25L, 27L, 28L, 29L, 31L, 33L,
35L, 3L), .Label = c("F10", "F11", "F117", "F12", "F13", "F14",
"F15", "F16", "F17", "F18", "F19", "F2", "F21", "F22", "F23",
"F25", "F26", "F3", "F30", "F31", "F35", "F4", "F41", "F5", "F58",
"F6", "F60", "F61", "F67", "F7", "F79", "F8", "F87", "F9", "F97"
), class = "factor"), Freq.x = c(252L, 106L, 56L, 32L, 28L, 17L,
10L, 7L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, NA, 2L, 1L, 1L, NA, NA,
2L, NA, 1L, NA, 1L, NA, 1L, 1L, NA, NA, 1L, NA, 1L, NA), Freq.y = c(306L,
170L, 69L, 45L, 35L, 20L, 13L, 7L, 12L, 3L, 6L, NA, 7L, NA, 1L,
3L, NA, 2L, 1L, 1L, 2L, 2L, 2L, NA, 1L, NA, 1L, NA, NA, 1L, 1L,
NA, 1L, NA, 1L), Var2 = structure(c(4L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 6L, 1L, 1L, 1L, 1L, 3L), .Label = c("",
"F11", "F117", "F2", "F22", "F61"), class = "factor")), .Names = c("Var1",
"Freq.x", "Freq.y", "Var2"), class = "data.frame", row.names = c(10L,
13L, 16L, 17L, 19L, 21L, 23L, 24L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
27L, 8L, 9L, 11L, 28L, 29L, 12L, 30L, 14L, 31L, 15L, 32L, 18L,
20L, 33L, 34L, 22L, 35L, 25L, 26L))
We can use data.table
library(data.table)
# Copy Var1 into Var2 on the first row, every 10th row (10, 20, ...) and the
# last row; all other rows get an empty string.
# seq(0, .N, by = 10) minus its leading 0 yields 10, 20, ...; the previous
# step of 9 starting at 1 selected rows 1, 10, 19, 28 instead of 1, 10, 20, 30.
# unique() guards against the last row being listed twice when .N is a
# multiple of 10.
setDT(df1)[
  unique(c(1, tail(seq(0, .N, by = 10), -1), .N)),
  Var2 := Var1
][is.na(Var2), Var2 := ""]
Or use .I
# Same selection expressed through .I (row numbers): first row, every 10th row
# (10, 20, ...) and the last row receive Var1; the rest become "".
# A step of 9 starting at 1 would pick rows 1, 10, 19, 28 rather than every
# 10th row; unique() avoids listing the last row twice when .N is a multiple
# of 10.
setDT(df1)[
  df1[, .I[unique(c(1, tail(seq(0, .N, by = 10), -1), .N))]],
  Var2 := Var1
][is.na(Var2), Var2 := ""][]
Update
Based on the comments,
# Variant with a step of 100: the first row, rows 100, 200, ... and the last
# row receive Var1; remaining rows become "". The trailing [] prints the result.
setDT(df1)[
  c(1, tail(seq(0, .N, by = 100), -1), .N),
  Var2 := Var1
][
  is.na(Var2),
  Var2 := ""
][]
Using base R - logic : use c(seq(1, nrow(df1), by = 9), nrow(df1)) to select the location you wanted. This returns 1 10 19 28 35
# Start with an all-blank Var2, then fill in Var1 (as text) at rows
# 1, 10, 19, ... (step 9) plus the last row.
df1$Var2 <- ""
df1$Var2 <- local({
  keep <- c(seq(1, nrow(df1), by = 9), nrow(df1))
  replace(df1$Var2, keep, as.character(df1$Var1[keep]))
})
Using the modulo operator: c(TRUE, (seq(nrow(df1))%%10==0)[c(-1, -nrow(df1))], TRUE) puts TRUE at the first and last locations and at every 10th position, which the %% operator identifies.
# Logical mask: TRUE for the first row, the last row and every row whose
# position is a multiple of 10; Var1 (as text) is copied into Var2 there.
df1$Var2 <- local({
  every_tenth <- seq(nrow(df1)) %% 10 == 0
  mask <- c(TRUE, every_tenth[c(-1, -nrow(df1))], TRUE)
  replace(df1$Var2, mask, as.character(df1$Var1[mask]))
})

ddply function based on two variables

How can I modify the below code to calculate the average of time per ID in each level of Type.
df <- structure(list(ID = structure(c(1L, 2L, 2L, 3L, 3L, 4L, 4L, 4L,
4L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 5L, 6L), .Label = c("R1", "R2",
"R3", "R4", "R5", "R6"), class = "factor"), cost = c(123L, 12L,
14L, 15L, 16L, 17L, 18L, 9L, 6L, 6L, 7L, 8L, 1L, 111L, 121L,
141L, 181L, 1611L), Time = c(123L, -12L, NA, -15L, NA, 17L, 18L,
-9L, 88L, 666L, 7L, 78L, 188L, 111L, 121L, 141L, 189L, 161L),
number = c(14L, 12L, 15L, 17L, NA, 17L, 22L, 95L, NA, 67L,
7L, 8L, 7L, 118L, NA, 140L, 180L, 1611L),type = c("A", "A", "B", "B", "B", "C", "C",
"M", "A", "M", "D", "D", "C", "A", "D", "B", "B", "M")), .Names = c("ID",
"cost", "Time", "number","type"), row.names = c(NA, -18L), class = "data.frame")
library(dplyr)
# Per-ID summary of Time: the number of non-missing values and the mean of the
# non-negative, non-missing values, rounded to 2 decimals.
df %>%
  # The column is named `ID`; column names are case-sensitive, so `id` fails.
  group_by(ID) %>%
  summarise(
    N.Time = sum(!is.na(Time)),
    Time_Average = round(mean(Time[Time >= 0 & !is.na(Time)]), 2)
  )

Prediction by neural network in R (nnet package)

I can't get why the testing of dataset is not working in R neural networks (nnet package).
I have two datasets with similar structures - for training (trainset, 17 cases) and prediction (testset, 9 cases). Each dataset has columns: Age, Gender, Height, Weight. In the testing dataset the age is unknown (NaN).
The formula for training is obtained successfully below:
library(nnet)
# Fit a single-hidden-layer network (17 hidden units) predicting age.
# The formula uses the predictor columns present in the posted trainset/testset
# (gender, Height, Weight); emLength and action5cnt do not exist in that data.
# linout = TRUE requests a linear output unit, needed for a numeric response
# such as age -- the default logistic output is confined to (0, 1).
trainednetwork <- nnet(
  age ~ gender + Height + Weight,
  data = trainset,
  size = 17,
  linout = TRUE
)
However, when I try to use the test dataset for prediction in the next line of code,
# Apply the fitted network to the rows of testset.
prediction <- predict(trainednetwork, newdata = testset)
I get the error "No component terms, no attribute". Can anyone help?
The data (obtained with dput() function):
testset:
structure(list(
age = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_),
gender = structure(
c(2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L),
.Label = c("f", "m"),
class = "factor"),
Height= c(9L, 11L, 9L, 11L, 9L, 11L, 9L, 11L, 9L),
Weight= c(1L, 41L, 2L, 1L, 2L, 29L, 12L, 6L, 12L)),
.Names = c("age", "gender", "Height", "Weight"),
class = "data.frame",
row.names = c(NA, 9L))
trainset:
structure(list(
age = c(43L, 35L, 22L, 28L, 20L, 47L, 41L, 23L,
42L, 27L, 22L, 60L, 62L, 47L, 42L, 26L, 54L),
gender = structure(
c(2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L),
.Label = c("f", "m"),
class = "factor"),
Height= c(7L, 9L, 11L, 11L, 11L, 9L, 11L, 9L, 23L, 9L,
9L, 9L, 10L, 7L, 7L, 11L, 7L),
Weight= c(2L, 2L, 9L, 9L, 28L, 8L, 6L, 3L, 1L, 2L, 40L,
1L, 9L, 1L, 7L, 4L, 35L)),
.Names = c("age", "gender", "Height", "Weight"),
class = "data.frame",
row.names = c(NA, 17L))
I think in the R neuralnet package the command to use for prediction is "compute", not predict, which is very confusing. A

Resources