Generating 99 datasets at once in R - r

# Generating 99 pairs of datasets
# NOTE: `newtest` is reassigned on every pass, so once the loop has finished
# only the dataset from the final iteration (i = 99) is still available.
for (i in seq_len(99)) {
  # A separate seed per iteration makes every dataset reproducible on its own
  set.seed(i)
  X1 <- rnorm(100, mean = 3, sd = sqrt(10))
  X2 <- rnorm(100, mean = -2, sd = sqrt(3))
  # Linear predictor of the logistic model: squares plus the interaction term
  eta <- 0.2 * X1^2 + 0.3 * X2^2 + 0.7 * (X1 * X2)
  # P(Y = 1) = exp(eta) / (1 + exp(eta))
  odds <- exp(eta)
  px3 <- odds / (1 + odds)
  Y <- rbinom(100, 1, px3)
  newtest <- data.frame(X1, X2, Y)
}
Hi guys, essentially I would like to generate 99 pairs of data sets using a loop. Each `newtest` should be a data frame consisting of a new set of values of X1, X2 and Y. I am not sure if my loop written above is correct, so I would like to seek help if possible!

You can add data frames as list elements to an empty list like this:
# Number of datasets to simulate
n <- 99
# Preallocate the result list instead of growing it one element at a time
dfs <- vector("list", n)
# seq_len(n) is empty when n is 0, whereas 1:n would count down through 1, 0
for (i in seq_len(n)) {
  # A separate seed per dataset makes each one reproducible on its own
  set.seed(i)
  X1 <- rnorm(100, mean = 3, sd = sqrt(10))
  X2 <- rnorm(100, mean = -2, sd = sqrt(3))
  # Linear predictor of the logistic model: squares plus the interaction term
  eta <- 0.2 * X1^2 + 0.3 * X2^2 + 0.7 * (X1 * X2)
  # P(Y = 1) = exp(eta) / (1 + exp(eta))
  odds <- exp(eta)
  px3 <- odds / (1 + odds)
  Y <- rbinom(100, 1, px3)
  # Store this iteration's data frame as the i-th list element
  dfs[[i]] <- data.frame(X1, X2, Y)
}
Output:
> length(dfs)
[1] 99
> dfs[[1]]
X1 X2 Y
1 1.01897911 -3.07450660 1
2 3.58073118 -1.92705317 0
3 0.35751031 -3.57776258 0
4 8.04472084 -1.72628614 1
5 4.04199507 -3.13377386 1
6 0.40545116 1.06103134 0
7 4.54138600 -0.75862624 1
8 5.33478772 -0.42353199 1
9 4.82078051 -1.33457144 1
10 2.03427713 0.91361444 0
11 7.78067182 -3.10112784 1
12 4.23279256 -2.79959213 0
13 1.03546479 0.48078561 0
14 -4.00349598 -3.12703914 1
15 6.55734391 -2.35919398 1
16 2.85790745 -2.68036329 0
17 2.94880189 -2.55424391 0
18 5.98467216 -2.48343842 0
19 5.59692944 -1.14404070 1
20 4.87808088 -2.30714541 1
21 5.90606161 -2.87634403 0
22 5.47333215 0.32621148 1
23 3.23579518 -2.37166244 0
24 -3.29088243 -2.31100103 1
25 4.96006112 -2.17353545 0
26 2.82250534 -0.76562575 1
27 2.50733135 -2.12741729 0
28 -1.65092741 -2.06518430 1
29 1.48795676 -3.18067058 0
30 4.32164726 -2.56165259 0
31 7.29652199 -1.89579906 0
32 2.67495667 -3.01999517 0
33 4.22592528 -1.07942159 1
34 2.82985352 -4.62993570 0
35 -1.35464467 -1.46902621 1
36 1.68767196 -4.66120916 1
37 1.75314569 -2.52130594 1
38 2.81243457 -2.91500764 0
39 6.47858566 -3.12946129 0
40 5.41337362 -2.09854811 1
41 2.47973071 -5.31576779 1
42 2.19880002 0.03790208 1
43 5.20399171 -4.88381685 0
44 4.76032360 -2.80285821 0
45 0.82196325 -3.93283032 1
46 0.76270387 -3.30045666 1
47 4.15290939 1.61507850 1
48 5.43031450 -1.96986990 1
49 2.64473008 -4.22793787 0
50 5.78630728 -4.84161214 0
51 4.25892133 -1.22025307 0
52 1.06460261 -2.03214657 0
53 4.07871518 -2.55091058 0
54 -0.57135969 -3.60970246 1
55 7.53161884 -4.57635683 0
56 9.26257436 -3.86228769 0
57 1.83874373 -0.26789930 1
58 -0.30184360 -3.07606548 0
59 4.80161165 -4.39789764 0
60 2.57291984 1.23770633 1
61 10.59458219 -1.26370455 1
62 2.87591222 -2.41334890 0
63 5.18114738 -0.16665358 1
64 3.08855060 -0.46467093 1
65 0.64956374 -3.07256042 1
66 3.59701367 1.82108156 1
67 -2.70778035 -2.44171977 1
68 7.63449140 -4.46729711 0
69 3.48462961 -2.25010745 0
70 9.87040135 -1.64053305 1
71 4.50369316 1.99753585 1
72 0.75495226 -1.81674492 0
73 4.93128630 -1.20845485 1
74 0.04612393 -2.13363280 1
75 -0.96433690 -2.57850643 1
76 3.92163392 -2.06014725 0
77 1.59818801 -0.63576818 1
78 3.00349543 1.59442979 1
79 3.23508791 -0.22050410 1
80 1.13577108 0.09215872 0
81 1.20171157 -4.13271473 1
82 2.57252769 -0.29584288 1
83 6.72543819 -1.61907907 1
84 -1.81794126 -4.54135160 1
85 4.87822276 -1.09756214 1
86 4.05288152 -2.27497104 0
87 6.36181687 0.53673964 1
88 2.03808597 -3.32689295 0
89 4.17010222 -2.74514862 0
90 3.84464054 -3.60406870 0
91 1.28440103 -2.30675306 0
92 6.81961338 -1.30369517 1
93 6.66951527 -3.26742501 0
94 5.21426998 -0.56175148 1
95 8.01800798 -4.09246077 0
96 4.76608915 -3.81516225 0
97 -1.03693902 0.49615837 0
98 1.18717559 -3.75949942 1
99 -0.87256511 -1.28643887 0
100 1.50297574 -2.66004308 0

Here is a solution with replicate. From the documentation, my emphasis:
replicate is a wrapper for the common use of sapply for repeated evaluation of an expression (which will usually involve random number generation).
# Seed once up front; replicate() then evaluates the expression n times,
# drawing successive datasets from the same random number stream.
set.seed(1)
n <- 99L
dfs <- replicate(
  n,
  expr = {
    x1 <- rnorm(100, mean = 3, sd = sqrt(10))
    x2 <- rnorm(100, mean = -2, sd = sqrt(3))
    # Linear predictor of the logistic model, then P(Y = 1)
    eta <- 0.2 * x1^2 + 0.3 * x2^2 + 0.7 * (x1 * x2)
    odds <- exp(eta)
    y <- rbinom(100, 1, odds / (1 + odds))
    data.frame(X1 = x1, X2 = x2, Y = y)
  },
  # Keep the result as a plain list of data frames
  simplify = FALSE
)
Created on 2022-05-21 by the reprex package (v2.0.1)

Related

Nested xtab tables

I would like to produce nested tables for a multilevel factorial experiment. I have 10 paints examined for time to reach an end point under 4 levels of humidity, 3 temperatures and 2 wind speeds. Of course I have searched on line but without success.
Some sample code can be generated using:
## Made-up data. NB the data is continuous whereas observations were made
## 40/168, so the real data is censored.
time3 <- 4 * seq_len(24)                  # Dependent: times in hrs (a plain sequence, not representative)
wind <- c(1, 2)                           # Independent: factor, draught on or off
RH <- c(0, 35, 75, 95)                    # Independent: RH value, can be processed as a factor
temp <- c(5, 11, 20)                      # Independent: temperature value, can be processed as a factor
paint <- c("paintA", "paintB", "paintC")  # Independent: experimental material

# Build the 24-row frame for the first paint; the first column gets an
# auto-generated name here and is renamed to "temp" further down
dfa <- data.frame(rep(temp, 8))
dfa[["RH"]] <- rep(RH, 6)
dfa[["wind"]] <- rep(wind, 12)
dfa[["time3"]] <- time3
dfa[["paint"]] <- rep(paint[1], 24)

# Same measurements again for the other two paints
dfb <- dfa
dfb[["paint"]] <- paint[2]
dfc <- dfa
dfc[["paint"]] <- paint[3]

# Stack the three paints and give the first column its proper name
dfx <- rbind(dfa, dfb, dfc)
names(dfx)[1] <- "temp"

# Cross-tabulate summed time3 by wind, RH, temp and paint
tx <- xtabs(dfx$time3 ~ dfx$wind + dfx$RH + dfx$temp + dfx$paint)
tx
And the target I hope to obtain would be like this xtab example
This
tx <- xtabs(dfx$time3 ~ dfx$wind + dfx$RH + dfx$temp)
does not work well enough. I would also like to write to C:\file.csv for printing and reporting etc. Please advise on how to achieve the desired output.
You can paste the two variables you want to nest together. Since the items will be ordered lexicographically, you will need to zero-pad the temp variable, to get numerical ordering.
# Nest RH within temp by pasting the two into one combined factor; temp is
# zero-padded to two digits so the combined labels sort in numeric order.
xtabs(
  time3 ~ wind + paste(sprintf("%02d", temp), RH, sep = ":") + paint,
  data = dfx
)
, , paint = paintA
paste(sprintf("%02d", temp), RH, sep = ":")
wind 05:0 05:35 05:75 05:95 11:0 11:35 11:75 11:95 20:0 20:35 20:75 20:95
1 56 0 104 0 88 0 136 0 120 0 72 0
2 0 128 0 80 0 64 0 112 0 96 0 144
, , paint = paintB
paste(sprintf("%02d", temp), RH, sep = ":")
wind 05:0 05:35 05:75 05:95 11:0 11:35 11:75 11:95 20:0 20:35 20:75 20:95
1 56 0 104 0 88 0 136 0 120 0 72 0
2 0 128 0 80 0 64 0 112 0 96 0 144
, , paint = paintC
paste(sprintf("%02d", temp), RH, sep = ":")
wind 05:0 05:35 05:75 05:95 11:0 11:35 11:75 11:95 20:0 20:35 20:75 20:95
1 56 0 104 0 88 0 136 0 120 0 72 0
2 0 128 0 80 0 64 0 112 0 96 0 144

How to use arguments specified in a user-created R function?

This seems like a basic question; however, I have not been able to word it well enough to search for the answer that I need.
This is the sample:
id2 sbp1 dbp1 age1 sbp2 dbp2 sex bmi1 bmi2 smoke drink exercise
1 1 134.5 89.5 40 146 84 2 21.74685 22.19658 1 0 1
2 4 128.5 89.5 48 125 70 1 24.61942 22.29476 1 0 0
3 5 105.5 64.5 42 121 80 2 22.15103 26.90204 1 0 0
4 8 116.5 79.5 39 107 72 2 21.08032 27.64403 0 0 1
5 9 106.5 73.5 26 132 81 2 21.26762 29.16131 0 0 0
6 10 120.5 81.5 34 130 85 1 24.91663 26.89427 1 1 0
I have this code here for a function I am making:
# Reads the CSV file `indat` and computes least-squares coefficients for `dv`
# regressed on an intercept plus the predictors `p1`, `p2`, `p3`, using the
# closed form solve(t(x) %*% x) %*% t(x) %*% y. Prints a label and returns the
# coefficient matrix `betah`.
#
# NOTE(review): `data$dv`, `data$p1`, ... look for columns literally named
# "dv", "p1", ...; `$` does not substitute the value of the argument. The file
# has no such columns, so each lookup yields NULL, which is consistent with the
# "differing number of rows: 75, 0" error quoted below. Selecting by argument
# needs `data[[dv]]` with the column name passed as a quoted string.
linreg.ols<- function(indat, dv, p1, p2, p3){
data<- read.csv(file= indat, header=T)
# The value of this expression is discarded: nothing is auto-printed from
# inside a function body.
data[1:5,]
y<- data$dv
# Design matrix: a column of ones (intercept) followed by the three predictors
x <- as.matrix(data.frame(x0=rep(1,nrow(data)), x1=data$p1, x2=data$p2,
x3=data$p3))
inv<- solve(t(x)%*%x)
xy<- t(x)%*%y
betah<- inv%*%xy
print("Value of beta hat")
betah
}
And when I run my code with this line:
linreg.ols("bp.csv",sbp1,smoke,drink,exercise)
I get the following error:
Error in data.frame(x0 = rep(1, nrow(data)), x1 = data$p1, x2 = data$p2, :
arguments imply differing number of rows: 75, 0
I have a feeling that it's because of how I am extracting the p1, p2, and p3 columns on the line where I create the x variable.
EDIT: changed to y<-data$dv
EDIT: added on part of the sample. Also, I tried:
x <- as.matrix(data.frame(1,data[,c("p1","p2","p3")]))
But that returned the error:
Error in `[.data.frame`(data, , c("p1", "p2", "p3")) : undefined columns selected

Function with a for loop to create a column with values 1:n conditioned by intervals matched by another column

I have a data frame like the following
# Example data: 100 points with x and y drawn uniformly from [0, 60] (in cm)
my_df <- data.frame(
  x = runif(100, min = 0, max = 60),
  y = runif(100, min = 0, max = 60)
)
With this I need a new column with values from 1 to 36 that match x and y every 10 cm. For example, if 0<=x<=10 & 0<=y<=10, put 1, then if 10<=x<=20 & 0<=y<=10, put 2 and so on up to 6, then 0<=x<=10 & 10<=y<=20 starting with 7 up to 12, etc. I tried to make a function with an if repeating the interval for x 6 times, and increasing by 10 the interval for y every iteration. Here is the function
#my miscarried function 'zones'
# Intended (per the description above): map a point (x, y) to a zone number,
# six 10 cm wide zones per band of y, numbered left to right and band by band.
# NOTE(review): the conditions below combine comparisons with the elementwise
# `&`. The call that follows passes whole columns, so every `if` receives a
# vector where a single TRUE/FALSE is needed; that is consistent with the
# warnings reported below.
>zones= function(x,y) {
# vector(length = k) creates logical vectors; entries that are never assigned
# stay at FALSE / 0.
# i: lower y bound of the band tested on pass t; n: first zone number of that
# band; z: the result.
i=vector(length = 6)
n=vector(length = 6)
z=vector(length = 36)
i[1]=0
z[1]=0
n[1]=1
# z[t] is indexed by the pass number t (1..6), not by data row, so only the
# first six of the 36 entries of z can ever be written.
for (t in 1:6) {
if (0<=x & x<10 & i[t]<=y & y<i[t]+10) { z[t] = n[t]} else
if (10<=x & x<20 & i[t]<=y & y<i[t]+10) {z[t]=n[t]+1} else
if (20<=x & x<30 & i[t]<=y & y<i[t]+10) {z[t]=n[t]+2} else
if (30<=x & x<40 & i[t]<=y & y<i[t]+10) {z[t]=n[t]+3} else
if (40<=x & x<50 & i[t]<=y & y<i[t]+10) {z[t]=n[t]+4}else
if (50<=x & x<=60 & i[t]<=y & y<i[t]+10) {z[t]=n[t]+5}
# i[t+1] and n[t+1] advance to the next band only when nothing matched on this
# pass; after a match they keep their initial value for the next pass.
else {i[t+1]=i[t]+10
n[t+1]=n[t]+6}
}
return(z)
}
>xy$z=zones(x=xy$x,y=xy$y)
and I got
There were 31 warnings (use warnings() to see them)
>xy$z
[1] 0 0 0 0 25 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Please,help me before I die alone!
I think this does the trick.
# Break points shared by both axes: six 10 cm wide bins (0,10], ..., (50,60]
zone_breaks <- (0:6) * 10
a <- cut(my_df$x, breaks = zone_breaks)
b <- cut(my_df$y, breaks = zone_breaks)
# Cross the two binnings into one factor with one level per (x bin, y bin)
# pair; the x bin varies fastest in the level order
z <- interaction(a, b)
levels(z)
[1] "(0,10].(0,10]" "(10,20].(0,10]" "(20,30].(0,10]" "(30,40].(0,10]"
[5] "(40,50].(0,10]" "(50,60].(0,10]" "(0,10].(10,20]" "(10,20].(10,20]"
[9] "(20,30].(10,20]" "(30,40].(10,20]" "(40,50].(10,20]" "(50,60].(10,20]"
[13] "(0,10].(20,30]" "(10,20].(20,30]" "(20,30].(20,30]" "(30,40].(20,30]"
[17] "(40,50].(20,30]" "(50,60].(20,30]" "(0,10].(30,40]" "(10,20].(30,40]"
[21] "(20,30].(30,40]" "(30,40].(30,40]" "(40,50].(30,40]" "(50,60].(30,40]"
[25] "(0,10].(40,50]" "(10,20].(40,50]" "(20,30].(40,50]" "(30,40].(40,50]"
[29] "(40,50].(40,50]" "(50,60].(40,50]" "(0,10].(50,60]" "(10,20].(50,60]"
[33] "(20,30].(50,60]" "(30,40].(50,60]" "(40,50].(50,60]" "(50,60].(50,60]"
If these level labels aren't to your taste, relabel them as below:
levels(z) <- 1:36
Is this what you're after? The resulting numbers are in column res:
# Break points shared by both axes: six 10 cm wide bins covering [0, 60].
# (Previously the y breaks were derived from max(my_df$x).)
zone_breaks <- seq(0, 60, by = 10)
n_bins <- length(zone_breaks) - 1L
# Bin index (1..6) for the x values and for the y values. Intervals are
# [0,10), [10,20), ..., [50,60], as described in the question.
my_df$bin1 <- as.integer(
  cut(my_df$x, breaks = zone_breaks, right = FALSE, include.lowest = TRUE)
)
my_df$bin2 <- as.integer(
  cut(my_df$y, breaks = zone_breaks, right = FALSE, include.lowest = TRUE)
)
# Combine the two indices into a single zone number 1..36, counting along x
# first and then up through the y bands. A plain product bin1 * bin2 is not
# unique (5 * 6 and 6 * 5 both give 30), so it cannot identify a zone.
my_df$res <- (my_df$bin2 - 1L) * n_bins + my_df$bin1
head(my_df)
# Example output (the data are random, so the x and y values differ per run):
#>           x         y bin1 bin2 res
#> 1 49.887499 47.302849    5    5  29
#> 2 43.169773 50.931357    5    6  35
#> 3 10.626466 43.673533    2    5  26
#> 4 43.401454  3.397009    5    1   5
#> 5  7.080386 22.870539    1    3  13
#> 6 39.094724 24.672907    4    3  16
I've broken down the steps for illustration purposes; you probably don't want to keep the intermediate columns bin1 and bin2.
We probably need a table showing the relationship between x, y, and z. After that, we can define a function to do the join.
The solution is related and inspired by this post (R dplyr join by range or virtual column). You may also find other solutions are useful.
# Fix the random number generator so the example data are reproducible
set.seed(1)
# Example data frame: 100 points with x and y drawn uniformly from [0, 60]
my_df <- data.frame(
  x = runif(100, min = 0, max = 60),
  y = runif(100, min = 0, max = 60)
)
# dplyr supplies mutate() and n() used in this example
library(dplyr)
# Lookup table relating x, y and z: one row per 10 x 10 cell, holding the
# cell's lower and upper bounds and its zone number z (x varies fastest)
cell_origins <- expand.grid(x_from = seq(0, 50, 10), y_from = seq(0, 50, 10))
r <- mutate(
  cell_origins,
  x_to = x_from + 10,
  y_to = y_from + 10,
  z = seq_len(n())
)
# Tag the rows of `d` that fall inside the cell(s) described by `r`.
# `d` needs columns x and y; `r` supplies x_from, x_to, y_from, y_to and z.
# Rows with x_from <= x < x_to and y_from <= y < y_to receive r$z; every other
# row keeps whatever z it already had. A missing z column is created as NA.
dynamic_join <- function(d, r) {
  has_z <- "z" %in% colnames(d)
  if (!has_z) {
    d[["z"]] <- NA_integer_
  }
  mutate(
    d,
    z = ifelse(
      x >= r$x_from & x < r$x_to & y >= r$y_from & y < r$y_to,
      r$z,
      z
    )
  )
}
# Assign a zone to every row of `d` by applying dynamic_join() once for each
# zone in the lookup table `r`.
#   d: data frame of points, passed through dynamic_join() for every zone
#   r: lookup table with a column z; it is split into one piece per z value
# Returns `d` after all pieces have been applied. If `r` has no rows, `d` is
# returned unchanged (the earlier 1:length(r_list) loop failed in that case).
re_dynamic_join <- function(d, r) {
  r_list <- split(r, r$z)
  # Left fold: feed the running result and the next piece to dynamic_join()
  Reduce(dynamic_join, r_list, d)
}
# Apply the function
re_dynamic_join(my_df, r)
x y z
1 15.930520 39.2834357 20
2 22.327434 21.1918363 15
3 34.371202 16.2156088 10
4 54.492467 59.5610437 36
5 12.100916 38.0095959 20
6 53.903381 12.7924881 12
7 56.680516 7.7623409 6
8 39.647868 28.6870821 16
9 37.746843 55.4444682 34
10 3.707176 35.9256580 19
11 12.358474 58.5702417 32
12 10.593405 43.9075507 26
13 41.221371 21.4036147 17
14 23.046223 25.8884214 15
15 46.190485 8.8926936 5
16 29.861955 0.7846545 3
17 43.057110 42.9339640 29
18 59.514366 6.1910541 6
19 22.802111 26.7770609 15
20 46.646713 38.4060627 23
21 56.082314 59.5103172 36
22 12.728551 29.7356147 14
23 39.100426 29.0609715 16
24 7.533306 10.4065401 7
25 16.033240 45.2892567 26
26 23.166846 27.2337294 15
27 0.803420 30.6701870 19
28 22.943277 12.4527068 9
29 52.181451 13.7194886 12
30 20.420940 35.7427198 21
31 28.924807 34.4923319 21
32 35.973950 4.6238628 4
33 29.612478 2.1324348 3
34 11.173056 38.5677295 20
35 49.642399 55.7169120 35
36 40.108004 35.8855453 23
37 47.654392 33.6540449 23
38 6.476618 31.5616634 19
39 43.422657 59.1057134 35
40 24.676466 30.4585093 21
41 49.256778 40.9672847 29
42 38.823612 36.0924731 22
43 46.975966 14.3321207 11
44 33.182179 15.4899556 10
45 31.783175 43.7585774 28
46 47.361374 27.1542499 17
47 1.399872 10.5076061 7
48 28.633804 44.8018962 27
49 43.938824 6.2992584 5
50 41.563893 51.8726969 35
51 28.657177 36.8786983 21
52 51.672569 33.4295723 24
53 26.285826 19.7266391 9
54 14.687837 27.1878867 14
55 4.240743 30.0264584 19
56 5.967970 10.8519817 7
57 18.976302 31.7778362 20
58 31.118056 4.5165447 4
59 39.720305 16.6653560 10
60 24.409811 12.7619712 9
61 54.772555 17.0874289 12
62 17.616202 53.7056462 32
63 27.543944 26.7741194 15
64 19.943680 46.7990934 26
65 39.052228 52.8371421 34
66 15.481007 24.7874526 14
67 28.712715 3.8285088 3
68 45.978640 20.1292495 17
69 5.054815 43.4235568 25
70 52.519280 20.2569200 18
71 20.344376 37.8248473 21
72 50.366421 50.4368732 36
73 20.801009 51.3678999 33
74 20.026496 23.4815569 15
75 28.581075 22.8296331 15
76 53.531900 53.7267256 36
77 51.860368 38.6589458 24
78 23.399373 44.4647189 27
79 46.639242 36.3182068 23
80 57.637080 54.1848967 36
81 26.079569 17.6238093 9
82 42.750881 11.4756066 11
83 23.999662 53.1870566 33
84 19.521129 30.2003691 20
85 45.425229 52.6234526 35
86 12.161535 11.3516173 8
87 42.667273 45.4861831 29
88 7.301515 43.4699336 25
89 14.729311 56.6234891 32
90 8.598263 32.8587952 19
91 14.377765 42.7046321 26
92 3.536063 23.3343060 13
93 38.537296 6.0523876 4
94 52.576153 55.6381253 36
95 46.734881 16.9939500 11
96 47.838530 35.4343895 23
97 27.316467 6.6216363 3
98 24.605045 50.4304219 33
99 48.652215 19.0778211 11
100 36.295997 46.9710802 28

Efficient Way To Find Neighboring Coordinates In R

I am working with a matrix data set that has X-Y coordinates, and rest of the columns have logical values containing different parameters. I want to find the neighboring coordinates of X-Y given at least one of the corresponding parameters is true, and then append it to new matrix as rows. Below is the sample matrix data.
Data_1
X Y P1 P2 P3 P4
-52 32 1 0 0 1
-50 34 0 0 0 0
-50 26 0 0 0 1
-52 31 0 1 1 1
To solve this, I am planning to use following algorithm:
Algorithm
# Find row wise sum
newCol <- rowSums(Data_1)
# Bind as first column with Data_1
newData <- cbind(newCol, Data_1)
# Not R code, pseudo code
if (newData[,1] != 0{
store newData[,2] and newData[,3].
Data_2 <- find neighboring coordinates to newData[,2] and newData[,3].
}
finalData <- cbind(Data_1, Data_2)
Output
X Y P1 P2 P3 P4 N1.x N1.y N2.x N2.y N3.x N3.y N4.x N4.y N5.x N5.y N6.x N6.y N7.x N7.y N8.x N8.y
-52 32 1 0 0 1 <Neighboring Coordinates---->
-50 34 0 0 0 0 <NULL>
-50 26 0 0 0 1 <Neighboring Coordinates---->
-52 31 0 1 1 1 <Neighboring Coordinates---->
The problem with this approach is scalability when the matrix will have millions of rows and columns.
Following image shows neighbor coordinates for (x,y).
Please suggest better approach if possible, thanks.
How about a data frame approach--does it need to be a matrix?
# Starting points, one row per (x, y) coordinate
points <- data.frame(
  x = c(-52, -50, -50, -52),
  y = c(32, 34, 26, 31)
)
# Every combination of x offset and y offset to apply to a point
distances <- expand.grid(xd = 1:4, yd = 1:4)
# The two frames share no column names, so this merge is a Cartesian product:
# every point is paired with every offset
neighbors <- merge(points, distances, by = NULL)
# Neighbor coordinates = starting point + offset
neighbors <- transform(neighbors, nx = x + xd, ny = y + yd)
# Group the rows by starting point
neighbors <- neighbors[with(neighbors, order(x, y)), ]
# Show the first rows
head(neighbors)
Result
x y xd yd nx ny
4 -52 31 1 1 -51 32
8 -52 31 2 1 -50 32
12 -52 31 3 1 -49 32
16 -52 31 4 1 -48 32
20 -52 31 1 2 -51 33
24 -52 31 2 2 -50 33

How to get rows by a specific value of the last data frame column

I have been searching for a couple of hours without being able to find out how to get rows by a specific value of the last data frame column.
I have this data:
X1 X2 X3 X4 X5 X6
1 48.17695413 39.43730616 3.10972063 2.61426454 6.3099154 1
2 -8.65390666 46.85736629 30.87960405 44.27429466 -41.1696044 1
3 30.77856742 21.74027874 48.19661027 -21.80938770 36.6618114 -1
4 -24.59780029 21.06814152 -39.33865333 -42.97304431 -4.8804663 1
5 7.27249086 34.76682615 -45.85831994 13.88918648 47.3615198 1
6 38.67101648 38.26274828 14.08033108 49.82309644 -33.6286389 1
7 -25.51283904 -45.70014705 -26.42990421 28.78326026 35.1519014 -1
8 -2.39959008 -2.97487159 17.50174041 12.75989751 -45.0535343 1
9 37.15204622 31.15702239 7.78025487 42.89215957 22.0417423 1
10 -47.73408335 -15.17439773 27.97187380 35.77547702 32.3852375 -1
11 -1.02035564 26.50974274 -30.74983917 -22.68985184 -38.8585380 1
12 22.58425138 -19.56519226 1.71576410 -34.55656213 2.9134512 -1
13 -9.38323175 -19.29783276 16.63345252 40.99594080 -29.0354038 1
14 -27.58411688 48.17471695 46.65622143 -30.48450849 14.9938999 -1
15 38.80037898 -47.36718861 37.63894415 -16.63851268 40.3090175 -1
16 43.51794997 18.72671039 37.09440767 -34.12630649 -15.1023072 -1
17 -32.74301919 11.90051064 2.80043937 46.51653604 -28.9494123 1
18 35.68157670 14.81230436 -36.31750911 47.51496385 -37.1951638 1
19 -40.50665190 -34.03052327 29.38828175 1.34482600 -34.8719438 -1
20 41.07689315 43.92234944 2.65074784 -11.53928787 32.2066362 1
21 17.66030733 -14.92802922 12.11998039 15.00645671 21.2442750 -1
22 -33.64110310 10.86618125 -39.01640368 40.54307269 39.6113877 1
23 38.51638250 22.13161306 -12.81533381 33.42758568 2.4696089 1
24 -45.89782895 24.45276037 21.59339122 0.08909106 -12.7604788 -1
25 -29.12297575 -19.68773385 2.79124135 31.15364150 -18.4448177 1
26 38.31280286 -30.47305727 26.44094897 -4.80549014 25.7998981 -1
27 16.18606397 -31.02221894 -49.26810868 -25.00526297 6.1112312 -1
28 48.40843189 -21.66230679 -0.12366320 3.17854472 3.8587339 1
29 40.21567111 -48.13758477 24.93614273 28.69090671 -26.4871368 1
30 1.51516791 -26.42521115 -12.80261928 16.00497463 -29.2357545 1
31 -13.19148520 41.22625608 -19.86476800 32.65111602 35.2801044 1
32 20.27577299 -25.47924414 21.95390093 -1.85122907 29.2785393 -1
33 5.61452187 -5.35398889 10.64601119 -44.01104380 17.1113494 -1
34 -44.78640135 -3.47331611 -1.96771566 -12.24545513 -43.1549257 1
35 25.08181627 33.58752318 28.18229569 -11.75732762 19.3758997 -1
36 14.11425164 0.61068691 35.01117583 32.70457520 -39.7158644 1
37 47.13524659 5.07630450 21.24833809 -47.87322641 -16.3222777 -1
38 -30.68369164 44.48609874 17.87844354 38.41511346 -15.1628380 1
39 -9.82401448 47.94880943 -27.94965848 13.46387044 4.4655846 1
40 -16.53469554 13.49646898 27.13568562 48.88138534 13.3870898 1
41 5.66302277 -5.13428820 28.43233454 -38.00809828 34.1122602 -1
42 0.26656424 -30.29644771 -9.31532411 4.33435896 -6.6514673 -1
43 -23.25400757 43.53289178 31.26810214 26.17365187 10.1966180 1
44 -10.83291732 19.52698252 -46.98810957 -26.03460227 3.7786703 1
45 40.12858390 38.52749658 47.63697251 -43.28443427 -2.9563767 -1
46 -21.25995888 11.08579396 17.64257259 49.27916457 14.6470747 1
47 -14.52012877 4.73049677 29.67983205 11.74713625 0.9333332 -1
48 32.01571801 26.66981509 -4.45819930 -21.25811025 13.7524510 1
49 -44.85126981 -28.82791162 -40.01033327 -7.61150781 33.2228162 -1
50 37.97772161 -20.30672238 -43.07964963 30.13702892 -15.0164150 1
51 29.08064781 15.27835699 -37.16766285 15.21206819 -2.2740492 1
52 -40.48949617 -10.51356671 41.26806469 -29.10868146 -34.6600571 -1
53 30.29430094 -46.88648379 -18.59240613 -22.08509911 -5.0840363 -1
54 2.14905366 -12.34249272 15.41978475 -23.87309705 49.7638849 -1
55 -10.86263228 37.23807719 -38.22960216 24.74888896 39.6630126 1
56 40.46251560 -7.42908975 5.45871421 32.46898202 26.3232591 1
57 -49.35276681 48.47594881 -6.54060557 13.13420576 -30.6234515 1
58 -32.55195227 -24.04979744 -0.90411811 -28.68508857 -14.3195416 -1
59 -41.40694165 -32.71186997 -49.91184238 -47.44685916 -30.7997680 -1
60 -40.25409685 -38.63885684 28.98155658 17.04148985 -29.1016112 -1
61 6.55163422 47.65563791 6.50914682 -43.26901732 12.7081363 -1
62 -34.60404077 0.70960103 40.22226434 22.20810365 4.2410096 -1
63 -34.34207600 19.25329904 41.07820110 5.01101012 -11.3040819 -1
64 -26.09685140 26.89760709 0.13924714 6.23782547 30.3378282 -1
65 2.77869620 3.46297752 -16.93740098 -49.87237649 10.3518355 -1
66 -34.43377707 -11.54497443 -0.03308267 18.72712739 -44.7833285 1
67 15.03299868 -7.64621769 49.49413652 -30.31192285 33.8391314 -1
68 -18.59508867 -25.63164262 -22.74283541 34.89008134 21.4274962 1
69 36.03547243 -1.33956145 8.09449207 -15.73835583 -42.1537299 1
70 -12.76628897 -40.29422191 5.18926801 4.31100223 -2.7201807 -1
71 29.70183801 -47.01430625 -0.10015091 -16.11912933 29.4462418 -1
72 19.68328992 -43.62236075 39.62997815 -47.02085708 46.5351474 -1
73 13.60548099 -14.21226230 -16.40881621 -7.07651307 30.7896358 -1
74 48.97985228 28.08398050 -21.35308585 -7.14836787 22.5584449 1
75 40.56481351 44.48666607 39.73892389 -28.47008093 -37.2993243 1
76 38.65095733 -25.91175342 -42.96530839 28.70831673 30.8613458 1
77 -29.94483940 -8.39585150 -28.69421113 -26.08410181 -33.0075214 1
78 -8.54073463 28.14237990 20.12624827 3.81252319 -38.4828388 1
79 48.69534343 -32.77915958 8.73959693 -4.19653771 37.0420157 -1
80 -11.14250922 -41.17423620 -49.40661241 -20.27474060 25.6862722 -1
81 -31.04337221 11.66699962 5.31555978 -49.68183776 16.6878605 -1
82 -46.18124976 -40.82516073 -38.52873622 -43.34530456 -30.9348033 -1
83 29.78327069 -16.52878316 19.70788015 22.03853386 -6.8582143 1
84 -34.10644981 -24.50293470 3.68809036 -24.28773544 38.5350600 -1
85 -0.10980293 33.79236367 30.77957788 -44.64550817 -15.9467584 -1
86 42.79020275 -49.23286503 -27.00992972 -34.40842158 31.3848205 -1
87 41.49881038 -24.64161024 -22.96349106 20.57055852 14.4795177 1
88 -37.18016476 -6.11188293 -9.10155319 -1.89142874 -45.2628603 1
89 -2.62336845 3.08700614 -15.49178469 26.82823380 -32.2975439 1
90 -4.11448206 30.86272427 -46.10658828 44.78534460 46.8005464 1
91 -45.66761896 -32.70136176 24.77845098 34.32040859 39.1300678 -1
92 31.28242453 40.31769738 -20.26872914 -10.96235495 -22.1892530 1
93 3.80620165 46.78701940 4.35569431 45.98498959 -40.7786838 1
94 0.81615241 24.83391126 9.15402786 -19.10655624 -35.2444648 1
95 -41.83242584 -21.35343621 41.83379223 41.67253454 13.9536310 -1
96 47.49085810 -3.14880335 -37.31183154 9.95015414 -0.4881163 1
97 35.54580712 32.30499793 31.31646449 -11.14545153 8.4006399 -1
98 41.21926124 40.27996780 39.20314582 30.17564565 -32.0637980 1
99 35.71568001 -9.81678010 38.08503181 -39.24719978 -25.6364958 -1
100 -26.34892268 48.57874219 -46.59255797 14.88185874 -25.8851135 1
101 -10.52907093 -18.00235135 5.27036686 31.21748420 -34.4811461 1
102 -29.36565380 30.24585389 40.90974440 26.51184569 -17.7345452 1
103 7.87903350 -1.09457127 -15.66868282 -9.03650313 9.9299264 -1
104 42.60331560 23.64356695 25.10587261 -20.22202369 32.7391773 -1
105 34.14003735 -12.71724263 44.14021997 -25.71712218 -31.9221933 -1
106 -12.44853460 -47.72728265 48.57131951 7.67443904 33.7671637 -1
107 -14.09033376 14.33033464 -32.60425429 -38.24150169 6.7293922 -1
108 34.07870485 -46.51382137 49.84296632 -40.89916174 -43.6066909 -1
109 33.01124896 -49.23768658 14.13645993 -44.37299115 4.8207535 -1
110 10.36037307 -33.63043843 21.71480323 -28.38937459 17.0479339 -1
111 -5.79975462 -31.78398826 -11.77024557 -1.45170258 -34.1461023 1
112 -17.11579286 -36.18001689 -20.23241029 5.67694043 42.8745132 -1
113 -38.41568830 3.69573398 -40.29819805 33.50354005 45.8279202 1
114 -29.81968126 -1.95940533 -20.59776224 -36.95698555 29.9862751 -1
115 2.68202946 18.95755879 -24.05982686 -25.90978143 9.7582864 1
116 42.61640511 27.26011581 -46.40585079 4.26192286 29.0983045 1
117 3.67996979 49.15965132 27.85841639 -26.41195238 -43.4398819 1
118 48.34243569 2.41912773 7.61683735 -29.50917231 -19.5587220 1
119 49.62098389 35.17561462 7.30517844 12.31159559 45.7202338 1
120 -0.02593545 19.94858689 17.98142134 -39.22495318 5.3603052 -1
121 -22.92342477 40.02773573 -23.85643139 12.96254296 20.0602848 1
122 -21.57715647 -17.06917104 -29.41318592 36.47456393 2.7248362 1
123 29.80646805 -27.48751035 31.44713799 11.61041714 -44.3767835 1
124 -17.70907494 0.09368618 -49.75451597 -37.48356637 -6.3979823 1
125 28.88035215 -14.24206311 -0.16729170 -42.50496686 -26.9064285 -1
126 35.68704489 -5.52500989 38.28872927 13.51765934 -23.1683312 1
127 -2.63194633 0.26762190 -12.58921986 -28.77510269 31.0379969 -1
128 30.89340176 29.63612082 -19.92898253 -46.91745718 13.4511064 -1
129 -35.64908991 -0.62351443 -1.92955756 -11.38650151 -26.3827307 1
130 15.95162002 12.78902963 34.69599115 21.07257789 -0.7049875 1
131 4.76377490 -44.94423338 1.10263422 46.68685843 -21.0213374 1
132 -7.37550976 14.72977384 -6.50357553 29.58541936 12.0437405 1
133 -35.41488789 15.30446517 -34.03278021 6.36947788 3.5355144 1
134 35.85945931 32.76696808 46.98049189 16.64510823 2.3072411 1
135 16.23364908 -36.40773527 35.59254617 21.82013777 -8.8757714 -1
136 26.42379180 9.73426250 20.27802824 -36.71915454 -14.1613290 -1
137 -36.27642491 -31.46302165 -36.49763539 -9.67619771 15.6194962 -1
138 -47.50820657 -39.16183484 -38.03864804 5.07058196 15.7445797 -1
139 -21.84303377 10.98107803 -0.40645790 41.45426797 -28.7873543 1
140 -0.95827512 -1.69135577 32.42284888 -19.45907820 25.6286883 -1
141 -25.38794791 -18.17971012 -35.69542712 -14.09463407 28.4907040 -1
142 26.73252139 25.03299059 -12.28499415 30.17292826 -10.5327087 1
143 -43.16460560 -29.86155320 -38.54692092 -45.51337271 2.9275517 -1
144 8.24473461 21.64336434 -27.36689942 -27.98296309 21.6617776 -1
145 4.97203793 33.32681707 -38.52586835 -10.99964115 -36.9242265 1
146 -31.53733450 -1.02430454 40.17001574 18.02365675 -48.1134572 1
147 -12.77233372 24.44318922 -2.01369112 -29.32467558 -36.7925263 1
148 -2.59054168 9.29519264 -43.52171838 -4.17696589 17.9192101 1
149 44.38525802 16.66222662 14.42328799 42.99000488 36.5686033 1
150 24.20129897 -6.10695339 3.54532897 5.21023085 8.3394838 -1
151 -25.18552805 32.60349229 2.03504197 6.99553278 -38.9368815 1
152 11.56381064 -21.92995299 15.23446254 -35.43917607 -41.3730330 -1
153 -13.90532325 20.56872472 -42.20650226 -38.18344893 15.1799403 -1
154 -4.53937219 -15.48650358 30.36769438 -48.65573135 -4.2165786 -1
155 -19.33582416 37.94649800 12.92351044 12.55487665 -23.2021390 1
156 -27.08828198 -41.94863634 19.97831776 -31.80869636 -23.1660966 -1
157 -19.96743444 39.69741114 -13.42580225 -48.12533744 -46.1491481 1
158 9.16460447 12.19791272 3.40255061 46.28932201 -13.4358461 1
159 21.10104361 -36.03372062 19.19042345 36.96155972 38.7752557 -1
160 -13.60133037 33.48209793 25.34574629 -39.59111779 26.5550812 -1
161 -13.08967145 9.11082048 2.15857571 -6.17023725 21.4346305 -1
162 11.66161026 -45.03933019 -16.59182839 18.57721794 41.4250354 -1
163 41.77852059 45.44966395 22.19019849 -35.77017984 -16.8267572 1
164 -21.51823819 -45.14922628 14.55832121 0.94149860 -11.4418946 -1
165 7.77473154 -22.52768062 -6.71764023 -2.76005429 21.3051374 -1
166 24.17912751 11.42646645 41.03107578 38.10855299 -4.4167988 1
And what I want to do is to get all of the rows where X6 == -1 (for example). Also, the names and the number of columns can change, so I need to find the name of the last column. I did it like this:
# Question stub: looks up the name of the last column of `df`. The assignment
# is the final expression, so the function returns that name invisibly.
simple_learner <- function(df)
{
# lastColName holds the column name as a string. To index with it, use
# df[[lastColName]] or df[, lastColName]; df$lastColName would look for a
# column literally called "lastColName".
lastColName <- colnames(df)[ncol(df)]
#....
}
So now, I tried to get the value like this => out <- df[df$lastColName == -1, ], however, I get this output...
> simple_learner <- function(df)
+ {
+ lastColName <- colnames(df)[ncol(df)]
+ as.numeric(df$lastColName)
+ out <- df[df$lastColName == -1, ]
+ out
+ }
>
> simple_learner(read.csv("Exercise-4.csv"))
[1] X1 X2 X3 X4 X5 X6
<0 rows> (or 0-length row.names)
Why?
Try
out <- df[df[,ncol(df)] == -1, ]
or
out <- df[df[,lastColName] == -1, ]
in your function.
Kindly go through the following solution:
Get the last column index using following R code (I am dealing with iris dataset):
# Index of the last column of iris (ncol() already returns a single number)
lastcolumn <- ncol(iris)
lastcolumn
[1] 5
Now, retrieve the rows by the specific value of the last data frame column using following code (I am retrieving the rows with last column having value virginica):
# Keep the rows whose last column equals "virginica"
iris[iris[[lastcolumn]] == "virginica", ]
Hope it works for you.
I had a similar problem, and finally solved it by looking at the help page of dplyr::filter. At the end it gives a nice example:
# To refer to column names that are stored as strings, use the `.data` pronoun:
vars <- c("mass", "height")
cond <- c(80, 150)
starwars %>%
filter(
.data[[vars[[1]]]] > cond[[1]],
.data[[vars[[2]]]] > cond[[2]]
)
In your example above, this could lead to:
# Keep the rows of df whose last column equals -1. The column's name is looked
# up at run time with colnames(df)[[ncol(df)]] and passed to the `.data`
# pronoun as a string.
df %>%
filter(.data[[colnames(df)[[ncol(df)]]]]== -1)

Resources