Error using if statement [duplicate] - r

This question already has answers here:
Error in if/while (condition) {: missing Value where TRUE/FALSE needed
(4 answers)
Closed 4 years ago.
I am trying to define a new matrix z based on another matrix x. This is what I did :
# Build a 10 x 10 numeric matrix; structure() fills it column by column and
# the t() call below transposes it.
x = structure(
.Data = c(5012, 3257,2638,-898,1734,2642,1828,599,-54,172,
-106,4179,-1111,5270,3116,1817,-103,0,535,NA,
3410,5582,4881,2268,2594,3479,0,603,NA, NA,
5655,5900,4211,5500,2159,2658,984, NA, NA, NA,
1092,8473,6271,6333,3786,-225, NA, NA, NA, NA,
1513,4932,5257,1233,2917, NA , NA, NA, NA, NA,
-557,3463,6926,1368, NA, NA, NA, NA, NA, NA,
1351,5596,6165, NA, NA, NA, NA, NA, NA, NA,
3133,2262, NA, NA, NA , NA , NA , NA, NA , NA,
2063, NA, NA, NA , NA , NA , NA, NA , NA, NA),
.Dim = c(10,10))
x = t(x)
r=ncol(x)
# z is pre-allocated as an r x r matrix (all NA) and filled cell by cell below
z = matrix(ncol = r, nrow = r)
for(i in 1:r){
for(j in 1:r){
# x[i,j] > 0 evaluates to NA when x[i,j] is NA; if() cannot branch on NA,
# which is what raises "missing value where TRUE/FALSE needed"
if(x[i,j]>0){
z[i,j] = 1
}
else if(x[i,j]<0){
z[i,j] = -1
}
else {
z[i,j]=0
}
}
}
but I got error :
Error in if (x[i, j] > 0) { : missing value where TRUE/FALSE needed
How can I fix it ?

The problem occurs when the if condition hits an NA value:
# Fill z with the sign of each entry of x, treating NA entries as 0.
# is.na() is checked first and combined with the scalar, short-circuiting
# `&&`, so the comparison is never evaluated on an NA and if() always
# receives TRUE or FALSE. seq_len() keeps the loops empty when r is 0.
for (i in seq_len(r)) {
  for (j in seq_len(r)) {
    if (!is.na(x[i, j]) && x[i, j] > 0) {
      z[i, j] <- 1
    } else if (!is.na(x[i, j]) && x[i, j] < 0) {
      z[i, j] <- -1
    } else {
      # Reached for exact zeros and for NA entries alike
      z[i, j] <- 0
    }
  }
}

Related

R: Pearson correlation in a loop, prevent stopping when an error occurs and output NAs

I want to run Pearson correlations of each row of a matrix (dat) vs a vector (v1), as part of a loop, and output the correlation coefficients and associated p-values in a table. Here is an example for random data (data pasted at the end):
# Pre-allocate one row per row of dat with two columns; every cell starts as NA
result_table <- data.frame(matrix(ncol = 2, nrow = nrow(dat)))
colnames(result_table) <- c("correlation_coefficient", "pvalue")
# Correlate each row of dat with v1 and store the estimate and p-value
for(i in 1:nrow(dat)){
print(i)
# No error handler here: an error raised by cor.test() stops the whole loop
corr <- cor.test(as.numeric(dat[i,]), v1, na.action = "na.omit")
result_table[i,1] <- corr$estimate
result_table[i,2] <- corr$p.value
}
When cor.test() removes missing data, sometimes there are not enough observations remaining and the loop stops with an error (for example at row 11). I would like the loop to continue running, just leaving the values in the result table as NAs. I think the result table should then look like this:
> result_table
correlation_coefficient pvalue
1 0.68422642 0.04206591
2 -0.15895586 0.70694013
3 -0.37005028 0.53982309
4 0.08448970 0.89255250
5 0.86860091 0.05603661
6 0.19544883 0.75274040
7 -0.94695380 0.01454887
8 -0.03817885 0.94275955
9 -0.15214122 0.77354897
10 -0.22997890 0.70978386
11 NA NA
12 NA NA
13 -0.27769887 0.59415930
14 -0.09768153 0.81800885
15 -0.20986632 0.61790214
16 -0.40474976 0.31990456
17 -0.00605937 0.98863896
18 0.02176976 0.95919460
19 -0.14755097 0.72733118
20 -0.25830856 0.50216600
I would also like the errors to keep being printed
Here is the data:
> dput(v1)
c(-0.840396, 0.4746047, -1.101857, 0.5164767, 1.2203134, -0.9758888,
-0.3657913, -0.6272523, -0.5853803, 1.7367901)
> dput(dat)
structure(list(s1 = c(-0.52411895, 0.14709633, 0.05433954, 0.7504406,
-0.59971988, -0.59679685, -0.12571854, 0.73289705, -0.71668771,
-0.04813957, -0.67849896, -0.11947141, -0.26371884, -1.34137162,
2.60928064, -1.23397547, 0.51811222, -4.10759883, -0.70127093,
7.51914575), s2 = c(0.21446623, -0.27281487, NA, NA, NA, NA,
NA, NA, -0.62468391, NA, NA, NA, -3.84387999, 0.64010069, NA,
NA, NA, NA, NA, NA), s3 = c(0.3461212, 0.279062, NA, NA, NA,
-0.4737744, 0.6313365, -2.8472641, 1.2647846, 2.2524449, -0.7913039,
-0.752590307, -3.535815266, 1.692385187, 3.55789764, -1.694910854,
-3.624517121, -4.963855198, 2.395998161, 5.35680032), s4 = c(0.3579742,
0.3522745, -1.1720907, 0.4223402, 0.146605, -0.3175295, -1.383926807,
-0.688551166, NA, NA, NA, NA, NA, 0.703612974, 1.79890268, -2.625404608,
-3.235884921, -2.845474098, 0.058650461, 1.83900702), s5 = c(1.698104376,
NA, NA, NA, NA, NA, -1.488000007, -0.739488766, 0.276012387,
0.49344994, NA, NA, -1.417434166, -0.644962513, 0.04010434, -3.388182254,
2.900252493, -1.493417096, -2.852256003, -0.98871696), s6 = c(0.3419271,
0.2482013, -1.2230283, 0.270752, -0.6653978, -1.1357202, NA,
NA, NA, NA, NA, NA, NA, NA, -1.0288213, -1.17817328, 6.1682455,
1.02759131, -3.80372867, -2.6249692), s7 = c(0.3957243, 0.8758406,
NA, NA, NA, NA, NA, 0.60196247, -1.28631859, -0.5754757, NA,
NA, NA, NA, NA, NA, NA, NA, NA, -2.6303001), s8 = c(-0.26409595,
1.2643281, 0.05687957, -0.09459169, -0.7875279, NA, NA, NA, NA,
NA, NA, NA, 2.42442997, -0.00445559, -1.0341522, 2.47315322,
0.1190265, 5.82533417, 0.82239131, -0.8279679), s9 = c(0.237123,
-0.5004619, 0.4447322, -0.2155249, -0.2331443, 1.3438071, -0.3817672,
1.9228182, 0.305661, -0.01348, NA, NA, 3.4009042, 0.8268469,
0.2061843, -1.1228663, -0.1443778, 4.8789902, 1.3480328, 0.4258486
), s10 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
0.5211859, 0.2196643, -1.2333367, 0.1186947, 1.478086, 0.5211859,
0.2196643)), .Names = c("s1", "s2", "s3", "s4", "s5", "s6", "s7",
"s8", "s9", "s10"), class = "data.frame", row.names = c(NA, -20L
))
A solution with tryCatch could be
# Correlate every row of dat with v1. A row for which cor.test() fails is
# reported and recorded as NA instead of stopping the loop.
for (i in seq_len(nrow(dat))) {
  print(i)
  corr <- tryCatch(
    cor.test(as.numeric(dat[i, ]), v1, na.action = "na.omit"),
    error = function(e) {
      # Keep the error visible, then signal the failure with NULL
      message("Row ", i, ": ", conditionMessage(e))
      NULL
    }
  )
  if (is.null(corr)) {
    result_table[i, 1] <- NA
    result_table[i, 2] <- NA
  } else {
    result_table[i, 1] <- corr$estimate
    result_table[i, 2] <- corr$p.value
  }
}
Here is a solution with tryCatch():
Replacing the for loop with:
# Correlate every row of dat with v1. The whole iteration body runs inside
# tryCatch(), so a failing row prints its error message and leaves that row
# of result_table untouched while the loop carries on.
# seq_len() keeps the loop empty when dat has no rows.
for (i in seq_len(nrow(dat))) {
  tryCatch(
    {
      print(i)
      corr <- cor.test(as.numeric(dat[i, ]), v1, na.action = "na.omit")
      result_table[i, 1] <- corr$estimate
      result_table[i, 2] <- corr$p.value
    },
    error = function(e) {
      cat("ERROR :", conditionMessage(e), "\n")
    }
  )
}

How to dput() a raster

If I use dput() to output the structure of a raster object created using the raster package, then assigning that structure back into a new object throws an error
Error in datanotation %in% c("LOG1S", "INT1S", "INT2S", "INT4S", "INT1U", :
error in evaluating the argument 'x' in selecting a method for function '%in%': Error: object 'datanotation' not found
Example output from dput to test this on:
rast <- new("RasterLayer", file = new(".RasterFile"
, name = ""
, datanotation = "FLT4S"
, byteorder = "little"
, nodatavalue = -Inf
, NAchanged = FALSE
, nbands = 1L
, bandorder = "BIL"
, offset = 0L
, toptobottom = TRUE
, blockrows = 0L
, blockcols = 0L
, driver = ""
, open = FALSE
)
, data = new(".SingleLayerData"
, values = c(NA, NA, NA, NA, 27.7696047300953, 25.8297302967319, 21.8282877533719,
18.2355885882618, 27.0557882676846, 27.2210269605054, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 27.7812364734848, 27.405183119753,
24.2674419226904, 21.1096354803572, 19.7839120235376, 28.0337762198564,
30.3552042477317, 27.9129238649901, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 27.8602581108286, 25.5695030720577, 19.308317452836,
20.2224030952562, 19.8943689815922, 26.0737945219631, 29.8730429910469,
30.0356550838097, NA, NA, NA, NA, NA, NA, NA, NA, NA, 27.9364248138976,
26.9457930700303, 23.0304323166943, 19.4650798632613, 19.0999036995668,
17.5193560841074, 27.7251998095169, 28.4496104452209, 28.9315408261731,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 26.8544908125766, 25.0566493895284,
19.392461671792, 17.9138961574326, 18.457466509715, 16.2828861956587,
24.3601694045773, 28.1808209395655, 28.9282707782622, 26.6332021683416,
NA, NA, NA, NA, NA, NA, NA, 25.7558302469057, 25.8550702427802,
22.5693001232205, 19.6993922601795, 16.1425172340908, 18.5221217322922,
15.6749250516081, 23.7808882591915, 26.8347423074187, 27.2630654814702,
25.9184967686647, NA, NA, NA, NA, NA, NA, NA, 24.8123372469289,
21.9120014347897, 21.8593245154305, 18.7720082061109, 19.7574885247249,
18.4980326509342, 16.3585539605331, 24.9138993320561, 25.2434828477134,
24.163634092843, 21.0163621891882, 20.415437668758, NA, NA, NA,
NA, NA, NA, 24.1877819407117, 20.6452893546199, 20.1902008603325,
19.3002926063194, 16.8587312480956, 16.2594198755341, 19.2032612963314,
23.627249155838, 20.2610810034085, 20.512646252079, 21.2108132984962,
21.3929956864179, 22.5462104762584, NA, NA, NA, NA, 28.1377507911064,
26.6783600800768, 21.9226216069185, 18.7325546681671, 19.3040954243679,
18.9295032049331, 16.9754437056141, 18.4150075374079, 22.1472527043877,
23.0212426364059, 24.3613220176048, 23.8262550760194, 23.1817611577951,
23.5871416966677, 24.5249361302642, 24.8507563698565, 24.5380700828535,
24.6222669309606, 28.1310406991608, 26.6318516890262, 22.2093701933002,
18.5946021290531, 18.8365649393596, 19.1392679329481, 18.0261774155026,
15.4867865984622, 22.6594382919435, 24.0000969920539, 26.8590549383737,
25.3828920205212, 24.7396876533108, 24.2529425383968, 25.4417776029091,
25.4515553773556, 25.362837214521, 24.9104771169439, NA, 26.0466484214637,
22.5547365784066, 21.385068811716, 21.7011412999039, 21.5908931968994,
17.5042944634609, 16.1420136345859, 22.2389789351034, 24.9668657681713,
27.1669375893459, 26.7413589409561, 26.421134458086, 26.219636989708,
26.3182362528439, 26.4198369697735, 26.3436222765849, NA, NA,
NA, 23.0141531354431, 25.0932401677589, 21.6662460243741, 20.5190520941524,
18.375683158989, 21.0476911567136, 24.5643254483451, 26.328155553503,
26.272891752264, 27.9174179692592, 27.8104921435185, 27.3675508861065,
27.0387799062499, 26.7967626268208, NA, NA, NA, NA, 23.9459211033352,
27.0411266756461, 24.5366020483741, 21.827679045105, 20.9547907819176,
22.0691273897516, 23.4745553057174, 26.3462307150211, 28.3701287602482,
27.3758861302374, 27.3750656061461, 27.962003557149, 27.5345722331493,
NA, NA, NA, NA, NA, NA, 28.2432491874035, 25.6912040459346, 23.8651528078732,
21.6046243626329, 22.9131480337219, 23.819129269607, 26.3568262380137,
28.9188481472128, 28.1497370861287, 27.7360100735352, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 25.045874725646, 23.2493769507419,
23.1730515314323, 24.2038209656421, 28.3416254663092, 28.8959112067936,
NA, NA, NA, NA, NA, NA, NA)
, offset = 0
, gain = 1
, inmemory = TRUE
, fromdisk = FALSE
, isfactor = FALSE
, attributes = list()
, haveminmax = TRUE
, min = 15.4867865984622
, max = 30.3552042477317
, band = 1L
, unit = ""
, names = "MAT_eclp"
)
, legend = new(".RasterLegend"
, type = character(0)
, values = logical(0)
, color = logical(0)
, names = logical(0)
, colortable = logical(0)
)
, title = character(0)
, extent = new("Extent"
, xmin = 832565.530013465
, xmax = 2452565.53001346
, ymin = 383803.949813352
, ymax = 1733803.94981335
)
, rotated = FALSE
, rotation = new(".Rotation"
, geotrans = numeric(0)
, transfun = function ()
NULL
)
, ncols = 18L
, nrows = 15L
, crs = new("CRS"
, projargs = "+proj=aea +lat_1=20 +lat_2=-23 +lat_0=0 +lon_0=25 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0"
)
, history = list()
, z = list()
)
Or, another minimal example to dput(), then try assigning output to another object:
# Minimal reproduction: create a 10 x 10 raster, fill every cell with a
# uniform random number, then print its dput() representation
library(raster)
r1 <- raster(nrow=10, ncol=10)
values(r1) <- runif(ncell(r1))
dput(r1)
How can I load a raster from the text version and avoid this error?
This is a bug, due to an error in the validity check of the .RasterFile object (part of the RasterLayer). Illustrated here:
# Validating a default-constructed .RasterFile object is enough to trigger
# the error (presumably requires the raster package to be loaded)
x <- new(".RasterFile")
validObject(x)
#Error in datanotation %in% c("LOG1S", "INT1S", "INT2S", "INT4S", "INT1U", :
# object 'datanotation' not found
I fixed this in version 2.5-11 (available from R-Forge in an hour or so: install.packages("raster", repos="http://R-Forge.R-project.org") ).
P.S. why would you want to use dput/dget?

how to paste an array to rows which contain a certain value in a certain column in R

I would like to paste values of a certain data.frame row to other rows which have a certain attribute of a certain feature, however not a whole row just a couple of values of it. Exactly it looks like:
# Rows "z" and "x" hold the numeric values to be copied (positions 3 to 7).
z <- c(rep(NA, 2), 3, 4, 2, 3, 5)
x <- c(rep(NA, 2), 2, 5, 5, 3, 3)
# Rows "a", "b" and "d" carry a name in position 1; row "c" is entirely NA.
a <- c("Hank", rep(NA, 6))
b <- a
c <- rep(NA, 7)
d <- c("Bobby", rep(NA, 6))
# rbind() of mixed character/numeric vectors gives a character matrix, so the
# numbers from "z" and "x" are stored as text in df. Row names come from the
# variable names; columns are named V1..V7.
df <- as.data.frame(rbind(a, b, c, d, z, x))
Now, I would like to pass df["z",3:7] to the rows[3:7] which have V1 == "Hank", and pass df["x", 3:7] when V1== "Bobby".
Does anybody have a hint for me? I guess it should be a function with sapply or something like that. Maybe dplyr could offer a solution? Thanks for any advice!

Identify data blocks

I have a vector with either a negative value or NA and a threshold:
# A block is kept only if it contains at least one value below this threshold
threshold <- -1
# Runs of non-NA values separated by NA form the blocks. The comma between the
# eighth NA and -0.601 matters: `NA -0.601` would be evaluated as NA - 0.601,
# i.e. NA, and the value would be lost.
example <- c(
  NA, NA, -0.108, NA, NA, NA, NA, NA, -0.601, -0.889, -1.178, -1.089,
  -1.401, -1.178, -0.959, -1.085, -1.483, -0.891, -0.817, -0.095, -1.305,
  NA, NA, NA, NA, -0.981, -0.457, -0.003, -0.358, NA, NA
)
I want to identify all the data blocks with at least one value lower than the threshold and to replace by NA all the other blocks. With my example vector, I want this result:
# Expected output: only the block containing values below -1 is kept.
# The comma between the eighth NA and -0.601 matters: `NA -0.601` would be
# evaluated as NA - 0.601, i.e. NA, and the value would be lost.
result <- c(
  NA, NA, NA, NA, NA, NA, NA, NA, -0.601, -0.889, -1.178, -1.089,
  -1.401, -1.178, -0.959, -1.085, -1.483, -0.891, -0.817, -0.095, -1.305,
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
)
So the first available value is the first block but -0.108 is higher than -1 so it turns into NA. The second block is kept the same because there is at least one value lower than -1. The third block is now NA values because, of its 4 available values, none was lower than the threshold.
My first idea was to identify where were the values lower than the threshold:
# Positions of the values strictly below the threshold; which() drops the
# positions where the comparison is NA
val <- which(example < threshold)
But then I don't know how to say "keep all the values around this position which are not NA" because it is always a different number of values...
Try
library(data.table)#v >= 1.9.5 (devel version - install from GitHub).
#library(devtools)
#install_github("Rdatatable/data.table", build_vignettes = FALSE)
# Group by runs of NA / non-NA values (rleid). Within a run, `NA | cond` is
# TRUE when the run's minimum is below -1 and NA otherwise, so multiplying by
# it either keeps the run's values or turns the whole run into NA.
# NOTE(review): assumes the column created by as.data.table() is named
# `example`, so that `example` inside [ ] refers to the per-group values;
# confirm for your data.table version.
as.data.table(example)[, res:=(NA | (min(example)< -1))*example, by=rleid(is.na(example))][, res]
Another way, with the suggestion of OlliJ :
# Input: runs of non-NA values ("blocks") separated by NA. Note the comma
# between the eighth NA and -0.601: without it R evaluates `NA - 0.601`,
# which is NA, and the value is lost.
example <- c(
  NA, NA, -0.108, NA, NA, NA, NA, NA, -0.601, -0.889, -1.178, -1.089,
  -1.401, -1.178, -0.959, -1.085, -1.483, -0.891, -0.817, -0.095,
  NA, NA, NA, NA, -0.981, -0.457, -0.003, -0.358, NA, NA
)
# A block is kept only if at least one of its values is below this threshold
threshold <- -1

# Run-length encode the "is a value" mask: one run per block or NA gap
test <- !is.na(example)
runs <- rle(test)
len <- runs$lengths
val <- runs$values

## Matrix with the beginning and the end of each group
# Start/end are derived from the run's own cumulative length, so a vector
# that begins with a value (first run is TRUE) is handled as well.
run_end <- cumsum(len)
run_start <- run_end - len + 1L
ind <- cbind(run_start[val], run_end[val])

# Copy the qualifying blocks into result; everything else stays NA.
# A plain loop is used because assigning to `result` inside an apply()
# callback would only change a local copy and leave `result` all NA.
result <- rep(NA_real_, length(example))
for (k in seq_len(nrow(ind))) {
  block <- ind[k, 1]:ind[k, 2]
  if (any(example[block] < threshold)) {
    result[block] <- example[block]
  }
}

Matrix to data frame with row/columns numbers

I have a 10x10 matrix in R, called run_off. I would like to convert this matrix to a data frame that contains the entries of the matrix (the order doesn't really matter, although I'd prefer it to be filled by row) as well as the row and columns numbers of the entries as separate columns in the data frame, so that for instance element run_off[2,3] has a row in the data frame with 3 columns, the first containing the element itself, the second containing 2 and the third containing 3.
This is what I have so far:
# 10 x 10 matrix filled row by row (byrow = TRUE); NA marks missing entries
run_off <- matrix(data = c(45630, 23350, 2924, 1798, 2007, 1204, 1298, 563, 777, 621,
53025, 26466, 2829, 1748, 732, 1424, 399, 537, 340, NA,
67318, 42333, -1854, 3178, 3045, 3281, 2909, 2613, NA, NA,
93489, 37473, 7431, 6648, 4207, 5762, 1890, NA, NA, NA,
80517, 33061, 6863, 4328, 4003, 2350, NA, NA, NA, NA,
68690, 33931, 5645, 6178, 3479, NA, NA, NA, NA, NA,
63091, 32198, 8938, 6879, NA, NA, NA, NA, NA, NA,
64430, 32491, 8414, NA, NA, NA, NA, NA, NA, NA,
68548, 35366, NA, NA, NA, NA, NA, NA, NA, NA,
76013, NA, NA, NA, NA, NA, NA, NA, NA, NA)
, nrow = 10, ncol = 10, byrow = TRUE)
# Empty data frame that the loop below is meant to fill, one row per entry
df <- data.frame()
# Visit every cell: i is the row number, k is the column number
for (i in 1:nrow(run_off)) {
for (k in 1:ncol(run_off)) {
claim <- run_off[i,k]
acc_year <- i
dev_year <- k
# `???` is a placeholder for the unknown row index of df; the snippet does
# not parse as written
df[???, "claims"] <- claim # Problem here
df[???, "acc_year"] <- acc_year # and here
df[???, "dev_year"] <- dev_year # and here
}
}
dev_year refers to the column number of the matrix entry and acc_year to the row number. My problem is that I don't know the proper index to use for the data frame.
I am assuming you are not interested in the NA elements? You can use which and the arr.ind = TRUE argument to return a two column matrix of array indices for each value and cbind this to the values, excluding the NA values:
# Logical mask of the entries that are present (not NA)
present <- !is.na(run_off)
# Row/column position of every present entry, as a two-column matrix
ind <- which(present, arr.ind = TRUE)
# Put the values in front of their positions (both follow column-major order)
out <- cbind(run_off[present], ind)
head(as.data.frame(out))
# V1 row col
#1 45630 1 1
#2 53025 2 1
#3 67318 3 1
#4 93489 4 1
#5 80517 5 1
#6 68690 6 1
Use t() on the matrix first if you want to fill by row, e.g. which( ! is.na( t( run_off ) ) , arr.ind = TRUE ) (and when you cbind it).

Resources