i have data frame that looks like this :
is severe encoding sn_id
1 1 1
0 2 1
1 2 2
0 1 2
1 1 2
im using on by function this function :
catt <-
function(y, x, score = c(0, 1, 2)) {
miss <- unique(c(which(is.na(y)), which(is.na(x))))
n.miss <- length(miss)
if(n.miss > 0) {
y <- y[-miss]
x <- x[-miss]
}
if(!all((y == 0) | (y == 1)))
stop("y should be only 0 or 1.")
if(!all((x == 0) | (x == 1) |(x == 2)))
stop("x should be only 0, 1 or 2.")
ca <- x [y == 1]
co <- x [y == 0]
htca <- table(ca)
htco <- table(co)
A <- matrix(0, 2, 3)
colnames(A) <- c(0, 1, 2)
rownames(A) <- c(0, 1)
A[1, names(htca)] <- htca
A[2, names(htco)] <- htco
ptt <- prop.trend.test(A[1, ], colSums(A), score = score)
#list(#"2x3-table" = A,
#chisq = as.numeric(ptt$statistic),
#df = as.numeric(ptt$parameter),
res= p.value = as.numeric(ptt$p.value)
#n.miss = n.miss)
return(res)
}
when i run it :
by(es_test,es_test$sn_id, function (es_test) {catt(es_test$ï..is_severe,es_test$encoding)})
i get these results:
es_test$sn_id: 1
[1] 0.1572992
------------------------------------------------------------------------
es_test$sn_id: 2
[1] 0.3864762
it is not a very comfortable format as i want to further work with it , is there any way to get these results as list :[0.157,0.386]?
i tried this :
result_pv=c(by(es_test,es_test$sn_id, function (es_test) {catt(es_test$ï..is_severe,es_test$encoding)}))
but it produced double and i want it as vector or list :
the double :
Browse[6]> result_pv
1 2
0.1572992 0.3864762
> typeof(result_pv)
[1] "double"
what i want to do with it later is to add this result_pv to data frame as column and when it is a double i cant do that
thank you
I have a function that does some (complex) selecting of rows, and returns the corresponding values from one column. I then want to overwrite these values.
MWE
x = data.frame(a=c(1,2,3),b=c(4,5,6))
f = function(x,i){ return(x[x$a==i,'b']) }
f(x,2) <- 3
throws:
Error in f(x, 2) = 3 : could not find function "f<-"
Is there a way to assign these values from the function return?
No tidyverse please. Only base R.
The function should be
f <- function(x, i, val) {
if(missing(val)) {
x<- x[x$a==i,]
} else {
x$b[x$a ==i] <- val
}
return(x)
}
Then, when we run the code
> f(x, 2, 3)
a b
1 1 4
2 2 3
3 3 6
> f(x, 2) # missing the val
a b
2 2 5
If we want to update the object, use <-
x <- f(x, 2, 3)
An alternative would be to write your function in two ways: the original one, and a specific assignment function, so that the R parser will work on your original syntax:
f <- function(x, i) {
return(x[x$a == i, 'b'])
}
`f<-` <- function(x, i, value) {
x[x$a == i, 'b'] <- value
return(x)
}
So now you can do:
f(x, 2)
#> [1] 5
f(x, 2) <- 3
x
#> a b
#> 1 1 4
#> 2 2 3
#> 3 3 6
f(x, 2)
#> [1] 3
I showed how I see the implementation of this algorithm, I divided it into two steps
step one sequence search
step two check break rules
set.seed(123)
dat <- as.data.frame(matrix(sample(10,60,replace = T),ncol = 3))
colnames(dat) <- LETTERS[1:ncol(dat)]
dat
rule <- c("A==0","A==10 & B==4","C==9","A>10","B<0","C==0","A==5","A>10",
"B<0","C==0","A==9 & B==9","A>10","B<0","A==10","A==7 & B==5")
action <- c("break","next","next",rep("break",3),"next",rep("break",3),
"next",rep("break",3) ,"next")
rule <- cbind(rule,action)
I think this works -
seq_rule <- function(dat, rule, res.only = TRUE) {
value = rule$action
rule <- rule$rule
m <- with(dat, lapply(rule, function(r) eval(str2expression(r))))
fu <- function(x, y) {
k <- which(y)
ifelse(all(k <= x), NA, min(k[k > x]))
}
idx <- Reduce(fu , m,init = 0, accumulate = TRUE)[-1]
if (!res.only) {
idx <- na.omit(idx)
fidx <- head(idx, length(rule))
debug.vec <- replace(rep("no", nrow(dat)), fidx, rule[seq_along(fidx)])
return(cbind(dat, debug.vec))
}
if(any(value[!is.na(idx)] == 'break')) return(FALSE)
idx <- na.omit(idx)
length(idx) >= length(rule)
}
Here are some checks -
rule <- data.frame(rule= c("A==9","B==4","C==4","A==4", "B==10","C==4") ,
action= c(rep("next",3),"break","break","next"))
seq_rule(dat = dat,rule = rule)
#[1] FALSE
rule <- data.frame(rule= c("C==9","B==3","C==4"),
action= c(rep("next",3)))
seq_rule(dat = dat,rule = rule)
#[1] TRUE
seq_rule(dat = dat,rule = rule, res.only = FALSE)
# A B C debug.vec
#1 3 5 9 C==9
#2 3 3 3 B==3
#3 10 9 4 C==4
#4 2 9 1 no
#5 6 9 7 no
#6 5 3 5 no
#7 4 8 10 no
#8 6 10 7 no
#9 9 7 9 no
#10 10 10 9 no
rule <- data.frame(rule= c("C==9","B==3","C==4", "A == 1"),
action= c(rep("next",3), 'break'))
seq_rule(dat = dat,rule = rule)
#[1] FALSE
rule <- data.frame(rule= c("C==9","B==3","C==4", "A == 6"),
action= c(rep("next",3), 'break'))
seq_rule(dat = dat,rule = rule)
#[1] FALSE
Since the logic of your question is a bit complicated, I guess a straightforward way, e.g., using loops, might be more efficient and readable. Here is one version of seq_rule
seq_rule <- function(dat, rule, res.only = TRUE) {
m <- with(dat, as.data.frame(sapply(rule$rule, function(r) eval(str2expression(r)))))
rule_next <- with(rule, rule[action == "next"])
m_next <- m[rule_next]
idx <- na.omit(
Reduce(
function(x, y) {
k <- which(y)
ifelse(all(k <= x), NA, min(k[k > x]))
}, m_next,
init = 0, accumulate = TRUE
)
)[-1]
fidx <- head(idx, length(rule_next))
debug.vec <- replace(rep("no", nrow(dat)), fidx, rule_next[seq_along(fidx)])
trgs <- do.call(
rbind,
Map(
function(p, q) {
u <- as.matrix(m[p, ][q[q %in% with(rule, rule[action == "break"])]])
k <- which(u, arr.ind = TRUE)
data.frame(breakRowID = row.names(u)[k[, "row"]], breakTrigger = colnames(u)[k[, "col"]])
},
split(1:nrow(dat), cut(1:nrow(dat), c(0, idx, Inf))),
split.default(names(m), cumsum(rule$action != "break"))
)
)
triggerBreaks <- replace(rep("no", nrow(dat)), debug.vec != "no", NA)
if (!res.only) {
cbind(dat, debug.vec, trigger.break = with(trgs, replace(triggerBreaks, as.numeric(breakRowID), breakTrigger)))
} else {
nrow(trgs) == 0
}
}
and you will see
> seq_rule(dat = dat, rule = rule)
[1] FALSE
> seq_rule(dat = dat, rule = rule, res.only = FALSE)
A B C debug.vec trigger.break
1 3 9 2 no no
2 3 3 1 no no
3 10 4 9 A==10 & B==4 <NA>
4 2 1 9 C==9 <NA>
5 6 7 6 no no
6 5 5 5 A==5 <NA>
7 4 10 9 no no
8 6 7 10 no no
9 9 9 4 A==9 & B==9 <NA>
10 10 9 6 no A==10
11 5 10 8 no no
12 3 7 6 no no
13 9 5 6 no no
14 9 7 7 no no
15 9 5 1 no no
16 3 6 6 no no
17 8 9 2 no no
18 10 2 1 no A==10
19 7 5 2 A==7 & B==5 <NA>
20 10 8 4 no no
I want to say a huge thank you to everyone who tried to help me, as well as for unlimited patience ..
But it was impossible to help me because I myself did not fully understand what I wanted. Instead of breaking the question into several parts and asking separately (as it should be), I asked a big difficult question that I could hardly explain to myself.
I am very very sorry for that.
Here is my answer, this is what I wanted to get in the end.
seq_rule2 <- function(dat , rule ,res.only = TRUE){
# This is a fast function written by Thomas here
# https://stackoverflow.com/questions/68625542/match-all-logic-rules-with-a-dataframe-need-super-fast-function
# as an answer to my earlier question.
# It takes the rules as a vector and looks for the sequence
seq_rule <- function(dat, rule, res.only = TRUE) {
m <- with(dat, lapply(rule, function(r) eval(str2expression(r))))
fu <- function(x, y) {
k <- which(y)
ifelse(all(k <= x), NA, min(k[k > x]))
}
idx <- na.omit(Reduce( fu, m,init = 0, accumulate = TRUE ))[-1]
if (!res.only) {
fidx <- head(idx, length(rule))
debug.vec <- replace(rep("no", nrow(dat)), fidx, rule[seq_along(fidx)])
return(cbind(dat, debug.vec))
}
length(idx) >= length(rule)
}
#if there is only one next rule, then there is no point in continuing to return the FALSE and finish completely
if( length(rule$rule[rule$action=="next"]) <= 1 ) return(FALSE)
# STEP 1
# run seq_rule
yes.next.rule.seq <- seq_rule(dat = dat , rule = rule$rule[rule$action=="next"] , res.only = T)
if(res.only==FALSE & yes.next.rule.seq==FALSE) {
Next <- rep("no",nrow(dat))
Break <- rep("no",nrow(dat))
dat <- cbind(dat,Next=Next, Break=Break)
return(dat)
}
if(res.only==TRUE & yes.next.rule.seq==FALSE) return(FALSE)
# if the seq_rule found the sequence (TRUE) but there are no "break rules" in the "rule",
# then there is no point in searching for "break rules". Return TRUE and finish completely
if( length(rule$rule[rule$action=="break"]) == 0 & yes.next.rule.seq == TRUE) return(TRUE)
# STEP 2
#looking for break rules in the range between next rules
if(yes.next.rule.seq){
#get indices where the "next rules" triggered in dat
deb.vec <- seq_rule(dat = dat , rule = rule$rule[rule$action=="next"] , res.only = F)[,"debug.vec"]
idx.next.rules <- which(deb.vec!="no")
#get indices where the "break rules" triggered in dat
m <- with(dat, lapply(rule$rule[rule$action=="break"], function(r) eval(str2expression(r))))
idx.break.rules <- unlist(lapply(m,which))
# RES the final result is equal to TRUE,
# but if a "break rule" is found between the "next rules",
# then the RES will be false
RES <- TRUE
# sliding window of two "next rules" http://prntscr.com/1qhnzae
for(i in 2:length(idx.next.rules)){
temp.range <- idx.next.rules[ (i-1):i ]
# Check if there is any "break rule" index between the "next rule" indexes
break.detect <- any( idx.break.rules > temp.range[1] & idx.break.rules < temp.range[2] )
if( break.detect ) RES <- FALSE ; break
}
}
if(!res.only) {
Next <- rep("no",nrow(dat)) ; Next[idx.next.rules] <- "yes"
Break <- rep("no",nrow(dat)) ; Break[idx.break.rules] <- "yes"
dat <- cbind(dat,Next=Next, Break=Break)
return(dat)
}
return(RES)
}
data for to check
set.seed(963)
dat <- as.data.frame(matrix(sample(10,30,replace = T),ncol = 3))
colnames(dat) <- LETTERS[1:ncol(dat)]
rule <- cbind.data.frame(rule= c("A==9","B==4","C==4","A==4") ,
action= c("next","break","break","next"))
rule <- as.data.frame(rule,stringsAsFactors = F)
seq_rule2(dat = dat, rule = rule)
dat
rule
for example no breaks set.seed(963)
http://prntscr.com/1qhprxq
with break set.seed(930) http://prntscr.com/1qhpv2h
Find the number of entries in each row which are greater than 4.
set.seed(75)
aMat <- matrix( sample(10, size=60, replace=T), nr=6)
rowmax=function(a)
{
x=nrow(a)
y=ncol(a)
i=1
j=1
z=0
while (i<=x) {
for(j in 1:y) {
if(!is.na(a[i][j])){
if(a[i][j]>4){
z=z+1
}
}
j=j+1
}
print(z)
i=i+1
}
}
rowmax(aMat)
It is showing the error. I don't want to apply in built function
You could do this easier counting the x that are greater than 4 using length.
rowmax2 <- function(x) apply(x, 1, function(x) {x <- na.omit(x);length(x[x > 4])})
rowmax2(aMat)
# [1] 8 7 8 7 4 3
If you wanted to do this absolutely without any shortcut you could use two for loops. 1 for each row and another for each value in the row.
rowmax = function(a) {
y=nrow(a)
result <- numeric(y)
for(j in seq_len(y)) {
count = 0
for(val in a[j, ]) {
if(!is.na(val) && val > 4)
count = count + 1
}
result[j] <- count
}
return(result)
}
rowmax(aMat)
#[1] 8 7 8 7 4 3
If you wanted to do this using in-built functions in base R you could use rowSums.
rowSums(aMat > 4, na.rm = TRUE)
#[1] 8 7 8 7 4 3
There are several errors in you code:
You should put z <- 0 inside while loop
You should use a[i,j] for the matrix indexing, rather than a[i][j]
Below is a version after fixing the problems
rowmax <- function(a) {
x <- nrow(a)
y <- ncol(a)
i <- 1
j <- 1
while (i <= x) {
z <- 0
for (j in 1:y) {
if (!is.na(a[i, j])) {
if (a[i, j] > 4) {
z <- z + 1
}
}
j <- j + 1
}
print(z)
i <- i + 1
}
}
and then we get
> rowmax(aMat)
[1] 8
[1] 7
[1] 8
[1] 7
[1] 4
[1] 3
A concise approach to make it is using rowSums, e.g.,
rowSums(aMat, na.rm = TRUE)
Suppose I have a vector like this
lst <- c(2,3,4,6,7,9,10)
Is it possible to number the items in sequence?
Expected Output
lst.rank <- c(1,2,3,1,2,1,2)
unlist(lapply(split(lst, cumsum(c(1, diff(lst)) != 1)), seq_along), use.names = FALSE)
#OR
ave(cumsum(c(1, diff(lst)) != 1), cumsum(c(1, diff(lst)) != 1), FUN = seq_along)
#[1] 1 2 3 1 2 1 2
In the same spirit as d.b's answer, but using rle and sequence.
sequence(rle(cumsum(c(1, diff(lst)) != 1))$lengths)
[1] 1 2 3 1 2 1 2
lst <- c(2,3,4,6,7,9,10)
m = 1
for (i in 1:(length(lst)-1) ){
if (lst[i+1] == lst[i]+1){
lst[i]=m
if(i == length(lst)-1) lst[i+1] = m + 1
m = m+1
}
else{
lst[i]=m
m = 1
}
}
lst