I have this data frame:
A <- c(10, 20, 30, 40, 5)
B <- c(5, 0, 0, 0, 0)
df = data.frame(A, B)
And I want to replace the 0's in B with the sum of A and B[i-1]. I have searched everywhere, but I feel like I am missing something really basic. This is my desired result:
A B
1 10 5
2 20 25
3 30 55
4 40 95
5 5 100
I have tried this, but it didn't work:
# Asker's non-working attempt, kept exactly as posted.
# length(df) is the number of columns (2 for this data frame), so the loop
# body runs only for i = 2.
for(i in 2:length(df)){
# This assigns one value to the whole column B rather than to element i, and
# it reads the standalone vectors A and B instead of the columns of df.
df$B <- A[i] + B[i-1]
}
In Excel, this would be something like B$2 = A$2 + B$1. I cannot figure out how to do this in R. Any help would be greatly appreciated since I feel like I am missing something basic. Thanks!
You were very close. Try this:
# Fill B row by row: each row's B becomes that row's A plus the previous
# row's (already updated) B.
# seq_len(nrow(df))[-1] is empty for a 0- or 1-row data frame, whereas
# 2:nrow(df) would count down (2, 1) and index past the end.
for (i in seq_len(nrow(df))[-1]) {
  df$B[i] <- df$A[i] + df$B[i - 1]
}
And to expand to those comments, could something like this work?
# Same running sum, but when the new total would exceed 60 the previous
# row's B is carried forward instead.
# seq_len(...)[-1] skips the loop entirely for data frames with fewer than
# two rows (2:nrow(df) would count down and index out of range).
for (i in seq_len(nrow(df))[-1]) {
  running <- df$A[i] + df$B[i - 1]
  df$B[i] <- if (running > 60) df$B[i - 1] else running
}
# Data
# I changed one of the later values of B to non-zero to confirm that only
# the zero values of B were getting changed
A <- c(10, 20, 30, 40, 5)
# The non-zero value sits in the last position so that the data matches the
# printed data frame below.
B <- c(5, 0, 0, 0, 10)
(df <- data.frame(A, B))
# A B
# 1 10 5
# 2 20 0
# 3 30 0
# 4 40 0
# 5 5 10
# Only zero entries of B are replaced by A[i] + B[i-1]; any entry that ends
# up at 60 or more is reset to the previous row's B.
# seq_len(nrow(df))[-1] is empty when df has fewer than two rows.
for (i in seq_len(nrow(df))[-1]) {
  if (df$B[i] == 0) df$B[i] <- df$A[i] + df$B[i - 1]
  if (df$B[i] >= 60) df$B[i] <- df$B[i - 1]
}
df
# A B
# 1 10 5
# 2 20 25
# 3 30 55
# 4 40 55
# 5 5 10
Related
R data frame 1 :
Index
Powervalue
0
1
1
2
2
4
3
8
4
16
5
32
R dataframe 2 :
CombinedValue
20
50
Expected Final Result :
CombinedValue
possiblecodes
20
4, 16
50
2, 16, 32
Can we get the output as in the image. If yes please help.
Please see the image
Here you go.
df <- data.frame(sum = c(50, 20, 6))
# For each row, break the value in `sum` into powers of two, largest first,
# by repeatedly peeling off the biggest power of two that still fits.
# The result list is preallocated, and the loop uses seq_len() so that an
# empty data frame is handled without iterating over 1:0.
values_list <- vector("list", nrow(df))
for (i in seq_len(nrow(df))) {
  # `remaining` avoids masking base::sum() with a local variable.
  remaining <- df$sum[i]
  # Start from numeric(0), not NULL: assigning NULL into a list slot would
  # drop the slot when a row has nothing to decompose.
  values <- numeric(0)
  while (remaining > 0) {
    value <- 2^floor(log2(remaining))
    values <- c(values, value)
    remaining <- remaining - value
  }
  values_list[[i]] <- values
}
# Stored as a list column: one numeric vector per row.
df$values <- values_list
df is now:
sum values
1 50 32, 16, 2
2 20 16, 4
3 6 4, 2
I would appreciate your guidance, as I'm new to R programming. Basically, I've created a function to check whether a value is even or odd.
I wish to create a new result column whereby 'even' results in the original value * 2, and 'odd' results in the original value - 5.
I'm not sure where I've gone wrong with the second part of the code, but I am trying to figure out where I can include my 'check' column in the second function to specify that it should be checked for even or odd.
I have only learnt about ifelse(check(df$check)), but it doesn't seem to work in my case.
Much appreciated!
## print 'odd' or 'even' results in df
# Label each value in column `col` of `df` as 'even' or 'odd'.
# Returns `df` with the labels stored in a column named 'check'.
check <- function(df, col) {
  parity <- ifelse(df[, col] %% 2 == 0, 'even', 'odd')
  df['check'] <- parity
  df
}
# multiplication and subtraction for odd_even results
# Asker's non-working version, kept exactly as posted.
# Intended behaviour: add a 'res' column holding value * 2 for even values
# and value - 5 for odd values of column `col`.
# NOTE: check() is called below without its `col` argument, and check()
# returns a whole data frame rather than the vector of 'even'/'odd' labels,
# so the comparison with 'even' is not made on the label column.
checkresult = function(df,col){
df['res'] =
ifelse(check(df) == 'even', df[,col] * 2, df[,col]-5)
return(df)
}
checkresult(df)
The simplest way to do it is by not implementing a new function, just use ifelse() as it was intended:
set.seed(100)
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
df <- data.frame(x = sample(1:15, size = 100, replace = TRUE))
# ifelse() is vectorised, so the whole column is handled in one call:
# even values are doubled, odd values have 5 subtracted.
df$res <- ifelse(df$x %% 2 == 0, df$x * 2, df$x - 5)
df |> head()
#> x res
#> 1 10 20
#> 2 7 2
#> 3 6 12
#> 4 3 -2
#> 5 9 4
#> 6 10 20
If you need to implement a function that returns a dataframe:
set.seed(100)
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
df <- data.frame(x = sample(1:15, size = 100, replace = TRUE))
# Return `dframe` with an added column `res`: values of `column` that are
# even are doubled, odd values have 5 subtracted.
#   dframe: a data frame
#   column: name (or index) of the numeric column to transform
my_func <- function(dframe, column) {
  vec <- dframe[, column]
  dframe$res <- ifelse(vec %% 2 == 0, vec * 2, vec - 5)
  dframe
}
my_func(df, "x") |> head()
#> x res
#> 1 10 20
#> 2 7 2
#> 3 6 12
#> 4 3 -2
#> 5 9 4
#> 6 10 20
Edit 1
If you want the parity of the number, one extra line is required:
set.seed(100)
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
df <- data.frame(x = sample(1:15, size = 100, replace = TRUE))
# Return `dframe` with two added columns:
#   parity: "EVEN" or "ODD" for each value of `column`
#   res:    value * 2 for even values, value - 5 for odd values
#   dframe: a data frame
#   column: name (or index) of the numeric column to transform
my_func <- function(dframe, column) {
  vec <- dframe[, column]
  # The parity test is computed once and reused for both new columns.
  is_even <- vec %% 2 == 0
  dframe$parity <- ifelse(is_even, "EVEN", "ODD")
  dframe$res <- ifelse(is_even, vec * 2, vec - 5)
  dframe
}
my_func(df, "x") |> head()
#> x parity res
#> 1 10 EVEN 20
#> 2 7 ODD 2
#> 3 6 EVEN 12
#> 4 3 ODD -2
#> 5 9 ODD 4
#> 6 10 EVEN 20
Say I have
a <- c(0, 22, 0, 2, 0, 0, 20, 20, 20, 0, 0)
I want to do a cumulative sum whereby I subtract 5 from each value in a, and then add on the previous value.
However, I also have the condition that if the cumsum becomes less than 0, I want it set to 0, and if it becomes greater than 40, I want it set to 40.
So, I want to get
(0, 17, 12, 9, 4, 0, 15, 30, 40, 35, 30)
Can anyone help? I've been trying out a lot of things for a few hours now!
@Holger, that method doesn't always work.
So if I add in a couple of extra zeros it does not come with the right solution
a <- c(0, 22, 0, 2, 0, 0, 0, 0, 20, 20, 20, 0, 0)
gives
0 17 12 9 4 0 1 7 22 37 52 47 42
Here are some alternatives:
1) Loop Create a one line loop like this:
b <- a; for(i in seq_along(b)[-1]) b[i] <- min(40, max(0, a[i] - 5 + b[i-1]))
b
## [1] 0 17 12 9 4 0 15 30 40 35 30
2) Reduce
# One step of the bounded running sum: add the new value `a` to the running
# total `b`, subtract 5, and clamp the result to the range [0, 40].
f <- function(b, a) min(40, max(0, a - 5 + b))
# `accumulate` is written out in full rather than relying on partial
# argument matching of `acc`.
Reduce(f, a, accumulate = TRUE)
## [1] 0 17 12 9 4 0 15 30 40 35 30
3) recursion This recursive solution will be limited to inputs which are not too long.
# Recursive bounded running sum: the result for the first n - 1 elements is
# computed first, then the n-th value is appended as
# (previous result + a[n] - 5) clamped to [0, 40].
# Vectors of length 0 or 1 are returned unchanged.
rec <- function(a) {
  len <- length(a)
  if (len <= 1) {
    return(a)
  }
  head_part <- Recall(a[-len])
  step <- head_part[len - 1] + a[len] - 5
  c(head_part, min(40, max(0, step)))
}
rec(a)
## [1] 0 17 12 9 4 0 15 30 40 35 30
Try
# Bounded running sum.
#   a:   numeric vector of increments
#   d:   amount subtracted at every step after the first
#   up:  upper bound for the running total
#   low: lower bound for the running total
# Returns a numeric vector the same length as `a`. The first element is
# a[1] clamped to [low, up]; each later element is
# (previous result + a[i] - d) clamped to [low, up].
# The bounds come from `up` and `low` instead of hard-coded 40 and 0, and
# inputs of length 0 or 1 are handled (2:length(a) would count down).
cumsum_up_low <- function(a, d = 5, up = 40, low = 0) {
  n <- length(a)
  out <- numeric(n)
  if (n == 0) {
    return(out)
  }
  clamp <- function(x) min(up, max(low, x))
  out[1] <- clamp(a[1])
  for (i in seq_len(n)[-1]) {
    out[i] <- clamp(out[i - 1] + a[i] - d)
  }
  out
}
cumsum_up_low(a, d=5, up=40, low=0)
# [1] 0 17 12 9 4 0 15 30 40 35 30
For long vectors
a <- sample(a, 1e6, TRUE)
system.time(cumsum_up_low(a))
# user system elapsed
# 3.59 0.00 3.59
library(compiler)
cumsum_up_low_compiled <- cmpfun(cumsum_up_low)
system.time(cumsum_up_low_compiled(a))
# user system elapsed
# 0.28 0.00 0.28
For really long vectors
library(Rcpp)
# Compiled (Rcpp) version of the bounded running sum.
#   a:   numeric vector of increments
#   d:   amount subtracted at every step after the first
#   up:  upper bound for the running total
#   low: lower bound for the running total
# The bounds now come from `up` and `low` instead of hard-coded 40 and 0,
# the first element is clamped like in cumsum_up_low(), and an empty input
# returns an empty vector instead of writing to out[0].
cppFunction('
NumericVector cumsum_up_low_cpp(NumericVector a, double d, double up, double low) {
  const R_xlen_t n = a.size();
  NumericVector out(n);
  if (n == 0) return out;
  // First element is clamped to [low, up] but not shifted by d.
  const double first = a[0];
  out[0] = first <= low ? low : (first >= up ? up : first);
  for (R_xlen_t i = 1; i < n; i++) {
    // Running total for this step, computed once and then clamped.
    const double s = out[i - 1] + a[i] - d;
    if (s <= low) {
      out[i] = low;
    } else if (s >= up) {
      out[i] = up;
    } else {
      out[i] = s;
    }
  }
  return out;
}')
a <- sample(a, 5e6, replace = TRUE)
system.time(cumsum_up_low_compiled(a, d=5, up=40, low=0))
# user system elapsed
# 1.45 0.00 1.46
system.time(cumsum_up_low_cpp(a, d=5, up=40, low=0))
# user system elapsed
# 0.04 0.02 0.05
You can use Reduce to get the cumulative sum and combine this with max and min or pmin and pmax to get the bounds.
It is unclear whether you want to use the 0 and 40 in your cumulative summation or if you want bound the variable afterwards. Below, I've provided both possibilities.
Bound within the summation:
Reduce(function(x, y) min(max(x + y - 5, 0), 40), a, 0, accumulate=TRUE)
[1] 0 0 17 12 9 4 0 15 30 40 35 30
Bound after the summation
pmin(pmax(Reduce(function(x, y) x + y - 5, a, 0, accumulate=TRUE), 0), 40)
[1] 0 0 12 7 4 0 0 9 24 39 34 29
This is definitely not the efficient way to do this, but it might be easiest to understand:
a <- c(0, 22, 0, 2, 0, 0, 20, 20, 20, 0, 0)
## The result vector starts out as a copy of a
c <- a
## Walk through a one element at a time
for (i in seq_along(a)) {
  ## The first element is taken as is; every later element adds the
  ## previous result and then subtracts 5
  b <- if (i > 1) a[i] + c[i - 1] - 5 else a[i]
  ## Keep the running value inside the range 0 to 40
  b <- min(40, max(0, b))
  c[i] <- b
  print(c[i])
}
Try to figure out each part, and if you need help, lemme know!
Having a data frame like this:
df <- data.frame(a=c(31, 18, 0, 1, 20, 2),
b=c(1, 0, 0, 3, 1, 1),
c=c(12, 0, 9, 8, 10, 3))
> df
a b c
1 31 1 12
2 18 0 0
3 0 0 9
4 1 3 8
5 20 1 10
6 2 1 3
How can I do a random subset so the sum of rows and columns is equal to a value, i.e , 100?
As I understand your question, you're trying to sample a subset of the rows and columns of your matrix so that they sum to a target value.
You can use integer optimization to accomplish this. You'll have a binary decision variable for each row, column, and cell, and constraints to force the cell values to be equal to the product of the row and column values. I'll use the lpSolve package to do this, because it has a convenient mechanism to get multiple optimal solutions. We can then use the sample function to select between them:
library(lpSolve)
# Pick, at random, one selection of rows and columns of `dat` whose selected
# cells sum to `target`, by solving a binary feasibility problem with lp().
#   dat:    data frame (or matrix) of numeric values
#   target: the sum the selected cells must add up to
# Returns a list with the selected row indices (`rows`), column indices
# (`cols`), the corresponding `subset` of dat, and `num.sol`, the number of
# solutions lp() reported (at most 100 are requested). When lp() does not
# report status 0, rows/cols/subset are NA and num.sol is 0.
get.subset <- function(dat, target) {
nr <- nrow(dat)
nc <- ncol(dat)
# Decision variables, in order: one per row (1..nr), one per column
# (nr+1..nr+nc), then one per cell in row-major order.
nvar <- nr + nc + nr*nc
# Cells upper bounded by row and column variable values (r and c) and lower bounded by r+c-1
# Each (row, column) pair contributes three constraint rows; expand.grid
# enumerates every pair.
mat <- as.matrix(do.call(rbind, apply(expand.grid(seq(nr), seq(nc)), 1, function(x) {
r <- x[1]
c <- x[2]
# Index of the cell variable for row r, column c (row-major layout).
pos <- nr + nc + (r-1)*nc + c
# ltc: column variable - cell variable (paired with ">= 0" below)
ltc <- rep(0, nvar)
ltc[nr + c] <- 1
ltc[pos] <- -1
# ltr: row variable - cell variable (paired with ">= 0" below)
ltr <- rep(0, nvar)
ltr[r] <- 1
ltr[pos] <- -1
# gtrc: row variable + column variable - cell variable (paired with "<= 1")
gtrc <- rep(0, nvar)
gtrc[nr + c] <- 1
gtrc[r] <- 1
gtrc[pos] <- -1
return(as.data.frame(rbind(ltc, ltr, gtrc)))
})))
# Directions and right-hand sides repeat in the same ltc, ltr, gtrc order.
dir <- rep(c(">=", ">=", "<="), nr*nc)
rhs <- rep(c(0, 0, 1), nr*nc)
# Sum of selected cells equals target
# as.vector(t(dat)) lists the cell values row by row, matching `pos`.
mat <- rbind(mat, c(rep(0, nr+nc), as.vector(t(dat))))
dir <- c(dir, "=")
rhs <- c(rhs, target)
res <- lp(objective.in=rep(0, nvar), # Feasibility problem
const.mat=mat,
const.dir=dir,
const.rhs=rhs,
all.bin=TRUE,
num.bin.solns=100 # Number of feasible solutions to get
)
# Non-zero status is treated as "no subset found".
if (res$status != 0) {
return(list(rows=NA, cols=NA, subset=NA, num.sol=0))
}
# Choose one of the reported solutions uniformly at random.
sol.num <- sample(res$num.bin.solns, 1)
# The code reads solution k as the k-th consecutive run of nvar values in
# res$solution.
vals <- res$solution[seq((sol.num-1)*nvar+1, sol.num*nvar)]
# Values of at least 0.999 are counted as selected (tolerance for values
# that are not exactly 1).
rows <- which(vals[seq(nr)] >= 0.999)
cols <- which(vals[seq(nr+1, nr+nc)] >= 0.999)
# dat[rows,cols] uses default dropping, so a single selected column of a
# data frame comes back as a plain vector.
return(list(rows=rows, cols=cols, subset=dat[rows,cols], num.sol=res$num.bin.solns))
}
The function returns the number of subsets found with that sum, together with one randomly selected subset:
set.seed(144)
get.subset(df, 1)
# $rows
# [1] 1
# $cols
# [1] 2
# $subset
# [1] 1
# $num.sol
# [1] 14
get.subset(df, 100)
# $rows
# [1] 1 2 4 5
# $cols
# [1] 1 3
# $subset
# a c
# 1 31 12
# 2 18 0
# 4 1 8
# 5 20 10
# $num.sol
# [1] 2
get.subset(df, 10000)
# $rows
# [1] NA
# $cols
# [1] NA
# $subset
# [1] NA
# $num.sol
# [1] 0
My goal is to generate this vector in R using iter:
0 + 1 = 1,
1 + 2 = 3,
3 + 3 = 6,
6 + 4 = 10
I tried the code below, but it didn't give me the right numbers:
# Asker's non-working attempt, kept exactly as posted.
iter <- 4
w_vector <- rep(0, iter)
# sum(i, i-1) adds the two loop indices (i + (i - 1)); it never reads the
# previously stored element of w_vector, so no running total is built.
for(i in 1:iter) {w_vector[i] <-sum(i, i-1)
print(w_vector[i])
}
I'll truly appreciate it if you can help me fix my code.
You can do:
# Running total of 1, 2, ..., iter. seq_len() yields an empty sequence for
# iter = 0, whereas seq(0) would give c(1, 0).
w_vector <- cumsum(seq_len(iter))
w_vector
# [1] 1 3 6 10
Otherwise, using a for loop, your code should look something like:
iter <- 4
w_vector <- rep(0, iter)
w_vector[1] <- 1
# Each element is the previous element plus the current index.
# seq_len(iter)[-1] is empty when iter is 1, whereas 2:iter would count
# down (2, 1) and fail.
for (i in seq_len(iter)[-1]) {
  w_vector[i] <- w_vector[i - 1] + i
}
w_vector
# [1] 1 3 6 10