I have a function runBootstrap whose output result is a vector of variable length (depending on # of values for cat, which itself is a product of test). Apologies that this isn't "minimal".
require(dplyr)
test <- function(combo) {
if(combo[1] == 4) {
cat <- 4
} else if((combo[1] == 3 & combo[2] == 2) | (combo[1] == 2 & combo[2] == 2)) {
cat <- 3
} else if((combo[1] == 2 & combo[2] == 1) | (combo[1] == 1 & combo[2] == 2)) {
cat <- 2
} else {
cat <- 1
}
}
arg1.freqs <- c(0.5, 0.2, 0.1, 0.1)
arg2.freqs <- c(0.8, 0.2)
runBootstrap <- function(arg1.freqs, arg2.freqs) {
sim.df <- data.frame(x1 = 1:10000, y1 = NA)
sim.df$x1 <- sample(1:4, 10000, replace = TRUE,
prob = arg1.freqs)
sim.df$y1 <- sample(1:2, 10000, replace = TRUE,
prob = arg2.freqs)
sim.df$cat <- NA
for(i in 1:nrow(sim.df)) {
combo <- c(sim.df[i, 1], sim.df[i, 2])
sim.df$cat[i] <- test(combo)
}
sim.df <- sim.df %>%
select(cat) %>%
group_by(cat) %>%
summarise(n = n()) %>%
mutate(freq = n / sum(n))
sim.df <- as.data.frame(sim.df)
result <- c(sim.df[1, 3], sim.df[2, 3])
}
In this current version there are only two values for cat so result is a vector of length 2; in a future version I will adjust code so that length(result) will equal # values of cat.
When using the function in a for loop, I would like to use the vector values to create new columns in an already existing data.frame df1. The code I've tried thus far is as follows:
df1$result <- NA
for (i in 1:nrow(df1)) {
df1$result[i] <- runBootstrap(arg1.freqs, arg2.freqs)
}
This clearly doesn't work unless the result vector is length = 1. But I don't know the length of the vector until the function runs (although once it runs it will be same length each iteration).
What I would like to achieve is the following:
Example 1: if length(result) == 2
df1.col x1 x2
1 1 1 1
2 2 2 2
3 3 3 3
4 4 4 4
5 5 5 5
6 6 6 6
Example 2: if length(result) == 3
df1.col x1 x2 x3
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
4 4 4 4 4
5 5 5 5 5
6 6 6 6 6
Thanks for any advice or direction.
edited for clarification
UPDATE - edited with solution
I got it to work as I wanted by creating a blank list, populating, then using rbind as follows:
appendResults <- function(df1, arg1, arg2) {
my.list <- vector("list", nrow(df1))
for (i in 1:nrow(df1)) {
arg1.freqs <- as.numeric(arg1[i, 3:6])
arg2.freqs <- as.numeric(arg2[i, 3:4])
my.list[[i]] <- runBootstrap(arg1.freqs, arg2.freqs)
}
result.df <- do.call(rbind, my.list)
df2 <- do.call(cbind, list(df1, result.df))
}
Check this one, not sure what the result looks like, but this creates empty columns, equal to the length of results, with NAs:
# fake data frame
df1 <- data.frame(x = c(1,2,3), y = c("a", "b", "c"))
# say result has length 3
res <- c(5,6,7)
# make columns with names x1, ..., x + length of res
# and assign NA values to those column
df1[ , paste("x", 1:length(res), sep = "")] <- NA
Related
R data frame 1 :
Index
Powervalue
0
1
1
2
2
4
3
8
4
16
5
32
R dataframe 2 :
CombinedValue
20
50
Expected Final Result :
Can we get the output as in the image. If yes please help.
One of stackoverflow mate provided below code. Am looking how to seperate , values as columns with 1 and 0.
df <- data.frame(sum = c(50, 20, 6))
values_list <- list()
for (i in 1:nrow(df)) {
sum <- df$sum[i]
values <- c()
while (sum > 0) {
value <- 2^floor(log2(sum))
values <- c(values, value)
sum <- sum - value
}
values_list[[i]] <- values
}
df$values <- values_list
Can we fix columns till power 31 as shown in attached image. The columns match with possiblecodes then place 1 and 0 else 0 for the remaining columns. Please help.
Here is a function whose output matches the expected output.
toCodes <- function(x) {
n <- floor(log2(x))
pow <- rev(seq.int(max(n)))
# 'y' is the matrix of codes
y <- t(sapply(x, \(.x) (.x %/% 2^pow) %% 2L))
i_cols <- apply(y, 2, \(.y) any(.y != 0L))
colnames(y) <- sprintf("code_%d", 2^pow)
#
possiblecodes <- apply(y, 1, \(p) {
codes <- 2^pow[as.logical(p)]
paste(rev(codes), collapse = ",")
})
data.frame(combinedvalue = x, possiblecodes, y[, i_cols])
}
x <- c(20L, 50L)
toCodes(x)
#> combinedvalue possiblecodes code_32 code_16 code_4 code_2
#> 1 20 4,16 0 1 1 0
#> 2 50 2,16,32 1 1 0 1
Created on 2022-12-19 with reprex v2.0.2
I have the piece of code below. What i want is alter the code
such that when the column entries for mat are the same, i get the same result in their respective positions in summation without it performing the fit operation?
So instead of getting
1 3 5 1 7 1
2 4 6 2 8 2
3 9 12 5 16 4
I want
1 3 5 1 7 1
2 4 6 2 8 2
3 9 12 3 16 3
set.seed(123)
fit = function(A){
x = A[1]
y = A[2]
z = sum(sample((x+y),2))
return(z)
}
mat= matrix(c(1,2,3,4,5,6,1,2,7,8,1,2),nrow=2,ncol=6)
summation=apply(mat, 2, FUN = 'fit')
newmat=rbind(mat,summation)
newmat
You can find out columns that are duplicates and replace the corresponding summation value with the first value of summation so that you get the same value.
fit = function(A){
x = A[1]
y = A[2]
z = sum(sample((x+y),2))
return(z)
}
mat= matrix(c(1,2,3,4,5,6,1,2,7,8,1,2),nrow=2,ncol=6)
summation=apply(mat, 2, FUN = 'fit')
vals <- apply(mat, 2, paste0, collapse = '-')
summation <- ave(summation, match(vals, unique(vals)), FUN = function(x) x[1])
newmat=rbind(mat,summation)
newmat
To pass only unique columns to fit function we can do :
fit = function(A){
x = A[1]
y = A[2]
z = sum(sample((x+y),2))
return(z)
}
mat= matrix(c(1,2,3,4,5,6,1,2,7,8,1,2),nrow=2,ncol=6)
vals <- apply(mat, 2, paste0, collapse = '-')
summation <- apply(mat[, !duplicated(vals)], 2, fit)
summation <- summation[match(vals, unique(vals))]
newmat=rbind(mat,summation)
newmat
How can I automate the steps below?
I have the following example of what I would like to do - in the end get a dataframe made up of smaller dataframes that are generated automatically in earlier steps. These smaller dataframes need also calculations done in them before they are aggregated. I can do all manually with a long script, but can't seem to figure out how to combine properly list(), apply() or for() loops to get the result I wanted (not sure those are the best option here).
Please advise.
Thank you!
########### MY QUESTION IN DETAILED CODE
# DATASET
a <- c(2.0, 2.4, 2.1, 2.2, 2.3)
b <- c(4.0, 0, 4.5, 4.4, 4.8)
c <- c(0.3, 0.2, 2.0, 2.1, 2.3)
d <- c(5.0, 4.8, 4.8, 4.9, 5.0)
test.data <- data.frame(rbind(a,b,c,d))
#STEP 1: create separate dfs and do different calculations by column in each
#LONG WAY, MANUAL
# calculates % difference between each value with respect to first value in row
# in df1, then second value in row for df2, etc.
nc <- ncol(test.data)
df1 <- (test.data[,1:nc] - test.data[[1]])/(test.data[[1]])*100
df2 <- (test.data[,1:nc] - test.data[[2]])/(test.data[[2]])*100
df3 <- (test.data[,1:nc] - test.data[[3]])/(test.data[[3]])*100
df4 <- (test.data[,1:nc] - test.data[[4]])/(test.data[[4]])*100
df5 <- (test.data[,1:nc] - test.data[[5]])/(test.data[[5]])*100
# some results from above give Inf (since divided by zero), so set those to NA
df1[df1==Inf] <- NA
df2[df2==Inf] <- NA
df3[df3==Inf] <- NA
df4[df4==Inf] <- NA
df4[df4==Inf] <- NA
df5[df5==Inf] <- NA
#next will filter each calculated %-value by the specified percent difference filter
# and save the results in separate associated dataframes.
percent.diff <- 30
df.A1 <- data.frame(ifelse(df1 > -percent.diff & df1 < percent.diff, 1, 0))
df.A2 <- data.frame(ifelse(df2 > -percent.diff & df2 < percent.diff, 1, 0))
df.A3 <- data.frame(ifelse(df3 > -percent.diff & df3 < percent.diff, 1, 0))
df.A4 <- data.frame(ifelse(df4 > -percent.diff & df4 < percent.diff, 1, 0))
df.A5 <- data.frame(ifelse(df5 > -percent.diff & df5 < percent.diff, 1, 0))
#next add ID columns to each of the newly created dataframes
obs <- 4
#add row and df ID variables to each of the above
df.A1["df.cat"] <- 1
df.A1["row"] <- 1:obs
df.A2["df.cat"] <- 2
df.A2["row"] <- 1:obs
df.A3["df.cat"] <- 3
df.A3["row"] <- 1:obs
df.A4["df.cat"] <- 4
df.A4["row"] <- 1:obs
df.A5["df.cat"] <- 5
df.A5["row"] <- 1:obs
#combine the individual dataframes with IDs into a single dataframe.
Combo.df <-list(df.A1, df.A2, df.A3, df.A4, df.A5)
All.df <- Reduce(rbind, Combo.df)
FINAL OUTPUT SHOULD LOOK LIKE THIS (only first few rows shown)
X1 X2 X3 X4 X5 df.cat row
a 1 1 1 1 1 1 1
b 1 0 1 1 1 1 2
c 1 0 0 0 0 1 3
d 1 1 1 1 1 1 4
a1 1 1 1 1 1 2 1
b1 1 1 1 1 1 2 2
c1 0 1 0 0 0 2 3
d1 1 1 1 1 1 2 4
a2 1 1 1 1 1 3 1
b2 1 0 1 1 1 3 2
c2 0 0 1 1 1 3 3
d2 1 1 1 1 1 3 4
FAILED ATTEMPT TO TRY TO AUTOMATE ABOVE STEPS
#
a) created the number of dataframes I will need
num.reps <- 5
obs <- 4
n.cols <- 5
lst <- replicate(num.reps,data.frame(matrix(NA, nrow = obs, ncol = n.cols)), simplify=FALSE)
names(lst) <- paste0('df', 1:num.reps)
list2env(lst, envir = .GlobalEnv)
# b) fill dataframes (not sure how to call up dataframe by sequential names in loop)
# THIS DOES NOT WORK
f.diff.calc <- function(i)
{df[[i]] <-(df[,1:nc] - df[[i]])/(df[[i]])*100}
diff.calc.list <- replicate(5, f.diff.calc(list))
#Error in `[.data.frame`(df, , 1:nc) : undefined columns selected
This is a simplification of your code and as far as I can see it does what you want.
fun1 <- function(col, DF = test.data){
res <- 100*(DF - DF[[col]])/DF[[col]]
is.na(res) <- is.infinite(as.matrix(res))
res
}
fun2 <- function(DF, percent.diff = 30){
data.frame(ifelse(-percent.diff < DF & DF < percent.diff, 1, 0))
}
df_list <- lapply(seq_len(ncol(test.data)), fun1)
names(df_list) <- paste0("df", seq_along(df_list))
#next will filter each calculated %-value by the specified percent difference filter
# and save the results in a list of dataframes.
percent.diff <- 30
df.A_list <- lapply(df_list, fun2)
#next add ID columns to each of the newly created dataframes
tmp <- names(df.A_list)
df.A_list <- lapply(seq_along(df.A_list), function(i){
df.A_list[[i]][["df.cat"]] <- i
df.A_list[[i]][["row"]] <- seq_len(nrow(df.A_list[[i]]))
df.A_list[[i]]
})
names(df.A_list) <- tmp
# combine the results in one dataframe
All.df <- do.call(rbind, df.A_list)
Well I sincerely think with a bit more research you could have solved it. Also I cannot recreate the exact output you were getting, but I was able to match the output I was getting using your code.
Here is the automated version of the code.
a <- c(2.0, 2.4, 2.1, 2.2, 2.3)
b <- c(4.0, 0, 4.5, 4.4, 4.8)
c <- c(0.3, 0.2, 2.0, 2.1, 2.3)
d <- c(5.0, 4.8, 4.8, 4.9, 5.0)
test.data <- data.frame(rbind(a,b,c,d))
#STEP 1: create separate dfs and do different calculations by column in each
#LONG WAY, MANUAL
# calculates % difference between each value with respect to first value in row
# in df1, then second value in row for df2, etc.
nc <- ncol(test.data)
calc<-function(x,percent.diff=30,i){
x[x==Inf] <- NA
obs<-4
x.A<- data.frame(ifelse(x > -percent.diff & x < percent.diff, 1, 0))
x.A$df.cat<-i
x.A$row<-1:obs
return(x.A)
}
output<-data.frame()
for(i in 1:5){
assign(paste('df',i,sep=""),(test.data[,1:nc] - test.data[[i]])/(test.data[[i]])*100)
}
for(i in 1:5){
output<-rbind.data.frame(output,calc(x = get(paste('df',i,sep="")),percent.diff = 30,i=i))
}
I have a dataframe, df, of two columns, x and y. I am trying to sum values within column y and put the sums into another dataframe. The summing only occurs for a section of column y between NA values. There are multiple sections of column y that must be summed but I want each sum to be a separate value in the new data frame.
df <- data.frame(x = c(1966,0.1,0.2,0.3,0.4,5622,0.9,0.8,0.7,0.6,7889),
y = c(NA,1,2,3,4,NA,9,8,7,6,NA))
The answer should be in the format of a data frame with one column of two rows:
df <- data.frame(x = c(10,30))
I thought of solving this using some for loop and if statements for values between values of NA in column y. Any ideas?
So far, I have the following code, but I ultimately want it to work for a column with a series of more than two summations:
NAs <- which(is.na(df$y))
L1 <- length(NAs)
L0 <- dim(df)[1]
soln1 <- data.frame(matrix(nrow = L1-1, ncol = 1))
for(i in 1:L0){
for(j in 1:L1){
if (j == L1){
break
} else
soln1[j,1] <- sum(df[NAs[j] +1,2]:df[NAs[j+1] -1,2])
}
}
I took a stab at it with some fake data:
df <- data.frame(x = c(1,1,3,1,3,1,1,1,1,1,3,1,1,1,1,1),
y = c(1,2,NA,4,5,NA,7,8,NA,10,11,NA,13,14,NA,16))
# df
# x y
#1 1 1
#2 1 2
#3 3 NA
#4 1 4
#5 3 5
#6 1 NA
#7 1 7
#8 1 8
#9 1 NA
#10 1 10
#11 3 11
#12 1 NA
#13 1 13
#14 1 14
#15 1 NA
#16 1 16
The magic function:
# sum rows in y if section is between NA values & before a value in column x that is > 2
specialSum <- function(x, y){
starting <- which(c(NA,x[-length(x)]) > 2 & is.na(y))
NAs <- which(is.na(y))
L <- length(starting)
ending <- sapply(1:L, function(z) NAs[NAs[-starting] > starting[z]][1])
output <- matrix(NA, nrow = L)
naming <- rep("",L)
for(i in 1:L){
output[i] <- sum(y[starting[i]:ending[i]], na.rm = T)
naming[i] <- paste0(starting[i]+1,":",ending[i]-1)
}
dimnames(output) <- list(naming, "specialSum")
output
}
specialSum(df$x, df$y)
# specialSum
#7:8 15
#13:14 27
EDIT:
df <- data.frame(x = c(1966,0.1,0.2,0.3,0.4,5622,0.9,0.8,0.7,0.6,7889),
y = c(NA,1,2,3,4,NA,9,8,7,6,NA))
specialSum <- function(y){
NAs <- which(is.na(y))
starting <- NAs[-length(NAs)]+1
ending <- NAs[-1]-1
L <- length(starting)
sums <- matrix(NA, nrow = L) ; naming <- rep("",L) # initialize for speed
for(i in 1:L){
sums[i] <- sum(y[starting[i]:ending[i]], na.rm = T)
naming[i] <- paste0(starting[i],":",ending[i])
}
sums <- sums[sums != 0,,drop = F] # in case there are multiple NAs in a row
data.frame(specialSum = sums, row.names = naming)
}
specialSum(df$y)
# specialSum
#2:5 10
#7:10 30
EDIT#2:
NAs <- which(is.na(df$y))
sumlist <- vector("list", length(NAs)-1)
count <- 0
for(i in 1:nrow(df)){
if(i %in% NAs){
count = count + 1
} else {
sumlist[[count]] <- append(sumlist[[count]], df$y[i])
}
}
data.frame(specialSum = unlist(lapply(sumlist, sum))) # less pretty output
# specialSum
#1 10
#2 30
Please forgive me if I missed an answer to such a simple question.
I want to use cbind() to bind two columns. One of them is a single entry shorter in length.
Can I have R supply an NA for the missing value?
The documentation discusses a deparse.level argument but this doesn't seem to be my solution.
Further, if I may be so bold, would there also be a quick way to prepend the shorter column with NA's?
Try this:
x <- c(1:5)
y <- c(4:1)
length(y) = length(x)
cbind(x,y)
x y
[1,] 1 4
[2,] 2 3
[3,] 3 2
[4,] 4 1
[5,] 5 NA
or this:
x <- c(4:1)
y <- c(1:5)
length(x) = length(y)
cbind(x,y)
x y
[1,] 4 1
[2,] 3 2
[3,] 2 3
[4,] 1 4
[5,] NA 5
I think this will do something similar to what DWin suggested and work regardless of which vector is shorter:
x <- c(4:1)
y <- c(1:5)
lengths <- max(c(length(x), length(y)))
length(x) <- lengths
length(y) <- lengths
cbind(x,y)
The code above can also be condensed to:
x <- c(4:1)
y <- c(1:5)
length(x) <- length(y) <- max(c(length(x), length(y)))
cbind(x,y)
EDIT
Here is what I came up with to address the question:
"Further, if I may be so bold, would there also be a quick way to prepend the shorter column with NA's?"
inserted into the original post by Matt O'Brien.
x <- c(4:1)
y <- c(1:5)
first <- 1 # 1 means add NA to top of shorter vector
# 0 means add NA to bottom of shorter vector
if(length(x)<length(y)) {
if(first==1) x = c(rep(NA, length(y)-length(x)),x);y=y
if(first==0) x = c(x,rep(NA, length(y)-length(x)));y=y
}
if(length(y)<length(x)) {
if(first==1) y = c(rep(NA, length(x)-length(y)),y);x=x
if(first==0) y = c(y,rep(NA, length(x)-length(y)));x=x
}
cbind(x,y)
# x y
# [1,] NA 1
# [2,] 4 2
# [3,] 3 3
# [4,] 2 4
# [5,] 1 5
Here is a function:
x <- c(4:1)
y <- c(1:5)
first <- 1 # 1 means add NA to top of shorter vector
# 0 means add NA to bottom of shorter vector
my.cbind <- function(x,y,first) {
if(length(x)<length(y)) {
if(first==1) x = c(rep(NA, length(y)-length(x)),x);y=y
if(first==0) x = c(x,rep(NA, length(y)-length(x)));y=y
}
if(length(y)<length(x)) {
if(first==1) y = c(rep(NA, length(x)-length(y)),y);x=x
if(first==0) y = c(y,rep(NA, length(x)-length(y)));x=x
}
return(cbind(x,y))
}
my.cbind(x,y,first)
my.cbind(c(1:5),c(4:1),1)
my.cbind(c(1:5),c(4:1),0)
my.cbind(c(1:4),c(5:1),1)
my.cbind(c(1:4),c(5:1),0)
my.cbind(c(1:5),c(5:1),1)
my.cbind(c(1:5),c(5:1),0)
This version allows you to cbind two vectors of different mode:
x <- c(4:1)
y <- letters[1:5]
first <- 1 # 1 means add NA to top of shorter vector
# 0 means add NA to bottom of shorter vector
my.cbind <- function(x,y,first) {
if(length(x)<length(y)) {
if(first==1) x = c(rep(NA, length(y)-length(x)),x);y=y
if(first==0) x = c(x,rep(NA, length(y)-length(x)));y=y
}
if(length(y)<length(x)) {
if(first==1) y = c(rep(NA, length(x)-length(y)),y);x=x
if(first==0) y = c(y,rep(NA, length(x)-length(y)));x=x
}
x <- as.data.frame(x)
y <- as.data.frame(y)
return(data.frame(x,y))
}
my.cbind(x,y,first)
# x y
# 1 NA a
# 2 4 b
# 3 3 c
# 4 2 d
# 5 1 e
my.cbind(c(1:5),letters[1:4],1)
my.cbind(c(1:5),letters[1:4],0)
my.cbind(c(1:4),letters[1:5],1)
my.cbind(c(1:4),letters[1:5],0)
my.cbind(c(1:5),letters[1:5],1)
my.cbind(c(1:5),letters[1:5],0)
A while back I had put together a function called Cbind that was meant to do this sort of thing. In its current form, it should be able to handle vectors, data.frames, and matrices as the input.
For now, the function is here: https://gist.github.com/mrdwab/6789277
Here is how one would use the function:
x <- 1:5
y <- letters[1:4]
z <- matrix(1:4, ncol = 2, dimnames = list(NULL, c("a", "b")))
Cbind(x, y, z)
# x y z_a z_b
# 1 1 a 1 3
# 2 2 b 2 4
# 3 3 c NA NA
# 4 4 d NA NA
# 5 5 <NA> NA NA
Cbind(x, y, z, first = FALSE)
# x y z_a z_b
# 1 1 <NA> NA NA
# 2 2 a NA NA
# 3 3 b NA NA
# 4 4 c 1 3
# 5 5 d 2 4
The two three functions required are padNA, dotnames, and Cbind, which are defined as follows:
padNA <- function (mydata, rowsneeded, first = TRUE) {
## Pads vectors, data.frames, or matrices with NA
temp1 = colnames(mydata)
rowsneeded = rowsneeded - nrow(mydata)
temp2 = setNames(
data.frame(matrix(rep(NA, length(temp1) * rowsneeded),
ncol = length(temp1))), temp1)
if (isTRUE(first)) rbind(mydata, temp2)
else rbind(temp2, mydata)
}
dotnames <- function(...) {
## Gets the names of the objects passed through ...
vnames <- as.list(substitute(list(...)))[-1L]
vnames <- unlist(lapply(vnames,deparse), FALSE, FALSE)
vnames
}
Cbind <- function(..., first = TRUE) {
## cbinds vectors, data.frames, and matrices together
Names <- dotnames(...)
datalist <- setNames(list(...), Names)
nrows <- max(sapply(datalist, function(x)
ifelse(is.null(dim(x)), length(x), nrow(x))))
datalist <- lapply(seq_along(datalist), function(x) {
z <- datalist[[x]]
if (is.null(dim(z))) {
z <- setNames(data.frame(z), Names[x])
} else {
if (is.null(colnames(z))) {
colnames(z) <- paste(Names[x], sequence(ncol(z)), sep = "_")
} else {
colnames(z) <- paste(Names[x], colnames(z), sep = "_")
}
}
padNA(z, rowsneeded = nrows, first = first)
})
do.call(cbind, datalist)
}
Part of the reason I stopped working on the function was that the gdata package already has a function called cbindX that handles cbinding data.frames and matrices with different numbers of rows. It will not work directly on vectors, so you need to convert them to data.frames first.
library(gdata)
cbindX(data.frame(x), data.frame(y), z)
# x y a b
# 1 1 a 1 3
# 2 2 b 2 4
# 3 3 c NA NA
# 4 4 d NA NA
# 5 5 <NA> NA NA