The block below runs and produces df_all as intended. But when I uncomment the single function at the top (I do not even call it here, but I need it for other things) and rerun the same block, I get: Error in bind_rows_(x, .id): Argument 1 must be a data frame or a named atomic vector, not a function
library(data.table)
# Helper kept commented out; the question reports that uncommenting it makes
# the final bind_rows() call at the bottom of this script fail.
# NOTE(review): the join inside uses `variable` (lowercase) while the filter
# uses `Variable` — confirm which column name is intended.
# addxtoy_newy_csv <- function(df) {
# zdf1 <- df %>% filter(Variable == "s44")
# setDT(df)
# setDT(zdf1)
# df[zdf1, Value := Value + i.Value, on=.(tstep, variable, Scenario)]
# setDF(df)
#}
# df1: 25 rows, five time steps ("a".."e") for each of five
# Variable/Scenario blocks.
tstep <- rep(c("a", "b", "c", "d", "e"), 5)
Variable <- c(rep(c("v"), 5), rep(c("w"), 5), rep(c("x"), 5), rep(c("y"), 5), rep(c("x"), 5))
Value <- c(1,2,3,4,5,10,11,12,13,14,33,22,44,57,5,3,2,1,2,3,34,24,11,11,7)
Scenario <- c(rep(c("i"), 20), rep(c("j"), 5) )
df1 <- data.frame(tstep, Variable, Value, Scenario)
# df2: five rows for Variable "x" in Scenario "i"; the helper vectors above are
# overwritten on purpose.
tstep <- c("a", "b", "c", "d", "e")
Variable <- rep(c("x"), 5)
Value <- c(100, 34, 100,22, 100)
Scenario <- c(rep(c("i"), 5))
df2<- data.frame(tstep, Variable, Value, Scenario)
# Convert both to data.table so the update join below can modify df1 in place.
setDT(df1)
setDT(df2)
# For every df1 row that matches a df2 row on tstep, Variable and Scenario,
# add df2's Value (i.Value) to df1's Value.
df1[df2, Value := Value + i.Value, on=.(tstep, Variable, Scenario)]
setDF(df1)
# Fetch every object whose name matches the pattern and row-bind them.
# NOTE(review): `pattern` is a regular expression, so "df*" matches any name
# containing a "d" — including addxtoy_newy_csv once it is defined.
# NOTE(review): %>% and bind_rows() are not provided by data.table —
# presumably dplyr is loaded elsewhere; confirm.
df_all <- mget(ls(pattern="df*")) %>% bind_rows()
The pattern you use in ls() will match any object with a "d" in its name, so addxtoy_newy_csv gets included in the list of object names. The f* in your pattern means you currently search for "d, followed by zero or more f's". I think a safer pattern to use would be ^df.*, to match objects that start with "df":
# Two data frames plus a function whose name merely contains a "d".
df1 <- data.frame(x = 1:3)
df2 <- data.frame(x = 4:6)
adder <- function(x) {
  x + 1
}
# "df*" reads as "a d followed by zero or more f's", so `adder` is listed too.
ls(pattern = "df*")
# Anchoring with ^ keeps only the names that start with "df".
ls(pattern = "^df.*")
Related
I have a data.table on which I would like to perform a linear regression per group, and capture the slope and intercept. I would like the data to have one row per group, and (in addition to the grouping variable(s)) two columns with the slope and intercept from the regression.
I cannot get this to work, how can I do this?
Below I have a reproducible example, with two strategies I used.
# Six observations in two groups of three; one regression is fitted per group.
dat <- data.table(
  x = rnorm(6),
  y = rnorm(6, 10),
  g = c("A", "A", "A", "B", "B", "B")
)
# Strategy x: `:=` writes the coefficients into `dat` itself, so every row of
# a group repeats that group's intercept and slope.
x <- dat[, c("intercept", "slope") := as.list(coef(lm(y ~ x))), by = "g"]
# Strategy y: one row per group, holding the fitted model in a list column.
y <- dat[, .(model = .(lm(y ~ x))), by = "g"]
# Strategy z: one row per group with the coefficient vector in a list column,
# then unpacked into two numeric columns.
# NOTE(review): map_dbl() is not loaded in this snippet — presumably purrr is
# attached elsewhere; confirm.
z <- dat[, .(coef = list(coef(lm(y ~ x)))), by = "g"]
# Fix: the original line ended with an unmatched extra "]" and did not parse.
z[, c("intercept", "slope") := list(map_dbl(coef, 1), map_dbl(coef, 2))]
In x, I have the correct columns, but all rows are repeated (this makes sense because I use :=).
In y, I have the correct number of rows (one for each group), but I need to extract the intercept and slope later on.
z gives the expected result but feels inefficient.
Is there a way I can do this all in one go?
# without desired colnames
dat[, as.list(coef(lm(y ~ x))), by = g]
# g (Intercept) x
# 1: A 9.567597 -0.25231210
# 2: B 10.373024 0.01000639
# with desired colnames
# The model is fitted once per group and its coefficients reused, instead of
# calling lm() separately for the intercept and for the slope.
dat[, {
  fit_coef <- coef(lm(y ~ x))
  list(intercept = fit_coef[[1]], slope = fit_coef[[2]])
}, by = g]
# g intercept slope
# 1: A 9.567597 -0.25231210
# 2: B 10.373024 0.01000639
Reproducible sample data:
# Fix the RNG seed so the simulated values are the same on every run.
set.seed(123)
# x is drawn before y, so the two columns consume the random stream in order.
dat <- data.table(
  x = rnorm(6),
  y = rnorm(6, mean = 10),
  g = rep(c("A", "B"), each = 3)
)
Let's say I have the following data:
> data.frame(value = 1:2, name = c("a", "b"))
value name
1 1 a
2 2 b
Goal:
Can I give it as input to the pipe operator and "send" it to setNames (or magrittr::set_names)?
What i have tried:
library(magrittr)
# Attempt: use `.` to pick two columns of the piped data frame as the two
# arguments of setNames(). The question reports that this does not work.
data.frame(value = 1:2, name = c("a", "b")) %>%
setNames(object = .$value, nm = .$name)
That doesn't work, I guess, because the pipe wants to hand over the whole data.frame and use it as the first argument. That got me interested in whether I can skip this behaviour and pass two subsets instead.
(So that data.frame(value = 1:2, name = c("a", "b")) %>% stays fixed and is not replaced by a variable.)
Desired Output:
How it would look like without the pipe Operator:
> a <- data.frame(value = 1:2, name = c("a", "b"))
> setNames(object = a$value, nm = a$name)
a b
1 2
For this case, we can simply wrap it inside {}
library(dplyr)
# Inside the braces `.` is the piped data frame, and both arguments of
# setNames() are taken from its columns.
data.frame(value = 1:2, name = c("a", "b")) %>%
  {
    setNames(object = .$value, nm = .$name)
  }
With tidyverse, there is also a deframe which will give a named vector
library(tibble)
# deframe() takes names from the first column and values from the second, so
# the columns are put in (name, value) order first.
data.frame(value = 1:2, name = c("a", "b")) %>%
  select(name, value) %>%
  deframe()
#a b
#1 2
How to join 2 columns from a single data.frame
For example:
Column A : a,b,c,d,e
Column B : b,c,a,b,e
The column i want
New Column : a,b,c,d,e,b,c,a,b,e
Basically, I want to get all the data from both columns into a single column.
# 100 identical rows: V1 and V2 each hold one comma-separated string.
df <- data.frame(
  V1 = rep("a, b, c, d, e", 100),
  V2 = rep("b, c, a, b, e", 100),
  stringsAsFactors = FALSE
)
# V3 joins the two strings of each row with ", " in between.
df$V3 <- paste(df$V1, df$V2, sep = ", ")
Hope this helps.
Using base R you could just copy the data.frame to a new object and concatenate the columns A and B using the c() function:
# Example input with two character columns of equal length.
df <- data.frame(
  A = letters[1:5],
  B = c("b", "c", "a", "b", "e"),
  stringsAsFactors = FALSE
)
# Stack column A on top of column B in a single-column data frame.
df2 <- data.frame(AB = unlist(df[c("A", "B")], use.names = FALSE))
Alternatively, you could use a tidyverse approach with the gather() function from the tidyr package. This has the advantage that you can easily include the old column IDs (A or B) from the original data.frame in each row.
library(tidyr)
# Stack columns A and B into `value`, recording each row's source column name
# in `old_col_id`.
df_tidy <- gather(df, key = "old_col_id", value = "value", A, B)
I am trying to pivot pairs of key-value variables using tidyr:spread() .
# Wide input: one id column followed by three key/value column pairs.
id <- c(0,1,2,3,4,5,6,7,8,9)
key1 <- c("a", "a", "b", "b", "c","a", "a", "b", "b", "c")
val1 <- c(1,2,3,1,2,3,1,2,3,1)
# NOTE(review): key2 and key3 contain NA entries — presumably relevant to the
# spread() error reported for this script; confirm.
key2 <- c("d",NA,NA,NA,"e","d","d",NA,"b",NA)
val2 <- c(1,NA,NA,NA,2,3,NA,NA,3,NA)
key3 <- c("x",NA,NA,NA,"e","d",NA,NA,NA,NA)
val3 <- c(0,NA,NA,NA,NA,3,1,NA,NA,NA)
df = data.frame(id, key1, val1, key2, val2, key3, val3)
library(tidyr)
# Spread one key/value pair per call, feeding each result into the next call.
c1 <- spread(df, key1, val1, fill = 0, convert = FALSE)
c2 <- spread(c1, key2, val2, fill = 0, convert = FALSE)
c3 <- spread(c2, key3, val3, fill = 0, convert = FALSE)
While running spread(), I get the following error:
Error in [.data.frame(data, setdiff(names(data), c(key_col,
value_col))) : undefined columns selected
It makes me think that the problem is in the values and not in the variable names, as the error implies. Any ideas what to look for?
By the same token, is there a more concise way to spread multiple pairs of key-value variables?
You may use Map
library(tidyr)
# Spread each key/value column pair on its own, drop any column whose name is
# NA, then bind the per-pair results side by side.
res <- do.call(
  cbind,
  Map(
    function(x, y) {
      pair <- data.frame(x, y)
      wide <- spread(pair, x, y, fill = 0, convert = FALSE)
      wide[!is.na(names(wide))]
    },
    df[-1][c(TRUE, FALSE)],  # key1, key2, key3
    df[-1][c(FALSE, TRUE)]   # val1, val2, val3
  )
)
# Strip everything up to and including the last "." from each column name.
names(res) <- sub('.*\\.', '', names(res))
cbind(df, res)
I am looking for a vector version of ddply.
I would like to do the following:
vector_ddply(frame1, frame2, ..., frameN, c("column1", "column2"), processingFunction);
Here all frames have both "column1" and "column2" and processingFunction takes N parameters.
Note that in my specific case it doesn't make sense to merge the N data frames into one.
The resulting frame would be made of the union of all the keys of the N frames.
Is there a way to achieve this ?
Thanks
Let's start with some sample data:
# Three sample frames sharing x, y and z; each has one extra integer column
# (p, q or r). The frames are built in order, so the rnorm() draws happen in
# the sequence f1, f2, f3.
ll <- lapply(
  c(f1 = "p", f2 = "q", f3 = "r"),
  function(extra_col) {
    frame <- data.frame(
      x = c("a", "b", "a", "b"),
      y = c(1, 1, 2, 2),
      z = rnorm(4)
    )
    frame[[extra_col]] <- 1:4
    frame
  }
)
1. Solution: apply data.frame-wise
You want to ddply processingFunction on each data.frame individually, and combine the results to one resulting data.frame:
# Run ddply() on each data frame in `ll` separately, summarising z within each
# (x, y) group, and let ldply() combine the per-frame results into one frame.
# NOTE(review): processingFunction is not defined in this snippet, and plyr is
# presumably attached elsewhere; confirm.
ldply( ll, ddply, .(x, y), summarise, z = processingFunction(z) )
2. Solution: apply on one rbinded data.frame
You want to apply processingFunction over all rows of the data.frames at once. So then you should just rbind all data.frames together to a large one. Just in case this is not directly possible because the individual frames have not all columns in common, you have to rbind on the common column subset:
# Column names that occur in every frame of `ll`.
commonCols <- Reduce(intersect, lapply(ll, colnames))
# Keep only those shared columns in each frame, then stack all frames.
oneDf <- do.call(rbind, lapply(ll, function(frame) frame[commonCols]))
# Summarise z per (x, y) group across the combined rows.
ddply(oneDf, .(x, y), summarise, z = processingFunction(z))