Q: How do I subset a subset of a changing variable inside a function or loop?
Assume I have the following code for determining regression stats for multiple data sets :
dat1 <- data.frame(col1=1:5,col2=6:10)
dat2 <- data.frame(col1=11:15,col2=16:20)
func <- function(data,col.no){
get(data)[,col.no]
}
for(i in c('dat1','dat2')) {
mod.name <- paste0('fit.',i)
assign(mod.name,lm(get(i)[,1]~get(i)[,2]),envir = .GlobalEnv)
}
mod.pvals <- NULL
p.func <- function(attr.name) {
for(i in c('dat1','dat2')) {
mod.name <- paste0('fit.',i)
p.val <- summary(get(mod.name))[attr.name]
mod.pvals <- c(mod.pvals,p.val)
}
mod.pvals
}
r.vals <- p.func('r.squared')
adj.r.vals <- p.func('adj.r.squared')
coef.vals <- p.func('coefficients')
This works just fine with 'r.squared','adj.r.squared',etc.
But I want to access the p-value of the model in the same function.
Outside of a function I'd choose:
summary(fit)$coefficients[2,4]
But how do I do this inside of the function??
I unsuccessfully tried:
summary(get(mod.name))['coefficients'][2,4]
Error in summary(get(mod.name))["coefficients"][[2, 4]] :
incorrect number of subscripts
So then I thought about just changing my code for p.val in the function above:
p.val <- paste0('summary(',mod.name ,')$',attr.name)
get(p.val)
But when I run the code I get the following error:
p.vals <- p.func('coefficients[2,4]')
Error in get(p.val) :
object 'summary(fit.dat1)$coefficients[2,4]' not found
I guess get() doesn't work like this. Is there a function that I can replace get() with?
Other thoughts on how I could make this work??
One solution is to use eval() and parse():
p.val <- eval(parse(text=paste0("summary(",mod.name,")[",paste0(attr.name),"]")))
So the function would be:
mod.pvals <- NULL
p.func <- function(attr.name) {
for(i in c('dat1','dat2')) {
mod.name <- paste0('fit.',i)
p.val <- eval(parse(text=paste0("summary(",mod.name,")[",attr.name,"]")))
mod.pvals <- c(mod.pvals,p.val)
}
mod.pvals
}
r.vals <- p.func(attr.name = '\'r.squared\'')
p.vals <- p.func(attr.name = '[\'coefficients\']][2,4')
Related
I'm struggling with determining the syntax to compare the value of a variable that contains a function as part of a conditional statement.
I've written the following function:
cv_func <- function(df, method, target, nFolds=5, seedVal=100, metrics_list=c("ACC","TPR","PRECISION","F1"), l=0.3, m=0.2, n=500, h='a', kernal='rbfdot', c=1, i=TRUE, f=TRUE, k=1, x=TRUE)
{
# create folds using the assigned values
set.seed(seedVal)
folds = createFolds(df[,target],nFolds)
# lapply loop
cv_results <- lapply(folds, function(x)
{
# data preparation:
test_target <- df[x,target]
test_input <- df[x,-target]
train_target <- df[-x,target]
train_input <- df[-x,-target]
if (method==MLP) {
pred_model <- method(train_target~., data=train_input, l=l, m=m, n=n, h=h)
}
else if (method==ksvm) {
pred_model <- method(train_target~., data=train_input, kernal=kernal, C=c)
}
else if (method==IBk) {
pred_model <- method(train_target~., data=train_input, control = Weka_control(I=i, K=k, F=f, X=x))
}
else {
pred_model <- method(train_target~., data=train_input)
}
pred_train <- predict(pred_model, train_input)
return(mmetric(train_target, pred_train, metrics_list))
})
# convert a list to a data frame using as.data.frame and convert this data frame to a matrix before using rowSds()
cv_results_m <- as.matrix(as.data.frame(cv_results))
cv_mean<- as.matrix(rowMeans(cv_results_m))
cv_sd <- as.matrix(rowSds(cv_results_m))
colnames(cv_mean) <- "Mean"
colnames(cv_sd) <- "Sd"
# Combine and show cv_results and Means and Sds
cv_all <- cbind(cv_results_m, cv_mean, cv_sd)
kable(t(cv_all),digits=3)
}
When I attempt to run the function with default parameters, I get an error:
cv_func(df=df, method=IBk, target=20)
Error: "Error in method == "MLP" : comparison (1) is possible only for atomic and list types"
Any thoughts on whether I can use a variable containing a function as part of a conditional in R?
The issue is related to the argument type. It seems like a string is needed as input and as it is a function, we can get the value of the function with get wrapped around the string. It may be better to have a default method for the last else
cv_func <- function(df=df, target=20, nFolds=5, seedVal=100, method, metrics_list=c("ACC","TPR","PRECISION","F1"), l=0.3, m=0.2, n=500, h='a', kernal='rbfdot', c=1, i=TRUE, f=TRUE, k=1, x=TRUE)
{
# create folds using the assigned values
set.seed(seedVal)
folds = createFolds(df[,target],nFolds)
# lapply loop
cv_results <- lapply(folds, function(x)
{
# data preparation:
test_target <- df[x,target]
test_input <- df[x,-target]
train_target <- df[-x,target]
train_input <- df[-x,-target]
if (method=="MLP") {
pred_model <- get(method)(train_target~., data=train_input, l=l, m=m, n=n, h=h)
}
else if (method=="ksvm") {
pred_model <- get(method)(train_target~., data=train_input, kernal=kernal, C=c)
}
else if (method=="IBk") {
pred_model <- get(method)(train_target~., data=train_input, control = Weka_control(I=i, K=k, F=f, X=x))
}
else {
pred_model <- get(method)(train_target~., data=train_input)
}
pred_train <- predict(pred_model, train_input)
return(mmetric(train_target, pred_train, metrics_list))
})
# convert a list to a data frame using as.data.frame and convert this data frame to a matrix before using rowSds()
cv_results_m <- as.matrix(as.data.frame(cv_results))
cv_mean<- as.matrix(rowMeans(cv_results_m))
cv_sd <- as.matrix(rowSds(cv_results_m))
colnames(cv_mean) <- "Mean"
colnames(cv_sd) <- "Sd"
# Combine and show cv_results and Means and Sds
cv_all <- cbind(cv_results_m, cv_mean, cv_sd)
kable(t(cv_all),digits=3)
}
and then call the function as
cv_func(method="IBk", metrics_list=metrics_list)
I'm trying to create a function that runs kmeans clustering on a specific columns within a dataset and returns the cluster membership. The idea is that someone else could say "what would the clustering look like if I used columns x,y, and z".
I'm trying to use the following code. For some reason, the magic_result() won't return anything when I put it into the function.
mydata.test <- data.frame(a = c(1,1,1,2,2,2,3,3,3), b =
c(2,2,2,4,4,4,5,5,5), c = c(1,1,1,6,6,6,4,4,4), d =
c(1,1,1,4,4,4,2,2,2), e = c(14,40,84,14,40,84,14,40,84))
mylist.test <- list(c(1,2),c(2,3),c(1,2,3),c(1,2,5))
magic_free()
my.kmeans.test <- function(myd,myk,myl) {
library(magicfor)
magic_for(print,silent=T)
for(i in myl) {
kmeans <- kmeans(myd[,i],centers=myk,nstart=25)
cl <- kmeans$cluster
print(cl)
}
res <- magic_result()
res.cl <- res$cl
return(res.cl)
}
What I don't understand is that when I try to run this as just a for loop (rather than a function) it works.
library(magicfor)
magic_for(print,silent=T)
for(i in myl) {
kmeans <- kmeans(mydata.test[,i],centers=3,nstart=25)
cl <- kmeans$cluster
print(cl)
}
res <- magic_result()
res.cl <- res$cl
res.cl
I'm guessing there's something funky going on with magicfor. Any idea how to get around this? Anything is appreciated.
Using map from purrr, you can just do
library(purrr)
my.kmeans.test <- function(myd, myk, myl) {
map(myl, function(idx) {
kmeans(myd[, idx], centers=myk, nstart=25)$center
})
}
my.kmeans.test(mydata.test, 3, mylist.test)
readStateData <- function() {
infile <- paste("state",i,".txt",sep="")
state <- readLines(infile,n=1)
statedata <- read.table(infile,header=FALSE,sep=",",skip=1,col.names=c("Rank","City","Population"))
statename <- list(state,statedata)
statename
}
# Start loop
for(i in 1:50) {
readStateData()
# Add function to big.list
big.list[[i]] <- readStateData(statename)
}
The assignment for class is to bring in 50 files, all named state#.txt, get the state via readLines, get the data via read.table, and ultimately put it all into big.list that'll have all of the data through a for loop.
The problem I'm having is calling the function in during the for loop. I get the error:
Error in readStateData(statename) : unused argument (statename)
I'm either not calling in the function properly or I've written the function wrong. Both are likely.
Thank you for your help.
You have different issues here.
Do not refer inside a function to a variable which is defined outside. It means instead of access an outside the function defined i inside the function:
i <- 1
fct <- function() {
a <- i + 1
return(a)
}
fct()
Pass the variable as an argument to the function:
i <- 1
fct <- function(x) {
a <- x + 1
return(a)
}
fct(i)
In your function the return statement is missing. See point 1 the last command in the functions. Without a return statement the last written variable is on the stack and is "returned" by the function. This is not the clean way to return a value.
Ergo your code should look like this
readStateData <- function(x) {
infile <- paste("state",x,".txt",sep="")
state <- readLines(infile,n=1)
statedata <-read.table(infile,header=FALSE,sep=",",skip=1,col.names=c("Rank","City","Population"))
statename <- list(state,statedata)
return(statename)
}
# Start loop
for(i in 1:50) {
j <- readStateData(i)
# Add function to big.list
big.list[[i]] <- j
}
If your files are all of the pattern: state[number].txt you can simplify your code to:
# Get all files with pattern state*.txt
fls <- dir(pattern='state.*txt')
readStateData <- function(x) {
state <- readLines(x, n=1)
statedata <-read.table(x, header=FALSE,sep=",",skip=1,col.names=c("Rank","City","Population"))
statename <- list(state,statedata)
return(statename)
}
# Start loop
for(i in 1:length(fls)) {
j <- readStateData(fls[i])
# Add function to big.list
big.list[[i]] <- j
}
First, my code works perfectly. I simply need to be able to call the year and seasonal components out of BestSolarData using $ with:
BestSolarData$year
BestSolarData$seasonal
I have these written at the end of my code. The year I know comes from BestYear and seasonal come from BestData in the ForLoopSine function.
Any help to be able to access the components using $?
SineFit <- function (ToBeFitted)
{
msvector <- as.vector(ToBeFitted)
y <- length(ToBeFitted)
x <- 1:y
MS.nls <- nls(msvector ~ a*sin(((2*pi)/12)*x+b)+c, start=list(a=300, b=0, c=600))
summary(MS.nls)
MScoef <- coef(MS.nls)
a <- MScoef[1]
b <- MScoef[2]
c <- MScoef[3]
x <- 1:12
FittedCurve <- a*sin(((2*pi)/12)*x+b)+c
#dev.new()
#layout(1:2)
#plot(ToBeFitted)
#plot(FittedCurve)
return (FittedCurve)
}
ForLoopSine <- function(PastData, ComparisonData)
{
w<-start(PastData)[1]
t<-end(PastData)[1]
BestDiff <- 9999
for(i in w:t)
{
DataWindow <- window(PastData, start=c(i,1), end=c(t,12))
Datapredict <- SineFit(DataWindow)
CurrDiff <- norm1diff(Datapredict, ComparisonData)
if (CurrDiff < BestDiff)
{
BestDiff <- CurrDiff
BestYear <- i
BestData <- Datapredict
}
}
print(BestDiff)
print(BestYear)
return(BestData)
}
RandomFunction <- function(PastData, SeasonalData)
{
w <- start(PastData)[1]
t <- end(PastData)[1]
Seasonal.ts <- ts(SeasonalData, st = c(w,1), end = c(t,12), fr = 12)
Random <- PastData-Seasonal.ts
layout(1:3)
plot(SeasonalData)
plot(Seasonal.ts)
plot(Random)
return(Random)
}
BestSolarData <- ForLoopSine(MonthlySolarPre2015, MonthlySolar2015)
RandomComp <- RandomFunction (MonthlySolarPre2015, BestSolarData)
acf(RandomComp)
BestSolarData$year
BestSolarData$seasonal
As far as I understand your problem, you would like to retrieve the year component of BestSolarData with BestSolarData$year. But BestSolarData is returned by ForLoopSine, which is itself named DataPredict and is returned the SineFit function. It seems to be a vector and not a data.frame, so $ cannot work here.
Your example is not reproducible and this may help you find a solution. See this post for more details.
have the following function:
setTypes <- function(df2, ...) {
fns <- as.list(substitute(list(...)))
for(i in 1:length(df2)) {
if(fns[i] == '') {
next
}
df2[i,] <- fns[i](df2[i,])
}
return(df2)
}
want to do this:
test<-setTypes(sls,c('','as.Date','','','as.numeric','as.numeric'))
idea is to change the types of the fields in a data frame without having to do sls$field <- as.numeric(sls$field) for every field.
I had written a function like this that worked:
fn <- function(t) {
return(t("55.55000"))
}
and the output is this:
> fn(as.numeric)
[1] 55.55
however, i can't figure out why either doing variable length argument as a list and calling it as list[index](input) doesn't work. or even passing a vector of functions like c(as.Date, as.numeric, as.character) and doing c[1]('2015-10-10') # as.Date('2015-10-10')
I am receiving the error 'attempt to apply non-function'.. I've also tried using call but to no avail. Help?
The problem is that class(c[1]) is a list use c[[1]] instead
Example code
v <- c(as.numeric,as.character)
v[[1]]("1")
v[[2]](1)
EDIT
Your example should be:
setTypes <- function(df2, ...) {
fns <- list(...)
for(i in 1:NCOL(df2)) {
if(is.function(fns[[i]])) {
df2[,i] <- fns[[i]](df2[,i])
}
}
return(df2)
}
df <- data.frame(v1 = c(1,2), v2 = c("1","2"))
setTypes(df,as.character,'',as.numeric)