Airlines_Dataset <- function(Number_Of_Rows){
#Departure Date
day.start <- "2009/01/01"
day.end <- "2014/04/25"
Departure_Date <- function(day.start,day.end,size) { //Generates Random dates
dayseq <- seq.Date(as.Date(day.start),as.Date(day.end),by="day")
dayselect <- sample(dayseq,size,replace=TRUE)
Date <- dayselect
}
Date <- data.frame(Departure_Date(day.start,day.end,size=Number_Of_Rows))
return(Date)
#Origin
Origin_Func <- function(Number_Of_Rows,...){
Origin <- (sample(c('IND','ISP','JAN','JAX','IND','ISP',
'LAS','LAX','LBB','LIT','MAF','MCI','MCO','MDW','MHT',
'MST','MCO','OAK','OAC','OMA','OMT','ORF','PBI','PDX',
'PHL','PHX','PIT','PVD','RDU','RNO','RSW','SAN','SAT',
'SDF','SEA','SFO','SJC','SLC','SMF','SNA','STL','TPA',
'TUL','TUS','ABQ','IAD'),Number_Of_Rows, replace=TRUE))
Origin <- data.frame(Origin)
/*Given below is source code of origin*/
Origin_Exec <- system.time((sample(c('IND','ISP','JAN','JAX','IND','ISP',
'LAS','LAX','LBB','LIT','MAF','MCI','MCO','MDW','MHT',
'MST','MCO','OAK','OAC','OMA','OMT','ORF','PBI','PDX',
'PHL','PHX','PIT','PVD','RDU','RNO','RSW','SAN','SAT',
'SDF','SEA','SFO','SJC','SLC','SMF','SNA','STL','TPA',
'TUL','TUS','ABQ','IAD'),Number_Of_Rows, replace=TRUE)))
}
/*Some static origin codes that generates random strings from the list given above*/
return(Origin)
}
am calling the function as given below
Airlines_Result <- Airlines_Dataset(1000)
When I call the above functions only one function is invoked that is date and tine origin is not invoked.
Final Step is to combine the data frames
Airlines_Data <- data.frame(Origin,Date)
Anyone please help to execute the code in proper format I want origin and date to be present in single function call
I reordered your code and added some lines. You do not need to have nested functions in your case. To call a function within a function write the inner functions separately. The outer function call all other functions:
Departure_Date <- function(day.start,day.end,size)
{
dayseq <- seq.Date(as.Date(day.start),as.Date(day.end),by="day")
dayselect <- sample(dayseq,size,replace=TRUE)
Date <- dayselect
return(Date)
}
#Origin
Origin_Func <- function(Number_Of_Rows)
{
Origin <- (sample(c('IND','ISP','JAN','JAX','IND','ISP',
'LAS','LAX','LBB','LIT','MAF','MCI','MCO','MDW','MHT',
'MST','MCO','OAK','OAC','OMA','OMT','ORF','PBI','PDX',
'PHL','PHX','PIT','PVD','RDU','RNO','RSW','SAN','SAT',
'SDF','SEA','SFO','SJC','SLC','SMF','SNA','STL','TPA',
'TUL','TUS','ABQ','IAD'),Number_Of_Rows, replace=TRUE))
return(Origin)
}
Airlines_Dataset <- function(day.start ="2009/01/01", day.end = "2014/04/25", Number_Of_Rows=1000)
{
Data <- data.frame(Departure_Date(day.start,day.end,Number_Of_Rows),Origin_Func(Number_Of_Rows))
names(Data) = c("Date", "Origin")
return(Data)
}
Airlines_Result = Airlines_Dataset()
Related
I'm struggling with determining the syntax to compare the value of a variable that contains a function as part of a conditional statement.
I've written the following function:
cv_func <- function(df, method, target, nFolds=5, seedVal=100, metrics_list=c("ACC","TPR","PRECISION","F1"), l=0.3, m=0.2, n=500, h='a', kernal='rbfdot', c=1, i=TRUE, f=TRUE, k=1, x=TRUE)
{
# create folds using the assigned values
set.seed(seedVal)
folds = createFolds(df[,target],nFolds)
# lapply loop
cv_results <- lapply(folds, function(x)
{
# data preparation:
test_target <- df[x,target]
test_input <- df[x,-target]
train_target <- df[-x,target]
train_input <- df[-x,-target]
if (method==MLP) {
pred_model <- method(train_target~., data=train_input, l=l, m=m, n=n, h=h)
}
else if (method==ksvm) {
pred_model <- method(train_target~., data=train_input, kernal=kernal, C=c)
}
else if (method==IBk) {
pred_model <- method(train_target~., data=train_input, control = Weka_control(I=i, K=k, F=f, X=x))
}
else {
pred_model <- method(train_target~., data=train_input)
}
pred_train <- predict(pred_model, train_input)
return(mmetric(train_target, pred_train, metrics_list))
})
# convert a list to a data frame using as.data.frame and convert this data frame to a matrix before using rowSds()
cv_results_m <- as.matrix(as.data.frame(cv_results))
cv_mean<- as.matrix(rowMeans(cv_results_m))
cv_sd <- as.matrix(rowSds(cv_results_m))
colnames(cv_mean) <- "Mean"
colnames(cv_sd) <- "Sd"
# Combine and show cv_results and Means and Sds
cv_all <- cbind(cv_results_m, cv_mean, cv_sd)
kable(t(cv_all),digits=3)
}
When I attempt to run the function with default parameters, I get an error:
cv_func(df=df, method=IBk, target=20)
Error: "Error in method == "MLP" : comparison (1) is possible only for atomic and list types"
Any thoughts on whether I can use a variable containing a function as part of a conditional in R?
The issue is related to the argument type. It seems like a string is needed as input and as it is a function, we can get the value of the function with get wrapped around the string. It may be better to have a default method for the last else
cv_func <- function(df=df, target=20, nFolds=5, seedVal=100, method, metrics_list=c("ACC","TPR","PRECISION","F1"), l=0.3, m=0.2, n=500, h='a', kernal='rbfdot', c=1, i=TRUE, f=TRUE, k=1, x=TRUE)
{
# create folds using the assigned values
set.seed(seedVal)
folds = createFolds(df[,target],nFolds)
# lapply loop
cv_results <- lapply(folds, function(x)
{
# data preparation:
test_target <- df[x,target]
test_input <- df[x,-target]
train_target <- df[-x,target]
train_input <- df[-x,-target]
if (method=="MLP") {
pred_model <- get(method)(train_target~., data=train_input, l=l, m=m, n=n, h=h)
}
else if (method=="ksvm") {
pred_model <- get(method)(train_target~., data=train_input, kernal=kernal, C=c)
}
else if (method=="IBk") {
pred_model <- get(method)(train_target~., data=train_input, control = Weka_control(I=i, K=k, F=f, X=x))
}
else {
pred_model <- get(method)(train_target~., data=train_input)
}
pred_train <- predict(pred_model, train_input)
return(mmetric(train_target, pred_train, metrics_list))
})
# convert a list to a data frame using as.data.frame and convert this data frame to a matrix before using rowSds()
cv_results_m <- as.matrix(as.data.frame(cv_results))
cv_mean<- as.matrix(rowMeans(cv_results_m))
cv_sd <- as.matrix(rowSds(cv_results_m))
colnames(cv_mean) <- "Mean"
colnames(cv_sd) <- "Sd"
# Combine and show cv_results and Means and Sds
cv_all <- cbind(cv_results_m, cv_mean, cv_sd)
kable(t(cv_all),digits=3)
}
and then call the function as
cv_func(method="IBk", metrics_list=metrics_list)
readStateData <- function() {
infile <- paste("state",i,".txt",sep="")
state <- readLines(infile,n=1)
statedata <- read.table(infile,header=FALSE,sep=",",skip=1,col.names=c("Rank","City","Population"))
statename <- list(state,statedata)
statename
}
# Start loop
for(i in 1:50) {
readStateData()
# Add function to big.list
big.list[[i]] <- readStateData(statename)
}
The assignment for class is to bring in 50 files, all named state#.txt, get the state via readLines, get the data via read.table, and ultimately put it all into big.list that'll have all of the data through a for loop.
The problem I'm having is calling the function in during the for loop. I get the error:
Error in readStateData(statename) : unused argument (statename)
I'm either not calling in the function properly or I've written the function wrong. Both are likely.
Thank you for your help.
You have different issues here.
Do not refer inside a function to a variable which is defined outside. It means instead of access an outside the function defined i inside the function:
i <- 1
fct <- function() {
a <- i + 1
return(a)
}
fct()
Pass the variable as an argument to the function:
i <- 1
fct <- function(x) {
a <- x + 1
return(a)
}
fct(i)
In your function the return statement is missing. See point 1 the last command in the functions. Without a return statement the last written variable is on the stack and is "returned" by the function. This is not the clean way to return a value.
Ergo your code should look like this
readStateData <- function(x) {
infile <- paste("state",x,".txt",sep="")
state <- readLines(infile,n=1)
statedata <-read.table(infile,header=FALSE,sep=",",skip=1,col.names=c("Rank","City","Population"))
statename <- list(state,statedata)
return(statename)
}
# Start loop
for(i in 1:50) {
j <- readStateData(i)
# Add function to big.list
big.list[[i]] <- j
}
If your files are all of the pattern: state[number].txt you can simplify your code to:
# Get all files with pattern state*.txt
fls <- dir(pattern='state.*txt')
readStateData <- function(x) {
state <- readLines(x, n=1)
statedata <-read.table(x, header=FALSE,sep=",",skip=1,col.names=c("Rank","City","Population"))
statename <- list(state,statedata)
return(statename)
}
# Start loop
for(i in 1:length(fls)) {
j <- readStateData(fls[i])
# Add function to big.list
big.list[[i]] <- j
}
First, my code works perfectly. I simply need to be able to call the year and seasonal components out of BestSolarData using $ with:
BestSolarData$year
BestSolarData$seasonal
I have these written at the end of my code. The year I know comes from BestYear and seasonal come from BestData in the ForLoopSine function.
Any help to be able to access the components using $?
SineFit <- function (ToBeFitted)
{
msvector <- as.vector(ToBeFitted)
y <- length(ToBeFitted)
x <- 1:y
MS.nls <- nls(msvector ~ a*sin(((2*pi)/12)*x+b)+c, start=list(a=300, b=0, c=600))
summary(MS.nls)
MScoef <- coef(MS.nls)
a <- MScoef[1]
b <- MScoef[2]
c <- MScoef[3]
x <- 1:12
FittedCurve <- a*sin(((2*pi)/12)*x+b)+c
#dev.new()
#layout(1:2)
#plot(ToBeFitted)
#plot(FittedCurve)
return (FittedCurve)
}
ForLoopSine <- function(PastData, ComparisonData)
{
w<-start(PastData)[1]
t<-end(PastData)[1]
BestDiff <- 9999
for(i in w:t)
{
DataWindow <- window(PastData, start=c(i,1), end=c(t,12))
Datapredict <- SineFit(DataWindow)
CurrDiff <- norm1diff(Datapredict, ComparisonData)
if (CurrDiff < BestDiff)
{
BestDiff <- CurrDiff
BestYear <- i
BestData <- Datapredict
}
}
print(BestDiff)
print(BestYear)
return(BestData)
}
RandomFunction <- function(PastData, SeasonalData)
{
w <- start(PastData)[1]
t <- end(PastData)[1]
Seasonal.ts <- ts(SeasonalData, st = c(w,1), end = c(t,12), fr = 12)
Random <- PastData-Seasonal.ts
layout(1:3)
plot(SeasonalData)
plot(Seasonal.ts)
plot(Random)
return(Random)
}
BestSolarData <- ForLoopSine(MonthlySolarPre2015, MonthlySolar2015)
RandomComp <- RandomFunction (MonthlySolarPre2015, BestSolarData)
acf(RandomComp)
BestSolarData$year
BestSolarData$seasonal
As far as I understand your problem, you would like to retrieve the year component of BestSolarData with BestSolarData$year. But BestSolarData is returned by ForLoopSine, which is itself named DataPredict and is returned the SineFit function. It seems to be a vector and not a data.frame, so $ cannot work here.
Your example is not reproducible and this may help you find a solution. See this post for more details.
Q: How do I subset a subset of a changing variable inside a function or loop?
Assume I have the following code for determining regression stats for multiple data sets :
dat1 <- data.frame(col1=1:5,col2=6:10)
dat2 <- data.frame(col1=11:15,col2=16:20)
func <- function(data,col.no){
get(data)[,col.no]
}
for(i in c('dat1','dat2')) {
mod.name <- paste0('fit.',i)
assign(mod.name,lm(get(i)[,1]~get(i)[,2]),envir = .GlobalEnv)
}
mod.pvals <- NULL
p.func <- function(attr.name) {
for(i in c('dat1','dat2')) {
mod.name <- paste0('fit.',i)
p.val <- summary(get(mod.name))[attr.name]
mod.pvals <- c(mod.pvals,p.val)
}
mod.pvals
}
r.vals <- p.func('r.squared')
adj.r.vals <- p.func('adj.r.squared')
coef.vals <- p.func('coefficients')
This works just fine with 'r.squared','adj.r.squared',etc.
But I want to access the p-value of the model in the same function.
Outside of a function I'd choose:
summary(fit)$coefficients[2,4]
But how do I do this inside of the function??
I unsuccessfully tried:
summary(get(mod.name))['coefficients'][2,4]
Error in summary(get(mod.name))["coefficients"][[2, 4]] :
incorrect number of subscripts
So then I thought about just changing my code for p.val in the function above:
p.val <- paste0('summary(',mod.name ,')$',attr.name)
get(p.val)
But when I run the code I get the following error:
p.vals <- p.func('coefficients[2,4]')
Error in get(p.val) :
object 'summary(fit.dat1)$coefficients[2,4]' not found
I guess get() doesn't work like this. Is there a function that I can replace get() with?
Other thoughts on how I could make this work??
One solution is to use eval() and parse():
p.val <- eval(parse(text=paste0("summary(",mod.name,")[",paste0(attr.name),"]")))
So the function would be:
mod.pvals <- NULL
p.func <- function(attr.name) {
for(i in c('dat1','dat2')) {
mod.name <- paste0('fit.',i)
p.val <- eval(parse(text=paste0("summary(",mod.name,")[",attr.name,"]")))
mod.pvals <- c(mod.pvals,p.val)
}
mod.pvals
}
r.vals <- p.func(attr.name = '\'r.squared\'')
p.vals <- p.func(attr.name = '[\'coefficients\']][2,4')
I have a series of three nested functions I am trying to run. I would like to specify the default arguments of the inner-most function as elements within a list-object that is passed fro the outer-most function. Since this is a little hard to explain I have created a reproducible example that uses the same arguments and objects as my original code. Please forgive the amount of code but I wanted to make this example as close to the original as possible.
The function CreateBirthDates is causing the issue. The four arguments are all elements within the list-object sim (e.g., sim$agents$input). I call sim to the first function, wrapper, and then again to the second function, UpdateAgentStates. Within UpdateAgentStates I would like to modify sim$agents$input using the other objects within sim (e.g., sim$agents$birth_day). Since these other arguments are always the same I would like to "hard-wire" them. But if I run the wrapper function, CreateBirthDates does not recognize sim and therefore cannot specify the default arguments.
I created alternate versions of CreateBirthDates:
CreateBirthDates_with_sim. This includes sim as an argument. Running this function with the wrapper function works!
This seems like a "this-is-the-way-R-works" issue but I don't fully understand why. I would like to improve my basic programming skills so any suggestion or comments would be most appreciated!
Thank you very much,
Javan
See code below:
# Create some example data and load the package lubridate -----
library(lubridate)
t1 <- list("agents"=list("input"=data.frame(id=seq(1:5),class=rep("Male",5),age=rep(6,5))),
"pars"=list("global"=list("input_age_period"="years",
"birth_day"="01Sep",
"sim_start"=as.POSIXct("2000-10-01"))))
# Specify the original functions -------
wrapper <- function(sim,use_sim=FALSE){
if(use_sim==FALSE){
UpdateAgentStates(agent_states = NULL,sim=sim,init=TRUE)
} else {
UpdateAgentStates_with_sim(agent_states = NULL,sim=sim,init=TRUE)
}
}
UpdateAgentStates <- function(agent_states = NULL,
sim = sim,
init = FALSE
) {
if (init == TRUE) {
input <- sim$agents$input
input <- CreateBirthDate(input)
sim$input <- input
return(sim)
}
}
UpdateAgentStates_with_sim <- function(agent_states = NULL,
sim = sim,
init = FALSE
) {
if (init == TRUE) {
input <- sim$agents$input
input <- CreateBirthDate_with_sim(input, sim=sim)
sim$input <- input
return(sim)
}
}
CreateBirthDate <-
function(input = sim$agents$input,
input_age_period = sim$pars$global$input_age_period,
birth_day = sim$pars$global$birth_day,
starting_day = sim$pars$global$sim_start
){
# Only proceed if there is no birth_date column
if(is.null(input$birth_date)){
# Loop through each row in the input
for(a in 1:nrow(input)){
# Is the age_period a year?
if(input_age_period == "year" || input_age_period == "years") {
# Determine the first sim_start date after the birth_day
one_year <- as.period(1, "year")
s0 <- as.Date(starting_day - (one_year*input$age[a]))
# Set the format of the birth_day
birth_day_format <- guess_formats(birth_day,"dm")
birth_day_format <- paste(birth_day_format,"%Y",sep="")
# Determine the first birth_day after s0
s1 <- as.Date(paste(birth_day,year(s0),sep=""), format=birth_day_format)
if(length(s1)>1){
s1 <- s1[-(which(is.na(s1)))]
}
if(s0 >= s1) {
input$birth_date[a] <- as.character(s1)
} else {
input$birth_date[a] <- as.character(s1-one_year)
}
} else {
# If age period is not a year
age_period_unit <- as.period(1, input_age_period)
input$birth_date[a] <- as.character(starting_day -
(age_period_unit*input$age[a]))
}
}
}
# Convert birth_date to a POSIXct object
# input$birth_date <- as.POSIXct(input$birth_date, tz =
# tz(sim$pars$global$sim_start))
return(input)
}
# Specify the modified functions -------
CreateBirthDate_with_sim <-
function(input = sim$agents$input,
input_age_period = sim$pars$global$input_age_period,
birth_day = sim$pars$global$birth_day,
starting_day = sim$pars$global$sim_start, sim=sim
){
# Only proceed if there is no birth_date column
if(is.null(input$birth_date)){
# Loop through each row in the input
for(a in 1:nrow(input)){
# Is the age_period a year?
if(input_age_period == "year" || input_age_period == "years") {
# Determine the first sim_start date after the birth_day
one_year <- as.period(1, "year")
s0 <- as.Date(starting_day - (one_year*input$age[a]))
# Set the format of the birth_day
birth_day_format <- guess_formats(birth_day,"dm")
birth_day_format <- paste(birth_day_format,"%Y",sep="")
# Determine the first birth_day after s0
s1 <- as.Date(paste(birth_day,year(s0),sep=""), format=birth_day_format)
if(length(s1)>1){
s1 <- s1[-(which(is.na(s1)))]
}
if(s0 >= s1) {
input$birth_date[a] <- as.character(s1)
} else {
input$birth_date[a] <- as.character(s1-one_year)
}
} else {
# If age period is not a year
age_period_unit <- as.period(1, input_age_period)
input$birth_date[a] <- as.character(starting_day -
(age_period_unit*input$age[a]))
}
}
}
# Convert birth_date to a POSIXct object
# input$birth_date <- as.POSIXct(input$birth_date, tz =
# tz(sim$pars$global$sim_start))
return(input)
}
# Try running the wrapper function -------------
# Original version, doesn't work
wrapper(t1, use_sim = FALSE)
# But if I add an argument for sim to CreateBirthDate
wrapper(t1, use_sim = TRUE)
There is a difference between using a global function within another global function, and defining a function within another function. In this first example the scope for inner_func is the global environment and sim does not exist in the global environment (the only variables that does is t1):
t1 <- 1
inner_func <- function() {
final <- sim*2
return(final)
}
outer_func <- function(sim = w){ inner_func() }
outer_func(t1)
Error in inner_func() : object 'sim' not found
However if we define the inner_func inside of outer_func then you will get the behaviour you are expecting:
t1 <- 1
outer_func <- function(sim = w){
inner_func <- function() {
final <- sim*2
return(final)
}
inner_func()
}
outer_func(t1)
[1] 2