R Function Not Returning as Expected - r

I've been working on a function in R, and looking to return a data.frame. However, when I run, it return 0 rows/0 columns. I did add a "print" in the if statement, and that works, so I know it's performing as intended within the loop and if statement.
Here is the function:
predict.next.word <- function(word, ng_matrix){
if(ncol(ng_matrix)==3){
for (i in 1:100){
ngram_df <- data.frame()
first_word <- ng_matrix[,1][i]
second_word <- ng_matrix[,2][i]
count_word <- ng_matrix[,3][i]
if (word[1] == first_word && !is.na(first_word)){
matched_factor <- structure(c(second_word, count_word), .Names = c("predicted", "count"))
matched_df <- as.data.frame(as.list(matched_factor))
ngram_df <- (rbind(ngram_df, matched_df))
ngram_df <- transform(ngram_df, count = as.numeric(count))
print (ngram_df) # this works great, but not intention of function
}
}
return (ngram_df)
}
}
Here is a sample of when I call it:
test_bigram_word <- c("a")
predict.next.word(test_bigram_word, bigram_index)

Solved it by moving the initializing of the dataframe above the for loop, like this:
predict.next.word <- function(word, ng_matrix){
if(ncol(ng_matrix)==3){
ngram_df <- data.frame()
for (i in 1:100){
first_word <- ng_matrix[,1][i]
second_word <- ng_matrix[,2][i]
count_word <- ng_matrix[,3][i]
if (word[1] == first_word && !is.na(first_word)){
matched_factor <- structure(c(second_word, count_word), .Names = c("predicted", "count"))
matched_df <- as.data.frame(as.list(matched_factor))
ngram_df <- (rbind(ngram_df, matched_df))
ngram_df <- transform(ngram_df, count = as.numeric(count))
print (ngram_df) # this works great, but not intention of function
}
}
return (ngram_df)
}
}

Related

function generating NA in R

I need help, my function not work correctly when i try make any sum with the results.
have a lot of NA values and I don't know why.
Craps <- function(jogadas){
for (i in 1:jogadas){
comeOut <- sample(1:6,1)+ sample(1:6,1)
if(comeOut %in% c(2,3,12)){
result <- F #False
}else if(comeOut %in% c(7,11)){
}
else{
dados <- sample(1:6,1) + sample(1:6,1)
if(dados == 7){
result <- F
}else {
result <- T
}
while (!(dados %in% c(7,comeOut))){
dados <- sample(1:6,1)+ sample(1:6,1)
}
if(dados == 7)
result <- F
else result <- T
}
print(result)
#probability
prob<-NULL
prob[i] <- result
prob2<-sum(prob)/jogadas
print(prob2)
}
}
Craps(1000)
You put prob=NULL inside your loop, so it will become NULL at each iteration of the loop, just create prob before the loop. Also you forgot one line as noticed in the comments :
Craps <- function(jogadas){
prob<-NULL
for (i in 1:jogadas){
comeOut <- sample(1:6,1)+ sample(1:6,1)
if(comeOut %in% c(2,3,12)){
result <- F #False
}else if(comeOut %in% c(7,11)){
result <- T
}
else{
dados <- sample(1:6,1) + sample(1:6,1)
if(dados == 7){
result <- F
}else {
result <- T
}
while (!(dados %in% c(7,comeOut))){
dados <- sample(1:6,1)+ sample(1:6,1)
}
if(dados == 7)
result <- F
else result <- T
}
print(result)
#probability
prob[i] <- result
prob2<-sum(prob)/jogadas
print(prob2)
}
}

Combining 3 functions into one function

I am trying to set up a function that checks the data then runs the appropriate function.
I have tried to move tbl1 and tbl2 into TBL.Fun. It won't run.
TBL.fun <- function (x,y){
if(length(y)==1) tbl1(x[,y])
else if(length(y)==2) tbl2(x[,y[1]],x[,y[2]])
else print("Only two columns of data, kiddo!")
}
tbl1 <- function(x){
tbl <- ftable(x)
ptbl<- round(prop.table(tbl)*100,2)
out <- tbl
out[] <- paste(tbl,"(",ptbl,"%)")
return(out)
}
tbl2 <- function(x,y){
tbl <- ftable(x,y)
ptbl<- round(prop.table(tbl)*100,2)
out <- tbl
out[] <- paste(tbl,"(",ptbl,"%)")
return(out)
}
I want the TBL.fun to check the data and based on that check, compute and print the correct table. After I combined the functions into
TBL.fun1 <- function (x,y=NULL){
if(is.vector(x)==T && is.null(y)==T) tbl1(x)
else tbl2(x,y)
tbl1 <- function(x){
tbl <- ftable(x)
ptbl<- round(prop.table(tbl)*100,2)
out <- tbl
out[] <- paste(tbl,"(",ptbl,"%)")
return(out)
}
tbl2 <- function(x,y){
tbl <- ftable(x,y)
ptbl<- round(prop.table(tbl)*100,2)
out <- tbl
out[] <- paste(tbl,"(",ptbl,"%)")
return(out)
}
}
After combining the functions i ran a dput() on the function with a single variable.
Gender <- c("F","F","F","M","M","M")
Race <- c("Black","White","Asian","White","Black","Black")
> sam_dat <- cbind(Gender,Race)
dput(TBL.fun1(sam_dat[,1]))
function (x, y)
{
tbl <- ftable(x, y)
ptbl <- round(prop.table(tbl) * 100, 2)
out <- tbl
out[] <- paste(tbl, "(", ptbl, "%)")
return(out)
}
> TBL.fun1(sam_dat[,1])
You dont have to include all functions in TBL.fun1, you just call them, depending on the condition.
You can also simplify the condition as is.vector and is.null already return logical values, so you dont have to test for == TRUE.
I inserted 2 print statements, so you can see that both functions are called.
TBL.fun1 <- function (x, y = NULL){
if (is.vector(x) && is.null(y)) {
print("used tbl1")
tbl1(x)
} else {
print("used tbl2")
tbl2(x, y)
}
}
Gender <- c("F","F","F","M","M","M")
Race <- c("Black","White","Asian","White","Black","Black")
sam_dat <- cbind(Gender,Race)
a = TBL.fun1(sam_dat[,1])
b = TBL.fun1(sam_dat[,2], sam_dat[,1])

R - lapply on deep nested elements of list

I have a large list that stored measurements (a product of other lapply() runs). I now want to gather these measurements and calculate median/mean/sd etc but I don't know how to access them. The structure of this list is like this:
foo[[i]][[j]][[k]][[1]]
foo[[i]][[j]][[k]][[2]]$bar
I can't figure out a function that would return e.g. mean of $bar (but not of $x) and keep relation the values of the indices i,j,k.
A sample list can be generated with the following R code:
library(purrr)
metrics <- function(y){
tt10r <- median(y)
list(y, flatten(list(bar = tt10r)))
}
example_list <- list()
for (i in 1:10)
{
v <- list()
for (j in 1:10)
{
w <- 1:10
v[j] <- list(w)
}
example_list[[i]] <- v
}
foo <- list()
for (i in 1:length(example_list))
{
u <- list()
values <- list()
for (j in 1:length(example_list[[i]]))
{
u[[j]] <- lapply(example_list[[i]][[j]], function(x) mean(x))
values[[j]] <- lapply(u[[j]], function(x) metrics(x))
}
foo[[i]] <- values
}
The following code works nicely, but I am not sure if it is efficient (loops!). Gives the anticipated result:
final <- matrix(nrow = tail(cumsum(unlist(lapply(foo, function(x) lengths(x) -2))), n=1), ncol = 3)
final <- data.frame(final)
j=1
i=1
all_js <- c(0, cumsum(lengths(foo)))
starts <- c(0, cumsum(unlist(lapply(foo, function(x) lengths(x) -2)))) + 1
ends <- c(0, cumsum(unlist(lapply(foo, function(x) lengths(x) -2))))
for (i in 1:length(foo))
{
a <- foo[[i]]
for (j in 1:length(a))
{
b <- a[[j]]
data <- unlist(lapply(lapply(b[1], '[', 2), '[[', 1))
for (k in 2:c(length(b)-2))
{
data <- rbind(data,unlist(lapply(lapply(b[k], '[', 2), '[[', 1)))
}
row.names(data) <- NULL
colnames(final) <- c("i", "j", colnames(data))
first <- starts[all_js[i] + j]
last <- ends[all_js[i] + j+1]
final[first:last,] <- data.frame(cbind(i = i, j = j, data))
}
}

Write error in r using trycatch

I have used trycatch so that if error happened during execution of r code it will not break.
I wanted to write error in one of the file , but not sure how it can be done.
Below is code used
library(forecast)
library(data.table)
library(RODBC)
forecast_data <- data.frame(Project_ID=character(),
Period_End=character(),
Point_Forecast=character(),
Lower_Limit_95=character(),
Upper_Limit_95=character(),
stringsAsFactors=FALSE)
Data <- read.csv("Data.csv", header=TRUE,na.strings=c("NULL",""))[ ,c('Project_ID', 'Period_End_Date', 'Overall_Backlog_Processing_Efficiency_Incident')]
result = tryCatch({
if (nrow(Data) >= 1)
{
backlog <- as.vector(Data$Overall_Backlog_Processing_Efficiency_Incident)
i <- 1
datalist = list()
for (i in 1:8) {
backlogts <- tbats(backlog)
fc2 <- forecast(backlogts, h=1)
fc2
fc_2 <- as.data.frame(fc2)
fc_2$i <- i # maybe you want to keep track of which iteration produced it?
datalist[[i]] <- fc_2 # add it to your list
backlog <- append(backlog,round(fc_2[1,1], digits = 2))
i <- i +1
}
forecast_data = do.call(rbind, datalist)
forecast_data$`Point Forecast` <- round(forecast_data$`Point Forecast` , digits = 3)
nextweekday <- function(date, wday) {
date <- as.Date(date)
diff <- wday - wday(date)
if( diff < 0 )
diff <- diff + 7
return(date + diff)
}
a <- tail(Data$Period_End_Date, n=1)
a <- as.Date(a, "%d-%b-%y")
b <- tail(Data$Project_ID, n=1)
Period_End_Date <- data.table(date=seq(as.Date(nextweekday(a,1)), by=7, length=8), key="date")
forecast_data = cbind(forecast_data, Period_End_Date)
names(forecast_data)[names(forecast_data) == 'date'] <- 'Period_End'
forecast_data$Period_End <- as.character(forecast_data$Period_End)
forecast_data$Project_ID <- b
forecast_data <- forecast_data[c(8,7,1,4,5)]
names(forecast_data)[names(forecast_data) == 'Lo 95'] <- 'Lower_Limit_95'
names(forecast_data)[names(forecast_data) == 'Hi 95'] <- 'Upper_Limit_95'
names(forecast_data)[names(forecast_data) == 'Point Forecast'] <- 'Point_Forecast'
}
},
warning = function(w) {},
error = function(e) {
forecast_data <- data.frame(Project_ID=character(),
Period_End=character(),
Point_Forecast=character(),
Lower_Limit_95=character(),
Upper_Limit_95=character(),
stringsAsFactors=FALSE)
print(paste("MY_ERROR: ",e))
})
I tried to print error print(paste("MY_ERROR: ",err)) under error = function(e), but it is not working
Is there anything I am missing. Please advice.
Does this help
foo <- function(x){
output <- tryCatch(x, error = function(e) e)
ifelse(is(output, "error"), "error", "no error")
}
foo() # error
foo(1) # no error
Alternatively, use purrr.
foo <- function(x){
x
}
foo_safe() # error
foo_safe(10)

"Object '...' not found when referenced object is a nested function in R

Trying to nest functions with in a function to return a list in R after taking in a data frame. But running into a problem right away getting the error:
Error in ------frqTbl <- function(df) { : object 'frqTbl' not found
Is there some way to define a variable that's a function before the function definition? Or is the nesting incorrect?
Tested with:
data(diamonds, package = "ggplot2")
test <- diamonds[1:100,]
mstrFnct(test)
mstrFnct <- function(df){
output <- list()
frqTbl <- function(df){
fctvr <- df[sapply(df,is.factor)]
logicvr <- df[sapply(df,is.logical)]
nwDf <- data.frame(fctvr,logicvr)
if(ncol(nwDf)>0){
freq <-list()
for (i in 1:ncol(nwDf)){
freq[[i]] <- as.data.frame(table((nwDf)[,i]))
names(freq[[i]])[1]=colnames(nwDf[i])
}
return(freq)
}
else{
print("There are no categorical or logical variables in the data
frame.")
}
}
output[[length(output)+1]] <- frqTbl(df)
rSqd <- function(df){
y <- df[sapply(df,is.numeric)]
if(ncol(y)>=2){
c <- combn(colnames(y), 2)
vrPrs <- paste(c[1,], c[2,], sep = "-")
m <- cor(y, method = "pearson")
r <- m[which(lower.tri(m))]
vlus <- r^2
df2 <- data.frame(vrPrs, values)
names(df2) <- sub("^VrPrs$", "Variable Pairs",
names(df2))
names(df2) <- sub("^vlus$", "R-Square", names(df2))
format.data.frame(df2)
return(df2)
}
else{
print(paste("This Data Frame does not have two or more numerical
columns to compute the Pearson correlation coefficient(s)."))
}
}
output[[length(output)+1]] <- rSqd(df)
}
Is there some way to define a variable that's a function before the
function definition?
No. (see first code chunk)
Or is the nesting incorrect?
Actually not. You just messed up the variable names. (see second code chunk)
I suggest the following code to cover your example:
frqTbl <- function(df){
fctvr <- df[sapply(df,is.factor)]
logicvr <- df[sapply(df,is.logical)]
nwDf <- data.frame(fctvr,logicvr)
if(ncol(nwDf)>0){
freq <-list()
for (i in 1:ncol(nwDf)){
freq[[i]] <- as.data.frame(table((nwDf)[,i]))
names(freq[[i]])[1]=colnames(nwDf[i])
}
return(freq)
}
else{
print("There are no categorical or logical variables in the data
frame.")
}
}
rSqd <- function(df){
y <- df[sapply(df,is.numeric)]
if(ncol(y)>=2){
c <- combn(colnames(y), 2)
vrPrs <- paste(c[1,], c[2,], sep = "-")
m <- cor(y, method = "pearson")
r <- m[which(lower.tri(m))]
vlus <- r^2
df2 <- data.frame(vrPrs, vlus)
names(df2) <- sub("^vrPrs$", "Variable Pairs",
names(df2))
names(df2) <- sub("^vlus$", "R-Square", names(df2))
format.data.frame(df2)
return(df2)
}
else{
print(paste("This Data Frame does not have two or more numerical
columns to compute the Pearson correlation coefficient(s)."))
}
}
mstrFnct <- function(df){
output <- list()
output[[length(output)+1]] <- frqTbl(df)
output[[length(output)+1]] <- rSqd(df)
return(output)
}
data(diamonds, package = "ggplot2")
test <- diamonds[1:100,]
mstrFnct(test)
But you could also pack the function definitions into the master function. Like this:
mstrFnct <- function(df){
# create output list
output <- list()
# define function frqTbl()
frqTbl <- function(df){
fctvr <- df[sapply(df,is.factor)]
logicvr <- df[sapply(df,is.logical)]
nwDf <- data.frame(fctvr,logicvr)
if(ncol(nwDf)>0){
freq <-list()
for (i in 1:ncol(nwDf)){
freq[[i]] <- as.data.frame(table((nwDf)[,i]))
names(freq[[i]])[1]=colnames(nwDf[i])
}
return(freq)
}
else{
print("There are no categorical or logical variables in the data
frame.")
}
}
# call function frqTbl() and store result in list
output[[length(output)+1]] <- frqTbl(df)
# define function rSqd()
rSqd <- function(df){
y <- df[sapply(df,is.numeric)]
if(ncol(y)>=2){
c <- combn(colnames(y), 2)
vrPrs <- paste(c[1,], c[2,], sep = "-")
m <- cor(y, method = "pearson")
r <- m[which(lower.tri(m))]
vlus <- r^2
df2 <- data.frame(vrPrs, vlus)
names(df2) <- sub("^vrPrs$", "Variable Pairs",
names(df2))
names(df2) <- sub("^vlus$", "R-Square", names(df2))
format.data.frame(df2)
return(df2)
}
else{
print(paste("This Data Frame does not have two or more numerical
columns to compute the Pearson correlation coefficient(s)."))
}
}
# call function rSqd() and store result in list
output[[length(output)+1]] <- rSqd(df)
return(output)
}
data(diamonds, package = "ggplot2")
test <- diamonds[1:100,]
mstrFnct(test)

Resources