R Incorrect Number of Dimensions Error from data.frame Assignment - r

When running the code below I get the error:
Error in data[, 4] : incorrect number of dimensions
Both data[,4] and goals have the same length (480) so I don't understand what the issue is. Data is a data.frame with 4 columns and goals is a length 480 vector.
library(glmmTMB)
simulate_games = function(data) {
mod <- glmmTMB(goals ~ home + (1|attack) + (1|defence), poisson, data=data, REML=TRUE)
goals = predict(mod,newdata = data, type = "response")
data[,4] = goals #Error here
res = comp_ranks(goals)[,2] #comp_ranks is a user defined function
for (i in 1:1000) {
data[,4] = rpois(480,goals)
res = cbind(res,comp_ranks(data)[,2])
}
return(res)
}
long <- read.csv("https://www.math.ntnu.no/emner/TMA4315/2020h/eliteserie.csv", colClasses = c("factor","factor","factor","numeric"))
simulate_games(long)
Here is also the comp_ranks function although I don't think its whats causing the error.
comp_ranks = function(data) {
goals = data[,4]
goals = goals[!is.na(goals)]
teams = unique(data[,1])
teams_points = cbind.data.frame(0,teams)
goals_scored = cbind.data.frame(0,teams)
goals_conceded = cbind.data.frame(0,teams)
for (i in 1:length(teams)) {
tfs = data[,1] == teams[i]
tfc = data[,2] == teams[i]
goals_scored[i,1] = sum(na.omit(goals[tfs]))
goals_conceded[i,1] = sum(na.omit(goals[tfc]))
}
for (i in seq(1,length(goals)-1,2)) {
idx_1 = match(data[,1][i],teams)
idx_2 = match(data[,1][i+1],teams)
if (goals[i] - goals[i+1] > 0) {
teams_points[idx_1,1] = teams_points[idx_1,1] + 3
}
else if (goals[i] - goals[i+1] < 0 ) {
teams_points[idx_2,1] = teams_points[idx_2,1] + 3
}
else {
teams_points[idx_1,1] = teams_points[idx_1,1] + 1
teams_points[idx_2,1] = teams_points[idx_2,1] + 1
}
}
#Sort data.frame by ranks
colnames(teams_points) = c("Points","Teams")
teams_points = teams_points[with(teams_points, order(-Points)), ]
diff = goals_scored[,1] - goals_conceded[,1]
goals_diff = cbind.data.frame(diff,teams)
teams_ranked = teams_points[,2]
for (i in 1:length(teams_points)) {
for (j in 1:length(teams_points)) {
if(j != i) {
if (teams_points[i,1] == teams_points[j,1]) {
if (goals_diff[i,1] == goals_diff[j,1]) {
if (goals_scored[i,1] < goals_scored[j,1] ) {
teams_ranked = replace(teams_ranked,c(i,j), teams_ranked[c(j,i)])
teams_points[,2] = teams_ranked
}
else if(goals_diff[i,1] < goals_diff[j,1] ) {
teams_ranked = replace(teams_ranked,c(i,j), teams_ranked[c(j,i)])
teams_points[,2] = teams_ranked
}
}
}
}
}
}
ranks = data.frame("Ranks" = c(1:16), "Teams" = teams_points[,2], "Points" = teams_points[,1])
return(ranks)
}

Related

Exporting an Excel file from R with columns color coded?

When exporting the excel file the columns are not color coded. I am using the condformat package. i know that condformat creates a html file, i am using the condformat2excel command but its not color coding the columns i need. any help or tip are welcome thank you.
library(readxl)
library(condformat)
teast1 <- read_excel("Kenilworth_DataIssues-testR.xls",sheet = "Data Cleanup")
color_pick <- function(column) {
sapply(column,
FUN = function(value) {
if (value >= 25) {
return("red")
} else {
return("dodgerblue")
}
})
}
color_pick2 <- function(column2) {
sapply(column2,
FUN = function(value2) {
if (value2 >9000) {
return("red")
} else if(value2 < 8000) {
return("dodgerblue")
}else{
return("green")
}
})
}
condformat2excel(teast1,"flip.xlsx",sheet_name = "lebeon",overwrite_wb = FALSE,overwrite_sheet = TRUE)%>%
rule_fill_discrete("PlannedPremiseTime",expression = color_pick(PlannedPremiseTime),colours = identity)%>%
rule_fill_discrete("PostalCode",expression = color_pick2(PostalCode),colours = identity)

Error in textConnection(): all connections are in use

I have read most of the posts concerning an error of this type but neither applies to my case. I am new in R, working on an assignment for school based on Nolan and Lang's book Data Science Case Studies in R. I am working on using stats to identify spam, url for the code can be found here, which require working with files from http://spamassassin.apache.org/old/publiccorpus/ (which are pretty big)
Now the problem I am facing is the following (just posting the chunks of code where I have encountered the issue):
sampleSplit = lapply(sampleEmail, splitMessage)
processHeader = function(header)
{
# modify the first line to create a key:value pair
header[1] = sub("^From", "Top-From:", header[1])
headerMat = read.dcf(textConnection(header), all = TRUE)
headerVec = unlist(headerMat)
dupKeys = sapply(headerMat, function(x) length(unlist(x)))
names(headerVec) = rep(colnames(headerMat), dupKeys)
return(headerVec)
}
headerList = lapply(sampleSplit,
function(msg) {
processHeader(msg$header)} )
contentTypes = sapply(headerList, function(header)
header["Content-Type"])
names(contentTypes) = NULL
contentTypes
hasAttach = grep("^ *multi", tolower(contentTypes))
hasAttach
boundaries = getBoundary(contentTypes[ hasAttach ])
boundaries
boundary = boundaries[9]
body = sampleSplit[[15]]$body
bString = paste("--", boundary, sep = "")
bStringLocs = which(bString == body)
bStringLocs
eString = paste("--", boundary, "--", sep = "")
eStringLoc = which(eString == body)
eStringLoc
diff(c(bStringLocs[-1], eStringLoc))
### This code has mistakes in it - and we fix them later!
processAttach = function(body, contentType){
boundary = getBoundary(contentType)
bString = paste("--", boundary, "$", sep = "")
bStringLocs = grep(bString, body)
eString = paste("--", boundary, "--$", sep = "")
eStringLoc = grep(eString, body)
n = length(body)
if (length(eStringLoc) == 0) eStringLoc = n + 1
if (length(bStringLocs) == 1) attachLocs = NULL
else attachLocs = c(bStringLocs[-1], eStringLoc)
msg = body[ (bStringLocs[1] + 1) : min(n, (bStringLocs[2] - 1),
na.rm = TRUE)]
if ( eStringLoc < n )
msg = c(msg, body[ (eStringLoc + 1) : n ])
if ( !is.null(attachLocs) ) {
attachLens = diff(attachLocs, lag = 1)
attachTypes = mapply(function(begL, endL) {
contentTypeLoc = grep("[Cc]ontent-[Tt]ype", body[ (begL + 1) : (endL - 1)])
contentType = body[ begL + contentTypeLoc]
contentType = gsub('"', "", contentType )
MIMEType = sub(" *Content-Type: *([^;]*);?.*", "\\1", contentType)
return(MIMEType)
}, attachLocs[-length(attachLocs)], attachLocs[-1])
}
if (is.null(attachLocs)) return(list(body = msg, attachInfo = NULL) )
else return(list(body = msg,
attachDF = data.frame(aLen = attachLens,
aType = attachTypes,
stringsAsFactors = FALSE)))
}
bodyList = lapply(sampleSplit, function(msg) msg$body)
attList = mapply(processAttach, bodyList[hasAttach],
contentTypes[hasAttach],
SIMPLIFY = FALSE)
lens = sapply(attList, function(processedA)
processedA$attachDF$aLen)
head(lens)
attList[[2]]$attachDF
body = bodyList[hasAttach][[2]]
length(body)
body[35:45]
processAttach = function(body, contentType){
n = length(body)
boundary = getBoundary(contentType)
bString = paste("--", boundary, sep = "")
bStringLocs = which(bString == body)
eString = paste("--", boundary, "--", sep = "")
eStringLoc = which(eString == body)
if (length(eStringLoc) == 0) eStringLoc = n
if (length(bStringLocs) <= 1) {
attachLocs = NULL
msgLastLine = n
if (length(bStringLocs) == 0) bStringLocs = 0
} else {
attachLocs = c(bStringLocs[ -1 ], eStringLoc)
msgLastLine = bStringLocs[2] - 1
}
msg = body[ (bStringLocs[1] + 1) : msgLastLine]
if ( eStringLoc < n )
msg = c(msg, body[ (eStringLoc + 1) : n ])
if ( !is.null(attachLocs) ) {
attachLens = diff(attachLocs, lag = 1)
attachTypes = mapply(function(begL, endL) {
CTloc = grep("^[Cc]ontent-[Tt]ype", body[ (begL + 1) : (endL - 1)])
if ( length(CTloc) == 0 ) {
MIMEType = NA
} else {
CTval = body[ begL + CTloc[1] ]
CTval = gsub('"', "", CTval )
MIMEType = sub(" *[Cc]ontent-[Tt]ype: *([^;]*);?.*", "\\1", CTval)
}
return(MIMEType)
}, attachLocs[-length(attachLocs)], attachLocs[-1])
}
if (is.null(attachLocs)) return(list(body = msg, attachDF = NULL) )
return(list(body = msg,
attachDF = data.frame(aLen = attachLens,
aType = unlist(attachTypes),
stringsAsFactors = FALSE)))
}
readEmail = function(dirName) {
# retrieve the names of files in directory
fileNames = list.files(dirName, full.names = TRUE)
# drop files that are not email
notEmail = grep("cmds$", fileNames)
if ( length(notEmail) > 0) fileNames = fileNames[ - notEmail ]
# read all files in the directory
lapply(fileNames, readLines, encoding = "latin1")
}
processAllEmail = function(dirName, isSpam = FALSE)
{
# read all files in the directory
messages = readEmail(dirName)
fileNames = names(messages)
n = length(messages)
# split header from body
eSplit = lapply(messages, splitMessage)
rm(messages)
# process header as named character vector
headerList = lapply(eSplit, function(msg)
processHeader(msg$header))
# extract content-type key
contentTypes = sapply(headerList, function(header)
header["Content-Type"])
# extract the body
bodyList = lapply(eSplit, function(msg) msg$body)
rm(eSplit)
# which email have attachments
hasAttach = grep("^ *multi", tolower(contentTypes))
# get summary stats for attachments and the shorter body
attList = mapply(processAttach, bodyList[hasAttach],
contentTypes[hasAttach], SIMPLIFY = FALSE)
bodyList[hasAttach] = lapply(attList, function(attEl)
attEl$body)
attachInfo = vector("list", length = n )
attachInfo[ hasAttach ] = lapply(attList,
function(attEl) attEl$attachDF)
# prepare return structure
emailList = mapply(function(header, body, attach, isSpam) {
list(isSpam = isSpam, header = header,
body = body, attach = attach)
},
headerList, bodyList, attachInfo,
rep(isSpam, n), SIMPLIFY = FALSE )
names(emailList) = fileNames
invisible(emailList)
}
Everything runs fine right up to:
emailStruct = mapply(processAllEmail, fullDirNames,
isSpam = rep( c(FALSE, TRUE), 3:2))
emailStruct = unlist(emailStruct, recursive = FALSE)
sampleStruct = emailStruct[ indx ]
save(emailStruct, file="emailXX.rda")
I get the error Error in textConnection(header) : all connections are in use, therefore it doesn't recognize "emailStruct", which I need later on. I seriously don't know how to overcome it so that I can continue with the rest of the code, which requires some of these variables to work.
When you run textConnection() you are opening a text connection, but you are never closing it. Try closing it explicitly after you read from it
read.dcf(tc<-textConnection(header), all = TRUE)
close(tc)

Attempting to call my function n times

My goal is to call my dice roll function n times where n is the amount of turns. This is a monopoly simulated turn and as such the doubles will roll again, and triples will go to jail. I cannot figure out how to set this up so my function will be
Diceroll <- Function ( Turns, Sides)
Diceroll <- function(Turn,sides){
Turn = as.integer(0)
First_roll = as.integer(0)
Second_roll = as.integer(0)
Third_roll = as.integer(0)
Fourth_roll = as.integer(0)
Fifth_roll = as.integer(0)
Sixth_roll = as.integer(0)
Total = as.integer(0)
i = as.integer(1)
for (i in 1:Turn) {
First_roll = sample(1:sides,size = 1)
Second_roll = sample(1:sides,size = 1)
if(First_roll[1] == Second_roll[1]) {
Third_roll = sample(1:sides,size = 1)
Fourth_roll = sample(1:sides,size = 1)
}
if(Third_roll[1] == Fourth_roll[1] & Third_roll[1] + Fourth_roll[1] > 0) {
Fifth_roll= sample(1:sides,size = 1)
Sixth_roll = sample(1:sides,size = 1)
}
if(Fifth_roll[1] == Sixth_roll[1] & Fifth_roll[1] + Sixth_roll[1] > 0) { Total = "Jail"
}
else {
Total = (First_roll[1] + Second_roll[1] + Third_roll[1] +
Fourth_roll[1] + Fifth_roll[1] + Sixth_roll[1]) }
return(Total)
}
}
Here is my attempt but it is only listed the value of one roll.

In R how can I apply if and else if for the following example of two items to a list of items?

Here is the general idea, in practice I'd like to feasibly execute list of [alpha, beta, gamma ... etc] with an arbitrary number of items for each of the subsequent else if statement.
if (v == n(r)[alpha]) {
inc(c)
v = sample(c(names(r)), 1, replace = FALSE, prob = p[alpha,])
new = v
if (new == n(r)[ori])
{
inc(c)
dis = b+c
av = c(av,dis)
}
else if (v == names(r)[beta])
{
inc(b)
v = sample(c(n(r)), 1, replace = FALSE, prob = p[beta,])
new = v
if (new == n(r)[ori])
{
inc(b)
dis = b+c
av = c(av,dis)
}
else if (v == names(r)[gamma]
....

Oracle R Enterprise: Error ORE object has no unique key

I need to make a function which call Oracle R Enterprise ore.corr function and output result as a data.frame.
My R code here:
f_sts_corelation =
function(dat, target_col="",corr_type="spearman",group_by="")
{
v_target_col = gsub("\n","",target_col, fixed = TRUE);
v_target_col_list = "";
library("gdata");
for (s_name in strsplit(v_target_col,",")[[1]])
{
n_pos = regexpr(".",s_name,fixed = TRUE);
if (n_pos > 0)
{
s_name = substring(s_name,n_pos+1);
}
s_name = gsub("\"","",s_name, fixed = TRUE);
if (is.numeric(dat[,trim(s_name)]))
{
if (nchar(v_target_col_list)== 0)
{
v_target_col_list = trim(s_name)
}
else
{
v_target_col_list =paste(v_target_col_list,",",trim(s_name))
}
}
}
ore.data = ore.push(dat)
v_id = c()
v_group=c()
v_row = c()
v_col = c()
v_statistic = c()
v_pvalue = c()
v_df = c()
#group_by = ""
s_group_by = trim(gsub("\n","",group_by, fixed = TRUE));
if (nchar(s_group_by) > 0)
{
n_pos = regexpr(".",s_group_by,fixed = TRUE);
if (n_pos > 0)
{
s_group_by = substring(s_group_by,n_pos+1);
}
s_group_by = trim(gsub("\"","",s_group_by, fixed = TRUE));
ore.corr.res = ore.corr(ore.data,var = v_target_col_list, group.by = s_group_by)
for (i in 1:length(ore.corr.res))
{
if (i == 1)
{
v_group = rep(names(ore.corr.res[i]),length(ore.corr.res[[i]]$ROW))
v_row = as.vector(ore.corr.res[[i]]$ROW)
v_col = as.vector(ore.corr.res[[i]]$COL)
v_statistic = as.vector(ore.corr.res[[i]][,3])
v_pvalue = as.vector(ore.corr.res[[i]][,4])
v_df = as.vector(ore.corr.res[[i]][,5])
}
else
{
v_group = c(v_group,rep(names(ore.corr.res[i]),length(ore.corr.res[[i]]$ROW)))
v_row = c(v_row,as.vector(ore.corr.res[[i]]$ROW))
v_col = c(v_col,as.vector(ore.corr.res[[i]]$COL))
v_statistic = c(v_statistic,as.vector(ore.corr.res[[i]][,3]))
v_pvalue = c(v_pvalue,as.vector(ore.corr.res[[i]][,4]))
v_df = c(v_df,as.vector(ore.corr.res[[i]][,5]))
}
}
}
else if(nchar(s_group_by) == 0)
{
ore.corr.res = ore.corr(ore.data,var = v_target_col_list)
v_group = rep(" ",length(ore.corr.res$ROW))
v_row = as.vector(ore.corr.res$ROW)
v_col = as.vector(ore.corr.res$COL)
v_statistic = as.vector(ore.corr.res[,3])
v_pvalue = as.vector(ore.corr.res[,4])
v_df = as.vector(ore.corr.res[,5])
}
df_res = data.frame(Group_by = v_group,
Row = v_row,
Col = v_col,
Statistic = v_statistic,
P_Value = v_pvalue,
DF = v_df)
}
After that, I run the function by following script:
dat = iris;
corr_type="spearman";
V_target_col= '"IRIS_N$10002"."Sepal.Length",
"IRIS_N$10002"."Sepal.Width",
"IRIS_N$10002"."Petal.Width",
"IRIS_N$10002"."Petal.Length"';
group_by =
'
"IRIS_N$10002"."Species"
'
df_result = f_sts_corelation(dat,target_col = target_col, group_by = group_by)
But following error happen.
Error: ORE object has no unique key
I have tried to run each R command inside my function step by step and I sure that the Error happen from the last R command:
df_res = data.frame(Group_by = v_group,
Row = v_row,
Col = v_col,
Statistic = v_statistic,
P_Value = v_pvalue,
DF = v_df)
I don't know how to avoid this error.

Resources