I am trying to create a program to iterate through an R data table. I am trying to avoid for loops because, as far as I know, they are slow.
#creation of the data table
col <- c(0, 1, 0, 1, 0, 1)
Priority <- c(1,2,3,4,5,6) #1 highest, 6 lowest
IEC_category <- c("a","b","c","d","e","f")
eventlog_overlap.dt <- data.table(col,Priority, IEC_category)
#comparison and assignation of the priority
# Intended logic: where col is 1, keep the current category if its priority
# number is not larger than the previous row's, otherwise take the previous
# row's category; where col is not 1, store NA.
# NOTE(review): `if` takes a single TRUE/FALSE, but every condition below
# compares a whole column at once, so this cannot decide row by row; a
# vectorised construct such as ifelse() is needed for per-row results.
if (eventlog_overlap.dt$col == 1){
# shift(..., 1) supplies the previous row's value for the comparison
if (eventlog_overlap.dt$Priority <= shift(eventlog_overlap.dt$Priority,1)){
eventlog_overlap.dt$AlarmaPrior <- eventlog_overlap.dt$IEC_category #write the actual category
}
else{
eventlog_overlap.dt$AlarmaPrior <- shift(eventlog_overlap.dt$IEC_category,1) #write the previous category
}
} else{ eventlog_overlap.dt$AlarmaPrior <- NA
}
Please provide the desired result. A dplyr attempt:
library(dplyr)
library(hablar)
col <- c(0, 1, 0, 1, 0, 1)
Priority <- c(1,2,3,4,5,6) #1 highest, 6 lowest
IEC_category <- c("a","b","c","d","e","f")
df <- data.frame(col,Priority, IEC_category)
df %>%
mutate(AlarmaPrior = if_else_(col == 1,
if_else_(Priority <= lag(Priority),
IEC_category,
lag(IEC_category)), NA))
gives you:
col Priority IEC_category AlarmaPrior
1 0 1 a <NA>
2 1 2 b a
3 0 3 c <NA>
4 1 4 d c
5 0 5 e <NA>
6 1 6 f e
Related
I have 12 variables that contain NA values as well. I need to convert the NAs to a separate level. The level value differs for some variables. The following is the code:
# Replace the NAs of one column of the data frame `tr` (looked up in the
# calling environment chain, as before) with a fixed level and convert that
# column to a factor.
#
# colindex: position (or name) of the column of `tr` to process.
# na_level: value that takes the place of every NA in that column.
# Returns a modified copy of `tr`; the caller has to assign it back.
Replace_NAs <- function(colindex, na_level){
  # Work on the column's values. The previous version passed the column *name*
  # (a string) to replace()/is.na() and stored the result in a new column
  # literally called "cname", so no NA was ever replaced.
  values <- tr[[colindex]]
  # A factor cannot take a value outside its levels, so go through character.
  if (is.factor(values)) {
    values <- as.character(values)
  }
  values[is.na(values)] <- na_level
  tr[[colindex]] <- as.factor(values)
  return(tr)
}
# Choose the NA replacement level for each of the 12 columns, then apply it.
for (i in 1:12) {
  if (i == 5) {
    na_level <- 3
  } else if (i == 11) {
    na_level <- 5
  } else if (i %in% c(4, 6, 8)) {
    # `i == 4|6|8` parses as `(i == 4) | 6 | 8`, which is TRUE for every i,
    # so the default branch below was never reached; %in% tests membership.
    na_level <- 1
  } else {
    na_level <- 20
  }
  tr <- Replace_NAs(i, na_level)
}
Please help me. Thanks.
As Johan mentioned in the comments, you should include a reproducible example. Without that, we're left guessing at what exactly you want.
That said, here's my guess at what'll help you:
# Fill NAs by column position; each step targets a disjoint set of columns,
# and the last step covers every column not handled above.
df %>%
  mutate_at(vars(5), ~ replace_na(., 3)) %>%
  mutate_at(vars(11), ~ replace_na(., 5)) %>%
  # vars() and the replacement lambda are separate arguments: the comma matters
  mutate_at(vars(4, 6, 8), ~ replace_na(., 1)) %>%
  mutate_at(vars(-c(4, 5, 6, 8, 11)), ~ replace_na(., 20))
Again, please provide a reproducible example with data and your desired output. A more robust answer to your question would explore applying a list of intended switches to your dataframe, but that would be overkill here.
Here's a way to do this using a for loop.
Consider this example :
tr <- data.frame(a = c(NA, 2, NA, 3), b = c(2, 3, NA, 4),
c = c(5, 6, NA, NA), d = c(1, 2, 3, NA))
tr
# a b c d
#1 NA 2 5 1
#2 2 3 6 2
#3 NA NA NA 3
#4 3 4 NA NA
Now prepare a list of column indices and a vector of replacement values
cols <- list(1, c(2, 3))
vals <- c(3, 5)
Use a for loop to replace the columns with the values
# For each group of columns, overwrite its NAs with the matching entry of vals.
for (i in seq_along(cols)) {
  group <- cols[[i]]
  # Logical matrix marking the NA cells inside this group of columns
  missing_cells <- is.na(tr[group])
  tr[group][missing_cells] <- vals[i]
}
For remaining columns
f_cols <- setdiff(seq_len(ncol(tr)), unlist(cols))
tr[f_cols][is.na(tr[f_cols])] <- 20
tr
# a b c d
#1 3 2 5 1
#2 2 3 6 2
#3 3 5 5 3
#4 3 4 5 20
Notice how the NAs in column 1 are replaced with 3, the NAs in columns 2 and 3 are replaced with 5, and the NAs in the remaining columns are replaced with 20.
I want to classify some characters that fulfill a condition in one column and concatenate the other characters in a string in another column.
The classification is working. When there is a 1 in the column "col", the program has to compare the inputs in "Category", the actual value with the previous one. If the priority number is smaller, save the value in "AlarmPrior", and the other value in "Other Alarms". I want to concatenate all the values with less priority in a string in "Other Alarms".
#test the function
col <- c(0, 1, 0, 0, 1, 1)
Priority <- c(1,2,3,4,5,6)
Category <- c("a","b","c","d","e","f")
# Build the table from the three vectors defined just above; the third one is
# `Category`, which is also the column name the following mutate() refers to.
eventlog_overlap.dt <- data.table(col,Priority, Category)
#loading the libraries
library(magrittr)
library(dplyr)
#comparison and value assignation in function of the priority
eventlog_overlap.dt$OtherAlarms <- ""
eventlog_overlap.dt <-
eventlog_overlap.dt %>%
mutate(AlarmPrior = ifelse(col == 1,
ifelse(Priority <= lag(Priority),
Category,
lag(Category)), NA),
OtherAlarms = ifelse(col == 1,
ifelse(Priority <= lag(Priority),
"1",
paste0(sprintf(Category, lag(OtherAlarms)), collapse = ", ")),NA))
For example:
This input,
col <- c(0, 1, 0, 0, 1, 1)
Priority <- c(1,2,3,4,5,6)
Category <- c("a","b","c","d","e","f")
Should return:
col Priority Category OtherAlarms AlarmPrior
1 0 1 a NA NA
2 1 2 b b a
3 0 3 c b,c NA
4 0 4 d b,c NA
5 1 5 e b,c,e d
6 1 6 f b,c,e,f e
My actual result is this one:
col Priority Category OtherAlarms AlarmPrior
1 0 1 a NA NA
2 1 2 b a,b,c,d,e,f a
3 0 3 c NA NA
4 0 4 d NA NA
5 1 5 e a,b,c,d,e,f d
6 1 6 f a,b,c,d,e,f e
I used the for statement to solve the problem
col <- c(0, 1, 0, 0, 1, 1)
Priority <- c(1,2,3,4,5,6)
Category <- c("a","b","c","d","e","f")
eventlog_overlap.dt <- data.table(col,Priority, Category)
#loading the libraries
library(magrittr)
library(dplyr)
#comparison and value assignation in function of the priority
eventlog_overlap.dt$OtherAlarms <- ""
eventlog_overlap.dt <-
eventlog_overlap.dt %>%
mutate(AlarmPrior = ifelse(col == 1,
ifelse(Priority <= lag(Priority),
Category,
lag(Category)), NA))
eventlog_overlap.dt$leadCate= lead(eventlog_overlap.dt$AlarmPrior)
tmpdata = character()
eventlog_overlap.dt$tmp= NA
# Walk the rows in order, collecting categories in `tmpdata` and writing the
# running comma-separated list into column 7 (`tmp`).
# Column positions used below: 3 = Category, 6 = leadCate, 7 = tmp.
for(i in 1:nrow(eventlog_overlap.dt)){
tmp = eventlog_overlap.dt[i,3]
leadtmp = eventlog_overlap.dt[i,6]
# First branch: the comparison is not NA and col[i] is 0 (as.logical(0) is
# FALSE). In that case the row's category is NOT added to the accumulator.
# NOTE(review): presumably Category is never NA, so the is.na() test depends
# only on leadCate (the next row's AlarmPrior) - confirm.
if(!is.na(leadtmp == tmp) & !as.logical(eventlog_overlap.dt$col[i])){
# The two reassignments of `tmp` here are not read again before `tmp` is
# overwritten on the next iteration, and `tmpdata = tmpdata` is a no-op.
tmp = tmp[!grepl(tmp,leadtmp)]
tmp = ifelse(NROW(tmp)==0,NA,tmp)
tmpdata = tmpdata
} else{
# Every other row appends its category to the running list.
tmpdata = c(tmpdata,tmp)
}
# Store the list accumulated so far on the current row.
eventlog_overlap.dt[i,7] = paste(tmpdata,collapse = ',')
}
And the result is shown below
> eventlog_overlap.dt
col Priority Category OtherAlarms AlarmPrior leadCate tmp
1
1 0 1 a <NA> a
2
2 1 2 b a <NA> b
3
3 0 3 c <NA> <NA> b,c
4
4 0 4 d <NA> d b,c
5
5 1 5 e d e b,c,e
6
6 1 6 f e <NA> b,c,e,f
I'm trying to create a function where I can pass a function as a variable to perform on a variable number of columns, after removing zeros. I'm not too comfortable with ellipses yet, and I'm guessing this is where the problem is arising. The function is using all the values in the specified rows, summarizing them based on the selected function, and then mutating that one value. I'd like to maintain the function across the row (e.g. rowMeans)
Example:
# Setup dataframe
a <- 1:5
b <- c(0, 4, 3, 0, 1)
c <- c(5:1)
d <- c(2, 0, 1, 0, 4)
df <- data.frame(a, b, c, d)
# Apply the function named by `function_name` to the non-zero values of all
# vectors passed through `...`.
# The inputs are flattened into a single vector before the zeros are dropped,
# so the function sees every value at once rather than one row at a time.
FUNexcludeZero <- function(function_name, ...){
  # Resolve the name (or function object) to something callable
  reducer <- match.fun(function_name)
  # Pool all supplied vectors into one
  pooled <- unlist(list(...))
  result <- reducer(pooled[pooled != 0])
  result
}
df %>%
mutate(foo = FUNexcludeZero(function_name = 'sd', a, b))
a b c d foo
1 1 0 5 2 1.457738
2 2 4 4 0 1.457738
3 3 3 3 1 1.457738
4 4 0 2 0 1.457738
5 5 1 1 4 1.457738
df %>%
mutate(foo = FUNexcludeZero(function_name = 'min', a, b))
a b c d foo
1 1 0 5 2 1
2 2 4 4 0 1
3 3 3 3 1 1
4 4 0 2 0 1
5 5 1 1 4 1
# Try row-function (same error occurs with rowMeans)
df %>%
mutate(foo = FUNexcludeZero(function_name = 'pmin', a, b))
Error in mutate_impl(.data, dots) :
Column `foo` must be length 5 (the number of rows) or one, not 8
For function_name = 'sd' the column should be c(NA, 1.41, 0, NA, 2.828) and the min and pmin should be c(1, 2, 3, 4, 1). I'm 100% sure the error has something to do with the list/unlist, but any other way I try it I end up with an error.
I am not sure if this is exactly what you want. You need to perform a row-wise operation on the two vectors, so I used the apply function. This should work for any number of equal-length vectors.
# Setup dataframe
a <- 1:5
b <- c(0, 4, 3, 0, 1)
c <- c(5:1)
d <- c(2, 0, 1, 0, 4)
#df <- data.frame(a, b, c, d) #not used
# Row-wise variant: bind the vectors in `...` as columns, then for every row
# drop the zeros and apply the function named by `function_name`.
FUNexcludeZero <- function(function_name, ...){
  # Resolve the name (or function object) to something callable
  row_fun <- match.fun(function_name)
  # One column per supplied vector, one row per position
  value_matrix <- cbind(...)
  drop_zeros_then_apply <- function(row_values) {
    row_fun(row_values[row_values != 0])
  }
  per_row <- apply(value_matrix, 1, drop_zeros_then_apply)
  per_row
}
FUNexcludeZero(function_name = 'sd', a, b)
#[1] NA 1.414214 0.000000 NA 2.828427
FUNexcludeZero(function_name = 'min', a, b)
#[1] 1 2 3 4 1
I wanted to create a vector of counts if possible.
For example: I have a vector
x <- c(3, 0, 2, 0, 0)
How can I create a frequency vector for all integers between 0 and 3? Ideally I wanted to get a vector like this:
> 3 0 1 1
which gives me the counts of 0, 1, 2, and 3 respectively.
Much appreciated!
You can do
table(factor(x, levels=0:3))
Simply using table(x) is not enough.
Or with tabulate which is faster
tabulate(factor(x, levels = min(x):max(x)))
You can do this using rle (I made this in minutes, so sorry if it's not optimized enough).
# Count how often each integer 0..3 occurs in x by summing run lengths.
x <- c(3, 0, 2, 0, 0)
r <- rle(x)
# Total length of all runs whose value equals the requested one
f <- function(x) {
  sum(r$lengths[r$values == x])
}
s <- vapply(0:3, f, integer(1))
data.frame(x = 0:3, freq = s)
#> data.frame(x = 0:3, freq = s)
# x freq
#1 0 3
#2 1 0
#3 2 1
#4 3 1
You can just use table():
a <- table(x)
a
x
#0 2 3
#3 1 1
Then you can subset it:
a[names(a)==0]
#0
#3
Or convert it into a data.frame if you're more comfortable working with that:
u<-as.data.frame(table(x))
u
# x Freq
#1 0 3
#2 2 1
#3 3 1
Edit 1:
For levels:
a<- as.data.frame(table(factor(x, levels=0:3)))
I get CSV's with hundreds of different columns and would like to be able to output a new file with the duplicate values removed from each column. Everything that I have seen and tried uses a specific column. I just need each column to be unique values.
For Example My Data:
df <- data.frame(A = c(1, 2, 3, 4, 5, 6), B = c(1, 0, 1, 0, 0, 1), C = c("Mr.","Mr.","Mrs.","Miss","Mr.","Mrs."))
df
A B C
1 1 1 Mr.
2 2 0 Mr.
3 3 1 Mrs.
4 4 0 Miss
5 5 0 Mr.
6 6 1 Mrs.
I would like:
A B C
1 1 1 Mr.
2 2 0 Mrs.
3 3 Miss
4 4
5 5
6 6
Then I can:
write.csv(df, file = file.path(df, "df_No_Dupes.csv"), na="")
So I can use it as a reference for my next task.
read.csv and write.csv work best with tabular data. Your desired output is not a good example of this (every row does not have the same number of columns).
You can easily get all the unique value for your columns with
# lapply always returns a list with one element per column; sapply would
# collapse the result to a matrix or vector whenever every column happens to
# have the same number of unique values.
vals <- lapply(df, unique)
Then you'd be better off saving this object with save() and load() to preserve the list as an R object.
Code snippet to work with a flexible number of columns, remove duplicate values from each column, and preserve column names:
require(rowr)
df <- data.frame(A = c(1, 2, 3, 4, 5, 6), B = c(1, 0, 1, 0, 0, 1), C = c("Mr.","Mr.","Mrs.","Miss","Mr.","Mrs."))
#get the number of columns in the dataframe
n <- ncol(df)
#loop through the columns
for(i in 1:ncol(df)){
#replicate column i without duplicates, fill blanks with NAs
df <- cbind.fill(df,unique(df[,1]), fill = NA)
#rename the new column
colnames(df)[n+1] <- colnames(df)[1]
#delete the old column
df[,1] <- NULL
}
df <- data.frame(A = c(1, 2, 3, 4, 5, 6), B = c(1, 0, 1, 0, 0, 1), C = c("Mr.","Mr.","Mrs.","Miss","Mr.","Mrs."))
for(i in 1:ncol(df)){
assign(paste("df_",i,sep=""), unique(df[,i]))
}
require(rowr)
df <- cbind.fill(df_1,df_2,df_3, fill = NA)
V1 V1 V1
1 1 1 Mr.
2 2 0 Mrs.
3 3 NA Miss
4 4 NA <NA>
5 5 NA <NA>
6 6 NA <NA>
or you could do
require(rowr)
df <- cbind.fill(df_1,df_2,df_3, fill = "")
df
V1 V1 V1
1 1 1 Mr.
2 2 0 Mrs.
3 3 Miss
4 4
5 5
6 6
If you want to avoid typing the name of each intermediate dataframe you can just use ls(pattern="df_") and get the objects named in that vector or use another loop.
If you want to change the column names back to their original values you can use:
colnames(output_df) <- colnames(input_df)
Then you can save the results however you like, e.g.
saveRDS()
save()
or write it to a file.
Putting it all together:
df <- data.frame(A = c(1, 2, 3, 4, 5, 6), B = c(1, 0, 1, 0, 0, 1), C = c("Mr.","Mr.","Mrs.","Miss","Mr.","Mrs."))
# Unique values of every column, kept as a named list in the original column
# order. This works for any number of columns and needs no intermediate
# df_1, df_2, ... objects (whose alphabetical ls() order would put df_10
# before df_2).
unique_vals <- lapply(df, unique)
# Pad every column with "" up to the length of the longest one so that they
# fit side by side in a single table.
n_rows <- max(lengths(unique_vals))
padded <- lapply(unique_vals, function(v) {
  v <- as.character(v)
  c(v, rep("", n_rows - length(v)))
})
# check.names = FALSE keeps the original column names untouched
df_output <- data.frame(padded, check.names = FALSE, stringsAsFactors = FALSE)
write.csv(df_output, "df_out.csv", row.names = FALSE)
verify_it_worked <- read.csv("df_out.csv")
verify_it_worked
A B C
1 1 1 Mr.
2 2 0 Mrs.
3 3 Miss
4 4
5 5
6 6