Insert a blank row before zero - r

x<-c(0,1,1,0,1,1,1,0,1,1)
aaa<-data.frame(x)
How to insert a blank row before zero? When the first row is zeroļ¼Œdo not add blank row. Thank you.
Result:
0
1
1
.
0
1
1
1
.
0
1
1

Below we used dot but you can replace "." with NA or "" or something else depending on what you want.
1) We can use Reduce and append:
Append <- function(x, y) append(x, ".", y - 1)
data.frame(x = Reduce(Append, setdiff(rev(which(aaa$x == 0)), 1), init = aaa$x))
2) gsub Another possibility is to convert to a character string, use gsub and convert back:
data.frame(x = strsplit(gsub("(.)0", "\\1.0", paste(aaa$x, collapse = "")), "")[[1]])
3) We can create a two row matrix in which the first row is dot before each 0 and NA otherwise. Then unravel it to a vector and use na.omit to remove the NA values.
data.frame(x = na.omit(c(rbind(replace(ifelse(aaa$x == 0, ".", NA), 1, NA), aaa$x))))
4) We can lapply over aaa$x[-1] outputting c(".", 9) or 1. Unlist that and insert aaa$x[1] back in. No packages are used.
repl <- function(x) if (!x) c(".", 0) else 1
data.frame(x = c(aaa$x[1], unlist(lapply(aaa$x[-1], repl))))
5) Create a list of all but the first element and replace the 0's in that list with c(".", 0) . Unlist that and insert the first element back in. No packages are used.
L <- as.list(aaa$x[-1])
L[x[-1] == 0] <- list(c(".", 0))
data.frame(x = c(aaa$x[1], unlist(L)))
6) Assuming aaa has two columns where the second column is character (NOT factor). Append a row of dots to aaa and then create an index vector using unlist and Map to access the appropriate row of the extended aaa.
aaa <- data.frame(x = c(0,1,1,0,1,1,1,0,1,1), y = letters[1:10],
stringsAsFactors = FALSE)
nr <- nrow(aaa); nc <- ncol(aaa)
fun <- function(ix, x) if (!is.na(x) & x == 0 & ix > 1) c(nr + 1, ix) else ix
rbind(aaa, rep(".", nc))[unlist(Map(fun, 1:nr, aaa$x)), ]
If we did want to have y be factor then note that we can't just add a dot to a factor if it is not a level of that factor so there is the question of what levels the factor can have. To get around that let us add an NA rather than a dot to the factor. Then we get the following which is the same except that aaa has been redefined so that y is a factor, we no longer need nc since we are assuming 2 columns and rep(...) in the last line is replaced with c(".", NA).
aaa <- data.frame(x = c(0,1,1,0,1,1,1,0,1,1), y = letters[1:10])
nr <- nrow(aaa)
fun <- function(ix, x) if (!is.na(x) & x == 0 & ix > 1) c(nr + 1, ix) else ix
rbind(aaa, c(".", NA))[unlist(Map(fun, 1:nr, aaa$x)), ]

One dplyr and tidyr possibility may be:
aaa %>%
uncount(ifelse(row_number() > 1 & x == 0, 2, 1)) %>%
mutate(x = ifelse(x == 0 & lag(x == 1, default = first(x)), NA_integer_, x))
x
1 0
2 1
3 1
4 NA
5 0
6 1
7 1
8 1
9 NA
10 0
11 1
12 1
It is not adding a blank row as you have a numeric vector. Instead, it is adding a row with NA. If you need a blank row, you can convert it into a character vector and then replace NA with blank.

ind = with(aaa, ifelse(x == 0 & seq_along(x) > 1, 2, 1))
d = aaa[rep(1:NROW(aaa), ind), , drop = FALSE]
transform(d, x = replace(x, sequence(ind) == 2, NA))

Here is an option with rleid
library(data.table)
setDT(aaa)[, .(x = if(x[.N] == 1) c(x, NA) else x), rleid(x)][-.N, .(x)]
# x
# 1: 0
# 2: 1
# 3: 1
# 4: NA
# 5: 0
# 6: 1
# 7: 1
# 8: 1
# 9: NA
#10: 0
#11: 1
#12: 1

data.frame(x = unname(unlist(by(aaa$x,cumsum(aaa==0),c,'.'))))
x
1 0
2 1
3 1
4 .
5 0
6 1
7 1
8 1
9 .
10 0
11 1
12 1
13 .

My solution is
aaa <- data.frame(x = c(0,1,1,0,1,1,1,0,1,1), y = letters[1:10])
aaa$ind = with(aaa, ifelse(x == 0 & seq_along(x) > 1, 2, 1))
aaa<-aaa[rep(1:nrow(aaa), aaa$ind), ,]
aaa[(aaa$ind== 2 & !grepl(".1",rownames(aaa))),]<-NA
aaa$ind<- NULL
aaa
x y
1 0 a
2 1 b
3 1 c
4 NA <NA>
4.1 0 d
5 1 e
6 1 f
7 1 g
8 NA <NA>
8.1 0 h
9 1 i
10 1 j

Related

When the before value is not 1 then impute the before value in place of current value until next non 1 value is found in R

The input vector is as below,
data=c(1,1,1,1,11,1,1,1,1,12,1,1,2,1,1,1)
I want the output as 1,1,1,1,11,11,11,11,11,12,12,12,2,2,2,2 where the 1's proceeding the non 1's should be imputed the non 1 value in R.
I tried the following code
data=c(1,1,1,1,11,1,1,1,1,12,1,1,2,1,1,1)
sapply(data, function(x) ifelse (lag(x)!=1,lag(x),x))
but it didn't yield expected output
You can convert every 1 after the first non-1 value to NA then use zoo::na.locf():
library(zoo)
x <- c(1,1,1,1,11,1,1,1,1,12,1,1,2,1,1,1)
data[seq_along(x) > which.max(x!= 1) & x== 1] <- NA
na.locf(x)
[1] 1 1 1 1 11 11 11 11 11 12 12 12 2 2 2 2
Or using replace() to add the NA values:
na.locf(replace(x, seq_along(x) > which.max(x != 1) & x == 1, NA))
In response to your comment about applying it to groups, you can use ave():
df <- data.frame(x = c(x, rev(x)), grp = rep(1:2, each = length(x)))
ave(df$x, df$grp, FUN = function(y)
na.locf(replace(y, seq_along(y) > which.max(y != 1) & y == 1, NA))
)
You can write your custom fill function:
x <- c(1,1,1,1,11,1,1,1,1,12,1,1,2,1,1,1)
myfill <- function(x) {
mem <- x[1]
for (i in seq_along(x)) {
if (x[i] == 1) {
x[i] <- mem
} else {
mem <- x[i]
}
}
x
}
myfill(x)
# 1 1 1 1 11 11 11 11 11 12 12 12 2 2 2 2
You could match unique 1 and non-1 values with the cumsum of non-1 values.
(c(1, x[x != 1]))[match(cumsum(x != 1), 0:3)]
# [1] 1 1 1 1 11 11 11 11 11 12 12 12 2 2 2 2
Data
x <- c(1, 1, 1, 1, 11, 1, 1, 1, 1, 12, 1, 1, 2, 1, 1, 1)
You can use rle from base to overwrite 1 with the value before.
x <- rle(data)
y <- c(FALSE, (x$values == 1)[-1])
x$values[y] <- x$values[which(y)-1]
inverse.rle(x)
# [1] 1 1 1 1 11 11 11 11 11 12 12 12 2 2 2 2

Dispatch values in list column to separate columns

I have a data.table with a list column "c":
df <- data.table(a = 1:3, c = list(1L, 1:2, 1:3))
df
a c
1: 1 1
2: 2 1,2
3: 3 1,2,3
I want to create separate columns for the values in "c".
I create a set of new columns F_1, F_2, F_3:
mmax <- max(df$a)
flux <- paste("F", 1:mmax, sep = "_")
df[, (flux) := 0]
df
a c F_1 F_2 F_3
1: 1 1 0 0 0
2: 2 1,2 0 0 0
3: 3 1,2,3 0 0 0
I want to dispatch values in "c" to columns F_1, F_2, F_3 like this:
df
a c F_1 F_2 F_3
1: 1 1 1 0 0
2: 2 1,2 1 2 0
3: 3 1,2,3 1 2 3
What I have tried:
comp_vect <- function(vec, mmax){
vec <- vec %>% unlist()
n <- length(vec)
answr <- c(vec, rep(0, l = mmax -n))
}
df[ , ..flux := mapply(comp_vect, c, mmax)]
The expected data.table is :
> df
a c F_1 F_2 F_3
1: 1 1 1 0 0
2: 2 1,2 1 2 0
3: 3 1,2,3 1 2 3
I followed a radically different approach. I rbinded the list column and then dcasted it, obtaining the desired result. Last part is to set the names.
library(data.table)
df <- data.table(a = 1:3, d = list(1L, c(1L, 2L), c(1L, 2L, 3L)))
df2 <- df[, rbind(d), by = a][, dcast(.SD, a ~ V1, fill = 0)]
setnames(df2, 2:4, flux)[]
a F_1 F_2 F_3
1: 1 1 0 0
2: 2 1 2 0
3: 3 1 2 3
where flux is the variable of names that you defined in your question.
Please notice that avoided using the column name c, as it may be confused with the function c().
Solution :
for(idx in seq(max(sapply(df$c, length)))){ # maximum number of values according to all the elements of the list
set(x = df,
i = NULL,
j = paste0("F_",idx), # column's name
value = sapply(df$c, function(x){
if(is.na(x[idx])){
return(0) # 0 instead of NA
} else {
return(x[idx])
}
})
)
}
Explications :
We can extract the values from a list like this :
sapply(df$c, function(ll) return(ll[1])) # first value
[1] 1 1 1
sapply(df$c, function(ll) return(ll[2])) # second value
[1] NA 2 2
sapply(df$c, function(ll) return(ll[3])) # third value
[1] NA NA 3
We see that if there is no value, we have a NA.
We need an iterator to extract all values at the position idx. For that, we'll find the number of values in each element of df$c (the list) and keep the maximum.
max(sapply(df$c, length))
[1] 3
If we want zeros instead of NAs, we need to create a function in the sapply to convert them :
vec <- c(NA, 5, 1, NA)
> sapply(vec, function(x) if(is.na(x)) return(0) else return(x))
[1] 0 5 1 0

Going from a list of elements to chemical formula

I have a list of elemental compositions, each element in it's own row. Sometimes these elements have a zero.
C H N O S
1 5 5 0 0 0
2 6 4 1 0 1
3 4 6 2 1 0
I need to combine them so that they read, e.g. C5H5, C6H4NS, C4H6N2O.
This means that for any element of value "1" I should only take the column name, and for anything with value 0, the column should be skipped altogether.
I'm not really sure where to start here. I could add a new column to make it easier to read across the columns, e.g.
c C h H n N o O s S
1 C 5 H 5 N 0 O 0 S 0
2 C 6 H 4 N 1 O 0 S 1
3 C 4 H 6 N 2 O 1 S 0
This way, I just need the output to be a single string, but I need to ignore any zero values, and drop the one after the element name.
And here a base R solution:
df = read.table(text = "
C H N O S
5 5 0 0 0
6 4 1 0 1
4 6 2 1 0
", header=T)
apply(df, 1, function(x){return(gsub('1', '', paste0(colnames(df)[x > 0], x[x > 0], collapse='')))})
[1] "C5H5" "C6H4NS" "C4H6N2O"
paste0(colnames(df)[x > 0], x[x > 0], collapse='') pastes together the column names where the row values are bigger than zero. gsub then removes the ones. And apply does this for each row in the data frame.
Here's a tidyverse solution that uses some reshaping:
df = read.table(text = "
C H N O S
5 5 0 0 0
6 4 1 0 1
4 6 2 1 0
", header=T)
library(tidyverse)
df %>%
mutate(id = row_number()) %>% # add row id
gather(key, value, -id) %>% # reshape data
filter(value != 0) %>% # remove any zero rows
mutate(value = ifelse(value == 1, "", value)) %>% # replace 1 with ""
group_by(id) %>% # for each row
summarise(v = paste0(key, value, collapse = "")) # create the string value
# # A tibble: 3 x 2
# id v
# <int> <chr>
# 1 1 C5H5
# 2 2 C6H4NS
# 3 3 C4H6N2O
Assume that the input matrix m is as given reproducibly in the Note at the end -- convert it to a matrix if it is a data frame using as.matrix.
Now create a matrix the same shape as m with just the letters so now lets contains the letters and m contains the numbers. Then paste the letters and numbers together and replace those cells for which the number is zero with the empty string. Also replace any cells for which the number is 1 with just the letter. Finally paste each row together. No packages are used and no loops or *apply are used.
lets <- t(replace(t(m), TRUE, colnames(m)))
mm <- paste0(lets, m)
mm <- replace(mm, m == 0, "")
mm <- ifelse(m == 1, lets, mm)
do.call("paste0", as.data.frame(mm))
## [1] "C5H5" "C6H4NS" "C4H6N2O"
Note
the input matrix m in reproducible form is assumed to be:
m <- matrix(c(5, 6, 4, 5, 4, 6, 0, 1, 2, 0, 0, 1, 0, 1, 0), 3, 5,
dimnames = list(NULL, c("C", "H", "N", "O", "S")))
Another idea that avoids the apply with margin 1,
gsub('1', '', sapply(split(df, 1:nrow(df)), function(i)
paste(paste0(names(i)[i != 0], i[i != 0]), collapse = '')))
# 1 2 3
# "C5H5" "C6H4NS" "C4H6N2O"
Another option
library(dplyr)
#Get indices of all non-zero numbers in the dataframe
inds <- which(df!=0, arr.ind = TRUE)
#Create a dataframe with row index, column index and value at that position
vals <- data.frame(inds, val = df[inds])
#For each row paste the name of the column and value together and then replace 1
vals %>%
group_by(row) %>%
summarise(chemical = paste0(names(df)[col], val,collapse = "")) %>%
mutate(chemical = gsub("[1]", "", chemical))
# row chemical
# <int> <chr>
#1 1 C5H5
#2 2 C6H4NS
#3 3 C4H6N2O

Removing columns that are all 0

I am trying to remove all columns in my dataframe that solely contain the value 0. My code is the following that I found on this website.
dataset = dataset[ ,colSums(dataset != 0) > 0]
However, I keep returning an error:
Error in [.data.frame(dataset, , colSums(dataset != 0) > 0) :
undefined columns selected
It's because you have an NA in at least one column. Fix like this:
dataset = dataset[ , colSums(dataset != 0, na.rm = TRUE) > 0]
Here's some code that will check which columns are numeric (or integer) and drop those that contain all zeros and NAs:
# example data
df <- data.frame(
one = rep(0,100),
two = sample(letters, 100, T),
three = rep(0L,100),
four = 1:100,
stringsAsFactors = F
)
# create function that checks numeric columns for all zeros
only_zeros <- function(x) {
if(class(x) %in% c("integer", "numeric")) {
all(x == 0, na.rm = TRUE)
} else {
FALSE
}
}
# apply that function to your data
df_without_zero_cols <- df[ , !sapply(df, only_zeros)]
There is an alternative using all():
dataset[, !sapply(dataset, function(x) all(x == 0))]
a c d f
1 1 -1 -1 a
2 2 0 NA a
3 3 1 1 a
In case of a large dataset, time and memory consuming copying can be avoided through removing the columns by reference
library(data.table)
cols <- which(sapply(dataset, function(x) all(x == 0)))
setDT(dataset)[, (cols) := NULL]
dataset
a c d f
1: 1 -1 -1 a
2: 2 0 NA a
3: 3 1 1 a
Data
dataset <- data.frame(a = 1:3, b = 0, c = -1:1, d = c(-1, NA, 1), e = 0, f ="a")
dataset
a b c d e f
1 1 0 -1 -1 0 a
2 2 0 0 NA 0 a
3 3 0 1 1 0 a

Take certain value in a data frame

I have a data.frame and would like to take a certain value from a cell if another is in a dataframe.
I tried the apply function.
n <- c(2, 3, 0 ,1)
s <- c(0, 1, 1, 2)
b <- c("THIS", "FALSE", "NOT", "THIS")
df <- data.frame(n, s, b)
df <- sapply(df$Vals, FUN=function(x){ if(b[x]=="THIS") ? n[x] : s[x] } )
My logic is:
if(b at position x is equal to "This") {
add n[x] to the column df$Vals
} else {
add s[x] to the column df$Vals
}
Whereas x is a single row.
Any recommendation what I am doing wrong?
I appreciate your reply!
Like this:
df$Vals = with(df, ifelse(b=="THIS", n, s))
Or giving direct the resulting data.frame:
transform(df, Vals=with(df, ifelse(b=="THIS", n, s)))
# n s b Vals
#1 2 0 THIS 2
#2 3 1 FALSE 1
#3 0 1 NOT 1
#4 1 2 THIS 1
With your additional conditions:
func=Vectorize(function(b, s, n){if(b=='THIS') return(n);if(b==F) return(n+s);s})
df$Vals = with(df, func(b,s,n))
Or you could use the row/column indexing
df$Vals <- df[1:2][cbind(1:nrow(df),(df$b!='THIS')+1)]
df
# n s b Vals
#1 2 0 THIS 2
#2 3 1 FALSE 1
#3 0 1 NOT 1
#4 1 2 THIS 1

Resources