Calculate cumsum from the end towards the beginning - r

I'm trying to calculate the cumsum starting from the last row towards the first for each group.
Sample data:
t1 <- data.frame(var = "a", val = c(0,0,0,0,1,0,0,0,0,1,0,0,0,0,0))
t2 <- data.frame(var = "b", val = c(0,0,0,0,1,0,0,1,0,0,0,0,0,0,0))
ts <- rbind(t1, t2)
Desired format (grouped by var):
ts <- data.frame(var = c("a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a",
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b"),
val = c(2,2,2,2,2,1,1,1,1,1,0,0,0,0,0,2,2,2,2,2,1,1,1,0,0,0,0,0,0,0))

Promoting my comment to an answer; using:
ts$val2 <- ave(ts$val, ts$var, FUN = function(x) rev(cumsum(rev(x))))
gives:
> ts
var val val2
1 a 0 2
2 a 0 2
3 a 0 2
4 a 0 2
5 a 1 2
6 a 0 1
7 a 0 1
8 a 0 1
9 a 0 1
10 a 1 1
11 a 0 0
12 a 0 0
13 a 0 0
14 a 0 0
15 a 0 0
16 b 0 2
17 b 0 2
18 b 0 2
19 b 0 2
20 b 1 2
21 b 0 1
22 b 0 1
23 b 1 1
24 b 0 0
25 b 0 0
26 b 0 0
27 b 0 0
28 b 0 0
29 b 0 0
30 b 0 0
Or with dplyr or data.table:
library(dplyr)
ts %>%
group_by(var) %>%
mutate(val2 = rev(cumsum(rev(val))))
library(data.table)
setDT(ts)[, val2 := rev(cumsum(rev(val))), by = var]

An option without explicitly reversing the vector:
ave(ts$val, ts$var, FUN = function(x) Reduce(sum, x, right = TRUE, accumulate = TRUE))
[1] 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 1 1 1 0 0 0 0 0 0 0
Or the same approach with dplyr:
ts %>%
group_by(var) %>%
mutate(val = Reduce(sum, val, right = TRUE, accumulate = TRUE))

Related

How to summarize values across multiple columns?

I have a dataframe that looks like this:
1 2 3 4 5
A B A B A
C B B B B
A C A B B
And I would like to summarize my data in a frequency table like this:
A B C
1 2 0 1
2 0 2 1
3 2 1 0
4 0 3 0
5 1 2 0
How would I be able to do this?
You can use the following solution:
library(tidyr)
library(janitor)
tab %>%
pivot_longer(everything(), names_to = "nm", values_to = "val",
names_prefix = "X") %>%
tabyl(nm, val)
nm A B C
1 2 0 1
2 0 2 1
3 2 1 0
4 0 3 0
5 1 2 0
You could use
library(tidyr)
data %>%
pivot_longer(everything()) %>%
{ table(.$name, .$value) }
which returns
A B C
1 2 0 1
2 0 2 1
3 2 1 0
4 0 3 0
5 1 2 0
Another table option with stack
> t(table(stack(df)))
values
ind A B C
1 2 0 1
2 0 2 1
3 2 1 0
4 0 3 0
5 1 2 0
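An option in base R with table. Note that the call below pairs the column-major indices from col(df1) with the row-major values from c(t(df1)), so the counts it prints do not match the requested table; table(c(col(df1)), unlist(df1)) keeps both vectors in column-major order and gives the expected result.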
table(c(col(df1)), c(t(df1)))
A B C
1 2 1 0
2 1 1 1
3 0 3 0
4 1 1 1
5 1 2 0
Data
df1 <- structure(list(`1` = c("A", "C", "A"), `2` = c("B", "B", "C"),
`3` = c("A", "B", "A"), `4` = c("B", "B", "B"), `5` = c("A",
"B", "B")), class = "data.frame", row.names = c(NA, -3L))

How to convert two character columns to a binary matrix?

Here is an example:
df <- data.frame(x=c("A", "A", "B", "C", "C", "C"), y=c("m", "n", "o", "p", "q", "r"))
df
x y
1 A m
2 A n
3 B o
4 C p
5 C q
6 C r
What I want is to convert it to a binary matrix, using y as the row names and unique(x) as the column names:
A B C
m 1 0 0
n 1 0 0
o 0 1 0
p 0 0 1
q 0 0 1
r 0 0 1
My first thought was to use tidyr::spread(), but it does not seem to work properly.
You can use:
library(tidyverse)
df %>%
pivot_wider(y,
names_from = x,
values_from = x,
values_fn = list(x = length),
values_fill = list(x = 0))
y A B C
<chr> <int> <int> <int>
1 m 1 0 0
2 n 1 0 0
3 o 0 1 0
4 p 0 0 1
5 q 0 0 1
6 r 0 0 1
Does this work?
library(tidyverse)
df<-data.frame(x=c("A", "A", "B", "C", "C", "C"), y=c("m", "n", "o", "p", "q",
"r"))
df %>%
mutate(x=str_split(x, ",")) %>%
unnest() %>%
mutate(dummy=1) %>%
spread(x, dummy, fill=0)
Output:
A B C
m 1 0 0
n 1 0 0
o 0 1 0
p 0 0 1
q 0 0 1
r 0 0 1
A base R solution.
cbind(df[-1], +sapply(unique(df$x), `==`, df$x))
# y A B C
# 1 m 1 0 0
# 2 n 1 0 0
# 3 o 0 1 0
# 4 p 0 0 1
# 5 q 0 0 1
# 6 r 0 0 1
We can use table in base R
+(t(table(df)) > 0)

Adjacency Matrix from source target dataset

I have a dataset as follows
Var1 Var2 Count
A B 3
A C 4
A D 10
A L 6
I need to create an adjacency matrix for usage downstream in creating a chord diagram. I am looking for an efficient way to get it.
A B C D L
A 0 3 4 10 6
B 3 0 0 0 0
C 4 0 0 0 0
D 10 0 0 0 0
L 6 0 0 0 0
I am looking for a visualization as follows
Assuming you're talking about just the symmetric matrix generation:
dat <- read.table(header=TRUE, stringsAsFactors=FALSE, text='
Var1 Var2 Count
A B 3
A C 4
A D 10
A L 6')
vars <- sort(unique(unlist(dat[c("Var1","Var2")])))
m <- matrix(0, nr=length(vars), nc=length(vars), dimnames=list(vars,vars))
m[as.matrix(dat[c("Var1","Var2")])] <- m[as.matrix(dat[c("Var2","Var1")])] <- dat$Count
m
# A B C D L
# A 0 3 4 10 6
# B 3 0 0 0 0
# C 4 0 0 0 0
# D 10 0 0 0 0
# L 6 0 0 0 0
Here is an option using xtabs. Convert the first two columns to factors, with the levels specified in the order we want in the output. Then use xtabs to get a matrix, transpose it, and add the transpose to the original matrix to get the expected symmetric output.
dat[1:2] <- lapply(dat[1:2], factor, levels = c("A", "B", "C", "D", "L"))
out <- xtabs(Count ~ Var1 + Var2, dat)
out + t(out)
# Var2
#Var1 A B C D L
# A 0 3 4 10 6
# B 3 0 0 0 0
# C 4 0 0 0 0
# D 10 0 0 0 0
# L 6 0 0 0 0
data
dat <- structure(list(Var1 = c("A", "A", "A", "A"), Var2 = c("B", "C",
"D", "L"), Count = c(3L, 4L, 10L, 6L)), class = "data.frame",
row.names = c(NA, -4L))

How to create dichotomous variables based on some factors in r?

The initial dataframe is:
Factor1 Factor2 Factor3
A B C
B C NA
A NA NA
B C D
E NA NA
I want to create 5 dichotomous variables based on the above factor variables. The rule is: the new variable A gets 1 if any of Factor1, Factor2 or Factor3 contains an A, and 0 otherwise; the same applies to B, C, D and E. The newly created variables should look like:
A B C D E
1 1 1 0 0
0 1 1 0 0
1 0 0 0 0
0 1 1 1 0
0 0 0 0 1
We can use table to do this. We replicate the sequence of rows with the number of columns, unlist the dataset and get the frequency of values.
table(rep(1:nrow(df1), ncol(df1)), unlist(df1))
# A B C D E
# 1 1 1 1 0 0
# 2 0 1 1 0 0
# 3 1 0 0 0 0
# 4 0 1 1 1 0
# 5 0 0 0 0 1
If the same value can appear more than once in a row, the counts can exceed 1; in that case, convert the table to logical and then back to integer so that every cell is 0 or 1.
+(!!table(rep(1:nrow(df1), ncol(df1)), unlist(df1)))
data
df1 <- structure(list(Factor1 = c("A", "B", "A", "B", "E"),
Factor2 = c("B",
"C", NA, "C", NA), Factor3 = c("C", NA, NA, "D", NA)),
.Names = c("Factor1",
"Factor2", "Factor3"), class = "data.frame", row.names = c(NA, -5L))

Reshape a data frame into a wide shape

The data contains two variables: id and grade. Each id can have multiple records
for each grade.
dat <- data.frame(id = c(1,1,1,2,2,2,2,3,3,4,5,5,5),
grade = c("a", "b", "c", "a", "a", "b", "b", "d", "f", "c", "a", "e", "f"))
I want to reshape the data into a wide shape such that each id has only one record
and each unique grade becomes a single column. The value of each column is either 0 or 1,
depending on the grades for each id.
The final data set looks like:
id a b c d e f
1 1 1 1 0 0 0
2 1 1 0 0 0 0
3 0 0 0 1 0 1
4 0 0 1 0 0 0
5 1 0 0 0 1 1
I tried this, but no luck.
n.dat <- reshape(dat, timevar = "grade",idvar = c("id"),direction = "wide")
You could simply tabulate the values with table, convert the counts to logical with the condition > 0, and then convert back to numeric using the unary + operator (or, for a less terse version, by adding 0).
+(table(dat) > 0)
# grade
# id a b c d e f
# 1 1 1 1 0 0 0
# 2 1 1 0 0 0 0
# 3 0 0 0 1 0 1
# 4 0 0 1 0 0 0
# 5 1 0 0 0 1 1

Resources