Subtracting similar column names R - r

I have a dataframe with columns that have 'x1' and 'x1_fit' with the numbers going up to 5 in some cases.
date <- seq(as.Date('2019-11-04'), by = "days", length.out = 7)
x1 <- c(100,120,111,152,110,112,111)
x1_fit <- c(150,142,146,148,123,120,145)
x2 <- c(110,130,151,152,150,142,161)
x2_fit <- c(170,172,176,178,173,170,175)
df <- data.frame(date,x1,x1_fit,x2,x2_fit)
How can I do x1_fit - x1 and so on. The number of x's will change every time.

You can select those columns with regular expressions (assuming the columns are in the appropriate order):
> df[, grep('^x\\d+_fit$', colnames(df))] - df[, grep('^x\\d+$', colnames(df))]
x1_fit x2_fit
1 50 60
2 22 42
3 35 25
4 -4 26
5 13 23
6 8 28
7 34 14
If you want to assign the differences to the original df:
# Pair every base column (x1, x2, ...) with its "_fit" partner by name, so
# the result does not depend on the order of the columns in df.
base_cols <- grep("^x\\d+$", colnames(df), value = TRUE)
fit_cols <- paste0(base_cols, "_fit")
# List-style subsetting (df[cols]) always returns a data frame, even when
# only a single column matches. A base column without a "_fit" partner
# stops with an "undefined columns selected" error instead of being
# silently paired with the wrong column.
df[paste0(base_cols, "_diff")] <- df[fit_cols] - df[base_cols]
# > df
# date x1 x1_fit x2 x2_fit x1_diff x2_diff
# 1 2019-11-04 100 150 110 170 50 60
# 2 2019-11-05 120 142 130 172 22 42
# 3 2019-11-06 111 146 151 176 35 25
# 4 2019-11-07 152 148 152 178 -4 26
# 5 2019-11-08 110 123 150 173 13 23
# 6 2019-11-09 112 120 142 170 8 28
# 7 2019-11-10 111 145 161 175 34 14

The solution from @mt1022 is straightforward; however, since you have tagged this as dplyr, here is one approach using it, where we convert the data to long format, subtract the corresponding values and then get the data back in wide format.
library(dplyr)
library(tidyr)
df %>%
pivot_longer(cols = -date) %>%
mutate(name = sub('_.*', '', name)) %>%
group_by(date, name) %>%
summarise(diff = diff(value)) %>%
pivot_wider(names_from = name, values_from = diff) %>%
rename_at(-1, ~paste0(., "_diff")) %>%
left_join(df, by = "date")
# date x1_diff x2_diff x1 x1_fit x2 x2_fit
# <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 2019-11-04 50 60 100 150 110 170
#2 2019-11-05 22 42 120 142 130 172
#3 2019-11-06 35 25 111 146 151 176
#4 2019-11-07 -4 26 152 148 152 178
#5 2019-11-08 13 23 110 123 150 173
#6 2019-11-09 8 28 112 120 142 170
#7 2019-11-10 34 14 111 145 161 175

In base R, you could loop over the unique column names and diff against the fitted column using
# For every "<name>_fit" column, subtract the matching "<name>" column.
# The variable names are derived from the "_fit" columns, so any other
# column (such as date) is ignored.
fit_cols <- grep("_fit$", names(df), value = TRUE)
base_cols <- sub("_fit$", "", fit_cols)
lapply(setNames(nm = base_cols), function(nm) {
  # fitted minus observed, i.e. x1_fit - x1 as asked in the question
  df[[paste0(nm, "_fit")]] - df[[nm]]
})
# $x1
# [1] 50 22 35 -4 13  8 34
#
# $x2
# [1] 60 42 25 26 23 28 14
Here, I set the Date column as the row names and removed the column using
df <- data.frame(date,x1,x1_fit,x2,x2_fit)
row.names(df) <- df$date
df$date <- NULL
but you could just loop over the column names without the Date column.

We can also do with a split in base R
# Split the non-date columns into one small data frame per variable
# ("x1" holds x1 and x1_fit, and so on), then subtract within each group.
# Each group is assumed to hold the observed column first and its "_fit"
# partner second, as in the example data.
out <- lapply(split.default(df[-1], sub("_.*", "", names(df)[-1])),
              function(x) x[[2]] - x[[1]])
# out is named by variable ("x1", "x2", ...), so the new columns become
# x1_diff, x2_diff, ...
df[paste0(names(out), "_diff")] <- out
df
# date x1 x1_fit x2 x2_fit x1_diff x2_diff
#1 2019-11-04 100 150 110 170 50 60
#2 2019-11-05 120 142 130 172 22 42
#3 2019-11-06 111 146 151 176 35 25
#4 2019-11-07 152 148 152 178 -4 26
#5 2019-11-08 110 123 150 173 13 23
#6 2019-11-09 112 120 142 170 8 28
#7 2019-11-10 111 145 161 175 34 14

Related

How to order variables in blocks

I have a large data set with variables like this:
df <- data.frame(ID=c(1,2),
Hei3ght1=c(180,192),
Weight1=c(70,90),
Hip1=c(25,29),
hei5ght1=c(160,150),
Hei3ght2=c(167,168),
Weight2=c(50,50),
Hip2=c(23,27),
hei5ght2=c(160,150),
Hei3ght3=c(175,176),
Weight3=c(50,70),
Hip3=c(28,28),
hei5ght3=c(160,150))
I would like to order the variables as follows:
ID, Hei3ght1, Hei3ght2, Hei3ght3, Weight1, Weight2, Weight3, Hip1, Hip2, Hip3, Hei5ght1, Hei5ght2, Hei5ght3
I have tried with:
df <- df[sort(names(df))]
But I do not want all the variables alphabetically.
Thank you so much in advance.
UPDATE 2
df <- data.frame(ID=c(1,2),
Hei3ght1=c(180,192),
Weight1=c(70,90),
Hip1=c(25,29),
hei5ght1=c(160,150),
hei5ght21=c(160,150),
Hei3ght2=c(167,168),
Weight2=c(50,50),
Hip2=c(23,27),
hei5ght2=c(160,150),
hei5ght22=c(160,150),
Hei3ght3=c(175,176),
Weight3=c(50,70),
Hip3=c(28,28),
hei5ght3=c(160,150),
hei5ght23=c(160,150))
An option in base R would be to convert the column names to a matrix and then to a vector:
n <- length(unique(sub("_\\d+", "", names(df)[-1])))
df[c('ID', c(matrix(names(df)[-1], ncol = n, byrow = TRUE)))]
Output:
ID Height_1 Height_2 Height_3 Weight_1 Weight_2 Weight_3 Hip_1 Hip_2 Hip_3
1 1 180 167 175 70 50 50 25 23 28
2 2 192 168 176 90 50 70 29 27 28
Or you may use
library(data.table)
library(dplyr)
df %>%
select(ID, order(rowid(readr::parse_number(names(.)[-1])))+1)
Output:
ID Height_1 Height_2 Height_3 Weight_1 Weight_2 Weight_3 Hip_1 Hip_2 Hip_3
1 1 180 167 175 70 50 50 25 23 28
2 2 192 168 176 90 50 70 29 27 28
Update
For the updated data
library(stringr)
df %>%
select(ID, order(rowid(str_extract(names(.)[-1], "\\d+$")))+1)
Output:
ID Hei3ght1 Hei3ght2 Hei3ght3 Weight1 Weight2 Weight3 Hip1 Hip2 Hip3 hei5ght1 hei5ght2 hei5ght3
1 1 180 167 175 70 50 50 25 23 28 160 160 160
2 2 192 168 176 90 50 70 29 27 28 150 150 150
Update2
df %>%
select(ID, order(rowid(str_extract(names(.)[-1], "\\d$")))+1)
ID Hei3ght1 Hei3ght2 Hei3ght3 Weight1 Weight2 Weight3 Hip1 Hip2 Hip3 hei5ght1 hei5ght2 hei5ght3 hei5ght21 hei5ght22 hei5ght23
1 1 180 167 175 70 50 50 25 23 28 160 160 160 160 160 160
2 2 192 168 176 90 50 70 29 27 28 150 150 150 150 150 150
This is the long version of @akrun's solution. The core idea is to pivot longer, transform to a factor, then arrange and pivot back:
library(tidyverse)
df %>%
pivot_longer(-ID) %>%
mutate(helper = str_replace_all(name, "[[:punct:]][0-9]+", ""),
helper = factor(helper, levels = c("Height", "Weight", "Hip"))) %>%
group_by(ID) %>%
arrange(helper, .by_group = TRUE) %>%
select(-helper) %>%
pivot_wider(names_from = name, values_from = value)
ID Height_1 Height_2 Height_3 Weight_1 Weight_2 Weight_3 Hip_1 Hip_2 Hip_3
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 180 167 175 70 50 50 25 23 28
2 2 192 168 176 90 50 70 29 27 28

R - Reducing a matrix

I have a square matrix that is like:
A <- c("111","111","111","112","112","113")
B <- c(100,10,20,NA,NA,10)
C <- c(10,20,40,NA,10,20)
D <- c(10,20,NA,NA,40,200)
E <- c(20,20,40,10,10,20)
F <- c(NA,NA,40,100,10,20)
G <- c(10,20,NA,30,10,20)
df <- data.frame(A,B,C,D,E,F,G)
names(df) <- c("Codes","111","111","111","112","112","113")
# Codes 111 111 111 112 112 113
# 1 111 100 10 10 20 NA 10
# 2 111 10 20 20 20 NA 20
# 3 111 20 40 NA 40 40 NA
# 4 112 NA NA NA 10 100 30
# 5 112 NA 10 40 10 10 10
# 6 113 10 20 200 20 20 20
I want to reduce it so that observations with the same row and column names are summed up.
So I want to end up with:
# Codes 111 112 113
# 1 111 230 120 30
# 2 112 50 130 40
# 3 113 230 40 20
I tried to first combine the rows with the same "Codes" number, but I was having a lot of trouble.
In tidyverse
library(tidyverse)
df %>%
pivot_longer(-Codes, values_drop_na = TRUE) %>%
group_by(Codes, name) %>%
summarise(value = sum(value), .groups = 'drop')%>%
pivot_wider()
# A tibble: 3 x 4
Codes `111` `112` `113`
<chr> <dbl> <dbl> <dbl>
1 111 230 120 30
2 112 50 130 40
3 113 230 40 20
One way in base R:
# Sum all cells that share the same (row code, column code) pair.
# The row codes (Codes) come first in the index list so that they end up
# as the rows of the result, matching the layout asked for in the question.
tapply(unlist(df[-1]),
       list(df[, 1][row(df[-1])], names(df)[-1][col(df[-1])]),
       sum, na.rm = TRUE)
#     111 112 113
# 111 230 120  30
# 112  50 130  40
# 113 230  40  20
Note that this can be simplified, as noted by @thelatemail, to
grp <- expand.grid(df$Codes, names(df)[-1])
tapply(unlist(df[-1]), grp, FUN=sum, na.rm=TRUE)
You can also use `xtabs`:
xtabs(vals~., na.omit(cbind(grp, vals = unlist(df[-1]))))
Var2
Var1 111 112 113
111 230 120 30
112 50 130 40
113 230 40 20
When dealing with actual matrices - especially with large ones -, expressing the operation as (sparse) linear algebra should be most efficient.
library(Matrix) ## for sparse matrix operations
idx <- c("111","111","111","112","112","113")
## each line of numbers below is one COLUMN of the question's table
## (B, C, ..., G), so the matrix is filled column-wise
mat <- matrix(c(100,10,20,NA,NA,10,
                10,20,40,NA,10,20,
                10,20,NA,NA,40,200,
                20,20,40,10,10,20,
                NA,NA,40,100,10,20,
                10,20,NA,30,10,20),
              nrow=length(idx),
              byrow=FALSE, dimnames=list(idx, idx))
## convert NA's to zero
mat[is.na(mat)] <- 0
## examine matrix
mat
##     111 111 111 112 112 113
## 111 100  10  10  20   0  10
## 111  10  20  20  20   0  20
## 111  20  40   0  40  40   0
## 112   0   0   0  10 100  30
## 112   0  10  40  10  10  10
## 113  10  20 200  20  20  20
## indicator matrix
## converts between "code" and "idx" spaces
M_code_idx <- fac2sparse(idx)
## project to "code_code" space
## (rows are the row codes, columns are the column codes)
M_code_idx %*% mat %*% t(M_code_idx)
## 3 x 3 Matrix of class "dgeMatrix"
##     111 112 113
## 111 230 120  30
## 112  50 130  40
## 113 230  40  20

How can I turn the unique values of a column into row values in a table in R? [duplicate]

This question already has answers here:
How to reshape data from long to wide format
(14 answers)
Closed 1 year ago.
I have a dataframe (df1) and would like to rearrange the information for further processing. However, I am not sure how to convert the unique Site values into headers in the table. How could I accomplish this?
Here is the example data
df1 <- data.frame(matrix(ncol = 4, nrow = 27))
x <- c("Date","Distance","Site","MeanVal")
colnames(df1) <- x
df1$Date <- c("2020-06-01","2020-06-01","2020-06-01","2020-06-01","2020-06-01","2020-06-01","2020-06-01","2020-06-01","2020-06-01",
"2020-07-01","2020-07-01","2020-07-01","2020-07-01","2020-07-01","2020-07-01","2020-07-01","2020-07-01","2020-07-01",
"2020-08-01","2020-08-01","2020-08-01","2020-08-01","2020-08-01","2020-08-01","2020-08-01","2020-08-01","2020-08-01")
df1$Date <- as.Date(df1$Date, format = "%Y-%m-%d")
df1$Distance <- as.numeric(rep(c("0.5","1.5","2.5"),9))
df1$Site <- c("1A","1A","1A","2B","2B","2B","3C","3C","3C",
"1A","1A","1A","2B","2B","2B","3C","3C","3C",
"1A","1A","1A","2B","2B","2B","3C","3C","3C")
set.seed(123)
df1$MeanVal <- round(rnorm(27,100,50), digits = 0)
The output should look something like this
Date Distance 1A 2B 3C
2020-06-01 0.5 108 121 144
2020-06-01 1.5 43 85 141
2020-06-01 2.5 163 145 134
2020-07-01 0.5 128 81 37
2020-07-01 1.5 97 65 208
2020-07-01 2.5 85 90 160
2020-08-01 0.5 44 139 99
2020-08-01 1.5 80 96 98
2020-08-01 2.5 77 113 168
We can use pivot_wider
library(dplyr)
library(tidyr)
df1 %>%
pivot_wider(names_from = Site, values_from = MeanVal)
-output
# A tibble: 9 x 5
Date Distance `1A` `2B` `3C`
<date> <dbl> <dbl> <dbl> <dbl>
1 2020-06-01 0.5 72 104 123
2 2020-06-01 1.5 88 106 37
3 2020-06-01 2.5 178 186 66
4 2020-07-01 0.5 78 120 189
5 2020-07-01 1.5 161 106 125
6 2020-07-01 2.5 118 72 2
7 2020-08-01 0.5 135 89 69
8 2020-08-01 1.5 76 49 16
9 2020-08-01 2.5 47 64 142
Or use dcast
library(data.table)
dcast(setDT(df1), ... ~ Site, value.var = 'MeanVal')

R manipulate dataframe of dates to include 7 days prior to each date

I currently have a dataframe of date, latitude and longitude which represent samples taken. Simple example below:
Date Lat lon
1 1996-03-12 -33 153
2 2004-07-15 -35 160
3 2008-07-26 -28 155
I want to generate a table which adds the 7 days prior to each date/lat/long combination so I can then extract lead up data from some large environmental models but I can't figure out how to generate the larger table, example output for above small table would be as follows:
Date Lat lon
1 1996-03-12 -33 153
2 1996-03-11 -33 153
3 1996-03-10 -33 153
4 1996-03-09 -33 153
5 1996-03-08 -33 153
6 1996-03-07 -33 153
7 1996-03-06 -33 153
8 2004-07-15 -35 160
9 2004-07-14 -35 160
10 2004-07-13 -35 160
11 2004-07-12 -35 160
12 2004-07-11 -35 160
13 2004-07-10 -35 160
14 2004-07-09 -35 160
15 2008-07-26 -28 155
16 2008-07-25 -28 155
17 2008-07-24 -28 155
18 2008-07-23 -28 155
19 2008-07-22 -28 155
20 2008-07-21 -28 155
21 2008-07-20 -28 155
One way to do this using tidyr::complete is to create a sequence of dates from date - 6 to date for each row.
library(dplyr)
library(tidyr)
df %>%
mutate(Date = as.Date(Date)) %>%
group_by(row = row_number()) %>%
complete(Date = seq(Date - 6, Date, "day")) %>%
ungroup %>%
arrange(row, desc(Date)) %>%
fill(Lat, lon)
# A tibble: 21 x 3
# Date Lat lon
# <date> <int> <int>
# 1 1996-03-12 -33 153
# 2 1996-03-11 -33 153
# 3 1996-03-10 -33 153
# 4 1996-03-09 -33 153
# 5 1996-03-08 -33 153
# 6 1996-03-07 -33 153
# 7 1996-03-06 -33 153
# 8 2004-07-15 -35 160
# 9 2004-07-14 -35 160
#10 2004-07-13 -35 160
# … with 11 more rows
data
df <- structure(list(Date = structure(1:3, .Label = c("1996-03-12",
"2004-07-15", "2008-07-26"), class = "factor"), Lat = c(-33L,
-35L, -28L), lon = c(153L, 160L, 155L)),
class = "data.frame", row.names = c("1", "2", "3"))
Base R solution:
n_days_lookback <- 7
# as.Date() accepts Date, character and factor input, so this also works
# for the factor Date column in the example data.
sample_dates <- as.Date(df$Date)
# Repeat every source row n_days_lookback times and step back
# 0, 1, ..., n_days_lookback - 1 days. Rows stay in their original order
# and each block runs from the sample date backwards, as in the requested
# output. Working by row index (instead of splitting on Date) also keeps
# samples that share a date but differ in Lat/lon separate.
row_idx <- rep(seq_len(nrow(df)), each = n_days_lookback)
days_back <- rep(seq_len(n_days_lookback) - 1L, times = nrow(df))
data.frame(Date = sample_dates[row_idx] - days_back,
           Lat = df$Lat[row_idx],
           lon = df$lon[row_idx])

Simplify multiple rowSums looping through columns

I'm currently on R trying to create for a DF multiple columns with the sum of previous one. Imagine I got a DF like this:
df=
sep-2016 oct-2016 nov-2016 dec-2016 jan-2017
1 70 153 NA 28 19
2 57 68 73 118 16
3 29 NA 19 32 36
4 177 36 3 54 53
and I want to add at the end the sum of the rows previous of the month that I'm reporting so for October you end up with the sum of sep and oct, and for November you end up with the sum of sep, oct and november and end up with something like this:
df=
sep-2016 oct-2016 nov-2016 dec-2016 jan-2017 status-Oct2016 status-Nov 2016
1 70 153 NA 28 19 223 223
2 57 68 73 118 16 125 198
3 29 NA 19 32 36 29 48
4 177 36 3 54 53 213 216
I want to know an efficient way to do this instead of writing lots of lines of rowSums(), and if I could also get the label for each month in the iteration, that would be amazing!
Thanks!
We can use lapply to loop through the columns to apply the rowSums.
# Cumulative row sums: for column i (from the 2nd column on), add up
# columns 1..i of every row, treating NA as 0.
# seq_len(ncol(dat))[-1] is empty for a one-column input, whereas
# 2:ncol(dat) would count down (2, 1).
status_cols <- seq_len(ncol(dat))[-1]
dat2 <- as.data.frame(lapply(status_cols, function(i) {
  # drop = FALSE keeps a data frame so rowSums() always gets 2-d input
  rowSums(dat[, seq_len(i), drop = FALSE], na.rm = TRUE)
}))
# names(dat)[-1] rather than names(dat[, -1]): the latter loses the name
# when only one column is left and the subset collapses to a vector.
names(dat2) <- paste0("status-", names(dat)[-1])
dat3 <- cbind(dat, dat2)
dat3
# sep-2016 oct-2016 nov-2016 dec-2016 jan-2017 status-oct-2016 status-nov-2016 status-dec-2016 status-jan-2017
# 1 70 153 NA 28 19 223 223 251 270
# 2 57 68 73 118 16 125 198 316 332
# 3 29 NA 19 32 36 29 48 80 116
# 4 177 36 3 54 53 213 216 270 323
DATA
dat <- read.table(text = " 'sep-2016' 'oct-2016' 'nov-2016' 'dec-2016' 'jan-2017'
1 70 153 NA 28 19
2 57 68 73 118 16
3 29 NA 19 32 36
4 177 36 3 54 53",
header = TRUE, stringsAsFactors = FALSE)
names(dat) <- c("sep-2016", "oct-2016", "nov-2016", "dec-2016", "jan-2017")
Honestly I have no idea why you would want your data in this format, but here is a tidyverse method of accomplishing it. It involves transforming the data to a tidy format before spreading it back out into your wide format. The key thing to note is that in a tidy format, where month is a variable in a single column instead of spread across multiple columns, you can simply use group_by(rowid) and cumsum to calculate all the values you want. The last few lines are constructing the status- column names and spreading the data back out into a wide format.
library(tidyverse)
df <- read_table2(
"sep-2016 oct-2016 nov-2016 dec-2016 jan-2017
70 153 NA 28 19
57 68 73 118 16
29 NA 19 32 36
177 36 3 54 53"
)
df %>%
rowid_to_column() %>%
gather("month", "value", -rowid) %>%
arrange(rowid) %>%
group_by(rowid) %>%
mutate(
value = replace_na(value, 0),
status = cumsum(value)
) %>%
gather("vartype", "number", value, status) %>%
mutate(colname = ifelse(vartype == "value", month, str_c("status-", month))) %>%
select(rowid, number, colname) %>%
spread(colname, number)
#> # A tibble: 4 x 11
#> # Groups: rowid [4]
#> rowid `dec-2016` `jan-2017` `nov-2016` `oct-2016` `sep-2016`
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 28.0 19.0 0 153 70.0
#> 2 2 118 16.0 73.0 68.0 57.0
#> 3 3 32.0 36.0 19.0 0 29.0
#> 4 4 54.0 53.0 3.00 36.0 177
#> # ... with 5 more variables: `status-dec-2016` <dbl>,
#> # `status-jan-2017` <dbl>, `status-nov-2016` <dbl>,
#> # `status-oct-2016` <dbl>, `status-sep-2016` <dbl>
Created on 2018-02-16 by the reprex package (v0.2.0).
A clean way to do it is by converting your data to a long format.
library(tibble)
library(tidyr)
library(dplyr)
your_data <- tribble(~"sep_2016", ~"oct_2016", ~"nov_2016", ~"dec_2016", ~"jan_2017",
70, 153, NA, 28, 19,
57, 68, 73, 118, 16,
29, NA, 19, 32, 36,
177, 36, 3, 54, 53)
You can change the format of your data.frame with gather from the tidyr package.
your_data_long <- your_data %>%
rowid_to_column() %>%
gather(key = month_year, value = the_value, -rowid)
head(your_data_long)
#> # A tibble: 6 x 3
#> rowid month_year the_value
#> <int> <chr> <dbl>
#> 1 1 sep_2016 70
#> 2 2 sep_2016 57
#> 3 3 sep_2016 29
#> 4 4 sep_2016 177
#> 5 1 oct_2016 153
#> 6 2 oct_2016 68
Once your data.frame is in a long format, you can compute the cumulative sum with cumsum and the dplyr functions mutate and group_by.
result <- your_data_long %>%
group_by(rowid) %>%
mutate(cumulative_value = cumsum(the_value))
result
#> # A tibble: 20 x 4
#> # Groups: rowid [4]
#> rowid month_year the_value cumulative_value
#> <int> <chr> <dbl> <dbl>
#> 1 1 sep_2016 70 70
#> 2 2 sep_2016 57 57
#> 3 3 sep_2016 29 29
#> 4 4 sep_2016 177 177
#> 5 1 oct_2016 153 223
#> 6 2 oct_2016 68 125
#> 7 3 oct_2016 NA NA
#> 8 4 oct_2016 36 213
#> 9 1 nov_2016 NA NA
#> 10 2 nov_2016 73 198
#> 11 3 nov_2016 19 NA
#> 12 4 nov_2016 3 216
#> 13 1 dec_2016 28 NA
#> 14 2 dec_2016 118 316
#> 15 3 dec_2016 32 NA
#> 16 4 dec_2016 54 270
#> 17 1 jan_2017 19 NA
#> 18 2 jan_2017 16 332
#> 19 3 jan_2017 36 NA
#> 20 4 jan_2017 53 323
If you want to retrieve the starting form, you can do it with spread.
My preferred solution would be:
# library(matrixStats)
DF <- as.matrix(df)
DF[is.na(DF)] <- 0
RES <- matrixStats::rowCumsums(DF)
colnames(RES) <- paste0("status-", colnames(DF))
cbind.data.frame(df, RES)
This is closest to what you are looking for with the rowSums.
One option could be using spread and gather function from tidyverse.
Note: The status column has been added even for the 1st month. And the status columns are not in order but values are correct.
The approach is:
# Data
df <- read.table(text = "sep-2016 oct-2016 nov-2016 dec-2016 jan-2017
70 153 NA 28 19
57 68 73 118 16
29 NA 19 32 36
177 36 3 54 53", header = T, stringsAsFactors = F)
library(tidyverse)
# Just add a row number as sl
df <- df %>% mutate(sl = row_number())
#Calculate the cumulative sum after gathering and arranging by date
mod_df <- df %>%
  gather(key, value, -sl) %>%
  # turn "sep.2016" into a real Date (first of the month) so the months
  # sort chronologically; %b parsing depends on the current locale
  mutate(key = as.Date(paste("01", key, sep = "."), format = "%d.%b.%Y")) %>%
  arrange(sl, key) %>%
  group_by(sl) %>%
  # running total per original row, counting NA as 0
  mutate(status = cumsum(ifelse(is.na(value), 0L, value))) %>%
  select(-value) %>%
  # format() is the supported way to render a Date with a format string;
  # NOTE(review): recent R versions are believed to ignore `format=` in
  # as.character(<Date>), so it is avoided here
  mutate(key = paste("status", format(key, "%b.%Y"))) %>%
  spread(key, status) %>%
  # drop the grouping by sl so it does not leak into later steps
  ungroup()
# Finally join cumulative calculated sum columns with original df and then
# remove sl column
inner_join(df, mod_df, by = "sl") %>% select(-sl)
# sep.2016 oct.2016 nov.2016 dec.2016 jan.2017 status Dec.2016 status Jan.2017 status Nov.2016 status Oct.2016 status Sep.2016
#1 70 153 NA 28 19 251 270 223 223 70
#2 57 68 73 118 16 316 332 198 125 57
#3 29 NA 19 32 36 80 116 48 29 29
#4 177 36 3 54 53 270 323 216 213 177
Another base solution where we build a matrix accumulating the row sums :
# Row-wise running totals with NA counted as 0.
# apply(dat, 1, ...) returns one column per row of dat, hence the t().
# cumsum() is the built-in equivalent of accumulating with Reduce(sum, ...).
status <- setNames(
  as.data.frame(t(apply(dat, 1, function(x) cumsum(replace(x, is.na(x), 0))))),
  paste0("status-", names(dat)))
status
# status-sep-2016 status-oct-2016 status-nov-2016 status-dec-2016 status-jan-2017
# 1 70 223 223 251 270
# 2 57 125 198 316 332
# 3 29 29 48 80 116
# 4 177 213 216 270 323
Then bind it to your original data if needed :
cbind(dat,status[-1])

Resources