Related
I am trying to use pivot_longer. However, I am not sure how to use names_sep or names_pattern to solve this.
dat <- tribble(
~group, ~BP, ~HS, ~BB, ~lowerBP, ~upperBP, ~lowerHS, ~upperHS, ~lowerBB, ~upperBB,
"1", 0.51, 0.15, 0.05, 0.16, 0.18, 0.5, 0.52, 0.14, 0.16,
"2.1", 0.67, 0.09, 0.06, 0.09, 0.11, 0.66, 0.68, 0.08, 0.1,
"2.2", 0.36, 0.13, 0.07, 0.12, 0.15, 0.34, 0.38, 0.12, 0.14,
"2.3", 0.09, 0.17, 0.09, 0.13, 0.16, 0.08, 0.11, 0.15, 0.18,
"2.4", 0.68, 0.12, 0.07, 0.12, 0.14, 0.66, 0.69, 0.11, 0.13,
"3", 0.53, 0.15, 0.06, 0.14, 0.16, 0.52, 0.53, 0.15, 0.16)
Desired output (First row from wide data)
group names values lower upper
1 BP 0.51 0.16 0.18
1 HS 0.15 0.5 0.52
1 BB 0.05 0.14 0.16
Here is a solution that follows a method similar to the one @Fnguyen used, but with the newer pivot_longer and pivot_wider functions:
library(dplyr)
library(tidyr)

# Step 1: stack every column except the first. The regex splits each column
# name into an optional prefix ("", "lower" or "upper") and its final two
# characters (BP, HS or BB).
longer <- dat %>%
  pivot_longer(
    cols = -1,
    names_to = c("limit", "name"),
    names_pattern = "(.*)(..)$"
  ) %>%
  # Columns without a prefix hold the plain value; label them "value".
  mutate(limit = replace(limit, limit == "", "value"))

# Step 2: spread the three kinds of number back into their own columns,
# leaving one row per group/name pair.
answer <- longer %>%
  pivot_wider(
    id_cols = c(group, name),
    names_from = limit,
    values_from = value,
    names_repair = "check_unique"
  )
Most of the selecting, separating, mutating and renaming is taking place within the pivot function calls.
Update:
The regular expression "(.*)(..)$" means:
( ) ( ) Look for two parts,
(.*) the first part should have zero or more characters
(..) the second part should have just 2 characters at the “$” end of the string
A data.table version (I am not yet sure how to retain the original names so that you don't need to substitute them afterwards; see https://github.com/Rdatatable/data.table/issues/2551):
library(data.table)

df <- data.table(dat)
v <- c("BP", "HS", "BB")

# melt() needs a distinct pattern for each output column, so give the bare
# value columns a temporary "x" prefix (setnames() modifies df by reference).
setnames(df, v, paste0("x", v))

# Each pattern collects one family of columns. The patterns are anchored with
# "^" so they can only match a prefix, never a letter inside a column name.
g <- melt(
  df,
  id.vars = "group",
  measure.vars = patterns(values = "^x", lower = "^lower", upper = "^upper"),
  variable.name = "names"
)

# melt() labels the measures 1, 2, 3, which correspond to BP, HS, BB (the
# order of v). Relabel them all in one vectorised step instead of one
# assignment per level.
g[, names := factor(names, labels = v)]
group names values lower upper
1: 1 BP 0.51 0.16 0.18
2: 2.1 BP 0.67 0.09 0.11
3: 2.2 BP 0.36 0.12 0.15
4: 2.3 BP 0.09 0.13 0.16
5: 2.4 BP 0.68 0.12 0.14
6: 3 BP 0.53 0.14 0.16
7: 1 HS 0.15 0.50 0.52
8: 2.1 HS 0.09 0.66 0.68
9: 2.2 HS 0.13 0.34 0.38
10: 2.3 HS 0.17 0.08 0.11
11: 2.4 HS 0.12 0.66 0.69
12: 3 HS 0.15 0.52 0.53
13: 1 BB 0.05 0.14 0.16
14: 2.1 BB 0.06 0.08 0.10
15: 2.2 BB 0.07 0.12 0.14
16: 2.3 BB 0.09 0.15 0.18
17: 2.4 BB 0.07 0.11 0.13
18: 3 BB 0.06 0.15 0.16
Based on your example data this solution using dplyr works for me:
library(dplyr)
library(tidyr) # the reshaping verbs live in tidyr, not dplyr

# Reshape dat to one row per group/measure with columns
# group, names, values, lower, upper.
dat %>%
  # One row per group / original column.
  pivot_longer(-group, names_to = "key", values_to = "values") %>%
  mutate(
    # Measure name: the column name without its "lower"/"upper" prefix.
    names = sub("^(lower|upper)", "", key),
    # Kind of number: the prefix itself, or "values" when there is none.
    key1 = sub("[[:upper:]].*$", "", key),
    key1 = if_else(key1 == "", "values", key1)
  ) %>%
  select(group, names, key1, values) %>%
  # (group, names, key1) identifies each number uniquely, so the reshape
  # needs neither a helper row id nor an averaging step afterwards.
  pivot_wider(names_from = key1, values_from = values)
I'd like to add an alternative tidyverse solution drawing from the answer provided by @Dave2e.
Like Dave2e's solution it's a two-step procedure (first rename, then reshape). Instead of reshaping the data twice, I add the prefix "values" to the columns named "BP", "HS", and "BB" using rename_with. This was necessary for getting the column names right when using the .value sentinel in the names_to argument of pivot_longer.
library(dplyr)
library(tidyr)

dat %>%
  # Add the prefix "values" to the bare measure columns so that every column
  # to be pivoted has the form <kind><measure>.
  rename_with(~ sub("^(BP|HS|BB)$", "values\\1", .)) %>%
  # The piped data frame is already the first argument of pivot_longer(), so
  # it must not be passed a second time here.
  pivot_longer(
    cols = -1,
    names_pattern = "(.*)(BP|HS|BB)$",
    # ".value" turns the first regex group (values/lower/upper) into output
    # columns; the second group becomes the "names" column.
    names_to = c(".value", "names")
  )
I am trying to use pivot_longer. However, I am not sure how to use names_sep or names_pattern to solve this.
dat <- tribble(
~group, ~BP, ~HS, ~BB, ~lowerBP, ~upperBP, ~lowerHS, ~upperHS, ~lowerBB, ~upperBB,
"1", 0.51, 0.15, 0.05, 0.16, 0.18, 0.5, 0.52, 0.14, 0.16,
"2.1", 0.67, 0.09, 0.06, 0.09, 0.11, 0.66, 0.68, 0.08, 0.1,
"2.2", 0.36, 0.13, 0.07, 0.12, 0.15, 0.34, 0.38, 0.12, 0.14,
"2.3", 0.09, 0.17, 0.09, 0.13, 0.16, 0.08, 0.11, 0.15, 0.18,
"2.4", 0.68, 0.12, 0.07, 0.12, 0.14, 0.66, 0.69, 0.11, 0.13,
"3", 0.53, 0.15, 0.06, 0.14, 0.16, 0.52, 0.53, 0.15, 0.16)
Desired output (First row from wide data)
group names values lower upper
1 BP 0.51 0.16 0.18
1 HS 0.15 0.5 0.52
1 BB 0.05 0.14 0.16
Here is a solution that follows a method similar to the one @Fnguyen used, but with the newer pivot_longer and pivot_wider functions:
library(dplyr)
library(tidyr)

# Step 1: stack every column except the first. The regex splits each column
# name into an optional prefix ("", "lower" or "upper") and its final two
# characters (BP, HS or BB).
longer <- dat %>%
  pivot_longer(
    cols = -1,
    names_to = c("limit", "name"),
    names_pattern = "(.*)(..)$"
  ) %>%
  # Columns without a prefix hold the plain value; label them "value".
  mutate(limit = replace(limit, limit == "", "value"))

# Step 2: spread the three kinds of number back into their own columns,
# leaving one row per group/name pair.
answer <- longer %>%
  pivot_wider(
    id_cols = c(group, name),
    names_from = limit,
    values_from = value,
    names_repair = "check_unique"
  )
Most of the selecting, separating, mutating and renaming is taking place within the pivot function calls.
Update:
The regular expression "(.*)(..)$" means:
( ) ( ) Look for two parts,
(.*) the first part should have zero or more characters
(..) the second part should have just 2 characters at the “$” end of the string
A data.table version (I am not yet sure how to retain the original names so that you don't need to substitute them afterwards; see https://github.com/Rdatatable/data.table/issues/2551):
library(data.table)

df <- data.table(dat)
v <- c("BP", "HS", "BB")

# melt() needs a distinct pattern for each output column, so give the bare
# value columns a temporary "x" prefix (setnames() modifies df by reference).
setnames(df, v, paste0("x", v))

# Each pattern collects one family of columns. The patterns are anchored with
# "^" so they can only match a prefix, never a letter inside a column name.
g <- melt(
  df,
  id.vars = "group",
  measure.vars = patterns(values = "^x", lower = "^lower", upper = "^upper"),
  variable.name = "names"
)

# melt() labels the measures 1, 2, 3, which correspond to BP, HS, BB (the
# order of v). Relabel them all in one vectorised step instead of one
# assignment per level.
g[, names := factor(names, labels = v)]
group names values lower upper
1: 1 BP 0.51 0.16 0.18
2: 2.1 BP 0.67 0.09 0.11
3: 2.2 BP 0.36 0.12 0.15
4: 2.3 BP 0.09 0.13 0.16
5: 2.4 BP 0.68 0.12 0.14
6: 3 BP 0.53 0.14 0.16
7: 1 HS 0.15 0.50 0.52
8: 2.1 HS 0.09 0.66 0.68
9: 2.2 HS 0.13 0.34 0.38
10: 2.3 HS 0.17 0.08 0.11
11: 2.4 HS 0.12 0.66 0.69
12: 3 HS 0.15 0.52 0.53
13: 1 BB 0.05 0.14 0.16
14: 2.1 BB 0.06 0.08 0.10
15: 2.2 BB 0.07 0.12 0.14
16: 2.3 BB 0.09 0.15 0.18
17: 2.4 BB 0.07 0.11 0.13
18: 3 BB 0.06 0.15 0.16
Based on your example data this solution using dplyr works for me:
library(dplyr)
library(tidyr) # the reshaping verbs live in tidyr, not dplyr

# Reshape dat to one row per group/measure with columns
# group, names, values, lower, upper.
dat %>%
  # One row per group / original column.
  pivot_longer(-group, names_to = "key", values_to = "values") %>%
  mutate(
    # Measure name: the column name without its "lower"/"upper" prefix.
    names = sub("^(lower|upper)", "", key),
    # Kind of number: the prefix itself, or "values" when there is none.
    key1 = sub("[[:upper:]].*$", "", key),
    key1 = if_else(key1 == "", "values", key1)
  ) %>%
  select(group, names, key1, values) %>%
  # (group, names, key1) identifies each number uniquely, so the reshape
  # needs neither a helper row id nor an averaging step afterwards.
  pivot_wider(names_from = key1, values_from = values)
I'd like to add an alternative tidyverse solution drawing from the answer provided by @Dave2e.
Like Dave2e's solution it's a two-step procedure (first rename, then reshape). Instead of reshaping the data twice, I add the prefix "values" to the columns named "BP", "HS", and "BB" using rename_with. This was necessary for getting the column names right when using the .value sentinel in the names_to argument of pivot_longer.
library(dplyr)
library(tidyr)

dat %>%
  # Add the prefix "values" to the bare measure columns so that every column
  # to be pivoted has the form <kind><measure>.
  rename_with(~ sub("^(BP|HS|BB)$", "values\\1", .)) %>%
  # The piped data frame is already the first argument of pivot_longer(), so
  # it must not be passed a second time here.
  pivot_longer(
    cols = -1,
    names_pattern = "(.*)(BP|HS|BB)$",
    # ".value" turns the first regex group (values/lower/upper) into output
    # columns; the second group becomes the "names" column.
    names_to = c(".value", "names")
  )
I need to do a diagonal multiplication for below table.
It's a 7*7 matrix:
Step 1: do a diagonal multiplication for the first 7*7 matrix (columns 1 to 7).
Step 2: then ignore the first column, select the next 7 columns and 7 rows, and do the diagonal multiplication.
Step 3: ignore the 1st & 2nd columns, select the next 7 columns and 7 rows, and do the diagonal multiplication.
Step 4: continue as in step 3, ignoring one more column each time (the first 3, then the first 4, and so on).
Note: the diagonal runs upward, from the bottom-right corner to the upper-left corner.
Data:
28/02/2013 31/03/2013 30/04/2013 31/05/2013 30/06/2013 31/07/2013 31/08/2013 30/09/2013 31/10/2013 30/11/2013 31/12/2013 31/01/2014 28/02/2014
0.04 0.03 0.03 0.04 0.04 0.07 0.86 0.28 0.05 0.05 0.05 0.04 0.04
0.44 0.44 0.42 0.43 0.40 0.32 0.64 0.02 0.33 0.36 0.30 0.27 0.37
0.57 0.57 0.52 0.59 0.62 0.51 0.79 0.23 0.64 0.66 0.50 0.55 0.60
0.61 0.58 0.60 0.63 0.65 0.59 0.81 0.83 1.00 0.63 0.57 0.63 0.74
0.70 0.65 0.66 0.71 0.73 0.66 0.86 0.90 0.55 0.76 0.65 0.66 0.74
0.76 0.76 0.79 0.74 0.83 0.83 0.86 1.00 0.61 0.83 0.38 0.74 0.75
0.80 0.84 0.89 0.84 0.82 0.83 0.98 0.84 0.44 0.93 0.88 0.78 0.78
Considering each column as A, B, C, D, E, F, G, H, I, J, K and so on ... there will be many columns, but the number of rows will be only 7.
Calculation of the 7*7 diagonal matrix will be as follows.
A is result for -> STEP 1, B -> STEP 2 AND C -> STEP 3 ... and so on.
A B C
G8*F7*E6*D5*C4*B3*A2 = 0.00 H8*G7*F6*E5*D4*C3*B2 = 0.02 I8*H7*G6*F5*E4*D3*C2 = 0.00
G8*F7*E6*D5*C4*B3 = 0.08 H8*G7*F6*E5*D4*C3 = 0.08 I8*H7*G6*F5*E4*D3 = 0.06
G8*F7*E6*D5*C4 = 0.19 H8*G7*F6*E5*D4 = 0.18 I8*H7*G6*F5*E4 = 0.14
G8*F7*E6*D5 = 0.37 H8*G7*F6*E5 = 0.31 I8*H7*G6*F5 = 0.22
G8*F7*E6 = 0.59 H8*G7*F6 = 0.47 I8*H7*G6 = 0.38
G8*F7 = 0.81 H8*G7 = 0.72 I8*H7 = 0.44
G8 = 0.98 H8 = 0.84 I8 = 0.44
So result should be printed as.
A B C
0 0.02 0.00
0.08 0.08 0.06
0.19 0.18 0.14
0.37 0.31 0.22
0.59 0.47 0.38
0.81 0.72 0.44
0.98 0.84 0.44
Similarly, there will be results for D, E, F, and so on.
Please help, Thanks in Advance.
# For every window of n consecutive columns (n = number of rows), take the
# window's main diagonal and accumulate its product from the bottom-right
# element upwards. Each window becomes one column of the result.
n <- nrow(df)
m <- as.matrix(df) # convert once instead of once per window

# seq_len() yields no windows when df has fewer than n columns, where the
# original 7:NCOL(df) would have counted downwards.
window_starts <- seq_len(max(ncol(m) - n + 1L, 0L))

vapply(
  window_starts,
  function(start) {
    d <- diag(m[, start:(start + n - 1L), drop = FALSE])
    # rev -> cumprod -> rev accumulates from the last diagonal element.
    round(rev(cumprod(rev(d))), digits = 2)
  },
  numeric(n)
)
# [,1] [,2] [,3] [,4] [,5] [,6] [,7]
#[1,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00
#[2,] 0.09 0.08 0.06 0.08 0.08 0.03 0.00
#[3,] 0.19 0.18 0.14 0.21 0.26 0.05 0.15
#[4,] 0.37 0.31 0.22 0.41 0.33 0.23 0.24
#[5,] 0.59 0.48 0.38 0.51 0.40 0.23 0.38
#[6,] 0.81 0.72 0.44 0.57 0.73 0.30 0.58
#[7,] 0.98 0.84 0.44 0.93 0.88 0.78 0.78
Let me know if the output is correct
DATA
df = structure(list(A = c(0.04, 0.44, 0.57, 0.61, 0.7, 0.76, 0.8),
B = c(0.03, 0.44, 0.57, 0.58, 0.65, 0.76, 0.84), C = c(0.03,
0.42, 0.52, 0.6, 0.66, 0.79, 0.89), D = c(0.04, 0.43, 0.59,
0.63, 0.71, 0.74, 0.84), E = c(0.04, 0.4, 0.62, 0.65, 0.73,
0.83, 0.82), F = c(0.07, 0.32, 0.51, 0.59, 0.66, 0.83, 0.83
), G = c(0.86, 0.64, 0.79, 0.81, 0.86, 0.86, 0.98), H = c(0.28,
0.02, 0.23, 0.83, 0.9, 1, 0.84), I = c(0.05, 0.33, 0.64,
1, 0.55, 0.61, 0.44), J = c(0.05, 0.36, 0.66, 0.63, 0.76,
0.83, 0.93), K = c(0.05, 0.3, 0.5, 0.57, 0.65, 0.38, 0.88
), L = c(0.04, 0.27, 0.55, 0.63, 0.66, 0.74, 0.78), M = c(0.04,
0.37, 0.6, 0.74, 0.74, 0.75, 0.78)), .Names = c("A", "B",
"C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M"), class = "data.frame", row.names = c(NA,
-7L))
I think a for loop is a good bet here - inspired from this
# Work on a numeric matrix: matrix-indexing a data frame converts it on every
# iteration, which is what makes the data-frame version slow.
m <- as.matrix(df)

n <- nrow(m)
b <- ncol(m) - n + 1 # number of n-column windows
out <- matrix(0, n, b)
ro <- seq_len(n)

# seq_len() skips the loop entirely when there are no windows (1:b would
# iterate over c(1, 0)).
for (i in seq_len(b)) {
  co <- i:(n + i - 1)
  # cbind(ro, co) addresses the diagonal of window i; rev -> cumprod -> rev
  # accumulates the product from the bottom-right element upwards.
  out[ro, i] <- rev(cumprod(rev(m[cbind(ro, co)])))
}
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 0.003423605 0.002303868 0.001785601 0.003374663 0.00337162 0.00232112
# [2,] 0.085590113 0.076795599 0.059520050 0.084366587 0.08429050 0.03315886
# [3,] 0.194522983 0.182846664 0.138418720 0.210916467 0.26340780 0.05181072
# [4,] 0.374082660 0.309909600 0.223256000 0.413561700 0.33342760 0.22526400
# [5,] 0.593782000 0.476784000 0.378400000 0.510570000 0.40172000 0.22526400
# [6,] 0.813400000 0.722400000 0.440000000 0.567300000 0.73040000 0.29640000
# [7,] 0.980000000 0.840000000 0.440000000 0.930000000 0.88000000 0.78000000
Wrap the answer in round to alter how it is printed.
Another way , also using indexing...
ro <- nrow(df)
co <- ncol(df)
b <- co - ro + 1 # number of ro-column windows
stopifnot(b >= 1)

# Column indices of every window: window j covers columns j .. j + ro - 1, so
# the window ends always run from ro to co. (Starting the ends at
# pmin(ro, b) is only correct when there are at least as many windows as
# rows.)
ccols <- mapply(seq, seq_len(b), ro:co)
rrows <- rep(seq_len(ro), b)

# Pull out every window's diagonal with one (row, column) index matrix. The
# vector is reversed so the column-wise cumulative products run from the
# bottom-right of each window upwards; the final rev() restores the order.
mat <- matrix(rev(df[cbind(rrows, c(ccols))]), nrow = ro)
matrix(rev(matrixStats::colCumprods(mat)), nrow = ro)
A quick benchmark on larger data seems to show that method two is considerably faster, however, if you convert the dataframe to a matrix then the for loop has similar speed
I have a list of zoo objects consisting of irregular time-series, lodf, in the following format:
> head(lodf)
[[1]]
2014-08-08 2014-08-14 2014-09-12
1.15 1.32 2.39
[[2]]
2014-07-22 2014-07-24 2014-08-14 2014-08-20 2014-08-27 2014-09-12
0.50 0.75 1.29 1.36 1.28 1.28
[[3]]
2012-11-01 2012-11-02 2013-07-12 2013-08-13 2013-09-11 2014-07-01
1.00 1.27 0.91 1.00 0.99 0.98
...
I am ultimately trying to sum all these time-series into one combined time-series, i.e. sum down each column. To do this, I am trying to convert into a zoo/xts time-series for further manipulation , i.e. to apply na.locf and other zoo-library capabilities before summing across the individual data frames/dates using rowsum. i.e. I am trying to get my list of date frames above into a combined zoo object resembling this:
Value
12/09/2014 1.07
14/08/2014 1.32
08/08/2014 1.15
12/09/2014 0.48
27/08/2014 0.53
20/08/2014 0.61
14/08/2014 0.54
24/07/2014 0.75
22/07/2014 0.5
01/07/2014 0.98
01/07/2014 0
...
There is often over-lap between the individual data frames i.e. several values corresponding to the same date index, and What I would like to do in those cases is to sum the values. E.g. if I have
2012-11-01
0.7
2012-11-01
1.5
2012-11-01
0.7
I would like to have
2012-11-01
2.9
as the value for this date index in the resulting large data frame.
I have tried merge, reading as a zoo object, do.call(rbind) etc. in the current format, but I am stumped. For further context, this question is part of a larger project outlined here: R: time series with duplicate time index entries. Any help would be most appreciated!
Update: please find a data object below as requested:
> dput(head(lodf))
list(structure(c(1.15, 1.32, 2.39), index = structure(c(16290L,
16296L, 16325L), class = "Date"), class = "zoo"), structure(c(0.5,
0.75, 1.29, 1.36, 1.28, 1.28), index = structure(c(16273L, 16275L,
16296L, 16302L, 16309L, 16325L), class = "Date"), class = "zoo"),
structure(c(1, 1.27, 0.91, 1, 0.99, 0.98), index = structure(c(15645L,
15646L, 15898L, 15930L, 15959L, 16252L), class = "Date"), class = "zoo"),
structure(c(1.27, 1.29, 1.28, 1.17, 0.59, 0), index = structure(c(15645L,
15651L, 15665L, 15679L, 15686L, 15747L), class = "Date"), class = "zoo"),
structure(c(1.9, 1.35, 0.66, 1.16, 0.66, 1.16, 1.26, 1.23,
1.28, 1.23, 1.17, 0.66, 1.18, 0.66, 1.29, 1.35, 1.45, 1.53,
1.61, 1.82, 1.8, 1.89, 1.8, 1.81, 1.78, 1.68, 2.18, 1.68,
1.56, 1.93, 1.84, 1.69, 1.18, 1.73, 1.18, 1.72, 1.83, 1.9,
1.99, 1.93, 1.87, 1.96, 2.1, 2.22, 2.33, 2.38, 2.35, 2.23,
2.16, 2.18, 2.17, 2.2, 2.29, 2.27, 2.28, 2.42, 2.48, 2.99,
2.56, 2.65, 2.69, 3.21, 2.7, 2.8, 2.79, 2.8, 2.78, 2.26,
2.78, 2.26, 2.12, 2.07, 1.97, 1.84, 1.77, 1.18, 1.7, 1.78,
1.91, 1.98, 1.93, 1.83, 1.76, 1.18, 1.01, 0.97, 0.86, 0.69,
0.56), index = structure(c(15645L, 15652L, 15660L, 15740L,
15797L, 15841L, 15860L, 15867L, 15876L, 15887L, 15890L, 15897L,
15901L, 15905L, 15908L, 15909L, 15910L, 15911L, 15915L, 15926L,
15931L, 15932L, 15938L, 15953L, 15954L, 15975L, 15978L, 15979L,
15981L, 15982L, 15985L, 15986L, 15987L, 16001L, 16003L, 16006L,
16008L, 16010L, 16014L, 16016L, 16021L, 16022L, 16023L, 16027L,
16029L, 16031L, 16045L, 16052L, 16059L, 16072L, 16077L, 16078L,
16084L, 16091L, 16098L, 16100L, 16101L, 16106L, 16132L, 16133L,
16134L, 16139L, 16146L, 16150L, 16153L, 16157L, 16160L, 16163L,
16167L, 16169L, 16170L, 16171L, 16175L, 16177L, 16182L, 16184L,
16212L, 16216L, 16220L, 16224L, 16248L, 16254L, 16258L, 16261L,
16297L, 16301L, 16309L, 16310L, 16317L), class = "Date"), class = "zoo"),
structure(c(3.35, 3.44, 3.41, 3.14, 3.11, 2.55, 2.65, 2.87,
3.14, 3.24, 3.41, 4.04, 4.19, 4.34, 4.44, 1.2, 1.3, 1.29,
1.3, 1.27, 0.77, 0.69, 0.55, 0), index = structure(c(15645L,
15650L, 15694L, 15740L, 15741L, 15742L, 15743L, 15749L, 15750L,
15751L, 15755L, 15756L, 15758L, 15762L, 15784L, 15800L, 15805L,
15810L, 15824L, 15835L, 15838L, 15840L, 15847L, 15849L), class = "Date"), class = "zoo"))
>
The input displayed at the top of the question appears to be the first three components of the input specified at the bottom of the question. The variable name used at the bottom of the question, lodf, seems to suggest that it contains a list of data frames but in fact it contains a list of zoo objects.
The question asks for a single data frame result but we are assuming that the output should be a single zoo series too, for consistency. Also we shall use the name L for the input as lodf would wrongly suggest a list of data frames. If z is the result as a zoo series then
data.frame(index = index(z), data = coredata(z))
could be used if a data frame really were desired.
In the output section near the end of this answer we show the result of using as our input L <- lodf[1:3] (i.e. first 3 components only) and separately show the output using L <- lodf (i.e. all components) as our input.
1) Reduce. We merge the zoo series in the list, L, returning a list and filling in missing values with 0. Then use Reduce to sum the components:
# Merge into a list of aligned series (gaps filled with 0), then add them up.
aligned <- do.call(merge, c(L, list(retclass = "list", fill = 0)))
Reduce(`+`, aligned)
1a) A variation of this is to return a zoo object from merge (which is the default if we do not specify retclass), then fill in its NAs with 0, turn it back into a list and use Reduce:
# Merge into one multi-column zoo object, replace its NAs with 0, split it
# back into a list of columns and add them up.
merged <- do.call(merge, L)
Reduce(`+`, as.list(na.fill(merged, 0)))
2) rowSums In this solution we merge the lists to give zoo object z, optionally add column names and then add across rows producing the final zoo object.
# Merge all series into one multi-column zoo object, one column per series.
z <- do.call(merge, L)
# Optionally add names. They belong on the merged object z: L is a plain
# list and has no columns to name.
colnames(z) <- seq_along(L)
# Add across each date, skipping the NAs left where a series has no value.
zoo(rowSums(z, na.rm = TRUE), time(z))
Note that a rowSums solution of zoo objects previously appeared here
3) + If we knew that there were exactly 3 components to the list then an alternate way to write the above would be this. We optionally add names 1, 2, 3, merge the zoo objects and fill NAs with 0. Finally we add the series together. Modify in the obvious way if the number of components differs.
# Merge the series and replace the resulting NAs with 0 so that `+` works.
merged <- do.call(merge, L)
z0 <- na.fill(merged, 0)
colnames(z0) <- 1:3 # optionally add names 1, 2, 3
# Exactly three components are assumed here; add one term per extra series.
z0[, 1] + z0[, 2] + z0[, 3]
Output Using L <- lodf[1:3] as displayed at the start of the question where lodf is shown at the bottom of the question our output is:
2012-11-01 2012-11-02 2013-07-12 2013-08-13 2013-09-11 2014-07-01 2014-07-22
1.00 1.27 0.91 1.00 0.99 0.98 0.50
2014-07-24 2014-08-08 2014-08-14 2014-08-20 2014-08-27 2014-09-12
0.75 1.15 2.61 1.36 1.28 3.67
or using L <- lodf in the above we get the following (except for solution 3 which would have to be modified in an obvious way to use 6 rather than 3 components):
2012-11-01 2012-11-02 2012-11-06 2012-11-07 2012-11-08 2012-11-16 2012-11-21
7.52 1.27 3.44 1.29 1.35 0.66 1.28
2012-12-05 2012-12-12 2012-12-20 2013-02-04 2013-02-05 2013-02-06 2013-02-07
1.17 0.59 3.41 4.30 3.11 2.55 2.65
2013-02-11 2013-02-13 2013-02-14 2013-02-15 2013-02-19 2013-02-20 2013-02-22
0.00 2.87 3.14 3.24 3.41 4.04 4.19
2013-02-26 2013-03-20 2013-04-02 2013-04-05 2013-04-10 2013-04-15 2013-04-29
4.34 4.44 0.66 1.20 1.30 1.29 1.30
2013-05-10 2013-05-13 2013-05-15 2013-05-16 2013-05-22 2013-05-24 2013-06-04
1.27 0.77 0.69 1.16 0.55 0.00 1.26
2013-06-11 2013-06-20 2013-07-01 2013-07-04 2013-07-11 2013-07-12 2013-07-15
1.23 1.28 1.23 1.17 0.66 0.91 1.18
2013-07-19 2013-07-22 2013-07-23 2013-07-24 2013-07-25 2013-07-29 2013-08-09
0.66 1.29 1.35 1.45 1.53 1.61 1.82
2013-08-13 2013-08-14 2013-08-15 2013-08-21 2013-09-05 2013-09-06 2013-09-11
1.00 1.80 1.89 1.80 1.81 1.78 0.99
2013-09-27 2013-09-30 2013-10-01 2013-10-03 2013-10-04 2013-10-07 2013-10-08
1.68 2.18 1.68 1.56 1.93 1.84 1.69
2013-10-09 2013-10-23 2013-10-25 2013-10-28 2013-10-30 2013-11-01 2013-11-05
1.18 1.73 1.18 1.72 1.83 1.90 1.99
2013-11-07 2013-11-12 2013-11-13 2013-11-14 2013-11-18 2013-11-20 2013-11-22
1.93 1.87 1.96 2.10 2.22 2.33 2.38
2013-12-06 2013-12-13 2013-12-20 2014-01-02 2014-01-07 2014-01-08 2014-01-14
2.35 2.23 2.16 2.18 2.17 2.20 2.29
2014-01-21 2014-01-28 2014-01-30 2014-01-31 2014-02-05 2014-03-03 2014-03-04
2.27 2.28 2.42 2.48 2.99 2.56 2.65
2014-03-05 2014-03-10 2014-03-17 2014-03-21 2014-03-24 2014-03-28 2014-03-31
2.69 3.21 2.70 2.80 2.79 2.80 2.78
2014-04-03 2014-04-07 2014-04-09 2014-04-10 2014-04-11 2014-04-15 2014-04-17
2.26 2.78 2.26 2.12 2.07 1.97 1.84
2014-04-22 2014-04-24 2014-05-22 2014-05-26 2014-05-30 2014-06-03 2014-06-27
1.77 1.18 1.70 1.78 1.91 1.98 1.93
2014-07-01 2014-07-03 2014-07-07 2014-07-10 2014-07-22 2014-07-24 2014-08-08
0.98 1.83 1.76 1.18 0.50 0.75 1.15
2014-08-14 2014-08-15 2014-08-19 2014-08-20 2014-08-27 2014-08-28 2014-09-04
2.61 1.01 0.97 1.36 2.14 0.69 0.56
2014-09-12
Updates Added additional solutions and re-arranged and expanded presentation.
Try this (assuming the list elements are zoo objects and you need the sum of the values at matching index dates).
library(xts)
library(zoo)

# Merge all series into one object with one column per series, named
# Value1, Value2, ...
merged <- do.call(`merge`, lodf)
z1 <- setNames(merged, paste0("Value", seq_along(lodf)))

# Add across each date, ignoring the NAs left where a series has no value.
totals <- data.frame(value = rowSums(z1, na.rm = TRUE))
xts(totals, order.by = index(z1))
# value
#2012-11-01 1.00
#2012-11-02 1.27
#2013-07-12 0.91
#2013-08-13 1.00
#2013-09-11 0.99
#2014-07-01 0.98
#2014-07-22 0.50
#2014-07-24 0.75
#2014-08-08 1.15
#2014-08-14 2.61
#2014-08-20 1.36
#2014-08-27 1.28
#2014-09-12 3.67
If you need to use na.locf before summing
# Carry each series' last observation forward, then add across each date.
z2 <- na.locf(z1)
filled_totals <- data.frame(value = rowSums(z2, na.rm = TRUE))
xts(filled_totals, order.by = index(z2))
data
lodf <- list(structure(c(1.15, 1.32, 2.39), index = structure(c(16290,
16296, 16325), class = "Date"), class = "zoo"), structure(c(0.5,
0.75, 1.29, 1.36, 1.28, 1.28), index = structure(c(16273, 16275,
16296, 16302, 16309, 16325), class = "Date"), class = "zoo"),
structure(c(1, 1.27, 0.91, 1, 0.99, 0.98), index = structure(c(15645,
15646, 15898, 15930, 15959, 16252), class = "Date"), class = "zoo"))
With base R:
lodf = list(structure(list(`014-08-08` = 1.15, `2014-08-14` = 1.32,
`2014-09-12` = 2.39), .Names = c("014-08-08", "2014-08-14",
"2014-09-12"), class = "data.frame", row.names = c(NA, -1L)),
structure(list(`2014-07-22` = 0.5, `2014-07-24` = 0.75, `2014-08-14` = 1.29,
`2014-08-20` = 1.36, `2014-08-27` = 1.28, `2014-09-12` = 1.28), .Names = c("2014-07-22",
"2014-07-24", "2014-08-14", "2014-08-20", "2014-08-27", "2014-09-12"
), class = "data.frame", row.names = c(NA, -1L)), structure(list(
`2012-11-01` = 1, `2012-11-02` = 1.27, `2013-07-12` = 0.91,
`2013-08-13` = 1, `2013-09-11` = 0.99, `2014-07-01` = 0.98), .Names = c("2012-11-01",
"2012-11-02", "2013-07-12", "2013-08-13", "2013-09-11", "2014-07-01"
), class = "data.frame", row.names = c(NA, -1L)))
lodf
[[1]]
014-08-08 2014-08-14 2014-09-12
1 1.15 1.32 2.39
[[2]]
2014-07-22 2014-07-24 2014-08-14 2014-08-20 2014-08-27 2014-09-12
1 0.5 0.75 1.29 1.36 1.28 1.28
[[3]]
2012-11-01 2012-11-02 2013-07-12 2013-08-13 2013-09-11 2014-07-01
1 1 1.27 0.91 1 0.99 0.98
# Flatten the list of one-row data frames into "date_value" strings, one per
# column of every data frame.
ll <- unlist(lapply(lodf, function(x) paste(names(x), x, sep = "_")))

# Build the data frame directly from the vector. This avoids growing an empty
# data frame by assignment and the 1:length(ll) index, which misbehaves when
# ll is empty.
ddf <- data.frame(full = ll, stringsAsFactors = FALSE)
ddf
full
1 014-08-08_1.15
2 2014-08-14_1.32
3 2014-09-12_2.39
4 2014-07-22_0.5
5 2014-07-24_0.75
6 2014-08-14_1.29
7 2014-08-20_1.36
8 2014-08-27_1.28
9 2014-09-12_1.28
10 2012-11-01_1
11 2012-11-02_1.27
12 2013-07-12_0.91
13 2013-08-13_1
14 2013-09-11_0.99
15 2014-07-01_0.98
# Split every "date_value" string once and reuse the pieces for both columns.
pieces <- strsplit(ddf$full, "_")
ddf$date <- vapply(pieces, function(p) p[1], character(1))
ddf$value <- as.numeric(vapply(pieces, function(p) p[2], character(1)))

# Drop the combined column now that date and value are separate.
ddf <- ddf[, -1]
ddf
date value
1 014-08-08 1.15
2 2014-08-14 1.32
3 2014-09-12 2.39
4 2014-07-22 0.50
5 2014-07-24 0.75
6 2014-08-14 1.29
7 2014-08-20 1.36
8 2014-08-27 1.28
9 2014-09-12 1.28
10 2012-11-01 1.00
11 2012-11-02 1.27
12 2013-07-12 0.91
13 2013-08-13 1.00
14 2013-09-11 0.99
15 2014-07-01 0.98
Finally:
aggregate(value~date, ddf, sum)
date value
1 2012.11.01 1.00
2 2012.11.02 1.27
3 2013.07.12 0.91
4 2013.08.13 1.00
5 2013.09.11 0.99
6 2014.07.01 0.98
7 2014.07.22 0.50
8 2014.07.24 0.75
9 2014.08.08 1.15
10 2014.08.14 2.61
11 2014.08.20 1.36
12 2014.08.27 1.28
13 2014.09.12 3.67
I have a data frame with monthly returns and their corresponding month.
Data <- read.csv("C:/Users/h/Desktop/overflow.csv", sep=";", dec=",")
Data$Date <- as.Date(as.character(Data$Date), format="%Y-%m-%d")
The data frame looks like this now:
> Data
Fund.A Fund.B Fund.C Fund.D
2012-01-01 -0.01 0.04 0.11 0.10
2012-02-01 -0.04 -0.06 0.08 0.11
2012-03-01 -0.04 -0.07 0.15 -0.03
2012-04-01 0.00 -0.08 -0.04 0.13
2012-05-01 -0.07 0.10 0.06 0.02
2012-06-01 -0.05 0.06 0.06 -0.02
2012-07-01 0.12 -0.06 -0.09 -0.06
2012-08-01 0.08 -0.03 0.05 0.13
2012-09-01 0.10 0.07 -0.02 0.15
2012-10-01 -0.08 0.14 0.00 -0.04
2012-11-01 -0.09 0.11 -0.07 0.12
2012-12-01 -0.01 -0.09 0.07 -0.02
Now I want to continue the time series with new returns from a new csv, by simply matching the new return with the appropriate Fund in "Data". My problem is that new assets might have been added, messing up the order.
import <- read.csv("C:/Users/h/Desktop/import.csv", sep=";", dec=",")
import
2013-01-01
1 Funds: NA
2 Fund A 0.04
3 Fund AA -0.09
4 Fund C -0.10
5 Fund D 0.03
6 Fund B 0.14
As you can see, the "import" csv has new assets (Fund AA) as well as assets already seen in "Data" (Fund A to D), and the funds are in rows rather than columns. How can I write code that matches and adds a row to "Data", where each value in "import" falls under the right column (Fund) in "Data"? And, if a new asset has been added, creates a column for the new asset?
As a bonus, the code would only add a row if the date in "import" is more recent date than the most recent one in "Data". To only import new returns.
Appreciate it!
For time series purposes, I would recommend using xts. It makes life a bit easier. Borrowing from Arun's usable data:
olddata <- structure(list(Date = structure(c(15340, 15371, 15400, 15431,
15461, 15492, 15522, 15553, 15584, 15614, 15645, 15675), class = "Date"),
Fund.A = c(-0.01, -0.04, -0.04, 0, -0.07, -0.05, 0.12, 0.08, 0.1, -0.08,
-0.09, -0.01), Fund.B = c(0.04, -0.06, -0.07, -0.08, 0.1, 0.06, -0.06,
-0.03, 0.07, 0.14, 0.11, -0.09), Fund.C = c(0.11, 0.08, 0.15, -0.04,
0.06, 0.06, -0.09, 0.05, -0.02, 0, -0.07, 0.07), Fund.D = c(0.1, 0.11,
-0.03, 0.13, 0.02, -0.02, -0.06, 0.13, 0.15, -0.04, 0.12, -0.02)),
.Names = c("Date", "Fund.A", "Fund.B", "Fund.C", "Fund.D"),
row.names = c(NA, 12L), class = "data.frame")
newimport <- structure(list(funds = c("Fund.A", "Fund.AA", "Fund.C",
"Fund.D", "Fund.B"), `2013-01-01` = c(0.04, -0.09, -0.1, 0.03, 0.14)),
.Names = c("funds", "2013-01-01"), row.names = c(NA, -5L),
class = "data.frame")
Convert data to xts for easy datewise subsetting:
# Existing returns: every column except Date, indexed by the Date column.
old_dates <- olddata$Date
olddata <- xts(olddata[, -1], order.by = old_dates)

# New returns arrive with funds in rows and the date as the column header, so
# transpose them into a single dated row with one column per fund.
new_dates <- as.Date(colnames(newimport)[-1])
newdata <- xts(t(newimport[, -1]), order.by = new_dates)
colnames(newdata) <- newimport[, 1]
Merge data together while taking care of any new columns:
# Which of the imported funds already have a column in olddata?
cols <- colnames(newdata) %in% colnames(olddata)

# Take the shared funds in olddata's column order, so the appended row lines
# up with the existing columns whether rbind() matches by name or by position
# (the import lists the funds in a different order).
shared <- colnames(olddata)[colnames(olddata) %in% colnames(newdata)]

# Append the new row for the known funds, then merge in brand-new funds as
# extra columns (NA for all earlier dates).
combineData <- merge(rbind(olddata, newdata[, shared]), newdata[, !cols])
combineData
Fund.A Fund.B Fund.C Fund.D Fund.AA
2012-01-01 -0.01 0.04 0.11 0.10 NA
2012-02-01 -0.04 -0.06 0.08 0.11 NA
2012-03-01 -0.04 -0.07 0.15 -0.03 NA
2012-04-01 0.00 -0.08 -0.04 0.13 NA
2012-05-01 -0.07 0.10 0.06 0.02 NA
2012-06-01 -0.05 0.06 0.06 -0.02 NA
2012-07-01 0.12 -0.06 -0.09 -0.06 NA
2012-08-01 0.08 -0.03 0.05 0.13 NA
2012-09-01 0.10 0.07 -0.02 0.15 NA
2012-10-01 -0.08 0.14 0.00 -0.04 NA
2012-11-01 -0.09 0.11 -0.07 0.12 NA
2012-12-01 -0.01 -0.09 0.07 -0.02 NA
2013-01-01 0.04 0.14 -0.10 0.03 -0.09