Divide matrix values by category means in R

I have a matrix (A) containing 211 rows and 6 columns (one per time period), and a second matrix (B) containing 211 rows and 2 columns, the second of which holds categorical information (1-9).
My aim is to create a new matrix (C) in which each value of A is divided by the mean of its column (time wave) within the category given in B. I managed to compute the means per category and per column with the aggregate function. These are stored in a separate data frame, column_means, with each time wave in a separate column and the group identifier in column_means[,1].
I don't understand how to proceed from here and am looking for an elegant solution so I can transfer this knowledge to future projects (and possibly improve my existing code). My guess is that the solution is hidden somewhere in dplyr and rather simple once you know it.
Thank you for any suggestions.
Data example:
##each column here represents a wave:
initialmatrix <- structure(c(0.882647671948723, 0.847932241438909, 0.753052308699317,
0.754977233408875, NA, 0.886095543329695, 0.849625252682829,
0.78893884364632, 0.77111113840682, NA, 0.887255207679895, 0.851503493865384,
0.812107856411831, 0.793982699495818, NA, 0.885212452552841,
0.854894065774315, 0.815265718290737, 0.806766276556325, NA,
0.882027335190646, 0.85386634818439, 0.818052477777012, 0.815997781565393,
NA, 0.88245957310107, 0.855819521951304, 0.830425687228663, 0.820857689847061,
NA), .Dim = 5:6, .Dimnames = list(NULL, c("V1", "V2", "V3", "V4",
"V5", "V6")))
##the first column is unique ID, the 2nd the category:
categories <- structure(c(1L, 2L, 3L, 4L, 5L, 2L, 1L, 2L, 2L, 4L), .Dim = c(5L,
2L), .Dimnames = list(NULL, c("V1", "V2")))
##the first column represents the category, column 1-6 the mean per category for each corresponding wave in "initialmatrix"
column.means <- structure(list(Group.1 = 1:5, x = c(0.805689153058216, 0.815006230419524,
0.832326976776262, 0.794835253329865, 0.773041961434791), asset_means_2...2. = c(0.80050960343197,
0.81923553710203, 0.833814773618545, 0.797834687980729, 0.780028077018158
), asset_means_3...2. = c(0.805053341257357, 0.828691564900149,
0.833953165695685, 0.799381078569563, 0.785813047374534), asset_means_4...2. = c(0.806116664276125,
0.832439754757116, 0.835982197159582, 0.801702200401293, 0.788814840753852
), asset_means_5...2. = c(0.807668548993891, 0.83801834926905,
0.836036508152776, 0.803433961863399, 0.79014026195926), asset_means_6...2. = c(0.808800359101212,
0.840923947682599, 0.839660313992458, 0.804901773257962, 0.793165113115977
)), row.names = c(NA, 5L), class = "data.frame")

Is this what you are trying to do?
options(digits=3)
divisor <- column.means[categories[, 2], -1]
divisor
# x asset_means_2...2. asset_means_3...2. asset_means_4...2. asset_means_5...2. asset_means_6...2.
# 2 0.815 0.819 0.829 0.832 0.838 0.841
# 1 0.806 0.801 0.805 0.806 0.808 0.809
# 2.1 0.815 0.819 0.829 0.832 0.838 0.841
# 2.2 0.815 0.819 0.829 0.832 0.838 0.841
# 4 0.795 0.798 0.799 0.802 0.803 0.805
initialmatrix/divisor
# x asset_means_2...2. asset_means_3...2. asset_means_4...2. asset_means_5...2. asset_means_6...2.
# 2 1.083 1.082 1.071 1.063 1.053 1.049
# 1 1.052 1.061 1.058 1.061 1.057 1.058
# 2.1 0.924 0.963 0.980 0.979 0.976 0.988
# 2.2 0.926 0.941 0.958 0.969 0.974 0.976
# 4 NA NA NA NA NA NA

This looks like a job for Superma ... no wait ... map2.
library(dplyr)
library(purrr)
as_tibble(initialmatrix) %>%
  mutate(category = as.double(as_tibble(categories)$V2),
         across(starts_with('V'),
                ~ unlist(map2(., category, ~ .x / mean(c(.x, .y)))))) %>%
  select(-category)
# V1 V2 V3 V4 V5 V6
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 0.612 0.614 0.615 0.614 0.612 0.612
# 2 0.918 0.919 0.920 0.922 0.921 0.922
# 3 0.547 0.566 0.578 0.579 0.581 0.587
# 4 0.548 0.557 0.568 0.575 0.580 0.582
# 5 NA NA NA NA NA NA
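
Since the question asks specifically for a dplyr-style route that reuses the precomputed column.means, here is a hedged join-based sketch; it assumes the six mean columns correspond, in order, to waves V1-V6:
library(dplyr)
library(tidyr)

# give the mean columns the same names as the waves (assumed to line up with V1..V6)
means_long <- setNames(column.means, c("category", paste0("V", 1:6))) %>%
  pivot_longer(-category, names_to = "wave", values_to = "cat_mean")

as_tibble(initialmatrix) %>%
  mutate(id = categories[, 1], category = categories[, 2]) %>%
  pivot_longer(starts_with("V"), names_to = "wave", values_to = "value") %>%
  left_join(means_long, by = c("category", "wave")) %>%
  mutate(ratio = value / cat_mean) %>%
  pivot_wider(id_cols = c(id, category), names_from = wave, values_from = ratio)
Dropping the final pivot_wider() leaves the result in long format, which is often the more convenient shape for further dplyr work.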

Related

Combine list elements into a data frame in R

I currently have a list with columns as individual elements.
I would like to combine list elements with the same column names (i.e. bind rows) and merge across the different columns (i.e. bind columns) into a single data frame. I'm having difficulty finding examples of how to do this.
l = list(est = c(0, 0.062220390087795, 1.1020213968139, 0.0359939361491544
), se = c(0.0737200634874046, 0.237735179934829, 0.18105632705918,
0.111359438298789), rf = structure(c(NA, NA, NA, 4L), levels = c("Never\nsmoker",
"Occasional\nsmoker", "Ex-regular\nsmoker", "Smoker"), class = "factor"),
n = c(187L, 18L, 32L, 82L), model = c("Crude", "Crude", "Crude",
"Crude"), est = c(0, 0.112335510453586, 0.867095253670329,
0.144963556944891), se = c(0.163523775933409, 0.237039485900481,
0.186247776987999, 0.119887623484768), rf = structure(c(NA,
NA, NA, 4L), levels = c("Never\nsmoker", "Occasional\nsmoker",
"Ex-regular\nsmoker", "Smoker"), class = "factor"), n = c(187L,
18L, 32L, 82L), model = c("Model 1", "Model 1", "Model 1",
"Model 1"), est = c(0, 0.107097305324242, 0.8278765140371,
0.0958220447859447), se = c(0.164787596943329, 0.237347836229364,
0.187201880036661, 0.120882616647714), rf = structure(c(NA,
NA, NA, 4L), levels = c("Never\nsmoker", "Occasional\nsmoker",
"Ex-regular\nsmoker", "Smoker"), class = "factor"), n = c(187L,
18L, 32L, 82L), model = c("Model 2", "Model 2", "Model 2",
"Model 2"))
I would like the data to have the following format:
data.frame(
est = c(),
se = c(),
rf = c(),
model = c()
)
Any help would be appreciated. Thank you!
In this solution, the elements of l are first grouped by name and combined using c(). The resulting list is then converted to a data frame using map_dfc().
library(dplyr)
library(purrr)
cols <- c("est", "se", "rf", "model")
setNames(cols, cols) |>
  map(~ l[names(l) == .x]) |>
  map_dfc(~ do.call(c, .x))
#> # A tibble: 12 × 4
#> est se rf model
#> <dbl> <dbl> <fct> <chr>
#> 1 0 0.0737 NA Crude
#> 2 0.0622 0.238 NA Crude
#> 3 1.10 0.181 NA Crude
#> 4 0.0360 0.111 Smoker Crude
#> 5 0 0.164 NA Model 1
#> 6 0.112 0.237 NA Model 1
#> 7 0.867 0.186 NA Model 1
#> 8 0.145 0.120 Smoker Model 1
#> 9 0 0.165 NA Model 2
#> 10 0.107 0.237 NA Model 2
#> 11 0.828 0.187 NA Model 2
#> 12 0.0958 0.121 Smoker Model 2
Another option:
library(purrr)
grp <- (seq(length(l)) - 1) %/% 5
l_split <- split(l, grp)
map_df(l_split, c)
#> # A tibble: 12 × 5
#> est se rf n model
#> <dbl> <dbl> <fct> <int> <chr>
#> 1 0 0.0737 <NA> 187 Crude
#> 2 0.0622 0.238 <NA> 18 Crude
#> 3 1.10 0.181 <NA> 32 Crude
#> 4 0.0360 0.111 Smoker 82 Crude
#> 5 0 0.164 <NA> 187 Model 1
#> 6 0.112 0.237 <NA> 18 Model 1
#> 7 0.867 0.186 <NA> 32 Model 1
#> 8 0.145 0.120 Smoker 82 Model 1
#> 9 0 0.165 <NA> 187 Model 2
#> 10 0.107 0.237 <NA> 18 Model 2
#> 11 0.828 0.187 <NA> 32 Model 2
#> 12 0.0958 0.121 Smoker 82 Model 2
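For completeness, a base-R sketch of the same idea, assuming the list always cycles through the same element names starting with est:
# start a new group every time an "est" element reappears
grp <- cumsum(names(l) == "est")
do.call(rbind, lapply(split(l, grp), as.data.frame))
This keeps the n column too; drop it afterwards if only est, se, rf and model are wanted.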

How to sum up durations if certain patterns are found across columns

I have a dataframe with words and their durations in speech:
test1
d1 d2 d3 d4 d5 d6 d7 d8 d9 d10 w1 w2 w3 w4 w5 w6 w7 w8 w9 w10
10 0.103 0.168 0.198 0.188 0.359 0.343 0.064 0.075 0.095 0.367 And I thought oh no Sarah do n't do it
132 0.091 0.072 0.109 0.119 0.113 0.087 0.088 0.264 0.092 0.249 I du n no you ca n't see his head
784 0.152 0.341 0.117 0.108 0.123 0.263 0.083 0.095 0.099 0.098 Oh honestly I did n't touch it I did n't
The short form n't is treated as if it were a separate word. That's okay as long as the preceding word ends in a consonant, such as did, but it's not okay if the preceding word ends in a vowel, such as do or ca. Because that separation into different words is incorrect, the separation into different durations is incorrect too.
What I'd like to do is sum up the durations of ca and n't as well as do and n't, but leave the separate durations for did and n't alone.
I know how to select the rows where the changes need to be implemented:
test1[which(grepl("(?<=(ca|do)\\s)n't", apply(test1, 1, paste0, collapse = " "), perl = T)),]
but I'm stuck going forward.
The desired result would look like this:
d1 d2 d3 d4 d5 d6 d7 d8 d9 d10 w1 w2 w3 w4 w5 w6 w7 w8 w9 w10
10 0.103 0.168 0.198 0.188 0.359 0.343 0.139 0.095 0.367 NA And I thought oh no Sarah do n't do it
132 0.091 0.072 0.109 0.119 0.113 0.175 0.264 0.092 0.249 NA I du n no you ca n't see his head
784 0.152 0.341 0.117 0.108 0.123 0.263 0.083 0.095 0.099 0.098 Oh honestly I did n't touch it I did n't
How can this be done? Help is much appreciated.
Reproducible data:
test1 <- structure(list(d1 = c(0.103, 0.091, 0.152), d2 = c(0.168, 0.072,
0.341), d3 = c(0.198, 0.109, 0.117), d4 = c(0.188, 0.119, 0.108
), d5 = c(0.359, 0.113, 0.123), d6 = c(0.343, 0.087, 0.263),
d7 = c(0.064, 0.088, 0.083), d8 = c(0.075, 0.264, 0.095),
d9 = c(0.095, 0.092, 0.099), d10 = c(0.367, 0.249, 0.098),
w1 = c("And", "I", "Oh"), w2 = c("I", "du", "honestly"),
w3 = c("thought", "n", "I"), w4 = c("oh", "no", "did"), w5 = c("no",
"you", "n't"), w6 = c("Sarah", "ca", "touch"), w7 = c("do",
"n't", "it"), w8 = c("n't", "see", "I"), w9 = c("do", "his",
"did"), w10 = c("it", "head", "n't")), row.names = c(10L,
132L, 784L), class = "data.frame")
I think this is best done with data in long instead of wide format so you can take advantage of grouping operations:
library(dplyr)
library(tidyr)
library(tibble)
test1 %>%
  rownames_to_column() %>%
  pivot_longer(-rowname, names_to = c(".value", "number"), names_pattern = "(\\D)(\\d+)") %>%
  group_by(rowname) %>%
  mutate(wid = cumsum(!(lag(w) %in% c("ca", "do") & w == "n't"))) %>%
  group_by(rowname, wid) %>%
  summarise(d = sum(d),
            w = paste0(w, collapse = "")) %>%
  pivot_wider(names_from = wid, values_from = c(d, w), names_sep = "")
`summarise()` regrouping output by 'rowname' (override with `.groups` argument)
# A tibble: 3 x 21
# Groups: rowname [3]
rowname d1 d2 d3 d4 d5 d6 d7 d8 d9 d10 w1 w2 w3 w4 w5 w6 w7 w8 w9 w10
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 10 0.103 0.168 0.198 0.188 0.359 0.343 0.139 0.095 0.367 NA And I thought oh no Sarah don't do it NA
2 132 0.091 0.072 0.109 0.119 0.113 0.175 0.264 0.092 0.249 NA I du n no you can't see his head NA
3 784 0.152 0.341 0.117 0.108 0.123 0.263 0.083 0.095 0.099 0.098 Oh honestly I did n't touch it I did n't
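
The heavy lifting is done by the cumsum() line, which starts a new word id except when the current token is n't and the previous one is ca or do. A quick check on one sentence (toy vector for illustration) makes the grouping visible:
library(dplyr)

w <- c("I", "du", "n", "no", "you", "ca", "n't", "see", "his", "head")
cumsum(!(lag(w) %in% c("ca", "do") & w == "n't"))
#> [1] 1 2 3 4 5 6 6 7 8 9    # "ca" and "n't" share id 6, so their durations get summed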

Control the level of detail in a pivot in R (tidyverse) [duplicate]

This question already has answers here:
Reshaping multiple sets of measurement columns (wide format) into single columns (long format)
(8 answers)
I have an extremely wide dataset that I am trying to unpivot to a degree but not completely. Essentially I am trying to group certain columns together based on a string before an underscore and pivot on those groups individually. My current method uses two opposite pivots, a for loop, and an intermediate list to accomplish my goal. I am able to get my final product but for my own knowledge, I am wondering if there is a more elegant solution. I realize that I am likely not explaining things well so I have recreated the scenario with a dummy dataset.
#Required packages
library(tidyverse)
#Dummy data
file <- as_tibble(data.frame(id = c("QQQ", "WWW", "EEE", "RRR", "TTT"),
state = c("aa", "bb", "cc", "dd", "ee"),
city = c("ff", "gg", "hh", "ii", "jj"),
a_1 = runif(5),
a_2 = runif(5),
a_3 = runif(5),
a_4 = runif(5),
a_5 = runif(5),
a_6 = runif(5),
a_7 = runif(5),
a_8 = runif(5),
a_9 = runif(5),
a_10 = runif(5),
b_1 = runif(5),
b_2 = runif(5),
b_3 = runif(5),
b_4 = runif(5),
b_5 = runif(5),
b_6 = runif(5),
b_7 = runif(5),
b_8 = runif(5),
b_9 = runif(5),
b_10 = runif(5),
c_1 = runif(5),
c_2 = runif(5),
c_3 = runif(5),
c_4 = runif(5),
c_5 = runif(5),
c_6 = runif(5),
c_7 = runif(5),
c_8 = runif(5),
c_9 = runif(5),
c_10 = runif(5)))
#My solution
longer <- file %>%
  pivot_longer(cols = c(-id:-city),
               names_to = c(".value", "section"),
               names_pattern = "(.+)_([0-9]+$)")
num_letterGroup <- ncol(longer) - 4  # 4 is the number of columns I want to retain
wide_list <- vector(mode = "list", length = num_letterGroup)
name_list <- vector(mode = "character", length = num_letterGroup)
for (i in 1:num_letterGroup) {
  col_num <- 4 + i
  col_name <- colnames(longer)[col_num]
  wide <- longer %>%
    select(1:4, all_of(col_name)) %>%
    pivot_wider(names_from = section, values_from = col_name) %>%
    mutate(letterGroup = col_name)
  wide_list[[i]] <- wide
  name_list[i] <- col_name
}
names(wide_list) <- name_list
wide_df <- bind_rows(wide_list)
I realize that the amount of data given might seem excessive but I needed the column numbers to be sequential as well as reach double digits. Thank you in advance for any assistance you can provide.
EDIT TO CLARIFY: wide_df is the final product that I want
EDIT
This is actually much simpler than the original answer. (Thanks to @thelatemail)
library(tidyr)
pivot_longer(file,
             cols = -c(id:city),
             names_to = c('letterGroup', '.value'),
             names_sep = '_')
# A tibble: 15 x 14
# id state city letterGroup `1` `2` `3` `4` `5` `6` `7` `8` `9` `10`
# <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 QQQ aa ff a 0.894 0.534 0.583 0.327 0.497 0.254 0.877 0.236 0.585 0.436
# 2 QQQ aa ff b 0.861 0.897 0.244 0.292 0.818 0.428 0.732 0.322 0.702 0.158
# 3 QQQ aa ff c 0.371 0.842 0.918 0.615 0.346 0.675 0.821 0.718 0.461 0.374
# 4 WWW bb gg a 0.573 0.00886 0.555 0.810 0.480 0.763 0.624 0.0667 0.705 0.872
# 5 WWW bb gg b 0.994 0.652 0.961 0.825 0.398 0.0138 0.560 0.695 0.0171 0.704
# 6 WWW bb gg c 0.113 0.988 0.663 0.0461 0.335 0.478 0.291 0.338 0.386 0.183
# 7 EEE cc hh a 0.482 0.197 0.630 0.442 0.633 0.932 0.317 0.119 0.872 0.678
# 8 EEE cc hh b 0.834 0.378 0.504 0.911 0.644 0.976 0.777 0.485 0.470 0.560
# 9 EEE cc hh c 0.819 0.240 0.683 0.570 0.969 0.956 0.745 0.790 0.0548 0.314
#10 RRR dd ii a 0.887 0.818 0.0266 0.444 0.554 0.817 0.332 0.0801 0.966 0.252
#11 RRR dd ii b 0.416 0.211 0.931 0.105 0.948 0.555 0.201 0.656 0.794 0.526
#12 RRR dd ii c 0.652 0.897 0.741 0.254 0.815 0.154 0.422 0.361 0.925 0.696
#13 TTT ee jj a 0.391 0.626 0.358 0.296 0.804 0.743 0.655 0.000308 0.257 0.415
#14 TTT ee jj b 0.764 0.686 0.0174 0.460 0.0164 0.0718 0.700 0.558 0.341 0.411
#15 TTT ee jj c 0.812 0.995 0.845 0.513 0.987 0.249 0.429 0.749 0.557 0.369
Original Answer
You can get the data completely in long format (no need for intermediate columns), separate the column names into two different columns, and then spread the data back into wide format.
file %>%
  pivot_longer(cols = -c(id:city)) %>%
  separate(name, into = c('letterGroup', 'col'), sep = "_") %>%
  pivot_wider(names_from = col, values_from = value)
You can try this:
library(tidyr)
# long format, with the letter and number split into separate columns
long <- pivot_longer(file, cols = names(file)[-c(1:3)]) %>%
  separate(name, into = c('letter', 'number'), sep = '_')

# tidyr
df1 <- pivot_wider(long, names_from = number, values_from = value)

# base reshape (same result, with the wide column names cleaned up afterwards)
df2 <- reshape(as.data.frame(long), idvar = c('id', 'state', 'city', 'letter'),
               timevar = 'number', direction = 'wide')
names(df2) <- gsub('value.', '', names(df2))
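The EDIT solution above uses names_sep = '_'; if the letter groups could themselves contain underscores, a names_pattern variant is slightly more defensive (a sketch, the regex being my assumption about the naming scheme):
library(tidyr)

# anchor the trailing digits so "a_10" splits into "a" and "10"
pivot_longer(file,
             cols = -c(id:city),
             names_to = c('letterGroup', '.value'),
             names_pattern = '(.+)_([0-9]+)$')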

Calculate total sum of squares between clusters in R

My objective is to compare which of the two clustering methods I've used, cluster_method_1 and cluster_method_2, has the largest between-cluster sum of squares, in order to identify which one achieves better separation.
I'm basically looking for an efficient way to calculate the distance between each point of cluster 1 and all points of clusters 2, 3, 4, and so on.
example dataframe:
structure(list(x1 = c(0.01762376, -1.147739752, 1.073605848,
2.000420899, 0.01762376, 0.944438811, 2.000420899, 0.01762376,
-1.147739752, -1.147739752), x2 = c(0.536193126, 0.885609849,
-0.944699546, -2.242627057, -1.809984553, 1.834120637, 0.885609849,
0.96883563, 0.186776403, -0.678508604), x3 = c(0.64707104, -0.603759684,
-0.603759684, -0.603759684, -0.603759684, 0.64707104, -0.603759684,
-0.603759684, -0.603759684, 1.617857394), x4 = c(-0.72712328,
0.72730861, 0.72730861, -0.72712328, -0.72712328, 0.72730861,
0.72730861, -0.72712328, -0.72712328, -0.72712328), cluster_method_1 = structure(c(1L,
3L, 3L, 3L, 2L, 2L, 3L, 2L, 1L, 4L), .Label = c("1", "2", "4",
"6"), class = "factor"), cluster_method_2 = structure(c(5L, 3L,
1L, 3L, 4L, 2L, 1L, 1L, 1L, 6L), .Label = c("1", "2", "3", "4",
"5", "6"), class = "factor")), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"))
x1 x2 x3 x4 cluster_method_1 cluster_method_2
<dbl> <dbl> <dbl> <dbl> <fct> <fct>
1 0.0176 0.536 0.647 -0.727 1 5
2 -1.15 0.886 -0.604 0.727 4 3
3 1.07 -0.945 -0.604 0.727 4 1
4 2.00 -2.24 -0.604 -0.727 4 3
5 0.0176 -1.81 -0.604 -0.727 2 4
6 0.944 1.83 0.647 0.727 2 2
7 2.00 0.886 -0.604 0.727 4 1
8 0.0176 0.969 -0.604 -0.727 2 1
9 -1.15 0.187 -0.604 -0.727 1 1
10 -1.15 -0.679 1.62 -0.727 6 6
The within sum-of-squares for a cluster S_i can be written as the sum of all pairwise (Euclidean) distances squared, divided by twice the number of points in that cluster: WSS(S_i) = sum_{x,y in S_i} ||x - y||² / (2 |S_i|) (see e.g. the Wikipedia article on k-means clustering).
For convenience we define a function calc_SS that returns the within sum-of-squares for a (numeric) data.frame:
calc_SS <- function(df) sum(as.matrix(dist(df)^2)) / (2 * nrow(df))
It's then straightforward to calculate the within-cluster sum-of-squares for every cluster under each method:
library(tidyverse)
df %>%
  gather(method, cluster, cluster_method_1, cluster_method_2) %>%
  group_by(method, cluster) %>%
  nest() %>%
  transmute(method,
            cluster,
            within_SS = map_dbl(data, ~ calc_SS(.x))) %>%
  spread(method, within_SS)
## A tibble: 6 x 3
# cluster cluster_method_1 cluster_method_2
# <chr> <dbl> <dbl>
#1 1 1.52 9.99
#2 2 10.3 0
#3 3 NA 10.9
#4 4 15.2 0
#5 5 NA 0
#6 6 0 0
The total within sum-of-squares is then just the sum of the within sum-of-squares for every cluster
df %>%
  gather(method, cluster, cluster_method_1, cluster_method_2) %>%
  group_by(method, cluster) %>%
  nest() %>%
  transmute(method,
            cluster,
            within_SS = map_dbl(data, ~ calc_SS(.x))) %>%
  group_by(method) %>%
  summarise(total_within_SS = sum(within_SS)) %>%
  spread(method, total_within_SS)
## A tibble: 1 x 2
# cluster_method_1 cluster_method_2
# <dbl> <dbl>
#1 27.0 20.9
By the way, we can confirm that calc_SS does indeed return the within sum-of-squares using the iris dataset:
set.seed(2018)
df2 <- iris[, 1:4]
km <- kmeans(as.matrix(df2), 3)
df2$cluster <- km$cluster
df2 %>%
  group_by(cluster) %>%
  nest() %>%
  mutate(within_SS = map_dbl(data, ~ calc_SS(.x))) %>%
  arrange(cluster)
## A tibble: 3 x 3
# cluster data within_SS
# <int> <list> <dbl>
#1 1 <tibble [38 × 4]> 23.9
#2 2 <tibble [62 × 4]> 39.8
#3 3 <tibble [50 × 4]> 15.2
km$withinss
#[1] 23.87947 39.82097 15.15100
The total sum of squares, sum_x sum_y ||x - y||², is constant.
It can be computed trivially from the variance.
If you now subtract the within-cluster sum of squares, where x and y belong to the same cluster, the between-cluster sum of squares remains.
This approach takes O(n) time instead of O(n²).
Corollary: the solution with the smallest WCSS has the largest BCSS.
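A base-R sketch of that shortcut on the example data (column names x1:x4 taken from the dput above; this uses the centroid form of the sums of squares, which is also what kmeans() reports):
X <- as.matrix(df[, c("x1", "x2", "x3", "x4")])

# total sum of squares: squared distances of all points to the overall centroid
total_SS <- sum(scale(X, scale = FALSE)^2)

# within-cluster sum of squares for a given cluster assignment
within_SS <- function(cl) {
  sum(sapply(split(as.data.frame(X), cl),
             function(g) sum(scale(as.matrix(g), scale = FALSE)^2)))
}

# between-cluster SS = total SS - within-cluster SS
total_SS - within_SS(df$cluster_method_1)
total_SS - within_SS(df$cluster_method_2)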
Consider the package clValid. It calculates a large number of indexes for validating clustering. The Dunn index is particularly appropriate for what you are trying to do: the documentation says that the Dunn index is the ratio of the smallest distance between observations not in the same cluster to the largest intra-cluster distance. The documentation can be found at https://cran.r-project.org/web/packages/clValid/clValid.pdf.
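A minimal usage sketch, assuming dunn() takes a dist object plus an integer vector of cluster labels (check ?dunn for the exact interface):
library(clValid)

d <- dist(df[, c("x1", "x2", "x3", "x4")])
dunn(distance = d, clusters = as.integer(df$cluster_method_1))  # factor levels -> integer ids
dunn(distance = d, clusters = as.integer(df$cluster_method_2))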

R: how to use lapply or for to loop over all values of multiple strings

I have a dataset with one variable categorized according to 200 alphabetic codes code1<-c("AAA","BBB","DDD","EEE","FFF"), 2 alphabetic codes code2<-c("Yyy","Zzzzz"), and 41 numeric codes code3<-seq(1970,2011,1).
I have a function that produces an 8-number vector from each subset of the data according to unique values of code1, code2, code3. So I would like to run a few lines of code on each subset of these data.
The complete list is imported as a data frame data, and I currently work by extracting each subset from the data frame, analyzing it, and then saving the output.
The problem is that it would be cumbersome to loop through all the values of code1 and code2 and code3 according to this scheme, and it would be much better to produce a single output data frame as well, with the 8 numbers saved alongside the unique values of code1, code2, and code3 which produced them.
I'm sure that this could be done without resorting to loops over values of code1-code3 and assign(), but being a neophyte I'm afraid I can't quite put it together.
Thanks -- E
Additional data:
This is what the output vector from the function I am running looks like, manually subset for one series:
output1 <- fxn(data$input[which(data$code1 == 'AAA' & data$code2 == 'Yyy' & data$code3 == 1990)])
output2<-fxn2(output1)
str(output2$out[,2]): num [1:8] 0.009 0.648 0.304 0.004 0.445 ...
output2$out[,2]: [1] 0.009 0.648 0.304 0.004 0.445 36.720 0.000 1.103
Additional data:
In response to requests, this faked output dataset approximates what I'm looking for -- each row of the file is from one completed run of the function fxn2. The first 8 columns are output by the function; the last 3 columns are added to distinguish the unique values of code1, code2, code3:
> head(data)
X.x1 x2 x3 x4 x5 x6 x7 x8 code3 code2 code1
1 0.008 0.595 0.185 0.005 0.173 36.744 0 1.102 1970 male BGR
2 0.004 0.242 0.276 0.005 0.348 46.017 0 1.108 1971 male BGR
3 0.002 0.553 0.242 0.005 0.247 35.424 0 1.107 1972 male BGR
4 0.005 0.593 0.270 0.004 0.312 43.701 0 1.105 1973 male BGR
5 0.009 0.660 0.217 0.005 0.266 37.955 0 1.103 1974 male BGR
6 0.006 0.347 0.297 0.005 0.411 50.959 0 1.108 1975 male BGR
> dput(head(data))
structure(list(X.x1 = c(0.008, 0.004, 0.002, 0.005, 0.009, 0.006
), x2 = c(0.595, 0.242, 0.553, 0.593, 0.66, 0.347), x3 = c(0.185,
0.276, 0.242, 0.27, 0.217, 0.297), x4 = c(0.005, 0.005, 0.005,
0.004, 0.005, 0.005), x5 = c(0.173, 0.348, 0.247, 0.312, 0.266,
0.411), x6 = c(36.744, 46.017, 35.424, 43.701, 37.955, 50.959
), x7 = c(0, 0, 0, 0, 0, 0), x8 = c(1.102, 1.108, 1.107, 1.105,
1.103, 1.108), year = 1970:1975, sex = structure(c(1L, 1L, 1L,
1L, 1L, 1L), .Label = "male", class = "factor"), iso3 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "BGR", class = "factor")), .Names = c("X.x1",
"x2", "x3", "x4", "x5", "x6", "x7", "x8", "year", "sex", "iso3"
), row.names = c(NA, 6L), class = "data.frame")
I think you can simplify your code by doing this. If you give more details about desired output, I will update the answer accordingly.
code1<-c("AAA","BBB","DDD","EEE","FFF")
code2<-c("Yyy","Zzzzz")
code3<-seq(1970,2011,1)
params <- expand.grid(code1, code2, code3)
names(params) <- c('code1', 'code2', 'code3')
myFunc <- function(code1, code2, code3) {
  ## add your function code here
  ...
  ...
  return(output2$out[, 2])
}
LL <- mapply(FUN = myFunc, code1 = params$code1, code2 = params$code2, code3 = params$code3)
# LL is an 8 x nrow(params) matrix: one column per combination of codes
result <- split(LL, rep(1:ncol(LL), each = nrow(LL)))  # one 8-number vector per combination
result <- do.call(rbind, result)
result <- cbind(result, params)
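If it feels more natural to loop over the actual data subsets instead of over the code grid (for instance because some code combinations never occur), a split()-based sketch along the same lines is shown below; fxn, fxn2 and the input column are the OP's, so treat this as untested scaffolding:
# one data frame per observed combination of the three codes
by_codes <- split(data, list(data$code1, data$code2, data$code3), drop = TRUE)

rows <- lapply(by_codes, function(d) {
  out <- fxn2(fxn(d$input))$out[, 2]   # the 8-number summary for this subset
  data.frame(t(out),
             code1 = d$code1[1], code2 = d$code2[1], code3 = d$code3[1])
})
result <- do.call(rbind, rows)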
