Consider the following data set:
SimulatedDated <- structure(list(CustumerId = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 17L, 17L, 17L,
17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 23L, 23L, 23L, 23L,
23L, 23L, 23L, 23L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L,
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 26L, 26L, 26L,
26L, 26L, 26L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 28L,
28L, 28L, 28L, 28L, 28L, 28L, 29L, 29L, 29L, 29L, 29L, 29L, 29L,
29L, 29L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 31L, 31L,
31L, 31L, 31L, 31L, 31L, 31L, 31L, 32L, 32L, 32L, 32L, 32L, 32L,
32L, 32L, 32L, 32L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L,
34L, 34L, 34L, 34L, 34L), ProductId = c(6L, 3L, 4L, 9L, 8L, 10L,
1L, 5L, 7L, 1L, 5L, 3L, 4L, 2L, 7L, 6L, 10L, 8L, 7L, 4L, 10L,
5L, 1L, 3L, 8L, 6L, 2L, 9L, 6L, 1L, 2L, 4L, 7L, 8L, 5L, 9L, 10L,
3L, 2L, 5L, 9L, 4L, 10L, 3L, 6L, 1L, 8L, 8L, 10L, 2L, 4L, 3L,
9L, 5L, 6L, 5L, 6L, 4L, 9L, 10L, 8L, 2L, 7L, 1L, 3L, 10L, 3L,
2L, 8L, 9L, 7L, 5L, 4L, 1L, 7L, 1L, 3L, 2L, 4L, 8L, 9L, 6L, 5L,
10L, 1L, 9L, 2L, 4L, 7L, 3L, 8L, 7L, 9L, 8L, 4L, 10L, 3L, 5L,
1L, 6L, 2L, 6L, 4L, 9L, 3L, 10L, 1L, 8L, 7L, 5L, 2L, 9L, 5L,
7L, 4L, 10L, 1L, 3L, 2L, 6L, 5L, 9L, 2L, 4L, 3L, 8L, 1L, 10L,
6L, 7L, 10L, 9L, 2L, 1L, 5L, 8L, 6L, 4L, 7L, 3L, 9L, 8L, 3L,
5L, 6L, 10L, 1L, 7L, 4L, 1L, 6L, 9L, 10L, 3L, 4L, 2L, 8L, 7L,
10L, 8L, 1L, 6L, 4L, 5L, 9L, 3L, 7L, 2L, 4L, 8L, 3L, 7L, 10L,
1L, 6L, 5L, 5L, 6L, 4L, 7L, 1L, 10L, 3L, 10L, 8L, 3L, 1L, 4L,
5L, 6L, 2L, 9L, 5L, 6L, 4L, 8L, 2L, 10L, 3L, 1L, 8L, 4L, 10L,
6L, 9L, 7L, 2L, 3L, 8L, 3L, 6L, 7L, 9L, 4L, 5L, 2L, 10L, 1L,
5L, 9L, 3L, 7L, 6L, 10L, 8L, 2L, 4L, 8L, 7L, 1L, 4L, 2L, 10L,
10L, 3L, 8L, 1L, 7L, 5L, 4L, 6L, 2L, 10L, 6L, 1L, 2L, 5L, 4L,
8L, 1L, 10L, 8L, 3L, 2L, 9L, 5L, 6L, 4L, 9L, 10L, 6L, 2L, 1L,
7L, 4L, 8L, 5L, 1L, 5L, 9L, 10L, 3L, 8L, 7L, 2L, 4L, 10L, 1L,
5L, 7L, 6L, 2L, 3L, 4L, 9L, 8L, 1L, 5L, 2L, 7L, 3L, 6L, 10L,
4L, 9L, 9L, 5L, 10L, 8L, 2L), DaysSinceEpoch = c(7L, 20L, 31L,
40L, 105L, 146L, 162L, 169L, 212L, 10L, 18L, 31L, 65L, 84L, 122L,
156L, 202L, 206L, 1L, 4L, 7L, 11L, 14L, 24L, 25L, 100L, 148L,
149L, 3L, 10L, 12L, 14L, 18L, 26L, 35L, 41L, 96L, 147L, 9L, 22L,
66L, 80L, 102L, 104L, 170L, 199L, 234L, 10L, 24L, 36L, 38L, 75L,
122L, 163L, 169L, 9L, 16L, 35L, 39L, 54L, 58L, 79L, 116L, 133L,
224L, 27L, 35L, 37L, 49L, 73L, 91L, 105L, 141L, 252L, 16L, 28L,
51L, 73L, 76L, 83L, 126L, 202L, 97L, 105L, 150L, 172L, 203L,
207L, 223L, 256L, 259L, 25L, 28L, 38L, 40L, 63L, 100L, 120L,
176L, 186L, 191L, 7L, 22L, 36L, 37L, 40L, 41L, 53L, 67L, 114L,
233L, 1L, 16L, 17L, 23L, 40L, 52L, 125L, 184L, 186L, 12L, 42L,
53L, 65L, 67L, 69L, 83L, 149L, 154L, 265L, 10L, 14L, 33L, 47L,
67L, 106L, 133L, 181L, 247L, 258L, 6L, 21L, 26L, 41L, 49L, 68L,
89L, 112L, 119L, 9L, 34L, 88L, 91L, 102L, 110L, 132L, 171L, 200L,
6L, 14L, 21L, 36L, 40L, 60L, 64L, 88L, 109L, 208L, 8L, 17L, 21L,
55L, 77L, 85L, 97L, 168L, 18L, 28L, 42L, 44L, 70L, 77L, 101L,
14L, 23L, 33L, 84L, 107L, 123L, 124L, 125L, 25L, 29L, 33L, 57L,
79L, 83L, 98L, 112L, 119L, 5L, 31L, 64L, 91L, 102L, 131L, 222L,
234L, 27L, 46L, 48L, 60L, 61L, 64L, 72L, 103L, 161L, 8L, 24L,
27L, 50L, 60L, 62L, 92L, 99L, 147L, 159L, 16L, 19L, 20L, 84L,
175L, 202L, 17L, 21L, 25L, 46L, 69L, 121L, 161L, 175L, 267L,
10L, 14L, 20L, 39L, 58L, 90L, 229L, 32L, 35L, 39L, 40L, 60L,
66L, 98L, 153L, 173L, 2L, 3L, 25L, 46L, 51L, 80L, 96L, 166L,
202L, 43L, 70L, 76L, 77L, 115L, 160L, 183L, 202L, 223L, 25L,
33L, 61L, 72L, 74L, 77L, 85L, 91L, 152L, 265L, 16L, 62L, 63L,
64L, 66L, 82L, 104L, 126L, 181L, 47L, 49L, 55L, 58L, 67L), BoughtPAD = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L)), .Names = c("CustumerId",
"ProductId", "DaysSinceEpoch", "BoughtPAD"), row.names = c(NA,
300L), class = "data.frame")
Then, doing
# Build the event sequences: ids are customers, events are the products bought,
# ordered by the day on which they were bought.
library(TraMineR)
SimSeq <- seqecreate(id = SimulatedDated$CustumerId,
timestamp = SimulatedDated$DaysSinceEpoch,
event = SimulatedDated$ProductId)
# Event-level grouping factor: one entry per row of SimulatedDated, not one per
# customer. NOTE(review): with the labels in this order, the value 0 is labelled
# "PAD" and the value 1 is labelled "NPAD" -- confirm this is intended.
Cohort <- factor(SimulatedDated$BoughtPAD, labels = c("PAD", "NPAD"))
# Extract the frequent subsequences (pMinSupport = .01 is the support threshold).
Fsubseq <- seqefsub(seq = SimSeq, pMinSupport = .01)
# Look for the subsequences that discriminate between the groups in Cohort.
DiscrCohort <- seqecmpgroup(subseq = Fsubseq, group = Cohort)
produces:
Error in model.frame.default(formula = ww ~ group + seqmatrix[, index]) :
variable lengths differ (found for 'group')
and I was wondering, what could be causing this problem?
The group variable should have length equal to the number of sequences, i.e., the number of customers in your case. Also it is supposed to remain constant all along the sequence (which is not the case in your example).
The Cohort variable that you pass as the group argument has a length equal to the total number of events (300), whereas you have only 34 customers. So you need to aggregate it by CustumerId.
Here is how you can do that (here, by taking the max of the group value for each customer, so that a customer is flagged 1 as soon as any of their events is flagged 1):
# Collapse the event-level BoughtPAD flag to a single value per customer.
# The per-customer maximum of a 0/1 flag is 1 as soon as one row is 1.
bylist <- list(id = SimulatedDated[["CustumerId"]])
agg.PAD <- aggregate(
  x = SimulatedDated[, c("CustumerId", "BoughtPAD")],
  by = bylist,
  FUN = max
)
# One group value per customer, in the order produced by aggregate().
Cohort <- agg.PAD[["BoughtPAD"]]
Now you can look for the subsequences that best discriminate the groups
# Rank the frequent subsequences by how well they discriminate the groups,
# then show the first ten.
DiscrCohort <- seqecmpgroup(subseq = Fsubseq, group = Cohort)
print(DiscrCohort[seq_len(10)])
Hope this helps.
Related
I'm working on a data.table which contains, among other data, the demand for certain products on certain stores of a business franchise. The goal is to predict the demand for every single product on every single store.
Here is a "head" of my dataset:
head(train_dataset)
Week  Store_ID  Product_ID  Sales  Returns  Demand
   3     15766        1212      3        0       3
   3     15766        1216      4        0       4
   3     15766        1238      4        0       4
   3     15766        1240      4        0       4
   3     15766        1242      3        0       3
   3     15766        1250      5        0       5
My initial approach was to subset the original dataset so that I end up with one dataset per product per store. Exemplifying, if there are 3 products, namely product 1, 2 and 3, and 2 stores, A and B, I want to have one dataset containing all the data of product 1 on the store A, another one containing all the data from product 1 on store B and so on.
Since there are more than 2500 products, my first attempt was to automate, with a for loop or something from the apply family, code like this:
# Template for a single product: keep only the rows of train_dataset whose
# product_id equals n, where n stands for one product id.
library(dplyr)
product.n <- filter(train_dataset, product_id == n)
where "n" is a product id which can be obtained from another, dedicated, dataset. In this case, the products ids are int variables. Assuming I loaded this dedicated dataset as "prods", I tried something like:
# Attempted loop over the product ids (reported as not working).
# `for (i in prods)` walks over the elements of `prods` itself, so `i` is not a
# row index into prods$product_id.
for (i in prods){
a = prods$product_id[i]
# This stores a character string in product.a ...
product.a <- paste("product", a)
# ... which is immediately overwritten here; every iteration writes to the same
# variable `product.a`, so no per-product object is created.
product.a <- filter(train_dataset, product_id == a)
}
but it didn't work. Then I tried:
# Partition train_dataset into a list with one element per distinct product_id.
products <- split(train_dataset, f = train_dataset$product_id)
which worked. It returned a list of several lists, each one comprising all the data of a certain product id. Then, to subset these lists based on the store ids, I saw that I could not use code structured in the same way, because "train_dataset$store_id" is not available to be passed as the "f" parameter of the split function. To get around this I tried using lapply:
# Applies `[[`(x, "store_id") to every element of `products`, i.e. it extracts
# the store_id column of each per-product data set instead of splitting by it.
products.per.store <- lapply(products, '[[', "store_id")
which didn't work.
It occurred to me to try converting all the sublists to data frames and then applying the same split process again, all automatically. It worked for a single sublist that I did manually, but I wasn't able to automate it, and I also don't think it would be an efficient way to go about this. I also thought about combining "filter" and "group_by" from dplyr but, since I wasn't able to automate the first code example, I didn't try any further.
Here is a "head" from one dataset in the pattern that I'm aiming at (comprising, only, all the data from a certain product id in a certain store id):
head(prod41_store684023)
Week  Store_ID  Product_ID  Sales  Returns  Demand
   3    684023          41     30        0      30
   4    684023          41     95        0      95
   5    684023          41     82        0      82
   6    684023          41     30        0      30
   7    684023          41     60        0      60
   8    684023          41     70        0      70
I've seen quite a few other questions here on SO about operations on lists within lists and about filtering/splitting/subsetting datasets but, unfortunately, I could not extrapolate anything from them to this question, so I apologize if this has already been answered before.
Any help will be greatly appreciated.
Thanks!
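P.S. Here is a sample dput output with data from 2 product ids, 41 and 151 (when pasting it back into R, drop the trailing `.internal.selfref = <pointer: ...>` attribute, which is not valid R syntax):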
structure(list(Week = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L), Store_ID = c(684023L, 681747L, 685079L,
1623763L, 1035265L, 2482890L, 1546790L, 4586525L, 684023L, 1938075L,
681747L, 685079L, 1623763L, 2482890L, 1451516L, 4586525L, 2470338L,
684023L, 1938075L, 681747L, 1623763L, 2482890L, 2470338L, 146030L,
684023L, 1938075L, 465617L, 681747L, 1623763L, 2482890L, 1546790L,
4586525L, 2470338L, 1105804L, 2284385L, 146030L, 684023L, 681747L,
1623763L, 2482890L, 1546790L, 4586525L, 2470338L, 2284385L, 146030L,
684023L, 465617L, 681747L, 1623763L, 2482890L, 1546790L, 4586525L,
2470338L, 2284385L, 146030L, 684023L, 1938075L, 681747L, 1623763L,
2482890L, 1546790L, 64209L, 1451306L, 1451307L, 2290541L, 153680L,
817983L, 1163986L, 1873535L, 4286560L, 4498110L, 153547L, 153688L,
153817L, 713342L, 1549943L, 161141L, 1044616L, 1072646L, 1856859L,
1137252L, 1469082L, 1951821L, 9716137L, 1963850L, 153840L, 1524199L,
1133031L, 168596L, 52677L, 167312L, 168521L, 168527L, 168678L,
1915817L, 1915818L, 168631L, 168784L, 434240L, 984120L, 2176784L,
64209L, 1451306L, 1451307L, 2290541L, 153680L, 817983L, 1163986L,
1873535L, 4286560L, 4498110L, 153547L, 153688L, 153817L, 713342L,
1549943L, 161141L, 1044616L, 1072646L, 1856859L, 1137252L, 1469082L,
1951821L, 9716137L, 1963850L, 153840L, 1524199L, 1133031L, 168596L,
52677L, 167312L, 168521L, 168527L, 168678L, 1915817L, 1915818L,
168631L, 168784L, 434240L, 984120L, 2176784L, 2176785L, 64209L,
1451306L, 1451307L, 2290541L, 153680L, 817983L, 1163986L, 4286560L,
4498110L, 153547L, 153688L, 153817L, 713342L, 1549943L, 161141L,
1044616L, 1072646L, 1856859L, 1137252L, 1469082L, 9716137L, 1963850L,
153840L, 1524199L, 168596L, 52677L, 167312L, 168521L, 168527L,
168678L, 1915817L, 1915818L, 168540L, 168631L, 168784L, 434240L,
984120L, 2176784L, 2176785L, 64209L, 1451306L, 1451307L, 2290541L,
153680L, 817983L, 1163986L, 4286560L, 153688L, 153817L, 713342L,
1549943L, 161141L, 1044616L, 1072646L, 1856859L, 1137252L, 1469082L,
9716137L, 1963850L, 153840L, 168596L, 52677L, 167312L, 168521L,
168527L, 168678L, 1915817L, 1915818L, 168540L, 168631L, 168784L,
434240L, 984120L, 2176784L, 64209L, 1451306L, 1451307L, 2290541L,
153680L, 817983L, 1163986L, 1873535L, 4286560L, 153688L, 153817L,
713342L, 1549943L, 161141L, 1044616L, 1072646L, 1856859L, 1137252L,
1469082L, 1951821L, 9716137L, 1963850L, 153840L, 168596L, 52677L,
167312L, 168521L, 168527L, 168678L, 1915817L, 1915818L, 168540L,
168631L, 168784L, 434240L, 984120L, 2176784L, 64209L, 1451306L,
1451307L, 2290541L, 153680L, 817983L, 1163986L, 1873535L, 4286560L,
153547L, 153688L, 153817L, 713342L, 1549943L, 161141L, 1044616L,
1072646L, 1856859L, 1137252L, 1469082L, 1951821L, 9716137L, 1963850L,
153840L, 1524199L, 168596L, 52677L, 167312L, 168521L, 168527L,
168678L, 1915817L, 1915818L, 168540L, 168631L, 168784L, 434240L,
984120L, 2176784L, 2176785L, 64209L, 1451306L, 1451307L, 2290541L,
153680L, 817983L, 1163986L, 1873535L, 4286560L, 153547L, 153688L,
153817L, 713342L, 1549943L, 161141L, 1044616L, 1072646L, 1856859L,
1137252L, 1469082L, 1951821L, 9716137L, 1963850L, 153840L, 1524199L,
4722056L, 1133031L, 168596L, 52677L, 167312L, 168521L, 168527L,
168678L, 1915817L, 1915818L, 168540L, 168631L, 168784L, 434240L,
984120L, 2176784L, 2176785L), Product_ID = c(41L, 41L, 41L, 41L,
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
41L, 41L, 41L, 41L, 41L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L), Sales = c(30L, 2064L, 0L, 1022L, 0L,
330L, 200L, 20L, 95L, 105L, 1430L, 0L, 740L, 430L, 5L, 7L, 45L,
82L, 20L, 1686L, 820L, 400L, 25L, 70L, 30L, 40L, 0L, 1250L, 986L,
500L, 80L, 1L, 25L, 138L, 200L, 60L, 60L, 1570L, 1030L, 300L,
50L, 10L, 20L, 100L, 40L, 70L, 30L, 1305L, 1159L, 295L, 60L,
20L, 10L, 110L, 65L, 45L, 70L, 1378L, 1269L, 410L, 40L, 12L,
14L, 7L, 15L, 10L, 15L, 23L, 9L, 18L, 3L, 10L, 13L, 21L, 12L,
17L, 72L, 20L, 9L, 16L, 25L, 12L, 1L, 10L, 25L, 11L, 9L, 12L,
10L, 14L, 20L, 10L, 18L, 11L, 10L, 10L, 3L, 16L, 3L, 5L, 6L,
14L, 8L, 5L, 13L, 5L, 13L, 7L, 6L, 11L, 1L, 3L, 19L, 15L, 13L,
13L, 38L, 27L, 11L, 14L, 13L, 6L, 3L, 14L, 10L, 8L, 3L, 14L,
11L, 12L, 18L, 14L, 24L, 12L, 5L, 10L, 3L, 22L, 24L, 10L, 4L,
8L, 19L, 23L, 4L, 10L, 7L, 17L, 27L, 9L, 4L, 4L, 12L, 17L, 16L,
18L, 32L, 9L, 1L, 16L, 29L, 5L, 22L, 10L, 11L, 6L, 5L, 8L, 28L,
11L, 22L, 10L, 10L, 25L, 18L, 8L, 20L, 18L, 25L, 8L, 16L, 16L,
8L, 5L, 6L, 7L, 17L, 19L, 22L, 18L, 20L, 21L, 20L, 55L, 14L,
4L, 16L, 7L, 3L, 16L, 17L, 15L, 15L, 16L, 24L, 16L, 20L, 17L,
14L, 15L, 6L, 6L, 14L, 19L, 31L, 10L, 15L, 15L, 6L, 7L, 2L, 11L,
18L, 4L, 9L, 13L, 7L, 2L, 8L, 9L, 17L, 2L, 20L, 6L, 10L, 6L,
8L, 20L, 3L, 6L, 16L, 18L, 20L, 28L, 5L, 11L, 10L, 5L, 3L, 17L,
11L, 10L, 2L, 16L, 9L, 8L, 7L, 21L, 43L, 44L, 13L, 20L, 21L,
21L, 26L, 29L, 60L, 38L, 12L, 5L, 16L, 9L, 10L, 3L, 10L, 9L,
8L, 7L, 18L, 15L, 15L, 20L, 40L, 16L, 20L, 15L, 21L, 6L, 10L,
26L, 14L, 8L, 9L, 25L, 14L, 15L, 20L, 6L, 10L, 15L, 14L, 19L,
3L, 22L, 21L, 14L, 8L, 122L, 43L, 8L, 9L, 39L, 18L, 2L, 16L,
23L, 18L, 18L, 1L, 29L, 17L, 30L, 42L, 18L, 55L, 12L, 20L, 15L,
16L, 11L, 12L, 21L, 20L, 13L, 16L), Returns = c(0L, 0L, 9L, 0L,
90L, 0L, 0L, 5L, 0L, 0L, 0L, 20L, 0L, 0L, 0L, 3L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 30L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 70L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Demand = c(30L,
2064L, 0L, 1022L, 0L, 330L, 200L, 15L, 95L, 105L, 1430L, 0L,
740L, 430L, 5L, 4L, 45L, 82L, 20L, 1686L, 820L, 400L, 25L, 70L,
30L, 40L, 0L, 1250L, 986L, 500L, 80L, 1L, 25L, 138L, 200L, 60L,
60L, 1570L, 1030L, 300L, 50L, 10L, 20L, 100L, 40L, 70L, 0L, 1305L,
1159L, 295L, 60L, 20L, 10L, 110L, 65L, 45L, 70L, 1378L, 1269L,
410L, 40L, 12L, 14L, 7L, 15L, 10L, 15L, 23L, 9L, 18L, 3L, 10L,
13L, 21L, 12L, 17L, 72L, 20L, 9L, 16L, 25L, 12L, 1L, 10L, 25L,
11L, 9L, 12L, 10L, 14L, 20L, 10L, 18L, 11L, 10L, 10L, 3L, 16L,
3L, 5L, 6L, 14L, 8L, 5L, 13L, 5L, 13L, 7L, 6L, 11L, 1L, 3L, 19L,
15L, 13L, 13L, 38L, 27L, 11L, 14L, 13L, 6L, 3L, 14L, 10L, 8L,
3L, 14L, 11L, 12L, 18L, 14L, 24L, 12L, 5L, 10L, 3L, 22L, 24L,
10L, 4L, 8L, 19L, 23L, 4L, 10L, 7L, 17L, 27L, 9L, 4L, 4L, 12L,
17L, 16L, 18L, 32L, 9L, 1L, 16L, 29L, 5L, 22L, 10L, 11L, 6L,
5L, 8L, 28L, 11L, 22L, 10L, 10L, 25L, 18L, 8L, 20L, 18L, 25L,
8L, 16L, 16L, 8L, 5L, 6L, 7L, 17L, 19L, 22L, 18L, 20L, 21L, 20L,
55L, 14L, 4L, 16L, 7L, 3L, 16L, 17L, 15L, 15L, 16L, 24L, 16L,
20L, 17L, 14L, 15L, 6L, 6L, 14L, 19L, 31L, 10L, 15L, 15L, 6L,
7L, 2L, 11L, 18L, 4L, 9L, 13L, 7L, 2L, 8L, 9L, 17L, 2L, 20L,
6L, 10L, 6L, 8L, 20L, 3L, 6L, 16L, 18L, 20L, 28L, 5L, 11L, 10L,
5L, 3L, 17L, 11L, 10L, 2L, 16L, 9L, 8L, 7L, 21L, 43L, 44L, 13L,
20L, 21L, 21L, 26L, 29L, 60L, 38L, 12L, 5L, 16L, 9L, 10L, 3L,
10L, 9L, 8L, 7L, 18L, 15L, 15L, 20L, 40L, 16L, 20L, 15L, 21L,
6L, 10L, 26L, 14L, 8L, 9L, 25L, 14L, 15L, 20L, 6L, 10L, 15L,
14L, 19L, 3L, 22L, 21L, 14L, 8L, 122L, 43L, 8L, 9L, 39L, 18L,
2L, 16L, 23L, 18L, 18L, 1L, 29L, 17L, 30L, 42L, 18L, 55L, 12L,
20L, 15L, 16L, 11L, 12L, 21L, 20L, 13L, 16L)), row.names = c(NA,
-335L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x0000000002611ef0>)
Following up on the approach using split, I managed to solve this.
Like I said on the question, one of my attempts started with:
# First level of the split: one list element per distinct product_id.
products <- split(train_dataset, f = train_dataset$product_id)
which created a list with various lists inside, each one comprising all the data from a certain product.
To further subset these sublists, it occurred to me to use lapply with an anonymous function:
# Second level of the split: within every per-product data set, split the rows
# again by Store_ID, giving a nested list (product id -> store id -> rows).
products_per_stores <- lapply(
  products,
  function(product_rows) split(product_rows, f = product_rows$Store_ID)
)
This created a list with lists inside, which, in turn, contained lists inside them as well. The "first level" of sublists comprises one list per product id, and the "second level" one list per combination of the product id with the store ids, as was the goal.
In your first attempt to use the for loop, it couldn't work for two reasons:
First:
You try to iterate over 'prods', which is, as you say, a dataset - not something you can directly iterate over row by row (a for loop over a data frame walks through its columns).
So in case you want to iterate over each line of your dataset (as your example suggests), you can use
for (a in prods$product_id){
Second:
You overwrote your subset in every iteration.
I assume you tried to name the subset with
product.a <- paste("product", a)
but it doesn't work that way.
To assign a name containing your 'a', you can use the assign() function like so:
# Build the variable name "product.<a>" and bind the rows filtered for id `a` to it.
assign(paste0("product.", a), filter(train_dataset, product_id == a))
If you only want a separate data frame for each product (that's what your loop attempts, as far as I can see), you can also just subset by the id, which lets you iterate over the unique ids only.
# Create one data frame per distinct product id, named "product.<id>".
# `prods` only supplies the ids; the rows themselves come from `train_dataset`,
# which is the data set that has to be split (consistent with the assign()
# example that filters train_dataset). which() drops comparisons that are NA.
for (i in unique(prods$product_id)) {
  assign(
    paste0("product.", i),
    train_dataset[which(train_dataset$product_id == i), ]
  )
}
This is, of course, no complete solution to your problem but might help you to revisit your initial approach.
I'm trying to use nls to estimate the parameters of a non linear model.
I first use nls2 to find good initial parameters with Random Search and I then use nls to improve the estimation with a Gauss-Newton approach.
The problem is I always get a "singular gradient matrix at initial parameter estimates" error.
I'm not sure I understand, because the input matrix doesn't seem to be a singular gradient matrix.
Moreover, even if the fit I'm looking for is not perfect for this data, nls should find a way to improve the
parameter estimates, shouldn't it?
Question: Is there a way to improve the parameters estimation?
I've tried NLS.lm but I had the same problem.
Here is a reproducible example:
Data:
structure(list(x1 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), x2 = c(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L,
58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 0L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L,
43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L,
56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 0L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 0L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 0L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L,
43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L,
56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 0L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L,
58L, 59L, 60L, 61L, 62L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L,
22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L,
35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L,
48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L,
61L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L,
40L, 41L, 42L, 43L, 44L, 45L), y = c(0.0689464583349188, 0.0358227182166929,
0.0187034836294036, 0.0227081421239796, 0.0146603483536504, 0.00562771204350896,
0.00411351161052011, 0.00356917888321555, 0.0028017552960605,
0.0024750328652541, 0.00243175013170564, 0.00242654283706898,
0.00235224917236107, 0.00176144220485858, 0.00138071934398105,
0.000696375069179013, 0.00106282865382483, 0.00114735219137874,
0.00277256441625284, 0.00214359572321392, 0.00144935953386591,
0.00249732559162499, 0.00225859018399108, 0.00201642941663214,
0.00232438586834105, 0.0016083751355862, 0.00143118376291818,
0.00158323933266031, 0.00157585431454131, 0.00169206800399143,
0.00158514119474578, 0.00134506293557103, 0.00119442163345335,
0.00101284069499962, 0.0012621113004254, 0.00128964367655383,
0.00102819258807122, 0.00125345601171754, 0.00116155619985178,
0.00142466624262548, 0.00141075318725309, 0.00106556656123991,
0.0010976347045814, 0.0012442089226047, 0.0010627617251863, 0.00125322168410487,
0.00112108560656369, 0.0012459199320756, 0.00135773322693401,
0.0013997982284804, 0.00155012485145915, 0.00151108062240688,
0.00149570655260348, 0.00152598641103596, 0.00108261570337346,
0.000992225418429453, 0.000769588971038765, 0.000700496873143604,
0.000688378351958078, 0.000595007407260441, 0.000557615594951187,
0.00040476923690092, 0.000492276455560289, 0.000447248723966691,
0.000388694992851599, 0.000346087542525691, 0.000189803623801549,
0.0709302325562937, 0.0424623423412875, 0.019085896698975, 0.0190650552541205,
0.014276898897581, 0.00593407290200902, 0.00445528598343583,
0.00371231334350143, 0.00253909496678967, 0.00263487912423124,
0.00248012072619926, 0.00263786771266913, 0.00219351150766708,
0.00179271674850348, 0.00139646119589996, 0.000911560061336614,
0.000989537441246412, 0.001046390000492, 0.00223993432619926,
0.00164189356162362, 0.00106041866437064, 0.00194151698794588,
0.0014213192200082, 0.00165239495268553, 0.00196583929282493,
0.00120501090643706, 0.001141403899631, 0.00122398595424354,
0.00124538223829438, 0.00123370121853218, 0.00136883147552275,
0.00110907318146781, 0.000965843164247642, 0.000859986264862649,
0.00104695561918819, 0.00103985460139401, 0.000455832014104141,
0.000704296760639607, 0.000870145383845838, 0.000919870911357114,
0.00101396309667897, 0.000781894087412874, 0.000909712365723658,
0.000889897365477655, 0.000933063039278393, 0.000779395399425994,
0.000789546295038951, 0.000773432990897909, 0.00125614787798278,
0.00123172652693727, 0.00078936677195572, 0.000952107503075031,
0.00105449131480115, 0.00123128091742517, 0.000889501370397704,
0.00085648642099221, 0.000830097733497335, 0.000653482256334563,
0.000521696831160312, 0.000612702433456335, 0.000513576588109881,
0.000475289330709307, 0.00041141913800738, 0.000328157997211972,
0.00031336264403444, 0.000328784093808938, 0.000237448446412464,
0.0520691145678866, 0.0281929482152033, 0.0219024230330532, 0.0141074098760277,
0.00691341703402584, 0.00445785262213699, 0.0034569415664917,
0.00234406584844369, 0.00257369504707459, 0.00234047371531346,
0.00227286083862502, 0.00248544382019894, 0.00180810413760828,
0.00138986347039715, 0.000911936124008956, 0.000932783218782117,
0.00108887529088974, 0.0017855660833578, 0.00159768589505946,
0.00124091041330201, 0.00203036436876009, 0.00154489107876964,
0.00111687975012847, 0.00163256939968433, 0.00143626193198502,
0.000996683818914256, 0.0010781399542101, 0.00122575793431581,
0.00115671467616723, 0.001069532453476, 0.0010106869893371, 0.000978618104445015,
0.000894478048836441, 0.000842874700392747, 0.000819009288742475,
0.000843003919670386, 0.000964158733115548, 0.000877802228013507,
0.00087592051873807, 0.000935810596369843, 0.000879047729316546,
0.000829181439950081, 0.0010295792954412, 0.000765620227389517,
0.00102511256239906, 0.000823109180461753, 0.00111669534392894,
0.000802757620485245, 0.00103231207284173, 0.000884354083467919,
0.00109278942886507, 0.000969283099489796, 0.000827480664091176,
0.000798564447676552, 0.000909248326695786, 0.000682209033640434,
0.000780593294853913, 0.000485172195712818, 0.000467514093470122,
0.000295219649739392, 0.000460636351123183, 0.00045060371687344,
0.000492590160218764, 0.000402536549331963, 0.000271941766535751,
0.000171012123770371, 0.0267385565244063, 0.0275426278720772,
0.0154589149018475, 0.00729065000152096, 0.00513675524527996,
0.00378848397112206, 0.00305965140790087, 0.00240428827949139,
0.00233604733730811, 0.00199601458903693, 0.00198302547453915,
0.00137121122011316, 0.00126241982975401, 0.0012413298189045,
0.00103044327584109, 0.00106759120581615, 0.00190957422380402,
0.00124400301656831, 0.000989035353673623, 0.00160702520431547,
0.0011515826661394, 0.00153203681379408, 0.00134897491229138,
0.000916492937174261, 0.00072393419977287, 0.00115124473393361,
0.00104241370079698, 0.000953324905193568, 0.00121656899373365,
0.000891420608484922, 0.000671666092758208, 0.000659860761797571,
0.000586145968952161, 0.00072735268499929, 0.000658407622538582,
0.000498831767252743, 0.000658345030520574, 0.000542106922897528,
0.000874560054044737, 0.000543320226217274, 0.000751139509440084,
0.000668632963233356, 0.000656903021131188, 0.000574965903652329,
0.0006661524076778, 0.000605171890653201, 0.000527045917239561,
0.000985791370586684, 0.000899420142057553, 0.000933015548254953,
0.00082137283567561, 0.000870124781995904, 0.000498046123582973,
0.000540181050881142, 0.000596948101336416, 0.000405622486362069,
0.000631594016548032, 0.000468749313033603, 0.000389576698910993,
0.000335624642574679, 0.000286763668856847, 0.000439039581432135,
0.000244767908276044, 0.000303911794528604, 0.000160988671898765,
0.0365772382134747, 0.0255898183301035, 0.010327803963121, 0.00714710822108354,
0.00506253612461807, 0.00447056668291465, 0.00322822676102386,
0.00328154620569948, 0.0028470908747756, 0.00253477302081723,
0.00187837758253778, 0.00116416512964702, 0.00119557763663167,
0.000993575112051645, 0.00136274483135782, 0.00204131052512691,
0.00157953945941769, 0.00116523253183218, 0.00190793844827791,
0.00144595416523011, 0.00157423646879793, 0.00126996001866537,
0.00115283860342634, 0.00116894693507543, 0.000930041619012519,
0.00106545753272384, 0.00123507493015348, 0.00130865599847824,
0.000940647984853709, 0.000836521897923032, 0.000778436697656724,
0.00100773629284415, 0.000956581999215341, 0.000808036977042788,
0.000597930101173421, 0.000776453419209873, 0.000630241947142534,
0.000649832426616575, 0.000782188275296327, 0.00102823806308181,
0.000830656989407107, 0.00051915559901561, 0.000537114715917872,
0.000872430107712244, 0.000549284113632851, 0.000738257038745497,
0.00097442578198376, 0.000879724260815807, 0.000884543540237537,
0.00100038027474944, 0.00103543285342337, 0.000875585441608313,
0.000829083410412184, 0.000760316116414823, 0.000712211369823927,
0.000386744815307978, 0.000428331410721292, 0.000397681982571065,
0.000213938551710199, 0.000370800615243779, 0.000281234314553042,
0.000267359921177464, 0.000358376119030352, 0.000337361541022196,
0.0310029062887812, 0.0154963087949333, 0.00959302943445506,
0.00645674376405936, 0.00525321947702945, 0.00386084394749159,
0.00374364242039947, 0.00351047952579374, 0.00298556939927835,
0.00199158625919048, 0.00206559575086432, 0.00169077836254661,
0.00139156751815451, 0.00170363478493893, 0.00250481301085496,
0.00182474837251083, 0.00116804333227652, 0.00155778636185214,
0.00183778204100427, 0.00135012918459471, 0.00166904872503284,
0.00120137403943415, 0.00108307957787943, 0.00146041465872549,
0.0014437889563235, 0.000975926161359965, 0.00102580511345623,
0.00112145083941, 0.000921884915530595, 0.00082253191796126,
0.000634876416504371, 0.00108601324863747, 0.000830573067167897,
0.000965052460105379, 0.000922667052402736, 0.000863193817654785,
0.000982111173513293, 0.000763009170856168, 0.000921755812461313,
0.000771609983091022, 0.000669047474976222, 0.000773869648383834,
0.00072022523061129, 0.000742426347056781, 0.000718728249316847,
0.000761437280522971, 0.000833112611531319, 0.000794451658438637,
0.000907360341651947, 0.00112083735676435, 0.00102996529205731,
0.000651843453054939, 0.000640968179416338, 0.000549646466476441,
0.000778958256714525, 0.000627413038784969, 0.000523658918731223,
0.000418571973368359, 0.000643352520494588, 0.000351378727146459,
0.000504093577607682, 0.000333827596358531, 0.000339505558071773,
0.0181836504450303, 0.0135527124187004, 0.00780738765319868,
0.00643260738080874, 0.00476881905655232, 0.00406986745617877,
0.00400325917456592, 0.00277499160186111, 0.00198311377238581,
0.00241837807740304, 0.00141018451525995, 0.00166798657140732,
0.0013970042073337, 0.00237332662413329, 0.00146721126831566,
0.000990562316636778, 0.00186106889002752, 0.00186322276224556,
0.00140391140302307, 0.00139027556176293, 0.00125730361478641,
0.00127044200804939, 0.00126655503830484, 0.00133956330669488,
0.00128219844136096, 0.00109531452608613, 0.00112195611926977,
0.00101411381866565, 0.00104786051750783, 0.000798711632769435,
0.000852432172756047, 0.000852720107765923, 0.00110385307389073,
0.00081385514739304, 0.00102898862672826, 0.000710330768658628,
0.000803425598538879, 0.000723455383750816, 0.00075034248654992,
0.000864917906994041, 0.000799733114881449, 0.000608518601191706,
0.000855476747683942, 0.000988548021123443, 0.00104800683206201,
0.000997051779707941, 0.000796235203259423, 0.000910577791459715,
0.000869997383535945, 0.000557402535474327, 0.000757813148434336,
0.000480807445269952, 0.000553425518375578, 0.000633029237291637,
0.00050222863978579, 0.000390945889771328, 0.000430333228928208,
0.000425167676834459, 0.000239604519722651, 0.000357021364759551,
0.000292330910803864, 0.000288851701197491, 0.0198837196044917,
0.0142208140311702, 0.00733039271103269, 0.00609158853724431,
0.00487605866828399, 0.00382636157210858, 0.00411545257392807,
0.00235906433257981, 0.00228491326937568, 0.00109255715480326,
0.00158036861847788, 0.00122011020381908, 0.00223761733564904,
0.00173284341769128, 0.00117538923471357, 0.00219622963095698,
0.00214263916211795, 0.0013198229549172, 0.00172951959530242,
0.00128074705482347, 0.00124062569884766, 0.00144218669111025,
0.00148407512819099, 0.00100716026446858, 0.0010842890711437,
0.000800686408079248, 0.000890454658065465, 0.000887152794471706,
0.00105780722647994, 0.000874948318354744, 0.000569126715186268,
0.000924642167943982, 0.000857013884141074, 0.000823122890591976,
0.00073038777177409, 0.000522615873628494, 0.00070936497950782,
0.000823074755104667, 0.000720588701733105, 0.000722724038337836,
0.00063458965098969, 0.000620049346639466, 0.000842327487089008,
0.000617708212493797, 0.000783953750160813, 0.00112567150392384
)), .Names = c("x1", "x2", "y"), class = c("tbl_df", "data.frame"
), row.names = c(NA, -500L))
Initial parameters: initial_par
structure(list(A1 = 0.0529486559121727, alpha1 = 0.00888818269595504,
B1 = 0.250994319084551, beta1 = 0.471984946168959, A2 = 0.281956987357551,
alpha2 = 0.325086771510541, B2 = 0.0562204262765557, beta2 = 0.725645614322275), class = "data.frame", row.names = c(NA,
-1L), .Names = c("A1", "alpha1", "B1", "beta1", "A2", "alpha2",
"B2", "beta2"))
Formula:
# Model: a two-term exponential decay in x1 multiplied by a two-term
# exponential decay in x2 (eight parameters: A1, alpha1, B1, beta1,
# A2, alpha2, B2, beta2).
formula <- y ~ (A1 * exp(-alpha1 * x1) + B1 * exp(-beta1 * x1)) *
  (A2 * exp(-alpha2 * x2) + B2 * exp(-beta2 * x2))
Nls and the error message
# Fit the model by nonlinear least squares; the columns of the one-row
# initial_par data frame are handed to nls() as a named list of starts.
final <- nls(
  formula,
  data = df,
  start = as.list(initial_par)
)
Error in nlsModel(formula, mf, start, wts) :
singular gradient matrix at initial parameter estimates
The problem is that there is not a one to one relationship between your model and parameters. To see this write A1 = exp(a1+d), A2 = exp(a2-d), B1 = exp(b1+d), B2 = exp(b2-d) in which case we have:
y ~ exp(-alpha1 * x1 + a1 + d) * exp(-alpha2 * x2 + a2 - d) +
exp(-alpha1 * x1 + a1 + d) * exp(-beta2 * x2 + b2 - d) +
exp(-beta1 * x1 + b1 + d) * exp(-alpha2 * x2 + a2 - d) +
exp(-beta1 * x1 + b1 + d) * exp(-beta2 * x2 + b2 - d)
But d cancels in each of the 4 terms and therefore drops out of the RHS entirely. That is, the RHS is the same for any value of d, so the model is overparameterized and will give a singular gradient.
Fix one of A1, A2, B1, B2 and then you should be able to get a solution:
# Pin A1 at 1 so it is no longer estimated, then fit the remaining
# parameters using every starting value except the one for A1.
A1 <- 1
nls(formula, data = df, start = initial_par[names(initial_par) != "A1"])
giving:
Nonlinear regression model
model: y ~ (A1 * exp(-alpha1 * x1) + B1 * exp(-beta1 * x1)) * (A2 * exp(-alpha2 * x2) + B2 * exp(-beta2 * x2))
data: df
alpha1 B1 beta1 A2 alpha2 B2 beta2
0.11902 1.21030 0.79076 0.04604 0.51697 0.00183 0.02317
residual sum-of-squares: 0.000685
Number of iterations to convergence: 11
Achieved convergence tolerance: 6.686e-06
I'm plotting some points over a map with ggmap package.
The problem is that I get the message: "Removed 12 rows containing missing values (geom_point)".
But I don't have any NAs. I've looked at the data, and used:
sum(is.na(limanov2)) # Gives 0, i.e. no cell of limanov2 is NA
to prove it.
This is my code:
library(maps)
library(ggmap)
# Download a base map for "lima" at zoom 11 and overlay the rows of
# limanov2 as points, with TOTALES driving both colour and size.
lima <- get_map(location = "lima", zoom = 11)
ggmap(lima) +
  geom_point(
    mapping = aes(
      x = LONGITUD, y = LATITUD,
      color = TOTALES, size = TOTALES
    ),
    data = limanov2
  ) +
  scale_color_gradient(low = "yellow", high = "red")
My data:
structure(list(DISTRITO = c("SAN JUAN DE LURIGANCHO", "CALLAO",
"LOS OLIVOS", "ATE VITARTE", "LIMA CERCADO", "SAN MARTÍN", "SANTIAGO DE SURCO",
"CHORILLOS", "COMAS", "INDEPENDENCIA", "EL AGUSTINO", "LA VICTORIA",
"SAN JUAN DE MIRAFLORES", "VILLA EL SALVADOR", "S. MIGUEL", "CARABAYLLO",
"MIRAFLORES", "PTE. PIEDRA", "SAN BORJA", "VENTANILLA", "SURQUILLO",
"BREÑA", "ANCÓN", "EL RIMAC", "BARRANCO", "LA MOLINA", "SAN LUIS",
"STA. ANITA", "LURIGANCHO", "P. LIBRE", "MAGDALENA", "LA PERLA",
"CHACLACAYO", "SAN ISIDRO", "J. MARÍA", "BELLAVISTA", "LINCE",
"C. DE LA LEGUA", "CIENEGUILLA", "STA.ROSA", "LURÍN", "PTA.NEGRA",
"PUCUSANA", "LA PUNTA", "PTA. HERMOSA", "PACHACAMAC", "SAN BARTOLO",
"SANTA MARÍA"), TOTALES = c(861L, 696L, 696L, 642L, 516L, 479L,
442L, 378L, 371L, 368L, 361L, 333L, 325L, 291L, 282L, 251L, 239L,
223L, 196L, 193L, 188L, 185L, 174L, 161L, 138L, 134L, 128L, 119L,
115L, 105L, 67L, 65L, 63L, 58L, 56L, 45L, 38L, 23L, 23L, 11L,
8L, 6L, 5L, 3L, 3L, 2L, 0L, 0L), HOMICIDIOS = c(1L, 7L, 0L, 1L,
2L, 0L, 0L, 1L, 7L, 4L, 4L, 4L, 0L, 0L, 0L, 2L, 0L, 1L, 0L, 7L,
0L, 0L, 0L, 4L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), LESIONES = c(100L,
72L, 61L, 43L, 44L, 8L, 10L, 15L, 44L, 40L, 50L, 15L, 52L, 28L,
7L, 33L, 15L, 27L, 3L, 21L, 7L, 36L, 33L, 19L, 14L, 1L, 8L, 6L,
16L, 4L, 4L, 9L, 1L, 2L, 9L, 5L, 2L, 5L, 7L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), VIO..DE.LA.LIBERTAD.PERSONAL = c(0L, 7L,
6L, 5L, 6L, 1L, 1L, 0L, 3L, 1L, 2L, 0L, 2L, 0L, 1L, 0L, 1L, 1L,
0L, 1L, 1L, 0L, 3L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), VIO..DE.LA.LIBERTAD.SEXUAL = c(56L,
14L, 12L, 15L, 7L, 10L, 2L, 9L, 11L, 13L, 8L, 9L, 7L, 14L, 4L,
15L, 4L, 12L, 2L, 17L, 7L, 3L, 4L, 12L, 2L, 1L, 5L, 3L, 11L,
4L, 1L, 2L, 0L, 2L, 0L, 3L, 0L, 2L, 2L, 0L, 4L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), HURTO.SIMPLE.Y.AGRAVADO = c(217L, 203L, 296L, 230L,
260L, 167L, 226L, 217L, 130L, 117L, 154L, 133L, 121L, 46L, 163L,
72L, 161L, 84L, 119L, 69L, 120L, 64L, 19L, 21L, 57L, 44L, 39L,
2L, 48L, 60L, 30L, 19L, 48L, 41L, 25L, 19L, 27L, 7L, 11L, 9L,
0L, 6L, 0L, 2L, 3L, 1L, 0L, 0L), ROBO.SIMPLE.Y.AGRAVADO = c(460L,
289L, 308L, 344L, 186L, 277L, 198L, 130L, 165L, 184L, 137L, 149L,
134L, 188L, 104L, 126L, 58L, 96L, 72L, 64L, 51L, 77L, 115L, 76L,
64L, 88L, 73L, 108L, 40L, 36L, 30L, 32L, 14L, 12L, 22L, 12L,
8L, 6L, 3L, 1L, 3L, 0L, 2L, 1L, 0L, 1L, 0L, 0L), MICRO.COM.DE.DROGAS = c(26L,
100L, 13L, 3L, 10L, 15L, 5L, 5L, 11L, 8L, 3L, 23L, 9L, 15L, 3L,
3L, 0L, 2L, 0L, 8L, 2L, 5L, 0L, 28L, 0L, 0L, 1L, 0L, 0L, 0L,
2L, 2L, 0L, 0L, 0L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L,
0L, 0L), TENENCIA.ILEGAL.DE.ARMAS = c(1L, 4L, 0L, 1L, 1L, 1L,
0L, 1L, 0L, 1L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 6L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), LONGITUD = c(-77, -77.12,
-77.08, -76.89, -77.04, -77.09, -76.99, -77.01, -77.05, -77.05,
-77, -77.02, -76.97, -76.94, -77.09, -76.99, -77.03, -77.08,
-77, -77.13, -77.01, -77.05, -77.11, -76.7, -77.02, -76.92, -77,
-76.96, -76.86, -77.06, -77.07, -77.12, -76.76, -77.03, -77.05,
-77.11, -77.04, -77.09, -76.78, -77.16, -76.81, -76.73, -76.77,
-77.16, -76.76, -76.83, -76.73, -76.77), LATITUD = c(-11.99,
-12.04, -11.97, -12.04, -12.06, -12, -12.16, -12.2, -11.93, -11.99,
-12.04, -12.08, -12.16, -12.23, -12.08, -11.79, -12.12, -11.88,
-12.1, -11.89, -12.11, -12.06, -11.69, -11.94, -12.15, -12.09,
-12.08, -12.04, -11.98, -12.08, -12.09, -12.07, -11.99, -12.1,
-12.08, -12.06, -12.09, -12.04, -12.07, -11.81, -12.24, -12.32,
-12.47, -12.07, -12.28, -12.18, -12.38, -12.42)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -48L), .Names = c("DISTRITO",
"TOTALES", "HOMICIDIOS", "LESIONES", "VIO..DE.LA.LIBERTAD.PERSONAL",
"VIO..DE.LA.LIBERTAD.SEXUAL", "HURTO.SIMPLE.Y.AGRAVADO", "ROBO.SIMPLE.Y.AGRAVADO",
"MICRO.COM.DE.DROGAS", "TENENCIA.ILEGAL.DE.ARMAS", "LONGITUD",
"LATITUD"))
You have values outside of the base map zoom range... try changing your zoom parameter.
library(maps)
library(ggmap)
# Request the base map at zoom 10 instead of 11, so that points lying
# outside the zoom-11 map range are no longer dropped by geom_point().
lima <- get_map(location = "lima", zoom = 10)
ggmap(lima) +
  geom_point(
    mapping = aes(
      x = LONGITUD, y = LATITUD,
      color = TOTALES, size = TOTALES
    ),
    data = limanov2
  ) +
  scale_color_gradient(low = "yellow", high = "red")
I would like to make a publication-grade figure using ggplot. I am using cairo-png, but I still get some unexpected horizontal lines and "black spots" on the edges of some bars.
Here is the code I use:
structure(list(Sample.Name = structure(c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L, 49L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L,
59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L,
72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L,
85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L,
98L, 99L, 100L, 101L), .Label = c("1075", "1104", "1108", "1120",
"1121", "1137", "1258", "1264", "1280", "1286", "1310", "1317",
"1338", "1392", "1401", "1435", "1477", "1480", "1494", "1519",
"1574", "1588", "1595", "1607", "1611", "1644", "1645", "1651",
"1653", "1654", "1673", "1687", "1702", "1714", "1740", "1776",
"1781", "1812", "1835", "1838", "1857", "1874", "1890", "1899",
"1911", "1933", "1936", "1999", "2006", "2046", "2063", "2079",
"2081", "2088", "2116", "2135", "2144", "2147", "2155", "2166",
"2167", "2176", "2183", "2200", "2209", "2223", "2253", "2256",
"2442", "2444", "2453", "2456", "2462", "2467", "2472", "2482",
"2497", "2504", "2507", "2513", "2518", "2523", "2567", "2568",
"2576", "2578", "2598", "2600", "2619", "2623", "2625", "2632",
"2636", "2646", "2652", "2659", "2660", "2676", "2680", "2682",
"2705", "2711", "2756", "2765", "2772", "2793", "2803", "2854",
"2856", "2882", "2912", "2916", "2919", "3058", "3063", "3114",
"3116", "3117", "3125", "3132", "3140", "3145", "3175", "3181",
"3248", "3383", "3431", "3436", "3442", "3472", "3576", "3639",
"4093", "FL001-1", "FL002-1", "FL004-1", "FL006-1", "FL007-1",
"FL008-1", "FL009-1", "FL010-1", "FL017-1", "FL019-1", "FL021-1",
"FL022-1", "FL024-1", "FL027-1", "FL028-1", "FL029-1", "FL030-1",
"FL032-1", "FL033-1", "FL034-1", "FL035-1", "FL036-1", "FL037-1",
"FL038-1", "FL039-1", "FL040-1", "FL041-1", "FL042-1", "FL043-1",
"FL045-1", "FL046-1", "FL047-1", "FL049-1", "FL050-2", "FL051-1",
"FL052-1", "FL053-1", "FL056-1", "FL057-1", "FL059-1", "FL060-1",
"FL061-1", "FL062-1", "FL063-1", "FL064-1", "FL065-1", "FL066-1",
"FL067-1", "FL068-1", "FL069-1", "FL071-1", "FL072-1", "FL073-1",
"FL075-1", "FL076-1", "FL077-1", "FL078-1", "FL080-1", "FL082-1",
"FL083-1", "FL085-1", "FL086-1", "FL087-1", "FL088-1", "FL089-1",
"FL090-1", "FL092-1", "FL094-1", "FL095-1", "FL096-1", "FL097-1",
"FL098-1", "FL099-1", "FL1_1215", "FL10_01501", "FL100-1", "FL101-1",
"FL102-1", "FL103-1", "FL105-1", "FL106-1", "FL107-1", "FL110-1",
"FL111-1", "FL112-1", "FL114-1", "FL115-1", "FL116-1", "FL117-1",
"FL118-1", "FL119-1", "FL12_1593", "FL120-1", "FL121-1", "FL122-1",
"FL123-1", "FL126-1", "FL127-1", "FL129-1", "FL13_01598", "FL130-1",
"FL132-1", "FL133-1", "FL134-1", "FL135-1", "FL136-1", "FL137-1",
"FL138-1", "FL139-1", "FL140-1", "FL141-1", "FL144-1", "FL145-1",
"FL146-1", "FL147-1", "FL148-1", "FL149-1", "FL150-1", "FL151-1",
"FL152-1", "FL153-1", "FL154-1", "FL156-1", "FL158-1", "FL159-1",
"FL16_1738", "FL167-1", "FL168-1", "FL169-1", "FL17_01752", "FL170-1",
"FL173-1", "FL176-1", "FL18_1763", "FL180-1", "FL181-1", "FL183-1",
"FL184-1", "FL185-1", "FL187-1", "FL19_1881", "FL190-1", "FL191-1",
"FL192-1", "FL193-1", "FL194-1", "FL195-1", "FL196-1", "FL197-1",
"FL198-1", "FL199-1", "FL2_1222", "FL200-1", "FL22_2025", "FL23_2032",
"FL24_02085", "FL25_2175", "FL26_2219", "FL27_02242", "FL28_2459",
"FL3_01235", "FL30_2558", "FL35_02726", "FL37_2808", "FL41_2865",
"FL43_02926", "FL44_2994", "FL45_3018", "FL47_3119", "FL48_3128",
"FL55_03303", "FL62_3406", "FL64_3418", "FL65_03421", "FL69_03484",
"FL7_1306", "FL70_03517", "FL71_03534", "FL76_3644", "FL77_3651",
"FL8_01425"), class = "factor"), variable = structure(c(7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L), .Label = c("BCL2_Status", "MLL2", "CREBBP", "TNFRSF14",
"EZH2", "MEF2B", "ARID1A", "EP300", "IRF8", "STAT6", "CARD11",
"GNA13", "ARID1B", "PIM1", "BCL7A", "SMARCA4", "CTSS", "TNFAIP3",
"CCND3", "TP53", "BTG1", "FOXO1", "IKZF3", "CD79B", "DTX1", "KLHL6",
"B2M", "P2RY8", "SGK1", "FAT2", "CIITA", "CXCR4", "ETS1", "FAS",
"ROS1", "BTG2", "CD79A", "PAX5", "SIN3A", "TET2", "ATM", "NOTCH1",
"NOTCH2", "UBR5", "NF1", "PIK3CD", "ARID2", "DNMT3A", "KDM6A",
"MCL1", "SBF1", "GNB1", "IRF4", "TYK2", "ASXL1", "BRWD3", "CD40",
"CHD2", "EPHA7", "IKZF1", "KAT2B", "BRAF", "CD58", "EPHA6", "FBXO11",
"MALT1", "MYC", "MYD88", "PDGFRA", "RB1", "RHOA", "SMARCA2",
"SMARCB1", "CCND1", "CDK6", "GNB2", "IKBKE", "KAT2A", "MUM1",
"PIK3CA", "RET", "SWAP70", "TRAF6", "USP6", "ARID3A", "BIRC3",
"CDK4", "CDKN2B", "DIRAS3", "FCGR3A", "IDH1", "IKZF2", "KAT5",
"MDM2", "PDGFRB", "PTEN", "SF3B1", "TRAF2", "TRAF3"), class = "factor"),
value = c(0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("Sample.Name",
"variable", "value"), row.names = c(NA, 100L), class = "data.frame")
.
# Bar chart of value against variable (as a factor), built with qplot()
# on the NA-free rows of df, then written to a PNG via the cairo-png
# device at 150 dpi.
qb3r <- qplot(x = factor(variable), y = value, data = na.omit(df)) +
  geom_bar(stat = "identity") +
  theme(
    axis.text.x = element_text(angle = -90, hjust = 1),
    text = element_text(size = 8),
    panel.background = element_blank()
  ) +
  labs(title = Title_plot, x = "Gene symbol", y = "Mutated case")
# No plot is passed here, so ggsave() uses its default (the last plot).
ggsave(
  filename = "frequency FL.Ontogeny.baitset.png",
  dpi = 150,
  type = "cairo-png"
)
Here are some examples of the problem.
How could I fix it?
Thanks!
I think that you have two problems: using qplot instead of ggplot and not entering your x and y variables as aesthetics. ggplot2 treats x and y variables as aesthetic mappings between the data and the plot, and that mapping almost always needs to be passed via aes() or aes_string().
Here is some minimally-working code that removes the artifacts:
# Minimal example: six labelled bars with random heights.
df <- data.frame(
  variable = c("Alpha", "Beta", "Gamma", "Delta", "Epsilon", "Zeta"),
  value = runif(6)
)
Title_plot <- "Some Title"

# Build the chart with ggplot(), supplying x and y through aes() rather
# than as bare qplot() arguments.
qb3r <- ggplot(
  data = na.omit(df),
  mapping = aes(x = factor(variable), y = value)
) +
  geom_bar(stat = "identity") +
  theme(
    axis.text.x = element_text(angle = -90, hjust = 1),
    text = element_text(size = 8),
    panel.background = element_blank()
  ) +
  scale_x_discrete() +
  scale_y_continuous() +
  labs(title = Title_plot, x = "Gene symbol", y = "Mutated case")

# Pass the plot object explicitly instead of relying on the last plot.
ggsave(
  filename = "barplottest_cairo.png",
  plot = qb3r,
  dpi = 150,
  type = "cairo-png"
)
############ uncoded data
x10<- structure(c(0L, 0L, 0L, 0L, 1L, 1L, 1L, 5L, 8L, 9L, 31L, 1L,
0L, 0L, 0L, 1L, 0L, 1L, 2L, 7L, 2L, 10L, 0L, 2L, 0L, 2L, 2L,
5L, 2L, 4L, 6L, 8L, 4L, 1L, 1L, 3L, 2L, 2L, 6L, 1L, 12L, 18L,
7L, 29L, 8L, 4L, 6L, 8L, 6L, 19L, 3L, 9L, 12L, 3L, 12L, 14L,
1L, 2L, 1L, 3L, 1L, 0L, 4L, 6L, 3L, 11L, 0L, 0L, 0L, 1L, 3L,
7L, 5L, 8L, 21L, 26L, 51L, 0L, 1L, 0L, 3L, 5L, 10L, 9L, 29L,
55L, 60L, 125L, 3L, 0L, 1L, 1L, 3L, 10L, 1L, 6L, 18L, 17L, 13L,
6L, 3L, 4L, 13L, 6L, 33L, 17L, 48L, 84L, 54L, 103L, 34L, 11L,
20L, 27L, 26L, 50L, 29L, 30L, 54L, 28L, 34L, 31L, 5L, 7L, 3L,
4L, 20L, 8L, 16L, 16L, 8L, 41L, 1L, 0L, 0L, 3L, 1L, 3L, 3L, 11L,
19L, 16L, 56L, 0L, 0L, 0L, 0L, 3L, 11L, 3L, 18L, 25L, 21L, 62L,
3L, 0L, 1L, 4L, 2L, 7L, 8L, 15L, 22L, 12L, 19L, 5L, 2L, 8L, 9L,
9L, 42L, 18L, 51L, 70L, 45L, 103L, 29L, 15L, 23L, 34L, 25L, 57L,
23L, 38L, 55L, 30L, 33L, 36L, 5L, 5L, 6L, 6L, 16L, 6L, 10L, 17L,
9L, 35L, 2L, 0L, 1L, 1L, 2L, 4L, 6L, 8L, 22L, 33L, 73L, 0L, 0L,
0L, 1L, 2L, 7L, 7L, 15L, 27L, 21L, 56L, 1L, 2L, 2L, 0L, 2L, 9L,
4L, 8L, 24L, 13L, 17L, 14L, 2L, 8L, 10L, 16L, 51L, 16L, 51L,
69L, 29L, 99L, 44L, 18L, 25L, 34L, 19L, 49L, 26L, 43L, 63L, 15L,
30L, 42L, 9L, 17L, 7L, 3L, 16L, 8L, 13L, 22L, 18L, 45L, 0L, 0L,
1L, 3L, 0L, 7L, 4L, 14L, 15L, 20L, 47L, 0L, 1L, 0L, 1L, 1L, 3L,
3L, 5L, 6L, 11L, 21L, 1L, 0L, 0L, 4L, 2L, 3L, 8L, 7L, 17L, 3L,
13L, 5L, 2L, 6L, 13L, 15L, 34L, 19L, 42L, 62L, 37L, 83L, 52L,
16L, 26L, 26L, 29L, 53L, 28L, 45L, 45L, 15L, 22L, 26L, 8L, 12L,
11L, 5L, 12L, 5L, 7L, 17L, 10L, 28L), .Dim = c(11L, 6L, 5L), .Dimnames = structure(list(
c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"),
c("I've changed for work/ a new job/ gone on a work plan",
"I want a phone that doesn't offer", "I want Best Mates/ Favourites",
"I was offered or saw a better offer on another network",
"Issues with the network (poor coverage)", "Other"
), YearQuarter = c("2011-09-01", "2011-12-01", "2012-03-01",
"2012-06-01", "2012-09-01")), .Names = c("", "", "YearQuarter"
)), class = "table")
############ recoded data
x10 <- structure(c(40L, 3L, 13L, 12L, 3L, 9L, 12L, 13L, 10L, 36L, 16L,
30L, 15L, 54L, 21L, 14L, 22L, 10L, 77L, 16L, 29L, 185L, 28L,
84L, 30L, 19L, 24L, 157L, 82L, 132L, 62L, 197L, 84L, 49L, 78L,
32L, 72L, 11L, 30L, 83L, 17L, 43L, 31L, 25L, 37L, 148L, 93L,
121L, 63L, 206L, 93L, 44L, 80L, 27L, 106L, 16L, 30L, 77L, 17L,
42L, 30L, 20L, 32L, 128L, 117L, 120L, 45L, 215L, 106L, 63L, 102L,
35L, 67L, 15L, 29L, 32L, 9L, 11L, 16L, 18L, 24L, 120L, 94L, 104L,
37L, 230L, 90L, 38L, 79L, 24L), .Dim = c(3L, 6L, 5L), .Dimnames = structure(list(
c("Promoters", "Detractors", "Passive"), c("I've changed for work/ a new job/ gone on a work plan",
"I want a phone that doesn't offer", "I want Best Mates/ Favourites",
"I was offered or saw a better offer on another network",
"Issues with the network (poor coverage)", "Other"
), YearQuarter = c("2011-09-01", "2011-12-01", "2012-03-01",
"2012-06-01", "2012-09-01")), .Names = c("", "", "YearQuarter"
)), class = "table")
# Share of each row category within every (YearQuarter, reason) cell,
# rounded to two decimals and then expressed as a percentage.
x10.p <- round(prop.table(x10, margin = c(3, 2)), digits = 2) * 100
Hi there
The Net Promoter Score is based on a question which asks consumers to rate 'the likelihood to recommend the product or the service' on a zero-to-ten scale. People who answer 10 or 9 are called 'Promoters', people who answer 8 or 7 are seen as 'Passive', and people who answer 6 or less are considered 'Detractors'. The Net Promoter Score is the percentage of 'Promoters' minus the percentage of 'Detractors'.
I summarised and recoded the answers to the question into a table x10 covering Sep 2011 to Sep 2012. The numbers are actual people counts for each group (Promoter, Detractor and Passive). Apologies for the three-dimensional table; I am interested in the Net Promoter Score for each reason (i.e. what is the percentage difference between the promoters and detractors for "I've changed for work/ a new job/ gone on a work plan" in Sep 2012).
I need to calculate the Net Promoter Score before I can plot it, which requires a bit of manipulation. Does anyone know how to do it?
Cheers
First, don't round until you've done all your calculations (otherwise your percentages may not add up to 100).
# Net Promoter Score per reason and quarter.
# NOTE: head(x, 7) / tail(x, 2) below assume x10 is the uncoded table
# whose first dimension holds the eleven scores "0".."10" in that order;
# confirm before running this on the recoded three-row table.

# Percentage of each score within every (YearQuarter, reason) cell,
# left unrounded so the groups still sum to 100.
x10.p <- prop.table(x10, margin = c(3, 2)) * 100
# get the total promoters: the last two rows of each cell
promoters <- apply(x10.p, 2:3, function(x) sum(tail(x, 2)))
# and detractors: the first seven rows of each cell
detractors <- apply(x10.p, 2:3, function(x) sum(head(x, 7)))
# passive is everything else
passive <- 100 - (detractors + promoters)
# the net score
net <- promoters - detractors
net
YearQuarter
2011-09-01 2011-12-01 2012-03-01 2012-06-01 2012-09-01
I've changed for work/ a new job/ gone on a work plan 66.071429 50.00000 53.982301 59.210526 46.846847
I want a phone that doesn't offer 37.500000 52.86195 46.153846 44.117647 44.230769
I want Best Mates/ Favourites -2.857143 15.06849 6.451613 12.195122 -3.448276
I was offered or saw a better offer on another network 24.390244 20.21563 15.193370 3.013699 8.176101
Issues with the network (poor coverage) -43.333333 -39.35860 -39.502762 -46.448087 -54.061625
Other -17.391304 -18.23899 -23.841060 -19.500000 -29.078014
You want September 2012, so select just that column, with drop = FALSE to ensure the result is still a matrix with 1 column.
# Keep only the 2012-09-01 column; drop = FALSE preserves the matrix shape.
net[, "2012-09-01", drop = FALSE]
YearQuarter
2012-09-01
I've changed for work/ a new job/ gone on a work plan 46.846847
I want a phone that doesn't offer 44.230769
I want Best Mates/ Favourites -3.448276
I was offered or saw a better offer on another network 8.176101
Issues with the network (poor coverage) -54.061625
Other -29.078014