Related
At the moment, the legend is Quartile 4. HR 0.62 (95%CI 0.10-3.72), P=0.60.
I would like to add a condition as follows: if the P-value is >= 0.95, the legend should read only "Quartile 4. P=0.99", i.e. without the HR and the 95% CI.
As currently written, the HR and CI make no sense in that case.
With this code:
#libraries
library(readxl)
library(tidyverse)
library(tidytidbits)
library(survivalAnalysis)
library(dplyr)
library(survival)
library(survminer)
library(ggplot2)
library(ggthemes)
library(ggpubr)
# Build one legend tag per non-reference quartile from a Cox model, e.g.
# "Quartile 4. HR 0.62 (95% CI 0.10-3.72), P=0.60".
# Expects a data frame `df` with the columns delta_mon1_baseline_to_d3,
# mace_months_date_vs_date_sample and mace.

# variable that is split into quartiles
df$quantile <- df$delta_mon1_baseline_to_d3

# define quartile: quantile() returns the 0/25/50/75/100% cut points; dropping
# the 5th one (the maximum) keeps the largest value in interval 4 instead of
# opening a fifth interval. Rows with a missing value stay NA.
quartile_breaks <- quantile(df$quantile, na.rm = TRUE)[-5]

# factor Quartile, so coxph() estimates one coefficient per non-reference level
df$Quartile <- factor(findInterval(df$quantile, quartile_breaks))

# cox regression
cox <- coxph(Surv(mace_months_date_vs_date_sample, mace) ~ Quartile, data = df)
cox_summary <- summary(cox)

# p-values, selected by column name rather than by position so the right
# column is used even if the layout of the coefficient table differs
coxP <- unname(cox_summary$coefficients[, "Pr(>|z|)"])

# create the tags
# NOTE: HR and 95% CI are printed for every row, whatever the p-value is.
coxConf <- data.frame(cox_summary$conf.int) %>%
  rownames_to_column() %>%
  mutate(
    # p-value exactly as it will be printed (2 decimals, trailing zeros kept)
    p_text = sprintf("%.2f", coxP),
    tag = paste0(
      # "Quartile2" -> "Quartile 2"
      gsub("(\\D)(\\d)", "\\1 \\2", rowname),
      ". HR ", sprintf("%.2f", exp.coef.),
      " (95% CI ", sprintf("%.2f", lower..95),
      "-", sprintf("%.2f", upper..95),
      "), P=",
      # a p-value that would print as 1.00 is shown as 0.99 instead
      ifelse(p_text == "1.00", "0.99", p_text)
    )
  ) %>%
  select(tag)

# validate as expected
coxConf
I have got this legend in the graph
Here are my data:
ID age sex mace mace_months_date_vs_date_sample trop egfr dm smoke delta_mon1_baseline_to_d3
1 44 52 1 1 30 2600 56 1 0 -822.
2 32 66 1 0 73 1710 90 1 0 -562.
3 20 56 1 1 5 NA 75 0 1 -502.
4 17 44 1 0 77 840 71 0 0 -389.
5 52 49 1 0 74 1740 71 0 1 -372.
6 57 58 1 0 74 5010 68 0 1 -308.
7 79 68 1 0 45 776 90 0 0 -284.
8 14 74 1 1 6 7120 78 0 0 -279.
9 223 63 1 0 46 4281 90 0 0 -218.
10 56 43 1 0 70 1360 90 1 0 -173.
11 50 54 1 0 70 15300 90 0 1 -163.
12 31 47 0 0 72 6490 77 1 1 -95.7
13 35 47 1 0 77 NA 83 0 0 -71.0
14 36 64 1 1 5 15940 52 0 1 -69.7
15 15 65 1 1 43 6300 49 1 0 -69.6
16 12 57 1 0 71 6020 88 0 1 -66.5
17 43 59 0 0 74 2100 84 0 1 -58.8
18 22 46 1 0 77 5330 88 0 1 -29.3
19 54 59 1 0 71 1500 81 1 1 -25.7
20 26 66 1 0 77 500 51 0 0 -12.5
21 29 73 0 0 77 NA 51 0 0 -2.99
22 25 54 1 0 73 1080 87 0 0 2.81
23 39 54 1 0 74 990 77 0 0 32.9
24 47 62 1 0 69 1420 85 0 0 33.0
25 49 54 1 1 28 NA 76 1 0 44.1
26 24 47 1 0 77 2390 90 0 1 47.7
27 45 51 0 0 73 3710 65 0 1 55.9
28 30 73 0 0 68 3340 48 1 1 117.
29 16 57 1 0 73 180 99 0 1 131.
30 55 47 1 0 70 NA 90 0 1 131.
31 37 81 1 0 74 NA 99 1 1 147.
32 21 46 1 0 75 3600 87 0 1 153.
33 60 72 1 0 76 470 62 0 0 160.
34 18 56 1 0 69 6390 90 0 1 165.
35 13 53 1 0 69 1970 87 1 1 180.
36 19 66 1 0 78 9320 59 0 0 180.
37 33 59 1 0 69 2260 79 0 1 193.
38 139 39 0 0 58 NA 90 0 1 209.
39 38 55 1 0 78 3930 90 1 0 244.
40 27 28 1 0 71 6440 90 0 1 248.
41 58 36 1 0 76 NA 78 1 1 327.
42 61 48 1 0 76 4470 90 0 1 336.
43 42 38 1 0 69 1800 69 0 1 375.
44 28 76 1 0 71 40 90 1 1 419.
Here is the console output:
structure(list(ID = c(44L, 32L, 20L, 17L, 52L, 57L, 79L, 14L,
223L, 56L, 50L, 31L, 35L, 36L, 15L, 12L, 43L, 22L, 54L, 26L,
29L, 25L, 39L, 47L, 49L, 24L, 45L, 30L, 16L, 55L, 37L, 21L, 60L,
18L, 13L, 19L, 33L, 139L, 38L, 27L, 58L, 61L, 42L, 28L, 121L,
192L, 120L, 68L, 41L, 23L, 216L, 136L, 88L, 87L, 182L, 93L, 154L,
94L, 116L, 145L, 228L, 76L, 63L, 59L, 219L, 175L, 164L, 181L,
234L, 146L, 242L, 71L, 67L, 187L, 128L, 151L, 215L, 132L, 173L,
124L, 119L, 224L, 140L, 221L, 172L, 115L, 103L, 73L, 194L, 106L,
193L, 148L, 156L, 203L, 100L, 81L, 190L, 206L, 233L, 189L, 105L,
220L, 85L, 11L, 205L, 131L, 1L, 225L, 183L, 213L, 7L, 147L, 134L,
86L, 69L, 212L, 199L, 75L, 137L, 191L, 245L, 111L, 153L, 112L,
89L, 243L, 109L, 165L, 95L, 231L, 5L, 168L, 159L, 6L, 179L, 77L,
155L, 171L, 174L, 84L, 102L, 207L, 230L, 138L, 188L, 241L, 72L,
235L, 211L, 127L, 237L, 70L, 210L, 110L, 133L, 2L, 218L, 180L,
229L, 65L, 130L, 96L, 226L, 152L, 197L, 178L, 141L, 195L, 92L,
162L, 201L, 217L, 222L, 208L, 104L, 160L, 66L, 74L, 185L, 177L,
123L, 184L, 204L, 227L, 125L, 83L, 8L, 143L, 9L, 3L, 117L, 10L,
198L, 244L, 108L, 34L, 214L, 4L, 97L, 200L, 113L, 80L, 166L,
98L, 238L, 239L, 114L, 167L, 64L, 157L, 90L, 149L, 129L, 170L,
91L, 135L, 122L, 240L, 99L, 236L, 144L, 53L, 176L, 107L, 232L,
163L, 142L, 118L, 126L, 158L, 186L, 82L, 78L, 48L, 62L, 209L,
196L, 46L, 150L, 161L, 169L, 101L, 202L, 51L, 40L), age = c(52L,
66L, 56L, 44L, 49L, 58L, 68L, 74L, 63L, 43L, 54L, 47L, 47L, 64L,
65L, 57L, 59L, 46L, 59L, 66L, 73L, 54L, 54L, 62L, 54L, 47L, 51L,
73L, 57L, 47L, 81L, 46L, 72L, 56L, 53L, 66L, 59L, 39L, 55L, 28L,
36L, 48L, 38L, 76L, NA, 59L, 71L, 58L, 57L, 54L, 69L, 49L, 65L,
48L, 35L, 44L, 65L, 56L, 66L, 41L, 55L, 52L, 67L, 61L, 65L, 61L,
75L, 56L, 37L, 75L, 68L, 59L, 52L, 59L, 59L, 63L, 62L, 57L, 48L,
65L, 41L, 60L, 77L, 66L, 50L, 51L, 81L, 61L, 64L, 48L, 63L, 78L,
79L, 51L, 74L, 52L, 73L, 82L, 58L, 72L, 63L, 67L, 72L, 51L, 68L,
41L, 66L, 69L, 60L, 66L, 71L, 45L, 81L, 52L, 67L, 58L, 63L, 47L,
63L, 67L, 62L, 72L, 75L, 46L, 73L, 57L, 75L, 68L, 68L, 73L, 51L,
59L, 59L, 60L, 54L, 62L, 64L, 48L, 73L, 79L, 58L, 46L, 75L, 63L,
68L, 60L, 54L, 78L, 54L, 46L, 49L, 67L, 79L, 54L, 47L, 51L, 66L,
61L, 64L, 79L, 73L, 51L, 52L, 52L, 76L, 75L, 56L, 76L, 54L, 82L,
62L, 57L, 53L, 42L, 63L, 37L, 66L, 46L, 76L, 39L, 51L, 80L, 69L,
76L, 48L, 65L, 59L, 76L, 57L, 66L, 69L, 68L, 72L, 62L, 56L, 51L,
60L, 71L, 68L, 77L, 28L, 62L, 51L, 61L, 56L, 72L, 79L, 62L, 68L,
68L, 49L, 75L, 64L, 48L, 51L, 68L, 68L, 70L, 73L, 54L, 47L, 79L,
40L, 52L, 58L, 69L, 61L, 44L, 57L, 55L, 43L, 61L, 44L, 77L, 35L,
74L, 72L, 60L, 44L, 53L, 61L, NA, 80L, 73L, 72L), sex = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L,
0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L,
0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L,
1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L,
0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L,
0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 1L), mace = c(1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L,
0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L), mace_months_date_vs_date_sample = c(30L,
73L, 5L, 77L, 74L, 74L, 45L, 6L, 46L, 70L, 70L, 72L, 77L, 5L,
43L, 71L, 74L, 77L, 71L, 77L, 77L, 73L, 74L, 69L, 28L, 77L, 73L,
68L, 73L, 70L, 74L, 75L, 76L, 69L, 69L, 78L, 69L, 58L, 78L, 71L,
76L, 76L, 69L, 71L, 43L, 44L, 5L, 62L, 73L, 72L, 47L, 61L, 47L,
46L, 44L, 43L, 46L, 45L, 16L, 62L, 8L, 48L, 15L, 79L, 62L, 46L,
62L, 45L, 55L, 21L, 41L, 45L, 46L, 45L, 50L, 62L, 43L, 4L, 1L,
4L, 63L, 17L, 3L, 45L, 2L, 50L, 1L, 44L, 15L, 46L, 46L, 7L, 8L,
62L, 16L, 47L, 19L, 61L, 46L, 1L, 4L, 48L, 61L, 43L, 46L, 61L,
44L, 48L, 1L, 5L, 43L, 43L, 46L, 58L, 3L, 55L, 27L, 1L, 47L,
19L, 11L, 1L, 1L, 1L, 2L, 48L, 62L, 61L, 23L, 1L, 55L, 19L, 48L,
1L, 43L, 47L, 1L, 46L, 11L, 46L, 48L, 46L, 32L, 50L, 61L, 62L,
48L, 46L, 55L, 62L, 55L, 43L, 61L, 47L, 59L, 50L, 10L, 55L, 63L,
55L, 48L, 31L, 32L, 48L, 55L, 62L, 59L, 53L, 48L, 48L, 62L, 59L,
4L, 4L, 25L, 59L, 48L, 60L, 43L, 58L, 49L, 1L, 8L, 1L, 63L, 51L,
55L, 47L, 49L, 55L, 49L, 55L, 13L, 60L, 2L, 13L, 3L, 25L, 7L,
62L, 62L, 60L, 10L, 61L, 45L, 3L, 11L, 51L, 47L, 1L, 46L, 63L,
60L, 43L, 62L, 58L, 61L, 1L, 2L, 46L, 1L, 13L, 54L, 48L, 54L,
45L, 45L, 31L, 48L, 42L, 49L, 43L, 61L, 1L, 31L, 1L, 63L, 11L,
47L, 39L, 7L, 42L, 1L, 1L, 1L), trop = c(2600L, 1710L, NA, 840L,
1740L, 5010L, 776L, 7120L, 4281L, 1360L, 15300L, 6490L, NA, 15940L,
6300L, 6020L, 2100L, 5330L, 1500L, 500L, NA, 1080L, 990L, 1420L,
NA, 2390L, 3710L, 3340L, 180L, NA, NA, 3600L, 470L, 6390L, 1970L,
9320L, 2260L, NA, 3930L, 6440L, NA, 4470L, 1800L, 40L, 21876L,
871L, 7860L, NA, 320L, 8450L, 1730L, 262L, 16720L, 1247L, NA,
NA, 54592L, 1241L, 2413L, NA, 45649L, NA, NA, 160L, 843L, NA,
1470L, 372L, 844L, 1454L, 50000L, 11450L, 769L, 2234L, 349L,
250L, 3654L, 8421L, NA, 5204L, 440L, NA, 40273L, 90L, 9352L,
2177L, 10014L, 11L, 11135L, 5256L, 1753L, NA, NA, 50L, 8903L,
3598L, 2483L, NA, NA, NA, 1557L, 5247L, 24L, 2993L, 3624L, 751L,
NA, NA, 24160L, NA, NA, 5687L, 1911L, NA, NA, 1855L, 9951L, 13374L,
2107L, 4927L, 83L, 2380L, 663L, NA, NA, NA, NA, NA, NA, NA, 1627L,
NA, 1211L, 5654L, NA, NA, 10000L, NA, NA, NA, 2956L, 67927L,
NA, 63L, NA, 4790L, NA, NA, 3569L, 961L, 6581L, 253L, 2888L,
33017L, 1675L, 438L, 15543L, 6212L, 6694L, NA, 1945L, 3004L,
3789L, NA, 2844L, 950L, 123L, 6630L, 3220L, 2040L, NA, 6672L,
1480L, 6979L, NA, 1411L, 5711L, NA, 2340L, NA, 57L, NA, 33L,
5110L, NA, 2797L, 1035L, 2840L, 251L, 7671L, 6155L, 4299L, NA,
846L, 2339L, 400L, 86115L, 27L, 87355L, NA, 8669L, NA, NA, NA,
1258L, 3000L, NA, 137L, 3866L, NA, 1312L, NA, NA, NA, NA, NA,
2103L, 1586L, 601L, 1472L, 1692L, NA, 2102L, 6452L, NA, NA, 1244L,
2051L, 1007L, NA, NA, NA, 1726L, 3400L, 2143L, NA, 236L, 3930L,
31026L, NA, NA, NA, NA, 5280L, 1230L), egfr = c(56L, 90L, 75L,
71L, 71L, 68L, 90L, 78L, 90L, 90L, 90L, 77L, 83L, 52L, 49L, 88L,
84L, 88L, 81L, 51L, 51L, 87L, 77L, 85L, 76L, 90L, 65L, 48L, 99L,
90L, 99L, 87L, 62L, 90L, 87L, 59L, 79L, 90L, 90L, 90L, 78L, 90L,
69L, 90L, 87L, 90L, 58L, 90L, 79L, 55L, 51L, 90L, 58L, 90L, 56L,
62L, 86L, 61L, 84L, 63L, 63L, 90L, 90L, 85L, 64L, 67L, 45L, 90L,
78L, 65L, 69L, 90L, 90L, 59L, 54L, 60L, 68L, 86L, 42L, 73L, 90L,
85L, 63L, 90L, 86L, 68L, 71L, 90L, 68L, 63L, 81L, 76L, 61L, 75L,
84L, 90L, 90L, 48L, 90L, 77L, 68L, 90L, 64L, 90L, 90L, 61L, 84L,
80L, 69L, 58L, 65L, 86L, 86L, 46L, 56L, 90L, 46L, 87L, 84L, 68L,
67L, 72L, 35L, 86L, 74L, 78L, 67L, 90L, 90L, 90L, 76L, 90L, 86L,
63L, 63L, 53L, 90L, 90L, 75L, 64L, 69L, 68L, 52L, 49L, 90L, 65L,
86L, 42L, 63L, 90L, 90L, 73L, 75L, 66L, 64L, 90L, 35L, 90L, 82L,
90L, 84L, 61L, 90L, 86L, 51L, 52L, 69L, 54L, 90L, 75L, 85L, 72L,
90L, 80L, 54L, 58L, 90L, 89L, 72L, 90L, 84L, 33L, 74L, 36L, 56L,
61L, 63L, 77L, 84L, 85L, 90L, 90L, 51L, 90L, 90L, 68L, 90L, 52L,
90L, 48L, 90L, 82L, 86L, 67L, 90L, 76L, 14L, 63L, 59L, 82L, 90L,
39L, 77L, 90L, 78L, 74L, 54L, 36L, 58L, 69L, NA, 53L, 90L, 90L,
90L, 88L, 90L, 90L, 90L, 90L, 90L, 63L, 87L, 48L, 90L, 55L, 70L,
65L, 90L, NA, 90L, 90L, 72L, 66L, 55L), dm = c(1L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L,
0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L,
0L, 0L, 2L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, NA, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
0L), smoke = c(0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L,
0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L,
0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L,
0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L,
0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L,
1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
NA, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L), delta_mon1_baseline_to_d3 = c(-821.989379882808,
-562.289733886719, -502.216308593755, -388.549621582031, -372.002563476562,
-308.033813476562, -283.636077880859, -279.422790527344, -218.279922485352,
-173.209777832031, -162.939453124998, -95.700927734375, -70.961883544922,
-69.742797851558, -69.5900268554681, -66.49755859375, -58.77816772461,
-29.29504394531, -25.714111328125, -12.548919677734, -2.99462890625,
2.80639648437602, 32.883270263672, 33.036499023438, 44.05969238281,
47.6982421875, 55.87646484375, 116.716430664065, 130.585632324218,
131.392028808594, 146.8232421875, 153.190795898438, 159.863708496094,
164.985534667969, 179.8525390625, 180.041305541992, 192.978088378906,
208.791275024414, 243.90209960937, 248.09851074219, 327.035522460937,
336.011077880859, 375.086456298828, 419.108337402341, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), stratum = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA), .Label = c("1", "2", "3", "4"), class = "factor"), surv = structure(c(30,
73, 5, 77, 74, 74, 45, 6, 46, 70, 70, 72, 77, 5, 43, 71, 74,
77, 71, 77, 77, 73, 74, 69, 28, 77, 73, 68, 73, 70, 74, 75, 76,
69, 69, 78, 69, 58, 78, 71, 76, 76, 69, 71, 43, 44, 5, 62, 73,
72, 47, 61, 47, 46, 44, 43, 46, 45, 16, 62, 8, 48, 15, 79, 62,
46, 62, 45, 55, 21, 41, 45, 46, 45, 50, 62, 43, 4, 1, 4, 63,
17, 3, 45, 2, 50, 1, 44, 15, 46, 46, 7, 8, 62, 16, 47, 19, 61,
46, 1, 4, 48, 61, 43, 46, 61, 44, 48, 1, 5, 43, 43, 46, 58, 3,
55, 27, 1, 47, 19, 11, 1, 1, 1, 2, 48, 62, 61, 23, 1, 55, 19,
48, 1, 43, 47, 1, 46, 11, 46, 48, 46, 32, 50, 61, 62, 48, 46,
55, 62, 55, 43, 61, 47, 59, 50, 10, 55, 63, 55, 48, 31, 32, 48,
55, 62, 59, 53, 48, 48, 62, 59, 4, 4, 25, 59, 48, 60, 43, 58,
49, 1, 8, 1, 63, 51, 55, 47, 49, 55, 49, 55, 13, 60, 2, 13, 3,
25, 7, 62, 62, 60, 10, 61, 45, 3, 11, 51, 47, 1, 46, 63, 60,
43, 62, 58, 61, 1, 2, 46, 1, 13, 54, 48, 54, 45, 45, 31, 48,
42, 49, 43, 61, 1, 31, 1, 63, 11, 47, 39, 7, 42, 1, 1, 1, 1,
0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0,
0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1,
0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0,
0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,
0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0,
0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1), .Dim = c(245L, 2L), .Dimnames = list(
NULL, c("time", "status")), type = "right", class = "Surv")), row.names = c(NA,
-245L), class = c("tbl_df", "tbl", "data.frame"))
Thank you very much for your help,
Friends,
I'm having an issue with the Kruskal-Wallis test in R when testing for stable seasonality. The p-values for each variable come out the same. I am using kruskal.test(formula, data = mydata) from the stats package. I'm having a hard time believing that the p-values would be the same.
My dataset is a monthly dataset with 163 observations, 3 macroeconomic variables in the model and two seasonal dummies.
I'm testing each independent macroeconomic variable against the dependent variable in the following way: kruskal.test(y ~ x, data = mydata). So for the data example below it would be kruskal.test(pr ~ mev06_mp_lag2, data = mydata), repeated for each mev in the dataset. The p-values for all 3 mevs (mev06_mp_lag2, mev29_lag2, mev108_lag1) come out the same, as in this output:
data: pr by mev29_lag2
Kruskal-Wallis chi-squared = 162, df = 162, p-value = 0.4852
Here is the data:
structure(list(date = structure(c(28L, 56L, 42L, 97L, 1L, 111L,
83L, 70L, 15L, 151L, 138L, 125L, 29L, 57L, 43L, 98L, 2L, 112L,
84L, 71L, 16L, 152L, 139L, 126L, 30L, 58L, 44L, 99L, 3L, 113L,
85L, 72L, 17L, 153L, 140L, 127L, 31L, 59L, 45L, 100L, 4L, 114L,
86L, 73L, 18L, 154L, 141L, 128L, 32L, 60L, 46L, 101L, 5L, 115L,
87L, 74L, 19L, 155L, 142L, 129L, 33L, 61L, 47L, 102L, 6L, 116L,
88L, 75L, 20L, 156L, 143L, 130L, 34L, 62L, 48L, 103L, 7L, 117L,
89L, 76L, 21L, 157L, 144L, 131L, 35L, 63L, 49L, 104L, 8L, 118L,
90L, 77L, 22L, 158L, 145L, 132L, 36L, 64L, 50L, 105L, 9L, 119L,
91L, 78L, 23L, 159L, 146L, 133L, 37L, 65L, 51L, 106L, 10L, 120L,
92L, 79L, 24L, 160L, 147L, 134L, 38L, 66L, 52L, 107L, 11L, 121L,
93L, 80L, 25L, 161L, 148L, 135L, 39L, 67L, 53L, 108L, 12L, 122L,
94L, 81L, 26L, 162L, 149L, 136L, 40L, 68L, 54L, 109L, 13L, 123L,
95L, 82L, 27L, 163L, 150L, 137L, 41L, 69L, 55L, 110L, 14L, 124L,
96L), .Label = c("01APR2006", "01APR2007", "01APR2008", "01APR2009",
"01APR2010", "01APR2011", "01APR2012", "01APR2013", "01APR2014",
"01APR2015", "01APR2016", "01APR2017", "01APR2018", "01APR2019",
"01AUG2006", "01AUG2007", "01AUG2008", "01AUG2009", "01AUG2010",
"01AUG2011", "01AUG2012", "01AUG2013", "01AUG2014", "01AUG2015",
"01AUG2016", "01AUG2017", "01AUG2018", "01DEC2005", "01DEC2006",
"01DEC2007", "01DEC2008", "01DEC2009", "01DEC2010", "01DEC2011",
"01DEC2012", "01DEC2013", "01DEC2014", "01DEC2015", "01DEC2016",
"01DEC2017", "01DEC2018", "01FEB2006", "01FEB2007", "01FEB2008",
"01FEB2009", "01FEB2010", "01FEB2011", "01FEB2012", "01FEB2013",
"01FEB2014", "01FEB2015", "01FEB2016", "01FEB2017", "01FEB2018",
"01FEB2019", "01JAN2006", "01JAN2007", "01JAN2008", "01JAN2009",
"01JAN2010", "01JAN2011", "01JAN2012", "01JAN2013", "01JAN2014",
"01JAN2015", "01JAN2016", "01JAN2017", "01JAN2018", "01JAN2019",
"01JUL2006", "01JUL2007", "01JUL2008", "01JUL2009", "01JUL2010",
"01JUL2011", "01JUL2012", "01JUL2013", "01JUL2014", "01JUL2015",
"01JUL2016", "01JUL2017", "01JUL2018", "01JUN2006", "01JUN2007",
"01JUN2008", "01JUN2009", "01JUN2010", "01JUN2011", "01JUN2012",
"01JUN2013", "01JUN2014", "01JUN2015", "01JUN2016", "01JUN2017",
"01JUN2018", "01JUN2019", "01MAR2006", "01MAR2007", "01MAR2008",
"01MAR2009", "01MAR2010", "01MAR2011", "01MAR2012", "01MAR2013",
"01MAR2014", "01MAR2015", "01MAR2016", "01MAR2017", "01MAR2018",
"01MAR2019", "01MAY2006", "01MAY2007", "01MAY2008", "01MAY2009",
"01MAY2010", "01MAY2011", "01MAY2012", "01MAY2013", "01MAY2014",
"01MAY2015", "01MAY2016", "01MAY2017", "01MAY2018", "01MAY2019",
"01NOV2006", "01NOV2007", "01NOV2008", "01NOV2009", "01NOV2010",
"01NOV2011", "01NOV2012", "01NOV2013", "01NOV2014", "01NOV2015",
"01NOV2016", "01NOV2017", "01NOV2018", "01OCT2006", "01OCT2007",
"01OCT2008", "01OCT2009", "01OCT2010", "01OCT2011", "01OCT2012",
"01OCT2013", "01OCT2014", "01OCT2015", "01OCT2016", "01OCT2017",
"01OCT2018", "01SEP2006", "01SEP2007", "01SEP2008", "01SEP2009",
"01SEP2010", "01SEP2011", "01SEP2012", "01SEP2013", "01SEP2014",
"01SEP2015", "01SEP2016", "01SEP2017", "01SEP2018"), class = "factor"),
pr = c(0.1691759261, 0.1975689455, 0.1701795466, 0.1889038722,
0.1743304586, 0.1850822209, 0.1725476026, 0.1806130453, 0.1769864586,
0.1546961801, 0.18850436, 0.1695999754, 0.1660947088, 0.1929270116,
0.1629685381, 0.1716883769, 0.1782082767, 0.177316379, 0.1586548395,
0.1816295787, 0.1634939904, 0.1653658139, 0.1669465832, 0.1547769918,
0.17154596, 0.1824150313, 0.1600967574, 0.1819462462, 0.1625842114,
0.1605423212, 0.174298958, 0.16859091, 0.1567519737, 0.1549443922,
0.1528250707, 0.1563427163, 0.1562236709, 0.1544731644, 0.1595362963,
0.1749852828, 0.1536175907, 0.1668984941, 0.1532514745, 0.152745466,
0.1590015917, 0.1500819546, 0.1504755171, 0.1583227453, 0.1546476157,
0.1634331963, 0.1565167637, 0.1699421465, 0.1657200266, 0.1642684245,
0.1675084975, 0.1617848489, 0.1662501795, 0.1648139984, 0.1645302595,
0.169286769, 0.1707244798, 0.1845315559, 0.1752391568, 0.1899788506,
0.1784046029, 0.1842806875, 0.1836403012, 0.1753696341, 0.1738240496,
0.1747609205, 0.1724421753, 0.1803992831, 0.1763816185, 0.187630168,
0.1877238382, 0.1860668525, 0.1854666743, 0.1860146483, 0.1781037416,
0.185259322, 0.1879122146, 0.178520754, 0.1875367517, 0.18694397,
0.1860777227, 0.1979044449, 0.1833497201, 0.192027271, 0.1926325454,
0.1916103719, 0.1851319974, 0.1864458557, 0.1832327814, 0.1808570791,
0.1851145899, 0.1815387272, 0.1870942258, 0.1943564723, 0.1862582923,
0.1907279007, 0.1859213896, 0.1865372709, 0.1898453914, 0.1847275775,
0.1736567497, 0.1771092243, 0.1822902114, 0.1840752276, 0.1892670811,
0.1923250842, 0.1852956789, 0.1917880299, 0.18771724, 0.1857801687,
0.1868263217, 0.1867604143, 0.1824500898, 0.1758283625, 0.1829290332,
0.1808247326, 0.183507277, 0.1852845389, 0.1808714285, 0.1818222883,
0.1755951829, 0.1774808136, 0.1775837234, 0.1696830467, 0.172385402,
0.1694350722, 0.168336944, 0.1680335702, 0.1684147459, 0.1726731413,
0.1633235864, 0.1707780779, 0.1606329755, 0.1634684695, 0.1652849939,
0.15803428, 0.1616158193, 0.1527704105, 0.1584612931, 0.1550232032,
0.1534022945, 0.164970584, 0.1565023361, 0.1622506128, 0.1551517442,
0.1539405645, 0.152548495, 0.1516353176, 0.1523898229, 0.1477241538,
0.1502876518, 0.1515682192, 0.1540217905, 0.1589165786, 0.1531622236,
0.1583882529, 0.1532322761, 0.157552401, 0.1621688871), month = c(12L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L), mon1 = c(0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L), mon3 = c(0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L), mev06_mp_lag2 = c(0.2779810102,
0.1874272639, 0.1332826385, 0.1128640237, 0.1247535199, 0.1545791804,
0.2106891929, 0.2757365926, 0.329455103, 0.3808671396, 0.4450555294,
0.5340975751, 0.5971738413, 0.5881040948, 0.4793350636, 0.3124264887,
0.2197636246, 0.2206435437, 0.3113169675, 0.4196078671, 0.5003884945,
0.5494487995, 0.5369484545, 0.4606922562, 0.3338162715, 0.278520389,
0.3170366404, 0.4156696136, 0.4787532552, 0.4443344043, 0.3681819294,
0.2878537618, 0.2048228841, 0.1251537938, 0.0382989338, -0.058589422,
-0.142185008, -0.153725768, -0.074125689, 0.0484987522, 0.0608517463,
-0.079803144, -0.303655154, -0.429635585, -0.363580402, -0.1573843,
0.0420304555, 0.1835101363, 0.2542206609, 0.2533515836, 0.1774048348,
0.0536834552, -0.031620066, -0.048554527, -0.010029088, 0.0691957026,
0.1865379823, 0.314751579, 0.3867383564, 0.3849543674, 0.3270672177,
0.3352052154, 0.4333568873, 0.5807725419, 0.6594152281, 0.5820169704,
0.4614498827, 0.382189864, 0.3472850124, 0.3700953746, 0.4332794073,
0.5388940866, 0.6346031107, 0.6722549883, 0.6226019329, 0.5308626721,
0.5406836123, 0.652356085, 0.8470071782, 0.9341209812, 0.8264468016,
0.612419938, 0.5006911837, 0.5691599433, 0.7307708771, 0.8473791813,
0.8590757515, 0.7900410964, 0.7171039073, 0.6076028502, 0.5505395263,
0.5661995614, 0.631423817, 0.7324609809, 0.776800689, 0.7461146765,
0.6396693594, 0.5909067989, 0.6163303443, 0.6923212327, 0.7608602548,
0.7385415186, 0.7245230167, 0.735008075, 0.7303155287, 0.7306620594,
0.7216900251, 0.710357153, 0.668241137, 0.6465248078, 0.6386886106,
0.644503099, 0.6750915049, 0.6733980993, 0.707678618, 0.7411667711,
0.7159390625, 0.6659808449, 0.6197029436, 0.5965547889, 0.5673138317,
0.5608362128, 0.5669008884, 0.5795942214, 0.5905982279, 0.556992012,
0.5359266787, 0.5449271219, 0.5753646848, 0.6196930073, 0.6313425488,
0.6047324646, 0.5262327459, 0.4680502206, 0.4339327769, 0.422330442,
0.4388551617, 0.4449027001, 0.4724310877, 0.4603556503, 0.3559313099,
0.2192993453, 0.1752438701, 0.2708768468, 0.4398555582, 0.5419383533,
0.5258750189, 0.4264906744, 0.3512451556, 0.3047050285, 0.3177822041,
0.3703341357, 0.4374805453, 0.5119974656, 0.5479752418, 0.5383546522,
0.4763979544, 0.4418530239, 0.4423212346, 0.4638361889, 0.4725955269,
0.4199050848, 0.3677860365), mev29_lag2 = c(12052.672746,
12155.974991, 12259.977269, 12364.551523, 12471.923335, 12575.751994,
12681.578091, 12792.424151, 12903.799861, 13014.933326, 13125.644747,
13237.759633, 13347.540807, 13456.257594, 13563.261568, 13668.005405,
13772.061616, 13868.872889, 13963.208033, 14057.010446, 14145.406294,
14227.079383, 14301.142959, 14368.046479, 14424.924247, 14471.887375,
14508.019112, 14532.668323, 14547.065728, 14552.236417, 14550.020205,
14541.465439, 14527.537817, 14509.400483, 14488.246542, 14464.991414,
14441.692779, 14419.373969, 14399.416496, 14382.82297, 14369.044585,
14358.108259, 14348.715697, 14340.186543, 14332.550823, 14325.428273,
14318.322395, 14310.559769, 14301.864431, 14291.633935, 14279.435535,
14264.935547, 14247.97805, 14230.01465, 14210.49904, 14189.108376,
14166.881283, 14144.225632, 14121.472414, 14098.568702, 14076.59218,
14055.590158, 14035.983138, 14018.088095, 14001.533115, 13987.079436,
13973.759653, 13961.158726, 13949.839264, 13939.826368, 13931.070165,
13923.347123, 13916.816802, 13911.291278, 13906.706121, 13903.022798,
13900.161493, 13898.209865, 13897.051213, 13896.655547, 13897.047312,
13898.205564, 13900.125572, 13902.837452, 13906.230209, 13910.294112,
13914.960492, 13920.218961, 13926.287609, 13932.889015, 13940.451345,
13949.327157, 13959.352267, 13970.583834, 13983.14564, 13997.391872,
14012.965904, 14030.139859, 14048.917902, 14069.304752, 14091.541249,
14113.971365, 14137.471712, 14162.48361, 14187.783215, 14212.951734,
14237.687089, 14262.119284, 14285.160082, 14306.785799, 14326.567908,
14344.249129, 14360.498045, 14374.927988, 14388.841191, 14403.027623,
14417.285193, 14431.921345, 14447.347759, 14464.280067, 14482.60458,
14503.01009, 14525.873936, 14551.515778, 14580.356316, 14610.776601,
14643.555251, 14679.101052, 14716.763371, 14756.356798, 14797.710201,
14841.323243, 14885.552108, 14930.758122, 14976.563876, 15022.743933,
15070.254048, 15116.300407, 15163.332681, 15212.634721, 15262.129309,
15311.443993, 15360.633228, 15410.700926, 15460.012042, 15508.70943,
15555.948922, 15601.38129, 15647.017242, 15691.593748, 15737.814211,
15784.098257, 15824.336441, 15857.184087, 15890.739854, 15937.050823,
15997.292301, 16049.370568, 16063.033239, 16023.148233, 15962.775179,
15932.931115, 15961.380588), mev108_lag1 = c(3.4265582593,
3.8373450191, 4.1211669551, 4.2500265274, 4.2336477943, 4.1032530543,
3.9050112432, 3.691568661, 3.5215361911, 3.4547437295, 3.5245107487,
3.6740870118, 3.8205614376, 3.9060148228, 3.9500668579, 3.9928147249,
4.056423068, 4.097207087, 4.0423248638, 3.8590572205, 3.6249134397,
3.4534377102, 3.419037145, 3.448572797, 3.4287569276, 3.3235979183,
3.3376619007, 3.7361174237, 4.6156476062, 5.5516500424, 5.9018553329,
5.3364327802, 4.406525535, 3.9641497661, 4.5369688556, 5.6155652665,
6.3806850947, 6.3128039966, 5.8286655665, 5.6572058382, 6.1906323861,
7.0408483819, 7.4827400214, 7.0669869294, 6.1581569245, 5.3936717805,
5.2364436715, 5.4913612016, 5.777206406, 5.8339229216, 5.7719456704,
5.8170713396, 6.1029576358, 6.5263492298, 6.8736849118, 6.9975096947,
6.9363923153, 6.7924979551, 6.6668133872, 6.6299076039, 6.7439828613,
7.0243025303, 7.3370606372, 7.4869066644, 7.3844430207, 7.1374881632,
6.940002926, 6.9245088132, 7.0301738798, 7.1305865095, 7.1405475978,
7.1156467585, 7.1524809409, 7.3303394277, 7.6756343523, 8.1680801673,
8.7542261364, 9.1808145707, 9.1010680729, 8.4114150872, 7.6844861301,
7.7270955321, 8.9146989491, 10.361039125, 10.796323189, 9.4618739177,
7.2049954246, 5.5270537994, 5.2221817889, 5.905531143, 6.7592672119,
7.1298927381, 7.0304213613, 6.697874346, 6.3607611025, 6.1569021347,
6.2001333982, 6.5397429639, 7.0184856606, 7.3825719382, 7.5069332339,
7.4599546294, 7.377008726, 7.3638030204, 7.3988155209, 7.4176473452,
7.3829883718, 7.3415942425, 7.3652515353, 7.492033304, 7.6543284954,
7.7427624077, 7.7070473944, 7.6101649913, 7.5623895662, 7.6286991237,
7.7329248639, 7.7505651547, 7.6137269809, 7.4246691851, 7.337208565,
7.4360967197, 7.5892255476, 7.5910082105, 7.3256377393, 6.9067676469,
6.5375463809, 6.3577677595, 6.320229607, 6.3124546301, 6.2662262884,
6.2427837167, 6.3428922976, 6.6124818018, 6.9249171793, 7.0836464531,
6.9995311857, 6.784745399, 6.6375952256, 6.6797395345, 6.7927792813,
6.775540136, 6.5260699355, 6.2318486432, 6.1687507324, 6.4951667771,
7.0000862167, 7.3264282363, 7.2857205376, 6.9859881738, 6.6532338989,
6.4623367973, 6.4024537545, 6.3988018644, 6.3987025271, 6.4148188331,
6.4801548851, 6.6043861168, 6.7236064103, 6.7473536828, 6.6336225214,
6.4408520391, 6.2759289867), p_pr = c(0.1841979358, 0.1909299357,
0.1800235425, 0.1873193897, 0.1778321909, 0.1771717461, 0.1769871609,
0.1769369574, 0.1767002661, 0.1766514006, 0.1772474365, 0.1786372508,
0.1793958093, 0.1873407005, 0.1744738837, 0.1779058647, 0.1660300916,
0.165123522, 0.1662612377, 0.1675426585, 0.1680743656, 0.1680322376,
0.1668552618, 0.1643117778, 0.1604937471, 0.1674889291, 0.1589809185,
0.1707308583, 0.1656141418, 0.1669016231, 0.1658465865, 0.1626002246,
0.1584857239, 0.1556467109, 0.1550484409, 0.1554116407, 0.1553698903,
0.1642789961, 0.1562188049, 0.1676637554, 0.1607636607, 0.159365876,
0.154912779, 0.1508778098, 0.1504706517, 0.1538985266, 0.1585854408,
0.1628016268, 0.1653325485, 0.1746734474, 0.1636385773, 0.1694169075,
0.1595285254, 0.1602916429, 0.1622777106, 0.1647745096, 0.1677972871,
0.170901438, 0.1726448513, 0.1727558383, 0.1718106875, 0.182016627,
0.1762909312, 0.1891248658, 0.1824141631, 0.1800526397, 0.1767170916,
0.1748339829, 0.1743303929, 0.1752424115, 0.1769369171, 0.17959844,
0.182145123, 0.1926835257, 0.1831830764, 0.190698247, 0.1837433962,
0.1875573393, 0.1922445975, 0.1928025222, 0.1883983926, 0.1831397417,
0.1831222451, 0.1882066078, 0.1932319714, 0.2020834894, 0.1878958952,
0.1907776136, 0.179564677, 0.1783669915, 0.1788699402, 0.1800391448,
0.1813284168, 0.1829512395, 0.1831328753, 0.181735949, 0.1790137171,
0.1875337053, 0.1799754626, 0.191124027, 0.1842840392, 0.1833786054,
0.1825845794, 0.182550754, 0.1822481672, 0.1820347832, 0.1814673532,
0.18082831, 0.1795880318, 0.1882358605, 0.1790916575, 0.1878672726,
0.1797660056, 0.1793430747, 0.1799398102, 0.1807822543, 0.180246357,
0.1788849577, 0.1772437109, 0.1760414846, 0.1749113359, 0.1838871358,
0.1750360156, 0.1836953752, 0.1744313344, 0.1722844661, 0.170542729,
0.1699684655, 0.1702419601, 0.1709120463, 0.1706566897, 0.1694752567,
0.1672817086, 0.175105, 0.1653820849, 0.1735863964, 0.1646891174,
0.1638476083, 0.1636914003, 0.1629671545, 0.1601006771, 0.1561250286,
0.1539170317, 0.1550840353, 0.1586350423, 0.1705586865, 0.1617244458,
0.1681380973, 0.1570702457, 0.1547307475, 0.1537854739, 0.1541593825,
0.155270079, 0.1567753976, 0.1573188283, 0.1566263272, 0.154594785,
0.1625938782, 0.1536205501, 0.1632453909, 0.1552261163, 0.1537721633,
0.1517811103), r_pr = c(-0.01502201, 0.0066390098, -0.009843996,
0.0015844825, -0.003501732, 0.0079104748, -0.004439558, 0.003676088,
0.0002861925, -0.02195522, 0.0112569236, -0.009037275, -0.013301101,
0.0055863112, -0.011505346, -0.006217488, 0.0121781851, 0.0121928571,
-0.007606398, 0.0140869202, -0.004580375, -0.002666424, 9.13213e-05,
-0.009534786, 0.0110522129, 0.0149261022, 0.0011158389, 0.0112153879,
-0.00302993, -0.006359302, 0.0084523714, 0.0059906854, -0.00173375,
-0.000702319, -0.00222337, 0.0009310756, 0.0008537806, -0.009805832,
0.0033174915, 0.0073215274, -0.00714607, 0.0075326181, -0.001661304,
0.0018676562, 0.0085309399, -0.003816572, -0.008109924, -0.004478882,
-0.010684933, -0.011240251, -0.007121814, 0.000525239, 0.0061915012,
0.0039767816, 0.0052307869, -0.002989661, -0.001547108, -0.00608744,
-0.008114592, -0.003469069, -0.001086208, 0.0025149289, -0.001051774,
0.0008539848, -0.00400956, 0.0042280478, 0.0069232096, 0.0005356512,
-0.000506343, -0.000481491, -0.004494742, 0.0008008432, -0.005763504,
-0.005053358, 0.0045407618, -0.004631395, 0.0017232781, -0.001542691,
-0.014140856, -0.0075432, -0.000486178, -0.004618988, 0.0044145066,
-0.001262638, -0.007154249, -0.004179044, -0.004546175, 0.0012496574,
0.0130678684, 0.0132433805, 0.0062620573, 0.0064067109, 0.0019043646,
-0.00209416, 0.0019817146, -0.000197222, 0.0080805087, 0.0068227671,
0.0062828296, -0.000396126, 0.0016373504, 0.0031586655, 0.007260812,
0.0021768236, -0.008591417, -0.004925559, 0.0008228582, 0.0032469176,
0.0096790493, 0.0040892237, 0.0062040214, 0.0039207574, 0.0079512344,
0.006437094, 0.0068865115, 0.0059781601, 0.0022037328, -0.003056595,
0.0056853223, 0.004783248, 0.008595941, 0.0013974031, 0.0058354128,
-0.001873087, 0.0011638485, 0.0051963475, 0.0070409944, -0.000285419,
0.0021434419, -0.001476974, -0.002319746, -0.001441687, 0.0011330373,
-0.002431859, -0.002058499, -0.002808318, -0.004056142, -0.000379139,
0.0015935936, -0.004932874, 0.0015151421, -0.003354618, 0.0045442614,
-6.0832e-05, -0.005232748, -0.005588103, -0.00522211, -0.005887484,
-0.001918502, -0.000790183, -0.001236979, -0.002524065, -0.002880256,
-0.009051244, -0.007031176, -0.005058108, -0.000572995, -0.0036773,
-0.000458327, -0.004857138, -0.00199384, 0.0037802378, 0.0103877768
)), .Names = c("date", "pr", "month", "mon1", "mon3", "mev06_mp_lag2",
"mev29_lag2", "mev108_lag1", "p_pr", "r_pr"), class = "data.frame", row.names = c(NA,
-163L))
Am I missing something with the nuances of this test? Thoughts?
A Kruskal-Wallis test compares the dependent variable across groups defined by the unique values of the independent variable (analogous to one-way ANOVA). Your independent variables are continuous, so each splits your 163 observations into the same 163 different groups, each with one observation. This is why the tests come out the same.
A clue was in the output - the test had 162 degrees of freedom on 163 observations!
Kruskal-Wallis chi-squared = 162, df = 162, p-value = 0.4852
So the Kruskal-Wallis test isn't appropriate here. Either bin your independent variables first (although a K-W test still wouldn't be right, as your groups would then be ordered), or use a test for correlation.
This question already has answers here:
How to combine scales for colour and size into one legend?
(2 answers)
Closed 7 years ago.
What is the code to make the two legends into one: A circles legend with color?
I think, a single legend with circles colored according to "size" and "# total number of crimes" is the best way to show the legend.
Desired output:
1) There should be one legend: the circles, instead of black should be colored: 0 circle = "yellow" to 800 circle = "red".
My code:
library(maps)
library(ggmap)
Get map from Google Maps
# Fetch the terrain base map for Lima at zoom level 11.
lima <- get_map(location = "lima", zoom = 11, maptype = "terrain")
Plot
# Draw one point per district on the Lima base map; both the colour and the
# size of each point encode TOTALES, and both scales share the same title.
ggmap(lima) +
  geom_point(
    mapping = aes(x = LONGITUD, y = LATITUD, colour = TOTALES, size = TOTALES),
    data = limanov2
  ) +
  scale_size_continuous(name = "Cantidad\ndelitos", range = c(2, 12)) +
  scale_colour_gradient(name = "Cantidad\ndelitos", low = "yellow", high = "red") +
  ggtitle("TOTAL DELITOS - LIMA NOV 2012") +
  theme(
    legend.text = element_text(size = 14),
    legend.position = "left",
    plot.title = element_text(size = 12, vjust = 2, family = "Verdana", face = "italic")
  )
My data:
structure(list(DISTRITO = c("SAN JUAN DE LURIGANCHO", "CALLAO",
"LOS OLIVOS", "ATE", "LIMA", "SAN MARTIN DE PORRES", "SANTIAGO DE SURCO",
"CHORILLOS", "COMAS", "INDEPENDENCIA", "EL AGUSTINO", "LA VICTORIA",
"SAN JUAN DE MIRAFLORES", "VILLA EL SALVADOR", "SAN MIGUEL",
"CARABAYLLO", "MIRAFLORES", "SAN BORJA", "VENTANILLA", "SURQUILLO",
"BREÑA", "ANCON", "PTE. PIEDRA", "RIMAC", "BARRANCO", "LA MOLINA",
"SAN LUIS", "SANTA ANITA", "LURIGANCHO", "P. LIBRE", "MAGDALENA DEL MAR",
"LA PERLA", "CHACLACAYO", "PUENTE PIEDRA", "SAN ISIDRO", "JESUS MARIA",
"BELLAVISTA", "LINCE", "CARMEN DE LA LEGUA REYNOSO", "CIENEGUILLA",
"SANTA ROSA", "LURIN", "PUNTA NEGRA", "PUCUSANA", "LA PUNTA",
"PUNTA HERMOSA", "PACHACAMAC", "SAN BARTOLO", "SANTA MARIA"),
TOTALES = c(861L, 696L, 696L, 642L, 516L, 479L, 442L, 378L,
371L, 368L, 361L, 333L, 325L, 291L, 282L, 251L, 239L, 196L,
193L, 188L, 185L, 174L, 165L, 161L, 138L, 134L, 128L, 119L,
115L, 105L, 67L, 65L, 63L, 58L, 58L, 56L, 45L, 38L, 23L,
23L, 11L, 8L, 6L, 5L, 3L, 3L, 2L, 0L, 0L), HOMICIDIOS = c(1L,
7L, 0L, 1L, 2L, 0L, 0L, 1L, 7L, 4L, 4L, 4L, 0L, 0L, 0L, 2L,
0L, 0L, 7L, 0L, 0L, 0L, 0L, 4L, 0L, 0L, 2L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), LESIONES = c(100L, 72L, 61L, 43L, 44L, 8L, 10L,
15L, 44L, 40L, 50L, 15L, 52L, 28L, 7L, 33L, 15L, 3L, 21L,
7L, 36L, 33L, 15L, 19L, 14L, 1L, 8L, 6L, 16L, 4L, 4L, 9L,
1L, 12L, 2L, 9L, 5L, 2L, 5L, 7L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), VIO..DE.LA.LIBERTAD.PERSONAL = c(0L, 7L, 6L,
5L, 6L, 1L, 1L, 0L, 3L, 1L, 2L, 0L, 2L, 0L, 1L, 0L, 1L, 0L,
1L, 1L, 0L, 3L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L,
0L, 1L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), VIO..DE.LA.LIBERTAD.SEXUAL = c(56L, 14L, 12L, 15L, 7L,
10L, 2L, 9L, 11L, 13L, 8L, 9L, 7L, 14L, 4L, 15L, 4L, 2L,
17L, 7L, 3L, 4L, 6L, 12L, 2L, 1L, 5L, 3L, 11L, 4L, 1L, 2L,
0L, 6L, 2L, 0L, 3L, 0L, 2L, 2L, 0L, 4L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), HURTO.SIMPLE.Y.AGRAVADO = c(217L, 203L, 296L, 230L,
260L, 167L, 226L, 217L, 130L, 117L, 154L, 133L, 121L, 46L,
163L, 72L, 161L, 119L, 69L, 120L, 64L, 19L, 64L, 21L, 57L,
44L, 39L, 2L, 48L, 60L, 30L, 19L, 48L, 20L, 41L, 25L, 19L,
27L, 7L, 11L, 9L, 0L, 6L, 0L, 2L, 3L, 1L, 0L, 0L), ROBO.SIMPLE.Y.AGRAVADO = c(460L,
289L, 308L, 344L, 186L, 277L, 198L, 130L, 165L, 184L, 137L,
149L, 134L, 188L, 104L, 126L, 58L, 72L, 64L, 51L, 77L, 115L,
79L, 76L, 64L, 88L, 73L, 108L, 40L, 36L, 30L, 32L, 14L, 17L,
12L, 22L, 12L, 8L, 6L, 3L, 1L, 3L, 0L, 2L, 1L, 0L, 1L, 0L,
0L), MICRO.COM.DE.DROGAS = c(26L, 100L, 13L, 3L, 10L, 15L,
5L, 5L, 11L, 8L, 3L, 23L, 9L, 15L, 3L, 3L, 0L, 0L, 8L, 2L,
5L, 0L, 0L, 28L, 0L, 0L, 1L, 0L, 0L, 0L, 2L, 2L, 0L, 2L,
0L, 0L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L
), TENENCIA.ILEGAL.DE.ARMAS = c(1L, 4L, 0L, 1L, 1L, 1L, 0L,
1L, 0L, 1L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 6L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), LONGITUD = c(-77,
-77.12, -77.08, -76.89, -77.04, -77.09, -76.99, -77.01, -77.05,
-77.05, -77, -77.02, -76.97, -76.94, -77.09, -76.99, -77.03,
-77, -77.13, -77.01, -77.05, -77.11, -77.08, -76.7, -77.02,
-76.92, -77, -76.96, -76.86, -77.06, -77.07, -77.12, -76.76,
-77.08, -77.03, -77.05, -77.11, -77.04, -77.09, -76.78, -77.16,
-76.81, -76.73, -76.77, -77.16, -76.76, -76.83, -76.73, -76.77
), LATITUD = c(-11.99, -12.04, -11.95, -12.04, -12.06, -12,
-12.16, -12.2, -11.93, -11.99, -12.04, -12.08, -12.16, -12.23,
-12.08, -11.79, -12.12, -12.1, -11.89, -12.11, -12.06, -11.69,
-11.88, -11.94, -12.15, -12.09, -12.08, -12.04, -11.98, -12.08,
-12.09, -12.07, -11.99, -11.88, -12.1, -12.08, -12.06, -12.09,
-12.04, -12.07, -11.81, -12.24, -12.32, -12.47, -12.07, -12.28,
-12.18, -12.38, -12.42)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -49L), .Names = c("DISTRITO", "TOTALES",
"HOMICIDIOS", "LESIONES", "VIO..DE.LA.LIBERTAD.PERSONAL", "VIO..DE.LA.LIBERTAD.SEXUAL",
"HURTO.SIMPLE.Y.AGRAVADO", "ROBO.SIMPLE.Y.AGRAVADO", "MICRO.COM.DE.DROGAS",
"TENENCIA.ILEGAL.DE.ARMAS", "LONGITUD", "LATITUD"))
I've found a solution by reading the documentation for ggplot2 version 0.9.
The new function guide_legend() should be used inside guides().
This function gives you more control over legend labels.
This is the final code with the resulting output (see the last line):
# Draw one point per district on the Lima base map; both the colour and the
# size of each point encode TOTALES, and both scales share the same title.
ggmap(lima) +
  geom_point(
    mapping = aes(x = LONGITUD, y = LATITUD, colour = TOTALES, size = TOTALES),
    data = limanov2
  ) +
  scale_size_continuous(name = "Cantidad\ndelitos", range = c(2, 12)) +
  scale_colour_gradient(name = "Cantidad\ndelitos", low = "yellow", high = "red") +
  ggtitle("TOTAL DELITOS - LIMA NOV 2012") +
  theme(
    legend.text = element_text(size = 14),
    legend.position = "left",
    plot.title = element_text(size = 12, vjust = 2, family = "Verdana", face = "italic")
  ) +
  # Request a legend-type guide for colour so that it can be combined with the
  # size legend into a single legend of coloured circles.
  guides(colour = guide_legend())
I'm plotting some points over a map with the ggmap package.
The problem is that I get the message: "Removed 12 rows containing missing values (geom_point)".
But I don't have any NAs. I've looked at the data, and used:
sum(is.na(limanov2)) #Gives 0
to prove it.
This is my code:
library(maps)
library(ggmap)
# Fetch the Lima base map at zoom level 11.
lima <- get_map(location = "lima", zoom = 11)

# Overlay one point per district; colour and size both encode TOTALES.
ggmap(lima) +
  geom_point(
    mapping = aes(x = LONGITUD, y = LATITUD, colour = TOTALES, size = TOTALES),
    data = limanov2
  ) +
  scale_colour_gradient(low = "yellow", high = "red")
My data:
structure(list(DISTRITO = c("SAN JUAN DE LURIGANCHO", "CALLAO",
"LOS OLIVOS", "ATE VITARTE", "LIMA CERCADO", "SAN MARTÍN", "SANTIAGO DE SURCO",
"CHORILLOS", "COMAS", "INDEPENDENCIA", "EL AGUSTINO", "LA VICTORIA",
"SAN JUAN DE MIRAFLORES", "VILLA EL SALVADOR", "S. MIGUEL", "CARABAYLLO",
"MIRAFLORES", "PTE. PIEDRA", "SAN BORJA", "VENTANILLA", "SURQUILLO",
"BREÑA", "ANCÓN", "EL RIMAC", "BARRANCO", "LA MOLINA", "SAN LUIS",
"STA. ANITA", "LURIGANCHO", "P. LIBRE", "MAGDALENA", "LA PERLA",
"CHACLACAYO", "SAN ISIDRO", "J. MARÍA", "BELLAVISTA", "LINCE",
"C. DE LA LEGUA", "CIENEGUILLA", "STA.ROSA", "LURÍN", "PTA.NEGRA",
"PUCUSANA", "LA PUNTA", "PTA. HERMOSA", "PACHACAMAC", "SAN BARTOLO",
"SANTA MARÍA"), TOTALES = c(861L, 696L, 696L, 642L, 516L, 479L,
442L, 378L, 371L, 368L, 361L, 333L, 325L, 291L, 282L, 251L, 239L,
223L, 196L, 193L, 188L, 185L, 174L, 161L, 138L, 134L, 128L, 119L,
115L, 105L, 67L, 65L, 63L, 58L, 56L, 45L, 38L, 23L, 23L, 11L,
8L, 6L, 5L, 3L, 3L, 2L, 0L, 0L), HOMICIDIOS = c(1L, 7L, 0L, 1L,
2L, 0L, 0L, 1L, 7L, 4L, 4L, 4L, 0L, 0L, 0L, 2L, 0L, 1L, 0L, 7L,
0L, 0L, 0L, 4L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), LESIONES = c(100L,
72L, 61L, 43L, 44L, 8L, 10L, 15L, 44L, 40L, 50L, 15L, 52L, 28L,
7L, 33L, 15L, 27L, 3L, 21L, 7L, 36L, 33L, 19L, 14L, 1L, 8L, 6L,
16L, 4L, 4L, 9L, 1L, 2L, 9L, 5L, 2L, 5L, 7L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), VIO..DE.LA.LIBERTAD.PERSONAL = c(0L, 7L,
6L, 5L, 6L, 1L, 1L, 0L, 3L, 1L, 2L, 0L, 2L, 0L, 1L, 0L, 1L, 1L,
0L, 1L, 1L, 0L, 3L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), VIO..DE.LA.LIBERTAD.SEXUAL = c(56L,
14L, 12L, 15L, 7L, 10L, 2L, 9L, 11L, 13L, 8L, 9L, 7L, 14L, 4L,
15L, 4L, 12L, 2L, 17L, 7L, 3L, 4L, 12L, 2L, 1L, 5L, 3L, 11L,
4L, 1L, 2L, 0L, 2L, 0L, 3L, 0L, 2L, 2L, 0L, 4L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), HURTO.SIMPLE.Y.AGRAVADO = c(217L, 203L, 296L, 230L,
260L, 167L, 226L, 217L, 130L, 117L, 154L, 133L, 121L, 46L, 163L,
72L, 161L, 84L, 119L, 69L, 120L, 64L, 19L, 21L, 57L, 44L, 39L,
2L, 48L, 60L, 30L, 19L, 48L, 41L, 25L, 19L, 27L, 7L, 11L, 9L,
0L, 6L, 0L, 2L, 3L, 1L, 0L, 0L), ROBO.SIMPLE.Y.AGRAVADO = c(460L,
289L, 308L, 344L, 186L, 277L, 198L, 130L, 165L, 184L, 137L, 149L,
134L, 188L, 104L, 126L, 58L, 96L, 72L, 64L, 51L, 77L, 115L, 76L,
64L, 88L, 73L, 108L, 40L, 36L, 30L, 32L, 14L, 12L, 22L, 12L,
8L, 6L, 3L, 1L, 3L, 0L, 2L, 1L, 0L, 1L, 0L, 0L), MICRO.COM.DE.DROGAS = c(26L,
100L, 13L, 3L, 10L, 15L, 5L, 5L, 11L, 8L, 3L, 23L, 9L, 15L, 3L,
3L, 0L, 2L, 0L, 8L, 2L, 5L, 0L, 28L, 0L, 0L, 1L, 0L, 0L, 0L,
2L, 2L, 0L, 0L, 0L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L,
0L, 0L), TENENCIA.ILEGAL.DE.ARMAS = c(1L, 4L, 0L, 1L, 1L, 1L,
0L, 1L, 0L, 1L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 6L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), LONGITUD = c(-77, -77.12,
-77.08, -76.89, -77.04, -77.09, -76.99, -77.01, -77.05, -77.05,
-77, -77.02, -76.97, -76.94, -77.09, -76.99, -77.03, -77.08,
-77, -77.13, -77.01, -77.05, -77.11, -76.7, -77.02, -76.92, -77,
-76.96, -76.86, -77.06, -77.07, -77.12, -76.76, -77.03, -77.05,
-77.11, -77.04, -77.09, -76.78, -77.16, -76.81, -76.73, -76.77,
-77.16, -76.76, -76.83, -76.73, -76.77), LATITUD = c(-11.99,
-12.04, -11.97, -12.04, -12.06, -12, -12.16, -12.2, -11.93, -11.99,
-12.04, -12.08, -12.16, -12.23, -12.08, -11.79, -12.12, -11.88,
-12.1, -11.89, -12.11, -12.06, -11.69, -11.94, -12.15, -12.09,
-12.08, -12.04, -11.98, -12.08, -12.09, -12.07, -11.99, -12.1,
-12.08, -12.06, -12.09, -12.04, -12.07, -11.81, -12.24, -12.32,
-12.47, -12.07, -12.28, -12.18, -12.38, -12.42)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -48L), .Names = c("DISTRITO",
"TOTALES", "HOMICIDIOS", "LESIONES", "VIO..DE.LA.LIBERTAD.PERSONAL",
"VIO..DE.LA.LIBERTAD.SEXUAL", "HURTO.SIMPLE.Y.AGRAVADO", "ROBO.SIMPLE.Y.AGRAVADO",
"MICRO.COM.DE.DROGAS", "TENENCIA.ILEGAL.DE.ARMAS", "LONGITUD",
"LATITUD"))
You have values outside of the base map zoom range... try changing your zoom parameter.
library(maps)
library(ggmap)
# Fetch the Lima base map with a different zoom parameter (10) so that the
# plotted coordinates fall inside the map's range.
lima <- get_map(location = "lima", zoom = 10)

# Overlay one point per district; colour and size both encode TOTALES.
ggmap(lima) +
  geom_point(
    mapping = aes(x = LONGITUD, y = LATITUD, colour = TOTALES, size = TOTALES),
    data = limanov2
  ) +
  scale_colour_gradient(low = "yellow", high = "red")
I have data that is organized in panels like this (see below for output from the dput() function):
Country Year Month Var1 Var2
C1 2000 1 0 0
C1 2000 2 1 0
C1 2000 3 2 1
...
C2 2000 1 1 1
C2 2000 2 1 2
C2 2000 3 3 1
...
The data set has in total 27 countries for the years 1999 to 2008, but with unbalanced panels.
I want to be able to estimate a model for the full data set, and from this model do forecasting for each country in the data set. I have been looking into the YourCast package from King et al. but since I have all my data in a single file, I am at a loss as to how to create a data object that the yourcast() function will accept. Does anyone know how to do this without going through the tedious procedure of manually splitting the data file up into the different cross sections?
PS: 49 observations from the data set:
structure(list(Country = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Belgium",
"Denmark", "Czech.Republic", "Germany", "Estonia", "Greece",
"Spain", "France", "Ireland", "Italy", "Cyprus", "Latvia", "Lithuania",
"Luxembourg", "Hungary", "Malta", "Netherlands", "Austria", "Poland",
"Portugal", "Slovenia", "Slovakia", "Bulgaria", "Romania", "Finland",
"Sweden", "UK"), class = "factor"), Year = c(2003, 2003, 2003,
2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2004, 2004,
2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2003,
2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003,
2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004,
2004, 2005), Month = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1), Yes = c(21L,
18L, 20L, 19L, 31L, 39L, 28L, 2L, 28L, 21L, 26L, 50L, 14L, 28L,
50L, 83L, 10L, 25L, 22L, 6L, 22L, 39L, 32L, 56L, 22L, 17L, 20L,
20L, 32L, 39L, 23L, 2L, 27L, 21L, 28L, 48L, 14L, 27L, 50L, 89L,
10L, 25L, 22L, 4L, 22L, 38L, 31L, 56L, 16L), No = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 4L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 1L, 2L, 0L, 0L, 0L, 2L, 0L, 1L, 1L, 0L, 0L), Abstention = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 3L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), No.Neg = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L), Abstention.Neg = c(0L, 0L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Yes.Neg = c(1L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 1L,
0L, 0L, 2L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L
), Yes.Pos = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), Missing = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Enlargement = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1)), .Names = c("Country", "Year", "Month", "Yes",
"No", "Abstention", "No.Neg", "Abstention.Neg", "Yes.Neg", "Yes.Pos",
"Missing", "Enlargement"), row.names = c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 61L, 62L, 63L, 64L, 65L, 66L, 67L,
68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L,
81L, 82L, 83L, 84L, 85L), class = "data.frame")
This is very simple
# Load the YourCast package.
library(YourCast);
# Run the package demo named chp.11.10; its data can serve as a template for
# the layout to aim for when preparing your own data.
demo(chp.11.10)
You can prep your data to look like the data used in this demo with the yourprep command.
Type ?yourprep
If I understand your problem, splitting up the database could be quite easy. Supposing you named the dataset 'data':
# Run one forecast per country and collect the results in a list keyed by
# country name (the factor levels of data$Country).
results <- list()
# seq_len() gives an empty sequence when there are no levels, whereas
# 1:nlevels(...) would count down through 1 and 0 and run the body twice.
for (i in seq_len(nlevels(data$Country))) {
  # `...` is a placeholder: supply the actual yourcast() arguments for
  # country i here.
  results[[levels(data$Country)[i]]] <- yourcast(...)
}
With this simple loop you can run the forecast for each country and save the results in a list. Later you can read the results for any country from the list, e.g. results[['Hungary']].
As I do not know anything about the package you use, here is a small example that could be placed in the loop instead of the line containing the yourcast() function:
# Store, under the i-th country's name, that name together with the number of
# rows belonging to the country (c() coerces the count to character).
results[[levels(data$Country)[i]]] <- c(
  levels(data$Country)[i],
  sum(data$Country == levels(data$Country)[i], na.rm = TRUE)
)
This command creates a list containing, for each country, two values: the name and the sample size of that country.