I want to generate the following endogenous lag (Y) variable:
set Y=1 in the current routine year, if submission==1 and routineyear==1 in the previous routine year
set Y=2 in the current routine year, if submission==0 and routineyear==1 in the previous routine year
Otherwise=0
Note, though, that the "previous routine year" is not simply the previous calendar year: the intervals between routine years vary. This is what makes it hard for me to generate this variable.
Basically, I want to generate an endogenous variable that captures a state's behavior in its LAST routine year.
To illustrate what I want to do:
Assume that country A had its routine year in 1990 - the same year the submission variable was also =1. This would generate Y=1.
Now, the next routineyear for country A is in 1992, where the submission=1 and routineyear=1 in that year. The endogenous lag in this should indicate A's previous behavior as in 1990 (Y=1).
Then, the next routineyear is in 1996 where submission=0 while routineyear=1. The endogenous lag in this case would be the value of A's previous behavior in 1992 (Y=1).
Then again, next routineyear is in 1998, where submission=1 and routineyear=1. The endogenous lag here should indicate A's previous behavior in the last routineyear, in 1996. that is: Y=2!.
This is what the endogenous lag should look like (based on the example above):
country year submission routineyear Y(endo lag)
A 1990 1 1 1
A 1991 0 0 0
A 1992 1 1 1
A 1993 1 0 0
A 1994 0 0 0
A 1995 0 0 0
A 1996 0 1 1
A 1997 0 0 0
A 1998 1 1 2
A 1999 0 0 0
A 2000 0 0 0
A 2001 0 1 1
A 2002 0 0 0
A 2003 1 1 2
I've been trying to do this using different approaches, but without success. One of the biggest problems is that the routine years differ for each country and the intervals between them are not constant.
I believe that someone who can write proper code/functions in R would be able to solve this puzzle. If not, I would appreciate any recommendations on how to proceed from here.
A sample from my real data:
structure(list(ccode = c(31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L,
31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 40L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
41L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L,
42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 51L, 51L, 51L, 51L, 51L,
51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L,
51L, 51L, 51L, 51L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L,
52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 53L, 53L,
53L, 53L, 53L, 53L, 53L, 53L, 53L, 53L, 53L, 53L, 53L, 53L, 53L, 53L,
53L, 53L, 53L, 53L, 53L, 53L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L,
54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L,
70L, 70L, 70L, 70L, 70L, 70L, 70L, 70L, 70L, 70L, 70L, 70L, 70L, 70L,
70L, 70L, 70L, 70L, 70L, 70L, 70L, 70L, 80L, 80L, 80L, 80L, 80L, 80L,
80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L,
80L, 80L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L,
90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L), year = c(1990L,
1991L, 1992L, 1993L, 1994L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L,
2001L, 2002L, 2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L, 2010L,
2011L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1996L, 1997L, 1998L,
1999L, 2000L, 2001L, 2002L, 2003L, 2004L, 2005L, 2006L, 2007L, 2008L,
2009L, 2010L, 2011L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1996L,
1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2004L, 2005L, 2006L,
2007L, 2008L, 2009L, 2010L, 2011L, 1990L, 1991L, 1992L, 1993L, 1994L,
1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2004L,
2005L, 2006L, 2007L, 2008L, 2009L, 2010L, 2011L, 1990L, 1991L, 1992L,
1993L, 1994L, 1995L, 1996L, 1997L, 1998L, 1999L, 1999L, 2000L, 2001L,
2002L, 2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L, 2010L, 2011L,
1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1996L, 1997L, 1998L, 1999L,
2000L, 2001L, 2002L, 2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L,
2010L, 2011L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1996L, 1997L,
1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2004L, 2005L, 2006L, 2007L,
2008L, 2009L, 2010L, 2011L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L,
1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2004L, 2005L,
2006L, 2007L, 2008L, 2009L, 2010L, 2011L, 1990L, 1991L, 1992L, 1993L,
1994L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L,
2004L, 2005L, 2006L, 2007L, 2008L, 2009L, 2010L, 2011L, 1990L, 1991L,
1992L, 1993L, 1994L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L,
2002L, 2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L, 2010L, 2011L,
1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1996L, 1997L, 1998L, 1999L,
2000L, 2001L, 2002L, 2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L,
2010L, 2011L), country = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), .Label = c("Bahamas", "Barbados",
"Belize", "Cuba", "Dominica", "Dominican Republic", "Guatemala",
"Haiti", "Jamaica", "Mexico", "Trinidad and Tobago"), class =
"factor"),
submission = c(1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L,
1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L,
1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L,
1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L,
0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L,
1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L,
0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L,
1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L,
0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L,
0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L,
1L, 0L, 1L, 0L, 1L, 0L, 0L), routineyear = c(1L, 0L, 0L,
1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L,
1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L
)), .Names = c("ccode", "year", "country", "submission", "routineyear"), class = "data.frame", row.names = c(NA, -243L ))
Using data.table:
library(data.table)
setDT(DF)
# Every row starts with the "otherwise" code 0.
DF[, Y := 0]
# Only routine-year rows are recoded. Within each country, look one routine
# year back: 1 if that year had a submission, 2 if it had none. A country's
# first routine year has no predecessor and is filled as "submitted" (-> 1).
DF[routineyear == 1,
   Y := 1 + (shift(submission, n = 1L, fill = 1, type = "lag") == 0),
   by = country]
DF[]
which gives (first 15 rows shown):
> DF
ccode year country submission routineyear Y
1: 31 1990 Bahamas 1 1 1
2: 31 1991 Bahamas 0 0 0
3: 31 1992 Bahamas 0 0 0
4: 31 1993 Bahamas 0 1 1
5: 31 1994 Bahamas 0 0 0
6: 31 1995 Bahamas 1 0 0
7: 31 1996 Bahamas 0 0 0
8: 31 1997 Bahamas 1 1 2
9: 31 1998 Bahamas 0 0 0
10: 31 1999 Bahamas 1 1 1
11: 31 2000 Bahamas 0 0 0
12: 31 2001 Bahamas 1 1 1
13: 31 2002 Bahamas 0 0 0
14: 31 2003 Bahamas 1 1 1
15: 31 2004 Bahamas 0 0 0
........
What this does:
setDT(DF) converts your dataframe to a data.table
Y := 0 sets Y to 0 by reference first
Filter for routineyear == 1
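Update Y by reference such that Y is set to 1 if the submission in the previous routine year is 1, and to 2 if the submission in the previous routine year is 0 (fill = 1 makes each country's first routine year default to 1)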
library(dplyr)
# Code every routine year by the submission made in the country's previous
# routine year (1 = submitted, 2 = did not submit), then attach that code to
# the full panel; years that are not routine years get 0.
# The full data is the left-hand table of a left_join() so the result keeps
# the original row order, and the join keys are spelled out instead of being
# guessed from the shared column names.
select(dat2, -Y) %>%
  as_tibble() %>%
  left_join(
    select(dat2, -Y) %>%
      filter(routineyear == 1L) %>%
      group_by(country) %>%
      # default = 1L: a country's first routine year has no predecessor and
      # is coded 1.
      mutate(Y = 2L - lag(submission, default = 1L)) %>%
      ungroup(),
    by = c("country", "year", "submission", "routineyear")
  ) %>%
  # Rows without a match are the non-routine years.
  mutate(Y = replace(Y, is.na(Y), 0L))
# # A tibble: 14 x 5
# country year submission routineyear Y
# <fct> <int> <int> <int> <int>
# 1 A 1990 1 1 1
# 2 A 1991 0 0 0
# 3 A 1992 1 1 1
# 4 A 1993 1 0 0
# 5 A 1994 0 0 0
# 6 A 1995 0 0 0
# 7 A 1996 0 1 1
# 8 A 1997 0 0 0
# 9 A 1998 1 1 2
# 10 A 1999 0 0 0
# 11 A 2000 0 0 0
# 12 A 2001 0 1 1
# 13 A 2002 0 0 0
# 14 A 2003 1 1 2
all.equal(.Last.value, dat2)
# [1] TRUE
where dat2 is:
# Toy panel for a single country, including the expected lag variable Y.
# stringsAsFactors = TRUE keeps `country` a factor on R >= 4.0 as well, which
# is what the printed tibble (<fct>) shows.
dat2 <- read.table(text =
"country year submission routineyear Y
A 1990 1 1 1
A 1991 0 0 0
A 1992 1 1 1
A 1993 1 0 0
A 1994 0 0 0
A 1995 0 0 0
A 1996 0 1 1
A 1997 0 0 0
A 1998 1 1 2
A 1999 0 0 0
A 2000 0 0 0
A 2001 0 1 1
A 2002 0 0 0
A 2003 1 1 2
", header = TRUE, stringsAsFactors = TRUE)
Related
I have a two-way ANOVA test (w/repeated measures) that I'm using with four almost identical datasets:
> res.aov <- anova_test(
+ data = LST_Weather_dataset_N, dv = LST, wid = Month,
+ within = c(Buffer, TimePeriod),
+ effect.size = "ges",
+ detailed = TRUE,
+ )
Where:
LST = surface temperature deviation in C
Month = 1-12
Buffer = a value 100-1900 - one of 19 areas outward from the boundary of a solar power plant (each 100m wide)
TimePeriod = a factor with a value of 1 or 2 corresponding to pre-/post-construction of a solar power plant.
For one dataset I get the error:
Error: Each row of output must be identified by a unique combination of keys.
Keys are shared for 38 rows:
* 10, 11
* 217, 218
* 240, 241
* 263, 264
* 286, 287
* 309, 310
* 332, 333
...
As far as I can tell I have unique combinations.
# NOTE(review): LST is the dependent variable (dv = LST in the anova_test()
# call), so including it here makes almost every row its own group and n
# comes out as 1 regardless. The design cells are identified by Month, Buffer
# and TimePeriod only; counting on those three columns alone is what would
# reveal duplicated cells.
dplyr::count(LST_Weather_dataset_N, LST, Month, Buffer, TimePeriod, sort = TRUE)
returns
LST Month Buffer TimePeriod n
1 -6.309045316 12 100 2 1
2 -5.655279925 9 1000 2 1
3 -5.224196295 12 200 2 1
4 -5.194473224 9 1100 2 1
5 -5.025429891 12 400 2 1
6 -4.987575966 9 700 2 1
7 -4.979453868 12 600 2 1
8 -4.825298768 12 300 2 1
9 -4.668994574 12 500 2 1
10 -4.652282192 12 700 2 1
...
'n' is always 1.
I can't work out why this is happening.
Extract of the data frame below:
> dput(LST_Weather_dataset_N[sample(1:nrow(LST_Weather_dataset_N), 50),])
structure(list(Buffer = c(1400L, 700L, 300L, 1400L, 100L, 200L,
1700L, 100L, 800L, 1900L, 1100L, 100L, 700L, 800L, 1400L, 400L,
1300L, 200L, 1200L, 500L, 1200L, 1300L, 400L, 1000L, 1300L, 1100L,
100L, 300L, 300L, 600L, 1100L, 1400L, 1500L, 1600L, 1700L, 1800L,
1700L, 1300L, 1200L, 300L, 1100L, 1900L, 1700L, 700L, 1400L,
1200L, 1600L, 1700L, 1900L, 1300L), Date = c("02/05/2014", "18/01/2017",
"19/06/2014", "25/12/2013", "15/09/2017", "08/04/2017", "22/08/2014",
"21/07/2014", "13/07/2017", "25/12/2013", "22/10/2013", "02/05/2014",
"07/03/2017", "15/03/2014", "13/07/2017", "19/06/2014", "25/12/2013",
"17/10/2017", "16/04/2014", "06/10/2013", "15/09/2017", "18/01/2017",
"10/01/2014", "17/12/2016", "13/07/2017", "19/06/2014", "07/03/2017",
"15/03/2014", "11/02/2014", "22/10/2013", "06/10/2013", "15/09/2017",
"16/04/2014", "18/01/2017", "15/03/2014", "21/07/2014", "17/10/2017",
"15/09/2017", "10/01/2014", "23/09/2014", "16/04/2014", "22/10/2013",
"11/06/2017", "26/05/2017", "19/06/2014", "14/08/2017", "11/02/2014",
"26/02/2017", "26/02/2017", "11/02/2014"), LST = c(1.255502397,
4.33385966, 3.327025603, -0.388631166, -0.865430798, 4.386292648,
-0.243018665, 3.276865987, 0.957036835, -0.065821795, 0.69731779,
4.846851651, -1.437700684, 1.003808572, 0.572460421, 2.995902374,
-0.334633662, -1.231447567, 0.644520741, 0.808262029, -3.392959991,
2.324569449, 2.346707612, -3.124354627, 0.58719862, 1.904859254,
1.701580958, 2.792443253, 1.638270039, 1.460743317, 0.699767335,
-3.015643366, 0.930527864, 1.309519336, 0.477789664, 0.147584938,
-0.498188865, -3.506795723, -1.007487965, 1.149604087, 1.192366386,
0.197471474, 0.999391224, -0.190613618, 1.27324015, 2.686622796,
0.573109026, 0.97847983, 0.395005095, -0.40855426), Month = c(5L,
1L, 6L, 12L, 9L, 4L, 8L, 7L, 7L, 12L, 10L, 5L, 3L, 3L, 7L, 6L,
12L, 10L, 4L, 10L, 9L, 1L, 1L, 12L, 7L, 6L, 3L, 3L, 2L, 10L,
10L, 9L, 4L, 1L, 3L, 7L, 10L, 9L, 1L, 9L, 4L, 10L, 6L, 5L, 6L,
8L, 2L, 2L, 2L, 2L), Year = c(2014L, 2017L, 2014L, 2013L, 2017L,
2017L, 2014L, 2014L, 2017L, 2013L, 2013L, 2014L, 2017L, 2014L,
2017L, 2014L, 2013L, 2017L, 2014L, 2013L, 2017L, 2017L, 2014L,
2016L, 2017L, 2014L, 2017L, 2014L, 2014L, 2013L, 2013L, 2017L,
2014L, 2017L, 2014L, 2014L, 2017L, 2017L, 2014L, 2014L, 2014L,
2013L, 2017L, 2017L, 2014L, 2017L, 2014L, 2017L, 2017L, 2014L
), JulianDay = c(122L, 18L, 170L, 359L, 258L, 98L, 234L, 202L,
194L, 359L, 295L, 122L, 66L, 74L, 194L, 170L, 359L, 290L, 106L,
279L, 258L, 18L, 10L, 352L, 194L, 170L, 66L, 74L, 42L, 295L,
279L, 258L, 106L, 18L, 74L, 202L, 290L, 258L, 10L, 266L, 106L,
295L, 162L, 146L, 170L, 226L, 42L, 57L, 57L, 42L), TimePeriod = c(1L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L,
2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L,
1L), Temperature = c(28L, 9L, 31L, 12L, 27L, 21L, 29L, 36L, 38L,
12L, 23L, 28L, 12L, 21L, 38L, 31L, 12L, 23L, 25L, 22L, 27L, 9L,
11L, 7L, 38L, 31L, 12L, 21L, 14L, 23L, 22L, 27L, 25L, 9L, 21L,
36L, 23L, 27L, 11L, 31L, 25L, 23L, 29L, 27L, 31L, 34L, 14L, 16L,
16L, 14L), Humidity = c(6L, 34L, 7L, 31L, 29L, 22L, 34L, 15L,
19L, 31L, 16L, 6L, 14L, 14L, 19L, 7L, 31L, 12L, 9L, 12L, 29L,
34L, 33L, 18L, 19L, 7L, 14L, 14L, 31L, 16L, 12L, 29L, 9L, 34L,
14L, 15L, 12L, 29L, 33L, 18L, 9L, 16L, 8L, 13L, 7L, 13L, 31L,
31L, 31L, 31L), Wind_speed = c(6L, 0L, 6L, 7L, 13L, 33L, 6L,
20L, 9L, 7L, 0L, 6L, 0L, 6L, 9L, 6L, 7L, 6L, 0L, 7L, 13L, 0L,
0L, 35L, 9L, 6L, 0L, 6L, 6L, 0L, 7L, 13L, 0L, 0L, 6L, 20L, 6L,
13L, 0L, 0L, 0L, 0L, 24L, 11L, 6L, 24L, 6L, 26L, 26L, 6L), Wind_gust = c(0L,
0L, 0L, 0L, 0L, 54L, 0L, 46L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 48L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 46L, 0L, 0L, 0L, 0L, 0L, 0L, 48L, 0L, 0L, 39L,
0L, 41L, 41L, 0L), Wind_trend = c(1L, 0L, 1L, 1L, 2L, 2L, 0L,
1L, 2L, 1L, 0L, 1L, 0L, 1L, 2L, 1L, 1L, 0L, 0L, 2L, 2L, 0L, 1L,
1L, 2L, 1L, 0L, 1L, 1L, 0L, 2L, 2L, 0L, 0L, 1L, 1L, 0L, 2L, 1L,
1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Wind_direction = c(0,
0, 0, 337.5, 360, 22.5, 0, 22.5, 0, 337.5, 0, 0, 0, 0, 0, 0,
337.5, 180, 0, 247.5, 360, 0, 0, 180, 0, 0, 0, 0, 337.5, 0, 247.5,
360, 0, 0, 0, 22.5, 180, 360, 0, 0, 0, 0, 360, 22.5, 0, 360,
337.5, 360, 360, 337.5), Pressure = c(940.2, 943.64, 937.69,
951.37, 932.69, 933.94, 937.07, 938.01, 937.69, 951.37, 939.72,
940.2, 948.33, 947.71, 937.69, 937.69, 951.37, 943.32, 932.69,
944.71, 932.69, 943.64, 942.31, 943.01, 937.69, 937.69, 948.33,
947.71, 941.94, 939.72, 944.71, 932.69, 932.69, 943.64, 947.71,
938.01, 943.32, 932.69, 942.31, 938.94, 932.69, 939.72, 928.31,
931.12, 937.69, 932.37, 941.94, 936.13, 936.13, 941.94), Pressure_trend = c(1L,
2L, 0L, 2L, 0L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 0L, 2L,
1L, 2L, 1L, 0L, 2L, 2L, 2L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 2L,
2L, 1L, 1L, 1L, 0L, 2L, 1L, 2L, 1L, 0L, 0L, 0L, 1L, 1L, 2L, 2L,
1L)), row.names = c(179L, 14L, 195L, 426L, 306L, 118L, 299L,
229L, 244L, 436L, 374L, 153L, 90L, 91L, 256L, 197L, 424L, 348L,
137L, 355L, 328L, 26L, 7L, 419L, 254L, 211L, 78L, 81L, 43L, 359L,
373L, 332L, 143L, 32L, 109L, 263L, 393L, 330L, 23L, 309L, 135L,
398L, 224L, 166L, 217L, 290L, 69L, 72L, 76L, 63L), class = "data.frame")
Well, this is a bit embarrassing.
The error arose because the data did not, in fact, contain properly paired months. Rather than there being 38 data points (19x2) for each month, an error in determining the month value meant that one month had 57 data points (19x3). Correcting this, and checking that each month had the same number of paired data points for the ANOVA, allowed the test to run successfully.
> res.aov <- anova_test(
+ data = LST_Weather_dataset_N, dv = LST, wid = Month,
+ within = c(Buffer, TimePeriod),
+ effect.size = "ges",
+ detailed = TRUE,
+ )
> get_anova_table(res.aov, correction = "auto")
ANOVA Table (type III tests)
Effect DFn DFd SSn SSd F p p<.05 ges
1 (Intercept) 1 11 600.135 974.584 6.774 2.50e-02 * 0.189
2 Buffer 18 198 332.217 331.750 11.015 2.05e-21 * 0.115
3 TimePeriod 1 11 29.561 977.945 0.333 5.76e-01 0.011
4 Buffer:TimePeriod 18 198 13.055 283.797 0.506 9.53e-01 0.005
I still don't understand how the error message was telling me this, though.
I have a set of variables in the dataset -- I want to simply calculate the running total (and the running mean) for all these variables, based on all prior years.
To illustrate, this is what my data looks like, including the running total variable that I want to generate.
country year X1 X2 X3 X4 X5 running_total
Bahamas 1990 0 0 0 0 1 NA
Bahamas 1991 0 0 1 1 0 1
Bahamas 1992 1 1 0 0 1 3
Bahamas 1993 0 0 0 0 0 6
Bahamas 1994 1 1 0 1 1 6
Bahamas 1995 0 0 1 0 0 10
Bahamas 1996 0 1 0 1 0 11
Bahamas 1997 1 0 1 0 1 13
Bahamas 1998 0 1 0 1 0 16
Bahamas 1999 1 0 1 0 1 18
Bahamas 2000 0 1 0 1 0 21
Bahamas 2001 1 0 1 0 1 23
Bahamas 2002 0 1 0 1 0 26
Bahamas 2003 1 0 0 0 1 28
Bahamas 2004 0 0 0 1 0 30
Bahamas 2005 1 1 0 0 0 31
Bahamas 2006 0 0 1 1 1 33
Bahamas 2007 1 0 0 0 0 36
Bahamas 2008 0 0 1 1 1 37
Bahamas 2009 1 1 0 0 0 40
Bahamas 2010 0 0 1 1 1 42
Bahamas 2011 1 1 0 0 0 45
Bolivia 1990 0 0 0 0 0 NA
Bolivia 1991 0 0 1 1 0 0
Bolivia 1992 0 0 0 0 0 2
Bolivia 1993 0 0 1 0 0 2
Bolivia 1994 0 0 0 0 0 3
Bolivia 1995 0 0 0 0 0 3
Bolivia 1996 0 0 0 0 0 3
Bolivia 1997 0 0 0 0 0 3
Bolivia 1998 0 0 0 0 0 3
Bolivia 1999 0 0 0 0 0 3
Bolivia 2000 0 1 0 1 0 3
Bolivia 2001 0 0 0 0 0 5
Bolivia 2002 0 0 0 0 0 5
Bolivia 2003 0 0 0 0 0 5
Bolivia 2004 0 0 0 0 0 5
Bolivia 2005 0 0 0 0 0 5
Bolivia 2006 0 0 0 0 0 5
Bolivia 2007 0 0 0 0 0 5
Bolivia 2008 0 0 0 0 1 5
Bolivia 2009 0 0 0 0 0 6
Bolivia 2010 0 0 0 0 1 6
Bolivia 2011 0 0 0 0 0 7
The starting year, 1990, is NA. For example, the running total for 1991 is based on 1990. The running total for 1992 is based on 1990-1991. The running total for 1993 is based on 1990-1992, and the running total for 1994 is based on 1990-1993. And so on, until 2011. Then the same procedure starts again for the next country.
I tried the following code below but it doesn't work the way I want. Surely, I need to specify it better, but how?
# NOTE: ave(x, ..., FUN) treats every argument after the first as a grouping
# factor. As written this is the cumulative sum of X1 within each combination
# of X2, X3, X4 and X5 values -- not a per-country running total over X1..X5.
DF$csum <- ave(DF$X1, DF$X2,DF$X3,DF$X4,DF$X5,FUN=cumsum)
In addition, I would like to generate running mean based on the same logic.
Any help here would be much appreciated!
structure(list(country = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("Bahamas", "Bolivia"), class = "factor"),
year = c(1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1996L,
1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2004L, 2005L,
2006L, 2007L, 2008L, 2009L, 2010L, 2011L, 1990L, 1991L, 1992L,
1993L, 1994L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L,
2002L, 2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L, 2010L,
2011L), X1 = c(0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L,
1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), X2 = c(0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L,
1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), X3 = c(0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L,
1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), X4 = c(0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L,
1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L), X5 = c(1L, 0L, 1L, 0L, 1L, 0L, 0L,
1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 1L, 0L), running_total = c(NA, 1L, 3L,
6L, 6L, 10L, 11L, 13L, 16L, 18L, 21L, 23L, 26L, 28L, 30L,
31L, 33L, 36L, 37L, 40L, 42L, 45L, NA, 0L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L,
7L)), .Names = c("country", "year", "X1", "X2", "X3", "X4", "X5", "running_total"), class = "data.frame", row.names = c(NA,
-44L))
library(data.table)
setDT(df)
# xt: total of the five indicators for each country-year.
# rt2: per country, the cumulative total up to and including the previous
# year, so the first year of every country is NA.
df[, xt := X1 + X2 + X3 + X4 + X5][
  , rt2 := shift(cumsum(xt), n = 1L, type = "lag"), by = country
]
Actually it can be solved with an one-liner:
# Same running total without storing the yearly total as a column.
df[, rt3 := shift(cumsum(X1 + X2 + X3 + X4 + X5)), by = country]
# Or, as Ryan points out, without naming each column: .SD holds every column
# whose name starts with "X", Reduce(`+`, .SD) adds them up row by row, and
# the lagged cumulative sum is taken per country.
df[, rt2 := shift(cumsum(Reduce(`+`, .SD))), by = country,
   .SDcols = grep("^X", names(df), value = TRUE)]
All resulting in:
country year X1 X2 X3 X4 X5 running_total xt rt2
1: Bahamas 1990 0 0 0 0 1 NA 1 NA
2: Bahamas 1991 0 0 1 1 0 1 2 1
3: Bahamas 1992 1 1 0 0 1 3 3 3
4: Bahamas 1993 0 0 0 0 0 6 0 6
5: Bahamas 1994 1 1 0 1 1 6 4 6
6: Bahamas 1995 0 0 1 0 0 10 1 10
7: Bahamas 1996 0 1 0 1 0 11 2 11
8: Bahamas 1997 1 0 1 0 1 13 3 13
9: Bahamas 1998 0 1 0 1 0 16 2 16
10: Bahamas 1999 1 0 1 0 1 18 3 18
11: Bahamas 2000 0 1 0 1 0 21 2 21
12: Bahamas 2001 1 0 1 0 1 23 3 23
13: Bahamas 2002 0 1 0 1 0 26 2 26
14: Bahamas 2003 1 0 0 0 1 28 2 28
15: Bahamas 2004 0 0 0 1 0 30 1 30
16: Bahamas 2005 1 1 0 0 0 31 2 31
17: Bahamas 2006 0 0 1 1 1 33 3 33
18: Bahamas 2007 1 0 0 0 0 36 1 36
19: Bahamas 2008 0 0 1 1 1 37 3 37
20: Bahamas 2009 1 1 0 0 0 40 2 40
21: Bahamas 2010 0 0 1 1 1 42 3 42
22: Bahamas 2011 1 1 0 0 0 45 2 45
23: Bolivia 1990 0 0 0 0 0 NA 0 NA
24: Bolivia 1991 0 0 1 1 0 0 2 0
25: Bolivia 1992 0 0 0 0 0 2 0 2
26: Bolivia 1993 0 0 1 0 0 2 1 2
27: Bolivia 1994 0 0 0 0 0 3 0 3
28: Bolivia 1995 0 0 0 0 0 3 0 3
29: Bolivia 1996 0 0 0 0 0 3 0 3
30: Bolivia 1997 0 0 0 0 0 3 0 3
31: Bolivia 1998 0 0 0 0 0 3 0 3
32: Bolivia 1999 0 0 0 0 0 3 0 3
33: Bolivia 2000 0 1 0 1 0 3 2 3
34: Bolivia 2001 0 0 0 0 0 5 0 5
35: Bolivia 2002 0 0 0 0 0 5 0 5
36: Bolivia 2003 0 0 0 0 0 5 0 5
37: Bolivia 2004 0 0 0 0 0 5 0 5
38: Bolivia 2005 0 0 0 0 0 5 0 5
39: Bolivia 2006 0 0 0 0 0 5 0 5
40: Bolivia 2007 0 0 0 0 0 5 0 5
41: Bolivia 2008 0 0 0 0 1 5 1 5
42: Bolivia 2009 0 0 0 0 0 6 0 6
43: Bolivia 2010 0 0 0 0 1 6 1 6
44: Bolivia 2011 0 0 0 0 0 7 0 7
country year X1 X2 X3 X4 X5 running_total xt rt2
df = structure(list(country = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Bahamas", "Bolivia"), class = "factor"), year = c(1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L, 2010L, 2011L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L, 2010L, 2011L), X1 = c(0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X2 = c(0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X3 = c(0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X4 = c(0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X5 = c(1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L), running_total = c(NA, 1L, 3L, 6L, 6L, 10L, 11L, 13L, 16L, 18L, 21L, 23L, 26L, 28L, 30L, 31L, 33L, 36L, 37L, 40L, 42L, 45L, NA, 0L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 7L)), .Names = c("country", "year", "X1", "X2", "X3", "X4", "X5", "running_total"), class = "data.frame", row.names = c(NA, -44L))
# Running total of X1..X5 over all prior years, per country.
# sums:          this year's total across the five indicators
# sum_shift:     the previous year's total; 0 for a country's first year
# running_total: cumulative sum of sum_shift, i.e. everything before this
#                year (so the first year is 0, not NA)
# dplyr::lag() with a default replaces data.table's shift() plus the NA
# patch-up, so the pipeline needs nothing beyond dplyr; ungroup() stops the
# country grouping from leaking into later steps.
df <- df %>%
  mutate(sums = X1 + X2 + X3 + X4 + X5) %>%
  group_by(country) %>%
  mutate(
    sum_shift = dplyr::lag(sums, default = 0L),
    running_total = cumsum(sum_shift)
  ) %>%
  ungroup()
head(df)
country year X1 X2 X3 X4 X5 running_total sums sum_shift
1: Bahamas 1990 0 0 0 0 1 0 1 0
2: Bahamas 1991 0 0 1 1 0 1 2 1
3: Bahamas 1992 1 1 0 0 1 3 3 2
4: Bahamas 1993 0 0 0 0 0 6 0 3
5: Bahamas 1994 1 1 0 1 1 6 4 0
6: Bahamas 1995 0 0 1 0 0 10 1 4
This is the dplyr solution, but it is basically the same as the data.table solution. We create a column holding the sum across each row. Then we group by country, shift that row sum down by one year, and take its cumulative sum. We have to set the NAs to 0 for the cumulative sum to work.
A solution using dplyr and purrr. We can split the data frame by country, create the running_total column, and then combine the data frames. Notice that this solution does not need to specify individual column names, such as X1 and X2. dat2 is the final output.
library(dplyr)
library(purrr)
# Handle one country at a time and row-bind the pieces back together.
dat2 <- dat %>%
  split(.$country) %>%
  map_dfr(function(country_rows) {
    # Per-year total over every column whose name starts with "X".
    yearly_total <- rowSums(select(country_rows, starts_with("X")))
    # lag() shifts the cumulative total down one row, so each year only sees
    # prior years and the first year is NA.
    mutate(
      country_rows,
      running_total = as.integer(lag(cumsum(yearly_total)))
    )
  })
To calculate the running mean, we can follow the same logic by adding the command to the mutate function. Notice that the cummean function is from the dplyr package.
# Running total and running mean of the X columns over all prior years,
# computed country by country and row-bound back together.
dat2 <- dat %>%
  split(.$country) %>%
  map_dfr(function(country_rows) {
    # Per-year total over every column whose name starts with "X".
    yearly_total <- rowSums(select(country_rows, starts_with("X")))
    # Both statistics are lagged one row so that a year only reflects the
    # years before it; the first year of each country is NA.
    mutate(
      country_rows,
      running_total = as.integer(lag(cumsum(yearly_total))),
      running_mean = lag(cummean(yearly_total))
    )
  })
DATA
dat <- structure(list(country = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Bahamas", "Bolivia"), class = "factor"), year = c(1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L, 2010L, 2011L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L, 2010L, 2011L), X1 = c(0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X2 = c(0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X3 = c(0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X4 = c(0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X5 = c(1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L), running_total = c(NA, 1L, 3L, 6L, 6L, 10L, 11L, 13L, 16L, 18L, 21L, 23L, 26L, 28L, 30L, 31L, 33L, 36L, 37L, 40L, 42L, 45L, NA, 0L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 7L)), .Names = c("country", "year", "X1", "X2", "X3", "X4", "X5", "running_total"), class = "data.frame", row.names = c(NA, -44L))
dat$running_total <- NULL
I am trying to spread the time column of my data frame into one column per year. left_join would be my choice, but the age groups (age) and the geo values differ between years, so I end up with most years containing NA values and one of the age categories disappearing.
# Attempt: keep the 2011 rows as the left table and join to them a wide
# version of the remaining years (one column per year).
# NOTE(review): spread() comes from tidyr, which this snippet does not attach.
library(dplyr)
dt %>%
# Redundant: the time==2011 filter two lines below already excludes 2001.
filter(time!=2001) %>%
group_by(time, geo, age, sex) %>%
filter(time==2011) %>%
left_join(.,dt %>%
group_by(time, sex, age, geo) %>%
# time2 is a constant helper column so every wide row can be matched to the
# 2011 rows through the 'time' = 'time2' key.
mutate(time2 = 2011) %>%
filter(time != 2011) %>%
spread(time, value),
# sex is not a join key: both tables keep their own sex column (sex.x/sex.y
# in the output) and each 2011 row is paired with the wide rows of every sex,
# which repeats `value`.
# NOTE(review): in the sample `dt` the non-census years appear to use geo = 51
# rather than 51900/51902, so joining on geo would leave them NA -- confirm.
by = c('time' = 'time2', 'age', 'geo'))
What I obtain is this:
time geo sex.x age value sex.y `2000` `2001` `2002` `2003`
2011 51900 1 0 27933 1 NA 26193 NA NA
2011 51900 1 0 27933 2 NA 22760 NA NA
2011 51900 1 5 20627 1 NA 26213 NA NA
2011 51900 1 5 20627 2 NA 25647 NA NA
...
2011 51900 1 75 6400 1 NA 5313 NA NA
2011 51900 1 75 6400 2 NA 11500 NA NA
2011 51900 1 80 4520 NA NA NA NA NA
but there's a problem with the `value` column, as it repeats the same values twice (and it shouldn't), and the columns for the years 2000, 2002, ..., 2020 contain only NA.
What I would like is this:
geo sex age 2001 2011 2000 2002 2003 ... 2020
51900 1 0 39290 41900 69844 55281 55545 58045
51900 2 0 34140 38270 61192 65301 65429 65391
51902 1 0 4307 4193 69844 55281 55545 58045
51902 2 0 3753 3453 61192 65301 65429 65391
...
51900 1 80 NA 41900 104766 97952 98143 87068
51900 2 80 NA 38270 91788 89921 83317 98086
dt = structure(list(time = c(2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L,
2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2000L, 2000L, 2000L, 2000L, 2000L, 2002L,
2002L, 2002L, 2002L, 2002L, 2003L, 2003L, 2003L, 2003L, 2003L, 2004L, 2004L, 2004L, 2004L, 2004L, 2005L, 2005L, 2005L, 2005L,
2005L, 2006L, 2006L, 2006L, 2006L, 2006L, 2007L, 2007L, 2007L, 2007L, 2007L, 2008L, 2008L, 2008L, 2008L, 2008L, 2009L, 2009L,
2009L, 2009L, 2009L, 2010L, 2010L, 2010L, 2010L, 2010L, 2012L, 2012L, 2012L, 2012L, 2012L, 2013L, 2013L, 2013L, 2013L, 2013L,
2014L, 2014L, 2014L, 2014L, 2014L, 2015L, 2015L, 2015L, 2015L, 2015L, 2016L, 2016L, 2016L, 2016L, 2016L, 2017L, 2017L, 2017L,
2017L, 2017L, 2018L, 2018L, 2018L, 2018L, 2018L, 2019L, 2019L, 2019L, 2019L, 2019L, 2020L, 2020L, 2020L, 2020L, 2020L, 2000L,
2000L, 2000L, 2000L, 2000L, 2002L, 2002L, 2002L, 2002L, 2002L, 2003L, 2003L, 2003L, 2003L, 2003L, 2004L, 2004L, 2004L, 2004L,
2004L, 2005L, 2005L, 2005L, 2005L, 2005L, 2006L, 2006L, 2006L, 2006L, 2006L, 2007L, 2007L, 2007L, 2007L, 2007L, 2008L, 2008L,
2008L, 2008L, 2008L, 2009L, 2009L, 2009L, 2009L, 2009L, 2010L, 2010L, 2010L, 2010L, 2010L, 2012L, 2012L, 2012L, 2012L, 2012L,
2013L, 2013L, 2013L, 2013L, 2013L, 2014L, 2014L, 2014L, 2014L, 2014L, 2015L, 2015L, 2015L, 2015L, 2015L, 2016L, 2016L, 2016L,
2016L, 2016L, 2017L, 2017L, 2017L, 2017L, 2017L, 2018L, 2018L, 2018L, 2018L, 2018L, 2019L, 2019L, 2019L, 2019L, 2019L, 2020L,
2020L, 2020L, 2020L, 2020L), geo = c(51900L, 51900L, 51900L, 51900L, 51900L, 51900L, 51900L, 51900L, 51902L, 51902L, 51902L,
51902L, 51902L, 51902L, 51902L, 51902L, 51900L, 51900L, 51900L, 51900L, 51900L, 51900L, 51900L, 51900L, 51900L, 51900L, 51902L,
51902L, 51902L, 51902L, 51902L, 51902L, 51902L, 51902L, 51902L, 51902L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L,
51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L,
51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L,
51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L,
51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L,
51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L), sex = c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), age = c(0L, 5L, 10L, 75L, 0L, 5L, 10L, 75L, 0L, 5L, 10L, 75L, 0L, 5L, 10L, 75L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L,
80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L,
0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L,
10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L), value = c(26193L, 26213L, 31653L, 5313L, 22760L, 25647L, 31393L, 11500L, 4307L, 4793L,
5947L, 667L, 3753L, 4500L, 5207L, 1440L, 27933L, 20627L, 20593L, 6400L, 4520L, 25513L, 17480L, 17800L, 9520L, 8560L, 4193L, 3027L,
3453L, 800L, 580L, 3453L, 2473L, 2980L, 1013L, 1167L, 61192L, 88249L, 105509L, 20595L, 18198L, 55281L, 76667L, 99967L, 25571L,
19187L, 55545L, 70490L, 95697L, 28376L, 19340L, 56564L, 64639L, 90809L, 30322L, 19579L, 57471L, 59755L, 85464L, 30949L, 20081L,
60145L, 55926L, 79537L, 30083L, 22373L, 61425L, 53664L, 73329L, 27916L, 24891L, 61683L, 52992L, 67148L, 25620L, 27118L, 61776L,
53403L, 61637L, 24601L, 28551L, 62477L, 53990L, 57438L, 25439L, 29074L, 64401L, 56247L, 52992L, 31317L, 30495L, 64691L, 58095L,
52582L, 35069L, 30691L, 64689L, 60083L, 52853L, 37023L, 31297L, 64391L, 61877L, 53538L, 36327L, 32537L, 63158L, 63367L, 54657L,
33260L, 35359L, 61961L, 64311L, 56249L, 28203L, 38591L, 60751L, 64639L, 58159L, 22742L, 41433L, 59469L, 64485L, 60081L, 18813L,
42936L, 58045L, 64127L, 61703L, 17280L, 42758L, 69844L, 93632L, 109773L, 11025L, 7397L, 65301L, 82373L, 103304L, 16130L, 7705L,
65429L, 77025L, 98764L, 18861L, 7835L, 66195L, 72123L, 93892L, 20763L, 8231L, 66949L, 68002L, 88909L, 21513L, 8973L, 69257L,
64759L, 83202L, 21269L, 10813L, 70402L, 62813L, 77601L, 20044L, 12820L, 70681L, 62125L, 72404L, 18627L, 14631L, 70818L, 62321L,
68099L, 17947L, 15893L, 71579L, 62729L, 65085L, 18379L, 16509L, 73653L, 64712L, 61851L, 21697L, 17861L, 73764L, 66737L, 61483L,
23663L, 18103L, 73537L, 68968L, 61599L, 24347L, 18455L, 73041L, 70867L, 62190L, 23305L, 18986L, 71645L, 72368L, 63235L, 21077L,
20717L, 70201L, 73275L, 64867L, 17653L, 22534L, 68704L, 73517L,
66893L, 14089L, 23935L, 67117L, 73238L, 68928L, 11606L, 24343L, 65391L, 72725L, 70609L, 10697L, 23592L)), .Names = c("time",
"geo", "sex", "age", "value"), class = "data.frame", row.names = c(NA, -226L))
You can use the spread function from tidyr
# Reshape `dt` from long to wide: every distinct `time` value becomes its own
# column and the cells are filled from `value`. The remaining columns
# (geo, sex, age) identify the rows; combinations with no observation for a
# given `time` are filled with NA.
dt_final <- dt %>%
  spread(
    key = time,    # the variable whose values become the new column names
    value = value  # the variable used to fill the cells of the new columns
  )

# Preview the first rows as a tibble. as_tibble() is used because as.tibble()
# is deprecated (tibble >= 2.0.0) and emits a deprecation warning.
head(as_tibble(dt_final))
# geo sex age `2000` `2001` `2002` `2003` `2004` `2005` `2006` `2007` `2008` `2009` `2010` `2011` `2012` `2013` `2014` `2015` `2016` `2017` `2018` `2019` `2020`
# <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
# 1 51 1 0 69844 NA 65301 65429 66195 66949 69257 70402 70681 70818 71579 NA 73653 73764 73537 73041 71645 70201 68704 67117 65391
# 2 51 1 5 93632 NA 82373 77025 72123 68002 64759 62813 62125 62321 62729 NA 64712 66737 68968 70867 72368 73275 73517 73238 72725
# 3 51 1 10 109773 NA 103304 98764 93892 88909 83202 77601 72404 68099 65085 NA 61851 61483 61599 62190 63235 64867 66893 68928 70609
# 4 51 1 75 11025 NA 16130 18861 20763 21513 21269 20044 18627 17947 18379 NA 21697 23663 24347 23305 21077 17653 14089 11606 10697
# 5 51 1 80 7397 NA 7705 7835 8231 8973 10813 12820 14631 15893 16509 NA 17861 18103 18455 18986 20717 22534 23935 24343 23592
# 6 51 2 0 61192 NA 55281 55545 56564 57471 60145 61425 61683 61776 62477 NA 64401 64691 64689 64391 63158 61961 60751 59469 58045
This question already has answers here:
How to combine scales for colour and size into one legend?
(2 answers)
Closed 7 years ago.
How can I combine the two legends into one, so that a single legend shows circles that vary in both size and color?
I think a single legend, with circles sized and colored according to the total number of crimes, is the best way to present this.
Desired output:
1) There should be one legend: its circles, instead of being black, should be colored on a gradient from "yellow" (0 crimes) to "red" (800 crimes).
My code:
library(maps)
library(ggmap)
Get map from Google Maps
# Fetch the base map for the location "lima" at zoom level 11, using the
# "terrain" map type.
lima <- get_map(
  location = "lima",
  zoom = 11,
  maptype = "terrain"
)
Plot
# Draw one point per row of `limanov2` on top of the `lima` base map.
# TOTALES is mapped to both the colour and the size of each point.
ggmap(lima) +
  geom_point(
    mapping = aes(x = LONGITUD, y = LATITUD, color = TOTALES, size = TOTALES),
    data = limanov2
  ) +
  # The size and colour scales are given the same legend title.
  scale_size_continuous(name = "Cantidad\ndelitos", range = c(2, 12)) +
  scale_color_gradient(name = "Cantidad\ndelitos", low = "yellow", high = "red") +
  ggtitle("TOTAL DELITOS - LIMA NOV 2012") +
  # Legend text size and placement, plus the title styling, in a single theme().
  theme(
    legend.text = element_text(size = 14),
    plot.title = element_text(
      size = 12,
      vjust = 2,
      family = "Verdana",
      face = "italic"
    ),
    legend.position = "left"
  )
My data:
structure(list(DISTRITO = c("SAN JUAN DE LURIGANCHO", "CALLAO",
"LOS OLIVOS", "ATE", "LIMA", "SAN MARTIN DE PORRES", "SANTIAGO DE SURCO",
"CHORILLOS", "COMAS", "INDEPENDENCIA", "EL AGUSTINO", "LA VICTORIA",
"SAN JUAN DE MIRAFLORES", "VILLA EL SALVADOR", "SAN MIGUEL",
"CARABAYLLO", "MIRAFLORES", "SAN BORJA", "VENTANILLA", "SURQUILLO",
"BREÑA", "ANCON", "PTE. PIEDRA", "RIMAC", "BARRANCO", "LA MOLINA",
"SAN LUIS", "SANTA ANITA", "LURIGANCHO", "P. LIBRE", "MAGDALENA DEL MAR",
"LA PERLA", "CHACLACAYO", "PUENTE PIEDRA", "SAN ISIDRO", "JESUS MARIA",
"BELLAVISTA", "LINCE", "CARMEN DE LA LEGUA REYNOSO", "CIENEGUILLA",
"SANTA ROSA", "LURIN", "PUNTA NEGRA", "PUCUSANA", "LA PUNTA",
"PUNTA HERMOSA", "PACHACAMAC", "SAN BARTOLO", "SANTA MARIA"),
TOTALES = c(861L, 696L, 696L, 642L, 516L, 479L, 442L, 378L,
371L, 368L, 361L, 333L, 325L, 291L, 282L, 251L, 239L, 196L,
193L, 188L, 185L, 174L, 165L, 161L, 138L, 134L, 128L, 119L,
115L, 105L, 67L, 65L, 63L, 58L, 58L, 56L, 45L, 38L, 23L,
23L, 11L, 8L, 6L, 5L, 3L, 3L, 2L, 0L, 0L), HOMICIDIOS = c(1L,
7L, 0L, 1L, 2L, 0L, 0L, 1L, 7L, 4L, 4L, 4L, 0L, 0L, 0L, 2L,
0L, 0L, 7L, 0L, 0L, 0L, 0L, 4L, 0L, 0L, 2L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), LESIONES = c(100L, 72L, 61L, 43L, 44L, 8L, 10L,
15L, 44L, 40L, 50L, 15L, 52L, 28L, 7L, 33L, 15L, 3L, 21L,
7L, 36L, 33L, 15L, 19L, 14L, 1L, 8L, 6L, 16L, 4L, 4L, 9L,
1L, 12L, 2L, 9L, 5L, 2L, 5L, 7L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), VIO..DE.LA.LIBERTAD.PERSONAL = c(0L, 7L, 6L,
5L, 6L, 1L, 1L, 0L, 3L, 1L, 2L, 0L, 2L, 0L, 1L, 0L, 1L, 0L,
1L, 1L, 0L, 3L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L,
0L, 1L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), VIO..DE.LA.LIBERTAD.SEXUAL = c(56L, 14L, 12L, 15L, 7L,
10L, 2L, 9L, 11L, 13L, 8L, 9L, 7L, 14L, 4L, 15L, 4L, 2L,
17L, 7L, 3L, 4L, 6L, 12L, 2L, 1L, 5L, 3L, 11L, 4L, 1L, 2L,
0L, 6L, 2L, 0L, 3L, 0L, 2L, 2L, 0L, 4L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), HURTO.SIMPLE.Y.AGRAVADO = c(217L, 203L, 296L, 230L,
260L, 167L, 226L, 217L, 130L, 117L, 154L, 133L, 121L, 46L,
163L, 72L, 161L, 119L, 69L, 120L, 64L, 19L, 64L, 21L, 57L,
44L, 39L, 2L, 48L, 60L, 30L, 19L, 48L, 20L, 41L, 25L, 19L,
27L, 7L, 11L, 9L, 0L, 6L, 0L, 2L, 3L, 1L, 0L, 0L), ROBO.SIMPLE.Y.AGRAVADO = c(460L,
289L, 308L, 344L, 186L, 277L, 198L, 130L, 165L, 184L, 137L,
149L, 134L, 188L, 104L, 126L, 58L, 72L, 64L, 51L, 77L, 115L,
79L, 76L, 64L, 88L, 73L, 108L, 40L, 36L, 30L, 32L, 14L, 17L,
12L, 22L, 12L, 8L, 6L, 3L, 1L, 3L, 0L, 2L, 1L, 0L, 1L, 0L,
0L), MICRO.COM.DE.DROGAS = c(26L, 100L, 13L, 3L, 10L, 15L,
5L, 5L, 11L, 8L, 3L, 23L, 9L, 15L, 3L, 3L, 0L, 0L, 8L, 2L,
5L, 0L, 0L, 28L, 0L, 0L, 1L, 0L, 0L, 0L, 2L, 2L, 0L, 2L,
0L, 0L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L
), TENENCIA.ILEGAL.DE.ARMAS = c(1L, 4L, 0L, 1L, 1L, 1L, 0L,
1L, 0L, 1L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 6L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), LONGITUD = c(-77,
-77.12, -77.08, -76.89, -77.04, -77.09, -76.99, -77.01, -77.05,
-77.05, -77, -77.02, -76.97, -76.94, -77.09, -76.99, -77.03,
-77, -77.13, -77.01, -77.05, -77.11, -77.08, -76.7, -77.02,
-76.92, -77, -76.96, -76.86, -77.06, -77.07, -77.12, -76.76,
-77.08, -77.03, -77.05, -77.11, -77.04, -77.09, -76.78, -77.16,
-76.81, -76.73, -76.77, -77.16, -76.76, -76.83, -76.73, -76.77
), LATITUD = c(-11.99, -12.04, -11.95, -12.04, -12.06, -12,
-12.16, -12.2, -11.93, -11.99, -12.04, -12.08, -12.16, -12.23,
-12.08, -11.79, -12.12, -12.1, -11.89, -12.11, -12.06, -11.69,
-11.88, -11.94, -12.15, -12.09, -12.08, -12.04, -11.98, -12.08,
-12.09, -12.07, -11.99, -11.88, -12.1, -12.08, -12.06, -12.09,
-12.04, -12.07, -11.81, -12.24, -12.32, -12.47, -12.07, -12.28,
-12.18, -12.38, -12.42)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -49L), .Names = c("DISTRITO", "TOTALES",
"HOMICIDIOS", "LESIONES", "VIO..DE.LA.LIBERTAD.PERSONAL", "VIO..DE.LA.LIBERTAD.SEXUAL",
"HURTO.SIMPLE.Y.AGRAVADO", "ROBO.SIMPLE.Y.AGRAVADO", "MICRO.COM.DE.DROGAS",
"TENENCIA.ILEGAL.DE.ARMAS", "LONGITUD", "LATITUD"))
I found a solution by reading the documentation for ggplot2 v0.9.
The new function `guide_legend()` should be used inside `guides()`.
It is a function that gives you more control over legend labels.
This is the final code, which produces the desired output (see the last line):
# Draw one point per row of `limanov2` on top of the `lima` base map.
# TOTALES is mapped to both the colour and the size of each point.
ggmap(lima) +
  geom_point(
    mapping = aes(x = LONGITUD, y = LATITUD, color = TOTALES, size = TOTALES),
    data = limanov2
  ) +
  # The size and colour scales are given the same legend title.
  scale_size_continuous(name = "Cantidad\ndelitos", range = c(2, 12)) +
  scale_color_gradient(name = "Cantidad\ndelitos", low = "yellow", high = "red") +
  ggtitle("TOTAL DELITOS - LIMA NOV 2012") +
  theme(
    legend.text = element_text(size = 14),
    plot.title = element_text(
      size = 12,
      vjust = 2,
      family = "Verdana",
      face = "italic"
    ),
    legend.position = "left"
  ) +
  # Key line: draw the colour scale with a legend guide, so that it can be
  # shown together with the size legend as a single legend of coloured circles.
  guides(colour = guide_legend())
I'm plotting some points over a map with the ggmap package.
The problem is that I get the message: "Removed 12 rows containing missing values (geom_point)".
But I don't have any NAs. I've looked at the data, and used:
# Count the NA cells across the whole of limanov2.
sum(is.na(limanov2)) # Gives 0
to prove it.
This is my code:
library(maps)
library(ggmap)
# Fetch the base map for the location "lima" at zoom level 11.
lima <- get_map(
  location = "lima",
  zoom = 11
)

# Overlay one point per row of `limanov2`; TOTALES drives both the colour
# (yellow-to-red gradient) and the size of each point.
ggmap(lima) +
  geom_point(
    mapping = aes(x = LONGITUD, y = LATITUD, color = TOTALES, size = TOTALES),
    data = limanov2
  ) +
  scale_color_gradient(low = "yellow", high = "red")
My data:
structure(list(DISTRITO = c("SAN JUAN DE LURIGANCHO", "CALLAO",
"LOS OLIVOS", "ATE VITARTE", "LIMA CERCADO", "SAN MARTÍN", "SANTIAGO DE SURCO",
"CHORILLOS", "COMAS", "INDEPENDENCIA", "EL AGUSTINO", "LA VICTORIA",
"SAN JUAN DE MIRAFLORES", "VILLA EL SALVADOR", "S. MIGUEL", "CARABAYLLO",
"MIRAFLORES", "PTE. PIEDRA", "SAN BORJA", "VENTANILLA", "SURQUILLO",
"BREÑA", "ANCÓN", "EL RIMAC", "BARRANCO", "LA MOLINA", "SAN LUIS",
"STA. ANITA", "LURIGANCHO", "P. LIBRE", "MAGDALENA", "LA PERLA",
"CHACLACAYO", "SAN ISIDRO", "J. MARÍA", "BELLAVISTA", "LINCE",
"C. DE LA LEGUA", "CIENEGUILLA", "STA.ROSA", "LURÍN", "PTA.NEGRA",
"PUCUSANA", "LA PUNTA", "PTA. HERMOSA", "PACHACAMAC", "SAN BARTOLO",
"SANTA MARÍA"), TOTALES = c(861L, 696L, 696L, 642L, 516L, 479L,
442L, 378L, 371L, 368L, 361L, 333L, 325L, 291L, 282L, 251L, 239L,
223L, 196L, 193L, 188L, 185L, 174L, 161L, 138L, 134L, 128L, 119L,
115L, 105L, 67L, 65L, 63L, 58L, 56L, 45L, 38L, 23L, 23L, 11L,
8L, 6L, 5L, 3L, 3L, 2L, 0L, 0L), HOMICIDIOS = c(1L, 7L, 0L, 1L,
2L, 0L, 0L, 1L, 7L, 4L, 4L, 4L, 0L, 0L, 0L, 2L, 0L, 1L, 0L, 7L,
0L, 0L, 0L, 4L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), LESIONES = c(100L,
72L, 61L, 43L, 44L, 8L, 10L, 15L, 44L, 40L, 50L, 15L, 52L, 28L,
7L, 33L, 15L, 27L, 3L, 21L, 7L, 36L, 33L, 19L, 14L, 1L, 8L, 6L,
16L, 4L, 4L, 9L, 1L, 2L, 9L, 5L, 2L, 5L, 7L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), VIO..DE.LA.LIBERTAD.PERSONAL = c(0L, 7L,
6L, 5L, 6L, 1L, 1L, 0L, 3L, 1L, 2L, 0L, 2L, 0L, 1L, 0L, 1L, 1L,
0L, 1L, 1L, 0L, 3L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), VIO..DE.LA.LIBERTAD.SEXUAL = c(56L,
14L, 12L, 15L, 7L, 10L, 2L, 9L, 11L, 13L, 8L, 9L, 7L, 14L, 4L,
15L, 4L, 12L, 2L, 17L, 7L, 3L, 4L, 12L, 2L, 1L, 5L, 3L, 11L,
4L, 1L, 2L, 0L, 2L, 0L, 3L, 0L, 2L, 2L, 0L, 4L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), HURTO.SIMPLE.Y.AGRAVADO = c(217L, 203L, 296L, 230L,
260L, 167L, 226L, 217L, 130L, 117L, 154L, 133L, 121L, 46L, 163L,
72L, 161L, 84L, 119L, 69L, 120L, 64L, 19L, 21L, 57L, 44L, 39L,
2L, 48L, 60L, 30L, 19L, 48L, 41L, 25L, 19L, 27L, 7L, 11L, 9L,
0L, 6L, 0L, 2L, 3L, 1L, 0L, 0L), ROBO.SIMPLE.Y.AGRAVADO = c(460L,
289L, 308L, 344L, 186L, 277L, 198L, 130L, 165L, 184L, 137L, 149L,
134L, 188L, 104L, 126L, 58L, 96L, 72L, 64L, 51L, 77L, 115L, 76L,
64L, 88L, 73L, 108L, 40L, 36L, 30L, 32L, 14L, 12L, 22L, 12L,
8L, 6L, 3L, 1L, 3L, 0L, 2L, 1L, 0L, 1L, 0L, 0L), MICRO.COM.DE.DROGAS = c(26L,
100L, 13L, 3L, 10L, 15L, 5L, 5L, 11L, 8L, 3L, 23L, 9L, 15L, 3L,
3L, 0L, 2L, 0L, 8L, 2L, 5L, 0L, 28L, 0L, 0L, 1L, 0L, 0L, 0L,
2L, 2L, 0L, 0L, 0L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L,
0L, 0L), TENENCIA.ILEGAL.DE.ARMAS = c(1L, 4L, 0L, 1L, 1L, 1L,
0L, 1L, 0L, 1L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 6L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), LONGITUD = c(-77, -77.12,
-77.08, -76.89, -77.04, -77.09, -76.99, -77.01, -77.05, -77.05,
-77, -77.02, -76.97, -76.94, -77.09, -76.99, -77.03, -77.08,
-77, -77.13, -77.01, -77.05, -77.11, -76.7, -77.02, -76.92, -77,
-76.96, -76.86, -77.06, -77.07, -77.12, -76.76, -77.03, -77.05,
-77.11, -77.04, -77.09, -76.78, -77.16, -76.81, -76.73, -76.77,
-77.16, -76.76, -76.83, -76.73, -76.77), LATITUD = c(-11.99,
-12.04, -11.97, -12.04, -12.06, -12, -12.16, -12.2, -11.93, -11.99,
-12.04, -12.08, -12.16, -12.23, -12.08, -11.79, -12.12, -11.88,
-12.1, -11.89, -12.11, -12.06, -11.69, -11.94, -12.15, -12.09,
-12.08, -12.04, -11.98, -12.08, -12.09, -12.07, -11.99, -12.1,
-12.08, -12.06, -12.09, -12.04, -12.07, -11.81, -12.24, -12.32,
-12.47, -12.07, -12.28, -12.18, -12.38, -12.42)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -48L), .Names = c("DISTRITO",
"TOTALES", "HOMICIDIOS", "LESIONES", "VIO..DE.LA.LIBERTAD.PERSONAL",
"VIO..DE.LA.LIBERTAD.SEXUAL", "HURTO.SIMPLE.Y.AGRAVADO", "ROBO.SIMPLE.Y.AGRAVADO",
"MICRO.COM.DE.DROGAS", "TENENCIA.ILEGAL.DE.ARMAS", "LONGITUD",
"LATITUD"))
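Some of your points lie outside the area covered by the base map at that zoom level, so they are dropped and reported as missing values. Try changing the zoom parameter (here, a smaller value) so that the map covers all of your points.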
library(maps)
library(ggmap)
# Fetch the base map for the location "lima" at zoom level 10.
lima <- get_map(
  location = "lima",
  zoom = 10
)

# Overlay one point per row of `limanov2`; TOTALES drives both the colour
# (yellow-to-red gradient) and the size of each point.
ggmap(lima) +
  geom_point(
    mapping = aes(x = LONGITUD, y = LATITUD, color = TOTALES, size = TOTALES),
    data = limanov2
  ) +
  scale_color_gradient(low = "yellow", high = "red")