I have an R program, and I am struggling to get all of the data points to display on the map.
library(ggmap)
library(ggplot2)

setwd("d:/GIS/")
sep <- read.csv("SEP_assets_csv.csv")

# Columns holding the SEP 12 asset counts. fixed = TRUE makes the "."
# match literally instead of acting as a regex wildcard (which would
# also match names like "SEPX12").
Sub1 <- sep[grep("SEP.12", names(sep), fixed = TRUE)]

# Percentage of each school's assets that are SEP 12.
# Columns 4:7 are the four asset-count columns
# (Windows/Mac x SEP 11/SEP 12) — TODO confirm against the CSV header.
sep$newCol <- 100 * rowSums(Sub1) / rowSums(sep[4:7])

# Create a new grouping variable. Refer to the column by name rather
# than by position (the old sep[,8] breaks if columns are reordered).
Percent_SEP12_Assets <- ifelse(sep$newCol >= 50, "Over 50", "Under 50")

# Get the map.
map <- get_map("Kissena Park, Queens", zoom = 13, maptype = "roadmap")

# Plot the map and use the grouping variable for the color inside aes().
ggmap(map) +
  geom_point(data = sep,
             aes(x = Longitude, y = Latitude, color = Percent_SEP12_Assets),
             size = 9, alpha = 0.6) +
  scale_color_manual(breaks = c("Over 50", "Under 50"),
                     values = c("green", "red"))
And here is the output map.
I would like to zoom in as far as possible without cutting off any data points, but no matter which location I pick on the map, some points keep getting dropped, i.e. `Removed 2 rows containing missing values (geom_point).`
Is there a way to set the map boundaries based on the extremities of the latitude and longitude? The CSV I import with
sep <- read.csv("SEP_assets_csv.csv")
contains the list of latitudes and longitudes.
Help!
Coordinates
Latitude Longitude
40.758365 -73.824407
40.774168 -73.818543
40.761748 -73.811379
40.765602 -73.828293
40.751762 -73.81778
40.764834 -73.789712
40.777951 -73.842932
40.76501 -73.794319
40.785959 -73.817349
40.755764 -73.799256
40.745593 -73.829283
40.789929 -73.839501
40.760072 -73.783908
40.726437 -73.807592
40.741093 -73.808757
40.720926 -73.823358
40.729642 -73.81781
40.724191 -73.80937
40.782346 -73.77844
40.778164 -73.799841
40.775122 -73.8185
40.760344 -73.817909
40.792326 -73.809516
40.78322 -73.806977
40.73106 -73.805449
40.736521 -73.813001
40.783714 -73.795027
40.770194 -73.82762
40.735855 -73.823583
40.74943 -73.82141
40.769753 -73.832001
40.754465 -73.826204
40.738775 -73.823892
40.764868 -73.826819
40.738332 -73.82028
40.735017 -73.821339
40.72535 -73.811325
40.721466 -73.820401
dput
> dput(sep)
structure(list(School = structure(1:38, .Label = c("Queens\\25Q020",
"Queens\\25Q021", "Queens\\25Q022", "Queens\\25Q023", "Queens\\25Q024",
"Queens\\25Q025", "Queens\\25Q029", "Queens\\25Q032", "Queens\\25Q079",
"Queens\\25Q107", "Queens\\25Q120", "Queens\\25Q129", "Queens\\25Q130",
"Queens\\25Q154", "Queens\\25Q163", "Queens\\25Q164", "Queens\\25Q165",
"Queens\\25Q168", "Queens\\25Q169", "Queens\\25Q184", "Queens\\25Q185",
"Queens\\25Q189", "Queens\\25Q193", "Queens\\25Q194", "Queens\\25Q200",
"Queens\\25Q201", "Queens\\25Q209", "Queens\\25Q214", "Queens\\25Q219",
"Queens\\25Q237", "Queens\\25Q242", "Queens\\25Q244", "Queens\\25Q425",
"Queens\\25Q460", "Queens\\25Q499", "Queens\\25Q515", "Queens\\25Q707",
"Queens\\25Q792"), class = "factor"), Latitude = c(40.758365,
40.774168, 40.761748, 40.765602, 40.751762, 40.764834, 40.777951,
40.76501, 40.785959, 40.755764, 40.745593, 40.789929, 40.760072,
40.726437, 40.741093, 40.720926, 40.729642, 40.724191, 40.782346,
40.778164, 40.775122, 40.760344, 40.792326, 40.78322, 40.73106,
40.736521, 40.783714, 40.770194, 40.735855, 40.74943, 40.769753,
40.754465, 40.738775, 40.764868, 40.738332, 40.735017, 40.72535,
40.721466), Longitude = c(-73.824407, -73.818543, -73.811379,
-73.828293, -73.81778, -73.789712, -73.842932, -73.794319, -73.817349,
-73.799256, -73.829283, -73.839501, -73.783908, -73.807592, -73.808757,
-73.823358, -73.81781, -73.80937, -73.77844, -73.799841, -73.8185,
-73.817909, -73.809516, -73.806977, -73.805449, -73.813001, -73.795027,
-73.82762, -73.823583, -73.82141, -73.832001, -73.826204, -73.823892,
-73.826819, -73.82028, -73.821339, -73.811325, -73.820401), Windows.SEP.11 = c(48L,
154L, 11L, 62L, 20L, 72L, 9L, 37L, 8L, 22L, 9L, 47L, 44L, 99L,
78L, 91L, 42L, 122L, 55L, 14L, 162L, 108L, 89L, 87L, 23L, 14L,
75L, 74L, 141L, 73L, 43L, 14L, 534L, 189L, 128L, 10L, 79L, 38L
), Mac.SEP.11 = c(49L, 0L, 180L, 2L, 202L, 116L, 41L, 1L, 17L,
22L, 33L, 43L, 1L, 28L, 2L, 0L, 238L, 13L, 76L, 55L, 76L, 42L,
0L, 1L, 12L, 0L, 16L, 10L, 1L, 7L, 0L, 1L, 1L, 67L, 16L, 7L,
31L, 24L), Windows.SEP.12 = c(52L, 252L, 1L, 2L, 12L, 45L, 108L,
15L, 14L, 4L, 19L, 21L, 46L, 90L, 10L, 86L, 15L, 76L, 122L, 2L,
9L, 52L, 39L, 120L, 43L, 17L, 9L, 54L, 19L, 199L, 40L, 25L, 64L,
164L, 14L, 27L, 45L, 2L), Mac.SEP.12 = c(73L, 2L, 91L, 53L, 288L,
6L, 2L, 107L, 109L, 97L, 41L, 18L, 12L, 16L, 2L, 2L, 270L, 32L,
45L, 92L, 54L, 190L, 1L, 4L, 19L, 53L, 1L, 10L, 0L, 61L, 50L,
27L, 27L, 25L, 3L, 1L, 43L, 0L), newCol = c(56.3063063063063,
62.2549019607843, 32.5088339222615, 46.218487394958, 57.4712643678161,
21.3389121338912, 68.75, 76.25, 83.1081081081081, 69.6551724137931,
58.8235294117647, 30.2325581395349, 56.3106796116505, 45.4935622317597,
13.0434782608696, 49.1620111731844, 50.4424778761062, 44.4444444444444,
56.0402684563758, 57.6687116564417, 20.9302325581395, 61.734693877551,
31.0077519379845, 58.4905660377358, 63.9175257731959, 83.3333333333333,
9.9009900990099, 43.2432432432432, 11.8012422360248, 76.4705882352941,
67.6691729323308, 77.6119402985075, 14.5367412140575, 42.4719101123596,
10.5590062111801, 62.2222222222222, 44.4444444444444, 3.125)), .Names = c("School",
"Latitude", "Longitude", "Windows.SEP.11", "Mac.SEP.11", "Windows.SEP.12",
"Mac.SEP.12", "newCol"), row.names = c(NA, -38L), class = "data.frame")
You haven't provided us with any of the data, so I'm going to give an example using a dataset in the historydata package. Instead of getting a map based on a location and a zoom, you can get a map based on the bounding box of the latitudes and longitudes in your dataset.
library(historydata)
library(ggmap)

# Example dataset from the historydata package.
data("catholic_dioceses")

# Build a bounding box from the extremes of the coordinates themselves,
# then request a map that covers exactly that region.
diocese_bbox <- make_bbox(catholic_dioceses$long, catholic_dioceses$lat, f = 0.01)
diocese_map <- get_map(diocese_bbox)

ggmap(diocese_map) +
  geom_point(data = catholic_dioceses, aes(x = long, y = lat))
Note that the f = argument to make_bbox() lets you control how much padding there is around your map.
In your case, I think this will work:
library(ggmap)

# Bounding box computed from the data's own longitude/latitude extremes;
# f controls the fractional padding added around the points.
asset_bbox <- make_bbox(sep$Longitude, sep$Latitude, f = 0.01)
asset_map <- get_map(asset_bbox)

ggmap(asset_map) +
  geom_point(data = sep,
             aes(x = Longitude, y = Latitude, color = Percent_SEP12_Assets),
             size = 9, alpha = 0.6) +
  scale_color_manual(breaks = c("Over 50", "Under 50"),
                     values = c("green", "red"))
I have a data.frame where the first 13 rows contain site/observation information. Each column represents 1 individual, however most individuals have an A and B observation (although some only have A while a few have an A, B, and C observation). I'd like to average each row for every individual, and create a new data.frame from this information.
Example (small subset with row 1, row 7, row 13, and row 56-61):
OriginalID Tree003A Tree003B Tree008B Tree013A
1 Township LY LY LY LY
7 COFECHA ID LY1A003A LY1A003B LY1A008B LY1A013A
13 PathLength 37.5455 54.8963 57.9732 64.0679
56 2006 1.538 1.915 0.827 2.722
57 2007 1.357 1.923 0.854 2.224
58 2008 1.311 2.204 0.669 2.515
59 2009 0.702 1.125 0.382 2.413
60 2010 0.937 1.556 0.907 2.315
61 2011 0.942 1.268 1.514 1.858
I'd like to create a new data.frame that averages each individual's annual observations, whether they have an A; an A and B; or an A, B, and C observation. Individuals' IDs are in row 7 (COFECHA ID):
Intended Output:
OriginalID Tree003avg Tree008avg Tree013avg
1 Township LY LY LY
7 COFECHA ID LY1A003avg LY1A008avg LY1A013avg
13 PathLength 46.2209 57.9732 64.0679
56 2006 1.727 0.827 2.722
57 2007 1.640 0.854 2.224
58 2008 1.758 0.669 2.515
59 2009 0.914 0.382 2.413
60 2010 1.247 0.907 2.315
61 2011 1.105 1.514 1.858
Any ideas on how to average the columns would be great. I've been trying to modify the following code, but due to the 13 rows of additional information at the top of the data.frame, I didn't know how to specify to only average rows 14:61.
rowMeans(subset(LY011B, select = c("LY1A003A", "LY1A003B")), na.rm=TRUE)
The code for a larger set of the data that I'm working with is:
> dput(LY011B)
structure(list(OriginalTreeID = structure(c(58L, 53L, 57L, 59L,
51L, 61L, 50L, 55L, 56L, 60L, 54L, 49L, 52L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L), .Label = c("1964", "1965", "1966", "1967",
"1968", "1969", "1970", "1971", "1972", "1973", "1974", "1975",
"1976", "1977", "1978", "1979", "1980", "1981", "1982", "1983",
"1984", "1985", "1986", "1987", "1988", "1989", "1990", "1991",
"1992", "1993", "1994", "1995", "1996", "1997", "1998", "1999",
"2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007",
"2008", "2009", "2010", "2011", "AnalysisDateTime", "COFECHA ID",
"CoreLetter", "PathLength", "Plot#", "RingCount", "SiteID", "SP",
"Subplot#", "Township", "Tree#", "YearLastRing", "YearLastWhiteWood"
), class = "factor"), Tree003A = structure(c(35L, 8L, 34L, 7L,
34L, 21L, 36L, 31L, 37L, 30L, 32L, 29L, 33L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 23L, 22L, 25L, 28L, 27L, 24L, 26L, 20L, 16L,
15L, 6L, 18L, 12L, 10L, 3L, 9L, 11L, 19L, 17L, 14L, 13L, 2L,
4L, 5L), .Label = c("", "0.702", "0.803", "0.937", "0.942", "0.961",
"003", "1", "1.09", "1.116", "1.124", "1.224", "1.311", "1.357",
"1.471", "1.509", "1.538", "1.649", "1.679", "1.782", "1999",
"2.084", "2.148", "2.162", "2.214", "2.313", "2.429", "2.848",
"2/19/2014 11:06", "2011", "23017323011sp1", "24", "37.5455",
"A", "LY", "LY1A003A", "sp1"), class = "factor"), Tree003B = structure(c(56L,
19L, 54L, 18L, 55L, 49L, 57L, 51L, 58L, 50L, 52L, 48L, 53L, 1L,
1L, 1L, 1L, 10L, 7L, 8L, 6L, 5L, 4L, 3L, 2L, 11L, 9L, 30L, 15L,
24L, 20L, 23L, 33L, 37L, 42L, 13L, 44L, 36L, 12L, 16L, 21L, 27L,
35L, 41L, 38L, 26L, 40L, 14L, 46L, 32L, 28L, 17L, 31L, 22L, 39L,
43L, 45L, 47L, 25L, 34L, 29L), .Label = c("", "0.073", "0.092",
"0.173", "0.174", "0.358", "0.413", "0.425", "0.58", "0.697",
"0.719", "0.843", "0.883", "0.896", "0.937", "0.941", "0.964",
"003", "1", "1.048", "1.067", "1.075", "1.097", "1.119", "1.125",
"1.176", "1.207", "1.267", "1.268", "1.27", "1.297", "1.402",
"1.429", "1.556", "1.662", "1.693", "1.704", "1.735", "1.76",
"1.792", "1.816", "1.881", "1.915", "1.92", "1.923", "2.155",
"2.204", "2/19/2014 11:06", "2000", "2011", "23017323011sp1",
"48", "54.8963", "A", "B", "LY", "LY1A003B", "sp1"), class = "factor"),
Tree008B = structure(c(59L, 24L, 57L, 23L, 58L, 52L, 60L,
54L, 61L, 53L, 55L, 51L, 56L, 19L, 14L, 13L, 22L, 7L, 8L,
9L, 4L, 6L, 3L, 1L, 2L, 10L, 25L, 47L, 43L, 49L, 46L, 40L,
50L, 48L, 44L, 17L, 36L, 31L, 27L, 30L, 39L, 37L, 34L, 45L,
38L, 32L, 41L, 29L, 42L, 33L, 28L, 26L, 21L, 11L, 15L, 16L,
18L, 12L, 5L, 20L, 35L), .Label = c("0.302", "0.31", "0.318",
"0.357", "0.382", "0.412", "0.452", "0.476", "0.5", "0.539",
"0.591", "0.669", "0.673", "0.787", "0.79", "0.827", "0.835",
"0.854", "0.879", "0.907", "0.917", "0.967", "008", "1",
"1.027", "1.037", "1.141", "1.152", "1.172", "1.263", "1.383",
"1.411", "1.446", "1.498", "1.514", "1.611", "1.671", "1.685",
"1.695", "1.719", "1.783", "1.879", "1.884", "1.927", "1.97",
"2.019", "2.069", "2.35", "2.696", "2.979", "2/19/2014 11:06",
"2000", "2011", "23017323011sp1", "48", "57.9732", "A", "B",
"LY", "LY1A008B", "sp1"), class = "factor"), Tree013A = structure(c(45L,
6L, 44L, 5L, 44L, 38L, 46L, 40L, 47L, 39L, 42L, 37L, 43L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 10L,
13L, 8L, 22L, 14L, 18L, 24L, 4L, 11L, 25L, 7L, 36L, 41L,
33L, 29L, 17L, 28L, 23L, 21L, 16L, 26L, 15L, 3L, 20L, 12L,
2L, 9L, 34L, 35L, 27L, 32L, 31L, 30L, 19L), .Label = c("",
"0.608", "0.916", "0.945", "013", "1", "1.125", "1.18", "1.388",
"1.423", "1.493", "1.498", "1.554", "1.579", "1.619", "1.629",
"1.719", "1.756", "1.858", "1.867", "1.869", "1.876", "1.9",
"1.916", "2.023", "2.089", "2.224", "2.246", "2.247", "2.315",
"2.413", "2.515", "2.547", "2.645", "2.722", "2.785", "2/19/2014 11:11",
"2002", "2011", "23017323011sp1", "3.375", "34", "64.0679",
"A", "LY", "LY1A013A", "sp1"), class = "factor")), .Names = c("OriginalTreeID",
"Tree003A", "Tree003B", "Tree008B", "Tree013A"), row.names = c(NA,
61L), class = "data.frame")
Here is another approach where most of the work is done
by rearranging the data with the reshape package.
After the data is "munged", it can be rearranged into almost anything
you want with the cast function.
# I'm used to working with the transpose.
wide <- t(x)

# Promote the first row to column names, stripping "#" characters
# (they make column references awkward later).
library(stringr)
colnames(wide) <- str_replace(wide[1, ], "#", "")
wide <- data.frame(wide[-1, ], check.names = FALSE)

# Reshape to long format by declaring the "ID" variables.
library(reshape)
z <- melt(wide, id.vars = c("Township", "Plot", "Subplot", "Tree",
                            "CoreLetter", "COFECHA ID", "SiteID", "SP",
                            "AnalysisDateTime"))
z$value <- as.numeric(as.character(z$value))

# Now you can do lots of things!
# All the info you wanted is there, just in a different format
# than your "intended output".
cast(z, Tree ~ variable, mean, na.rm = TRUE)

# To get to your "intended output":
out <- cast(z, Township + Plot + Subplot + Tree ~ variable, mean, na.rm = TRUE)
out[["COFECHA ID"]] <- with(out, paste0(Township, Plot, Subplot, Tree, "avg"))
out2 <- out[, c(1, ncol(out), 8:(ncol(out) - 1))]
out3 <- cbind(colnames(out2), t(out2))
colnames(out3) <- c("OriginalID", paste0("Tree", out$Tree, "avg"))

# For kicks, here are some other things. Have fun!
cast(z, Tree ~ variable, median, na.rm = TRUE)  # the median instead of the mean
cast(z, Tree + CoreLetter ~ variable)           # back to your original data
cast(z, CoreLetter ~ variable, length)          # how many measurements from each core?
cast(z, CoreLetter ~ variable, mean)            # the average across different cores
For even more fun!
library(ggplot2)

# Drop the non-annual rows (a not-so-pretty hack) and coerce the
# remaining year labels to integers for a proper numeric axis.
d <- z[-c(1:16), ]
colnames(d)[10] <- "Year"
d$Year <- as.integer(as.character(d$Year))

ggplot(d, aes(x = Year, y = value, group = Tree, color = Tree, shape = CoreLetter)) +
  geom_point() +
  geom_smooth(method = "loess", span = 0.3)
Does this mean that the early 2000s were dry?
Try this:
# `d` is your data structure from above (the dput of LY011B).
d <- LY011B

# Subset the data: drop the 13 header rows and the ID column.
d.f <- d[-(1:13), -1]
c.n <- colnames(d.f)

# Build the grouping variable by stripping the trailing core letter
# from each column name (Tree003A, Tree003B -> Tree003).
# NOTE(review): the original posted two competing definitions of `f`,
# the second silently overwriting the first. The column-name-based one
# is kept here; the alternative read the IDs from a header row instead:
#   f <- d[4, 2:ncol(d)]
f <- gsub(".?$", "", c.n)

# Convert to numeric (apply() coerces the data frame to a character
# matrix first, so factor levels come through as their printed values)
# and treat missing values as 0.
d.f <- apply(d.f, 2, as.numeric)
d.f[is.na(d.f)] <- 0

# Split the transposed data frame into per-individual sub-data-frames.
d.f.g <- as.data.frame(t(d.f))
a <- split(d.f.g, f)

# Calculate the group-wise averages as colMeans (because the data are
# transposed).
grp.means <- lapply(a, colMeans)

# grp.means is a list with one element per individual, each a named
# numeric vector of the per-date averages. Re-form this as required —
# you'll probably want to transpose again.