I created two sets of vectors to plot two sets of data on a map.
Everytime I run, R Studio crashes.
What am I missing?
library(ggmap)
setwd("d:/GIS/31R")
sep <- read.csv("California_SEP_assets_csv.csv")
Sub1 <- sep[grep("SEP.11", names(sep))]
sep$newCol <- 100*rowSums(Sub1)/rowSums(sep[4:7])
library(sp)
lst <- split(sep, sep[,8] >= 50)
under50 <- lst[[1]]
over50 <- lst[[2]]
coords <- cbind(Longitude = as.numeric(as.character(under50$Longitude)),Latitude=as.numeric(as.character(under50$Latitude)))
coords2 <- cbind(Longitude2 = as.numeric(as.character(over50$Longitude)),Latitude2=as.numeric(as.character(over50$Latitude)))
map <- qmap('Yorba Linda', zoom = 11, maptype = 'hybrid')
map + geom_point(data=under50, aes(x = Longitude, y = Latitude), color="red", size = 5, alpha = 0.5) + geom_point(data=over50, aes(x = Longitude2, y = Latitude2), color="green", size = 5, alpha = 0.5)
Original Code
My original code plotted all points
library(ggmap)
setwd("d:/GIS/31R")
sep <- read.csv("California_SEP_assets_csv.csv")
library(sp)
coords <- cbind(Longitude = as.numeric(as.character(sep$Longitude)),Latitude=as.numeric(as.character(sep$Latitude)))
sep.pts <- SpatialPointsDataFrame(coords,sep[,-(2:3)],proj4string = CRS("+init=epsg:4326"))
plot(sep.pts, pch=".",col="darkred")
map <- qmap('Yorba Linda', zoom = 11, maptype = 'hybrid')
map + geom_point(data=sep, aes(x = Longitude, y = Latitude), color="red", size = 5, alpha = 0.5)
Gave this
I am able to plot points standalone, i.e.
library(ggmap)
setwd("d:/GIS/31R")
sep <- read.csv("California_SEP_assets_csv.csv")
Sub1 <- sep[grep("SEP.11", names(sep))]
sep$newCol <- 100*rowSums(Sub1)/rowSums(sep[4:7])
library(sp)
lst <- split(sep, sep[,8] >= 50)
under50 <- lst[[1]]
over50 <- lst[[2]]
coords <- cbind(Longitude = as.numeric(as.character(under50$Longitude)),Latitude=as.numeric(as.character(under50$Latitude)))
under50.pts <- SpatialPointsDataFrame(coords, under50[, -(2:3)], proj4string = CRS("+init=epsg:4326"))
coords2 <- cbind(Longitude2 = as.numeric(as.character(over50$Longitude)),Latitude2=as.numeric(as.character(over50$Latitude)))
over50.pts <- SpatialPointsDataFrame(coords2, over50[, -(2:3)], proj4string = CRS("+init=epsg:4326"))
plot(over50.pts, pch = 22, col = "darkgreen")
and I replace the last line, plot(...
with
plot(under50.pts, pch = 22, col = "darkred")
If think you are making things more complicated than needs to be. If you want to color the points to a certain grouping variable, just create such a variable. Based on the data you posted in this question, you can do this as follows:
library(ggmap)
library(ggplot2)
# create a new grouping variable
sep$newvar <- ifelse(sep[,8] >= 50, "Over 50", "Under 50")
# get the map
map <- get_map('Yorba Linda', zoom = 11, maptype = 'hybrid')
# plot the map and use the grouping variable for the fill inside the aes
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude, color=newvar), size=7, alpha=0.6) +
scale_color_manual(breaks=c("Over 50", "Under 50"), values=c("green","red"))
this gives:
Used data:
sep <- structure(list(Site = structure(1:6, .Label = c("31R001", "31R002", "31R003", "31R004", "31R005", "31R006"), class = "factor"),
Latitude = c(33.808874, 33.877256, 33.820825, 33.852373, 33.829697, 33.810274),
Longitude = c(-117.844048, -117.700135, -117.811845, -117.795516, -117.787532, -117.830429),
Windows.SEP.11 = c(63L, 174L, 11L, 85L, 163L, 71L),
Mac.SEP.11 = c(0L, 1L, 4L, 0L, 0L, 50L),
Windows.SEP.12 = c(124L, 185L, 9L, 75L, 23L, 5L),
Mac.SEP.12 = c(0L, 1L, 32L, 1L, 0L, 50L),
newCol = c(33.6898395721925, 48.4764542936288, 26.7857142857143, 52.7950310559006, 87.6344086021505, 68.75),
newvar = c("Under 50", "Under 50", "Under 50", "Over 50", "Over 50", "Over 50")),
.Names = c("Site", "Latitude", "Longitude", "Windows.SEP.11", "Mac.SEP.11", "Windows.SEP.12", "Mac.SEP.12","newCol", "newvar"),
row.names = c(NA, 6L), class = "data.frame")
I fixed the code. However, if you can post more elegant code and explain it, I will mark as solution.
library(ggmap)
setwd("d:/GIS/31R")
sep <- read.csv("California_SEP_assets_csv.csv")
Sub1 <- sep[grep("SEP.11", names(sep))]
sep$newCol <- 100*rowSums(Sub1)/rowSums(sep[4:7])
library(sp)
lst <- split(sep, sep[,8] >= 50)
under50 <- lst[[1]]
over50 <- lst[[2]]
coords <- cbind(Longitude = as.numeric(as.character(under50$Longitude)),Latitude=as.numeric(as.character(under50$Latitude)))
under50.pts <- SpatialPointsDataFrame(coords, under50[, -(2:3)], proj4string = CRS("+init=epsg:4326"))
coords2 <- cbind(Longitude = as.numeric(as.character(over50$Longitude)),Latitude=as.numeric(as.character(over50$Latitude)))
over50.pts <- SpatialPointsDataFrame(coords2, over50[, -(2:3)], proj4string = CRS("+init=epsg:4326"))
map <- qmap('Yorba Linda', zoom = 11, maptype = 'hybrid')
map + geom_point(data=over50, aes(x = Longitude, y = Latitude), color="green", size = 5, alpha = 0.5) + geom_point(data=under50, aes(x = Longitude, y = Latitude), color="red", size = 5, alpha = 0.5)
Format of the .csv file
Site Latitude Longitude Windows.SEP.11 Mac.SEP.11 Windows.SEP.12 Mac.SEP.12 newCol
1 31R001 33.80887 -117.8440 63 0 124 0 33.68984
2 31R002 33.87726 -117.7001 174 1 185 1 48.47645
3 31R003 33.82082 -117.8118 11 4 9 32 26.78571
4 31R004 33.85237 -117.7955 85 0 75 1 52.79503
5 31R005 33.82970 -117.7875 163 0 23 0 87.63441
6 31R006 33.81027 -117.8304 71 50 5 50 68.75000
Related
I am trying to put on a Italian geographical map a dot reporting the provenience ('provincia') of our patients. Ideally, the dot size should be proportional to the number of patients coming from that 'provincia'. An example of the list I would like to plot is the following.
MI 8319
CO 537
MB 436
VA 338
BG 310
PV 254
CR 244
NO 210
RM 189
CS 179
In the first column there is the 'provincia' code: MI (Milano), CO (Como), MB (Monza-Brianza), etc. In the second column there is the number of patients from that 'provincia'. So the output should be an Italian political map where the biggest dot is around the city of Milano (MI), the second biggest dot is near the city of Como (CO), the third one is around the city of Monza-Brianza (MB),etc.
Is there any package that could do the plot I am looking for? I found a tool that could do the job here, but apparently they expect that I load the geographical coordinates in order to do the plot.
https://www.littlemissdata.com/blog/maps
Thanks in advance.
Here is one way to handle your task. You have the abbreviations for Italian province. You want to use them to merge your data with polygon data. If you download Italy's polygons from GADM, you can obtain data that contain the abbreviations. Specifically, the column, HASC_2 is the one. You need to merge your data with the polygon data. Then, you want to create another data set which contains centroid. You can draw a map with the two data sets.
library(tidyverse)
library(sf)
library(ggthemes)
# Get the sf file from https://gadm.org/download_country_v3.html
# and import it in R.
mysf <- readRDS("gadm36_ITA_2_sf.rds")
# This is your data, which is called mydata.
mydata <- structure(list(abbs = c("MI", "CO", "MB", "VA", "BG", "PV", "CR",
"NO", "RM", "CS"), value = c(8319L, 537L, 436L, 338L, 310L, 254L,
244L, 210L, 189L, 179L)), class = "data.frame", row.names = c(NA,
-10L))
abbs value
1 MI 8319
2 CO 537
3 MB 436
4 VA 338
5 BG 310
6 PV 254
7 CR 244
8 NO 210
9 RM 189
10 CS 179
# Abbreviations are in HASC_2 in mysf. Manipulate strings so that
# I can join mydata with mysf with the abbreviations. I also get
# longitude and latitude with st_centroid(). This data set is for
# geom_point().
mysf2 <- mutate(mysf, HASC_2 = sub(x = HASC_2, pattern = "^IT.", replacement = "")) %>%
left_join(mydata, by = c("HASC_2" = "abbs")) %>%
mutate(lon = map_dbl(geometry, ~st_centroid(.x)[[1]]),
lat = map_dbl(geometry, ~st_centroid(.x)[[2]]))
# Draw a map
ggplot() +
geom_sf(data = mysf) +
geom_point(data = mysf2, aes(x = lon, y = lat, size = value)) +
theme_map()
UPDATE ON INSET MAP
This is an update following different suggestion on using inset maps, which I think it would be the best solution for yout question and comments:
library(sf)
library(cartography)
EU = st_read("~/R/mapslib/EUROSTAT/NUTS_RG_03M_2016_3035_LEVL_3.geojson")
IT = subset(EU, CNTR_CODE == "IT")
mydata <-
structure(list(
abbs = c("MI", "CO", "MB", "VA", "BG", "PV", "CR",
"NO", "RM", "CS"),
value = c(8319L, 537L, 436L, 338L, 310L, 254L,
244L, 210L, 189L, 179L),
nuts = c("ITC4C","ITC42","ITC4D","ITC41",
"ITC46", "ITC48","ITC4A","ITC15",
"ITI43","ITF61")
),
class = "data.frame",
row.names = c(NA, -10L))
patients = merge(IT, mydata, by.x = "id", by.y = "nuts")
#Get breaks for map
br=getBreaks(patients$value)
#Delimit zone
#Based on NUTS1, Nortwest Italy
par(mar=c(0,0,0,0))
ghostLayer(IT[grep("ITC",IT$NUTS_ID),], bg="lightblue")
plot(st_geometry(EU), col="grey90", add=TRUE)
plot(st_geometry(IT), col = "#FEFEE9", border = "#646464", add=TRUE)
choroLayer(
patients,
var = "value",
breaks = br,
col = carto.pal(pal1 = "red.pal", n1 = length(br)-1),
legend.pos = "topleft",
legend.title.txt = "Total patients",
add = TRUE,
legend.frame = TRUE
)
labelLayer(patients,txt="abbs", halo=TRUE, overlap = FALSE)
#Inset
par(
fig = c(0, 0.4, 0.01, 0.4),
new = TRUE
)
inset=patients[patients$abbs %in% c("RM","CS"),]
ghostLayer(inset, bg="lightblue")
plot(st_geometry(EU), col="grey90", add=TRUE)
plot(st_geometry(IT), col = "#FEFEE9", border = "#646464", add=TRUE)
choroLayer(
patients,
var = "value",
breaks = br,
col = carto.pal(pal1 = "red.pal", n1 = length(br)-1),
legend.pos = "n",
add = TRUE
)
labelLayer(patients,txt="abbs", halo=TRUE, overlap = FALSE)
box(which = "figure", lwd = 1)
#RESTORE PLOT
par(fig=c(0,1,0,1))
OLD ANSWER
Following my comment on plotting labels, maybe with circles is not the best option for your map, given the concentration. I suggest you to use another kind of map for that, as chorolayer, I leveraged on https://stackoverflow.com/users/3304471/jazzurro for the dataframe.
library(sf)
library(cartography)
EU = st_read("~/R/mapslib/EUROSTAT/NUTS_RG_03M_2016_3035_LEVL_3.geojson")
IT = subset(EU, CNTR_CODE == "IT")
mydata <-
structure(list(
abbs = c("MI", "CO", "MB", "VA", "BG", "PV", "CR",
"NO", "RM", "CS"),
value = c(8319L, 537L, 436L, 338L, 310L, 254L,
244L, 210L, 189L, 179L),
nuts = c("ITC4C","ITC42","ITC4D","ITC41",
"ITC46", "ITC48","ITC4A","ITC15",
"ITI43","ITF61")
),
class = "data.frame",
row.names = c(NA, -10L))
patients = merge(IT, mydata, by.x = "id", by.y = "nuts")
#Options1 - With circles
par(mar = c(0, 0, 0, 0))
plot(st_geometry(IT), col = "#FEFEE9", border = "#646464")
propSymbolsLayer(
x = patients,
var = "value",
col = carto.pal(pal1 = "red.pal", n1 = 6),
legend.title.txt = "Total patients",
add = TRUE
)
#Option 2 - Chorolayer with labels
par(mar = c(0, 0, 0, 0))
plot(st_geometry(IT), col = "#FEFEE9", border = "#646464")
choroLayer(
patients,
var = "value",
col = carto.pal(pal1 = "red.pal", n1 = 6),
legend.title.txt = "Total patients",
add = TRUE
)
#Create labels
patients$label = paste(patients$abbs, patients$value, sep = " - ")
labelLayer(
patients,
txt = "label",
overlap = FALSE,
halo = TRUE,
show.lines = TRUE,
)
Data from
https://ec.europa.eu/eurostat/cache/GISCO/distribution/v2/nuts/nuts-2016-files.html
I'm trying to make a hourly dispatch curve with generation and energy consumpsion data, which have the characteristic that when we do a power balance (generation minus consumpsion) we get values nearly to zero.
Into the generation data there are also net interchange values, that be negative when de power system are exporting energy and positive when the system are importing energy to complete the consumption.
Thus, to the plot created with geom_area and geom_line be ok, the black line (consumption) needs be adjusted with the generation area, so that there's no gap between the area and the black line. But, in my attempts I couldn't do it. How you can see, same the energy balence resulting in zero, there are a gap beetwen 19 and 20 hours. I don't know what is wrong. Someone have idea how to do that?
Thanks in advance.
Data to the plot:
generation <-
data.frame('dayHour' = c('18/11/2018 18:00','18/11/2018 19:00','18/11/2018 20:00','18/11/2018 21:00','18/11/2018 18:00','18/11/2018 19:00','18/11/2018 20:00','18/11/2018 21:00','18/11/2018 18:00','18/11/2018 19:00','18/11/2018 20:00','18/11/2018 21:00','18/11/2018 18:00','18/11/2018 19:00','18/11/2018 20:00','18/11/2018 21:00'),
'power' = c(-1364.290, -433.110, 1132.39, 749.48, 463.75, 467.8, 469.35, 436.51, 2025.5, 2133.07, 2306.85, 2304.91, 211.52, 213.16, 214.33, 214.59),
'label' = c('net interchange', 'net interchange', 'net interchange', 'net interchange', 'gas', 'gas', 'gas', 'gas', 'hydro', 'hydro', 'hydro', 'hydro', 'biomass', 'biomass', 'biomass', 'biomass'))
generation$label <- factor(generation$label, levels = c('net interchange', 'gas', 'hydro', 'biomass'))
net.load <-
data.frame('dayHour' = c('18/11/2018 18:00', '18/11/2018 19:00', '18/11/2018 20:00', '18/11/2018 21:00'), 'power' = c(1336.48, 2380.91, 4122.91, 3705.49), 'label' = c('net load', 'net load', 'net load', 'net load'))
generation$dayHour <-
as.POSIXct(strptime(generation$dayHour,format='%d/%m/%Y %H:%M'))
net.load$dayHour <-
as.POSIXct(strptime(net.load$dayHour,format='%d/%m/%Y %H:%M'))
Power balance
pb <-
filter(generation, label == "biomass")$power +
filter(generation, label == "hydro")$power +
filter(generation, label == "gas")$power +
filter(generation, label == "net interchange")$power -
net.load$power
summary(pb)
Dispatch curve
ggplot() +
geom_area(data = generation,
aes(y = power,
x = dayHour,
fill = label)) +
geom_line(data = net.load,
aes(y = power,
x = dayHour,
colour = label),
size = 1.2,
colour = "black") +
labs(fill = "generation",
colour = 'net load')
It looks like position_stack is getting confused when the interpolation crosses the x-axis.
To fix it, you can interpolate manually before plotting (e.g. with approx):
library(tidyverse)
generation <- data.frame(
dayHour = structure(c(1542585600, 1542589200, 1542592800, 1542596400, 1542585600, 1542589200, 1542592800, 1542596400, 1542585600, 1542589200, 1542592800, 1542596400, 1542585600, 1542589200, 1542592800, 1542596400), class = c("POSIXct", "POSIXt"), tzone = ""),
power = c(-1364.29, -433.11, 1132.39, 749.48, 463.75, 467.8, 469.35, 436.51, 2025.5, 2133.07, 2306.85, 2304.91, 211.52, 213.16, 214.33, 214.59),
label = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("net interchange", "gas", "hydro", "biomass"), class = "factor")
)
generation_interpolated <- generation %>%
group_by(label) %>%
summarise(data = list(as_tibble(approx(dayHour, power, n = 501)))) %>%
unnest() %>%
mutate(x = as.POSIXct(x, origin = '1970-01-01', tz = 'UTC'))
net_power_interpolated <- generation_interpolated %>%
group_by(x) %>%
summarise(y = sum(y))
ggplot(generation_interpolated, aes(x, y)) +
geom_area(aes(fill = label)) +
geom_line(data = net_power_interpolated)
To see how approx works, a simpler, ungrouped example:
df <- data.frame(x = c(0, 5, 10), y = c(0, 20, 10))
interpolated <- approx(df$x, df$y, n = 11)
str(interpolated)
#> List of 2
#> $ x: int [1:11] 0 1 2 3 4 5 6 7 8 9 ...
#> $ y: num [1:11] 0 4 8 12 16 20 18 16 14 12 ...
ggplot(as.data.frame(interpolated), aes(x, y)) +
geom_line() +
geom_point() +
geom_point(data = df, color = 'dodgerblue', size = 4)
Aloha all,
I've struggled to build a legend for a mix/match of time series data I'm making. Here is some code:
My understanding is that I need to somehow clean my data and put it all in the same data frame, but all of the time series don't line up very well. Some is at 15 minutes, other one hour. Is there any way to force a legend for these datasets? I don't know what else to post here - since the 5 datasets are quite large.
Plot I'm working on:
q<- ggplot(subset(cr200_Auwai1, timedate>startd & timedate<endd), aes(timedate, Turb_SS)) +
geom_point(color="coral4")+
geom_point(data=subset(dsloi_wl, timedate>startd & timedate<endd), aes(timedate, level), color="blue")+
#geom_point(data=subset(flow_data, mdate>startd & mdate<endd), aes(as.POSIXct(mdate), flow_cfs*1000), color="red")+
geom_point(data=subset(cr300_Wai1, timedate>startd & timedate<endd), aes(timedate, Lvl_m*1000), color="forestgreen", size=1)+ #aquamarine3
geom_point(data=subset(cr300_Wai1, timedate>startd & timedate<endd), aes(timedate, Turb_SS), color="orange")+
#geom_point(data=subset(hihimanu_wl, timedate>startd & timedate<endd), aes(timedate, level), color="azure4", size=0.1)+
#geom_point(data=subset(rain_data, timedate>startd & timedate<endd), aes(timedate, rainmm), color="red",size=5)+
geom_point(data=subset(haptuk_ysi, datetime>startd & datetime<endd), aes(datetime, Turb), color="pink")+
#scale_x_date(breaks=date_breaks("month"), labels = date_format("%b-%y"))+
xlab("Date")+
ylab("Turbidity (NTU) and Water Level (mm)")+
coord_cartesian(ylim=c(0, 1500))+
theme_bw()+
theme(axis.text=element_text(size=14),
axis.title=element_text(size=16,face="bold"),
legend.justification = c(1, 1),
legend.position = c(1, 1),
legend.title=element_text(size=14),
legend.text=element_text(size=12))
Here is a sample of two of the datasets: Note that the times don't line up at all... since I'm mixing sources.
dsloi_wl:
structure(list(ReceptionTime = c(1533895414.1134, 1533895414.1733,
1533895414.19397, 1533895414.20708, 1533895414.22283, 1533895414.23634,
1533895414.25135, 1533895414.26387, 1533895414.27653, 1533895414.29126,
1533896013.68755, 1533896013.7638, 1533896013.79232, 1533896013.80917,
1533896013.82312, 1533896013.83648, 1533896013.84988, 1533896013.8648,
1533896013.87724, 1533896013.8894), d2w = c(776.7, 789.7, 790.2,
777.1, 777.2, 777.7, 778.4, 793.4, 779.6, 794.1, 819.9, 780.7,
794.1, 806.9, 781.9, 781.9, 782.7, 782.8, 783.1, 783.4), timedate = structure(c(1533895414.1134,
1533895414.1733, 1533895414.19397, 1533895414.20708, 1533895414.22283,
1533895414.23634, 1533895414.25135, 1533895414.26387, 1533895414.27653,
1533895414.29126, 1533896013.68755, 1533896013.7638, 1533896013.79232,
1533896013.80917, 1533896013.82312, 1533896013.83648, 1533896013.84988,
1533896013.8648, 1533896013.87724, 1533896013.8894), class = c("POSIXct",
"POSIXt"), tzone = ""), level = c(723.3, 710.3, 709.8, 722.9,
722.8, 722.3, 721.6, 706.6, 720.4, 705.9, 680.1, 719.3, 705.9,
693.1, 718.1, 718.1, 717.3, 717.2, 716.9, 716.6)), .Names = c("ReceptionTime",
"d2w", "timedate", "level"), row.names = c(NA, 20L), class = "data.frame")
CR300_Wai1
structure(list(RECORD = 73027:73046, Temp_C = c(24.62861, 24.62332,
24.61533, 24.60857, 24.60189, 24.59733, 24.59068, 24.58404, 24.57869,
24.57327, 24.56781, 24.5606, 24.55551, 24.55218, 24.54648, 24.5416,
24.5358, 24.5319, 24.52781, 24.52294), Turb_BS = c(94.50522,
88.65939, 109.354, 57.71527, 134.1903, 46.37191, 78.17719, 52.22319,
58.07111, 96.95719, 51.47488, 44.65616, 70.43825, 99.58217, 93.68374,
87.4787, 175.5395, 167.6757, 110.8119, 132.5971), Turb_SS = c(36.63349,
34.31228, 37.02223, 32.97258, 36.68553, 33.82083, 37.43391, 33.43639,
31.17306, 33.6327, 34.69954, 30.99891, 34.69988, 33.64369, 32.54948,
32.1177, 32.86558, 48.97706, 30.65004, 33.71646), Temp_C_2 = c(24.9014,
24.89474, 24.88837, 24.88279, 24.87574, 24.86852, 24.86357, 24.85751,
24.85236, 24.84759, 24.84091, 24.83577, 24.83192, 24.82713, 24.8229,
24.81832, 24.81237, 24.80821, 24.8051, 24.80015), WD_OBS = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), Lvl_m = c(0.6907353, 0.6905226, 0.6896195, 0.6890779,
0.6881586, 0.6878724, 0.6862501, 0.6848835, 0.6844589, 0.6837503,
0.6836612, 0.6831629, 0.6821692, 0.6812283, 0.6799452, 0.6791196,
0.6782504, 0.6772775, 0.6763596, 0.6755115), timedate = structure(c(1533895500,
1533895800, 1533896100, 1533896400, 1533896700, 1533897000, 1533897300,
1533897600, 1533897900, 1533898200, 1533898500, 1533898800, 1533899100,
1533899400, 1533899700, 1533900000, 1533900300, 1533900600, 1533900900,
1533901200), class = c("POSIXct", "POSIXt"), tzone = "")), .Names = c("RECORD",
"Temp_C", "Turb_BS", "Turb_SS", "Temp_C_2", "WD_OBS", "Lvl_m",
"timedate"), row.names = c(NA, 20L), class = "data.frame")
Here is a solution using mock data (next time provide a sample of your data) :
library(tidyverse)
library(lubridate)
#>
#> Attachement du package : 'lubridate'
#> The following object is masked from 'package:base':
#>
#> date
# mock data
time_15m <- seq(as.POSIXct("2018-08-30 00:00:00"), as.POSIXct("2018-08-31 00:00:00"), by = "15 min")
time_30m <- seq(as.POSIXct("2018-08-30 00:00:00"), as.POSIXct("2018-08-31 00:00:00"), by = "30 min")
time_60m <- seq(as.POSIXct("2018-08-30 00:00:00"), as.POSIXct("2018-08-31 00:00:00"), by = "60 min")
data_1 <- data.frame(time = time_15m,
var_1 = cos(hour(time_15m) + minute(time_15m)))
data_2 <- data.frame(time = time_30m,
var_2 = sin(hour(time_30m) + minute(time_30m)))
data_3 <- data.frame(time = time_60m,
var_3 = cos(1 - hour(time_60m) + minute(time_60m)))
# the kind of plot you have (prefer the 2nd version)
ggplot(data_1, aes(x = time, y = var_1)) +
geom_point(color = "red") +
geom_point(data = data_2, aes(time, var_2), color = "green") +
geom_point(data = data_3, aes(time, var_3), color = "blue") +
theme_bw()
# a version with long format data and use of gather function
data_1 %>%
left_join(data_2) %>% # join data from data_2 (timestep = 30m), missing data is NA
left_join(data_3) %>% # join data from data_3 (timestep = 60m), missing data is NA
gather(variable_name, variable_value, var_1, var_2, var_3) %>% # gather var_1, var_2 and var_3 in a single column
ggplot(., aes(x = time, y = variable_value, color = variable_name)) +
theme_bw() +
geom_point(size = 2)
#> Joining, by = "time"
#> Joining, by = "time"
#> Warning: Removed 120 rows containing missing values (geom_point).
Created on 2018-08-22 by the reprex package (v0.2.0).
EDIT 1 (include provided datasets)
library(tidyverse)
dsloi_wl %>%
full_join(cr300_Wai1) %>%
mutate(Lvl_m = 100 * Lvl_m) %>%
gather(variable_name, variable_value, level, Lvl_m, Turb_SS) %>%
ggplot(., aes(x = timedate, y = variable_value, color = variable_name)) +
geom_point() +
scale_color_manual("Legend title",
values = c("level" = "blue",
"Lvl_m" = "forestgreen",
"Turb_SS" = "orange"))
#> Joining, by = "timedate"
#> Warning: Removed 60 rows containing missing values (geom_point).
Created on 2018-08-23 by the reprex package (v0.2.0).
My ultimate goal is to connect all nearest neighbours of a set of buildings (based on Euclidean distance) on a ggmap using geom_path from the ggplot2 package. I need help with a loop that will allow me to plot all neighbours as easily as possible
I have created a distance matrix (called 'kmnew') in kilometres between 3 types of building in Beijing: B (x2), D (x2) and L (x1):
B B D D L
B NA 6.599014 5.758531 6.285787 3.770175
B NA NA 7.141096 3.873296 5.092667
D NA NA NA 3.690725 2.563017
D NA NA NA NA 2.832083
L NA NA NA NA NA
I try to discern the nearest neighbours of each building by row by declaring a matrix and using a loop to ascertain the nearest neighbour building:
nn <- matrix(NA,nrow=5,ncol=1)
for (i in 1:nrow(kmnew)){
nn[i,] <- which.min(kmnew[i,])
}
This returns the following error (not sure why):
Error in nn[i, ] <- which.min(kmnew[i, ]) : replacement has length zero
but seems to return the correct answer to nn:
[,1]
[1,] 5
[2,] 4
[3,] 5
[4,] 5
[5,] NA
I append this to an original dataframe called newbjdata:
colbj <- cbind(newbjdata,nn)
that returns
Name Store sqft long lat nn
1 B 1 1200 116.4579 39.93921 5
2 B 2 750 116.3811 39.93312 4
3 D 1 550 116.4417 39.88882 5
4 D 2 600 116.4022 39.90222 5
5 L 1 1000 116.4333 39.91100 NA
I then retrieve my map via ggmap:
bjgmap <- get_map(location = c(lon = 116.407395,lat = 39.904211),
zoom = 13, scale = "auto",
maptype = "roadmap",
messaging = FALSE, urlonly = FALSE,
filename = "ggmaptemp", crop = TRUE,
color = "bw",
source = "google", api_key)
My ultimate goal is to map the nearest neighbours together in a plot using geom_path from the ggplot package.
For example, the nn of the 1st building of type B (row 1) is the 1 building of type L (row 5). Obviously I can draw this line by subsetting the said 2 rows of the dataframe thus:
ggmap(bjgmap) +
geom_point(data = colbj, aes(x = long,y = lat, fill = factor(Name)),
size =10, pch = 21, col = "white") +
geom_path(data = subset(colbj[c(1,5),]), aes(x = long,y = lat),col = "black")
However, I need a solution that works like a loop, and I can't figure out how one might achieve this, as I need to reference the nn column and refer that back to the long lat data n times. I can well believe that I am not using the most efficient method, so am open to alternatives. Any help much appreciated.
Here is my attempt. I used gcIntermediate() from the geosphere package to set up lines. First, I needed to rearrange your data. When you use gcIntermediate(), you need departure and arrival long/lat. That is you need four columns. In order to arrange your data in this way, I used the dplyr package. mutate_each(colbj, funs(.[nn]), vars = long:lat) works for you to pick up desired arrival long/lat. . is for 'long' and 'lat'. [nn] is the vector index for the variables. Then, I employed gcIntermediate(). This creates SpatialLines. You need to make the object a SpatialLinesDataFrame. Then, you need to convert the output to "normal" data.frame. This step is essential so that ggplot can read your data. fortify() is doing the job.
library(ggmap)
library(geosphere)
library(dplyr)
library(ggplot2)
### Arrange the data: set up departure and arrival long/lat
mutate_each(colbj, funs(.[nn]), vars = long:lat) %>%
rename(arr_long = vars1, arr_lat = vars2) %>%
filter(complete.cases(nn)) -> mydf
### Get line information
rts <- gcIntermediate(mydf[,c("long", "lat")],
mydf[,c("arr_long", "arr_lat")],
50,
breakAtDateLine = FALSE,
addStartEnd = TRUE,
sp = TRUE)
### Convert the routes to a data frame for ggplot use
rts <- as(rts, "SpatialLinesDataFrame")
rts.df <- fortify(rts)
### Get a map (borrowing the OP's code)
bjgmap <- get_map(location = c(lon = 116.407395,lat = 39.904211),
zoom = 13, scale = "auto",
maptype = "roadmap",
messaging = FALSE, urlonly = FALSE,
filename = "ggmaptemp", crop = TRUE,
color = "bw",
source = "google", api_key)
# Draw the map
ggmap(bjgmap) +
geom_point(data = colbj,aes(x = long, y = lat, fill = factor(Name)),
size = 10,pch = 21, col = "white") +
geom_path(data = rts.df, aes(x = long, y = lat, group = group),
col = "black")
EDIT
If you want to do all data manipulation in one sequence, the following is one way to go. foo is identical to rts.df above.
mutate_each(colbj, funs(.[nn]), vars = long:lat) %>%
rename(arr_long = vars1, arr_lat = vars2) %>%
filter(complete.cases(nn)) %>%
do(fortify(as(gcIntermediate(.[,c("long", "lat")],
.[,c("arr_long", "arr_lat")],
50,
breakAtDateLine = FALSE,
addStartEnd = TRUE,
sp = TRUE), "SpatialLinesDataFrame"))) -> foo
identical(rts.df, foo)
#[1] TRUE
DATA
colbj <- structure(list(Name = structure(c(1L, 1L, 2L, 2L, 3L), .Label = c("B",
"D", "L"), class = "factor"), Store = c(1L, 2L, 1L, 2L, 1L),
sqft = c(1200L, 750L, 550L, 600L, 1000L), long = c(116.4579,
116.3811, 116.4417, 116.4022, 116.4333), lat = c(39.93921,
39.93312, 39.88882, 39.90222, 39.911), nn = c(5L, 4L, 5L,
5L, NA)), .Names = c("Name", "Store", "sqft", "long", "lat",
"nn"), class = "data.frame", row.names = c("1", "2", "3", "4",
"5"))
I have data from the iOS Moves app which is in the form:
timeAtSite location
800.52 {"lat": 38.87212, "lon": -94.61764}
116.40 {"lat": 38.91571, "lon": -94.64835}
14.48 {"lat": 38.91461, "lon": -94.64795}
I have tried a variety of unsucessful methods to get the location data into separate lat and lon columns (example below):
al1 = get_map(location = c(lon: -94.61764, lat: 38.87212), zoom = 2, maptype = 'roadmap')
al1MAP = ggmap(al1)
al1MAP
al1MAP <- ggmap(al1)+ geom_point(data=c(moves$Location["lat"],moves$Location["lon"]))
TIA
You could try
library(stringr)
df[c('lat', 'lon')] <- do.call(rbind,lapply(str_extract_all(df$location,
'[-0-9.]+'), as.numeric))
Or
library(tidyr)
df1 <- extract(df, location, c('lat', 'lon'), '([-0-9.]+)[^-0-9.]+([-0-9.]+)',
convert=TRUE)
df1
# timeAtSite lat lon
#1 800.52 38.87212 -94.61764
#2 116.40 38.91571 -94.64835
#3 14.48 38.91461 -94.64795
Once you extracted the location,
center <- paste(min(df1$lat)+(max(df1$lat)-min(df1$lat))/2,
min(df1$lon)+(max(df1$lon)-min(df1$lon))/2, sep=" ")
df1$id <- 1:3
library(ggmap)
al1 <- get_map(location = center, zoom = 11, maptype = "roadmap" )
p <- ggmap(al1)
p <- p + geom_text(data=df1,aes(x = lon, y = lat, label=id),
colour="red",size=4,hjust=0, vjust=0)+
theme(legend.position = "none")
p <- p + geom_point(data=df1,aes(x=lon, y=lat),colour="black",size=2)
p
data
df <- structure(list(timeAtSite = c(800.52, 116.4, 14.48), location =
c("{\"lat\": 38.87212, \"lon\": -94.61764}", "{\"lat\": 38.91571,
\"lon\": -94.64835}", "{\"lat\": 38.91461, \"lon\": -94.64795}"
)), .Names = c("timeAtSite", "location"), class = "data.frame", row.names =
c(NA, -3L))