Zoom in rectangularly in map - r

I'm trying to have a rectangular "zoom in" into my chart. So far, I can create the chart itself and a smaller version, but I haven't figured out how to zoom in rectangularly.
(Builds on 1 and 2)
library(sf)
library(dplyr)
library(tmap)
Get shape files for Germany [55 MB]. In Germany zip codes are called Postleitzahlen (PLZ).
germany <- read_sf("data/OSM_PLZ.shp")
Create some arbitrary groups:
germany <- germany %>%
mutate(plz_groups = case_when(
substr(plz, 1, 1) == "1" ~ "Group A",
substr(plz, 2, 2) == "2" ~ "Group B",
substr(plz, 3, 3) == "2" ~ "Group C",
TRUE ~ "Group X" # rest
))
Make plot filling by PLZ:
map_de <- tm_shape(germany) +
tm_fill(col = "plz_groups")
map_de
germany_zoomin <- germany %>%
filter(substr(plz, 1, 1) == "4")
map_zoomin <- tm_shape(germany_zoomin) +
tm_fill(col = "plz_groups")
So I can create a zoomed in chart, but this is NOT what I want:
map_zoomin
print(map_de, vp = grid::viewport(0.8, 0.185, width = 0.2, height = 0.45))
# tmap_save("test.png")
Instead, I would like to specify the location, e.g. the PLZ of Cologne (50667) and draw a rectangular box around it.

Related

Change each histogram color in chart

I have this histogram separated in five categories depending the age. The problem is that I cannot change the color depending the category. I tried to use the marker function with an array filled with the colors I want, but it didn't work as expected:
As you can see, the colors are all bugged.
This is what I tried:
less20 <- subset(dataset, dataset$EDAD <20)
between20n40 <- subset(dataset, dataset$EDAD >=20 & dataset$EDAD <40)
between40n60 <- subset(dataset, dataset$EDAD >=40 & dataset$EDAD <60)
between60n80 <- subset(dataset, dataset$EDAD >=60 & dataset$EDAD <80)
more80 <- subset(dataset, dataset$EDAD >=80)
plot_ly(alpha = 0.7, orientation = 'h', marker = list(color = c('rgba(31,119,180,1)','rgba(105,122,125,1)','rgba(183,124,67,1)', 'rgba(243,127,23,1)','rgba(255,127,14,1)'))) %>%
add_histogram(y = more80$EDAD, name = "More than 80") %>%
add_histogram(y = between60n80$EDAD, name = "Between 60 and 79") %>%
add_histogram(y = between40n60$EDAD, name = "Between 40 and 59") %>%
add_histogram(y = between20n40$EDAD, name = "Between 20 and 39") %>%
add_histogram(y = less20$EDAD, name = "Less than 20") %>%
layout(barmode = "group", title = "",orientation="h")
The correct color order is the next one:
However, I want to change those colors.
Any recomendations? Thanks in advance :)
I think it might be easier if you put the values along with their corresponding ranges inside a dataframe and color the plot using these values and ranges. This is my solution to this using ggplot2. You can define Values with your original dataset for the histogram and should obtain a similar result to yours. This solution uses a 1000 normally distributed sample with SD = 30 and MEAN = 70 in order to produce the plot.
# Import ggplot2
library("ggplot2")
# Obtain sample values for histogram
set.seed(1234)
Values = rnorm(n = 1000, mean = 70, sd = 30)
Range = c()
# Get ranges for each value in data
for(i in 1:length(Values)){
if(Values[i] >= 80){
Range[i] = "More than 80"
} else if (Values[i] < 80 & Values[i] >= 60){
Range[i] = "Between 60 and 79"
} else if (Values[i] < 60 & Values[i] >= 40){
Range[i] = "Between 40 and 59"
} else if (Values[i] < 40 & Values[i] >= 20){
Range[i] = "Between 20 and 39"
} else {
Range[i] = "Less than 20"
}
}
# Put all data inside a data frame
plot_dat = data.frame(Values, Range)
# Order plot labels
plot_dat$Range <- factor(plot_dat$Range, levels = c("More than 80", "Between 60 and 79", "Between 40 and 59", "Between 20 and 39", "Less than 20"))
# Produce plot
ggplot(plot_dat, aes(x=Values, fill=Range)) + geom_histogram(binwidth = 5) + coord_flip() + ggtitle("Sample Histogram")
Output
I just had to add the marker function inside add_histogram. That way, I only change the color of each histogram added.
plot_ly(alpha = 0.7, orientation = 'h') %>%
add_histogram(y = more80$EDAD, name = "More than 80", marker = list(color ='rgba(31,119,180,1)')) %>%
add_histogram(y = between60n80$EDAD, name = "Between 60 and 79", marker = list(color ='rgba(105,122,125,1)')) %>%
Thank you for your answers!

Save multiple ggplots with different layout matrices

Currently I'm creating multiple plots with regional data and save them to a PDF file. This works without problems, thanks to an SO post I've found (use grid.arrange over multiple pages or marrangeGrob with a layout_matrix).
This is my code so far:
library(ggplot2)
library(gridExtra)
library(dplyr)
data <- data.frame(
region = c("region 1", "region 2", "region 3", rep("region 4", 2), rep("region 5", 2)),
countries = c("country 1", "country 2", "country 3", "country 4", "country 5", "country 6", "country 7"),
dummydata1 = c(rep(1, 7)),
dummydata2 = c(rep(2, 7))
)
criterias <- list()
criterias[[ 'region_1' ]] <- data %>% filter(region == 'region 1')
criterias[[ 'region_2' ]] <- data %>% filter(region == 'region 2')
criterias[[ 'region_3' ]] <- data %>% filter(region == 'region 3')
criterias[[ 'region_4' ]] <- data %>% filter(region == 'region 4')
criterias[[ 'region_5' ]] <- data %>% filter(region == 'region 5')
# This layout matrix should be used for the regional plots
# Don't wonder about the strange numbering, some plots came later
# and it was easier to modify the matrix then all other functions.
regionLayout <- rbind(
c(1,1,1,1,1,2),
c(NULL,NULL,3,3,NULL,NULL),
c(9,9,4,4,10,10),
c(6,6,6,7,7,7),
c(6,6,6,7,7,7),
c(6,6,6,7,7,7),
c(6,6,6,7,7,7),
c(6,6,6,7,7,7),
c(6,6,6,7,7,7)
)
# This is just a dummy function
# The actual function creates several plots based on the real data
createRegionalPlots <- function (data, region) {
examplePlots <- list(ggplot() + ggtitle('Title (ggtext = plot 1)'),
ggplot() + ggtitle('Month (ggtext = plot 2)'),
ggplot() + ggtitle('Plot 1 (tile = 3)'),
ggplot() + ggtitle('Plot 2 (tile = 4)'),
ggplot() + ggtitle('Plot 3 (geom_bar = 5)'),
ggplot() + ggtitle('Plot 4 (geom_bar = 6)'),
ggplot() + ggtitle('Plot 5 (tile = 7)'),
ggplot() + ggtitle('Plot 6 (tile = 8)'))
}
# Found in https://stackoverflow.com/questions/43491685/
preparePage <- function(plots,layoutMatrix) {
# pdf(file = NULL) #invisible
par(mar=(c(5,5,5,5)))
plotsPerPage <- length(unique(na.omit(c(layoutMatrix))))
ml <- lapply(1:ceiling(length(plots)/plotsPerPage), function(page_IND){
ind <- (1 + ((page_IND - 1) * plotsPerPage )) : (page_IND * plotsPerPage)
grid.arrange(grobs = plots[ind], layout_matrix = layoutMatrix)
})
return(marrangeGrob(grobs=ml,nrow=1,ncol=1,top=NULL))
# dev.off() #invisible
}
# Here I'm running through all regions
regionalPlotList <- list()
for (region in names(criterias)) {
regionData <- criterias[[region]]
regionalPlots <- createRegionalPlots(data = regionData, region = region)
regionalPlotList <- do.call(c, list(regionalPlotList, regionalPlots))
}
# This leaves me with a list of 40 plots (5 regions x 8 plots)
allPlots <- preparePage(regionalPlotList, regionLayout)
ggsave("example.pdf",width = 297, height = 210, units = "mm", plot = allPlots)
As said, this works perfectly and leaves me (using the current data) with a five page report, one per every region and with the required layout.
I have now been asked to add additional per country plots at the end of the regional report and these pages should have a different layout (and different plots).
Overestimating myself (and my knowledge of r resp. ggplot) once again, I thought of this as an easy job (which it probably is for everyone else, but I'm stuck).
So, I've created a list of new criterias and a function, including a new layout:
createCountryPlots <- function(data, country) {
exampleCountryPlots <- list(ggplot() + ggtitle('Title (ggtext = plot 1)'),
ggplot() + ggtitle('Month (ggtext = plot 2)'),
ggplot() + ggtitle('Plot 1 (bar = 3)'),
ggplot() + ggtitle('Plot 2 (pie = 4)'),
ggplot() + ggtitle('Plot 3 (geom_bar = 5)'),
ggplot() + ggtitle('Plot 4 (geom_bar = 6)')
)
}
countryLayout = rbind(
c(1, 1, 1, 1, 1, 2),
c(3, 3, 3, 4, 4, 4),
c(3, 3, 3, 4, 4, 4),
c(3, 3, 3, 4, 4, 4),
c(5, 5, 5, 6, 6, 6),
c(5, 5, 5, 6, 6, 6),
c(5, 5, 5, 6, 6, 6)
)
# prepare the data per country
countryCriterias <- list()
countryCriterias[[ 'country_1' ]] <- data %>% filter(country == 'country 1')
countryCriterias[[ 'country_2' ]] <- data %>% filter(country == 'country 2')
# Running through all selected countries
countryPlotList <- list()
for (country in names(countryCriterias)) {
countryData <- countryCriterias[[country]]
countryPlots <- createCountryPlots(data = countryData, country = country)
countryPlotList <- do.call(c, list(countryPlotList, countryPlots))
}
countryPlots <- preparePage(countryPlotList, countryLayout)
# Just saving the country plots works perfectly again
ggsave("example.pdf",width = 297, height = 210, units = "mm", plot = countryPlots)
Saving this plots in a separate file works without any problems, but I'm currently stuck on how to combine these plots in one single PDF, respecting the different layouts the pages should have.
I've tried several possibilities (i.e. grid.arrange and arrangeGrob etc.), but I haven't been able to combine the plots into a single file.
Could anyone please enlighten me?
Edit:
Sorry, if I didn't make myself clear enough. This would be the result I should have at the end.
Thanks to the hint by #teunbrand to have a look at the patchwork package, I've found a solution to my problem.
It's in general almost the same as before, but instead of trying to arrange the plots first and then saving them, I "print" them directly to a pdf in the for-loop.
# defininig the layouts (simplified)
regionLayout <- "
AAAAAB
##CC##
DDEEFF
GGGHHH
GGGHHH"
countryLayout <- "
AAAAAB
CCCCDD
CCCCDD
EEEEFF
EEEEFF
"
# opening pdf
pdf('example5.pdf', pagecentre = FALSE, width = 29.7/2.54, height = 21/2.54)
par(mar = c(5, 5, 5, 5), oma = c(1, 1, 1, 1))
for (region in names(criterias)) {
regionData <- criterias[[region]]
regionalPlots <- createRegionalPlots(data = regionData, region = region)
# as regionalPlots is a list of plots, I'm using wrap_plots, which can take a dynamic
# number of plots
print(wrap_plots(regionalPlots, design = regionLayout))
}
# then the same for the country plots, with a different layout
countryPlotList <- list()
for (country in names(countryCriterias)) {
countryData <- countryCriterias[[country]]
countryPlots <- createCountryPlots(data = countryData, country = country)
print(wrap_plots(countryPlots, design = countryLayout))
}
dev.off()
And at the end I have my PDF with seperate layouts...
Thank you all for your help!!!
PS: Took me a while to find out why the PDF always was empty, before I realized that wrap_plot just arranges the plots but does not print them. As said, relatively new to R (did I mention that?)

Repeating values on map:leaflet map

I am trying to make a map using leaflet. I uploaded a shapefile of 216 districts. I also have a dataset with information from 7 out the 216 districts.
I was hoping for the map to have districts that don't have values or 0% in grey saying not enough information". While having districts with actual values (>0%) showing up as colour following their corresponding bins.
When I tried to do upload my dataset and shapfile, I got a map with coloured districts everywhere. Based on my dataset, there are suppose to be 4 districts (>0%) in colour. But this is not what I see on my map.
How do I make sure that only the districts in my dataset light up where it is suppose to light up, without repeating all over the map? (while maintaining the backdrop of all the other districts in grey)
So far this is the code I used to achieved the map:
districtsg <-readOGR("sample/copyfile/Districts/Map_of_Districts_.shp")
districtsg <- sp::spTransform(districtsg, CRS("+proj=longlat +datum=WGS84"))
wpnew <- wpnew [order(match(wpnew$District,districtsg$NAME)),]
bins <- c(0.1,2.0,5.0,10.0,25.0,40.0,50.0)
pal<- colorBin("YlOrRd",domain=wpnew$per.content,bins=bins)
m<- leaflet() %>%
setView(lng = -1.0232,lat=7.9465,zoom = 6) %>%
addProviderTiles(providers$Stamen.Toner) %>%
addPolygons(data =districtsg,
weight = 1,
smoothFactor = 0.5,
color = "White",
fillOpacity = 0.8,
fillColor= ~pal(wpnew$per.content),
highlight = highlightOptions(
weight = 5,
color = "#666666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE
))
m
labels <- paste( "<p>","District:", districtsg$NAME,"</p>",
"<p>", "% of reports that are content:",round(wpnew$per.content,digits = 3),"</p>",
"<p>", "Total reports labelled as a content:",round(wpnew$totalcontent,digits = 3),"</p>",
"<p>", "Total reports from this district:",round(wpnew$totalreports,digits = 3),"</p>",sep = "" )
m<- leaflet() %>%
setView(lng = -1.0232,lat=7.9465,zoom = 6) %>%
addProviderTiles(providers$Stamen.Toner) %>%
addPolygons(data =districtsg,
weight = 1,
smoothFactor = 0.5,
color = "White",
fillOpacity = 0.8,
fillColor= pal(wpnew$per.content),
label = lapply(labels,HTML)) %>%
addLegend(pal=pal,
values = wpnew$per.content,
opacity = 0.7,
"topright")
m
districts totalreports totalcontent per.content
1 Jomoro 4 2 50.00000
2 Ellembelle 2 1 50.00000
3 Tarkwa Nsuaem 1 0 0.00000
4 Bia West 1 0 0.00000
5 Bodi 2 0 0.00000
6 Accra Metropolis 3 1 33.33333
7 Adenta 3 1 33.33333
shapefile can be downloaded here:
https://data.gov.gh/dataset/shapefiles-all-districts-ghana-2012-216-districts
I handling the joining of shape file and the data file differently and I create my base map using tmap. but perhaps this will be helpful.
library(rgdal)
library(tmap)
library(leaflet)
####Access shape map
elem <- readOGR(dsn = "Data/P3Map", layer = "Boundary___ES")
####Preschool Status for Elementary Schools####
schoolAdresses_PK_2021 <- read_excel("Data/P3Map/schoolAdresses_PK_2021.xlsx") %>%
mutate(PreK= factor(PreK)) %>%
clean_names("lower_camel") %>%
mutate(programType = factor(programType))
##### Merge shape with PreK info######
map <- merge(elem, by.x = "ES_Name", schoolAdresses_PK_2021, by.y = "esName" )
#### Render Map####
MyColors <- c('#e2a331', '#d3d3d3','#3688c8') #yellow, #grey, #blue
PKMap <- tm_shape(map)+
tm_fill(col="preK",
title = " ",
palette = MyColors)+
tm_shape(JeffcoMap)+
tm_borders(col = "white")+
tm_layout("Jeffco PreK Expansion 2019-2020", legend.text.size = -0.5)+
tm_text(text = "ES_ShortNa", size = 0.5, group = "Site Names")
PKMap %>% tmap_leaflet() %>%
setView(lng = -105.10033, lat = 39.6, zoom =9) %>% #lat and long of my district
addProviderTiles('Esri.WorldGrayCanvas', group='Grayscale Map') %>%
addProviderTiles('OpenStreetMap', group='Street Map') %>%
addMarkers(lng = -105.155927, #add marker for PK detached from elementary
lat = 39.746347,
icon = YellowIcon,
label = "Litz",
popup = "<b>Program type:</b><br>Ext. Day",
popupOptions = labelOptions(direction = "bottom",
textsize = "8px"),
group = "Stand alone PreK")
from here you can add leaflet layers
It's tough without your data, but I hope this is helpful. In my case, I am mapping 95 elementary schools in one district.
Your 'districtsg' = My 'elem'
Your 'wpnew' = My 'map'
Example map
Here is my attempt while using your datasets:
library(rgdal)
library(tmap)
library(leaflet)
library(sp)
districtsg <-readOGR('data/Map_of_Districts_216.shp')
wpnew <- read.csv('data/dataFromStack.csv')
map <- sp::merge(x = districtsg, y = wpnew, by = "NAME")
MyColors <- c('#e2a331', '#d3d3d3','#3688c8') #yellow, #grey, #blue
tm_shape(map)+
tm_fill(col="totalcontent",
title = " ",
palette = MyColors)+
tm_shape(districtsg)+
tm_borders(col = "white")
Here is the result that I get.. It does take a moment to render in the R Studio Viewer

using duplicate factor to plot using ggplot2

I am trying to plot a ggplot_dumbbell with the following code:
library(ggplot2)
library(ggalt)
theme_set(theme_classic())
df_senPhi <- structure(list(phi = c(0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,
0.8, 0.9, 0.9, 1), W = c(7833625.7334, 8291583.0188, 8762978.0131,
8169317.158, 8460793.8918, 8765222.8718, 8266025.5499, 8311199.2075,
8265304.816, 8289392.5799, 8273733.0523, 8284554.5615), Type = c("A, B, C",
"A, B, C", "A, B, C", "D, E", "D, E", "D, E", "F, G", "F, G",
"H, I", "H, I", "I, J", "I, J"), pChange = c(-0.0533144181552553,
0.00202924695507283, 0.0589968453118437, -0.0127464560859453,
0.0224782062508261, 0.0592681341679742, -0.00105934677399903,
0.00439984310620854, -0.00114644672167306, 0.00176453467558519,
-0.000127903066776307, 0.00117986514708678)), class = "data.frame", row.names = c(NA,
-12L), .Names = c("phi", "W", "Type", "pChange"))
df_senPhi$phi <- factor(df_senPhi$phi, levels=as.character(df_senPhi$phi)) # for right ordering of the dumbells
gg <- ggplot(df_senPhi, aes(x=0, xend=pChange, y=phi, color = Type)) +
geom_dumbbell(#colour="#a3c4dc",
size=0.75,
colour_xend="#0e668b") +
scale_x_continuous(label=scales::percent)
plot(gg)
If you run this code, you will get a warning saying "duplicate levels in factors are deprecated".
If you look closely in the df_senPhi you can see 12 records. However while plotting, only 11 records are plotted. Also the 10th and the 11th records have the same phi value in the data frame which are associated in to the same level. That is also causing the overlapping of the two phi bars in the plot (probably that's why I'm seeing only 11 dumbbells).
I want all 12 records to be plotted such that the second 0.9 phi's dumbbell appears just above the first just like they were two different values.
Is there a way to achieve this ?
used a bit of dplyr
but it seems to get what you are looking for
df_senPhi %>%
mutate(row = 1:n()) %>%
ggplot(aes(0, row, color = Type)) +
geom_dumbbell(aes(xend = pChange)) +
scale_y_continuous(labels = factor(df_senPhi$phi),
breaks = 1:12)

R 3.2.1 incorrect mapping of color

This is based on R 3.2.1, reverse colors on map
I have two data points, one is more than 66%, which should be green, other is less than 33%, which should be red.
However, the less than 33% is orange.
Below is the code, which looks correct (but something is wrong)
sep <- read.csv("Out_SEP_assets_csv.csv")
Sub1 <- sep[grep("SEP.12", names(sep))]
sep$newCol <- 100*rowSums(Sub1)/rowSums(sep[4:7])
# create a new grouping variable
Percent_SEP12_Assets <- ifelse(sep[,8] <= 33, "Less than 33%", ifelse(sep[,8] >= 66, "More than 66%", "Between 33% and 66%"))
Percent_SEP12_Assets <- factor(Percent_SEP12_Assets,
levels = c("More than 66%", "Between 33% and 66%", "Less than 33%"))
# get the map
bbox <- make_bbox(sep$Longitude, sep$Latitude, f = 1)
map <- get_map(bbox)
# plot the map and use the grouping variable for the fill inside the aes
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude, color=Percent_SEP12_Assets ), size=9, alpha=0.6) +
scale_color_manual(values=c("green","orange","red"))
The dput(sep) is
structure(list(School = structure(1:2, .Label = c("Out of City\\00L001",
"Out of City\\O308"), class = "factor"), Latitude = c(40.821367,
41.310426), Longitude = c(-73.488313, -73.837612), Windows.SEP.11 = c(4L,
69L), Mac.SEP.11 = 0:1, Windows.SEP.12 = c(3L, 26L), Mac.SEP.12 = c(16L,
1L), newCol = c(82.6086956521739, 27.8350515463918)), .Names = c("School",
"Latitude", "Longitude", "Windows.SEP.11", "Mac.SEP.11", "Windows.SEP.12",
"Mac.SEP.12", "newCol"), row.names = c(NA, -2L), class = "data.frame")
Output is this (incorrect circled in red) ........ How to fix?
Responses
Coordinates are correct, I am asking why is the point incorrectly colored. I thought this logic is correct
Percent_SEP12_Assets <- ifelse(sep[,8] <= 33, "Less than 33%", ifelse(sep[,8] >= 66, "More than 66%", "Between 33% and 66%"))
Updated code
I tried this per #bondeded user and resulting map is same as before
sep <- read.csv("Out_SEP_assets_csv.csv")
Sub1 <- sep[grep("SEP.12", names(sep))]
sep$newCol <- 100*rowSums(Sub1)/rowSums(sep[4:7])
# create a new grouping variable
sep$Percent_SEP12_Assets <- ifelse(sep[,8] <= 33, "Less than 33%", ifelse(sep[,8] >= 66, "More than 66%", "Between 33% and 66%"))
sep$Percent_SEP12_Assets <- factor(sep$Percent_SEP12_Assets,
levels = c("More than 66%", "Between 33% and 66%", "Less than 33%"))
# get the map
bbox <- make_bbox(sep$Longitude, sep$Latitude, f = 1)
map <- get_map(bbox)
# plot the map and use the grouping variable for the fill inside the aes
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude, color=sep$Percent_SEP12_Assets ), size=9, alpha=0.6) +
scale_color_manual(values=c("green","orange","red"))
Actual CSV
Here is actual CSV, two rows
School Latitude Longitude Windows-SEP-11 Mac-SEP-11 Windows-SEP-12 Mac-SEP-12
Out of City\00L001 40.821367 -73.488313 4 0 3 16
Out of City\O308 41.310426 -73.837612 69 1 26 1
The problem is that by default ggplot2 drops unused levels from factors. There are two options:
Specify drop = FALSE
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude, color=sep$Percent_SEP12_Assets ), size=9, alpha=0.6) +
scale_color_manual(values=c("green","orange","red"), drop = FALSE)
Specify the values for each level:
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude, color=sep$Percent_SEP12_Assets ), size=9, alpha=0.6) +
scale_color_manual(values=c(`More than 66%` = "green", `Between 33% and 66%` = "orange", `Less than 33%` = "red"))
Clearly you could also do both.
Now I got what you meant. The problem is in you ifelse structure. Maybe this can help:
ifelse(sep[,8] <= 33, "Less than 33%", ifelse(sep[,8] >= 66, "More than 66%", "Between 33% and 66%"))
[1] "More than 66%" "Less than 33%"

Resources