Map Data: InputSpatialData
Yield Data: InputYieldData
Results_using viewport():
EDIT: Results using the "multiplot" function, as suggested by @rawr (see comment below). I do love the new results, especially that the map is bigger. Nonetheless, the boxplot still seems misaligned with the map plot. Is there a more systematic way to control centering and placement?
My Question: Is there a way to control the size of the boxplot so that it is close in size to, and centered with, the map plot above it?
FullCode:
## Loading packages
library(rgdal)
library(plyr)
library(maps)
library(maptools)
library(mapdata)
library(ggplot2)
library(RColorBrewer)
library(foreign)
library(sp)
library(ggsubplot)
library(reshape)
library(gridExtra)
## get.centroids: extract polygon ID and centroid from the shapefile.
## Reads the global spatial object `wmap`.
##   x: index of the polygon within wmap@polygons
## Returns a named vector c(id, long, lat) built from the polygon's ID slot and
## its label point (labpt slot). All three values are combined in a single c()
## call and therefore share one atomic type, so callers should convert
## long/lat back with as.numeric().
get.centroids <- function(x) {
  # S4 slots are accessed with `@`
  poly <- wmap@polygons[[x]]
  ID <- poly@ID
  centroid <- as.numeric(poly@labpt)
  return(c(id = ID, long = centroid[1], lat = centroid[2]))
}
## read input files (shapefile and .csv file)
wmap <- readOGR(dsn=".", layer="ne_110m_admin_0_countries")
wyield <- read.csv(file = "F:/Purdue University/RA_Position/PhD_ResearchandDissert/PhD_Draft/GTAP-CGE/GTAP_Sims&Rests/NewFiles/RMaps_GTAP/AllWorldCountries_CCShocksGTAP.csv", header=TRUE, sep=",", na.strings="NA", dec=".", strip.white=TRUE)
## keep characters 3-10 of ID_1, i.e. drop its two-character prefix
wyield$ID_1 <- substr(wyield$ID_1,3,10)
## prepend the row names of wmap@data as an "id" column
## NOTE: rows are paired purely by position, so this is only correct when
## wyield is already in the same row order as the polygons in wmap
wyield <- cbind(id=rownames(wmap@data),wyield)
## Build table of labels for annotation (legend).
## Row-bind the c(id, long, lat) vectors returned by get.centroids for
## polygon indices 1 to 17
labs <- do.call(rbind,lapply(1:17,get.centroids)) # one row per polygon: id, long, lat
## Attach ID_1 and name_long from wyield, matching rows on "id"
labs <- merge(labs,wyield[,c("id","ID_1","name_long")],by="id") # join centroids with the yield table columns
## Columns 2-3 are converted to numeric (via character)
## presumably these are long and lat given the merge column order - verify
labs[,2:3] <- sapply(labs[,2:3],function(x){as.numeric(as.character(x))})
## Numeric copy of ID_1, used below as the sort key
labs$sort <- as.numeric(as.character(labs$ID_1))
## name_code pastes columns 4 and 5 together
## presumably ID_1 and name_long - verify against the merge output
labs <- cbind(labs, name_code = paste(as.character(labs[,4]), as.character(labs[,5])))
labs <- labs[order(labs$sort),]
## Dataframe for boxplot plot
## Keep the region identifiers and the eight scenario columns
boxplot.df <- wyield[c("ID_1","name_long","A1B","A1BLow","A1F","A1T","A2","B1","B1Low","B2")]
## Make the first column (ID_1) numeric so ordering is numeric
boxplot.df[1] <- sapply(boxplot.df[1], as.numeric)
boxplot.df <- boxplot.df[order(boxplot.df$ID_1),]
## name_code pastes column 1 (ID_1) and column 2 (name_long)
boxplot.df <- cbind(boxplot.df, name_code = paste(as.character(boxplot.df[,1]), as.character(boxplot.df[,2])))
## Long format: one row per (region, scenario) pair
boxplot.df <- melt(boxplot.df, id=c("ID_1","name_long","name_code"))
## Fix the factor levels of name_code in order of first appearance
boxplot.df <- transform(boxplot.df,name_code=factor(name_code,levels=unique(name_code)))
## Define new theme for map
## (adapted from a theme function found online)
##   base_size:   base font size handed to theme_bw()
##   base_family: base font family handed to theme_bw()
## Returns theme_bw() with the elements listed below swapped in via %+replace%.
theme_map <- function (base_size = 14, base_family = "serif") {
  # Select a predefined theme for tweaking features
  theme_bw(base_size = base_size, base_family = base_family) %+replace%
    theme(
      axis.line=element_blank(),
      axis.text.x=element_text(size=rel(1.2), color="grey"),
      axis.text.y=element_text(size=rel(1.2), color="grey"),
      axis.ticks=element_blank(),
      axis.ticks.length=unit(0.3, "lines"),
      # axis.ticks.margin is no longer set: ggplot2 (>= 2.0.0) deprecates it
      # and only emits a warning; tick/label spacing is controlled through the
      # `margin` of axis.text instead
      axis.title.x=element_text(size=rel(1.2), color="grey"),
      axis.title.y=element_text(size=rel(1.2), color="grey"),
      legend.background=element_rect(fill="white", colour=NA),
      legend.key=element_rect(colour="white"),
      legend.key.size=unit(1.3, "lines"),
      legend.position="right",
      legend.text=element_text(size=rel(1.3)),
      legend.title=element_text(size=rel(1.4), face="bold", hjust=0),
      panel.border=element_blank(),
      panel.grid.minor=element_blank(),
      plot.title=element_text(size=rel(1.8), face="bold", hjust=0.5, vjust=2),
      plot.margin=unit(c(0.5,0.5,0.5,0.5), "lines")
    )
}
## Transform shapefile to dataframe and merge with yield data
wmap_df <- fortify(wmap)
wmap_df <- merge(wmap_df,wyield, by="id") # merge the spatial data and the yield data
## Plot map
mapy <- ggplot(wmap_df, aes(long,lat, group=group))
## polygons filled by the AVG column
mapy <- mapy + geom_polygon(aes(fill=AVG))
## white outlines: data and aesthetics are inherited from ggplot(); geom_path
## has no fill aesthetic, so the former fill=A1BLow mapping was removed
mapy <- mapy + geom_path(color="white", size=0.4)
## labs() here is a function call, so R skips the `labs` data frame created
## earlier in the script and still resolves to the ggplot2 function
mapy <- mapy + labs(title="Average yield impacts (in %) across SRES scenarios ") + scale_fill_gradient2(name = "%Change in yield",low = "red3",mid = "snow2",high = "darkgreen")
mapy <- mapy + coord_equal() + theme_map()
## region labels placed at the coordinates stored in the `labs` table
mapy <- mapy + geom_text(data=labs, aes(x=long, y=lat, label=ID_1, group=ID_1), size=6, family="serif")
mapy
## Plot boxplot
## Built up layer by layer; one box per name_code, drawn horizontally.
boxploty <- ggplot(data=boxplot.df, aes(factor(name_code),value))
boxploty <- boxploty +
  geom_boxplot(stat="boxplot", position="dodge", fill="grey",
               outlier.colour = "blue", outlier.shape = 16, outlier.size = 4)
boxploty <- boxploty +
  labs(title="Distribution of yield impacts (in %) by GTAP region", y="Yield (% Change)")
boxploty <- boxploty + theme_bw() + coord_flip()
## red open circle marks the mean of each group
boxploty <- boxploty +
  stat_summary(fun.y = "mean", geom = "point", shape=21, size= 4, color= "red")
## all text tweaks collected in one theme() call; applied after theme_bw()
boxploty <- boxploty +
  theme(
    plot.title = element_text(size = 26, hjust = 0.5, vjust = 1,
                              face = 'bold', family="serif"),
    axis.text.x = element_text(colour = 'black', size = 18, hjust = 0.5,
                               vjust = 1, family="serif"),
    axis.title.x = element_text(size = 14, hjust = 0.5, vjust = 0,
                                face = 'bold', family="serif"),
    axis.text.y = element_text(colour = 'black', size = 18, hjust = 0,
                               vjust = 0.5, family="serif"),
    axis.title.y = element_blank()
  )
boxploty
## I found this code on the website, and tried to tweak it to achieve my desired
## result, but failed
# Plot objects using widths and height and respect to fix aspect ratios
grid.newpage()
# 2-row, 1-column layout; note that only a single height (0.45 npc) is given
# for the two rows
pushViewport( viewport( layout = grid.layout( 2 , 1 , widths = unit( c( 1 ) , "npc" ) ,
                                              heights = unit( c( 0.45 ) , "npc" ) ,
                                              respect = matrix(rep(2,1),2) ) ) )
# map in the top cell, boxplot in the bottom cell
print( mapy, vp = viewport( layout.pos.row = 1, layout.pos.col = 1 ) )
print( boxploty, vp = viewport( layout.pos.row = 2, layout.pos.col = 1 ) )
upViewport(0)
# extra viewport intended for a separately drawn legend; with grid.draw()
# commented out nothing is drawn in it
vp3 <- viewport( width = unit(0.5,"npc") , x = 0.9 , y = 0.5)
pushViewport(vp3)
#grid.draw( legend )
popViewport()
Is this close to what you had in mind?
Code:
library(rgdal)
library(ggplot2)
library(RColorBrewer)
library(reshape)
library(gridExtra)
setwd("<directory with all your files...>")
# Extract the ID and centroid of the polygon at index x.
# Reads the global spatial object `wmap`.
#   x: index of the polygon within wmap@polygons
# Returns a named vector c(id, c.long, c.lat) taken from the polygon's ID slot
# and label point (labpt slot). The values share one atomic type because they
# are combined in a single c() call, so callers convert the coordinates back
# with as.numeric().
get.centroids <- function(x) {
  # S4 slots are accessed with `@`
  poly <- wmap@polygons[[x]]
  ID <- poly@ID
  centroid <- as.numeric(poly@labpt)
  return(c(id = ID, c.long = centroid[1], c.lat = centroid[2]))
}
wmap <- readOGR(dsn=".", layer="ne_110m_admin_0_countries")
wyield <- read.csv(file = "AllWorldCountries_CCShocksGTAP.csv", header=TRUE)
wyield <- transform(wyield, ID_1 = substr(ID_1,3,10)) #strip leading "TR"
# wmap@data and wyield have common, unique field: name
# Build the polygon id <-> name lookup from the shapefile attribute table,
# then join the yield data on name instead of relying on row order
wdata <- data.frame(id=rownames(wmap@data),name=wmap@data$name)
wdata <- merge(wdata,wyield, by="name")
labs <- do.call(rbind,lapply(1:17,get.centroids)) # extract polygon IDs and centroids from shapefile
wdata <- merge(wdata,labs,by="id")
# centroid columns are converted to numeric (via character) after the merge
wdata[c("c.lat","c.long")] <- sapply(wdata[c("c.lat","c.long")],function(x) as.numeric(as.character(x)))
wmap.df <- fortify(wmap) # data frame for world map
wmap.df <- merge(wmap.df,wdata,by="id") # merge data to fill polygons
# 11-colour "Spectral" palette from RColorBrewer
palette <- brewer.pal(11,"Spectral")
# Choropleth of AVG with thin grey outlines and ID_1 labels at the centroids,
# written as a single layer chain (same layer order as before)
ggmap <- ggplot(wmap.df, aes(x=long, y=lat, group=group)) +
  geom_polygon(aes(fill=AVG)) +
  geom_path(colour="grey50", size=.1) +
  geom_text(aes(x=c.long, y=c.lat, label=ID_1),size=3) +
  scale_fill_gradientn(name="% Change",colours=rev(palette)) +
  theme(plot.title=element_text(face="bold"),legend.position="left") +
  coord_fixed() +
  labs(x="",y="",title="Average Yield Impacts across SRES Scenarios (% Change)") +
  # margins are given in npc units
  theme(plot.margin=unit(c(0,0.03,0,0.05),units="npc"))
ggmap
# Boxplot data: rows ordered numerically by ID_1, labelled "name_long [ID_1]"
box.df <- wdata[order(as.numeric(wdata$ID_1)),]
box.df$label <- paste0(box.df$name_long," [",box.df$ID_1,"]")
box.df <- melt(box.df,id.vars="label",measure.vars=c("A1B","A1BLow","A1F","A1T","A2","B1","B1Low","B2"))
# levels fixed in order of first appearance so ggplot keeps the ordering
box.df$label <- factor(box.df$label,levels=unique(box.df$label))
# Horizontal boxplots with the group mean marked by a red open circle
ggbox <- ggplot(box.df,aes(x=label, y=value)) +
  geom_boxplot(fill="grey", outlier.colour = "blue", outlier.shape = 16, outlier.size = 4) +
  stat_summary(fun.y=mean, geom="point", shape=21, size= 4, color= "red") +
  coord_flip() +
  labs(x="", y="% Change", title="Distribution of Yield Impacts by GTAP region") +
  theme(plot.title=element_text(face="bold"), axis.text=element_text(color="black")) +
  theme(plot.margin=unit(c(0,0.03,0,0.0),units="npc"))
ggbox
# Two stacked cells in one column; row heights are 0.40 and 0.60
grid.newpage()
pushViewport(
  viewport(layout = grid.layout(nrow = 2, ncol = 1, heights = c(0.40, 0.60)))
)
# map goes in the top cell, boxplot in the bottom cell
print(ggmap, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(ggbox, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))
Explanation:
The last 4 lines of code do most of the work in arranging the layout. I create a viewport layout with 2 viewports arranged as 2 rows in 1 column. The upper viewport is 40% of the height of the grid, the lower viewport is 60% of the height. Then, in the ggplot calls I create a right margin of 3% of the plot width for both the map and the boxplot, and a left margin for the map so that the map and the boxplot are aligned on the left. There's a fair amount of tweaking to get everything lined up, but these are the parameters to play with. You should also know that, since we use coord_fixed() in the map, if you change the overall size of the plot (by resizing the plot window, for example), the map's width will change.
Finally, your code to create the choropleth map is a little dicey...
## re-order the shapefile
## (this only prepends the row names of wmap@data to wyield as an "id" column;
## rows are paired by position)
wyield <- cbind(id=rownames(wmap@data),wyield)
This does not reorder the shapefile. All you are doing here is prepending the wmap@data rownames to your wyield data. This works if the rows in wyield are in the same order as the polygons in wmap - a very dangerous assumption. If they are not, then you will get a map, but the coloring will be incorrect, and unless you study the output very carefully the error is likely to be missed. So the code above creates an association between polygon ID and region name, merges the wyield data based on name, and then merges that into wmap.df based on polygon id.
# polygon id <-> region name lookup taken from the shapefile attribute table
wdata <- data.frame(id=rownames(wmap@data),name=wmap@data$name)
# join the yield data on the shared name field
wdata <- merge(wdata,wyield, by="name")
...
wmap.df <- fortify(wmap) # convert the world map to a data frame for ggplot
wmap.df <- merge(wmap.df,wdata,by="id") # attach the yield data to each polygon row by polygon id
Related
This question is quite trivial, but I cannot handle it nicely.
I'm trying to plot a circular tree with a side heatmap.
I'm using ggtree, but any ggplot2-based approach is welcome.
The problem is that I don't understand the gheatmap function well.
I want:
1- names AFTER the heatmap
2- 2 text columns after the heatmap (for now they may have the same value, but I need to know how to add them)
3- heatmap column names nicely handled: should we remove the column names and use a different color scale for each? Whatever the solution turns out to be, it will probably be better than the way it is now
library(tidyverse)
library(ggtree)
library(treeio)
library(tidytree)
# Example tree and genotype table shipped with ggtree
beast_file <- system.file("examples/MCC_FluA_H3.tree", package="ggtree")
beast_tree <- read.beast(beast_file)
genotype_file <- system.file("examples/Genotype.txt", package="ggtree")
# argument spelled out in full (no partial matching) and FALSE instead of F
genotype <- read.table(genotype_file, sep="\t", stringsAsFactors=FALSE)
# drop a trailing "." from the column names
colnames(genotype) <- sub("\\.$", "", colnames(genotype))
# fan-layout tree with a scale bar and tip labels
p <- ggtree(beast_tree, mrsd="2013-01-01",layout = "fan", open.angle = -270) +
  geom_treescale(x=2008, y=1, offset=2) +
  geom_tiplab(size=2)
# genotype heatmap attached to the tree, with a manual fill scale
gheatmap(p, genotype, offset=5, width=0.5, font.size=3,
         colnames_angle=-45, hjust=0) +
  scale_fill_manual(breaks=c("HuH3N2", "pdm", "trig"),
                    values=c("steelblue", "firebrick", "darkgreen"), name="genotype")
Thanks in advance
UPDATE:
I found a better way to plot the name of heatmap columns.
Also, I found that the simplification of the data was useful to
clean up a little the tip labels.
Now, I just need to add two text columns after heatmap.
# Plain (rectangular) tree
p <- ggtree(beast_tree)
# Heatmap with vertical column names placed at the top.
# The dangling comma after the last argument was removed; it passed an extra
# empty argument to gheatmap().
gheatmap(
  p, genotype, colnames=TRUE,
  colnames_angle=90,
  colnames_offset_y = 5,
  colnames_position = "top"
) +
  scale_fill_manual(breaks=c("HuH3N2", "pdm", "trig"),
                    values=c("steelblue", "firebrick", "darkgreen"), name="genotype")
UPDATE 2:
A very bad improvement
I just used ggplot to create the label and merge with patchwork
library(patchwork)
# Text-only panel: one label per row of the tree data, all at x = 1 and at the
# tree's y position, with every axis decoration removed
adText <- ggplot(p$data, aes(1, y= y, label = label )) +
  geom_text(size=2) +
  xlim(NA, 1) +
  theme_classic() +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())
# NOTE(review): `pp` is not defined in the visible code; presumably it holds
# the gheatmap() plot - confirm before running
pp + adText
The answer, according to @xiangpin on GitHub.
Use a big offset value in geom_tiplab:
p <- ggtree(beast_tree)
# Heatmap first, then the remaining components are added one at a time
p1 <- gheatmap(p, genotype,
               colnames=TRUE, colnames_angle=-45, colnames_offset_y = 5,
               colnames_position = "bottom",
               width=0.3, hjust=0, font.size=2)
p1 <- p1 + scale_fill_manual(breaks=c("HuH3N2", "pdm", "trig"),
                             values=c("steelblue", "firebrick", "darkgreen"),
                             name="genotype")
# aligned tip labels pushed out by a large offset
p1 <- p1 + geom_tiplab(align = TRUE, linesize=0, offset = 7, size=2)
# axis limits set explicitly
p1 <- p1 + xlim_tree(xlim=c(0, 36))
p1 <- p1 + scale_y_continuous(limits = c(-1, NA))
p1
Using ggtreeExtra:
library(ggtreeExtra)
library(ggtree)
library(treeio)
library(ggplot2)
# Example tree and genotype table shipped with ggtree
beast_file <- system.file("examples/MCC_FluA_H3.tree", package="ggtree")
genotype_file <- system.file("examples/Genotype.txt", package="ggtree")
tree <- read.beast(beast_file)
genotype <- read.table(genotype_file, sep="\t")
# drop a trailing "." from the column names
colnames(genotype) <- sub("\\.$", "", colnames(genotype))
genotype$ID <- row.names(genotype)
# long format: one row per (ID, type) pair with the value in `genotype`
dat <- reshape2::melt(genotype, id.vars="ID", variable.name = "type", value.name="genotype", factorsAsStrings=FALSE)
# turn empty strings into NA in one vectorized step (replaces the per-element
# lapply/ifelse); nzchar() is TRUE for NA, so existing NAs stay NA
dat$genotype <- as.vector(dat$genotype)
dat$genotype[!nzchar(dat$genotype)] <- NA
p <- ggtree(tree) + geom_treescale()
# tiles are drawn next to the tree with geom_fruit; na.translate=FALSE is set
# on the fill scale for the NA genotypes
p2 <- p + geom_fruit(data=dat,
                     geom=geom_tile,
                     mapping=aes(y=ID, x=type, fill=genotype),
                     color="white") +
  scale_fill_manual(values=c("steelblue", "firebrick", "darkgreen"),
                    na.translate=FALSE) +
  geom_axis_text(angle=-45, hjust=0, size=1.5) +
  geom_tiplab(align = TRUE, linesize=0, offset = 6, size=2) +
  xlim_tree(xlim=c(0, 36)) +
  scale_y_continuous(limits = c(-1, NA))
p2
I'm new to R programming but I'm enjoying the challenge of writing code!
I created a GIF by stitching multiple map plots together. Unfortunately,
my legend is referencing the particular year of the map being generated and as a result, the GIF shows a legend that has its marks moving up and down. I think the solution would be to have the legend reference the entire data-frame rather than the given year. How do I do this?
Link to the GIF:
https://1drv.ms/i/s!Ap-NxMqZOClHqgsFHSxo-kR1pLrr
##This is the R-Code I used for the year 1950:
kansas1950 <- readShapePoly("KansasCOUNTIES.shp")
## Kansas Winter-Wheat Planted from Quickstats
kansas1950.acres <- read.csv(file = "KWW 19502016 QuickStatsEst.csv",
                             stringsAsFactors = FALSE)
## Create a smaller dataset by retaining the Kansas acres in 1950 (column
## X1950) and the FIPS code, which is used for matching and merging with the
## input shapefile
smaller.data1950 <- data.frame(FIPS = kansas1950.acres$FIPS, Acres = kansas1950.acres$X1950)
smaller.data1950 <- na.omit(smaller.data1950)
## Join the two datasets using their common field
## (the shapefile attribute table is the S4 slot kansas1950@data)
matched.indices1950 <- match(kansas1950@data[, "FIPS"], smaller.data1950[, "FIPS"])
kansas1950@data <- data.frame(kansas1950@data, smaller.data1950[matched.indices1950, ])
## Compute the cartogram transformation of each county using its planted
## acreage (the Acres column) with the degree of Gaussian blur = 0.5
kansas1950.carto <- quick.carto(kansas1950, kansas1950@data$Acres, blur = 0.5)
## Zero-width buffer per polygon, then convert the object into a data frame
## NOTE(review): presumably the zero-width buffer is there to repair invalid
## geometries before fortify() - confirm
kansas1950.carto <- gBuffer(kansas1950.carto, byid=TRUE, width=0)
kansas1950.f <- fortify(kansas1950.carto, region = "FIPS")
## Merge the cartogram transformation with the kansas map shapefile
kansas1950.f <- merge(kansas1950.f, kansas1950@data, by.x = "id", by.y = "FIPS")
# Plot of the transformed polygons, where each county is
## further shaded by its acreage (scale runs from black to purple)
## The fill is mapped to the Acres column by name; writing
## kansas1950.f$Acres inside aes() bypasses the plot's data.
my_map1950 <- ggplot(kansas1950.f, aes(long, lat, group = group,
                                       fill = Acres)) + geom_polygon() +
  scale_fill_continuous(breaks = c(0, 10000, 100000, 200000, 526000),
                        labels = c("0 Acres","10k Acres", "100k Acres", "200k Acres", "526k Acres"),
                        low = "black",
                        high = "purple"
  ) +
  labs(x=NULL, y=NULL) + labs(fill = "Acres Planted")
# Remove default ggplot layers
my_map1950 <-my_map1950 + theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
                                              panel.grid.minor = element_blank(), axis.ticks=element_blank(),
                                              axis.text.x=element_blank(),axis.text.y=element_blank(),
                                              axis.line = element_line(colour = NA))
# Citation
my_map1950 <- my_map1950 + labs(caption = "USDA-NASS Quick Stats") + ggtitle("1950 Kansas Winter-Wheat Acres Planted")
my_map1950
# Save a higher resolution PNG
png('my_map1950kwwpurp.png', units="in", width=10, height=8, res=300)
# print() explicitly: a bare object name is only auto-printed at top level, so
# without it the file stays empty when the script is run through source()
print(my_map1950)
dev.off()
Assuming this is what you want, try adding this to your plot (but, of course, specifying your own custom lower and upper limits):
+ scale_fill_gradient(limits = c(0, 10))
I have a sample df that worked:
# Minimal example: ten tiles filled by x, shown with two different fill limits
df <- data.frame(x = 1:10)
p <- ggplot(df, aes(x, 1))
p <- p + geom_tile(aes(fill = x), colour = "white")
# same plot, fill scale limited to 0-10 and then to 0-20
p + scale_fill_gradient(limits = c(0, 10))
p + scale_fill_gradient(limits = c(0, 20))
Here's the graph with the scale set from 0 to 10.
Here's the graph with the scale set from 0 to 20.
EDIT: Oh, I see now that you have called scale_fill_continuous() in your code. Try adding a limits argument similar to what I did to that.
I have a data frame named myKrige_new that contains interpolated values by latitude and longitude. You can download it from HERE. I have plotted these values over a particular area of a country map using the ggplot2 package in R, and I got this plot
But I want the legend (colour bar) of my plot to look like the following legend.
In my dataset here, the range of the data (pred) is 72 to 257. But I want my legend to show the values 0 to 200, so that it can be compared with other plots, even though there is no value under 72 here.
So, I want to use 20 different colours like in the legend above, which means the last box of the legend would contain the colour for values greater than 200. I have used the scale_fill_gradientn function but it didn't work. I have spent days trying to find an option to do this in R without success. Any kind of help will be highly appreciated.
R code :
library(scales)
library(ggplot2)
# Interpolated values; LON, LAT and pred columns are used below
myKrige_new <- read.csv ("myKrige_new.csv")
range(myKrige_new$LON)
range(myKrige_new$LAT)
#Original skorea data transformed the same way as myKrige_new
# getData() is qualified with raster:: because this script only attaches
# scales and ggplot2
skorea1 <- raster::getData("GADM", country= "KOR", level=1)
skorea1 <- fortify(skorea1)
myKorea1 <- data.frame(skorea1)
###############
# Binned summary of pred over LON/LAT with the boundaries from myKorea1 on top.
# Three alternative ways of drawing the values are listed; only Solution 3 is
# active, the other two are commented out.
ggplot()+
theme_minimal() +
#SOLUTION 1: one tile per row, filled by pred
#geom_tile(data = myKrige_new, aes(x= LON, y= LAT, fill = pred)) +
#SOLUTION 2: filled square points. Uncomment the line(s) below:
#geom_point(data = myKrige_new, aes(x= LON, y= LAT, fill = pred),
#shape=22, size=8, colour=NA)+
#Solution 3: 2-d binned summary of pred (both bins and binwidth are supplied)
stat_summary_2d(data=myKrige_new, aes(x = LON, y = LAT, z = pred),bins = 30,
binwidth = c(0.05,0.05)) +
# five-colour gradient with colour stops placed at rescale(c(0,50,100,150,200))
scale_fill_gradientn(colours=c("white","blue","green","yellow","red"),
values=rescale(c(0,50,100,150,200)),
guide="colorbar", name = "PM10 Conc")+
# boundaries drawn as unfilled black outlines
geom_map(data= myKorea1, map= myKorea1, aes(x=long,y=lat,map_id=id,group=group),
fill=NA, colour="black") +
# restrict the view to the given longitude/latitude window
coord_cartesian(xlim= c(126.6, 127.2), ylim= c(37.2 ,37.7)) +
labs(title= "PM10 Concentration in Seoul Area at South Korea",
x="", y= "")+
theme(legend.position = "bottom")+
# wide colour bar with its title centred underneath
guides(fill = guide_colourbar(barwidth = 27, barheight = NULL,
title.position = "bottom", title.hjust = 0.5))
Here is a working solution:
library(scales)
library(ggplot2)
library(raster) # needed for the `getData` function
library(dplyr) # needed for the `mutate` funtion
# Read the data and drop its first column
myKrige_new <- read.csv("~/Downloads/myKrige_new.csv")
myKrige_new <- myKrige_new[-1]
range(myKrige_new[["LON"]])
range(myKrige_new[["LAT"]])
# GADM level-1 boundaries for KOR, fortified and stored as a data frame
skorea1 <- getData("GADM", country = "KOR", level = 1)
skorea1 <- fortify(skorea1)
myKorea1 <- data.frame(skorea1)
# check the spread of pred (the answer notes it exceeds 200, max = 257)
summary(myKrige_new[["pred"]])
# Same map as in the question, with the fill scale fixed to 0-200 and the
# colour bar drawn as 20 discrete rectangles.
ggplot() +
theme_minimal() +
# pred is capped at 200 before binning, so values above the scale limit are
# not treated as out of range
stat_summary_2d(data = mutate(myKrige_new,
pred = ifelse(pred > 200, 200, pred)),
aes(x = LON, y = LAT, z = pred),
bins = 30,
binwidth = c(0.05,0.05)) +
# fixed limits and a break every 20 units
scale_fill_gradientn(colours=c("white","blue","green","yellow","red"),
values=rescale(c(0,50,100,150,200)),
name = expression(paste(PM[10], group("[",paste(mu,g/m^3), "]"))),
limits = c(0,200),
breaks = seq(0,200, 20),
# raster = FALSE draws the bar as rectangles; tick marks are switched off
guide = guide_colorbar(nbin = 20,
barwidth = 27,
title.position = "bottom",
title.hjust = 0.5,
raster = FALSE,
ticks = FALSE)) +
# boundaries drawn as unfilled black outlines
geom_map(data= myKorea1,
map= myKorea1,
aes(x=long,y=lat,map_id=id,group=group),
fill=NA,
colour="black") +
# equal-scale coordinates restricted to the given window
coord_equal(xlim= c(126.6, 127.2),
ylim= c(37.2 ,37.7)) +
# no padding around the data on either axis
scale_y_continuous(expand = c(0,0)) +
scale_x_continuous(expand = c(0,0)) +
labs(title = "PM10 Concentration in Seoul Area at South Korea",
x = "",
y = "") +
theme(legend.position = "bottom")
I added limits = c(0,200) and breaks = seq(0, 200, 20) to scale_fill_gradientn as well as nbin = 20 to guide_colorbar, this last change is optional because the default nbin is 20, but in your case you actually need 20. Also, adding limits means values outside the range are plotted in grey50 so I had to transform pred values above 200 to 200 to avoid that; the interpretation of red color is now 200+.
One more thing, the option raster in guide_colorbar changes the colorbar from a raster object to a set of rectangles achieving the look you were looking for.
Finally, I changed the coordinate system from cartesian to equal because you are plotting a map.
Here is the result; I hope it helps:
Update: added an expand argument to scale_y_continuous and scale_x_continuous, as requested by the OP.
I use the rworldmap package with WorldBank Data and I enjoy it. I want to plot maps for Iran, with data related to each province. What are the steps to do that? I know we can plot maps like that in R for some countries like US but not for all countries.
You can combine rworldmap with the great suggestion from @jazzurro of using raster to get GADM boundaries.
I suspect your main difficulty might be getting the province names to match between your data and the map.
The example below, uses defaults that you can change and just gives a different colour for each province.
library(raster)
library(rworldmap)
## 1 Get map of provinces (level 1 admin regions)
iranLevel1<- raster::getData("GADM", country = "Iran", level = 1)
## 2 join your [data] onto the map by specifying the join column in each
## this will report on any mis-matches between province names
#iranLevel1 <- rworldmap::joinData2Map([data],nameMap="iranLevel1",nameJoinIDMap="NAME_1",nameJoinColumnData=[insert])
## 3 plot map (change NAME_1 to the data you want to plot)
rworldmap::mapPolys(iranLevel1, nameColumnToPlot="NAME_1", addLegend=FALSE)
## 4 add text labels for provinces
## the argument is spelled out as `labels` rather than relying on partial
## matching of `label`
text(iranLevel1, labels="NAME_1", cex=0.7)
Note that joinData2Map(), mapPolys() are more generic equivalents of joinCountryData2Map(), mapCountryData().
Another way of doing this would be to use the choroplethr package.
You can directly import GADM data using the raster package. Then, you can draw a map using ggplot2. When you download data, you can specify different level. Depending on this you see different boundaries.
library(raster)
library(ggplot2)
### Get data: GADM level-2 boundaries for Iran
iran <- getData("GADM", country = "Iran", level = 2)
### SPDF to DF
map <- fortify(iran)
### Draw a map: the fortified data frame serves as both data and map
ggplot() +
  geom_map(
    aes(x = long, y = lat, map_id = id, group = group),
    data = map,
    map = map
  )
EDIT
Seeing Andy's answer, I'd like to show how to add province names in ggplot2. This is something I learned from @hrbrmstr. The rworldmap package allows you to type less. The ggplot2 package still offers very nice graphics as well.
library(raster)
library(rgdal)
library(rgeos)
library(ggplot2)
library(dplyr)
### Get data
iran<- getData("GADM", country = "Iran", level = 1)
### SPDF to DF
map <- fortify(iran)
map$id <- as.integer(map$id)
## lookup table: running index 1..n paired with the province name (NAME_1
## column of the attribute table, S4 slot iran@data)
dat <- data.frame(id = seq_along(iran@data$NAME_1), state = iran@data$NAME_1)
map_df <- inner_join(map, dat, by = "id")
# Find a center point for each province
centers <- data.frame(gCentroid(iran, byid = TRUE))
## names are assigned by position
## NOTE(review): assumes the centroids come back in the same order as
## iran@data - confirm
centers$state <- dat$state
### This is hrbrmstr's own function
### Map theme: theme_gray() with axes, grid and backgrounds blanked out.
###   base_size:   base font size handed to theme_gray()
###   base_family: base font family handed to theme_gray()
### Returns theme_gray() with the elements below swapped in via %+replace%.
theme_map <- function (base_size = 12, base_family = "") {
  theme_gray(base_size = base_size, base_family = base_family) %+replace%
    theme(
      axis.line=element_blank(),
      axis.text.x=element_blank(),
      axis.text.y=element_blank(),
      axis.ticks=element_blank(),
      axis.ticks.length=unit(0.3, "lines"),
      # axis.ticks.margin is no longer set: ggplot2 (>= 2.0.0) deprecates it
      # and only emits a warning
      axis.title.x=element_blank(),
      axis.title.y=element_blank(),
      legend.background=element_rect(fill="white", colour=NA),
      legend.key=element_rect(colour="white"),
      legend.key.size=unit(1.5, "lines"),
      legend.position="right",
      legend.text=element_text(size=rel(1.2)),
      legend.title=element_text(size=rel(1.4), face="bold", hjust=0),
      panel.background=element_blank(),
      panel.border=element_blank(),
      panel.grid.major=element_blank(),
      panel.grid.minor=element_blank(),
      # panel.spacing is the current name of the deprecated panel.margin
      panel.spacing=unit(0, "lines"),
      plot.background=element_blank(),
      plot.margin=unit(c(1, 1, 0.5, 0.5), "lines"),
      plot.title=element_text(size=rel(1.8), face="bold", hjust=0.5),
      strip.background=element_rect(fill="grey90", colour="grey50"),
      strip.text.x=element_text(size=rel(0.8)),
      strip.text.y=element_text(size=rel(0.8), angle=-90)
    )
}
# Provinces as grey polygons with white borders, province names printed at the
# positions stored in `centers`, styled with theme_map()
ggplot() +
  geom_map(
    aes(x = long, y = lat, map_id = id, group = group),
    data = map_df,
    map = map_df,
    fill = "#bbbbbb",
    color = "#ffffff",
    size = 0.25
  ) +
  geom_text(
    aes(x = x, y = y, label = state),
    data = centers,
    size = 3
  ) +
  coord_map() +
  labs(title = "Iran Province", x = "", y = "") +
  theme_map()
Didzis Elferts showed how to plot a dendrogram using ggplot2 and ggdendro:
horizontal dendrogram in R with labels
here is the code:
# Relabel the 50 rows of USArrests as sta_1 ... sta_50, then cluster the rows
# on their distance matrix with hclust() using method "ave"
labs <- paste0("sta_", seq_len(50))
row.names(USArrests) <- labs
hc <- hclust(dist(USArrests), method = "ave")
library(ggplot2)
library(ggdendro)
# Convert the cluster object into the data frames ggplot needs
dendr <- dendro_data(hc, type="rectangle")
# Build the plot step by step: tree segments first, then the custom labels
# supplied through geom_text() with label=label
dendro_plot <- ggplot()
dendro_plot <- dendro_plot +
  geom_segment(data=segment(dendr), aes(x=x, y=y, xend=xend, yend=yend))
dendro_plot <- dendro_plot +
  geom_text(data=label(dendr), aes(x=x, y=y, label=label, hjust=0), size=3)
# axes flipped and the y scale reversed, with room added for the labels
dendro_plot <- dendro_plot + coord_flip() + scale_y_reverse(expand=c(0.2, 0))
# strip the y-axis decorations and use a plain white panel
dendro_plot <- dendro_plot +
  theme(axis.line.y=element_blank(),
        axis.ticks.y=element_blank(),
        axis.text.y=element_blank(),
        axis.title.y=element_blank(),
        panel.background=element_rect(fill="white"),
        panel.grid=element_blank())
dendro_plot
Does anyone know, how to colorize the different clusters? For example, you want to have 2 Clusters (k=2) colorized?
This approach is very similar to @DidzisElferts', just a little simpler.
# Work on a copy so the built-in dataset itself is left alone
df <- USArrests
labs <- paste0("sta_", 1:50)            # new labels
row.names(df) <- labs                   # set new row names
library(ggplot2)
library(ggdendro)
hc <- hclust(dist(df), method = "ave")  # hierarchical clustering
dendr <- dendro_data(hc, type = "rectangle")  # convert for ggplot
clust <- cutree(hc, k = 2)              # find 2 clusters
# one row per label with its cluster as a factor
clust.df <- data.frame(label = names(clust), cluster = factor(clust))
# the labels component of dendr holds the labels; join the cluster column
# onto it by the label column
dendr$labels <- merge(dendr$labels, clust.df, by = "label")
# Plot the dendrogram step by step; the label colour is mapped to the cluster
# column in geom_text()
dendro_plot <- ggplot()
dendro_plot <- dendro_plot +
  geom_segment(data=segment(dendr), aes(x=x, y=y, xend=xend, yend=yend))
dendro_plot <- dendro_plot +
  geom_text(data=label(dendr),
            aes(x, y, label=label, hjust=0, color=cluster),
            size=3)
dendro_plot <- dendro_plot + coord_flip() + scale_y_reverse(expand=c(0.2, 0))
# strip the y-axis decorations and use a plain white panel
dendro_plot <- dendro_plot +
  theme(axis.line.y=element_blank(),
        axis.ticks.y=element_blank(),
        axis.text.y=element_blank(),
        axis.title.y=element_blank(),
        panel.background=element_rect(fill="white"),
        panel.grid=element_blank())
dendro_plot
A workaround would be to plot the cluster object with plot() and then use the function rect.hclust() to draw borders around the clusters (the number of clusters is set with the argument k=). If the result of rect.hclust() is saved as an object, it is a list in which each element contains the observations belonging to one cluster.
# draw the dendrogram with base graphics
plot(hc)
# draw rectangles around the k=2 clusters and keep the returned value in gg
gg<-rect.hclust(hc,k=2)
Now this list can be converted to a data frame in which the column clust contains the cluster names (two groups in this example) - the names are repeated according to the lengths of the list elements.
# Flatten the list gg into one data frame: num holds the unlisted values and
# clust repeats "Clust1", "Clust2", ... once per member of each list element.
# lengths() replaces sapply(gg, length), and the names are generated from the
# number of list elements instead of being hard-coded for two clusters.
clust.gr <- data.frame(num = unlist(gg),
                       clust = rep(paste0("Clust", seq_along(gg)),
                                   times = lengths(gg)))
head(clust.gr)
num clust
sta_1 1 Clust1
sta_2 2 Clust1
sta_3 3 Clust1
sta_5 5 Clust1
sta_8 8 Clust1
sta_9 9 Clust1
New data frame is merged with label() information of dendr object (dendro_data() result).
# join the dendrogram label table (column "label") with clust.gr (row names)
text.df<-merge(label(dendr),clust.gr,by.x="label",by.y="row.names")
# show the first rows of the merged table
head(text.df)
label x y num clust
1 sta_1 8 0 1 Clust1
2 sta_10 28 0 10 Clust2
3 sta_11 41 0 11 Clust2
4 sta_12 31 0 12 Clust2
5 sta_13 10 0 13 Clust1
6 sta_14 37 0 14 Clust2
When plotting dendrogram use text.df to add labels with geom_text() and use column clust for colors.
# Dendrogram segments plus labels taken from text.df, coloured by clust
dendro_plot <- ggplot()
dendro_plot <- dendro_plot +
  geom_segment(data=segment(dendr), aes(x=x, y=y, xend=xend, yend=yend))
dendro_plot <- dendro_plot +
  geom_text(data=text.df,
            aes(x=x, y=y, label=label, hjust=0,color=clust),
            size=3)
dendro_plot <- dendro_plot + coord_flip() + scale_y_reverse(expand=c(0.2, 0))
# strip the y-axis decorations and use a plain white panel
dendro_plot <- dendro_plot +
  theme(axis.line.y=element_blank(),
        axis.ticks.y=element_blank(),
        axis.text.y=element_blank(),
        axis.title.y=element_blank(),
        panel.background=element_rect(fill="white"),
        panel.grid=element_blank())
dendro_plot
Adding to @DidzisElferts' and @jlhoward's code, the dendrogram itself can be coloured.
library(ggplot2)
library(ggdendro)
library(plyr)
library(zoo)
# Prepare the data for a dendrogram whose branches are coloured by cluster.
df <- USArrests # work on a copy rather than altering the built-in dataset
labs <- paste("sta_", 1:50, sep = "") # new labels
rownames(df) <- labs # set new row names
cut <- 4 # Number of clusters
hc <- hclust(dist(df), "ave") # hierarchical clustering
dendr <- dendro_data(hc, type = "rectangle")
clust <- cutree(hc, k = cut) # find 'cut' clusters
clust.df <- data.frame(label = names(clust), cluster = clust)
# Split dendrogram into upper grey section and lower coloured section
# height: the distinct segment y values, sorted in decreasing order
height <- unique(dendr$segments$y)[order(unique(dendr$segments$y), decreasing = TRUE)]
# cut.height: mean of the cut-th and (cut-1)-th entries of height
cut.height <- mean(c(height[cut], height[cut-1]))
# line = 1 for segments with y == yend that lie above cut.height, otherwise 2
dendr$segments$line <- ifelse(dendr$segments$y == dendr$segments$yend &
dendr$segments$y > cut.height, 1, 2)
# any segment whose yend is above cut.height is also set to 1
dendr$segments$line <- ifelse(dendr$segments$yend > cut.height, 1, dendr$segments$line)
# Number the clusters
# differences of consecutive line values, with -1 prepended for the first row
dendr$segments$cluster <- c(-1, diff(dendr$segments$line))
# rows where the difference is 1, i.e. where line steps from 1 to 2
change <- which(dendr$segments$cluster == 1)
# the i-th such row is given the value i + 1
for (i in 1:cut) dendr$segments$cluster[change[i]] = i + 1
# rows with line == 1 get cluster 1; remaining zeros become NA ...
dendr$segments$cluster <- ifelse(dendr$segments$line == 1, 1,
ifelse(dendr$segments$cluster == 0, NA, dendr$segments$cluster))
# ... and the NAs are filled by carrying the last non-NA value forward
dendr$segments$cluster <- na.locf(dendr$segments$cluster)
# Consistent numbering between segment$cluster and label$cluster
# give clust.df$label the same factor levels as the dendrogram labels, sort by
# it, then relabel the clusters 2 .. cut+1 in order of first appearance
clust.df$label <- factor(clust.df$label, levels = levels(dendr$labels$label))
clust.df <- arrange(clust.df, label)
clust.df$cluster <- factor((clust.df$cluster), levels = unique(clust.df$cluster), labels = (1:cut) + 1)
dendr[["labels"]] <- merge(dendr[["labels"]], clust.df, by = "label")
# Positions for cluster labels
# run lengths of the cluster column and their cumulative end positions
n.rle <- rle(dendr$segments$cluster)
N <- cumsum(n.rle$lengths)
# take every second run end and move one row past it
N <- N[seq(1, length(N), 2)] + 1
N.df <- dendr$segments[N, ]
# shift the cluster value down by one for use as the printed label
N.df$cluster <- N.df$cluster - 1
# Plot the dendrogram, one component at a time
dendro_plot <- ggplot()
# segments: size mapped to line, colour mapped to cluster
dendro_plot <- dendro_plot +
  geom_segment(data = segment(dendr),
               aes(x=x, y=y, xend=xend, yend=yend, size=factor(line), colour=factor(cluster)),
               lineend = "square", show.legend = FALSE)
# first colour is grey, followed by one rainbow colour per cluster
dendro_plot <- dendro_plot +
  scale_colour_manual(values = c("grey60", rainbow(cut))) +
  scale_size_manual(values = c(.1, 1))
# cluster numbers at the rows selected in N.df
dendro_plot <- dendro_plot +
  geom_text(data = N.df, aes(x = x, y = y, label = factor(cluster), colour = factor(cluster + 1)),
            hjust = 1.5, show.legend = FALSE)
# leaf labels coloured by their cluster
dendro_plot <- dendro_plot +
  geom_text(data = label(dendr), aes(x, y, label = label, colour = factor(cluster)),
            hjust = -0.2, size = 3, show.legend = FALSE)
dendro_plot <- dendro_plot +
  scale_y_reverse(expand = c(0.2, 0)) +
  labs(x = NULL, y = NULL) +
  coord_flip()
# strip the y-axis decorations and use a plain white panel
dendro_plot <- dendro_plot +
  theme(axis.line.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        panel.background = element_rect(fill = "white"),
        panel.grid = element_blank())
dendro_plot
The 2-cluster and 4-cluster solutions:
A short way to achieve a similar result is to use the package dendextend, taken from this overview.
df <- USArrests # work on a copy rather than altering the built-in dataset
labs <- paste("sta_",1:50,sep="") # new labels
rownames(df) <- labs # set new row names
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later
library(magrittr)
library(ggplot2)
library(dendextend)
# distance matrix -> hclust (default method) -> dendrogram, then set branch
# colours/width, label size/colours and leaf points for k = 4
dend <- df %>% dist %>%
  hclust %>% as.dendrogram %>%
  set("branches_k_color", k = 4) %>% set("branches_lwd", 0.7) %>%
  set("labels_cex", 0.6) %>% set("labels_colors", k = 4) %>%
  set("leaves_pch", 19) %>% set("leaves_cex", 0.5)
# convert to a ggdend object and draw it horizontally with ggplot
ggd1 <- as.ggdend(dend)
ggplot(ggd1, horiz = TRUE)
Note: The order of the states is slightly different compared to those above - not really changing interpretation though.
For those that are still looking for a convenient way to do this, you can use my package ggdendroplot (https://github.com/NicolasH2/ggdendroplot).
If you have the data from the posted example:
# Relabel the 50 rows of USArrests as sta_1 ... sta_50, then cluster the rows
# on their distance matrix with hclust() using method "ave"
labs <- paste0("sta_", seq_len(50))
row.names(USArrests) <- labs
hc <- hclust(dist(USArrests), method = "ave")
...you can use ggdendroplot with ggplot to get a colored dendrogram:
# Install ggdendroplot from GitHub only when it is not already available, so
# re-running the script does not reinstall it every time
if (!requireNamespace("ggdendroplot", quietly = TRUE)) {
  devtools::install_github("NicolasH2/ggdendroplot")
}
library(ggdendroplot)
library(ggplot2)
# dendrogram layer built from the hclust object
# NOTE(review): dendrocut = 30 is presumably the height at which clusters are
# separated for colouring - confirm in the package README
ggplot() + geom_dendro(hc, dendrocut = 30)
You can turn it sideways, on its head, etc. It's basically just a ggplot layer, so you can modify the graph further as you wish and add it to other ggplots. Check out the GitHub page to see what you can do with ggdendroplot.