I am working on a project where I need to make a map for the top global streaming music. And I have got the map with song's streamings in different location.
Here is my map
Here is my code, Below are the background info and some data cleaning:
library("dplyr")
library("stringr")
library("tidyverse")
library("scales")
library("ggplot2")
library("maps")
library("leaflet")
# load the .csv into R studio, you can do this 1 of 2 ways
#read.csv("the name of the .csv you downloaded from kaggle")
#spotiify_origional <- read.csv("charts.csv")
spotiify_origional <- read.csv("https://raw.githubusercontent.com/info201a-au2022/project-group-1-section-aa/main/data/charts.csv")
# filters down the data
# removes the track id, explicit, and duration columns
spotify_modify <- spotiify_origional %>%
select(name, country, date, position, streams, artists, genres = artist_genres)
#returns all the data just from 2022
#this is the data set you should you on the project
spotify_2022 <- spotify_modify %>%
filter(date >= "2022-01-01") %>%
arrange(date) %>%
group_by(date)
spotify_2022$streams <- as.numeric(spotify_2022$streams)
View(spotify_2022)
spotify_2022_global <- spotify_modify %>%
filter(date >= "2022-01-01") %>%
filter(country == "global") %>%
arrange(date) %>%
group_by(date)
View(spotify_2022_global)
# use write.csv() to turn the new dataset into a .csv file
#write.csv(Your DataFrame,"Path to export the DataFrame\\File Name.csv", row.names = FALSE)
#write.csv(spotify_2022_global, "/Users/oliviasapp/Documents/info201/project-group-1-section-aa/data/spotify_2022.csv" , row.names = FALSE)
# top 5 most popular songs globally
top_5 <- spotify_2022_global[order(spotify_2022_global$streams, decreasing = TRUE), ]
top_5 <- top_5[1:5, ]
top_5$streams <- as.numeric(top_5$streams)
View(top_5)
# Pepas, Blank Space, I'm Tired, Yonaguni, and Heather
# were the most streamed song of the year according to top_5
Here is the map part:
# makes the map template
world_map <- map_data("world")
ggplot(world_map, aes(x = long, y = lat, group = group)) +
geom_polygon(fill="lightgray", colour = "white")
# a new data frame that has all the abrevated country codes and the country names
abrevations <- read.csv("https://pkgstore.datahub.io/core/country-list/data_csv/data/d7c9d7cfb42cb69f4422dec222dbbaa8/data_csv.csv")
#abrevations <- read.csv("wikipedia-iso-country-codes.csv")
# shortens abreviations to only include names and 2 char codes
abrevations <- abrevations %>%
select(region = Name, Code)
abrevations$region <- str_replace(abrevations$region, "United States", "USA")
abrevations$region <- str_replace(abrevations$region, "Libyan Arab Jamahiriya", "Libya")
abrevations$region <- str_replace(abrevations$region, "Côte d'Ivoire", "Ivory Coast")
abrevations$region <- str_replace(abrevations$region, "Tanzania, United Republic of", "Tanzania")
abrevations$region <- str_replace(abrevations$region, "Republic of Democratic Republic of the Congo", "Democratic Republic of the Congo")
abrevations$region <- str_replace(abrevations$region, "Congo", "Republic of Congo")
abrevations$region <- str_replace(abrevations$region, "Republic of Republic of Republic of Congo", "Republic of Congo")
abrevations$region <- str_replace(abrevations$region, "South Sudan", "Sudan")
abrevations$region <- str_replace(abrevations$region, "Syrian Arab Republic", "Syria")
abrevations$region <- str_replace(abrevations$region, "Korea, Democratic People's Republic of", "North Korea")
abrevations$region <- str_replace(abrevations$region, "Korea, Republic of (South Korea)", "South Korea")
abrevations$region <- str_replace(abrevations$region, "Lao People's Democratic Republic", "Laos")
abrevations$region <- str_replace(abrevations$region, "United Kingdom", "UK")
abrevations$region <- str_replace(abrevations$region, "Moldova, Republic of", "Moldova")
abrevations$region <- str_replace(abrevations$region, "Macedonia, the former Yugoslav Republic of", "North Macedonia")
# makes a list off all the countries where the song was popular this year
get_song_streams <- function(song_name) {
song_streams <- spotify_2022 %>%
select(name, date, country, streams) %>%
filter(name == song_name) %>%
filter(country != "global") %>%
group_by(country) %>%
summarize(streams = sum(streams)) %>%
rename(Code = country)
song_streams$Code <- toupper(song_streams$Code) #capatalizes country codes
# data frame that join's abrevations with the modified countries that listened to a song
# if a country listened to Peopas they get a 1, if not they get a 0
abrevs <- left_join(abrevations, song_streams, by = "Code") %>%
replace(is.na(.), 0)
# fixes some of the names of the countries in abrevs so they match the countries in world_map
# dataframe that will go into the map
top_country.map <- left_join(world_map, abrevs, by = "region")
return(top_country.map)
}
# gets rid of grid lines
blank_theme <- theme_bw() +
theme(
axis.line = element_blank(), # remove axis lines
axis.text = element_blank(), # remove axis labels
axis.ticks = element_blank(), # remove axis ticks
axis.title = element_blank(), # remove axis titles
plot.background = element_blank(), # remove gray background
panel.grid.major = element_blank(), # remove major grid lines
panel.grid.minor = element_blank(), # remove minor grid lines
panel.border = element_blank() # remove border around plot
)
plot_song_map <- function(song_name){
# map of the world. Yellow countries listened to Blenk Space, blue countries did not
# grey countries means we have no data
plot<- ggplot(get_song_streams(song_name), aes(map_id = region, fill = streams))+
geom_map(map = get_song_streams(song_name), color = "white")+
expand_limits(x = get_song_streams(song_name)$long,
y = get_song_streams(song_name)$lat)+
ggtitle(paste("How popular was the song", song_name, "in each country?")) +
scale_fill_continuous(type = "viridis", labels = comma) +
labs(fill = "Streams") +
blank_theme
return(plot)
}
leaflet(plot_song_map("Blank Space"))
plot_song_map("Blank Space")
plot_song_map("I’m Tired (with Zendaya) - Bonus Track")
plot_song_map("Yonaguni")
I wonder how can I make this map interactive? So I can upload it to Shiny later on. I tried to use leaflet but it returns a blank.
Thank you so much in advance! Any comments or suggestions will help!
Related
I am trying to create a map using the Gapminder package in R.
I have a basic map working, using the choroplethr package. But creating the map requires some data manipulation and the map and legend aren't particularly pretty looking. For example:
library(gapminder)
library(dplyr)
library(choroplethr)
# load data
data(gapminder, package = "gapminder")
# set up data for plot
plotdata <- gapminder %>%
filter(year == 2007) %>%
rename(
region = country,
value = lifeExp
) %>%
mutate(region = tolower(region)) %>%
mutate(region = recode(region,
"united states" = "united states of america",
"congo, dem. rep." = "democratic republic of the congo",
"congo, rep." = "republic of congo",
"korea, dem. rep." = "south korea",
"korea. rep." = "north korea",
"tanzania" = "united republic of tanzania",
"serbia" = "republic of serbia",
"slovak republic" = "slovakia",
"yemen, rep." = "yemen"
))
# make plot
country_choropleth(plotdata, legend = "life expectancy")
I was wondering if there are any other packages that could allow me to plot a map of the life expectancy using the Gapminder data from the Gapminder package?
Preferably, I would like to keep the data manipulation as minimal as possible (I need to demonstrate it to teenagers)... although I realise this may not be possible. Is there an easier way to plot this data on a map?
Any suggestions would be greatly appreciated.
I am wanting to create a mask to color all parts of the image that are NOT within the specified land areas. I can get this to work successfully for two countries (e.g Burkina Faso and Nigeria). However, the same code does not work when I try a longer list of 9 countries in the same area. The error I am receiving is :
Error in [[<-.data.frame(tmp, attr(x, "sf_column"), value = list( : replacement has 2 rows, data has 1
Being relatively new to GIS, I am not quite sure what the issue is or how to resolve it. I would really appreciate some help if people could offer some insight. The method I used was based on the accepted answer from this question
Relevant code is below. Thankyou in advance for anyone who is able to help.
This code works perfectly fine
library(sf)
library(dplyr)
library(tidyverse)
library(rnaturalearth)
library(rnaturalearthdata)
# https://stackoverflow.com/questions/49266736/clip-spatial-polygon-by-world-map-in-r
x_coord = c(-20,-20,27,27)
y_coord = c(-1,28,28,-1)
# Create rectalgular polygon for a mask (slightly bigger than the image)
polygon <- cbind(x_coord, y_coord) %>%
st_linestring() %>%
st_cast("POLYGON") %>%
st_sfc(crs = 4326, check_ring_dir = TRUE) %>%
st_sf() %>%
st_wrap_dateline()
land = ne_countries(scale = "medium",
returnclass = "sf") %>%
filter(admin %in% c("Nigeria", "Burkina Faso"))
#filter(admin %in% c("Niger", "Burkina Faso", "Nigeria", "Mali", "Chad", "Mauritania",
# "Gambia", "Senegal", "Guinea", "Cameroon"))
# Get the difference between the bounding box and the area I want to plot
# This works fine when using only two countries
land = st_union(land)
polygon_land_diff = st_difference(polygon, land)
# Plot the data
plot(st_geometry(land))
plot(st_geometry(polygon), add=TRUE)
plot(st_geometry(polygon_land_diff),add=TRUE, col="green")
This code produces an error and I am not sure how to resolve it. The only difference is the number of countries selected
x_coord = c(-20,-20,27,27)
y_coord = c(-1,28,28,-1)
# Create rectalgular polygon for a mask (slightly bigger than the image)
polygon <- cbind(x_coord, y_coord) %>%
st_linestring() %>%
st_cast("POLYGON") %>%
st_sfc(crs = 4326, check_ring_dir = TRUE) %>%
st_sf() %>%
st_wrap_dateline()
land = ne_countries(scale = "medium",
returnclass = "sf") %>%
filter(admin %in% c("Niger", "Burkina Faso", "Nigeria", "Mali", "Chad", "Mauritania",
"Gambia", "Senegal", "Guinea", "Cameroon"))
# get the difference between the bounding box and the area I want to plot
# not quite working. The rror occurs when running `polygon_land_diff = st_difference(polygon, land)`
land = st_union(land)
polygon_land_diff = st_difference(polygon, land)
# Plot the data
plot(st_geometry(land))
plot(st_geometry(polygon), add=TRUE)
plot(st_geometry(polygon_land_diff),add=TRUE, col="green")
st_difference() is generic function and I guess it faild to choice appropriate method at your latter case. (I didn't read source code, this is an estimate.)
Maybe it choice not sfc but sf method. (because Polygon is class sf)
you can get desired output by making polygon sfc class or using sf:::st_difference.sfc directly.
Below is an example;
x_coord = c(-20,-20,27,27)
y_coord = c(-1,28,28,-1)
polygon <- cbind(x_coord, y_coord) %>%
st_linestring() %>%
st_cast("POLYGON") %>%
st_sfc(crs = 4326, check_ring_dir = TRUE)# %>% # keep sfc class
# st_sf() %>%
# st_wrap_dateline()
land = ne_countries(scale = "medium",
returnclass = "sf") %>%
filter(admin %in% c("Niger", "Burkina Faso", "Nigeria", "Mali", "Chad", "Mauritania",
"Gambia", "Senegal", "Guinea", "Cameroon"))
land = st_union(land)
polygon_land_diff = st_difference(polygon, land)
# Plot the data
plot(st_geometry(land))
plot(st_geometry(polygon), add=TRUE)
plot(st_geometry(polygon_land_diff),add=TRUE, col="green")
I ran 2 aggregate commands:
Outright_owners = aggregate(csv_data$Owned...outright~csv_data$State, csv_data, FUN = sum)
and
Mortgage_owners = aggregate(csv_data$Owned...with.a.mortgage~csv_data$State, csv_data, FUN = sum)
This is the output of Outright_owners aggregate:
csv_data$State
csv_data$Owned...outright
New South Wales
819828
Northern Territory
9297
Queensland
448627
South Australia
202917
Tasmania
69784
Victoria
665363
Western Australia
234608
This is the output of Mortgage_owners aggregate:
csv_data$State
csv_data$Owned...with.a.mortgage
New South Wales
824168
Northern Territory
18497
Queensland
533879
South Australia
218264
Tasmania
65921
Victoria
697520
Western Australia
300355
I want code that would create a stacked bar-plot that would look something like this
I haven't tried anything specific yet, so I don't have any minimum reproducible code. I'm not sure where even to begin.
Thanks in advance :)
Update:
Something like this:
To show the data better I would log the y scale:
join the data with left_join
bring it to long form with pivot_longer
apply geom_col
use log y.
With ggplot2
library(tidyverse)
left_join(df1, df2, by="State") %>%
pivot_longer(
cols = -State
) %>%
ggplot(aes(x = State, y=log(value), fill=name, label=value))+
geom_col() +
geom_text(size = 5, position = position_stack(vjust = 0.9))
With base R before base R, I used tidyr package to bring it to wide format.
df1_w <- df1 %>%
pivot_wider(
names_from = State,
values_from = mortgage
)
df2_w <- df2 %>%
pivot_wider(
names_from = State,
values_from = mortgage
)
Plot with base R
data <- as.matrix(data.frame(rbind(df1_w, df2_w)))
rownames(data) <- c("outright", "mortgage")
barplot(data,
col = c("green", "red"))
legend("topright",
legend = c("outright", "mortgage"),
fill = c("green", "red"))
data:
df1 <- structure(list(State = c("New South Wales", "Northern Territory",
"Queensland", "South Australia", "Tasmania", "Victoria", "Western Australia"
), outright = c(819828L, 9297L, 448627L, 202917L, 69784L, 665363L,
234608L)), class = "data.frame", row.names = c(NA, -7L))
df2 <- structure(list(State = c("New South Wales", "Northern Territory",
"Queensland", "South Australia", "Tasmania", "Victoria", "Western Australia"
), mortgage = c(824168L, 18497L, 533879L, 218264L, 65921L, 697520L,
300355L)), class = "data.frame", row.names = c(NA, -7L))
I am new in R and creating a function that highlight list of countries from the data set in the plot.
functionality issue If country names are not passed as arguments (which can vary) then it should be able to take from default list of countries.
I understand ... is used for variable arguments and then may be I can use list(...) but I am not able to put this together with default values.
Is there a way I can write: country_highlight_plot(Australia, Singapore, Norway)
and if I don't mention any country then it takes default countries.
Below is the code (using gapminder data to reproduce):
library(tidyverse)
library(gghighlight)
library(scales)
library(gapminder)
country_highlight_plot <- function(df = gapminder, y_var = gdpPercap,
background_line_color = "grey",
countries = default_list
){
# default list of highlight countries
default_list = c("India","Singapore","Malaysia","Norway",
"Denmark","United States","United Kingdom","China")
# quoting y-axis variable
y_var = enquo(y_var)
# Data Prep.
df %>%
mutate(highlight_type = case_when(country %in% countries ~ "Yes",
TRUE ~ "No")) %>%
# Plotting
ggplot() +
geom_line(aes(x = year, y = round(!!y_var,2), col = country), size = 1.1) +
gghighlight(highlight_type == "Yes",
unhighlighted_params = list(size = 1, colour = alpha(background_line_color, 0.4))) +
# facet_wrap(~continent) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90)
) +
labs(title = "GDP/Cap for world countries across time",
subtitle = "created by ViSa",
caption = "Data Source: Gapminder",
y = "Total Tax Revenue % of GDP"
)
}
country_highlight_plot()
# EDITED below line to gapminder df only
# country_highlight_plot(df=gapminder, y_var=gdpPercap, background_line_color= "pink")
I think setting a sane default value and checking for it makes sense to me.
I tend to differentiate between NULL and NA, where NULL means something like "use nothing or everything" and NA means "use a sane default".
Untested:
country_highlight_plot <- function(df = gapminder, y_var = gdpPercap,
background_line_color = "grey",
countries = NA) {
if (is.null(countries)) {
countries <- sort(unique(df[["country"]])) # assuming 'country' is in there
} else if (anyNA(countries)) {
countries <- c("India", "Singapore", "Malaysia", "Norway",
"Denmark", "United States", "United Kingdom", "China")
countries <- intersect(countries, unique(df[["country"]]))
}
# ...
}
This allows one to use country_highlight_plot(..., countries=NULL) for all countries in df, and country_highlight_plot(..., countries=NA) for your default list of countries. The intersect call ensures that if this function is called as part of a filtered-gapminder dataset, it won't look for countries that are not present. (Based on your current %in% usage, this might not be strictly necessary ... but if you use countries for anything else, it might still be useful. Defensive programming.)
Tangentially: if you're writing a function as part of a package, then I suggest you should import (mandate) the gapminder package, and then use a similar technique for using that dataset:
country_highlight_plot <- function(df, y_var = gdpPercap,
background_line_color = "grey",
countries = NA) {
if (missing(df)) {
df <- get("gapminder", envir = asNamespace("gapminder"))
}
if (is.null(countries)) {
countries <- sort(unique(df[["country"]])) # assuming 'country' is in there
} else if (anyNA(countries)) {
countries <- c("India", "Singapore", "Malaysia", "Norway",
"Denmark", "United States", "United Kingdom", "China")
}
# ...
}
With this, you can do
# default data, your default_list
country_highlight_plot()
# default data, all countries
country_highlight_plot(countries = NULL)
# pre-defined data, all default_list countries found
dplyr::filter(gapminder, ...) %>%
country_highlight_plot(.)
# pre-defined data, all countries found
dplyr::filter(gapminder, ...) %>%
country_highlight_plot(., countries = NULL)
# default data, manual countries
country_highlight_plot(countries = c("a","B"))
I would like to use R to generate a very basic world map with a specific set of countries filled with a red colour to indicate that they are malaria endemic countries.
I have a list of these countries in a data frame but am struggling to overlay them on a world map.
I have tried using the wrld_simpl object and also the joinCountryData2Map method in the rworldmap package.
I would comment on this answer to prevent addition of a possibly redundant question but I do not have enough reputation at the moment, apologies for this.
https://stackoverflow.com/a/9102797/1470099
I am having difficulty understanding the arguments given to the plot() command - I wondered if there was just an easy way to tell R to plot all of the country NAMEs in my list on the wrld_simpl map instead of using grepl() etc. etc.
plot(wrld_simpl,
col = c(gray(.80), "red")[grepl("^U", wrld_simpl#data$NAME) + 1])
Using the rworldmap package, you could use the following:
library(rworldmap)
theCountries <- c("DEU", "COD", "BFA")
# These are the ISO3 names of the countries you'd like to plot in red
malDF <- data.frame(country = c("DEU", "COD", "BFA"),
malaria = c(1, 1, 1))
# malDF is a data.frame with the ISO3 country names plus a variable to
# merge to the map data
malMap <- joinCountryData2Map(malDF, joinCode = "ISO3",
nameJoinColumn = "country")
# This will join your malDF data.frame to the country map data
mapCountryData(malMap, nameColumnToPlot="malaria", catMethod = "categorical",
missingCountryCol = gray(.8))
# And this will plot it, with the trick that the color palette's first
# color is red
EDIT: Add other colors and include picture
## Create multiple color codes, with Burkina Faso in its own group
malDF <- data.frame(country = c("DEU", "COD", "BFA"),
malaria = c(1, 1, 2))
## Re-merge
malMap <- joinCountryData2Map(malDF, joinCode = "ISO3",
nameJoinColumn = "country")
## Specify the colourPalette argument
mapCountryData(malMap, nameColumnToPlot="malaria", catMethod = "categorical",
missingCountryCol = gray(.8), colourPalette = c("red", "blue"))
Try using googleVis package and use gvisGeoMap Functions
e.g.
G1 <- gvisGeoMap(Exports,locationvar='Country',numvar='Profit',options=list(dataMode='regions'))
plot(G1)
library(maptools)
data(wrld_simpl)
myCountries = wrld_simpl#data$NAME %in% c("Australia", "United Kingdom", "Germany", "United States", "Sweden", "Netherlands", "New Zealand")
plot(wrld_simpl, col = c(gray(.80), "red")[myCountries+1])