Order by name if repeat - r

I would like to sort the bars in descending order by value and if the value is repeated the name of the city must appear in alphabetical order
library(plotly)
city <- c("Paris", "New York", "Rio", "Salvador", "Curitiba", "Natal")
value <- c(10,20,30,10,10,10)
data <- data.frame(city, value, stringsAsFactors = FALSE)
data$city <- factor(data$city, levels = unique(data$city)[order(data$value, decreasing = FALSE)])
fig <- plot_ly(y = data$city, x = data$value, type = "bar", orientation = 'h')

Can be achieved using order function on dataframe. Applies order on value column, (-) sign indicates decreasing, and then on city name
data_ordered <- data[order(-data$value, data$city),]
data_ordered
city value
3 Rio 30
2 New York 20
5 Curitiba 10
6 Natal 10
1 Paris 10
4 Salvador 10
data_ordered$city <- factor(data_ordered$city, levels = data_ordered$city)
plot_ly(y = data_ordered$city, x = data_ordered$value, type = "bar", orientation = 'h') %>%
layout(yaxis = list(autorange = "reversed"))

Using tidyverse, i suggest that :
library(tidyverse)
city <- c("Paris", "New York", "Rio", "Salvador", "Curitiba", "Natal")
value <- c(10,20,30,10,10,10)
data <- data.frame(city, value)
db <- as_tibble(data)
db %>%
ggplot(aes(x = reorder(city, -value), y=value))+
geom_col()
The "reorder" function in the definition of "x" make what you want, and the alphabetical order is respected.
To make this graph vertically, add coord_flip in the end.
The "-value" can be switch to "value" if you want reorder
library(tidyverse)
city <- c("Paris", "New York", "Rio", "Salvador", "Curitiba", "Natal", "Zoo", "Aaa")
value <- c(10,20,30,10,10,10,10,10)
data <- data.frame(city, value)
db <- as_tibble(data)
db %>%
ggplot(aes(x = reorder(city, value), y=value))+
geom_col() +
coord_flip()

Related

Create an interactive world map using R and shiny

I have a dataset, more especifically the Unicorn Company data set. I want to create an interactive choropleth that has countries with higher mean valuation would have darker color, that when user clicks on the country it would display the name + valuation of that country.
output$map_plot <- renderPlotly({
# Get the average valuation for each country
industry_investors_data <- unicorn_countries_clustering_cleaned %>%
group_by(Country) %>%
summarize(Valuation = mean(Valuation...B.))
world_map_data <- map_data("world2")
#print(sort(unique(ggplot2::map_data("world")$region)))
# Merge the map data with your data and fill in missing values
world_map_valuation <- world_map_data %>%
right_join(industry_investors_data, by = c("region" = "Country")) %>%
mutate(Valuation = coalesce(Valuation, 0.0))
plot_ly(data = world_map_valuation,
locations = ~region,
z = ~Valuation,
type = "choropleth",
locationmode = "country names",
color = ~Valuation,
colors = "Blues",
title = "Map of the world by country valuation",
showlegend = FALSE)
})
This shows a map on the worldly valuation however it takes very long to render and is not interactive in any way. Before I had left_join instead of right join the result was the same.

Colour specific countries on a worldmap and adding mappoint cities

I want to design a worldmap to show from which country and which city the participants to my survey come from. I used the highcharter package.
First part is : colour the countries --> it worked well ! A scale is created from 0 to 1.
Second part is : adding the cities --> the points are created but the countries colored in blue disappeared ! The scale has changed and is now induced from cities.
I try to change the order of my code but nothing is working.
library(dplyr)
library(maps)
library(magrittr)
# I use the dataset called iso3166 from the {maps} package and rename it date
dat <- iso3166
head(dat)
# I rename the variable a3 by iso-a3
dat <- rename(dat, "iso-a3" = a3)
head(dat)
# I create a vector with the countries I want to colour
part1X_countries <- c("CHE", "FRA", "USA", "GBR", "CAN", "BRA")
dat$part1X <- ifelse(dat$`iso-a3` %in% part1X_countries, 1, 0)
head(dat)
# I add the name of cities with geographical coordinates
cities <- data.frame(
name = c("St Gallen", "Fort Lauderdale", "Paris", "Nottingham", "Winnipeg", "Chicago", "Leeds", "Montréal", "New Rochelle", "São Paulo", "Saint-Genis-Pouilly", "Canterbury"),
lat = c(47.42391, 26.122438, 48.866667, 52.950001, 49.8955, 41.881832, 53.801277, 45.5016889, 40.9232, -23.5489, 46.24356, 51.279999),
lon = c(9.37477, -80.137314, 2.333333, -1.150000, -97.1383, -87.623177, -1.548567, -73.567256, -73.7793, -46.6388, 6.02119, 1.080000))
# I create my worldmap with countries and cities
worldmap <- hcmap(
map = "custom/world-highres3", # high resolution world map
data = dat, # name of dataset
value = "part1X",
joinBy = "iso-a3",
showInLegend = FALSE, # hide legend
download_map_data = TRUE
) %>%
hc_add_series(
data = cities,
type = "mappoint",
name = "Cities"
) %>%
hc_title(text = "Representation of participants by country")```
You need to define a colorkey and add a color axis for the hcmap. The below code keeps the colors from the countries and has the name of the countries added on top as black map points.
worldmap <- hcmap(
map = "custom/world-highres3", # high resolution world map
data = dat, # name of dataset
value = "part1X",
joinBy = "iso-a3",
colorKey = "value",
showInLegend = F, # hide legend
download_map_data = TRUE) %>%
hc_colorAxis(min = min(dat$part1X),
max = max(dat$part1X)) %>%
hc_add_series(
data = cities,
type = "mappoint",
name = "Cities",
dataLabels = list(enabled = TRUE, format = '{point.name}'),
latField = "lat",
longField = "lon",
# color = "color"
valueField = "part1X"
) %>%
hc_title(text = "Representation of participants by country")
worldmap

Unable to find the distance between the centroid of a census tract and school location coordinates

library(tidyverse)
library(tidycensus)
library(sf)
library(sp)
#install.packages('geosphere')
library('geosphere')
library(rgeos)
library(sfheaders)
#install.packages('reshape')
library('reshape')
#> Linking to GEOS 3.6.1, GDAL 2.1.3, PROJ 4.9.3
census_tract <- get_acs(geography = "tract",
variables = "B19013_001",
state = "CA",
county = c("San Joaquin","Merced","stanislaus"),
geometry = TRUE,
year = 2020)
plot(st_geometry(census_tract), axes = T)
plot(st_centroid(st_geometry(census_tract)), pch = "+", col = "red", add = T)
library(ggplot2)
ggplot(census_tract) + geom_sf() +
geom_sf(aes(geometry = st_centroid(st_geometry(census_tract))), colour = "red")
census_tract$centroid <- st_centroid(st_geometry(census_tract))
schoolloc <- read.csv("C:/Users/rlnu/Desktop/EXAMPLE/pubschls.csv")
schoolloc <- schoolloc%>% filter(County == c("San Joaquin","Merced","Stanislaus"))
census_tract <- census_tract %>%
mutate(long = unlist(map(census_tract$centroid,1)),
lat = unlist(map(census_tract$centroid,2)))
shortest_distance$min_distance <- expand.grid.df(census_tract,schoolloc) %>%
mutate(distance = distHaversine(p1 = cbind(long,lat),
p2 = cbind(Longitude,Latitude))
`
I am trying to find distance between the each census tract's centroid to three nearest schools. please help me out with it. I have written some code . The logic is wrong and the code is not working
Can achieve this using the sf package.
I could not access you schools data so made a dummy set of 4 schools.
library(sf)
schools <- data.frame(School_Name=c("School_1", "School_2", "School_3", "School_4"), Lat=c(37.83405, 38.10867, 37.97743, 37.51615), Long=c(-121.2810, -121.2312, -121.2575, -120.8772)) %>% st_as_sf(coords=c("Long", "Lat"), crs=4326)
Convert tracts to centroids and make the crs the same as the school set then calculate the distance matrix
census_centroid <- st_centroid(census_tract) %>% st_transform(4326)
DISTS<- st_distance(census_centroid, schools)
Rename the columns to be the school IDs
colnames(DISTS) <- schools$School_Name
link it back to centoids
cent_dists <- cbind(census_centroid, DISTS) %>% #bind ditances to centroids
pivot_longer(cols = -names(census_centroid), names_to = "School Name", values_to = "Distance") %>% #make long for ordering
group_by(NAME) %>% #group by centroid
slice_min(Distance,n= 3) %>% # take three closest
mutate(Near_No=paste0("Near_School_",rep(1:3))) #School distance ranking
Make wide if one row per census centroid desired, might want to play with column order though
cent_dists_wide <- cent_dists %>%
pivot_wider(names_from = c("Near_No"), values_from = c("Distance", "School Name"), names_sort = FALSE) #make wid if wyou want one row per centoid

Order legend in a R Plotly Bubblemap following factor order

I'm working on a Bubble map where I generated two columns, one for a color id (column Color) and one for a text refering to the id (column Class). This is a classification of my individuals (Color always belongs to Class).
Class is a factor following a certain order that I made with :
COME1039$Class <- as.factor(COME1039$Class, levels = c('moins de 100 000 F.CFP',
'entre 100 000 et 5 millions F.CFP',
'entre 5 millions et 1 milliard F.CFP',
'entre 1 milliard et 20 milliards F.CFP',
'plus de 20 milliards F.CFP'))
This is my code
g <- list(
scope = 'world',
visible = F,
showland = TRUE,
landcolor = toRGB("#EAECEE"),
showcountries = T,
countrycolor = toRGB("#D6DBDF"),
showocean = T,
oceancolor = toRGB("#808B96")
)
COM.g1 <- plot_geo(data = COME1039,
sizes = c(1, 700))
COM.g1 <- COM.g1 %>% add_markers(
x = ~LONGITUDE,
y = ~LATITUDE,
name = ~Class,
size = ~`Poids Imports`,
color = ~Color,
colors=c(ispfPalette[c(1,2,3,7,6)]),
text=sprintf("<b>%s</b> <br>Poids imports: %s tonnes<br>Valeur imports: %s millions de F.CFP",
COME1039$NomISO,
formatC(COME1039$`Poids Imports`/1000,
small.interval = ",",
digits = 1,
big.mark = " ",
decimal.mark = ",",
format = "f"),
formatC(COME1039$`Valeur Imports`/1000000,
small.interval = ",",
digits = 1,
big.mark = " ",
decimal.mark = ",",
format = "f")),
hovertemplate = "%{text}<extra></extra>"
)
COM.g1 <- COM.g1%>% layout(geo=g)
COM.g1 <- COM.g1%>% layout(dragmode=F)
COM.g1 <- COM.g1 %>% layout(showlegend=T)
COM.g1 <- COM.g1 %>% layout(legend = list(title=list(text='Valeurs des importations<br>'),
orientation = "h",
itemsizing='constant',
x=0,
y=0)) %>% hide_colorbar()
COM.g1
Unfortunately my data are too big to be added here, but this is the output I get :
As you can see, the order of the legend is not the one of the factor levels. How to get it ? If data are mandatory to help you to give me a hint, I will try to limit their size.
Many thanks !
Plotly is going to alphabetize your legend and you have to 'make' it listen. The order of the traces in your plot is the order in which the items appear in your legend. So if you rearrange the traces in the object, you'll rearrange the legend.
I don't have your data, so I used some data from rnaturalearth.
First I created a plot, using plot_geo. Then I used plotly_build() to make sure I had the trace order in the Plotly object. I used lapply to investigate the current order of the traces. Then I created a new order, rearranged the traces, and plotted it again.
The initial plot and build.
library(tidyverse)
library(plotly)
library(rnaturalearth)
canada <- ne_states(country = "Canada", returnclass = "SF")
x = plot_geo(canada, sizes = c(1, 700)) %>%
add_markers(x = ~longitude, y = ~latitude,
name = ~name, color = ~name)
x <- plotly_build(x) # capture all elements of the object
Now for the investigation; this is more so you can see how this all comes together.
# what order are they in?
y = vector()
invisible(
lapply(1:length(x$x$data),
function(i) {
z <- x$x$data[[i]]$name
message(i, " ", z)
})
)
# 1 Alberta
# 2 British Columbia
# 3 Manitoba
# 4 New Brunswick
# 5 Newfoundland and Labrador
# 6 Northwest Territories
# 7 Nova Scotia
# 8 Nunavut
# 9 Ontario
# 10 Prince Edward Island
# 11 Québec
# 12 Saskatchewan
# 13 Yukon
In your question, you show that you made the legend element a factor. That's what I've done as well with this data.
can2 = canada %>%
mutate(name = ordered(name,
levels = c("Manitoba", "New Brunswick",
"Newfoundland and Labrador",
"Northwest Territories",
"Alberta", "British Columbia",
"Nova Scotia", "Nunavut",
"Ontario", "Prince Edward Island",
"Québec", "Saskatchewan", "Yukon")))
I used the data to reorder the traces in my Plotly object. This creates a vector. It starts with the levels and their row number or order (1:13). Then I alphabetized the data by the levels (so it matches the current order in the Plotly object).
The output of this set of function calls is a vector of numbers (i.e., 5, 6, 1, etc.). Since I have 13 names, I have 1:13. You could always make it dynamic, as well 1:length(levels(can2$name).
# capture order
df1 = data.frame(who = levels(can2$name), ord = 1:13) %>%
arrange(who) %>% select(ord) %>% unlist()
Now all that's left is to rearrange the object traces and visualize it.
x$x$data = x$x$data[order(c(df1))] # reorder the traces
x # visualize
Originally:
With reordered traces:

How to draw bar chart in R, based on Levels?

I will put my data first, to better understand the question:
amount city agent address
1 Madras Vinod 45/BA
2 Kalkta Bola 56/AS
3 Mumbai Pavan 44/AA
4 Tasha Barez 58/SD
5 Tasha Khan 22/AW
6 Madras Baaz 56/QE
7 Mumbai Neer 99/CC
8 Mumbai Bazan 97/DF
I am learning R. In a scenario, I want to calculate the total numbers of amount in a specific city and then draw a bar chart for that, showing all cities. Considering the data above, I want something like this:
amount city
7 Madras
2 Kalkta
18 Mumbai
9 Tasha
After some searching I found that aggregate function can help, but I faced a problem that says the length is not the same.
Would you please tell me, how can I achieve this?
base R
res <- do.call(rbind,
by(dat, dat$city, FUN = function(z) data.frame(city = z$city[1], amount = sum(z$amount)))
)
barplot(res$amount, names.arg=res$city)
tidyverse
library(dplyr)
res <- dat %>%
group_by(city) %>%
summarize(amount = sum(amount))
barplot(res$amount, names.arg=res$city)
Data
dat <- structure(list(amount = 1:8, city = c("Madras", "Kalkta", "Mumbai", "Tasha", "Tasha", "Madras", "Mumbai", "Mumbai"), agent = c("Vinod", "Bola", "Pavan", "Barez", "Khan", "Baaz", "Neer", "Bazan"), address = c("45/BA", "56/AS", "44/AA", "58/SD", "22/AW", "56/QE", "99/CC", "97/DF")), class = "data.frame", row.names = c(NA, -8L))
Another way to do it using the tidyverse
amount <- c(1,2,3,4,5,6,7,8)
city <- c("Madras", "Kalkta", "Mumbai", "Tasha", "Tasha", "Madras", "Mumbai",
"Mumbai")
df <- tibble(amount = amount, city = city)
df %>%
group_by(city) %>%
summarise(amount = sum(amount, na.rm = T)) %>%
ggplot(aes(x = city, y = amount)) +
geom_col() +
geom_label(aes(label = amount)) +
theme_bw()

Resources