I am creating an interactive graph using ggplot2 and plotly in R, the code is below.
I want to reorder the barchart column values so that they are sorted in descending order,
currently they are sorted alphabetically.
Edit: I might not have made what I wanted clear. Currently, the midfielder with the most points is Salah, but the top row in my midfielder column is currently Alli. I would like to sort the column so that the values are in descending order of points rather than alphabetical.
Would someone please inform me how I can do this?
I have saved the finished graph & csv file at the below locations:
IG: https://ianfm94.github.io/Premier_League_Stats/Top_100_Fantasy_PL_Pointscorers.html
CSV File: https://github.com/Ianfm94/Premier_League_Stats/blob/master/CSV_Files/2020-06-01_updated_fpl_stats.csv
rm(list=ls())
# Required packages, you might need to install these
library(ggplot2)
library(dplyr)
library(plotly)
library(tibble)
## Fantasy_PL Data
fpl_data = read.csv('2020-06-01_updated_fpl_stats.csv',
header = T, fileEncoding = "UTF-8-BOM")
attach(fpl_data)
#View(fpl_data)
# Interactive Plot Workings
top_100_points = total_points[0:100]
top_100_player_pos = factor(player_pos)[0:100]
top_100_surnames = factor(web_name)[0:100]
top_100_team = factor(team_name)[0:100]
color_table = tibble(
Team_Name = c("Arsenal", "Aston Villa", "Bournemouth", "Brighton & Hove Albion",
"Burnley", "Chelsea", "Crystal Palace", "Everton",
"Leicester City", "Liverpool", "Manchester City",
"Manchester United", "Newcastle United", "Norwich City",
"Sheffield United", "Southampton", "Tottenham Hotspurs",
"Watford", "West Ham United", "Wolverhampton Wanderers"),
Team_Color = c("#EF0107", "#670E36", "#B50E12", "#0057B8",
"#6C1D45", "#034694", "#1B458F", "#003399",
"#003090", "#C8102E", "#6CABDD", "#DA291C",
"#241F20", "#FFF200", "#EE2737", "#D71920",
"#132257", "#FBEE23", "#7A263A", "#FDB913")
)
position_table = tibble(
Position_Name = c("Goalkeeper", "Defender", "Midfielder", "Striker"),
)
fpl_df = data.frame(y = top_100_points,
x = top_100_player_pos,
z = top_100_surnames,
w = top_100_team,
stringsAsFactors = F)
fpl_df$w = factor(fpl_df$w, levels = color_table$Team_Name)
fpl_df$x = factor(fpl_df$x, levels = position_table$Position_Name)
names(fpl_df)[names(fpl_df) == "x"] = "Position_Name"
names(fpl_df)[names(fpl_df) == "y"] = "Total_Points_by_Position"
names(fpl_df)[names(fpl_df) == "z"] = "Player_Surname"
names(fpl_df)[names(fpl_df) == "w"] = "Team_Name"
#View(fpl_df)
plot_fpl_1 = ggplot(fpl_df, aes(x = Position_Name,
y = Total_Points_by_Position,
z = Player_Surname,
fill = Team_Name)) +
geom_col() +
scale_fill_manual(values = color_table$Team_Color) +
labs(title = "Top 100 Fantasy PL Pointscorer by Position & Team",
y = "Total Points of Position",
x = "Player Positions",
fill = "Team Name") +
theme_bw() +
theme(plot.title = element_text(size = 14,
face = "bold",
color = "black"),
legend.title = element_text(color = "navy",
face = "bold",
size = 10))
plot_fpl_1 = ggplotly(plot_fpl_1)
plot_fpl_1
You can use forcats::fct_reorder to change the order of z. See below:
Libraries:
# Required packages, you might need to install these
library(ggplot2)
library(dplyr)
library(plotly)
library(tibble)
library(RCurl)
library(forcats)
Data:
## Fantasy_PL Data
csvurl <- getURL("https://raw.githubusercontent.com/Ianfm94/Premier_League_Stats/master/CSV_Files/2020-06-01_updated_fpl_stats.csv")
fpl_data <- read.csv(text = csvurl)
attach(fpl_data)
# Interactive Plot Workings
top_100_points = total_points[0:100]
top_100_player_pos = factor(player_pos)[0:100]
top_100_surnames = factor(web_name)[0:100]
top_100_team = factor(team_name)[0:100]
color_table = tibble(
Team_Name = c("Arsenal", "Aston Villa", "Bournemouth", "Brighton & Hove Albion",
"Burnley", "Chelsea", "Crystal Palace", "Everton",
"Leicester City", "Liverpool", "Manchester City",
"Manchester United", "Newcastle United", "Norwich City",
"Sheffield United", "Southampton", "Tottenham Hotspurs",
"Watford", "West Ham United", "Wolverhampton Wanderers"),
Team_Color = c("#EF0107", "#670E36", "#B50E12", "#0057B8",
"#6C1D45", "#034694", "#1B458F", "#003399",
"#003090", "#C8102E", "#6CABDD", "#DA291C",
"#241F20", "#FFF200", "#EE2737", "#D71920",
"#132257", "#FBEE23", "#7A263A", "#FDB913")
)
position_table = tibble(
Position_Name = c("Goalkeeper", "Defender", "Midfielder", "Striker"),
)
fpl_df = data.frame(y = top_100_points,
x = top_100_player_pos,
z = top_100_surnames,
w = top_100_team,
stringsAsFactors = F)
fpl_df$w = factor(fpl_df$w, levels = color_table$Team_Name)
fpl_df$x = factor(fpl_df$x, levels = position_table$Position_Name)
names(fpl_df)[names(fpl_df) == "x"] = "Position_Name"
names(fpl_df)[names(fpl_df) == "y"] = "Total_Points_by_Position"
names(fpl_df)[names(fpl_df) == "z"] = "Player_Surname"
names(fpl_df)[names(fpl_df) == "w"] = "Team_Name"
Plot:
plot_fpl_1 = ggplot(fpl_df, aes(x = Position_Name,
y = Total_Points_by_Position,
z = fct_reorder(Player_Surname, -Total_Points_by_Position),
fill = Team_Name)) +
geom_col() +
scale_fill_manual(values = color_table$Team_Color) +
labs(title = "Top 100 Fantasy PL Pointscorer by Position & Team",
y = "Total Points of Position",
x = "Player Positions",
fill = "Team Name") +
theme_bw() +
theme(plot.title = element_text(size = 14,
face = "bold",
color = "black"),
legend.title = element_text(color = "navy",
face = "bold",
size = 10))
plot_fpl_2 = ggplotly(plot_fpl_1)
plot_fpl_2
Related
I wrote this code for my R assignment, and every time I run it, it crashes. I am guessing there is an error that's making it terminate.
Anyone can tell me what it is?
library(shiny)
library(shinythemes) library(tidyverse) library(DT)
load("~/Desktop/Yasmine Nahdi - Fifa Analysis - FINAL/fifa.RData")
main_page <- tabPanel( title = "Player Analysis", titlePanel("Player Analysis"), sidebarLayout(
sidebarPanel(
title = "Inputs",
sliderInput("year", h4("Year"), sep = "",
min = 2017, max = 2021,
value = c(2018, 2020), step = 1),
numericInput("max_country", h4("Nationality/Country"),
min = 2, max = 12, step = 1, value = 6),
selectInput("short_name", h4("Player"),
choices = c("All players", as.character(unique(fifa$short_name))),
selected = c("All players"), multiple = TRUE),
selectInput("team_position", h4("Player positions"),
choices = c("All positions", as.character(unique(fifa$team_position)) ),
selected = c("All positions"), multiple = TRUE)
),
mainPanel(
tabsetPanel(
tabPanel(
title = "Plots",
fluidRow(column(6, plotOutput("playerage")),
column(6, plotOutput("weightvsmovement"))),
fluidRow(column(6, plotOutput("playerBMI")),
column(6, plotOutput("movementvsBMI"))),
fluidRow(column(6, plotOutput("nationalitybyposition")),
column(6, plotOutput("valuebyposition"))))
)
)
) )
about_page <- tabPanel( title = "About" )
ui <- navbarPage( theme = shinytheme('united'), title = "Fifa Career Mode Analysis", main_page, about_page )
server <- function(input, output){ data <- reactive({
if(input$short_name == "All players"){
fifa %>%
filter(year >= input$year[1],
year <= input$year[2])
} else {
fifa %>%
filter(year >= input$year[1],
year <= input$year[2],
Player %in% input$short_name)
} })
# (1) General Analysis : Player-focused
#distribution of players ages output$playerage <- renderPlot({
data() %>%
ggplot(aes(x = age)) + geom_histogram(binwidth = 0.1) +
geom_vline(aes(xintercept = mean(age)), linetype = "dashed", size = 1) +
theme_minimal() +
ggtitle("Distribution of Players' age") })
#player weight vs movement output$weightvsmovement <- renderPlot({
data() %>%
ggplot(aes(x= weight_kg, y=Movement, color=overall)) +
geom_point() +
ggtitle("Players' movement in relation to their weight(kg)") })
#distribution of player BMI output$playerBMI <- renderPlot({
data() %>%
mutate(BMI = weight_kg / (height_cm**2) *10000) %>%
ggplot(aes(x = BMI)) +
geom_histogram(aes(y = ..density..)) +
geom_density(color = "red") +
geom_vline(aes(xintercept = mean(BMI)), linetype = "dashed", size = 1) +
theme_minimal() +
ggtitle("Distribution of Players' BMI") +
labs(y= "Density", x = "BMI") })
#movement by BMI output$movementvsBMI <- renderPlot({
data() %>%
mutate(BMI = weight_kg / (height_cm**2) *10000) %>%
ggplot(aes(x = BMI, y= Movement)) +
geom_point() +
geom_smooth( se =F) +
ggtitle("Players' movement in relation to their BMI") })
#players preferred foot output$preferredfoot <- renderPlot({
data() %>%
ggplot(aes(x = preferred_foot)) +
geom_bar() +
theme_minimal() +
ggtitle("Player's preferred foot count") +
labs(x= "Preferred foot", y="Number of players") })
# Correlation between player position, age, and performance
attackers <- c("ST","CF","RW","LW","LF","RF", "LS") midfielders <- c("CM","CAM","CDM","LM","RM","LAM","RAM","LCM","RCM","RDM","LDM") defenders <- c("CB","LB","RB","RWB","LWB","RCB","LCB") other <- c("SUB", "RES")
filter_position <- reactive({
if(input$team_position == "All positions"){
fifa %>%
filter(year >= input$year[1],
year <= input$year[2])
} else {
fifa %>%
filter(year >= input$year[1],
year <= input$year[2],
team_position %in% input$team_position)
} })
output$positionageperf <- renderPlot({
fifa <- fifa %>%
mutate(Position = case_when(
team_position %in% attackers ~ "Attacker",
team_position %in% midfielders ~ "Midfielder",
team_position %in% defenders ~ "Defender",
team_position %in% other ~ "Other"))
data() %>%
filter (Position %in% c("Attacker", "Midfielder", "Defender")) %>%
ggplot(aes(x = age, y = overall, colour = Position )) + geom_point() +
theme_minimal() + geom_smooth( se = F) + ggtitle("Analysis of player's performance with age") + xlab("Player age") + ylab("Player overall performance") })
#Player analysis by position #Most common player nationalities based on position output$nationalitybyposition <- renderPlot({
filter_position() %>%
group_by(nationality) %>%
mutate(nat_number = n()) %>%
filter(nat_number > 10) %>%
ggplot(aes(y= reorder(nationality, nat_number))) + geom_bar() + theme_minimal() + ggtitle("Most common nationality of selected player position (by year/period))") + xlab("Number of players") + ylab("Country") })
#highest valued player by position output$valuebyposition <- renderPlot({
filter_position() %>%
arrange(desc(value_eur)) %>%
filter (value_eur > 34500000) %>%
ggplot(aes(x = short_name, y=value_eur)) +
geom_point() +
ggtitle("Highest valued player by position") +
xlab("Player name") +
ylab("Value (euros)") })
#(2) League analysis
premierLeague <- c("Arsenal", "Aston Villa", "Brighton & Hove Albion", "Burnley", "Chelsea", "Crystal Palace", "Everton", "Fulham", "Leeds United", "Leicester City", "Liverpool", "Manchester City", "Manchester United", "Newcastle United", "Sheffield United", "Southampton", "Tottenham Hotspur", "West Bromwich Albion", "West Ham United", "Wolverhampton Wanderers") bundesliga <- c("1. FC Köln", "1. FSV Mainz 05", "DSC Arminia Bielefeld", "Borussia Dortmund", "FC Augsburg", "FC Bayern München", "FC Schalke 04", "Eintracht Frankfurt", "Hertha BSC", "Bayer 04 Leverkusen", "Borussia Mönchengladbach", "RB Leipzig", "SC Freiburg", "TSG 1899 Hoffenheim", "1. FC Union Berlin", "VfB Stuttgart", "VfL Wolfsburg", "SV Werder Bremen") laliga <- c("Athletic Club de Bilbao", "Atlético Madrid", "CA Osasuna", "Cádiz CF", "Deportivo Alavés", "Elche CF", "FC Barcelona", "Getafe CF", "Granada CF", "Levante UD", "Real Valladolid CF", "RC Celta", "Real Betis", "Real Madrid", "Real Sociedad", "SD Eibar", "SD Huesca", "Sevilla FC", "Valencia CF", "Villarreal CF") seriea <- c("Atalanta", "Benevento", "Bologna", "Cagliari", "Crotone", "Fiorentina", "Genoa","Hellas Verona","Inter", "Spezia", "Lazio", "Milan", "Napoli", "Parma", "Juventus", "Roma","Sampdoria","Sassuolo","Torino", "Udinese") ligue1 <- c("Angers SCO", "AS Monaco", "AS Saint-Étienne", "FC Girondins de Bordeaux", "Dijon FCO", "FC Lorient", "FC Metz", "FC Nantes", "LOSC Lille", "Montpellier HSC", "Nîmes Olympique", "OGC Nice", "Olympique Lyonnais", "Olympique de Marseille", "Paris Saint-Germain", "Racing Club de Lens", "Stade Rennais FC", "Stade Brestois 29", "Stade de Reims", "RC Strasbourg Alsace")
FifaLeague <- fifa %>% mutate(League = case_when(
club_name %in% bundesliga ~ "Bundesliga",
club_name %in% premierLeague ~ "Premier League",
club_name %in% laliga ~ "La Liga",
club_name %in% seriea ~ "Serie A",
club_name %in% ligue1 ~ "Ligue 1" ))
FifaLeague <- na.omit(FifaLeague)
filter_league <- reactive({
FifaLeague %>%
filter(year >= input$year[1],
year <= input$year[2]) }) }
shinyApp(ui = ui, server = server)
This code is for an assignment I am doing to analyze the FIFA 2017 to 2021 players. the idea is to create a shiny app where the user can select inputs and the plots get updated.
Let me know if there is anything else I can do to explain further.
Thank you!
I have the following network graph:
library(tidyverse)
library(igraph)
set.seed(123)
n=15
data = data.frame(tibble(d = paste(1:n)))
relations = data.frame(tibble(
from = sample(data$d),
to = lead(from, default=from[1]),
))
data$name = c("new york", "chicago", "los angeles", "orlando", "houston", "seattle", "washington", "baltimore", "atlanta", "las vegas", "oakland", "phoenix", "kansas", "miami", "newark" )
graph = graph_from_data_frame(relations, directed=T, vertices = data)
V(graph)$color <- ifelse(data$d == relations$from[1], "red", "orange")
plot(graph, layout=layout.circle, edge.arrow.size = 0.2, main = "my_graph")
I was able to convert this graph into a "visnetwork" graph:
library(visNetwork)
visIgraph(graph)
Now, I am trying to put a title on this graph:
visIgraph(graph, main = "my title")
Although this doesn't work:
Error in layout_with_fr(graph, dim = dim, ...) :
unused argument (main = "my title")
I found this link https://datastorm-open.github.io/visNetwork/legend.html that shows how you can add titles to a "visnetwork" graph :
nodes <- data.frame(id = 1:3, group = c("B", "A", "B"))
edges <- data.frame(from = c(1,2), to = c(2,3))
# default, on group
visNetwork(nodes, edges,
main = "A really simple example",
submain = list(text = "Custom subtitle",
style = "font-family:Comic Sans MS;color:#ff0000;font-size:15px;text-align:center;"),
footer = "Fig.1 minimal example",
width = "100%")
This seems to be pretty straightforward, but it requires you to use the "visNetwork()" function instead of the "visIgraph()" function.
Is it possible to directly add titles using the "visIgraph()" function?
Thank you!
We can try this approach if you like
toVisNetworkData(graph) %>%
c(list(main = "my title")) %>%
do.call(visNetwork, .)
or
toVisNetworkData(graph) %>%
{
do.call(visNetwork, c(., list(main = "my title", submain = "subtitle")))
}
and you will see
I was not able to figure out how to do this with "visIgraph()" function - but I think I was able to figure out how to generate a random graph (meeting certain conditions: Generating Random Graphs According to Some Conditions) and using the regular "visNetwork()" function and then place a title on this graph:
n=15
data = data.frame(id = 1:n)
data$color = ifelse(data$id == 1, "Red", "Orange")
data$label = c("new york", "chicago", "los angeles", "orlando", "houston", "seattle", "washington", "baltimore", "atlanta", "las vegas", "oakland", "phoenix", "kansas", "miami", "newark" )
relations = data.frame(tibble(
from = sample(data$id),
to = lead(from, default=from[1]),
))
visNetwork(data, relations, main = "my graph") %>% visEdges(arrows = "to")
It feels great (somewhat) solving my own problem!
But is this possible directly with the "visNetwork()" function?
I have a dataset with the following structure:
structure(list(mes = c(7, 7, 7, 4, 4), ano = c(2021, 2021, 2021,
2021, 2021), nacionalidad = c("Venezuela", "Venezuela", "Venezuela",
"Venezuela", "Venezuela"), centro = c("Aeropuerto Eldorado",
"Aeropuerto Eldorado", "Aeropuerto Eldorado", "Aeropuerto Eldorado",
"Aeropuerto Eldorado"), puesto = c("Aeropuerto Eldorado de Bogotá",
"Aeropuerto Eldorado de Bogotá", "Aeropuerto Eldorado de Bogotá",
"Aeropuerto Eldorado de Bogotá", "Aeropuerto Eldorado de Bogotá"
), transporte = c("Air", "Air", "Air", "Air", "Air"), ciudad = c("Arauca",
"Bogotá", "Pereira", "Bogotá", "Bogotá"), flujo = c("Entries",
"Entries", "Entries", "Entries", "Entries"), motivo = c("Tourism",
"Tourism", "Tourism", "Transit", "Transit"), edad = c("0-17",
"0-17", "0-17", "18-29", "18-29"), colombiano = c("Extranjeros",
"Extranjeros", "Extranjeros", "Extranjeros", "Extranjeros"),
departamento = c("Arauca", "Bogota D.C.", "Risaralda", "Bogota D.C.",
"Bogota D.C."), region = c("América del Sur", "América del Sur",
"América del Sur", "América del Sur", "América del Sur"
), status = c("Permiso de Turismo", "Permiso de Turismo",
"Permiso de Turismo", "Permiso Otras Actividades", "Permiso Otras Actividades"
), departamento_2 = c("Bogotá", "Bogotá", "Bogotá", "Bogotá",
"Bogotá"), destino_procedencia = c("Emiratos Árabes", "Israel",
"Emiratos Árabes", "Panamá", "Panamá"), region_destino = c("Asia",
"Asia", "Asia", "América Central y el Caribe", "América Central y el Caribe"
), sexo = c("Male", "Male", "Male", "Male", "Male"), numero = c(1,
1, 1, 5, 5), date = structure(c(18809, 18809, 18809, 18718,
18718), class = "Date"), date2 = structure(c(18809, 18809,
18809, 18718, 18718), class = "Date")), row.names = c(NA,
5L), class = "data.frame")
and I would like to plot a chart by date and other variables (e.g. mode of transport) in ggplotly. The chart is correct, but the labels that appear in the dates show numbers instead of date format. The variable in the database is in date format, and I already tried changing it to different formats and still does not work.
I would also like to add minor date breaks, but can't seem to get it right.
Here is the code I am using for the chart:
chart9<-ggplot()+
geom_line(data=Flow,
aes(x=date,
color=transporte), stat="count") +
scale_x_date(date_minor_breaks = "1 month",
date_labels = "%Y (%b)")+
labs(color="Type of Flow")+
ggtitle("Number of Entrances, by Month and Mode of Transportation, 2017-2021")+
xlab("Date")+
ylab("Number or People")
ggplotly(chart9)
This is the chart plotted
Any help would be greatly appreciated! :)
Looks like the date class gets dropped when using stat="count". Hence, one option to achieve your desired result would be to aggregate your dataset before passing it to ggplot using e.g. dplyr::count(Flow, date, transporte):
Flow <- dplyr::count(Flow, date, transporte, name = "count")
ggplot() +
geom_line(data = Flow, aes(x = date, y = count, color = transporte)) +
scale_x_date(
date_minor_breaks = "1 month",
date_labels = "%Y (%b)"
) +
labs(color = "Type of Flow") +
ggtitle("Number of Entrances, by Month and Mode of Transportation, 2017-2021") +
xlab("Date") +
ylab("Number or People")
ggplotly()
A second option which additionally for setting the date format would be make use of the text "aesthetic" and convert your numbers back to proper dates:
library(plotly)
ggplot() +
geom_line(data = Flow, aes(x = date, color = transporte, text = paste(
"count:", ..count..,
"<br>Date: ", format(as.Date(..x.., origin = "1970-01-01"), "%Y (%b)"),
"<br>transporte: ", ..color..
)), stat = "count") +
scale_x_date(
date_minor_breaks = "1 month",
date_labels = "%Y (%b)"
) +
labs(color = "Type of Flow") +
ggtitle("Number of Entrances, by Month and Mode of Transportation, 2017-2021") +
xlab("Date") +
ylab("Number or People")
ggplotly(tooltip = c("text"))
I wanna create a pie chart of crime types,and add a legend on the right hand,but I tried many times to avoid overlapping,doesn't work at all.
table(dd$Primary.Type.new)
ARSON ASSAULT BATTERY BURGLARY
833 30743 91237 29298
CRIMINAL DAMAGE CRIMINAL TRESPASS DECEPTIVE PRACTICE HOMICIDE
57539 14353 17472 640
KIDNAPPING MOTOR VEHICLE THEFT NARCOTOCS OFFENSE INVOLVING CHILDREN
517 23724 55685 3347
OTHER OFFENSE PUBLIC OFFENSE PUBLIC PEACE VIOLATION ROBBERY
30878 3833 3632 18891
SEX_CRIME THEFT WEAPONS VIOLATION
9331 103255 4792
Type <- table(dd$Primary.Type.new)
Here's that from dput():
structure(c(ARSON = 833L, ASSAULT = 30743L, BATTERY = 91237L,
BURGLARY = 29298L, `CRIMINAL DAMAGE` = 57539L, `CRIMINAL TRESPASS` = 14353L,
`DECEPTIVE PRACTICE` = 17472L, HOMICIDE = 640L, KIDNAPPING = 517L,
`MOTOR VEHICLE THEFT` = 23724L, NARCOTOCS = 55685L, `OFFENSE INVOLVING CHILDREN` = 3347L,
`OTHER OFFENSE` = 30878L, `PUBLIC OFFENSE` = 3833L, `PUBLIC PEACE VIOLATION` = 3632L,
ROBBERY = 18891L, `SEX CRIME` = 9331L, THEFT = 103255L, `WEAPONS VIOLATION` = 4792L
), .Dim = 19L, .Dimnames = list(. = c("ARSON", "ASSAULT", "BATTERY",
"BURGLARY", "CRIMINAL DAMAGE", "CRIMINAL TRESPASS", "DECEPTIVE PRACTICE",
"HOMICIDE", "KIDNAPPING", "MOTOR VEHICLE THEFT", "NARCOTOCS",
"OFFENSE INVOLVING CHILDREN", "OTHER OFFENSE", "PUBLIC OFFENSE",
"PUBLIC PEACE VIOLATION", "ROBBERY", "SEX CRIME", "THEFT", "WEAPONS VIOLATION"
)), class = "table") -> Type
piepercent<- round(100*Type/sum(Type), 1)
pie(Type, edges = 200, radius = 0.8,
clockwise = FALSE,angle = 45, col = rainbow(length(Type)), main = "Pie Chart of Primary Crime Types", labels = piepercent,labelcex = 0.8)
legend("right", inset = .05, title = "Primary Crime Type",legend= dd$Primary.Type.new,fill = rainbow(length(Type)), horiz=FALSE,cex = 0.6)
I tried to use par(), but doestn't work.
and BTW how can I change the labels into percentage? such as convert 20.7 into 20.7%.
Thank you very much.
Update
I also tried 3D piechart
library(plotrix)
pie3D(Type,labels = piepercent,explode = 0.1, main = "3D Pie Chart of
Primary Crime Types", labelcex = 0.8)
legend("bottom", inset = .05, title = "Primary Crime Type",legend= dd$Primary.Type.new,fill = rainbow(length(Type)), horiz=TRUE,cex = 0.6)
I hesitate to post this since this is an absolutely terrible use case for a pie chart, but it's possible to make it a bit more readable and color-blind friendly:
structure(c(ARSON = 833L, ASSAULT = 30743L, BATTERY = 91237L,
BURGLARY = 29298L, `CRIMINAL DAMAGE` = 57539L, `CRIMINAL TRESPASS` = 14353L,
`DECEPTIVE PRACTICE` = 17472L, HOMICIDE = 640L, KIDNAPPING = 517L,
`MOTOR VEHICLE THEFT` = 23724L, NARCOTOCS = 55685L, `OFFENSE INVOLVING CHILDREN` = 3347L,
`OTHER OFFENSE` = 30878L, `PUBLIC OFFENSE` = 3833L, `PUBLIC PEACE VIOLATION` = 3632L,
ROBBERY = 18891L, `SEX CRIME` = 9331L, THEFT = 103255L, `WEAPONS VIOLATION` = 4792L
), .Dim = 19L, .Dimnames = list(. = c("ARSON", "ASSAULT", "BATTERY",
"BURGLARY", "CRIMINAL DAMAGE", "CRIMINAL TRESPASS", "DECEPTIVE PRACTICE",
"HOMICIDE", "KIDNAPPING", "MOTOR VEHICLE THEFT", "NARCOTOCS",
"OFFENSE INVOLVING CHILDREN", "OTHER OFFENSE", "PUBLIC OFFENSE",
"PUBLIC PEACE VIOLATION", "ROBBERY", "SEX CRIME", "THEFT", "WEAPONS VIOLATION"
)), class = "table") -> Type
Order the slices (IMPORTANT):
Type <- sort(Type, decreasing = TRUE)
Proper % and decent labels:
piepercent <- scales::percent(as.numeric(Type/sum(Type)))
Margins:
par(mar = c(1, 1, 1, 1)) # bltr
pie(
Type,
edges = 200,
radius = 0.8,
clockwise = TRUE, # IMPORTANT
angle = 45,
col = viridis::viridis_pal(option = "magma", direction=-1)(length(Type)), # BETTER COLOR PALETTE
labels = tail(piepercent, -7), # NEVER DISPLAY OVERLAPPING LABELS
cex = 0.7
)
legend(
x = 1.2, # DELIBERATE POSITION
y = 0.5, # DELIBERATE POSITION
inset = .05,
title = "Primary Crime Type",
legend = names(Type), # YOU WERE PASSING IN _ALL_ THE REPEAT NAMES
fill = viridis::viridis_pal(option = "magma", direction=-1)(length(Type)), # USE THE SAME COLOR PALETTE
horiz = FALSE,
cex = 0.6, # PROPER PARAMETER FOR TEXT SIZE
text.width = 0.7 # SET THE BOX WIDTH
)
Add the title manually:
title("Pie Chart of Primary Crime Types", line = -1)
Can't let a pie chart stand alone (and, now, a 3D one at that):
structure(list(cat = c("Arson", "Assault", "Battery", "Burglary",
"Criminal Damage", "Criminal Trespass", "Deceptive Practice",
"Homicide", "Kidnapping", "Motor Vehicle Theft", "Narcotocs",
"Offense Involving Children", "Other Offense", "Public Offense",
"Public Peace Violation", "Robbery", "Sex Crime", "Theft", "Weapons Violation"
), val = c(833, 30743, 91237, 29298, 57539, 14353, 17472, 640,
517, 23724, 55685, 3347, 30878, 3833, 3632, 18891, 9331, 103255,
4792), pct = c(0.001666, 0.061486, 0.182474, 0.058596, 0.115078,
0.028706, 0.034944, 0.00128, 0.001034, 0.047448, 0.11137, 0.006694,
0.061756, 0.007666, 0.007264, 0.037782, 0.018662, 0.20651, 0.009584
)), class = "data.frame", row.names = c(NA, -19L)) -> xdf
dplyr::arrange(xdf, pct) %>%
dplyr::mutate(cat = factor(cat, levels=cat)) %>%
dplyr::mutate(lab = sprintf("%s (%s)", scales::comma(val), scales::percent(pct))) %>%
ggplot(aes(pct, cat)) +
geom_segment(aes(xend=0, yend=cat), size=4, color = "#617a89") +
geom_label(
aes(label=lab), label.size = 0, hjust=0, nudge_x=0.001,
size = 3, family = hrbrthemes::font_rc, color = "#909495"
) +
hrbrthemes::scale_x_percent(expand=c(0,0.001), limits=c(0,0.25)) +
labs(x = NULL, y = NULL, title = "'Theft', 'Battery' & 'Criminal Damage' Account\nfor Half of Primary Recorded Crime Types") +
hrbrthemes::theme_ipsum_rc(grid="X") +
theme(axis.text.x = element_blank())
How I got xdf:
readLines(textConnection("ARSON ASSAULT BATTERY BURGLARY
833 30743 91237 29298
CRIMINAL_DAMAGE CRIMINAL_TRESPASS DECEPTIVE_PRACTICE HOMICIDE
57539 14353 17472 640
KIDNAPPING MOTOR_VEHICLE_THEFT NARCOTOCS OFFENSE_INVOLVING_CHILDREN
517 23724 55685 3347
OTHER_OFFENSE PUBLIC_OFFENSE PUBLIC_PEACE_VIOLATION ROBBERY
30878 3833 3632 18891
SEX_CRIME THEFT WEAPONS_VIOLATION
9331 103255 4792")) %>%
trimws() %>%
stri_split_regex("[[:space:]]+") -> x
do.call(rbind.data.frame, lapply(seq.int(1, length(x), 2), function(i) {
data.frame(
cat = stri_trans_totitle(gsub("_", " ", x[[i]])),
val = as.numeric(x[[i+1]]),
stringsAsFactors = FALSE
)
})) %>%
mutate(pct = val/sum(val)) -> xdf
I'm trying to order the bars of a vertically facetted bar plot (3 facets) based on the values of one of the facets. Specifically, I want bars in the leftmost panel sorted by decreasing value. My data is based on multi- period fund returns. A period length is a facet; the list of ticker symbols spans all facets. I've found many similar examples, but I can't get the reordering of the x axis labels to work correctly. I'm using the code below on a much longer list of stock symbols, but this should be a working example:
require('ggplot2')
require('reshape2')
require('tseries')
symbs = c("XLE", "XLV", "XLK", "XLB", "SPY")
desc = c("Energy", "HealthCare", "Tech", "Materials", "S&P")
data = cbind(symbs, desc)
indexTickers = data[,1]
indexNames = data[,2]
latestDate =Sys.Date()
dailyPrices = lapply(indexTickers, function(ticker) get.hist.quote(instrument= ticker, start = "2012-01-01",
end = latestDate, quote="Close", provider = "yahoo", origin="1970-01-01", compression = "d", retclass="zoo"))
perf5Day = lapply(dailyPrices, function(x){(x-lag(x,k=-5))/lag(x,k=-5)})
perf20Day = lapply(dailyPrices, function(x){(x-lag(x,k=-20))/lag(x,k=-20)})
perf60Day = lapply(dailyPrices, function(x){(x-lag(x,k=-60))/lag(x,k=-60)})
names(perf5Day) = indexTickers
names(perf20Day) = indexTickers
names(perf60Day) = indexTickers
perfsMerged = lapply(indexTickers, function(spdr){merge(perf5Day[[spdr]],perf20Day[[spdr]],perf60Day[[spdr]])})
perfNames = c('1Week','1Month','3Month')
perfsMerged = lapply(perfsMerged, function(x){
names(x)=perfNames
return(x)
})
latestDataPoints = t(sapply(perfsMerged, function(x){return(x[nrow(x)])}))
namesAndTickers = paste(indexNames, paste(' (',indexTickers,')',sep=''), sep='')
latestDataPoints = data.frame(cbind(namesAndTickers,latestDataPoints))
latestDataPoints[2:4] <- lapply( latestDataPoints[2:4], function(x)
as.numeric(as.character(x)))
names(latestDataPoints) = c('Ticker','5 Day','20 Day','60 Day')
drm = melt(latestDataPoints, id.vars=c('Ticker'))
posNeg = ifelse(drm$value >= 0, 'pos','neg')
pcts = paste(round(100*drm$value, 2), "%", sep="")
drm = cbind(drm, posNeg, pcts)
names(drm) = c('Ticker','Period','Value','Sign','Pct')
ordered = drm[drm$Period=="5 Day",]
ordered = ordered[order(-ordered$Value),]
orderedSymbs = ordered$Ticker
orderedSymbs = c(orderedSymbs, orderedSymbs, orderedSymbs)
p = ggplot(drm, aes(x=reorder(Ticker, orderedSymbs),y=Value,fill=Sign)) + geom_bar(stat='identity') + facet_grid(. ~ Period,scales='free_y') +
coord_flip() + scale_fill_manual(values=c('pos'='darkgreen','neg'='red'),guide=FALSE) +
ggtitle("Performances of Top Etfs by Trading Volume")
p = p + theme(plot.title = element_text(vjust = 2, size=rel(1.8),
face="bold"), axis.text.x=element_text(color='orangered4',size=rel(1.2)),
axis.text.y=element_text(color='orangered4',size=rel(1.2)), axis.title.x = element_blank(),
axis.title.y = element_blank(), strip.text.x = element_text(size=rel(1.2)))
p
Here is the working version.
Besides some cosmetic changes to the code (that I usually use), the only major change was related to rearranging the factors before going into the ggplot.
Hope this helps
require('ggplot2')
require('reshape2')
require('tseries')
symbs = c("XLE", "XLV", "XLK", "XLB", "SPY")
desc = c("Energy", "HealthCare", "Tech", "Materials", "S&P")
data = cbind(symbs, desc)
indexTickers = data[,1]
indexNames = data[,2]
latestDate =Sys.Date()
dailyPrices = lapply(indexTickers, function(ticker) get.hist.quote(instrument= ticker, start = "2012-01-01",
end = latestDate, quote="Close", provider = "yahoo", origin="1970-01-01", compression = "d", retclass="zoo"))
perf5Day = lapply(dailyPrices, function(x){(x-lag(x,k=-5))/lag(x,k=-5)})
perf20Day = lapply(dailyPrices, function(x){(x-lag(x,k=-20))/lag(x,k=-20)})
perf60Day = lapply(dailyPrices, function(x){(x-lag(x,k=-60))/lag(x,k=-60)})
names(perf5Day) = indexTickers
names(perf20Day) = indexTickers
names(perf60Day) = indexTickers
perfsMerged = lapply(indexTickers, function(spdr){merge(perf5Day[[spdr]],perf20Day[[spdr]],perf60Day[[spdr]])})
perfNames = c('1Week','1Month','3Month')
perfsMerged = lapply(perfsMerged, function(x){
names(x)=perfNames
return(x)
})
latestDataPoints = t(sapply(perfsMerged, function(x){return(x[nrow(x)])}))
namesAndTickers = paste(indexNames, paste(' (',indexTickers,')',sep=''), sep='')
latestDataPoints = data.frame(cbind(namesAndTickers,latestDataPoints))
latestDataPoints[2:4] <- lapply( latestDataPoints[2:4], function(x)
as.numeric(as.character(x)))
names(latestDataPoints) = c('Ticker','5 Day','20 Day','60 Day')
drm = melt(latestDataPoints, id.vars=c('Ticker'))
posNeg = ifelse(drm$value >= 0, 'pos','neg')
pcts = paste(round(100*drm$value, 2), "%", sep="")
drm = cbind(drm, posNeg, pcts)
names(drm) = c('Ticker','Period','Value','Sign','Pct')
ordered = drm[drm$Period=="5 Day",]
ordered = ordered[order(-ordered$Value),]
orderedSymbs = ordered$Ticker
Here is the change
# commented out this
# orderedSymbs = c(orderedSymbs, orderedSymbs, orderedSymbs)
# added this line, see http://www.r-bloggers.com/reorder-factor-levels-2/ for details
drm$Ticker = factor(drm$Ticker, levels(drm$Ticker)[as.numeric(orderedSymbs)])
And some minor modifications here
p = ggplot(drm,
aes(x=Ticker,
y=Value,fill=Sign)
)
p = p + geom_bar(stat='identity') + facet_grid(. ~ Period,scales='free_y')
p = p + coord_flip() + scale_fill_manual(values=c('pos'='darkgreen','neg'='red'),guide=FALSE)
p = p + ggtitle("Performances of Top Etfs by Trading Volume")
p = p + theme(plot.title = element_text(vjust = 2, size=rel(1.8), face="bold"), axis.text.x=element_text(color='orangered4',size=rel(1.2)), axis.text.y=element_text(color='orangered4',size=rel(1.2)), axis.title.x = element_blank(), axis.title.y = element_blank(), strip.text.x = element_text(size=rel(1.2)))
print(p)