Order the legend names in ggplot2 object from smaller to larger - r

I have the dataframe below and I want to set the order of the legend names from smaller to larger. The values may be characters but they have correspondence to the Cases column which I believe could be useful since the dataset is dynamic.
# Example data (dput output): 14 rows, one per Age.group x Gender pair.
# `lab` / `lab2` are comma-formatted strings of `Cases` / `Age.group.Sum`;
# `color` holds a hex fill colour and `Range` the legend label for each row.
Cum<-structure(list(Age.group = c("00-04", "00-04", "05-14", "05-14",
"15-24", "15-24", "25-49", "25-49", "50-64", "50-64", "65-79",
"65-79", "80+", "80+"), Gender = c("Female", "Male", "Female",
"Male", "Female", "Male", "Female", "Male", "Female", "Male",
"Female", "Male", "Female", "Male"), Cases = c(64578, 70518,
187568, 197015, 414405, 388138, 1342394, 1206168, 792180, 742744,
400232, 414613, 282268, 198026), lab = c("64,578", "70,518",
"187,568", "197,015", "414,405", "388,138", "1,342,394", "1,206,168",
"792,180", "742,744", "400,232", "414,613", "282,268", "198,026"
), Age.group.Sum = c(135096, 135096, 384583, 384583, 802543,
802543, 2548562, 2548562, 1534924, 1534924, 814845, 814845, 480294,
480294), lab2 = c("135,096", "135,096", "384,583", "384,583",
"802,543", "802,543", "2,548,562", "2,548,562", "1,534,924",
"1,534,924", "814,845", "814,845", "480,294", "480,294"), color = c("#4285f4",
"#4285f4", "#90a9e0", "#90a9e0", "#dd9e5f", "#dd9e5f", "#b45f06",
"#b45f06", "#b45f06", "#b45f06", "#dd9e5f", "#dd9e5f", "#aebbd6",
"#90a9e0"), Range = c("LESS THAN 74.5K", "LESS THAN 74.5K", "148.9K - 223.4K",
"148.9K - 223.4K", "372.3K - 446.7K", "372.3K - 446.7K", "MORE THAN 670.1K",
"MORE THAN 670.1K", "MORE THAN 670.1K", "MORE THAN 670.1K", "372.3K - 446.7K",
"372.3K - 446.7K", "223.4K - 297.8K", "148.9K - 223.4K")), class = "data.frame", row.names = c(NA,
-14L))
# Grouped bar chart of cases per age group, dodged by gender and filled by the
# Range bracket of each row.
#
# The snippet originally started with `names(mycols) <- mycols`, but `mycols`
# is not defined anywhere in this example, so the script stopped with
# "object 'mycols' not found". Nothing below uses it, so the line is dropped.
ylab <- c(0.5, 1, 1.5, 2, 2.5)

# Label -> colour lookup for scale_fill_manual(). Each label is paired with the
# colour stored on the first row that carries it, rather than zipping two
# independent unique() calls that only line up by coincidence of row order.
range_labels <- unique(Cum$Range)
lbls <- setNames(Cum$color[match(range_labels, Cum$Range)], range_labels)

ggplot_obj <- ggplot(
  data = Cum,
  aes(x = `Age.group`, y = Cases, group = Gender, fill = Range)
) +
  # geom_col() is the direct spelling of geom_bar(stat = "identity").
  # `text` is not a ggplot2 aesthetic (ggplot2 warns and ignores it); it is
  # presumably kept so plotly::ggplotly() can use it as the hover tooltip.
  geom_col(
    aes(
      text = paste("<b>Gender:</b>", Gender, "<br><b>Age:</b>", `Age.group` ,
                   "<br><b>Cases:</b>", lab, "<br><b>Total cases in age group:</b>",
                   lab2)
    ),
    position = "dodge"
  ) +
  # Gender caption drawn slightly above each bar.
  geom_text(
    aes(y = Cases + 10000, label = Gender),
    vjust = 1,
    position = position_dodge(width = 0.9),
    size = 2
  ) +
  scale_fill_manual(values = lbls) +
  # 10% headroom above the tallest bar; no padding around the axes.
  coord_cartesian(ylim = c(0, max(Cum$Cases) * 1.1), expand = FALSE) +
  theme_bw() +
  theme(
    # remove the vertical grid lines
    panel.grid.major.x = element_blank(),
    panel.border = element_blank(),
    axis.line.x = element_line()
  ) +
  # unit_format() lives in the scales package; qualify it so the snippet does
  # not depend on library(scales) having been called.
  scale_y_continuous(labels = scales::unit_format(unit = "M", scale = 1e-6)) +
  xlab("Age group")

We could convert the column to factor with levels specified in the custom order
# Legend order follows factor level order, so list the brackets explicitly
# from the smallest to the largest.
range_levels <- c(
  "LESS THAN 74.5K",
  "148.9K - 223.4K",
  "223.4K - 297.8K",
  "372.3K - 446.7K",
  "MORE THAN 670.1K"
)
Cum$Range <- factor(Cum$Range, levels = range_levels)
ylab <- c(0.5, 1, 1.5, 2, 2.5)
# For every level, take the colour stored on the first row with that level and
# name it by the level so scale_fill_manual() can match by name.
first_row_per_level <- match(range_levels, Cum$Range)
lbls <- setNames(Cum$color[first_row_per_level], range_levels)
Construct the plot with ggplot as in the OP's code
-output
Update
If the 'Range' values are dynamic (assuming the unit is the same), then extract the numeric part with parse_number, order and get the unique values
# Derive the level order from the labels themselves: parse_number() pulls the
# first number out of each label, rows are ordered by it, and the labels are
# de-duplicated in that order.
range_chr <- as.character(Cum$Range)
leading_number <- readr::parse_number(range_chr)
lvls <- unique(range_chr[order(leading_number)])
Cum$Range <- factor(Cum$Range, levels = lvls)
Or another option is to arrange by 'Cases' and set the levels for 'Range'
library(dplyr)
# Sort the rows by case count (ties broken by age group); the order in which
# each Range label first appears then becomes the factor level order.
Cum <- mutate(
  arrange(Cum, Cases, Age.group),
  Range = factor(Range, levels = unique(Range))
)
# Rebuild the label -> colour lookup in the new level order.
range_levels <- levels(Cum$Range)
lbls <- setNames(Cum$color[match(range_levels, Cum$Range)], range_levels)

Related

PanelMatch R Errors: "please convert time id to consecutive integers" and "please convert unit id column to integer or numeric"

When using the PanelMatch package in R, I'm getting a couple errors. Any guidance on how to resolve them would be greatly appreciated.
First, when running the DisplayTreatment() function, I get the following error: "please convert time id to consecutive integers". I've converted the time id to integer class with as.integer(year), and I'm pretty sure they are consecutive---I've run unique(year) and can see that there are no gaps---but the error persists.
Second, when running the PanelMatch() function, I get this error: "please convert unit id column to integer or numeric". Again, I've tried converting the unit id to integer class, but the error persists.
The data can be downloaded here: faads_women_09.dta.zip. Here is the code necessary to replicate the issue:
# Read the Stata file and store both panel identifiers as integers.
house_ab <- read_dta("faads_women_09.dta")
house_ab[["year"]] <- as.integer(house_ab[["year"]])
house_ab[["statdistcons3"]] <- as.integer(house_ab[["statdistcons3"]])

# First call that raises the "consecutive integers" error.
DisplayTreatment(
  unit.id = "statdistcons3",
  time.id = "year",
  legend.position = "none",
  xlab = "year",
  ylab = "CD",
  treatment = "female",
  data = house_ab
)

# Second call that raises the "unit id column" error.
PM.results.AB <- PanelMatch(
  lag = 4,
  time.id = "year",
  unit.id = "statdistcons3",
  treatment = "female",
  refinement.method = "CBPS.match",
  data = house_ab,
  match.missing = TRUE,
  covs.formula = ~ party + unified_govt + terms + ln_statecapital + margin +
    ln_population + ln_age65 + ln_black + ln_constrct + ln_school +
    ln_farmer + ln_foreign + ln_manuf + ln_median_income + ln_unemployed +
    ln_miltpop + ln_urban,
  size.match = 5,
  qoi = "att",
  outcome.var = "high_lnoutlays_cpi",
  forbid.treatment.reversal = FALSE
)
I've been vexed by the same problem for quite a while but finally figured it out. I looked up the error logic in the PanelMatch code
# Input checks quoted from the PanelMatch source.
# A tibble passes this first check because its class vector includes "data.frame".
if(!"data.frame" %in% class(data)) stop("please convert data to data.frame class")
# The column is extracted with `data[, unit.id]`. On a tibble that returns a
# one-column tibble (class "tbl_df" "tbl" "data.frame"), not a bare vector, so
# neither "integer" nor "numeric" matches even when the column is an integer.
if(!class(data[, unit.id]) %in% c("integer", "numeric")) stop("please convert unit id column to integer or numeric")
# Same problem for the time id: the class of a one-column tibble is never "integer".
if(class(data[, time.id]) != "integer") stop("please convert time id to consecutive integers")
If we run class(house_ab[,"year"]) we get:
"tbl_df" "tbl" "data.frame"
This means the data are stored as a tibble, rather than an ordinary (base) dataframe. Solution is to convert from a tibble to a base dataframe:
# Drop the tibble classes so that `data[, col]` returns a plain vector.
house_ab <- as.data.frame(house_ab)

DisplayTreatment(
  unit.id = "statdistcons3",
  time.id = "year",
  legend.position = "none",
  xlab = "year",
  ylab = "CD",
  treatment = "female",
  data = house_ab
)

PM.results.AB <- PanelMatch(
  lag = 4,
  time.id = "year",
  unit.id = "statdistcons3",
  treatment = "female",
  refinement.method = "CBPS.match",
  data = house_ab,
  match.missing = TRUE,
  covs.formula = ~ party + unified_govt + terms + ln_statecapital + margin +
    ln_population + ln_age65 + ln_black + ln_constrct + ln_school +
    ln_farmer + ln_foreign + ln_manuf + ln_median_income + ln_unemployed +
    ln_miltpop + ln_urban,
  size.match = 5,
  qoi = "att",
  outcome.var = "high_lnoutlays_cpi",
  forbid.treatment.reversal = FALSE
)

ggplotly showing numbers instead of date labels

I have a dataset with the following structure:
# Sample of the data (dput output, 5 rows). The value is not assigned here; the
# plotting code refers to the full data set as `Flow`.
# Columns include the numeric `mes`, `ano` and `numero`, character descriptors
# such as `transporte`, and two Date-class columns (`date`, `date2`).
structure(list(mes = c(7, 7, 7, 4, 4), ano = c(2021, 2021, 2021,
2021, 2021), nacionalidad = c("Venezuela", "Venezuela", "Venezuela",
"Venezuela", "Venezuela"), centro = c("Aeropuerto Eldorado",
"Aeropuerto Eldorado", "Aeropuerto Eldorado", "Aeropuerto Eldorado",
"Aeropuerto Eldorado"), puesto = c("Aeropuerto Eldorado de Bogotá",
"Aeropuerto Eldorado de Bogotá", "Aeropuerto Eldorado de Bogotá",
"Aeropuerto Eldorado de Bogotá", "Aeropuerto Eldorado de Bogotá"
), transporte = c("Air", "Air", "Air", "Air", "Air"), ciudad = c("Arauca",
"Bogotá", "Pereira", "Bogotá", "Bogotá"), flujo = c("Entries",
"Entries", "Entries", "Entries", "Entries"), motivo = c("Tourism",
"Tourism", "Tourism", "Transit", "Transit"), edad = c("0-17",
"0-17", "0-17", "18-29", "18-29"), colombiano = c("Extranjeros",
"Extranjeros", "Extranjeros", "Extranjeros", "Extranjeros"),
departamento = c("Arauca", "Bogota D.C.", "Risaralda", "Bogota D.C.",
"Bogota D.C."), region = c("América del Sur", "América del Sur",
"América del Sur", "América del Sur", "América del Sur"
), status = c("Permiso de Turismo", "Permiso de Turismo",
"Permiso de Turismo", "Permiso Otras Actividades", "Permiso Otras Actividades"
), departamento_2 = c("Bogotá", "Bogotá", "Bogotá", "Bogotá",
"Bogotá"), destino_procedencia = c("Emiratos Árabes", "Israel",
"Emiratos Árabes", "Panamá", "Panamá"), region_destino = c("Asia",
"Asia", "Asia", "América Central y el Caribe", "América Central y el Caribe"
), sexo = c("Male", "Male", "Male", "Male", "Male"), numero = c(1,
1, 1, 5, 5), date = structure(c(18809, 18809, 18809, 18718,
18718), class = "Date"), date2 = structure(c(18809, 18809,
18809, 18718, 18718), class = "Date")), row.names = c(NA,
5L), class = "data.frame")
and I would like to plot a chart by date and other variables (e.g. mode of transport) in ggplotly. The chart is correct, but the labels that appear in the dates show numbers instead of date format. The variable in the database is in date format, and I already tried changing it to different formats and still does not work.
I would also like to add minor date breaks, but can't seem to get it right.
Here is the code I am using for the chart:
# Line chart of row counts per date, one line per transport mode, converted to
# an interactive plotly widget.
chart9 <- ggplot() +
  geom_line(
    data = Flow,
    mapping = aes(x = date, color = transporte),
    stat = "count"
  ) +
  scale_x_date(
    date_minor_breaks = "1 month",
    date_labels = "%Y (%b)"
  ) +
  # Legend title, plot title and axis titles in a single labs() call.
  labs(
    color = "Type of Flow",
    title = "Number of Entrances, by Month and Mode of Transportation, 2017-2021",
    x = "Date",
    y = "Number or People"
  )

ggplotly(chart9)
This is the chart plotted
Any help would be greatly appreciated! :)
Looks like the date class gets dropped when using stat="count". Hence, one option to achieve your desired result would be to aggregate your dataset before passing it to ggplot using e.g. dplyr::count(Flow, date, transporte):
# Option 1: count rows per date and transport mode up front, so ggplot2 gets
# real Date values on x and no stat has to transform them.
#
# The counts go into a new object instead of overwriting `Flow`: the raw rows
# stay available, and any later code that applies stat = "count" to `Flow`
# still counts observations rather than already-aggregated rows.
flow_counts <- dplyr::count(Flow, date, transporte, name = "count")

chart_counts <- ggplot() +
  geom_line(
    data = flow_counts,
    aes(x = date, y = count, color = transporte)
  ) +
  scale_x_date(
    date_minor_breaks = "1 month",
    date_labels = "%Y (%b)"
  ) +
  labs(color = "Type of Flow") +
  ggtitle("Number of Entrances, by Month and Mode of Transportation, 2017-2021") +
  xlab("Date") +
  ylab("Number or People")

# Pass the plot explicitly rather than relying on ggplot2::last_plot().
ggplotly(chart_counts)
A second option, which additionally allows for setting the date format, would be to make use of the text "aesthetic" and convert your numbers back to proper dates:
library(plotly)
# Option 2: keep stat = "count" and build the hover text by hand. After the
# stat runs, x is a plain number, so it is converted back to a Date (days
# since 1970-01-01) and formatted before being pasted into the tooltip.
# The `..name..` spelling is ggplot2's older notation for computed variables;
# newer releases write the same thing as after_stat(name).
ggplot() +
  geom_line(
    data = Flow,
    mapping = aes(
      x = date,
      color = transporte,
      text = paste(
        "count:", ..count..,
        "<br>Date: ", format(as.Date(..x.., origin = "1970-01-01"), "%Y (%b)"),
        "<br>transporte: ", ..color..
      )
    ),
    stat = "count"
  ) +
  scale_x_date(date_minor_breaks = "1 month", date_labels = "%Y (%b)") +
  labs(
    color = "Type of Flow",
    title = "Number of Entrances, by Month and Mode of Transportation, 2017-2021",
    x = "Date",
    y = "Number or People"
  )
# Show only the hand-built text in the tooltip.
ggplotly(tooltip = "text")

how to change p-value label in ggpubr/ggplot2

I'm using ggplot2 to make violin plots of module scores from Seurat, and I want to add statistics to them. I made the following violin plot, and I want to change the bracket labels so that they say "p < 0.13" instead of just 0.13 as they do now (thanks to @StupidWolf for the example!).
library(Seurat)
library(SeuratObject)
library(ggplot2)
library(ggpubr)
library(reshape2)
#add Seurat's module scores and create Seurat object from them =====================
# Gene signatures passed to AddModuleScore(): each is a list holding a single
# character vector of gene symbols (one feature set per list element).
# NOTE(review): a few symbols look mistyped (e.g. "K1AA1467", "PARD68" —
# possibly KIAA1467 / PARD6B). Harmless for the simulated matrix below, whose
# row names are built from these same vectors, but confirm before using the
# lists on real data.
ERlist <- list(c("CPB1", "RP11-53O19.1", "TFF1", "MB", "ANKRD30B",
"LINC00173", "DSCAM-AS1", "IGHG1", "SERPINA5", "ESR1",
"ILRP2", "IGLC3", "CA12", "RP11-64B16.2", "SLC7A2",
"AFF3", "IGFBP4", "GSTM3", "ANKRD30A", "GSTT1", "GSTM1",
"AC026806.2", "C19ORF33", "STC2", "HSPB8", "RPL29P11",
"FBP1", "AGR3", "TCEAL1", "CYP4B1", "SYT1", "COX6C",
"MT1E", "SYTL2", "THSD4", "IFI6", "K1AA1467", "SLC39A6",
"ABCD3", "SERPINA3", "DEGS2", "ERLIN2", "HEBP1", "BCL2",
"TCEAL3", "PPT1", "SLC7A8", "RP11-96D1.10", "H4C8",
"PI15", "PLPP5", "PLAAT4", "GALNT6", "IL6ST", "MYC",
"BST2", "RP11-658F2.8", "MRPS30", "MAPT", "AMFR", "TCEAL4",
"MED13L", "ISG15", "NDUFC2", "TIMP3", "RP13-39P12.3", "PARD68"))
tnbclist <- list(c("FABP7", "TSPAN8", "CYP4Z1", "HOXA10", "CLDN1",
"TMSB15A", "C10ORF10", "TRPV6", "HOXA9", "ATP13A4",
"GLYATL2", "RP11-48O20.4", "DYRK3", "MUCL1", "ID4", "FGFR2",
"SHOX2", "Z83851.1", "CD82", "COL6A1", "KRT23", "GCHFR",
"PRICKLE1", "GCNT2", "KHDRBS3", "SIPA1L2", "LMO4", "TFAP2B",
"SLC43A3", "FURIN", "ELF5", "C1ORF116", "ADD3", "EFNA3",
"EFCAB4A", "LTF", "LRRC31", "ARL4C", "GPNMB", "VIM",
"SDR16C5", "RHOV", "PXDC1", "MALL", "YAP1", "A2ML1",
"RP1-257A7.5", "RP11-353N4.6", "ZBTB18", "CTD-2314B22.3", "GALNT3",
"BCL11A", "CXADR", "SSFA2", "ADM", "GUCY1A3", "GSTP1",
"ADCK3", "SLC25A37", "SFRP1", "PRNP", "DEGS1", "RP11-110G21.2",
"AL589743.1", "ATF3", "SIVA1", "TACSTD2", "HEBP2"))
# Simulate a genes x cells count matrix (negative-binomial draws) whose rows
# are the signature genes and whose columns are cell1 ... cell500.
genes <- unlist(c(ERlist, tnbclist))
n_cells <- 500
mat <- matrix(
  rnbinom(n_cells * length(genes), mu = 500, size = 1),
  ncol = n_cells,
  dimnames = list(genes, paste0("cell", seq_len(n_cells)))
)

# Build and normalise the Seurat object, then assign every cell at random to
# cluster 0 or 1.
sobj <- NormalizeData(CreateSeuratObject(mat))
sobj$ClusterName <- factor(sample(0:1, ncol(sobj), replace = TRUE))

# Score both signatures; AddModuleScore() appends the set index to `name`,
# hence the columns "TNBC_List1" and "ER_List1" fetched below.
sobj <- AddModuleScore(
  object = sobj,
  features = tnbclist,
  name = "TNBC_List",
  ctrl = 5
)
sobj <- AddModuleScore(
  object = sobj,
  features = ERlist,
  name = "ER_List",
  ctrl = 5
)
sobjlists <- FetchData(
  object = sobj,
  vars = c("ER_List1", "TNBC_List1", "ClusterName")
)

#violin plot =======================================================
my_comparisons <- list(c("0", "1"))
ggplot(sobjlists, aes(x = ClusterName, y = ER_List1)) +
  geom_violin(aes(fill = ClusterName)) +
  geom_boxplot(width = 0.1) +
  labs(title = "Object", y = "ER+ Signature", x = "ClusterName") +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Add pairwise comparisons p-value
  stat_compare_means(comparisons = my_comparisons, method = "wilcox.test") +
  # Second label: test result printed at y = 0.75
  stat_compare_means(label.y = 0.75)
The closest I've found is ggpubr: Show significance levels (*** or n.s.) instead of p-value in the label and https://github.com/kassambara/ggpubr/issues/327 but I am not sure how to implement it with how I created the plot.
Thanks for reading!

How to Keep Statistics with Reordered Combined ggplot2 Graph

I'm using ggplot2 to make violin plots of module scores from Seurat, and I want to add statistics to them. I made the following violin plot, but I want to switch the violins around so that they read "1" and "0" from left to right instead of "0" and "1". (Thanks to @StupidWolf for the example!)
library(Seurat)
library(SeuratObject)
library(ggplot2)
library(ggpubr)
library(reshape2)
#add Seurat's module scores and create Seurat object from them =====================
# Gene signatures passed to AddModuleScore(): each is a list holding a single
# character vector of gene symbols (one feature set per list element).
# NOTE(review): a few symbols look mistyped (e.g. "K1AA1467", "PARD68" —
# possibly KIAA1467 / PARD6B). Harmless for the simulated matrix below, whose
# row names are built from these same vectors, but confirm before using the
# lists on real data.
ERlist <- list(c("CPB1", "RP11-53O19.1", "TFF1", "MB", "ANKRD30B",
"LINC00173", "DSCAM-AS1", "IGHG1", "SERPINA5", "ESR1",
"ILRP2", "IGLC3", "CA12", "RP11-64B16.2", "SLC7A2",
"AFF3", "IGFBP4", "GSTM3", "ANKRD30A", "GSTT1", "GSTM1",
"AC026806.2", "C19ORF33", "STC2", "HSPB8", "RPL29P11",
"FBP1", "AGR3", "TCEAL1", "CYP4B1", "SYT1", "COX6C",
"MT1E", "SYTL2", "THSD4", "IFI6", "K1AA1467", "SLC39A6",
"ABCD3", "SERPINA3", "DEGS2", "ERLIN2", "HEBP1", "BCL2",
"TCEAL3", "PPT1", "SLC7A8", "RP11-96D1.10", "H4C8",
"PI15", "PLPP5", "PLAAT4", "GALNT6", "IL6ST", "MYC",
"BST2", "RP11-658F2.8", "MRPS30", "MAPT", "AMFR", "TCEAL4",
"MED13L", "ISG15", "NDUFC2", "TIMP3", "RP13-39P12.3", "PARD68"))
tnbclist <- list(c("FABP7", "TSPAN8", "CYP4Z1", "HOXA10", "CLDN1",
"TMSB15A", "C10ORF10", "TRPV6", "HOXA9", "ATP13A4",
"GLYATL2", "RP11-48O20.4", "DYRK3", "MUCL1", "ID4", "FGFR2",
"SHOX2", "Z83851.1", "CD82", "COL6A1", "KRT23", "GCHFR",
"PRICKLE1", "GCNT2", "KHDRBS3", "SIPA1L2", "LMO4", "TFAP2B",
"SLC43A3", "FURIN", "ELF5", "C1ORF116", "ADD3", "EFNA3",
"EFCAB4A", "LTF", "LRRC31", "ARL4C", "GPNMB", "VIM",
"SDR16C5", "RHOV", "PXDC1", "MALL", "YAP1", "A2ML1",
"RP1-257A7.5", "RP11-353N4.6", "ZBTB18", "CTD-2314B22.3", "GALNT3",
"BCL11A", "CXADR", "SSFA2", "ADM", "GUCY1A3", "GSTP1",
"ADCK3", "SLC25A37", "SFRP1", "PRNP", "DEGS1", "RP11-110G21.2",
"AL589743.1", "ATF3", "SIVA1", "TACSTD2", "HEBP2"))
# Simulate a genes x cells count matrix (negative-binomial draws) whose rows
# are the signature genes and whose columns are cell1 ... cell500.
genes <- unlist(c(ERlist, tnbclist))
n_cells <- 500
mat <- matrix(
  rnbinom(n_cells * length(genes), mu = 500, size = 1),
  ncol = n_cells,
  dimnames = list(genes, paste0("cell", seq_len(n_cells)))
)

# Build and normalise the Seurat object, then assign every cell at random to
# cluster 0 or 1.
sobj <- NormalizeData(CreateSeuratObject(mat))
sobj$ClusterName <- factor(sample(0:1, ncol(sobj), replace = TRUE))

# Score both signatures; AddModuleScore() appends the set index to `name`,
# hence the columns "TNBC_List1" and "ER_List1" fetched below.
sobj <- AddModuleScore(
  object = sobj,
  features = tnbclist,
  name = "TNBC_List",
  ctrl = 5
)
sobj <- AddModuleScore(
  object = sobj,
  features = ERlist,
  name = "ER_List",
  ctrl = 5
)
sobjlists <- FetchData(
  object = sobj,
  vars = c("ER_List1", "TNBC_List1", "ClusterName")
)

#violin plot =======================================================
my_comparisons <- list(c("0", "1"))
ggplot(sobjlists, aes(x = ClusterName, y = ER_List1)) +
  geom_violin(aes(fill = ClusterName)) +
  geom_boxplot(width = 0.1) +
  labs(title = "Object", y = "ER+ Signature", x = "ClusterName") +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Add pairwise comparisons p-value
  stat_compare_means(comparisons = my_comparisons, method = "wilcox.test") +
  # Second label: test result printed at y = 0.75
  stat_compare_means(label.y = 0.75)
Using the solution from How to reorder plots in combined ggplot2 graph?, I can reorder the plots by adding
+ scale_x_discrete(limits = c("1", "0"))
to the very end of the code I have now. However, doing that, the "Wilcoxon, p = 0.13" disappears, and gives the following error:
# Same violin plot with the x axis reordered through scale limits; this is the
# variant that produces the warnings listed underneath.
ggplot(sobjlists, aes(x = ClusterName, y = ER_List1)) +
  geom_violin(aes(fill = ClusterName)) +
  geom_boxplot(width = 0.1) +
  labs(title = "Object", y = "ER+ Signature", x = "ClusterName") +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Add pairwise comparisons p-value
  stat_compare_means(comparisons = my_comparisons, method = "wilcox.test") +
  stat_compare_means(label.y = 0.75) +
  scale_x_discrete(limits = c("1", "0"))
Warning messages:
1: Unknown or uninitialised column: `p`.
2: Computation failed in `stat_compare_means()`:
argument "x" is missing, with no default
For a TL;DR, how do I keep the top statistic in and reorder the violins?
Thanks for reading!
Here is what worked for me (From https://www.datanovia.com/en/blog/how-to-change-ggplot-legend-order/)
# Reorder the factor itself (level "1" first, then "0") instead of setting
# scale limits; ggplot2 draws discrete x values in factor level order.
violin_order <- c("1", "0")
sobjlists[["ClusterName"]] <- factor(sobjlists[["ClusterName"]], levels = violin_order)

Order Bars in interactive graph

I am creating an interactive graph using ggplot2 and plotly in R, the code is below.
I want to reorder the barchart column values so that they are sorted in descending order,
currently they are sorted alphabetically.
Edit: I might not have made what I wanted clear. Currently, the midfielder with the most points is Salah, but the top row in my midfielder column is currently Alli. I would like to sort the column so that the values are in descending order of points rather than alphabetical.
Would someone please inform me how I can do this?
I have saved the finished graph & csv file at the below locations:
IG: https://ianfm94.github.io/Premier_League_Stats/Top_100_Fantasy_PL_Pointscorers.html
CSV File: https://github.com/Ianfm94/Premier_League_Stats/blob/master/CSV_Files/2020-06-01_updated_fpl_stats.csv
rm(list=ls())
# Required packages, you might need to install these
library(ggplot2)
library(dplyr)
library(plotly)
library(tibble)
## Fantasy_PL Data
fpl_data <- read.csv(
  "2020-06-01_updated_fpl_stats.csv",
  header = TRUE,
  fileEncoding = "UTF-8-BOM"
)
#View(fpl_data)

# Interactive Plot Workings
# Columns are read from `fpl_data` explicitly instead of attach()-ing it, so
# nothing on the search path can be masked. head() takes the first 100 rows
# (the original `[0:100]` only worked because index 0 is silently dropped).
# NOTE(review): "top 100" assumes the CSV is already sorted by total points.
top_100_points <- head(fpl_data$total_points, 100)
top_100_player_pos <- head(factor(fpl_data$player_pos), 100)
top_100_surnames <- head(factor(fpl_data$web_name), 100)
top_100_team <- head(factor(fpl_data$team_name), 100)

# Team -> colour lookup; the row order also fixes the legend order.
color_table <- tibble(
  Team_Name = c("Arsenal", "Aston Villa", "Bournemouth", "Brighton & Hove Albion",
                "Burnley", "Chelsea", "Crystal Palace", "Everton",
                "Leicester City", "Liverpool", "Manchester City",
                "Manchester United", "Newcastle United", "Norwich City",
                "Sheffield United", "Southampton", "Tottenham Hotspurs",
                "Watford", "West Ham United", "Wolverhampton Wanderers"),
  Team_Color = c("#EF0107", "#670E36", "#B50E12", "#0057B8",
                 "#6C1D45", "#034694", "#1B458F", "#003399",
                 "#003090", "#C8102E", "#6CABDD", "#DA291C",
                 "#241F20", "#FFF200", "#EE2737", "#D71920",
                 "#132257", "#FBEE23", "#7A263A", "#FDB913")
)
# Left-to-right order of the position bars.
position_table <- tibble(
  Position_Name = c("Goalkeeper", "Defender", "Midfielder", "Striker")
)

# Assemble the plotting frame with its final column names; position and team
# are re-levelled to the explicit orders defined above.
fpl_df <- data.frame(
  Total_Points_by_Position = top_100_points,
  Position_Name = factor(top_100_player_pos,
                         levels = position_table$Position_Name),
  Player_Surname = top_100_surnames,
  Team_Name = factor(top_100_team, levels = color_table$Team_Name),
  stringsAsFactors = FALSE
)
#View(fpl_df)

# Named colour vector: scale_fill_manual() matches named values to teams by
# name. An unnamed vector is matched by position against the teams actually
# present, so every colour would shift if one team had no player in the top 100.
team_colors <- setNames(color_table$Team_Color, color_table$Team_Name)

plot_fpl_1 <- ggplot(fpl_df, aes(x = Position_Name,
                                 y = Total_Points_by_Position,
                                 z = Player_Surname,
                                 fill = Team_Name)) +
  geom_col() +
  scale_fill_manual(values = team_colors) +
  labs(title = "Top 100 Fantasy PL Pointscorer by Position & Team",
       y = "Total Points of Position",
       x = "Player Positions",
       fill = "Team Name") +
  theme_bw() +
  theme(plot.title = element_text(size = 14,
                                  face = "bold",
                                  color = "black"),
        legend.title = element_text(color = "navy",
                                    face = "bold",
                                    size = 10))
# Convert the static plot to an interactive plotly widget and display it.
plot_fpl_1 <- ggplotly(plot_fpl_1)
plot_fpl_1
You can use forcats::fct_reorder to change the order of z. See below:
Libraries:
# Required packages, you might need to install these
library(ggplot2)
library(dplyr)
library(plotly)
library(tibble)
library(RCurl)
library(forcats)
Data:
## Fantasy_PL Data
# Download the CSV text and parse it in memory.
csvurl <- getURL("https://raw.githubusercontent.com/Ianfm94/Premier_League_Stats/master/CSV_Files/2020-06-01_updated_fpl_stats.csv")
fpl_data <- read.csv(text = csvurl)

# Interactive Plot Workings
# Columns are read from `fpl_data` explicitly instead of attach()-ing it, so
# nothing on the search path can be masked. head() takes the first 100 rows
# (the original `[0:100]` only worked because index 0 is silently dropped).
# NOTE(review): "top 100" assumes the CSV is already sorted by total points.
top_100_points <- head(fpl_data$total_points, 100)
top_100_player_pos <- head(factor(fpl_data$player_pos), 100)
top_100_surnames <- head(factor(fpl_data$web_name), 100)
top_100_team <- head(factor(fpl_data$team_name), 100)

# Team -> colour lookup; the row order also fixes the legend order.
color_table <- tibble(
  Team_Name = c("Arsenal", "Aston Villa", "Bournemouth", "Brighton & Hove Albion",
                "Burnley", "Chelsea", "Crystal Palace", "Everton",
                "Leicester City", "Liverpool", "Manchester City",
                "Manchester United", "Newcastle United", "Norwich City",
                "Sheffield United", "Southampton", "Tottenham Hotspurs",
                "Watford", "West Ham United", "Wolverhampton Wanderers"),
  Team_Color = c("#EF0107", "#670E36", "#B50E12", "#0057B8",
                 "#6C1D45", "#034694", "#1B458F", "#003399",
                 "#003090", "#C8102E", "#6CABDD", "#DA291C",
                 "#241F20", "#FFF200", "#EE2737", "#D71920",
                 "#132257", "#FBEE23", "#7A263A", "#FDB913")
)
# Left-to-right order of the position bars.
position_table <- tibble(
  Position_Name = c("Goalkeeper", "Defender", "Midfielder", "Striker")
)

# Assemble the plotting frame with its final column names; position and team
# are re-levelled to the explicit orders defined above.
fpl_df <- data.frame(
  Total_Points_by_Position = top_100_points,
  Position_Name = factor(top_100_player_pos,
                         levels = position_table$Position_Name),
  Player_Surname = top_100_surnames,
  Team_Name = factor(top_100_team, levels = color_table$Team_Name),
  stringsAsFactors = FALSE
)
Plot:
# Named colour vector: scale_fill_manual() matches named values to teams by
# name. An unnamed vector is matched by position against the teams actually
# present, so every colour would shift if one team had no player in the top 100.
team_colors <- setNames(color_table$Team_Color, color_table$Team_Name)

# Stacked bars of points per position. The z aesthetic carries the surname
# factor reordered by descending points (fct_reorder on the negated points).
plot_fpl_1 <- ggplot(fpl_df, aes(x = Position_Name,
                                 y = Total_Points_by_Position,
                                 z = fct_reorder(Player_Surname, -Total_Points_by_Position),
                                 fill = Team_Name)) +
  geom_col() +
  scale_fill_manual(values = team_colors) +
  labs(title = "Top 100 Fantasy PL Pointscorer by Position & Team",
       y = "Total Points of Position",
       x = "Player Positions",
       fill = "Team Name") +
  theme_bw() +
  theme(plot.title = element_text(size = 14,
                                  face = "bold",
                                  color = "black"),
        legend.title = element_text(color = "navy",
                                    face = "bold",
                                    size = 10))
# Convert the static plot to an interactive plotly widget and display it.
plot_fpl_2 <- ggplotly(plot_fpl_1)
plot_fpl_2

Resources