How can I avoid pie chart&legend overlap in R? - r

I wanna create a pie chart of crime types,and add a legend on the right hand,but I tried many times to avoid overlapping,doesn't work at all.
table(dd$Primary.Type.new)
ARSON ASSAULT BATTERY BURGLARY
833 30743 91237 29298
CRIMINAL DAMAGE CRIMINAL TRESPASS DECEPTIVE PRACTICE HOMICIDE
57539 14353 17472 640
KIDNAPPING MOTOR VEHICLE THEFT NARCOTOCS OFFENSE INVOLVING CHILDREN
517 23724 55685 3347
OTHER OFFENSE PUBLIC OFFENSE PUBLIC PEACE VIOLATION ROBBERY
30878 3833 3632 18891
SEX_CRIME THEFT WEAPONS VIOLATION
9331 103255 4792
Type <- table(dd$Primary.Type.new)
Here's that from dput():
structure(c(ARSON = 833L, ASSAULT = 30743L, BATTERY = 91237L,
BURGLARY = 29298L, `CRIMINAL DAMAGE` = 57539L, `CRIMINAL TRESPASS` = 14353L,
`DECEPTIVE PRACTICE` = 17472L, HOMICIDE = 640L, KIDNAPPING = 517L,
`MOTOR VEHICLE THEFT` = 23724L, NARCOTOCS = 55685L, `OFFENSE INVOLVING CHILDREN` = 3347L,
`OTHER OFFENSE` = 30878L, `PUBLIC OFFENSE` = 3833L, `PUBLIC PEACE VIOLATION` = 3632L,
ROBBERY = 18891L, `SEX CRIME` = 9331L, THEFT = 103255L, `WEAPONS VIOLATION` = 4792L
), .Dim = 19L, .Dimnames = list(. = c("ARSON", "ASSAULT", "BATTERY",
"BURGLARY", "CRIMINAL DAMAGE", "CRIMINAL TRESPASS", "DECEPTIVE PRACTICE",
"HOMICIDE", "KIDNAPPING", "MOTOR VEHICLE THEFT", "NARCOTOCS",
"OFFENSE INVOLVING CHILDREN", "OTHER OFFENSE", "PUBLIC OFFENSE",
"PUBLIC PEACE VIOLATION", "ROBBERY", "SEX CRIME", "THEFT", "WEAPONS VIOLATION"
)), class = "table") -> Type
piepercent<- round(100*Type/sum(Type), 1)
pie(Type, edges = 200, radius = 0.8,
clockwise = FALSE,angle = 45, col = rainbow(length(Type)), main = "Pie Chart of Primary Crime Types", labels = piepercent,labelcex = 0.8)
legend("right", inset = .05, title = "Primary Crime Type",legend= dd$Primary.Type.new,fill = rainbow(length(Type)), horiz=FALSE,cex = 0.6)
I tried to use par(), but doestn't work.
and BTW how can I change the labels into percentage? such as convert 20.7 into 20.7%.
Thank you very much.
Update
I also tried 3D piechart
library(plotrix)
pie3D(Type,labels = piepercent,explode = 0.1, main = "3D Pie Chart of
Primary Crime Types", labelcex = 0.8)
legend("bottom", inset = .05, title = "Primary Crime Type",legend= dd$Primary.Type.new,fill = rainbow(length(Type)), horiz=TRUE,cex = 0.6)

I hesitate to post this since this is an absolutely terrible use case for a pie chart, but it's possible to make it a bit more readable and color-blind friendly:
structure(c(ARSON = 833L, ASSAULT = 30743L, BATTERY = 91237L,
BURGLARY = 29298L, `CRIMINAL DAMAGE` = 57539L, `CRIMINAL TRESPASS` = 14353L,
`DECEPTIVE PRACTICE` = 17472L, HOMICIDE = 640L, KIDNAPPING = 517L,
`MOTOR VEHICLE THEFT` = 23724L, NARCOTOCS = 55685L, `OFFENSE INVOLVING CHILDREN` = 3347L,
`OTHER OFFENSE` = 30878L, `PUBLIC OFFENSE` = 3833L, `PUBLIC PEACE VIOLATION` = 3632L,
ROBBERY = 18891L, `SEX CRIME` = 9331L, THEFT = 103255L, `WEAPONS VIOLATION` = 4792L
), .Dim = 19L, .Dimnames = list(. = c("ARSON", "ASSAULT", "BATTERY",
"BURGLARY", "CRIMINAL DAMAGE", "CRIMINAL TRESPASS", "DECEPTIVE PRACTICE",
"HOMICIDE", "KIDNAPPING", "MOTOR VEHICLE THEFT", "NARCOTOCS",
"OFFENSE INVOLVING CHILDREN", "OTHER OFFENSE", "PUBLIC OFFENSE",
"PUBLIC PEACE VIOLATION", "ROBBERY", "SEX CRIME", "THEFT", "WEAPONS VIOLATION"
)), class = "table") -> Type
Order the slices (IMPORTANT):
Type <- sort(Type, decreasing = TRUE)
Proper % and decent labels:
piepercent <- scales::percent(as.numeric(Type/sum(Type)))
Margins:
par(mar = c(1, 1, 1, 1)) # bltr
pie(
Type,
edges = 200,
radius = 0.8,
clockwise = TRUE, # IMPORTANT
angle = 45,
col = viridis::viridis_pal(option = "magma", direction=-1)(length(Type)), # BETTER COLOR PALETTE
labels = tail(piepercent, -7), # NEVER DISPLAY OVERLAPPING LABELS
cex = 0.7
)
legend(
x = 1.2, # DELIBERATE POSITION
y = 0.5, # DELIBERATE POSITION
inset = .05,
title = "Primary Crime Type",
legend = names(Type), # YOU WERE PASSING IN _ALL_ THE REPEAT NAMES
fill = viridis::viridis_pal(option = "magma", direction=-1)(length(Type)), # USE THE SAME COLOR PALETTE
horiz = FALSE,
cex = 0.6, # PROPER PARAMETER FOR TEXT SIZE
text.width = 0.7 # SET THE BOX WIDTH
)
Add the title manually:
title("Pie Chart of Primary Crime Types", line = -1)

Can't let a pie chart stand alone (and, now, a 3D one at that):
structure(list(cat = c("Arson", "Assault", "Battery", "Burglary",
"Criminal Damage", "Criminal Trespass", "Deceptive Practice",
"Homicide", "Kidnapping", "Motor Vehicle Theft", "Narcotocs",
"Offense Involving Children", "Other Offense", "Public Offense",
"Public Peace Violation", "Robbery", "Sex Crime", "Theft", "Weapons Violation"
), val = c(833, 30743, 91237, 29298, 57539, 14353, 17472, 640,
517, 23724, 55685, 3347, 30878, 3833, 3632, 18891, 9331, 103255,
4792), pct = c(0.001666, 0.061486, 0.182474, 0.058596, 0.115078,
0.028706, 0.034944, 0.00128, 0.001034, 0.047448, 0.11137, 0.006694,
0.061756, 0.007666, 0.007264, 0.037782, 0.018662, 0.20651, 0.009584
)), class = "data.frame", row.names = c(NA, -19L)) -> xdf
dplyr::arrange(xdf, pct) %>%
dplyr::mutate(cat = factor(cat, levels=cat)) %>%
dplyr::mutate(lab = sprintf("%s (%s)", scales::comma(val), scales::percent(pct))) %>%
ggplot(aes(pct, cat)) +
geom_segment(aes(xend=0, yend=cat), size=4, color = "#617a89") +
geom_label(
aes(label=lab), label.size = 0, hjust=0, nudge_x=0.001,
size = 3, family = hrbrthemes::font_rc, color = "#909495"
) +
hrbrthemes::scale_x_percent(expand=c(0,0.001), limits=c(0,0.25)) +
labs(x = NULL, y = NULL, title = "'Theft', 'Battery' & 'Criminal Damage' Account\nfor Half of Primary Recorded Crime Types") +
hrbrthemes::theme_ipsum_rc(grid="X") +
theme(axis.text.x = element_blank())
How I got xdf:
readLines(textConnection("ARSON ASSAULT BATTERY BURGLARY
833 30743 91237 29298
CRIMINAL_DAMAGE CRIMINAL_TRESPASS DECEPTIVE_PRACTICE HOMICIDE
57539 14353 17472 640
KIDNAPPING MOTOR_VEHICLE_THEFT NARCOTOCS OFFENSE_INVOLVING_CHILDREN
517 23724 55685 3347
OTHER_OFFENSE PUBLIC_OFFENSE PUBLIC_PEACE_VIOLATION ROBBERY
30878 3833 3632 18891
SEX_CRIME THEFT WEAPONS_VIOLATION
9331 103255 4792")) %>%
trimws() %>%
stri_split_regex("[[:space:]]+") -> x
do.call(rbind.data.frame, lapply(seq.int(1, length(x), 2), function(i) {
data.frame(
cat = stri_trans_totitle(gsub("_", " ", x[[i]])),
val = as.numeric(x[[i+1]]),
stringsAsFactors = FALSE
)
})) %>%
mutate(pct = val/sum(val)) -> xdf

Related

Order Bars in interactive graph

I am creating an interactive graph using ggplot2 and plotly in R, the code is below.
I want to reorder the barchart column values so that they are sorted in descending order,
currently they are sorted alphabetically.
Edit: I might not have made what I wanted clear. Currently, the midfielder with the most points is Salah, but the top row in my midfielder column is currently Alli. I would like to sort the column so that the values are in descending order of points rather than alphabetical.
Would someone please inform me how I can do this?
I have saved the finished graph & csv file at the below locations:
IG: https://ianfm94.github.io/Premier_League_Stats/Top_100_Fantasy_PL_Pointscorers.html
CSV File: https://github.com/Ianfm94/Premier_League_Stats/blob/master/CSV_Files/2020-06-01_updated_fpl_stats.csv
rm(list=ls())
# Required packages, you might need to install these
library(ggplot2)
library(dplyr)
library(plotly)
library(tibble)
## Fantasy_PL Data
fpl_data = read.csv('2020-06-01_updated_fpl_stats.csv',
header = T, fileEncoding = "UTF-8-BOM")
attach(fpl_data)
#View(fpl_data)
# Interactive Plot Workings
top_100_points = total_points[0:100]
top_100_player_pos = factor(player_pos)[0:100]
top_100_surnames = factor(web_name)[0:100]
top_100_team = factor(team_name)[0:100]
color_table = tibble(
Team_Name = c("Arsenal", "Aston Villa", "Bournemouth", "Brighton & Hove Albion",
"Burnley", "Chelsea", "Crystal Palace", "Everton",
"Leicester City", "Liverpool", "Manchester City",
"Manchester United", "Newcastle United", "Norwich City",
"Sheffield United", "Southampton", "Tottenham Hotspurs",
"Watford", "West Ham United", "Wolverhampton Wanderers"),
Team_Color = c("#EF0107", "#670E36", "#B50E12", "#0057B8",
"#6C1D45", "#034694", "#1B458F", "#003399",
"#003090", "#C8102E", "#6CABDD", "#DA291C",
"#241F20", "#FFF200", "#EE2737", "#D71920",
"#132257", "#FBEE23", "#7A263A", "#FDB913")
)
position_table = tibble(
Position_Name = c("Goalkeeper", "Defender", "Midfielder", "Striker"),
)
fpl_df = data.frame(y = top_100_points,
x = top_100_player_pos,
z = top_100_surnames,
w = top_100_team,
stringsAsFactors = F)
fpl_df$w = factor(fpl_df$w, levels = color_table$Team_Name)
fpl_df$x = factor(fpl_df$x, levels = position_table$Position_Name)
names(fpl_df)[names(fpl_df) == "x"] = "Position_Name"
names(fpl_df)[names(fpl_df) == "y"] = "Total_Points_by_Position"
names(fpl_df)[names(fpl_df) == "z"] = "Player_Surname"
names(fpl_df)[names(fpl_df) == "w"] = "Team_Name"
#View(fpl_df)
plot_fpl_1 = ggplot(fpl_df, aes(x = Position_Name,
y = Total_Points_by_Position,
z = Player_Surname,
fill = Team_Name)) +
geom_col() +
scale_fill_manual(values = color_table$Team_Color) +
labs(title = "Top 100 Fantasy PL Pointscorer by Position & Team",
y = "Total Points of Position",
x = "Player Positions",
fill = "Team Name") +
theme_bw() +
theme(plot.title = element_text(size = 14,
face = "bold",
color = "black"),
legend.title = element_text(color = "navy",
face = "bold",
size = 10))
plot_fpl_1 = ggplotly(plot_fpl_1)
plot_fpl_1
You can use forcats::fct_reorder to change the order of z. See below:
Libraries:
# Required packages, you might need to install these
library(ggplot2)
library(dplyr)
library(plotly)
library(tibble)
library(RCurl)
library(forcats)
Data:
## Fantasy_PL Data
csvurl <- getURL("https://raw.githubusercontent.com/Ianfm94/Premier_League_Stats/master/CSV_Files/2020-06-01_updated_fpl_stats.csv")
fpl_data <- read.csv(text = csvurl)
attach(fpl_data)
# Interactive Plot Workings
top_100_points = total_points[0:100]
top_100_player_pos = factor(player_pos)[0:100]
top_100_surnames = factor(web_name)[0:100]
top_100_team = factor(team_name)[0:100]
color_table = tibble(
Team_Name = c("Arsenal", "Aston Villa", "Bournemouth", "Brighton & Hove Albion",
"Burnley", "Chelsea", "Crystal Palace", "Everton",
"Leicester City", "Liverpool", "Manchester City",
"Manchester United", "Newcastle United", "Norwich City",
"Sheffield United", "Southampton", "Tottenham Hotspurs",
"Watford", "West Ham United", "Wolverhampton Wanderers"),
Team_Color = c("#EF0107", "#670E36", "#B50E12", "#0057B8",
"#6C1D45", "#034694", "#1B458F", "#003399",
"#003090", "#C8102E", "#6CABDD", "#DA291C",
"#241F20", "#FFF200", "#EE2737", "#D71920",
"#132257", "#FBEE23", "#7A263A", "#FDB913")
)
position_table = tibble(
Position_Name = c("Goalkeeper", "Defender", "Midfielder", "Striker"),
)
fpl_df = data.frame(y = top_100_points,
x = top_100_player_pos,
z = top_100_surnames,
w = top_100_team,
stringsAsFactors = F)
fpl_df$w = factor(fpl_df$w, levels = color_table$Team_Name)
fpl_df$x = factor(fpl_df$x, levels = position_table$Position_Name)
names(fpl_df)[names(fpl_df) == "x"] = "Position_Name"
names(fpl_df)[names(fpl_df) == "y"] = "Total_Points_by_Position"
names(fpl_df)[names(fpl_df) == "z"] = "Player_Surname"
names(fpl_df)[names(fpl_df) == "w"] = "Team_Name"
Plot:
plot_fpl_1 = ggplot(fpl_df, aes(x = Position_Name,
y = Total_Points_by_Position,
z = fct_reorder(Player_Surname, -Total_Points_by_Position),
fill = Team_Name)) +
geom_col() +
scale_fill_manual(values = color_table$Team_Color) +
labs(title = "Top 100 Fantasy PL Pointscorer by Position & Team",
y = "Total Points of Position",
x = "Player Positions",
fill = "Team Name") +
theme_bw() +
theme(plot.title = element_text(size = 14,
face = "bold",
color = "black"),
legend.title = element_text(color = "navy",
face = "bold",
size = 10))
plot_fpl_2 = ggplotly(plot_fpl_1)
plot_fpl_2

Plotting on a geographical map the provenience of our patients

I am trying to put on a Italian geographical map a dot reporting the provenience ('provincia') of our patients. Ideally, the dot size should be proportional to the number of patients coming from that 'provincia'. An example of the list I would like to plot is the following.
MI 8319
CO 537
MB 436
VA 338
BG 310
PV 254
CR 244
NO 210
RM 189
CS 179
In the first column there is the 'provincia' code: MI (Milano), CO (Como), MB (Monza-Brianza), etc. In the second column there is the number of patients from that 'provincia'. So the output should be an Italian political map where the biggest dot is around the city of Milano (MI), the second biggest dot is near the city of Como (CO), the third one is around the city of Monza-Brianza (MB),etc.
Is there any package that could do the plot I am looking for? I found a tool that could do the job here, but apparently they expect that I load the geographical coordinates in order to do the plot.
https://www.littlemissdata.com/blog/maps
Thanks in advance.
Here is one way to handle your task. You have the abbreviations for Italian province. You want to use them to merge your data with polygon data. If you download Italy's polygons from GADM, you can obtain data that contain the abbreviations. Specifically, the column, HASC_2 is the one. You need to merge your data with the polygon data. Then, you want to create another data set which contains centroid. You can draw a map with the two data sets.
library(tidyverse)
library(sf)
library(ggthemes)
# Get the sf file from https://gadm.org/download_country_v3.html
# and import it in R.
mysf <- readRDS("gadm36_ITA_2_sf.rds")
# This is your data, which is called mydata.
mydata <- structure(list(abbs = c("MI", "CO", "MB", "VA", "BG", "PV", "CR",
"NO", "RM", "CS"), value = c(8319L, 537L, 436L, 338L, 310L, 254L,
244L, 210L, 189L, 179L)), class = "data.frame", row.names = c(NA,
-10L))
abbs value
1 MI 8319
2 CO 537
3 MB 436
4 VA 338
5 BG 310
6 PV 254
7 CR 244
8 NO 210
9 RM 189
10 CS 179
# Abbreviations are in HASC_2 in mysf. Manipulate strings so that
# I can join mydata with mysf with the abbreviations. I also get
# longitude and latitude with st_centroid(). This data set is for
# geom_point().
mysf2 <- mutate(mysf, HASC_2 = sub(x = HASC_2, pattern = "^IT.", replacement = "")) %>%
left_join(mydata, by = c("HASC_2" = "abbs")) %>%
mutate(lon = map_dbl(geometry, ~st_centroid(.x)[[1]]),
lat = map_dbl(geometry, ~st_centroid(.x)[[2]]))
# Draw a map
ggplot() +
geom_sf(data = mysf) +
geom_point(data = mysf2, aes(x = lon, y = lat, size = value)) +
theme_map()
UPDATE ON INSET MAP
This is an update following different suggestion on using inset maps, which I think it would be the best solution for yout question and comments:
library(sf)
library(cartography)
EU = st_read("~/R/mapslib/EUROSTAT/NUTS_RG_03M_2016_3035_LEVL_3.geojson")
IT = subset(EU, CNTR_CODE == "IT")
mydata <-
structure(list(
abbs = c("MI", "CO", "MB", "VA", "BG", "PV", "CR",
"NO", "RM", "CS"),
value = c(8319L, 537L, 436L, 338L, 310L, 254L,
244L, 210L, 189L, 179L),
nuts = c("ITC4C","ITC42","ITC4D","ITC41",
"ITC46", "ITC48","ITC4A","ITC15",
"ITI43","ITF61")
),
class = "data.frame",
row.names = c(NA, -10L))
patients = merge(IT, mydata, by.x = "id", by.y = "nuts")
#Get breaks for map
br=getBreaks(patients$value)
#Delimit zone
#Based on NUTS1, Nortwest Italy
par(mar=c(0,0,0,0))
ghostLayer(IT[grep("ITC",IT$NUTS_ID),], bg="lightblue")
plot(st_geometry(EU), col="grey90", add=TRUE)
plot(st_geometry(IT), col = "#FEFEE9", border = "#646464", add=TRUE)
choroLayer(
patients,
var = "value",
breaks = br,
col = carto.pal(pal1 = "red.pal", n1 = length(br)-1),
legend.pos = "topleft",
legend.title.txt = "Total patients",
add = TRUE,
legend.frame = TRUE
)
labelLayer(patients,txt="abbs", halo=TRUE, overlap = FALSE)
#Inset
par(
fig = c(0, 0.4, 0.01, 0.4),
new = TRUE
)
inset=patients[patients$abbs %in% c("RM","CS"),]
ghostLayer(inset, bg="lightblue")
plot(st_geometry(EU), col="grey90", add=TRUE)
plot(st_geometry(IT), col = "#FEFEE9", border = "#646464", add=TRUE)
choroLayer(
patients,
var = "value",
breaks = br,
col = carto.pal(pal1 = "red.pal", n1 = length(br)-1),
legend.pos = "n",
add = TRUE
)
labelLayer(patients,txt="abbs", halo=TRUE, overlap = FALSE)
box(which = "figure", lwd = 1)
#RESTORE PLOT
par(fig=c(0,1,0,1))
OLD ANSWER
Following my comment on plotting labels, maybe with circles is not the best option for your map, given the concentration. I suggest you to use another kind of map for that, as chorolayer, I leveraged on https://stackoverflow.com/users/3304471/jazzurro for the dataframe.
library(sf)
library(cartography)
EU = st_read("~/R/mapslib/EUROSTAT/NUTS_RG_03M_2016_3035_LEVL_3.geojson")
IT = subset(EU, CNTR_CODE == "IT")
mydata <-
structure(list(
abbs = c("MI", "CO", "MB", "VA", "BG", "PV", "CR",
"NO", "RM", "CS"),
value = c(8319L, 537L, 436L, 338L, 310L, 254L,
244L, 210L, 189L, 179L),
nuts = c("ITC4C","ITC42","ITC4D","ITC41",
"ITC46", "ITC48","ITC4A","ITC15",
"ITI43","ITF61")
),
class = "data.frame",
row.names = c(NA, -10L))
patients = merge(IT, mydata, by.x = "id", by.y = "nuts")
#Options1 - With circles
par(mar = c(0, 0, 0, 0))
plot(st_geometry(IT), col = "#FEFEE9", border = "#646464")
propSymbolsLayer(
x = patients,
var = "value",
col = carto.pal(pal1 = "red.pal", n1 = 6),
legend.title.txt = "Total patients",
add = TRUE
)
#Option 2 - Chorolayer with labels
par(mar = c(0, 0, 0, 0))
plot(st_geometry(IT), col = "#FEFEE9", border = "#646464")
choroLayer(
patients,
var = "value",
col = carto.pal(pal1 = "red.pal", n1 = 6),
legend.title.txt = "Total patients",
add = TRUE
)
#Create labels
patients$label = paste(patients$abbs, patients$value, sep = " - ")
labelLayer(
patients,
txt = "label",
overlap = FALSE,
halo = TRUE,
show.lines = TRUE,
)
Data from
https://ec.europa.eu/eurostat/cache/GISCO/distribution/v2/nuts/nuts-2016-files.html

Underline part of text label in ggplot

I am trying to make a label that is made up of a book title and book author. I would like to underline the title, but not the author, in the label.
Here is the MWE data:
Title,Author,Pages,Date Started,Date Finished
underline('Time Travel'),'James Gleick',353,1/1/17,1/27/17
underline('The Road'),'Cormac McCarthy',324,1/28/17,3/10/17
This code works but does not allow for the title and author
library(ggplot2)
library(tidyverse)
library(ggrepel)
library(ggalt)
books.2017 <- read_csv('books_2017.csv')
books.2017$`Date Started` <- as.Date(books.2017$`Date Started`, "%m/%d/%y")
books.2017$`Date Finished` <- as.Date(books.2017$`Date Finished`, "%m/%d/%y")
ggplot(books.2017, aes(x=`Date Started`, xend=`Date Finished`)) +
geom_dumbbell(aes(size=Pages),size_x=0, size_xend=0) +
geom_text_repel(aes(label=paste(Title)), parse=TRUE)
When I try to change geom_text_repel to something like:
geom_text_repel(aes(label=paste(Title,Author)), parse=TRUE)
I get this error:
Error in parse(text = as.character(lab)) :
<text>:1:26: unexpected string constant
1: underline('Time Travel') 'James Gleick'
^
EDIT The labels should look something like this
You need to form a valid plotmath expression, qplot(1,1,geom="blank") + annotate("text", x=1, y=1, label='underline("this")*" and that"', parse = TRUE)
Applied to your dataset this might look like label=paste(Title, Author, sep="~"), where ~ is a non-breaking space plotmath separator. After fixing your non-reproducible example, this gives
It looks like you are trying to pull down your goodreads data, and map out the number of books you read over the year, against start data, end data and book size.
To do what you propose, you can use the parse option on geom_text*(, to do this you have to create a parse string with sprintf() and pass that to geom_text*( as the label input where parse = TRUE.
To add a newline you might consider using plotmath::over()
parseLabel <- sprintf("over(%s,%s)",
gsub(" ", "~", books.2007$Title, fixed = TRUE),
gsub(" ", "~", books.2007$Author, fixed = TRUE))
parseLabel
alternatively, you can use underline, however adding a newline is tricky as plotmath() does not directly support the use of newline in a parse formula.
parseLabel <- sprintf("underline(%s)~\n~%s",
gsub(" ", "~", books.2007$Title, fixed = TRUE),
gsub(" ", "~", books.2007$Author, fixed = TRUE))
parseLabel
Note: Baptiste correctly hilights this in his answer I am just expanding upon his work here using an example dataset I created.
OK, here is a quick example based on the above assumptions. I hope this points you in the right direction.
Note: I have appended an example dataset for people to use.
Adding an Underline
In order to add an underline to the text, you can harness plotmath by setting parse=true in the geom_label*() call.
Simple example using plotmath wih geom_label
library(tidyverse) # Loads ggplot2
library(graphics)
library(ggrepel)
library(gtable)
library(ggalt)
# load test dataset
# ... See example data set
# books.2007 <- structure...
gp <- ggplot(books.2007)
gp <- gp + geom_dumbbell( aes(x = `Date Started`,
xend = `Date Finished`,
y = ISBN,
size = as.numeric(Pages)),
size_x = 0, size_xend = 0)
# Construct parseLabel using sprintf
parseLabel <- sprintf("underline(%s)~\n~%s",
gsub(" ", "~", books.2007$Title, fixed = TRUE),
gsub(" ", "~", books.2007$Author, fixed = TRUE))
gp <- gp + geom_label(aes(x = `Date Started`,
y = ISBN),
label = parseLabel,
vjust = 1.5, hjust = "inward", parse = TRUE)
gp <- gp + labs(size = "Book Size")
gp
Example Plot Output
Simple example using plotmath with geom_label_repel
nb. My personal sense would be geom_text is easier to use as geom_label_repel requires computation overhead to calculate the positioning of the labels.
## Construct parse string
##
##
parseLabel <- sprintf("underline(%s)~\n~%s",
gsub(" ", "~", books.2007$Title, fixed = TRUE),
gsub(" ", "~", books.2007$Author, fixed = TRUE))
parseLabel
rm(gp)
gp <- ggplot(books.2007)
gp <- gp + geom_dumbbell( aes(x = `Date Started`,
xend = `Date Finished`,
y = ISBN,
size = as.numeric(Pages)),
size_x = 0, size_xend = 0)
gp <- gp + geom_label_repel(aes(x = `Date Started`,
y = ISBN),
label = parseLabel,
# max.iter = 100,
parse = TRUE)
gp <- gp + labs(size = "Book Size")
gp
Example Plot Output with geom_text_repel
Example Data Set:
books.2007 <- structure(list(Title = c("memoirs of a geisha", "Blink: The Power of Thinking Without Thinking",
"Power of One", "Harry Potter and the Half-Blood Prince (Book 6)",
"Dune (Dune Chronicles Book 1)"), Author = c("arthur golden",
"Malcolm Gladwell", "Bryce Courtenay", "J.K. Rowling", "Frank Herbert"
), ISBN = c("0099498189", "0316172324", "034541005X", "0439785960",
"0441172717"), `My Rating` = c(4L, 3L, 5L, 4L, 5L), `Average Rating` = c(4,
4.17, 5, 4.38, 4.55), Publisher = c("vintage", "Little Brown and Company",
"Ballantine Books", "Scholastic Paperbacks", "Ace"), Binding = c("paperback",
"Hardcover", "Paperback", "Paperback", "Paperback"), `Year Published` = c(2005L,
2005L, 1996L, 2006L, 1990L), `Original Publication Year` = c(2005L,
2005L, 1996L, 2006L, 1977L), `Date Read` = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), `Date Added` = structure(c(13558,
13558, 13558, 13558, 13558), class = "Date"), Bookshelves = c("fiction",
"nonfiction marketing", "fiction", "fiction fantasy", "fiction scifi"
), `My Review` = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_), `Date Started` = structure(c(13577,
13610, 13634, 13684, 13722), class = "Date"), `Date Finished` = structure(c(13623,
13647, 13660, 13689, 13784), class = "Date"), Pages = c("522",
"700", "300", "145", "700")), .Names = c("Title", "Author", "ISBN",
"My Rating", "Average Rating", "Publisher", "Binding", "Year Published",
"Original Publication Year", "Date Read", "Date Added", "Bookshelves",
"My Review", "Date Started", "Date Finished", "Pages"), row.names = c(NA,
-5L), spec = structure(list(cols = structure(list(Title = structure(list(), class = c("collector_character",
"collector")), Author = structure(list(), class = c("collector_character",
"collector")), ISBN = structure(list(), class = c("collector_character",
"collector")), `My Rating` = structure(list(), class = c("collector_integer",
"collector")), `Average Rating` = structure(list(), class = c("collector_double",
"collector")), Publisher = structure(list(), class = c("collector_character",
"collector")), Binding = structure(list(), class = c("collector_character",
"collector")), `Year Published` = structure(list(), class = c("collector_integer",
"collector")), `Original Publication Year` = structure(list(), class = c("collector_integer",
"collector")), `Date Read` = structure(list(), class = c("collector_character",
"collector")), `Date Added` = structure(list(), class = c("collector_character",
"collector")), Bookshelves = structure(list(), class = c("collector_character",
"collector")), `My Review` = structure(list(), class = c("collector_character",
"collector"))), .Names = c("Title", "Author", "ISBN", "My Rating",
"Average Rating", "Publisher", "Binding", "Year Published", "Original Publication Year",
"Date Read", "Date Added", "Bookshelves", "My Review")), default = structure(list(), class = c("collector_guess",
"collector"))), .Names = c("cols", "default"), class = "col_spec"), class = c("tbl_df",
"tbl", "data.frame"))
Simple Example - no formatting
For completeness here is how I would approach the problem avoiding the formula construction problems.
gp <- ggplot(books.2007)
gp <- gp + geom_dumbbell( aes(x = `Date Started`,
xend = `Date Finished`,
y = ISBN,
size = as.numeric(Pages)),
size_x = 0, size_xend = 0)
t <- paste(books.2007$Title, "\n", books.2007$Author)
gp <- gp + geom_label(aes(x = `Date Started`,
y = ISBN),
label = t,
vjust = 1.5, hjust = "inward", parse = FALSE)
gp <- gp + labs(size = "Book Size")
gp
Plot Output
This problem could be made a lot simpler if italics sufficed instead of underlines, as grid::gpar() does not support an underline fontface. Here's an example of using italics instead:
library(tibble)
library(ggplot2)
books.2017 <-
tribble(~Title,~Author,~Pages,~`Date Started`,~`Date Finished`,
'Time Travel','James Gleick',353,'1/1/17','1/27/17',
'The Road','Cormac McCarthy',324,'1/28/17','3/10/17')
ggplot(books.2017, aes(x = `Date Started`,
xend = `Date Finished`,
y = Title,
yend = Title)) +
geom_segment(aes(size = Pages),
lineend = 'round') +
geom_text(aes(label = Title),
fontface = 'italic',
vjust = -3.5) +
geom_text(aes(label = Author),
vjust = -2)

Error: number of rows of matrices must match (ggplot facet)

I'm trying to assign labels to my ggplot2 facets. As I'm thinking this is a character problem, I'm using the labels exactly as they are in my dataset, so it's a little long, I apologize.
set.seed(123)
names <- c("acquisitionsmergers", "analystratings", "assets", "balanceofpayments",
"bankruptcy", "civilunrest", "corporateresponsibility", "credit",
"creditratings", "crime", "dividends", "earnings", "equityactions",
"exploration", "government", "indexes", "industrialaccidents",
"insidertrading", "investorrelations", "laborissues", "legal",
"marketing", "orderimbalances", "partnerships", "pricetargets",
"productsservices", "publicopinion", "regulatory", "revenues",
"security", "stockprices", "taxes", "technicalanalysis", "transportation",
"warconflict")
mylabels <- c("acquisitionsmergers" = "Acquisitions/Mergers",
"analystratings" = "Analyst Ratings",
"assets" = "Assets",
"balanceofpayments" = "Balance of Payments",
"bankruptcy" = "Bankruptcy",
"civilunrest" = "Civil Unrest",
"corporateresponsibility" = "Corporate Responsibility",
"credit" = "Credit",
"creditratings" = "Credit Ratings",
"crime" = "Crime",
"dividends" = "Dividends",
"earnings" = "Earnings",
"equityactions" = "Equity Actions",
"exploration" = "Exploration",
"government" = "Government",
"indexes" = "Indexes",
"industrialaccidents" = "Industrial Accidents",
"insidertrading" = "Insider Trading",
"investorrelations" = "Investor Relations",
"laborissues" = "Labor Issues",
"legal" = "Legal",
"marketing" = "Marketing",
"orderimbalances" = "Order Imbalances",
"partnerships" = "Partnerships",
"pricetargets" = "Price Targets",
"productsservices" = "Product Services",
"publicopinion" = "Public Opinion",
"regulatory" = "Regulatory",
"revenues" = "Revenues",
"security" = "Security",
"stockprices" = "Stockprices",
"taxes" = "Taxes",
"technicalanalysis" = "Technical Analysis",
"transportation" = "Transportation",
"warconflict" = "War Conflict")
df <- data.frame(item = rep(names, each=5), value=rnorm(5*35,5,2), date = rep(seq(as.Date("2000/1/1"), by = "month", length.out = 5),35))
Then,
library(ggplot2)
ggplot(df, aes(x=date, y=value, color=item)) +
geom_line() +
facet_wrap( ~ item, ncol=4, scales="free_y", labeller = mylabels)
Produces
Error in cbind(labels = list(), list(`{`, if (!is.null(.rows) || !is.null(.cols)) { :
number of rows of matrices must match (see arg 2)
I've used labeller before without problems, so I'm not sure why it's throwing this error. I checked a few things, such as making sure there is a match:
all(names(mylabels) %in% names)
length(mylabels) == length(names)
Thanks for any help!
What about this?
df$item <- factor(df$item,
labels = c("Acquisitions/Mergers","Analyst Ratings","Assets", "Balance of Payments","Bankruptcy", "Civil Unrest",
"Corporate Responsibility", "Credit", "Credit Ratings", "Crime", "Dividends", "Earnings", "Equity Actions",
"Exploration", "Government", "Indexes", "Industrial Accidents", "Insider Trading", "Investor Relations",
"Labor Issues", "Legal", "Marketing", "Order Imbalances","Partnerships", "Price Targets",
"Product Services", "Public Opinion","Regulatory", "Revenues","Security", "Stockprices",
"Taxes", "Technical Analysis", "Transportation", "War Conflict"))
ggplot(df, aes(x=date, y=value, color=item)) +
geom_line() +
facet_wrap( ~ item, ncol=4, scales="free_y")
UPDATE
to address the questions in the comment
First, are the label factors taken as the unique order? In other words, the original "item" vector in the dataframe should be sorted so it is in the same order as labels?
Answer
The order of levels in the labels vector must be the same as the order of levels in item vector.
Below are the levels of item
levels(df$item)
[1] "acquisitionsmergers" "analystratings" "assets" "balanceofpayments" "bankruptcy"
[6] "civilunrest" "corporateresponsibility" "credit" "creditratings" "crime"
[11] "dividends" "earnings" "equityactions" "exploration" "government"
[16] "indexes" "industrialaccidents" "insidertrading" "investorrelations" "laborissues"
[21] "legal" "marketing" "orderimbalances" "partnerships" "pricetargets"
[26] "productsservices" "publicopinion" "regulatory" "revenues" "security"
[31] "stockprices" "taxes" "technicalanalysis" "transportation" "warconflict"
I usually copy paste them inside labels add commas, remove the numbers and change the names as I like.
There is another way, below, to change the names of the levels of item using dplyr and forcats
library(dplyr)
library(forcats)
df <- df %>%
mutate(item_update = item) %>% # create new column called item_update to change the names of item levels
mutate(item_update = fct_recode(item_update,
"Acquisitions/Mergers" = "acquisitionsmergers" ,
"Analyst Ratings" = "analystratings" ,
"Assets" = "assets",
"Balance of Payments" = "balanceofpayments",
"Bankruptcy" = "bankruptcy",
"Civil Unrest" = "civilunrest",
"Corporate Responsibility" = "corporateresponsibility",
"Credit" = "credit",
"Credit Ratings" = "creditratings",
"Crime" = "crime",
"Dividends" = "dividends",
"Earnings" = "earnings",
"Equity Actions" = "equityactions",
"Exploration" = "exploration",
"Government" = "government",
"Indexes" = "indexes",
"Industrial Accidents" = "industrialaccidents",
"Insider Trading" = "insidertrading",
"Investor Relations" = "investorrelations",
"Labor Issues" = "laborissues",
"Legal" = "legal" ,
"Marketing" = "marketing",
"Order Imbalances" = "orderimbalances",
"Partnerships" = "partnerships",
"Price Targets" = "pricetargets",
"Product Services" = "productsservices",
"Public Opinion" = "publicopinion" ,
"Regulatory" = "regulatory",
"Revenues" = "revenues",
"Security" = "security",
"Stockprices" = "stockprices",
"Taxes" = "taxes",
"Technical Analysis" = "technicalanalysis",
"Transportation" = "transportation" ,
"War Conflict" = "warconflict"
))
and we can plot it as below
ggplot(df, aes(x=date, y=value, color=item)) +
geom_line() +
facet_wrap( ~ item_update, ncol=4, scales="free_y")
Second, does this appear to be a bug, which I should file with the ggplot2 page?
Answer
It is not a bug.
Your approach will work fine if you edit mylabels to be
mylabels <- c(acquisitionsmergers = "Acquisitions/Mergers",
analystratings = "Analyst Ratings",
assets = "Assets",
balanceofpayments = "Balance of Payments",
bankruptcy = "Bankruptcy",
civilunrest = "Civil Unrest",
corporateresponsibility = "Corporate Responsibility",
credit = "Credit",
creditratings = "Credit Ratings",
crime = "Crime",
dividends = "Dividends",
earnings = "Earnings",
equityactions = "Equity Actions",
exploration = "Exploration",
government = "Government",
indexes = "Indexes",
industrialaccidents = "Industrial Accidents",
insidertrading = "Insider Trading",
investorrelations = "Investor Relations",
laborissues = "Labor Issues",
legal = "Legal",
marketing = "Marketing",
orderimbalances = "Order Imbalances",
partnerships = "Partnerships",
pricetargets = "Price Targets",
productsservices = "Product Services",
publicopinion = "Public Opinion",
regulatory = "Regulatory",
revenues = "Revenues",
security = "Security",
stockprices = "Stockprices",
taxes = "Taxes",
technicalanalysis = "Technical Analysis",
transportation = "Transportation",
warconflict = "War Conflict")
and the plot to be
ggplot(df, aes(x=date, y=value, color=item)) +
geom_line() +
facet_wrap( ~ item, ncol=4, scales="free_y", labeller = labeller(item = mylabels))

Problems passing string with \n to ggtitle

I think this might have an easy answer - which I can't seem to find anywhere - so I'll forgo the reproducibility for the moment. I have a function designed to draw a ggplot2. I use mapply to pass it a few vectors of strings for the functions input parameters. The parameter of concern here is title. Which is fed a character vector with elements such as "this is a plot title".
Then the following code:
p <- ggplot(df, aes(x=date, y=value))
## plot the line
p <- p + geom_line()
## add plot title
p <- p + ggtitle(title)
actually works just fine and the plot title is "this is a plot title" as expected.
However if the title is long and I want to specify a point to wrap the title using \n it fails to work.
Precisely if I feed ggtitle an element of "this is a \n plot title". I get exactly that contained in the quotes, rather than wrapping the title at the \n. My suspicion is I need eval, or paste or get, but my formations of such a request have failed to achieve the desired results. I appreciate the help.
UPDATE:
I guess it must be the interaction with mapply. This should allow you to reproduce the problem.
create data.frame of strings as sample and assign it to fred.M.SA
structure(list(RegionalCoverage = c("National", "National", "National",
"National", "National", "National"), GeographicLevel = c("MSA",
"MSA", "MSA", "MSA", "MSA", "MSA"), Category = c("Workers", "Workers",
"Workers", "Workers", "Workers", "Workers"), Sector = c("Labor Market",
"Labor Market", "Labor Market", "Labor Market", "Labor Market",
"Labor Market"), Source2 = c("FRED", "FRED", "FRED", "FRED",
"FRED", "FRED"), Title = c("Unemployment Rate in La Crosse, WI-MN (MSA)",
"Trade, Transportation and Utilities Employment in La Crosse, WI-MN (MSA)",
"Professional and Business Services Employment in La Crosse, WI-MN (MSA)",
"Other Services Employment in La Crosse, WI-MN (MSA)", "Manufacturing Employment in La Crosse, WI-MN (MSA)",
"Leisure and Hospitality Employment \\n in La Crosse, WI-MN (MSA)"
), SeriesID = c("LACR155UR", "LACR155TRAD", "LACR155PBSV", "LACR155SRVO",
"LACR155MFG", "LACR155LEIH"), Units = c("%", "Thous. of Persons",
"Thous. of Persons", "Thous. of Persons", "Thous. of Persons",
"Thous. of Persons"), Freq = c("M", "M", "M", "M", "M", "M"),
Seas = c("SA", "SA", "SA", "SA", "SA", "SA"), OriginalSource = c("U.S. Department of Labor: Bureau of Labor Statistics",
"Federal Reserve Bank of St. Louis", "Federal Reserve Bank of St. Louis",
"Federal Reserve Bank of St. Louis", "Federal Reserve Bank of St. Louis",
"Federal Reserve Bank of St. Louis"), Method = c("ImportXML",
"ImportXML", "ImportXML", "ImportXML", "ImportXML", "ImportXML"
), LinktoSource = c("", "", "", "", "", ""), Link.to.Data.Spreadsheet.Name = c("",
"", "", "", "", ""), Link.to.Data.Storage = c("", "", "",
"", "", ""), Link.to.Data.Manipulation.File = c(NA, NA, NA,
NA, NA, NA), Link.to.Data.Manipulation.File.1 = c(NA, NA,
NA, NA, NA, NA)), .Names = c("RegionalCoverage", "GeographicLevel",
"Category", "Sector", "Source2", "Title", "SeriesID", "Units",
"Freq", "Seas", "OriginalSource", "Method", "LinktoSource", "Link.to.Data.Spreadsheet.Name",
"Link.to.Data.Storage", "Link.to.Data.Manipulation.File", "Link.to.Data.Manipulation.File.1"
), row.names = c(27L, 34L, 44L, 46L, 47L, 48L), class = "data.frame")
MakelineFred <- function(series, ylab="",xlab="", title="") {
require(ggplot2) # hadley's plotting framework
require(scales) # to adjust y axis scales
require(ggthemes) # extra themes including tufte
require(xts) # our favorite time series
require(gridExtra) # for adding a caption
require(timeDate) # for our prediction at the end
require(quantmod) #
# Get Data using quantmod
data <- getSymbols(series,src="FRED") #fred ignore from dates
# convert the string df to object df
data.xts <- get(data)
## convert data to data.frame
df <- data.frame(
date=as.Date(index(data.xts)),
value=as.numeric(data.xts))
p <- ggplot(df, aes(x=date, y=value))
## plot the line
p <- p + geom_line()
## add plot title
p <- p + ggtitle(title)
file <- paste("_",series,".png",sep="")
ggsave(file=file, plot=p, width=6, height=4)
finally here is the mapply call.
mapply(MakelineFred, series=fred.M.SA$SeriesID, title=fred.M.SA$Title)

Resources