ggplotly showing numbers instead of date labels - r

I have a dataset with the following structure:
structure(list(mes = c(7, 7, 7, 4, 4), ano = c(2021, 2021, 2021,
2021, 2021), nacionalidad = c("Venezuela", "Venezuela", "Venezuela",
"Venezuela", "Venezuela"), centro = c("Aeropuerto Eldorado",
"Aeropuerto Eldorado", "Aeropuerto Eldorado", "Aeropuerto Eldorado",
"Aeropuerto Eldorado"), puesto = c("Aeropuerto Eldorado de Bogotá",
"Aeropuerto Eldorado de Bogotá", "Aeropuerto Eldorado de Bogotá",
"Aeropuerto Eldorado de Bogotá", "Aeropuerto Eldorado de Bogotá"
), transporte = c("Air", "Air", "Air", "Air", "Air"), ciudad = c("Arauca",
"Bogotá", "Pereira", "Bogotá", "Bogotá"), flujo = c("Entries",
"Entries", "Entries", "Entries", "Entries"), motivo = c("Tourism",
"Tourism", "Tourism", "Transit", "Transit"), edad = c("0-17",
"0-17", "0-17", "18-29", "18-29"), colombiano = c("Extranjeros",
"Extranjeros", "Extranjeros", "Extranjeros", "Extranjeros"),
departamento = c("Arauca", "Bogota D.C.", "Risaralda", "Bogota D.C.",
"Bogota D.C."), region = c("América del Sur", "América del Sur",
"América del Sur", "América del Sur", "América del Sur"
), status = c("Permiso de Turismo", "Permiso de Turismo",
"Permiso de Turismo", "Permiso Otras Actividades", "Permiso Otras Actividades"
), departamento_2 = c("Bogotá", "Bogotá", "Bogotá", "Bogotá",
"Bogotá"), destino_procedencia = c("Emiratos Árabes", "Israel",
"Emiratos Árabes", "Panamá", "Panamá"), region_destino = c("Asia",
"Asia", "Asia", "América Central y el Caribe", "América Central y el Caribe"
), sexo = c("Male", "Male", "Male", "Male", "Male"), numero = c(1,
1, 1, 5, 5), date = structure(c(18809, 18809, 18809, 18718,
18718), class = "Date"), date2 = structure(c(18809, 18809,
18809, 18718, 18718), class = "Date")), row.names = c(NA,
5L), class = "data.frame")
and I would like to plot a chart by date and other variables (e.g. mode of transport) in ggplotly. The chart is correct, but the labels that appear in the dates show numbers instead of date format. The variable in the database is in date format, and I already tried changing it to different formats and still does not work.
I would also like to add minor date breaks, but can't seem to get it right.
Here is the code I am using for the chart:
chart9<-ggplot()+
geom_line(data=Flow,
aes(x=date,
color=transporte), stat="count") +
scale_x_date(date_minor_breaks = "1 month",
date_labels = "%Y (%b)")+
labs(color="Type of Flow")+
ggtitle("Number of Entrances, by Month and Mode of Transportation, 2017-2021")+
xlab("Date")+
ylab("Number or People")
ggplotly(chart9)
This is the chart plotted
Any help would be greatly appreciated! :)

Looks like the date class gets dropped when using stat="count". Hence, one option to achieve your desired result would be to aggregate your dataset before passing it to ggplot using e.g. dplyr::count(Flow, date, transporte):
library(ggplot2)
library(plotly)

# Pre-aggregate to one row per date and transport mode, so the count is an
# ordinary column and `date` reaches the plot as a real Date (per the
# explanation above, the Date class is lost when stat = "count" is used).
# The result gets its own name: overwriting `Flow` would make any later
# stat = "count" layer built on `Flow` count aggregated rows instead of the
# original records.
flow_counts <- dplyr::count(Flow, date, transporte, name = "count")

chart9 <- ggplot(flow_counts, aes(x = date, y = count, color = transporte)) +
  geom_line() +
  scale_x_date(
    date_minor_breaks = "1 month",
    date_labels = "%Y (%b)"
  ) +
  labs(
    title = "Number of Entrances, by Month and Mode of Transportation, 2017-2021",
    x = "Date",
    y = "Number of People",
    color = "Type of Flow"
  )

# Convert this specific plot rather than relying on ggplot2's "last plot".
ggplotly(chart9)
A second option, which additionally allows for setting the date format, would be to make use of the text "aesthetic" and convert your numbers back to proper dates:
# Keep stat = "count" and build the hover text by hand through the `text`
# aesthetic, then tell ggplotly() to show only that text.
library(plotly)
ggplot() +
geom_line(data = Flow, aes(x = date, color = transporte, text = paste(
# ..count.., ..x.. and ..color.. use ggplot2's ..name.. notation; presumably
# they resolve to the values available once the count stat has run
# -- TODO confirm against the installed ggplot2 version.
"count:", ..count..,
# Per the answer text the x value is a plain number here, so it is converted
# back to a Date (days since 1970-01-01) and formatted like the axis labels.
"<br>Date: ", format(as.Date(..x.., origin = "1970-01-01"), "%Y (%b)"),
"<br>transporte: ", ..color..
)), stat = "count") +
scale_x_date(
date_minor_breaks = "1 month",
date_labels = "%Y (%b)"
) +
labs(color = "Type of Flow") +
ggtitle("Number of Entrances, by Month and Mode of Transportation, 2017-2021") +
xlab("Date") +
ylab("Number or People")
# Restrict the tooltip to the hand-built `text` aesthetic.
ggplotly(tooltip = c("text"))

Related

Order the legend names in ggplot2 object from smaller to larger

I have the dataframe below and I want to set the order of the legend names from smaller to larger. The values may be characters but they have correspondence to the Cases column which I believe could be useful since the dataset is dynamic.
Cum<-structure(list(Age.group = c("00-04", "00-04", "05-14", "05-14",
"15-24", "15-24", "25-49", "25-49", "50-64", "50-64", "65-79",
"65-79", "80+", "80+"), Gender = c("Female", "Male", "Female",
"Male", "Female", "Male", "Female", "Male", "Female", "Male",
"Female", "Male", "Female", "Male"), Cases = c(64578, 70518,
187568, 197015, 414405, 388138, 1342394, 1206168, 792180, 742744,
400232, 414613, 282268, 198026), lab = c("64,578", "70,518",
"187,568", "197,015", "414,405", "388,138", "1,342,394", "1,206,168",
"792,180", "742,744", "400,232", "414,613", "282,268", "198,026"
), Age.group.Sum = c(135096, 135096, 384583, 384583, 802543,
802543, 2548562, 2548562, 1534924, 1534924, 814845, 814845, 480294,
480294), lab2 = c("135,096", "135,096", "384,583", "384,583",
"802,543", "802,543", "2,548,562", "2,548,562", "1,534,924",
"1,534,924", "814,845", "814,845", "480,294", "480,294"), color = c("#4285f4",
"#4285f4", "#90a9e0", "#90a9e0", "#dd9e5f", "#dd9e5f", "#b45f06",
"#b45f06", "#b45f06", "#b45f06", "#dd9e5f", "#dd9e5f", "#aebbd6",
"#90a9e0"), Range = c("LESS THAN 74.5K", "LESS THAN 74.5K", "148.9K - 223.4K",
"148.9K - 223.4K", "372.3K - 446.7K", "372.3K - 446.7K", "MORE THAN 670.1K",
"MORE THAN 670.1K", "MORE THAN 670.1K", "MORE THAN 670.1K", "372.3K - 446.7K",
"372.3K - 446.7K", "223.4K - 297.8K", "148.9K - 223.4K")), class = "data.frame", row.names = c(NA,
-14L))
names(mycols) <- mycols
ylab <- c(0.5,1,1.5,2,2.5)
lbls <- setNames(unique(Cum$color), unique(Cum$Range))
ggplot_obj <- ggplot(data = Cum, aes(x = `Age.group`, y = Cases, group = Gender,fill = Range)) +
geom_bar(aes(
text = paste("<b>Gender:</b>", Gender, "<br><b>Age:</b>", `Age.group` ,
"<br><b>Cases:</b>", lab, "<br><b>Total cases in age group:</b>",
lab2)),
position = "dodge", stat = "identity") +
geom_text(aes(y = Cases + 10000, label = Gender), vjust = 1,
position = position_dodge(width=0.9),size=2) +
scale_fill_manual(values = lbls) +
coord_cartesian(ylim = c(0, max(Cum$Cases)*1.1), expand = FALSE) +
theme_bw()+ theme(
# remove the vertical grid lines
panel.grid.major.x = element_blank(),
panel.border = element_blank(), axis.line.x = element_line()
) +
scale_y_continuous(labels = unit_format(unit = "M", scale = 1e-6))+
xlab("Age group")
We could convert the column to factor with levels specified in the custom order
# Fix the legend order by turning Range into a factor whose levels are listed
# explicitly from smallest to largest.
Cum$Range <- factor(Cum$Range, levels = c("LESS THAN 74.5K" , "148.9K - 223.4K" , "223.4K - 297.8K", "372.3K - 446.7K" , "MORE THAN 670.1K"))
# NOTE(review): ylab is not used in the lines visible here -- confirm it is needed.
ylab <- c(0.5,1,1.5,2,2.5)
# Named colour vector: for each level, take the colour of the first row that
# has that Range, and name it with the level.
lbls <- setNames(Cum$color[match(levels(Cum$Range), Cum$Range)], levels(Cum$Range))
Construct the plot with ggplot as in the OP's code
-output
Update
If the 'Range' values are dynamic (assuming the unit is the same), then extract the numeric part with parse_number, order and get the unique values
# Order the Range labels by the number readr::parse_number() extracts from
# each of them, then keep the distinct labels in that order.
lvls <- as.character(unique(Cum$Range[order(readr::parse_number(as.character(Cum$Range)))]))
# Use that order as the factor levels (and therefore the legend order).
Cum$Range <- factor(Cum$Range, levels = lvls)
Or another option is to arrange by 'Cases' and set the levels for 'Range'
library(dplyr)
# Sort the rows by case count (ties broken by age group); the order in which
# each Range label first appears then becomes the factor level order.
Cum <- arrange(Cum, Cases, Age.group)
Cum <- mutate(Cum, Range = factor(Range, levels = unique(Range)))
# For every level pick the colour of the first row carrying that Range and
# name the colour with the level.
range_levels <- levels(Cum$Range)
first_row_of_level <- match(range_levels, Cum$Range)
lbls <- setNames(Cum$color[first_row_of_level], range_levels)

Order Bars in interactive graph

I am creating an interactive graph using ggplot2 and plotly in R, the code is below.
I want to reorder the barchart column values so that they are sorted in descending order,
currently they are sorted alphabetically.
Edit: I might not have made what I wanted clear. Currently, the midfielder with the most points is Salah, but the top row in my midfielder column is currently Alli. I would like to sort the column so that the values are in descending order of points rather than alphabetical.
Would someone please inform me how I can do this?
I have saved the finished graph & csv file at the below locations:
IG: https://ianfm94.github.io/Premier_League_Stats/Top_100_Fantasy_PL_Pointscorers.html
CSV File: https://github.com/Ianfm94/Premier_League_Stats/blob/master/CSV_Files/2020-06-01_updated_fpl_stats.csv
rm(list=ls())
# Required packages, you might need to install these
library(ggplot2)
library(dplyr)
library(plotly)
library(tibble)
## Fantasy_PL Data
fpl_data = read.csv('2020-06-01_updated_fpl_stats.csv',
header = T, fileEncoding = "UTF-8-BOM")
attach(fpl_data)
#View(fpl_data)
# Interactive Plot Workings
top_100_points = total_points[0:100]
top_100_player_pos = factor(player_pos)[0:100]
top_100_surnames = factor(web_name)[0:100]
top_100_team = factor(team_name)[0:100]
color_table = tibble(
Team_Name = c("Arsenal", "Aston Villa", "Bournemouth", "Brighton & Hove Albion",
"Burnley", "Chelsea", "Crystal Palace", "Everton",
"Leicester City", "Liverpool", "Manchester City",
"Manchester United", "Newcastle United", "Norwich City",
"Sheffield United", "Southampton", "Tottenham Hotspurs",
"Watford", "West Ham United", "Wolverhampton Wanderers"),
Team_Color = c("#EF0107", "#670E36", "#B50E12", "#0057B8",
"#6C1D45", "#034694", "#1B458F", "#003399",
"#003090", "#C8102E", "#6CABDD", "#DA291C",
"#241F20", "#FFF200", "#EE2737", "#D71920",
"#132257", "#FBEE23", "#7A263A", "#FDB913")
)
position_table = tibble(
Position_Name = c("Goalkeeper", "Defender", "Midfielder", "Striker"),
)
fpl_df = data.frame(y = top_100_points,
x = top_100_player_pos,
z = top_100_surnames,
w = top_100_team,
stringsAsFactors = F)
fpl_df$w = factor(fpl_df$w, levels = color_table$Team_Name)
fpl_df$x = factor(fpl_df$x, levels = position_table$Position_Name)
names(fpl_df)[names(fpl_df) == "x"] = "Position_Name"
names(fpl_df)[names(fpl_df) == "y"] = "Total_Points_by_Position"
names(fpl_df)[names(fpl_df) == "z"] = "Player_Surname"
names(fpl_df)[names(fpl_df) == "w"] = "Team_Name"
#View(fpl_df)
plot_fpl_1 = ggplot(fpl_df, aes(x = Position_Name,
y = Total_Points_by_Position,
z = Player_Surname,
fill = Team_Name)) +
geom_col() +
scale_fill_manual(values = color_table$Team_Color) +
labs(title = "Top 100 Fantasy PL Pointscorer by Position & Team",
y = "Total Points of Position",
x = "Player Positions",
fill = "Team Name") +
theme_bw() +
theme(plot.title = element_text(size = 14,
face = "bold",
color = "black"),
legend.title = element_text(color = "navy",
face = "bold",
size = 10))
plot_fpl_1 = ggplotly(plot_fpl_1)
plot_fpl_1
You can use forcats::fct_reorder to change the order of z. See below:
Libraries:
# Required packages, you might need to install these
library(ggplot2)
library(dplyr)
library(plotly)
library(tibble)
library(RCurl)
library(forcats)
Data:
## Fantasy_PL Data
# Download the CSV as text (getURL() comes from RCurl) and parse it.
csvurl <- getURL("https://raw.githubusercontent.com/Ianfm94/Premier_League_Stats/master/CSV_Files/2020-06-01_updated_fpl_stats.csv")
fpl_data <- read.csv(text = csvurl)

# Interactive Plot Workings
# Keep the first 100 rows (fewer if the file is shorter).
# NOTE(review): this is only the "top 100" if the CSV is already sorted by
# total_points in descending order -- confirm.
# Columns are taken from the data frame directly rather than attach()-ing it,
# which would put every column on the search path where it can mask, or be
# masked by, other objects.
top_100 <- head(fpl_data, 100L)
top_100_points <- top_100$total_points
top_100_player_pos <- factor(top_100$player_pos)
top_100_surnames <- factor(top_100$web_name)
top_100_team <- factor(top_100$team_name)

# Team -> fill colour lookup; the row order also defines the legend order.
color_table <- tibble(
  Team_Name = c("Arsenal", "Aston Villa", "Bournemouth", "Brighton & Hove Albion",
                "Burnley", "Chelsea", "Crystal Palace", "Everton",
                "Leicester City", "Liverpool", "Manchester City",
                "Manchester United", "Newcastle United", "Norwich City",
                "Sheffield United", "Southampton", "Tottenham Hotspurs",
                "Watford", "West Ham United", "Wolverhampton Wanderers"),
  Team_Color = c("#EF0107", "#670E36", "#B50E12", "#0057B8",
                 "#6C1D45", "#034694", "#1B458F", "#003399",
                 "#003090", "#C8102E", "#6CABDD", "#DA291C",
                 "#241F20", "#FFF200", "#EE2737", "#D71920",
                 "#132257", "#FBEE23", "#7A263A", "#FDB913")
)

# Order in which the positions appear on the x axis.
position_table <- tibble(
  Position_Name = c("Goalkeeper", "Defender", "Midfielder", "Striker")
)

# Build the plotting data frame with its final column names. Position and team
# become factors with the levels fixed above; values that are not among those
# levels turn into NA.
fpl_df <- data.frame(
  Total_Points_by_Position = top_100_points,
  Position_Name = factor(top_100_player_pos,
                         levels = position_table$Position_Name),
  Player_Surname = top_100_surnames,
  Team_Name = factor(top_100_team, levels = color_table$Team_Name),
  stringsAsFactors = FALSE
)
Plot:
# Stacked columns of points per position, one segment per player, filled by
# team.
plot_fpl_1 <- ggplot(
  fpl_df,
  aes(
    x = Position_Name,
    y = Total_Points_by_Position,
    # Reordering the surnames by descending points is what changes the order
    # of the segments. NOTE(review): `z` is not drawn by geom_col(); presumably
    # it is mapped so that it takes part in grouping and shows up in the plotly
    # tooltip -- confirm.
    z = fct_reorder(Player_Surname, -Total_Points_by_Position),
    fill = Team_Name
  )
) +
  geom_col() +
  # Name the colours by team. An unnamed vector is assigned by position, so the
  # colours would shift to the wrong teams as soon as a team has no rows in the
  # data.
  scale_fill_manual(
    values = setNames(color_table$Team_Color, color_table$Team_Name)
  ) +
  labs(
    title = "Top 100 Fantasy PL Pointscorer by Position & Team",
    y = "Total Points of Position",
    x = "Player Positions",
    fill = "Team Name"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(size = 14, face = "bold", color = "black"),
    legend.title = element_text(color = "navy", face = "bold", size = 10)
  )

# Interactive version of the plot.
plot_fpl_2 <- ggplotly(plot_fpl_1)
plot_fpl_2

R: how to display 2019-04-29 as 29 of April of 2019 in ggplot subtitle?

I need to put the min date value and the max date value in ggplot subtitle.
I've found a similar question but for the axis labels, I need to apply this to the subtitle argument:
subtitle = paste0("Del ", vitocho_likes_min_date, " al ", vitocho_likes_max_date)
Min date looks like:
"2010-10-14" #Expect: "14 de octubre del 2010"
dput(vitocho_likes_min_date)
structure(14896, class = "Date")
Max date looks like:
"2019-04-29" #Expect: "29 de abril del 2019"
dput(vitocho_likes_max_date)
structure(18015, class = "Date")
This is my ggplot chart:
vitocho_chart <- t_kids_faves %>%
filter(user == "VictorAndresGB") %>%
ggplot(aes(x = fct_reorder(screen_name, n), y = n)) +
geom_col(fill = "#494A4F") +
coord_flip() +
theme_tweets() +
labs(
x = "",
y = "",
title = "Cuentas de Twitter con más likes de Victor Andrés García Belaunde.",
subtitle = paste0("Del ", vitocho_likes_min_date, " al ", vitocho_likes_max_date)
) +
geom_text(
aes(x = screen_name,
y = n - 15,
label = n
),
size = 4,
color = "gray95"
)
use:
# %B is rendered with the month names of the current LC_TIME locale, so switch
# it to Spanish first. Locale names are platform specific: "Spanish" is the
# name Windows accepts, "es_ES.UTF-8" is the usual name on Linux and macOS.
for (spanish_locale in c("Spanish", "es_ES.UTF-8")) {
  # Sys.setlocale() returns "" (and warns) when the locale is not available.
  if (nzchar(suppressWarnings(Sys.setlocale("LC_TIME", spanish_locale)))) {
    break
  }
}

# Format the existing Date values instead of re-typing their day numbers.
# format() already returns character, so no as.character() is needed.
# Run this once: afterwards both variables hold text, not Dates.
# Note that %d zero-pads the day ("04 de ...").
vitocho_likes_min_date <- format(vitocho_likes_min_date, "%d de %B del %Y")
vitocho_likes_max_date <- format(vitocho_likes_max_date, "%d de %B del %Y")

How can I modify time on x-axis in ggplot?

I am creating a plot with R and I would like to change the time on the x-axis. There should be labels for every hour. I used: scale_x_datetime(breaks="1 hour", labels=date_format("%H:%M")), but unfortunately, it is not working. Does someone have an idea? Thanks for helping.
This is my code:
input2 <- "C:\\Users\\time_distance.csv"
time_distance <- read.csv(input2, sep=";")
library(scales)
time <- strptime(time_distance$time, format = "%H:%M:%S")
plot2 <-ggplot(time_distance, aes(x = time, y = distance, group = 1)) +
geom_point(stat = "identity") +
geom_smooth(method = lm, color = "red", se = FALSE) +
theme(legend.position = "none") +
theme_bw() +
labs(y = "Distance [m]", x = "time [hour]")+
scale_y_continuous(limits = c(0,1600), breaks = seq(100, 1500, 100))
print(plot2)
This is my data:
dput(time_distance)
structure(list(time = c("12:51:57", "12:55:16", "12:56:29", "13:25:05",
"13:36:54", "13:55:37", "14:11:20", "14:13:17", "15:14:26", "15:18:48",
"15:21:01", "15:22:29", "15:25:13", "15:28:16", "15:28:26", "15:39:58",
"15:46:49", "15:50:45", "15:59:51", "16:02:38", "16:24:05", "16:35:17",
"11:15:24", "13:32:40", "14:42:39", "15:24:08", "15:32:28", "16:43:48",
"16:48:42", "17:10:28", "17:27:55", "11:23:25", "12:19:21", "12:27:19",
"12:28:39", "12:47:18", "13:17:01", "14:06:26", "14:15:35", "14:18:06",
"14:26:26", "14:50:32", "15:25:26", "15:33:50", "15:56:02", "15:58:45",
"16:11:02", "16:35:42", "17:16:24", "17:28:30", "08:58:43", "09:55:52",
"10:14:17", "10:24:17", "10:36:42", "10:55:35", "14:18:29", "14:36:59",
"15:22:46", "15:51:35", "15:58:29", "16:36:36", "16:47:48", "13:33:52",
"14:31:38", "14:49:03", "16:13:31", "11:11:20", "11:46:00", "11:50:43",
"12:13:11", "13:07:31", "13:27:46", "14:37:37", "15:27:51", "10:07:52",
"10:18:18", "11:56:53", "12:18:37", "12:45:55", "13:14:20", "13:23:21",
"13:30:15", "13:31:34", "13:33:12", "13:48:06", "13:53:52", "14:30:18",
"14:44:24", "14:51:14", "15:03:19", "15:33:32", "15:49:00", "15:55:53",
"16:07:24", "16:11:43", "16:21:43", "16:35:50", "16:39:37", "16:48:41",
"10:02:45", "12:14:40", "13:10:27", "14:36:28", "14:51:12", "15:38:50",
"15:58:29", "10:53:33", "11:11:27", "11:32:26", "11:38:36", "12:56:03",
"13:45:09", "14:39:48", "14:51:57", "14:53:25", "15:08:02", "16:01:32",
"16:22:14", "16:46:01", "08:57:18", "09:07:51", "09:25:10", "09:34:32",
"10:15:35", "10:33:54", "11:07:55", "11:26:00", "11:40:21", "11:50:45",
"11:57:16", "12:55:00", "13:21:18", "14:47:07", "14:50:21", "14:56:56",
"15:06:39", "15:21:53", "15:36:26", "15:48:37", "15:54:50", "16:01:28",
"16:12:21", "16:21:53", "16:26:20", "16:30:52", "09:49:00", "10:13:53",
"10:27:21", "10:46:43", "12:24:04", "12:30:04", "12:54:33", "13:33:14",
"13:52:55", "14:12:14", "14:37:37", "14:42:58", "14:44:37", "14:51:39",
"15:08:57", "15:38:08", "15:49:06", "16:05:53", "17:01:34", "08:31:56",
"09:44:00", "10:19:35", "10:47:49", "11:18:16", "11:35:49", "12:32:43",
"12:43:45", "13:11:05", "13:24:34", "13:32:46", "13:42:01", "14:20:17",
"14:31:11", "14:36:30", "16:06:58", "08:30:07", "09:02:22", "10:03:07",
"10:29:09", "10:52:23", "11:47:59", "12:58:26", "13:47:26", "13:53:05",
"14:08:33", "14:16:46", "14:28:50", "15:16:42", "16:07:43", "08:27:08",
"08:52:03", "09:04:13", "09:14:04", "09:22:39", "09:32:25", "09:39:04",
"10:18:02", "13:06:58", "13:21:03", "13:37:28", "13:41:09", "14:17:06",
"14:36:17", "14:51:45"), distance = c(89.6472646, 162.833946,
204.1718123, 69.32061609, 145.5725233, 157.8104904, 142.7204165,
139.4100682, 156.290279, 281.1730457, 211.3723597, 146.9900352,
155.5278723, 121.630972, 115.0260845, 103.4678487, 535.2962882,
172.2392646, 187.1019506, 192.2072681, 163.1432699, 328.1146666,
161.378541, 276.9657775, 288.6843714, 232.236379, 286.5742551,
171.9799195, 131.3821584, 645.23548, 328.8999153, 83.1376454,
206.8425108, 160.1471859, 163.4999165, 71.84934976, 122.7265289,
156.6223912, 245.7737619, 76.4010552, 191.7314754, 241.2484589,
314.4240603, 168.1778327, 249.4432742, 171.5837494, 445.289732,
150.781544, 231.0174121, 233.6595053, 212.0908014, 274.9652469,
230.2231595, 463.3457859, 185.4275877, 413.7982665, 225.4934765,
171.8698762, 287.8326822, 457.6477022, 216.539991, 203.7116093,
261.3755307, 176.3162834, 113.2523456, 186.0197098, 110.2207489,
164.0611501, 111.7164405, 157.4453558, 158.5689564, 155.240531,
117.3045189, 117.6454036, 224.4921484, 326.2455013, 205.1963079,
166.1579876, 245.2281083, 168.9163027, 280.1466904, 331.9011914,
268.5518097, 296.9010562, 476.5082804, 391.8845907, 255.7662667,
483.9525726, 283.381313, 592.5282434, 158.4318925, 722.0857244,
1524.855308, 261.3729014, 495.6679407, 585.3051787, 485.0507777,
472.889667, 394.5888053, 328.1134478, 209.4992859, 161.8100828,
179.8945385, 438.95502, 538.0923178, 142.1052511, 150.0340155,
170.1366692, 123.6032668, 171.7141119, 98.76667809, 235.874407,
241.7648426, 224.3868202, 167.1152772, 67.44743255, 87.79127323,
150.6129032, 476.6532019, 186.2234702, 139.4749401, 102.0916653,
69.80207399, 77.6190789, 179.4630148, 109.0492763, 165.9927234,
228.9755043, 247.591346, 262.0171879, 155.9824185, 207.1022985,
121.5681699, 178.136665, 126.386831, 74.78485864, 71.12781299,
78.21953935, 317.1932926, 219.2749766, 247.6333865, 98.85128473,
235.0537481, 90.20871946, 124.2324844, 125.3904596, 248.5174138,
148.2374205, 299.5611988, 77.88228217, 194.0273272, 117.208969,
347.7014402, 306.3749268, 313.0498324, 313.7161044, 479.518518,
314.9932, 271.3977789, 274.4941856, 391.3815061, 418.8094285,
243.0563698, 599.4055807, 169.8079415, 70.72250265, 181.7665702,
172.3945082, 95.23804572, 147.9418742, 101.8629435, 127.5025047,
236.7513256, 113.5718566, 133.5204809, 215.6790291, 72.40720193,
80.22093147, 188.9783062, 75.71888165, 100.6934031, 293.32912,
269.7824729, 772.4577258, 468.3636793, 221.4304232, 282.6939389,
250.2391573, 111.5475549, 181.1174836, 221.0176637, 206.4821957,
127.4349516, 112.25765, 198.9440625, 144.8789547, 86.23383985,
48.90437015, 107.9818848, 186.2547336, 156.2620094, 112.5252141,
349.3143126, 342.4842646, 69.19741708, 206.0086208, 211.6554678,
119.8004909, 136.4785611, 111.8184516)), class = "data.frame",
row.names = c(NA, -210L))
The column time should be POSIXct to be able to use scale_x_datetime, convert it to the right type and it will work.
This should work:
library(scales)

# scale_x_datetime() needs POSIXct. Only a time of day is given, so today's
# date is filled in. Parse in UTC explicitly: date_format() labels in UTC by
# default, so parsing in the local time zone would shift every axis label by
# the local UTC offset.
time_distance$time <- as.POSIXct(
  time_distance$time,
  format = "%H:%M:%S",
  tz = "UTC"
)

ggplot(time_distance, aes(x = time, y = distance, group = 1)) +
  geom_point() +
  geom_smooth(method = lm, color = "red", se = FALSE) +
  # The complete theme goes first; adding theme_bw() after theme() would
  # discard the legend.position setting.
  theme_bw() +
  theme(legend.position = "none") +
  labs(y = "Distance [m]", x = "time [hour]") +
  scale_y_continuous(limits = c(0, 1600), breaks = seq(100, 1500, 100)) +
  # One labelled break per hour, shown as HH:MM.
  scale_x_datetime(date_breaks = "1 hour", labels = date_format("%H:%M"))

Underline part of text label in ggplot

I am trying to make a label that is made up of a book title and book author. I would like to underline the title, but not the author, in the label.
Here is the MWE data:
Title,Author,Pages,Date Started,Date Finished
underline('Time Travel'),'James Gleick',353,1/1/17,1/27/17
underline('The Road'),'Cormac McCarthy',324,1/28/17,3/10/17
This code works but does not allow for the title and author
library(ggplot2)
library(tidyverse)
library(ggrepel)
library(ggalt)
books.2017 <- read_csv('books_2017.csv')
books.2017$`Date Started` <- as.Date(books.2017$`Date Started`, "%m/%d/%y")
books.2017$`Date Finished` <- as.Date(books.2017$`Date Finished`, "%m/%d/%y")
ggplot(books.2017, aes(x=`Date Started`, xend=`Date Finished`)) +
geom_dumbbell(aes(size=Pages),size_x=0, size_xend=0) +
geom_text_repel(aes(label=paste(Title)), parse=TRUE)
When I try to change geom_text_repel to something like:
geom_text_repel(aes(label=paste(Title,Author)), parse=TRUE)
I get this error:
Error in parse(text = as.character(lab)) :
<text>:1:26: unexpected string constant
1: underline('Time Travel') 'James Gleick'
^
EDIT The labels should look something like this
You need to form a valid plotmath expression, qplot(1,1,geom="blank") + annotate("text", x=1, y=1, label='underline("this")*" and that"', parse = TRUE)
Applied to your dataset this might look like label=paste(Title, Author, sep="~"), where ~ is a non-breaking space plotmath separator. After fixing your non-reproducible example, this gives
It looks like you are trying to pull down your goodreads data, and map out the number of books you read over the year, against start date, end date and book size.
To do what you propose, you can use the parse option of the geom_text*() functions: build a plotmath string with sprintf() and pass it to geom_text*() as the label input, with parse = TRUE.
To add a newline you might consider using plotmath::over()
parseLabel <- sprintf("over(%s,%s)",
gsub(" ", "~", books.2007$Title, fixed = TRUE),
gsub(" ", "~", books.2007$Author, fixed = TRUE))
parseLabel
alternatively, you can use underline, however adding a newline is tricky as plotmath() does not directly support the use of newline in a parse formula.
parseLabel <- sprintf("underline(%s)~\n~%s",
gsub(" ", "~", books.2007$Title, fixed = TRUE),
gsub(" ", "~", books.2007$Author, fixed = TRUE))
parseLabel
Note: Baptiste correctly highlights this in his answer; I am just expanding upon his work here using an example dataset I created.
OK, here is a quick example based on the above assumptions. I hope this points you in the right direction.
Note: I have appended an example dataset for people to use.
Adding an Underline
In order to add an underline to the text, you can harness plotmath by setting parse = TRUE in the geom_label*() call.
Simple example using plotmath with geom_label
library(tidyverse) # Loads ggplot2
library(graphics)
library(ggrepel)
library(gtable)
library(ggalt)
# load test dataset
# ... See example data set
# books.2007 <- structure...
# Dumbbell chart: one row per book (y = ISBN) running from the start date to
# the finish date.
gp <- ggplot(books.2007)
# Pages is stored as character in the example data, hence as.numeric().
gp <- gp + geom_dumbbell( aes(x = `Date Started`,
xend = `Date Finished`,
y = ISBN,
size = as.numeric(Pages)),
size_x = 0, size_xend = 0)
# Construct parseLabel using sprintf
# Spaces in title and author are replaced by "~" so that the text can be
# parsed as an expression (the label is used with parse = TRUE below).
# NOTE(review): the title and author are inserted unquoted, so punctuation or
# reserved words inside them are parsed as code -- verify with your own data.
parseLabel <- sprintf("underline(%s)~\n~%s",
gsub(" ", "~", books.2007$Title, fixed = TRUE),
gsub(" ", "~", books.2007$Author, fixed = TRUE))
# The label vector is passed as a fixed parameter (outside aes()), one entry
# per row of books.2007.
gp <- gp + geom_label(aes(x = `Date Started`,
y = ISBN),
label = parseLabel,
vjust = 1.5, hjust = "inward", parse = TRUE)
# Legend title for the size aesthetic.
gp <- gp + labs(size = "Book Size")
gp
Example Plot Output
Simple example using plotmath with geom_label_repel
nb. My personal sense would be geom_text is easier to use as geom_label_repel requires computation overhead to calculate the positioning of the labels.
## Construct parse string
##
##
# Same label construction as in the geom_label example: spaces become "~" and
# the title is wrapped in underline() for use with parse = TRUE.
parseLabel <- sprintf("underline(%s)~\n~%s",
gsub(" ", "~", books.2007$Title, fixed = TRUE),
gsub(" ", "~", books.2007$Author, fixed = TRUE))
# Print the strings for inspection.
parseLabel
# Drop the plot object from the previous example before rebuilding it.
rm(gp)
gp <- ggplot(books.2007)
gp <- gp + geom_dumbbell( aes(x = `Date Started`,
xend = `Date Finished`,
y = ISBN,
size = as.numeric(Pages)),
size_x = 0, size_xend = 0)
# geom_label_repel() (ggrepel) is used here in place of geom_label().
gp <- gp + geom_label_repel(aes(x = `Date Started`,
y = ISBN),
label = parseLabel,
# max.iter = 100,
parse = TRUE)
gp <- gp + labs(size = "Book Size")
gp
Example Plot Output with geom_text_repel
Example Data Set:
books.2007 <- structure(list(Title = c("memoirs of a geisha", "Blink: The Power of Thinking Without Thinking",
"Power of One", "Harry Potter and the Half-Blood Prince (Book 6)",
"Dune (Dune Chronicles Book 1)"), Author = c("arthur golden",
"Malcolm Gladwell", "Bryce Courtenay", "J.K. Rowling", "Frank Herbert"
), ISBN = c("0099498189", "0316172324", "034541005X", "0439785960",
"0441172717"), `My Rating` = c(4L, 3L, 5L, 4L, 5L), `Average Rating` = c(4,
4.17, 5, 4.38, 4.55), Publisher = c("vintage", "Little Brown and Company",
"Ballantine Books", "Scholastic Paperbacks", "Ace"), Binding = c("paperback",
"Hardcover", "Paperback", "Paperback", "Paperback"), `Year Published` = c(2005L,
2005L, 1996L, 2006L, 1990L), `Original Publication Year` = c(2005L,
2005L, 1996L, 2006L, 1977L), `Date Read` = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), `Date Added` = structure(c(13558,
13558, 13558, 13558, 13558), class = "Date"), Bookshelves = c("fiction",
"nonfiction marketing", "fiction", "fiction fantasy", "fiction scifi"
), `My Review` = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_), `Date Started` = structure(c(13577,
13610, 13634, 13684, 13722), class = "Date"), `Date Finished` = structure(c(13623,
13647, 13660, 13689, 13784), class = "Date"), Pages = c("522",
"700", "300", "145", "700")), .Names = c("Title", "Author", "ISBN",
"My Rating", "Average Rating", "Publisher", "Binding", "Year Published",
"Original Publication Year", "Date Read", "Date Added", "Bookshelves",
"My Review", "Date Started", "Date Finished", "Pages"), row.names = c(NA,
-5L), spec = structure(list(cols = structure(list(Title = structure(list(), class = c("collector_character",
"collector")), Author = structure(list(), class = c("collector_character",
"collector")), ISBN = structure(list(), class = c("collector_character",
"collector")), `My Rating` = structure(list(), class = c("collector_integer",
"collector")), `Average Rating` = structure(list(), class = c("collector_double",
"collector")), Publisher = structure(list(), class = c("collector_character",
"collector")), Binding = structure(list(), class = c("collector_character",
"collector")), `Year Published` = structure(list(), class = c("collector_integer",
"collector")), `Original Publication Year` = structure(list(), class = c("collector_integer",
"collector")), `Date Read` = structure(list(), class = c("collector_character",
"collector")), `Date Added` = structure(list(), class = c("collector_character",
"collector")), Bookshelves = structure(list(), class = c("collector_character",
"collector")), `My Review` = structure(list(), class = c("collector_character",
"collector"))), .Names = c("Title", "Author", "ISBN", "My Rating",
"Average Rating", "Publisher", "Binding", "Year Published", "Original Publication Year",
"Date Read", "Date Added", "Bookshelves", "My Review")), default = structure(list(), class = c("collector_guess",
"collector"))), .Names = c("cols", "default"), class = "col_spec"), class = c("tbl_df",
"tbl", "data.frame"))
Simple Example - no formatting
For completeness here is how I would approach the problem avoiding the formula construction problems.
# Dumbbell chart: one row per book (y = ISBN) running from the start date to
# the finish date. Pages is stored as character in the example data, hence
# as.numeric().
gp <- ggplot(books.2007) +
  geom_dumbbell(
    aes(
      x = `Date Started`,
      xend = `Date Finished`,
      y = ISBN,
      size = as.numeric(Pages)
    ),
    size_x = 0, size_xend = 0
  )

# Plain two-line label: title on the first line, author on the second.
# sep = "\n" avoids the stray spaces that paste(title, "\n", author) inserts
# around the line break. The variable is not called `t`, which would shadow
# base::t().
plain_label <- paste(books.2007$Title, books.2007$Author, sep = "\n")

gp <- gp +
  geom_label(
    aes(x = `Date Started`, y = ISBN),
    label = plain_label,
    vjust = 1.5, hjust = "inward", parse = FALSE
  ) +
  labs(size = "Book Size")
gp
Plot Output
This problem could be made a lot simpler if italics sufficed instead of underlines, as grid::gpar() does not support an underline fontface. Here's an example of using italics instead:
library(tibble)
library(ggplot2)

books.2017 <-
  tribble(~Title, ~Author, ~Pages, ~`Date Started`, ~`Date Finished`,
          'Time Travel', 'James Gleick', 353, '1/1/17', '1/27/17',
          'The Road', 'Cormac McCarthy', 324, '1/28/17', '3/10/17')

# Parse the month/day/year strings into Dates. Left as character, the x axis
# would be discrete and sorted as text rather than a continuous time axis.
books.2017$`Date Started` <- as.Date(books.2017$`Date Started`, "%m/%d/%y")
books.2017$`Date Finished` <- as.Date(books.2017$`Date Finished`, "%m/%d/%y")

# One horizontal segment per book from start to finish date, thickness mapped
# to the page count.
ggplot(books.2017, aes(x = `Date Started`,
                       xend = `Date Finished`,
                       y = Title,
                       yend = Title)) +
  geom_segment(aes(size = Pages),
               lineend = 'round') +
  # Title in italics, drawn above the author line.
  geom_text(aes(label = Title),
            fontface = 'italic',
            vjust = -3.5) +
  geom_text(aes(label = Author),
            vjust = -2)

Resources