I'm just learning R fundamentals, and I would like to ask for your help with data visualization, specifically time series. I'm studying how the vote shares of a specific category of political parties (right-wing populists) vary over time in each country from 2009 to 2019.
Here's my dataset:
dput(votesharespop)
structure(list(country = c("Austria", "Belgium", "Bulgaria",
"Czech Republic", "Denmark", "Estonia", "Finland", "France",
"Germany", "Great Britain", "Greece", "Hungary", "Italy", "Lithuania",
"Luxembourg", "Netherlands", "Poland", "Romania", "Portugal",
"Slovakia", "Slovenia", "Spain", "Sweden", "Austria", "Belgium",
"Bulgaria", "Czech Republic", "Denmark", "Estonia", "Finland",
"France", "Germany", "Great Britain", "Greece", "Hungary", "Italy",
"Lithuania", "Luxembourg", "Netherlands", "Poland", "Romania",
"Portugal", "Slovakia", "Slovenia", "Spain", "Sweden", "Austria",
"Belgium", "Bulgaria", "Czech Republic", "Denmark", "Estonia",
"Finland", "France", "Germany", "Great Britain", "Greece", "Hungary",
"Italy", "Lithuania", "Luxembourg", "Netherlands", "Poland",
"Romania", "Portugal", "Slovakia", "Slovenia", "Spain", "Sweden"
), year = c(2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009,
2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009,
2009, 2009, 2009, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014,
2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014,
2014, 2014, 2014, 2014, 2019, 2019, 2019, 2019, 2019, 2019, 2019,
2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019,
2019, 2019, 2019, 2019, 2019), vote_share = c(17.3, 15.7, 16.7,
4.3, 15.3, 0, 9.8, 8.1, 1.7, 22.7, 7.2, 71.2, 45.5, 12.2, 7.4,
17, 27.4, 8.7, 0, 5.6, 35.2, 0, 3.3, 20.2, 7.6, 16.8, 4.8, 26.6,
5.3, 12.9, 28.7, 0.4, 28.6, 6.2, 66.2, 26.7, 14.3, 7.5, 13.3,
31.8, 2.7, 0, 3.6, 28.8, 1.6, 9.7, 17.2, 13.8, 14.6, 10, 10.8,
12.7, 13.8, 26.8, 11, 34.9, 6.2, 62.2, 49.5, 2.7, 10, 14.5, 49.1,
0, 1.5, 7.3, 30.3, 6.2, 15.3), continent = c("Europe", "Europe",
"Europe", "Europe", "Europe", "Europe", "Europe", "Europe", "Europe",
"Europe", "Europe", "Europe", "Europe", "Europe", "Europe", "Europe",
"Europe", "Europe", "Europe", "Europe", "Europe", "Europe", "Europe",
"Europe", "Europe", "Europe", "Europe", "Europe", "Europe", "Europe",
"Europe", "Europe", "Europe", "Europe", "Europe", "Europe", "Europe",
"Europe", "Europe", "Europe", "Europe", "Europe", "Europe", "Europe",
"Europe", "Europe", "Europe", "Europe", "Europe", "Europe", "Europe",
"Europe", "Europe", "Europe", "Europe", "Europe", "Europe", "Europe",
"Europe", "Europe", "Europe", "Europe", "Europe", "Europe", "Europe",
"Europe", "Europe", "Europe", "Europe")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -69L))
My aim was to get something like this (not interactive):
Or something like facets, but for each country.
Thank you very much for your attention.
Data
# Vote shares of right-wing populist parties: one row per country and
# election year (23 countries x 3 election years = 69 rows).
votesharespop <- structure(
  list(
    # The same 23 countries, in the same order, repeat for each election year.
    country = rep(
      c(
        "Austria", "Belgium", "Bulgaria", "Czech Republic", "Denmark",
        "Estonia", "Finland", "France", "Germany", "Great Britain",
        "Greece", "Hungary", "Italy", "Lithuania", "Luxembourg",
        "Netherlands", "Poland", "Romania", "Portugal", "Slovakia",
        "Slovenia", "Spain", "Sweden"
      ),
      times = 3
    ),
    # Rows are ordered year by year: all of 2009, then 2014, then 2019.
    year = rep(c(2009, 2014, 2019), each = 23),
    vote_share = c(
      # 2009
      17.3, 15.7, 16.7, 4.3, 15.3, 0, 9.8, 8.1, 1.7, 22.7, 7.2, 71.2,
      45.5, 12.2, 7.4, 17, 27.4, 8.7, 0, 5.6, 35.2, 0, 3.3,
      # 2014
      20.2, 7.6, 16.8, 4.8, 26.6, 5.3, 12.9, 28.7, 0.4, 28.6, 6.2, 66.2,
      26.7, 14.3, 7.5, 13.3, 31.8, 2.7, 0, 3.6, 28.8, 1.6, 9.7,
      # 2019
      17.2, 13.8, 14.6, 10, 10.8, 12.7, 13.8, 26.8, 11, 34.9, 6.2, 62.2,
      49.5, 2.7, 10, 14.5, 49.1, 0, 1.5, 7.3, 30.3, 6.2, 15.3
    ),
    continent = rep("Europe", times = 69)
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -69L)
)
Code
library(ggplot2)
library(ggthemes) # to access theme_hc()
# Single-panel time series: vote share by year, one coloured series per country.
ggplot(votesharespop, aes(year, vote_share, colour = country)) +
  # Geometry: a line per country with a marker at each election year.
  geom_line() +
  geom_point() +
  # Axes: a tick per election year on x, every 20 points on y,
  # and a y range that always spans 0 to 80.
  scale_x_continuous(breaks = c(2009, 2014, 2019)) +
  scale_y_continuous(breaks = c(0, 20, 40, 60, 80)) +
  expand_limits(y = c(0, 80)) +
  # Text: title, subtitle and caption.
  labs(
    title = "Variation of vote shares of right wing populists, 2009 to 2019",
    subtitle = "Add a subtitle of your choice",
    caption = "Add a caption of your choice"
  ) +
  # Look: ggthemes' theme_hc() as the base, then local tweaks on top of it.
  theme_hc() +
  theme(
    legend.position = "right",                               # legend beside the panel
    legend.title = element_blank(),                          # no legend title
    axis.title.x = element_blank(),                          # no x axis title
    axis.title.y = element_blank(),                          # no y axis title
    plot.title = element_text(size = 20, color = "gray40"),  # larger grey title
    plot.subtitle = element_text(color = "gray40"),          # grey subtitle
    plot.caption = element_text(color = "gray40", hjust = 0) # grey, left-aligned caption
  )
Output
Note, however, that with this many groups the colors can be nearly indistinguishable. It might be preferable to use facet_wrap() instead.
Code
# Faceted time series: one small panel per country, vote share by year.
ggplot(data = votesharespop, mapping = aes(x = year, y = vote_share, color = country)) +
  geom_line() +                                              # a line per country
  geom_point() +                                             # a marker per election year
  facet_wrap(~ country) +                                    # one panel per country
  labs(title = "Variation of vote shares of right wing populists, 2009 to 2019",
       subtitle = "Add a subtitle of your choice",
       caption = "Add a caption of your choice") +
  scale_y_continuous(breaks = seq(0, 75, by = 25)) +         # y ticks every 25 points
  scale_x_continuous(breaks = seq(2009, 2019, by = 5)) +     # one x tick per election year
  expand_limits(y = c(0, 75)) +                              # y range always spans 0 to 75
  theme_hc() +                                               # ggthemes base theme
  # All theme tweaks live in one call. The legend is switched off once here
  # (the panel strips already name each country) instead of being set to
  # "right" and then overridden to "none" further down the chain.
  theme(legend.position = "none",
        axis.title.x = element_blank(),                      # no x axis title
        axis.title.y = element_blank(),                      # no y axis title
        legend.title = element_blank(),                      # no legend title
        plot.title = element_text(size = 20, color = "gray40"),
        plot.subtitle = element_text(color = "gray40"),
        plot.caption = element_text(color = "gray40", hjust = 0),
        panel.spacing.x = unit(4, "mm"))                     # keeps x axis labels of neighbouring panels apart
Output
I have the dataframe below:
# Example data: one row per agreement/country pair (20 rows).
ct <- structure(
  list(
    # Agreement name, repeated once for every member country listed below.
    name = rep(
      c(
        "Afghanistan India", "Albania Kosovo", "Bangkok Agreement",
        "Belarus Russia (Union State)", "Albania Macedonia", "Belarus Serbia",
        "Belarus Ukraine", "Belize Guatemala", "Bhutan India"
      ),
      times = c(2, 2, 5, 2, 2, 2, 2, 2, 1)
    ),
    Country = c(
      "Afghanistan", "India",
      "Albania", "Kosovo",
      "Bangladesh", "India", "Laos", "South Korea", "Sri Lanka",
      "Belarus", "Russia",
      "Albania", "North Macedonia",
      "Belarus", "Serbia",
      "Belarus", "Ukraine",
      "Belize", "Guatemala",
      "Bhutan"
    ),
    Scope = rep(c(3, 23, 26, 6, 1, 5), times = c(2, 7, 2, 6, 2, 1)),
    year2 = rep(
      c(2000, 1975, 1995, 2000, 2005, 1990, 2005),
      times = c(4, 5, 2, 2, 2, 2, 3)
    ),
    pta_count = c(2, 3, 8, 1, 1, 1, 1, 1, 1, 2, 2, 8, 8, 1, 4, 2, 7, 2, 3, 1)
  ),
  row.names = c(NA, -20L),
  class = c("tbl_df", "tbl", "data.frame")
)
and I create this plot, in which I modified the text displayed for the black dots. I also want to display the variable name inside the hover text of the dots, but it is not one of the variables I use for the x and y axes.
# Packages this snippet relies on (%>% and filter, ggplot, plotly_build,
# str_replace_all).
library(dplyr)
library(ggplot2)
library(plotly)
library(stringr)

# Pick one country by its position among the distinct values, for instance:
i <- 2
country_i <- unique(ct$Country)[i]
ct_i <- ct %>% filter(Country == country_i)

# Ratio between the two y axes: bar heights are pta_count divided by this
# ratio, and the secondary axis multiplies by it to read as PTA counts again.
# The post originally took it from `dt2$pta_count` and `dt2$scope_ntis_ciu`,
# which are not part of the data shown; `ct$pta_count` and `ct$Scope` are used
# instead so the snippet runs with `ct` alone.
axis_ratio <- max(ct$pta_count) / max(ct$Scope)

p <- ct_i %>%
  ggplot(aes(year2, Scope)) +
  geom_jitter() +
  geom_col(aes(y = pta_count / axis_ratio),
           fill = "darkolivegreen", alpha = 0.3, width = 3) +
  xlim(c(1950, 2020)) +
  scale_y_continuous(
    limits = c(0, 33),
    # Features of the first axis
    name = "NTI Scope\n(scope measures the sum of all NTIs mentioned in a PTA,\ndot indicated one PTA)",
    # Add a second axis and specify its features
    sec.axis = sec_axis(~ . * axis_ratio,
                        name = "PTA Count\n(green columns indicate number of PTAs\n signed in given 5-year intervall)")
  ) +
  labs(x = "", title = country_i,
       # Both figures describe the selected country; the total was previously
       # hard-coded to "India" and only matched the title while i was 2.
       subtitle = paste0("signed ", sum(ct_i$pta_count),
                         " PTAs in total and\nhas an average ", mean(ct_i$Scope),
                         " NTI-scope index across all years"))

# create plotly object to modify
p2 <- plotly_build(p)

# Modify the existing tooltip text of the first trace. That trace is expected
# to come from the first layer added above (geom_jitter, the dots); check
# p2$x$data if the layer order changes.
p2$x$data[[1]]$text <- str_replace_all(p2$x$data[[1]]$text,
                                       "year2", "Year ") %>%
  str_replace_all(., fixed("Scope"),
                  "Count of issues ")
p2
When you made the minor changes to the hover text, I showed a method to replace what was already there. However, for this (the addition of a new variable), it's easiest to go back to the ggplot object.
BTW I had to change scope_ntis_ciu to Scope in a few places in this code.
Return to the layers that you want to change:
In the geom_jitter layer, add a call for text in aes with what you want to appear in your hover text.
Did you get an error–'can't find name'? If you do, you added the text outside of aes.
# Dots: hover text shows the agreement name, the year and the Scope value.
geom_jitter(
  aes(
    text = paste0(
      "Name: ", name,
      "\nYear: ", year2,
      "\nCount of issues: ", Scope
    )
  )
) +
Then your column layer:
# Columns: pta_count rescaled onto the Scope axis; the hover text shows the
# year and that same rescaled value.
geom_col(
  aes(
    y = pta_count / (max(pta_count) / max(Scope)),
    text = paste0(
      "Year: ", year2,
      "\nCount of issues: ", pta_count / (max(pta_count) / max(Scope))
    )
  ),
  fill = "darkolivegreen",
  alpha = 0.3,
  width = 3
) +
When you execute this code for the ggplot object, you will be warned that ggplot is ignoring your text–that's okay, because it still keeps the information and sends it right along to plotly, where it will be used.
Now when you call the ggplotly object, you can add the tooltip.
# Convert the ggplot object `p` to a plotly figure; tooltip = "text" asks for
# the hover label to be taken from the `text` aesthetic set in the layers.
ggplotly(p, tooltip = "text")
Can you use text as x-axis labels on a plot? I've searched and cannot find any examples. Am I trying to do something that is not possible in R? It fails even when I try to plot a single variable. Countries is text/character, but I do not know how to set it as such.
plot(Finally$Countries,Finally$RobberyPerCent, pch = 16, col = 2)
I get the error
Error in plot.window(...) : need finite 'xlim' values
In addition: There were 24 warnings (use warnings() to see them)
Thank you, my goal is to combine two variables and see if there is a basic pattern. I've been able to figure out simple linear regression (no correlation), but I'm failing at basic plotting
# Builds a table of two percentage columns plus a Countries column taken from
# the row names, then attempts a two-y-axis plot with Countries on the x axis.
#Subset for Percentages
Q5DataFinal <- subset(Q5Data, select = c(RobberyPerCent, UnlawfulPerCent))
View(Q5DataFinal)
library(data.table)
Nearlythere <- setDT(Q5DataFinal, keep.rownames = TRUE)[] # turn rownames into column data
names(Nearlythere)[names(Nearlythere) == 'rn'] <- 'Countries' #renaming rn to countries
# NOTE(review): the dput of the result shows Countries stored as a list, not a
# character vector, so this step does not appear to do what its comment says.
Nearlythere$Countries[] <- lapply(Nearlythere$Countries, as.character) #Changing Countries to Character
Finally <- Nearlythere
summary(Finally) #Countries saved as characters
# Attempt to create two Y axis Graph with Countries as X ticks
par(mar = c(5, 4, 4, 4) + 0.3) # Additional space for second y-axis
# NOTE(review): this is the call reported to fail with "need finite 'xlim'
# values"; presumably because x holds text rather than numbers (confirm).
plot(Finally$Countries,Finally$RobberyPerCent, pch = 16, col = 2) # Create first plot
par(new = TRUE) # Add new plot
plot(Finally$Countries, Finally$UnlawfulPerCent, pch = 17, col = 3, # Create second plot without axes
axes = FALSE, xlab = "", ylab = "")
axis(side = 4, at = pretty(range(Finally$UnlawfulPerCent))) # Add second axis
mtext("UnlawfulPerCent", side = 4, line = 3) # Add second axis label
Dput is
structure(list(Countries = list("Albania", "Austria", "Bulgaria",
"Croatia", "Cyprus", "Czechia", "Finland", "Germany (until 1990 former territory of the FRG)",
"Greece", "Ireland", "Italy", "Kosovo (under United Nations Security Council Resolution 1244/99)",
"Latvia", "Lithuania", "Luxembourg", "Malta", "Montenegro",
"Romania", "Serbia", "Slovenia", "Spain", "Switzerland"),
RobberyPerCent = c(5, 6, 18, 7, 5, 23, 5, 9, 24, 9, 40, 12,
17, 18, 10, 52, 24, 33, 10, 17, 80, 2), UnlawfulPerCent = c(95,
94, 82, 93, 95, 77, 95, 91, 76, 91, 60, 88, 83, 82, 90, 48,
76, 67, 90, 83, 20, 98)), row.names = c(NA, -22L), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x0000020282d01ef0>)
Do you want something like this?
# Demo: draw a curve without the default x axis, then add an x axis whose
# tick labels are text.
par(mar = c(5, 5, 4, 2))
x <- seq(0, 5, length.out = 500)
plot(
  x = x,
  y = sin(x^2),
  xaxt = "n", # suppress the default numeric x axis
  xlab = expression("Here is X"),
  ylab = expression(sin(x^2)),
  main = expression("My coolest plot" - sin(x^2))
)
# Custom x axis: one text label at each of the positions 0 to 5.
axis(
  side = 1,
  at = 0:5,
  labels = c("Albania", "Kosovo", "Kongo", "Germany", "Bulgaria", "Spain")
)
An addition
# your dataset: the country names, held as a list as in the posted dput
countries <- list("Albania", "Austria", "Bulgaria",
  "Croatia", "Cyprus", "Czechia", "Finland", "Germany (until 1990 former territory of the FRG)",
  "Greece", "Ireland", "Italy", "Kosovo (under United Nations Security Council Resolution 1244/99)",
  "Latvia", "Lithuania", "Luxembourg", "Malta", "Montenegro",
  "Romania", "Serbia", "Slovenia", "Spain", "Switzerland")
# modify to: one tick per country, at positions 0, 1, ..., n - 1.
# The positions are derived from the number of countries rather than
# hard-coded as 0:21, and the list is flattened so axis() gets a plain
# character vector of labels.
axis(1, at = seq_along(countries) - 1, labels = unlist(countries),
     cex.axis = 0.5) # select cex.axis for better displaying