Sorting a scatterplot by a third variable in ggplot - r

Here's a subset of data.
structure(list(Transmitter = c(1675L, 1675L, 1675L, 1675L, 1681L,
1681L, 1681L, 1681L, 1685L, 1685L, 1685L, 1685L, 1685L, 9782L,
9782L, 9782L, 24166L, 24166L, 24166L, 24166L, 24184L, 24184L,
24184L, 24184L), Date = structure(c(17392, 17721, 17722, 17393,
17734, 17729, 17391, 17717, 17392, 17390, 17391, 17381, 17382,
18079, 18110, 17762, 17751, 18097, 18090, 18091, 18097, 18068,
18082, 18088), class = "Date"), Year = c(2017L, 2018L, 2018L,
2017L, 2018L, 2018L, 2017L, 2018L, 2017L, 2017L, 2017L, 2017L,
2017L, 2019L, 2019L, 2018L, 2018L, 2019L, 2019L, 2019L, 2019L,
2019L, 2019L, 2019L), DirectionGroups = structure(c(3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L), .Label = c("Both", "Marine", "River"), class = "factor")), row.names = c(355L,
356L, 357L, 358L, 475L, 476L, 477L, 478L, 530L, 531L, 532L, 533L,
534L, 573L, 574L, 575L, 626L, 627L, 628L, 629L, 764L, 765L, 766L,
767L), class = "data.frame")
I'm trying to create a scatterplot of individually tagged animals through time. Points are colored by the group I have put them in. Currently the scatterplot is sorted by the level of the Transmitter. Instead I would like a way to sort these data by the DirectionGroup.
Here is my current scatterplot.
# Detections through time: one row per tagged animal, shaded by group.
ggplot(
  data = AbPlot3,
  mapping = aes(x = Date, y = factor(Transmitter), color = DirectionGroups)
) +
  geom_point() +
  scale_color_manual(
    values = c("grey40", "black", "grey70"),
    labels = c("Transient", "External", "Resident")
  ) +
  labs(y = "Transmitter") +
  theme_bw() +
  theme(
    axis.title = element_text(size = 16),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_blank(), # individual transmitter IDs are hidden
    legend.title = element_text(size = 16),
    legend.text = element_text(size = 14)
  )
Essentially, I want one plot with all Transient points next to each other, all external points together and all resident points together.

Try this. As already pointed out in the comments, simply sort your data by DirectionGroups, then convert Transmitter to a factor and set the level order accordingly, e.g. by using forcats::fct_inorder:
library(ggplot2)
library(dplyr)
# Put the rows in the wanted display order (group first, then transmitter)
# and freeze that row order into the factor levels of Transmitter, so the
# y axis lists the animals group by group.
AbPlot3 <- AbPlot3 %>%
  arrange(DirectionGroups, Transmitter) %>%
  mutate(Transmitter = forcats::fct_inorder(factor(Transmitter)))

# Same plot as before; Transmitter is already a factor, so it is mapped as is.
ggplot(
  data = AbPlot3,
  mapping = aes(x = Date, y = Transmitter, color = DirectionGroups)
) +
  geom_point() +
  scale_color_manual(
    values = c("grey40", "black", "grey70"),
    labels = c("Transient", "External", "Resident")
  ) +
  labs(y = "Transmitter") +
  theme_bw() +
  theme(
    axis.title = element_text(size = 16),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_blank(),
    legend.title = element_text(size = 16),
    legend.text = element_text(size = 14)
  )
Created on 2020-06-11 by the reprex package (v0.3.0)

Related

change linetype when using dfmelt with breaks

To plot the time series of one month over multiple years I'm using the following code:
# Keep only the January rows, then reshape to long form (one row per
# DATE/series pair) for plotting.
JAN <- subset(nDF, format.Date(DATE, "%m") == "01")
dfmelt <- melt(JAN, id.vars = "DATE")

# One axis break at the start of every month present in the data.
breaks <- unique(as.Date(cut(dfmelt$DATE, "month")))

# Add the calendar year, used below to facet the series year by year.
ba2 <- dfmelt
ba2$year <- as.integer(format(ba2$DATE, "%Y"))

p <- ggplot(ba2, aes(x = DATE, y = value, col = variable)) +
  geom_line(lwd = 1.0, alpha = 0.5) +
  facet_grid(cols = vars(year), scales = "free_x", space = "free_x") +
  labs(title = "JANUARY") +
  theme(panel.spacing = unit(0, "lines")) # panels touch each other

p + scale_x_date(breaks = breaks, date_labels = "%b")
head(JAN)
DATE MODEL BC OBSERVED
215 2001-01-01 1.2860092 1.52571356 1.55332905
216 2001-01-02 0.7906073 1.24322433 1.24701969
217 2001-01-03 0.3687850 0.11566294 0.11677768
218 2001-01-04 0.3539595 0.15826654 0.15906525
219 2001-01-05 0.2531596 0.18768851 0.18768533
220 2001-01-06 0.2311364 0.01537928 0.01516614
However, since BC and OBSERVED have almost the same values, I would like to change the linetype of MODEL and OBSERVED only. How do I achieve this? Any change I make is applied to all three lines.
Add linetype= to your aesthetics. Perhaps:
# Map linetype to the series in addition to colour, then choose the
# linetypes by hand: BC stays solid while MODEL and OBSERVED each get their
# own pattern, so the nearly coincident BC and OBSERVED lines can be told
# apart. Because colour and linetype use the same variable, ggplot2 merges
# them into a single legend.
p <- ggplot(
  ba2,
  aes(x = DATE, y = value, color = variable, linetype = variable)
) +
  labs(title = "JANUARY") +
  geom_line(lwd = 1.0, alpha = 0.5) +
  scale_linetype_manual(
    values = c(MODEL = "dashed", BC = "solid", OBSERVED = "dotted")
  ) +
  facet_grid(cols = vars(year), scales = "free_x", space = "free_x") +
  theme(panel.spacing = unit(0, "lines"))
p
Data
ba2 <- structure(list(DATE = structure(c(11323, 11324, 11325, 11326, 11327, 11328, 11323, 11324, 11325, 11326, 11327, 11328, 11323, 11324, 11325, 11326, 11327, 11328), class = "Date"), variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("MODEL", "BC", "OBSERVED"), class = "factor"), value = c(1.2860092, 0.7906073, 0.368785, 0.3539595, 0.2531596, 0.2311364, 1.52571356, 1.24322433, 0.11566294, 0.15826654, 0.18768851, 0.01537928, 1.55332905, 1.24701969, 0.11677768, 0.15906525, 0.18768533, 0.01516614), year = c(2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L)), class = "data.frame", row.names = c(NA, -18L))

scale_color_gradient for geom_curve in ggplot

Currently I have a dataframe consisting of several flights, as such:
Using ggplot as shown below, I have managed to plot the flight path from origin to destination however cannot seem to change the path line to gradient colour that can visualise the flight from origin to destination.
Please advise; from my understanding, a ggplot colour must be mapped to a variable.
# Flight paths drawn as curves from origin to destination, one facet per
# aircraft; line width follows the total delay.
q4c %>%
  mutate(
    # Fix the facet order of the tail numbers.
    TailNum = factor(
      x = TailNum,
      levels = c(
        "N351UA", "N960DL", "N524", "N14998", "N355CA",
        "N711UW", "N587AA", "N839UA", "N941CA", "N516UA"
      )
    )
  ) %>%
  ggplot() +
  usMap2 +
  geom_curve(
    aes(
      x = OriginLong, y = OriginLat,
      xend = DestLong, yend = DestLat,
      size = TotalDelay, color = TailNum
    ),
    curvature = 0.2
  ) +
  # Mark both endpoints of every flight.
  geom_point(aes(x = OriginLong, y = OriginLat), size = 0.02) +
  geom_point(aes(x = DestLong, y = DestLat), size = 0.02) +
  scale_size_continuous(range = c(0.02, 0.5)) +
  facet_wrap(~TailNum)
edit:
I have tried ggforce::geom_link; however, it only shows solid colours instead of a gradient, even though I added a dummy sequence of 0,1 to get the colour contrast.
structure(list(Year = c(2005L, 2005L, 2005L, 2005L, 2005L, 2005L
), Month = c(1L, 1L, 1L, 1L, 1L, 1L), DayofMonth = c(1L, 1L,
1L, 1L, 1L, 1L), DepTime = c(1022L, 1025L, 1037L, 1054L, 1110L,
1111L), ArrTime = c(1527L, 1057L, 1209L, 1219L, 1454L, 1409L),
DepDelay = c(3L, 0L, -10L, -1L, 65L, -2L), ArrDelay = c(6L,
-3L, -11L, -27L, 52L, -8L), TotalDelay = c(9L, -3L, -21L,
-28L, 117L, -10L), TailNum = c("N351UA", "N524", "N14998",
"N941CA", "N355CA", "N587AA"), Origin = c("DEN", "PHX", "LBB",
"LGA", "SLC", "DFW"), Dest = c("CLT", "BUR", "IAH", "GSO",
"STL", "IND"), AirportOrigin = c("Denver Intl", "Phoenix Sky Harbor International",
"Lubbock International", "LaGuardia", "Salt Lake City Intl",
"Dallas-Fort Worth International"), OriginLong = c(-104.6670019,
-112.0080556, -101.8227778, -73.87260917, -111.9777731, -97.0372
), OriginLat = c(39.85840806, 33.43416667, 33.66363889, 40.77724306,
40.78838778, 32.89595056), AirportDest = c("Charlotte/Douglas International",
"Burbank-Glendale-Pasadena", "George Bush Intercontinental",
"Piedmont Triad International", "Lambert-St Louis International",
"Indianapolis International"), DestLong = c(-80.94312583,
-118.3584969, -95.33972222, -79.9372975, -90.35998972, -86.29438417
), DestLat = c(35.21401111, 34.20061917, 29.98047222, 36.09774694,
38.74768694, 39.71732917), id = 1:6, seqnum = c(1, 6, 1,
6, 1, 6)), row.names = c(NA, 6L), class = "data.frame")
dataframe
# Attempt at a gradient path: geom_link2() coloured by the dummy seqnum
# column, one facet per aircraft.
q4cc %>%
  ggplot() +
  usMap2 +
  geom_link2(
    aes(x = OriginLong, y = OriginLat, size = TotalDelay, colour = seqnum)
  ) +
  geom_point(
    aes(x = OriginLong, y = OriginLat),
    colour = "red",
    size = 0.02
  ) +
  scale_size_continuous(range = c(0.02, 1)) +
  scale_color_gradient(name = "Journey Path", high = "red", low = "blue") +
  scale_alpha_continuous(range = c(0.03, 0.3)) +
  facet_wrap(~TailNum)
New Plot

How to force common x-axis labels/limits among facets in ggplot2?

Here's some example data.
structure(list(Transmitter = c(1675L, 1675L, 1675L, 1675L, 1681L,
1681L, 1681L, 1681L, 1685L, 1685L, 1685L, 1685L, 1685L, 9782L,
9782L, 9782L, 24166L, 24166L, 24166L, 24166L, 24184L, 24184L,
24184L, 24184L), Date = structure(c(17392, 17721, 17722, 17393,
17734, 17729, 17391, 17717, 17392, 17390, 17391, 17381, 17382,
18079, 18110, 17762, 17751, 18097, 18090, 18091, 18097, 18068,
18082, 18088), class = "Date"), Year = c(2017L, 2018L, 2018L,
2017L, 2018L, 2018L, 2017L, 2018L, 2017L, 2017L, 2017L, 2017L,
2017L, 2019L, 2019L, 2018L, 2018L, 2019L, 2019L, 2019L, 2019L,
2019L, 2019L, 2019L), DirectionGroups = structure(c(3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L), .Label = c("Both", "Marine", "River"), class = "factor"),
`min(Year)` = c(2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L
), TagYear = c(2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2018, 2018, 2018, 2018,
2018, 2018, 2018, 2018, 2018, 2018, 2018)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -24L), groups = structure(list(
Transmitter = c(1675L, 1681L, 1685L, 9782L, 24166L, 24184L
), `min(Year)` = c(2017L, 2017L, 2017L, 2017L, 2017L, 2017L
), .rows = list(1:4, 5:8, 9:13, 14:16, 17:20, 21:24)), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
Here's the code I'm using to plot.
# One panel per year; scales = "free" lets every panel pick its own axis
# range, which is why the x-axis limits and labels differ between panels.
ggplot(
  data = AbPlot3,
  mapping = aes(x = Date, y = factor(Transmitter), color = DirectionGroups)
) +
  geom_point() +
  facet_grid(. ~ Year, scales = "free") +
  scale_color_manual(
    values = c("grey70", "black", "grey40"),
    labels = c("Resident", "External", "Transient")
  ) +
  # Enlarge the legend keys only; plotted points keep their default size.
  guides(color = guide_legend(override.aes = list(size = 5))) +
  labs(y = "Transmitter") +
  theme_bw() +
  theme(
    axis.title = element_text(size = 16),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_blank(),
    legend.title = element_text(size = 16),
    legend.text = element_text(size = 14)
  )
Hopefully you can see when you plot the figure that the three facets have different x-axis limits/labels (the purpose of scales = 'free'). If you were to remove that argument, you would get the same axes limits/labels, but the axis range would cover the entire three years (2017,2018,2019), and there is a lot of white space. I'm trying to do something in between these two options such that the x axis limits/labels are the same, but separated by year with minimal white space in the plots. An x axis range of June 20 to August 20 for each year might work. I've tried manually setting the x axis limits to no avail.
Since the labels on the x axis are just the month and day, one option is to convert all the dates to the same year: as.Date(paste0("2020-", format(AbPlot3$Date, "%m-%d")))
Now, even with the "fixed" scale, all facets will plot equally.
# Re-date every observation into 2020 (keeping month and day) so that all
# year panels can share one fixed x axis.
ggplot(
  data = AbPlot3,
  mapping = aes(
    x = as.Date(paste0("2020-", format(Date, "%m-%d"))),
    y = factor(Transmitter),
    color = DirectionGroups
  )
) +
  geom_point() +
  facet_grid(. ~ Year) +
  scale_color_manual(
    values = c("grey70", "black", "grey40"),
    labels = c("Resident", "External", "Transient")
  ) +
  guides(color = guide_legend(override.aes = list(size = 5))) +
  # The x aesthetic is an expression, so give the axis a readable title.
  labs(x = "Date", y = "Transmitter") +
  theme_bw() +
  theme(
    axis.title = element_text(size = 16),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_blank(),
    legend.title = element_text(size = 16),
    legend.text = element_text(size = 14)
  )
Maybe this can help. You can play around with scale_x_date(). Hoping this is useful:
# Per-year panels with free x scales; every panel gets a break every five
# days, labelled as day-month and rotated to avoid overlap.
ggplot(
  data = AbPlot3,
  aes(x = Date, y = factor(Transmitter), color = DirectionGroups)
) +
  # `date_breaks` and `date_labels` fully define the axis. Do not also pass
  # `labels =`: `date_labels` takes precedence, so a `labels` vector would
  # be silently ignored. Limits are left at their default (NULL).
  scale_x_date(
    date_breaks = "5 days",
    date_labels = "%d-%b",
    expand = c(0.01, 0)
  ) +
  geom_point() +
  theme_bw() +
  ylab("Transmitter") +
  scale_color_manual(
    values = c("grey70", "black", "grey40"),
    labels = c("Resident", "External", "Transient")
  ) +
  theme(
    axis.text.y = element_blank(),
    axis.title = element_text(size = 16),
    axis.text.x = element_text(angle = -90, size = 14, vjust = 0.5),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 16)
  ) +
  guides(color = guide_legend(override.aes = list(size = 5))) +
  facet_grid(. ~ Year, scales = "free")

Multiple Timeseries graph in R

I am trying to create a time series plot that has multiple data over the years. I would like to just plot the years and get the data to run from start date to end date. Here I have converted the respective columns to dates and then combined them but I do not get the result I am looking for.
The data is available from this website: https://www.businessinsider.co.za/coronavirus-deaths-how-pandemic-compares-to-other-deadly-outbreaks-2020-4?r=US&IR=T
Something like this where the data doesn't start in the same year or end in the same year:
https://ichef.bbci.co.uk/news/410/cpsprodpb/6E25/production/_111779182_optimised-mortality-nc.png
(time period vs deaths caused)
library(lubridate)
library(ggplot2)
otherDiseaseData <- structure(list(ï..Disease = structure(c(11L, 2L, 12L, 6L, 3L,
1L, 9L, 7L, 13L, 4L, 5L, 8L, 10L), .Label = c("Asian Flu", "blackdeath",
"Cholera", "Covid 19", "Ebola", "HIV", "Hong Kong Flu", "Mers",
"Russian Flu", "Sars", "smallpox", "spanish flu", "Swine Flu"
), class = "factor"), Start = c(0L, 1347L, 1918L, 1981L, 1899L,
1957L, 1889L, 1968L, 2009L, 2019L, 2014L, 2012L, 2002L), End = c(1979L,
1351L, 1919L, 2020L, 1923L, 1958L, 1890L, 1970L, 2010L, 2020L,
2016L, 2020L, 2003L), Death = c(300000L, 225000000L, 50000L,
2360000L, 1500000L, 1100000L, 1000000L, 1000000L, 151700L, 101526L,
11300L, 866L, 774L)), class = "data.frame", row.names = c(NA,
-13L))
# Extract the start and end years via a Date round trip.
yrs <- otherDiseaseData$Start
yr <- as.Date(as.character(yrs), format = "%Y")
yStart <- year(yr)
yrs <- otherDiseaseData$End
yr <- as.Date(as.character(yrs), format = "%Y")
# Stored under its own name so the start years are not overwritten.
yEnd <- year(yr)

# Text label "<start> <end>" describing each outbreak's period.
otherDiseaseData$x <- paste(otherDiseaseData$Start, otherDiseaseData$End)
otherDiseaseData

# Refer to the column by bare name inside aes(); `data$col` bypasses the
# plot's data argument. The former `xlim=0000-2000` was dropped: it
# evaluates to the number -2000 and ggplot() does not use such an argument.
ggplot(otherDiseaseData, aes(x = x, y = Death)) +
  geom_point()
I'm not sure I've fully understood what you're asking for, but my interpretation is this:
library(dplyr) # provides %>% and filter() used below

# The first column arrives as "ï..Disease" (this looks like a
# byte-order-mark artefact from import); work on a copy that uses the plain
# name "Disease".
disease_data <- otherDiseaseData
names(disease_data)[1] <- "Disease"

# Long form: one row per disease and period end point, with
# variable = "Start"/"End" and value = the year.
df <- reshape::melt(disease_data, measure.vars = c("Start", "End"))

# Drop smallpox (its period starts at year 0) and the 225,000,000-death
# Black Death row; both would dominate the axes.
plot_df <- df %>%
  filter(Disease != "smallpox", Death != 225000000)

ggplot(plot_df) +
  geom_line(aes(value, Death, colour = Disease), size = 2) +
  theme_minimal() +
  # Label each line once, at its End year.
  ggrepel::geom_label_repel(
    data = filter(plot_df, variable != "Start"),
    aes(label = Disease, x = value, y = Death)
  ) +
  scale_y_log10() + # death counts span several orders of magnitude
  theme(legend.position = "none", aspect.ratio = 1) +
  ylab("Number of Deaths") +
  xlab("Year")
I've used the reshape package to reorganise the given data, and then ggrepel to label the lines. I've had to remove some data as it really throws the scale, which I've ended up making logarithmic to spread the data out a little. It gives you this plot:
It's not perfect but it might be heading in the right direction? Apologies if I've misunderstood what you were angling for.

How to change the geom_point appearance?

Using this data:
Data2 <- structure(list(year = c(2008L, 2009L, 2010L, 2011L, 2012L, 2013L,
2014L, 2015L, 2016L, 2017L, 2018L, 2019L, 2008L, 2009L, 2010L,
2011L, 2012L, 2013L, 2014L, 2015L, 2016L, 2017L, 2018L, 2019L,
2008L, 2009L, 2010L, 2011L, 2012L, 2013L, 2014L, 2015L, 2016L,
2017L, 2018L, 2019L), variable = c("var1", "var1", "var1", "var1",
"var1", "var1", "var1", "var1", "var1", "var1", "var1", "var1",
"var2", "var2", "var2", "var2", "var2", "var2", "var2", "var2",
"var2", "var2", "var2", "var2", "var3", "var3", "var3", "var3",
"var3", "var3", "var3", "var3", "var3", "var3", "var3", "var3"
), frequency = c(1L, 0L, 0L, 0L, 1L, 1L, 3L, 4L, 3L, 10L, 9L,
0L, 0L, 1L, 0L, 0L, 3L, 2L, 2L, 3L, 8L, 9L, 12L, 3L, 0L, 0L,
0L, 0L, 0L, 2L, 1L, 0L, 1L, 2L, 4L, 0L)), class = "data.frame", row.names = c(NA,
-36L))
I try to produce a plot like this one:
library(ggplot2)
# Bubble chart: one row per variable, one bubble per year, bubble area
# driven by frequency.
ggplot(
  Data2,
  aes(x = year, y = variable, size = frequency, color = variable)
) +
  geom_point() +
  theme(text = element_text(size = 18))
However I have some problem with the view of this graph:
How can I change the x axis to show all years, make the geom_point circles bigger (I tried size, but as a fixed option it makes all circles the same), and have the frequency scale start from 1 rather than showing circles for 0, if any exist?
An easy way to get years to show up is to turn them into factors. As far as starting your size scale at one: I'm not sure if you want to remove points representing 0, or just start the scale from 1. The following code should give you an idea of how to do either, or both. To remove 0s from a plot it might be easiest to just turn them into NAs, which won't be plotted (it will throw a warning, but that's okay). You can change your size scale breaks using scale_size_continuous:
Data2 %>%
  mutate(
    # Zero counts become NA so that no bubble is drawn for them.
    frequency = ifelse(frequency == 0, NA, frequency),
    # A factor axis shows every year as its own tick.
    year = as.factor(year)
  ) %>%
  ggplot(aes(x = year, y = variable, size = frequency, color = variable)) +
  geom_point() +
  # Size legend starts at 1 instead of 0.
  scale_size_continuous(breaks = c(1, 4, 8, 12)) +
  theme(text = element_text(size = 18))
When the axis labels are not all showing up, or when they are superimposed, a standard trick is to rotate them. In this case I will rotate the x axis labels by 45 degrees.
library(ggplot2)
# Same bubble chart, with the x-axis labels tilted 45 degrees and
# right-aligned so they do not collide.
ggplot(
  Data2,
  aes(x = year, y = variable, size = frequency, color = variable)
) +
  geom_point() +
  theme(
    text = element_text(size = 18),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
I have a quick&dirty solution for no 0 frequencies: Data2[Data2==0] <- NA
alternatively only for frequency:
library(data.table)
# Convert Data2 to a data.table in place (by reference, without copying).
setDT(Data2)
# For rows where frequency is 0, overwrite frequency with NA by reference;
# all other columns and rows are left untouched.
Data2[frequency==0, frequency:= NA]
For having proper years:
# Turn the integer year into a Date pinned to 1 January. Parsing with
# strptime(x, "%Y") alone fills the missing month and day from today's
# date, so the result would depend on the day the script is run.
Data2$year <- as.Date(paste0(Data2$year, "-01-01"))

ggplot(Data2, aes(year, variable, size = frequency, color = variable)) +
  geom_point() +
  # One labelled break per year so that every year appears on the axis.
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

Resources