Function to label local extrema in ggplot2 line graph in R - r

I am plotting a time series and would like to automatically label specific points, such as the last point and the local extrema (the highest and lowest values).
How would I wrap this in a function that automatically labels key points, specifically so that more local extrema can be added?
Here is an example dataset:
latest <- structure(list(ReleaseDate = structure(c(1363928400, 1364533200,
1365138000, 1365742800, 1366347600, 1366952400, 1367557200, 1368162000,
1368766800, 1369371600, 1369976400, 1370581200, 1371186000, 1371790800,
1372395600, 1373000400, 1373605200, 1374210000, 1374814800, 1375419600,
1376024400, 1376629200, 1377234000, 1377838800, 1378443600, 1379048400,
1379653200, 1380258000, 1380862800, 1381467600, 1382072400, 1382677200,
1383282000, 1383890400, 1384495200), class = c("POSIXct", "POSIXt"
), tzone = ""), Count = c(1746L, 1748L, 1738L, 1771L, 1758L,
1754L, 1764L, 1769L, 1769L, 1762L, 1771L, 1765L, 1771L, 1759L,
1748L, 1757L, 1759L, 1770L, 1776L, 1782L, 1778L, 1791L, 1776L,
1776L, 1767L, 1768L, 1761L, 1744L, 1756L, 1743L, 1739L, 1738L,
1742L, 1754L, 1762L)), .Names = c("ReleaseDate", "Count"), row.names = 150:184, class = "data.frame")
library(ggplot2)
# `ddply()` is a function exported by the plyr package, not a package itself,
# and the original call also misspelled `library`.
library(plyr)

# Rows to annotate: the most recent observation, the global maximum, and the
# minimum within a manually chosen window (rows 5 to 20).
last <- tail(latest, 1)
high <- subset(latest, Count == max(Count))
mid <- subset(latest[5:20, ], Count == min(Count))

# Each geom_text() layer inherits x/y from the plot and maps `label` to the
# Count column of its own data subset, so every label is tied to its row
# instead of relying on an external vector of matching length.
ggplot(latest, aes(ReleaseDate, Count)) +
  geom_line() +
  geom_text(data = last, aes(label = Count), vjust = -1) +
  geom_text(data = high, aes(label = Count), vjust = -.1) +
  geom_text(data = mid, aes(label = Count), vjust = 1)
Here I've attempted to manually add specific points, but it would not be concise if I wanted to add 10 labels to a longer series.

Related

How do I scatterplot LD1 vs LD2 in lda analysis?

I'm very very new to R, so thanks in advance for the help
I ran an LDA on my dataset (tme.lda), and the console shows all my results with LD1, LD2, LD3, LD4, LD5 and LD6. However, every method I have tried for plotting them fails with a different error: "Error LD1 object not found", "Error in fortify", "Error in as.data.frame", to name a few.
This is my dataset:
dput(head(tme.lda))
structure(list(Word = structure(1:6, levels = c("bene", "bile",
"casa", "come", "posso", "tutto", "vero"), class = "factor"),
f0min = c(184L, 193L, 189L, 199L, 175L, 144L), f0max = c(229L,
226L, 198L, 225L, 192L, 188L), F1 = c(600L, 347L, 980L, 531L,
550L, 432L), F2 = c(2406L, 2695L, 1759L, 997L, 996L, 1901L
), F4 = c(4125L, 4403L, 3837L, 3988L, 3909L, 4171L), max_F0 = c(143L,
130L, 124L, 133L, 123L, 120L)), row.names = c(NA, 6L), class = "data.frame")
And this is the code I wrote, how can I get from here the scatterplot LD1 vs LD2?
# Open the current tme.lda data frame in the data viewer.
View(tme.lda)
# LDA with "Word" as the grouping factor.
# Prepend column 5 of `tme` to the first six columns of tme.lda; the new
# column initially gets the name "tme[, 5]" (see the printed names below).
tme.lda<-cbind(tme[,5],tme.lda[,1:6])
names(tme.lda)
#> [1] "tme[, 5]" "f0min" "f0max" "F1" "F2" "F4" "max_F0"
# Rename the columns so the grouping column is called "Word".
names(tme.lda)=c("Word","f0min","f0max","F1","F2","F4","max_F0")
names(tme.lda)
#> [1] "Word" "f0min" "f0max" "F1" "F2" "F4" "max_F0"
library(MASS)
# Fit the model with Word as the response and the six measurements as
# predictors. The result is not assigned here, so it is only printed.
lda(Word~f0min+f0max+F1+F2+F4+max_F0,data = tme.lda)
I tried this:
# Attempts that were tried (kept as posted).
# `Word` is used here as a standalone object; in the code shown it exists only
# as a column of tme.lda.
plot(Word, panel = tme.lda, abbrev = FALSE, xlab = "LD1", ylab = "LD2")
# NOTE(review): `x` and `panel.lda` are not defined anywhere in the code shown.
plot(x, panel = panel.lda, cex = 0.7, dimen=2, abbrev = FALSE, xlab = "LD1", ylab = "LD2")
ggplot(Word, panel = tme.lda, cex = 0.7, dimen=2, xlab = "LD1", ylab = "LD2")
# This builds an aes() mapping and then tries to call the result like a function.
ggplot2::aes(LD1,LD2) (Word, panel = tme.lda, cex = 0.7, dimen=2, xlab = "LD1", ylab = "LD2")
# Store the fitted model returned by lda() under the name `plot.lda`.
plot.lda<-lda(Word~f0min+f0max+F1+F2+F4+max_F0,data = tme.lda)
# The fitted model object (not a data frame) is passed as the plot data, and
# LD1/LD2 are not columns of any data frame shown above.
# NOTE(review): presumably the source of the "Error in fortify" / "LD1 object
# not found" errors mentioned in the question; confirm.
ggp <- ggplot(plot.lda, aes(x = LD1, y=LD2)) +
geom_point(mapping = aes(colour=Word)) +
ggtitle("LD1 Vs. LD2")
# Overwrites `ggp` with a plot object that has no layers.
ggp <- ggplot(plot.lda, aes(x = LD1, y=LD2))
These are just a few of the things I tried.

plotting a 3D bubble graph on R

I am currently trying to plot a 3D bubble graph with 2 axes (later I will try with 3), as in Excel, but in R (here is an example of the 3D bubble plot I am trying to create):
https://fr.extendoffice.com/documents/excel/2017-excel-create-bubble-chart.html
library(ggplot2)
library(scales)
# Bubble chart of var_2020_2021_valeur (x) against var_2020_2021_CA (y), with
# points coloured by Specialite and sized by CA.annee.N.
p <- ggplot(plot_3D, aes(x = var_2020_2021_valeur, y = var_2020_2021_CA)) +
geom_point(aes(color = Specialite, size = CA.annee.N), alpha = 0.5) +
# Manual palette with fifteen colours.
scale_color_manual(values = c("#00AFBB", "#E7B800", "#FC4E07",
"#FFB5C5", "#BF87B3", "#7F5AA2", "#3F2D91", "#000080",
"#2468A0", "#a32cc4", "#9073db", "#c51f5d",
"#5800ff", "#4455ff", "#48ff50")) +
scale_size(range = c(0.5, 12)) + # Set the range of point sizes
# ylim()/xlim() are followed further down by scale_y_continuous() and
# scale_x_continuous(); the message quoted after this snippet reports that a
# scale for 'x' and 'y' is already present and will be replaced.
ylim(-100, 100) +
xlim (-100, 100) +
# Reference lines through the origin.
geom_hline(yintercept=0) +
geom_vline(xintercept=0) +
# Format both axes as percentages.
scale_y_continuous(labels = percent) +
scale_x_continuous(labels = percent)
I get the following error message:
"Scale for 'y' is already present. Adding another scale for 'y',
which
will replace the existing scale.
Scale for 'x' is already present. Adding another scale for 'x',
which
will replace the existing scale."
Even when I remove the last two lines of code, it doesn't work...
Here is the structure of my dataset plot_3D :
structure(list(Specialite = c("ANESTHESIE REANIMATION", "Autres",
"CHIRURGIE GENERALE ET VISCERALE", "CHIRURGIE PLASTIQUE", "GASTRO ENTEROLOGIE",
"GYNECOLOGIE OBSTETRIQUE", "IMAGERIE", "MAXILLO STOMATO", "MEDECINE GENERALE et
INTERNE",
"OPHTALMOLOGIE", "ORL", "ORTHOPEDIE", "PNEUMOLOGIE", "URGENTISTE",
"UROLOGIE"), CA.annee.N = c(64310L, 25298L, 1205537L, 42020L,
3694964L, 344370L, 3454L, 588033L, 228439L, 1849804L, 27358L,
2739286L, 0L, 916L, 432907L), Nombre.de.sejours.annee.N = c(171L,
34L, 1504L, 56L, 9224L, 682L, 9L, 1103L, 888L, 2276L, 57L, 4068L,
0L, 2L, 720L), CA.annee.N.1 = c(50135L, 454L, 790559L, 15531L,
2644858L, 304242L, 3026L, 402195L, 459813L, 1308933L, 20597L,
2269691L, 0L, 3901L, 318352L), Nombre.de.sejours.annee.N.1 = c(150L,
1L, 1067L, 25L, 7276L, 627L, 9L, 802L, 1918L, 1693L, 43L, 3519L,
0L, 7L, 547L), CA.annee.N.2 = c(48583L, 453L, 941610L, 16675L,
3140507L, 385813L, 2950L, 642017L, 691982L, 1704005L, 51602L,
2261368L, 7145L, 4648L, 308169L), Nombre.de.sejours.annee.N.2 = c(154L,
1L, 1264L, 28L, 8317L, 831L, 8L, 1286L, 3231L, 2269L, 127L, 3610L,
26L, 10L, 551L), CA_par_sejour_N = c(376.081871345029, 744.058823529412,
801.553856382979, 750.357142857143, 400.581526452732, 504.941348973607,
383.777777777778, 533.121486854034, 257.251126126126, 812.743409490334,
479.964912280702, 673.374139626352, NaN, 458, 601.259722222222
), CA_par_sejour_N1 = c(334.233333333333, 454, 740.917525773196,
621.24, 363.504398020891, 485.234449760766, 336.222222222222,
501.490024937656, 239.735662148071, 773.14412285883, 479, 644.981813015061,
NaN, 557.285714285714, 581.99634369287), CA_par_sejour_N2 = c(315.474025974026,
453, 744.944620253165, 595.535714285714, 377.600937838163, 464.275571600481,
368.75, 499.235614307932, 214.169606932838, 750.993829881005,
406.314960629921, 626.417728531856, 274.807692307692, 464.8,
559.290381125227), var_2020_2021_valeur = c(0.125207553640259,
0.638896087069189, 0.0818395145215454, 0.207837780659878, 0.101999119223065,
0.0406131494220115, 0.141440846001322, 0.063074957314078, 0.0730615704860669,
0.051218505658529, 0.00201443064864667, 0.0440203522616658, NaN,
-0.178159446295822, 0.0330987964754596), var_2020_2021_CA = c(0.282736611149895,
54.7224669603524, 0.524917178856986, 1.70555662867813, 0.397036816343259,
0.131895004634469, 0.141440846001322, 0.462059448774848, -0.503191514811456,
0.413215191304673, 0.328251687138904, 0.206898207729598, NaN,
-0.765188413227378, 0.35983753832236)), class = "data.frame", row.names = c(NA,
-15L))
Could anyone help ?
You should remove xlim and ylim, because they are shortcuts that only make sense when you don't specify a scale for that axis yourself. Instead, you can pass the limits to both scale_*_continuous calls through their limits argument, like this:
library(ggplot2)
library(scales)

# Bubble chart of var_2020_2021_valeur against var_2020_2021_CA, with points
# coloured by Specialite and sized by CA.annee.N. The axis limits live inside
# the continuous scales, so each axis carries exactly one scale.
p <- ggplot(
  data = plot_3D,
  mapping = aes(x = var_2020_2021_valeur, y = var_2020_2021_CA)
) +
  geom_point(
    mapping = aes(color = Specialite, size = CA.annee.N),
    alpha = 0.5
  ) +
  scale_color_manual(
    values = c(
      "#00AFBB", "#E7B800", "#FC4E07", "#FFB5C5", "#BF87B3",
      "#7F5AA2", "#3F2D91", "#000080", "#2468A0", "#a32cc4",
      "#9073db", "#c51f5d", "#5800ff", "#4455ff", "#48ff50"
    )
  ) +
  # Range of point sizes used for the bubbles.
  scale_size(range = c(0.5, 12)) +
  # Reference lines through the origin.
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  # Percent-formatted axes with explicit limits.
  scale_x_continuous(limits = c(-100, 100), labels = percent) +
  scale_y_continuous(limits = c(-100, 100), labels = percent)

p
#> Warning: Removed 1 rows containing missing values (geom_point).
Created on 2022-07-12 by the reprex package (v2.0.1)

Adding date labels to barplot

I'm trying to add date labels to each individual bar that i've plotted. The data looks like this:
structure(list(lakeID = c("WE1", "WE1", "WE1", "WE1", "WE1",
"WE1", "WE1", "WE1", "WE1", "WE1"), depth = c("hypolimnion",
"hypolimnion", "hypolimnion", "hypolimnion", "hypolimnion", "hypolimnion",
"hypolimnion", "hypolimnion", "hypolimnion", "hypolimnion"),
date = structure(c(1505779200, 1529366400, 1534723200, 1537142400,
1559088000, 1590624000, 1592352000, 1596153600, 1597881600,
1599696000), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
year = c(2017L, 2018L, 2018L, 2018L, 2019L, 2020L, 2020L,
2020L, 2020L, 2020L), BACI = c("Control", "Control", "Control",
"Control", "Impact", "Impact", "Impact", "Impact", "Impact",
"Impact"), N = c(2L, 2L, 1L, 1L, 7L, 56L, 10L, 58L, 3L, 2L
), meanlengths = c(1.44092870850016, 1.90413253399558, 2.08356379729054,
1.24630120343851, 6.39410976055929, 6.07576588787627, 5.63215818993585,
6.00116183368038, 5.42957068546558, 3.62096915399567), sd = c(0.261562571663927,
0.237860661401758, NA, NA, 0.454442531596122, 0.703831022329251,
0.465831548164042, 1.1448481521142, 1.19135665831808, 1.41507359493585
), SE = c(0.184952668128155, 0.1681928866547, NA, NA, 0.171763131967707,
0.0940533765621035, 0.14730886981608, 0.150325927020161,
0.687830087380796, 1.00060813485716), Upper2SE = c(1.80343593803135,
2.23379059183879, NA, NA, 6.73076549921599, 6.26011050593799,
5.92088357477536, 6.2958006506399, 6.77771765673194, 5.58216109831572
), Lower2SE = c(1.07842147896898, 1.57447447615237, NA, NA,
6.05745402190258, 5.89142126981455, 5.34343280509633, 5.70652301672087,
4.08142371419922, 1.65977720967563)), row.names = 22:31, class = "data.frame")
I've attempted to plot the dates with this code but have not been able to get them to show:
# Draw the bars and keep barplot()'s return value in `xxx` for positioning the
# error bars. The sample data's column is named `meanlengths`, so
# `ChiSum$meanlength` relies on `$` partial matching.
# NOTE(review): xaxt="n" presumably also suppresses the names.arg labels;
# confirm against the barplot documentation.
xxx <- barplot(ChiSum$meanlength, axisnames=TRUE, beside = TRUE, names.arg=c(ChiSum$date), col=ifelse(ChiSum$BACI=="Control",'white','gray'), ylim = c(-0.01,8), ylab = "Mean body length",
xlab="Date",xaxt="n")
# Error bars from Lower2SE to Upper2SE at each bar position; both branches of
# the ifelse() yield 'black'.
arrows(xxx,ChiSum$Lower2SE,xxx,ChiSum$Upper2SE, code=3, length=0.02, angle = 90,
col=ifelse(ChiSum$BACI=="Control",'black','black'))
# `XXX` differs in case from the `xxx` assigned above (R names are
# case-sensitive). The index c(1,20) selects two positions while `labels`
# is built from every date in ChiSum.
axis(side=1,at=XXX[c(1,20)], labels=format(ChiSum$date))
How about this:
# Default barplot() geometry: each bar is 1 unit wide with 0.2 units of space
# before it, so the first midpoint sits at 0.7 and every later one is 1.2
# further along. The number of bars is taken from the data rather than
# hard-coded.
bar_mids <- cumsum(c(0.7, rep(1.2, nrow(ChiSum) - 1)))

# Enlarge the bottom margin so the rotated date labels fit.
par(mar = c(8, 4, 2, 1) + 0.1)

# Use the full column name `meanlengths`; `$meanlength` only worked through
# `$` partial matching. The x axis is suppressed here (xaxt = "n") and drawn
# explicitly below.
xxx <- barplot(
  ChiSum$meanlengths,
  axisnames = TRUE,
  beside = TRUE,
  names.arg = c(ChiSum$date),
  col = ifelse(ChiSum$BACI == "Control", "white", "gray"),
  ylim = c(-0.01, 8),
  ylab = "Mean body length",
  xlab = "",
  xaxt = "n"
)

# Error bars from Lower2SE to Upper2SE at each bar position, all in black.
arrows(
  xxx, ChiSum$Lower2SE, xxx, ChiSum$Upper2SE,
  code = 3, length = 0.02, angle = 90, col = "black"
)

# One formatted date per bar midpoint; las = 2 turns the labels perpendicular
# to the axis.
axis(side = 1, at = bar_mids, labels = format(ChiSum$date), las = 2)
The key is to know where the mid-points of the bars are. The default settings are that the bars are one-unit wide with a space (before every bar) of 0.2. So the mid-point of the first bar is at 0.7. The mid-point of the second bar is 0.7 + 1.2, the mid-point of the third is 0.7 + 1.2*2, etc... These mid-points should be specified as at and then you can use the dates as the labels. To make them plot nicely, you can use las=2 to turn the labels perpendicular to the axis.
You could also do it with ggplot2:
# Sort once and keep the result, so the axis breaks and labels come from the
# same row order as the bars (the original labelled with the unsorted
# ChiSum$date and hard-coded breaks = 1:10).
chi_sorted <- ChiSum %>%
  arrange(date) %>%
  mutate(obs = seq_len(n()))

ggplot(
  chi_sorted,
  aes(x = obs, y = meanlengths, ymin = Lower2SE, ymax = Upper2SE, fill = BACI)
) +
  geom_bar(stat = "identity", col = "black") +
  geom_errorbar(width = .15) +
  # One break per bar, labelled with that bar's date as text.
  scale_x_continuous(
    breaks = chi_sorted$obs,
    labels = format(chi_sorted$date)
  ) +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = c(.15, .85)
  ) +
  labs(x = "", y = "Mean Length") +
  scale_fill_manual(values = c("white", "gray50"))

How do I label each bar with the total of its stacked y-axis values, e.g. the sum of “Confirmed” cases per x-axis “Date”?

I have been working on this for some time, and am re-posting it in the hope of simplifying the problem definition and clarifying it based on feedback from my previous attempt. I am able to label each individual column value, but I cannot put together the code needed to sum the total. The examples I have looked at never work the way I try to combine them, for example with group_by or summarize. I would like to sum only the values of "Confirmed" cases and not show the other column values, because with many c("x", "Y", ... "data") the labels become impossible to read.
Here is the data frame:
dput(COVID1[1:12, ])
structure(list(COUNTY = c("Antrim", "Antrim", "Antrim", "Charlevoix",
"Charlevoix", "Grand Traverse", "Grand Traverse", "Grand Traverse",
"Antrim", "Grand Traverse", "Grand Traverse", "Grand Traverse"
), Date = structure(c(18453, 18456, 18457, 18453, 18455, 18453,
18456, 18457, 18455, 18453, 18456, 18457), class = "Date"), CASE_STATUS = c("Confirmed",
"Confirmed", "Confirmed", "Confirmed", "Confirmed", "Confirmed",
"Confirmed", "Confirmed", "Probable", "Probable", "Probable",
"Probable"), Cases = c(1L, 1L, 2L, 1L, 3L, 2L, 2L, 1L, 1L, 1L,
1L, 1L)), row.names = c(NA, 12L), class = "data.frame")
Code:
# Plot only the three listed counties, keeping rows with Cases > 0.
ggplot(filter(COVID1, COUNTY %in% c("Antrim", "Charlevoix", "Grand Traverse"), Cases > 0)) +
# Stacked columns per Date, filled by CASE_STATUS.
geom_col(aes(x = Date, y = Cases, fill = CASE_STATUS), position = position_stack(reverse = TRUE), width = .88)+
# Labels every row with its own Cases value, stacked the same way as the
# columns; no per-date total is computed here.
geom_text(aes(x = Date, y = Cases, label = (Cases)), position = position_stack(reverse = TRUE), vjust = 1.5, size = 3, color = "white") +
scale_fill_manual(values = c('blue',"tomato"))+
# 'today()' is inside quotes, so as.Date() receives it as a literal string
# rather than the result of a function call.
# NOTE(review): presumably this parses to NA for the upper limit; confirm.
scale_x_date(labels = date_format("%m/%d"), limits = as.Date(c('2020-07-09','today()')), breaks = "1 week")+
theme(axis.text.x = element_text(angle=0))+
labs(title = "Antrim - Grand Traverse - Charlevoix")
I'm not sure if I understood the question but I think you want to add the sum of the confirmed cases as labels. There might be a ggplot way of doing it but I think the most straightforward way is to make another dataset with your labels and feed it in.
# Rows to plot: the three counties of interest, dropping zero-case rows.
# Filtered once and reused for both the label data and the plot.
plot_data <- filter(
  COVID1,
  COUNTY %in% c("Antrim", "Charlevoix", "Grand Traverse"),
  Cases > 0
)

# One row per date holding the total number of confirmed cases on that date.
date_labels <- plot_data %>%
  group_by(Date) %>%
  summarise(confirmed_cases = sum(Cases[CASE_STATUS == "Confirmed"]))

ggplot(plot_data) +
  # Stacked columns per Date, filled by CASE_STATUS.
  geom_col(
    aes(x = Date, y = Cases, fill = CASE_STATUS),
    position = position_stack(reverse = TRUE),
    width = .88
  ) +
  # A single label per date showing the confirmed total, anchored at y = 1.
  geom_text(
    data = date_labels,
    aes(x = Date, y = 1, label = confirmed_cases),
    position = position_stack(reverse = TRUE),
    vjust = 1.5, size = 3, color = "white"
  ) +
  scale_fill_manual(values = c("blue", "tomato")) +
  # The original passed the quoted string 'today()' to as.Date(), which parses
  # to NA. The NA is written explicitly here: an NA limit tells ggplot2 to
  # take that end of the axis from the data.
  scale_x_date(
    labels = label_date("%m/%d"),
    limits = c(as.Date("2020-07-09"), NA),
    breaks = "1 week"
  ) +
  theme(axis.text.x = element_text(angle = 0)) +
  labs(title = "Antrim - Grand Traverse - Charlevoix")
Gives me this result:

R: ggplot geom_hline custom legend with scale_color_manual error

My code was working fine until I added another geom_hline() layer to my plot. Now I keep getting this error and I don't understand what I'm doing wrong. I want to have two horizontal lines both red but one either dotted or dashed at the yintercept provided.
Error in `[[<-.data.frame`(`*tmp*`, v, value = c(1, 2)) :
replacement has 2 rows, data has 1
Code:
# Box plot of `val` per year (year converted to a factor) with two horizontal
# reference lines at 230 and 860.
box_whisk_graph<-ggplot(data = box_fresh_chloride, aes(x = factor(year), y = val)) +
geom_boxplot(coef=10) +
# Both reference lines map `color` to the same constant string "red".
geom_hline(aes(yintercept = 230,color="red"),size=1.3)+
geom_hline(aes(yintercept = 860,color="red"),size=1.3)+
scale_color_manual("",
# The values vector uses the name "red" twice.
values = c("red"="red","red"="red"),
labels=c("Freshwater Aquatic Life (chronic)\nCriteria for chloride = 230 mg/L","Freshwater Aquatic Life Criteria (acute) for chloride = 860 mg/L"),
# override.aes supplies two linetypes and two line widths.
# NOTE(review): presumably both lines collapse into one legend key because
# they share the value "red", which would explain the quoted error
# "replacement has 2 rows, data has 1"; confirm.
guide=guide_legend(override.aes=list(linetype=c(1,2), lwd=c(1,0.5))))
dput of data:
structure(list(orgid = c("USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ",
"USGS-NJ", "USGS-NJ"), locid = c("USGS-01482500", "USGS-0146453250",
"USGS-01392150", "USGS-01411035", "USGS-01411466", "USGS-01411444"
), stdate = structure(c(16394, 16610, 16328, 16583, 16602, 16602
), class = "Date"), sttime = c("09:45:00", "11:00:00", "10:40:00",
"09:45:00", "12:00:00", "10:30:00"), charnam = c("Chloride",
"Chloride", "Chloride", "Chloride", "Chloride", "Chloride"),
val = c(23.6, 221, 144, 10.8, 10.5, 5.76), valunit = c("mg/l",
"mg/l", "mg/l", "mg/l", "mg/l", "mg/l"), swqs = c("FW2-NT",
"FW2-NT", "FW2-NT", "PL", "FW2-NT", "PL"), region = c("Lower Delaware",
"Lower Delaware", "Northeast", "Atlantic Coast", "Lower Delaware",
"Atlantic Coast"), WMA = c(17L, 20L, 4L, 15L, 17L, 16L),
year = c(2014, 2015, 2014, 2015, 2015, 2015)), .Names = c("orgid",
"locid", "stdate", "sttime", "charnam", "val", "valunit", "swqs",
"region", "WMA", "year"), row.names = c(NA, 6L), class = "data.frame")

Resources