plotting a 3D bubble graph on R

plotting a 3D bubble graph on R - r

I am currently trying to plot a 3D bubble graph with 2 (then later i will try with 3) axes, as in excel, but on R (here is an example of the 3D bubble plot i am trying to plot) :
https://fr.extendoffice.com/documents/excel/2017-excel-create-bubble-chart.html
library(ggplot2)
library(scales)
p <- ggplot(plot_3D, aes(x = var_2020_2021_valeur, y = var_2020_2021_CA)) +
geom_point(aes(color = Specialite, size = CA.annee.N), alpha = 0.5) +
scale_color_manual(values = c("#00AFBB", "#E7B800", "#FC4E07",
"#FFB5C5", "#BF87B3", "#7F5AA2", "#3F2D91", "#000080",
"#2468A0", "#a32cc4", "#9073db", "#c51f5d",
"#5800ff", "#4455ff", "#48ff50")) +
scale_size(range = c(0.5, 12)) + # Réglage de la plage de tailles des points
ylim(-100, 100) +
xlim (-100, 100) +
geom_hline(yintercept=0) +
geom_vline(xintercept=0) +
scale_y_continuous(labels = percent) +
scale_x_continuous(labels = percent)
I get the following message of error :
"Scale for 'y' is already present. Adding another scale for 'y',
which
will replace the existing scale.
Scale for 'x' is already present. Adding another scale for 'x',
which
will replace the existing scale."
Even when i abandon the two last lines of code, it doesn't work...
Here is the structure of my dataset plot_3D :
structure(list(Specialite = c("ANESTHESIE REANIMATION", "Autres",
"CHIRURGIE GENERALE ET VISCERALE", "CHIRURGIE PLASTIQUE", "GASTRO ENTEROLOGIE",
"GYNECOLOGIE OBSTETRIQUE", "IMAGERIE", "MAXILLO STOMATO", "MEDECINE GENERALE et
INTERNE",
"OPHTALMOLOGIE", "ORL", "ORTHOPEDIE", "PNEUMOLOGIE", "URGENTISTE",
"UROLOGIE"), CA.annee.N = c(64310L, 25298L, 1205537L, 42020L,
3694964L, 344370L, 3454L, 588033L, 228439L, 1849804L, 27358L,
2739286L, 0L, 916L, 432907L), Nombre.de.sejours.annee.N = c(171L,
34L, 1504L, 56L, 9224L, 682L, 9L, 1103L, 888L, 2276L, 57L, 4068L,
0L, 2L, 720L), CA.annee.N.1 = c(50135L, 454L, 790559L, 15531L,
2644858L, 304242L, 3026L, 402195L, 459813L, 1308933L, 20597L,
2269691L, 0L, 3901L, 318352L), Nombre.de.sejours.annee.N.1 = c(150L,
1L, 1067L, 25L, 7276L, 627L, 9L, 802L, 1918L, 1693L, 43L, 3519L,
0L, 7L, 547L), CA.annee.N.2 = c(48583L, 453L, 941610L, 16675L,
3140507L, 385813L, 2950L, 642017L, 691982L, 1704005L, 51602L,
2261368L, 7145L, 4648L, 308169L), Nombre.de.sejours.annee.N.2 = c(154L,
1L, 1264L, 28L, 8317L, 831L, 8L, 1286L, 3231L, 2269L, 127L, 3610L,
26L, 10L, 551L), CA_par_sejour_N = c(376.081871345029, 744.058823529412,
801.553856382979, 750.357142857143, 400.581526452732, 504.941348973607,
383.777777777778, 533.121486854034, 257.251126126126, 812.743409490334,
479.964912280702, 673.374139626352, NaN, 458, 601.259722222222
), CA_par_sejour_N1 = c(334.233333333333, 454, 740.917525773196,
621.24, 363.504398020891, 485.234449760766, 336.222222222222,
501.490024937656, 239.735662148071, 773.14412285883, 479, 644.981813015061,
NaN, 557.285714285714, 581.99634369287), CA_par_sejour_N2 = c(315.474025974026,
453, 744.944620253165, 595.535714285714, 377.600937838163, 464.275571600481,
368.75, 499.235614307932, 214.169606932838, 750.993829881005,
406.314960629921, 626.417728531856, 274.807692307692, 464.8,
559.290381125227), var_2020_2021_valeur = c(0.125207553640259,
0.638896087069189, 0.0818395145215454, 0.207837780659878, 0.101999119223065,
0.0406131494220115, 0.141440846001322, 0.063074957314078, 0.0730615704860669,
0.051218505658529, 0.00201443064864667, 0.0440203522616658, NaN,
-0.178159446295822, 0.0330987964754596), var_2020_2021_CA = c(0.282736611149895,
54.7224669603524, 0.524917178856986, 1.70555662867813, 0.397036816343259,
0.131895004634469, 0.141440846001322, 0.462059448774848, -0.503191514811456,
0.413215191304673, 0.328251687138904, 0.206898207729598, NaN,
-0.765188413227378, 0.35983753832236)), class = "data.frame", row.names = c(NA,
-15L))
Could anyone help ?

You should remove xlim and ylim because they can be used when you don't specify anything else on your axis. So your could add the limits to both scale_*_continuous in the limits arguments like this:
library(ggplot2)
library(scales)
p <- ggplot(plot_3D, aes(x = var_2020_2021_valeur, y = var_2020_2021_CA)) +
geom_point(aes(color = Specialite, size = CA.annee.N), alpha = 0.5) +
scale_color_manual(values = c("#00AFBB", "#E7B800", "#FC4E07",
"#FFB5C5", "#BF87B3", "#7F5AA2", "#3F2D91", "#000080",
"#2468A0", "#a32cc4", "#9073db", "#c51f5d",
"#5800ff", "#4455ff", "#48ff50")) +
scale_size(range = c(0.5, 12)) + # Réglage de la plage de tailles des points
geom_hline(yintercept=0) +
geom_vline(xintercept=0) +
scale_y_continuous(labels = percent, limits = c(-100, 100)) +
scale_x_continuous(labels = percent, limits = c(-100, 100))
p
#> Warning: Removed 1 rows containing missing values (geom_point).
Created on 2022-07-12 by the reprex package (v2.0.1)

Related

How do I scatterplot LD1 vs LD2 in lda analysis?

I'm very very new to R, so thanks in advance for the help
I did the lda analysis on my dataset (tme.lda), in the console I get all my results with LD1, LD2, LD3, LD4, LD5 and LD6 but when I try to plot it I tried a lot of different methods but I get every kind of error: Error LD1 object not found - Error in fortify - Error in as.data.frame just to say a few.
This is my dataset:
dput(head(tme.lda))
structure(list(Word = structure(1:6, levels = c("bene", "bile",
"casa", "come", "posso", "tutto", "vero"), class = "factor"),
f0min = c(184L, 193L, 189L, 199L, 175L, 144L), f0max = c(229L,
226L, 198L, 225L, 192L, 188L), F1 = c(600L, 347L, 980L, 531L,
550L, 432L), F2 = c(2406L, 2695L, 1759L, 997L, 996L, 1901L
), F4 = c(4125L, 4403L, 3837L, 3988L, 3909L, 4171L), max_F0 = c(143L,
130L, 124L, 133L, 123L, 120L)), row.names = c(NA, 6L), class = "data.frame")
And this is the code I wrote, how can I get from here the scatterplot LD1 vs LD2?
View(tme.lda)
#lDFA analysis with "WORD" as grouping factor
tme.lda<-cbind(tme[,5],tme.lda[,1:6])
names(tme.lda)
#> [1] "tme[, 5]" "f0min" "f0max" "F1" "F2" "F4" "max_F0"
names(tme.lda)=c("Word","f0min","f0max","F1","F2","F4","max_F0")
names(tme.lda)
#> [1] "Word" "f0min" "f0max" "F1" "F2" "F4" "max_F0"
library(MASS)
lda(Word~f0min+f0max+F1+F2+F4+max_F0,data = tme.lda)
I tried this:
plot(Word, panel = tme.lda, abbrev = FALSE, xlab = "LD1", ylab = "LD2")
plot(x, panel = panel.lda, cex = 0.7, dimen=2, abbrev = FALSE, xlab = "LD1", ylab = "LD2")
ggplot(Word, panel = tme.lda, cex = 0.7, dimen=2, xlab = "LD1", ylab = "LD2")
ggplot2::aes(LD1,LD2) (Word, panel = tme.lda, cex = 0.7, dimen=2, xlab = "LD1", ylab = "LD2")
plot.lda<-lda(Word~f0min+f0max+F1+F2+F4+max_F0,data = tme.lda)
ggp <- ggplot(plot.lda, aes(x = LD1, y=LD2)) +
geom_point(mapping = aes(colour=Word)) +
ggtitle("LD1 Vs. LD2")
ggp <- ggplot(plot.lda, aes(x = LD1, y=LD2))
Just to say a few things I tried

Adding line with specific df value to each bar in ggplot

My df is organized this way for example:
OCCURED_COUNTRY_DESC | a | b | c | d | flagged | type | MedDRA_PT| **E** |
__________________________________________________________________________
UNITED STATES |403|1243|473|4077| yes | disp | Seizure |144.208|
__________________________________________________________________________
My data:
structure(list(OCCURED_COUNTRY_DESC = c("AUSTRALIA", "AUSTRIA",
"BELGIUM", "BRAZIL", "CANADA"), a = c(4L, 7L, 20L, 5L, 11L),
b = c(31, 27, 100, 51, 125), c = c(872, 869, 856, 871, 865
), d = c(5289, 5293, 5220, 5269, 5195), w = c(876, 876, 876,
876, 876), x = c(5320, 5320, 5320, 5320, 5320), y = c(35L,
34L, 120L, 56L, 136L), z = c(6161, 6162, 6076, 6140, 6060
), N = c(6196, 6196, 6196, 6196, 6196), k = c("0.5", "0.5",
"0.5", "0.5", "0.5"), SOR = c(0.80199821407511, 1.52042360060514,
1.21312776329214, 0.615857869962066, 0.539569644832803),
log = c(-0.318329070860348, 0.604473324558599, 0.278731499148795,
-0.699330656240263, -0.890118907611227), LL99 = c(-0.695969674426877,
0.382102954188229, 0.198127619344382, -1.00534117464748,
-1.03425468471322), UL99 = c(-0.0544058884186467, 0.763880731966007,
0.337239065783058, -0.482651467660248, -0.785935460582379
), flagged = c("no", "no", "no", "no", "yes"), type = c(NA,
NA, NA, NA, "under"), MedDRA_PT = c("Seizure", "Seizure",
"Seizure", "Seizure", "Seizure"), E = c(5.11098506333901,
4.43283582089552, 16.3984674329502, 8.43063199848168, 20.8132820019249
)), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"
))
I am using ggplot2 to create a bar chart using the following piece of code:
test2 %>% #using test2 as the df
ggplot(aes(a, OCCURED_COUNTRY_DESC, fill=type)) +
geom_bar(stat="identity")+
scale_fill_manual(values = c("disp" = "#FF8C00",
"under" = "#7EC0EE",
"NA"="#EEE9E9"))+
theme_classic()+
labs(title = "Seizure",
x = "Count",
y = "")
What I would like to do is to add a black line in each bar correspondent to the E value, from the dataframe, for that country. However I haven't been successful. Can someone kindly guide me on how to achieve this?
Thanks!

One option to achieve your desired result would be via a geom_segment, where you map your E column on both the x and the xend position. "Tricky" part are the y positions. However, as a categorical axis is still a numeric axis we could add a helper column to your data which contains the numeric positions of your categorical OCCURED_COUNTRY_DESC column. This helper column could then be mapped on the y and the yend aes needed by geom_segment where we also take into account the width of the bars:
library(ggplot2)
test2$OCCURED_COUNTRY_DESC_num <- as.numeric(factor(test2$OCCURED_COUNTRY_DESC))
width <- .9 # defautlt width of bars
ggplot(test2, aes(a, OCCURED_COUNTRY_DESC, fill = type)) +
geom_bar(stat = "identity") +
geom_segment(aes(x = E, xend = E,
y = OCCURED_COUNTRY_DESC_num - width / 2,
yend = OCCURED_COUNTRY_DESC_num + width / 2),
color = "black", size = 1) +
scale_fill_manual(values = c(
"disp" = "#FF8C00",
"under" = "#7EC0EE",
"NA" = "#EEE9E9"
)) +
theme_classic() +
labs(
title = "Seizure",
x = "Count",
y = ""
)

Add p-values from own formula to ggplot2

I would like to add different p-values from an specific formula in a plot. I need different p-values from each of the subjects. Here is the code I used, which did not work:
formula <- lme(scale(Inactive.freq)~ scale(Time.point), random=~ 1|Subject, data=Freq_df, method='ML')
gggplot(Freq_df, aes(x=Time.point, y=Inactive.freq, group=Subject,colour=Subject)) +
geom_line(size=2)+
theme_minimal()+
geom_point()+
stat_smooth(method=lm, se = FALSE,linetype ="dashed")+
geom_smooth(method = "lm", formula = formula)+
stat_poly_eq(aes(label = paste(stat(eq.label),
stat(adj.rr.label), sep = "~~~~")), formula = formula, parse = TRUE) +
stat_fit_glance(label.x.npc = "right", label.y.npc = "bottom", geom = "text",
aes(label = paste("P-value = ", signif(..p.value.., digits = 3), sep = "")))
I would appreciate any help. Thank you!
UPDATE
My data:
structure(list(Subject = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label =
c("Caesar",
"DL", "Kyosti", "Paul", "Richards", "Taylor"), class = "factor"),
Time.point = c(1, 3, 4, 5, 6, 7), Pacing.freq = c(0.644444444444444,
0.562962962962963, 0.411111111111111, 0.122222222222222,
0, 0), Affiliative.freq = c(0.0703125, 0.138576779026217,
0.00760456273764259, 0.00617283950617284, 0.0634920634920635,
0.0629370629370629), Inactive.freq = c(0, 0, 0.174904942965779,
0.518518518518518, 0.290322580645161, 0.172661870503597),
Not.alert.alone.freq = c(0, 0, 0.174904942965779, 0.518518518518518,
0.279569892473118, 0.165467625899281), Not.alert.with.cagemate.freq = c(0,
0, 0, 0, 0.0108695652173913, 0.00719424460431655), Alert.with.cagemate.freq = c(0.06640625,
0.0262172284644195, 0, 0, 0, 0.00719424460431655), Non_visible = c(15L,
3L, 7L, 18L, 84L, 131L), Visible = c(255L, 267L, 263L, 162L,
186L, 139L)), row.names = c(NA, 6L), class = "data.frame")

This can be done using another layer with the "stat_fit_glance" method provided with the package ggpmisc (which you are already using, I believe...). It's a great package with lot more capabilities for annotating ggplot2.
The solution would be:
The modified data
Freq_df <- structure(list(Subject = as.factor(c(rep("Caesar", 3), rep("DL", 3))),
Time.point = c(1, 3, 4, 5, 6, 7),
Pacing.freq = c(0.644444444444444, 0.562962962962963,
0.411111111111111, 0.122222222222222, 0, 0),
Affiliative.freq = c(0.0703125, 0.138576779026217, 0.00760456273764259,
0.00617283950617284, 0.0634920634920635, 0.0629370629370629),
Inactive.freq = c(0, 0, 0.174904942965779, 0.518518518518518,
0.290322580645161, 0.172661870503597),
Not.alert.alone.freq = c(0, 0, 0.174904942965779, 0.518518518518518,
0.279569892473118, 0.165467625899281),
Not.alert.with.cagemate.freq = c(0, 0, 0, 0,
0.0108695652173913, 0.00719424460431655),
Alert.with.cagemate.freq = c(0.06640625, 0.0262172284644195, 0, 0, 0,
0.00719424460431655),
Non_visible = c(15L, 3L, 7L, 18L, 84L, 131L),
Visible = c(255L, 267L, 263L, 162L, 186L, 139L)),
row.names = c(NA, 6L), class = "data.frame")
The data needed to be changed, as a line cannot be fitted unless at least two data points are there, whereas you provided one data point per subject. So I limited it to two subjects with three points per subject. But you get the idea :)
The plotting code
ggplot(Freq_df, aes(x = Time.point, y = Pacing.freq)) + ylim(-0.5, 1.5) +
geom_line(size=2, alpha = 0.5) + geom_point(aes(group = "Subject"), size = 3) +
geom_smooth(method = "lm", formula = formula) + facet_wrap('Subject') +
stat_poly_eq(aes(label = paste(stat(eq.label), stat(adj.rr.label),
sep = "~~~~")), formula = formula, parse = TRUE) +
stat_fit_glance(label.x.npc = "right", label.y.npc = "bottom", geom = "text",
aes(label = paste("P-value = ", signif(..p.value.., digits = 15),
sep = "")))
EDIT 1:
#another way to use `stat_fit_glance` (not shown in the graph here)
stat_fit_glance(label.x = "right", label.y = "bottom",
aes(label = sprintf('r^2~"="~%.3f~~italic(p)~"="~%.2f',
stat(r.squared), stat(p.value))), parse = T)
`Facet-wrap' will do the trick if you need seperate p-values (seperate line-fitting) per group (and also not too many groups I believe... there must be a limit to number of facets allowed, which I don't know!).
OUTPUT
Play with the options to get desired output, e.g. if you use label.x.npc = "left" & label.y.npc = "bottom", then the regression equation & the p value labels might overlap.

Add character variable (weekdays) to plot

I want to add on this plot the weekday as text on top of the bars.
The only function to add text in ggplot I found, is "annotate", which does not work the way I want.
It should look like this:
Plot with weekdays
geom_text gives me this
Geom_text
My code:
ggplot(data = filter(T2G2_dayav, site %in% c("S17S", "S17N"), !is.na(distance)),
mapping = aes(as.factor(x = date_days))) +
geom_col(mapping = aes(y = T2pn_av, fill = as.factor(distance)),
position = position_dodge(width = 0.9)) +
theme_bw() + ylab("Particle Number (#/cmÂ³), day-av") + xlab("Date") +
scale_y_continuous(limits = c(0, 30000)) +
scale_fill_discrete(name = "T2, Distance from road (m)") +
scale_color_grey(name = "Reference intrument G2") +
ggtitle("Day-averaged Particle Number (PN) per distance")
the head of my data:
distance date_days site T2pn_av T2pn_avambient T2wdir_med weekday Date G2pn_av G2pn_min G2pn_max G2ws_av G2ws_min G2ws_max G2wdir_med
<int> <dttm> <chr> <dbl> <dbl> <dbl> <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 -10 2017-07-18 S17N 28814.83 16917.831 110 Di 2017-07-18 13655.29 4621 105100 0.6781284 0 3.6 51.0
2 -10 2017-07-19 S17N 24210.95 15565.951 100 Mi 2017-07-19 10627.73 2908 67250 1.3673618 0 5.5 70.0
3 -10 2017-07-24 S17N 16143.44 7907.442 80 Mo 2017-07-24 11686.54 3582 55080 0.8178753 0 4.8 95.5
4 -10 2017-07-29 S17N 11762.56 5574.563 270 Sa 2017-07-29 12180.73 5413 45490 1.0304985 0 5.7 265.0
5 -10 2017-07-30 S17N 12138.22 6360.225 290 So 2017-07-30 10404.75 6113 23860 1.2385791 0 6.6 274.0
6 -10 2017-07-31 S17N 13815.32 9008.320 270 Mo 2017-07-31 11849.89 4595 46270 0.8554044 0 4.4 230.0
dput(head(T2G2_dayav))
structure(list(distance = c(-10L, -10L, -10L, -10L, -10L, -10L
), date_days = structure(c(1500328800, 1500415200, 1500847200,
1501279200, 1501365600, 1501452000), class = c("POSIXct", "POSIXt"
), tzone = "Europe/Berlin"), site = c("S17N", "S17N", "S17N",
"S17N", "S17N", "S17N"), T2pn_av = c(28814.8306772908, 24210.9512670565,
16143.442364532, 11762.5630630631, 12138.2247114732, 13815.3198380567
), T2pn_avambient = c(16917.8306772908, 15565.9512670565, 7907.44236453202,
5574.56306306306, 6360.22471147318, 9008.31983805668), T2wdir_med = c(110,
100, 80, 270, 290, 270), weekday = c("Di", "Mi", "Mo", "Sa",
"So", "Mo"), Date = structure(c(1500328800, 1500415200, 1500847200,
1501279200, 1501365600, 1501452000), class = c("POSIXct", "POSIXt"
), tzone = "Europe/Berlin"), G2pn_av = c(13655.2885517401, 10627.7329973352,
11686.5429216867, 12180.7308516181, 10404.7472642001, 11849.8893070109
), G2pn_min = c(4621, 2908, 3582, 5413, 6113, 4595), G2pn_max = c(105100,
67250, 55080, 45490, 23860, 46270), G2ws_av = c(0.678128438241936,
1.36736183524505, 0.817875347544022, 1.0304984658137, 1.23857912107,
0.855404388351763), G2ws_min = c(0, 0, 0, 0, 0, 0), G2ws_max = c(3.6,
5.5, 4.8, 5.7, 6.6, 4.4), G2wdir_med = c(51, 70, 95.5, 265, 274,
230)), .Names = c("distance", "date_days", "site", "T2pn_av",
"T2pn_avambient", "T2wdir_med", "weekday", "Date", "G2pn_av",
"G2pn_min", "G2pn_max", "G2ws_av", "G2ws_min", "G2ws_max", "G2wdir_med"
), row.names = c(NA, -6L), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), vars = c("distance", "date_days"), drop = TRUE, indices = list(
0L, 1L, 2L, 3L, 4L, 5L), group_sizes = c(1L, 1L, 1L, 1L,
1L, 1L), biggest_group_size = 1L, labels = structure(list(distance = c(-10L,
-10L, -10L, -10L, -10L, -10L), date_days = structure(c(1500328800,
1500415200, 1500847200, 1501279200, 1501365600, 1501452000), class = c("POSIXct",
"POSIXt"), tzone = "Europe/Berlin")), row.names = c(NA, -6L), class = "data.frame", vars = c("distance",
"date_days"), drop = TRUE, .Names = c("distance", "date_days"
)))

The idea is that you add a text on top of every bar (that's why vjust = 0, but you could also do vjust = -.5 to allow more space or vjust = 1.5 to put it in the bars, which is nice as well). The rest within the geom_text ist basically the same as in geom_col. But in general, you could put commonly used aesthetics in the first occurency within ggplot(aes(...)), as you already did with the x-value.
ggplot(data = filter(T2G2_dayav, site %in% c("S17S", "S17N"), !is.na(distance)),
mapping = aes(as.factor(x = date_days))) +
geom_col(mapping = aes(y = T2pn_av, fill = as.factor(distance)),
position = position_dodge(width = 0.9)) +
geom_text(aes(label = weekday, y = T2pn_av), vjust = -.5, # add these
position = position_dodge(width = 0.9)) + # lines
theme_bw() + ylab("Particle Number (#/cmÂ³), day-av") + xlab("Date") +
scale_y_continuous(limits = c(0, 30000)) +
scale_fill_discrete(name = "T2, Distance from road (m)") +
scale_color_grey(name = "Reference intrument G2") +
ggtitle("Day-averaged Particle Number (PN) per distance")
The following should solve your problem with too many labels. It takes the highest label and places it in the center of the bars of that x-value. Find a plot below with additional rows added to your data:
T2G2_dayav <- rbind(T2G2_dayav %>% ungroup(), T2G2_dayav %>% ungroup() %>% mutate(distance = 5)) # add more observations for testing
T2G2_dayav <- T2G2_dayav %>% mutate(T2pn_av = ifelse(distance == 5, T2pn_av/2, T2pn_av)) # label only the highest bar
The following should work with your data:
ggplot(data = filter(T2G2_dayav, site %in% c("S17S", "S17N"), !is.na(distance)) %>%
group_by(date_days) %>% # group by days
mutate(weekday2 = ifelse(T2pn_av == max(T2pn_av), weekday, NA)), # within each day (group), only label the highest
mapping = aes(as.factor(x = date_days))) +
geom_col(mapping = aes(y = T2pn_av, fill = as.factor(distance)),
position = position_dodge(width = 0.9)) +
geom_text(aes(label = weekday2, y = T2pn_av), vjust = -.5, # add these
position = position_dodge(with = 0.9)) + # lines
theme_bw() + ylab("Particle Number (#/cmÂ³), day-av") + xlab("Date") +
scale_y_continuous(limits = c(0, 30000)) +
scale_fill_discrete(name = "T2, Distance from road (m)") +
scale_color_grey(name = "Reference intrument G2") +
ggtitle("Day-averaged Particle Number (PN) per distance")

Function to label local extrema in ggplot2 line graph in R

I am plotting a time series and I would like to automatically label specific points such as the last point, and local extrema, the highest and lowest.
How would I wrap this in a function to automatically label key points. Specifically by adding more local extrema.
Here is an example dataset:
latest <- structure(list(ReleaseDate = structure(c(1363928400, 1364533200,
1365138000, 1365742800, 1366347600, 1366952400, 1367557200, 1368162000,
1368766800, 1369371600, 1369976400, 1370581200, 1371186000, 1371790800,
1372395600, 1373000400, 1373605200, 1374210000, 1374814800, 1375419600,
1376024400, 1376629200, 1377234000, 1377838800, 1378443600, 1379048400,
1379653200, 1380258000, 1380862800, 1381467600, 1382072400, 1382677200,
1383282000, 1383890400, 1384495200), class = c("POSIXct", "POSIXt"
), tzone = ""), Count = c(1746L, 1748L, 1738L, 1771L, 1758L,
1754L, 1764L, 1769L, 1769L, 1762L, 1771L, 1765L, 1771L, 1759L,
1748L, 1757L, 1759L, 1770L, 1776L, 1782L, 1778L, 1791L, 1776L,
1776L, 1767L, 1768L, 1761L, 1744L, 1756L, 1743L, 1739L, 1738L,
1742L, 1754L, 1762L)), .Names = c("ReleaseDate", "Count"), row.names = 150:184, class = "data.frame")
library(ggplot2)
libary(ddply)
last <- tail(latest, 1)
high <- subset(latest, Count == max(Count))
mid <- subset(latest[5:20,], Count == min(Count))
ggplot(latest, aes(ReleaseDate, Count)) + geom_line() +
geom_text(data = last, aes(ReleaseDate, Count), label = last$Count, vjust = -1) +
geom_text(data = high, aes(ReleaseDate, Count), label = high$Count, vjust = -.1) +
geom_text(data = mid, aes(ReleaseDate, Count), label = mid$Count, vjust = 1)
Here I've attempted to manually add specific points, but it would not be concise if I wanted to add 10 labels to a longer series.

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

plotting a 3D bubble graph on R - r

Related

How do I scatterplot LD1 vs LD2 in lda analysis?

Adding line with specific df value to each bar in ggplot

Add p-values from own formula to ggplot2

Add character variable (weekdays) to plot

Function to label local extrema in ggplot2 line graph in R

Categories

Resources