Remove specific markers from legend - r

Sorry if this question has already been answered but I could not find the solution to what I am after. I have a plot that uses both geom_line and geom_point. The result of this is that in the legend, it adds both a line and a point when they should have one or the other. I want to keep the circles for the data tg1 and tg2 and remove the line and then do the opposite to the data full i.e. keep the line but remove the circle. I have seen that something like this works where you want to remove dots from all of the legend entries but nothing to only do specifics Removing ggplot2's geom_point icons from the legend. Can anyone help? Thanks.
#code for plot
library(ggplot2)
library(tidypaleo)
ggplot(LGRSL, aes(x =mmsl , y = Age))+
coord_flip()+
theme_classic(12)+
geom_point(data=tg1,aes(x=mmslc,y=Year,col="Fort Denison 1"),pch=1,size=2)+
geom_point(data=tg2,aes(x=mmslc,y=Year,col="Fort Denison 2"),pch=1,size=2)+
geom_lineh(data = full, aes(x=Lutregalammslc,y=Year,col="Full budget"))+
scale_colour_manual(values=c("grey15","grey50","black"))
## data
## tg1
structure(list(Year = 1886:1891, SLR = c(6919L, 6935L, 6923L,
6955L, 6956L, 6957L), mmsl = c(-0.158, -0.142, -0.154, -0.122,
-0.121, -0.12), m = c(6.919, 6.935, 6.923, 6.955, 6.956, 6.957
), GIA.correction = c(-0.02814, -0.02793, -0.02772, -0.02751,
-0.0273, -0.02709), SLRc = c(6.89086, 6.90707, 6.89528, 6.92749,
6.9287, 6.92991), mmslc = c(-0.19667, -0.18046, -0.19225, -0.16004,
-0.15883, -0.15762)), row.names = c(NA, 6L), class = "data.frame")
##tg2
structure(list(Year = 1915:1920, SLR = c(7011L, 6929L, 6987L,
6945L, 6959L, 6951L), mmsl = c(-0.066, -0.148, -0.09, -0.132,
-0.118, -0.126), m = c(7.011, 6.929, 6.987, 6.945, 6.959, 6.951
), GIA.correction = c(-0.02205, -0.02184, -0.02163, -0.02142,
-0.02121, -0.021), SLRc = c(6.98895, 6.90716, 6.96537, 6.92358,
6.93779, 6.93), mmslc = c(-0.09858, -0.18037, -0.12216, -0.16395,
-0.14974, -0.15753)), row.names = c(NA, 6L), class = "data.frame")
##full
structure(list(Year = 1900:1905, Lutregala = c(-0.103609677,
-0.118603251, -0.134550791, -0.105553735, -0.103983082, -0.121731984
), Wapengo = c(-0.095213147, -0.096005337, -0.115700625, -0.097696891,
-0.084444784, -0.109161066), Tarra = c(-0.106672829, -0.109537943,
-0.135256365, -0.101357772, -0.089716518, -0.104258351), Lutregalammsl = c(-0.292863465,
-0.307857039, -0.323804579, -0.294807523, -0.29323687, -0.310985772
), Wapengommsl = c(-0.257028279, -0.257820469, -0.277515756,
-0.259512023, -0.246259916, -0.270976198), Tarrammsl = c(-0.30925682,
-0.312121933, -0.337840355, -0.303941762, -0.292300508, -0.306842342
), LgGIAc = c(-0.01921, -0.01904, -0.01887, -0.0187, -0.01853,
-0.01836), WapGIAc = c(-0.02486, -0.02464, -0.02442, -0.0242,
-0.02398, -0.02376), TarGIAc = c(-0.02373, -0.02352, -0.02331,
-0.0231, -0.02289, -0.02268), Lutregalammslc = c(-0.312073465,
-0.326897039, -0.342674579, -0.313507523, -0.31176687, -0.329345772
), Wapmmslc = c(-0.281888279, -0.282460469, -0.301935756, -0.283712023,
-0.270239916, -0.294736198), Tarmmslc = c(-0.33298682, -0.335641933,
-0.361150355, -0.327041762, -0.315190508, -0.329522342)), row.names = c(NA,
6L), class = "data.frame")
##LGRSL
structure(list(depths = c(0.5, 1.5, 2.5, 3.5, 4.5, 5.5), RSL = c(0.047746907,
0.025564293, 0.021733558, 0.007855661, -0.004909879, 0.01747051
), RSLerror = c(0.058158556, 0.057902654, 0.057988654, 0.057957388,
0.057905405, 0.057226072), Age = c(2017.456716, 2013.594255,
2006.92838, 1999.675523, 1994.729181, 1990.518154), Ageerror = c(0.373138707,
0.77640096, 1.430582242, 1.627131115, 3.222393394, 3.239674718
), mmsl = c(0.01993169, -0.002250924, -0.006081659, -0.019959556,
-0.032725096, -0.010344707)), row.names = c(NA, 6L), class = "data.frame")
##LGRSLgp
structure(list(Age = 1892:1897, mean = c(-0.298147401, -0.304630597,
-0.31023294, -0.315506983, -0.321225142, -0.327190675), error = c(0.051858047,
0.04985084, 0.047760525, 0.045624121, 0.043505044, 0.041477551
), min = c(-0.246289354, -0.254779758, -0.262472416, -0.269882862,
-0.277720098, -0.285713124), max = c(-0.350005447, -0.354481437,
-0.357993465, -0.361131103, -0.364730186, -0.368668226), x = c(-0.02125,
-0.02108, -0.02091, -0.02074, -0.02057, -0.0204), meangia = c(-0.276897401,
-0.283550597, -0.28932294, -0.294766983, -0.300655142, -0.306790675
), rate = c(NA, -4.967327, -4.946326, -4.964493, -4.977451, -4.911859
), raterror = c(NA, 3.581013, 3.796417, 4.022157, 4.226762, 4.255126
), mmsl = c(-0.325962618, -0.332445814, -0.338048157, -0.3433222,
-0.349040359, -0.355005892)), row.names = c(NA, 6L), class = "data.frame")

Here is a way.
Override the guide legend with a list of vectors of values for each of the aesthetics involved, shape and linetype. Note the different ways to specify what is to be removed.
I have also simplified the code a bit.
library(ggplot2)
library(dplyr)
# geom_lineh() is not a ggplot2 function; the question loads tidypaleo for the
# same call, so load it here too or this script fails in a fresh session.
library(tidypaleo)

# Named colour vector: the names are the legend keys used in the layers below.
colrs <- c(
  "Fort Denison 1" = "grey15",
  "Fort Denison 2" = "grey50",
  "Full budget" = "black"
)

# Per-key overrides for the legend glyphs, one entry per legend key:
# the two point series keep the open circle (shape 1) and get a blank line,
# the line series keeps a solid line and gets no point (shape NA).
legnd <- list(
  shape = c(1, 1, NA),
  linetype = c("blank", "blank", "solid")
)

# Stack the two tide-gauge series with a label column so a single geom_point()
# layer can draw both; the line series comes from its own data frame.
bind_rows(
  tg1 %>% mutate(col = "Fort Denison 1"),
  tg2 %>% mutate(col = "Fort Denison 2")
) %>%
  ggplot(aes(x = mmslc, y = Year, colour = col)) +
  geom_point(pch = 1, size = 2) +
  geom_lineh(data = full, aes(x = Lutregalammslc, col = "Full budget")) +
  scale_colour_manual(
    values = colrs,
    guide = guide_legend(override.aes = legnd)
  ) +
  coord_flip() +
  theme_classic(base_size = 12)

Related

Is there a way to split data based on a name to then use alongside sec.axis

I'm using the code below to plot two variables on the same graph in order to compare them.
Some of them are in the AW group and the others are in the EWC group.
Data1 %>%
pivot_longer(
cols = -1,
names_to = c("try", "exp"),
names_pattern = "(.)(.)")%>%
ggplot(aes(x = exp, y = value, group = exp)) +
geom_point(aes (shape = exp, colour = exp))+
geom_smooth(alpha = 0, aes(colour = exp))+
stat_summary(fun.y = mean,
fun.ymin = function(x) mean(x) - sd(x),
fun.ymax = function(x) mean(x) + sd(x),
aes(color=exp),
geom = "errorbar") +
scale_y_continuous(expand = c(0, 0), breaks = seq ( 0,700, by = 200), name = "AW (%)",
sec.axis = sec_axis(~. + 10, name = "EWC (%)"))
Data that I'm working with:
structure(list(Sample = c("AW DL", "AW", "AW ambient temp", "AW DL ambient temp",
"EWC DL", "EWC", "EWC DL ambient temp", "EWC ambient temp"),
A1 = c(418.181818181874, 288.888888888889, 319.999999999996,
173.333333333325, 80.701754385967, 74.2857142857143, 63.4146341463404,
76.190476190476), B1 = c(483.333333333305, 517.647058823533,
565.384615384621, 375.000000000032, 82.857142857142, 83.8095238095239,
78.947368421054, 84.9710982658961), C1 = c(606.06060606057,
542.10526315789, 496.551724137933, 587.500000000377, 85.8369098712439,
84.4262295081966, 85.4545454545534, 83.2369942196532), A2 = c(368.750000000047,
46.428571428571, 216.39344262295, 104.651162790703, 78.6666666666688,
31.7073170731706, 51.136363636365, 68.3937823834196), B2 = c(417.857142857153,
123.913043478263, 213.63636363636, 180.769230769273, 80.6896551724142,
55.3398058252432, 64.3835616438409, 68.1159420289851), C2 = c(283.928571428547,
169.230769230771, 271.428571428566, 95.2380952380727, 73.9534883720913,
62.8571428571431, 48.7804878048721, 73.0769230769227), A3 = c(564.10256410254,
194.285714285712, 314.999999999998, 362.162162162103, 84.9420849420844,
66.0194174757279, 78.3625730994124, 75.9036144578312), B3 = c(656.249999999929,
26.4705882352953, 263.492063492065, 443.243243243154, 86.776859504131,
20.9302325581403, 81.592039800992, 72.4890829694324), C3 = c(634.883720930251,
330.555555555559, 304.444444444446, 416.666666666644, 86.3924050632915,
76.7741935483873, 80.6451612903217, 75.2747252747254), A4 = c(260.00000000002,
96.3636363636384, 285.714285714287, 174.509803921584, 72.2222222222237,
49.0740740740746, 63.5714285714306, 74.0740740740742), B4 = c(196.721311475417,
41.4285714285729, 245.762711864405, 190.566037735832, 66.2983425414373,
29.29292929293, 65.5844155844135, 71.0784313725489), C4 = c(262.264150943415,
58.6206896551738, 194.444444444444, 214.634146341482, 72.3958333333348,
36.956521739131, 68.2170542635678, 66.0377358490565)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -8L))
I found some examples of if statements, but I am stuck on getting them to work.
Thank you.

Rep function in r for forest plots

I am trying to create a forest plot and, although this works for my other datasets, the shapes are not doing what I want them to do with this dataset. Each cohort is represented by a diamond or a circle and then each age group is represented by a colour. With this code, I keep on getting circles for both cohorts and then diamonds for both cohorts:
Code:
tiff('replication_diet_subset1_risk_diff.tiff',units = "cm",width=16,height=16,res=600)
tt1=cbind(replication_diet_subset1$Description)
forestplot(tt1, mean = cbind(replication_diet_subset1$coef.1,replication_diet_subset1$coef1_hunt,replication_diet_subset1$coef.2,replication_diet_subset1$coef2_hunt,replication_diet_subset1$coef.3,replication_diet_subset1$coef3_hunt),
lower = cbind(replication_diet_subset1$lower.1, replication_diet_subset1$lci_coef1_hunt,replication_diet_subset1$lower.2,replication_diet_subset1$lci_coef2_hunt,replication_diet_subset1$lower.3,replication_diet_subset1$lci_coef3_hunt),
upper = cbind(replication_diet_subset1$upper.1,replication_diet_subset1$uci_coef1_hunt, replication_diet_subset1$upper.2,replication_diet_subset1$uci_coef2_hunt,replication_diet_subset1$upper.3,replication_diet_subset1$uci_coef3_hunt),
lwd.xaxis=1,
xlog=FALSE,
graph.pos = 2,
zero = 0,
lwd.zero = 1.5,
boxsize = 0.08,
lty = 1,
col=fpColors(box=rep(c("blue", "darkred", "darkgreen"),each=2),line=rep(c("blue", "darkred", "darkgreen"),each=2)),
fn.ci_norm=rep(c(fpDrawDiamondCI,fpDrawCircleCI),3),
legend = c("UK 39-53y","UN 39-53y","UK 53-62y","UN 53-62y","UK 62-72y","UN 62-72y"),
lwd.ci = 1.5, ##thickness of the CI line,
xticks = c(-0.05,0,0.05),
xlab ="coefficient (95% CI)",las = 10,
is.summary = FALSE,
txt_gp = fpTxtGp(xlab = gpar(cex=0.6),##changes font size of xlabel
ticks = gpar(cex=0.6),
legend=gpar(cex=0.55),
label = list(gpar(cex=0.6),
gpar(cex=0.8))),
hrzl_lines=list("2" = gpar(lty=2, lwd=1), "3" = gpar(lty=2, lwd=1),"4" = gpar(lty=2, lwd=1)),
legend_args=fpLegend(pos = list(x=0.1,y=-0.10, "align"="horizontal",gpar(lwd=1, lty=1), padding=unit(ifelse(!is.null(forestplot), 0.6, 0.6), "mm"))))
Dataframe
dput(replication_diet_subset1)
structure(list(Description = c("Fresh fruit intake", "Lamb/mutton intake",
"Non-oily fish intake", "Oily fish intake", "Water intake", "Salad/raw vegetable intake"
), coef1_hunt = c(-0.00743, 0.002942, 0.00236, -0.00743, 0.005049,
0.003846), se...3 = c(0.007141, 0.005758, 0.005443, 0.007141,
0.006629, 0.006245), lci_coef1_hunt = c(-0.02142636, -0.00834368,
-0.00830828, -0.02142636, -0.00794384, -0.0083942), uci_coef1_hunt = c(0.00656636,
0.01422768, 0.01302828, 0.00656636, 0.01804184, 0.0160862), ...6 = c(16635,
9728, 9725, 16635, 23278, 16635), coef2_hunt = c(-0.002244, -0.00276,
0.000345, -0.002244, -0.00151, -0.000206), se...8 = c(0.008917,
0.006276, 0.006188, 0.008917, 0.009408, 0.007517), lci_coef2_hunt = c(-0.01972132,
-0.01506096, -0.01178348, -0.01972132, -0.01994968, -0.01493932
), uci_coef2_hunt = c(0.01523332, 0.00954096, 0.01247348, 0.01523332,
0.01692968, 0.01452732), ...11 = c(9897, 7545, 7567, 9897, 9915,
9867), coef3_hunt = c(0.019017, -0.007802, -0.006307, 0.019017,
0.005309, -0.002412), se...13 = c(0.005753, 0.004112, 0.003982,
0.005753, 0.008282, 0.004975), lci_coef3_hunt = c(0.00774112,
-0.01586152, -0.01411172, 0.00774112, -0.01092372, -0.012163),
uci_coef3_hunt = c(0.03029288, 0.000257520000000001, 0.00149772,
0.03029288, 0.02154172, 0.007339), ...16 = c(25536, 17792,
17907, 25536, 14181, 25575), coef.1 = c(0.00362314944509158,
-0.00470790257232875, 0.000345069788105902, 0.00119994747831877,
0.00366026474163439, 0.00452057277808969), lower.1 = c(0.000100809813626033,
-0.00870031842036767, -0.00444673772605906, -0.00415468715681286,
-0.00142385548036369, 0.000495870575295929), upper.1 = c(0.00714548907655713,
-0.000715486724289832, 0.00513687730227086, 0.0065545821134504,
0.00874438496363247, 0.00854527498088346), coef.2 = c(0.00652712910056686,
-0.008389394817565, 0.00385312798368479, 0.00684123232489216,
0.00820481571195358, 0.00206021000034981), lower.2 = c(0.0031064649063915,
-0.0124479794653457, -0.000643937452513724, 0.00152183943585278,
0.00305901182100011, -0.00191718709478407), upper.2 = c(0.00994779329474222,
-0.00433081016978427, 0.00835019341988331, 0.0121606252139315,
0.0133506196029071, 0.00603760709548368), coef.3 = c(0.00801099623452188,
-0.0108977910125354, 0.0115185369443351, 0.0167317285202842,
0.0153438933855643, 0.0101781793610485), lower.3 = c(0.00465798740347312,
-0.0150517577821998, 0.00721508140505276, 0.0114866825853908,
0.0101968058956273, 0.00609905055235197), upper.3 = c(0.0113640050655706,
-0.00674382424287112, 0.0158219924836174, 0.0219767744551776,
0.0204909808755013, 0.0142573081697451)), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
dev.off()
I would really appreciate your advice!
TIA

R ggplot2 line two rows and all columns

I have this dataset:
structure(list(AgeGroup = c("Old", "Young"), Point.1 = c(0.401899407258065,
0.432845035714286), Point.2 = c(0.435610404, 0.448826385964912
), Point.3 = c(0.466951088, 0.473339649122807), Point.4 = c(0.490997664,
0.505416649122807), Point.5 = c(0.51047508, 0.517228789473684
), Point.6 = c(0.519614064, 0.520077087719298), Point.7 = c(0.524924236,
0.522939438596491), Point.8 = c(0.535448152, 0.523846929824561
), Point.9 = c(0.539298204, 0.529132140350877), Point.10 = c(0.546288992,
0.535221877192982), Point.11 = c(0.552286756, 0.544069684210526
), Point.12 = c(0.548644056, 0.547988701754386), Point.13 = c(0.546028996,
0.556100789473684), Point.14 = c(0.551464336, 0.557342807017544
), Point.15 = c(0.55835804, 0.552995140350877), Point.16 = c(0.560958056,
0.555753035087719), Point.17 = c(0.566631508, 0.553254912280702
), Point.18 = c(0.573644824, 0.557015719298246), Point.19 = c(0.579504268,
0.560797315789474), Point.20 = c(0.583600364, 0.560459526315789
), Point.21 = c(0.591889884, 0.563138245614035), Point.22 = c(0.598549332,
0.578847140350877), Point.23 = c(0.605920632, 0.59655149122807
), Point.24 = c(0.612308084, 0.611475473684211), Point.25 = c(0.618838952,
0.627695631578947), Point.26 = c(0.626865524, 0.640329719298246
), Point.27 = c(0.634642932, 0.642362438596491), Point.28 = c(0.639958892,
0.640706877192982), Point.29 = c(0.642219468, 0.654251789473684
), Point.30 = c(0.651740076, 0.674775824561404), Point.31 = c(0.657197604,
0.679311385964912), Point.32 = c(0.657618572, 0.673946421052632
), Point.33 = c(0.653554616, 0.67093849122807), Point.34 = c(0.648990388,
0.673238403508772), Point.35 = c(0.643885328, 0.669246245614035
), Point.36 = c(0.636234632, 0.670007543859649), Point.37 = c(0.632127604,
0.667657561403509), Point.38 = c(0.631252172, 0.665906228070175
), Point.39 = c(0.637404984, 0.677649561403509), Point.40 = c(0.6451598,
0.679067614035088), Point.41 = c(0.648019716, 0.688604824561403
), Point.42 = c(0.645375244, 0.692729175438596), Point.43 = c(0.647187664,
0.691994543859649), Point.44 = c(0.651923432, 0.681522859649123
), Point.45 = c(0.650062976, 0.674073456140351), Point.46 = c(0.638525956,
0.660092263157895), Point.47 = c(0.627772732, 0.652689456140351
), Point.48 = c(0.615988064, 0.650307087719298), Point.49 = c(0.599147952,
0.651349771929825), Point.50 = c(0.584897698795181, 0.63722649122807
)), class = "data.frame", row.names = c(NA, -2L))
which a subset of the 50 points looks like:
AgeGroup Point.1 Point.2 Point.3 Point.4 Point.5 Point.6 Point.7 Point.8 Point.9 Point.10 Point.11 Point.12 Point.13 Point.14 Point.15 Point.16
1 Old 0.4018994 0.4356104 0.4669511 0.4909977 0.5104751 0.5196141 0.5249242 0.5354482 0.5392982 0.5462890 0.5522868 0.5486441 0.5460290 0.5514643 0.5583580 0.5609581
2 Young 0.4328450 0.4488264 0.4733396 0.5054166 0.5172288 0.5200771 0.5229394 0.5238469 0.5291321 0.5352219 0.5440697 0.5479887 0.5561008 0.5573428 0.5529951 0.5557530
I am having difficulty plotting all columns on one graph, where X is just 1:50 tick marks and Y is the value of each point, color coded by AgeGroup.
I have tried melt, but I don't think that's necessary as it transposes the data and doubles the Point values.
I've tried variations of the following:
ggplot(YaxL, aes(x=1:50,y=YaxL[2:51])) + geom_point()
and
ggplot(YaxL, aes(x = 1:50)) +
geom_line(aes(y = YaxLDF[1,1], colour = "Old")) +
geom_line(aes(y = YaxLDF[2,1], colour = "Young"))
I feel like I'm overthinking this, help appreciated.
Try this approach. You can reshape to long format with pivot_longer() and use the separate() function to extract the point position. After that, building the plot is straightforward. I have used the data you shared as YaxL. Always try to reshape your data first; then the plots can be built easily. Here is the code:
library(tidyverse)
# Reshape to long format, recover the numeric point index, then plot
YaxL %>%
  pivot_longer(cols = -1) %>%
  # "Point.12" -> name = "Point", x = "12"
  separate(col = name, into = c("name", "x"), sep = "\\.") %>%
  mutate(x = as.numeric(x)) %>%
  # the constant "Point" prefix is no longer needed
  dplyr::select(-name) %>%
  ggplot(
    mapping = aes(x = x, y = value, colour = AgeGroup, group = AgeGroup)
  ) +
  geom_point()
Output:

Subset Data.Frame With Multiple Conditions

End Goal:
Create a plot for each region of StressCumulative, BaseCumulative, StressQoQ, and BaseQoQ over the date range from rows 1:167.
Problem:
I'm having difficulty subsetting my data.frame. My issue is that the condition by which I'm subsetting is logical, and thus will only return the first element after the condition.
subset_region_1 <- subset.data.frame(HPF, HPF$region == 1, select = BaseCumulative, HPF$StressCumulative, StressQoQ, BaseQoQ)
Warning messages:
1: In if (drop) warningc("drop ignored") :
the condition has length > 1 and only the first element will be used
2: drop ignored
This returns only the first column, BaseCumulative.
Data:
Here you get a glimpse of what I'm working with. This is the table I am looking to subset from. My data.frame is in a tall format
I would like to create a subset in order to graph BaseCumulative, StressCumulative, BaseQoQ, and StressQoQ variables over the range of dates from rows 1:167. The date column uses the same dates for all 100 regions. My issue is that when I go to plot in ggplot, I get an error that my aes mappings are not of the same size. The full table has date = 18370 rows long, but the values repeat every 167 rows (for each unique region). Further, the BaseCumulative variable is also 18370 rows long but is unique for all regions, i.e. every 167 rows. I want to know how I can subset by region while obtaining the correct row size for the variables I am interested in measuring.
Data Pts:
#Rows 1-3 (Region 1 Sample):
dput(head(HPF[1:3, ]))
structure(list(region = c(1, 1, 1), path = c(1, 1, 1), date = c(20140215,
20140515, 20140815), index_value = c(1, 1.033852765, 1.041697122
), index = 0:2, counter = 1:3, BaseQoQ = c(NA, 0.033852765, 0.00758749917354029
), BaseCumulative = c(100, 103.3852765, 104.1697122), StressCumulative = c(110,
113.3852765, 114.1697122), StressQoQ = c(NA, 0.0307752409090909,
0.00691832065162346)), .Names = c("region", "path", "date", "index_value",
"index", "counter", "BaseQoQ", "BaseCumulative", "StressCumulative",
"StressQoQ"), row.names = c(NA, -3L), class = c("tbl_df", "tbl",
"data.frame"))
#Rows 168:200 (Region 2 Sample):
dput(head(HPF[168:200, ]))
structure(list(region = c(2, 2, 2, 2, 2, 2), path = c(1, 1, 1,
1, 1, 1), date = c(20140215, 20140515, 20140815, 20141115, 20150215,
20150515), index_value = c(1, 1.014162265, 1.01964828, 1.009372314,
1.007210703, 1.018695493), index = 0:5, counter = 1:6, BaseQoQ = c(NA,
0.014162265, 0.00540940556489744, -0.0100779515854232, -0.0021415398163972,
0.0114025694582001), BaseCumulative = c(100, 101.4162265, 101.964828,
100.9372314, 100.7210703, 101.8695493), StressCumulative = c(110,
111.4162265, 111.964828, 110.9372314, 110.7210703, 101.8695493
), StressQoQ = c(NA, 0.0128747863636363, 0.00492389230216839,
-0.00917785181610786, -0.00194849914020834, -0.0799443229370588
)), .Names = c("region", "path", "date", "index_value", "index",
"counter", "BaseQoQ", "BaseCumulative", "StressCumulative", "StressQoQ"
), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))
Question:
How do I subset other columns in addition to specifying region == #? I have tried the following but then the issue is that values recycle for the dates and my charts are incorrect:
ggplot(HPF, aes(x = date, y= BaseCumulative, linetype = factor(region == 1))) +
geom_line() +
theme_light()
Further, I am also unsuccessful if I try to subset within the ggplot such as:
ggplot(HPF[HPF$region == 1, ], aes(x = HPF$date[1:167, ], y= HPF$BaseCumulative[1:167, ], linetype = factor(region == 1))) +
geom_line() +
theme_light()
Any help is appreciated.
I'm not entirely sure what you're trying to show in your plot; is this what you're after?
library(tidyverse)

# Reshape columns 7:10 (the four measures) from wide to long: `what` holds the
# former column name and `value` the number. pivot_longer() replaces the
# superseded gather().
df %>%
  pivot_longer(cols = 7:10, names_to = "what", values_to = "value") %>%
  # Group by measure *and* region so every region is drawn as its own line;
  # grouping by colour alone joins points from different regions that share a
  # date into a single zig-zag line.
  ggplot(
    aes(date, value, colour = what, group = interaction(what, region))
  ) +
  geom_line() +
  theme_light()
Explanation: Convert your data from wide to long format, then pass what as a colour (or linetype) aesthetic to get different line plots for columns 7, 8, 9, 10 in one plot.
If you want separate plots for region, you could add + facet_wrap(~ as.factor(region)), e.g.
# Same wide-to-long reshape of columns 7:10, with one panel per region.
# pivot_longer() replaces the superseded gather().
df %>%
  pivot_longer(cols = 7:10, names_to = "what", values_to = "value") %>%
  ggplot(aes(date, value, colour = what)) +
  geom_line() +
  theme_light() +
  facet_wrap(~ as.factor(region))
Sample data
df1 <- structure(list(region = c(1, 1, 1), path = c(1, 1, 1), date = c(20140215,
20140515, 20140815), index_value = c(1, 1.033852765, 1.041697122
), index = 0:2, counter = 1:3, BaseQoQ = c(NA, 0.033852765, 0.00758749917354029
), BaseCumulative = c(100, 103.3852765, 104.1697122), StressCumulative = c(110,
113.3852765, 114.1697122), StressQoQ = c(NA, 0.0307752409090909,
0.00691832065162346)), .Names = c("region", "path", "date", "index_value",
"index", "counter", "BaseQoQ", "BaseCumulative", "StressCumulative",
"StressQoQ"), row.names = c(NA, -3L), class = c("tbl_df", "tbl",
"data.frame"));
df2 <- structure(list(region = c(2, 2, 2, 2, 2, 2), path = c(1, 1, 1,
1, 1, 1), date = c(20140215, 20140515, 20140815, 20141115, 20150215,
20150515), index_value = c(1, 1.014162265, 1.01964828, 1.009372314,
1.007210703, 1.018695493), index = 0:5, counter = 1:6, BaseQoQ = c(NA,
0.014162265, 0.00540940556489744, -0.0100779515854232, -0.0021415398163972,
0.0114025694582001), BaseCumulative = c(100, 101.4162265, 101.964828,
100.9372314, 100.7210703, 101.8695493), StressCumulative = c(110,
111.4162265, 111.964828, 110.9372314, 110.7210703, 101.8695493
), StressQoQ = c(NA, 0.0128747863636363, 0.00492389230216839,
-0.00917785181610786, -0.00194849914020834, -0.0799443229370588
)), .Names = c("region", "path", "date", "index_value", "index",
"counter", "BaseQoQ", "BaseCumulative", "StressCumulative", "StressQoQ"
), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))
df <- rbind.data.frame(df1, df2);

Adding parameters to a ggplot produced plot in a function

Let's say I have a saved plot named my_plot, produced with ggplot. Also, let's say that the column in my_plot[[1]] data frame used for horizontal axis is named my_dates
Now, I want to add some vertical lines to the plot, which, of course, can be done by something like that:
my_plot +
geom_vline(aes(xintercept = my_dates[c(3, 8)]))
Since I perform this task quite on a regular basis, I want to write a function for that -- something like that:
ggplot.add_lines <- function(given_plot, given_points) {
finale <- given_plot +
geom_vline(aes(xintercept = given_plot[[1]]$my_dates[given_points]))
return(finale)
}
Which, as it's probably obvious to everyone, doesn't work:
> ggplot.add_lines(my_plot, c(3, 5))
Error in eval(expr, envir, enclos) : object 'given_plot' not found
So, my question would be what am I doing wrong, and how can it be fixed? Below is some data for a reproducible example:
> dput(my_plot)
structure(list(data = structure(list(my_dates = c(1, 2, 3, 4,
5, 6, 7, 8, 9, 10), my_points = c(-2.20176409422924, -1.12872396340683,
-0.259703895194354, 0.634233385649338, -0.678983982973015, -1.83157126614836,
1.33360095418957, -0.120455389285709, -0.969431974863616, -1.20451262626184
)), .Names = c("my_dates", "my_points"), row.names = c(NA, -10L
), class = "data.frame"), layers = list(<environment>), scales = <S4 object of class structure("Scales", package = "ggplot2")>,
mapping = structure(list(x = my_dates, y = my_points), .Names = c("x",
"y"), class = "uneval"), theme = list(), coordinates = structure(list(
limits = structure(list(x = NULL, y = NULL), .Names = c("x",
"y"))), .Names = "limits", class = c("cartesian", "coord"
)), facet = structure(list(shrink = TRUE), .Names = "shrink", class = c("null",
"facet")), plot_env = <environment>, labels = structure(list(
x = "my_dates", y = "my_points"), .Names = c("x", "y"
))), .Names = c("data", "layers", "scales", "mapping", "theme",
"coordinates", "facet", "plot_env", "labels"), class = c("gg",
"ggplot"))
According to this post, below is my solution to this problem. The environment issue in the **ply and ggplot is annoying.
# Add vertical lines to an existing ggplot at the `my_dates` values selected by
# `given_points`.
#   given_plot   - plot object the geom_vline() layer is added to
#   given_points - index used to subset `my_dates` inside aes()
# Returns the result of evaluating `given_plot + geom_vline(...)`.
ggplot.add_lines <- function(given_plot, given_points) {
# substitute() replaces the symbol `given_points` inside the braced expression
# with the value supplied by the caller; eval() then runs that expression.
finale <- eval(substitute( expr = {given_plot +
geom_vline(aes(xintercept = my_dates[given_points]))}, env = list(given_points = given_points)))
return(finale)
}
The following code runs well on my machine. (I cannot make your reproducible work on my machine...)
df <- data.frame(my_dates = 1:10, val = 1:10)
my_plot <- ggplot(df, aes(x = my_dates, y = val)) + geom_line()
my_plot <- ggplot.add_lines(my_plot, c(3, 5))
print(my_plot)
Update: The above solution fails when more than two points are used.
It seems that we can easily solve this problem by not including aes() (subsetting together with aes() causes problems):
# Add vertical reference lines to an existing ggplot.
#
# given_plot:   a ggplot object whose data has a `my_dates` column.
# given_points: index (e.g. row positions) selecting which `my_dates` values
#               get a vertical line.
# Returns the plot with a geom_vline() layer added.
ggplot.add_lines <- function(given_plot, given_points) {
  # Look the data up by name rather than by position (`[[1]]`), so the function
  # does not depend on where `data` sits inside the plot object.
  line_positions <- given_plot$data$my_dates[given_points]
  # xintercept is passed as a plain value, outside aes(), so it is computed
  # here inside the function.
  given_plot + geom_vline(xintercept = line_positions)
}
I would take the following approach: extract the data.frame of interest, and pass it to the new layer,
df <- data.frame(my_dates = 1:10, val = rnorm(10))
my_plot <- ggplot(df, aes(x = my_dates, y = val)) + geom_line()
# Add vertical lines to a ggplot at selected rows of its own data.
#
# p:            a ggplot object whose data has a `my_dates` column.
# given_points: row index into the plot data; one line is drawn per selected
#               row. Defaults to rows 3 and 5.
# ...:          forwarded to geom_vline() (e.g. lty, colour).
# Returns the plot with the extra geom_vline() layer.
add_lines <- function(p, given_points = c(3, 5), ...) {
  # drop = FALSE keeps a data frame even when the plot data has one column.
  d <- p[["data"]][given_points, , drop = FALSE]
  # aes() replaces the deprecated aes_string(); `my_dates` is looked up in `d`.
  p + geom_vline(data = d, mapping = aes(xintercept = my_dates), ...)
}
add_lines(my_plot, c(3,5), lty=2)

Resources