I have a time series in which stem densities were measured both early and late in the growing season. I would like to present trends in stem densities over time, but including tick mark labels such as 2005 Early, 2005 Late, 2006 Early, 2006 Late, and so on is too cluttered for presentation. Is it possible to group the tick marks from each year, such that each mark indicates only either "Early" or "Late", with the year indicated below each pair?
Here is what I have come up with so far. I already know it won't give me exactly what I have in mind, but worse yet, it doesn't work: it results in a plot without any x-axis tick labels whatsoever:
# Line-and-point plot of Average.Stem.Density against Date, coloured and
# grouped by Species, with error bars of +/- se, faceted by Heat and Treatment.
dt<-ggplot(nzsum, aes(x = Date, y = Average.Stem.Density, colour = Species)) + geom_line(aes(group = Species)) +
geom_point() + geom_errorbar(aes(ymin=Average.Stem.Density-se, ymax=Average.Stem.Density+se), width = 0.25) + facet_grid(Heat~Treatment)
# Attempt to relabel the x axis so that only the first tick of each year
# carries the year.
# NOTE(review): Date is a factor, so a discrete scale presumably expects breaks
# that are its level strings ("2005 Early", "2005 Late", ...). The integers
# 1:18 match none of them, which is the likely reason no tick labels are
# drawn -- confirm against the ggplot2 documentation.
dt<-dt+ scale_x_discrete(breaks = 1:18, labels = c("2005 Early", "Late", "2006 Early", "Late", "2007 Early","Late","2008 Early", "Late",
"2009 Early","Late", "2010 Early","Late","2011 Early","Late","2012 Early","Late", "2013 Early","Late"))
# Rotate the x tick labels by 90 degrees and set the y-axis title.
dt + theme(axis.text.x = element_text(angle=90, vjust=0.5, size=10)) + ylab('Stem Density')
To possibly further complicate things, I have missing data for some of the measurement dates, but would like to have either the gaps remain in the plot, or to have a vertical line separating the periods of consistent measurements.
Any suggestions are appreciated!
output of dput, as requested:
> dput(nzsum)
structure(list(Date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L), .Label = c("2005 Early",
"2005 Late", "2006 Early", "2006 Late", "2007 Early", "2007 Late",
"2008 Early", "2008 Late", "2009 Early", "2009 Late", "2010 Early",
"2010 Late", "2011 Early", "2011 Late", "2012 Early", "2012 Late",
"2013 Early", "2013 Late"), class = "factor"), Treatment = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("Ambient Precipitation",
"Increased Precipitation Variability"), class = "factor"), Heat = structure(c(1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Ambient Temperature",
"Increased Temperature"), class = "factor"), Species = structure(c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("A. gerardii",
"S. nutans"), class = "factor"), N = c(6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6), Average.Stem.Density = c(59.75,
52.4166666666667, 59.4166666666667, 38.8333333333333, 47.1666666666667,
44.6666666666667, 34.6666666666667, 47.1666666666667, 45.5833333333333,
51, 46.8333333333333, 40.8333333333333, 36.0833333333333, 43.5,
30.9166666666667, 53.1666666666667, 59.9166666666667, 47.8333333333333,
54.8333333333333, 40.1666666666667, 45.0833333333333, 38.5833333333333,
38.6666666666667, 38.3333333333333, 41.5, 57.3333333333333, 41.4166666666667,
50.25, 32.8333333333333, 41.5833333333333, 29.5, 41.25, 57.75,
34, 49.4166666666667, 45.1666666666667, 36.5833333333333, 22.6666666666667,
33.0833333333333, 30.1666666666667, 37.8333333333333, 43.9166666666667,
34.3333333333333, 44.4166666666667, 30.6666666666667, 31.1666666666667,
28.0833333333333, 41.0833333333333, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 52.8333333333333, 43.5, 39,
38.4166666666667, 40.5833333333333, 28.8333333333333, 33.3333333333333,
26.6666666666667, 34.5833333333333, 53.3333333333333, 24.6666666666667,
38.8333333333333, 26.9166666666667, 35.3333333333333, 22.1666666666667,
16.3333333333333, 51.0833333333333, 58.25, 34.8333333333333,
46.5833333333333, 47.5, 40, 35.4166666666667, 29.0833333333333,
24.5, 62, 20.1666666666667, 40.6666666666667, 22.9166666666667,
50, 20.25, 23.25, 32, 45.1666666666667, 30, 36.5, 31.6666666666667,
36.9166666666667, 21.6666666666667, 18.4166666666667, NA, NA,
NA, NA, NA, NA, NA, NA, 27.9166666666667, 29.75, 24.8333333333333,
20.0833333333333, 19, 21.8333333333333, 15.5833333333333, 8.16666666666667,
25, 42.25, 20.5833333333333, 29.25, 19.8333333333333, 23.1666666666667,
19.4166666666667, 13.8333333333333, 46.6666666666667, 35, 36.4166666666667,
29.4166666666667, 36, 22.6666666666667, 29.0833333333333, 18.5833333333333,
33.3333333333333, 31.25, 25.8333333333333, 28.3333333333333,
23.1666666666667, 14.0833333333333, 18.4166666666667, 15.3333333333333
), sd = c(32.2020962050609, 15.1407287363147, 25.7884017858158,
14.1833235409289, 13.7501515143167, 22.0333081189972, 11.0574258607809,
23.6107320231006, 25.6542718989775, 13.2815661727072, 15.4099534933324,
11.2590704175197, 8.15730756234351, 15.52417469626, 11.8423674434915,
20.9300422041301, 36.3612660212302, 14.7738507731284, 16.7022952514517,
14.9788740119766, 7.61194237147567, 13.6872812006865, 12.9794709702155,
16.7022952514517, 20.7797978815964, 13.47095641247, 10.4566565720916,
18.5654248537436, 7.85281265959316, 14.756072196444, 9.46572765295939,
20.3168649156311, 37.9983552275622, 15.3068612066615, 18.5240834231189,
16.9813623324711, 8.59893404246519, 9.99833319442129, 9.61985793380893,
16.0831174424198, 24.0409373084052, 17.8841177212259, 10.5340717040785,
13.9191115616862, 8.09732466106347, 12.5445871461227, 9.43618920256831,
21.0081333456037, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 32.7744819435284, 25.4872517153184, 6.09918027279076,
26.1962147392837, 13.5033946349304, 13.6479546697176, 10.1176413588675,
19.7855165883195, 13.9298839430437, 28.6856526275186, 6.8239773348588,
23.1984194863932, 12.654709268358, 18.4842275106824, 4.79235502302017,
13.1288486420808, 25.7864628568299, 31.8680874857592, 6.17791766428355,
26.5582692709195, 19.2041662146525, 19.0446842977247, 18.5941298980798,
18.0205900754295, 11.6790410565251, 28.8946361804401, 4.34357763446985,
23.2350310235787, 8.88491230495083, 25.7740179250345, 9.23985930628816,
11.5617905187735, 20.2410474037289, 22.2994768249541, 9.18694726228468,
17.0117606378646, 17.229819112999, 20.4093524313406, 17.1571170849495,
11.9558214551183, NA, NA, NA, NA, NA, NA, NA, NA, 15.0180447018467,
17.2880016196205, 11.8939760663399, 10.3605823517149, 9.92975326984513,
18.1457065628943, 9.86618805145466, 7.92254167970524, 17.5499287747842,
14.7300712829232, 8.01508993503296, 15.5298100439123, 12.3193614553136,
15.25013661141, 12.7766062264854, 10.7780641428165, 23.0079696337305,
13.9319776054945, 10.9060380829459, 15.5609018590397, 21.0214176496258,
14.0558410159857, 23.8189350447636, 15.6314320094695, 16.2777967387072,
16.3240619944914, 6.17791766428355, 14.4798710859823, 14.770466027403,
12.076492316342, 13.8759384067048, 12.6912043032435), se = c(13.146450725069,
6.18117662297757, 10.5280709428545, 5.79031758868007, 5.6134758493389,
8.99506037284415, 4.51417520459472, 9.63904098503119, 10.4733126458527,
5.42217668469038, 6.29108716978058, 4.59649625016466, 3.33020686711468,
6.33771778061052, 4.8346262638503, 8.54465394917261, 14.844424692269,
6.03139932169789, 6.81868348322786, 6.11509970846302, 3.10756246026439,
5.58780915127844, 5.29884683471581, 6.81868348322786, 8.48331696134635,
5.49949492630409, 4.26891216952412, 7.57930295827613, 3.20589734361189,
6.02414124815959, 3.86436713231718, 8.29432536939161, 15.5127635620908,
6.2489999199872, 7.56242538989826, 6.93261214197874, 3.51050012264793,
4.08180243411059, 3.92729055598273, 6.5659052011974, 9.81467155730531,
7.30116048614496, 4.30051676481379, 5.68245349983419, 3.30571895021004,
5.12130625697954, 3.85230811044207, 8.57653452417959, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 13.3801262242842,
10.4051269414009, 2.48997991959775, 10.6945598839368, 5.51273777516923,
5.57175416224051, 4.13050978828414, 8.07740332312584, 5.68685130610761,
11.7108686460247, 2.78587708112026, 9.47071509678358, 5.16626342512437,
7.54615428178118, 1.95647074544389, 5.35983001388829, 10.5272793784107,
13.010092236414, 2.52212432507026, 10.8423680275318, 7.84006802691575,
7.77495980697007, 7.5910217435541, 7.35687509144413, 4.76794854558366,
11.79618582424, 1.77325814376939, 9.48566169425084, 3.62725025941752,
10.5221987562803, 3.77215676591876, 4.72008121399057, 8.26337299977033,
9.10372329202606, 3.75055551440939, 6.94502219818867, 7.03404419788345,
8.33208323956927, 7.00436371921898, 4.88094367014321, NA, NA,
NA, NA, NA, NA, NA, NA, 6.1310910756388, 7.05779710674655, 4.85569539590137,
4.22969003329769, 4.05380479714223, 7.40795368356051, 4.02785440540136,
3.23436409686836, 7.16472842006823, 6.01352641966426, 3.27214676389132,
6.34001840165574, 5.02935825373819, 6.22584220094849, 5.21602764989263,
4.4001262608147, 9.39296427001497, 5.68770604022395, 4.45237140309646,
6.35271158202893, 8.58195781858662, 5.73827306580802, 9.72403951269453,
6.38150539536802, 6.64538269109546, 6.66427040267725, 2.52212432507026,
5.9113826169894, 6.03001750504185, 4.93020734294659, 5.66482813311911,
5.18116246072679), ci = c(33.7940274369502, 15.889220349886,
27.0632679299923, 14.8844852160264, 14.4298990530937, 23.1225388048852,
11.604056783811, 24.7779436690751, 26.922507246369, 13.9381488952758,
16.1717544050426, 11.8156697682437, 8.56056928151632, 16.2916222062267,
12.4278024559438, 21.9647322335412, 38.1588084744178, 15.5042055398258,
17.5279839049389, 15.7193642336799, 7.98824361346093, 14.3639207052787,
13.6211194231394, 17.5279839049389, 21.8070604867824, 14.1369017627314,
10.9735880809055, 19.4832185113292, 8.24102147840344, 15.4855480678265,
9.93367195656454, 21.3212421334155, 39.8768282332316, 16.0635656852119,
19.4398333406273, 17.8208468456823, 9.02402784927781, 10.4926071937808,
10.0954217664752, 16.878196644708, 25.2294164279454, 18.7682305247498,
11.0548302794798, 14.6072117485218, 8.49762108712868, 13.1647368389222,
9.90267325397684, 22.0466838605638, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 34.3947094306661, 26.7472303130552,
6.40069715241714, 27.4912413777728, 14.1709435894762, 14.3226500420865,
10.6178134336812, 20.7636262615357, 14.6185166694449, 30.1037462209939,
7.16132502104324, 24.3452481982785, 13.280302918737, 19.3980071256559,
5.02926816019191, 13.7778816757996, 27.0612331488113, 33.4435067828789,
6.48332697724211, 27.8711943068573, 20.1535364601427, 19.9861704525997,
19.5133426079005, 18.9114494771115, 12.2564019245259, 30.3230610095818,
4.55830517426775, 24.3836696502324, 9.32414363016577, 27.0481729938491,
9.69663766364338, 12.1333550314124, 21.2416765342972, 23.4018657311414,
9.64110987888638, 17.8527479107548, 18.0815862461422, 21.4183018286465,
18.0052901468143, 12.5468651392342, NA, NA, NA, NA, NA, NA, NA,
NA, 15.760471351669, 18.1426450422092, 12.4819623840869, 10.872764369967,
10.4206369767492, 19.0427511781958, 10.3539293711124, 8.31419759724407,
18.4175207338946, 15.458261782508, 8.41132103479517, 16.2975361408962,
12.928376971967, 16.0040368732962, 13.4082259309908, 11.3108846405566,
24.1453833352814, 14.6207138334386, 11.4451850543063, 16.3301649998,
22.0606248826558, 14.7507005108872, 24.9964393403423, 16.4041818533482,
17.0825000365819, 17.1310524448908, 6.48332697724211, 15.1956927787292,
15.5006534670296, 12.6735014416993, 14.5619043009976, 13.318602109025
)), .Names = c("Date", "Treatment", "Heat", "Species", "N", "Average.Stem.Density",
"sd", "se", "ci"), row.names = c(NA, -144L), class = "data.frame")
You can customize your tick marks and labels as much as you like. For example:
# Set the tick marks on the y axis of an existing ggplot object `p`
# (`p` and `sp` are not defined in this snippet).
# Place a tick mark at every multiple of 5 from 0 to 40.
p + scale_y_continuous(breaks=seq(0,40,5))
# Breaks do not have to be evenly spaced: any numeric vector can be supplied.
p + scale_y_continuous(breaks=c(5,7.5, 20, 25))
# Change the x- and y-axis titles and set the axis limits.
sp + scale_x_continuous(name="Speed of cars", limits=c(0, 30)) +
scale_y_continuous(name="Stopping distance", limits=c(0, 150))
You can find full details and lots of examples here.
To address your 2nd question I suggest gap.plot from plotrix.
# Examples of gap.plot(); this requires the plotrix package to be loaded,
# e.g. library(plotrix).
# 20 random values in four runs of five, centred near 4, 20, 5 and 22, so
# that the range 8-16 between the low and high runs is empty.
twogrp<-c(rnorm(5)+4,rnorm(5)+20,rnorm(5)+5,rnorm(5)+22)
# One colour code per point: 2, 3, 4, 5 for the four runs.
gpcol<-c(2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5)
# Plot the values against their index, cutting the range 8-16 out of the
# axis (the y axis, per the plot title).
gap.plot(twogrp,gap=c(8,16),xlab="Index",ylab="Group values",
main="Gap on Y axis",col=gpcol)
# Same values used as x coordinates against random y values, with the gap
# taken out of the x axis and explicit x tick positions.
gap.plot(twogrp,rnorm(20),gap=c(8,16),gap.axis="x",xlab="X values",
xtics=c(4,7,17,20),ylab="Y values",main="Gap on X axis with added lines")
# Overlay a line (type="l", add=TRUE) on the previous plot using the same gap.
gap.plot(c(seq(3.5,7.5,by=0.5),seq(16.5,22.5,by=0.5)),
rnorm(22),gap=c(8,16),gap.axis="x",type="l",add=TRUE,col=2,)
# Two gaps at once (8-16 and 25-35), with explicit axis limits, tick
# positions, and per-point line type, symbol and colour.
gap.plot(twogrp,gap=c(8,16,25,35),
xlab="X values",ylab="Y values",xlim=c(1,30),ylim=c(0,42),
main="Test two gap plot with the lot",xtics=seq(0,30,by=5),
ytics=c(4,6,18,20,22,38,40,42),
lty=c(rep(1,10),rep(2,10)),
pch=c(rep(2,10),rep(3,10)),
col=c(rep(2,10),rep(3,10)),
type="b")
# Add a further line to the two-gap plot, using the same gap specification.
gap.plot(21:30,rnorm(10)+40,gap=c(8,16,25,35),add=TRUE,
lty=rep(3,10),col=rep(4,10),type="l")
You could also combine axis.break with manual subsetting and re-scaling of the data. By re-scaling I mean that after omitting any range of data you don't want, subtract the size of the range from the observations with greater values, then add the axis break zigzag and change the tick mark labels to reflect where the data was before rescaling.
Related
I am fitting a logistic regression model to complex survey data with the survey package in R. After fitting the model, I performed regression diagnostics using the car package. I noticed outlying and influential observations that I would like to remove, and then refit the model to check their effect on the regression coefficients, but my current approach is not giving me what I expect.
My dataset has about 10,000 observations. Here is sample data and code I have tried using:
library(car); library(survey)
dat <- structure(list(id = c(1009918, 1012826, 1029625, 1000926, 1027525,
1000115, 1000201, 1000202, 1000214, 1000219, 1000313, 1000324,
1000510, 1000521, 1000624, 1000708, 1000811, 1000817, 1000818,
1000906, 1000922, 1001002, 1001005, 1001401, 1001411, 1001413,
1001420, 1001424, 1001501, 1001510, 1001518, 1001526, 1001621,
1001807, 1001922, 1001926, 1002106, 1002217, 1002406, 1002416,
1002618, 1002709, 1003004, 1003017, 1003103, 1003108, 1003304,
1003319, 1003723, 1003804, 1003811, 1003819, 1004014, 1008902,
1008913, 1009011, 1009022, 1009123, 1009212, 1009215), strata = c(1,
2, 6, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), cluster = c(785,
938, 2337, 28, 2122, 3, 6, 6, 6, 6, 10, 10, 16, 16, 19, 22, 24,
24, 24, 28, 28, 33, 33, 45, 45, 45, 45, 45, 50, 50, 50, 50, 53,
60, 63, 63, 69, 74, 96, 96, 100, 102, 111, 111, 115, 115, 122,
122, 178, 193, 193, 193, 210, 755, 755, 759, 759, 762, 765, 765
), weights = c(621.921704979739, 5440.9107594311, 8450.49341643626,
2457.37241774248, 7174.79930450487, 930.492019594546, 443.253676607562,
443.253676607562, 886.507353215123, 443.253676607562, 1552.30979801343,
517.436599337811, 403.146111343943, 806.292222687886, 439.775494378883,
839.561001668328, 1210.77101540146, 403.590338467152, 403.590338467152,
457.23211170669, 914.464223413381, 584.557580338056, 584.557580338056,
233.135312658304, 233.135312658304, 233.135312658304, 466.270625316608,
233.135312658304, 287.94933168791, 287.94933168791, 287.94933168791,
287.94933168791, 2354.32022397843, 213.628591090648, 300.596873749779,
300.596873749779, 1121.27419052962, 528.482361549292, 1936.60489456861,
1291.06992971241, 282.360930726457, 3526.73915258957, 337.531162185852,
337.531162185852, 2183.63202546241, 2729.54003182802, 1035.32340123929,
1552.98510185893, 1400.62601417017, 717.92144006312, 358.96072003156,
1435.84288012624, 275.058410167952, 557.874242565598, 278.937121282799,
1687.48015279064, 1012.48809167438, 424.663883556537, 227.805527040477,
227.805527040477), age = c(20, 19, 93, 24, 18, 23, 22, 23, 24,
19, 18, 24, 20, 19, 18, 17, 19, 23, 19, 19, 21, 22, 21, 20, 23,
24, 24, 19, 21, 22, 20, 23, 21, 23, 20, 22, 23, 15, 20, 23, 24,
18, 24, 24, 15, 21, 24, 16, 22, 20, 20, 18, 21, 20, 21, 21, 24,
22, 24, 18), gender = structure(c(1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), levels = c("Male", "Female"), class = "factor"),
educ = structure(c(4L, 2L, 1L, 3L, 2L, 3L, 2L, 2L, 2L, 2L,
3L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 4L, 2L, 3L, 2L,
4L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L,
4L, 4L, 4L, 3L, 3L), levels = c("No formal education", "Primary",
"Secondary", "Tertiary"), class = "factor"), employ = structure(c(4L,
3L, 4L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 4L, 3L, 2L, 2L, 2L, 4L,
3L, 3L, 3L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 4L,
4L, 4L, 2L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 4L, 3L, 2L, 4L, 4L,
2L, 3L, 1L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 1L, 4L, 3L, 4L), levels = c("Unemployed",
"Employed", "Self-employed", "Other"), class = "factor"),
know = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), levels = c("No/Don't know", "Yes"), class = "factor"),
status = structure(c(2L, 4L, 2L, 1L, 5L, 3L, 2L, 2L, 2L,
4L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 5L, 4L, 2L, 5L, 5L, 4L, 3L,
2L, 3L, 2L, 2L, 2L, 2L, 4L, 3L, 3L, 4L, 3L, 2L, 2L, 3L, 1L,
2L, 1L, 1L, 2L, 2L, 2L), levels = c("1", "2", "3", "4", "5"
), class = "factor"), smoker = structure(c(2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), levels = c("no",
"yes"), class = "factor")), row.names = c(81L, 4174L, 6722L,
1255L, 2712L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L,
52L, 53L, 54L, 55L), class = "data.frame")
## Create survey design object: sampling units are identified by `cluster`,
## strata by `strata`, and sampling weights by `weights`. `nest = TRUE`
## declares that cluster ids are nested within strata.
dat_svy <- survey::svydesign(
  ids = ~cluster,
  strata = ~strata, # full argument name; `strat` relied on partial matching
  weights = ~weights,
  data = dat,
  nest = TRUE
)
## Strata containing a single PSU would otherwise stop variance estimation;
## "adjust" selects the survey package's adjustment for such strata.
options(survey.lonely.psu = "adjust")
## fit logistic regression model
## (`educ` was listed twice in the formula; a repeated term has no effect,
## so it is written once here.)
mod <- survey::svyglm(
  formula = smoker ~ age + educ + gender + employ + know + status,
  design = dat_svy,
  family = "quasibinomial"
)
I have tried the following:
# Attempt 1: refit `mod`, keeping only observations whose row name is not one
# of 2, 5, 9, 13, 21.
# NOTE(review): the row names of `dat` are not 1..n (they begin 81, 4174,
# 6722, ...), so matching on these values selects by label rather than by
# position -- confirm which is intended.
update(mod, subset = !(rownames(dat_svy) %in% c(2, 5, 9, 13, 21))) # returns an error
# Attempt 2: refit `mod`, passing negative indices as the subset.
update(mod, subset = -c(2, 5, 9, 13, 21)) # only removes one (first specified) observation
I am not sure why I am still receiving this message when running a base model with all variables in my dataset:
My data, with anonymized variables:
set.seed(1234)
#dput(df)
structure(list(outcome_1= structure(c(2L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
1L, 2L, 1L, NA, 2L, 1L), .Label = c("0", "1"), class = "factor"),
outcome_2= structure(c(2L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, NA, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, NA, 2L, 1L), .Label = c("0", "1"), class = "factor"),
outcome_3= structure(c(2L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, NA, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, NA, 1L, 1L), .Label = c("0", "1"), class = "factor"),
bl_ep = c(16, 92, 10, 40, 19, 1, 16, 10, 22, 28, 8, 11, 6,
47, 12, 1, 9, 20, 2, 14, 72, 28, 5, 16, 61, 12, 24, 22, 44,
44, 16, 36, 62, 10, 16, 10, 89, 22, 5, 38, 8, 11), bl_days = c(12,
28, 10, 25, 19, 1, 10, 9, 13, 28, 4, 11, 6, 20, 12, 1, 8,
16, 2, 12, 27, 28, 5, 13, 24, 10, 18, 18, 16, 16, 10, 28,
22, 5, 15, 8, 28, 15, 5, 22, 7, 11), score_1 = c(11,
19, 17, 17, 12, 14, 8, 12, 14, 15, 14, 13, 12, 14, 15, 5,
11, 14, 14, 13, 16, 11, 11, 14, 20, 14, 12, 11, 17, 15, 14,
18, 15, 14, 12, 10, 17, 16, 11, 13, 18, 17), score_2 = c(1.1,
1.6, 1.6, 2.8, 1.9, 3.3, 4, 3.8, 1.8, 1.4, 2, 3.55, 1.6,
1.8, 2.4, 3.7, 1.4, 2.9, 3.55, 2.5, 1.6, 3.2, 3.5, 2.4, 3.1,
2.3, 3.8, 3.9, 1.1, 1.7, 2.3, 1.5, 1.9, 3.3, 3, 2.9, 1.6,
3.1, 3.7, 2.8, 1.2, 1.9), score_3 = c(1,
1.22222222222222, 1.11111111111111, 1.88888888888889, 1.44444444444444,
1.44444444444444, 3.22222222222222, 2.77777777777778, 1.11111111111111,
1, 1, 2.83333333333333, 1.22222222222222, 1.875, 1.55555555555556,
2.66666666666667, 1, 2.25, 1.72222222222222, 2.05555555555556,
1.22222222222222, 2, 2, 1.77777777777778, 1.33333333333333,
1.11111111111111, 2.5, 2.55555555555556, 1, 1.22222222222222,
1.77777777777778, 1.22222222222222, 2.44444444444444, 1.55555555555556,
1.77777777777778, 1.66666666666667, 1.11111111111111, 2.33333333333333,
2.88888888888889, 1.55555555555556, 1, 1.25), score_4 = c(1.31428571428571,
1.37142857142857, 1.08571428571429, 1.83809523809524, 1.37142857142857,
1.8952380952381, 4, 3.88571428571429, 3.02857142857143, 2.12222222222222,
1.43333333333333, 3.39047619047619, 1.74285714285714, 1.67619047619048,
2.02857142857143, 3.48571428571429, 1.24761904761905, 3.73333333333333,
3.08571428571429, 2.56666666666667, 1.74285714285714, 2.6952380952381,
3.45714285714286, 2.27619047619048, 1.9047619047619, 2.62857142857143,
3.74285714285714, 3.74285714285714, 1.24761904761905, 1.39047619047619,
1.83809523809524, 2.74285714285714, 4, 1.77142857142857,
3.42857142857143, 3.2, 1.65714285714286, 2.55238095238095,
2.38095238095238, 2.40952380952381, 2.07619047619048, 2.56666666666667
), score_5 = c(1, 1, 1, 1, 1.33333333333333,
1, 3.33333333333333, 3.66666666666667, 1.66666666666667,
1.66666666666667, 2, 2.5, 1.66666666666667, 1, 1.33333333333333,
3, 1, 1.66666666666667, 2.16666666666667, 2.16666666666667,
1.33333333333333, 2.66666666666667, 3, 2.66666666666667,
1.33333333333333, 2.66666666666667, 3, 1.33333333333333,
1, 1, 1, 1, 1, 1.33333333333333, 3, 3.66666666666667, 1.66666666666667,
1.33333333333333, 2.33333333333333, 1.66666666666667, 2,
2), sex = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("F", "M"), class = "factor"), age = c(64,
66, 51, 69, 60, 65, 65, 69, 50, 78, 75, 78, 35, 77, 69, 48,
65, 72, 60, 64, 78, 71, 58, 55, 55, 57, 81, 76, 56, 71, 56,
73, 69, 51, 43, 77, 31, 64, 69, 63, 38, 71), childbirth = structure(c(2L,
2L, 2L, 1L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 2L, NA, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, NA, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L), .Label = c("N",
"Y"), class = "factor"), x1= c(3, 2, 2, NA,
3, 2, 3, NA, 3, 3, 2, 2, NA, 2, 5, 2, 2, 2, 4, 3, 2, 2, 3,
NA, 2, 3, NA, NA, 2, 2, 2, 2, 2, 2, 3, 2, 1, NA, 2, 2, 1,
3), x2= c(0, 0, 0, NA, 1, 0, 0, NA, 0, 0,
0, 0, NA, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, NA, NA,
0, 0, 0, 0, 0, 0, 0, 0, 1, NA, 0, 0, 0, 0), x3= structure(c(4L,
1L, 1L, 2L, 1L, 1L, 1L, NA, 4L, 1L, 1L, 4L, NA, 4L, 1L, 4L,
4L, 4L, 4L, 3L, 1L, 1L, 1L, 2L, 4L, 1L, NA, 2L, 1L, 4L, 1L,
1L, 4L, 4L, 1L, 4L, 4L, 2L, 4L, 4L, 4L, 1L), .Label = c("N",
"NA", "UNK", "Y"), class = "factor"), x4= structure(c(4L,
1L, 1L, 2L, 1L, 1L, 1L, NA, 1L, 1L, 4L, 1L, NA, 1L, 1L, 4L,
3L, 1L, 4L, 4L, 1L, 4L, 4L, 2L, 1L, 4L, NA, 2L, 4L, 1L, 4L,
1L, 1L, 4L, 4L, 1L, 4L, 2L, 4L, 1L, 4L, 4L), .Label = c("N",
"NA", "UNK", "Y"), class = "factor"), x5= structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 2L, NA, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L), .Label = c("N",
"Y"), class = "factor"), x6= structure(c(2L, 2L, 2L, 1L,
1L, 2L, 2L, NA, 1L, 1L, 1L, 2L, NA, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 2L, 2L, 1L, 1L, 2L, 2L, NA, 2L, 2L, 2L, 1L, 2L, 1L, 1L,
1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L), .Label = c("N", "Y"), class = "factor"),
x7= structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, NA, 1L, 1L, 1L, 1L, NA, 1L, 3L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 3L, 1L, NA, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 1L, 1L, 1L, 1L, 2L, 3L), .Label = c("N", "NA", "Y"), class = "factor"),
x8= structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, 1L,
2L, 2L, 2L, NA, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, NA, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
2L, 2L, 2L), .Label = c("N", "Y"), class = "factor"), x9= structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("N",
"Y"), class = "factor"), x10= structure(c(1L, 2L, 2L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x11= structure(c(1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L,
2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x12= structure(c(1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x13= structure(c(2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x14= c(41, 7, 8, 9, 7, 2, 1, 5, 9, 6, 6, 8,
14, 2, 4, NA, 11, 9, 31, 13, 8, 2, 11, 20, 8, 7, 6, 8, 2,
12, 32, 1, 2, 38, 10, 17, 5, 28, 31, 10, 3, 6), x15= structure(c(3L,
4L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 5L, 1L, 3L, 3L,
3L, 3L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 1L, 2L, 2L, 3L, 3L, 3L,
2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L), .Label = c("IATRO",
"IDIO", "OBST", "OBST/IDIO", "TRAUM"), class = "factor"),
x16= structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x17= structure(c(2L, 2L, 1L, 2L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x18= c(31.8, 20, 30.9, 23.3, 22.5, 23.1, 23.6, 25.9, 22.8,
25.2, 30.2, 23.4, 22.2, 29, 24.8, 32.7, 20.8, 28.5, 24.6,
23, 23.4, 21.1, 24.9, 18, 21.7, 27.6, 27, 29, 32.9, 26, 29.3,
27.1, 22.7, 19.7, 25, 22.3, 21.3, 17.5, 20.9, 20.1, 25.1,
22.1), x19= structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"),
x20 = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 2L), .Label = c("NO", "YES"), class = "factor"),
x21= structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L,
1L, 1L, 1L, 1L, 1L), .Label = c("NO", "YES"), class = "factor")), row.names = c(NA,
-42L), class = c("tbl_df", "tbl", "data.frame"))
# Logistic regression of outcome_1 on every other column of df (`.` shorthand).
# NOTE(review): `.` also pulls in outcome_2 and outcome_3 as predictors --
# confirm that this is intended.
# NOTE(review): glm() by default drops rows that contain an NA before fitting.
# In the data shown, every row with sex == "M" has childbirth == NA, so `sex`
# would be left with a single level after that step -- a likely cause of the
# contrasts error; check the factor levels of na.omit(df).
logit1 <-glm(outcome_1~., data = df, family = "binomial")
Which yielded the classic error message for a logit model:
#Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) :
# contrasts can be applied only to factors with 2 or more levels
Ok, so I went to double check that all factor variables indeed have more than 1 unique value, and can verify:
# Number of distinct values (NA counts as a value) in each column of df.
sapply(df, function(column) length(unique(column)))
returned all variables showing 2 or more unique values. Still same error message when I ran the model again.
I even attempted to run one solution I found online:
# Count the distinct values in each column, then refit using only the
# columns that have more than one distinct value.
# NOTE(review): this calls lm(), not glm() as in the model above -- confirm
# that is what was intended.
values_count <- sapply(df, function(column) length(unique(column)))
logit1 <- lm(outcome_1 ~ ., df[, values_count > 1])
What's going on? Am I overlooking some variable that appears to have more than one unique value but actually does not?
Thank you!
The regression works on the supplied data for simple models, such as
# A reduced model with only two predictors fits without error.
logit1 <- glm(outcome_1 ~ sex + age, data = df, family = "binomial")
It's a small data set with lots of variables, so the computer is not going to be able to pull out the meaningful relationships even if they are there. Start with some exploratory data plots, and think about the (biological) relationships between your outcomes and the other variables in order to come up with hypotheses you can test with your data. Realistically, which measurements do you think actually affect patient outcomes?
I am using the PCA function in R to perform a principal component analysis.
This is to make the question reproducible:
> dput(DATA_FINAL[1:50,])
structure(list(DataCRMSanoflore.Year_Sales = c(2, 1, 2, 1, 2,
1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
1, 2, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1,
1, 1, 1), DataCRMSanoflore.Month_Sales = c(9, 9, 2, 5, 9, 4,
7, 9, 3, 9, 7, 12, 3, 11, 3, 12, 3, 3, 6, 3, 4, 7, 5, 3, 5, 8,
8, 1, 9, 5, 4, 1, 10, 9, 5, 4, 9, 3, 2, 12, 9, 4, 4, 3, 6, 8,
6, 4, 4, 12), DataCRMSanoflore.Date_Sales = c(13, 3, 10, 22,
23, 26, 13, 1, 12, 2, 25, 11, 10, 26, 9, 4, 10, 18, 9, 9, 1,
7, 30, 9, 14, 24, 4, 2, 10, 17, 2, 28, 22, 17, 4, 14, 22, 30,
2, 5, 29, 13, 2, 10, 25, 5, 10, 23, 1, 6), DataCRMSanoflore.HOURS_INSCR = c(17,
14, 18, 17, 16, 11, 22, 14, 23, 17, 9, 21, 18, 16, 19, 12, 11,
17, 16, 21, 20, 11, 16, 18, 14, 19, 22, 17, 14, 10, 22, 15, 13,
19, 13, 21, 16, 19, 23, 19, 11, 21, 11, 22, 20, 13, 11, 15, 17,
15), DataCRMSanoflore.Year_Creation_Sales = c(2, 1, 2, 1, 2,
1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
2, 2, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1,
1, 1, 1), DataCRMSanoflore.Month_Creation_Sales = c(9, 9, 2,
10, 10, 9, 7, 9, 12, 9, 7, 12, 3, 11, 4, 2, 6, 3, 6, 10, 4, 7,
6, 3, 5, 8, 3, 1, 9, 7, 4, 11, 11, 9, 5, 4, 9, 3, 2, 12, 10,
4, 4, 3, 10, 8, 6, 4, 4, 12), DataCRMSanoflore.Day_Creation_Sales = c(13,
11, 15, 2, 31, 26, 23, 1, 5, 2, 25, 16, 10, 27, 13, 7, 3, 18,
9, 8, 27, 7, 8, 18, 18, 24, 6, 2, 26, 4, 4, 24, 16, 17, 12, 15,
22, 30, 10, 5, 1, 14, 2, 10, 5, 5, 10, 27, 25, 6), DataCRMSanoflore.Year_Validation_Sales = c(2,
1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
2, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1,
1, 1, 1, 1, 1, 1, 1), DataCRMSanoflore.Month_Validation_Sales = c(9,
9, 2, 10, 11, 10, 7, 9, 12, 9, 7, 12, 3, 12, 4, 2, 6, 3, 6, 10,
4, 7, 6, 3, 5, 8, 3, 1, 10, 7, 4, 11, 11, 9, 5, 4, 9, 4, 2, 12,
10, 4, 4, 3, 10, 8, 6, 4, 4, 12), DataCRMSanoflore.Day_Validation_Sales = c(15,
14, 16, 3, 3, 1, 27, 2, 6, 5, 27, 21, 19, 1, 27, 8, 5, 21, 10,
9, 30, 9, 9, 21, 26, 27, 7, 4, 1, 6, 15, 25, 17, 18, 13, 20,
29, 1, 11, 7, 2, 16, 3, 20, 6, 6, 13, 29, 29, 8), DataCRMSanoflore.AGE_CUSTUMER = c(33,
37, 24, 34, 32, 46, 52, 60, 44, 55, 37, 29, 34, 30, 30, 31, 37,
57, 48, 44, 42, 28, 34, 43, 45, 33, 37, 53, 43, 35, 55, 62, 60,
57, 33, 51, 32, 51, 35, 54, 42, 47, 59, 33, 45, 35, 36, 54, 28,
42), DataCRMSanoflore.MEAN_PURCHASE = c(0, 71.75, 50.7142857142857,
18.6666666666667, 0, 0, 54.7, 22, 0.666666666666667, 38, 6.5,
0, 83.3333333333333, 0.333333333333333, 44.3333333333333, 25.7777777777778,
24.1818181818182, 23.3846153846154, 35.5294117647059, 21.6363636363636,
1.125, 40.6428571428571, 0, 46.8461538461538, 6, 8.66666666666667,
18.4, 16.9285714285714, 15.0666666666667, 110.25, 0, 8.85714285714286,
0, 36.5, 21.5, 18.5714285714286, 28.125, 8.38888888888889, 101.333333333333,
0, 2, 0, 20.9166666666667, 69.1428571428571, 16.6666666666667,
1.5, 87.1666666666667, 0, 48.25, 13.3333333333333), DataCRMSanoflore.NUMBER_GIFTS = c(1,
1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 3, 4, 3, 4, 2, 2,
1, 2, 1, 1, 1, 2, 4, 1, 1, 1, 1, 3, 1, 3, 2, 4, 1, 1, 1, 1, 2,
2, 1, 1, 1, 1, 2, 3), DataCRMSanoflore.Year_Sales = c(2L, 1L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), DataCRMSanoflore.Month_Sales = c(9L, 9L, 2L, 5L, 9L, 4L, 7L,
9L, 3L, 9L, 7L, 12L, 3L, 11L, 3L, 12L, 3L, 3L, 6L, 3L, 4L, 7L,
5L, 3L, 5L, 8L, 8L, 1L, 9L, 5L, 4L, 1L, 10L, 9L, 5L, 4L, 9L,
3L, 2L, 12L, 9L, 4L, 4L, 3L, 6L, 8L, 6L, 4L, 4L, 12L), DataCRMSanoflore.Date_Sales = c(13L,
3L, 10L, 22L, 23L, 26L, 13L, 1L, 12L, 2L, 25L, 11L, 10L, 26L,
9L, 4L, 10L, 18L, 9L, 9L, 1L, 7L, 30L, 9L, 14L, 24L, 4L, 2L,
10L, 17L, 2L, 28L, 22L, 17L, 4L, 14L, 22L, 30L, 2L, 5L, 29L,
13L, 2L, 10L, 25L, 5L, 10L, 23L, 1L, 6L), DataCRMSanoflore.Year_Creation_Sales = c(2L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), DataCRMSanoflore.Month_Creation_Sales = c(9L, 9L, 2L, 10L,
10L, 9L, 7L, 9L, 12L, 9L, 7L, 12L, 3L, 11L, 4L, 2L, 6L, 3L, 6L,
10L, 4L, 7L, 6L, 3L, 5L, 8L, 3L, 1L, 9L, 7L, 4L, 11L, 11L, 9L,
5L, 4L, 9L, 3L, 2L, 12L, 10L, 4L, 4L, 3L, 10L, 8L, 6L, 4L, 4L,
12L), DataCRMSanoflore.Day_Creation_Sales = c(13L, 11L, 15L,
2L, 31L, 26L, 23L, 1L, 5L, 2L, 25L, 16L, 10L, 27L, 13L, 7L, 3L,
18L, 9L, 8L, 27L, 7L, 8L, 18L, 18L, 24L, 6L, 2L, 26L, 4L, 4L,
24L, 16L, 17L, 12L, 15L, 22L, 30L, 10L, 5L, 1L, 14L, 2L, 10L,
5L, 5L, 10L, 27L, 25L, 6L), DataCRMSanoflore.Year_Validation_Sales = c(2L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), DataCRMSanoflore.Month_Validation_Sales = c(9L, 9L, 2L,
10L, 11L, 10L, 7L, 9L, 12L, 9L, 7L, 12L, 3L, 12L, 4L, 2L, 6L,
3L, 6L, 10L, 4L, 7L, 6L, 3L, 5L, 8L, 3L, 1L, 10L, 7L, 4L, 11L,
11L, 9L, 5L, 4L, 9L, 4L, 2L, 12L, 10L, 4L, 4L, 3L, 10L, 8L, 6L,
4L, 4L, 12L), DataCRMSanoflore.Day_Validation_Sales = c(15L,
14L, 16L, 3L, 3L, 1L, 27L, 2L, 6L, 5L, 27L, 21L, 19L, 1L, 27L,
8L, 5L, 21L, 10L, 9L, 30L, 9L, 9L, 21L, 26L, 27L, 7L, 4L, 1L,
6L, 15L, 25L, 17L, 18L, 13L, 20L, 29L, 1L, 11L, 7L, 2L, 16L,
3L, 20L, 6L, 6L, 13L, 29L, 29L, 8L), TYPE_PEAU = c(3L, 4L, 5L,
1L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 4L, 3L, 1L, 3L, 1L, 3L, 3L,
3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 3L, 5L, 1L, 5L, 2L, 1L, 5L,
5L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L),
SENSIBILITE = c(4L, 4L, 4L, 1L, 3L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 4L, 4L, 1L, 3L, 1L, 3L, 3L, 4L, 1L, 1L, 1L, 2L, 1L, 1L,
4L, 1L, 2L, 3L, 1L, 4L, 4L, 1L, 3L, 4L, 4L, 1L, 4L, 1L, 1L,
1L, 1L, 4L, 1L, 1L, 1L, 1L, 4L, 1L), IMPERFECTIONS = c(3L,
4L, 3L, 1L, 2L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 3L, 3L, 1L, 2L,
1L, 3L, 2L, 3L, 1L, 1L, 1L, 4L, 1L, 1L, 3L, 1L, 3L, 2L, 1L,
4L, 3L, 1L, 3L, 3L, 3L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 3L, 1L), BRILLANCE = c(4L, 2L, 2L, 1L, 4L, 1L, 1L,
1L, 4L, 1L, 1L, 1L, 4L, 4L, 1L, 4L, 1L, 4L, 4L, 4L, 1L, 1L,
1L, 4L, 1L, 1L, 4L, 1L, 4L, 4L, 1L, 2L, 3L, 1L, 4L, 4L, 4L,
1L, 4L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 4L, 1L), GRAIN_PEAU = c(4L,
4L, 4L, 1L, 4L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 4L, 2L, 1L, 2L,
1L, 2L, 4L, 4L, 1L, 1L, 1L, 4L, 1L, 1L, 3L, 1L, 2L, 2L, 1L,
3L, 2L, 1L, 2L, 4L, 4L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 2L, 1L), RIDES_VISAGE = c(2L, 2L, 2L, 1L, 4L, 1L,
1L, 1L, 4L, 1L, 1L, 1L, 4L, 4L, 1L, 2L, 1L, 4L, 2L, 4L, 1L,
1L, 1L, 2L, 1L, 1L, 4L, 1L, 4L, 4L, 1L, 4L, 4L, 1L, 2L, 4L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 4L, 1L),
ALLERGIES = c(2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L), MAINS = c(3L, 4L, 4L,
1L, 4L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 3L, 1L, 3L, 1L, 3L,
3L, 3L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 3L, 2L, 1L, 4L, 4L,
1L, 3L, 4L, 4L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
3L, 1L), PEAU_CORPS = c(2L, 3L, 3L, 1L, 2L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 3L, 1L, 1L, 1L, 2L,
1L, 1L, 2L, 1L, 2L, 2L, 1L, 3L, 3L, 1L, 3L, 3L, 2L, 1L, 3L,
1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 3L, 1L), INTERET_ALIM_NATURELLE = c(2L,
4L, 4L, 1L, 2L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 2L, 2L, 1L, 2L,
1L, 4L, 2L, 2L, 1L, 1L, 1L, 4L, 1L, 1L, 2L, 1L, 2L, 2L, 1L,
3L, 4L, 1L, 4L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 2L, 1L), INTERET_ORIGINE_GEO = c(2L, 4L, 2L, 1L,
2L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 2L, 5L, 1L, 2L, 1L, 2L, 5L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 5L, 2L, 1L, 4L, 2L, 1L,
2L, 5L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L,
1L), INTERET_VACANCES = c(3L, 4L, 2L, 1L, 3L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 3L, 2L, 1L, 2L, 1L, 3L, 4L, 3L, 1L, 1L, 1L,
2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 3L, 3L, 1L, 4L, 3L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L), INTERET_ENVIRONNEMENT = c(3L,
5L, 5L, 1L, 5L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 3L, 3L, 1L, 3L,
1L, 3L, 3L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 3L, 3L, 1L,
5L, 3L, 1L, 3L, 3L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L,
1L, 1L, 3L, 1L), INTERET_COMPOSITION = c(2L, 2L, 2L, 1L,
4L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 3L, 4L, 1L,
4L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 4L,
1L)), .Names = c("DataCRMSanoflore.Year_Sales", "DataCRMSanoflore.Month_Sales",
"DataCRMSanoflore.Date_Sales", "DataCRMSanoflore.HOURS_INSCR",
"DataCRMSanoflore.Year_Creation_Sales", "DataCRMSanoflore.Month_Creation_Sales",
"DataCRMSanoflore.Day_Creation_Sales", "DataCRMSanoflore.Year_Validation_Sales",
"DataCRMSanoflore.Month_Validation_Sales", "DataCRMSanoflore.Day_Validation_Sales",
"DataCRMSanoflore.AGE_CUSTUMER", "DataCRMSanoflore.MEAN_PURCHASE",
"DataCRMSanoflore.NUMBER_GIFTS", "DataCRMSanoflore.Year_Sales",
"DataCRMSanoflore.Month_Sales", "DataCRMSanoflore.Date_Sales",
"DataCRMSanoflore.Year_Creation_Sales", "DataCRMSanoflore.Month_Creation_Sales",
"DataCRMSanoflore.Day_Creation_Sales", "DataCRMSanoflore.Year_Validation_Sales",
"DataCRMSanoflore.Month_Validation_Sales", "DataCRMSanoflore.Day_Validation_Sales",
"TYPE_PEAU", "SENSIBILITE", "IMPERFECTIONS", "BRILLANCE", "GRAIN_PEAU",
"RIDES_VISAGE", "ALLERGIES", "MAINS", "PEAU_CORPS", "INTERET_ALIM_NATURELLE",
"INTERET_ORIGINE_GEO", "INTERET_VACANCES", "INTERET_ENVIRONNEMENT",
"INTERET_COMPOSITION"), row.names = c(NA, 50L), class = "data.frame")
The first step is to create a PCA object like this:
# Load the PCA and visualisation packages, then fit the PCA on the data
# (coerced to a plain data frame) without drawing the default graphs.
library(FactoMineR)
library(factoextra)
pca_input <- as.data.frame(DATA_FINAL)
res.pca <- PCA(pca_input, graph = FALSE)
Then, to plot variables, I used the fviz_pca_var function like this:
# Draw the PCA variables plot with all variables in black.
fviz_pca_var(res.pca, col.var = "black")
I get this error:
Error in row.names<-.data.frame(*tmp*, value = value) :
duplicate 'row.names' are not allowed In addition: Warning messages:
1: In data.row.names(row.names, rowsi, i) : some row.names
duplicated: 14,15,16,17,18,19,20,21,22 --> row.names NOT used 2:
non-unique values when setting 'row.names':
‘DataCRMSanoflore.Date_Sales’, ‘DataCRMSanoflore.Day_Creation_Sales’,
‘DataCRMSanoflore.Day_Validation_Sales’,
‘DataCRMSanoflore.Month_Creation_Sales’,
‘DataCRMSanoflore.Month_Sales’,
‘DataCRMSanoflore.Month_Validation_Sales’,
‘DataCRMSanoflore.Year_Creation_Sales’, ‘DataCRMSanoflore.Year_Sales’,
‘DataCRMSanoflore.Year_Validation_Sales’
How can I resolve this issue, please?
You have duplicate columns in your input data, so simply removing these columns should fix the problem.
# Drop columns 1-3 and 5-10; their names are repeated by columns 14-22.
df <- DATA_FINAL[, -c(1:3, 5:10)]
and then run PCA
# Fit the PCA on the de-duplicated data frame and plot the variables.
library(FactoMineR)
library(factoextra)
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
res.pca <- PCA(df, graph = FALSE)
fviz_pca_var(res.pca, col.var = "black")
I have been struggling with ggplot to display these plots how I would like. My data have 2 factors, quarter and species. Station will be on the x-axis, value on the y-axis, and the constituent will be used with the facet_wrap. I want quarter differentiated with shapes, and species with colors.
The issue is I'm trying to replicate a figure done in SigmaPlot. It is a 4x4 grid of plots in which the first two rows of the first column are empty, to allow for the placement of the legend. My original plan was to make two separate sets of facets using facet_wrap and combine them; however, this doesn't maintain the 4x4 arrangement: it transforms it into a 1x2, which ruins the alignment of the plots and shrinks the larger faceted grid.
My next thought was to create each plot individually, then arrange them in a grid using cowplot. This presents the plots how I'd like them arranged, but I can't figure out how to have two y-axis labels, due to different units. One label would be centered on the two leftmost plots, and one centered on the left of the next column of 4 plots.
I'm trying to use this code (just copy the example data below, and run):
# Load ggplot2 for plotting and gridExtra for arranging the two plots.
# (The package is called ggplot2; library(ggplot) fails to load it.)
library(ggplot2)
library(gridExtra)

# Rows 1-95 of the example data hold constituents A-L; rows 96-111 hold
# XX and YY, which get their own y-axis label ("Unit b").
test.data1 <- test.data[1:95, ]
test.data2 <- test.data[96:111, ]

# Constituents A-L: three columns of panels, legend suppressed.
testplot1 <- ggplot(test.data1, aes(Station, value)) +
  geom_point(aes(shape = factor(quarter), fill = Species)) +
  scale_shape_manual(values = c(21, 22)) +
  labs(
    x = "Station", y = "Unit a",
    shape = "Sampling Quarter", fill = "Species"
  ) +
  theme(legend.position = "none", legend.title = element_blank()) +
  guides(
    # Shape 21 is a fillable point, so the fill legend shows the colours.
    fill = guide_legend(override.aes = list(shape = 21), nrow = 2, byrow = TRUE),
    shape = guide_legend(nrow = 2, byrow = TRUE)
  ) +
  facet_wrap(~ constituent, ncol = 3, scales = "free_y")

# Constituents XX and YY: a single column of panels with the legend on top.
# Every layer must end with `+`; without it after geom_point() the plot
# object ends there and the remaining layers are never attached.
testplot2 <- ggplot(test.data2, aes(Station, value)) +
  geom_point(aes(shape = factor(quarter), fill = Species)) +
  scale_shape_manual(values = c(21, 22)) +
  labs(
    x = "Station", y = "Unit b",
    shape = "Sampling Quarter", fill = "Species"
  ) +
  theme(legend.position = "top", legend.title = element_blank()) +
  guides(
    fill = guide_legend(override.aes = list(shape = 21), nrow = 2, byrow = TRUE),
    shape = guide_legend(nrow = 2, byrow = TRUE)
  ) +
  facet_wrap(~ constituent, ncol = 1, scales = "free_y")

# Place the XX/YY column to the left of the A-L grid.
grid.arrange(testplot2, testplot1, ncol = 2)
Which generates this:
But I want it to be arranged like this, where the XX and YY plots from above are normalized in size with the other plots (this was done using individual plots, and using plot_grid):
Example data from a larger set:
test.data <- structure(list(Station = structure(c(1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("StA", "StB"), class = "factor"),
CollectionDate = structure(c(3L, 2L, 3L, 1L, 3L, 1L, 3L,
1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L,
3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L,
1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L,
3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L,
1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 1L, 3L, 2L, 3L,
1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L,
3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L), .Label = c("10/1/2017",
"10/16/2017", "4/1/2017"), class = "factor"), Species = structure(c(1L,
2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L,
1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L,
3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L,
2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L,
2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L,
1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L,
3L, 1L, 2L, 2L, 3L), .Label = c("SpA", "SpB", "SpC"), class = "factor"),
quarter = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("2017 Q2",
"2017 Q4"), class = "factor"), constituent = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L
), .Label = c("A", "B", "C", "D", "E", "F", "G", "H", "I",
"J", "K", "L", "XX", "YY"), class = "factor"), value = c(16,
35, 46, 23, 40, 19, 9, 50, 0.2, 1, 0.5698, 0.322, 1, 0.45,
0.322, 0.5, 16, 9, 6, 19, 14, 13, 16, 9, 0, 0.004, 0, 0.004,
1, 0.32, 1, 0.678, 0, 0.39, 0.23, 0, 0, 1.1, 0.5, 0.5, 9,
4.9, 7, 4.768, 9, 8.65, 4.768, 6.54, 195, 195, 46, 46, 124,
124, 218, 218, 2, 1, 1, 1, 1, 2, 1, 1, 0.1, 0.4, 0.22, 0.4,
0.22, 0.4, 0.22, 0.1, 0.99, 0.99, 1.2, 0.45, 0.765, 0.99,
0.99, 0.99, 0.99, 1.2, 4.3, 0.98, 0.99, 1.2, 1.2, 34, 34,
65, 98, 150, 34, 65, 65, 2, 0, 4, 1.3, 5, 3.3, 1.56, 1, 9,
0.36, 4, 4, 11, 2, 2.22, 11)), class = "data.frame", row.names = c(NA,
-111L))
Is there a way to make the thinner lines in the plot (those without a y-axis tick label) appear closer to the lines above them (those with a label), so as to better simulate pairs of baseline/actual bars for the same activity in a Gantt chart?
See gantt chart examples here and here.
mdfr <- structure(list(name = structure(c(8L, 8L, 8L, 8L, 6L, 6L, 6L,
6L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 7L, 7L, 7L, 7L, 5L, 5L, 5L,
5L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 8L, 8L, 8L, 8L, 6L, 6L, 6L,
6L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 7L, 7L, 7L, 7L, 5L, 5L, 5L,
5L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L), .Label = c("100 A", "100 B",
"101 A", "101 B", "102 A", "102 B", "103 A", "103 B"), class = "factor"),
stadio = c(2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7,
1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8, 2, 4, 5,
7, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7, 1, 3, 6, 8, 1, 3,
6, 8, 1, 3, 6, 8, 1, 3, 6, 8), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("start_date", "end_date"), class = "factor"),
value = c("05/10/2012", "17/12/2012", "12/03/2012", "30/05/2013",
"10/01/2013", "14/10/2013", "24/10/2013", "10/01/2014", "30/09/2013",
"29/01/2014", "30/01/2014", "06/05/2014", "30/09/2013", "29/01/2014",
"30/01/2014", "06/05/2014", "05/10/2012", "17/12/2012", "12/03/2012",
"30/05/2013", "10/01/2013", "14/10/2013", "24/10/2013", "10/01/2014",
"30/09/2013", "29/01/2014", "30/01/2014", "05/06/2014", "30/09/2013",
"29/01/2014", "30/01/2014", "05/06/2014", "17/12/2012", "12/03/2012",
"30/05/2013", "30/05/2014", "14/10/2013", "24/10/2013", "10/01/2014",
"11/07/2014", "29/01/2014", "30/01/2014", "06/05/2014", "23/12/2014",
"29/01/2014", "30/01/2014", "06/05/2014", "23/12/2014", "17/12/2012",
"12/03/2012", "30/05/2013", "30/05/2014", "14/10/2013", "24/10/2013",
"10/01/2014", "11/07/2014", "29/01/2014", "30/01/2014", "05/06/2014",
"28/12/2014", "29/01/2014", "30/01/2014", "05/06/2014", "29/12/2014"
), rating = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("3",
"5"), class = "factor")), row.names = c(NA, -64L), .Names = c("name",
"stage", "variable", "value", "rating"), class = "data.frame")
# Build the y-axis labels: each "<number> B" row is labelled "No.<number>"
# and the remaining rows get an empty label.
names <- as.character(unique(mdfr$name))
# Keep the names whose only space/"B" characters are exactly " B".
b_names <- names[gsub("[^ B]", "", names) == " B"]
names1 <- paste0("No.", gsub("[ B]", "", b_names))
names2 <- character(length(names1))
# Labels are matched to `names` by position, so this relies on the "B"
# names coming before the other names in `names`.
new.names <- c(names1, names2)

# One line per name, coloured by stage, with line thickness set by rating.
ggplot(
  mdfr,
  aes(
    x = as.POSIXct(as.Date(value, "%d/%m/%Y")),
    y = name,
    colour = factor(stage)
  )
) +
  geom_line(aes(size = rating)) +
  labs(colour = "(Baseline/Actual):", x = "", y = "") +
  scale_colour_brewer(
    palette = "RdYlGn",
    breaks = c("1", "3", "6", "8"),
    guide = "none"
  ) +
  scale_size_manual(
    breaks = levels(mdfr$rating),
    values = as.integer(levels(mdfr$rating)),
    guide = "none"
  ) +
  scale_y_discrete(breaks = names, labels = new.names)
I would use facets to do this. Below you find a possible solution. This may not be the most elegant solution, but it lets you change the distance between thinner and thicker lines by changing the expand argument in scale_x_discrete.
# Facet label per row ("No.<number>"); the factor levels follow the order in
# which the labels first appear, which fixes the order of the facets.
nr_labels <- paste0("No.", as.numeric(gsub("A|B", "", mdfr$name)))
mdfr$nr <- factor(nr_labels, levels = unique(nr_labels))

# Reshape to one row per nr/stage/rating with start_date and end_date columns.
# NOTE(review): dcast() is presumably reshape2::dcast -- confirm the package
# is loaded before running this.
df <- dcast(mdfr, nr + stage + rating ~ variable)

# Same plot as before, but with x and y swapped and then flipped back.
ggplot(
  df,
  aes(
    x = factor(rating),
    ymin = as.POSIXct(as.Date(start_date, "%d/%m/%Y")),
    ymax = as.POSIXct(as.Date(end_date, "%d/%m/%Y")),
    color = factor(stage),
    size = rating
  )
) +
  # linerange instead of line
  geom_linerange() +
  # one facet per activity number
  facet_grid(nr ~ ., scales = "free_x") +
  # flip the coordinates back
  coord_flip() +
  # change `expand` to adjust the distance between thin and thick lines
  scale_x_discrete(name = "", breaks = NULL, expand = c(4, 1)) +
  scale_colour_brewer(
    palette = "RdYlGn",
    breaks = c("1", "3", "6", "8"),
    guide = "none"
  ) +
  scale_size_manual(
    breaks = levels(mdfr$rating),
    values = as.integer(levels(mdfr$rating)),
    guide = "none"
  )