IV Regression With Clustered Standard Errors - r

In R, I have a panel dataset with observations indexed by individual and year, and the variables y, x and z. I am trying to estimate an IV regression such that:
$$y = \beta x + e$$
$$x = \alpha z + u$$
Is there a way to estimate the IV regression while clustering standard errors by individual?
So far I have used ivreg as follows:
# 2SLS with ivreg(): regressors are listed before `|`, instruments after it.
# The year dummies appear on both sides, i.e. they serve as their own
# instruments, while x is instrumented by z.
my_iv <- ivreg(
  y ~ x + as.factor(year) | z + as.factor(year),
  data = data
)
But I can't figure out how to include clustered standard errors. Thanks in advance!

We can use lfe::felm.
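The formula is specified as y ~ x1 + x2 | f1 + f2 | (Q|W ~ x3+x4) | clu1 + clu2, where the first part is the model (the exogenous regressors), the second part (f1 + f2) contains the fixed effects, the third part is the IV specification (the endogenous variables Q and W instrumented by x3 and x4), and the fourth part lists the cluster variables; here we only have clu1 = id.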
# Four-part felm formula:
#   outcome ~ exogenous regressors | fixed effects | IV specification | clusters
ivest <- lfe::felm(
  y ~ x1 + x2 |                   # model: exogenous regressors
    id + firm |                   # fixed effects
    (Q | W ~ x3 + factor(x4)) |   # Q and W instrumented by x3 and factor(x4)
    id,                           # cluster the standard errors by id
  data = d
)
summary(ivest)
# Call:
# lfe::felm(formula = y ~ x1 + x2 | id + firm | (Q | W ~ x3 + factor(x4)) | id, data = d)
#
# Residuals:
# Min 1Q Median 3Q Max
# -1.9436 -0.5140 0.0078 0.4335 2.0452
#
# Coefficients:
# Estimate Cluster s.e. t value Pr(>|t|)
# x1 1.6392 0.5155 3.180 0.00519 **
# x2 0.5039 0.1547 3.257 0.00438 **
# `Q(fit)` 0.9348 0.5148 1.816 0.08608 .
# `W(fit)` 1.2116 0.1048 11.561 9.18e-10 ***
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 1.051 on 40 degrees of freedom
# Multiple R-squared(full model): 0.8295 Adjusted R-squared: 0.6846
# Multiple R-squared(proj model): 0.7157 Adjusted R-squared: 0.474
# F-statistic(full model, *iid*):6.157 on 34 and 40 DF, p-value: 6.629e-08
# F-statistic(proj model): 85.66 on 4 and 18 DF, p-value: 1.837e-11
# F-statistic(endog. vars):121.7 on 2 and 18 DF, p-value: 3.488e-11
Data:
d <- structure(list(x1 = c(1.37095844714667, -0.564698171396089, 0.363128411337339,
0.63286260496104, 0.404268323140999, -0.106124516091484, 1.51152199743894,
-0.0946590384130976, 2.01842371387704, -0.062714099052421, 1.30486965422349,
2.28664539270111, -1.38886070111234, -0.278788766817371, -0.133321336393658,
0.635950398070074, -0.284252921416072, -2.65645542090478, -2.44046692857552,
1.32011334573019, -0.306638594078475, -1.78130843398, -0.171917355759621,
1.2146746991726, 1.89519346126497, -0.4304691316062, -0.25726938276893,
-1.76316308519478, 0.460097354831271, -0.639994875960119, 0.455450123241219,
0.704837337228819, 1.03510352196992, -0.608926375407211, 0.50495512329797,
-1.71700867907334, -0.784459008379496, -0.850907594176518, -2.41420764994663,
0.0361226068922556, 0.205998600200254, -0.361057298548666, 0.758163235699517,
-0.726704827076575, -1.36828104441929, 0.432818025888717, -0.811393176186672,
1.44410126172125, -0.431446202613345, 0.655647883402207, 0.321925265203947,
-0.783838940880375, 1.57572751979198, 0.642899305717316, 0.0897606465996057,
0.276550747291463, 0.679288816055271, 0.0898328865790817, -2.99309008315293,
0.284882953530659, -0.367234642740975, 0.185230564865609, 0.581823727365507,
1.39973682729268, -0.727292059474465, 1.30254263204414, 0.335848119752074,
1.03850609869762, 0.920728568290646, 0.720878162866862, -1.04311893856785,
-0.0901863866107067, 0.623518161999544, -0.953523357772344, -0.542828814573857
), x2 = c(0.580996497681682, 0.768178737834591, 0.463767588540167,
-0.885776297409679, -1.09978089864786, 1.51270700980493, 0.257921437532031,
0.0884402291595864, -0.120896537539089, -1.19432889516053, 0.611996898040387,
-0.217139845746521, -0.182756706331922, 0.93334632857116, 0.821773110508249,
1.39211637593427, -0.476173923054674, 0.650348560726305, 1.39111045639,
-1.1107888794479, -0.860792586877842, -1.13173868085377, -1.4592139995024,
0.0799825532411612, 0.65320433964919, 1.20096537559849, 1.04475108716773,
-1.00320864683985, 1.84848190167275, -0.666773408757817, 0.105513812456069,
-0.422255881868856, -0.122350171954971, 0.188193034501498, 0.119160957997006,
-0.0250925508674029, 0.108072727942033, -0.485435235846668, -0.504217130687904,
-1.66109907991481, -0.382333726873818, -0.5126502578778, 2.7018910003448,
-1.36211623118972, 0.137256218558607, -1.49362506731629, -1.4704357414368,
0.124702386197007, -0.996639134884037, -0.0018226143047082, -0.428258881425815,
-0.613671606449495, -2.02467784541911, -1.22474795035999, 0.179516441117938,
0.567620594423535, -0.492877353553475, 6.28840653511241e-05,
1.12288964337997, 1.43985574297619, -1.09711376840582, -0.117319560250177,
1.2014984009197, -0.469729580566301, -0.0524694849389963, -0.0861072982370896,
-0.887679017906432, -0.444684004884738, -0.0294448790882381,
-0.413868849057924, 1.1133860233682, -0.480992841653982, -0.433169032600729,
0.696862576552103, -1.05636841317091), id = structure(c(15L,
1L, 6L, 10L, 8L, 19L, 12L, 5L, 10L, 14L, 2L, 6L, 1L, 13L, 15L,
16L, 10L, 17L, 2L, 9L, 4L, 19L, 14L, 7L, 16L, 5L, 19L, 17L, 8L,
10L, 6L, 9L, 8L, 18L, 8L, 10L, 18L, 4L, 9L, 16L, 11L, 6L, 14L,
8L, 9L, 17L, 7L, 13L, 11L, 18L, 9L, 5L, 2L, 3L, 2L, 13L, 15L,
4L, 6L, 19L, 19L, 10L, 1L, 13L, 1L, 4L, 12L, 6L, 11L, 7L, 11L,
5L, 17L, 7L, 12L), levels = c("1", "2", "3", "4", "5", "6", "7",
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"20"), class = "factor"), firm = structure(c(10L, 6L, 2L, 12L,
3L, 1L, 13L, 4L, 11L, 9L, 6L, 10L, 7L, 3L, 12L, 2L, 9L, 12L,
10L, 9L, 3L, 7L, 3L, 6L, 13L, 4L, 10L, 2L, 7L, 1L, 2L, 3L, 1L,
10L, 5L, 13L, 2L, 3L, 3L, 10L, 2L, 10L, 4L, 9L, 10L, 13L, 7L,
1L, 4L, 6L, 3L, 12L, 3L, 6L, 12L, 9L, 10L, 7L, 3L, 9L, 2L, 6L,
9L, 8L, 11L, 6L, 5L, 10L, 1L, 9L, 10L, 1L, 12L, 6L, 1L), levels = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13"
), class = "factor"), u = c(1.50059880011796, -1.1406741536911,
0.496847374722702, 1.4083694575847, -0.62337037345602, -0.741660664568964,
0.376627729983173, -1.31017535156112, 0.0781199235564999, -0.795416769640129,
1.76649494325627, -0.936421828043658, 0.703918293809932, -0.755838819389157,
-0.468169290964327, -0.844487992517338, -1.09153959975659, -1.32335062531384,
-0.591596405204697, 0.748314146325651, -0.821525190798946, -1.76459494767604,
-0.388535656201395, -0.669263871638158, 0.704517511710363, -0.827627694647081,
-3.3493240811071, 0.342540952025075, -1.93109788557074, 0.177752576915369,
1.40188419407316, 0.373066264998072, -0.0486032519865556, 2.46346047322287,
0.567606140037349, 0.968840299750039, 2.65141739994693, -0.0344926185009225,
-0.40962669998027, -0.103651100985079, -0.574649379426294, -0.51335141274752,
-0.0552973857861534, -0.978438064871714, 0.0587080378017696,
0.0618109093908222, -0.380486867891915, 1.41806216315907, -0.818524354680359,
-0.457037558315825, 0.217494497375878, 0.625046465412299, 0.517376640159983,
-0.185381470022198, -0.064033677082036, -0.704399082564789, -0.699697505160012,
0.560902750983909, 0.0478343678989824, 0.566967709139165, -1.19615960100349,
0.959600821189023, 0.507252101121057, -0.374112242378446, 1.42044733361966,
1.69437460038628, 0.516779140506365, -0.0481044125067062, -1.28024577281817,
-0.48968148391622, 1.28769545705832, 0.930501447346418, 0.72614069638199,
-0.193061140800734, -0.561229382374985), y = c(0.708066254628044,
0.589847453563791, 3.27667516342197, -0.358336088116629, 1.93500981542015,
1.2290187506713, 3.88766205090481, -0.853662127286324, -2.97033719755537,
-2.13167402260678, -1.24053074997088, -0.66281266951442, 3.49598166168038,
1.69270069297096, -2.09652282229081, 1.54306435896839, 0.732824572951072,
2.19324620039636, 0.431719256134024, -1.98271609019762, -1.99702206469318,
0.797786579028574, 2.22710786022503, 1.76093212830986, 1.31242354427332,
-1.72283495631895, -1.37198900123695, 1.08479618619769, 0.0287449582719836,
-1.50213538600816, 1.65240525388632, -1.4421369631952, -1.30345852538258,
3.28724465137813, -0.211786236468322, 2.18957244539307, 1.56845473318596,
1.53145225517394, 2.27959056711884, 0.64051870094227, 0.864194170850803,
-1.64068991456381, 4.40925788562828, 0.841266425448286, 0.132140253784467,
3.05682249679669, 0.150826763337795, 0.133032535285816, -2.05592783553907,
0.203954571581413, -1.27312068778677, 1.9033153295143, -4.05715144236128,
-1.34004031196686, -0.000958627238489718, -0.362160895354921,
-0.0579078985442866, -0.0265915674252484, 4.71311872421459, 1.98292166068375,
-2.23214380882587, 1.18921557572872, 3.47207577553789, 0.495203981641904,
2.05682465802038, 1.89443159356544, -0.584175385578428, -0.679597405650453,
0.749822321747065, 1.95112667124936, 3.36808860806582, -0.287388459122759,
3.02594427585541, 3.65847302005212, 0.404317829797556), x3 = c(-0.0580369218956107,
0.449420458030374, 1.07708171621578, 0.18648317970334, 1.56915807785103,
-0.226757041173532, 0.766361213944578, -0.312113303827908, -1.35545758035236,
-0.905646776474841, -0.651186922939884, -1.04474994594567, 1.59391872402891,
0.542347904017506, -2.36633351109067, 0.471107993311305, -0.231608083899238,
0.0936844299205661, -1.29508960780026, -0.848784608724723, -2.29005568761573,
1.28285601248922, 1.66378018954505, 0.401436254867987, -1.5093425111217,
-1.13836587194805, 0.337741204287881, -1.47262814385863, -0.291877329483668,
-2.02964531250218, -0.594044533118403, -1.05455069953558, -1.07847961911409,
1.28112323849091, 0.0810282612193334, -0.99307899011046, -0.75707153139077,
0.00924351551023881, 1.40932694027844, 0.293275146448466, 0.297134825971617,
-1.40253035970293, 0.104285140622482, 0.371419703354857, -1.67615738713669,
1.78467147315505, -2.27698465336006, -1.58982110493222, -0.246374254853273,
-0.35374988018924, 0.268372607619405, 0.454175327851366, -2.68824727377896,
0.866650172377294, 0.168739701377876, -1.09082406185189, -0.38034811429265,
-0.948091836164654, 0.721252264089489, -0.159142456737528, -1.78755661450632,
1.49377847299869, 1.52389572182853, 0.458664055612706, -0.812496512482602,
1.05538328565825, -0.907595054419802, -0.417152112834271, 1.12843784555488,
0.933612853893915, 0.445603849144031, -0.0591298458723813, 0.188708474704579,
0.691269579039664, 0.821856282128092), x4 = c(11L, 1L, 1L, 4L,
10L, 5L, 12L, 11L, 9L, 3L, 5L, 5L, 2L, 9L, 3L, 8L, 12L, 2L, 11L,
8L, 12L, 2L, 6L, 10L, 1L, 4L, 8L, 8L, 3L, 9L, 10L, 6L, 3L, 1L,
4L, 12L, 9L, 1L, 11L, 6L, 4L, 9L, 12L, 8L, 6L, 12L, 6L, 1L, 11L,
11L, 9L, 2L, 11L, 3L, 1L, 11L, 1L, 1L, 8L, 11L, 2L, 1L, 7L, 6L,
2L, 5L, 1L, 9L, 7L, 2L, 12L, 4L, 7L, 10L, 3L), Q = c(0.646076516637449,
0.774080826010094, 0.705491710522028, -0.329960376229488, 2.3937371586073,
-0.175453512555076, 2.54744938000504, 0.227305524232654, 1.17480056418941,
0.314456133339992, -0.0804355243921238, 2.9329566992971, -0.662472721938112,
1.26523857842063, -0.623496493384185, 1.24297501180904, 0.338575486300627,
-1.01897501642229, -1.4577819702458, -0.220428059190226, -1.09122765376906,
-0.797414605643003, 1.79407962827076, 2.41577034670514, 0.692621619981134,
-0.606319409257714, 0.956043025501215, -1.11775444992615, 1.89608324348157,
-0.863502236328827, 0.767257776055209, -0.320789249747173, 1.08195881842052,
-0.0440656150328142, 0.757553454326518, -1.61835680364557, -0.728173973541659,
-0.939760619892448, -1.42632613622065, 0.484748356232311, 0.0228991794151839,
0.487920320035935, 2.55563673422967, 0.626999269491833, -1.60281421987745,
0.961527689359054, 0.910717534800114, 0.580434197322011, -0.958460907355067,
1.07029842296127, 0.445149928725711, -0.986338470177715, -0.207729334549163,
0.146619643496565, -1.07290918879056, 0.385381081617997, 0.534497721129405,
-0.902058044918584, -0.994572369688443, 0.180823001094562, -0.866662979176913,
-0.202275546504082, 1.73827586670431, 2.13966378394672, -0.165781203871104,
0.533931178962832, 0.822623338094019, 1.74551382869949, 1.32794595998142,
2.43573160454602, -0.559235835256031, -0.362748369122295, 0.962221699341186,
1.60692805915648, 0.928717057173142), W = c(-2.83762382837145,
-0.0881326865595118, -0.849333816502672, -3.47988767629463, -1.25757341429974,
1.84520152770484, -3.05023294700363, 0.00359941535195923, -6.56098795097819,
-3.49031810270443, -4.72002812925721, -5.8535812329552, 3.8408026234803,
-0.185770570993576, -2.90334736382513, -1.36070438950823, 0.98928972872898,
4.56099504732292, 4.64544468251834, -3.02066411165329, 0.616674402296625,
5.91850052824554, -0.515720909998593, -3.38773746666513, -3.68833342688417,
-0.734948617130689, 1.43850797310029, 2.38978672741508, -2.57084901665802,
-0.145829650882282, -3.35360143478379, -1.47279229368909, -4.42324187193105,
0.991719825790708, -2.50310563360398, 3.07779435805313, -1.32782534107766,
3.56323944057156, 7.29719069966174, 1.19307470232548, 1.40086649613106,
-2.01403461672415, -2.39775611572149, 0.771955483382669, 4.27959416950681,
0.403938975289119, -0.849080251103613, -4.09776416085041, 1.21595604447267,
-2.05700790259951, -1.528229857328, 1.94298831392009, -5.2991157119565,
-0.671165412529572, -0.507466314736962, -2.04536515574161, -0.0631163240487921,
0.506176392202083, 6.28698004765225, -0.136395621843103, 0.11629291113172,
-0.266486705229334, -1.62824000948479, -2.65620379813359, 0.522554460895879,
-1.4430959240814, -2.29002246193027, -4.20932914045798, 0.775344843756961,
-3.10339695092203, 4.43830824189278, -0.386409948433949, -1.37520970091814,
0.702201027077844, -0.0774679544406474)), row.names = c(NA, -75L
), class = "data.frame")

I would recommend the feols function from the fixest package:
library(fixest)
# Two-part IV formula: the year dummies are exogenous controls, and
# `x ~ z` after the pipe declares x as endogenous with z as its instrument.
# Standard errors are clustered on the "id" column.
feols(
  y ~ as.factor(year) | x ~ z,
  cluster = "id",
  data = data
)

Related

Panel regression with cross sectional averages

I am estimating a panel regression model, and I need to add the cross sectional average of the dependent variable and regressors to the model.
I am struggling to implement the cross sectional averages in R. Can anyone help me out?
So I have a panel regression code below - using plm package.
I need to add cross sectional average of variable A, B, C and D to the right hand side of the regression
library(plm)
# Current specification: A regressed on B, C and D, estimated with
# model = "fd" and effect = "individual" on the panel data PanelS.
panel_fe <- plm(
  A ~ B + C + D,
  model = "fd",
  effect = "individual",
  data = PanelS
)
So my final regression model would be A = B + C + D + A_bar + B_bar + C_bar + D_bar, where A_bar, B_bar, C_bar and D_bar are the cross sectional averages of A, B, C and D, respectively.
My panel dataset, PanelS, is shown below.
structure(list(Country = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L), .Label = c("CountryA", "CountryB",
"CountryC", "CountryD", "CountryE", "CountryF", "CountryG", "CountryH",
"CountryI", "CountryJ"), class = "factor"), Year = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L), .Label = c("2000", "2001",
"2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009",
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017",
"2018", "2019"), class = "factor"), A = c(0.051539, 0.064525,
0.014292, 0.018774, 0.035449, 0.021988, 0.02396, 0.011415, 0.010358,
-0.029607, -0.020427, -0.012734, 0.006683, 0.007373, -0.039712,
-0.005499, 0.008682, 0.015326, 0.020524, 0.015101, 0.035355,
0.031157, 0.023387, 0.024198, 0.035353, 0.053873, 0.038743, 0.042338,
0.034935, 0.015377, 0.010599, 0.015154, 0.002919, 0.024291, 0.043819,
0.015901, 0.01897, 0.027767, 0.015992, 0.041976, 0.011223, 0.006144,
0.000778, 0.005873, 0.007194, -0.022017, -0.023338, -0.037765,
-0.049356, 0.026135, 0.035633, 0.015691, -0.006196, -0.00025,
0.001181, -0.001472, -0.009324, -0.022664, -0.022623, -0.019586,
-0.012207, -0.004603, -0.013073, -0.010771, -0.009882, -0.014417,
-0.031812, -0.043885, -0.050883, -0.039834, -0.020299, -0.000684,
0.011216, 0.005419, 0.000939, -0.005508, 0.006266, -0.008077,
-0.016137, -0.012681, 0.031612, 0.043729, 0.009314, 0.002734,
-0.012284, 0.002403, 0.016807, 0.019995, 0.033096, 0.024383,
0.010588, 0.019833, 0.031837, 0.03127, 0.029059, 0.020708, 0.019296,
0.017787, 0.032074, 0.027125, 0.005673, 0.003698, -5.3e-05, 0.001794,
-0.011977, -0.008686, -0.031588, -0.039411, -0.073931, -0.076715,
-0.039171, -0.025797, -0.007637, 0.00345, 0.009101, 0.01674,
-0.006968, -0.019178, -0.02438, -0.039663, 0.078313, 0.06707,
0.062822, 0.050771, 0.041274, 0.043921, 0.046429, 0.039418, 0.034671,
0.017356, 0.001054, 0.00414, 0.00226, 0.00275, 0.00085, 0.00495,
0.001276, -0.001446, -0.005771, -0.007513, 0.053734, 0.038679,
0.017375, 0.01438, 0.018403, 0.032943, 0.025539, 0.032463, 0.032267,
0.034009, 0.018229, 0.008958, 0.010079, 0.00749, 0.000604, 0.001948,
0.011782, 0.013253, 0.007898, 0.007546, 0.018052, -0.001123,
-0.012597, -0.042292, -0.058516, -0.022736, -0.03841, -0.050843,
-0.073979, -0.097242, -0.024712, 0.038037, 0.048685, -0.00624,
0.075575, 0.044947, 0.097171, 0.086809, 0.079856, 0.068521, 0.008062,
-0.00911, -0.010527, -4.3e-05, 0.002428, 0.004422, 0.008752,
0.019602, 0.01724, 0.01965, -0.008816, 0.011466, 0.020956, 0.021873,
0.021772, 0.024495, 0.021354, 0.015267, 0.018769, 0.016904),
C = c(0.75345, 0.70657, 0.645051, 0.510055, 0.433786, 0.35728,
0.265817, 0.208721, 0.163261, 0.130248, 0.136607, 0.153873,
0.152275, 0.166592, 0.170559, 0.27089, 0.259813, 0.292847,
0.253142, 0.222618, 0.56764082, 0.523543, 0.485083, 0.49081,
0.461501, 0.44156, 0.374122, 0.315494, 0.27346, 0.333132,
0.401818, 0.425879, 0.460709, 0.448942, 0.440456, 0.442703,
0.397737, 0.372338, 0.359446, 0.340254, 0.064305, 0.05107,
0.047682, 0.056584, 0.055981, 0.051134, 0.047025, 0.046318,
0.037655, 0.045041, 0.071989, 0.066074, 0.061057, 0.097641,
0.101621, 0.105545, 0.09996, 0.099131, 0.091119, 0.082012,
0.120817, 0.120871, 0.138383, 0.13023, 0.141247, 0.146088,
0.119133, 0.100396, 0.084592, 0.185873, 0.368416, 0.479167,
0.4367, 0.421837, 0.400428, 0.416259, 0.37072, 0.40398, 0.390126,
0.371126, 0.079576, 0.074647, 0.076712, 0.074295, 0.074504,
0.079053, 0.080224, 0.082991, 0.082006, 0.15357, 0.161465,
0.201522, 0.190049, 0.219974, 0.236873, 0.227428, 0.219862,
0.200938, 0.223426, 0.209529, 0.217219, 0.224867, 0.258694,
0.248207, 0.221093, 0.189452, 0.159052, 0.124236, 0.119492,
0.123362, 0.217807, 0.296186, 0.339882, 0.371345, 0.376212,
0.391509, 0.378059, 0.373931, 0.351043, 0.347354, 0.440547,
0.424547, 0.409236, 0.401795, 0.427482, 0.426416, 0.399297,
0.381117, 0.339041, 0.325607, 0.415314, 0.469047, 0.482712,
0.536225, 0.562292, 0.598259, 0.636417, 0.631764, 0.612668,
0.596271, 0.605061, 0.503479, 0.518971, 0.498057, 0.492731,
0.484527, 0.486885, 0.43596, 0.388967, 0.374978, 0.407324,
0.381025, 0.371731, 0.375149, 0.402248, 0.449982, 0.437387,
0.422554, 0.407331, 0.389125, 0.989067, 1.049344, 1.070812,
1.048631, 1.014561, 1.028734, 1.073949, 1.036117, 1.03103,
1.094155, 1.267447, 1.474942, 1.752192, 1.619444, 1.784347,
1.802256, 1.770079, 1.807951, 1.792139, 1.862386, 0.601394,
0.590658, 0.579365, 0.597035, 0.633089, 0.649877, 0.673465,
0.667047, 0.639942, 0.655222, 0.729901, 0.823816, 0.79801,
0.811354, 0.787169, 0.756694, 0.72207, 0.692768, 0.651024,
0.617801), B = c(0.147502302, 0.043680673, -0.212478849,
-0.266834333, -0.228099071, -0.199890362, -0.968175801, 1.047500546,
1.273127656, 1.227657506, -0.286068921, -1.356896168, -1.442625298,
-0.291748363, 2.029875219, 1.099611751, -1.112127832, -0.894025857,
0.103213651, 0.286801553, 0.756833023, 0.591945192, 0.525259532,
0.466656359, 0.706692697, -2.361722697, -2.777257989, -4.097114222,
-4.564987155, 2.317853991, 3.44030537, 3.034469093, 5.845290721,
0.403542521, 0.128582254, 0.817094156, -0.886707561, -2.998573025,
-0.491794488, -0.856367773, 0.023343476, -0.209503364, -0.084839186,
-0.146285026, -0.256672799, -0.093852713, 0.145824486, 0.434606031,
0.966980327, 0.67904687, -0.292659443, -0.487763914, -0.084930583,
-0.32722087, -0.442172133, -0.168366978, -0.186469629, 0.046322287,
0.181126569, 0.303486593, 0.171541123, -0.348150815, -0.407466419,
-0.624622679, -0.354132366, -0.15050691, 0.700892294, 0.67692383,
1.014111655, 0.862019536, 0.395600738, -0.256706715, -0.542246369,
-0.539422399, -0.405088653, -0.247954994, -0.497333992, -0.010723655,
0.393516751, 0.169750037, -0.581903347, -0.730163914, 0.351894514,
0.629568917, 0.882078894, 0.760041333, -0.564317727, -0.57799292,
-0.433736512, 0.513350369, 0.55464973, -0.224497194, -0.074326596,
-0.123301819, -0.432013928, -0.25316664, -0.374406673, 0.116449941,
0.308969388, 0.252824183, 2.398228162, -0.033362631, -1.681378615,
-3.655293426, -2.793256764, -3.636310622, 0.149490332, 3.951131246,
7.177449077, 4.831325877, 2.050070679, 1.314471427, -1.687424783,
-3.796189127, -3.329685346, -1.695252718, -3.010416797, -2.414597902,
1.199960369, 4.661041564, 0.531518012, -1.384184059, -0.64216453,
-0.13206166, 0.249287935, -0.153010531, -0.987952985, -1.71711917,
-0.678751076, 0.890062065, 1.663691535, 1.883735194, 2.171029985,
2.383501603, 1.490313839, -0.732542129, -0.291797363, -1.655272704,
-1.613245217, -1.275038743, -0.789256935, -3.589249982, 0.502475039,
1.840081099, 1.141218417, 3.130100399, 3.94751837, 0.97811035,
0.013586974, -3.245960526, -2.068241886, -1.82476664, -1.481654499,
0.37039449, -1.516414277, -1.722381744, 0.683458083, 0.153189319,
3.410781995, 0.067011953, -3.09418792, -4.09753755, -4.682167411,
-1.333607727, 2.505605899, -4.332639317, -2.190945016, 4.048457741,
11.60535564, 13.61047901, 5.145259686, -0.712611552, -3.385649938,
7.214394614, -10.34401695, -1.841542179, -6.437949187, -4.545422837,
-0.012548047, 2.881273043, 3.227611639, 10.96399365, 16.38843255,
14.72001327, -13.84595255, -10.51570643, -13.59695535, -36.70577424,
-12.07070647, 12.51742535, 52.88207865, 9.143152612, -7.818895359,
-15.57456939, -21.31957866, -23.55720863, -5.574415019, 5.783084584,
12.02189272, 22.93207708), D = c(0.77780751, 0.793229898,
0.80623893, 0.821155065, 0.836880111, 0.854312944, 0.873660631,
0.890537317, 0.907536298, 0.912375095, 0.929637942, 0.946439284,
0.965000087, 0.97726773, 0.986870808, 1, 1.019208507, 1.037842597,
1.054711181, 1.072171599, 0.534008473, 0.566583199, 0.58762954,
0.601043497, 0.63362178, 0.673913677, 0.719447102, 0.799187909,
0.864173776, 0.899162389, 0.909465125, 0.96350569, 0.978220642,
0.971679886, 0.976158221, 1, 1.025374896, 1.065804414, 1.108567186,
1.166769344, 0.588726028, 0.64526073, 0.733094431, 0.718268082,
0.746291144, 0.799900392, 0.846050389, 0.894179583, 1.015232882,
0.982856394, 1.012948099, 1.041332642, 1.032947106, 1.013566583,
0.980944689, 1, 1.020576612, 1.061740647, 1.117831183, 1.159906251,
0.750587042, 0.769670674, 0.790024355, 0.801712216, 0.817505148,
0.83991247, 0.856517319, 0.878345181, 0.914006005, 0.920044857,
0.949573071, 0.955207703, 0.978810398, 0.985618398, 0.996205139,
1, 1.004364708, 1.017159213, 1.021013703, 1.02682649, 0.825278825,
0.836048671, 0.847570474, 0.858769029, 0.86834942, 0.871868036,
0.875331803, 0.890827568, 0.898928134, 0.915485416, 0.921392822,
0.931246968, 0.945182975, 0.963702812, 0.981800571, 1, 1.013277522,
1.026999204, 1.044176589, 1.067069774, 0.490666665, 0.523850087,
0.54906662, 0.570457925, 0.597126217, 0.632406036, 0.689467717,
0.775073059, 0.828560075, 0.827109078, 0.842215091, 0.887572897,
0.923280339, 0.960610381, 0.988936452, 1, 1.022699304, 1.054533263,
1.098615084, 1.134067127, 0.757140805, 0.809228408, 0.851488047,
0.884918505, 0.889385715, 0.916751643, 0.948479832, 0.960072842,
0.956196673, 0.911566837, 0.884542463, 0.89644222, 0.917048164,
0.929279352, 0.929337342, 1, 1.010128912, 1.026719845, 1.029923385,
1.062349178, 0.786853444, 0.804351028, 0.831286834, 0.859995963,
0.886334727, 0.906191485, 0.937863282, 0.969963165, 1.012104032,
1.038112793, 1.036283847, 1.046222, 1.043339336, 1.02279939,
1.002888566, 1, 0.994233243, 0.998082845, 0.997049083, 0.998951287,
0.740171055, 0.770579402, 0.802054487, 0.833603662, 0.865965514,
0.90147914, 0.937354271, 0.969378485, 0.99123068, 0.992657113,
0.994179737, 0.993983379, 0.992844694, 0.99680058, 0.994574042,
1, 1.003228988, 1.016266499, 1.028341184, 1.04261954, 0.801617134,
0.817716283, 0.834621959, 0.850140657, 0.863935678, 0.880664424,
0.899645623, 0.9226463, 0.944486016, 0.945115307, 0.95522518,
0.964280334, 0.975483583, 0.983073825, 0.988745617, 1, 1.005225593,
1.010468623, 1.020086873, 1.032605559)), row.names = c("CountryA-2000",
"CountryA-2001", "CountryA-2002", "CountryA-2003", "CountryA-2004",
"CountryA-2005", "CountryA-2006", "CountryA-2007", "CountryA-2008",
"CountryA-2009", "CountryA-2010", "CountryA-2011", "CountryA-2012",
"CountryA-2013", "CountryA-2014", "CountryA-2015", "CountryA-2016",
"CountryA-2017", "CountryA-2018", "CountryA-2019", "CountryB-2000",
"CountryB-2001", "CountryB-2002", "CountryB-2003", "CountryB-2004",
"CountryB-2005", "CountryB-2006", "CountryB-2007", "CountryB-2008",
"CountryB-2009", "CountryB-2010", "CountryB-2011", "CountryB-2012",
"CountryB-2013", "CountryB-2014", "CountryB-2015", "CountryB-2016",
"CountryB-2017", "CountryB-2018", "CountryB-2019", "CountryC-2000",
"CountryC-2001", "CountryC-2002", "CountryC-2003", "CountryC-2004",
"CountryC-2005", "CountryC-2006", "CountryC-2007", "CountryC-2008",
"CountryC-2009", "CountryC-2010", "CountryC-2011", "CountryC-2012",
"CountryC-2013", "CountryC-2014", "CountryC-2015", "CountryC-2016",
"CountryC-2017", "CountryC-2018", "CountryC-2019", "CountryD-2000",
"CountryD-2001", "CountryD-2002", "CountryD-2003", "CountryD-2004",
"CountryD-2005", "CountryD-2006", "CountryD-2007", "CountryD-2008",
"CountryD-2009", "CountryD-2010", "CountryD-2011", "CountryD-2012",
"CountryD-2013", "CountryD-2014", "CountryD-2015", "CountryD-2016",
"CountryD-2017", "CountryD-2018", "CountryD-2019", "CountryE-2000",
"CountryE-2001", "CountryE-2002", "CountryE-2003", "CountryE-2004",
"CountryE-2005", "CountryE-2006", "CountryE-2007", "CountryE-2008",
"CountryE-2009", "CountryE-2010", "CountryE-2011", "CountryE-2012",
"CountryE-2013", "CountryE-2014", "CountryE-2015", "CountryE-2016",
"CountryE-2017", "CountryE-2018", "CountryE-2019", "CountryF-2000",
"CountryF-2001", "CountryF-2002", "CountryF-2003", "CountryF-2004",
"CountryF-2005", "CountryF-2006", "CountryF-2007", "CountryF-2008",
"CountryF-2009", "CountryF-2010", "CountryF-2011", "CountryF-2012",
"CountryF-2013", "CountryF-2014", "CountryF-2015", "CountryF-2016",
"CountryF-2017", "CountryF-2018", "CountryF-2019", "CountryG-2000",
"CountryG-2001", "CountryG-2002", "CountryG-2003", "CountryG-2004",
"CountryG-2005", "CountryG-2006", "CountryG-2007", "CountryG-2008",
"CountryG-2009", "CountryG-2010", "CountryG-2011", "CountryG-2012",
"CountryG-2013", "CountryG-2014", "CountryG-2015", "CountryG-2016",
"CountryG-2017", "CountryG-2018", "CountryG-2019", "CountryH-2000",
"CountryH-2001", "CountryH-2002", "CountryH-2003", "CountryH-2004",
"CountryH-2005", "CountryH-2006", "CountryH-2007", "CountryH-2008",
"CountryH-2009", "CountryH-2010", "CountryH-2011", "CountryH-2012",
"CountryH-2013", "CountryH-2014", "CountryH-2015", "CountryH-2016",
"CountryH-2017", "CountryH-2018", "CountryH-2019", "CountryI-2000",
"CountryI-2001", "CountryI-2002", "CountryI-2003", "CountryI-2004",
"CountryI-2005", "CountryI-2006", "CountryI-2007", "CountryI-2008",
"CountryI-2009", "CountryI-2010", "CountryI-2011", "CountryI-2012",
"CountryI-2013", "CountryI-2014", "CountryI-2015", "CountryI-2016",
"CountryI-2017", "CountryI-2018", "CountryI-2019", "CountryJ-2000",
"CountryJ-2001", "CountryJ-2002", "CountryJ-2003", "CountryJ-2004",
"CountryJ-2005", "CountryJ-2006", "CountryJ-2007", "CountryJ-2008",
"CountryJ-2009", "CountryJ-2010", "CountryJ-2011", "CountryJ-2012",
"CountryJ-2013", "CountryJ-2014", "CountryJ-2015", "CountryJ-2016",
"CountryJ-2017", "CountryJ-2018", "CountryJ-2019"), class = c("pdata.frame",
"data.frame"), index = structure(list(Country = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L), .Label = c("CountryA",
"CountryB", "CountryC", "CountryD", "CountryE", "CountryF", "CountryG",
"CountryH", "CountryI", "CountryJ"), class = "factor"), Year = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L), .Label = c("2000", "2001",
"2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009",
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017",
"2018", "2019"), class = "factor")), class = c("pindex", "data.frame"
), row.names = c(NA, 200L)))
You can use the Between() function from the plm package to calculate the cross sectional averages and add them to your data:
library(plm)
# PanelS has to be a pdata.frame; if it is a plain data.frame, convert it
# first with pdata.frame(your_data, index).
# Between() is called with its default effect, so each observation receives
# the mean of its own individual (here: Country) -- NOTE(review): pass
# effect = "time" instead if per-year averages across countries are wanted.
PanelS$A_bar <- Between(PanelS$A)
PanelS$B_bar <- Between(PanelS$B)
PanelS$C_bar <- Between(PanelS$C)
PanelS$D_bar <- Between(PanelS$D)
# Pooled regression of A on the original regressors plus the averages.
mod <- plm(
  A ~ B + C + D + A_bar + B_bar + C_bar + D_bar,
  model = "pooling",
  effect = "individual",
  data = PanelS
)
summary(mod)
# Pooling Model
#
# Call:
# plm(formula = A ~ B + C + D + A_bar + B_bar + C_bar + D_bar,
# data = PanelS, effect = "individual", model = "pooling")
#
# Balanced Panel: n = 10, T = 20, N = 200
#
# Residuals:
# Min. 1st Qu. Median 3rd Qu. Max.
# -0.06143690 -0.01311792 0.00070253 0.01186605 0.05107105
#
# Coefficients:
# Estimate Std. Error t-value Pr(>|t|)
# (Intercept) -0.00000000000001042 0.03313743211380626 0.0000 1.000000
# B -0.00076930351859426 0.00020566635571130 -3.7405 0.000242 ***
# C 0.10827039012266901 0.00949296134830719 11.4053 < 0.00000000000000022 ***
# D -0.04222788490989914 0.01136058813979121 -3.7171 0.000264 ***
# A_bar 0.99999999999911215 0.09632471140222754 10.3816 < 0.00000000000000022 ***
# C_bar -0.10827039012256123 0.01033406661607372 -10.4770 < 0.00000000000000022 ***
# D_bar 0.04222788490990802 0.03874710199411169 1.0898 0.277145
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Total Sum of Squares: 0.17549
# Residual Sum of Squares: 0.07128
# R-Squared: 0.59382
# Adj. R-Squared: 0.58119
# F-statistic: 47.0268 on 6 and 193 DF, p-value: < 0.000000000000000222
Note that it seems like you want to estimate a fixed effects model but your estimation has model = "fd" to estimate a first-differenced model. Also note that the cross sectional averages will drop out of the estimation of a fixed effects model.

support subtraction between two different itemsets when having same items

Using arules, I have two sets of itemsets, and for the itemsets that occur in both sets I want to subtract one support from the other.
> inspect(fsets_model_test)
items support count
[1] {SURFSKINTEMP=6,MODIS_LST=1} 0.01235235 663
[2] {TOTCO=13,MODIS_LST=1} 0.01373104 737
[3] {TOTCO=6,MODIS_LST=1} 0.01393598 748
[4] {TOTO3=15,MODIS_LST=1} 0.01265045 679
[5] {TOTH2OVAP=6,MODIS_LST=1} 0.01548236 831
[6] {TOTH2OVAP=1,MODIS_LST=1} 0.01565004 840
> inspect(fsets_nonsesmic_test)
items support count
[1] {TOTCO=6,MODIS_LST=1} 0.02192761 10013
[2] {TOTCO=13,MODIS_LST=1} 0.02261524 10327
[3] {TOTO3=15,MODIS_LST=1} 0.02432556 11108
[4] {SURFAIRTEMP=3,TOTH2OVAP=1,MODIS_LST=1} 0.01772735 8095
[5] {TOTH2OVAP=1,MODIS_LST=1} 0.02873605 13122
[6] {SURFAIRTEMP=3,TOTH2OVAP=1} 0.01856828 8479
You can see that fsets_model_test and fsets_nonsesmic_test share some itemsets, e.g. {TOTO3=15,MODIS_LST=1}.
For each shared itemset I want to subtract the two supports; in the above case that is
0.02432556 - 0.01265045 = 0.01167511, and then build a new set of itemsets from the result.
How can I implement this in arules? Thanks.
following are the example itemsets
one itemsets
fsets_model_test <- new("itemsets"
, items = new("itemMatrix"
, data = new("ngCMatrix"
, i = c(5L, 121L, 74L, 121L, 67L, 121L, 59L, 121L, 33L, 121L, 28L,
121L)
, p = c(0L, 2L, 4L, 6L, 8L, 10L, 12L)
, Dim = c(125L, 6L)
, Dimnames = list(NULL, NULL)
, factors = list()
)
, itemInfo = structure(list(labels = c("SURFSKINTEMP=1", "SURFSKINTEMP=2",
"SURFSKINTEMP=3", "SURFSKINTEMP=4", "SURFSKINTEMP=5", "SURFSKINTEMP=6",
"SURFSKINTEMP=7", "SURFSKINTEMP=8", "SURFSKINTEMP=9", "SURFSKINTEMP=10",
"SURFSKINTEMP=11", "SURFSKINTEMP=12", "SURFSKINTEMP=13", "SURFSKINTEMP=14",
"SURFSKINTEMP=15", "SURFSKINTEMP=16", "SURFAIRTEMP=1", "SURFAIRTEMP=2",
"SURFAIRTEMP=3", "SURFAIRTEMP=4", "SURFAIRTEMP=5", "SURFAIRTEMP=6",
"SURFAIRTEMP=7", "SURFAIRTEMP=8", "SURFAIRTEMP=9", "SURFAIRTEMP=10",
"SURFAIRTEMP=11", "SURFAIRTEMP=12", "TOTH2OVAP=1", "TOTH2OVAP=2",
"TOTH2OVAP=3", "TOTH2OVAP=4", "TOTH2OVAP=5", "TOTH2OVAP=6", "TOTH2OVAP=7",
"TOTH2OVAP=8", "TOTH2OVAP=9", "TOTH2OVAP=10", "TOTH2OVAP=11",
"TOTH2OVAP=12", "TOTH2OVAP=13", "TOTH2OVAP=14", "TOTH2OVAP=15",
"TOTH2OVAP=16", "TOTH2OVAP=17", "TOTO3=1", "TOTO3=2", "TOTO3=3",
"TOTO3=4", "TOTO3=5", "TOTO3=6", "TOTO3=7", "TOTO3=8", "TOTO3=9",
"TOTO3=10", "TOTO3=11", "TOTO3=12", "TOTO3=13", "TOTO3=14", "TOTO3=15",
"TOTO3=16", "TOTO3=17", "TOTCO=1", "TOTCO=2", "TOTCO=3", "TOTCO=4",
"TOTCO=5", "TOTCO=6", "TOTCO=7", "TOTCO=8", "TOTCO=9", "TOTCO=10",
"TOTCO=11", "TOTCO=12", "TOTCO=13", "TOTCO=14", "TOTCO=15", "TOTCH4=1",
"TOTCH4=2", "TOTCH4=3", "TOTCH4=4", "TOTCH4=5", "TOTCH4=6", "TOTCH4=7",
"TOTCH4=8", "TOTCH4=9", "TOTCH4=10", "TOTCH4=11", "TOTCH4=12",
"TOTCH4=13", "TOTCH4=14", "OLR_ARIS=1", "OLR_ARIS=2", "OLR_ARIS=3",
"OLR_ARIS=4", "OLR_ARIS=5", "OLR_ARIS=6", "OLR_ARIS=7", "OLR_ARIS=8",
"OLR_ARIS=9", "OLR_ARIS=10", "CLROLR_ARIS=1", "CLROLR_ARIS=2",
"CLROLR_ARIS=3", "CLROLR_ARIS=4", "CLROLR_ARIS=5", "CLROLR_ARIS=6",
"CLROLR_ARIS=7", "CLROLR_ARIS=8", "CLROLR_ARIS=9", "CLROLR_ARIS=10",
"OLR_NOAA=1", "OLR_NOAA=2", "OLR_NOAA=3", "OLR_NOAA=4", "OLR_NOAA=5",
"OLR_NOAA=6", "OLR_NOAA=7", "OLR_NOAA=8", "OLR_NOAA=9", "OLR_NOAA=10",
"MODIS_LST=1", "MODIS_LST=2", "MODIS_LST=3", "MODIS_LST=4"),
variables = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L,
2L), .Label = c("CLROLR_ARIS", "MODIS_LST", "OLR_ARIS", "OLR_NOAA",
"SURFAIRTEMP", "SURFSKINTEMP", "TOTCH4", "TOTCO", "TOTH2OVAP",
"TOTO3"), class = "factor"), levels = structure(c(1L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L,
1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 1L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 2L, 1L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 2L, 1L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 2L, 1L, 10L, 11L, 12L), .Label = c("1", "10", "11",
"12", "13", "14", "15", "16", "17", "2", "3", "4", "5", "6",
"7", "8", "9"), class = "factor")), .Names = c("labels",
"variables", "levels"), row.names = c(NA, -125L), class = "data.frame")
, itemsetInfo = structure(list(), .Names = character(0), row.names = integer(0), class = "data.frame")
)
, tidLists = NULL
, quality = structure(list(support = c(0.0123523493684093, 0.0137310429630734,
0.0139359839028207, 0.0126504452807691, 0.0154823564481872, 0.0156500353988896
), count = c(663, 737, 748, 679, 831, 840)), .Names = c("support",
"count"), row.names = c(NA, 6L), class = "data.frame")
, info = structure(list(data = model_data_tr, ntransactions = 53674L,
support = 0.01), .Names = c("data", "ntransactions", "support"
))
)
another itemsets is:
fsets_nonsesmic_test <- new("itemsets"
, items = new("itemMatrix"
, data = new("ngCMatrix"
, i = c(67L, 121L, 74L, 121L, 59L, 121L, 18L, 28L, 121L, 28L, 121L,
18L, 28L)
, p = c(0L, 2L, 4L, 6L, 9L, 11L, 13L)
, Dim = c(125L, 6L)
, Dimnames = list(NULL, NULL)
, factors = list()
)
, itemInfo = structure(list(labels = c("SURFSKINTEMP=1", "SURFSKINTEMP=2",
"SURFSKINTEMP=3", "SURFSKINTEMP=4", "SURFSKINTEMP=5", "SURFSKINTEMP=6",
"SURFSKINTEMP=7", "SURFSKINTEMP=8", "SURFSKINTEMP=9", "SURFSKINTEMP=10",
"SURFSKINTEMP=11", "SURFSKINTEMP=12", "SURFSKINTEMP=13", "SURFSKINTEMP=14",
"SURFSKINTEMP=15", "SURFSKINTEMP=16", "SURFAIRTEMP=1", "SURFAIRTEMP=2",
"SURFAIRTEMP=3", "SURFAIRTEMP=4", "SURFAIRTEMP=5", "SURFAIRTEMP=6",
"SURFAIRTEMP=7", "SURFAIRTEMP=8", "SURFAIRTEMP=9", "SURFAIRTEMP=10",
"SURFAIRTEMP=11", "SURFAIRTEMP=12", "TOTH2OVAP=1", "TOTH2OVAP=2",
"TOTH2OVAP=3", "TOTH2OVAP=4", "TOTH2OVAP=5", "TOTH2OVAP=6", "TOTH2OVAP=7",
"TOTH2OVAP=8", "TOTH2OVAP=9", "TOTH2OVAP=10", "TOTH2OVAP=11",
"TOTH2OVAP=12", "TOTH2OVAP=13", "TOTH2OVAP=14", "TOTH2OVAP=15",
"TOTH2OVAP=16", "TOTH2OVAP=17", "TOTO3=1", "TOTO3=2", "TOTO3=3",
"TOTO3=4", "TOTO3=5", "TOTO3=6", "TOTO3=7", "TOTO3=8", "TOTO3=9",
"TOTO3=10", "TOTO3=11", "TOTO3=12", "TOTO3=13", "TOTO3=14", "TOTO3=15",
"TOTO3=16", "TOTO3=17", "TOTCO=1", "TOTCO=2", "TOTCO=3", "TOTCO=4",
"TOTCO=5", "TOTCO=6", "TOTCO=7", "TOTCO=8", "TOTCO=9", "TOTCO=10",
"TOTCO=11", "TOTCO=12", "TOTCO=13", "TOTCO=14", "TOTCO=15", "TOTCH4=1",
"TOTCH4=2", "TOTCH4=3", "TOTCH4=4", "TOTCH4=5", "TOTCH4=6", "TOTCH4=7",
"TOTCH4=8", "TOTCH4=9", "TOTCH4=10", "TOTCH4=11", "TOTCH4=12",
"TOTCH4=13", "TOTCH4=14", "OLR_ARIS=1", "OLR_ARIS=2", "OLR_ARIS=3",
"OLR_ARIS=4", "OLR_ARIS=5", "OLR_ARIS=6", "OLR_ARIS=7", "OLR_ARIS=8",
"OLR_ARIS=9", "OLR_ARIS=10", "CLROLR_ARIS=1", "CLROLR_ARIS=2",
"CLROLR_ARIS=3", "CLROLR_ARIS=4", "CLROLR_ARIS=5", "CLROLR_ARIS=6",
"CLROLR_ARIS=7", "CLROLR_ARIS=8", "CLROLR_ARIS=9", "CLROLR_ARIS=10",
"OLR_NOAA=1", "OLR_NOAA=2", "OLR_NOAA=3", "OLR_NOAA=4", "OLR_NOAA=5",
"OLR_NOAA=6", "OLR_NOAA=7", "OLR_NOAA=8", "OLR_NOAA=9", "OLR_NOAA=10",
"MODIS_LST=1", "MODIS_LST=2", "MODIS_LST=3", "MODIS_LST=4"),
variables = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L,
2L), .Label = c("CLROLR_ARIS", "MODIS_LST", "OLR_ARIS", "OLR_NOAA",
"SURFAIRTEMP", "SURFSKINTEMP", "TOTCH4", "TOTCO", "TOTH2OVAP",
"TOTO3"), class = "factor"), levels = structure(c(1L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L,
1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 1L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 2L, 1L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 2L, 1L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 2L, 1L, 10L, 11L, 12L), .Label = c("1", "10", "11",
"12", "13", "14", "15", "16", "17", "2", "3", "4", "5", "6",
"7", "8", "9"), class = "factor")), .Names = c("labels",
"variables", "levels"), row.names = c(NA, -125L), class = "data.frame")
, itemsetInfo = structure(list(), .Names = character(0), row.names = integer(0), class = "data.frame")
)
, tidLists = NULL
, quality = structure(list(support = c(0.0219276058330541, 0.0226152387334415,
0.024325561329628, 0.0177273513650827, 0.0287360475123675, 0.0185682782241552
), count = c(10013, 10327, 11108, 8095, 13122, 8479)), .Names = c("support",
"count"), row.names = c(NA, 6L), class = "data.frame")
, info = structure(list(data = nonsesmic_data_tr, ntransactions = 456639L,
support = 0.01), .Names = c("data", "ntransactions", "support"
))
)
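If the two sets come from transaction data that are compatible (see ?itemCoding), then you can use match() to find the itemsets of one set in the other. After that, it is easy to subtract the supports. For example, m <- match(fsets_model_test, fsets_nonsesmic_test) gives, for each itemset in the first set, its position in the second set (NA if it does not occur there), so quality(fsets_nonsesmic_test)$support[m] - quality(fsets_model_test)$support is the difference in support for the shared itemsets.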

How do I melt/gather multiple variables (error bars) into one for mapping to geom_bar?

I'll start with my goal, which is to generate graphs for each of my variables (magnitude [mag], duration [dura] and distance [dist]), with distinct error bars for train and test:
Almost finished graph
I have a data-frame that looks like this: (screenshot + dput below). It shows the responses (magnitude, distance, duration) during train and test of various biological strains along with their standard error (SEM). For example, the duration response at train is in column "train_avg_dura" and at test is "test_avg_dura". The standard error for each of these is in the columns train_duraSEM and test_duraSEM
df_group_sum.wide (data-frame)
dput data:
df_group_sum.wide <-
structure(list(strain = structure(1:8, .Label = c("N2", "acy-1(LOF)",
"acy-1(GOF)", "pde-4", "unc-43", "crh-1", "glr-1", "avr-14"), class = "factor"),
test_avg_dist = c(0.23102447163515, 0.198503787878788, 0.23892936802974,
0.247270588235294, 0.148316666666667, 0.195762711864407,
0.204740740740741, 0.238755154639175), test_avg_dura = c(1.04759733036707,
1.15537878787879, 0.914684014869888, 1.12286274509804, 0.828916666666667,
0.785491525423729, 0.788407407407407, 1.02309278350515),
test_avg_mag = c(0.112163461525871, 0.113447031611172, 0.15930172539742,
0.105397926645665, 0.0370000063024116, 0.0823626968797451,
0.0441620688813484, 0.135786546158742), test_distSEM = c(0.00460504533342531,
0.0050568065734325, 0.00945562739572128, 0.00524044558789062,
0.00882224860763199, 0.00983820301449839, 0.0162322856355826,
0.00738407922404085), test_duraSEM = c(0.0187491841242793,
0.0287113186085301, 0.0283764910080623, 0.0215386973519077,
0.0471018319675206, 0.0341593217329755, 0.0564553992545153,
0.0271939362203803), test_magSEM = c(0.00335619679815181,
0.00443251320170775, 0.00919066553588191, 0.00432150262248429,
0.00400887448034098, 0.00664866437888279, 0.00575860867691942,
0.00524462205156711), train_avg_dist = c(0.337652222222222,
0.294218518518519, 0.338651851851852, 0.311313725490196,
0.254675, 0.2737, 0.390688888888889, 0.314817948717949),
train_avg_dura = c(1.3543, 1.429, 1.19151851851852, 1.37256862745098,
1.236, 1.06376666666667, 1.41396296296296, 1.31512820512821
), train_avg_mag = c(0.1930557426236, 0.19297076970836, 0.212916856705011,
0.127417008935649, 0.0841239843171108, 0.117210954090848,
0.115413610503398, 0.179227387006556)), class = "data.frame", .Names = c("strain",
"test_avg_dist", "test_avg_dura", "test_avg_mag", "test_distSEM",
"test_duraSEM", "test_magSEM", "train_avg_dist", "train_avg_dura",
"train_avg_mag"), row.names = c(NA, -8L))
The problem I am having is how to add error bars using SEM since I need them to be merged into one variable as opposed to two when I map the variable to geom_bar. I think this is a melt issue but I can't figure it out.
Update:
The melted data-frame I used to plot the graph is as follows:
structure(list(strain = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L), .Label = c("N2", "acy-1(LOF)",
"acy-1(GOF)", "pde-4", "unc-43", "crh-1", "glr-1", "avr-14"), class = "factor"),
variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 17L,
17L, 17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L), .Label = c("test_avg_dist",
"test_avg_dura", "test_avg_mag", "test_avg_prob", "test_avg_spd",
"test_distSEM", "test_duraSEM", "test_magSEM", "test_probSEM",
"test_spdSEM", "train_avg_dist", "train_avg_dura", "train_avg_mag",
"train_avg_prob", "train_avg_spd", "train_distSEM", "train_duraSEM",
"train_magSEM", "train_probSEM", "train_spdSEM"), class = "factor"),
value = c(0.23102447163515, 0.198503787878788, 0.23892936802974,
0.247270588235294, 0.148316666666667, 0.195762711864407,
0.204740740740741, 0.238755154639175, 1.04759733036707, 1.15537878787879,
0.914684014869888, 1.12286274509804, 0.828916666666667, 0.785491525423729,
0.788407407407407, 1.02309278350515, 0.112163461525871, 0.113447031611172,
0.15930172539742, 0.105397926645665, 0.0370000063024116,
0.0823626968797451, 0.0441620688813484, 0.135786546158742,
0.457040018571118, 0.563727434411572, 0.624264612406578,
0.392625726149316, 0.219488346025285, 0.355836464305103,
0.158243463050796, 0.549997886634136, 0.218104671667048,
0.175578055416405, 0.256197987699313, 0.218534931269605,
0.181253278716812, 0.235434749265196, 0.236043513165036,
0.229165553562148, 0.00460504533342531, 0.0050568065734325,
0.00945562739572128, 0.00524044558789062, 0.00882224860763199,
0.00983820301449839, 0.0162322856355826, 0.00738407922404085,
0.0187491841242793, 0.0287113186085301, 0.0283764910080623,
0.0215386973519077, 0.0471018319675206, 0.0341593217329755,
0.0564553992545153, 0.0271939362203803, 0.00335619679815181,
0.00443251320170775, 0.00919066553588191, 0.00432150262248429,
0.00400887448034098, 0.00664866437888279, 0.00575860867691942,
0.00524462205156711, 0.00460504533342531, 0.0050568065734325,
0.00945562739572128, 0.00524044558789062, 0.00882224860763199,
0.00983820301449839, 0.0162322856355826, 0.00738407922404085,
0.00148090077905166, 0.00224725406956702, 0.00293788372166611,
0.00142518092482957, 0.00475313026432338, 0.00259537819051875,
0.00439432015310276, 0.00179190641262238, 0.337652222222222,
0.294218518518519, 0.338651851851852, 0.311313725490196,
0.254675, 0.2737, 0.390688888888889, 0.314817948717949, 1.3543,
1.429, 1.19151851851852, 1.37256862745098, 1.236, 1.06376666666667,
1.41396296296296, 1.31512820512821, 0.1930557426236, 0.19297076970836,
0.212916856705011, 0.127417008935649, 0.0841239843171108,
0.117210954090848, 0.115413610503398, 0.179227387006556,
0.525206741295172, 0.606796097537911, 0.592920766963248,
0.383218177729097, 0.294853306191478, 0.37983654970313, 0.244065736387288,
0.529995494304863, 0.245519078777542, 0.204069564920836,
0.279438682643543, 0.223741850875084, 0.203505986396722,
0.244494243449087, 0.263225928969608, 0.235094347033923,
0.00509151719343593, 0.00741331297357774, 0.0110354960774679,
0.0058641318136066, 0.0114389388703232, 0.0108143010933781,
0.0182904578688527, 0.00913426247712326, 0.0167858570502119,
0.0279705569908445, 0.030133138276768, 0.0219057666071679,
0.0479637760140276, 0.0332974908188985, 0.0605392786801207,
0.0323033076008837, 0.00498395111761598, 0.0081988397756359,
0.0107052683837969, 0.00442352355941589, 0.00723029142814287,
0.00764631328347674, 0.00980735575566329, 0.00789476278044047,
0.00509151719343593, 0.00741331297357774, 0.0110354960774679,
0.0058641318136066, 0.0114389388703232, 0.0108143010933781,
0.0182904578688527, 0.00913426247712326, 0.00139403793044242,
0.00220415921330836, 0.00299625483623813, 0.00144528089431754,
0.00441088530148196, 0.00248394605240026, 0.00319027562414684,
0.00174638373495128)), row.names = c(NA, -160L), .Names = c("strain",
"variable", "value"), class = "data.frame")
The code I used to plot this (after removing SEM rows) is as follows:
# Dodged bar chart of the magnitude values per strain, one bar per series in
# `variable`. df_group_sum.long is expected to have had its SEM rows removed
# beforehand, so that only two series survive the "mag" filter and the two
# fill colours below are sufficient.
# Only `fill` is mapped, so the legend title is cleared through `fill` and no
# colour scale is added.
# The outer parentheses print the plot in addition to assigning it.
(abs_bar_mag <-
  df_group_sum.long %>%
  filter(grepl("mag", variable)) %>%
  ggplot(aes(x = strain, y = value, fill = variable)) +
  scale_fill_manual(values = c("lightseagreen", "indianred1")) +
  geom_col(position = "dodge") +
  # Placeholder for the error bars (still to be fed with the real SEM values):
  # geom_errorbar(aes(ymin = value - 1, ymax = value + 1), width = .1,
  #               position = position_dodge(width = 0.9)) +
  theme(panel.background = element_blank()) +
  theme(
    text = element_text(size = 20),
    axis.line = element_line(colour = "black")
  ) +
  ggtitle("") +
  theme(
    plot.title = element_text(size = 30, hjust = 0.5, face = "bold"),
    axis.text = element_text(size = 70),
    strip.text = element_text(size = 40),
    axis.text.x = element_text(angle = 65, hjust = 1, size = 40),
    axis.title.y = element_text(size = 65)
  ) +
  labs(fill = "", y = "Magnitude", x = "")
)
I appreciate any pointers or solutions you may have!
Thanks,
Aram
The issue here is that the avg columns and the SEM (standard error) columns need to stay together. This requires reshaping two value columns simultaneously. See section 3.a of Efficient reshaping using data.tables for more details.
Therefore, we start with the data in wide format (df_group_sum.wide). To be in line with the code provided by the OP, only magnitudes are plotted.
library(data.table)
library(ggplot2)
# Melt the magnitude averages and their SEM columns in one go, so that every
# row of the result carries an avg value together with its SEM value.
molten <- melt(
  data.table(df_group_sum.wide),
  id.vars = "strain",
  measure.vars = patterns("avg_mag$", "magSEM$"),
  value.name = c("avg", "SEM")
)
# Give the levels of `variable` readable names (test_mag / train_mag).
molten <- molten[
  , variable := forcats::lvls_revalue(variable, c("test_mag", "train_mag"))
][]
molten
strain variable avg SEM
1: N2 test_mag 0.11216346 0.003356197
2: acy-1(LOF) test_mag 0.11344703 0.004432513
3: acy-1(GOF) test_mag 0.15930173 0.009190666
4: pde-4 test_mag 0.10539793 0.004321503
5: unc-43 test_mag 0.03700001 0.004008874
6: crh-1 test_mag 0.08236270 0.006648664
7: glr-1 test_mag 0.04416207 0.005758609
8: avr-14 test_mag 0.13578655 0.005244622
9: N2 train_mag 0.19305574 NA
10: acy-1(LOF) train_mag 0.19297077 NA
11: acy-1(GOF) train_mag 0.21291686 NA
12: pde-4 train_mag 0.12741701 NA
13: unc-43 train_mag 0.08412398 NA
14: crh-1 train_mag 0.11721095 NA
15: glr-1 train_mag 0.11541361 NA
16: avr-14 train_mag 0.17922739 NA
# Dodged bars of the magnitude averages with avg +/- SEM error bars,
# coloured by series (test_mag / train_mag).
ggplot(
  data = molten,
  mapping = aes(
    x = strain,
    y = avg,
    ymin = avg - SEM,
    ymax = avg + SEM,
    fill = variable
  )
) +
  geom_col(position = "dodge") +
  geom_errorbar(width = 0.1, position = position_dodge(width = 0.9)) +
  scale_fill_manual(values = c("lightseagreen", "indianred1")) +
  labs(x = "", y = "Magnitude", fill = "") +
  theme_bw()
The OP also has provided a data.frame in long format df_group_sum.long which does contain more data than df_group_sum.wide. These should be plotted as well, now.
By looking at the variable names
unique(df_group_sum.long$variable)
[1] test_avg_dist test_avg_dura test_avg_mag test_avg_prob test_avg_spd
[6] test_distSEM test_duraSEM test_magSEM test_probSEM test_spdSEM
[11] train_avg_dist train_avg_dura train_avg_mag train_avg_prob train_avg_spd
[16] train_distSEM train_duraSEM train_magSEM train_probSEM train_spdSEM
20 Levels: test_avg_dist test_avg_dura test_avg_mag test_avg_prob ... train_spdSEM
the data.frame seems to contain aggregated data (avg and SEM) of five different variables (dist, dura, mag, prob, spd) of two data sets (train and test). Again, avg and SEM need to stay together on one row for plotting bar charts with error bars.
Unfortunately, the naming scheme is inconsistent. It would have been better if the variables containing standard errors had been named similarly to train_avg_mag, e.g., train_SEM_mag instead of train_magSEM.
So, the first step is to split up the variable names to get the different groups separately:
library(data.table)
# Work on a data.table copy of the long-format data.
DT <- data.table(df_group_sum.long)
# Split every name in `variable` on "_" or on a trailing "SEM" and store the
# pieces as dataset / measure / variable (overwriting the old `variable`).
# The inner chain repairs the rows whose split yields no third piece (V3 is
# NA; judging by the names these are the "<dataset>_<var>SEM" entries): the
# second piece is moved to V3 and V2 is set to the literal "SEM".
DT[, c("dataset", "measure", "variable") :=
DT[, tstrsplit(variable, "_|SEM$")][is.na(V3), `:=`(V3 = V2, V2 = "SEM")]]
DT
strain variable value dataset measure
1: N2 dist 0.231024472 test avg
2: acy-1(LOF) dist 0.198503788 test avg
3: acy-1(GOF) dist 0.238929368 test avg
4: pde-4 dist 0.247270588 test avg
5: unc-43 dist 0.148316667 test avg
---
156: pde-4 spd 0.001445281 train SEM
157: unc-43 spd 0.004410885 train SEM
158: crh-1 spd 0.002483946 train SEM
159: glr-1 spd 0.003190276 train SEM
160: avr-14 spd 0.001746384 train SEM
unique(DT[, variable])
"dist" "dura" "mag" "prob" "spd"
unique(DT[, dataset])
"test" "train"
unique(DT[, measure])
"avg" "SEM"
Now, the abbreviated variable names are replaced by their full names using an update on join:
# Lookup table from abbreviated variable name to the full name.
abbr2full <- data.table(
  variable = c("dist", "dura", "mag"),
  full = c("Distance", "Duration", "Magnitude")
)
# Update join: rows of DT whose `variable` is found in the lookup table get
# the full name written in place; the trailing [] prints the result.
DT[abbr2full, variable := i.full, on = "variable"][]
Finally, a facetted plot of all five variables is created. dcast() is used to reshape the data from long to a wide format where each row has two measures avg and SEM.
library(ggplot2)
# Faceted bar chart of the averages with avg +/- SEM error bars, one panel
# per variable and one bar colour per dataset.
# dcast() spreads `measure` into the columns avg and SEM so that both sit on
# the same row; value.var is named explicitly so that dcast() does not have
# to guess which column holds the values.
ggplot(
  dcast(DT, ... ~ measure, value.var = "value"),
  aes(strain, avg, ymin = avg - SEM, ymax = avg + SEM, fill = dataset)
) +
  geom_col(position = "dodge") +
  geom_errorbar(width = 0.1, position = position_dodge(width = 0.9)) +
  scale_fill_manual(values = c("lightseagreen", "indianred1")) +
  theme_bw() +
  labs(fill = "", y = "Average", x = "") +
  # Each variable gets its own y axis range.
  facet_wrap(~ variable, scales = "free_y") +
  theme(axis.text.x = element_text(angle = 65, hjust = 1))

"minimum count is not zero" error for zero inflated model

Here is the data of my regression :
y is the number of passengers on the platform of the train station in each 2-minute period, while A1 to A17 are the numbers of passengers in 17 study areas on the concourse. The time lag has already been accounted for by shifting the Xs.
Since sometimes no one is waiting in the study areas on the concourse, excess zeros occur, so I am planning to use a zero-inflated model. I have tried the code shown below, but it says "minimum count is not zero". What does that mean and how can I solve it? The Poisson model runs fine, but the zero-inflated one doesn't work.
> setwd('C:/Users/zuzymelody/Desktop')
> try<-read.csv('0inflated_2mins27peak.csv',header=TRUE)
> attach(try)
> names(try)
[1] "y" "A1" "A2" "A3" "A4" "A5" "A6" "A7" "A8" "A9" "A10" "A11"
[13] "A12" "A13" "A14" "A15" "A16" "A17"
> model1<-glm(y~A1+A2+A3+A4+A5+A6+A7+A8+A9+A10+A11+A12+A13+A14+A15+A16+A17,family="poisson")
> summary(model1)
Call:
glm(formula = y ~ A1 + A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 +
A10 + A11 + A12 + A13 + A14 + A15 + A16 + A17, family = "poisson")
Deviance Residuals:
Min 1Q Median 3Q Max
-7.8598 -3.4571 -0.3663 2.1867 12.5183
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 6.102009 0.164497 37.095 < 2e-16 ***
A1 -0.017555 0.003665 -4.790 1.66e-06 ***
A2 -0.026101 0.017569 -1.486 0.137371
A3 -0.179988 0.014976 -12.018 < 2e-16 ***
A4 -0.032584 0.007735 -4.213 2.52e-05 ***
A5 -0.019908 0.007014 -2.839 0.004532 **
A6 -0.044144 0.010266 -4.300 1.71e-05 ***
A7 0.049829 0.006518 7.645 2.09e-14 ***
A8 -0.080712 0.009819 -8.220 < 2e-16 ***
A9 0.007390 0.007105 1.040 0.298273
A10 0.041116 0.004085 10.065 < 2e-16 ***
A11 -0.041420 0.008418 -4.921 8.62e-07 ***
A12 -0.008241 0.007304 -1.128 0.259171
A13 -0.033161 0.008966 -3.699 0.000217 ***
A14 0.020818 0.005250 3.965 7.34e-05 ***
A15 -0.002995 0.006125 -0.489 0.624887
A16 -0.061997 0.017122 -3.621 0.000294 ***
A17 -0.025025 0.008391 -2.982 0.002860 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 1137.71 on 29 degrees of freedom
Residual deviance: 599.74 on 12 degrees of freedom
AIC: 840.1
Number of Fisher Scoring iterations: 5
>with(model1, cbind(res.deviance = deviance, df = df.residual,
p = pchisq(deviance, df.residual, lower.tail=FALSE)))
res.deviance df p
[1,] 599.7445 12 1.202013e-120
> require( pscl )
> Zip<-zeroinfl(model1,link="logit",dist="poisson")
**Error in zeroinfl(model1, link = "logit", dist = "poisson") :
invalid dependent variable, minimum count is not zero**
dput(try)
structure(list(y = c(156L, 74L, 221L, 207L, 168L, 36L, 128L,
208L, 99L, 117L, 228L, 211L, 341L, 173L, 196L, 310L, 112L, 203L,
104L, 183L, 325L, 143L, 218L, 166L, 218L, 127L, 136L, 38L, 102L,
34L), A1 = c(24L, 24L, 24L, 19L, 20L, 9L, 14L, 23L, 15L, 23L,
14L, 16L, 15L, 25L, 25L, 19L, 24L, 26L, 25L, 26L, 22L, 14L, 13L,
15L, 9L, 12L, 9L, 12L, 15L, 18L), A2 = c(2L, 4L, 0L, 3L, 0L,
1L, 1L, 2L, 1L, 2L, 0L, 2L, 2L, 0L, 1L, 1L, 3L, 3L, 2L, 2L, 3L,
2L, 3L, 5L, 4L, 3L, 4L, 1L, 2L, 1L), A3 = c(2L, 2L, 0L, 1L, 1L,
9L, 3L, 0L, 0L, 0L, 1L, 1L, 3L, 1L, 0L, 0L, 1L, 2L, 3L, 1L, 0L,
1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 2L), A4 = c(15L, 11L, 6L, 7L,
10L, 10L, 5L, 4L, 5L, 7L, 9L, 9L, 4L, 6L, 6L, 13L, 9L, 13L, 9L,
10L, 6L, 6L, 7L, 6L, 10L, 9L, 10L, 7L, 9L, 2L), A5 = c(13L, 10L,
6L, 6L, 11L, 19L, 13L, 14L, 7L, 7L, 6L, 8L, 10L, 5L, 7L, 9L,
9L, 11L, 3L, 13L, 8L, 8L, 8L, 6L, 8L, 9L, 9L, 14L, 9L, 6L), A6 = c(9L,
10L, 9L, 9L, 4L, 7L, 7L, 12L, 11L, 11L, 12L, 8L, 6L, 7L, 8L,
5L, 9L, 6L, 5L, 6L, 9L, 11L, 6L, 6L, 8L, 9L, 4L, 11L, 10L, 7L
), A7 = c(21L, 16L, 13L, 13L, 4L, 9L, 12L, 13L, 12L, 12L, 12L,
6L, 7L, 6L, 6L, 4L, 5L, 9L, 8L, 7L, 9L, 12L, 10L, 7L, 8L, 12L,
14L, 2L, 6L, 6L), A8 = c(1L, 5L, 10L, 10L, 1L, 9L, 6L, 6L, 7L,
7L, 5L, 6L, 3L, 2L, 4L, 0L, 4L, 2L, 5L, 5L, 5L, 3L, 2L, 4L, 3L,
8L, 10L, 8L, 2L, 5L), A9 = c(8L, 9L, 10L, 10L, 12L, 19L, 10L,
6L, 6L, 6L, 0L, 6L, 8L, 10L, 2L, 3L, 6L, 2L, 2L, 6L, 5L, 2L,
4L, 1L, 3L, 7L, 7L, 4L, 4L, 2L), A10 = c(7L, 10L, 12L, 20L, 24L,
21L, 24L, 18L, 20L, 18L, 26L, 21L, 12L, 11L, 18L, 18L, 19L, 16L,
25L, 21L, 22L, 14L, 12L, 17L, 21L, 14L, 14L, 10L, 8L, 7L), A11 = c(0L,
2L, 1L, 4L, 2L, 1L, 1L, 1L, 13L, 10L, 12L, 5L, 2L, 0L, 5L, 1L,
4L, 4L, 3L, 3L, 1L, 1L, 3L, 3L, 5L, 5L, 2L, 10L, 3L, 4L), A12 = c(12L,
14L, 14L, 17L, 10L, 14L, 13L, 19L, 7L, 5L, 6L, 6L, 8L, 7L, 13L,
11L, 10L, 8L, 6L, 6L, 9L, 14L, 9L, 10L, 8L, 9L, 8L, 9L, 5L, 7L
), A13 = c(6L, 2L, 1L, 5L, 9L, 6L, 7L, 4L, 12L, 5L, 9L, 10L,
3L, 7L, 4L, 2L, 2L, 6L, 4L, 6L, 7L, 4L, 9L, 6L, 11L, 4L, 5L,
4L, 6L, 6L), A14 = c(14L, 13L, 16L, 11L, 8L, 6L, 9L, 13L, 14L,
14L, 9L, 8L, 12L, 11L, 13L, 11L, 18L, 15L, 20L, 21L, 17L, 18L,
18L, 18L, 25L, 20L, 12L, 9L, 8L, 8L), A15 = c(7L, 6L, 7L, 5L,
4L, 9L, 12L, 12L, 11L, 12L, 9L, 8L, 7L, 8L, 10L, 16L, 8L, 8L,
13L, 10L, 5L, 5L, 8L, 10L, 10L, 4L, 6L, 6L, 6L, 7L), A16 = c(2L,
1L, 3L, 3L, 1L, 2L, 3L, 2L, 3L, 2L, 2L, 1L, 2L, 2L, 3L, 3L, 2L,
1L, 3L, 4L, 2L, 5L, 4L, 8L, 5L, 2L, 1L, 2L, 2L, 2L), A17 = c(10L,
13L, 13L, 2L, 5L, 1L, 3L, 3L, 5L, 4L, 4L, 6L, 4L, 6L, 3L, 2L,
2L, 2L, 7L, 8L, 3L, 7L, 5L, 6L, 7L, 6L, 6L, 3L, 4L, 3L)), .Names = c("y",
"A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10",
"A11", "A12", "A13", "A14", "A15", "A16", "A17"), class = "data.frame", row.names = c(NA,
-30L))
Above is the reproducible example. Sorry, it's my first time posting here and I don't know the rules well.
Your data frame does not contain a zero value in your dependent variable $y$:
min(mydata$y)
[1] 34
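You'll need to have at least one $y = 0$: a zero-inflated model describes excess zeros in the response, not in the predictors A1 to A17, so zeros in the study-area counts do not call for it.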

R - Manual legend color in geom_line ggplot2

I am trying to manually change the color of only the first item of a legend in a ggplot2 line plot.
I have several observations of a variable that I am displaying in a line plot, just like this:
# Line plot with points: one coloured series per level of `variable`,
# months on the x axis.
ggplot(
  data = tmp1,
  mapping = aes(
    x = factor(month),
    y = value,
    group = variable,
    colour = variable
  )
) +
  geom_line(size = 1) +
  geom_point(size = 2.5) +
  theme_grey(base_size = 18) +
  xlab(NULL) +
  ylab("%") +
  ggtitle("a) Cloud fraction") +
  # Tweaks applied on top of theme_grey(): no legend title, no x tick
  # labels, left-aligned plot title.
  theme(
    legend.title = element_blank(),
    axis.text.x = element_blank(),
    plot.title = element_text(hjust = 0)
  )
However, the first variable (CRU) is my reference and I would like to show its line in black. I managed to do this by adding one extra geom_line with the condition variable=='CRU':
# Same line plot as before, with the "CRU" series drawn a second time in
# black on top of the coloured layers.
ggplot(
  data = tmp1,
  mapping = aes(
    x = factor(month),
    y = value,
    group = variable,
    colour = variable
  )
) +
  geom_line(size = 1) +
  geom_point(size = 2.5) +
  theme_grey(base_size = 18) +
  # Black overlay for CRU; the constant colour is set outside aes().
  geom_line(
    data = subset(tmp1, variable == "CRU"),
    colour = "black",
    linetype = "solid",
    size = 1
  ) +
  geom_point(
    data = subset(tmp1, variable == "CRU"),
    colour = "black",
    size = 2.5
  ) +
  xlab(NULL) +
  ylab("%") +
  ggtitle("a) Cloud fraction") +
  # Tweaks applied on top of theme_grey(): no legend title, no x tick
  # labels, left-aligned plot title.
  theme(
    legend.title = element_blank(),
    axis.text.x = element_blank(),
    plot.title = element_text(hjust = 0)
  )
which works for the line, but the legend keeps the old colour.
How can I change the color of just the first element of the legend, in order to match the new black line?
This is an example of my data:
library(ggplot2)
tmp1 <- structure(list(month = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L), .Label = c("Jan", "Feb", "Mar",
"Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
), class = "factor"), variable = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L
), .Label = c("CRU", "CanESM2", "GFDL-ESM2M", "GISS-E2-H", "GISS-E2-R-CC",
"GISS-E2-R", "HadGEM2-AO", "HadGEM2-CC", "IPSL-CM5A-MR", "IPSL-CM5B-LR",
"MIROC4h", "MRI-CGCM3", "NorESM1-M", "bcc-csm1-1-m", "bcc-csm1-1",
"inmcm4"), class = "factor"), value = c(68.9226631460789, 68.2418796877392,
68.3045372212868, 66.5727907036073, 64.278360290491, 60.6452267972856,
56.4079999829923, 57.4384828307567, 60.874295882443, 63.70427487797,
65.9934520468731, 68.9723871966257, 69.0959015590216, 68.6126351492122,
65.9106136896166, 65.790169283913, 64.6320994816801, 63.894111784301,
62.0459530253135, 60.0455773681386, 59.4195693791228, 59.8531302282566,
62.8877658601921, 66.4625078340445, 63.4659654507164, 64.5466810117518,
63.6412932878715, 61.5786848043378, 60.6491980933614, 63.5160886052168,
62.739218138279, 60.8826348052995, 60.1196738813257, 59.0451443027396,
58.9044684656519, 61.5033887899156, 62.442928703121, 61.9933297554931,
61.686560285787, 62.1675956585161, 63.0625380934021, 63.3192922622326,
62.6727899590586, 60.9706714311941, 59.4656895840826, 59.8689092461429,
60.7585523645951, 62.2374164636759, 62.2586495696979, 62.3005886556949,
62.0719314334763, 61.7786313583016, 62.1037020616999, 62.5919637033876,
60.7746642298107, 58.7307471416832, 57.6602849809809, 57.3379551651851,
59.8210398283061, 61.5997238276034, 62.1190176575675, 62.2214930174241,
61.607539296931, 61.836536870373, 61.8298589429815, 62.0478835210295,
60.8165122782774, 59.224498365607, 57.5387307267022, 56.8641846144649,
59.6779581588162, 61.5822371331742, 56.9625864272884, 55.0519081266715,
53.9161532646461, 52.0847886852487, 54.1855963059705, 54.1565901942167,
53.8164314129289, 53.3013959169719, 52.1283494730607, 49.9814907883562,
51.0053330490513, 54.1758812796363, 54.1947459143536, 53.2985061657513,
51.5351727215781, 51.2131541342776, 53.040182168441, 53.4657505459587,
52.8257974728027, 52.8523832284788, 51.2527233914323, 48.0999294191007,
48.3915726340961, 50.9305288780026, 65.3647375158419, 64.6894843930494,
62.2700707798592, 60.2848148985731, 59.0797813854392, 58.6641353922813,
60.36671822738, 61.0883458866571, 60.3963355506111, 60.989444946264,
62.1570976843054, 64.0549504714623, 63.043822206253, 61.5388900651697,
61.0125502971802, 60.4999006674972, 60.9554692113674, 61.2665703834057,
61.1470225339614, 61.4827838311531, 60.0397138517742, 61.6503963603034,
62.7421837830534, 63.9911949044232, 55.7117557057576, 55.0687784028633,
51.7447044604762, 50.5160095376821, 51.7744811245234, 52.6710116909617,
52.9126480516047, 51.6347065362984, 50.6773480024225, 48.8928054774924,
50.3505731163001, 53.7488684714513, 61.558109087334, 61.6673093977654,
61.008465555097, 58.5478578294864, 57.4119260976748, 57.9275733769477,
56.9129774651439, 55.6494927089111, 52.0222406797903, 51.9215916366208,
53.4679949695072, 58.2128251869788, 64.7955701998493, 62.8319013929061,
60.8391061131818, 56.1759467734789, 55.4331550199683, 55.8437923896573,
54.998540828777, 54.7840203124691, 54.3853750266133, 52.7590435522892,
56.1409799671355, 62.0047140533332, 57.5185465474672, 57.2532289998115,
55.9911913829976, 54.6479285609432, 53.1659722964534, 53.3609799276622,
51.321452599498, 49.6933914680193, 48.6718229103421, 49.5393207890844,
52.8096091918065, 56.1667672797739, 60.7380412023987, 60.1791897430251,
58.7798069796932, 58.061108119255, 59.7770862278418, 60.2070273632675,
59.074898814382, 55.5571990297011, 53.8564792650491, 54.0753885029223,
56.2369958393563, 58.9062125901571, 70.7538119957697, 69.4271857400385,
67.3954189057409, 66.9262104442679, 67.1558044757422, 65.8848885390536,
65.3092556552615, 64.3799468889004, 64.9999333535186, 65.6493831700943,
69.2646980549075, 70.6342115226731)), row.names = c(NA, -192L
), .Names = c("month", "variable", "value"), class = "data.frame")
Instead of splitting up the data and plotting two geom_lines, you can simply supply a custom colour palette in which CRU is mapped to black.
If you want to keep the default colours for the other variables, you first need to define a little helper function to retrieve them the way ggplot2 does it.
# Reproduce ggplot2's default discrete colour palette.
#
# n: number of colours wanted (a non-negative whole number).
# Returns a character vector of `n` hex colour strings taken at evenly
# spaced hues (luminance 65, chroma 100).
gg_color_hue <- function(n) {
  # Generate n + 1 hues: the last one (375) is the same angle as the first
  # (15) on the colour wheel, so it is discarded below.
  hues <- seq(15, 375, length.out = n + 1)
  # seq_len() instead of 1:n so that n = 0 yields an empty vector rather
  # than indexing with c(1, 0) and returning one colour.
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}
Then create a custom colour palette vector, combining black and the standard palette. Since CRU is the first level of your factor variable (with 16 levels in total), black goes first, followed by the default hues for the remaining 15 levels:
# Black for the first factor level (CRU), then default hues for every
# remaining level. The count is derived from the data rather than hard-coded,
# so the palette stays the right length if the set of variables changes.
custom_palette <- c("#000000", gg_color_hue(nlevels(tmp1$variable) - 1))
The following then produces your desired plot:
# One set of line/point layers; the manual colour scale supplies the colours,
# with the first value of `custom_palette` (black) going to the first level.
ggplot(
  data = tmp1,
  mapping = aes(
    x = factor(month), y = value,
    group = variable, colour = variable
  )
) +
  geom_line(size = 1) +
  geom_point(size = 2.5) +
  scale_colour_manual(values = custom_palette) +
  theme_grey(base_size = 18) +
  # No x-axis title, "%" on the y axis, panel title above the plot.
  labs(x = NULL, y = "%", title = "a) Cloud fraction") +
  theme(
    legend.title = element_blank(),
    axis.text.x = element_blank(),
    plot.title = element_text(hjust = 0)
  )

Resources