Use ifelse() to Identify inconsistent responses in data - r

I have a long format dataframe of responses to a repeated question about puberty status vb_ asked approximately yearly at ages 9, 10, 11, 13, 14, 15, 16, and 17.
Each year participants were asked to rate their development from 1 to 5, with 1 being least developed and 5 being most developed.
I would like to use R's ifelse() to identify inconsistent responses i.e. those that report a stage at one year that is lower than any of the previous years.
Here is some fake example data for 20 people:
# Example data in long format: 160 rows, one per participant (id 1-20) and
# age (9, 10, 11, 13, 14, 15, 16, 17). `vb_` holds the integer stage that was
# reported at that age.
vb <- structure(list(id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 19L,
19L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L), age = c(9L, 10L,
11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L,
17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L,
14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L,
10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L,
16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L,
13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L,
9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L,
15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L,
11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L,
17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L,
14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L,
10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L,
16L, 17L), vb_ = c(1L, 1L, 1L, 3L, 4L, 4L, 4L, 5L, 2L, 2L, 3L,
4L, 5L, 5L, 5L, 5L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 2L, 1L, 3L,
4L, 4L, 5L, 5L, 5L, 2L, 2L, 1L, 3L, 4L, 3L, 4L, 4L, 1L, 1L, 1L,
3L, 4L, 4L, 5L, 5L, 1L, 1L, 2L, 4L, 4L, 4L, 5L, 5L, 2L, 2L, 2L,
4L, 5L, 4L, 4L, 5L, 2L, 2L, 1L, 4L, 5L, 5L, 5L, 5L, 1L, 2L, 3L,
4L, 5L, 5L, 4L, 5L, 1L, 1L, 1L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
1L, 4L, 4L, 4L, 4L, 1L, 1L, 3L, 4L, 4L, 4L, 5L, 5L, 1L, 1L, 1L,
4L, 4L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 4L, 4L, 5L, 5L, 2L, 3L, 3L,
4L, 4L, 5L, 5L, 5L, 1L, 1L, 2L, 2L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
3L, 3L, 4L, 5L, 5L, 1L, 1L, 1L, 2L, 4L, 4L, 4L, 4L, 1L, 1L, 1L,
2L, 4L, 4L, 4L, 4L)), class = "data.frame", row.names = c(NA,
-160L), .Names = c("id", "age", "vb_"))

If you insist on an ifelse() approach, you can do:
# Flag a response as "inconsistent" when it is lower than the same
# participant's response on the previous row.
# Sort by id, then age, so that "previous row" means "previous visit".
vb <- vb[order(vb$id, vb$age), ]
# The one-row shift is written out in base R: c(NA, head(x, -1)).
# A bare lag() only shifts values when dplyr is attached; otherwise it is
# stats::lag(), which returns the values unshifted, so every row would be
# compared with itself and nothing would ever be flagged.
vb$inconsistent <- ifelse(
  vb$id == c(NA, head(vb$id, -1)),
  ifelse(vb$vb_ < c(NA, head(vb$vb_, -1)), "inconsistent", ""),
  ""
)
# The first row has no predecessor, so its comparison is NA; blank it out.
vb$inconsistent[1] <- ""
id age vb_ inconsistent
1 1 9 1
2 1 10 1
3 1 11 1
4 1 13 3
5 1 14 4
6 1 15 4
7 1 16 4
8 1 17 5
9 2 9 2
10 2 10 2
11 2 11 3
12 2 13 4
13 2 14 5
14 2 15 5
15 2 16 5
16 2 17 5
17 3 9 2
18 3 10 3
19 3 11 3
20 3 13 3
21 3 14 4
22 3 15 4
23 3 16 4
24 3 17 5
25 4 9 2
26 4 10 1 inconsistent
27 4 11 3
...
Or one approach with dplyr is:
library(dplyr)
# Order the rows by participant and age, then, within each participant,
# subtract the previous response from the current one and keep only the rows
# where that difference is negative (the reported stage went down).
vb %>%
  arrange(id, age) %>%
  group_by(id) %>%
  mutate(vb_diff = vb_ - lag(vb_)) %>%
  filter(vb_diff < 0)
# A tibble: 6 x 4
# Groups: id [5]
id age vb_ vb_diff
<int> <int> <int> <int>
1 4 10 1 -1
2 5 11 1 -1
3 5 15 3 -1
4 8 15 4 -1
5 9 11 1 -1
6 10 16 4 -1

Here you go.
# Sort so that consecutive rows of one participant are in age order.
vb <- vb[order(vb$id, vb$age), ]
# TRUE where the stage is lower than on the row directly above; the first row
# has nothing above it, so it is FALSE.
vb$decreasingdevelopment <- c(FALSE, diff(vb$vb_) < 0)
# TRUE where the row directly above has the same id; the first row is TRUE.
vb$sameperson <- c(TRUE, diff(vb$id) == 0)
# A drop only counts when both rows belong to the same participant.
vb$inconsistency <- vb$sameperson & vb$decreasingdevelopment
which(vb$inconsistency)
#[1] 26 35 38 62 67 79
Note that the use of ifelse() is possible but not necessary.
PS: for completeness of the answer, you should always use the following, which compares each response with all earlier responses from the same participant:
# A row is inconsistent when the same participant reported a strictly higher
# stage at any younger age, not only on the row directly above.
# The rows are visited by index and the typed columns are read directly.
# apply(vb, 1, ...) would first coerce the whole data frame to a single type,
# so any character column would turn these into string comparisons.
vb$inconsistency_robust <- vapply(
  seq_len(nrow(vb)),
  function(i) {
    # na.rm = TRUE treats a missing comparison as "no evidence".
    any(
      vb$vb_ > vb$vb_[i] & vb$age < vb$age[i] & vb$id == vb$id[i],
      na.rm = TRUE
    )
  },
  logical(1)
)
# all.equal() reports how the two sets of flagged rows differ (they do: the
# robust check finds one extra row, as shown below).
all.equal(which(vb$inconsistency_robust), which(vb$inconsistency))
#> which(vb$inconsistency_robust)
#[1] 26 35 38 62 63 67 79
#> which(vb$inconsistency)
#[1] 26 35 38 62 67 79
Note how the robust method spots all the occurrences of inconsistencies, while my more naive ifelse() method here only compares line to line.

Related

Finding the 3 coldest consecutive months

I am trying to write code that finds the 3 consecutive months that are the coldest.
For now I have written code for the first 3 months (1,2,3), but then it should be applied to (4,5,6), (7,8,9), (10,11,12), (2,3,4), (5,6,7), (8,9,10), (11,12,1), (3,4,5), (6,7,8), (9,10,11) and (12,1,2), which are all the possible combinations of 3 consecutive months.
The code I wrote is here :
# Mean of t_q over months 1-3 for every Site, one row per Site.
cold <- data_example %>%
  filter(Month %in% c(1, 2, 3)) %>%
  group_by(Site) %>%
  mutate(mean_temperature = mean(t_q)) %>%
  # distinct() keeps only the columns it is given, so t_q and Month are
  # dropped here without a separate select()
  distinct(Site, mean_temperature)
# Plain vector of the per-site means.
average_temp_month_1_2_3 <- cold$mean_temperature
Then I replaced the c(1,2,3) by all possibilities, and I created a new column for each output.
I end up with a dataset in which each row corresponds to a Site and the columns are all the possibilities of 3 consecutive months.
After that I took the min value for each row using the functions apply() and min(), and it gives me the coldest quarter for each Site.
I am looking for a way to generalize it, like creating a loop over the possibilities.
The structure of data_example is as follows:
structure(list(Site = c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L,
17L, 17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 25L, 25L, 25L, 25L, 25L, 25L, 25L,
25L, 25L, 25L, 25L, 25L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L,
26L, 26L, 26L, 26L), Month = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L), t_q = c(9.67754848470332, -6.74555496540183,
5.67969761460384, 12.537207581471, -9.4899105618945, 21.0747672424502,
15.2643039243614, -3.62839910494421, 11.3919028351455, 1.69988257436554,
4.22015024307287, 11.7045830784212, 8.91437673833493, 0.579081429509138,
-10.8207481229903, 7.05356868592628, 13.0911580912516, 17.2032089167605,
-2.47642708849114, -11.2105599344486, 33.986736305027, 17.8578689773214,
-14.9114468266335, 14.4681380389141, 0.568074240873411, 7.65458408777801,
1.91368344556659, 6.01571556896127, 11.4858297513536, 2.2608458985328,
-2.08200762781776, 12.1540989284163, 20.9941815285413, 0.375777604316208,
-2.7137027317614, -6.17690210400591, 11.2549857164403, 17.447156776654,
-6.96565197389579, -5.41542361226991, 11.1680111873065, 16.2266522778922,
-11.4503938582433, 5.93300314835716, -18.2818398656237, 16.2930210946949,
9.80219192652316, -0.48237356523527, 7.72680942503686, 5.84113084181759,
9.66129413490096, -4.53018262186904, 7.42187509892118, 9.2559478576895,
8.25120948667013, 8.18182063263247, 16.3703081943971, 19.5469951420341,
3.71888263185773, -0.150179891749435, 1.32057298670562, -5.63556532224354,
21.3918542474341, 4.58752188336035, 5.49430262894033, 5.99587512047837,
-3.76459024109216, -8.53522098071824, 8.01805680562232, 26.2227490426066,
8.90822434139878, 5.04259034084471, 6.89740304247746, 11.9484584922927,
-11.5085102739471, 30.4526759119379, 21.878533782357, -5.39936677076962,
-9.83965056853816, 19.3083455159472, 7.90653548036154, 3.11876660277767,
-8.85027083180008, -9.9225496831988, 5.97307112581907, -2.83528336599284,
-2.75758002814396, 4.68388181004449, 6.61649031537118, -6.65988084338133,
-0.981075313384259, 5.84898952305179, -5.20962191660178, 0.416662319713158,
-10.5336993269853, 19.5350642296553, 26.9696625385792, 15.3291059661081,
15.0799591208354, 13.2310653499033, 7.2053382722482, -7.87288386491102,
20.8083797469715, 6.16664220270041, 8.3360949793043, -14.4000921795463,
-10.5503025782944, 14.3185205291177, 5.83802399796341, 2.49660818997943,
15.7399297014092, -0.834086173817971, 12.4883230222372, 6.73548467376379,
7.7988835803825, -5.13583355913738, 7.51054162811707, 11.6610602814336,
-11.8864185954223, 4.2704440943851)), row.names = c(NA, -120L
), groups = structure(list(Site = c(4L, 5L, 13L, 14L, 15L, 16L,
17L, 18L, 25L, 26L), .rows = structure(list(1:12, 13:24, 25:36,
37:48, 49:60, 61:72, 73:84, 85:96, 97:108, 109:120), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
You can use raster::movingFun to do a moving average with circular data, then use slice_min to get the minimum value per group.
library(dplyr)
# Wrap `x` onto the cycle 1..by: the remainder of x / by, except that a
# remainder of 0 is reported as `by` (e.g. month 0 -> 12, month 13 -> 1).
circ <- function(x, by) {
  remainder <- x %% by
  ifelse(remainder == 0, by, remainder)
}
# For every Site, average each month's t_q with its two neighbours (the window
# wraps around the ends because circular = TRUE), keep the row whose window
# has the lowest mean, and label it with the three month numbers it covers.
# Assumes each site's rows are in month order with all 12 months present.
# Starts from data_example, the data shown in the question.
data_example %>%
  group_by(Site) %>%
  mutate(rolmean = raster::movingFun(t_q, n = 3, fun = mean, circular = TRUE)) %>%
  # with_ties = FALSE keeps exactly one row per site, so `Month` is a single
  # value when the label is built below
  slice_min(rolmean, n = 1, with_ties = FALSE) %>%
  mutate(coldest = toString(circ(c(Month - 1, Month, Month + 1), 12)))
output
# A tibble: 10 × 5
# Groups: Site [10]
Site Month t_q rolmean coldest
<int> <int> <dbl> <dbl> <chr>
1 4 2 -6.75 2.87 1, 2, 3
2 5 3 -10.8 -1.06 2, 3, 4
3 13 11 -2.71 -2.84 10, 11, 12
4 14 8 5.93 -7.93 7, 8, 9
5 15 3 9.66 3.66 2, 3, 4
6 16 7 -3.76 -2.10 6, 7, 8
7 17 11 -8.85 -5.22 10, 11, 12
8 18 10 0.417 -5.11 9, 10, 11
9 25 10 -14.4 -5.54 9, 10, 11
10 26 12 4.27 -0.593 11, 12, 1
Using which.min in aggregate on a moving average window.
# For every Site, find the run of `win` consecutive positions in t_q with the
# lowest mean and return those positions as a string.
# Assumes each site's values are in month order, so position i is month i.
aggregate(t_q ~ Site, data_example, \(s) {
  win <- 3 ## window length
  n <- length(s)
  ## one window per starting position; indices wrap past the end so that
  ## (11, 12, 1) and (12, 1, 2) are candidates as well
  sq <- lapply(seq_len(n), \(start) (start + seq_len(win) - 2L) %% n + 1L)
  means <- vapply(sq, \(idx) mean(s[idx]), numeric(1))
  toString(sq[[which.min(means)]])
})
# Site t_q
# 1 4 1, 2, 3
# 2 5 2, 3, 4
# 3 13 10, 11, 12
# 4 14 7, 8, 9
# 5 15 2, 3, 4
# 6 16 6, 7, 8
# 7 17 10, 11, 12
# 8 18 9, 10, 11
# 9 25 9, 10, 11
# 10 26 11, 12, 1

Panel regression with cross sectional averages

I am estimating a panel regression model, and I need to add the cross sectional average of the dependent variable and regressors to the model.
I am struggling to implement the cross sectional averages in R. Can anyone help me out.
So I have a panel regression code below - using plm package.
I need to add cross sectional average of variable A, B, C and D to the right hand side of the regression
library(plm)
# Panel regression of A on B, C and D with plm; model = "fd" requests the
# first-difference estimator.
panel_fe <- plm(
  A ~ B + C + D,
  model = "fd",
  effect = "individual",
  data = PanelS
)
So my final regression model would be like this A = B+ C+D + A_bar + B_bar + C_bar + D_bar, where A_bar, B_bar , C_bar and D_bar are the cross sectional averages of A, B,C and D respectively.
My panel dataset, PanelS, is below.
structure(list(Country = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L), .Label = c("CountryA", "CountryB",
"CountryC", "CountryD", "CountryE", "CountryF", "CountryG", "CountryH",
"CountryI", "CountryJ"), class = "factor"), Year = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L), .Label = c("2000", "2001",
"2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009",
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017",
"2018", "2019"), class = "factor"), A = c(0.051539, 0.064525,
0.014292, 0.018774, 0.035449, 0.021988, 0.02396, 0.011415, 0.010358,
-0.029607, -0.020427, -0.012734, 0.006683, 0.007373, -0.039712,
-0.005499, 0.008682, 0.015326, 0.020524, 0.015101, 0.035355,
0.031157, 0.023387, 0.024198, 0.035353, 0.053873, 0.038743, 0.042338,
0.034935, 0.015377, 0.010599, 0.015154, 0.002919, 0.024291, 0.043819,
0.015901, 0.01897, 0.027767, 0.015992, 0.041976, 0.011223, 0.006144,
0.000778, 0.005873, 0.007194, -0.022017, -0.023338, -0.037765,
-0.049356, 0.026135, 0.035633, 0.015691, -0.006196, -0.00025,
0.001181, -0.001472, -0.009324, -0.022664, -0.022623, -0.019586,
-0.012207, -0.004603, -0.013073, -0.010771, -0.009882, -0.014417,
-0.031812, -0.043885, -0.050883, -0.039834, -0.020299, -0.000684,
0.011216, 0.005419, 0.000939, -0.005508, 0.006266, -0.008077,
-0.016137, -0.012681, 0.031612, 0.043729, 0.009314, 0.002734,
-0.012284, 0.002403, 0.016807, 0.019995, 0.033096, 0.024383,
0.010588, 0.019833, 0.031837, 0.03127, 0.029059, 0.020708, 0.019296,
0.017787, 0.032074, 0.027125, 0.005673, 0.003698, -5.3e-05, 0.001794,
-0.011977, -0.008686, -0.031588, -0.039411, -0.073931, -0.076715,
-0.039171, -0.025797, -0.007637, 0.00345, 0.009101, 0.01674,
-0.006968, -0.019178, -0.02438, -0.039663, 0.078313, 0.06707,
0.062822, 0.050771, 0.041274, 0.043921, 0.046429, 0.039418, 0.034671,
0.017356, 0.001054, 0.00414, 0.00226, 0.00275, 0.00085, 0.00495,
0.001276, -0.001446, -0.005771, -0.007513, 0.053734, 0.038679,
0.017375, 0.01438, 0.018403, 0.032943, 0.025539, 0.032463, 0.032267,
0.034009, 0.018229, 0.008958, 0.010079, 0.00749, 0.000604, 0.001948,
0.011782, 0.013253, 0.007898, 0.007546, 0.018052, -0.001123,
-0.012597, -0.042292, -0.058516, -0.022736, -0.03841, -0.050843,
-0.073979, -0.097242, -0.024712, 0.038037, 0.048685, -0.00624,
0.075575, 0.044947, 0.097171, 0.086809, 0.079856, 0.068521, 0.008062,
-0.00911, -0.010527, -4.3e-05, 0.002428, 0.004422, 0.008752,
0.019602, 0.01724, 0.01965, -0.008816, 0.011466, 0.020956, 0.021873,
0.021772, 0.024495, 0.021354, 0.015267, 0.018769, 0.016904),
C = c(0.75345, 0.70657, 0.645051, 0.510055, 0.433786, 0.35728,
0.265817, 0.208721, 0.163261, 0.130248, 0.136607, 0.153873,
0.152275, 0.166592, 0.170559, 0.27089, 0.259813, 0.292847,
0.253142, 0.222618, 0.56764082, 0.523543, 0.485083, 0.49081,
0.461501, 0.44156, 0.374122, 0.315494, 0.27346, 0.333132,
0.401818, 0.425879, 0.460709, 0.448942, 0.440456, 0.442703,
0.397737, 0.372338, 0.359446, 0.340254, 0.064305, 0.05107,
0.047682, 0.056584, 0.055981, 0.051134, 0.047025, 0.046318,
0.037655, 0.045041, 0.071989, 0.066074, 0.061057, 0.097641,
0.101621, 0.105545, 0.09996, 0.099131, 0.091119, 0.082012,
0.120817, 0.120871, 0.138383, 0.13023, 0.141247, 0.146088,
0.119133, 0.100396, 0.084592, 0.185873, 0.368416, 0.479167,
0.4367, 0.421837, 0.400428, 0.416259, 0.37072, 0.40398, 0.390126,
0.371126, 0.079576, 0.074647, 0.076712, 0.074295, 0.074504,
0.079053, 0.080224, 0.082991, 0.082006, 0.15357, 0.161465,
0.201522, 0.190049, 0.219974, 0.236873, 0.227428, 0.219862,
0.200938, 0.223426, 0.209529, 0.217219, 0.224867, 0.258694,
0.248207, 0.221093, 0.189452, 0.159052, 0.124236, 0.119492,
0.123362, 0.217807, 0.296186, 0.339882, 0.371345, 0.376212,
0.391509, 0.378059, 0.373931, 0.351043, 0.347354, 0.440547,
0.424547, 0.409236, 0.401795, 0.427482, 0.426416, 0.399297,
0.381117, 0.339041, 0.325607, 0.415314, 0.469047, 0.482712,
0.536225, 0.562292, 0.598259, 0.636417, 0.631764, 0.612668,
0.596271, 0.605061, 0.503479, 0.518971, 0.498057, 0.492731,
0.484527, 0.486885, 0.43596, 0.388967, 0.374978, 0.407324,
0.381025, 0.371731, 0.375149, 0.402248, 0.449982, 0.437387,
0.422554, 0.407331, 0.389125, 0.989067, 1.049344, 1.070812,
1.048631, 1.014561, 1.028734, 1.073949, 1.036117, 1.03103,
1.094155, 1.267447, 1.474942, 1.752192, 1.619444, 1.784347,
1.802256, 1.770079, 1.807951, 1.792139, 1.862386, 0.601394,
0.590658, 0.579365, 0.597035, 0.633089, 0.649877, 0.673465,
0.667047, 0.639942, 0.655222, 0.729901, 0.823816, 0.79801,
0.811354, 0.787169, 0.756694, 0.72207, 0.692768, 0.651024,
0.617801), B = c(0.147502302, 0.043680673, -0.212478849,
-0.266834333, -0.228099071, -0.199890362, -0.968175801, 1.047500546,
1.273127656, 1.227657506, -0.286068921, -1.356896168, -1.442625298,
-0.291748363, 2.029875219, 1.099611751, -1.112127832, -0.894025857,
0.103213651, 0.286801553, 0.756833023, 0.591945192, 0.525259532,
0.466656359, 0.706692697, -2.361722697, -2.777257989, -4.097114222,
-4.564987155, 2.317853991, 3.44030537, 3.034469093, 5.845290721,
0.403542521, 0.128582254, 0.817094156, -0.886707561, -2.998573025,
-0.491794488, -0.856367773, 0.023343476, -0.209503364, -0.084839186,
-0.146285026, -0.256672799, -0.093852713, 0.145824486, 0.434606031,
0.966980327, 0.67904687, -0.292659443, -0.487763914, -0.084930583,
-0.32722087, -0.442172133, -0.168366978, -0.186469629, 0.046322287,
0.181126569, 0.303486593, 0.171541123, -0.348150815, -0.407466419,
-0.624622679, -0.354132366, -0.15050691, 0.700892294, 0.67692383,
1.014111655, 0.862019536, 0.395600738, -0.256706715, -0.542246369,
-0.539422399, -0.405088653, -0.247954994, -0.497333992, -0.010723655,
0.393516751, 0.169750037, -0.581903347, -0.730163914, 0.351894514,
0.629568917, 0.882078894, 0.760041333, -0.564317727, -0.57799292,
-0.433736512, 0.513350369, 0.55464973, -0.224497194, -0.074326596,
-0.123301819, -0.432013928, -0.25316664, -0.374406673, 0.116449941,
0.308969388, 0.252824183, 2.398228162, -0.033362631, -1.681378615,
-3.655293426, -2.793256764, -3.636310622, 0.149490332, 3.951131246,
7.177449077, 4.831325877, 2.050070679, 1.314471427, -1.687424783,
-3.796189127, -3.329685346, -1.695252718, -3.010416797, -2.414597902,
1.199960369, 4.661041564, 0.531518012, -1.384184059, -0.64216453,
-0.13206166, 0.249287935, -0.153010531, -0.987952985, -1.71711917,
-0.678751076, 0.890062065, 1.663691535, 1.883735194, 2.171029985,
2.383501603, 1.490313839, -0.732542129, -0.291797363, -1.655272704,
-1.613245217, -1.275038743, -0.789256935, -3.589249982, 0.502475039,
1.840081099, 1.141218417, 3.130100399, 3.94751837, 0.97811035,
0.013586974, -3.245960526, -2.068241886, -1.82476664, -1.481654499,
0.37039449, -1.516414277, -1.722381744, 0.683458083, 0.153189319,
3.410781995, 0.067011953, -3.09418792, -4.09753755, -4.682167411,
-1.333607727, 2.505605899, -4.332639317, -2.190945016, 4.048457741,
11.60535564, 13.61047901, 5.145259686, -0.712611552, -3.385649938,
7.214394614, -10.34401695, -1.841542179, -6.437949187, -4.545422837,
-0.012548047, 2.881273043, 3.227611639, 10.96399365, 16.38843255,
14.72001327, -13.84595255, -10.51570643, -13.59695535, -36.70577424,
-12.07070647, 12.51742535, 52.88207865, 9.143152612, -7.818895359,
-15.57456939, -21.31957866, -23.55720863, -5.574415019, 5.783084584,
12.02189272, 22.93207708), D = c(0.77780751, 0.793229898,
0.80623893, 0.821155065, 0.836880111, 0.854312944, 0.873660631,
0.890537317, 0.907536298, 0.912375095, 0.929637942, 0.946439284,
0.965000087, 0.97726773, 0.986870808, 1, 1.019208507, 1.037842597,
1.054711181, 1.072171599, 0.534008473, 0.566583199, 0.58762954,
0.601043497, 0.63362178, 0.673913677, 0.719447102, 0.799187909,
0.864173776, 0.899162389, 0.909465125, 0.96350569, 0.978220642,
0.971679886, 0.976158221, 1, 1.025374896, 1.065804414, 1.108567186,
1.166769344, 0.588726028, 0.64526073, 0.733094431, 0.718268082,
0.746291144, 0.799900392, 0.846050389, 0.894179583, 1.015232882,
0.982856394, 1.012948099, 1.041332642, 1.032947106, 1.013566583,
0.980944689, 1, 1.020576612, 1.061740647, 1.117831183, 1.159906251,
0.750587042, 0.769670674, 0.790024355, 0.801712216, 0.817505148,
0.83991247, 0.856517319, 0.878345181, 0.914006005, 0.920044857,
0.949573071, 0.955207703, 0.978810398, 0.985618398, 0.996205139,
1, 1.004364708, 1.017159213, 1.021013703, 1.02682649, 0.825278825,
0.836048671, 0.847570474, 0.858769029, 0.86834942, 0.871868036,
0.875331803, 0.890827568, 0.898928134, 0.915485416, 0.921392822,
0.931246968, 0.945182975, 0.963702812, 0.981800571, 1, 1.013277522,
1.026999204, 1.044176589, 1.067069774, 0.490666665, 0.523850087,
0.54906662, 0.570457925, 0.597126217, 0.632406036, 0.689467717,
0.775073059, 0.828560075, 0.827109078, 0.842215091, 0.887572897,
0.923280339, 0.960610381, 0.988936452, 1, 1.022699304, 1.054533263,
1.098615084, 1.134067127, 0.757140805, 0.809228408, 0.851488047,
0.884918505, 0.889385715, 0.916751643, 0.948479832, 0.960072842,
0.956196673, 0.911566837, 0.884542463, 0.89644222, 0.917048164,
0.929279352, 0.929337342, 1, 1.010128912, 1.026719845, 1.029923385,
1.062349178, 0.786853444, 0.804351028, 0.831286834, 0.859995963,
0.886334727, 0.906191485, 0.937863282, 0.969963165, 1.012104032,
1.038112793, 1.036283847, 1.046222, 1.043339336, 1.02279939,
1.002888566, 1, 0.994233243, 0.998082845, 0.997049083, 0.998951287,
0.740171055, 0.770579402, 0.802054487, 0.833603662, 0.865965514,
0.90147914, 0.937354271, 0.969378485, 0.99123068, 0.992657113,
0.994179737, 0.993983379, 0.992844694, 0.99680058, 0.994574042,
1, 1.003228988, 1.016266499, 1.028341184, 1.04261954, 0.801617134,
0.817716283, 0.834621959, 0.850140657, 0.863935678, 0.880664424,
0.899645623, 0.9226463, 0.944486016, 0.945115307, 0.95522518,
0.964280334, 0.975483583, 0.983073825, 0.988745617, 1, 1.005225593,
1.010468623, 1.020086873, 1.032605559)), row.names = c("CountryA-2000",
"CountryA-2001", "CountryA-2002", "CountryA-2003", "CountryA-2004",
"CountryA-2005", "CountryA-2006", "CountryA-2007", "CountryA-2008",
"CountryA-2009", "CountryA-2010", "CountryA-2011", "CountryA-2012",
"CountryA-2013", "CountryA-2014", "CountryA-2015", "CountryA-2016",
"CountryA-2017", "CountryA-2018", "CountryA-2019", "CountryB-2000",
"CountryB-2001", "CountryB-2002", "CountryB-2003", "CountryB-2004",
"CountryB-2005", "CountryB-2006", "CountryB-2007", "CountryB-2008",
"CountryB-2009", "CountryB-2010", "CountryB-2011", "CountryB-2012",
"CountryB-2013", "CountryB-2014", "CountryB-2015", "CountryB-2016",
"CountryB-2017", "CountryB-2018", "CountryB-2019", "CountryC-2000",
"CountryC-2001", "CountryC-2002", "CountryC-2003", "CountryC-2004",
"CountryC-2005", "CountryC-2006", "CountryC-2007", "CountryC-2008",
"CountryC-2009", "CountryC-2010", "CountryC-2011", "CountryC-2012",
"CountryC-2013", "CountryC-2014", "CountryC-2015", "CountryC-2016",
"CountryC-2017", "CountryC-2018", "CountryC-2019", "CountryD-2000",
"CountryD-2001", "CountryD-2002", "CountryD-2003", "CountryD-2004",
"CountryD-2005", "CountryD-2006", "CountryD-2007", "CountryD-2008",
"CountryD-2009", "CountryD-2010", "CountryD-2011", "CountryD-2012",
"CountryD-2013", "CountryD-2014", "CountryD-2015", "CountryD-2016",
"CountryD-2017", "CountryD-2018", "CountryD-2019", "CountryE-2000",
"CountryE-2001", "CountryE-2002", "CountryE-2003", "CountryE-2004",
"CountryE-2005", "CountryE-2006", "CountryE-2007", "CountryE-2008",
"CountryE-2009", "CountryE-2010", "CountryE-2011", "CountryE-2012",
"CountryE-2013", "CountryE-2014", "CountryE-2015", "CountryE-2016",
"CountryE-2017", "CountryE-2018", "CountryE-2019", "CountryF-2000",
"CountryF-2001", "CountryF-2002", "CountryF-2003", "CountryF-2004",
"CountryF-2005", "CountryF-2006", "CountryF-2007", "CountryF-2008",
"CountryF-2009", "CountryF-2010", "CountryF-2011", "CountryF-2012",
"CountryF-2013", "CountryF-2014", "CountryF-2015", "CountryF-2016",
"CountryF-2017", "CountryF-2018", "CountryF-2019", "CountryG-2000",
"CountryG-2001", "CountryG-2002", "CountryG-2003", "CountryG-2004",
"CountryG-2005", "CountryG-2006", "CountryG-2007", "CountryG-2008",
"CountryG-2009", "CountryG-2010", "CountryG-2011", "CountryG-2012",
"CountryG-2013", "CountryG-2014", "CountryG-2015", "CountryG-2016",
"CountryG-2017", "CountryG-2018", "CountryG-2019", "CountryH-2000",
"CountryH-2001", "CountryH-2002", "CountryH-2003", "CountryH-2004",
"CountryH-2005", "CountryH-2006", "CountryH-2007", "CountryH-2008",
"CountryH-2009", "CountryH-2010", "CountryH-2011", "CountryH-2012",
"CountryH-2013", "CountryH-2014", "CountryH-2015", "CountryH-2016",
"CountryH-2017", "CountryH-2018", "CountryH-2019", "CountryI-2000",
"CountryI-2001", "CountryI-2002", "CountryI-2003", "CountryI-2004",
"CountryI-2005", "CountryI-2006", "CountryI-2007", "CountryI-2008",
"CountryI-2009", "CountryI-2010", "CountryI-2011", "CountryI-2012",
"CountryI-2013", "CountryI-2014", "CountryI-2015", "CountryI-2016",
"CountryI-2017", "CountryI-2018", "CountryI-2019", "CountryJ-2000",
"CountryJ-2001", "CountryJ-2002", "CountryJ-2003", "CountryJ-2004",
"CountryJ-2005", "CountryJ-2006", "CountryJ-2007", "CountryJ-2008",
"CountryJ-2009", "CountryJ-2010", "CountryJ-2011", "CountryJ-2012",
"CountryJ-2013", "CountryJ-2014", "CountryJ-2015", "CountryJ-2016",
"CountryJ-2017", "CountryJ-2018", "CountryJ-2019"), class = c("pdata.frame",
"data.frame"), index = structure(list(Country = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L), .Label = c("CountryA",
"CountryB", "CountryC", "CountryD", "CountryE", "CountryF", "CountryG",
"CountryH", "CountryI", "CountryJ"), class = "factor"), Year = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L), .Label = c("2000", "2001",
"2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009",
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017",
"2018", "2019"), class = "factor")), class = c("pindex", "data.frame"
), row.names = c(NA, 200L)))
You can use function Between from package plm to calculate the cross sectional averages and add them to your data:
library(plm)
# PanelS must be a pdata.frame; if your data is a plain data.frame, convert it
# first with pdata.frame(your_data, index).
# Add one averaged column per model variable using plm's Between().
# NOTE(review): Between() is called with its default `effect`. Per the plm
# documentation the default is "individual", i.e. the mean within each Country
# over all Years. If the mean across countries within each Year is wanted,
# effect = "time" would be needed - confirm which is intended.
PanelS$A_bar <- Between(PanelS$A)
PanelS$B_bar <- Between(PanelS$B)
PanelS$C_bar <- Between(PanelS$C)
PanelS$D_bar <- Between(PanelS$D)
# Pooled regression of A on the original regressors plus the averaged columns.
mod <- plm(A ~ B + C + D + A_bar + B_bar + C_bar + D_bar, model = "pooling", effect="individual", data = PanelS)
# Print the coefficient table and fit statistics.
summary(mod)
# Pooling Model
#
# Call:
# plm(formula = A ~ B + C + D + A_bar + B_bar + C_bar + D_bar,
# data = PanelS, effect = "individual", model = "pooling")
#
# Balanced Panel: n = 10, T = 20, N = 200
#
# Residuals:
# Min. 1st Qu. Median 3rd Qu. Max.
# -0.06143690 -0.01311792 0.00070253 0.01186605 0.05107105
#
# Coefficients:
# Estimate Std. Error t-value Pr(>|t|)
# (Intercept) -0.00000000000001042 0.03313743211380626 0.0000 1.000000
# B -0.00076930351859426 0.00020566635571130 -3.7405 0.000242 ***
# C 0.10827039012266901 0.00949296134830719 11.4053 < 0.00000000000000022 ***
# D -0.04222788490989914 0.01136058813979121 -3.7171 0.000264 ***
# A_bar 0.99999999999911215 0.09632471140222754 10.3816 < 0.00000000000000022 ***
# C_bar -0.10827039012256123 0.01033406661607372 -10.4770 < 0.00000000000000022 ***
# D_bar 0.04222788490990802 0.03874710199411169 1.0898 0.277145
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Total Sum of Squares: 0.17549
# Residual Sum of Squares: 0.07128
# R-Squared: 0.59382
# Adj. R-Squared: 0.58119
# F-statistic: 47.0268 on 6 and 193 DF, p-value: < 0.000000000000000222
Note that it seems like you want to estimate a fixed effects model but your estimation has model = "fd" to estimate a first-differenced model. Also note that the cross sectional averages will drop out of the estimation of a fixed effects model.

Undirected network graph calculated by tidygraph shows more degree centrality than should be possible

I have a cleaned data set with 26 nodes. I am placing these 26 nodes in an undirected network graph using tidygraph, where I use the centrality_degree() function to calculate the centrality degree. However, when I graph the resulting network, my highest possible centrality degree is 40, which should not be possible. When I change the graph to directed, this is corrected.
I am somewhat confused, as with other methods I have used in the past, where I manually calculated the centrality degree, I have never once come across this issue.
Is this regular behaviour, or am I doing something wrong?
Reproducible example:
library(tidygraph)
library(ggraph)
library(tidyverse)
# Node list: 26 nodes with integer ids 1-26, labelled "a" through "z".
nodes <- data.frame(
  id = 1:26,
  label = letters,
  stringsAsFactors = FALSE
)
edges <- structure(list(from = c(21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
21L, 21L, 21L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 24L,
24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L,
24L, 24L, 24L, 24L, 24L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L,
25L, 25L, 25L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L,
22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 17L, 17L, 17L, 17L, 17L),
to = c(1L, 12L, 3L, 16L, 24L, 4L, 10L, 6L, 22L, 2L, 8L, 1L,
12L, 13L, 3L, 18L, 16L, 24L, 5L, 7L, 14L, 4L, 10L, 6L, 9L,
22L, 15L, 2L, 20L, 8L, 21L, 12L, 13L, 3L, 16L, 24L, 5L, 7L,
14L, 4L, 10L, 6L, 22L, 15L, 2L, 8L, 17L, 21L, 1L, 13L, 3L,
16L, 5L, 7L, 14L, 10L, 6L, 9L, 22L, 15L, 2L, 20L, 8L, 17L,
21L, 1L, 3L, 18L, 16L, 5L, 7L, 14L, 4L, 10L, 6L, 25L, 9L,
22L, 15L, 20L, 8L, 17L, 21L, 11L, 1L, 12L, 13L, 18L, 16L,
24L, 5L, 7L, 14L, 4L, 10L, 6L, 25L, 9L, 22L, 15L, 20L, 8L,
17L, 1L, 3L, 10L, 6L, 22L, 20L, 8L, 21L, 11L, 1L, 13L, 3L,
18L, 24L, 7L, 4L, 10L, 6L, 25L, 9L, 22L, 15L, 2L, 20L, 8L,
17L, 21L, 11L, 1L, 12L, 13L, 18L, 16L, 5L, 7L, 14L, 10L,
6L, 25L, 9L, 22L, 15L, 20L, 8L, 17L, 1L, 3L, 18L, 16L, 7L,
14L, 4L, 10L, 6L, 9L, 22L, 15L, 2L, 20L, 8L, 17L, 21L, 11L,
1L, 12L, 13L, 3L, 18L, 16L, 24L, 14L, 4L, 10L, 6L, 25L, 9L,
22L, 15L, 2L, 20L, 8L, 11L, 1L, 3L, 18L, 16L, 7L, 10L, 6L,
9L, 22L, 15L, 2L, 20L, 8L, 17L, 21L, 11L, 1L, 12L, 13L, 3L,
18L, 16L, 24L, 5L, 7L, 14L, 10L, 6L, 25L, 9L, 22L, 15L, 2L,
20L, 8L, 17L, 21L, 11L, 1L, 12L, 13L, 3L, 18L, 16L, 24L,
5L, 7L, 14L, 4L, 6L, 25L, 9L, 22L, 15L, 2L, 20L, 8L, 17L,
21L, 11L, 1L, 12L, 13L, 3L, 18L, 24L, 5L, 7L, 14L, 4L, 10L,
25L, 9L, 22L, 15L, 2L, 20L, 8L, 21L, 1L, 13L, 3L, 18L, 5L,
10L, 6L, 22L, 2L, 20L, 8L, 21L, 1L, 13L, 3L, 18L, 16L, 24L,
4L, 10L, 6L, 22L, 15L, 2L, 20L, 8L, 11L, 1L, 12L, 13L, 3L,
16L, 24L, 5L, 7L, 14L, 4L, 10L, 6L, 25L, 9L, 15L, 2L, 20L,
8L, 17L, 21L, 1L, 12L, 3L, 18L, 16L, 24L, 7L, 10L, 6L, 25L,
9L, 22L, 2L, 20L, 8L, 17L, 21L, 11L, 1L, 12L, 13L, 3L, 18L,
16L, 24L, 5L, 7L, 14L, 4L, 6L, 25L, 9L, 22L, 15L, 20L, 8L,
17L, 21L, 11L, 1L, 3L, 16L, 24L, 7L, 10L, 6L, 22L, 2L, 8L,
21L, 11L, 1L, 12L, 13L, 3L, 18L, 16L, 24L, 14L, 4L, 10L,
6L, 25L, 9L, 22L, 2L, 20L, 7L, 6L, 25L, 22L, 8L), weight = c(3L,
1L, 3L, 2L, 1L, 1L, 5L, 1L, 8L, 2L, 1L, 2L, 3L, 2L, 5L, 1L,
4L, 1L, 4L, 4L, 4L, 1L, 5L, 13L, 3L, 7L, 3L, 2L, 3L, 8L,
1L, 1L, 1L, 15L, 10L, 7L, 2L, 4L, 2L, 5L, 19L, 23L, 6L, 2L,
11L, 7L, 1L, 1L, 2L, 3L, 3L, 5L, 4L, 5L, 4L, 4L, 21L, 2L,
9L, 8L, 1L, 1L, 12L, 1L, 2L, 1L, 3L, 1L, 6L, 6L, 5L, 6L,
1L, 6L, 22L, 2L, 2L, 9L, 8L, 3L, 13L, 1L, 5L, 6L, 4L, 10L,
13L, 3L, 41L, 46L, 11L, 39L, 9L, 55L, 2L, 108L, 2L, 8L, 31L,
30L, 13L, 39L, 2L, 2L, 1L, 3L, 4L, 8L, 5L, 1L, 8L, 1L, 6L,
1L, 8L, 2L, 3L, 23L, 2L, 12L, 96L, 1L, 3L, 21L, 1L, 6L, 12L,
38L, 4L, 5L, 4L, 4L, 8L, 8L, 3L, 29L, 3L, 11L, 3L, 3L, 63L,
2L, 5L, 18L, 19L, 4L, 25L, 1L, 2L, 3L, 1L, 7L, 6L, 7L, 1L,
3L, 17L, 1L, 3L, 6L, 1L, 4L, 11L, 1L, 5L, 1L, 5L, 1L, 1L,
15L, 4L, 7L, 3L, 1L, 4L, 12L, 8L, 1L, 9L, 32L, 3L, 7L, 5L,
35L, 1L, 1L, 3L, 1L, 6L, 4L, 4L, 12L, 2L, 5L, 4L, 2L, 2L,
9L, 1L, 2L, 3L, 4L, 9L, 13L, 2L, 1L, 25L, 25L, 10L, 14L,
10L, 4L, 59L, 4L, 5L, 21L, 19L, 1L, 8L, 27L, 3L, 5L, 8L,
8L, 11L, 12L, 111L, 5L, 50L, 45L, 15L, 32L, 10L, 49L, 109L,
1L, 8L, 28L, 39L, 53L, 13L, 48L, 5L, 13L, 2L, 20L, 3L, 3L,
27L, 10L, 8L, 1L, 58L, 1L, 7L, 32L, 13L, 21L, 110L, 1L, 17L,
27L, 124L, 1L, 1L, 1L, 2L, 3L, 1L, 1L, 2L, 7L, 1L, 1L, 1L,
2L, 2L, 1L, 5L, 2L, 2L, 2L, 1L, 3L, 3L, 14L, 2L, 2L, 4L,
1L, 3L, 14L, 5L, 8L, 44L, 16L, 14L, 4L, 12L, 4L, 19L, 41L,
47L, 2L, 1L, 11L, 24L, 2L, 18L, 1L, 7L, 5L, 1L, 7L, 3L, 27L,
3L, 15L, 7L, 54L, 1L, 4L, 17L, 5L, 6L, 27L, 1L, 1L, 2L, 3L,
4L, 10L, 56L, 3L, 25L, 25L, 7L, 16L, 5L, 29L, 59L, 3L, 3L,
20L, 17L, 5L, 31L, 3L, 6L, 1L, 4L, 7L, 1L, 3L, 1L, 6L, 5L,
13L, 1L, 2L, 9L, 1L, 15L, 2L, 1L, 16L, 4L, 4L, 3L, 1L, 6L,
17L, 10L, 1L, 13L, 63L, 11L, 12L, 1L, 5L, 1L, 2L, 3L)), row.names = c(NA,
-383L), class = c("tbl_df", "tbl", "data.frame"))
# Build an undirected graph from the node and edge tables and store each
# node's degree centrality in a `neighbors` column.
routes_tidy <- tbl_graph(nodes = nodes, edges = edges, directed = FALSE) %>%
  mutate(neighbors = centrality_degree())

# Nodes "z", "s" and "w" have no connections; leaving them in would zoom the
# figure way out, so they are filtered out of the point and label layers.
ggraph(routes_tidy, layout = "graphopt") +
  geom_node_point(
    aes(size = neighbors, filter = !(label %in% c("z", "s", "w")))
  ) +
  geom_edge_link(aes(width = weight, alpha = weight)) +
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(
    aes(
      label = label,
      fontface = "bold",
      size = neighbors,
      filter = !(label %in% c("z", "s", "w"))
    ),
    repel = TRUE
  ) +
  labs(edge_width = "N") +
  theme_graph()
I'm new to the whole tidygraph thing, stumbled over this question, got confused, and figured it'd be a nice way to get to know stuff. So, I don't know if it's a bug or a feature, but the behaviour is triggered because you have doubled edges:
# In your edge list, look up the edges between nodes 1 and 2 in either direction
edges %>%
  filter((from == 1 & to == 2) | (from == 2 & to == 1))
# A tibble: 2 x 3
from to weight
<int> <int> <int>
1 1 2 11
2 2 1 3
And those count as 2 connections in the calculation of the degree centrality. One way to remove those double edges is to convert the network to a simple network:
# Collapse duplicated edges (e.g. 1-2 and 2-1) into a simple graph, then
# recompute the degree centrality on the simplified graph.
# convert() is tidygraph's shorthand for morph() + crystallise() followed by
# extracting a single graph (the first one by default).
routes_simple <- routes_tidy %>%
  convert(to_simple) %>%
  activate(nodes) %>%
  mutate(neighbors = centrality_degree())
Now the maximum degree is 22 (and the highest possible degree is, presumably, 25).

Calculating cumulative return for each quarter by investor

I'm looking to calculate cumulative returns from the column values for each quarter, grouped by investor. I tried using Return.cumulative but didn't have any success.
I would appreciate it if someone could show me an easy way to calculate cumulative returns in R.
structure(list(Quarter = structure(c(1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L), .Label = c("2012Q1", "2012Q2", "2012Q3",
"2012Q4", "2013Q1", "2013Q2", "2013Q3", "2013Q4", "2014Q1", "2014Q2",
"2014Q3", "2014Q4", "2015Q1", "2015Q2", "2015Q3", "2015Q4", "2016Q1",
"2016Q2", "2016Q3", "2016Q4"), class = "factor"), Total_Return = c(0.040561972,
0.012692509, 0.053079761, 0.048656856, 0.037110412, 0.041422455,
0.052373109, 0.049826591, 0.053255331, 0.050956964, 0.038683073,
0.018446161, 0.039546641, 0.057108385, 0.020790648, 0.020743042,
0.015486459, 0.001202289, 0.066082963, 0.036178889, 0.037096464,
0.003068485, 0.026307213, 0.052918456, 0.019292362, 0.058390755,
0.040255949, 0.020420614, 0.024955646, 0.051180526, 0.04598829,
0.012425778, 0.036190369, 0.079480322, 0.00574259, 0.026401296,
0.018309495, 0.004887553, 0.05935355, 0.051702238, 0.080892981,
0.07076032, 0.088251171, 0.045903253, 0.029692483, 0.058297815,
0.065338687, 0.071947108, 0.074878083, 0.03989637, -0.031255434,
0.029883299, 0.008148657, 0.078836907, 0.030064965, 0.048887451,
0.034827005, -0.065304898, 0.136766281, 0.019039148, 0.075818622,
0.037509338, 0.060238115, 0.03877549, 0.027433037, 0.033627931,
0.053488836, 0.024999278, 0.016037836, 0.011863841, -0.02610323,
0.046568702, 0.021033516, 0.052322078, 0.038724408, 0.023703685,
0.013482776, 0.018159864, 0.01098064, 0.014761168, 0.010590211,
0.001237805, 0.097323777, 0.088712748, 0.034759189, 0.022507656,
0.036512294, 0.048105471, 0.030822456, 0.07172102, 0.029038233,
0.032163273, 0.015176988, 0.041039802, -0.006245358, 0.049354849,
0.00318641, 0.012988646, 0.053365281, 0.03352103, 0.030454118,
-0.011862117, 0.015271336, 0.036371973, 0.045939313, 0.047864175,
0.053764664, 0.055199293, 0.072631781, 0.063949369, 0.09113885,
0.012533175, 0.049910727, 0.055676551, 0.008841404, 0.01962578,
0.015040302, 0.020496695, 0.054345313, 0.052533934), Investor = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("Active", "Total", "America",
"Africa", "China", "Europe"), class = "factor"), Date = structure(c(6L,
11L, 16L, 1L, 7L, 12L, 17L, 2L, 8L, 13L, 18L, 3L, 9L, 14L, 19L,
4L, 10L, 15L, 20L, 5L, 6L, 11L, 16L, 1L, 7L, 12L, 17L, 2L, 8L,
13L, 18L, 3L, 9L, 14L, 19L, 4L, 10L, 15L, 20L, 5L, 6L, 11L, 16L,
1L, 7L, 12L, 17L, 2L, 8L, 13L, 18L, 3L, 9L, 14L, 19L, 4L, 10L,
15L, 20L, 5L, 6L, 11L, 16L, 1L, 7L, 12L, 17L, 2L, 8L, 13L, 18L,
3L, 9L, 14L, 19L, 4L, 10L, 15L, 20L, 5L, 6L, 11L, 16L, 1L, 7L,
12L, 17L, 2L, 8L, 13L, 18L, 3L, 9L, 14L, 19L, 4L, 10L, 15L, 20L,
5L, 6L, 11L, 16L, 1L, 7L, 12L, 17L, 2L, 8L, 13L, 18L, 3L, 9L,
14L, 19L, 4L, 10L, 15L, 20L, 5L), .Label = c("12/1/2012", "12/1/2013",
"12/1/2014", "12/1/2015", "12/1/2016", "3/1/2012", "3/1/2013",
"3/1/2014", "3/1/2015", "3/1/2016", "6/1/2012", "6/1/2013", "6/1/2014",
"6/1/2015", "6/1/2016", "9/1/2012", "9/1/2013", "9/1/2014", "9/1/2015",
"9/1/2016"), class = "factor")), class = "data.frame", row.names = c(NA,
-120L))
library(tidyverse)
# Compute each investor's running cumulative return, quarter by quarter.
#
# `Date` is a factor of "m/d/Y" strings whose levels are in alphabetical order
# ("12/1/2012", "12/1/2013", ..., "3/1/2012", ...). Sorting on it directly puts
# the rows in that order rather than in chronological order, so the running
# product would be accumulated over the quarters in the wrong sequence.
# Parse it to a real Date before sorting.
df %>%
  arrange(Investor, as.Date(as.character(Date), format = "%m/%d/%Y")) %>%
  group_by(Investor) %>%
  mutate(
    return_coef = 1 + Total_Return,           # growth factor for the quarter
    return_coef_cuml = cumprod(return_coef),  # compounded growth to date
    return_cuml = return_coef_cuml - 1        # cumulative return to date
  ) %>%
  ungroup()
#> # A tibble: 120 x 7
#> Quarter Total_Return Investor Date return_coef return_coef_cuml return_cuml
#> <fct> <dbl> <fct> <fct> <dbl> <dbl> <dbl>
#> 1 2012Q1 0.0406 Active 3/1/2012 1.04 1.04 0.0406
#> 2 2012Q2 0.0127 Active 6/1/2012 1.01 1.05 0.0538
#> 3 2012Q3 0.0531 Active 9/1/2012 1.05 1.11 0.110
#> 4 2012Q4 0.0487 Active 12/1/2012 1.05 1.16 0.164
#> 5 2013Q1 0.0371 Active 3/1/2013 1.04 1.21 0.207
#> 6 2013Q2 0.0414 Active 6/1/2013 1.04 1.26 0.257
#> 7 2013Q3 0.0524 Active 9/1/2013 1.05 1.32 0.323
#> 8 2013Q4 0.0498 Active 12/1/2013 1.05 1.39 0.389
#> 9 2014Q1 0.0533 Active 3/1/2014 1.05 1.46 0.463
#> 10 2014Q2 0.0510 Active 6/1/2014 1.05 1.54 0.537


Combine factor/cluster levels into clusters of certain size in R

I have a simple dataframe reproducible by running:
structure(list(cluster = c(1L, 2L, 3L, 4L, 4L, 5L, 6L, 4L, 4L,
7L, 3L, 3L, 4L, 8L, 4L, 5L, 2L, 4L, 9L, 1L, 9L, 10L, 7L, 6L,
4L, 11L, 7L, 10L, 7L, 11L, 12L, 8L, 9L, 1L, 6L, 8L, 4L, 2L, 6L,
1L, 9L, 13L, 10L, 12L, 1L, 7L, 10L, 12L, 12L, 4L, 9L, 13L, 5L,
7L, 5L, 8L, 11L, 14L, 1L, 12L, 10L, 14L, 11L, 4L, 12L, 11L, 4L,
4L, 12L, 5L, 4L, 2L, 13L, 12L, 1L, 4L, 12L, 6L, 4L, 7L, 8L, 15L,
3L, 8L, 7L, 12L, 8L, 2L, 7L, 7L, 2L, 13L, 6L, 6L, 8L, 8L, 11L,
2L, 7L, 13L, 3L, 13L, 11L, 2L, 8L, 15L, 7L, 4L, 6L, 6L, 10L,
3L, 8L, 11L, 4L, 9L, 1L, 12L, 6L, 11L, 6L, 6L, 14L, 7L, 8L, 2L,
5L, 4L, 6L), clusterSize = c(8L, 9L, 6L, 19L, 19L, 6L, 13L, 19L,
19L, 13L, 6L, 6L, 19L, 12L, 19L, 6L, 9L, 19L, 6L, 8L, 6L, 6L,
13L, 13L, 19L, 9L, 13L, 6L, 13L, 9L, 11L, 12L, 6L, 8L, 13L, 12L,
19L, 9L, 13L, 8L, 6L, 6L, 6L, 11L, 8L, 13L, 6L, 11L, 11L, 19L,
6L, 6L, 6L, 13L, 6L, 12L, 9L, 3L, 8L, 11L, 6L, 3L, 9L, 19L, 11L,
9L, 19L, 19L, 11L, 6L, 19L, 9L, 6L, 11L, 8L, 19L, 11L, 13L, 19L,
13L, 12L, 2L, 6L, 12L, 13L, 11L, 12L, 9L, 13L, 13L, 9L, 6L, 13L,
13L, 12L, 12L, 9L, 9L, 13L, 6L, 6L, 6L, 9L, 9L, 12L, 2L, 13L,
19L, 13L, 13L, 6L, 6L, 12L, 9L, 19L, 6L, 8L, 11L, 13L, 9L, 13L,
13L, 3L, 13L, 12L, 9L, 6L, 19L, 13L)), .Names = c("cluster",
"clusterSize"), row.names = c(NA, -129L), class = "data.frame")
Here's what it looks like:
head(clusterdata)
cluster clusterSize
1 1 8
2 2 9
3 3 6
4 4 19
5 4 19
6 5 6
I have a total of 15 clusters and a total of 129 observations. I need each cluster to be of size 9. It's okay if there are not 15 clusters in the end.
In other words, I would like to combine clusters in a way that produces clusters of size no more than 9 and possibly leaves a cluster that is smaller than 9. Clusters that are originally bigger than 9 should be split up, always keeping 9 as the size and putting the remainder in one or more "left-over" clusters. Clusters that are originally smaller than 9 should be combined with other clusters to form clusters of size 9, again putting the remainder in the "left-over" cluster(s).
I have no idea how to go about this in R. Any ideas would be much appreciated...

Resources