Unnest and Pivot Longer with Duplicate Columns - r

I have a nested df that I am trying to clean up.
Sample Data:
df <-
tibble::tribble(
~idTeam, ~ptsTotalBehindFirst, ~ptsOverall, ~ptsDiffLastPeriod, ~rankOverall, ~ptsBattingBehindFirst, ~ptsBatting, ~ptsDiffBattingLastPeriod, ~dataBatting, ~rankBatting, ~ptsPitchingBehindFirst, ~ptsPitching, ~ptsDiffPitchingLastPeriod, ~dataPitching, ~rankPitching,
"2", "0", "111", "-4", 1L, "0", "65", "0", list(abbr = c("OBP", "HR", "RBI", "R", "SB"), roto_points = c(13, 13, 13, 13, 13), value = c(0.3663, 384, 1012, 1102, 164), diff = c(0, 0, 0, 0, 0), rank = c(1, 1, 1, 1, 1)), 1L, "5", "46", "-4", list(abbr = c("S", "W", "K", "ERA", "WHIP"), roto_points = c(12, 6, 11, 8, 9), value = c(94, 89, 1576, 3.946, 1.2179), diff = c(0, -2, -2, 0, 0), rank = c(2, 8, 3, 6, 5)), 3L,
"8", "13.5", "97.5", "2", 2L, "13", "52", "0", list(abbr = c("OBP", "HR", "RBI", "R", "SB"), roto_points = c(12, 11, 11, 12, 6), value = c(0.3576, 323, 954, 1011, 89), diff = c(0, 0, 0, 0, 0), rank = c(2, 3, 3, 2, 8)), 3L, "5.5", "45.5", "2", list(abbr = c("S", "W", "K", "ERA", "WHIP"), roto_points = c(2, 7.5, 10, "13", 13), value = c(56, 91, 1508, 3.688, 1.1474), diff = c(-1, 1.5, 0.5, 1, 0), rank = c(12, 6, 4, 1, 1)), 4L
)
The data I am trying to unnest is stored in the dataBatting and dataPitching columns. I am trying to unnest all of the fields in both columns and bind the results as rows. Something akin to pivot_longer, but I wasn't sure of the right way to do this with 5 duplicate columns nested within 2 separate columns.
My attempt to do this was:
# Attempt: widen and unnest both nested columns in a single pipeline.
# This raises the duplicate-name error quoted below, presumably at the second
# unnest_wider(): dataPitching expands to the same names (abbr, roto_points,
# value, diff, rank) that were already created from dataBatting.
df %>%
unnest_wider(dataBatting) %>%
unnest(c(abbr, roto_points, value, diff, rank)) %>%
unnest_wider(dataPitching) %>%
unnest(c(abbr, roto_points, value, diff, rank))
Error is:
Error: Column names `abbr`, `roto_points`, `value`, `diff`, `rank` must not be duplicated.
Use .name_repair to specify repair.
Call `rlang::last_error()` to see a backtrace
My issue is that I want to bind the same columns from dataPitching that have the same column names as dataBatting (abbr, roto_points, value, diff, rank).
I also want to change the name of the columns that are duplicates. Is tidyr::hoist a better way to do this?
Desired df:
tibble::tribble(
~idTeam, ~ptsTotalBehindFirst, ~ptsOverall, ~ptsDiffLastPeriod, ~rankOverall, ~ptsBattingBehindFirst, ~ptsBatting, ~ptsDiffBattingLastPeriod, ~abbr, ~roto_points5, ~value, ~diff, ~rank, ~rankPitching, ~ptsPitchingBehindFirst, ~ptsPitching, ~ptsDiffPitchingLastPeriod,
2, 0, 111, -4, 1, 0, 65, 0, "OBP", 13, 0.3663, 0, 1, 3, 5, 46, -4,
2, 0, 111, -4, 1, 0, 65, 0, "HR", 13, 384, 0, 1, 3, 5, 46, -4,
2, 0, 111, -4, 1, 0, 65, 0, "RBI", 13, 1012, 0, 1, 3, 5, 46, -4,
2, 0, 111, -4, 1, 0, 65, 0, "R", 13, 1102, 0, 1, 3, 5, 46, -4,
2, 0, 111, -4, 1, 0, 65, 0, "SB", 13, 164, 0, 1, 3, 5, 46, -4,
2, 0, 111, -4, 1, 0, 65, 0, "S", 12, 94, 0, 2, 3, 5, 46, -4,
2, 0, 111, -4, 1, 0, 65, 0, "W", 6, 89, -2, 8, 3, 5, 46, -4,
2, 0, 111, -4, 1, 0, 65, 0, "K", 11, 1576, -2, 3, 3, 5, 46, -4,
2, 0, 111, -4, 1, 0, 65, 0, "ERA", 8, 3.946, 0, 6, 3, 5, 46, -4,
2, 0, 111, -4, 1, 0, 65, 0, "WHIP", 9, 1.2179, 0, 5, 3, 5, 46, -4,
8, 13.5, 97.5, 2, 2, 13, 52, 0, "OBP", 12, 0.3576, 0, 2, 4, 5.5, 45.5, 2,
8, 13.5, 97.5, 2, 2, 13, 52, 0, "HR", 11, 323, 0, 3, 4, 5.5, 45.5, 2,
8, 13.5, 97.5, 2, 2, 13, 52, 0, "RBI", 11, 954, 0, 3, 4, 5.5, 45.5, 2,
8, 13.5, 97.5, 2, 2, 13, 52, 0, "R", 12, 1011, 0, 2, 4, 5.5, 45.5, 2,
8, 13.5, 97.5, 2, 2, 13, 52, 0, "SB", 6, 89, 0, 8, 4, 5.5, 45.5, 2,
8, 13.5, 97.5, 2, 2, 13, 52, 0, "S", 2, 56, -1, 12, 4, 5.5, 45.5, 2,
8, 13.5, 97.5, 2, 2, 13, 52, 0, "W", 7.5, 91, 1.5, 6, 4, 5.5, 45.5, 2,
8, 13.5, 97.5, 2, 2, 13, 52, 0, "K", 10, 1508, 0.5, 4, 4, 5.5, 45.5, 2,
8, 13.5, 97.5, 2, 2, 13, 52, 0, "ERA", 13, 3.688, 1, 1, 4, 5.5, 45.5, 2,
8, 13.5, 97.5, 2, 2, 13, 52, 0, "WHIP", 13, 1.1474, 0, 1, 4, 5.5, 45.5, 2
)

An option is to loop over the 'dataBatting' and 'dataPitching' column names, run unnest_wider on each one separately, unnest the columns of interest, and bind the rows together (map_dfr - the 'dfr' suffix means the list of data.frames or tibbles is row-bound into a single data frame). One thing to note is that many tidyverse functions are type sensitive. Here, some list elements have different types (for example, one roto_points vector contains the string "13" and is therefore character), which would cause a problem in unnest unless a 'ptype' is supplied. To avoid that, we can use type.convert to pick each element's type automatically from its values and then do the unnesting.
library(dplyr)
library(tidyr)
library(purrr)
library(stringr)
# Unnest each nested stats column on its own, then stack the results row-wise.
# Both nested columns expand to the same field names, so they cannot be
# widened side by side in one data frame.
nested_cols <- c("dataBatting", "dataPitching")
stat_cols <- c("abbr", "roto_points", "value", "diff", "rank")

map_dfr(nested_cols, function(nested_col) {
  df %>%
    # all_of() makes it explicit that nested_col holds a column name.
    unnest_wider(all_of(nested_col)) %>%
    # The widened fields are list-columns whose elements may differ in type.
    # type.convert() re-derives each element's type from its values;
    # as.is = TRUE keeps strings as character instead of factor.
    mutate(across(
      all_of(stat_cols),
      function(col) type.convert(col, as.is = TRUE)
    )) %>%
    unnest(all_of(stat_cols)) %>%
    # Drop whichever nested column was not expanded in this pass.
    select(-any_of(nested_cols))
})

Related

ggplot barplot to boxplot

I have code to produce a good barplot and I'm trying to create a boxplot with the same data.
The barplot displays the count of "response" across all people (id). I'd like to create a boxplot for each type of "response" to replace the 3 bars. Boxplots should be calculated from the count of that specific "response" for each participant. So far no luck because I'm stuck on how to count the response for each participant.
current code:
# Bar chart of how often each response value occurs across all rows of df.
# NOTE(review): `position = dodge` is passed to ggplot() rather than to a
# geom, so it does not look like it affects the bars - confirm before relying
# on it.
df %>%
ggplot(position = dodge) +
labs(title= "question") +
geom_bar(aes(x = response), fill="red") +
labs(y = "count", x = "responses") +
# Fix the y axis to 0-100 with a tick every 20.
scale_y_continuous(breaks=seq(0,100,20), limits = c(0,100))
output:
data sample:
structure(list(id = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5),
response = c(0, 1, 1, 0, 0, 0, 1, -1, 1, -1, 0, 1, -1, 1,
0, 0, 0, 0, 1, 1, 1, -1, 0, 1, 0, 1, 1, -1, 0, 1, 1, 1, 0,
1, 0, 0, 1, -1, 0, 1, 1, 1, -1, 1, 1, 1, 0, 0, -1, 1, 1,
-1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0,
1, 1, 0, 0, 0), iscorrect = c(0, 1, 1, 0, 0, 0, 1, 0, 1,
0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0,
0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0,
0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,
0, 1, 0, 0, 1, 1, 0, 0, 0), min = c(100, 150, 150,
50, 50, 50, 150, 100, 100, 100, 50, 100, 50, 150, 150, 150,
50, 100, 100, 100, 150, 150, 50, 50, 50, 150, 150, 100, 50,
100, 100, 150, 150, 50, 50, 50, 150, 100, 100, 100, 50, 100,
50, 150, 150, 150, 50, 100, 100, 100, 150, 150, 50, 50, 50,
150, 150, 100, 50, 100, 100, 150, 100, 50, 50, 50, 150, 100,
100, 50, 150, 100, 50, 150, 150), max = c(125.4, 180.8,
180.8, 62.4, 62.4, 62.4, 180.8, 125.4, 125.4, 125.4, 62.4,
125.4, 62.4, 180.8, 180.8, 180.8, 62.4, 125.4, 125.4, 125.4,
180.8, 180.8, 62.4, 62.4, 62.4, 180.8, 180.8, 125.4, 62.4,
125.4, 125.4, 180.8, 180.8, 62.4, 62.4, 62.4, 180.8, 125.4,
125.4, 125.4, 62.4, 125.4, 62.4, 180.8, 180.8, 180.8, 62.4,
125.4, 125.4, 125.4, 180.8, 180.8, 62.4, 62.4, 62.4, 180.8,
180.8, 125.4, 62.4, 125.4, 125.4, 180.8, 125.4, 62.4, 62.4,
62.4, 180.8, 125.4, 125.4, 62.4, 180.8, 125.4, 62.4, 180.8,
180.8), time = c(5, 7, 9, 5, 1, 7, 1, 1, 7, 3, 9, 9,
3, 5, 3, 1, 9, 5, 1, 7, 9, 3, 5, 7, 1, 5, 7, 3, 3, 9, 5,
7, 9, 5, 1, 7, 1, 1, 7, 3, 9, 9, 3, 5, 3, 1, 9, 5, 1, 7,
9, 3, 5, 7, 1, 5, 7, 3, 3, 9, 9, 7, 5, 7, 5, 9, 5, 3, 1,
1, 9, 7, 3, 3, 1)), row.names = c(NA, -75L), class = c("tbl_df",
"tbl", "data.frame"))
You can use this code:
# Count how often each participant gave each response, then draw one box per
# response value from those per-participant counts.
# `df` is the data frame from the question; count(id, response) returns an
# ungrouped result, so no ungroup() is needed afterwards.
df %>%
  count(id, response) %>%
  # A discrete x axis needs a factor, otherwise all values share one box.
  mutate(response = as.factor(response)) %>%
  ggplot(aes(x = response, y = n)) +
  geom_boxplot(fill = "red") +
  labs(y = "count", x = "responses")
Output:
You can try:
library(dplyr)
library(ggplot2)
# Per-participant counts of each response value, shown as one box per response.
# Counting with count(id, response) on the ungrouped data means the following
# mutate() is not asked to rewrite grouping columns.
df %>%
  count(id, response) %>%
  mutate(id = factor(id), response = factor(response)) %>%
  ggplot(aes(response, n)) +
  geom_boxplot(fill = "red") +
  scale_y_continuous(name = "Number of responses per participant")
Note that boxplots don't work well for discrete data like small counts (unless your actual data has a far higher number of participants with a far higher count per response)

Using gsub with a data frame getting an odd result, how you use gsub with a dataframe

I am trying to use a gsub function on my data frame. In my data frame, I have phrases like "text.Democrat17_P" and many others with different numbers. My goal is to replace phrases like this with just, "DEM".
I first wanted to test the gsub function with only one row before I replace every value in the data frame. However, when I ran my script, gsub seemed to disassemble my data frame and list numbers out instead.
My result looked like this:
[1] "c(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 16)"
[2] "c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4)"
[3] "c(0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1)"
[4] "c(2322, 2490, 2912, 3181, 3245, 2640, 3215, 4506, 3256, 2705, 2662, 5676, 7344, 2888, 2891, 4387, 9494, 2525, 3649, 1654, 2178, 2913, 2922, 3320, 7243, 5836, 6054, 6283, 4499, 5291, 4747, 2538, 5433, 5354, 5272, 4166, 3427, 5432, 4566, 5371, 5503, 4550, 1639, 2603, 3937, 2359, 1516, 1204, 826, 916, 1039, 1738, 2077, 874, 2495, 628, 582, 872, 2179, 682, 578, 476, 2207, 1178, 859, 1345, 1223, 2014, 438, 448, 1020, 879, 1117, 271, 210, 295, 233, 172, 77, 205, 3958)"
[5] "c(\"DEM\", \"text.Demminus14_P\", \"text.Demplus9_P\", \"text.Repminus11_O\", \"text.Repminus18_O\", \"text.Demminus12_P\", \"text.Repminus4_O\", \"text.Demminus12_P\", \"text.Repplus8_O\", \"text.Demminus4_P\", \"text.Demplus9_P\", \"text.Demminus20_P\", \"text.Repplus16_O\", \"text.Repminus10_O\", \"text.Repminus13_O\", \"text.Demplus18_P\", \"text.Repplus18_O\", \"text.Demplus1_P\", \"text.Repminus15_O\", \"text.Demminus11_P\", \"text.Repplus14_O\", \"text.Demminus8_P\", \"text.Repminus18_O\", \"text.Repminus13_O\", \"text.Demminus9_P\", \n\"text.Repminus13_O\", \"text.Repminus16_O\", \"text.Demminus9_P\", \"text.Repminus1_O\", \"text.Demplus15_P\", \"DEM\", \"text.Demminus1_P\", \"text.Repplus2_O\", \"text.Demminus18_P\", \"text.Repplus14_O\", \"text.Repminus20_O\", \"text.Repplus16_O\", \"text.Demplus2_P\", \"text.Repplus10_O\", \"text.Demminus18_P\", \"text.Repplus2_O\", \"text.Demminus15_P\", \"text.Repminus6_O\", \"text.Demminus19_P\", \"text.Repminus9_O\", \"text.Repplus15_O\", \"text.Repminus15_O\", \"text.Repminus8_O\", \"text.Repplus12_O\", \"text.Demminus19_P\", \n\"text.Repplus6_O\", \"text.Demplus13_P\", \"text.Demminus14_P\", \"text.Demminus5_P\", \"text.Demminus2_P\", \"text.Repplus1_O\", \"text.Repminus18_O\", \"text.Repplus14_O\", \"text.Demplus20_P\", \"text.Repplus6_O\", \"text.Repminus16_O\", \"text.Demminus19_P\", \"text.Demplus12_P\", \"text.Demminus12_P\", \"text.Demminus10_P\", \"text.Repplus5_O\", \"text.Demplus5_P\", \"text.Repplus17_O\", \"text.Repminus13_O\", \"text.Demplus3_P\", \"text.Demminus5_P\", \"text.Repminus10_O\", \"text.Repplus6_O\", \"text.Repplus16_O\", \"text.Repminus10_O\", \n\"text.Repplus1_O\", \"text.Demminus6_P\", \"text.Repplus5_O\", \"text.Demplus3_P\", \"text.Demplus3_P\", \"text.Repminus19_P\")"
Does anyone know why this happened and how I can get my result to look like the original data frame, with rows and columns?
This is the code that I am using:
# Take the first row of the data frame to test the replacement on.
DDMB <- DDMBehavfin[1,]
DDMB
# NOTE(review): x is a data frame here, not a character vector. gsub() works
# on character vectors, so the data frame is presumably coerced column by
# column, which would explain the one-string-per-column result shown above -
# confirm. Applying gsub() to the individual text column would avoid this.
gsub (pattern= "text.Demminus17_P", replacement = "DEM", x= DDMB)
Could this have to do something with the datatype of the columns? What can I do to my gsub function so that I can gain a regular looking data frame instead of a messy result like this?
I first want to tackle why my result looks odd, before using gsub to replace all of the values.
Thank you for any help.

Apply linear model formula to grouped data

I want to group my dataframe by Participant and iteratively apply a simple linear model formula, lm(Outcome ~ A, data = mydata), so that I end up with a new, separate dataframe with one coefficient for each Participant.
Here's a sample of mydata:
structure(list(Participant = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9, 9, 9,
9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 12,
12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 14), Outcome = c(15,
-4, 5, 25, 0, 3, 16, 0, 5, 0, 10, 0, 5, 0, 0, 0, 0, 9, 5, 1,
20, 11, 8, 15, 0, 0, 13, 22, 20, 0, 0, 0, 0, 0, 0, 10, 0, 12,
0, 0, 0, 0, 0, -12, 0, 0, 0, 0, 0, 0, 5, 9, 5, 0, 0, 10, 20,
0, 10, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0,
11, 12, 19, 0, 0, 10, 0, 10, -10, 0, 0, 0, 6, -13, 0, 0, 0, -4,
0, 0, 0, 0, 0), A = c(16, 50, 9, 25, 33, 3, 23, 13, 20, 11, 21,
20, 19, 36, 6, 22, 18, 20, 5, 6, 23, 43, 14, 46, 7, 18, 20, 78,
35, 5, 8, 5, 18, 9, 17, 71, 18, 26, 8, 56, 45, 29, 21, 10, 14,
15, 21, 11, 38, 26, 15, 9, 22, 20, 21, 51, 20, 29, 14, 48, 10,
21, 9, 11, 29, 6, 21, 25, 20, 27, 29, 36, 31, 7, 27, 38, 30,
32, 3, 43, 19, 28, 31, 33, 10, 9, 36, 45, 46, 27, 7, 21, 25,
15, 20, 35, 23, 22, 16, 24), B = c(11, 42, 17, 26, -1, -8, 18,
7, -25, 6, 11, 10, 14, 41, 11, 18, 23, 16, 10, 4, 47, 26, 14,
16, 12, 23, 0, 66, 20, -3, 5, 0, 53, 17, 10, 66, 20, 14, 8, 11,
25, 14, -6, 22, 2, -2, -29, 3, 31, 26, 10, 9, 17, -20, -19, 31,
0, -1, -6, -2, -10, 31, -11, -29, -21, -19, 21, 25, 18, 6, 13,
24, -31, 2, 2, 8, 3, 10, -19, 33, 5, 4, 16, 18, 10, 19, -14,
-25, 21, 16, 20, 13, 4, 5, -8, -15, 16, 12, -1, 14)), row.names = c(2041L,
2281L, 2521L, 2641L, 3901L, 4141L, 4201L, 4681L, 4801L, 4921L,
161L, 241L, 321L, 361L, 401L, 481L, 1241L, 2L, 42L, 82L, 122L,
162L, 202L, 362L, 482L, 1242L, 1562L, 1682L, 1802L, 1842L, 1922L,
43L, 123L, 323L, 483L, 1683L, 1963L, 2042L, 2102L, 2282L, 2402L,
2522L, 2642L, 2762L, 3482L, 3962L, 4382L, 4922L, 4982L, 5042L,
44L, 204L, 484L, 1444L, 1564L, 1684L, 45L, 325L, 965L, 1165L,
1445L, 1685L, 1765L, 1925L, 86L, 366L, 406L, 2043L, 2103L, 2343L,
2523L, 2583L, 2643L, 4083L, 4323L, 4983L, 407L, 1247L, 1407L,
1807L, 48L, 208L, 408L, 1248L, 2104L, 2164L, 2284L, 2404L, 2584L,
2644L, 2764L, 4384L, 2045L, 2105L, 2345L, 2405L, 2645L, 2765L,
4385L, 2046L), class = "data.frame")
And here's what my desired output would look like (with hypothetical coefficients):
Participant Coef
1 1 0.09
2 2 0.07
3 3 0.11
...
In the past, I've used the group_by function to group by Participant and calculate a descriptive stat (e.g., mean, median) for each. For instance, I can use the code below to create a dataframe, myMeans, with the mean Outcome for each participant:
# Mean Outcome per participant, returned as a plain data frame with the
# columns Participant and Outcome.
myMeans <- as.data.frame(
  mydata %>%
    group_by(Participant) %>%
    summarise(Outcome = mean(Outcome))
)
# Inspect the object that was just created.
head(myMeans)
Participant Outcome
1 1 7.0454545
2 2 9.8510638
3 3 10.0652174
4 4 5.2156863
5 5 0.5319149
6 6 6.1041667
I was hoping something like this would work to create a dataframe, myCoefficients:
# Attempt: get one lm() coefficient per participant straight from the pipe.
# NOTE(review): the pipe hands the grouped data frame to coef() as its first
# argument, and lm() is given no `data`, so the model is never fitted per
# group - this is the non-working version the question asks about.
myCoefficients<- as.data.frame(mydata %>%
group_by(Participant) %>%
coef(lm(Outcome ~ A)))
...but it evidently does not.
Any suggestions?
Try lmList. Note that the nlme package already comes with R.
library(nlme)
# Fit Outcome ~ A separately for each Participant (the grouping factor after
# `|`) and return one row of coefficients per participant.
coef(lmList(Outcome ~ A | Participant, mydata))
giving:
(Intercept) A
1 8.122188 -0.079910741
2 2.111455 0.001547988
3 1.722062 0.304546146
4 -2.127148 0.164948454
5 -1.883623 0.076522166
6 2.463768 0.103024575
7 7.133361 -0.043622767
8 0.000000 0.000000000
9 1.370920 0.006923838
10 8.286374 0.081986143
11 -5.359477 0.283224401
12 -4.486884 0.143756558
13 -1.333333 0.034188034
14 0.000000 NA
Here is a solution using sapply.
# Fit Outcome ~ A once per participant and keep both coefficients.
# vapply() guarantees a 2 x n numeric matrix (rows: intercept, slope) even
# when a group's slope cannot be estimated and comes back as NA.
participants <- unique(mydata$Participant)
fit <- vapply(
  participants,
  function(p) {
    rows <- mydata[mydata$Participant == p, , drop = FALSE]
    coef(lm(Outcome ~ A, data = rows))
  },
  numeric(2)
)

# Combine the results into a data frame with explicit column names.
answer <- data.frame(
  Participant = participants,
  intercept = fit[1, ],
  A_coefficient = fit[2, ]
)

# Slightly more compact alternative: transpose the coefficient matrix and keep
# lm()'s own coefficient names as column names.
answer <- cbind(Participant = participants, as.data.frame(t(fit)))
Another reasonable option, mentioned in the comments, is to use split and lapply.
For a tidyverse solution, there is a similar use case in ?do. Reframing this for the current example:
library(tidyverse)
# Fit Outcome ~ A within each participant and spread the coefficients into
# columns. `mydata` is the data frame from the question.
mydata %>%
  group_by(Participant) %>%
  # One model per group, stored in the list-column `mod`.
  do(mod = lm(Outcome ~ A, data = .)) %>%
  summarise(Participant = Participant,
            coef = list(mod$coefficients)) %>%
  # One column per coefficient (intercept and slope for A).
  unnest_wider(coef)
Note that this requires the relatively recent tidyr 1.0.0 for unnest_wider().

How to Obtain Constant Term in Linear Discriminant Analysis

Consider dput:
structure(list(REAÇÃO = structure(c(0, 1, 0, 0, 1, 0, 1, 1,
0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1,
1, 0, 1, 1, 0, 1, 1), format.spss = "F11.0"), IDADE = structure(c(22,
38, 36, 58, 37, 31, 32, 54, 60, 34, 45, 27, 30, 20, 30, 30, 22,
26, 19, 18, 22, 23, 24, 50, 20, 47, 34, 31, 43, 35, 23, 34, 51,
63, 22, 29), format.spss = "F11.0"), ESCOLARIDADE = structure(c(6,
12, 12, 8, 12, 12, 10, 12, 8, 12, 12, 12, 8, 4, 8, 8, 12, 8,
9, 4, 12, 6, 12, 12, 12, 12, 12, 12, 12, 8, 8, 12, 16, 12, 12,
12), format.spss = "F11.0"), SEXO = structure(c(1, 1, 0, 0, 1,
0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,
0, 1, 0, 1, 0, 0, 0, 1, 1, 1), format.spss = "F11.0")), .Names = c("REAÇÃO",
"IDADE", "ESCOLARIDADE", "SEXO"), row.names = c(NA, -36L), class = "data.frame")
where: REAÇÃO is a dependent variable in the model.
Constant: -4.438.
How can I obtain this value using a simple function in R?
To obtain the constant term of a discriminant analysis in R (using the MASS package):
# Weight the per-group means by the prior probabilities stored in the model.
groupmean <- model$prior %*% model$means
# Multiply the weighted means by the discriminant coefficients.
constant <- groupmean %*% model$scaling
constant
where model is the lda discriminant expression:
# Fit the discriminant model; y, x1, x2, xn and mydata are placeholders for
# the response, the predictors and the data frame being analysed.
model<-lda(y~x1+x2+xn,data=mydata)
model

Joining two weighted Graphs in R and keeping weight as sum

I have the same question as this how to merge two weighted graph and sum weigths.
But here is my R code for better understanding:
# Full graph on 10 vertices named a..j, every edge with weight 1.
g1 <- graph.full(10)
V(g1)$name <- letters[1:vcount(g1)]
E(g1)$weight <- 1
# Full graph on 5 vertices; it shares the vertices a and b with g1.
g3 <- graph.full(5)
V(g3)$name <- c("a", "b", "x", "y", "z")
E(g3)$weight <- 1
# Union of the two graphs, matching vertices by name.
graph.union.by.name(g1, g3)
The weight in the merged graph should be 2 on edges that exist in both g1 and g3 (e.g. a - b).
And the dput of graphs is:
> dput(g1)
structure(list(10, FALSE, c(1, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3,
4, 5, 6, 7, 8, 9, 3, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 5, 6,
7, 8, 9, 6, 7, 8, 9, 7, 8, 9, 8, 9, 9), c(0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 8), c(0, 1, 9,
2, 10, 17, 3, 11, 18, 24, 4, 12, 19, 25, 30, 5, 13, 20, 26, 31,
35, 6, 14, 21, 27, 32, 36, 39, 7, 15, 22, 28, 33, 37, 40, 42,
8, 16, 23, 29, 34, 38, 41, 43, 44), c(0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44), c(0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45),
c(0, 9, 17, 24, 30, 35, 39, 42, 44, 45, 45), list(c(1, 0,
1), structure(list(name = "Full graph", loops = FALSE), .Names = c("name",
"loops")), structure(list(name = c("a", "b", "c", "d", "e",
"f", "g", "h", "i", "j")), .Names = "name"), structure(list(
weight = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), .Names = "weight"))), class = "igraph")
> dput(g2)
structure(list(10, FALSE, c(1, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3,
4, 5, 6, 7, 8, 9, 3, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 5, 6,
7, 8, 9, 6, 7, 8, 9, 7, 8, 9, 8, 9, 9), c(0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 8), c(0, 1, 9,
2, 10, 17, 3, 11, 18, 24, 4, 12, 19, 25, 30, 5, 13, 20, 26, 31,
35, 6, 14, 21, 27, 32, 36, 39, 7, 15, 22, 28, 33, 37, 40, 42,
8, 16, 23, 29, 34, 38, 41, 43, 44), c(0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44), c(0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45),
c(0, 9, 17, 24, 30, 35, 39, 42, 44, 45, 45), list(c(1, 0,
1), structure(list(name = "Full graph", loops = FALSE), .Names = c("name",
"loops")), structure(list(name = c("a", "b", "c", "d", "e",
"f", "g", "h", "i", "j")), .Names = "name"), structure(list(
weight = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), .Names = "weight"))), class = "igraph")
Is it possible with igraph or do i need some workaround?
This will be supported in the next version, until then here is a workaround:
# Merge two weighted graphs, summing the weights of edges present in both.
#
# Args:
#   g1, g2: igraph graphs whose edges carry a numeric `weight` attribute.
#
# Returns:
#   A graph rebuilt from the combined edge list, with one `weight` per edge.
#   It is directed if and only if g1 is directed.
#
# Note: the result is rebuilt from edges only, so a vertex that has no edge in
# either input does not appear in it.
mymerge <- function(g1, g2) {
  directed <- is.directed(g1)

  # Edge list reduced to from/to/weight, with a consistent endpoint order for
  # undirected graphs so that a-b and b-a are recognised as the same edge.
  edge_table <- function(g) {
    edges <- get.data.frame(g, what = "edges")
    if (!"weight" %in% names(edges)) {
      stop("both graphs need a 'weight' edge attribute", call. = FALSE)
    }
    edges <- edges[, c("from", "to", "weight"), drop = FALSE]
    if (!directed) {
      lo <- pmin(edges$from, edges$to)
      hi <- pmax(edges$from, edges$to)
      edges$from <- lo
      edges$to <- hi
    }
    edges
  }

  # Full outer join: edges found in only one graph get NA for the other weight.
  e <- merge(edge_table(g1), edge_table(g2), by = c("from", "to"), all = TRUE)
  newe <- data.frame(
    e[, c("from", "to"), drop = FALSE],
    # na.rm = TRUE makes an edge found in one graph keep its own weight.
    weight = rowSums(e[, c("weight.x", "weight.y")], na.rm = TRUE)
  )
  graph.data.frame(newe, directed = directed)
}
mymerge(g1, g3)
# IGRAPH UNW- 13 54 --
# + attr: name (v/c), weight (e/n)
mymerge(g1, g3)["a", "b"]
# [1] 2

Resources