I have an original data frame with many rows (I know that some of them are replicated):
> dput(DATA)
structure(list(N_b = c(5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), N_l = c(4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3), S = c(12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9), Proposed.Girder3 = c(0.52, 0.52, 0.52, 0.52, 0.52, 0.52,
0.52, 0.52, 0.52, 0.52, 0.52, 0.65, 0.65, 0.65, 0.65, 0.65, 0.65,
0.65, 0.65, 0.65, 0.65, 0.65, 0.51, 0.51, 0.51, 0.51, 0.51, 0.51,
0.51, 0.51, 0.51, 0.51, 0.51, 0.52, 0.52, 0.52, 0.52, 0.52, 0.52,
0.52, 0.52, 0.52, 0.52, 0.52, 0.65, 0.65, 0.65, 0.65, 0.65, 0.65,
0.65, 0.65, 0.65, 0.65, 0.65, 0.51, 0.51, 0.51, 0.51, 0.51, 0.51,
0.51, 0.51, 0.51, 0.51, 0.51, 0.52, 0.52, 0.52, 0.52, 0.52, 0.52,
0.52, 0.52, 0.52, 0.52, 0.52, 0.65, 0.65, 0.65, 0.65, 0.65, 0.65,
0.65, 0.65, 0.65, 0.65, 0.65, 0.51, 0.51, 0.51, 0.51, 0.51, 0.51,
0.51, 0.51, 0.51, 0.51, 0.51, 0.52, 0.52, 0.52, 0.52, 0.52, 0.52,
0.52, 0.52, 0.52, 0.52, 0.52, 0.65, 0.65, 0.65, 0.65, 0.65, 0.65,
0.65, 0.65, 0.65, 0.65, 0.65, 0.51, 0.51, 0.51, 0.51, 0.51, 0.51,
0.51, 0.51, 0.51, 0.51, 0.51, 0.52, 0.52, 0.52, 0.52, 0.52, 0.52,
0.52, 0.52, 0.52, 0.52, 0.52, 0.65, 0.65, 0.65, 0.65, 0.65, 0.65,
0.65, 0.65, 0.65, 0.65, 0.65, 0.51, 0.51, 0.51, 0.51, 0.51, 0.51,
0.51, 0.51, 0.51, 0.51, 0.51, 0.52, 0.52, 0.52, 0.52, 0.52, 0.52,
0.52, 0.52, 0.52, 0.52, 0.52, 0.65, 0.65, 0.65, 0.65, 0.65, 0.65,
0.65, 0.65, 0.65, 0.65, 0.65, 0.51, 0.51, 0.51, 0.51, 0.51, 0.51,
0.51, 0.51, 0.51, 0.51, 0.51), Lanes = c(4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3), UG = c(100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 108, 108, 108,
108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
108, 108, 108, 108, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 124, 124,
124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
124, 124, 124, 124, 124, 84, 84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84, 92, 92, 92, 92, 92, 92, 92, 92,
92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92,
92, 92, 92, 92, 92, 92, 92, 92, 92), CSi = c(0.498857761911128,
0.506171857609652, 0.491697098095741, 0.5060648860829, 0.51602587099039,
0.49808311021839, 0.484326916022697, 0.486261403372008, 0.484696645284676,
0.542438052075464, 0.501306385491762, 0.634937543079967, 0.642078412670016,
0.618943708143363, 0.642001779473278, 0.658268730337476, 0.630133634378208,
0.61289410586889, 0.615963132516221, 0.615769133902813, 0.686518342284576,
0.63848257046785, 0.477839632977349, 0.481308189937141, 0.466821213798956,
0.484416044616133, 0.495362194700848, 0.47320175377938, 0.46075484570102,
0.462933293434182, 0.46296944030225, 0.519970813725933, 0.478798800223883,
0.499649613847278, 0.50349372475143, 0.490922567156329, 0.506660508807011,
0.514932254618641, 0.497406049605651, 0.483910162470329, 0.484700178543721,
0.483690038460146, 0.541097742382397, 0.49864706679875, 0.638103261594521,
0.645030188324246, 0.622321358649241, 0.644932774331382, 0.661080914216008,
0.633266424051986, 0.616403425794446, 0.619662818975923, 0.619368626094409,
0.689062534462536, 0.641640617456748, 0.481809699169021, 0.484780552887199,
0.471041353094871, 0.489040154106175, 0.499309812059152, 0.477094533923277,
0.46479660834156, 0.467205762187312, 0.465930259455921, 0.524495868736496,
0.482408228794972, 0.498202392725583, 0.502618858440184, 0.489487329812287,
0.503840707835284, 0.514021291777706, 0.495297263755732, 0.482202022708633,
0.483839116286323, 0.539456419577533, 0.539456419855875, 0.498082441376597,
0.630858086293792, 0.63756198028618, 0.615358704038841, 0.637489319425201,
0.653397114261802, 0.625957013049853, 0.609464834716713, 0.612676084901444,
0.612530536192196, 0.612530533035217, 0.63430804238461, 0.48126980512503,
0.484644526574109, 0.470238034678857, 0.487935539905689, 0.49887970982208,
0.476533589513863, 0.464212956954452, 0.466465412750473, 0.46642671330667,
0.52379164029609, 0.48210024308779, 0.495313482556363, 0.499430830726606,
0.486650554549094, 0.501074567462105, 0.511559881655238, 0.492318751733689,
0.479463896518796, 0.480962859032664, 0.479819940340815, 0.536420385604673,
0.494978560935791, 0.628848475181058, 0.63411772566777, 0.613650360338718,
0.637687298501148, 0.651062927764633, 0.624780782896341, 0.608015537732378,
0.609978147167127, 0.610267677247537, 0.679026578215092, 0.630653747823922,
0.484330062840347, 0.483272947533652, 0.469030546777778, 0.486654560445457,
0.497498231247353, 0.475287888336171, 0.46299937090013, 0.465252231525678,
0.465143863657343, 0.52242431063692, 0.480777563607102, 0.509393190572395,
0.0306794102100841, 0.499801210623311, 0.514261273631288, 0.524257222129056,
0.507090829156798, 0.492293988923706, 0.494634579696826, 0.492902890462201,
0.551785598208862, 0.510878424089161, 0.639185175219647, 0.646663818507054,
0.622627268125056, 0.646370988091098, 0.662988587960886, 0.634650836091679,
0.616659986042537, 0.619748241531646, 0.61951439818747, 0.691805427278811,
0.64284460028603, 0.484887769151249, 0.48865031929918, 0.473940218959181,
0.484917825918303, 0.496960554187183, 0.473813537802849, 0.461261792526738,
0.463580335134683, 0.463394052048788, 0.520637868360136, 0.479413561159061,
0.503027081012421, 0.508886440468214, 0.493841291641758, 0.508191035709441,
0.517649011896045, 0.500464428119884, 0.486297995480364, 0.48821271352713,
0.486779215307284, 0.544834557703888, 0.503994168801514, 0.63420530022389,
0.641441566573504, 0.617939244523909, 0.641442341788609, 0.657759058843292,
0.629532366426079, 0.611898297010965, 0.615054071992963, 0.614543613798064,
0.686239403738044, 0.637751280776591, 0.476610456219972, 0.480071378890351,
0.465670604256241, 0.483172142840914, 0.494035003336151, 0.472023145046551,
0.459586167724079, 0.461826826254301, 0.461674647472426, 0.518534655365997,
0.477607992305144)), row.names = c(NA, -198L), class = "data.frame")
I try to sort it based on the column S with:
# Desired order of the values found in column S.
target <- c(12, 15, 9)
# match() returns only the FIRST row index of DATA$S for each element of
# `target`, so this subsetting keeps one row per target value.
DATA <- DATA[match(x = target, table = DATA$S), ]
The result is a 3-row data frame, but I want to keep the same number of rows and just reorder them:
> dput(DATA)
structure(list(N_b = c(5, 5, 5), N_l = c(4, 5, 3), S = c(12,
15, 9), Proposed.Girder3 = c(0.52, 0.65, 0.51), Lanes = c(4,
5, 3), UG = c(100, 100, 100), CSi = c(0.498857761911128, 0.634937543079967,
0.477839632977349)), row.names = c(1L, 12L, 23L), class = "data.frame")
Here is a roundabout way:
library(dplyr)
# Reorder every row of DATA so that S follows the custom order 12, 15, 9.
# Note: the input must be the data frame `DATA`; a bare `data` would refer to
# the base R function utils::data() and mutate() would fail on it.
DATA %>%
  # convert S to a factor whose level order is the desired sort order
  mutate(S = factor(S, levels = c(12, 15, 9))) %>%
  # arrange() sorts a factor by the order of its levels, not by numeric value
  arrange(S) %>%
  # convert S back to numeric; go through character because as.numeric() on a
  # factor returns the integer level codes (1, 2, 3), not the original values
  mutate(S = as.numeric(as.character(S)))
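The arguments to `match` should be swapped, and its result should be passed to `order`: `match(DATA$S, target)` gives, for every row, the position of its `S` value in `target`, and `order` turns those positions into a permutation of all the rows.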
# Desired order of the values found in column S.
target <- c(12, 15, 9)
# For every row, look up the position of its S value in `target`, then reorder
# all rows by that position (rows whose S is not in `target` sort last).
DATA <- DATA[order(match(x = DATA$S, table = target)), ]
Related
I would like to convert the information I have into a plain list of edges and nodes, but I don't know how to do it. The raw data from `dput` looks like this. If someone knows how to convert this object into something easier to use, I would appreciate it. I can visualise the graph with `plot`, but to edit it I need more precise information.
library(igraph)
# Print a text representation of the igraph object so it can be shared.
dput(net2$graph_pajek)
structure(list(30, FALSE, c(1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 6, 7, 13, 13,
14, 15, 16, 18, 20, 20, 21, 27, 27, 27, 27, 29, 2, 2, 2, 2, 2,
2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5,
6, 6, 7, 8, 8, 9, 9, 9, 10, 10, 10, 10, 10, 11, 11, 12, 12, 12,
13, 13, 13, 14, 14, 14, 15, 15, 15, 16, 18, 18, 18, 19, 20, 20,
21, 21, 23, 24, 25, 26, 27, 27, 27, 29, 3, 3, 3, 3, 3, 3, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
5, 5, 5, 5, 6, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 10,
10, 10, 10, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 13, 13, 14, 14, 15, 15, 15, 15, 15), list(c(1, 0, 1), structure(list(), .Names = character(0)),
list(name = c("A", "B", "C",
"D", "E", "F", "G", "H",
"I", "J", "K",
"L", "M", "N",
"O", "P", "Q", "R",
"S", "T", "U",
"V", "W", "X", "Y", "Z",
"AB", "AC", "AD", "AE"
), deg = c(248, 532, 855, 574, 1761, 261, 229, 216, 554,
628, 774, 223, 502, 295, 266, 910, 227, 312, 364, 260, 294,
741, 227, 471, 392, 376, 292, 295, 212, 287), size = c(2.,
6, 9, 6, 20,
2, 2, 2, 6,
7, 8, 2, 7,
3, 3, 10, 2,
3, 4, 2, 3.,
8, 2, 5, 4,
4, 3, 3, 2,
3), label.cex = c(0.7, 0.7, 0.7, 0.7, 0.7, 0.7,
0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7,
0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7
), id = c("A", "B", "C",
"D", "E", "F", "G", "H",
"I", "J", "K",
"L", "M", "N",
"O", "P", "Q", "R",
"S", "T", "U",
"V", "W", "X", "Y", "Z",
"AB", "AC", "AD", "AE"
)), list(num = c(4, 4, 4, 4, 7, 7, 7, 7, 7, 7, 7, 3, 3, 3,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 3, 3, 3, 1, 1, 2,
2, 1, 1, 1, 1, 2, 2, 1, 4, 4, 4, 4, 1, 7, 7, 7, 7, 7, 7,
7, 6, 6, 6, 6, 6, 6, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 1, 2, 2, 1, 2, 2, 3, 3, 3, 5, 5, 5, 5, 5, 2,
2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 1, 2,
2, 2, 2, 1, 1, 1, 1, 3, 3, 3, 1, 6, 6, 6, 6, 6, 6, 40, 40,
40, 40, 40, 40, 40, 40, 40), weight = c(4, 4, 4, 4,
7, 7, 7, 7, 7, 7, 7, 3, 3, 3, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 3, 3, 3, 1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 4,
4, 4, 4, 1, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 1, 2, 2, 1, 2, 2,
3, 3, 3, 5, 5, 5, 5, 5, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 1, 3, 3, 3, 1, 2, 2, 2, 2, 1, 1, 1, 1, 3, 3, 3,
1, 6, 6, 6, 6, 6, 6, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 7, 7, 7, 7, 7, 7, 7, 1, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7,
4, 4, 4, 4, 4, 4, 4, 4, 1, 18, 18))), <environment>), class = "igraph")
Are you looking for something like `get.data.frame`? It returns the edges of a graph (with their attributes) as a data frame:
> get.data.frame(net)
from to weight
1 A B 0.63502922
2 B C 0.79410173
3 C D 0.90802625
4 D E 0.09408188
5 E F 0.16450634
6 F G 0.75931882
7 G H 0.30409658
8 H I 0.23990324
9 I J 0.84762277
10 A J 0.88657718
data
Since I cannot reproduce the example in your post, I created a dummy example net like below
# Build a 10-vertex ring graph to stand in for the data from the question.
net <- make_ring(10)
# Name the vertices "A", "B", ... (one letter per vertex).
net <- set_vertex_attr(net, name = "name", value = LETTERS[seq_len(vcount(net))])
# Give every edge a random weight drawn uniformly from [0, 1].
net <- set_edge_attr(net, name = "weight", value = runif(ecount(net)))
To clarify a couple things:
The igraph file is not a plot per se, but a graph structure (as in, nodes and edges).
igraph has functions for plotting graphs, but there is no single and standard way of plotting a graph - instead, different algorithms can be used to determine visually-ideal ways of displaying them, and these algorithms oftentimes rely on random initializations.
The outputs from the plotting functions of igraph are only relevant in terms of R base plot drawing logic, AFAIK they don't use an intermediate format with coordinates handled in a user-comprehensible structure. You can nevertheless manage lots of aspects of how they are drawn - see ?igraph::igraph.plotting.
I'm trying to compare a control group with an experimental group on a range of variables to show that they are similar (baseline).
I thus need to run multiple t-tests (unpaired / Welch t-tests). My data is in a long format, with the first variable called "Group" holding either the number 1 or the number 2. There are some missing values in some of my other variables, but they are pretty random.
So when I run t-test manually using this line of code:
# Two-sample t-test of variable_1 between the two levels of Group
# (t.test() defaults to var.equal = FALSE, i.e. the Welch test).
t.test(variable_1 ~ Group, data = df)
it works.
I then tried to do it all at once using this line of code:
# Run a t-test of every outcome column (columns 2 to 71) against Group and
# collect the p-values into one vector.
sapply(df[, 2:71], function(outcome) {
  test_result <- t.test(outcome ~ df$Group)
  test_result$p.value
})
But I get the following error:
grouping factor must have exactly 2 levels
Could anyone help?
Here is what the structure looks like
structure(list(Group = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 2, 2), EM_Accuracy_Time_Airport = c(3, 3, 0,
1, 1, 2, 2, 1, 1, 3, 3, 2, 2, 2, 1, 3, 1, 3, 1, 1), EM_Accuracy_Place_Airport = c(2,
2, 1, 2, 1, 2, 2, 1, 1, 2, 0, 2, 2, 0, 2, 2, 2, 1, 1, 1), EM_Accuracy_Expl_Airport = c(2,
2, 2, 0, 2, 2, 2, 1, 2, 2, 2, 2, 2, 0, 0, 1, 0, 2, 2, 1), EM_Accuracy_Death_Airport = c(0,
2, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0), EM_Accuracy_Time_Metro = c(3,
1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 2, 1, 3, 1, 1, 2, 1, 3, 3), EM_Accuracy_Death_Metro = c(3,
0, 1, 0, 1, 1, 0, 0, 0, 3, 0, 0, 1, 0, 3, 1, 1, 1, 0, 0), EM_Accuracy_PC_Time_Airpot = c(100,
100, 0, 33.3333333333333, 33.3333333333333, 66.6666666666667,
66.6666666666667, 33.3333333333333, 33.3333333333333, 100, 100,
66.6666666666667, 66.6666666666667, 66.6666666666667, 33.3333333333333,
100, 33.3333333333333, 100, 33.3333333333333, 33.3333333333333
), EM_Accuracy_PC_Place_Airport = c(100, 100, 50, 100, 50, 100,
100, 50, 50, 100, 0, 100, 100, 0, 100, 100, 100, 50, 50, 50),
EM_Accuracy_PC_Expl_Airport = c(100, 100, 100, 0, 100, 100,
100, 50, 100, 100, 100, 100, 100, 0, 0, 50, 0, 100, 100,
50), EM_Accuracy_PC_Death_Airport = c(0, 66.6666666666667,
0, 0, 33.3333333333333, 66.6666666666667, 0, 0, 0, 0, 0,
0, 66.6666666666667, 0, 0, 0, 100, 0, 0, 0), EM_Accuracy_PC_Time_Metro = c(100,
33.3333333333333, 0, 0, 33.3333333333333, 33.3333333333333,
0, 33.3333333333333, 33.3333333333333, 33.3333333333333,
33.3333333333333, 66.6666666666667, 33.3333333333333, 100,
33.3333333333333, 33.3333333333333, 66.6666666666667, 33.3333333333333,
100, 100), EM_Accuracy_PC_Death_Metro = c(100, 0, 33.3333333333333,
0, 33.3333333333333, 33.3333333333333, 0, 0, 0, 100, 0, 0,
33.3333333333333, 0, 100, 33.3333333333333, 33.3333333333333,
33.3333333333333, 0, 0), EM_ACCURACY_PC = c(83.3333333333333,
66.6666666666667, 30.5555555555556, 22.2222222222222, 47.2222222222222,
66.6666666666666, 44.4444444444444, 27.7777777777778, 36.1111111111111,
72.2222222222222, 38.8888888888889, 55.5555555555555, 66.6666666666666,
27.7777777777778, 44.4444444444444, 52.7777777777778, 55.5555555555556,
52.7777777777778, 47.2222222222222, 38.8888888888889), EM_Certainty_Time_Airport = c(3,
1, 1, 1, 2, 2, 1, 1, 2, 3, 3, 2, 2, 2, 4, 2, 3, 3, 2, 2),
EM_Certainty__Place_Airport = c(3, 4, 2, 2, 2, 2, 4, 1, 3,
4, 4, 4, 4, 3, 3, 4, 4, 3, 2, 3), EM_Certainty__Expl_Airport = c(4,
2, 3, 1, 2, 3, 2, 1, 2, 4, 1, 3, 2, 2, 1, 3, 1, 2, 2, 3),
EM_Certainty__Death_Airport = c(1, 1, NA, 1, 2, 1, 3, 1,
2, 3, NA, 3, 2, 1, 2, 1, 1, 1, 4, 4), EM_Certainty__Time_Metro = c(3,
3, 1, 1, 2, 2, 2, 1, 3, 2, 3, 2, 3, 2, 2, 2, 3, 1, 2, 2),
EM_Certainty__Death_Metro = c(2, 1, 1, NA, 2, 1, 1, 1, 2,
1, NA, 3, 2, 1, 1, 1, 1, 1, 1, 4), EM_CERTAINTY = c(2.66666666666667,
2, 1.6, 1.2, 2, 1.83333333333333, 2.16666666666667, 1, 2.33333333333333,
2.83333333333333, 2.75, 2.83333333333333, 2.5, 1.83333333333333,
2.16666666666667, 2.16666666666667, 2.16666666666667, 1.83333333333333,
2.16666666666667, 3), EM_CONFIDENCE = c(5, 5, 1, 2, 2, 4,
5, 2, 3, 4, 5, 5, 3, 3, 4, 4, 3, 2, 3, 2), FBM_CONFIDENCE = c(4,
6, 7, 7, 5, 4, 2, 7, 5, 6, 6, 7, 6, 7, 3, 6, 6, 4, 5, 6),
FBM_Vividness_Time = c(3, 3, 1, 4, 3, 2, 4, 3, 4, 4, 1, 3,
4, 4, 3, 3, 3, 2, 4, 3), FBM_Vividness_How = c(4, 4, 2, 4,
4, 3, 4, 4, 4, 4, 3, 4, 3, 4, 4, 4, 4, 4, 4, 4), FBM_Vividness_Where = c(4,
4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4),
FBM_Vividness_WithWhom = c(4, 4, 3, 4, 3, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4), FBM_Vividness_WereDoing = c(4,
4, 1, 4, 3, 4, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4),
FBM_Vividness_Did_After = c(4, 4, 3, 4, 2, 3, 4, 4, 2, 4,
1, 4, 4, 4, 3, 4, 4, 3, 4, 4), FBM_VIVIDNESS = c(3.83333333333333,
3.83333333333333, 2, 4, 3.16666666666667, 3.33333333333333,
4, 3.83333333333333, 3.66666666666667, 4, 2.33333333333333,
3.83333333333333, 3.83333333333333, 4, 3.66666666666667,
3.83333333333333, 3.83333333333333, 3.5, 4, 3.83333333333333
), FBM_Details_NB_T2 = c(3, 5, 0, 5, 5, 5, 2, 5, 1, 5, 3,
5, 5, 5, 2, 4, 2, 3, 5, 5), P_Novelty_5 = c(5, 6.2, 6.5,
5.6, 4.8, 5.4, 4, 4.2, 4.4, 5.8, 3.4, 5.8, 6, 5.8, 3.8, 6.4,
6.8, 6.6, 7, 3), P_Suprise_emotion = c(6, 6, 6, 6, 4, 5,
1, 7, 1, 5, 4, 5, 7, 7, 6, 4, 7, 7, 2, 5), P_Surprise_Expected = c(1,
3, 5, 2, 4, 3, 6, 2, 2, 1, 6, 4, 3, 1, 5, 1, 1, 1, 5, 4),
P_Surprise_Unbelievable = c(5, 4, 1, 6, 4, 4, 2, 7, 1, 4,
1, 6, 7, 7, 6, 3, 7, 7, 5, 3), `P_Consequence-Importance_5` = c(5.6,
4.8, 3.4, 5, 4.8, 4, 5, 5.4, 3, 5.2, 6.8, 5.4, 4, 4.4, 6,
3.8, 4, 4.8, 5, 5.2), P_Emotional_Intensity_4 = c(5.25, 5.75,
3, 4.75, 4.75, 6, 4, 5.25, 2.5, 5.5, 7, 6.5, 5.75, 6.75,
6.75, 6, 6.25, 6, 5, 2.5), P_Social_Sharing_6 = c(3.66666666666667,
3.83333333333333, 3.4, 3.16666666666667, 3, 3.33333333333333,
3.8, 3.16666666666667, 2.16666666666667, 4.16666666666667,
4, 4.5, 4.5, 4.33333333333333, 4, 3.16666666666667, 3.66666666666667,
4, NA, NA), P_Media_3 = c(4.66666666666667, 4, 3, 2.66666666666667,
2.66666666666667, 2.33333333333333, 3, 2.33333333333333,
2.33333333333333, 3.33333333333333, 4.33333333333333, 5,
4.33333333333333, 5, 4, 2, 3, 3.33333333333333, 2, 1.66666666666667
), P_Ruminations = c(3, NA, 3, 2, 4, NA, 4, 2, 1, 4, 4, 4,
2, 4, 2, 3, 3, 3, 4, 3), P_Novelty_Common_rev = c(6, 7, 7,
7, 4, 6, 4, 7, 2, 6, 3, 7, 7, 7, 3, 6, 7, 7, 7, 3), P_Novelty_Unusual = c(2,
5, 7, 7, 3, 5, 3, 3, 5, 6, 1, 4, 7, 1, 4, 6, 6, 6, 7, 2),
P_Novelty_Special = c(6, 6, NA, 6, 5, 5, 4, 3, 5, 4, 1, 5,
6, 7, 4, 6, 7, 7, 7, 3), P_Novelty_Singular = c(4, 6, 5,
1, 5, 5, 4, 1, 3, 6, 5, 6, 4, 7, 3, 7, 7, 6, 7, 2), P_Novelty_Ordinary_rev = c(7,
7, 7, 7, 7, 6, 5, 7, 7, 7, 7, 7, 6, 7, 5, 7, 7, 7, 7, 5),
P_Consequence = c(6, 7, 5, 4, 5, 4, 5, 3, 5, 5, 7, 5, 5,
2, 6, 6, 1, 4, 6, 3), P_Importance_self = c(4, 3, 3, 4, 4,
3, 5, 6, 1, 5, 7, 5, 3, 3, 5, 2, 2, 4, 5, 3), `P_Importance_friends&family` = c(4,
4, 3, 4, 4, 4, 4, 6, 1, 5, 6, 5, 3, 3, 5, 2, 6, 4, 5, 10),
P_Importance_Belgium = c(7, 5, 3, 7, 6, 5, 6, 7, 3, 7, 7,
7, 5, 7, 7, 5, 6, 7, 6, 6), P_Importance_International = c(7,
5, 3, 6, 5, 4, 5, 5, 5, 4, 7, 5, 4, 7, 7, 4, 5, 5, 3, 4),
P_Emotional_Intensity_Upset = c(4, 5, NA, 3, 3, 5, 3, 5,
2, 5, 7, 5, 5, 6, 7, 6, 6, 5, 5, 3), P_Emotional_Intensity_Indiferent_rev = c(7,
7, 5, 7, 6, 7, 4, 6, 4, 7, 7, 7, 7, 7, 7, 7, 7, 7, NA, 4),
P_Emotional_Intensity_Affected = c(6, 6, 3, 5, 5, 6, 5, 6,
2, 5, 7, 7, 5, 7, 7, 6, 6, 6, NA, 2), P_Emotional_Intensity_Shaken = c(4,
5, 1, 4, 5, 6, 4, 4, 2, 5, 7, 7, 6, 7, 6, 5, 6, 6, 5, 1),
P_Rehearsal_Media_TV = c(5, 3, NA, 3, 2, 3, NA, 1, 1, 4,
3, 5, 5, 5, 2, 3, 2, 2, 2, 2), P_Rehearsal_Media_Internet = c(4,
4, 1, 3, 2, 2, 2, 4, 3, 2, 5, 5, 3, 5, 5, 1, 5, 4, 2, 1),
P_Rehearsal_Media_Social_Networks = c(5, 5, 5, 2, 4, 2, 4,
2, 3, 4, 5, 5, 5, 5, 5, 2, 2, 4, 2, 2), P_Social_Sharing_How_Often = c(4,
5, 4, 4, 4, 3, 3, 3, 3, 5, 4, 5, 5, 5, 5, 3, 4, 4, 5, NA),
P_Social_Sharing_With_How_Many_People = c(5, 4, NA, 3, 3,
3, 3, 3, 2, 5, 3, 5, 5, 3, 5, 3, 3, 4, 3, NA), PK_Shops_YN = c(0,
1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1),
PK_Comic = c(0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0,
0, 0, 0, 1, 0), PK_Hotel = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 0, 0, 0, 0), PK_Decoration_Maelbeek = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1),
PK_Stations_before_after_Maelbeek = c(0, 0.5, 0, 0, 0, 0,
0, 0, 0.5, 1, 0, 0, 0.5, 0.5, 0, 0, 0.5, 0, 0.5, 0), PK_TOTAL_PC = c(0,
50, 0, 40, 40, 40, 20, 0, 10, 60, 20, 40, 90, 70, 20, 0,
30, 20, 70, 40), SI_Attachment_BXL = c(6, 4, 1, 4, 2, 5,
1, 6, 5, 4, 2, 6, 6, 7, 1, 3, 6, 4, 5, 4), SI_Pride_BXL = c(1,
2, 1, 2, 1, 2, 1, 5, 1, 6, 1, 1, 7, 7, 1, 2, 6, 1, 3, 3),
SI_Attachment_Belgium = c(7, 3, 5, 5, 4, 6, 7, 6, 5, 6, 7,
7, 7, 7, 5, 6, 7, 6, 4, 2), SI_Pride_Belgium = c(7, 2, 6,
4, 2, 6, 4, 5, 1, 5, 1, 6, 7, 7, 5, 7, 7, 6, 2, 2), SI_Attachment_EU = c(6,
4, 2, 5, 4, 4, 5, 4, 7, 4, 1, 6, 7, 7, 5, 4, 6, 6, 2, 6),
SI_Pride_EU = c(7, 1, 1, 4, 3, 4, 4, 4, 1, 4, 1, 6, 7, 7,
4, 3, 6, 6, 2, 4)), .Names = c("Group", "EM_Accuracy_Time_Airport",
"EM_Accuracy_Place_Airport", "EM_Accuracy_Expl_Airport", "EM_Accuracy_Death_Airport",
"EM_Accuracy_Time_Metro", "EM_Accuracy_Death_Metro", "EM_Accuracy_PC_Time_Airpot",
"EM_Accuracy_PC_Place_Airport", "EM_Accuracy_PC_Expl_Airport",
"EM_Accuracy_PC_Death_Airport", "EM_Accuracy_PC_Time_Metro",
"EM_Accuracy_PC_Death_Metro", "EM_ACCURACY_PC", "EM_Certainty_Time_Airport",
"EM_Certainty__Place_Airport", "EM_Certainty__Expl_Airport",
"EM_Certainty__Death_Airport", "EM_Certainty__Time_Metro", "EM_Certainty__Death_Metro",
"EM_CERTAINTY", "EM_CONFIDENCE", "FBM_CONFIDENCE", "FBM_Vividness_Time",
"FBM_Vividness_How", "FBM_Vividness_Where", "FBM_Vividness_WithWhom",
"FBM_Vividness_WereDoing", "FBM_Vividness_Did_After", "FBM_VIVIDNESS",
"FBM_Details_NB_T2", "P_Novelty_5", "P_Suprise_emotion", "P_Surprise_Expected",
"P_Surprise_Unbelievable", "P_Consequence-Importance_5", "P_Emotional_Intensity_4",
"P_Social_Sharing_6", "P_Media_3", "P_Ruminations", "P_Novelty_Common_rev",
"P_Novelty_Unusual", "P_Novelty_Special", "P_Novelty_Singular",
"P_Novelty_Ordinary_rev", "P_Consequence", "P_Importance_self",
"P_Importance_friends&family", "P_Importance_Belgium", "P_Importance_International",
"P_Emotional_Intensity_Upset", "P_Emotional_Intensity_Indiferent_rev",
"P_Emotional_Intensity_Affected", "P_Emotional_Intensity_Shaken",
"P_Rehearsal_Media_TV", "P_Rehearsal_Media_Internet", "P_Rehearsal_Media_Social_Networks",
"P_Social_Sharing_How_Often", "P_Social_Sharing_With_How_Many_People",
"PK_Shops_YN", "PK_Comic", "PK_Hotel", "PK_Decoration_Maelbeek",
"PK_Stations_before_after_Maelbeek", "PK_TOTAL_PC", "SI_Attachment_BXL",
"SI_Pride_BXL", "SI_Attachment_Belgium", "SI_Pride_Belgium",
"SI_Attachment_EU", "SI_Pride_EU"), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
The error you get means that there's a problem in your dataset, with at least one of your variables.
Here's a process to help you spot problematic variables:
library(tidyverse)
# List the variables for which a t-test against Group cannot be computed
# because one of the two groups has fewer than 2 non-missing observations.
df %>%
  group_by(Group) %>%                    # for each group value
  summarise_all(~sum(!is.na(.))) %>%     # count non-NA values for each variable
  gather(var, value, -Group) %>%         # long format: one row per (Group, variable)
  spread(Group, value, sep = "_") %>%    # wide format: columns Group_1 and Group_2
  # flag a variable when EITHER group is too small, not only group 2
  filter(Group_1 < 2 | Group_2 < 2)
# # A tibble: 5 x 3
# var Group_1 Group_2
# <chr> <int> <int>
# 1 P_Emotional_Intensity_Affected 18 1
# 2 P_Emotional_Intensity_Indiferent_rev 18 1
# 3 P_Social_Sharing_6 18 0
# 4 P_Social_Sharing_How_Often 18 1
# 5 P_Social_Sharing_With_How_Many_People 17 1
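A count of 0 in one group will throw the error about the grouping factor needing exactly 2 levels, because that group disappears entirely once the missing values are dropped.
A count of 1 will throw an error about not having enough observations in one of your groups.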
After spotting those you have to treat them accordingly and then your original t.test code should work.
So my problem was just missing data in one variable.
However, if you are looking to run multiple t-tests on data in a long format, this code works:
# Run a t-test of every outcome column (columns 2 to 71) against Group and
# collect the p-values. A column for which the test cannot be computed yields
# NA instead of aborting the whole run.
sapply(df[, 2:71], function(i) {
  # number of non-missing observations of this column in each group
  n_obs <- table(df$Group[!is.na(i)])
  # t.test() needs both groups present, with at least 2 observations each
  if (length(n_obs) < 2 || any(n_obs < 2)) {
    return(NA_real_)
  }
  t.test(i ~ df$Group)$p.value
})
I want to plot a partition layout a.k.a. partition table in R. This type of diagram plots hierarchical data horizontally from left to right with nodes as space-filling tiles of proportional height and with tile-adjacency as connections. The x-axis represents hierarchy levels and the y-axis whatever metric used.
Flawed makeshift example cobbled in Excel:
It could also be compared to:
a horizontal uniform-depth icicle plot
a non-nested treemap
a table with variable-height cells
This type of diagram is available in D3.js.
How can I do it in R? Is it possible with ggplot2?
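EDIT: Here's my actual data. I need emp_est (not a count) to be the y-axis. The emp_est of a level-N NAICS code equals the sum of the emp_est values of its daughter level-(N+1) NAICS codes (for example, the level-2 codes 621, 622, 623 and 624 sum to the level-1 code 62).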
structure(list(naics_level = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3), naics = c("62----", "44----", "54----", "72----", "61----",
"52----", "31----", "56----", "42----", "51----", "81----", "23----",
"55----", "48----", "71----", "53----", "22----", "99----", "11----",
"21----", "541///", "722///", "611///", "622///", "561///", "621///",
"624///", "445///", "551///", "623///", "238///", "423///", "524///",
"522///", "523///", "511///", "813///", "424///", "334///", "448///",
"812///", "531///", "713///", "452///", "441///", "518///", "721///",
"236///", "446///", "444///", "311///", "485///", "811///", "517///",
"332///", "454///", "325///", "453///", "333///", "519///", "484///",
"339///", "237///", "451///", "711///", "221///", "492///", "425///",
"447///", "481///", "442///", "493///", "532///", "336///", "562///",
"488///", "443///", "323///", "326///", "335///", "712///", "515///",
"512///", "327///", "337///", "313///", "322///", "312///", "533///",
"521///", "315///", "331///", "321///", "314///", "316///", "487///",
"525///", "324///", "212///", "115///", "483///", "486///", "114///",
"113///", "213///", "211///", "7225//", "6113//", "6221//", "5511//",
"4451//", "6241//", "5613//", "5415//", "5221//", "5417//", "6211//",
"5416//", "5413//", "5241//", "6231//", "2382//", "5617//", "5239//",
"5112//", "4481//", "7139//", "6111//", "6216//", "6214//", "5411//",
"5182//", "7211//", "5412//", "6244//", "3345//", "6232//", "4234//",
"5242//", "4461//", "8131//", "7223//", "8121//", "4411//", "4441//",
"4521//", "6212//", "5616//", "5313//", "6233//", "6223//", "5419//",
"4244//", "8111//", "4236//", "6116//", "5171//", "5111//", "2383//",
"5191//", "6213//", "5231//", "5614//", "2362//", "2361//", "5311//",
"5418//", "4541//", "4529//", "3344//", "3254//", "4251//", "2381//",
"8129//", "4471//", "4511//", "4921//", "5611//", "6222//", "2389//",
"4931//", "4811//", "2211//", "4431//", "4242//", "4238//", "3391//",
"3231//", "3118//", "8139//", "4841//", "3327//", "8123//", "4854//",
"7224//", "8134//", "5222//", "6215//", "6219//", "4413//", "8133//",
"2371//", "4243//", "4453//", "6243//", "3261//", "4422//", "4532//",
"4543//", "5615//", "4842//", "5619//", "3364//", "6242//", "8132//",
"3399//", "5312//", "4539//", "3332//", "7113//", "3359//", "4231//",
"4851//", "4239//", "7121//", "2373//", "4482//", "4237//", "4452//",
"5179//", "4859//", "3119//", "4853//", "5121//", "5621//", "3339//",
"6112//", "4249//", "4241//", "6117//", "5629//", "4233//", "3329//",
"3323//", "5324//", "5151//", "4248//", "4483//", "5223//", "4885//",
"5321//", "4421//", "4512//", "3328//", "5414//", "7111//", "4881//",
"4232//", "3113//", "4533//", "7112//", "5612//", "6115//", "3333//",
"6239//", "5172//", "3115//", "3116//", "3222//", "3121//", "5322//",
"8122//", "8112//", "3363//", "6114//", "4247//", "2212//", "4884//",
"3342//", "8113//", "4235//", "4246//", "5331//", "3341//", "4442//",
"3372//", "3133//", "3262//", "5211//", "3114//", "3251//", "3353//",
"8114//", "3117//", "4531//", "3252//", "3132//", "4412//", "3152//",
"3334//", "3371//", "3219//", "7131//", "5152//", "3273//", "3255//",
"3335//", "3321//", "3324//", "3259//", "3322//", "3272//", "2379//",
"4922//", "5622//", "7115//", "3315//", "3149//", "4855//", "3279//",
"3162//", "4872//", "2372//", "4812//", "3351//", "4852//", "2213//",
"3336//", "3366//", "3141//", "7212//", "3362//", "5259//", "4542//",
"3241//", "4883//", "3256//", "2123//", "3221//", "4831//", "7114//",
"3369//", "3379//", "3313//", "1152//", "3346//", "5323//", "3343//",
"7132//", "3274//", "3314//", "4862//", "4889//", "7213//", "3271//",
"5122//", "3169//", "3352//", "4871//", "3331//", "3161//", "3326//",
"3312//", "3159//", "4245//", "5174//", "1133//", "3361//", "3253//",
"3325//", "3112//", "1141//", "1153//", "2131//", "1142//", "3211//",
"3111//", "2111//", "5232//", "3365//", "1151//", "1132//", "3131//",
"3212//", "4832//", "1131//", "2122//", "3122//", "3311//", "4879//"
), emp_est = c(444491, 266068, 242031, 217912, 172492, 156366,
150651, 141859, 106269, 103451, 94438, 93862, 78951, 61390, 42552,
38517, 8786, 644, 475, 373, 242031, 193891, 172492, 145566, 134867,
132980, 92554, 79489, 78951, 73391, 63089, 59167, 56680, 53411,
44938, 42428, 41420, 38574, 35544, 35191, 34789, 29626, 29104,
27284, 25031, 24318, 24021, 20156, 19473, 19472, 18503, 18446,
18229, 17982, 16842, 14807, 13685, 12597, 11880, 11531, 11199,
10983, 10617, 10365, 9554, 8786, 8565, 8528, 8025, 7760, 7578,
7390, 7369, 7308, 6992, 6955, 6756, 6511, 6428, 5969, 3894, 3649,
3543, 2642, 2586, 2413, 2154, 1871, 1522, 1256.5, 1145, 1043,
1038, 988, 799, 645, 324, 319, 291, 282, 282, 148, 108, 85, 58,
24, 168952, 123448, 122772, 78951, 70361, 59968, 52588, 47253,
44865, 43270, 40371, 39138, 37926, 36667, 35557, 35527, 34236,
33878, 30390, 28710, 28005, 26446, 26006, 25954, 24481, 24318,
23538, 23091, 22838, 22786, 20565, 20463, 20013, 19473, 18890,
18796, 18617, 18271, 17994, 17856, 17654, 17590, 15429, 15258,
15038, 14706, 14407, 13729, 13324, 12736, 12354, 12038, 11949,
11531, 11385, 11049, 10961, 10105, 10051, 9894, 9756, 9507, 9428,
9329, 9296, 8528, 8059, 8033, 8025, 7895, 7846, 7786, 7756, 7554,
7390, 7258, 6823, 6756, 6702, 6689, 6661, 6511, 6443, 6412, 6351,
6332, 6317, 6272, 6143, 6056, 5918, 5827, 5783, 5689, 5663, 5526,
5381, 5379, 5242, 5138, 5047, 4979, 4977, 4905, 4848, 4742, 4569,
4506, 4399, 4322, 4303, 4287, 4274, 4237, 4212, 4152, 4057, 4045,
3894, 3855, 3810, 3802, 3749, 3611, 3599, 3551, 3493, 3418, 3289,
3182, 3138, 3092, 3063, 3020, 2994, 2859, 2819, 2795, 2739, 2722,
2701, 2671, 2628, 2617, 2607, 2531, 2470, 2467, 2410, 2322, 2316,
2302, 2273, 2220, 2066, 2059, 2054, 2029, 2011, 1939, 1901, 1876,
1870, 1868, 1851, 1822, 1795, 1698, 1650, 1627, 1578, 1567, 1547,
1540, 1531, 1523, 1522, 1515, 1478, 1330, 1322, 1290, 1256.5,
1251, 1174, 1171, 1165, 1112, 1111, 1109, 1084, 1071, 1067, 1035,
1021, 992, 944, 927, 925, 909, 908, 827, 823, 820, 809.5, 801,
719, 719, 709, 672, 661, 645, 630, 618, 596, 547, 517, 502, 482,
395, 385, 358, 355, 343, 342, 340, 324, 323, 319, 311, 304, 288,
286, 274, 257, 238, 235, 209, 206, 197, 172, 170, 155, 154, 148,
148, 144, 141, 133, 125, 111, 104, 96, 94, 92, 90, 80, 78, 78,
78, 77, 77, 73, 73, 68, 66, 66, 58, 41.5, 38, 28, 24, 14.5, 13,
12, 9.5, 9.5, 8, 8, 2.5, 2.5, 2.5, 2.5, 2.5)), .Names = c("naics_level",
"naics", "emp_est"), row.names = c(NA, -390L), class = c("tbl_df",
"tbl", "data.frame"))
Here's one simple example in ggplot2:
library(ggplot2)

# Toy three-level hierarchy: every parent code is a prefix of its child code.
leaf_codes <- c(rep('aaa', 4), 'aab', rep('aba', 2), 'abb', 'aca', 'acb')
d <- data.frame(
  level3 = leaf_codes,
  level2 = substr(leaf_codes, 1, 2),
  level1 = substr(leaf_codes, 1, 1),
  id = seq_along(leaf_codes),
  stringsAsFactors = FALSE
)

# Long format: one row per (id, level) pair, with the code stored in `label`.
d2 <- tidyr::gather(d, key = level, value = label, -id)

# One full-width stacked bar per level; each segment counts the rows that share
# a label, and the label text is centred inside its segment.
ggplot(d2, aes(x = level, group = label, fill = level)) +
  geom_bar(position = 'stack', colour = 1, width = 1) +
  geom_text(
    aes(label = label),
    stat = 'count',
    position = position_stack(vjust = 0.5)
  )
I'm trying to build an auto.arima forecast with predictors, like the example below. I've noticed that my predictor is non-stationary, so I was wondering whether I should difference the predictor before passing it to the xreg parameter, as I've shown below. The real data set is much larger; this is just an example. Any advice is greatly appreciated.
Code:
# Response series: the first 60 observations are used for fitting,
# observations 61-100 are held out.
tsTrain <- tsTiTo[seq_len(60)]
tsTest <- tsTiTo[seq(61, 100)]

# Number of differences suggested for the predictor (reported as 1).
ndiffs(ds[["CustCount"]])

# Difference the predictor once and keep the first 100 values.
# NOTE(review): diff() returns one element fewer than its input, and element i
# is x[i + 1] - x[i], so Xreg may be shifted by one step relative to
# tsTiTo -- confirm the intended alignment.
diffedCustCount <- diff(ds[["CustCount"]], differences = 1)
Xreg <- diffedCustCount[seq_len(100)]

# Predictor, split the same way as the response series.
xregTrain2 <- Xreg[seq_len(60)]
xregTest2 <- Xreg[seq(61, 100)]

# Fit on the training window, then forecast the 40 held-out steps using the
# held-out predictor values.
Arima.fit2 <- auto.arima(tsTrain, xreg = xregTrain2)
Acast2 <- forecast(Arima.fit2, h = 40, xreg = xregTest2)
Data:
dput(ds$CustCount[1:100])
c(3, 3, 1, 4, 1, 3, 2, 3, 2, 4, 1, 1, 5, 6, 8, 5, 2, 7, 7, 3, 2, 2, 2, 1, 3, 2, 3, 1, 1, 2, 1, 1, 3, 2, 2, 2, 3, 7, 5, 6, 8, 7, 3, 5, 6, 6, 8, 4, 2, 1, 2, 1, NA, NA, 4, 2, 2, 4, 11, 2, 8, 1, 4, 7, 11, 5, 3, 10, 7, 1, 1, NA, 2, NA, NA, 2, NA, NA, 1, 2, 3, 5, 9, 5, 9, 6, 6, 1, 5, 3, 7, 5, 8, 3, 2, 6, 3, 2, 3, 1 )
dput(tsTiTo[1:100])
c(45, 34, 11, 79, 102, 45, 21, 45, 104, 20, 2, 207, 45, 2, 3, 153, 8, 2, 173, 11, 207, 79, 45, 153, 192, 173, 130, 4, 173, 174, 173, 130, 79, 154, 4, 104, 192, 153, 192, 104, 28, 173, 52, 45, 11, 29, 22, 81, 7, 79, 193, 104, 1, 1, 46, 130, 45, 154, 153, 7, 174, 21, 193, 45, 79, 173, 45, 153, 45, 173, 2, 1, 2, 1, 1, 8, 1, 1, 79, 45, 79, 173, 45, 2, 173, 130, 104, 19, 4, 34, 2, 192, 42, 41, 31, 39, 11, 79, 4, 79)
The xreg argument in auto.arima performs a dynamic regression; that is, you are fitting a linear regression on the predictors and modelling its errors with an ARMA process.
While auto.arima() used to require manual differencing for non-stationary data when external regressors are included, this is no longer the case. auto.arima() will take non-stationary data as an input and determine the order of differencing using a unit-root test.
See this Post from Rob Hyndman for further detail.
I am using the principal() function from the psych package to replicate SPSS principal component analysis results in R, as recommended in: https://stats.stackexchange.com/questions/612/is-pca-followed-by-a-rotation-e-g-varimax-still-pca
I'm using the code below:
# Single-component principal() call on `ws`, with scores requested and
# mean imputation of missing values. This is the call that raises the
# "system is exactly singular" error.
com <- principal(
  ws,
  nfactors = 1,
  residuals = FALSE,
  rotate = "varimax",
  n.obs = NA,
  covar = TRUE,
  scores = TRUE,
  missing = TRUE,
  impute = "mean",
  oblique.scores = TRUE,
  method = "correlation"
)
But I'm getting the following error:
Error in solve.default(model, r) :
Lapack routine dgesv: system is exactly singular: U[104,104] = 0
In addition: Warning message:
In cor.smooth(r) : Matrix was not positive definite, smoothing was done
Any advice would be much appreciated. I'm attaching a sample of my data (by dput(ws)) below. Thank you so much!
structure(list(hv001 = c(906, 3101, 4001, 6801, 1502, 3508, 4301,
2205, 508, 901, 906, 2304, 4913, 3803, 5704, 2902, 4701, 2303,
3602, 5905, 4805, 1405, 6301, 4007, 904, 302, 7003, 1602, 7107,
4801, 1803, 3403, 6904, 3002, 509, 6904, 1407, 5803, 5802, 4504,
405, 502, 7207, 2706, 2703, 4007, 3403, 703, 2304, 4806, 403,
502, 5607, 902, 2707, 2709, 6402, 6902, 1902, 7501, 2202, 2201,
7205, 4901, 2204, 3504, 5608, 3202, 3505, 2202, 5608, 2306, 7402,
7109, 7303, 5904, 3602, 6102, 101, 7302, 906, 3506, 6003, 6302,
909, 4801, 4806, 3104, 5708, 6802, 404, 4903, 5609, 2101, 3502,
4002, 1602, 7206, 6903, 7103), hv002 = c(26, 372, 147, 215, 23,
48, 130, 17, 48, 214, 93, 15, 122, 27, 172, 5, 53, 100, 177,
341, 279, 42, 47, 10, 15, 47, 90, 106, 218, 110, 95, 86, 69,
77, 47, 49, 43, 55, 324, 88, 120, 177, 112, 462, 347, 3, 69,
36, 16, 561, 109, 322, 28, 47, 123, 8, 174, 66, 101, 55, 212,
201, 57, 231, 53, 128, 211, 136, 102, 246, 154, 5, 59, 231, 263,
108, 137, 35, 98, 165, 27, 205, 8, 70, 147, 189, 417, 71, 23,
56, 139, 302, 106, 83, 24, 12, 56, 95, 4, 198), hv005 = c(346129,
1051441, 458555, 513316, 2337718, 2526226, 1956333, 420415, 2337718,
106202, 346129, 420415, 1181939, 1956333, 852471, 420415, 1956333,
420415, 1956333, 811088, 1181939, 1042611, 513316, 1956333, 346129,
1042611, 595684, 2337718, 249256, 1181939, 2526226, 671551, 595684,
1566584, 2337718, 595684, 1042611, 852471, 264004, 1956333, 2337718,
537817, 1238001, 1051441, 1051441, 1956333, 671551, 1042611,
420415, 1181939, 2337718, 537817, 852471, 106202, 1051441, 1051441,
513316, 595684, 2526226, 513316, 162660, 162660, 1238001, 333940,
420415, 671551, 852471, 2526226, 2526226, 162660, 852471, 420415,
214934, 1238001, 595684, 811088, 1956333, 811088, 346129, 595684,
346129, 2526226, 811088, 513316, 346129, 1181939, 1181939, 1566584,
852471, 513316, 2337718, 333940, 852471, 1566584, 671551, 458555,
2337718, 1238001, 595684, 249256), hv025 = c(2, 1, 1, 2, 2, 2,
2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 1, 2, 1, 2,
2, 2, 2, 1, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 2, 2,
1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 1, 2, 2, 1, 1, 2, 2, 2, 1), hv009 = c(7, 3, 4, 5, 4, 3, 4,
2, 5, 2, 8, 3, 4, 4, 2, 3, 7, 4, 3, 5, 9, 5, 6, 1, 3, 5, 7, 5,
6, 2, 4, 4, 4, 3, 5, 5, 3, 5, 3, 4, 4, 5, 3, 5, 4, 4, 5, 9, 3,
3, 4, 3, 4, 3, 5, 4, 9, 8, 6, 1, 5, 4, 6, 1, 8, 3, 3, 5, 2, 6,
5, 2, 3, 1, 7, 3, 7, 3, 7, 7, 3, 4, 5, 5, 3, 3, 4, 5, 8, 2, 3,
5, 2, 7, 2, 2, 7, 10, 6, 5), hv012 = c(7, 3, 4, 5, 4, 3, 4, 2,
5, 2, 8, 3, 4, 4, 2, 3, 3, 4, 3, 5, 9, 5, 6, 1, 3, 5, 7, 5, 6,
2, 4, 4, 4, 3, 5, 5, 3, 5, 3, 4, 4, 5, 3, 5, 4, 4, 5, 9, 3, 3,
4, 3, 4, 3, 5, 2, 9, 8, 6, 1, 5, 4, 6, 1, 8, 3, 3, 5, 2, 6, 5,
2, 3, 1, 7, 3, 7, 3, 7, 7, 3, 4, 5, 5, 3, 3, 4, 5, 6, 2, 3, 5,
2, 7, 2, 2, 7, 10, 6, 4), hv013 = c(7, 3, 4, 4, 4, 3, 4, 2, 5,
2, 8, 3, 4, 4, 2, 3, 7, 4, 3, 5, 9, 5, 4, 1, 3, 3, 7, 5, 6, 2,
0, 4, 3, 3, 5, 5, 3, 5, 3, 4, 4, 3, 3, 5, 4, 4, 5, 9, 3, 3, 4,
3, 4, 3, 5, 4, 9, 8, 6, 1, 4, 4, 6, 1, 8, 3, 3, 5, 2, 6, 5, 2,
3, 1, 5, 3, 7, 3, 7, 7, 3, 4, 5, 5, 3, 2, 4, 5, 8, 2, 3, 3, 1,
7, 2, 2, 7, 10, 6, 5), hv201 = c(44, 12, 12, 43, 21, 41, 13,
43, 21, 12, 44, 12, 13, 31, 21, 13, 12, 12, 11, 31, 21, 21, 12,
13, 13, 43, 41, 21, 21, 12, 21, 21, 13, 12, 12, 13, 21, 12, 21,
13, 21, 21, 21, 31, 11, 13, 21, 12, 12, 21, 21, 21, 21, 44, 13,
21, 44, 13, 21, 13, 12, 12, 21, 11, 12, 13, 13, 21, 13, 12, 43,
13, 12, 21, 13, 13, 13, 13, 12, 43, 44, 13, 42, 13, 13, 11, 21,
13, 21, 43, 12, 11, 32, 12, 11, 13, 21, 21, 44, 21), hv204 = c(15,
996, 996, 20, 996, 5, 15, 30, 996, 996, 30, 996, 5, 25, 5, 20,
996, 996, 996, 10, 996, 10, 996, 20, 60, 60, 30, 996, 996, 996,
30, 10, 15, 996, 996, 10, 996, 996, 996, 996, 996, 996, 5, 996,
996, 10, 996, 996, 996, 10, 996, 996, 996, 20, 10, 15, 20, 5,
10, 10, 996, 996, 996, 996, 996, 996, 5, 996, 5, 996, 90, 5,
996, 996, 5, 996, 15, 20, 996, 15, 30, 9, 30, 5, 10, 996, 996,
20, 996, 15, 996, 996, 10, 996, 996, 5, 996, 996, 5, 996), hv205 = c(23,
12, 12, 22, 31, 22, 31, 31, 12, 31, 31, 12, 23, 23, 31, 12, 13,
13, 23, 12, 13, 31, 22, 13, 31, 23, 31, 13, 31, 11, 31, 12, 31,
23, 12, 31, 31, 23, 31, 12, 31, 12, 31, 12, 12, 12, 13, 23, 41,
12, 12, 12, 13, 23, 12, 14, 31, 22, 31, 22, 12, 12, 31, 12, 13,
12, 31, 12, 12, 12, 31, 22, 13, 31, 31, 31, 22, 13, 13, 31, 23,
12, 31, 31, 13, 12, 12, 31, 13, 31, 12, 12, 31, 23, 12, 12, 31,
12, 22, 12)), .Names = c("hv001", "hv002", "hv005", "hv025",
"hv009", "hv012", "hv013", "hv201", "hv204", "hv205"), row.names = c(10678L,
3179L, 4323L, 8790L, 835L, 3936L, 4789L, 1696L, 6299L, 10495L,
10703L, 1909L, 5863L, 4156L, 7311L, 2912L, 5073L, 1857L, 3991L,
7840L, 5254L, 690L, 8477L, 4520L, 10597L, 3056L, 9158L, 889L,
9572L, 5096L, 1178L, 3598L, 9040L, 2989L, 6336L, 9030L, 768L,
7583L, 7539L, 5005L, 4713L, 6064L, 10013L, 2486L, 2392L, 4531L,
3596L, 9322L, 1910L, 5317L, 4631L, 6080L, 7070L, 10542L, 2501L,
2594L, 8582L, 8957L, 1223L, 10372L, 1587L, 1547L, 9960L, 5458L,
1667L, 3749L, 7100L, 3355L, 3783L, 1591L, 7087L, 1991L, 10264L,
9655L, 10147L, 7781L, 3982L, 8396L, 120L, 10106L, 10679L, 3841L,
8033L, 8519L, 10789L, 5111L, 5313L, 3309L, 7464L, 8835L, 4668L,
5526L, 7117L, 1537L, 3698L, 4355L, 916L, 10006L, 8995L, 9434L
), class = "data.frame")
If you leave the method default (which is "regression") you should be able to get the PCA results. However, I doubt if you will get the same results as SPSS (I have seen discrepancy between SPSS and R with "varimax" rotation results).
I went through the code. It seems that the scores cannot be computed for this input inside the function, so please set the `scores` argument to FALSE.
Try the code below....
# Same principal() call as in the question, but with scores = FALSE so that no
# component scores are requested (the suggested workaround for the
# "system is exactly singular" error).
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved constants.
res <- principal(
  ws,
  nfactors = 1,
  residuals = FALSE,
  rotate = "varimax",
  n.obs = NA,
  covar = TRUE,
  scores = FALSE,
  missing = TRUE,
  impute = "mean",
  oblique.scores = TRUE,
  method = "correlation"
)
I think it will help to continue your work.