# Sample data for the clustering question, written as a structure() literal:
# a 50-row data frame (row.names = c(NA, -50L)) carrying the tibble classes
# "tbl_df"/"tbl"/"data.frame". Column `group` is a character label
# ("group A", "group B" or "group C"); A1, A2, A3 and A4 are numeric.
ex <- structure(list(group = c("group B", "group B", "group C", "group B","group C", "group B", "group B", "group A", "group C", "group C", "group C", "group B", "group A", "group A", "group A", "group B", "group A", "group A", "group B", "group C", "group B", "group A", "group C", "group C", "group C", "group C", "group B", "group A", "group A", "group C", "group B", "group A", "group A", "group B", "group C", "group C", "group A", "group C", "group C", "group A", "group B", "group B", "group A", "group B", "group C", "group C","group A", "group B", "group C", "group C"), A1 = c(0.765913072274998, 0.167720616329461, 0.282011203467846, 0.16467465297319, 0.407501850277185, 0.33958561392501, 0.117573569528759, 0.267871993361041, 0.930967768887058, 0.286146199563518, 0.741841563722119, 0.637853658990934, 0.137378493556753, 0.820813736645505, 0.249520575627685, 0.275153698632494, 0.916794545250013, 0.316050065914169, 0.393918378278613, 0.342175736324862, 0.0177193265408278, 0.178873546421528, 0.376545072998852, 0.411527326330543, 0.904074088903144, 0.487975180381909, 0.491365089081228, 0.591370195383206, 0.319207336986437, 0.98943907325156, 0.916014631278813, 0.0347612821497023, 0.323899461887777, 0.155270972754806, 0.436683354899287, 0.316902073565871, 0.734995431266725, 0.584133808733895, 0.515310257440433, 0.921727291075513, 0.0689518100116402, 0.659549278207123, 0.894137248862535, 0.00174906081520021, 0.873320956015959, 0.77207364118658, 0.637504813494161, 0.473099726485088, 0.557896945858374, 0.632965805241838), A2 = c(0.782154354499653, 0.718993512215093, 0.391234505455941, 0.337346265325323, 0.141482090810314, 0.587817938998342, 0.384924706770107, 0.0679492244962603, 0.0509498412720859, 0.786300176288933, 0.00685039279051125, 0.361857839627191, 0.851737944642082, 0.333896369440481, 0.521961389342323, 0.761324436869472, 0.486214824952185, 0.249763275263831, 0.536617708392441, 0.982582966331393, 0.879302836721763, 0.0212801641318947, 0.999207010492682, 
0.661623647902161, 0.514440550701693, 0.748157452791929, 0.609151393873617, 0.581557413795963, 0.495366840157658, 0.595225095050409, 0.694380027009174, 0.419036868494004, 0.618371620541438, 0.406731882831082, 0.947823651600629, 0.182527825701982, 0.365398081485182, 0.307149735512212, 0.905119536910206, 0.657605888554826, 0.706386201782152, 0.461993521312252, 0.637554163113236, 0.280387100065127, 0.454221101710573, 0.0712104975245893, 0.914795317919925, 0.951028517214581, 0.645093881059438, 0.754043457563967), A3 = c(0.590488174697384, 0.876135899219662, 0.349565496202558, 0.365676332963631, 0.709230658365414, 0.584304825868458, 0.391973132034764, 0.464247716590762, 0.00831679091788828, 0.282901889178902, 0.842566592851654, 0.141866789199412, 0.278708242345601, 0.680587171344087, 0.256092368392274, 0.535304376389831, 0.803430012892932, 0.336343225324526, 0.320332229137421, 0.809689761372283, 0.588527292944491, 0.767302295425907, 0.124350237427279, 0.605355758452788, 0.619420127244666, 0.326774680987, 0.917224677512422, 0.710018905811012, 0.892817938234657, 0.149181636283174, 0.65066168922931, 0.433064805110916, 0.167979725869372, 0.809581968234852, 0.803237372776493, 0.703188817715272, 0.507392750121653, 0.372131450567394, 0.0688441153615713, 0.928956841118634, 0.960712827509269, 0.37454927386716, 0.753415656508878, 0.687665716046467, 0.05052674934268, 0.155349446227774, 0.806162646971643, 0.725155076943338, 0.537310504587367, 0.674253351520747), A4 = c(0.426875792676583, 0.168233293108642, 0.38692078506574, 0.673673333134502, 0.221049380488694, 0.142470651771873, 0.505352358799428, 0.579006788786501, 0.809476702939719, 0.343090934911743, 0.136329119792208, 0.881694708252326, 0.142607795307413, 0.658202062360942, 0.0624804550316185, 0.938871977152303, 0.477995269699022, 0.989794839406386, 0.307003591908142, 0.40553830191493, 0.0249065780080855, 0.321581491269171, 0.432656849268824, 0.578710418893024, 0.482647196389735, 0.72430428257212, 0.611029474530369, 
0.748521578731015, 0.939656358910725, 0.803305297158659, 0.339922665851191, 0.919090943178162, 0.0926963407546282, 0.671128012472764, 0.634122629882768, 0.219061656622216, 0.376445228001103, 0.468331813113764, 0.131768246181309, 0.258267979836091, 0.651934198103845, 0.678243630565703, 0.663701833924279, 0.678762876661494, 0.524524878012016, 0.380242201732472, 0.433922954136506, 0.795754680642858, 0.383180371485651, 0.160383063135669)), .Names = c("group", "A1", "A2", "A3", "A4"), row.names = c(NA, -50L), class = c("tbl_df", "tbl", "data.frame"))
With the sample data above I want to perform msClustering within groups. This clustering requires a tuning parameter h, so I define a few candidate values of it in the column h.cand. Then I want to call msClustering with each successive value of h and store the output in a list column. In theory this should be feasible with purrr, but I think it requires a nested map — more precisely, a map inside a map2. Here is my problem: I'm not sure how to refer to the arguments of the different (outer and inner) maps. I have tried something like the following:
# Asker's original, NON-WORKING attempt, kept so the reported error can be
# reproduced. Intent: per group, compute candidate bandwidths `h.cand`, then
# run msClustering once per candidate and keep the results in a list column.
ex %>%
  group_by(group) %>%
  nest() %>%
  # h.cand: the 5%, 10%, ..., 40% quantiles of the pairwise distances computed
  # by dist() on each group's nested data.
  mutate(h.cand = map(data, ~quantile(dist(.x), seq(0.05, 0.40, by = 0.05)))) %>%
  # PROBLEM: the inner `~` lambda defines its own `.x`/`.y`, so inside it `.y`
  # no longer refers to the outer map2()'s second argument (`data`).
  mutate(cluster = map2(h.cand, data, ~map(.x, ~msClustering(
    .y,    # intended: data (second argument of outer map2)
    h = .x # h.cand element (first argument of inner map)
  ))))
and ended up with error:
Error: cannot allocate vector of size 1681.9 Gb
How should I refer to the elements of the outer and inner map in order to perform 8 clusterings (the length of the h.cand vector) for each group?
For complicated anonymous functions like this one, it's better to use the function(x) syntax instead of the lambda/~ syntax when passing them to map()'s .f argument.
Clean up the data:
# Show the length of every column; a data frame needs them all to be equal.
map(ex, length)
# Pad the fifth element with random values, but only by as many values as it
# is actually short of the longest column. If the columns already have equal
# lengths, nothing is appended (an unconditional pad would make it too long).
n_missing <- max(lengths(ex)) - length(ex[[5]])
if (n_missing > 0) {
  ex[[5]] <- c(ex[[5]], runif(n_missing))
}
# Make into a data frame.
ex <- dplyr::bind_cols(ex)
Use function(x) instead of ~:
# For every group: derive the candidate bandwidths (h.cand), then call
# msClustering once per candidate on that group's nested data. Named function
# arguments keep the outer (map2) and inner (map) values apart.
ex2 <- ex %>%
  group_by(group) %>%
  nest() %>%
  mutate(
    # 5%, 10%, ..., 40% quantiles of the pairwise distances within the group
    h.cand = map(
      data,
      function(group_data) {
        quantile(dist(group_data), seq(0.05, 0.40, by = 0.05))
      }
    ),
    # one clustering result per candidate bandwidth, stored as a nested list
    cluster = map2(
      h.cand, data,
      function(bandwidths, group_data) {
        map(bandwidths, function(h) {
          msClustering(group_data, h)
        })
      }
    )
  )
Result check:
# Expand the `cluster` list column so each clustering result gets its own row.
ex2 %>%
  unnest(cluster)
# A tibble: 24 x 2
group cluster
<chr> <list>
1 group B <list [2]>
2 group B <list [2]>
3 group B <list [2]>
4 group B <list [2]>
5 group B <list [2]>
6 group B <list [2]>
7 group B <list [2]>
8 group B <list [2]>
9 group C <list [2]>
10 group C <list [2]>
# ... with 14 more rows
I am trying to draw a dumbbell plot (ggalt's geom_dumbbell) with the following code:
library(ggplot2)
library(ggalt)
theme_set(theme_classic())
# Example data for the dumbbell plot: 12 records with the columns phi, W,
# Type and pChange. Rows 10 and 11 deliberately share phi = 0.9.
df_senPhi <- data.frame(
  phi = c(0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.9, 1),
  W = c(
    7833625.7334, 8291583.0188, 8762978.0131, 8169317.158,
    8460793.8918, 8765222.8718, 8266025.5499, 8311199.2075,
    8265304.816, 8289392.5799, 8273733.0523, 8284554.5615
  ),
  Type = c(
    "A, B, C", "A, B, C", "A, B, C", "D, E", "D, E", "D, E",
    "F, G", "F, G", "H, I", "H, I", "I, J", "I, J"
  ),
  pChange = c(
    -0.0533144181552553, 0.00202924695507283, 0.0589968453118437,
    -0.0127464560859453, 0.0224782062508261, 0.0592681341679742,
    -0.00105934677399903, 0.00439984310620854, -0.00114644672167306,
    0.00176453467558519, -0.000127903066776307, 0.00117986514708678
  ),
  # keep Type as character, as in the original literal
  stringsAsFactors = FALSE
)
# Convert phi to a factor whose levels follow the row order, so the dumbbells
# are drawn in data-frame order. phi holds 0.9 twice (rows 10 and 11), so the
# level "0.9" is supplied twice here -- this is the source of the
# duplicate-levels complaint from factor().
df_senPhi$phi <- factor(
  df_senPhi$phi,
  levels = as.character(df_senPhi$phi)
)

# One dumbbell per phi level, running from x = 0 to pChange and coloured by
# Type; the x axis is formatted as a percentage.
gg <- ggplot(
  data = df_senPhi,
  mapping = aes(x = 0, xend = pChange, y = phi, color = Type)
) +
  geom_dumbbell(
    # a fixed colour = "#a3c4dc" was tried here and left disabled
    size = 0.75,
    colour_xend = "#0e668b"
  ) +
  scale_x_continuous(labels = scales::percent)
plot(gg)
If you run this code, you will get a warning saying "duplicate levels in factors are deprecated".
If you look closely at df_senPhi you can see 12 records. However, only 11 records are plotted. The 10th and 11th records have the same phi value in the data frame, so they are mapped to the same factor level. That causes the two dumbbells for that phi value to overlap in the plot (which is probably why I'm seeing only 11 dumbbells).
I want all 12 records to be plotted such that the second 0.9 phi's dumbbell appears just above the first just like they were two different values.
Is there a way to achieve this?
I used a bit of dplyr,
but this seems to give what you are looking for:
# Give every record its own y position (its row index) so that records
# sharing a phi value -- such as the two 0.9 rows -- are drawn on separate
# lines, then relabel the y axis with the phi values.
library(dplyr) # provides %>%, mutate() and row_number()

df_senPhi %>%
  mutate(row = row_number()) %>%
  ggplot(aes(x = 0, y = row, color = Type)) +
  geom_dumbbell(aes(xend = pChange)) +
  scale_y_continuous(
    # the axis shows phi values, so title it "phi" rather than "row"
    name = "phi",
    # one break per record, derived from the data instead of a fixed 1:12
    breaks = seq_len(nrow(df_senPhi)),
    # plain character labels; works whether phi is numeric or a factor
    labels = as.character(df_senPhi$phi)
  )