map inside map2 - how to refer properly to arguments (purrr) - r
ex <- structure(list(group = c("group B", "group B", "group C", "group B","group C", "group B", "group B", "group A", "group C", "group C", "group C", "group B", "group A", "group A", "group A", "group B", "group A", "group A", "group B", "group C", "group B", "group A", "group C", "group C", "group C", "group C", "group B", "group A", "group A", "group C", "group B", "group A", "group A", "group B", "group C", "group C", "group A", "group C", "group C", "group A", "group B", "group B", "group A", "group B", "group C", "group C","group A", "group B", "group C", "group C"), A1 = c(0.765913072274998, 0.167720616329461, 0.282011203467846, 0.16467465297319, 0.407501850277185, 0.33958561392501, 0.117573569528759, 0.267871993361041, 0.930967768887058, 0.286146199563518, 0.741841563722119, 0.637853658990934, 0.137378493556753, 0.820813736645505, 0.249520575627685, 0.275153698632494, 0.916794545250013, 0.316050065914169, 0.393918378278613, 0.342175736324862, 0.0177193265408278, 0.178873546421528, 0.376545072998852, 0.411527326330543, 0.904074088903144, 0.487975180381909, 0.491365089081228, 0.591370195383206, 0.319207336986437, 0.98943907325156, 0.916014631278813, 0.0347612821497023, 0.323899461887777, 0.155270972754806, 0.436683354899287, 0.316902073565871, 0.734995431266725, 0.584133808733895, 0.515310257440433, 0.921727291075513, 0.0689518100116402, 0.659549278207123, 0.894137248862535, 0.00174906081520021, 0.873320956015959, 0.77207364118658, 0.637504813494161, 0.473099726485088, 0.557896945858374, 0.632965805241838), A2 = c(0.782154354499653, 0.718993512215093, 0.391234505455941, 0.337346265325323, 0.141482090810314, 0.587817938998342, 0.384924706770107, 0.0679492244962603, 0.0509498412720859, 0.786300176288933, 0.00685039279051125, 0.361857839627191, 0.851737944642082, 0.333896369440481, 0.521961389342323, 0.761324436869472, 0.486214824952185, 0.249763275263831, 0.536617708392441, 0.982582966331393, 0.879302836721763, 0.0212801641318947, 0.999207010492682, 0.661623647902161, 0.514440550701693, 0.748157452791929, 0.609151393873617, 0.581557413795963, 0.495366840157658, 0.595225095050409, 0.694380027009174, 0.419036868494004, 0.618371620541438, 0.406731882831082, 0.947823651600629, 0.182527825701982, 0.365398081485182, 0.307149735512212, 0.905119536910206, 0.657605888554826, 0.706386201782152, 0.461993521312252, 0.637554163113236, 0.280387100065127, 0.454221101710573, 0.0712104975245893, 0.914795317919925, 0.951028517214581, 0.645093881059438, 0.754043457563967), A3 = c(0.590488174697384, 0.876135899219662, 0.349565496202558, 0.365676332963631, 0.709230658365414, 0.584304825868458, 0.391973132034764, 0.464247716590762, 0.00831679091788828, 0.282901889178902, 0.842566592851654, 0.141866789199412, 0.278708242345601, 0.680587171344087, 0.256092368392274, 0.535304376389831, 0.803430012892932, 0.336343225324526, 0.320332229137421, 0.809689761372283, 0.588527292944491, 0.767302295425907, 0.124350237427279, 0.605355758452788, 0.619420127244666, 0.326774680987, 0.917224677512422, 0.710018905811012, 0.892817938234657, 0.149181636283174, 0.65066168922931, 0.433064805110916, 0.167979725869372, 0.809581968234852, 0.803237372776493, 0.703188817715272, 0.507392750121653, 0.372131450567394, 0.0688441153615713, 0.928956841118634, 0.960712827509269, 0.37454927386716, 0.753415656508878, 0.687665716046467, 0.05052674934268, 0.155349446227774, 0.806162646971643, 0.725155076943338, 0.537310504587367, 0.674253351520747), A4 = c(0.426875792676583, 0.168233293108642, 0.38692078506574, 0.673673333134502, 0.221049380488694, 0.142470651771873, 0.505352358799428, 0.579006788786501, 0.809476702939719, 0.343090934911743, 0.136329119792208, 0.881694708252326, 0.142607795307413, 0.658202062360942, 0.0624804550316185, 0.938871977152303, 0.477995269699022, 0.989794839406386, 0.307003591908142, 0.40553830191493, 0.0249065780080855, 0.321581491269171, 0.432656849268824, 0.578710418893024, 0.482647196389735, 0.72430428257212, 0.611029474530369, 0.748521578731015, 0.939656358910725, 0.803305297158659, 0.339922665851191, 0.919090943178162, 0.0926963407546282, 0.671128012472764, 0.634122629882768, 0.219061656622216, 0.376445228001103, 0.468331813113764, 0.131768246181309, 0.258267979836091, 0.651934198103845, 0.678243630565703, 0.663701833924279, 0.678762876661494, 0.524524878012016, 0.380242201732472, 0.433922954136506, 0.795754680642858, 0.383180371485651, 0.160383063135669)), .Names = c("group", "A1", "A2", "A3", "A4"), row.names = c(NA, -50L), class = c("tbl_df", "tbl", "data.frame"))
With above sample data I want to perform msClustering within groups. This clustering requires tuning parameter h so I define few values of it in column h.cand. Then I want to call msClustering with subsequent values of h and store the output in a list column. Theoretically, it should be feasible with purrr, but I think it requires nested map, and precisely speaking map inside map2. Here is my problem, I'm not sure how to refer for different list arguments. I have tried something like below:
ex %>%
group_by(group) %>%
nest() %>%
h.cand = map(data, ~quantile(dist(.x), seq(0.05, 0.40, by = 0.05))) %>%
mutate(cluster = map2(h.cand, data, ~map(.x, ~msClustering(
.y, # data (second argument of outter map2)
h = .x # h.cand element (first argument of inner map)
))))
and ended up with error:
Error: cannot allocate vector of size 1681.9 Gb
How should I refer to elements of outter and inner map in order to perform 8 (a length of h.cand vector) clusterings for each group?
For complicated anonymous functions, like this one, it's better if you use the function(x) instead of lambda/~ syntax for passing to map()'s .f argument.
Clean up the data:
map(ex, length)
# make element5 same length
ex[[5]] <- c(ex[[5]], runif(16))
# make into data frame
ex <- dplyr::bind_cols(ex)
Use function(x) instead of ~:
ex2 <- ex %>%
group_by(group) %>%
nest() %>%
mutate(h.cand = map(data,
~ quantile(dist(.), seq(0.05, 0.40, by = 0.05))),
cluster = map2(h.cand, data,
function(x, y) { map(x,
function(x2) { msClustering(y, x2) }) } ) )
Result check:
unnest(ex2, cluster)
# A tibble: 24 x 2
group cluster
<chr> <list>
1 group B <list [2]>
2 group B <list [2]>
3 group B <list [2]>
4 group B <list [2]>
5 group B <list [2]>
6 group B <list [2]>
7 group B <list [2]>
8 group B <list [2]>
9 group C <list [2]>
10 group C <list [2]>
# ... with 14 more rows
Related
creating kendall correlation matrix
i have data that looks like this : in total 38 columns . data code sample : df <- structure( list( Christensenellaceae = c( 0.010484508, 0.008641566, 0.010017172, 0.010741488, 0.1, 0.2, 0.3, 0.4, 0.7, 0.8, 0.9, 0.1, 0.3, 0.45, 0.5, 0.55 ), Date=c(27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,28), Treatment = c( "Treatment 1", "Treatment 1", "Treatment 1", "Treatment 1", "Treatment 2", "Treatment 2", "Treatment 2", "Treatment 2", "Treatment 1", "Treatment 1", "Treatment 1", "Treatment 1", "Treatment 2", "Treatment 2", "Treatment 2", "Treatment 2" ) ),class = "data.frame", row.names = c(NA,-9L) ) whay i wish to do is to create kendall correlation matrix (the data doesnt have linear behavor) between the treatment types(10 in total but 2 in example)for every column (except treatment and date) so in total 36 correlation matrix with size 1010 (here will be 22) . this is my code: res2 <- cor(as.matrix(data),method ="kendall") but i get the error: Error in cor(data, method = "kendall") : 'x' must be numeric is there any way to solve this ? thank you:)
You can do that using a tidyverse approach by first making some data wrangling and then using correlate to calculate the correlation in pairs for every combination of variables. library(corrr) library(tidyverse) df |> # Transform data into wide format pivot_wider(id_cols = Date, names_from = Treatment, values_from = -starts_with(c("Treatment", "Date"))) |> # Unnest lists inside each column unnest(cols = starts_with("Treatment")) |> # Remove Date from the columns select(-Date) |> # Correlate all columns using kendall correlate(method = "kendall") # A tibble: 2 x 3 # term `Treatment 1` `Treatment 2` # <chr> <dbl> <dbl> #1 Treatment 1 NA 0.546 #2 Treatment 2 0.546 NA
Looping in a data frame in R until a certain condition is met
We have the current data frame df as below df <- data.frame(ID = c(1,2,3,4,5,6), Name = c("Chris", "J", "Kemp", "President,", "CEO & ", "Director", "Ashton", "K", "Christian", "Analyst"), Font = c("Font A", "Font A", "Font A", "Font B", "Font B", "Font B", "Font A", "Font A", "Font A", "Font B")) The expected Output is final_df <- data.frame(Name = c("Chris J Kemp", "Ashton K Christian"), Designation = c("President, CEO & Director", "Analyst")) So basically I want to add names until there's a certain font type in column Font and this is of course a sample of the huge data frame I'm dealing with. Thanks for the help in advance ! Sorry all your efforts earlier. This question has been re-edited a bit.
Data: df <- data.frame( ID = c(1:12), Name = c("Chris", "J", "Kemp", "President,", "CEO & ", "Director", "Bad", "D", "King", "Best,", "Teacher & ", "Friend"), Font = c("Font A", "Font A", "Font A", "Font B", "Font B", "Font B", "Font A", "Font A", "Font A", "Font B", "Font B", "Font B") ) You can do: df$group <- cumsum(c(TRUE, df$Font[-1] != df$Font[-length(df$Font)])) final_df <- as.data.frame(matrix(lapply(split(df$Name, df$group), paste, collapse = " "), ncol = 2)) colnames(final_df) <- c("Name", "Designation") A grouping row of consecutif font is created. Then split allows to have a list per font then you can reformat data using paste. Output: Name Designation 1 Chris J Kemp Bad D King 2 President, CEO & Director Best, Teacher & Friend
You can try - library(dplyr) library(tidyr) df %>% mutate(Font = recode(Font, 'Font A' = 'Name', 'Font B' = 'Designation'), ID = data.table::rleid(Font)) %>% group_by(ID, Font) %>% summarise(Name = toString(Name), .groups = 'drop') %>% mutate(ID = ceiling(ID/2)) %>% pivot_wider(names_from = Font, values_from = Name) %>% select(-ID) # Name Designation # <chr> <chr> #1 Chris, J, Kemp President,, CEO & , Director #2 Ashton, K, Christian Analyst
How about this method: df_final = as.data.frame(matrix(unlist(lapply(unique(df$Font),function(i){paste(collapse = ' ', df[df$Font%in%i,"Name"])})), byrow=T,ncol = 2)) colnames(df_final)=c("names", "designation") Let me know if it's ok
Here is a solution with dplyr: library(dplyr) df %>% group_by(Font, fontnum) %>% summarize(Tmp = paste(Name, collapse = " ")) %>% mutate(ID = fontnum %/% 2) %>% pivot_wider(id_cols = ID, names_from = Font, values_from = Tmp) %>% transmute(Name = `Font A`, Designation = `Font B`) where df <- data.frame( ID = 1:12, Name = c("Chris", "J", "Kemp", "President,", "CEO & ", "Director", "Bad", "D", "King", "Best,", "Teacher & ", "Friend"), Font = c("Font A", "Font A", "Font A", "Font B", "Font B", "Font B", "Font A", "Font A", "Font A", "Font B", "Font B", "Font B") ) and df$fontnum <- cumsum(c(0, abs(diff(as.numeric(factor(df$Font)))))) And the result will be # A tibble: 2 × 2 Name Designation <chr> <chr> 1 Chris J Kemp President, CEO & Director 2 Bad D King Best, Teacher & Friend
Here is an alternative strategy with dplyr: Data used from Clemsang (many thanks!) group_by and divide ID by 3 summarise and collapse (bring rows to one row by group) use group_split to split the groups (returns a list) use bind_cols to get a dataframe tweak names and select library(dplyr) df %>% group_by(Font, ceiling(ID/3)) %>% summarise(Name = paste0(Name, collapse = " ")) %>% group_split(Font) %>% bind_cols() %>% select(Name = Name...3, Designation=Name...6) Name Designation <chr> <chr> 1 Chris J Kemp President, CEO & Director 2 Bad D King Best, Teacher & Friend
Pivot wider returning 1 column? [duplicate]
This question already has answers here: Collapse text by group in data frame [duplicate] (2 answers) Closed 2 years ago. I'm trying to format my base_df to show users_id organized by program watched. base_df: base_df <- structure(list(category = c("News", "News", "Sports", "Sports", "sports", "Sports", "Sports", "sports"), programs = c("News A", "News B", "Sports A", "Sports B", "sports C", "Sports A", "Sports B", "sports C"), users_id = c(10003831, 10003823, 10003841, 10003823, 10003851, 10003851, 10003851, 10003854)), row.names = c(NA, -8L ), class = c("tbl_df", "tbl", "data.frame")) Desired output: I think pivot_longer can help me here: I've tried to use it but it returns a df with just the users_ids. What am I doing wrong? b <- pivot_wider( base_df, id_cols = users_id, names_from = programs )
You don't need to convert to wide format, but rather aggregate, i.e. library(dplyr) base_df %>% filter(category %in% c('Sports', 'sports')) %>% group_by(users_id) %>% summarise(how_many = n(), which = toString(programs)) # A tibble: 4 x 3 # users_id how_many which # <dbl> <int> <chr> #1 10003823 1 Sports B #2 10003841 1 Sports A #3 10003851 3 sports C, Sports A, Sports B #4 10003854 1 sports C
Is there a way to colour code directionality in an igraph?
I have some data that shows Twitter connections between people (i.e. people that tag other users in their tweets) and would like to map out the connections between people. In some cases the relationship is reciprocal, as in both people have tagged the other while some people have been tagged but have not tweeted. In the example below, Person A has tagged Person B and Person C, while Person C has only tagged Person B. The arrows are unidirectional from Person A -> Person C and from Person C -> Person B, but bidirectional between Person A <-> Person B. Is it possible to makes these arrows different colours? library(igraph) df <- data.frame (from = c("Person A", "Person A", "Person B", "Person C"), to = c ("Person B", "Person C", "Person A", "Person B"), weight = c (1, 3, 4, 5) ) g_1 <- graph.data.frame(df, directed = TRUE) set.seed(123) plot (g_1, edge.width = E(g_1)$weight)
It is possible to choose edge color specifing color argument of E and it is possible to find reciprocical edge thanks to is.mutual() function : E(g_1)$color <- "grey50" E(g_1)$color[is.mutual(g_1)] = "red" plot(g_1, edge.width = E(g_1)$weight)
You can use the duplicated() function to colourize bidirectional edges (taken from R reciprocal edges in igraph in R and modified for colouring instead of curving): E(g_1)[duplicated(E) | duplicated(E,fromLast =TRUE)]$color <- "red" Complete example: library(igraph) df <- data.frame (from = c("Person A", "Person A", "Person B", "Person C"), to = c ("Person B", "Person C", "Person A", "Person B"), weight = c (1, 3, 4, 5) ) g_1 <- graph.data.frame(df, directed = TRUE) set.seed(123) E <- t(apply(get.edgelist(g_1),1,sort)) E(g_1)$color <- "grey50" E(g_1)[duplicated(E) | duplicated(E,fromLast =TRUE)]$color <- "red" plot (g_1, edge.width = E(g_1)$weight)
Putting Some Part in Double Quotes in R
I want to put some part of object into double quote like the example given below: Required Output "Group 1" = 3, "Group 2" = 3 MWE Test <- structure("Group 1 = 3, Group 2 = 3", class = "noquote") Test [1] Group 1 = 3, Group 2 = 3 as.character(Test) [1] "Group 1 = 3, Group 2 = 3" Edited Actually I have a long character string (here Labs) Labs <- c("Group 1", "Group 2") Test <- noquote(paste(Labs, "= 3", collapse = ", ")) Test [1] Group 1 = 3, Group 2 = 3 However, I want to have output like this "Group 1" = 3, "Group 2" = 3
You can use single quotes to let R know where the string begins and ends. That will let you have double quotes inside of it: Test <- c('"Group 1" = 3', '"Group 2" = 3') If you print it, then by default it's going to show you the escape characters. However, you can just cat it, or use some fancier options, depending on your needs. cat(Test) "Group 1" = 3 "Group 2" = 3