Set edge attribute of one bipartite projection based on other bipartite projection - r

I am doing some work with bipartite projections and onemode networks in igraph. Is there a way to label the edges of one onemode network (network of artists below) using the vertices of the other (network of hometowns below)?
I want to do something like the below, but am worried it will not scale as the number of artist nodes expand and relationships between hometowns begin to appear (e.g., artists who split time between two towns).
Thanks!
library(tibble)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(igraph)
#>
#> Attaching package: 'igraph'
#> The following objects are masked from 'package:dplyr':
#>
#> as_data_frame, groups, union
#> The following object is masked from 'package:tibble':
#>
#> as_data_frame
#> The following objects are masked from 'package:stats':
#>
#> decompose, spectrum
#> The following object is masked from 'package:base':
#>
#> union
df <- tibble(user = c("Mac Miller", "Wiz Khalifa", "Arizona Zervas", "Lil Wayne", "Birdman", "Logic"), hometown = c("Pittsburgh", "Pittsburgh", "DC", "New Orleans", "New Orleans", "DC"))
g <- graph_from_data_frame(df) %>%
igraph::set_vertex_attr(., name = "type", value = V(.)$name %in% df$hometown) %>%
bipartite_projection()
igraph::get.edgelist(g$proj1)
#> [,1] [,2]
#> [1,] "Mac Miller" "Wiz Khalifa"
#> [2,] "Arizona Zervas" "Logic"
#> [3,] "Lil Wayne" "Birdman"
g1 <- igraph::set.edge.attribute(graph = g$proj1, name = "hometown", value = get.vertex.attribute(g$proj2)$name)
get.edgelist(g1)
#> [,1] [,2]
#> [1,] "Mac Miller" "Wiz Khalifa"
#> [2,] "Arizona Zervas" "Logic"
#> [3,] "Lil Wayne" "Birdman"
get.edge.attribute(g1)
#> $weight
#> [1] 1 1 1
#>
#> $hometown
#> [1] "Pittsburgh" "DC" "New Orleans"
Created on 2020-08-19 by the reprex package (v0.3.0)

Related

How to assign manually colors to each data group

Could someone help me in assigning a manual colour to each group? I want to use these two colours for my data #F96167 for beer, and #FCE77D for Whisky.
Also, in the last graph, I would like different plot symbols (e.g. ∆ for Beer, O for Whisky) for each group.
library(rgeos)
#> Loading required package: sp
#> rgeos version: 0.5-9, (SVN revision 684)
#> GEOS runtime version: 3.9.1-CAPI-1.14.2
#> Please note that rgeos will be retired by the end of 2023,
#> plan transition to sf functions using GEOS at your earliest convenience.
#> GEOS using OverlayNG
#> Linking to sp version: 1.5-0
#> Polygon checking: TRUE
library(sp)
library(vegan)
#> Loading required package: permute
#> Loading required package: lattice
#> This is vegan 2.6-2
library(tidyverse)
library(Momocs)
#>
#> Attaching package: 'Momocs'
#> The following objects are masked from 'package:dplyr':
#>
#> arrange, combine, filter, mutate, rename, sample_frac, sample_n,
#> select, slice
#> The following object is masked from 'package:tidyr':
#>
#> chop
#> The following object is masked from 'package:stats':
#>
#> filter
library(caret)
#>
#> Attaching package: 'caret'
#> The following object is masked from 'package:purrr':
#>
#> lift
#> The following object is masked from 'package:vegan':
#>
#> tolerance
library(doParallel)
#> Loading required package: foreach
#>
#> Attaching package: 'foreach'
#> The following objects are masked from 'package:purrr':
#>
#> accumulate, when
#> Loading required package: iterators
#> Loading required package: parallel
library(xlsx)
library(foreach)
library(broom)
library(MASS)
#>
#> Attaching package: 'MASS'
#> The following object is masked from 'package:Momocs':
#>
#> select
#> The following object is masked from 'package:dplyr':
#>
#> select
library(scales)
#>
#> Attaching package: 'scales'
#> The following object is masked from 'package:Momocs':
#>
#> rescale
#> The following object is masked from 'package:purrr':
#>
#> discard
#> The following object is masked from 'package:readr':
#>
#> col_factor
library(RColorBrewer)
library(ggspatial)
data(bot)
bot
#> Out (outlines)
#> - 40 outlines, 162 +/- 21 coords (in $coo)
#> - 2 classifiers (in $fac):
#> # A tibble: 40 × 2
#> type fake
#> <fct> <fct>
#> 1 whisky a
#> 2 whisky a
#> 3 whisky a
#> 4 whisky a
#> 5 whisky a
#> 6 whisky a
#> # … with 34 more rows
#> - also: $ldk
panel(bot, fac="type", names=TRUE)
bot.f <- efourier(bot, nb.h=10)
#> 'norm=TRUE' is used and this may be troublesome. See ?efourier
bot.f
#> An OutCoe object [ elliptical Fourier analysis ]
#> --------------------
#> - $coe: 40 outlines described, 10 harmonics
#> # A tibble: 40 × 2
#> type fake
#> <fct> <fct>
#> 1 whisky a
#> 2 whisky a
#> 3 whisky a
#> 4 whisky a
#> 5 whisky a
#> 6 whisky a
#> # … with 34 more rows
# mean shape
ms_ <- MSHAPES(bot.f, fac="type")
ms_ <- ms_$shp
datams_<-rbind(data.frame(ms_$beer, Group="Beer"),
data.frame(ms_$whisky, Group="Whisky"))
ggplot(datams_)+theme_bw()+geom_path( aes(x,y, color=Group, linetype=Group), size=0.5)+theme_void()+theme(legend.position = c(0.5,0.6), axis.title = element_blank(), axis.ticks = element_blank(), axis.text = element_blank())+coord_equal()+
theme(legend.key.size = unit(5, units = "mm"))
####Also, in this code, I would like different plot symbols (e.g. ∆ for Beer, O for Whisky) for each group.
pca.fourier = bot.f %>% PCA %>% plot_PCA(~type)
layer_ellipses( conf = 0.9, lwd = 1, alpha = 0)%>%
layer_axes(lwd = 1)%>%
layer_axesvar(cex=1.5)%>%
layer_ellipsesaxes (conf = 0.5,lwd=1.5)%>%
layer_grid( col = "#999999", lty = 3, grid = 3)%>%
layer_stars(alpha = 0.8)%>%
layer_points( cex=1.3) %>%
#layer_eigen( nb_max =5, cex = 1 )%>%
layer_legend( cex = 1)%>%
layer_title( title = "Study", cex =1)
#> Error in is.factor(x$f): argument "x" is missing, with no default
Created on 2022-07-12 by the reprex package (v2.0.1)

fct_collapse in R?

I have a factor that's words (instances of words that difference participants said). I want to collapse it so that there are the categories "that" (every instance of the word "that") and notThat (all other words combined into one category). Naturally there are a lot of other words, and I don't want to go through and type them all. I've tried using != in various places, but it won't work. Maybe I just have the syntax wrong?
Anyway, is there a way to do this? That is, collapse all words that aren't "that" into one group?
How about this:
library(forcats)
x <- c("that", "something", "else")
fct_collapse(x, that = c("that"), other_level="notThat")
#> [1] that notThat notThat
#> Levels: that notThat
Created on 2022-02-15 by the reprex package (v2.0.1)
Edit to show in a data frame
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(forcats)
dat <- data.frame(
gender = factor(c(1,0,1,1,1,0), labels=c("male", "female")),
age = round(runif(6, 18,85)),
word = c("that", "something", "altogether", "different", "entirely", "that"))
dat %>%
mutate(word_collapse = fct_collapse(word, that="that", other_level="notThat"))
#> gender age word word_collapse
#> 1 female 74 that that
#> 2 male 72 something notThat
#> 3 female 57 altogether notThat
#> 4 female 44 different notThat
#> 5 female 79 entirely notThat
#> 6 male 81 that that
Created on 2022-02-15 by the reprex package (v2.0.1)

Tidymodels class cost

I am dealing with a prediction case where the data is suffering from a strong imbalance in the binary prediction target. Is there a way of penalizing wrong predictions of the minority class with a cost matrix in TidyModels? I know that caret had this implemented, but the information I find in TidyModels is quite confusing.
All I find is the baguette::class_cost() function from the experimental baguette package, which only seems to apply to bagged trees models.
Yes, you want to set a classification_cost():
library(yardstick)
#> For binary classification, the first factor level is assumed to be the event.
#> Use the argument `event_level = "second"` to alter this as needed.
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Two class example
data(two_class_example)
# Assuming `Class1` is our "event", this penalizes false positives heavily
costs1 <- tribble(
~truth, ~estimate, ~cost,
"Class1", "Class2", 1,
"Class2", "Class1", 2
)
# Assuming `Class1` is our "event", this penalizes false negatives heavily
costs2 <- tribble(
~truth, ~estimate, ~cost,
"Class1", "Class2", 2,
"Class2", "Class1", 1
)
classification_cost(two_class_example, truth, Class1, costs = costs1)
#> # A tibble: 1 × 3
#> .metric .estimator .estimate
#> <chr> <chr> <dbl>
#> 1 classification_cost binary 0.288
classification_cost(two_class_example, truth, Class1, costs = costs2)
#> # A tibble: 1 × 3
#> .metric .estimator .estimate
#> <chr> <chr> <dbl>
#> 1 classification_cost binary 0.260
Created on 2021-10-27 by the reprex package (v2.0.1)
In tidymodels, you can use this metric either just to compute results after the fact or in tuning. Learn more here.

Extract Model Description from a mable

I have a mable object that is like so:
models
# A mable: 1 x 3
ets arima nnetar
<model> <model> <model>
1 <ETS(M,Ad,M)> <ARIMA(2,1,2)(0,0,2)[12]> <NNAR(14,1,10)[12]>
I just want the models descriptions so I can place them in a plot. So I ran the following code:
model_desc <- models %>%
gather() %>%
select(key, value) %>%
set_names("model","model_desc") %>%
mutate(model_desc_char = model_desc %>% as.character())
as_tibble() %>%
select(model, model_desc)
This still gives me back a tibble where model_desc is still a list object. I think this is because of how a mable is constructed and how its structure is supposed to be.
** UPDATE **
I solved the problem by doing the following:
model_desc <- models %>%
as_tibble() %>%
gather() %>%
mutate(model_desc = print(value)) %>%
select(key, model_desc) %>%
set_names("model", "model_desc")
For anybody else who will encounter this going forward, I have pasted a solution that works for me with the latest versions of fable/fabletools.
library(fable)
#> Loading required package: fabletools
library(tsibble)
library(tsibbledata)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following object is masked from 'package:tsibble':
#>
#> interval
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(tidyr)
aus_retail %>%
filter(
State %in% c("New South Wales", "Victoria"),
Industry == "Department stores"
) %>%
model(
ets = ETS(box_cox(Turnover, 0.3)),
arima = ARIMA(log(Turnover)),
snaive = SNAIVE(Turnover)
) %>%
pivot_longer(cols = -c(State, Industry),
names_to = "model_type",
values_to = "model_specifics_mdl") %>%
mutate(model_specifics = format(model_specifics_mdl)) %>%
select(-model_specifics_mdl)
#> # A tibble: 6 x 4
#> State Industry model_type model_specifics
#> <chr> <chr> <chr> <chr>
#> 1 New South Wales Department stores ets <ETS(A,Ad,A)>
#> 2 New South Wales Department stores arima <ARIMA(2,1,1)(2,1,1)[12]>
#> 3 New South Wales Department stores snaive <SNAIVE>
#> 4 Victoria Department stores ets <ETS(A,A,A)>
#> 5 Victoria Department stores arima <ARIMA(2,1,1)(1,1,2)[12]>
#> 6 Victoria Department stores snaive <SNAIVE>
Created on 2020-09-07 by the reprex package (v0.3.0)
This ended up solving my issue:
model_desc <- models %>%
as_tibble() %>%
gather() %>%
mutate(model_desc = print(value)) %>%
select(key, model_desc) %>%
set_names("model", "model_desc")

Combine tidy text with synonyms to create dataframe

I have sample data frame as below:
quoteiD <- c("q1","q2","q3","q4", "q5")
quote <- c("Unthinking respect for authority is the greatest enemy of truth.",
"In the middle of difficulty lies opportunity.",
"Intelligence is the ability to adapt to change.",
"Science is not only a disciple of reason but, also, one of romance and passion.",
"If I have seen further it is by standing on the shoulders of Giants.")
library(dplyr)
quotes <- tibble(quoteiD = quoteiD, quote= quote)
quotes
I have created some tidy text as below
library(tidytext)
data(stop_words)
tidy_words <- quotes %>%
unnest_tokens(word, quote) %>%
anti_join(stop_words) %>%
count( word, sort = TRUE)
tidy_words
Further, I have searched the synonyms using qdap package as below
library(qdap)
syns <- synonyms(tidy_words$word)
The qdap out put is a list , and I am looking to pick the first 5 synonym for each word in the tidy data frame and create a column called synonyms as below:
word n synonyms
ability 1 adeptness, aptitude, capability, capacity, competence
adapt 1 acclimatize, accommodate, adjust, alter, apply,
authority 1 ascendancy, charge, command, control, direction
What is an elegant way of merging the list of 5 words from qdap synonym function and separate by commas?
One way this can be done using a tidyverse solution is
library(plyr)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:plyr':
#>
#> arrange, count, desc, failwith, id, mutate, rename, summarise,
#> summarize
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(tidytext)
library(qdap)
#> Loading required package: qdapDictionaries
#> Loading required package: qdapRegex
#>
#> Attaching package: 'qdapRegex'
#> The following object is masked from 'package:dplyr':
#>
#> explain
#> Loading required package: qdapTools
#>
#> Attaching package: 'qdapTools'
#> The following object is masked from 'package:dplyr':
#>
#> id
#> The following object is masked from 'package:plyr':
#>
#> id
#> Loading required package: RColorBrewer
#>
#> Attaching package: 'qdap'
#> The following object is masked from 'package:dplyr':
#>
#> %>%
#> The following object is masked from 'package:base':
#>
#> Filter
library(tibble)
library(tidyr)
#>
#> Attaching package: 'tidyr'
#> The following object is masked from 'package:qdap':
#>
#> %>%
quotes <- tibble(quoteiD = paste0("q", 1:5),
quote= c(".\n\nthe ebodac consortium consists of partners: janssen (efpia), london school of hygiene and tropical medicine (lshtm),",
"world vision) mobile health software development and deployment in resource limited settings grameen\n\nas such, the ebodac consortium is well placed to tackle.",
"Intelligence is the ability to adapt to change.",
"Science is a of reason of romance and passion.",
"If I have seen further it is by standing on ."))
quotes
#> # A tibble: 5 x 2
#> quoteiD quote
#> <chr> <chr>
#> 1 q1 ".\n\nthe ebodac consortium consists of partners: janssen (efpia~
#> 2 q2 "world vision) mobile health software development and deployment~
#> 3 q3 Intelligence is the ability to adapt to change.
#> 4 q4 Science is a of reason of romance and passion.
#> 5 q5 If I have seen further it is by standing on .
data(stop_words)
tidy_words <- quotes %>%
unnest_tokens(word, quote) %>%
anti_join(stop_words) %>%
count( word, sort = TRUE)
#> Joining, by = "word"
tidy_words
#> # A tibble: 33 x 2
#> word n
#> <chr> <int>
#> 1 consortium 2
#> 2 ebodac 2
#> 3 ability 1
#> 4 adapt 1
#> 5 change 1
#> 6 consists 1
#> 7 deployment 1
#> 8 development 1
#> 9 efpia 1
#> 10 grameen 1
#> # ... with 23 more rows
syns <- synonyms(tidy_words$word)
#> no match for the following:
#> consortium, ebodac, consists, deployment, efpia, grameen, janssen, london, lshtm, partners, settings, software, tropical
#> ========================
syns %>%
plyr::ldply(data.frame) %>% # Change the list to a dataframe (See https://stackoverflow.com/questions/4227223/r-list-to-data-frame)
rename("Word_DefNumber" = 1, "Syn" = 2) %>% # Rename the columns with a name that is more intuitive
separate(Word_DefNumber, c("Word", "DefNumber"), sep = "\\.") %>% # Find the word part of the word and definition number
group_by(Word) %>% # Group by words, so that when we select rows it is done for each word
slice(1:5) %>% # Keep the first 5 rows for each word
summarise(synonyms = paste(Syn, collapse = ", ")) %>% # Combine the synonyms together comma separated using paste
ungroup() # So there are not unintended effects of having the data grouped when using the data later
#> # A tibble: 20 x 2
#> Word synonyms
#> <chr> <chr>
#> 1 ability adeptness, aptitude, capability, capacity, competence
#> 2 adapt acclimatize, accommodate, adjust, alter, apply
#> 3 change alter, convert, diversify, fluctuate, metamorphose
#> 4 development advance, advancement, evolution, expansion, growth
#> 5 health fitness, good condition, haleness, healthiness, robustness
#> 6 hygiene cleanliness, hygienics, sanitary measures, sanitation
#> 7 intelligence acumen, alertness, aptitude, brain power, brains
#> 8 limited bounded, checked, circumscribed, confined, constrained
#> 9 medicine cure, drug, medicament, medication, nostrum
#> 10 mobile ambulatory, itinerant, locomotive, migrant, motile
#> 11 passion animation, ardour, eagerness, emotion, excitement
#> 12 reason apprehension, brains, comprehension, intellect, judgment
#> 13 resource ability, capability, cleverness, ingenuity, initiative
#> 14 romance affair, affaire (du coeur), affair of the heart, amour, at~
#> 15 school academy, alma mater, college, department, discipline
#> 16 science body of knowledge, branch of knowledge, discipline, art, s~
#> 17 standing condition, credit, eminence, estimation, footing
#> 18 tackle accoutrements, apparatus, equipment, gear, implements
#> 19 vision eyes, eyesight, perception, seeing, sight
#> 20 world earth, earthly sphere, globe, everybody, everyone
Created on 2019-04-05 by the reprex package (v0.2.1)
Please note that plyr should be loaded before dplyr

Resources