How to keep forest plot from getting cut off? - r

I am creating several forest plots, but they are all getting cut off knit the RMD. Reproducible example:
author = c("aaaaaaaaaa", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "t", "u", "v", "w", "x")
Ne = c(1, 2, 3, 4, 5, 6, 7, 8, 8, 34, 5, 6, 7, 8, 4, 3, 2, 7, 3, 6, 7, 8, 5)
Me = c(1, 2, 3, 4, 5, 6, 7, 8, 8, 34, 5, 6, 7, 8, 4, 3, 2, 7, 3, 6, 7, 8, 5)
SDe = c(1, 2, 3, 4, 5, 6, 7, 8, 8, 34, 5, 6, 7, 8, 4, 3, 2, 7, 3, 6, 7, 8, 5)
Nc = c(1, 2, 3, 4, 5, 6, 7, 8, 8, 34, 5, 6, 7, 8, 4, 3, 2, 7, 3, 6, 7, 8, 5)
Mc = c(1, 2, 3, 4, 5, 6, 7, 8, 8, 34, 5, 6, 7, 8, 4, 3, 2, 7, 3, 6, 7, 8, 5)
SDc = c(1, 2, 3, 4, 5, 6, 7, 8, 8, 34, 5, 6, 7, 8, 4, 3, 2, 7, 3, 6, 7, 8, 5)
df = data.frame(author, Ne, Me, SDe, Nc, Mc, SDc)
m.cont <- meta::metacont(n.e = Ne,
mean.e = Me,
sd.e = SDe,
n.c = Nc,
mean.c = Mc,
sd.c = SDc,
studlab = author,
data = df,
sm = "SMD",
method.smd = "Hedges",
fixed = FALSE,
random = TRUE,
method.tau = "REML",
hakn = TRUE,
title = "Example")
meta::forest.meta(m.cont,
sortvar = TE,
predict = TRUE,
print.tau2 = FALSE,
leftlabs = c("Author", "g", "SE"),
xlim = "symmetric")
I know I can get the full figures through running block without knitting and then expanding the figure, but I'd like these plots to be presentable within the knit html doc.

Simply specify the width and height of your figure in the chunk header.
---
title: "Testing"
output: html_document
---
```{r,fig.width = 10,fig.height=10}
#above this is where you can add dimensions
author = c("aaaaaaaaaa", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "t", "u", "v", "w", "x")
Ne = c(1, 2, 3, 4, 5, 6, 7, 8, 8, 34, 5, 6, 7, 8, 4, 3, 2, 7, 3, 6, 7, 8, 5)
Me = c(1, 2, 3, 4, 5, 6, 7, 8, 8, 34, 5, 6, 7, 8, 4, 3, 2, 7, 3, 6, 7, 8, 5)
SDe = c(1, 2, 3, 4, 5, 6, 7, 8, 8, 34, 5, 6, 7, 8, 4, 3, 2, 7, 3, 6, 7, 8, 5)
Nc = c(1, 2, 3, 4, 5, 6, 7, 8, 8, 34, 5, 6, 7, 8, 4, 3, 2, 7, 3, 6, 7, 8, 5)
Mc = c(1, 2, 3, 4, 5, 6, 7, 8, 8, 34, 5, 6, 7, 8, 4, 3, 2, 7, 3, 6, 7, 8, 5)
SDc = c(1, 2, 3, 4, 5, 6, 7, 8, 8, 34, 5, 6, 7, 8, 4, 3, 2, 7, 3, 6, 7, 8, 5)
df = data.frame(author, Ne, Me, SDe, Nc, Mc, SDc)
m.cont <- meta::metacont(n.e = Ne,
mean.e = Me,
sd.e = SDe,
n.c = Nc,
mean.c = Mc,
sd.c = SDc,
studlab = author,
data = df,
sm = "SMD",
method.smd = "Hedges",
fixed = FALSE,
random = TRUE,
method.tau = "REML",
hakn = TRUE,
title = "Example")
meta::forest.meta(m.cont,
sortvar = TE,
predict = TRUE,
print.tau2 = FALSE,
leftlabs = c("Author", "g", "SE"),
xlim = "symmetric",)
```
When you knit it you should then get:

Related

How to convert igraph file in row/colums?

I would like to pass the information I have to a normal list of axes with nodes but I don't know how to do it. The raw data with "deput" would look like this. If someone knows how to convert this list into something easier to use I would appreciate it.I can visualise the graph with "plot" but to edit it I need to have more precise information.
library(igraph)
dput (net2$graph_pajek)
structure(list(30, FALSE, c(1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 6, 7, 13, 13,
14, 15, 16, 18, 20, 20, 21, 27, 27, 27, 27, 29, 2, 2, 2, 2, 2,
2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5,
6, 6, 7, 8, 8, 9, 9, 9, 10, 10, 10, 10, 10, 11, 11, 12, 12, 12,
13, 13, 13, 14, 14, 14, 15, 15, 15, 16, 18, 18, 18, 19, 20, 20,
21, 21, 23, 24, 25, 26, 27, 27, 27, 29, 3, 3, 3, 3, 3, 3, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
5, 5, 5, 5, 6, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 10,
10, 10, 10, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 13, 13, 14, 14, 15, 15, 15, 15, 15), list(c(1, 0, 1), structure(list(), .Names = character(0)),
list(name = c("A", "B", "C",
"D", "E", "F", "G", "H",
"I", "J", "K",
"L", "M", "N",
"O", "P", "Q", "R",
"S", "T", "U",
"V", "W", "X", "Y", "Z",
"AB", "AC", "AD", "AE"
), deg = c(248, 532, 855, 574, 1761, 261, 229, 216, 554,
628, 774, 223, 502, 295, 266, 910, 227, 312, 364, 260, 294,
741, 227, 471, 392, 376, 292, 295, 212, 287), size = c(2.,
6, 9, 6, 20,
2, 2, 2, 6,
7, 8, 2, 7,
3, 3, 10, 2,
3, 4, 2, 3.,
8, 2, 5, 4,
4, 3, 3, 2,
3), label.cex = c(0.7, 0.7, 0.7, 0.7, 0.7, 0.7,
0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7,
0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7
), id = c("A", "B", "C",
"D", "E", "F", "G", "H",
"I", "J", "K",
"L", "M", "N",
"O", "P", "Q", "R",
"S", "T", "U",
"V", "W", "X", "Y", "Z",
"AB", "AC", "AD", "AE"
)), list(num = c(4, 4, 4, 4, 7, 7, 7, 7, 7, 7, 7, 3, 3, 3,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 3, 3, 3, 1, 1, 2,
2, 1, 1, 1, 1, 2, 2, 1, 4, 4, 4, 4, 1, 7, 7, 7, 7, 7, 7,
7, 6, 6, 6, 6, 6, 6, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 1, 2, 2, 1, 2, 2, 3, 3, 3, 5, 5, 5, 5, 5, 2,
2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 1, 2,
2, 2, 2, 1, 1, 1, 1, 3, 3, 3, 1, 6, 6, 6, 6, 6, 6, 40, 40,
40, 40, 40, 40, 40, 40, 40), weight = c(4, 4, 4, 4,
7, 7, 7, 7, 7, 7, 7, 3, 3, 3, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 3, 3, 3, 1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 4,
4, 4, 4, 1, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 1, 2, 2, 1, 2, 2,
3, 3, 3, 5, 5, 5, 5, 5, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 1, 3, 3, 3, 1, 2, 2, 2, 2, 1, 1, 1, 1, 3, 3, 3,
1, 6, 6, 6, 6, 6, 6, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 7, 7, 7, 7, 7, 7, 7, 1, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7,
4, 4, 4, 4, 4, 4, 4, 4, 1, 18, 18))), <environment>), class = "igraph")
Are you looking for something like get.data.frame
> get.data.frame(net)
from to weight
1 A B 0.63502922
2 B C 0.79410173
3 C D 0.90802625
4 D E 0.09408188
5 E F 0.16450634
6 F G 0.75931882
7 G H 0.30409658
8 H I 0.23990324
9 I J 0.84762277
10 A J 0.88657718
data
Since I cannot reproduce the example in your post, I created a dummy example net like below
net <- make_ring(10) %>%
set_vertex_attr(name = "name", value = LETTERS[1:vcount(.)]) %>%
set_edge_attr(name = "weight", value = runif(ecount(.)))
To clarify a couple things:
The igraph file is not a plot per se, but a graph structure (as in, nodes and edges).
igraph has functions for plotting graphs, but there is no single and standard way of plotting a graph - instead, different algorithms can be used to determine visually-ideal ways of displaying them, and these algorithms oftentimes rely on random initializations.
The outputs from the plotting functions of igraph are only relevant in terms of R base plot drawing logic, AFAIK they don't use an intermediate format with coordinates handled in a user-comprehensible structure. You can nevertheless manage lots of aspects of how they are drawn - see ?igraph::igraph.plotting.

How can I add edges into an existing plot?

I am wanting to plot graph clusters that I define by myself. I am using the simplified undirected enron data.
library(igraphdata)
data("enron")
g <- as.undirected(enron)
g <- simplify(g)
rm("enron")
member <- c(1, 8, 9, 9, 10, 10, 8, 7, 4, 1, 2, 6, 3, 1, 2, 8, 7, 2, 1, 5,
1, 7, 6, 4, 8, 4, 8, 10, 3, 6, 1, 4, 7, 4, 3, 7, 9, 10, 3, 8, 1,
9, 8, 2, 7, 2, 9, 5, 1, 2, 6, 10, 3, 3, 2, 1, 9, 10, 3, 5, 6, 5,
5, 3, 7, 6, 9, 10, 8, 10, 8, 8, 10, 10, 10, 8, 7, 7, 9, 1, 9, 2, 9,
7, 2, 7, 7, 3, 2, 5, 2, 1, 6, 5, 10, 4, 3, 2, 4, 6, 4, 9, 5, 4,
1, 10, 2, 3, 4, 3, 6, 3, 6, 4, 6, 8, 2, 4, 5, 1, 5, 1, 4, 10, 4, 7,
5, 9, 10, 1, 2, 1, 5, 7, 5, 3, 5, 8, 7, 9, 5, 8, 1, 5, 3, 3, 3, 10,
1, 7, 8, 4, 1, 10, 9, 6, 9, 9, 4, 2, 6, 4, 6, 3, 5, 6, 9, 7, 6, 6,
4, 8, 6, 8, 8, 2, 5, 4, 3, 2, 9, 10, 2, 7)
I have tried many ways but none looks good. The best I can make is
edges_data_frame <- get.data.frame(g, what = "edges")
w.mem <- rep(0, length(E(g)))
for (i in 1:length(E(g))){
w.mem[i] <- ifelse(member[edges_data_frame$from[i]] == member[edges_data_frame$to[i]], 500, 1)
}
mem <- make_clusters(g,member)
E(g)$weight <- w.mem
colors <- rainbow(max(membership(mem)))
layout <- layout.fruchterman.reingold(g, weights=w.mem)
set.seed(1234)
plot(g, vertex.color=colors[mem$membership],
mark.groups=communities(mem),
vertex.label = NA,
edge.width = 1, edge.color = "lightgray", vertex.size = 5)
my first trial
I found that the "deleting edges plot" looks much cleaner
coGrph <- delete_edges(g, E(g)[crossing(mem, g)])
col_vector <- c('#e6194b', '#3cb44b', '#ffe119', '#4363d8', '#f58231', '#911eb4', '#46f0f0', '#f032e6', '#bcf60c', '#fabebe', '#008080', '#e6beff', '#9a6324', '#fffac8', '#800000', '#aaffc3', '#808000', '#ffd8b1', '#000075', '#808080', '#ffffff', '#000000')
temp <- sapply(1:length(V(g)), FUN = function(i) {col_vector[member[i]]})
V(coGrph)$color <- temp
plot(coGrph, vertex.label = NA, vertex.size = 5)
my second trial
However, this plot has some missing edges and does not reflect the true connection of the plot. I want to use this plot and add the deleted edges back to the plot without changing the positions I have right now. Is it possible?
Thank you very much I really appreciate your help.
Yes. Use your coGrph to create a layout, but then plot the original graph.
Continuing your "second trial"
set.seed(1234)
LOcG = layout_nicely(coGrph)
V(g)$color <- temp
plot(g, layout=LOcG, vertex.label = NA, vertex.size = 5)

Code to analyze relationships between responses to different ranking questions on a survey

My goal is to find much simpler code, which can generalize, that shows the relationships between responses to two survey questions. In the MWE, one question asked respondents to rank eight marketing selections from 1 to 8 and the other asked them to rank nine attribute selections from 1 to 9. Higher rankings indicate the respondent favored the selection more. Here is the data frame.
structure(list(Email = c("a", "b", "c", "d", "e", "f", "g", "h",
"i"), Ads = c(2, 1, 1, 1, 1, 2, 1, 1, 1), Alumni = c(3, 2, 2,
3, 2, 3, 2, 2, 2), Articles = c(6, 4, 3, 2, 3, 4, 3, 3, 3), Referrals = c(4,
3, 4, 8, 7, 8, 8, 6, 4), Speeches = c(7, 7, 6, 7, 4, 7, 4, 5,
5), Updates = c(8, 6, 6, 5, 5, 5, 5, 7, 6), Visits = c(5, 8,
7, 6, 6, 6, 6, 4, 8), `Business Savvy` = c(10, 6, 10, 10, 4,
4, 6, 8, 9), Communication = c(4, 3, 8, 3, 3, 9, 7, 6, 7), Experience = c(7,
7, 7, 9, 2, 8, 5, 9, 5), Innovation = c(2, 1, 4, 2, 1, 2, 2,
1, 1), Nearby = c(3, 2, 2, 1, 5, 3, 3, 2, 2), Personal = c(8,
10, 6, 8, 6, 10, 4, 3, 3), Rates = c(9, 5, 9, 6, 9, 7, 10, 5,
4), `Staffing Model` = c(6, 8, 5, 5, 7, 5, 8, 7, 8), `Total Cost` = c(5,
4, 3, 7, 8, 6, 9, 4, 6)), row.names = c(NA, -9L), class = c("tbl_df",
"tbl", "data.frame"))
If numeric rankings cannot be used for my solution to calculating relationships (correlations), please correct me.
Hoping they can be used, I arrived at the following plodding code, which I hope calculates the correlation matrix of each method selection against each attribute selection.
library(psych)
dataframe2 <- psych::corr.test(dataframe[ , c(2, 9:17)])[[1]][1:10] # the first method vs all attributes
dataframe3 <- psych::corr.test(dataframe[ , c(3, 9:17)])[[1]][1:10] # the 2nd method vs all attributes and so on
dataframe4 <- psych::corr.test(dataframe[ , c(4, 9:17)])[[1]][1:10]
dataframe5 <- psych::corr.test(dataframe[ , c(5, 9:17)])[[1]][1:10]
dataframe6 <- psych::corr.test(dataframe[ , c(6, 9:17)])[[1]][1:10]
dataframe7 <- psych::corr.test(dataframe[ , c(7, 9:17)])[[1]][1:10]
dataframe8 <- psych::corr.test(dataframe[ , c(8, 9:17)])[[1]][1:10]
# create a dataframe from the rbinded rows
bind <- data.frame(rbind(dataframe2, dataframe3, dataframe4, dataframe5, dataframe6, dataframe7, dataframe8))
Rename rows and columns:
colnames(bind) <- c("Sel", colnames(dataframe[9:17]))
rownames(bind) <- colnames(dataframe[2:8])
How can I accomplish the above more efficiently?
By the way, the bind data frame also allows one to produce a heat map with the DataExplorer package.
library(DataExplorer)
DataExplorer::plot_correlation(bind)
[Summary]
In the scope of our discussion, there are two ways to get the correlation data.
Use stats::cor, i.e., cor(subset(dataframe, select = -Email))
Use psych::corr.test, i.e., corr.test(subset(dataframe, select = -Email))[[1]]
Then you may subset the correlation matrix with the desired rows and columns.
In order to use DataExplorer::plot_correlation, you can simply do plot_correlation(dataframe, type = "c"). Note: the output heatmap will include correlations for all columns, so you can just ignore columns that are not of interests.
[Original Answer]
## Create data
dataframe <- structure(
list(
Email = c("a", "b", "c", "d", "e", "f", "g", "h", "i"),
Ads = c(2, 1, 1, 1, 1, 2, 1, 1, 1),
Alumni = c(3, 2, 2, 3, 2, 3, 2, 2, 2),
Articles = c(6, 4, 3, 2, 3, 4, 3, 3, 3),
Referrals = c(4, 3, 4, 8, 7, 8, 8, 6, 4),
Speeches = c(7, 7, 6, 7, 4, 7, 4, 5, 5),
Updates = c(8, 6, 6, 5, 5, 5, 5, 7, 6),
Visits = c(5, 8, 7, 6, 6, 6, 6, 4, 8),
`Business Savvy` = c(10, 6, 10, 10, 4, 4, 6, 8, 9),
Communication = c(4, 3, 8, 3, 3, 9, 7, 6, 7),
Experience = c(7, 7, 7, 9, 2, 8, 5, 9, 5),
Innovation = c(2, 1, 4, 2, 1, 2, 2, 1, 1),
Nearby = c(3, 2, 2, 1, 5, 3, 3, 2, 2),
Personal = c(8, 10, 6, 8, 6, 10, 4, 3, 3),
Rates = c(9, 5, 9, 6, 9, 7, 10, 5, 4),
`Staffing Model` = c(6, 8, 5, 5, 7, 5, 8, 7, 8),
`Total Cost` = c(5, 4, 3, 7, 8, 6, 9, 4, 6)
),
row.names = c(NA, -9L),
class = c("tbl_df", "tbl", "data.frame")
)
Following your example strictly, we can do the following:
## Calculate correlation
df2 <- subset(dataframe, select = -Email)
marketing_selections <- names(df2)[1:7]
attribute_selections <- names(df2)[8:16]
corr_matrix <- psych::corr.test(df2)[[1]]
bind <- subset(corr_matrix,
subset = rownames(corr_matrix) %in% marketing_selections,
select = attribute_selections)
DataExplorer::plot_correlation(bind)
WARNING
However, is this what you really want? psych::corr.test generates the correlation matrix, and DataExplorer::plot_correlation calculates the correlation again. It is like the correlation of the correlation.

plot from sankeyNetwork in networkD3 does not show output neither generates any warning/error in R

I want to generate a Sankey plot to visualize movements to different areas using sankeyNetwork() from the package networkd3 in r. I tried to mimic some examples as perfectly as possible. But when I run the function sankeyNetwork, no output is generated. On top of that, R doesn't show any warnings, erros et cetera. Therefore, I can't really check whether I made mistakes (obviously, because no plot is generated) and how to fix them. I provided a sample df and the code below.
library(networkD3)
nodes <- data.frame(area = c("a", "b", "c", "d", "e", "f", "g",
"h", "i", "j", "k", "l", "m", "n"))
links2 <- data.frame(source = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9, 10, 10, 11, 11, 11, 12, 13, 13),
target = c(2, 8, 10, 11, 13, 0, 4, 5, 6, 7, 10, 11, 13, 0, 4, 9, 10, 12, 13, 0, 5, 6, 7, 10, 11, 13, 7, 10, 12,
0, 10, 11, 12, 13, 8, 9, 10, 11, 12, 13, 9, 10, 13, 10, 12, 13, 0, 11, 12, 13, 0, 14, 0, 0),
value = c(14, 4, 6, 23, 3, 6, 36, 3, 4, 4, 3, 12, 3, 24, 3, 6, 19, 3, 9, 3, 6, 3, 11, 9, 3, 22, 3, 3, 10, 3, 4,
3, 3, 9, 12, 5, 16, 13, 3, 10, 3, 4, 9, 7, 4, 4, 77, 4, 6, 6, 27, 3, 3, 3))
sankeyNetwork(Links = links2, Nodes = nodes,
Source = "source", Target = "target",
Value = "value", NodeID = "area",
fontSize= 12, nodeWidth = 30)
You refer to 15 unique nodes in your links2 data frame, but you only have 14 unique nodes in your nodes data frame.
length(unique(c(links2$source, links2$target)))
# [1] 15
length(nodes$area)
# [1] 14
If you add another node, it will work...
library(networkD3)
nodes <- data.frame(area = c("a", "b", "c", "d", "e", "f", "g",
"h", "i", "j", "k", "l", "m", "n", "o"))
links2 <- data.frame(source = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9, 10, 10, 11, 11, 11, 12, 13, 13),
target = c(2, 8, 10, 11, 13, 0, 4, 5, 6, 7, 10, 11, 13, 0, 4, 9, 10, 12, 13, 0, 5, 6, 7, 10, 11, 13, 7, 10, 12,
0, 10, 11, 12, 13, 8, 9, 10, 11, 12, 13, 9, 10, 13, 10, 12, 13, 0, 11, 12, 13, 0, 14, 0, 0),
value = c(14, 4, 6, 23, 3, 6, 36, 3, 4, 4, 3, 12, 3, 24, 3, 6, 19, 3, 9, 3, 6, 3, 11, 9, 3, 22, 3, 3, 10, 3, 4,
3, 3, 9, 12, 5, 16, 13, 3, 10, 3, 4, 9, 7, 4, 4, 77, 4, 6, 6, 27, 3, 3, 3))
sankeyNetwork(Links = links2, Nodes = nodes,
Source = "source", Target = "target",
Value = "value", NodeID = "area",
fontSize= 12, nodeWidth = 30)

Optimizing an R Boggle Solver [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 7 years ago.
Improve this question
Forenote: this is a follow-up question to this one.
I've programmed a Boggle Game Solver in R (see this github page for source code), and find its performance disappointing.
Here's how it works...
# Say we have the following set of letters
bog.letters <- c("t", "e", "n", "s", "d", "a", "i", "o",
"l", "e", "r", "o", "c", "f", "i", "e")
# We get the list of paths (permutations) from a pre-existing list
paths <- paths.by.length[[6]] # 6th element corresponds to 8-element "paths"
dim(paths) # [1] 183472 8
# The following function is the key here,
# mapping the 183,472 combinations to the 16 letters
candidates <- apply(X = paths, MARGIN = 1, FUN = function(x) paste(bog.letters[x], collapse=""))
# The only remaining thing is to intersect the candidate words
# with the actual words from our dictionary
dict.words <- dict.fr$mot[dict.fr$taille == 8]
valid.words <- intersect(candidates, dict.words)
Reproducible example for 13-letter words candidates
bog.letters <- c("t", "e", "n", "s", "d", "a", "i", "o", "l", "e", "r", "o", "c", "f", "i", "e")
n.letters <- 13
paths <- structure(list(V1 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), V2 = c(2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2), V3 = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3),
V4 = c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4), V5 = c(7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7), V6 = c(6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6), V7 = c(5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), V8 = c(9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9), V9 = c(10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10), V10 = c(11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 13, 13, 13, 13,
13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14), V11 = c(8, 8,
12, 12, 12, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 14, 14,
14, 14, 14, 14, 14, 11, 11, 11, 11, 11, 11, 11, 11), V12 = c(12,
12, 15, 15, 16, 15, 15, 12, 12, 14, 16, 12, 12, 15, 15, 11,
11, 11, 11, 15, 15, 15, 8, 12, 12, 12, 15, 15, 16, 16), V13 = c(15,
16, 14, 16, 15, 12, 16, 8, 16, 13, 12, 8, 15, 12, 14, 8,
12, 15, 16, 11, 12, 16, 12, 8, 15, 16, 12, 16, 12, 15)), .Names = c("V1",
"V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11",
"V12", "V13"), row.names = c(NA, 30L), class = "data.frame")
candidates <- apply(X = paths, MARGIN = 1, FUN = function(x) paste(bog.letters[x], collapse=""))
For such a small path list, this is pretty fast. But the actual number of paths for 13-letter words is 2,644,520. So it can take a minute or even more to find all candidates. Using doSNOW, I am able to parrallelize the searches, reducing the total time by a significant amount, but there is a huge drawback to this: when using a normal loop, I can exit/break whenever I reach the point where no more words are found. This is not obvious (impossible?) to do with parrallel processes.
So my question is: can you think of a better function/algorithm for this task? Some websites provide solutions to Boggle game in a matter of seconds... Either they generated all possible letter combinations and stored the results in a database (!), else they clearly use a better algorithm (and probably a compiled language) to achieve those results.
Any ideas?
Using cpp_str_split function from the Rcpp Gallery, running time is now reduced to 3secs for 2644520 paths.
library(stringi)
paths <- data.frame(matrix(sample(1:16, 13*2644520, TRUE), ncol=13))
a1 <- stri_c(bog.letters[t(as.matrix(paths))], collapse="")
candidates <- cpp_str_split(a1, 13)[[1]]
For 2644520 paths, apply approach takes about 80secs on my notebook.

Resources