how to expand the polygon to reach nearby line in r? - r

Let say i have a SpatialPolygons object with 3 polygons data name groupexc:
library(raster)
p1 <- matrix(c(2, 3, 4, 5, 6, 5, 4, 3, 2, 4, 5, 6, 5, 4, 3, 2, 3, 4), ncol=2)
p2 <- matrix(c(8, 9, 10, 11, 12, 11, 10, 9, 8, 4, 5, 6, 5, 4, 3, 2, 3, 4), ncol=2)
p3 <- matrix(c(5, 6, 7, 8, 9, 8, 7, 6, 5, 9, 10, 11, 10, 9, 8, 7, 8, 9), ncol=2)
groupexc <- spPolygons(p1, p2, p3)
And a SpatialPolygons object zoneexc that represents a single zone:
zoneexc = spPolygons(matrix(c(2,1,3,4,6,8,10,13,14,14,12,10,8,6,4,2,1,3,7,10,12,14,12,6,4,3,1,1,1,1,1,1), ncol=2))
Is there a way for me to expand the output from groupexc until it reach points in zoneexc?
before
plot(zoneexc, border='red', lwd=3)
plot(groupexc, add=TRUE, border='blue', lwd=2)
text(groupexc, letters[1:3])
after:
Any help would be appreciated.

Here is an approximate solution. This approach might break for large problems, and it depends on having sufficient number of nodes in each polygon. But it may be good enough for your purpose.
# example data
library(raster)
p1 <- matrix(c(2, 3, 4, 5, 6, 5, 4, 3, 2, 4, 5, 6, 5, 4, 3, 2, 3, 4), ncol=2)
p2 <- matrix(c(8, 9, 10, 11, 12, 11, 10, 9, 8, 4, 5, 6, 5, 4, 3, 2, 3, 4), ncol=2)
p3 <- matrix(c(5, 6, 7, 8, 9, 8, 7, 6, 5, 9, 10, 11, 10, 9, 8, 7, 8, 9), ncol=2)
groups <- spPolygons(p1, p2, p3, attr=data.frame(name=c('a', 'b', 'c')))
zone <- spPolygons(matrix(c(2,1,3,4,6,8,10,13,14,14,12,10,8,6,4,2,1,3,7,10,12,14,12,6,4,3,1,1,1,1,1,1), ncol=2))
Now create nearest neighbor polygons. For this to work as below, you need dismo version 1.1-1 (or higher)
library(dismo)
# get the coordinates of the polygons
g <- unique(geom(groups))
v <- voronoi(g[, c('x', 'y')], ext=extent(zone))
# plot(v)
# assign group id to the new polygons
v$group <- g[v$id, 1]
# aggregate (dissolve) polygons by group id
a <- aggregate(v, 'group')
# remove areas outside of the zone
i <- crop(a, zone)
# add another identifier
i$name <- groups$name[i$group]
plot(i, col=rainbow(3))
text(i, "name", cex=2)
plot(groups, add=TRUE, lwd=2, border='white', lty=2)
To see how it works:
points(g[, c('x', 'y')], pch=20, cex=2)
plot(v, add=TRUE)

Related

How can I add edges into an existing plot?

I am wanting to plot graph clusters that I define by myself. I am using the simplified undirected enron data.
library(igraphdata)
data("enron")
g <- as.undirected(enron)
g <- simplify(g)
rm("enron")
member <- c(1, 8, 9, 9, 10, 10, 8, 7, 4, 1, 2, 6, 3, 1, 2, 8, 7, 2, 1, 5,
1, 7, 6, 4, 8, 4, 8, 10, 3, 6, 1, 4, 7, 4, 3, 7, 9, 10, 3, 8, 1,
9, 8, 2, 7, 2, 9, 5, 1, 2, 6, 10, 3, 3, 2, 1, 9, 10, 3, 5, 6, 5,
5, 3, 7, 6, 9, 10, 8, 10, 8, 8, 10, 10, 10, 8, 7, 7, 9, 1, 9, 2, 9,
7, 2, 7, 7, 3, 2, 5, 2, 1, 6, 5, 10, 4, 3, 2, 4, 6, 4, 9, 5, 4,
1, 10, 2, 3, 4, 3, 6, 3, 6, 4, 6, 8, 2, 4, 5, 1, 5, 1, 4, 10, 4, 7,
5, 9, 10, 1, 2, 1, 5, 7, 5, 3, 5, 8, 7, 9, 5, 8, 1, 5, 3, 3, 3, 10,
1, 7, 8, 4, 1, 10, 9, 6, 9, 9, 4, 2, 6, 4, 6, 3, 5, 6, 9, 7, 6, 6,
4, 8, 6, 8, 8, 2, 5, 4, 3, 2, 9, 10, 2, 7)
I have tried many ways but none looks good. The best I can make is
edges_data_frame <- get.data.frame(g, what = "edges")
w.mem <- rep(0, length(E(g)))
for (i in 1:length(E(g))){
w.mem[i] <- ifelse(member[edges_data_frame$from[i]] == member[edges_data_frame$to[i]], 500, 1)
}
mem <- make_clusters(g,member)
E(g)$weight <- w.mem
colors <- rainbow(max(membership(mem)))
layout <- layout.fruchterman.reingold(g, weights=w.mem)
set.seed(1234)
plot(g, vertex.color=colors[mem$membership],
mark.groups=communities(mem),
vertex.label = NA,
edge.width = 1, edge.color = "lightgray", vertex.size = 5)
my first trial
I found that the "deleting edges plot" looks much cleaner
coGrph <- delete_edges(g, E(g)[crossing(mem, g)])
col_vector <- c('#e6194b', '#3cb44b', '#ffe119', '#4363d8', '#f58231', '#911eb4', '#46f0f0', '#f032e6', '#bcf60c', '#fabebe', '#008080', '#e6beff', '#9a6324', '#fffac8', '#800000', '#aaffc3', '#808000', '#ffd8b1', '#000075', '#808080', '#ffffff', '#000000')
temp <- sapply(1:length(V(g)), FUN = function(i) {col_vector[member[i]]})
V(coGrph)$color <- temp
plot(coGrph, vertex.label = NA, vertex.size = 5)
my second trial
However, this plot has some missing edges and does not reflect the true connection of the plot. I want to use this plot and add the deleted edges back to the plot without changing the positions I have right now. Is it possible?
Thank you very much I really appreciate your help.
Yes. Use your coGrph to create a layout, but then plot the original graph.
Continuing your "second trial"
set.seed(1234)
LOcG = layout_nicely(coGrph)
V(g)$color <- temp
plot(g, layout=LOcG, vertex.label = NA, vertex.size = 5)

Code to analyze relationships between responses to different ranking questions on a survey

My goal is to find much simpler code, which can generalize, that shows the relationships between responses to two survey questions. In the MWE, one question asked respondents to rank eight marketing selections from 1 to 8 and the other asked them to rank nine attribute selections from 1 to 9. Higher rankings indicate the respondent favored the selection more. Here is the data frame.
structure(list(Email = c("a", "b", "c", "d", "e", "f", "g", "h",
"i"), Ads = c(2, 1, 1, 1, 1, 2, 1, 1, 1), Alumni = c(3, 2, 2,
3, 2, 3, 2, 2, 2), Articles = c(6, 4, 3, 2, 3, 4, 3, 3, 3), Referrals = c(4,
3, 4, 8, 7, 8, 8, 6, 4), Speeches = c(7, 7, 6, 7, 4, 7, 4, 5,
5), Updates = c(8, 6, 6, 5, 5, 5, 5, 7, 6), Visits = c(5, 8,
7, 6, 6, 6, 6, 4, 8), `Business Savvy` = c(10, 6, 10, 10, 4,
4, 6, 8, 9), Communication = c(4, 3, 8, 3, 3, 9, 7, 6, 7), Experience = c(7,
7, 7, 9, 2, 8, 5, 9, 5), Innovation = c(2, 1, 4, 2, 1, 2, 2,
1, 1), Nearby = c(3, 2, 2, 1, 5, 3, 3, 2, 2), Personal = c(8,
10, 6, 8, 6, 10, 4, 3, 3), Rates = c(9, 5, 9, 6, 9, 7, 10, 5,
4), `Staffing Model` = c(6, 8, 5, 5, 7, 5, 8, 7, 8), `Total Cost` = c(5,
4, 3, 7, 8, 6, 9, 4, 6)), row.names = c(NA, -9L), class = c("tbl_df",
"tbl", "data.frame"))
If numeric rankings cannot be used for my solution to calculating relationships (correlations), please correct me.
Hoping they can be used, I arrived at the following plodding code, which I hope calculates the correlation matrix of each method selection against each attribute selection.
library(psych)
dataframe2 <- psych::corr.test(dataframe[ , c(2, 9:17)])[[1]][1:10] # the first method vs all attributes
dataframe3 <- psych::corr.test(dataframe[ , c(3, 9:17)])[[1]][1:10] # the 2nd method vs all attributes and so on
dataframe4 <- psych::corr.test(dataframe[ , c(4, 9:17)])[[1]][1:10]
dataframe5 <- psych::corr.test(dataframe[ , c(5, 9:17)])[[1]][1:10]
dataframe6 <- psych::corr.test(dataframe[ , c(6, 9:17)])[[1]][1:10]
dataframe7 <- psych::corr.test(dataframe[ , c(7, 9:17)])[[1]][1:10]
dataframe8 <- psych::corr.test(dataframe[ , c(8, 9:17)])[[1]][1:10]
# create a dataframe from the rbinded rows
bind <- data.frame(rbind(dataframe2, dataframe3, dataframe4, dataframe5, dataframe6, dataframe7, dataframe8))
Rename rows and columns:
colnames(bind) <- c("Sel", colnames(dataframe[9:17]))
rownames(bind) <- colnames(dataframe[2:8])
How can I accomplish the above more efficiently?
By the way, the bind data frame also allows one to produce a heat map with the DataExplorer package.
library(DataExplorer)
DataExplorer::plot_correlation(bind)
[Summary]
In the scope of our discussion, there are two ways to get the correlation data.
Use stats::cor, i.e., cor(subset(dataframe, select = -Email))
Use psych::corr.test, i.e., corr.test(subset(dataframe, select = -Email))[[1]]
Then you may subset the correlation matrix with the desired rows and columns.
In order to use DataExplorer::plot_correlation, you can simply do plot_correlation(dataframe, type = "c"). Note: the output heatmap will include correlations for all columns, so you can just ignore columns that are not of interests.
[Original Answer]
## Create data
dataframe <- structure(
list(
Email = c("a", "b", "c", "d", "e", "f", "g", "h", "i"),
Ads = c(2, 1, 1, 1, 1, 2, 1, 1, 1),
Alumni = c(3, 2, 2, 3, 2, 3, 2, 2, 2),
Articles = c(6, 4, 3, 2, 3, 4, 3, 3, 3),
Referrals = c(4, 3, 4, 8, 7, 8, 8, 6, 4),
Speeches = c(7, 7, 6, 7, 4, 7, 4, 5, 5),
Updates = c(8, 6, 6, 5, 5, 5, 5, 7, 6),
Visits = c(5, 8, 7, 6, 6, 6, 6, 4, 8),
`Business Savvy` = c(10, 6, 10, 10, 4, 4, 6, 8, 9),
Communication = c(4, 3, 8, 3, 3, 9, 7, 6, 7),
Experience = c(7, 7, 7, 9, 2, 8, 5, 9, 5),
Innovation = c(2, 1, 4, 2, 1, 2, 2, 1, 1),
Nearby = c(3, 2, 2, 1, 5, 3, 3, 2, 2),
Personal = c(8, 10, 6, 8, 6, 10, 4, 3, 3),
Rates = c(9, 5, 9, 6, 9, 7, 10, 5, 4),
`Staffing Model` = c(6, 8, 5, 5, 7, 5, 8, 7, 8),
`Total Cost` = c(5, 4, 3, 7, 8, 6, 9, 4, 6)
),
row.names = c(NA, -9L),
class = c("tbl_df", "tbl", "data.frame")
)
Following your example strictly, we can do the following:
## Calculate correlation
df2 <- subset(dataframe, select = -Email)
marketing_selections <- names(df2)[1:7]
attribute_selections <- names(df2)[8:16]
corr_matrix <- psych::corr.test(df2)[[1]]
bind <- subset(corr_matrix,
subset = rownames(corr_matrix) %in% marketing_selections,
select = attribute_selections)
DataExplorer::plot_correlation(bind)
WARNING
However, is this what you really want? psych::corr.test generates the correlation matrix, and DataExplorer::plot_correlation calculates the correlation again. It is like the correlation of the correlation.

Is it possible to limit forecasts made by bsts to positive values only?

I am learning to use various forecasting packages available in R, and came across bsts(). The data I deal with is a time series of demands.
data=c(27, 2, 7, 7, 9, 4, 3, 3, 3, 9, 6, 2, 6, 2, 3, 8, 6, 1, 3, 8, 4, 5, 8, 5, 4, 4, 6, 1, 6, 5, 1, 3, 0, 2, 6, 7, 1, 2, 6, 2, 8, 6, 1, 1, 3, 2, 1, 3, 1, 6, 3, 4, 3, 7, 3, 4, 1, 7, 5, 6, 3, 4, 3, 9, 2, 1, 7, 2, 2, 9, 4, 5, 3, 4, 2, 4, 4, 8, 6, 3, 9, 2, 9, 4, 1, 3, 8, 1, 7, 7, 6, 0, 1, 4, 8, 9, 2, 5)
ts.main=ts(data, start=c(1910,1), frequency=12)
ss <- AddLocalLinearTrend(list(), y=ts.main)
ss <- AddSeasonal(ss, y=as.numeric(ts.temp), nseasons=12)
model <- bsts(as.numeric(ts.temp),
state.specification = ss,
niter = 1000)
pred <- predict(model, horizon = 12)
Is there way I can restrict pred$mean from becoming negative?
Since your data are a time series of counts, you need to take that into account rather than assume Gaussian errors; for some discussion on this and elaboration of some approaches, see for example Brandt et al 2000 and Brandt and Williams 2001. Luckily, the bsts package has a built-in functionality for this, the family option (see pages 24 to 26 of the documentation).
So, you can just do this
model <- bsts(as.numeric(ts.main),
state.specification = ss,
family = 'poisson',
niter = 1000)
so that the bsts() function correctly considers the data as counts, which will solve your issue, since the draws from the posterior predictive distribution will then be non-negative by definition.

How to dynamically indicate groups in an R in plot

This is my data
x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22)
y = c(1, 6, 2, 5, 4, 7, 9, 6, 8, 4, 5, 6, 5, 5, 6, 7, 5, 8, 9,
5, 4, 7)
plot(x, y)
fit <- lm(y ~ x)
fit
abline(fit, col = "black", lwd = "1")
I would like to the plot to split the data into two groups, observations above the regression line and and those under the regression line. How can I do this?
You can use predict to get the fitted value at each x, and then a logical comparison between the observed and fitted to test if they're above or below the line. Then set the colors you plot based on this logical comparison.
prediction <- predict(fit)
colors<-ifelse(y>prediction,1,2)
plot(x,y,col=colors)
abline(fit, col= "black",lwd="1")

Creating a histogram with appropriate counts and labels in R

I have a dataset (dat), which I am hard-coding in here:
dat = c(5, 9, 5, 6, 5, 6, 8, 4, 6, 4, 6, 6, 4, 6, 4, 6, 5, 5, 6, 5, 6, 7, 4, 5, 4, 4, 6, 4, 4, 5, 7, 6, 3, 5, 5, 5, 5, 4, 6, 3, 6, 5, 4, 6, 5, 8, 4, 8, 5, 5, 4, 4, 6, 6, 4, 6, 4, 7, 4, 1, 4, 6, 3, 6, 3, 4, 6, 6, 3, 6, 6, 2, 5, 5, 4, 7, 6)
table(dat)
By doing the table function above on the data, I see that there should be a count of 1 for values of 1, and count of 1 for values of 2. However, when I plot the data using hist, I get a count of 2.
hist(dat, col="lightgreen", labels = TRUE, xlim=c(0,10), ylim=c(0,27))
This is the first problem. The other problem is that I am trying to plot the x label value for the corresponding bin (where there should be 11 bins, labeled 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10). Even though I have no 0 values or 10 values, I would like to illustrate that they had a count of 0, and have their bins - like the rest- labeled. How can I accomplish that?
Thanks.
am = hist(dat, col="lightgreen", labels = TRUE,
breaks=seq(min(dat)-2,max(dat)),
axes=F)
axis(2)
axis(1,at=am$mids,seq(min(dat)-1,max(dat)))
Did you mean like this:
hist(dat, col="lightgreen", labels = TRUE,
xlim=c(0,10), ylim=c(0,27), breaks = 0:10, at=0:10)

Resources