Related
I have a dataframe with 3 columns, example like this (purely hypothetical):
id <- c("Muller", "Muller", "Ter Stegen", "Musiala", "Musiala", "Musiala", "Pavard")
tid <- c("Davies", "De Ligt", "Muller", "Kimmich", "Pavard", "Lewandowski", "De Ligt")
Passes <- c(14, 5, 1, 10, 23, 4, 1)
Passes <- data.frame(id, tid, Passes)
dput(Passes)
And I have been wanting to plot this so that the vertices appear at specific coordinates in the output graph .
So far my codes are like this:
g <- graph.data.frame(Passes, directed = TRUE)
set_edge_attr(g, "weight", value= E(g)$Passes)
coords <- data.frame(id = c("Ter Stegen", "Musiala", "Davies", "Kimmich", 'De Ligt', "Lewandowski", "Muller", "Pavard"),
x= c(0.5, 1, 1, 1, 2, 3, 3, 3.5),
y= c(1, 1.8, 1.4, 1, 0.6, 1.8, 1.6, 1.2))
plot(g, vertex.size= 2, edge.arrow.size = 0.3, vertex.label.cex = 0.8,
edge.curved=.2, asp = 0, vertex.label.dist=0.7,
layout=coords, xlim = c(0, 4), ylim = c(0, 2))
But then I keep getting errors like 'Error in norm_coords(layout, -1, 1, -1, 1) : `layout' not a matrix''
Anyone know what is wrong with my code, or can propose a better method? Thank you! It's just my actual dataframe has 32 unique ids and together there are 252 rows, I want to find an efficient way to give each unique id a position.
Thanks,
Emmy
try
library(tidyverse)
new.coords <- coords %>% arrange(factor(id, levels = V(g))) %>% select(x,y) %>% as.matrix()
plot(g, vertex.size= 2, edge.arrow.size = 0.3, vertex.label.cex = 0.8,
edge.curved=.2, asp = 0, vertex.label.dist=0.7,
layout = new.coords)
I am working with the R programming language. I am trying to make a "parallel coordinates plot" using some fake data:
library(MASS)
a = rnorm(100, 10, 10)
b = rnorm(100, 10, 5)
c = rnorm(100, 5, 10)
d = matrix(a, b, c)
parcoord(d[, c(3, 1, 2)], col = 1 + (0:149) %/% 50)
However, a problem arises when I try to mix numeric and factor variables together:
group <- sample( LETTERS[1:4], 100, replace=TRUE, prob=c(0.25, 0.25, 0.25, 0.25) )
d = matrix(a,b, group)
parcoord(d[, c(3, 1, 2)], col = 1 + (0:149) %/% 50)
Error in x - min(x, na.rm = TRUE): non-numeric argument to binary operator
I am just curious. Can this problem be resolved? Or is it simply impossible to make such a plot using numeric and factor variables together?
I saw a previous stackoverflow post over here where a similar plot is made using numeric and factor variables: How to plot parallel coordinates with multiple categorical variables in R
However, I am using a computer with no USB port or internet access - I have a pre-installed version of R with limited libraries (I have plotly, ggplot2, dplyr, MASS ... I don't have ggally or tidyverse) and was looking for a way to do this only with the parcoord() function.
Does anyone have any ideas if this can be done?
Thanks
Thanks
One option is to label rows of the matrix using a factor and use that on the plot, e.g.
library(MASS)
set.seed(300)
par(xpd=TRUE)
par(mar=c(4, 4, 4, 6))
a = rnorm(12, 10, 10)
b = rnorm(12, 10, 5)
c = rnorm(12, 5, 10)
group <- sample(c("#FF9289", "#FF8AFF", "#00DB98", "#00CBFF"),
12, replace=TRUE)
d = cbind(a, b, c)
rownames(d) <- group
parcoord(d[, c(3, 1, 2)], col = group)
title(main = "Plot", xlab = "Variable", ylab = "Values")
axis(side = 2, at = seq(0, 1, 0.1),
tick = TRUE, las = 1)
legend(3.05, 1, legend = c("A", "B", "C", "D"), lty = 1,
col = c("#FF9289", "#FF8AFF", "#00DB98", "#00CBFF"))
EDIT
Thanks for the additional explanation. What you want does make sense, but unfortunately it doesn't look like it will work as I expected. I tried to make a plot using an ordered factor as the middle variable (per https://pasteboard.co/JKK4AUD.jpg) but got the same error ("non-numeric argument to binary operator").
One way I thought of doing it is to recode the factor as a number (e.g. "Var_1" -> 0.2, "Var_2" -> 0.4) as below:
library(MASS)
set.seed(123)
par(xpd=TRUE)
par(mar=c(4, 4, 4, 6))
a = rnorm(12, 10, 10)
b = c(rep("Var_1", 3),
rep("Var_2", 3),
rep("Var_3", 3),
rep("Var_4", 3))
c = rnorm(12, 5, 10)
group <- c(rep("#FF9289", 3),
rep("#FF8AFF", 3),
rep("#00DB98", 3),
rep("#00CBFF", 3))
d = data.frame("A" = a,
"Factor" = b,
"C" = c,
"Group" = group)
d$Factor <- sapply(d$Factor, switch,
"Var_1" = 0.8,
"Var_2" = 0.6,
"Var_3" = 0.4,
"Var_4" = 0.2)
parcoord(d[, c(1, 2, 3)], col = group)
title(main = "Plot", xlab = "Variable", ylab = "Values")
axis(side = 2, at = seq(0, 1, 0.1),
tick = TRUE, las = 1)
legend(3.05, 1, legend = c("A", "B", "C", "D"), lty = 1,
col = c("#FF9289", "#FF8AFF", "#00DB98", "#00CBFF"))
mtext(text = "Var 1", side = 1, adj = 0.6, padj = -30)
mtext(text = "Var 3", side = 1, adj = 0.6, padj = -12)
mtext(text = "Var 2", side = 1, adj = 0.6, padj = -21)
mtext(text = "Var 4", side = 1, adj = 0.6, padj = -3)
I'm pretty new to ggraph and strugling a bit to get the legend to properly display the colors of the nodes.
I have the following example data set:
nodes <- data.frame( ID = c( 2, 3, 4, 5, 6, 7 ),
cl = c( "A", "B", "A", "A", "C", "B" ),
ty = c( 1, 1, 0, 0, 0, 1 ),
assets = c( 20000000, 10000, 500000, 10000, 150, 50 )
)
edges <- data.frame( from = c( 2, 5, 4, 6, 7, 4, 3 ),
to = c( 3, 4, 3, 5, 5, 3, 2 ),
we = c( 1, 1, 3, 2, 1, 1, 3 ),
pa = c( 0, 0, 1, 0, 1, 0, 0 ))
Based on these data I tried to plot the graph:
library( 'tidygraph' )
library( 'igraph' )
library( 'ggraph' )
graph <- graph_from_data_frame( edges, vertices = nodes, directed = TRUE ) %>% as_tbl_graph()
ggraph( graph, layout = 'fr' ) +
# Create edge layer
geom_edge_link0( aes( width = we, color = factor( pa )),
arrow = arrow( angle = 10, length = unit( 0.15, "inches" ),
ends = "last", type = "closed" )) +
scale_edge_width( range = c( 0.2, 2.2 )) +
scale_edge_color_grey( start = 0.4, end = 0.8 ) +
# Create node layer
geom_node_point( aes( shape = factor( ty ), fill = cl, size = log( assets ))) +
# Title and legend
labs( edge_width = "Power", edge_color = "Ownertype" ) +
ggtitle( "Title" ) +
theme( legend.key = element_rect( fill = "white", colour = "black" ),
legend.title = element_text(face = "bold" )) +
scale_size_continuous( name = "Assets", range = c( 3, 6 ), breaks = c( 5, 10, 15 )) +
scale_shape_manual( name = "Same branch", values = c( 21, 23 ), labels = c( "no", "yes" )) +
scale_fill_brewer( name = "Sector", palette = "Dark2" )
I have two issues with the legend under heading 'Sector':
The color keys are not displayed, they are all black. This happens everytime I let both the color and the shape of the nodes vary.
The color keys are too small, so that it is really hard to distinguish the colors (once they are there).
Unfortunately all my attempts to solve these two problems have been unsuccessful.
By default, the legend guide for points doesn't use a shape that supports a fill color. You need to set such a shape for the guide:
+ guides(fill = guide_legend(override.aes = list(size = 5, shape = 21)))
Is it possible to add a 2d plot to a 3D plot in R?
I have the following R code that generates a 3d bar plot.
dt = structure(c(1, 1, 1, 3,
0, 2, 2, 1,
1, 2, 1, 3,
2, 6, 3, 1,
1, 2, 3, 0,
1, 0, 2, 1,
1,2,2,2), .Dim = c(4L, 7L), .Dimnames = list(c("0-50",
"51-60", "61-70", "71-80"
), c("0-50", "51-60", "61-70", "71-80", "81-90", "91-100", "101-Inf")))
m <- matrix(rep(seq(4),each=7), ncol=7, nrow=4, byrow = TRUE)
hist3D(x = 1:4, z = dt, scale = T,bty="g", phi=35,theta=30,border="black",space=0.15,col = jet.col(5, alpha = 0.8), add = F, colvar = m, colkey = F, ticktype = "detailed")
The hist3d call only:
hist3D(x = 1:4, z = dt, scale = T,bty="g", phi=35,theta=30, border="black", space=0.15,col = jet.col(5, alpha = 0.8), add = F, colvar = m, colkey = F, ticktype = "detailed")
This generates the following 3d plot:
What I'm looking for is being able to add a plot in the position where the grey grid is. Is it possible?
Thanks!
As far as I know, there isn't a good function to make a barplot in RGL. I suggest a manual method.
dt = structure(c(1, 1, 1, 3,
0, 2, 2, 1,
1, 2, 1, 3,
2, 6, 3, 1,
1, 2, 3, 0,
1, 0, 2, 1,
1,2,2,2), .Dim = c(4L, 7L), .Dimnames = list(c("0-50",
"51-60", "61-70", "71-80"
), c("0-50", "51-60", "61-70", "71-80", "81-90", "91-100", "101-Inf")))
Making 3D barplot in RGL
library(rgl)
# make dt xyz coordinates data
dt2 <- cbind( expand.grid(x = 1:4, y = 1:7), z = c(dt) )
# define each bar's width and depth
bar_w <- 1 * 0.85
bar_d <- 1 * 0.85
# make a base bar (center of undersurface is c(0,0,0), width = bar_w, depth = bar_d, height = 1)
base <- translate3d( scale3d( cube3d(), bar_w/2, bar_d/2, 1/2 ), 0, 0, 1/2 )
# make each bar data and integrate them
bar.list <- shapelist3d(
apply(dt2, 1, function(a) translate3d(scale3d(base, 1, 1, a[3]), a[1], a[2], 0)),
plot=F)
# set colors
for(i in seq_len(nrow(dt2))) bar.list[[i]]$material$col <- rep(jet.col(5)[c(1:3,5)], 7)[i]
open3d()
plot3d(0,0,0, type="n", xlab="x", ylab="y", zlab="z", box=F,
xlim=c(0.5, 4.5), ylim=c(0.5, 7.5), zlim=c(0, 6.2), aspect=T, expand=1)
shade3d(bar.list, alpha=0.8)
wire3d(bar.list, col="black")
light3d(ambient="black", diffuse="gray30", specular="black") # light up a little
Add a 2d plot
# show2d() uses 2d plot function's output as a texture
# If you need the same coordinates of 2d and 3d, plot3d(expand=1) and show2d(expand=1),
# the same xlims, equivalent plot3d(zlim) to 2d plot's ylim, 2d plot(xaxs="i", yaxs="i") are needed.
show2d({
par(mar = c(0,0,0,0))
barplot(c(3,4,5,6), yaxs="i", ylim=c(0, 6.2))
},
expand = 1 , face = "y+", texmipmap = F) # texmipmap=F makes tone clear
I have following type data for human family:
indvidual <- c("John", "Kris", "Peter", "King", "Marry", "Renu", "Kim", "Ken", "Lu")
Parent1 <- c( NA, NA, "John", "John", "John", NA, "Peter", NA, NA)
Parent2 <- c( NA, NA, "Kris", "Kris", "Renu", NA, "Lu", NA, NA)
X <- c( 2, 3, 2, 3, 4, 5, 1.5, 1, 1)
Y <- c( 3, 3, 2, 2, 2, 3, 1, 3, 2)
pchsize <- c( 4.5, 4.3, 9.2, 6.2, 3.2, 6.4, 2.1, 1.9, 8)
fillcol <- c( 8.5, 8.3, 1.2, 3.2, 8.2, 2.4, 2.6, 6.1, 3.2)
myd <- data.frame (indvidual, Parent1, Parent2, X, Y, pchsize,fillcol)
indvidual Parent1 Parent2 X Y pchsize fillcol
1 John <NA> <NA> 2.0 3 4.5 8.5
2 Kris <NA> <NA> 3.0 3 4.3 8.3
3 Peter John Kris 2.0 2 9.2 1.2
4 King John Kris 3.0 2 6.2 3.2
5 Marry John Renu 4.0 2 3.2 8.2
6 Renu <NA> <NA> 5.0 3 6.4 2.4
7 Kim Peter Lu 1.5 1 2.1 2.6
8 Ken <NA> <NA> 1.0 3 1.9 6.1
9 Lu <NA> <NA> 1.0 2 8.0 3.2
I want plot something like the following, individuals points are connected to parents (Preferably different line color to Parent1 and Parent2 listed). Also pch size and pch fill is scaled to other variables pchsize and fillcol. Thus plot outline is:
Here is my progress in ggplot2:
require(ggplot2)
ggplot(data=myd, aes(X, Y,fill = fillcol)) +
geom_point(aes(size = pchsize, fill = fillcol), pch = "O") +
geom_text(aes (label = indvidual, vjust=1.25))
Issues unsolved: connecting lines, making size of pch big and fill color at the sametime.
Here is ggplot2 solution
library(ggplot2)
individual <- c("John", "Kris", "Peter", "King", "Marry", "Renu", "Kim", "Ken", "Lu")
Parent1 <- c( NA, NA, "John", "John", "John", NA, "Peter", NA, NA)
Parent2 <- c( NA, NA, "Kris", "Kris", "Renu", NA, "Lu", NA, NA)
X <- c( 2, 3, 2, 3, 4, 5, 1.5, 1, 1)
Y <- c( 3, 3, 2, 2, 2, 3, 1, 3, 2)
pchsize <- c( 4.5, 4.3, 9.2, 6.2, 3.2, 6.4, 2.1, 1.9, 8)
fillcol <- c( 8.5, 8.3, 1.2, 3.2, 8.2, 2.4, 2.6, 6.1, 3.2)
myd <- data.frame (individual, Parent1, Parent2, X, Y, pchsize,fillcol)
SegmentParent1 <- merge(
myd[, c("individual", "X", "Y")],
myd[!is.na(myd$Parent1), c("Parent1", "X", "Y")],
by.x = "individual", by.y = "Parent1")
SegmentParent2 <- merge(
myd[, c("individual", "X", "Y")],
myd[!is.na(myd$Parent1), c("Parent2", "X", "Y")],
by.x = "individual", by.y = "Parent2")
Segments <- rbind(SegmentParent1, SegmentParent2)
ggplot(data=myd, aes(X, Y)) +
geom_segment(data = Segments, aes(x = X.x, xend = X.y, y = Y.x, yend = Y.y)) +
geom_point(aes(size = pchsize, colour = fillcol)) +
geom_text(aes (label = indvidual), vjust = 0.5, colour = "red", fontface = 2) +
scale_x_continuous("", expand = c(0, 0.6), breaks = NULL) +
scale_y_continuous("", expand = c(0, 0.4), breaks = NULL) +
scale_size(range = c(20, 40)) +
theme_bw()
Here is a solution just using plot(), text(), and arrows(). The for loop is a bit cluttered, but will work for larger data sets and it should be easy to play with the plot and arrows:
plot(myd$X,myd$Y, col='white', type="p", main="", ylab="", xlab="",
axes = FALSE, ylim = c(min(myd$Y)*.8, max(myd$Y)*1.2),
xlim = c(min(myd$X)*.8, max(myd$X)*1.2))
child = data.frame()
child = myd[!is.na(myd$Parent1),]
DArrows = matrix(0,nrow(child),4);
MArrows = matrix(0,nrow(child),4);
for (n in 1:nrow(child)){
d<-child[n,];
c1<-myd$indvidual==as.character(d$Parent1);
b1<-myd[t(c1)];
c2<-myd$indvidual==as.character(d$Parent2);
b2<-myd[t(c2)];
DArrows[n, 1]=as.double(d$X)
DArrows[n, 2]=as.double(d$Y)
DArrows[n, 3]=as.double(b1[4])
DArrows[n, 4]=as.double(b1[5])
MArrows[n, 1]=as.double(d$X)
MArrows[n, 2]=as.double(d$Y)
MArrows[n, 3]=as.double(b2[4])
MArrows[n, 4]=as.double(b2[5])
}
arrows(DArrows[,3],DArrows[,4],DArrows[,1],DArrows[,2],lwd= 2, col = "blue",length=".1")
arrows(MArrows[,3],MArrows[,4],MArrows[,1],MArrows[,2],lwd=2, col = "red",length=".1")
par(new=TRUE)
plot(myd$X,myd$Y,type = "p", main = "", ylab = "", xlab = "",cex = myd$pchsize,
axes = FALSE, pch = 21, ylim = c(min(myd$Y)*.8, max(myd$Y)*1.2),
xlim=c(min(myd$X)*.8, max(myd$X)*1.2), bg = myd$fillcol,fg = 'black')
text(1.12*myd$X, .85*myd$Y, myd$indvidual)
arrows((DArrows[,3]+DArrows[,1])/2, (DArrows[,4]+DArrows[,2])/2,
DArrows[,1], DArrows[,2], lwd = 2, col = "blue", length = ".1")
arrows((MArrows[,3]+MArrows[,1])/2, (MArrows[,4]+MArrows[,2])/2,
MArrows[,1], MArrows[,2], lwd = 2, col = "red", length = ".1")
One thing that jumped out to me was to treat this is a network - R has many packages to plot these.
Here's a very simple solution:
First, I used your parent list to make a sociomatrix - you can generally input networks using edge lists as well - here I put 1 for the first parental relationship and 2 for the second.
psmat <- rbind(c(0, 0, 1, 1, 1, 0, 0, 0, 0),
c(0, 0, 2, 2, 0, 0, 0, 0, 0),
c(0, 0, 0, 0, 0, 0, 1, 0, 0),
rep(0, 9),
rep(0, 9),
c(0, 0, 0, 0, 2, 0, 0, 0, 0),
rep(0, 9),
rep(0, 9),
c(0, 0, 0, 0, 0, 0, 2, 0, 0))
Then, using the network package I just hit:
require(network)
plot(network(psmat), coord = cbind(X, Y), vertex.cex = pchsize,
vertex.col = fillcol, label = indvidual, edge.col = psmat)
This isn't terribly pretty in itself, but I think gives you all the basic elements you wanted.
For the colors, I believe the decimal places are just rounded - I wasn't sure what to do with those.
I know I've seen people plot networks in ggplot, so that might give you a better result.
Edit:
So here's a really messy way of turning your data into a network object directly - someone else might be able to fix it. Additionally, I add an edge attribute (named 'P' for parental status) and give the first set a value of 1 and the second set a value of 2. This can be used when plotting to set the colors.
P1 <- match(Parent1, indvidual)
e1 <- cbind(P1, 1:9); e1 <- na.omit(e1); attr(e1, 'na.action') <- NULL
P2 <- match(Parent2, indvidual)
e2 <- cbind(P2, 1:9); e2 <- na.omit(e2); attr(e2, 'na.action') <- NULL
en1 <- network.initialize(9)
add.edges(en1, e1[,1], e1[,2])
set.edge.attribute(en1, 'P', 1)
add.edges(en1, e2[,1], e2[,2], names.eval = 'P', vals.eval = 2)
plot(en1, coord = cbind(X, Y), vertex.cex = pchsize,
vertex.col = fillcol, label = indvidual, edge.col = 'P')
Alternative solution use igraph
library(igraph)
mm<-data.frame(dest=c(as.character(myd$Parent1),as.character(myd$Parent2)))
mm$orig<-myd$individual
g<-graph.edgelist(as.matrix(mm[!is.na(mm$dest),]))
rownames(myd)<-as.character(myd[,1])
l<-as.matrix(myd[V(g)$name,4:5])
plot(g,layout=l,vertex.color=myd[V(g)$name,6],vertex.size=myd[V(g)$name,6])
Just play a bit with color a sizes!