connecting line like tree in r - r

I have following type data for human family:
indvidual <- c("John", "Kris", "Peter", "King", "Marry", "Renu", "Kim", "Ken", "Lu")
Parent1 <- c( NA, NA, "John", "John", "John", NA, "Peter", NA, NA)
Parent2 <- c( NA, NA, "Kris", "Kris", "Renu", NA, "Lu", NA, NA)
X <- c( 2, 3, 2, 3, 4, 5, 1.5, 1, 1)
Y <- c( 3, 3, 2, 2, 2, 3, 1, 3, 2)
pchsize <- c( 4.5, 4.3, 9.2, 6.2, 3.2, 6.4, 2.1, 1.9, 8)
fillcol <- c( 8.5, 8.3, 1.2, 3.2, 8.2, 2.4, 2.6, 6.1, 3.2)
myd <- data.frame (indvidual, Parent1, Parent2, X, Y, pchsize,fillcol)
indvidual Parent1 Parent2 X Y pchsize fillcol
1 John <NA> <NA> 2.0 3 4.5 8.5
2 Kris <NA> <NA> 3.0 3 4.3 8.3
3 Peter John Kris 2.0 2 9.2 1.2
4 King John Kris 3.0 2 6.2 3.2
5 Marry John Renu 4.0 2 3.2 8.2
6 Renu <NA> <NA> 5.0 3 6.4 2.4
7 Kim Peter Lu 1.5 1 2.1 2.6
8 Ken <NA> <NA> 1.0 3 1.9 6.1
9 Lu <NA> <NA> 1.0 2 8.0 3.2
I want plot something like the following, individuals points are connected to parents (Preferably different line color to Parent1 and Parent2 listed). Also pch size and pch fill is scaled to other variables pchsize and fillcol. Thus plot outline is:
Here is my progress in ggplot2:
require(ggplot2)
ggplot(data=myd, aes(X, Y,fill = fillcol)) +
geom_point(aes(size = pchsize, fill = fillcol), pch = "O") +
geom_text(aes (label = indvidual, vjust=1.25))
Issues unsolved: connecting lines, making size of pch big and fill color at the sametime.

Here is ggplot2 solution
library(ggplot2)
individual <- c("John", "Kris", "Peter", "King", "Marry", "Renu", "Kim", "Ken", "Lu")
Parent1 <- c( NA, NA, "John", "John", "John", NA, "Peter", NA, NA)
Parent2 <- c( NA, NA, "Kris", "Kris", "Renu", NA, "Lu", NA, NA)
X <- c( 2, 3, 2, 3, 4, 5, 1.5, 1, 1)
Y <- c( 3, 3, 2, 2, 2, 3, 1, 3, 2)
pchsize <- c( 4.5, 4.3, 9.2, 6.2, 3.2, 6.4, 2.1, 1.9, 8)
fillcol <- c( 8.5, 8.3, 1.2, 3.2, 8.2, 2.4, 2.6, 6.1, 3.2)
myd <- data.frame (individual, Parent1, Parent2, X, Y, pchsize,fillcol)
SegmentParent1 <- merge(
myd[, c("individual", "X", "Y")],
myd[!is.na(myd$Parent1), c("Parent1", "X", "Y")],
by.x = "individual", by.y = "Parent1")
SegmentParent2 <- merge(
myd[, c("individual", "X", "Y")],
myd[!is.na(myd$Parent1), c("Parent2", "X", "Y")],
by.x = "individual", by.y = "Parent2")
Segments <- rbind(SegmentParent1, SegmentParent2)
ggplot(data=myd, aes(X, Y)) +
geom_segment(data = Segments, aes(x = X.x, xend = X.y, y = Y.x, yend = Y.y)) +
geom_point(aes(size = pchsize, colour = fillcol)) +
geom_text(aes (label = indvidual), vjust = 0.5, colour = "red", fontface = 2) +
scale_x_continuous("", expand = c(0, 0.6), breaks = NULL) +
scale_y_continuous("", expand = c(0, 0.4), breaks = NULL) +
scale_size(range = c(20, 40)) +
theme_bw()

Here is a solution just using plot(), text(), and arrows(). The for loop is a bit cluttered, but will work for larger data sets and it should be easy to play with the plot and arrows:
plot(myd$X,myd$Y, col='white', type="p", main="", ylab="", xlab="",
axes = FALSE, ylim = c(min(myd$Y)*.8, max(myd$Y)*1.2),
xlim = c(min(myd$X)*.8, max(myd$X)*1.2))
child = data.frame()
child = myd[!is.na(myd$Parent1),]
DArrows = matrix(0,nrow(child),4);
MArrows = matrix(0,nrow(child),4);
for (n in 1:nrow(child)){
d<-child[n,];
c1<-myd$indvidual==as.character(d$Parent1);
b1<-myd[t(c1)];
c2<-myd$indvidual==as.character(d$Parent2);
b2<-myd[t(c2)];
DArrows[n, 1]=as.double(d$X)
DArrows[n, 2]=as.double(d$Y)
DArrows[n, 3]=as.double(b1[4])
DArrows[n, 4]=as.double(b1[5])
MArrows[n, 1]=as.double(d$X)
MArrows[n, 2]=as.double(d$Y)
MArrows[n, 3]=as.double(b2[4])
MArrows[n, 4]=as.double(b2[5])
}
arrows(DArrows[,3],DArrows[,4],DArrows[,1],DArrows[,2],lwd= 2, col = "blue",length=".1")
arrows(MArrows[,3],MArrows[,4],MArrows[,1],MArrows[,2],lwd=2, col = "red",length=".1")
par(new=TRUE)
plot(myd$X,myd$Y,type = "p", main = "", ylab = "", xlab = "",cex = myd$pchsize,
axes = FALSE, pch = 21, ylim = c(min(myd$Y)*.8, max(myd$Y)*1.2),
xlim=c(min(myd$X)*.8, max(myd$X)*1.2), bg = myd$fillcol,fg = 'black')
text(1.12*myd$X, .85*myd$Y, myd$indvidual)
arrows((DArrows[,3]+DArrows[,1])/2, (DArrows[,4]+DArrows[,2])/2,
DArrows[,1], DArrows[,2], lwd = 2, col = "blue", length = ".1")
arrows((MArrows[,3]+MArrows[,1])/2, (MArrows[,4]+MArrows[,2])/2,
MArrows[,1], MArrows[,2], lwd = 2, col = "red", length = ".1")

One thing that jumped out to me was to treat this is a network - R has many packages to plot these.
Here's a very simple solution:
First, I used your parent list to make a sociomatrix - you can generally input networks using edge lists as well - here I put 1 for the first parental relationship and 2 for the second.
psmat <- rbind(c(0, 0, 1, 1, 1, 0, 0, 0, 0),
c(0, 0, 2, 2, 0, 0, 0, 0, 0),
c(0, 0, 0, 0, 0, 0, 1, 0, 0),
rep(0, 9),
rep(0, 9),
c(0, 0, 0, 0, 2, 0, 0, 0, 0),
rep(0, 9),
rep(0, 9),
c(0, 0, 0, 0, 0, 0, 2, 0, 0))
Then, using the network package I just hit:
require(network)
plot(network(psmat), coord = cbind(X, Y), vertex.cex = pchsize,
vertex.col = fillcol, label = indvidual, edge.col = psmat)
This isn't terribly pretty in itself, but I think gives you all the basic elements you wanted.
For the colors, I believe the decimal places are just rounded - I wasn't sure what to do with those.
I know I've seen people plot networks in ggplot, so that might give you a better result.
Edit:
So here's a really messy way of turning your data into a network object directly - someone else might be able to fix it. Additionally, I add an edge attribute (named 'P' for parental status) and give the first set a value of 1 and the second set a value of 2. This can be used when plotting to set the colors.
P1 <- match(Parent1, indvidual)
e1 <- cbind(P1, 1:9); e1 <- na.omit(e1); attr(e1, 'na.action') <- NULL
P2 <- match(Parent2, indvidual)
e2 <- cbind(P2, 1:9); e2 <- na.omit(e2); attr(e2, 'na.action') <- NULL
en1 <- network.initialize(9)
add.edges(en1, e1[,1], e1[,2])
set.edge.attribute(en1, 'P', 1)
add.edges(en1, e2[,1], e2[,2], names.eval = 'P', vals.eval = 2)
plot(en1, coord = cbind(X, Y), vertex.cex = pchsize,
vertex.col = fillcol, label = indvidual, edge.col = 'P')

Alternative solution use igraph
library(igraph)
mm<-data.frame(dest=c(as.character(myd$Parent1),as.character(myd$Parent2)))
mm$orig<-myd$individual
g<-graph.edgelist(as.matrix(mm[!is.na(mm$dest),]))
rownames(myd)<-as.character(myd[,1])
l<-as.matrix(myd[V(g)$name,4:5])
plot(g,layout=l,vertex.color=myd[V(g)$name,6],vertex.size=myd[V(g)$name,6])
Just play a bit with color a sizes!

Related

How to plot igraph such that each vertex is at a specified coordinates, that resembles football player positions?

I have a dataframe with 3 columns, example like this (purely hypothetical):
id <- c("Muller", "Muller", "Ter Stegen", "Musiala", "Musiala", "Musiala", "Pavard")
tid <- c("Davies", "De Ligt", "Muller", "Kimmich", "Pavard", "Lewandowski", "De Ligt")
Passes <- c(14, 5, 1, 10, 23, 4, 1)
Passes <- data.frame(id, tid, Passes)
dput(Passes)
And I have been wanting to plot this so that the vertices appear at specific coordinates in the output graph .
So far my codes are like this:
g <- graph.data.frame(Passes, directed = TRUE)
set_edge_attr(g, "weight", value= E(g)$Passes)
coords <- data.frame(id = c("Ter Stegen", "Musiala", "Davies", "Kimmich", 'De Ligt', "Lewandowski", "Muller", "Pavard"),
x= c(0.5, 1, 1, 1, 2, 3, 3, 3.5),
y= c(1, 1.8, 1.4, 1, 0.6, 1.8, 1.6, 1.2))
plot(g, vertex.size= 2, edge.arrow.size = 0.3, vertex.label.cex = 0.8,
edge.curved=.2, asp = 0, vertex.label.dist=0.7,
layout=coords, xlim = c(0, 4), ylim = c(0, 2))
But then I keep getting errors like 'Error in norm_coords(layout, -1, 1, -1, 1) : `layout' not a matrix''
Anyone know what is wrong with my code, or can propose a better method? Thank you! It's just my actual dataframe has 32 unique ids and together there are 252 rows, I want to find an efficient way to give each unique id a position.
Thanks,
Emmy
try
library(tidyverse)
new.coords <- coords %>% arrange(factor(id, levels = V(g))) %>% select(x,y) %>% as.matrix()
plot(g, vertex.size= 2, edge.arrow.size = 0.3, vertex.label.cex = 0.8,
edge.curved=.2, asp = 0, vertex.label.dist=0.7,
layout = new.coords)

Coloured Box Plot With Precomputed Quartiles

I'm trying to colour a r-plotly boxplot with custom values, but it remains in the default blue colour.
For example - see the code in the official tutorial: https://plotly.com/r/box-plots/#box-plot-with-precomputed-quartiles. When I add the code for coloring, nothing happens with the colours:
fig <- plot_ly(y = list(1,2,3,4,5,6,7,8,9), type = "box", q1=list(1, 2, 3), median=list(4, 5, 6),
q3=list(7, 8, 9 ), lowerfence=list(-1, 0, 1),
upperfence=list(5, 6, 7), mean=list(2.2, 2.8, 3.2 ),
sd=list(0.2, 0.4, 0.6), notchspan=list(0.2, 0.4, 0.6),
color = list(1,1,2),
colors = list("red2", "grey"))
fig
What am I doing wrong?
The example has several issues, so I don't know if I understood the aim of the OP correctly. In the following, I removed the dummy ydata and replaced it with ID characters, used vectors instead of lists (except for y) and a formula with ~ for the color mapping:
library("plotly")
fig <- plot_ly(
y = c("A", "B", "C"),
type = "box",
q1 = c(1, 2, 3),
median = c(4, 5, 6),
q3 = c(7, 8, 9 ),
lowerfence = c(-1, 0, 1),
upperfence = c(5, 6, 7),
mean = c(2.2, 2.8, 3.2 ),
sd = c(0.2, 0.4, 0.6),
notchspan = c(0.2, 0.4, 0.6),
color = ~ c("a", "a", "c"),
colors = c("red2", "grey")
)
fig
For the mean and sd values, I left the original dummy values as in the original example.

How to automate positioning of inner labels within a stacked barplot?

I frequently have to produce stacked bar plots with labels. The way I've been coding the labels is very time intensive and I wondered if there was a way to code things more efficiently. I would like the labels to be centered on each section of the bars. I'd prefer base R solutions.
stemdata <- structure(list( #had to round some nums below for 100% bar
A = c(7, 17, 76),
B = c(14, 10, 76),
C = c( 14, 17, 69),
D = c( 4, 10, 86),
E = c( 7, 17, 76),
F = c(4, 10, 86)),
.Names = c("Food, travel, accommodations, and procedures",
"Travel itinerary and dates",
"Location of the STEM Tour stops",
"Interactions with presenters/guides",
"Duration of each STEM Tour stop",
"Overall quality of the STEM Tour"
),
class = "data.frame",
row.names = c(NA, -3L)) #4L=number of numbers in each letter vector#
# attach(stemdata)
print(stemdata)
par(mar=c(0, 19, 1, 2.1)) # this sets margins to allow long labels
barplot(as.matrix(stemdata),
beside = F, ylim = range(0, 10), xlim = range(0, 100),
horiz = T, col=colors, main="N=29",
border=F, las=1, xaxt='n', width = 1.03)
text(7, 2, "14%")
text(19, 2, "10%")
text(62, 2, "76%")
text(7, 3.2, "14%")
text(22.5, 3.2, "17%")
text(65.5, 3.2, "69%")
text(8, 4.4, "10%")
text(55, 4.4, "86%")
text(3.5, 5.6, "7%")
text(15, 5.6, "17%")
text(62, 5.6, "76%")
text(9, 6.9, "10%")
text(55, 6.9, "86%")
Staying base R as OP requested, we can easily automate the inner label positioning (i.e. x coordinates) within a small function.
xFun <- function(x) x/2 + c(0, cumsum(x)[-length(x)])
Now, it's good to know that barplot invisibly trows the y coordinates, we can catch them by assignment (here byc <- barplot(.)).
Eventually, just assemble coordinates and labels in data frame labs and "loop" through the text calls in a sapply. (Use col="white" or col=0 for white labels as wished in the other question.)
# barplot
colors <- c("gold", "orange", "red")
par(mar=c(2, 19, 4, 2) + 0.1) # expand margins
byc <- barplot(as.matrix(stemdata), horiz=TRUE, col=colors, main="N=29", # assign `byc`
border=FALSE, las=1, xaxt='n')
# labels
labs <- data.frame(x=as.vector(sapply(stemdata, xFun)), # apply `xFun` here
y=rep(byc, each=nrow(stemdata)), # use `byc` here
labels=as.vector(apply(stemdata, 1:2, paste0, "%")),
stringsAsFactors=FALSE)
invisible(sapply(seq(nrow(labs)), function(x) # `invisible` prevents unneeded console output
text(x=labs[x, 1:2], labels=labs[x, 3], cex=.9, font=2, col=0)))
# legend (set `xpd=TRUE` to plot beyond margins!)
legend(-55, 8.5, legend=c("Medium","High", "Very High"), col=colors, pch=15, xpd=TRUE)
par(mar=c(5, 4, 4, 2) + 0.1) # finally better reset par to default
Result
Data
stemdata <- structure(list(`Food, travel, accommodations, and procedures` = c(7,
17, 76), `Travel itinerary and dates` = c(14, 10, 76), `Location of the STEM Tour stops` = c(14,
17, 69), `Interactions with presenters/guides` = c(4, 10, 86),
`Duration of each STEM Tour stop` = c(7, 17, 76), `Overall quality of the STEM Tour` = c(4,
10, 86)), class = "data.frame", row.names = c(NA, -3L))
Would you consider a tidyverse solution?
library(tidyverse) # for dplyr, tidyr, tibble & ggplot2
stemdata %>%
rownames_to_column(var = "id") %>%
gather(Var, Val, -id) %>%
group_by(Var) %>%
mutate(id = factor(id, levels = 3:1)) %>%
ggplot(aes(Var, Val)) +
geom_col(aes(fill = id)) +
coord_flip() +
geom_text(aes(label = paste0(Val, "%")),
position = position_stack(0.5))
Result:

R: ggplot for hover text

My data (final.df) looks like the following:
A B C Y 1
0 0 0 0 0.05
0 0 1 1 0.03
....
Based on the comment below, here is a ASCII text representation of the dataframe.
structure(list(A = c(502, 541, 542, 543, 544, 545, 4304, 4370,
4371, 4372, 4373, 4442), B = c(4.4, 4.2, 4.4, 4.6, 4.8, 5, 5.2,
4.6, 4.8, 5, 5.2, 5.2), C = c(2.6, 2.8, 2.8, 2.8, 2.8, 2.8, 12.6,
12.8, 12.8, 12.8, 12.8, 13), Y = c(1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1), `1` = c(0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1), `NA` = c(0,
0, 0, 0, 0, 0, 0, 0, 0.000281600479875937, 0, 0, 0)), .Names = c("A",
"B", "C", "Y", "1", NA), row.names = c(1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L), class = "data.frame")
To summarize, there are four columns that identify each data point. I am interested in creating two boxplots according to their values in column with name 1. I want to compare the values for points labeled 0 in column 'Y' and labeled 1 in column 'Y'. Finally, I want to be able to hover over the points to retrieve the meta-data, meaning the 'A', 'B', 'C', and '1' value.
p <- ggplot(final.df, aes(x = factor(Y), y =
Y, fill = factor(Y)))
p <- p + geom_boxplot() + geom_point() + xlab("Y") + guides(fill =
guide_legend("Y")) + theme(legend.position="top")
final.p <- ggplotly(p)
The current plot shows me factor(Y) value and the corresponding value in 1. How can I include the meta-data in columns 'A', 'B', 'C'?
We can build a text using paste0 and HTML tag <br><\br> and instructe toolttip to use text.
p <- ggplot(df, aes(x = factor(Y), y = Y,
fill = factor(Y), text=paste('</br>A: ',A,'</br>B: ',B, '</br>1: ',1)))
ggplotly(p,tooltip = c("text"))
Use the tooltip feature of ggplotly. Read about it by typing in help(ggplotly). See Below:
library(tidyverse)
library(plotly)
set.seed(55)
df <- data.frame(
A = c(rep(0, 8), rep(1, 8)),
B = rep(c(rep(0, 4), rep(1, 4)), 2),
C = rep(c(rep(0, 2), rep(1, 2)), 4),
Y = rep(c(0, 1), 8),
X1 = runif(16)
)
p <- ggplot(df, aes(x = factor(Y), y = X1, fill = factor(Y), A = A, B = B, C = C))
p <- p + geom_boxplot() +
geom_point() +
xlab("Y") +
guides(fill = guide_legend("Y")) +
theme(legend.position = "top")
final.p <- ggplotly(p, tooltip = c("A", "B", "C"))
final.p

hist3D 2D plot in background in R

Is it possible to add a 2d plot to a 3D plot in R?
I have the following R code that generates a 3d bar plot.
dt = structure(c(1, 1, 1, 3,
0, 2, 2, 1,
1, 2, 1, 3,
2, 6, 3, 1,
1, 2, 3, 0,
1, 0, 2, 1,
1,2,2,2), .Dim = c(4L, 7L), .Dimnames = list(c("0-50",
"51-60", "61-70", "71-80"
), c("0-50", "51-60", "61-70", "71-80", "81-90", "91-100", "101-Inf")))
m <- matrix(rep(seq(4),each=7), ncol=7, nrow=4, byrow = TRUE)
hist3D(x = 1:4, z = dt, scale = T,bty="g", phi=35,theta=30,border="black",space=0.15,col = jet.col(5, alpha = 0.8), add = F, colvar = m, colkey = F, ticktype = "detailed")
The hist3d call only:
hist3D(x = 1:4, z = dt, scale = T,bty="g", phi=35,theta=30, border="black", space=0.15,col = jet.col(5, alpha = 0.8), add = F, colvar = m, colkey = F, ticktype = "detailed")
This generates the following 3d plot:
What I'm looking for is being able to add a plot in the position where the grey grid is. Is it possible?
Thanks!
As far as I know, there isn't a good function to make a barplot in RGL. I suggest a manual method.
dt = structure(c(1, 1, 1, 3,
0, 2, 2, 1,
1, 2, 1, 3,
2, 6, 3, 1,
1, 2, 3, 0,
1, 0, 2, 1,
1,2,2,2), .Dim = c(4L, 7L), .Dimnames = list(c("0-50",
"51-60", "61-70", "71-80"
), c("0-50", "51-60", "61-70", "71-80", "81-90", "91-100", "101-Inf")))
Making 3D barplot in RGL
library(rgl)
# make dt xyz coordinates data
dt2 <- cbind( expand.grid(x = 1:4, y = 1:7), z = c(dt) )
# define each bar's width and depth
bar_w <- 1 * 0.85
bar_d <- 1 * 0.85
# make a base bar (center of undersurface is c(0,0,0), width = bar_w, depth = bar_d, height = 1)
base <- translate3d( scale3d( cube3d(), bar_w/2, bar_d/2, 1/2 ), 0, 0, 1/2 )
# make each bar data and integrate them
bar.list <- shapelist3d(
apply(dt2, 1, function(a) translate3d(scale3d(base, 1, 1, a[3]), a[1], a[2], 0)),
plot=F)
# set colors
for(i in seq_len(nrow(dt2))) bar.list[[i]]$material$col <- rep(jet.col(5)[c(1:3,5)], 7)[i]
open3d()
plot3d(0,0,0, type="n", xlab="x", ylab="y", zlab="z", box=F,
xlim=c(0.5, 4.5), ylim=c(0.5, 7.5), zlim=c(0, 6.2), aspect=T, expand=1)
shade3d(bar.list, alpha=0.8)
wire3d(bar.list, col="black")
light3d(ambient="black", diffuse="gray30", specular="black") # light up a little
Add a 2d plot
# show2d() uses 2d plot function's output as a texture
# If you need the same coordinates of 2d and 3d, plot3d(expand=1) and show2d(expand=1),
# the same xlims, equivalent plot3d(zlim) to 2d plot's ylim, 2d plot(xaxs="i", yaxs="i") are needed.
show2d({
par(mar = c(0,0,0,0))
barplot(c(3,4,5,6), yaxs="i", ylim=c(0, 6.2))
},
expand = 1 , face = "y+", texmipmap = F) # texmipmap=F makes tone clear

Resources