Radial Visualization coordinate with ggplot2 - r

I'm trying to obtain a radial coordinate plot using ggplot2 and the coord_polar(). I have use the drep package from R and obtain a radviz2d(), see NotMyRadViz. Since I desire to do it in ggplot I extracted all the necessary functions. Also I wish to obtain this using the coord_polar() and when I do it my variables rotate MyRadViz2D. Does anyone have an idea how to fix this?
library("dprep")
radviz2d(iris,name="Iris")
#-------------------- Extracted from mmnorm.R
n <- dim(iris)[1]
p <- dim(iris)[2]
classes <- iris[,p]
varnames <- colnames(iris)
dataset <- as.matrix(mmnorm(iris))
dataset <- dataset[,-p]
sumrows <- rowSums(dataset)
columns <- seq(0,p-2)
angles <- (2*pi*columns)
angles <- angles/(p-1)
cosines <- cos(angles)
sines <- sin(angles)
proj.x <- (dataset%*%cosines)
proj.x <- proj.x/sumrows
proj.y <- (dataset%*%sines)
proj.y <- proj.y/sumrows
#--------------- What I'd been doing
library("ggplot2")
library("reshape2")
dataset2 <- melt(dataset/sumrows)
classnumbers <- 1:length(unique(classes))
classes <- as.numeric(classes,drop=TRUE)
Projxy <- data.frame(Projx = proj.x, Projy = proj.y, Ids = classes)
Cos_Sin <- data.frame(Angle = angles,Cos=cosines,
Sin = sines, Varnames = varnames[-p])
theta <- angles * 180/pi
d2 <- data.frame(theta,Varnames=varnames[-p])
#------------ My ggplot2
ggplot(data=dataset2,aes(x = Var2, y = value)) + geom_blank() +
ylab("") + xlab("") + ggtitle(paste("RadViz 2D for", "Iris")) +
coord_polar("y",start = pi/2) +
geom_point(data = Projxy,aes(x = Projx, y = Projy,col = factor(Ids))) +
coord_polar() + theme(legend.position = "bottom")

Related

Add jitterred points to a ggnetwork plot

I have a graph of vertices and edges which I'd like to plot using a fruchtermanreingold layout.
Here's the graph edges matrix:
edge.mat <- matrix(as.numeric(strsplit("3651,0,0,1,0,0,0,0,2,0,11,2,0,0,0,300,0,1,0,0,66,0,78,9,0,0,0,0,0,0,11690,0,1,0,0,0,0,0,0,0,0,493,1,1,0,4288,5,0,0,36,0,9,7,3,0,6,1,0,1,7,490,0,0,0,6,0,0,628,6,12,0,0,0,0,0,641,0,0,4,0,0,0,0,0,0,66,0,0,0,0,3165,0,281,0,0,0,0,0,0,0,0,45,1,0,0,35248,0,1698,2,0,1,0,2,99,0,0,6,29,286,0,31987,0,1,10,0,8,0,16,0,21,1,0,0,1718,0,51234,0,0,17,3,12,0,0,7,0,0,0,1,0,2,16736,0,0,0,3,0,0,4,630,0,0,0,9,0,0,29495,53,6,0,0,0,0,5,0,0,0,0,3,0,19,186,0,0,0,482,8,12,0,1,0,7,1,0,6,0,26338",
split = ",")[[1]]),
nrow = 14,
dimnames = list(LETTERS[1:14], LETTERS[1:14]))
I then create an igraph object from that using:
gr <- igraph::graph_from_adjacency_matrix(edge.mat, mode="undirected", weighted=T, diag=F)
And then use ggnetwork to convert gr to a data.frame, with specified vertex colors:
set.seed(1)
gr.df <- ggnetwork::ggnetwork(gr,
layout="fruchtermanreingold",
weights="weight",
niter=50000,
arrow.gap=0)
And then I plot it using ggplot2 and ggnetwork:
vertex.colors <- strsplit("#00BE6B,#DC2D00,#F57962,#EE8044,#A6A400,#62B200,#FF6C91,#F77769,#EA8332,#DA8E00,#C59900,#00ACFC,#C49A00,#DC8D00",
split=",")[[1]]
library(ggplot2)
library(ggnetwork)
ggplot(gr.df, aes(x = x, y = y, xend = xend, yend = yend)) +
geom_edges(color = "gray", aes(size = weight)) +
geom_nodes(color = "black")+
geom_nodelabel(aes(label = vertex.names),
color = vertex.colors, fontface = "bold")+
theme_minimal() +
theme(axis.text=element_blank(),
axis.title=element_blank(),
legend.position="none")
In my case each vertex actually represents many points, where each vertex has a different number of points. Adding that information to gr.df:
gr.df$n <- NA
gr.df$n[which(is.na(gr.df$weight))] <- as.integer(runif(length(which(is.na(gr.df$weight))), 100, 500))
What I'd like to do is add to the plot gr.df$n radially jittered points around each vertex (i.e., with its corresponding n), with the same vertex.colors coding. Any idea how to do that?
I think sampling and then plotting with geom_point is a reasonable strategy. (otherwise you could create your own geom).
Here is some rough code, starting from the relevant bit of your question
gr.df$n <- 1
gr.df$n[which(is.na(gr.df$weight))] <- as.integer(runif(length(which(is.na(gr.df$weight))), 100, 500))
# function to sample
# https://stackoverflow.com/questions/5837572/generate-a-random-point-within-a-circle-uniformly
circSamp <- function(x, y, R=0.1){
n <- length(x)
A <- a <- runif(n,0,1)
b <- runif(n,0,1)
ind <- b < a
a[ind] <- b[ind]
b[ind] <- A[ind]
xn = x+b*R*cos(2*pi*a/b)
yn = y+b*R*sin(2*pi*a/b)
cbind(x=xn, y=yn)
}
# sample
d <- with(gr.df, data.frame(vertex.names=rep(vertex.names, n),
circSamp(rep(x,n), rep(y,n))))
# p is your plot
p + geom_point(data=d, aes(x, y, color = vertex.names),
alpha=0.1, inherit.aes = FALSE) +
scale_color_manual(values = vertex.colors)
Giving

gam plots with ggplot

I need to create some gam plots in ggplot. I can do them with the general plot function, but am unsure how to do with ggplot. Here is my code and plots with the regular plot function. I'm using the College data set from the ISLR package.
train.2 <- sample(dim(College)[1],2*dim(College)[1]/3)
train.college <- College[train.2,]
test.college <- College[-train.2,]
gam.college <- gam(Outstate~Private+s(Room.Board)+s(Personal)+s(PhD)+s(perc.alumni)+s(Expend)+s(Grad.Rate), data=train.college)
par(mfrow=c(2,2))
plot(gam.college, se=TRUE,col="blue")
See update below old answer.
Old answer:
There is an implementation of GAM plotting using ggplot2 in voxel library. Here is how you would go about it:
library(ISLR)
library(mgcv)
library(voxel)
library(tidyverse)
library(gridExtra)
data(College)
set.seed(1)
train.2 <- sample(dim(College)[1],2*dim(College)[1]/3)
train.college <- College[train.2,]
test.college <- College[-train.2,]
gam.college <- gam(Outstate~Private+s(Room.Board)+s(Personal)+s(PhD)+s(perc.alumni)+s(Expend)+s(Grad.Rate), data=train.college)
vars <- c("Room.Board", "Personal", "PhD", "perc.alumni","Expend", "Grad.Rate")
map(vars, function(x){
p <- plotGAM(gam.college, smooth.cov = x) #plot customization goes here
g <- ggplotGrob(p)
}) %>%
{grid.arrange(grobs = (.), ncol = 2, nrow = 3)}
after a bunch of errors: In plotGAM(gam.college, smooth.cov = x) :
There are one or more factors in the model fit, please consider plotting by group since plot might be unprecise
To compare to the plot.gam:
par(mfrow=c(2,3))
plot(gam.college, se=TRUE,col="blue")
You might also want to plot the observed values:
map(vars, function(x){
p <- plotGAM(gam.college, smooth.cov = x) +
geom_point(data = train.college, aes_string(y = "Outstate", x = x ), alpha = 0.2) +
geom_rug(data = train.college, aes_string(y = "Outstate", x = x ), alpha = 0.2)
g <- ggplotGrob(p)
}) %>%
{grid.arrange(grobs = (.), ncol = 3, nrow = 2)}
or per group (especially important if you used the by argument (interaction in gam).
map(vars, function(x){
p <- plotGAM(gam.college, smooth.cov = x, groupCovs = "Private") +
geom_point(data = train.college, aes_string(y = "Outstate", x = x, color= "Private"), alpha = 0.2) +
geom_rug(data = train.college, aes_string(y = "Outstate", x = x, color= "Private" ), alpha = 0.2) +
scale_color_manual("Private", values = c("#868686FF", "#0073C2FF")) +
theme(legend.position="none")
g <- ggplotGrob(p)
}) %>%
{grid.arrange(grobs = (.), ncol = 3, nrow = 2)}
Update, 08. Jan. 2020.
I currently think the package mgcViz offers superior functionality compared to the voxel::plotGAMfunction. An example using the above data set and models:
library(mgcViz)
viz <- getViz(gam.college)
print(plot(viz, allTerms = T), pages = 1)
plot customization is similar go ggplot2 syntax:
trt <- plot(viz, allTerms = T) +
l_points() +
l_fitLine(linetype = 1) +
l_ciLine(linetype = 3) +
l_ciBar() +
l_rug() +
theme_grey()
print(trt, pages = 1)
This vignette shows many more examples.

A ggplot2 equivalent of the lines() function in basic plot

For reasons I won't go into I need to plot a vertical normal curve on a blank ggplot2 graph. The following code gets it done as a series of points with x,y coordinates
dfBlank <- data.frame()
g <- ggplot(dfBlank) + xlim(0.58,1) + ylim(-0.2,113.2)
hdiLo <- 31.88
hdiHi <- 73.43
yComb <- seq(hdiLo, hdiHi, length = 75)
xVals <- 0.79 - (0.06*dnorm(yComb, 52.65, 10.67))/0.05
dfVertCurve <- data.frame(x = xVals, y = yComb)
g + geom_point(data = dfVertCurve, aes(x = x, y = y), size = 0.01)
The curve is clearly discernible but is a series of points. The lines() function in basic plot would turn these points into a smooth line.
Is there a ggplot2 equivalent?
I see two different ways to do it.
geom_segment
The first uses geom_segment to 'link' each point with its next one.
hdiLo <- 31.88
hdiHi <- 73.43
yComb <- seq(hdiLo, hdiHi, length = 75)
xVals <- 0.79 - (0.06*dnorm(yComb, 52.65, 10.67))/0.05
dfVertCurve <- data.frame(x = xVals, y = yComb)
library(ggplot2)
ggplot() +
xlim(0.58, 1) +
ylim(-0.2, 113.2) +
geom_segment(data = dfVertCurve, aes(x = x, xend = dplyr::lead(x), y = y, yend = dplyr::lead(y)), size = 0.01)
#> Warning: Removed 1 rows containing missing values (geom_segment).
As you can see it just link the points you created. The last point does not have a next one, so the last segment is removed (See the warning)
stat_function
The second one, which I think is better and more ggplotish, utilize stat_function().
library(ggplot2)
f = function(x) .79 - (.06 * dnorm(x, 52.65, 10.67)) / .05
hdiLo <- 31.88
hdiHi <- 73.43
yComb <- seq(hdiLo, hdiHi, length = 75)
ggplot() +
xlim(-0.2, 113.2) +
ylim(0.58, 1) +
stat_function(data = data.frame(yComb), fun = f) +
coord_flip()
This build a proper function (y = f(x)), plot it. Note that it is build on the X axis and then flipped. Because of this the xlim and ylim are inverted.

Cumulative Density Plots with ggplot and plotly

When we take the following example from ggplot2 docs
df <- data.frame(x = c(rnorm(100, 0, 3), rnorm(100, 0, 10)),
g = gl(2, 100))
library(ggplot2)
p <- ggplot(df, aes(x, colour = g)) +
stat_ecdf(geom = "step", na.rm = T) + # interchange point and step
theme_bw()
p
We can create a standard cdf plot. Now if we want to play with the plot in plotly, I obtain a very confusing image when I use the step command. See below. However, when I use the point command plotly behaves like it should. What is happening with the step command? Why can't I recreate the image from using ggplot only?
library(plotly)
ggplotly(p)
I found the solution here https://community.plotly.com/t/bug-with-ggplot2-stat-ecdf-function/1187/3.
You should reorder the dataframe along x.
df <- dplyr::arrange(df, x)
library(ggplot2)
p <- ggplot(df, aes(x, colour = g)) +
stat_ecdf(geom = "step", na.rm = T) +
theme_bw()
p
library(plotly)
ggplotly(p)
This can be solved using ecdf() function.
## ecdf function to get y and 1-y
rcdf <- function (x) {
cdf <- ecdf(x)
y1 <- cdf(x)
y <- unique(y1)
# xrcdf <- 1-y ## to get reverse cdf
xrcdf <- y ## to get cdf
}
ug <- unique(df$g)
ng <- length(ug)
xll <- min(df$x)
xul <- max(df$x)
adr <- data.frame(myxx=c(), myyy=c(), mygg=c())
lapply(1:ng, function(i){
ad2r <- subset(df, g==ug[i])
myx1 <- unique(ad2r$x)
myxx <- c(xll,myx1,xul) ## add lowest value - dummy to assign 100%
myy1 <- rcdf(ad2r$x)
# myyy <- c(1.0,myy1,0.0) ## add 100% to get reverse cdf
myyy <- c(0.0,myy1,1.0) ## add 0% to get cdf
mygg <- ug[i]
ad2rf <- data.frame(myxx,myyy,mygg)
adr <<- rbind(adr,ad2rf)
})
adf <- adr[order(adr$myxx),]
pp <- ggplot(data=adf,
aes_(x=adf$myxx, y=100*adf$myyy, col=adf$mygg, group=adf$mygg)) +
geom_step() +
labs(title="CDF", y = "Y", x = "X", col=NULL)
ppp <- ggplotly(pp, tooltip=c("x","y"))
ppp
This gives the following output:
CDF

geom_raster faceted plot with ggplot2: control row height

In the example below I have a dataset containing two experiments F1 and F2. A classification is performed based on F1 signal, and both F1 and F2 values are ordered accordingly. In this diagram, each facet has the same dimension although the number of rows is not the same (e.g class #7 contains only few elements compare to the other classes). I would like to modify the code to force row height to be the same across facets (facets would thus have various blank space below). Any hints would be greatly appreciated.
Thank you
library(ggplot2)
library(reshape2)
set.seed(123)
# let's create a fake dataset
nb.experiment <- 4
n.row <- 200
n.col <- 5
nb.class <- 7
d <- matrix(round(runif(n.row * n.col),2), nc=n.col)
colnames(d) <- sprintf("%02d", 1:5)
# These strings will be the row names of each heatmap
# in the subsequent facet plot
elements <- sample(replicate(n.row/2, rawToChar(as.raw(sample(65:90, 6, replace=T)))))
# let's create a data.frame d
d <- data.frame(d,
experiment = sort(rep(c("F1","F2"), n.row/2)),
elements= elements)
# Now we split the dataset by experiments
d.split <- split(d, d$experiment)
# Now we create classes (here using hierarchical clustering )
# based on F1 experiment
dist.mat <- as.dist(1-cor(t(d.split$F1[,1:5]), method="pearson"))
hc <- hclust(dist.mat)
cuts <- cutree(hc, nb.class)
levels(cuts) <- sprintf("Class %02d", 1:nb.experiment)
# We split F1 and F2 based on classification result
for(s in names(d.split)){
d.split[[s]] <- split(d.split[[s]], cuts)
}
# Data are melt (their is perhaps a better solution...)
# in order to use the ggplot function
dm <- melt(do.call('rbind',lapply(d.split, melt)), id.var=c( "experiment", "elements", "variable", "L1"))
dm <- dm[, -5]
colnames(dm) <- c("experiment","elements", "pos", "class", "exprs")
dm$class <- as.factor(dm$class)
levels(dm$class) <- paste("Class", levels(dm$class))
# Now we plot the data
p <- ggplot(dm, aes(x = pos, y = elements, fill = exprs))
p <- p + geom_raster()
p <- p + facet_wrap(~class +experiment , scales = "free", ncol = 2)
p <- p + theme_bw()
p <- p + theme(text = element_text(size=4))
p <- p + theme(text = element_text(family = "mono", face = "bold"))
print(p)
Use facet_grid instead of facet_wrap and set the space attribute:
ggplot(dm, aes(x = pos, y = elements, fill = exprs)) +
geom_raster() +
facet_grid(rowMeanClass ~ experiment , scales = "free", space = "free_y") +
theme_bw()

Resources