R plot 2D surface of a matrix of numbers - r

Given an n*p matrix of numbers, I am trying to plot a graph with n*p squares, each square having a colour that depends on the corresponding number in the matrix.
The matrix is defined as follows:
ll <- list(c(1,3,4,3,6,5,8),c(1,1,4,5,7,6,8),c(1,3,1,1,3,4,8),c(2,1,1,2,1,3,5))
mm <- do.call(rbind,ll)
More generally, I would like to define colours for groups of numbers.
For example:
Yellow for the group {1,2}
Orange for the group {3,4,5}
Red for the group of numbers {6,7,8}
And then "plot" the matrix, like the colourful matplotlib picture at this link:
http://activeintelligence.org/blog/archive/matplotlib-sparse-matrix-plot/
I really have no clue how to do it, and any point of view would be greatly appreciated!

# Class boundaries: [0, 2.5) -> 1, [2.5, 5.5) -> 2, [5.5, 8.5) -> 3
class_breaks <- c(0, 2.5, 5.5, 8.5)
class_colours <- c("yellow", "orange", "red")
# Work on a copy; assigning through [] keeps the matrix dimensions
cc <- mm
cc[] <- findInterval(mm, class_breaks)
cc
# The x/y expressions are kept as written because image() uses their text as
# the default axis labels
image(seq(dim(cc)[1]), seq(dim(cc)[2]), cc, col = class_colours)
The values in the cc matrix index into the colour vector: 1 is drawn yellow, 2 orange and 3 red.

I suppose that the position of the points are defined by the n and p ranks. You could handle this with ggplot2 and reshape2.
library(reshape2)
library(ggplot2)

# Example matrix: one inner vector per row
ll <- list(c(1,3,4,3,6,5,8), c(1,1,4,5,7,6,8), c(1,3,1,1,3,4,8), c(2,1,1,2,1,3,5))
mm <- do.call(rbind, ll)
# Row/column indices become the dimnames so melt() can turn them into coordinates
rownames(mm) <- seq_len(nrow(mm))
colnames(mm) <- seq_len(ncol(mm))

# Long format: one row per matrix cell
mm_long <- melt(mm)
names(mm_long) <- c("x", "y", "group")

# Colour class for each value 1..8: {1,2} -> 1, {3,4,5} -> 2, {6,7,8} -> 3;
# any other value has no match and stays NA
class_of_value <- rep(1:3, times = c(2, 3, 3))
mm_long$colour_group <- class_of_value[match(mm_long$group, 1:8)]

mm_long$group <- factor(mm_long$group)
mm_long$colour_group <- factor(mm_long$colour_group)

# Large square points (shape 15) stand in for the matrix cells
ggplot(mm_long, aes(x=x, y=y)) +
  geom_point(aes(colour=colour_group), shape = 15, size = 10) +
  scale_colour_manual(values = c("yellow", "orange", "red"))

Basically following the suggestions from @MrFlick and @BondedDust:
# One colour per matrix value 1..8: 1-2 yellow, 3-5 orange, 6-8 red
cols <- rep(c("yellow", "orange", "red"), times = c(2, 3, 3))
# Break points 0.5, 1.5, ..., 8.5 put every integer value in its own bin
value_breaks <- seq(0.5, length(cols) + 0.5, by = 1)
# t(mm) puts the matrix columns along the x axis
image(seq_len(ncol(mm)), seq_len(nrow(mm)), t(mm),
      col = cols, breaks = value_breaks, xlab = "", ylab = "")
or
# Same colour/break scheme as image(); NA for Rowv/Colv and scale = "none"
# keep the matrix unclustered and unscaled
heatmap(
  mm,
  Rowv = NA,
  Colv = NA,
  scale = "none",
  col = cols,
  breaks = seq(0.5, length(cols) + 0.5, by = 1)
)

Related

How to calculate the overlap between 2 dataset distribution

Hi, how can I calculate the overlapping area between two columns (or two subsets of a column) in R?
Please see the example data below:
# Reproducible example: 200 rounded weights per data set,
# D1 drawn from N(55, 5) and D2 from N(65, 5)
set.seed(1234)
n_per_set <- 200
d1_weights <- rnorm(n_per_set, mean = 55, sd = 5)
d2_weights <- rnorm(n_per_set, mean = 65, sd = 5)
df <- data.frame(
  Data = factor(rep(c("D1", "D2"), each = n_per_set)),
  weight = round(c(d1_weights, d2_weights))
)
library(ggplot2)
# Kernel density estimate of weight, one filled curve per level of Data.
# The result is stored in a variable called `plot` and then printed.
plot <- ggplot(df, aes(weight,fill = Data))+
geom_density()
plot
This results in the below plot. I am wondering, how to color the overlapping area and calculate the overlapping coefficient (OVL) similar to what is done here Using Monte Carlo Integration?
Please note that the link (and example above) provided uses parametric distribution while I am asking if I have a dataset of observed values.
I normally find it easier to work directly with the densities and plot them as geom_area. If you get the x-axis sampling points to match on the two distributions you can find the overlap area using pmin, and the sum of its values divided by the sum of the values for the two curves should give you the proportion of the total area that is overlapped.
# Estimate each group's density over the same range (min..max of all weights),
# so that both estimates are evaluated on matching x values.
d1dens <- with(df, density(weight[Data == "D1"],
from = min(weight),
to = max(weight)))
d2dens <- with(df, density(weight[Data == "D2"],
from = min(weight),
to = max(weight)))
# Pointwise minimum of the two curves = height of the overlapping region
joint <- pmin(d1dens$y, d2dens$y)
# Stack the three curves (D1, D2, overlap) in long format for plotting
df2 <- data.frame(x = rep(d1dens$x, 3),
y = c(d1dens$y, d2dens$y, joint),
Data = rep(c("D1", "D2", "overlap"), each = length(d1dens$x)))
# position_identity() draws the areas on top of each other instead of stacking
ggplot(df2, aes(x, y, fill = Data)) +
geom_area(position = position_identity(), color = "black") +
scale_fill_brewer(palette = "Pastel2") +
theme_bw()
# Overlap as a share of the summed heights of BOTH curves.
# NOTE(review): if each curve integrates to about 1, this is roughly half of
# the overlapping coefficient (the integral of the pointwise minimum) - confirm
# which quantity is wanted.
sum(joint) / sum(d1dens$y, d2dens$y)
#> [1] 0.1480701

Position geom_label on the outside of a network using ggplot?

I am creating a network-style plot using ggnet2 and ggplot2. At the moment I'm just using geom_label's nudge_y argument to position the labels, but I was wondering if it's possible to position the labels so that they are always on the outside of the circle (my network is always circular). A toy example is shown below.
library(ggplot2)
library(igraph)
library(GGally) # contains ggnet2
nam <- c("A", "B", "C", "D", "E") # Node name
g <- sample_pa(5, m = 5) # generate graph with x nodes
g <- igraph::as_data_frame(g) # create df
g <- rbind(g$to, g$from) # create matrix
net.bg <- make_graph(g, 5, directed = FALSE) # make graph
# Random edge weights and node sizes; TRUE is spelled out because T is an
# ordinary variable that can be reassigned
E(net.bg)$weight <- sample(1:3, 5, replace = TRUE)
V(net.bg)$size <- sample(1:5, 5, replace = TRUE)
# Circular layout; every label is nudged upwards by the same fixed amount
p <- ggnet2(net.bg,
            mode = "circle",
            size = V(net.bg)$size,
            node.color = "red",
            edge.size = E(net.bg)$weight,
            edge.alpha = 0.5,
            edge.color = "blue") +
  theme(legend.text = element_text(size = 10)) +
  geom_label(aes(label = nam), nudge_y = 0.05)
p
The above code produces something like this:
As can be seen, the labels are all nudged in the y direction. But I was hoping to make something like this (which I made in powerpoint):
Is it possible to do such a thing?
It is possible, though not particularly easy or portable. The object p is a ggplot object, so contains all the information required to build the plot in terms of co-ordinates, geoms, mapping, data, etc.
This means you can directly change the labels layer so that its x, y co-ordinates are a small multiple above their previous values. So you could do:
# First class name of the geom used by each layer of p
geoms <- sapply(p$layers, function(x) class(x$geom)[1])
# Pick out the segment layer and the label layer by geom class
segments <- p$layers[[which(geoms == "GeomSegment")]]
labels <- p$layers[[which(geoms == "GeomLabel")]]
# Shift the segment data and the plot's node coordinates by -0.5
# (presumably to centre the circular layout on the origin so that the
# multiplication below moves labels radially outwards - confirm against the
# coordinate range ggnet2 produces)
segments$data <- segments$data - 0.5
p$data$x <- p$data$x - 0.5
p$data$y <- p$data$y - 0.5
# Zero the y component of the label layer's position (the nudge_y set earlier)
labels$position$y <- 0
# Give the label layer its own copy of the node data, scaled by 1.1
labels$data <- p$data
labels$data$x <- labels$data$x * 1.1
labels$data$y <- labels$data$y * 1.1
# Swap every scale whose first class is ScaleContinuousPosition for the bare
# ScaleContinuousPosition object; all other scales are kept as they are
p$scales$scales <- lapply(p$scales$scales, function(x) {
if(class(x)[1] == "ScaleContinuousPosition") ScaleContinuousPosition else x })
# Hide the axis text
p <- p + theme(axis.text = element_blank())
p

colored points in R

I have a table with 3 numeric columns. Two of them are coordinates and the third one determines the colour. There are hundreds of rows in my text file.
I want to make a picture where the first two numbers are the coordinates of each point and the third one is the colour of the point: the bigger the number, the darker the point.
How could I do this?
The example of the row in my file:
99.421875 48.921875 0.000362286050144
Will this do?
# library() stops with an error if ggplot2 is missing; require() would only
# return FALSE and let the script fail later
library(ggplot2)
# assuming your data is in df and x, y, and col are the column names.
# col is mapped to alpha, so points with larger col values are more opaque
ggplot(data = df, aes(x = x, y = y)) +
  geom_point(colour = "red", size = 3, aes(alpha = col))
# sample data
# Each column holds 100 values. The scaling is applied to the result of
# runif(100): when runif() receives a vector as its first argument it only uses
# the vector's length, so a factor placed inside the call is silently ignored.
set.seed(45)
df <- data.frame(x = runif(100) * sample(1:10, 100, replace = TRUE),
                 y = runif(100) * sample(1:50, 100, replace = TRUE),
                 col = runif(100) / sample(1:100))
Plot:
A lattice solution:
library(lattice)
# The matrix is filled column by column: column 1 = x (1, 2, 3),
# column 2 = y (1, 1, 1), column 3 = colour value (2, 5, 10)
mydata <- matrix(c(1,2,3,1,1,1,2,5,10),nrow=3)
# The third column is used both as the colour code and, divided by 10
# (its largest value here), as the opacity of each point
xyplot(mydata[,2] ~ mydata[,1], col = mydata[,3], pch= 19 ,
alpha = (mydata[,3]/10), cex = 15)
alpha here controls the transparency.
Here is a base R solution:
##Generate data
##Here z lies between 0 and 10
dd = data.frame(x = runif(100), y= runif(100), z= runif(100, 0, 10))
First normalise z:
# Rescale z linearly so that its minimum becomes 0 and its maximum becomes 1
z_shifted <- dd$z - min(dd$z)
dd$z <- z_shifted / max(z_shifted)
Then plot as normal using the size of z for the shading:
##See ?gray for other colour combinations
##pch=19 gives solid points. See ?points for other shapes
##gray(0) is black and gray(1) is white, so z is inverted to make
##bigger values darker
plot(dd$x, dd$y, col=gray(1 - dd$z), pch=19)
Another solution using base... to change the colour, you can replace some of data[,3] to 0 inside the rgb()
n <- 1000
data <- data.frame(x=runif(n),y=runif(n),col=runif(n))
# The same value is used for the red, green and blue channels, which gives a
# shade of grey; maxColorValue is the largest col value, so that point is drawn
# white and values near 0 are drawn almost black
plot(data[,1:2],col=rgb(data[,3],data[,3],data[,3],maxColorValue = max(data[,3])),pch=20)

How do I plot the following in R?

I'm new to plotting in R so I ask for your help. Say I have the following matrix.
# 3 x 2 integer matrix, filled column by column, with rows a-c and columns x, y
mat1 <- matrix(
  1:6,
  nrow = 3,
  dimnames = list(c("a", "b", "c"), c("x", "y"))
)
mat1
x y
a 1 4
b 2 5
c 3 6
I want to plot this, where the x-axis contains each rowname (a, b, c) and the y-axis is the value of each rowname (a = 1 and 4, b = 2 and 5, c = 3 and 6). Any help would be appreciated!
| o
| o x
| o x
| x
|_______
a b c
Here's one way using base graphics:
# Empty plot (type = "n") spanning x = 1..3 and the value range of mat1,
# with the default x axis suppressed
plot(c(1,3),range(mat1),type = "n",xaxt ="n")
# Second column with the default symbol, first column drawn as "x"
points(1:3,mat1[,2])
points(1:3,mat1[,1],pch = "x")
# Label positions 1..3 on the x axis with the row names
axis(1,at = 1:3,labels = rownames(mat1))
Edited to include different plotting symbol
matplot() was designed for data in just this format:
# One series per matrix column plotted against the row index:
# pch 4 is used for the first column (x) and pch 1 for the second (y)
matplot(
  y = mat1,
  pch = c(4, 1),
  col = "black",
  xaxt = "n", # the x axis is drawn by hand below
  xlab = "x-axis",
  ylab = "y-axis"
)
# Label each row position with its row name (thanks, Joran)
row_positions <- seq_len(nrow(mat1))
axis(side = 1, at = row_positions, labels = rownames(mat1))
And finally, a lattice solution
library(lattice)
dfmat <- as.data.frame(mat1)
# Both columns (x and y) appear on the left-hand side of the formula and are
# plotted against the row names, with one symbol per column (pch 4 and pch 1)
xyplot( x + y ~ factor(rownames(dfmat)), data=dfmat, pch=c(4,1), cex=2)
You could do it in base graphics, but if you're going to use R for much more than this I think it is worth getting to know the ggplot2 package. Note that ggplot2 only takes data frames - but then, it is often more useful to keep your data in data frames rather than matrices.
library(reshape2) # provides melt()
library(ggplot2)
d <- as.data.frame(mat1) # convert to a data frame
d$cat <- rownames(d) # add the 'cat' column holding the row names
# id.vars names the identifier column; the remaining columns (x and y) are
# stacked into 'variable' / 'value'
dm <- melt(d, id.vars = "cat")
dm # look at dm to get an idea of what melt is doing
# Define the data the plot will use and the 'aesthetics' (i.e., how the data
# are mapped to visible space), then represent the data with points.
# The `+` must end the line: a line that starts with `+` is parsed as a
# separate statement and the layer is never added.
ggplot(dm, aes(x = cat, y = value, shape = variable)) +
  geom_point()

heatmap-like plot, but for categorical variables

I have three factors (set1, set2, and set3) for each of about 50 individuals. The values for set1, set2, and set3 are "A","B","C". I'd like to make a heatmap-like plot of these data but have the legend show the color associated with the values (eg., A='red', B='blue', C='black'). Any suggestions?
Thanks.
I decided it would be easiest to approach this with ggplot2 (for me, anyway):
library(ggplot2)
library(reshape2)
# recreate a data set: 50 people and three columns of random A/B/C values
ids <- paste0("id#", 1:50)
dat <- data.frame(person = factor(ids, levels = rev(ids)),
                  matrix(sample(LETTERS[1:3], 150, replace = TRUE), ncol = 3))
# long format: one row per person/column pair
dat3 <- melt(dat, id.vars = "person")
# Colours are named by value, so A is always red, B blue and C black even if
# one of the values happens to be absent from the data
ggplot(dat3, aes(variable, person)) +
  geom_tile(aes(fill = value), colour = "white") +
  scale_fill_manual(values = c(A = "red", B = "blue", C = "black"))
A similar plot can also be made with base graphics. Here is one method using the base image function. This sample has a categorical response rather than a numeric one.
# Example table: one row per task, one status column per phase
# ('' means no status)
dx <- data.frame( Tasks = c('1','2','3','4'),
Phase1 = c('Done','Done','Done','WIP'),
Phase2 = c('WIP','Done','Done',''),
Phase3 = c('','WIP','Done',''))
# Flatten the three phase columns into one factor with a fixed level order
# and display labels ("" is relabelled "-empty-")
ff<-factor(as.matrix(dx[,2:4]),
levels=c("Done","WIP",""),
labels=c("done","wip","-empty-")
)
# Integer level codes (1, 2, 3) arranged back into a tasks x phases matrix
fx<-matrix(as.numeric(ff), ncol=ncol(dx)-1)
#use labels to assign colors
col<-c(done="darkgreen",wip="orange","-empty-"="black")
# Reverse the row order and transpose: image() draws matrix rows along the
# x axis with the first column at the bottom, so this puts the first task in
# the top row and the phases from left to right
imgflip<-function(x) {t(x[nrow(x):1,])}
# Breaks at 0.5, 1.5, ... give each level code its own colour bin;
# both default axes are suppressed and drawn by hand below
image(imgflip(fx),
breaks=(1:(nlevels(ff)+1))-.5,
col=col[levels(ff)],
xaxt="n", yaxt="n"
)
# Task labels on the left, reversed to match the flipped rows;
# image() places the cell centres evenly between 0 and 1
axis(2, at=seq(0,1,length.out=nrow(dx)), labels=rev(paste("Task",dx$Tasks)), las=2)
# Phase names along the top
axis(3, at=seq(0,1,length.out=length(names(dx))-1), labels=names(dx)[-1])
which will produce this picture.

Resources