Interactive correlation matrix - r

I want to plot correlation matrix in such a sense that it should be interactive and show both, correlation values and corresponding p-values in ggplot2 or plot_ly.
I have tried the following code.
library(ggplot2)
library(plotly)
library(reshape)
library(Hmisc)
x <- Seatbelts
y <- as.matrix(x)
rt <- rcorr(y)
mtlr <- melt(rt$r)
mtlp <- melt(rt$P)
gx <- ggplot(mtl, aes(X1, X2, fill = value)) + geom_tile() +
scale_fill_gradient(low = "cyan", high = "red")
ggplotly(gx)
I want to show "mtlp" value in the same plot.
Can anyone help?

You could add your p-values as a label or text which will then appear in your hovertext.
library(ggplot2)
library(plotly)
library(reshape)
library(Hmisc)
x <- Seatbelts
y <- as.matrix(x)
rt <- rcorr(y)
mtlr <- melt(rt$r)
mtlp <- melt(rt$P)
p.value <- mtlp$value
gx <- ggplot(mtlr, aes(X1, X2, fill = value, label=p.value)) + geom_tile() +
scale_fill_gradient(low = "cyan", high = "red")
ggplotly(gx)

The easiest way is to merge the two melted data.frames back together, then use the text aesthetic to add to the tooltip.
I re-did the melt using reshape2 (instead of reshape) and set value names to make the merge easier
mtlr <- melt(rt$r, value.name = "Correlation")
mtlp <- melt(rt$P, value.name = "P-Value")
mtl <-
merge(mtlr, mtlp)
Then, make the plot -- note that you can use anything you want in the text aesthetic, and I often use it to change the layout/presentation of the tooltip (and then only display the text tooltip)
gx <-
ggplot(mtl
, aes(Var1, Var2
, fill = Correlation
, text = paste("P-val = ", round(`P-Value`, 4)))) +
geom_tile() +
scale_fill_gradient(low = "cyan", high = "red")
ggplotly(gx)

Related

Presentation of two curves in ggplot2

Is there a more efficient way to present these data in ggplot2? Ideally, I would like them both in one plot. I know this can be achieved in python with matlibplot, but I like the visuals of ggplot2 better.
R code used to generate the plots:
#load libraries
library(ggplot2)
library (gridExtra)
library(scales)
#generate some data plot 1
var_iter <- c(seq(0, 4000, 20))
x <- runif(201,0.877813, 2.283210)
var_loss <- c(sort(x, decreasing = TRUE))
rndm1 <- data.frame(var_iter, var_loss)
#generate some data plot 2
var_iter2 <- c(seq(0, 3500, 500))
x2 <- runif(8,0.1821, 0.6675)
var_acc <- c(sort(x2, decreasing = FALSE))
rndm2 <- data.frame(var_iter2, var_acc)
#plot loss
c <- ggplot(data=rndm1, aes(x=var_iter, y=var_loss)) + geom_line(aes(colour="Log Loss")) +
scale_colour_manual(name='', values=c('Log Loss'='#00BFC4')) + #theme_bw() +
xlab("iterations") + ylab("log loss") + theme(legend.position=c(1,1),legend.justification=c(1,1),
legend.direction="horizontal",
legend.box="horizontal",
legend.box.just = c("top"),
legend.background = element_rect(fill=alpha('white', 0.3)))
#plot accuracy
d <- ggplot(data=rndm2, aes(x=var_iter2, y=var_acc)) + geom_line(aes(colour="Accuracy")) +
scale_colour_manual(name='', values=c('Accuracy'='#F8766D')) + #theme_bw() +
xlab("iterations") + ylab("accuracy") + theme(legend.position=c(0.80, 1),legend.justification=c(1,1),
legend.direction="horizontal",
legend.box="horizontal",
legend.box.just = c("top"),
legend.background = element_rect(fill=alpha('white', 0.3)))
grid.arrange(c, d, ncol=2)
You still can use the same concept of adding a layer on another layer.
ggplot(rndm1, aes(x=var_iter)) +
geom_line(aes(y=var_loss, color="var_loss")) +
geom_line(data=rndm2, aes(x=var_iter2, y=var_acc, color="var_acc"))
Or combine two data frame together and create another variable for color.
# Change the column name, so they can combine together
names(rndm1) <- c("x", "y")
names(rndm2) <- c("x", "y")
rndm <- rbind(rndm1, rndm2)
# Create a variable for color
rndm$group <- rep(c("Log Loss", "Accuracy"), c(dim(rndm1)[1], dim(rndm2)[1]))
ggplot(rndm, aes(x=x, y=y, color=group)) + geom_line()
I wanted to suggest the same idea as the JasonWang, but he was faster. I think it is the way to go (hence I upvoted it myself).
ggplot2 doesn't allow two y axis, for a reason: Plot with 2 y axes, one y axis on the left, and another y axis on the right
It is misleading.
But if you still want to do it. You can do it with base plot or dygraphs (for example):
rndm2$var_iter <- rndm2$var_iter2
rndm2$var_iter2 <- NULL
merged.rndm <- merge(rndm1, rndm2, all = TRUE)
dygraph(merged.rndm) %>% dySeries("var_acc", axis = "y2")
But this will give you points for var_acc, as it has a lot less observations.
You could fill it.
merged.rndm1 <- as.data.frame(zoo::na.approx(merged.rndm))
dygraph(merged.rndm1) %>% dySeries("var_acc", axis = "y2")
Note: this has approximated values, which might not be something you want to do.

How to plot three point lines using ggplot2 instead of the default plot in R

I have three matrix and I want to plot the graph using ggplot2. I have the data below.
library(cluster)
require(ggplot2)
require(scales)
require(reshape2)
data(ruspini)
x <- as.matrix(ruspini[-1])
w <- matrix(W[4,])
df <- melt(data.frame(max_Wmk, min_Wmk, w, my_time = 1:10), id.var = 'my_time')
ggplot(df, aes(colour = variable, x = my_time, y = value)) +
geom_point(size = 3) +
geom_line() +
scale_y_continuous(labels = comma) +
theme_minimal()
I want to add the three plots into one plot using a beautiful ggplot2.
Moreover, I want to make the points with different values have different colors.
I'm not quite sure what you're after, here's a guess
Your data...
max <- c(175523.9, 33026.97, 21823.36, 12607.78, 9577.648, 9474.148, 4553.296, 3876.221, 2646.405, 2295.504)
min <- c(175523.9, 33026.97, 13098.45, 5246.146, 3251.847, 2282.869, 1695.64, 1204.969, 852.1595, 653.7845)
w <- c(175523.947, 33026.971, 21823.364, 5246.146, 3354.839, 2767.610, 2748.689, 1593.822, 1101.469, 1850.013)
Slight modification to your base plot code to make it work...
plot(1:10,max,type='b',xlab='Number',ylab='groups',col=3)
points(1:10,min,type='b', col=2)
points(1:10,w,type='b',col=1)
Is this what you meant?
If you want to reproduce this with ggplot2, you might do something like this...
# ggplot likes a long table, rather than a wide one, so reshape the data, and add the 'time' variable explicitly (ie. my_time = 1:10)
require(reshape2)
df <- melt(data.frame(max, min, w, my_time = 1:10), id.var = 'my_time')
# now plot, with some minor customisations...
require(ggplot2); require(scales)
ggplot(df, aes(colour = variable, x = my_time, y = value)) +
geom_point(size = 3) +
geom_line() +
scale_y_continuous(labels = comma) +
theme_minimal()
UPDATE after the question was edited and the example data changed, here's an edit to suit the new example data:
Here's your example data (there's scope for simplification and speed gains here, but that's another question):
library(cluster)
require(ggplot2)
require(scales)
require(reshape2)
data(ruspini)
x <- as.matrix(ruspini[-1])
wss <- NULL
W=matrix(data=NA,ncol=10,nrow=100)
for(j in 1:100){
k=10
for(i in 1: k){
wss[i]=kmeans(x,i)$tot.withinss
}
W[j,]=as.matrix(wss)
}
max_Wmk <- matrix(data=NA, nrow=1,ncol=10)
for(i in 1:10){
max_Wmk[,i]=max(W[,i],na.rm=TRUE)
}
min_Wmk <- matrix(data=NA, nrow=1,ncol=10)
for(i in 1:10){
min_Wmk[,i]=min(W[,i],na.rm=TRUE)
}
w <- matrix(W[4,])
Here's what you need to do to make the three objects into vectors so you can make the data frame as expected:
max_Wmk <- as.numeric(max_Wmk)
min_Wmk <- as.numeric(min_Wmk)
w <- as.numeric(w)
Now reshape and plot as before...
df <- melt(data.frame(max_Wmk, min_Wmk, w, my_time = 1:10), id.var = 'my_time')
ggplot(df, aes(colour = variable, x = my_time, y = value)) +
geom_point(size = 3) +
geom_line() +
scale_y_continuous(labels = comma) +
theme_minimal()
And here's the result:

Plot two graphs in the same plot [duplicate]

This question already has answers here:
Plotting two variables as lines using ggplot2 on the same graph
(5 answers)
Closed 4 years ago.
The solution with ggplot in this question worked really well for my data. However, I am trying to add a legend and everything that I tried does not work...
For example, in the ggplot example in the above question, how I can add a legend to show that the red curve is related to "Ocean" and the green curve is related to "Soil"? Yes, I want to add text that I will define and it is not related to any other variable in my data.frame.
The example below is some of my own data...
Rate Probability Stats
1.0e-04 1e-04 891.15
1.0e-05 1e-04 690
...
etc (it's about 400 rows). And I have two data frames similar to the above one.
So My code is
g <- ggplot(Master1MY, aes(Probability))
g <- g + geom_point(aes(y=Master1MY$Stats), colour="red", size=1)
g <- g + geom_point(aes(y=Transposon1MY$Stats), colour="blue", size=1)
g + labs(title= "10,000bp and 1MY", x = "Probability", y = "Stats")
The plot looks like
I just want a red and blue legend saying "Master" and "Transposon"
Thanks!
In ggplot it is generally most convenient to keep the data in a 'long' format. Here I use the function melt from the reshape2 package to convert your data from wide to long format. Depending how you specify different aesthetics (size, shape, colour et c), corresponding legends will appear.
library(ggplot2)
library(reshape2)
# data from the example you were referring to, in a 'wide' format.
x <- seq(-2, 2, 0.05)
ocean <- pnorm(x)
soil <- pnorm(x, 1, 1)
df <- data.frame(x, ocean, soil)
# melt the data to a long format
df2 <- melt(data = df, id.vars = "x")
# plot, using the aesthetics argument 'colour'
ggplot(data = df2, aes(x = x, y = value, colour = variable)) + geom_line()
Edit, set name and labels of legend
# Manually set name of the colour scale and labels for the different colours
ggplot(data = df2, aes(x = x, y = value, colour = variable)) +
geom_line() +
scale_colour_discrete(name = "Type of sample", labels = c("Sea water", "Soil"))
Edit2, following new sample data
Convert your data, assuming its organization from your update, to a long format. Again, I believe you make your ggplot life easier if you keep your data in a long format. I relate every step with the simple example data which I used in my first answer. Please note that there are many alternative ways to rearrange your data. This is one way, based on the small (non-reproducible) parts of your data you provided in the update.
# x <- seq(-2, 2, 0.05)
# Master1MY$Probability
Probability <- 1:100
# ocean <- pnorm(x)
# Master1MY$Stats
Master1MY <- rnorm(100, mean = 600, sd = 20)
# soil <- pnorm(x,1,1)
# Transposon1MY$Stats
Transposon1MY <- rnorm(100, mean = 100, sd = 10)
# df <- data.frame(x, ocean, soil)
df <- data.frame(Probability, Master1MY, Transposon1MY)
# df2 <- melt(df, id.var = "x")
df2 <- melt(df, id.var = "Probability")
# default
ggplot(data = df2, aes(x = Probability, y = value, col = variable)) +
geom_point()
# change legend name and labels, see previous edit using 'scale_colour_discrete'
# set manual colours scale using 'scale_colour_manual'.
ggplot(data = df2, aes(x = Probability, y = value, col = variable)) +
geom_point() +
scale_colour_manual(values = c("red","blue"), name = "Type of sample", labels = c("Master", "Transposon"))

Proper way to plot multiple data series in ggplot with custom colors, legend etc

I want to incrementally build a plot that contains several data series of different lengths. My goal is to be able to control the appearance of each data series,
give them custom names
and to have appropriate legends. My data series are of different lengths, so
I cannot put them in a single dataframe.
In the code below I expect 4 lines: the shortest will be red, the next ones will be blue, green and black respectively
library(ggplot2)
set.seed(12345)
plt <- ggplot()
colors <- c('red', 'blue', 'green', 'black')
for(i in seq(length(colors))) {
x <- seq(1, 2*i)
y <- x * i + rnorm(length(x))
df <- data.frame(x=x, y=y)
plt <- plt + geom_point(aes(x, y), data=df, color=colors[i]) +
geom_line(aes(x, y), data=df, color=colors[i])
}
print(plt)
This is what I get.
How can I give names to the lines and display a legend?
Is there a better way to acheive my goal?
The way to do this is to create a single data frame in long format:
Like this:
library(ggplot2)
set.seed(12345)
colors <- c('red', 'blue', 'green', 'black')
dat <- lapply(seq_along(colors), function(i){
x <- seq(1, 2*i)
data.frame(
series = colors[i],
x = x,
y = x * i + rnorm(length(x))
)}
)
dat <- do.call(rbind, dat)
Now plot
ggplot(dat, aes(x, y, color=series)) + geom_line()
You don't have to use a for-loop and plot each time after constructing the data because they are of unequal lengths. This is why ggplot2 is awesome! You can create a group for each of the dataset. And you can name the line to whatever you want using the same group as it will appear in the legend as such (of course you can change it directly in the legend as well, later, if you wish). Here's what I think you expect:
set.seed(12345)
require(ggplot2)
require(plyr)
# to group your data. change the letters to whatever you want to appear as legend
line_names <- letters[1:4]
# Use plyr to create your x and y for each i and add the group.
dat <- ldply(1:length(colors), function(i) {
x <- seq(1, 2*i)
y <- x * i + rnorm(length(x))
data.frame(x=x, y=y, grp=line_names[i])
})
# just plot here.
ggplot(data = dat, aes(x=x, y=y)) + geom_line(aes(colour=grp)) + geom_point()

ggplot2 Scatter Plot Labels

I'm trying to use ggplot2 to create and label a scatterplot. The variables that I am plotting are both scaled such that the horizontal and the vertical axis are plotted in units of standard deviation (1,2,3,4,...ect from the mean). What I would like to be able to do is label ONLY those elements that are beyond a certain limit of standard deviations from the mean. Ideally, this labeling would be based off of another column of data.
Is there a way to do this?
I've looked through the online manual, but I haven't been able to find anything about defining labels for plotted data.
Help is appreciated!
Thanks!
BEB
Use subsetting:
library(ggplot2)
x <- data.frame(a=1:10, b=rnorm(10))
x$lab <- letters[1:10]
ggplot(data=x, aes(a, b, label=lab)) +
geom_point() +
geom_text(data = subset(x, abs(b) > 0.2), vjust=0)
The labeling can be done in the following way:
library("ggplot2")
x <- data.frame(a=1:10, b=rnorm(10))
x$lab <- rep("", 10) # create empty labels
x$lab[c(1,3,4,5)] <- LETTERS[1:4] # some labels
ggplot(data=x, aes(x=a, y=b, label=lab)) + geom_point() + geom_text(vjust=0)
Subsetting outside of the ggplot function:
library(ggplot2)
set.seed(1)
x <- data.frame(a = 1:10, b = rnorm(10))
x$lab <- letters[1:10]
x$lab[!(abs(x$b) > 0.5)] <- NA
ggplot(data = x, aes(a, b, label = lab)) +
geom_point() +
geom_text(vjust = 0)
Using qplot:
qplot(a, b, data = x, label = lab, geom = c('point','text'))

Resources