Heatmaps for a matrix with ones and zeros using R - r

Below is my sample data; basically it's a matrix with person names as row names
and some columns for each of these rows. All I have in the data is just zeros and ones. I would like to visualize it using a heatmap (red for 0s and green for 1s, or any other color coding). How do I accomplish this using R? You can show me using any example dataset with just ones and zeros (binary values).

Just another approach using ggplot
library(ggplot2)
library(reshape2)
library(plyr)
library(scales)
# Example data: one row per person, one 0/1 indicator column per statistic.
df <- data.frame(
  people = factor(c("LeBron", "Dwayne"), levels = c("Dwayne", "LeBron")),
  G = c(1L, 0L),
  MIN = c(1L, 0L),
  PTS = c(0L, 1L),
  FGM = c(0L, 0L),
  FGA = c(0L, 0L),
  FGP = c(1L, 1L)
)

# Long format: one row per (person, statistic) pair. Naming the id column
# explicitly avoids relying on melt() guessing it.
df.m <- melt(df, id.vars = "people")

# `rescale` is simply a copy of `value` here: the data are already 0/1, so no
# per-variable rescaling is needed.
df1.m <- ddply(df.m, .(variable), transform, rescale = value)

# Build the complete plot, colour scale included, before displaying it.
# Adding the scale without assigning the result leaves `p` without the scale.
p <- ggplot(df1.m, aes(variable, people)) +
  geom_tile(aes(fill = rescale), colour = "black") +
  scale_fill_gradient(low = "green", high = "red")
print(p)
Adapted from this tutorial

With highcharter:
library(highcharter)
library(tidyr)
library(dplyr)
# Example data: `row` holds the person name, the other columns are 0/1 flags.
df <- data.frame(
  row = c("Dwayne", "James"),
  G = c(1, 0),
  MIN = c(1, 0),
  PTS = c(0, 1),
  FGM = c(0, 0),
  FGA = c(0, 0),
  FGP = c(1, 1),
  stringsAsFactors = FALSE
)
rownames(df) <- c("Dwayne", "James")

# Long format with one row per (name, variable) cell. The `row` column must be
# excluded from the reshaping: gathering every column would fold the names
# into the values and leave only two columns instead of the three needed.
# pivot_longer() requires tidyr >= 1.0.0.
data <- df %>%
  tidyr::pivot_longer(
    cols = -row,
    names_to = "variable",
    values_to = "value"
  ) %>%
  dplyr::rename(name = row)

hchart(data, "heatmap", hcaes(x = variable, y = name, value = value)) %>%
  hc_colorAxis(stops = color_stops(2, c("red", "green")))
UPDATE:
You can add hc_size(height = 800) to set the chart height to 800, or compute the height from the data, for example:
# Height added to the chart for every distinct name in the data.
x <- 50

# Total height: a constant 100 plus `x` per distinct name.
hg <- 100 + x * length(unique(data$name))

# Same heatmap as before, with the computed height applied.
hc_size(
  hc_colorAxis(
    hchart(data, "heatmap", hcaes(x = variable, y = name, value = value)),
    stops = color_stops(2, c("red", "green"))
  ),
  height = hg
)
Here each distinct row name in the dataset makes the chart taller by 50 units; change x to adjust this.

This answer uses plotly and hence adding it as another answer. Using the same data as the following one.
library(plotly)
# The first column of `df` holds the row labels; the remaining columns are the
# 0/1 cells. The x labels must therefore skip the first column name as well,
# otherwise they are shifted by one relative to the columns of `z`.
p <- plot_ly(
  x = colnames(df)[-1],
  y = df[, 1],
  z = as.matrix(df[-1]),
  colors = colorRamp(c("green", "red")),
  type = "heatmap"
)

# Display the heatmap.
p
This is much simpler than the ggplot2 in terms of getting the output.
Hope this helps!

Related

plotting all columns of a dataframe in r per grouping variable onto one single plot

D
Dear all, given the following dataframe, I am trying to plot the values for each column and marked overall (coloured) by type (there are three groups, alpha, beta, gamma).
In other words the x axis should display 9 points (x100,x110....,x180) and the y axis range should be from 0 to 2 (each column takes a value from 0 to 2 max).
Each of the resulting three lines should highlight each of the three categorical variables.
Apologies about the format of the dataframe, I have not figured out how to pretty output it yet.
# dput()-style literal of the question's data frame: 3 rows, a character
# column Group.1 ("alpha", "beta", "gamma") and nine numeric columns
# x100 ... x180. The expression is not assigned to a name here.
structure(list(Group.1 = c("alpha", "beta", "gamma"), x100 =
c(1.31729175522923,
0.985278656706214, 0.156200287397951), x110 = c(1.54471416538581,
0.915659603197128, 0.733224726747721), x120 = c(1.27778739808127,
0.813037838321179, 0.779596480540931), x130 = c(1.25000598328188,
0.488610395696014, 0.806707685347646), x140 = c(1.82296009687707,
1.16132276877761, 1.31973652262241), x150 = c(0.929914232343435,
1.41477890312672, 1.41652730805799), x160 = c(1.19612871715799,
0.801679770927876, 0.39746836386621), x170 = c(1.88860023999587,
1.03295020200312, 0.729622524231672), x180 = c(0.926427994389087,
1.20304362708703, 1.57529754098505)), row.names = c(NA, -3L),
class = "data.frame")
I am trying to use ggplot (any other method of plotting would do) but in reading the requirements of the ggplot function I am struggling to understand if I should possibly create a vector of values to use as the aes parameter y?
Thanks in advance,
F
To get the layout you need for plotting you need to convert your data.frame to a long format using pivot_longer. From there you can just use ggplot as normal. To unstack the bars use position = dodge.
library(tidyverse)
# Wide input: one row per group, one numeric column per measurement point.
data <- data.frame(
  Group.1 = c("alpha", "beta", "gamma"),
  x100 = c(1.31729175522923, 0.985278656706214, 0.156200287397951),
  x110 = c(1.54471416538581, 0.915659603197128, 0.733224726747721),
  x120 = c(1.27778739808127, 0.813037838321179, 0.779596480540931),
  x130 = c(1.25000598328188, 0.488610395696014, 0.806707685347646),
  x140 = c(1.82296009687707, 1.16132276877761, 1.31973652262241),
  x150 = c(0.929914232343435, 1.41477890312672, 1.41652730805799),
  x160 = c(1.19612871715799, 0.801679770927876, 0.39746836386621),
  x170 = c(1.88860023999587, 1.03295020200312, 0.729622524231672),
  x180 = c(0.926427994389087, 1.20304362708703, 1.57529754098505),
  stringsAsFactors = FALSE
)

# Reshape to long format (one row per group and measurement point) and draw
# the values as side-by-side bars, one fill colour per group.
ggplot(
  pivot_longer(
    data,
    cols = contains("x"),
    names_to = "data_points",
    values_to = "vals"
  ),
  aes(x = data_points, y = vals, fill = Group.1)
) +
  geom_col(position = "dodge")
Created on 2020-11-11 by the reprex package (v0.3.0)
A base-R solution would be to reshape the data to long format with reshape before plotting.
library(ggplot2)

# Stack columns 2 to 10 of `df` into a single `yval` column; `xval` records
# which original column each value came from, `Group.1` identifies the series.
dflong <- reshape(
  df,
  varying = 2:10,
  v.names = "yval",
  timevar = "xval",
  times = colnames(df)[2:10],
  idvar = "Group.1",
  direction = "long"
)

# One line per group, coloured by group.
ggplot(
  dflong,
  aes(x = xval, y = yval, color = Group.1, group = Group.1)
) +
  geom_line()

ggboxplot significance bracket position too high

I'm using the ggpubr library. For some reason, the brackets are positioned too high. Is there any way to put them a little lower? I don't want to position each one individually.
library(ggpubr)
# Reproducible example data: three classes of 50 values each, shifted by
# 0, 10 and 20, with randomly assigned Group (1 to 4) and M ("Y"/"Z") labels.
set.seed(1)
df <- data.frame(
  Value = c(rnorm(50), rnorm(50) + 10, rnorm(50) + 20),
  Group = sample(1:4, 150, replace = TRUE),
  Class = rep(LETTERS[1:3], each = 50),
  M = sample(LETTERS[25:26], 150, replace = TRUE)
)

# Box plots per Group, faceted by Class and M, with significance brackets for
# the listed pairwise comparisons.
df %>%
  ggboxplot(
    x = "Group", y = "Value",
    facet.by = c("Class", "M"), scales = "free_y"
  ) +
  stat_compare_means(
    comparisons = list(c("1", "2"), c("2", "3"), c("1", "4"))
  )
Here is a quick and dirty way: just export the plot as a PDF with a much larger height. You can also do this with pdf(height = ...).
Then you get:

Multiple plots in R with time series

enter image description here I have the following data. Can anyone please help me plot it? I have tried many different commands, but none has given me the graph I want.
year x y
2012 4 5
2014 7 9
2017 4 3
enter image description here
This picture shows the plot I need to reproduce.
Based on your comments you might be looking for:
library(tidyverse)
# Stack every column except `year` into measure/value pairs, then draw points
# and lines per measure, each measure in its own facet.
plot1 <- ggplot(
  gather(df, key = measure, value = value, -year),
  aes(x = year, y = value, color = measure)
) +
  geom_point() +
  geom_line() +
  facet_wrap(~measure)
plot1
The biggest points here are gather and facet_wrap. I recommend the following two links:
https://ggplot2.tidyverse.org/reference/facet_grid.html
https://ggplot2.tidyverse.org/reference/facet_wrap.html
You need to convert year column type to Date.
This is a tidyverse style solution
library(tidyverse)
# Rename the two series, turn the numeric year into a Date (1 January of that
# year), and draw each series as its own coloured line.
mydf %>%
  rename(col1 = x, col2 = y) %>%
  mutate(year = as.Date(paste0(year, "-01-01"))) %>%
  ggplot(aes(x = year)) +
  geom_line(aes(y = col1), color = "red", size = 2) +
  geom_line(aes(y = col2), color = "blue", size = 2) +
  theme_minimal()
which returns this
Using the data shown reproducibly in the Note below use matplot. No packages are used.
# Plot both series against year in one call; each series is drawn as a line
# overplotted with its own letter ("x" or "y") as the point symbol.
matplot(
  x = dd[["year"]],
  y = dd[c("x", "y")],
  type = "o",
  pch = c("x", "y"),
  xlab = "year",
  ylab = "value"
)
Note
# Input data: three yearly observations of the integer series x and y.
dd <- data.frame(
  year = c(2012L, 2014L, 2017L),
  x = c(4L, 7L, 4L),
  y = c(5L, 9L, 3L)
)

Unfilled area in ggplot geom_area

I'm trying to do a plot with ggplot2 and geom_area. The fill is set by a variable. For some reason, only the 'outer' groups are filled. I can't figure out how to get the inner regions filled as well.
The same problem seems to occur here but no answer is given on how to solve it.
Below is a minimal example of the code I'm using and the resulting plot:
I'm using R 3.3 and ggplot2_2.1.0
Any help would be appreciated.
# Twelve monthly observations for 2016: five "past" months, one "current"
# month and six "future" months, with amounts 100, 200, ..., 1200.
df <- data.frame(
  month = seq(as.Date("2016-01-01"), as.Date("2016-12-31"), by = "month"),
  type = rep(c("past", "current", "future"), times = c(5, 1, 6)),
  amount = seq(100, 1200, by = 100)
)

# Fix the level order so the groups appear as past, current, future.
df$type <- factor(df$type, levels = c("past", "current", "future"))

ggplot(df, aes(x = month, y = amount, fill = type)) +
  geom_area()
I added two points in time around the "current" value in order to produce an area. The problem is that no area can be drawn from a single point.
library(ggplot2)
# Same monthly data as in the question.
df <- data.frame(
  month = seq(as.Date("2016-01-01"), as.Date("2016-12-31"), by = "month"),
  type = rep(c("past", "current", "future"), times = c(5, 1, 6)),
  amount = seq(100, 1200, by = 100)
)

# Replace the single "current" row (row 6) with two "current" rows placed
# mid-May and mid-June, so the group spans an interval instead of one point.
df <- rbind(
  df[1:5, ],
  data.frame(
    month = as.Date(c("2016-05-15", "2016-06-15")),
    type = rep("current", 2),
    amount = c(550, 650)
  ),
  df[7:12, ]
)
df$type <- factor(df$type, levels = c("past", "current", "future"))

ggplot(df, aes(x = month, y = amount, fill = type)) +
  geom_area()

loess method fails on data frame due to multiple series not having enough data points

I have a data frame like this:
# dput() call on the question's data frame `xx`, followed by its output:
# a 2-row data frame with the columns TimeStamp (Date), Host, OS, ID, Class,
# Stat, Instance, Average, Min and Max. The Instance factor carries 25 levels
# although both rows use the first one ("_Total").
dput(xx)
structure(list(TimeStamp = structure(c(15705, 15706), class = "Date"),
Host = c("Host1", "Host2"), OS = structure(c(1L, 1L), .Label = "solaris", class = "factor"),
ID = structure(c(1L, 1L), .Label = "1234", class = "factor"),
Class = structure(c(1L, 1L), .Label = "Processor", class = "factor"),
Stat = structure(c(1L, 1L), .Label = "CPU", class = "factor"),
Instance = structure(c(1L, 1L), .Label = c("_Total", "CPU0",
"CPU1", "CPU10", "CPU11", "CPU12", "CPU13", "CPU14", "CPU15",
"CPU16", "CPU17", "CPU18", "CPU19", "CPU2", "CPU20", "CPU21",
"CPU22", "CPU23", "CPU3", "CPU4", "CPU5", "CPU6", "CPU7",
"CPU8", "CPU9"), class = "factor"), Average = c(4.39009345794392,
5.3152972972973), Min = c(3.35, -0.01), Max = c(5.15, 72.31
)), .Names = c("TimeStamp", "Host", "OS", "ID", "Class",
"Stat", "Instance", "Average", "Min", "Max"), row.names = c(NA,
-2L), class = "data.frame")
This data frame is huge and it has many Hosts. The challenge that I am having is that when a host like above does not have enough data points, the following ggplot fails, basically complaining about not having enough data points to draw the graph.
# Max over time, one colour and one smoother per host. The argument must be
# spelled `method`; a misspelled name is not recognised as the smoothing
# method, so the requested "loess" would not be applied explicitly.
ggplot(xx, aes(TimeStamp, Max, group = Host, colour = Host)) +
  geom_point() +
  geom_smooth(method = "loess")
How can I check and see if a particular Host in this data frame has greater than 10 data points, if yes use method="loess".
if the number of data points for a Host is less than 10, use method="lm"
Yes, it was tricky to find, but it seems to be possible,
# For reproducibility of the simulated example data.
set.seed(42)

# Idea: split the hosts into those with >= 10 and those with < 10 points and
# give each subset its own smoother. data.table does the split.
# library() is used instead of require() so a missing package fails loudly.
library(data.table)
library(ggplot2)

# Simulate 10 hosts, each with a different number (1 to 20) of observations.
dt <- data.frame(
  Host = rep(paste0("Host", 1:10), sample(1:20, 10)),
  stringsAsFactors = FALSE
)
dt <- transform(
  dt,
  x = sample(seq_len(nrow(dt))),
  y = 15 * seq_len(nrow(dt))
)
dt <- data.table(dt, key = "Host")

# dt1: all rows of hosts with >= 10 observations; dt2: all rows of the others.
# Within each Host group, .N is the group size, so the logical index keeps
# either the whole group or none of it.
dt1 <- dt[, .SD[.N >= 10], by = Host]
dt2 <- dt[, .SD[.N < 10], by = Host]

# Lines plus a loess smoother for the hosts with enough data.
p <- ggplot(data = dt1, aes(x = x, y = y, group = Host)) +
  geom_line() +
  geom_smooth(method = "loess", se = TRUE)

# The tricky part: add the remaining hosts as extra layers by passing
# data = dt2 to each layer, and use a linear fit for them instead.
p <- p +
  geom_line(data = dt2, aes(x = x, y = y, group = Host)) +
  geom_smooth(data = dt2, method = "lm", se = TRUE)
p
(This is an ugly example. But it gives you the idea).
Adding to Arun's excellent answer, I think you simply need to visually distinguish e.g. use solid-line for loess, dotted-line for lm:
# Same two-subset plot, but the smoothers are visually distinguished:
# solid line for the loess fit (dt1), dashed line for the linear fit (dt2).
# TRUE is spelled out because T is an ordinary, reassignable variable.
p <- ggplot(data = dt1, aes(x = x, y = y, group = Host)) +
  geom_line() +
  geom_smooth(method = "loess", linetype = "solid", se = TRUE)
p <- p +
  geom_line(data = dt2, aes(x = x, y = y, group = Host)) +
  geom_smooth(data = dt2, method = "lm", linetype = "dashed", se = TRUE)
The warning messages can be prevented by duplicating the data points and setting the span parameter of the geom_smooth function. For example:
# Recombine both subsets and fit loess to every host with a wider span.
# The plot must use the combined `data`; plotting dt1 alone would leave the
# rbind() result unused and drop the hosts in dt2 from the figure.
data <- rbind(dt1, dt2)
p <- ggplot(data = data, aes(x = x, y = y, group = Host)) +
  geom_line() +
  geom_smooth(method = "loess", span = 1.4, se = TRUE)
In case the warnings remain, you can try different values of span parameter.

Resources