ggplot geom_tile overlay plot with points - r

# Axis categories of the risk matrix.
severity <- c("Major", "Serious", "Minor", "Negligible")
probability <- c(
  "Highly Probable", "Probable", "Possible",
  "Remote", "Unlikely", "Impossible"
)

# One row per (severity, probability) cell. Both columns are re-levelled in
# reverse order of first appearance.
df <- expand.grid(x = severity, y = probability)
df[c("x", "y")] <- lapply(
  df[c("x", "y")],
  function(column) factor(column, levels = rev(unique(column)))
)

# Colour class of every cell, in the row order produced by expand.grid()
# (x varies fastest, so each line below is one value of y).
df$color <- c(
  1, 1, 2, 2,
  1, 2, 2, 2,
  2, 2, 2, 3,
  2, 2, 3, 3,
  2, 3, 3, 3,
  3, 3, 3, 3
)

# Tiles outlined in black and filled by colour class; no legend, no padding
# around the grid, no axis titles.
ggplot(df, aes(x = x, y = y, fill = factor(color))) +
  geom_tile(color = "black") +
  scale_fill_manual(values = c("red", "yellow", "green"), guide = "none") +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  labs(x = "", y = "")
Produces a risk assessment score card chart. I want to add points to it from a CSV file, one point per record. Each record has 3 fields: an item name, an x coordinate, and a y coordinate, where x = severity and y = probability.
da <- data.frame(list(name=c("ENVIRONMENTAL","COSTS","SUPPLY","HEALTH"),
severity=c("Major","Serious","Minor","Serious"),
probability=c("Probable","Possible","Probable","Unlikely")))
da
name severity probability
1 ENVIRONMENTAL Major Probable
2 COSTS Serious Possible
3 SUPPLY Minor Probable
4 HEALTH Serious Unlikely
> p1 <- p + data.frame(da, aes(severity, probability)) + geom_point()
Error in as.data.frame.default(x[[i]], optional = TRUE, stringsAsFactors = stringsAsFactors) :
cannot coerce class ""uneval"" to a data.frame
>
> d <- data.frame(list(name=c("ENVIRONMENTAL","COSTS","SUPPLY","HEALTH"),
severity=c(2,3,4,1),probability=c(3,5,4,6)))
> d
name severity probability
1 ENVIRONMENTAL 2 3
2 COSTS 3 5
3 SUPPLY 4 4
4 HEALTH 1 6
> ggplot(d,x=severity, y=probability)+ geom_point()
Error in exists(name, envir = env, mode = mode) :
argument "env" is missing, with no default
How can I add points to the ggplot / geom_tile graph?

You can't add a data.frame to a plot (not like that, at least...). What you can do is add a new layer, geom_point(), and specify the data.frame it comes from. To make things work, you should have the columns from any aesthetics you still want to use (here, x and y) have the same names in both data.frames.
# It's better practice to modify your data
# than to convert to factor within the plot
df$color <- factor(c(1,1,2,2,1,2,2,2,2,2,2,3,2,2,3,3,2,3,3,3,3,3,3,3))
# get some meaningful names, that match da and d
names(df)[1:2] <- c("severity", "probability")
# x and y are mapped once here, in ggplot(), so layers added later can reuse them
p <- ggplot(df, aes(x = severity, y = probability)) +
# moved fill to the geom_tile layer, because it's only used there
geom_tile(color = "black", aes(fill = color)) +
scale_fill_manual(guide = "none", values = c("red", "yellow", "green")) +
scale_x_discrete(expand = c(0, 0)) +
scale_y_discrete(expand = c(0, 0)) +
labs(x = "", y = "")
# alsonoticehowaddingspacesmakesiteasiertoread
# Using the same column names? Yup! Now it's this easy:
# each geom_point() layer brings its own data frame (da, then d)
p + geom_point(data = da) +
geom_point(data = d, color = "dodgerblue4")

Related

How Insert an expression in legend in ggplot2?:: correct color + multiple lines and point

I am new to R and have not been able to correct the following graph.
Xb_exp, it should have blue dots.
Xb_dw, solid red line.
Xb_f, dotted line.
Xb_s, longdash line.
The legend expression should be as shown with the subscript.
I have not been able to correct it.
Is there a way to do this?
enter image description here
my data
CA <- c(3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30)
Xb_exp <- c(0.0231,0.0519,0.0839,0.1197,0.1595,0.1996,0.2384,0.2772,0.3153,0.3520,0.3887,0.4254,0.4615,0.4978,0.5339,0.5685,0.6000,0.6279,0.6528,0.6762,0.6974,0.7166,0.7346,0.7516,0.7669,0.7810,0.7940,0.8059)
Xb_dw <- c(0.0160,0.0516,0.0886,0.1259,0.1633,0.2006,0.2377,0.2749,0.3122,0.3496,0.3870,0.4245,0.4617,0.4984,0.5339,0.5678,0.5996,0.6288,0.6551,0.6786,0.6994,0.7179,0.7346,0.7499,0.7641,0.7774,0.7899,0.8018)
Xb_f <- c(0.0021,0.0031,0.0046,0.0067,0.0095,0.0131,0.0177,0.0234,0,0387,0.0483,0.0591,0.0709,0.0832,0.0955,0.1073,0.1181,0.1272,0.1345,0.1398,0.1443,0.1456,0.1468,0.1474,0.1476,0.1477,0.1477,0.1477,0.1477)
Xb_s <- c(0.0139,0.0484,0.0839,0.1192,0.1538,0.1874,0.2200,0.2515,0.2818,0.3108,0.3387,0.3653,0.3908,0.4151,0.4383,0.4604,0.4815,0.5015,0.5206,0.5387,0.5559,0.5722,0.5877,0.6024,0.6164,0.6264,0.6421,0.6040)
dat <- c(CA, Xb_exp, Xb_dw, Xb_f, Xb_s)
my code
labels = c(expression(X[b_exp]),expression(X[b_dw]),expression(X[b_f]),expression(X[b_s]))
color4 <- c("Xb_exp"="#3C5488FF", "Xb_dw"="#DC0000FF", "Xb_f"="#00A087FF", "Xb_s"="#4DBBD5FF")
Xb_D1 <- ggplot(data = dat) +
theme_bw() +
labs(x="Crank position (ºCA)", y= bquote('Burn fraction ('~X[b]~')')) +
geom_point(aes(x=CA, y=Xb_exp, colour="Xb_exp"), size=3) +
geom_line(aes(x=CA, y=Xb_dw,colour="Xb_dw"), size=1,linetype="solid") +
geom_line(aes(x=CA, y=Xb_f,colour="Xb_f"), size=1,linetype="dotted") +
geom_line(aes(x=CA, y=Xb_s,colour="Xb_s"), size=1,linetype="longdash") +
scale_colour_manual(values=color4, labels=labels) +
theme(legend.title = element_blank(),legend.position = c(0.8, 0.5),
legend.text = element_text(size = 12)) +
scale_x_continuous(limits = c(2,80))
plot(Xb_D1)
ggplot() requires a dataframe not a vector. If you modify your code with:
dat <- data.frame(CA, Xb_exp, Xb_dw, Xb_f, Xb_s)
and fix the typo in your Xb_f vector
Xb_f <- c(0.0021,0.0031,0.0046,0.0067,0.0095,0.0131,0.0177,0.0234,0.0387,0.0483,0.0591,0.0709,0.0832,0.0955,0.1073,0.1181,0.1272,0.1345,0.1398,0.1443,0.1456,0.1468,0.1474,0.1476,0.1477,0.1477,0.1477,0.1477)
Your remaining code will work as is, but the same result could be achieved more simply using the tidyverse approach below. Use pivot_longer to stack the y variables against your x variable.
# Stack the four Xb_* columns into name/value pairs so that one colour mapping
# drives both the points and the lines of every series.
dat %>%
  pivot_longer(Xb_exp:Xb_s) %>%
  ggplot(aes(x = CA, y = value, colour = name)) +
  geom_point() +
  geom_line() +
  # `breaks` pins the legend order to the order of `color4`, which is also the
  # order of `labels`. Without it the positional labels are matched against
  # the alphabetically sorted series names and end up on the wrong keys.
  scale_colour_manual(
    values = color4,
    breaks = names(color4),
    labels = labels
  ) +
  theme_bw() +
  theme(
    legend.title = element_blank(),
    legend.position = c(0.8, 0.5),
    legend.text = element_text(size = 12)
  ) +
  scale_x_continuous(limits = c(2, 80)) +
  labs(x = "Crank position (ºCA)", y = bquote('Burn fraction ('~X[b]~')'))
Ironically, setting this up with conventional plotting is rather simple:
Given all the data above:
# Line type per series. The measured series (Xb_exp) is drawn as points only,
# so it has no line type.
linetypes4 <- c(
  Xb_exp = NA, Xb_dw = "solid", Xb_f = "dotted", Xb_s = "longdash"
)

# Empty canvas with fixed axis ranges and a background grid.
plot(
  NA, type = "n", xlim = c(0, 30), ylim = c(0, 0.8),
  xlab = "Crank position (ºCA)", ylab = bquote('Burn fraction ('~X[b]~')'),
  panel.first = grid()
)

with(dat, {
  # `cex` is the base-graphics point-size parameter; `size` is not a graphical
  # parameter and would only produce warnings.
  points(x = CA, y = Xb_exp, pch = 19, col = color4["Xb_exp"], cex = 1.5)
  # One line per model series, looked up by column name. The line type comes
  # from `linetypes4`, the vector defined at the top of this snippet.
  for (n in c("Xb_dw", "Xb_f", "Xb_s")) {
    lines(x = CA, y = get(n), lty = linetypes4[n], col = color4[n], lwd = 2)
  }
})

# Legend entries follow the order of `labels`, `color4` and `linetypes4`;
# only the first entry (the measured series) gets a point symbol.
legend(
  x = "right",
  legend = labels,
  col = color4,
  lty = linetypes4,
  pch = c(19, NA, NA, NA),
  box.lwd = 0,
  inset = .02
)
There are some errors in your code suggesting you didn't try what you pasted.
0,0387, in your data should likely be 0.0387, otherwise nothing is right (no data measures several hundreds in there)
c(CA, ... ) should likely be data.frame( CA, ... )
Now, the first problem is you are doing all the heavy lifting yourself, while ggplot sits there with nothing left to do. It was designed to set up colors and line types by group. You however need to transform the data first to take full advantage of that:
library(tidyr)

# Crank angle and the four burn-fraction series (measured and modelled).
CA <- c(3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30)
Xb_exp <- c(0.0231,0.0519,0.0839,0.1197,0.1595,0.1996,0.2384,0.2772,0.3153,0.3520,0.3887,0.4254,0.4615,0.4978,0.5339,0.5685,0.6000,0.6279,0.6528,0.6762,0.6974,0.7166,0.7346,0.7516,0.7669,0.7810,0.7940,0.8059)
Xb_dw <- c(0.0160,0.0516,0.0886,0.1259,0.1633,0.2006,0.2377,0.2749,0.3122,0.3496,0.3870,0.4245,0.4617,0.4984,0.5339,0.5678,0.5996,0.6288,0.6551,0.6786,0.6994,0.7179,0.7346,0.7499,0.7641,0.7774,0.7899,0.8018)
Xb_f <- c(0.0021,0.0031,0.0046,0.0067,0.0095,0.0131,0.0177,0.0234,0.0387,0.0483,0.0591,0.0709,0.0832,0.0955,0.1073,0.1181,0.1272,0.1345,0.1398,0.1443,0.1456,0.1468,0.1474,0.1476,0.1477,0.1477,0.1477,0.1477)
Xb_s <- c(0.0139,0.0484,0.0839,0.1192,0.1538,0.1874,0.2200,0.2515,0.2818,0.3108,0.3387,0.3653,0.3908,0.4151,0.4383,0.4604,0.4815,0.5015,0.5206,0.5387,0.5559,0.5722,0.5877,0.6024,0.6164,0.6264,0.6421,0.6040)
dat <- data.frame(CA, Xb_exp, Xb_dw, Xb_f, Xb_s)

# Colour and line type per series, keyed by series name.
color4 <- c(
  "Xb_exp" = "#3C5488FF", "Xb_dw" = "#DC0000FF",
  "Xb_f" = "#00A087FF", "Xb_s" = "#4DBBD5FF"
)
linetypes <- c(Xb_dw = "solid", Xb_f = "dotted", Xb_s = "longdash")

# Long format: one row per (CA, series) pair, series name in `name` and the
# burn fraction in `value`.
dat2 <- pivot_longer(dat, cols = starts_with("Xb_"))

# Split into the series drawn as lines and the one drawn as points. Plain
# subsetting is used because only tidyr is attached here: a bare `filter()`
# call would resolve to stats::filter() rather than the dplyr verb.
dat2.line <- dat2[dat2$name != "Xb_exp", ]
dat2.point <- dat2[dat2$name == "Xb_exp", ]
dat2 is now a long data set, with data category as a variable, not with a separate column for each data series. This is how ggplot likes it:
dat2
# A tibble: 112 x 3
CA name value
<dbl> <fct> <dbl>
1 3 Xb_exp 0.0231
2 3 Xb_dw 0.016
3 3 Xb_f 0.0021
4 3 Xb_s 0.0139
5 4 Xb_exp 0.0519
6 4 Xb_dw 0.0516
7 4 Xb_f 0.0031
8 4 Xb_s 0.0484
9 5 Xb_exp 0.0839
10 5 Xb_dw 0.0886
# … with 102 more rows
I then split the data into what later goes to points and what goes to lines, just so as not to make the plot code uglier than it has to be:
# Model series are drawn as lines (colour and line type by series); the
# measured series is drawn as points from its own data frame.
Xb_D1 <- ggplot(data = dat2.line, aes(x = CA, y = value, color = name)) +
  theme_bw() +
  labs(x = "Crank position (ºCA)", y = bquote('Burn fraction ('~X[b]~')')) +
  geom_point(data = dat2.point, size = 3) +
  geom_line(aes(linetype = name), size = 1) +
  scale_colour_manual(values = color4) +
  # The separate line-type legend is suppressed; the colour legend below
  # carries the line types instead.
  scale_linetype_manual(values = linetypes, guide = "none") +
  guides(
    # Legend keys are in alphabetical series order: Xb_dw, Xb_exp, Xb_f, Xb_s.
    # Only Xb_exp keeps a point symbol and it gets no line; the other keys
    # repeat the line types defined in `linetypes`.
    color = guide_legend(
      override.aes = list(
        shape = c(NA, 19, NA, NA),
        linetype = c("solid", "blank", "dotted", "longdash")
      )
    )
  ) +
  theme(
    legend.title = element_blank(),
    legend.position = c(0.8, 0.5),
    legend.text.align = 0,
    legend.text = element_text(size = 12)
  ) +
  scale_x_continuous(limits = c(2, 30))
print(Xb_D1)
no need to supply labels
use line type as you would use color with ggplot; it's just one more channel that can carry information (or aesthetic, as they like to call it over there)
align the legends left, looks nicer that way
more sophisticated is the use of override.aes to take away the points from the legend categories who shouldn't have them.
Now, I was unable to change the order of the data series in the labels; that can be a hassle. Is the order they are in still OK for you?

ggplot2: plotting line behind boxplot

I want to plot a line using geom_line behind my boxplot, I finally managed to combine line plotting with a boxplot. I have this dataset which I used to create a boxplot:
>head(MdataNa)
1 2 3 4 5 6 7
1 -0.02798634 -0.05740014 -0.02643664 0.02203644 0.02366325 -0.02868668 -0.01278713
2 0.20278229 0.19960302 0.10896017 0.24215229 0.31925211 0.29928739 0.15911725
3 0.06570653 0.08658396 -0.06019098 0.01437147 0.02078022 0.13814853 0.11369999
4 -0.42805441 -0.91945721 -1.05555731 -0.90877542 -0.77493682 -0.90620917 -1.00535742
5 0.39922939 0.12347996 0.06712451 0.07419287 -0.09517628 -0.12056720 -0.40863078
6 0.52821596 0.30827515 0.29733794 0.30555717 0.31636676 0.11592717 0.16957927
I have glucose concentration which should be plotted in a line behind this boxplot:
# glucose curve values
require("scales")
offconc <- c(0,0.4,0.8,1.8,3.5,6.9,7.3)
offtime <- c(9,11.4,12.9,14.9,16.7,18.3,20.5)
# now we have to scale them so they fit in the (boxplot)plot
time <- rescale(offtime, to=c(1,7))
conc <- rescale(offconc, to=c(-1,1))
glucoseConc <- data.frame(time,conc)
glucoseConc2 <- melt(glucoseConc, id = "time")
Then I plotted this data, but I was only able to plot the glucose curve in FRONT of the boxplot instead of behind it, I used this code:
boxNa <- ggplot(stack(MdataNa), aes(x = ind, y = values)) +
geom_boxplot() +
coord_cartesian(y = c(-1.5,1.5)) +
labs(list(title = "After Loess", x = "Timepoint", y = "M")) +
geom_line(data=glucoseConc2,aes(x=time,y=value),group=1)
output of the code above:
EDIT as suggested by the comments(NOT WORKING)
boxNa <- ggplot(stack(MdataNa), aes(x = ind, y = values)) +
geom_line(data=glucoseConc2,aes(x=time,y=value),group=1) +
geom_boxplot(data=stack(MdataNa), aes(x = ind, y = values)) +
coord_cartesian(y = c(-1.5,1.5)) +
labs(list(title = "After Loess", x = "Timepoint", y = "M"))
this will give the following error:
Error: Discrete value supplied to continuous scale
probably I'm doing something wrong then?
Here's a solution.
The idea is to convert the x axis to continuous values:
# Draw the glucose curve first so the boxplots are painted on top of it.
ggplot() +
  geom_line(data = glucoseConc2, aes(x = time, y = value), group = 1) +
  # The boxplot's x is converted to numeric so that both layers share one
  # continuous x scale; `group = ind` still gives one box per stacked column.
  geom_boxplot(
    data = stack(MdataNa),
    aes(x = as.numeric(ind), y = values, group = ind)
  ) +
  coord_cartesian(ylim = c(-1.5, 1.5)) +
  labs(title = "After Loess", x = "Timepoint", y = "M") +
  scale_x_continuous(breaks = 1:7)

How to add zero count labels for unused factors?

I would like to create bargraph of the factor variable with count number on y axis. Also I would like to add count labels to the bars for all factors, including missing ones.
For example, code below generate the graph I need, but z factor has no label(it should be 0), so I would like to add it. ggplot2 version 2.2.1.9000
df <- data.frame(x = factor(c("x", "x", "x"), levels = c("x","z")))
ggplot(df, aes(x)) + stat_count() +
geom_text(stat = "count" ,aes(label = ..count..),vjust = -1) +
scale_x_discrete(drop = FALSE)
Is it possible to do this without data transformations?
Note:
library(ggplot2)
# Three observations of "x" for a factor that also declares the unused
# level "z".
df <- data.frame(x = factor(rep("x", 3), levels = c("x", "z")))

# Count layer with unused levels kept on the x axis; the plot object is stored
# in `gg` so it can be inspected after building.
gg <- ggplot(df, aes(x)) +
  stat_count() +
  scale_x_discrete(drop = FALSE)
This "computes" the plot:
gb <- ggplot_build(gg)
And, here's all that's available after the stat_count() calculation:
gb$data[[1]]
## y count prop x PANEL group ymin ymax xmin xmax colour fill size linetype alpha
## 1 3 3 1 1 1 1 0 3 0.55 1.45 NA grey35 0.5 1 NA
You don't have that data available (excerpt from stat_count()):
compute_group = function(self, data, scales, width = NULL) {
x <- data$x
weight <- data$weight %||% rep(1, length(x))
width <- width %||% (resolution(x) * 0.9)
count <- as.numeric(tapply(weight, x, sum, na.rm = TRUE))
count[is.na(count)] <- 0
data.frame(
count = count,
prop = count / sum(abs(count)),
x = sort(unique(x)),
width = width
)
}
Either write a new stat_ or just do the computation outside of the plotting.

Plot multiple layers with ggplot2

I am trying to plot two data.frame as two layers using ggplot2 "geom_raster" function. The top layer contains NA values that are set to "transparent" in order to make the underneath layer visible. As the scale_fill_xxx function can't be used twice, I've tried the following code (based on this post : ggplot2 - using two different color scales for overlayed plots) :
library(ggplot2)
df1 <- data.frame(x=rep(c(1,2,3),times=3), y=c(1,1,1,2,2,2,3,3,3), data= c(NA,4,9,NA,2,7,NA,NA,3))
df2 <- data.frame(x=rep(c(1,2,3),times=3), y=c(1,1,1,2,2,2,3,3,3), data= c(1,NA,NA,2,NA,NA,1,2,NA))
ggplot() +
geom_raster(data=df1, aes(y= y, x= x, fill= data)) +
scale_fill_gradientn(name="df1", colours=c("red", "blue"), na.value = "transparent") +
geom_raster(data= df2, aes(y= y, x= x, colour= as.factor(data))) +
scale_colour_manual(values = c("green", "black"), name= "df2", labels= c("Class 1", "Class 2"), na.value="transparent")
The thing is that the "colour" / "scale_colour_manual" solution does not return what I expect (it returns a dark grey plot instead). I would like the df1 "data" column to be represented on a red to blue scale (NA's should be transparent) and the df2 "data" column to be represented according to class number ("1"=green and "2"=black).
Could anyone help me to understand what's wrong with my procedure?
Here is a solution :
# Combine both layers cell by cell: take the df1 value where present,
# otherwise fall back to the df2 class value.
df3 <- merge(df1, df2, by = c("x", "y"))
names(df3)[names(df3) == "data.x"] <- "data.1"
names(df3)[names(df3) == "data.y"] <- "data.2"
df3$data <- ifelse(is.na(df3$data.1), df3$data.2, df3$data.1)

myGrad <- colorRampPalette(c('blue','red')) # color gradient

# Range of the gradient part of the scale (values above the two class codes).
# na.rm = TRUE keeps this working when a cell is missing in both inputs.
min_value <- min(df3$data[df3$data > 2], na.rm = TRUE) # minimum value except 1 and 2
max_value <- max(df3$data, na.rm = TRUE) # maximum value
param <- max_value - min_value + 1 # number of colors in the gradient

# Values 1 and 2 get the fixed class colours; every integer from min_value to
# max_value gets one gradient colour. rescale() is called through its
# namespace so the snippet does not depend on scales being attached.
ggplot(df3, aes(x, y, fill = data)) +
  geom_raster() +
  scale_fill_gradientn(
    colours = c("green", "black", myGrad(param)),
    values = scales::rescale(c(1, 2, seq(min_value, max_value, 1))),
    na.value = "transparent"
  )
I guess you will use this plot with higher values and ranges, I tried with a 5x5 matrix:
# Reproducible 5 x 5 example: values 1 and 2 are drawn with probability 0.2
# each, the remaining 18 values share the other 0.6.
set.seed(123)
df4 <- data.frame(
  x = rep(c(1, 2, 3, 4, 5), 5),
  y = rep(c(1, 2, 3, 4, 5), each = 5),
  data = sample(
    c(1:20), 25,
    prob = c(0.2, 0.2, rep(0.6 / 18, 18)),
    replace = TRUE
  )
)

# Range of the gradient part of the scale (values above the two class codes).
min_value <- min(df4$data[df4$data > 2])
max_value <- max(df4$data)
param <- max_value - min_value + 1

# Same scale construction as for the 3 x 3 case; rescale() is called through
# its namespace so the snippet does not depend on scales being attached.
ggplot(df4, aes(x, y, fill = data)) +
  geom_raster() +
  scale_fill_gradientn(
    colours = c("green", "black", myGrad(param)),
    values = scales::rescale(c(1, 2, seq(min_value, max_value, 1))),
    na.value = "transparent"
  )

Parallel co-ordinates plot in R (ggparcoord)

I am facing a somewhat strange situation while plotting a parallel co-ordinates plot using ggparcoord. I am running the following code and it is running perfectly fine:
# Load required packages
# library() stops with an error if the package is missing, whereas require()
# only returns FALSE and lets the script fail later.
library(GGally)

# Load datasets
data(state)
df <- data.frame(
  state.x77,
  State = state.name,
  Abbrev = state.abb,
  Region = state.region,
  Division = state.division
)

# Generate basic parallel coordinate plot
p <- ggparcoord(
  data = df,
  # Which columns to use in the plot
  columns = 1:4,
  # Which column to use for coloring data
  groupColumn = 11,
  # Allows order of vertical bars to be modified
  order = "anyClass",
  # Do not show points
  showPoints = FALSE,
  # Turn on alpha blending for dense plots
  alphaLines = 0.6,
  # Turn off box shading range
  shadeBox = NULL,
  # Will normalize each column's values to [0, 1]
  scale = "uniminmax" # try "std" also
)

# Start with a basic theme
p <- p + theme_minimal()

# Decrease amount of margin around x, y values
p <- p + scale_y_continuous(expand = c(0.02, 0.02))
p <- p + scale_x_discrete(expand = c(0.02, 0.02))

# Remove axis ticks and labels
p <- p + theme(axis.ticks = element_blank())
p <- p + theme(axis.title = element_blank())
p <- p + theme(axis.text.y = element_blank())

# Clear axis lines
p <- p + theme(panel.grid.minor = element_blank())
p <- p + theme(panel.grid.major.y = element_blank())

# Darken vertical lines
p <- p + theme(panel.grid.major.x = element_line(color = "#bbbbbb"))

# Move label to bottom
p <- p + theme(legend.position = "bottom")

# Figure out y-axis range after GGally scales the data
min_y <- min(p$data$value)
max_y <- max(p$data$value)
pad_y <- (max_y - min_y) * 0.1

# Calculate label positions for each vertical bar
lab_x <- rep(1:4, times = 2) # 2 times, 1 for min 1 for max
lab_y <- rep(c(min_y - pad_y, max_y + pad_y), each = 4)

# Get min and max values from original dataset
# vapply() pins the result to one number per column.
lab_z <- c(
  vapply(df[, 1:4], min, numeric(1)),
  vapply(df[, 1:4], max, numeric(1))
)

# Convert to character for use as labels
lab_z <- as.character(lab_z)

# Add labels to plot
p <- p + annotate("text", x = lab_x, y = lab_y, label = lab_z, size = 3)

# Display parallel coordinate plot
print(p)
I get the following output:
The moment I want to subset the data to display fewer region levels using the following statement:
df<-df[which(df$Region %in% c('South','West','Northeast')),]
I start receiving the following error:
Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) :
contrasts can be applied only to factors with 2 or more levels
Why am I getting this error when the number of levels I want to display are clearly more than 2?
Any help on this would be much appreciated.
I figured out what the problem was. After subsetting, I had to re-create the Region column as a factor, which drops the levels that no longer occur in the data.
df$Region <- factor(df$Region)
The above piece of code fixes the error.

Resources