Wanted: repeated-pictogram visualization of population split - r

I am looking for a way to do a (actually quite common) visualization, whereby the split of a population of, say, N units into several categories is shown via a set of N pictograms - I'd prefer filled squares; in a newspaper, etc., one might see little humanoid shapes - with each pictogram colored according to the category of n'th unit. N fixed to 1 or 100, and the chart displaying fractions or percentages rather than counts, would be OK. The colored "stripes" would have to wrap to multiple lines. Is anyone aware of this chart available in an R package?
To take a specific example,
x = sample(c("A","B"),100,replace = T)
I would like to see a 10x10 (or 5x20, or whatever) grid of colored squares, some - corresponding to "A" - colored green, others red. The green (and likewise red) squares could be "bunched", or left in the original order.
Thank you.

Below we show 3 versions. The first uses squares, the next uses circles, the next uses an icon of a man and finally we use smiley faces.
Squares
# input data
set.seed(123)
x <- sample(c("A","B"),100,replace = T)
# input parameters - nr * nc should equal length(x)
cols <- c("green", "red")
nr <- 10
nc <- 10
# create data.frame of positions and colors
m <- matrix(cols[factor(x)], nr, nc)
DF <- data.frame(row = c(row(m)), col = c(col(m)[, nc:1]), value = c(m),
stringsAsFactors = FALSE)
# plot squares - modify cex to get different sized squares
plot(col ~ row, DF, col = DF$value, pch = 15, cex = 4, asp = 1,
xlim = c(0, nr), ylim = c(0, nc),
axes = FALSE, xlab = "", ylab = "")
Circles
# plot circles
plot(col ~ row, DF, col = DF$value, pch = 20, cex = 6, asp = 1,
xlim = c(0, nr), ylim = c(0, nc),
axes = FALSE, xlab = "", ylab = "")
png Icons This solution uses a black/white icon of a man which we have assumed has been saved in the current directory as man.png. We color it red and green and use those two versions in place of the squares or circles:
# blank graph to insert man icons into
plot(col ~ row, DF, col = DF$value, asp = 1,
xlim = c(0, nr), ylim = c(0, nc),
axes = FALSE, xlab = "", ylab = "", type = "n")
library(png)
man <- readPNG("man.png")
red.man <- man
red.man[,,1] <- man[,,4] # fill in red dimension
R <- subset(DF, value == "red")
with(R, rasterImage(red.man,
row-.5, col-.5, row+.5, col+.5,
xlim = c(0, nr), ylim = c(0, nc),
xlab = "", ylab = ""))
green.man <- man
green.man[,,2] <- man[,,4] # fill in green dimension
G <- subset(DF, value == "green")
with(G, rasterImage(green.man,
row-.5, col-.5, row+.5, col+.5,
xlim = c(0, nr), ylim = c(0, nc),
xlab = "", ylab = ""))
Smiley Face Icons This solution uses a green smiley face icon and a red frowning face icon which we have assumed have been saved in the
current directory as smiley_green.jpg and smiley_red.jpg.
# blank graph to insert man icons into
xp <- 1.25
plot(col ~ row, DF, col = DF$value, asp = 1,
xlim = c(0, xp * nr), ylim = c(0, xp * nc),
axes = FALSE, xlab = "", ylab = "", type = "n")
library(jpeg)
smiley_green <- readJPEG("smiley_green.jpg")
smiley_red <- readJPEG("smiley_red.jpg")
R <- subset(transform(DF, row = xp * row, col = xp * col), value == "red")
with(R, rasterImage(smiley_red,
row - .5, col - .5, row + .5, col + .5,
xlim = c(0, xp * nr), ylim = c(0, xp * nc),
xlab = "", ylab = ""))
G <- subset(transform(DF, row = xp * row, col = xp * col), value == "green")
with(G, rasterImage(smiley_green,
row - .5, col - .5, row + .5, col + .5,
xlim = c(0, xp * nr), ylim = c(0, xp * nc),
xlab = "", ylab = ""))
Revised To 10x10 green/red and added version using man icon.

BINGO. My prayers have been answered with
http://rud.is/b/2015/03/18/making-waffle-charts-in-r-with-the-new-waffle-package/
(But many thanks for the solutions suggested earlier).
# devtools::install_github("hrbrmstr/waffle")
library(waffle)
x <- sample(c("A","B"),100,replace = T)
x <- c(A=sum(x=="A"), B=sum(x=="B"))
waffle(x, rows=10, colors=c("red", "green"))

I also made a package for this (nigh-simultaneously with that other guy :-). It has three approaches, using geom_tile, geom_text (use e.g. FontAwesome for icons) and geom_point. The latter two can do drop shadows, but you have to tinker with the settings sometimes. There's some more examples here.
devtools::install_github("rubenarslan/formr")
library(formr)
qplot_waffle(rep(1:3,each=40,length.out=90))
library(ggplot2)
qplot_waffle_text(rep(1, each = 30), symbol = "", rows = 3) + ggtitle("Number of travellers in 2008")

The OP requested a ggplot2 solution on the ggplot2 Google group, so I'm posting it here as well:
ggplot(DF, aes(row, col, fill=value)) +
geom_tile(colour="white", lwd=2) +
scale_fill_manual(values=c("green","red")) +
theme(panel.background=element_blank(),
axis.text=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank()) +
guides(fill=FALSE)

Related

How can I change the colour of my points on my db-RDA triplot in R?

QUESTION: I am building a triplot for the results of my distance-based RDA in R, library(vegan). I can get a triplot to build, but can't figure out how to make the colours of my sites different based on their location. Code below.
#running the db-RDA
spe.rda.signif=capscale(species~canopy+gmpatch+site+year+Condition(pair), data=env, dist="bray")
#extract % explained by first 2 axes
perc <- round(100*(summary(spe.rda.signif)$cont$importance[2, 1:2]), 2)
#extract scores (coordinates in RDA space)
sc_si <- scores(spe.rda.signif, display="sites", choices=c(1,2), scaling=1)
sc_sp <- scores(spe.rda.signif, display="species", choices=c(1,2), scaling=1)
sc_bp <- scores(spe.rda.signif, display="bp", choices=c(1, 2), scaling=1)
#These are my location or site names that I want to use to define the colours of my points
site_names <-env$site
site_names
#set up blank plot with scaling, axes, and labels
plot(spe.rda.signif,
scaling = 1,
type = "none",
frame = FALSE,
xlim = c(-1,1),
ylim = c(-1,1),
main = "Triplot db-RDA - scaling 1",
xlab = paste0("db-RDA1 (", perc[1], "%)"),
ylab = paste0("db-RDA2 (", perc[2], "%)")
)
#add points for site scores - these are the ones that I want to be two different colours based on the labels in the original data, i.e., env$site or site_names defined above. I have copied the current state of the graph
points(sc_si,
pch = 21, # set shape (here, circle with a fill colour)
col = "black", # outline colour
bg = "steelblue", # fill colour
cex = 1.2) # size
Current graph
I am able to add species names and arrows for environmental predictors, but am just stuck on how to change the colour of the site points to reflect their location (I have two locations defined in my original data). I can get them labelled with text, but that is messy.
Any help appreciated!
I have tried separating shape or colour of point by site_name, but no luck.
If you only have a few groups (in your case, two), you could make the group a factor (within the plot call). In R, factors are represented as an integer "behind the scenes" - you can represent up to 8 colors in base R using a simple integer:
set.seed(123)
df <- data.frame(xvals = runif(100),
yvals = runif(100),
group = sample(c("A", "B"), 100, replace = TRUE))
plot(df[1:2], pch = 21, bg = as.factor(df$group),
bty = "n", xlim = c(-1, 2), ylim = c(-1, 2))
legend("topright", unique(df$group), pch = 21,
pt.bg = unique(as.factor(df$group)), bty = "n")
If you have more than 8 groups, or if you would like to define your own colors, you can simply create a vector of colors the length of your groups and still use the same factor method, though with a few slight tweaks:
# data with 10 groups
set.seed(123)
df <- data.frame(xvals = runif(100),
yvals = runif(100),
group = sample(LETTERS[1:10], 100, replace = TRUE))
# 10 group colors
ccols <- c("red", "orange", "blue", "steelblue", "maroon",
"purple", "green", "lightgreen", "salmon", "yellow")
plot(df[1:2], pch = 21, bg = ccols[as.factor(df$group)],
bty = "n", xlim = c(-1, 2), ylim = c(-1, 2))
legend("topright", unique(df$group), pch = 21,
pt.bg = ccols[unique(as.factor(df$group))], bty = "n")
For pch just a slight tweak to wrap it in as.numeric:
pchh <- c(21, 22)
ccols <- c("slateblue", "maroon")
plot(df[1:2], pch = pchh[as.numeric(as.factor(df$group))], bg = ccols[as.factor(df$group)],
bty = "n", xlim = c(-1, 2), ylim = c(-1, 2))
legend("topright", unique(df$group),
pch = pchh[unique(as.numeric(as.factor(df$group)))],
pt.bg = ccols[unique(as.factor(df$group))], bty = "n")

How to specify breaks for y axis in R plot

I have created the following fanchart using the fanplot package. I'm trying to add axis ticks and labels to the y axis, however it's only giving me the decimals and not the full number. Looking for a solution to display the full number (e.g 4.59 and 4.61) on the y axis
I am also unsure of how to specify the breaks and number of decimal points for the labels on the y-axis using plot(). I know doing all of this in ggplot2 it would look something like this scale_y_continuous(breaks = seq(min(data.ts$Index),max(data.ts$Index),by=0.02)) . Any ideas on how to specify the breaks in the y axis as well as the number of decimal points using the base plot() feature in R?
Here is a reproductible of my dataset data.ts
structure(c(4.6049904235401, 4.60711076016453, 4.60980084146652,
4.61025389170935, 4.60544515681515, 4.60889021700954, 4.60983993107244,
4.61091608826696, 4.61138799159174, 4.61294431148318, 4.61167545843765,
4.61208284263432, 4.61421991328081, 4.61530485425155, 4.61471465043043,
4.6155992084451, 4.61195799200607, 4.61178486640435, 4.61037927954796,
4.60744590947049, 4.59979957741728, 4.59948551500254, 4.60078678080182,
4.60556092645471, 4.60934962087565, 4.60981147563749, 4.61060477704678,
4.61158365084251, 4.60963435263623, 4.61018215733317, 4.61209710959768,
4.61231368335184, 4.61071363571141, 4.61019496497916, 4.60948652606191,
4.61068813487859, 4.6084092003352, 4.60972706132393, 4.60866915174087,
4.61192565195909, 4.60878767339377, 4.61341471281265, 4.61015272152397,
4.6093479714315, 4.60750965935653, 4.60768790690338, 4.60676463096309,
4.60746490411374, 4.60885670935448, 4.60686846708382, 4.60688947889575,
4.60867708110485, 4.60448791268212, 4.60387348166032, 4.60569806689426,
4.6069320880709, 4.6087143894128, 4.61059688801283, 4.61065399116698,
4.61071421014339), .Tsp = c(2004, 2018.75, 4), class = "ts")
and here is a reproductible of the code I'm using
# # Install and Load Packages
## pacman::p_load(forecast,fanplot,tidyverse,tsbox,lubridate,readxl)
# Create an ARIMA Model using the auto.arima function
model <- auto.arima(data.ts)
# Simulate forecasts for 4 quarters (1 year) ahead
forecasts <- simulate(model, n=4)
# Create a data frame with the parameters needed for the uncertainty forecast
table <- ts_df(forecasts) %>%
rename(mode=value) %>%
mutate(time0 = rep(2019,4)) %>%
mutate(uncertainty = sd(mode)) %>%
mutate(skew = rep(0,4))
y0 <- 2019
k <- nrow(table)
# Set Percentiles
p <- seq(0.05, 0.95, 0.05)
p <- c(0.01, p, 0.99)
# Simulate a qsplitnorm distribution
fsval <- matrix(NA, nrow = length(p), ncol = k)
for (i in 1:k)
fsval[, i] <- qsplitnorm(p, mode = table$mode[i],
sd = table$uncertainty[i],
skew = table$skew[i])
# Create Plot
plot(data.ts, type = "l", col = "#75002B", lwd = 4,
xlim = c(y0 - 2,y0 + 0.75), ylim = range(fsval, data.ts),
xaxt = "n", yaxt = "n", ylab = "",xlab='',
main = '')
title(ylab = 'Log AFSI',main = 'Four-Quarter Ahead Forecast Fan - AFSI',
xlab = 'Date')
rect(y0 - 0.25, par("usr")[3] - 1, y0 + 2, par("usr")[4] + 1,
border = "gray90", col = "gray90")
fan(data = fsval, data.type = "values", probs = p,
start = y0, frequency = 4,
anchor = data.ts[time(data.ts) == y0 - .25],
fan.col = colorRampPalette(c("#75002B", "pink")),
ln = NULL, rlab = NULL)
# Add axis labels and ticks
axis(1, at = y0-2:y0 + 2, tcl = 0.5)
axis(1, at = seq(y0-2, y0 + 2, 0.25), labels = FALSE, tcl = 0.25)
abline(v = y0 - 0.25, lty = 1)
abline(v = y0 + 0.75, lty = 2)
axis(2, at = range(fsval, data.ts), las = 2, tcl = 0.5)
range(blah) will only return two values (the minimum and maximum). The at parameter of axis() requires a sequence of points at which you require axis labels. Hence, these are the only two y values you have on your plot. Take a look at using pretty(blah) or seq(min(blah), max(blah), length.out = 10).
The suggestions of #Feakster are worth looking at, but the problem here is that the y-axis margin isn't wide enough. You could do either of two things. You could round the labels so they fit within the margins, for example you could replace this
axis(2, at = range(fsval, data.ts), las = 2, tcl = 0.5)
with this
axis(2, at = range(fsval, data.ts),
labels = sprintf("%.3f", range(fsval, data.ts)), las = 2, tcl = 0.5)
Or, alternatively you could increase the y-axis margin before you make the plot by specifying:
par(mar=c(5,5,4,2)+.1)
plot(data.ts, type = "l", col = "#75002B", lwd = 4,
xlim = c(y0 - 2,y0 + 0.75), ylim = range(fsval, data.ts),
xaxt = "n", yaxt = "n", ylab = "",xlab='',
main = '')
Then everything below that should work. The mar element of par sets the number of lines printed in the margin of each axis. The default is c(5,4,4,2).

How to plot a formula with a given range?

I am looking to plot the following:
L<-((2*pi*h*c^2)/l^5)*((1/(exp((h*c)/(l*k*T)-1))))
all variables except l are constant:
T<-6000
h<-6.626070040*10^-34
c<-2.99792458*10^8
k<-1.38064852*10^-23
l has a range of 20*10^-9 to 2000*10^-9.
I have tried l<-seq(20*10^-9,2000*10^-9,by=1*10^-9), however this does not give me the results I expect.
Is there a simple solution for this in R, or do I have to try in another language?
Thank you.
Looking at the spectral radiance equation wikipedia page, it seems that your formula is a bit off. Your formula multiplies an additional pi (not sure if intended) and the -1 is inside the exp instead of outside:
L <- ((2*pi*h*c^2)/l^5)*((1/(exp((h*c)/(l*k*T)-1))))
Below is the corrected formula. Also notice I have converted it into a function with parameter l since this is a variable:
T <- 6000 # Absolute temperature
h <- 6.626070040*10^-34 # Plank's constant
c <- 2.99792458*10^8 # Speed of light in the medium
k <- 1.38064852*10^-23 # Boltzmann constant
L <- function(l){((2*h*c^2)/l^5)*((1/(exp((h*c)/(l*k*T))-1)))}
# Plotting
plot(L, xlim = c(20*10^-9,2000*10^-9),
xlab = "Wavelength (nm)",
ylab = bquote("Spectral Radiance" ~(KW*sr^-1*m^-2*nm^-1)),
main = "Plank's Law",
xaxt = "n", yaxt = "n")
xtick <- seq(20*10^-9, 2000*10^-9,by=220*10^-9)
ytick <- seq(0, 4*10^13,by=5*10^12)
axis(side=1, at=xtick, labels = (1*10^9)*seq(20*10^-9,2000*10^-9,by=220*10^-9))
axis(side=2, at=ytick, labels = (1*10^-12)*seq(0, 4*10^13,by=5*10^12))
The plot above is not bad, but I think we can do better with ggplot2:
h <- 6.626070040*10^-34 # Plank's constant
c <- 2.99792458*10^8 # Speed of light in the medium
k <- 1.38064852*10^-23 # Boltzmann constant
L2 <- function(l, T){((2*h*c^2)/l^5)*((1/(exp((h*c)/(l*k*T))-1)))} # Plank's Law
classical_L <- function(l, T){(2*c*k*T)/l^4} # Rayleigh-Jeans Law
library(ggplot2)
ggplot(data.frame(l = c(20*10^-9,2000*10^-9)), aes(l)) +
geom_rect(aes(xmin=390*10^-9, xmax=700*10^-9, ymin=0, ymax=Inf),
alpha = 0.3, fill = "lightblue") +
stat_function(fun=L2, color = "red", size = 1, args = list(T = 3000)) +
stat_function(fun=L2, color = "green", size = 1, args = list(T = 4000)) +
stat_function(fun=L2, color = "blue", size = 1, args = list(T = 5000)) +
stat_function(fun=L2, color = "purple", size = 1, args = list(T = 6000)) +
stat_function(fun=classical_L, color = "black", size = 1, args = list(T = 5000)) +
theme_bw() +
scale_x_continuous(breaks = seq(20*10^-9, 2000*10^-9,by=220*10^-9),
labels = (1*10^9)*seq(20*10^-9,2000*10^-9,by=220*10^-9),
sec.axis = dup_axis(labels = (1*10^6)*seq(20*10^-9,2000*10^-9,by=220*10^-9),
name = "Wavelength (\U003BCm)")) +
scale_y_continuous(breaks = seq(0, 4*10^13,by=5*10^12),
labels = (1*10^-12)*seq(0, 4*10^13,by=5*10^12),
limits = c(0, 3.5*10^13)) +
labs(title = "Black Body Radiation described by Plank's Law",
x = "Wavelength (nm)",
y = expression("Spectral Radiance" ~(kWsr^-1*m^-2*nm^-1)),
caption = expression(''^'\U02020' ~'Spectral Radiance described by Rayleigh-Jeans Law, which demonstrates the ultraviolet catastrophe.')) +
annotate("text",
x = c(640*10^-9, 640*10^-9, 640*10^-9, 640*10^-9,
150*10^-9, (((700-390)/2)+390)*10^-9, 1340*10^-9),
y = c(2*10^12, 5*10^12, 14*10^12, 31*10^12,
35*10^12, 35*10^12, 35*10^12),
label = c("3000 K", "4000 K", "5000 K", "6000 K",
"UV", "VISIBLE", "INFRARED"),
color = c(rep("black", 4), "purple", "blue", "red"),
alpha = c(rep(1, 4), rep(0.6, 3)),
size = 4.5) +
annotate("text", x = 1350*10^-9, y = 23*10^12,
label = deparse(bquote("Classical theory (5000 K)"^"\U02020")),
color = "black", parse = TRUE)
Notes:
I created L2 by also making absolute temperature T a variable
For each T, I plot the function L2 using different colors for representation. I've also added a classical_L function to demonstrate classical theory of spectral radiance
geom_rect creates the light blue shaded area for "VISIBLE" light wavelength range
scale_x_continuous sets the breaks of the x axis, while labels sets the axis tick labels. Notice I have multiplied the seq by (1*10^9) to convert the units to nanometer (nm). A second x-axis is added to display the micrometer scale
Analogously, scale_y_continuous sets the breaks and tick labels for y axis. Here I multiplied by (1*10^-12) or (1*10^(-3-9)) to convert from watts (W) to kilowatts (kW), and from inverse meter (m^-1) to inverse nanometer (nm^-1)
bquote displays superscripts correctly in the y axis label
annotate sets the coordinates and text for curve labels. I've also added the labels for "UV", "VISIBLE" and "INFRARED" light wavelengths
ggplot2
Plot from wikipedia:
Image source: https://upload.wikimedia.org/wikipedia/commons/thumb/1/19/Black_body.svg/600px-Black_body.svg.png

R: center red_to_blue color palette at 0 in levelplot

I am making a levelplot in which one variable of my data frame is used to color the cells (fold.change) and another (map.signif) is written on top. In this case, I write * and ** for significant cells.
This is my MWE:
set.seed(150)
pv.df <- data.frame(compound=rep(LETTERS[1:8], each=3), comparison=rep(c("a/b","b/c","a/c"), 8), p.value=runif(24, 0, 0.2), fold.change=runif(24, -0.3, 0.9))
pv.df$map.signif <- ifelse(pv.df$p.value > 0.05, "", ifelse(pv.df$p.value > 0.01,"*", "**"))
pv.df
myPanel <- function(x, y, z, ...) {
panel.levelplot(x, y, z, ...)
panel.text(x, y, pv.df$map.signif, cex=3)
}
#install.packages("latticeExtra")
library(latticeExtra)
library(RColorBrewer)
cols <- colorRampPalette(brewer.pal(11, "RdBu"))(11)
png(filename="test.png", height=800, width=400)
print(
levelplot(fold.change ~ comparison*compound, #p.value instead of p.adjust depending on map.signif
pv.df,
panel = myPanel,
col.regions = cols,
at = do.breaks(range(pv.df$fold.change), 11),
colorkey = list(col = cols,
at = do.breaks(range(pv.df$fold.change), 11)),
xlab = "", ylab = "", # remove axis titles
scales = list(x = list(rot = 45), # change rotation for x-axis text
cex = 0.8), # change font size for x- & y-axis text
main = list(label = "Test\nfold change color\n*pv<0.05\t**pv<0.01",
cex = 1.5))
)
dev.off()
Which produces:
My question here is: Since fold.change includes negative and positive values, how do I make 0 to coincide with white in my color palette, so negative values are in red, and positive ones in blue?
For the win, is it possible to wirte the * in black when the cell background is clear, and in white when the background is dark? Many thanks!
Your range is not simetrical. An alternative is this one:
max_abs <- max(abs(pv.df$fold.change))
brk <- do.breaks(c(-max_abs, max_abs), 11)
levelplot(fold.change ~ comparison*compound, #p.value instead of p.adjust depending on map.signif
pv.df,
panel = myPanel,
col.regions = cols,
at = brk,
colorkey = list(col = cols,
at = brk),
xlab = "", ylab = "", # remove axis titles
scales = list(x = list(rot = 45), # change rotation for x-axis text
cex = 0.8), # change font size for x- & y-axis text
main = list(label = "Test\nfold change color\n*pv<0.05\t**pv<0.01",
cex = 1.5))
Edit
If you don't want the extra breaks:
max_abs <- max(abs(pv.df$fold.change))
brk <- do.breaks(c(-max_abs, max_abs), 11)
first_true <- which.max(brk > min(pv.df$fold.change))
brk <- brk[(first_true -1):length(brk)]
cols <- cols[(first_true -1):length(cols)]
levelplot(fold.change ~ comparison*compound, #p.value instead of p.adjust depending on map.signif
pv.df,
panel = myPanel,
col.regions = cols,
at = brk,
colorkey = list(col = cols,
at = brk),
xlab = "", ylab = "", # remove axis titles
scales = list(x = list(rot = 45), # change rotation for x-axis text
cex = 0.8), # change font size for x- & y-axis text
main = list(label = "Test\nfold change color\n*pv<0.05\t**pv<0.01",
cex = 1.5))

How do I plot a legend next to my title (outside plot) using R?

I'm using base R plot(), and I want a legend (a color block and key) to show up above (outside) the top right of my plot next to my title (generated using title()).
What's the best way to do this?
Maybe something like this is what you're looking for:
x <- c(1,2,3,4)
y <- c(4,1,3,2)
z <- c(1,2,3,4)
dat <- data.frame(x,y,z)
windows(width = 5, height = 9) #quartz() on Mac
layout(matrix(c(1,2), 2, 1, byrow = TRUE), heights=c(0.5,1))
par(oma = c(4,3,0,0) + 0.1, mar = c(0,0,1,1) + 0.1)
plot(dat$x, y=rep(1,4), type = "n", axes = F, ylab = "", xlab = "")
legend(x = "bottomright", legend = c("y", "z"), fill = c("blue", "red"))
plot(dat$x, dat$y, type = "n", main = "PLOT")
lines(z, col = "red")
lines(y, col = "blue")
Basically this makes two plots, one is just invisible and shortened so all that's displayed is the legend.
You may be able to addtionally tweak the margins around the legend and other graphical parameters (?par) to get the layout better.

Resources