Related
By using ggplot and faced_grid functions I'm trying to make a heatmap. I have a categorical y axis, and I want y axis labels to be left aligned. When I use theme(axis.text.y.left = element_text(hjust = 0)), each panels' labels are aligned independently. Here is the code:
#data
set.seed(1)
gruplar <- NA
for(i in 1:20) gruplar[i] <- paste(LETTERS[sample(c(1:20),sample(c(1:20),1),replace = T) ],
sep="",collapse = "")
gruplar <- cbind(gruplar,anagruplar=rep(1:4,each=5))
tarih <- data.frame(yil= rep(2014:2019,each=12) ,ay =rep_len(1:12, length.out = 72))
gruplar <- gruplar[rep(1:nrow(gruplar),each=nrow(tarih)),]
tarih <- tarih[rep_len(1:nrow(tarih),length.out = nrow(gruplar)),]
grouped <- cbind(tarih,gruplar)
grouped$value <- rnorm(nrow(grouped))
#plot
p <- ggplot(grouped,aes(ay,gruplar,fill=value))
p <- p + facet_grid(anagruplar~yil,scales = "free",
space = "free",switch = "y")
p <- p + theme_minimal(base_size = 14) +labs(x="",y="") +
theme(strip.placement = "outside",
strip.text.y = element_text(angle = 90))
p <- p + geom_raster(aes(fill = value), na.rm = T)
p + theme(axis.text.y.left = element_text(hjust = 0, size=14))
I know that by putting spaces and using a mono-space font I can solve the problem, but I have to use the font 'Calibri Light'.
Digging into grobs isn't my favourite hack, but it can serve its purpose here:
# generate plot
# (I used a smaller base_size because my computer screen is small)
p <- ggplot(grouped,aes(ay,gruplar,fill=value)) +
geom_raster(aes(fill = value),na.rm = T) +
facet_grid(anagruplar~yil,scales = "free",space = "free",switch = "y") +
labs(x="", y="") +
theme_minimal(base_size = 10) +
theme(strip.placement = "outside",
strip.text.y = element_text(angle = 90),
axis.text.y.left = element_text(hjust = 0, size=10))
# examine ggplot object: alignment is off
p
# convert to grob object: alignment is unchanged (i.e. still off)
gp <- ggplotGrob(p)
dev.off(); grid::grid.draw(gp)
# change viewport parameters for left axis grobs
for(i in which(grepl("axis-l", gp$layout$name))){
gp$grobs[[i]]$vp$x <- unit(0, "npc") # originally 1npc
gp$grobs[[i]]$vp$valid.just <- c(0, 0.5) # originally c(1, 0.5)
}
# re-examine grob object: alignment has been corrected
dev.off(); grid::grid.draw(gp)
I guess one option is to draw the labels on the right-hand side, and move that column in the gtable,
p <-ggplot(grouped,aes(ay,gruplar,fill=value)) +
facet_grid(anagruplar~yil,scales = "free",space = "free",switch = "y") +
geom_raster(aes(fill = value),na.rm = T) +
theme_minimal(base_size = 12) + labs(x="",y="") +
scale_y_discrete(position='right') +
theme(strip.placement = "outside", strip.text.y = element_text(angle = 90))+
theme(axis.text.y.left = element_text(hjust = 0,size=14))
g <- ggplotGrob(p)
id1 <- unique(g$layout[grepl("axis-l", g$layout$name),"l"])
id2 <- unique(g$layout[grepl("axis-r", g$layout$name),"l"])
g2 <- gridExtra::gtable_cbind(g[,seq(1,id1-1)],g[,id2], g[,seq(id1+1, id2-1)], g[,seq(id2+1, ncol(g))])
library(grid)
grid.newpage()
grid.draw(g2)
This seems like a bug in ggplot2, or at least what I consider an undesirable / unexpected behavior. You may have seen the approach suggested here, which uses string padding on a mono-space font to achieve the alignment.
This is pretty hacky, but if you need to achieve alignment using a particular font, you might replace the axis labels altogether with geom_text. I have a mostly-working solution, but it is ugly, in that each step seems to break something else!
library(ggplot2); library(dplyr)
# To add a blank facet before 2014, I convert to character
grouped$yil = as.character(grouped$yil)
# I add some rows for the dummy facet, in year "", to use for labels
grouped <- grouped %>%
bind_rows(grouped %>%
group_by(gruplar) %>%
slice(1) %>%
mutate(yil = "",
value = NA_real_) %>%
ungroup())
p <- ggplot(grouped,
aes(ay,gruplar,fill=value)) +
geom_raster(aes(fill = value),na.rm = T) +
scale_x_continuous(breaks = 4*0:3) +
facet_grid(anagruplar~yil,
scales = "free",space = "free",switch = "y") +
theme_minimal(base_size = 14) +
labs(x="",y="") +
theme(strip.placement = "outside",
strip.text.y = element_text(angle = 90),
axis.text.y.left = element_blank(),
panel.grid = element_blank()) +
geom_text(data = grouped %>%
filter(yil == ""),
aes(x = -40, y = gruplar, label = gruplar), hjust = 0) +
scale_fill_continuous(na.value = "white")
p
(The last problem with this plot that I can see is that it shows an orphaned "0" on the x axis of the dummy facet. Need another hack to get rid of that!)
I have data which I want to plot as a stacked area plot. On the x-axis I have data which is continuous and on the y axis I have continuous data which I prepare to be cumulative. This is the code I am using with some dummy data:
library(data.table)
library(ggplot2)
set.seed(1)
dt <- data.table(var=sample(1:6,1000,replace=TRUE),xdata=runif(1000),ydata=runif(1000))
setorder(dt, var, xdata)
dt$cumydata <- dt[,
cumsum(ydata),
by = .(var)]$V1/sum(dt$ydata)
ggplot(dt, aes(x = xdata, y = cumydata, fill = as.factor(var))) +
geom_area(position = "stack")
Here is the output plot:
My issue is, that the data does not stack correctly. I guess this could be because of the continuity of the data?
For a stacked area chart the x values as well as the number of occurences must be the same. So changing your sample data to this will give you the expected output:
set.seed(1)
dt <- data.table(ydata=runif(1002))
dt$var <- rep(1:6, each=167)
dt$xdata <- rep(runif(167), 6)
setorder(dt, var, xdata)
dt$cumydata <- dt[,
cumsum(ydata),
by = .(var)]$V1/sum(dt$ydata)
ggplot(dt,aes(x = xdata, y = cumydata, fill = as.factor(var))) +
geom_area(position = "stack")
So this is finally how I solved it, based on Jimbou's information. It is just a bit of preprocessing. I also made the whole thing logarithmic.
library(data.table)
library(ggplot2)
set.seed(1)
dtt <- data.table(var=sample(1:6,1000,replace=TRUE),xdata=runif(1000),ydata=runif(1000))
setorder(dtt, var, xdata)
log.min.xdata <- log(min(dtt$xdata))
log.max.xdata <- log(max(dtt$xdata))
nbreaks <- 101
temp <- hist(log(dtt$xdata[dtt$var==1]),
breaks = seq(log.min.xdata, log.max.xdata, length=nbreaks),
plot = FALSE)
dt <- data.table(var = unlist(lapply(sort(unique(dtt$var)),
function(x){rep(x,nbreaks-1)})),
bin = rep(1:(nbreaks-1),length(unique(dtt$var))),
mid = rep(temp$mids))
dt$count <- dt[,
hist(log(dtt$xdata[dtt$var==var]),
breaks = seq(log.min.xdata, log.max.xdata, length=nbreaks),
plot = FALSE)$counts,
by = .(var)]$V1
dt$cumcount <- dt[,
cumsum(count),
by = .(var)]$V1
pp <- ggplot(dt, aes(x = exp(mid), y = cumcount, fill = as.factor(var))) +
geom_area(position = "stack") +
scale_x_log10() +
theme_bw() +
theme(legend.position = c(0.1, 0.70),
legend.background = element_rect(fill="lightgrey",
size=0.5, linetype="solid")) +
labs(title = "y",
fill = " var",
x = "xdata",
y = "cumcount") +
theme(title = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
legend.title = element_text(face = "bold"),
legend.text = element_text(face = "bold"))
Please help me to add smooth lines(thick black lines shown in the figure) to a R pyramid plot as shown in the attached image. Appreciate your help.This plot shows the population distribution according to the age and gender.
xy.pop<-c(3.2,3.5,3.6,3.6,3.5,3.5,3.9,3.7,3.9,3.5,3.2,2.8,2.2,1.8,1.5,1.3,0.7,0.4)
xx.pop<-c(3.2,3.4,3.5,3.5,3.5,3.7,4,3.8,3.9,3.6,3.2,2.5,2,1.7,1.5,1.3,1,0.8)
agelabels<-c("0-4","5-9","10-14","15-19","20-24","25-29","30-34",
"35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74",
"75-79","80-44","85+")
mcol<-color.gradient(c(0,0,0.5,1),c(0,0,0.5,1),c(1,1,0.5,1),18)
fcol<-color.gradient(c(1,1,0.5,1),c(0.5,0.5,0.5,1),c(0.5,0.5,0.5,1),18)
par(mar=pyramid.plot(xy.pop,xx.pop,labels=agelabels,main="Australian population pyramid 2002",lxcol=mcol,rxcol=fcol,))
How about the following (using ggplot rather than base R graphics).
# Your data
xy.pop<-c(3.2,3.5,3.6,3.6,3.5,3.5,3.9,3.7,3.9,3.5,3.2,2.8,2.2,1.8,1.5,1.3,0.7,0.4)
xx.pop<-c(3.2,3.4,3.5,3.5,3.5,3.7,4,3.8,3.9,3.6,3.2,2.5,2,1.7,1.5,1.3,1,0.8)
agelabels<-c("0-4","5-9","10-14","15-19","20-24","25-29","30-34",
"35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74",
"75-79","80-44","85+")
# Collect data in dataframe
df <- rbind.data.frame(
cbind.data.frame(Percentage = -xy.pop, Group = agelabels, Gender = "male"),
cbind.data.frame(Percentage = +xx.pop, Group = agelabels, Gender = "female"));
# Make sure agelabels have the right order
df$Group <- factor(df$Group, levels = agelabels);
# (gg)plot
gg <- ggplot(
data = df,
aes(x = Group, y = Percentage, fill = Gender, group = Gender));
gg <- gg + geom_bar(data = subset(df, Gender == "female"), stat = "identity");
gg <- gg + geom_bar(data = subset(df, Gender == "male"), stat = "identity");
gg <- gg + coord_flip();
gg <- gg + geom_smooth(
colour = "black", method = "loess", se = FALSE, show.legend = FALSE, size = 0.5);
gg <- gg + labs(
x = "Age",
y = "Percentage",
title = "Australian population pyramid 2012");
gg <- gg + scale_y_continuous(
breaks = seq(-4, 4, by = 2),
labels = c(rev(seq(0, 4, by = 2)), seq(2, 4, by = 2)));
print(gg);
I'm here fitting a LOESS curve separately to both the male and female pyramid halves (through the group aesthetic).
It's not quite the same plot as the one you show, but there is still room for improvement/tweaking. For example, you can change the fill aesthetic to achieve a percentage-dependent fill of the bars.
Credit where credit is due: This solution is based on this post on SO by #DidzisElferts.
Update (nearly a year later)
I've always wanted to review this answer to increase the aesthetic similarity of a ggplot2 solution with the plot generated from plotrix::pyramid.plot. Here is an update that gets pretty close.
# Define function to draw the left/right half of an age pyramid
ggpyramidhalf <- function(df, pos = "left", title) {
gg <- ggplot(df, aes(Group, Percentage, group = Gender)) +
geom_col(aes(fill = Group), colour = "black") +
geom_smooth(
colour = "black",
method = "loess",
se = F,
show.legend = F, size = 0.5) +
theme_minimal() +
labs(y = "%", title = title) +
coord_flip(expand = FALSE) +
theme(
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
if (pos == "left") {
gg <- gg +
ylim(c(min(range(pretty(df$Percentage))), 0)) +
scale_fill_manual(
values = colorRampPalette(c("blue", "white"))(length(agelabels)),
guide = F) +
theme(
plot.title = element_text(hjust = 1),
axis.text.y = element_blank())
} else {
gg <- gg +
ylim(c(0, max(range(pretty(df$Percentage))))) +
scale_fill_manual(
values = colorRampPalette(c("red", "white"))(length(agelabels)),
guide = F) +
theme(
plot.title = element_text(hjust = 0),
axis.title.y = element_blank(),
axis.text.y = element_text(hjust = 0.5, margin = margin(r = 10)))
}
gg
}
# Draw left (male) half of age pyramid
gg1 <- df %>%
filter(Gender == "male") %>%
mutate(Group = factor(Group, agelabels)) %>%
ggpyramidhalf(pos = "left", title = "Male")
# Draw right (female) half of age pyramid
gg2 <- df %>%
filter(Gender == "female") %>%
mutate(Group = factor(Group, agelabels)) %>%
ggpyramidhalf(pos = "right", title = "Female")
# Use gridExtra to draw both halfs in one plot
library(gridExtra)
library(grid)
grid.arrange(
gg1, gg2,
ncol = 2,
widths = c(1, 1.15),
top = textGrob("Australian population period 2002", gp = gpar(font = 2)))
Here is a solution using the pyramid.plot function of plotrix:
library(plotrix)
pyramid.plot(xy.pop,xx.pop,labels=agelabels,
main="Australian population pyramid 2002",lxcol=mcol,rxcol=fcol)
male.smline <- loess.smooth(x=1:18, y=xy.pop, degree=2)
lines(-1-male.smline$y, male.smline$x, col="red", lwd=3)
female.smline <- loess.smooth(x=1:18, y=xx.pop, degree=2)
lines(1+female.smline$y, female.smline$x, col="black", lwd=3)
I need a flexible way to make radar / spider charts in ggplot2. From solutions I've found on github and the ggplot2 group, I've come this far:
library(ggplot2)
# Define a new coordinate system
coord_radar <- function(...) {
structure(coord_polar(...), class = c("radar", "polar", "coord"))
}
is.linear.radar <- function(coord) TRUE
# rescale all variables to lie between 0 and 1
scaled <- as.data.frame(lapply(mtcars, ggplot2:::rescale01))
scaled$model <- rownames(mtcars) # add model names as a variable
as.data.frame(melt(scaled,id.vars="model")) -> mtcarsm
ggplot(mtcarsm, aes(x = variable, y = value)) +
geom_path(aes(group = model)) +
coord_radar() + facet_wrap(~ model,ncol=4) +
theme(strip.text.x = element_text(size = rel(0.8)),
axis.text.x = element_text(size = rel(0.8)))
which works, except for the fact that lines are not closed.
I thougth that I would be able to do this:
mtcarsm <- rbind(mtcarsm,subset(mtcarsm,variable == names(scaled)[1]))
ggplot(mtcarsm, aes(x = variable, y = value)) +
geom_path(aes(group = model)) +
coord_radar() + facet_wrap(~ model,ncol=4) +
theme(strip.text.x = element_text(size = rel(0.8)),
axis.text.x = element_text(size = rel(0.8)))
in order to join the lines, but this does not work. Neither does this:
closes <- subset(mtcarsm,variable == names(scaled)[c(1,11)])
ggplot(mtcarsm, aes(x = variable, y = value)) +
geom_path(aes(group = model)) +
coord_radar() + facet_wrap(~ model,ncol=4) +
theme(strip.text.x = element_text(size = rel(0.8)),
axis.text.x = element_text(size = rel(0.8))) + geom_path(data=closes)
which does not solve the problem, and also produces lots of
"geom_path: Each group consist of only one observation. Do you need to
adjust the group aesthetic?"
messages. Som, how do I go about closing the lines?
/Fredrik
Using the new ggproto mechanism available in ggplot2 2.0.0, coord_radar can be defined as:
coord_radar <- function (theta = "x", start = 0, direction = 1)
{
theta <- match.arg(theta, c("x", "y"))
r <- if (theta == "x")
"y"
else "x"
ggproto("CoordRadar", CoordPolar, theta = theta, r = r, start = start,
direction = sign(direction),
is_linear = function(coord) TRUE)
}
Not sure if the syntax is perfect but it is working...
The codes here seem outdated for ggplot2: 2.0.0
Try my package zmisc: devtools:install_github("jerryzhujian9/ezmisc")
After you install it, you will be able to run:
df = mtcars
df$model = rownames(mtcars)
ez.radarmap(df, "model", stats="mean", lwd=1, angle=0, fontsize=0.6, facet=T, facetfontsize=1, color=id, linetype=NULL)
ez.radarmap(df, "model", stats="none", lwd=1, angle=0, fontsize=1.5, facet=F, facetfontsize=1, color=id, linetype=NULL)
if you are curious about what's inside, see my codes at github:
The main codes were adapted from http://www.cmap.polytechnique.fr/~lepennec/R/Radar/RadarAndParallelPlots.html
solution key factor
add duplicated mpg row after melt by rbind
inherit CoordPolar on ggproto
set is_linear = function() TRUE on ggproto
especially is_linear = function() TRUE is important,
since if not you will get plot like this...
with is_linear = function() TRUE settings you can get,
library(dplyr)
library(data.table)
library(ggplot2)
rm(list=ls())
scale_zero_to_one <-
function(x) {
r <- range(x, na.rm = TRUE)
min <- r[1]
max <- r[2]
(x - min) / (max - min)
}
scaled.data <-
mtcars %>%
lapply(scale_zero_to_one) %>%
as.data.frame %>%
mutate(car.name=rownames(mtcars))
plot.data <-
scaled.data %>%
melt(id.vars='car.name') %>%
rbind(subset(., variable == names(scaled.data)[1]))
# create new coord : inherit coord_polar
coord_radar <-
function(theta='x', start=0, direction=1){
# input parameter sanity check
match.arg(theta, c('x','y'))
ggproto(
NULL, CoordPolar,
theta=theta, r=ifelse(theta=='x','y','x'),
start=start, direction=sign(direction),
is_linear=function() TRUE)
}
plot.data %>%
ggplot(aes(x=variable, y=value, group=car.name, colour=car.name)) +
geom_path() +
geom_point(size=rel(0.9)) +
coord_radar() +
facet_wrap(~ car.name, nrow=4) +
theme_bw() +
theme(
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.x = element_blank(),
legend.position = 'none') +
labs(title = "Cars' Status")
final result
Sorry, I was beeing stupid. This seems to work:
library(ggplot2)
# Define a new coordinate system
coord_radar <- function(...) {
structure(coord_polar(...), class = c("radar", "polar", "coord"))
}
is.linear.radar <- function(coord) TRUE
# rescale all variables to lie between 0 and 1
scaled <- as.data.frame(lapply(mtcars, ggplot2:::rescale01))
scaled$model <- rownames(mtcars) # add model names as a variable
as.data.frame(melt(scaled,id.vars="model")) -> mtcarsm
mtcarsm <- rbind(mtcarsm,subset(mtcarsm,variable == names(scaled)[1]))
ggplot(mtcarsm, aes(x = variable, y = value)) +
geom_path(aes(group = model)) +
coord_radar() + facet_wrap(~ model,ncol=4) +
theme(strip.text.x = element_text(size = rel(0.8)),
axis.text.x = element_text(size = rel(0.8)))
It turns out than geom_polygom still produces a polygon in the polar coordinates so that
# rescale all variables to lie between 0 and 1
scaled <- as.data.frame(lapply(mtcars, ggplot2:::rescale01))
scaled$model <- rownames(mtcars) # add model names as a variable
# melt the dataframe
mtcarsm <- reshape2::melt(scaled)
# plot it as using the polygon geometry in the polar coordinates
ggplot(mtcarsm, aes(x = variable, y = value)) +
geom_polygon(aes(group = model), color = "black", fill = NA, size = 1) +
coord_polar() + facet_wrap( ~ model) +
theme(strip.text.x = element_text(size = rel(0.8)),
axis.text.x = element_text(size = rel(0.8)),
axis.ticks.y = element_blank(),
axis.text.y = element_blank()) +
xlab("") + ylab("")
works perfectly...
Thank you guys for the help but it did not cover all of my needs. I used two series of data to be compared so I took the subset of mtcars for Mazda:
nobody mentioned about order of the x variable and ggplot2 sorts this variable for the plot but does not sort the data and it made my chart wrong at the first attempt. Apply sorting function for me it was dplyr::arrange(plot.data, x.variable.name)
I needed to annotate the chart with values and ggplot2::annotate() works fine but it was not included in the recent answers
the above code did not work fine for my data until adding ggplot2::geom_line
Finally this code chunk did my chart:
scaled <- as.data.frame(lapply(mtcars, ggplot2:::rescale01))
scaled$model <- rownames(mtcars)
mtcarsm <- scaled %>%
filter(grepl('Mazda', model)) %>%
gather(variable, value, mpg:carb) %>%
arrange(variable)
ggplot(mtcarsm, aes(x = variable, y = value)) +
geom_polygon(aes(group = model, color = model), fill = NA, size = 1) +
geom_line(aes(group = model, color = model), size = 1) +
annotate("text", x = mtcarsm$variable, y = (mtcarsm$value + 0.05), label = round(mtcarsm$value, 2), size = 3) +
theme(strip.text.x = element_text(size = rel(0.8)),
axis.text.x = element_text(size = rel(1.2)),
axis.ticks.y = element_blank(),
axis.text.y = element_blank()) +
xlab("") + ylab("") +
guides(color = guide_legend()) +
coord_radar()
Hopefully usefull for somebody
I have a 3 rows by 5 columns facet plot. Each row show data which spread over different ranges. To properly display my data so everything is shown, I don't set a y axis limit.
Here's my code:
require(reshape2)
library(ggplot2)
library(RColorBrewer)
fileName = paste("./data_test.csv", sep = "")
## data available here: https://dl.dropboxusercontent.com/u/73950/data_test.csv
mydata = read.csv(fileName,sep=",", header=TRUE)
dataM = melt(mydata,c("id"))
dataM = cbind(dataM,
colsplit(dataM$variable,
pattern = "_",
names = c("Network_model", "order", "category")))
dataM$variable <- NULL
dataM <- dcast(dataM, ... ~ category, value.var = "value")
dataM$minCut <- NULL
dataM$nbr_communities <- NULL
dataM$mean_community_size <- NULL
dataM$density <- NULL
my_palette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
dataM = melt(dataM, id.vars = c("Network_model", "order", "nodesRemoved", "id"))
my_palette = c(brewer.pal(5, "Blues")[c(4)], brewer.pal(5, "Set1")[c(3)])
ggplot(dataM, aes(x= nodesRemoved ,y= value, group= order, color= order)) +
geom_point(size = .6,alpha = .15,position="jitter") + ## increased size
stat_smooth(se = FALSE, size = .5, alpha = .1, method = "loess") +
scale_color_manual(values=my_palette) +
theme_bw() +
theme(plot.background = element_blank(),
axis.line = element_blank(),
legend.key = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8)
) +
scale_y_continuous("Value") +
scale_x_continuous("Time", limits=c(0, 100)) +
facet_grid(variable ~ Network_model,scales="free")
Which produces this:
Now, I'd like to selectively set limits for each of the three rows, so that the first row is limits=c(1.9, 3), the second is limits=c(0, 1) and the third is limits=c(.3, .7).
How can I achieve this in ggplot2 of faceting?
I think your best option will be to trim the data before plotting it, e.g. with dplyr,
library(dplyr)
limits <- data.frame(variable = levels(dataM$variable),
min = c(1.9,0,0.3),
max = c(3,1,0.7))
dataC <- inner_join(dataM, limits) %>% filter(value > min, value < max)
last_plot() %+% dataC
(I initially made the points bigger to see the culprits more clearly)