Say I created a heatmap using the function geom_raster() (from ggplot2).
What's a smart way to add a row at the bottom of the table showing (in my case) the 'Mean return' for each month on the period considered ?
It would be nice there is some space left between the 1985-2013 period and the row for the average, and maybe police color and 'cases' could be customized.
The core of my code is as follows (the object molten contains the my data, originally a matrix passed through the melt() function of reshape2.
hm <- ggplot(data = molten, aes(x = factor(Var2, levels = month.abb), y=Var1, fillll=value)) + geom_raster()
hm <- hm + scale_fill_gradient2(low=LtoM(100), mid=Mid, high=MtoH(100))
hm <- hm + labs(fill='% Return')
hm <- hm + geom_text(aes(label=paste(sprintf("%.1f %%", value))), size = 4)
hm <- hm + scale_y_continuous(breaks = 1985:2013)
hm <- hm + xlab(label = NULL) + ylab(label = NULL)
hm <- hm + theme_bw()
hm <- hm + theme(axis.text.x = element_text(size = 10, hjust = 0, vjust = 0.4, angle=90))
It's not very concise, but I think this should do what you need.
You didn't provide a data set, so I just made some up. Also, the LtoM and MtoH functions are not included in any R package I could find, so I did a quick Google search and found them here
The following code produces a plot hm2 with facets to make the "Mean Return" row at the bottom:
require(reshape2)
require(ggplot2)
# Random data
set.seed(100)
casted = data.frame(Var1 = rep(1985:2013, times=12), Var2 = rep(month.abb, each=29), return = rnorm(12*29, 0, 9))
molten = melt(casted, id.vars = c("Var1", "Var2"))
LtoM <-colorRampPalette(c('red', 'yellow' ))
Mid <- "snow3"
MtoH <-colorRampPalette(c('lightgreen', 'darkgreen'))
# Averages
monthly.avg = cbind(Var1 = rep("Mean", 12), dcast(molten, Var2 ~ ., mean))
colnames(monthly.avg)[3] = "Mean"
molten2 = merge(molten, melt(monthly.avg), all.x = TRUE, all.y = TRUE)
# New plot
hm2 =
ggplot(data = molten2, aes(x = factor(Var2, levels = month.abb), y=Var1, fill=value)) +
geom_raster() +
scale_fill_gradient2(low=LtoM(100), mid=Mid, high=MtoH(100)) +
labs(fill='% Return') +
geom_text(aes(label=paste(sprintf("%.1f %%", value))), size = 4) +
xlab(label = NULL) + ylab(label = NULL) +
theme_bw() +
theme(axis.text.x = element_text(size = 10, hjust = 0, vjust = 0.4, angle=90)) +
facet_grid(variable ~ ., scales = "free_y", space = "free_y") + # grid layout
theme(strip.background = element_rect(colour = 'NA', fill = 'NA'), strip.text.y = element_text(colour = 'white')) # remove facet labels
which gives the following plot:
How about this:
I created a grid to mock up your data
Main changes, are to precalculate the aggregate and "spacer" data rows, and add to molten,
then add scale_y_discrete so you can label the rows,
then make sure the format works for the grey spacer bar with no % label (comments in code)
Easier in future if you include the data (or a sample) in the question
require(ggplot2)
molten<-expand.grid(c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"),1985:2013,0)
colnames(molten)<-c("Var2","Var1","value")
molten$value=(runif(nrow(molten))*60)-30
#create means
means<-aggregate(molten[,c(1,3)], by=list(molten$Var2),FUN=mean, na.rm=TRUE)
colnames(means)<-c("Var2","Var1","value")
means$Var1<-"MEANS"
#create spacer bar
spacer<-means
spacer$Var1<-" "
spacer$value<-NA
#append them to the data
molten<-rbind(molten,spacer,means)
hm <- ggplot(data = molten, aes(x = Var2, y=Var1, fill=value)) +
geom_raster() +
# replaced your functions for ease of use
scale_fill_gradient2(low="red", mid="yellow", high="green",na.value="grey") +
labs(fill='% Return') +
# don't format the NA vals with %, return blank
geom_text(aes(label=ifelse((is.na(value)),"",paste(sprintf("%.1f %%", value)))), size = 4) +
# make the scale discrete to add labels and enforce order (use a blank space for the spacer)
scale_y_discrete(limits = c("MEANS"," ",1985:2013)) +
xlab(label = NULL) + ylab(label = NULL) +
theme_bw() +
theme(axis.text.x = element_text(size = 10, hjust = 0, vjust = 0.4, angle=90))
hm
Related
I would like to plot stacked barplot with added line plot that presents the overall set sizes. I'm plotting stacked barplot in ggplot2 without problems however additional line with different y axis is the difficulty. I'm using long-formated table as input, so there is no 'overall size' column.
Code to reproduce sample table:
df <- data.frame(Sample=c("S1","S2","S3","S4","S5","S6"), A=c(30,52,50,81,23,48), B=c(12,20,15,22,30,14), C=c(rep(15,6)))
df.melt <- melt(setDT(df), id.vars = "Sample", variable.name = "Group")
Head of the table:
Sample Group value
1: S1 A 30
2: S2 A 52
3: S3 A 50
4: S4 A 81
5: S5 A 23
6: S6 A 48
Code to draw stacked barplot:
ggplot(df.melt, aes(x = Sample, y = value, fill = Group)) +
geom_col(position = position_fill(reverse = TRUE)) +
theme(axis.text.x=element_text(angle=45, hjust=1), legend.title=element_blank()) +
scale_fill_brewer(palette="Set3") +
ylab("% of Total") +
scale_y_continuous(labels = percent) +
scale_x_discrete(limits = unique(df.melt$Sample))
Therefore the line would run through six stacked bars pointing the size of each set i.e. for sample S1 it would be 57 (A + B + C), and y axis labels to the right of the plot would show set size range.
You can put the data set directly in the geom. This allows you to use different data sets for each geom. Secondary axis are a bit tricky. They need to be a function of the primary axis and the data adjusted accordingly. I've used 120 as the adjustment factor.
percent <- c("0%", "25%", "50%", "75%", "100%")
set_sizes <- df %>%
rowwise %>%
mutate(Size = sum(A, B, C))
ggplot() +
geom_col(df.melt, mapping = aes(x = Sample, y = value, fill = Group),position = position_fill(reverse = TRUE)) +
geom_line(set_sizes, mapping = aes(x = Sample, y = Size / 120, group = 1)) +
scale_y_continuous(name = "% of Total", labels = percent, sec.axis = sec_axis(~ .*120, name = "Sample Size")) +
theme(axis.text.x=element_text(angle=45, hjust=1), legend.title=element_blank()) +
scale_fill_brewer(palette="Set3") +
scale_x_discrete(limits = unique(df.melt$Sample))
Alternatively, you can use cowplot to arrange two independent plots on top of each other, e.g.:
suppressMessages(invisible(lapply(c("data.table", "ggplot2", "cowplot"),
require, character.only=TRUE)))
df <- data.table(Sample=c("S1","S2","S3","S4","S5","S6"),
A=c(30,52,50,81,23,48), B=c(12,20,15,22,30,14), C=c(rep(15,6)))
df.melt <- melt(df, id.vars = "Sample", variable.name = "Group")
percent <- paste0(sprintf("%s", seq(0, 100, 25)), "%")
p1 <- ggplot(df.melt, aes(x = Sample, y = value, fill = Group)) +
geom_col(position = position_fill(reverse = TRUE)) +
theme(axis.text.x=element_text(angle=45, hjust=1), legend.title=element_blank()) +
scale_fill_brewer(palette="Set3") +
ylab("% of Total") +
scale_y_continuous(labels = percent) +
scale_x_discrete(limits = unique(df.melt$Sample))
p2 <- ggplot(df.melt[, .(value=sum(value)), by="Sample"],
aes(x = Sample, y = value, group=1)) +
geom_line() +
scale_x_discrete(labels = NULL, breaks = NULL) +
labs(x = NULL)
plot_grid(p2, NULL, p1, align="hv", nrow=3, axis='tlbr', rel_heights=c(1, -.28, 4), greedy=FALSE)
Created on 2022-02-20 by the reprex package (v2.0.1)
I have this two data.frames
df1 <-
data.frame(unit = factor(1:20, levels = 20:1),
value = sample(1:10, 20, replace = T))
df2 <-
data.frame(unit =
factor(as.vector(sapply(1:20, FUN = function(x) rep(x, 10))).
levels = 1:20),
time = rep(1:10, 20),
value = sample(1:100, 10*20, replace = T))
Which I want to plot side by side like this:
library(ggplot2)
library(cowplot)
plot_grid(ggplot(df1, aes(x=value,y=unit)) +
geom_bar(stat = 'identity') +
scale_x_continuous(position = "top"),
ggplot(df2, aes(x=time,y=value)) +
geom_line() +
facet_grid(rows = vars(unit), scales = "free_y") +
scale_x_continuous(position = "top") +
theme(axis.text.y = element_text(size=6)),
ncol = 2)
which results in this output
Still, the rows from the two plots, mapping variables from the same unit are not perfectly aligned:
What's the easiest way to align them programmatically (so that it will also work with a different number of units)? The solution doesn't need to involve the cowplot package.
A simple solution to achieve this is by using facets for the bar plot, too. As long as the spacing between the panels is the same in both plots this should ensure that the bars and the line plots for each group are aligned. Try this:
df1 <-
data.frame(unit = factor(1:20, levels = 20:1),
value = sample(1:10, 20, replace = T))
df2 <-
data.frame(unit = factor(as.vector(sapply(1:20, FUN = function(x) rep(x, 10))), levels = 1:20),
time = rep(1:10, 20),
value = sample(1:100, 10*20, replace = T))
library(ggplot2)
library(cowplot)
plot_grid(ggplot(df1, aes(x=value,y=unit)) +
geom_bar(stat = 'identity') +
facet_grid(rows = vars(unit), scales = "free_y") +
scale_x_continuous(position = "top") +
theme(panel.spacing.y = unit(1, "pt"), strip.text = element_blank()),
ggplot(df2, aes(x=time,y=value)) +
geom_line() +
facet_grid(rows = vars(unit), scales = "free_y") +
scale_x_continuous(position = "top") +
theme(axis.text.y = element_text(size=6), panel.spacing.y = unit(1, "pt")),
ncol = 2)
If we look at the way the plots are aligned, it seems clear that to have the bars matching the corresponding facets, we have to get rid of the space at either end of the bars' y axis. We can do this with scale_y_discrete(expand = c(0, 0)). We can also scale the width of the bars so that it is equal to the proportion that each of the facet panels takes up in their allotted viewports. Unfortunately this is somewhat dependent on device dimensions. However, a width of 0.8 or 0.9 will get you pretty close.
plot_grid(ggplot(df1, aes(x=value,y=unit)) +
geom_bar(stat = 'identity', width = 0.8) +
scale_x_continuous(position = "top") +
scale_y_discrete(expand = c(0, 0)),
ggplot(df2, aes(x=time,y=value)) +
geom_line() +
facet_grid(rows = vars(unit), scales = "free_y") +
scale_x_continuous(position = "top") +
theme(axis.text.y = element_text(size=6)),
ncol = 2)
I am using the windrose function posted here: Wind rose with ggplot (R)?
I need to have the percents on the figure showing on the individual lines (rather than on the left side), but so far I have not been able to figure out how. (see figure below for depiction of goal)
Here is the code that makes the figure:
p.windrose <- ggplot(data = data,
aes(x = dir.binned,y = (..count..)/sum(..count..),
fill = spd.binned)) +
geom_bar()+
scale_y_continuous(breaks = ybreaks.prct,labels=percent)+
ylab("")+
scale_x_discrete(drop = FALSE,
labels = waiver()) +
xlab("")+
coord_polar(start = -((dirres/2)/360) * 2*pi) +
scale_fill_manual(name = "Wind Speed (m/s)",
values = spd.colors,
drop = FALSE)+
theme_bw(base_size = 12, base_family = "Helvetica")
I marked up the figure I have so far with what I am trying to do! It'd be neat if the labels either auto-picked the location with the least wind in that direction, or if it had a tag for the placement so that it could be changed.
I tried using geom_text, but I get an error saying that "aesthetics must be valid data columns".
Thanks for your help!
One of the things you could do is to make an extra data.frame that you use for the labels. Since the data isn't available from your question, I'll illustrate with mock data below:
library(ggplot2)
# Mock data
df <- data.frame(
x = 1:360,
y = runif(360, 0, 0.20)
)
labels <- data.frame(
x = 90,
y = scales::extended_breaks()(range(df$y))
)
ggplot(data = df,
aes(x = as.factor(x), y = y)) +
geom_point() +
geom_text(data = labels,
aes(label = scales::percent(y, 1))) +
scale_x_discrete(breaks = seq(0, 1, length.out = 9) * 360) +
coord_polar() +
theme(axis.ticks.y = element_blank(), # Disables default y-axis
axis.text.y = element_blank())
#teunbrand answer got me very close! I wanted to add the code I used to get everything just right in case anyone in the future has a similar problem.
# Create the labels:
x_location <- pi # x location of the labels
# Get the percentage
T_data <- data %>%
dplyr::group_by(dir.binned) %>%
dplyr::summarise(count= n()) %>%
dplyr::mutate(y = count/sum(count))
labels <- data.frame(x = x_location,
y = scales::extended_breaks()(range(T_data$y)))
# Create figure
p.windrose <- ggplot() +
geom_bar(data = data,
aes(x = dir.binned, y = (..count..)/sum(..count..),
fill = spd.binned))+
geom_text(data = labels,
aes(x=x, y=y, label = scales::percent(y, 1))) +
scale_y_continuous(breaks = waiver(),labels=NULL)+
scale_x_discrete(drop = FALSE,
labels = waiver()) +
ylab("")+xlab("")+
coord_polar(start = -((dirres/2)/360) * 2*pi) +
scale_fill_manual(name = "Wind Speed (m/s)",
values = spd.colors,
drop = FALSE)+
theme_bw(base_size = 12, base_family = "Helvetica") +
theme(axis.ticks.y = element_blank(), # Disables default y-axis
axis.text.y = element_blank())
I have a dataset that has a wide range of values for one group. Using ggplot's facet_wrap, I would plot the y axis in a log scale for one group (the group that has the widest range of values) and regular axis for the other group.
Below is a reproducible example.
set.seed(123)
FiveLetters <- LETTERS[1:2]
df <- data.frame(MonthlyCount = sample(1:10, 36, replace=TRUE),
CustName = factor(sample(FiveLetters,size=36, replace=TRUE)),
ServiceDate = format(seq(ISOdate(2003,1,1), by='day', length=36),
format='%Y-%m-%d'), stringsAsFactors = F)
df$ServiceDate <- as.Date(df$ServiceDate)
# replace some counts to really high numbers for group A
df$MonthlyCount[df$CustName =="A" & df$MonthlyCount >= 9 ] <-300
df
library(ggplot2)
library(scales)
ggplot(data = df, aes(x = ServiceDate, y = MonthlyCount)) +
geom_point() +
facet_wrap(~ CustName, ncol = 1, scales = "free_y" ) +
scale_x_date("Date",
labels = date_format("%Y-%m-%d"),
breaks = date_breaks("1 week")) +
theme(axis.text.x = element_text(colour = "black",
size = 16,
angle = 90,
vjust = .5))
The resulting graph has two facets. The facet for group A has dots on the top and the bottom on the graph, which are difficult to compared, the facet for B is easier to read. I would like to plot facet for group A in log scale and leave the other "free".
this does the job
ggplot(data = df, aes(x = ServiceDate, y = MonthlyCount)) +
geom_point() +
facet_wrap(~ CustName, ncol = 1, scales = "free_y" ) +
scale_x_date("Date",
labels = date_format("%Y-%m-%d"),
breaks = date_breaks("1 week")) +
scale_y_continuous(trans=log_trans(), breaks=c(1,3,10,50,100,300),
labels = comma_format())+
theme(axis.text.x = element_text(colour = "black",
size = 16,
angle = 90,
vjust = .5))
You can make a transformed monthly count and use that as the y-axis.
## modify monthly count
df$mcount <- with(df, ifelse(CustName == "A", log(MonthlyCount), MonthlyCount))
ggplot(data = df, aes(x = ServiceDate, y = mcount)) +
geom_point() +
facet_wrap(~ CustName, ncol = 1, scales = "free_y" ) +
scale_x_date("Date",
labels = date_format("%Y-%m-%d"),
breaks = date_breaks("1 week")) +
theme(axis.text.x = element_text(colour = "black",
size = 16,
angle = 90,
vjust = .5))
I would like to plot a barplot but I have dates on the x axis and I want those dates to be correctly spaced (as it is NON categorical)
set.seed(1)
m = matrix(abs(rnorm(6)),3,2)
rownames(m) = as.Date(c('2011-01-01','2011-01-03','2011-01-10'))
barplot(t(m),beside=T,col=c('red','blue'),las=2)
On this example I would like 14984 to be offset on the right.
I'd rather a graphics solution but ggplot2 is fine too
Would you mind to use ´ggplot´ instead?
library(ggplot2)
set.seed(1)
df <- data.frame(y=abs(rnorm(6)),
x=rep(as.Date(c('2011-01-01','2011-01-03','2011-01-10')),
times = 2),
g = factor(rep(c(1,2), each = 3)))
ggplot(aes(x=x, y=y, group = g, fill = g), data = df) +
geom_bar(stat = 'identity', position = 'dodge')
You can improve axis formatting with `scale_x_date´
library(scales)
ggplot(aes(x=x, y=y, group = g, fill = g), data = df) +
geom_bar(stat = 'identity', position = 'dodge') +
scale_x_date(breaks = '1 day') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
And customize it to your purpose
ggplot(aes(x=x, y=y, group = g, fill = g), data = df) +
geom_bar(stat = 'identity', position = 'dodge') +
scale_x_date(breaks = '1 day') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
scale_fill_manual('My\nclasses', values = c('1'='red', '2' = 'blue')) +
labs(list(title = 'Barplot\n', x = ('Date'), y = 'Values'))
With graphics, you probably have to prepare the data appropriately (with missing values for dates you don't consider) in order to do this. Then you can use barplot.
# matrix definition
set.seed(1)
m = matrix(abs(rnorm(6)),3,2)
rownames(m) = as.Date(c('2011-01-01','2011-01-03','2011-01-10'))
# get all dates in between
dts <- do.call(":", as.list(range(rownames(m))))
dts <- dts[!dts%in%rownames(m)]
mat <- matrix(NA, nrow=length(dts), ncol=2, dimnames=list(dts, NULL))
# combine with original matrix
m <- rbind(m, mat)
m <- m[order(rownames(m)), ]
which(!is.na(m[,1]))
# plot
barplot(t(m), beside=T, col=c('red','blue'),las=2, axes=FALSE, axisnames=FALSE)
axis(2)
axis(1, at=3*which(!is.na(m[,1]))-1, labels=rownames(m[!is.na(m[,1]),]))