I'm trying to recreate this graph produced by Tableau using ggplot2. I've gotten far enough but I can't seem to figure out how to add color (whose intensity is proportional to the amount of profit).
The dataset is here
Here's the plot I want to replicate
https://www.dropbox.com/s/wcu780m72a85lvi/Screen%20Shot%202014-05-11%20at%209.05.49%20PM.png
Here's my code so far:
# Bar chart of Sales per Product, faceted by Market (rows) and Product.Type
# (columns). `weight = Sales` makes geom_bar() add up Sales instead of
# counting rows.
# Every `+` has to END a line: a line that starts with `+` is parsed as a new
# expression (a unary plus applied to a layer) and fails.
ggplot(coffee, aes(x = Product, weight = Sales)) +
  geom_bar() +
  facet_grid(Market ~ Product.Type, scales = "free_x", space = "free") +
  ylab("Sales") +
  theme(axis.text.x = element_text(angle = 90))
Using the aggregate function.
library(ggplot2)
coffee <- read.csv("CoffeeChain.csv")
# Sum Profit and Sales once per Product / Market / Product.Type combination,
# so each bar is a single row with one Profit value to map to `fill`.
agg <- aggregate(cbind(Profit, Sales) ~ Product + Market + Product.Type,
                 data = coffee, FUN = sum)
# The original passed `stat = "identity"` to ggplot(), which silently ignores
# it; bar heights come from the `weight` aesthetic, so the argument is removed.
ggplot(agg, aes(x = Product, weight = Sales, fill = Profit)) +
  geom_bar() +
  # Red-to-green gradient spread over the range of Profit.
  scale_fill_gradientn(colours = c("#F37767", "#9FC08D", "#6BA862", "#2B893E", "#036227")) +
  facet_grid(Market ~ Product.Type, scales = "free_x", space = "free") +
  ylab("Sales") +
  theme(axis.text.x = element_text(angle = 90))
Probably not the best way to do it:
# library() fails loudly if ggplot2 is missing; require() only returns FALSE.
library(ggplot2)
# Total profit of each Product.Type / Product / Market combination, repeated
# on every row that belongs to that combination.
aggProfit <- ave(coffee$Profit, coffee$Product.Type, coffee$Product, coffee$Market, FUN = sum)
# Bin the aggregated profit. The last break is the largest aggregated profit
# (the original referenced an undefined `aggSales` here). unique() guards
# against that maximum coinciding with a regular break, which cut() rejects.
coffee$Breaks <- cut(aggProfit,
                     unique(c(seq(-8000, 25000, 5000), max(aggProfit))),
                     dig.lab = 10)
appcolors <- c("#F37767", "#9FC08D", "#6BA862", "#2B893E", "#036227")
gg <- ggplot(coffee, aes(x = Product, weight = Sales, fill = Breaks)) +
  geom_bar() +
  facet_grid(Market ~ Product.Type, scales = "free_x", space = "free") +
  ylab("Sales") +
  theme(axis.text.x = element_text(angle = 90)) +
  # One interpolated colour per profit bin.
  scale_fill_manual(values = colorRampPalette(appcolors)(length(levels(coffee$Breaks))))
plot(gg)
To get the colors c("#F37767", "#9FC08D", "#6BA862", "#2B893E", "#036227") I used the ColorZilla plugin.
Related
Is there a more efficient way to present these data in ggplot2? Ideally, I would like them both in one plot. I know this can be achieved in Python with matplotlib, but I like the visuals of ggplot2 better.
R code used to generate the plots:
#load libraries
library(ggplot2)
library (gridExtra)
library(scales)
# Sample data for plot 1: 201 iteration steps (0, 20, ..., 4000) paired with
# uniform random "loss" values sorted from largest to smallest.
var_iter <- seq(from = 0, to = 4000, by = 20)
x <- runif(n = 201, min = 0.877813, max = 2.283210)
var_loss <- sort(x, decreasing = TRUE)
rndm1 <- data.frame(var_iter = var_iter, var_loss = var_loss)

# Sample data for plot 2: 8 iteration steps (0, 500, ..., 3500) paired with
# uniform random "accuracy" values sorted from smallest to largest.
var_iter2 <- seq(from = 0, to = 3500, by = 500)
x2 <- runif(n = 8, min = 0.1821, max = 0.6675)
var_acc <- sort(x2)
rndm2 <- data.frame(var_iter2 = var_iter2, var_acc = var_acc)
# Loss curve. The constant colour label "Log Loss" inside aes() exists only
# to produce a legend entry, which is then placed in the top-right corner.
c <- ggplot(data = rndm1, mapping = aes(x = var_iter, y = var_loss)) +
  geom_line(mapping = aes(colour = "Log Loss")) +
  scale_colour_manual(name = "", values = c("Log Loss" = "#00BFC4")) +
  # theme_bw() +   (left disabled)
  labs(x = "iterations", y = "log loss") +
  theme(
    legend.position = c(1, 1),
    legend.justification = c(1, 1),
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.box.just = "top",
    legend.background = element_rect(fill = alpha("white", 0.3))
  )

# Accuracy curve, built the same way with its own colour and legend position.
d <- ggplot(data = rndm2, mapping = aes(x = var_iter2, y = var_acc)) +
  geom_line(mapping = aes(colour = "Accuracy")) +
  scale_colour_manual(name = "", values = c("Accuracy" = "#F8766D")) +
  # theme_bw() +   (left disabled)
  labs(x = "iterations", y = "accuracy") +
  theme(
    legend.position = c(0.80, 1),
    legend.justification = c(1, 1),
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.box.just = "top",
    legend.background = element_rect(fill = alpha("white", 0.3))
  )

# Show the two plots next to each other.
grid.arrange(c, d, ncol = 2)
You still can use the same concept of adding a layer on another layer.
# Both series share one panel: the first layer uses the plot's data (rndm1),
# the second layer brings its own data frame and x mapping (rndm2).
ggplot(data = rndm1, mapping = aes(x = var_iter)) +
  geom_line(mapping = aes(y = var_loss, colour = "var_loss")) +
  geom_line(
    data = rndm2,
    mapping = aes(x = var_iter2, y = var_acc, colour = "var_acc")
  )
Or combine two data frame together and create another variable for color.
# Give both frames the same column names so rbind() can stack them.
colnames(rndm1) <- c("x", "y")
colnames(rndm2) <- c("x", "y")
rndm <- rbind(rndm1, rndm2)
# Tag every row with the series it came from; the labels are repeated
# once per row of the corresponding source frame.
rndm$group <- rep(c("Log Loss", "Accuracy"), times = c(nrow(rndm1), nrow(rndm2)))
ggplot(data = rndm, mapping = aes(x = x, y = y, colour = group)) +
  geom_line()
I wanted to suggest the same idea as JasonWang, but he was faster. I think it is the way to go (hence I upvoted his answer).
ggplot2 doesn't allow two y axes, for a reason; see: Plot with 2 y axes, one y axis on the left, and another y axis on the right
It is misleading.
But if you still want to do it. You can do it with base plot or dygraphs (for example):
# dygraph(), dySeries() and the %>% pipe all come from the dygraphs package,
# which the original snippet used without attaching.
library(dygraphs)
# Rename rndm2's iteration column to match rndm1 so merge() joins on it.
rndm2$var_iter <- rndm2$var_iter2
rndm2$var_iter2 <- NULL
# all = TRUE keeps every iteration from both frames; iterations present in
# only one frame get NA in the other frame's column.
merged.rndm <- merge(rndm1, rndm2, all = TRUE)
# Put var_acc on the secondary (right-hand) y axis.
dygraph(merged.rndm) %>% dySeries("var_acc", axis = "y2")
But this will give you isolated points for var_acc, because it has far fewer observations than var_loss.
You could fill it.
# Fill the NA gaps by interpolation (zoo::na.approx) so var_acc has a value
# at every iteration, then draw it on the secondary y axis as before.
merged.rndm1 <- as.data.frame(zoo::na.approx(merged.rndm))
dySeries(dygraph(merged.rndm1), "var_acc", axis = "y2")
Note: this has approximated values, which might not be something you want to do.
I have a sample data frame like this:
# Sample data: 15 observations, each a measurement type and its value.
Measurement <- c(
  "Length", "Breadth", "Length", "Breadth", "Height", "Height", "Breadth",
  "Length", "Height", "Breadth", "Length", "Height", "Height", "Breadth",
  "Length"
)
Value <- c(
  45, 43, 45, 100, 62, 62, 43,
  74, 74, 74, 12, 17, 17, 44,
  12
)
data <- data.frame(Measurement = Measurement, Value = Value)
I am trying to visualize this data to see how the values are distributed for each measurement and also if we combine the measurements. I am using a basic plot of histogram to do this but this is not visually appealing
# Base-graphics histogram of all values pooled together; the Measurement
# column is not used, so the three groups are not distinguished.
hist(data$Value)
Could someone help me use ggplot2 or another advanced visualization to view this data better? I would like to group by Measurement, and I would also like to see whether density plots are meaningful here. Any help would be appreciated.
Here are a couple interesting options:
library(ggplot2)
# One violin per measurement type, filled by that type.
ggplot(data = data, mapping = aes(x = factor(Measurement), y = Value)) +
  geom_violin(mapping = aes(fill = factor(Measurement)))
# Overlaid density outlines, one unfilled curve per measurement type.
ggplot(data = data,
       mapping = aes(x = Value, colour = Measurement, group = Measurement)) +
  geom_density(fill = NA)
They produce the following:
Hope this helps!
Here is another possibility using geom_histogram. To get the best looking, most informative histogram, it is important to set the binwidth manually for every new data set.
library(ggplot2)
# Histogram with unit-wide bins, one facet row per measurement type;
# margins = TRUE adds an extra row that pools all types.
p <- ggplot(data, aes(x = Value, fill = Measurement)) +
  geom_histogram(binwidth = 1, colour = "grey40", drop = TRUE) +
  facet_grid(Measurement ~ ., margins = TRUE) +
  theme_bw()
# Write the figure to disk as an 8 x 4 inch PNG at 150 dpi.
ggsave(filename = "hist.png", plot = p, width = 8, height = 4, dpi = 150)
I'm not sure I understood the question. Do you want to separate the values by measurement?
For that, you can do something like this:
# One data frame per measurement type: the matching values plus a constant
# label column. The "Length" label was misspelled "Lenghth" in the original,
# which would have shown up in the plot legend.
ValueLength <- data.frame(Value = Value[which(Measurement == "Length")], Measurement = "Length")
ValueBreadth <- data.frame(Value = Value[which(Measurement == "Breadth")], Measurement = "Breadth")
ValueHeight <- data.frame(Value = Value[which(Measurement == "Height")], Measurement = "Height")
Then you can combine them in one data frame again:
# Stack the three per-measurement frames back into one long data frame.
Values <- do.call(rbind, list(ValueLength, ValueBreadth, ValueHeight))
And plot with ggplot:
# Semi-transparent overlaid densities, one per measurement type.
ggplot(data = Values, mapping = aes(x = Value, fill = Measurement)) +
  geom_density(alpha = 0.2)
ggplot
I want to use facets (because I like the way they look for this) to show polynomial fits of increasing degree. It's easy enough to plot them separately as follows:
# 100 random y values, ten at each integer x from 1 to 10.
df <- data.frame(x = rep(seq_len(10), each = 10), y = rnorm(n = 100))
# Polynomial least-squares fits of degree 2, 3 and 4, one plot each.
ggplot(df, aes(x = x, y = y)) +
  stat_smooth(formula = y ~ poly(x, 2), method = "lm")
ggplot(df, aes(x = x, y = y)) +
  stat_smooth(formula = y ~ poly(x, 3), method = "lm")
ggplot(df, aes(x = x, y = y)) +
  stat_smooth(formula = y ~ poly(x, 4), method = "lm")
I know I can always combine them in some fashion using grobs, but I would like to combine them using facet_grid if possible. Maybe something similar to:
# Three copies of df, each tagged with the polynomial degree it is meant for,
# stacked into one frame so `degree` can drive the facets.
poly2 <- cbind(df, degree = 2)
poly3 <- cbind(df, degree = 3)
poly4 <- cbind(df, degree = 4)
polyn <- rbind(poly2, poly3, poly4)
# Attempt to let each facet use its own degree inside the smoothing formula.
ggplot(polyn, aes(x = x, y = y)) +
  stat_smooth(formula = y ~ poly(x, degree), method = "lm") +
  facet_grid(degree ~ .)
This doesn't work, of course, because the faceting does not work on y~poly(x,degree) so that degree gets pulled from the data. Is there some way to make this work?
You can always predict the points manually and then facet quite easily,
## Data
set.seed(0)
df <- data.frame(x = rep(1:10, each = 10), y = rnorm(100))

## Polynomial fits of degree 1 to 4, each predicted at 1000 random x
## positions drawn uniformly from [0, 10]; results are stacked row-wise.
dat <- do.call(rbind, lapply(1:4, function(d) {
  new_x <- runif(1000, 0, 10)
  fit <- lm(y ~ poly(x, d), data = df)
  data.frame(
    x = new_x,
    y = predict(fit, newdata = data.frame(x = new_x)),
    degree = d
  )
}))

## Raw points (from df) under the fitted curves, one facet column per degree.
ggplot(dat, aes(x, y)) +
  geom_point(data = df, mapping = aes(x, y), alpha = 0.3) +
  geom_line(color = "steelblue", lwd = 1.1) +
  facet_grid(~ degree)
To add confidence bands, you can use the option interval='confidence' with predict. You might also be interested in the function ggplot2::fortify to get more fit statistics.
# For each degree 1 to 4: fit the polynomial, predict on a regular grid of
# 100 x values in [0, 10] and keep the fit with its confidence limits
# (columns fit, lwr, upr from predict()).
dat <- do.call(rbind, lapply(1:4, function(d) {
  grid_x <- seq(0, 10, length.out = 100)
  model <- lm(y ~ poly(x, d), data = df)
  bands <- predict(model, newdata = data.frame(x = grid_x), interval = "confidence")
  data.frame(cbind(bands, x = grid_x, degree = d))
}))

# Raw points, fitted line and a translucent confidence ribbon per degree.
ggplot(dat, aes(x, fit)) +
  geom_point(data = df, mapping = aes(x, y), alpha = 0.3) +
  geom_line(color = "steelblue", lwd = 1.1) +
  geom_ribbon(mapping = aes(x = x, ymin = lwr, ymax = upr), alpha = 0.3) +
  facet_grid(~ degree)
I have a very ugly solution, in which the plot is faceted and the fits are plotted for the appropriate subsets of the data:
# Faceted base plot, one column per degree.
p1 <- ggplot(polyn, aes(x = x, y = y)) +
  facet_grid(. ~ degree)
# Each smoother only receives the rows of its own degree, so it is drawn
# only in the matching facet, with the matching polynomial formula.
p1 +
  stat_smooth(data = polyn[polyn$degree == 2, ], method = "lm", formula = y ~ poly(x, 2)) +
  stat_smooth(data = polyn[polyn$degree == 3, ], method = "lm", formula = y ~ poly(x, 3)) +
  stat_smooth(data = polyn[polyn$degree == 4, ], method = "lm", formula = y ~ poly(x, 4))
yields
This question already has answers here:
Align multiple plots in ggplot2 when some have legends and others don't
(6 answers)
Closed 5 years ago.
I'm trying to use ggplot to draw a graph comparing the absolute values of two variables, and also show the ratio between them. Since the ratio is unitless and the values are not, I can't show them on the same y-axis, so I'd like to stack vertically as two separate graphs with aligned x-axes.
Here's what I've got so far:
library(ggplot2)
library(dplyr)
library(gridExtra)
# Sample data: an index 1..20, a linear control series, a cubic "value"
# series, and the ratio of value to control.
results <- data.frame(index = seq_len(20))
results <- transform(results, control = 50 * index)
results <- transform(results, value = index * 50 + 2.5 * index^2 - index^3 / 8)
results <- transform(results, ratio = value / control)
# Absolute values: both series against index. The constant colour labels
# inside aes() create the legend.
plot_values <- ggplot(data = results, mapping = aes(x = index)) +
  geom_point(mapping = aes(y = value, colour = "value")) +
  geom_point(mapping = aes(y = control, colour = "control"))

# Ratio between the two series against the same index.
plot_ratios <- ggplot(data = results, mapping = aes(x = index, y = ratio)) +
  geom_point()

# Stack the two plots vertically (one column, two rows).
grid.arrange(plot_values, plot_ratios, nrow = 2, ncol = 1)
The big problem is that the legend on the right of the first plot makes it a different size. A minor problem is that I'd rather not show the x-axis name and tick marks on the top plot, to avoid clutter and make it clear that they share the same axis.
I've looked at this question and its answers:
Align plot areas in ggplot
Unfortunately, neither answer there works well for me. Faceting doesn't seem a good fit, since I want to have completely different y scales for my two graphs. Manipulating the dimensions returned by ggplot_gtable seems more promising, but I don't know how to get around the fact that the two graphs have a different number of cells. Naively copying that code doesn't seem to change the resulting graph dimensions for my case.
Here's another similar question:
The perils of aligning plots in ggplot
The question itself seems to suggest a good option, but rbind.gtable complains if the tables have different numbers of columns, which is the case here due to the legend. Perhaps there's a way to slot in an extra empty column in the second table? Or a way to suppress the legend in the first graph and then re-add it to the combined graph?
Here's a solution that doesn't require explicit use of grid graphics. It uses facets, and hides the legend entry for "ratio" (using a technique from https://stackoverflow.com/a/21802022).
library(reshape2)
# Long format: one row per (index, variable) pair.
results_long <- melt(results, id.vars = "index")
# "ratio" goes to its own facet, every other variable to "values"; the
# factor levels put the "values" panel on top.
results_long$facet <- factor(
  ifelse(results_long$variable == "ratio", "ratio", "values"),
  levels = c("values", "ratio")
)
ggplot(results_long, aes(x = index, y = value, colour = variable)) +
  geom_point() +
  facet_grid(facet ~ ., scales = "free_y") +
  # Three colours are supplied, but only "control" and "value" are listed
  # in `breaks`, so "ratio" gets no legend entry.
  scale_colour_manual(
    breaks = c("control", "value"),
    values = c("#1B9E77", "#D95F02", "#7570B3")
  ) +
  guides(colour = guide_legend(title = NULL)) +
  theme(
    legend.justification = c(0, 1),
    legend.position = c(0, 1),
    axis.title.y = element_blank()
  )
Try this:
library(ggplot2)
library(gtable)
library(gridExtra)
# Equalise the column widths and legend widths of several ggplot objects so
# that their panels line up when drawn underneath each other.
#
# Args:
#   ...: ggplot objects.
# Returns:
#   A list with one gtable per input plot, suitable for
#   do.call(grid.arrange, <result>).
#
# grid and gtable functions are namespace-qualified: `unit.pmax` is a grid
# function, and the snippet never attaches grid, so the unqualified call
# fails unless the caller has attached grid some other way.
#
# NOTE(review): the legend is assumed to be grob number 8 of each plot's
# gtable. That index depends on the ggplot2 version and should be confirmed.
AlignPlots <- function(...) {
  # Width stored in slot 4 of the first grob inside the assumed legend grob.
  LegendWidth <- function(x) x$grobs[[8]]$grobs[[1]]$widths[[4]]

  plots.grobs <- lapply(list(...), ggplotGrob)

  # Column-wise maximum of the layout widths across all plots, applied to
  # every plot.
  max.widths <- do.call(grid::unit.pmax, lapply(plots.grobs, "[[", "widths"))
  plots.grobs.eq.widths <- lapply(plots.grobs, function(x) {
    x$widths <- max.widths
    x
  })

  legends.widths <- lapply(plots.grobs, LegendWidth)
  max.legends.width <- do.call(max, legends.widths)

  # Pad each legend that is a gtable with an extra column whose width is the
  # difference between its own width and the widest legend; the difference is
  # treated as millimetres. Plots whose grob 8 is not a gtable are left as is.
  plots.grobs.eq.widths.aligned <- lapply(plots.grobs.eq.widths, function(x) {
    if (gtable::is.gtable(x$grobs[[8]])) {
      x$grobs[[8]] <- gtable::gtable_add_cols(
        x$grobs[[8]],
        grid::unit(abs(diff(c(LegendWidth(x), max.legends.width))), "mm")
      )
    }
    x
  })

  plots.grobs.eq.widths.aligned
}
# Two five-point series: t1 rises from 1 to 5, t2 falls from 5 to 1.
df <- data.frame(
  x = rep(1:5, times = 2),
  y = c(1:5, 5:1),
  type = factor(rep(c("t1", "t2"), each = 5))
)

# Case 1: both plots have a legend (of different widths).
p1.1 <- ggplot(diamonds, aes(x = clarity, fill = cut)) + geom_bar()
p1.2 <- ggplot(df, aes(x = x, y = y, colour = type)) + geom_line()
plots1 <- AlignPlots(p1.1, p1.2)
do.call(grid.arrange, plots1)

# Case 2: only the first plot has a legend.
p2.1 <- ggplot(diamonds, aes(x = clarity, fill = cut)) + geom_bar()
p2.2 <- ggplot(df, aes(x = x, y = y)) + geom_line()
plots2 <- AlignPlots(p2.1, p2.2)
do.call(grid.arrange, plots2)
Produces this:
(Based on several of baptiste's answers.)
Encouraged by baptiste's comment, here's what I did in the end:
library(ggplot2)
library(dplyr)
library(gridExtra)
# Sample data: index 1..20, a linear control series, a cubic value series
# and the ratio of value to control.
results <- data.frame(index = 1:20)
results[["control"]] <- results[["index"]] * 50
results[["value"]] <- 50 * results[["index"]] +
  2.5 * results[["index"]]^2 -
  results[["index"]]^3 / 8
results[["ratio"]] <- results[["value"]] / results[["control"]]
# Bottom plot: ratio against index. It keeps its x axis.
plot_ratios <- ggplot(data = results, mapping = aes(x = index, y = ratio)) +
  geom_point()

# Theme fragment that removes the x-axis ticks, tick labels and title.
remove_x_axis <- theme(
  axis.title.x = element_blank(),
  axis.text.x = element_blank(),
  axis.ticks.x = element_blank()
)

# Top plot: both absolute series, with the x axis removed.
plot_values <- ggplot(data = results, mapping = aes(x = index)) +
  geom_point(mapping = aes(y = value, colour = "value")) +
  geom_point(mapping = aes(y = control, colour = "control")) +
  remove_x_axis
# Arrange the two plots above each other by stacking their gtables.
# gtable and grid functions are namespace-qualified because the snippet
# attaches neither package (only ggplot2, dplyr and gridExtra).
grob_ratios <- ggplotGrob(plot_ratios)
grob_values <- ggplotGrob(plot_values)

# NOTE(review): column 5 is assumed to be the legend column of the
# values plot's gtable; the index depends on the ggplot2 version.
legend_column <- 5
legend_width <- grob_values$widths[legend_column]

# Insert an empty column of the legend's width into the ratio plot so that
# both gtables have the same number of columns.
grob_ratios <- gtable::gtable_add_cols(grob_ratios, legend_width, legend_column - 1)

# Public rbind() method for gtables instead of the private
# gtable:::rbind_gtable; size = "first" takes the column widths from
# grob_values.
grob_combined <- rbind(grob_values, grob_ratios, size = "first")

# A row of negative height between the two plots pulls them closer together.
grob_combined <- gtable::gtable_add_rows(
  grob_combined, grid::unit(-1.2, "cm"), pos = nrow(grob_values))
grid::grid.draw(grob_combined)
(I later realised I didn't even need to extract the legend width, since the size="first" argument to rbind tells it just to have that one override the other.)
It feels a bit messy, but it is exactly the layout I was hoping for.
An alternative & quite easy solution is as follows:
# loading needed packages
library(ggplot2)
library(dplyr)
library(tidyr)
# Sample data: index 1..20 with a linear control series, a cubic value
# series and their ratio.
results <- data.frame(index = seq_len(20))
results$control <- with(results, 50 * index)
results$value <- with(results, index * 50 + 2.5 * index^2 - index^3 / 8)
results$ratio <- with(results, value / control)
# Long format: one row per (index, variable). "ratio" gets its own facet,
# everything else goes to "values"; factor levels put "values" on top.
long <- results %>%
  gather(key = variable, value = value, -index) %>%
  mutate(facet = ifelse(variable == "ratio", "ratio", "values")) %>%
  mutate(facet = factor(facet, levels = c("values", "ratio")))

# Two stacked panels with independent y scales; the facet strips and the
# y-axis title are hidden, and only "control" and "value" appear in the
# legend because they are the only breaks listed.
ggplot(long, aes(x = index, y = value, colour = variable)) +
  geom_point() +
  facet_grid(facet ~ ., scales = "free_y") +
  scale_colour_manual(
    breaks = c("control", "value"),
    values = c("green", "red", "blue")
  ) +
  theme(
    axis.title.y = element_blank(),
    strip.text = element_blank(),
    strip.background = element_blank()
  )
which gives:
Hi I really have googled this a lot without any joy. Would be happy to get a reference to a website if it exists. I'm struggling to understand the Hadley documentation on polar coordinates and I know that pie/donut charts are considered inherently evil.
That said, what I'm trying to do is
Create a donut/ring chart (so a pie with an empty middle) like the tikz ring chart shown here
Add a second layer circle on top (with alpha=0.5 or so) that shows a second (comparable) variable.
Why? I'm looking to show financial information. The first ring is costs (broken down) and the second is total income. The idea is then to add + facet=period for each review period to show the trend in both revenues and expenses and the growth in both.
Any thoughts would be most appreciated
Note: Completely arbitrarily if an MWE is needed if this was tried with
# Arbitrary example inputs taken from iris: columns 2-4 for the ring,
# column 1 for the comparison variable, Species for the facets.
donut_data <- iris[, c("Sepal.Width", "Petal.Length", "Petal.Width")]
revenue_data <- iris[["Sepal.Length"]]
facet <- iris[["Species"]]
That would be similar to what I'm trying to do.. Thanks
I don't have a full answer to your question, but I can offer some code that may help get you started making ring plots using ggplot2.
library(ggplot2)
# Test data: three categories with their counts.
dat <- data.frame(count = c(10, 60, 30), category = c("A", "B", "C"))
# Additional columns needed for drawing with geom_rect: each category's
# share of the total, then (after sorting by share) the cumulative start
# and end of its segment.
dat$fraction <- prop.table(dat$count)
dat <- dat[order(dat$fraction), ]
dat$ymax <- cumsum(dat$fraction)
dat$ymin <- c(0, dat$ymax[-nrow(dat)])
# Basic ring: rectangles spanning x in [3, 4] are wrapped around the y
# axis by coord_polar; the x limits starting at 0 leave the hole in the
# middle.
p1 <- ggplot(dat, aes(fill = category, ymax = ymax, ymin = ymin, xmax = 4, xmin = 3)) +
  geom_rect() +
  coord_polar(theta = "y") +
  xlim(c(0, 4)) +
  labs(title = "Basic ring plot")

# Same ring with outlined segments and grid lines, axis text and ticks removed.
p2 <- ggplot(dat, aes(fill = category, ymax = ymax, ymin = ymin, xmax = 4, xmin = 3)) +
  geom_rect(colour = "grey30") +
  coord_polar(theta = "y") +
  xlim(c(0, 4)) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  ) +
  labs(title = "Customized ring plot")
library(gridExtra)
# Render both ring plots side by side into an 8 x 4 inch PNG at 120 dpi.
png(filename = "ring_plots_1.png", width = 8, height = 4, units = "in", res = 120)
grid.arrange(p1, p2, nrow = 1)
dev.off()
Thoughts:
You may get more useful answers if you post some well-structured sample data. You have mentioned using some columns from the iris dataset (a good start), but I am unable to see how to use that data to make a ring plot. For example, the ring plot you have linked to shows proportions of several categories, but neither iris[, 2:4] nor iris[, 1] are categorical.
You want to "Add a second layer circle on top": Do you mean to superimpose the second ring directly on top of the first? Or do you want the second ring to be inside or outside of the first? You could add a second internal ring with something like geom_rect(data=dat2, xmax=3, xmin=2, aes(ymax=ymax, ymin=ymin))
If your data.frame has a column named period, you can use facet_wrap(~ period) for facetting.
To use ggplot2 most easily, you will want your data in 'long-form'; melt() from the reshape2 package may be useful for converting the data.
Make some barplots for comparison, even if you decide not to use them. For example, try:
# Plain bar chart of the same counts, for comparison with the ring plot;
# stat = "identity" uses `count` directly as the bar height.
ggplot(data = dat, mapping = aes(x = category, y = count, fill = category)) +
  geom_bar(stat = "identity")
Just trying to solve question 2 with the same approach from bdemarest's answer. Also using his code as a scaffold. I added some tests to make it more complete but feel free to remove them.
library(broom)
library(tidyverse)
# Test data: two rings (A, B), each split into categories C and D.
dat <- data.frame(
  count = c(10, 60, 20, 50),
  ring = rep(c("A", "B"), each = 2),
  category = rep(c("C", "D"), times = 2)
)
# Chi-squared test on the ring x category table of counts (one row per
# ring, one column per category), tidied into a one-row data frame.
cs.pvalue <- dat %>%
  spread(key = category, value = count) %>%
  ungroup() %>%
  select(-ring) %>%
  chisq.test() %>%
  tidy()
# Fisher's exact test on the same table, joined with the chi-squared result
# so that cs.pvalue holds one row per test.
cs.pvalue <- dat %>%
  spread(key = category, value = count) %>%
  select(-ring) %>%
  fisher.test() %>%
  tidy() %>%
  full_join(cs.pvalue)
# Compute, within each ring, every category's share of the ring total and
# the cumulative start (ymin) and end (ymax) of its segment.
#dat = dat[order(dat$count), ]
# Plain assignment replaces `%<>%`: that operator lives in magrittr, which
# library(tidyverse) does not attach. head(ymax, -1) replaces
# ymax[1:length(ymax)-1], which only worked because `1:n - 1` yields
# 0:(n-1) and the zero index is silently dropped.
dat <- dat %>%
  group_by(ring) %>%
  mutate(fraction = count / sum(count),
         ymax = cumsum(fraction),
         ymin = c(0, head(ymax, -1))) %>%
  ungroup()
# Add x limits: ring number k occupies x in [baseNum + k - 1, baseNum + k].
baseNum <- 4
#numCat <- length(unique(dat$ring))
# factor() first: `ring` is a character column on R >= 4.0, where
# as.numeric("A") is NA. Converting through a factor gives the ring's
# ordinal position whether the column is character or already a factor.
dat$xmax <- as.numeric(factor(dat$ring)) + baseNum
dat$xmin <- dat$xmax - 1
# Two concentric rings: fill encodes the category, alpha distinguishes the
# rings. The test names and p-values from cs.pvalue are drawn as text
# labels at fixed x positions.
p2 <- ggplot(
  dat,
  aes(fill = category, alpha = ring,
      ymax = ymax, ymin = ymin, xmax = xmax, xmin = xmin)
) +
  geom_rect(colour = "grey30") +
  coord_polar(theta = "y") +
  geom_text(
    data = cs.pvalue,
    mapping = aes(label = paste(method,
                                "\n",
                                format(p.value, scientific = TRUE, digits = 2))),
    x = c(-1, 1),
    y = 0,
    inherit.aes = FALSE
  ) +
  xlim(c(0, 6)) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.border = element_blank()
  ) +
  labs(title = "Customized ring plot") +
  scale_fill_brewer(palette = "Set1") +
  scale_alpha_discrete(range = c(0.5, 0.9))
p2
And the result: