How to visualize the group and subgroup frequency? - r

I have to plot frequency data using a group variable As and a subgroup variable ADs. What is the best way to visualize the frequencies, e.g., a pie chart or a mosaic plot? Is there any function available in ggplot2?
# Example frequencies: `As` is the group, `ADs` the subgroup within it.
df <- data.frame(
  As   = rep(c("GeA", "GA"), times = c(3, 1)),
  ADs  = c("A44", "A33", "A37", "A141"),
  freq = c(501, 65, 50, 103)
)
#    As  ADs freq
# 1 GeA  A44  501
# 2 GeA  A33   65
# 3 GeA  A37   50
# 4  GA A141  103
Some thoughts are like below:
However, is there any way to differentiate both group and subgroup in a single plot?
Out of the proposed solutions, below two charts looked promising.
Pie Chart & Tile Graph
I have used the following code suggested by M--.
# Build the rectangle bounds (ymin/ymax) used by the pie and tile charts below.
# Work on a copy so `df` itself is not modified.
df.2 <- df
# ymax: running total of `freq` within each `As` group.
df.2$ymax <- with(df.2, ave(freq, As, FUN=cumsum))
# ymin: intended to be the previous row's ymax, with 0 for the first row.
# NOTE(review): `lag(x, default = )` looks like dplyr::lag(), but no
# library(dplyr) call is visible in this snippet -- confirm dplyr is attached,
# otherwise a different lag() may be dispatched.
df.2$ymin <- lag(df.2$ymax, default = 0)
# Reset ymin to 0 on every row whose `As` differs from the previous row's.
df.2$ymin <- ifelse(lag(as.character(df.2$As), default = 0) != df.2$As, 0, df.2$ymin)
# Copy of the data ordered by `As`; the plots read their label text from it.
df.legend <- df.2[with(df.2, order(As)), ]
library(ggplot2)
# Pie Chart
# Two concentric bands drawn as rectangles and then bent into rings by
# coord_polar(): the outer band is coloured by group, the inner by subgroup.
ggplot(df.2) +
# Outer band (x from 3 to 4), filled by group `As`.
geom_rect(aes(fill=As, ymax=ymax, ymin=ymin, xmax=4, xmin=3)) +
# Inner band (x from 0 to 3), filled by subgroup `ADs`.
geom_rect(aes(fill=ADs, ymax=ymax, ymin=ymin, xmax=3, xmin=0)) +
# NOTE(review): xlim() here and scale_x_continuous() further down both define
# the x scale; this pairing matches the "Scale for 'x' is already present"
# message reported for this code.
xlim(c(0, 4)) +
theme(aspect.ratio=1) +
# Map y to the angle so the stacked rectangles become ring segments.
coord_polar(theta="y") +
scale_x_continuous(breaks=c(0,3), labels=c("ADs", "As")) +
# Subgroup names at x = 1.5 (inside the inner band), at hand-picked y positions.
annotate("text", x=rep(1.5,4), y=c(50, 350,530,590),
label= as.character(df.legend$ADs)) +
# Group names at x = 3.5 (inside the outer band), at hand-picked y positions.
annotate("text", x=rep(3.5,2), y=c(50, 350),
label= as.character(unique(df.legend$As))) +
# The text annotations replace the legend; axis titles are dropped.
theme(legend.position="none", axis.title.x=element_blank(),
axis.title.y=element_blank())
# Tile Graph
# Same rectangles as the pie chart, kept in Cartesian coordinates: a column of
# subgroup tiles (x from 0 to 3) next to a column of group tiles (x from 3 to 4).
ggplot(df.2) +
# Group column, filled by `As`.
geom_rect(aes(fill=As, ymax=ymax, ymin=ymin, xmax=4, xmin=3)) +
# Subgroup column, filled by `ADs`.
geom_rect(aes(fill=ADs, ymax=ymax, ymin=ymin, xmax=3, xmin=0)) +
# NOTE(review): xlim() and the scale_x_continuous() on the next line both
# define the x scale; this pairing matches the "Scale for 'x' is already
# present" message reported for this code.
xlim(c(0, 4)) + theme(aspect.ratio=1) +
# Axis ticks at the centre of each column, labelled with the variable names.
scale_x_continuous(breaks=c(1.5,3.5), labels=c("ADs", "As")) +
# "<subgroup> = <freq>" labels at x = 1.5, at hand-picked y positions.
annotate("text", x=rep(1.5,4), y=c(50, 350,530,590),
label= paste(as.character(df.legend$ADs), df.legend$freq,sep= " = ")) +
# Group names at x = 3.5, at hand-picked y positions.
annotate("text", x=rep(3.5,2), y=c(50, 350),
label= as.character(unique(df.legend$As))) +
# The text annotations replace the legend; axis titles are dropped.
theme(legend.position="none", axis.title.x=element_blank(),
axis.title.y=element_blank())
However, I didn't get the same output
Pie Chart & Tile Graph
Message: Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.
Could you please advise what would be the issue? Is there any difference in the version of the package(s) used?

Stacked Barplot:
You can use stacked barplots:
library(ggplot2)
# Stacked bar chart: one bar per group (`As`), split into its subgroups (`ADs`).
# The plot is stored in `p` because the labelling step that follows adds its
# layers to `p`; printing `p` still draws the unlabelled chart.
p <- ggplot(data = df, aes(x = As, y = freq, fill = ADs)) +
  geom_col()
p
you can add this and get labels on the plot:
# Add text labels to the stacked bar chart `p`:
#  * every segment gets "<subgroup>: <freq>", centred inside the segment;
#  * every bar gets its group total, computed by summing `freq` per `As`.
# `fun` and `after_stat()` are the current spellings of the deprecated
# `fun.y` argument and `..y..` notation.
p +
  geom_text(
    aes(label = paste(ADs, freq, sep = ": ")),
    position = position_stack(vjust = 0.5),
    size = 3
  ) + # subgroups
  stat_summary(
    aes(label = after_stat(y), group = As),
    fun = sum,
    geom = "text"
  ) + # groups
  theme(legend.position = "none")
Next two answers are in reference to this post.
Tile Graph:
For this we need to tweak the data:
# Rectangle bounds for the tile and pie charts.
df.2 <- df
# ymax: running total of `freq` inside each `As` group.
df.2$ymax <- ave(df.2$freq, df.2$As, FUN = cumsum)
# Sort the rows by group.
df.2 <- df.2[order(df.2$As), ]
# lag() did not work properly in R 3.3.3, so the shift is done with data.table:
# ymin is the previous row's ymax over the whole table (0 for the first row).
library(data.table)
setDT(df.2)
df.2[, ymin := c(0, ymax[-.N])]
# Ordered copy that supplies the label text for the plots.
df.legend <- df.2[order(df.2$As), ]
Then we can use ggplot again:
# Tile graph: a column of subgroup tiles (x from 0 to 3) beside a column of
# group tiles (x from 3 to 4), each tile spanning ymin..ymax.
ggplot(df.2) +
  geom_rect(aes(fill = As, ymax = ymax, ymin = ymin, xmax = 4, xmin = 3)) +
  geom_rect(aes(fill = ADs, ymax = ymax, ymin = ymin, xmax = 3, xmin = 0)) +
  # Limits, breaks and labels are declared in ONE x scale. Calling xlim() and
  # then scale_x_continuous() defines the scale twice: the second call replaces
  # the first and ggplot2 prints "Scale for 'x' is already present".
  scale_x_continuous(
    limits = c(0, 4),
    breaks = c(1.5, 3.5),
    labels = c("ADs", "As")
  ) +
  # "<subgroup> = <freq>" labels inside the subgroup column (hand-picked y).
  annotate(
    "text",
    x = rep(1.5, 4), y = c(50, 350, 530, 590),
    label = paste(as.character(df.legend$ADs), df.legend$freq, sep = " = ")
  ) +
  # Group names inside the group column (hand-picked y).
  annotate(
    "text",
    x = rep(3.5, 2), y = c(50, 350),
    label = as.character(unique(df.legend$As))
  ) +
  # Text annotations stand in for the legend; axis titles are removed.
  theme(
    aspect.ratio = 1,
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )
Pie Chart:
# Pie chart: the same two bands as the tile graph, bent into rings by mapping
# y to the angle. The outer ring shows the groups, the inner disc the subgroups.
ggplot(df.2) +
  geom_rect(aes(fill = As, ymax = ymax, ymin = ymin, xmax = 4, xmin = 3)) +
  geom_rect(aes(fill = ADs, ymax = ymax, ymin = ymin, xmax = 3, xmin = 0)) +
  # Limits, breaks and labels are declared in ONE x scale. Calling xlim() and
  # then scale_x_continuous() defines the scale twice: the second call replaces
  # the first and ggplot2 prints "Scale for 'x' is already present".
  scale_x_continuous(
    limits = c(0, 4),
    breaks = c(0, 3),
    labels = c("ADs", "As")
  ) +
  coord_polar(theta = "y") +
  # Subgroup names inside the inner disc (hand-picked y positions).
  annotate(
    "text",
    x = rep(1.5, 4), y = c(50, 350, 530, 590),
    label = as.character(df.legend$ADs)
  ) +
  # Group names inside the outer ring (hand-picked y positions).
  annotate(
    "text",
    x = rep(3.5, 2), y = c(50, 350),
    label = as.character(unique(df.legend$As))
  ) +
  # Text annotations stand in for the legend; axis titles are removed.
  theme(
    aspect.ratio = 1,
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )

Related

Adding a centred overlaid title to a ggplot2 doughnut graph

I'm hoping to create a ggplot2 title overlaying a doughnut graph. My reprex is adapted from this example: https://www.r-graph-gallery.com/128-ring-or-donut-plot.html.
# load library
library(ggplot2)
# Test data: one row per category with its count.
data <- data.frame(
  category = c("A", "B", "C"),
  count = c(10, 60, 30)
)
# Share of the total held by each category.
data$fraction <- data$count / sum(data$count)
# Upper edge of each ring segment: the cumulative share.
data$ymax <- cumsum(data$fraction)
# Lower edge: the upper edge of the preceding segment (0 for the first one).
data$ymin <- c(0, data$ymax[-nrow(data)])
# Labels sit halfway between the two edges.
data$labelPosition <- (data$ymin + data$ymax) / 2
# Label text: the count itself, as a string.
data$label <- paste0(data$count)
# Donut chart with the title in its default place above the plot.
ggplot(
  data,
  aes(ymax = ymax, ymin = ymin, xmax = 4, xmin = 3, fill = category)
) +
  geom_rect() +
  geom_label(x = 3.5, aes(y = labelPosition, label = label), size = 6) +
  # Drop coord_polar() to see the plain rectangles the chart is built from.
  coord_polar(theta = "y") +
  # Drop xlim() to close the hole and get a pie chart instead.
  xlim(c(2, 4)) +
  scale_fill_brewer(palette = 1) +
  theme_void() +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5)
  ) +
  ggtitle("My title")
This is what I have currently:
And this is what I want:
I haven't been able to find any documentation demonstrating how to do this in ggplot2. Any suggestions are appreciated.
You can add an annotation layer :
library(ggplot2)
# Donut chart with the title written inside the hole: the text is placed at
# x = 2, the lower x limit, instead of using a regular plot title.
ggplot(
  data,
  aes(ymax = ymax, ymin = ymin, xmax = 4, xmin = 3, fill = category)
) +
  geom_rect() +
  geom_label(x = 3.5, aes(y = labelPosition, label = label), size = 6) +
  annotate(
    "text",
    x = 2, y = 0.5,
    label = "My title", color = "blue", size = 5
  ) +
  coord_polar(theta = "y") +
  xlim(c(2, 4)) +
  scale_fill_brewer(palette = 1) +
  theme_void() +
  theme(legend.position = "top")

How to add $ sign to labels on bar chart in R? [Image & Code Included]

Looking to add a dollar sign on the labels on my x axis and the numbers in the bar chart. Below is my code and the chart.
# Horizontal bar chart of sales per fiscal year; each bar is labelled with the
# raw SALES value (no currency formatting yet).
YTD_bar <- ggplot(
  TYSales_LYSales,
  aes(x = as.character(FSCL_YR), y = SALES)
) +
  geom_bar(stat = "identity", fill = "orange", color = "grey40") +
  geom_text(
    aes(x = as.character(FSCL_YR), y = 0.01, label = SALES),
    hjust = -0.8, vjust = -1,
    size = 3, colour = "black", fontface = "bold",
    angle = 360
  ) +
  coord_flip() +
  labs(
    title = "D27 2020 YTD Sales v 2019 YTD Sales",
    x = "Fiscal Year",
    y = "Sales"
  ) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
YTD_bar
The scales package (installed with ggplot2) has the handy dollar and label_dollar() functions for converting the decimal values into currency.
See the help pages to understand the many options available to adjust the formatting.
library(ggplot2)
library(scales)
# Same bar chart, with currency formatting from the scales package:
# dollar() formats the bar labels, label_dollar() formats the axis ticks.
YTD_bar <- ggplot(
  TYSales_LYSales,
  aes(x = as.character(FSCL_YR), y = SALES)
) +
  geom_bar(stat = "identity", fill = "orange", color = "grey40") +
  geom_text(
    aes(x = as.character(FSCL_YR), y = 0.01, label = dollar(SALES)),
    hjust = -0.8, vjust = -1,
    size = 3, colour = "black", fontface = "bold",
    angle = 360
  ) +
  scale_y_continuous(labels = label_dollar()) +
  coord_flip() +
  labs(
    title = "D27 2020 YTD Sales v 2019 YTD Sales",
    x = "Fiscal Year",
    y = "Sales"
  ) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
YTD_bar
You can do this with liberal application of sprintf("$%0.2f", ...). The %0.2f part tells it to format the values as floating point numbers with two decimal places. You need to do this in two places: (1) within geom_text(), and (2) as part of a call to scale_y_continuous():
# Same bar chart, formatting currency by hand: sprintf("$%0.2f", ...) is
# applied to the bar labels and, through a labelling function, to the axis.
YTD_bar <- ggplot(
  TYSales_LYSales,
  aes(x = as.character(FSCL_YR), y = SALES)
) +
  geom_bar(stat = "identity", fill = "orange", color = "grey40") +
  geom_text(
    aes(
      x = as.character(FSCL_YR),
      y = 0.01,
      label = sprintf("$%0.2f", SALES)
    ),
    hjust = -0.8, vjust = -1,
    size = 3, colour = "black", fontface = "bold",
    angle = 360
  ) +
  scale_y_continuous(
    labels = function(breaks) {
      sprintf("$%0.2f", breaks)
    }
  ) +
  coord_flip() +
  labs(
    title = "D27 2020 YTD Sales v 2019 YTD Sales",
    x = "Fiscal Year",
    y = "Sales"
  ) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
YTD_bar
Data
# Example sales figures: one row per fiscal year.
TYSales_LYSales <- data.frame(
  FSCL_YR = c(2019L, 2020L),
  SALES = c(61851186, 5511072)
)
You can specify the label yourself and add a dollar sign in front by using paste0() function
# Toy data; `text` holds each sales figure with a dollar sign in front.
df <- data.frame(
  yr = c(2019L, 2020L),
  sales = c(1234, 5678)
)
df[["text"]] <- paste0("$", df[["sales"]])
# Horizontal bar chart with hand-made dollar labels: the bar labels come from
# the `text` column, the axis labels are pasted together for fixed breaks.
ggplot(df, aes(x = as.character(yr), y = sales)) +
  geom_col(fill = "orange", color = "grey40") +
  geom_text(
    aes(x = as.character(yr), y = 0.01, label = text),
    hjust = -0.8, vjust = -1,
    size = 3, colour = "black", fontface = "bold",
    angle = 360
  ) +
  scale_y_continuous(
    breaks = seq(0, 6000, by = 2000),
    labels = paste0("$", seq(0, 6000, by = 2000))
  ) +
  coord_flip() +
  theme_bw()

Deleting an entire row of facets of unused factor level combination

I want to remove the 2nd row of facets from my plot below because there is no data for that factor combination.
library(ggplot2)
library(grid)
# Fixed seed so the random columns below are reproducible.
set.seed(5000)
# First data frame: a 4 x 4 grid of (x, y) positions repeated six times (96
# rows), tagged with the facet variables v1, v2, v3 and a random 0/1 flag v4.
# No row combines v1 == "x1" with v2 == "y2".
df1 <- data.frame(
  x = rep(rep(seq(2, 8, 2), 4), 6),
  y = rep(rep(seq(2, 8, 2), each = 4), 6),
  v1 = c(rep("x1", 32), rep("x2", 64)),
  v2 = c(rep("y1", 64), rep("y2", 32)),
  v3 = rep(rep(c("t1", "t2"), each = 16), 3),
  v4 = rbinom(96, 1, 0.5)
)
# Second data frame: 20 random points in [0, 10] x [0, 10], each assigned to a
# random v1 level. `replace = TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
df2 <- data.frame(
  x = runif(20) * 10,
  y = runif(20) * 10,
  v1 = sample(c("x1", "x2"), 20, replace = TRUE)
)
# Plot: filled squares for df1 (colour keyed on v4; the first level is given
# the colour NA) with df2's diamonds drawn on top, faceted by v1 + v2 (rows)
# and v3 (columns). `drop = TRUE` is spelled out because `T` can be reassigned.
ggplot() +
  geom_point(
    data = df1,
    aes(x = x, y = y, colour = factor(v4)),
    shape = 15, size = 5
  ) +
  geom_point(
    data = df2,
    aes(x = x, y = y),
    shape = 23, colour = "black", fill = "white", size = 4
  ) +
  scale_colour_manual(values = c(NA, "black")) +
  facet_grid(v1 + v2 ~ v3, drop = TRUE) +
  coord_equal(ratio = 1) +
  xlim(0, 10) +
  ylim(0, 10)
I tried to use the idea from this post..
# Turn the plot into a grob table, drop every entry whose layout `t` value is
# 5 or 6, and draw what is left on a fresh page.
# NOTE(review): `y` is presumably the ggplot object built above -- it is not
# assigned in the visible code; confirm.
g <- ggplotGrob(y)
pos <- which(g$layout$t %in% c(5, 6))
g$grobs <- g$grobs[-pos]
g$layout <- g$layout[-pos, ]
grid.newpage()
grid.draw(g)
..but got this.
How do I eliminate the white space? Also, is there a straightforward solution to this, without having to manipulate the grobs, etc?
Just modify the data:
# Give df2 a v2 column by stacking one copy of it per v2 level, then keep only
# the rows that are NOT the (v1 = "x1", v2 = "y2") combination.
df2 <- do.call(
  rbind,
  list(
    cbind(df2, v2 = "y1"),
    cbind(df2, v2 = "y2")
  )
)
df2 <- df2[df2$v1 != "x1" | df2$v2 != "y2", ]
# Plot: filled squares for df1 (colour keyed on v4; the first level is given
# the colour NA) with df2's diamonds drawn on top, faceted by v1 + v2 (rows)
# and v3 (columns). `drop = TRUE` is spelled out because `T` can be reassigned.
ggplot() +
  geom_point(
    data = df1,
    aes(x = x, y = y, colour = factor(v4)),
    shape = 15, size = 5
  ) +
  geom_point(
    data = df2,
    aes(x = x, y = y),
    shape = 23, colour = "black", fill = "white", size = 4
  ) +
  scale_colour_manual(values = c(NA, "black")) +
  facet_grid(v1 + v2 ~ v3, drop = TRUE) +
  coord_equal(ratio = 1) +
  xlim(0, 10) +
  ylim(0, 10)

ggplot does not show legend in geom_histogram

I have this code
# Two overlaid relative-frequency histograms of V1 (`coding` in red, `lncrna`
# in blue) plus dashed vertical lines taken from `cdf`.
# Every `+` sits at the END of its line: R treats an expression as finished at
# a line break when it is syntactically complete, so a line that STARTS with
# `+` is evaluated on its own instead of being added to the plot.
ggplot() +
  geom_histogram(
    aes(x = V1, y = (..count..) / sum(..count..)),
    data = coding, stat = "bin", binwidth = 30,
    fill = "red", colour = "red", alpha = .4
  ) +
  geom_histogram(
    aes(x = V1, y = (..count..) / sum(..count..)),
    data = lncrna, stat = "bin", binwidth = 30,
    fill = "blue", colour = "blue", alpha = .4
  ) +
  coord_cartesian(xlim = c(0, 2000)) +
  xlab("Size (nt)") +
  ylab("Percentage (%)") +
  geom_vline(
    data = cdf,
    aes(xintercept = rating.mean, colour = Labels),
    linetype = "dashed", size = 1
  )
that produces a beautiful histogram without legend:
In every post I visit about the same problem, they say to put color inside aes. Nevertheless, this does not give any legend.
I tried:
# Attempt at getting a legend: colour and fill are mapped inside aes(), but the
# same aesthetics are ALSO passed as fixed parameters outside aes().
# NOTE(review): the fixed fill/colour parameters are kept on purpose -- this is
# the variant reported as still not producing a legend.
# Every `+` sits at the END of its line; a line that starts with `+` is
# evaluated on its own instead of being added to the plot.
ggplot() +
  geom_histogram(
    aes(x = V1, y = (..count..) / sum(..count..), color = "red", fill = "red"),
    data = coding, stat = "bin", binwidth = 30,
    fill = "red", colour = "red", alpha = .4
  ) +
  geom_histogram(
    aes(x = V1, y = (..count..) / sum(..count..), color = "blue", fill = "blue"),
    data = lncrna, stat = "bin", binwidth = 30,
    fill = "blue", colour = "blue", alpha = .4
  ) +
  coord_cartesian(xlim = c(0, 2000)) +
  xlab("Size (nt)") +
  ylab("Percentage (%)") +
  geom_vline(
    data = cdf,
    aes(xintercept = rating.mean, colour = Labels),
    linetype = "dashed", size = 1
  )
without success.
How can I put a legend in my graph?
If you don't want to put the data in one data.frame, you can do this:
# Reproducible stand-in data: 1000 normal draws for `coding` and 1000
# log-normal draws for `lncrna`, each stored in a single column V1.
set.seed(42)
coding <- data.frame(V1 = rnorm(n = 1000))
lncrna <- data.frame(V1 = rlnorm(n = 1000))
library(ggplot2)
# Two histograms from two separate data frames sharing one legend: each layer
# maps fill/colour to a constant key ("r" or "b") inside aes(), and the manual
# scales translate those keys into actual colours and legend labels.
ggplot() +
  geom_histogram(
    aes(x = V1, y = (..count..) / sum(..count..), fill = "r", colour = "r"),
    data = coding, stat = "bin", alpha = .4
  ) +
  geom_histogram(
    aes(x = V1, y = (..count..) / sum(..count..), fill = "b", colour = "b"),
    data = lncrna, stat = "bin", alpha = .4
  ) +
  scale_colour_manual(
    name = "group",
    values = c("r" = "red", "b" = "blue"),
    labels = c("b" = "blue values", "r" = "red values")
  ) +
  scale_fill_manual(
    name = "group",
    values = c("r" = "red", "b" = "blue"),
    labels = c("b" = "blue values", "r" = "red values")
  )
The problem is that you can't map your colour inside aes because you've got two separate sets of data. One idea is to bind them and then apply the melt function from the reshape2 package, so that you create a dummy categorical variable you can pass into aes. The code:
# Bind both series into one two-column matrix, melt it to long format and map
# the series name (`Var2`) to fill/colour so the legend is built from the data.
# library() is used instead of require(): it stops with an error when the
# package is missing rather than returning FALSE and failing later.
library(reshape2)
df <- cbind(blue = mtcars$mpg, red = mtcars$mpg * 0.8)
df <- melt(df, id.vars = 1:2)
ggplot() +
  geom_histogram(
    aes(x = value, y = (..count..) / sum(..count..), fill = Var2, color = Var2),
    data = df, stat = "bin", alpha = .4
  )
There you've got your legend

Draw lines between two facets in ggplot2

How can I draw several lines between two facets?
I attempted this by plotting points at the min value of the top graph but they are not between the two facets. See picture below.
This is my code so far:
# Simulated series over 1000 time steps: exponential increments (`y1`) and
# their running total (`y2`). Lengths are derived from `t` instead of being
# repeated as literals.
t <- seq_len(1000)
y1 <- rexp(length(t))
y2 <- cumsum(y1)
# All NA except positions 100 to 200, which are set to 1.
z <- rep(NA, length(t))
z[100:200] <- 1
# Long format: the "Bytes" rows hold y2, the "Changes" rows hold y1.
df <- data.frame(
  t = t,
  values = c(y2, y1),
  type = rep(c("Bytes", "Changes"), each = length(t))
)
# Marker positions for two x ranges, all placed at the minimum of y2 and
# tagged as "Bytes"; the single type value is recycled to every row, so no
# hard-coded row count is needed.
points <- data.frame(
  x = c(10:200, 300:350),
  y = min(y2),
  type = "Bytes"
)
vline.data <- data.frame(
  type = c("Bytes", "Bytes", "Changes", "Changes"),
  vl = c(1, 5, 20, 5)
)
# Line plot of `values` over `t`, one facet row per `type`, with free scales
# and a log10 y axis. The green markers from `points` carry type "Bytes".
# `panel.spacing` is the current name of the deprecated `panel.margin` theme
# element; 0 lines removes the gap between the facets.
g <- ggplot(data = df, aes(x = t, y = values)) +
  geom_line(colour = I("black")) +
  facet_grid(type ~ ., scales = "free") +
  scale_y_continuous(trans = "log10") +
  ylab("Log values") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    panel.spacing = unit(0, "lines")
  ) +
  geom_point(data = points, aes(x = x, y = y), colour = "green")
g
In order to achieve that, you have to set the margins inside the plot to zero. You can do that with expand=c(0,0). The changes I made to your code:
When you use scale_y_continuous, you can define the axis label inside that call, so you don't need a separate ylab.
Changed colour=I("black") to colour="black" inside geom_line.
Added expand=c(0,0) to scale_x_continuous and scale_y_continuous.
The complete code:
# Same plot with the scale expansion switched off (expand = c(0, 0)), so the
# markers at the minimum of y2 sit on the panel edge. Axis titles are set
# inside the scale calls.
# `panel.spacing` is the current name of the deprecated `panel.margin` theme
# element; 0 lines removes the gap between the facets.
ggplot(data = df, aes(x = t, y = values)) +
  geom_line(colour = "black") +
  geom_point(data = points, aes(x = x, y = y), colour = "green") +
  facet_grid(type ~ ., scales = "free") +
  scale_x_continuous("t", expand = c(0, 0)) +
  scale_y_continuous("Log values", trans = "log10", expand = c(0, 0)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    panel.spacing = unit(0, "lines")
  )
which gives:
Adding lines can also be done with geom_segment. Normally the lines (segments) will appear in both facets. If you want them to appear between the two facets, you will have to restrict that in the data parameter:
# Variant that draws the marks as line segments. Each segment layer is given
# only the "Bytes" rows, so it appears in that facet alone.
# `panel.spacing` is the current name of the deprecated `panel.margin` theme
# element; 0 lines removes the gap between the facets.
ggplot(data = df, aes(x = t, y = values)) +
  geom_line(colour = "black") +
  geom_segment(
    data = df[df$type == "Bytes", ],
    aes(x = 10, y = 0, xend = 200, yend = 0),
    colour = "green", size = 2
  ) +
  geom_segment(
    data = df[df$type == "Bytes", ],
    aes(x = 300, y = 0, xend = 350, yend = 0),
    colour = "green", size = 1
  ) +
  facet_grid(type ~ ., scales = "free") +
  scale_x_continuous("t", expand = c(0, 0)) +
  scale_y_continuous("Log values", trans = "log10", expand = c(0, 0)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    panel.spacing = unit(0, "lines")
  )
which gives:

Resources