Deleting an entire row of facets of unused factor level combination - r

I want to remove the 2nd row of facets from my plot below because there is no data for that factor combination.
library(ggplot2)
library(grid)
set.seed(5000)
# generate first df
df1 = data.frame(x=rep(rep(seq(2,8,2),4),6),
y=rep(rep(seq(2,8,2),each=4),6),
v1=c(rep("x1",32),rep("x2",64)),
v2=c(rep("y1",64),rep("y2",32)),
v3=rep(rep(c("t1","t2"),each=16),3),
v4=rbinom(96,1,0.5))
# generate second df
df2 = data.frame(x=runif(20)*10, y=runif(20)*10,
v1=sample(c("x1","x2"),20,T))
# plot
ggplot() +
geom_point(data=df1, aes(x=x, y=y, colour = factor(v4)), shape=15, size=5) +
scale_colour_manual(values = c(NA,"black")) + facet_grid(v1+v2~v3, drop = T) +
geom_point(data=df2, aes(x=x,y=y), shape=23 , colour="black", fill="white", size=4) +
coord_equal(ratio=1) + xlim(0, 10) + ylim(0, 10)
I tried to use the idea from this post..
g=ggplotGrob(y)
pos=which(g$layout$t==5 | g$layout$t==6)
g$layout=g$layout[-c(pos),]
g$grobs=g$grobs[-c(pos)]
grid.newpage()
grid.draw(g)
..but got this.
How do I eliminate the white space? Also, is there a straightforward solution to this, without having to manipulate the grobs, etc?

Just modify the data:
df2 <- rbind(cbind(df2, v2 = "y1"),
cbind(df2, v2 = "y2"))
df2 <- df2[!(df2$v1 == "x1" & df2$v2 == "y2"),]
# plot
ggplot() +
geom_point(data=df1, aes(x=x, y=y, colour = factor(v4)), shape=15, size=5) +
scale_colour_manual(values = c(NA,"black")) + facet_grid(v1+v2~v3, drop = T) +
geom_point(data=df2, aes(x=x,y=y), shape=23 , colour="black", fill="white", size=4) +
coord_equal(ratio=1) + xlim(0, 10) + ylim(0, 10)

Related

Create multi-factor geom_bar with distributions assoicated with each other

Take the following sample data:
set.seed(123456)
#
Male_alive <- rbinom(100,1,0.6)
Male_age <- sample(20:80, 100, T)
#
Female_alive <- rbinom(100,1,0.7)
Female_age <- sample(20:80, 100, T)
#
Alive <- c(Male_alive, Female_alive)
Age <- c(Male_age, Female_age)
Sex <- c(rep('Male', length(Male_alive)),rep('Female', length(Female_alive)))
#
Patients <- data.frame(Alive, Age, Sex)
I can create a simple bar plot with the following code:
ggplot(Patients, aes(Sex, fill = factor(Alive))) +
geom_bar(position = "fill")
But I want to extend on this, by creating a multi-factored bar plot (for Sex and AgeGr) that looks like the image (colours are not important):
#
Patients$AgeGr <- cut(Patients$Age, 6)
using
ggplot(Patients, aes(..., fill = factor(Alive))) +
geom_bar(position = "fill") + geom_wrap(~Sex)
Where the AgeGr is only fills up to the corresponding height of Alive
Perhaps this could be a terrible way to do it:
#to get plots side by side
library(gridExtra)
#plot count of males
pmales <-ggplot(Patients[Patients$Sex=='Male',], aes(Sex, fill =factor(Alive))) + geom_bar(position='fill')
#plot grage males0
pagegrmales0 <-ggplot(Patients[Patients$Sex=='Male' & Patients$Alive==0,], aes(Sex, fill =factor(AgeGr))) + geom_bar(position='fill') + ylab(NULL) +xlab(NULL) + theme(legend.position="none", plot.margin=unit(c(1,1,-0.5,1), "cm"))
#plot grage males1
pagegrmales1 <-ggplot(Patients[Patients$Sex=='Male' & Patients$Alive==1,], aes(Sex, fill =factor(AgeGr))) + geom_bar(position='fill') + ylab(NULL) +xlab(NULL) + theme(legend.position="none", plot.margin=unit(c(-0.5,1,1,1), "cm"))
factorsmale <- grid.arrange(pagegrmales0, pagegrmales1, heights=c(prop.table(table(Patients[Patients$Sex=='Male',]$Alive))[[1]], prop.table(table(Patients[Patients$Sex=='Male',]$Alive))[[2]]), nrow=2)
males <- grid.arrange(pmales, factorsmale, ncol =2, nrow= 2)
########
#plot count of females
pfemales <-ggplot(Patients[Patients$Sex=='Female',], aes(Sex, fill =factor(Alive))) + geom_bar(position='fill')
#plot grage females0
pagegrfemales0 <-ggplot(Patients[Patients$Sex=='Female' & Patients$Alive==0,], aes(Sex, fill =factor(AgeGr))) + geom_bar(position='fill') + ylab(NULL) +xlab(NULL) + theme(legend.position="none", plot.margin=unit(c(1,1,-0.5,1), "cm"))
#plot grage females1
pagegrfemales1 <-ggplot(Patients[Patients$Sex=='Female' & Patients$Alive==1,], aes(Sex, fill =factor(AgeGr))) + geom_bar(position='fill') + ylab(NULL) +xlab(NULL) + theme(legend.position="none", plot.margin=unit(c(-0.5,1,1,1), "cm"))
factorsfemale <- grid.arrange(pagegrfemales0, pagegrfemales1, heights=c(prop.table(table(Patients[Patients$Sex=='Female',]$Alive))[[1]], prop.table(table(Patients[Patients$Sex=='Female',]$Alive))[[2]]), nrow=2)
females <- grid.arrange(pfemales, factorsfemale, ncol =2, nrow= 2)
grid.arrange(males, females, ncol = 2, nrow = 1)
Combine the alive and agegroup columns
Patients$Alive_AgeGr <- paste(Patients$Alive, Patients$AgeGr, sep="_")
Plot
ggplot(Patients, aes(x = factor(Alive), fill = factor(AgeGr))) +
geom_bar(position = "fill") + facet_wrap(~Sex)
First, you need to create long format dataset with only required columns and then plot the stacked-bar plot with wrap, as below:
library("reshape2")
mPatients <- melt(Patients[,-2], id.vars = "Sex")
ggplot(mPatients, aes(x=variable, fill = factor(value))) +
geom_bar(position = "fill") + facet_wrap(~Sex)

ggplot legends when plot is built from two data frames

I have data coming from two different data frames. I am trying to create legend for each data frame. I know I can combine the data frame and do it, but because of my data source it makes the most sense to plot from two different data frames.
Please find the simplified example below. I have gotten close but the 'Main Forecast' in the legend is only white color. I want to show where 'Main Forecast' is red on the outside and white on the inside.
x = seq(1,10, 1)
y = seq(10,100, 10)
df = data.frame(x=x, y=y)
df2 = data.frame(x=5, y=50)
p = ggplot(data=df) +
geom_point(data=df,aes(x=x, y=y, color="Weekly Forecast"), fill="red", size=5, shape=16) +
geom_line(data=df,aes(x=x, y=y), color="red", size=1) +
geom_point(data=df2, aes(x=x, y=y, color="Main Forecast"), size=2, shape=16) +
scale_color_manual("Legend Title", breaks=c("Weekly Forecast", "Main Forecast"), values = c("white","red"))
p
Any assistance will be greatly appreciated.
You need to use one of the symbols that takes a fill (pch = 21:25). You then need to use override.aes to get the legend right. I've moved shared data and aes into the ggplot command.
ggplot(data=df, aes(x=x, y=y)) +
geom_point(aes(color="Weekly Forecast"), shape=16, size = 5) +
geom_line(color="red", size=1) +
geom_point(data=df2, aes(color="Main Forecast"), shape=21, fill = "white", size = 5) +
scale_color_manual("Legend Title", limits=c("Weekly Forecast", "Main Forecast"), values = c("red","red")) +
guides(colour = guide_legend(override.aes = list(pch = c(16, 21), fill = c("red", "white"))))
This can also be done without override.aes:
ggplot(data=df, aes(x=x, y=y)) +
geom_line(aes(color="Main Forecast"), size=1) +
geom_point(aes(color="Weekly Forecast", fill="Weekly Forecast"), shape=21, size = 5) +
geom_point(data=df2, aes(color="Main Forecast", fill="Main Forecast"), shape=21, size = 5) +
scale_color_manual(name="", values = c("red","red")) +
scale_fill_manual(name="", values=c("white","red"))

How can I add a line to one of the facets?

ggplot(all, aes(x=area, y=nq)) +
geom_point(size=0.5) +
geom_abline(data = levelnew, aes(intercept=log10(exp(interceptmax)), slope=fslope)) + #shifted regression line
scale_y_log10(labels = function(y) format(y, scientific = FALSE)) +
scale_x_log10(labels = function(x) format(x, scientific = FALSE)) +
facet_wrap(~levels) +
theme_bw() +
theme(panel.grid.major = element_line(colour = "#808080"))
And I get this figure
Now I want to add one geom_line to one of the facets. Basically, I wanted to have a dotted line (Say x=10,000) in only the major panel. How can I do this?
I don't have your data, so I made some up:
df <- data.frame(x=rnorm(100),y=rnorm(100),z=rep(letters[1:4],each=25))
ggplot(df,aes(x,y)) +
geom_point() +
theme_bw() +
facet_wrap(~z)
To add a vertical line at x = 1 we can use geom_vline() with a dataframe that has the same faceting variable (in my case z='b', but yours will be levels='major'):
ggplot(df,aes(x,y)) +
geom_point() +
theme_bw() +
facet_wrap(~z) +
geom_vline(data = data.frame(xint=1,z="b"), aes(xintercept = xint), linetype = "dotted")
Another way to express this which is possibly easier to generalize (and formatting stuff left out):
ggplot(df, aes(x,y)) +
geom_point() +
facet_wrap(~ z) +
geom_vline(data = subset(df, z == "b"), aes(xintercept = 1))
The key things being: facet first, then decorate facets by subsetting the original data frame, and put the details in a new aes if possible. Other examples of a similar idea:
ggplot(df, aes(x,y)) +
geom_point() +
facet_wrap(~ z) +
geom_vline(data = subset(df, z == "b"), aes(xintercept = 1)) +
geom_smooth(data = subset(df, z == "c"), aes(x, y), method = lm, se = FALSE) +
geom_text(data = subset(df, z == "d"), aes(x = -2, y=0, label = "Foobar"))

How to find Percent Frequency gg plot

There have been a few questions on here asking how to plot percent frequency. I have tried implementing the suggestions but am still having trouble.
I have the following vector:
var <- c(2,2,1,0,1,1,1,1,1,3,2,3,3,5,1,4,4,0,3,4,1,0,3,3,0,0,
1,3,2,6,2,2,2,1,0,2,3,2,0,0,0,0,3,2,2,4,3,2,2,0,4,1,0,1,3,1,4,3,1,2,
6,7,6,1,2,2,4,5,3,0,6,5,2,0,7,1,7,3,1,4,1,1,2,1,1,2,1,1,4,2,0,3,3,2,2,2,5,3,2,5,2,5)
I plotted a histogram using the following code:
df <- data.table(x = var)
df <- df[, .N, by=x]
df$x <- factor(df$x, levels=c(0:25))
p <- ggplot(df, aes(x=x, y= N)) +
geom_bar(
stat="identity", width=1.0,
colour = "darkgreen",
fill = 'paleturquoise4'
)
p <- p + labs(scale_x_discrete(drop=FALSE) )
p = p + coord_cartesian(ylim=c(0, 50)) +
scale_y_continuous(breaks=seq(0, 50, 2))
print(p)
I tried using the following but it does not work.
p <- ggplot(df, aes(x=x, y= N)) +
geom_bar(
aes(y = (..count..)/sum(..count..)),
stat="identity", width=1.0,
colour = "darkgreen",
fill = 'paleturquoise4'
)
One thing you can do is that you can do the calculation before you draw the graphic. But, if I follow your approach, you would want something like this.
ggplot(df, aes(x=x)) +
geom_bar(aes(y = N/sum(N)), stat="identity", width=1.0,
colour = "dark green", fill = 'paleturquoise4') +
ylab("y")

Draw lines between two facets in ggplot2

How can I draw several lines between two facets?
I attempted this by plotting points at the min value of the top graph but they are not between the two facets. See picture below.
This is my code so far:
t <- seq(1:1000)
y1 <- rexp(1000)
y2 <- cumsum(y1)
z <- rep(NA, length(t))
z[100:200] <- 1
df <- data.frame(t=t, values=c(y2,y1), type=rep(c("Bytes","Changes"), each=1000))
points <- data.frame(x=c(10:200,300:350), y=min(y2), type=rep("Bytes",242))
vline.data <- data.frame(type = c("Bytes","Bytes","Changes","Changes"), vl=c(1,5,20,5))
g <- ggplot(data=df, aes(x=t, y=values)) +
geom_line(colour=I("black")) +
facet_grid(type ~ ., scales="free") +
scale_y_continuous(trans="log10") +
ylab("Log values") +
theme(axis.text.x = element_text(angle = 90, hjust = 1), panel.margin = unit(0, "lines"))+
geom_point(data=points, aes(x = x, y = y), colour="green")
g
In order to achieve that, you have to set the margins inside the plot to zero. You can do that with expand=c(0,0). The changes I made to your code:
When you use scale_y_continuous, you can define the axis label inside that part and you don't need a seperarate ylab.
Changed colour=I("black") to colour="black" inside geom_line.
Added expand=c(0,0) to scale_x_continuous and scale_y_continuous.
The complete code:
ggplot(data=df, aes(x=t, y=values)) +
geom_line(colour="black") +
geom_point(data=points, aes(x = x, y = y), colour="green") +
facet_grid(type ~ ., scales="free") +
scale_x_continuous("t", expand=c(0,0)) +
scale_y_continuous("Log values", trans="log10", expand=c(0,0)) +
theme(axis.text.x=element_text(angle=90, vjust=0.5), panel.margin=unit(0, "lines"))
which gives:
Adding lines can also be done with geom_segment. Normally the lines (segments) will appear in both facets. If you want them to appear between the two facets, you will have to restrict that in data parameter:
ggplot(data=df, aes(x=t, y=values)) +
geom_line(colour="black") +
geom_segment(data=df[df$type=="Bytes",], aes(x=10, y=0, xend=200, yend=0), colour="green", size=2) +
geom_segment(data=df[df$type=="Bytes",], aes(x=300, y=0, xend=350, yend=0), colour="green", size=1) +
facet_grid(type ~ ., scales="free") +
scale_x_continuous("t", expand=c(0,0)) +
scale_y_continuous("Log values", trans="log10", expand=c(0,0)) +
theme(axis.text.x=element_text(angle=90, vjust=0.5), panel.margin=unit(0, "lines"))
which gives:

Resources