I have some data with standard errors associated and would like to display these with error bars. That's what I have:
# Simulated example data: 24 hourly values for each of three series
# ("a", "b", "c"), with a per-point half-width `ci` for the error bars.
hod <- data.frame(
  h = rep(1:24, times = 3),
  mean = 1:(24 * 3) + runif(24 * 3, 0, 5),
  ci = runif(24 * 3, 0, 2),
  t = rep(c("a", "b", "c"), each = 24)
)

# One dodge object shared by every layer so lines, bars and points are
# shifted by the same amount.
pd <- position_dodge(0.3)

# NOTE: `ylabel`, `varlabels` and `var` are not defined in this snippet; they
# must already exist in the calling environment.
dayplot <- ggplot(
  hod,
  aes(x = h, y = mean, colour = as.factor(t), group = as.factor(t))
) +
  geom_line(size = 1, position = pd) +
  geom_errorbar(
    aes(ymin = mean - ci, ymax = mean + ci),
    position = pd,
    size = 0.5,
    width = 1
  ) +
  geom_point(fill = "white", shape = 21, size = 1, position = pd) +
  scale_x_continuous(
    limits = c(-0.5, 23.5),
    breaks = 0:8 * 3,
    # Zero-padded "HH:00" labels, one per break.
    labels = sprintf("%02d:00", 0:8 * 3)
  ) +
  labs(x = "Hour of day", y = ylabel, title = varlabels[var]) +
  theme_minimal() +
  theme(
    plot.background = element_rect(fill = rgb(0.97, 0.97, 0.97), linetype = 0),
    plot.margin = unit(c(1, 0, 1, 1), "cm"),
    panel.grid.major = element_line(colour = rgb(0.87, 0.87, 0.87)),
    panel.grid.minor = element_blank(),
    axis.title.x = element_text(vjust = -1),
    axis.title.y = element_text(angle = 90, vjust = 0),
    legend.margin = unit(c(0), "cm"),
    legend.key.height = unit(c(0.9), "cm")
  )
The only thing of interest is probably:
# Error-bar layer from the plot above: bars run from mean - ci to mean + ci,
# with width 1, size 0.5, and the shared dodge `pd`.
geom_errorbar(aes(ymin=mean-ci, ymax=mean+ci),
width=1,
size=0.5,
position=pd)
It gives:
Now when I group the data by a factor variable (as.factor(t)), I get several lines instead of one, which is what I want, BUT, as you can see, the horizontal lines at the error bars are more narrow, and I can't figure out why. I tried changing and even taking away the width and size attribute of geom_errorbar, but nothing happens. Is there a way to have the same width of the horizontal lines for every chart, no matter the data? I mean, why should it vary? Or does that width convey some information?
Below is a reproducible example using random data. The fix to the problem is to multiply the width by the number of classes/factors that you have. In the plot below, since I used three factors, using a width of 3 fixes the problem. ggplot2 seems to calculate the relative width by the number of data points in your dataset, rather than the numeric values on the x-axis. This is (IMO) a bug.
library(ggplot2)
library(grid)
# Plot with factors: three series, grouped and coloured by `t`.
hod <- data.frame(
  h = rep(1:24, times = 3),
  mean = 1:(24 * 3) + runif(24 * 3, 0, 5),
  ci = runif(24 * 3, 0, 2),
  t = rep(c("a", "b", "c"), each = 24)
)

# Shared dodge for all three layers.
pd <- position_dodge(0.3)

dayplot <- ggplot(
  hod,
  aes(x = h, y = mean, colour = as.factor(t), group = as.factor(t))
) +
  geom_line(size = 1, position = pd) +
  geom_errorbar(
    aes(ymin = mean - ci, ymax = mean + ci),
    position = pd,
    size = 0.5,
    # NOTE(review): width = 1 is kept to reproduce the narrow caps; the
    # accompanying text suggests multiplying it by the number of groups
    # (3 here) as a workaround.
    width = 1
  ) +
  geom_point(fill = "white", shape = 21, size = 1, position = pd) +
  scale_x_continuous(
    limits = c(-0.5, 23.5),
    breaks = 0:8 * 3,
    # Zero-padded "HH:00" labels, one per break.
    labels = sprintf("%02d:00", 0:8 * 3)
  ) +
  xlab("Hour of day") +
  theme_minimal() +
  theme(
    plot.background = element_rect(fill = rgb(0.97, 0.97, 0.97), linetype = 0),
    plot.margin = unit(c(1, 0, 1, 1), "cm"),
    panel.grid.major = element_line(colour = rgb(0.87, 0.87, 0.87)),
    panel.grid.minor = element_blank(),
    axis.title.x = element_text(vjust = -1),
    axis.title.y = element_text(angle = 90, vjust = 0),
    legend.margin = unit(c(0), "cm"),
    legend.key.height = unit(c(0.9), "cm")
  )

print(dayplot)
# Plot without factors: a single ungrouped series of 24 hourly values.
#
# `h` is 1:24 so that it matches the 24 values of `mean` and `ci`. Supplying
# 72 hours with 24 means makes data.frame() silently recycle the shorter
# columns, so every point, line segment and error bar would be drawn three
# times on top of itself.
hod <- data.frame(
  h = 1:24,
  mean = 1:24 + runif(24, 0, 5),
  ci = runif(24, 0, 2)
)

# Same dodge as in the grouped plot, so the two plots differ only in grouping.
pd <- position_dodge(0.3)

dayplot <- ggplot(hod, aes(x = h, y = mean)) +
  geom_line(position = pd, size = 1) +
  geom_errorbar(
    aes(ymin = mean - ci, ymax = mean + ci),
    width = 1,
    size = 0.5,
    position = pd
  ) +
  geom_point(position = pd, shape = 21, size = 1, fill = "white") +
  scale_x_continuous(
    limits = c(-0.5, 23.5),
    breaks = 0:8 * 3,
    # Zero-padded "HH:00" labels, one per break.
    labels = sprintf("%02d:00", 0:8 * 3)
  ) +
  xlab("Hour of day") +
  theme_minimal() +
  theme(
    plot.margin = unit(c(1, 0, 1, 1), "cm"),
    axis.title.x = element_text(vjust = -1),
    axis.title.y = element_text(angle = 90, vjust = 0),
    legend.margin = unit(c(0), "cm"),
    legend.key.height = unit(c(0.9), "cm"),
    panel.grid.major = element_line(colour = rgb(0.87, 0.87, 0.87)),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = rgb(0.97, 0.97, 0.97), linetype = 0)
  )

print(dayplot)
I have managed to solve a similar issue. In my case I wanted to set both horizontal and vertical errorbar heads to the same size - regardless of the aspect ratio of the plot.
Based on the original posted code:
# Build the plot so the computed panel ranges can be read from the layout.
f <- ggplot_build(dayplot)
# Set the error-bar head size to 2% of the x range (width) and 2% of the
# y range (height).
# NOTE(review): layer indices 5 and 6 presumably refer to the answerer's own
# plot; `dayplot` as defined above has only three layers (line, error bar,
# point), so the indices must be adapted before use.
f$plot$layers[[5]]$geom_params$width <- 0.02 * diff(f$layout$panel_params[[1]]$x.range)
f$plot$layers[[6]]$geom_params$height <- 0.02 * diff(f$layout$panel_params[[1]]$y.range)
# Keep the modified plot object.
dayplot <- f$plot
This will set the error-bar head to 2% of the axis range. It may solve your issue as well.
Related
I have little experience with ggplot2. I am trying to plot coverage probability and cohort size using the code below:
library("reshape2")
library("ggplot2")
library(latex2exp)
# Coverage probabilities of two rates at each cohort size.
CP1 <- c(0.953, 0.942, 0.947, 0.958)
CP2 <- c(0.937, 0.952, 0.955, 0.957)
cohort <- c(500, 1000, 5000, 10000)
mdata <- data.frame(rate1 = CP1, rate2 = CP2, cohort.size = cohort)

# Long format: one row per (cohort.size, rate) pair; the values go to "CP".
mydata <- melt(mdata, id = "cohort.size", value.name = "CP")

ggplot(mydata, aes(x = cohort.size, y = CP)) +
  geom_line(aes(colour = variable), size = 1) +
  geom_point(shape = 0, size = 4) +
  coord_cartesian(ylim = c(0, 1)) +
  scale_x_continuous(breaks = c(500, 1000, 5000, 10000)) +
  scale_color_discrete(labels = unname(TeX(c(" $\\r_1$", "$\\r_2$")))) +
  # Two dashed horizontal reference lines.
  geom_hline(yintercept = c(0.936, 0.964), linetype = "dashed") +
  theme(
    legend.title = element_blank(),
    legend.text = element_text(size = 20),
    legend.position = c(.85, .85),
    legend.justification = c("right", "top"),
    legend.box.just = "right",
    legend.margin = margin(6, 6, 6, 6),
    axis.title.x = element_text(color = "#993333", size = 14, face = "bold"),
    axis.title.y = element_text(color = "#993333", size = 14, face = "bold"),
    plot.title = element_text(color = "#993333", size = 14, face = "bold")
  ) +
  xlab("Cohort Size") +
  ylab("Coverage Proability") +
  annotate(
    "text",
    x = 8700,
    y = 0.68,
    label = expression(bold(paste("MN=57% \n AB=38% \n XYZ=5%" ))),
    parse = TRUE,
    size = 5
  )
I have three questions:
1. When I run the code, I get a warning; how can I fix it.
2. There are two horizontal black dashed lines and I want to have just one legend for both to represent '95% CL'.
3. I feel the code is too much, is there a much simpler way of writing it using ggplot2 only.
Thanks!!
I can't install latex2exp. Without this package, you simply can try this and in my opinion all three questions are solved:
# Same plot without latex2exp. Mapping `linetype` to a constant label inside
# aes() gives the two reference lines a single shared legend entry.
ggplot(mydata, aes(x = cohort.size, y = CP)) +
  geom_line(aes(colour = variable), size = 1) +
  geom_point(shape = 0, size = 4) +
  geom_hline(
    data = data.frame(yintercept = c(0.936, 0.964)),
    mapping = aes(yintercept = yintercept, linetype = '95% CL')
  ) +
  # Empty legend title; linetype 2 for the single '95% CL' key.
  scale_linetype_manual("", values = 2) +
  ylim(0, 1) +
  # Plain-text label with a bold font face.
  annotate(
    "text",
    x = 8700,
    y = 0.68,
    label = paste("MN=57%\n AB=38%\n XYZ=5%" ),
    fontface = 2,
    size = 5
  )
I'd like to add the count values calculated in the geom_histogram function on ggplot2. I've put the ggplot2 into a loop so I can produce multiple plots, in my case 30 but for ease, here is a dummy set for only four plots. Facet wrap didn't work as the geom density was pooling the data across all factors before calculating proportions, rather than within a factor/variable. To produce this plot, I've essentially mixed a whole bunch of code from various sources so credit to them.
library(dplyr)
library(ggplot2)
library(ggridges)
library(reshape2)
library(gridExtra)
# Make the data ----
# Three numeric measurements (A, B, C), 400 rows, cycling through the four
# `names` levels W, X, Y, Z.
df.fact <- data.frame(
  A = rnorm(400, mean = 350, sd = 160),
  B = rnorm(400, mean = 300, sd = 100),
  C = rnorm(400, mean = 200, sd = 80),
  names = rep(factor(LETTERS[23:26]), 100)
)
# Long format: one row per (names, measurement); values go to `Length2` and
# the measurement label is renamed to `TSM.FACT`.
df.test <- melt(df.fact, id.vars = "names", value.name = "Length2")
names(df.test)[names(df.test) == "variable"] <- "TSM.FACT"

# Create the plot list ----
# One plot per level of `names` that actually occurs in the data.
name_levels <- levels(droplevels(df.test$names))
myplots <- vector("list", length(name_levels))

# Loop for plots ----
for (i in seq_along(name_levels)) {
  # The data are subset eagerly, so each stored plot keeps its own data and
  # no eval(substitute()) trick is needed to freeze `i`.
  df_i <- df.test[df.test$names == name_levels[i], ]

  # Per-group mean of Length2 for the dashed vertical lines. Base aggregate()
  # replaces plyr::ddply()/numcolwise(), which were never loaded.
  group_means <- aggregate(Length2 ~ TSM.FACT, data = df_i, FUN = mean)

  p1 <- ggplot(
    data = df_i,
    aes(x = Length2, group = TSM.FACT, colour = TSM.FACT, fill = TSM.FACT)
  ) +
    # Bar height = bin width * density, i.e. the share of the group per bin.
    geom_histogram(
      aes(y = stat(width * density)),
      position = "dodge", binwidth = 50, alpha = 0.4, show.legend = TRUE
    ) +
    ggtitle(name_levels[i]) +
    # The density curve is multiplied by 50, the histogram binwidth.
    geom_density_line(
      stat = "density",
      aes(y = (..count..) / sum(..count..) * 50),
      alpha = 0.3, size = 0.5, show.legend = FALSE
    ) +
    geom_vline(
      data = group_means,
      mapping = aes(xintercept = Length2, group = TSM.FACT, colour = TSM.FACT),
      linetype = 2, size = 1, show.legend = FALSE
    ) +
    # scales is not attached in this script, so the call is namespaced.
    scale_y_continuous(labels = scales::percent_format()) +
    ylab("relative frequency") +
    scale_color_manual(values = c("#00B2EE", "#1E90FF", "#104E8B")) +
    scale_fill_manual(values = c("#00B2EE", "#1E90FF", "#104E8B")) +
    theme_bw() +
    theme(
      plot.title = element_text(lineheight = 0.5, hjust = 0.5, size = 10),
      strip.text.y = element_text(hjust = 1, angle = 0),
      strip.text.x = element_text(size = 10, vjust = 0.9),
      strip.text = element_text(
        margin = margin(t = 0.3, r = 1, b = 0.3, l = 1),
        size = 8, debug = FALSE, vjust = 0.2
      ),
      strip.background = element_blank(),
      axis.text.x = element_text(
        size = 8, angle = 0, vjust = 0.2,
        margin = margin(t = 0.3, r = 0.1, b = 0.3, l = 0.1)
      ),
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      axis.line.x = element_line(colour = "black"),
      axis.line.y = element_line(colour = "black"),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank(),
      legend.position = c(0.9, 0.9),
      legend.title = element_blank(),
      legend.key = element_blank()
    )

  print(i)
  # Draw once; the original drew each plot twice via print() and plot().
  print(p1)
  myplots[[i]] <- p1
}

# Join the plots ----
# Only `grobs` is passed: `plotlist` and `shared.legend` are not arguments of
# grid.arrange().
panelplot <- grid.arrange(grobs = myplots)
Unfortunately I am unable to reproduce your example. I can recommend adding a column that includes the sum of each bar (let's name it "Bar")
The required addition to the ggplot code then involves:
# Layer to add to the ggplot chain: prints the precomputed per-bar total from
# column `Bar`, positioned with position_stack(); change `vjust` to move it.
geom_text(aes(label = Bar), position = position_stack(vjust = 1)) +
The text height above the bar can be adjusted with vjust
I am creating faceted box plots that are grouped by a variable. Instead of having the x-axis text be the factors for the x-axis variable I'd like the x-axis text to be the grouping variable.
However, I don't just want to use the grouping variable as my x-axis variable because I'd like the boxplots to cluster. It's hard to explain well, but I think it's clear from the code and comments below.
Let me know if you have any suggestions or can help and thanks in advance!
library(ggplot2)
library(scales)
# Colours used by the plots below: outline/line colour and background fill.
ln_clr <- "black"
bk_clr <- "white"

set.seed(1)

# Creates variables for a dataset (120 observations each).
donor <- rep(paste0("Donor", 1:3), each = 40)
machine <- sample(rep(paste0("Machine", 1:4), times = 30))
gene <- rep(paste0("Gene", LETTERS[1:5]), each = 24)
value <- rnorm(
  24 * 5,
  mean = rep(c(0.5, 10, 1000, 25000, 8000), each = 24),
  sd = rep(c(0.5, 8, 900, 9000, 3000), each = 24)
)

# Makes all values positive (vectorised replacement for the element loop).
value <- abs(value)

# Creates a data frame from variables
df <- data.frame(donor, machine, gene, value)

# Adds a clone variable: within each run of four rows the clone letters A-D
# are appended to the donor name, e.g. "Donor1A", "Donor1B", ...
# The previous loop header `1:nrow(df)/4` parsed as `(1:nrow(df)) / 4`, so it
# ran nrow(df) outer iterations and indexed past the last row. The vectorised
# form assigns exactly one label per row.
clns <- LETTERS[1:4]
df$clone <- as.factor(paste0(df$donor, rep(clns, length.out = nrow(df))))
# ----------------------------------------------------------------------------
# bp1: facets by machine and clusters the clone boxplots within each donor,
# but the x-axis shows donor where clone is wanted. Setting x to clone loses
# the clustering and makes the graph harder to read.
bp1 <- ggplot(df, aes(x = donor, y = value, group = clone)) +
  stat_boxplot(
    geom = "errorbar",
    coef = 1,
    width = 0.25,
    size = 0.7,
    position = position_dodge(width = .83)
  ) +
  geom_boxplot(
    coef = 1,
    colour = ln_clr,
    alpha = 1,
    lwd = 0.3,
    outlier.shape = NA,
    position = position_dodge(width = .83)
  ) +
  geom_point(
    mapping = aes(group = clone),
    alpha = 0.9,
    size = 1.8,
    position = position_dodge(width = 0.83)
  ) +
  facet_wrap(~ machine, ncol = 2, scales = "free_x")

bp1 +
  scale_y_log10(expand = c(0, 0)) +
  theme(
    strip.background = element_rect(colour = ln_clr, fill = bk_clr, size = 1),
    axis.text.x = element_text(
      size = rel(1), colour = "black", angle = 45, hjust = 1
    )
  )

# bp2: facets by machine and donor, which clusters the clones but gives one
# panel per machine/donor pair. This could be okay if there were gaps between
# machines only, the donor facet labels could be removed, and each machine
# label appeared once.
bp2 <- ggplot(df, aes(x = clone, y = value)) +
  stat_boxplot(
    geom = "errorbar",
    coef = 1,
    width = 0.25,
    size = 0.7,
    position = position_dodge(width = .83)
  ) +
  geom_boxplot(
    coef = 1,
    colour = ln_clr,
    alpha = 1,
    lwd = 0.3,
    outlier.shape = NA,
    position = position_dodge(width = .83)
  ) +
  geom_point(
    alpha = 0.9,
    size = 1.8,
    position = position_dodge(width = 0.83)
  ) +
  facet_wrap(machine ~ donor, scales = "free_x", ncol = 6)

bp2 +
  scale_y_log10(expand = c(0, 0)) +
  theme(
    panel.spacing = unit(0, "lines"),
    strip.background = element_rect(colour = ln_clr, fill = bk_clr, size = 1),
    axis.text.x = element_text(
      size = rel(1), colour = "black", angle = 45, hjust = 1
    )
  )
Below is an example comparing what I'd like in an ideal world (Top two facets) as compared to what I'm getting (bottom two facets).
I'm not sure I understand exactly what you're trying to do, so let me know if this is on the right track:
library(dplyr)
# Shared dodge for the boxplot and point layers.
pd <- position_dodge(width = 0.83)

# Strip the "DonorN" prefix from clone (leaving the letter) and the "Donor"
# prefix from donor (leaving the number), then colour by donor and dodge each
# clone/donor combination.
df %>%
  mutate(
    clone = gsub("Donor[1-3]", "", clone),
    donor = gsub("Donor", "", donor)
  ) %>%
  ggplot(
    aes(x = clone, y = value, color = donor, group = interaction(clone, donor))
  ) +
  geom_boxplot(coef = 1, lwd = 0.3, outlier.shape = NA, position = pd) +
  geom_point(alpha = 0.9, size = 1.8, position = pd) +
  facet_wrap(~ machine, ncol = 2, scales = "free_x") +
  scale_y_log10(expand = c(0.02, 0)) +
  theme(
    strip.background = element_rect(colour = ln_clr, fill = bk_clr, size = 1)
  )
How about this:
# Clone on the x-axis, one box per clone/donor combination, faceted by machine.
ggplot(
  df,
  aes(x = clone, y = value, group = interaction(clone, donor))
) +
  geom_boxplot(coef = 1, lwd = 0.3, outlier.shape = NA) +
  geom_point(alpha = 0.9, size = 1.8) +
  facet_wrap(~ machine, ncol = 2, scales = "free_x") +
  scale_y_log10(expand = c(0.02, 0)) +
  theme(
    strip.background = element_rect(colour = ln_clr, fill = bk_clr, size = 1),
    axis.text.x = element_text(
      size = rel(1), colour = "black", angle = 45, hjust = 1
    )
  )
I found a workaround for this problem, but it's not very elegant. I'd be super happy if someone came up with a better solution. Using the code to create a function for a "multiplot" found here and adding the code below, I was able to do what I wanted. However, this is a slightly wonky solution in that I can't really format my titles with boxes around them, and there are still two "clone" titles on the x-axis that I can't easily replace with a single x-axis title. Also, had I had many "machines" in my example, this solution would have been painful to scale. All in all, not ideal but passable for what I need. Special thanks to Eipi10 for their help, I appreciate it.
# Creates a multi-plot function for use in the graphs below
#
# Draws several plots on one page using a grid layout.
#
# Arguments:
#   ...       plot objects to draw.
#   plotlist  optional list of further plot objects, appended after `...`.
#   file      accepted for compatibility; not used by the function body.
#   cols      number of columns in the layout (used only if `layout` is NULL).
#   layout    optional matrix of plot indices; the cells holding value i give
#             the region where plot i is drawn. If NULL, a layout with `cols`
#             columns is generated.
#
# With no plots the function returns invisible NULL without opening a page.
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  numPlots <- length(plots)

  # Nothing to draw. Without this guard `1:numPlots` would iterate over
  # c(1, 0) and fail on plots[[1]].
  if (numPlots == 0) {
    return(invisible(NULL))
  }

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # ncol: number of columns of plots
    # nrow: number of rows needed, calculated from the number of columns
    rows <- ceiling(numPlots / cols)
    layout <- matrix(seq_len(cols * rows), ncol = cols, nrow = rows)
  }

  if (numPlots == 1) {
    print(plots[[1]])
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in seq_len(numPlots)) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}
# Call multiplot function after storing each of the below plots as variables
ln_clr <- "black"
bk_clr <- "white"

# Builds the boxplot panel for one machine, faceted by donor.
#
# Arguments:
#   data          data frame with columns machine, clone, value and donor.
#   machine_id    machine number; selects rows where machine is
#                 "Machine<id>" and sets the title "Machine <id>".
#   hide_x_title  if TRUE, the x-axis title is removed.
#   hide_y_axis   if TRUE, the y-axis title, line, text and ticks are removed
#                 (used for panels placed in the right-hand column).
#   line_colour, fill_colour  outline and background colours.
# Returns the ggplot object.
machine_panel <- function(data, machine_id,
                          hide_x_title = FALSE, hide_y_axis = FALSE,
                          line_colour = ln_clr, fill_colour = bk_clr) {
  panel_data <- data[data$machine == paste0("Machine", machine_id), ]

  p <- ggplot(panel_data, aes(x = clone, y = value)) +
    geom_boxplot(coef = 1, outlier.shape = NA, lwd = 0.3) +
    geom_point(size = 1.8, alpha = 0.9) +
    ggtitle(paste("Machine", machine_id)) +
    # Same y limits for every panel so the four plots are comparable.
    expand_limits(y = c(0.001, 10^5)) +
    facet_wrap(~ donor, nrow = 1, scales = "free_x") +
    scale_y_log10(expand = c(0, 0)) +
    theme(
      axis.text.x = element_text(
        size = rel(1), colour = line_colour, angle = 45, hjust = 1
      ),
      panel.spacing = unit(0.25, "lines"),
      plot.title = element_text(hjust = 0.5),
      strip.text.x = element_text(
        size = rel(1), face = "bold", colour = line_colour
      ),
      strip.background = element_rect(
        colour = line_colour, fill = fill_colour, size = 1
      ),
      axis.line.x = element_line(size = 1.25, colour = line_colour),
      axis.line.y = element_line(size = 1.25, colour = line_colour),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.background = element_rect(fill = fill_colour),
      panel.border = element_blank(),
      plot.background = element_rect(fill = fill_colour)
    )

  if (hide_x_title) {
    p <- p + theme(axis.title.x = element_blank())
  }
  if (hide_y_axis) {
    # element_blank() added later replaces the axis.line.y set above.
    p <- p + theme(
      axis.title.y = element_blank(),
      axis.line.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank()
    )
  }
  p
}

# multiplot() fills the layout column by column, so with cols = 2 the page is:
#   bp3 | bp5
#   bp4 | bp6
# The top row drops the x-axis title; the right column drops the y axis.
bp3 <- machine_panel(df, 1, hide_x_title = TRUE)
bp4 <- machine_panel(df, 2)
bp5 <- machine_panel(df, 3, hide_x_title = TRUE, hide_y_axis = TRUE)
bp6 <- machine_panel(df, 4, hide_y_axis = TRUE)

# Plot all 4 graphs and saves them as a output file
png(filename="graph3.png", width= 9, height= 7.5, units = "in", res=600)
multiplot(bp3, bp4, bp5, bp6, cols=2)
dev.off()
Alternatively, if I set the "strip.text.x = " and the "strip.background =" as element_blank(). I can generate the below:
I need to gather two facet columns into one column with ggplot2.
In the following example, I need to overlay the content of the two columns DEG and RAN into one, while giving different colours to DEG and RAN data (small points and smooth line) and provide the corresponding legend (so I can distinguish them as they are overlayed).
I feel my code is not too, too far from what I need, but the relative complexity of the dataset blocks me. How to go about achieving this in ggplot2?
Here's my code so far:
require(reshape2)
library(ggplot2)
library(RColorBrewer)
# csv file available here: https://www.dropbox.com/s/bm9hd0t5ak74k89/4.csv?dl=0
fileName <- "./4.csv"
mydata <- read.csv(fileName, sep = ",", header = TRUE)

# Melt on "id", split the variable name on "_" into NM, ORD and CAT, then
# cast so that each CAT value becomes its own column.
dataM <- melt(mydata, c("id"))
dataM <- cbind(
  dataM,
  colsplit(dataM$variable, pattern = "_", names = c("NM", "ORD", "CAT"))
)
dataM$variable <- NULL
dataM <- dcast(dataM, ... ~ CAT, value.var = "value")

# Reversed "Spectral" palette generator (not used by the plot below).
my_palette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))

# One panel per NM (rows) x ORD (columns) combination.
ggplot(dataM, aes(x = NR, y = ASPL)) +
  geom_point(alpha = .5, size = .4) +
  stat_smooth(size = .5, se = FALSE) +
  theme_bw() +
  theme(
    plot.background = element_blank(),
    axis.line = element_blank(),
    legend.key = element_blank(),
    legend.title = element_blank()
  ) +
  scale_y_continuous(name = "ASPL", limits = c(1, 7), expand = c(0, 0)) +
  scale_x_continuous(name = "NR", limits = c(0, 100), expand = c(0, 0)) +
  theme(
    legend.position = "bottom",
    axis.title.x = element_text(vjust = -0.3, face = "bold", size = 12),
    axis.title.y = element_text(vjust = 1.5, face = "bold", size = 12)
  ) +
  ggtitle("Title") +
  theme(
    plot.title = element_text(lineheight = .8, face = "bold"),
    title = element_text(vjust = 2)
  ) +
  facet_grid(NM ~ ORD)
Here's what it gives me right now:
Extra question: how come DEG/SF doesn't show a smooth line?
You can use the group aesthetic to define that data points with the same value of ORD belong together. You can also map aesthetics shape and color to this variable. You can also use . to specify that the facets are not split along a specific dimension.
I have made the changes to your code below after transforming NR and ASPL to numeric variables:
# Make both plotted variables numeric before plotting.
dataM$NR <- as.integer(dataM$NR)
dataM$ASPL <- as.numeric(dataM$ASPL)

# ORD is mapped to group, colour and point shape, so it is overlaid within
# each panel; panels are split by NM only ("." = no split along columns).
ggplot(dataM, aes(x = NR, y = ASPL, group = ORD, color = ORD)) +
  geom_point(aes(shape = ORD), alpha = .5, size = .7) + ## increased size
  stat_smooth(size = .5, se = FALSE) +
  theme_bw() +
  theme(
    plot.background = element_blank(),
    axis.line = element_blank(),
    legend.key = element_blank(),
    legend.title = element_blank()
  ) +
  scale_y_continuous(name = "ASPL", limits = c(1, 7), expand = c(0, 0)) +
  scale_x_continuous(name = "NR", limits = c(0, 100), expand = c(0, 0)) +
  theme(
    legend.position = "bottom",
    axis.title.x = element_text(vjust = -0.3, face = "bold", size = 12),
    axis.title.y = element_text(vjust = 1.5, face = "bold", size = 12)
  ) +
  ggtitle("Title") +
  theme(
    plot.title = element_text(lineheight = .8, face = "bold"),
    title = element_text(vjust = 2)
  ) +
  facet_grid(NM ~ .)
I am not sure what is the default date interval in ggplot. My data has five data points including Sep-2011, Dec-2011, Mar-2012,Jun-2012 and Sep-2012.
The ggplot displays different data points from my data which I found a bit annoying. Am I missing anything?
Could you help me display "Sep-2011, Dec-2011, Mar-2012, Jun-2012 and Sep-2012"?
# Example data (dput output): 30 rows covering 6 reasons (Var.1) for each of
# 5 quarter start dates (YearQuarter, stored as a factor), with the numeric
# `value` plotted on the y-axis below.
x4.1.m<- structure(list(Var.1=structure(c(1L,2L,3L,4L,5L,6L,1L,2L,3L,4L,5L,6L,1L,2L,3L,4L,5L,6L,1L,2L,3L,4L,5L,6L,1L,2L,3L,4L,5L,6L),.Label=c("I'vechangedforwork/anewjob/goneonaworkplan","IwantaphonethatVodafonedoesn'toffer","IwantBestMates/Favourites","Iwasofferedorsawabetterofferonanothernetwork","Issueswiththe2degreesnetwork(poorcoverage)","Other"),class="factor"),YearQuarter=structure(c(1L,1L,1L,1L,1L,1L,2L,2L,2L,2L,2L,2L,3L,3L,3L,3L,3L,3L,4L,4L,4L,4L,4L,4L,5L,5L,5L,5L,5L,5L),.Label=c("2011-09-01","2011-12-01","2012-03-01","2012-06-01","2012-09-01"),class="factor"),value=c(0.23,0.23,0.121,0.25,0.223,0.14,0.39,0.22,0.05,0.37,0.25,0.2,0.09,0.14,0.05,0.3,0.4,0.12,0.13,0.1,0.26,0.38,0.28,0.15,0.33,0.05,0.06,0.44,0.32,0.43)),.Names=c("Var.1","YearQuarter","value"),row.names=c(NA,-30L),class="data.frame")
library(scales)
library(ggplot2)
### data
# Convert the quarter factor to Date and build a rounded percentage label.
x4.1.m$YearQuarter <- as.Date(x4.1.m$YearQuarter)
x4.1.m$label <- paste0(round(x4.1.m$value * 100, 0), "%")

### plot, theme and scales in a single chain
x4.line <- ggplot(
  data = x4.1.m,
  aes(x = YearQuarter, y = value, colour = Var.1)
) +
  geom_smooth(size = 1.5, se = FALSE) +
  geom_text(aes(label = label), vjust = 1.5, hjust = 0.5, size = 3) +
  theme(
    axis.line = element_line(colour = "black"),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  ggtitle("Percentages:Main Reasons for Leaving Vodafone by Quarter") +
  theme(plot.title = element_text(size = rel(1.2))) +
  scale_y_continuous(labels = percent, limits = c(0, 0.5)) +
  # Breaks generated every three months, labelled like "Sep-11".
  scale_x_date(
    labels = date_format("%b-%y"),
    breaks = date_breaks("3 months")
  ) +
  labs(y = "Percentage", x = "Year Quarter")

x4.line
You need to pass your required breaks to breaks argument to scale_x_date (what a surprise there.)
# Breaks: the distinct quarter dates present in the data.
d <- unique(x4.1.m$YearQuarter)

# Theme settings kept in one object so the plot chain stays short.
themestuff <- theme(
  axis.line = element_line(colour = "black"),
  panel.background = element_blank(),
  panel.border = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank()
)

x4.line <- ggplot(
  data = x4.1.m,
  aes(x = YearQuarter, y = value, colour = Var.1)
) +
  geom_smooth(size = 1.5, se = FALSE) +
  geom_text(aes(label = label), vjust = 1.5, hjust = 0.5, size = 3) +
  themestuff +
  ggtitle("Percentages:Main Reasons for Leaving Vodafone by Quarter") +
  theme(plot.title = element_text(size = rel(1.2))) +
  scale_y_continuous(labels = percent, limits = c(0, 0.5)) +
  # Explicit breaks at the data's own dates instead of generated ones.
  scale_x_date(labels = date_format("%b-%y"), breaks = d) +
  labs(y = "Percentage", x = "Year Quarter")

x4.line