combining geom_ribbon when x is a factor - r

Leading on from this question.
I cannot generate the shaded area when my x is a factor.
Here is some sample data
time <- as.factor(c('A','B','C','D'))
x <- c(1.00,1.03,1.03,1.06)
x.upper <- c(0.91,0.92,0.95,0.90)
x.lower <- c(1.11,1.13,1.17,1.13)
df <- data.frame(time, x, x.upper, x.lower)
ggplot(data = df,aes(time,x))+
geom_ribbon(aes(x=time, ymax=x.upper, ymin=x.lower), fill="pink", alpha=.5) +
geom_point()
when i substitute factor into the aes() I still cannot get the shaded region. Or if i try this:
ggplot()+
geom_ribbon(data = df, aes(x=time, ymax=x.upper, ymin=x.lower), fill="pink", alpha=.5) +
geom_point(data = df, aes(time,x))
I still cannot get the shading. Any ideas how to overcome this...

I think aosmith was exactly right, you simply need to convert your factor variable to numeric. I think the following code is what you're looking for:
ggplot(data = df,aes(as.numeric(time),x))+
geom_ribbon(aes(x=as.numeric(time), ymax=x.upper, ymin=x.lower),
fill="pink", alpha=.5) +
geom_point()
Which produces this plot:
EDIT EDIT: Change x-axis labels back to their original values taken from #aosmith in the comments below:
ggplot(data = df,aes(as.numeric(time),x))+
geom_ribbon(aes(x=as.numeric(time), ymax=x.upper, ymin=x.lower),
fill="pink", alpha=.5) +
geom_point() + labs(title="My ribbon plot",x="Time",y="Value") +
scale_x_continuous(breaks = 1:4, labels = levels(df$time))

Related

Display Greek symbols and charge facet titles at the same time with ggplot

I know how to modify titles in ggplot without altering the original data. Suppose I have the following data frame and I want to change the labels. Then, I would do so in the following way
df <- data.frame(x = 1:4, y = 1:4, label = c(c("params[1]", "params[2]", "params[3]",
"params[4]")))
params_names <- list(
'params[1]'= "beta[11]",
'params[2]'= "beta[22]",
'params[3]'= "beta[33]",
'params[4]'= "beta[44]"
)
param_labeller <- function(variable, value){
params_names[value]
}
ggplot(df, aes(x=x,y=y)) +
geom_point() +
facet_grid(~label, labeller = param_labeller)
If I wanted to display the subscripts, I would just do this
ggplot(df, aes(x=x,y=y)) +
geom_point() +
facet_grid(~label, labeller = label_parsed)
How do I apply both operations at the same time?
I don't know exactly if this conflicts with you not wanting to "alter" the original data, but you add the labelling information to the factor itself:
df$label2 <- factor(df$label,
labels = c("beta[4]", "beta[24]", "beta[42]", "beta[43]"))
ggplot(df, aes(x = x, y = y)) +
geom_point() +
facet_grid( ~ label2, labeller = label_parsed)
This produces the following plot:
Plot with formatted facet labels

How to highlight a column in ggplot2

I have the following graph and I want to highlight the columns (both) for watermelons as it has the highest juice_content and weight. I know how to change the color of the columns but I would like to WHOLE columns to be highlighted. Any idea on how to achieve this? There doesn't seems to be any similar online.
fruits <- c("apple","orange","watermelons")
juice_content <- c(10,1,1000)
weight <- c(5,2,2000)
df <- data.frame(fruits,juice_content,weight)
df <- gather(df,compare,measure,juice_content:weight, factor_key=TRUE)
plot <- ggplot(df, aes(fruits,measure, fill=compare)) + geom_bar(stat="identity", position=position_dodge()) + scale_y_log10()
An option is to use gghighlight
library(gghighlight)
ggplot(df, aes(fruits,measure, fill = compare)) +
geom_col(position = position_dodge()) +
scale_y_log10() +
gghighlight(fruits == "watermelons")
In response to your comment, how about working with different alpha values
ggplot(df, aes(fruits,measure)) +
geom_col(data = . %>% filter(fruits == "watermelons"),
mapping = aes(fill = compare),
position = position_dodge()) +
geom_col(data = . %>% filter(fruits != "watermelons"),
mapping = aes(fill = compare),
alpha = 0.2,
position = position_dodge()) +
scale_y_log10()
Or you can achieve the same with one geom_col and a conditional alpha (thanks #Tjebo)
ggplot(df, aes(fruits, measure)) +
geom_col(
mapping = aes(fill = compare, alpha = fruits == 'watermelons'),
position = position_dodge()) +
scale_alpha_manual(values = c(0.2, 1)) +
scale_y_log10()
You could use geom_area to highlight behind the bars. You have to force the x scale to discrete first which is why I've used geom_blank (see this answer geom_ribbon overlay when x-axis is discrete) noting that geom_ribbon and geom_area are effectively the same except geom_area always has 0 as ymin
#minor edit so that the level isn't hard coded
watermelon_level <- which(levels(df$fruits) == "watermelons")
AreaDF <- data.frame(fruits = c(watermelon_level-0.5,watermelon_level+0.5))
plot <- ggplot(df, aes(fruits)) +
geom_blank(aes(y=measure, fill=compare))+
geom_area(data = AreaDF, aes( y = max(df$measure)), fill= "yellow")+
geom_bar(aes(y=measure, fill=compare),stat="identity", position=position_dodge()) + scale_y_log10()
Edit to address comment
If you want to highlight multiple fruits then you could do something like this. You need a data.frame with where you want the geom_area x and y, including dropping it to 0 between. I'm sure there's slightly tidier methods of getting the data.frame but this one works
highlight_level <- which(levels(df$fruits) %in% c("apple", "watermelons"))
AreaDF <- data.frame(fruits = unlist(lapply(highlight_level, function(x) c(x -0.51,x -0.5,x+0.5,x+0.51))),
yval = rep(c(1,max(df$measure),max(df$measure),1), length(highlight_level)))
AreaDF <- AreaDF %>% mutate(
yval = ifelse(floor(fruits) %in% highlight_level & ceiling(fruits) %in% highlight_level, max(df$measure), yval)) %>%
arrange(fruits) %>% distinct()
plot <- ggplot(df, aes(fruits)) +
geom_blank(aes(y=measure, fill=compare))+
geom_area(data = AreaDF, aes(y = yval ), fill= "yellow")+
geom_bar(aes(y=measure, fill=compare),stat="identity", position=position_dodge()) + scale_y_log10()
plot

R - How to overlay the average of a set of iid RVs

In the code below I build a 40x1000 data frame where in each column I have the cumulative means for successive random draws from an exponential distribution with parameter lambda = 0.2.
I add an additional column to host the specific number of the "draw".
I also calculate the rowmeans as df_means.
How do I add df_means (as a black line) on top of all my simulated RVs? I don't understand ggplot well enough to do this.
df <- data.frame(replicate(1000,cumsum(rexp(40,lambda))/(1:40)))
df$draw <- seq(1,40)
df_means <- rowMeans(df)
Molten <- melt(df, id.vars="draw")
ggplot(Molten, aes(x = draw, y = value, colour = variable)) + geom_line() + theme(legend.position = "none") + geom_line(df_means)
How would I add plot(df_means, type="l") to my ggplot, below?
Thank you,
You can make another data.frame with the means and ids and use that to draw the line,
df_means <- rowMeans(df)
means <- data.frame(id=1:40, mu=df_means)
ggplot(Molten, aes(x=draw, y=value, colour=variable)) +
geom_line() +
theme(legend.position = "none") +
geom_line(data=means, aes(x=id, y=mu), color="black")
As described here
stat_sum_df <- function(fun, geom="crossbar", ...) {
stat_summary(fun.data=fun, colour="red", geom=geom, width=0.2, ...)
}
k<-ggplot(Molten, aes(x = draw, y = value, colour = variable)) + geom_line() + theme(legend.position = "none")
k+stat_sum_single(mean) #gives you the required plot

Varying factor order in each facet of ggplot2

I am trying to create a Cleveland Dot Plot given for two categories in this case J and K. The problem is the elements A,B,C are in both categories so R keeps farting. I have made a simple example:
x <- c(LETTERS[1:10],LETTERS[1:3],LETTERS[11:17])
type <- c(rep("J",10),rep("K",10))
y <- rnorm(n=20,10,2)
data <- data.frame(x,y,type)
data
data$type <- as.factor(data$type)
nameorder <- data$x[order(data$type,data$y)]
data$x <- factor(data$x,levels=nameorder)
ggplot(data, aes(x=y, y=x)) +
geom_segment(aes(yend=x), xend=0, colour="grey50") +
geom_point(size=3, aes(colour=type)) +
scale_colour_brewer(palette="Set1", limits=c("J","K"), guide=FALSE) +
theme_bw() +
theme(panel.grid.major.y = element_blank()) +
facet_grid(type ~ ., scales="free_y", space="free_y")
Ideally, I would want a dot plot for both categories(J,K) individually with each factor(vector x) decreasing with respect to the y vector. What ends up happening is that both categories aren't going from biggest to smallest and are erratic at the end instead. Please help!
Unfortunately factors can only have one set of levels. The only way i've found to do this is actually to create two separate data.frames from your data and re-level the factor in each. For example
data <- data.frame(
x = c(LETTERS[1:10],LETTERS[1:3],LETTERS[11:17]),
y = rnorm(n=20,10,2),
type= c(rep("J",10),rep("K",10))
)
data$type <- as.factor(data$type)
J<-subset(data, type=="J")
J$x <- reorder(J$x, J$y, max)
K<-subset(data, type=="K")
K$x <- reorder(K$x, K$y, max)
Now we can plot them with
ggplot(mapping = aes(x=y, y=x, xend=0, yend=x)) +
geom_segment(data=J, colour="grey50") +
geom_point(data=J, size=3, aes(colour=type)) +
geom_segment(data=K, colour="grey50") +
geom_point(data=K, size=3, aes(colour=type)) +
theme_bw() +
theme(panel.grid.major.y = element_blank()) +
facet_grid(type ~ ., scales="free_y", space="free_y")
which results in

How to add different lines for facets

I have data where I look at the difference in growth between a monoculture and a mixed culture for two different species. Additionally, I made a graph to make my data clear.
I want a barplot with error bars, the whole dataset is of course bigger, but for this graph this is the data.frame with the means for the barplot.
plant species means
Mixed culture Elytrigia 0.886625
Monoculture Elytrigia 1.022667
Monoculture Festuca 0.314375
Mixed culture Festuca 0.078125
With this data I made a graph in ggplot2, where plant is on the x-axis and means on the y-axis, and I used a facet to divide the species.
This is my code:
limits <- aes(ymax = meansS$means + eS$se, ymin=meansS$means - eS$se)
dodge <- position_dodge(width=0.9)
myplot <- ggplot(data=meansS, aes(x=plant, y=means, fill=plant)) + facet_grid(. ~ species)
myplot <- myplot + geom_bar(position=dodge) + geom_errorbar(limits, position=dodge, width=0.25)
myplot <- myplot + scale_fill_manual(values=c("#6495ED","#FF7F50"))
myplot <- myplot + labs(x = "Plant treatment", y = "Shoot biomass (gr)")
myplot <- myplot + opts(title="Plant competition")
myplot <- myplot + opts(legend.position = "none")
myplot <- myplot + opts(panel.grid.minor=theme_blank(), panel.grid.major=theme_blank())
So far it is fine. However, I want to add two different horizontal lines in the two facets. For that, I used this code:
hline.data <- data.frame(z = c(0.511,0.157), species = c("Elytrigia","Festuca"))
myplot <- myplot + geom_hline(aes(yintercept = z), hline.data)
However if I do that, I get a plot were there are two extra facets, where the two horizontal lines are plotted. Instead, I want the horizontal lines to be plotted in the facets with the bars, not to make two new facets. Anyone a idea how to solve this.
I think it makes it clearer if I put the graph I create now:
Make sure that the variable species is identical in both datasets. If it a factor in one on them, then it must be a factor in the other too
library(ggplot2)
dummy1 <- expand.grid(X = factor(c("A", "B")), Y = rnorm(10))
dummy1$D <- rnorm(nrow(dummy1))
dummy2 <- data.frame(X = c("A", "B"), Z = c(1, 0))
ggplot(dummy1, aes(x = D, y = Y)) + geom_point() + facet_grid(~X) +
geom_hline(data = dummy2, aes(yintercept = Z))
dummy2$X <- factor(dummy2$X)
ggplot(dummy1, aes(x = D, y = Y)) + geom_point() + facet_grid(~X) +
geom_hline(data = dummy2, aes(yintercept = Z))

Resources