use ggplot to plot a panel of bar plots - r

I have a data frame which reads as below:
factor bin ret
1 beta 1 -0.026840807
2 beta 2 -0.051610137
3 beta 3 -0.044658901
4 beta 4 -0.053322048
5 beta 5 -0.060173704
6 size 1 -0.047448288
7 size 2 -0.045603776
8 size 3 -0.051804757
9 size 4 -0.047044614
10 size 5 -0.045720971
11 liquidity 1 -0.057657070
12 liquidity 2 -0.053105474
13 liquidity 3 -0.045501401
14 liquidity 4 -0.048572585
15 liquidity 5 -0.032209038
16 nonlinear 1 -0.045752503
17 nonlinear 2 -0.047673201
18 nonlinear 3 -0.051107792
19 nonlinear 4 -0.045364070
20 nonlinear 5 -0.047722148
21 btop 1 -0.004399745
22 btop 2 -0.035082069
23 btop 3 -0.054526058
24 btop 4 -0.063497535
25 btop 5 -0.077123859
I would like to plot a panel of charts which looks similar to this:
The difference is that the chart I would like to create would have bin on the x-axis and ret on the y-axis, and the charts should be bar plots. Could anyone help me with this question?
FYI: The code for the sample plot I've included is:
# Sample plot: a line chart of `value` over `date`, drawn as one panel per
# `series`, with large red panel titles. The number of panel columns is read
# from input$numCol.
print(
  ggplot(data = df, mapping = aes(x = date, y = value)) +
    geom_line() +
    facet_wrap(facets = ~series, ncol = input$numCol) +
    ylab("return(bps)") +
    theme(
      strip.text.x = element_text(size = 20, colour = "red", angle = 0)
    )
)
I wonder if minor change to the code could solve my problem.

From your description, I'll assume this is what you're after:
# Bar chart of `ret` against `bin`, one panel per `factor`, laid out in two
# columns. geom_col() draws bars whose heights are the values in the data.
print(
  ggplot(data = df, mapping = aes(x = bin, y = ret)) +
    geom_col() +
    facet_wrap(facets = ~factor, ncol = 2) +
    ylab("return(bps)") +
    theme(
      strip.text.x = element_text(size = 20, colour = "red", angle = 0)
    )
)

Related

Plot boxplots over time using multiple categories

I am sorry for the header I was not so sure how to ask about it.
I have a data frame that looks like this.
# Example data: 24 observations = 2 time points x 2 treatments x 2 samples
# x 3 replicates. The repeating columns are built with rep() rather than
# being typed out in full.
Sample <- rep(rep(c("A", "B"), each = 3), times = 4)
Treatment <- rep(rep(c("twiter", "facebook"), each = 6), times = 2)
replicate <- rep(c(1, 2, 3), times = 8)
time <- rep(c(10, 20), each = 12)
points <- c(
  20, 40, 80, 20, 60, 120, 30, 100, 55, 28, 45, 90,
  80, 20, 100, 40, 90, 56, 20, 30, 12, 3, 5, 8
)
length(points)
Sample Treatment replicate time points
1 A twiter 1 10 20
2 A twiter 2 10 40
3 A twiter 3 10 80
4 B twiter 1 10 20
5 B twiter 2 10 60
6 B twiter 3 10 120
7 A facebook 1 10 30
8 A facebook 2 10 100
9 A facebook 3 10 55
10 B facebook 1 10 28
11 B facebook 2 10 45
12 B facebook 3 10 90
13 A twiter 1 20 80
14 A twiter 2 20 20
15 A twiter 3 20 100
16 B twiter 1 20 40
17 B twiter 2 20 90
18 B twiter 3 20 56
19 A facebook 1 20 20
20 A facebook 2 20 30
21 A facebook 3 20 12
22 B facebook 1 20 3
23 B facebook 2 20 5
24 B facebook 3 20 8
I would like to plot my data using boxplots at each time point.
I would like to have one box plot that shows Sample A with "twiter" Sample A with "facebook"
Sample "B" with "twiter" and Sample B with "facebook" at time point 10 and the same at time point 20.
So far I can do something like this.
# Attempt so far: boxplots of points against time, coloured and filled by Sample,
# with the boxes grouped by the Sample x Treatment combination only.
# Note that alpha=0.1 on the first line is an argument to ggplot() itself, not to a layer.
ggplot(data,aes(x=time, y=points,color=Sample, fill=Sample, group=interaction(Sample,Treatment)), alpha=0.1) +
geom_boxplot(alpha=0.1) +
geom_point(position = position_dodge(width=0.75), alpha=0.2)+
theme_bw()
But this is wrong: I would like to have samples A and B from the two different treatments next to each other at each time point, so that I can look at the differences. I don't want to use facet_wrap. It is a challenge for me. Thank you for your time.
Turning my comment into an answer: your issue is that group=interaction(Sample,Treatment) overrides the grouping by the x-axis (time) that would normally be done. To include time in the grouping, add it to the interaction:
# Same plot as the attempt, except that `time` is part of the grouping, so
# every Sample/Treatment combination gets its own box at each time point.
ggplot(
  data,
  aes(x = time, y = points, colour = Sample, fill = Sample,
      group = interaction(Sample, Treatment, time)),
  alpha = 0.1
) +
  theme_bw() +
  geom_boxplot(alpha = 0.1) +
  geom_point(alpha = 0.2, position = position_dodge(width = 0.75))
Of course, the issue remains that there's no way to tell which box goes with which treatment, but I'll leave that to you to address.
Try this:
library(dplyr)
library(ggplot2)

# Plot: one box per Sample/Treatment combination at each time point.
data %>%
  # Sort by Sample so the combined levels come out with both "A ..." levels
  # first and both "B ..." levels second, matching the manual colours below.
  arrange(Sample) %>%
  mutate(
    Var = paste(Sample, Treatment),
    Var = factor(Var, levels = unique(Var), ordered = TRUE)
  ) %>%
  ggplot(aes(
    x = time,
    y = points,
    color = Var,
    fill = Var,
    # `time` is numeric, so it must be part of the grouping: grouping by Var
    # alone pools both time points into a single box per Var.
    group = interaction(Var, time)
  )) +
  geom_boxplot(alpha = 0.1) +
  geom_point(position = position_dodge(width = 0.75), alpha = 0.2) +
  theme_bw() +
  # Both treatments of a sample share one colour (A = tomato, B = cyan3).
  scale_color_manual(values = c("tomato", "tomato", "cyan3", "cyan3")) +
  scale_fill_manual(values = c("tomato", "tomato", "cyan3", "cyan3"))
Output:
If you don't mind making time a factor, you can do the following. Note that I turned your data into a data frame named 'dat'.
# The example data as a data frame: 2 time points x 2 treatments x 2 samples
# x 3 replicates (24 rows).
dat <- data.frame(
  Sample = rep(rep(c("A", "B"), each = 3), times = 4),
  Treatment = rep(rep(c("twiter", "facebook"), each = 6), times = 2),
  replicate = rep(c(1, 2, 3), times = 8),
  time = rep(c(10, 20), each = 12),
  points = c(
    20, 40, 80, 20, 60, 120, 30, 100, 55, 28, 45, 90,
    80, 20, 100, 40, 90, 56, 20, 30, 12, 3, 5, 8
  )
)

# With time converted to a factor the x axis is discrete, and no explicit
# group aesthetic is given.
dat %>%
  mutate(time = factor(time)) %>%
  ggplot(
    aes(x = time, y = points, colour = Sample, fill = Sample),
    alpha = 0.1
  ) +
  theme_bw() +
  geom_boxplot(alpha = 0.1) +
  geom_point(alpha = 0.2, position = position_dodge(width = 0.75))

Grouped barplot side by side

I'm trying to plot the table below using a grouped barplot with ggplot2.
How do I plot it in a way such that the scheduled audits and noofemails are plotted sided by side based on each day?
Email Type Sent Month Sent Day Scheduled Audits Noofemails
27 A 1 30 7 581
29 A 1 31 0 9
1 A 2 1 2 8
26 B 1 29 1048 25312
28 B 1 30 23 170
30 B 1 31 18 109
2 B 2 1 6 93
3 B 2 2 9 86
4 B 2 4 3 21
# Attempt: bars of `Scheduled Audits` per `Sent Day`, with fill mapped to the
# Noofemails column, so only one of the two measures is drawn as a bar.
ggplot(joined, aes(x=`Sent Day`, y=`Scheduled Audits`, fill = Noofemails )) +
geom_bar(stat="identity", position = position_dodge()) +
scale_x_continuous(breaks = c(1:29)) +
ggtitle("Number of emails sent in February") +
theme_classic()
Does not achieve the plot I hope to see.
Using this data format (slightly different column names, so no more back-ticks). read.table(text = "") is a nice way to share small datasets on Stack Overflow:
# Inline copy of the example table; the first line of the text supplies the
# column names.
joined <- read.table(
  header = TRUE,
  text = "ID Email_Type Sent_Month Sent_Day Scheduled_Audits Noofemails
27 A 1 30 7 581
29 A 1 31 0 9
1 A 2 1 2 8
26 B 1 29 1048 25312
28 B 1 30 23 170
30 B 1 31 18 109
2 B 2 1 6 93
3 B 2 2 9 86
4 B 2 4 3 21"
)
This is why ggplot2 really likes long data instead of wide data. Because it needs column names to create the aesthetics.
So you can use the function tidyr::gather() to rearrange the two columns of interest into one column of labels and one column of values. This increases the number of rows in the data frame, which is why the format is called long.
# Reshape to long format: the two measure columns become a `key` column (the
# measure name) and a `value` column. pivot_longer() is tidyr's current
# replacement for the superseded gather().
long <- tidyr::pivot_longer(
  joined,
  cols = c(Scheduled_Audits, Noofemails),
  names_to = "key",
  values_to = "value"
)

# One bar per measure, placed side by side for each day.
ggplot(long, aes(Sent_Day, value, fill = key)) +
  geom_col(position = "dodge")
Alternatively, you can use the melt() function from the reshape2 package. See the example below.
library(ggplot2)
library(reshape2)

# Keep the day plus the two measures, then melt to long format: `variable`
# holds the measure name and `value` its count.
joined2 <- melt(
  joined[, c("Sent_Day", "Noofemails", "Scheduled_Audits")],
  id = "Sent_Day"
)

ggplot(joined2, aes(x = Sent_Day, y = value, group = variable, fill = variable)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  # Days of the month run to 31, and the data contains days 30 and 31, so
  # the breaks must cover them or those bars get no axis tick.
  scale_x_continuous(breaks = seq_len(31)) +
  ggtitle("Number of emails sent in February") +
  theme_classic()

ggplot2 pch 21 fill within aes not working

I'm trying to make a scatterplot of sorts, where the symbol shape and the symbol fill are set by two different variables. Here is an example of my data:
Sample Experiment Replicate EPS Time Group
1 1 1 5 24 Wild-type
2 1 3 4.5 24 Wild-type
3 2 2 2 24 Wild-type
4 1 2 6 24 Variant
5 2 1 4 24 Variant
6 1 2 3 48 Wild-type
7 1 3 2.5 48 Wild-type
8 2 3 3.5 48 Wild-type
9 1 2 3.5 48 Variant
10 2 2 6.5 48 Variant
11 1 1 3 72 Wild-type
12 2 3 3.5 72 Wild-type
13 1 3 9.5 72 Variant
14 2 3 12.5 72 Variant
Here is the code I'm using. Everything works fine except there is no fill in any of my symbols:
# Read the data, then add two helper columns used for positioning points.
fig.one<-read.table(file='data/Figure1.txt', header=TRUE)
# time.cat: x-axis position for each time point (24 -> 2.5, 48 -> 6, 72 -> 9.5).
fig.one$time.cat[fig.one$Time == 24] <- 2.5
fig.one$time.cat[fig.one$Time == 48] <- 6
fig.one$time.cat[fig.one$Time == 72] <- 9.5
# scat.adj: horizontal offset per Group, added to time.cat in geom_jitter() below.
fig.one$scat.adj[fig.one$Group=='Wild-type']<- -0.50
fig.one$scat.adj[fig.one$Group=='Variant']<- 0.50
# Lookup vectors: point shape indexed by Experiment, fill colour indexed by Group.
my.pch<-c(21,24)
my.bg<-c('black','white')
# NOTE: in the next line fill=... comes after the closing parenthesis of aes(),
# i.e. it is an argument to ggplot() rather than an aesthetic mapping.
ggplot(fig.one, aes(time.cat, EPS, shape=my.pch[Experiment]),fill=my.bg[factor(Group)]) +
geom_jitter(aes(time.cat + scat.adj,EPS),
position=position_jitter(width=0.2,height=0),
alpha=0.6,
size=3) +
scale_fill_identity() +
scale_shape_identity() +
scale_x_continuous("Time since inoculation (hours)", breaks=c(2.5,6,9.5),labels=c( "24", "48", "72"), limits=c(1,11)) +
ylab("EPS (grams per litre)") +
theme(axis.text=element_text(size=12, face='bold'),
axis.title=element_text(size=14, face='bold'),
panel.grid.major = element_blank(), panel.grid.minor = element_blank())
Thanks as always for help!
If it's the fill color that's giving you trouble you might try using color instead of fill. Here's a reproducible example:
# Reproducible example on iris: look up a literal colour name per species and
# use the identity scale so those names are used as the colours themselves.
my.bg <- c("black", "white", "blue")
ggplot(
  data = iris,
  mapping = aes(
    x = Sepal.Width,
    y = Petal.Length,
    colour = my.bg[factor(Species)]
  )
) +
  scale_colour_identity() +
  geom_jitter()
Your fill argument is defined outside the aes() function. Try re-writing the first ggplot line as
# fill is now inside aes(), next to shape, so it is treated as an aesthetic mapping;
# the trailing "..." stands for the rest of the original plot code.
ggplot(fig.one, aes(time.cat, EPS, shape=my.pch[Experiment], fill=my.bg[factor(Group)])) + ...

R scatter plot by shape, colour and fill

I'm very new to R and I'm trying to build a scatter plot that codes my data according to shape, colour and fill. I want 5 different colours and 3 different shapes, and these should be either filled or not filled (in a non-filled point, I would still want the shape and the colour).
My data looks basically like this:
# Example data: three categorical columns (Colour, Shape, Fill) and two
# numeric columns (X13C, X15N). The leading number on each data line has no
# header entry, so read.table() uses it as the row name.
blank.test <- read.table(
  text = "Colour Shape Fill X13C X15N
1 B B A 16 10
2 D A A 16 12
3 E A B 17 14
4 C A A 14 18
5 A A B 13 18
6 C B B 18 13
7 E C B 10 12
8 E A B 11 10
9 A C B 14 13
10 B A A 11 14
11 C B A 11 10
12 E B A 11 19
13 A B A 10 18
14 A C B 17 16
15 E B A 16 13
16 A C A 16 14",
  header = TRUE
)
If I do this:
# First attempt: shape, fill and colour are all mapped inside geom_point();
# size=5 sits inside aes() of the ggplot() call. No shape scale is set here.
ggplot(blank.test, aes(x=X13C, y=X15N,size=5)) +
geom_point(aes(shape=Shape,fill=Fill,color=Colour))
I get no filled or unfilled data points
I did a little research and it looked like the problem was with the symbols themselves, which cannot take different settings for line and fill; it was recommended that I use pch shapes between 21 and 25.
But if I do this:
# Second attempt: all aesthetics are mapped in ggplot(), and the three Shape
# levels are assigned point shapes 21, 22 and 25. size = 5 is still inside
# aes(). (The stray trailing backtick, which made this a syntax error, is
# removed.)
ggplot(
  blank.test,
  aes(x = X13C, y = X15N, color = Colour, shape = Shape, fill = Fill, size = 5)
) +
  geom_point() +
  scale_shape_manual(values = c(21, 22, 25))
I still don't get what I want
I also tried playing around with scale_fill_manual without any good result.
I don't think you can use fill for points. What I would do is create an interaction between fill and shape and use this new factor to define your shape and fill/open symbols
# Combined Shape x Fill factor: one level per (Shape, Fill) pair, e.g. "A.B".
blank.test$inter <- interaction(blank.test$Shape, blank.test$Fill)
and then for your plot I would use something like that
# Shape glyph encodes Shape (A = square, B = circle, C = triangle) and
# open/solid encodes Fill (A = open, B = solid). The levels of
# interaction(Shape, Fill) are named "<Shape>.<Fill>" and vary Shape fastest,
# so the values are given as a named vector; an unnamed vector listed in
# open/solid pairs would attach the glyphs to the wrong levels.
ggplot(blank.test, aes(x = X13C, y = X15N)) +
  geom_point(aes(shape = inter, color = Colour)) +
  scale_shape_manual(
    name = "shape",
    values = c(
      A.A = 0, B.A = 1, C.A = 2,
      A.B = 15, B.B = 16, C.B = 17
    )
  ) +
  scale_color_manual(
    name = "colour",
    values = c("red", "blue", "yellow", "green", "purple")
  )
I can get the plot to work just fine, but the legend seems to absolutely insist on being black for fill. I can't figure out why. Maybe someone else has the answer to that one.
The 5 appearing in the legend is caused by having size=5 inside aes(), where only elements that change with your data belong.
Here is some example code:
# Outline colour from Colour, interior from Fill, glyph from Shape. The point
# size and outline thickness are constants, so they are set outside aes().
ggplot(
  data = blank.test,
  mapping = aes(
    x = X13C,
    y = X15N,
    colour = Colour,
    shape = Shape,
    fill = Fill
  )
) +
  geom_point(stroke = 3, size = 5) +
  scale_colour_brewer(palette = "Set2") +
  scale_fill_brewer(palette = "Set1") +
  scale_shape_manual(values = c(21, 22, 25)) +
  theme_bw()

How to display the mean value and error bars in a percent bar graph

I've been trying to make a percent bar chart to show mortality of two species of tadpoles on three different backgrounds (for a camouflage study)
My dataset is:
[[1]]
campanha fundo sobreviventes especie intactos percentsob
1 5 light 10 Bs 9 66.66667
2 5 mixed 8 Bs 5 53.33333
3 5 dark 8 Bs 8 53.33333
4 6 light 15 Bs 13 100.00000
5 6 mixed 15 Bs 11 100.00000
6 6 dark 14 Bs 11 93.33333
7 5 light 7 Sm 5 46.66667
8 5 mixed 10 Sm 9 66.66667
9 5 dark 12 Sm 10 80.00000
10 6 light 14 Sm 6 93.33333
11 6 mixed 14 Sm 6 93.33333
12 6 dark 15 Sm 9 100.00000
and my script is (file name=odonatapint, and 15 = total number of individuals used per trial):
# Survivors as a percentage of the 15 individuals used per trial.
odonatapint$percentsob<-odonatapint$sobreviventes*100/15
# Dodged bars of percentsob per background (fundo), filled by species (especie).
# stat="identity" uses the row values as given; no averaging is requested here.
# NOTE(review): `method` does not appear to be a geom_bar() parameter -- confirm.
ggplot(data=odonatapint,aes(x=fundo,y=percentsob,fill=especie)) +
geom_bar(method="mean",stat="identity",position="dodge") +
scale_fill_manual(values=c("#999999", "#000000")) +
xlab("Background type (natural)") +
ylab("Tadpoles surviving (%)")
However I noticed the graph to show the highest value for each category instead of the mean (I tried to post the graph but I wasn't allowed because I just registered).
What should I do to fix it? And how can I add error bars after I get to display the mean value?
This is what I did, first calculate the mean and standard deviation (for error bars)
library(dplyr)

# Add the mean and standard deviation of percentsob for each
# background x species combination as new columns (every row of a group
# carries that group's values). ungroup() afterwards so the grouping does
# not leak into later operations on odonatapint.
odonatapint <- odonatapint %>%
  group_by(fundo, especie) %>%
  mutate(mean = mean(percentsob), sd = sd(percentsob)) %>%
  ungroup()
then plot using ggplot (first plot the bars using geom_bar, then use geom_errorbar to add the error bars):
# Bars show the group mean; error bars show mean +/- one standard deviation.
# The unsupported `method` argument to geom_bar() has been dropped.
ggplot(data = odonatapint, aes(x = fundo, y = mean, fill = especie)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  # Dodge the error bars by the default bar width (0.9) so each one sits on
  # the centre of its bar; without an explicit width they are dodged by the
  # error bar's own, different width and end up off-centre.
  geom_errorbar(
    aes(ymax = mean + sd, ymin = mean - sd),
    position = position_dodge(width = 0.9),
    width = 0.25
  ) +
  scale_fill_manual(values = c("#999999", "#000000")) +
  xlab("Background type (natural)") +
  ylab("Tadpoles surviving (%)")
The figure generated is shown below
I don't know that much about geom_bar(), so this takes a slightly different approach, but it works with example datasets. It computes the error bar values by bootstrapping, which may be computationally intensive with larger datasets.
# Let ggplot2 compute the summaries: bar height is the mean of percentsob per
# background x species, and the error bars come from mean_cl_boot
# (bootstrapped confidence limits; it relies on the Hmisc package).
ggplot(data = odonatapint, aes(x = fundo, y = percentsob, fill = especie)) +
  # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
  stat_summary(geom = "bar", fun = "mean", position = "dodge") +
  # Dodge by the default bar width (0.9) so the error bars line up with the
  # centres of the bars.
  stat_summary(
    geom = "errorbar",
    fun.data = "mean_cl_boot",
    position = position_dodge(width = 0.9),
    width = 0.25
  )

Resources