Hello I have generated the following:
Using the following code:
library(ggplot2,quietly=TRUE)
# Render a bar chart of per-mutation energy differences, with error bars of
# +/- difference_std, into a PDF file.
#
# Command-line arguments:
#   args[1]  tab-separated input table with a header row; the plot uses the
#            columns position, Mutation, difference and difference_std
#   args[2]  path of the PDF file to write
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop("expected two arguments: <input table> <output pdf>", call. = FALSE)
}
data <- read.table(args[1], header = TRUE, sep = "\t")

p1 <- ggplot(data, aes(x = Mutation, y = difference)) +
  # `stat` is a layer argument, not an aesthetic. Left inside aes() it is
  # dropped and geom_bar() uses its default counting stat, which cannot be
  # combined with a y aesthetic.
  geom_bar(aes(fill = as.factor(position)), stat = "identity",
           width = 0.75, position = "dodge") +
  geom_errorbar(aes(ymin = difference - difference_std,
                    ymax = difference + difference_std)) +
  ylab(expression(Delta * "Energy (Design - Wild Type)")) +
  xlab("Mutation") +
  ylim(-2.5, 2.5) +
  ggtitle(expression("PG9 Mutational Analysis")) +
  # Apply the complete theme first: theme_bw() replaces every theme element,
  # so tweaks placed before it would be lost.
  theme_bw() +
  theme(legend.key = element_blank(),
        axis.text.x = element_text(angle = 90, size = 9, hjust = 1),
        legend.position = "none")  # replaces the removed opts()

pdf(args[2])
print(p1)  # explicit print so the plot is drawn even when source()d
dev.off()
As you can see, I've grouped the positions visually by mapping position to the fill colour. Ideally, though, I would like to insert a gap in the x-axis wherever the position changes, instead of distinguishing the groups by colour.
So it would go like orange bar, orange bar, break, green bar, break, cyan bar, break, etc...
*EDIT:
Here is what the data from an input table looks like:
position Mutation difference difference_std
97 R97L -0.3174848488298787 0.2867477631591484
97 R97N 0.5333956571765566 0.35232170408577224
99 A99H -0.2294999853769939 0.24017957128601522
99 A99S -0.45171049187057877 0.0013816966425459903
101 G101S 0.5315110947026147 0.08483810927415139
102 P102S -0.04872141488960846 0.02890820273131048
103 D103Y 0.6692000007629395 0.07312815307204293
....
So all the positions would be grouped together with a break on either side.
I hope I'm making sense. Is there an easy way to do this?
J
I think the easiest way would be to use faceting (using the sample data from the question). To get a new variable for the faceting, first order your data frame by Mutation. Then add a new column pos2 that numbers each run of consecutive rows sharing the same position; it is computed with cumsum() and diff() on the position column (idea of @agstudy).
# Sort the rows by mutation label.
df2 <- df[order(df$Mutation), ]
# TRUE wherever `position` differs from the previous row (never for row 1).
position_changed <- c(FALSE, diff(df2$position) != 0)
# Running count of changes: rows in the same consecutive run share an index.
df2$pos2 <- cumsum(position_changed)
df2
position Mutation difference difference_std pos2
3 99 A99H -0.22949999 0.240179571 0
4 99 A99S -0.45171049 0.001381697 0
7 103 D103Y 0.66920000 0.073128153 1
5 101 G101S 0.53151109 0.084838109 2
6 102 P102S -0.04872141 0.028908203 3
1 97 R97L -0.31748485 0.286747763 4
2 97 R97N 0.53339566 0.352321704 4
Now use new column pos2 for facetting. With theme() and strip.text= and strip.background= you can remove strip texts and grey background.
# Bar chart with one facet per run of equal positions (pos2), so that a gap
# appears on the x-axis wherever the position changes.
ggplot(df2, aes(x = Mutation, y = difference)) +
  # `stat` must be a geom_bar() argument; inside aes() it is dropped and the
  # default counting stat conflicts with the y aesthetic.
  geom_bar(aes(fill = as.factor(position)), stat = "identity",
           width = 0.75, position = "dodge") +
  geom_errorbar(aes(ymin = difference - difference_std,
                    ymax = difference + difference_std)) +
  ylim(-2.5, 2.5) +
  # Free x scales and free x space keep every bar the same width even though
  # the facets hold different numbers of mutations.
  facet_grid(. ~ pos2, scales = "free_x", space = "free_x") +
  # Complete theme first; the tweaks below would otherwise be reset by it.
  theme_bw() +
  theme(legend.key = element_blank(),
        axis.text.x = element_text(angle = 90, size = 9, hjust = 1),
        legend.position = "none",
        # hide the facet strips so only the gaps remain visible
        strip.text = element_blank(),
        strip.background = element_blank())
UPDATE - empty levels
Another possibility is to use scale_x_discrete() with the limits= argument and add empty levels at the places where you need space between the bars (the actual levels). The problem with this approach is that you need to add those levels manually.
For example used the same data as in previous example (ordered question data).
# Bar chart on a single panel; gaps between groups come from empty-string
# entries placed by hand in the x-axis limits.
ggplot(df2, aes(x = Mutation, y = difference)) +
  # `stat` must be a geom_bar() argument; inside aes() it is dropped and the
  # default counting stat conflicts with the y aesthetic.
  geom_bar(aes(fill = as.factor(position)), stat = "identity",
           width = 0.75, position = "dodge") +
  geom_errorbar(aes(ymin = difference - difference_std,
                    ymax = difference + difference_std)) +
  ylim(-2.5, 2.5) +
  # Each "" reserves an empty slot between two position groups.
  scale_x_discrete(limits = c("A99H", "A99S", "", "D103Y", "", "G101S",
                              "", "P102S", "", "R97L", "R97N")) +
  # Complete theme first; the tweaks below would otherwise be reset by it.
  theme_bw() +
  theme(legend.key = element_blank(),
        axis.text.x = element_text(angle = 90, size = 9, hjust = 1),
        legend.position = "none")
I think you first need to create a new variable that groups the labels.
# Number each run of consecutive rows that share the same position:
# the counter increases by one whenever `position` differs from the row above.
dat[["group"]] <- cumsum(c(FALSE, diff(dat[["position"]]) != 0))
Mutation difference position group
20 0wYpO 0.93746859 4 0
17 63L00 -0.57833783 4 0
3 6hfEp -1.01620448 3 1
1 FvAz4 0.09496127 2 2
8 ghNTj -1.10180158 3 3
14 GxYzD 0.41997924 3 3
I don't think there is an easy way to add a gap to your plot, but you can create a bar plot per group and add the gene names with geom_text. Here is a first version; I hope that someone more experienced with ggplot2 can help to centre the text labels on the bars.
# One x position per group; the mutations inside a group are drawn as dodged
# bars and labelled with their names.
ggplot(dat, aes(x = factor(group), y = difference)) +
  geom_bar(aes(fill = as.factor(Mutation)), stat = "identity",
           width = 0.75, position = "dodge") +
  # position_dodge() has no `height` argument. Dodge the labels by the same
  # width as the bars, grouped per mutation, so each label sits on its bar.
  geom_text(aes(label = Mutation, group = Mutation),
            position = position_dodge(width = 0.75), angle = 90) +
  theme_bw() +
  theme(legend.position = "none")  # replaces the removed opts()
PS: The code below can be used to generate data:
# Build a random example data set with N mutations.
N <- 21

# N random labels, each made of 5 characters drawn with replacement from
# digits, lower-case and upper-case letters.
Mutation <- vapply(
  seq_len(N),
  function(i) {
    paste(sample(c(0:9, letters, LETTERS), 5, replace = TRUE), collapse = "")
  },
  character(1)
)

# One standard-normal value per mutation.
difference <- rnorm(N)

# Fixed position codes, assigned to the rows after they have been sorted.
position <- c(
  4, 4, 3, 2, 3, 3, 2, 3, 2,
  5, 2, 2, 2, 5, 5,
  5, 2, 4, 5, 4, 2
)

# Single overall standard deviation of the simulated differences.
difference_std <- sd(difference)

dat <- data.frame(Mutation = Mutation, difference = difference)
dat <- dat[order(dat$Mutation), ]
dat$position <- position

# Index of the run of equal consecutive positions each row belongs to.
dat$group <- cumsum(c(FALSE, diff(dat$position) != 0))
Related
I am trying to make a graph that will plot the cumulative sum value of different customers which will reset whenever a new order is placed. When a new order is placed, it will be indicated with a DateTick = 1 and I've tried to add this to my plots with vlines. Unfortunately, the plot will only show me either the correct Vlines or the correct series lines.
The data I'm using looks something like this
> head(CUSTWP)
# A tibble: 6 x 6
# Groups: Customer [1]
Customer YearWeek `Corrected Delta` `Ordered Quantity TU` DateTick ROP
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 CustLoc1 2020-01 46 NA 0 46
2 CustLoc1 2020-02 148 NA 0 194
3 CustLoc1 2020-03 150 NA 0 344
4 CustLoc1 2020-04 186 NA 0 530
5 CustLoc1 2020-05 205 NA 0 735
6 CustLoc1 2020-06 246 NA 0 981
I used below mentioned code to create the graphs.
# Keep the rows flagged with DateTick == 1, then only the first two columns
# (Customer and YearWeek in the printed sample).
is_order_week <- CUSTWP$DateTick == 1
p <- CUSTWP[is_order_week, ]
p <- p[, 1:2]

# Positions of the vertical lines. NOTE: the customer is stored in column
# `z`, which is not a faceting variable below, so the vlines are not split
# per Customer panel.
vline.dat <- data.frame(z = p$Customer, vl = p$YearWeek)

# One row of panels per customer: ROP as a red line plus the vlines.
ggplot(data = CUSTWP, mapping = aes(x = YearWeek, y = `ROP`, group = 1)) +
  geom_line(colour = "red", size = 0.8) +
  geom_vline(data = vline.dat, mapping = aes(xintercept = vl), linetype = 4) +
  facet_grid(Customer ~ ., scales = "free_y") +
  theme_light() +
  ggtitle("Reordering Points") +
  theme(
    axis.text.x = element_text(angle = 20, vjust = 1, hjust = 0.9),
    text = element_text(size = 14)
  ) +
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE))
When I execute the code, I get a result as can be seen in the link.
The issue with this graph is that the Vlines are the orders DateTicks for all customers rather than the DateTicks grouped by customer. I've tried a different code that somehow produces the correct graphs but also a bunch of incorrect graphs with below-mentioned code.
# Keep the rows flagged with DateTick == 1, then only the first two columns
# (Customer and YearWeek in the printed sample).
is_order_week <- CUSTWP$DateTick == 1
p <- CUSTWP[is_order_week, ]
p <- p[, 1:2]

# Positions of the vertical lines, with the customer stored in column `z`.
vline.dat <- data.frame(z = p$Customer, vl = p$YearWeek)

# Second attempt: facet rows by Customer and columns by `z`, which yields a
# Customer-by-z grid of panels.
ggplot(data = CUSTWP, mapping = aes(x = YearWeek, y = `ROP`, group = 1)) +
  geom_line(colour = "red", size = 0.8) +
  geom_vline(data = vline.dat, mapping = aes(xintercept = vl), linetype = 4) +
  facet_grid(Customer ~ z, scales = "free_y") +
  theme_light() +
  ggtitle("Reordering Points") +
  theme(
    axis.text.x = element_text(angle = 20, vjust = 1, hjust = 0.9),
    text = element_text(size = 14)
  ) +
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE))
The above code creates a matrix of plots but the only correct ones are the plots on the diagonal line running from top left to bottom right.
I would really appreciate your input on this as I've been stuck on this for quite some time. Thank you in advance and apologies for the incorrect posting standards, this is my first post.
This question already has answers here:
Can I input my own order for a ggplot path?
(3 answers)
Closed 11 months ago.
I am working with the R programming language.
I generated the following random data set in R and made a plot of these points:
library(ggplot2)
# Five reproducible random points; both coordinates are drawn from a normal
# distribution with mean 100 and standard deviation 100.
set.seed(123)
x_cor <- rnorm(n = 5, mean = 100, sd = 100)
y_cor <- rnorm(n = 5, mean = 100, sd = 100)
my_data <- data.frame(x_cor = x_cor, y_cor = y_cor)
x_cor y_cor
1 43.95244 271.50650
2 76.98225 146.09162
3 255.87083 -26.50612
4 107.05084 31.31471
5 112.92877 55.43380
# Scatter plot of the points.
ggplot(data = my_data, mapping = aes(x = x_cor, y = y_cor)) +
  geom_point() +
  ggtitle("Travelling Salesman Example")
Suppose I want to connect these dots together in the following order: 1 with 3, 3 with 4, 4 with 5, 5 with 2, 2 with 1
I can make a new variable that contains this ordering:
# Custom ordering stored alongside the coordinates, one value per row.
my_data[["order"]] <- c(3, 1, 4, 5, 2)
Is it possible to make this kind of graph using ggplot2?
I tried the following code - but this connects the points based on the order they appear in, and not the custom ordering:
# Attempt: geom_path() joins the points in the order the rows appear in
# my_data, so the `order` column is not taken into account here.
ggplot(data = my_data, mapping = aes(x = x_cor, y = y_cor)) +
  geom_path() +
  geom_point(size = 2)
I could probably manually re-shuffle the dataset to match this ordering - but is there an easier way to do this?
In the past, I have made these kind of graphs using "igraph" - but is it possible to make them with ggplot2?
Can someone please show me how to do this?
Thanks!
You can order your data like so:
# Rank of each point along the route (row 1 is visited first, row 2 last).
my_data$order <- c(1, 5, 2, 3, 4)

# Sort the rows by that rank so geom_path() follows the route.
route <- my_data[order(my_data$order), ]
ggplot(data = route, mapping = aes(x = x_cor, y = y_cor)) +
  geom_path() +
  geom_point(size = 2)
If you want to close the path, use geom_polygon:
# Same route drawn as an unfilled polygon, so the last point is joined back
# to the first.
route <- my_data[order(my_data$order), ]
ggplot(data = route, mapping = aes(x = x_cor, y = y_cor)) +
  geom_polygon(fill = NA, colour = "black") +
  geom_point(size = 2)
I'm trying to do a waffle chart for the championships won by F1 drivers so far. The chart comes out good but it comes out with alphabetical labels. I want it to start from the most titles won to the least.
I've tried ordering and fct_relevel. But nothing works. Below is the code
# One waffle panel per champion, laid out in three rows of panels with the
# champion name in a strip below each panel.
ggplot(dfc, mapping = aes(fill = Champions, values = one)) +
  geom_waffle(colour = "cornsilk", size = 0.25, n_rows = 7) +
  facet_wrap(
    ~Champions,
    nrow = 3,
    strip.position = "bottom",
    labeller = label_wrap_gen(6)
  )
And this is the
result I'm looking for.
You can find the entire code here
The dataset looks like
Season Champions Team one
1 a x 1
2 a x 1
3 b y 1
4 a x 1
5 c z 1
Here's a solution using forcats (also part of the tidyverse package).
fct_infreq() orders factors according to their frequency in the data, and you can use that to specify the ordering of the levels in your data.
# Re-level Champions so the levels follow the order produced by fct_infreq()
# (most frequent first); the facets are then drawn in that order.
levels_by_frequency <- levels(fct_infreq(dfc$Champions))
dfc$Champions <- factor(dfc$Champions, levels = levels_by_frequency)

ggplot(dfc, mapping = aes(fill = Champions, values = one)) +
  geom_waffle(colour = "cornsilk", size = 0.25, n_rows = 7) +
  facet_wrap(
    ~Champions,
    nrow = 3,
    strip.position = "bottom",
    labeller = label_wrap_gen(6)
  )
assume I've the following data to generate two barplot as a multiplot:
quarter1
variable value merge1
1 h=4 3 no
2 h=7 2 yes
3 h=8 3 no
4 h=21 2 no
quarter2
variable value merge2
1 h=6 1 no
2 h=7 2 yes
3 h=10 1 no
4 h=12 3 no
5 h=13 1 no
6 h=16 1 no
7 h=17 1 no
Code for the two plots:
# Left panel: bars of `value` per `variable` for quarter1, filled by merge1.
bar_q <- ggplot(quarter1, aes(x = variable, y = value, fill = merge1)) +
  geom_bar(stat = "identity")
bar_qf <- bar_q +
  ggtitle("k = 0") +
  theme(axis.text = element_text(size = 24, color = "gray0"),
        axis.title = element_blank()) +
  # Integer-only y breaks derived from the data range.
  scale_y_continuous(
    breaks = function(x) unique(floor(pretty(seq(0, (max(x) + 1) * 1.1))))
  ) +
  # guide = "none" hides the legend; a logical guide value is deprecated and
  # the shorthand `F` can be reassigned.
  scale_fill_manual(values = grp_colors, guide = "none") +
  theme(plot.title = element_text(hjust = 0.5, size = 24))

# Right panel: same layout for quarter2, filled by merge2, with the y-axis
# fixed to 0..3.
bar2_q <- ggplot(quarter2, aes(x = variable, y = value, fill = merge2)) +
  geom_bar(stat = "identity")
bar2_qf <- bar2_q +
  ggtitle(expression(k %in% group("[", "1;4", "]"))) +
  theme(axis.text = element_text(size = 24, color = "gray0"),
        axis.title = element_blank()) +
  scale_y_continuous(limits = c(0, 3), breaks = seq(3)) +
  scale_fill_manual(values = grp_colors, guide = "none") +
  theme(plot.title = element_text(hjust = 0.5, size = 24))
Generating multiplot with:
# Put both bar charts side by side under a shared bold title.
plot_quarter <- egg::ggarrange(
  bar_qf, bar2_qf,
  ncol = 2,
  top = textGrob(
    "Quartalsdaten:Häufigkeiten",
    gp = gpar(fontsize = 28, font = 2)
  )
)
Resulting plot looks like this:
If you look at the "g" in the plot title, it is slightly cut off. I tried to increase the distance between the title of the multiplot and the titles of the individual plots, but I couldn't find a way to do it in "ggarrange". Does someone know how I can do this with ggarrange/textGrob?
grid has always been a bit peculiar in estimating textGrob heights. ggplot2 recently introduced a titleGrob to deal with this more consistently; unfortunately it's a private function not meant to be used externally.
The two easiest workarounds I can think of are:
library(ggplot2)
# Two empty placeholder plots.
p2 <- ggplot()
p1 <- p2
library(grid)
# Workaround 1: wrap the title text in a grob tree of class "title" and give
# that class a more generous height.
title_text <- textGrob(
  "Quartalsdaten:Häufigkeiten",
  gp = gpar(fontsize = 28, font = 2)
)
tg <- grobTree(title_text, cl = "title")
# Height method for "title" grobs: height of the wrapped text plus two lines.
heightDetails.title <- function(x) {
  grobHeight(x$children[[1]]) + unit(2, "line")
}
egg::ggarrange(p1, p2, ncol = 2, top = tg)
library(gridExtra)
# Workaround 2: wrap the title text in a dummy one-cell table whose theme
# carries the font size, bold face and cell padding.
title_theme <- ttheme_minimal(
  base_size = 28,
  padding = unit(c(0, 2), "line"),
  core = list(fg_params = list(font = 2))
)
tg <- gridExtra::tableGrob("Quartalsdaten:Häufigkeiten", theme = title_theme)
egg::ggarrange(p1, p2, ncol = 2, top = tg)
Edit: actually, as pointed out in the comments, ggarrange has a padding argument for this purpose,
# Same arrangement, passing ggarrange's own `padding` argument.
egg::ggarrange(
  p1, p2,
  ncol = 2,
  top = textGrob(
    "Quartalsdaten:Häufigkeiten",
    gp = gpar(fontsize = 28, font = 2)
  ),
  padding = unit(1, "line")
)
i have this data frame:
TotalCost Vehicles Bikes
92 1 2
92 1 3
96 1 6
93 2 2
93 2 3
95 2 6
108 3 2
108 3 3
108 3 6
I would like to plot with bars filled in 'Bikes' parameter, but this command:
# Bars of TotalCost per Vehicles, with the numeric Bikes column mapped to the
# fill and dodging requested.
ggplot(data, mapping = aes(x = Vehicles, y = TotalCost)) +
  geom_bar(mapping = aes(fill = Bikes), stat = "identity", position = "dodge")
gives me this plot, without any color
What am i doing wrong?
library("magrittr")
library("reshape2")
library("ggplot2")
# Rebuild the question's data frame from its comma-separated values.
# The values are listed row by row in the order TotalCost, Vehicles, Bikes.
# Base functions only, and TRUE/FALSE instead of the reassignable T/F.
rawdata <- matrix(
  data = as.numeric(unlist(strsplit(
    "92,1,2,92,1,3,96,1,6,93,2,2,93,2,3,95,2,6,108,3,2,108,3,3,108,3,6",
    split = ","
  ))),
  ncol = 3,
  byrow = TRUE
)
colnames(rawdata) <- c("TotalCost", "Vehicles", "Bikes")
df <- as.data.frame(rawdata, stringsAsFactors = FALSE)
If your "Bikes" data are continuous, then you could be looking for the following:
# Bikes kept numeric and mapped to the fill, with no dodging requested.
ggplot(data = df, mapping = aes(x = Vehicles, y = TotalCost)) +
  geom_bar(mapping = aes(fill = Bikes), stat = "identity")
If the "Bikes" values are more like distinct categories, then the following might be it:
# Bikes converted to a factor for the fill, with the bars dodged.
ggplot(data = df, mapping = aes(x = Vehicles, y = TotalCost)) +
  geom_bar(
    mapping = aes(fill = as.factor(Bikes)),
    position = "dodge",
    stat = "identity"
  )
This is happening because you can't dodge based on a numeric quantity, since it is continuous. If you specify fill=factor(Bikes) it will do the right thing; otherwise ggplot doesn't know how to "dodge" the bars for a continuous value.
Alternatively, you can specify the grouping explicitly, by adding group=Bikes to the aesthetics for the master plot or geom_bar:
# Bikes kept numeric for the fill, with an explicit group aesthetic on the
# same column and the bars dodged.
ggplot(data = df, mapping = aes(x = Vehicles, y = TotalCost)) +
  geom_bar(
    mapping = aes(group = Bikes, fill = Bikes),
    stat = "identity",
    position = "dodge"
  )
The advantage of the factor approach is that each bar gets its own label, and you can use discrete color scales (like Brewer) to make the differentiation clear.
With the group approach, the coloration will reflect the relative values, which may be desirable, but may make the plot hard to read if there are more values for bikes, since comparing adjacent Vehicles columns will involve comparing subtle gradations. If we append another row with 108, 3, 7, then it will be hard to compare the 2 and 3 groupings.
# Same grouped plot with one extra row appended (108, 3, 7).
ggplot(
  data = rbind(df, c(108, 3, 7)),
  mapping = aes(x = Vehicles, y = TotalCost)
) +
  geom_bar(
    mapping = aes(group = Bikes, fill = Bikes),
    stat = "identity",
    position = "dodge"
  )