Related
I've made a bar chart using ggplot with grouped data, and faceted with facet_grid. The column widths are inconsistent, so I want to make them all the same. I've read this can be done with preserve="single", but it seems to mess up the position dodging. Any idea how to prevent this from happening?
Here is a small sample of the data:
data <- structure(list(grp2 = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
7L, 7L, 7L, 7L, 7L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L,
7L), .Label = c("CSF1", "CSF2", "PC", "NC", "GPC", "GNC", "standard"
), class = "factor"), label2 = structure(c(7L, 8L, 9L, 7L, 8L,
9L, 7L, 15L, 15L, 15L, 15L, 15L, 7L, 8L, 9L, 7L, 8L, 9L, 7L,
15L, 15L, 15L, 15L, 15L), .Label = c("CSF1_raw", "CSF1_supernatant",
"CSF1_pellet", "CSF2_raw", "CSF2_supernatant", "CSF2_pellet",
"PC_raw", "PC_supernatant", "PC_pellet", "NC_raw", "NC_supernatant",
"NC_pellet", "GPC", "GNC", "standard", "NC"), class = "factor"),
mda_label = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 1L, 1L, 1L, 1L
), .Label = c("none", "mda_20", "mda_200"), class = "factor"),
conc = c(`7` = 0, `8` = 0, `9` = 0.324886127298521, `55` = 4.14765656994934,
`56` = 1.16840050032707, `57` = 8.33529714053568, `76` = 10.6220645144775,
`77` = 48.9241552191721, `78` = 4.51513315624087, `79` = 1.03887911533275,
`80` = 0.0445944796011582, `81` = 0.00484116548901831, `89` = 0,
`90` = 0, `91` = 0.322922569348207, `137` = 6.38488684568018,
`138` = 1.68909814271646, `139` = 7.61828609738757, `158` = 15.3082130743032,
`159` = 41.3127531345335, `160` = 4.64193087683391, `161` = 0.411672491030815,
`162` = 0.0568193835425769, `163` = 0.00439419098560105)), row.names = c(NA,
-24L), class = c("tbl_df", "tbl", "data.frame"))
Here's the initial plot:
# Mean concentration per sample label, dodged by MDA treatment and faceted by
# group. Layers, bottom to top: mean bars, mean +/- SE error bars, raw points.
ggplot(
  data,
  aes(x = label2, y = conc, colour = mda_label, fill = mda_label)
) +
  stat_summary(geom = "bar", fun = mean, position = position_dodge()) +
  stat_summary(
    geom = "errorbar",
    fun.data = mean_se,
    position = position_dodge(width = 0.9),
    width = 0.5,
    colour = "black"
  ) +
  geom_point(
    shape = 21,
    colour = "black",
    position = position_dodge(width = 0.9)
  ) +
  # Free x scales and free panel widths: each facet only shows its own labels.
  facet_grid(. ~ grp2, scales = "free_x", space = "free") +
  # The data contains zeros, so a pseudo-log axis is used instead of a log one.
  scale_y_continuous(
    trans = "pseudo_log",
    labels = scales::number_format(accuracy = 0.01)
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
But when I try to standardise the column widths with preserve="single", it gets messed up:
# Same chart as before, but every dodge uses preserve = "single" so that all
# bars get the width of a single dodged element. This is the variant whose
# dodging is reported as misaligned.
ggplot(
  data,
  aes(x = label2, y = conc, colour = mda_label, fill = mda_label)
) +
  stat_summary(
    geom = "bar",
    fun = mean,
    position = position_dodge(preserve = "single")
  ) +
  stat_summary(
    geom = "errorbar",
    fun.data = mean_se,
    position = position_dodge(width = 0.9, preserve = "single"),
    width = 0.5,
    colour = "black"
  ) +
  geom_point(
    shape = 21,
    colour = "black",
    position = position_dodge(width = 0.9, preserve = "single")
  ) +
  facet_grid(. ~ grp2, scales = "free_x", space = "free") +
  scale_y_continuous(
    trans = "pseudo_log",
    labels = scales::number_format(accuracy = 0.01)
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Since you're using data that has 0 values, you could add 0-valued rows for the other 'mda_label' levels in the grp2/label2 "standard" category.
# Append zero-valued placeholder rows so the "standard" label has an entry for
# the "mda_20" and "mda_200" levels as well as the level it already has.
data <- rbind(
  data,
  data.frame(
    grp2 = rep("standard", 2),
    label2 = rep("standard", 2),
    mda_label = c("mda_20", "mda_200"),
    conc = rep(0, 2)
  )
)
Also you never actually make the bar plot
# Dodged bars of conc per label2, one facet per grp2 with free scales and
# free panel sizes.
ggplot(data, aes(x = label2, y = conc, fill = mda_label)) +
  facet_grid(. ~ grp2, scales = "free", space = "free") +
  geom_col(position = position_dodge(width = 1))
I'm struggling to get coord_polar to work as I had hoped. I want each item to be represented by a coloured track, with a range of 1:50000. I then wanted to plot points over these tracks at the corresponding locations, with symbols representing the different categories. The points would then be annotated with the id.
Dataframe:
# Example data: one row per point, with the item (track) it belongs to, its
# location along the track, its category and its id label.
# Assigned to `my_data` because the plotting code that follows refers to the
# data frame by that name; the bare structure() call would only print it.
my_data <- structure(list(item = structure(c(1L, 2L, 2L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L), .Label = c("AA", "AB", "AC", "AD", "AE",
"BA", "BB", "BC", "BD", "BE"), class = "factor"), location = c(10045L,
12041L, 15035L, 22054L, 19023L, 49411L, 39012L, 3041L, 23065L,
33015L, 42069L, 26859L), category = structure(c(1L, 1L, 2L, 3L,
1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L), .Label = c("X", "Y", "Z"), class = "factor"),
id = structure(c(1L, 8L, 2L, 7L, 6L, 10L, 5L, 1L, 1L, 3L,
4L, 9L), .Label = c("Apple", "Banana", "Cherry", "Grape",
"Mango", "Melon", "Orange", "Pear", "Raspberry", "Strawberry"
), class = "factor")), .Names = c("item", "location", "category",
"id"), class = "data.frame", row.names = c(NA, -12L))
# One circular track per item, with points placed at their location along the
# track and labelled with their id.
my_data %>%
  ggplot(aes(item, location, shape = category, label = id)) +
  # y = Inf makes each item's bar span the whole axis, giving a grey track.
  geom_col(aes(y = Inf), fill = "gray80") +
  geom_point(size = 3) +
  geom_text(vjust = -1) +
  # expansion() replaces expand_scale(), which is deprecated since
  # ggplot2 3.3.0. The padding of 5 is added at the lower end of the item
  # axis only.
  scale_x_discrete(expand = expansion(add = c(5, 0))) +
  # location is mapped to the angle, so item becomes the radius.
  coord_polar(theta = "y") +
  theme_void()
If you want a break in the middle, you could change the item to a numeric value relating to its desired position:
# Convert the item factor to its integer level code, then shift every item
# after the fifth by one so that position 6 is left empty (the "break").
my_data %>%
mutate(item_pos = as.numeric(item),
item_pos = item_pos + if_else(item_pos > 5, 1, 0)) %>%
ggplot(aes(item_pos, location, shape = category, label = id)) +
# `...` is a placeholder for the remaining plot layers (not shown here).
...
Maybe you can work from this:
# Alternative sketch: one horizontal range per id from 0 to 50000, coloured by
# category, with the points drawn on top, then wrapped into polar coordinates.
ggplot(data, aes(x = location, y = id, colour = id)) +
  geom_linerange(
    aes(y = id, xmin = 0, xmax = 50000, colour = category),
    size = 2,
    alpha = 0.5
  ) +
  geom_point(size = 3) +
  coord_polar()
Dataframe
df <- data.frame(
structure(list(biological_group = structure(1:15, .Label = c("A",
"B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
"O"), class = "factor"), norm_expression = c(2L, 3L, 4L, 6L,
1L, 5L, 7L, 8L, 9L, 3L, 2L, 6L, 7L, 8L, 1L), SE = c(0.171499719,
0.089692493, 0.153777208, 0.188012958, 0.153776128, 0.192917199,
0.224766056, 0.231338325, 0.121716906, 0.094763028, 0.09635363,
0.069986333, 0.113681329, 0.094614957, 0.391182473), Group = structure(c(1L,
1L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("",
"Plant Products", "Sugars"), class = "factor")), class = "data.frame", row.names = c(NA,
-15L), .Names = c("biological_group", "norm_expression", "SE",
"Group")))
Sample code
library(ggplot2)
# Base plot: black bars of normalised expression per biological group, with
# the x-axis title removed.
DFplot <- ggplot(df, aes(biological_group, norm_expression)) +
  ylab("Relative Normalized\nExpression (∆∆Cq)") +
  geom_bar(fill = "black", stat = "identity") +
  theme(axis.title.x = element_blank())

# Full plot: adds +/- SE error bars, facets by Group with the strips switched
# to the bottom, and applies the classic theme.
DFplot2 <- DFplot +
  geom_errorbar(
    aes(ymin = norm_expression - SE, ymax = norm_expression + SE),
    width = 0.5
  ) +
  geom_boxplot() +
  facet_grid(~Group, scales = "free_x", switch = "x", space = "free_x") +
  scale_y_continuous(
    expand = c(0, 0),
    labels = scales::number_format(accuracy = 0.1)
  ) +
  theme_classic() +
  # theme_classic() is a complete theme and replaces all earlier theme()
  # settings, so the x-axis title has to be blanked again after it.
  theme(axis.title.x = element_blank())
That gives me this graph:
I'd like to remove the vertical lines from the strip text labels (specified by strip.background), like this:
I realize I could just use photoshop or something, but I have several graphs to make so it would be easier if I could just specify it in the code.
This answer was helpful.
In your case, you want to find the strip-b for your bottom strips to substitute.
Edit: Replaced top bar of bottom strip that was removed accidentally.
library(grid)
# Convert the plot to a grob table so individual pieces can be edited.
q <- ggplotGrob(DFplot2)

# A horizontal black line along the top edge (y = 1 npc) of its viewport.
lg <- linesGrob(
  x = unit(c(1, 0), "npc"),
  y = unit(c(1, 1), "npc"),
  gp = gpar(col = "black", lwd = 2)
)

# For every bottom strip ("strip-b" in the layout names), swap the first child
# of its first grob for the line. Presumably that child is the strip
# background rectangle, so only the top bar remains visible.
for (k in which(grepl("strip-b", q$layout$name))) {
  q$grobs[[k]]$grobs[[1]]$children[[1]] <- lg
}

grid.draw(q)
Thanks to combine stacked bars and dodged bars, I created the plot below using the data frame shown. But now, since the axis titles name the bars, how can I remove the legend elements other than for the one stacked bar? That is, can the legend show only the segments of the Big8 bar?
> dput(combo)
structure(list(firm = structure(c(12L, 1L, 11L, 13L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L), .Label = c("Avg.", "Co", "Firm1",
"Firm2", "Firm3", "Firm4", "Firm5", "Firm6", "Firm7", "Firm8",
"Median", "Q1", "Q3"), class = "factor"), metric = structure(c(5L,
1L, 4L, 6L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Avg.",
"Big8", "Co", "Median", "Q1", "Q3"), class = "factor"), value = c(0.0012,
0.0065, 0.002, 0.0036, 0.0065, 0.000847004466666667, 0.000658907411111111,
0.0002466389, 8.41422555555556e-05, 8.19149222222222e-05, 7.97185555555556e-05,
7.82742555555556e-05, 7.56679888888889e-05), grp = structure(c(1L,
2L, 3L, 6L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("Q1",
"Avg.", "Median", "Co", "Big8", "Q3"), class = "factor")), .Names = c("firm",
"metric", "value", "grp"), row.names = c(NA, -13L), class = "data.frame")
Here is the plotting code.
# Bars of value per grp, filled by firm; grp levels that contain several firms
# are drawn as stacked bars. The legend sits below the plot in two rows.
ggplot(combo, aes(x = grp, y = value, fill = firm)) +
  labs(x = "", y = "") +
  geom_bar(stat = "identity") +
  guides(fill = guide_legend(nrow = 2)) +
  theme(legend.position = "bottom")
The plot, which ideally would have a smaller set of elements in the legend.
You can manually set the breaks for scale_fill_discrete:
library(ggplot2)
# Same plot, but the fill legend is limited to the firms whose metric is
# "Big8" by passing them as the scale's breaks.
ggplot(combo, aes(x = grp, y = value, fill = firm)) +
  labs(x = "", y = "") +
  geom_bar(stat = "identity") +
  scale_fill_discrete(breaks = combo$firm[combo$metric == "Big8"]) +
  guides(fill = guide_legend(nrow = 2)) +
  theme(legend.position = "bottom")
I'm not 100% sure which labels you want to keep, but a manually entered vector, combo$firm and combo$metric will all work.
I am revisiting this issue I ran into approximately a year ago. I would like my 'colourbar' guide to effectively be displayed on a log scale so that the takeaway when looking at it is that increasingly darker values of blue reflect greater significance.
With the following code, I generate the below image:
# Heatmap of value by variable and Category. The fill gradient changes
# abruptly at 0.05: dark-to-bright blue below it, grey-to-white above it.
pz <- ggplot(dat.m, aes(x = variable, y = Category)) +
  geom_tile(aes(fill = value)) +
  labs(x = NULL, y = NULL) +
  scale_fill_gradientn(
    colours = c("#000066", "#0000FF", "#DDDDDD", "white"),
    values = c(0, 0.05, 0.050000000000001, 1.0),
    breaks = c(0, 0.000001, 0.01, 0.05, 1),
    guide = "colourbar"
  ) +
  theme_bw() +
  # Strip the panel decoration and ticks, put the legend on top and rotate
  # the legend and x-axis text.
  theme(
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "top",
    legend.text = element_text(angle = 45),
    axis.text.x = element_text(angle = 45)
  )
Or, I can display it as a "legend" as opposed to a "colourbar":
But what I really desire is something like this:
I have tried adding 'trans="log"' (scale_fill_gradientn(trans="log")), but there are lots of zeros in my data which causes a problem. If you have any ideas it would be greatly appreciated!
Previous wording:
I am trying to make a heatmap of p-values for different samples for various categorizations. There are two things I would like to modify on this plot:
I would like to adjust the legend of my geom_tile plot to emphasize the lower end of the legend scale while still maintaining the full spectrum of the gradient - similar to how it would look if it were a log scale. So essentially the white to blue transition from 1.0-0.05 and the blue to darkblue transition from 0.05-0.00 will be approximately equal in size. Is there a way that I can manually adjust the colorbar guide?
I would like to replace the y-axis names so that I can remove my "empty" row label. Note, the Categories are simply represented as letters here, but in my real data set they are long names. I have inserted "dummy" rows of data to split categorizations into chunks and ordered the tiles within each block to go from most significant to not significant - I am sure there is a better solution to this, but this is what I came up with after many failed attempts of other ideas I found on Stack Overflow! I have tried labeling them with scale_y_discrete, but this gets jumbled with the aforementioned ordering.
Help with either of these issues will be much appreciated!
Here is a sample dataset:
dput(dat.m)
structure(list(Category = structure(c(12L, 11L, 10L, 9L, 8L,
7L, 6L, 5L, 4L, 3L, 2L, 1L, 12L, 11L, 10L, 9L, 8L, 7L, 6L, 5L,
4L, 3L, 2L, 1L, 12L, 11L, 10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L,
1L), class = "factor", .Label = c("j", "i", "empty2", "h", "empty1",
"g", "f", "e", "d", "c", "b", "a")), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L), .Label = c("b2", "c1", "c2"), class = "factor"),
value = c(7.40214650772221e-06, 0.0075828339, 0.1825924627,
0.0384381317, 0.0440256659, 0.3659284985, 0.9777569144, 1,
0.0075828339, 1, 0.2193606406, 0.3659284985, 0.0004289756,
0.0011541045, 0.0004289756, 0.4400885491, 0.6121402215, 0.6724032426,
0.2735924085, 1, 0.018824582, 1, 0.4386503891, 0.4249526456,
1.05094571578633e-05, 0.0027216795, 0.715979827, 0.0050376405,
0.7473334763, 0.9053300832, 1, 1, 0.0015392848, 1, 0.039679469,
0.0950327519)), .Names = c("Category", "variable", "value"
), row.names = c(NA, -36L), class = "data.frame")
And here is my code:
# Palette from white (value 1) to very dark blue (value 0).
col_blue <- c("#FFFFFF", "#000099", "#000066", "#000033")

# Heatmap of value by variable and Category using that palette; theme_mary()
# is a theme function defined outside this snippet.
ggplot(dat.m, aes(x = variable, y = Category)) +
  geom_tile(aes(fill = value)) +
  labs(x = NULL, y = NULL) +
  scale_fill_gradientn(
    colours = col_blue,
    values = c(1, 0.05, 0.01, 0),
    guide = "colorbar"
  ) +
  theme_mary(base_size = 12)
UPDATE:
So now I have modified the code as such with the following results. I am getting closer to what I hope to achieve but I would like to play with the proportions of the colourbar to show the gradient from 0.05-0.0 a bit more clearly.
# Palette from white (value 1) to very dark blue (value 0).
col_blue <- c("#FFFFFF", "#000099", "#000066", "#000033")

# Heatmap with the fill legend suppressed and a separate colour scale added
# in an attempt to control the colourbar independently.
ggplot(dat.m, aes(x = variable, y = Category)) +
  geom_tile(aes(fill = value)) +
  xlab(NULL) +
  ylab(NULL) +
  # guide = "none" is the supported spelling of the original guide = FALSE,
  # which is deprecated since ggplot2 3.3.4.
  scale_fill_gradientn(
    colours = col_blue,
    values = c(1, 0.05, 0.01, 0),
    guide = "none"
  ) +
  scale_colour_gradientn(
    guide = "colourbar",
    limits = c(0, 1),
    breaks = c(1, 0.05, 0.01, 0),
    values = c(1, 0.05, 0.01, 0),
    colours = col_blue
  )
We can tell scale_fill_gradientn not to display a guide with guide=FALSE, then manually add our own with limits set to c(0,0.1) (or whatever range you want).
# Heatmap with the fill guide hidden and a colour scale with its own limits
# added on top.
ggplot(dat.m, aes(x = variable, y = Category)) +
  geom_tile(aes(fill = value)) +
  xlab(NULL) +
  ylab(NULL) +
  # guide = "none" hides the fill guide; it replaces guide = FALSE, which is
  # deprecated since ggplot2 3.3.4.
  scale_fill_gradientn(
    colours = col_blue,
    values = c(1, 0.05, 0.01, 0),
    guide = "none"
  ) +
  scale_colour_gradientn(
    guide = "colorbar",
    limits = c(0, 0.1),
    colours = col_blue
  )
As for your second point, why not just remove the "empty" rows from the source data before plotting?
For the "empty" rows, simply modify the data being used to drop them before (or as you are) plotting, e.g.: ggplot(dat.m[!grepl("^empty", dat.m$Category), ], aes(<etc>...))
For the legend, you can override the aesthetics specifically just for the legend. Here is one example, adjust to your taste: + guides(fill=guide_legend(override.aes=list(alpha=1)))