Order geom_lines from the highest to the lowest in each facet

Order geom_lines from the highest to the lowest in each facet - r

I have a factor comp_id that has 4 levels (comp1 to comp4). I want to order each level from the highest to the lowest in a geom_line plot.
I got this plot
using this script
library(data.table)
library(ggplot2)
# Convert the input data frame to a data.table.
dat <- as.data.table(df)
# Rank every row by comp_id and then by descending value, and store the rank
# as a zero-padded string that is used as the discrete x position.
# NOTE(review): "%02i" pads to two digits only. With 140 rows the ranks from
# 100 upwards are three characters wide, and as text "100".."105" sort ahead
# of "71".."99" -- this matches the misplaced points reported for comp3.
dat[, ord := sprintf("%02i", frank(dat, comp_id, -value, ties.method = "first"))]
# One line per component, one facet per component, each facet with its own
# x axis ("free_x") so only that component's ranks are shown.
ggplot(dat, aes(x = ord, y = value , group = comp_id , colour = comp_id))+
geom_line()+
facet_wrap(~comp_id, ncol = 1, scales = "free_x", labeller = label_parsed, drop = TRUE)+
# Tilt the x tick labels so they do not overlap.
theme(axis.text.x=element_text(angle=35, vjust=1, hjust=1,
))
to replace x axis labels
+scale_x_discrete(labels = dat[, setNames(as.character(predictor), ord)])
As you can see, it worked fine for all levels except comp3 where variables ordered (100 to 105) were plotted at the start of facet where they were supposed to be plotted at the end. I wonder what went wrong. Any suggestions will be appreciated.
DATA
> dput(df)
structure(list(predictor = c("c_C2", "c_C3", "c_C4", "d_D2",
"d_D3", "d_D4", "d_D5", "h_BF", "h_BFI", "h_ER", "h_f", "h_PET",
"h_QuFl", "h_Ra", "l_Da", "l_NaCo", "l_ShBe", "m_a", "m_DrDe",
"m_ElRa", "m_MeElm", "m_MeSlPe", "Mr_Co", "Mr_GRAv", "Mr_GREy",
"Mr_Mu", "Mr_Sa", "s_SaLo", "s_SiLo", "s_sSiLo", "s_Stl", "Sr_Li",
"Sr_SaCoCoTe", "Sr_SaLoSi", "Sr_SaMubcl", "c_C2", "c_C3", "c_C4",
"d_D2", "d_D3", "d_D4", "d_D5", "h_BF", "h_BFI", "h_ER", "h_f",
"h_PET", "h_QuFl", "h_Ra", "l_Da", "l_NaCo", "l_ShBe", "m_a",
"m_DrDe", "m_ElRa", "m_MeElm", "m_MeSlPe", "Mr_Co", "Mr_GRAv",
"Mr_GREy", "Mr_Mu", "Mr_Sa", "s_SaLo", "s_SiLo", "s_sSiLo", "s_Stl",
"Sr_Li", "Sr_SaCoCoTe", "Sr_SaLoSi", "Sr_SaMubcl", "c_C2", "c_C3",
"c_C4", "d_D2", "d_D3", "d_D4", "d_D5", "h_BF", "h_BFI", "h_ER",
"h_f", "h_PET", "h_QuFl", "h_Ra", "l_Da", "l_NaCo", "l_ShBe",
"m_a", "m_DrDe", "m_ElRa", "m_MeElm", "m_MeSlPe", "Mr_Co", "Mr_GRAv",
"Mr_GREy", "Mr_Mu", "Mr_Sa", "s_SaLo", "s_SiLo", "s_sSiLo", "s_Stl",
"Sr_Li", "Sr_SaCoCoTe", "Sr_SaLoSi", "Sr_SaMubcl", "c_C2", "c_C3",
"c_C4", "d_D2", "d_D3", "d_D4", "d_D5", "h_BF", "h_BFI", "h_ER",
"h_f", "h_PET", "h_QuFl", "h_Ra", "l_Da", "l_NaCo", "l_ShBe",
"m_a", "m_DrDe", "m_ElRa", "m_MeElm", "m_MeSlPe", "Mr_Co", "Mr_GRAv",
"Mr_GREy", "Mr_Mu", "Mr_Sa", "s_SaLo", "s_SiLo", "s_sSiLo", "s_Stl",
"Sr_Li", "Sr_SaCoCoTe", "Sr_SaLoSi", "Sr_SaMubcl"), comp_id = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("comp1",
"comp2", "comp3", "comp4"), class = "factor"), value = c(0.0633325075111356,
-0.0193713154441617, 0.000785081075580719, 0.287610195287972,
-0.0913783988809322, -0.122928438782758, 0.305621459875726, 0.0356570047659489,
0.367574915852176, -0.240835821698893, 0.0035597425358522, 0.295952594554233,
-0.0439920206129066, -0.235580426938533, 0.191947159509267, -0.132931615006652,
0.065155805120025, 0.038311284807646, 0.187182963731454, 0.120969596703282,
-0.118935354491654, -0.173851183397175, 0.125870264508295, 0.158977975187947,
-0.209351605852615, -0.0231602829054583, 0.078383405846316, 0.0959455355349004,
0.238306328058919, -0.188667962455942, -0.138302814516594, -0.0586994514783439,
0.019524606432138, 0.210636138928319, -0.204454169255484, -0.149879080476447,
0.282741114373524, -0.272911905666994, 0.102508662574812, -0.35056583225677,
0.257262737814283, 0.202117594283655, 0.191773977367133, 0.298513575892895,
0.139576016330362, 0.165641757285727, -0.071542760140058, 0.116819894570386,
0.145104320521166, 0.126636637925691, 0.0810830011112734, -0.0949935353116725,
0.0785254958291791, 0.0326439188223452, 0.065833153228218, 0.155405435626813,
0.128737420120173, 0.214943178842044, -0.0210359058420932, 0.0117832135586799,
0.0762824228178598, -0.29145271973574, -0.17089908579109, -0.0992003952524557,
0.163749177828358, 0.196561728687348, 0.0951493527111932, 0.17238711709624,
0.0638301486629609, -0.0351097560634362, 0.0647994534663104,
-0.154895398844537, 0.186448424833243, 0.240881706707846, -0.241364320964797,
-0.089459273670017, 0.0491598702691844, -0.200660845431752, -0.0339722426751736,
0.131396251991635, -0.195471026941394, -0.05919918680627, -0.184160478394361,
0.129464190293723, 0.193021703469902, 0.178985522376368, -0.245966624042807,
-0.23478025602535, 0.198620462933836, -0.157573246492692, -0.00808698000885529,
0.0413693509741982, -0.121020524702316, 0.105148862728949, 0.214386790903084,
-0.204515275979768, -0.0906160054540168, -0.276985960928353,
0.0768294557774406, -0.074181085595352, 0.138680723918144, -0.119684214245213,
-0.0919678069134681, 0.322602153170851, 0.228878715511945, -0.433082572929477,
0.05754301130056, 0.130719232236558, 0.253999327778221, 0.0469683234741709,
-0.0258294537417061, -0.258318910865727, -0.00406472629347961,
-0.165003562015847, -0.0292142578447021, 0.00862320222199929,
0.0875367120866572, 0.0331716236283754, -0.0418387105725687,
-0.12523142839593, -0.200857915084298, 0.138378222132672, 0.00992811008724002,
-0.0201043482518474, -0.148894977354092, -0.323240591170999,
-0.0556713655820164, 0.379033571103569, -0.264420286734383, 0.127560649906739,
-0.00546455207923468, -0.203293330594455, -0.122085266718802,
-0.0970860819632599, -0.173818516285048, -0.0585031143296301,
0.125084378608705, 0.0655074180474436, 0.254339734692359, 0.00114212078410835
)), class = "data.frame", .Names = c("predictor", "comp_id",
"value"), row.names = c(NA, -140L))

Here is an approach using tidyverse and continuous scale
library(tidyverse)
# Build a numeric x position: sort by facet (comp_id) and, within each facet,
# by descending value, then number the rows 1..n in that order. Because `ord`
# is numeric it is ordered numerically on the axis, so rank 100 comes after
# rank 99.
dat <- df %>%
  arrange(comp_id, desc(value)) %>%
  mutate(ord = row_number())

# One line per component; each facet gets its own x range ("free_x").
# The numeric positions are relabelled with the predictor names by using
# every `ord` value as a break and the matching `predictor` as its label.
ggplot(dat, aes(x = ord, y = value, group = comp_id, colour = comp_id)) +
  geom_line() +
  facet_wrap(~comp_id, ncol = 1, scales = "free_x", drop = TRUE) +
  theme(axis.text.x = element_text(angle = 35, vjust = 1, hjust = 1)) +
  scale_x_continuous(
    labels = dat$predictor,
    breaks = dat$ord,
    expand = c(0.02, 0.02)
  )

In addition to the nice answer by #missuse, there was another way that gave me what I wanted.
using as factor / as numeric / as.character with the x axis
aes(x = as.factor(as.numeric(as.character(ord)))
and using as numeric /as character while replacing the x axis labels
as.numeric(as.character(ord))
The final script is
# `ord` holds the rank as text. Converting it to a number and then to a
# factor makes the factor levels follow numeric order, so the x positions
# are arranged 1, 2, ..., 140 instead of being sorted as strings.
ggplot(
  dat,
  aes(
    x = as.factor(as.numeric(as.character(ord))),
    y = value,
    group = comp_id,
    colour = comp_id
  )
) +
  geom_line() +
  facet_wrap(
    ~comp_id,
    ncol = 1,
    scales = "free_x",
    labeller = label_parsed,
    drop = TRUE
  ) +
  theme(axis.text.x = element_text(angle = 35, vjust = 1, hjust = 1)) +
  # Named label vector: names are the numeric ranks (matching the factor
  # levels above), values are the predictor names shown on the axis.
  scale_x_discrete(
    labels = dat[, setNames(as.character(predictor), as.numeric(as.character(ord)))]
  )

Related

ggplot: why does order on x-axis not level instead of printing alphabetically?

I have this plot
With
> str(a)
'data.frame': 150 obs. of 2 variables:
$ study: Factor w/ 7 levels "A","S","H","D",..: 7 2 4 5 3 1 7 2 2 4 ...
$ n : Factor w/ 6 levels "N0","N1","N2a",..: 1 1 2 4 1 1 2 1 1 1 ...
I would like the x-axis to arrange by sample size, i.e. level = c("all", "S", "H", "B", "C", "A", "K", "D")
As you can see, the order is printed alphabetically.
I have tried specifying as ... aes(x=factor(nystudie, level=c(...), but that does not work. What am I doing wrong? I followed this post
library(tidyverse)
# Palette used for the bar outlines (colour) and, at 20% alpha, for the fill.
colsze <- c("#E1B930", "#2C77BF", "#E38072", "#6DBCC3", "grey40", "black", "#8B3A62")
# Count observations per study and stage, plus an extra "all" group, and draw
# them as dodged bars with the count printed above each bar.
a %>%
as_tibble() %>%
mutate(nystudie=as.factor(study),
n.seven=as.factor(n)) %>%
# Append a copy of every row relabelled "all" so the pooled counts get their
# own group on the x axis.
bind_rows(., mutate(., nystudie="all")) %>%
# .drop=F keeps stage levels that have zero rows within a study.
count(nystudie, n.seven, .drop=F) %>%
# The x order is requested here through the factor levels.
ggplot(aes(x = factor(nystudie, level = c("all", "S", "H", "B", "C", "A", "K", "D")),
n, color = n.seven, fill= n.seven, label=n)) +
geom_col(position = position_dodge2(preserve = "single", padding = 0.1))+
geom_text(aes(label=n),position = position_dodge2(0.9), vjust=-0.25, fontface=2, cex=4.5, show.legend = F) +
scale_fill_manual(values = alpha(colsze, .2),
name="Stage", label=c("N0", "N1", "N2a", "N2b", "N2c", "N3")) +
scale_color_manual(values = colsze,
name="Stage", label=c("N0", "N1", "N2a", "N2b", "N2c", "N3")) +
# NOTE(review): these labels are unnamed and listed in alphabetical order
# (All, A, B, C, D, H, K, S), whereas the factor levels above are ordered
# all, S, H, B, C, A, K, D. Unnamed labels are presumably applied by
# position, so the axis text would not match the bars -- confirm.
scale_x_discrete(name = "", label=c("All\n(n=1,905)",
"A\n(n=221)",
"B\n(n=234)",
"C\n(n=232)",
"D\n(n=108)",
"H\n(n=427)",
"K\n(n=221)",
"S\n(n=462)")) +
scale_y_continuous(name="",
breaks=seq(0,950,100)) +
# Zoom the y axis without dropping data outside the range.
coord_cartesian(ylim = c(0,950)) +
guides(fill = guide_legend(nrow = 1)) + theme(axis.text.x = element_text(color = "grey20", size =15),
legend.text=element_text(size=16), legend.title=element_text(size=16, face="bold"),
legend.position="top")
Data sample
a <- structure(list(study = structure(c(7L, 2L, 4L, 5L, 3L, 1L, 7L,
2L, 2L, 4L, 4L, 6L, 2L, 5L, 3L, 7L, 1L, 1L, 2L, 6L, 1L, 3L, 2L,
7L, 2L, 2L, 6L, 6L, 6L, 2L, 1L, 2L, 6L, 1L, 2L, 2L, 3L, 4L, 2L,
3L, 2L, 5L, 2L, 3L, 6L, 5L, 3L, 2L, 4L, 3L, 5L, 6L, 2L, 7L, 2L,
3L, 3L, 3L, 7L, 7L, 3L, 4L, 1L, 1L, 2L, 2L, 6L, 2L, 3L, 2L, 3L,
2L, 1L, 2L, 3L, 5L, 3L, 1L, 1L, 1L, 7L, 4L, 3L, 2L, 4L, 3L, 3L,
3L, 2L, 6L, 7L, 3L, 2L, 2L, 6L, 2L, 2L, 6L, 7L, 3L, 3L, 3L, 6L,
2L, 2L, 7L, 7L, 1L, 1L, 6L, 3L, 3L, 7L, 1L, 2L, 7L, 1L, 1L, 7L,
4L, 4L, 4L, 2L, 3L, 3L, 6L, 1L, 4L, 6L, 3L, 5L, 5L, 3L, 3L, 7L,
5L, 3L, 6L, 3L, 5L, 2L, 3L, 7L, 6L, 2L, 1L, 6L, 5L, 1L, 6L), .Label = c("A",
"S", "H", "D", "K", "C", "B"), class = "factor"), n = structure(c(1L,
1L, 2L, 4L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 4L, 2L, 1L, 2L,
3L, 2L, 2L, 4L, 4L, 4L, 2L, 4L, 1L, 2L, 4L, 1L, 1L, 4L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 4L, 1L, 1L, 4L, 2L, 1L, 1L, 4L, 1L, 1L, 2L,
1L, 5L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 2L, 1L,
4L, 1L, 1L, 1L, 1L, 6L, 1L, 2L, 5L, 4L, 2L, 6L, 1L, 4L, 2L, 4L,
2L, 1L, 1L, 4L, 1L, 2L, 1L, 1L, 4L, 4L, 4L, 1L, 4L, 2L, 1L, 1L,
4L, 2L, 1L, 2L, 1L, 5L, 5L, 1L, 4L, 1L, 2L, 2L, 4L, 1L, 1L, 1L,
2L, 4L, 4L, 1L, 5L, 2L, 1L, 5L, 2L, 4L, 1L, 1L, 1L, 4L, 4L, 1L,
1L, 4L, 4L, 4L, 1L, 4L, 4L, 1L, 4L, 5L, 4L, 5L, 1L, 5L, 1L, 1L,
4L, 2L, 1L, 2L, 4L), .Label = c("N0", "N1", "N2a", "N2b", "N2c",
"N3"), class = "factor")), row.names = c(NA, -150L), class = "data.frame")

The levels are being changed again at scale_x_discrete step. Try :
library(dplyr)
library(ggplot2)
# Count observations per study and stage (plus a pooled "all" group) and plot
# them as dodged bars ordered by sample size.
#
# The ordering and the axis text are both defined in a single factor() call:
# `levels` fixes the order and `labels` supplies the matching display text,
# so the two can never get out of step. scale_x_discrete() therefore only
# has to blank the axis title.
# `colsze` is the colour vector defined in the question's script.
a %>%
  mutate(
    nystudie = as.factor(study),
    n.seven = as.factor(n)
  ) %>%
  # Append a copy of every row relabelled "all" for the pooled counts.
  bind_rows(., mutate(., nystudie = "all")) %>%
  # .drop = FALSE keeps stage levels with zero rows within a study.
  count(nystudie, n.seven, .drop = FALSE) %>%
  mutate(
    nystudie = factor(
      nystudie,
      levels = c("all", "S", "H", "B", "C", "A", "K", "D"),
      labels = c(
        "All\n(n=1,905)", "S\n(n=462)", "H\n(n=427)", "B\n(n=234)",
        "C\n(n=232)", "A\n(n=221)", "K\n(n=221)", "D\n(n=108)"
      )
    )
  ) %>%
  ggplot(aes(x = nystudie, y = n, colour = n.seven, fill = n.seven, label = n)) +
  geom_col(position = position_dodge2(preserve = "single", padding = 0.1)) +
  geom_text(
    aes(label = n),
    position = position_dodge2(0.9),
    vjust = -0.25,
    fontface = 2,
    size = 4.5,
    show.legend = FALSE
  ) +
  scale_fill_manual(
    values = alpha(colsze, 0.2),
    name = "Stage",
    labels = c("N0", "N1", "N2a", "N2b", "N2c", "N3")
  ) +
  scale_colour_manual(
    values = colsze,
    name = "Stage",
    labels = c("N0", "N1", "N2a", "N2b", "N2c", "N3")
  ) +
  scale_x_discrete(name = "") +
  scale_y_continuous(name = "", breaks = seq(0, 950, 100)) +
  # Zoom the y axis without dropping data outside the range.
  coord_cartesian(ylim = c(0, 950)) +
  guides(fill = guide_legend(nrow = 1)) +
  theme(
    axis.text.x = element_text(colour = "grey20", size = 15),
    legend.text = element_text(size = 16),
    legend.title = element_text(size = 16, face = "bold"),
    legend.position = "top"
  )

Reversed Stacked bar chart in R leads to weird positioning

I am trying to create a stacked bar plot in R, and to reverse the order.
df <- structure(list(HouseholdEarnings = structure(c(2L, 2L, 3L, 3L,
3L, 2L, 2L, 1L, 4L, 2L, 3L, 6L, 5L, 4L, 2L, 1L, 2L, 3L, 1L, 3L,
3L, 3L, 2L, 2L, 2L, 6L, 2L, 5L, 2L, 6L, 2L, 2L, 3L, 1L, 3L, 2L,
4L, 2L, 1L, 3L, 2L, 1L, 5L, 3L, 3L, 3L, 2L, 2L, 3L, 2L, 1L, 3L,
3L, 4L, 2L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 6L, 4L, 3L, 3L, 2L, 3L,
4L, 2L, 2L, 3L, 2L, 4L, 1L, 1L, 2L, 2L, 2L, 4L, 4L, 6L, 3L, 4L,
2L, 4L, 4L, 2L, 4L, 6L, 3L, 4L, 1L, 2L, 4L, 2L, 2L, 5L, 3L, 2L
), .Label = c("Below $2,000 per month", "$2,000 - $3,999", "$4,000 - $5,999",
"$6,000 - $7,999", "$8,000 - $9,999", "$10,000 & above"), class = c("ordered",
"factor"))), row.names = c(NA, -100L), class = "data.frame")
Based on solutions from other threads, putting geom_col(position = position_stack(reverse = TRUE)) solved the issue.
# Summarise the share of each earnings band and draw one horizontal stacked
# bar with a percentage label inside each segment.
df %>% group_by(HouseholdEarnings) %>% summarise(Count = n()) %>% mutate(Proportion=Count/sum(Count), group='All') %>%
as.data.frame() %>% mutate_if(is.numeric, round, digits = 2) %>%
ggplot(aes(fill=HouseholdEarnings, y=Proportion, x=group)) +
# The bar segments are stacked in reverse order here ...
geom_col(position = position_stack(reverse = T), color = "black") +
geom_text(aes(label=paste0(Proportion*100, "%")),
# ... but the labels use position_stack() without `reverse`, so they are
# stacked in the default order and no longer line up with the segments.
position=position_stack(vjust=0.5), colour="white",size=3) +
coord_flip()
However, now my labels are out of position (still in the original positions):
It is fine without using reverse = T:
# Share of each earnings band as one horizontal stacked bar, with the
# percentage printed in the middle of every segment. Bars and labels both use
# the default stacking order, so they line up.
df %>%
  group_by(HouseholdEarnings) %>%
  summarise(Count = n()) %>%
  mutate(
    Proportion = Count / sum(Count),
    group = "All"
  ) %>%
  as.data.frame() %>%
  mutate_if(is.numeric, round, digits = 2) %>%
  ggplot(aes(x = group, y = Proportion, fill = HouseholdEarnings)) +
  geom_col(position = position_stack(), color = "black") +
  geom_text(
    aes(label = paste0(Proportion * 100, "%")),
    position = position_stack(vjust = 0.5),
    colour = "white",
    size = 3
  ) +
  coord_flip()
Edit: I realize that only the labels are still fixed to their original positions, so is there any way to flip them?

You need to pass reverse = TRUE in your geom_text too:
# Share of each earnings band as one horizontal stacked bar in reversed
# stacking order, with the percentage printed in the middle of every segment.
df %>%
  group_by(HouseholdEarnings) %>%
  summarise(Count = n()) %>%
  mutate(
    # Count is a whole number, so only the proportion needs rounding.
    Proportion = round(Count / sum(Count), digits = 2),
    group = "All"
  ) %>%
  ggplot(aes(x = group, y = Proportion, fill = HouseholdEarnings)) +
  geom_col(position = position_stack(reverse = TRUE), colour = "black") +
  geom_text(
    aes(label = paste0(Proportion * 100, "%")),
    # The labels must be stacked with the same `reverse` setting as the bars;
    # otherwise they stay in the default order and land on the wrong segment.
    position = position_stack(vjust = 0.5, reverse = TRUE),
    colour = "white",
    size = 3
  ) +
  coord_flip()
Does it answer your question ?

saving figures like a loop until end of a data frame

I have data like this, and I want to save the figures one after another, one figure per `rn` group:
df<- structure(list(x = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L), rn = structure(c(1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("AAAAA",
"BBBBB", "CCCCC", "DDDDD"), class = "factor"), key = structure(c(2L,
4L, 3L, 1L, 2L, 4L, 3L, 1L, 2L, 4L, 3L, 1L, 2L, 4L, 3L, 1L), .Label = c("WSAfrica",
"Wscanada", "WSInida", "WSUSA"), class = "factor"), Median = c(0.000621,
0.000777, 0.000574, 0.000537, 0.000381, 0.00177, 0.002, 0.000457,
0.00247, 0.00199, 0.00287, 0.00224, 5.94e-05, 4.12e-05, 4.44e-05,
5.68e-05), SD = c(0.000127453, 0.000107802, 0.001048659, 9.32e-05,
9.23e-05, 0.000120554, 0.000914697, 0.000167046, 0.000125033,
0.000410528, 0.000450444, 0.000310483, 5.91e-06, 8.98e-06, 1.11e-05,
1.16e-05)), class = "data.frame", row.names = c(NA, -16L))
I tried the following function but I get an error saying that
Error in UseMethod("grid.draw") : no applicable method for
'grid.draw' applied to an object of class "function"
# For each `rn` group, build a point + error-bar plot and try to save it to
# "<rn>.pdf".
d_ply(df, .(rn),
# Inside the function `x` is the subset of `df` for one `rn` value;
# aes(x = x) refers to the column named `x` in that subset.
function(x) (ggplot(x, aes(x = x, y = Median))+
geom_point() +
scale_x_discrete(limit = c("Wscanada", "WSUSA", "WSInida","WSAfrica")) +
geom_errorbar(aes(ymin = Median-SD, ymax = Median+SD))+
# NOTE(review): there is no pipe here to supply `.`, so it presumably
# resolves to the `.()` quoting function used above in `.(rn)`. Because
# `filename` is named, `.` is taken as the plot argument, which would explain
# the "grid.draw applied to an object of class 'function'" error -- confirm.
ggsave(., filename = paste0("", x$rn[1],".pdf"))))

You're already adding the plot to ggsave, no need to use the . for the first argument:
# For each `rn` group, build a point + error-bar plot and save it to
# "<rn>.pdf" in the working directory.
#
# The plot is stored in `p` and handed to ggsave() through `plot =` rather
# than being "added" to the ggsave() call with `+`. Adding only worked because
# ggsave() defaulted to last_plot() and returned NULL; newer ggplot2 releases
# return the file name, and adding that to a plot is an error.
d_ply(df, .(rn), function(x) {
  # NOTE(review): aes(x = x) maps the integer column `x`, while the limits
  # below are the values of `key`; presumably `x = key` was intended --
  # confirm against the desired figure before changing it.
  p <- ggplot(x, aes(x = x, y = Median)) +
    geom_point() +
    scale_x_discrete(limits = c("Wscanada", "WSUSA", "WSInida", "WSAfrica")) +
    geom_errorbar(aes(ymin = Median - SD, ymax = Median + SD))

  # One file per group, named after the group's `rn` value.
  ggsave(filename = paste0(x$rn[1], ".pdf"), plot = p)
})

Normalization of data within ggplot

I have my data as
melted.df <- structure(list(organisms = structure(c(1L, 1L, 1L, 2L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 1L, 1L, 1L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
1L, 1L, 1L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 1L, 1L, 2L, 3L,
3L, 3L, 3L, 4L, 4L, 4L), .Label = c("Botrytis cinerea", "Fusarium graminearum",
"Human", "Mus musculus"), class = "factor"), types = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("AllMismatches",
"mismatchType2", "MismatchesType1", "totalDNA"), class = "factor"),
mutations = c(30501L, 12256L, 58357L, 366531L, 3475L, 186907L,
253453L, 222L, 24906L, 2775L, 247990L, 12324L, 4395L, 25324L,
77862L, 1862L, 112217L, 163117L, 100L, 17549L, 1057L, 20331L,
18177L, 7861L, 33033L, 288669L, 1613L, 74690L, 90336L, 122L,
7357L, 1718L, 227659L, 635951L, 229493L, 868052L, 2418724L,
65833L, 1081903L, 1339758L, 4318L, 59387L, 15199L, 2134229L
)), row.names = c(NA, -44L), class = "data.frame")
The value totalDNA in the types column indicates the total DNA in the data, whereas the mismatch types are the mutations. I would like to normalize this data based on the totalDNA values and plot it. The way I am plotting right now doesn't give an accurate picture of the data, because totalDNA inflates the whole y-axis and the other three types (mismatchType2, MismatchesType1 and AllMismatches) are not properly visible relative to totalDNA. What would be a better way to plot this? Should I first calculate the percentage, or perhaps use log scaling? Thanks for helping me out.
# Raw mutation counts per type, one panel per organism, on a linear y axis.
# The x axis title, tick labels and ticks are blanked; the colour legend
# identifies the types instead.
ggplot(melted.df, aes(x = types, y = mutations, colour = types)) +
  geom_point() +
  xlab("Types") +
  ylab("Mismatches") +
  facet_grid(. ~ organisms) +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  )

Try a log scale?
# Same plot as in the question, but with a log10 y axis so that the large
# totalDNA counts no longer squash the three mismatch types.
ggplot(melted.df, aes(x = types, y = mutations, colour = types)) +
  geom_point() +
  xlab("Types") +
  ylab("Mismatches") +
  facet_grid(. ~ organisms) +
  scale_y_log10() + # add log scale
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  )
How would you normalise on total DNA? Would you use the (geometric) mean?

Increase space between x-axis factors in ggplot

I am using the geom_pointrange function in ggplot2 in order to plot the spread of some measurement over different condition for 5 subjects. In order not to have the subjects overlap I have constructed the plot as follows:
Final = structure(list(Subject = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L), .Label = c("1", "2", "3", "4", "5"), class = "factor"),
X00.conditionName = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L,
4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L,
4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L,
3L, 4L, 4L), .Label = c("EyeClose-Haptic", "mixed-Haptic_Visual",
"only-Haptic", "only-Visual"), class = "factor"), X03.totalTargetNumber = c(2L,
3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L,
2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L,
3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L), Accuracy = c(0.075870763,
0.0907863686, 0.0222156611, 0.0492028585333333, 0.0301178471,
0.0736098328666667, 0.0329723832, 0.0455095300666667, 0.065151615,
0.0979033533333333, 0.0247176775, 0.0335825226666667, 0.027385248,
0.0462643053333333, 0.037272505, 0.0652166726666667, 0.043005086,
0.061848328, 0.031106749, 0.0275656054, 0.026701889, 0.0373967466666667,
0.028998468, 0.03219287, 0.0597213356, 0.0851717708333333,
0.030286913, 0.0779058462666667, 0.043368508, 0.051437624,
0.029002474, 0.0479204566666667, 0.094555739, 0.0856268291666667,
0.031908514, 0.0310441326666667, 0.036311762, 0.0496942306666667,
0.054625148, 0.0482682121666667), upperCI = c(0.116082073022708,
0.139632763787946, 0.0315087794760623, 0.0727058964327625,
0.0468512606854127, 0.116787586356955, 0.0444933233012107,
0.062820743812494, 0.0858551911272202, 0.136013260005381,
0.0327074347874691, 0.0460471773903695, 0.035302995136302,
0.0740077338495226, 0.0641795522210299, 0.131047110446756,
0.0572545979325947, 0.0809511078363974, 0.0414215170576924,
0.0341480438532189, 0.0382253716300962, 0.0519626825555577,
0.0377955915789704, 0.0430125127419472, 0.0903928001427357,
0.114245467448517, 0.0461054194398361, 0.129350863514659,
0.0635159480110737, 0.0717647837071829, 0.0371919026867606,
0.0615899295823839, 0.170222051412597, 0.128502458351433,
0.046712862081242, 0.0388340720489338, 0.0574188259607336,
0.0786845830951613, 0.0844193698576058, 0.0784830058409822
), lowerCI = c(0.0356594529772922, 0.0419399734120541, 0.0129225427239377,
0.0256998206339042, 0.0133844335145873, 0.0304320793763786,
0.0214514430987893, 0.0281983163208393, 0.0444480388727798,
0.059793446661286, 0.0167279202125309, 0.0211178679429639,
0.019467500863698, 0.0185208768171441, 0.0103654577789701,
-0.000613765113422152, 0.0287555740674053, 0.0427455481636026,
0.0207919809423076, 0.0209831669467811, 0.0151784063699038,
0.0228308107777757, 0.0202013444210296, 0.0213732272580528,
0.0290498710572643, 0.0560980742181497, 0.0144684065601638,
0.0264608290186746, 0.0232210679889263, 0.0311104642928171,
0.0208130453132394, 0.0342509837509495, 0.018889426587403,
0.0427511999819006, 0.017104165918758, 0.0232541932843995,
0.0152046980392664, 0.0207038782381721, 0.0248309261423941,
0.0180534184923511), CondLevel = c("EyeClose-Haptic2", "EyeClose-Haptic3",
"mixed-Haptic_Visual2", "mixed-Haptic_Visual3", "only-Haptic2",
"only-Haptic3", "only-Visual2", "only-Visual3", "EyeClose-Haptic2",
"EyeClose-Haptic3", "mixed-Haptic_Visual2", "mixed-Haptic_Visual3",
"only-Haptic2", "only-Haptic3", "only-Visual2", "only-Visual3",
"EyeClose-Haptic2", "EyeClose-Haptic3", "mixed-Haptic_Visual2",
"mixed-Haptic_Visual3", "only-Haptic2", "only-Haptic3", "only-Visual2",
"only-Visual3", "EyeClose-Haptic2", "EyeClose-Haptic3", "mixed-Haptic_Visual2",
"mixed-Haptic_Visual3", "only-Haptic2", "only-Haptic3", "only-Visual2",
"only-Visual3", "EyeClose-Haptic2", "EyeClose-Haptic3", "mixed-Haptic_Visual2",
"mixed-Haptic_Visual3", "only-Haptic2", "only-Haptic3", "only-Visual2",
"only-Visual3")), .Names = c("Subject", "X00.conditionName",
"X03.totalTargetNumber", "Accuracy", "upperCI", "lowerCI", "CondLevel"
), row.names = c(NA, -40L), class = "data.frame")
# library() stops with an error if ggplot2 is missing; require() would only
# return FALSE and let the script fail later.
library(ggplot2)

# Draw accuracy with confidence intervals per condition, one colour per
# subject and one panel per target number, into a 12 x 8 inch PDF.
pdf("Pilot2.pdf", width = 12, height = 8)

# Upper and lower ends of each point range.
limits <- aes(ymax = upperCI, ymin = lowerCI)

BaseLayer <- ggplot(
  data = Final,
  aes(x = X00.conditionName, y = Accuracy, color = Subject, group = Subject)
)

# print() is explicit so the plot is also drawn when this script is run via
# source(), where top-level values are not auto-printed and the PDF would
# otherwise be empty.
print(
  BaseLayer +
    # Dodge the subjects sideways within each condition so they do not overlap.
    geom_pointrange(limits, position = position_dodge(width = 1), size = 1.5) +
    theme(
      axis.text = element_text(size = 14),
      axis.title = element_text(size = 14),
      axis.text.x = element_text(angle = 25, hjust = 1)
    ) +
    facet_grid(. ~ X03.totalTargetNumber) +
    ggtitle("Pilot 2") +
    xlab("Condition")
)

dev.off()
As you can see the x-axis is discrete, and the points are very "crowded", so that it is difficult to tell apart the different categories.
Is there a way to increase the space between the different categories ?

The best solution is to use facets to create 8 separate tall and skinny plots with all these features, separated by a thin white gutter, each with a solid label at the top. You could keep or lose the x-axis labels. This creates one figure of 8 graphs that communicates better than one big graph.
Like this: Stack-ggplot2-geom-pointrange-facet-grid-with-coord-flip
except yours would be vertical.

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Order geom_lines from the highest to the lowest in each facet - r

Related

ggplot: why does order on x-axis not level instead of printing alphabetically?

Reversed Stacked bar chart in R leads to weird positioning

saving figures like a loop until end of a data frame

Normalization of data within ggplot

Increase space between x-axis factors in ggplot

Categories

Resources