Order Y axis labels based on X increasing values - r

I have this data like this
z <- structure(list(Description = c("Neurotransmitter receptors and postsynaptic signal transmission",
"Muscle contraction", "Class A/1 (Rhodopsin-like receptors)",
"Signaling by Rho GTPases", "Metabolism of carbohydrates", "Extracellular matrix organization",
"Transmission across Chemical Synapses", "G alpha (i) signalling events",
"GPCR ligand binding", "Neuronal System"), p.adjust = c(0.563732077253957,
0.563732077253957, 0.774251160588198, 0.797669099976286, 0.655931854998983,
0.655931854998983, 0.563732077253957, 0.774251160588198, 0.774251160588198,
0.655931854998983), Count = c(9L, 9L, 9L, 9L, 10L, 10L, 11L,
11L, 12L, 13L)), row.names = c("R-HSA-112314", "R-HSA-397014",
"R-HSA-373076", "R-HSA-194315", "R-HSA-71387", "R-HSA-1474244",
"R-HSA-112315", "R-HSA-418594", "R-HSA-500792", "R-HSA-112316"
), class = "data.frame")
I would like to plot labels of y axis based on values of x axis, so from the smallest one to the largest one. Now it plots me in alphabetic order. How to do this?
ggplot(z, aes(Count, Description, size=Count, color=p.adjust))+
geom_point()
Something like this

With forcats::fct_reorder(Description, Count) you can change the order of y values.
library(ggplot2)
library(forcats)
ggplot(z, aes(Count, fct_reorder(Description, Count), size=Count, color=p.adjust))+
geom_point()
Created on 2022-02-01 by the reprex package (v2.0.1)

Related

Grouped bar chart in R for multiple filter and select

Following is my dataset:
Result
course1
course2
course3
pass
15
17
18
pass
12
14
19
Fail
9
13
3
Fail
3
2
0
pass
14
11
20
Fail
5
0
7
I want to plot a grouped bar graph. I am able to plot following graphs but I want both the results in same graph.
par(mfrow=c(1,1))
options(scipen=999)
coul <- brewer.pal(3, "Set2")
# Bar graph for passed courses
result_pass <-data %>% filter(Result=='Pass') %>% summarize(c1_tot=sum(course1),
c2_tot = sum(course2), c3_tot = sum(course3) )
col_sum <- colSums(result_pass[,1:3])
barplot(colSums(result_pass[,1:3]), xlab = "Courses", ylab = "Total Marks", col = coul, ylim=range(pretty(c(0, col_sum))), main = "Passed courses ")
# Bar graph for Failed courses
result_fail <-data %>% filter(Result=='Fail') %>% summarize(c1_tot=sum(course1),
c2_tot = sum(course2), c3_tot = sum(course3) )
col_sum <- colSums(result_fail[,1:3])
barplot(colSums(result_fail[,1:3]), xlab = "Courses", ylab = "Total Marks", col = coul, ylim=range(pretty(c(0, col_sum))), main = "Failed courses ")
Any suggestion for which I can merge both the above plots and create grouped bar graph for Pass and Fail courses.
It's probably easier than you think. Just put the data directly in aggregate and use as formula . ~ Result, where . means all other columns. Removing first column [-1] and coerce as.matrix (because barplot eats matrices) yields exactly the format we need for barplot.
This is the basic code:
barplot(as.matrix(aggregate(. ~ Result, data, sum)[-1]), beside=TRUE)
And here with some visual enhancements:
# Grouped bar chart: aggregate() sums every course column per Result value,
# [-1] drops the Result column, and as.matrix() gives barplot() the matrix
# it needs to draw the bars side by side (beside = TRUE).
barplot(
  as.matrix(aggregate(. ~ Result, data, sum)[-1]),
  beside = TRUE,
  ylim = c(0, 70),
  col = hcl.colors(2, palette = "viridis"),
  legend.text = sort(unique(data$Result)),
  names.arg = names(data)[-1],
  main = "Here could be your title",
  args.legend = list(x = "topleft", cex = 0.9)
)
# Draw a frame around the plot region.
box()
Data:
data <- structure(list(Result = c("pass", "pass", "Fail", "Fail", "pass",
"Fail"), course1 = c(15L, 12L, 9L, 3L, 14L, 5L), course2 = c(17L,
14L, 13L, 2L, 11L, 0L), course3 = c(18L, 19L, 3L, 0L, 20L, 7L
)), class = "data.frame", row.names = c(NA, -6L))

R - Fill area between lines based on top value

I am analyzing monthly observations of water input (rainfall) and output (evaporation) at a given location.
I need to plot time series of both rainfall and evaporation, shading the area between the data points with varying colors according to which line is above the other.
This is what I have:
library(ggplot2)
library(reshape2)
dat1 <- structure(list(month = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L), value = c(226.638505697305, 186.533910906497, 141.106702957603,
93.4474376969313, 134.58903301495, 77.6436398653559, 77.301864710113,
69.7349071531699, 109.208227499776, 165.197186758555, 156.057081859175,
168.342059689587, 136.34266772667, 119.741309096806, 120.395245911241,
98.1418096019397, 72.4585192294772, 59.6209861948614, 69.6993145911677,
97.1585171469416, 118.357052089691, 132.74037278737, 139.141233379528,
146.583047731729), var = c("rainfall", "rainfall", "rainfall",
"rainfall", "rainfall", "rainfall", "rainfall", "rainfall", "rainfall",
"rainfall", "rainfall", "rainfall", "evaporation", "evaporation",
"evaporation", "evaporation", "evaporation", "evaporation", "evaporation",
"evaporation", "evaporation", "evaporation", "evaporation", "evaporation"
)), row.names = c(NA, -24L), class = "data.frame")
ggplot(dat1, aes(x=month,y=value, colour=var)) +
geom_line() +
scale_color_manual(values=c("firebrick1", "dodgerblue")) +
theme_bw(base_size=18)
which yields the following graph (with little edits to show what I'm trying to achieve):
My initial attempt to fill the areas between the lines was based on this SO answer:
dat2 <- data.frame(month=1:12,
rainfall=dat1[dat1$var=="rainfall",]$value,
evaporation=dat1[dat1$var=="evaporation",]$value)
dat2 <- cbind(dat2, min_line=pmin(dat2[,2],dat2[,3]) )
dat2 <- melt(dat2, id.vars=c("month","min_line"), variable.name="var", value.name="value")
ggplot(data=dat2, aes(x=month, fill=var)) +
geom_ribbon(aes(ymax=value, ymin=min_line)) +
scale_fill_manual(values=c(rainfall="dodgerblue", evaporation="firebrick1"))
However, it's not quite what I need.
How can I achieve the desired result?
The reason you're getting the wrong shading is probably because the data is a bit on the coarse side. My advice would be to interpolate the data first. Assuming dat1 is from your example.
library(ggplot2)
# From long data to wide data
dat2 <- tidyr::pivot_wider(dat1, values_from = value, names_from = var)
# Setup interpolated data (tibble because we can then reference column x)
dat3 <- tibble::tibble(
x = seq(min(dat2$month), max(dat2$month), length.out = 1000),
rainfall = with(dat2, approx(month, rainfall, xout = x)$y),
evaporation = with(dat2, approx(month, evaporation, xout = x)$y)
)
Then, we need to find a way to identify groups, and here is a helper function for that. Group IDs are based on the runs in run length encoding.
# Make function to identify groups
# Label each element of x with the index of the run it belongs to.
# Consecutive equal values form one run (as computed by rle()); the first run
# gets id 1, the next id 2, and so on. Returns an integer vector as long as x.
rle_id <- function(x) {
  run_lengths <- rle(x)$lengths
  rep.int(seq_along(run_lengths), times = run_lengths)
}
And now we can plot it.
ggplot(dat3, aes(x)) +
geom_ribbon(aes(ymin = pmin(evaporation, rainfall),
ymax = pmax(evaporation, rainfall),
group = rle_id(sign(rainfall - evaporation)),
fill = as.factor(sign(rainfall - evaporation))))
Created on 2021-02-14 by the reprex package (v1.0.0)

Change order for factor in grouped-bar plot

I have the following code:
figg4 <- lala4 %>% gather(key, value, -Species_Name) %>%
mutate (Species_Name = factor(Species_Name,
levels=c('Dasyprocta punctata',
'Cuniculus paca','Large Rats',
'Heteromys unknown', 'Sciurus variegatoides',
'Sciurus granatensis','Dasypus novemcinctus',
'Didelphis marsupialis', 'Philander opossum',
'Metachirus nudicaudatus', 'Nasua narica',
'Procyon lotor', 'Eira barbara',
'Galictis vittata', 'Leopardus pardalis'))) %>%
ggplot(aes(x=Species_Name, y=value,
fill=key)) + coord_flip() + geom_col (position = "stack") +
theme(panel.background = element_blank()) + bbc_style() +
labs(title = "Species occupancy by Site Type")+
scale_fill_manual(values = c("#333333","#1380A1", "#FAAB18"))
I get bar graph which is listing the names in the reverse order, I want to make them appear in the order that I have written the levels in... how do I do so?
I tried using fct_reorder from forcats by adding the following code
mutate(name = fct_reorder(Species_Name, desc(value)))
But that did not change the order.
I am quite new to r and not sure of how to do this. Would be grateful for any help
Here is dput output for the source:
dput(lala4)
structure(list(Species_Name = structure(c(9L, 12L, 13L, 14L,
19L, 22L, 27L, 46L, 41L, 42L, 10L, 15L, 32L, 33L, 24L), .Label = c("Buteo platypterus",
"Canis latrans", "Cathartes aura", "Catharus unknown", "Catharus ustulatus",
"Cebus capucinus", "Chordeiles unknown", "Conepatus semistriatus",
"Crax rubra", "Crypturellus cinnamomeus", "Cuniculus paca", "Dasyprocta punctata",
"Dasypus novemcinctus", "Didelphis marsupialis", "Eira barbara",
"Galictis vittata", "Geotrygon montana", "Geotrygon violacea",
"Heteromys unknown", "Holcosus quadrilineatus", "Large Rats",
"Leopardus pardalis", "Leopardus wiedii", "Leptotila unknown",
"Melozone unknown", "Metachirus nudicaudatus", "Nasua narica",
"Odocoileus virginianus", "Panthera onca", "Parkesia noveboracensis",
"Pecari tajacu", "Penelopina nigra", "Philander opossum", "Piaya cayana",
"Procyon lotor", "Puma concolor", "Puma yagouaroundi", "Sciurus granatensis",
"Sciurus variegatoides", "Setophaga unknown", "Sylvilagus sp ",
"Tamandua mexicana", "Tapirus bairdii", "Tayassu pecari", "Tigrisoma fasciatum",
"Tinamus major"), class = "factor"), Forest Area (<5ha) = c(0.067307692,
0.134615385, 0.173076923, 0.144230769, 0.019230769, 0.086538462,
0.192307692, 0.009615385, 0.163461538, 0.038461538, 0, 0.019230769,
0, 0.163461538, 0.153846154), Forest Area (5-27ha) = c(0.067307692,
0.317307692, 0.269230769, 0.096153846, 0.038461538, 0.105769231,
0.192307692, 0.115384615, 0.134615385, 0.057692308, 0, 0.096153846,
0, 0.076923077, 0.173076923), Forest Area (>350ha) = c(0.163461538,
0.384615385, 0.278846154, 0.201923077, 0.105769231, 0.067307692,
0.144230769, 0.298076923, 0.028846154, 0.048076923, 0.086538462,
0.038461538, 0.019230769, 0.028846154, 0.125)), row.names = c(NA,
15L), class = "data.frame")
You need to redefine the factor as an ordered factor first.
Try just fixing the code where you define the factor by adding
ordered = TRUE
This should probably work:
# Stacked horizontal bar chart of species occupancy by site type.
# NOTE: ordered = TRUE alone does not change the plotting order. A discrete
# axis is drawn in level order, and coord_flip() places the first level at the
# BOTTOM of the flipped axis. The levels are therefore reversed with rev() so
# that the species appear top-to-bottom in the order they are written here.
figg4 <- lala4 %>%
  gather(key, value, -Species_Name) %>%
  mutate(Species_Name = factor(Species_Name,
    levels = rev(c('Dasyprocta punctata',
                   'Cuniculus paca', 'Large Rats',
                   'Heteromys unknown', 'Sciurus variegatoides',
                   'Sciurus granatensis', 'Dasypus novemcinctus',
                   'Didelphis marsupialis', 'Philander opossum',
                   'Metachirus nudicaudatus', 'Nasua narica',
                   'Procyon lotor', 'Eira barbara',
                   'Galictis vittata', 'Leopardus pardalis')),
    ordered = TRUE)) %>%
  ggplot(aes(x = Species_Name, y = value, fill = key)) +
  coord_flip() +
  geom_col(position = "stack") +
  theme(panel.background = element_blank()) +
  bbc_style() +
  labs(title = "Species occupancy by Site Type") +
  scale_fill_manual(values = c("#333333", "#1380A1", "#FAAB18"))
I can't run it because I don't have the lala4 data to test though.

ranking in a descending order in R

I want to rank the variables in my dataset in a descending order of the Number of Plants used. I tried ranking in .csv and then exporting it in R. But even then, the plot was not ranked in the required order. Here is my dataset
df <- structure(list(Lepidoptera.Family = structure(c(3L, 2L, 5L, 1L, 4L, 6L),
.Label = c("Hesperiidae", "Lycaenidae", "Nymphalidae", "Papilionidae", "Pieridae","Riodinidae"), class = "factor"),
LHP.Families = c(55L, 55L, 15L, 14L, 13L, 1L)),
.Names = c("Lepidoptera.Family", "LHP.Families"),
class = "data.frame", row.names = c(NA, -6L))
library(ggplot2)
library(reshape2)
gg <- melt(df,id="Lepidoptera.Family", value.name="LHP.Families", variable.name="Type")
ggplot(gg, aes(x=Lepidoptera.Family, y=LHP.Families, fill=Type))+
geom_bar(stat="identity")+
coord_flip()+facet_grid(Type~.)
How do i rank them in a descending order? Also, i want to combine 3 plots into one. How can i go about it?
The reason this is happening is that ggplot plots the x variables that are factors in the ordering of the underlying values (recall that factors are stored as numbers underneath the covers). If you want to graph them in an alternate order, you should change the order of the levels before plotting
gg$Lepidoptera.Family<-with(gg,
factor(Lepidoptera.Family,
levels=Lepidoptera.Family[order(LHP.Families)]))
The trick is to reorder the levels of the Lepidoptera.Family factor, which by default is alphabetical:
# reorder() returns the factor with its levels sorted by LHP.Families.
# The result has to be assigned back to the column inside within();
# an unassigned expression is discarded and df would come back unchanged.
df <- within(df, {
  Lepidoptera.Family <- reorder(Lepidoptera.Family, LHP.Families)
})
gg <- melt(df,id="Lepidoptera.Family", value.name="LHP.Families", variable.name="Type")
ggplot(gg, aes(x=Lepidoptera.Family, y=LHP.Families, fill=Type))+ geom_bar(stat="identity")+ coord_flip()+facet_grid(Type~.)

How can I label the points of a quantile-quantile plot composed with ggplot2?

I am building a quantile-quantile plot out of a variable called x from a data frame called df in the working example provided below. I would like to label the points with the name variable of my df dataset.
Is it possible to do this in ggplot2 without resorting to the painful solution (coding the theoretical distribution by hand and then plotting it against the empirical one)?
Edit: it happens that yes, thanks to a user who posted and then deleted his answer. See the comments after Arun's answer below. Thanks to Didzis for his otherwise clever solution with ggbuild.
# MWE
df <- structure(list(name = structure(c(1L, 2L, 3L, 4L, 5L, 7L, 9L,
10L, 6L, 12L, 13L, 14L, 15L, 16L, 17L, 19L, 18L, 20L, 21L, 22L,
8L, 23L, 11L, 24L), .Label = c("AUS", "AUT", "BEL", "CAN", "CYP",
"DEU", "DNK", "ESP", "FIN", "FRA", "GBR", "GRC", "IRL", "ITA",
"JPN", "MLT", "NLD", "NOR", "NZL", "PRT", "SVK", "SVN", "SWE",
"USA"), class = "factor"), x = c(-0.739390016757746, 0.358177826874146,
1.10474523846099, -0.250589535389937, -0.423112615445571, -0.862144579740376,
0.823039669834058, 0.079521521937704, 1.08173649722493, -2.03962942823921,
1.05571087029737, 0.187147291278723, -0.144770773941437, 0.957990771847331,
-0.0546549555439176, -2.70142550075757, -0.391588386498849, -0.23855544527369,
-0.242781575907386, -0.176765072121165, 0.105155860923456, 2.69031085872414,
-0.158320176671995, -0.564560815972446)), .Names = c("name",
"x"), row.names = c(NA, -24L), class = "data.frame")
library(ggplot2)
qplot(sample = x, data = df) + geom_abline(linetype = "dotted") + theme_bw()
# ... using names instead of points would allow to spot the outliers
I am working on an adaptation of this gist, and will consider sending other questions to CrossValidated if I have questions about the regression diagnostics, which might be of interest to CV users.
You can save your original QQ plot as object (used function ggplot() and stat_qq() instead of qplot())
g<-ggplot(df, aes(sample = x)) + stat_qq()
Then, with the function ggplot_build(), you can extract the data used for plotting. They are stored in the element data[[1]]. Save those data as a new data frame.
df.new<-ggplot_build(g)$data[[1]]
head(df.new)
x y sample theoretical PANEL group
1 -2.0368341 -2.7014255 -2.7014255 -2.0368341 1 1
2 -1.5341205 -2.0396294 -2.0396294 -1.5341205 1 1
3 -1.2581616 -0.8621446 -0.8621446 -1.2581616 1 1
4 -1.0544725 -0.7393900 -0.7393900 -1.0544725 1 1
5 -0.8871466 -0.5645608 -0.5645608 -0.8871466 1 1
6 -0.7415940 -0.4231126 -0.4231126 -0.7415940 1 1
Now you can add the names of the observations to the new data frame. It is important to use order() here, because the rows of the new data frame are sorted by sample value rather than kept in the original row order.
df.new$name<-df$name[order(df$x)]
Now plot new data frame as usual and instead of geom_point() provide geom_text().
ggplot(df.new,aes(theoretical,sample,label=name))+geom_text()+
geom_abline(linetype = "dotted") + theme_bw()
The points are too close by. I would do something like this:
# Sort by the observed values so that row i holds the i-th order statistic.
df <- df[order(df$x), ]
# Theoretical standard-normal quantiles at the usual plotting positions.
# This is deterministic, whereas quantiles of a random rnorm() sample change
# on every run and only approximate the normal distribution.
df$t <- qnorm(ppoints(nrow(df)))
# Refer to the column by name inside aes(); it is looked up in `data`.
p <- ggplot(data = df, aes(x = t, y = x)) + geom_point(aes(colour = name))
This gives:
If you insist on having labels inside the plot, then, you could try something like:
# Sort by the observed values so that row i holds the i-th order statistic.
df <- df[order(df$x), ]
# Theoretical standard-normal quantiles at the usual plotting positions.
# This is deterministic, whereas quantiles of a random rnorm() sample change
# on every run and only approximate the normal distribution.
df$t <- qnorm(ppoints(nrow(df)))
# Refer to columns by name inside aes(); they are looked up in `data`.
p <- ggplot(data = df, aes(x = t, y = x)) + geom_point(aes(colour = name))
# Offset the labels slightly so they do not sit on top of the points.
p <- p + geom_text(aes(x = t - 0.05, y = x - 0.15, label = name, size = 1, colour = name))
p
You can play around with the x and y coordinates and if you want you can always remove the colour aesthetics.
#Arun has a good solution in the comment above, but this works with R 4.0.3:
ggplot(data = df, aes(sample = x)) + geom_qq() + geom_text_repel(label=df$name[order(df$x)], stat="qq") + stat_qq_line()
Basically the same thing, with addition of stat_qq_line() and [order(df$x)] as part of the label. If you don't include the order function then your labels will be all out of order and very misleading.
Here's hoping this saves someone else some hours of their life.

Resources