ggplot, facet, piechart, missing values - r

This is my final dataset. I originally obtained this table by calculating the values separately and doing rbind between females (F) and males (M) from an original bigger dataset.
I am trying to make a handsome piechart, with the percentage labels outside and I've encountered ALL problems possible for which I cannot find a solution.
Notice there is no value A for males in the dataframe.
Dataframe:
sex ms n_ms n msPerc value
1 F A 1 91 0.01098901 1.098901
2 F B 18 91 0.19780220 19.780220
3 F C 65 91 0.71428571 71.428571
4 F D 7 91 0.07692308 7.692308
5 M B 11 108 0.10185185 10.185185
6 M C 86 108 0.79629630 79.629630
7 M D 11 108 0.10185185 10.185185
library(ggplot2)
library(ggrepel)
library(tidyverse)
n<- c(91, 91 , 91, 91, 108, 108, 108 )
n_ms<-c(1,18,65,7,11,86,11)
sex<- c("F","F","F","F", "M"," M","M")
ms<- c("A","B","C","D","B","C","D")
df <- data.frame(sex, ms, n_ms, n)
df[is.na(df)]<- 0
df$msPerc <- df$n_ms /df$n
df$value <- 100*df$n_ms /df$n
df$n_ms<- as.integer(df$n_ms) # original big dataframe (doing for replication purposes)
df$n<- as.integer(df$n)
#creating position of labels
df2 <- df %>%
mutate(csum = rev(cumsum(rev(value))),
pos = value/2 + lead(csum, 1),
pos = if_else(is.na(pos), value/2, pos))
ms_pie<-ggplot(df, aes(x="", y=msPerc, group=sex, fill=ms)) +
geom_bar(width = 1, stat = "identity") +
coord_polar("y", start=0) +
facet_grid(.~ sex) +
theme_void()+
theme(legend.position="top",
legend.text = element_text(size = 9),
legend.title = element_text(size = 9,face = "bold"))+
scale_fill_manual(values=c("#d7191c", "#fdae61", "#abd9e9","#5e3c99"),
name="Moulting stage",
labels=c("A","B","C","D"))+
# geom_label(aes(label = percent(msPerc)),
# position = position_stack(vjust = 0.5),
# show.legend = FALSE)
# geom_text(aes(label = percent(msPerc)),size = 3,color = "black",
# position = position_stack(vjust = 0.5),
# show.legend = FALSE)
geom_label_repel(data = df2,
aes(y = pos, label = paste0(value, "%")),
size = 4.5, nudge_x = 1, show.legend = FALSE)
ms_pie
This is what happens...
What I would like is a piechart like this one from https://r-charts.com/part-whole/pie-chart-labels-outside-ggplot2/ but including facet_grid in variable "sex".
So far this is the closest I've got. Using geom_label, however my values overlap and I do not know how to separate them either... the joys of being a beginner in R.
I also tried the solutions provided in ggplot, facet, piechart: placing text in the middle of pie chart slices but coor_polar won't work with scales "free".
I would much appreciate the help.
Kind regards.

There are several problems with your code I've tried to clean (see comments preceeded by ####) - this should get you closer:
library(ggplot2)
library(ggrepel)
library(tidyverse)
library(scales) #### using scales for number formatting
n<- c(91, 91 , 91, 91, 108, 108, 108 )
n_ms<-c(1,18,65,7,11,86,11)
sex<- c("F","F","F","F", "M","M","M") #### changed " M" to "M" at last but one element
ms<- c("A","B","C","D","B","C","D")
df <- data.frame(sex, ms, n_ms, n)
df[is.na(df)]<- 0
df$msPerc <- df$n_ms /df$n
df$value <- 100*df$n_ms /df$n
df$n_ms<- as.integer(df$n_ms) # original big dataframe (doing for replication purposes)
df$n<- as.integer(df$n)
#creating position of labels
df2 <- df %>% group_by(sex) %>% #### you need to group your data by the facets you want to show
mutate(csum = cumsum(msPerc), #### adjusted example code to use "msPerc"
pos = msPerc/2 + lag(csum, 1),
pos = if_else(is.na(pos), msPerc/2, pos))
ms_pie<-ggplot(df, aes(x="", y=msPerc, group=sex, fill=ms)) +
geom_col(width = 1) +
coord_polar("y", start=0) +
facet_grid(~sex) +
theme_void()+
theme(legend.position="top",
legend.text = element_text(size = 9),
legend.title = element_text(size = 9,face = "bold"))+
scale_fill_manual(values=c("#d7191c", "#fdae61", "#abd9e9","#5e3c99"),
name="Moulting stage",
labels=c("A","B","C","D"))+
geom_label_repel(data = df2,
aes(y = pos, label = percent(msPerc, digits = 1)),
size = 4.5, nudge_x = 1, show.legend = FALSE)
ms_pie

Related

R: How to Fill Points in ggplot2 with a variable

I am attempting to make a ggplot2 scatter plot that is grouped by bins in R. I successfully made the first model, which I did not try to alter the fill for. But when I tried to have the fill of the scatter plot be based upon my variable (Miss.) ,which is a numeric value ranging from 0.00 to 0.46, it essentially ignores the heat map scale and turns everything gray.
ggplot(data = RightFB, mapping = aes(x = TMHrzBrk, y = TMIndVertBrk))+
geom_bin_2d(bins = 15)+
scale_fill_continuous(type = "viridis")+
ylim(5, 20)+
xlim(0,15)+
coord_fixed(1.3)
ggplot(data = RightFB, mapping = aes(x = TMHrzBrk, y = TMIndVertBrk, fill
=Miss.))+
geom_bin_2d(bins = 15)+
scale_fill_continuous(type = "viridis")+
ylim(5, 20)+
xlim(0,15)+
coord_fixed(1.3)
I appreciate any help! Thanks!
I think I understand your problem, so let's replicate it with a reproducible example. Obviously we don't have your data, but the following data frame has the same names, types and ranges as your own data, so this walk-through should work for you.
set.seed(1)
RightFB <- data.frame(TMHrzBrk = runif(1000, 0, 15),
TMIndVertBrk = runif(1000, 5, 20),
Miss. = runif(1000, 0, 0.46))
Your first plot will look something like this:
library(tidyverse)
ggplot(data = RightFB, mapping = aes(x = TMHrzBrk, y = TMIndVertBrk)) +
geom_bin_2d(bins = 15) +
scale_fill_continuous(type = "viridis") +
ylim(5, 20) +
xlim(0, 15) +
coord_fixed(1.3)
#> Warning: Removed 56 rows containing missing values (`geom_tile()`).
Here, the fill colors represent the counts of observations within each bin. But if you try to map the fill to Miss., you get all gray squares:
ggplot(data = RightFB, mapping = aes(x = TMHrzBrk, y = TMIndVertBrk,
fill = Miss.)) +
geom_bin_2d(bins = 15) +
scale_fill_continuous(type = "viridis") +
ylim(5, 20) +
xlim(0, 15) +
coord_fixed(1.3)
#> Warning: The following aesthetics were dropped during statistical transformation: fill
#> i This can happen when ggplot fails to infer the correct grouping structure in
#> the data.
#> i Did you forget to specify a `group` aesthetic or to convert a numerical
#> variable into a factor?
#> Removed 56 rows containing missing values (`geom_tile()`).
The reason this happens is that by default geom_bin_2d calculates the bins and the counts within each bin to get the fill variable. There are multiple observations within each bin, and they all have a different value of Miss. . Furthermore, geom_bin_2d doesn't know what you want to do with this variable. My guess is that you are looking for the average of Miss. within each bin, but this is difficult to achieve within the framework of geom_bin_2d.
The alternative is to calculate the bins yourself, get the average of Miss. in each bin, and plot as a geom_tile
RightFB %>%
mutate(TMHrzBrk = cut(TMHrzBrk, breaks = seq(0, 15, 1), seq(0.5, 14.5, 1)),
TMIndVertBrk = cut(TMIndVertBrk, seq(5, 20, 1), seq(5.5, 19.5, 1))) %>%
group_by(TMHrzBrk, TMIndVertBrk) %>%
summarize(Miss. = mean(Miss., na.rm = TRUE), .groups = "drop") %>%
mutate(across(TMHrzBrk:TMIndVertBrk, ~as.numeric(as.character(.x)))) %>%
ggplot(aes(x = TMHrzBrk, y = TMIndVertBrk, fill = Miss.)) +
geom_tile() +
scale_fill_continuous(type = "viridis") +
ylim(5, 20) +
xlim(0, 15) +
coord_fixed(1.3)
EDIT
With the link to the data in the comments, here is a full reprex:
library(tidyverse)
RightFB <- read.csv(paste0("https://raw.githubusercontent.com/rileyfeltner/",
"FB-Analysis/main/Right%20FB.csv"))
RightFB <- RightFB[c(2:6, 9, 11, 13, 18, 19)]
RightFB$Miss. <- as.numeric(as.character(RightFB$Miss.))
#> Warning: NAs introduced by coercion
RightFB$TMIndVertBrk <- as.numeric(as.character(RightFB$TMIndVertBrk))
#> Warning: NAs introduced by coercion
RightFB <- na.omit(RightFB)
RightFB1 <- filter(RightFB, P > 24)
RightFB %>%
mutate(TMHrzBrk = cut(TMHrzBrk, breaks = seq(0, 15, 1), seq(0.5, 14.5, 1)),
TMIndVertBrk = cut(TMIndVertBrk, seq(5, 20, 1), seq(5.5, 19.5, 1))) %>%
group_by(TMHrzBrk, TMIndVertBrk) %>%
summarize(Miss. = mean(Miss., na.rm = TRUE), .groups = "drop") %>%
mutate(across(TMHrzBrk:TMIndVertBrk, ~as.numeric(as.character(.x)))) %>%
ggplot(aes(x = TMHrzBrk, y = TMIndVertBrk, fill = Miss.)) +
geom_tile() +
scale_fill_continuous(type = "viridis") +
ylim(5, 20) +
xlim(0, 15) +
coord_fixed(1.3)
#> Warning: Removed 18 rows containing missing values (`geom_tile()`).
Created on 2022-11-23 with reprex v2.0.2

Caching a huge data table with knitr

I am trying to cache a big data.table and then make a plot out of it, the code is as follow:
{r gen-data, tidy=TRUE, warning=FALSE, tidy.opts=list(width.cutoff=60), cache = TRUE, cache.lazy=FALSE}
DT = fread("reference.txt.gz", header = FALSE)
vc = c("chromosome_1", "chromosome_2", "chromosome_3", "chromosome_4", "chromosome_5", "chromosome_6")
colnames(DT) = c("chrom", "position", "score", "corrected base", "score of the corrected base")
DT=setDT(DT, key = "chrom")[J(vc), nomatch = 0]
{r, cache=TRUE, tidy=TRUE, warning=FALSE, tidy.opts=list(width.cutoff=60), dependson='gen-data'}
plot = ggplot(data = DT) + geom_line(aes(x = position, y = score, group = 1), stat = "summary_bin", fun.y = "mean", binwidth = 100000, color = ghibli_palette("MononokeMedium")[2])
ttle = paste0("coverage of the 6 longest scaffolds of Shasta + instagraal assembly")
plot = plot + labs(
title = ttle) + theme(plot.title = element_markdown(lineheight = 1.5, size = 12), legend.text = element_markdown(size = 14))
plot = plot + theme(axis.title = element_markdown(size = 12)) + theme(axis.text.x = element_text(size=5)) + theme(axis.text.y = element_text(size=3))
plot = plot + theme(legend.title = element_markdown(size = 12))
p = plot + facet_wrap(~chrom, scales = "free_x") +xlab( "position") + ylab("mean score per 100 Kb windows")
v = ggplotly(p) %>%
layout(
xaxis = list(automargin=TRUE),
yaxis = list(automargin=TRUE)
)
v
So what I was thinking, is that the first chunk read the data into a data.table, then apply the relevant selection, and finally cache a DT object.
However, the first chunk is evaluated every time, no matter what. Therefore I must be doing something wrong but I can't see what.
Thanks for any help.
EDIT:
adding some of the, here is the reference.txt sample (yes it's normal it has only 3 column entries, some lines can have up to 5).
chromosome_1 1 91
chromosome_1 2 91
chromosome_1 3 91
chromosome_1 4 91
chromosome_1 5 91
chromosome_1 6 91
chromosome_1 7 91
chromosome_1 8 91
chromosome_1 9 91
chromosome_1 10 91

How to put gestational age in weeks.days on x-axis in ggplot

I am trying to plot weight of a fetus over time.
The y-axis is fetal weight in grams
The x-axis needs to be formatted as the following:
7 weeks 3 days == 27.3
29 weeks 6 days == 29.6
etc
My data (df) looks something like this
weight age
2013 22.4
2302 25.6
2804 27.2
3011 29.1
I have tried something like this... but not sure how to adjust the scale...
ggplot(df, aes(x = age, y = weight)) +
geom_point() +
scale_x_continuous()
If I get the actual numeric value for the age (i.e. 22.4 == 22weeks + 4/7days == 22.57),
Is it possible to label the corresponding age value with the label i want?
For example...
weight age.label age.value
2013 22.4 22.57
2302 25.6 25.86
2804 27.2 27.29
3011 29.1 29.14
When I call this:
df <- df %>% mutate(age.label = as.character(age.label))
ggplot(df, aes(x = age.value, y = weight)) +
geom_point() +
scale_x_continuous(label = "age.label")
I get the following...
Error in f(..., self = self) : Breaks and labels are different lengths
Any help much appreciated
I borrowed from this answer and this one, to create a variable ticks labels that uses formatting to seperate the days and the weeks.
I have supplied three different methods.
Simply places ticks at every day point but does not number them.
Numbers the days and the weeks correctly and distinguishes between them by making weeks bold and days light grey.
Same as 2 but uses size. This method doesn't work very well, as it creates a large gap between the labels and the plot. It has been included for completeness... and in the hope somebody says how to fix it.
The plot below is the second method.
I think the vertical tick lines could also be coloured so that some of them disappear if you want as well.
library(ggplot2)
library(tidyverse)
df<-read.table(header=TRUE, text="weight age.label age.value
2013 22.4 22.57
2302 25.6 25.86
2804 27.2 27.29
3011 29.1 29.14")
#have ticks for every day using 1/7 distance tick marks
ggplot(df, aes(x = age.value, y = weight)) +
geom_point() +
scale_x_continuous(limits=c(22, 30),
minor_breaks = seq(from = 1, to = 33, by = 1/7),
breaks = 1:30)
#create a df of tick marks labels containing day number and week number
breaks_labels_df <- data.frame(breaks = seq(from = 1, to = 33, by = 1/7)) %>%
mutate(minors= rep(0:6, length.out = nrow(.)),
break_label = ifelse(minors == 0, breaks, minors))
#plot both day number and week number differentiating between them by the label formatting.
#remove the minor tick lines to reduce the busyness of the plot
ggplot(df, aes(x = age.value, y = weight)) +
geom_point() +
scale_x_continuous(limits=c(22, 30),
breaks = seq(from = 1, to = 33, by = 1/7),
labels = breaks_labels_df$break_label) +
theme(axis.text.x = element_text(color = c("grey60","grey60","black",rep("grey60",4)),
size = 8, angle = 0,
hjust = .5, vjust = .5,
face = c("plain","plain","bold",rep("plain",4))),
panel.grid.minor.x = element_blank()) +
labs(title = "Baby weight in relation to age", x = "Age in weeks and days", y = "weight in grams")
#Changing the font size places a large gap between the tick labels and the axis
ggplot(df, aes(x = age.value, y = weight)) +
geom_point() +
scale_x_continuous(limits=c(22, 30),
breaks = seq(from = 1, to = 33, by = 1/7),
labels = breaks_labels_df$break_label) +
theme(axis.text.x = element_text(vjust = 0, size = c(8,8,12,rep(8,4)),
margin = margin(t = 0), lineheight = 0))
In order to add labels to the plot, use the geom_text function in the ggplot2 package. One can use the "hjust" and "vjust" to fine tune the placement.
df<-read.table(header=TRUE, text="weight age
2013 22.4
2302 25.6
2804 27.2
3011 29.1")
library(dplyr)
library(ggplot2)
#calculate the proper decimal value for axis
df<-df %>%mutate(age.value=floor(age)+ (age-floor(age))*10/7) %>% round(2)
ggplot(df, aes(x = age.value, y = weight)) +
geom_point() +
scale_x_continuous(limits=c(20, 30)) +
geom_text(aes(label = age), hjust = -.2, vjust=.1)

Small ggplots on a ggmap - a purrr map version

Based on Small ggplot2 plots placed on coordinates on a ggmap
I would like to have the same solution, but with ggplot function outside the pipeline, applied with purrr::map().
The data for small bar subplots indicating 2 values, may contain
lon, lat, id, valueA, valueB,
After tidyr::gather operation it may look like:
Town, Potential_Sum, lon, lat, component , sales
Aaa, 9.00, 20.80, 54.25, A, 5.000
Aaa, 9.00, 20.80, 54.25, B, 4.000
Bbb, 5.00, 19.60, 50.50, A, 3.000
Bbb, 5.00, 19.60, 50.50, B, 2.000
Current working solution is to use do() to generate sublopts and then ggplotGrob to generate a column with objects "grobs" to be placed at lon,lat locations on a ggmap.
maxSales <- max(df$sales)
df.grobs <- df %>%
do(subplots = ggplot(., aes(1, sales, fill = component)) +
geom_col(position = "dodge", alpha = 0.50, colour = "white") +
coord_cartesian(ylim = c(0, maxSales)) +
scale_fill_manual(values = c("green", "red"))+
geom_text(aes(label=if_else(sales>0,round(sales), NULL)), vjust=0.35,hjust=1.1, colour="black",
position=position_dodge(.9), size=2.5, angle=90)+
theme_void()+ guides(fill = F)) %>%
mutate(subgrobs = list(annotation_custom(ggplotGrob(subplots),
x = lon-0.14, y = lat-0.20,
xmax = lon+0.14, ymax = lat+1.2)))
df.grobs %>%
{p + geom_label(aes(x = 15, y = 49.8, label = "A"), colour = c("black"),fill = "green", size=3)+
geom_label(aes(x = 15, y = 5.01, label = "B"), colour = c("black"),fill = "red", size=3)+
.$subgrobs +
geom_text(data=df, aes(label = Miasto), vjust = 3.5,nudge_x = 0.05, size=2.5) +
geom_col(data = df,
aes(0,0, fill = component),
colour = "white")}
p is a ggmap object, map of Poland, on which I would like to place small plots:
# p <-
# get_googlemap(
# "Poland",
# maptype = "roadmap",
# zoom = 6,
# color = "bw",
# crop = T,
# style = "feature:all|element:labels|visibility:off" # 'feature:administrative.country|element:labels|visibility:off'
# ) %>% # or 'feature:all|element:labels|visibility:off'
# ggmap() + coord_cartesian() +
# scale_x_continuous(limits = c(14, 24.3), expand = c(0, 0)) +
# scale_y_continuous(limits = c(48.8, 55.5), expand = c(0, 0))
#
How to translate this solution to the syntax nest - apply -unnest so that the ggplot part should be outside of the piped expression as a function.
In other words. How to replace do() with map(parameters, GGPlot_function) and then plot grobs on a ggmap .
What I did so far was I tried to write a ggplot function
#----barplots----
maxSales <- max(df$sales)
fn_ggplot <- function (df, x, component, maxX) {
x <- enquo(x)
component <-enquo(component)
maxX <-enquo(maxX)
p <- ggplot(df, aes(1, !!x, fill = !!component)) +
geom_col(position = "dodge", alpha = 0.50, colour = "white") +
coord_cartesian(ylim = c(0, !!maxX)) +
scale_fill_manual(values = c("green", "red"))+
geom_text(aes(label=if_else(x>0,round(!!x), NULL)), vjust=0.35,hjust=1.1, colour="black",
position=position_dodge(.9), size=2.5, angle=90)+
theme_void()+ guides(fill = F)
return(p)
}
And got totaly confused trying to apply it like this (I am a constant beginner unfortunately)... this is not working, showing
df.grobs <- df %>%
mutate(subplots = pmap(list(.,sales,component,Potential_Sum),fn_ggplot)) %>%
mutate(subgrobs = list(annotation_custom(ggplotGrob(subplots),
x = lon-0.14, y = lat-0.20,
xmax = lon+0.14, ymax = lat+1.2)))
I get errors indicating I do not know what I am doing, ie lengths of arguments are incorrect and something else is expected.
message: Element 2 of `.l` must have length 1 or 7, not 2
class: `purrr_error_bad_element_length`
backtrace:
1. dplyr::mutate(...)
12. purrr:::stop_bad_length(...)
13. dplyr::mutate(...)
Call `rlang::last_trace()` to see the full backtrace
> rlang::last_trace()
x
1. +-`%>%`(...)
2. | +-base::withVisible(eval(quote(`_fseq`(`_lhs`)), env, env))
3. | \-base::eval(quote(`_fseq`(`_lhs`)), env, env)
4. | \-base::eval(quote(`_fseq`(`_lhs`)), env, env)
5. | \-global::`_fseq`(`_lhs`)
6. | \-magrittr::freduce(value, `_function_list`)
7. | \-function_list[[i]](value)
8. | +-dplyr::mutate(...)
9. | \-dplyr:::mutate.tbl_df(...)
10. | \-dplyr:::mutate_impl(.data, dots, caller_env())
11. +-purrr::pmap(list(., sales, component, Potential_Sum), fn_ggplot)
12. \-purrr:::stop_bad_element_length(...)
13. \-purrr:::stop_bad_length(...)
data
First let's build some sample data close to yours but reproducible without the need for an api key.
As a starting point we have a plot of a country map stored in p, and some data in long form to build the charts stored in plot_data.
library(maps)
library(tidyverse)
p <- ggplot(map_data("france"), aes(long,lat,group=group)) +
geom_polygon(fill = "lightgrey") +
theme_void()
set.seed(1)
plot_data <- tibble(lon = c(0,2,5), lat = c(44,48,46)) %>%
group_by(lon, lat) %>%
do(tibble(component = LETTERS[1:3], value = runif(3,min=1,max=5))) %>%
mutate(total = sum(value)) %>%
ungroup()
plot_data
# # A tibble: 9 x 5
# lon lat component value total
# <dbl> <dbl> <chr> <dbl> <dbl>
# 1 0 44 A 2.06 7.84
# 2 0 44 B 2.49 7.84
# 3 0 44 C 3.29 7.84
# 4 2 48 A 4.63 11.0
# 5 2 48 B 1.81 11.0
# 6 2 48 C 4.59 11.0
# 7 5 46 A 4.78 11.9
# 8 5 46 B 3.64 11.9
# 9 5 46 C 3.52 11.9
define a plotting function
we isolate the plotting code in a separate function
my_plot_fun <- function(data){
ggplot(data, aes(1, value, fill = component)) +
geom_col(position = position_dodge(width = 1),
alpha = 0.75, colour = "white") +
geom_text(aes(label = round(value, 1), group = component),
position = position_dodge(width = 1),
size = 3) +
theme_void()+ guides(fill = F)
}
build a wrapper
This function takes a data set, some coordinates and the plotting function as parameters, to annotate at the right spot.
annotation_fun <- function(data, lat,lon, plot_fun) {
subplot = plot_fun(data)
sub_grob <- annotation_custom(ggplotGrob(subplot),
x = lon-0.5, y = lat-0.5,
xmax = lon+0.5, ymax = lat+0.5)
}
The final code
The the code becomes simple, using nest and pmap
subgrobs <- plot_data %>%
nest(-lon,-lat) %>%
pmap(annotation_fun,plot_fun = my_plot_fun)
p + subgrobs

Split dataframe and Create multipanel scatterplots from list of data frames

I have a dataframe like so:
set.seed(453)
year= as.factor(c(rep("1998", 20), rep("1999", 16)))
lepsp= c(letters[seq(from = 1, to = 20 )], c('a','b','c'),letters[seq(from =8, to = 20 )])
freq= c(sample(1:15, 20, replace=T), sample(1:18, 16,replace=T))
df<-data.frame(year, lepsp, freq)
df<-
df %>%
group_by(year) %>%
mutate(rank = dense_rank(-freq))
Frequencies freq of each lepsp within each year are ranked in the rank column. Larger freq values correspond to the smallest rank value and smaller freq values have the largest rank values. Some rankings are repeated if levels of lepsp have the same abundance.
I would like to split the df into multiple subsets by year. Then I would like to plot each subsetted dataframe in a multipanel figure. Essentially this is to create species abundance curves. The x-axis would be rank and the yaxis needs to be freq.
In my real dataframe I have 22 years of data. I would prefer the graphs to be displayed as 2 columns of 4 rows for a total of 8 graphs per page. Essentially I would have to repeat the solution offered here 3 times.
I also need to demarcate the 25%, 50% and 75% quartiles with vertical lines to look like this (desired result):
It would be great if each graph specified the year to which it belonged, but since all axis are the same name, I do not want x and y labels to be repeated for each graph.
I have tried to plot multiple lines on the same graph but it gets messy.
year.vec<-unique(df$year)
plot(sort(df$freq[df$year==year.vec[1]],
decreasing=TRUE),bg=1,type="b", ylab="Abundance", xlab="Rank",
pch=21, ylim=c(0, max(df$freq)))
for (i in 2:22){
points(sort(df$freq[df$year==year.vec[i]], decreasing=TRUE), bg=i,
type="b", pch=21)
}
legend("topright", legend=year.vec, pt.bg=1:22, pch=21)
I have also tried a loop, however it does not produce an output and is missing some of the arguments I would like to include:
jpeg('pract.jpg')
par(mfrow = c(6, 4)) # 4 rows and 2 columns
for (i in unique(levels(year))) {
plot(df$rank,df$freq, type="p", main = i)
}
dev.off()
Update
(Attempted result)
I found the following code after my post which gets me a little closer, but is still missing all the features I would like:
library(reshape2)
library(ggplot2)
library (ggthemes)
x <- ggplot(data = df2, aes(x = rank, y = rabun)) +
geom_point(aes(fill = "dodgerblue4")) +
theme_few() +
ylab("Abundance") + xlab("Rank") +
theme(axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15),
plot.title = element_blank(), # we don't want individual plot titles as the facet "strip" will give us this
legend.position = "none", # we don't want a legend either
panel.border = element_rect(fill = NA, color = "darkgrey", size = 1.25, linetype = "solid"),
axis.ticks = element_line(colour = 'darkgrey', size = 1.25, linetype = 'solid')) # here, I just alter to colour and thickness of the plot outline and tick marks. You generally have to do this when faceting, as well as alter the text sizes (= element_text() in theme also)
x
x <- x + facet_wrap( ~ year, ncol = 4)
x
I prefer base R to modify graph features, and have not been able to find a method using base R that meets all my criteria above. Any help is appreciated.
Here's a ggplot approach. First off, I made some more data to get the 3x2 layout:
df = rbind(df, mutate(df, year = year + 4), mutate(df, year = year + 8))
Then We do a little manipulation to generate the quantiles and labels by group:
df_summ =
df %>% group_by(year) %>%
do(as.data.frame(t(quantile(.$rank, probs = c(0, 0.25, 0.5, 0.75)))))
names(df_summ)[2:5] = paste0("q", 0:3)
df_summ_long = gather(df_summ, key = "q", value = "value", -year) %>%
inner_join(data.frame(q = paste0("q", 0:3), lab = c("Common", "Rare-75% -->", "Rare-50% -->", "Rare-25% -->"), stringsAsFactors = FALSE))
With the data in good shape, plotting is fairly simple:
library(ggthemes)
library(ggplot2)
ggplot(df, aes(x = rank, y = freq)) +
geom_point() +
theme_few() +
labs(y = "Abundance (% of total)", x = "Rank") +
geom_vline(data = df_summ_long[df_summ_long$q != "q0", ], aes(xintercept = value), linetype = 4, size = 0.2) +
geom_text(data = df_summ_long, aes(x = value, y = Inf, label = lab), size = 3, vjust = 1.2, hjust = 0) +
facet_wrap(~ year, ncol = 2)
There's some work left to do - mostly in the rarity text overlapping. It might not be such an issue with your actual data, but if it is you could pull the max y values into df_summ_long and stagger them a little bit, actually using y coordinates instead of just Inf to get it at the top like I did.

Resources