ggplot place facet between two rows of facets - r

I have 9 plots with 3 time series in each plot, one of these plots contains only one curve and it's the reference plot which I would like to place in between the two rows that contain the other 8 plots. Is there an easy way to do so?
I use facet_wrap(~density,nrow=2) but I get one row with 5 and another with 4 plots. I am sure other people had this problem, is there an easy way around to organize the position of this reference plot, or do I have to create two separate plots and overlay them? Otherwise I might have to move this reference plot in all the other plots but it seems redundant information.
This is my current result, but as you can see it's not very well laid out.

The graphic you are looking for can be generated with gridArrange from the
gridExtra package. Here is
an example using the storms data set from the
dplyr.
library(ggplot2)
library(gridExtra)
library(dplyr)
data(storms, package = 'dplyr')
str(storms)
## Classes 'tbl_df', 'tbl' and 'data.frame': 10010 obs. of 13 variables:
## $ name : chr "Amy" "Amy" "Amy" "Amy" ...
## $ year : num 1975 1975 1975 1975 1975 ...
## $ month : num 6 6 6 6 6 6 6 6 6 6 ...
## $ day : int 27 27 27 27 28 28 28 28 29 29 ...
## $ hour : num 0 6 12 18 0 6 12 18 0 6 ...
## $ lat : num 27.5 28.5 29.5 30.5 31.5 32.4 33.3 34 34.4 34 ...
## $ long : num -79 -79 -79 -79 -78.8 -78.7 -78 -77 -75.8 -74.8 ...
## $ status : chr "tropical depression" "tropical depression" "tropical depression" "tropical depression" ...
## $ category : Ord.factor w/ 7 levels "-1"<"0"<"1"<"2"<..: 1 1 1 1 1 1 1 1 2 2 ...
## $ wind : int 25 25 25 25 25 25 25 30 35 40 ...
## $ pressure : int 1013 1013 1013 1013 1012 1012 1011 1006 1004 1002 ...
## $ ts_diameter: num NA NA NA NA NA NA NA NA NA NA ...
## $ hu_diameter: num NA NA NA NA NA NA NA NA NA NA ...
Let's create two graphics. The first graphic will be only form category == -1
storms (this would be the control group in your question). The second
graphic will be a facteted graphic for the category > -1 storm
First, we'll build a generic ggplot object for the graphics.
graphic <-
ggplot() +
aes(x = long, y = lat, color = category) +
geom_point() +
facet_wrap( ~ category) +
scale_color_hue(breaks = levels(storms$category),
labels = levels(storms$category),
drop = FALSE)
Next we build the two graphics as needed.
g1 <- graphic %+% dplyr::filter(storms, category == -1) + theme(legend.position = "none")
g2 <- graphic %+% dplyr::filter(storms, category != -1)
gridExtra::grid.arrange can take a layout matrix where the numbers 1 and 2
denote the first and second graphics passed to the function. (This works for
a lot more than just two graphics, by the way.) By repeating the values of 1
and 2 in the matrix we can control the relative size of the two graphics in
the graphics device.
gridExtra::grid.arrange(g1, g2,
layout_matrix =
matrix(c(1, 1, 1, 2, 2, 2, 2, 2,
1, 1, 1, 2, 2, 2, 2, 2,
1, 1, 1, 2, 2, 2, 2, 2),
byrow = TRUE, nrow = 3)
)

If I understand the question correctly you could reformat your data with appropriate facetting variables to introduce a new row of reference panels
library(ggplot2)
d <- data.frame(x=rep(1:10, 8), y = rnorm(80),
f=gl(8,10, ordered = TRUE))
d$f1 <- factor(d$f <= 4, labels=c(1,3))
d$f2 <- as.numeric(d$f) %% 4
d2 <- data.frame(x=1:10, y=0, f1 = 2)
ggplot(d, aes(x,y)) +
geom_point(aes(colour=f)) +
geom_point(data=d2, colour="black") +
facet_grid(f1~f2)

Related

Error in MEEM(object, conLin, control$niterEM) in lme function

I'm trying to apply the lme function to my data, but the model gives follow message:
mod.1 = lme(lon ~ sex + month2 + bat + sex*month2, random=~1|id, method="ML", data = AA_patch_GLM, na.action=na.exclude)
Error in MEEM(object, conLin, control$niterEM) :
Singularity in backsolve at level 0, block 1
dput for data, copy from https://pastebin.com/tv3NvChR (too large to include here)
str(AA_patch_GLM)
'data.frame': 2005 obs. of 12 variables:
$ lon : num -25.3 -25.4 -25.4 -25.4 -25.4 ...
$ lat : num -51.9 -51.9 -52 -52 -52 ...
$ id : Factor w/ 12 levels "24641.05","24642.03",..: 1 1 1 1 1 1 1 1 1 1 ...
$ sex : Factor w/ 2 levels "F","M": 1 1 1 1 1 1 1 1 1 1 ...
$ bat : int -3442 -3364 -3462 -3216 -3216 -2643 -2812 -2307 -2131 -2131 ...
$ year : chr "2005" "2005" "2005" "2005" ...
$ month : chr "12" "12" "12" "12" ...
$ patch_id: Factor w/ 45 levels "111870.17_1",..: 34 34 34 34 34 34 34 34 34 34 ...
$ YMD : Date, format: "2005-12-30" "2005-12-31" "2005-12-31" ...
$ month2 : Ord.factor w/ 7 levels "January"<"February"<..: 7 7 7 7 7 1 1 1 1 1 ...
$ lonsc : num [1:2005, 1] -0.209 -0.213 -0.215 -0.219 -0.222 ...
$ batsc : num [1:2005, 1] 0.131 0.179 0.118 0.271 0.271 ...
What's the problem?
I saw a solution applying the lme4::lmer function, but there is another option to continue to use lme function?
The problem is that you have collinear combinations of predictors. In particular, here are some diagnostics:
## construct the fixed-effect model matrix for your problem
X <- model.matrix(~ sex + month2 + bat + sex*month2, data = AA_patch_GLM)
lc <- caret::findLinearCombos(X)
colnames(X)[lc$linearCombos[[1]]]
## [1] "sexM:month2^6" "(Intercept)" "sexM" "month2.L"
## [5] "month2.C" "month2^4" "month2^5" "month2^6"
## [9] "sexM:month2.L" "sexM:month2.C" "sexM:month2^4" "sexM:month2^5"
This is in a weird order, but it suggests that the sex × month interaction is causing problems. Indeed:
with(AA_patch_GLM, table(sex, month2))
## sex January February March April May June December
## F 367 276 317 204 43 0 6
## M 131 93 90 120 124 75 159
shows that you're missing data for one sex/month combination (i.e., no females were sampled in June).
You can:
construct the sex/month interaction yourself (data$SM <- with(data, interaction(sex, month2, drop = TRUE))) and use ~ SM + bat — but then you'll have to sort out main effects and interactions yourself (ugh)
construct the model matrix by hand (as above), drop the redundant column(s), then include all the resulting columns in the model:
d2 <- with(AA_patch_GLM,
data.frame(lon,
as.data.frame(X),
id))
## drop linearly dependent column
## note data.frame() has "sanitized" variable names (:, ^ both converted to .)
d2 <- d2[names(d2) != "sexM.month2.6"]
lme(reformulate(colnames(d2)[2:15], response = "lon"),
random=~1|id, method="ML", data = d2)
Again, the results will be uglier than the simpler version of the model.
use a patched version of nlme (I submitted a patch here but it hasn't been considered)
remotes::install_github("bbolker/nlme")

Formula notation for scatterplot producing unexpected results

I am working on a map, where the color of each point is proportional to one response variable, and the size of the point is proportional to another. I've noticed that when I try to plot the points using formula notation things go haywire, while default notation performs as expected. I have used formula notation to plot maps many times before, and thought that the notations were nearly interchangeable. Why would these produce different results? I have read through the plot.formula and plot.default documentation and haven't been able to figure it out. Based on this I am wondering if it has to do with the columns of dat being coerced to factors, but I'm not sure why that would be happening. Any ideas?
Consider the following example data frame, dat:
latitude <- c(runif(10, min = 45, max = 48))
latitude[9] <- NA
longitude <- c(runif(10, min = -124.5, max = -122.5))
longitude[9] <- NA
color <- c("#00FFCCCC", "#99FF00CC", "#FF0000CC", "#3300FFCC", "#00FFCCCC",
"#00FFCCCC", "#3300FFCC", "#00FFCCCC", NA, "#3300FFCC")
size <- c(4.916667, 5.750000, 7.000000, 2.000000, 5.750000,
4.500000, 2.000000, 4.500000, NA, 2.000000)
dat <- as.data.frame(cbind(longitude, latitude, color, size))
Plotting according to formula notation
plot(latitude ~ longitude, data = dat, type = "p", pch = 21, col = 1, bg = color, cex = size)
produces
this mess and the following error: graphical parameter "type" is obsolete.
Plotting according to the default notation
plot(longitude, latitude, type = "p", pch = 21, col = 1, bg = color, cex = size)
works as expected, though with the same error.
There are a couple of problems with this. First is that your use of cbind is turning this into a matrix, albeit temporarily, which is converting your numbers to character. See:
dat <- as.data.frame(cbind(longitude, latitude, color, size))
str(dat)
# 'data.frame': 10 obs. of 4 variables:
# $ longitude: Factor w/ 9 levels "-122.855375511572",..: 6 8 9 1 4 3 2 7 NA 5
# $ latitude : Factor w/ 9 levels "45.5418886151165",..: 6 2 4 1 3 7 5 9 NA 8
# $ color : Factor w/ 4 levels "#00FFCCCC","#3300FFCC",..: 1 3 4 2 1 1 2 1 NA 2
# $ size : Factor w/ 5 levels "2","4.5","4.916667",..: 3 4 5 1 4 2 1 2 NA 1
If instead you just use data.frame, you'll get:
dat <- data.frame(longitude, latitude, color, size)
str(dat)
# 'data.frame': 10 obs. of 4 variables:
# $ longitude: num -124 -124 -124 -123 -124 ...
# $ latitude : num 47.3 45.9 46.3 45.5 46 ...
# $ color : Factor w/ 4 levels "#00FFCCCC","#3300FFCC",..: 1 3 4 2 1 1 2 1 NA 2
# $ size : num 4.92 5.75 7 2 5.75 ...
plot(latitude ~ longitude, data = dat, pch = 21, col = 1, bg = color, cex = size)
But now the colors are all dorked. Okay, the problem is likely because your $color is a factor, which is being interpreted internally as integers. Try stringsAsFactors=F:
dat <- data.frame(longitude, latitude, color, size, stringsAsFactors=FALSE)
str(dat)
# 'data.frame': 10 obs. of 4 variables:
# $ longitude: num -124 -124 -124 -123 -124 ...
# $ latitude : num 47.3 45.9 46.3 45.5 46 ...
# $ color : chr "#00FFCCCC" "#99FF00CC" "#FF0000CC" "#3300FFCC" ...
# $ size : num 4.92 5.75 7 2 5.75 ...
plot(latitude ~ longitude, data = dat, pch = 21, col = 1, bg = color, cex = size)

R circlular wheel chart

I'm trying to make a wheel chart that has rings. My result looks like the lines all go back to zero before continuing to the next point. Is it a discreet/continuous issue? I've tried making Lap.Time and Lap both numeric to no avail:
f1 <- read.csv("F1 2011 Turkey - Fuel Corrected Lap Times.csv", header = T)
str(f1)
# data.frame: 1263 obs. of 5 variables:
# $ Driver : Factor w/ 23 levels "1","2","3","4",..: 23 23 23 23 23 23 23 23 23 23 ...
# $ Lap : int 1 2 3 4 5 6 7 8 9 10 ...
# $ Lap.Time : num 107 99.3 98.4 97.5 97.4 ...
# $ Fuel.Adjusted.Laptime : num 102.3 94.7 93.9 93.1 93.1 ...
# $ Fuel.and.fastest.lap.adjusted.laptime: num 9.73 2.124 1.321 0.54 0.467 ...
library(ggplot2)
f1$Driver<-as.factor(f1$Driver)
p1 <- ggplot(data=subset(f1, Lap.Time <= 120), aes(x = Lap, y= Lap.Time, colour = Driver)) +
geom_point(aes(colour=Driver))
p2 <- ggplot(subset(f1, Lap.Time <= 120),
aes(x = Lap, y= Lap.Time, colour = Driver, group = 1)) +
geom_line(aes(colour=Driver))
pout <- p1 + coord_polar()
pout2 <- p2 + coord_polar()
pout
pout2
resulting chart image
All the data is in this csv:
https://docs.google.com/spreadsheets/d/1Ef2ewd1-0FM1mJL1o00C6c2gf7HFmanJh8an1EaAq2Q/edit?hl=en_GB&authkey=CMSemOQK#gid=0
Sample of csv:
Driver,Lap,Lap Time,Fuel Adjusted Laptime,Fuel and fastest lap adjusted laptime
25,1,106.951,102.334,9.73
25,2,99.264,94.728,2.124
25,3,98.38,93.925,1.321
25,4,97.518,93.144,0.54
25,5,97.364,93.071,0.467
25,6,97.853,93.641,1.037
25,7,98.381,94.25,1.646
25,8,98.142,94.092,1.488
25,9,97.585,93.616,1.012
25,10,97.567,93.679,1.075
25,11,97.566,93.759,1.155
25,12,97.771,94.045,1.441
25,13,98.532,94.887,2.283
25,14,99.146,95.582,2.978
25,15,98.529,95.046,2.442
25,16,99.419,96.017,3.413
25,17,114.593,111.272,18.668

Passing arguments to ggplot and facet_grid

I need some help with these lines of code.
My data set:
> str(data.tidy)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 9480 obs. of 11 variables:
$ Country.Name : Factor w/ 248 levels "Afghanistan",..: 234 12 13 20 22 31 17 16 25 28 ...
$ Country.Code : Factor w/ 248 levels "ABW","AFG","AGO",..: 7 12 13 16 17 18 19 21 27 28 ...
$ Year : Factor w/ 56 levels "1960","1961",..: 1 1 1 1 1 1 1 1 1 1 ...
$ InfantMortality : num 137.3 20.3 37.3 29.5 186.9 ...
$ AdolFertilityRate: num 176.9 44.8 48.4 27.1 85.8 ...
$ FertilityRate : num 6.93 3.45 2.69 2.54 6.28 ...
$ LifeExpectancy : num 52.2 70.8 68.6 69.7 37.3 ...
$ TotalUnemp : num NA NA NA NA NA NA NA NA NA NA ...
$ TotalPop : num 92612 10276477 7047539 9153489 2431620 ...
$ Region : Factor w/ 8 levels "","East Asia & Pacific",..: 5 2 3 3 8 8 7 5 4 4 ...
$ IncomeGroup : Factor w/ 6 levels "","High income: nonOECD",..: 2 3 3 3 4 4 5 2 5 6 ...
Reference code that I want to 'functionize':
ggplot(data.tidy,aes(as.numeric(as.character(Year)),y=InfantMortality))+
geom_line(aes(color=Country.Name))+
facet_grid(.~IncomeGroup)+
theme(legend.position="none")+
theme(strip.text.x = element_text(size = 7))+
labs(x='Year', title='Change in mortality rate over time')+
geom_smooth(color='black')
I want to replace data.tidy, InfantMortality, IncomeGroup and title in the example above.
Here was my attempt at the code:
facetedlineplot <- function(df,y,facet,title){
ggplot(df,aes(as.numeric(as.character(Year)),y=y))+
geom_line(aes(color=Country.Name))+
facet_grid(.~facet)+
theme(legend.position="none")+
theme(strip.text.x = element_text(size = 7))+
labs(x='Year',title=title)+
geom_smooth(color='black')
}
The error:
> facetedlineplot(data.tidy,y = 'InfantMortality',facet = 'IncomeGroup',title = 'Title goes here')
Error in layout_base(data, cols, drop = drop) :
At least one layer must contain all variables used for facetting
I have tried aes_string, but I couldn't get it to work. What does the error mean? How can I work around this issue?
Update:
I have some code that partially works now, using reformulate()
facetedlineplot <- function(df,y,facet,title){
year <- as.numeric(as.character(df$Year))
ggplot(df,aes(x=year,y=y))+
geom_line(aes(color=Country.Name))+
facet_grid(paste('.~',reformulate(facet)))+
theme(legend.position="none")+
theme(strip.text.x = element_text(size = 7))+
labs(x='Year',title=title)+
geom_smooth(color='black')
}
> facetedlineplot(data.tidy,y = 'InfantMortality', facet = 'IncomeGroup', title = 'Title goes here')
Warning message:
Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
>
Still, an incorrect plot>
Thank you in advance,
Rahul
I have the solution. Three steps worked for me:
- Change datatype of the Year variable in data.tidy from factor to numeric.
- Use aes_string for the ggplot argument
- For facet_grid(), many things worked:
Use as.formula() to pass '~IncomeGroup'
Just pass '~IncomeGroup' directly to facet_grid()
Final code:
facetedlineplot <- function(df,y,facet,title){
ggplot(df,aes_string(x = 'Year', y = y))+
geom_line(aes(color=Country.Name))+
facet_grid(facet)+
theme(legend.position="none")+
theme(strip.text.x = element_text(size = 9))+
labs(x='Year',title=title)+
geom_smooth(color='black')
}
d <- data.tidy
d$Year <- as.numeric(as.character(d$Year))
facetedlineplot(d,'InfantMortality','~IncomeGroup','Title')

Adding Different Percentiles in boxplots in R

I am failry new to R and recently used it to make some Boxplots. I also added the mean and standard deviation in my boxplot. I was wondering if i could add some kind of tick mark or circle in different percentile as well. Let's say if i want to mark the 85th, $ 90th percentile in each HOUR boxplot, is there a way to do this? My data consist of a year worth of loads in MW in each hour & My output consist of 24 boxplots for each hour for each month. I am doing each month at a time because i am not sure if there is a way to run all 96(Each month, weekday/weekend , for 4 different zones) boxplots at once. Thanks in advance for help.
JANWD <-read.csv("C:\\My Directory\\MWBox2.csv")
JANWD.df<-data.frame(JANWD)
JANWD.sub <-subset(JANWD.df, MONTH < 2 & weekend == "NO")
KeepCols <-c("Hour" , "Houston_Load")
HWD <- JANWD.sub[ ,KeepCols]
sd <-tapply(HWD$Houston_Load, HWD$Hour, sd)
means <-tapply(HWD$Houston_Load, HWD$Hour, mean)
boxplot(Houston_Load ~ Hour, data=HWD, xlab="WEEKDAY HOURS", ylab="MW Differnce", ylim= c(-10, 20), smooth=TRUE ,col ="bisque", range=0)
points(sd, pch = 22, col= "blue")
points(means, pch=23, col ="red")
#Output of the subset of data used to run boxplot for month january in Houston
str(HWD)
'data.frame': 504 obs. of 2 variables:
`$ Hour : int 1 2 3 4 5 6 7 8 9 10 ...'
`$ Houston_Load: num 1.922 2.747 -2.389 0.515 1.922 ...'
#OUTPUT of the original data
str(JANWD)
'data.frame': 8783 obs. of 9 variables:
$ Date : Factor w/ 366 levels "1/1/2012","1/10/2012",..: 306 306 306 306 306 306 306 306 306 306 ...
`$ Hour : int 1 2 3 4 5 6 7 8 9 10 ...'
` $ MONTH : int 8 8 8 8 8 8 8 8 8 8 ...'
`$ weekend : Factor w/ 2 levels "NO","YES": 1 1 1 1 1 1 1 1 1 1 ...'
`$ TOTAL_LOAD : num 0.607 5.111 6.252 7.607 0.607 ...'
`$ Houston_Load: num -2.389 0.515 1.922 2.747 -2.389 ...'
`$ North_Load : num 2.95 4.14 3.55 3.91 2.95 ...'
`$ South_Load : num -0.108 0.267 0.54 0.638 -0.108 ...'
`$ West_Load : num 0.154 0.193 0.236 0.311 0.154 ...'
Here is one way, using quantile() to compute the relevant percentiles for you. I add the marks using rug().
set.seed(1)
X <- rnorm(200)
boxplot(X, yaxt = "n")
## compute the required quantiles
qntl <- quantile(X, probs = c(0.85, 0.90))
## add them as a rgu plot to the left hand side
rug(qntl, side = 2, col = "blue", lwd = 2)
## add the box and axes
axis(2)
box()
Update: In response to the OP providing str() output, here is an example similar to the data that the OP has to hand:
set.seed(1) ## make reproducible
HWD <- data.frame(Hour = rep(0:23, 10),
Houston_Load = rnorm(24*10))
Now get I presume you want ticks at 85th and 90th percentiles for each Hour? If so we need to split the data by Hour and compute via quantile() as I showed earlier:
quants <- sapply(split(HWD$Houston_Load, list(HWD$Hour)),
quantile, probs = c(0.85, 0.9))
which gives:
R> quants <- sapply(split(HWD$Houston_Load, list(HWD$Hour)),
+ quantile, probs = c(0.85, 0.9))
R> quants
0 1 2 3 4 5 6
85% 0.3576510 0.8633506 1.581443 0.2264709 0.4164411 0.2864026 1.053742
90% 0.6116363 0.9273008 2.109248 0.4218297 0.5554147 0.4474140 1.366114
7 8 9 10 11 12 13 14
85% 0.5352211 0.5175485 1.790593 1.394988 0.7280584 0.8578999 1.437778 1.087101
90% 0.8625322 0.5969672 1.830352 1.519262 0.9399476 1.1401877 1.763725 1.102516
15 16 17 18 19 20 21
85% 0.6855288 0.4874499 0.5493679 0.9754414 1.095362 0.7936225 1.824002
90% 0.8737872 0.6121487 0.6078405 1.0990935 1.233637 0.9431199 2.175961
22 23
85% 1.058648 0.6950166
90% 1.145783 0.8436541
Now we can draw marks at the x locations of the boxplots
boxplot(Houston_Load ~ Hour, data = HWD, axes = FALSE)
xlocs <- 1:24 ## where to draw marks
tickl <- 0.15 ## length of marks used
for(i in seq_len(ncol(quants))) {
segments(x0 = rep(xlocs[i] - 0.15, 2), y0 = quants[, i],
x1 = rep(xlocs[i] + 0.15, 2), y1 = quants[, i],
col = c("red", "blue"), lwd = 2)
}
title(xlab = "Hour", ylab = "Houston Load")
axis(1, at = xlocs, labels = xlocs - 1)
axis(2)
box()
legend("bottomleft", legend = paste(c("0.85", "0.90"), "quantile"),
bty = "n", lty = "solid", lwd = 2, col = c("red", "blue"))
The resulting figure should look like this:

Resources