Bootstrapping eigenvalues for nonlinear PCA in r - r

I am running nonlinear PCA in r, using the homals package. Here is a chunk of the code I am using as an example:
res1 <- homals(data = mydata, rank = 1, ndim = 9, level = "nominal")
res1 <- rescale(res1)
I want to generate 1000 bootstrap estimates of the eigenvalues in this analysis (with replacement), but I can't figure out the code. Does anyone have any suggestions?
Sample data:
dput(head(mydata, 30))
structure(list(`W age` = c(45L, 43L, 42L, 36L, 19L, 38L, 21L,
27L, 45L, 38L, 42L, 44L, 42L, 38L, 26L, 48L, 39L, 37L, 39L, 26L,
24L, 46L, 39L, 48L, 40L, 38L, 29L, 24L, 43L, 31L), `W education` = c(1L,
2L, 3L, 3L, 4L, 2L, 3L, 2L, 1L, 1L, 1L, 4L, 2L, 3L, 2L, 1L, 2L,
2L, 2L, 3L, 3L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 1L, 3L), `H education` = c(3L,
3L, 2L, 3L, 4L, 3L, 3L, 3L, 1L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 2L,
2L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L), `N children` = c(10L,
7L, 9L, 8L, 0L, 6L, 1L, 3L, 8L, 2L, 4L, 1L, 1L, 2L, 0L, 7L, 6L,
8L, 5L, 1L, 0L, 1L, 1L, 5L, 8L, 1L, 0L, 0L, 8L, 2L), `W religion` = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), `W employment` = c(1L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L), `H occupation` = c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 1L, 1L, 3L, 2L, 4L, 2L, 2L,
2L, 2L, 4L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 2L, 1L), `Standard of living` =
c(4L,
4L, 3L, 2L, 3L, 2L, 2L, 4L, 2L, 3L, 3L, 4L, 3L, 3L, 1L, 4L, 4L,
3L, 1L, 1L, 1L, 4L, 4L, 4L, 3L, 4L, 4L, 2L, 4L, 4L), Media = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Contraceptive = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("W age",
"W education", "H education", "N children", "W religion", "W employment",
"H occupation", "Standard of living", "Media", "Contraceptive"
), row.names = c(NA, 30L), class = "data.frame")
>
I was given the rescale function to use with the homals package, to do optimal scaling. Here is the function:
rescale <- function(res) {
# Rescale homals results to proper scaling
n <- nrow(res$objscores)
m <- length(res$catscores)
res$objscores <- (n * m)^0.5 * res$objscores
res$scoremat <- (n * m)^0.5 * res$scoremat
res$catscores <- lapply(res$catscores, FUN = function(x) (n * m)^0.5 * x)
res$cat.centroids <- lapply(res$cat.centroids, FUN = function(x) (n * m)^0.5 * x)
res$low.rank <- lapply(res$low.rank, FUN = function(x) n^0.5 * x)
res$loadings <- lapply(res$loadings, FUN = function(x) m^0.5 * x)
res$discrim <- lapply(res$discrim, FUN = function(x) (n * m)^0.5 * x)
res$eigenvalues <- n * res$eigenvalues
return(res)
}

The standard way to bootstrap in R is to use base package boot.
I am not very satistied with the code that follows because it is throwing lots of warnings. But maybe this is due to the dataset I have tested it with. I have used the dataset and 3rd example in help("homals").
I have run 10 bootstrap replicates only.
library(homals)
library(boot)
boot_eigen <- function(data, indices){
d <- data[indices, ]
res <- homals(d, active = c(rep(TRUE, 4), FALSE), sets = list(c(1,3,4),2,5))
res$eigenvalues
}
data(galo)
set.seed(7578) # Make the results reproducible
eig <- boot(galo, boot_eigen, R = 10)
eig
#
#ORDINARY NONPARAMETRIC BOOTSTRAP
#
#
#Call:
#boot(data = galo, statistic = boot_eigen, R = 10)
#
#
#Bootstrap Statistics :
# original bias std. error
#t1* 0.1874958 0.03547116 0.005511776
#t2* 0.2210821 -0.02478596 0.005741331
colMeans(eig$t)
#[1] 0.2229669 0.1962961
If this also doesn't run properly in your case, please say so and I will delete the answer.
EDIT.
In order to answer to the discussion in the comments, I have changed the function boot_eigen, the call to homals now follows the question code and rescale is called before returning.
boot_eigen <- function(data, indices){
d <- data[indices, ]
res <- homals(data = d, rank = 1, ndim = 9, level = "nominal")
res <- rescale(res)
res$eigenvalues
}
set.seed(7578) # Make the results reproducible
eig <- boot(mydata, boot_eigen, R = 10)

Related

ggplot2 select categories for bar chart and create labels

I am trying make bar chart with ggplot2 with the dataset below. When I use the code
ggplot(p.data, aes(x = `Period Number`, y = `Total Jumps`)) +
stat_summary(data = subset(p.data, Status = "Starter"), fun ="mean", geom = "bar")
I get this graph:
The most concerning aspect is the for period 2, 3, 4, and 5 the bars should be taller (period 2 should be around 9.9). Additionally, I would like to remove period 0 and period 1 and add bar labels with the raw data and without creating an additional data frame.
p.data <- structure(list(`Period Number` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L),
`Total Jumps` = c(112L, 97L, 28L, 132L, 162L, 19L, 92L, 112L,
97L, 141L, 68L, 86L, 76L, 26L, 105L, 125L, 19L, 92L, 112L,
64L, 101L, 68L, 4L, 8L, 0L, 8L, 12L, 0L, 0L, 0L, 13L, 8L,
0L, 8L, 2L, 2L, 5L, 12L, 0L, 0L, 0L, 5L, 11L, 0L, 0L, 6L,
0L, 9L, 8L, 0L, 0L, 0L, 7L, 10L, 0L, 14L, 5L, 0L, 5L, 5L,
0L, 0L, 0L, 8L, 11L, 0L, 108L, 131L, 47L, 136L, 159L, 35L,
114L, 116L, 111L, 190L, 64L, 75L, 95L, 47L, 116L, 123L, 27L,
103L, 108L, 70L, 152L, 64L, 4L, 7L, 0L, 14L, 10L, 0L, 0L,
0L, 15L, 10L, 0L, 4L, 0L, 0L, 3L, 7L, 7L, 8L, 8L, 5L, 10L,
0L, 7L, 14L, 0L, 3L, 10L, 1L, 0L, 0L, 11L, 7L, 0L, 18L, 15L,
0L, 0L, 9L, 0L, 3L, 0L, 10L, 11L, 0L, 118L, 96L, 48L, 143L,
170L, 37L, 118L, 117L, 116L, 165L, 56L, 80L, 68L, 48L, 114L,
130L, 36L, 114L, 107L, 80L, 123L, 56L, 2L, 10L, 0L, 8L, 11L,
0L, 0L, 0L, 5L, 9L, 0L, 4L, 12L, 0L, 6L, 5L, 0L, 4L, 8L,
12L, 8L, 0L, 7L, 4L, 0L, 10L, 10L, 0L, 0L, 0L, 12L, 13L,
0L, 25L, 2L, 0L, 5L, 14L, 1L, 0L, 2L, 7L, 12L, 0L), Status = structure(c(1L,
2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 1L), .Label = c("Bench", "Starter"), class = "factor")), row.names = c(NA,
198L), class = "data.frame")
Thank you for your help!
It's best to pass that data you actually want to plot to the plotting function, rather than trying to coerce it within the plotting function. In this case you were trying to subset a different data frame from the one you passed to ggplot inside stat_summary. The call to ggplot had already set up the aesthetics you wanted mapped, then in your only geom layer, you were telling ggplot you wanted a completely different set of aesthetics.
You don't need to create another data frame to reshape your data. Here's how you could do it using dplyr:
library(dplyr)
library(ggplot2)
p.data %>%
filter(Status == "Starter") %>%
group_by(`Period Number`) %>%
summarise(`Total Jumps` = mean(`Total Jumps`)) %>%
filter(`Period Number` > 1) %>%
ggplot(aes(x = `Period Number`, y = `Total Jumps`)) +
geom_col(fill = "dodgerblue", colour = "black") +
geom_text(aes(y = `Total Jumps` + 1, label = signif(`Total Jumps`, 2)))

R - Non-numeric argument to binary operator when dividing variable by participant-wise mean of that variable

I have a problem with some code.
My goal is to divide each value of the latency variable by the mean for latency of each individual participant.That means, I want to divide all latencies of participant 1 by the mean latency of participant, all latencies of participant 2 by the mean latency of participant 2 and so forth.
The error message is:
"Error in latency/mean(latency, na.rm = TRUE) :
non-numeric argument to binary operator
In addition: Warning messages:
1: In mean.default(latency) :
argument is not numeric or logical: returning NA
2: In mean.default(latency, na.rm = TRUE) :
argument is not numeric or logical: returning NA"
Importantly, the code works with another dataset.
Below find the code I used to try and achieve this and some example data to reproduce the error:
rt = df2$latency) as.numeric(df2$latency) na.omit(df2$latency) temp <- ddply(xy, c('participant'), transform, avg = mean(latency),
x = latency / mean(latency, na.rm = TRUE)
#Example Data
> dput(head (df2, 20))
structure(list(participant = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), timestamp = c(1547125307L,
1547125307L, 1547125307L, 1547125307L, 1547125307L, 1547125307L,
1547125307L, 1547125307L, 1547125307L, 1547125307L, 1547125307L,
1547125307L, 1547125307L, 1547125307L, 1547125307L, 1547125307L,
1547125307L, 1547125307L, 1547125307L, 1547125307L), dominance = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), blocknum = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), trialnum = 1:20,
condition = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "CFS", class = "factor"),
eyesidecfs = structure(c(1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L), .Label = c("lefteye",
"righteye"), class = "factor"), stimside = structure(c(2L,
1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 2L), .Label = c("left", "right"), class = "factor"),
stimpos = c(-97L, -97L, -83L, -55L, -47L, -1L, 61L, 46L,
-4L, 28L, -60L, 16L, 11L, 77L, 96L, 52L, -29L, 23L, 84L,
93L), stimulus = structure(c(6L, 41L, 13L, 45L, 1L, 45L,
40L, 44L, 19L, 38L, 13L, 35L, 39L, 16L, 3L, 33L, 25L, 4L,
2L, 9L), .Label = c("attr_male_0.bmp", "attr_male_1.bmp",
"attr_male_10.bmp", "attr_male_11.bmp", "attr_male_12.bmp",
"attr_male_13.bmp", "attr_male_14.bmp", "attr_male_15.bmp",
"attr_male_16.bmp", "attr_male_17.bmp", "attr_male_18.bmp",
"attr_male_19.bmp", "attr_male_2.bmp", "attr_male_20.bmp",
"attr_male_21.bmp", "attr_male_3.bmp", "attr_male_4.bmp",
"attr_male_5.bmp", "attr_male_6.bmp", "attr_male_7.bmp",
"attr_male_8.bmp", "attr_male_9.bmp", "practice_0.png", "practice_1.png",
"unattr_male_0.bmp", "unattr_male_1.bmp", "unattr_male_10.bmp",
"unattr_male_11.bmp", "unattr_male_12.bmp", "unattr_male_13.bmp",
"unattr_male_14.bmp", "unattr_male_15.bmp", "unattr_male_16.bmp",
"unattr_male_17.bmp", "unattr_male_18.bmp", "unattr_male_19.bmp",
"unattr_male_2.bmp", "unattr_male_20.bmp", "unattr_male_21.bmp",
"unattr_male_3.bmp", "unattr_male_4.bmp", "unattr_male_5.bmp",
"unattr_male_6.bmp", "unattr_male_7.bmp", "unattr_male_8.bmp",
"unattr_male_9.bmp"), class = "factor"), response = structure(c(1L,
2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L,
2L, 2L, 1L, 1L), .Label = c("num_5", "s", "None"), class = "factor"),
correct = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), latency = c("0.957696499963",
"0.791598779233", "1.10196583883", "1.47500942541", "1.10195224516",
"0.874937406699", "0.974977383185", "0.891667885011", "0.925115802807",
"1.29170322855", "1.10850134231", "1.27520744911", "2.82531718331",
"1.40841043117", "2.24205221134", "1.1019921939", "0.74171666964",
"1.32521745017", "1.12505149643", "0.891592148851"), stimulus2 = structure(c(6L,
41L, 13L, 45L, 1L, 45L, 40L, 44L, 19L, 38L, 13L, 35L, 39L,
16L, 3L, 33L, 25L, 4L, 2L, 9L), .Label = c("attr_male_0.bmp",
"attr_male_1.bmp", "attr_male_10.bmp", "attr_male_11.bmp",
"attr_male_12.bmp", "attr_male_13.bmp", "attr_male_14.bmp",
"attr_male_15.bmp", "attr_male_16.bmp", "attr_male_17.bmp",
"attr_male_18.bmp", "attr_male_19.bmp", "attr_male_2.bmp",
"attr_male_20.bmp", "attr_male_21.bmp", "attr_male_3.bmp",
"attr_male_4.bmp", "attr_male_5.bmp", "attr_male_6.bmp",
"attr_male_7.bmp", "attr_male_8.bmp", "attr_male_9.bmp",
"practice_0.png", "practice_1.png", "unattr_male_0.bmp",
"unattr_male_1.bmp", "unattr_male_10.bmp", "unattr_male_11.bmp",
"unattr_male_12.bmp", "unattr_male_13.bmp", "unattr_male_14.bmp",
"unattr_male_15.bmp", "unattr_male_16.bmp", "unattr_male_17.bmp",
"unattr_male_18.bmp", "unattr_male_19.bmp", "unattr_male_2.bmp",
"unattr_male_20.bmp", "unattr_male_21.bmp", "unattr_male_3.bmp",
"unattr_male_4.bmp", "unattr_male_5.bmp", "unattr_male_6.bmp",
"unattr_male_7.bmp", "unattr_male_8.bmp", "unattr_male_9.bmp"
), class = "factor"), Group = c(1, 2, 1, 2, 1, 2, 2, 2, 1,
2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 1)), row.names = 5:24, class = "data.frame")```

Lattice dotplot not showing count for y-values in R

I'm very inexperienced with R, but I'm required to use it for the statistics class I'm taking. I'm trying to make a dot plot using
library(lattice)
dotplot(Bio$SS,
main = "Plants by Number of Short Shoots",
xlab = "Number of Short Shoots",
ylab = "Number of Plants",)
However, the graph doesn't provide a count for the y-value. It looks like this instead:
As you can see, there are no y-values given to the dot plot, even though it should be listing the number of plants with each value. When I made a histogram using a similar formula it worked fine:
hist(Bio$SS,
main = "Plants by Number of Short Shoots",
xlab = "Number of Short Shoots",
ylab = "Number of Plants",
col = "green")
Here is how that turned out:
This one properly provided the count for a y-value. How can I make the dotplot do the same?
Here's the data I'm using:
structure(list(ï..Block = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), Treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("NFCT", "NFNP", "SFCT", "SFNP"), class = "factor"),
Plant = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L), Stem = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L), SS = c(4L, 2L, 3L, 2L,
1L, 2L, 5L, 5L, 4L, 4L, 5L, 3L, 3L, 2L, 4L, 2L, 6L, 3L, 10L,
2L, 5L, 2L, 6L, 2L, 4L), LS = c(4L, 7L, 1L, 7L, 7L, 6L, 5L,
5L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 1L, 4L, 1L, 4L, 4L, 4L,
2L, 4L, 1L), Leaves = c(30L, 30L, 13L, 32L, 32L, 35L, 33L,
34L, 27L, 23L, 21L, 20L, 25L, 24L, 25L, 25L, 24L, 25L, 29L,
20L, 20L, 22L, 25L, 23L, 13L), Inf. = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), TLength = c(10.5, 11.2, 6.2, 12.2, 11.3,
11.5, 11.9, 11.7, 10, 11.5, 10.9, 12.2, 12.6, 12.2, 12.1,
12, 6.5, 6.7, 13, 6.2, 7.6, 5.9, 7.7, 6, 5.6)), row.names = c(NA,
25L), class = "data.frame")

How get several horizontal barplots for many variables in same figure

I have several categorical variables and I need to plot its horizontal barplots in function of the frequency of their modalities. for example, if I want to plot horizontal barplot of the variable INTERET_ENVIRONNEMENT knowing that its modalities are:
> unique(DATABASE$INTERET_ENVIRONNEMENT)
[1] Beaucoup Un peu Pas du tout
Levels: Beaucoup Pas du tout Un peu
then using the code above :
ords <- c("Beaucoup", "Un peu", "Pas du tout")
ggplot(DATABASE, aes(x = INTERET_ENVIRONNEMENT)) +
geom_bar(fill = "orange", width = 0.7) +
scale_x_discrete(limits = ords) +
coord_flip() +
xlab("Storm Type") + ylab("Number of Observations")
I get this
Now I want to add all other categorical variables to get their horizontal bar plots in the same plot.
For example, if I want to add also the INTERET_COMPOSITION variable which has the same modalities ("Beaucoup", "Un peu", "Pas du tout").
I try using this code
ggplot(DATABASE, aes(x = INTERET_ENVIRONNEMENT)) +
geom_bar(fill = "orange", width = 0.7) +
scale_x_discrete(limits = ords) +
coord_flip() +
xlab("Storm Type") + ylab("Number of Observations")+
facet_wrap(~INTERET_COMPOSITION)
But, it doesn't give the needed results.
To make my example reproductible, this is a data set which contains 4 categorical variables having same modalities:
structure(list(INTERET_COMPOSITION = structure(c(1L, 1L, 1L,
3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 3L, 1L,
1L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Beaucoup",
"Pas du tout", "Un peu"), class = "factor"), INTERET_ENVIRONNEMENT = structure(c(1L,
3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 1L,
1L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("Beaucoup", "Pas du tout", "Un peu"), class = "factor"),
INTERET_ORIGINE_GEO = structure(c(1L, 2L, 1L, 1L, 3L, 1L,
3L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 2L, 1L, 1L, 3L, 1L, 1L,
1L, 1L, 3L, 3L, 1L, 2L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
3L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L), .Label = c("Beaucoup",
"Pas du tout", "Un peu"), class = "factor"), INTERET_ALIM_NATURELLE = structure(c(1L,
3L, 3L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 2L,
3L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 3L, 1L, 1L, 3L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("Beaucoup", "Pas du tout", "Un peu"
), class = "factor")), .Names = c("INTERET_COMPOSITION",
"INTERET_ENVIRONNEMENT", "INTERET_ORIGINE_GEO", "INTERET_ALIM_NATURELLE"
), row.names = c(1L, 2L, 3L, 5L, 9L, 13L, 14L, 16L, 18L, 19L,
20L, 24L, 27L, 29L, 30L, 32L, 33L, 35L, 36L, 37L, 39L, 44L, 49L,
51L, 52L, 53L, 55L, 56L, 61L, 62L, 63L, 65L, 66L, 67L, 71L, 74L,
75L, 80L, 81L, 84L, 86L, 90L, 92L, 95L, 96L, 99L, 100L, 103L,
104L, 107L), class = "data.frame")
>
Please, how should I do to plot their horizontal barplot in same figure?
You have to transform your data from wide to long
library(tidyverse)
d %>%
gather(k, v) %>%
ggplot(aes(v)) +
geom_bar(fill = "orange", width = 0.7) +
coord_flip() +
facet_wrap(~k)

Label an axis generated by axis() function

I'm plotting a dual axis graph in r base and I can't add a label to the secondary axis. Here's what I'm working with:
dados = structure(list(IDADE = c(65L, 35L, 65L, 42L, 50L, 44L, 0L, 58L,
22L, 27L, 34L, 31L, 0L, 24L, 34L, 20L, 4L, 34L, 20L, 27L, 8L,
2L, 20L, 31L, 45L, 26L, 26L, 40L, 50L, 34L), ESTADO = c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L), TIPINT = c(3L,
1L, 3L, 2L, 3L, 2L, 2L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L,
3L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 1L, 3L, 2L, 3L, 1L), DIARIAS = c(2L,
2L, 4L, 1L, 1L, 1L, 10L, 1L, 2L, 2L, 2L, 3L, 3L, 1L, 2L, 3L,
6L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 6L, 1L, 1L, 2L)), .Names = c("IDADE",
"ESTADO", "TIPINT", "DIARIAS"), row.names = c(NA, 30L), class = "data.frame")
#Define boxplot base
bx = boxplot(dados$IDADE~dados$TIPINT,axes=F, xlab=NA,ylab=NA,col=heat.colors(3,0.6))
#Posiciona eixos
par(mar = c(5,5,2,5))
#Plota bar
barplot(table(dados$TIPINT),col=heat.colors(3,0.4),names=c("Tipo 1","Tipo 2","Tipo 3"),
ylab = "Quantidade de pacientes")
#Plota box
par(new=T)
bx2 = bxp(bx,boxwex = 0.50, at = c(0.9, 2,3.1),axes=F, xlab=NA, ylab="l",boxfill=heat.colors(3,0.7))
axis(side=4)
Which gets me the following graph
I'm trying to label the right axis, but axis(side=4,labels="labels") is for something else and axis(side=4,ylab="label") doesn't work.
Thanks
You can use the function mtext to add a label to a secondary axis, using the line=2 argument to move it the appropriate distance from the axis.
mtext("label", 4, line=2)
Alternatively you could use just text to have finer control, if say, you want to reorient the label. You can pull the plot dimensions from par("usr") and adjust accordingly.
p <- par("usr")
text(p[2]+0.4, mean(p[3:4]), labels = "label", xpd=NA, srt = -90)

Resources