sampling based on frequency in R

sampling based on frequency in R - r

I want to draw 20000 samples from a fairly large data set, weighting each value by how often it occurs, in order to fill in the NA values.
I tried to use the output of hist() for this, but it was not successful and gave me an error. How can I avoid it?
# Asker's attempt, kept exactly as posted (it does not parse as written: the
# hist() call is missing a closing parenthesis).
# Histogram of the data; y$breaks and y$density are reused below as the
# candidate values and their sampling weights.
y=hist(maindata,col="red",breaks=length(unique(maindata))
# Repeat the fill-in 20000 times, restarting from the original data each time.
for(k in 1:20000){
data=maindata
for(i in 1:nrow(data)){
if (data[i]="Na"){
# y$breaks is one element longer than y$density (see the lengths reported
# below), which is the mismatch sample() complains about.
data[i]=sample(y$breaks,size=1,replace=FALSE,prob=y$density)}}}
I get this error :
Error in sample.int(length(x), size, replace, prob) :
incorrect number of probabilities
I checked length(y$breaks) and length(y$density): length(y$breaks) is larger by one. How should I fix it?
Thank you in advance.
EDIT :
structure(list(breaks = c(15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,
103, 104, 105, 106, 107, 108, 109), counts = c(27L, 17L, 31L,
83L, 118L, 144L, 211L, 279L, 354L, 312L, 300L, 377L, 407L, 443L,
481L, 351L, 302L, 236L, 248L, 178L, 141L, 101L, 77L, 80L, 63L,
44L, 64L, 44L, 60L, 46L, 24L, 29L, 15L, 28L, 21L, 13L, 19L, 10L,
30L, 11L, 12L, 12L, 7L, 12L, 12L, 11L, 11L, 7L, 7L, 4L, 4L, 4L,
1L, 2L, 3L, 6L, 1L, 1L, 3L, 3L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 1L, 2L), density = c(0.00453172205438067,
0.00285330647868412, 0.00520308828465928, 0.0139308492782813,
0.0198053037932192, 0.0241691842900302, 0.035414568647197, 0.0468277945619335,
0.0594159113796576, 0.0523665659617321, 0.0503524672708963, 0.0632762672037596,
0.0683115139308493, 0.0743538100033568, 0.0807317891910037, 0.0589123867069486,
0.0506881503860356, 0.0396106075864384, 0.0416247062772743, 0.0298757972473985,
0.0236656596173212, 0.0169519973145351, 0.0129237999328634, 0.0134273246055723,
0.0105740181268882, 0.00738502853306479, 0.0107418596844579,
0.00738502853306479, 0.0100704934541793, 0.0077207116482041,
0.0040281973816717, 0.00486740516951997, 0.00251762336354481,
0.00469956361195032, 0.00352467270896274, 0.00218194024840551,
0.00318898959382343, 0.00167841557569654, 0.00503524672708963,
0.0018462571332662, 0.00201409869083585, 0.00201409869083585,
0.00117489090298758, 0.00201409869083585, 0.00201409869083585,
0.0018462571332662, 0.0018462571332662, 0.00117489090298758,
0.00117489090298758, 0.000671366230278617, 0.000671366230278617,
0.000671366230278617, 0.000167841557569654, 0.000335683115139308,
0.000503524672708963, 0.00100704934541793, 0.000167841557569654,
0.000167841557569654, 0.000503524672708963, 0.000503524672708963,
0, 0, 0, 0.000167841557569654, 0.000167841557569654, 0, 0, 0,
0.000167841557569654, 0, 0, 0.000167841557569654, 0, 0.000167841557569654,
0, 0.000167841557569654, 0, 0.000167841557569654, 0.000167841557569654,
0, 0, 0.000167841557569654, 0.000167841557569654, 0, 0, 0, 0,
0, 0.000503524672708963, 0, 0, 0, 0.000167841557569654, 0.000335683115139308
), mids = c(15.5, 16.5, 17.5, 18.5, 19.5, 20.5, 21.5, 22.5, 23.5,
24.5, 25.5, 26.5, 27.5, 28.5, 29.5, 30.5, 31.5, 32.5, 33.5, 34.5,
35.5, 36.5, 37.5, 38.5, 39.5, 40.5, 41.5, 42.5, 43.5, 44.5, 45.5,
46.5, 47.5, 48.5, 49.5, 50.5, 51.5, 52.5, 53.5, 54.5, 55.5, 56.5,
57.5, 58.5, 59.5, 60.5, 61.5, 62.5, 63.5, 64.5, 65.5, 66.5, 67.5,
68.5, 69.5, 70.5, 71.5, 72.5, 73.5, 74.5, 75.5, 76.5, 77.5, 78.5,
79.5, 80.5, 81.5, 82.5, 83.5, 84.5, 85.5, 86.5, 87.5, 88.5, 89.5,
90.5, 91.5, 92.5, 93.5, 94.5, 95.5, 96.5, 97.5, 98.5, 99.5, 100.5,
101.5, 102.5, 103.5, 104.5, 105.5, 106.5, 107.5, 108.5), xname = "b",
equidist = TRUE), .Names = c("breaks", "counts", "density",
"mids", "xname", "equidist"), class = "histogram")
Data information :
> head(maindata)
[1] 30 44 -1 32 30 34
> is.numeric(maindata)
[1] TRUE
> is.vector(maindata)
[1] TRUE
> length(maindata)
[1] 36203

Do you just want 20,000 samples from the distribution of the non-missing data? If so, another way to approach this would be to just calculate a kernel density estimate directly from the non-missing data and then sample from that. For example, using fake data:
# Simulated data: 30,000 normal draws (mean 20, sd 10), 5,000 of them set to NA
set.seed(31)
dat <- rnorm(n = 30000, mean = 20, sd = 10)
dat[sample.int(30000, size = 5000)] <- NA
# Kernel density estimate of the non-missing values.
# n is the number of grid points used in the estimate (should be a power of 2)
dat.dens <- density(dat[!is.na(dat)], n = 2^10)
# Draw 20,000 grid points with replacement, weighted by the estimated density
sim.sample <- sample(
  dat.dens$x,
  size = 20000,
  replace = TRUE,
  prob = dat.dens$y
)
# Original estimate, with the density of the simulated sample overlaid in red
plot(dat.dens)
lines(density(sim.sample), col = "red")
Please let me know if I've misunderstood what you're trying to do.

Related

Adding sample size to ggplot boxplot

I'm interested to see how age is related to a continuous outcome, for which I have the following data:
library(dplyr)
library(tidyverse)
library(magrittr)
library(ggplot2)
mydata <-
structure(list(ID = c(104, 157, 52, 152, 114, 221, 320, 125,
75, 171, 80, 76, 258, 82, 142, 203, 37, 92, 202, 58, 194, 38,
4, 137, 25, 87, 40, 117, 21, 255, 277, 315, 96, 134, 185, 94,
3, 153, 172, 65, 279, 209, 60, 13, 154, 160, 24, 29, 159, 213,
127, 74, 48, 126, 184, 132, 61, 141, 27, 49, 8, 39, 164, 162,
34, 205, 179, 119, 77, 135, 138, 165, 103, 253, 14, 20, 310,
84, 30, 273, 22, 105, 262, 116, 86, 83, 145, 31, 95, 51, 81,
271, 36, 50, 189, 2, 115, 7, 197, 54), age = c(67.1, 70.7, 53,
61.7, 66.1, 57.7, 54.1, 67.2, 60.9, 55.8, 40.7, 57.6, 64.1, 70.7,
47.5, 46.3, 66.7, 55, 63.3, 68.2, 61.2, 60.5, 52, 65.3, 48.9,
56.9, 62.7, 75.2, 61.4, 57.9, 53.6, 58.1, 51, 67.3, 63.9, 57,
43.2, 64.7, 62.8, 56.3, 51.7, 39.4, 45.2, 57.8, 55.7, 69.6, 61.5,
50.1, 73.7, 55.5, 65.2, 54.6, 49, 35.2, 52.9, 46.3, 55, 52.5,
54.2, 61, 57.4, 56.5, 53.6, 47.7, 64.2, 53.4, 60.9, 58.2, 60.7,
50.3, 48.3, 74.7, 52.1, 59.9, 52.4, 70.8, 61.2, 66.5, 55.4, 57.5,
59.2, 60.1, 52.3, 60.2, 54.8, 36.3, 61.5, 48.6, 56, 62, 64.8,
40.4, 68.3, 60, 69.1, 56.6, 45.3, 58.5, 52.3, 52), continuous_outcome = c(3636.6,
1128.2, 2007.5, 802.9, 332.3, 2636.1, 169.5, 67.9, 3261.8, 1920.3,
155.2, 1677.2, 198.2, 11189.7, 560.9, 633.1, 196.1, 13.9, 100.7,
7594.5, 1039.8, 83.9, 2646.8, 284.6, 306, 1135.6, 1883.1, 5681.4,
1706.2, 2241.1, 97.7, 1106.8, 1107.1, 290.8, 2123.4, 267, 115.3,
138.5, 152.7, 1338.9, 6709.8, 561.7, 1931.7, 3112.4, 1876.3,
3795.9, 5706.7, 7.4, 1324.9, 4095.4, 205.4, 1886, 177.3, 304.4,
1319.1, 415.9, 537.2, 3141.1, 740, 1976.7, 624.8, 983.1, 1163.5,
1432.6, 3730.4, 2023.4, 498.2, 652.5, 982.7, 1345.3, 138.4, 1505.1,
3528.1, 11.9, 884.5, 10661.6, 1911.4, 2800.8, 81.5, 396.4, 409.1,
417.3, 186, 1892.4, 1689.7, 0, 210.1, 210.5, 3484.5, 3196.8,
57.2, 20.2, 947, 540, 1603.1, 1571.8, 9.1, 149.2, 122, 63.2),
age_decades = structure(c(3L, 4L, 2L, 3L, 3L, 2L, 2L, 3L,
3L, 2L, 1L, 2L, 3L, 4L, 1L, 1L, 3L, 2L, 3L, 3L, 3L, 3L, 2L,
3L, 1L, 2L, 3L, 4L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 1L, 3L,
3L, 2L, 2L, 1L, 1L, 2L, 2L, 3L, 3L, 2L, 4L, 2L, 3L, 2L, 1L,
1L, 2L, 1L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 1L, 3L, 2L, 3L, 2L,
3L, 2L, 1L, 4L, 2L, 2L, 2L, 4L, 3L, 3L, 2L, 2L, 2L, 3L, 2L,
3L, 2L, 1L, 3L, 1L, 2L, 3L, 3L, 1L, 3L, 2L, 3L, 2L, 1L, 2L,
2L, 2L), .Label = c("1", "2", "3", "4"), class = "factor")), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))
To make a boxplot of age decades on the x axis and my continuous outcome I'm using ggplot2.
I want to make several, and automatically plot the sample size on the x-axis ticks. To do so I've computed labels in the dataset as follows:
# Add label_decades: a descriptive age-range label followed by the sample size
# of that age group, intended for use as an x-axis label.
mydata <-
  mydata %>%
  group_by(age_decades) %>%
  # n() is the row count of the current age_decades group. Base as.character()
  # replaces as_character(), which does not appear to be provided by any of
  # the packages attached above.
  mutate(n_decades = as.character(n())) %>%
  mutate(label_decades = case_when(
    age_decades == 1 ~ "Below 50",
    age_decades == 2 ~ "Between 50 and 60",
    age_decades == 3 ~ "Between 60 and 70",
    age_decades == 4 ~ "Above 70"
  )) %>%
  mutate(label_decades = paste0(label_decades, "\n n = ", n_decades)) %>%
  ungroup() %>%
  relocate(age_decades, label_decades, .after = age) %>%
  # n_decades was only needed to build the label
  select(-n_decades) %>%
  arrange(ID)
Then I've tried to plot the boxplot using the newly created variable label_decades to label. The first thing I tried was:
# Attempt 1: pass the label column straight to the x scale.
# mydata$label_decades holds one entry per row of mydata, not one per
# age_decades level.
ggplot(mydata, aes(x=age_decades, y=continuous_outcome)) +
geom_boxplot() +
scale_x_discrete(labels=mydata$label_decades)
But that just plots the first few labels as they occur in the dataset (so they don't correspond to the actual boxplots):
Then I tried:
# Attempt 2: draw the labels as a text layer instead of as axis labels.
# The layer receives the full mydata, so one label is drawn for every row.
ggplot(mydata, aes(x=age_decades, y=continuous_outcome)) +
geom_boxplot() +
# y = Inf anchors the text at the edge of the panel; vjust offsets it from there
geom_text(data=mydata, aes(age_decades, Inf, label=label_decades),
vjust = 15, size=4)
Which works better but the font is really weird and also the original x axis labels/ticks are still showing.
Anyone know how to solve this issue? Thanks!

The font looks weird because many labels with the same text are plotted on top of each other. You can use distinct() to get only one label per x tick, and the theme() function to get rid of the x tick labels:
# Boxplot per age group, with each group's label drawn once as a text layer.
ggplot(data = mydata, mapping = aes(x = age_decades, y = continuous_outcome)) +
  geom_boxplot() +
  geom_text(
    # distinct() leaves a single row per x tick, so labels are not overplotted
    data = distinct(mydata, age_decades, label_decades),
    mapping = aes(label = label_decades),
    y = 9000
  ) +
  # hide the default x tick labels
  theme(axis.text.x = element_blank())

One way would be to turn the labels to factor as well.
library(dplyr)
library(ggplot2)
# Build one axis label per age group ("<range>\n n = <group size>") and store
# it as a factor whose levels follow the order of age_decades.
mydata <- mydata %>%
  group_by(age_decades) %>%
  # n() is the number of rows in the current age_decades group
  mutate(n_decades = as.character(n())) %>%
  ungroup() %>%
  mutate(
    label_decades = case_when(
      age_decades == 1 ~ "Below 50",
      age_decades == 2 ~ "Between 50 and 60",
      age_decades == 3 ~ "Between 60 and 70",
      age_decades == 4 ~ "Above 70"
    ),
    label_decades = paste0(label_decades, "\n n = ", n_decades),
    # Set the levels explicitly in age_decades order. factor()'s default
    # alphabetical levels would put "Above 70" first, and the ticks would be
    # mislabelled when the levels are handed to scale_x_discrete().
    label_decades = factor(
      label_decades,
      levels = unique(label_decades[order(age_decades)])
    )
  ) %>%
  relocate(age_decades, label_decades, .after = age) %>%
  # n_decades was only needed to build the label
  select(-n_decades) %>%
  arrange(ID)
You can then use its levels in scale_x_discrete.
# Relabel the x ticks with the levels of label_decades; the unnamed labels are
# applied to the age_decades ticks in order.
mydata %>%
  ggplot(mapping = aes(x = age_decades, y = continuous_outcome)) +
  geom_boxplot() +
  scale_x_discrete(labels = levels(mydata[["label_decades"]]))

I get an error when running a "simple slopes" analysis with an R function

I tried to run a "simple slopes" analysis for a moderated regression
using the package named interactions,
but it turns out it doesn't work.
I have already searched on Google, but it seems no one has had the same problem.
# Install and attach the package that is used for sim_slopes()
install.packages("interactions", dependencies = TRUE)
library(interactions)

# Main-effects model
out1 <- lm(timetogether ~ malehappy + femalehappy, data = df)
# Same model plus the malehappy:femalehappy interaction
out2 <- lm(timetogether ~ malehappy * femalehappy, data = df)

summary(out1)
summary(out2)
# Compare the two nested models
anova(out1, out2)

# Simple slopes for malehappy, with femalehappy passed as modx
sim_slopes(out2, pred = "malehappy", modx = "femalehappy")
When I call the function sim_slopes(out2, pred...)
it returns
"Error in isFALSE(row.names) : could not find function "isFALSE""
Others may be able to run sim_slopes() without any error,
but it does not work for me.
What should I do to resolve it, or to find out what is wrong?
Thank you
and here, is the output of dput(df)
structure(list(malehappy = structure(c(62, 53, 55, 36, 60, 50, 45,
53, 48, 50, 63, 46, 72, 40, 40, 30, 49, 49, 45, 59, 46.1513513513514,
51, 36, 47, 53, 65, 46, 39, 41, 56, 54, 41, 36, 46.1513513513514, 51,
50, 47, 56, 44, 42, 61, 44, 47, 55, 57, 55, 32, 62, 53, 60, 59, 65,
49, 49, 60, 56, 67, 54, 46.1513513513514, 46.1513513513514,
46.1513513513514, 34, 57, 61, 73, 42, 84, 46.1513513513514, 47, 43, 46.1513513513514, 59, 40, 42, 49, 55, 46, 56, 50, 48, 57, 50, 53, 46.1513513513514, 50, 46.1513513513514, 61, 64, 48, 42, 31, 71, 54, 29, 45, 56, 53, 56, 47, 48, 39, 58, 51, 48, 54, 52, 57, 89, 53, 53,
44, 53, 40, 47, 40, 47, 54, 69, 60, 56, 47, 65, 50, 29, 58, 50,
46.1513513513514, 39, 66, 50, 46.1513513513514, 47, 38, 50, 70, 36, 59, 71, 41, 54, 18, 46.1513513513514, 38, 29, 71, 46.1513513513514,
51, 46, 48, 61, 52, 41, 48, 44, 37, 43, 54, 56, 44, 55, 51, 64, 52,
38, 48, 60, 45, 43, 44, 39, 54, 56, 47, 53, 51, 43, 49, 50, 56, 41,
37, 49, 59, 60, 72, 31, 58, 52, 49, 58, 60, 52, 47, 65, 63, 67,
46.1513513513514, 54, 60,
46.1513513513514, 52, 43, 45, 26, 50, 40, 35, 43, 38, 40, 53, 36, 62, 30, 30, 46.1513513513514, 39, 39, 35, 49, 34, 41, 26, 37, 43, 55, 36,
29, 31, 46, 44, 31, 26, 28, 41, 40, 37, 46, 34,
46.1513513513514, 51, 34, 37, 45, 47, 45, 22, 52, 43, 50, 49, 55, 39, 39, 50, 46, 46.1513513513514, 44, 46.1513513513514, 43,
46.1513513513514, 24, 47, 51, 63, 32, 74, 24, 37, 33, 42, 49, 30, 32, 39, 45, 36, 46, 40, 46.1513513513514, 47, 40, 43, 58, 40, 47, 51, 54,
38, 32, 21, 61, 44, 19, 35, 46, 43, 46, 37, 38, 29, 48, 41, 38, 44,
42, 47, 79, 43, 43, 34, 43, 30, 37, 30, 37, 44, 59, 50, 46,
46.1513513513514, 55, 40, 19, 48, 40, 37, 29, 56, 40, 49, 37, 28, 46.1513513513514, 60, 26, 49, 61, 31, 44, 8, 36, 28, 19, 61, 38, 41, 36, 38, 51, 42, 31, 38, 34, 27, 33, 44, 46, 46.1513513513514,
46.1513513513514, 46.1513513513514, 54, 42, 28, 38, 50, 35, 46.1513513513514, 34, 29, 46.1513513513514, 46, 37, 43, 41, 33, 39, 40, 46, 31, 27, 39, 49, 46.1513513513514, 62, 46.1513513513514, 48,
42, 39, 48, 50, 42, 37, 55, 53, 57, 44, 44, 50, 52), imputed = c(21L,
34L, 59L, 60L, 61L, 68L, 71L, 84L, 86L, 127L, 131L, 142L, 146L, 197L,
200L, 216L, 240L, 257L, 259L, 261L, 280L, 321L, 334L, 359L, 360L,
361L, 368L, 371L, 384L, 386L), class = "impute"), femalehappy =
structure(c(59, 54, 51, 35, 50, 55.5978260869565, 45, 59, 49, 63, 53,
57, 65, 38, 45, 45, 34, 48, 35, 89, 45, 53, 46, 30, 54, 59, 31, 44,
37, 55, 46, 63, 41, 43, 57, 65, 41, 67, 52, 55, 69, 41, 55, 37, 50,
39, 23, 63, 63, 47, 53, 52, 37, 51, 52, 34, 58, 55, 55.5978260869565,
60, 55.5978260869565, 42, 42, 55.5978260869565, 55, 39, 71,
55.5978260869565, 41, 51, 38, 38, 44, 72, 57, 44, 45, 57, 56, 43, 55.5978260869565, 51, 46, 64, 64, 65, 74, 58, 54, 51, 45, 61, 56, 39, 48, 49, 57, 56, 39, 51, 35, 42, 49, 43, 43, 53, 64, 67, 43, 54, 49,
57, 43, 44, 57, 48, 64, 56, 57, 69, 55.5978260869565, 65, 65, 37, 52,
50, 55.5978260869565, 55.5978260869565, 61, 57, 55.5978260869565, 46,
62, 55, 66, 50, 70, 63, 44, 62, 36, 55.5978260869565, 23, 47, 54,
55.5978260869565, 41, 40, 57, 40, 61, 45, 57, 30, 40, 42, 55.5978260869565, 57, 45, 44, 46, 48, 33, 45, 49, 55, 47, 40, 47, 42, 60, 55.5978260869565, 38, 55.5978260869565, 41, 55, 36, 52, 50, 36,
44, 50, 59, 59, 55.5978260869565, 49, 62, 57, 37, 59, 63, 43, 38, 63,
53, 58, 60, 47, 49, 55.5978260869565, 69, 64, 61, 45, 60, 61, 55, 69,
59, 73, 63, 67, 75, 48, 55, 55.5978260869565, 44, 58, 45, 99, 55, 63,
56, 40, 64, 69, 55.5978260869565, 54, 47, 65, 56, 73, 51, 53, 67, 75,
51, 77, 62, 55.5978260869565, 79, 51, 65, 47, 60, 49, 33, 73, 73,
55.5978260869565, 63, 62, 47, 61, 62, 44, 68, 65, 55.5978260869565, 70, 55.5978260869565, 52, 52, 64, 65, 49, 81, 48, 51, 61, 48, 48, 54,
55.5978260869565, 67, 54, 55, 67, 66, 55.5978260869565, 55.5978260869565, 61, 56, 74, 74, 75, 84, 68, 64, 61, 55, 71, 66, 49, 58, 59, 67, 66, 49, 61, 45, 52, 59, 53, 53, 55.5978260869565, 74, 77,
53, 64, 59, 67, 53, 54, 67, 58, 74, 66, 67, 79, 57, 75, 75, 47, 62,
60, 57, 42, 71, 67, 63, 56, 72, 65, 76, 60, 80, 73, 54, 72, 46, 57,
33, 57, 64, 72, 51, 50, 67, 50, 71, 55, 67, 40, 50, 52, 56, 67,
55.5978260869565, 54, 55.5978260869565, 58, 43, 55.5978260869565, 59, 65, 57, 55.5978260869565, 57, 52, 70, 56, 48, 65, 51, 65, 46, 62, 60,
46, 55.5978260869565, 60, 69, 69, 84, 59, 72, 67, 47, 69, 73, 53, 48,
73, 63, 68, 70, 57, 59, 72), imputed = c(6L, 59L, 61L, 64L, 68L, 81L,
121L, 127L, 128L, 131L, 142L, 146L, 157L, 172L, 174L, 185L, 200L,
216L, 227L, 240L, 250L, 259L, 261L, 274L, 280L, 281L, 306L, 359L,
361L, 364L, 368L, 381L), class = "impute"), timetogether =
structure(c(132, 89, 86, 19, 96, 74, 47, 91.7415143603133, 62, 104,
114, 76, 195, 27, 39, 18, 30, 63, 28, 91.7415143603133, 45, 79, 29,
18, 89, 145, 20, 34, 26, 101, 69, 70, 25, 32, 93, 107, 43, 136, 60,
59, 165, 37, 73, 43, 89, 49, 6, 146, 91.7415143603133, 85,
91.7415143603133, 115, 36, 71, 103, 35, 145, 93, 37, 104, 69, 91.7415143603133, 64, 114, 152, 31, 91.7415143603133, 20, 43, 54, 43, 51, 36, 87, 85, 65, 50, 109, 85, 48, 89, 74, 67, 178, 105, 136, 186,
138, 75, 51, 19, 172, 96, 14, 55, 84, 98, 91.7415143603133, 38, 68,
22, 64, 70, 49, 60, 82, 132, 277, 60, 89, 54, 98, 36, 51, 57, 58,
122, 142, 118, 146, 57, 165, 109, 13, 95, 70, 55, 17, 153, 88, 103,
52, 58, 82, 190, 36, 162, 184, 38, 91.7415143603133, 0, 56, 5, 17,
139, 90, 48, 39, 82, 61, 103, 41, 82, 16, 26, 38, 68, 108, 45, 66,
61, 98, 29, 34, 64, 114, 51, 35, 51, 30, 109, 74, 35, 89, 50,
91.7415143603133, 34, 75, 85, 26, 31, 67, 122, 128, 237, 21, 130, 95, 36, 123, 141, 55, 37, 158, 116, 145, 109, 72, 92, 91.7415143603133,
164, 113, 120, 47, 137, 100, 73, 119, 88, 111, 157, 87, 231, 57, 59,
23, 78, 91, 71, 205, 71, 103, 41, 70, 116, 181, 70, 54, 60, 130, 108,
63, 43, 51, 111, 111, 79, 147, 75, 65, 179, 69, 87, 97, 127, 101, 47,
171, 124, 130, 139, 163, 81, 95, 142, 95, 185, 118, 66, 121, 96, 39,
113, 151, 206, 63, 325, 41, 79, 71, 90, 110, 56, 69, 101, 109, 79,
134, 103, 82, 125, 99, 103, 211, 110, 150, 194, 175, 93, 66, 25, 214,
120, 26, 78, 121, 120, 132, 78, 91, 55, 114, 100, 83, 103, 108, 148,
91.7415143603133, 102, 115, 74, 120, 59, 83, 59, 84, 134, 189, 150, 91.7415143603133, 85, 193, 114, 28, 131, 98, 83, 55, 188, 105, 138, 81, 49, 102, 223, 42, 172, 222, 61, 132, 0, 81, 54, 16, 191, 96, 90,
74, 91.7415143603133, 119, 91.7415143603133, 62, 97, 64, 50, 67, 106,
133, 71, 109, 96, 91.7415143603133, 84, 51, 89, 149,
91.7415143603133, 68, 74, 53, 128, 116, 76, 113, 91, 70, 80, 98, 121, 60, 48, 92, 149, 157, 262, 21, 151, 114, 81, 148, 164, 97, 78, 188,
158, 186, 126, 91.7415143603133, 136, 174), imputed = c(8L, 20L, 49L,
51L, 62L, 67L, 98L, 140L, 176L, 200L, 308L, 320L, 349L, 351L, 362L,
367L, 398L), class = "impute"),
kids = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("nokids", "kids"), class = "factor")), .Names = c("malehappy", "femalehappy", "timetogether", "kids"), row.names =
c(NA, -400L ), class = "data.frame")

It is related to the R version. The function isFALSE is built into R from version 3.5 onwards. I was getting the same error with the package "jtools" when using R 3.4. I upgraded R to version 3.6, and the problem was gone.

Randomly sampling and assigning a variable using dplyr

I have a data frame of 200 individuals, and using dplyr I would like to randomly select half of them, create a variable called 'sex,' and assign 100 with sex as male. For the remaining 100 individuals, I would like to assign the sex as female. A reproducible example of the data set is available below.
df <- dput(input)
structure(list(id = 1:200, age = c(6L, 4L, 4L, 6L, 1L, 5L, 3L,
1L, 0L, 0L, 0L, 5L, 5L, 5L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L, 6L,
4L, 4L, 0L, 4L, 6L, 1L, 5L, 2L, 6L, 2L, 2L, 0L, 3L, 1L, 6L, 0L,
2L, 5L, 3L, 5L, 3L, 1L, 6L, 6L, 0L, 4L, 5L, 0L, 5L, 3L, 6L, 1L,
2L, 1L, 1L, 4L, 2L, 1L, 2L, 0L, 4L, 3L, 3L, 6L, 2L, 1L, 2L, 5L,
0L, 5L, 2L, 5L, 3L, 3L, 3L, 2L, 5L, 1L, 0L, 0L, 1L, 6L, 3L, 1L,
5L, 6L, 4L, 4L, 4L, 0L, 6L, 6L, 3L, 4L, 6L, 5L, 2L, 5L, 6L, 2L,
2L, 4L, 0L, 4L, 6L, 5L, 6L, 0L, 6L, 2L, 1L, 5L, 5L, 5L, 5L, 3L,
1L, 6L, 3L, 1L, 1L, 3L, 4L, 2L, 4L, 2L, 0L, 5L, 0L, 3L, 1L, 1L,
2L, 0L, 5L, 2L, 3L, 6L, 5L, 2L, 6L, 0L, 0L, 6L, 6L, 1L, 4L, 2L,
0L, 4L, 1L, 3L, 6L, 3L, 4L, 3L, 0L, 1L, 6L, 6L, 5L, 4L, 1L, 1L,
6L, 0L, 1L, 2L, 1L, 1L, 2L, 0L, 4L, 1L, 2L, 2L, 2L, 1L, 6L, 5L,
3L, 2L, 3L, 5L, 2L, 3L, 4L, 5L, 0L, 6L, 5L, 1L, 4L, 5L, 3L, 5L,
5L), x = c(21, 9, 31, 55, 5, 63, 63, 3, 13, 21, 53, 77, 5, 67,
63, 31, 17, 5, 21, 45, 79, 3, 7, 43, 27, 1, 63, 11, 37, 33, 27,
53, 71, 73, 97, 87, 77, 17, 85, 91, 49, 87, 89, 61, 65, 17, 71,
33, 53, 85, 49, 41, 75, 85, 79, 75, 23, 63, 89, 31, 29, 47, 75,
63, 65, 27, 27, 71, 89, 29, 25, 49, 91, 91, 39, 65, 45, 99, 53,
21, 29, 81, 35, 7, 27, 81, 93, 41, 79, 83, 31, 51, 33, 75, 15,
69, 7, 29, 7, 35, 87, 93, 57, 13, 91, 87, 95, 77, 7, 37, 81,
99, 83, 69, 85, 5, 77, 69, 55, 7, 39, 5, 41, 1, 63, 25, 13, 39,
97, 73, 25, 49, 35, 95, 59, 75, 23, 35, 67, 73, 91, 83, 79, 9,
27, 89, 79, 53, 89, 69, 95, 57, 11, 45, 63, 5, 25, 61, 3, 89,
1, 61, 85, 75, 67, 73, 63, 77, 43, 31, 69, 39, 47, 59, 75, 45,
57, 73, 5, 85, 57, 13, 91, 69, 79, 89, 13, 33, 15, 23, 89, 85,
39, 87, 7, 97, 57, 5, 61, 85), y = c(41, 57, 29, 59, 83, 77,
35, 73, 99, 69, 85, 23, 85, 11, 63, 97, 73, 47, 57, 73, 77, 1,
91, 17, 71, 57, 11, 3, 81, 31, 5, 41, 69, 93, 3, 11, 45, 97,
81, 87, 43, 9, 53, 61, 11, 63, 59, 33, 49, 89, 87, 79, 47, 59,
41, 25, 47, 13, 69, 11, 93, 83, 91, 85, 13, 95, 13, 37, 99, 35,
11, 63, 19, 99, 71, 55, 5, 21, 43, 59, 49, 15, 99, 15, 75, 77,
53, 51, 91, 45, 83, 21, 29, 35, 3, 27, 97, 95, 29, 53, 55, 41,
45, 31, 75, 37, 15, 47, 3, 1, 99, 55, 81, 37, 1, 41, 51, 45,
27, 83, 9, 69, 13, 81, 91, 55, 51, 31, 17, 97, 1, 47, 35, 7,
53, 59, 5, 51, 7, 5, 93, 63, 95, 51, 33, 43, 75, 67, 59, 89,
49, 83, 21, 49, 5, 5, 19, 45, 29, 41, 25, 3, 9, 1, 73, 53, 43,
99, 69, 41, 21, 3, 3, 13, 39, 21, 55, 75, 91, 31, 79, 17, 43,
91, 73, 11, 75, 15, 49, 77, 77, 23, 83, 47, 51, 53, 57, 99, 35,
15)), row.names = c(NA, -200L), class = "data.frame", .Names = c("id",
"age", "x", "y"))
I'm new to using dplyr, so I'm not exactly sure how to perform this operation. I'm thinking it would look something like this:
# Draw a random half of the rows (sample_frac or sample_n can be used to select
# the 100 individuals), then label every sampled row as male.
new_df <- mutate(sample_frac(df, 0.5), sex = "male")
but obviously that just results in a new data frame. Is there a way to select 100 males from the original data frame, then use something like an ifelse statement to assign the rest as female?

If you absolutely need a 50/50 distribution between male and female, you could run with dplyr:
# Assign "male" to a random half of the rows and "female" to the other half,
# keeping the rows of df in their original order.
dfs <- df %>%
  mutate(
    # Balanced vector of labels sized to the data (one extra "male" if the row
    # count is odd), shuffled so that the assignment is random.
    sex = sample(rep(c("male", "female"), length.out = n()))
  ) %>%
  # put id and sex first, followed by the remaining columns
  select(id, sex, everything())
results:
table(dfs$sex)
female male
100 100

Date format in hover for ggplot2 and plotly

I have a question about date formats in plotly. I made a time series plot in ggplot2 that I'm trying to visualize with plotly, but the date-time is shown in the wrong format in the hover text (see image). I would like the date format to be YYMMDD-hh:mm. How can I get this format?
Relevant R code on my script:
library(lubridate)
# Reshape the wide station table to long form, keeping fecha as the id column
datosO3.melt <- melt(
  datosO3.plot,
  id.vars = "fecha",
  value.name = "value"
)
# Line chart with one coloured line per variable, then hand it to plotly
ozono.plot <- ggplot() +
  geom_line(
    data = datosO3.melt,
    mapping = aes(x = fecha, y = value, colour = variable)
  )
ggplotly(ozono.plot)
The point is that column fecha in the dataframe is a date-time object created in a previous dataframe.
# Concatenate the AAMMDD and hhmm columns and parse them as one date-time column
datosO3$fecha.hora <- ymd_hm(paste0(datosO3$AAMMDD, datosO3$hhmm))
and inherited by datosO3.melt
str(datosO3.melt)
'data.frame': 23328 obs. of 3 variables:
$ fecha : POSIXct, format: "2017-06-13 00:00:00" "2017-06-13 00:10:00" ...
$ variable: Factor w/ 54 levels "Alcoi.Verge_dels_Lliris",..: 1 1 1 1 1 1 1 1 1 1 ...
$ value : num 75 76 73 72 71 72 73 74 74 73 ...
But when I dput I get:
> dput(data)
structure(list(fecha = structure(c(1497312000, 1497312600, 1497313200,
1497313800, 1497314400, 1497315000, 1497315600, 1497316200, 1497316800,
1497317400, 1497318000, 1497318600, 1497319200, 1497319800, 1497320400,
1497321000, 1497321600, 1497322200, 1497322800, 1497323400, 1497324000,
1497324600, 1497325200, 1497325800, 1497326400, 1497327000, 1497327600,
1497328200, 1497328800, 1497329400, 1497330000, 1497330600, 1497331200,
1497331800, 1497332400, 1497333000, 1497333600, 1497334200, 1497334800,
1497335400, 1497336000, 1497336600, 1497337200, 1497337800, 1497338400,
1497339000, 1497339600, 1497340200, 1497340800, 1497341400, 1497342000,
1497342600, 1497343200, 1497343800, 1497344400, 1497345000, 1497345600,
1497346200, 1497346800, 1497347400, 1497348000, 1497348600, 1497349200,
1497349800, 1497350400, 1497351000, 1497351600, 1497352200, 1497352800,
1497353400, 1497354000, 1497354600, 1497355200, 1497355800, 1497356400,
1497357000, 1497357600, 1497358200, 1497358800, 1497359400, 1497360000,
1497360600, 1497361200, 1497361800, 1497362400, 1497363000, 1497363600,
1497364200, 1497364800, 1497365400, 1497366000, 1497366600, 1497367200,
1497367800, 1497368400, 1497369000, 1497369600, 1497370200, 1497370800,
1497371400), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("Alcoi.Verge_dels_Lliris", "Alacant.El_Pla",
"Alacant.Florida_Babel", "Alacant.Rabassa", "Benidorm", "Elx.Agroalimentari",
"Elx.Parc_de_Bombers", "Elda.Lacy", "Orihuela", "El_Pinos",
"Torrevieja", "L.Alcora", "Burriana", "Castello.Penyeta",
"Castello.Ermita", "Castello.Grau", "Castello.Patronat_d.Esports",
"Cirat", "Morella", "Onda", "Coratxar", "Sant_Jordi", "Torre_Endomenech",
"La_Vall_d.Uixo", "Vilafranca", "Vinaros_Planta", "Viver",
"Zorita", "Albalat_dels_Tarongers", "Alzira", "Algar_de_Palancia",
"Beniganim", "Bunnol.Cemex", "Burjassot.Facultats", "Caudete_de_las_Fuentes",
"Cortes_de_Pallas", "Quart_de_Poblet", "Gandia", "Ontinyent",
"Paterna.CEAM", "Sagunt.Port", "Sagunt.Nord", "Sagunt.CEA",
"Torrebaja", "Valencia.Pista_de_Silla", "Valencia.Vivers",
"Valencia.Politecnic", "Valencia.Avd._Francia", "Valencia.Moli_del_Sol",
"Valencia.Bulevard_Sud", "Vilamarxant", "Villar_del_Arzobispo",
"Torrent.El_Vedat", "Chiva_UM"), class = "factor"), value = c(75,
76, 73, 72, 71, 72, 73, 74, 74, 73, 71, 72, 71, 72, 74, 74,
73, 73, 73, 74, 74, 74, 72, 72, 72, 71, 70, 70, 70, 70, 72,
71, 68, 66, 68, 68, 65, 61, 63, 65, 71, 71, 79, 91, 84, 82,
91, 94, 91, 88, 88, 92, 99, 102, 103, 100, 105, 104, 104,
101, 102, 100, 101, 104, 109, 109, 112, 115, 116, 116, 113,
111, 110, 113, 113, 114, 115, 115, 114, 113, 111, 112, 115,
114, 112, 112, 114, 116, 116, 115, 114, 115, 113, 112, 112,
110, 109, 110, 110, 111)), .Names = c("fecha", "variable",
"value"), row.names = c(NA, 100L), class = "data.frame")
>
How do I change the fecha format to be reflected in the hover?
EDIT 1: Added data
> dput(datosO3.plot)
structure(list(Alcoi.Verge_dels_Lliris = c(75, 76, 73, 72, 71,
72), Alacant.El_Pla = c(56, 55, 53, 56, 55, 54), Alacant.Florida_Babel = c(56,
49, 48, 45, 44, 42), Alacant.Rabassa = c(43, 42, 43, 41, 41,
43), Benidorm = c(110, 105, 95, 107, 110, 107), Elx.Agroalimentari = c(80,
77, 75, 69, 64, 62), Elx.Parc_de_Bombers = c(71, 68, 67, 68,
65, 66), Elda.Lacy = c(39, 34, 32, 28, 25, 26), Orihuela = c(16,
13, 25, 13, 17, 9), El_Pinos = c(48, 35, 36, 35, 33, 43), Torrevieja = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), L.Alcora = c(40,
45, 42, 44, 48, 50), Burriana = c(14, 8, 8, 9, 7, 7), Castello.Penyeta = c(57,
61, 68, 65, 58, 59), Castello.Ermita = c(18, 20, 14, 16, 21,
19), Castello.Grau = c(20, 21, 19, 13, 11, 16), Castello.Patronat_d.Esports = c(36,
26, 29, 28, 28, 29), Cirat = c(56, 56, 54, 54, 51, 51), Morella = c(119,
121, 122, 122, 123, 123), Onda = c(57, 58, 57, 58, 60, 60), Coratxar = c(123,
125, 126, 127, 128, 125), Sant_Jordi = c(37, 36, 37, 38, 40,
39), Torre_Endomenech = c(28, 34, 35, 32, 30, 30), La_Vall_d.Uixo = c(63,
64, 65, 65, 64, 65), Vilafranca = c(100, 101, 97, 98, 97, 99),
Vinaros_Planta = c(26.7, 31.3, 31.6, 31.7, 37.8, 41.7), Viver = c(40.6,
36.9, 47.6, 36.7, 43.5, 46.1), Zorita = c(67, 70, 69, 64,
64, 68), Albalat_dels_Tarongers = c(33, 32, 32, 29, 26, 26
), Alzira = c(24, 26, 23, 19, 20, 39), Algar_de_Palancia = c(47,
50, 48, 49, 47, 52), Beniganim = c(53, 58, 56, 56, 54, 53
), Bunnol.Cemex = c(64, 55, 53, 53, 53, 55), Burjassot.Facultats = c(43,
30, 30, 28, 16, 20), Caudete_de_las_Fuentes = c(71, 68, 66,
72, 74, 72), Cortes_de_Pallas = c(88, 74, 78, 82, 82, 85),
Quart_de_Poblet = c(13, 18, 21, 23, 30, 38), Gandia = c(45,
39, 49, 49, 48, 46), Ontinyent = c(88, 83, 83, 89, 86, 82
), Paterna.CEAM = c(48, 49, 47, 47, 48, 47), Sagunt.Port = c(52,
51, 51, 50, 50, 49), Sagunt.Nord = c(33, 34, 34, 32, 31,
31), Sagunt.CEA = c(34.8, 36.3, 37.6, 43.8, 40.7, 37.5),
Torrebaja = c(51, 42, 56, 52, 45, 65), Valencia.Pista_de_Silla = c(37,
52, 57, 60, 35, 7), Valencia.Vivers = c(45, 42, 39, 34, 32,
33), Valencia.Politecnic = c(40, 33, 30, 26, 25, 23), Valencia.Avd._Francia = c(54,
50, 50, 48, 45, 43), Valencia.Moli_del_Sol = c(9, 10, 10,
8, 7, 9), Valencia.Bulevard_Sud = c(1, 0, 0, 2, 0, 0), Vilamarxant = c(21,
29, 33, 27, 33, 22), Villar_del_Arzobispo = c(55, 57, 57,
54, 53, 55), Torrent.El_Vedat = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), Chiva_UM = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), fecha = structure(c(1497312000,
1497312600, 1497313200, 1497313800, 1497314400, 1497315000
), class = c("POSIXct", "POSIXt"), tzone = "UTC")), .Names = c("Alcoi.Verge_dels_Lliris",
"Alacant.El_Pla", "Alacant.Florida_Babel", "Alacant.Rabassa",
"Benidorm", "Elx.Agroalimentari", "Elx.Parc_de_Bombers", "Elda.Lacy",
"Orihuela", "El_Pinos", "Torrevieja", "L.Alcora", "Burriana",
"Castello.Penyeta", "Castello.Ermita", "Castello.Grau", "Castello.Patronat_d.Esports",
"Cirat", "Morella", "Onda", "Coratxar", "Sant_Jordi", "Torre_Endomenech",
"La_Vall_d.Uixo", "Vilafranca", "Vinaros_Planta", "Viver", "Zorita",
"Albalat_dels_Tarongers", "Alzira", "Algar_de_Palancia", "Beniganim",
"Bunnol.Cemex", "Burjassot.Facultats", "Caudete_de_las_Fuentes",
"Cortes_de_Pallas", "Quart_de_Poblet", "Gandia", "Ontinyent",
"Paterna.CEAM", "Sagunt.Port", "Sagunt.Nord", "Sagunt.CEA", "Torrebaja",
"Valencia.Pista_de_Silla", "Valencia.Vivers", "Valencia.Politecnic",
"Valencia.Avd._Francia", "Valencia.Moli_del_Sol", "Valencia.Bulevard_Sud",
"Vilamarxant", "Villar_del_Arzobispo", "Torrent.El_Vedat", "Chiva_UM",
"fecha"), row.names = 289:294, class = "data.frame")

We can use the "hidden" text aes, to use it in the tooltip:
# Line chart whose hover text is supplied through an extra `text` aesthetic,
# which ggplotly() can show as the tooltip.
ggplot(datosO3.melt) +
geom_line(aes(x = fecha,
y = value,
colour = variable,
group = variable,
# hover text: one "name: value" pair per line
text = paste('fecha: ', fecha, '\n',
'variable: ', variable, '\n',
'value: ', value, '\n')
)
)
# No plot object is passed, so ggplotly() falls back to its default (the most
# recent ggplot); tooltip = 'text' restricts the hover to the text aesthetic.
ggplotly(tooltip = 'text')
However for anything that's slightly more complicated than default, especially when working with hover tooltips I usually prefer to work directly in plotly:
# The same line chart built directly with plotly; hoverinfo = "text" makes the
# hover show only the custom text assembled below.
plot_ly(
  data = datosO3.melt,
  x = ~fecha,
  y = ~value,
  color = ~variable,
  type = "scatter",
  mode = "lines",
  hoverinfo = "text",
  text = ~paste(
    "fecha: ", fecha, "\n",
    "variable: ", variable, "\n",
    "value: ", value, "\n"
  )
)
To use a custom date format other than the default one, just substitute fecha with a call to format() using the format you prefer, e.g.:
# Line chart built with plotly; the hover text shows fecha formatted as
# year-month-day hour:minute instead of its default representation.
plot_ly(
  data = datosO3.melt,
  x = ~fecha,
  y = ~value,
  color = ~variable,
  type = "scatter",
  mode = "lines",
  hoverinfo = "text",
  text = ~paste(
    "fecha: ", format(fecha, "%Y-%m-%d %H:%M"), "\n",
    "variable: ", variable, "\n",
    "value: ", value, "\n"
  )
)

How to plot one variable against each of the three time points

I have a dataset containing one DV called Soma (somatotype) and three IVs called WT2 (weight at age 2), WT9 (weight at age 9) and WT18 (weight at age 18), and I want to plot Soma against weight at each of the three time points. But since it is not exactly a time series dataset, I am totally stuck.
I was thinking of using ggplot, but I am not familiar with it and my attempts have failed.
The dataset:
structure(list(X = 67:136, Sex = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
WT2 = c(13.6, 11.3, 17, 13.2, 13.3, 11.3, 11.6, 11.6, 12.4,
17, 12.2, 15, 14.5, 10.2, 12.2, 12.8, 13.6, 10.9, 13.1, 13.4,
11.8, 12.7, 11.8, 14.1, 10.9, 11.8, 13.6, 12.7, 12.3, 11.5,
12.6, 14.1, 11.5, 12, 10.9, 12.7, 11.3, 11.8, 15.4, 10.9,
13.2, 14.3, 11.1, 13.6, 12.9, 13.5, 16.3, 13.6, 10.2, 12.6,
12.9, 13.3, 13.4, 12.7, 12.2, 15.4, 12.7, 13.2, 12.4, 10.9,
13.4, 10.6, 11.8, 14.2, 12.7, 13.2, 11.8, 13.3, 13.2, 15.9
), HT2 = c(87.7, 90, 89.6, 90.3, 89.4, 85.5, 90.2, 82.2,
85.6, 97.3, 87.1, 88.9, 87.6, 82.6, 87.1, 84, 83.6, 81.4,
89.7, 88.4, 86.4, 83.8, 87.6, 94, 82, 86.4, 88.9, 86.7, 86.4,
86.4, 83.8, 88.9, 85.9, 86.2, 85.1, 88.6, 83, 88.9, 89.7,
81.3, 88.7, 88.4, 85.1, 91.4, 87.6, 86.1, 94, 85.9, 82.2,
88.2, 87.5, 88.6, 86.9, 86.4, 80.9, 90, 94, 89.7, 86.4, 82.6,
86.4, 81.8, 86.2, 86, 91.4, 88.9, 88.6, 86.4, 94, 89.2),
WT9 = c(32.5, 27.8, 44.4, 40.5, 29.9, 22.8, 30, 24.3, 29.9,
44.5, 31.8, 32.1, 39.2, 23.7, 26, 36.3, 29.9, 22.2, 34.4,
35.5, 33, 25.7, 29.2, 31.7, 23.7, 35.3, 39, 30.8, 29.3, 28,
33, 47.4, 27.6, 34.2, 28.1, 27.5, 23.9, 32.2, 29.4, 22, 28.8,
38.8, 36, 31.3, 26.9, 33.3, 36.2, 29.5, 23.4, 33.8, 34.5,
34.4, 38.2, 31.7, 26.6, 34.2, 27.7, 28.5, 30.5, 26.6, 39,
25, 25.6, 34.2, 29.8, 27.9, 27, 41.4, 41.6, 42.4), HT9 = c(133.4,
134.8, 141.5, 137.1, 136.1, 130.6, 136, 128, 132.4, 152.5,
138.4, 135.2, 142.3, 129.1, 133.2, 136.3, 133.1, 123.2, 135.8,
139.5, 139.4, 124.2, 135.6, 144.1, 123.8, 134.6, 137.2, 139.8,
128.8, 134.2, 136.5, 140.8, 132.1, 137, 129, 139.4, 125.6,
137.1, 133.6, 121.4, 133.6, 134.1, 139.4, 138.1, 133.2, 138.4,
139.5, 132.8, 129.8, 144.8, 138.9, 140.3, 143.8, 133.6, 123.5,
139.9, 136.1, 135.8, 131.9, 133.1, 130.9, 126.3, 135.9, 135,
135.5, 136.5, 134, 138.2, 142, 140.8), LG9 = c(28.4, 26.9,
31.9, 31.8, 27.7, 23.4, 27.2, 25.1, 27.5, 32.7, 28.3, 26.9,
31.6, 25.9, 26.7, 28.4, 26.2, 24.9, 32.3, 30, 26.9, 26.2,
26.3, 27.2, 25.5, 30.4, 32.4, 26, 28.3, 25, 29, 32.3, 26.3,
27.3, 27.4, 25.7, 24.5, 28.2, 26.6, 24.4, 26.5, 31.1, 28.2,
27.6, 26.3, 29.4, 28, 27.6, 22.6, 28.3, 30.5, 31.2, 29.8,
27.5, 27.2, 29.1, 26.7, 25.5, 28.6, 25.4, 29.3, 25, 23.7,
27.6, 27, 26.5, 26.5, 32.5, 31, 32.6), ST9 = c(74L, 65L,
104L, 79L, 83L, 60L, 67L, 44L, 76L, 81L, 59L, 67L, 72L, 40L,
40L, 54L, 67L, 58L, 57L, 61L, 64L, 48L, 61L, 74L, 50L, 58L,
80L, 57L, 44L, 46L, 57L, 69L, 51L, 44L, 48L, 68L, 22L, 59L,
58L, 44L, 58L, 57L, 64L, 64L, 58L, 73L, 52L, 52L, 60L, 107L,
62L, 88L, 78L, 52L, 40L, 71L, 30L, 76L, 59L, 75L, 38L, 50L,
45L, 62L, 57L, 66L, 54L, 44L, 56L, 74L), WT18 = c(56.9, 49.9,
55.3, 65.9, 62.3, 47.4, 57.3, 50, 58.8, 80.2, 59.9, 56.3,
67.9, 52.9, 58.5, 73.2, 54.7, 44.1, 70.5, 60.6, 73.2, 57.2,
56.4, 56.6, 46.3, 63.3, 65.4, 60.1, 55, 55.7, 71.2, 65.5,
57.2, 58.2, 56, 64.5, 53, 52.4, 56.8, 49.2, 55.6, 77.8, 69.6,
56.2, 52.5, 64.9, 59.3, 54.2, 49.8, 62.6, 66.6, 65.3, 65.9,
59, 47.4, 60.4, 56.3, 61.7, 52.4, 52.1, 58.4, 52.8, 60.4,
61, 67.4, 54.3, 56.3, 97.7, 68.1, 63.1), HT18 = c(158.9,
166, 162.2, 167.8, 170.9, 164.9, 168.1, 164, 163.3, 183.2,
167, 163.8, 174, 163, 167.1, 168.1, 163, 154.6, 170.3, 170.6,
175.1, 156.5, 160.3, 170.8, 156.5, 165.2, 169.8, 171.2, 160.4,
163.8, 169.6, 172.7, 162.4, 166.8, 157.1, 181.1, 158.4, 165.6,
166.7, 156.5, 168.1, 165.3, 163.7, 173.7, 163.9, 169.2, 170.1,
166, 164.2, 176, 170.9, 169.2, 172, 163, 154.5, 172.5, 175.6,
167.2, 164, 162.1, 161.6, 153.6, 177.5, 169.8, 173.5, 166.8,
166.2, 162.8, 168.6, 169.2), LG18 = c(34.6, 33.8, 35.1, 39.3,
36.3, 31.8, 35, 31.2, 36.2, 42.9, 36.5, 32.6, 37.5, 37.7,
34.5, 37.2, 33.2, 32.4, 40.1, 38.2, 35.1, 35.6, 34.6, 32.6,
32.9, 38.5, 38.6, 33, 36.3, 33.2, 38.8, 36.2, 36.5, 34.3,
37.8, 34.2, 32.4, 33.8, 32.7, 33.5, 34.1, 39.8, 38.6, 34.2,
34.6, 36.7, 32.8, 34.9, 30.3, 35.8, 38.8, 39, 35.7, 32.7,
32.2, 35.7, 34, 35.5, 34.8, 34.1, 33, 33.4, 34.3, 34.5, 34.5,
33.6, 36.2, 42.5, 38.4, 37.9), ST18 = c(143L, 117L, 143L,
148L, 152L, 126L, 134L, 77L, 118L, 135L, 118L, 96L, 131L,
108L, 99L, 105L, 122L, 146L, 126L, 124L, 100L, 118L, 123L,
131L, 101L, 121L, 182L, 116L, 127L, 130L, 107L, 134L, 120L,
130L, 101L, 149L, 112L, 136L, 118L, 110L, 104L, 138L, 108L,
134L, 108L, 141L, 122L, 125L, 128L, 168L, 126L, 142L, 132L,
116L, 112L, 137L, 114L, 122L, 121L, 148L, 107L, 140L, 125L,
124L, 123L, 89L, 135L, 125L, 142L, 142L), Soma = c(5, 4,
5.5, 5.5, 4.5, 3, 5, 4, 5, 5.5, 5, 5, 5.5, 4, 5, 6.5, 4.5,
3.5, 5.5, 4.5, 6, 5, 4.5, 4, 4, 5, 4.5, 4.5, 5, 5, 6, 4.5,
5, 5, 5, 4, 5, 4, 4.5, 4, 4.5, 6.5, 5.5, 3.5, 4, 5, 4.5,
4, 4, 5, 5, 5, 5.5, 5.5, 4, 4, 3, 4.5, 5, 4, 6.5, 5, 3.5,
5.5, 5, 4, 4.5, 7, 5.5, 5.5)), .Names = c("X", "Sex", "WT2",
"HT2", "WT9", "HT9", "LG9", "ST9", "WT18", "HT18", "LG18", "ST18",
"Soma"), row.names = 67:136, class = "data.frame")
my command:
library(tidyr)
library(ggplot2)
newdata.girls %>%
  # reshape to long form: every column other than Soma is stacked, with its
  # name stored in `weight` and its content in `value`
  gather(weight, value, -Soma) %>%
  # turn `weight` into a factor whose levels run WT2, WT9, WT18 so the
  # panels are laid out in that order
  mutate(weight = factor(weight, levels = c("WT2", "WT9", "WT18"))) %>%
  # scatter of value against Soma, one facet per level of `weight`
  ggplot(aes(x = Soma, y = value)) +
  geom_point() +
  facet_grid(. ~ weight)
It gives out a column of NA.
result

It's not entirely clear how you would like the output to look, or whether Soma is continuous or categorical. But taking your sentence "Soma against weight at each of the three time points" as a starting point, an initial attempt could look like this. Assume your data frame is named df1:
library(tidyr)
library(dplyr)
library(ggplot2)
# Plot Soma against weight at ages 2, 9 and 18, one panel per time point.
df1 %>%
  # Stack only the three weight columns: their names go to `weight` and
  # their measurements to `value`. Reshaping every column except Soma
  # would also pull in X, Sex, HT2, ...; those names are not among the
  # factor levels below, so they would turn into NA and add an NA facet.
  pivot_longer(
    cols = c(WT2, WT9, WT18),
    names_to = "weight",
    values_to = "value"
  ) %>%
  # Order the levels chronologically so the facets appear as
  # WT2, WT9, WT18 rather than alphabetically.
  mutate(weight = factor(weight, levels = c("WT2", "WT9", "WT18"))) %>%
  # Scatter plot of value against Soma, faceted by time point.
  ggplot(aes(Soma, value)) +
  geom_point() +
  facet_grid(. ~ weight) +
  theme_light()
Result:

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

sampling based on frequency in R - r

Related

Adding sample size to ggplot boxplot

I've an error when analyze "Simple slope" with R function

Randomly sampling and assigning a variable using dplyr

Date format in hover for ggplot2 and plotly

How to plot one variable against each of the three time points

Categories

Resources