Related
I wrote the following code in R
library(fda)
# Matrix dimensions used for the reshape below.
# NOTE(review): presumably n_points observations per curve and n_curves
# curves in total - confirm against how df_l was built.
n_points <- 2537
n_curves <- 15951

# Reshape the long-format columns into n_points x n_curves matrices.
# matrix() fills column by column, so this assumes df_l is ordered curve by
# curve - TODO confirm.
argvals <- matrix(df_l[["Time"]], nrow = n_points, ncol = n_curves)
y_mat <- matrix(df_l[["Curve"]], nrow = n_points, ncol = n_curves)

# Convert the discrete observations into a functional data object on `basis`,
# passing lambda = 0.5 to Data2fd().
W.obj <- Data2fd(
  argvals = argvals,
  y = y_mat,
  basisobj = basis,
  lambda = 0.5
)
But I'm getting an error
Error in if ((a01[1] <= arng[1]) && (arng[2] <= a01[2])) { :
missing value where TRUE/FALSE needed
What does it mean, and how do I prevent it?
I'm using repeated-measures data, and I'm trying to do functional data analysis. My data has a lot of missing values (NA). I suspect that the NAs are causing the error.
data:
> dput(head(df_l, 30))
structure(list(Time = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30), Curve = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 5, 10, 10, 10, 10, 8, 8, 8, 8,
8, 8)), row.names = c(NA, 30L), class = "data.frame")
> dput(head(basis, 5))
list(call = basisfd(type = type, rangeval = rangeval, nbasis = nbasis,
params = params, dropind = dropind, quadvals = quadvals,
values = values, basisvalues = basisvalues), type = "bspline",
rangeval = c(0, 2537), nbasis = 53, params = c(50.74, 101.48,
152.22, 202.96, 253.7, 304.44, 355.18, 405.92, 456.66, 507.4,
558.14, 608.88, 659.62, 710.36, 761.1, 811.84, 862.58, 913.32,
964.06, 1014.8, 1065.54, 1116.28, 1167.02, 1217.76, 1268.5,
1319.24, 1369.98, 1420.72, 1471.46, 1522.2, 1572.94, 1623.68,
1674.42, 1725.16, 1775.9, 1826.64, 1877.38, 1928.12, 1978.86,
2029.6, 2080.34, 2131.08, 2181.82, 2232.56, 2283.3, 2334.04,
2384.78, 2435.52, 2486.26))
I am working with ggeffects package
I have the following syntax
data_example <- structure(list(paciente = structure(c(6171, 6488, 6300, 6446,
6489, 6445, 6473, 6351, 6212, 6387), label = "Paciente", format.spss = "F6.0"),
edad_s1 = structure(c(69, 62, 60, 71, 67, 59, 63, 66, 67,
70), label = "Edad", format.spss = "F3.0"), sexo_s1 = structure(c(1L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L), .Label = c("Hombre",
"Mujer"), label = "Sexo", class = "factor"), grupo_int_v00 = structure(c(1L,
1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L), .Label = c("A", "B"), label = "Grupo de intervención", class = "factor"),
time = c(0, 0, 0, 2, 2, 2, 1, 2, 1, 1), peso1 = c(89.9, 62,
91.5, 75.2, 68.2, 88.4, 93.6, 79, 88.3, 84.4), cintura1 = c(113,
90, 112, NA, 87.5, 116, 98.5, 104, 112.5, 108.5), tasis2_e = c(132,
132, 149, NA, 145, 137, 129, 152, 146, 129), tadias2_e = c(81,
58, 79, NA, 80, 60, 79, 87, 79, 68), p17_total = c(7, 9,
10, 10, 10, 10, 10, 7, 10, 11), geaf_tot = c(3412.59, 3524.48,
559.44, 5454.55, 4293.71, 839.16, 3146.85, 7552.45, 4335.66,
566.9), glucosa = c(102, 97, 89, NA, 88, 168, 104, NA, 114,
121), albumi = c(4.94, 4.68, 4.75, NA, 4.34, 5.06, 4.56,
NA, 5.06, 3.96), coltot = c(232, 253, 215, NA, 202, 287,
255, NA, 217, 147), hdl = c(59, 64, 68, NA, 71, 46, 61, NA,
40, 42), ldl_calc = c(143, 150, 127, NA, 114, NA, 170, NA,
143, 86), trigli = c(152, 195, 99, NA, 85, 378, 121, NA,
170, 93), hba1c = c(5.61, 5.66, 5.43, NA, 5.38, 8.14, 5.81,
NA, 6, 6.38), i_hucpeptide = c(988.91, 673.5, 1036.03, NA,
734.29, 1266.3, 610.9, NA, 1144.8, 672.08), i_hughrelin = c(1133.35,
1230.06, 1109.98, NA, 1064.79, 725.35, 1437.85, NA, 866.07,
822.83), i_hugip = c(2.67, 2.67, 2.67, NA, 2.67, 2.67, 2.67,
NA, 2.67, 2.67), i_huglp1 = c(145.43, 138.32, 194.14, NA,
99.37, 166.27, 218.33, NA, 184.04, 222.84), i_huglucagon = c(513.89,
357.35, 624.73, NA, 464.85, 448.49, 304.29, NA, 310.61, 426.52
), i_huinsulin = c(234.23, 229.06, 358.86, NA, 175.38, 466,
99.02, NA, 367.95, 77.33), i_huleptin = c(7898.28, 5211.27,
14670.25, NA, 7161.39, 3218.49, 2659.8, NA, 3766.01, 1207.58
), i_hupai1 = c(3468.4, 1977.9, 4101.1, NA, 1613.4, 2847.27,
2442.49, NA, 1953.26, 1752.88), i_huresistin = c(4783.28,
2676.05, 3064.57, NA, 2165.52, 3878.48, 8343.46, NA, 2822.68,
6496.73), i_huvisfatin = c(831.6, 649.45, 2270.65, NA, 1578.88,
9.63, 185.09, NA, 162.8, 8.64), col_rema = c(30, 39, 20,
NA, 17, NA, 24, NA, 34, 19), homa = c(1061.843, 987.503,
1419.491, NA, 685.931, 3479.467, 457.692, NA, 1864.28, 415.864
), i_pcr = c(0.05, NA, 0.27, NA, 0.03, 0.23, 0.04, NA, 0.09,
0.09), d_homa = c(NA, NA, NA, NA, -2.629, 33.042, -181.211,
NA, -929.683, -89.108), d_hughrelin = c(NA, NA, NA, NA, -213.59,
48.43, 95.27, NA, -228.62, -146.8), d_huinsulin = c(NA, NA,
NA, NA, 3.24, -68.79, -43.31, NA, -147.33, -7.46), d_hucpeptide = c(NA,
NA, NA, NA, 192.39, -263.54, -71.56, NA, -437.38, -215.44
), d_huglucagon = c(NA, NA, NA, NA, 38.99, -112.45, -10.75,
NA, -133.55, -259.73), d_huleptin = c(NA, NA, NA, NA, 409.76,
-1081.5, -1778.69, NA, -353.91, -679.7), d_huresistin = c(NA,
NA, NA, NA, 391.02, -155.41, -436.47, NA, -1137.79, -922.75
), d_huvisfatin = c(NA, NA, NA, NA, 457.54, -260.79, -341.02,
NA, -426.89, 0), d_glucosa = c(NA, NA, NA, NA, -2, 23, 3,
NA, -8, -13), d_coltot = c(NA, NA, NA, NA, -52, 36, -11,
NA, 15, -12), d_hdl = c(NA, NA, NA, NA, 1, 3, -1, NA, 1,
4), d_ldl_calc = c(NA, NA, NA, NA, -50, NA, -10, NA, 12,
-15), d_col_rema = c(NA, NA, NA, NA, -3, NA, 0, NA, 2, -1
), d_trigli = c(NA, NA, NA, NA, -14, 132, -1, NA, 8, -5),
d_hba1c = c(NA, NA, NA, NA, -0.11, -0.04, -0.18, NA, -1.76,
-0.67), d_tasis2_e = c(NA, NA, NA, NA, 0, 6, -1, 7, -21,
-9), d_tadias2_e = c(NA, NA, NA, NA, 0, 2, -8, 8, -10, -17
), d_peso1 = c(NA, NA, NA, -6, -2.3, 0.2, -11.4, 0.8, -4.1,
-9.3), d_cintura1 = c(NA, NA, NA, NA, -2.5, -4, -12.5, 6,
-3.5, -4.5), d_geaf_tot = c(NA, NA, NA, 699.31, 2055.95,
-2181.82, 1748.25, 3776.23, 867.13, -6593.94), d_p17_total = c(NA,
NA, NA, 1, 4, 5, 4, -5, 5, 2), d_hupai1 = c(NA, NA, NA, NA,
-185.03, 204.77, 202.01, NA, -1551.91, 57.2), d_hugip = c(NA,
NA, NA, NA, 0, 0, 0, NA, 0, 0), d_huglp1 = c(NA, NA, NA,
NA, -42.07, -163.02, 107.28, NA, -95.82, -87.5), d_pcr = c(NA,
NA, NA, NA, NA, NA, NA, NA, -0.18, -0.22), ln_trigli = c(5.024,
5.273, 4.595, NA, 4.443, 5.935, 4.796, NA, 5.136, 4.533),
ln_homa = c(6.968, 6.895, 7.258, NA, 6.531, 8.155, 6.126,
NA, 7.531, 6.03), ln_hba1c = c(1.725, 1.733, 1.692, NA, 1.683,
2.097, 1.76, NA, 1.792, 1.853), ln_geaf_tot = c(8.135, 8.167,
6.327, 8.604, 8.365, 6.732, 8.054, 8.93, 8.375, 6.34), i_ratiolg = c(6.969,
4.237, 13.217, NA, 6.726, 4.437, 1.85, NA, 4.348, 1.468)), row.names = c(NA,
-10L), class = c("tbl_df", "tbl", "data.frame"))
The mixed model I have created following the syntax
# Mixed model for peso1: fixed effects for sexo_s1, edad_s1, p17_total and a
# quadratic polynomial in time interacted with grupo_int_v00; random quadratic
# time effects within each paciente. Rows with missing peso1 are excluded.
# NOTE(review): the model is fitted on `dat_longer`, which is not the sample
# object `data_example` shown above - confirm which data set is intended.
lme_peso <- lme(
  peso1 ~ sexo_s1 + edad_s1 + poly(time, 2) * grupo_int_v00 + p17_total,
  random = ~ poly(time, 2) | paciente,
  data = dat_longer,
  subset = !is.na(peso1),
  na.action = na.omit,
  control = lmeControl(opt = "optim")
)
And then to plot it
# Plot the type = "fixed" predictions from lme_peso over time, one colour per
# level of grupo_int_v00, with a ribbon between conf.low and conf.high.
ggpredict(lme_peso, c("time [all]", "grupo_int_v00"), type="fixed") %>%
ggplot(aes(x = x, y = predicted, colour = group)) +
geom_point() +
geom_line() +
stat_smooth(method = "loess",se = T) +
labs(x = "time (months)", y = "Weight (kg)") +
scale_color_manual(labels = c("Control", "Intervention"), values = c("orange", "green")) +
# `fill = F` sits inside aes(), so it is treated as a mapped aesthetic; this
# is what produces the extra legend described in the question text.
geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = F),alpha = 1/5) +
# Label the breaks 0, 1, 2 as 0, 6, 12 (months, per the x-axis title).
scale_x_continuous(breaks = 0:2, labels = c(0, 6, 12))
When I remove the fill argument from geom_ribbon, the fill stays black. However, I don't know how to keep just one legend with the 2 groups (Control and Intervention): with the fill argument I get an extra legend (with F in this case).
Thanks in advance
I couldn't run your code, but I rebuilt it with iris.
As Matt suggested, one option is to remove fill=F:
# Option 1: leave `fill` out of aes() entirely, so the ribbon adds no legend.
# Column names follow R's built-in `iris` data set (Sepal.Length,
# Petal.Length, Species).
ggplot(
  data = iris,
  aes(x = Sepal.Length, y = Petal.Length, group = Species)
) +
  geom_point() +
  geom_line() +
  stat_smooth(method = "loess", se = TRUE, aes(color = Species)) +
  # Constant ribbon limits stand in for the question's conf.low / conf.high.
  geom_ribbon(aes(ymin = 1, ymax = 3), alpha = 1 / 5) +
  # Breaks and labels are carried over unchanged from the question's time axis.
  scale_x_continuous(breaks = 0:2, labels = c(0, 6, 12))
Or if you need it for some reason, use guides(fill="none"):
# Option 2: keep `fill` mapped inside aes() but suppress its legend with
# guides(fill = "none"). Column names follow R's built-in `iris` data set
# (Sepal.Length, Petal.Length, Species).
ggplot(
  data = iris,
  aes(x = Sepal.Length, y = Petal.Length, group = Species)
) +
  geom_point() +
  geom_line() +
  stat_smooth(method = "loess", se = TRUE, aes(color = Species)) +
  # Constant ribbon limits stand in for the question's conf.low / conf.high.
  geom_ribbon(aes(ymin = 1, ymax = 3, fill = FALSE), alpha = 1 / 5) +
  # Breaks and labels are carried over unchanged from the question's time axis.
  scale_x_continuous(breaks = 0:2, labels = c(0, 6, 12)) +
  # Hide only the fill legend; the colour legend is kept.
  guides(fill = "none")
Output:
I'm using the DT package and I'd like to show two tables on separate panes with one dataset.
Ideally, I'd like something like facet wrap that would let me make tables based on the plan id. I'd like to have one table that has all the values for the personal plan and another table that has all the values for the team plan. I can go the long way around and make two separate datasets, but I'm hoping there's something that I can do that might be more efficient
This is my data
structure(list(first_month = structure(c(17532, 17532, 17563,
17563, 17591, 17591, 17622, 17622, 17652, 17652), class = "Date"),
plan_id = c("personal", "team", "personal", "team", "personal",
"team", "personal", "team", "personal", "team"), new_customers = c(16,
32, 27, 33, 19, 41, 36, 46, 48, 46), `1` = c(16, 32, 27,
33, 19, 41, 36, 46, 48, 46), `2` = c(13, 29, 24, 30, 15,
37, 31, 40, 43, 38), `3` = c(13, 26, 22, 28, 14, 31, 30,
40, 36, 35), `4` = c(10, 20, 22, 22, 12, 29, 27, 39, 32,
33), `5` = c(10, 18, 20, 20, 11, 25, 22, 36, 27, 27), `6` = c(10,
16, 16, 20, 9, 24, 19, 34, 24, 25), `7` = c(10, 12, 13, 18,
7, 24, 16, 32, 21, 23), `8` = c(8, 10, 10, 14, 7, 21, 16,
30, 19, 21), `9` = c(7, 8, 7, 12, 7, 18, 16, 25, NA, NA),
`10` = c(7, 7, 5, 11, 6, 14, NA, NA, NA, NA), `11` = c(5,
6, 5, 10, NA, NA, NA, NA, NA, NA), `12` = c(5, 6, NA, NA,
NA, NA, NA, NA, NA, NA)), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"))
This is my code
# Render the whole cohort table as a single DT widget: bordered, striped
# cells, no row names, sorting disabled, only the table itself shown
# (dom = "t"), and a page length of 1000 rows.
datatable(
  monthly_new_customer_cohorts_formatted_as_cohort_analysis_customer_counts,
  rownames = FALSE,
  class = "cell-border stripe",
  options = list(
    dom = "t",
    ordering = FALSE,
    pageLength = 1000
  )
)
Might not be the slickest answer, but hopefully this helps. You can use group_by, nest, mutate and map2 to create separate data-sets based on plan_id, then create a separate DT widget from each.
library(dplyr)
library(tidyr)
library(purrr)
# Split the data into one nested tibble per plan_id value. ungroup() drops the
# grouping left behind by group_by(), so later verbs see a plain tibble.
widgets <- monthly_new_customer_cohorts_formatted_as_cohort_analysis_customer_counts %>%
  group_by(plan_id) %>%
  nest() %>%
  ungroup()

# Build one DT widget per subset, using the plan_id value as the caption.
widgets <- widgets %>%
  mutate(
    dt_widget = map2(
      .x = data,
      .y = plan_id,
      .f = function(plan_data, plan_name) {
        datatable(
          plan_data,
          class = "cell-border stripe",
          caption = plan_name,
          rownames = FALSE,
          options = list(
            ordering = FALSE,
            dom = "t",
            pageLength = 1000
          ),
          height = "100%",
          width = "100%"
        )
      }
    )
  )
Each element of widgets$dt_widget is now a separate datatable widget. The trick now is to show them all in the same viewer:
library(htmltools)
# Wrap every widget in its own <div> and display them together in one viewer.
browsable(
  tagList(lapply(widgets$dt_widget, tags$div))
)
Unfortunately I don't think there is a function to give you the nice grid ala ggplot2::facet_wrap (e.g. a 3x3 grid for 9 facets) but you can manually do this by adjusting the style values in the div elements. For example, this allows the tables to go on the same row by reducing the width and adjusting the float:
# Show the first two widgets on the same row: each <div> takes 49% of the
# width, the first floated left and the second floated right.
browsable(
  tagList(
    list(
      tags$div(
        style = "width:49%;display:block;float:left;",
        widgets$dt_widget[[1]]
      ),
      tags$div(
        style = "width:49%;display:block;float:right;",
        widgets$dt_widget[[2]]
      )
    )
  )
)
This is a small example of my data set. The set contains weekly data covering 52 weeks. You can see the data with the code below:
# CODE
#Data
library(tidyverse)
library(plotly)
ARTIFICIALDATA<-dput(structure(list(week = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52), `2019 Series_1` = c(534.771929824561,
350.385964912281, 644.736842105263, 366.561403508772, 455.649122807018,
533.614035087719, 829.964912280702, 466.035087719298, 304.421052631579,
549.473684210526, 649.719298245614, 537.964912280702, 484.982456140351,
785.929824561404, 576.736842105263, 685.508771929824, 514.842105263158,
464.491228070175, 608.245614035088, 756.701754385965, 431.859649122807,
524.315789473684, 739.40350877193, 604.736842105263, 669.684210526316,
570.491228070175, 641.649122807018, 649.298245614035, 664.210526315789,
530.385964912281, 754.315789473684, 646.80701754386, 764.070175438596,
421.333333333333, 470.842105263158, 774.245614035088, 752.842105263158,
575.368421052632, 538.315789473684, 735.578947368421, 522, 862.561403508772,
496.526315789474, 710.631578947368, 584.456140350877, 843.19298245614,
563.473684210526, 568.456140350877, 625.368421052632, 768.912280701754,
679.824561403509, 642.526315789474), `2020 Series_1` = c(294.350877192983,
239.824561403509, 709.614035087719, 569.824561403509, 489.438596491228,
561.964912280702, 808.456140350877, 545.157894736842, 589.649122807018,
500.877192982456, 584.421052631579, 524.771929824561, 367.438596491228,
275.228070175439, 166.736842105263, 58.2456140350878, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA)), row.names = c(NA, -52L), class = c("tbl_df", "tbl",
"data.frame")))
colnames(ARTIFICIALDATA) <- c('week', 'series1', 'series2')
So the next step is to plot this data with the r-plotly package. I want a plot like the example below. Because this is weekly data, series1 has 52 observations while series2 has only 16 (series1 is the mean data for 2019 and series2 is the data for 2020). For that reason, the comparison must use only the 16 observations that have no NA, as in the example below:
So can anybody help how to plot this graph with plotly?
Try this:
colnames(ARTIFICIALDATA) <- c("week", "series1", "series2")

ARTIFICIALDATA %>%
  # Keep only the rows without any NA
  drop_na() %>%
  # Reshape to long format: one row per week and series
  pivot_longer(-week, names_to = "series") %>%
  # Attach the display label for each series; edit the lookup to relabel
  mutate(
    label = unname(
      c(series1 = "2019 Series_1", series2 = "2020 Series_1")[series]
    )
  ) %>%
  # Compute the sum for each series
  group_by(label) %>%
  summarise(sum = sum(value, na.rm = TRUE)) %>%
  ungroup() %>%
  # Bar chart of the sums
  plot_ly(x = ~label, y = ~sum) %>%
  add_bars() %>%
  # Blank x-axis title; replace "" with any title you like
  layout(xaxis = list(title = ""))
This is a small example of my data set. The set contains weekly data covering 52 weeks. You can see the data with the code below:
# CODE
#Data
ARTIFICIALDATA<-dput(structure(list(week = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52), `2019 Series_1` = c(534.771929824561,
350.385964912281, 644.736842105263, 366.561403508772, 455.649122807018,
533.614035087719, 829.964912280702, 466.035087719298, 304.421052631579,
549.473684210526, 649.719298245614, 537.964912280702, 484.982456140351,
785.929824561404, 576.736842105263, 685.508771929824, 514.842105263158,
464.491228070175, 608.245614035088, 756.701754385965, 431.859649122807,
524.315789473684, 739.40350877193, 604.736842105263, 669.684210526316,
570.491228070175, 641.649122807018, 649.298245614035, 664.210526315789,
530.385964912281, 754.315789473684, 646.80701754386, 764.070175438596,
421.333333333333, 470.842105263158, 774.245614035088, 752.842105263158,
575.368421052632, 538.315789473684, 735.578947368421, 522, 862.561403508772,
496.526315789474, 710.631578947368, 584.456140350877, 843.19298245614,
563.473684210526, 568.456140350877, 625.368421052632, 768.912280701754,
679.824561403509, 642.526315789474), `2020 Series_1` = c(294.350877192983,
239.824561403509, 709.614035087719, 569.824561403509, 489.438596491228,
561.964912280702, 808.456140350877, 545.157894736842, 589.649122807018,
500.877192982456, 584.421052631579, 524.771929824561, 367.438596491228,
275.228070175439, 166.736842105263, 58.2456140350878, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA)), row.names = c(NA, -52L), class = c("tbl_df", "tbl",
"data.frame")))
So the next step is to plot this data with ggplot2. You can see my plot below:
library(ggplot2)
# Draw both yearly series against week on shared axes. The line colours are
# fixed constants set outside aes(), not mapped aesthetics.
p <- ggplot(ARTIFICIALDATA, aes(x = week)) +
  geom_line(aes(y = `2019 Series_1`), colour = "black") +
  geom_line(
    aes(y = `2020 Series_1`),
    colour = "red",
    size = 1,
    linetype = 2
  ) +
  labs(x = "Weeks", y = "US dolars") +
  theme(legend.position = "top")
p
This is what my plot looks like, but two things are missing. First, there is no legend (for 2019 Series_1 and 2020 Series_1); second, the x axis needs to show values for all 52 weeks. Can anybody help me resolve this problem?
It would be simple to plot this if you have data in long format. Also you might control what to show on x-axis using scale_x_continuous by adding custom breaks.
library(ggplot2)
# Reshape to long format so that `Series` can be mapped to colour; mapping it
# inside aes() is what makes ggplot2 draw the legend. pivot_longer() is called
# directly instead of through %>%, because only ggplot2 is attached in this
# snippet and it does not provide the pipe operator.
ggplot(
  tidyr::pivot_longer(ARTIFICIALDATA, cols = -week, names_to = "Series"),
  aes(x = week, y = value, color = Series)
) +
  geom_line() +
  xlab("Weeks") +
  ylab("US dolars") +
  # Custom breaks control which weeks are labelled on the x axis.
  scale_x_continuous(breaks = c(seq(10, 40, 10), 52)) +
  theme(legend.position = "top")
Here is the answer:
library(tidyverse)
# Reshape to long format: every column except the first (week) is stacked into
# Year_indicator / time_series_value pairs. pivot_longer() replaces the
# superseded gather(); it orders rows by week rather than by series, which
# does not change the plot.
ARTIFICIALDATA_rec <- ARTIFICIALDATA %>%
  pivot_longer(
    cols = -1,
    names_to = "Year_indicator",
    values_to = "time_series_value"
  )

# One line per series, coloured by series, with an x-axis break every 2 weeks
# from 0 to 52.
your_plot <- ggplot(
  data = ARTIFICIALDATA_rec,
  aes(x = week, y = time_series_value, group = Year_indicator)
) +
  geom_line(aes(color = Year_indicator)) +
  scale_x_continuous(
    name = "Week of the year",
    limits = c(0, 52),
    breaks = seq(0, 52, 2)
  )
and this is the plot: