Data manipulation for plotting in r, getting NULLs - r

I once used the code below to sort my data for a dumbbell plot, I tried reusing the code for different data but I am getting an empty output
data10 <- structure(list(Trial_type = c(1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1,
1), Trial_type2 = c(1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1), GROUP = c("LLL",
"LLL", "LLL", "LRL", "LRL", "LRL", "RLR", "RLR", "RLR", "RRR",
"RRR", "RRR"), conditon2 = c("CEN_LLL", "IPS_LLL", "CTL_LLL",
"CEN_LRL", "IPS_LRL", "CTL_LRL", "CEN_RLR", "IPS_RLR", "CTL_RLR",
"CEN_RRR", "IPS_RRR", "CTL_RRR"), condition20 = c(1, 2, 3, 1,
2, 3, 1, 2, 3, 1, 2, 3), Training = c("left", "left", "left",
"right", "right", "right", "left", "left", "left", "right", "right",
"right"), AveResultantVel_102 = c(2.150005313, 1.854148813, 1.647962313,
2.35681725, 2.067673063, 1.10213475, 2.364870813, 2.027195438,
1.61692725, 2.111901813, 2.026179, 1.595148125), AveResultantVel_104 = c(2.37879375,
2.127869563, 1.903676063, 2.932732875, 2.230088313, 1.311275125,
2.69564575, 2.473001938, 1.926669438, 2.54519575, 2.201091438,
1.902556875)), row.names = c(NA, -12L), class = c("tbl_df", "tbl",
"data.frame"))
library(tidyverse)
data10A <- data10 %>% select(conditon2,Trial_type,AveResultantVel_102,AveResultantVel_104) %>% mutate("key"="Change in resultant velocity (cm/s)")
data10A$Trial_type <- factor(data10A$Trial_type, levels = 1:2, labels = c("Retention", "Transfer"))
i1 <- grepl("_RR", levels(data10A$conditon2))
i2 <- grepl("_RL", levels(data10$conditon2))
i3 <- grepl("_LL", levels(data10A$conditon2))
RRR_levels <- levels(data10A$conditon2)[i1]
RLR_levels <- levels(data10A$conditon2)[i2]
LLL_levels <- levels(data10A$conditon2)[i3]
LRL_levels <- levels(data10A$conditon2)[!i1 & !i2 & !i3]
ord_levels <- c(LLL_levels, RRR_levels, RLR_levels, LRL_levels)
data10A$conditon2 <- factor(data10A$conditon2, levels = ord_levels)

condition2 is of character type so it does not have levels. Change it to factor.
data10A$conditon2 <- factor(data10A$conditon2)
An alternative would be to use unique(data10A$conditon2) instead of levels(data10A$conditon2) which will work for both character and factor data.
Complete code -
library(dplyr)
data10A <- data10 %>%
select(conditon2,Trial_type,AveResultantVel_102,AveResultantVel_104) %>%
mutate("key"="Change in resultant velocity (cm/s)")
data10A$Trial_type <- factor(data10A$Trial_type, levels = 1:2, labels = c("Retention", "Transfer"))
data10A$conditon2 <- factor(data10A$conditon2)
i1 <- grepl("_RR", levels(data10A$conditon2))
i2 <- grepl("_RL", levels(data10A$conditon2))
i3 <- grepl("_LL", levels(data10A$conditon2))
RRR_levels <- levels(data10A$conditon2)[i1]
RLR_levels <- levels(data10A$conditon2)[i2]
LLL_levels <- levels(data10A$conditon2)[i3]
LRL_levels <- levels(data10A$conditon2)[!i1 & !i2 & !i3]
ord_levels <- c(LLL_levels, RRR_levels, RLR_levels, LRL_levels)
data10A$conditon2 <- factor(data10A$conditon2, levels = ord_levels)

Related

Using dplyr to create a ICCs table

I am trying to create a table with ICCs for multiple raters and multiple variables, I am trying to use a function and dplyr, but it is not working as I expected.
This is the structure of the data frame and the expected ICCs table:
# Create data frame
ID <- c("r1", "r1", "r1", "r1", "r1", "r2", "r2", "r2", "r2", "r2", "r3", "r3", "r3", "r3", "r3")
V1.1 <- c(3, 3, 3, 3, 3, 3, 2, 3, 3, 1, 2, 2, 1, 1, 2)
V2.1 <- c(1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 2, 1, 1, 3)
V3.1 <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
V4.1 <- c(2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2)
V1.2 <- c(3, 3, 3, 3, 3, 3, 2, 3, 2, 2, 3, 2, 1, 2, 1)
V2.2 <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2)
V3.2 <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
V4.2 <- c(2, 4, 2, 1, 3, 2, 1, 3, 2, 2, 3, 2, 1, 2, 1)
df <- data.frame(ID, V1.1, V2.1, V3.1, V4.1, V1.2, V2.2, V3.2, V4.2)
# Empty data frame for ICCs
ids <- c("r1", "r2", "r3")
vars <- c("V1", "V2", "V3", "V4")
icc_table <- data.frame(ID = ids)
icc_table <- cbind(icc_table, matrix(NA, nrow = length(ids), ncol = length(vars)))
names(icc_table)[2:ncol(icc_table)] <- vars
Here is the attempt to create the ICCs table with a function and dplyr:
# ICC function
icc.fun <- function(data, x1, x2){
result <- irr::icc(subset(data, select = c(x1, x2)),
model = "twoway",
type = "agreement",
unit = "single")
result$value
}
# Table attempt
icc_table <- df %>%
pivot_longer(cols = -ID, names_to = c("criteria", ".value"), names_pattern = "(V\\d)\\.(\\d)") %>%
group_by(ID, criteria) %>%
rename("val1" = `1`, "val2" = `2`) %>%
summarise(icc = icc.fun(df, val1, val2), .groups = "drop") %>%
pivot_wider(id_cols = ID, names_from = criteria, values_from = icc)
However, it is not working and it returns a table with a lot of NAs. When I tried the function it seems to be working fine, so I guess it is a problem with the dplyr code. If you have any other solution apart from dplyr it is also welcomed!
Thanks!
I think the issue is between the subset() in your icc.fun and summarise(), try:
# ICC function
icc.fun <- function(x1, x2){
result <- irr::icc(data.frame(x1, x2)),
model = "twoway",
type = "agreement",
unit = "single")
result$value
}
# Table attempt
icc_table <- df %>%
pivot_longer(cols = -ID, names_to = c("criteria", ".value"), names_pattern = "(V\\d)\\.(\\d)") %>%
group_by(ID, criteria) %>%
rename("val1" = `1`, "val2" = `2`) %>%
summarise(icc = icc.fun(val1, val2), .groups = "drop") %>%
pivot_wider(id_cols = ID, names_from = criteria, values_from = icc)
In case it is useful for someone, here is the solution that I found:
I simplified the function by subsetting the data using R base
# ICC function
icc.fun <- function(data, x1, x2){
result <- icc(data[ ,c(x1, x2)],
model = "twoway",
type = "agreement",
unit = "single")
result$value
}
I used the group_modify() instead of summarise(), plus enframe()
# Create ICC table
icc_table <- df %>%
pivot_longer(cols = -ID, names_to = c("criteria", ".value"), names_pattern = "(V\\d)\\.(\\d)") %>%
group_by(ID, criteria) %>%
rename("val1" = `1`, "val2" = `2`) %>%
group_modify(~ {
icc.fun(.x, "val1", "val2") %>%
tibble::enframe(name = "variable", value = "icc")
}) %>%
pivot_wider(id_cols = ID, names_from = criteria, values_from = icc)

What color does plot.xts use?

Does anybody know what colours plot.xts uses? I can't find anything on the help page.
I would like to use the same colours in my legend.
Or is there another way to get the same plot with addLegend()?
Here the code I am using:
library(xts)
library(PerformanceAnalytics)
library(TTR)
xts1 <- xts(matrix(rnorm(300), ncol = 3), order.by = as.Date(1:100))
xts2 <- xts(matrix(rnorm(300), ncol = 3), order.by = as.Date(1:100))
colnames(xts1) <- c("A", "B", "C")
colnames(xts2) <- c("A", "B", "C")
plot_chart <- function(x) {
ff <- tempfile()
png(filename = ff)
chart.CumReturns(x)
dev.off()
unlink(ff)
}
m <- matrix(c(1, 2, 3, 3), nrow = 2, ncol = 2, byrow = TRUE)
layout(mat = m, heights = c(0.8, 0.1))
par(mar = c(2, 2, 1, 1))
plot_chart(xts1)
addSeries(reclass(apply(xts1, 2, runSD), xts1))
par(mar = c(2, 2, 1, 1))
plot_chart(xts2)
addSeries(reclass(apply(xts2, 2, runSD), xts2))
par(mar=c(0, 0, 1, 0))
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "")
# which colors do I have to insert in here?
plot_colors <- c("blue", "green", "pink")
legend(x = "top", inset = 0,
legend = colnames(xts1),
col = plot_colors, lwd = 7, cex = .7, horiz = TRUE)
Answer
Use the colorset argument of chart.CumReturns:
plot_chart <- function(x, col) {
ff <- tempfile()
png(filename = ff)
chart.CumReturns(x, colorset = col)
dev.off()
unlink(ff)
}
par(mar = c(2, 2, 1, 1))
plot_chart(xts1, col = plot_colors)
addSeries(reclass(apply(xts1, 2, runSD), xts1))
par(mar = c(2, 2, 1, 1))
plot_chart(xts2, col = plot_colors)
addSeries(reclass(apply(xts2, 2, runSD), xts2))

R: non-numeric arguments to binary operators

I am working with the R programming language. I am trying to make a "parallel coordinates plot" using some fake data:
library(MASS)
a = rnorm(100, 10, 10)
b = rnorm(100, 10, 5)
c = rnorm(100, 5, 10)
d = matrix(a, b, c)
parcoord(d[, c(3, 1, 2)], col = 1 + (0:149) %/% 50)
However, a problem arises when I try to mix numeric and factor variables together:
group <- sample( LETTERS[1:4], 100, replace=TRUE, prob=c(0.25, 0.25, 0.25, 0.25) )
d = matrix(a,b, group)
parcoord(d[, c(3, 1, 2)], col = 1 + (0:149) %/% 50)
Error in x - min(x, na.rm = TRUE): non-numeric argument to binary operator
I am just curious. Can this problem be resolved? Or is it simply impossible to make such a plot using numeric and factor variables together?
I saw a previous stackoverflow post over here where a similar plot is made using numeric and factor variables: How to plot parallel coordinates with multiple categorical variables in R
However, I am using a computer with no USB port or internet access - I have a pre-installed version of R with limited libraries (I have plotly, ggplot2, dplyr, MASS ... I don't have ggally or tidyverse) and was looking for a way to do this only with the parcoord() function.
Does anyone have any ideas if this can be done?
Thanks
Thanks
One option is to label rows of the matrix using a factor and use that on the plot, e.g.
library(MASS)
set.seed(300)
par(xpd=TRUE)
par(mar=c(4, 4, 4, 6))
a = rnorm(12, 10, 10)
b = rnorm(12, 10, 5)
c = rnorm(12, 5, 10)
group <- sample(c("#FF9289", "#FF8AFF", "#00DB98", "#00CBFF"),
12, replace=TRUE)
d = cbind(a, b, c)
rownames(d) <- group
parcoord(d[, c(3, 1, 2)], col = group)
title(main = "Plot", xlab = "Variable", ylab = "Values")
axis(side = 2, at = seq(0, 1, 0.1),
tick = TRUE, las = 1)
legend(3.05, 1, legend = c("A", "B", "C", "D"), lty = 1,
col = c("#FF9289", "#FF8AFF", "#00DB98", "#00CBFF"))
EDIT
Thanks for the additional explanation. What you want does make sense, but unfortunately it doesn't look like it will work as I expected. I tried to make a plot using an ordered factor as the middle variable (per https://pasteboard.co/JKK4AUD.jpg) but got the same error ("non-numeric argument to binary operator").
One way I thought of doing it is to recode the factor as a number (e.g. "Var_1" -> 0.2, "Var_2" -> 0.4) as below:
library(MASS)
set.seed(123)
par(xpd=TRUE)
par(mar=c(4, 4, 4, 6))
a = rnorm(12, 10, 10)
b = c(rep("Var_1", 3),
rep("Var_2", 3),
rep("Var_3", 3),
rep("Var_4", 3))
c = rnorm(12, 5, 10)
group <- c(rep("#FF9289", 3),
rep("#FF8AFF", 3),
rep("#00DB98", 3),
rep("#00CBFF", 3))
d = data.frame("A" = a,
"Factor" = b,
"C" = c,
"Group" = group)
d$Factor <- sapply(d$Factor, switch,
"Var_1" = 0.8,
"Var_2" = 0.6,
"Var_3" = 0.4,
"Var_4" = 0.2)
parcoord(d[, c(1, 2, 3)], col = group)
title(main = "Plot", xlab = "Variable", ylab = "Values")
axis(side = 2, at = seq(0, 1, 0.1),
tick = TRUE, las = 1)
legend(3.05, 1, legend = c("A", "B", "C", "D"), lty = 1,
col = c("#FF9289", "#FF8AFF", "#00DB98", "#00CBFF"))
mtext(text = "Var 1", side = 1, adj = 0.6, padj = -30)
mtext(text = "Var 3", side = 1, adj = 0.6, padj = -12)
mtext(text = "Var 2", side = 1, adj = 0.6, padj = -21)
mtext(text = "Var 4", side = 1, adj = 0.6, padj = -3)

Adding rows to make a full long dataset for longitudinal data analysis

I am working with a long-format longitudinal dataset where each person has 1, 2 or 3 time points. In order to perform certain analyses I need to make sure that each person has the same number of rows even if it consists of NAs because they did not complete the certain time point.
Here is a sample of the data before adding the rows:
structure(list(Values = c(23, 24, 45, 12, 34, 23), P_ID = c(1,
1, 2, 2, 2, 3), Event_code = c(1, 2, 1, 2, 3, 1), Site_code = c(1,
1, 3, 3, 3, 1)), class = "data.frame", row.names = c(NA, -6L))
This is the data I aim to get after adding the relevant rows:
structure(list(Values = c(23, 24, NA, 45, 12, 34, 23, NA, NA),
P_ID = c(1, 1, 1, 2, 2, 2, 3, 3, 3), Event_code = c(1, 2,
3, 1, 2, 3, 1, 2, 3), Site_code = c(1, 1, 1, 3, 3, 3, 1,
1, 1)), class = "data.frame", row.names = c(NA, -9L))
I want to come up with code that would automatically add rows to the dataset conditionally on whether the participant has had 1, 2 or 3 visits. Ideally it would make rest of data all NAs while copying Participant_ID and site_code but if not possible I would be satisfied just with creating the right number of rows.
We could use fill after doing a complete
library(dplyr)
library(tidyr)
ExpandedDataset %>%
complete(P_ID, Event_code) %>%
fill(Site_code)
I came with quite a long code, but you could group it in a function and make it easier:
Here's your dataframe:
df <- data.frame(ID = c(rep("P1", 2), rep("P2", 3), "P3"),
Event = c("baseline", "visit 2", "baseline", "visit 2", "visit 3", "baseline"),
Event_code = c(1, 2, 1, 2, 3, 1),
Site_code = c(1, 1, 2, 2, 2, 1))
How many records you have per ID?
values <- summary(df$ID)
What is the maximum number of records for a single patient?
target <- max(values)
Which specific patients have less records than the maximum?
uncompliant <- names(which(values<target))
And how many records do you have for those patients who have missing information?
rowcount <- values[which(values<target)]
So now, let's create the vectors of the data frame we will add to your original one. First, IDs:
IDs <- vector()
for(i in 1:length(rowcount)){
y <- rep(uncompliant[i], target - rowcount[i])
IDs <- c(IDs, y)
}
And now, the sitecodes:
SC <- vector()
for(i in 1:length(rowcount)){
y <- rep(unique(df$Site_code[which(df$ID == uncompliant[i])]), target - rowcount[i])
SC <- c(SC, y)
}
Finally, a data frame with the values we will introduce:
introduce <- data.frame(ID = IDs, Event = rep(NA, length(IDs)),
Event_code = rep(NA, length(IDs)),
Site_code = SC)
Combine the original dataframe with the new values to be added and sort it so it looks nice:
final <- as.data.frame(rbind(df, introduce))
final <- final[order(v$ID), ]

What does all.equal do in R when it is executed on two matrices

What does all.equal do in R when it is executed on two matrices like bellow?
What is Mean relative difference and how is it calculated?
a <-
matrix(c(4, 1, 1, 4),
nrow = 2,
dimnames = list(Guess = c("Milk", "Tea"),
Truth = c("Milk", "Tea")))
b <-
matrix(c(2, 3, 3, 2),
nrow = 2,
dimnames = list(Guess = c("Milk", "Tea"),
Truth = c("Milk", "Tea")))
all.equal(a, b)
For numeric input mean relative difference is calculated as absolute difference between both vectors as compared to first vector. So something like
mean(abs(a-b))/mean(abs(a))
#[1] 0.8
This is a weird behavior. "Istruing" all equal would produce expected output (FALSE).
Sorry if I'm not really answering the question.
a <-
matrix(c(4, 1, 1, 4),
nrow = 2,
dimnames = list(Guess = c("Milk", "Tea"),
Truth = c("Milk", "Tea")))
b <-
matrix(c(2, 3, 3, 2),
nrow = 2,
dimnames = list(Guess = c("Milk", "Tea"),
Truth = c("Milk", "Tea")))
isTRUE(all.equal(a, b))
a <-
matrix(c(4, 1, 1, 4),
nrow = 2,
dimnames = list(Guess = c("Milk", "Tea"),
Truth = c("Milk", "Tea")))
b <-
matrix(c(4, 1, 1, 4),
nrow = 2,
dimnames = list(Guess = c("Milk", "Tea"),
Truth = c("Milk", "Tea")))
isTRUE(all.equal(a, b))

Resources