I am trying to connect points over time using a R plotly animation.
Several issues appear with the code below:
The segments float from one connection to another.
The segments disappear from one day to another even though the connection still exists on the consecutive days (see df).
Not all connections were plotted.
I tried to exchange add_segments with add_annotations to have arrows instead of segments but the frame argument would not work.
Note that removing the color and symbol arguments help with point #2 and #3.
library(dplyr)
library(plotly)
set.seed(12)
df <- tibble(
day = rep(1:8, each = 10),
id = rep(paste0("ID", 1:10), 8),
infector = NA
) %>%
group_by(id) %>%
mutate(x = rnorm(1),
y = rnorm(1),
group = sample(c("A", "B", "C"), 1)) %>%
ungroup() %>%
mutate(
infector = case_when(
id == "ID2" & day >= 1 ~ "ID4",
id == "ID3" & day >= 2 ~ "ID4",
id == "ID1" & day >= 3 ~ "ID2",
id == "ID5" & day >= 3 ~ "ID3",
id == "ID6" & day >= 3 ~ "ID4",
id == "ID10" & day >= 4 ~ "ID2",
id == "ID9" & day >= 7 ~ "ID5"
)
)
infectors <- df %>% filter(day == 1 & id %in% .$infector) %>%
select(id, x, y, group) %>%
rename(infector.x = x,
infector.y = y,
infector_group = group)
df <- left_join(df, infectors, by = c("infector" = "id"))
pal <- c("A" = "blue", "B" = "green", "C" = "red")
plot_ly(df) %>%
add_markers(
x = ~ x,
y = ~ y,
frame = ~ day,
hoverinfo = "text",
text = ~ paste("ID:", id),
symbol = ~group,
color = ~group,
colors = pal
) %>%
add_segments(
x = ~infector.x,
xend = ~x,
y = ~infector.y,
yend = ~y,
color = ~infector_group,
colors = pal,
frame = ~day)
Related
I'm trying to pass vectors, each with a different number of NA values, through to a map() function but it's returning an error.
I have a tibble of N numeric columns and 1 categorical column. I want to compare the distributions for each of the numeric columns against the other split by the values of the categorical column. I use overlapping::overlap() to calculate the overlap of the distributions, and i feed the numeric columns into a map_dfr function for the iteration. For example:
require(overlapping)
require(dplyr)
require(purrr)
set.seed( 1 )
n <- 100
G1 <- sample( 0:30, size = n, replace = TRUE )
G2 <- sample( 0:30, size = n, replace = TRUE, prob = dbinom( 0:30, 31, .55 ))
G3 <- sample( 0:30, size = n, replace = TRUE, prob = dbinom( 0:30, 41, .65 ))
Data <- data.frame(y = G1, x = G2, z = G3, group = rep(c("G1","G2", "G3"), each = n), class = rep(c("C1","C2", "C3"), each = 1)) %>% as_tibble()
Data
overlap_fcn <- function(.x) {
## construct list of vectors
dist_list <- list(
"C1" = Data %>%
filter(class == 'C1', !is.na(.x)) %>%
pull(.x),
"C2" = Data %>%
filter(class == 'C2', !is.na(.x)) %>%
pull(.x),
"C3" = Data %>%
filter(class == 'C3', !is.na(.x)) %>%
pull(.x)
)
## calculate distribution overlaps
return(
enframe(
overlapping::overlap(dist_list)$OV*100
) %>%
mutate(value = paste0(round(value, 2), "%"),
class = .x) %>%
rename(comparison = name, overlap = value) %>%
relocate(class)
)
}
overlap_table <- purrr::map_dfr(
.x = c('y', 'x', "z"),
.f = ~overlap_fcn(.x))
overlap_table
The above works as intended. However, in practice I have different amounts of missingess in each of x, y, and z. I try to account for this with the filter on !is.na(.x) but it's not working. For example:
Data$x[1:3] <- NA
Data$y[10:20] <- NA
Data$z[100:150] <- NA
overlap_table <- purrr::map_dfr(
.x = c('x', 'y', "z"),
.f = ~overlap_fcn(.x))
returns this error:
Error in density.default(x[[j]], n = nbins, ...): 'x' contains missing values
Error in density.default(x[[j]], n = nbins, ...): 'x' contains missing values
Traceback:
1. purrr::map_dfr(.x = c("x", "y", "z"), .f = ~overlap_fcn(.x))
2. map(.x, .f, ...)
3. .f(.x[[i]], ...)
4. overlap_fcn(.x)
5. enframe(overlapping::overlap(dist_list)$OV * 100) %>% mutate(value = paste0(round(value,
. 2), "%"), class = .x) %>% rename(comparison = name, overlap = value) %>%
. relocate(class) # at line 25-33 of file <text>
6. relocate(., class)
7. rename(., comparison = name, overlap = value)
8. mutate(., value = paste0(round(value, 2), "%"), class = .x)
9. enframe(overlapping::overlap(dist_list)$OV * 100)
10. overlapping::overlap(dist_list)
11. density(x[[j]], n = nbins, ...)
12. density.default(x[[j]], n = nbins, ...)
13. stop("'x' contains missing values")
Can anyone help me out here please? I'm sure it's something super obvious i'm missing; i just can't see what!
Here, the .x is character class. We may need to convert to symbol and evaluate (!!)
overlap_fcn <- function(.x) {
## construct list of vectors
dist_list <- list(
"C1" = Data %>%
filter(class == 'C1', !is.na(!! rlang::sym(.x))) %>%
pull(.x),
"C2" = Data %>%
filter(class == 'C2', !is.na(!! rlang::sym(.x))) %>%
pull(.x),
"C3" = Data %>%
filter(class == 'C3', !is.na(!! rlang::sym(.x))) %>%
pull(.x)
)
## calculate distribution overlaps
return(
enframe(
overlapping::overlap(dist_list)$OV*100
) %>%
mutate(value = paste0(round(value, 2), "%"),
class = .x) %>%
rename(comparison = name, overlap = value) %>%
relocate(class)
)
}
-testing after creating the NAs in Data
> purrr::map_dfr(
+ .x = c('x', 'y', "z"),
+ .f = ~overlap_fcn(.x))
# A tibble: 9 × 3
class comparison overlap
<chr> <chr> <chr>
1 x C1-C2 98.61%
2 x C1-C3 97.46%
3 x C2-C3 97.5%
4 y C1-C2 95.47%
5 y C1-C3 96.22%
6 y C2-C3 97.14%
7 z C1-C2 90.17%
8 z C1-C3 94.9%
9 z C2-C3 89.24%
I think I'm basically looking for an R plotly equivalent to this python plotly post:
I have a XY data.frame that I'd like to plot using R's plotly, where each point belongs to either one of two types ("a"/"b"), and nested within each type is a group, and the group assignment is redundant.
My purpose is to color code the points according to the group frequency, where each type uses a different color scale.
Here's the data.frame:
library(dplyr)
set.seed(1)
df <- rbind(data.frame(type = "a", group = paste0("a", sample(1000, 500, replace = T))) %>%
cbind(as.data.frame(MASS::mvrnorm(n = 1000,mu = c(-5,-5),Sigma = matrix(c(5, 3, 4, 4), ncol=2)))),
data.frame(type = "b", group = paste0("b", sample(500, 50, replace = T))) %>%
cbind(as.data.frame(MASS::mvrnorm(n = 500,mu = c(5,5),Sigma = matrix(c(5, 3, 4, 4), ncol=2))))) %>%
dplyr::rename(x = V1, y = V2)
Here I compute the frequency of each group, for each type, and then add two artificial points per each type, with the global minimum and maximum frequency (f), so that the color scales use a comment numeric scale:
freq.df <- rbind(dplyr::group_by(dplyr::filter(df, type == "a"), type, group) %>%
dplyr::tally() %>%
dplyr::mutate(f = 100*n/sum(n)) %>%
dplyr::select(-n),
dplyr::group_by(dplyr::filter(df, type == "b"), type, group) %>%
dplyr::tally() %>%
dplyr::mutate(f = 100*n/sum(n)) %>%
dplyr::select(-n)) %>%
dplyr::ungroup() %>%
rbind(data.frame(type = c(rep("a", 2), rep("b", 2)), group = c(rep("a", 2), rep("b", 2)), f = rep(c(min(.$f), max(.$f)), 2), stringsAsFactors = F))
And now joining freq.df to df:
df <- df %>% dplyr::left_join(freq.df)
Here's how I'm trying to plot it:
plotly::plot_ly(marker = list(size = 3), type = 'scatter', mode = "markers", color = dplyr::filter(df, type == "a")$f, colors = viridis::viridis_pal(option = "D")(3), x = dplyr::filter(df, type == "a")$x, y = dplyr::filter(df, type == "a")$y) %>%
plotly::add_trace(marker = list(size = 3),type = 'scatter', mode = "markers",color = dplyr::filter(df, type == "b")$f,colors = viridis::viridis_pal(option = "A")(3), x = dplyr::filter(df, type == "b")$x,y=dplyr::filter(df,type == "b")$y) %>%
plotly::layout(xaxis = list(zeroline = F, showticklabels = F, showgrid = F),yaxis = list(zeroline = F,showticklabels = F, showgrid = F))
Which only gives me the colorbar of first color scale (viridis's cividis):
Any idea how to get both colorbars (viridis's cividis and viridis's magma) appear side by side?
The following loop is effective in that it gets me to the finish line but i'm looking for a way to make it more efficient as I'm looping through a large dataset. Possibly using a Purrr function?
library(tidyverse)
library(timetk)
#### CREATE DATA
df_1 <- data.frame(Date = seq.Date(as.Date("2016-01-01"), length.out = 36, by = "month"),
Inventory = round(runif(36,5,100),0),
Purchases = round(runif(36,5,100),0),
Sales = round(runif(36,5,100),0),
Ending_Inventory = round(runif(36,5,100),0)) %>%
mutate(Starting_Inventory = lag(Ending_Inventory,1)) %>%
mutate(product = "Product_1")
df_2 <- data.frame(Date = seq.Date(as.Date("2016-01-01"), length.out = 36, by = "month"),
Inventory = round(runif(36,5,100),0),
Purchases = round(runif(36,5,100),0),
Sales = round(runif(36,5,100),0),
Ending_Inventory = round(runif(36,5,100),0)) %>%
mutate(Starting_Inventory = lag(Ending_Inventory,1)) %>%
mutate(product = "Product_2")
df <- rbind(df_1, df_2) %>%
group_by(product) %>%
timetk::future_frame(
.date_var = Date,
.length_out = "12 months",
.bind_data = TRUE
)
Here I'm creating a date sequence to iterate through the for loop
#### CREATE DATE SEQUENCE
Dates <- seq(min(df$Date) %m+% months(36), min(df$Date) %m+% months(48), by = "month")
The dates from the sequence above will iterate through the loop to fill in the future data and then I join, rename some columns, and drop all that contain ("y")... Seems like I'm performing some steps that aren't necessary.
for (i in 1:length(Dates)){
df <- df %>%
mutate(Purchases = case_when(Date < Dates[i] ~ Purchases,
Date == Dates[i] ~ lag(Purchases, 12)*1.05,
TRUE ~ 0
)) %>%
mutate(Starting_Inventory = case_when(Date < Dates[i] ~ Starting_Inventory,
Date == Dates[i] ~ lag(Ending_Inventory,1),
TRUE ~ 0
)) %>%
mutate(Sales = case_when(Date < Dates[i] ~ Sales,
Date == Dates[i] ~ lag(Sales,12) * 1.15,
TRUE ~ 0
)) %>%
mutate(Ending_Inventory = case_when(Date < Dates[i] ~ Ending_Inventory,
Date == Dates[i] ~ Starting_Inventory + Sales + Purchases,
TRUE ~ 0
)) %>%
mutate(Inventory = case_when(Date < Dates[i] ~ Inventory,
Date == Dates[i] ~ Ending_Inventory,
TRUE ~ 0
))
new_data <- df[df$Date == (Dates[i]),]
df <- df %>%
left_join(., new_data, by = c("product", "Date")) %>%
mutate(Inventory.x = ifelse(Date == Dates[i],Inventory.y,Inventory.x),
Purchases.x = ifelse(Date == Dates[i],Purchases.y,Purchases.x),
Sales.x = ifelse(Date == Dates[i],Sales.y,Sales.x),
Starting_Inventory.x = ifelse(Date == Dates[i],Starting_Inventory.y,Starting_Inventory.x),
Ending_Inventory.x = ifelse(Date == Dates[i],Ending_Inventory.y,Ending_Inventory.x),
) %>%
rename(Inventory = Inventory.x,
Purchases = Purchases.x,
Starting_Inventory = Starting_Inventory.x,
Sales = Sales.x,
Ending_Inventory = Ending_Inventory.x) %>%
dplyr::select(-contains(".y"))
return
print(i)
gc()
}
There are a lot of unnecessary steps in there.
Mutate can take more than one expression at once.
The case_when is unnecessary since in the next step you only keep the rows that got modified.
Then, for the same reason, the join and renaming is more steps than needed, you can just replace the old rows with the new row by selecting a subset.
for (i in seq_along(Dates)){
new_data <- df2 %>%
mutate(Purchases = lag(Purchases, 12)*1.05,
Starting_Inventory = lag(Ending_Inventory,1),
Sales = lag(Sales,12) * 1.15,
Ending_Inventory = Starting_Inventory + Sales + Purchases,
Inventory = Ending_Inventory)
df2[df2$Date == Dates[i],] <- new_data[new_data$Date == Dates[i],]
}
But then you're stil recalculating your whole data.frame for each loop. No need for that either since mutate() is iterative. You can do it all with just that function.
Also, since there are only 2 conditions really needed, you can replace the case_when with ifelse and it's faster.
df <- df %>%
mutate(
Purchases = ifelse(
Date %in% Dates, lag(Purchases, 12)*1.05, Purchases
),
Starting_Inventory = ifelse(
Date %in% Dates, lag(Ending_Inventory,1), Starting_Inventory
),
Sales = ifelse(
Date %in% Dates, lag(Sales,12) * 1.15, Sales
),
Ending_Inventory = ifelse(
Date %in% Dates, Starting_Inventory + Sales + Purchases,
Ending_Inventory
),
Inventory = ifelse(
Date %in% Dates, Ending_Inventory, Inventory
)
)
Edit:
I think it's important to break down what you're trying to do when you end up with long for loop like this. Since you're trying to do in place modifications, even in base R, you could do this with this short a for loop :
df3 <- df.o
df3 <- df3 |> within({
for (i in which(Date %in% Dates)){
Purchases[i] = Purchases[i-12]*1.05
Sales[i] = Sales[i-12] * 1.15
Ending_Inventory[i] = Starting_Inventory[i] + Sales[i] + Purchases[i]
Inventory[i] = Ending_Inventory[i]
Starting_Inventory[i] = Ending_Inventory[i-1]
}
i = NULL
})
A bit slower than mutate, but it's the same logic.
R newbie here :)
I have recently started using R library Highcharter as an alternative to ggplot2.
This is the sample code I am currently working on:
library(highcharter)
library(dplyr)
## Sample dataframe
YEAR <- c(2019,2020,2021)
CATEGORY <- c("dog", "cat", "mouse")
SAMPLE_DATA <- expand.grid(YEAR, CATEGORY)
names(SAMPLE_DATA)[1] <- "CATEGORY"
names(SAMPLE_DATA)[2] <- "YEAR"
SAMPLE_DATA$VALUE <- runif(n = 9, min = 400, max = 900)
## Chart
SAMPLE_DATA <- SAMPLE_DATA %>%
group_by(YEAR, CATEGORY) %>%
summarise(VALUE = sum(VALUE, na.rm = T))
highchart() %>%
hc_add_series(data = SAMPLE_DATA, hcaes(x = YEAR, y = round(VALUE,0), group = CATEGORY), type = "column") %>%
hc_plotOptions(column = list(stacking = "normal"))
What I am trying to do is:
Sort how the group "CATEGORY" is piled in each column, based on ascending/descending "VALUE"
Have that effect which highlights the same group in all columns as you hover over it
Does anyone have an idea? Thank you!
This is a late answer but I believe this is what you want.
Adding the data again because I think you swapped some column names on accident:
YEAR <- c(2019,2020,2021)
CATEGORY <- c("dog", "cat", "mouse")
SAMPLE_DATA <- expand.grid(YEAR, CATEGORY)
names(SAMPLE_DATA)[1] <- "YEAR"
names(SAMPLE_DATA)[2] <- "CATEGORY"
SAMPLE_DATA$VALUE <- runif(n = 9, min = 400, max = 900)
## Chart
SAMPLE_DATA <- SAMPLE_DATA %>%
group_by(YEAR, CATEGORY) %>%
summarise(VALUE = sum(VALUE, na.rm = T))
Creating plot:
SAMPLE_DATA %>%
ungroup() %>%
mutate(YEAR = factor(YEAR) %>% fct_reorder(VALUE, .desc = TRUE)) %>%
mutate(year_index = as.numeric(YEAR)) %>%
hchart(
type = "column",
hcaes(x = year_index,
y = VALUE,
group = CATEGORY,
name = YEAR),
) %>%
hc_xAxis(type = "category", labels = list(step = 1)) %>%
hc_plotOptions(series = list(stacking = TRUE))
I'm trying to add like an information box on hover on title of the highcharter graph. When the user hovers over title, it should show some information about the graph. Can this be achieved using Highcharter in R ?
Thank You! Here is a sample code to which I want to add hover property for title.
library(dplyr)
library(stringr)
library(purrr)
n <- 5
set.seed(123)
colors <- c("#d35400", "#2980b9", "#2ecc71", "#f1c40f", "#2c3e50", "#7f8c8d")
colors2 <- c("#000004", "#3B0F70", "#8C2981", "#DE4968", "#FE9F6D", "#FCFDBF")
df <- data.frame(x = seq_len(n) - 1) %>%
mutate(
y = 10 + x + 10 * sin(x),
y = round(y, 1),
z = (x*y) - median(x*y),
e = 10 * abs(rnorm(length(x))) + 2,
e = round(e, 1),
low = y - e,
high = y + e,
value = y,
name = sample(fruit[str_length(fruit) <= 5], size = n),
color = rep(colors, length.out = n),
segmentColor = rep(colors2, length.out = n)
)
create_hc <- function(t) {
dont_rm_high_and_low <- c("arearange", "areasplinerange",
"columnrange", "errorbar")
is_polar <- str_detect(t, "polar")
t <- str_replace(t, "polar", "")
if(!t %in% dont_rm_high_and_low) df <- df %>% select(-e, -low, -high)
highchart() %>%
hc_title(text = paste(ifelse(is_polar, "polar ", ""), t, '\u2370'),
style = list(fontSize = "15px")) %>%
hc_chart(type = t,
polar = is_polar) %>%
hc_xAxis(categories = df$name) %>%
hc_add_series(df, name = "Fruit Consumption", showInLegend = FALSE)
}
hcs <- c("line") %>%
map(create_hc)