How to plot the distribution as combo chart in R? - r

Here is my initial data set:
data_x <- tribble(
~price, ~id, ~cost, ~revenue,
1, 10, 0.20, 0,
2, 20, 0.30, 60,
3, 20, 0.30, 0,
4, 10, 0.20, 100,
5, 30, 0.10, 40,
6, 10, 0.20, 0,
1, 20, 0.30, 80,
2 , 10, 0.20, 0,
3, 30, 0.10, 20,
3, 20, 0.30, 40,
)
Then, I have a new variable that is zet:
data_y <- data_x %>%
mutate(zet = cost/revenue) %>%
mutate_if(is.numeric, list(~na_if(., Inf))) %>%
mutate_all(funs(replace_na(.,0)))
Now, I plot the price distribution while showing the zet distribution, as well. Here is my desired plot:
To do this, I first wanted to see price and zet distribution even they are not percentage now.
price_dist <- data_y %>%
group_by(priceseg = cut(as.numeric(price), c(0, 1, 3, 5, 6))) %>%
summarise(price_n = n_distinct(price)) %>%
pivot_wider(names_from = priceseg, values_from = price_n)
zet_dist <- data_y %>%
group_by(priceseg = cut(as.numeric(price), c(0, 1, 3, 5, 6))) %>%
summarise(zet_n = n_distinct(zet)) %>%
pivot_wider(names_from = priceseg, values_from = zet_n)
I would be grateful if you could help me to plot my desired chart.

d <- data_y %>%
group_by(priceseg = cut(as.numeric(price), c(0, 1, 3, 5, 6))) %>%
summarise(price_n = n_distinct(price),
zet_n = n_distinct(zet)) %>%
mutate(price_n = 100 * prop.table(price_n),
zet_n2 = 100 * prop.table(zet_n))
ggplot(d) +
geom_col(aes(x = priceseg, y = price_n)) +
geom_line(data = d, mapping = aes(x = priceseg, y = zet_n2, group = 1)) +
geom_label(data = d, mapping = aes(x = priceseg, y = zet_n2, label = zet_n), nudge_y = 5)

Related

how do i create a bar chart to compare pre and post scores between participants?

I am trying to create a bar chart or column chart plot to compare pre and post scores between participants. I managed to do this in a line graph, however, I am struggling to visualise this within a bar chart, can anyone help me with this?
Here is the data I am using:
structure(list(Participant = c(2, 3, 5, 7), PRE_QUIP_RS = c(24,
24, 20, 20), POST_QUIP_RS = c(10, 23, 24, 14), PRE_PDQ8 = c(11,
8, 10, 4), POST_PDQ8 = c(7, 7, 9, 4), PRE_GDS = c(1, 7, 1, 0),
POST_GDS = c(1, 4, 2, 0), PRE_PERSISTENT = c(9, 13, 6, 2),
POST_PERSISTENT = c(9, 13, 11, 3), PRE_EPISODIC = c(3, 4,
2, 0), POST_EPISODIC = c(2, 5, 6, 2), PRE_AVOIDANCE = c(6,
3, 0, 2), POST_AVOIDANCE = c(3, 3, 4, 1), PRE_IPQ = c(39,
48, 40, 37), POST_IPQ = c(16, 44, 30, 17), PRE_GSE = c(28,
31, 36, 29), POST_GSE = c(29, 30, 30, 29), PRE_BCI = c(11,
9, 5, 3), POST_BCI = c(3, 15, 0, 0)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -4L))
In terms of how I roughly want it to look, I want the bars to be placed together for pre and post for each participant, kind of like this:
You may try
library(tidyverse)
df %>%
select(Participant, PRE_QUIP_RS, POST_QUIP_RS) %>%
pivot_longer(cols = c(PRE_QUIP_RS, POST_QUIP_RS), names_to = "group") %>%
mutate(group = str_split(group, "_", simplify = T)[,1],
Participant = as.factor(Participant)) %>%
ggplot(aes(x = Participant, y = value, group = group, fill = group)) +
geom_col(position = "dodge")
PRE POST order
dummy %>%
select(Participant, PRE_QUIP_RS, POST_QUIP_RS) %>%
pivot_longer(cols = c(PRE_QUIP_RS, POST_QUIP_RS), names_to = "group") %>%
mutate(group = str_split(group, "_", simplify = T)[,1] %>%
factor(., levels = c("PRE", "POST")), # HERE
Participant = as.factor(Participant)) %>%
ggplot(aes(x = Participant, y = value, group = group, fill = group)) +
geom_col(position = "dodge")

Using add_header_row from flextable to create columns of varying widths

I have data as follows:
dat <- structure(list(rn = c("type_A", "type_B", "type_C"
), freq = list(c(0, 0, 0, 5, 7, 16, 28), c(2, 1, 0, 5, 0, 8),
c(0, 0, 3, 5, 12, 53, 73)), colspan = list(c(`25` = 1, `100` = 2,
`250` = 1, `500` = 1, `1000` = 1, Infinity = 3, SUM = 1), c(`25` = 1,
`100` = 2, `250` = 1, `500` = 1, Infinity = 4, SUM = 1), c(`25` = 1,
`50` = 1, `100` = 1, `250` = 1, `500` = 1, Infinity = 4, SUM = 1
))), row.names = c(NA, 3L), class = "data.frame")
total_colspan = c(0, 25, 50, 100, 250, 500, 1000, 1500, 3000, "Infinity", "SUM")
rn freq colspan
1 type_A 0, 0, 0, 5, 7, 16, 28 1, 2, 1, 1, 1, 3, 1
2 type_B 2, 1, 0, 5, 0, 8 1, 2, 1, 1, 4, 1
3 type_C 0, 0, 3, 5, 12, 53, 73 1, 1, 1, 1, 1, 4, 1
I would like to create a table with varying column spans (but they all add up to 10), in an R-markdown Word document, like the table below:
I was advised to try flextable for this (link). I am trying to use the header options to create these varying colspan. I thought about doing something like:
dat_table <- flextable(dat)
dat_table <- lapply(dat_table, add_header_row, values = unlist(freq), colwidths = unlist(colspan))
But this is not working.
EDIT:
My second attempt:
dat <- structure(list(rn = c("type_A", "type_B", "type_C"
), freq = list(c(0, 0, 0, 5, 7, 16, 28), c(2, 1, 0, 5, 0, 8),
c(0, 0, 3, 5, 12, 53, 73)), colspan = list(c(1, 2, 1, 1, 1, 3, 1), c(1, 2, 1, 1, 4, 1), c(1, 1, 1, 1, 1, 4, 1
))), row.names = c(NA, 3L), class = "data.frame")
# The thresholds as in the picture
thresholds <- data.frame(c("Lower threshold","Upper threshold"), c(0,25), c(25,50), c(50,100), c(100,250), c(250,500),c(500,1000),c(1000,1500),c(1500,3000),c(3000, "Infinity"), c("", "SUM"))
names(thresholds) <- c("One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten", "Eleven")
thresholds <- flextable(thresholds)
# There was one column to few in the example
dat <- transform(dat, colspan=Map('c', 1, dat[["colspan"]] ))
dat <- transform(dat, freq=Map('c', "", dat[["freq"]] ))
# for loop to stick to the syntax
for (i in nrow(dat)) {
thresholds <- add_header_row(thresholds, values = dat[[2]][[i]], colwidths = dat[[3]][[i]])
}
For some reason it only adds one row (while it allows for more headers to be added).
Here's a solution that is perhaps way too overkill, but seems to do what you're looking for:
library(tidyverse)
library(flextable)
dat <- structure(list(rn = c("type_A", "type_B", "type_C"
), freq = list(c(0, 0, 0, 5, 7, 16, 28), c(2, 1, 0, 5, 0, 8),
c(0, 0, 3, 5, 12, 53, 73)), colspan = list(c(1, 2, 1, 1, 1, 3, 1), c(1, 2, 1, 1, 4, 1), c(1, 1, 1, 1, 1, 4, 1
))), row.names = c(NA, 3L), class = "data.frame")
# The thresholds as in the picture
thresholds <- data.frame(c("Lower threshold","Upper threshold"), c(0,25), c(25,50), c(50,100), c(100,250), c(250,500),c(500,1000),c(1000,1500),c(1500,3000),c(3000, "Infinity"), c("", "SUM"))
names(thresholds) <- c("One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten", "Eleven")
out <- map(1:nrow(dat), function(index){
out <- data.frame("freq" = dat$freq[[index]],
"span" = dat$colspan[[index]]) %>%
tidyr::uncount(span, .id = 'span') %>%
mutate(freq = ifelse(span>1, NA, freq)) %>%
t %>%
as.data.frame() %>%
mutate(rn = dat$rn[[index]],
across(everything(), ~as.character(.))) %>%
select(rn, everything()) %>%
set_names(nm = names(thresholds)) %>%
slice(1)
return(out)
})
combined <- thresholds %>%
mutate(across(everything(), ~as.character(.))) %>%
bind_rows(out)
spans <- map(1:length(dat$colspan), function(index){
spans <- dat$colspan[[index]] %>%
as_tibble() %>%
mutate(idx = row_number()) %>%
tidyr::uncount(value, .remove = F) %>%
group_by(idx) %>%
mutate(pos = 1:n(),
value = ifelse(pos != 1, 0, value)) %>%
ungroup() %>%
select(value) %>%
t
return(append(1, spans))
})
myft <- flextable(combined) %>%
theme_box()
myft$body$spans$rows[3:nrow(myft$body$spans$rows),] <- matrix(unlist(spans), ncol = ncol(combined), byrow = TRUE)
myft
Created on 2022-04-29 by the reprex package (v2.0.1)
This makes the table:
I don't think you can pass colspan options here without quite a bit of hacking. If at all possible, I would suggest adding the information which cells need to be combined manually. This is the only option, as far as I know, in flextable:
library(flextable)
library(tidyverse)
# clean up the object
dat_clean <- dat %>%
mutate(freq = map2(freq, colspan, ~rep(.x, .y))) %>%
select(-colspan) %>%
unnest(freq) %>%
group_by(rn) %>%
mutate(col = paste0("col_", row_number())) %>%
pivot_wider(names_from = col, values_from = freq)
flextable(dat_clean) %>%
merge_at(i = 1, j = 3:4, part = "body") %>%
merge_at(i = 1, j = 7:9, part = "body") %>%
border_inner(part="all", border = fp_border_default()) %>%
align(align = "center", part = "all")
Created on 2022-04-25 by the reprex package (v2.0.1)
It is a bit tricky to merge those two tables. This is the closest I came to reproduce your desired table. First I created your data in a suitable way:
thresholds <- data.frame(c("Lower threshold", "Upper threshold", "type_A", "type_B", "type_C"),
c(0,25, 0, 2, 0),
c(25,50, 0, 1, 0),
c(50,100, NA, NA,3),
c(100,250,0,0,5),
c(250,5005,5,5,12),
c(500,1000,7,0,53),
c(1000,1500,16,NA,NA),
c(1500,3000,NA,NA,NA),
c(3000, "Infinity",NA,NA,NA),
c("SUM", "SUM", 28,8,73))
names(thresholds) <- c("One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten", "Eleven")
Using the officer package you can give the horizontal and vertical lines different colors you want. Using the merge_at function you can merge certain cells. With the border_inner function you get borders in the table. You can use the following code:
library(officer)
std_border = fp_border(color="gray")
library(flextable)
library(dplyr)
thresholds %>%
flextable() %>%
merge_at(i = 3, j = 3:4, part = "body") %>%
merge_at(i = 4, j = 3:4, part = "body") %>%
merge_at(i = 3, j = 8:10, part = "body") %>%
merge_at(i = 4, j = 7:10, part = "body") %>%
merge_at(i = 5, j = 7:10, part = "body") %>%
border_inner(border = std_border) %>%
align(align = "left", part = "all")
Output:

Why is ggplot only filling in some of my stacked bars and not others?

I'm making a stacked barplot using ggplot, but for some reason, it keeps leaving 2 bars unfilled, despite filling in other ones using the same criteria. Why is it doing this and how can I prevent this from happening?
library(ggplot2)
library(dplyr)
library(scales)
#Code to replicate
data <- tibble(team = factor(c(rep("Team 1", 10), rep("Team 2", 10), rep("Team 3", 10), rep("Team 4", 10)), levels = c("Team 1", "Team 2", "Team 3", "Team 4")),
state = factor(c(rep(c("Won", "Tied",
"Rematch", "Postponed", "Forfeit",
"Lost", "Withdrew", "Ongoing",
"Undetermined", "Unknown"), 4)), levels = c("Won", "Tied",
"Rematch", "Postponed", "Forfeit",
"Lost", "Withdrew", "Ongoing",
"Undetermined", "Unknown")),
count = c(1920, 80, 241, 5, 310, 99, 2, 127, 20, 33,
48, 1, 8, 0, 11, 3, 0, 4, 3, 3,
140, 5, 8, 0, 17, 2, 0, 5, 3, 7,
477, 20, 59, 1, 106, 1, 0, 33, 7, 10))
data <- data %>%
group_by(team) %>%
mutate(percentage = round((count/sum(count, na.rm = TRUE)), 2))
data %>%
ggplot(aes(fill= state, y = percentage, x = team)) +
geom_col(position="stack",width = 0.4) +
coord_flip() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0, 1)) +
geom_text(aes(label = scales::percent(percentage, accuracy = 1)),
position = position_stack(vjust = .5),
check_overlap = TRUE )
Here's how it looks; the floating 75% and 59% for Team 3 and Team 2, respectively, should be in the salmon color that is used for Teams 4 and 1. I know it's not a typo because I'm using the same title for each.
Change the position argument to fill
data %>%
ggplot(aes(fill= state, y = percentage, x = team)) +
geom_col(position="fill",width = 0.4) +
coord_flip() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0, 1)) +
geom_text(aes(label = scales::percent(percentage, accuracy = 1)),
position = position_stack(vjust = .5),
check_overlap = TRUE )

Tidyverse change values based on name

I have a dataframe as follows
library(tidyverse)
library(tidymodels)
#df <- read_csv("C:\\Users\\omarl\\OneDrive\\Escritorio\\games.csv")
df <- structure(list(gameId = 3326086514, creationTime = 1504279457970,
gameDuration = 1949, seasonId = 9, winner = 1, firstBlood = 2,
firstTower = 1, firstInhibitor = 1, firstBaron = 1, firstDragon = 1,
firstRiftHerald = 2, t1_champ1id = 8, t1_champ1_sum1 = 12,
t1_champ1_sum2 = 4, t1_champ2id = 432, t1_champ2_sum1 = 3,
t1_champ2_sum2 = 4, t1_champ3id = 96, t1_champ3_sum1 = 4,
t1_champ3_sum2 = 7, t1_champ4id = 11, t1_champ4_sum1 = 11,
t1_champ4_sum2 = 6, t1_champ5id = 112, t1_champ5_sum1 = 4,
t1_champ5_sum2 = 14, t1_towerKills = 11, t1_inhibitorKills = 1,
t1_baronKills = 2, t1_dragonKills = 3, t1_riftHeraldKills = 0,
t1_ban1 = 92, t1_ban2 = 40, t1_ban3 = 69, t1_ban4 = 119,
t1_ban5 = 141, t2_champ1id = 104, t2_champ1_sum1 = 11, t2_champ1_sum2 = 4,
t2_champ2id = 498, t2_champ2_sum1 = 4, t2_champ2_sum2 = 7,
t2_champ3id = 122, t2_champ3_sum1 = 6, t2_champ3_sum2 = 4,
t2_champ4id = 238, t2_champ4_sum1 = 14, t2_champ4_sum2 = 4,
t2_champ5id = 412, t2_champ5_sum1 = 4, t2_champ5_sum2 = 3,
t2_towerKills = 5, t2_inhibitorKills = 0, t2_baronKills = 0,
t2_dragonKills = 1, t2_riftHeraldKills = 1, t2_ban1 = 114,
t2_ban2 = 67, t2_ban3 = 43, t2_ban4 = 16, t2_ban5 = 51), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame"))
df <- df %>%
mutate(winner = ifelse(winner == 1, "team1", "team2")) %>%
mutate(firstBlood = ifelse(firstBlood == 1, "team1", "team2")) %>%
mutate(firstTower = ifelse(firstTower == 1, "team1", "team2")) %>%
mutate(firstInhibitor = ifelse(firstInhibitor == 1, "team1", "team2")) %>%
mutate(firstBaron = ifelse(firstBaron == 1, "team1", "team2")) %>%
mutate(firstDragon = ifelse(firstDragon == 1, "team1", "team2")) %>%
mutate(firstRiftHerald = ifelse(firstRiftHerald == 1, "team1", "team2")) %>%
select(-gameId, -creationTime) %>%
filter(seasonId == 9) %>%
select(gameDuration, winner, firstBlood, firstTower, firstInhibitor, firstBaron, firstDragon,
firstRiftHerald)
As you can see, mutate is really redundant here, because I'm copying the code for every variable. Is there any way to apply the ifelse to columns that start with first, t1, etc. programatically?
You may try
library(dplyr)
df %>%
mutate(across(starts_with("t1")|starts_with("first"), ~ifelse(.x == 1, "team1", "team2")))
Park gave a best (one liner) solution. But if you want to look at some other options, here is how we can do it via using some other functions in dplyr:
df %>%
gather(key, value, firstBlood:t1_ban5) %>%
mutate(value = ifelse(value == 1, "team1", "team2")) %>%
spread(key, value) %>%
select(-gameId, -creationTime) %>%
filter(seasonId == 9) %>%
select(gameDuration, winner, firstBlood, firstTower, firstInhibitor, firstBaron, firstDragon,
firstRiftHerald)

R Plotly 3D: Add Mesh to Scatter plot without inheriting color scheme

Basically I have a scatter 3d Scatter plot with the points in two categories (selected, unselected) which are represented in red and grey. To better visualize the selected volume I want to add a cube with low opacity in blue. However, when I add the mesh for the cube, the cube appears in green and the unselected points in orange instead of grey.
In short: Why is the cube not blue and the unselected points not grey and how can I make them do so?
library(shiny)
library(plotly)
ui <- fluidPage(
tags$h2("This is my 3D plot."),
plotlyOutput("Plot3d", width = "1000px", height = "1000px")
)
server <- function(input, output, session){
output$Plot3d <- renderPlotly ({
#Defining data frame for scatter
df_scatter <- data.frame(X_VAL = rnorm(50, mean = 0.5, sd = 0.15),
Y_VAL = rnorm(50, mean = 0.5, sd = 0.15),
Z_VAL = rnorm(50, mean = 0.5, sd = 0.15),
SCATTER_COL = rep("unselected", 50))
#Every point inside of the cube is labeled "selected"
for (i in 1:nrow(df_scatter)){
if (df_scatter$X_VAL[i] < 0.5 && df_scatter$Y_VAL[i] < 0.5 && df_scatter$Z_VAL[i]< 0.5) {
df_scatter$SCATTER_COL[i] <- "selected"
}
}
df_scatter$SCATTER_COL <- factor(df_scatter$SCATTER_COL, levels = c("selected", "unselected"))
#Defining data frame for mesh
df_mesh <- data.frame(X_VAL = c(0, 0, 0.5, 0.5, 0, 0, 0.5, 0.5),
Y_VAL = c(0, 0.5, 0.5, 0, 0, 0.5, 0.5, 0),
Z_VAL = c(0, 0, 0, 0, 0.5, 0.5, 0.5, 0.5),
MESH_COL = factor(rep("CUBE", 8), levels = c("CUBE")))
plot_ly()%>%
add_markers(type = "scatter3d",
mode = "markers",
data = df_scatter,
x = ~X_VAL,
y = ~Y_VAL,
z = ~Z_VAL,
color = ~SCATTER_COL,
colors = c('red', 'grey')) %>%
#Here the trouble starts
add_trace(type = 'mesh3d',
data = df_mesh,
x = ~X_VAL,
y = ~Y_VAL,
z = ~Z_VAL,
i = c(7, 0, 0, 0, 4, 4, 6, 1, 4, 0, 3, 6),
j = c(3, 4, 1, 2, 5, 6, 5, 2, 0, 1, 6, 3),
k = c(0, 7, 2, 3, 6, 7, 1, 6, 5, 5, 7, 2),
color = ~MESH_COL,
colors = c("blue"),
inherit = FALSE,
opacity = 0.1
)
})
}
shinyApp(ui = ui, server=server)
Any help is greatly appreciated.
Try facecolor:
library(shiny)
library(plotly)
#> Loading required package: ggplot2
#>
#> Attaching package: 'plotly'
#> The following object is masked from 'package:ggplot2':
#>
#> last_plot
#> The following object is masked from 'package:stats':
#>
#> filter
#> The following object is masked from 'package:graphics':
#>
#> layout
mycolors <- colours()[2:10]
ui <- fluidPage(
tags$h2("This is my 3D plot."),
plotlyOutput("Plot3d", width = "1000px", height = "1000px")
)
server <- function(input, output, session){
output$Plot3d <- renderPlotly ({
#Defining data frame for scatter
df_scatter <- data.frame(X_VAL = rnorm(50, mean = 0.5, sd = 0.15),
Y_VAL = rnorm(50, mean = 0.5, sd = 0.15),
Z_VAL = rnorm(50, mean = 0.5, sd = 0.15),
SCATTER_COL = rep("unselected", 50))
#Every point inside of the cube is labeled "selected"
for (i in 1:nrow(df_scatter)){
if (df_scatter$X_VAL[i] < 0.5 && df_scatter$Y_VAL[i] < 0.5 && df_scatter$Z_VAL[i]< 0.5) {
df_scatter$SCATTER_COL[i] <- "selected"
}
}
df_scatter$SCATTER_COL <- factor(df_scatter$SCATTER_COL, levels = c("selected", "unselected"))
#Defining data frame for mesh
df_mesh <- data.frame(X_VAL = c(0, 0, 0.5, 0.5, 0, 0, 0.5, 0.5),
Y_VAL = c(0, 0.5, 0.5, 0, 0, 0.5, 0.5, 0),
Z_VAL = c(0, 0, 0, 0, 0.5, 0.5, 0.5, 0.5))
plot_ly()%>%
add_markers(type = "scatter3d",
mode = "markers",
data = df_scatter,
x = ~X_VAL,
y = ~Y_VAL,
z = ~Z_VAL,
color = ~SCATTER_COL,
colors = c('red', 'grey')) %>%
add_trace(type = 'mesh3d',
data = df_mesh,
x = ~X_VAL,
y = ~Y_VAL,
z = ~Z_VAL,
i = c(7, 0, 0, 0, 4, 4, 6, 1, 4, 0, 3, 6),
j = c(3, 4, 1, 2, 5, 6, 5, 2, 0, 1, 6, 3),
k = c(0, 7, 2, 3, 6, 7, 1, 6, 5, 5, 7, 2),
facecolor = rep("blue", 12),
opacity = 0.1
)
})
}
shinyApp(ui = ui, server=server)
Created on 2020-07-02 by the reprex package (v0.3.0)

Resources