I am trying to create a bar chart (column chart) to compare pre and post scores between participants. I managed to do this with a line graph, but I am struggling to visualise it as a bar chart. Can anyone help me with this?
Here is the data I am using:
structure(list(Participant = c(2, 3, 5, 7), PRE_QUIP_RS = c(24,
24, 20, 20), POST_QUIP_RS = c(10, 23, 24, 14), PRE_PDQ8 = c(11,
8, 10, 4), POST_PDQ8 = c(7, 7, 9, 4), PRE_GDS = c(1, 7, 1, 0),
POST_GDS = c(1, 4, 2, 0), PRE_PERSISTENT = c(9, 13, 6, 2),
POST_PERSISTENT = c(9, 13, 11, 3), PRE_EPISODIC = c(3, 4,
2, 0), POST_EPISODIC = c(2, 5, 6, 2), PRE_AVOIDANCE = c(6,
3, 0, 2), POST_AVOIDANCE = c(3, 3, 4, 1), PRE_IPQ = c(39,
48, 40, 37), POST_IPQ = c(16, 44, 30, 17), PRE_GSE = c(28,
31, 36, 29), POST_GSE = c(29, 30, 30, 29), PRE_BCI = c(11,
9, 5, 3), POST_BCI = c(3, 15, 0, 0)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -4L))
In terms of how I roughly want it to look, I want the pre and post bars placed next to each other for each participant, something like this:
You may try
library(tidyverse)
df %>%
  select(Participant, PRE_QUIP_RS, POST_QUIP_RS) %>%
  pivot_longer(cols = c(PRE_QUIP_RS, POST_QUIP_RS), names_to = "group") %>%
  mutate(group = str_split(group, "_", simplify = TRUE)[, 1],
         Participant = as.factor(Participant)) %>%
  ggplot(aes(x = Participant, y = value, group = group, fill = group)) +
  geom_col(position = "dodge")
To get the bars in PRE, POST order:
df %>%
  select(Participant, PRE_QUIP_RS, POST_QUIP_RS) %>%
  pivot_longer(cols = c(PRE_QUIP_RS, POST_QUIP_RS), names_to = "group") %>%
  mutate(group = str_split(group, "_", simplify = TRUE)[, 1] %>%
           factor(., levels = c("PRE", "POST")), # set the factor levels so PRE comes before POST
         Participant = as.factor(Participant)) %>%
  ggplot(aes(x = Participant, y = value, group = group, fill = group)) +
  geom_col(position = "dodge")
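Not part of the original answer, but since the data contains several PRE_/POST_ pairs, here is a sketch of how all of them could be compared in one faceted plot; the names_pattern split and facet_wrap() are my additions, assuming the same df as above:
library(tidyverse)
df %>%
  pivot_longer(-Participant,
               names_to = c("group", "measure"),
               names_pattern = "(PRE|POST)_(.*)") %>%
  mutate(group = factor(group, levels = c("PRE", "POST")),
         Participant = as.factor(Participant)) %>%
  ggplot(aes(x = Participant, y = value, fill = group)) +
  geom_col(position = "dodge") +
  facet_wrap(~ measure, scales = "free_y") # one panel per measure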
I have data as follows:
dat <- structure(list(rn = c("type_A", "type_B", "type_C"
), freq = list(c(0, 0, 0, 5, 7, 16, 28), c(2, 1, 0, 5, 0, 8),
c(0, 0, 3, 5, 12, 53, 73)), colspan = list(c(`25` = 1, `100` = 2,
`250` = 1, `500` = 1, `1000` = 1, Infinity = 3, SUM = 1), c(`25` = 1,
`100` = 2, `250` = 1, `500` = 1, Infinity = 4, SUM = 1), c(`25` = 1,
`50` = 1, `100` = 1, `250` = 1, `500` = 1, Infinity = 4, SUM = 1
))), row.names = c(NA, 3L), class = "data.frame")
total_colspan = c(0, 25, 50, 100, 250, 500, 1000, 1500, 3000, "Infinity", "SUM")
rn freq colspan
1 type_A 0, 0, 0, 5, 7, 16, 28 1, 2, 1, 1, 1, 3, 1
2 type_B 2, 1, 0, 5, 0, 8 1, 2, 1, 1, 4, 1
3 type_C 0, 0, 3, 5, 12, 53, 73 1, 1, 1, 1, 1, 4, 1
I would like to create a table with varying column spans (they all add up to 10) in an R Markdown Word document, like the table below:
I was advised to try flextable for this (link). I am trying to use the header options to create these varying colspans. I thought about doing something like:
dat_table <- flextable(dat)
dat_table <- lapply(dat_table, add_header_row, values = unlist(freq), colwidths = unlist(colspan))
But this is not working.
EDIT:
My second attempt:
dat <- structure(list(rn = c("type_A", "type_B", "type_C"
), freq = list(c(0, 0, 0, 5, 7, 16, 28), c(2, 1, 0, 5, 0, 8),
c(0, 0, 3, 5, 12, 53, 73)), colspan = list(c(1, 2, 1, 1, 1, 3, 1), c(1, 2, 1, 1, 4, 1), c(1, 1, 1, 1, 1, 4, 1
))), row.names = c(NA, 3L), class = "data.frame")
# The thresholds as in the picture
thresholds <- data.frame(c("Lower threshold","Upper threshold"), c(0,25), c(25,50), c(50,100), c(100,250), c(250,500),c(500,1000),c(1000,1500),c(1500,3000),c(3000, "Infinity"), c("", "SUM"))
names(thresholds) <- c("One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten", "Eleven")
thresholds <- flextable(thresholds)
# There was one column too few in the example
dat <- transform(dat, colspan=Map('c', 1, dat[["colspan"]] ))
dat <- transform(dat, freq=Map('c', "", dat[["freq"]] ))
# for loop to stick to the syntax
for (i in nrow(dat)) {
thresholds <- add_header_row(thresholds, values = dat[[2]][[i]], colwidths = dat[[3]][[i]])
}
For some reason it only adds one row (while it allows for more headers to be added).
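The likely culprit is the loop itself: for (i in nrow(dat)) iterates over the single value nrow(dat) (here 3), so add_header_row() only runs once. Looping over seq_len(nrow(dat)) should add one header row per line of dat; a minimal sketch of the same loop with only the iterator changed:
for (i in seq_len(nrow(dat))) { # i = 1, 2, 3 instead of just 3
  thresholds <- add_header_row(thresholds, values = dat[[2]][[i]], colwidths = dat[[3]][[i]])
}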
Here's a solution that is perhaps overkill, but it seems to do what you're looking for:
library(tidyverse)
library(flextable)
dat <- structure(list(rn = c("type_A", "type_B", "type_C"
), freq = list(c(0, 0, 0, 5, 7, 16, 28), c(2, 1, 0, 5, 0, 8),
c(0, 0, 3, 5, 12, 53, 73)), colspan = list(c(1, 2, 1, 1, 1, 3, 1), c(1, 2, 1, 1, 4, 1), c(1, 1, 1, 1, 1, 4, 1
))), row.names = c(NA, 3L), class = "data.frame")
# The thresholds as in the picture
thresholds <- data.frame(c("Lower threshold","Upper threshold"), c(0,25), c(25,50), c(50,100), c(100,250), c(250,500),c(500,1000),c(1000,1500),c(1500,3000),c(3000, "Infinity"), c("", "SUM"))
names(thresholds) <- c("One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten", "Eleven")
out <- map(1:nrow(dat), function(index){
out <- data.frame("freq" = dat$freq[[index]],
"span" = dat$colspan[[index]]) %>%
tidyr::uncount(span, .id = 'span') %>%
mutate(freq = ifelse(span>1, NA, freq)) %>%
t %>%
as.data.frame() %>%
mutate(rn = dat$rn[[index]],
across(everything(), ~as.character(.))) %>%
select(rn, everything()) %>%
set_names(nm = names(thresholds)) %>%
slice(1)
return(out)
})
combined <- thresholds %>%
mutate(across(everything(), ~as.character(.))) %>%
bind_rows(out)
spans <- map(1:length(dat$colspan), function(index){
spans <- dat$colspan[[index]] %>%
as_tibble() %>%
mutate(idx = row_number()) %>%
tidyr::uncount(value, .remove = F) %>%
group_by(idx) %>%
mutate(pos = 1:n(),
value = ifelse(pos != 1, 0, value)) %>%
ungroup() %>%
select(value) %>%
t
return(append(1, spans))
})
myft <- flextable(combined) %>%
theme_box()
myft$body$spans$rows[3:nrow(myft$body$spans$rows),] <- matrix(unlist(spans), ncol = ncol(combined), byrow = TRUE)
myft
Created on 2022-04-29 by the reprex package (v2.0.1)
This makes the table:
I don't think you can pass colspan options here without quite a bit of hacking. If at all possible, I would suggest specifying manually which cells need to be merged. As far as I know, this is the only option in flextable:
library(flextable)
library(tidyverse)
# clean up the object
dat_clean <- dat %>%
mutate(freq = map2(freq, colspan, ~rep(.x, .y))) %>%
select(-colspan) %>%
unnest(freq) %>%
group_by(rn) %>%
mutate(col = paste0("col_", row_number())) %>%
pivot_wider(names_from = col, values_from = freq)
flextable(dat_clean) %>%
merge_at(i = 1, j = 3:4, part = "body") %>%
merge_at(i = 1, j = 7:9, part = "body") %>%
border_inner(part="all", border = fp_border_default()) %>%
align(align = "center", part = "all")
Created on 2022-04-25 by the reprex package (v2.0.1)
It is a bit tricky to merge those two tables. This is the closest I came to reproducing your desired table. First I created your data in a suitable way:
thresholds <- data.frame(c("Lower threshold", "Upper threshold", "type_A", "type_B", "type_C"),
                         c(0, 25, 0, 2, 0),
                         c(25, 50, 0, 1, 0),
                         c(50, 100, NA, NA, 3),
                         c(100, 250, 0, 0, 5),
                         c(250, 500, 5, 5, 12),
                         c(500, 1000, 7, 0, 53),
                         c(1000, 1500, 16, NA, NA),
                         c(1500, 3000, NA, NA, NA),
                         c(3000, "Infinity", NA, NA, NA),
                         c("SUM", "SUM", 28, 8, 73))
names(thresholds) <- c("One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten", "Eleven")
Using the officer package you can give the horizontal and vertical lines the colors you want. With the merge_at function you can merge certain cells, and with the border_inner function you get borders inside the table. You can use the following code:
library(officer)
std_border = fp_border(color="gray")
library(flextable)
library(dplyr)
thresholds %>%
flextable() %>%
merge_at(i = 3, j = 3:4, part = "body") %>%
merge_at(i = 4, j = 3:4, part = "body") %>%
merge_at(i = 3, j = 8:10, part = "body") %>%
merge_at(i = 4, j = 7:10, part = "body") %>%
merge_at(i = 5, j = 7:10, part = "body") %>%
border_inner(border = std_border) %>%
align(align = "left", part = "all")
Output:
I want to get a table with the average of "value" for 1 day (already calculated), for the last 2 days, the last 3 days, and so on up to the last 10 days.
df<- data.frame(day = seq(as.Date("1910/1/1"), as.Date("1910/1/10"), "days"), value = c(1, 4, 7, 33, 2, 6, 9, 88, 5, 6))
Any hints?
df<- data.frame(day = seq(as.Date("1910/1/1"), as.Date("1910/1/10"), "days"), value = c(1, 4, 7, 33, 2, 6, 9, 88, 5, 6))
library(tidyverse)
library(slider)
df <- 1:9 %>%
map(
~slide_dbl(df$value, ~mean(.x), .before = .x, .complete = F)
) %>%
bind_cols() %>%
bind_cols(df, .) %>%
set_names(c("day", "value", paste0("value", 2:10)))
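Not from the original answer, but to show what each mapped call contributes: with .before = 1, slide_dbl() returns the mean of the current day and the previous one, so the first bound column ("value2") is the 2-day rolling mean. The output below is just the arithmetic worked out on the question's values:
library(slider)
# 2-day trailing mean of df$value = c(1, 4, 7, 33, 2, 6, 9, 88, 5, 6)
slide_dbl(df$value, mean, .before = 1, .complete = FALSE)
# 1.0  2.5  5.5 20.0 17.5  4.0  7.5 48.5 46.5  5.5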
Here is my initial data set:
data_x <- tribble(
~price, ~id, ~cost, ~revenue,
1, 10, 0.20, 0,
2, 20, 0.30, 60,
3, 20, 0.30, 0,
4, 10, 0.20, 100,
5, 30, 0.10, 40,
6, 10, 0.20, 0,
1, 20, 0.30, 80,
2, 10, 0.20, 0,
3, 30, 0.10, 20,
3, 20, 0.30, 40
)
Then I create a new variable, zet:
data_y <- data_x %>%
mutate(zet = cost/revenue) %>%
mutate_if(is.numeric, list(~na_if(., Inf))) %>%
mutate_all(funs(replace_na(.,0)))
Now I want to plot the price distribution while also showing the zet distribution. Here is my desired plot:
To do this, I first wanted to see the price and zet distributions, even though they are not percentages yet.
price_dist <- data_y %>%
group_by(priceseg = cut(as.numeric(price), c(0, 1, 3, 5, 6))) %>%
summarise(price_n = n_distinct(price)) %>%
pivot_wider(names_from = priceseg, values_from = price_n)
zet_dist <- data_y %>%
group_by(priceseg = cut(as.numeric(price), c(0, 1, 3, 5, 6))) %>%
summarise(zet_n = n_distinct(zet)) %>%
pivot_wider(names_from = priceseg, values_from = zet_n)
I would be grateful if you could help me to plot my desired chart.
d <- data_y %>%
group_by(priceseg = cut(as.numeric(price), c(0, 1, 3, 5, 6))) %>%
summarise(price_n = n_distinct(price),
zet_n = n_distinct(zet)) %>%
mutate(price_n = 100 * prop.table(price_n),
zet_n2 = 100 * prop.table(zet_n))
ggplot(d) +
geom_col(aes(x = priceseg, y = price_n)) +
geom_line(data = d, mapping = aes(x = priceseg, y = zet_n2, group = 1)) +
geom_label(data = d, mapping = aes(x = priceseg, y = zet_n2, label = zet_n), nudge_y = 5)
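For reference (not part of the original answer): prop.table() simply divides each element by the sum, so 100 * prop.table(price_n) turns the per-segment counts into percentages. A one-line illustration:
# prop.table(x) is x / sum(x)
100 * prop.table(c(1, 2, 2, 1))
# 16.66667 33.33333 33.33333 16.66667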
With big data sets, this code takes a really long time to run. Does anyone have a simpler way of doing this?
I ran this code and it locked up my machine for a while:
SID_Scores <- filtered %>%
group_by(SalesPerson_SID) %>%
summarise(
Brand_Advocacy = mean(Q1, na.rm = TRUE),
Vehicle_Satisfaction = mean(Q2, na.rm = TRUE),
Dealer_Satisfaction = mean(Q3, na.rm = TRUE),
Sales_Advocacy = mean(Q6N_srvsls_Recommend_10Pt, na.rm = TRUE),
Overall_SalesCon = mean(Q5N1_ovrsls, na.rm = TRUE),
Understanding_Needs = mean(Q7N1_slsneeds, na.rm = TRUE),
Product_Features = mean(Q7N2_slsfeat, na.rm = TRUE),
Professional_Court = mean(Q7N3_slsprof, na.rm = TRUE),
Feel_Valued = mean(SlsValued, na.rm = TRUE),
Trustworthy = mean(SlsTrustworthy, na.rm = TRUE),
Financial_Arrang = mean(Q5N2_ovrfin, na.rm = TRUE),
Financial_Agreement = mean(Q8N2_finease, na.rm = TRUE),
Respect_Time = mean(Q8N3_fintime, na.rm = TRUE),
Honesty = mean(Q8N4_finhon, na.rm = TRUE),
Delivery = mean(Q5N3_ovrdlv, na.rm = TRUE),
U_Pairing = (sum(filtered$Q_UCPairing == '1', na.rm = TRUE)) / (
sum(filtered$Q_UCPairing == '1', na.rm = TRUE) +
sum(filtered$Q_UCPairing == '2', na.rm = TRUE)
),
U_Demonstrate = (sum(filtered$Q_UCDemonstrate == '1', na.rm = TRUE)) /
(
sum(filtered$Q_UCDemonstrate == '1', na.rm = TRUE) +
sum(filtered$Q_UCDemonstrate == '2', na.rm = TRUE)
),
U_FreeTrials = (sum(filtered$Q_UCFreeTrials == '1', na.rm = TRUE)) /
(
sum(filtered$Q_UCFreeTrials == '1', na.rm = TRUE) +
sum(filtered$Q_UCFreeTrials == '2', na.rm = TRUE)
),
U_Presets = (sum(filtered$Q_UCRadioPreset == '1', na.rm = TRUE)) /
(
sum(filtered$Q_UCRadioPreset == '1', na.rm = TRUE) +
sum(filtered$Q_UCRadioPreset == '2', na.rm = TRUE)
)
) %>%
group_by(SalesPerson_SID)
This has been running for several hours now; filtered has 540,000 rows with 35 variables (a possible simplification is sketched after the sample data below).
Here is the code to reproduce some sample data:
structure(list(EventType = c("001", "001", "001", "001", "001",
"001"), `Survey Type` = c("Sales", "Sales", "Sales", "Sales",
"Sales", "Sales"), ModelYear = c(2018, 2019, 2018, 2018, 2018,
2018), PurchaseDate = c(20181209, 20181216, 20181209, 20181215,
20181218, 20181218), `ZoneCode (BC)` = c("32", "71", "71", "51",
"63", "74"), SalesDistrict = c("G", "D", "G", "C", "T", "G"),
SalesGroupSize = c("E", "E", "B", "D", "D", "B"), DealerCode = c("60698",
"45622", "69319", "36277", "44107", "26922"), Q1 = c(9, 8,
10, 10, 10, 9), Q2 = c(9, 10, 10, 10, 10, 9), Q3 = c(8, 10,
10, 10, 10, 9), Q6N_srvsls_Recommend_10Pt = c(9, 10, 10,
10, 10, 9), Q5N1_ovrsls = c(8, 10, 10, 10, 10, 8), Q5N2_ovrfin = c(9,
10, 10, 10, 10, 7), Q5N3_ovrdlv = c(8, NA, 10, 10, 10, 6),
Q5N4_srvsls_facility = c(9, 10, 10, 10, 10, 10), Q7N1_slsneeds = c(9,
10, 10, 10, 10, 9), Q7N2_slsfeat = c(9, 10, 10, 10, 10, 9
), Q7N3_slsprof = c(10, 10, 10, 10, 10, 9), Q8N1_finneg = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Q8N2_finease = c(9,
10, 10, 9, 10, 7), Q8N3_fintime = c(9, 10, 10, 10, 10, 10
), Q8N4_finhon = c(9, 10, 10, 10, 10, 9), Q9 = c(0, 0, 0,
0, 0, 0), SlsValued = c(9, 10, 10, 10, 10, 8), SlsTrustworthy = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), SlsPaperwork = c(NA,
3, 2, 2, 2, NA), `SlsF&ITransaction` = c(3, 2, 2, 3, 1, 4
), SalesPerson_SID = c("S39547M", "S56830O", "S35478Q", "S61788P",
"S35680B", "S75254K"), Q_UCPairing = c(1, 1, 1, 1, 1, 1),
Q_UCDemonstrate = c(1, 1, 1, 1, NA, 1), Q_UCFreeTrials = c(1,
1, 1, 1, 1, 1), Q_UCRadioPreset = c(1, 1, 1, 2, 1, 1)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -6L), .Names = c("EventType",
"Survey Type", "ModelYear", "PurchaseDate", "ZoneCode (BC)",
"SalesDistrict", "SalesGroupSize", "DealerCode", "Q1", "Q2",
"Q3", "Q6N_srvsls_Recommend_10Pt", "Q5N1_ovrsls", "Q5N2_ovrfin",
"Q5N3_ovrdlv", "Q5N4_srvsls_facility", "Q7N1_slsneeds", "Q7N2_slsfeat",
"Q7N3_slsprof", "Q8N1_finneg", "Q8N2_finease", "Q8N3_fintime",
"Q8N4_finhon", "Q9", "SlsValued", "SlsTrustworthy", "SlsPaperwork",
"SlsF&ITransaction", "SalesPerson_SID", "Q_UCPairing", "Q_UCDemonstrate",
"Q_UCFreeTrials", "Q_UCRadioPreset"))