In my plot below, d_math and d_hyp are each {0,1} variables. Given this fact, in my plot below, I was wondering if we can combine the two plots into one, just like in the desired plot further below?
ps. I'm open to any R packages.
multivariate <- read.csv('https://raw.githubusercontent.com/hkil/m/master/bv.csv')
library(nlme)
library(effects) # for plot
m2 <- lme(var ~ 0 + d_math + d_hyp + d_math:I(grade-2) + d_hyp:I(grade-2),
random = ~ 0 + d_math + d_hyp + d_math:I(grade-2) + d_hyp:I(grade-2) | id, data = multivariate,
na.action = na.omit, weights = varIdent(c(hyp=.3), form = ~1|grp),
control = lmeControl(maxIter = 200, msMaxIter = 200, niterEM = 50,
msMaxEval = 400))
plot(allEffects(m2), multiline = TRUE, x.var="grade")
Desired:
We could use tidyverse to create a single plot. Loop over the list of allEffects output with imap, convert to tibble, select the columns needed, row bind the list elements to single dataset (_dfr), unite two columns to a single, and use ggplot for plotting
library(dplyr)
library(tidyr)
library(purrr)
library(ggplot2)
imap_dfr(allEffects(m2), ~ as_tibble(.x) %>%
mutate(dname = grep("d_", names(.), value = TRUE)) %>%
select(dname, dvalue = starts_with('d_'), grade, fit) %>%
mutate(grp = .y)) %>%
unite(dname, dname, dvalue, sep=" = ") %>%
ggplot(aes(x = grade, y = fit, color = dname)) +
geom_line() +
theme_bw() #+
# facet_wrap(~ grp)
-output
If we want the labels at the end of line, use directlabels
library(directlabels)
imap_dfr(allEffects(m2), ~ as_tibble(.x) %>%
mutate(dname = grep("d_", names(.), value = TRUE)) %>%
select(dname, dvalue = starts_with('d_'), grade, fit) %>%
mutate(grp = .y)) %>%
unite(dname, dname, dvalue, sep=" = ") %>%
ggplot(aes(x = grade, y = fit, group = dname, color = dname)) +
geom_line() +
theme_bw() +
scale_colour_discrete(guide = 'none') +
geom_dl(aes(label = dname), method="last.qp", cex = 0.8)
Also, this can be done for each 'dvalue' as a facet
imap_dfr(allEffects(m2), ~ as_tibble(.x) %>%
mutate(dname = grep("d_", names(.), value = TRUE)) %>%
select(dname, dvalue = starts_with('d_'), grade, fit) %>%
mutate(grp = .y)) %>%
unite(dname, dname, dvalue, sep=" = ", remove = FALSE) %>%
ggplot(aes(x = grade, y = fit, group = dname, color = dname)) +
geom_line() +
theme_bw() +
scale_colour_discrete(guide = 'none') +
geom_dl(aes(label = dname), method="last.qp", cex = 0.8) +
facet_wrap(~ dvalue)
Or if we need only a specific level, then filter
imap_dfr(allEffects(m2), ~ as_tibble(.x) %>%
mutate(dname = grep("d_", names(.), value = TRUE)) %>%
select(dname, dvalue = starts_with('d_'), grade, fit) %>%
mutate(grp = .y)) %>%
unite(dname, dname, dvalue, sep=" = ") %>%
filter(dname %in% c("d_hyp = 1", "d_math = 1")) %>%
ggplot(., aes(x = grade, y = fit, colour = dname, group = dname)) +
geom_line() +
scale_colour_discrete(guide = 'none') +
geom_dl(aes(label = dname), method="last.qp", cex = 0.6) +
theme_bw()
You could do it like this with lattice and a bit more brute force than #akrun's approach:
e <- allEffects(m2)
f1 <- matrix(e[[1]]$fit, ncol=5) # math
f2 <- matrix(e[[2]]$fit, ncol=5) # hyp
dat = data.frame(
fit = c(f1[5,], f2[5,]),
grade = rep(c(2,4,5,6,8), 2),
variable = factor(rep(1:2, each=5),
labels=c("Math=1", "Hyp=1"))
)
xyplot(fit ~ grade, data=dat, group=variable, type="l",
auto.key=list(space="top", lines=TRUE,points=FALSE))
Related
I would like to make a simple flow graph.
Here is my code:
## Data
x = tibble(qms = c("FLOW", "FLOW"),
move1 = c("Birth", "Birth"),
move2 = c("Direct", NA),
freq = c(100, 50))
## Graph
x %>%
mutate(id = qms) %>%
to_lodes_form(axis = 2:3, id = id) %>%
na.omit() %>%
ggplot(aes(x = x, stratum = stratum, alluvium = id,
y = freq, label = stratum)) +
scale_x_discrete(expand = c(.1, .1)) +
geom_flow(aes(fill = qms),stat = "alluvium") +
geom_stratum(aes(fill = stratum), show.legend=FALSE) +
geom_text(stat = "stratum", size = 3)
This is the outcome:
My desired outcome is that:
How can I express the decreasing pattern with the missing value?
By slightly reshaping your data you can get what you want. I think the key is to map the alluvium to something fixed like 1 so that it will be a single flow, and mapping stratum to the same variable as x.
library(tidyverse)
library(ggalluvial)
x <- tibble(x = c("Birth", "Direct"),
y = c(100, 50))
x %>%
ggplot(aes(x, y, alluvium = 1, stratum = x)) +
geom_alluvium() +
geom_stratum()
Created on 2022-11-15 with reprex v2.0.2
There's a given dataframe
df <- data.frame("V1" = c(0,0,0,0,0,2,2,2,2,2,3,3,3),
"V2" = c(9,9,9,0,0,2,2,2,0,0,3,0,0))
and I would like to create a bar plot out of it, where each value has a specific colour. With the great help of one of users we managed to create code
p <- df %>%
mutate(index = 1) %>%
pivot_longer(cols = -index) %>%
mutate(color = case_when(value == 9 ~ 'white',
value == 0 ~ 'darkgreen',
value == 1 ~ 'blue',
value == 2 ~ 'red',
value == 3 ~ 'darkorange')) %>%
ggplot(aes(x = index, y = name, fill = color)) +
geom_col(width = 0.3) +
scale_fill_identity(guide = 'legend') +
theme_classic() +
scale_x_continuous(expand = c(0,0), breaks = pretty_breaks(2))
vec_colors <- df %>%
mutate(index = 1) %>%
pivot_longer(cols = -index) %>%
mutate(color = case_when(value == 9 ~ 'white',
value == 0 ~ 'darkgreen',
value == 1 ~ 'blue',
value == 2 ~ 'red',
value == 3 ~ 'darkorange')) %>%
arrange(name) %>%
pull(color)
q <- ggplot_build(p)
q$data[[1]] <- q$data[[1]] %>%
group_by(y) %>%
arrange(x, .by_group = TRUE)
q$data[[1]]$fill <- vec_colors
q <- ggplot_gtable(q)
plot(q)
that results in such a plot
Question: how do I create a legend that looks like this?
Or like this?
Well, first your approach to achieve your desired result is quite complicated. Instead you could simplify using a named color vector and switching to scale_fill_manual. Doing so will give you atomatically a legend similar to your desired result which I tweak a bit using the breaks argument. Also, instead of geom_col I would go for geom_tile. To this end use the row number as the index.
library(dplyr)
library(tidyr)
library(ggplot2)
df_long <- df %>%
mutate(index = row_number()) %>%
pivot_longer(cols = -index)
cols <- c( 'white', 'darkgreen', 'blue', 'red', 'darkorange')
names(cols) <- c(9, 0, 1, 2, 3)
ggplot(df_long, aes(x = index, y = name, fill = factor(value))) +
geom_tile(height = .3) +
scale_fill_manual(values = cols, limits = force, breaks = c(0, 3, 2), name = "State") +
theme_classic() +
scale_x_continuous(expand = c(0,0), breaks = scales::pretty_breaks(2))
I am new to crosstalk & trying to make rmarkdown file more interactive by using on bar+line plot but it is not giving line on the plot and also gets weird when I change country.
library(tidyverse)
library(plotly)
library(crosstalk)
library(glue)
library(scales)
library(tidytext)
load data:
file_url <- "https://raw.githubusercontent.com/johnsnow09/covid19-df_stack-code/main/test_crosswalk.csv"
test_df <- read.csv(url(file_url))
Country_selected = c("Brazil")
selected_case_type = c("Confirmed_daily")
trend_sd <- test_df %>%
filter(Daily_Cases_type %in% selected_case_type
# Country.Region %in% Country_selected,
) %>%
select(Country.Region, date, Cases_count)%>%
arrange(date) %>%
group_by(Country.Region) %>%
mutate(new_avg = cumsum(Cases_count)/ seq_len(length(Cases_count))) %>%
ungroup() %>%
SharedData$new()
bscols(widths = c(9, 3),
list(
filter_select(id = "country", label = "Country:", sharedData = trend_sd, group = ~ Country.Region),
ggplotly(ggplot(data = trend_sd) +
geom_col(aes(x = date, y = Cases_count), fill = "turquoise", alpha = .3) +
geom_point(aes(x = date, y = new_avg), col = "tomato") +
geom_line(aes(x = date, y = new_avg), col = "tomato", size = .9, alpha = .3) +
scale_y_continuous(labels = comma) +
# expand_limits(y = 100000) +
labs(title = glue("{Country_selected}'s {selected_case_type} Cases {date_from} onwards"),
caption = "Data source: covid19.analytics")
))
)
This doesn't give correct line plot & even when I change country to some other then bars gets distorted.
Code & Plot Result below without crosstalk & plotly:
Country_selected = c("India") # can be selective
selected_case_type = c("Confirmed_daily")
test_df %>%
filter(Daily_Cases_type %in% selected_case_type,
Country.Region %in% Country_selected,
) %>%
select(Country.Region, date, Cases_count)%>%
arrange(date) %>%
group_by(Country.Region) %>%
mutate(new_avg = cumsum(Cases_count)/ seq_len(length(Cases_count))) %>%
ungroup() %>%
ggplot() +
geom_col(aes(x = date, y = Cases_count), fill = "turquoise", alpha = .3) +
geom_point(aes(x = date, y = new_avg), col = "tomato") +
geom_line(aes(x = date, y = new_avg), col = "tomato", size = .9, alpha = .3) +
scale_y_continuous(labels = comma) +
labs(title = glue("{Country_selected}'s {selected_case_type} Cases {date_from} onwards"),
subtitle = "With Average Daily Cases Trend line",
caption = "Data source: covid19.analytics")
I have some troubles with my code. I'm very very beginner in R, so I would like some help. I have a dataframe and I need to make an hist chart and then highlight some points. But I cannot understand how to find those points in my dataset. Here is and example of what I have.
x <- c("a","b","c","d","f","g","h","i","j","k")
y <- c(197421,77506,130474,18365,30470,22518,70183,15378,29747,11148)
z <- data.frame(x,y)
hist(z$y)
For example, how can I find in the hist where is the "a" and "h" value placed? and in a barplot? I tried the function points, but I cannot find the coordinates. Please let me know how could I make that . Thanks in advance.
Here is a way with dplyr and ggplot2. The approach is to cut the y variable into bins and then use summarise to create the counts and the labels.
library(dplyr)
library(ggplot2)
z %>%
mutate(bins = cut(y, seq(0, 200000, 50000))) %>%
group_by(bins) %>%
summarise(xes = paste0(x, collapse = ", "),
count = n()) %>%
ggplot() +
geom_bar(aes(x = bins, y = count), stat = "identity", color = "black", fill = "grey") +
geom_text(aes(x = bins, y = count + 0.5, label = xes)) +
xlab("y")
Here is a more complicated way that makes a plot that looks more like what hist() produces.
z2 <- z %>%
mutate(bins = cut(y, seq(0, 200000, 50000))) %>%
group_by(bins) %>%
summarise(xes = paste0(x, collapse = ", "),
count = n()) %>%
separate(bins, into = c("start", "end"), sep = ",") %>%
mutate(across(start:end, ~as.numeric(str_remove(., "\\(|\\]"))))
ggplot() +
geom_histogram(data = z, aes(x = y), breaks = seq(0, 200000, 50000),
color = "black", fill = "grey") +
geom_text(data = z2, aes(x = (start + end) / 2, y = count + 0.5, label = xes))
I've spent the past few days looking through so many forums and sites, so I hope you can help.
You can find the data I've been using here, as well as the three model predictions.
I'm predicting subjective well-being (i.e. positive affect, negative affect, and life satisfaction) from last night's person-centered sleep satisfaction. I came up with three models that I now want to plot next to each other. The problem is that facet_wrap puts the models next to each other alphabetically and not how I want them (positive affect, negative affect, and life satisfaction).
You can view my current graph here
This is my code to get the graph going:
library("afex")
library("tidyverse")
library("tidylog")
theme_set(theme_bw(base_size = 15))
library("sjPlot")
d3 <- read.csv("d3.csv")
d3 <- d3 %>%
group_by(ID) %>%
mutate(SD_person_centred = sleepDur - mean(sleepDur, na.rm = TRUE)) %>%
mutate(sleep_satisfaction_person_centred = Sleep_quality_open - mean(Sleep_quality_open, na.rm = TRUE)) %>%
mutate(MS_person_centred = mid_sleep_modified - mean(mid_sleep_modified, na.rm = TRUE)) %>%
mutate(MS_person_freeday_centred = abs(mid_sleep_modified -
mean(mid_sleep_modified[Routine_work_day_open == "No"], na.rm = TRUE))) %>%
mutate(MS_person_mctq_centred = abs(mid_sleep_modified - MCTQ_MSF_number)) %>%
mutate(sleep_onset_person_centred = Sleep_Onset_open - mean(Sleep_Onset_open, na.rm = TRUE)) %>%
mutate(sleep_efficiency_person_centred = SleepEfficiency_act - mean(SleepEfficiency_act, na.rm = TRUE)) %>%
ungroup
m_p_sls_1 <- readRDS("m_p_sls_1.rds")
m_n_sls_1 <- readRDS("m_n_sls_1.rds")
m_s_sls_1 <- readRDS("m_s_sls_1.rds")
tmp <- get_model_data(m_p_sls_1$full_model, type = "pred", terms = "sleep_satisfaction_person_centred")
tmp$DV <- "positive_affect"
tmp2 <- get_model_data(m_n_sls_1$full_model, type = "pred", terms = "sleep_satisfaction_person_centred")
tmp2$DV <- "negative_affect"
tmp3 <- get_model_data(m_s_sls_1$full_model, type = "pred", terms = "sleep_satisfaction_person_centred")
tmp3$DV <- "life_satisfaction"
tmp <- bind_rows(tmp, tmp2, tmp3)
tmp
tmp$DV
Here I change tmp$DV into a factor as this was the solution I found online. However, this did not change anything:
tmp$DV <- factor(tmp$DV, levels=c("positive_affect","negative_affect","life_satisfaction"))
levels(tmp$DV)
This is my code for the graph:
variable_names <- list(
"positive_affect" = "positive affect" ,
"negative_affect" = "negative affect",
"life_satisfaction" = "life satisfaction"
)
variable_labeller <- function(variable,value){
return(variable_names[value])
}
d3 %>%
pivot_longer(cols="positive_affect":"life_satisfaction", names_to = "DV", values_to = "Score") %>%
ggplot(aes(x = sleep_satisfaction_person_centred, y = Score)) +
geom_ribbon(data = tmp, aes(x = x, ymin = conf.low, ymax = conf.high, y = predicted),
fill = "lightgrey") +
geom_line(data = tmp, aes(x = x, y = predicted, group = 1)) +
geom_point(alpha = 0.2) +
facet_wrap(~DV, scales = "free_y",labeller=variable_labeller) +
labs(y = "Score",
x = "Sleep satisfaction person centered")
When I give the factor of tmp$DV a different name, i.e. tmp$facet and add this to my code, I do get the right order, but the scales are not free on the y-axis anymore. Please have a look here.
tmp$facet <- factor(tmp$DV, levels=c("positive_affect", "negative_affect", "life_satisfaction"))
d3 %>%
pivot_longer(cols="positive_affect":"life_satisfaction", names_to = "DV", values_to = "Score") %>%
ggplot(aes(x = sleep_satisfaction_person_centred, y = Score)) +
geom_ribbon(data = tmp, aes(x = x, ymin = conf.low, ymax = conf.high, y = predicted),
fill = "lightgrey") +
geom_line(data = tmp, aes(x = x, y = predicted, group = 1)) +
geom_point(alpha = 0.2) +
facet_wrap(~facet, scales = "free_y",labeller=variable_labeller) +
labs(y = "Score",
x = "Sleep satisfaction person centered")
When I change pivot_longer to facet in the first row, I get the same graph as the one before.
Sorry for the long post, but I tried to be as clear as possible. Please let me know if I wasn't.
I'd appreciate any kind of hints. Thanks a lot for your time.
All the best,
Anita
Just got the answer from my colleague Henrik Singmann, in case anybody was wondering:
d3 %>%
pivot_longer(cols="positive_affect":"life_satisfaction", names_to = "DV", values_to = "Score") %>%
mutate(DV = factor(DV, levels=c("positive_affect","negative_affect","life_satisfaction"))) %>%
ggplot(aes(x = sleep_satisfaction_person_centred, y = Score)) +
geom_ribbon(data = tmp, aes(x = x, ymin = conf.low, ymax = conf.high, y = predicted),
fill = "lightgrey") +
geom_line(data = tmp, aes(x = x, y = predicted, group = 1)) +
geom_point(alpha = 0.2) +
facet_wrap(~DV, scales = "free_y",labeller=variable_labeller) +
labs(y = "Score",
x = "Sleep satisfaction person centered")
So the factor needs to be defined in d3 before being handed over to ggplot.