Indicating An Event on a Plot - r

I am studying patient fluid intake and frequency of urination.
I'm collecting volume and time of fluids drank and time of urination.
I want to indicate, on a graph of liquid intake, when urination occurs.
Here's my data and code so far ...
# Clock times of every logged event on the study day.
time_log <- c(
  "01:10", "05:50", "06:00", "06:15", "06:25",
  "09:35", "10:00", "12:40", "14:00"
)
# Prefix the study date and parse the result into date-times.
time_log <- strptime(paste("04/04/2019", time_log), format = "%d/%m/%Y %H:%M")
# Clock-time-only rendering of each event time.
time_view <- format(time_log, "%H:%M")

# One event code per time; Volume holds litres and is NA for "u" events.
event <- c("u", "u", "T", "T", "u", "u", "T", "T", "u")
Volume <- c(NA, NA, 0.25, 0.25, NA, NA, 0.125, 0.625, NA)

patient_data <- data.frame(time_log, time_view, event, Volume)
total_liquids <- sum(patient_data$Volume, na.rm = TRUE)

# Volume against time, with the x-axis spanning the whole study day.
plot(
  patient_data$time_log,
  patient_data$Volume,
  xlim = c(
    as.POSIXct("2019-04-04 00:00:00"),
    as.POSIXct("2019-04-04 24:00:00")
  ),
  xlab = "Hours of Study",
  ylab = "Volume of Liquid Drank /L",
  main = paste("Total Liquids Drank = ", total_liquids, " L")
)
This is related to my earlier question, "Time Series Data - How to", which was poorly received by the Stack Overflow community.

Here's a way using ggplot2 and dashed vertical lines. When adding the geom_vline, we subset the data for just the urination events (i.e., event == "u").
library(ggplot2)
# Scatter the drink volumes over time and mark every urination event with a
# dashed vertical line.
ggplot(patient_data, aes(x = time_log, y = Volume)) +
  geom_point() +
  geom_vline(
    # Only the urination rows (event == "u") supply line positions.
    data = subset(patient_data, event == "u"),
    aes(xintercept = time_log),
    linetype = 2
  ) +
  labs(
    title = paste("Total Liquids Drank = ", total_liquids, " L"),
    subtitle = "Dashed line represents urination",
    x = "Hours of Study",
    y = "Volume of Liquid Drank (L)"
  ) +
  # Anchor the y-axis at 0 so it doesn't start at 0.1 or something misleading.
  scale_y_continuous(limits = c(0, NA))

Related

speech-gaze activity plot in ggplot2

I have data with Utterances by speakers in conversation as well as their gazes to one another. The speakers' gazes are in columns A_aoi, B_aoi, and C_aoi, the gaze durations are in A_aoi_dur, B_aoi_dur, and C_aoi_dur. Here's a reproducible snippet of the data:
df0 <- structure(list(Line = c(105L, 106L, 107L, 109L, 110L, 111L, 112L,
113L, 114L, 115L, 116L), Speaker = c("ID01.A", NA, "ID01.A",
NA, "ID01.B", NA, "ID01.A", NA, "ID01.A", NA, "ID01.C"), Utterance = c("so you've ↑obviously↑ thought about it obviously: (.) have made a decision (.) I'm !head!ing in this door (.) one of the cleaning ladies at the UB !grabb!ed my elbow",
"(0.662)", "and said (.) ~no no no !this! is the !womens'! bathroom~=",
"(0.015)", "=((v: gasps))=", "(0.166)", "=NOW", "(0.622)", "!how! this always plays out ",
"(0.726)", "[when was] that¿="), UttStart = c(163898L, 172500L,
173162L, 176100L, 176115L, 176800L, 176966L, 177372L, 177994L,
179328L, 180054L), UttEnd = c(172500, 173162, 176100, 176115,
176800, 176966, 177372, 177994, 179328, 180054, 180668), UttDur = c(8602,
662, 2938, 15, 685, 166, 406, 622, 1334, 726, 614), A_aoi = c("*B*C*B*C*B*C*B*C*B*C",
"C*", "*B*C*C", "C", "C*", "*", "*C", "C", "C*B", "B*", "*"),
A_aoi_dur = c("21,516,79,333,200,634,233,651,17,2332,33,400,33,518,17,532,33,1900,119,1",
"414,248", "1124,412,116,533,600,153", "15", "616,69", "166",
"153,253", "622", "204,151,979", "219,507", "614"), B_aoi = c("A*A*A*A*A",
"A", "A", "A", "A", "A", "A", "A*", "*A*A", "A*A", "A*A"),
B_aoi_dur = c("475,130,567,137,1983,313,787,1400,2810", "662",
"2938", "15", "685", "166", "406", "398,224", "76,136,284,838",
"108,571,47", "116,270,228"), C_aoi = c("A", "A", "A*A*A",
"A", "A", "A", "A", "A*A", "A", "A*A", "A"), C_aoi_dur = c("8602",
"662", "1058,123,1300,144,313", "15", "685", "166", "406",
"264,351,7", "1334", "125,323,278", "614")), row.names = c(NA,
-11L), class = c("tbl_df", "tbl", "data.frame"))
EDIT: new test data with temporally overlapping Utterances:
df0 <- structure(list(Line = 137:145,
Speaker = c("ID01.A", "ID01.A-Q", NA, "ID01.A", "ID01.A-Q", "ID01.A-Q", "ID01.A-Q", "ID01.A-Q",NA),
Utterance = c("↑she gra:bs my elbow (.) I turn to !look! at her↑ and she's like ~this is a (.) womens' bathroom you can't go in there~",
"~this is a (.) womens' bathroom you can't go in there~", "(0.534)",
"and I'm like ~((silent f: blank stare))~ (.) and she didn't, she was just like ~you can't go in~ (.) I'm like ~I'm a !woman!~ she said ~no you're not you can't go in~",
"~((silent f: blank stare))~", "~you can't go in~", "~I'm a !woman!~",
"~no you're not you can't go in~", "(0.487)"),
UttStart = c(208845L, 211450L, 214136L, 214670L, 215409L, 218307L, 219235L, 220076L, 221368L),
UttEnd = c(214136, 214136, 214670, 221368, 217117, 219050, 219885, 221368, 221855),
UttDur = c(5291, 2686, 534, 6698, 1708, 743, 650, 1292, 487),
A_aoi = c("C*B*C*C*B*C*", "C*B*C*", "*B", "B*C*B*C*C*B*B", "C*B", "C*B", "*", "*B","B"),
A_aoi_dur = c("57,445,1100,135,199,333,866,302,832,33,468,521","530,302,832,33,468,521",
"144,390", "377,235,466,399,1268,132,268,132,433,6,716,1412,854","339,399,970", "73,6,664", "650", "438,854", "487"),
B_aoi = c("A*A","A", "A", "A*A*A*A*A*A", "A", "*A*A", "*A", "A*A", "A"),
B_aoi_dur = c("1691,121,3479", "2686", "534", "53,180,3333,134,253,280,203,534,1296,138,294",
"1708", "63,253,280,147", "405,245", "860,138,294", "487"),
C_aoi = c("A", "A", "A", "A*A", "A", "A*", "A", "A", "A"),
C_aoi_dur = c("5291", "2686", "534", "3766,734,2198",
"1708", "129,614", "650", "1292", "487")),
row.names = c(NA, -9L), class = c("tbl_df", "tbl", "data.frame"))
What I'd like to be able to visualize is who is looking at whom and for how long for each Utterance, roughly like in this schematic representation:
What I can do at present is plot the gazes on a minute-by-minute scale, but just the gazes - not the Utterances: Plotting gaze movements by multiple speakers in a single plot. Starting from the data as above, this can be achieved by multiple transformations (shown below) but the resulting plot does not feature the Utterances and it plots the gazes per minute, whereas I need the gazes per Utterance:
I'm fully aware that this is demanding a lot. Help with it is all the more appreciated.
# pivot_longer so that all gazes have their own row:
# Steps: (1) append "_AOI" to the columns ending in "_aoi" so every gaze
# column name ends in either "_AOI" or "_dur"; (2) pivot every column whose
# name contains "_", splitting each name at its LAST underscore into a
# Gaze_by prefix and a value-column name (AOI / dur); (3) keep only the first
# character of Gaze_by; (4) insert a comma between every pair of adjacent
# characters in AOI so it is comma-separated like dur; (5) split AOI and dur
# in parallel into one row per comma-separated element.
df0 <- df0 %>%
rename_with(~ str_c(., "_AOI"), ends_with("_aoi")) %>%
pivot_longer(cols = contains("_"),
names_to = c("Gaze_by", ".value"), # ".value": second capture group names the output column
names_pattern = "^(.*)_([^_]+$)"
) %>%
mutate(Gaze_by = sub("^(.).*", "\\1", Gaze_by)) %>%
mutate(AOI = str_replace_all(AOI, "(?<=.)(?=.)", ",")) %>%
separate_rows(c(AOI, dur), sep = ",", convert = TRUE)
# Gaze start/end times: within each gazer, a gaze ends at the running total of
# the durations and starts one duration earlier.
df1 <- mutate(
  group_by(df0, Gaze_by),
  end = cumsum(dur),
  start = end - dur
)
View(df1)

# Index of the minute in which each gaze starts and ends, plus a flag for
# gazes that cross a minute mark.
df2 <- mutate(
  df1,
  minute_start = as.integer(start / 60000),
  minute_end = as.integer(end / 60000),
  straddler = minute_end > minute_start
)
View(df2)

# Subset 1 of `df2`: straddling gazes, with the end cut back to the minute
# mark given by minute_end.
df2_A1 <- df2 %>%
  filter(straddler) %>%
  mutate(end = 60000 * minute_end)
View(df2_A1)

# Subset 2 of `df2`: the same straddling gazes, with the start moved forward
# to that same minute mark.
df2_A2 <- df2 %>%
  filter(straddler) %>%
  mutate(start = 60000 * minute_end)
View(df2_A2)

# Subset 3 of `df2`: gazes that stay within a single minute.
df2_A3 <- df2 %>%
  filter(!straddler)
View(df2_A3)

# Stack the three subsets, order by start time and express start/end relative
# to the beginning of the minute each row starts in.
df4 <- rbind(df2_A1, df2_A2, df2_A3) %>%
  arrange(start) %>%
  mutate(
    minute = as.integer(start / 60000),
    start_pm = start - minute * 60000,
    end_pm = end - minute * 60000
  )
# plot gaze activity for **ALL** speakers:
# One horizontal segment per gaze, from start_pm to end_pm within its minute.
# The y position is the minute number plus a small per-gazer offset: the
# gazer is turned into a numeric code, standardised with scale() and divided
# by 6, so the gazers' rows sit close together around each minute value.
df4 %>%
ggplot(aes(x = start_pm,
xend = end_pm,
y = minute + scale(as.numeric(as.factor(Gaze_by))) / 6,
yend = minute + scale(as.numeric(as.factor(Gaze_by))) / 6,
color = AOI)) +
# draw segments for AOI:
geom_segment(size = 2) +
# reverse y-axis scale (one break per minute, labelled with the minute and
# the three gazer row names):
scale_y_reverse(breaks = 0:max(df4$minute),
labels = paste(0:max(df4$minute), "min", " Gaze_by_A\n Gaze_by_B\n Gaze_by_C", sep = " "),
name = NULL) +
# define custom colors (one per AOI value, "*" included):
scale_colour_manual(values = c("*" = "lemonchiffon",
"A" = "darkorange",
"B" = "lawngreen",
"C" = "slateblue1")) +
# plot title:
labs(title = "Gaze activity") +
# drop the x-axis title:
theme(axis.title.x.bottom = element_blank())
Here is a solution that gets close to what you are looking for, making use of facets. It also uses forcats::fct_reorder and stringr::str_wrap (which are both part of the tidyverse).
This also wraps any long utterances and keeps the x-scale the same for all facets, rather than allowing them to stretch to fill the width.
# Gaze segments per utterance: one facet per (wrapped) utterance, stacked in
# a single column, with one discrete y row per gazer.
df4 %>%
mutate(#add text for y axis labels
Gaze_by = paste0("Gaze_by_", Gaze_by),
#reorder facet panels, add speaker at start, and wrap to 120 characters
#(substr(Speaker, 6, 6) takes the 6th character of Speaker as the prefix)
Utterance = fct_reorder(str_wrap(paste0(substr(Speaker, 6, 6), ": ",
Utterance),
120),
start_pm),
#set a dummy end point for each utterance based on the longest one
max_x = UttStart - min(UttStart) + max(UttDur)) %>%
ggplot(aes(x = start_pm, xend = end_pm,
y = Gaze_by, yend = Gaze_by, #as discrete variable
color = AOI)) +
geom_segment(size = 3) +
geom_point(aes(x = max_x, y = Gaze_by), alpha = 0) + #plot invisible dummy end points
scale_y_discrete(name = NULL, limits = rev) + #rev to get A at the top
#x scales are free per facet; the invisible points above extend each facet to max_x
facet_wrap(~Utterance, scales = "free_x", ncol = 1) +
scale_colour_manual(values = c("*" = "lemonchiffon",
"A" = "darkorange",
"B" = "lawngreen",
"C" = "slateblue1")) +
labs(title = "Gaze activity") +
theme_minimal() + #removes a lot of lines etc
theme(strip.text = element_text(color = "blue", hjust = 0), #facet strip text
strip.background = element_rect(fill = "white", color = "white"),
axis.title.x.bottom = element_blank())
To cut the utterances into 4-second chunks, you can do something like this...
df4 %>% group_by(Utterance) %>%
#work out relative durations from start of utterance and create subutterances
mutate(relStart = start_pm - min(start_pm),
relEnd = end_pm - min(start_pm),
subNo = map2(relStart, relEnd, ~seq(.x %/% 4000, .y %/% 4000, 1))) %>%
unnest(subNo) %>% #expand one row per subutterance
mutate(Utterance = paste0(Utterance, " (#", subNo + 1, ")"), #add sub no
subStart = pmax(4000 * subNo, relStart), #limits on subUtt
subEnd = pmin(4000 * (subNo + 1), relEnd), #limits on subUtt
start_pm = min(start_pm) + subStart, #redefine start
end_pm = min(start_pm) + subEnd) %>% #redefine end
group_by(Utterance) %>% #regroup as Utterance has changed!
mutate(max_x = min(start_pm) + 4000) %>% #define dummy end points
ungroup() %>%
mutate(Gaze_by = paste0("Gaze_by_", Gaze_by),
Utterance = fct_reorder(str_wrap(paste0(substr(Speaker, 6, 6), ": ", Utterance),
120), start_pm)) %>%
ggplot(...) #...as per code above from this point

How to edit the labels of a facet_wrap/grid if there are two variables?

In ggplot I have faceted by two variables (tau and z) but can only change the label of the first:
# Example data: 1000 random points with two faceting variables, tau and z.
df<-data.frame(x=runif(1e3),y=runif(1e3),tau=rep(c("A","aBc"),each=500),z=rep(c("DDD","EEE"),each=500))
# Lookup table from tau values to display labels; it has no entries for the
# z values ("DDD", "EEE").
tauNames <- c(
`A` = "10% load",
`aBc` = "40% load"
)
df%>%
ggplot(aes(x=x,y=y))+
geom_point(alpha=0.4)+
xlab(label = "Time[s]")+
ylab(label = "Dose")+
# NOTE: a single as_labeller(tauNames) is handed to facet_grid for both
# faceting variables, although tauNames only covers tau.
facet_grid(tau~z,labeller = as_labeller(tauNames))+
ggpubr::theme_pubclean()
As you can see I can change one of the labels but not both. Any thoughts are much appreciated
In the documentation of ?as_labeller you can find in the examples how you get the labels for multiple faceting variables.
library(tidyverse)
# Example data: 1000 random points with two faceting variables, tau and z.
df <- data.frame(
  x = runif(1e3),
  y = runif(1e3),
  tau = rep(c("A", "aBc"), each = 500),
  z = rep(c("DDD", "EEE"), each = 500)
)

# Display labels for the tau facets.
tauNames <- c(A = "10% load", aBc = "40% load")

# labeller() takes one lookup table per faceting variable, so tau and z are
# relabelled independently.
df %>%
  ggplot(aes(x = x, y = y)) +
  geom_point(alpha = 0.4) +
  labs(x = "Time[s]", y = "Dose") +
  facet_grid(
    tau ~ z,
    labeller = labeller(
      tau = tauNames,
      z = c(DDD = "D", EEE = "E")
    )
  ) +
  ggpubr::theme_pubclean()

Is it possible to subset facets in a polyfreq in GGplot?

I was wondering whether it is possible to use subset with geom_freqpoly()?
I am running a topic model and, in order to report the facets properly, I want to remove 4 out of 10 facets.
My code is as follows:
ggplot(data = dat,
aes(x = date,
fill = Topics)) +
geom_freqpoly(binwidth = 3) +
labs(x = "",
y = "Topic Count",
title = "Mentions of Topic On a Monthly Basis")+
scale_x_date(date_breaks = "months", date_labels="%b")+
theme(text = element_text(size=8)) +
theme(axis.text.x = element_text(angle = 45))+
facet_wrap(Topics ~ ., scales = "free")
> ggplot(subset(dat, Topics %in% c(3, 4, 5, 7, 8, 9)),
aes(x = date,
fill = topic)) +
geom_freqpoly(binwidth = 3) +
labs(x = "",
y = "Topic Count",
title = "Mentions of Topic On a Monthly Basis")+
scale_x_date(date_breaks = "months", date_labels="%b")+
theme(text = element_text(size=9)) +
theme(axis.text.x = element_text(angle = 45))+
facet_wrap(Topics ~ ., scales = "free")
However, when I try to subset the data, I get an error that says:
Fejl: Faceting variables must have at least one value
Does anybody know what the issue is?
I hope this makes sense.
The full code is down below.
# Drop articles that have no full text.
article.data <- article.data[!is.na(article.data$fulltext), ]

## Get date
article.data$date <- as.Date(article.data$date, "%Y-%m-%d")

# Keep articles dated strictly after 2018-01-01 and strictly before 2018-12-01.
dat <- article.data[
  article.data$date > as.Date("2018-01-01", "%Y-%m-%d") &
    article.data$date < as.Date("2018-12-01", "%Y-%m-%d"),
]

## 'tokenize' fulltext
quanteda_options("language_stemmer" = "danish")

# Replace colons with spaces, split into word tokens, lower-case, remove
# Danish stop words, stem, then remove stop words once more after stemming.
texts <- gsub(":", " ", dat$fulltext, fixed = TRUE) %>%
  tokens(
    what = "word",
    remove_numbers = TRUE,
    remove_punct = TRUE,
    remove_symbols = TRUE,
    remove_separators = TRUE,
    remove_hyphens = TRUE,
    remove_url = TRUE,
    verbose = TRUE
  ) %>%
  tokens_tolower() %>%
  tokens_remove(stopwords("danish")) %>%
  tokens_wordstem() %>%
  tokens_remove(stopwords("danish"))
# get actual dfm from tokens
txt.mat <- dfm(texts)
#remove frequent words with no substance
txt.mat <- txt.mat %>% dfm_remove(c("ad",
"af","aldrig","alene","alle",
"allerede","alligevel","alt",
"altid","anden","andet","andre",
"at","bag","bare", "bedre", "begge","bl.a.",
"blandt", "blev", "blevet", "blive","bliver",
"burde", "bør","ca.", "com", "da",
"dag", "dansk", "danske", "de",
"dem", "den", "denne","dens",
"der","derefter","deres","derfor",
"derfra","deri","dermed","derpå",
"derved","det","dette","dig",
"din","dine","disse","dit",
"dog","du","efter","egen",
"ej","eller","ellers","en",
"end","endnu","ene","eneste","enhver","ens",
"enten","er","et","f.eks.","far","fem",
"fik","fire","flere","flest",
"fleste","for", "foran",
"fordi","forrige","fra", "fx",
"få","får","før","først",
"gennem","gjorde","gjort","god",
"godt","gør","gøre","gørende",
"ham","han","hans","har",
"havde","have","hej","hel",
"heller","helt","hen","hende",
"hendes","henover","her",
"herefter","heri","hermed",
"herpå","hos","hun","hvad",
"hvem","hver","hvilke","hvilken",
"hvilkes","hvis",
"hvor", "hvordan","hvorefter","hvorfor",
"hvorfra","hvorhen","hvori","hvorimod",
"hvornår","hvorved","i", "ifølge", "igen",
"igennem","ikke","imellem","imens",
"imod","ind","indtil","ingen",
"intet","ja","jeg","jer","jeres",
"jo","kan","kom","komme",
"kommer", "kroner", "kun","kunne","lad",
"langs", "lang", "langt", "lav","lave","lavet",
"lidt","lige","ligesom","lille",
"længere","man","mand","mange",
"med","meget","mellem","men", "mener",
"mens","mere","mest","mig",
"min","mindre","mindst","mine",
"mit","mod","må","måske",
"ned","nej","nemlig","ni",
"nogen","nogensinde","noget",
"nogle","nok","nu","ny", "nye",
"nyt","når","nær","næste",
"næsten","og","også","okay",
"om","omkring","op","os",
"otte","over","overalt","pga.", "partier",
"partiets", "partiers", "politiske",
"procent", "på", "ritzau", "samme",
"sammen","se","seks","selv","selvom",
"senere","ser","ses","siden","sig",
"sige", "siger", "sin","sine","sit",
"skal","skulle","som","stadig",
"stor","store","synes","syntes",
"syv","så","sådan","således",
"tag","tage","temmelig","thi",
"ti","tidligere","til","tilbage",
"tit","to","tre","ud","uden",
"udover","under","undtagen","var",
"ved","vi","via","vil","ville", "viser",
"vor","vore","vores","vær","være",
"været","øvrigt","facebook","http", "https",
"www","millioner", "frem", "lars", "lars_løkke",
"rasmussen", "løkke_rasmussen", "statsminister", "politik",
"formand", "partiet", "år", "tid", "and", "fler",
"sid", "regeringen", "giv", "politisk", "folketing", "mer",
"ifølg"))
############################################################
## FEATURE SELECTION
############################################################
# check out top-appearing features in dfm
topfeatures(txt.mat)
# trim rare features: the threshold passed is min_termfreq = 4, i.e. a
# term-frequency cutoff (NOTE(review): the earlier comment spoke of ">2
# documents"; a document-count cutoff would presumably be min_docfreq --
# confirm which was intended)
txt.mat <- dfm_trim(txt.mat, min_termfreq = 4)
# keep only features whose name is longer than 2 characters (drops one- and
# two-character words)
txt.mat <- txt.mat[, str_length(colnames(txt.mat)) > 2]
# filter out some html trash features (currently disabled)
#txt.mat <- txt.mat[, !grepl("[[:digit:]]+px", colnames(txt.mat))]
#txt.mat <- txt.mat[, !grepl(".", colnames(txt.mat), fixed = T)]
#txt.mat <- txt.mat[, !grepl("_", colnames(txt.mat), fixed = T)]
#txt.mat <- txt.mat[, !grepl("#", colnames(txt.mat), fixed = T)]
#txt.mat <- txt.mat[, !grepl("#", colnames(txt.mat), fixed = T)]
############################################################
## SELECT FEATURES BY TF-IDF
############################################################
# Create tf_idf-weighted dfm
ti <- dfm_tfidf(txt.mat)
# Select from main dfm using its top features
txt.mat <- dfm_keep(txt.mat, names(topfeatures(ti, n = 1000)))
############################################################
## RUN TOPIC MODEL
############################################################
# convert quanteda dfm to tm 'dtm'
dtm <- convert(txt.mat, to = "topicmodels")
# run lda with 8 topics
lda <- LDA(dtm, k = 8)
# review terms by topic
terms(lda, 10)
############################################################
## LOOK FOR 'OPTIMAL' k
############################################################
# Hold out 100 randomly chosen documents as test data; train on the rest.
set.seed(61218)
select <- sample(seq_len(nrow(dtm)), size = 100)
test <- dtm[select, ]
train <- dtm[!(seq_len(nrow(dtm)) %in% select), ]

# Candidate numbers of topics and a results table with one row per candidate.
n.tops <- 3:14
metrics <- data.frame(topics = n.tops,
                      perplexity = NA_real_)

for (i in n.tops) { # NB: takes awhile to run
  print(i)
  est <- LDA(train, k = i)
  # Store the result in the row whose `topics` value is i. Indexing by
  # position (i - 1) would leave the first row empty and append a 13th row,
  # because n.tops starts at 3 rather than 2.
  metrics[metrics$topics == i, "perplexity"] <- perplexity(est, newdata = test)
}
save(metrics, file = "lda_perplexity2018.RData")

# Held-out perplexity as a function of the number of topics.
qplot(data = metrics, x = topics, y = perplexity, geom = "line",
      xlab = "Number of topics",
      ylab = "Perplexity on test data") + theme_bw()
#We found that 8 topics was one of those of lowest perplexity but
#also the ones which made the most sense
############################################################
## RERUN WITH BETTER CHOICE OF k
############################################################
# run lda with 10 topics
lda <- LDA(dtm, k = 10)
save(lda, file = "dr_ft_keep2018.RData")
# examine output
terms(lda, 20)
# put topics into original data
dat$topic <- topics(lda)
# add labels
#dat$date <- factor(dat$date,
#levels = 1:12,
#labels = c("januar","februar", "marts","april", "maj", "juni", "juli", "august", "september", "oktober", "november", "decemeber"))
dat$Topics <- factor(dat$topic,
levels = 1:10,
labels = c("Topc 1", "Topic 2", "Integration", "Taxation", "Burka Prohibition",
"Topic 6", "Justice", "Foreign Affairs", "Housing", "Topic 10"))
# frequency
qplot(data = dat, x = Topics,
geom = "bar", xlab = "",
ylab = "Topic Frequency", fill=Topics, main = "Figure 1: Main Topics in 2018 - DR") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90))
# Make visualization showing topics over time: frequency polygons of article
# dates (30-day bins), one free-scaled panel per topic label.
ggplot(data = dat,
       # NOTE(review): geom_freqpoly draws lines, so `fill` presumably has no
       # visible effect here; kept as in the original -- confirm intent.
       aes(x = date,
           fill = Topics[1])) +
  geom_freqpoly(binwidth = 30) +
  facet_wrap(Topics ~ ., scales = "free") +
  theme_classic() +
  # date_labels is an argument of scale_x_date(); it must not sit inside the
  # vector passed to as.Date(), where "%B" would become an NA break and the
  # month-name format would never be applied.
  scale_x_date(
    breaks = as.Date(c("2018-02-01", "2018-04-01", "2018-06-01",
                       "2018-08-01", "2018-10-01", "2018-12-01")),
    date_labels = "%B"
  ) +
  theme(axis.text.x = element_text(angle = 90))
# Frequency polygons of article dates (3-day bins), one free-scaled panel per
# topic label, with a tick for every month on the x-axis.
ggplot(dat, aes(x = date, fill = Topics)) +
  geom_freqpoly(binwidth = 3) +
  scale_x_date(date_breaks = "months", date_labels = "%b") +
  facet_wrap(Topics ~ ., scales = "free") +
  labs(
    title = "Mentions of Topic On a Monthly Basis",
    x = "",
    y = "Topic Count"
  ) +
  theme(text = element_text(size = 8)) +
  theme(axis.text.x = element_text(angle = 45))
It's best practice on this forum to make your question reproducible, so that others can try it and test their solutions to confirm they work. It's also good if you can make it minimal, both to respect potential answerers' time and to help clarify your own understanding of the problem.
How to make a great R reproducible example
In this case, the error message suggests that your subsetting is removing all your data, which breaks the faceting. It can't plot any facets if the faceting variable has no values.
It looks like dat$Topics is a factor, but your subset() call treats Topics as if it were numeric, with Topics %in% c(3, 4, 5, 7, 8, 9). For example, I could define a factor vector with the same levels as your Topics variable:
Topics <- factor(1:10, levels = 1:10,
labels = c("Topc 1", "Topic 2", "Integration", "Taxation", "Burka Prohibition",
"Topic 6", "Justice", "Foreign Affairs", "Housing", "Topic 10"))
Compare the output of these three lines:
Topics %in% c(1, 2)
# [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
as.numeric(Topics) %in% c(1, 2)
# [1] TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
Topics %in% c("Topc 1", "Topic 2")
# [1] TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
In the top case, none of the data matches the test, so using that to subset the data would give you an empty data set and seems like a plausible cause for the error you got.
To refer to the Topics by their underlying level, we can refer to as.numeric(Topics) %in% c(1, 2). If we want to refer to the Topics by their labels, I could use Topics %in% c("Topc 1", "Topic 2").
Since I don't have your data, I can't confirm this exact syntax will work for you, but I hope something along these lines will.
For more on how to work with factors in R, I recommend: https://r4ds.had.co.nz/factors.html

X-axis for times

I am trying to generate a series of plots that show the same patient taking drinks and urinating at different times. Each plot represents a single day. I want to compare the days and hence I need to ensure that all graphs plotted have the same x-axis. My code is below which I cribbed from How to specify the actual x axis values to plot as x axis ticks in R
### Data Input
# Thursday 4 April 2019: clock times, event codes and drink volumes (litres,
# NA where no volume was recorded).
time_Thurs <- c(
  "01:10", "05:50", "06:00", "06:15", "06:25", "09:35", "10:00", "12:40",
  "14:00", "17:20", "18:50", "19:10", "20:10", "21:00", "22:05", "22:35"
)
event_Thurs <- c(
  "u", "u", "T", "T", "u", "u", "T", "T",
  "u", "u", "T", "T", "T", "T", "u", "W"
)
volume_Thurs <- c(
  NA, NA, 0.25, 0.25, NA, NA, 0.125, 0.625,
  NA, NA, 0.25, 0.25, 0.25, 0.25, NA, 0.25
)
total_liquids_Thurs <- sum(volume_Thurs, na.rm = TRUE)
time_Thurs <- paste("04/04/2019", time_Thurs)

# Friday 5 April 2019, same layout.
time_Fri <- c(
  "01:15", "06:00", "06:10", "06:25", "06:30",
  "07:10", "08:40", "09:20", "12:45", "13:45"
)
event_Fri <- c("u", "u", "T", "T", "u", "uu", "T", "u", "T", "u")
volume_Fri <- c(NA, NA, 0.25, 0.25, NA, NA, 0.125, NA, 0.625, NA)
total_liquids_Fri <- sum(volume_Fri, na.rm = TRUE)
time_Fri <- paste("05/04/2019", time_Fri)

### Collect all data together
event <- c(event_Thurs, event_Fri)
Volume <- c(volume_Thurs, volume_Fri)
# Parse the combined "dd/mm/yyyy HH:MM" strings into date-times and keep a
# clock-time-only rendering alongside.
time_log <- strptime(c(time_Thurs, time_Fri), format = "%d/%m/%Y %H:%M")
time_view <- format(time_log, "%H:%M")

### Put into Dataframe
patient_data <- data.frame(time_log, time_view, event, Volume)
# write.csv(patient_data, file="patient_data.csv", row.names = FALSE)
daily_plot <- function(x, day) {
  # Plot one day's liquid intake against time and mark each urination with a
  # dotted red vertical line.
  #
  # x      patient data - a data.frame with four columns, in this order:
  #        POSIXct time, clock time, event code and Volume
  # day    number of the day of the month to plot
  #
  # Called for its plotting side effect.

  # Turn the event codes into integer colour indices on the FULL data set,
  # before subsetting, so a given event gets the same colour on every day.
  # as.factor() also covers a character column (the data.frame() default
  # since R 4.0), where as.numeric() alone would yield NA colours.
  event_code <- as.integer(as.factor(x[, 3]))

  on_day <- as.numeric(format(x[, 1], "%d")) == day
  x <- x[on_day, ]
  event_code <- event_code[on_day]
  if (nrow(x) == 0L) {
    stop("no observations found for day ", day, call. = FALSE)
  }

  # TotVol: total volume of intake over the selected day
  TotVol <- sum(x[, 4], na.rm = TRUE)
  DayOfWeek <- weekdays(x[1, 1], abbreviate = FALSE)
  first_time <- x[1, 1]
  last_time <- x[nrow(x), 1]

  plot(x[, 1], x[, 4],
    xlim = c(first_time, last_time),
    xlab = "Hours of Study", ylab = "Volume of Liquid Drank /L",
    main = paste("Total Liquids Drank = ", TotVol, " L on ", DayOfWeek, "Week 1, Apr 2019"),
    sub = "dashed red line = urination", pch = 16,
    col = c("black", "yellow", "green", "blue")[event_code],
    xaxt = "n" # suppress the default x-axis; a custom one is drawn below
  )
  # Custom x-axis: one tick per hour between the first and last event.
  xAxis_hrs <- seq(as.POSIXct(first_time), as.POSIXct(last_time), by = "hour")
  axis(1, at = xAxis_hrs, las = 2)
  # Vertical line at every urination ("u") event.
  abline(v = c(x[x[, 3] == "u", 1]), lty = 3, col = "red")
}
When I run the function,
daily_plot(patient_data, 4)
I want to print out my x-axis, as amended in the form of hours representing the events over the 24 hour period.
When I wrap my xAxis_hrs vector in strptime(xAxis_hrs, format = "%H") the code crashes - that is the x-axis doesn't print out and I see, Error in axis(1, at = xAxis_hrs, las = 2) : (list) object cannot be coerced to type 'double' . Any help?
The issue is that you pass the labels to the wrong named argument, namely at (which should be the numeric positions of the labels). Use the following instead:
axis(1, at = xAxis_hrs, labels = strptime(xAxis_hrs, format = "%H"), las = 2)
Unfortunately this doesn’t change the fact that the axis labels don’t fit into the plot, and collide with the axis title. The former can be fixed by adjusting the plot margins. I’m not aware of a good solution for the latter, although changing the time format might help: it’s probably not necessary/helpful to print the full minutes and seconds (which are always 0). In fact, did you mean to use format instead of strptime?
Apart from that I fundamentally agree with the other answer recommending ggplot2 in the long run. It makes this kind of stuff a lot less painful.
If you're open to a ggplot solution:
library(tidyverse)
library(lubridate)
daily_ggplot <- function(df, selected_day) {
  # Plot one day's drinks as points, with a dotted red vertical line at each
  # urination, on an x-axis that always spans that full day.
  #
  # df            data frame with columns time_log (date-time), event, Volume
  # selected_day  day of the month to plot
  #
  # Returns the ggplot object.
  df_day <- filter(df, day(time_log) == selected_day)
  df_urine <- filter(df_day, event == "u")
  df_drink <- filter(df_day, event != "u")

  TotVol <- sum(df_day$Volume, na.rm = TRUE)
  # Midnight at the start of the selected day; also the left axis limit.
  Date <- floor_date(df_day$time_log[1], "days")
  DayOfWeek <- weekdays(Date, abbreviate = FALSE)
  plot_title <- paste0("Total drank = ", TotVol, "L on ", DayOfWeek, " Week 1, Apr 2019")

  ggplot(df_drink) +
    aes(time_log, Volume, color = event) +
    geom_point(size = 2) +
    geom_vline(data = df_urine, aes(xintercept = time_log), color = "red", linetype = 3) +
    # The y-axis title must be passed as `y`; `ylab` is not a label name that
    # labs() applies to the y-axis.
    labs(x = "Hours of Study", y = "Volume of Liquid Drank (L)",
         title = plot_title, subtitle = "lines = urination") +
    theme_bw() +
    # Fixed limits of midnight to midnight, so plots of different days share
    # the same x-axis.
    scale_x_datetime(date_labels = "%H:%M", limits = c(Date, Date + days(1)))
}
daily_ggplot(patient_data, 4)

Loop functions with multiple variables for ggplot2

I want to build several plots from one large database, so that I have one plot for each Text (factor) and for each Measure (the many resulting measures of an eye tracking study). The following is a much simpler example of what I am trying to do:
Let's say this is my dataset
Text <- c(1,1,1,1,2,2,2,2,1,1,1,1,2,2,2,2)
Position <- c(1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4)
Modified <- c(1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0)
Line_on_page <- c(1, 1, 1, 1, 2,2,2,2 ,1 ,1,1,1,2,2,2,2)
IA_FIXATION_DURATION <- c(250.3, 70.82, 400, 120.12, 270, 120.5, 100.54, 212.43, 250.3, 70.82, 320.29, 123.12, 260, 121.5, 100.54, 272.43)
IA_FIXATION_COUNT <- c(1,0,1,1,3,2,0, 1, 1,0,1,2,3,2,0, 2)
IA_LABEL <- c("she", "did", "not", "know", "what", "to", "say", "to", "she", "did", "not", "know", "what", "to", "do", "to")
testDF <- data.frame(Text , Position , Line_on_page, Modified, IA_FIXATION_DURATION, IA_FIXATION_COUNT, IA_LABEL)
so I want a heatmap (or another graph) for each Text (1/2 in this example), and for each measure (IA_FIXATION_DURATION/IA_FIXATION_COUNT)
# so first i create my vectors
library(stringr)
library(reshape2)
library(ggplot2)
library(ggthemes)
library(tidyverse)
Text_list <- unique(testDF$Text)
Measure_list <- testDF %>% dplyr::select_if(is.numeric) %>% colnames() %>% as.vector()
# create graphing function
# Save one heatmap PDF per (text, measure) combination.
#
# testDF        data frame with columns Text, Position, Line_on_page,
#               Modified, IA_LABEL and the measure columns; an optional Page
#               column is used for the facet rows when present
# na.rm         unused; kept for backward compatibility
# ...           unused; kept for backward compatibility
# text_list     values of Text to plot (defaults to the global Text_list)
# measure_list  names of the columns to use as tile fill (defaults to the
#               global Measure_list)
#
# Called for its side effect: writes "<measure>_T<text>.pdf" files to the
# working directory.
Heatmap_FN <- function(testDF, na.rm = TRUE, ...,
                       text_list = Text_list, measure_list = Measure_list) {
  # Facet rows by Page only when the data actually has that column (the
  # simplified example data does not).
  facet_spec <- if ("Page" %in% names(testDF)) Page ~ Modified else . ~ Modified

  # create for loop to produce ggplot2 graphs
  for (i in seq_along(text_list)) {
    for (j in seq_along(measure_list)) {
      measure <- measure_list[j]
      # create plot for each text in dataset
      plots <- ggplot(subset(testDF, testDF$Text == text_list[i])) +
        geom_tile(aes(x = Position,
                      y = Line_on_page,
                      # Look the column up by name; a bare string inside
                      # aes() would be mapped as a constant, not as the
                      # column of that name.
                      fill = .data[[measure]])) +
        geom_text(aes(x = Position,
                      y = Line_on_page,
                      label = IA_LABEL),
                  color = "white", size = 2, family = "sans") +
        scale_fill_viridis_c(option = "C", na.value = "black") +
        scale_y_reverse() +
        facet_grid(facet_spec) +
        # Name the legend after the measure being plotted.
        labs(fill = measure) +
        theme(legend.position = "bottom") +
        ggtitle(paste(text_list[i], j, 'Text \n'))
      ggsave(filename = paste(measure, "_T", text_list[i], ".pdf", sep = ""),
             plot = plots,
             height = 8.27, width = 11.69, units = c("in"))
    }
  }
}
Heatmap_FN(testDF)
now, I am pretty sure that the problem lies in the geom_tile "fill" part, where I would like to indicate to the function that I want to use the results variables one by one to produce the plot.
Any ideas on how to fix that?
Thanks

Resources