speech-gaze activity plot in ggplot2 - r
I have data with Utterances by speakers in conversation as well as their gazes to one another. The speakers' gazes are in columns A_aoi, B_aoi, and C_aoi, the gaze durations are in A_aoi_dur, B_aoi_dur, and C_aoi_dur. Here's a reproducible snippet of the data:
df0 <- structure(list(Line = c(105L, 106L, 107L, 109L, 110L, 111L, 112L,
113L, 114L, 115L, 116L), Speaker = c("ID01.A", NA, "ID01.A",
NA, "ID01.B", NA, "ID01.A", NA, "ID01.A", NA, "ID01.C"), Utterance = c("so you've ↑obviously↑ thought about it obviously: (.) have made a decision (.) I'm !head!ing in this door (.) one of the cleaning ladies at the UB !grabb!ed my elbow",
"(0.662)", "and said (.) ~no no no !this! is the !womens'! bathroom~=",
"(0.015)", "=((v: gasps))=", "(0.166)", "=NOW", "(0.622)", "!how! this always plays out ",
"(0.726)", "[when was] that¿="), UttStart = c(163898L, 172500L,
173162L, 176100L, 176115L, 176800L, 176966L, 177372L, 177994L,
179328L, 180054L), UttEnd = c(172500, 173162, 176100, 176115,
176800, 176966, 177372, 177994, 179328, 180054, 180668), UttDur = c(8602,
662, 2938, 15, 685, 166, 406, 622, 1334, 726, 614), A_aoi = c("*B*C*B*C*B*C*B*C*B*C",
"C*", "*B*C*C", "C", "C*", "*", "*C", "C", "C*B", "B*", "*"),
A_aoi_dur = c("21,516,79,333,200,634,233,651,17,2332,33,400,33,518,17,532,33,1900,119,1",
"414,248", "1124,412,116,533,600,153", "15", "616,69", "166",
"153,253", "622", "204,151,979", "219,507", "614"), B_aoi = c("A*A*A*A*A",
"A", "A", "A", "A", "A", "A", "A*", "*A*A", "A*A", "A*A"),
B_aoi_dur = c("475,130,567,137,1983,313,787,1400,2810", "662",
"2938", "15", "685", "166", "406", "398,224", "76,136,284,838",
"108,571,47", "116,270,228"), C_aoi = c("A", "A", "A*A*A",
"A", "A", "A", "A", "A*A", "A", "A*A", "A"), C_aoi_dur = c("8602",
"662", "1058,123,1300,144,313", "15", "685", "166", "406",
"264,351,7", "1334", "125,323,278", "614")), row.names = c(NA,
-11L), class = c("tbl_df", "tbl", "data.frame"))
EDIT: new test data with temporally overlapping Utterances:
df0 <- structure(list(Line = 137:145,
Speaker = c("ID01.A", "ID01.A-Q", NA, "ID01.A", "ID01.A-Q", "ID01.A-Q", "ID01.A-Q", "ID01.A-Q",NA),
Utterance = c("↑she gra:bs my elbow (.) I turn to !look! at her↑ and she's like ~this is a (.) womens' bathroom you can't go in there~",
"~this is a (.) womens' bathroom you can't go in there~", "(0.534)",
"and I'm like ~((silent f: blank stare))~ (.) and she didn't, she was just like ~you can't go in~ (.) I'm like ~I'm a !woman!~ she said ~no you're not you can't go in~",
"~((silent f: blank stare))~", "~you can't go in~", "~I'm a !woman!~",
"~no you're not you can't go in~", "(0.487)"),
UttStart = c(208845L, 211450L, 214136L, 214670L, 215409L, 218307L, 219235L, 220076L, 221368L),
UttEnd = c(214136, 214136, 214670, 221368, 217117, 219050, 219885, 221368, 221855),
UttDur = c(5291, 2686, 534, 6698, 1708, 743, 650, 1292, 487),
A_aoi = c("C*B*C*C*B*C*", "C*B*C*", "*B", "B*C*B*C*C*B*B", "C*B", "C*B", "*", "*B","B"),
A_aoi_dur = c("57,445,1100,135,199,333,866,302,832,33,468,521","530,302,832,33,468,521",
"144,390", "377,235,466,399,1268,132,268,132,433,6,716,1412,854","339,399,970", "73,6,664", "650", "438,854", "487"),
B_aoi = c("A*A","A", "A", "A*A*A*A*A*A", "A", "*A*A", "*A", "A*A", "A"),
B_aoi_dur = c("1691,121,3479", "2686", "534", "53,180,3333,134,253,280,203,534,1296,138,294",
"1708", "63,253,280,147", "405,245", "860,138,294", "487"),
C_aoi = c("A", "A", "A", "A*A", "A", "A*", "A", "A", "A"),
C_aoi_dur = c("5291", "2686", "534", "3766,734,2198",
"1708", "129,614", "650", "1292", "487")),
row.names = c(NA, -9L), class = c("tbl_df", "tbl", "data.frame"))
What I'd like to be able to visualize is who is looking at whom and for how long for each Utterance, roughly like in this schematic representation:
What I can do at present is plot the gazes on a minute-by-minute scale, but just the gazes - not the Utterances: Plotting gaze movements by multiple speakers in a single plot. Starting from the data as above, this can be achieved by multiple transformations (shown below) but the resulting plot does not feature the Utterances and it plots the gazes per minute, whereas I need the gazes per Utterance:
I'm fully aware that this is demanding a lot. Help with it is all the more appreciated.
# pivot_longer so that all gazes have their own row:
df0 <- df0 %>%
rename_with(~ str_c(., "_AOI"), ends_with("_aoi")) %>%
pivot_longer(cols = contains("_"),
names_to = c("Gaze_by", ".value"), #
names_pattern = "^(.*)_([^_]+$)"
) %>%
mutate(Gaze_by = sub("^(.).*", "\\1", Gaze_by)) %>%
mutate(AOI = str_replace_all(AOI, "(?<=.)(?=.)", ",")) %>%
separate_rows(c(AOI, dur), sep = ",", convert = TRUE)
# compute starttimes and endtimes for gazes:
df1 <- df0 %>%
group_by(Gaze_by) %>%
mutate(
end = cumsum(dur),
start = end - dur
)
View(df1)
# compute minutes:
df2 <- df1 %>%
mutate(
# which minute does the event start in?
minute_start = as.integer(start/60000),
# which minute does the event end in?
minute_end = as.integer(end/60000),
# does the event straddle a minute mark?
straddler = minute_end > minute_start)
View(df2)
# 1st subset of `df2`:
df2_A1 <- df2 %>%
# filter those rows that contain events straddling minute marks:
filter(straddler=="TRUE") %>%
# reduce the endtime to the exact minute mark:
mutate(end = minute_end*60000)
View(df2_A1)
# 2nd subset of `df2`:
df2_A2 <- df2 %>%
# filter those rows that contain events straddling minute marks:
filter(straddler=="TRUE") %>%
# reduce the starttime to the exact minute mark:
mutate(start = minute_end*60000)
View(df2_A2)
# 3rd subset of `df0`:
df2_A3 <- df2 %>%
# filter those rows that do not contain events straddling minute marks:
filter(!straddler == "TRUE")
View(df2_A3)
# row-bind all three subsets:
df4 <- rbind(df2_A1, df2_A2, df2_A3) %>%
arrange(start) %>%
mutate(
minute = as.integer(start/60000),
# reduce total starttimes to starttimes per minute:
start_pm = start - 60000*minute,
# reduce total endtimes to endtimes per minute:
end_pm = end - 60000*minute)
# plot gaze activity for **ALL** speakers:
df4 %>%
ggplot(aes(x = start_pm,
xend = end_pm,
y = minute + scale(as.numeric(as.factor(Gaze_by))) / 6,
yend = minute + scale(as.numeric(as.factor(Gaze_by))) / 6,
color = AOI)) +
# draw segments for AOI:
geom_segment(size = 2) +
# reverse y-axis scale:
scale_y_reverse(breaks = 0:max(df4$minute),
labels = paste(0:max(df4$minute), "min", " Gaze_by_A\n Gaze_by_B\n Gaze_by_C", sep = " "),
name = NULL) +
# define custom colors:
scale_colour_manual(values = c("*" = "lemonchiffon",
"A" = "darkorange",
"B" = "lawngreen",
"C" = "slateblue1")) +
# plot title:
labs(title = "Gaze activity") +
theme(axis.title.x.bottom = element_blank())
Here is a solution that gets close to what you are looking for, making use of facets. It also uses forcats::fct_reorder and stringr::str_wrap (which are both part of the tidyverse).
This also wraps any long utterances and keeps the x-scale the same for all facets, rather than allowing them to stretch to fill the width.
df4 %>%
mutate(#add text for y axis labels
Gaze_by = paste0("Gaze_by_", Gaze_by),
#reorder facet panels, add speaker at start, and wrap to 120 characters
Utterance = fct_reorder(str_wrap(paste0(substr(Speaker, 6, 6), ": ",
Utterance),
120),
start_pm),
#set a dummy end point for each utterance based on the longest one
max_x = UttStart - min(UttStart) + max(UttDur)) %>%
ggplot(aes(x = start_pm, xend = end_pm,
y = Gaze_by, yend = Gaze_by, #as discrete variable
color = AOI)) +
geom_segment(size = 3) +
geom_point(aes(x = max_x, y = Gaze_by), alpha = 0) + #plot invisible dummy end points
scale_y_discrete(name = NULL, limits = rev) + #rev to get A at the top
facet_wrap(~Utterance, scales = "free_x", ncol = 1) +
scale_colour_manual(values = c("*" = "lemonchiffon",
"A" = "darkorange",
"B" = "lawngreen",
"C" = "slateblue1")) +
labs(title = "Gaze activity") +
theme_minimal() + #removes a lot of lines etc
theme(strip.text = element_text(color = "blue", hjust = 0), #facet strip text
strip.background = element_rect(fill = "white", color = "white"),
axis.title.x.bottom = element_blank())
To cut the utterances into 4-second chunks, you can do something like this...
df4 %>% group_by(Utterance) %>%
#work out relative durations from start of utterance and create subutterances
mutate(relStart = start_pm - min(start_pm),
relEnd = end_pm - min(start_pm),
subNo = map2(relStart, relEnd, ~seq(.x %/% 4000, .y %/% 4000, 1))) %>%
unnest(subNo) %>% #expand one row per subutterance
mutate(Utterance = paste0(Utterance, " (#", subNo + 1, ")"), #add sub no
subStart = pmax(4000 * subNo, relStart), #limits on subUtt
subEnd = pmin(4000 * (subNo + 1), relEnd), #limits on subUtt
start_pm = min(start_pm) + subStart, #redefine start
end_pm = min(start_pm) + subEnd) %>% #redefine end
group_by(Utterance) %>% #regroup as Utterance has changed!
mutate(max_x = min(start_pm) + 4000) %>% #define dummy end points
ungroup() %>%
mutate(Gaze_by = paste0("Gaze_by_", Gaze_by),
Utterance = fct_reorder(str_wrap(paste0(substr(Speaker, 6, 6), ": ", Utterance),
120), start_pm)) %>%
ggplot(...) #...as per code above from this point
Related
Making 2-way graph (ggplot2) out of a tabyl table changing values
male FALSE TRUE 0 50.0% 66.7% 1 50.0% 33.3% structure(list(male = 0:1, `FALSE` = c("50.0%", "50.0%"), `TRUE` = c("66.7%", "33.3%")), row.names = c(NA, -2L), core = structure(list(male = 0:1, `FALSE` = c(1, 1), `TRUE` = c(4, 2)), class = "data.frame", row.names = c(NA, -2L)), tabyl_type = "two_way", var_names = list(row = "male", col = "dummy"), class = c("tabyl", "data.frame")) How can I make a plot using ggplot2 of this table constructed with janitor? The thing is that I would like two plots side-by-side: one for dummy=TRUE and the other for dummy=FALSE (but changing the labels such that TRUE is replaced by a and FALSE by b -- i am having difficulties with this because TRUE and FALSE are logical). I would also like to replace the values 0 and 1 for c and d respectively.
You can try a tidyverse. The trick is to transform the data from wide to long since this is the prefered input for ggplot. Here I used pivot_longer, but you can also use reshape or melt. library(tidyverse) df %>% pivot_longer(-1) %>% mutate(name = ifelse(name, "a", "b")) %>% ggplot( aes(factor(male), value, fill =name)) + geom_col(position = position_dodge()) Using base R you can try # transform percentages to numerics df$a <- as.numeric(gsub("%", "", df$`TRUE`)) df$b <- as.numeric(gsub("%", "", df$`FALSE`)) barplot(cbind(a, b) ~ male, df, beside=T,legend.text = TRUE)
Plot multiple geom_line and geom_smooth objects in one plot
I have somewhat messy looking dataframes, like this one: df0 # A tibble: 3 x 9 # Groups: Sequ [1] Sequ Speaker Utterance A_intpl A_dur B_intpl B_dur C_intpl C_dur <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> 1 2 ID16.A cool >wha… 31.44786152… 10.5,17,1… 32.86993284… 9.5,16,17… 58.3368399… 14,17,17… 2 2 NA (0.228) 32.75735987… 15.5,17,1… 30.83469006… 14.5,16.9… 26.0386462… 3,17,16,… 3 2 ID16.B u:m Tenne… 32.05752604… 4.5,17,16… 29.95825107… 3.5,16,17… 55.9298614… 8,17,17,… I want to plot the *_intpl values for each speaker (A, B, or C) for each of the three Utterances in a single chart both as line charts and as trend lines. I'm just half successful doing this: library(tidyr) library(ggplot2) library(dplyr) df0 %>% pivot_longer(cols = contains("_"), names_to = c("Event_by", ".value"), names_pattern = "^(.*)_([^_]+$)") %>% separate_rows(c(intpl, dur), sep = ",", convert = TRUE) %>% mutate(Time = cumsum(dur)) %>% mutate(Utterance = paste0(sub(".*(.)$", "\\1",Speaker), ": ", Utterance), Utterance = factor(Utterance, levels = unique(Utterance))) %>% ggplot(aes(x = Time, y = log2(intpl), group = Event_by, colour = Event_by)) + geom_line()+ geom_smooth(method = 'lm', color = "red", formula = y~x)+ facet_wrap(~ Utterance, ncol = 1, scales= "free_x") Half successful because the line plots and trend lines are side-by-side, as if in three columns, whereas they should be in rows, one below the other - how can that be achieved? Reproducible data: structure(list(Sequ = c(2L, 2L, 2L), Speaker = c("ID16.A", NA, "ID16.B"), Utterance = c("cool >what part?<", "(0.228)", "u:m Tennessee=" ), A_intpl = c("31.4478615210995,31.5797510648522,31.7143985369445,31.651083739602,31.5806035086034,36.8956763912703,36.2882129597292,35.2124499461012,34.1366869324732,34.1366869324732,32.1927035724058,30.2487202123383,28.3047368522709,26.3607534922035,30.5278334848495,30.5919390424853,30.8898529369568,31.578968913188,31.9011198738002,32.1543265113196,31.9708002079533,31.966536408565,31.8762658607759,31.8994741472105,31.4215913971938,32.1510578328563,31.7863350712876,32.4685052625667,31.7422271490296,32.3286054977263,31.9998974949481,32.5177992323864,32.4727499785435,32.9310888953766,32.7592010033585,33.2231711877427,33.1593949301066,33.2432973964816,33.2569729073414,33.492144800249,33.317650964723,33.4835787832119,33.2377190454279,32.9200836384356,32.9684568771567,32.6400987016883,27.5447101464944,29.3948945479171,35.3449171857603,33.5932932239592,31.8416692621581,30.0900453003569,32.7850431084597,32.7589003618266,32.8365550655013,32.386716057622,32.8420792704881,32.6909995562489,32.6269434402016,32.7370944106334,32.7529759209752,32.6528826975113,32.3663573764448,32.7326853004792,32.6930038462418,32.8975978772676,33.1752899475416,33.2034433355001,33.0667431432803,32.6322933080614,33.2503168843178,32.7573598713719", "32.7573598713719,32.7531704791313,32.7366130631104,32.918942216354,32.8309939530596,32.3856893430525,32.5368873543441,32.5628510484821,32.5628510484821,32.5628510484821,32.5506564332008,32.7477119716583,32.3458470743288,32.0575260428013", "32.0575260428013,32.1628824338111,32.0093334061923,32.1461460586991,31.9080762250966,31.9469105074833,31.7431187667232,31.7194255656503,31.7394296413187,31.8594986292975,31.7498243274746,31.9069142374258,32.0835520942767,31.6257067057109,31.757232379438,31.9036689124911,32.1319749301918,31.7203280774998,31.7877137245706,32.3030946636177,32.2800139298454,32.164646135728,32.3636504940227,32.5657818936495,32.3859453482697,32.4797898358193,32.5319835105237,32.92233491509,32.8240561109448,32.664496027779,33.1835064752029,33.0366413969703,33.0406288190821,33.3232964677672,33.2206260057731,33.1537134269402,33.2783471506207,33.2933281566788,33.5322350394609,33.3815736723684,33.7905544185063,33.6143820666896,33.7490659591585,33.7260102344634,34.0721931066557,34.0455026427054,34.3735788774521,34.2888420421073,34.3913721165542,34.5982135545306,34.4417202731001,34.6586347152449,31.1590521215434,31.3276405983897,28.2379253186548,31.133030931336,34.0715906921349,35.8967950760285,35.9334551147377,35.8565504335515,35.7446081905229,35.6300325834155,35.8390086948751,35.9711743270411,36.0029493274176,35.8891056768339" ), A_dur = c("10.5,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,0.5", "15.5,17,17,16,17,17,16,17,17,16,17,17,16,12.5", "4.5,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,5.5" ), B_intpl = c("32.8699328424689,32.8154348109057,32.5454364786882,32.408257038977,32.5304564519672,32.3270203236281,31.9233218634346,32.0166346064182,31.7360745988363,31.7546527359571,31.8603220354065,31.6520061326962,31.5603191463274,31.3357561466519,31.0976090032219,31.1405090978825,31.1697180784961,31.0863999545386,31.3126984044729,30.580776446803,30.7137016246273,31.0801914571091,31.2343922096768,31.2749857511594,31.3488604642844,30.9327390960718,31.0750482778561,31.1849119826023,31.4180114886183,31.5284273181104,31.147361398529,31.1128597713973,31.5551385744611,31.7479939892741,31.5890352680344,31.5470790538009,31.5427330200078,31.3901913024084,31.5423214446953,31.4814325586741,31.4937336232021,31.3483738841556,31.2516462059018,31.2233881922543,31.2572951780583,31.0087226975291,31.1197589042273,31.053748381687,30.8202174718598,30.845143129195,30.8727194789634,30.4231467151428,30.7254093759809,30.2757746547116,30.6047530953025,29.6835591414008,28.257421076205,29.4634886416064,29.183064807185,28.6935506287734,29.3989017421637,30.8936090542518,30.6884831327852,30.805770713392,30.6938909098627,30.8317757801268,30.8509115577427,30.6836198471168,30.7979978629801,31.0260101704105,30.6248844591805,30.8346900656087", "30.8346900656087,30.9826158466835,29.814086001996,29.7839590794955,30.7928804535206,31.1589874726521,31.0547403039501,31.2268131145794,31.155503802286,31.3036925274762,31.4782621660348,31.0928322383151,31.589958621025,29.9582510795225", "29.9582510795225,29.9796434055214,29.9405638729798,30.2602098442174,30.5011865525849,30.6753859842987,28.9331380886365,30.7736467776919,30.8457967803438,30.843630408183,30.8767570425033,30.9178344980247,30.734598946287,30.8877440413271,30.9225051837881,30.9534076039184,31.0172861192043,30.9371712793451,30.9806052132295,31.0593603717961,31.1156928565737,30.4713263393479,26.028518302418,28.1426546887905,29.4308434671559,30.7190322455213,31.2289674937063,31.7389027418913,32.2488379900763,32.7587732382613,33.2687084864463,33.7786437346312,34.2885789828162,34.7985142310012,35.3084494791862,35.8183847273712,36.3283199755562,36.8382552237412,37.3481904719262,37.8581257201112,38.3680609682962,25.5986933949893,29.7968031963901,30.5336819967028,30.1876589408847,30.4260367500101,30.2997107671214,30.3429716412578,30.3537316791924,30.4111899964144,30.7293520851914,30.7778983966343,30.9712137067708,30.9072589183658,31.0696990205164,30.5713926084448,31.3458855877875,31.4169903025083,31.5148974986093,31.5972499257413,31.2293401943969,31.2033325602348,31.1657434266985,30.6784877073261,30.6991365599664,30.6763195188897" ), B_dur = c("9.5,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,2.5", "14.5,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,13.4999999999854", "3.5,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,7.5" ), C_intpl = c("58.3368399069697,58.249224089011,59.5198368051218,58.8722012497097,58.4418996252205,58.5849059154389,59.2752163985494,52.8407480422202,51.6276603912397,48.0255346632529,44.753541512539,41.4815483618252,38.2095552111114,34.9375620603975,31.6655689096837,28.3935757589698,25.121582608256,19.4712933827274,22.0108873782783,24.5504813738291,24.8441573376901,24.6902151101703,24.4029572181118,24.9753161974674,24.8664406826514,24.8486668451201,25.1137001504163,25.1142578332509,25.4902077628339,25.4075561268027,25.6622548410237,61.2421678149908,25.1600975771354,25.6667198263373,25.442560744158,25.8736383423437,25.5859074180431,24.7860400673889,24.4337707697216,24.3214953242744,23.915753514736,23.7363185577661,23.7186569801299,23.4313514771952,23.5730151254578,62.5124513171595,23.3260531660862,23.4498217326665,23.2145314844252,57.5586745434594,63.4646233226955,23.0706406704345,23.3318690599491,62.044649715831,62.2720656330432,22.2532276715887,62.7059140614625,22.9511208849958,22.5603175709988,23.3456453893988,63.2523901625561,60.6655429980934,60.2358824325868,59.957910796633,57.3999702562457,54.8277282980263,43.0269305132552,31.2261327284841,19.425334943713,22.7319906068577,26.0386462700023", "26.0386462700023,29.345301933147,32.6519575962917,35.9586132594364,48.3773995023798,60.7961857453232,49.4980424442242,55.9907960862667,57.2956837917999,58.1409925994177,59.025022056064,60.0098263540792,60.4028460580062,61.2629030450653,55.9298614021542", "55.9298614021542,55.3877180252389,61.3547152702855,61.7847919095391,56.2457623439544,62.5477315546977,62.3078007189967,62.4272469013149,57.6479672147315,62.9844338801191,58.0081708266629,63.3872796098875,59.0138830718112,58.0612924481098,58.38680047729,58.687179350318,63.8724230039733,63.4126777597892,63.6865154626743,63.5670658627636,63.4496590540706,63.7595297692908,58.9069708176601,63.4547681163061,64.3198376700797,63.415319961042,64.0985879957056,64.1201809531605,63.677902665454,64.1934303628317,64.4682003346273,64.2868853545462,24.8444135816353,64.1579626357752,63.8897139146875,58.5472675827292,64.5784992977498,64.0848591719068,63.8841268679761,64.2901359712354,64.395692486112,64.5425896391638,64.8060565909917,64.3618830026368,64.7088481705444,64.5005944199885,64.5540289192148,64.7408010459365,63.378880767685,63.3415589069662,63.5362700331647,63.5924807719723,63.575801461932,63.6799360982113,64.0041021410894,64.3144923757986,63.8692943755376,63.8594574363473,64.2731841085802,63.3314657812309,64.2758880216293,64.1011768977101,64.0261661917799,64.2865302330478,63.724697791255,64.1202175712152" ), C_dur = c("14,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,14", "3,17,16,17,17,16,17,17,16,17,17,16,17,17,8", "8,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,2" )), row.names = c(NA, -3L), groups = structure(list(Sequ = 2L, .rows = structure(list(1:3), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", "list"))), row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df", "tbl", "data.frame"))
There's a possible solution with use of grid.arrange() func from library(gridExtra) library(grid) packages. I've wrapped your data into unique charts and combined them together into arranged chart. df1 = df0 %>% pivot_longer(cols = contains("_"), names_to = c("Event_by", ".value"), names_pattern = "^(.*)_([^_]+$)") %>% separate_rows(c(intpl, dur), sep = ",", convert = TRUE) %>% mutate(Time = cumsum(dur)) %>% mutate(Utterance = paste0(sub(".*(.)$", "\\1",Speaker), ": ", Utterance), Utterance = factor(Utterance, levels = unique(Utterance))) Set chart objects into enviroment: for (i in unique(df1$Event_by)){ for (j in levels(df1$Utterance)){ assign(x = paste0(i,j), value = ggplot(data = df1[df1$Event_by == i & df1$Utterance == j,], aes(x = Time, y = log2(intpl))) + geom_line()+ geom_smooth(method = 'lm', color = "red", formula = y~x)) } } Create grided chart: library(gridExtra) library(grid) grid.arrange( `AA: cool >what part?<`, `AB: u:m Tennessee=` , `ANA: (0.228)` , `BA: cool >what part?<` , `BB: u:m Tennessee=` , `BNA: (0.228)` , `CA: cool >what part?<` , `CB: u:m Tennessee=` , `CNA: (0.228)` , nrow = 3) Although i think there should be better solution for that. You can also try to explore below articlesfor arranging plots: http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/81-ggplot2-easy-way-to-mix-multiple-graphs-on-the-same-page/ https://ggplot2-book.org/facet.html Moreover, there's is no themming added to my solution
Comparing "Unlimited" value to numerical values in ggplot
I am trying to make a visual comparison between an input vector and my database.However, the input vector or the database may contain the "UL" character, which means, an infinite number. Think of it as your unlimited voice plan, with which you can make an unlimited number of calls. Here is the code I have used to try to make a visual comparison between "UL" and other numerical values. # d is the database data.frame, with which we want to compare the input vector d = structure(list(Type = c("H1", "H2", "H3"), P1 = c(2000L, 1500L, 1000L), P2 = c(60L, 40L, 20L), P3 = c("UL", 3000L, 2000L)), class = "data.frame", row.names = c(NA, -3L)) # d2 is the input vector d2 = structure(list(Type = "New_offre", P1 = 1200L, P2 = "UL", P3 = 2000), class = "data.frame", row.names = c(NA, -1L)) #Check if there are some unlimited values in both d and d2 y1 <-rbind(d,d2) y <- y1 if("UL" %in% y$P3){ max_P3_scale <- max(as.numeric(y[y$P3!="UL","P3"])) y[y$P3=="UL","P3"]= 2*max_P3_scale } if("UL" %in% y$P2){ max_P2_scale <- max(as.numeric(y[y$P2!="UL","P2"])) y[y$P2=="UL","P2"]= 2*max_P2_scale } y <- transform(y,P1=as.numeric(P1), P2=as.numeric(P2), P3=as.numeric(P3)) d <- y[1:nrow(d),] d2<- y[nrow(d)+1,] d %>% gather(var1, current, -Type) %>% mutate(new = as.numeric(d2[cbind(rep(1, max(row_number())), match(var1, names(d2)))]), slope = factor(sign(current - new), -1:1)) %>% gather(var2, val, -Type, -var1, -slope) %>% ggplot(aes(x = factor(var2,levels = c("new","current")), y = val, group = 1)) + geom_point(aes(fill = var2), shape = 2,size=4) + geom_line(aes(colour = slope)) + scale_colour_manual(values = c("green","green", "red")) + facet_wrap(Type ~ var1,scales = "free") My first attempt was to find if there is "UL" values in P2 and P3. If yes, I try to find the maximum numeric value other than "UL". Then, I replace all "UL" occurrences by this maximum value* 2, so the graphical representations will always show that "UL" is maximum. The issue with this is that I am not able to differentiate between actual values and "UL" ones. Here is how my plot looks like using this solution
Subset Data.Frame With Multiple Conditions
End Goal: Create a plot for each region of StressCumulative, BaseCumulative, StressQoQ, and BaseQoQ over the date range from rows 1:167. Problem: I'm having difficulty subsetting my data.frame. My issue is that the condition by which I'm subsetting is logical, and thus will only return the first element after the condition. subset_region_1 <- subset.data.frame(HPF, HPF$region == 1, select = BaseCumulative, HPF$StressCumulative, StressQoQ, BaseQoQ) Warning messages: 1: In if (drop) warningc("drop ignored") : the condition has length > 1 and only the first element will be used 2: drop ignored This returns only the first column, BaseCumulative. Data: Here you get a glimpse of what I'm working with. This is the table I am looking to subset from. My data.frame is in a tall format I would like to create a subset in order to graph BaseCumulative, StressCumulative, BaseQoQ, and StressQoQ variables over the range of dates from rows 1:167. The date column uses the same dates for all 100 regions. My issue is that when I go to plot in ggplot, I get an error that my aes mappings are not of the same size. The full table has date = 18370 rows long, but the values repeat every 167 rows (for each unique region). Further, the BaseCumulative variable is also 18370 rows long but is unique for all regions, i.e. every 167 rows. I want to know how I can subset by region while obtaining the correct row size for the variables I am interested in measuring. Data Pts: #Rows 1-3 (Region 1 Sample): dput(head(HPF[1:3, ])) structure(list(region = c(1, 1, 1), path = c(1, 1, 1), date = c(20140215, 20140515, 20140815), index_value = c(1, 1.033852765, 1.041697122 ), index = 0:2, counter = 1:3, BaseQoQ = c(NA, 0.033852765, 0.00758749917354029 ), BaseCumulative = c(100, 103.3852765, 104.1697122), StressCumulative = c(110, 113.3852765, 114.1697122), StressQoQ = c(NA, 0.0307752409090909, 0.00691832065162346)), .Names = c("region", "path", "date", "index_value", "index", "counter", "BaseQoQ", "BaseCumulative", "StressCumulative", "StressQoQ"), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame")) #Rows 168:200 (Region 2 Sample): dput(head(HPF[168:200, ])) structure(list(region = c(2, 2, 2, 2, 2, 2), path = c(1, 1, 1, 1, 1, 1), date = c(20140215, 20140515, 20140815, 20141115, 20150215, 20150515), index_value = c(1, 1.014162265, 1.01964828, 1.009372314, 1.007210703, 1.018695493), index = 0:5, counter = 1:6, BaseQoQ = c(NA, 0.014162265, 0.00540940556489744, -0.0100779515854232, -0.0021415398163972, 0.0114025694582001), BaseCumulative = c(100, 101.4162265, 101.964828, 100.9372314, 100.7210703, 101.8695493), StressCumulative = c(110, 111.4162265, 111.964828, 110.9372314, 110.7210703, 101.8695493 ), StressQoQ = c(NA, 0.0128747863636363, 0.00492389230216839, -0.00917785181610786, -0.00194849914020834, -0.0799443229370588 )), .Names = c("region", "path", "date", "index_value", "index", "counter", "BaseQoQ", "BaseCumulative", "StressCumulative", "StressQoQ" ), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame" )) Question: How do I subset other columns in addition to specifying region == #? I have tried the following but then the issue is that values recycle for the dates and my charts are incorrect: ggplot(HPF, aes(x = date, y= BaseCumulative, linetype = factor(region == 1))) + geom_line() + theme_light() Further, I am also unsuccessful if I try to subset within the ggplot such as: ggplot(HPF[HPF$region == 1, ], aes(x = HPF$date[1:167, ], y= HPF$BaseCumulative[1:167, ], linetype = factor(region == 1))) + geom_line() + theme_light() Any help is appreciated.
I'm not entirely sure what you're trying to show in your plot; is this what you're after? library(tidyverse); df %>% gather(what, value, 7:10) %>% ggplot(aes(date, value, colour = what)) + geom_line() + theme_light() Explanation: Convert your data from wide to long format, then pass what as a colour (or linetype) aesthetic to get different line plots for columns 7, 8, 9, 10 in one plot. If you want separate plots for region, you could add + facet_wrap(~ as.factor(region)), e.g. df %>% gather(what, value, 7:10) %>% ggplot(aes(date, value, colour = what)) + geom_line() + theme_light() + facet_wrap(~ as.factor(region)) Sample data df1 <- structure(list(region = c(1, 1, 1), path = c(1, 1, 1), date = c(20140215, 20140515, 20140815), index_value = c(1, 1.033852765, 1.041697122 ), index = 0:2, counter = 1:3, BaseQoQ = c(NA, 0.033852765, 0.00758749917354029 ), BaseCumulative = c(100, 103.3852765, 104.1697122), StressCumulative = c(110, 113.3852765, 114.1697122), StressQoQ = c(NA, 0.0307752409090909, 0.00691832065162346)), .Names = c("region", "path", "date", "index_value", "index", "counter", "BaseQoQ", "BaseCumulative", "StressCumulative", "StressQoQ"), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame")); df2 <- structure(list(region = c(2, 2, 2, 2, 2, 2), path = c(1, 1, 1, 1, 1, 1), date = c(20140215, 20140515, 20140815, 20141115, 20150215, 20150515), index_value = c(1, 1.014162265, 1.01964828, 1.009372314, 1.007210703, 1.018695493), index = 0:5, counter = 1:6, BaseQoQ = c(NA, 0.014162265, 0.00540940556489744, -0.0100779515854232, -0.0021415398163972, 0.0114025694582001), BaseCumulative = c(100, 101.4162265, 101.964828, 100.9372314, 100.7210703, 101.8695493), StressCumulative = c(110, 111.4162265, 111.964828, 110.9372314, 110.7210703, 101.8695493 ), StressQoQ = c(NA, 0.0128747863636363, 0.00492389230216839, -0.00917785181610786, -0.00194849914020834, -0.0799443229370588 )), .Names = c("region", "path", "date", "index_value", "index", "counter", "BaseQoQ", "BaseCumulative", "StressCumulative", "StressQoQ" ), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame" )) df <- rbind.data.frame(df1, df2);
how to make a merged heatmap between each two columns of values
How can I put two columns in one heatmap? Lets say I have the following data data<- structure(list(names = structure(c(5L, 1L, 10L, 2L, 6L, 4L, 9L, 7L, 11L, 3L, 8L), .Label = c("Bin", "Dari", "Down", "How", "India", "Karachi", "Left", "middle", "Right", "Trash", "Up"), class = "factor"), X1Huor = c(1.555555556, 5.2555556, 2.256544, 2.3654225, 1.2665545, 0, 1.889822365, 2.37232101, -1, -1.885618083, 1.128576187 ), X2Hour = c(1.36558854, 2.254887, 2.3333333, 0.22255444, 2.256588, 5.66666, -0.377964473, 0.107211253, -1, 0, 0), X3Hour = c(0, 1.222222222, 5.336666, 1.179323788, 0.832050294, -0.397359707, 0.185695338, 1.393746295, -1, -2.121320344, 1.523019248), X4Hour = c(3.988620176, 3.544745039, -2.365555, 2.366666, 1.000000225, -0.662266179, -0.557086015, 0.862662186, 0, -1.305459824, 1.929157714), X5Hour = c(2.366666, 2.333365, 4.22222, 0.823333333, 0.980196059, -2.516611478, 2.267786838, 0.32163376, 0, -2.592724864, 0.816496581)), .Names = c("names", "X1Huor", "X2Hour", "X3Hour", "X4Hour", "X5Hour"), class = "data.frame", row.names = c(NA, -11L)) This data has 5 columns of values. I want to make a heatmap which half of it is the value from first colum and the other half of each cell is from the second column. The same for the third column and fourth The same for the fifth and sixth ( there is no sixth but I can leave it empty) This is just an example to show what I am looking for. I have searched a lot but I could not find anything like this The color range from Red to green, if the value is higher than 2 the color red and if the value is lower than -2 the color is green. Any thought how to do this ?
This is a somewhat hacky solution, but it might work for you, so check this out. The idea is to utilize geom_polygon to create the triangles and stack them. To do that we first need to generate the triangle coordinates library(dplyr) library(tidyr) library(stringr) # the following two line create the triangle coordinates x = rep(c(1,2,2, 1, 1, 2),nrow(data)) y = rep(c(1,1,2, 1, 2, 2),nrow(data)) + rep(0:10, each=6) Now that we have our coordinates we need to generate their ids, which are the names. But because we want each triangle to be unique, we need to create two unique versions of each name: names <- data %>% select(names, X1Huor, X2Hour) %>% gather("key", "value", X1Huor, X2Hour) %>% arrange(names, key) %>% mutate(name = str_c(names, key)) %>% .$name %>% rep(each = 3) And now we do the same with the hours: hour <- data %>% select(names, X1Huor, X2Hour) %>% gather("key", "value", X1Huor, X2Hour) %>% arrange(names, key) %>% .$value %>% rep(each = 3) datapoly <- data.frame(x = x, y = y , hour = hour, names = names) Since there are no proper labels for the plot in our datapoly df, we need to create one: name_labels <- data %>% select(names) %>% arrange(names) %>% .$names The scene is now set for our graph: ggplot(datapoly, aes(x = x, y = y)) + geom_polygon(aes(group = names, fill = hour), color = "black") + scale_fill_continuous(low = "green", high = "red") + scale_y_continuous(breaks = 1:nrow(data), labels = name_labels) + theme(axis.text.y = element_text(vjust = -2), axis.ticks = element_blank(), axis.text.x = element_blank(), axis.title = element_blank()) The output looks like this: Several points to keep in mind: Is this really a plot you want to be creating and using? Is this really useful for your purposes? Perhaps other, more traditional visualization methods are more suitable. Also, I didn't bother doing the same for the other hour columns as these are quite tedious, but the method on how to achieve them should be clear enough (I hope).