Plot multiple geom_line and geom_smooth objects in one plot - r

I have somewhat messy looking dataframes, like this one:
df0
# A tibble: 3 x 9
# Groups: Sequ [1]
Sequ Speaker Utterance A_intpl A_dur B_intpl B_dur C_intpl C_dur
<int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 2 ID16.A cool >wha… 31.44786152… 10.5,17,1… 32.86993284… 9.5,16,17… 58.3368399… 14,17,17…
2 2 NA (0.228) 32.75735987… 15.5,17,1… 30.83469006… 14.5,16.9… 26.0386462… 3,17,16,…
3 2 ID16.B u:m Tenne… 32.05752604… 4.5,17,16… 29.95825107… 3.5,16,17… 55.9298614… 8,17,17,…
I want to plot the *_intpl values for each speaker (A, B, or C) for each of the three Utterances in a single chart both as line charts and as trend lines.
I'm just half successful doing this:
library(tidyr)
library(ggplot2)
library(dplyr)
df0 %>%
pivot_longer(cols = contains("_"),
names_to = c("Event_by", ".value"),
names_pattern = "^(.*)_([^_]+$)") %>%
separate_rows(c(intpl, dur), sep = ",", convert = TRUE) %>%
mutate(Time = cumsum(dur)) %>%
mutate(Utterance = paste0(sub(".*(.)$", "\\1",Speaker), ": ", Utterance),
Utterance = factor(Utterance, levels = unique(Utterance))) %>%
ggplot(aes(x = Time, y = log2(intpl),
group = Event_by,
colour = Event_by)) +
geom_line()+
geom_smooth(method = 'lm', color = "red", formula = y~x)+
facet_wrap(~ Utterance, ncol = 1, scales= "free_x")
Half successful because the line plots and trend lines are side-by-side, as if in three columns, whereas they should be in rows, one below the other - how can that be achieved?
Reproducible data:
structure(list(Sequ = c(2L, 2L, 2L), Speaker = c("ID16.A", NA,
"ID16.B"), Utterance = c("cool >what part?<", "(0.228)", "u:m Tennessee="
), A_intpl = c("31.4478615210995,31.5797510648522,31.7143985369445,31.651083739602,31.5806035086034,36.8956763912703,36.2882129597292,35.2124499461012,34.1366869324732,34.1366869324732,32.1927035724058,30.2487202123383,28.3047368522709,26.3607534922035,30.5278334848495,30.5919390424853,30.8898529369568,31.578968913188,31.9011198738002,32.1543265113196,31.9708002079533,31.966536408565,31.8762658607759,31.8994741472105,31.4215913971938,32.1510578328563,31.7863350712876,32.4685052625667,31.7422271490296,32.3286054977263,31.9998974949481,32.5177992323864,32.4727499785435,32.9310888953766,32.7592010033585,33.2231711877427,33.1593949301066,33.2432973964816,33.2569729073414,33.492144800249,33.317650964723,33.4835787832119,33.2377190454279,32.9200836384356,32.9684568771567,32.6400987016883,27.5447101464944,29.3948945479171,35.3449171857603,33.5932932239592,31.8416692621581,30.0900453003569,32.7850431084597,32.7589003618266,32.8365550655013,32.386716057622,32.8420792704881,32.6909995562489,32.6269434402016,32.7370944106334,32.7529759209752,32.6528826975113,32.3663573764448,32.7326853004792,32.6930038462418,32.8975978772676,33.1752899475416,33.2034433355001,33.0667431432803,32.6322933080614,33.2503168843178,32.7573598713719",
"32.7573598713719,32.7531704791313,32.7366130631104,32.918942216354,32.8309939530596,32.3856893430525,32.5368873543441,32.5628510484821,32.5628510484821,32.5628510484821,32.5506564332008,32.7477119716583,32.3458470743288,32.0575260428013",
"32.0575260428013,32.1628824338111,32.0093334061923,32.1461460586991,31.9080762250966,31.9469105074833,31.7431187667232,31.7194255656503,31.7394296413187,31.8594986292975,31.7498243274746,31.9069142374258,32.0835520942767,31.6257067057109,31.757232379438,31.9036689124911,32.1319749301918,31.7203280774998,31.7877137245706,32.3030946636177,32.2800139298454,32.164646135728,32.3636504940227,32.5657818936495,32.3859453482697,32.4797898358193,32.5319835105237,32.92233491509,32.8240561109448,32.664496027779,33.1835064752029,33.0366413969703,33.0406288190821,33.3232964677672,33.2206260057731,33.1537134269402,33.2783471506207,33.2933281566788,33.5322350394609,33.3815736723684,33.7905544185063,33.6143820666896,33.7490659591585,33.7260102344634,34.0721931066557,34.0455026427054,34.3735788774521,34.2888420421073,34.3913721165542,34.5982135545306,34.4417202731001,34.6586347152449,31.1590521215434,31.3276405983897,28.2379253186548,31.133030931336,34.0715906921349,35.8967950760285,35.9334551147377,35.8565504335515,35.7446081905229,35.6300325834155,35.8390086948751,35.9711743270411,36.0029493274176,35.8891056768339"
), A_dur = c("10.5,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,0.5",
"15.5,17,17,16,17,17,16,17,17,16,17,17,16,12.5", "4.5,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,5.5"
), B_intpl = c("32.8699328424689,32.8154348109057,32.5454364786882,32.408257038977,32.5304564519672,32.3270203236281,31.9233218634346,32.0166346064182,31.7360745988363,31.7546527359571,31.8603220354065,31.6520061326962,31.5603191463274,31.3357561466519,31.0976090032219,31.1405090978825,31.1697180784961,31.0863999545386,31.3126984044729,30.580776446803,30.7137016246273,31.0801914571091,31.2343922096768,31.2749857511594,31.3488604642844,30.9327390960718,31.0750482778561,31.1849119826023,31.4180114886183,31.5284273181104,31.147361398529,31.1128597713973,31.5551385744611,31.7479939892741,31.5890352680344,31.5470790538009,31.5427330200078,31.3901913024084,31.5423214446953,31.4814325586741,31.4937336232021,31.3483738841556,31.2516462059018,31.2233881922543,31.2572951780583,31.0087226975291,31.1197589042273,31.053748381687,30.8202174718598,30.845143129195,30.8727194789634,30.4231467151428,30.7254093759809,30.2757746547116,30.6047530953025,29.6835591414008,28.257421076205,29.4634886416064,29.183064807185,28.6935506287734,29.3989017421637,30.8936090542518,30.6884831327852,30.805770713392,30.6938909098627,30.8317757801268,30.8509115577427,30.6836198471168,30.7979978629801,31.0260101704105,30.6248844591805,30.8346900656087",
"30.8346900656087,30.9826158466835,29.814086001996,29.7839590794955,30.7928804535206,31.1589874726521,31.0547403039501,31.2268131145794,31.155503802286,31.3036925274762,31.4782621660348,31.0928322383151,31.589958621025,29.9582510795225",
"29.9582510795225,29.9796434055214,29.9405638729798,30.2602098442174,30.5011865525849,30.6753859842987,28.9331380886365,30.7736467776919,30.8457967803438,30.843630408183,30.8767570425033,30.9178344980247,30.734598946287,30.8877440413271,30.9225051837881,30.9534076039184,31.0172861192043,30.9371712793451,30.9806052132295,31.0593603717961,31.1156928565737,30.4713263393479,26.028518302418,28.1426546887905,29.4308434671559,30.7190322455213,31.2289674937063,31.7389027418913,32.2488379900763,32.7587732382613,33.2687084864463,33.7786437346312,34.2885789828162,34.7985142310012,35.3084494791862,35.8183847273712,36.3283199755562,36.8382552237412,37.3481904719262,37.8581257201112,38.3680609682962,25.5986933949893,29.7968031963901,30.5336819967028,30.1876589408847,30.4260367500101,30.2997107671214,30.3429716412578,30.3537316791924,30.4111899964144,30.7293520851914,30.7778983966343,30.9712137067708,30.9072589183658,31.0696990205164,30.5713926084448,31.3458855877875,31.4169903025083,31.5148974986093,31.5972499257413,31.2293401943969,31.2033325602348,31.1657434266985,30.6784877073261,30.6991365599664,30.6763195188897"
), B_dur = c("9.5,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,2.5",
"14.5,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,13.4999999999854",
"3.5,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,7.5"
), C_intpl = c("58.3368399069697,58.249224089011,59.5198368051218,58.8722012497097,58.4418996252205,58.5849059154389,59.2752163985494,52.8407480422202,51.6276603912397,48.0255346632529,44.753541512539,41.4815483618252,38.2095552111114,34.9375620603975,31.6655689096837,28.3935757589698,25.121582608256,19.4712933827274,22.0108873782783,24.5504813738291,24.8441573376901,24.6902151101703,24.4029572181118,24.9753161974674,24.8664406826514,24.8486668451201,25.1137001504163,25.1142578332509,25.4902077628339,25.4075561268027,25.6622548410237,61.2421678149908,25.1600975771354,25.6667198263373,25.442560744158,25.8736383423437,25.5859074180431,24.7860400673889,24.4337707697216,24.3214953242744,23.915753514736,23.7363185577661,23.7186569801299,23.4313514771952,23.5730151254578,62.5124513171595,23.3260531660862,23.4498217326665,23.2145314844252,57.5586745434594,63.4646233226955,23.0706406704345,23.3318690599491,62.044649715831,62.2720656330432,22.2532276715887,62.7059140614625,22.9511208849958,22.5603175709988,23.3456453893988,63.2523901625561,60.6655429980934,60.2358824325868,59.957910796633,57.3999702562457,54.8277282980263,43.0269305132552,31.2261327284841,19.425334943713,22.7319906068577,26.0386462700023",
"26.0386462700023,29.345301933147,32.6519575962917,35.9586132594364,48.3773995023798,60.7961857453232,49.4980424442242,55.9907960862667,57.2956837917999,58.1409925994177,59.025022056064,60.0098263540792,60.4028460580062,61.2629030450653,55.9298614021542",
"55.9298614021542,55.3877180252389,61.3547152702855,61.7847919095391,56.2457623439544,62.5477315546977,62.3078007189967,62.4272469013149,57.6479672147315,62.9844338801191,58.0081708266629,63.3872796098875,59.0138830718112,58.0612924481098,58.38680047729,58.687179350318,63.8724230039733,63.4126777597892,63.6865154626743,63.5670658627636,63.4496590540706,63.7595297692908,58.9069708176601,63.4547681163061,64.3198376700797,63.415319961042,64.0985879957056,64.1201809531605,63.677902665454,64.1934303628317,64.4682003346273,64.2868853545462,24.8444135816353,64.1579626357752,63.8897139146875,58.5472675827292,64.5784992977498,64.0848591719068,63.8841268679761,64.2901359712354,64.395692486112,64.5425896391638,64.8060565909917,64.3618830026368,64.7088481705444,64.5005944199885,64.5540289192148,64.7408010459365,63.378880767685,63.3415589069662,63.5362700331647,63.5924807719723,63.575801461932,63.6799360982113,64.0041021410894,64.3144923757986,63.8692943755376,63.8594574363473,64.2731841085802,63.3314657812309,64.2758880216293,64.1011768977101,64.0261661917799,64.2865302330478,63.724697791255,64.1202175712152"
), C_dur = c("14,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,14",
"3,17,16,17,17,16,17,17,16,17,17,16,17,17,8", "8,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,2"
)), row.names = c(NA, -3L), groups = structure(list(Sequ = 2L,
.rows = structure(list(1:3), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))

There's a possible solution with use of grid.arrange() func from library(gridExtra) library(grid) packages.
I've wrapped your data into unique charts and combined them together into arranged chart.
df1 = df0 %>%
pivot_longer(cols = contains("_"),
names_to = c("Event_by", ".value"),
names_pattern = "^(.*)_([^_]+$)") %>%
separate_rows(c(intpl, dur), sep = ",", convert = TRUE) %>%
mutate(Time = cumsum(dur)) %>%
mutate(Utterance = paste0(sub(".*(.)$", "\\1",Speaker), ": ", Utterance),
Utterance = factor(Utterance, levels = unique(Utterance)))
Set chart objects into enviroment:
for (i in unique(df1$Event_by)){
for (j in levels(df1$Utterance)){
assign(x = paste0(i,j), value = ggplot(data = df1[df1$Event_by == i & df1$Utterance == j,], aes(x = Time, y = log2(intpl))) +
geom_line()+
geom_smooth(method = 'lm', color = "red", formula = y~x))
}
}
Create grided chart:
library(gridExtra) library(grid)
grid.arrange(
`AA: cool >what part?<`,
`AB: u:m Tennessee=` ,
`ANA: (0.228)` ,
`BA: cool >what part?<` ,
`BB: u:m Tennessee=` ,
`BNA: (0.228)` ,
`CA: cool >what part?<` ,
`CB: u:m Tennessee=` ,
`CNA: (0.228)` ,
nrow = 3)
Although i think there should be better solution for that.
You can also try to explore below articlesfor arranging plots:
http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/81-ggplot2-easy-way-to-mix-multiple-graphs-on-the-same-page/
https://ggplot2-book.org/facet.html
Moreover, there's is no themming added to my solution

Related

Making 2-way graph (ggplot2) out of a tabyl table changing values

male FALSE TRUE
0 50.0% 66.7%
1 50.0% 33.3%
structure(list(male = 0:1, `FALSE` = c("50.0%", "50.0%"), `TRUE` = c("66.7%",
"33.3%")), row.names = c(NA, -2L), core = structure(list(male = 0:1,
`FALSE` = c(1, 1), `TRUE` = c(4, 2)), class = "data.frame", row.names = c(NA,
-2L)), tabyl_type = "two_way", var_names = list(row = "male",
col = "dummy"), class = c("tabyl", "data.frame"))
How can I make a plot using ggplot2 of this table constructed with janitor? The thing is that I would like two plots side-by-side: one for dummy=TRUE and the other for dummy=FALSE (but changing the labels such that TRUE is replaced by a and FALSE by b -- i am having difficulties with this because TRUE and FALSE are logical). I would also like to replace the values 0 and 1 for c and d respectively.
You can try a tidyverse. The trick is to transform the data from wide to long since this is the prefered input for ggplot. Here I used pivot_longer, but you can also use reshape or melt.
library(tidyverse)
df %>%
pivot_longer(-1) %>%
mutate(name = ifelse(name, "a", "b")) %>%
ggplot( aes(factor(male), value, fill =name)) +
geom_col(position = position_dodge())
Using base R you can try
# transform percentages to numerics
df$a <- as.numeric(gsub("%", "", df$`TRUE`))
df$b <- as.numeric(gsub("%", "", df$`FALSE`))
barplot(cbind(a, b) ~ male, df, beside=T,legend.text = TRUE)

speech-gaze activity plot in ggplot2

I have data with Utterances by speakers in conversation as well as their gazes to one another. The speakers' gazes are in columns A_aoi, B_aoi, and C_aoi, the gaze durations are in A_aoi_dur, B_aoi_dur, and C_aoi_dur. Here's a reproducible snippet of the data:
df0 <- structure(list(Line = c(105L, 106L, 107L, 109L, 110L, 111L, 112L,
113L, 114L, 115L, 116L), Speaker = c("ID01.A", NA, "ID01.A",
NA, "ID01.B", NA, "ID01.A", NA, "ID01.A", NA, "ID01.C"), Utterance = c("so you've ↑obviously↑ thought about it obviously: (.) have made a decision (.) I'm !head!ing in this door (.) one of the cleaning ladies at the UB !grabb!ed my elbow",
"(0.662)", "and said (.) ~no no no !this! is the !womens'! bathroom~=",
"(0.015)", "=((v: gasps))=", "(0.166)", "=NOW", "(0.622)", "!how! this always plays out ",
"(0.726)", "[when was] that¿="), UttStart = c(163898L, 172500L,
173162L, 176100L, 176115L, 176800L, 176966L, 177372L, 177994L,
179328L, 180054L), UttEnd = c(172500, 173162, 176100, 176115,
176800, 176966, 177372, 177994, 179328, 180054, 180668), UttDur = c(8602,
662, 2938, 15, 685, 166, 406, 622, 1334, 726, 614), A_aoi = c("*B*C*B*C*B*C*B*C*B*C",
"C*", "*B*C*C", "C", "C*", "*", "*C", "C", "C*B", "B*", "*"),
A_aoi_dur = c("21,516,79,333,200,634,233,651,17,2332,33,400,33,518,17,532,33,1900,119,1",
"414,248", "1124,412,116,533,600,153", "15", "616,69", "166",
"153,253", "622", "204,151,979", "219,507", "614"), B_aoi = c("A*A*A*A*A",
"A", "A", "A", "A", "A", "A", "A*", "*A*A", "A*A", "A*A"),
B_aoi_dur = c("475,130,567,137,1983,313,787,1400,2810", "662",
"2938", "15", "685", "166", "406", "398,224", "76,136,284,838",
"108,571,47", "116,270,228"), C_aoi = c("A", "A", "A*A*A",
"A", "A", "A", "A", "A*A", "A", "A*A", "A"), C_aoi_dur = c("8602",
"662", "1058,123,1300,144,313", "15", "685", "166", "406",
"264,351,7", "1334", "125,323,278", "614")), row.names = c(NA,
-11L), class = c("tbl_df", "tbl", "data.frame"))
EDIT: new test data with temporally overlapping Utterances:
df0 <- structure(list(Line = 137:145,
Speaker = c("ID01.A", "ID01.A-Q", NA, "ID01.A", "ID01.A-Q", "ID01.A-Q", "ID01.A-Q", "ID01.A-Q",NA),
Utterance = c("↑she gra:bs my elbow (.) I turn to !look! at her↑ and she's like ~this is a (.) womens' bathroom you can't go in there~",
"~this is a (.) womens' bathroom you can't go in there~", "(0.534)",
"and I'm like ~((silent f: blank stare))~ (.) and she didn't, she was just like ~you can't go in~ (.) I'm like ~I'm a !woman!~ she said ~no you're not you can't go in~",
"~((silent f: blank stare))~", "~you can't go in~", "~I'm a !woman!~",
"~no you're not you can't go in~", "(0.487)"),
UttStart = c(208845L, 211450L, 214136L, 214670L, 215409L, 218307L, 219235L, 220076L, 221368L),
UttEnd = c(214136, 214136, 214670, 221368, 217117, 219050, 219885, 221368, 221855),
UttDur = c(5291, 2686, 534, 6698, 1708, 743, 650, 1292, 487),
A_aoi = c("C*B*C*C*B*C*", "C*B*C*", "*B", "B*C*B*C*C*B*B", "C*B", "C*B", "*", "*B","B"),
A_aoi_dur = c("57,445,1100,135,199,333,866,302,832,33,468,521","530,302,832,33,468,521",
"144,390", "377,235,466,399,1268,132,268,132,433,6,716,1412,854","339,399,970", "73,6,664", "650", "438,854", "487"),
B_aoi = c("A*A","A", "A", "A*A*A*A*A*A", "A", "*A*A", "*A", "A*A", "A"),
B_aoi_dur = c("1691,121,3479", "2686", "534", "53,180,3333,134,253,280,203,534,1296,138,294",
"1708", "63,253,280,147", "405,245", "860,138,294", "487"),
C_aoi = c("A", "A", "A", "A*A", "A", "A*", "A", "A", "A"),
C_aoi_dur = c("5291", "2686", "534", "3766,734,2198",
"1708", "129,614", "650", "1292", "487")),
row.names = c(NA, -9L), class = c("tbl_df", "tbl", "data.frame"))
What I'd like to be able to visualize is who is looking at whom and for how long for each Utterance, roughly like in this schematic representation:
What I can do at present is plot the gazes on a minute-by-minute scale, but just the gazes - not the Utterances: Plotting gaze movements by multiple speakers in a single plot. Starting from the data as above, this can be achieved by multiple transformations (shown below) but the resulting plot does not feature the Utterances and it plots the gazes per minute, whereas I need the gazes per Utterance:
I'm fully aware that this is demanding a lot. Help with it is all the more appreciated.
# pivot_longer so that all gazes have their own row:
df0 <- df0 %>%
rename_with(~ str_c(., "_AOI"), ends_with("_aoi")) %>%
pivot_longer(cols = contains("_"),
names_to = c("Gaze_by", ".value"), #
names_pattern = "^(.*)_([^_]+$)"
) %>%
mutate(Gaze_by = sub("^(.).*", "\\1", Gaze_by)) %>%
mutate(AOI = str_replace_all(AOI, "(?<=.)(?=.)", ",")) %>%
separate_rows(c(AOI, dur), sep = ",", convert = TRUE)
# compute starttimes and endtimes for gazes:
df1 <- df0 %>%
group_by(Gaze_by) %>%
mutate(
end = cumsum(dur),
start = end - dur
)
View(df1)
# compute minutes:
df2 <- df1 %>%
mutate(
# which minute does the event start in?
minute_start = as.integer(start/60000),
# which minute does the event end in?
minute_end = as.integer(end/60000),
# does the event straddle a minute mark?
straddler = minute_end > minute_start)
View(df2)
# 1st subset of `df2`:
df2_A1 <- df2 %>%
# filter those rows that contain events straddling minute marks:
filter(straddler=="TRUE") %>%
# reduce the endtime to the exact minute mark:
mutate(end = minute_end*60000)
View(df2_A1)
# 2nd subset of `df2`:
df2_A2 <- df2 %>%
# filter those rows that contain events straddling minute marks:
filter(straddler=="TRUE") %>%
# reduce the starttime to the exact minute mark:
mutate(start = minute_end*60000)
View(df2_A2)
# 3rd subset of `df0`:
df2_A3 <- df2 %>%
# filter those rows that do not contain events straddling minute marks:
filter(!straddler == "TRUE")
View(df2_A3)
# row-bind all three subsets:
df4 <- rbind(df2_A1, df2_A2, df2_A3) %>%
arrange(start) %>%
mutate(
minute = as.integer(start/60000),
# reduce total starttimes to starttimes per minute:
start_pm = start - 60000*minute,
# reduce total endtimes to endtimes per minute:
end_pm = end - 60000*minute)
# plot gaze activity for **ALL** speakers:
df4 %>%
ggplot(aes(x = start_pm,
xend = end_pm,
y = minute + scale(as.numeric(as.factor(Gaze_by))) / 6,
yend = minute + scale(as.numeric(as.factor(Gaze_by))) / 6,
color = AOI)) +
# draw segments for AOI:
geom_segment(size = 2) +
# reverse y-axis scale:
scale_y_reverse(breaks = 0:max(df4$minute),
labels = paste(0:max(df4$minute), "min", " Gaze_by_A\n Gaze_by_B\n Gaze_by_C", sep = " "),
name = NULL) +
# define custom colors:
scale_colour_manual(values = c("*" = "lemonchiffon",
"A" = "darkorange",
"B" = "lawngreen",
"C" = "slateblue1")) +
# plot title:
labs(title = "Gaze activity") +
theme(axis.title.x.bottom = element_blank())
Here is a solution that gets close to what you are looking for, making use of facets. It also uses forcats::fct_reorder and stringr::str_wrap (which are both part of the tidyverse).
This also wraps any long utterances and keeps the x-scale the same for all facets, rather than allowing them to stretch to fill the width.
df4 %>%
mutate(#add text for y axis labels
Gaze_by = paste0("Gaze_by_", Gaze_by),
#reorder facet panels, add speaker at start, and wrap to 120 characters
Utterance = fct_reorder(str_wrap(paste0(substr(Speaker, 6, 6), ": ",
Utterance),
120),
start_pm),
#set a dummy end point for each utterance based on the longest one
max_x = UttStart - min(UttStart) + max(UttDur)) %>%
ggplot(aes(x = start_pm, xend = end_pm,
y = Gaze_by, yend = Gaze_by, #as discrete variable
color = AOI)) +
geom_segment(size = 3) +
geom_point(aes(x = max_x, y = Gaze_by), alpha = 0) + #plot invisible dummy end points
scale_y_discrete(name = NULL, limits = rev) + #rev to get A at the top
facet_wrap(~Utterance, scales = "free_x", ncol = 1) +
scale_colour_manual(values = c("*" = "lemonchiffon",
"A" = "darkorange",
"B" = "lawngreen",
"C" = "slateblue1")) +
labs(title = "Gaze activity") +
theme_minimal() + #removes a lot of lines etc
theme(strip.text = element_text(color = "blue", hjust = 0), #facet strip text
strip.background = element_rect(fill = "white", color = "white"),
axis.title.x.bottom = element_blank())
To cut the utterances into 4-second chunks, you can do something like this...
df4 %>% group_by(Utterance) %>%
#work out relative durations from start of utterance and create subutterances
mutate(relStart = start_pm - min(start_pm),
relEnd = end_pm - min(start_pm),
subNo = map2(relStart, relEnd, ~seq(.x %/% 4000, .y %/% 4000, 1))) %>%
unnest(subNo) %>% #expand one row per subutterance
mutate(Utterance = paste0(Utterance, " (#", subNo + 1, ")"), #add sub no
subStart = pmax(4000 * subNo, relStart), #limits on subUtt
subEnd = pmin(4000 * (subNo + 1), relEnd), #limits on subUtt
start_pm = min(start_pm) + subStart, #redefine start
end_pm = min(start_pm) + subEnd) %>% #redefine end
group_by(Utterance) %>% #regroup as Utterance has changed!
mutate(max_x = min(start_pm) + 4000) %>% #define dummy end points
ungroup() %>%
mutate(Gaze_by = paste0("Gaze_by_", Gaze_by),
Utterance = fct_reorder(str_wrap(paste0(substr(Speaker, 6, 6), ": ", Utterance),
120), start_pm)) %>%
ggplot(...) #...as per code above from this point

How to reorder a graph with multiple variable based on one value?

I am trying to reorder the following graph based on the rank of the lowest confidence interval (conf.low). This means that Austria (AU) should be the first country, Bulgaria (BG) the second and Belgium (BE) the third. I know there is a way to do it manually by choosing the order of the country variable but i prefer to find a way to do it automatically since i have 30 countries. Could someone help?
Here is the data and the code:
df= structure(list(cntry = structure(1:3, .Label = c("AU", "BE",
"BG"), class = "factor"), estimate = c(0.0053, 0.01740,
0.0036), conf.low = c(-0.0257, 0.0005,
-0.0006), conf.high = c(0.0365, 0.0343,
0.0079)), row.names = c(NA, -3L), class = "data.frame")
df %>%
arrange(estimate) %>%
mutate(label = replace(round(estimate, 3),cntry==1, '')) %>%
ggplot(aes(estimate, cntry,label=label)) +
geom_point()+
geom_text(vjust= -1) +
geom_linerange(mapping=aes(xmin=conf.low , xmax=conf.high, y=cntry)) +
geom_point(mapping=aes(x=estimate, y=cntry))
Using forcats::fct_reorder() you could do this:
library(dplyr)
library(ggplot2)
library(forcats)
df %>%
arrange(estimate) %>%
mutate(label = replace(round(estimate, 3), cntry==1, '')) %>%
ggplot(aes(estimate, fct_reorder(cntry, conf.low, .desc = TRUE),label=label)) +
geom_point()+
geom_text(vjust= -1) +
geom_linerange(mapping=aes(xmin=conf.low , xmax=conf.high, y=cntry)) +
geom_point(mapping=aes(x=estimate, y=cntry))+
ylab("Country")
Created on 2021-04-22 by the reprex package (v2.0.0)
data
df= structure(list(cntry = structure(1:3, .Label = c("AU", "BE",
"BG"), class = "factor"), estimate = c(0.0053, 0.01740,
0.0036), conf.low = c(-0.0257, 0.0005,
-0.0006), conf.high = c(0.0365, 0.0343,
0.0079)), row.names = c(NA, -3L), class = "data.frame")

Create a multiline plot from a dataset with time on one axis and genes on the other

I have a dataset with mean gene counts for each decade as shown below:
structure(list(decade_0 = c(92.500989948184, 2788.27384875413,
28.6937227408861, 1988.03831525414, 1476.83143096418), decade_1 = c(83.4606306426572,
537.725421951383, 10.2747132062782, 235.380422949258, 685.043600629146
), decade_2 = c(188.414375201462, 2091.84249935145, 17.080858894829,
649.55107199935, 1805.3484565514), decade_3 = c(43.3316024314987,
141.64396529835, 2.77851259926935, 94.7748265692319, 413.248354335235
), decade_4 = c(54.4891626582901, 451.076574268175, 12.4298374245007,
346.102609621018, 769.215535857077), decade_5 = c(85.5621750431284,
131.822699578988, 13.3130607062134, 151.002200923853, 387.727911723968
), decade_6 = c(112.860998806804, 4844.59668489898, 19.7317645111144,
2084.76584309876, 766.375852567831), decade_7 = c(73.2198969730458,
566.042952305845, 3.2457873699886, 311.853982701609, 768.801733767044
), decade_8 = c(91.8161648275608, 115.161700090147, 10.7289451320065,
181.747670625714, 549.21661120626), decade_9 = c(123.31045087146,
648.23694540667, 17.7690326882018, 430.301803845829, 677.187054208271
)), row.names = c("ANK1", "NTN4", "PTPRH", "JAG1", "PLAT"), class = "data.frame")
I would like to plot a line graph with the changes in counts over time for each of >30 genes as shown here in excel.
To do this with ggplot I have to convert it to col1: decade, col2: gene, col3: counts.
My question is, either how to convert my table into this ggplot friendly table, or if there is a better way to produce the plot with a different tool?
Thanks!
One possibility: transpose your data frame, convert rownames to columns, then gather ("make long"). Plotting is then easy.
library(tidyverse)
mydat <- structure(list(decade_0 = c(92.500989948184, 2788.27384875413,
28.6937227408861, 1988.03831525414, 1476.83143096418), decade_1 = c(83.4606306426572,
537.725421951383, 10.2747132062782, 235.380422949258, 685.043600629146
), decade_2 = c(188.414375201462, 2091.84249935145, 17.080858894829,
649.55107199935, 1805.3484565514), decade_3 = c(43.3316024314987,
141.64396529835, 2.77851259926935, 94.7748265692319, 413.248354335235
), decade_4 = c(54.4891626582901, 451.076574268175, 12.4298374245007,
346.102609621018, 769.215535857077), decade_5 = c(85.5621750431284,
131.822699578988, 13.3130607062134, 151.002200923853, 387.727911723968
), decade_6 = c(112.860998806804, 4844.59668489898, 19.7317645111144,
2084.76584309876, 766.375852567831), decade_7 = c(73.2198969730458,
566.042952305845, 3.2457873699886, 311.853982701609, 768.801733767044
), decade_8 = c(91.8161648275608, 115.161700090147, 10.7289451320065,
181.747670625714, 549.21661120626), decade_9 = c(123.31045087146,
648.23694540667, 17.7690326882018, 430.301803845829, 677.187054208271
)), row.names = c("ANK1", "NTN4", "PTPRH", "JAG1", "PLAT"), class = "data.frame")
newdat <- mydat %>% t() %>% as.data.frame() %>% tibble::rownames_to_column('decade') %>%
pivot_longer(-decade, names_to = 'gene', values_to = 'count')
ggplot(newdat) + geom_line(aes(decade, count, color = gene, group = gene))
Created on 2020-02-14 by the reprex package (v0.3.0)

Trouble creating a ggplot jitter plot in r

I am having trouble creating a ggplot jitter plot in R. I have a data frame, aa, and want to make the x-axis to be labelled with each gene name (i.e. AADAT). I want the y-axis to display fold-change values (the numeric values from aa). Also, I have two lists, b1 and b2, containing a certain number of the TCGA samples for each gene and their fold-change values. I want to color all the fold change values from the jitter plot based on whether they belong to b1 or b2. How would I do this?
dput(aa):
structure(list(TCGA.BC.A10Q = c(2.54076411223946, 1.11243159235432,
-8.07819965644818), TCGA.DD.A1EB = c(0.437216525767862, 0.461169651797969,
-1.35226172820141), TCGA.DD.A1EG = c(2.19320501695823, 1.27412886320315,
-3.46331855085169), TCGA.DD.A1EH = c(3.26575582726209, 1.80298461724572,
-4.4298527877724), TCGA.DD.A1EI = c(0.606030095793745, -0.0475743042500462,
-3.03789531487311), TCGA.DD.A3A6 = c(2.92707172081351, 1.0710641387449,
-4.67961035825927), TCGA.DD.A3A8 = c(0.679951440435414, 0.433630069956858,
-2.02366124071563), TCGA.ES.A2HT = c(-0.0812955357950507, 1.76935812455138,
0.236573023675848), TCGA.FV.A23B = c(2.29637640282035, 0.364439713535423,
-1.94003185763597), TCGA.FV.A3I0 = c(3.196518439057, 1.39220627799838,
-7.67942521158398), TCGA.FV.A3R2 = c(0.859594276372461, 1.0282030128145,
0.131890257248429)), .Names = c("TCGA.BC.A10Q", "TCGA.DD.A1EB",
"TCGA.DD.A1EG", "TCGA.DD.A1EH", "TCGA.DD.A1EI", "TCGA.DD.A3A6",
"TCGA.DD.A3A8", "TCGA.ES.A2HT", "TCGA.FV.A23B", "TCGA.FV.A3I0",
"TCGA.FV.A3R2"), row.names = c("ABCC10", "ACBD6", "ACSL1"), class = "data.frame")
dput(b1):
structure(list(ABCC10 = structure(c(2.19320501695823, 0.859594276372461,
3.196518439057, 3.26575582726209, 2.29637640282035), .Names = c("TCGA.DD.A1EG",
"TCGA.FV.A3R2", "TCGA.FV.A3I0", "TCGA.DD.A1EH", "TCGA.FV.A23B"
)), ACBD6 = structure(c(1.80298461724572, 0.433630069956858,
1.76935812455138, 1.27412886320315, 0.461169651797969), .Names = c("TCGA.DD.A1EH",
"TCGA.DD.A3A8", "TCGA.ES.A2HT", "TCGA.DD.A1EG", "TCGA.DD.A1EB"
)), ACSL1 = structure(c(-1.94003185763597, -3.46331855085169,
-3.03789531487311, -4.4298527877724), .Names = c("TCGA.FV.A23B",
"TCGA.DD.A1EG", "TCGA.DD.A1EI", "TCGA.DD.A1EH"))), .Names = c("ABCC10",
"ACBD6", "ACSL1"))
dput(b2):
structure(list(ABCC10 = structure(c(2.54076411223946, 0.437216525767862,
0.606030095793745, 2.92707172081351, 0.679951440435414, -0.0812955357950507
), .Names = c("TCGA.BC.A10Q", "TCGA.DD.A1EB", "TCGA.DD.A1EI",
"TCGA.DD.A3A6", "TCGA.DD.A3A8", "TCGA.ES.A2HT")), ACBD6 = structure(c(1.11243159235432,
-0.0475743042500462, 1.0710641387449, 0.364439713535423, 1.39220627799838,
1.0282030128145), .Names = c("TCGA.BC.A10Q", "TCGA.DD.A1EI",
"TCGA.DD.A3A6", "TCGA.FV.A23B", "TCGA.FV.A3I0", "TCGA.FV.A3R2"
)), ACSL1 = structure(c(-8.07819965644818, -1.35226172820141,
-4.67961035825927, -2.02366124071563, 0.236573023675848, -7.67942521158398,
0.131890257248429), .Names = c("TCGA.BC.A10Q", "TCGA.DD.A1EB",
"TCGA.DD.A3A6", "TCGA.DD.A3A8", "TCGA.ES.A2HT", "TCGA.FV.A3I0",
"TCGA.FV.A3R2"))), .Names = c("ABCC10", "ACBD6", "ACSL1"))
Are you looking for something like this?
library(dplyr); library(tidyr); library(ggplot2)
# convert aa from wide to long format
aa$gene <- rownames(aa)
aa <- aa %>%
gather(TCGA, fold.change, -gene)
# convert lookup lists into data frame for matching
match.table <- rbind(stack(b1) %>% mutate(source = "b1"),
stack(b2) %>% mutate(source = "b2"))
aa <- left_join(aa, match.table,
by = c("gene" = "ind", "fold.change" = "values"))
ggplot(aa,
aes(x = gene, y = fold.change, col = source)) +
geom_jitter() +
theme_light()

Resources