Related
I am using the following function in R to develop a simple volcano plot:
EnhancedVolcano(all_genes, x = "logFC", y = "adjust.p.value", lab = all_genes$Gene.ID,
pCutoff = 10e-2, FCcutoff = 1)
I would like my pCutoff line to appear to represent p = 0.05 which on a log scale for this figure would appear as 1.3 on the y-axis. However, changing "10e-2" to say "10e-2.5" generates an error
Error: unexpected numeric constant in: "EnhancedVolcano(all_genes, x =
"logFC", y = "adjust.p.value", lab = all_genes$Gene.ID,
pCutoff = 10e-2.5"
Any suggestions on how I can get a horizontal p-value cut-off line at exactly 1.3 (currently appears at 1.2). Here is some reproducible data:
structure(list(X = 1:14, Gene.ID = c("A", "B", "C", "D", "E", "F",
"G", "H", "I", "J", "K", "L", "M", "N"), logFC = c(1.5,
0.17212922, 0.145542174, 0.304348578, 0.124636936, 0.247841044,
0.160818268, 0.123741518, 0.148530876, 0.148960225, 0.114135472,
-0.147118359, 0.095549291, 0.138521594), AveExpr = c(5.426424957,
4.289728233, 4.901134193, 4.742864705, 5.447030699, 4.539641767,
4.650750102, 5.901020922, 5.365944907, 5.818788787, 4.837214384,
7.017656548, 4.531897822, 5.192294452), t = c(6.15098624, 5.452898247,
4.979246654, 4.949519834, 4.818043279, 4.73403717, 4.701937811,
4.522692175, 4.518518374, 4.281900066, 4.247981727, -4.194421592,
4.10350597, 4.088357671), p.value = c(1.27e-09, 6.8e-08, 7.99e-07,
9.26e-07, 1.77e-06, 2.65e-06, 3.09e-06, 7.13e-06, 7.27e-06, 2.1e-05,
2.44e-05, 3.07e-05, 4.53e-05, 4.83e-05), adjust.p.value = c(1.64e-05,
0.000438854, 0.002987004, 0.002987004, 0.004558267, 0.005687325,
0.005687325, 0.010422933, 0.010422933, 0.027128901, 0.028601707,
0.033061438, 0.04452146, 0.04452146), B = c(11.2786109, 7.664706936,
5.439886439, 5.306497286, 4.725465519, 4.361868581, 4.224515919,
3.473656504, 3.45649938, 2.508304771, 2.376338878, 2.169980059,
1.825392322, 1.76867543)), class = "data.frame", row.names = c(NA,
-14L))
I think you want the following code where the p-value is calculated like p=10^-s where s is your 1.3 like this:
library(EnhancedVolcano)
EnhancedVolcano(all_genes, x = "logFC", y = "adjust.p.value", lab = all_genes$Gene.ID,
pCutoff = 10^-1.3, FCcutoff = 1)
Output:
I am trying to create a map where I show the amount and category of Exports in every European country, using a scatterpie plot. This is the data I am trying to represent:
Country A B C D E F G Total
1 FR 48208727011 129696846358 34574295963 99154544367 87056475894 104059261659 391086898 50.3141238
2 BE 30008344795 130642251666 27315419464 48966420544 51351672841 57686707705 875915760 34.6846733
3 NL 53815652300 126965690773 52604259051 164935573324 43089183110 79607329056 516212340 52.1533900
4 DE 79643366705 285793555191 66579801287 230961697801 160598853461 167790359814 13590821673 100.4958456
5 IT 35306881277 124880125091 31042897909 65051137874 44481779280 65707113992 307508636 36.6777444
6 UK 4190569134 14226329164 4343541388 8299777138 7863823675 8191378024 177728913 4.7293147
7 IE 8049979989 25547263228 3324685081 15609577840 18293778082 13299495081 284077060 8.4408856
8 DK 10844794488 22366273732 3669934507 20904821209 8871184551 17364886109 1104100358 8.5125995
9 EL 5548998459 14199041489 9684405892 6969942717 2877634605 8740624663 9513713 4.8030162
10 PT 9302893141 19921174761 5742487970 12183620710 9794141959 10889202370 59025653 6.7892547
11 ES 29087706350 79136960848 26777114009 45807156391 43316950993 54577475375 225619825 27.8928984
12 LU 2103037221 5485541709 1274451840 3165573258 3448812873 2685200517 23828895 1.8186446
13 SE 14297019504 32367817406 10023929115 31082425639 18504243058 21520786963 251825497 12.8048047
14 FI 4368941438 17924135085 6424290821 13268574752 7679357024 7759601514 87932852 5.7512833
15 AT 11108739001 47969735941 8282060600 36180768764 20761302493 26060191499 319396555 15.0682195
16 MT 529547453 748570490 789405002 772157398 939286493 808546088 1179489 0.4588692
17 EE 1387220092 4797469841 1253135597 3127037067 1483571375 2251847940 315884341 1.4616166
18 LV 2714038229 4237027490 958962478 3158721396 1479290893 2931423023 89667330 1.5569131
19 LT 3408636288 8761053696 3263941940 5534705815 2630113004 4477086678 348351748 2.8423889
20 PL 17264039729 70678231411 11723435712 53284056901 28214023352 41438947683 319384835 22.2922120
21 CZ 7664643659 38573705210 5359209173 54059163460 20745595183 22423687496 216009863 14.9042014
22 SK 4193310193 17229538594 3771900263 19251595573 18415022178 10092362707 163300267 7.3117030
23 HU 5067726212 26282833327 5807291521 31406620462 16576651093 12918544146 456905984 9.8516573
24 RO 7210065674 24768518425 3986448288 20279628790 10274528929 13490373296 213856837 8.0223420
25 BG 3364866564 11098005470 2490021719 5767532283 2282959524 4540599434 289425842 2.9833411
26 SI 2226481542 11769625979 2186097710 5986840366 6169533307 8453642146 32927930 3.6825149
27 HR 2664219116 7204053277 2281750708 4155735739 2094082503 4970586651 14826478 2.3385254
28 CY 847756088 1467939342 983937418 824244195 1900124484 1375465594 47109886 0.7446577
Using the following code:
library(giscoR)
borders <- gisco_get_countries(
epsg = "3035",
year = "2020",
resolution = "3",
country = idf$Country
)
merged <- merge(borders,
idf,
by.x = "CNTR_ID",
by.y = "Country",
all.x = TRUE
)
library(tidyverse)
symbol_pos <- st_centroid(merged, of_largest_polygon = TRUE)
separate_coords = symbol_pos %>% mutate(lat = unlist(map(symbol_pos$geometry, 1)), long = unlist(map(symbol_pos$geometry, 2)))
sympos = data.frame(Country = separate_coords$CNTR_ID, lat = separate_coords$lat, long = separate_coords$long)
merged <- merge(merged,
sympos,
by.x = "CNTR_ID",
by.y = "Country",
all.x = TRUE
)
ggplot() +
geom_sf(data = merged, size = 0.1) +
geom_scatterpie(data = merged, aes(x = long, y = lat, r = Total), cols = LETTERS[1:7])+
coord_sf(xlim = c(2377294, 6500000), ylim = c(1413597, 5228510))
And it gives me this error:
Error in rowSums(data[, cols]) : 'x' must be numeric
I am trying to create a map similar to this one:
And I would be grateful if someone can provide some hint as to how to fix the error. Thanks.
Edit: below is the dput(idf) output:
structure(list(Country = c("FR", "BE", "NL", "DE", "IT", "UK",
"IE", "DK", "EL", "PT", "ES", "LU", "SE", "FI", "AT", "MT", "EE",
"LV", "LT", "PL", "CZ", "SK", "HU", "RO", "BG", "SI", "HR", "CY"
), A = c(48208727011, 30008344795, 53815652300, 79643366705,
35306881277, 4190569134, 8049979989, 10844794488, 5548998459,
9302893141, 29087706350, 2103037221, 14297019504, 4368941438,
11108739001, 529547453, 1387220092, 2714038229, 3408636288,
17264039729,
7664643659, 4193310193, 5067726212, 7210065674, 3364866564,
2226481542,
2664219116, 847756088), B = c(129696846358, 130642251666,
126965690773,
285793555191, 124880125091, 14226329164, 25547263228,
22366273732,
14199041489, 19921174761, 79136960848, 5485541709, 32367817406,
17924135085, 47969735941, 748570490, 4797469841, 4237027490,
8761053696, 70678231411, 38573705210, 17229538594, 26282833327,
24768518425, 11098005470, 11769625979, 7204053277, 1467939342
), C = c(34574295963, 27315419464, 52604259051, 66579801287,
31042897909, 4343541388, 3324685081, 3669934507, 9684405892,
5742487970, 26777114009, 1274451840, 10023929115, 6424290821,
8282060600, 789405002, 1253135597, 958962478, 3263941940,
11723435712,
5359209173, 3771900263, 5807291521, 3986448288, 2490021719,
2186097710,
2281750708, 983937418), D = c(99154544367, 48966420544,
164935573324,
230961697801, 65051137874, 8299777138, 15609577840, 20904821209,
6969942717, 12183620710, 45807156391, 3165573258, 31082425639,
13268574752, 36180768764, 772157398, 3127037067, 3158721396,
5534705815, 53284056901, 54059163460, 19251595573, 31406620462,
20279628790, 5767532283, 5986840366, 4155735739, 824244195),
E = c(87056475894, 51351672841, 43089183110, 160598853461,
44481779280, 7863823675, 18293778082, 8871184551, 2877634605,
9794141959, 43316950993, 3448812873, 18504243058, 7679357024,
20761302493, 939286493, 1483571375, 1479290893, 2630113004,
28214023352, 20745595183, 18415022178, 16576651093, 10274528929,
2282959524, 6169533307, 2094082503, 1900124484), F =
c(104059261659,
57686707705, 79607329056, 167790359814, 65707113992, 8191378024,
13299495081, 17364886109, 8740624663, 10889202370, 54577475375,
2685200517, 21520786963, 7759601514, 26060191499, 808546088,
2251847940, 2931423023, 4477086678, 41438947683, 22423687496,
10092362707, 12918544146, 13490373296, 4540599434, 8453642146,
4970586651, 1375465594), G = c(391086898, 875915760, 516212340,
13590821673, 307508636, 177728913, 284077060, 1104100358,
9513713, 59025653, 225619825, 23828895, 251825497, 87932852,
319396555, 1179489, 315884341, 89667330, 348351748, 319384835,
216009863, 163300267, 456905984, 213856837, 289425842, 32927930,
14826478, 47109886), Total = c(50.314123815, 34.6846732775,
52.1533899954, 100.4958455932, 36.6777444059, 4.7293147436,
8.4408856361, 8.5125994954, 4.8030161538, 6.7892546564,
27.8928983791,
1.8186446313, 12.8048047182, 5.7512833486, 15.0682194853,
0.4588692413, 1.4616166253, 1.5569130839, 2.8423889169,
22.2922119623,
14.9042014044, 7.3117029775, 9.8516572745, 8.0223420239,
2.9833410836, 3.682514898, 2.3385254472, 0.7446577007)),
row.names = c(NA,
-28L), class = "data.frame")
Please find below one possible solution to your request. The main problem was that geom_scatterpie() expects a dataframe and not an sf object. So you need to use as.data.frame() inside geom_scatterpie(). I also took the opportunity to simplify your code a bit.
Reprex
Code
library(giscoR)
library(sf)
library(dplyr)
library(ggplot2)
library(scatterpie)
borders <- gisco_get_countries(
epsg = "3035",
year = "2020",
resolution = "3",
country = idf$Country
)
merged <- merge(borders,
idf,
by.x = "CNTR_ID",
by.y = "Country",
all.x = TRUE
)
symbol_pos <- st_centroid(merged, of_largest_polygon = TRUE)
sympos <- symbol_pos %>%
st_drop_geometry() %>%
as.data.frame() %>%
cbind(., symbol_pos %>% st_coordinates()) %>%
select(CNTR_ID, X, Y) %>%
rename(Country = CNTR_ID, long = X, lat = Y)
merged <- merge(merged,
sympos,
by.x = "CNTR_ID",
by.y = "Country",
all.x = TRUE
)
Visualization
ggplot() +
geom_sf(data = merged, size = 0.1) +
geom_scatterpie(data = as.data.frame(merged), aes(x = long, y = lat, r = Total*2200), cols = LETTERS[1:7]) +
coord_sf(xlim = c(2377294, 6500000), ylim = c(1413597, 5228510))
Created on 2022-01-23 by the reprex package (v2.0.1)
Sorry if this question has already been answered but I could not find the solution to what I am after. I have a plot that uses both geom_line and geom_point. The result of this is that in the legend, it adds both a line and a point when they should have one or the other. I want to keep the circles for the data tg1 and tg2 and remove the line and then do the opposite to the data full i.e. keep the line but remove the circle. I have seen that something like this works where you want to remove dots from all of the legend entries but nothing to only do specifics Removing ggplot2's geom_point icons from the legend. Can anyone help? Thanks.
#code for plot
library(ggplot2)
library(tidypaleo)
ggplot(LGRSL, aes(x =mmsl , y = Age))+
coord_flip()+
theme_classic(12)+
geom_point(data=tg1,aes(x=mmslc,y=Year,col="Fort Denison 1"),pch=1,size=2)+
geom_point(data=tg2,aes(x=mmslc,y=Year,col="Fort Denison 2"),pch=1,size=2)+
geom_lineh(data = full, aes(x=Lutregalammslc,y=Year,col="Full budget"))+
scale_colour_manual(values=c("grey15","grey50","black"))
## data
## tg1
structure(list(Year = 1886:1891, SLR = c(6919L, 6935L, 6923L,
6955L, 6956L, 6957L), mmsl = c(-0.158, -0.142, -0.154, -0.122,
-0.121, -0.12), m = c(6.919, 6.935, 6.923, 6.955, 6.956, 6.957
), GIA.correction = c(-0.02814, -0.02793, -0.02772, -0.02751,
-0.0273, -0.02709), SLRc = c(6.89086, 6.90707, 6.89528, 6.92749,
6.9287, 6.92991), mmslc = c(-0.19667, -0.18046, -0.19225, -0.16004,
-0.15883, -0.15762)), row.names = c(NA, 6L), class = "data.frame")
##tg2
structure(list(Year = 1915:1920, SLR = c(7011L, 6929L, 6987L,
6945L, 6959L, 6951L), mmsl = c(-0.066, -0.148, -0.09, -0.132,
-0.118, -0.126), m = c(7.011, 6.929, 6.987, 6.945, 6.959, 6.951
), GIA.correction = c(-0.02205, -0.02184, -0.02163, -0.02142,
-0.02121, -0.021), SLRc = c(6.98895, 6.90716, 6.96537, 6.92358,
6.93779, 6.93), mmslc = c(-0.09858, -0.18037, -0.12216, -0.16395,
-0.14974, -0.15753)), row.names = c(NA, 6L), class = "data.frame")
##full
structure(list(Year = 1900:1905, Lutregala = c(-0.103609677,
-0.118603251, -0.134550791, -0.105553735, -0.103983082, -0.121731984
), Wapengo = c(-0.095213147, -0.096005337, -0.115700625, -0.097696891,
-0.084444784, -0.109161066), Tarra = c(-0.106672829, -0.109537943,
-0.135256365, -0.101357772, -0.089716518, -0.104258351), Lutregalammsl = c(-0.292863465,
-0.307857039, -0.323804579, -0.294807523, -0.29323687, -0.310985772
), Wapengommsl = c(-0.257028279, -0.257820469, -0.277515756,
-0.259512023, -0.246259916, -0.270976198), Tarrammsl = c(-0.30925682,
-0.312121933, -0.337840355, -0.303941762, -0.292300508, -0.306842342
), LgGIAc = c(-0.01921, -0.01904, -0.01887, -0.0187, -0.01853,
-0.01836), WapGIAc = c(-0.02486, -0.02464, -0.02442, -0.0242,
-0.02398, -0.02376), TarGIAc = c(-0.02373, -0.02352, -0.02331,
-0.0231, -0.02289, -0.02268), Lutregalammslc = c(-0.312073465,
-0.326897039, -0.342674579, -0.313507523, -0.31176687, -0.329345772
), Wapmmslc = c(-0.281888279, -0.282460469, -0.301935756, -0.283712023,
-0.270239916, -0.294736198), Tarmmslc = c(-0.33298682, -0.335641933,
-0.361150355, -0.327041762, -0.315190508, -0.329522342)), row.names = c(NA,
6L), class = "data.frame")
##LGRSL
structure(list(depths = c(0.5, 1.5, 2.5, 3.5, 4.5, 5.5), RSL = c(0.047746907,
0.025564293, 0.021733558, 0.007855661, -0.004909879, 0.01747051
), RSLerror = c(0.058158556, 0.057902654, 0.057988654, 0.057957388,
0.057905405, 0.057226072), Age = c(2017.456716, 2013.594255,
2006.92838, 1999.675523, 1994.729181, 1990.518154), Ageerror = c(0.373138707,
0.77640096, 1.430582242, 1.627131115, 3.222393394, 3.239674718
), mmsl = c(0.01993169, -0.002250924, -0.006081659, -0.019959556,
-0.032725096, -0.010344707)), row.names = c(NA, 6L), class = "data.frame")
##LGRSLgp
structure(list(Age = 1892:1897, mean = c(-0.298147401, -0.304630597,
-0.31023294, -0.315506983, -0.321225142, -0.327190675), error = c(0.051858047,
0.04985084, 0.047760525, 0.045624121, 0.043505044, 0.041477551
), min = c(-0.246289354, -0.254779758, -0.262472416, -0.269882862,
-0.277720098, -0.285713124), max = c(-0.350005447, -0.354481437,
-0.357993465, -0.361131103, -0.364730186, -0.368668226), x = c(-0.02125,
-0.02108, -0.02091, -0.02074, -0.02057, -0.0204), meangia = c(-0.276897401,
-0.283550597, -0.28932294, -0.294766983, -0.300655142, -0.306790675
), rate = c(NA, -4.967327, -4.946326, -4.964493, -4.977451, -4.911859
), raterror = c(NA, 3.581013, 3.796417, 4.022157, 4.226762, 4.255126
), mmsl = c(-0.325962618, -0.332445814, -0.338048157, -0.3433222,
-0.349040359, -0.355005892)), row.names = c(NA, 6L), class = "data.frame")
Here is a way.
Override the guide legend with a list of vectors of values for each of the aesthetics involved, shape and linetype. Note the different ways to specify what is to be removed.
I have also simplified the code a bit.
library(ggplot2)
library(dplyr)
colrs <- c("Fort Denison 1" = "grey15",
"Fort Denison 2" = "grey50",
"Full budget" = "black")
legnd <- list(shape = c(1, 1, NA),
linetype = c("blank", "blank", "solid"))
bind_rows(
tg1 %>% mutate(col = "Fort Denison 1"),
tg2 %>% mutate(col = "Fort Denison 2")
) %>%
ggplot(aes(x = mmslc, y = Year, colour = col)) +
geom_point(pch = 1, size = 2) +
geom_lineh(data = full, aes(x = Lutregalammslc, col = "Full budget"))+
scale_colour_manual(values = colrs,
guide = guide_legend(override.aes = legnd)) +
coord_flip() +
theme_classic(base_size = 12)
I have data with Utterances by speakers in conversation as well as their gazes to one another. The speakers' gazes are in columns A_aoi, B_aoi, and C_aoi, the gaze durations are in A_aoi_dur, B_aoi_dur, and C_aoi_dur. Here's a reproducible snippet of the data:
df0 <- structure(list(Line = c(105L, 106L, 107L, 109L, 110L, 111L, 112L,
113L, 114L, 115L, 116L), Speaker = c("ID01.A", NA, "ID01.A",
NA, "ID01.B", NA, "ID01.A", NA, "ID01.A", NA, "ID01.C"), Utterance = c("so you've ↑obviously↑ thought about it obviously: (.) have made a decision (.) I'm !head!ing in this door (.) one of the cleaning ladies at the UB !grabb!ed my elbow",
"(0.662)", "and said (.) ~no no no !this! is the !womens'! bathroom~=",
"(0.015)", "=((v: gasps))=", "(0.166)", "=NOW", "(0.622)", "!how! this always plays out ",
"(0.726)", "[when was] that¿="), UttStart = c(163898L, 172500L,
173162L, 176100L, 176115L, 176800L, 176966L, 177372L, 177994L,
179328L, 180054L), UttEnd = c(172500, 173162, 176100, 176115,
176800, 176966, 177372, 177994, 179328, 180054, 180668), UttDur = c(8602,
662, 2938, 15, 685, 166, 406, 622, 1334, 726, 614), A_aoi = c("*B*C*B*C*B*C*B*C*B*C",
"C*", "*B*C*C", "C", "C*", "*", "*C", "C", "C*B", "B*", "*"),
A_aoi_dur = c("21,516,79,333,200,634,233,651,17,2332,33,400,33,518,17,532,33,1900,119,1",
"414,248", "1124,412,116,533,600,153", "15", "616,69", "166",
"153,253", "622", "204,151,979", "219,507", "614"), B_aoi = c("A*A*A*A*A",
"A", "A", "A", "A", "A", "A", "A*", "*A*A", "A*A", "A*A"),
B_aoi_dur = c("475,130,567,137,1983,313,787,1400,2810", "662",
"2938", "15", "685", "166", "406", "398,224", "76,136,284,838",
"108,571,47", "116,270,228"), C_aoi = c("A", "A", "A*A*A",
"A", "A", "A", "A", "A*A", "A", "A*A", "A"), C_aoi_dur = c("8602",
"662", "1058,123,1300,144,313", "15", "685", "166", "406",
"264,351,7", "1334", "125,323,278", "614")), row.names = c(NA,
-11L), class = c("tbl_df", "tbl", "data.frame"))
EDIT: new test data with temporally overlapping Utterances:
df0 <- structure(list(Line = 137:145,
Speaker = c("ID01.A", "ID01.A-Q", NA, "ID01.A", "ID01.A-Q", "ID01.A-Q", "ID01.A-Q", "ID01.A-Q",NA),
Utterance = c("↑she gra:bs my elbow (.) I turn to !look! at her↑ and she's like ~this is a (.) womens' bathroom you can't go in there~",
"~this is a (.) womens' bathroom you can't go in there~", "(0.534)",
"and I'm like ~((silent f: blank stare))~ (.) and she didn't, she was just like ~you can't go in~ (.) I'm like ~I'm a !woman!~ she said ~no you're not you can't go in~",
"~((silent f: blank stare))~", "~you can't go in~", "~I'm a !woman!~",
"~no you're not you can't go in~", "(0.487)"),
UttStart = c(208845L, 211450L, 214136L, 214670L, 215409L, 218307L, 219235L, 220076L, 221368L),
UttEnd = c(214136, 214136, 214670, 221368, 217117, 219050, 219885, 221368, 221855),
UttDur = c(5291, 2686, 534, 6698, 1708, 743, 650, 1292, 487),
A_aoi = c("C*B*C*C*B*C*", "C*B*C*", "*B", "B*C*B*C*C*B*B", "C*B", "C*B", "*", "*B","B"),
A_aoi_dur = c("57,445,1100,135,199,333,866,302,832,33,468,521","530,302,832,33,468,521",
"144,390", "377,235,466,399,1268,132,268,132,433,6,716,1412,854","339,399,970", "73,6,664", "650", "438,854", "487"),
B_aoi = c("A*A","A", "A", "A*A*A*A*A*A", "A", "*A*A", "*A", "A*A", "A"),
B_aoi_dur = c("1691,121,3479", "2686", "534", "53,180,3333,134,253,280,203,534,1296,138,294",
"1708", "63,253,280,147", "405,245", "860,138,294", "487"),
C_aoi = c("A", "A", "A", "A*A", "A", "A*", "A", "A", "A"),
C_aoi_dur = c("5291", "2686", "534", "3766,734,2198",
"1708", "129,614", "650", "1292", "487")),
row.names = c(NA, -9L), class = c("tbl_df", "tbl", "data.frame"))
What I'd like to be able to visualize is who is looking at whom and for how long for each Utterance, roughly like in this schematic representation:
What I can do at present is plot the gazes on a minute-by-minute scale, but just the gazes - not the Utterances: Plotting gaze movements by multiple speakers in a single plot. Starting from the data as above, this can be achieved by multiple transformations (shown below) but the resulting plot does not feature the Utterances and it plots the gazes per minute, whereas I need the gazes per Utterance:
I'm fully aware that this is demanding a lot. Help with it is all the more appreciated.
# pivot_longer so that all gazes have their own row:
df0 <- df0 %>%
rename_with(~ str_c(., "_AOI"), ends_with("_aoi")) %>%
pivot_longer(cols = contains("_"),
names_to = c("Gaze_by", ".value"), #
names_pattern = "^(.*)_([^_]+$)"
) %>%
mutate(Gaze_by = sub("^(.).*", "\\1", Gaze_by)) %>%
mutate(AOI = str_replace_all(AOI, "(?<=.)(?=.)", ",")) %>%
separate_rows(c(AOI, dur), sep = ",", convert = TRUE)
# compute starttimes and endtimes for gazes:
df1 <- df0 %>%
group_by(Gaze_by) %>%
mutate(
end = cumsum(dur),
start = end - dur
)
View(df1)
# compute minutes:
df2 <- df1 %>%
mutate(
# which minute does the event start in?
minute_start = as.integer(start/60000),
# which minute does the event end in?
minute_end = as.integer(end/60000),
# does the event straddle a minute mark?
straddler = minute_end > minute_start)
View(df2)
# 1st subset of `df2`:
df2_A1 <- df2 %>%
# filter those rows that contain events straddling minute marks:
filter(straddler=="TRUE") %>%
# reduce the endtime to the exact minute mark:
mutate(end = minute_end*60000)
View(df2_A1)
# 2nd subset of `df2`:
df2_A2 <- df2 %>%
# filter those rows that contain events straddling minute marks:
filter(straddler=="TRUE") %>%
# reduce the starttime to the exact minute mark:
mutate(start = minute_end*60000)
View(df2_A2)
# 3rd subset of `df0`:
df2_A3 <- df2 %>%
# filter those rows that do not contain events straddling minute marks:
filter(!straddler == "TRUE")
View(df2_A3)
# row-bind all three subsets:
df4 <- rbind(df2_A1, df2_A2, df2_A3) %>%
arrange(start) %>%
mutate(
minute = as.integer(start/60000),
# reduce total starttimes to starttimes per minute:
start_pm = start - 60000*minute,
# reduce total endtimes to endtimes per minute:
end_pm = end - 60000*minute)
# plot gaze activity for **ALL** speakers:
df4 %>%
ggplot(aes(x = start_pm,
xend = end_pm,
y = minute + scale(as.numeric(as.factor(Gaze_by))) / 6,
yend = minute + scale(as.numeric(as.factor(Gaze_by))) / 6,
color = AOI)) +
# draw segments for AOI:
geom_segment(size = 2) +
# reverse y-axis scale:
scale_y_reverse(breaks = 0:max(df4$minute),
labels = paste(0:max(df4$minute), "min", " Gaze_by_A\n Gaze_by_B\n Gaze_by_C", sep = " "),
name = NULL) +
# define custom colors:
scale_colour_manual(values = c("*" = "lemonchiffon",
"A" = "darkorange",
"B" = "lawngreen",
"C" = "slateblue1")) +
# plot title:
labs(title = "Gaze activity") +
theme(axis.title.x.bottom = element_blank())
Here is a solution that gets close to what you are looking for, making use of facets. It also uses forcats::fct_reorder and stringr::str_wrap (which are both part of the tidyverse).
This also wraps any long utterances and keeps the x-scale the same for all facets, rather than allowing them to stretch to fill the width.
df4 %>%
mutate(#add text for y axis labels
Gaze_by = paste0("Gaze_by_", Gaze_by),
#reorder facet panels, add speaker at start, and wrap to 120 characters
Utterance = fct_reorder(str_wrap(paste0(substr(Speaker, 6, 6), ": ",
Utterance),
120),
start_pm),
#set a dummy end point for each utterance based on the longest one
max_x = UttStart - min(UttStart) + max(UttDur)) %>%
ggplot(aes(x = start_pm, xend = end_pm,
y = Gaze_by, yend = Gaze_by, #as discrete variable
color = AOI)) +
geom_segment(size = 3) +
geom_point(aes(x = max_x, y = Gaze_by), alpha = 0) + #plot invisible dummy end points
scale_y_discrete(name = NULL, limits = rev) + #rev to get A at the top
facet_wrap(~Utterance, scales = "free_x", ncol = 1) +
scale_colour_manual(values = c("*" = "lemonchiffon",
"A" = "darkorange",
"B" = "lawngreen",
"C" = "slateblue1")) +
labs(title = "Gaze activity") +
theme_minimal() + #removes a lot of lines etc
theme(strip.text = element_text(color = "blue", hjust = 0), #facet strip text
strip.background = element_rect(fill = "white", color = "white"),
axis.title.x.bottom = element_blank())
To cut the utterances into 4-second chunks, you can do something like this...
df4 %>% group_by(Utterance) %>%
#work out relative durations from start of utterance and create subutterances
mutate(relStart = start_pm - min(start_pm),
relEnd = end_pm - min(start_pm),
subNo = map2(relStart, relEnd, ~seq(.x %/% 4000, .y %/% 4000, 1))) %>%
unnest(subNo) %>% #expand one row per subutterance
mutate(Utterance = paste0(Utterance, " (#", subNo + 1, ")"), #add sub no
subStart = pmax(4000 * subNo, relStart), #limits on subUtt
subEnd = pmin(4000 * (subNo + 1), relEnd), #limits on subUtt
start_pm = min(start_pm) + subStart, #redefine start
end_pm = min(start_pm) + subEnd) %>% #redefine end
group_by(Utterance) %>% #regroup as Utterance has changed!
mutate(max_x = min(start_pm) + 4000) %>% #define dummy end points
ungroup() %>%
mutate(Gaze_by = paste0("Gaze_by_", Gaze_by),
Utterance = fct_reorder(str_wrap(paste0(substr(Speaker, 6, 6), ": ", Utterance),
120), start_pm)) %>%
ggplot(...) #...as per code above from this point
Let's say I have a saved plot named my_plot, produced with ggplot. Also, let's say that the column in my_plot[[1]] data frame used for horizontal axis is named my_dates
Now, I want to add some vertical lines to the plot, which, of course, can be done by something like that:
my_plot +
geom_vline(aes(xintercept = my_dates[c(3, 8)]))
Since I perform this task quite on a regular basis, I want to write a function for that -- something like that:
ggplot.add_lines <- function(given_plot, given_points) {
finale <- given_plot +
geom_vline(aes(xintercept = given_plot[[1]]$my_dates[given_points]))
return(finale)
}
Which, as it's probably obvious to everyone, doesn't work:
> ggplot.add_lines(my_plot, c(3, 5))
Error in eval(expr, envir, enclos) : object 'given_plot' not found
So, my question would be what am I doing wrong, and how can it be fixed? Below is some data for a reproducible example:
> dput(my_plot)
structure(list(data = structure(list(my_dates = c(1, 2, 3, 4,
5, 6, 7, 8, 9, 10), my_points = c(-2.20176409422924, -1.12872396340683,
-0.259703895194354, 0.634233385649338, -0.678983982973015, -1.83157126614836,
1.33360095418957, -0.120455389285709, -0.969431974863616, -1.20451262626184
)), .Names = c("my_dates", "my_points"), row.names = c(NA, -10L
), class = "data.frame"), layers = list(<environment>), scales = <S4 object of class structure("Scales", package = "ggplot2")>,
mapping = structure(list(x = my_dates, y = my_points), .Names = c("x",
"y"), class = "uneval"), theme = list(), coordinates = structure(list(
limits = structure(list(x = NULL, y = NULL), .Names = c("x",
"y"))), .Names = "limits", class = c("cartesian", "coord"
)), facet = structure(list(shrink = TRUE), .Names = "shrink", class = c("null",
"facet")), plot_env = <environment>, labels = structure(list(
x = "my_dates", y = "my_points"), .Names = c("x", "y"
))), .Names = c("data", "layers", "scales", "mapping", "theme",
"coordinates", "facet", "plot_env", "labels"), class = c("gg",
"ggplot"))
According to this post, below is my solution to this problem. The environment issue in the **ply and ggplot is annoying.
ggplot.add_lines <- function(given_plot, given_points) {
finale <- eval(substitute( expr = {given_plot +
geom_vline(aes(xintercept = my_dates[given_points]))}, env = list(given_points = given_points)))
return(finale)
}
The following code runs well on my machine. (I cannot make your reproducible work on my machine...)
df <- data.frame(my_dates = 1:10, val = 1:10)
my_plot <- ggplot(df, aes(x = my_dates, y = val)) + geom_line()
my_plot <- ggplot.add_lines(my_plot, c(3, 5))
print(my_plot)
Update: The above solution fails when more than two points are used.
It seems that we can easily solve this problem by not including the aes (subsetting together with aescauses problems):
ggplot.add_lines <- function(given_plot, given_points) {
finale <- given_plot + geom_vline(xintercept = given_plot[[1]]$my_dates[given_points])
return(finale)
}
I would take the following approach: extract the data.frame of interest, and pass it to the new layer,
df <- data.frame(my_dates = 1:10, val = rnorm(10))
my_plot <- ggplot(df, aes(x = my_dates, y = val)) + geom_line()
add_lines <- function(p, given_points=c(3,5), ...){
d <- p[["data"]][given_points,]
p + geom_vline(data = d, aes_string(xintercept="my_dates"), ...)
}
add_lines(my_plot, c(3,5), lty=2)