How to summarize a tibble by time periods? - r

EDIT: I have updated the input, added the expected output.
I have a table that contains time-dates and a grouping criterion NEL_Hotspots.
I am trying to summarise the table according to these rules:
Observations grouped by NEL_Hotspots and then all observations that fall within the same day (24h), AND have Wind_direc within +- 10.
This is a small subset of a larger table:
structure(list(Serial_number = c(10, 8, 9, 20, 21, 23, 3, 5,
7, 11, 13, 20, 24), Date_time = c("3/31/05 1:57", "3/31/05 4:12",
"3/31/05 18:12", "4/1/05 2:12", "4/1/05 3:12", "4/3/05 16:12",
"3/28/05 9:57", "3/30/05 13:42", "3/31/05 1:57", "4/10/05 10:57",
"4/10/05 18:57", "4/10/05 20:13", "4/10/05 21:30"), Wind_direc = c(50,
60, 70, 60, 70, 70, 60, 140, 50, 270, 300, 310, 290), NEL_Hotspots = c(0,
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), Dust_Intens = c("weak",
"weak", "weak", "weak", "medium", "weak", "weak", "medium", "weak",
"weak", "medium", "medium", "high"), Area_km2 = c(290, 241, 225,
240, 340, 320, 176, 143, 211, 72, 171, 167, 121)), .Names = c("Serial_number",
"Date_time", "Wind_direc", "NEL_Hotspots", "Dust_Intens", "Area_km2"
), class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA,
-13L), spec = structure(list(cols = structure(list(Serial_number = structure(list(), class = c("collector_double",
"collector")), Date_time = structure(list(), class = c("collector_character",
"collector")), Wind_direc = structure(list(), class = c("collector_double",
"collector")), NEL_Hotspots = structure(list(), class = c("collector_double",
"collector")), Dust_Intens = structure(list(), class = c("collector_character",
"collector")), Area_km2 = structure(list(), class = c("collector_double",
"collector"))), .Names = c("Serial_number", "Date_time", "Wind_direc",
"NEL_Hotspots", "Dust_Intens", "Area_km2")), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), .Names = c("cols", "default", "skip"
), class = "col_spec"))
Once the data is loaded I used df <- df %>% mutate(full_date = ymd_hms(Date_time)) from lubridate to create the column full_date.
The expected output is:
structure(list(`First Date_time` = c("3/31/05 1:57", "3/31/05 18:12",
"4/1/05 2:12", "4/3/05 16:12", "3/28/05 9:57", "3/30/05 13:42",
"3/31/05 1:57", "4/10/05 10:57", "4/10/05 18:57"), `Last Date_time` = c("3/31/05 4:12",
"3/31/05 18:12", "4/1/05 3:12", "4/3/05 16:12", "3/28/05 9:57",
"3/30/05 13:42", "3/31/05 1:57", "4/10/05 10:57", "4/10/05 21:30"
), Wind_direc_avg = c(55, 70, 60, 70, 60, 140, 50, 270, 300),
wind_direc_min = c(50, 70, 60, 70, 60, 140, 50, 270, 290),
wind_direc_max = c(60, 70, 70, 70, 60, 140, 50, 270, 310),
NEL_Hotspots = c(0, 0, 0, 0, 1, 1, 1, 1, 1), Dust_Intens = c("weak,weak",
"weak", "weak,medium", "weak", "weak", "medium", "weak",
"weak", "medium, medium, high"), Area_km2_avg = c(265.5,
225, 290, 320, 176, 143, 211, 72, 153), Area_km2_stdv = c(34.64,
0, 70.71, 0, 0, 0, 0, 0, 27.78), events_count = c(2, 1, 2,
1, 1, 1, 1, 1, 3), serial_numbers = c("10, 8", "9", "20, 21",
"23", "3", "5", "7", "11", "13, 20, 24")), .Names = c("First Date_time",
"Last Date_time", "Wind_direc_avg", "wind_direc_min", "wind_direc_max",
"NEL_Hotspots", "Dust_Intens", "Area_km2_avg", "Area_km2_stdv",
"events_count", "serial_numbers"), class = c("spec_tbl_df", "tbl_df",
"tbl", "data.frame"), row.names = c(NA, -9L), spec = structure(list(
cols = structure(list(`First Date_time` = structure(list(), class = c("collector_character",
"collector")), `Last Date_time` = structure(list(), class = c("collector_character",
"collector")), Wind_direc_avg = structure(list(), class = c("collector_double",
"collector")), wind_direc_min = structure(list(), class = c("collector_double",
"collector")), wind_direc_max = structure(list(), class = c("collector_double",
"collector")), NEL_Hotspots = structure(list(), class = c("collector_double",
"collector")), Dust_Intens = structure(list(), class = c("collector_character",
"collector")), Area_km2_avg = structure(list(), class = c("collector_double",
"collector")), Area_km2_stdv = structure(list(), class = c("collector_double",
"collector")), events_count = structure(list(), class = c("collector_double",
"collector")), serial_numbers = structure(list(), class = c("collector_character",
"collector"))), .Names = c("First Date_time", "Last Date_time",
"Wind_direc_avg", "wind_direc_min", "wind_direc_max", "NEL_Hotspots",
"Dust_Intens", "Area_km2_avg", "Area_km2_stdv", "events_count",
"serial_numbers")), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), .Names = c("cols", "default", "skip"
), class = "col_spec"))
I would appreciate any help!

Try to create groups based on your condition. A new group is created when -
Date changes
Every +10 value change in Wind
For each group calculate all the statistics that you want in summarise
library(dplyr)
# Label consecutive runs of wind directions within one vector.
# A run is anchored at its first value and keeps absorbing the following
# observations while they stay within `tol` degrees of that anchor; the first
# observation outside the band starts (and anchors) the next run.
# Returns an integer-like vector of run ids (1, 1, 2, ...) as long as `x`.
wind_run_id <- function(x, tol = 10) {
  anchor <- Reduce(
    function(a, w) if (abs(w - a) > tol) w else a,
    x,
    accumulate = TRUE
  )
  # The anchor only changes when a new run starts, and then by more than
  # `tol`, so a non-zero difference marks every run boundary.
  cumsum(c(TRUE, diff(anchor) != 0))
}

df %>%
  mutate(
    Date_time = lubridate::mdy_hm(Date_time),
    date = as.Date(Date_time)
  ) %>%
  # Runs are defined over time-ordered observations of each hotspot.
  arrange(NEL_Hotspots, Date_time) %>%
  group_by(NEL_Hotspots, date) %>%
  # Computed inside mutate() (not inside group_by()) so that the run ids are
  # evaluated separately for every hotspot/day group.
  mutate(val = wind_run_id(Wind_direc)) %>%
  group_by(val, .add = TRUE) %>%
  summarise(
    first_date_time = first(Date_time),
    last_date_time = last(Date_time),
    Wind_direc_avg = mean(Wind_direc),
    Wind_direc_min = min(Wind_direc),
    Wind_direc_max = max(Wind_direc),
    Dust_Intens = toString(Dust_Intens),
    Area_km2_avg = mean(Area_km2),
    # sd() of a single observation is NA; report 0 as in the expected output.
    Area_km2_stdv = if (n() > 1) sd(Area_km2) else 0,
    events_count = n(),
    serial_numbers = toString(Serial_number),
    .groups = "drop"
  )

Related

Changing appearance of nodes in markerClusterOptions using leaflet

I would like to change the default behaviour of markerClusterOptions and add a node at the end of each of the lines that appear when you click on a cluster:
Also, I want each node to be of the color specified by the legend.
Any way to accomplish this?
This is my code:
# Tile map of the reservations with clustered markers and a legend keyed on
# ACTIVIDAD (the palette `pal` is expected to exist already).
leaflet(data) %>%
  addTiles() %>%
  addMarkers(clusterOptions = markerClusterOptions()) %>%
  addLegend(
    position = "topright",
    pal = pal,
    values = ~ACTIVIDAD,
    title = "Number of reservations first week of 2021",
    opacity = 0.8
  )
dput of data:
structure(list(X1 = c(21789, 17031, 6566, 5844, 19533, 5111,
15152, 12890, 22602, 22109, 21538, 13036, 2461, 7280, 12313,
2621, 13520, 6361, 4114, 11304), FHSTART = structure(c(1609867892,
1609849545, 1609765036, 1609762965, 1609857397, 1609761127, 1609844798,
1609839427, 1609874643, 1609870091, 1609866388, 1609839798, 1609754738,
1609767241, 1609837948, 1609755121, 1609840885, 1609764463, 1609758662,
1609834336), tzone = "UTC", class = c("POSIXct", "POSIXt")),
FHSTARTREAL = structure(c(1609867892, 1609849545, 1609765036,
1609762965, 1609857397, 1609761127, 1609844798, 1609839427,
1609874643, 1609870091, 1609866388, 1609839798, 1609754738,
1609767241, 1609837948, 1609755121, 1609840885, 1609764463,
1609758662, 1609834336), tzone = "UTC", class = c("POSIXct",
"POSIXt")), FHSTOPREAL = structure(c(1609870836, 1609851345,
1609771973, 1609766832, 1609858252, 1609763401, 1609846598,
1609842422, 1609944134, 1609872502, 1609868749, 1609843424,
1609758052, 1609769041, 1609838735, 1609756908, 1609843469,
1609766718, 1609759589, 1609836505), tzone = "UTC", class = c("POSIXct",
"POSIXt")), MATRICULA = c("/gg{f+>", "xg(|+)f", "/(//;af",
"e\\|{:c#", "/|\\x>)k", "\\((/;c;", "/e\\|++>", "e*(*:k~",
"\\*e/:$_", "g|*{;~}", "={(/&[;", "ee=g+^;", "\\/xx;#>",
"{//{)a:", "e*{=&+#", "xx/(;$a", "\\(*x>&#", "e|*x_}_", "|(//+._",
"(\\/{>;k"), ID_ZONADUM = c(1622, 1562, 1431, 3792, 1312,
3038, 1673, 1292, 1759, 1274, 3057, 3605, 3708, 1221, 1433,
1365, 1452, 1355, 3377, 3398), ORIGEN = c("SPRO", "SMS",
"SPRO", "SPRO", "SPRO", "SPRO", "SMS", "SPRO", "SPRO", "SPRO",
"SPRO", "SPRO", "SPRO", "SMS", "SPRO", "SPRO", "SPRO", "SPRO",
"SPRO", "SPRO"), ACTIVIDAD = structure(c(6L, 3L, 2L, 2L,
5L, 5L, 3L, 2L, 5L, 5L, 2L, 1L, 2L, 3L, 5L, 2L, 5L, 5L, 5L,
2L), .Label = c("HORECA", "Instalaciones, Mantenimiento y Obra Civil",
"No Spec", "Otros", "Paquetería y transporte urgente", "Pequeño Comercio",
"Supermercados"), class = "factor"), lat = c(41.38308059149,
41.390752258232, 41.395685321124, 41.377472141928, 41.395826426361,
41.391961693047, 41.385461301675, 41.400721690743, 41.3776097494,
41.407150896929, 41.402077667164, 41.393085133204, 41.380250123556,
41.406037037105, 41.39670664047, 41.386903981542, 41.39516783617,
41.388200567422, 41.394411263743, 41.404228695141), lon = c(2.154837049375,
2.154177593286, 2.162988328549, 2.151007353295, 2.172271855917,
2.162611774023, 2.149397960016, 2.174697200163, 2.155300874341,
2.175502512754, 2.177826477447, 2.168626816485, 2.153614884012,
2.181277163321, 2.164337648433, 2.169052711932, 2.166379112099,
2.169668756863, 2.154389212806, 2.172054447565), usetime = c("0 days 00:49:04",
"0 days 00:30:00", "0 days 01:55:37", "0 days 01:04:27",
"0 days 00:14:15", "0 days 00:37:54", "0 days 00:30:00",
"0 days 00:49:55", "0 days 19:18:11", "0 days 00:40:11",
"0 days 00:39:21", "0 days 01:00:26", "0 days 00:55:14",
"0 days 00:30:00", "0 days 00:13:07", "0 days 00:29:47",
"0 days 00:43:04", "0 days 00:37:35", "0 days 00:15:27",
"0 days 00:36:09"), usetime_REAL = c("0 days 00:49:04", "0 days 00:30:00",
"0 days 01:55:37", "0 days 01:04:27", "0 days 00:14:15",
"0 days 00:37:54", "0 days 00:30:00", "0 days 00:49:55",
"0 days 19:18:11", "0 days 00:40:11", "0 days 00:39:21",
"0 days 01:00:26", "0 days 00:55:14", "0 days 00:30:00",
"0 days 00:13:07", "0 days 00:29:47", "0 days 00:43:04",
"0 days 00:37:35", "0 days 00:15:27", "0 days 00:36:09"),
hora_start = c(17, 12, 12, 12, 14, 11, 11, 9, 19, 18, 17,
9, 10, 13, 9, 10, 10, 12, 11, 8), hora_stop = c(18, 12, 14,
13, 14, 12, 11, 10, 14, 18, 17, 10, 11, 14, 9, 10, 10, 13,
11, 8), mes_start = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1), dia_start = c(5, 5, 4, 4, 5, 4,
5, 5, 5, 5, 5, 5, 4, 4, 5, 4, 5, 4, 4, 5), minuto_start = c(31,
25, 57, 22, 36, 52, 6, 37, 24, 8, 6, 43, 5, 34, 12, 12, 1,
47, 11, 12), minuto_stop = c(20, 55, 52, 27, 50, 30, 36,
27, 42, 48, 45, 43, 0, 4, 25, 41, 44, 25, 26, 48), mes = c("Jan",
"Jan", "Jan", "Jan", "Jan", "Jan", "Jan", "Jan", "Jan", "Jan",
"Jan", "Jan", "Jan", "Jan", "Jan", "Jan", "Jan", "Jan", "Jan",
"Jan"), dia_semana = c("Tuesday", "Tuesday", "Monday", "Monday",
"Tuesday", "Monday", "Tuesday", "Tuesday", "Tuesday", "Tuesday",
"Tuesday", "Tuesday", "Monday", "Monday", "Tuesday", "Monday",
"Tuesday", "Monday", "Monday", "Tuesday"), col_5_30 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0),
col_30_120 = c(0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 1, 0, 0), col_120_360 = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), col_max_360 = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), fecha = structure(c(18632,
18632, 18631, 18631, 18632, 18631, 18632, 18632, 18632, 18632,
18632, 18632, 18631, 18631, 18632, 18631, 18632, 18631, 18631,
18632), class = "Date"), time_start = structure(c(63092,
44745, 46636, 44565, 52597, 42727, 39998, 34627, 69843, 65291,
61588, 34998, 36338, 48841, 33148, 36721, 36085, 46063, 40262,
29536), class = c("hms", "difftime"), units = "secs"), fecha_stop = structure(c(18632,
18632, 18631, 18631, 18632, 18631, 18632, 18632, 18633, 18632,
18632, 18632, 18631, 18631, 18632, 18631, 18632, 18631, 18631,
18632), class = "Date"), time_stop = structure(c(66036, 46545,
53573, 48432, 53452, 45001, 41798, 37622, 52934, 67702, 63949,
38624, 39652, 50641, 33935, 38508, 38669, 48318, 41189, 31705
), class = c("hms", "difftime"), units = "secs")), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L), spec = structure(list(
cols = list(X1 = structure(list(), class = c("collector_double",
"collector")), FHSTART = structure(list(format = ""), class = c("collector_datetime",
"collector")), FHSTARTREAL = structure(list(format = ""), class = c("collector_datetime",
"collector")), FHSTOPREAL = structure(list(format = ""), class = c("collector_datetime",
"collector")), MATRICULA = structure(list(), class = c("collector_character",
"collector")), ID_ZONADUM = structure(list(), class = c("collector_double",
"collector")), ORIGEN = structure(list(), class = c("collector_character",
"collector")), ACTIVIDAD = structure(list(), class = c("collector_character",
"collector")), LATITUD = structure(list(), class = c("collector_double",
"collector")), LONGITUD = structure(list(), class = c("collector_double",
"collector")), usetime = structure(list(), class = c("collector_character",
"collector")), usetime_REAL = structure(list(), class = c("collector_character",
"collector")), hora_start = structure(list(), class = c("collector_double",
"collector")), hora_stop = structure(list(), class = c("collector_double",
"collector")), mes_start = structure(list(), class = c("collector_double",
"collector")), dia_start = structure(list(), class = c("collector_double",
"collector")), minuto_start = structure(list(), class = c("collector_double",
"collector")), minuto_stop = structure(list(), class = c("collector_double",
"collector")), mes = structure(list(), class = c("collector_character",
"collector")), dia_semana = structure(list(), class = c("collector_character",
"collector")), col_5_30 = structure(list(), class = c("collector_double",
"collector")), col_30_120 = structure(list(), class = c("collector_double",
"collector")), col_120_360 = structure(list(), class = c("collector_double",
"collector")), col_max_360 = structure(list(), class = c("collector_double",
"collector")), fecha = structure(list(format = ""), class = c("collector_date",
"collector")), time_start = structure(list(format = ""), class = c("collector_time",
"collector")), fecha_stop = structure(list(format = ""), class = c("collector_date",
"collector")), time_stop = structure(list(format = ""), class = c("collector_time",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1L), class = "col_spec"))

Loop in tidyverse

I am learning the tidyverse and I am using a time-series dataset, from which I selected the columns that start with sec. Basically, I would like to identify the values in those columns that equal 123, keep them, and replace the rest with 0. But I don't know how to loop over sec1:sec4. Also, how can I sum() per column?
df1<-df %>%
select(starts_with("sec")) %>%
select(ifelse("sec1:sec4"==123, 1, 0))
Sample data:
structure(list(sec1 = c(1, 123, 1), sec2 = c(123, 1, 1), sec3 = c(123,
0, 0), sec4 = c(1, 123, 1)), spec = structure(list(cols = list(
sec1 = structure(list(), class = c("collector_double", "collector"
)), sec2 = structure(list(), class = c("collector_double",
"collector")), sec3 = structure(list(), class = c("collector_double",
"collector")), sec4 = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), row.names = c(NA,
-3L), class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"))
I think you would have to use mutate and across to accomplish this. Below, you mutate across each column starting with sec, keeping all values that are 123 and replacing all others with 0.
# Keep only the "sec" columns, then in each of them leave the values equal to
# 123 untouched and overwrite everything else with 0.
df1 <- df %>%
  select(starts_with("sec")) %>%
  mutate(
    across(
      .cols = everything(),
      .fns = function(col) ifelse(col == 123, col, 0)
    )
  )

Issues with geom_dumbbell plot giving me multiple points instead of single points

My dumbbell plot is giving me multiple points on my graph and I am wondering why. I assumed I am supposed to be getting a single point. I have tried editing the parameters but to no avail. it is making it difficult to add other aesthetics. I will appreciate any help.
data10 <- structure(list(GROUP = c("LLL", "LLL", "LLL", "LRL", "LRL", "LRL",
"RLR", "RLR", "RLR", "RRR", "RRR", "RRR"), conditon2 = c("Midline_Ret",
"No Midline crossing_Ret", "Midline crossing_Ret", "Midline_Trans",
"No Midline crossing_Trans", "Midline crossing_Trans", "Midline_Trans",
"No Midline crossing_Trans", "Midline crossing_Trans", "Midline_Ret",
"No Midline crossing_Ret", "Midline crossing_Ret"), Trial_type = c("retention",
"retention", "retention", "transfer", "transfer", "transfer",
"transfer", "transfer", "transfer", "retention", "retention",
"retention"), Training = c("left", "left", "left", "right", "right",
"right", "left", "left", "left", "right", "right", "right"),
N = c(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8), MovementTime_102 = c(1940.625,
2200.234375, 1950.078125, 1623.59375, 2266.875, 2608.4375,
1649.21875, 1690.390625, 3128.660714, 2018.75, 1701.40625,
2505.703125), MovementTime_104 = c(1601.875, 1658.046875,
1573.839286, 1435.15625, 2013.359375, 2302.109375, 1390.859375,
1616.71875, 2399.765625, 1410.625, 1336.40625, 1684.53125
), Pathlength_102 = c(2.8680385, 3.872267719, 2.808966672,
3.184808844, 3.450548063, 2.779296859, 3.057751688, 2.823023969,
3.831920395, 2.991207031, 2.790851078, 3.276254563), Pathlength_104 = c(2.65516175,
2.945664516, 2.568061634, 3.017285625, 3.408170609, 2.625024781,
2.655326156, 2.628399641, 2.975724094, 2.636576609, 2.661606391,
2.782565766), NormalizedJerk_102 = c(2060.157118, 2981.812369,
2089.925187, 1391.973644, 3900.411917, 4015.516784, 1411.201689,
1853.413926, 6830.819063, 2310.589311, 1340.465366, 5617.967587
), NormalizedJerk_104 = c(1092.701687, 1508.285476, 1269.670456,
914.3836443, 2335.718672, 2563.167235, 847.952528, 1394.847247,
3915.019566, 1023.170254, 765.2752941, 1705.629422), AveResultantVel_102 = c(2.021215719,
1.994460031, 1.789839578, 2.449869109, 2.037483406, 1.198650234,
2.374258766, 2.162818172, 1.467132962, 2.101220406, 1.983419094,
1.595565484), AveResultantVel_104 = c(2.317185313, 2.193130625,
2.113643324, 2.822947859, 2.213037, 1.311399453, 2.39646225,
2.113288797, 1.481807047, 2.644351188, 2.54589975, 2.014750766
), EndpointError_102 = c(1.62285542, 2.05362611, 2.27036917,
1.57970041, 1.83768956, 2.14219202, 1.374642, 2.03515938,
2.58900025, 2.28107478, 1.64171472, 2.13489883), EndpointError_104 = c(0.979220453,
1.477764016, 1.621229031, 1.239002656, 1.404618047, 1.796644641,
1.01018125, 1.593606016, 1.672676594, 1.483629813, 1.503123406,
1.370374047)), class = c("spec_tbl_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -12L), spec = structure(list(
cols = list(GROUP = structure(list(), class = c("collector_character",
"collector")), conditon2 = structure(list(), class = c("collector_character",
"collector")), Trial_type = structure(list(), class = c("collector_character",
"collector")), Training = structure(list(), class = c("collector_character",
"collector")), N = structure(list(), class = c("collector_double",
"collector")), MovementTime_102 = structure(list(), class = c("collector_double",
"collector")), MovementTime_104 = structure(list(), class = c("collector_double",
"collector")), Pathlength_102 = structure(list(), class = c("collector_double",
"collector")), Pathlength_104 = structure(list(), class = c("collector_double",
"collector")), NormalizedJerk_102 = structure(list(), class = c("collector_double",
"collector")), NormalizedJerk_104 = structure(list(), class = c("collector_double",
"collector")), AveResultantVel_102 = structure(list(), class = c("collector_double",
"collector")), AveResultantVel_104 = structure(list(), class = c("collector_double",
"collector")), EndpointError_102 = structure(list(), class = c("collector_double",
"collector")), EndpointError_104 = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), class = "col_spec"))
ggplot(data10, aes(y=conditon2, x=MovementTime_102, xend=MovementTime_104)) +
geom_dumbbell(size=3, color="#e3e2e1",
colour_x = "red", colour_xend = "blue",
dot_guide=TRUE, dot_guide_size=0.25) +
labs(x=NULL, y=NULL, title="Change in Movement time ms")
You've got duplicates in conditon2: each value appears in more than one GROUP, so several dumbbells are drawn on the same row. You probably want to facet on the GROUP variable.
# Dumbbell chart of movement time (102 -> 104) per condition, one panel per
# GROUP so that repeated condition labels no longer share a row.
ggplot(
  data10,
  aes(y = conditon2, x = MovementTime_102, xend = MovementTime_104)
) +
  geom_dumbbell(
    size = 3,
    color = "#e3e2e1",
    colour_x = "red",
    colour_xend = "blue",
    dot_guide = TRUE,
    dot_guide_size = 0.25
  ) +
  labs(x = NULL, y = NULL, title = "Change in Movement time ms") +
  theme_minimal() +
  facet_grid(. ~ GROUP)
Required packages:
library(ggalt)
library(ggplot2)
I just started using this platform, so I don't have enough reputation to comment.
Your problem lies in data10$conditon2: the same condition appears in more than one row, so make the values unique or group them.
The values in that column must be unique.
unique(data10$conditon2)

Rotating through multiple Y Variables in gganimate

I'm currently trying to animate a plot using gganimate but am struggling to figure out how I would rotate through multiple y variables. The following data was collected from twitter scraping which allowed me to calculate a "sentiment score" based on the tweets following the recent Democratic debate. The goal here is to create an animated plot that eases through all 10 sentiment scores and adjusts the ggplot for each candidate. Is this possible with gganimate?
structure(
list(
candidate = c("warren", "booker", "yang", "harris", "biden", "sanders", "buttigieg"),
anger = c(162, 216, 193, 74, 451, 290, 114),
anticipation = c(570, 492, 401, 205, 360, 419, 499),
disgust = c(94, 75, 52, 61, 202, 81, 69),
fear = c(245, 241, 119, 117, 271, 251, 102),
joy = c(574, 525, 279, 181, 214, 319, 183),
sadness = c(237, 161, 138, 106, 406, 157, 251),
surprise = c(104, 191, 176, 106, 255, 343, 123),
trust = c(741, 749, 460, 325, 593, 574, 410),
negative = c(540, 317, 253, 205, 715, 360, 469),
positive = c(989, 1202, 857, 510, 751, 790, 701)
),
class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"),
row.names = c(NA, -7L),
spec = structure(
list(
cols = list(
candidate = structure(list(), class = c("collector_character", "collector")),
anger = structure(list(), class = c("collector_double", "collector")),
anticipation = structure(list(), class = c("collector_double", "collector")),
disgust = structure(list(), class = c("collector_double", "collector")),
fear = structure(list(), class = c("collector_double", "collector")),
joy = structure(list(), class = c("collector_double", "collector")),
sadness = structure(list(), class = c("collector_double", "collector")),
surprise = structure(list(), class = c("collector_double", "collector")),
trust = structure(list(), class = c("collector_double", "collector")),
negative = structure(list(), class = c("collector_double", "collector")),
positive = structure(list(), class = c("collector_double", "collector"))),
default = structure(list(), class = c("collector_guess", "collector")), skip = 1
),
class = "col_spec")
)
Here is the script I currently have written:
library ("ggplot2")
library("dplyr")
library("tidyverse")
library("plotly")
library("viridis")
library("gganimate")
# Read each CSV and rename its first column (X1) to `candidate`
sentiment_score <-
  read_csv('C:\\Users\\tdago\\Documents\\R\\Sentiment_Scores.csv') %>%
  rename(candidate = X1)

sentiment_score_hashtag <-
  read_csv('C:\\Users\\tdago\\Documents\\R\\Sentiment_Scores_hashtag.csv') %>%
  rename(candidate = X1)
# Create charts for comparison: one bar per candidate for the `anger` score.
# NOTE(review): no transition_*() layer is added here, so there is nothing for
# ease_aes() to ease yet; the "{sentiment}" label is kept as a literal
# placeholder from the original attempt.
ggplot(data = sentiment_score, aes(x = candidate, y = anger)) +
  geom_bar(aes(fill = candidate), stat = "identity") +
  theme(legend.position = "none") +
  xlab("Presidential Candidates") + ylab("Scores") + ggtitle("Anger") +
  # The closing brace belongs inside the string; outside it, the call does
  # not parse.
  labs(x = "", y = "{sentiment}") +
  ease_aes('linear')
Note: the sentiment_score object is the only one that is being used in this specific chart. sentiment_score_hashtag is a similar data frame that contains sentiment scores based on a different search.
I don't think you can rotate through Y variables with gganimate. It is easier to transform your data from wide to long format (see this question for a comprehensive list of methods to achieve this). I will go with the tidy way, using tidyr::pivot_longer:
> sentiment_score %>%
+ pivot_longer(-candidate, names_to = 'sentiment')
# A tibble: 70 x 3
candidate sentiment value
<chr> <chr> <dbl>
1 warren anger 162
2 warren anticipation 570
3 warren disgust 94
4 warren fear 245
5 warren joy 574
6 warren sadness 237
7 warren surprise 104
8 warren trust 741
9 warren negative 540
10 warren positive 989
# … with 60 more rows
>
This way, you can use easily sentiment as a state variable in gganimate, and follow the nice gganimate getting started manual.
Here is an example of the possibilities:
library ("ggplot2")
library("dplyr")
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library("tidyverse")
# library("plotly")
# library("viridis")
library("gganimate")
#Tidy Data
sentiment_score <- structure(
list(
candidate = c("warren", "booker", "yang", "harris", "biden", "sanders", "buttigieg"),
anger = c(162, 216, 193, 74, 451, 290, 114),
anticipation = c(570, 492, 401, 205, 360, 419, 499),
disgust = c(94, 75, 52, 61, 202, 81, 69),
fear = c(245, 241, 119, 117, 271, 251, 102),
joy = c(574, 525, 279, 181, 214, 319, 183),
sadness = c(237, 161, 138, 106, 406, 157, 251),
surprise = c(104, 191, 176, 106, 255, 343, 123),
trust = c(741, 749, 460, 325, 593, 574, 410),
negative = c(540, 317, 253, 205, 715, 360, 469),
positive = c(989, 1202, 857, 510, 751, 790, 701)
),
class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"),
row.names = c(NA, -7L),
spec = structure(
list(
cols = list(
candidate = structure(list(), class = c("collector_character", "collector")),
anger = structure(list(), class = c("collector_double", "collector")),
anticipation = structure(list(), class = c("collector_double", "collector")),
disgust = structure(list(), class = c("collector_double", "collector")),
fear = structure(list(), class = c("collector_double", "collector")),
joy = structure(list(), class = c("collector_double", "collector")),
sadness = structure(list(), class = c("collector_double", "collector")),
surprise = structure(list(), class = c("collector_double", "collector")),
trust = structure(list(), class = c("collector_double", "collector")),
negative = structure(list(), class = c("collector_double", "collector")),
positive = structure(list(), class = c("collector_double", "collector"))),
default = structure(list(), class = c("collector_guess", "collector")), skip = 1
),
class = "col_spec")
)
# Static layer: reshape to one row per candidate/sentiment pair and draw one
# bar per candidate, grouped by sentiment so each state has its own bars.
candidates_plot <- sentiment_score %>%
  pivot_longer(cols = -candidate, names_to = 'sentiment') %>%
  ggplot(mapping = aes(x = candidate, y = value)) +
  geom_bar(
    mapping = aes(fill = candidate, group = sentiment),
    stat = "identity"
  ) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1250)) +
  theme(legend.position = "none")

# Animation layer: step through the sentiments and show the current one in
# both the title and the y-axis label.
anim <- candidates_plot +
  transition_states(sentiment, transition_length = 2, state_length = 2) +
  enter_fade() +
  enter_drift(y_mod = -500) +
  exit_shrink() +
  exit_drift(y_mod = -500) +
  labs(
    title = '{closest_state}',
    x = "Presidential Candidates",
    y = "{closest_state}"
  )

# Render the animation
animate(anim, width = 500, height = 300, res = 90)
Created on 2019-11-25 by the reprex package (v0.3.0)

ggplot loop deal with special characters

Hi there I'm trying to plot a defined number of graphs using gridExtra.
This is working but unfortunately it is not dealing with special characters in its name. I tried to work around by using R friendly names and add in the actual name as a subtitle
library(gridExtra)
library(ggplot2)
Dataframe<-read.csv2(File_with_R_friendly_names.csv)
names<-read.csv2(File_with_actual_names.csv)
bar<-colnames(names)
list_of_plots<-lapply(names(Dataframe)[2:10], function(i) {
ggplot(Dataframe, aes_string(x="X1", y=i)) + geom_point()+labs(x=i, y="Intensity", subtitle=bar[i])
})
do.call(grid.arrange, c(list_of_plots, ncol=3))
If I put in bar[2] all graphs get the actual name but it is the same one for all while if I set bar to i, all graphs get NA.
The names I use to suit R are
Met1, Met2, Met3, Met4, Met5, Met6, Met7, Met8, Met9 and Met10
Examples of names that I need on the plots are:
-(-)-Corey lactone
-(2R)-2,3-Dihydroxypropanoic acid
-(D-(+)-Glyceric acid?)
-1,5-Naphthalenediamine
-12-Aminododecanoic acid
-2,5-di-tert-Butylhydroquinone
-2,6-di-tert-Butylphenol
-2-Amino-N,N-diethylacetamide
-2-Ethyl-2-phenylmalonamide
-2-Naphthalenesulfonic acid
Here is the dput to reproduce the bar (names):
`bar<-c("X1", "(-)-Corey lactone", "(2R)-2,3-Dihydroxypropanoic acid (D-(+)- Glyceric acid?)", "1,5-Naphthalenediamine", "12-Aminododecanoic acid", "2,5-di- tert-Butylhydroquinone", "2,6-di-tert-Butylphenol", "2-Amino-N,N- diethylacetamide", "2-Ethyl-2-phenylmalonamide", "2-Naphthalenesulfonic acid")`
Here is the dput to reproduce the dataframe:
Dataframe<-structure(list(X1 = c(0, 0, 0.25, 0.25, 0.5, 0.5, 1, 1, 2, 2),
Met1 = c(0, 0, 38096319.85, 45978353.93, 35077691.7, 42146132.41,
62606961.17, 32786049.6, 51054004.82, 48898547.32), Met2 = c(0,
0, 1288905.771, 948466.4001, 645979.6463, 1228663.251, 1137957.136,
940928.9344, 1443680.706, 1755726.385), Met3 = c(0, 0, 575887.464,
693692.0349, 1362477.6, 1515767.293, 2241120.502, 2417932.908,
3866432.112, 3894701.876), Met4 = c(0, 0, 16737068.73, 21915551.3,
12088089.1, 16003037.3, 17720785.29, 11957614.24, 13127281.5,
14192542.13), Met5 = c(0, 0, 4556006.426, 4782909.936, 4484706.271,
8019957.826, 5112289.476, 8537488.48, 6680688.948, 5959748.061
), Met6 = c(0, 0, 16874476.32, 15721984.25, 18093323.61,
18619817.92, 22055835.04, 19754379.11, 29211315.88, 27321333.35
), Met7 = c(0, 0, 6604385.457, 6396794.568, 13823034.64,
15449539.63, 26013299.82, 20262673.28, 35301685.57, 33367520.66
), Met8 = c(0, 0, 6727973.448, 7166827.569, 13238311.46,
13986568.69, 20957194.23, 19186953.76, 34513697.47, 31192991.75
), Met9 = c(0, 0, 2373752.304, 3259738.104, 1998529.732,
2387445.15, 2479309.442, 26924139.6, 4611277.427, 2439602.098
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-10L), .Names = c("X1", "Met1", "Met2", "Met3", "Met4", "Met5",
"Met6", "Met7", "Met8", "Met9"), spec = structure(list(cols = structure(list(
X1 = structure(list(), class = c("collector_double", "collector"
)), Met1 = structure(list(), class = c("collector_double",
"collector")), Met2 = structure(list(), class = c("collector_double",
"collector")), Met3 = structure(list(), class = c("collector_double",
"collector")), Met4 = structure(list(), class = c("collector_double",
"collector")), Met5 = structure(list(), class = c("collector_double",
"collector")), Met6 = structure(list(), class = c("collector_double",
"collector")), Met7 = structure(list(), class = c("collector_double",
"collector")), Met8 = structure(list(), class = c("collector_double",
"collector")), Met9 = structure(list(), class = c("collector_double",
"collector"))), .Names = c("X1", "Met1", "Met2", "Met3",
"Met4", "Met5", "Met6", "Met7", "Met8", "Met9")), default = structure(list(), class = c("collector_guess",
"collector"))), .Names = c("cols", "default"), class = "col_spec"))
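Because names(Dataframe)[2:10] yields column names (character strings), and bar is an unnamed vector, so bar["Met1"] returns NA; bar has to be indexed by position. Loop over column positions instead: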
# Iterate over column positions (skipping the x column in position 1) so the
# same index selects both the R-friendly column name in `Dataframe` and the
# matching display name in `bar`, which lists "X1" first as well.
# as.numeric() on names such as "Met1" would only produce NA.
list_of_plots <- lapply(seq_along(Dataframe)[-1], function(i) {
  col <- names(Dataframe)[i]
  ggplot(Dataframe, aes(x = X1, y = .data[[col]])) +
    geom_point() +
    labs(x = col, y = "Intensity", subtitle = bar[i])
})
do.call(grid.arrange, c(list_of_plots, ncol = 3))

Resources