use if() to use select() within a dplyr pipe chain - r

Read these two posts already:
can dplyr package be used for conditional mutating?
R Conditional evaluation when using the pipe operator %>%
I'm using Shiny input$selector and if the user has selected a particular value, I want my dataframe to be different than otherwise.
Here's a chain:
# Reactive expression: filters lastmonth_funnel by the selected channel and
# promo, optionally narrows the columns, reshapes to long form and sums
# Sessions per Channel / Promo / Funnel.
filtered_funnel <- reactive({
lastmonth_funnel %>%
# Selecting "All" disables the respective filter; otherwise only rows matching
# the chosen value are kept.
filter(input$channel == "All" | Channel == input$channel) %>%
filter(input$promo == "All" | Promo == input$promo) %>%
## HERE IS WHERE I'M STRUGGLING
# NOTE(review): this `if` has no `else`, so when input$promo == "none" the
# braces evaluate to NULL and gather() below no longer receives a data frame.
{if(input$promo != "none") select(., c("Channel", "Promo", "ShippingDetails", "Checkout", "Transactions"))} %>%
# Every column except Channel and Promo becomes a Funnel / Sessions pair.
gather(Funnel, Sessions, -Channel, -Promo) %>%
group_by(Channel, Promo, Funnel) %>%
summarise(Sessions = sum(Sessions))
})
If the user input does not equal "none" I would like to select variables "Channel", "Promo", "ShippingDetails", "Checkout" and "Transactions".
I tried a few variations of the problem line above but kept getting errors:
When I tried this within the pipe chain
{if(input$promo != "none") select(., c("Channel", "Promo", "ShippingDetails", "Checkout", "Transactions"))} %>%
I received this error:
Warning: Error in : All select() inputs must resolve to integer column
positions. The following do not:
* c("Channel", "Promo", "ShippingDetails", "Checkout", "Transactions")
I also tried:
{if(input$promo != "none") select(., c(Channel, Promo, ShippingDetails, Checkout:Transactions))} %>%
This actually runs till I select "none" in the input, in which case I get
Error in : is.character(x) is not TRUE
I got the same error when I tried this:
{ifelse(input$promo != "none", select(., c(Channel, Promo, ShippingDetails, Checkout:Transactions)), .)} %>%
How can I nest in a dplyr pipe chain a select statement that says if input$promo != "none" then select Channel, Promo, ShippingDetails, Checkout:Transactions from the passed object in the pipe?
-- Here's dput of the randomly generated data--
> dput(lastmonth_funnel)
structure(list(Channel = c("Facebook", "Youtube", "SEM", "Organic",
"Direct", "Email", "Facebook", "Youtube", "SEM", "Organic", "Direct",
"Email", "Facebook", "Youtube", "SEM", "Organic", "Direct", "Email",
"Facebook", "Youtube", "SEM", "Organic", "Direct", "Email", "Facebook",
"Youtube", "SEM", "Organic", "Direct", "Email"), Promo = c("none",
"none", "none", "none", "none", "none", "banannas", "banannas",
"banannas", "banannas", "banannas", "banannas", "carrots", "carrots",
"carrots", "carrots", "carrots", "carrots", "pears", "pears",
"pears", "pears", "pears", "pears", "apples", "apples", "apples",
"apples", "apples", "apples"), Sessions = c(6587, 3015, 6316,
11219, 8117, 6473, 12464, 14032, 14318, 17535, 16219, 7838, 10685,
12040, 19907, 13694, 6187, 16784, 21425, 18890, 24891, 16251,
16977, 25206, 28573, 18704, 29178, 22069, 39687, 53734), AddToCart = c(279,
4955, 5636, 8991, 15530, 18374, 9431, 5980, 4852, 5412, 4114,
1782, 370, 3208, 6311, 9760, 7428, 6792, 3500, 5446, 1507, 783,
2032, 833, 397, 2760, 5784, 9810, 13274, 14470), Registrations = c(194,
3210, 3573, 6067, 10305, 12653, 6564, 3874, 3076, 3652, 2730,
1227, 257, 2078, 4001, 6586, 4929, 4677, 2436, 3528, 955, 528,
1348, 573, 276, 1788, 3667, 6620, 8808, 9964), ShippingDetails = c(134,
2235, 2593, 4266, 7408, 9244, 4557, 2698, 2232, 2568, 1962, 896,
178, 1447, 2904, 4631, 3543, 3417, 1691, 2457, 693, 371, 969,
418, 191, 1245, 2661, 4655, 6332, 7280), Checkout = c(90, 1436,
1792, 2864, 4672, 5666, 3078, 1734, 1543, 1724, 1237, 549, 120,
930, 2007, 3109, 2234, 2094, 1142, 1579, 479, 249, 611, 256,
129, 800, 1839, 3125, 3993, 4462), Transactions = c(59, 937,
1192, 1819, 2602, 2926, 2039, 1132, 1026, 1095, 689, 283, 79,
607, 1335, 1975, 1244, 1081, 756, 1031, 318, 158, 340, 132, 85,
522, 1223, 1985, 2224, 2304)), class = "data.frame", row.names = c(NA,
-30L), .Names = c("Channel", "Promo", "Sessions", "AddToCart",
"Registrations", "ShippingDetails", "Checkout", "Transactions"
))

You need to make sure that the expression between { and } returns a data.frame regardless of the condition, so you need an else branch:
# Switch that decides whether the optional filter step is applied.
cond <- FALSE

mtcars %>%
  group_by(cyl) %>%
  {
    # Both branches hand a data frame to the next step: either the filtered
    # rows or the unchanged input (`.`).
    if (cond) {
      filter(., am == 1)
    } else {
      .
    }
  } %>%
  summarise(m = mean(wt))
Works fine with TRUE or FALSE.
(Also note that a simple example like this makes the question much easier to grasp.)

Related

Only the map legend appearing in ggplot

I am trying to plot data on a map. However, only the legend appears on a blank graph. The data set.
structure(list(States.Uts = c("Jammu and Kashmir", "Karnataka",
"Odisha", "Sikkim", "Madhya Pradesh", "Maharashtra", "Kerala",
"Rajasthan", "Delhi", "Andhra Pradesh", "Uttar Pradesh", "Gujarat",
"West Bengal", "Mizoram", "Uttarakhand", "Assam", "Haryana",
"Himachal Pradesh", "Jharkhand", "Punjab"), id = c(35, 8, 36,
25, 11, 12, 9, 21, 5, 17, 24, 22, 1, 30, 23, 26, 6, 19, 7, 20
), long = c(74.8692906760123, 76.1671602023197, 84.4299347735266,
88.47355094813, 78.2889834734203, 76.1073683433136, 76.4080579710788,
73.8499033398323, 77.1154800785184, 79.964340328073, 80.5663333555389,
71.5737853588608, 87.9835046818375, 92.8318089515678, 79.2071970183086,
92.8261810120493, 76.3401988564537, 77.2453583452855, 85.5641242711505,
75.4154856775423), lat = c(33.7066861126216, 14.7103409600977,
20.5129916361452, 27.5703696825569, 23.5382000019923, 19.4517685463546,
10.4515827401328, 26.5845654103506, 28.6433836730517, 15.7549664734525,
26.9232961978376, 22.6974841269408, 23.8143407874487, 23.3070171585463,
30.1564981917953, 26.3553441447585, 29.198093425323, 31.92360060294,
23.6561315040147, 30.8424285351448), Type = c("Union Territory",
"State", "State", "State", "State", "State", "State", "State",
"Union Territory", "State", "State", "State", "State", "State",
"State", "State", "State", "State", "State", "State"), low_prestige = c(1000,
836, 195, 1000, 188, 441, 441, 736, 370, 235, 0, 151, 82, 42,
28, 0, 0, 0, 0, 0), high_prestige = c(0, 0, 0, 0, 53, 149, 212,
264, 630, 765, 808, 849, 918, 958, 972, 1000, 1000, 1000, 1000,
1000)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-20L))
The code that I have tried:
# Draw one polygon per `id`, filled by low_prestige, with black outlines.
# NOTE(review): if final.plot1 is the data shown above, every id occurs exactly
# once, so each polygon group consists of a single vertex.
map1 <- ggplot(
  data = final.plot1,
  mapping = aes(x = long, y = lat, group = id)
) +
  geom_polygon(mapping = aes(fill = low_prestige), color = "black")
map1
Another option is to use the sf package, which is useful for plotting geographic data in combination with ggplot. Here is a reproducible example:
library(ggplot2)
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
# Turn the long/lat columns into point geometries.
# crs = 4326 (WGS 84) is an assumption based on the columns holding
# longitude/latitude in degrees — confirm against the source of the data.
# Without a CRS the sf object carries no coordinate reference system.
your_sf <- st_as_sf(final.plot1, coords = c("long", "lat"), crs = 4326)

# One point per row, coloured by low_prestige.
ggplot(your_sf) +
  geom_sf(aes(color = low_prestige))
Created on 2022-11-28 with reprex v2.0.2

How to solve error when using adorn_totals function in R?

I get the following message of error when using janitor::adorn_totals("row"):
"Error in adorn_totals(., "row") :
trying to re-add a totals dimension that is already been added"
Here is the head of my dataset :
structure(list(code_1 = c("M01", "C03", "M99", "C05", "O01",
"C07"), regroupement_elsan = c("Gastro", "Ophtalmo", "Divers médecine",
"Gynéco", "Accouchements", "bouche et dents"), actes_2019 = c(9179,
5589, 6024, 4150, 4028, 3458), actes_2020 = c(7933, 4167, 3740,
2994, 3348, 2206), actes_2021 = c(6504, 5505, 4682, 3376, 3226,
3035), sejours_2019 = c(1631, 2502, 1028, 852, 1455, 1288), sejours_2020 = c(1335,
1819, 726, 574, 1371, 801), sejours_2021 = c(1109, 2416, 825,
657, 1259, 1106), tx_0_nuit_2019 = c("3.92397302268547", "90.7673860911271",
"32.9766536964981", "57.5117370892019", "0.206185567010309",
"98.9130434782609"), tx_0_nuit_2020 = c("3.29588014981273", "92.9081913139087",
"47.1074380165289", "59.581881533101", "0.291757840991977", "99.250936329588"
), tx_0_nuit_2021 = c("3.6068530207394", "95.4470198675497",
"18.3030303030303", "60.2739726027397", "0.158856235107228",
"98.7341772151899"), pourcentage = c(5.37796226165473, 4.55191916519208,
3.87140518282095, 2.79151300666457, 2.66748251170021, 2.50955034811226
), pourcentage_cumule = c(78.4062908267046, 82.9582099918967,
86.8296151747176, 89.6211281813822, 92.2886106930824, 94.7981610411947
)), row.names = c(NA, -6L), class = c("tabyl", "tbl_df", "tbl",
"data.frame"), core = structure(list(code_1 = c("M01b", "C01",
"C02", "C04", "M01", "C03", "M99", "C05", "O01", "C07", "C08",
"C99", "C98", "C10", "C06", "M03", "O02", "M02", "M04", "C01b",
"O03", "S99", "***", "C10b", "M05", "M98", "O04"), regroupement_elsan = c("Endoscopies
digestives",
"Ortho (+ rhumato et rachis)", "Chirurgie digestive", "Uro-néphro",
"Gastro", "Ophtalmo", "Divers médecine", "Gynéco", "Accouchements",
"bouche et dents", "Tissus mou et chir plastique", "Divers chir",
"Chir esth et hors sécu", "Chir thoracique et vasculaire", "ORL Stomato sf bouche et
dent",
"Pneumologie", "Obstétrique autre (hors IVG)", "Cardio Vasc (médecine)",
"Neurologie", "Rachis", "IVG", "Séances autres", "Autres", "Chir thoracique",
"Soins palliatifs", "Vasculaire interventionnel", "Néo nat"),
actes_2019 = c(36079, 29520, 14618, 6515, 9179, 5589, 6024,
4150, 4028, 3458, 2137, 2180, 575, 449, 866, 388, 294, 311,
714, 395, 292, 1842, 10, 0, 4, 0, 1), actes_2020 = c(30192,
25451, 12845, 7376, 7933, 4167, 3740, 2994, 3348, 2206, 2107,
1477, 575, 437, 337, 897, 193, 218, 267, 308, 118, 737, 8,
4, 0, 11, 5), actes_2021 = c(42333, 24055, 13735, 8196, 6504,
5505, 4682, 3376, 3226, 3035, 2571, 1134, 689, 511, 352,
272, 181, 161, 138, 106, 82, 61, 18, 8, 7, 0, 0), sejours_2019 = c(6992,
5493, 2577, 1221, 1631, 2502, 1028, 852, 1455, 1288, 540,
397, 236, 158, 260, 63, 148, 101, 90, 44, 246, 1820, 4, 0,
1, 0, 1), sejours_2020 = c(5811, 4946, 2220, 1220, 1335,
1819, 726, 574, 1371, 801, 554, 269, 221, 140, 94, 42, 109,
79, 58, 34, 98, 720, 2, 1, 0, 1, 5), sejours_2021 = c(7922,
5144, 2523, 1451, 1109, 2416, 825, 657, 1259, 1106, 649,
264, 278, 162, 111, 51, 108, 69, 30, 21, 77, 54, 7, 1, 2,
0, 0), tx_0_nuit_2019 = c("96.0955377574371", "63.5718186783179",
"41.4435389988359", "36.2817362817363", "3.92397302268547",
"90.7673860911271", "32.9766536964981", "57.5117370892019",
"0.206185567010309", "98.9130434782609", "72.5925925925926",
"53.904282115869", "13.9830508474576", "96.2025316455696",
"50.7692307692308", "42.8571428571429", "85.1351351351351",
"72.2772277227723", "11.1111111111111", "4.54545454545455",
"100,0", "100,0", "100,0", "0,0", "0,0", "0,0", "0,0"), tx_0_nuit_2020 =
c("96.0936155567028",
"67.3069146785281", "40.5855855855856", "34.344262295082",
"3.29588014981273", "92.9081913139087", "47.1074380165289",
"59.581881533101", "0.291757840991977", "99.250936329588",
"76.3537906137184", "49.814126394052", "11.7647058823529",
"99.2857142857143", "53.1914893617021", "16.6666666666667",
"74.3119266055046", "81.0126582278481", "25.8620689655172",
"8.82352941176471", "98.9795918367347", "100,0", "100,0",
"100,0", "0,0", "0,0", "20,0"), tx_0_nuit_2021 = c("96.7053774299419",
"73.2892690513219", "51.0503369005153", "41.9021364576154",
"3.6068530207394", "95.4470198675497", "18.3030303030303",
"60.2739726027397", "0.158856235107228", "98.7341772151899",
"83.9753466872111", "60.2272727272727", "50,0", "94.4444444444444",
"72.972972972973", "1.96078431372549", "81.4814814814815",
"85.5072463768116", "43.3333333333333", "52.3809523809524",
"100,0", "100,0", "100,0", "100,0", "0,0", "0,0", "0,0")), row.names = c(NA,
-27L), class = "data.frame"), tabyl_type = "two_way", totals = "row")
And the code I tried :
library(janitor)
# Append a totals row to `autres`.
# NOTE(review): the dput() output above ends with totals = "row" — presumably
# this stored attribute is what triggers the "already been added" error.
autres %>%
adorn_totals("row")
Could anyone help ? I had indeed used the adorn_totals function on the dataframe used to generate the dataframe "autres", but I made sure the row "total" isn't in the dataframe "autres" anymore.
With the object you have shared as x:
# Strip the stored tabyl attributes first, then add the totals row.
x %>%
  untabyl() %>%
  adorn_totals(where = "row")
Why it works:
You can see at the end of the object you shared, tabyl_type = "two_way", totals = "row". Those attributes are stored with the data.frame you're working with. When you try to adorn_totals() a second time, janitor checks this and errors.
When you call untabyl() it strips those attributes. Then adorn_totals() succeeds.
I notice you have a cumulative percentage column. If desired, you can control exactly which columns get a totals value in adorn_totals() - see ?adorn_totals and the ... argument for how, and here's an example: https://stackoverflow.com/a/69759313.

quo and enquo in dplyr style function, Error in ~Sessions : object 'Sessions' not found

I have a dataframe:
dput(dat.trended)
structure(list(date = structure(c(18230, 18230, 18230, 18230,
18230, 18230, 18230, 18230, 18231, 18231, 18231, 18231, 18231,
18231, 18231, 18231, 18232, 18232, 18232, 18232, 18232, 18232,
18232, 18233, 18233, 18233, 18233, 18233, 18233, 18233, 18234,
18234, 18234, 18234, 18234, 18234, 18234, 18235, 18235, 18235,
18235, 18235, 18235, 18235, 18236, 18236, 18236, 18236, 18236,
18236, 18236, 18237, 18237, 18237, 18237, 18237, 18237, 18237,
18237, 18238, 18238, 18238, 18238, 18238, 18238, 18238, 18238,
18239, 18239, 18239, 18239, 18239, 18239, 18239, 18239, 18240,
18240, 18240, 18240, 18240, 18240, 18240, 18240, 18241, 18241,
18241, 18241, 18241, 18241, 18241, 18241, 18242, 18242, 18242,
18242, 18242, 18242, 18242, 18242, 18243, 18243, 18243, 18243,
18243, 18243, 18243, 18243, 18244, 18244, 18244, 18244, 18244,
18244, 18244, 18244, 18245, 18245, 18245, 18245, 18245, 18245,
18245, 18245, 18246, 18246, 18246, 18246, 18246, 18246, 18246,
18246, 18247, 18247, 18247, 18247, 18247, 18247, 18247, 18247,
18247, 18248, 18248, 18248, 18248, 18248, 18248, 18248, 18248,
18248, 18249, 18249, 18249, 18249, 18249, 18249, 18249, 18249,
18250, 18250, 18250, 18250, 18250, 18250, 18250, 18250, 18250,
18251, 18251, 18251, 18251, 18251, 18251, 18251, 18251, 18252,
18252, 18252, 18252, 18252, 18252, 18252, 18252, 18253, 18253,
18253, 18253, 18253, 18253, 18253, 18254, 18254, 18254, 18254,
18254, 18254, 18254, 18254, 18255, 18255, 18255, 18255, 18255,
18255, 18255, 18255, 18256, 18256, 18256, 18256, 18256, 18256,
18256, 18256, 18257, 18257, 18257, 18257, 18257, 18257, 18257,
18257, 18258, 18258, 18258, 18258, 18258, 18258, 18258, 18258
), class = "Date"), Channel = c("(Other)", "Direct", "Display",
"Email", "Organic Search", "Paid Search", "Referral", "Social",
"(Other)", "Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Direct", "Email", "Organic Search",
"Paid Search", "Referral", "Social", "(Other)", "Direct", "Email",
"Organic Search", "Paid Search", "Referral", "Social", "(Other)",
"Direct", "Email", "Organic Search", "Paid Search", "Referral",
"Social", "(Other)", "Direct", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Direct", "Email", "Organic Search",
"Paid Search", "Referral", "Social", "(Other)", "Direct", "Display",
"Email", "Organic Search", "Paid Search", "Referral", "Social",
"(Other)", "Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Affiliates", "Direct", "Email",
"Organic Search", "Paid Search", "Referral", "Social", "(Other)",
"Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Direct", "Display", "Email",
"Organic Search", "Paid Search", "Referral", "Social", "(Other)",
"Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Direct", "Display", "Email",
"Organic Search", "Paid Search", "Referral", "Social", "(Other)",
"Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Direct", "Display", "Email",
"Organic Search", "Paid Search", "Referral", "Social", "(Other)",
"Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Affiliates", "Direct", "Display",
"Email", "Organic Search", "Paid Search", "Referral", "Social",
"(Other)", "Affiliates", "Direct", "Display", "Email", "Organic Search",
"Paid Search", "Referral", "Social", "(Other)", "Direct", "Display",
"Email", "Organic Search", "Paid Search", "Referral", "Social",
"(Other)", "Affiliates", "Direct", "Display", "Email", "Organic Search",
"Paid Search", "Referral", "Social", "(Other)", "Direct", "Display",
"Email", "Organic Search", "Paid Search", "Referral", "Social",
"(Other)", "Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Direct", "Email", "Organic Search",
"Paid Search", "Referral", "Social", "(Other)", "Direct", "Display",
"Email", "Organic Search", "Paid Search", "Referral", "Social",
"(Other)", "Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Direct", "Display", "Email",
"Organic Search", "Paid Search", "Referral", "Social", "(Other)",
"Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Direct", "Display", "Email",
"Organic Search", "Paid Search", "Referral", "Social"), DailyUsers = c(4584,
122507, 1, 790, 36015, 733, 22716, 44639, 5107, 136612, 1, 773,
41964, 808, 26301, 53908, 6613, 180241, 1330, 48477, 907, 32545,
54564, 6776, 166011, 1782, 46269, 900, 29584, 49245, 6716, 175636,
1061, 44079, 925, 27302, 46499, 6122, 169174, 1159, 43812, 836,
27814, 43787, 5499, 151641, 796, 40171, 755, 25464, 41112, 4568,
128915, 1, 540, 36766, 789, 23039, 41885, 5205, 143873, 2, 633,
42361, 802, 26525, 51519, 7005, 1, 179617, 868, 48641, 915, 33230,
55982, 7247, 172763, 1, 927, 45502, 834, 29853, 49680, 6629,
163548, 10, 1102, 42964, 773, 27673, 46432, 6774, 158663, 9,
1313, 41267, 764, 26924, 41696, 5868, 146750, 11, 661, 38098,
758, 24081, 38489, 4575, 121006, 6, 482, 34642, 737, 21416, 40289,
4616, 135709, 7, 562, 38238, 815, 24560, 45485, 7468, 172483,
4, 780, 44160, 780, 30592, 45159, 6908, 1, 160183, 10, 714, 41889,
784, 26831, 41552, 6330, 1, 150554, 10, 898, 38073, 704, 25583,
36881, 5495, 145060, 8, 673, 35519, 722, 22461, 34739, 5237,
1, 142440, 10, 874, 32385, 645, 21154, 33666, 4061, 112273, 8,
381, 29598, 675, 17570, 32665, 3888, 106445, 1, 368, 30830, 686,
18249, 35123, 4531, 120502, 1195, 31502, 701, 19681, 34551, 3603,
95447, 1, 577, 23994, 576, 14703, 26813, 2507, 83464, 2, 324,
21731, 595, 12008, 22082, 2867, 98924, 1, 361, 28706, 717, 15739,
27070, 2519, 110161, 3, 327, 30031, 709, 16066, 28716, 2135,
101149, 2, 286, 28501, 743, 14617, 30129), Sessions = c(5662,
140831, 1, 964, 41266, 769, 28090, 48214, 6258, 157435, 1, 976,
48872, 850, 32734, 58368, 8263, 206339, 1616, 56362, 936, 41013,
60134, 8475, 189989, 2003, 52984, 952, 36599, 53858, 8271, 198848,
1335, 50233, 955, 33893, 50887, 7693, 191770, 1370, 49736, 864,
34287, 47590, 6901, 172475, 1003, 45772, 800, 31295, 44571, 5704,
148088, 1, 656, 42294, 822, 28452, 45226, 6607, 165534, 2, 801,
49337, 858, 32962, 56008, 8686, 1, 205329, 1066, 56373, 950,
41401, 61180, 8974, 197191, 1, 1116, 52376, 887, 37316, 54398,
8328, 186861, 10, 1282, 48913, 817, 34246, 50714, 8269, 180251,
9, 1521, 46949, 810, 33109, 45453, 7272, 167272, 11, 836, 43257,
806, 29710, 41932, 5728, 139022, 6, 603, 39619, 787, 26441, 43352,
5900, 155182, 7, 683, 44232, 853, 30398, 49342, 10388, 217068,
4, 1049, 54970, 850, 40829, 53278, 8685, 1, 182160, 10, 882,
47799, 826, 33132, 45484, 7794, 1, 170454, 10, 1033, 43464, 726,
31096, 40310, 6677, 163956, 8, 851, 40376, 753, 27458, 37682,
6324, 1, 159839, 10, 1011, 36577, 685, 25788, 36395, 5010, 127954,
8, 499, 33887, 690, 21511, 35205, 4752, 121747, 1, 460, 35583,
725, 22589, 37848, 5539, 137528, 1381, 36278, 728, 24421, 37487,
4505, 109400, 1, 692, 27551, 630, 17949, 28790, 3074, 94643,
2, 395, 24511, 633, 14542, 23327, 6888, 222318, 2, 888, 63947,
1486, 37906, 57280, 3092, 122398, 3, 407, 33506, 768, 19062,
30477, 2625, 112959, 3, 375, 31670, 812, 17500, 31946), Transactions = c(24,
614, 0, 39, 73, 0, 1781, 75, 29, 898, 0, 50, 104, 0, 2205, 125,
46, 1161, 84, 117, 0, 2822, 125, 64, 779, 63, 74, 0, 2081, 91,
92, 610, 68, 72, 0, 1798, 130, 36, 637, 60, 95, 0, 1893, 91,
33, 523, 49, 78, 0, 1698, 78, 18, 496, 0, 41, 50, 0, 1412, 78,
31, 751, 0, 43, 99, 0, 2041, 122, 30, 0, 856, 54, 118, 0, 2441,
128, 35, 779, 0, 45, 97, 0, 2124, 113, 40, 779, 0, 43, 101, 0,
1884, 112, 34, 671, 0, 37, 79, 0, 1952, 102, 27, 604, 0, 35,
76, 0, 1635, 77, 29, 506, 0, 30, 72, 0, 1391, 67, 30, 667, 0,
39, 104, 0, 1785, 91, 74, 1125, 0, 50, 110, 0, 2219, 108, 38,
0, 639, 0, 38, 70, 0, 1764, 86, 30, 0, 550, 0, 35, 66, 0, 1475,
70, 42, 487, 0, 32, 61, 0, 1334, 56, 24, 0, 422, 0, 27, 39, 0,
1159, 57, 16, 345, 0, 32, 47, 0, 935, 63, 9, 366, 0, 21, 44,
0, 908, 53, 26, 413, 19, 44, 0, 1040, 50, 19, 252, 0, 17, 20,
0, 623, 42, 8, 155, 0, 16, 18, 0, 468, 21, 22, 448, 0, 30, 44,
0, 1332, 64, 12, 242, 0, 27, 39, 0, 684, 32, 7, 192, 0, 17, 34,
0, 695, 28), Revenue = c(1739.74331, 31113.4098, 0, 2312.482089,
4707.59151, 0, 82485.8405, 3342.691127, 1471.14892, 52576.42497,
0, 4527.28196, 5729.90202, 0, 103926.5696, 4292.44231, 3521.7898,
62665.81251, 6011.545189, 7449.704978, 0, 144009.60402, 5953.210453,
1577.87009, 38261.10011, 2694.018798, 4000.98632, 0, 96847.5706,
3309.47883, 1701.177895, 27902.17329, 2951.83414, 2971.27577,
0, 76756.6602, 3515.29931, 2764.4399, 30351.32437, 4789.010241,
5144.50085, 0, 82277.59305, 2958.273109, 1959.03252, 28802.46538,
3031.68041, 3586.315, 0, 73028.73399, 3240.188819, 439.617731,
22872.02071, 0, 2676.741934, 2176.79642, 0, 61351.30115, 2989.81603,
851.234387, 36528.76585, 0, 2841.349662, 4151.194286, 0, 89709.3116,
4359.76453, 778.43277, 0, 36356.39973, 2516.140702, 5155.146913,
0, 96324.67089, 4357.012963, 1772.90713, 35896.53789, 0, 1941.54086,
4323.53325, 0, 93397.92809, 3192.625251, 1461.7126, 28900.90783,
0, 1916.5007105, 4841.32437, 0, 85239.33652, 5585.45811, 2275.3234222,
28425.09616, 0, 1423.95997, 3500.040202, 0, 78402.95617, 2992.36592,
839.113369, 24428.0301, 0, 2660.20427, 3219.88335, 0, 77911.54692,
3649.76663, 952.832833, 20949.3076, 0, 1694.55981, 3402.67867,
0, 60677.01657, 3507.35016, 790.85074, 28618.47916, 0, 2127.7198902,
4998.198504, 0, 74267.3436, 3810.81114, 1593.180049, 39967.90885,
0, 2043.741553, 4707.7458188, 0, 96546.4659, 3873.8297254, 1152.759991,
0, 30586.18438, 0, 2263.76183, 3255.65978, 0, 74831.96684, 2302.93888,
1541.0201, 0, 25838.51692, 0, 1564.607309, 2585.175407, 0, 56909.35993,
1893.008107, 3125.484002, 20030.02974, 0, 2984.73174, 2053.920653,
0, 54000.0694, 2391.062841, 994.023615, 0, 15894.98859, 0, 1587.93677,
2574.329865, 0, 47319.38721, 1635.377802, 1541.12045, 13794.50411,
0, 1865.84062, 1858.920211, 0, 37858.94079, 1723.849722, 284.334126,
15552.58137, 0, 1233.98682, 2079.841952, 0, 34436.41117, 982.248295,
1098.275799, 14903.710499, 697.91002, 1222.725965, 0, 33794.59734,
1959.086548, 461.12527, 11560.703802, 0, 1007.17398, 596.599993,
0, 22088.53336, 1142.67293, 135.400002, 6869.02614, 0, 673.31943,
1630.839026, 0, 18944.7318, 1252.02348, 1150.00001, 20721.64957,
0, 1893.8333, 989.919695, 0, 59597.87949, 2347.95408, 605.82993,
8630.90537, 0, 1581.627411, 2021.417672, 0, 28167.96098, 1222.06059,
409.939999, 9957.808578, 0, 705.67002, 1168.074171, 0, 31991.812556,
1061.337995)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-229L))
I am attempting to make a function and have been reading this dplyr post on how to make functions.
My attempt:
# Attempt at a reusable version of the per-metric table built further below:
# sums `metric` per date and `dim`, then spreads the dates into columns.
#   df     - data frame containing `date`, the dimension and the metric
#   dim    - unquoted name of the grouping column (e.g. Channel)
#   metric - unquoted name of the column to sum (e.g. Sessions)
metricTables <- function(df, dim, metric) {
dim <- enquo(dim)
metric <- enquo(metric)
df %>% mutate(date = ordered(
format(date, "%d-%b"),
levels = format(sort(unique(date)), "%d-%b")
)) %>%
group_by(date, !! dim) %>%
# NOTE(review): the summary column is literally named `metric` here, not the
# name supplied by the caller.
summarise(metric = sum(!! metric)) %>% # here I would like the name of the sum(metric) to be the same string that was provided for metric when the function was called
# NOTE(review): this excludes `!! metric` (e.g. Sessions), which is no longer a
# column after summarise(); the working script below excludes the dimension
# column (Channel) instead — likely the source of "object 'Sessions' not found".
gather(Key, value, -!! metric, -date) %>%
# NOTE(review): gather() above names the value column `value`, but this step
# refers to `Value`.
spread(date, Value) %>%
select(-Key) %>%
replace(is.na(.), 0) %>%
mutate_at(vars(-!! dim), round, 0) %>%
mutate_at(vars(-!! dim), scales::comma)
}
When I use this code outside of a function, it works as expected, e.g.:
# works
# sessions table
sessionsTable <- dat.trended %>%
  # Relabel each date as "dd-Mon", keeping chronological order in the levels.
  mutate(
    date = ordered(
      format(date, "%d-%b"),
      levels = format(sort(unique(date)), "%d-%b")
    )
  ) %>%
  # Total sessions per day and channel.
  group_by(date, Channel) %>%
  summarise(Sessions = sum(Sessions)) %>%
  # Long form, then one column per day.
  gather(key = Key, value = Value, -Channel, -date) %>%
  spread(key = date, value = Value) %>%
  select(-Key) %>%
  # Missing day/channel combinations count as zero.
  replace(is.na(.), 0) %>%
  # Round, then format every day column with thousands separators.
  mutate_at(vars(-Channel), round, digits = 0) %>%
  mutate_at(vars(-Channel), scales::comma)
However when running as a function I get an error:
> metricTables(dat.trended, Channel, Sessions)
Error in ~Sessions : object 'Sessions' not found
dat.trended is the data frame I provided above.
Expected output:
sessionsTable <- dat.trended %>%
  # Relabel each date as "dd-Mon", keeping chronological order in the levels.
  mutate(
    date = ordered(
      format(date, "%d-%b"),
      levels = format(sort(unique(date)), "%d-%b")
    )
  ) %>%
  # Total sessions per day and channel.
  group_by(date, Channel) %>%
  summarise(Sessions = sum(Sessions)) %>%
  # Long form, then one column per day.
  gather(key = Key, value = Value, -Channel, -date) %>%
  spread(key = date, value = Value) %>%
  select(-Key) %>%
  # Missing day/channel combinations count as zero.
  replace(is.na(.), 0) %>%
  # Round, then format every day column with thousands separators.
  mutate_at(vars(-Channel), round, digits = 0) %>%
  mutate_at(vars(-Channel), scales::comma)
> glimpse(sessionsTable)
Observations: 9
Variables: 30
$ Channel <chr> "(Other)", "Affiliates", "Direct", "Display", "Email", "Organic Search", "Paid Search", "Referral", …
$ `30-Nov` <chr> "5,662.0", "0.0", "140,831.0", "1.0", "964.0", "41,266.0", "769.0", "28,090.0", "48,214.0"
$ `01-Dec` <chr> "6,258.0", "0.0", "157,435.0", "1.0", "976.0", "48,872.0", "850.0", "32,734.0", "58,368.0"
$ `02-Dec` <chr> "8,263", "0", "206,339", "0", "1,616", "56,362", "936", "41,013", "60,134"
$ `03-Dec` <chr> "8,475", "0", "189,989", "0", "2,003", "52,984", "952", "36,599", "53,858"
$ `04-Dec` <chr> "8,271", "0", "198,848", "0", "1,335", "50,233", "955", "33,893", "50,887"
$ `05-Dec` <chr> "7,693", "0", "191,770", "0", "1,370", "49,736", "864", "34,287", "47,590"
$ `06-Dec` <chr> "6,901", "0", "172,475", "0", "1,003", "45,772", "800", "31,295", "44,571"
$ `07-Dec` <chr> "5,704.0", "0.0", "148,088.0", "1.0", "656.0", "42,294.0", "822.0", "28,452.0", "45,226.0"
$ `08-Dec` <chr> "6,607.0", "0.0", "165,534.0", "2.0", "801.0", "49,337.0", "858.0", "32,962.0", "56,008.0"
$ `09-Dec` <chr> "8,686.0", "1.0", "205,329.0", "0.0", "1,066.0", "56,373.0", "950.0", "41,401.0", "61,180.0"
$ `10-Dec` <chr> "8,974.0", "0.0", "197,191.0", "1.0", "1,116.0", "52,376.0", "887.0", "37,316.0", "54,398.0"
$ `11-Dec` <chr> "8,328", "0", "186,861", "10", "1,282", "48,913", "817", "34,246", "50,714"
$ `12-Dec` <chr> "8,269.0", "0.0", "180,251.0", "9.0", "1,521.0", "46,949.0", "810.0", "33,109.0", "45,453.0"
$ `13-Dec` <chr> "7,272", "0", "167,272", "11", "836", "43,257", "806", "29,710", "41,932"
$ `14-Dec` <chr> "5,728.0", "0.0", "139,022.0", "6.0", "603.0", "39,619.0", "787.0", "26,441.0", "43,352.0"
$ `15-Dec` <chr> "5,900.0", "0.0", "155,182.0", "7.0", "683.0", "44,232.0", "853.0", "30,398.0", "49,342.0"
$ `16-Dec` <chr> "10,388.0", "0.0", "217,068.0", "4.0", "1,049.0", "54,970.0", "850.0", "40,829.0", "53,278.0"
$ `17-Dec` <chr> "8,685.0", "1.0", "182,160.0", "10.0", "882.0", "47,799.0", "826.0", "33,132.0", "45,484.0"
$ `18-Dec` <chr> "7,794.0", "1.0", "170,454.0", "10.0", "1,033.0", "43,464.0", "726.0", "31,096.0", "40,310.0"
$ `19-Dec` <chr> "6,677.0", "0.0", "163,956.0", "8.0", "851.0", "40,376.0", "753.0", "27,458.0", "37,682.0"
$ `20-Dec` <chr> "6,324.0", "1.0", "159,839.0", "10.0", "1,011.0", "36,577.0", "685.0", "25,788.0", "36,395.0"
$ `21-Dec` <chr> "5,010.0", "0.0", "127,954.0", "8.0", "499.0", "33,887.0", "690.0", "21,511.0", "35,205.0"
$ `22-Dec` <chr> "4,752.0", "0.0", "121,747.0", "1.0", "460.0", "35,583.0", "725.0", "22,589.0", "37,848.0"
$ `23-Dec` <chr> "5,539", "0", "137,528", "0", "1,381", "36,278", "728", "24,421", "37,487"
$ `24-Dec` <chr> "4,505.0", "0.0", "109,400.0", "1.0", "692.0", "27,551.0", "630.0", "17,949.0", "28,790.0"
$ `25-Dec` <chr> "3,074.0", "0.0", "94,643.0", "2.0", "395.0", "24,511.0", "633.0", "14,542.0", "23,327.0"
$ `26-Dec` <chr> "6,888.0", "0.0", "222,318.0", "2.0", "888.0", "63,947.0", "1,486.0", "37,906.0", "57,280.0"
$ `27-Dec` <chr> "3,092.0", "0.0", "122,398.0", "3.0", "407.0", "33,506.0", "768.0", "19,062.0", "30,477.0"
$ `28-Dec` <chr> "2,625.0", "0.0", "112,959.0", "3.0", "375.0", "31,670.0", "812.0", "17,500.0", "31,946.0"
How can I get my function metricTables() to correctly evaluate?
Here, we can convert to string for 'metric' and use :=
library(dplyr)
library(tidyr)
# Build a wide table of one summed metric: one row per value of `dim`, one
# column per day, with values rounded and formatted by scales::comma().
#
#   df     - data frame with a `date` column that format() can render with
#            "%d-%b", plus the columns named by `dim` and `metric`
#   dim    - unquoted name of the grouping column (e.g. Channel)
#   metric - unquoted name of the numeric column to sum (e.g. Sessions)
#
# Returns a tibble whose first column is `dim`; the remaining columns are the
# "dd-Mon" day labels holding formatted character values.
metricTables <- function(df, dim, metric) {
  dim <- enquo(dim)
  metric <- enquo(metric)

  df %>%
    # Relabel dates as "dd-Mon" while keeping chronological order.
    mutate(
      date = ordered(
        format(date, "%d-%b"),
        levels = format(sort(unique(date)), "%d-%b")
      )
    ) %>%
    group_by(date, !!dim) %>%
    # Name the summary column after the metric that was passed in; drop the
    # grouping right away so no grouping leaks into the reshaping steps.
    summarise(!!rlang::as_name(metric) := sum(!!metric), .groups = "drop") %>%
    pivot_longer(
      cols = -c(date, !!dim),
      names_to = "Key",
      values_to = "value"
    ) %>%
    pivot_wider(names_from = date, values_from = value) %>%
    select(-Key) %>%
    # Day/dimension combinations without data count as zero.
    replace(is.na(.), 0) %>%
    # across() replaces the two superseded mutate_at() calls.
    mutate(across(-!!dim, ~ scales::comma(round(.x, 0))))
}
metricTables(dat.trended, Channel, Sessions)
Can also make use of curly-curly operator ({{...}})
# Curly-curly variant: builds a wide table of one summed metric, one row per
# value of `dim` and one column per day, formatted by scales::comma().
#
#   df     - data frame with a `date` column that format() can render with
#            "%d-%b", plus the columns named by `dim` and `metric`
#   dim    - unquoted name of the grouping column (e.g. Channel)
#   metric - unquoted name of the numeric column to sum (e.g. Sessions)
#
# Returns a tibble whose first column is `dim`; the remaining columns are the
# "dd-Mon" day labels holding formatted character values.
metricTables <- function(df, dim, metric) {
  df %>%
    # Relabel dates as "dd-Mon" while keeping chronological order.
    mutate(
      date = ordered(
        format(date, "%d-%b"),
        levels = format(sort(unique(date)), "%d-%b")
      )
    ) %>%
    group_by(date, {{ dim }}) %>%
    # Name the summary column after the metric that was passed in; drop the
    # grouping right away so no grouping leaks into the reshaping steps.
    summarise(!!rlang::ensym(metric) := sum({{ metric }}), .groups = "drop") %>%
    pivot_longer(
      cols = -c(date, {{ dim }}),
      names_to = "Key",
      values_to = "value"
    ) %>%
    pivot_wider(names_from = date, values_from = value) %>%
    select(-Key) %>%
    # Day/dimension combinations without data count as zero.
    replace(is.na(.), 0) %>%
    # across() replaces the two superseded mutate_at() calls.
    mutate(across(-{{ dim }}, ~ scales::comma(round(.x, 0))))
}
metricTables(dat.trended, Channel, Sessions)

Is there an explanation for this R function merge() error?

I am trying to use the R merge function to combine two data.frames, but keep getting the following error:
Error in fix.by(by.y, y) : 'by' must specify a uniquely valid column
I am not sure what this error means or how to resolve it.
My code thus far is the following:
# Load the movie data and derive a `firsts` column from `director`.
movies <- read_csv("movies.csv")
firsts = vector(length = nrow(movies))
# Keep the text before the first space of each director entry.
# NOTE(review): 1:nrow(movies) also iterates when the data has zero rows.
for (i in 1:nrow(movies)) {
firsts[i] = movies$director[i] %>% str_split(" ", n = 2) %>% unlist %>% .[1]
}
movies$firsts = firsts
# Drop rows by hard-coded position (the reason is not shown here).
movies <- movies[-c(137, 147, 211, 312, 428, 439, 481, 555, 602, 830, 850, 1045, 1080, 1082, 1085, 1096, 1255, 1258, 1286, 1293, 1318, 1382, 1441, 1456, 1494, 1509, 1703, 1719, 1735, 1944, 1968, 1974, 1977, 2098, 2197, 2409, 2516, 2546, 2722, 2751, 2988, 3191,
3227, 3270, 3283, 3285, 3286, 3292, 3413, 3423, 3470, 3480, 3511, 3676, 3698, 3826, 3915, 3923, 3954, 4165, 4381, 4385, 4390, 4397, 4573, 4711, 4729, 4774, 4813, 4967, 4974, 5018, 5056, 5258, 5331, 5405, 5450, 5469, 5481, 4573, 5708, 5715, 5786, 5886, 5888, 5933, 5934, 6052, 6091, 6201, 6234, 6236, 6511, 6544, 6551, 6562, 6803, 4052, 4121, 4326),]
movies <- movies[-c(4521,5846),]
# presumably gender_df() returns one row per name with the inferred gender —
# confirm against the gender package documentation.
g <- gender_df(movies, name_col = "firsts", year_col = "year", method = c("ssa"))
# NOTE(review): `by` is applied to both data frames; "firsts" is created on
# `movies` above and "name" presumably exists only in `g`, which would explain
# the fix.by() error.
merge(movies, g, by = c("firsts", "name"), all = FALSE)
I think you are trying to give the by argument an invalid value. Indeed, the documentation says:
By default the data frames are merged on the columns with names they
both have, but separate specifications of the columns can be given by
by.x and by.y. The rows in the two data frames that match on the
specified columns are extracted, and joined together. If there is more
than one match, all possible matches contribute one row each. For the
precise meaning of ‘match’, see match.
In your case, you should try the following:
# Match movies$firsts against g$name; all = FALSE keeps only rows that have a
# match on both sides.
merge(
  x = movies,
  y = g,
  by.x = "firsts",
  by.y = "name",
  all = FALSE
)

Adding percent change between bars on a ggplot

I have a geom_bar plot with labels for the values of each bar on the visual itself.
Here is the code to generate the plot:
# libraries
library(shiny)
library(tidyverse)
# Funnel bar plot: one bar per Funnel step, ordered with
# reorder(Funnel, -Sessions), with the summed value printed on each bar.
output$funnel_plot <- renderPlot({
  ggplot(exampledf, aes(x = reorder(Funnel, -Sessions), y = Sessions)) +
    geom_bar(stat = "identity", fill = "#008080", alpha = 0.6) +
    stat_summary(
      aes(label = scales::comma(..y..)),
      fun.y = "sum",
      geom = "text",
      col = "white",
      vjust = 1.5
    ) +
    labs(x = "Step", y = "Events") +
    # Axis labels in thousands, e.g. 5000 -> "5K".
    scale_y_continuous(labels = function(l) paste0(l / 1000, "K"))
})
And here is a data frame to feed into it.
exampledf <- structure(list(Channel = c("Facebook", "Youtube", "SEM", "Organic",
"Direct", "Email", "Facebook", "Youtube", "SEM", "Organic", "Direct",
"Email", "Facebook", "Youtube", "SEM", "Organic", "Direct", "Email",
"Facebook", "Youtube", "SEM", "Organic", "Direct", "Email"),
Promo = c("None", "None", "None", "None", "None", "None",
"Partner Offer", "Partner Offer", "Partner Offer", "Partner Offer",
"Partner Offer", "Partner Offer", "Print Code", "Print Code",
"Print Code", "Print Code", "Print Code", "Print Code", "Affiliate Promo",
"Affiliate Promo", "Affiliate Promo", "Affiliate Promo",
"Affiliate Promo", "Affiliate Promo"), Sessions = c(26126,
16885, 32229, 2446, 16353, 79202, 7688, 83475, 48834, 53475,
71238, 78728, 76710, 125571, 125719, 17142, 103206, 181082,
27071, 42571, 716, 139871, 21676, 59560), AddToCart = c(7228,
4436, 8344, 575, 4275, 23681, 1982, 18489, 14433, 14995,
20769, 20119, 18471, 35566, 33423, 5187, 28138, 48186, 7140,
11602, 190, 35795, 5193, 17064), Registrations = c(2780,
1706, 3209, 221, 1644, 9108, 762, 7111, 5551, 5767, 7988,
7738, 7104, 13679, 12855, 1995, 10822, 18533, 2746, 4462,
73, 13767, 1997, 6563), ShippingDetails = c(1069, 656, 1234,
85, 632, 3503, 293, 2735, 2135, 2218, 3072, 2976, 2732, 5261,
4944, 767, 4162, 7128, 1056, 1716, 28, 5295, 768, 2524),
Checkout = c(668, 410, 771, 53, 395, 2189, 183, 1709, 1334,
1386, 1920, 1860, 1707, 3288, 3090, 479, 2601, 4455, 660,
1072, 17, 3309, 480, 1577), Transactions = c(556, 341, 642,
44, 329, 1824, 152, 1424, 1111, 1155, 1600, 1550, 1422, 2740,
2575, 399, 2167, 3712, 550, 893, 14, 2757, 400, 1314)), class = "data.frame", row.names = c(NA,
-24L), .Names = c("Channel", "Promo", "Sessions", "AddToCart",
"Registrations", "ShippingDetails", "Checkout", "Transactions"
))
Here is a screen shot of how the plot looks:
I would like to add a new line (a new stat summary perhaps?) in between each bar showing the percentage change. For the first 2 bars, sessions and add to cart, the value has changed from 1.4M to 385k = a drop of ~ 72%. So, I would like "72%" shown some way between the bars.
Is there a straightforward way of doing this?
I could just create a table and display it under the visualization but I wanted to see how it looked adding the percentage drops to the visual itself.
How would I add the percentage drop between each bar reading left to right?
You can try:
# `df` is assumed to hold the question's funnel data (exampledf) — adjust the
# name if needed. as_tibble() replaces the deprecated as.tbl().
as_tibble(df) %>%
  # One total per funnel step, largest first.
  gather(key, value, -Channel, -Promo) %>%
  group_by(key) %>%
  summarise(Sum = sum(value)) %>%
  arrange(desc(Sum)) %>%
  mutate(
    # Total of the preceding (larger) step; NA for the first step.
    End = lag(Sum),
    # Horizontal position halfway between this bar and the previous one.
    xpos = seq_len(n()) - 0.5,
    Diff = End - Sum,
    Percent = paste(round(Diff / End * 100, 1), "%")
  ) %>%
  ggplot(aes(x = reorder(key, -Sum), y = Sum)) +
  geom_col(alpha = 0.6) +
  stat_summary(
    aes(label = scales::comma(..y..)),
    fun.y = "sum",
    geom = "text",
    col = "white",
    vjust = 1.5
  ) +
  # Vertical marker spanning the drop between two neighbouring bars; the first
  # step has no predecessor, so its segment and label have missing coordinates.
  geom_segment(aes(x = xpos, y = End, xend = xpos, yend = Sum)) +
  # Percentage drop, placed at the middle of the marker.
  geom_text(aes(x = xpos, y = End - Diff / 2, label = Percent), hjust = -0.2)

Resources