Adding percent change between bars on a ggplot - r

I have a geom_bar plot with labels for the values of each bar on the visual itself.
Here is the code to generate the plot:
# libraries
library(shiny)
library(tidyverse)
# funnel bar plot
# Render the funnel bar chart: one bar per funnel step, ordered by descending
# Sessions, with the summed value of each bar printed inside it.
output$funnel_plot <- renderPlot({
  ggplot(exampledf, aes(x = reorder(Funnel, -Sessions), y = Sessions)) +
    geom_bar(stat = "identity", fill = "#008080", alpha = 0.6) +
    # `fun` and after_stat() replace the deprecated `fun.y` and `..y..`
    stat_summary(
      aes(label = scales::comma(after_stat(y))),
      fun = "sum", geom = "text", col = "white", vjust = 1.5
    ) +
    xlab("Step") +
    ylab("Events") +
    # Label the y axis in thousands, e.g. 500000 -> "500K"
    scale_y_continuous(labels = function(l) paste0(l / 1000, "K"))
})
And here is a data frame to feed into it.
exampledf <- structure(list(Channel = c("Facebook", "Youtube", "SEM", "Organic",
"Direct", "Email", "Facebook", "Youtube", "SEM", "Organic", "Direct",
"Email", "Facebook", "Youtube", "SEM", "Organic", "Direct", "Email",
"Facebook", "Youtube", "SEM", "Organic", "Direct", "Email"),
Promo = c("None", "None", "None", "None", "None", "None",
"Partner Offer", "Partner Offer", "Partner Offer", "Partner Offer",
"Partner Offer", "Partner Offer", "Print Code", "Print Code",
"Print Code", "Print Code", "Print Code", "Print Code", "Affiliate Promo",
"Affiliate Promo", "Affiliate Promo", "Affiliate Promo",
"Affiliate Promo", "Affiliate Promo"), Sessions = c(26126,
16885, 32229, 2446, 16353, 79202, 7688, 83475, 48834, 53475,
71238, 78728, 76710, 125571, 125719, 17142, 103206, 181082,
27071, 42571, 716, 139871, 21676, 59560), AddToCart = c(7228,
4436, 8344, 575, 4275, 23681, 1982, 18489, 14433, 14995,
20769, 20119, 18471, 35566, 33423, 5187, 28138, 48186, 7140,
11602, 190, 35795, 5193, 17064), Registrations = c(2780,
1706, 3209, 221, 1644, 9108, 762, 7111, 5551, 5767, 7988,
7738, 7104, 13679, 12855, 1995, 10822, 18533, 2746, 4462,
73, 13767, 1997, 6563), ShippingDetails = c(1069, 656, 1234,
85, 632, 3503, 293, 2735, 2135, 2218, 3072, 2976, 2732, 5261,
4944, 767, 4162, 7128, 1056, 1716, 28, 5295, 768, 2524),
Checkout = c(668, 410, 771, 53, 395, 2189, 183, 1709, 1334,
1386, 1920, 1860, 1707, 3288, 3090, 479, 2601, 4455, 660,
1072, 17, 3309, 480, 1577), Transactions = c(556, 341, 642,
44, 329, 1824, 152, 1424, 1111, 1155, 1600, 1550, 1422, 2740,
2575, 399, 2167, 3712, 550, 893, 14, 2757, 400, 1314)), class = "data.frame", row.names = c(NA,
-24L), .Names = c("Channel", "Promo", "Sessions", "AddToCart",
"Registrations", "ShippingDetails", "Checkout", "Transactions"
))
Here is a screen shot of how the plot looks:
I would like to add a new line (a new stat summary perhaps?) in between each bar showing the percentage change. For the first 2 bars, sessions and add to cart, the value has changed from 1.4M to 385k = a drop of ~ 72%. So, I would like "72%" shown some way between the bars.
Is there a straightforward way of doing this?
I could just create a table and display it under the visualization but I wanted to see how it looked adding the percentage drops to the visual itself.
How would I add the percentage drop between each bar reading left to right?

You can try:
# Sum every funnel step of `exampledf` (the data frame from the question),
# sort the steps from largest to smallest, and derive the drop-off from the
# previous step so it can be drawn between neighbouring bars.
as_tibble(exampledf) %>%
  pivot_longer(-c(Channel, Promo), names_to = "key", values_to = "value") %>%
  group_by(key) %>%
  summarise(Sum = sum(value)) %>%
  arrange(desc(Sum)) %>%
  mutate(
    End = lag(Sum),              # total of the previous (larger) step
    xpos = seq_len(n()) - 0.5,   # halfway between this bar and the previous one
    Diff = End - Sum,
    Percent = paste(round(Diff / End * 100, 1), "%")
  ) %>%
  ggplot(aes(x = reorder(key, -Sum), y = Sum)) +
  geom_col(alpha = 0.6) +
  # `fun` and after_stat() replace the deprecated `fun.y` and `..y..`
  stat_summary(
    aes(label = scales::comma(after_stat(y))),
    fun = "sum", geom = "text", col = "white", vjust = 1.5
  ) +
  # The first step has no predecessor (End is NA); na.rm = TRUE drops that
  # row without a "removed rows containing missing values" warning.
  geom_segment(aes(x = xpos, y = End, xend = xpos, yend = Sum), na.rm = TRUE) +
  geom_text(
    aes(x = xpos, y = End - Diff / 2, label = Percent),
    hjust = -0.2, na.rm = TRUE
  )

Related

Only the map legend appearing in ggplot

I am trying to plot data on a map. However, only the legend appears on a blank graph. The data set.
structure(list(States.Uts = c("Jammu and Kashmir", "Karnataka",
"Odisha", "Sikkim", "Madhya Pradesh", "Maharashtra", "Kerala",
"Rajasthan", "Delhi", "Andhra Pradesh", "Uttar Pradesh", "Gujarat",
"West Bengal", "Mizoram", "Uttarakhand", "Assam", "Haryana",
"Himachal Pradesh", "Jharkhand", "Punjab"), id = c(35, 8, 36,
25, 11, 12, 9, 21, 5, 17, 24, 22, 1, 30, 23, 26, 6, 19, 7, 20
), long = c(74.8692906760123, 76.1671602023197, 84.4299347735266,
88.47355094813, 78.2889834734203, 76.1073683433136, 76.4080579710788,
73.8499033398323, 77.1154800785184, 79.964340328073, 80.5663333555389,
71.5737853588608, 87.9835046818375, 92.8318089515678, 79.2071970183086,
92.8261810120493, 76.3401988564537, 77.2453583452855, 85.5641242711505,
75.4154856775423), lat = c(33.7066861126216, 14.7103409600977,
20.5129916361452, 27.5703696825569, 23.5382000019923, 19.4517685463546,
10.4515827401328, 26.5845654103506, 28.6433836730517, 15.7549664734525,
26.9232961978376, 22.6974841269408, 23.8143407874487, 23.3070171585463,
30.1564981917953, 26.3553441447585, 29.198093425323, 31.92360060294,
23.6561315040147, 30.8424285351448), Type = c("Union Territory",
"State", "State", "State", "State", "State", "State", "State",
"Union Territory", "State", "State", "State", "State", "State",
"State", "State", "State", "State", "State", "State"), low_prestige = c(1000,
836, 195, 1000, 188, 441, 441, 736, 370, 235, 0, 151, 82, 42,
28, 0, 0, 0, 0, 0), high_prestige = c(0, 0, 0, 0, 53, 149, 212,
264, 630, 765, 808, 849, 918, 958, 972, 1000, 1000, 1000, 1000,
1000)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-20L))
The code that I have tried:
# Draws one filled polygon per `id`, coloured by `low_prestige`.
# NOTE(review): the data shown above has a single (long, lat) row per id, so
# each "polygon" would have only one vertex — presumably why nothing but the
# legend is visible; confirm that final.plot1 is that data set.
map1<-ggplot(final.plot1, aes( x = long, y = lat, group=id)) +
geom_polygon(aes(fill = low_prestige), color = "black")
map1
Another option is using the sf package which is useful to plot geographic data in combination with ggplot. Here is a reproducible example:
library(ggplot2)
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
# Turn the long/lat columns into point geometries. The values appear to be
# longitude/latitude in degrees, so declare WGS84 (EPSG:4326); without `crs`
# the sf object has no coordinate reference system attached.
your_sf <- st_as_sf(final.plot1, coords = c("long", "lat"), crs = 4326)
# Plot the points, coloured by `low_prestige`.
ggplot(your_sf) +
  geom_sf(aes(color = low_prestige))
Created on 2022-11-28 with reprex v2.0.2

How to add color and change space between ticks in ggplot?

I am currently learning R and am trying to produce a time series graph with ggplot(). Here is my current code:
library(readxl)
library(ggplot2)
AUS_GDP <- c(826, 834, 858, 886, 936, 982, 999, 1030, 1057, 1114, 1352,
1398, 1428, 1435, 1508, 1825, 1793, 1881, 1857, 1733, 2190, 1868,
1704, 1975, 2246, 2310, 2554, 2954, 3381, 3339, 3148, 3736, 4382,
4798, 4202, 3988, 4866, 4294, 3661, 4728, 4613, 4544, 4368, 4352,
4616, 4403, 4532, 4956, 5031, 4932, 5217, 5259, 5663, 6095, 6113,
6596, 6387, 6433, 6817, 6703, 6830, 7101, 6476, 7133, 6861, 7049,
6900, 7383, 7179, 7567, 7106, 7438, 6368, 5910, 6003, 5558, 5874,
5451, 6202, 6121, 6397, 6119, 6094, 6497, 6846, 6825, 7184, 7358,
7481, 7940, 8305, 8136, 8126, 8220, 8013, 7806, 7775, 7637, 7336,
7517, 7597, 7828, 8072, 8276, 8635, 8851, 8883, 8837, 8690, 8389,
7504, 6940, 7275, 7718, 8066, 8477, 8792, 9159, 9382, 9318, 9828,
10820, 11963, 12278, 11735, 11026, 10512, 10622, 11105, 11536,
11815, 11966, 11824, 11963, 12419, 12795, 12924, 12895, 13238,
13753, 14013, 13793, 14389, 14983, 15699, 16182, 16324, 17108,
17770, 18428, 19166, 19590, 19772, 20527, 20698, 20993, 21613,
21592, 21948, 22826, 22972, 23368, 22972, 22697, 24009, 24927,
25116, 25971, 26702, 27407, 27373, 26861.2749592856, 27560.1782773384,
28622.4757584582, 29844.1713244349, 30690.0624398521, 31740.4697474047,
32857.9937806025, 34337.139432097, 35551.0195059737, 36603.0449959447,
37275.9912489095, 38567.0654883377, 39523.6551974459, 40887.725336397,
41904.4458707113, 42650.9859308496, 44033.5837617713, 44421.6404707281,
44686.541383463, 45400.2233987967, 46132, 46999, 47250, 47867,
48357, 48845, 49265.6135020065, 49830.7993065638)
# Natural log of the GDP series, stored next to the raw values.
log_AUS_GDP <- log(AUS_GDP)
year <- 1820:2018
AUS <- data.frame(year = year,
gdp = AUS_GDP,
log_gdp = log_AUS_GDP)
# Line chart of raw GDP against year.
ggplot(AUS, aes(x = year, y = gdp)) +
geom_line() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.1, size = 10)) +
# breaks = year requests a tick and label for every year from 1820 to 2018 (199 ticks)
scale_x_continuous("year", labels = as.character(year), breaks = year)
And here is what I have right now:
I wish to achieve something like this;
but I am not sure how to do that, can someone show me the way? Thank you so much!
I think you have different dataframes for each country. You can combine them in one dataframe with a country column that identifies each dataset and plot them with ggplot.
library(dplyr)
library(ggplot2)
# Stack the per-country data frames into one; lst() names each element after
# its variable and .id = 'country' stores that name in a `country` column,
# which then drives the line colour.
# NOTE(review): `US` and `FRANCE` are not defined in the code shown — presumably built the same way as `AUS`.
bind_rows(lst(AUS, US, FRANCE), .id = 'country') %>%
ggplot(aes(year, gdp, color = country)) +
geom_line() +
labs(x = 'YEAR',
y = 'GDP',
# NOTE(review): the title mentions the UK, but the data combined above are AUS, US and FRANCE — confirm which is intended.
title = 'GDP per Capita in France, UK and the US')
Keep it simple, like below:
I added one column with Country Aus
# Add the constant `Country` column that the colour aesthetic maps to, so the
# snippet runs on its own instead of failing with "object 'Country' not found".
AUS$Country <- "Aus"
ggplot(AUS, aes(x = year, y = gdp, color = Country)) +
  geom_line() +
  theme_minimal() +
  theme(legend.position = "bottom")

Add pie chart in a bar graph

My goal is to create a bar graph of categorical variables and then add a pie chart inside it as shown in the attached image.
my data is the same as in the image and is below:
# Bar-chart data: one frequency per chromosome label ("1A", "1B", "1D", ..., "7D")
chromosomes <- paste0(rep(1:7, each = 3), c("A", "B", "D"))
Frequency <- c(
  668, 752, 213, 826, 948, 334, 625, 834, 264, 488, 391,
  136, 745, 917, 234, 543, 848, 182, 901, 740, 241
)
data_bar <- data.frame(chromosomes = chromosomes, Frequency = Frequency)
# Pie-chart data: one count per genome
Genome <- c("A", "B", "D")
Count <- c(4796, 5430, 1604)
data_pie <- data.frame(Genome = Genome, Count = Count)
I would highly appreciate it if anyone could guide me or direct me to where I can find the answers.
Here's a ggplot2 solution:
# Pie chart of the genome counts, with axes and legend removed so it can be
# embedded inside another plot.
pie <- ggplot(data_pie, aes(x = 1, y = Count, fill = Genome)) +
geom_col(color = "black") +
scale_fill_manual(values = c("red2", "forestgreen", "dodgerblue3")) +
# Polar coordinates on y turn the single stacked column into a pie
coord_polar(theta = "y") +
theme_void() +
theme(legend.position = "none")
# Bar chart of frequency per chromosome; bars are filled by the second
# character of the chromosome label, using the same three colours as the pie.
ggplot(data_bar) +
geom_col(aes(chromosomes, Frequency, fill = substr(chromosomes, 2, 2)),
color = "black", width = 0.5) +
scale_fill_manual(values = c("red2", "forestgreen", "dodgerblue3")) +
theme_classic() +
theme(legend.position = "none") +
# Place the pie, converted to a grob, inside the bar chart
annotation_custom(ggplotGrob(pie), xmin = "2B", xmax = "6A", ymin = 500)
Using only base R functions, please see the code below:
## Your data ---------------------------------
# For bar graph
chromosomes <- c("1A", "1B", "1D", "2A", "2B", "2D", "3A", "3B", "3D", "4A", "4B", "4D", "5A", "5B", "5D", "6A", "6B", "6D", "7A", "7B", "7D")
Frequency <- c(668, 752, 213, 826, 948, 334, 625, 834, 264, 488, 391, 136, 745, 917, 234, 543, 848, 182, 901, 740, 241)
# For pie chart
Genome <- c("A", "B", "D")
Count <- c(4796, 5430, 1604)
## One idea to start with --------------------
# Stack the two plots in a 2 x 1 layout. par() returns the previous settings,
# which are restored at the end so later plots are not affected. No
# plot.new() is needed first: par() opens a device itself when none is open,
# and the extra call only produced a blank page.
old_par <- par(
  mfrow = c(2, 1),     # set the layout; layout() or split.screen() are alternatives, see ?par
  mar = c(4, 4, 1, 1), # leaves enough space to see axis labels and titles
  oma = c(0, 0, 0, 0)  # remove outer margins
)
pie(Count, labels = Genome)
barplot(Frequency ~ chromosomes)
par(old_par)
Output:
I think it is possible to make it look cleaner adjusting par() arguments but I am not very familiar with them.
There are also the packages cowplot and gridExtra, which work nicely with ggplot2.

quo and enquo in dplyr style function, Error in ~Sessions : object 'Sessions' not found

I have a dataframe:
dput(dat.trended)
structure(list(date = structure(c(18230, 18230, 18230, 18230,
18230, 18230, 18230, 18230, 18231, 18231, 18231, 18231, 18231,
18231, 18231, 18231, 18232, 18232, 18232, 18232, 18232, 18232,
18232, 18233, 18233, 18233, 18233, 18233, 18233, 18233, 18234,
18234, 18234, 18234, 18234, 18234, 18234, 18235, 18235, 18235,
18235, 18235, 18235, 18235, 18236, 18236, 18236, 18236, 18236,
18236, 18236, 18237, 18237, 18237, 18237, 18237, 18237, 18237,
18237, 18238, 18238, 18238, 18238, 18238, 18238, 18238, 18238,
18239, 18239, 18239, 18239, 18239, 18239, 18239, 18239, 18240,
18240, 18240, 18240, 18240, 18240, 18240, 18240, 18241, 18241,
18241, 18241, 18241, 18241, 18241, 18241, 18242, 18242, 18242,
18242, 18242, 18242, 18242, 18242, 18243, 18243, 18243, 18243,
18243, 18243, 18243, 18243, 18244, 18244, 18244, 18244, 18244,
18244, 18244, 18244, 18245, 18245, 18245, 18245, 18245, 18245,
18245, 18245, 18246, 18246, 18246, 18246, 18246, 18246, 18246,
18246, 18247, 18247, 18247, 18247, 18247, 18247, 18247, 18247,
18247, 18248, 18248, 18248, 18248, 18248, 18248, 18248, 18248,
18248, 18249, 18249, 18249, 18249, 18249, 18249, 18249, 18249,
18250, 18250, 18250, 18250, 18250, 18250, 18250, 18250, 18250,
18251, 18251, 18251, 18251, 18251, 18251, 18251, 18251, 18252,
18252, 18252, 18252, 18252, 18252, 18252, 18252, 18253, 18253,
18253, 18253, 18253, 18253, 18253, 18254, 18254, 18254, 18254,
18254, 18254, 18254, 18254, 18255, 18255, 18255, 18255, 18255,
18255, 18255, 18255, 18256, 18256, 18256, 18256, 18256, 18256,
18256, 18256, 18257, 18257, 18257, 18257, 18257, 18257, 18257,
18257, 18258, 18258, 18258, 18258, 18258, 18258, 18258, 18258
), class = "Date"), Channel = c("(Other)", "Direct", "Display",
"Email", "Organic Search", "Paid Search", "Referral", "Social",
"(Other)", "Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Direct", "Email", "Organic Search",
"Paid Search", "Referral", "Social", "(Other)", "Direct", "Email",
"Organic Search", "Paid Search", "Referral", "Social", "(Other)",
"Direct", "Email", "Organic Search", "Paid Search", "Referral",
"Social", "(Other)", "Direct", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Direct", "Email", "Organic Search",
"Paid Search", "Referral", "Social", "(Other)", "Direct", "Display",
"Email", "Organic Search", "Paid Search", "Referral", "Social",
"(Other)", "Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Affiliates", "Direct", "Email",
"Organic Search", "Paid Search", "Referral", "Social", "(Other)",
"Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Direct", "Display", "Email",
"Organic Search", "Paid Search", "Referral", "Social", "(Other)",
"Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Direct", "Display", "Email",
"Organic Search", "Paid Search", "Referral", "Social", "(Other)",
"Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Direct", "Display", "Email",
"Organic Search", "Paid Search", "Referral", "Social", "(Other)",
"Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Affiliates", "Direct", "Display",
"Email", "Organic Search", "Paid Search", "Referral", "Social",
"(Other)", "Affiliates", "Direct", "Display", "Email", "Organic Search",
"Paid Search", "Referral", "Social", "(Other)", "Direct", "Display",
"Email", "Organic Search", "Paid Search", "Referral", "Social",
"(Other)", "Affiliates", "Direct", "Display", "Email", "Organic Search",
"Paid Search", "Referral", "Social", "(Other)", "Direct", "Display",
"Email", "Organic Search", "Paid Search", "Referral", "Social",
"(Other)", "Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Direct", "Email", "Organic Search",
"Paid Search", "Referral", "Social", "(Other)", "Direct", "Display",
"Email", "Organic Search", "Paid Search", "Referral", "Social",
"(Other)", "Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Direct", "Display", "Email",
"Organic Search", "Paid Search", "Referral", "Social", "(Other)",
"Direct", "Display", "Email", "Organic Search", "Paid Search",
"Referral", "Social", "(Other)", "Direct", "Display", "Email",
"Organic Search", "Paid Search", "Referral", "Social"), DailyUsers = c(4584,
122507, 1, 790, 36015, 733, 22716, 44639, 5107, 136612, 1, 773,
41964, 808, 26301, 53908, 6613, 180241, 1330, 48477, 907, 32545,
54564, 6776, 166011, 1782, 46269, 900, 29584, 49245, 6716, 175636,
1061, 44079, 925, 27302, 46499, 6122, 169174, 1159, 43812, 836,
27814, 43787, 5499, 151641, 796, 40171, 755, 25464, 41112, 4568,
128915, 1, 540, 36766, 789, 23039, 41885, 5205, 143873, 2, 633,
42361, 802, 26525, 51519, 7005, 1, 179617, 868, 48641, 915, 33230,
55982, 7247, 172763, 1, 927, 45502, 834, 29853, 49680, 6629,
163548, 10, 1102, 42964, 773, 27673, 46432, 6774, 158663, 9,
1313, 41267, 764, 26924, 41696, 5868, 146750, 11, 661, 38098,
758, 24081, 38489, 4575, 121006, 6, 482, 34642, 737, 21416, 40289,
4616, 135709, 7, 562, 38238, 815, 24560, 45485, 7468, 172483,
4, 780, 44160, 780, 30592, 45159, 6908, 1, 160183, 10, 714, 41889,
784, 26831, 41552, 6330, 1, 150554, 10, 898, 38073, 704, 25583,
36881, 5495, 145060, 8, 673, 35519, 722, 22461, 34739, 5237,
1, 142440, 10, 874, 32385, 645, 21154, 33666, 4061, 112273, 8,
381, 29598, 675, 17570, 32665, 3888, 106445, 1, 368, 30830, 686,
18249, 35123, 4531, 120502, 1195, 31502, 701, 19681, 34551, 3603,
95447, 1, 577, 23994, 576, 14703, 26813, 2507, 83464, 2, 324,
21731, 595, 12008, 22082, 2867, 98924, 1, 361, 28706, 717, 15739,
27070, 2519, 110161, 3, 327, 30031, 709, 16066, 28716, 2135,
101149, 2, 286, 28501, 743, 14617, 30129), Sessions = c(5662,
140831, 1, 964, 41266, 769, 28090, 48214, 6258, 157435, 1, 976,
48872, 850, 32734, 58368, 8263, 206339, 1616, 56362, 936, 41013,
60134, 8475, 189989, 2003, 52984, 952, 36599, 53858, 8271, 198848,
1335, 50233, 955, 33893, 50887, 7693, 191770, 1370, 49736, 864,
34287, 47590, 6901, 172475, 1003, 45772, 800, 31295, 44571, 5704,
148088, 1, 656, 42294, 822, 28452, 45226, 6607, 165534, 2, 801,
49337, 858, 32962, 56008, 8686, 1, 205329, 1066, 56373, 950,
41401, 61180, 8974, 197191, 1, 1116, 52376, 887, 37316, 54398,
8328, 186861, 10, 1282, 48913, 817, 34246, 50714, 8269, 180251,
9, 1521, 46949, 810, 33109, 45453, 7272, 167272, 11, 836, 43257,
806, 29710, 41932, 5728, 139022, 6, 603, 39619, 787, 26441, 43352,
5900, 155182, 7, 683, 44232, 853, 30398, 49342, 10388, 217068,
4, 1049, 54970, 850, 40829, 53278, 8685, 1, 182160, 10, 882,
47799, 826, 33132, 45484, 7794, 1, 170454, 10, 1033, 43464, 726,
31096, 40310, 6677, 163956, 8, 851, 40376, 753, 27458, 37682,
6324, 1, 159839, 10, 1011, 36577, 685, 25788, 36395, 5010, 127954,
8, 499, 33887, 690, 21511, 35205, 4752, 121747, 1, 460, 35583,
725, 22589, 37848, 5539, 137528, 1381, 36278, 728, 24421, 37487,
4505, 109400, 1, 692, 27551, 630, 17949, 28790, 3074, 94643,
2, 395, 24511, 633, 14542, 23327, 6888, 222318, 2, 888, 63947,
1486, 37906, 57280, 3092, 122398, 3, 407, 33506, 768, 19062,
30477, 2625, 112959, 3, 375, 31670, 812, 17500, 31946), Transactions = c(24,
614, 0, 39, 73, 0, 1781, 75, 29, 898, 0, 50, 104, 0, 2205, 125,
46, 1161, 84, 117, 0, 2822, 125, 64, 779, 63, 74, 0, 2081, 91,
92, 610, 68, 72, 0, 1798, 130, 36, 637, 60, 95, 0, 1893, 91,
33, 523, 49, 78, 0, 1698, 78, 18, 496, 0, 41, 50, 0, 1412, 78,
31, 751, 0, 43, 99, 0, 2041, 122, 30, 0, 856, 54, 118, 0, 2441,
128, 35, 779, 0, 45, 97, 0, 2124, 113, 40, 779, 0, 43, 101, 0,
1884, 112, 34, 671, 0, 37, 79, 0, 1952, 102, 27, 604, 0, 35,
76, 0, 1635, 77, 29, 506, 0, 30, 72, 0, 1391, 67, 30, 667, 0,
39, 104, 0, 1785, 91, 74, 1125, 0, 50, 110, 0, 2219, 108, 38,
0, 639, 0, 38, 70, 0, 1764, 86, 30, 0, 550, 0, 35, 66, 0, 1475,
70, 42, 487, 0, 32, 61, 0, 1334, 56, 24, 0, 422, 0, 27, 39, 0,
1159, 57, 16, 345, 0, 32, 47, 0, 935, 63, 9, 366, 0, 21, 44,
0, 908, 53, 26, 413, 19, 44, 0, 1040, 50, 19, 252, 0, 17, 20,
0, 623, 42, 8, 155, 0, 16, 18, 0, 468, 21, 22, 448, 0, 30, 44,
0, 1332, 64, 12, 242, 0, 27, 39, 0, 684, 32, 7, 192, 0, 17, 34,
0, 695, 28), Revenue = c(1739.74331, 31113.4098, 0, 2312.482089,
4707.59151, 0, 82485.8405, 3342.691127, 1471.14892, 52576.42497,
0, 4527.28196, 5729.90202, 0, 103926.5696, 4292.44231, 3521.7898,
62665.81251, 6011.545189, 7449.704978, 0, 144009.60402, 5953.210453,
1577.87009, 38261.10011, 2694.018798, 4000.98632, 0, 96847.5706,
3309.47883, 1701.177895, 27902.17329, 2951.83414, 2971.27577,
0, 76756.6602, 3515.29931, 2764.4399, 30351.32437, 4789.010241,
5144.50085, 0, 82277.59305, 2958.273109, 1959.03252, 28802.46538,
3031.68041, 3586.315, 0, 73028.73399, 3240.188819, 439.617731,
22872.02071, 0, 2676.741934, 2176.79642, 0, 61351.30115, 2989.81603,
851.234387, 36528.76585, 0, 2841.349662, 4151.194286, 0, 89709.3116,
4359.76453, 778.43277, 0, 36356.39973, 2516.140702, 5155.146913,
0, 96324.67089, 4357.012963, 1772.90713, 35896.53789, 0, 1941.54086,
4323.53325, 0, 93397.92809, 3192.625251, 1461.7126, 28900.90783,
0, 1916.5007105, 4841.32437, 0, 85239.33652, 5585.45811, 2275.3234222,
28425.09616, 0, 1423.95997, 3500.040202, 0, 78402.95617, 2992.36592,
839.113369, 24428.0301, 0, 2660.20427, 3219.88335, 0, 77911.54692,
3649.76663, 952.832833, 20949.3076, 0, 1694.55981, 3402.67867,
0, 60677.01657, 3507.35016, 790.85074, 28618.47916, 0, 2127.7198902,
4998.198504, 0, 74267.3436, 3810.81114, 1593.180049, 39967.90885,
0, 2043.741553, 4707.7458188, 0, 96546.4659, 3873.8297254, 1152.759991,
0, 30586.18438, 0, 2263.76183, 3255.65978, 0, 74831.96684, 2302.93888,
1541.0201, 0, 25838.51692, 0, 1564.607309, 2585.175407, 0, 56909.35993,
1893.008107, 3125.484002, 20030.02974, 0, 2984.73174, 2053.920653,
0, 54000.0694, 2391.062841, 994.023615, 0, 15894.98859, 0, 1587.93677,
2574.329865, 0, 47319.38721, 1635.377802, 1541.12045, 13794.50411,
0, 1865.84062, 1858.920211, 0, 37858.94079, 1723.849722, 284.334126,
15552.58137, 0, 1233.98682, 2079.841952, 0, 34436.41117, 982.248295,
1098.275799, 14903.710499, 697.91002, 1222.725965, 0, 33794.59734,
1959.086548, 461.12527, 11560.703802, 0, 1007.17398, 596.599993,
0, 22088.53336, 1142.67293, 135.400002, 6869.02614, 0, 673.31943,
1630.839026, 0, 18944.7318, 1252.02348, 1150.00001, 20721.64957,
0, 1893.8333, 989.919695, 0, 59597.87949, 2347.95408, 605.82993,
8630.90537, 0, 1581.627411, 2021.417672, 0, 28167.96098, 1222.06059,
409.939999, 9957.808578, 0, 705.67002, 1168.074171, 0, 31991.812556,
1061.337995)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-229L))
I am attempting to make a function and have been reading this dplyr post on how to make functions.
My attempt:
# Question's attempt at a reusable version of the pipeline below it: sum
# `metric` per date and `dim`, then spread the dates into columns.
# NOTE(review): as written this does not match the working pipeline; see the
# inline notes on the summarise/gather/spread lines.
metricTables <- function(df, dim, metric) {
# Capture the column arguments unevaluated so they can be unquoted with !!
dim <- enquo(dim)
metric <- enquo(metric)
# Relabel dates as ordered "dd-Mon" strings, with levels in chronological order
df %>% mutate(date = ordered(
format(date, "%d-%b"),
levels = format(sort(unique(date)), "%d-%b")
)) %>%
group_by(date, !! dim) %>%
summarise(metric = sum(!! metric)) %>% # the result column is literally named `metric`; the goal is for it to carry the name that was passed as `metric` when the function was called
gather(Key, value, -!! metric, -date) %>% # NOTE(review): excludes `metric` where the working pipeline excludes the dimension (Channel), and names the value column `value`
spread(date, Value) %>% # NOTE(review): refers to `Value`, but gather() above created `value`
select(-Key) %>%
replace(is.na(.), 0) %>%
mutate_at(vars(-!! dim), round, 0) %>%
mutate_at(vars(-!! dim), scales::comma)
}
When I use this code outside of a function, it works as expected, e.g.:
# works
# sessions table
sessionsTable <- dat.trended %>%
mutate(date = ordered(
format(date, "%d-%b"),
levels = format(sort(unique(date)), "%d-%b")
)) %>%
group_by(date, Channel) %>%
summarise(Sessions = sum(Sessions)) %>%
gather(Key, Value, -Channel, -date) %>%
spread(date, Value) %>%
select(-Key) %>%
replace(is.na(.), 0) %>%
mutate_at(vars(-Channel), round, 0) %>%
mutate_at(vars(-Channel), scales::comma)
However when running as a function I get an error:
> metricTables(dat.trended, Channel, Sessions)
Error in ~Sessions : object 'Sessions' not found
dat.trended is the data frame I provided above.
Expected output:
sessionsTable <- dat.trended %>%
mutate(date = ordered(
format(date, "%d-%b"),
levels = format(sort(unique(date)), "%d-%b")
)) %>%
group_by(date, Channel) %>%
summarise(Sessions = sum(Sessions)) %>%
gather(Key, Value, -Channel, -date) %>%
spread(date, Value) %>%
select(-Key) %>%
replace(is.na(.), 0) %>%
mutate_at(vars(-Channel), round, 0) %>%
mutate_at(vars(-Channel), scales::comma)
> glimpse(sessionsTable)
Observations: 9
Variables: 30
$ Channel <chr> "(Other)", "Affiliates", "Direct", "Display", "Email", "Organic Search", "Paid Search", "Referral", …
$ `30-Nov` <chr> "5,662.0", "0.0", "140,831.0", "1.0", "964.0", "41,266.0", "769.0", "28,090.0", "48,214.0"
$ `01-Dec` <chr> "6,258.0", "0.0", "157,435.0", "1.0", "976.0", "48,872.0", "850.0", "32,734.0", "58,368.0"
$ `02-Dec` <chr> "8,263", "0", "206,339", "0", "1,616", "56,362", "936", "41,013", "60,134"
$ `03-Dec` <chr> "8,475", "0", "189,989", "0", "2,003", "52,984", "952", "36,599", "53,858"
$ `04-Dec` <chr> "8,271", "0", "198,848", "0", "1,335", "50,233", "955", "33,893", "50,887"
$ `05-Dec` <chr> "7,693", "0", "191,770", "0", "1,370", "49,736", "864", "34,287", "47,590"
$ `06-Dec` <chr> "6,901", "0", "172,475", "0", "1,003", "45,772", "800", "31,295", "44,571"
$ `07-Dec` <chr> "5,704.0", "0.0", "148,088.0", "1.0", "656.0", "42,294.0", "822.0", "28,452.0", "45,226.0"
$ `08-Dec` <chr> "6,607.0", "0.0", "165,534.0", "2.0", "801.0", "49,337.0", "858.0", "32,962.0", "56,008.0"
$ `09-Dec` <chr> "8,686.0", "1.0", "205,329.0", "0.0", "1,066.0", "56,373.0", "950.0", "41,401.0", "61,180.0"
$ `10-Dec` <chr> "8,974.0", "0.0", "197,191.0", "1.0", "1,116.0", "52,376.0", "887.0", "37,316.0", "54,398.0"
$ `11-Dec` <chr> "8,328", "0", "186,861", "10", "1,282", "48,913", "817", "34,246", "50,714"
$ `12-Dec` <chr> "8,269.0", "0.0", "180,251.0", "9.0", "1,521.0", "46,949.0", "810.0", "33,109.0", "45,453.0"
$ `13-Dec` <chr> "7,272", "0", "167,272", "11", "836", "43,257", "806", "29,710", "41,932"
$ `14-Dec` <chr> "5,728.0", "0.0", "139,022.0", "6.0", "603.0", "39,619.0", "787.0", "26,441.0", "43,352.0"
$ `15-Dec` <chr> "5,900.0", "0.0", "155,182.0", "7.0", "683.0", "44,232.0", "853.0", "30,398.0", "49,342.0"
$ `16-Dec` <chr> "10,388.0", "0.0", "217,068.0", "4.0", "1,049.0", "54,970.0", "850.0", "40,829.0", "53,278.0"
$ `17-Dec` <chr> "8,685.0", "1.0", "182,160.0", "10.0", "882.0", "47,799.0", "826.0", "33,132.0", "45,484.0"
$ `18-Dec` <chr> "7,794.0", "1.0", "170,454.0", "10.0", "1,033.0", "43,464.0", "726.0", "31,096.0", "40,310.0"
$ `19-Dec` <chr> "6,677.0", "0.0", "163,956.0", "8.0", "851.0", "40,376.0", "753.0", "27,458.0", "37,682.0"
$ `20-Dec` <chr> "6,324.0", "1.0", "159,839.0", "10.0", "1,011.0", "36,577.0", "685.0", "25,788.0", "36,395.0"
$ `21-Dec` <chr> "5,010.0", "0.0", "127,954.0", "8.0", "499.0", "33,887.0", "690.0", "21,511.0", "35,205.0"
$ `22-Dec` <chr> "4,752.0", "0.0", "121,747.0", "1.0", "460.0", "35,583.0", "725.0", "22,589.0", "37,848.0"
$ `23-Dec` <chr> "5,539", "0", "137,528", "0", "1,381", "36,278", "728", "24,421", "37,487"
$ `24-Dec` <chr> "4,505.0", "0.0", "109,400.0", "1.0", "692.0", "27,551.0", "630.0", "17,949.0", "28,790.0"
$ `25-Dec` <chr> "3,074.0", "0.0", "94,643.0", "2.0", "395.0", "24,511.0", "633.0", "14,542.0", "23,327.0"
$ `26-Dec` <chr> "6,888.0", "0.0", "222,318.0", "2.0", "888.0", "63,947.0", "1,486.0", "37,906.0", "57,280.0"
$ `27-Dec` <chr> "3,092.0", "0.0", "122,398.0", "3.0", "407.0", "33,506.0", "768.0", "19,062.0", "30,477.0"
$ `28-Dec` <chr> "2,625.0", "0.0", "112,959.0", "3.0", "375.0", "31,670.0", "812.0", "17,500.0", "31,946.0"
How can I get my function metricTables() to correctly evaluate?
Here, we can convert to string for 'metric' and use :=
library(dplyr)
library(tidyr)
# Build a wide table of a summed metric: one row per value of `dim`, one
# column per day (labelled "dd-Mon"), values rounded and comma-formatted.
#
# df     : data frame with a `date` column plus the columns named below
# dim    : unquoted grouping column, e.g. Channel
# metric : unquoted numeric column to sum, e.g. Sessions
metricTables <- function(df, dim, metric) {
  dim <- enquo(dim)
  metric <- enquo(metric)
  df %>%
    mutate(date = ordered(
      format(date, "%d-%b"),
      levels = format(sort(unique(date)), "%d-%b")
    )) %>%
    group_by(date, !!dim) %>%
    # Name the summed column after the column passed as `metric`, and drop
    # the grouping so the reshaping steps work on an ungrouped tibble.
    summarise(!!rlang::as_name(metric) := sum(!!metric), .groups = "drop") %>%
    pivot_longer(
      cols = -c(date, !!dim),
      names_to = "Key", values_to = "value"
    ) %>%
    pivot_wider(names_from = date, values_from = value) %>%
    select(-Key) %>%
    # dim/date combinations with no rows become 0 rather than NA
    replace(is.na(.), 0) %>%
    # across() supersedes mutate_at(); round, then format with thousands
    # separators, every column except the dimension
    mutate(across(-!!dim, ~ scales::comma(round(.x, 0))))
}
metricTables(dat.trended, Channel, Sessions)
Can also make use of curly-curly operator ({{...}})
# Curly-curly variant: build a wide table of a summed metric with one row per
# value of `dim`, one column per day (labelled "dd-Mon"), values rounded and
# comma-formatted.
#
# df     : data frame with a `date` column plus the columns named below
# dim    : unquoted grouping column, e.g. Channel
# metric : unquoted numeric column to sum, e.g. Sessions
metricTables <- function(df, dim, metric) {
  df %>%
    mutate(date = ordered(
      format(date, "%d-%b"),
      levels = format(sort(unique(date)), "%d-%b")
    )) %>%
    group_by(date, {{ dim }}) %>%
    # The summed column is spread into the date columns right away, so its
    # name never reaches the output; a fixed name avoids the extra ensym().
    summarise(value = sum({{ metric }}), .groups = "drop") %>%
    # Going long and straight back to wide (then dropping `Key`) was a
    # no-op; pivot the summed column directly.
    pivot_wider(names_from = date, values_from = value) %>%
    # dim/date combinations with no rows become 0 rather than NA
    replace(is.na(.), 0) %>%
    # across() supersedes mutate_at(); {{ dim }} replaces !!ensym(dim)
    mutate(across(-{{ dim }}, ~ scales::comma(round(.x, 0))))
}
metricTables(dat.trended, Channel, Sessions)

use if() to use select() within a dplyr pipe chain

Read these two posts already:
can dplyr package be used for conditional mutating?
R Conditional evaluation when using the pipe operator %>%
I'm using Shiny input$selector and if the user has selected a particular value, I want my dataframe to be different than otherwise.
Here's a chain:
# Reactive funnel data: filter by the selected channel and promo, optionally
# narrow the columns, then reshape to one row per Channel/Promo/Funnel step
# with summed Sessions.
filtered_funnel <- reactive({
lastmonth_funnel %>%
# Selecting "All" makes the condition TRUE for every row, i.e. no filtering
filter(input$channel == "All" | Channel == input$channel) %>%
filter(input$promo == "All" | Promo == input$promo) %>%
## HERE IS WHERE I'M STRUGGLING
# NOTE(review): this `if` has no `else`, so when input$promo is "none" the braces evaluate to NULL instead of the data frame and the following step fails
{if(input$promo != "none") select(., c("Channel", "Promo", "ShippingDetails", "Checkout", "Transactions"))} %>%
gather(Funnel, Sessions, -Channel, -Promo) %>%
group_by(Channel, Promo, Funnel) %>%
summarise(Sessions = sum(Sessions))
})
If the user input does not equal "none" I would like to select variables "Channel", "Promo", "ShippingDetails", "Checkout" and "Transactions".
I tried a few variations of the problem line above but kept getting errors:
When I tried this within the pipe chain
{if(input$promo != "none") select(., c("Channel", "Promo", "ShippingDetails", "Checkout", "Transactions"))} %>%
I received this error:
Warning: Error in : All select() inputs must resolve to integer column
positions. The following do not:
* c("Channel", "Promo", "ShippingDetails", "Checkout", "Transactions")
I also tried:
{if(input$promo != "none") select(., c(Channel, Promo, ShippingDetails, Checkout:Transactions))} %>%
This actually runs till I select "none" in the input, in which case I get
Error in : is.character(x) is not TRUE
I got the same error when I tried this:
{ifelse(input$promo != "none", select(., c(Channel, Promo, ShippingDetails, Checkout:Transactions)), .)} %>%
How can I nest in a dplyr pipe chain a select statement that says if input$promo != "none" then select Channel, Promo, ShippingDetails, Checkout:Transactions from the passed object in the pipe?
-- Here's dput of the randomly generated data--
> dput(lastmonth_funnel)
structure(list(Channel = c("Facebook", "Youtube", "SEM", "Organic",
"Direct", "Email", "Facebook", "Youtube", "SEM", "Organic", "Direct",
"Email", "Facebook", "Youtube", "SEM", "Organic", "Direct", "Email",
"Facebook", "Youtube", "SEM", "Organic", "Direct", "Email", "Facebook",
"Youtube", "SEM", "Organic", "Direct", "Email"), Promo = c("none",
"none", "none", "none", "none", "none", "banannas", "banannas",
"banannas", "banannas", "banannas", "banannas", "carrots", "carrots",
"carrots", "carrots", "carrots", "carrots", "pears", "pears",
"pears", "pears", "pears", "pears", "apples", "apples", "apples",
"apples", "apples", "apples"), Sessions = c(6587, 3015, 6316,
11219, 8117, 6473, 12464, 14032, 14318, 17535, 16219, 7838, 10685,
12040, 19907, 13694, 6187, 16784, 21425, 18890, 24891, 16251,
16977, 25206, 28573, 18704, 29178, 22069, 39687, 53734), AddToCart = c(279,
4955, 5636, 8991, 15530, 18374, 9431, 5980, 4852, 5412, 4114,
1782, 370, 3208, 6311, 9760, 7428, 6792, 3500, 5446, 1507, 783,
2032, 833, 397, 2760, 5784, 9810, 13274, 14470), Registrations = c(194,
3210, 3573, 6067, 10305, 12653, 6564, 3874, 3076, 3652, 2730,
1227, 257, 2078, 4001, 6586, 4929, 4677, 2436, 3528, 955, 528,
1348, 573, 276, 1788, 3667, 6620, 8808, 9964), ShippingDetails = c(134,
2235, 2593, 4266, 7408, 9244, 4557, 2698, 2232, 2568, 1962, 896,
178, 1447, 2904, 4631, 3543, 3417, 1691, 2457, 693, 371, 969,
418, 191, 1245, 2661, 4655, 6332, 7280), Checkout = c(90, 1436,
1792, 2864, 4672, 5666, 3078, 1734, 1543, 1724, 1237, 549, 120,
930, 2007, 3109, 2234, 2094, 1142, 1579, 479, 249, 611, 256,
129, 800, 1839, 3125, 3993, 4462), Transactions = c(59, 937,
1192, 1819, 2602, 2926, 2039, 1132, 1026, 1095, 689, 283, 79,
607, 1335, 1975, 1244, 1081, 756, 1031, 318, 158, 340, 132, 85,
522, 1223, 1985, 2224, 2304)), class = "data.frame", row.names = c(NA,
-30L), .Names = c("Channel", "Promo", "Sessions", "AddToCart",
"Registrations", "ShippingDetails", "Checkout", "Transactions"
))
You need to make sure that the statement between the braces `{ }` returns a data.frame regardless of the condition, so you need an `else` branch:
# Toggle for the optional filter step
cond <- FALSE
mtcars %>%
group_by(cyl) %>%
# The braces always yield a data frame: the filtered data when `cond` is TRUE, otherwise the piped-in data (`.`) unchanged
{ if (cond) filter(., am == 1) else . } %>%
summarise(m = mean(wt))
Works fine with TRUE or FALSE.
(Also note that a simple example like this makes the question much easier to grasp.)

Resources