Plot a simple conversion funnel in ggplot - r

I have a simple dataframe that looks like this:
df
steps numbers rate
1 clicks 332835 100.000000
2 signup 157697 47.379933
3 cart 29866 8.973215
4 buys 17012 5.111241
How can I plot a simple conversion funnel instead of a barchart?

If you must do the funnel thing, it's just a variation on a bar chart:
library(ggplot2)
library(reshape2) # for melt()
# get data
dat <- read.table(text =
"steps numbers rate
clicks 332835 100.000000
signup 157697 47.379933
cart 29866 8.973215
buys 17012 5.111241",
  header = TRUE)

# Width of the funnel's widest stage. Taking the largest count avoids the
# exact floating-point test `rate == 100`, which gives an empty result (and a
# failing assignment below) whenever no rate is exactly 100.
total <- max(dat$numbers)

# add spacing: a bar of half the shortfall is stacked with every stage so that
# the visible bar ends up centred
dat$padding <- (total - dat$numbers) / 2

# melt to long format (rate is not drawn as a bar, so it is left out), sort
molten <- melt(dat[, c("steps", "numbers", "padding")], id.vars = "steps")
molten <- molten[order(molten$variable, decreasing = TRUE), ]
# reversed levels so the first stage is drawn at the top after coord_flip()
molten$steps <- factor(molten$steps, levels = rev(dat$steps))

ggplot(molten, aes(x = steps)) +
  geom_bar(aes(y = value, fill = variable),
           stat = "identity", position = "stack") +
  # percentage labels are placed at the centre line of the funnel
  geom_text(data = dat,
            aes(y = total / 2, label = paste(round(rate), '%')),
            color = "white") +
  # second fill value is NA so the padding bars are not filled
  scale_fill_manual(values = c("grey40", NA)) +
  coord_flip() +
  theme(legend.position = "none") +
  labs(x = "stage", y = "volume")
That said, there's no real point in a "funnel chart" - the same information can be presented in a plain bar chart with less fuss:
# get data
dat <- read.table(text =
"steps numbers rate
clicks 332835 100.000000
signup 157697 47.379933
cart 29866 8.973215
buys 17012 5.111241",
  header = TRUE)

# order x axis: take the factor levels in the order the steps appear in the
# data
dat$steps <- factor(dat$steps, levels = dat$steps)

# plot: one bar per step, labelled with its rounded conversion rate just above
# the bar
ggplot(dat, aes(x = steps, y = numbers)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = paste(round(rate), '%')), vjust = -0.5)

Alternatively, you can do a simple funnel in highcharts. My dataframe looks like this:
# data is a df called check_stage
check_stage
# A tibble: 9 × 4
stage_name count x percent
<ord> <int> <chr> <dbl>
1 Opportunity Disqualified 805 1 13.5
2 Qualifying 5138 2 86.5
3 Evaluation 1773 3 29.8
4 Meeting Scheduled 4104 4 69.1
5 Quoted 4976 5 83.7
6 Order Submitted 1673 6 28.2
7 Closed Won 1413 7 23.8
8 Closed Lost 957 8 16.1
9 Nurture 1222 9 20.6
library(highcharter)
# Funnel chart of the percentage reached at each stage.
# The theme is registered as a global option; its tooltip settings ask for one
# decimal place and the "{point.y}%" point format.
options(
  highcharter.theme = hc_theme_smpl(
    tooltip = list(valueDecimals = 1, pointFormat = "{point.y}%")
  )
)

# stage_name goes on x, percent on y; the series is named for the tooltip
hc <- hchart(
  check_stage,
  "funnel",
  hcaes(x = stage_name, y = percent),
  name = "Proportion of Leads"
)
hc

Related

Plot part of a data using ggplot, in R

I want to create a chart, using ggplot, relating the variables "var_share" (in the y-axis) and "cbo" (in the x-axis), but by three time periods: 1996-2002, 2002-2008 and 2008-2012. Also, I want to calculate the "cbo" variable, by percentile. Here is my dataset:
ano cbo ocupado quant total share var_share
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1996 20 1 32 39675 0.0807 -0.343
2 1997 20 1 52 41481 0.125 0.554
3 1998 20 1 34 40819 0.0833 -0.336
4 1999 20 1 44 41792 0.105 0.264
5 2001 20 1 57 49741 0.115 0.0884
6 1996 21 1 253 39675 0.638 -0.0326
You can download the full dataset here.
The result is almost like this:
I believe this is what you are looking for. After reading your data in, a new date variable called ano2 is built, followed by a new data frame (ddf) with a column called new that holds the time-period bins you defined.
The first plot then builds on this DF and uses stat_summary.
You also said something about the quantiles. I am not sure exactly what you meant, but I grouped by this new variable and used dplyr's group_modify() to calculate the quantiles of cbo within each period.
library(tidyverse)
df <- ocupacoes
# ano2: 1 January of each year in `ano`, parsed as a date
df$ano2 <- readr::parse_date(paste0('01-01-', df$ano), '%d-%m-%Y')

# new: label of the period that the year of each row falls into
ddf <- mutate(
  df,
  new = case_when(
    lubridate::year(ano2) %in% 1996:2002 ~ '96-02',
    lubridate::year(ano2) %in% 2003:2008 ~ '02-08',
    lubridate::year(ano2) %in% 2009:2012 ~ '08-12'
  )
)

# mean of var_share per period, with the periods in chronological order
ggplot(ddf, aes(new, var_share, color = new)) +
  stat_summary(fun = mean, colour = "red", size = 1) +
  scale_x_discrete(limits = c('96-02', '02-08', '08-12'))

# I think you were also looking for quantiles of cbo
# (0%, 20%, ..., 100% quantiles of cbo within each period, one line per period)
ddf %>%
  group_by(new) %>%
  group_modify(function(.x, .y) {
    cbo_quantiles <- quantile(.x$cbo, probs = seq(0, 1, by = .2))
    tibble::enframe(cbo_quantiles, name = "prob", value = "quantile")
  }) %>%
  ggplot(aes(x = prob, y = quantile, color = new, group = new)) +
  geom_line() +
  scale_x_discrete(limits = c('0%', '20%', '40%', '60%', '80%', '100%'))

Need to make a 2x2 ggplot in R

I have the following data:
unigrams Freq
1 the 236133
2 to 154296
3 and 128165
4 a 127434
5 i 124599
6 of 103380
7 in 81985
8 you 69504
9 is 65243
10 for 62425
11 it 60298
12 that 58605
13 on 45935
14 my 45424
15 with 38270
16 this 34799
17 was 33009
18 be 32725
19 have 31728
20 at 30255
and this set of data:
bigrams Freq
1 of the 20707
2 in the 19443
3 for the 11090
4 to the 10939
5 on the 10280
6 to be 9555
7 at the 7184
8 i have 6408
9 and the 6387
10 i was 6143
11 is a 6114
12 and i 5993
13 i am 5843
14 in a 5770
15 it was 5644
16 for a 5343
17 if you 5326
18 it is 5196
19 with the 5092
20 have a 4936
I would like to place two qplots together side-by-side, ncol = 2. I tried the gridExtra library, but it is generating errors that I can't seem to figure out how to correct. Any ideas on how to do this, please?
library(gridExtra)
# The 20 most frequent unigrams in the dataset:
# coerce to a data frame, sort by Freq (largest first), keep the first 20 rows
ugrams <- as.data.frame(unigrams)
graph.data <- ugrams[order(ugrams$Freq, decreasing = T), ]
graph.data <- graph.data[1:20, ]
# NOTE: both x (unigrams) and y (Freq) are supplied together with the
# "histogram" geom; presumably this is what triggers the stat_bin() error
# quoted below.
p1 <- qplot(unigrams,Freq, data=graph.data,fill=unigrams,geom=c("histogram"))
# The 20 most frequent bigrams in the dataset (same steps as for the unigrams;
# graph.data is reused and overwritten)
bgrams <- as.data.frame(bigrams)
graph.data <- bgrams[order(bgrams$Freq, decreasing = T), ]
graph.data <- graph.data[1:20, ]
p2 <- qplot(bigrams,Freq, data=graph.data,fill=bigrams,geom=c("histogram"))
# place the two plots next to each other in two columns
grid.arrange(p1,p2,ncol=2)
This is the error that is generated:
<error/rlang_error>
stat_bin() can only have an x or y aesthetic.
Backtrace:
1. (function (x, ...) ...
2. ggplot2:::print.ggplot(x)
4. ggplot2:::ggplot_build.ggplot(x)
5. ggplot2:::by_layer(function(l, d) l$compute_statistic(d, layout))
6. ggplot2:::f(l = layers[[i]], d = data[[i]])
7. l$compute_statistic(d, layout)
8. ggplot2:::f(..., self = self)
9. self$stat$setup_params(data, self$stat_params)
10. ggplot2:::f(...)
I would like to have the graphs resemble this one:
Which was accomplished by the following code:
# Bar chart of the 20 most frequent quadgrams in the dataset
qgrams <- as.data.frame(quadgrams)
# sort by Freq, largest first, then keep rows 1 to 20
graph.data <- qgrams[order(qgrams$Freq, decreasing = TRUE), ][1:20, ]
# one bar per quadgram; x labels are rotated 40 degrees
ggplot(graph.data, aes(quadgrams, Freq, fill = quadgrams)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 40, hjust = 1))
Is that possible
Edited for your shift from histograms to bar plots. Assuming that graph.data is actually your ugrams dataset, the working single plot is
Putting them side-by-side can be done with facets:
# Stack both n-gram tables into one long data frame. `id` records which table
# a row came from, and each n-gram column is renamed to the shared name
# `grams`.
# NOTE(review): assumes ugrams and bgrams are the data frames created with
# as.data.frame() in the question; confirm before running.
dplyr::bind_rows(
  unigrams = select(ugrams, grams = unigrams, Freq),
  bigrams = select(bgrams, grams = bigrams, Freq),
  .id = "id") %>%
  arrange(-Freq) %>%
  mutate(
    # fixed panel order: unigrams first, bigrams second
    id = factor(id, levels = c("unigrams", "bigrams")),
    # levels follow the sorted row order, so bars are drawn by decreasing
    # Freq; unique() keeps factor() from failing on a repeated n-gram
    grams = factor(grams, levels = unique(as.character(grams)))
  ) %>%
  ggplot(aes(x = grams, y = Freq, fill = grams)) +
  facet_wrap(~ id, ncol = 2, scales = "free_x") +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 40, hjust = 1))
(Obviously, these are "too small" to hold all of the legend, but that depends on where you are using it. I wonder if the legend shouldn't be included, since it is somewhat redundant with the x-axis labels.)
The y-axis on the left is harder to see because it is dwarfed by the unigrams on the right. While it does bias the plot (it might be natural to compare the vertical levels of the plot on the left with those on the right), you can alleviate that by freeing both the "x" (already free) and "y" axes with scales = "free", i.e. facet_wrap(~ id, ncol = 2, scales = "free").

Dynamic Re-sizing Flexdashboard/Plotly Image to a Different Graphic

I'm building a dynamic flexdashboard with plotly and I was wondering if there was a way to dynamically resize my dashboard. For example, I have created plots of subjects being tested over time. When I shrink the page down, what I'd like is for it to dynamically adjust to a time-series plot of the average for the group at each test day.
My data looks like this:
library(flexdashboard)
library(knitr)
library(tidyverse)
library(plotly)
# Simulated example data: subjects A, B and C, each with test days 1 to 8,
# and two normally distributed measurements per row.
subject <- rep(LETTERS[1:3], each = 8)
testDay <- rep(1:8, 3)
variable1 <- rnorm(length(subject), mean = 30, sd = 10)
variable2 <- rnorm(length(subject), mean = 15, sd = 3)
df <- data.frame(
  subject = subject,
  testDay = testDay,
  variable1 = variable1,
  variable2 = variable2
)
subject testDay variable1 variable2
1 A 1 21.816831 8.575000
2 A 2 14.947327 17.387903
3 A 3 18.014435 16.734653
4 A 4 33.100524 11.381793
5 A 5 37.105911 13.862776
6 A 6 32.181317 10.722458
7 A 7 41.107293 9.176348
8 A 8 36.674051 17.114815
9 B 1 33.710838 17.508234
10 B 2 23.788428 13.903532
11 B 3 42.846120 17.032208
12 B 4 9.785957 15.275293
13 B 5 32.551619 21.172497
14 B 6 36.912465 18.694263
15 B 7 40.061797 13.759541
16 B 8 41.094825 15.472144
17 C 1 27.663408 17.949291
18 C 2 31.263966 11.546486
19 C 3 39.734050 19.831854
20 C 4 25.461309 19.239821
21 C 5 22.128139 10.837672
22 C 6 31.234339 16.976004
23 C 7 46.273664 19.255745
24 C 8 27.057218 21.086204
My plotly code looks like this (a graph of each subject over time):
Dynamic Chart
===========================
Row
-----------------------------------------------------------------------
```{r}
# Variable 1 over the test days, one coloured line per subject.
# Grouping by subject (instead of the constant group = 1) stops all subjects
# from being joined into a single path.
p1 <- df %>%
  ggplot(aes(x = as.factor(testDay), y = variable1,
             color = subject, group = subject)) +
  geom_line() +
  theme_bw() +
  ggtitle("Variable 1")
# convert the ggplot object to an interactive plotly widget
ggplotly(p1)
```
```{r}
# Variable 2 over the test days, one coloured line per subject.
# Grouping by subject (instead of the constant group = 1) stops all subjects
# from being joined into a single path.
p2 <- df %>%
  ggplot(aes(x = as.factor(testDay), y = variable2,
             color = subject, group = subject)) +
  geom_line() +
  theme_bw() +
  ggtitle("Variable 2")
# convert the ggplot object to an interactive plotly widget
ggplotly(p2)
```
Is there a way that when I shrink the website down these plots can dynamically change to a group average plot, like this:
# Group-average version of the Variable 1 plot: a single line through the mean
# of all subjects on each test day. `fun` replaces the deprecated `fun.y`
# argument of stat_summary().
p1_avg <- df %>%
  ggplot(aes(x = as.factor(testDay), y = variable1, group = 1)) +
  stat_summary(fun = "mean", geom = "line") +
  theme_bw() +
  ggtitle("Variable 1 Avg")
ggplotly(p1_avg)

# Same summary for Variable 2.
p2_avg <- df %>%
  ggplot(aes(x = as.factor(testDay), y = variable2, group = 1)) +
  stat_summary(fun = "mean", geom = "line") +
  theme_bw() +
  ggtitle("Variable 2 Avg")
ggplotly(p2_avg)
You can put your plotly object inside the plotly function renderPlotly() for dynamically resizing to the page. See an example how I used the function in this blog post:
https://medium.com/analytics-vidhya/shiny-dashboards-with-flexdashboard-e66aaafac1f2

How to Make animated ggplot for different years using gganimate

So I have a simple data frame where the first column includes roadway IDs and the next 10 columns have traffic volumes on each roadway ID over 10 years.
I have been trying to come up with a code to display roadway ID on X axis and Traffic volume on Y axis. Then animate the graph over multiple years (Traffic volumes on the Y axis change). Here is a sample of my data frame:
Could anyone suggest a piece of code to do it? Here is a code that I have written but doesn't really work. I know this may be very wrong, but I am very new to gganimate and not sure how I can get different functions to work. Any help is appreciated.
# Attempt from the question (reported as not working): one point layer per
# yearly traffic column of the wide data frame `Data`.
# `year` is a free-standing vector of the ten years, not a column of `Data`.
year <- c(2001,2002,2003,2004,2005,2006,2007,2008,2009,2010)
p1 <- ggplot(data = Data) +
# each layer plots LinkIDs against the traffic column of a single year
geom_point(aes(x = Data$LinkIDs, y=Data$Year2001Traffic)) +
geom_point(aes(x = Data$LinkIDs, y=Data$Year2002Traffic)) +
geom_point(aes(x = Data$LinkIDs, y=Data$Year2003Traffic)) +
geom_point(aes(x = Data$LinkIDs, y=Data$Year2004Traffic)) +
geom_point(aes(x = Data$LinkIDs, y=Data$Year2005Traffic)) +
geom_point(aes(x = Data$LinkIDs, y=Data$Year2006Traffic)) +
geom_point(aes(x = Data$LinkIDs, y=Data$Year2007Traffic)) +
geom_point(aes(x = Data$LinkIDs, y=Data$Year2008Traffic)) +
geom_point(aes(x = Data$LinkIDs, y=Data$Year2009Traffic)) +
geom_point(aes(x = Data$LinkIDs, y=Data$Year2010Traffic)) +
labs(title = 'Year: {frame_time}', x = 'Link ID', y = 'Traffic Volume') +
# NOTE(review): the transition is given the stand-alone `year` vector rather
# than a column of the plotted data -- presumably why the animation fails;
# the answer below reshapes the data to long format instead.
transition_time(year)
animate(p1)
Most of the work lies in changing the data before you send it to ggplot and gganimate. To help you with that work, I have created some sample data based on your picture (in the future please supply sample data yourself).
library(tidyverse)
library(gganimate)
# Small wide-format sample: one row per link, one traffic column per year
df <- tibble(
  LinkIDs = c("A", "B", "C"),
  Year2001Traffic = c(1, 3, 10),
  Year2002Traffic = c(10, 1, 5),
  Year2003Traffic = c(15, 10, 1)
)
df
# A tibble: 3 x 4
LinkIDs Year2001Traffic Year2002Traffic Year2003Traffic
<chr> <dbl> <dbl> <dbl>
1 A 1 10 15
2 B 3 1 10
3 C 10 5 1
gganimate and ggplot work best with data in long format. So the first step is to change the data from wide to long before sending it to ggplot.
# Wide -> long: every traffic column becomes (Year, Traffic) pairs per link.
# pivot_longer() is the current replacement for the superseded gather();
# arrange(Year) restores gather()'s year-by-year row order.
df <- df %>%
  pivot_longer(-LinkIDs, names_to = "Year", values_to = "Traffic") %>%
  arrange(Year)
df
# A tibble: 9 x 3
LinkIDs Year Traffic
<chr> <chr> <dbl>
1 A Year2001Traffic 1
2 B Year2001Traffic 3
3 C Year2001Traffic 10
4 A Year2002Traffic 10
5 B Year2002Traffic 1
6 C Year2002Traffic 5
7 A Year2003Traffic 15
8 B Year2003Traffic 10
9 C Year2003Traffic 1
gganimate needs the Year column to be a number before it can use it for animation. So we need to extract the numbers that are contained in the values.
# Replace the Year labels (e.g. "Year2001Traffic") by the number they contain
df <- mutate(df, Year = parse_number(Year))
df
# A tibble: 9 x 3
LinkIDs Year Traffic
<chr> <dbl> <dbl>
1 A 2001 1
2 B 2001 3
3 C 2001 10
4 A 2002 10
5 B 2002 1
6 C 2002 5
7 A 2003 15
8 B 2003 10
9 C 2003 1
Now the rest is straightforward. Just plot the data, and use the Year variable for the animation argument.
# Point per link; the numeric Year column drives the animation and the
# current frame time is interpolated into the title
p1 <- df %>%
  ggplot(aes(LinkIDs, Traffic)) +
  geom_point() +
  labs(
    title = 'Year: {frame_time}',
    x = 'Link ID',
    y = 'Traffic Volume'
  ) +
  transition_time(Year)
animate(p1)
_________________________ EDIT AFTER UPDATED COMMENTS_______
Request in comments:
"I just want it to go through the timeline (from 2001 to 2003) just
once and then stop at 2003."
In case you want to stop at the year 2003, you would need to filter the data before you send it to ggplot - this is done via the filter command.
As of 23/3 2019, there is, as far as I know, no way to go through the animation just once. You can alter the end_pause argument in order to insert a pause after each iteration of the animation (I changed geom_point() to geom_col() given your description).
# Bar per link, filled by link, animated over Year.
# Only observations from the year 2003 and earlier are sent to the plot.
p2 <- ggplot(
  filter(df, Year <= 2003),
  aes(LinkIDs, Traffic, fill = LinkIDs)
) +
  geom_col() +
  labs(
    title = 'Year: {frame_time}',
    x = 'Link ID',
    y = 'Traffic Volume'
  ) +
  transition_time(Year)
# end_pause inserts a pause after each run through the animation
animate(p2, fps = 20, duration = 25, end_pause = 95)

How to prepare data for ggplot2 and stack bar chart with negative values?

Given the following dataset:
# Example data: one row per Type and one value column per quarter-end date;
# the leading 1/2/3 of each data row is used as row names (row.names = 1).
# NOTE(review): the header carries a stray ' after 2012-06-30 -- presumably
# the reason that column is named X2012.06.30. (trailing dot) in the output
# shown below.
Output<- read.table(text = "Type 2012-06-30' 2012-09-30
1 Market 2 3
2 Geography 3 -2
3 Industry -1 5 ",header = TRUE,sep = "",row.names = 1)
I'm trying to prepare the data in order to use the ggplot2 package and create a stacked bar chart with negative values. Here's the basic chart sequence I'm using:
# add a running row number and melt with it as the only id variable;
# Type is then melted like the date columns, which produces the
# "Type" rows in the dat2 output shown below
Output$row <- seq_len(nrow(Output))
dat2 <- melt(Output, id.vars = "row")
But this gives me:
dat2
row variable value
1 1 Type Market
2 2 Type Geography
3 3 Type Industry
4 1 X2012.06.30. 2
5 2 X2012.06.30. 3
6 3 X2012.06.30. -1
7 1 X2012.09.30 3
8 2 X2012.09.30 -2
9 3 X2012.09.30 5
Ideally, in the 'row' column I would have Market instead of 1, Geography instead of 2 and Industry instead of 3, so that I fill my bar chart with the different (Market, Geography, Industry) categories and not 1-2-3. Also, rows 1 to 3 in dat2 should be dropped since they don't correspond to quarterly data. Thank you!
# split the melted data by sign: dat1 holds the values >= 0, dat3 the values < 0
dat1 <- subset(dat2,value >= 0)
dat3 <- subset(dat2,value < 0)
# one bar layer per subset, both filled by `row`, with a sequential
# ColorBrewer palette
ggplot() +
geom_bar(data = dat1, aes(x=variable, y=value, fill=row),stat = "identity") +
geom_bar(data = dat3, aes(x=variable, y=value, fill=row),stat = "identity") +
scale_fill_brewer(type = "seq", palette = 1)
I had a go at the below, but I am quite confused by your question in bold. The odd formatting of your data seemed caused by using id.vars = "row", but please clarify if need be.
# Example data: one row per Type and one value column per quarter-end date.
# The leading 1/2/3 of each data row is used as row names (row.names = 1).
# The stray apostrophe after the first date has been removed from the header
# so that the column is named X2012.06.30 rather than X2012.06.30.
Output <- read.table(text = "Type 2012-06-30 2012-09-30
1 Market 2 3
2 Geography 3 -2
3 Industry -1 5", header = TRUE, sep = "", row.names = 1)

# Long format with Type as the explicit id column: each date column becomes
# variable/value pairs. The result is printed once for inspection.
dat2 <- melt(Output, id.vars = "Type")
dat2

# split by sign so positive and negative values are stacked in separate layers
dat1 <- subset(dat2, value >= 0)
dat3 <- subset(dat2, value < 0)

# one bar per date, filled by Type, using a sequential ColorBrewer palette
ggplot() +
  geom_bar(data = dat1, aes(x = variable, y = value, fill = Type),
           stat = "identity") +
  geom_bar(data = dat3, aes(x = variable, y = value, fill = Type),
           stat = "identity") +
  scale_fill_brewer(type = "seq", palette = 1)

Resources