programmatically setting individual axis limits in facets - r

I need help on setting the individual x-axis limits on different facets as described below.
A programmatical approach is preferred since I will apply the same template to different data sets.
first two facets will have the same x-axis limits (to have comparable bars)
the last facet's (performance) limits will be between 0 and 1, since it is calculated as a percentage
I have seen this and some other related questions but couldn't apply it to my data.
Thanks in advance.
df <-
data.frame(
call_reason = c("a","b","c","d"),
all_records = c(100,200,300,400),
problematic_records = c(80,60,100,80))
df <- df %>% mutate(performance = round(problematic_records/all_records, 2))
df
call_reason all_records problematic_records performance
a 100 80 0.80
b 200 60 0.30
c 300 100 0.33
d 400 80 0.20
df %>%
gather(key = facet_group, value = value, -call_reason) %>%
mutate(facet_group = factor(facet_group,
levels=c('all_records','problematic_records','performance'))) %>%
ggplot(aes(x=call_reason, y=value)) +
geom_bar(stat="identity") +
coord_flip() +
facet_grid(. ~ facet_group)

So here is one way to go about it with facet_grid(scales = "free_x"), in combination with a geom_blank(). Consider df to be your df at the moment before piping it into ggplot.
ggplot(df, aes(x=call_reason, y=value)) +
# geom_col is equivalent to geom_bar(stat = "identity")
geom_col() +
# geom_blank includes data for position scale training, but is not rendered
geom_blank(data = data.frame(
# value for first two facets is max, last facet is 1
value = c(rep(max(df$value), 2), 1),
# dummy category
call_reason = levels(df$call_reason)[1],
# distribute over facets
facet_group = levels(df$facet_group)
)) +
coord_flip() +
# scales are set to "free_x" to have them vary independently
# it doesn't really, since we've set a geom_blank
facet_grid(. ~ facet_group, scales = "free_x")
As long as your column names remain te same, this should work.
EDIT:
To reorder the call_reason variable, you could add the following in your pipe that goes into ggplot:
df %>%
gather(key = facet_group, value = value, -call_reason) %>%
mutate(facet_group = factor(facet_group,
levels=c('all_records','problematic_records','performance')),
# In particular the following bit:
call_reason = factor(call_reason, levels(call_reason)[order(value[facet_group == "performance"])]))

Related

Horizontal Group Bar Chart - How to scale to 100% and how to specify the order of the layers

So I have the following code which produces:
The issue here is twofold:
The group bar chart automatically places the highest value on the top (i.e. for avenue 4 CTP is on top), whereas I would always want FTP to be shown first then CTP to be shown after (so always blue bar then red bar)
I need all of the values to scale to 100 or 100% for their respective group (so for CTP avenue 4 would have a huge bar graph but the other avenues should be extremely tiny)
I am new to 'R'/Stack overflow so sorry if anything is wrong/you need more but any help is greatly appreciated.
library(ggplot2)
library(tidyverse)
library(magrittr)
# function to specify decimals
specify_decimal <- function(x, k) trimws(format(round(x, k), nsmall=k))
# sample data
avenues <- c("Avenue1", "Avenue2", "Avenue3", "Avenue4")
flytip_amount <- c(1000, 2000, 1500, 250)
collection_amount <- c(5, 15, 10, 2000)
# create data frame from the sample data
df <- data.frame(avenues, flytip_amount, collection_amount)
# got it working - now to test
df3 <- df
SumFA <- sum(df3$flytip_amount)
df3$FTP <- (df3$flytip_amount/SumFA)*100
df3$FTP <- specify_decimal(df3$FTP, 1)
SumCA <- sum(df3$collection_amount)
df3$CTP <- (df3$collection_amount/SumCA)*100
df3$CTP <- specify_decimal(df3$CTP, 1)
# Now we have percentages remove whole values
df2 <- df3[,c(1,4,5)]
df2 <- df2 %>% pivot_longer(-avenues)
FTGraphPos <- df2$name
ggplot(df2, aes(x = avenues, fill = as.factor(name), y = value)) +
geom_col(position = "dodge", width = 0.75) + coord_flip() +
labs(title = "Flytipping & Collection %", x = "ward_name", y = "Percentageperward") +
geom_text(aes(x= avenues, label = value), vjust = -0.1, position = "identity", size = 5)
I have tried the above and I have looked at lots of tutorials but nothing is exactly precise to what I need of ensuring the group bar charts puts the layers in the same order despite amount and scaling to 100/100%
As Camille notes, to handle ordering of the categories in a plot, you need to set them as factors, and then use functions from the forcats package to handle the order. Here I am using fct_relevel() (note that it will automatically convert character variables to factors).
Your numeric values are in fact set to character, so they need to be set to numeric for the chart to make sense.
To cover point #2, I'm using group_by() to calculate percentages within each name.
I have also fixed the labels so that they are properly dodged along with the bar chart. Also, note that you don't need to call ggplot2 or magrittr if you are calling tidyverse - those packages come along with it already.
df_plot <- df2 |>
mutate(name = fct_relevel(name, "CTP"),
value = as.numeric(value)) |>
group_by(name) |>
mutate(perc = value / sum(value)) |>
ungroup()
ggplot(df_plot, aes(x = value, y = avenues, fill = name)) +
geom_col(position = "dodge", width = 0.75) +
geom_text(aes(label = value), position = position_dodge(width = 0.75), size = 5) +
labs(title = "Flytipping & Collection %", x = "Percentageperward", y = "ward_name") +
guides(fill = guide_legend(reverse = TRUE))

How to make a dual axis in ggplot R

I have made a time series plot for total count data of 4 different species. As you can see the results with sharksucker have a much higher count than the other 3 species. To see the trends of the other 3 species they need to plotted separately (or on a smaller y axis). However, I have a figure limit in my masters paper. So, I was trying to create a dual axis plot or have the y axis split into two. Does anyone know of a way I could do this?
library(tidyverse)
library(reshape2)
dat <- read_xlsx("ReefPA.xlsx")
dat1 <- dat
dat1$Date <- format(dat1$Date, "%Y/%m")
plot_dat <- dat1 %>%
group_by(Date) %>%
summarise(Sharksucker_Remora = sum(Sharksucker_Remora)) %>%
melt("Date") %>%
filter(Date > '2018-01-01') %>%
arrange(Date)
names(plot_dat) <- c("Date", "Species", "Count")
ggplot(data = plot_dat) +
geom_line(mapping = aes(x = Date, y = Count, group = Species, colour = Species)) +
stat_smooth(method=lm, aes(x = Date, y = Count, group = Species, colour = Species)) +
scale_colour_manual(values=c(Golden_Trevally="goldenrod2", Red_Snapper="firebrick2", Sharksucker_Remora="darkolivegreen3", Juvenile_Remora="aquamarine2")) +
xlab("Date") +
ylab("Total Presence Per Month") +
theme(legend.title = element_blank()) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
The thing is, the problem you're trying to solve doesn't seem like a 2nd Y axis issue. The problem here is of relative scale of the species. You might want to think of something like standardizing the initial species presence to 100 and showing growth or decline from there.
Another option would be faceting by species.

geom_bar overlapping labels

for simplicity lets suppose we have a database like
# A
1 1
2 2
3 2
4 2
5 3
We have a categorical variable "A" with 3 possible values (1,2,3). And im tring this code:
ggplot(df aes(x="", y=df$A, fill=A))+
geom_bar(width = 1, stat = "identity")
The problem is that the labels are overlapping. Also i want to change the labes for 1,2,3 to x,y,z.
Here is picture of what is happening
And here is a link for the actual data that im using.
https://a.uguu.se/anKhhyEv5b7W_Data.csv
Your graph does not correspond to the sample of data you are showing, so it is hard to be sure that the structure of your real data is actually the same.
Using a random example, I get the following plot:
df <- data.frame(A = sample(1:3,20, replace = TRUE))
library(ggplot2)
ggplot(df, aes(x="A", y=A, fill=as.factor(A)))+
geom_bar(width = 1, stat = "identity") +
scale_fill_discrete(labels = c("x","y","z"))
EDIT: Using data provided by the OP
Here using your data, you should get the following plot:
ggplot(df, aes(x = "A",y = A, fill = as.factor(A)))+
geom_col()
Or if you want the count of each individual values of A, you can do:
library(dplyr)
library(ggplot2)
df %>% group_by(A) %>% count() %>%
ggplot(aes(x = "A", y = n, fill = as.factor(A)))+
geom_col()
Is it what you are looking for ?

How to get the plots side by side and that too sorted according to Fill in R Language [duplicate]

I am making a dodged barplot in ggplot2 and one grouping has a zero count that I want to display. I remembered seeing this on HERE a while back and figured the scale_x_discrete(drop=F) would work. It does not appear to work with dodged bars. How can I make the zero counts show?
For instance, (code below) in the plot below, type8~group4 has no examples. I would still like the plot to display the empty space for the zero count instead of eliminating the bar. How can I do this?
mtcars2 <- data.frame(type=factor(mtcars$cyl),
group=factor(mtcars$gear))
m2 <- ggplot(mtcars2, aes(x=type , fill=group))
p2 <- m2 + geom_bar(colour="black", position="dodge") +
scale_x_discrete(drop=F)
p2
Here's how you can do it without making summary tables first.
It did not work in my CRAN versioin (2.2.1) but in the latest development version of ggplot (2.2.1.900) I had no issues.
ggplot(mtcars, aes(factor(cyl), fill = factor(vs))) +
geom_bar(position = position_dodge(preserve = "single"))
http://ggplot2.tidyverse.org/reference/position_dodge.html
Updated geom_bar() needs stat = "identity"
For what it's worth: The table of counts, dat, above contains NA. Sometimes, it is useful to have an explicit 0 instead; for instance, if the next step is to put counts above the bars. The following code does just that, although it's probably no simpler than Joran's. It involves two steps: get a crosstabulation of counts using dcast, then melt the table using melt, followed by ggplot() as usual.
library(ggplot2)
library(reshape2)
mtcars2 = data.frame(type=factor(mtcars$cyl), group=factor(mtcars$gear))
dat = dcast(mtcars2, type ~ group, fun.aggregate = length)
dat.melt = melt(dat, id.vars = "type", measure.vars = c("3", "4", "5"))
dat.melt
ggplot(dat.melt, aes(x = type,y = value, fill = variable)) +
geom_bar(stat = "identity", colour = "black", position = position_dodge(width = .8), width = 0.7) +
ylim(0, 14) +
geom_text(aes(label = value), position = position_dodge(width = .8), vjust = -0.5)
The only way I know of is to pre-compute the counts and add a dummy row:
dat <- rbind(ddply(mtcars2,.(type,group),summarise,count = length(group)),c(8,4,NA))
ggplot(dat,aes(x = type,y = count,fill = group)) +
geom_bar(colour = "black",position = "dodge",stat = "identity")
I thought that using stat_bin(drop = FALSE,geom = "bar",...) instead would work, but apparently it does not.
I asked this same question, but I only wanted to use data.table, as it's a faster solution for much larger data sets. I included notes on the data so that those that are less experienced and want to understand why I did what I did can do so easily. Here is how I manipulated the mtcars data set:
library(data.table)
library(scales)
library(ggplot2)
mtcars <- data.table(mtcars)
mtcars$Cylinders <- as.factor(mtcars$cyl) # Creates new column with data from cyl called Cylinders as a factor. This allows ggplot2 to automatically use the name "Cylinders" and recognize that it's a factor
mtcars$Gears <- as.factor(mtcars$gear) # Just like above, but with gears to Gears
setkey(mtcars, Cylinders, Gears) # Set key for 2 different columns
mtcars <- mtcars[CJ(unique(Cylinders), unique(Gears)), .N, allow.cartesian = TRUE] # Uses CJ to create a completed list of all unique combinations of Cylinders and Gears. Then counts how many of each combination there are and reports it in a column called "N"
And here is the call that produced the graph
ggplot(mtcars, aes(x=Cylinders, y = N, fill = Gears)) +
geom_bar(position="dodge", stat="identity") +
ylab("Count") + theme(legend.position="top") +
scale_x_discrete(drop = FALSE)
And it produces this graph:
Furthermore, if there is continuous data, like that in the diamonds data set (thanks to mnel):
library(data.table)
library(scales)
library(ggplot2)
diamonds <- data.table(diamonds) # I modified the diamonds data set in order to create gaps for illustrative purposes
setkey(diamonds, color, cut)
diamonds[J("E",c("Fair","Good")), carat := 0]
diamonds[J("G",c("Premium","Good","Fair")), carat := 0]
diamonds[J("J",c("Very Good","Fair")), carat := 0]
diamonds <- diamonds[carat != 0]
Then using CJ would work as well.
data <- data.table(diamonds)[,list(mean_carat = mean(carat)), keyby = c('cut', 'color')] # This step defines our data set as the combinations of cut and color that exist and their means. However, the problem with this is that it doesn't have all combinations possible
data <- data[CJ(unique(cut),unique(color))] # This functions exactly the same way as it did in the discrete example. It creates a complete list of all possible unique combinations of cut and color
ggplot(data, aes(color, mean_carat, fill=cut)) +
geom_bar(stat = "identity", position = "dodge") +
ylab("Mean Carat") + xlab("Color")
Giving us this graph:
Use count and complete from dplyr to do this.
library(tidyverse)
mtcars %>%
mutate(
type = as.factor(cyl),
group = as.factor(gear)
) %>%
count(type, group) %>%
complete(type, group, fill = list(n = 0)) %>%
ggplot(aes(x = type, y = n, fill = group)) +
geom_bar(colour = "black", position = "dodge", stat = "identity")
You can exploit the feature of the table() function, which computes the number of occurrences of a factor for all its levels
# load plyr package to use ddply
library(plyr)
# compute the counts using ddply, including zero occurrences for some factor levels
df <- ddply(mtcars2, .(group), summarise,
types = as.numeric(names(table(type))),
counts = as.numeric(table(type)))
# plot the results
ggplot(df, aes(x = types, y = counts, fill = group)) +
geom_bar(stat='identity',colour="black", position="dodge")

Don't drop zero count: dodged barplot

I am making a dodged barplot in ggplot2 and one grouping has a zero count that I want to display. I remembered seeing this on HERE a while back and figured the scale_x_discrete(drop=F) would work. It does not appear to work with dodged bars. How can I make the zero counts show?
For instance, (code below) in the plot below, type8~group4 has no examples. I would still like the plot to display the empty space for the zero count instead of eliminating the bar. How can I do this?
mtcars2 <- data.frame(type=factor(mtcars$cyl),
group=factor(mtcars$gear))
m2 <- ggplot(mtcars2, aes(x=type , fill=group))
p2 <- m2 + geom_bar(colour="black", position="dodge") +
scale_x_discrete(drop=F)
p2
Here's how you can do it without making summary tables first.
It did not work in my CRAN versioin (2.2.1) but in the latest development version of ggplot (2.2.1.900) I had no issues.
ggplot(mtcars, aes(factor(cyl), fill = factor(vs))) +
geom_bar(position = position_dodge(preserve = "single"))
http://ggplot2.tidyverse.org/reference/position_dodge.html
Updated geom_bar() needs stat = "identity"
For what it's worth: The table of counts, dat, above contains NA. Sometimes, it is useful to have an explicit 0 instead; for instance, if the next step is to put counts above the bars. The following code does just that, although it's probably no simpler than Joran's. It involves two steps: get a crosstabulation of counts using dcast, then melt the table using melt, followed by ggplot() as usual.
library(ggplot2)
library(reshape2)
mtcars2 = data.frame(type=factor(mtcars$cyl), group=factor(mtcars$gear))
dat = dcast(mtcars2, type ~ group, fun.aggregate = length)
dat.melt = melt(dat, id.vars = "type", measure.vars = c("3", "4", "5"))
dat.melt
ggplot(dat.melt, aes(x = type,y = value, fill = variable)) +
geom_bar(stat = "identity", colour = "black", position = position_dodge(width = .8), width = 0.7) +
ylim(0, 14) +
geom_text(aes(label = value), position = position_dodge(width = .8), vjust = -0.5)
The only way I know of is to pre-compute the counts and add a dummy row:
dat <- rbind(ddply(mtcars2,.(type,group),summarise,count = length(group)),c(8,4,NA))
ggplot(dat,aes(x = type,y = count,fill = group)) +
geom_bar(colour = "black",position = "dodge",stat = "identity")
I thought that using stat_bin(drop = FALSE,geom = "bar",...) instead would work, but apparently it does not.
I asked this same question, but I only wanted to use data.table, as it's a faster solution for much larger data sets. I included notes on the data so that those that are less experienced and want to understand why I did what I did can do so easily. Here is how I manipulated the mtcars data set:
library(data.table)
library(scales)
library(ggplot2)
mtcars <- data.table(mtcars)
mtcars$Cylinders <- as.factor(mtcars$cyl) # Creates new column with data from cyl called Cylinders as a factor. This allows ggplot2 to automatically use the name "Cylinders" and recognize that it's a factor
mtcars$Gears <- as.factor(mtcars$gear) # Just like above, but with gears to Gears
setkey(mtcars, Cylinders, Gears) # Set key for 2 different columns
mtcars <- mtcars[CJ(unique(Cylinders), unique(Gears)), .N, allow.cartesian = TRUE] # Uses CJ to create a completed list of all unique combinations of Cylinders and Gears. Then counts how many of each combination there are and reports it in a column called "N"
And here is the call that produced the graph
ggplot(mtcars, aes(x=Cylinders, y = N, fill = Gears)) +
geom_bar(position="dodge", stat="identity") +
ylab("Count") + theme(legend.position="top") +
scale_x_discrete(drop = FALSE)
And it produces this graph:
Furthermore, if there is continuous data, like that in the diamonds data set (thanks to mnel):
library(data.table)
library(scales)
library(ggplot2)
diamonds <- data.table(diamonds) # I modified the diamonds data set in order to create gaps for illustrative purposes
setkey(diamonds, color, cut)
diamonds[J("E",c("Fair","Good")), carat := 0]
diamonds[J("G",c("Premium","Good","Fair")), carat := 0]
diamonds[J("J",c("Very Good","Fair")), carat := 0]
diamonds <- diamonds[carat != 0]
Then using CJ would work as well.
data <- data.table(diamonds)[,list(mean_carat = mean(carat)), keyby = c('cut', 'color')] # This step defines our data set as the combinations of cut and color that exist and their means. However, the problem with this is that it doesn't have all combinations possible
data <- data[CJ(unique(cut),unique(color))] # This functions exactly the same way as it did in the discrete example. It creates a complete list of all possible unique combinations of cut and color
ggplot(data, aes(color, mean_carat, fill=cut)) +
geom_bar(stat = "identity", position = "dodge") +
ylab("Mean Carat") + xlab("Color")
Giving us this graph:
Use count and complete from dplyr to do this.
library(tidyverse)
mtcars %>%
mutate(
type = as.factor(cyl),
group = as.factor(gear)
) %>%
count(type, group) %>%
complete(type, group, fill = list(n = 0)) %>%
ggplot(aes(x = type, y = n, fill = group)) +
geom_bar(colour = "black", position = "dodge", stat = "identity")
You can exploit the feature of the table() function, which computes the number of occurrences of a factor for all its levels
# load plyr package to use ddply
library(plyr)
# compute the counts using ddply, including zero occurrences for some factor levels
df <- ddply(mtcars2, .(group), summarise,
types = as.numeric(names(table(type))),
counts = as.numeric(table(type)))
# plot the results
ggplot(df, aes(x = types, y = counts, fill = group)) +
geom_bar(stat='identity',colour="black", position="dodge")

Resources