Remove blank categories in geom_bar - r

I've written a function to create a plot with data that changes based on a filter
library(tidyverse)
library(plotly)
one<- as.numeric(NA)
two<- 25
three<- 35
four<- 40
five<- 0
dat<- data.frame(one, two, three, four, five)
# Draw a horizontal, interactive bar chart of the selected columns of `x`.
#
# x: a data frame; each selected column becomes one bar category
#    (assumes one value per column, as in `dat` -- TODO confirm for other inputs).
# a: column indices (or names) of `x` to plot.
# Returns the plotly widget produced by ggplotly(); hovering a bar shows the
# `text` aesthetic (value * 100 followed by "%").
get_plot <- function(x, a) {
  # drop = FALSE keeps a data frame even when `a` selects a single column;
  # plain x[, a] would collapse to a bare vector and break pivot_longer().
  data <- x[, a, drop = FALSE]
  p <- data %>%
    pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
    ggplot(aes(
      x = reorder(variable, value),
      y = value,
      fill = variable,
      text = paste0(value * 100, "%")
    )) +
    geom_bar(stat = "identity", position = "dodge") +
    theme(
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.title.y = element_blank(),
      legend.position = "none"
    ) +
    # Flip so the categories run down the vertical axis.
    coord_flip()
  # Show only the custom text in the tooltip and keep just the
  # "toImage" button in the mode bar.
  ggplotly(p, tooltip = c("text")) %>%
    config(displaylogo = FALSE, modeBarButtons = list(list("toImage")))
}
get_plot(dat, a= c(1:5))
Depending on the filter sometimes I end up with a chart with categories that don't have values in them like in the image below. How do I exclude categories from appearing on the plot when there is no value for the category? **** edited to make a more simplified reprex****

Update: after thinking about it more this morning, doing more googling, and trying things, I figured out a solution. I inserted these select statements (which drop every column that contains an NA or a zero) into the function, and it gets me what I wanted!
p<- data %>%
select(where(~!any(is.na(.)))) %>%
select(where(~!any(.== 0))) %>%

Related

ggplot: No legend when using scale_fill_brewer with geom_contour_filled

I've plotted a specific set of meteorological data using ggplot as described in the R code below. However, when I use scale_fill_brewer to specify the fill color, a legend does not appear.
What changes are necessary for the legend to appear?
library(tidyverse)
library(lubridate)
library(ggplot2)
library(RColorBrewer)
qurl <- "https://www.geo.fu-berlin.de/met/ag/strat/produkte/qbo/singapore.dat"
sing <- read_table(qurl, skip=4)
# The data file adds a 100 mb data row starting in 1997, increasing the number of rows per year from
# 14 to 15. So one calculation must be applied to rnum <= 140 and a different one to rnum >= 141.
# Build the long-format table: split the first column into `hpa` and `JAN`,
# drop incomplete rows and rows whose hpa is the literal 'hPa', derive the
# year from the row number, then pivot the month columns into month/qbo pairs.
sing2 <- sing %>%
  separate(1, into = c('hpa', 'JAN'), sep = '\\s+') %>%
  drop_na() %>%
  subset(hpa != 'hPa') %>%
  mutate(
    rnum = row_number(),
    hpa = as.integer(hpa)
  ) %>%
  mutate(year = case_when(
    rnum <= 140 ~ 1987 + floor(rnum / 14), # the last year with 14 rows of data
    # (rnum + 10) must be parenthesised: `rnum + 10 / 15` divides only the 10,
    # which made the year advance once per row instead of once per 15 rows.
    rnum >= 141 ~ 1987 + floor((rnum + 10) / 15) # the first year with 15 rows of data
  )) %>%
  relocate(year, .before = 'hpa') %>%
  arrange(year, hpa) %>%
  # Columns are now year, hpa, the month columns, then rnum; 3:14 picks the months.
  pivot_longer(cols = 3:14, names_to = 'month', values_to = 'qbo') %>%
  mutate(
    # Each monthly value is dated to the 15th of its month.
    date = ymd(paste0(year, '-', month, '-15')),
    hpa = as.integer(hpa),
    qbo = as.numeric(qbo)
  )
# Simpler rebuild of sing2 that assumes 15 data rows per year throughout.
sing2 <- sing %>%
  separate(1, into = c('hpa', 'JAN'), sep = '\\s+') %>%
  drop_na() %>%
  subset(hpa != 'hPa') %>%
  mutate(
    year = 1987 + floor(row_number() / 15),
    hpa = as.integer(hpa)
  ) %>%
  relocate(year, .before = 'hpa') %>%
  arrange(year, hpa) %>%
  # After relocate() the columns are year, hpa, then the months, so the months
  # start at column 3. The previous 2:13 pivoted hpa away (and skipped the last
  # month), which left no hpa column for the mutate() below or for the plots.
  # Assumes twelve month columns follow hpa -- TODO confirm against the file.
  pivot_longer(cols = 3:14, names_to = 'month', values_to = 'qbo') %>%
  mutate(
    # Each monthly value is dated to the 15th of its month.
    date = ymd(paste0(year, '-', month, '-15')),
    hpa = as.integer(hpa),
    qbo = as.numeric(qbo)
  )
# End Data Massaging. It's ready to be graphed
# A simple call to ggplot with geom_contour_filled generates a legend
sing2 %>%
ggplot(aes(x=date,y=hpa)) +
geom_contour_filled(aes(z=qbo*0.1)) +
scale_y_reverse()
# Adding scale_fill_brewer removes the legend.
# Adding show.legend = TRUE to the geom_contour_filled options has no effect.
limits = c(-1,1)*max(abs(sing2$qbo),na.rm=TRUE)
zCuts <- round(seq(limits[1], limits[2], length.out = 11), digits=0)
sing2 %>%
ggplot() +
geom_contour_filled(aes(x=date,y=hpa, z = qbo*0.1),breaks=zCuts*0.1) +
scale_y_reverse(expand=c(0,0)) +
scale_x_date(expand=c(0,0), date_breaks = '1 year', date_labels = '%Y') +
scale_fill_brewer(palette = 5,type='div',breaks=zCuts) +
theme_bw() +
theme(legend.position = 'right',
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
OP, I don't have a direct answer for you, given that your example is not able to be replicated (unable to access the data you gave). In place, I can give you a bit of advice on debugging, since it seems the issue is related to the breaks= argument of scale_fill_brewer(). As you mention, you get a legend when using geom_contour_filled(), but not when you add the scale_fill_brewer() part.
Let me use the example from the documentation for geom_contour_filled() to illustrate this behavior, which utilizes the built-in dataset, faithfuld.
I'll add in your own palette and type choice, leaving out the breaks argument for example:
v <- ggplot(faithfuld, aes(waiting, eruptions, z = density))
v + geom_contour_filled() +
scale_fill_brewer(palette = 5, type='div')
If you do the same thing, but add in a "nonsensical" breaks argument, you get the same plot, but without a legend (like you are seeing):
v + geom_contour_filled() +
scale_fill_brewer(palette = 5, type='div', breaks=1:4)
For me, this is good evidence that the issue in your code relates to the value for breaks= not being within the range expected. Is this just a typo? Note that breaks=zCuts in scale_fill_brewer(), yet breaks=zCuts*0.1 in geom_contour_filled(). This would put each value for your color scale to be 10 times outside the range of the breaks for the contours themselves. I'd be willing to bet that this change to that scale_fill_brewer() line will do the trick:
# earlier plot code
... +
scale_fill_brewer(palette = 5,type='div',breaks=zCuts*0.1) +
...
# remaining plot code

How to Combine 2 Line Graphs Together

I'm new to using R so please bear with me as my code might not look the best. So I want to combine these two line graphs together since right now I have written code for each item that I am analyzing. This is the dataset I am using: https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-09-01/readme.md I used the "Arable_Land" dataset!
##USA Arable Land
plot_arable_land_USA <- arable_land %>%
filter(Code == "USA") %>%
select(c(Year, Code, `Arable land needed to produce a fixed quantity of crops ((1.0 = 1961))`)) %>%
pivot_longer(-c(Year, Code)) %>%
ggplot(aes(x = Year, y = value,color=name,group=name)) +
geom_line() +
facet_wrap(.~name,scales = 'free_y') +
theme_light() +
theme(legend.position = 'none')
ggplotly(plot_arable_land_USA)
##Canada Arable Land
plot_arable_land_CAN <- arable_land %>%
filter(Code == "CAN") %>%
select(c(Year, Code, `Arable land needed to produce a fixed quantity of crops ((1.0 = 1961))`)) %>%
pivot_longer(-c(Year, Code)) %>%
ggplot(aes(x = Year, y = value,color=name,group=name)) +
geom_line() +
facet_wrap(.~name,scales = 'free_y') +
theme_light() +
theme(legend.position = 'none')
ggplotly(plot_arable_land_CAN)
Ideally, I would like one graph to show both like one line (in Purple) to show the USA and another line(in Brown) to show Canada.
Thank you!
Try this. It is a better practice to reshape data to long as you did. In your case you can add filter() to choose the desired countries. Then, reshape to long and design the plot. The key is setting color and group with Code in order to obtain the desired lines. You can set the colors using scale_color_manual() and I have left the facet option to get the title. Here the code:
library(plotly)
library(tidyverse)
#Code
# Plot the selected countries on one chart, one coloured line per country code.
plot_arable_land_CAN <- arable_land %>%
  select(-Entity) %>%
  filter(Code %in% c('USA', 'CAN')) %>%
  # Every column except Code and Year goes into name/value pairs.
  pivot_longer(-c(Code, Year)) %>%
  ggplot(aes(x = Year, y = value, color = Code, group = Code)) +
  geom_line() +
  # The facet strip doubles as the chart title (the measure name).
  facet_wrap(. ~ name, scales = 'free_y') +
  theme_light() +
  theme(legend.position = 'none') +
  # Name the colours so the mapping is explicit instead of depending on the
  # alphabetical order of Code: Canada is brown, the USA is purple.
  scale_color_manual(values = c(CAN = 'brown', USA = 'purple'))
#Transform
ggplotly(plot_arable_land_CAN)
Output:

How to create stacked and grouped chart?

I'm trying to create a barplot by month that includes two columns, with each column stacked. For each month, the first column would be the total number of video visits, split by vid_new and vid_return. The second column would be the total number of phone visits, split by phone_charge and phone_nocharge.
I still haven't been able to get the bars side-by-side correct. This code uses the data frame in the second picture and it's counting the instances of the word "video" and "phone", not the Count column resulting in the third picture.
plot <- ggplot(data=new_df, aes(x=Month, y = count, fill = gen_type)) +
geom_bar(stat = "identity", position = "dodge")
Below is a pic of the data I'm working with. I've converted it into a few different forms to try new methods but have not been able to form this graph.
How can I make a barplot by group and by stack in ggplot? What data structure do I need to make it?
Thanks in advance for your advice!
You can try any of these options, which reshape your data to long format and create an additional variable so that you can identify the types. Here is the code using tidyverse functions:
library(ggplot2)
library(dplyr)
library(tidyr)
#Data
df <- data.frame(Month=c(rep('Mar',4),rep('Apr',4),rep('May',2)),
spec_type=c('vid_new','vid_return','phone_charge','phone_nocharge',
'vid_new','vid_return','phone_charge','phone_nocharge',
'vid_new','vid_return'),
Count=c(7,85,595,56,237,848,2958,274,205,1079))
#Plot 1
# Stacked bars: one bar per visit type, filled by spec_type, one facet per month.
df %>%
  # unique() keeps first-appearance order, so the facets follow the data's
  # month order. TRUE is spelled out because T can be reassigned.
  mutate(Month = factor(Month, levels = unique(Month), ordered = TRUE)) %>%
  # Type is the text before the underscore; NA in `into` discards the part
  # after it, and remove = FALSE keeps spec_type for the fill aesthetic.
  separate(spec_type, into = c('Type', NA), sep = '_', remove = FALSE) %>%
  ggplot(aes(x = Type, y = Count, fill = spec_type)) +
  geom_bar(stat = 'identity', position = 'stack') +
  facet_wrap(. ~ Month, strip.position = 'bottom') +
  # Put the month strips below the axis so they read as group labels.
  theme(
    strip.placement = 'outside',
    strip.background = element_blank()
  )
Output:
Or this:
#Plot 2
# Proportional bars: each visit-type bar is scaled to full height and split
# by spec_type, one facet per month with free scales.
df %>%
  # unique() keeps first-appearance order, so the facets follow the data's
  # month order. TRUE is spelled out because T can be reassigned.
  mutate(Month = factor(Month, levels = unique(Month), ordered = TRUE)) %>%
  # Type is the text before the underscore; NA in `into` discards the part
  # after it, and remove = FALSE keeps spec_type for the fill aesthetic.
  separate(spec_type, into = c('Type', NA), sep = '_', remove = FALSE) %>%
  ggplot(aes(x = Type, y = Count, fill = spec_type)) +
  geom_bar(stat = 'identity', position = 'fill') +
  facet_wrap(. ~ Month, strip.position = 'bottom', scales = 'free') +
  # Put the month strips below the axis so they read as group labels.
  theme(
    strip.placement = 'outside',
    strip.background = element_blank()
  )
Output:
Or this:
#Plot 3
# Dodged bars: the spec_type bars sit side by side within each visit type,
# one facet per month with free scales.
df %>%
  # unique() keeps first-appearance order, so the facets follow the data's
  # month order. TRUE is spelled out because T can be reassigned.
  mutate(Month = factor(Month, levels = unique(Month), ordered = TRUE)) %>%
  # Type is the text before the underscore; NA in `into` discards the part
  # after it, and remove = FALSE keeps spec_type for the fill aesthetic.
  separate(spec_type, into = c('Type', NA), sep = '_', remove = FALSE) %>%
  ggplot(aes(x = Type, y = Count, fill = spec_type)) +
  geom_bar(
    stat = 'identity',
    position = position_dodge2(preserve = 'single')
  ) +
  facet_wrap(. ~ Month, strip.position = 'bottom', scales = 'free') +
  # Put the month strips below the axis so they read as group labels.
  theme(
    strip.placement = 'outside',
    strip.background = element_blank()
  )
Output:
In order to see by month you can use facet_wrap() and placing labels in a smart way.

How can I make axis x vertical so the long names of countries become understandable in the plot

The name of the countries are long and are on top of each other in the x labels, how can I make it readable?
ggplot(results, aes(x = Nationality, horiz=TRUE)) +
theme_solarized() +
geom_bar() +
labs(y = "Number of Medals",
title = "Number of Medals by Country")
Welcome to stackoverflow. Here are some suggestions on how you can deal with the many values. In both methods, I am using the forcats library within the tidyverse. You can read more about it here: https://r4ds.had.co.nz/factors.html
First, some fake data & replicating your problem
library(tidyverse)
df <-
mpg %>%
arrange(manufacturer) %>%
mutate(
n = row_number(),
vehicle = paste(year, manufacturer, model)
) %>%
uncount(n)
# this replicates your problem
ggplot(df, aes(vehicle)) +
geom_bar() +
coord_flip()
Option 1: consolidate
df %>%
mutate(
vehicle = # making heavy use of forcats here
fct_lump(vehicle, 35) %>% # keep only the 35 most frequent values, others in "Other" category
fct_infreq() %>% # order them by frequency
fct_rev() #reverse the order
) %>%
ggplot(aes(vehicle)) +
geom_bar() +
coord_flip()
Option 2: facet
Someone may have a more elegant way of getting these groups but I use this method quite a bit
df %>%
mutate(
vehicle = # similar methods to earlier
fct_infreq(vehicle) %>%
fct_rev(),
num_fct = as.integer(vehicle), # generates a number for each factor
facet = (max(num_fct)-num_fct) %/% 20 # will make groups of 20, but they need to be in descending order within each facet
) %>%
ggplot(aes(vehicle)) +
geom_bar() +
coord_flip() +
facet_wrap(~facet, scales = "free_y", nrow = 1) +
theme(
strip.background = element_blank(),
strip.text = element_blank()
)
Hope this helps.

Sequence index plots in ggplot2 using geom_tile( )

I'm trying to use ggplot to create sequence plots, for the sake of keeping the same visual style within my paper using sequence analysis. I do:
library(ggplot2)
library(TraMineR)
library(dplyr)
library(tidyr)
data(mvad)
mvad_seq<-seqdef(mvad,15:length(mvad))
mvad_trate<-seqsubm(mvad_seq,method="TRATE")
mvad_dist<-seqdist(mvad_seq,method="OM",sm=mvad_trate)
cluster<-cutree(hclust(d=as.dist(mvad_dist),method="ward.D2"),k=6)
mvad$cluster<-cluster
mvad_long<-gather(select(mvad,id,contains("."),-matches("N.Eastern"),-matches("S.Eastern")),
key="Month",value="state",
Jul.93, Aug.93, Sep.93, Oct.93, Nov.93, Dec.93, Jan.94, Feb.94, Mar.94,
Apr.94, May.94, Jun.94, Jul.94, Aug.94, Sep.94, Oct.94, Nov.94, Dec.94, Jan.95,
Feb.95, Mar.95, Apr.95, May.95, Jun.95, Jul.95, Aug.95, Sep.95, Oct.95, Nov.95,
Dec.95, Jan.96, Feb.96, Mar.96, Apr.96, May.96, Jun.96, Jul.96, Aug.96, Sep.96,
Oct.96, Nov.96, Dec.96, Jan.97, Feb.97, Mar.97, Apr.97, May.97, Jun.97, Jul.97,
Aug.97, Sep.97, Oct.97, Nov.97, Dec.97, Jan.98, Feb.98, Mar.98, Apr.98, May.98,
Jun.98, Jul.98, Aug.98, Sep.98, Oct.98, Nov.98, Dec.98, Jan.99, Feb.99, Mar.99,
Apr.99, May.99, Jun.99)
mvad_long<-left_join(mvad_long,select(mvad,id,cluster))
ggplot(data=mvad_long,aes(x=Month,y=id,fill=state))+geom_tile()+facet_wrap(~cluster)
I try to plot the sequences by cluster, and this gives me the following plot:
As you can see, there are gaps for the ids that don't belong to the cluster represented by each facet. I would like to get rid of these gaps, so that the sequences show up stacked just as with the seqIplot() function of TraMineR as in the next figure:
Any suggestions of how to proceed?
Two small changes:
mvad_long$id <- as.factor(mvad_long$id)
ggplot(data=mvad_long,aes(x=Month,y=id,fill=state))+
geom_tile()+facet_wrap(~cluster,scales = "free_y")
ggplot was treating id as a numerical variable, rather than a factor, and then the scales were fixed.
An update: I needed to convert the month into a date for it to work. Full solution follows:
library(ggplot2)
library(TraMineR)
library(dplyr)
library(tidyr)
library(lubridate)
data(mvad)
mvad_seq <- seqdef(mvad, 15:length(mvad))
mvad_trate <- seqsubm(mvad_seq, method = "TRATE")
mvad_dist <- seqdist(mvad_seq, method = "OM", sm = mvad_trate)
cluster <- cutree(hclust(d = as.dist(mvad_dist), method = "ward.D2"), k = 6)
mvad$cluster <- cluster
mvad_long <- mvad %>%
select(id, matches("\\.\\d\\d")) %>%
gather(key = "month", value = "state", -id) %>%
inner_join(
mvad %>%
select(id, cluster),
by = "id"
) %>%
mutate(
id = factor(id),
date = myd(paste0(month, "01"))
)
mvad_long %>%
ggplot(aes(x = date, y = id, fill = state, color = state)) +
geom_tile() +
facet_wrap(~cluster, scales = "free_y", ncol = 2) +
theme_bw() +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.grid = element_blank()
) +
scale_fill_brewer(palette = "Accent") +
scale_colour_brewer(palette = "Accent") +
labs(x = "", y = "")

Resources