Order geom_bar groups - r

I have this data:
country name value
<chr> <chr> <dbl>
1 Germany Jd 7.1
2 Germany Jc 8.4
3 Germany Ne 1.3
4 France Jd 8.3
5 France Jc 12
6 France Ne 3.7
and I would like to plot it in two groups of bars (with three columns each). Ordered the same as it is in the dataframe: First Germany, second France and the order of the columns Jd, Jc, Ne.
I did:
# Question code: dodged bar chart of value by country, with one bar per name.
p <- ggplot(data, aes(x = country, y = value)) +
geom_bar(aes(fill = name), width=0.7, position = position_dodge(width=0.7), stat='identity')
but I get the plot in a different order: first France, then Germany and the order of the columns Jc, Jd, Ne. (seems to be ordered alphabetically).
How can I order the bars in the way I want?

Probably one of the simplest ways to take control of the ordering is to convert the columns you want to order into factors with factor() and set their levels explicitly; this overrides the default ordering:
library(ggplot2)
# Fix the display order by turning both columns into factors with explicit
# levels; the level order then overrides the default ordering.
data$country <- factor(data$country, levels = c("Germany", "France"))
data$name <- factor(data$name, levels = c("Jd", "Jc", "Ne"))

# moved the aes() all together, nothing related to the question
ggplot(data, aes(x = country, y = value, fill = name)) +
  geom_bar(
    width = 0.7,
    position = position_dodge(width = 0.7), # the `=` was missing here
    stat = "identity"
  )
With data:
# Example data: one row per country/name pair.
# header = TRUE is spelled out because `T` is an ordinary, reassignable variable.
data <- read.table(text = "
country name value
Germany Jd 7.1
Germany Jc 8.4
Germany Ne 1.3
France Jd 8.3
France Jc 12
France Ne 3.7", header = TRUE)

Related

Get the proportions in ggplot2 (R) bar charts

Can someone provide me some hints as to what I am doing wrong in my code? Or what I need to correct to get the correct percentages? I am trying to get the proportions by manipulating my ggplot2 code. I would prefer not mutating a column. However, if I can't get ggplot2 to give me the correct proportions, I will then be open to adding columns.
Here is the reproduceable data:
# Reproducible example data: one row per observed cat, with its type code and country.
cat_type <- c(
  "1", "1", "2", "3", "1", "3", "3", "2", "1", "1",
  "1", "3", "3", "2", "3", "2", "3", "1", "3", "3",
  "3", "1", "3", "1", "3", "1", "1", "3", "1"
)
country <- c(
  "India", "India", "India", "India", "India",
  "India", "India", "India", "India", "India",
  "Indonesia", "Russia", "Indonesia", "Russia", "Russia",
  "Indonesia", "Indonesia", "Indonesia", "Indonesia", "Russia",
  "Indonesia", "Russia", "Indonesia", "Indonesia", "Russia",
  "Russia", "India", "India", "India"
)
bigcats <- data.frame(cat_type = cat_type, country = country)
My data gives me the following proportions (these are correct):
> table(bigcats$cat_type, bigcats$country) ## raw numbers
India Indonesia Russia
1 7 3 2
2 2 1 1
3 4 5 4
>
> 100*round(prop.table(table(bigcats$cat_type, bigcats$country),2),3) ## proportions by column total
India Indonesia Russia
1 53.8 33.3 28.6
2 15.4 11.1 14.3
3 30.8 55.6 57.1
However, my ggplot2 is giving me the incorrect proportions:
# Question code: stacked bars per country with percentage labels.
# The labels come from prop.table(stat(count)), which is what produces the
# unexpected percentages described in the question.
bigcats %>% ggplot(aes(x=country, y = prop.table(stat(count)), fill=cat_type, label = scales::percent(prop.table(stat(count)))))+
geom_bar(position = position_fill())+
geom_text(stat = "count", position = position_fill(vjust=0.5),colour = "white", size = 5)+
labs(y="Percent",title="Top Big Cat Populations",x="Country")+
scale_fill_discrete(name=NULL,labels=c("Siberian/Bengal", "Other wild cats", "Puma/Leopard/Jaguar"))+
scale_y_continuous(labels = scales::percent)
The issue is that prop.table(stat(count)) does not compute the proportions within each country; it divides every count by the overall total, i.e. it is equivalent to:
library(dplyr)
# Count each cat_type/country combination, then express every count as a share
# of the overall total (prop.table(n) divides by sum(n) across all rows).
bigcats %>%
count(cat_type, country) %>%
mutate(pct = scales::percent(prop.table(n)))
#> cat_type country n pct
#> 1 1 India 7 24.1%
#> 2 1 Indonesia 3 10.3%
#> 3 1 Russia 2 6.9%
#> 4 2 India 2 6.9%
#> 5 2 Indonesia 1 3.4%
#> 6 2 Russia 1 3.4%
#> 7 3 India 4 13.8%
#> 8 3 Indonesia 5 17.2%
#> 9 3 Russia 4 13.8%
Making use of a helper function to reduce code duplication you could compute your desired proportions like so:
library(ggplot2)
# Share of each count within its group.
#
# count: numeric vector of counts.
# group: vector of the same length giving the group each count belongs to.
# Returns `count` divided by the sum of `count` over all entries that share
# the same `group` value.
prop <- function(count, group) {
  totals <- tapply(count, group, sum)
  # Look the totals up by group *name*. Indexing with a numeric `group` is
  # positional and is only correct when the values happen to be exactly 1..k.
  count / totals[as.character(group)]
}
# Stacked bars per country; bar heights and labels both use prop() so that the
# shares are computed within each x position.
ggplot(
  bigcats,
  aes(
    x = country,
    y = prop(after_stat(count), after_stat(x)),
    fill = cat_type,
    label = scales::percent(prop(after_stat(count), after_stat(x)))
  )
) +
  geom_bar(position = position_fill()) +
  geom_text(
    stat = "count",
    position = position_fill(vjust = 0.5),
    colour = "white",
    size = 5
  ) +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_discrete(
    name = NULL,
    labels = c("Siberian/Bengal", "Other wild cats", "Puma/Leopard/Jaguar")
  ) +
  labs(y = "Percent", title = "Top Big Cat Populations", x = "Country")
Created on 2021-07-28 by the reprex package (v2.0.0)

Plotting in ggplot2 with geom_line() with label

I'm trying to plot this dataset with ggplot2, putting the name of each country in each line geom_line() and with the x axis (Year) and the y axis (with the relevant data from each country).
The DataSet to Edit
This is what I have so far. I wanted to include the name of the country in each line. The problem is that each country has its data in a separate column.
If you want to use ggplot you should bring your data into a "longer" format. Using package tidyr:
# Reshape to long format: one row per Year/Country pair.
# Plain assignment replaces `%<>%`, which needs magrittr attached.
df <- df %>%
  pivot_longer(
    # Select every column except Year. The regex "[^Year]" is a character
    # class ("any character other than Y, e, a, r"), not "all but Year".
    cols = -Year,
    names_to = "Country",
    values_to = "Value"
  )
gives you
# A tibble: 108 x 3
Year Country Value
<dbl> <chr> <dbl>
1 1995 Argentina 4122262
2 1995 Bolivia 3409890
3 1995 Brazil 36276255
4 1995 Chile 2222563
5 1995 Colombia 10279222
6 1995 Costa_Rica 1611055
7 1997 Argentina 4100563
8 1997 Bolivia 3391943
9 1997 Brazil 35718095
10 1997 Chile 2208382
Based on this it is easy to plot a line for each country using ggplot2:
# One line per country, coloured by country.
ggplot(data = df, mapping = aes(x = Year, y = Value, colour = Country)) +
  geom_line()
You more or less answered your own question. You need the reshape2 package to bring all the countries into a single column.
library(reshape2)
library(ggplot2)

# Small wide-format example: one column per country.
Year <- c(1991, 1992, 1993, 1994, 1995, 1996)
Argentina <- c(235, 531, 3251, 3153, 13851, 16513)
Mexico <- c(16503, 16035, 3516, 3155, 30351, 16513)
Japan <- c(1651, 868416, 68165, 35135, 3, 136816)
df <- data.frame(Year, Argentina, Mexico, Japan)

# Stack the country columns into `variable` (country name) and `value`.
# `measure.vars` is the melt() argument that names the columns to stack;
# `Cont.Val` is not a documented melt() argument.
df2 <- melt(
  data = df,
  id.vars = "Year",
  measure.vars = c("Argentina", "Mexico", "Japan")
)

# One line per country.
ggplot(df2, aes(x = Year, y = value, group = variable, color = variable)) +
  geom_line()

r - Calculate % within a Sub Group using Dplyr

I want to chart the relative no of fatalities by year for each of various event types.
I can do this with facets in ggplot, but I am struggling to calculate the % by event based on event, year and number of fatalities.
Event Type Year Fatalities % by Event
(calculated)
----- ---- ---------- ----------
Storm 1980 5 12.5%
Storm 1981 9 22.5%
Storm 1982 15 37.5%
Storm 1983 11 27.5%
Ice 1980 7 70%
Ice 1981 3 30%
I have the following code to calculate it, but the percentage comes out wrong: the calculation appears to use a much higher denominator.
# Question code: add the per-eventType fatality total with mutate(), then group
# by year as well and divide the per-(eventType, year) sum by that total.
fatalitiesByYearType <- stormDF %>%
group_by(eventType) %>%
mutate(totalEventFatalities = sum(FATALITIES)) %>%
group_by(year, add = TRUE) %>%
mutate(fatalitiesPct = sum(FATALITIES) / totalEventFatalities)
What am I doing wrong?
My charting code is below. I include it because I'm also interested to see whether there is a way of showing the data proportionally within ggplot itself.
# Question code: bar chart of fatalitiesPct by year, one facet per eventType.
p <- ggplot(data = fatalitiesByYearType,
aes(x=factor(year),y=fatalitiesPct))
p + geom_bar(stat="identity") +
facet_wrap(.~eventType, nrow = 5) +
labs(x = "Year",
y = "Fatalities",
title = "Fatalities by Type")
Maybe I do not get your problem, but we can start from here:
library(dplyr)
library(ggplot2)
# here the dplyr part
dats <- fatalitiesByYearType %>%
  group_by(eventType) %>%
  mutate(totalEventFatalities = sum(FATALITIES)) %>%
  # `.add = TRUE` keeps the eventType grouping; the old `add` argument has been
  # deprecated since dplyr 1.0.0.
  group_by(year, .add = TRUE) %>%
  # here we add the summarise: one row per eventType/year.
  # totalEventFatalities is constant within an eventType, so take one value;
  # dividing by the whole column would return one row per input row.
  summarise(
    fatalitiesPct = sum(FATALITIES) / totalEventFatalities[1],
    .groups = "drop_last"
  )
dats
# A tibble: 6 x 3
# Groups: eventType [?]
eventType year fatalitiesPct
<fct> <int> <dbl>
1 Ice 1980 0.7
2 Ice 1981 0.3
3 Storm 1980 0.125
4 Storm 1981 0.225
5 Storm 1982 0.375
6 Storm 1983 0.275
You can of course merge everything into a single dplyr chain:
# here the ggplot2 part
p <- ggplot(data = dats, mapping = aes(x = factor(year), y = fatalitiesPct)) +
  geom_bar(stat = "identity") +
  facet_wrap(. ~ eventType, nrow = 5) +
  # here we add the % in the plot
  scale_y_continuous(labels = scales::percent) +
  labs(
    x = "Year",
    y = "Fatalities",
    title = "Fatalities by Type"
  )
With data:
# Example data: fatalities per event type and year.
# header = TRUE is spelled out because `T` is an ordinary, reassignable variable.
fatalitiesByYearType <- read.table(text = "eventType year FATALITIES
Storm 1980 5
Storm 1981 9
Storm 1982 15
Storm 1983 11
Ice 1980 7
Ice 1981 3 ", header = TRUE)

How to order a geom_col columns based on a summarised value in a pipe

I have tried to understand the other results, but I could not.
This is my dataset:
> HIST
# A tibble: 1,071 x 16
Ano Leilao Fonte UF Vend Projeto
<dbl> <chr> <chr> <chr> <chr> <chr>
1 2008 2008 Leilao 1 Bio SP Abengoa UTE São Luiz (Abengoa São Luiz)
2 2013 2013 A-5 1 Bio MS AMANDINA Amandina
3 2017 2017 A-6 Bio MG BEVAP BIOENERGETICA AROEIRA 2
4 2015 2015 A-5 1 Bio BA Bolt BOLTBAH
5 2013 2013 A-5 1 Bio BA Bolt CAMPO GRANDE
6 2013 2013 A-5 1 Bio PI Bolt CANTO DO BURITI
7 2010 2010 LER Bio TO Bunge PEDRO AFONSO
8 2015 2015 LFA Bio SP Clealco CLEALCO QUEIROZ
9 2015 2015 A-3 Bio SP Clealco CLEALCO QUEIROZ
10 2008 2008 Leilao 1 Bio MG CMAA UTE Vale do Tijuco
# ... with 1,061 more rows, and 10 more variables: CODPPA <dttm>, CAPEX <dbl>,
# MW <dbl>, GF <dbl>, FC <dbl>, PPA <dbl>, RMW <dbl>, WACC <dbl>, TIR <dbl>,
# VPL <dbl>
`
I want to make a graph sorted by the sum(MW), like this:
# Question code: sum MW per Fonte/UF, sort the rows by that sum, and draw
# stacked columns per UF filled by Fonte.
HIST %>%
group_by(Fonte, UF)%>%
summarise(SUMMW = sum(MW))%>%
arrange(desc(SUMMW))%>%
ggplot(aes(x = UF, y = SUMMW, fill = Fonte))+
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
geom_col()
But the problem is that I get the following chart, which is not ordered by the sum of MW. I would like this graph's columns to be ordered by the height of the columns:
thank you, Paulo
I think the easiest way is to reorder the x variable UF by SUMMW inside the aesthetics function aes(), using reorder():
HIST %>%
  group_by(Fonte, UF) %>%
  summarise(SUMMW = sum(MW)) %>%
  arrange(desc(SUMMW)) %>%
  # Order UF by total column height: FUN = sum adds SUMMW over every Fonte of a
  # UF (reorder() defaults to the mean, which ignores how many segments are
  # stacked), and the minus sign puts the tallest column first.
  ggplot(aes(x = reorder(UF, -SUMMW, FUN = sum), y = SUMMW, fill = Fonte)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  geom_col()
You can calculate the height of each bar first and then convert UF to a factor whose levels follow that order. Otherwise ggplot will plot UF's values in categorical order on the x-axis.
# Summarise HIST: total MW for each Fonte/UF combination (one row per segment).
df <- HIST %>%
  group_by(Fonte, UF) %>%
  summarise(SUMMW = sum(MW))

# Total column height per UF, sorted from tallest to shortest.
df2 <- df %>%
  group_by(UF) %>%
  summarise(bar.heights = sum(SUMMW)) %>%
  ungroup() %>%
  arrange(desc(bar.heights))

# Make UF a factor whose levels follow the sorted order above.
df$UF <- factor(df$UF, levels = df2$UF)
rm(df2) # the helper table is no longer needed

# Plot: UF is now drawn in factor-level order.
ggplot(data = df, mapping = aes(x = UF, y = SUMMW, fill = Fonte)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  geom_col()

Sorting Y Axis Values ggplot

I'm trying to create a dotplot where countries are listed on my Y axis from A-Z top to bottom. The medal count will be the X axis for each of the four plots, one each for gold, silver, bronze, and total. Of course, ggplot prefers to plot countries from Z-A and despite reading all about the problem, I haven't resolved the issue. I appreciate any straightforward help on both the coding and comprehension fronts.
# Question code: melt `raw` to long format (Country / Place / Count), set the
# Place level order, and draw one facet per Place.
mdat <- melt(raw, value.name = "Count", variable.name = "Place", id.var = "Country")
mdat[, "Place"] <- factor(mdat[, "Place"], levels=c("Gold", "Silver", "Bronze", "Total"))
##I know my problem is likely on or around the above line ##
plot1 <- ggplot(mdat, aes(x = Count, y = Country, colour = Place)) +
geom_point() +
facet_grid(.~Place) + theme_bw()+
scale_colour_manual(values=c("#FFCC33", "#999999", "#CC6600", "#000000"))
print(plot1)
Algeria Gold 4
Argentina Gold 5
Armenia Gold 1
Algeria Silver 2
Argentina Silver 5
Armenia Silver 2
Algeria Bronze 4
Argentina Bronze 2
Armenia Bronze 0
You have to sort the levels of Country before you plot. Also, there is no Total level in the data you provided. The following approach should give you the desired result:
Reading the data (including a Total level for the Place variable):
# Example medal counts in long format, with a Total row added for each country.
mdat <- read.table(
  header = TRUE,
  text = "Country Place Count
Algeria Gold 4
Argentina Gold 5
Armenia Gold 1
Algeria Silver 2
Argentina Silver 5
Armenia Silver 2
Algeria Bronze 4
Argentina Bronze 2
Armenia Bronze 0
Algeria Total 10
Argentina Total 12
Armenia Total 3"
)
Sorting the levels of the Country variable:
# Give Country reverse-alphabetical levels.
mdat$Country <- factor(
  mdat$Country,
  levels = rev(sort(unique(mdat$Country)))
)
Getting your Place variable in the correct order:
# Put the Place levels in medal order. factor() with explicit levels changes
# only the level order and leaves every row's value untouched. Round-tripping
# through as.numeric() instead returns the internal codes of the alphabetical
# levels, so rows end up relabelled (Bronze rows become "Gold"), and it yields
# NAs when Place is a character column.
mdat$Place <- factor(mdat$Place, levels = c("Gold", "Silver", "Bronze", "Total"))
Creating the plot:
# Dot plot of medal counts: one facet per Place, one colour per Place.
ggplot(data = mdat, mapping = aes(x = Count, y = Country, colour = Place)) +
  geom_point(size = 4) +
  facet_grid(. ~ Place) +
  theme_bw() +
  scale_colour_manual(
    values = c("#FFCC33", "#999999", "#CC6600", "#000000")
  )
which gives the following plot:
As you have melted your data already, I suspect that there is no Total variable in the raw data frame. You can calculate it with:
# Row-wise medal total, to be added to the wide `raw` data frame before melting.
# NOTE(review): assumes the medal columns in `raw` are named Gold, Silver and
# Bronze (the Place levels used in the question) - adjust if they differ.
raw$Total <- rowSums(raw[, c("Gold", "Silver", "Bronze")])

Resources