Frequency plot for different categories - r

I have this input:
structure(list(Topic = structure(c(1L, 2L, 3L, 1L, 2L, 3L), .Label = c("1",
"2", "3"), class = "factor"), Sex = structure(c(1L, 1L, 1L, 2L,
2L, 2L), .Label = c("Female", "Male"), class = "factor"), Count = c(2L,
15L, 23L, 7L, 20L, 34L)), class = "data.frame", row.names = c(NA,
-6L))
and I try this code:
ggplot(data=dat, aes(x=Topic, y=Sex, fill=Sex)) + geom_bar(stat="identity")
However, the problem is that it shows the same proportion for every topic.
How is it possible to make the plot like this one?

What about this?
dat %>%
ggplot(aes(Topic, Count)) +
geom_bar(aes(fill = Sex), stat = "identity", position = "dodge")

Related

How can I increase room for ggplot y-axis?

My plot is cutting off a portion of the y-axis when viewed and when saved as a jpeg. I am sure this is an easy fix, but I can't seem to figure it out. Any advice? Thanks
treat_freqplot<-ggplot(Treat_occur, aes(Trial, freq, fill = Treatment)) + geom_bar(stat = 'identity', alpha = 1) +
#facet_grid(~Trial, scales = "free")+
scale_y_continuous(labels = scales::percent, breaks=seq(0,100,1/10), expand=c(0,0)) +
labs(x = 'Trial', y = "Proportional Deer Use") +
#scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
scale_fill_manual("Treatment",values=c("dark Green", "dark gray", "fire brick 4","dark blue"))+
theme(axis.text = element_text(size=30, colour = "black",vjust=0.3),axis.title=element_text(size=30,face="bold")) +
theme(legend.position = "right",legend.background = element_rect(color="black",linetype="solid", fill="gray100"),
legend.key.size = unit(1, 'cm'),legend.text = element_text(size=25), legend.title=element_text(size=30)) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_rect(fill = NA, color = "black"))
treat_freqplot
#ggsave('C:\\Projects\\CaptiveStudy\\Analysis\\Output2\\treatfreq_plot.jpeg',
width = 20, height = 12, units = "in",treat_freqplot)
structure(list(Trial = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 3L, 3L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14"), class = "factor"), Treatment = structure(c(1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L), .Label = c("Control", "30%Shade",
"60%Shade", "90%Shade"), class = "factor"), Use = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), n = c(2L, 7L, 5L, 30L, 1L, 2L, 6L,
45L, 9L, 3L), freq = c(0.037037037037037, 0.12962962962963, 0.0925925925925926,
0.555555555555556, 0.0185185185185185, 0.037037037037037, 0.111111111111111,
0.833333333333333, 0.166666666666667, 0.0555555555555556)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -10L), groups = structure(list(
Trial = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L
), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9",
"10", "11", "12", "13", "14"), class = "factor"), Treatment = structure(c(1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L), .Label = c("Control",
"30%Shade", "60%Shade", "90%Shade"), class = "factor"), .rows = structure(list(
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), .drop = TRUE))

ggplot count percentage of each bar in bar chart

I have a bar chart whose fill has two levels; the variable is categorical with values 0 and 1. The bar chart shows the count of 0s and the count of 1s. Now I want to show the percentage of each segment on each bar, so that I can see which bar is highest and which level (0 or 1) is larger within each bar. But my variable is categorical rather than a count.
I want each individual bar to count as 100% and then be divided into its groups.
ggplot(stackoverflow,aes(x=stackoverflow$person, fill=stackoverflow$success))+facet_wrap(~stackoverflow$city)+geom_bar()
Like this
structure(list(data = structure(list(source = structure(c(1L,
1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("p",
"q", "r"), class = "factor"), person = structure(c(1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 1L), .Label = c("a", "b",
"c"), class = "factor"), city = structure(c(1L, 1L, 3L, 3L, 3L,
2L, 1L, 1L, 1L, 3L, 3L, 3L, 3L), .Label = c("x", "y", "z"), class = "factor"),
success = structure(c(1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 2L, 2L, 2L), .Label = c("0", "1"), class = "factor")), row.names = c(NA,
-13L), class = "data.frame"), layers = list(<environment>), scales = <environment>,
mapping = structure(list(x = ~stackoverflow$person, fill = ~stackoverflow$success), class = "uneval"),
theme = list(), coordinates = <environment>, facet = <environment>,
plot_env = <environment>, labels = list(x = "stackoverflow$person",
fill = "stackoverflow$success", y = "count", weight = "weight")), class = c("gg",
"ggplot"))
Start with some data aggregation using tidyverse:
dk %>%
group_by(person, city, success) %>%
summarise(counts = n()) %>%
right_join(dk %>%
group_by(city, person) %>%
summarise(all_counts= n())) %>%
mutate(percents = paste0(round(counts/all_counts * 100, 2), "%")) %>%
ggplot(aes(x = person, y = counts, fill = as.factor(success))) +
geom_bar(stat = "identity") +
geom_text(aes(label = percents), position = position_stack(vjust = 0.5)) +
facet_wrap(~city) +
coord_flip()
So in effect, I first find the count of each success value for every person and city (counts), and divide that by the total count (all_counts), which is the number of rows for that person in that city. Then we format the percentages and plot with ggplot. Because the data are already aggregated, we use geom_bar with stat = "identity", and geom_text prints the percentages (position_stack(vjust = 0.5) centers each label within its bar segment). Finally, we facet by city. The coord_flip() line flips the x and y axes.

reorder dodge bar plots (ggplot2)

I have a data frame lotr_main that looks like this:
I want to make a bar plot with the columns reordered according to the wordcount.
library(ggplot2)
library(viridis)
lotr_main %>% ggplot(aes(x = Character, y = wordcount, fill = Film)) +
geom_bar(stat="identity",position = "dodge") +
coord_flip() +
scale_fill_viridis("Film",discrete = TRUE, option = "C")
The plot I got:
What I want is, for each character, the bars to be reordered with the longest on top and the shortest at the bottom. The order of the bars doesn't need to be the same for each character.
You essentially want to fill by one thing, and order by another. A solution is thus to pry them apart and create a separate 'order' variable. Of note, I don't know if sorting your bars by value instead of having the same sequence each 'group' makes your plot more understandable.....
create some data:
library(data.table)
set.seed(123)
dat <- expand.grid(group=LETTERS[1:3],
subgroup=LETTERS[1:3])
dat$value <- runif(nrow(dat))
setDT(dat)
Create the order variable:
dat[,order:=order(value),by=group]
Create the plot
p1 <- ggplot(dat, aes(x=group,y=value, fill=subgroup,group=order))+
geom_bar(aes(group=order),position="dodge", stat="identity") +
coord_flip()
p1
Here's a start. Found data and inspiration here (code below)
LoTRdata <- structure(list(Film = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("The Fellowship Of The Ring",
"The Return Of The King", "The Two Towers"), class = "factor"),
Race = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L,
3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L), .Label = c("Elf", "Hobbit",
"Man"), class = "factor"), Gender = structure(c(1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L
), .Label = c("Female", "Male"), class = "factor"), Words = c(1229L,
971L, 14L, 3644L, 0L, 1995L, 331L, 513L, 0L, 2463L, 401L,
3589L, 183L, 510L, 2L, 2673L, 268L, 2459L)), .Names = c("Film",
"Race", "Gender", "Words"), class = "data.frame", row.names = c(NA,
-18L))
LoTRdataOrder <- LoTRdata[order(LoTRdata$Words, LoTRdata$Film) , ]
# install.packages("ggplot2", dependencies = TRUE)
require(ggplot2)
p <- ggplot(LoTRdataOrder, aes(x = Race, y = Words, fill = Film))
p + geom_bar(stat = "identity", position = "dodge") +
coord_flip() + guides(fill = guide_legend())

Changing order in legend OR in plot, but not both

I have this data:
datat <- structure(list(Carga = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L), .Label = c("Outra", "88"), class = "factor"),
Categoria = structure(c(1L, 1L, 3L, 3L, 2L, 2L, 1L, 1L, 3L,
3L, 2L, 2L), .Label = c("A", "G", "B"), class = "factor"),
Vagas = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("Ocupadas", "Autorizadas"), class = "factor"),
Cat.A.88 = c(26, 1, 30, 1, 18, 0, 57, 0, 39, 0, 0, 0)), .Names = c("Carga",
"Categoria", "Vagas", "Cat.A.88"), class = "data.frame", row.names = c(NA,
-12L))
and this plot:
ggplot(datat, aes(x=Carga, y=Cat.A.88, fill=Vagas)) + geom_bar(stat='identity', position='dodge') + ylab('Vagas') + xlab('Carga horĂ¡ria') + facet_grid(. ~ Categoria) + coord_flip()
The legend colours are in the inverse order compared with the plot colours (the plot has green before red, and the legend has red before green). I want them to appear in the same order. I tried adding the parameter order=-as.numeric(Vagas) in aes(), but it didn't change anything.
This should help:
ggplot(datat, aes(x=Carga, y=Cat.A.88, fill=Vagas)) +
geom_bar(stat='identity', position='dodge') + ylab('Vagas') +
xlab('Carga horĂ¡ria') + facet_grid(. ~ Categoria) + coord_flip() +
guides(fill = guide_legend(reverse=T))

How to remove borders from the legend without removing borders from the bar plot in ggplot2

I have the following plot:
library(reshape)
library(ggplot2)
library(gridExtra)
require(ggplot2)
data2<-structure(list(IR = structure(c(4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L
), .Label = c("0.13-0.16", "0.17-0.23", "0.24-0.27", "0.28-1"
), class = "factor"), variable = structure(c(1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L), .Label = c("Real queens", "Simulated individuals"
), class = "factor"), value = c(15L, 11L, 29L, 42L, 0L, 5L, 21L,
22L), Legend = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("Real queens",
"Simulated individuals"), class = "factor")), .Names = c("IR",
"variable", "value", "Legend"), row.names = c(NA, -8L), class = "data.frame")
p <- ggplot(data2, aes(x =factor(IR), y = value, fill = Legend, width=.15))
data3<-structure(list(IR = structure(c(4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L
), .Label = c("0.13-0.16", "0.17-0.23", "0.24-0.27", "0.28-1"
), class = "factor"), variable = structure(c(1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L), .Label = c("Real queens", "Simulated individuals"
), class = "factor"), value = c(2L, 2L, 6L, 10L, 0L, 1L, 4L,
4L), Legend = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("Real queens",
"Simulated individuals"), class = "factor")), .Names = c("IR",
"variable", "value", "Legend"), row.names = c(NA, -8L), class = "data.frame")
q<- ggplot(data3, aes(x =factor(IR), y = value, fill = Legend, width=.15))
##the plot##
q + geom_bar(position='dodge', colour='black') + ylab('Frequency') +
xlab('IR')+scale_fill_grey() +
theme(axis.text.x=element_text(colour="black"),
axis.text.y=element_text(colour="Black"))+
opts(title='', panel.grid.major = theme_blank(),panel.grid.minor =
theme_blank(),panel.border = theme_blank(),panel.background =
theme_blank(), axis.ticks.x = theme_blank())
I want to keep the black border effect
geom_bar(colour='black)
without having that weird diagonal dash across the blocks depicted in the legend. Is there any way to decouple those or remove that diagonal dash?
As Didzis Elferts mentioned, there's an example of this here (last example).
The trick is to superimpose two plots: the first plot takes care of the legend in the desired way, and the second plot lacks a legend but adds borders to the actual plot itself.

Resources