Plotting many bar charts by year with ggplot - r

Here is a sample dataset:
# Sample data: three years, four categories per year, one count per row.
df <- tibble(
  year = rep(c(1, 2, 3), each = 4),
  col = rep(c("a", "b", "c", "d"), times = 3),
  freq = c(2, 3, 5, 1, 4, 3, 8, 3, 5, 7, 3, 9)
)
I want to create a bar plot for each year of data independently. I'd also like to print the total sample size for each year as a caption on the respective plot. I'm currently doing it manually like this:
# Keep only the year 1 rows and print their total frequency (the sample size).
df_yr_1 <- filter(df, year == 1)
sum(df_yr_1[["freq"]])
# Build the year 1 bar chart; the caption text is typed in by hand.
hist <- ggplot(data = df_yr_1, mapping = aes(x = col, y = freq)) +
  geom_col() +
  labs(caption = "N=11")
hist
So on and so forth for each year.
Is there a way to automate this process? Ideally, all the plots would save to a pdf (1 per page), but if they're saved independently that's fine too. I have a feeling adding the total sample size as a caption might make the process more challenging, so if it's possible to automate the process without the sample size captions that would still be very helpful. Thanks in advance!

We could wrap the code in a loop
# Open a single PDF device; every plot printed below goes into this file.
pdf(file = "plots.pdf", onefile = TRUE)
for (yr in unique(df$year)) {
  # Rows belonging to the current year only.
  yr_rows <- filter(df, year == yr)
  # Caption text is the year's total frequency.
  n_label <- paste0("N=", sum(yr_rows$freq))
  yr_plot <- ggplot(yr_rows, aes(x = col, y = freq)) +
    geom_col() +
    labs(caption = n_label)
  # Inside a loop a ggplot object must be printed explicitly to be drawn.
  print(yr_plot)
}
# Close the device so the file is finalised.
dev.off()
-output

A simple loop should do the trick:
# Write one bar chart per year into a single PDF.
pdf("my.pdf")
# Iterate over the years actually present in the data rather than a fixed 1:3.
for (i in unique(df$year)) {
  # Subset to the current year: compare against the loop variable i, so each
  # page shows its own year's data and total.
  plot_df <- df %>% filter(year == i)
  p <- ggplot(plot_df, aes(col, freq)) +
    geom_col() +
    labs(
      title = paste("Year", i),
      caption = paste0("N=", sum(plot_df$freq))
    )
  # Inside a loop a ggplot object must be printed explicitly to be drawn.
  print(p)
}
dev.off()
Resulting in
my.pdf
Page 1
Page 2
Page 3

Related

geom_point - change colour of variable

I coloured my points by "Full Bath", but the shades are too similar to tell apart.
I would like to change the colours used for the "Full Bath" variable so the groups are easier to distinguish.
Could anyone help? :)
This is what I have:
# Scatter plot with points coloured by Full.Bath, plus a smoother layer.
ggplot(data, aes(x = Sale.Price, y = Total.Bsmt.SF)) +
  geom_point(mapping = aes(colour = Full.Bath)) +
  geom_smooth()
To manually change the colors, you can use scale_color_manual, as follows:
### Simulated data: 100 rows, Full.Bath cycling through five letters
set.seed(1)
data <- data.frame(
  Sale.Price = rnorm(100, mean = 100, sd = 10),
  Total.Bsmt.SF = rnorm(100, mean = 10, sd = 1),
  Full.Bath = rep(c("a", "b", "c", "d", "e"), times = 20)
)
### One hand-picked colour per Full.Bath value
bath_colours <- c("#84d58d", "#84c1d5", "#9e84d5", "#d584b0", "#d5be84")
### Plot data
ggplot(data, aes(x = Sale.Price, y = Total.Bsmt.SF)) +
  geom_point(aes(colour = Full.Bath)) +
  geom_smooth() +
  scale_color_manual(values = bath_colours)
Link for colors selection
https://htmlcolorcodes.com/fr/

For looping x-as in ggplot

I would like to create multiple histograms (ggplot) using a for loop. The problem is that the x-axis label of every plot stays the same ("value"). Do you know how to change the x-axis label on each iteration of the loop?
My dataframe for example:
# Example data: three variables (A, B, C) with two observations each.
df <- data.frame(
  variable = rep(c("A", "B", "C"), each = 2),
  value = c(1, 2, 4, 5, 2, 3)
)
So that means I would get three plots with the x-axis labels "A", "B" and "C".
My code:
# Draw one histogram of `value` for each distinct entry of `variable`.
for (level in unique(df$variable)) {
  # which() drops NA comparisons, matching what subset() keeps.
  rows <- df[which(df$variable == level), , drop = FALSE]
  level_plot <- ggplot(rows, aes(x = value)) +
    geom_histogram()
  print(level_plot)
}
You can use imap to give each plot its own x-axis label after splitting the data by variable.
library(ggplot2)
# Split df into one data frame per `variable`; imap() passes each piece and
# its list name to the function, and the name becomes the x-axis label.
list_plot <- purrr::imap(
  split(df, df$variable),
  function(piece, label) {
    ggplot(piece, aes(x = value)) +
      geom_histogram() +
      xlab(label)
  }
)
Also have you considered using facets? Where x-axis is the same and you get A, B, C as facet names.
# One histogram panel per value of `variable`.
ggplot(data = df, mapping = aes(x = value)) +
  geom_histogram() +
  facet_wrap(~variable)

Graph Help - Circular Barplot

I've no idea where to even start with this. I've looked at GGPlot and plotly etc to try and find the right thing but haven't come across anything.
This is as example of my data though
# One row per skill, with the letter grade given for that skill.
Skill <- c(
  "Tackling", "Shooting", "Technique",
  "Passing", "Pace", "Stamina"
)
Grade <- c("A", "C", "C", "B", "A", "B")
data <- data.frame(Skill = Skill, Grade = Grade)
This is the sort of graph I'd like
I'm a football scout and it would be fantastic to be able to have a graph like that to compare the players we have to the player I'm scouting.
so if the grade is D, it would just show red, if the grade was C it would show red and orange. Etc.
This is quite close to what you want:
# One row per skill, with the letter grade given for that skill.
Skill <- c(
  "Tackling", "Shooting", "Technique",
  "Passing", "Pace", "Stamina"
)
Grade <- c("A", "C", "C", "B", "A", "B")
data <- data.frame(Skill = Skill, Grade = Grade)
library(ggplot2)
library(dplyr)
# Grade scale in the order D, C, B, A.
grade_levels <- c("D", "C", "B", "A")
data$grade <- factor(data$Grade, levels = grade_levels)
# Each step caps the grade one level lower: grade2 turns A into B, grade3
# then turns B into C, and grade4 turns C into D.
data$grade2 <- recode(data$grade, A = "B")
data$grade3 <- recode(data$grade2, B = "C")
data$grade4 <- recode(data$grade3, C = "D")
# Four bar layers, one per capped grade column, each with its own fill colour.
ggplot(data, aes(x = Skill, y = grade)) +
  geom_col(fill = "green", colour = "black", width = 1) +
  geom_col(aes(y = grade2), fill = "yellow", colour = "black", width = 1) +
  geom_col(aes(y = grade3), fill = "orange", colour = "black", width = 1) +
  geom_col(aes(y = grade4), fill = "red", colour = "black", width = 1) +
  scale_y_discrete(limits = grade_levels) +
  coord_polar(start = pi / 6) +
  theme_bw() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank()
  )
How about this
library(ggplot2)
# One tile per skill/grade pair, filled by grade, drawn in polar coordinates.
ggplot(data, mapping = aes(x = Skill, y = Grade, fill = Grade)) +
  geom_tile() +
  coord_polar() +
  theme_bw()
To have all levels below the grade coded, you'll need to have all those lower levels within the dataframe, which is in a way redundant. Wouldn't it be?
# Numeric rank of each grade, using the level order D, C, B, A (D = 1 ... A = 4).
grade_order <- c("D", "C", "B", "A")
d <- transform(data, gr = as.numeric(factor(data$Grade, grade_order)))
# For one skill's rows, build a data frame that repeats the first Skill and
# Grade alongside seq(gr), reusing the original column names.
expand_skill <- function(x) {
  with(x, setNames(data.frame(Skill[1], Grade[1], seq(gr)), names(x)))
}
# Expand each skill separately, then stack the pieces back into one data frame.
d <- do.call(rbind, lapply(split(d, d$Skill), expand_skill))
library(ggplot2)
ggplot(d, aes(x = Skill, y = gr, fill = factor(gr, levels = 4:1))) +
  geom_col() +
  coord_polar()

How to make frequency barplot in groups?

Suppose my data is two columns, one is "Condition", one is "Stars"
# Five ratings: a Condition (A or B) and the Stars label given under it.
food <- data.frame(
  Condition = c("A", "B", "A", "B", "A"),
  Stars = c("good", "meh", "meh", "meh", "good")
)
How to make a barplot of the frequency of "Star" as grouped by "Condition"?
I read here but would like to expand that answer to include groups.
for now I have
# Base plot with Stars on the x-axis.
q <- ggplot(data = food, mapping = aes(x = Stars))
# Bar height is each bar's count divided by the sum of all counts.
share_of_all <- geom_bar(mapping = aes(y = ..count.. / sum(..count..)))
q + share_of_all
but that is the proportion of the full data set.
How to make a plot with four bars, that is grouped by 'Condition'?
Eg. 'Condition A' would have 'Good' as 0.66 and 'Meh' as 0.33
I guess this is what you are looking for:
food <- data.frame(
  Condition = c("A", "B", "A", "B", "A"),
  Stars = c("good", "meh", "meh", "meh", "good")
)
library(ggplot2)
library(dplyr)
# Share of each Stars value within its Condition. Condition has to be the
# first grouping variable: summarize() drops the last one (Stars), so the
# mutate() that follows divides each count by its Condition's total
# (A: good 2/3, meh 1/3; B: meh 1).
data <- food %>%
  group_by(Condition, Stars) %>%
  summarize(n = n()) %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()
# The frequencies are already computed, so stat = "identity" plots them as
# given; position = "dodge" puts the Conditions side by side.
ggplot(data, aes(x = Stars, fill = Condition, group = Condition)) +
  geom_bar(aes(y = freq), stat = "identity", position = "dodge")
First I calculated the frequencies with the dplyr package; they are used as the y aesthetic in geom_bar(). Then I used fill = Condition in ggplot(), which splits the bars by Condition. Additionally, I set position = "dodge" to place the bars next to each other, and stat = "identity" because the frequencies are already calculated.
Alternatively, use the computed variable ..prop.. together with the group aesthetic and facet_wrap(). With group = Condition, the proportions are computed within each group, and facet_wrap() plots each condition in its own panel.
# library() stops with an error when ggplot2 is missing; require() would only
# warn and return FALSE, leaving the script to fail later at ggplot().
library(ggplot2)
food <- data.frame(
  Condition = c("A", "B", "A", "B", "A"),
  Stars = c("good", "meh", "meh", "meh", "good")
)
# group = Condition makes ..prop.. a proportion within each Condition;
# facet_wrap() then draws one panel per Condition.
ggplot(food) +
  geom_bar(aes(x = Stars, y = ..prop.., group = Condition)) +
  facet_wrap(~ Condition)

How to make a pie chart with ggplot2?

I am trying to plot a pie chart using the following dataset
# Three named rows; each numeric column is its own draw of three values from
# 1:10 without replacement. No seed is set, so the values differ between runs.
dt <- data.frame(
  name = c("A", "B", "C"),
  one = sample(1:10, size = 3),
  two = sample(1:10, size = 3),
  three = sample(1:10, size = 3)
)
Of course the data are untidy, so I rearrange the dataset into long format using
library(dplyr)
# NOTE(review): gather() is, as far as I know, a tidyr function rather than a
# dplyr one; confirm tidyr is attached before this line runs.
# Reshape dt with gather(), passing "letter" as its first argument after the
# data (presumably the name of the key column; verify against the gather() docs).
dt <- dt %>% gather("letter")
# Rename the second column of the reshaped data to "number".
# NOTE(review): the plots that follow map `letter`, `value` and `number`;
# check that all three columns exist after this rename.
colnames(dt)[2] <- "number"
And I am perfectly able to plot a barchart
library(ggplot2)
# Bars per `letter`, heights taken directly from `value`, filled by `number`.
ggplot(data = dt, mapping = aes(x = letter, y = value, fill = number)) +
  geom_col()
But when I apply the coord_polar() transformation, I can't make the slices look even nor make the pie-chart to sum up to 100%
# Same bar chart as before, with the x-axis mapped to the angle.
ggplot(data = dt, mapping = aes(x = letter, y = value, fill = number)) +
  geom_col() +
  coord_polar(theta = "x")

Resources