I have dataset below and would like to have the results in the picture attached using ggplot.
bar_df
class boys girls
a 20 30
b 40 30
c 50 50
d 30 70
I tried code below
# Reshape to long format, keeping `class` as the identifier column.
bar_df <- melt(bar_df, id.vars = 'class')
# NOTE(review): asker's attempt, kept as posted. x is mapped to `variable`
# here, not `class`, so bars are laid out per variable rather than per class.
ggplot(bar_df,aes(x = variable,y = value)) +
geom_bar(aes(fill = variable),stat = "identity",position = "dodge")
Another option is to use pivot_longer from tidyverse.
library(tidyverse)
# Reshape to long format (every column except `class` becomes a name/value
# pair), then draw one dodged bar per name within each class.
df %>%
  pivot_longer(cols = -class) %>%
  ggplot(aes(x = class, y = value, fill = name)) +
  geom_col(position = "dodge")
Or with melt (as shown by @r2evans):
# Same plot via melt(): `class` stays as the id column and the melted
# `variable` column drives the fill colour.
# NOTE(review): melt() is not attached by library(tidyverse); presumably
# reshape2 (or data.table) is loaded elsewhere -- confirm.
melt(df, id.vars = "class") %>%
  ggplot(aes(x = class, y = value, fill = variable)) +
  geom_col(position = "dodge")
Output
Data
# Example data: one row per class, with integer `boys` and `girls` columns.
df <- data.frame(
  class = c("a", "b", "c", "d"),
  boys = c(20L, 40L, 50L, 30L),
  girls = c(30L, 30L, 50L, 70L),
  stringsAsFactors = FALSE
)
Related
I have a dataset containing y variable as Year and x variables as (A, B, C(%)). I have attached the dataset here.
# dput() output for `result`: 14 rows covering Year 2008-2021, with integer
# columns A and B and a numeric column named `C...`.
# NOTE(review): the surrounding text refers to this column as C / C(%);
# presumably the name was altered on import -- confirm before plotting.
dput(result)
structure(list(Year = 2008:2021, A = c(4L, 22L, 31L, 48L, 54L,
61L, 49L, 56L, 59L, 85L, 72L, 58L, 92L, 89L), B = c(1L, 2L, 6L,
7L, 14L, 21L, 15L, 27L, 27L, 46L, 41L, 26L, 51L, 62L), C... = c(25,
9.09, 19.35, 14.58, 25.93, 34.43, 30.61, 48.21, 45.76, 54.12,
56.94, 44.83, 55.43, 69.66)), class = "data.frame", row.names = c(NA,
-14L))
The variables A and B will be plotted as stacked bar graph and the C will be plotted as line chart in the same plot. I have generated the plot using excel like below:
How can I create the same plot in R?
You first need to reshape longer, for example with pivot_longer() from tidyr, and then you can use ggplot2 to plot the bars and the line in two separate layers. The fill = argument in the geom_bar(aes()) lets you stratify each bar according to a categorical variable - name is created automatically by pivot_longer().
library(ggplot2)
library(tidyr)
# Stack A and B as bars (long format, filled by series name) and overlay the
# C(%) series as a line in a second layer.
dat |>
  pivot_longer(cols = A:B) |>
  ggplot(aes(x = Year)) +
  geom_col(aes(y = value, fill = name)) +
  geom_line(aes(y = `C(%)`), size = 2)
Created on 2022-06-09 by the reprex package (v2.0.1)
You're asking for overlaid bars, in which case there's no need to pivot, and you can add separate layers. However, I would argue that this could confuse or mislead many people — bars in plots like this are usually stacked, not overlaid — so tread with caution!
library(ggplot2)
library(tidyr)
# Overlaid (not stacked) bars: A and B are drawn as separate layers straight
# from the wide data, with B semi-transparent on top of A, plus the C(%) line.
ggplot(dat, aes(x = Year)) +
  geom_col(aes(y = A), fill = "lightgreen") +
  geom_col(aes(y = B), fill = "red", alpha = 0.5) +
  geom_line(aes(y = `C(%)`), size = 2) +
  labs(y = "", caption = "NB: bars are overlaid, not stacked!")
Created on 2022-06-09 by the reprex package (v2.0.1)
I propose this:
library(data.table)
library(ggplot2)
library(ggthemes)
# Read the data and reshape it to long format (one row per Year/variable).
dt <- fread("dataset.csv")
dt.long <- melt(dt, id.vars = "Year")

# Factor used to draw C(%) on the A&B axis. The secondary axis divides by the
# same factor, so its labels show the original percentages and the two
# transformations cannot drift apart.
c_scale <- 3 / 2

dt.AB <- dt.long[variable %in% c("A", "B")]
# Subsetting with `j` already returns a new data.table, so copy() is not needed.
dt.C <- dt.long[variable == "C(%)", .(Year, variable, value = value * c_scale)]

ggplot(dt.AB, aes(x = Year, y = value, fill = variable)) +
  geom_col() +
  geom_line(data = dt.C, aes(x = Year, y = value), colour = "red") +
  # Request roughly one x-axis break per distinct year.
  scale_x_continuous(
    breaks = pretty(dt.AB$Year, n = length(unique(dt.AB$Year)))
  ) +
  scale_y_continuous(
    name = "A&B",
    breaks = seq(0, 150, 10),
    sec.axis = sec_axis(~ . / c_scale, name = "C(%)", breaks = seq(0, 100, 10))
  ) +
  theme_hc() +
  scale_fill_manual(values = c("grey70", "grey50", "grey30")) +
  theme(
    axis.line.y = element_line(colour = "black", size = 0.5, linetype = "solid")
  )
I have a dataset that looks something like this:
a b a_total b_total
dog 3 5 10 8
cat 6 2 12 13
pig 9 3 15 9
I'm trying to make a stacked barplot using ggplot, with "a_total" on the bottom and "a" on top for each animal. I tried this, but it doesn't work.
# NOTE(review): asker's non-working attempt, kept as posted. The opening
# `ggplot(` is never closed, and `y` is given the column names as strings.
ggplot(df, aes(x = "", y = c("a", "a_total")) + geom_bar(stat= "identity")
How should I go about this?
We create a column from the row names (rownames_to_column), select the 'a' columns along with the new column, reshape to 'long' format (pivot_longer) and do the plotting
library(dplyr)
library(tidyr)
library(ggplot2)
# Stacked bars of `a` and `a_total` for each animal.
# rownames_to_column() lives in tibble, which the library() calls above
# (dplyr, tidyr, ggplot2) do not attach, so it is namespace-qualified here.
df %>%
  tibble::rownames_to_column("animal") %>%  # row names -> `animal` column
  select(animal, a, a_total) %>%
  pivot_longer(cols = -animal) %>%          # long format: name / value
  ggplot(aes(x = animal, y = value, fill = name)) +
  geom_col() +
  theme_bw()
-output
Also, this can be done for both 'a' and 'b' in a facet_wrap
# Stacked bars for both the `a` and `b` column families, one facet per family.
# rownames_to_column() is namespace-qualified because tibble is not attached
# by library(dplyr), library(tidyr) or library(ggplot2).
df %>%
  tibble::rownames_to_column("animal") %>%
  pivot_longer(cols = -animal) %>%
  # First letter of the column name ("a" or "b") selects the facet.
  mutate(abgrp = substr(name, 1, 1)) %>%
  ggplot(aes(x = animal, y = value, fill = name)) +
  geom_col() +
  theme_bw() +
  facet_wrap(~abgrp)
In base R, we can use barplot
# Base-graphics version: transpose so that each selected column becomes a row
# of the matrix and each row name of `df` becomes one stacked bar.
barplot(
  t(df[c("a", "a_total")]),
  col = c("red", "blue"),
  legend.text = TRUE  # spelled out; `legend =` relied on partial matching
)
data
# Example data: integer columns a, b, a_total, b_total, with the animal names
# stored as row names.
df <- data.frame(
  a = c(3L, 6L, 9L),
  b = c(5L, 2L, 3L),
  a_total = c(10L, 12L, 15L),
  b_total = c(8L, 13L, 9L),
  row.names = c("dog", "cat", "pig")
)
I have time series data with multiple variables measured in different units. It is daily data. The data is shown below (example data).
# Example data (dput output): six rows with consecutive dates and three
# measured columns x, y and z.
structure(list(date = structure(18324:18329, class = "Date"),
x = c(-1805605.65336663, -217934.802608961, -1032002.23625031, 234816.624919304, 1321982.20108174, 104251.623282941), y = c(0.633729348424822, 0.244916933588684, 0.873351667076349, 0.552934182109311, 0.348864572821185, 0.197756679030135), z = c(3L, 5L, 5L, 6L, 5L, 6L)), class = "data.frame", row.names = c(NA, -6L
))
Suppose X is measured in Rs Billion, Y is a ratio between 0 and 1, and Z is a count variable. I want to plot all these variables over the time period in multiple graphs ( preferably using facet_wrap)
You can use the following code
library(tidyverse)
library(lubridate)
# Plot every measured variable against date, one facet per variable, each
# facet with its own axis scales.
df %>%
  dplyr::mutate(date = ymd(date)) %>%
  # pivot_longer() replaces the superseded gather(); same key/value columns.
  pivot_longer(cols = -date, names_to = "key", values_to = "value") %>%
  ggplot(aes(x = date, y = value)) +
  geom_line() +
  facet_wrap(~key, scales = "free")
Update
# Same plot as a single column of facets, with the facet strips moved to the
# left and relabelled so they read as per-panel y-axis titles.
df %>%
  dplyr::mutate(date = ymd(date)) %>%
  # pivot_longer() replaces the superseded gather(); same key/value columns.
  pivot_longer(cols = -date, names_to = "key", values_to = "value") %>%
  ggplot(aes(x = date, y = value)) +
  geom_line() +
  theme_bw() +
  facet_wrap(
    ~key,
    scales = "free_y",
    ncol = 1,
    strip.position = "left",
    labeller = as_labeller(
      c(x = "Rs Billion", y = "Ratio", z = "Count variable (n)")
    )
  ) +
  ylab(NULL) +  # the strips act as the y-axis titles
  xlab("Date") +
  theme(
    strip.background = element_blank(),
    strip.placement = "outside"
  )
If I plot df with the code below, I can put the n for each column over the column itself, as seen in this example plot. What I would like to do is also put the percentage for each column in the label. That is the percentage of the total that the column makes up. So, for example, the label on the first column would read 127(42.9%), instead of just 127. How could I do that?
# Example data: a factor `Letter` with levels A-G and an integer count `Freq`.
letter_levels <- c("A", "B", "C", "D", "E", "F", "G")
df <- data.frame(
  Letter = factor(letter_levels, levels = letter_levels),
  Freq = c(127L, 101L, 24L, 19L, 3L, 0L, 22L)
)
# Bar chart of Freq per Letter, with the raw count printed just above each bar.
ggplot(df, aes(x = Letter, y = Freq, label = Freq)) +
  geom_col() +
  geom_text(
    size = 3,
    position = position_dodge(width = 1),
    vjust = -0.25
  )
Just create the text you want to use as a label.
# Percentage of the overall total that each row's Freq represents.
df$pct <- df$Freq / sum(df$Freq) * 100
# Label text of the form "<count> (<pct>%)", with pct rounded to one decimal.
df$label <- sprintf("%s (%s%%)", df$Freq, round(df$pct, 1))

ggplot(df, aes(x = Letter, y = Freq, label = label)) +
  geom_col() +
  geom_text(
    size = 3,
    position = position_dodge(width = 1),
    vjust = -0.25
  )
I am plotting yearly demand using ggplot (my code below) but I am not able to put color legend for the plot. My data.frame has "Zone" and "TotalDemand" (only 2 columns) and I have three data.frames for three years ("sales12", "sales13" and "sales14").
# NOTE(review): asker's attempt, kept as posted. Each layer sets `color` to a
# constant outside aes(), so no colour aesthetic is mapped for
# scale_colour_manual() to build a legend from.
ggplot() +
geom_point(data=sales12, aes(x=factor(Zone), y=TotalDemand/1000),
color='green',size=6, shape=17) +
geom_point(data=sales13, aes(x=factor(Zone), y=TotalDemand/1000),
color='red',size=6, shape=18)+
geom_point(data=sales14, aes(x=factor(Zone), y=TotalDemand/1000),
color='black',size=4, shape=19) +
labs(y='Demand (in 1000s)',x='Zones') +
scale_colour_manual(name = 'the colour',
values = c('green'='green', 'black'='black', 'red'='red'),
labels = c('12','13','14'))
Please help me to identify my mistake.
With a very small example data frame, df, I melted it to format it for ggplot.
# dput() output for the small example `df`: four zones (a factor) with integer
# columns TotalDemand, sales12, sales13 and sales14.
dput(df)
structure(list(Zone = structure(1:4, .Label = c("Alpha", "Baker",
"Charlie", "Delta"), class = "factor"), TotalDemand = c(90L,
180L, 57L, 159L), sales12 = c(25L, 40L, 13L, 50L), sales13 = c(30L,
60L, 16L, 55L), sales14 = c(35L, 80L, 28L, 54L)), .Names = c("Zone",
"TotalDemand", "sales12", "sales13", "sales14"), class = "data.frame", row.names = c(NA,
-4L))
# Long format: one row per Zone and sales column, so colour can be mapped to
# the melted `variable` column inside aes().
df.m <- melt(
  df,
  id.vars = "Zone",
  measure.vars = c("sales12", "sales13", "sales14")
)

ggplot(df.m, aes(x = factor(Zone), y = value, colour = variable)) +
  geom_point(size = 6, shape = 17) +
  labs(x = "Zones", y = "Demand (in 1000s)") +
  scale_colour_manual(values = c("green", "black", "red"))
You can adjust the size, shape, and colors of your points, add a title, etc. Your legend can also be positioned at the bottom, for example.