Here is the codes and the present outplot
df <- data.frame(state = c('0','1'),
male = c(26287942,9134784),
female = c(16234000,4406645))
#output
> df
state male female
1 0 26287942 16234000
2 1 9134784 4406645
library(ggplot2)
library(tidyr)
df_long <- pivot_longer(df, cols = c("female","male"))
names(df_long) <- c('state','sex','observations')
ggplot(data = df_long) +
geom_col(aes(x = sex, y =observations, fill = state)) +
theme(legend.position = c(0.1,0.9),
legend.background = element_rect(fill='lightgrey') )
I want to adjust the plots like this. (I marked what I want to change.)
Simplify the scientific records in y-axis.
Count the ratio (the number of state 1)/(the number of state 0 + state 1) and plot like this.
It may be a little complicated, and I don't know which functions to use. If possible, can anyone tell me some related functions or examples?
You can set options(scipen = 99) to disable scientific notation on y-axis. We can create a separate dataset for label data.
library(tidyverse)
options(scipen = 99)
long_data <- df %>%
pivot_longer(cols = c(male, female),
names_to = "sex",
values_to = "observations")
label_data <- long_data %>%
group_by(sex) %>%
summarise(perc = observations[match(1, state)]/sum(observations),
total = sum(observations), .groups = "drop")
ggplot(long_data) +
geom_col(aes(x = sex, y = observations, fill = state)) +
geom_text(data = label_data,
aes(label = round(perc, 2), x = sex, y = total),
vjust = -0.5) +
theme(legend.position = c(0.1,0.9),
legend.background = element_rect(fill='lightgrey'))
By searching the Internet for about two days, I have finished the work!
sex <- c('M','F')
y0 <- c(26287942,16234000)
y1 <- c(9134784, 4406645)
y0 <- y0*10^{-7}
y1 <- y1*10^{-7}
ratio <- y1/(y0+y1)
ratio <- round(ratio,2)
m <- t(matrix(c(y0,y1),ncol=2))
colnames(m) <- c(as.character(sex))
df <- as.data.frame(m)
df <- cbind(c('0','1'),df)
colnames(df)[1] <- 'observations'
df
df_long <- pivot_longer(df, cols = as.character(sex))
names(df_long) <- c('state','sex','observations')
df_r <- as.data.frame(df_long)
df_r <- data.frame(df_r,ratio=rep(ratio,2))
ggplot(data = df_r) +
geom_col(aes(x =sex, y = observations, fill = state))+
theme(legend.position = c(0.1,0.9),
legend.background = element_rect(fill=NULL) )+
geom_line(aes(x=sex,y=ratio*10),group=1)+
geom_point(aes(x=sex,y=ratio*10))+
geom_text(aes(x=sex,y=ratio*10+0.35),label=rep(ratio,2))+
scale_y_continuous(name =expression(paste('observations(','\u00D7', 10^7,')')),
sec.axis = sec_axis(~./10,name='ratio'))
The output:
Given two monthly time series data sample from this link.
I will need to create one plot containing 3 subplots: plot1 for the original values, plot2 for month over month changes, and plot3 for year over year changes.
I'm able to draw the plot with code below, but the code is too redundant. So my question is how could achieve that in a concise way? Thanks.
library(xlsx)
library(ggplot2)
library(reshape)
library(dplyr)
library(tidyverse)
library(lubridate)
library(cowplot)
library(patchwork)
df <- read.xlsx('./sample_data.xlsx', 'Sheet1')
colnames(df)
# df
cols <- c('food_index', 'energy_index')
df <- df %>% mutate(date=as.Date(date)) %>%
mutate(across(-contains('date'), as.numeric)) %>%
mutate(date= floor_date(date, 'month')) %>%
group_by(date) %>%
summarise_at(vars(cols), funs(mean(., na.rm=TRUE))) %>%
mutate(across(cols, list(yoy = ~(. - lag(., 12))/lag(., 12)))*100) %>%
mutate(across(cols, list(mom = ~(. - lag(., 1))/lag(., 1)))*100) %>%
filter(date >= '2018-01-01' & date <= '2021-12-31') %>%
as.data.frame()
df1 <- df %>%
select(!grep('mom|yoy', names(df)))
df1_long <- melt(df1, id.vars = 'date')
plot1 <- ggplot(df1_long[!is.na(df1_long$value), ],
aes(x = date,
y = value,
col = variable)) +
geom_line(size=0.6, alpha=0.5) +
geom_point(size=1, alpha=0.8) +
labs(
x='',
y='Unit: $'
)
# MoM changes
df2 <- df %>%
select(grep('date|mom', names(df)))
df2_long <- melt(df2, id.vars = 'date')
plot2 <- ggplot(df2_long[!is.na(df2_long$value), ],
aes(x = date,
y = value,
col = variable)) +
geom_line(size=0.6, alpha=0.5) +
geom_point(size=1, alpha=0.8) +
labs(
x='',
y='Unit: %'
)
# YoY changes
df3 <- df %>%
select(grep('date|yoy', names(df)))
df3_long <- melt(df3, id.vars = 'date')
plot3 <- ggplot(df3_long[!is.na(df3_long$value), ],
aes(x = date,
y = value,
col = variable)) +
geom_line(size=0.6, alpha=0.5) +
geom_point(size=1, alpha=0.8) +
labs(
x='',
y='Unit: %'
)
plot <- plot1 + plot2 + plot3 + plot_layout(ncol=1)
# plot <- plot_grid(plot1, plot2, plot3, labels = c('Value', 'MoM', 'YoY'), label_size = 12)
plot
Out:
The expected result will be similar to the plot below (the upper plot will display the original data, the middle plot will display the mom changes data, and the lower plot will display the yoy changes data):
References:
https://waterdata.usgs.gov/blog/beyond-basic-plotting/
http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/81-ggplot2-easy-way-to-mix-multiple-graphs-on-the-same-page/
Side-by-side plots with ggplot2
Maybe this is what you are looking for? By reshaping your data to the right shape, using a plot function and e.g. purrr::map2 you could achieve your desired result without duplicating your code like so.
Using some fake random example data to mimic your true data:
library(tidyr)
library(dplyr)
library(ggplot2)
df_long <- df |>
rename(food_index_raw = food_index, energy_index_raw = energy_index) |>
pivot_longer(-date, names_to = c("variable", ".value"), names_pattern = "^(.*?_index)_(.*)$")
plot_fun <- function(x, y, ylab) {
x <- x |>
select(date, variable, value = .data[[y]]) |>
filter(!is.na(value))
ggplot(
x,
aes(
x = date,
y = value,
col = variable
)
) +
geom_line(size = 0.6, alpha = 0.5) +
geom_point(size = 1, alpha = 0.8) +
labs(
x = "",
y = ylab
)
}
yvars <- c("raw", "mom", "yoy")
ylabs <- paste0("Unit: ", c("$", "%", "%"))
plots <- purrr::map2(yvars, ylabs, plot_fun, x = df_long)
library(patchwork)
wrap_plots(plots) + plot_layout(ncol = 1)
DATA
set.seed(123)
date <- seq.POSIXt(as.POSIXct("2017-01-31"), as.POSIXct("2022-12-31"), by = "month")
food_index <- runif(length(date))
energy_index <- runif(length(date))
df <- data.frame(date, food_index, energy_index)
EDIT Adding subtitles to each plot when using patchwork is (as of the moment) a bit tricky. What I would do in this case would be to use a faceting "hack". To this end I slightly adjusted the function to take a subtitle argument and switched to purrr::pmap:
library(tidyr)
library(dplyr)
library(ggplot2)
df_long <- df |>
rename(food_index_raw = food_index, energy_index_raw = energy_index) |>
pivot_longer(-date, names_to = c("variable", ".value"), names_pattern = "^(.*?_index)_(.*)$")
plot_fun <- function(x, y, ylab, subtitle) {
x <- x |>
select(date, variable, value = .data[[y]]) |>
filter(!is.na(value))
ggplot(
x,
aes(
x = date,
y = value,
col = variable
)
) +
geom_line(size = 0.6, alpha = 0.5) +
geom_point(size = 1, alpha = 0.8) +
facet_wrap(~.env$subtitle) +
labs(
x = "",
y = ylab
) +
theme(strip.background = element_blank(), strip.text.x = element_text(hjust = 0))
}
yvars <- c("raw", "mom", "yoy")
ylabs <- paste0("Unit: ", c("$", "%", "%"))
subtitle <- c("Original", "Month-to-Month", "Year-to-Year")
plots <- purrr::pmap(list(y = yvars, ylab = ylabs, subtitle = subtitle), plot_fun, x = df_long)
library(patchwork)
wrap_plots(plots) + plot_layout(ncol = 1)
The target output is done with facets rather than stitching plots together. You could do this too if you like, but it requires reshaping your data in a different way. Which approach you take is really a matter of taste.
library(ggplot2)
library(dplyr)
yoy <- function(x) 100 * (x - lag(x, 13)) / lag(x, 12)
mom <- function(x) 100 * (x - lag(x)) / lag(x)
df %>%
mutate(date = as.Date(date, origin = "1899-12-30"),
`Actual value (Dollars).Food Index` = food_index,
`Month-on-month change (%).Food Index` = mom(food_index),
`Year-on-year change (%).Food Index` = yoy(food_index),
`Actual value (Dollars).Energy Index` = energy_index,
`Month-on-month change (%).Energy Index` = mom(energy_index),
`Year-on-year change (%).Energy Index` = yoy(energy_index)) %>%
select(-food_index, -energy_index) %>%
tidyr::pivot_longer(-1) %>%
filter(date > as.Date("2018-01-01")) %>%
tidyr::separate(name, into = c("series", "index"), sep = "\\.") %>%
ggplot(aes(date, value, color = index)) +
geom_point(na.rm = TRUE) +
geom_line() +
facet_grid(series~., scales = "free_y") +
theme_bw(base_size = 16)
Reproducible data taken from link in question
df <- structure(list(date = c(42766, 42794, 42825, 42855, 42886, 42916,
42947, 42978, 43008, 43039, 43069, 43100, 43131, 43159, 43190,
43220, 43251, 43281, 43312, 43343, 43373, 43404, 43434, 43465,
43496, 43524, 43555, 43585, 43616, 43646, 43677, 43708, 43738,
43769, 43799, 43830, 43861, 43890, 43921, 43951, 43982, 44012,
44043, 44074, 44104, 44135, 44165, 44196, 44227, 44255, 44286,
44316, 44347, 44377, 44408, 44439, 44469, 44500, 44530, 44561
), food_index = c(58.53, 61.23, 55.32, 55.34, 61.73, 56.91, 54.27,
59.08, 60.11, 66.01, 60.11, 63.41, 69.8, 72.45, 81.11, 89.64,
88.64, 88.62, 98.27, 111.11, 129.39, 140.14, 143.44, 169.21,
177.39, 163.88, 135.07, 151.28, 172.81, 143.82, 162.13, 172.22,
176.67, 179.3, 157.27, 169.12, 192.51, 194.2, 179.4, 169.1, 193.17,
174.92, 181.92, 188.41, 192.14, 203.41, 194.19, 174.3, 174.86,
182.33, 182.82, 185.36, 192.41, 195.59, 202.6, 201.51, 225.01,
243.78, 270.67, 304.57), energy_index = c(127.36, 119.87, 120.96,
112.09, 112.19, 109.24, 109.56, 106.89, 109.35, 108.35, 112.39,
117.77, 119.52, 122.24, 120.91, 125.41, 129.72, 135.25, 139.33,
148.6, 169.62, 184.23, 204.38, 198.55, 189.29, 202.47, 220.23,
240.67, 263.12, 249.74, 240.84, 243.42, 261.2, 256.76, 258.69,
277.98, 289.63, 293.46, 310.81, 318.68, 310.04, 302.17, 298.62,
260.92, 269.29, 258.84, 241.68, 224.18, 216.36, 226.57, 235.98,
253.86, 267.37, 261.99, 273.37, 280.91, 291.84, 297.88, 292.78,
289.79)), row.names = c(NA, 60L), class = "data.frame")
Below the result of a R script:
This R code snippet is:
as.data.frame(y3) %>%
mutate(row = row_number()) %>% # add row to simplify next step
pivot_longer(-row) %>% # reshape long
ggplot(aes(value, color = name)) + # map x to value, color to name
geom_density()
How can I change the name of xlabel (value) and ylabel (density) and the legend also (v1, v2, v3, v4, v5)?
Update 1
By using the code snippet of #Park, I get no curves plotted:
as.data.frame(y3) %>%
mutate(row = row_number()) %>% # add row to simplify next step
pivot_longer(-row) %>% # reshape long
mutate(name = recode(name, V1="z = 0.9595", V2="z = 1.087", V3="z = 1.2395", V4="z = 1.45", V5="z = 1.688")) %>%
ggplot(aes(value, color = name)) + # map x to value, color to name
geom_density() +
xlab("Distribution of Ratio $b_{sp}/b_{ph}$ or each redshift") +
ylab("Number of occurences")
and the result:
I tried also to use subscript with Latex format : $b_{sp}/b_{ph}$ but without success.
You may try xlab, ylab, scale_color_manual,
as.data.frame(y3) %>%
mutate(row = row_number()) %>% # add row to simplify next step
pivot_longer(-row) %>% # reshape long
ggplot(aes(value, color = name)) + # map x to value, color to name
geom_density() +
xlab("text") +
ylab("text") +
scale_color_manual(labels = c("a", "b", "c", "d", "e"))
Recode before plot
as.data.frame(y3) %>%
mutate(row = row_number()) %>% # add row to simplify next step
pivot_longer(-row) %>% # reshape long
mutate(name = recode(name, V1 = "a", V2 = "b", V3 = "c", V4 = "d", V5 = "e")) %>%
ggplot(aes(value, color = name)) + # map x to value, color to name
geom_density() +
xlab("text") +
ylab("text")
Using Array_total_WITH_Shot_Noise data
my_data <- read.delim("D:/Prac/Array_total_WITH_Shot_Noise.txt", header = FALSE, sep = " ")
array_2D <- array(my_data)
z_ph <- c(0.9595, 1.087, 1.2395, 1.45, 1.688)
b_sp <- c(1.42904922, 1.52601862, 1.63866958, 1.78259615, 1.91956918)
b_ph <- c(sqrt(1+z_ph))
ratio_squared <- (b_sp/b_ph)^2
nRed <- 5
nRow <- NROW(my_data)
nSample_var <- 1000000
nSample_mc <- 1000
Cl<-my_data[,2:length(my_data)]#suppose cl=var(alm)
Cl_sp <- array(0, dim=c(nRow,nRed))
Cl_ph <- array(0, dim=c(nRow,nRed))
length(Cl)
for (i in 1:length(Cl)) {
#(shape/rate) convention :
Cl_sp[,i] <-(Cl[, i] * ratio_squared[i])
Cl_ph[,i] <- (Cl[, i])
}
L <- array_2D[,1]
L <- 2*(array_2D[,1])+1
# Weighted sum of Chi squared distribution
y3_1<-array(0,dim=c(nSample_var,nRed));y3_2<-array(0,dim=c(nSample_var,nRed));y3<-array(0,dim=c(nSample_var,nRed));
for (i in 1:nRed) {
for (j in 1:nRow) {
# Try to summing all the random variable
y3_1[,i] <- y3_1[,i] + Cl_sp[j,i] * rchisq(nSample_var,df=L[j])
y3_2[,i] <- y3_2[,i] + Cl_ph[j,i] * rchisq(nSample_var,df=L[j])
}
y3[,i] <- y3_1[,i]/y3_2[,i]
}
as.data.frame(y3) %>%
mutate(row = row_number()) %>% # add row to simplify next step
pivot_longer(-row) %>% # reshape long
mutate(name = recode(name, V1="z = 0.9595", V2="z = 1.087", V3="z = 1.2395", V4="z = 1.45", V5="z = 1.688")) %>%
ggplot(aes(value, color = name)) + # map x to value, color to name
geom_density() +
xlab(TeX("Distribution of Ratio $b_{sp}/b_{ph}$ or each redshift")) +
ylab("Number of occurences")
The following code produces a grid of plots:
library(tidyverse)
library(grid)
library(patchwork)
exdata <- diamonds %>%
group_by(cut) %>%
nest %>%
crossing(dummy = 1:3) %>%
crossing(cohort = LETTERS[1:3]) %>%
mutate(plots = map(.x = data, ~ ggplot(.x, aes(x = x, y = y)) + geom_point()))
# vars for plot grid
mod <- 'Fair'
colsn <- length(unique(exdata$dummy))
rowsn <- length(exdata$cohort %>% unique)
# create plots
wrap_plots(plotlist = exdata %>% filter(cut == mod) %>% pull(plots), ncol = colsn, nrow = rowsn) + plot_annotation(title = " ")
# add some text across columns
walk2(seq(0.165,0.835, length.out = colsn), unique(exdata$dummy), ~ grid.draw(textGrob(.y, x = .x, y = 0.98, rot = 0)))
Looks like this:
In that code block I have a variable mod <- 'Fair'. I would like to make this a vector and produce a grid for each type of cut in diamonds. Tried:
mods <- exdata$cut %>% unique %>% as.vector
walk(mods, function(.x) {
wrap_plots(plotlist = exdata %>% filter(cut == .x) %>% pull(plots), ncol = colsn, nrow = rowsn) + plot_annotation(title = " ")
walk2(seq(0.165,0.835, length.out = colsn), unique(exdata$dummy), ~ grid.draw(textGrob(.y, x = .x, y = 0.98, rot = 0)))
})
This code seems to run without error, but no plots all returned. The grid itself is, just no plots:
How can I run a nested walk loop to generate a grid of plots for each type of cut?
Just wrapping your plots in a print statement solves the issue
walk(mods, function(.x) {
print(wrap_plots(plotlist = exdata %>% filter(cut == .x) %>% pull(plots), ncol = colsn, nrow = rowsn) + plot_annotation(title = " "))
walk2(seq(0.165,0.835, length.out = colsn), unique(exdata$dummy), ~ grid.draw(textGrob(.y, x = .x, y = 0.98, rot = 0)))
})
Goal is to produce a visualization indicating ratio.
Please help us how can we produce such ratio chart (high lighted) in R ?
library(tidyverse)
# Dataset creation
df <- data.frame(cls = c(rep("A",4),rep("B",4)),
grd = c("A1",rep("A2",3),rep(c("B1","B2"), 2)),
typ = c(rep("m",2),rep("o",2),"m","n",rep("p",2)),
pnts = c(rep(1:4,2)))
df
#### Data wrangling
df1 <- df %>%
group_by(cls) %>%
summarise(cls_pct = sum(pnts))
df1
df2 <- df %>%
group_by(cls,grd) %>%
summarize(grd_pct = sum(pnts))
df2
df3 <- df %>%
group_by(cls,grd,typ) %>%
summarise(typ_pct = sum(pnts))
df3
#### Attempt to combine all df1,df2,df3
# but mutate and summarise are mixing up leading to wrong results
df3 %>%
group_by(cls,grd) %>%
mutate(grd_pct = sum(typ_pct)) %>%
group_by(cls) %>%
mutate(cls_pct = sum(grd_pct))
Attempt to visualize all the ratios in 1 chart
data %>%
pivot_longer(cols = -c(cls:pnts),
names_to = "per_cat",
values_to = "percent") %>%
ggplot(aes(cls,percent, col = typ, fill = grd)) +
geom_bar(stat = "identity") +
coord_flip() +
theme_bw()
plot of the same.
EDIT -- added formula version with more useful output for visualization.
ORIG: At this point it may be worth making a function to reduce copying and pasting, but this may get you what you need:
library(tidyverse)
df %>%
group_by(cls) %>%
mutate(per1 = sum(pnts),
per1_pct = per1 / sum(per1)) %>%
group_by(cls, grd) %>%
mutate(per2 = sum(pnts),
per2_pct = per2 / sum(per2)) %>%
group_by(cls, grd, typ) %>%
mutate(per3 = sum(pnts),
per3_pct = per3 / sum(per3)) %>%
ungroup()
EDIT: Here's a general function to calculate the stats for a given grouping, making it easier to combine a few groupings together in long format better suited for visualization.
df_sum <- function(df, level, ...) {
df %>%
group_by(...) %>%
summarize(grp_ttl = sum(pnts)) %>%
mutate(ttl = sum(grp_ttl),
pct = grp_ttl / ttl) %>%
ungroup() %>%
mutate(level = {{ level }} )
}
df_sum(df, level = 1, cls) %>%
bind_rows(df_sum(df, level = 2, cls, grd)) %>%
bind_rows(df_sum(df, level = 3, cls, grd, typ)) %>%
mutate(label = coalesce(as.character(typ), # This grabs the first non-NA
as.character(grd),
as.character(cls))) -> df_summed
df_summed %>%
ggplot(aes(level, grp_ttl)) +
geom_col(color = "white") +
geom_text(aes(label = paste0(label, "\n", grp_ttl, "/", ttl)),
color = "white",
position = position_stack(vjust = 0.5)) +
scale_x_reverse() + # To make level 1 at the top
coord_flip() # To switch from vertical to horizontal orientation