How to rename the bins in ggplot in R - r

so basically I have created the bins and the have the means of each bin, having these two columns in a dataframe. Now I am plotting these two columns, but I want the exact number as x lable instead of bins. I am considering renaming each bin by its mid-point. please look at the pictures. The first one is my current plot and the second is the plot I want to acheive.
my current plot:
what I want to have:
my data frame is like this:

To reproduce the style of the plot image you included, you can do:
library(tidyverse)
df %>%
mutate(bin_group = gsub("\\(|\\]", "", bin_group)) %>%
separate(bin_group, sep = ",", into = c("lower", "upper")) %>%
mutate(across(lower:upper, as.numeric)) %>%
mutate(`Birth weight (g)` = (upper + lower) / 2) %>%
ggplot(aes(`Birth weight (g)`, mean_28_day_mortality)) +
geom_vline(xintercept = 1500) +
geom_point(shape = 18, size = 4) +
scale_x_continuous(labels = scales::comma) +
labs(title = "One-year mortality", y = NULL) +
theme_bw(base_family = "serif", base_size = 20) +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_line(color = "black", size = 0.5),
plot.title = element_text(hjust = 0.5))
Edit
To make the specific changes to the range use the limits argument in scale_x_continuous and scale_y_continuous you can do:
library(tidyverse)
df %>%
mutate(bin_group = gsub("\\(|\\]", "", bin_group)) %>%
separate(bin_group, sep = ",", into = c("lower", "upper")) %>%
mutate(across(lower:upper, as.numeric)) %>%
mutate(`Birth weight (g)` = (upper + lower) / 2) %>%
ggplot(aes(`Birth weight (g)`, mean_28_day_mortality)) +
geom_vline(xintercept = 1500) +
geom_point(shape = 18, size = 4) +
scale_x_continuous(labels = scales::comma, limits = c(1350, 1650),
breaks = seq(1350, 1650, 50)) +
scale_y_continuous(limits = c(0, 0.1), name = NULL) +
labs(title = "One-year mortality") +
theme_bw(base_family = "serif", base_size = 20) +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_line(color = "black", size = 0.5),
plot.title = element_text(hjust = 0.5))
Data used (obtained from image in question using OCR)
df <- structure(list(bin_group = structure(1:10,
levels = c("(1.35e+03,1.38e+03]",
"(1.38e+03,1.41e+03]", "(1.41e+03,1.44e+03]", "(1.44e+03,1.47e+03]",
"(1.47e+03,1.5e+03]", "(1.5e+03,1.53e+03]", "(1.53e+03,1.56e+03]",
"(1.56e+03,1.59e+03]", "(1.59e+03,1.62e+03]", "(1.62e+03,1.65e+03]"
), class = "factor"), mean_28_day_mortality = c(0.0563498, 0.04886257,
0.04467626, 0.04256053, 0.04248667, 0.04009187, 0.03625538, 0.03455094,
0.03349542, 0.02892909)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L))

If you have groups that (I assume) you made with cut, you could pull out the max and min and then calc the mean before you summarize and plot. Note that I made the regex pretty long because I don't personally know if cut always makes left or inclusive or exclusive.
library(tidyverse)
#example like yours
mtcars |>
mutate(grp = cut(hp, 10)) |>
group_by(grp) |>
summarise(mpg_mean = mean(mpg)) |>
ggplot(aes(grp, mpg_mean))+
geom_point()
#solution
mtcars |>
mutate(grp = cut(hp, 10)) |>
extract(grp,
into = c("min", "max"),
remove = FALSE,
regex = "(?:\\(|\\[)(.*),(.*)(?:\\)|\\])",
convert = TRUE) |>
mutate(mean_grp = (min + max)/2)|>
group_by(mean_grp) |>
summarise(mpg_mean = mean(mpg)) |>
ggplot(aes(mean_grp, mpg_mean))+
geom_point()
EDIT
here is another option if you just want to re-label and not actually transform the data:
lab_fun <- function(x){
str_split(x, ",") |>
map_dbl(~parse_number(.x)
|> mean())
}
mtcars |>
mutate(grp = cut(hp, 10)) |>
group_by(grp) |>
summarise(mpg_mean = mean(mpg)) |>
ggplot(aes(grp, mpg_mean))+
geom_point()+
scale_x_discrete(labels = lab_fun)

Related

Combining two heatmaps with the variables next to each other

I'm trying to combine two heatmaps. I want var_a and var_x on the y axis with for example: var_a first and then var_x. I don't know if I should do this by changing the dataframe or combining them, or if I can do this in ggplot.
Below I have some example code and a drawing of what I want (since I don't know if I explained it right).
I hope someone has ideas how I can do this either in the dataframe or in ggplot!
Example code:
df_one <- data.frame(
vars = c("var_a", "var_b", "var_c"),
corresponding_vars = c("var_x", "var_y", "var_z"),
expression_organ_1_vars = c(5, 10, 20),
expression_organ_2_vars = c(50, 2, 10),
expression_organ_3_vars = c(5, 10, 3)
)
df_one_long <- pivot_longer(df_one,
cols=3:5,
names_to = "tissueType",
values_to = "Expression")
expression.df_one <- ggplot(df_one_long,
mapping = aes(y=tissueType, x=vars, fill = Expression)) +
geom_tile()
expression.df_one
df_two <- data.frame(
corresponding_vars = c("var_x", "var_y", "var_z"),
expression_organ_1_corresponding_vars = c(100, 320, 120),
expression_organ_2_corresponding_vars = c(23, 30, 150),
expression_organ_3_corresponding_vars = c(89, 7, 200)
)
df_two_long <- pivot_longer(df_one,
cols=3:5,
names_to = "tissueType",
values_to = "Expression")
expression.df_two <- ggplot(df_two_long,
mapping = aes(y=tissueType, x=vars, fill = Expression)) +
geom_tile()
expression.df_two
Drawing:
You can bind your data frames together and pivot into a longer format so that vars and corresponding vars are in the same column, but retain a grouping variable to facet by:
df_two %>%
mutate(cor = corresponding_vars) %>%
rename_with(~sub('corresponding_', '', .x)) %>%
bind_rows(df_one %>% rename(cor = corresponding_vars)) %>%
pivot_longer(contains('expression'), names_to = 'organ') %>%
mutate(organ = gsub('expression_|_vars', '', organ)) %>%
group_by(cor) %>%
summarize(vars = vars, organ = organ, value = value,
cor = paste(sort(unique(vars)), collapse = ' cor ')) %>%
ggplot(aes(vars, organ, fill = value)) +
geom_tile(color = 'white', linewidth = 1) +
facet_grid(.~cor, scales = 'free_x', switch = 'x') +
scale_fill_viridis_c() +
coord_cartesian(clip = 'off') +
scale_x_discrete(expand = c(0, 0)) +
theme_minimal(base_size = 16) +
theme(strip.placement = 'outside',
axis.text.x = element_blank(),
axis.ticks.x.bottom = element_line(),
panel.spacing.x = unit(3, 'mm'))
Okay, so I solved the issue for my own project, which is to convert it to a scatter plot. I combined both datasets and then used a simple scatterplot.
df.combined <- dplyr::full_join(df_two_long, df_one_long,
by = c("vars", "corresponding_vars", "tissueType"))
ggplot(df.combined,
aes(x=vars, y=tissueType, colour=Expression.x, size = Expression.y)) +
geom_point()
It's not a solution with heatmaps, but I don't know how to do that at the moment.

where to pass argument to radiate margin labels of a polar heatmap

This question builds on from here:
Drawing a polar heatmap
> dput(names.d)
c("0050773", "0050774", "0050775", "0050776", "0050777", "0050778",
"0050779", "0050780", "0050781", "0050782", "0050783", "0050784",
"0050785", "0050786", "0050787", "0050788", "0050789", "0050790",
"0050808", "0050809", "0050810", "0050811", "0050812", "0050813",
"0050814", "0050818", "0050819", "0050820", "0050821", "0050822"
)
Based on this, I have come up with the following code:
set.seed(20220913)
arr <- matrix(runif(15*30), nrow = 30)
dff <- as.data.frame(arr)
names(dff) <- paste(sample(letters, replace = F), sample(letters, replace = F), sep = " ")[1:15]
library(tidyverse)
dff %>%
mutate(Site = seq(nrow(.))) %>%
pivot_longer(-Site, names_to = 'Species', values_to = 'Abundance') %>%
mutate(yval = match(Species, colnames(dff))) %>%
ggplot(aes(Site, yval, fill = Abundance)) +
geom_tile(color = "black") +
geom_text(aes(label = colnames(dff)), hjust = 1.1, size = 3,
data = data.frame(Site = 31.5, yval = 1:15, Abundance = 1)) +
coord_polar() +
scale_y_continuous(limits = c(-5, 15.5)) +
scale_x_continuous(limits = c(0.5, 31.5), breaks = 1:30, labels = names.d,
name = 'Breeding site') +
scale_fill_gradientn(colors = colorRampPalette(RColorBrewer::brewer.pal(name = "YlOrRd", n = 9))(25), values = 0:1, labels = scales::percent)+
theme_void(base_size = 16) +
theme(axis.text.x = element_text(size = 12),
axis.title.x = element_text())
which gives me the following figure:
Which is great, but I would like the labels on the rim of the figure to radiate out (or be tangent, for that matter). So, I wrote the angles as:
ang <- 1:30/31.5*360
However, I can not see where to pass this argument. Looking around, it would normally be in the aes function, but there the labels are for the y-axis in the figure (before being changed to the polar coordinates), and what I am wanting rotated should be in the x-axis. So, how do I do this? Thanks for any suggestions!
You can add this in the axis.text.x = element_text() :
ang <- 90 - (1:30/31.5*360)
dff %>%
mutate(Site = seq(nrow(.))) %>%
pivot_longer(-Site, names_to = 'Species', values_to = 'Abundance') %>%
mutate(yval = match(Species, colnames(dff))) %>%
ggplot(aes(Site, yval, fill = Abundance)) +
geom_tile(color = "black") +
geom_text(aes(label = colnames(dff)), hjust = 1.1, size = 3,
data = data.frame(Site = 31.5, yval = 1:15, Abundance = 1)) +
coord_polar() +
scale_y_continuous(limits = c(-5, 15.5)) +
scale_x_continuous(limits = c(0.5, 31.5), breaks = 1:30, labels = names.d,
name = 'Breeding site') +
scale_fill_gradientn(colors = colorRampPalette(RColorBrewer::brewer.pal(name = "YlOrRd", n = 9))(25), values = 0:1, labels = scales::percent)+
theme_void(base_size = 16) +
theme(axis.text.x = element_text(size = 12, angle = ang),
axis.title.x = element_text())

How to make A racing Bar Chart in R

I have a dataset that has a column of years from 1965 to 2020 and Teams that have won the championship in the respective years.
I am trying to create a racing bar chart and so far I have been struggling to create the required dataset to create the animated GIF
df1 <- df %>%
group_by(Team) %>%
mutate(cups = 1:n()) %>%
ungroup() %>%
group_by(Year) %>% spread(Year, cups) %>%
replace(is.na(.),0)
which brings a result of the following format.
Kindly assist in how I should go about completing this racing bar chart as I have browsed through several resources but I still cant seem to crack it..
Check if this work, as Jon mentioned you need to pivot your data using pinot_longer
df1 <- pivot_longer(df, -1, names_to = 'Year') %>%
rename(Team= ï..Team) %>%
mutate(Year = as.numeric(substr(Year, 2, 5)))
Then this should create the racing barchart"
df1 <- df1 %>%
group_by(Year) %>%
# The * 1 makes it possible to have non-integer ranks while sliding
mutate(rank = min_rank(-value) * 1,
Value_rel = value/value[rank==1],
Value_lbl = paste0(" ",value)) %>%
filter(rank <=10) %>% # This would show the top 10 teams
ungroup()
p <- ggplot(df1, aes(rank, group = Team,
fill = as.factor(Team), color = as.factor(Team))) +
geom_tile(aes(y = value/2,
height = value,
width = 0.9), alpha = 0.8, color = NA) +
geom_text(aes(y = 0, label = paste(Team, " ")), vjust = 0.2, hjust = 2) +
geom_text(aes(y=value,label = Value_lbl, hjust=0)) +
coord_flip(clip = "off", expand = FALSE) +
scale_y_continuous(labels = scales::comma) +
scale_x_reverse() +
guides(color = FALSE, fill = FALSE) +
labs(title='{closest_state}', x = "", y = "Your Title",
caption = "Your Caption") +
theme(plot.title = element_text(hjust = 0, size = 22),
axis.ticks.y = element_blank(), # These relate to the axes post-flip
axis.text.y = element_blank(), # These relate to the axes post-flip
plot.margin = margin(1,1,1,4, "cm")) +
transition_states(Year, transition_length = 4, state_length = 1) +
ease_aes('cubic-in-out')
animate(p, 200, fps = 10, duration = 40, width = 800, height = 600, renderer = gifski_renderer("gganim.gif"))
anim_save("YourPath//Name.gif")

Display totals and means next to chart in ggplot

I am trying to get my mean and totals to the left of my bar chart like in the example above.
I have my bar charts created in ggplot. Picture and code below. Any advice on how to get the means and totals to display in the right place? Possibly custom_annotations?
Thanks
percentData = stotal %>% #stotal = survey data frame
group_by(qtext, div, response) %>% #qtext is my question text
summarise(N = n()) %>%
mutate(prop = N/sum(N))
percentData$prop = label_percent(accuracy = 1)(percentData$prop) #make percent from decimal
percentData
#colors
myColors <- c("green4","springgreen2","yellow1","orange1","red1","black", "black", "black")
ggplot(stotal)+
geom_bar(aes(x = div, fill = response), position = 'fill', width = 0.5)+
facet_grid(rows = vars(qtext))+
scale_fill_manual (values = myColors)+
coord_flip()+
ylab('')+
xlab('')+
scale_y_continuous(labels = percent)+
ggtitle(i)+
geom_text(data = percentData, aes(fill = response, y = N, label = prop, x = div),
position=position_fill(vjust=0.5))+
theme(strip.text.y = element_text(size = 12, angle = 0, family = "serif"))
One approach to achieve this is by making the table via a second ggplot which can be glued to the main plot by e.g. patchwork. Basically the table plot replicates the main plot with only one category, uses facetting to get the colum layout with the mean and the totals and gets rids of axis, grid, background colors, ...
Using some random example data try this:
library(ggplot2)
library(dplyr)
library(scales)
library(patchwork)
# Random example data
set.seed(42)
stotal <- data.frame(
qtext = rep(c("A", "B"), 50),
div = sample(c("University", "KSAS-HUM"), 100, replace = TRUE),
response = sample(c("Poor", "Fair", "Good", "Very good", "Excellent"), 100, replace = TRUE)
)
stotal$response <- factor(stotal$response, levels = c("Poor", "Fair", "Good", "Very good", "Excellent"))
percentData = stotal %>% #stotal = survey data frame
group_by(qtext, div, response) %>% #qtext is my question text
summarise(N = n()) %>%
mutate(prop = N/sum(N))
#> `summarise()` regrouping output by 'qtext', 'div' (override with `.groups` argument)
percentData$prop = label_percent(accuracy = 1)(percentData$prop) #make percent from decimal
#colors
myColors <- c("green4","springgreen2","yellow1","orange1","red1","black", "black", "black")
p1 <- ggplot(stotal)+
geom_bar(aes(x = div, fill = response), position = 'fill', width = 0.5)+
facet_grid(rows = vars(qtext))+
scale_fill_manual (values = myColors)+
coord_flip()+
ylab('')+
xlab('')+
scale_y_continuous(labels = percent)+
#ggtitle(i)+
geom_text(data = percentData, aes(fill = response, y = N, label = prop, x = div),
position=position_fill(vjust=0.5))+
theme(strip.text.y = element_text(size = 12, angle = 0, family = "serif"))
#> Warning: Ignoring unknown aesthetics: fill
# Table plot
table_data <- stotal %>%
mutate(response = as.numeric(response)) %>%
group_by(qtext, div) %>%
summarise(Mean = mean(response), "Total N" = n()) %>%
mutate(Mean = round(Mean, 1)) %>%
tidyr::pivot_longer(-c(qtext, div), names_to = "var")
#> `summarise()` regrouping output by 'qtext' (override with `.groups` argument)
p2 <- ggplot(table_data, aes(x = div)) +
geom_bar(color = "white", fill = "white", position = 'fill', width = .5)+
#geom_vline(color = "grey", xintercept = c(.5, 1.5, 2.5)) +
geom_text(aes(y = 1, label = value), position=position_fill(vjust=0.5), size = 0.8 * 11 /.pt) +
facet_grid(qtext ~ var, switch = "y") +
coord_flip() +
labs(x = NULL, y = NULL) +
theme_minimal() +
theme(strip.text.y = element_blank()) +
theme(axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
axis.text.x = element_text(color = "transparent"),
axis.ticks.x = element_line(color = "transparent"),
axis.title = element_blank(),
panel.grid = element_blank(), panel.spacing.x = unit(0, "pt"))
# Glue together
p2 + p1 + plot_layout(widths = c(1, 3))

Text color with geom_label_repel

Not specific to any particular piece of code, is there a relatively straightforward way to change the color of the text in a geom_label_repel box?
Specifically, I have code that produces the below chart
The percentage in the label box is the percent change in 7-day moving average for the most recent week over the week prior. I'd simply like to color the text red when the value is positive and green when it is negative.
The dataframe for this chart can be copied from here.
The plot code is
#endpoint layer
BaseEndpoints <- smDailyBaseData %>% filter(Base %in% AFMCbases) %>%
group_by(Base) %>%
filter(DaysSince == max(DaysSince)) %>%
select(Base, abbv, DaysSince, newRate,label) %>%
ungroup()
ZoomEndpoints <- BaseEndpoints %>% filter(Base != 'Edwards') %>%
mutate(zoom = TRUE)
CAEndPoint <- BaseEndpoints %>% filter(Base == 'Edwards') %>%
mutate(zoom = FALSE)
ZoomEndpoints <- rbind(ZoomEndpoints, CAEndPoint)
BasePlot <- smDailyBaseData %>% filter(Base %in% AFMCbases) %>%
ggplot(mapping = aes(x = as.numeric(DaysSince), y = newRate)) +
geom_line(aes(color=abbv),show.legend = FALSE) +
scale_color_ucscgb() +
geom_point(data = BaseEndpoints,size = 1.5,shape = 21,
aes(color = abbv,fill = abbv), show.legend = FALSE) +
geom_label_repel(data=ZoomEndpoints, aes(label=label), show.legend = FALSE,
vjust = 0, xlim=c(105,200), size=3, direction='y') +
labs(x = "Days Since First Confirmed Case",
y = "% Local Population Infected Daily") +
theme(plot.title = element_text(size = rel(1), face = "bold"),
plot.subtitle = element_text(size = rel(0.7)),
plot.caption = element_text(size = rel(1))) +
facet_zoom(xlim = c(50,120), ylim=c(0,0.011),zoom.data=zoom)
print(BasePlot)
Yes, it's as simple as this:
library(ggplot2)
df <- data.frame(x = c(-1, -1, 1, 1), y = c(-1, 1, 1, -1), value = c(-2, -1, 1, 2))
ggplot(df, aes(x, y)) +
geom_point(size = 3) +
ggrepel::geom_label_repel(aes(label = value, colour = factor(sign(value)))) +
lims(x = c(-100, 100), y = c(-100, 100)) +
scale_colour_manual(values = c("red", "forestgreen"))
EDIT
Now we have a more concrete example, I can see the problem more clearly. There are workarounds such as using ggnewscale or a hand-crafted solution such as Ian Campbell's thorough example. Personally, I would just note that you haven't used the fill scale yet, and this looks pretty good to my eye:
Here's a bit of a hacky solution since you can't have two scale_color_*'s at the same time:
The approach centers on manually assigning the color outside of aes in the geom_label_repel call. Adding one to the grepl result that searches for the minus sign in the label allows you to subset the two colors. You need two colors for each label, I assume for the box and for the text, so I used rep.
smDailyBaseData %>%
ggplot(mapping = aes(x = as.numeric(DaysSince), y = newRate)) +
geom_line(aes(color=abbv),show.legend = FALSE) +
scale_color_ucscgb() +
geom_point(data = BaseEndpoints,size = 1.5,shape = 21,
aes(color = abbv,fill = abbv), show.legend = FALSE) +
geom_label_repel(data=ZoomEndpoints, aes(label=label),
color = rep(c("green","red")[1+grepl("\\-\\d",as.factor(ZoomEndpoints$label))],times = 2),
show.legend = FALSE, vjust = 0, xlim=c(105,200),
size=3, direction='y') +
labs(x = "Days Since First Confirmed Case",
y = "% Local Population Infected Daily") +
theme(plot.title = element_text(size = rel(1), face = "bold"),
plot.subtitle = element_text(size = rel(0.7)),
plot.caption = element_text(size = rel(1))) +
facet_zoom(xlim = c(50,120), ylim=c(0,0.011),zoom.data=zoom)
Data Setup
#source("https://pastebin.com/raw/Vn2abQ4a")
BaseEndpoints <- smDailyBaseData %>%
group_by(Base) %>%
dplyr::filter(DaysSince == max(DaysSince)) %>%
dplyr::select(Base, abbv, DaysSince, newRate,label) %>%
ungroup()
ZoomEndpoints <- BaseEndpoints %>% filter(Base != 'Edwards') %>%
mutate(zoom = TRUE)
CAEndPoint <- BaseEndpoints %>% filter(Base == 'Edwards') %>%
mutate(zoom = FALSE)
ZoomEndpoints <- rbind(ZoomEndpoints, CAEndPoint)

Resources