R: ggplot2 geom_text by condition (for selected) - r

I need to copy this bar chart
The problem is that I don't know how to make the labels on some bars red or blue while keeping grey for the others. According to the task, red labels should highlight the characters who belong to the Golden Trio (Harry, Hermione Granger, Ron Weasley). So their bars should carry red labels, and the other characters should have grey labels.
My code is here:
# Bar chart of `tot` for each `names_ordered`, with the value of `tot` printed
# as a text label on every bar.
ggplot(data = actions, aes(x = names_ordered, y = tot)) + geom_bar(stat = "identity",color="grey85", fill="grey90") +
labs(title = "Character aggression \nHarry Potter Universe", caption = "pmsar2020_hw1_shivarova") + theme_classic() +
# `colour` is given outside aes(), so every label gets the same fixed "grey30".
# Per-character label colours would need a data column mapped inside aes().
geom_text(aes(label = tot), size = 3, vjust = 2, colour = "grey30") + theme(plot.title = element_text(hjust = 0.5),
plot.caption.position = "plot",
plot.caption = element_text(hjust = 1,
face = "italic",
color = 'grey60'),
# Both axis titles are removed; only the tick labels remain.
axis.title.x=element_blank(),
axis.title.y =element_blank())

One option would be to add a column to your dataset with the desired color for each category. For the assignment you could e.g. use dplyr::case_when. Afterwards you could map this new column on the color aes and use scale_color_identity:
Using some fake data to mimic your real dataset:
# Stand-in for the real dataset: eight categories "A".."H" whose totals
# count down from 8 to 1.
actions <- data.frame(
  names_ordered = head(LETTERS, 8),
  tot = rev(seq_len(8))
)
library(ggplot2)
library(dplyr)
# Decide the label colour of every category up front and store it in the data:
# the highlighted categories get "darkred" or "blue", everything else "grey30".
highlight_red <- c("A", "C", "D")
highlight_blue <- "E"
actions <- dplyr::mutate(
  actions,
  color = dplyr::case_when(
    names_ordered %in% highlight_red ~ "darkred",
    names_ordered %in% highlight_blue ~ "blue",
    TRUE ~ "grey30"
  )
)

# Same chart as in the question, except that the label colour is now mapped
# from the `color` column. scale_color_identity() tells ggplot2 to use the
# stored colour names as they are instead of assigning palette colours.
ggplot(actions, aes(x = names_ordered, y = tot)) +
  geom_bar(stat = "identity", fill = "grey90", color = "grey85") +
  geom_text(aes(label = tot, color = color), vjust = 2, size = 3) +
  scale_color_identity() +
  labs(
    title = "Character aggression \nHarry Potter Universe",
    caption = "pmsar2020_hw1_shivarova"
  ) +
  theme_classic() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(color = "grey60", face = "italic", hjust = 1),
    plot.caption.position = "plot"
  )

Related

Color dataset by group and add geom_vline to legend only

I have a genome-wide dataset that I'm trying to plot in the following way:
Have each chromosome be a separate color
Have specific windows highlighted by a bar (I'm using geom_vline) - this I'm getting from a
separate table
Have only geom_vline feature in the legend
I have tried many different things, but it seems I cannot have all three together!
Here is the link to both datasets:
allStats & allStats_fstPi_group15
With this code, I can have the first 2, but not the 3rd:
# Attempt 1: points coloured per scaffold, grey lines at chrom$add and orange
# lines at the highlighted windows. No legend is shown.
ggplot(allStats, aes(x = mid2, y = Fst_group1_group5,
color = as_factor(scaffold))) +
geom_point(size = 2) +
geom_vline(xintercept = chrom$add, color = "grey") +
scale_y_continuous(expand = c(0,0), limits = c(0, 1)) +
scale_x_continuous(labels = chrom$chrID, breaks = axis_set$center) +
# unique() of a single number is a no-op, so the two-colour pair is repeated
# length(chrom$chrID) times.
# NOTE(review): length(unique(chrom$chrID)) may have been intended - confirm.
scale_color_manual(values = rep(c("#276FBF", "#183059"), unique(length(chrom$chrID)))) +
# No size aesthetic is mapped in this snippet (geom_point uses a fixed size),
# so this scale has nothing to act on.
scale_size_continuous(range = c(0.5,3)) +
labs(x = NULL,
y = "Fst SBM vs OC") +
theme_minimal() +
theme(
# Removes every legend of the plot, including any legend key requested with
# show.legend further down.
legend.position = "none",
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
axis.title.y = element_text(),
axis.text.x = element_text()) +
# The colour is a fixed value outside aes(), so it is not mapped to the colour
# scale. `xintercept` reaches into the data frame with `$`; the bare column
# name `mid2` would refer to the same column of `data`. `T` is shorthand for TRUE.
geom_vline(data = allStats_fstPi_group15,
aes(xintercept = allStats_fstPi_group15$mid2),
color = "orange", show.legend = T)
With this one I can get 2 and 3 only (I'm not able to color code each block separately):
# Attempt 2: a single named colour vector shared by the points and the lines.
# Naming a vector of length > 1 inside c() appends an index to the name, so the
# first entries are called "SBM vs OC1", "SBM vs OC2", ... rather than "SBM vs OC".
cols <- c("SBM vs OC" = rep(c("#276FBF", "#183059"), unique(length(chrom$chrID))),
"90th percentile (Fst vs Pi)" = "orange")
ggplot(allStats, aes(x = mid2)) +
geom_point(aes(y = Fst_group1_group5,
color = as_factor(scaffold)),
size = 2) +
# A constant string inside aes() creates a colour category with that name,
# which is what produces a legend entry for the lines.
geom_vline(data = allStats_fstPi_group15,
aes(xintercept = allStats_fstPi_group15$mid2,
color = "90th percentile (Fst vs Pi)")) +
# NOTE(review): named `values` are matched to the data values by name; the
# scaffold levels presumably do not match any name in `cols`, which would
# explain why the blocks are not coloured individually - verify.
scale_color_manual(values = cols)
I've seen the issue with the legend being that color needs to be within aes(), so my question is: is it impossible what I'm trying to do?

how to remove a break in geom_line

Using the code below, I have created my plot of interest. The only issue is the break between the brown line and the other four lines (Year=205). How can I solve this by joining the brown line to the other four lines?
Thanks,
Nader
# Line chart of Population against Year with one coloured line per Variant.
# The legend is hidden; each line is labelled with its Variant name instead.
UN_2010_plot <- ggplot()+
# group = Variant draws a separate line for every Variant, and points are only
# connected within a group.
# NOTE(review): a Variant without rows for some years has no segment there,
# presumably the source of the visible gap - confirm against the data.
geom_line(aes(x =Year, y =Population , group=Variant, colour = Variant), data = UN_2010)+
# Text labels use only the rows whose Year equals the overall maximum Year and
# are placed 10 years to the right of that point.
ggrepel::geom_text_repel(aes(x =Year+10, y = Population, colour = Variant, label = Variant, fontface = 'bold'), data = UN_2010 %>%
filter(Year == max(Year)),
segment.color = 'transparent',
direction = "y",
size = 3,
box.padding = 0,
force = 0
) +
theme_bw() +
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
strip.background = element_rect(
color="white", fill="white", size=1.5, linetype="solid"
))+
theme(legend.position = "none") +
scale_x_continuous(breaks=seq(1950,2100,10))+
scale_y_continuous(breaks=seq(10000,150000,10000))+
# coord_cartesian() restricts the visible y range to 10000-150000.
coord_cartesian(ylim = c(10000, 150000))+
labs(
x = NULL,
y = "Population (thousands)",
caption = (NULL),
# NOTE(review): `face` is not a title/caption argument of labs(); it is
# presumably taken as a label for an aesthetic called "face" and has no
# visible effect - confirm.
face = "bold"
) +
ggtitle("The 2010 Revision") +
theme(plot.title = element_text(hjust = 0.5))+
theme(axis.text.x = element_text(angle = -45, vjust = 0.5, hjust=0))
# Typing the object name at top level prints (draws) the plot.
UN_2010_plot
As stefan already mentioned: here is the pseudo code that fixes your data, so that the end of one line connects with the start of the others.
# Close the gap by giving each of the four listed variants one extra point at
# Year 2010 that carries the population recorded for 2010 in the original data,
# so all of those lines start where the existing 2010 value sits.
#
# Assumes exactly one row of UN_2010 has Year == 2010; with several matches
# tibble() stops with a column-size error instead of silently recycling.
additional_data <- tibble(
  Year = rep(2010, 4),
  # The lookup must go through the data frame: a bare `Year` is not defined
  # outside of UN_2010.
  Population = rep(UN_2010$Population[UN_2010$Year == 2010], 4),
  # `=` names the column. `<-` here would assign a global `Variant` and leave
  # the tibble without a column of that name.
  Variant = c("Low", "Medium", "Constant", "High")
)
# Append the bridging rows and restore chronological order within the data.
UN_2010_new <- bind_rows(UN_2010, additional_data) %>%
  arrange(Year, Variant)

In R, ggplot for a population pyramid: how to align labels near to the axis with geom_bar geom_label after flipping the coordinates

I am making a sort of population pyramid using ggplot (plotrix doesn't allow me to do fancy labels etc.), so I start with a geom_bar with labels and later flip the coordinates. Sadly, the labels can barely be seen. I would like to move those labels close to the "y-axis" in the middle, which now shows the age groups.
Data is here: d <- data.frame(age.grp2 = c("1-10", "11-20", "21-30", "31-40", "41-50", "1-10", "11-20", "21-30", "31-40", "41-50"),
sex = c("Female","Female","Female","Female","Female","Male","Male","Male","Male","Male" ),
n.enroll = c(288,500,400,300,200,300,460,300,200,300),
proportion = c(17.1,29.6,23.7,17.8,11.8,51,47.9,42.9,40,60),
proportion2 = c(-17.1,-29.6,-23.7,-17.8,-11.8,51,47.9,42.9,40,60)) My code is this one: ggplot(d, aes(x = age.grp2, y = proportion2, fill = sex)) +
geom_bar(position = position_dodge(width=1), stat='identity') +
geom_label(aes(label = paste(n.enroll," (",proportion,"%)", sep=""), group = factor(sex)),
fill="white", colour = "black",
position= position_dodge(width=1),
size = 3) +
scale_fill_manual(values=c("#BFD5E3", "grey")) +
facet_share(~sex, dir = "h", scales = "free", reverse_num = TRUE) +
coord_flip() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
#panel.border = element_blank(),
panel.background = element_blank(),
legend.position = "none",
#axis.line.x = element_line(color = "black"),
axis.ticks.y = element_blank(),
axis.text.x = element_text(colour = "black", size = 8, face = "bold", angle=0, hjust=0.5),
axis.text.y = element_text(colour = "black", size = 8, face = "bold"),
axis.title.x = element_text(size = 14, face="bold", margin = margin(t = 30, r = 20, b = 10, l = 20)),
plot.margin = unit(c(1,1,1,1),"cm")) +
labs(y = "Enrollment percentage within sex",x="")
I am also attaching the plot, where we can see that for females the label of the age group 11-20 is cut off. I would like to have all labels next to the age group labels, within each bar: female labels moved to the right and male labels moved to the left. Also, I would like each x-axis to extend to 100%, or at least to cover the same range; currently it goes up to 30% for females and up to 60% for males. Thanks for all the comments.
Here's a minimal solution using the base ggplot package, without most of your formatting. The key part is to add a conditional y = ... into the geom_label(aes()) section:
# Add the three helper columns the chart needs:
#   label                - text shown on each bar, e.g. "288 (17.1%)"
#   label_loc            - fixed y position of the label, one value per sex
#   proportion_for_chart - proportion, mirrored to negative values for females
pyramid_data <- mutate(
  d,
  label = str_c(n.enroll, " (", proportion, "%)"),
  label_loc = if_else(sex == "Female", -9.5, 3),
  proportion_for_chart = if_else(sex == "Female", -proportion, proportion)
)

# Bars come from proportion_for_chart, while the labels get their own y
# position (label_loc) so that they no longer follow the bar ends.
ggplot(pyramid_data, aes(x = age.grp2, y = proportion_for_chart, fill = sex)) +
  geom_col(show.legend = FALSE) +
  geom_label(
    aes(y = label_loc, label = label),
    fill = "white",
    hjust = 0,
    size = 3
  ) +
  facet_wrap(~ sex, scales = "free") +
  coord_flip() +
  theme(axis.title = element_blank())
Whenever possible, I try to reshape data and use geom_col rather than try to get lucky with geom_bar. You should be able to play around with different hard-coded values of y in the geom_label call to fix the proper location for your labels based on your formatting and image size/scale.

ggplot change the x axis label colors dynamically

Below is the test code:
library(tidyverse)
# Example data: one row per PCP with its closure percentage, a hex colour
# (`color`) and a category code (`color_fill`).
df<- data.frame(PCP = c("BOB","FRED","ED","Other"),
closed = c(42,64,45,1812),
total= c(53,81,58,3188),
percentage = c(.7924,.7901,.7758,.5683),
row= c(1, 2, 3,4),
# The first entry has no leading "#", unlike the other three.
color =c("0099FF","#CCCCCC","#CCCCCC","#660033"),
color_fill = c("99","0","0","98"
))
# Lookup from category code to hex colour.
col <- c(
"99" = "#0099FF",
"98" = "#660033",
"0" = "#CCCCCC"
)
# Sort by descending percentage. The mutate() assigns every column to itself
# and therefore changes nothing.
df %>%
arrange(desc(percentage)) %>%
mutate(PCP = PCP,
closed = closed,
total = total,
percentage = percentage,
row = row,
color = color,
color_fill = color_fill) -> df1
# `fill` is mapped to the category code and `color` to the hex strings; `col`
# is keyed by category code, so the hex strings are not among its names.
ggplot(df1,aes(x=PCP, y = percentage,fill=color_fill, color = color)) +
geom_col() +
coord_flip() +
labs(x ="PCP Name",
y = "Percentage of Gap Closures",
title = "TOP 10 PCPs")+
scale_fill_manual(values = col)+
scale_color_manual(values = col) +
# NOTE(review): percent_format() belongs to the scales package; only tidyverse
# is attached in the visible snippet - confirm scales is loaded.
scale_y_continuous(labels = percent_format(), limits=c(0,1))+
theme(legend.position = "none",
panel.grid = element_blank(),
panel.background = element_blank(),
text = element_text(size=15),
plot.caption = element_text(hjust = 0, face= "italic"),
# Passes the three lookup colours in the order they are declared in `col`,
# not one colour per axis label in axis order.
axis.text.y = element_text(colour = col ))
My goal is to match x axis labels with the bar colors.
I have attempted solutions from,
Matching axis.text labels to colors contained in data frame variable in ggplot
However when attempting the factor levels portion, I get an error because my actual data contains other values that utilize the same #CCCCCC color code.
Below is the output of the attached code.
Is there something I am doing wrong?
I'm not entirely sure what the issue is here (I mean I understand that you provide the colors in the wrong order; in the theme you would have to use col[as.integer(df$color)] to mimic the order of the factor but I have no clue why) but I have a workaround that works well.
One less known function in ggplot2 is ggplot_build. Using this you can, as the name suggests, build the plot, which means you can extract the values you want from it. Based on this I wrote a little function which can do what you want.
#' Colour the y-axis tick labels with the colours used by the first layer
#'
#' Builds the plot, reads the colours that ggplot2 resolved for the first
#' layer and applies them to `axis.text.y`, so each tick label matches its
#' bar. Intended for plots with one mark per category on a discrete x axis
#' that is shown vertically (e.g. `geom_col()` followed by `coord_flip()`).
#'
#' @param plot A ggplot object.
#' @param col Name of the column of the built layer data to take the colours
#'   from, e.g. `"fill"` (default) or `"colour"`.
#' @return `plot` with `theme(axis.text.y = ...)` added. If the first layer
#'   has no column `col`, a warning is issued and `plot` is returned unchanged.
axis_text_color <- function(plot, col = "fill") {
  # Named `layer_data` rather than `c` to avoid shadowing base::c().
  layer_data <- ggplot_build(plot)$data[[1]]

  if (is.null(layer_data[[col]])) {
    warning(
      "The first layer has no '", col, "' column; axis text left unchanged.",
      call. = FALSE
    )
    return(plot)
  }

  # Tick labels are drawn in axis-position order, one per category. Sort the
  # built rows by position and keep a single row per position so the colours
  # line up with the labels even when the data is unsorted or has several
  # rows per category.
  if (!is.null(layer_data$x)) {
    layer_data <- layer_data[order(layer_data$x), , drop = FALSE]
    layer_data <- layer_data[!duplicated(layer_data$x), , drop = FALSE]
  }

  # A vector of colours in element_text() works in practice, but ggplot2 does
  # not officially support vectorised theme elements.
  plot +
    theme(axis.text.y = element_text(colour = layer_data[[col]]))
}
The way you use it is by first saving the plot in an object:
library(tidyverse)
# Same chart as in the question, but stored in `plot` and without any
# axis.text.y setting, so the axis colours can be added afterwards.
plot <- ggplot(
  df,
  aes(x = PCP, y = percentage, fill = color_fill, color = color)
) +
  geom_col() +
  scale_fill_manual(values = col) +
  scale_color_manual(values = col) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent_format()) +
  coord_flip() +
  labs(
    title = "TOP 10 PCPs",
    x = "PCP Name",
    y = "Percentage of Gap Closures"
  ) +
  theme(
    text = element_text(size = 15),
    plot.caption = element_text(face = "italic", hjust = 0),
    panel.background = element_blank(),
    panel.grid = element_blank(),
    legend.position = "none"
  )
And then calling the function on that plot:
# Returns `plot` with the y-axis label colours added; as a top-level call the
# result is printed, i.e. the plot is drawn.
axis_text_color(plot)
Created on 2020-01-20 by the reprex package (v0.3.0)
If possible, I would take color out of your df so you don't duplicate your col vector.
You would need to order your color_fill based on PCP (the order of the factor levels, i.e. alphabetical).
Finally, I would use as.character to create a character vector instead of a factor, and use it as a lookup reference into col.
library(tidyverse)
library(scales)
# Example data: one row per PCP. `color_fill` is a category code that is looked
# up in `col`; the per-row hex `color` column of the question is intentionally
# left out so the colours are defined in one place only.
df <- data.frame(
  PCP = c("BOB", "FRED", "ED", "Alfred", "Other"),
  closed = c(42, 64, 45, 100, 1812),
  total = c(53, 81, 58, 100, 3188),
  percentage = c(0.7924, 0.7901, 0.7758, 0.3, 0.5683),
  row = c(1, 2, 3, 4, 5),
  color_fill = c("99", "0", "0", "97", "98")
)

# Lookup from category code to hex colour.
col <- setNames(
  c("#0099FF", "#660033", "#00FF00", "#CCCCCC"),
  c("99", "98", "97", "0")
)
# One colour per axis label, in the order the labels appear on the axis: the
# PCP names are laid out alphabetically, so the category codes are taken in
# that same order and translated to hex colours through `col`.
pcp_order <- order(df$PCP)
axis_label_colours <- col[as.character(df$color_fill[pcp_order])]

ggplot(
  df,
  aes(x = PCP, y = percentage, fill = color_fill, color = color_fill)
) +
  geom_col() +
  scale_fill_manual(values = col) +
  scale_color_manual(values = col) +
  scale_y_continuous(limits = c(0, 1), labels = percent_format()) +
  coord_flip() +
  labs(
    title = "TOP 10 PCPs",
    x = "PCP Name",
    y = "Percentage of Gap Closures"
  ) +
  theme(
    axis.text.y = element_text(colour = axis_label_colours),
    text = element_text(size = 15),
    plot.caption = element_text(face = "italic", hjust = 0),
    panel.background = element_blank(),
    panel.grid = element_blank(),
    legend.position = "none"
  )

ggplot2 - get annotations in legend

I'm working with a dataframe and using ggplot to generate a pie chart.
# Example data: counts per car model, several models per make.
df <- data.frame(Make=c('toyota','toyota','honda','honda','jeep','jeep','jeep','accura','accura'),
Model=c('camry','corolla','city','accord','compass', 'wrangler','renegade','x1', 'x3'),
Cnt=c(10, 4, 8, 13, 3, 5, 1, 2, 1))
# Not used anywhere in the visible snippet.
row_threshold = 2
# Total volume per make and its percentage share of all makes, largest first.
dfc <- df %>%
group_by(Make) %>%
summarise(volume = sum(Cnt)) %>%
mutate(share=volume/sum(volume)*100.0) %>%
arrange(desc(volume))
# Fix the factor levels to the reverse of the volume ordering.
dfc$Make <- factor(dfc$Make, levels = rev(as.character(dfc$Make)))
# The example data contains four makes, while rows 1:10 are requested here.
# NOTE(review): rows past the end presumably come back filled with NA - confirm.
pie <- ggplot(dfc[1:10, ], aes("", share, fill = Make)) +
geom_bar(width = 1, size = 1, color = "white", stat = "identity") +
coord_polar("y") +
geom_text(aes(label = paste0(round(share), "%")),
position = position_stack(vjust = 0.5)) +
labs(x = NULL, y = NULL, fill = NULL,
title = "Market Share") +
guides(fill = guide_legend(reverse = TRUE)) +
theme_classic() +
theme(axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(hjust = 0.5, color = "#666666")) +
# Make is mapped to `fill` and the bar outline colour is a fixed "white", so
# no data is mapped to the colour aesthetic that this scale controls.
scale_color_brewer(palette = "Paired")
this gives me a pie chart as below - how do I add %share along with the Make labels like honda (45%) instead of just honda
This can be achieved by adding breaks and labels to the scale_fill_brewer.
First of all you mapped Make to fill so to control the color you need to use a fill_scale. Secondly if you want to provide custom legend entries define the keys present in the legend in breaks and the new names in labels :
library(ggplot2)
# Pie chart of the market share of (at most) the ten largest makes, with the
# share appended to every legend entry, e.g. "honda (45%)".
#
# head() never returns more rows than exist, whereas dfc[1:10, ] indexes past
# the end when there are fewer than ten makes and pads the data with NA rows.
# Taking the subset once also keeps the legend labels and breaks in step with
# the rows that are actually plotted.
top_makes <- head(dfc, 10)

ggplot(top_makes, aes("", share, fill = Make)) +
  geom_bar(width = 1, size = 1, color = "white", stat = "identity") +
  coord_polar("y") +
  geom_text(
    aes(label = paste0(round(share), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  labs(
    x = NULL, y = NULL, fill = NULL,
    title = "Market Share"
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  theme_classic() +
  theme(
    axis.line = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5, color = "#666666")
  ) +
  # Make is mapped to `fill`, so the legend is controlled by the fill scale:
  # `breaks` lists the legend keys and `labels` the text shown for each key,
  # in the same order.
  scale_fill_brewer(
    palette = "Paired",
    labels = rev(paste0(top_makes$Make, " (", round(top_makes$share), "%)")),
    breaks = rev(top_makes$Make)
  )

Resources