Changing x-axis labels in ggplot - r

I have a bar plot that has 12 x values. Using this code I get the plot I want except for the x-axis labels.
# Dodged bar chart of invest_amnt by stock_name, filled by trend_id, with
# error bars running from invest_amnt - ic to invest_amnt + ic.
p <- ggplot(df.mean, aes(stock_name, invest_amnt, fill = trend_id)) +
  geom_bar(position = position_dodge(), stat = "identity") +
  geom_errorbar(
    aes(ymin = invest_amnt - ic, ymax = invest_amnt + ic),
    position = position_dodge(0.9),
    width = 0.2
  )

# Palette and themes are added only when printing; `p` itself is unchanged.
# NOTE(review): theme_stata() is added after theme(); it presumably replaces
# the text settings given just before it -- confirm.
p +
  scale_fill_brewer(palette = "Paired") +
  theme_minimal() +
  theme(text = element_text(family = "Times", size = 12, hjust = 0.5)) +
  theme_stata() +
  scale_color_stata()
Instead of displaying all 12 values on the x-axis I want to determine the labels by myself and only display 4.
I adjusted the code like this
# Same chart as before, now with four custom x-axis labels and a renamed,
# relabelled fill legend. Only `labels` is passed to scale_x_discrete();
# no `breaks` are given.
p <- ggplot(df.mean, aes(stock_name, invest_amnt, fill = trend_id)) +
  geom_bar(position = position_dodge(), stat = "identity") +
  geom_errorbar(
    aes(ymin = invest_amnt - ic, ymax = invest_amnt + ic),
    position = position_dodge(0.9),
    width = 0.2
  ) +
  scale_x_discrete(
    labels = c("UP\nDOWN", "DOWN\nUP", "STRAIGHT\nGAIN", "STRAIGHT\nLOSS")
  ) +
  scale_fill_discrete(
    labels = c("negative", "flat", "positive"),
    name = "Trend"
  )

# Palette and themes are added only when printing; `p` itself is unchanged.
p +
  scale_fill_brewer(palette = "Paired") +
  theme_minimal() +
  theme(text = element_text(family = "Times", size = 12, hjust = 0.5)) +
  theme_stata() +
  scale_color_stata()
Unfortunately, I get my 4 labels but also 8 NAs. I would like my 4 labels to be evenly spread on my x-axis. Since my labels are factors, I do not know how to apply breaks here.

I've generated some sample data...hope I've understood the situation correctly.
This seems to work, i.e. inserts breaks at the specified locations on a barplot, using the specified labels.
library(tidyverse)
# Four-row sample data; x is converted to a factor.
df <- tribble(
  ~x,       ~y,
  "cat",    10,
  "dog",    20,
  "rabbit", 30,
  "fox",    30
) %>%
  mutate(x = factor(x))

# Show ticks only at the listed breaks and relabel them one-to-one.
ggplot(df, aes(x, y)) +
  geom_bar(stat = "identity") +
  scale_x_discrete(breaks = c("cat", "fox"), labels = c("pig", "hen"))

Related

How to change bars colors with scale_fill_manual() in R?

I want my bars to correspond to a legend and to have them colored with my own color, not the default one.
# library
library(ggplot2)
# create a dataset
# Six cytokine/group combinations, each listed twice (once per condition).
specie <- rep(
  c("IFNg_WNH", "IFNg_AA", "IL1b_WNH", "IL1b_AA", "IL6_WNH", "IL6_AA"),
  each = 2
)
# Conditions alternate down/up within every combination.
condition <- rep(c("down", "up"), times = 6)
# Values are listed in the same order as `specie`/`condition`.
value <- c(
  452, 216,
  348, 327,
  207, 61,
  75, 53,
  177, 191,
  379, 318
)
data <- data.frame(specie = specie, condition = condition, value = value)
data
# Grouped
# Base plot: one dodged bar per condition within each specie.
p <- ggplot(data, aes(x = specie, y = value, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge")

# Styled plot: axis titles, classic theme, thicker axis lines, a y axis that
# starts at 0 without padding, and relabelled legend entries.
z <- p +
  labs(x = "Cytokines", y = "Number of genes") +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.line = element_line(size = 1)
  ) +
  scale_y_continuous(limits = c(0, NA), expand = c(0, 0)) +
  scale_fill_discrete(labels = c("up", "down"))
z
Once I add
# Adds a manual fill scale (colours only, no labels) to `z`, which already
# has scale_fill_discrete(labels = ...) attached.
z + scale_fill_manual(values=c('#eb4034','#0a0a0a'))
The color is changing but the legend reverts to the wrong one. What is happening?
Adding type to scale_fill_discrete
type: One of the following:
• A character vector of color codes.
• A list of character vectors of color codes.
• A function that returns a discrete colour/fill scale
# One fill scale carries both the legend labels and the colours (via `type`).
ggplot(data, aes(x = specie, y = value, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(x = "Cytokines", y = "Number of genes") +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_y_continuous(limits = c(0, NA), expand = c(0, 0)) +
  scale_fill_discrete(
    type = c("#eb4034", "#0a0a0a"),
    labels = c("up", "down")
  )
There are a couple issues here. First, scale_fill_manual() essentially “overwrites” scale_fill_discrete(). Instead, use just one scale_fill_*() call including all relevant arguments:
library(ggplot2)
# Labels and colours are set together in a single scale_fill_manual() call.
p +
  labs(x = "Cytokines", y = "Number of genes") +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.line = element_line(size = 1)
  ) +
  scale_y_continuous(limits = c(0, NA), expand = c(0, 0)) +
  scale_fill_manual(
    values = c("#eb4034", "#0a0a0a"),
    labels = c("up", "down")
  )
However - currently, the labels argument is in effect recoding your data, so that "up" values are labeled "down" and vice versa, which I assume isn’t what you want. My best guess is you’re actually trying to change the order the labels appear in the legend. If so, you can change the factor levels of condition to the order you want:
# Put "up" first in the factor levels so the legend lists it first.
data$condition <- factor(data$condition, levels = c("up", "down"))

# Rebuild the base plot so it picks up the releveled factor.
p <- ggplot(data, aes(x = specie, y = value, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge")

# Only colours are set here; the legend text comes from the factor levels.
p +
  labs(x = "Cytokines", y = "Number of genes") +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.line = element_line(size = 1)
  ) +
  scale_y_continuous(limits = c(0, NA), expand = c(0, 0)) +
  scale_fill_manual(values = c("#eb4034", "#0a0a0a"))

R ggplot - Maintain x axis labels after reorder

Hi I am plotting the below using ggplot. I would like the x axis labels to be based on values in column 'Area', instead this code creates x axis labels based on column 'value' - can this be corrected without losing the reordering please?
# plot with reorder
# NOTE(review): reorder(value, Area) places `value` on the x axis (ordered by
# Area). Labelling by Area while ordering by value would presumably be
# reorder(Area, value) -- confirm against the data.
PrevalencePlot <- ggplot(
  ICSTable4,
  aes(x = reorder(value, Area), y = value, fill = Statistical_Significance)
) +
  geom_col() +
  scale_fill_manual(values = colours) +
  geom_errorbar(
    aes(ymin = errorbarlowerplot, ymax = errorbarhigherplot),
    width = 0.2, # Width of the error bars
    position = position_dodge(0.9)
  ) +
  theme_bw() +
  geom_text(aes(label = valuelabel), colour = "black", vjust = 2.5) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

How can I add a nested y-axis title in my graph?

I created a ggplot graph using geom_segment for certain subcategories and their cost.
# Sample data: 6 subcategories in category A, 7 in category B, with values
# rising from 100 to 1300 in steps of 100.
df <- data.frame(
  category = rep(c("A", "B"), times = c(6, 7)),
  subcat = paste0("S", 1:13),
  value = 100 * 1:13
)
# Lollipop chart of value per subcategory. Rows are sorted by descending
# value and subcat's factor levels are fixed in that order before plotting.
# Note that `df2` holds the finished plot, not a data frame.
df2 <- df %>%
  arrange(desc(value)) %>%
  mutate(subcat = factor(subcat, levels = subcat)) %>%
  ggplot(aes(subcat, value)) +
  geom_segment(aes(xend = subcat, yend = 0)) +
  geom_point(color = "steelblue", size = 4) +
  geom_text(
    data = df,
    aes(x = subcat, y = value, label = dollar(value, accuracy = 1)),
    hjust = "inward",
    position = position_nudge(x = -0.3)
  ) +
  theme_classic() +
  coord_flip() +
  scale_y_continuous(labels = scales::dollar_format()) +
  labs(x = "subcategory", y = "Cost Value")
df2
This code results in a graph that is shown below:
My main issue is I want the category variable on the left of the subcategory variables. It should look like this:
How do I add the category variables in the y-axis, such that it looks nested?
As mentioned in my comment, and adapting this post by @AllanCameron to your case, one option to achieve your desired result is the "facet trick", which uses faceting to get the nesting and some styling to remove the facet look:
Facet by category and free the scales and the space so that the distance between categories is the same.
Remove the spacing between panels and place the strip text outside of the axis text.
Additionally, set the expansion of the discrete x scale to .5 to ensure that the distance between categories is the same at the facet boundaries as inside the facets.
library(dplyr)
library(ggplot2)
library(scales)
# Sort by descending value and fix subcat's factor levels in that order.
df1 <- df %>%
  arrange(desc(value)) %>%
  mutate(subcat = factor(subcat, levels = subcat))

# Lollipop chart faceted by category. The strips are switched to the axis
# side and placed outside the axis text, and the panel spacing is set to 0.
ggplot(df1, aes(subcat, value)) +
  geom_segment(aes(xend = subcat, yend = 0)) +
  geom_point(color = "steelblue", size = 4) +
  geom_text(
    data = df,
    aes(x = subcat, y = value, label = dollar(value, accuracy = 1)),
    hjust = "inward",
    position = position_nudge(x = -0.3)
  ) +
  theme_classic() +
  coord_flip() +
  scale_y_continuous(labels = scales::dollar_format()) +
  scale_x_discrete(expand = c(0, .5)) +
  facet_grid(
    category ~ .,
    scales = "free_y",
    space = "free_y",
    switch = "y"
  ) +
  labs(x = "subcategory", y = "Cost Value") +
  theme(strip.placement = "outside", panel.spacing.y = unit(0, "pt"))

r: Automatically stagger overlapping labels in ggplot slopegraph

While creating a slopegraph with ggplot2, as below, I find that many of my labels overlap when their data points are close together. How can I change the labelling to automatically stagger my labels if there is overlap?
library(ggplot2)
library(scales)
install.packages("Lock5Data", repos = "http://cran.us.r-project.org") # you might need this
library(Lock5Data)
data("NBAStandings1e")
data("NBAStandings2016")

# Rename column 4 of each table (presumably the winning percentage -- confirm)
# so the two seasons can sit side by side after merging on Team.
colnames(NBAStandings1e)[4] <- "year1" # 2010-2011
colnames(NBAStandings2016)[4] <- "year2" # 2015-2016
nba_df <- merge(
  NBAStandings1e[, c("Team", "year1")],
  NBAStandings2016[, c("Team", "year2")]
)

# Number of teams; also used as the x position of the right-hand side.
scale <- dim(nba_df)[1]
a <- nba_df # alias kept for any code that still refers to `a`

# Height of the two season captions, just above the largest value plotted.
header_y <- 1.1 * max(nba_df$year2, nba_df$year1)

# One segment per team, from (0, year1) to (scale, year2).
p <- ggplot(nba_df) +
  geom_segment(aes(x = 0, xend = scale, y = year1, yend = year2), size = .75)

# clear junk
p <- p +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    axis.ticks = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_line(linetype = "dashed", color = "grey80"),
    panel.grid.major.x = element_blank(),
    axis.text.x = element_blank()
  )

# annotate
p <- p +
  xlab("") +
  ylab("Percentage Wins") +
  xlim(-5, scale + 12)

# Season captions are single labels, so use annotate(); geom_text() with a
# constant label would draw one overlapping copy per row of nba_df.
# The right-hand caption sits at x = scale (the original passed `months`,
# which is not defined in this script and resolves to the base R function).
p <- p +
  annotate(
    "text",
    label = "2010-2011 Season", x = 0, y = header_y,
    hjust = 1.2, size = 3
  ) +
  annotate(
    "text",
    label = "2015-2016 Season", x = scale, y = header_y,
    hjust = -0.1, size = 3
  )

# Team names at both ends of each segment, mapped from the data.
p <- p +
  geom_text(aes(x = scale, y = year2, label = Team), hjust = -0.2, size = 2) +
  geom_text(aes(x = 0, y = year1, label = Team), hjust = 1.2, size = 2)
p
Since the teams that overlap have the same winning percentage, you can deal with overlap more simply by combining the labels for teams with the same winning percentage. I've also made a few other changes to your code intended to streamline the process.
library(Lock5Data)
library(tidyverse)
library(scales)
data("NBAStandings1e")
data("NBAStandings2016")

# Name column 4 of each table after its season so the names become the
# Season values once the data is reshaped.
colnames(NBAStandings1e)[4] <- "2010-11" # 2010-2011
colnames(NBAStandings2016)[4] <- "2015-16" # 2015-2016
nba_df <- merge(
  NBAStandings1e[, c("Team", "2010-11")],
  NBAStandings2016[, c("Team", "2015-16")]
)

# Convert data to long format: one row per team and season.
# pivot_longer() replaces the superseded gather().
dat <- pivot_longer(
  nba_df,
  cols = -Team,
  names_to = "Season",
  values_to = "value"
)

# Combine labels for teams with same winning percentage (see footnote * below)
# `.groups = "drop"` returns an ungrouped table instead of one still grouped
# by Season.
dat_lab <- dat %>%
  group_by(Season, value) %>%
  summarise(
    Team = paste(Team, collapse = "\U2014"), # \U2014 is the emdash character
    .groups = "drop"
  )

# One line per team across the two seasons; combined labels are placed just
# outside each end of the lines.
ggplot(dat, aes(Season, value, group = Team)) +
  geom_line() +
  theme_minimal() +
  theme(panel.grid.minor = element_blank()) +
  labs(y = "Winning Percentage") +
  scale_y_continuous(limits = c(0, 1), labels = percent) +
  geom_text(
    data = subset(dat_lab, Season == "2010-11"),
    aes(label = Team, x = 0.98),
    hjust = 1, size = 2
  ) +
  geom_text(
    data = subset(dat_lab, Season == "2015-16"),
    aes(label = Team, x = 2.02),
    hjust = 0, size = 2
  )
Here's a closeup of what the labels look like:
* If there are teams that overlap due to having very close, but unequal, winning percentages, you can still group them by rounding. For example, if you wanted to group teams with winning percentages that are the same when rounded to the nearest 2 percent, you could do:
# Group teams whose value rounds to the same multiple of 0.02, join their
# names into one label and take the group's mean value.
dat_lab <- dat %>%
  group_by(Season, group = 0.02 * round(value / 0.02)) %>%
  summarise(
    Team = paste(Team, collapse = "\U2014"),
    value = mean(value)
  )
This would result in the labels being placed at the mean value for their group.

Vertical line in histogram in r

I'm struggling a bit with a piece of code in R. I am trying to create 6 different histograms in the same figure. It works fine, but I need to place 1 vertical line in each of the 6 histograms. The code I am working with could look something like this:
require(ggplot2)
require(reshape2)
require(gdata)
# Six rows of 1000 normal draws (sd 0.1), one row per mean, drawn in the
# order the means are listed.
MC_beta <- do.call(
  rbind,
  lapply(c(-2, -1, 0, 0.5, 1, 2), function(m) rnorm(1000, m, 0.1))
)
# One column per row of MC_beta, named "1" to "6".
df <- as.data.frame(t(MC_beta))
names(df) <- as.character(1:6)
# Long format: one row per draw, with `variable` naming the source column.
df2 <- melt(df)
# Intended line position for each of the six histograms.
z <- c(-2, -1, 0, 0.5, 1, 2)

# One histogram per variable, stacked in six rows. `z` is passed to
# geom_vline() as a plain vector, not as per-facet data.
ggplot(df2, aes(x = value, fill = variable)) +
  geom_vline(xintercept = z, colour = "black") +
  geom_histogram(binwidth = 0.03, colour = "black") +
  scale_fill_manual(
    name = "",
    values = c("red", "blue", "red", "blue", "red", "blue")
  ) +
  facet_wrap(~variable, nrow = 6, ncol = 1) +
  scale_x_continuous(breaks = seq(-2.5, 2.5, 0.5)) +
  guides(fill = FALSE) +
  theme_bw() +
  theme(
    strip.background = element_blank(),
    axis.text = element_text(size = 14.5),
    strip.text.x = element_text(size = 14.5)
  ) +
  stat_function(fun = dnorm)
The problem is with the statement geom_vline(xintercept = z, colour = "black"). Apparently, instead of placing one vertical line in each histogram, it places all 6 lines in each histogram. Instead, I want the first element in z to make a vertical line in the first histogram, the second element in z to make a vertical line in the second histogram, and so forth.
Thanks
Your z needs to be a data.frame with the corresponding xintercept for every value of the variable that defines the facet. Try these changes:
# One row per facet: the facet's `variable` level and where its line goes.
z <- data.frame(
  variable = levels(df2$variable),
  mean = c(-2, -1, 0, 0.5, 1, 2)
)

# geom_vline() gets its own data, so each facet only receives the row whose
# `variable` matches that facet.
ggplot(df2, aes(x = value, fill = variable)) +
  geom_vline(data = z, aes(xintercept = mean), colour = "black") +
  geom_histogram(binwidth = 0.03, colour = "black") +
  scale_fill_manual(
    name = "",
    values = c("red", "blue", "red", "blue", "red", "blue")
  ) +
  facet_wrap(~variable, nrow = 6, ncol = 1) +
  scale_x_continuous(breaks = seq(-2.5, 2.5, 0.5)) +
  # "none" replaces the deprecated `fill = FALSE` for hiding the legend.
  guides(fill = "none") +
  theme_bw() +
  theme(
    strip.background = element_blank(),
    axis.text = element_text(size = 14.5),
    strip.text.x = element_text(size = 14.5)
  ) +
  stat_function(fun = dnorm)
I hope that helps.
You have z outside the data, so you will draw a vertical line in each facet. Use
# Add each variable's intercept to df2 as column `z`, joined on `variable`.
df2 <- merge(df2, data.frame(variable = names(df), z = z))
and then
# Layer to add to the plot: xintercept is mapped to the `z` column of the
# plot data (the merged df2), so it varies with the data in each facet.
geom_vline(aes(xintercept = z), colour="black")

Resources