How to replicate this excel 3d histogram in R - r

I am looking to reproduce roughly the following 3d histogram (made in Excel) in R. The trick is that the labels should fall between the Justice names, as the bars are meant to delineate the number of times certain justices voted against other justices.
You can use the following code to generate the data:
# Sample data: nine justice names and the value to plot for each, in order.
justice_names <- c(
  "Peckham", "Brewer", "Shiras", "Fuller", "Gray",
  "Brown", "McKenna", "White", "Harlan"
)
cutpoints <- c(0, 22, 16, 12, 13, 7, 16, 13, 20)

Here's a rough reproduction, omitting the 3D.
Plotting the x-axis as numeric and applying labels after allows you to shift the labels to the right by half a mark.
library(ggplot2)

# Values to plot and the justice names, listed in plotting order.
cutpoints <- c(0, 22, 16, 12, 13, 7, 16, 13, 20)
justice_names <- c(
  "Peckham", "Brewer", "Shiras", "Fuller", "Gray",
  "Brown", "McKenna", "White", "Harlan"
)

# `order` is a numeric x position (1, 2, ..., 9). Keeping the x-axis numeric
# is what allows the labels to sit at fractional positions between the bars.
hist_data <- data.frame(
  justice_names,
  cutpoints,
  order = seq_along(cutpoints)
)
# Pin the factor levels to the order given above instead of alphabetical order.
hist_data$justice_names <-
  factor(hist_data$justice_names, levels = hist_data$justice_names)

ggplot(hist_data, aes(x = order, y = cutpoints)) +
  # geom_col() uses the y values as bar heights (no counting step).
  geom_col(width = 0.5) +
  scale_x_continuous(
    breaks = hist_data$order + 0.5, # Shift labels by 0.5
    labels = hist_data$justice_names
  ) +
  # Print each value just above its bar.
  geom_text(aes(label = cutpoints), vjust = 0, nudge_y = 0.25) +
  labs(
    x = "Justice", y = "Number",
    title = "Fig A-1. Number of Cutpoints, 1899-1901"
  ) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(linetype = "dashed")
  )

Related

Merging legend of 2 graphical layers in ggplot2 and ggstar with geom_point and geom_star

I spent a whole day trying to fix this silly graphical issue without success...
I have a PCoA dataset with 10 populations, let's say "special" ones (which I want to display as solid symbols), and 4 "non special" ones (to be displayed as 4 non-solid symbols).
I want one legend with each symbol related to a species and each color to a population of both the "special" and "non special" population.
an example
library(ggplot2)
library(ggstar)

# Example data: 10 species (a-j) and 10 localities (A-J), 14 rows each,
# with random coordinates on the two ordination axes.
df <- data.frame(
  Species = sort(rep(letters[1:10], 14)),
  Locality = sort(rep(LETTERS[1:10], 14)),
  Axis.1 = rnorm(140, 0, 0.7),
  Axis.2 = rnorm(140, 0, 0.7)
)
# NOTE: in the original dataset, 2 populations may share the same species, thus the same symbol
special <- c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J") # this must be solid symbol
non_special <- c("K", "M", "N", "O") # this must be empty symbols

# %in% tests membership in the set; == would compare element by element with
# recycling and silently pick the wrong rows.
# NOTE(review): Species holds lowercase a-j in this example data, so neither
# subset matches any row as written - adjust the sample data before running.
df.special <- subset(df, Species %in% special)
df.non.special <- subset(df, Species %in% non_special)

# solid symbols for special population
ggplot(data = df.special, aes(x = Axis.1, y = Axis.2, fill = Locality)) +
  geom_star(aes(starshape = Species), size = 2) +
  scale_starshape_manual(values = c(1, 5, 9, 15, 14, 13, 11, 23, 21, 28)) +
  # empty symbols for non special population
  geom_point(
    data = df.non.special,
    aes(x = Axis.1, y = Axis.2, shape = Species, color = Locality),
    size = 2
  ) +
  scale_shape_manual(name = "Legend", values = c(8, 11, 13, 14)) +
  # additional style
  theme_bw() +
  xlab("Axis.1=30%") +
  ylab("Axis.2=20%") +
  theme(
    panel.grid = element_blank(),
    legend.title = element_text(hjust = 0.5),
    axis.text = element_text(size = 15, colour = "black"),
    axis.title = element_text(size = 10, face = "bold")
  )
The desired output should be a scatterplot with one legend with both solid and empty symbols for special and non special population, respectively, colored as different Species.
Thus I need to implement somehow
scale_fill_manual and scale_shape_manual (empty symbols needed) and adding scale_fill_manual to scale_starshape_manual.
Thank you in advance
One option to achieve your desired result would be to
use the override.aes argument of guide_legend to set the shapes/starshapes for the color/fill legends.
Get rid of the shape and starshape legend
Get rid of the legend title for the color legend which in my case is the second or bottom legend.
Remove the spacing and the margin for the legends so that they appear as one.
Note: I slightly adjusted your example data and code as it was not working and e.g. did not include any non special characters. See below.
library(ggplot2)
library(ggstar)

# Symbol codes used both on the scales and in the legend overrides below.
ggplot(data = df.special, aes(x = Axis.1, y = Axis.2)) +
  # Special populations: filled star shapes, one fill colour per locality.
  geom_star(aes(starshape = Species, fill = Locality), size = 2) +
  scale_starshape_manual(values = c(1, 5, 9, 15, 14, 13, 11, 23, 21, 28)) +
  # Non-special populations: regular point shapes, coloured by locality.
  geom_point(
    data = df.non.special,
    aes(shape = Species, color = Locality),
    size = 2
  ) +
  scale_shape_manual(values = c(8, 11, 13, 14)) +
  # Axis titles; the colour legend gets no title so it reads as a continuation
  # of the fill legend drawn above it.
  labs(x = "Axis.1=30%", y = "Axis.2=20%", color = NULL) +
  guides(
    # Fill legend first, showing the star shapes in its keys.
    fill = guide_legend(
      order = 1,
      override.aes = list(starshape = c(1, 5, 9, 15, 14, 13, 11, 23, 21, 28))
    ),
    # Colour legend second, showing the point shapes in its keys.
    color = guide_legend(
      order = 2,
      override.aes = list(shape = c(8, 11, 13, 14))
    ),
    # The separate shape legends are dropped.
    shape = "none",
    starshape = "none"
  ) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    legend.title = element_text(hjust = 0.5),
    axis.text = element_text(size = 15, colour = "black"),
    axis.title = element_text(size = 10, face = "bold"),
    # No gap or margin between the two legends, so they appear as one.
    legend.spacing.y = unit(0, "pt"),
    legend.margin = margin()
  )
DATA
# Example data: 14 species (A-N) paired with 14 localities (a-n), 10 rows per
# group, with random coordinates on the two axes.
df <- data.frame(
  Species = rep(LETTERS[1:14], each = 10),
  Locality = rep(letters[1:14], each = 10),
  Axis.1 = rnorm(n = 140, mean = 0, sd = 0.7),
  Axis.2 = rnorm(n = 140, mean = 0, sd = 0.7)
)

# First ten species are "special", the remaining four are "non special".
special <- c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J")
non_special <- c("K", "L", "M", "N")

# Split the rows by species membership in each set.
df.special <- subset(df, Species %in% special)
df.non.special <- subset(df, Species %in% non_special)

Align asterisk of geom point ggplot with position dodge

I am trying to align significance asterisks (* or ** or ***) to the points of a geom point graph with position dodge to indicate the significance of a value using ggplot2. I wasn't able to find any similar questions and answers with similar issue.
Here is data frame 'df':
# Minimal example: three treatments (A, B, C) measured at three concentrations
# (1, 10, 100), with the mean, error-bar bounds and a p-value for each row.
df <- data.frame(
  conc = rep(c(1, 10, 100), times = 3),
  mean = c(
    0.9008428, 0.8278645, 0.7890388,
    0.9541905, 0.8537885, 0.8212504,
    1.3828724, 0.7165685, 0.7985398
  ),
  Treatment = rep(c("A", "B", "C"), each = 3),
  upper = c(
    1.0990144, 0.9505348, 0.8273494,
    1.0389074, 0.9227461, 0.9657371,
    1.6864420, 0.7401891, 0.9046951
  ),
  lower = c(
    0.7026713, 0.7051941, 0.7507282,
    0.9528077, 0.7848309, 0.6767638,
    1.0793029, 0.6929479, 0.6923846
  ),
  p.value = c(
    0.0003, 0.6500, 1,
    0.02, 0.0400, 0.3301,
    0.100, 0.023, 0.05
  )
)
I made a plot with automatic asterisks, but they are not aligned the way I want. I believe this is because of position_dodge, but I have too many points at each concentration, so I have to use it (the data frame given here is a minimal example).
legend_title <- "Treatment"
breaks_y <- c(0, 0.25, 0.5, 0.75, 1, 1.25, 1.5)
breaks <- c(1, 10, 100)

# Significance labels. Thresholds are applied from strictest to loosest, and
# the is.na() guard keeps a stronger label from being overwritten by a weaker
# one. Rows with p.value >= 0.05 keep NA and get no label.
df$Label <- NA
df$Label[df$p.value < 0.001] <- "***"
df$Label[df$p.value < 0.01 & is.na(df$Label)] <- "**"
df$Label[df$p.value < 0.05 & is.na(df$Label)] <- "*"

ggplot(df, aes(x = conc, y = mean, color = Treatment)) +
  geom_errorbar(
    aes(ymax = upper, ymin = lower, width = 0),
    position = position_dodge(width = 0.5)
  ) +
  geom_point(
    aes(shape = Treatment, fill = Treatment),
    size = 4,
    position = position_dodge(width = 0.5)
  ) +
  # Setting the constant color = "black" removes the inherited colour mapping
  # from this layer, which leaves it with no grouping variable, so
  # position_dodge() has nothing to dodge by. Mapping group = Treatment
  # explicitly restores the grouping and puts each label at the same dodged
  # x position as its point.
  geom_text(
    aes(label = Label, group = Treatment),
    size = 4,
    position = position_dodge(width = 0.5),
    color = "black"
  ) +
  scale_shape_manual(values = c(22, 21, 23)) +
  scale_color_manual(values = c("blue", "coral1", "darkgreen")) +
  scale_fill_manual(values = c("blue", "coral1", "darkgreen")) +
  labs(
    x = "Concentration (\u03BCM)", y = "Abs",
    title = "Viability", fill = legend_title
  ) +
  scale_x_continuous(trans = "log10", limits = c(0.5, 170), breaks = breaks) +
  scale_y_continuous(limits = c(0, 1.5), breaks = breaks_y) +
  theme_light() +
  ggpubr::rotate_x_text(angle = 70) +
  theme(
    axis.text = element_text(size = 12, face = "bold"),
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12, face = "bold")
  )
How can I align the asterisk automatically to be directly above the correct dot with position_dodge?

ggplot time series: messed up x axis - 2

This is modified version of this question.
I need to create time series plot for 2 lines for the following data:
# Creating data set
year <- c(rep(2018, 4), rep(2019, 4), rep(2020, 4))
month_1 <- c(2, 3, 7, 8, 6, 10, 11, 12, 5, 7, 8, 12)
avg_dlt_calc <- c(10, 20, 11, 21, 13, 7, 10, 15, 9, 14, 16, 32)
avg_dlt_standard <- c(rep(9, 12))
data_to_plot <- data.frame(cbind(year, month_1, avg_dlt_calc, avg_dlt_standard))
# Months become a factor whose levels follow first appearance in the data
# (2, 3, 7, 8, 6, 10, 11, 12, 5), not calendar order.
data_to_plot$month_1 <- factor(data_to_plot$month_1, levels = unique(data_to_plot$month_1))

# One panel per year; the calculated series gets a line and points, the
# standard series a line only.
ggplot(data_to_plot, aes(x = as.factor(month_1))) +
  geom_line(aes(y = avg_dlt_calc, group = year, colour = "DLT Calculated"), size = 0.5) +
  geom_line(aes(y = avg_dlt_standard, group = year, colour = "DLT standard"), size = 0.5) +
  geom_point(aes(y = avg_dlt_calc, colour = "DLT Calculated")) +
  # limits is given the full 12-element column, repeated months included.
  scale_x_discrete(name = "months", limits = data_to_plot$month_1) +
  facet_grid(~year, scales = "free") +
  scale_color_manual(
    name = "",
    labels = c("DLT Calculated", "DLT standard"),
    values = c("blue", "red")
  ) +
  theme(
    legend.position = "top",
    legend.text = element_text(size = 8)
  )
But x-axis looks wrong:
If I plot the data without this line:
data_to_plot$month_1 <- factor(data_to_plot$month_1, levels=unique(data_to_plot$month_1))
Then x-axis will still be messy:
I am setting limits for x-axis, but looks like it is not working.
How can I fix it?
I've skipped some lines and features of your plot, but in essence, this is what needs to be changed:
# Assumes month_1 is still numeric, i.e. the factor() conversion of month_1
# has been skipped; a continuous scale cannot be applied to a factor.
ggplot(data_to_plot, aes(x = month_1)) + # no as.factor
  geom_point(aes(y = avg_dlt_calc)) +
  geom_line(aes(y = avg_dlt_calc)) +
  geom_line(aes(y = avg_dlt_standard), colour = "red") +
  # do *not* use scale_x_discrete, your x-axis is *continuous*;
  # use breaks-argument to set the ticks.
  # note, limits should only have 2 values - lower and upper limit.
  # Months run from 1 to 12, so the limits must span that whole range;
  # anything outside the limits is dropped from the plot.
  scale_x_continuous(breaks = 1:12, limits = c(1, 12)) +
  facet_grid(~year)
In your code, you used limits = data_to_plot$month_1, but ggplot2 only used the 2 first elements of month_1 - it did not interpret it as a set of acceptable values.

Fixing the x axis and the gridline of a plot in r

I am trying to visualise data on a graph with R.
The code below works perfectly but the gridlines seem to be lost (see the image below).
# Scatter plot of mean_res against position_aa from res_final, with a title
# and explicit axis labels.
with(res_final, plot(position_aa, mean_res, main="Hydrophobicity",
xlab="Amino acid position",
ylab="Eisenberg scale"))
# Draw the same points again with symbol pch = 10 at half size.
with(res_final, points(position_aa, mean_res, pch=10, cex=0.5))
.col <- rgb(0, 0, 0, .25) ## alpha .25 for transparency
# Dotted horizontal lines at the tick positions returned by axTicks(3).
# NOTE(review): side 3 is the top axis, so these are x-axis tick values;
# axTicks(2) would give the y-axis ticks - confirm which was intended.
abline(h=axTicks(3), lty=3, col=.col)
# Dotted vertical lines. seq() applied to the 25-element vector -10:14
# returns 1:25, so the lines land at 1..25 rather than at -10..14.
abline(v=seq(-10:14), lty=3, col=.col)
I have positions from -10 to 14. How can I make the x axis with every single position separately labelled?
How can I add the gridlines to the plot below so that it is visible for each position from the x axis?
You haven't provided any data, but the following is a reasonable approximation:
# Simulated stand-in for the missing data: positions -10..14 with uniform
# random values, where the 4 middle draws come from a higher range than the
# 10 before and the 11 after. The seed makes the draws repeatable.
set.seed(69)
res_final <- data.frame(
  position_aa = seq(from = -10, to = 14, by = 1),
  mean_res = c(
    runif(n = 10, min = -0.5, max = 0.25),
    runif(n = 4, min = 0.5, max = 1.25),
    runif(n = 11, min = -0.5, max = 0.25)
  )
)
The main problem with your code is your use of seq, which isn't doing what you think it is. The way to get a sequence between -10 and 14 is seq(-10, 14, 1) or seq(-10, 14). This change will allow your grid lines to appear as expected.
For your second problem, you can add an axis call using side = 1 (the bottom axis) and the at argument to specify the breaks on the axis. You'll need to ensure that the plot area is wide enough (or the axis text is small enough) that some of the numbers don't get suppressed.
# Scatter plot of mean_res against position_aa with title and axis labels.
with(res_final, plot(position_aa, mean_res,
  main = "Hydrophobicity",
  xlab = "Amino acid position",
  ylab = "Eisenberg scale"
))
# Add a tick for every position on the bottom axis. `side` is the required
# argument that selects the axis (1 = bottom); `pos` is a different argument
# that only moves the axis line, and supplying it alone leaves `side` missing.
axis(side = 1, at = seq(-10, 14, 1))
# Draw the same points again with symbol pch = 10 at half size.
with(res_final, points(position_aa, mean_res, pch = 10, cex = 0.5))
.col <- rgb(0, 0, 0, .25) # alpha .25 for transparency
# Horizontal grid lines at the y-axis ticks (side 2 = left axis).
abline(h = axTicks(2), lty = 3, col = .col)
# Vertical grid lines at every position from -10 to 14.
abline(v = seq(-10, 14, 1), lty = 3, col = .col)
For completeness, the equivalent in ggplot would be:
library(ggplot2)

ggplot(res_final, aes(x = position_aa, y = mean_res)) +
  # Two stacked point layers: a large white disc with a smaller black one on
  # top of it.
  geom_point(shape = 21, size = 5, fill = "white") +
  geom_point(shape = 21, size = 2, fill = "black") +
  # One axis break per position.
  scale_x_continuous(breaks = seq(-10, 14)) +
  labs(
    title = "Hydrophobicity",
    x = "Amino acid position",
    y = "Eisenberg scale"
  ) +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    text = element_text(size = 15),
    # Centre the title relative to the whole plot.
    plot.title.position = "plot",
    plot.title = element_text(hjust = 0.5)
  )

ggplot2 histogram has solid line along x axis for which there are no values

I'm using ggplot2 to plot the frequency of distance measurements of various roads. Y axis is frequency, x axis is distance. I notice in all the plots (Not just this one) that there is a solid line along the 0 frequency value for all distances - see graph here:
For example, in the image I provided, the maximum road distance is 25, but the line stretches to 30. No matter what I set the xlim to, the line stretches to that maximum distance. I'm not sure what in the code is causing this. Below is the code I'm using to get this:
# Histogram of road lengths, outlined by zone, with both axes limited via
# xlim()/ylim().
ggplot(ln_jan, aes(x = kilo, color = zone_sm)) +
  geom_histogram(
    fill = "black", alpha = .8, position = "identity", size = 1.15
  ) +
  xlim(0, 30) +
  ylim(0, 4000) +
  labs(title = "Road lengths", x = "Distance (km)", y = "Frequency") +
  scale_color_discrete(
    name = "road types",
    labels = c("highways", "small roads")
  ) +
  theme(
    # The title size was set twice (21, then 26); the later value applies.
    plot.title = element_text(hjust = 0.5, size = 26, face = "bold"),
    axis.text = element_text(size = 10, face = "bold"),
    axis.title = element_text(size = 14, face = "bold"),
    panel.background = element_rect(fill = 'gray70')
  )
Here is the head of the dataset for reproducing the problem:
ID kilo zone_sm
185 12.522931 NW
234 12.702159 NW
25315 1.939652 NE
25411 1.938117 NE
25507 1.936778 NE
25603 1.935634 NE
As requested here is the dput(hist(ln_jan$kilo)):
structure(list(breaks = c(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26), counts = c(6079L, 8898L, 4240L, 2414L, 1677L, 986L, 760L, 609L, 394L, 639L, 338L, 53L, 14L), density = c(0.112154533043061, 0.1641636839969, 0.078225895723405, 0.0445371019519575, 0.0309398177189034, 0.0181912106564333, 0.0140216228183462, 0.0112357477583853, 0.00726910446109, 0.0117892328696358, 0.00623593225342238, 0.000977823696542563, 0.000258293051916903), mids = c(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25), xname = "ln_jan$kilo", equidist = TRUE), class = "histogram")
Here is the length(which(ln_jan$kilo>25)) result: 1. The value of this one is 25.01803... I rounded a little
The issue that you're seeing is that specifying xlim() means that ggplot has to show the frequency of observations in the bins up to 30, which means you get bins with 0 frequency up to that limit - the thickness of your line is making it way more obvious than it would otherwise be. You can use coord_cartesian() instead of xlim() to show the x axis you want, and leave the styling of your graph the same. Here is an example using a recreated data set:
# Recreated data set: 100 log-normal draws, seeded so the draws are repeatable.
set.seed(1)
df <- data.frame(x = exp(rnorm(100)))

# Base histogram with a thick outline and no fill.
p <- ggplot(df, aes(x = x)) +
  geom_histogram(fill = "transparent", colour = "black", size = 2)

# Same histogram, with the x range set on the scale ...
p + xlim(0, 15)
# ... and with the x range set on the coordinate system instead.
p + coord_cartesian(xlim = c(0, 15)) #<-- this figure shown
Updated code for your plot would look like:
# Histogram of road lengths, outlined by zone. The visible ranges are set
# with coord_cartesian() rather than xlim()/ylim().
ggplot(ln_jan, aes(x = kilo, color = zone_sm)) +
  geom_histogram(
    fill = "black", alpha = .8, position = "identity", size = 1.15
  ) +
  coord_cartesian(xlim = c(0, 30), ylim = c(0, 4000)) +
  labs(title = "Road lengths", x = "Distance (km)", y = "Frequency") +
  scale_color_discrete(
    name = "road types",
    labels = c("highways", "small roads")
  ) +
  theme(
    # The title size was set twice (21, then 26); the later value applies.
    plot.title = element_text(hjust = 0.5, size = 26, face = "bold"),
    axis.text = element_text(size = 10, face = "bold"),
    axis.title = element_text(size = 14, face = "bold"),
    panel.background = element_rect(fill = 'gray70')
  )

Resources