R: ggplot customize transform axis label to reduce zeros - r

In my following code:
library(ggplot2)
library(scales)
myData <- data.frame(
platform = c("Windows", "MacOs", "Linux"),
number = c(27000, 16000, 9000)
)
ggplot(myData, aes(x = reorder(platform, -number), y = number))+
geom_bar(stat="identity", fill="darkturquoise", width = 0.5)+
geom_text(aes(label = number), vjust=-0.3)+
xlab("Platform")+
scale_y_continuous(breaks = round(seq(0,40000, by = 5000), 1))
that produces this plot:
How do I change the param of scale_y_continuous to reduce the number of 000? i.e, the y-tick will show 5, 10, 15, 20, 25...

Divide the y-axis' labels by 1000 like so:
ggplot(myData, aes(x = reorder(platform, -number), y = number))+
geom_bar(stat="identity", fill="darkturquoise", width = 0.5)+
geom_text(aes(label = number), vjust=-0.3)+
xlab("Platform")+
scale_y_continuous(breaks = seq( 0,40000, by = 5000),
labels = function(y_value) y_value / 1000) # <- ! here !

Related

Position stacked identity data sample size as geom_text directly over a bar using geom_bar from ggplot2

In this experiment, we tracked presence or absence of bacterial infection in our subject animals. We were able to isolate which type of bacteria was present in our animals and created a plot that has Week Since Experiment Start on the X axis, and Percentage of Animals Positive for bacterial infection on the Y axis. This is a stacked identity ggplot where each geom_bar contains the different identities of the bacteria that were in the infected animals each week. Here is a sample dataset with the corresponding ggplot code and result:
DummyData <- data.frame(matrix(ncol = 5, nrow = 78))
colnames(DummyData) <- c('WeeksSinceStart','BacteriaType','PositiveOccurences','SampleSize','NewSampleSize')
DummyData$WeeksSinceStart <- c(1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,9,9,9,9,9,10,10,10,10)
DummyData$BacteriaType <- c("BactA","BactB","BactD","BactB","BactE","BactA","BactS","BactF","BactE","BactH","BactJ","BactK","BactE","BactB","BactS","BactF","BactL","BactE","BactW","BactH","BactS","BactJ","BactQ","BactN","BactW","BactA","BactD","BactE","BactA","BactC","BactD","BactK","BactL","BactE","BactD","BactA","BactS","BactK","BactB","BactE","BactF","BactH","BactN","BactE","BactL","BactZ","BactE","BactC","BactR","BactD","BactJ","BactN","BactK","BactW","BactR","BactE","BactW","BactA","BactM","BactG","BactO","BactI","BactE","BactD","BactM","BactH","BactC","BactM","BactW","BactA","BactL","BactB","BactE","BactA","BactS","BactH","BactQ","BactF")
PosOcc <- seq(from = 1, to = 2, by = 1)
DummyData$PositiveOccurences <- rep(PosOcc, times = 13)
DummyData$SampleSize <- c(78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,29,29,29,29,29,10,10,10,10)
DummyData$NewSampleSize <- c(78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,29,NA,NA,NA,NA,10,NA,NA,NA)
numcolor <- 20
plotcolors <- colorRampPalette(brewer.pal(8, "Set3"))(numcolor)
#GGplot for Dummy Data
DummyDataPlot <- ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences/SampleSize, fill = BacteriaType)) + geom_bar(position = "stack", stat = "identity") +
geom_text(label = DummyData$NewSampleSize, nudge_y = 0.1) +
scale_y_continuous(limits = c(0,0.6), breaks = seq(0, 1, by = 0.1)) + scale_x_continuous(limits = c(0.5,11), breaks = seq(0,10, by =1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive") +
scale_fill_manual(values = plotcolors)
The problem: I cannot seem to find a way to position the labels from geom_text directly over each bar. I would also love to add the text "n = " to the sample size value directly over each bar. Thank you for your help!
I have tried different values for position_dodge statement and nudge_y statement with no success.
Sometimes the easiest approach is to do some data wrangling, i.e. one option would be to create a separate dataframe for your labels:
library(ggplot2)
library(dplyr)
dat_label <- DummyData |>
group_by(WeeksSinceStart) |>
summarise(y = sum(PositiveOccurences / SampleSize), SampleSize = unique(SampleSize))
ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences / SampleSize, fill = BacteriaType)) +
geom_bar(position = "stack", stat = "identity") +
geom_text(data = dat_label, aes(x = WeeksSinceStart, y = y, label = SampleSize), inherit.aes = FALSE, nudge_y = .01) +
#scale_y_continuous(limits = c(0, 0.6), breaks = seq(0, 1, by = 0.1)) +
scale_x_continuous(limits = c(0.5, 11), breaks = seq(0, 10, by = 1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive"
) +
scale_fill_manual(values = plotcolors)

Plot the legend at the bottom in different way when we deal with discrete color filling in ggplot

Suppose we want to plot this data:
library(ggplot2)
library(sf)
library(raster)
library(colorRamps)
min_lon <- 10
max_lon <- 17
min_lat <- 8
max_lat <- 17
grid_size <- 0.5
lon_grids <- 1 + ((max_lon - min_lon)/grid_size)
lat_grids <- 1 + ((max_lat - min_lat)/grid_size)
points <- data.frame(lon = rep(seq(min_lon, max_lon, grid_size), lat_grids), lat = rep(seq(min_lat, max_lat, grid_size), each = lon_grids))
points$Var <- runif(min= 10, max = 48, 285)
points$value <-cut(points$Var, breaks= seq(10.08, 47.80, length.out = 13), dig.lab = 1)
ggplot() +
coord_sf(xlim = c(min_lon, max_lon), ylim = c(min_lat, max_lat)) +
theme_bw()+
geom_raster(data = points, aes(x = lon, y = lat, fill = value), interpolate = FALSE) +
labs(x="Longitude", y="Latitude")+
scale_fill_manual(values = matlab.like(n = 13), name = "[m]",
labels = sprintf("%.2f", seq(10.08, 47.80, length.out = 13)),
guide = guide_legend(reverse = TRUE))+theme(legend.position = "bottom")
This code produces the following graph:
Two problems I am facing here:
To make it discrete, I used the cut function. I chose the breaks= seq(10.08, 47.80, length.out = 13) arbitrary based on the minimum and maximum values with a random length of 13. Is there any criteria to decide the correct range?
Is there any way to make the legend look like this?
One option would be to use e.g. scale_fill_stepsn with guide_binswhich does not require to manually discretize the variable mapped on fill. Additionally I use a custom function to set the breaks of the legend instead of the default mechanism to set the number of breaks.
set.seed(123)
library(ggplot2)
library(colorRamps)
base <- ggplot() +
coord_sf(xlim = c(min_lon, max_lon), ylim = c(min_lat, max_lat)) +
theme_bw() +
geom_raster(data = points, aes(x = lon, y = lat), interpolate = FALSE) +
labs(x = "Longitude", y = "Latitude") +
theme(legend.position = "bottom")
base +
aes(fill = Var) +
scale_fill_stepsn(colors = matlab.like(n = 13), name = "[m]",
breaks = function(x) seq(x[[1]], x[[2]], length.out = 13),
labels = ~ sprintf("%.0f", .x),
guide = guide_bins(axis = FALSE,
show.limits = TRUE))

ggplot2: How to get dots to group with violin plots?

I would like to plot violin plots where x axis is exon however I want to group the plots. This works if its just violins however when I add in the jitters for some reason its not responding the correct aes and is plotting on its own? Here is a reproducible code with a screen shot of the error. thanks!
set.seed(1)
df <- data.frame(
exons = c(rep("e1", 200), rep("e2", 200)),
values = rnorm(400, 200, 40),
group = c(
rep("g1", 75), rep("g2", 75), rep("g3", 50),
rep("g1", 75), rep("g2", 75), rep("g3", 50)
)
)
ggplot(df, aes(y = values, x = exons, fill = group)) +
geom_violin() +
geom_jitter(shape = 16, position = position_jitter(0.07))
so if the plot works the dots should had been plotted within each of the group for each exon, however here it is clearly not.
You probably want both position_dodge() and position_jitterdodge()
library(ggplot2)
ggplot(df, aes(y = values, x = exons, fill = group)) +
geom_violin(position = position_dodge(width = 0.9)) +
geom_point(position = position_jitterdodge(seed = 1, dodge.width = 0.9))
Another option worth mentioning is geom_quasirandom() function from the ggbeeswarm package
library(ggbeeswarm)
ggplot(df, aes(y = values, x = exons, fill = group)) +
geom_violin(position = position_dodge(width = 0.9)) +
geom_quasirandom(dodge.width = 0.9, varwidth = TRUE)
Created on 2019-08-10 by the reprex package (v0.3.0)
Do you mean something like that?
set.seed ( 1)
df = data.frame (
exons = c(rep("e1", 200), rep("e2", 200))
,values = rnorm(400,200,40)
,group = c(rep("g1", 75), rep("g2", 75), rep("g3",50),
rep("g1", 75), rep("g2", 75), rep("g3",50) )
)
ggplot(df, aes(y= values , x= exons , fill = group )) +
geom_violin()+
geom_jitter(shape=16, position=position_jitter(width = NULL, height = NULL))
You can define the degree of jitter in x and y direction.

Histogram with normal Distribution in R using ggplot2 for illustrations

I'm trying to plot a histogram with ggplot2.
I wrote a simple code for this in R
dnorm.count <- function(x, mean = 0, sd = 1, log = FALSE, n = 1, binwidth = 1){
n * binwidth * dnorm(x = x, mean = mean, sd = sd, log = log)
}
mtcars %>%
ggplot(aes(x = mpg)) +
geom_histogram(bins =60,color = "white", fill = "#9FE367",boundary = 0.5) +
geom_vline(aes(xintercept = mean(mpg)),
linetype="dashed",
size = 1.6,
color = "#FF0000")+
geom_text(aes(label = ..count..), stat= "count",vjust = -0.6)+
stat_function(fun = dnorm.count, color = "#6D67E3",
args = list(mean= mean(mtcars$mpg),
sd = sd(mtcars$mpg),
n = nrow(mtcars)),
lwd = 1.2) +
scale_y_continuous(labels = comma, name = "Frequency") +
scale_x_continuous(breaks=seq(0,max(mtcars$mpg)))+
geom_text(aes(label = paste0("mean = ", round(mean(mtcars$mpg), 2)),
x = mean(mtcars$mpg)*1.2,
y = mean(mtcars$mpg)/5))+
geom_vline(aes(xintercept = sd(mpg)), linetype="dashed",size = 1.6, color = "#FF0000")
What I got is this!
The question is how do I Plot the histogram similar to this
using ggplot2 and is it possible to convert the code to R function?
Edit: For the better explanation of what I'm trying to do:
I wanna create a Histogram exactly the same as the one attached for reference using ggplot2 and then I wanna create a function for the same to reduce the coding. Use any package+ggplot2 you like. The histograms should have lines depicting the standard deviation & mean like the one in reference. If possible depict the standard deviation in the plot as the reference image, that's what I'm trying to achieve.
If your question how to plot histograms like the one you attached in your last figure, this 9 lines of code produce a very similar result.
library(magrittr) ; library(ggplot2)
set.seed(42)
data <- rnorm(1e5)
p <- data %>%
as.data.frame() %>%
ggplot(., aes(x = data)) +
geom_histogram(fill = "white", col = "black", bins = 30 ) +
geom_density(aes( y = 0.3 *..count..)) +
labs(x = "Statistics", y = "Probability/Density") +
theme_bw() + theme(axis.text = element_blank())
You could use annotate() to add symbols or text and geom_segment to show the intervals on the plot like this:
p + annotate(x = sd(data)/2 , y = 8000, geom = "text", label = "σ", size = 10) +
annotate(x = sd(data) , y = 6000, geom = "text", label = "2σ", size = 10) +
annotate(x = sd(data)*1.5 , y = 4000, geom = "text", label = "3σ", size = 10) +
geom_segment(x = 0, xend = sd(data), y = 7500, yend = 7500) +
geom_segment(x = 0, xend = sd(data)*2, y = 5500, yend = 5500) +
geom_segment(x = 0, xend = sd(data)*3, y = 3500, yend = 3500)
This chunk of code would give you something like this:

How to show percent labels on histogram bars using ggplot2

I have seen lots of question regarding converting count on y axis into percent but must of them are in bar plot.
I want to do similar thing in histogram but not able to show the labels on the bar clearly. Please tell me where I am doing wrong.
x = runif(100, min = 0, max = 10)
data1 <- data.frame(x = x)
ggplot(aes(x = x), data = data1)+
geom_histogram(aes(y = (..count..)/sum(..count..)), bins = 10, breaks =
seq(0,10,1), fill = "blue", col = "black")+
geom_text(aes(y = ((..count..)/sum(..count..)),
label = scales::percent((..count..)/sum(..count..))),
stat = "count", vjust = -10)+
scale_y_continuous(labels = scales::percent)
Output:
Use scale_y_continous with breaks and labels will solve your problem.
data1 <- data.frame (x = runif(100, min = 0, max = 10))
ggplot(aes(x=x), data1) + stat_bin(aes(y = ..count..))
ggplot(data1, aes(x = x)) + geom_histogram(fill = "blue", col = "black")+ scale_y_continuous(breaks = seq(0,10,1),labels = paste(seq(0, 10, by = 1) / 100, "%", sep = ""))+geom_text(aes(y = (..count..),label = scales::percent((..count..)/sum(..count..))), stat="bin",colour="green",vjust=2)
or, you can specify where you would like to add the percentage like this:
geom_text(aes(y = (..count..)+0.5))
of course you can change the color as well. from,
stat="bin",colour="your prefer color "
Also you can change the width of the bins as follows:
geom_histogram(fill = "blue", col = "black", binwidth = 0.5)

Resources