Plotting spectroscopic data using ggplot2 - r

I am trying to plot spectroscopic data using ggplot2. I get my data in the following form:
My code so far is:
library(ggplot2)
library(reshape2)
melt_data <- melt(spectroscopic_data, id.vars = "sample_name", variable.name = "wavenumber", value.name = "intensity")
melt_data$probe = factor(melt_data$probe)
melt_data$wellenzahl = as.numeric(levels(melt_data$wellenzahl))[melt_data$wellenzahl]
ggplot(melt_data, aes(x=wavenumber, y=intensity, group=sample_name, color=sample_name)) + geom_line() +
scale_x_reverse(breaks=c(10000, 9500, 9000, 8500, 8000, 7500, 7000, 6500, 6000, 5500, 5000, 4500, 4000)) +
scale_color_manual(values=c("#FF0000", "#0000CC", "#00CC00", "#FF00FF", "#FF9900", "#000000", "#999900", "#33FFFF", "#FFCCFF", "#FFFF00", "#999999", "#9933FF", "#993300", "#99FF33")) +
theme_bw() +
theme(legend.position = "bottom") +
labs(x=expression(wavenumbers), y="intensity", colour = "") +
theme(legend.text=element_text(size=10), axis.text=element_text(size=12), axis.title=element_text(size=14)) +
guides(colour = guide_legend(ncol = 2, keywidth=1.5, keyheight=1, override.aes = list(size=1.8)))
I need the same color for aaa-samples, bbb-samples and so on (multiple measurements of one sample) but the plot does not work. I get a plot that looks like this when you zoom in:
It looks like ggplot2 connects two samples/lines of the same measurement instead of plotting them separately. Does anyone have an idea? I am trying to fix this since hours...
Thank you!

Here is my result after Luke C's awesome support:
library(ggplot2)
library(reshape2)
melted_data <- melt(newtestdata, id.vars = c("sample_name",
"sample_id"), variable.name = "wavenumber", value.name = "intensity")
melted_data$wavenumber=as.numeric(levels(melted_data$wavenumber))[melted_data$wavenumber]
ggplot(melted_data, aes(x=wavenumber, y=intensity, group = sample_id, color = sample_name)) + geom_line() +
scale_x_reverse(breaks=c(1005, 1200, 1400), expand = c(0.01, 0.01)) +
scale_y_continuous(breaks=c(0, 0.5, 1.0, 1.5, 2.0), expand = c(0.01, 0.01)) +
scale_color_manual(values=c("#FF0000", "#0000CC", "#00CC00", "#FF00FF", "#FF9900", "#000000")) +
theme_bw() +
theme(legend.position = "bottom") +
theme(plot.margin=unit(c(1,1,0.5,1),"cm")) +
labs(x=expression(wavenumbers~"in"~cm^{"-1"}), y="absorbance in a.u.", colour = "") +
theme(legend.text=element_text(size=10), axis.text=element_text(size=12), axis.title=element_text(size=14)) +
guides(colour = guide_legend(ncol = 3, keywidth=1.5, keyheight=1, override.aes = list(size=1.2)))
ggsave("buechi-all.pdf", width = 11.69, height = 8.27)

One way is to add a sample id to your data frame before you reshape it. That will allow you to keep the names like "aaa" and "bbb" but assign a unique identifier to act as your grouping variable (since it cannot differentiate between two observations at the same x variable otherwise). For an example where I tried to mimic your input data:
ex<-cbind(c("aaa","aaa","bbb","bbb"), c(0.426,0.405,0.409,0.395), c(0.430,0.408,0.411,0.399), c(0.432,0.411,0.413,0.401))
ex<- as.data.frame(ex)
colnames(ex) <- c("sample_name", "4000", "4004", "4008")
ex$sample_id<-1:nrow(ex)
melt <- melt(ex, id.vars = c("sample_name", "sample_id"), variable.name = "wavenumber", value.name = "intensity")
ggplot(melt, aes(x = wavenumber, y = intensity, group = sample_id, color = sample_name)) +
geom_line() +
theme_classic()
This outputs separate lines for different measurements of samples grouped by sample id, but keeping the color according to the sample name:
Is that sort of what you're after?
Edits below
To show the same approach with a larger dataset:
alpha <-rep(sapply(letters[1:10], function(x) {paste(x,x,x, sep = "")}), each = 2)
adf <- data.frame(alpha)
adf$sample_id <- seq(1, (length(alpha)))
adf$t <- rnorm(20, 0.4, 0.1)
wavenum <- seq(4, 1503)
for(i in wavenum){
for(j in 1:length(alpha)){
adf[j,i] <- adf[j,i-1] + (rnorm(1, 0.01, 0.01))
}
}
adf[1:10, 1:10]
anames <- c("sample_name", "sample_id", (1400 + 4 * seq(0, 1500)))
names(adf)<-anames
melt <- melt(adf, id.vars = c("sample_name", "sample_id"), variable.name = "wavenumber", value.name = "intensity")
head(melt)
ggplot(melt[1:1500,], aes(x = wavenumber, y = intensity, group = sample_id, color = sample_name)) +
geom_line(lwd = 1.5) +
theme_classic()
This will give a similar plot to the one above, where each sample has an individual line for each measurement that are both the same color.
If I'm still missing what you're actually after, I apologize!

Related

Position stacked identity data sample size as geom_text directly over a bar using geom_bar from ggplot2

In this experiment, we tracked presence or absence of bacterial infection in our subject animals. We were able to isolate which type of bacteria was present in our animals and created a plot that has Week Since Experiment Start on the X axis, and Percentage of Animals Positive for bacterial infection on the Y axis. This is a stacked identity ggplot where each geom_bar contains the different identities of the bacteria that were in the infected animals each week. Here is a sample dataset with the corresponding ggplot code and result:
DummyData <- data.frame(matrix(ncol = 5, nrow = 78))
colnames(DummyData) <- c('WeeksSinceStart','BacteriaType','PositiveOccurences','SampleSize','NewSampleSize')
DummyData$WeeksSinceStart <- c(1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,9,9,9,9,9,10,10,10,10)
DummyData$BacteriaType <- c("BactA","BactB","BactD","BactB","BactE","BactA","BactS","BactF","BactE","BactH","BactJ","BactK","BactE","BactB","BactS","BactF","BactL","BactE","BactW","BactH","BactS","BactJ","BactQ","BactN","BactW","BactA","BactD","BactE","BactA","BactC","BactD","BactK","BactL","BactE","BactD","BactA","BactS","BactK","BactB","BactE","BactF","BactH","BactN","BactE","BactL","BactZ","BactE","BactC","BactR","BactD","BactJ","BactN","BactK","BactW","BactR","BactE","BactW","BactA","BactM","BactG","BactO","BactI","BactE","BactD","BactM","BactH","BactC","BactM","BactW","BactA","BactL","BactB","BactE","BactA","BactS","BactH","BactQ","BactF")
PosOcc <- seq(from = 1, to = 2, by = 1)
DummyData$PositiveOccurences <- rep(PosOcc, times = 13)
DummyData$SampleSize <- c(78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,29,29,29,29,29,10,10,10,10)
DummyData$NewSampleSize <- c(78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,29,NA,NA,NA,NA,10,NA,NA,NA)
numcolor <- 20
plotcolors <- colorRampPalette(brewer.pal(8, "Set3"))(numcolor)
#GGplot for Dummy Data
DummyDataPlot <- ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences/SampleSize, fill = BacteriaType)) + geom_bar(position = "stack", stat = "identity") +
geom_text(label = DummyData$NewSampleSize, nudge_y = 0.1) +
scale_y_continuous(limits = c(0,0.6), breaks = seq(0, 1, by = 0.1)) + scale_x_continuous(limits = c(0.5,11), breaks = seq(0,10, by =1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive") +
scale_fill_manual(values = plotcolors)
The problem: I cannot seem to find a way to position the labels from geom_text directly over each bar. I would also love to add the text "n = " to the sample size value directly over each bar. Thank you for your help!
I have tried different values for position_dodge statement and nudge_y statement with no success.
Sometimes the easiest approach is to do some data wrangling, i.e. one option would be to create a separate dataframe for your labels:
library(ggplot2)
library(dplyr)
dat_label <- DummyData |>
group_by(WeeksSinceStart) |>
summarise(y = sum(PositiveOccurences / SampleSize), SampleSize = unique(SampleSize))
ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences / SampleSize, fill = BacteriaType)) +
geom_bar(position = "stack", stat = "identity") +
geom_text(data = dat_label, aes(x = WeeksSinceStart, y = y, label = SampleSize), inherit.aes = FALSE, nudge_y = .01) +
#scale_y_continuous(limits = c(0, 0.6), breaks = seq(0, 1, by = 0.1)) +
scale_x_continuous(limits = c(0.5, 11), breaks = seq(0, 10, by = 1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive"
) +
scale_fill_manual(values = plotcolors)

ggplot line plot with one group`s lines on top

I am making a line plot of several groups and want to make a visualization where one of the groups lines are highlighted
ggplot(df) + geom_line(aes(x=timepoint ,y=var, group = participant_id, color=color)) +
scale_color_identity(labels = c(red = "g1",gray90 = "Other"),guide = "legend")
However, the group lines are partially obscured by the other groups lines
How can I make these lines always on top of other groups lines?
The simplest way to do this is to plot the gray and red groups on different layers.
First, let's try to replicate your problem with a dummy data set:
set.seed(1)
df <- data.frame(
participant_id = rep(1:50, each = 25),
timepoint = factor(rep(0:24, 50)),
var = c(replicate(50, runif(1, 50, 200) + runif(25, 0.3, 1.5) *
sin(0:24/(0.6*pi))^2/seq(0.002, 0.005, length = 25))),
color = rep(sample(c("red", "gray90"), 50, TRUE, prob = c(1, 9)), each = 100)
)
Now we apply your plotting code:
library(ggplot2)
ggplot(df) +
geom_line(aes(x=timepoint ,y=var, group = participant_id, color = color)) +
scale_color_identity(labels = c(red = "g1", gray90 = "Other"),
guide = "legend") +
theme_classic()
This looks broadly similar to your plot. If instead we plot in different layers, we get:
ggplot(df, aes(timepoint, var, group = participant_id)) +
geom_line(data = df[df$color == "gray90",], aes(color = "Other")) +
geom_line(data = df[df$color == "red",], aes(color = "gl")) +
scale_color_manual(values = c("red", "gray90")) +
theme_classic()
Created on 2022-06-20 by the reprex package (v2.0.1)
You can use factor releveling to bring the line (-s) of interest to front.
First, let's plot the data as is, with the red line partly hidden by others.
library(ggplot2)
library(dplyr)
set.seed(13)
df <-
data.frame(timepoint = rep(c(1:100), 20),
participant_id = paste0("p_", sort(rep(c(1:20), 100))),
var = abs(rnorm(2000, 200, 50) - 200),
color = c(rep("red", 100), rep("gray90", 1900)))
ggplot(df) +
geom_line(aes(x = timepoint ,
y = var,
group = participant_id, color = color)) +
scale_color_identity(labels = c(red = "g1", gray90 = "Other"),
guide = "legend")
Now let's bring p_1 to front by making it the last factor level.
df %>%
mutate(participant_id = factor(participant_id)) %>%
mutate(participant_id = relevel(participant_id, ref = "p_1")) %>%
mutate(participant_id = factor(participant_id, levels = rev(levels(participant_id)))) %>%
ggplot() +
geom_line(aes(x=timepoint,
y=var,
group = participant_id,
color = color)) +
scale_color_identity(labels = c(red = "g1", gray90 = "Other"),
guide = "legend")

Reorder and split the ggplot heatmap based on the clusters in one of the columns

I generated a heatmap with ggplot, and order the samples by using hclust, However, I still need more reordering to get all the similar values corespondent with one of the samples in the ordered cluster. Here I generate a samples data to explain better.
set.seed(99)
M <- data.frame(names = paste0("g", seq(1,30)), S1 = runif(30, 0 , 8), S2 = runif(30, -4, 5), S3 = runif(30, -5, 5))
M.mat <- M %>%
tibble::column_to_rownames('names') %>%
as.matrix()
M.dendro <- as.dendrogram(hclust(d = dist(x = M.mat)))
dendro.plot <- ggdendrogram(data = M.dendro, rotate = TRUE) +
theme(axis.text.y = element_text(size = 6))
print(dendro.plot)
str(M.dendro)
dend.order <- order.dendrogram(M.dendro)
df <- melt(M, id.vars = "names")
df$names <- factor(x = df$names,
levels = M$names[dend.order],
ordered = TRUE)
ggplot(df, aes(x = names, y = variable, fill = value)) +
geom_tile(color = "black") +
scale_fill_gradient2(low = muted("steelblue"), mid = "white", high = muted("red3"),
midpoint = 0, space = "Lab", na.value = "grey50",
guide = "colourbar", aesthetics = "fill"
) +
theme(axis.text.x = element_text(angle = 90, hjust=1), legend.key.size = unit(0.4, "cm")) +
coord_fixed()
For the generated heatmap, I need reorder it such that all the dark blue be on the bottom, the middle color and then the red on the top based on samples S3. Thank you

how to draw two half circles in ggplot in r

How can I make a plot like this with two different-sized half circles (or other shapes such as triangles etc.)?
I've looked into a few options: Another post suggested using some unicode symbol, that didn't work for me. And if I use a vector image, how can I properly adjust the size parameter so the 2 circles touch each other?
Sample data (I would like to make the size of the two half-circles equal to circle1size and circle2size):
df = data.frame(circle1size = c(1, 3, 2),
circle2size = c(3, 6, 5),
middlepointposition = c(1, 2, 3))
And ultimately is there a way to position the half-circles at different y-values too, to encode a 3rd dimension, like so?
Any advice is much appreciated.
What you're asking for is a bar plot in polar coordinates. This can be done easily in ggplot2. Note that we need to map y = sqrt(count) to get the area of the half circle proportional to the count.
df <- data.frame(x = c(1, 2),
type = c("Investors", "Assignees"),
count = c(19419, 1132))
ggplot(df, aes(x = x, y = sqrt(count), fill = type)) + geom_col(width = 1) +
scale_x_discrete(expand = c(0,0), limits = c(0.5, 2.5)) +
coord_polar(theta = "x", direction = -1)
Further styling would have to be applied to remove the gray background, remove the axes, change the color, etc., but that's all standard ggplot2.
Update 1: Improved version with multiple countries.
df <- data.frame(x = rep(c(1, 2), 3),
type = rep(c("Investors", "Assignees"), 3),
country = rep(c("Japan", "Germany", "Korea"), each = 2),
count = c(19419, 1132, 8138, 947, 8349, 436))
df$country <- factor(df$country, levels = c("Japan", "Germany", "Korea"))
ggplot(df, aes(x=x, y=sqrt(count), fill=type)) + geom_col(width =1) +
scale_x_continuous(expand = c(0, 0), limits = c(0.5, 2.5)) +
scale_y_continuous(expand = c(0, 0)) +
coord_polar(theta = "x", direction = -1) +
facet_wrap(~country) +
theme_void()
Update 2: Drawing the individual plots at different locations.
We can do some trickery to take the individual plots and plot them at different locations in an enclosing plot. This works, and is a generic method that can be done with any sort of plot, but it's probably overkill here. Anyways, here is the solution.
library(tidyverse) # for map
library(cowplot) # for draw_text, draw_plot, get_legend, insert_yaxis_grob
# data frame of country data
df <- data.frame(x = rep(c(1, 2), 3),
type = rep(c("Investors", "Assignees"), 3),
country = rep(c("Japan", "Germany", "Korea"), each = 2),
count = c(19419, 1132, 8138, 947, 8349, 436))
# list of coordinates
coord_list = list(Japan = c(1, 3), Germany = c(2, 1), Korea = c(3, 2))
# make list of individual plots
split(df, df$country) %>%
map( ~ ggplot(., aes(x=x, y=sqrt(count), fill=type)) + geom_col(width =1) +
scale_x_continuous(expand = c(0, 0), limits = c(0.5, 2.5)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 160)) +
draw_text(.$country[1], 1, 160, vjust = 0) +
coord_polar(theta = "x", start = 3*pi/2) +
guides(fill = guide_legend(title = "Type", reverse = T)) +
theme_void() + theme(legend.position = "none") ) -> plotlist
# extract the legend
legend <- get_legend(plotlist[[1]] + theme(legend.position = "right"))
# now plot the plots where we want them
width = 1.3
height = 1.3
p <- ggplot() + scale_x_continuous(limits = c(0.5, 3.5)) + scale_y_continuous(limits = c(0.5, 3.5))
for (country in names(coord_list)) {
p <- p + draw_plot(plotlist[[country]], x = coord_list[[country]][1]-width/2,
y = coord_list[[country]][2]-height/2,
width = width, height = height)
}
# plot without legend
p
# plot with legend
ggdraw(insert_yaxis_grob(p, legend))
Update 3: Completely different approach, using geom_arc_bar() from the ggforce package.
library(ggforce)
df <- data.frame(start = rep(c(-pi/2, pi/2), 3),
type = rep(c("Investors", "Assignees"), 3),
country = rep(c("Japan", "Germany", "Korea"), each = 2),
x = rep(c(1, 2, 3), each = 2),
y = rep(c(3, 1, 2), each = 2),
count = c(19419, 1132, 8138, 947, 8349, 436))
r <- 0.5
scale <- r/max(sqrt(df$count))
ggplot(df) +
geom_arc_bar(aes(x0 = x, y0 = y, r0 = 0, r = sqrt(count)*scale,
start = start, end = start + pi, fill = type),
color = "white") +
geom_text(data = df[c(1, 3, 5), ],
aes(label = country, x = x, y = y + scale*sqrt(count) + .05),
size =11/.pt, vjust = 0)+
guides(fill = guide_legend(title = "Type", reverse = T)) +
xlab("x axis") + ylab("y axis") +
coord_fixed() +
theme_bw()
If you don't need to have ggplot2 map aesthetics other than x and y you could try egg::geom_custom,
# devtools::install_github("baptiste/egg")
library(egg)
library(grid)
library(ggplot2)
d = data.frame(r1= c(1,3,2), r2=c(3,6,5), x=1:3, y=1:3)
gl <- Map(mushroomGrob, r1=d$r1, r2=d$r2, gp=list(gpar(fill=c("bisque","maroon"), col="white")))
d$grobs <- I(gl)
ggplot(d, aes(x,y)) +
geom_custom(aes(data=grobs), grob_fun=I) +
theme_minimal()
with the following grob,
mushroomGrob <- function(x=0.5, y=0.5, r1=0.2, r2=0.1, scale = 0.01, angle=0, gp=gpar()){
grob(x=x,y=y,r1=r1,r2=r2, scale=scale, angle=angle, gp=gp , cl="mushroom")
}
preDrawDetails.mushroom <- function(x){
pushViewport(viewport(x=x$x,y=x$y))
}
postDrawDetails.mushroom<- function(x){
upViewport()
}
drawDetails.mushroom <- function(x, recording=FALSE, ...){
th2 <- seq(0,pi, length=180)
th1 <- th2 + pi
d1 <- x$r1*x$scale*cbind(cos(th1+x$angle*pi/180),sin(th1+x$angle*pi/180))
d2 <- x$r2*x$scale*cbind(cos(th2+x$angle*pi/180),sin(th2+x$angle*pi/180))
grid.polygon(unit(c(d1[,1],d2[,1]), "snpc")+unit(0.5,"npc"),
unit(c(d1[,2],d2[,2]), "snpc")+unit(0.5,"npc"),
id=rep(1:2, each=length(th1)), gp=x$gp)
}
# grid.newpage()
# grid.draw(mushroomGrob(gp=gpar(fill=c("bisque","maroon"), col=NA)))

showing different units in each free_y of facet_grid

I have plotted two facets one on top of the other with two different ys (a percentage and a cost) and the same x (Years). I took most of the ideas from this post and some variations of the same.
I'd like to show the labels of the y axis as percentages for the rate and as £ for the costs, but I have been unable to change each y label format independently.
Below a reproducible example using facet_grid (I managed to create a similar thing with facet_wrap but I get stuck with the same problem).
I considered using grid.arrange() from the gridExtra package, but it seemed that would bring other issues with the legend.
library(plyr)
library(tidyr)
library(dplyr)
library(ggplot2)
library(scales)
set.seed(12345)
my_labels <- function(variable, value){
names_li <- list("percentage", "cost in pounds")
return(names_li[value])
}
df <- data.frame(
rate = runif(10, 0, 1),
cost = rnorm(10, 100, 40),
years = seq(from = 2001, to = 2010)
)
df %>%
gather(type_of_var,
value,
rate:cost) ->
df2
df2 %>%
ggplot(aes(x = years,
y = value,
ymin = 0,
ymax = .1)) +
facet_grid(type_of_var ~ .,
scales = 'free_y',
labeller = my_labels) +
labs(x = "Year",
y = "") +
geom_point(subset = . (type_of_var == "rate")) +
geom_line(subset = . (type_of_var == "rate"),
colour = "grey") +
## the following two lines don't work
# scale_y_continuous(subset = . (type_of_var == "rate"),
# labels = percent) +
geom_bar(subset = . (type_of_var == "cost"),
stat = "identity") +
theme_bw() +
theme(strip.text.y = element_text(size = 15,
colour = "black"),
plot.title = element_text(lineheight = 0.8,
face = "bold")) +
scale_x_continuous(breaks = seq(2001, 2010, 1)) +
labs(title = "free_y y axis labels")
Thanks
as a fragile workaround, you could use
label_fun <- function (x) {
if(max(x, na.rm=TRUE) > 1) dollar(x) else percent(x)
}
(assuming you only deal with big money and small percentages)

Resources