Plot different colours based on the conditions - r

This is the first 10 rows of my data frame:
head(test.data,10)
# A tibble: 10 x 5
date o2.permeg co2.ppm apo o2.spike
<time> <dbl> <dbl> <dbl> <chr>
1 2015-01-01 00:00:00 -685.09 413.023 -354.1816 N
2 2015-01-01 00:02:00 -695.10 412.894 -364.8690 N
3 2015-01-01 00:04:00 -687.84 412.979 -357.1627 N
4 2015-01-01 00:06:00 -683.23 412.866 -353.1460 N
5 2015-01-01 00:08:00 -683.28 412.755 -353.7788 N
6 2015-01-01 00:10:00 -685.40 412.647 -356.4659 N
7 2015-01-01 00:12:00 -687.80 412.659 -358.8029 N
8 2015-01-01 00:14:00 -662.79 412.665 NA Y
9 2015-01-01 00:16:00 -684.17 412.762 -354.6321 N
10 2015-01-01 00:18:00 -680.37 412.720 -351.0526 N
As you can see there's a last column named o2.spike, which has characters N and Y in it. N means that the data point is not a spike, and Y means that it is a spike. In this sample, there's only 1 Y, but in the real frame, there are loads, and randomly placed.
My desire is to plot all the data points in a plot, and those marked with Y will be plotted in a different colour.
For your information, this is the current code that I am using to plot everything. The first 3 variables are plotted in red, green, and blue, and I want the "Y" rows to be plotted in as, for example, pink.
library(openair)
test.data$yr_day <- format(as.Date(test.data$date), "%Y-%m-%d")
dir.create(daily) # where "daily" is the path of the folder I want to save the plots into
for (d in unique(test.data$yr_day)) {
mypath <- file.path(daily, paste(name, d, ".png", sep = "" ))
png(filename = mypath, width = 963, height = 690)
timePlot(subset(test.data, yr_day == d),
plot.type = "p",
pollutant = c("co2.ppm", "o2.permeg", "apo"),
y.relation = "free",
date.pad = TRUE,
pch = c(19,19,19),
cex = 0.2,
xlab = paste("Time of day in hours on", d),
ylab = "CO2, O2, and APO concentrations",
name.pol = c("CO2 (ppm)", "O2 (per meg)", "APO (per meg)"),
date.breaks = 24,
date.format = "%H:%M"
)
dev.off()
}
An example plot (containing all the spikes with the same colour as the non-spike ones) is as follows:
So how do I plot the spikes in a different colour from the others? Thank you very much!
Edit:
As asked by Sebastian, I have added this (not sure how you guys will be able to extract the data from that)
dput(head(test.data,20))
structure(list(date = structure(c(1420070400, 1420070520, 1420070640,
1420070760, 1420070880, 1420071000, 1420071120, 1420071240, 1420071360,
1420071480, 1420071600, 1420071720, 1420071840, 1420071960, 1420072080,
1420072200, 1420072320, 1420072440, 1420072560, 1420072680), class = c("POSIXct",
"POSIXt"), tzone = "GMT"), o2.permeg = c(-685.09, -695.1, -687.84,
-683.23, -683.28, -685.4, -687.8, -662.79, -684.17, -680.37,
-684.66, -686.13, -683.27, -680.77, -682.16, -692.54, NA, NA,
NA, NA), co2.ppm = c(413.023, 412.894, 412.979, 412.866, 412.755,
412.647, 412.659, 412.665, 412.762, 412.72, 412.692, 412.71,
412.757, 412.838, 412.922, 413.019, NA, NA, NA, NA), apo = c(-354.181646778043,
-364.868973747017, -357.162673031026, -353.145990453461, -353.778806682578,
-356.465871121718, -358.802863961814, NA, -354.632052505966,
-351.052577565632, -355.489594272076, -356.86508353222, -353.75830548926,
-350.833007159904, -351.781957040573, -361.652649164678, NA,
NA, NA, NA), o2.spike = c("N", "N", "N", "N", "N", "N", "N",
"Y", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N"
)), .Names = c("date", "o2.permeg", "co2.ppm", "apo", "o2.spike"
), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame"
))

Unfortunately, without having data, it's not easy to answer the question.
A ggplot2 solution could be:
g1 <- ggplot(data=test.data, aes(x=date, y=o2.permeg, col=o2.spike)) + geom_point()
g1
Passing a column of the dataframe to parameter "col" in "aes" makes you map with different colors every different value in that column.
It creates even a legend, with names associated to different colors.
I tried this with another dataframe ("iris", contained in base R) and it worked, hope it will be helpful.
Edit:
To have side-by-side plots, you can create 3 plots with ggplot and the use the function plot_grid() provided by "cowplot" package.
library(cowplot)
g1 <- ggplot(data=test.data, aes(x=date, y=o2.permeg, col=o2.spike)) + geom_point()
g2 <- ggplot(data=test.data, aes(x=date, y=co2.ppm, col=o2.spike)) + geom_point()
g3 <- ggplot(data=test.data, aes(x=date, y=apo, col=o2.spike)) + geom_point()
plot_grid(g1, g2, g3, nrow=3, ncol=1)

Related

R - Adding ggplots (calendR) into a gt table

I want to create a table of calendar plots. I used calendR to create the plots (over 70 plots) and I want to get them into a table. I often work with gt tables and I found a working function for adding ggplots to the table.
library(calendR)
library(gt)
plot = calendR(start = "M", # Weeks start on Monday
mbg.col = 4, # Background color of the month names
months.col = "white", # Color of the text of the month names
special.days = "weekend", # Color the weekends
special.col = "lightblue", # Color of the special.days
lty = 0, # Line type (no line)
weeknames = c("M", "T", # Week names
"W", "T",
"F", "S",
"S"),
title.size = 20, # Title size
orientation = "p") # Background image
dplyr::tibble(
Year1 = NA,
Year2 = NA
) %>%
gt() %>%
text_transform(
locations = cells_body(columns = c(Year1, Year2)),
fn = function(x) {
plot %>%
ggplot_image(height = px(300))
}
)
As you see, the plots are not readable because they are too big or do not have the right resolution. How can I solve this problem? I want to have at least 2 plots a row, more would be great.

Determining what RNA SEQ data is filtered on volcano plot

I am using RNA seq data to analyze genes via a volcano plot (which compares differential gene expression of bacteria with and without antibiotic) in R. After having created my plot, I am unsure why some of my values which read "0" TPM in one condition and a TPM value that's not "0" in the other condition were not determined to be differentially expressed. Some of the genes on my volcano plot have this difference in TPM and show up as significant on my plot while others with this "0" value difference are not considered to be significant according to my plot.
Here is a sample of my data
(UO1_D712_##### represents the locus number, the top column represents the different replicates where "1" is untreated and "2" is treated with antibiotic, the letters next to the numbers represent a different technical replicate):
structure(list(`1` = c("U01_D712_00001",
"U01_D712_00002", "U01_D712_00003",
"U01_D712_00004", "U01_D712_00005", "U01_D712_00006", "U01_D712_00007",
"U01_D712_00008", "U01_D712_00009", "U01_D712_00010", "U01_D712_00011",
"U01_D712_00012", "U01_D712_00013", "U01_D712_00014", "U01_D712_00015",
"U01_D712_00016", "U01_D712_00017", "U01_D712_00018", "U01_D712_00019",
"U01_D712_00020", "U01_D712_00021", "U01_D712_00022", "U01_D712_00023",
"U01_D712_00024", "U01_D712_00025", "U01_D712_00026", "U01_D712_00027",
"U01_D712_00028", "U01_D712_00029", "U01_D712_00030"), `1a` = c("6.590456502",
"6.741694688", "7.110342585", "6.299482433", "2.77173982", "2.470330508",
"3.827125186", "6.267229842", "5.524708663", "2.657913228", "2.87209859",
"7.479820548", "4.980185572", "4.210955199", "0", "4.492492822",
"3.611091371", "3.714270433", "7.455036914", "7.045203025", "6.061860857",
"2.925268313", "6.544077039", "5.747318013", "9.97083444", "9.22523089",
"4.20205383", "6.097040679", "2.621192351", "0"), `1b` = c("6.544454427",
"6.601489488", "7.134224619", "5.814043553", "3.280958379", "2.649180803",
"3.860542083", "6.256648363", "5.380427766", "2.581027705", "3.016132165",
"7.405329447", "4.701503289", "4.073814818", "0", "4.304196924",
"3.515977329", "3.843535649", "7.342972625", "6.966606769", "6.122878624",
"3.007522306", "6.495797641", "5.965431621", "9.828050269", "9.219563915",
"4.065778989", "6.105331066", "3.061209408", "0"), `1c` = c("6.608196006",
"6.743010138", "7.102600793", "6.146518601", "3.555184202", "2.364971542",
"4.034053983", "6.158523627", "5.656051812", "2.658660735", "3.054717455",
"7.392164473", "4.950953264", "4.277770477", "0", "4.507936666",
"3.794842979", "3.610794578", "7.471646548", "7.104792624", "6.484767016",
"3.071205184", "6.584425715", "6.20466015", "9.986342122", "9.282943758",
"4.179958213", "6.219551653", "2.984738345", "0"), `1d` = c("6.547155382",
"6.558328892", "6.992501615", "6.449558793", "3.059801464", "2.418800257",
"3.96498952", "6.013208538", "5.279919645", "2.893295085", "1.750510471",
"7.408735671", "4.9425624", "3.804549986", "0", "4.174835979",
"3.806006888", "3.570390524", "7.641137006", "6.976672494", "6.363030106",
"3.083061726", "6.300910093", "6.007490342", "9.926316442", "9.09671588",
"4.320556917", "6.153860107", "2.877230446", "0"), `1e` = c("6.626724417",
"6.577345176", "7.156821278", "6.296873411", "3.618089702", "2.394444986",
"4.129376392", "6.011715246", "5.31197869", "2.00754706", "2.695493528",
"7.538910448", "4.606060035", "3.909472643", "0", "4.346616047",
"3.468681284", "3.338231445", "7.559599613", "7.1527452", "6.232923513",
"3.108624209", "6.535435309", "6.12922864", "10.13108497", "9.310331313",
"3.959568571", "6.182335537", "2.902736258", "0"), `1f` = c("6.419219179",
"6.650459302", "7.319125725", "5.570869357", "2.962845933", "2.55903176",
"4.087597573", "5.995610538", "5.386268651", "2.750800859", "2.416572678",
"7.579148955", "3.952633067", "3.615227674", "0", "4.562838935",
"3.76942104", "3.920096905", "7.935320749", "7.501470652", "6.13700099",
"3.123910608", "6.035971952", "5.706235015", "9.254751395", "8.379630979",
"4.51391973", "5.6890651", "2.43285316", "0"), `1g` = c("6.553221221",
"6.633949852", "7.182305386", "5.769365973", "2.721354972", "1.668390466",
"4.148367057", "6.240883382", "5.458877133", "1.733842637", "2.723803355",
"7.522249899", "4.149567197", "3.780763096", "0", "4.496306813",
"3.645643535", "3.851001768", "7.678552875", "7.283411279", "6.591585956",
"2.879345378", "6.389427003", "5.911222165", "9.851084493", "9.084575304",
"4.272587776", "5.974762147", "2.98852705", "0"), `2a` = c("9.769887737",
"4.550226652", "1.869021464", "4.944848987", "7.9678549", "2.682865013",
"8.495575559", "2.234521659", "3.667196316", "9.180445037", "3.210107621",
"7.21523691", "5.714579923", "5.423986751", "9.118981459", "8.635701597",
"2.742889473", "3.712618983", "8.006057144", "4.999541279", "10.54351774",
"5.880978085", "7.145433526", "6.982416661", "9.339651188", "5.360835327",
"4.699680905", "3.423826225", "6.408271885", "5.038170992"),
`2b` = c("10.26397519", "1.945664005", "2.086763158", "2.800904763",
"8.583418657", "1.536094563", "9.32057547", "3.10685839",
"4.224502319", "10.1252842", "1.811175407", "5.714439316",
"6.039142559", "3.833361174", "9.757360286", "9.565906731",
"1.523640473", "2.315033488", "6.312524363", "4.889986456",
"10.23020108", "4.848727685", "7.533256999", "7.138160378",
"10.30380331", "3.955469283", "3.167940742", "3.599655687",
"4.828945262", "3.701043054"), `2c` = c("9.478481216", "2.789289131",
"3.393949527", "3.754810933", "8.710777154", "1.806170784",
"9.150005253", "2.612275457", "4.961073313", "9.802701699",
"2.933183115", "6.532384958", "6.919449225", "4.432699799",
"9.715063475", "9.265691356", "1.412064593", "3.330131873",
"6.665979896", "5.158526421", "9.417365584", "4.899531204",
"8.173459354", "7.271400938", "9.813068613", "4.384622077",
"3.700645365", "4.457874829", "5.649440022", "3.531010379"
), `2d` = c("8.199795497", "2.565711524", "2.202287889",
"3.856354444", "7.380224849", "2.192476466", "8.14446837",
"1.144258862", "3.31447122", "8.713146629", "2.697890381",
"6.304428859", "5.745291803", "4.898396114", "9.173362747",
"8.339933849", "3.159678152", "4.094587234", "7.608649692",
"5.280206424", "10.34630403", "5.098585806", "7.262400625",
"6.150190905", "9.316698845", "5.073027993", "4.695003229",
"2.485847024", "5.545300465", "4.350571411"), `2e` = c("8.74935033",
"3.739489484", "0.217205045", "4.413657999", "8.745525588",
"3.657060086", "9.279834921", "2.898951179", "4.282874018",
"9.610485827", "3.561102455", "7.228334332", "6.388491443",
"4.389908652", "8.781086564", "9.178866581", "2.374603596",
"3.961037408", "7.864369809", "3.654728044", "10.15284858",
"5.894439123", "7.68020282", "7.12243523", "9.998637438",
"5.092174395", "4.111530392", "3.776835632", "5.624523213",
"4.095011377"), `2f` = c("7.648310926", "4.345557215", "1.986576876",
"4.99426288", "7.087937177", "2.810917253", "7.77706637",
"2.62822773", "3.581811188", "8.470225989", "3.335757437",
"7.416094847", "5.208841128", "5.536128034", "8.255571138",
"7.993319997", "1.9209089", "3.573861828", "7.318814519",
"5.233804806", "11.05855833", "6.247720809", "6.673407583",
"6.029960625", "8.806867591", "5.459208493", "4.001428729",
"3.609936979", "5.876522973", "4.652839671"), `2g` = c("7.555235468",
"3.892899549", "1.726443458", "5.304546796", "7.039588042",
"3.027235295", "7.703852207", "1.753183519", "2.909288568",
"8.385169315", "3.902707541", "7.523315081", "4.978364017",
"5.49103181", "8.096218606", "7.944822989", "2.352609608",
"4.155433517", "7.227355741", "5.532668321", "11.24946953",
"6.159185473", "6.443203375", "5.931761874", "8.7421732",
"5.502000205", "4.652883503", "3.458323017", "6.566487449",
"4.89790353")), row.names = c(NA, 30L), class = "data.frame")
The design matrix:
structure(list(SampleID = c("1a ", "1b", "1c",
"1d", "1e", "1f",
"1g", "2a", "2b", "2c", "2d", "2e", "2f", "2g"), Group = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), Replicate = c("a",
"b", "c", "d", "e", "f", "g", "a", "b", "c", "d", "e", "f", "g"
)), row.names = c(NA, 14L), class = "data.frame")
I read through this edgeR user manual to write my code: http://bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf
Here is how I wrote my code to sample the RNA seq data (note ~/Documents/VOLCANO/R10LB_0ugvs0.5ug.csv refers to the the sample of data above and ~/Documents/VOLCANO/R10LB_0vs0.5_designmatrix.csv refers to the above design matrix):
R10LB_0vs0.5 <- read.csv2("~/Documents/VOLCANO/R10LB_0ugvs0.5ug.csv", sep=",", check.names = F)
R10LB_0vs0.5 <- janitor::remove_empty(R10LB_0vs0.5, which = "cols")
R10LB_0vs0.5_design <- read.csv2("~/Documents/VOLCANO/R10LB_0vs0.5_designmatrix.csv", sep=",")
#Set rownames
rownames(R10LB_0vs0.5) <- R10LB_0vs0.5$`1`
R10LB_0vs0.5 <- R10LB_0vs0.5[,-1]
#colnames(R10LB_0ugvs0.5ug) <- R10LB_0ugvs0.5ug[1,]
#R10LB_0ugvs0.5ug <- R10LB_0ugvs0.5ug[-1,]
#Convert to Matrix
R10LB_0vs0.5 <- data.matrix(R10LB_0vs0.5)
#Create DGEList
R10LB_0vs0.5 <- DGEList(counts = R10LB_0vs0.5, group = R10LB_0vs0.5_design$Group)
str(R10LB_0vs0.5)
#Design Matrix
Group <- as.vector(as.character(R10LB_0vs0.5_design$Group))
Replicate <- as.vector((R10LB_0vs0.5_design$Replicate))
R10LB_0vs0.5_designmatrix <- model.matrix(~0+Group+Replicate)
#Filter
keep <- filterByExpr(R10LB_0vs0.5)
R10LB_0vs0.5_filter <- R10LB_0vs0.5[keep, keep.lib.sizes = FALSE]
#Estimate Sample Dispersion
R10LB_0vs0.5_Disp <- estimateDisp(R10LB_0vs0.5_filter, R10LB_0vs0.5_designmatrix)
#Create Contrasts (Group Comparisons)
CONTRASTS <- makeContrasts( Group1vs2 = Group1-Group2,
levels = R10LB_0vs0.5_designmatrix)
#GLM Fit for Group 1 vs. Group 2
R10LB_0vs0.5fit <- glmFit(R10LB_0vs0.5_Disp, contrast = CONTRASTS[,1])
R10LB_0vs0.5lrt <- glmLRT(R10LB_0vs0.5fit)
R10LB_0vs0.5TT <- topTags(R10LB_0vs0.5lrt, n=nrow(R10LB_0vs0.5_Disp))
write.csv(R10LB_0vs0.5TT, file = "R10LB_0vs0.5_comparison.csv")
saveRDS(R10LB_0vs0.5TT, file = "R10LB_0vs0.5_comparison.RDS")
Here is my code for formatting the data into a volcano plot:
#Basic scatter plot: x is "logFC", y is "PValue"
ggplot(data = R10LB_0vs0.5_comparison, aes(x = logFC, y = PValue)) + geom_point()
#Doesn't look quite like a Volcano plot... convert the p-value into a -log10(p-value)
p4 <- ggplot(data = R10LB_0vs0.5_comparison, aes(x = logFC, y = -log10(PValue))) + geom_point() + theme_minimal()
#Add vertical lines for logFC thresholds, and one horizontal line for the p-value threshold
p5 <- p4 + geom_vline(xintercept = c(-0.6, 0.6), col = "red") + geom_hline(yintercept = -log10(0.05), col = "red")
# The significantly differentially expressed genes are the ones found in the upper-left and upper-right corners.
# Add a column to the data frame to specify if they are UP- or DOWN- regulated (log2FoldChange respectively positive or negative)
# add a column of NAs
R10LB_0vs0.5_comparison$diffexpressed <- "NO"
# if logFC > 0.6 and PValue < 0.05, set as "UP"
R10LB_0vs0.5_comparison$diffexpressed[R10LB_0vs0.5_comparison$logFC > 0.6 & R10LB_0vs0.5_comparison$PValue < 0.05] <- "UP"
# if logFC < -0.6 and PValue < 0.05, set as "DOWN"
R10LB_0vs0.5_comparison$diffexpressed[R10LB_0vs0.5_comparison$logFC < -0.6 & R10LB_0vs0.5_comparison$PValue < 0.05] <- "DOWN"
# Re-plot but this time color the points with "diffexpressed"
p4 <- ggplot(data=R10LB_0vs0.5_comparison, aes(x=logFC, y=-log10(PValue), col=diffexpressed)) + geom_point() + theme_minimal()
# Add lines as before...
p5 <- p4 + geom_vline(xintercept=c(-0.6, 0.6), col="red") + geom_hline(yintercept=-log10(0.05), col="red")
## Change point color
# 1. by default, it is assigned to the categories in an alphabetical order):
p6 <- p5 + scale_color_manual(values=c("blue", "black", "red"))
# 2. to automate a bit: create a named vector: the values are the colors to be used, the names are the categories they will be assigned to:
mycolors <- c("blue", "red", "black")
names(mycolors) <- c("DOWN", "UP", "NO")
p6 <- p5 + scale_colour_manual(values = mycolors)
# Now write down the name of genes beside the points...
# Create a new column "delabel" to de, that will contain the name of genes differentially expressed (NA in case they are not)
R10LB_0vs0.5_comparison$delabel <- NA
R10LB_0vs0.5_comparison$delabel[R10LB_0vs0.5_comparison$diffexpressed != "NO"] <- R10LB_0vs0.5_comparison$gene_symbol[R10LB_0vs0.5_comparison$diffexpressed != "NO"]
ggplot(data=R10LB_0vs0.5_comparison, aes(x=logFC, y=-log10(PValue), col=diffexpressed, label=delabel)) + geom_point() + theme_minimal() +geom_text()
# Finally, we can organize the labels nicely using the "ggrepel" package and the geom_text_repel() function
# load library
library(ggrepel)
# plot adding up all layers we have seen so far
ggplot(data=R10LB_0vs0.5_comparison, aes(x=logFC, y=-log10(PValue), col=diffexpressed, label=delabel)) +geom_point() + theme_minimal() +geom_text_repel() + scale_color_manual(values=c("blue", "black", "red")) + geom_vline(xintercept=c(-0.6, 0.6), col="red") +geom_hline(yintercept=-log10(0.05), col="red")
This is the volcano plot I currently have with all of my data:
Can you explain how some of these functions filtered my data (i.e. filterByExpr, estimateDisp, makeContrasts, glmFit, glmLRT)? Why are some of my data points that change from 0 TPM in one condition to some value in another condition showing up on my plot while others are not?
Are there other specific filtering processes you would recommend to change, fix, and/or improve my volcano plot?

Creating an R barplot with a subset of my dataframe

I am trying to make a barplot in R with the following data frame and code below. However, when I do this, the year column also gets inserted into my graph. Is there a way to stop this from happening but still keep my graph sorted by the year?
barplot(t(as.matrix(Number_Letter_Year_DF)), beside=TRUE,
xlab="Year", ylab="Number",
names.arg=c("2016","2017", "2018"),
legend= c("A", "B","C","D","E","F"), args.legend = list(title="Letter", x="topright", cex=.7))
abline(h=0)
Year A B C D E F
2016 2547.150 2001.075 2493.925 1123.450 1876.625 1718.175
2017 2829.025 1808.025 2681.850 2633.425 3005.525 2542.550
2018 1776.175 1538.900 1614.675 845.225 1155.500 1029.325
We can remove the first column i.e. 'year' and change it to row names
barplot(t(`row.names<-`(as.matrix(Number_Letter_Year_DF[-1]), Number_Letter_Year_DF$Year)), beside=TRUE,
xlab="Year", ylab="Number",
names.arg=c("2016","2017", "2018"),
legend= c("A", "B","C","D","E","F"), args.legend = list(title="Letter", x="topright", cex=.7))
abline(h=0)
data
Number_Letter_Year_DF <- structure(list(Year = 2016:2018, A = c(2547.15, 2829.025, 1776.175
), B = c(2001.075, 1808.025, 1538.9), C = c(2493.925, 2681.85,
1614.675), D = c(1123.45, 2633.425, 845.225), E = c(1876.625,
3005.525, 1155.5), F = c(1718.175, 2542.55, 1029.325)), class = "data.frame",
row.names = c(NA,
-3L))

X-axis for times

I am trying to generate a series of plots that show the same patient taking drinks and urinating at different times. Each plot represents a single day. I want to compare the days and hence I need to ensure that all graphs plotted have the same x-axis. My code is below which I cribbed from How to specify the actual x axis values to plot as x axis ticks in R
### Data Input
time_Thurs <- c("01:10", "05:50", "06:00","06:15", "06:25", "09:35", "10:00", "12:40",
"14:00", "17:20", "18:50", "19:10", "20:10", "21:00", "22:05", "22:35")
event_Thurs <- c("u", "u", "T", "T", "u", "u", "T","T","u", "u", "T", "T", "T", "T", "u", "W")
volume_Thurs <- c(NA, NA, 0.25, 0.25, NA, NA, 0.125, 0.625, NA, NA, 0.25, 0.25, 0.25, 0.25,
NA, 0.25)
total_liquids_Thurs <- sum(volume_Thurs, na.rm=TRUE)
time_Thurs <- paste("04/04/2019", time_Thurs, sep=" ")
time_Fri <- c("01:15", "06:00", "06:10", "06:25", "06:30", "07:10", "08:40", "09:20",
"12:45", "13:45")
event_Fri <- c("u","u", "T","T","u","uu","T", "u", "T", "u")
volume_Fri <- c(NA, NA, 0.25, 0.25, NA, NA, 0.125, NA, 0.625, NA)
total_liquids_Fri <- sum(volume_Fri, na.rm=TRUE)
time_Fri <- paste("05/04/2019", time_Fri, sep=" ")
### Collect all data together
event <- c(event_Thurs, event_Fri)
Volume <- c(volume_Thurs, volume_Fri)
time_log <- c(time_Thurs, time_Fri)
time_log <- strptime(time_log, format = "%d/%m/%Y %H:%M")
time_view <- format(time_log, "%H:%M")
### Put into Dataframe
patient_data <- data.frame(time_log, time_view, event, Volume)
# write.csv(patient_data, file="patient_data.csv", row.names = FALSE)
daily_plot <- function(x, day) {
# x patient data - a data.frame with four columns:
# POSIXct time, time, event and Volume
# date number of day of month
# y volume of liquid
# TotVol total volume of intake over week
# Event - drink or otherwise
x <- x[as.numeric(format(x[,1], "%d")) == day, ]
TotVol <- sum(x[,4], na.rm = TRUE)
DayOfWeek <- weekdays(x[1,1], abbreviate = FALSE)
plot(x[,1],x[,4],
xlim = c(x[1,1],x[length(x[,1]),1]),
xlab="Hours of Study", ylab = "Volume of Liquid Drank /L",
main = paste("Total Liquids Drank = ", TotVol, " L on ", DayOfWeek, "Week 1, Apr 2019"),
sub = "dashed red line = urination", pch=16,
col = c("black", "yellow", "green", "blue")[as.numeric(x[,3])],
xaxt = 'n'
)
xAxis_hrs <- seq(as.POSIXct(x[1,1]), as.POSIXct(x[length(x[,1]),1]), by="hour")
axis(1, at = xAxis_hrs, las = 2)
abline( v = c(x[x[,3] == "u",1]), lty=3, col="red")
}
When I run the function,
daily_plot(patient_data, 4)
I want to print out my x-axis, as amended in the form of hours representing the events over the 24 hour period.
When I wrap my xAxis_hrs vector in strptime(xAxis_hrs, format = "%H") the code crashes - that is the x-axis doesn't print out and I see, Error in axis(1, at = xAxis_hrs, las = 2) : (list) object cannot be coerced to type 'double' . Any help?
The issue is that you pass the labels to the wrong named argument, namely at (which should be the numeric positions of the labels). Use the following instead:
axis(1, at = xAxis_hrs, labels = strptime(xAxis_hrs, format = "%H"), las = 2)
Unfortunately this doesn’t change the fact that the axis labels don’t fit into the plot, and collide with the axis title. The former can be fixed by adjusting the plot margins. I’m not aware of a good solution for the latter, although changing the time format might help: it’s probably not necessary/helpful to print the full minutes and seconds (which are always 0). In fact, did you mean to use format instead of strptime?
Apart from that I fundamentally agree with the other answer recommending ggplot2 in the long run. It makes this kind of stuff a lot less painful.
If you're open to a ggplot solution:
library(tidyverse)
library(lubridate)
daily_ggplot <- function(df, selected_day) {
df_day <- filter(df, day(time_log) == selected_day)
df_urine <- filter(df_day, event == "u")
df_drink <- filter(df_day, event != "u")
TotVol <- sum(df_day$Volume, na.rm = TRUE)
Date <- floor_date(df_day$time_log[1], 'days')
DayOfWeek <- weekdays(Date, abbreviate = F)
plot_title <- paste0("Total drank = ", TotVol, "L on ", DayOfWeek, " Week 1, Apr 2018")
ggplot(df_drink) +
aes(time_log, Volume, color = event) +
geom_point(size = 2) +
geom_vline(data = df_urine, aes(xintercept = time_log), color = "red", linetype = 3) +
labs(x = "Hours of Study", ylab = "Volume of Liquid Drank (L)",
title = plot_title, subtitle = "lines = urination") +
theme_bw() +
scale_x_datetime(date_labels = "%H:%M", limits = c(Date, Date + days(1)))
}
daily_ggplot(patient_data, 4)

Indicating An Event on a Plot

I am studying patient fluid intake and frequency of urination.
I'm collecting volume and time of fluids drank and time of urination.
I want to indicate on a graph that has liquid intake when urination occurs.
Here's my data and code so far ...
time_log <- c("01:10", "05:50", "06:00","06:15", "06:25", "09:35", "10:00", "12:40",
"14:00")
time_log <- paste("04/04/2019", time_log, sep=" ")
time_log <- strptime(time_log, format = "%d/%m/%Y %H:%M")
time_view <- format(time_log, "%H:%M")
event <- c("u", "u", "T", "T", "u", "u", "T","T","u")
Volume <- c(NA, NA, 0.25, 0.25, NA, NA, 0.125, 0.625, NA)
patient_data <- data.frame(time_log, time_view, event, Volume)
total_liquids <- sum(patient_data$Volume, na.rm=TRUE)
plot(patient_data$time_log, patient_data$Volume,
xlim = c(as.POSIXct("2019-04-04 00:00:00"),as.POSIXct("2019-04-04 24:00:00")),
xlab="Hours of Study", ylab = "Volume of Liquid Drank /L",
main = paste("Total Liquids Drank = ", total_liquids, " L"))
This is related to the following question
Time Series Data - How to which was poorly received by the Stack Overflow community.
Here's a way using ggplot2 and dashed vertical lines. When adding the geom_vline, we subset the data for just the urination events (i.e., event == "u").
library(ggplot2)
ggplot(patient_data, aes(x = time_log, y = Volume)) +
geom_point() +
geom_vline(
data = subset(patient_data, event == "u"),
aes(xintercept = time_log),
linetype = 2
) +
labs(
title = paste("Total Liques Drank = ", total_liquids, " L"),
subtitle = "Dashed line reprents urination",
x = "Hours of Study",
y = "Volume of Liquid Drank (L)"
) +
scale_y_continuous(limits = c(0, NA)) # just so we don't start the y-axis at 0.1 or something misleading.

Resources