Annotation Track in Gviz - r

Does anyone with experience using the bioconductor package: Gviz know how to add an AnnotationTrack directly over a DataTrack?
For example, in ggplot2 I can add to a prexitsting plot using + geom_text, but I haven't been able to locate a similar feature for Gviz
Thanks!

Although it's not exactly what you want, one possible solution is to add a HighlightTrack that covers your region of interest. Although this won't specifically label / add the key elements on to your DataTrack, it will help highlight the alignment between differing DataTracks.
Example:
library(Gviz)
library(GenomicRanges)
data(geneModels)
data(cpgIslands)
gen <- genome(cpgIslands)
chr <- 1
start <- 120005434
end <- 129695434
itrack <- IdeogramTrack(genome = gen, chromosome = chr)
gtrack <- GenomeAxisTrack()
grtrack <- GeneRegionTrack(geneModels, genome = gen, chromosome = chr, name = "foo")
htrack <- HighlightTrack(trackList = list(gtrack, grtrack), start = 121535434, end = 124535434, chromosome = chr)
plotTracks(list(itrack, htrack), from = start, to = end)
Graphical output
Although the grtrack is empty, it demonstrates how the HighlightTrack will span the specified DataTracks (in this case, grtrack and gtrack).
See the GViz documentation for more on info.

Related

I can't get my plots to a single grid please help correct my code

I have 11 plots and used a looping function to plot them see my code below. However, I can't get them to fit in just 1 page or less. The plots are actually too big. I am using R software and writing my work in RMarkdown. I have spent almost an entire week trying to resolve this.
group_by(Firm_category) %>%
doo(
~ggboxplot(
data =., x = "Means.type", y = "means",
fill ="grey", palette = "npg", legend = "none",
ggtheme = theme_pubr()
),
result = "plots"
)
graph3
# Add statistical tests to each corresponding plot
Firm_category <- graph3$Firm_category
xx <- for(i in 1:length(Firm_category)){
graph3.i <- graph3$plots[[i]] +
labs(title = Firm_category[i]) +
stat_pvalue_manual(stat.test[i, ], label = "p.adj.signif")
print(graph3.i)
}
#output3.long data sample below as comments
#Firm_category billmonth Means.type means
#Agric 1 Before 38.4444
#Agric 1 After 51.9
Complete data is on my github: https://github.com/Fridahnyakundi/Descriptives-in-R/blob/master/Output3.csv
This code prints all the graphs but in like 4 pages. I want to group them into a grid. I have tried to add all these codes below just before my last curly bracket and none is working, please help me out.
library(cowplot)
print(plot_grid(plotlist = graph3.i[1:11], nrow = 4, ncol = 3))
library(ggpubr)
print(ggarrange(graph3.i[1:11], nrow = 4, ncol = 3))
I tried the gridExtra command as well (they all seem to do the same thing). I am the one with a mistake and I guess it has to do with my list. I read a lot of similar work here, some suggested
dev.new()
dev.off()
I still didn't get what they do. But adding either of them caused my code to stop.
I tried defining my 'for' loop function say call it 'XX', then later call it to make a list of graph but it returned NULL output.
I have tried defining an empty list (as I read in some answers here) then counting them to make a list that can be printed but I got so many errors.
I have done this for almost 3 days and will appreciate your help in resolving this.
Thanks!
I tried to complete your code ... and this works (but I don't have your 'stat.test' object). Basically, I added a graph3.i <- list() and replaced graph3.i in the loop ..
Is it what you wanted to do ?
library(magrittr)
library(dplyr)
library(rstatix)
library(ggplot2)
library(ggpubr)
data <- read.csv(url('http://raw.githubusercontent.com/Fridahnyakundi/Descriptives-in-R/master/Output3.csv'))
graph3 <- data %>% group_by(Firm_category) %>%
doo(
~ggboxplot(
data =., x = "Means.type", y = "means",
fill ="grey", palette = "npg", legend = "none",
ggtheme = theme_pubr()
),
result = "plots"
)
graph3
# Add statistical tests to each corresponding plot
graph3.i <- list()
Firm_category <- graph3$Firm_category
xx <- for(i in 1:length(Firm_category)){
graph3.i[[i]] <- graph3$plots[[i]] +
labs(title = Firm_category[i]) # +
# stat_pvalue_manual(stat.test[i, ], label = "p.adj.signif")
print(graph3.i)
}
library(cowplot)
print(plot_grid(plotlist = graph3.i[1:11], nrow = 4, ncol = 3))

Weird characters appearing in the plot legend when using DoHeatmap

I was using Seurat to analyse single cell RNA-seq data and I managed to draw a heatmap plot with DoHeatmap() after clustering and marker selection, but got a bunch of random characters appearing in the legend. They are random characters as they will change every time you run the code. I was worrying over it's something related to my own dataset, so I then tried the test Seurat object 'ifnb' but still got the same issue (see the red oval in the example plot).
example plot
I also tried importing the Seurat object in R in the terminal (via readRDS) and ran the plotting function, but got the same issue there, so it's not a Rstudio thing.
Here are the codes I ran:
'''
library(Seurat)
library(SeuratData)
library(patchwork)
InstallData("ifnb")
LoadData("ifnb")
ifnb.list <- SplitObject(ifnb, split.by = "stim")
ifnb.list <- lapply(X = ifnb.list, FUN = function(x) {
x <- NormalizeData(x)
x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})
features <- SelectIntegrationFeatures(object.list = ifnb.list)
immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, anchor.features = features)
immune.combined <- IntegrateData(anchorset = immune.anchors)
immune.combined <- ScaleData(immune.combined, verbose = FALSE)
immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE)
immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:30)
immune.combined <- FindNeighbors(immune.combined, reduction = "pca", dims = 1:30)
immune.combined <- FindClusters(immune.combined, resolution = 0.5)
DefaultAssay(immune.combined) <- 'RNA'
immune_markers <- FindAllMarkers(immune.combined, latent.vars = "stim", test.use = "MAST", assay = 'RNA')
immune_markers %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC) -> top10_immune
DoHeatmap(immune.combined, slot = 'data',features = top10_immune$gene, group.by = 'stim', assay = 'RNA')
'''
Does anyone have any idea how to solve this issue other than reinstalling everything?
I have been having the same issue myself and while I have solved it by not needing the legend, I think you could use this approach and use a similar solution:
DoHeatmap(immune.combined, slot = 'data',features = top10_immune$gene, group.by = 'stim', assay = 'RNA') +
scale_color_manual(
values = my_colors,
limits = c('CTRL', 'STIM'))
Let me know if this works! It doesn't solve the source of the odd text values but it does the job! If you haven't already, I would recommend creating a forum question on the Seurat forums to see where these characters are coming from!
When I use seurat4.0, I met the same problem.
While I loaded 4.1, it disappeared

tmap - changing the behaviour of tm_markers

Here is a reproducible example
#load the packages
library(easypackages)
packages("tidyverse","readxl","sf","tmaptools","tmap","lubridate",
"lwgeom","Cairo","nngeo","purrr","scales", "ggthemes","janitor")
polls<-st_as_sf(read.csv(url("https://www.caerphilly.gov.uk/CaerphillyDocs/FOI/Datasets_polling_stations_csv.aspx")),
coords = c("Easting","Northing"),crs = 27700)%>%
mutate(date = sample(seq(as.Date('2020/01/01'), as.Date('2020/05/31'), by="day"), 147))
test_stack<-polls%>%st_join(polls%>%st_buffer(dist=1000),join=st_within)%>%
filter(Ballot.Box.Polling.Station.x!=Ballot.Box.Polling.Station.y)%>%
add_count(Ballot.Box.Polling.Station.x)%>%
rename(number_of_neighbours = n)%>%
mutate(interval_date = date.x-date.y)%>%
subset(select = -c(6:8,10,11,13:18))## removing this comment will summarise the data so that only number of neighbours is returned %>%
distinct(Ballot.Box.Polling.Station.x,number_of_neighbours,date.x)%>%
filter(number_of_neighbours >=2)
polls%>%mutate(id = as.numeric(row_number()))%>% mutate(thing = case_when(id %% 2 == 0 ~ "stuff",
id %% 2 !=0 ~ "type"))->polls
qtm(polls)
tmap_mode("view")
tm_shape(polls) + tm_markers(col = "thing")
tm_shape(polls) + tm_dots(col ="thing", size = 0.75)
what I'd like to do is change the colour and size of tm_markers, because in the thing I want to use it in it would be nice to make use of different colour markers easily.
Linked to this, is understanding how the clustering of markers works when the map mode is "view" and an html is generated.
Any help on tm_marker behaviour and tm_marker clustering would be brilliant.
thanks == "MANY!"
In the end it turns out to be much simpler than using markers. I don't like "markers", aesthetically, but I do like "dots" and tm_dots lets you sort out the colours more easily (or in my head it's easier..) . And the thing is. Well. Clustering can be applied to dots, bubbles, and tm_symbols.
It's all in here:
https://cran.r-project.org/web/packages/tmap/tmap.pdf
(p 89/90)
ANYWAY
tm_shape(polls) + tm_dots(col ="thing", size = 0.75,clustering = T)
that's the answer (for me). I can cluster and then colour by a field.

r - taking difference of two xyplots?

I have several xyplot objects that I have saved as .RDATA files. I am now interested in being able to look at their differences. I have tried things like
plot1-plot2
but this does not work (I get the "non-numeric argument to binary operator error).
I would also be able to do this if I knew how to extract the timeseries data stored within the lattice xyplot object, but I have looked everywhere and can't figure out how to do this either.
Any suggestions?
EDIT:
just to make it perfectly clear what I mean for MrFlick, by "taking the difference of two plots" I mean plotting the elementwise difference of the timeseries from each plot, assuming it exists (i.e. assuming that the plots have the same domain). Graphically,
I might want to take the following two plots, stored as xyplot objects:
and end up with something that looks like this:
-Paul
Here is a little function I wrote to plot the difference of two xyplots:
getDifferencePlot = function(plot1,plot2){
data1 = plot1$panel.args
data2 = plot2$panel.args
len1 = length(data1)
len2 = length(data2)
if (len1!=len2)
stop("plots do not have the same number of panels -- cannot take difference")
if (len1>1){
plotData = data.table(matrix(0,0,4))
setNames(plotData,c("x","y1","y2","segment"))
for (i in 1:len1){
thing1 = data.table(cbind(data1[[i]]$x,data1[[i]]$y))
thing2 = data.table(cbind(data2[[i]]$x,data2[[i]]$y))
finalThing = merge(thing1, thing2,by = "V1")
segment = rep(i,nrow(finalThing))
finalThing = cbind(finalThing,segment)
setNames(finalThing,c("x","y1","y2","segment"))
plotData = rbind(plotData,finalThing)
}
}
if (len1==1){
plotData = data.table(matrix(0,0,3))
setNames(plotData,c("x","y1","y2"))
thing1 = data.table(cbind(data1[[i]]$x,data1[[i]]$y))
thing2 = data.table(cbind(data2[[i]]$x,data2[[i]]$y))
plotData = merge(thing1, thing2,by = "V1")
}
plotData$difference = plotData$y1-plotData$y2
if (len1==1)
diffPlot = xyplot(difference~x,plotData,type = "l",auto.key = T)
if (len1>1)
diffPlot = xyplot(difference~x|segment,plotData,type = "l",auto.key = T)
return(diffPlot)
}

R HTS package: combinef and aggts not working with gts object

I'm trying to apply the combinef and aggts functions from the R hts package to a time series matrix in order to obtain an optimized set of forecasts across a hierarchy. I've run the same code every month without issue, and am now seeing errors after upgrading to hts package v4.5.
Reproducible example (I can share data file offline if needed)
#Read in forecast data for all levels of hierarchy#
fcast<-read.csv("SampleHierarchyForecast.csv", header = TRUE, check.names = FALSE)
#Convert to time series#
fcast<-ts(fcast, start = as.numeric(2010.25) + (64)/12, end = as.numeric(2010.25) + (75)/12, f= 12)
#Create time series of only the bottom level of the hierarchy#
index<-c()
fcastBottom<-fcast
for (i in 1:length(fcastBottom [1,]))
{
if(nchar(colnames(fcastBottom)[i])!=28)
index[i]<-i
else
index[i]<-0
}
fcastBottom<-fcastBottom[,-index]
#Create grouped time series from the bottom level forecast #
GtsForecast <- gts(fcastBottom, characters = list(c(12,12), c(4)), gnames = c("Category", "Item", "Customer", "Category-Customer"))
#Use combinef function to optimally combine the full hierarchy forecast using the groups from the full hierarchy gts#
combo <- combinef(fcast, groups = GtsForecast$groups)
*Warning message:
In mapply(rep, as.list(gnames), times, SIMPLIFY = FALSE) :
longer argument not a multiple of length of shorter*
traceback()
2: stop("Argument fcasts requires all the forecasts.")
1: combinef(fcast, groups = GtsForecast$groups)
There's a little bug when comebinef() function calls gts(). Now I've fixed it on github. So you can run your own code above without any trouble after updating the development version.
Alternatively, you need to tweak your code a bit if you don't want to install the newest version.
combo <- combinef(fcast, groups = GtsForecast$groups, keep = "bottom")
combo <- ts(combo, start = as.numeric(2010.25) + (64)/12,
end = as.numeric(2010.25) + (75)/12, f = 12)
colnames(combo) <- colnames(fcastBottom)
newGtsForecast <- gts(combo, characters = list(c(12,12), c(4)),
gnames = c("Category", "Item", "Customer",
"Category-Customer"))
Aggregate <- aggts(newGtsForecast)
Hope it helps.

Resources