Related
I have time-series of 2d obsverations that I'm trying to smooth to take out some of the observation variability. I've been applying loess(), but just noticed it doesn't seem to smooth as a function of time but just across the entire pooled coordinates. Am I missing something? Is there a different function I should be using?
df<-structure(list(timestamp = structure(c(1586488380, 1586488440,
1586488560, 1586488620, 1586488680, 1586488740, 1586488800, 1586488860,
1586489520, 1586489580, 1586489700, 1586489820, 1586489880, 1586489940,
1586490000, 1586490060, 1586490120, 1586490180, 1586490240, 1586490300,
1586490360, 1586490420, 1586490480, 1586490540, 1586490600, 1586490660,
1586490720, 1586490780, 1586490840, 1586490900, 1586490960, 1586491020,
1586491200, 1586491260, 1586491320, 1586491380, 1586491440, 1586491500,
1586491560, 1586491620, 1586491680, 1586491740, 1586491800, 1586491860,
1586491920, 1586491980, 1586492040, 1586492100, 1586492160, 1586492220,
1586492280, 1586492340, 1586492400, 1586492460, 1586492520, 1586492580,
1586492640, 1586492700, 1586492760, 1586492820, 1586492880, 1586492940,
1586493000, 1586493060, 1586493120, 1586493180, 1586493240, 1586493300,
1586493360, 1586493420, 1586493480, 1586493540, 1586493600, 1586493660,
1586493720, 1586493780, 1586493840, 1586493900, 1586493960, 1586494020,
1586494200, 1586494260, 1586494320, 1586494380, 1586494440, 1586494500,
1586494560, 1586494620, 1586494680, 1586494740, 1586494800, 1586494860,
1586494920, 1586494980, 1586495040, 1586495100, 1586495160, 1586495220,
1586495280, 1586495340, 1586495400, 1586495460, 1586495520, 1586495580,
1586495640, 1586495700, 1586495760, 1586495820, 1586495880, 1586495940,
1586496000, 1586496060, 1586496120, 1586496180, 1586496240, 1586496300,
1586496360, 1586496420, 1586496480, 1586496540, 1586496600, 1586496660,
1586496720, 1586496780, 1586496840, 1586496900, 1586496960, 1586497020,
1586497080, 1586497140, 1586497200, 1586497260, 1586497320, 1586497380,
1586497440, 1586497500, 1586497560, 1586497620, 1586497680, 1586497740,
1586497800, 1586497860, 1586497920, 1586497980, 1586498040, 1586498100,
1586498160, 1586498220, 1586498280, 1586498340), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), easting = c(740.582355718548, 740.582355718548,
739.726374785548, 739.611045841548, 739.508690311548, 739.398269506548,
739.278804356548, 739.627760514548, 737.913640733548, 738.088450601548,
738.551491861548, 738.957133488548, 739.137345557548, 739.304664573548,
739.460440784548, 739.605842807548, 739.741887116548, 739.719077482548,
739.369420509548, 738.973489249548, 738.521335985548, 739.279305656548,
739.993757669548, 740.085239162548, 740.172262825548, 740.255157063548,
740.334219013548, 740.409718260548, 740.481900024548, 740.550987811548,
740.573883125548, 740.252267406548, 739.261723439548, 738.935233921548,
738.774921432548, 738.615895069548, 738.451107559548, 738.280235586548,
738.493740162548, 738.717501067548, 738.925752666548, 739.120074872548,
739.301840012548, 739.472245999548, 739.632343117548, 739.890965132548,
740.098495936548, 740.293354354548, 740.476683179548, 740.649491986548,
740.678160672548, 740.443560695548, 740.398855065548, 740.451032168548,
740.467918582548, 740.160041067548, 739.819912921548, 739.641686751548,
739.457020461548, 739.265544595548, 739.193281488548, 739.227252654548,
738.995761471548, 738.822890745548, 738.775446949548, 738.726816309548,
738.676941805548, 738.625762928548, 739.254178659548, 739.727445331548,
740.019566884548, 740.129316037548, 740.255273807548, 740.442527947548,
740.615257127548, 740.775140223548, 740.923607252548, 741.055065964548,
741.139279130548, 740.907234314548, 739.290829342548, 739.115359714548,
740.664354207548, 740.589899151548, 740.578913554548, 740.571708783548,
740.568311076548, 740.568740893548, 740.527920123548, 740.358565457548,
740.392277156548, 740.370981239548, 740.289917518548, 740.214089159548,
740.192635592548, 740.176850935548, 740.160315351548, 740.143068630548,
740.103174446548, 740.008327647548, 740.058271768548, 740.205384482548,
740.211048172548, 740.150159818548, 740.122028309548, 740.230164637548,
740.271076846548, 740.075087486548, 739.768752873548, 739.586722485548,
739.940259334548, 740.233576255548, 740.473614136548, 740.495703912548,
740.341935547548, 740.186820856548, 740.204435025548, 740.299218490548,
740.318343269548, 740.238895133548, 739.999671854548, 740.062183564548,
740.196345466548, 740.329697802548, 740.418193609548, 740.311257937548,
740.270203214548, 740.209679752548, 740.146590442548, 740.079785501548,
740.170176300548, 740.268945921548, 740.217498771548, 740.133923060548,
740.117921377548, 740.177771453548, 740.140658663548, 740.080204534548,
740.108449333548, 740.145621912548, 740.182429420548, 740.010376475548,
739.819150336548, 739.616854492548, 739.369690457548, 739.104183601548,
738.938020260548, 738.843359187548, 738.802644324548, 738.761524527548
), northing = c(2307.15134120986, 2307.15134120986, 2307.60836846986,
2307.72110371186, 2307.83015111886, 2307.94605545486, 2308.06963872386,
2307.94323643186, 2308.11539257586, 2307.98516105286, 2307.67209087786,
2307.39795736686, 2307.27544716286, 2307.16124100486, 2307.05447137086,
2306.95438746086, 2306.86033624586, 2306.85049579286, 2307.02449397686,
2307.22230535086, 2307.44905018086, 2306.99878407786, 2306.57790074586,
2306.51052483586, 2306.44607531386, 2306.38433869586, 2306.32512182186,
2306.26824947086, 2306.21356227886, 2306.16091500786, 2306.12077142386,
2306.17964098286, 2306.35042514386, 2306.42512419786, 2306.46180283886,
2306.49818722086, 2306.53588972286, 2306.57498431686, 2306.47587680786,
2306.37665531786, 2306.28431106486, 2306.19814347186, 2306.11754401386,
2306.04198150286, 2305.97099021786, 2305.88981694586, 2305.83737042086,
2305.78847203986, 2305.74280289286, 2305.70008126986, 2305.72436278986,
2305.79211047386, 2305.75001607586, 2305.66373079386, 2305.59454921786,
2305.66723079486, 2305.74826733386, 2305.75631243686, 2305.76412484986,
2305.77169383886, 2305.76318962486, 2305.74988901286, 2305.84052390686,
2305.91140100786, 2305.95786180686, 2306.00518488486, 2306.05340722986,
2306.10256811686, 2305.87083530186, 2305.67745118186, 2305.55808577486,
2305.51324056486, 2305.46177226186, 2305.38525730986, 2305.31467748186,
2305.24934676386, 2305.18868082786, 2305.13496475486, 2305.08628958686,
2305.07937405386, 2305.33244795286, 2305.26640414086, 2304.97847050686,
2304.88865807586, 2304.78121096686, 2304.67333694586, 2304.56517670086,
2304.45687199986, 2304.26299422086, 2303.81398617786, 2303.81772073686,
2303.82576399386, 2303.82937144386, 2303.85166918186, 2303.85679798586,
2303.85841996086, 2303.85934299186, 2303.85962745886, 2303.81418344986,
2303.71268455886, 2303.70396413486, 2303.71187848686, 2303.71241867586,
2303.72409686386, 2303.75626565486, 2303.81432053886, 2303.80604508386,
2303.57280991386, 2303.21896587386, 2303.06912105986, 2303.28351126486,
2303.45378473786, 2303.49375232686, 2303.50460433986, 2303.48302188886,
2303.48174695086, 2303.52692291386, 2303.61686633486, 2303.41139580686,
2303.32679365886, 2303.06984393086, 2303.15017783486, 2303.29293566086,
2303.44528532286, 2303.48203523086, 2302.89274879786, 2302.81440275486,
2302.73512963586, 2302.65410710886, 2302.56757728186, 2302.77320543086,
2303.00846800486, 2303.01932301786, 2302.97477011386, 2303.03742546386,
2303.13970331386, 2303.07628123386, 2302.97297111586, 2303.02123867886,
2303.08476293486, 2303.14766331986, 2303.17026860886, 2303.18249014186,
2303.19025912386, 2303.15218828686, 2303.11064943486, 2302.98849464786,
2302.87632040886, 2302.82807292386, 2302.77934558786)), row.names = 5905:6054, class = "data.frame")
df.fitted<-loess(northing ~ easting, span = .5, data = df)
df$northing.fitted<-df.fitted$fitted
ggplot(df, aes(x=easting,y=northing)) +
geom_path(color='orangered2') +
geom_point(aes(y=northing.fitted))
So, instead of smoothing the "cluster", I'd like to use a rolling average smoothing each x/y pair as a function of time.
You need to regress both easting and northing as functions of time to get smoother x, y values:
df$numtime <- as.numeric(df$timestamp)
df.fitted.northing <-loess(northing ~ numtime, span = .5, data = df)
df.fitted.easting <- loess(easting ~ numtime, span = .5, data = df)
newdat <- data.frame(numtime = seq(min(df$numtime), max(df$numtime), len = 1000))
newdat$northing <- predict(df.fitted.northing, newdat)
newdat$easting <- predict(df.fitted.easting, newdat)
ggplot(df, aes(easting, northing)) +
geom_path(aes(color = "original path"), alpha = 0.6, size = 0.5,
arrow = arrow(length = unit(0.1, "inches"))) +
geom_point(aes(color = "original path"), alpha = 0.6, size = 1) +
geom_path(data = newdat, size = 1, aes(color = "smoothed"),
arrow = arrow(length = unit(0.1, "inches"))) +
coord_equal() +
theme_light() +
scale_color_manual(values = c("original path" = "orangered2",
"smoothed" = "deepskyblue4"), name = "")
I have a ggplot2 graph which plots two separate violin plots onto one graph, given by this example (thanks to #jared_mamrot for providing it):
library(tidyverse)
data("Puromycin")
head(Puromycin)
dat1 <- Puromycin %>%
filter(state == "treated")
dat2 <- Puromycin %>%
filter(state == "untreated")
mycp <- ggplot() +
geom_violin(data = dat1, aes(x= state, y = conc, colour = "Puromycin (Treatment1)")) +
geom_violin(data = dat2, aes(x= state, y = conc, colour = "Puromycin (Treatment2)"))
mycp
I would like to add a boxplot or other summary statistics such as those in http://www.sthda.com/english/wiki/ggplot2-violin-plot-quick-start-guide-r-software-and-data-visualization and https://www.maths.usyd.edu.au/u/UG/SM/STAT3022/r/current/Misc/data-visualization-2.1.pdf, but trying the code suggested in those places does not change the original plot.
mycp + geom_boxplot()
Thanks for reading and hopefully this makes sense!
UPDATE ==========================================================================
So the above example does not reflect exactly my situation I realize now. Essentially, I want to apply statistics onto a combined ggplot2 graph that uses two separate objects as its variables (here TNBC_List1 and ER_List1) Here is an example that does (sorry for the longer example, I will admit I am having trouble creating a simpler reproducible example and I am very new to coding in general):
# Libraries -------------------------------------------------------------
library(BiocManager)
library(GEOquery)
library(plyr)
library(dplyr)
library(Matrix)
library(devtools)
library(Seurat)
library(ggplot2)
library(cowplot)
library(SAVER)
library(metap)
library(multtest)
# Loading Raw Data into RStudio ----------------------------------
filePaths = getGEOSuppFiles("GSE75688")
tarF <- list.files(path = "./GSE75688/", pattern = "*.tar", full.names = TRUE)
tarF
untar(tarF, exdir = "./GSE75688/")
gzipF <- list.files(path = "./GSE75688/", pattern = "*.gz", full.names = TRUE)
ldply(.data = gzipF, .fun = gunzip)
list.files(path = "./GSE75688/", full.names = TRUE)
list.files(path = "./GSE75688/", pattern = "\\.txt$",full.names = TRUE)
# full matrix ----------------------------------------------------------
fullmat <- read.table(file = './GSE75688//GSE75688_GEO_processed_Breast_Cancer_raw_TPM_matrix.txt',
sep = '\t', header = FALSE, stringsAsFactors = FALSE)
fullmat <- data.frame(fullmat[,-1], row.names=fullmat[,1])
colnames(fullmat) <- as.character(fullmat[1, ])
fullmat <- fullmat[-1,]
fullmat <- as.matrix(fullmat)
# BC01 ER+ matrix -----------------------------------------------------------
BC01mat <- grep(pattern =c("^BC01") , x = colnames(fullmat), value = TRUE)
BC01mat = fullmat[,grepl(c("^BC01"),colnames(fullmat))]
BC01mat = BC01mat[,!grepl("^BC01_Pooled",colnames(BC01mat))]
BC01mat = BC01mat[,!grepl("^BC01_Tumor",colnames(BC01mat))]
BC01pdat <- data.frame("samples" = colnames(BC01mat), "treatment" = "ER+")
# BC07 TNBC matrix -----------------------------------------------------------
BC07mat <- grep(pattern =c("^BC07") , x = colnames(fullmat), value = TRUE)
BC07mat <- fullmat[,grepl(c("^BC07"),colnames(fullmat))]
BC07mat <- BC07mat[,!grepl("^BC07_Pooled",colnames(BC07mat))]
BC07mat <- BC07mat[,!grepl("^BC07_Tumor",colnames(BC07mat))]
BC07mat <- BC07mat[,!grepl("^BC07LN_Pooled",colnames(BC07mat))]
BC07mat <- BC07mat[,!grepl("^BC07LN",colnames(BC07mat))]
BC07pdat <- data.frame("samples" = colnames(BC07mat), "treatment" = "TNBC")
#merge samples together =========================================================================
joined <- cbind(BC01mat, BC07mat)
pdat_joined <- rbind(BC01pdat, BC07pdat)
#fdat ___________________________________________________________________________________
fdat <- grep(pattern =c("gene_name|gene_type") , x = colnames(fullmat), value = TRUE)
fdat <- fullmat[,grepl(c("gene_name|gene_type"),colnames(fullmat))]
fdat <- as.data.frame(fdat, stringsAsFactors = FALSE)
fdat <- setNames(cbind(rownames(fdat), fdat, row.names = NULL),
c("ensembl_id", "gene_short_name", "gene_type"))
rownames(pdat_joined) <- pdat_joined$samples
rownames(fdat) = make.names(fdat$gene_short_name, unique=TRUE)
rownames(joined) <- rownames(fdat)
# Create Seurat Object __________________________________________________________________
joined <- as.data.frame(joined)
sobj_pre <- CreateSeuratObject(counts = joined)
sobj_pre <-AddMetaData(sobj_pre,metadata=pdat_joined)
head(sobj_pre#meta.data)
#gene name input
sobj_pre[["RNA"]]#meta.features<-fdat
head(sobj_pre[["RNA"]]#meta.features)
#Downstream analysis -------------------------------------------------------
sobj <- sobj_pre
sobj <- FindVariableFeatures(object = sobj, mean.function = ExpMean, dispersion.function = LogVMR, nfeatures = 2000)
sobj <- ScaleData(object = sobj, features = rownames(sobj), block.size = 2000)
sobj <- RunPCA(sobj, npcs = 100, ndims.print = 1:10, nfeatures.print = 5)
sobj <- FindNeighbors(sobj, reduction = "pca", dims = 1:4, nn.eps = 0.5)
sobj <- FindClusters(sobj, resolution = 1, n.start = 10)
umap.method = 'umap-learn'
metric = 'correlation'
sobj <- RunUMAP(object = sobj, reduction = "pca", dims = 1:4,min.dist = 0.5, seed.use = 123)
p0 <- DimPlot(sobj, reduction = "umap", pt.size = 0.1,label=TRUE) + ggtitle(label = "Title")
p0
# ER+ score computation -------------------
ERlist <- list(c("CPB1", "RP11-53O19.1", "TFF1", "MB", "ANKRD30B",
"LINC00173", "DSCAM-AS1", "IGHG1", "SERPINA5", "ESR1",
"ILRP2", "IGLC3", "CA12", "RP11-64B16.2", "SLC7A2",
"AFF3", "IGFBP4", "GSTM3", "ANKRD30A", "GSTT1", "GSTM1",
"AC026806.2", "C19ORF33", "STC2", "HSPB8", "RPL29P11",
"FBP1", "AGR3", "TCEAL1", "CYP4B1", "SYT1", "COX6C",
"MT1E", "SYTL2", "THSD4", "IFI6", "K1AA1467", "SLC39A6",
"ABCD3", "SERPINA3", "DEGS2", "ERLIN2", "HEBP1", "BCL2",
"TCEAL3", "PPT1", "SLC7A8", "RP11-96D1.10", "H4C8",
"PI15", "PLPP5", "PLAAT4", "GALNT6", "IL6ST", "MYC",
"BST2", "RP11-658F2.8", "MRPS30", "MAPT", "AMFR", "TCEAL4",
"MED13L", "ISG15", "NDUFC2", "TIMP3", "RP13-39P12.3", "PARD68"))
sobj <- AddModuleScore(object = sobj, features = ERlist, name = "ER_List")
#TNBC computation -------------------
tnbclist <- list(c("FABP7", "TSPAN8", "CYP4Z1", "HOXA10", "CLDN1",
"TMSB15A", "C10ORF10", "TRPV6", "HOXA9", "ATP13A4",
"GLYATL2", "RP11-48O20.4", "DYRK3", "MUCL1", "ID4", "FGFR2",
"SHOX2", "Z83851.1", "CD82", "COL6A1", "KRT23", "GCHFR",
"PRICKLE1", "GCNT2", "KHDRBS3", "SIPA1L2", "LMO4", "TFAP2B",
"SLC43A3", "FURIN", "ELF5", "C1ORF116", "ADD3", "EFNA3",
"EFCAB4A", "LTF", "LRRC31", "ARL4C", "GPNMB", "VIM",
"SDR16C5", "RHOV", "PXDC1", "MALL", "YAP1", "A2ML1",
"RP1-257A7.5", "RP11-353N4.6", "ZBTB18", "CTD-2314B22.3", "GALNT3",
"BCL11A", "CXADR", "SSFA2", "ADM", "GUCY1A3", "GSTP1",
"ADCK3", "SLC25A37", "SFRP1", "PRNP", "DEGS1", "RP11-110G21.2",
"AL589743.1", "ATF3", "SIVA1", "TACSTD2", "HEBP2"))
sobj <- AddModuleScore(object = sobj, features = tnbclist, name = "TNBC_List")
#ggplot2 issue ----------------------------------------------------------------------------
sobj[["ClusterName"]] <- Idents(object = sobj)
sobjlists <- FetchData(object = sobj, vars = c("ER_List1", "TNBC_List1", "ClusterName"))
library(reshape2)
melt(sobjlists, id.vars = c("ER_List1", "TNBC_List1", "ClusterName"))
p <- ggplot() + geom_violin(data = sobjlists, aes(x= ClusterName, y = ER_List1, fill = ER_List1, colour = "ER+ Signature"))+ geom_violin(data = sobjlists, aes(x= ClusterName, y = TNBC_List1, fill = TNBC_List1, colour="TNBC Signature"))
Extension ======================================================================
If you want to do this but with two objects (sobjlists1 and sobjlists2, for example) instead of what my example showed (two variables but one object), rbind the two and then do what #StupidWolf says
library(reshape2)
sobjlists1= melt(sobjlists1, id.vars = "treatment")
sobjlists2= melt(sobjlists2, id.vars = "treatment")
combosobjlists <- rbind(sobjlists1, sobjlists2)
and then continue on with their code using combosobjlists:
ggplot(combosobjlists,aes(x= ClusterName, y = value)) +
geom_violin(aes(fill=variable)) +
geom_boxplot(aes(col=variable),
width = 0.2,position=position_dodge(0.9))
Hope this thread helps!
Try to include just the minimum code to show your problem. Like in your example, there's no need to start with the whole seurat processing. You can just provide the data.frame with dput() and we can see the issue with ggplot2 , see this post.
Create some example data:
library(Seurat)
library(ggplot2)
genes = c(unlist(c(ERlist,tnbclist)))
mat = matrix(rnbinom(500*length(genes),mu=500,size=1),ncol=500)
rownames(mat) = genes
colnames(mat) = paste0("cell",1:500)
sobj = CreateSeuratObject(mat)
sobj = NormalizeData(sobj)
Add some made-up cluster:
sobj$ClusterName = factor(sample(0:1,ncol(sobj),replace=TRUE))
Add your module score:
sobj = AddModuleScore(object = sobj, features = tnbclist,
name = "TNBC_List",ctrl=5)
sobj = AddModuleScore(object = sobj, features = ERlist,
name = "ER_List",ctrl=5)
We get the data, what you need to do is to pivot it long correctly. Plotting it twice with ggplot2 is going to cause all kinds of problem:
sobjlists = FetchData(object = sobj, vars = c("ER_List1", "TNBC_List1", "ClusterName"))
head(sobjlists)
ER_List1 TNBC_List1 ClusterName
cell1 -0.05391108 -0.008736057 1
cell2 0.07074816 -0.039064126 1
cell3 0.08688374 -0.066967324 1
cell4 -0.12503649 0.120665057 0
cell5 0.05356685 -0.072293651 0
cell6 -0.20053804 0.178977042 1
Should look like this:
library(reshape2)
sobjlists = melt(sobjlists, id.vars = "ClusterName")
ClusterName variable value
1 1 ER_List1 -0.05391108
2 1 ER_List1 0.07074816
3 1 ER_List1 0.08688374
4 0 ER_List1 -0.12503649
5 0 ER_List1 0.05356685
6 1 ER_List1 -0.20053804
Now we plot:
ggplot(sobjlists,aes(x= ClusterName, y = value)) +
geom_violin(aes(fill=variable)) +
geom_boxplot(aes(col=variable),
width = 0.2,position=position_dodge(0.9))
for you to be able to use the data within a plot without specifying it (like geom_boxplot() ), you need to put the data in the ggplot() function call. Then the following functions are able to inherit them.
You also do not need an extra violin plot per color
library(tidyverse)
data("Puromycin")
head(Puromycin)
mycp <- ggplot(Puromycin,aes(x= state, y = conc, colour=state))+geom_violin()
mycp + geom_boxplot(width=0.1, color= "black") +
scale_color_discrete(
labels= c("Puromycin (Treatment1)","Puromycin (Treatment2)")
)
Result:
I have the following data frame:
df.test <- data.frame(
id = c("EIF3H", "USP9X", "USP44", "USP51", "USP15",
"USP48", "USP47", "USP43", "USPL1", "UCHL5", "USP50", "USP7",
"UCHL1", "USP11", "USP26", "PAN2", "VCPIP1", "USP46", "USP29",
"USP22", "USP49", "ZRANB1", "OTUD4", "OTUD7B", "USP54", "PSMD14",
"USP20", "USP6", "OTUD3", "USP39", "UCHL3", "USP19", "USP21",
"USP30", "TNFAIP3", "USP17L2", "USP32", "JOSD2", "PSMD7", "ATXN3L",
"SENP2", "STAMBPL1", "USP37", "USP35", "USP3", "ALG13", "USP45",
"Control", "USP9Y", "ATXN3", "OTUD6A", "USP42", "USP12", "MPND",
"USP40", "OTUD1", "USP31", "USP8", "USP13", "USP53", "USP34",
"USP17L5", "MYSM1", "USP36", "OTUD7A", "USP10", "USP2", "USP18",
"OTUB1", "EIF3F", "USP1", "USP14", "COPS5", "USP24", "USP4",
"CYLD", "COPS6", "STAMBP", "USP5", "OTUD6B", "BAP1", "USP25",
"YOD1", "USP28", "USP38", "USP41", "JOSD1", "UCK2", "USP16",
"USP27X", "BRCC3", "USP33", "OTUD5", "OTUB2"),
log.score = c(4.22265293851218, 3.03983376346562,
2.4139305569695, 2.32586482009754, 2.30391458369018, 2.19017103893211,
2.10803347738743, 2.10011933499842, 1.82596928196197, 1.79890343496053,
1.78330640083025, 1.58384231036782, 1.4480988629484, 1.4331502122056,
1.41965675282741, 1.37552194849409, 1.37548070593268, 1.3126672736385,
1.27123241483349, 1.25213781606166, 1.1643918571801, 1.14738583497561,
1.0423927129399, 1.03157776352028, 1.0279685056071, 0.953426802337995,
0.94104282122269, 0.929925173732472, 0.886424283199432, 0.886123467368948,
0.815961921373111, 0.811437095842094, 0.767054687254773,
0.754314635766764, 0.750654863646671, 0.728646377897516,
0.707899061519581, 0.703532261199885, 0.692546751828376,
0.684554481775416, 0.652104306506768, 0.642046105413661,
0.630116510664521, 0.62908000782908, 0.619354680809075, 0.614876544107784,
0.61293067306798, 0.606898831140113, 0.603504247802433, 0.578642901486857,
0.576246380387172, 0.549612309171809, 0.53101794103743, 0.513442014568548,
0.506304999011214, 0.492144128304169, 0.462596515841992,
0.454185884038717, 0.450163300207299, 0.434529992991809,
0.429725658566606, 0.42864060724616, 0.419896514762075, 0.409715596281838,
0.365946146577929, 0.363963683646553, 0.357614629472314,
0.352851847129221, 0.343470593766502, 0.313051079788499,
0.304614649499993, 0.291604597354374, 0.287030586811975,
0.272263598289704, 0.27175988000523, 0.265200170411153, 0.264528852761016,
0.244704590019742, 0.179680291853473, 0.154102353851514,
0.147800680553723, 0.127575655021633, 0.126051956011554,
0.1207205737776, 0.118712371231544, 0.11046860245595, 0.0939775902962627,
0.0673791277640148, 0.066320409857141, 0.0582650179118847,
0.0548860857591892, 0.0374554663486737, 0.0147532091971383,
0.0134163514896924),
neg.rank = 1:94)
From this data frame I made this plot:
library(ggplot2)
x <- "neg.rank"
p <- ggplot(df.test, aes_string(x = x, y = df.test$log.score)) +
geom_point()
I want to add labels to the top10 ids and I tried the following:
library(ggrepel)
library(dplyr)
p + geom_label_repel(data = df.test[df.test[[x]] %in% 1:10, ], aes_string(x = x, y = df$log.score, label = df.test$id))
But this gives me a More than one expression parsed error:
More than one expression parsed
Backtrace:
█
1. ├─ggrepel::geom_label_repel(...)
2. │ └─ggplot2::layer(...)
3. └─ggplot2::aes_string(x = x, y = df$log.score, label = df.test$id)
4. └─base::lapply(...)
5. └─ggplot2:::FUN(X[[i]], ...)
6. └─rlang::parse_expr(x)
I have no clue what is wrong with the code.
It is not working as you are inserting the vectors directly into your aes_string.
If you want yours to be working you need to be strict with your aes_string and really should only use strings:
p +
geom_label_repel(
data = df.test[df.test[[x]] %in% 1:10, ],
aes_string(x = x, y = "log.score", label = "id"),
)
I also added a "cleaner" solution. I changed your subsetting logic to use dplyr, as you are already loading the package anyway and changed all your aes_string() to aes().
library(ggplot2)
library(ggrepel)
library(dplyr)
ggplot(df.test, aes(x = neg.rank, y = log.score)) +
geom_point() +
geom_label_repel(
data = df.test %>% slice_min(neg.rank, n = 10),
aes(label = id),
max.overlaps = 10,
xlim = c(10, NA),
ylim = c(3, NA),
direction = "x"
)
Cheers
Hannes
I keep getting an error becasue the bargraphs are used for quaterly data and the line is going to be data from the entire year so it will have many points.
The only issue is with the geom_line function which I am new to using. The error is -->
Error in scale_fill_manual(values = c("green", "yellow")) + geom_line(aes(x = dts2, : non-numeric argument to binary operator
t="DG"
fin=getFinancials(t, auto.assign = F)
dts = labels(fin$BS$A)[[2]]
dts2 = paste(substr(dts,1,7),"::",dts, sep="")
stockprices = getSymbols(t, auto.assign = F)
price = rep(0,NROW(dts))
for(i in 1:NROW(price))
{
price[i]=as.vector(last(stockprices[dts2[i],6]))
}
yr= as.numeric(substr(dts,1,4))
pastyr = yr -2
betayr = paste(pastyr,"::",yr,sep="")
os = fin$BS$A["Total Common Shares Outstanding", ]
gw= fin$BS$A["Goodwill, Net", ]
ta= fin$BS$A["Total Assets", ]
td= fin$BS$A["Total Debt", ]
ni= fin$IS$A["Net Income", ]
btax = fin$IS$A["Income Before Tax", ]
atax = fin$IS$A["Income After Tax",]
intpaid = fin$CF$A["Cash Interest Paid, Supplemental",]
gw[is.na(gw)]=0
intpaid[is.na(intpaid)]=0
taa = (ta - gw)/os
Rd = rep(0,NROW(dts))
for(i in 1:NROW(dts))
{
if(td[i]!=0)
{
Rd[i] = intpaid[i]/td[i]
}
}
gspc = getSymbols("^GSPC", auto.assign = F)
gs5 = getSymbols("GS5", src = "FRED", auto.assign = F)
marketRisk = rep(0,NROW(dts))
riskFree = rep(0,NROW(dts))
beta = rep(0,NROW(dts))
for(i in 1:NROW(dts))
{
marketRisk[i]= mean(yearlyReturn(gspc[betayr[i]]))
riskFree[i] = mean(gs5[betayr[i]])
gspc.weekly = weeklyReturn(gspc[betayr[i]])
stockprices.weekly = weeklyReturn(stockprices[betayr[i]])
beta[i] = CAPM.beta(stockprices.weekly,gspc.weekly)
}
Re = (riskFree/100) + beta * (marketRisk-(riskFree/100))
E = os*price
V=E+td
Tc = (btax - atax)/btax
wacc = (E/V)*Re + (td/V)*Rd*(1-Tc)
margin = (ni/wacc)/os - taa
taadf = data.frame(dts,val = taa,cat="ta")
margindf = data.frame(dts,val = margin ,cat="margin")
mdf=rbind(margindf,taadf)
#linrng = paste(dts[NROW(dts)],"::",dts[1],sep="")
#dfdt = data.frame(stockprices[linrng,6])
#dfdt2 = data.frame(dt = labels(dfdt)[[1]],dfdt$AAPL.Adjusted,cat="taa")
#names(dfdt2)=c("dt,price,cat")
pricedf = data.frame(as.vector((stockprices[dts2[i],6])))
ggplot(mdf, aes(x=dts,y=val,fill=cat)) + geom_bar(stat="identity",color="black")
scale_fill_manual(values = c("green","yellow")) +
geom_line(aes(x=dts2, y=stockprices), stat = "identity",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE)
Note, the object stockprices is An ‘xts’ object. So, you can't use inside ggplot scale. I picked the fist variable of stockprices object to show the code, but you probabli want another one.
library(dplyr)
library(quantmod)
library(PerformanceAnalytics)
library(ggplot2)
stockprices_df <- as.data.frame(stockprices) %>%
mutate(date = rownames(.)) %>%
filter(date %in% dts)
ggplot() +
geom_col(
data = mdf,
aes(x = dts,y = val,fill = cat)
) +
geom_line(
data = stockprices_df,
aes(x = date, y = DG.Open, group = 1 )
) +
scale_fill_manual(values = c("green","yellow"))
[
iarray <- iv$iarray
varray <- iv$varray
n<-gsub("^\\{+(.+)\\}+$", '\\1', iarray)
n1 <- strsplit(n,",")
n1 <- unlist(n1)
n1 <- as.numeric(n1)
df <- as.data.frame(n1)
n<-gsub("^\\{+(.+)\\}+$", '\\1', varray)
n2 <- strsplit(n,",")
n2 <- unlist(n2)
n2 <- as.numeric(n2)
df <- cbind(df,n2)
vmpp <-iv$vmpp
impp <- iv$impp
print(impp)
print(vmpp)
})
output$ivcurve <- renderPlot({
ggplot(data3(), aes(x=n2, y= n1)) + geom_line(colour='blue')+ geom_vline(xintercept = vmpp)+ geom_hline(yintercept = impp) + scale_y_continuous(limits = c(-1, 11))
Basically I'm trying to draw an IV curve from the above code.
As seen in the photo I need a horizontal and a vertical line.
But after I added the geom_vline function it gives me the Error : ggplot2 doesn't know how to deal with data of class numeric
iv is a dataframe and iarray and varray basically looks like this.
iarray = "{9.467182035,9.252423958,9.179368178,9.142931845}"
varray = "{-1.025945126,-0.791203874,-0.506481774,-0.255416444}"
And vmpp and impp are basically numbers as 8.5 and 20
suggestions?
P.s :
dput(iv)
structure(list(id = 3L, seris_id = "SERTPTR0003", module_id = 2L,
isc = 9.1043, voc = 37.61426, impp = 8.524, vmpp = 30.0118,
pmpp = 255.8095, unique_halm_id = 4414L, iarray = "{9.471385758,9.251831868,9.174032904,9.135095327,9.109244512,9.087563112,9.081257993,9.079282455,9.078209387,9.077396672,9.076717653,9.076285598,9.075914058,9.075549594,9.075098675,9.074659768,9.074080201,9.073659578,9.073411255,9.073349331,9.073215686,9.073189667,9.073011759,9.072868405,9.072659064,9.072636165,9.072659725,9.072729724,9.072779321,9.072915415,9.072951718,9.072855259,9.072758863,9.072562734,9.072286497,9.072036161,9.071858009,9.07165223,9.071458902,9.071172024,9.070818323,9.070364851,9.069865071,9.069392026,9.069058847,9.068673155,9.068486996,9.0684006,9.068241175,9.067848351,9.067533806,9.066886103,9.066177782,9.0655086,9.065025577,9.064457111,9.064154995,9.063866251,9.063564149,9.063221961,9.06295813,9.062580288,9.062182005,9.06179715,9.061378517,9.060847632,9.06033015,9.059686156,9.058814993,9.057817299,9.056732355,9.055534236,9.054389596,9.05351149,9.052819766,9.052254696,9.051816304,9.051431465,9.051000987,9.050664797,9.050589584,9.050615635,9.050795719,9.051096084,9.05121704,9.050958132,9.050478383,9.049724325,9.048695951,9.047619756,9.046715916,9.04602525,9.045615278,9.045512729,9.045617691,9.045803509,9.045989974,9.046083526,9.045997615,9.045871618,9.045772357,9.045599926,9.045340971,9.045082036,9.04473025,9.044178732,9.043440888,9.042642632,9.04185002,9.041056695,9.040316091,9.039781509,9.039426971,9.039199774,9.039026035,9.038805897,9.038478843,9.037978051,9.037190302,9.036262611,9.035408047,9.034687132,9.03411323,9.033759457,9.033445779,9.033105372,9.032611665,9.031991392,9.031298017,9.030631384,9.029991493,9.02931152,9.028518372,9.027678053,9.026644378,9.025384369,9.023971135,9.022443918,9.020510444,9.018469233,9.015987042,9.013123551,9.009951782,9.006524239,9.002508657,8.99806541,8.993200713,8.987509287,8.980851319,8.97337198,8.964883202,8.955065215,8.944015742,8.931773812,8.91796823,8.902911552,8.886450605,8.868452754,8.848678419,8.827119435,8.80336248,8.777313996,8.748941051,8.718309497,8.685225063,8.649388501,8.610785476,8.569040812,8.52363426,8.474699468,8.422382481,8.366516735,8.307103187,8.244481209,8.178090447,8.10779633,8.033345875,7.954744415,7.871665908,7.784296593,7.692116999,7.595199333,7.493377787,7.386704971,7.275055109,7.158981607,7.038484468,6.913650942,6.784728642,6.651977027,6.515069048,6.374111623,6.228897233,6.079031999,5.924669253,5.766323899,5.604063459,5.43841477,5.26939121,5.096619936,4.919752772,4.738936722,4.554312451,4.366039658,4.174017769,3.978461295,3.779470133,3.576724216,3.370764477,3.162238756,2.951119622,2.737359938,2.521133452,2.302407806,2.08132299,1.858467726,1.632539296,1.397202225,1.149523324,0.890812319,0.62251893,0.349040094,0.084409259,-0.164612445,-0.4001423,-0.625408177,-0.844927296,-1.067373925,-1.297998987,-1.536777099,-1.782558235,-2.033692207,-2.28906274,-2.54694712,-2.806836154,-3.068463186,-3.331653821,-3.596227332,-3.862303417,-4.129421924,-4.397321356,-4.666082505,-4.935632162,-5.206170796,-5.478105728,-5.751638617,-6.027203502,-6.304753878,-6.584235675,-6.865027697,-7.146774939,-7.428922534,-7.711971427,-7.995982555,-8.281623641,-8.569128828,-8.85847189,-9.14887768,-9.440152159,-9.731968139,-10.02382391,-10.315645796,-10.608918155,-10.906228043,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}",
varray = "{-1.055634971,-0.820094649,-0.530478984,-0.277519378,-0.049665975,0.168173928,0.369832037,0.557189853,0.73806136,0.918444007,1.100988955,1.285835111,1.471379381,1.656087228,1.83947039,2.021804885,2.204138782,2.387586314,2.572217234,2.757544476,2.943083961,3.127927125,3.311936258,3.49497066,3.677517995,3.860273388,4.043516446,4.227247167,4.411953813,4.597148124,4.781785019,4.965795342,5.149247651,5.331933288,5.514618924,5.698279889,5.882706331,6.067759345,6.253369179,6.43883951,6.623542572,6.807967224,6.991834459,7.175283184,7.359219574,7.543715171,7.727930567,7.91102934,8.092315166,8.270881273,8.44728269,8.622987785,8.800575578,8.981370755,9.16634984,9.355446065,9.546982405,9.738937256,9.930334092,10.119987137,10.30698723,10.492242934,10.676242509,10.859335313,11.042009008,11.224684494,11.40735998,11.589966311,11.77250289,11.955040067,12.138134659,12.321927365,12.506347836,12.691464628,12.877626501,13.06357852,13.248553416,13.432761044,13.616063692,13.798391012,13.981067688,14.165071441,14.350401073,14.536497974,14.722526917,14.907441627,15.090542195,15.272247132,15.453463208,15.634886746,15.817842429,16.00302897,16.188982779,16.375285347,16.561309505,16.745870074,16.92840904,17.110739948,17.293002899,17.47610287,17.660388619,17.84523298,18.029659429,18.213737119,18.397257992,18.580849811,18.765279846,18.950546303,19.136368979,19.322329963,19.507382979,19.691527431,19.874834264,20.057444179,20.239565832,20.422106592,20.605414621,20.789699174,20.974891096,21.161129891,21.347647095,21.533745789,21.718937711,21.903640773,22.087576567,22.271163603,22.454611733,22.638548722,22.822346805,23.006144888,23.189873219,23.373392294,23.556911967,23.740989056,23.925624756,24.110540657,24.296084919,24.481558832,24.666267268,24.850416495,25.034286715,25.217528572,25.400211222,25.582891485,25.765153238,25.947554494,26.130303912,26.31319522,26.495810505,26.678077627,26.859158373,27.03919344,27.2183891,27.396813913,27.574889968,27.752475972,27.92950277,28.106320312,28.283627309,28.460794206,28.638101203,28.814990286,28.99097379,29.16472524,29.336732302,29.506996765,29.675866194,29.843757906,30.011789121,30.179820337,30.347501601,30.514694006,30.680631478,30.844614709,31.006505389,31.166790586,31.32526224,31.482546323,31.638644628,31.793695462,31.947491958,32.10059034,32.252713392,32.40427843,32.555147743,32.705110284,32.853817294,33.001129867,33.14656034,33.29010692,33.431979459,33.572526716,33.711957348,33.85055096,33.988306953,34.125295678,34.260890565,34.395089819,34.528311952,34.660559355,34.792040088,34.923102908,35.053748414,35.1835569,35.312946278,35.441359731,35.568936163,35.695816274,35.821859963,35.946717873,36.071016573,36.194685715,36.317446291,36.439648854,36.561153301,36.681402816,36.801235017,36.924136292,37.050595501,37.180333637,37.313071096,37.446575824,37.573873847,37.693009727,37.80489203,37.910636184,38.012195237,38.11396235,38.218102215,38.324333435,38.432308447,38.541817399,38.652023867,38.762158191,38.872501769,38.982844151,39.092768022,39.202552988,39.31199099,39.420452469,39.528287977,39.635566071,39.742146647,39.848308713,39.954401627,40.060004483,40.165396887,40.270857247,40.376317008,40.481775571,40.587234732,40.692485836,40.797320225,40.901805855,41.005874173,41.109873339,41.21366265,41.317242107,41.420542558,41.523772657,41.626762446,41.729471102,41.832232456,41.937530675,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}"), .Names = c("id",
"seris_id", "module_id", "isc", "voc", "impp", "vmpp", "pmpp",
"unique_halm_id", "iarray", "varray"), row.names = 1L, class = "data.frame")
Solved it. since the ggplot requires data frames. I just added two more columns to the data frame and added them there.
vmpp <- iv$vmpp
df <- cbind(df,vmpp)
impp <- iv$impp
df <- cbind(df,impp)
print(df)
})
output$ivcurve <- renderPlot({
ggplot(data3(), aes(x=n2, y= n1)) + geom_line(colour='blue')+ scale_y_continuous(limits = c(-1, 11))+ geom_vline(aes(xintercept = vmpp))+ geom_hline(aes(yintercept = impp))