Related
Hi I have a dataset here,
> dput(data2)
structure(list(Date = structure(c(1651795200, 1651795200, 1652400000,
1652400000, 1653004800, 1653004800, 1653609600, 1653609600, 1654214400,
1654214400), tzone = "UTC", class = c("POSIXct", "POSIXt")),
Country = c("MYR", "JPY", "MYR", "JPY", "MYR", "JPY", "MYR",
"JPY", "MYR", "JPY"), Value = c(-4.58032749341578, -11.8566176470588,
-5.27883238229476, -10.9425785482124, -5.06949191159718,
-10.00938379731, -4.84618722451869, -9.46424356856266, -5.06949191159718,
-12.0721271393643)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L))
I have some code to run an animation bar chart
plot1 <- ggplot(data2, aes(x=reorder(Country, -Value), y=Value, fill=Country)) + geom_bar(stat='identity')
plot1 + coord_flip() + transition_states(Date, transition_length = 2,state_length = 1, wrap =
FALSE) + geom_text(x=1 ,y=-10, family="Times",aes(label=as.character(Date)),size=6, color
="blue") + xlab("Country")
The rendered plot looks great except I have a random character appearing after the date is shown.
For the time series plot which is composed by two subplots:
library(tidyverse)
library(lubridate)
library(feasts)
library(tsibble)
library(gghighlight)
df %>%
mutate(date = as.Date(date, origin = "1899-12-30")) %>%
mutate(year=as.numeric(year(date))) %>%
pivot_longer(`food_index`:`energy_index`) %>%
mutate(date=yearmonth(date)) %>%
as_tsibble(index=date, key=name) %>%
gg_season(value, alpha=1) +
geom_line(size=0.8, alpha=0.8) +
geom_point(size=2, alpha=1)
Out:
Let's say if the current year is 2022, I wanna to plot the line of that year with alpha=1, other years' lines with smaller alpha, ie., alpha=0.3.
How could I do that? Thanks for your helps at advance.
Data:
df <- structure(list(date = c(42766, 42794, 42825, 42855, 42886, 42916,
42947, 42978, 43008, 43039, 43069, 43100, 43131, 43159, 43190,
43220, 43251, 43281, 43312, 43343, 43373, 43404, 43434, 43465,
43496, 43524, 43555, 43585, 43616, 43646, 43677, 43708, 43738,
43769, 43799, 43830, 43861, 43890, 43921, 43951, 43982, 44012,
44043, 44074, 44104, 44135, 44165, 44196, 44227, 44255, 44286,
44316, 44347, 44377, 44408, 44439, 44469, 44500, 44530, 44561
), food_index = c(58.53, 61.23, 55.32, 55.34, 61.73, 56.91, 54.27,
59.08, 60.11, 66.01, 60.11, 63.41, 69.8, 72.45, 81.11, 89.64,
88.64, 88.62, 98.27, 111.11, 129.39, 140.14, 143.44, 169.21,
177.39, 163.88, 135.07, 151.28, 172.81, 143.82, 162.13, 172.22,
176.67, 179.3, 157.27, 169.12, 192.51, 194.2, 179.4, 169.1, 193.17,
174.92, 181.92, 188.41, 192.14, 203.41, 194.19, 174.3, 174.86,
182.33, 182.82, 185.36, 192.41, 195.59, 202.6, 201.51, 225.01,
243.78, 270.67, 304.57), energy_index = c(127.36, 119.87, 120.96,
112.09, 112.19, 109.24, 109.56, 106.89, 109.35, 108.35, 112.39,
117.77, 119.52, 122.24, 120.91, 125.41, 129.72, 135.25, 139.33,
148.6, 169.62, 184.23, 204.38, 198.55, 189.29, 202.47, 220.23,
240.67, 263.12, 249.74, 240.84, 243.42, 261.2, 256.76, 258.69,
277.98, 289.63, 293.46, 310.81, 318.68, 310.04, 302.17, 298.62,
260.92, 269.29, 258.84, 241.68, 224.18, 216.36, 226.57, 235.98,
253.86, 267.37, 261.99, 273.37, 280.91, 291.84, 297.88, 292.78,
289.79)), row.names = c(NA, 60L), class = "data.frame")
You could achieve this by creating a boolean variable that detects the year you would like to highlight and then passing that as the alpha aesthetic inside your plot:
df %>%
mutate(date = as.Date(date, origin = "1899-12-30")) %>%
mutate(year=as.numeric(year(date))) %>%
pivot_longer(`food_index`:`energy_index`) %>%
mutate(date=yearmonth(date),
highlight = ifelse(year == "2021", T, F)) %>%
as_tsibble(index=date, key=name) %>%
gg_season(value, alpha = 0.2) +
geom_line(aes(alpha = highlight),
size=0.8) +
geom_point(aes(alpha = highlight),
size=2) +
scale_alpha_manual(values = c(0.2, 1)) +
guides(alpha = "none") +
theme_bw()
I have time-series of 2d obsverations that I'm trying to smooth to take out some of the observation variability. I've been applying loess(), but just noticed it doesn't seem to smooth as a function of time but just across the entire pooled coordinates. Am I missing something? Is there a different function I should be using?
df<-structure(list(timestamp = structure(c(1586488380, 1586488440,
1586488560, 1586488620, 1586488680, 1586488740, 1586488800, 1586488860,
1586489520, 1586489580, 1586489700, 1586489820, 1586489880, 1586489940,
1586490000, 1586490060, 1586490120, 1586490180, 1586490240, 1586490300,
1586490360, 1586490420, 1586490480, 1586490540, 1586490600, 1586490660,
1586490720, 1586490780, 1586490840, 1586490900, 1586490960, 1586491020,
1586491200, 1586491260, 1586491320, 1586491380, 1586491440, 1586491500,
1586491560, 1586491620, 1586491680, 1586491740, 1586491800, 1586491860,
1586491920, 1586491980, 1586492040, 1586492100, 1586492160, 1586492220,
1586492280, 1586492340, 1586492400, 1586492460, 1586492520, 1586492580,
1586492640, 1586492700, 1586492760, 1586492820, 1586492880, 1586492940,
1586493000, 1586493060, 1586493120, 1586493180, 1586493240, 1586493300,
1586493360, 1586493420, 1586493480, 1586493540, 1586493600, 1586493660,
1586493720, 1586493780, 1586493840, 1586493900, 1586493960, 1586494020,
1586494200, 1586494260, 1586494320, 1586494380, 1586494440, 1586494500,
1586494560, 1586494620, 1586494680, 1586494740, 1586494800, 1586494860,
1586494920, 1586494980, 1586495040, 1586495100, 1586495160, 1586495220,
1586495280, 1586495340, 1586495400, 1586495460, 1586495520, 1586495580,
1586495640, 1586495700, 1586495760, 1586495820, 1586495880, 1586495940,
1586496000, 1586496060, 1586496120, 1586496180, 1586496240, 1586496300,
1586496360, 1586496420, 1586496480, 1586496540, 1586496600, 1586496660,
1586496720, 1586496780, 1586496840, 1586496900, 1586496960, 1586497020,
1586497080, 1586497140, 1586497200, 1586497260, 1586497320, 1586497380,
1586497440, 1586497500, 1586497560, 1586497620, 1586497680, 1586497740,
1586497800, 1586497860, 1586497920, 1586497980, 1586498040, 1586498100,
1586498160, 1586498220, 1586498280, 1586498340), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), easting = c(740.582355718548, 740.582355718548,
739.726374785548, 739.611045841548, 739.508690311548, 739.398269506548,
739.278804356548, 739.627760514548, 737.913640733548, 738.088450601548,
738.551491861548, 738.957133488548, 739.137345557548, 739.304664573548,
739.460440784548, 739.605842807548, 739.741887116548, 739.719077482548,
739.369420509548, 738.973489249548, 738.521335985548, 739.279305656548,
739.993757669548, 740.085239162548, 740.172262825548, 740.255157063548,
740.334219013548, 740.409718260548, 740.481900024548, 740.550987811548,
740.573883125548, 740.252267406548, 739.261723439548, 738.935233921548,
738.774921432548, 738.615895069548, 738.451107559548, 738.280235586548,
738.493740162548, 738.717501067548, 738.925752666548, 739.120074872548,
739.301840012548, 739.472245999548, 739.632343117548, 739.890965132548,
740.098495936548, 740.293354354548, 740.476683179548, 740.649491986548,
740.678160672548, 740.443560695548, 740.398855065548, 740.451032168548,
740.467918582548, 740.160041067548, 739.819912921548, 739.641686751548,
739.457020461548, 739.265544595548, 739.193281488548, 739.227252654548,
738.995761471548, 738.822890745548, 738.775446949548, 738.726816309548,
738.676941805548, 738.625762928548, 739.254178659548, 739.727445331548,
740.019566884548, 740.129316037548, 740.255273807548, 740.442527947548,
740.615257127548, 740.775140223548, 740.923607252548, 741.055065964548,
741.139279130548, 740.907234314548, 739.290829342548, 739.115359714548,
740.664354207548, 740.589899151548, 740.578913554548, 740.571708783548,
740.568311076548, 740.568740893548, 740.527920123548, 740.358565457548,
740.392277156548, 740.370981239548, 740.289917518548, 740.214089159548,
740.192635592548, 740.176850935548, 740.160315351548, 740.143068630548,
740.103174446548, 740.008327647548, 740.058271768548, 740.205384482548,
740.211048172548, 740.150159818548, 740.122028309548, 740.230164637548,
740.271076846548, 740.075087486548, 739.768752873548, 739.586722485548,
739.940259334548, 740.233576255548, 740.473614136548, 740.495703912548,
740.341935547548, 740.186820856548, 740.204435025548, 740.299218490548,
740.318343269548, 740.238895133548, 739.999671854548, 740.062183564548,
740.196345466548, 740.329697802548, 740.418193609548, 740.311257937548,
740.270203214548, 740.209679752548, 740.146590442548, 740.079785501548,
740.170176300548, 740.268945921548, 740.217498771548, 740.133923060548,
740.117921377548, 740.177771453548, 740.140658663548, 740.080204534548,
740.108449333548, 740.145621912548, 740.182429420548, 740.010376475548,
739.819150336548, 739.616854492548, 739.369690457548, 739.104183601548,
738.938020260548, 738.843359187548, 738.802644324548, 738.761524527548
), northing = c(2307.15134120986, 2307.15134120986, 2307.60836846986,
2307.72110371186, 2307.83015111886, 2307.94605545486, 2308.06963872386,
2307.94323643186, 2308.11539257586, 2307.98516105286, 2307.67209087786,
2307.39795736686, 2307.27544716286, 2307.16124100486, 2307.05447137086,
2306.95438746086, 2306.86033624586, 2306.85049579286, 2307.02449397686,
2307.22230535086, 2307.44905018086, 2306.99878407786, 2306.57790074586,
2306.51052483586, 2306.44607531386, 2306.38433869586, 2306.32512182186,
2306.26824947086, 2306.21356227886, 2306.16091500786, 2306.12077142386,
2306.17964098286, 2306.35042514386, 2306.42512419786, 2306.46180283886,
2306.49818722086, 2306.53588972286, 2306.57498431686, 2306.47587680786,
2306.37665531786, 2306.28431106486, 2306.19814347186, 2306.11754401386,
2306.04198150286, 2305.97099021786, 2305.88981694586, 2305.83737042086,
2305.78847203986, 2305.74280289286, 2305.70008126986, 2305.72436278986,
2305.79211047386, 2305.75001607586, 2305.66373079386, 2305.59454921786,
2305.66723079486, 2305.74826733386, 2305.75631243686, 2305.76412484986,
2305.77169383886, 2305.76318962486, 2305.74988901286, 2305.84052390686,
2305.91140100786, 2305.95786180686, 2306.00518488486, 2306.05340722986,
2306.10256811686, 2305.87083530186, 2305.67745118186, 2305.55808577486,
2305.51324056486, 2305.46177226186, 2305.38525730986, 2305.31467748186,
2305.24934676386, 2305.18868082786, 2305.13496475486, 2305.08628958686,
2305.07937405386, 2305.33244795286, 2305.26640414086, 2304.97847050686,
2304.88865807586, 2304.78121096686, 2304.67333694586, 2304.56517670086,
2304.45687199986, 2304.26299422086, 2303.81398617786, 2303.81772073686,
2303.82576399386, 2303.82937144386, 2303.85166918186, 2303.85679798586,
2303.85841996086, 2303.85934299186, 2303.85962745886, 2303.81418344986,
2303.71268455886, 2303.70396413486, 2303.71187848686, 2303.71241867586,
2303.72409686386, 2303.75626565486, 2303.81432053886, 2303.80604508386,
2303.57280991386, 2303.21896587386, 2303.06912105986, 2303.28351126486,
2303.45378473786, 2303.49375232686, 2303.50460433986, 2303.48302188886,
2303.48174695086, 2303.52692291386, 2303.61686633486, 2303.41139580686,
2303.32679365886, 2303.06984393086, 2303.15017783486, 2303.29293566086,
2303.44528532286, 2303.48203523086, 2302.89274879786, 2302.81440275486,
2302.73512963586, 2302.65410710886, 2302.56757728186, 2302.77320543086,
2303.00846800486, 2303.01932301786, 2302.97477011386, 2303.03742546386,
2303.13970331386, 2303.07628123386, 2302.97297111586, 2303.02123867886,
2303.08476293486, 2303.14766331986, 2303.17026860886, 2303.18249014186,
2303.19025912386, 2303.15218828686, 2303.11064943486, 2302.98849464786,
2302.87632040886, 2302.82807292386, 2302.77934558786)), row.names = 5905:6054, class = "data.frame")
df.fitted<-loess(northing ~ easting, span = .5, data = df)
df$northing.fitted<-df.fitted$fitted
ggplot(df, aes(x=easting,y=northing)) +
geom_path(color='orangered2') +
geom_point(aes(y=northing.fitted))
So, instead of smoothing the "cluster", I'd like to use a rolling average smoothing each x/y pair as a function of time.
You need to regress both easting and northing as functions of time to get smoother x, y values:
df$numtime <- as.numeric(df$timestamp)
df.fitted.northing <-loess(northing ~ numtime, span = .5, data = df)
df.fitted.easting <- loess(easting ~ numtime, span = .5, data = df)
newdat <- data.frame(numtime = seq(min(df$numtime), max(df$numtime), len = 1000))
newdat$northing <- predict(df.fitted.northing, newdat)
newdat$easting <- predict(df.fitted.easting, newdat)
ggplot(df, aes(easting, northing)) +
geom_path(aes(color = "original path"), alpha = 0.6, size = 0.5,
arrow = arrow(length = unit(0.1, "inches"))) +
geom_point(aes(color = "original path"), alpha = 0.6, size = 1) +
geom_path(data = newdat, size = 1, aes(color = "smoothed"),
arrow = arrow(length = unit(0.1, "inches"))) +
coord_equal() +
theme_light() +
scale_color_manual(values = c("original path" = "orangered2",
"smoothed" = "deepskyblue4"), name = "")
I'm using ggplot2 to make violin plots of module scores from Seurat, and am wanting to add statistics to it. I made the following violin plot, and I'm wanting to change the bracket labels so that it says "p < 0.13" instead of just 0.13 like it is now (thanks to #StupidWolf for the example!).
library(Seurat)
library(SeuratObject)
library(ggplot2)
library(ggpubr)
library(reshape2)
#add Seurat's module scores and create Seurat object from them =====================
ERlist <- list(c("CPB1", "RP11-53O19.1", "TFF1", "MB", "ANKRD30B",
"LINC00173", "DSCAM-AS1", "IGHG1", "SERPINA5", "ESR1",
"ILRP2", "IGLC3", "CA12", "RP11-64B16.2", "SLC7A2",
"AFF3", "IGFBP4", "GSTM3", "ANKRD30A", "GSTT1", "GSTM1",
"AC026806.2", "C19ORF33", "STC2", "HSPB8", "RPL29P11",
"FBP1", "AGR3", "TCEAL1", "CYP4B1", "SYT1", "COX6C",
"MT1E", "SYTL2", "THSD4", "IFI6", "K1AA1467", "SLC39A6",
"ABCD3", "SERPINA3", "DEGS2", "ERLIN2", "HEBP1", "BCL2",
"TCEAL3", "PPT1", "SLC7A8", "RP11-96D1.10", "H4C8",
"PI15", "PLPP5", "PLAAT4", "GALNT6", "IL6ST", "MYC",
"BST2", "RP11-658F2.8", "MRPS30", "MAPT", "AMFR", "TCEAL4",
"MED13L", "ISG15", "NDUFC2", "TIMP3", "RP13-39P12.3", "PARD68"))
tnbclist <- list(c("FABP7", "TSPAN8", "CYP4Z1", "HOXA10", "CLDN1",
"TMSB15A", "C10ORF10", "TRPV6", "HOXA9", "ATP13A4",
"GLYATL2", "RP11-48O20.4", "DYRK3", "MUCL1", "ID4", "FGFR2",
"SHOX2", "Z83851.1", "CD82", "COL6A1", "KRT23", "GCHFR",
"PRICKLE1", "GCNT2", "KHDRBS3", "SIPA1L2", "LMO4", "TFAP2B",
"SLC43A3", "FURIN", "ELF5", "C1ORF116", "ADD3", "EFNA3",
"EFCAB4A", "LTF", "LRRC31", "ARL4C", "GPNMB", "VIM",
"SDR16C5", "RHOV", "PXDC1", "MALL", "YAP1", "A2ML1",
"RP1-257A7.5", "RP11-353N4.6", "ZBTB18", "CTD-2314B22.3", "GALNT3",
"BCL11A", "CXADR", "SSFA2", "ADM", "GUCY1A3", "GSTP1",
"ADCK3", "SLC25A37", "SFRP1", "PRNP", "DEGS1", "RP11-110G21.2",
"AL589743.1", "ATF3", "SIVA1", "TACSTD2", "HEBP2"))
genes = c(unlist(c(ERlist,tnbclist)))
mat = matrix(rnbinom(500*length(genes),mu=500,size=1),ncol=500)
rownames(mat) = genes
colnames(mat) = paste0("cell",1:500)
sobj = CreateSeuratObject(mat)
sobj = NormalizeData(sobj)
sobj$ClusterName = factor(sample(0:1,ncol(sobj),replace=TRUE))
sobj = AddModuleScore(object = sobj, features = tnbclist,
name = "TNBC_List",ctrl=5)
sobj = AddModuleScore(object = sobj, features = ERlist,
name = "ER_List",ctrl=5)
sobjlists = FetchData(object = sobj, vars = c("ER_List1", "TNBC_List1", "ClusterName"))
#violin plot =======================================================
my_comparisons <- list( c("0", "1") )
ggplot(sobjlists,aes(x= ClusterName, y = ER_List1)) +
geom_violin(aes(fill=ClusterName)) +
geom_boxplot(width=0.1) + labs(y= "ER+ Signature", x = "ClusterName") + ggtitle(label = "Object") +
theme(plot.title = element_text(hjust = 0.5)) + stat_compare_means(comparisons = my_comparisons, method = "wilcox.test")+ # Add pairwise comparisons p-value
stat_compare_means(label.y = 0.75)
The closest I've found is ggpubr: Show significance levels (*** or n.s.) instead of p-value in the label and https://github.com/kassambara/ggpubr/issues/327 but I am not sure how to implement it with how I created the plot.
Thanks for reading!
I'm using ggplot2 to make violin plots of module scores from Seurat, and am wanting to add statistics to it. I made the following violin plot, but I want to switch the violins around from reading "0" and "1" from left to right, to "1" and "0". (Thanks to #StupidWolf for the example!)
library(Seurat)
library(SeuratObject)
library(ggplot2)
library(ggpubr)
library(reshape2)
#add Seurat's module scores and create Seurat object from them =====================
ERlist <- list(c("CPB1", "RP11-53O19.1", "TFF1", "MB", "ANKRD30B",
"LINC00173", "DSCAM-AS1", "IGHG1", "SERPINA5", "ESR1",
"ILRP2", "IGLC3", "CA12", "RP11-64B16.2", "SLC7A2",
"AFF3", "IGFBP4", "GSTM3", "ANKRD30A", "GSTT1", "GSTM1",
"AC026806.2", "C19ORF33", "STC2", "HSPB8", "RPL29P11",
"FBP1", "AGR3", "TCEAL1", "CYP4B1", "SYT1", "COX6C",
"MT1E", "SYTL2", "THSD4", "IFI6", "K1AA1467", "SLC39A6",
"ABCD3", "SERPINA3", "DEGS2", "ERLIN2", "HEBP1", "BCL2",
"TCEAL3", "PPT1", "SLC7A8", "RP11-96D1.10", "H4C8",
"PI15", "PLPP5", "PLAAT4", "GALNT6", "IL6ST", "MYC",
"BST2", "RP11-658F2.8", "MRPS30", "MAPT", "AMFR", "TCEAL4",
"MED13L", "ISG15", "NDUFC2", "TIMP3", "RP13-39P12.3", "PARD68"))
tnbclist <- list(c("FABP7", "TSPAN8", "CYP4Z1", "HOXA10", "CLDN1",
"TMSB15A", "C10ORF10", "TRPV6", "HOXA9", "ATP13A4",
"GLYATL2", "RP11-48O20.4", "DYRK3", "MUCL1", "ID4", "FGFR2",
"SHOX2", "Z83851.1", "CD82", "COL6A1", "KRT23", "GCHFR",
"PRICKLE1", "GCNT2", "KHDRBS3", "SIPA1L2", "LMO4", "TFAP2B",
"SLC43A3", "FURIN", "ELF5", "C1ORF116", "ADD3", "EFNA3",
"EFCAB4A", "LTF", "LRRC31", "ARL4C", "GPNMB", "VIM",
"SDR16C5", "RHOV", "PXDC1", "MALL", "YAP1", "A2ML1",
"RP1-257A7.5", "RP11-353N4.6", "ZBTB18", "CTD-2314B22.3", "GALNT3",
"BCL11A", "CXADR", "SSFA2", "ADM", "GUCY1A3", "GSTP1",
"ADCK3", "SLC25A37", "SFRP1", "PRNP", "DEGS1", "RP11-110G21.2",
"AL589743.1", "ATF3", "SIVA1", "TACSTD2", "HEBP2"))
genes = c(unlist(c(ERlist,tnbclist)))
mat = matrix(rnbinom(500*length(genes),mu=500,size=1),ncol=500)
rownames(mat) = genes
colnames(mat) = paste0("cell",1:500)
sobj = CreateSeuratObject(mat)
sobj = NormalizeData(sobj)
sobj$ClusterName = factor(sample(0:1,ncol(sobj),replace=TRUE))
sobj = AddModuleScore(object = sobj, features = tnbclist,
name = "TNBC_List",ctrl=5)
sobj = AddModuleScore(object = sobj, features = ERlist,
name = "ER_List",ctrl=5)
sobjlists = FetchData(object = sobj, vars = c("ER_List1", "TNBC_List1", "ClusterName"))
#violin plot =======================================================
my_comparisons <- list( c("0", "1") )
ggplot(sobjlists,aes(x= ClusterName, y = ER_List1)) +
geom_violin(aes(fill=ClusterName)) +
geom_boxplot(width=0.1) + labs(y= "ER+ Signature", x = "ClusterName") + ggtitle(label = "Object") +
theme(plot.title = element_text(hjust = 0.5)) + stat_compare_means(comparisons = my_comparisons, method = "wilcox.test")+ # Add pairwise comparisons p-value
stat_compare_means(label.y = 0.75)
Using the solution from How to reorder plots in combined ggplot2 graph?, I can reorder the plots by adding
+ scale_x_discrete(limits = c("1", "0"))
to the very end of the code I have now. However, doing that, the "Wilcoxon, p = 0.13" disappears, and gives the following error:
ggplot(sobjlists,aes(x= ClusterName, y = ER_List1)) +
geom_violin(aes(fill=ClusterName)) +
geom_boxplot(width=0.1) + labs(y= "ER+ Signature", x = "ClusterName") + ggtitle(label = "Object") +
theme(plot.title = element_text(hjust = 0.5)) + stat_compare_means(comparisons = my_comparisons, method = "wilcox.test")+ # Add pairwise comparisons p-value
stat_compare_means(label.y = 0.75) + scale_x_discrete(limits = c("1", "0"))
Warning messages:
1: Unknown or uninitialised column: `p`.
2: Computation failed in `stat_compare_means()`:
argument "x" is missing, with no default
For a TL;DR, how do I keep the top statistic in and reorder the violins?
Thanks for reading!
Here is what worked for me (From https://www.datanovia.com/en/blog/how-to-change-ggplot-legend-order/)
sobjlists$ClusterName <- factor(sobjlists$ClusterName, levels = c("1", "0"))