How can I smooth lines in ggplot? - r

I want to reproduce the following graph but with smoother lines:
Such that the lines are similar to the following graph:
So far, I've tried the following, but I only get a trend instead of smoothing the two series:
# Build `plot_fig4`: both series drawn against `dias` as plain lines, an
# attempted smoothed layer, and a shaded band between the lower/upper columns.
plot_fig4 <- ggplot(fig4, aes(x=dias))+
# The two raw series, drawn point-to-point (no smoothing).
geom_line(aes(y=complete_preds_means), color="#9a6584", size=0.5)+
geom_line(aes(y=contrafact), colour="#000000", size=0.5) +
# NOTE(review): `method` and `formula` look like smoother arguments
# (geom_smooth()); geom_line() presumably does not use them -- confirm.
geom_line(aes(y=complete_preds_means), method = "lm", formula=y~spline(x,21))+
# Band spanning `complete_preds_lower` to `complete_preds_upper`.
geom_ribbon(aes(ymin=complete_preds_lower, ymax=complete_preds_upper), fill="#9a6584", alpha=0.2)
My data:
structure(list(dias = structure(c(19052, 19053, 19054, 19055,
19056, 19057, 19058, 19059, 19060, 19061, 19062, 19063, 19064,
19065, 19066, 19067, 19068, 19069, 19070, 19071), class = "Date"),
complete_preds_means = c(341.07434, 381.59167, 455.47815,
485.05597, 527.60876, 562.63965, 602.48975, 624.663, 626.5637,
527.2239, 420.71643, 389.30804, 378.74396, 366.61548, 361.36566,
363.37253, 319.31824, 314.39688, 303.60342, 294.8934), contrafact = c(364.5,
358.89, 466.64, 470.11, 464.25, 487.27, 591.2, 715.33, 628.02,
505.98, 402.9, 316.81, 323.35, 358.61, 354.26, 369.5, 317.01,
336.5, 285.33, 270.91), complete_preds_lower = c(320.6368042,
361.7870895, 432.4487762, 461.2275833, 503.2255051, 535.7108551,
576.3850006, 597.9762146, 601.4407013, 504.0448837, 398.7777023,
368.0046799, 356.3603165, 345.5847885, 339.9679932, 342.7514801,
298.3247482, 293.4419693, 282.5286865, 275.4635284), complete_preds_upper = c(359.9897186,
402.5708664, 477.4746765, 508.7775711, 550.3326447, 587.6521027,
628.5320251, 649.9691833, 649.4831665, 547.9886108, 442.046402,
410.8121475, 399.0208908, 389.8615128, 387.4929993, 386.2935928,
340.140834, 336.3622116, 324.793483, 315.4606934)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))

Like this?
# Reshape every column except `dias` into name/value pairs, then draw one
# smoothed curve per original column, coloured by column name.
ggplot(
  data = pivot_longer(df, -dias),
  mapping = aes(x = dias, y = value, col = name)
) +
  geom_smooth(se = FALSE)

You can use the function `geom_smooth()` with the following parameters.
Add one geom_smooth(...) line for each of your columns.
If you want the confidence interval for a series, switch `se = FALSE` to `se = TRUE`.
# Draw a LOESS-smoothed curve of `complete_preds_means` over `dias`;
# `se = FALSE` leaves out the confidence band.
ggplot() +
  geom_smooth(
    data = data,
    aes(x = dias, y = complete_preds_means),
    method = "loess",
    se = FALSE
  )

Related

Animation chart in R displaying color code instead of the date

Hi I have a dataset here,
> dput(data2)
structure(list(Date = structure(c(1651795200, 1651795200, 1652400000,
1652400000, 1653004800, 1653004800, 1653609600, 1653609600, 1654214400,
1654214400), tzone = "UTC", class = c("POSIXct", "POSIXt")),
Country = c("MYR", "JPY", "MYR", "JPY", "MYR", "JPY", "MYR",
"JPY", "MYR", "JPY"), Value = c(-4.58032749341578, -11.8566176470588,
-5.27883238229476, -10.9425785482124, -5.06949191159718,
-10.00938379731, -4.84618722451869, -9.46424356856266, -5.06949191159718,
-12.0721271393643)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L))
I have some code to run an animation bar chart
# Static bar chart: one bar per Country, ordered via reorder(Country, -Value).
plot1 <- ggplot(
  data2,
  aes(x = reorder(Country, -Value), y = Value, fill = Country)
) +
  geom_bar(stat = "identity")

# Flip the axes, animate over the Date states, and print each row's Date as a
# text label at a fixed position.
plot1 +
  coord_flip() +
  transition_states(
    Date,
    transition_length = 2,
    state_length = 1,
    wrap = FALSE
  ) +
  geom_text(
    aes(label = as.character(Date)),
    x = 1,
    y = -10,
    family = "Times",
    size = 6,
    color = "blue"
  ) +
  xlab("Country")
The rendered plot looks great except I have a random character appearing after the date is shown.

Customize alpha values based on conditions for multiple facets time series plots in R

For the time series plot which is composed by two subplots:
library(tidyverse)
library(lubridate)
library(feasts)
library(tsibble)
library(gghighlight)
# Convert the numeric `date` column to Date (origin 1899-12-30), derive the
# year, reshape both index columns to long format, and draw the seasonal plot
# with additional line and point layers.
df %>%
  mutate(
    date = as.Date(date, origin = "1899-12-30"),
    year = as.numeric(year(date))
  ) %>%
  pivot_longer(`food_index`:`energy_index`) %>%
  mutate(date = yearmonth(date)) %>%
  as_tsibble(index = date, key = name) %>%
  gg_season(value, alpha = 1) +
  geom_line(size = 0.8, alpha = 0.8) +
  geom_point(size = 2, alpha = 1)
Out:
Let's say the current year is 2022: I want to plot that year's line with alpha=1 and the other years' lines with a smaller alpha, i.e., alpha=0.3.
How could I do that? Thanks in advance for your help.
Data:
df <- structure(list(date = c(42766, 42794, 42825, 42855, 42886, 42916,
42947, 42978, 43008, 43039, 43069, 43100, 43131, 43159, 43190,
43220, 43251, 43281, 43312, 43343, 43373, 43404, 43434, 43465,
43496, 43524, 43555, 43585, 43616, 43646, 43677, 43708, 43738,
43769, 43799, 43830, 43861, 43890, 43921, 43951, 43982, 44012,
44043, 44074, 44104, 44135, 44165, 44196, 44227, 44255, 44286,
44316, 44347, 44377, 44408, 44439, 44469, 44500, 44530, 44561
), food_index = c(58.53, 61.23, 55.32, 55.34, 61.73, 56.91, 54.27,
59.08, 60.11, 66.01, 60.11, 63.41, 69.8, 72.45, 81.11, 89.64,
88.64, 88.62, 98.27, 111.11, 129.39, 140.14, 143.44, 169.21,
177.39, 163.88, 135.07, 151.28, 172.81, 143.82, 162.13, 172.22,
176.67, 179.3, 157.27, 169.12, 192.51, 194.2, 179.4, 169.1, 193.17,
174.92, 181.92, 188.41, 192.14, 203.41, 194.19, 174.3, 174.86,
182.33, 182.82, 185.36, 192.41, 195.59, 202.6, 201.51, 225.01,
243.78, 270.67, 304.57), energy_index = c(127.36, 119.87, 120.96,
112.09, 112.19, 109.24, 109.56, 106.89, 109.35, 108.35, 112.39,
117.77, 119.52, 122.24, 120.91, 125.41, 129.72, 135.25, 139.33,
148.6, 169.62, 184.23, 204.38, 198.55, 189.29, 202.47, 220.23,
240.67, 263.12, 249.74, 240.84, 243.42, 261.2, 256.76, 258.69,
277.98, 289.63, 293.46, 310.81, 318.68, 310.04, 302.17, 298.62,
260.92, 269.29, 258.84, 241.68, 224.18, 216.36, 226.57, 235.98,
253.86, 267.37, 261.99, 273.37, 280.91, 291.84, 297.88, 292.78,
289.79)), row.names = c(NA, 60L), class = "data.frame")
You could achieve this by creating a boolean variable that detects the year you would like to highlight and then passing that as the alpha aesthetic inside your plot:
# Flag the rows of the year to emphasise, map that flag to the alpha
# aesthetic, and draw the flagged year opaque while other years stay faded.
df %>%
  mutate(date = as.Date(date, origin = "1899-12-30")) %>%
  mutate(year = as.numeric(year(date))) %>%
  pivot_longer(`food_index`:`energy_index`) %>%
  mutate(
    date = yearmonth(date),
    # `year` is numeric, so compare it with a number; the comparison already
    # yields TRUE/FALSE, so no ifelse(..., T, F) wrapper is needed.
    highlight = year == 2021
  ) %>%
  as_tsibble(index = date, key = name) %>%
  gg_season(value, alpha = 0.2) +
  geom_line(aes(alpha = highlight), size = 0.8) +
  geom_point(aes(alpha = highlight), size = 2) +
  # Name the values so the mapping stays correct even when only one of the
  # two levels is present in the data.
  scale_alpha_manual(values = c("FALSE" = 0.2, "TRUE" = 1)) +
  guides(alpha = "none") +
  theme_bw()

loess() doesn't smooth subsequently but over pooled data

I have a time series of 2D observations that I'm trying to smooth to take out some of the observation variability. I've been applying loess(), but just noticed it doesn't seem to smooth as a function of time but just across the entire pooled coordinates. Am I missing something? Is there a different function I should be using?
df<-structure(list(timestamp = structure(c(1586488380, 1586488440,
1586488560, 1586488620, 1586488680, 1586488740, 1586488800, 1586488860,
1586489520, 1586489580, 1586489700, 1586489820, 1586489880, 1586489940,
1586490000, 1586490060, 1586490120, 1586490180, 1586490240, 1586490300,
1586490360, 1586490420, 1586490480, 1586490540, 1586490600, 1586490660,
1586490720, 1586490780, 1586490840, 1586490900, 1586490960, 1586491020,
1586491200, 1586491260, 1586491320, 1586491380, 1586491440, 1586491500,
1586491560, 1586491620, 1586491680, 1586491740, 1586491800, 1586491860,
1586491920, 1586491980, 1586492040, 1586492100, 1586492160, 1586492220,
1586492280, 1586492340, 1586492400, 1586492460, 1586492520, 1586492580,
1586492640, 1586492700, 1586492760, 1586492820, 1586492880, 1586492940,
1586493000, 1586493060, 1586493120, 1586493180, 1586493240, 1586493300,
1586493360, 1586493420, 1586493480, 1586493540, 1586493600, 1586493660,
1586493720, 1586493780, 1586493840, 1586493900, 1586493960, 1586494020,
1586494200, 1586494260, 1586494320, 1586494380, 1586494440, 1586494500,
1586494560, 1586494620, 1586494680, 1586494740, 1586494800, 1586494860,
1586494920, 1586494980, 1586495040, 1586495100, 1586495160, 1586495220,
1586495280, 1586495340, 1586495400, 1586495460, 1586495520, 1586495580,
1586495640, 1586495700, 1586495760, 1586495820, 1586495880, 1586495940,
1586496000, 1586496060, 1586496120, 1586496180, 1586496240, 1586496300,
1586496360, 1586496420, 1586496480, 1586496540, 1586496600, 1586496660,
1586496720, 1586496780, 1586496840, 1586496900, 1586496960, 1586497020,
1586497080, 1586497140, 1586497200, 1586497260, 1586497320, 1586497380,
1586497440, 1586497500, 1586497560, 1586497620, 1586497680, 1586497740,
1586497800, 1586497860, 1586497920, 1586497980, 1586498040, 1586498100,
1586498160, 1586498220, 1586498280, 1586498340), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), easting = c(740.582355718548, 740.582355718548,
739.726374785548, 739.611045841548, 739.508690311548, 739.398269506548,
739.278804356548, 739.627760514548, 737.913640733548, 738.088450601548,
738.551491861548, 738.957133488548, 739.137345557548, 739.304664573548,
739.460440784548, 739.605842807548, 739.741887116548, 739.719077482548,
739.369420509548, 738.973489249548, 738.521335985548, 739.279305656548,
739.993757669548, 740.085239162548, 740.172262825548, 740.255157063548,
740.334219013548, 740.409718260548, 740.481900024548, 740.550987811548,
740.573883125548, 740.252267406548, 739.261723439548, 738.935233921548,
738.774921432548, 738.615895069548, 738.451107559548, 738.280235586548,
738.493740162548, 738.717501067548, 738.925752666548, 739.120074872548,
739.301840012548, 739.472245999548, 739.632343117548, 739.890965132548,
740.098495936548, 740.293354354548, 740.476683179548, 740.649491986548,
740.678160672548, 740.443560695548, 740.398855065548, 740.451032168548,
740.467918582548, 740.160041067548, 739.819912921548, 739.641686751548,
739.457020461548, 739.265544595548, 739.193281488548, 739.227252654548,
738.995761471548, 738.822890745548, 738.775446949548, 738.726816309548,
738.676941805548, 738.625762928548, 739.254178659548, 739.727445331548,
740.019566884548, 740.129316037548, 740.255273807548, 740.442527947548,
740.615257127548, 740.775140223548, 740.923607252548, 741.055065964548,
741.139279130548, 740.907234314548, 739.290829342548, 739.115359714548,
740.664354207548, 740.589899151548, 740.578913554548, 740.571708783548,
740.568311076548, 740.568740893548, 740.527920123548, 740.358565457548,
740.392277156548, 740.370981239548, 740.289917518548, 740.214089159548,
740.192635592548, 740.176850935548, 740.160315351548, 740.143068630548,
740.103174446548, 740.008327647548, 740.058271768548, 740.205384482548,
740.211048172548, 740.150159818548, 740.122028309548, 740.230164637548,
740.271076846548, 740.075087486548, 739.768752873548, 739.586722485548,
739.940259334548, 740.233576255548, 740.473614136548, 740.495703912548,
740.341935547548, 740.186820856548, 740.204435025548, 740.299218490548,
740.318343269548, 740.238895133548, 739.999671854548, 740.062183564548,
740.196345466548, 740.329697802548, 740.418193609548, 740.311257937548,
740.270203214548, 740.209679752548, 740.146590442548, 740.079785501548,
740.170176300548, 740.268945921548, 740.217498771548, 740.133923060548,
740.117921377548, 740.177771453548, 740.140658663548, 740.080204534548,
740.108449333548, 740.145621912548, 740.182429420548, 740.010376475548,
739.819150336548, 739.616854492548, 739.369690457548, 739.104183601548,
738.938020260548, 738.843359187548, 738.802644324548, 738.761524527548
), northing = c(2307.15134120986, 2307.15134120986, 2307.60836846986,
2307.72110371186, 2307.83015111886, 2307.94605545486, 2308.06963872386,
2307.94323643186, 2308.11539257586, 2307.98516105286, 2307.67209087786,
2307.39795736686, 2307.27544716286, 2307.16124100486, 2307.05447137086,
2306.95438746086, 2306.86033624586, 2306.85049579286, 2307.02449397686,
2307.22230535086, 2307.44905018086, 2306.99878407786, 2306.57790074586,
2306.51052483586, 2306.44607531386, 2306.38433869586, 2306.32512182186,
2306.26824947086, 2306.21356227886, 2306.16091500786, 2306.12077142386,
2306.17964098286, 2306.35042514386, 2306.42512419786, 2306.46180283886,
2306.49818722086, 2306.53588972286, 2306.57498431686, 2306.47587680786,
2306.37665531786, 2306.28431106486, 2306.19814347186, 2306.11754401386,
2306.04198150286, 2305.97099021786, 2305.88981694586, 2305.83737042086,
2305.78847203986, 2305.74280289286, 2305.70008126986, 2305.72436278986,
2305.79211047386, 2305.75001607586, 2305.66373079386, 2305.59454921786,
2305.66723079486, 2305.74826733386, 2305.75631243686, 2305.76412484986,
2305.77169383886, 2305.76318962486, 2305.74988901286, 2305.84052390686,
2305.91140100786, 2305.95786180686, 2306.00518488486, 2306.05340722986,
2306.10256811686, 2305.87083530186, 2305.67745118186, 2305.55808577486,
2305.51324056486, 2305.46177226186, 2305.38525730986, 2305.31467748186,
2305.24934676386, 2305.18868082786, 2305.13496475486, 2305.08628958686,
2305.07937405386, 2305.33244795286, 2305.26640414086, 2304.97847050686,
2304.88865807586, 2304.78121096686, 2304.67333694586, 2304.56517670086,
2304.45687199986, 2304.26299422086, 2303.81398617786, 2303.81772073686,
2303.82576399386, 2303.82937144386, 2303.85166918186, 2303.85679798586,
2303.85841996086, 2303.85934299186, 2303.85962745886, 2303.81418344986,
2303.71268455886, 2303.70396413486, 2303.71187848686, 2303.71241867586,
2303.72409686386, 2303.75626565486, 2303.81432053886, 2303.80604508386,
2303.57280991386, 2303.21896587386, 2303.06912105986, 2303.28351126486,
2303.45378473786, 2303.49375232686, 2303.50460433986, 2303.48302188886,
2303.48174695086, 2303.52692291386, 2303.61686633486, 2303.41139580686,
2303.32679365886, 2303.06984393086, 2303.15017783486, 2303.29293566086,
2303.44528532286, 2303.48203523086, 2302.89274879786, 2302.81440275486,
2302.73512963586, 2302.65410710886, 2302.56757728186, 2302.77320543086,
2303.00846800486, 2303.01932301786, 2302.97477011386, 2303.03742546386,
2303.13970331386, 2303.07628123386, 2302.97297111586, 2303.02123867886,
2303.08476293486, 2303.14766331986, 2303.17026860886, 2303.18249014186,
2303.19025912386, 2303.15218828686, 2303.11064943486, 2302.98849464786,
2302.87632040886, 2302.82807292386, 2302.77934558786)), row.names = 5905:6054, class = "data.frame")
# Fit northing as a LOESS function of easting and keep the fitted values.
df.fitted <- loess(formula = northing ~ easting, data = df, span = 0.5)
df[["northing.fitted"]] <- df.fitted[["fitted"]]

# Observed track as a path, fitted northing as points at the same easting.
ggplot(data = df, mapping = aes(x = easting, y = northing)) +
  geom_path(colour = "orangered2") +
  geom_point(mapping = aes(y = northing.fitted))
So, instead of smoothing the "cluster", I'd like to use a rolling average smoothing each x/y pair as a function of time.
You need to regress both easting and northing as functions of time to get smoother x, y values:
# Smooth each coordinate separately as a function of time, then evaluate both
# fits on an evenly spaced time grid to obtain a smoothed path.
df$numtime <- as.numeric(df$timestamp)
df.fitted.northing <- loess(northing ~ numtime, span = 0.5, data = df)
df.fitted.easting <- loess(easting ~ numtime, span = 0.5, data = df)
# Spell out `length.out`; `len` only worked through partial argument matching.
newdat <- data.frame(
  numtime = seq(min(df$numtime), max(df$numtime), length.out = 1000)
)
newdat$northing <- predict(df.fitted.northing, newdat)
newdat$easting <- predict(df.fitted.easting, newdat)
# Overlay the smoothed path on the original one; the constant colour labels
# inside aes() create the legend entries named in scale_color_manual().
ggplot(df, aes(easting, northing)) +
  geom_path(aes(color = "original path"), alpha = 0.6, size = 0.5,
            arrow = arrow(length = unit(0.1, "inches"))) +
  geom_point(aes(color = "original path"), alpha = 0.6, size = 1) +
  geom_path(data = newdat, size = 1, aes(color = "smoothed"),
            arrow = arrow(length = unit(0.1, "inches"))) +
  coord_equal() +
  theme_light() +
  scale_color_manual(values = c("original path" = "orangered2",
                                "smoothed" = "deepskyblue4"), name = "")

how to change p-value label in ggpubr/ggplot2

I'm using ggplot2 to make violin plots of module scores from Seurat, and am wanting to add statistics to it. I made the following violin plot, and I'm wanting to change the bracket labels so that it says "p < 0.13" instead of just 0.13 like it is now (thanks to #StupidWolf for the example!).
library(Seurat)
library(SeuratObject)
library(ggplot2)
library(ggpubr)
library(reshape2)
#add Seurat's module scores and create Seurat object from them =====================
ERlist <- list(c("CPB1", "RP11-53O19.1", "TFF1", "MB", "ANKRD30B",
"LINC00173", "DSCAM-AS1", "IGHG1", "SERPINA5", "ESR1",
"ILRP2", "IGLC3", "CA12", "RP11-64B16.2", "SLC7A2",
"AFF3", "IGFBP4", "GSTM3", "ANKRD30A", "GSTT1", "GSTM1",
"AC026806.2", "C19ORF33", "STC2", "HSPB8", "RPL29P11",
"FBP1", "AGR3", "TCEAL1", "CYP4B1", "SYT1", "COX6C",
"MT1E", "SYTL2", "THSD4", "IFI6", "K1AA1467", "SLC39A6",
"ABCD3", "SERPINA3", "DEGS2", "ERLIN2", "HEBP1", "BCL2",
"TCEAL3", "PPT1", "SLC7A8", "RP11-96D1.10", "H4C8",
"PI15", "PLPP5", "PLAAT4", "GALNT6", "IL6ST", "MYC",
"BST2", "RP11-658F2.8", "MRPS30", "MAPT", "AMFR", "TCEAL4",
"MED13L", "ISG15", "NDUFC2", "TIMP3", "RP13-39P12.3", "PARD68"))
tnbclist <- list(c("FABP7", "TSPAN8", "CYP4Z1", "HOXA10", "CLDN1",
"TMSB15A", "C10ORF10", "TRPV6", "HOXA9", "ATP13A4",
"GLYATL2", "RP11-48O20.4", "DYRK3", "MUCL1", "ID4", "FGFR2",
"SHOX2", "Z83851.1", "CD82", "COL6A1", "KRT23", "GCHFR",
"PRICKLE1", "GCNT2", "KHDRBS3", "SIPA1L2", "LMO4", "TFAP2B",
"SLC43A3", "FURIN", "ELF5", "C1ORF116", "ADD3", "EFNA3",
"EFCAB4A", "LTF", "LRRC31", "ARL4C", "GPNMB", "VIM",
"SDR16C5", "RHOV", "PXDC1", "MALL", "YAP1", "A2ML1",
"RP1-257A7.5", "RP11-353N4.6", "ZBTB18", "CTD-2314B22.3", "GALNT3",
"BCL11A", "CXADR", "SSFA2", "ADM", "GUCY1A3", "GSTP1",
"ADCK3", "SLC25A37", "SFRP1", "PRNP", "DEGS1", "RP11-110G21.2",
"AL589743.1", "ATF3", "SIVA1", "TACSTD2", "HEBP2"))
# Simulate counts for every listed gene across 500 cells, build and normalise a
# Seurat object, give each cell a random 0/1 cluster label, add a module score
# for each gene list, and fetch the score columns together with ClusterName.
genes <- unlist(c(ERlist, tnbclist))
mat <- matrix(rnbinom(500 * length(genes), mu = 500, size = 1), ncol = 500)
rownames(mat) <- genes
colnames(mat) <- paste0("cell", 1:500)
sobj <- CreateSeuratObject(mat)
sobj <- NormalizeData(sobj)
sobj$ClusterName <- factor(sample(0:1, ncol(sobj), replace = TRUE))
sobj <- AddModuleScore(
  object = sobj,
  features = tnbclist,
  name = "TNBC_List",
  ctrl = 5
)
sobj <- AddModuleScore(
  object = sobj,
  features = ERlist,
  name = "ER_List",
  ctrl = 5
)
sobjlists <- FetchData(
  object = sobj,
  vars = c("ER_List1", "TNBC_List1", "ClusterName")
)
#violin plot =======================================================
# Violin and box plot of ER_List1 per ClusterName, with a Wilcoxon comparison
# of groups "0" and "1" plus a second stat_compare_means() label at y = 0.75.
my_comparisons <- list(c("0", "1"))
ggplot(sobjlists, aes(x = ClusterName, y = ER_List1)) +
  geom_violin(aes(fill = ClusterName)) +
  geom_boxplot(width = 0.1) +
  labs(y = "ER+ Signature", x = "ClusterName") +
  ggtitle(label = "Object") +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Add pairwise comparisons p-value
  stat_compare_means(comparisons = my_comparisons, method = "wilcox.test") +
  stat_compare_means(label.y = 0.75)
The closest I've found is ggpubr: Show significance levels (*** or n.s.) instead of p-value in the label and https://github.com/kassambara/ggpubr/issues/327 but I am not sure how to implement it with how I created the plot.
Thanks for reading!

How to Keep Statistics with Reordered Combined ggplot2 Graph

I'm using ggplot2 to make violin plots of module scores from Seurat, and am wanting to add statistics to it. I made the following violin plot, but I want to switch the violins around from reading "0" and "1" from left to right, to "1" and "0". (Thanks to #StupidWolf for the example!)
library(Seurat)
library(SeuratObject)
library(ggplot2)
library(ggpubr)
library(reshape2)
#add Seurat's module scores and create Seurat object from them =====================
ERlist <- list(c("CPB1", "RP11-53O19.1", "TFF1", "MB", "ANKRD30B",
"LINC00173", "DSCAM-AS1", "IGHG1", "SERPINA5", "ESR1",
"ILRP2", "IGLC3", "CA12", "RP11-64B16.2", "SLC7A2",
"AFF3", "IGFBP4", "GSTM3", "ANKRD30A", "GSTT1", "GSTM1",
"AC026806.2", "C19ORF33", "STC2", "HSPB8", "RPL29P11",
"FBP1", "AGR3", "TCEAL1", "CYP4B1", "SYT1", "COX6C",
"MT1E", "SYTL2", "THSD4", "IFI6", "K1AA1467", "SLC39A6",
"ABCD3", "SERPINA3", "DEGS2", "ERLIN2", "HEBP1", "BCL2",
"TCEAL3", "PPT1", "SLC7A8", "RP11-96D1.10", "H4C8",
"PI15", "PLPP5", "PLAAT4", "GALNT6", "IL6ST", "MYC",
"BST2", "RP11-658F2.8", "MRPS30", "MAPT", "AMFR", "TCEAL4",
"MED13L", "ISG15", "NDUFC2", "TIMP3", "RP13-39P12.3", "PARD68"))
tnbclist <- list(c("FABP7", "TSPAN8", "CYP4Z1", "HOXA10", "CLDN1",
"TMSB15A", "C10ORF10", "TRPV6", "HOXA9", "ATP13A4",
"GLYATL2", "RP11-48O20.4", "DYRK3", "MUCL1", "ID4", "FGFR2",
"SHOX2", "Z83851.1", "CD82", "COL6A1", "KRT23", "GCHFR",
"PRICKLE1", "GCNT2", "KHDRBS3", "SIPA1L2", "LMO4", "TFAP2B",
"SLC43A3", "FURIN", "ELF5", "C1ORF116", "ADD3", "EFNA3",
"EFCAB4A", "LTF", "LRRC31", "ARL4C", "GPNMB", "VIM",
"SDR16C5", "RHOV", "PXDC1", "MALL", "YAP1", "A2ML1",
"RP1-257A7.5", "RP11-353N4.6", "ZBTB18", "CTD-2314B22.3", "GALNT3",
"BCL11A", "CXADR", "SSFA2", "ADM", "GUCY1A3", "GSTP1",
"ADCK3", "SLC25A37", "SFRP1", "PRNP", "DEGS1", "RP11-110G21.2",
"AL589743.1", "ATF3", "SIVA1", "TACSTD2", "HEBP2"))
# Simulate counts for every listed gene across 500 cells, build and normalise a
# Seurat object, give each cell a random 0/1 cluster label, add a module score
# for each gene list, and fetch the score columns together with ClusterName.
genes <- unlist(c(ERlist, tnbclist))
mat <- matrix(rnbinom(500 * length(genes), mu = 500, size = 1), ncol = 500)
rownames(mat) <- genes
colnames(mat) <- paste0("cell", 1:500)
sobj <- CreateSeuratObject(mat)
sobj <- NormalizeData(sobj)
sobj$ClusterName <- factor(sample(0:1, ncol(sobj), replace = TRUE))
sobj <- AddModuleScore(
  object = sobj,
  features = tnbclist,
  name = "TNBC_List",
  ctrl = 5
)
sobj <- AddModuleScore(
  object = sobj,
  features = ERlist,
  name = "ER_List",
  ctrl = 5
)
sobjlists <- FetchData(
  object = sobj,
  vars = c("ER_List1", "TNBC_List1", "ClusterName")
)
#violin plot =======================================================
# Violin and box plot of ER_List1 per ClusterName, with a Wilcoxon comparison
# of groups "0" and "1" plus a second stat_compare_means() label at y = 0.75.
my_comparisons <- list(c("0", "1"))
ggplot(sobjlists, aes(x = ClusterName, y = ER_List1)) +
  geom_violin(aes(fill = ClusterName)) +
  geom_boxplot(width = 0.1) +
  labs(y = "ER+ Signature", x = "ClusterName") +
  ggtitle(label = "Object") +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Add pairwise comparisons p-value
  stat_compare_means(comparisons = my_comparisons, method = "wilcox.test") +
  stat_compare_means(label.y = 0.75)
Using the solution from How to reorder plots in combined ggplot2 graph?, I can reorder the plots by adding
+ scale_x_discrete(limits = c("1", "0"))
to the very end of the code I have now. However, doing that, the "Wilcoxon, p = 0.13" disappears, and gives the following error:
# Same violin and box plot as before, with the x axis limits set to
# c("1", "0") as the final layer.
ggplot(sobjlists, aes(x = ClusterName, y = ER_List1)) +
  geom_violin(aes(fill = ClusterName)) +
  geom_boxplot(width = 0.1) +
  labs(y = "ER+ Signature", x = "ClusterName") +
  ggtitle(label = "Object") +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Add pairwise comparisons p-value
  stat_compare_means(comparisons = my_comparisons, method = "wilcox.test") +
  stat_compare_means(label.y = 0.75) +
  scale_x_discrete(limits = c("1", "0"))
Warning messages:
1: Unknown or uninitialised column: `p`.
2: Computation failed in `stat_compare_means()`:
argument "x" is missing, with no default
For a TL;DR, how do I keep the top statistic in and reorder the violins?
Thanks for reading!
Here is what worked for me (From https://www.datanovia.com/en/blog/how-to-change-ggplot-legend-order/)
# Re-create the factor with "1" as the first level and "0" as the second.
sobjlists[["ClusterName"]] <- factor(
  sobjlists[["ClusterName"]],
  levels = c("1", "0")
)

Resources