Increasing font size of auto-generated R and p-value - r

This is a rather straightforward question: I'd like to increase the font size of the automatically generated R and p-value on my correlation plot made with ggscatter. I've tried using cex, but it doesn't seem to work. I would appreciate any help on this, thanks.
My plot
1
My script
cpsbs <- read_csv("cpsbs.csv")
View(cpsbs)
psbs600 <-ggscatter(cpsbs, x = "npq600", y = "rd",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = "Max NPQ (600s)", ylab = "PsbS relative density")+ theme(text = element_text(size = 18))
My data frame
data.frame(cpsbs)
id gen line npq600 npq900 rd delcq
1 1 PsbS L1.1 3.053330 0.19666 1.2211420 4.862588
2 2 PsbS L1.2 3.133333 0.17000 1.5918041 5.470889
3 3 PsbS L1.3 2.756667 0.17000 2.1668718 4.773088
4 4 PsbS L1.4 3.160000 0.21000 2.6198157 3.809744
5 5 PsbS L1.5 3.306667 0.20700 1.5571007 4.169890
6 6 PsbS L1.6 0.480000 0.33000 0.0000000 0.000000
7 7 PsbS L1.7 2.960000 0.20000 1.0520551 4.485594
8 8 PsbS L1.8 2.946667 0.21000 0.4648043 3.900248
9 9 PsbS L1.9 2.986667 0.18000 1.9454836 3.782560

I think you're looking for the cor.coef.size argument:
# Same call as in the question, with cor.coef.size added: it sets the font
# size of the correlation label (R and p-value) drawn when cor.coef = TRUE.
ggscatter(cpsbs, x = "npq600", y = "rd",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
cor.coef.size = 10,
xlab = "Max NPQ (600s)", ylab = "PsbS relative density")+
theme(text = element_text(size = 18))

Related

How to rearrange the rows of a matrix in R

I have the following data frame:
> agg_2
# A tibble: 3 × 3
bcs default_flag pred_default
<chr> <dbl> <dbl>
1 high-score 0.00907 0.0121
2 low-score 0.0345 0.0353
3 mid-score 0.0210 0.0204
I plot it as a bar plot using the following code:
barplot(t(as.matrix(agg_2[,-1])),
main = "Actual Default vs Predicted Default",
xlab = "Score Category",
ylab = "Default Rate",
names.arg = c("High Score", "Low Score", "Mid Score"),
col = gray.colors(2),
beside = TRUE)
legend("topleft",
c("Default", "Pred. Default"),
fill = gray.colors(2))
and it gives me this:
How can I rearrange the data frame/matrix so that the pairs of bars in the bar plot are as follows: Low Score then Mid Score then High Score?
Here is one potential solution:
# Rebuild the example data from the values printed in the question.
agg_2 <- read.table(text = "bcs default_flag pred_default
high-score 0.00907 0.0121
low-score 0.0345 0.0353
mid-score 0.0210 0.0204", header = TRUE)
# Turn bcs into a factor whose levels are listed in the desired display order
# (low, mid, high) instead of the default alphabetical order.
agg_2$bcs <- factor(agg_2$bcs, levels = c("low-score", "mid-score", "high-score"), ordered = TRUE)
# Sort the rows by that level order; the bar groups follow the row order.
agg_2 <- agg_2[order(agg_2$bcs),]
# Drop the label column and transpose so each column of the matrix is one
# score category holding its two rates, drawn side by side (beside = TRUE).
barplot(t(as.matrix(agg_2[,-1])),
main = "Actual Default vs Predicted Default",
xlab = "Score Category",
ylab = "Default Rate",
# Labels are taken from the reordered column so they always match the bars.
names.arg = agg_2$bcs,
col = gray.colors(2),
beside = TRUE)
# Legend for the two bar series, using the same fills as the bars.
legend("topright",
c("Default", "Pred. Default"),
fill = gray.colors(2))
Created on 2022-06-21 by the reprex package (v2.0.1)

data restructure for ggplot geom_bar() stacked bar plotting

avg data frame
structure(list(cluster = 1:10, `B cells` = c(0.0369711424087593,
0.00526325696315245, 0.0601665087700304, 0.0231936137674591,
0.00766480549892195, 0.0285649960414246, 0.0044030329888148,
0.00345795624392323, 0.00309644760567017, 0.00757469580646642
), DCreg = c(0.0304752063136609, 0.174423402403555, 0.0163287878795231,
0.0192154395050034, 0.124511133655915, 0.0296144152010606, 0.205920199256583,
0.114542510479173, 0.485649315606826, 0.0260997195368302), `Dendritic cells` = c(0.156500506395882,
0.0106345235402551, 0.185348445999056, 0.395476210792188, 0.0719924126421944,
0.104614178324861, 0.0226961213600642, 0.00292885066859525, 0.0122661582750054,
0.118394797602606), `Dendritic cells CD103` = c(0.0482626330670718,
0.0140976438812366, 0.030373962919268, 0.0614351282717271, 0.189884617234425,
0.35658217311524, 0.0170390739879794, 0.0042469791834164, 0.0233514821789908,
0.0619204360724114), Endothelium = c(0.11337268119519, 0.027025412632833,
0.43869939276274, 0.0662483745710424, 0.0331520081202891, 0.164940771021627,
0.050135082662031, 0.00351285357934976, 0.0201434603120533, 0.0658151087814588
), Epithelium = c(0.00418217375070304, 0.000413203430326014,
0.0104665752013841, 0.00525017082076173, 0.00415698684351819,
0.0333637286413386, 0.000431569929321054, 0, 0.0011976402913935,
0.000419107154908937), Fibroblasts = c(0.00612607297867521, 0.0116371963351148,
0.0108995123396445, 0.0117009481628146, 0.00674570810846355,
0.0145571600114712, 0.0120879220427041, 0.00272604244680674,
0.00772202564316953, 0.0272894372187893), `Macrophages other` = c(0.00101589948056542,
0.000645130694683314, 0, 0, 0.000639755622911849, 0, 0.000197788594031649,
0.00136588418173722, 0, 0.000420171738310913), `Macrophages type 1` = c(0.221136736926214,
0.0101728310491049, 0.0295121583899105, 0.0455316207473085, 0.0230660380060092,
0.0222078529371378, 0.015179095607796, 0.00459851371158574, 0.0112212936162074,
0.02937463664781), `Macrophages type 2` = c(0.0411011962682536,
0.0522714029078864, 0.012334445025602, 0.0568282306829578, 0.0453391303748083,
0.0181451496347937, 0.239616155787136, 0.0115489617356957, 0.04981525808734,
0.462030477544264), Neutrophils = c(0.0766806635700175, 0.00442125133471751,
0.0476726698091672, 0.0236749605376406, 0.00911361867045396,
0.0236169696110325, 0.00537803767758349, 0.0032239571528306,
0.00201957474248881, 0.0160311845078706), `NK cells` = c(0, 0,
0.000108464194313773, 0, 0, 8.99698299254026e-05, 0.000114169258081956,
0, 4.57749702462694e-05, 2.78396436525612e-05), `T cells CD4` = c(0.0330641154468336,
0.0213946654236908, 0.0323515137814534, 0.148686432010321, 0.0500449048718068,
0.0685338874314457, 0.0273478878575203, 0.00472971607890761,
0.0328998359523529, 0.0354818425253482), `T cells CD8` = c(0.0172498783937768,
0.00877876825324442, 0.0156948623402281, 0.0207354640030442,
0.0145536348676947, 0.0146643634343241, 0.0155197086731341, 0.00171509323694132,
0.0135851481885585, 0.0159896002840603), `T reg cells` = c(0.00451599932441037,
0.0058712074137469, 0.00274652046695111, 0.0167445990360021,
0.0127422536359504, 0.0142171857157357, 0.00996063310868601,
0.00089148571457417, 0.0113706843090688, 0.00663049091849752),
Tumour = c(0.0765887917753441, 0.651476092235795, 0.0173767962070959,
0.0647526184622169, 0.395840854655601, 0.0472273714361081,
0.368387800802699, 0.839842321316499, 0.323145170321728,
0.111585860905902), Unclassified = c(0.132756302704642, 0.00147401150065844,
0.0899193839136316, 0.0405261886295129, 0.0105521371910369,
0.0590598276124738, 0.00558572040583437, 0.000668874269964592,
0.00247072989889988, 0.0149145931108126)), class = "data.frame", row.names = c(NA,
-10L))
cluster B cells DCreg Dendritic cells Dendritic cells CD103 Endothelium Epithelium Neutrophils NK cells T cells CD4 T cells CD8 T reg cells Tumour Unclassified
1 1 0.036971142 0.03047521 0.156500506 0.048262633 0.113372681 0.0041821738 0.076680664 0.000000e+00 0.033064115 0.017249878 0.0045159993 0.07658879 0.1327563027
2 2 0.005263257 0.17442340 0.010634524 0.014097644 0.027025413 0.0004132034 0.004421251 0.000000e+00 0.021394665 0.008778768 0.0058712074 0.65147609 0.0014740115
3 3 0.060166509 0.01632879 0.185348446 0.030373963 0.438699393 0.0104665752 0.047672670 1.084642e-04 0.032351514 0.015694862 0.0027465205 0.01737680 0.0899193839
4 4 0.023193614 0.01921544 0.395476211 0.061435128 0.066248375 0.0052501708 0.023674961 0.000000e+00 0.148686432 0.020735464 0.0167445990 0.06475262 0.0405261886
5 5 0.007664805 0.12451113 0.071992413 0.189884617 0.033152008 0.0041569868 0.009113619 0.000000e+00 0.050044905 0.014553635 0.0127422536 0.39584085 0.0105521372
6 6 0.028564996 0.02961442 0.104614178 0.356582173 0.164940771 0.0333637286 0.023616970 8.996983e-05 0.068533887 0.014664363 0.0142171857 0.04722737 0.0590598276
7 7 0.004403033 0.20592020 0.022696121 0.017039074 0.050135083 0.0004315699 0.005378038 1.141693e-04 0.027347888 0.015519709 0.0099606331 0.36838780 0.0055857204
8 8 0.003457956 0.11454251 0.002928851 0.004246979 0.003512854 0.0000000000 0.003223957 0.000000e+00 0.004729716 0.001715093 0.0008914857 0.83984232 0.0006688743
9 9 0.003096448 0.48564932 0.012266158 0.023351482 0.020143460 0.0011976403 0.002019575 4.577497e-05 0.032899836 0.013585148 0.0113706843 0.32314517 0.0024707299
10 10 0.007574696 0.02609972 0.118394798 0.061920436 0.065815109 0.0004191072 0.016031185 2.783964e-05 0.035481843 0.015989600 0.0066304909 0.11158586 0.0149145931
I have the above data frame and am trying to create a stacked bar using ggplot geom_bar() where each bar = 1 cluster (10 clusters, so 10 bars) and each bar is filled with the proportions of each cell type contributing to a cluster (proportion values for each cluster add up to 1).
I have started by changing the layout of the data :
avgt = avg %>% pivot_longer(cols = -cluster)
Which gave me this layout:
cluster name value
1 1 B cells 0.0370
2 1 DCreg 0.0305
3 1 Dendritic cells 0.157
4 1 Dendritic cells CD103 0.0483
5 1 Endothelium 0.113
6 1 Epithelium 0.00418
7 1 Fibroblasts 0.00613
8 1 Macrophages other 0.00102
9 1 Macrophages type 1 0.221
10 1 Macrophages type 2 0.0411
However, I am not sure what to do next: if I use the 'cluster' column as x and the 'name' column for 'fill', I get, as expected, equal proportions for each cell type
p = ggplot(avgt, aes(x = as.factor(cluster), fill = as.factor(name)))+
geom_bar(position = "fill") +
theme_classic()+
scale_y_continuous(labels = scales::percent) +
coord_flip() +
theme(axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
axis.text=element_text(size=20)) +
theme(legend.text = element_text(size = 20)) +
xlab("Community")+
ylab("Percentage distribution") +
labs( fill = "")
p
geom_bar() stacked plot result
Any ideas of how I can get this to work?
Thanks in advance

R: Customizing Scatterplots

I am using the R programming language. I am trying to follow the answer posted in this previous stackoverflow post (scatterplot3d: regression plane with residuals) and add a "plane" to a scatterplot.
Suppose I have the following data:
my_data <- data.frame(read.table(header=TRUE,
row.names = 1,
text="
weight height age
1 2998.958 15.26611 53
2 3002.208 18.08711 52
3 3008.171 16.70896 49
4 3002.374 17.37032 55
5 3000.658 18.04860 50
6 3002.688 17.24797 45
7 3004.923 16.45360 47
8 2987.264 16.71712 47
9 3011.332 17.76626 50
10 2983.783 18.10337 42
11 3007.167 18.18355 50
12 3007.049 18.11375 53
13 3002.656 15.49990 42
14 2986.710 16.73089 47
15 2998.286 17.12075 52
"))
I adapted the code to fit my example:
library(scatterplot3d)
model_1 <- lm(age ~ weight + height, data = my_data)
# scatterplot
s3d <- scatterplot3d(my_data$height, my_data$weight, my_data$age, pch = 19, type = "p", color = "darkgrey",
main = "Regression Plane", grid = TRUE, box = FALSE,
mar = c(2.5, 2.5, 2, 1.5), angle = 55)
# regression plane
s3d$plane3d(model_1, draw_polygon = TRUE, draw_lines = TRUE,
polygon_args = list(col = rgb(.1, .2, .7, .5)))
# overlay positive residuals
wh <- resid(model_1) > 0
s3d$points3d(my_data$height, my_data$weight, my_data$age, pch = 19)
Problem: However, the "plane" appears to be absent :
Desired Result:
Can someone please show me what I am doing wrong?
Thanks
The order of height and weight caused the problem.
# Draw the 3D scatterplot with the axes in the same order as the predictors in
# model_1 (age ~ weight + height): x = weight, y = height, z = age.
# model_1 and my_data are the objects defined in the question above.
s3d <- scatterplot3d(my_data$weight, my_data$height, my_data$age,
                     pch = 19, type = "p", color = "darkgrey",
                     main = "Regression Plane", grid = TRUE, box = FALSE,
                     mar = c(2.5, 2.5, 2, 1.5), angle = 55)
# regression plane
s3d$plane3d(model_1, draw_polygon = TRUE, draw_lines = TRUE,
            polygon_args = list(col = rgb(.1, .2, .7, .5)))
# overlay positive residuals: only the points lying above the plane are
# redrawn, and they must use the same x/y order (weight, height) as the
# scatterplot3d() call, otherwise they land at the wrong coordinates.
wh <- resid(model_1) > 0
s3d$points3d(my_data$weight[wh], my_data$height[wh], my_data$age[wh],
             pch = 19)

ERROR: unused argument (output.results = TRUE)

This is what my data looks like:
> dput(head(GDP_NUTS2,5))
structure(list(Regiao = c("T", "N", "Ag", "C", "AML"), t2000 = c(12529.42964,
10054.60679, 13045.59069, 10621.51789, 18104.36306), t2001 = c(13142.7713,
10652.46712, 13920.41552, 11101.08412, 18865.55149), t2002 = c(13714.17406,
11001.34917, 14612.37052, 11507.36163, 19812.29293), t2003 = c(13985.02689,
11031.7278, 15137.89461, 11884.96687, 20165.68892), t2004 = c(14537.15966,
11354.02317, 15479.68985, 12364.05053, 21068.05117), t2005 = c(15107.92333,
11875.44359, 16237.49791, 12754.40299, 21829.31373), t2006 = c(15816.27567,
12439.6426, 17046.29326, 13378.47797, 22714.25829), t2007 = c(16660.99538,
13229.02402, 17981.40383, 14044.39707, 23847.44923), t2008 = c(16971.19746,
13579.51144, 18226.74178, 14091.85326, 24347.83971), t2009 = c(16606.6617,
13243.19054, 17038.45595, 13974.46502, 23794.44899), t2010 = c(16986.91604,
13677.38358, 16976.83391, 14284.14565, 24119.66719), t2011 = c(16655.71238,
13491.68626, 16347.69468, 14011.54637, 23503.1765), t2012 = c(15963.69251,
13111.6173, 16059.51047, 13623.68635, 22118.01701), t2013 = c(16257.04222,
13473.68717, 16301.87448, 13919.18355, 22337.24739), t2014 = c(16596.21219,
13935.07757, 16974.57715, 14220.1043, 22491.62875), t2015 = c(17322.0514,
14570.33755, 17851.78088, 14983.95312, 23101.89351), t2016 = c(18033.44444,
15283.33044, 19251.57661, 15620.77307, 23800.20038), t2017 = c(19006.33518,
16083.53849, 20893.19975, 16410.11278, 24938.22636), t2018 = c(19938.15583,
17031.94867, 22131.96942, 17242.70015, 25974.24055), t2019 = c(20755.955,
17712.44223, 23145.30242, 18045.54697, 26970.71178)), row.names = c(NA,
-5L), class = c("tbl_df", "tbl", "data.frame"))
I'm using the "REAT" package to test the absolute beta convergence comparing years 2000 (t2000) and 2019 (t2019) with OLS (Ordinary Least Squares) estimation using function betaconv.ols().
I've used this code: betaconv.ols(GDP_NUTS2$t2000, 2000, GDP_NUTS2$t2019, 2019, output.results = TRUE) I tried other versions of the code, but my major problem is the output.results=TRUE argument, because I always get this error: Error in betaconv.ols(GDP_NUTS2$t2000, 2000, GDP_NUTS2$t2019, 2019, output.results = TRUE) : unused argument (output.results = TRUE)
I've been searching for alternatives of output.results but no success.
Any help will be much appreciated.
The argument is print.results based on the args of the function
> args(betaconv.ols)
function (gdp1, time1, gdp2, time2, conditions = NULL, beta.plot = FALSE,
beta.plotPSize = 1, beta.plotPCol = "black", beta.plotLine = FALSE,
beta.plotLineCol = "red", beta.plotX = "Ln (initial)", beta.plotY = "Ln (growth)",
beta.plotTitle = "Beta convergence", beta.bgCol = "gray95",
beta.bgrid = TRUE, beta.bgridCol = "white", beta.bgridSize = 2,
beta.bgridType = "solid", print.results = FALSE)
NULL
betaconv.ols(GDP_NUTS2$t2000, 2000, GDP_NUTS2$t2019, 2019, print.results = TRUE)
-output
Absolute Beta Convergence
Model coefficients (Estimation method: OLS)
Estimate Std. Error t value Pr (>|t|)
Alpha 1.537689e-01 0.048509886 3.169847 0.05048663
Beta -1.341938e-02 0.005137275 -2.612158 0.07953682
Lambda 7.110647e-04 NA NA NA
Halflife 9.748018e+02 NA NA NA
Model summary
Estimate F value df 1 df 2 Pr (>F)
R-Squared 0.6946059 6.823372 1 3 0.07953682

What is wrong with my custom colour palette in this plot?

Using ggsurvplot to draw some Kaplan-Meier curves.
5 curves should be plotted and I want control over their colours.
Here is the output of the survfit being plotted:
> elective_30Decadesurv
Call: survfit(formula = elective30Surv ~ electives$Decade)
n events median 0.95LCL 0.95UCL
electives$Decade=50 14 0 NA NA NA
electives$Decade=60 173 2 NA NA NA
electives$Decade=70 442 5 NA NA NA
electives$Decade=80 168 4 NA NA NA
electives$Decade=90 2 0 NA NA NA
Here is a working plot using the default colour palette, "hue":
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = "hue",
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
See plot in section 3.1.4 of this webpage for the output of the above
The Decade group has 5 entries, so I'm trying to provide five colours to palette.
However, both:
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = c("#440154",
"#3B528B",
"#21908C",
"#5DC863",
"#5DC863"
),
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
And:
> fiveColours <- c("#440154",
"#3B528B",
"#21908C",
"#5DC863",
"#5DC863"
)
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = fiveColours,
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
Give the same error:
Error in names(.cols) <- grp.levels :
'names' attribute [5] must be the same length as the vector [4]
What vector is length [4]?
Is 'names' attribute my colour vector?
If I take one of the colours out of the custom palette, eg fiveColours <- c("#440154","#3B528B","#21908C","#5DC863") I get this error:
Error: Insufficient values in manual scale. 5 needed but only 4 provided.
Which implies the number of colours provided is correct but something else is causing the issue.
I've troubleshot to the limits of my own ability. Help please!
FYI:
> electives %>% select(Decade) %>% group_by(Decade) %>% summarise(n())
# A tibble: 5 x 2
Decade `n()`
<fct> <int>
1 50 14
2 60 173
3 70 442
4 80 168
5 90 2
Should prove the length of the Decade variable and here is how the survival object and survfit were generated:
> elective5Surv <- Surv(electives$surv5Y, electives$dead5Y)
> elective_5Decadesurv <- survfit(elective5Surv ~ electives$Decade)
Ok, I have sorted my own mistake by proof-reading!
Of the five hex colours I’d provided, two were identical (not on purpose.)
I changed the fifth colour to a different hex value (what it was meant to be in the first place) and it works now.
Thanks, Rui, for your response earlier, it helped me down the path!

Resources