Related
I am currently trying to plot a 3D bubble chart with 2 axes (later I will try with 3), as in Excel, but in R. Here is an example of the kind of bubble plot I am trying to produce:
https://fr.extendoffice.com/documents/excel/2017-excel-create-bubble-chart.html
library(ggplot2)
library(scales)
# Bubble chart of var_2020_2021_CA against var_2020_2021_valeur, one bubble per
# row of plot_3D, coloured by Specialite and sized by CA.annee.N.
p <- ggplot(
  data = plot_3D,
  mapping = aes(x = var_2020_2021_valeur, y = var_2020_2021_CA)
) +
  geom_point(mapping = aes(colour = Specialite, size = CA.annee.N), alpha = 0.5) +
  scale_colour_manual(
    values = c(
      "#00AFBB", "#E7B800", "#FC4E07", "#FFB5C5", "#BF87B3",
      "#7F5AA2", "#3F2D91", "#000080", "#2468A0", "#a32cc4",
      "#9073db", "#c51f5d", "#5800ff", "#4455ff", "#48ff50"
    )
  ) +
  # Range of point sizes used for the bubbles
  scale_size(range = c(0.5, 12)) +
  ylim(-100, 100) +
  xlim(-100, 100) +
  # Reference lines through the origin
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  # Each call below adds a second scale for an axis that ylim()/xlim() already
  # gave a scale to, so the later one replaces the earlier one.
  scale_y_continuous(labels = percent) +
  scale_x_continuous(labels = percent)
I get the following message:
"Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale."
Even when I remove the last two lines of code, it doesn't work...
Here is the structure of my dataset plot_3D :
structure(list(Specialite = c("ANESTHESIE REANIMATION", "Autres",
"CHIRURGIE GENERALE ET VISCERALE", "CHIRURGIE PLASTIQUE", "GASTRO ENTEROLOGIE",
"GYNECOLOGIE OBSTETRIQUE", "IMAGERIE", "MAXILLO STOMATO", "MEDECINE GENERALE et INTERNE",
"OPHTALMOLOGIE", "ORL", "ORTHOPEDIE", "PNEUMOLOGIE", "URGENTISTE",
"UROLOGIE"), CA.annee.N = c(64310L, 25298L, 1205537L, 42020L,
3694964L, 344370L, 3454L, 588033L, 228439L, 1849804L, 27358L,
2739286L, 0L, 916L, 432907L), Nombre.de.sejours.annee.N = c(171L,
34L, 1504L, 56L, 9224L, 682L, 9L, 1103L, 888L, 2276L, 57L, 4068L,
0L, 2L, 720L), CA.annee.N.1 = c(50135L, 454L, 790559L, 15531L,
2644858L, 304242L, 3026L, 402195L, 459813L, 1308933L, 20597L,
2269691L, 0L, 3901L, 318352L), Nombre.de.sejours.annee.N.1 = c(150L,
1L, 1067L, 25L, 7276L, 627L, 9L, 802L, 1918L, 1693L, 43L, 3519L,
0L, 7L, 547L), CA.annee.N.2 = c(48583L, 453L, 941610L, 16675L,
3140507L, 385813L, 2950L, 642017L, 691982L, 1704005L, 51602L,
2261368L, 7145L, 4648L, 308169L), Nombre.de.sejours.annee.N.2 = c(154L,
1L, 1264L, 28L, 8317L, 831L, 8L, 1286L, 3231L, 2269L, 127L, 3610L,
26L, 10L, 551L), CA_par_sejour_N = c(376.081871345029, 744.058823529412,
801.553856382979, 750.357142857143, 400.581526452732, 504.941348973607,
383.777777777778, 533.121486854034, 257.251126126126, 812.743409490334,
479.964912280702, 673.374139626352, NaN, 458, 601.259722222222
), CA_par_sejour_N1 = c(334.233333333333, 454, 740.917525773196,
621.24, 363.504398020891, 485.234449760766, 336.222222222222,
501.490024937656, 239.735662148071, 773.14412285883, 479, 644.981813015061,
NaN, 557.285714285714, 581.99634369287), CA_par_sejour_N2 = c(315.474025974026,
453, 744.944620253165, 595.535714285714, 377.600937838163, 464.275571600481,
368.75, 499.235614307932, 214.169606932838, 750.993829881005,
406.314960629921, 626.417728531856, 274.807692307692, 464.8,
559.290381125227), var_2020_2021_valeur = c(0.125207553640259,
0.638896087069189, 0.0818395145215454, 0.207837780659878, 0.101999119223065,
0.0406131494220115, 0.141440846001322, 0.063074957314078, 0.0730615704860669,
0.051218505658529, 0.00201443064864667, 0.0440203522616658, NaN,
-0.178159446295822, 0.0330987964754596), var_2020_2021_CA = c(0.282736611149895,
54.7224669603524, 0.524917178856986, 1.70555662867813, 0.397036816343259,
0.131895004634469, 0.141440846001322, 0.462059448774848, -0.503191514811456,
0.413215191304673, 0.328251687138904, 0.206898207729598, NaN,
-0.765188413227378, 0.35983753832236)), class = "data.frame", row.names = c(NA,
-15L))
Could anyone help ?
You should remove xlim and ylim: they are shortcuts that add a scale with the given limits, so they should only be used when you don't specify anything else for that axis. Instead, you can pass the limits to both scale_*_continuous calls through their limits argument, like this:
library(ggplot2)
library(scales)
# Bubble chart of var_2020_2021_CA against var_2020_2021_valeur, coloured by
# Specialite and sized by CA.annee.N. The axis limits are set once, inside
# scale_*_continuous(), so no axis receives two scales.
p <- ggplot(plot_3D, aes(x = var_2020_2021_valeur, y = var_2020_2021_CA)) +
  geom_point(aes(color = Specialite, size = CA.annee.N), alpha = 0.5) +
  scale_color_manual(values = c("#00AFBB", "#E7B800", "#FC4E07",
                                "#FFB5C5", "#BF87B3", "#7F5AA2", "#3F2D91", "#000080",
                                "#2468A0", "#a32cc4", "#9073db", "#c51f5d",
                                "#5800ff", "#4455ff", "#48ff50")) +
  scale_size(range = c(0.5, 12)) + # range of point sizes
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  # The two variation columns look like proportions (e.g. 0.125), and percent()
  # multiplies by 100 when building the labels, so an axis running from -100 %
  # to 100 % is c(-1, 1) in data units (c(-100, 100) would span -10000 % to
  # 10000 %). oob_keep leaves values beyond the limits untouched instead of
  # converting them to NA, so no finite observation is silently dropped.
  scale_y_continuous(labels = percent, limits = c(-1, 1), oob = oob_keep) +
  scale_x_continuous(labels = percent, limits = c(-1, 1), oob = oob_keep)
p
#> Warning: Removed 1 rows containing missing values (geom_point).
Created on 2022-07-12 by the reprex package (v2.0.1)
I am using geom_text_repel. Here is the data:
# Example data frame `xx`: 50 rows with a character SYMBOL, a logical
# `interactor` flag, a numeric log2FC_OEIPtoOEC and a numeric
# adj.pvalue_OEIPtoOEC. Row names are non-sequential integers.
xx <- structure(list(SYMBOL = c("HSPB1", "DSP", "COPA", "SEMA3B", "RFC1",
"FAM120A", "MRPL24", "ENO1", "TAF15", "MRPL47", "MYO5A", "TTC37",
"RBMX", "LEMD2", "GNL3", "TRMT2A", "EIF3H", "PSPC1", "MIOS",
"POLR2A", "SCAMP3", "EEF2", "AGO3", "SASH1", "GTPBP4", "PARP1",
"RPS3", "ATP2A2", "PPAN", "KPNB1", "SPATA5", "RPL18", "ARAP1",
"VDAC2", "SF1", "RCN1", "NSUN5", "CSNK1A1", "PHGDH", "NAV1",
"EIF4G1", "RSL1D1", "DHX29", "TCP1", "HSD17B12", "PLOD1", "AKAP8L",
"STT3A", "DIMT1", "CCT5"), interactor = c(FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE), log2FC_OEIPtoOEC = c(-1.188, -1.071, -0.305, -1.196,
-2.242, -1.083, -2.027, 0.101, -0.613, -1.195, -1.632, -0.934,
-0.604, -1.714, -1.011, -1.301, -1.327, 0.447, -0.563, 0.061,
0.159, -0.354, -0.696, 2.679, -2.407, -1.133, -1.174, -0.023,
-1.739, 0.372, -1.84, -1.755, 2.496, 2.48, -0.13, -0.043, -1.436,
-2.089, 0.601, -1.868, -0.587, -0.656, -0.689, 0.489, -0.389,
-0.214, -0.682, -0.463, -2.124, -0.175), adj.pvalue_OEIPtoOEC = c(3.11820459104672e-06,
0.000375326754978968, 0.000452267099086636, 2.29371074079822e-06,
3.16069740512121e-08, 1.71482977276262e-09, 3.16164967556097e-09,
0.803071498733443, 0.000260890847563963, 7.84919749378136e-08,
2.81074102369284e-07, 0.000650750327476979, 1.76645936204616e-05,
4.64727351507494e-07, 5.97431538613294e-08, 2.06414050372876e-06,
1.60474719971609e-07, 1.06148995172679e-06, 0.000424624322200997,
0.0822930476250642, 0.0669857879144555, 0.0497856508138053, 0.0010128186235129,
6.79027171829534e-10, 7.08618519862231e-09, 5.31048502842373e-07,
9.14238499256634e-06, 0.65661095803186, 0.000109268795075108,
0.000896331917103447, 0.000154543207846778, 4.94297345832319e-06,
1.74956073133704e-06, 6.14100266559054e-10, 0.0165759045939973,
0.609835745607611, 3.22563881688616e-05, 2.36186291346976e-09,
1.96827534532196e-06, 1.23996382790666e-06, 0.000563501819276639,
6.07830082570168e-05, 0.000461083456429919, 0.0127703802958071,
0.000553587826763665, 0.017917136749192, 1.08862548100969e-07,
0.000666364393616902, 4.57980386379692e-08, 0.255896208213884
)), row.names = c(130L, 169L, 307L, 401L, 79L, 24L, 321L, 123L,
69L, 506L, 59L, 402L, 14L, 309L, 362L, 531L, 481L, 181L, 499L,
54L, 275L, 250L, 335L, 200L, 522L, 437L, 489L, 208L, 462L, 125L,
367L, 536L, 331L, 161L, 344L, 294L, 508L, 249L, 220L, 412L, 82L,
315L, 403L, 37L, 246L, 297L, 333L, 411L, 318L, 357L), class = "data.frame")
Here is the code for the plot
# Scatter of log2 fold change against -log10(adjusted p-value). Only rows with
# interactor == TRUE get a non-empty label. `size` is supplied inside aes(),
# i.e. as an aesthetic mapping.
ggplot(
  data = xx,
  mapping = aes(
    x = log2FC_OEIPtoOEC,
    y = -log10(adj.pvalue_OEIPtoOEC),
    label = ifelse(interactor, SYMBOL, ""),
    colour = interactor
  )
) +
  geom_point() +
  geom_text_repel(mapping = aes(size = 100)) +
  # Dashed reference lines at p = 0.05 and at a log2 fold change of 1
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  geom_vline(xintercept = 1, linetype = "dashed") +
  xlab("Log2 fold change of IP to control") +
  ylab("-Log10 of adjusted p-value") +
  theme_classic()
# Same plot as the previous one, with 0.01 instead of 100 as the value given to
# `size` inside aes().
ggplot(
  data = xx,
  mapping = aes(
    x = log2FC_OEIPtoOEC,
    y = -log10(adj.pvalue_OEIPtoOEC),
    label = ifelse(interactor, SYMBOL, ""),
    colour = interactor
  )
) +
  geom_point() +
  geom_text_repel(mapping = aes(size = 0.01)) +
  # Dashed reference lines at p = 0.05 and at a log2 fold change of 1
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  geom_vline(xintercept = 1, linetype = "dashed") +
  xlab("Log2 fold change of IP to control") +
  ylab("-Log10 of adjusted p-value") +
  theme_classic()
In the two plots, the aes() for geom_text_repel specifies size as 100 and 0.01 respectively. However, this parameter is basically ignored: the text is always the same size, and a "size" box is added to the legend for some reason.
To specify a literal size, rather than a variable that should be mapped to it, you remove the aes to get:
# Scatter of log2 fold change against -log10(adjusted p-value), labelling only
# the interactor rows. `size` is passed directly to geom_text_repel(), outside
# aes(), so it is a fixed value rather than an aesthetic mapping.
ggplot(
  data = xx,
  mapping = aes(
    x = log2FC_OEIPtoOEC,
    y = -log10(adj.pvalue_OEIPtoOEC),
    label = ifelse(interactor, SYMBOL, ""),
    colour = interactor
  )
) +
  geom_point() +
  geom_text_repel(size = 100) +
  # Dashed reference lines at p = 0.05 and at a log2 fold change of 1
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  geom_vline(xintercept = 1, linetype = "dashed") +
  xlab("Log2 fold change of IP to control") +
  ylab("-Log10 of adjusted p-value") +
  theme_classic()
Alternatively a relationship between the variable used and the size that the text appears can be specified through one of the scale_size_ functions. scale_size_identity() would work here.
I have this data with one character column and one numeric column.
# Example data: 14 rows, a character Station code and an integer Particles.kg
# value per row.
data <- structure(
  list(
    Station = c(
      "1A", "1B", "2A", "2B", "3A", "3B", "4A",
      "4B", "5A", "5B", "6A", "6B", "7A", "7B"
    ),
    Particles.kg = c(
      370L, 420L, 250L, 320L, 130L, 210L, 290L,
      390L, 230L, 340L, 60L, 90L, 130L, 170L
    )
  ),
  class = "data.frame",
  row.names = c(NA, -14L)
)
Now I convert the character column to a factor with:
# Replace the Station column with its factor representation.
data$Station <- as.factor(data$Station)
Then I start the hierarchical cluster analysis:
# Name the rows after the stations, keep only what is left after removing the
# first column, then run complete-linkage clustering on Euclidean distances.
rownames(data) <- c(data$Station)
# Negative column index with the default drop = TRUE: when a single column
# remains, the result is a bare vector rather than a data frame.
data <- data[, -1]
require(stats)
res.dist <- dist(data, method = "euclidean")
hcl <- hclust(res.dist, method = "complete")
plot(hcl, hang = -1, cex = 0.6)
(Sorry, I can't upload the picture because of a network issue.) After this, the plot shows 1, 2, 3, ..., 14 instead of the labels 1A, 1B, 2A, 2B, ...
How can I solve this?
After dropping the first column there is only one column left, so the data frame collapses into a vector. A vector does not keep the row names of the data frame, hence there are no labels.
You can use drop = FALSE to keep the data as dataframe after the subset.
# Use the station codes as row names, then remove the first column while
# keeping the result a data frame (drop = FALSE) so the row names are retained.
rownames(data) <- data$Station
data <- data[, -1, drop = FALSE]
# Complete-linkage clustering on Euclidean distances between rows
res.dist <- dist(data, method = "euclidean")
hcl <- hclust(res.dist, method = "complete")
plot(hcl, hang = -1, cex = 0.6)
There is no need to keep only the measurements column, the plot can be done by subsetting the data in the dist instruction.
Note that I use data[-1], not data[, -1], which would drop the dim attribute. The former always returns a sub-df.
# Station codes become the row names; `data` itself is left intact and the
# first column is excluded only in the call to dist(). List-style indexing
# (data[-1]) always returns a data frame.
rownames(data) <- data$Station
res.dist <- dist(data[-1], method = "euclidean")
hcl <- hclust(res.dist, method = "complete")
plot(hcl, hang = -1, cex = 0.6)
Load sample data frame
# Sample data: 3 IDs x 3 time points (9 rows). M1a..M4a are doubles and
# M1b..M4b are integers.
df <- data.frame(
  ID   = c(1, 1, 1, 2, 2, 2, 3, 3, 3),
  time = c(0L, 1L, 2L, 0L, 1L, 2L, 0L, 1L, 2L),
  M1a  = c(0, 0.2, 0.3, 0, 1.5, 2.9, 0, 2.4, 3.9),
  M2a  = c(0, 0.4, 0.6, 0, 0.9, 0.9, 0, 0.5, 0.7),
  M3a  = c(0, 0.3, 0.4, 0, 0.6, 0.9, 0, 0.5, 0.8),
  M4a  = c(0, 0.6, 0.6, 0, 0.4, 0.6, 0, 0.2, 0.9),
  M1b  = c(0L, 200L, 300L, 0L, 300L, 900L, 0L, 900L, 1000L),
  M2b  = c(0L, 400L, 600L, 0L, 600L, 900L, 0L, 600L, 1000L),
  M3b  = c(0L, 300L, 400L, 0L, 200L, 800L, 0L, 200L, 900L),
  M4b  = c(0L, 600L, 600L, 0L, 800L, 1000L, 0L, 400L, 1100L)
)
Now plot two y-axis scatter plot
# Two-y-axis scatter plot: M1a on the left axis (red) and M1b on the right
# axis (blue), both against time.
par(mar = c(5, 4, 4, 5) + 0.1) # widen the right margin for the second axis
plot(df$time, df$M1a, type = "p", col = "red", main = "M1", cex = 0.5, cex.main = 2, cex.lab = 1.0, cex.axis = 0.7)
# Draw the next plot on top of the current one instead of starting a new page
par(new = TRUE)
plot(df$time, df$M1b, type = "p", col = "blue", xaxt = "n", yaxt = "n", xlab = "", ylab = "")
mtext("Relative change (%)", side = 4, line = 3)
axis(4)
# Both series are drawn as points, so the legend key uses the point symbol
# (pch) rather than a line (lty), which would not match anything on the plot.
legend("topleft", col = c("red", "blue"), pch = 1, legend = c("Absolute Change", "Relative Change"))
What am I stuck on?
1. Median trend line
I was able to add a regression line, but I want a median trend line connecting the medians of M1a, and of M1b, at the three time points.
2. Adding p-values to the plot (repeated-measures one-way ANOVA)
# Fit an aov model of M1a on time with an Error(ID/time) term.
fit1=aov(df$M1a~df$time + Error(ID/time),na.action=na.exclude,data=df);
# Attempt to pull the "Pr(>F)" column out of the "Error: Within" stratum of the summary.
sig1= summary(fit1)$"Error: Within"$"Pr(>F)"
# Map a p-value to a marker: '**' below 0.001, '*' from 0.001 up to (not including) 0.05, '' otherwise.
# NOTE(review): `sig` is not defined anywhere in the visible code; presumably `sig1` was intended — confirm.
if (sig<0.001) star='**' else if (sig>=0.001&sig<0.05) star='*' else star='';
# Assigns `star` again, this time from `sig1`, overwriting the value set on the previous line.
if (sig1<0.001) star='**' else star='';
I was planning to use the code above to add the p-value to my two-y-axis plot. However, I get sig1 as NULL, whereas sig1 should be 0.153.
The final results should include * mark on the main title of plot (M1), if results are significant.
Any tips? Thanks in advance!
To answer #2 first, one needs to look at the inner structures of a summary.aov object:
# Print the full nested structure of the summary object so its list layout is visible.
dput(summary(fit1))
structure(list(`Error: ID` = structure(list(structure(list(Df = 1,
`Sum Sq` = 5.60666666666667, `Mean Sq` = 5.60666666666667,
`F value` = NA_real_, `Pr(>F)` = NA_real_), .Names = c("Df",
"Sum Sq", "Mean Sq", "F value", "Pr(>F)"), class = c("anova",
"data.frame"), row.names = "Residuals")), class = c("summary.aov",
"listof")), `Error: ID:time` = structure(list(structure(list(
Df = 1, `Sum Sq` = 11.3157142857143, `Mean Sq` = 11.3157142857143), .Names = c("Df",
"Sum Sq", "Mean Sq"), class = c("anova", "data.frame"), row.names = "df$time")), class = c("summary.aov",
"listof")), `Error: Within` = structure(list(structure(list(Df = c(1,
5), `Sum Sq` = c(0.325952380952381, 0.573888888888889), `Mean Sq` = c(0.325952380952381,
0.114777777777778), `F value` = c(2.83985617480293, NA), `Pr(>F)` = c(0.152766396924706,
NA)), .Names = c("Df", "Sum Sq", "Mean Sq", "F value", "Pr(>F)"
), class = c("anova", "data.frame"), row.names = c("df$time ",
"Residuals"))), class = c("summary.aov", "listof"))), .Names = c("Error: ID",
"Error: ID:time", "Error: Within"), class = "summary.aovlist")
Note that the values within summary(fit1)$"Error: Within" are actually buried one level deeper (and don't have names, so they need a numeric index). Do this:
# The "Error: Within" stratum is itself a list; its first (unnamed) element is the table that holds `Pr(>F)`.
summary(fit1)$"Error: Within"[[1]]$`Pr(>F)`
[1] 0.1527664 NA
Now to see if I can figure out the two-ordinate plot issue. I'm pretty sure one would need to do any median plotting before the par(new=TRUE) operation, because that changes the user coordinate system based on the new data.
Adding a title with the extracted value to your plot, augmented by the helpful comment from @VincentBonhomme:
# Two-y-axis scatter plot of M1a (left axis, red) and M1b (right axis, blue)
# against time, with a median trend line per series and the within-stratum
# p-value of fit1 in the title.

# tapply() returns one median per distinct time, ordered by sorted time value.
# Take the x positions in that same order so they always line up; unique()
# alone follows first appearance and only matches when the data are sorted.
time_points <- sort(unique(df$time))

plot(df$time, df$M1a, type = "p", col = "red", cex = 0.5, cex.main = 2, cex.lab = 1.0, cex.axis = 0.7)
# Each median line is drawn right after its own plot() call, while the user
# coordinates still belong to that series.
lines(time_points, tapply(df$M1a, df$time, median), col = "red")
par(new = TRUE)
plot(df$time, df$M1b, type = "p", col = "blue", xaxt = "n", yaxt = "n", xlab = "", ylab = "")
lines(time_points, tapply(df$M1b, df$time, median), col = "blue")
mtext("Relative change (%)", side = 4, line = 3)
axis(4)
# The legend key shows both the point symbol and the (median) line of each series
legend("topleft", col = c("red", "blue"), lty = 1, pch = 1, legend = c("Absolute Change", "Relative Change"))
# `Pr(>F)` has one entry per row of the table (the time term first, then an NA
# for Residuals); keep only the first so a single number goes into the title.
p_within <- summary(fit1)$"Error: Within"[[1]]$`Pr(>F)`[1]
title(main = bquote("P-value for M1 (absolute scale)" == .(round(p_within, 3))))
I am using following data and code to plot a barplot:
# Example data: a nine-level factor VAR and an integer VAL running from 0 to
# 256 in steps of 32.
ddf <- data.frame(
  VAR = factor(c("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg", "hhh", "iii")),
  VAL = c(0L, 32L, 64L, 96L, 128L, 160L, 192L, 224L, 256L)
)
# One bar per value; the red component rises and the green component falls
# with VAL.
barplot(
  ddf$VAL,
  col = rgb(red = ddf$VAL, green = 256 - ddf$VAL, blue = 0, maxColorValue = 256)
)
How can I add a legend strip having these colors on it, with corresponding values written there? I want a legend strip as appears in following plot:
library(latticeExtra)
# 1000 random points; x and y are standard normal, z is normal with mean 1
dat <- data.frame(
  x = rnorm(1000),
  y = rnorm(1000),
  z = rnorm(1000, mean = 1)
)
# Largest absolute z, used to make the colour breaks symmetric around zero
maxz <- max(abs(dat$z))
levelplot(
  z ~ x * y,
  dat,
  at = seq(-maxz, maxz, length.out = 100),
  panel = panel.levelplot.points,
  par.settings = custom.theme.2()
)
I have tried to use following function examples but am unable to modify them to my needs:
# Two attempts at drawing a colour legend: base legend() and plotrix's
# color.legend().
# NOTE(review): `c6` and `i` are not defined in the visible code; they
# presumably come from the example these lines were taken from. Define them
# before running.
legend("bottom", legend = LETTERS[1:6], col = c6, ncol = 2, cex = 2, lwd = 3, text.font = i, text.col = c6)
library(plotrix) # was misspelled `libarary`, which is not a function
testcol <- color.gradient(c(0, 1), 0, c(1, 0), nslices = 5)
# NOTE(review): `col.labels` is not defined in the visible code — confirm where
# it comes from.
color.legend(11, 6, 11.8, 9, col.labels, testcol, gradient = "y")
Thanks in advance.
The trick is to draw the color key as an image in a separate panel. Notice that I decrease the margin between the panels by reducing par(mar=...) and add an outer margin around both panels with par(oma=...).
# One colour per value: the red component rises and the green component falls
# with VAL
my.palette <- rgb(red = ddf$VAL, green = 256 - ddf$VAL, blue = 0, maxColorValue = 256)

# Two panels side by side, the left one five times as wide as the right one
layout(matrix(1:2, nrow = 1), widths = c(5, 1))

# Small inner margins, an outer margin of 3 lines on every side, and axis
# labels turned horizontal (`las = 1`)
par(mar = rep(0.5, 4), oma = rep(3, 4), las = 1)

barplot(ddf$VAL, col = my.palette)

# Colour key: a single-column image with one cell per value, with its axis
# drawn on the right-hand side
image(x = 1, y = ddf$VAL, z = t(seq_along(ddf$VAL)), col = my.palette, axes = FALSE)
axis(side = 4)
Just for completeness, here is the color.legend version, because I think it's the easiest one.
Your data:
# Example data: a nine-level factor VAR and an integer VAL running from 0 to
# 256 in steps of 32.
# `.Names` needs a character vector built with c(); the previous
# `("VAR","VAL")` was a syntax error.
ddf <- structure(
  list(
    VAR = structure(
      1:9,
      .Label = c("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg", "hhh", "iii"),
      class = "factor"
    ),
    VAL = c(0L, 32L, 64L, 96L, 128L, 160L, 192L, 224L, 256L)
  ),
  .Names = c("VAR", "VAL"),
  class = "data.frame",
  row.names = c(NA, -9L)
)
Now we plot the barplot with the legend.
The numbers are the corner coordinates of the legend rectangle (x1, y1, x2, y2).
# Barplot with a gradient colour legend drawn to the right of the bars.
library(plotrix) # provides color.legend(), which was used without being loaded
# Widen the right margin and allow drawing outside the plot region (xpd = TRUE)
# so the legend can sit beside the bars
par(mar = c(1, 3, 1, 5) + 0.1, xpd = TRUE)
col.pal <- rgb(ddf$VAL, 256 - ddf$VAL, 0, maxColorValue = 256)
# Reuse the palette computed above instead of rebuilding it inline
barplot(ddf$VAL, col = col.pal)
# The first four numbers are the corners of the legend rectangle; labels and
# colours are reversed together so each label stays with its own colour
color.legend(12, 220, 13, 80, rev(ddf$VAL), rev(col.pal), gradient = "y")
Alternatively, you can use color.scale from the plotrix package and add it, for example, to the standard legend. That is exactly what the plotZCol function does in (my) spatDataManagement package.
From the examples :
# Two independent samples of 1000 standard-normal values
x <- rnorm(n = 1000)
y <- rnorm(n = 1000)
library("spatDataManagement")
# zCol is set to the element-wise sum of the two coordinates
plotZCol(x, y, zCol = x + y)