Proper density plotting - r

I have 3 columns of data. In columns data ranging from approximately 0,0727
to 10,2989.
and this is how it looks:
http://i61.tinypic.com/2uen3hz.jpg
My Code
MyData <- read.csv2(file="C:/Users/Sysop/Desktop/Koncentracija.csv",header=T,sep=";")
MyData
Data_1<-MyData$Sul311
Data_2<-MyData$Sul322
Data_3<-MyData$Sul333
Data_1_density<- density(Data_1,na.rm = TRUE)
Data_2_density<- density(Data_2,na.rm = TRUE)
Data_3_density<- density(Data_3,na.rm = TRUE)
xlim <- range(Data_1_density$x,Data_2_density$x,Data_3_density$x, na.rm = TRUE)
ylim <- range(Data_1_density$y, Data_2_density$y, Data_3_density$y, na.rm = TRUE)
Col_1 <- rgb(1,0,0,0.4)
Col_2 <- rgb(0,0,1,0.4)
Col_3 <- rgb(0,1,0,0.4)
plot(Data_1_density, xlim = xlim, ylim = ylim, xlab = 'Zn concentracion, mg/l',main = 'Distribution of data', panel.first = grid(nx = 10, ny = 10))
polygon(Data_1_density, density = -1, col = Col_1)
polygon(Data_2_density, density = -1, col = Col_2)
polygon(Data_3_density, density = -1, col = Col_3)
legend('topright',c('distribution 1 ','distribution 2','distribution 3'),cex=1.0, fill = c(Col_1, Col_2, Col_3), bty = 'n',border = NA)
But as we can see density plots begining from negative values, but my data does not have any negative values. One column missing one value so R shows as NA, but I use (na.rm = TRUE) to ignore NA values.
So is these density plots plotted correctly or not?
Here is my data:
Sul311 Sul322 Sul333
1,8032 NA 2,3981
3,4949 3,1696 1,8218
0,5856 0,5577 0,0837
0,1859 1,5894 0,093
1,4686 1,45 2,9744
0,079 0,0727 0,0543
1,0317 1,0782 2,7513
0,5112 0,5484 0,9295
1,3943 1,1805 2,7513
1,1526 1,1619 2,6305
1,3013 10,2989 5,577
0,5949 0,5856 0,725
0,1766 0,2696 1,6917
0,4229 0,3309 1,1089
1,1953 0,3328 1,6787
1,4853 0,6116 1,8367
0,4443 0,3514 1,2939
0,5912 0,3309 1,2901

you can define the begining or the "first value" with the from argument :
density(x, na.rm=T, from=min(x, na.rm=T))
for instance

Related

R: How to plot multiple ARIMA forecasts on the same time-series

I would like to plot several forecasts on the same plot in different colours, however, the scale is off.
I'm open to any other methods.
reproducible example:
require(forecast)
# MAKING DATA
data <- c(3.86000, 19.55810, 19.51091, 20.74048, 20.71333, 29.04191, 30.28864, 25.64300, 23.33368, 23.70870 , 26.16600 ,27.61286 , 27.88409 , 28.41400 , 24.81957 , 24.60952, 27.49857, 32.08000 , 29.98000, 27.49000 , 237.26150, 266.35478, 338.30000, 377.69476, 528.65905, 780.00000 )
a.ts <- ts(data,start=c(2005,1),frequency=12)
# FORECASTS
arima011_css =stats::arima(x = a.ts, order = c(0, 1, 1), method = "CSS") # css estimate
arima011_forecast = forecast(arima011_css, h=10, level=c(99.5))
arima321_css =stats::arima(x = a.ts, order = c(3, 2, 1), method = "CSS") # css estimate
arima321_forecast = forecast(arima321_css, h=10, level=c(99.5))
# MY ATTEMPT AT PLOTS
plot(arima321_forecast)
par(new=T)
plot(arima011_forecast)
Here is something similar to #jay.sf but using ggplot2.
library(ggplot2)
autoplot(a.ts) +
autolayer(arima011_forecast, series = "ARIMA(0,1,1)", alpha = 0.5) +
autolayer(arima321_forecast, series = "ARIMA(3,2,1)", alpha = 0.5) +
guides(colour = guide_legend("Model"))
Created on 2020-05-19 by the reprex package (v0.3.0)
You could do a manual plot using a sequence of dates.
rn <- format(seq.Date(as.Date("2005-01-01"), by="months", length.out=12*3), "%Y.%m")
Your ARIMAs you'll need as.matrix form.
arima321_mat <- as.matrix(as.data.frame(arima321_forecast))
arima011_mat <- as.matrix(as.data.frame(arima011_forecast))
Some colors with different alpha=.
col.1 <- rainbow(2, ,.7)
col.2 <- rainbow(2, ,.7, alpha=.2)
For the CIs use polygon.
plot(data, type="l", xlim=c(1, length(rn)), ylim=c(0, 3500), xaxt="n", main="Forecasts")
axis(1, axTicks(1), labels=F)
mtext(rn[(seq(rn)-1) %% 5 == 0], 1, 1, at=axTicks(1))
lines((length(data)+1):length(rn), arima321_mat[,1], col=col.1[1], lwd=2)
polygon(c(27:36, 36:27), c(arima321_mat[,2], rev(arima321_mat[,3])), col=col.2[1],
border=NA)
lines((length(data)+1):length(rn), arima011_mat[,1], col=col.1[2], lwd=3)
polygon(c(27:36, 36:27), c(arima011_mat[,2], rev(arima011_mat[,3])), col=col.2[2],
border=NA)
legend("topleft", legend=c("ARIMA(3,2,1)", "ARIMA(0,1,1)"), col=col.1, lwd=2, cex=.9)
Edit: To avoid the repetition of lines and polygon calls, you may unite them using Map.
mats <- list(arima321_mat, arima011_mat) ## put matrices into list
plot(.)
axis(.)
mtext(.)
Map(function(i) {
lines((length(data)+1):length(rn), mats[[i]][,1], col=col.1[i], lwd=2)
polygon(c(27:36, 36:27), c(mats[[i]][,2], rev(mats[[i]][,3])), col=col.2[i], border=NA)
}, 1:2)
legend(.)
require(forecast)
data <- c(3.86000, 19.55810, 19.51091, 20.74048, 20.71333, 29.04191, 30.28864, 25.64300, 23.33368, 23.70870 , 26.16600 ,27.61286 , 27.88409 , 28.41400 , 24.81957 , 24.60952, 27.49857, 32.08000 , 29.98000, 27.49000 , 237.26150, 266.35478, 338.30000, 377.69476, 528.65905, 780.00000 )
a.ts <- ts(data,start=c(2005,1),frequency=12)
arima011_css =stats::arima(x = a.ts, order = c(0, 1, 1), method = "CSS") # css estimate
arima011_forecast = predict(arima011_css, n.ahead = 2)$pred
arima321_css =stats::arima(x = a.ts, order = c(3, 2, 1), method = "CSS") # css estimate
arima321_forecast = predict(arima321_css, n.ahead = 2)$pred
plot(a.ts, type = "o", xlim = c(2005, 2007.5) , ylim = c(-1, 1200) , ylab = "price" ,main = "2 month Forecast")
range = c(2007+(3/12), 2007+(4/12)) # adding the dates for the prediction
lines(y = arima011_forecast , x = range , type = "o", col = "red")
lines(y = arima321_forecast, x = range , type = "o", col = "blue")

twoord.plot axis changing

I am using twoord.plot for the first time, and I am having trouble getting the x axis set to years for a time-series data set. I have two different y-axes on different scales. Here is the code that I am working with.
#Install BatchGetSymbols
install.packages('BatchGetSymbols')
library(BatchGetSymbols)
#Get data from FRED
library(quantmod)
getSymbols('CPALTT01USM661S', src = 'FRED')
library(quantmod)
getSymbols('M2SL', src = 'FRED')
#Create data sets with equal number of observations
CPI = CPALTT01USM661S["1960-01-01/2019-01-01"]
M2 = M2SL["1960-01-01/2019-01-01"]
library(plotrix)
twoord.plot(rx = time(CPI), ry = CPI, lx = time(CPI), ly = M2,
main = "Money Supply and Prices",
xlim = NULL, lylim = NULL, rylim = NULL,
mar = c(5,4,4,4), lcol = "red", rcol = "blue", xlab = "", lytickpos = NA,
ylab = "M2", ylab.at = NA,
rytickpos = NA, rylab = "CPI", rylab.at = NA, lpch = 1,rpch = 2,
type = "l", xtickpos = NULL, xticklab = NULL,
halfwidth = 0.4, axislab.cex = 1, do.first = NULL)
Here is the graph that I am getting. Notice the x-axis is not in years.
The date values ( beginnings of each month) are in the index of the matrices, so to extract the year beginnings get every 12th item:
twoord.plot(rx=time(CPI), ry=CPI, lx=time(CPI),ly = M2, main="Money Supply and Prices",xlim=NULL,lylim=NULL,rylim=NULL,
mar=c(5,4,4,4),lcol="red",rcol="blue",xlab="",lytickpos=NA,ylab="M2",ylab.at=NA,
rytickpos=NA,rylab="CPI",rylab.at=NA,lpch=1,rpch=2,
type="l",
xtickpos=index(CPI)[seq(1,nrow(CPI), by=12)], #tick at year start
xticklab=format( index(CPI)[seq(1,nrow(CPI), by=12)], "%Y"), #just year
halfwidth=0.4, axislab.cex=1,
do.first=NULL, las=2) # not sure why las=2 didn't seem to work.

HeatMap: how to cluster only the rows and keep order of the heatmap's column labels as same as in the df?

I wanna plot a heatmap and cluster only the rows (i.e. genes in this tydf1).
Also, wanna keep order of the heatmap's column labels as same as in the df (i.e. tydf1)?
Sample data
df1 <- structure(list(Gene = c("AA", "PQ", "XY", "UBQ"), X_T0_R1 = c(1.46559502, 0.220140568, 0.304127515, 1.098842127), X_T0_R2 = c(1.087642983, 0.237500819, 0.319844338, 1.256624804), X_T0_R3 = c(1.424945196, 0.21066267, 0.256496284, 1.467120048), X_T1_R1 = c(1.289943948, 0.207778662, 0.277942721, 1.238400358), X_T1_R2 = c(1.376535013, 0.488774258, 0.362562315, 0.671502431), X_T1_R3 = c(1.833390311, 0.182798731, 0.332856558, 1.448757569), X_T2_R1 = c(1.450753714, 0.247576125, 0.274415259, 1.035410946), X_T2_R2 = c(1.3094609, 0.390028842, 0.352460646, 0.946426593), X_T2_R3 = c(0.5953716, 1.007079177, 1.912258811, 0.827119776), X_T3_R1 = c(0.7906009, 0.730242116, 1.235644748, 0.832287694), X_T3_R2 = c(1.215333041, 1.012914813, 1.086362205, 1.00918082), X_T3_R3 = c(1.069312467, 0.780421013, 1.002313082, 1.031761442), Y_T0_R1 = c(0.053317766, 3.316414959, 3.617213894, 0.788193798), Y_T0_R2 = c(0.506623748, 3.599442788, 1.734075583, 1.179462912), Y_T0_R3 = c(0.713670106, 2.516735845, 1.236204882, 1.075393433), Y_T1_R1 = c(0.740998252, 1.444496448, 1.077023349, 0.869258744), Y_T1_R2 = c(0.648231834, 0.097957459, 0.791438659, 0.428805547), Y_T1_R3 = c(0.780499252, 0.187840968, 0.820430227, 0.51636582), Y_T2_R1 = c(0.35344654, 1.190274584, 0.401845911, 1.223534348), Y_T2_R2 = c(0.220223951, 1.367784148, 0.362815405, 1.102117612), Y_T2_R3 = c(0.432856978, 1.403057729, 0.10802472, 1.304233845), Y_T3_R1 = c(0.234963735, 1.232129062, 0.072433381, 1.203096462), Y_T3_R2 = c(0.353770497, 0.885122768, 0.011662112, 1.188149743), Y_T3_R3 = c(0.396091395, 1.333921747, 0.192594116, 1.838029829), Z_T0_R1 = c(0.398000559, 1.286528398, 0.129147097, 1.452769794), Z_T0_R2 = c(0.384759325, 1.122251177, 0.119475721, 1.385513609), Z_T0_R3 = c(1.582230097, 0.697419716, 2.406671502, 0.477415567), Z_T1_R1 = c(1.136843842, 0.804552001, 2.13213228, 0.989075996), Z_T1_R2 = c(1.275683837, 1.227821594, 0.31900326, 0.835941568), Z_T1_R3 = c(0.963349308, 0.968589683, 1.706670339, 0.807060135), Z_T2_R1 = c(3.765036263, 0.477443352, 1.712841882, 0.469173869), Z_T2_R2 = c(1.901023385, 0.832736132, 2.223429427, 0.593558769), Z_T2_R3 = c(1.407713024, 0.911920317, 2.011259223, 0.692553388), Z_T3_R1 = c(0.988333629, 1.095130142, 1.648598854, 0.629915612), Z_T3_R2 = c(0.618606729, 0.497458337, 0.549147265, 1.249492088), Z_T3_R3 = c(0.429823986, 0.471389536, 0.977124788, 1.136635484)), row.names = c(NA, -4L ), class = c("data.table", "data.frame"))
Scripts used
library(dplyr)
library(stringr)
library(tidyr)
gdf1 <- gather(df1, "group", "Expression", -Gene)
gdf1$tgroup <- apply(str_split_fixed(gdf1$group, "_", 3)[, c(1, 2)],
1, paste, collapse ="_")
library(dplyr)
tydf1 <- gdf1 %>%
group_by(Gene, tgroup) %>%
summarize(expression_mean = mean(Expression)) %>%
spread(., tgroup, expression_mean)
#1 heatmap script is being used
library(tidyverse)
tydf1 <- tydf1 %>%
as.data.frame() %>%
column_to_rownames(var=colnames(tydf1)[1])
library(gplots)
library(vegan)
randup.m <- as.matrix(tydf1)
scaleRYG <- colorRampPalette(c("red","yellow","darkgreen"),
space = "rgb")(30)
data.dist <- vegdist(randup.m, method = "euclidean")
row.clus <- hclust(data.dist, "aver")
heatmap.2(randup.m, Rowv = as.dendrogram(row.clus),
dendrogram = "row", col = scaleRYG, margins = c(7,10),
density.info = "none", trace = "none", lhei = c(2,6),
colsep = 1:3, sepcolor = "black", sepwidth = c(0.001,0.0001),
xlab = "Identifier", ylab = "Rows")
#2 heatmap script is being used
df2 <- as.matrix(tydf1[, -1])
heatmap(df2)
Also, I want to add a color key.
It is still unclear to me, what the desired output is. There are some notes:
You don't need to use vegdist() to calculate distance matrix for your hclust() call. Because if you check all(vegdist(randup.m, method = "euclidian") == dist(randup.m)) it returns TRUE;
Specifying Colv = F in your heatmap.2() call will prevent reordering of the columns (default is TRUE);
Maybe it is better to scale your data by row (see the uncommented row);
Your call of heatmap.2() returns the heatmap with color key.
So summing it up - in your first script you just miss the Colv = F argument, and after a little adjustment it looks like this:
heatmap.2(randup.m,
Rowv = as.dendrogram(row.clus),
Colv = F,
dendrogram = "row",
#scale = "row",
col = scaleRYG,
density.info = "none",
trace = "none",
srtCol = -45,
adjCol = c(.1, .5),
xlab = "Identifier",
ylab = "Rows"
)
However I am still not sure - is it what you need?

plot_generic() function in plot_roc_components not found in R

I am using plot_roc_components function from rmda package. The definition of it has plot_generic() function. But, I am not able to find definition of this function. Why is it so?
The reason for it to see if there is an option for legend.size(). plot_roc_components gives me figure, however, I want to change the legend size. There is an option for legend.position, but not for its font size.
Could you please explain?
Thanks!
https://github.com/mdbrown/rmda/blob/57553a4cf5b6972176a0603b412260e367147619/R/plot_functions_sub.R
You were looking in one file but it was defined in another file.
plot_generic<- function(xx, predictors, value, plotNew,
standardize, confidence.intervals,
cost.benefit.axis = TRUE, cost.benefits, n.cost.benefits,
cost.benefit.xlab, xlab, ylab,
col, lty, lwd,
xlim, ylim, legend.position,
lty.fpr = 2, lty.tpr = 1,
tpr.fpr.legend = FALSE,
impact.legend = FALSE,
impact.legend.2 = FALSE,
population.size = 1000,
policy = policy, ...){
## xx is output from get_DecisionCurve,
## others are directly from the function call
#save old par parameters and reset them once the function exits.
old.par<- par("mar"); on.exit(par(mar = old.par))
xx.wide <- reshape::cast(xx, thresholds~model, value = value, add.missing = TRUE, fill = NA)
xx.wide$thresholds <- as.numeric(as.character(xx.wide$thresholds))
if(is.numeric(confidence.intervals)){
val_lower <- paste(value, "lower", sep = "_")
val_upper <- paste(value, "upper", sep = "_")
xx.lower <- cast(xx, thresholds~model, value = val_lower, add.missing = TRUE, fill = NA)
xx.upper <- cast(xx, thresholds~model, value = val_upper, add.missing = TRUE, fill = NA)
xx.lower$thresholds <- as.numeric(as.character(xx.lower$thresholds))
xx.upper$thresholds <- as.numeric(as.character(xx.upper$thresholds))
}
# adjust margins to add extra x-axis
if(cost.benefit.axis) par(mar = c(7.5, 4, 3, 2) + 0.1)
#set default ylim if not provided
#initial call to plot and add gridlines

R igraph Format Date on axis

In the following igraph there are dates to be plotted as marks on the x-axis. Below I provided an example. As the dates are specified in the label matrix they are formatted into an atomic value. How do I get the dates on the x-axis to be displayed in a regular date format?
library(igraph)
nodes=data.frame(
c(0,1,2,3),
c("A","B","C","D")
)
colnames(nodes) = c("id","name")
links = data.frame(
c(0,0,1,2),
c(1,2,3,3)
)
colnames(links) = c("from","to")
layout = matrix(
c(as.Date('2010-01-01'),1, as.Date('2010-01-02'),1, as.Date('2010-01-02'),2, as.Date('2010-01-06'),1), byrow = TRUE, nrow=4
)
net = graph.data.frame(links, vertices = nodes)
plot.igraph(
net, xaxt="n",layout=layout,axes=TRUE,asp=0, rescale=FALSE,xlim=c(as.Date('2010-01-01'),as.Date('2010-01-06')),ylim=c(1,2)
)
You can replace the axis by your own values as explained here.
Using your code, it gives:
layout <- data.frame(Date = as.Date(c('2010-01-01','2010-01-02','2010-01-02','2010-01-06')), value = c(1,2,1,1))
plot.igraph(
net,
layout = layout,
rescale = FALSE,
axis = FALSE,
asp = 0,
xlim = as.Date(c('2010-01-01', '2010-01-06')),
ylim = c(1,2)
)
axis(1, at = as.numeric(layout$Date), labels = layout$Date, cex.axis = 0.9)
axis(2, at = 1:max(layout$value), labels = 1:max(layout$value))

Resources