Is there a way to create a distribution curve given the 1st through 4th moments (mean, variance or standard deviation, skewness, and kurtosis)? Here is a small table of the descriptive statistics. The fifth variable has stronger positive skew and larger kurtosis than the rest, and leads me to believe a non-normal distribution may need to be used.
dput(summarystats_factors)
structure(list(ERVALUEY = c(1.21178722715092, 8.4400515531338,
0.226004674926861, 3.89328347004421), ERVOLY = c(0.590757887612924,
7.48697754999463, 0.295973723450469, 3.31326615805655), ERQUALY = c(1.59367031426668,
4.57371901763411, 0.601172123904339, 3.89080479205755), ERMOMTY = c(3.09719686678745,
7.01446175391253, 0.260638252621096, 3.28326189430607), ERSIZEY = c(1.69935727981412,
6.1917295410928, 1.24021163316834, 6.23493767854042), Moment = structure(c("Mean",
"Standard Deviation", "Skewness", "Kurtosis"), .Dim = c(4L, 1L
))), row.names = c(NA, -4L), class = "data.frame")
We could use curve with PearsonDS::dpearson. Note, that the moments= argument expects exactly the order mean, variance, skewness, kurtosis, so that the rows of the data must be ordered correspondingly (as is the case in your example data).
FUN <- function(d, xlim, ylim, lab=colnames(d), main='Theoretical Distributions') {
s <- seq(d)
lapply(s, \(i) {
curve(PearsonDS::dpearson(x, moments=d[, i]), col=i + 1, xlim=xlim, ylim=ylim,
add=ifelse(i == 1, FALSE, TRUE), ylab='y', main=main)
})
legend('topright', col=s + 1, lty=1, legend=lab, cex=.8, bty='n')
}
FUN(dat[-6], xlim=c(-2, 10), ylim=c(-.01, .2))
Data:
dat <- structure(list(ERVALUEY = c(1.21178722715092, 8.4400515531338,
0.226004674926861, 3.89328347004421), ERVOLY = c(0.590757887612924,
7.48697754999463, 0.295973723450469, 3.31326615805655), ERQUALY = c(1.59367031426668,
4.57371901763411, 0.601172123904339, 3.89080479205755), ERMOMTY = c(3.09719686678745,
7.01446175391253, 0.260638252621096, 3.28326189430607), ERSIZEY = c(1.69935727981412,
6.1917295410928, 1.24021163316834, 6.23493767854042), Moment = structure(c("Mean",
"Standard Deviation", "Skewness", "Kurtosis"), .Dim = c(4L, 1L
))), row.names = c(NA, -4L), class = "data.frame")
Use the PearsonDS package, the pearson0 family creates "normal" distributions matching specified moments, but other options are available.
Related
I have a data like this
df<- structure(list(How = c(3.1e-05, 0.000114, 0.000417, 0.00153,
0.00561, 0.0206, 0.0754, 0.277, 1.01, 3.72), Where = c(1, 0.948118156866697,
0.920303987764611, 1.03610743904536, 1.08332987533419, 0.960086785898477,
0.765642506120658, 0.572520170014998, 0.375835106792894, 0.254180720963181
)), class = "data.frame", row.names = c(NA, -10L))
library(drc)
I make my model like this
fit <- drm(formula = Where ~ How, data = df,
fct = LL.4(names=c("Slope","Lower Limit","Upper Limit", "EC50")))
Then I plot it like this
plot(NULL, xlim = c(0.000001, 4), ylim = c(0.01, 1.2),log = "x")
points(df$How, df$Where, pch = 20)
x1 = seq(0.000001, 4, by=0.0001)
y1 = coef(fit)[3] + (coef(fit)[2] - coef(fit)[3])/(1+(x1/coef(fit)[4])^((-1)*coef(fit)[1]))
lines(x1,y1)
Now I want to be able to print the following information inside the figure
max(df$How)
min(df$How)
coef(fit)[2]
coef(fit)[3]
(-1)*coef(fit)[1]
coef(fit)[4]
I tried to do it like this
text(labels = bquote(FirstT~"="~.(round(max(df$How)))))
text(labels = bquote(SecondT~"="~.(round(min(df$How))))
text(labels = bquote(A[min]~"="~.(round(coef(fit)[2]))))
text(labels = bquote(A[max]~"="~.(coef(fit)[3]))))
text(labels = paste0("Slope = ", round((-1)*coef(fit)[1])))
which of course does not work. I am more into an automatic way to find a place in right left corner of the figure that print these info
In the code below, we get the plot area coordinate ranges with par("usr") and then use those and the data point locations to automatically place the labels in the desired locations.
# Reduce margins
par(mar=c(5,4,0.5,0.5))
# Get extreme coordinates of plot area
p = par("usr")
p[1:2] = 10^p[1:2] # Because xscale is logged
text(max(df$How), df$Where[which.max(df$How)],
labels = bquote(FirstT~"="~.(round(max(df$How)))), pos=1)
text(min(df$How), df$Where[which.min(df$How)],
labels = bquote(SecondT~"="~.(round(min(df$How)))), pos=1)
text(1.1*p[1], p[3] + 0.02*diff(p[3:4]),
labels = bquote(A[min]~"="~.(round(coef(fit)[2]))), adj=c(0,0))
> dput(head(inputData))
structure(list(Date = c("2018:07:00", "2018:06:00", "2018:05:00",
"2018:04:00", "2018:03:00", "2018:02:00"), IIP = c(125.8, 127.5,
129.7, 122.6, 140.3, 127.4), CPI = c(139.8, 138.5, 137.8, 137.1,
136.5, 136.4), `Term Spread` = c(1.580025, 1.89438, 2.020112,
1.899074, 1.470544, 1.776862), RealMoney = c(142713.9916, 140728.6495,
140032.2762, 139845.5215, 139816.4682, 139625.865), NSE50 = c(10991.15682,
10742.97381, 10664.44773, 10472.93333, 10232.61842, 10533.10526
), CallMoneyRate = c(6.161175, 6.10112, 5.912088, 5.902226, 5.949956,
5.925538), STCreditSpread = c(-0.4977, -0.3619, 0.4923, 0.1592,
0.3819, -0.1363)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
I want to make my autoregressive plot like this plot:
#------> importing all libraries
library(readr)
install.packages("lubridtae")
library("lubridate")
install.packages("forecast")
library('ggplot2')
library('fpp')
library('forecast')
library('tseries')
#--------->reading data
inputData <- read_csv("C:/Users/sanat/Downloads/exercise_1.csv")
#--------->calculating the lag=1 for NSE50
diff_NSE50<-(diff(inputData$NSE50, lag = 1, differences = 1)/lag(inputData$NSE50))
diff_RealM2<-(diff(inputData$RealMoney, lag = 1, differences = 1)/lag(inputData$RealMoney))
plot.ts(diff_NSE50)
#--------->
lm_fit = dynlm(IIP ~ CallMoneyRate + STCreditSpread + diff_NSE50 + diff_RealM2, data = inputData)
summary(lm_fit)
#--------->
inputData_ts = ts(inputData, frequency = 12, start = 2012)
#--------->area of my doubt is here
VAR_data <- window(ts.union(ts(inputData$IIP), ts(inputData$CallMoneyRate)))
VAR_est <- VAR(y = VAR_data, p = 12)
plot(VAR_est)
I want to my plots to get plotted together in same plot. How do I serparate the var() plots to two separate ones.
Current plot:
My dataset :
dataset
Okay, so this still needs some work, but it should set the right framework for you. I would look more into working with the ggplot2 for future.
Few extra packages needed, namely library(vars) and library(dynlm).
Starting from,
VAR_est <- VAR(y = VAR_data, p = 12)
Now we extract the values we want from the VAR_est object.
y <- as.numeric(VAR_est$y[,1])
z <- as.numeric(VAR_est$y[,2])
x <- 1:length(y)
## second data set on a very different scale
par(mar = c(5, 4, 4, 4) + 0.3) # Leave space for z axis
plot(x, y, type = "l") # first plot
par(new = TRUE)
plot(x, z, type = "l", axes = FALSE, bty = "n", xlab = "", ylab = "")
axis(side=4, at = pretty(range(z)))
mtext("z", side=4, line=3)
I will leave you to add the dotted lines on etc...
Hint: Decompose the VAR_est object, for example, VAR_est$datamat, then see which bit of data corresponds to the part of the plot you want.
Used some of this
I'm trying to plot a K-Means cluster to analyze different categories of products based on their inventory average and sold quantity.
All values are non-negative and of the same measurement unit.
I don't know what I did wrong and the results contain point with negative values. Actually, I believe all the points given in the plot aren't actual valid points from my data.
Here is my code:
reduced_dataset = dataset[1:20, 4:5]
# Using the elbow method to find the optimal number of clusters
wcss = vector()
for (i in 1:10) wcss[i] = sum(kmeans(reduced_dataset, i)$withinss)
plot(1:10,
wcss,
type = 'b',
main = paste('The Elbow Method'),
xlab = 'Number of clusters',
ylab = 'WCSS')
# As a result, number of clusters should be 2
# Fitting K-Means to the dataset
kmeans = kmeans(x = reduced_dataset, centers = 2)
y_kmeans = kmeans$cluster
# Visualising the clusters
library(cluster)
clusplot(reduced_dataset,
y_kmeans,
lines = 0,
shade = TRUE,
color = TRUE,
labels = 2,
plotchar = FALSE,
span = TRUE,
main = paste('Clusters of categories - NOT ON SALE'),
xlab = 'Average Sold Quantity',
ylab = 'Average Inventory')
dput(reduced_dataset):
structure(list(Avg_Sold_No_Promo = c(0.255722695, 1.139983236,
0.458651842, 0.784966698, 1.642746914, 0.115264798, 7.50338696,
0.487603306, 1.023373984, 0.956099815, 1.505901506, 0.253837072,
0.834963325, 0.880898876, 6.527699531, 11.54054054, 3.44077135,
0.750182882, 0.251033058, 1.875698324), Avg_Inventory_No_Promo =
c(6.068672335,
22.57865326, 9.00694927, 11.56137012, 28.47530864, 7.485981308,
170.9064352, 11.07438017, 22.80792683, 40.63863216, 41.73463573,
10.87603306, 35.87408313, 46.09213483, 185.5671362, 315.6015693,
165.1129477, 78.18032187, 9.65857438, 198.4385475)), .Names =
c("Avg_Sold_No_Promo",
"Avg_Inventory_No_Promo"), row.names = c(NA, 20L), class = "data.frame")
Can someone please help me?
The clusplot function does this automatically.
It is called PCA, and that is also why you get the line with the variability explained there.
Complete beginner at R here trying to perform nonmetric multidimensional scaling on a 95x95 matrix of similarities where 8 corresponds to very similar and 1 corresponds to very dissimilar. I also have an additional column (96th) signifying type and ranging from 0 to 1.
First I load the data:
dsimilarity <- read.table("d95x95matrix.txt",
header = T,
row.names = c("Y1", "Y2", "Y3", "Y4", "Y5", "Y6", "Y7", "Y8", "Y9", "Y10", "Y11", "Y12", "Y13", "Y14", "Y15", "Y16", "Y17", "Y18", "Y19", "Y20",
"Y21", "Y22", "Y23", "Y24", "Y25", "Y26", "Y27", "Y28", "Y29", "Y30", "Y31", "Y32", "Y33", "Y34", "Y35", "Y36", "Y37", "Y38", "Y39", "Y40",
"Y41", "Y42", "Y43", "Y44", "Y45", "Y46", "Y47", "Y48", "Y49", "Y50", "Y51", "Y52", "Y53", "Y54", "Y55", "Y56", "Y57", "Y58", "Y59", "Y60",
"Y61", "Y62", "Y63", "Y64", "Y65", "Y66", "Y67", "Y68", "Y69", "Y70", "Y71", "Y72", "Y73", "Y74", "Y75", "Y76", "Y77", "Y78", "Y79", "Y80",
"Y81", "Y82", "Y83", "Y84", "Y85", "Y86", "Y87", "Y88", "Y89", "Y90", "Y91", "Y92", "Y93", "Y94", "Y95"))
I convert the matrix of similarities into a matrix of dissimilarities, and exclude the 96th column:
ddissimilarity <- dsimilarity; ddissimilarity[1:95, 1:95] = 8 - ddissimilarity[1:95, 1:95]
Then I perform the nonmetric MDS using the Smacof function:
ordinal.mds.results <- smacofSym(ddissimilarity[1:95, 1:95],
type = c("ordinal"),
ndim = 2,
ties = "primary",
verbose = T )
I create a new data frame (I'm following a guide and don't really know what's going on here):
mds.config <- as.data.frame(ordinal.mds.results$conf)
All well and good thus far (to my knowledge). However at this point I will try to create an xyplot of the data and get a good result using this code:
xyplot(D2 ~ D1, data = mds.config,
aspect = 1,
main = "Figure 1. MDS solution",
panel = function (x, y) {
panel.xyplot(x, y, col = "black")
panel.text(x, y-.03, labels = rownames(mds.config),
cex = .75)
},
xlab = "MDS Axis 1",
ylab = "MDS Axis 2",
xlim = c(-1.1, 1.1),
ylim = c(-1.1, 1.1))
Now I want to create a figure that incorporates the type in column 96th and assigns different colors to observations of the two different types. However, can't quite figure out how to do so. Does anyone have any ideas of where I'm going wrong here?
xyplot(D2 ~ D1, data = mds.config ~ ddissimilarity[96:96, 96:96],
aspect = 1,
main = "Figure 1. MDS solution",
panel = function (x, y) {
panel.xyplot(x, y, col = "black")
panel.text(x, y-.03, labels = rownames(mds.config),
cex = .75)
},
xlab = "MDS Axis 1",
ylab = "MDS Axis 2",
xlim = c(-1.1, 1.1),
ylim = c(-1.1, 1.1),
group = "Type")
I posted a recent post about controlling x-y plots as two Normal curves and have since realised I was making things too complicated. I have since managed to plot it as ellipse's but this slightly over estimates the error; which ideally could be plotted as rhombus.
The code I have to date is:
plot(c(-5,10), c(-5,5), xlab = expression(Age), ylab = expression(value), type="n")
draw.ellipse(Age, value, a=Age_error, b=value_error, col="grey70")
Which plots:
Is there someway to replace the ellipse with a rhombus whose height is controlled by 2x value_error and width by 2x age_error?
My data frame is below
structure(list(Age = c(1L, 2L, 4L), value = c(3, -2, 0.01), Age_error = c(2,
1.4, 3), value_error = c(0.5, 1, 2.1)), .Names = c("Age", "value",
"Age_error", "value_error"), class = "data.frame", row.names = c(NA,
-3L))
Many thanks
You can use the my.symbols and ms.polygon functions in the TeachingDemos package to draw the rhombuses:
library(TeachingDemos)
plot(c(-5,10), c(-5,5), xlab = expression(Age), ylab = expression(value),
type="n")
my.symbols( Age, value, ms.polygon, n=4, xsize=2*Age_error,
ysize=2*value_error, linesfun=polygon, col='grey' )
Leave out linesfun and col if you don't want the rhombuses filled.