how can we do c means clustering in r,showing overlapping clusters - r

This is what i have tried
setwd("C:\\Ds")
reddit <- mtcars
reddit <- na.omit(reddit)
View(reddit)
cars<-reddit[,c(9,23)]
na.omit(cars)
col1<-cars[,1]/1000000
col2<-cars[,2]/1000000
z<-cbind(col1,col2)
cl<-means(z,4)
options(scipen = 10)
format(z,scientific=FALSE)
plot(z, col = cl$cluster,color=TRUE,las=1,xlab = "Gross in millions",ylab = "Budget in millions")
points(cl$centers, col = 1:2, pch = 18, cex = 2.5)
I want to create something like this:

Related

How to adjust column width in dataframe of foretsplotter function

I am trying to create a forestplot, using forestplotter function, am able to get a beautiful graph, but am not able to see the entire graph, the column widths in few of the columns are so big, even if the string size is less, making the width of the entire graph, so big to see, can someone help me with this and also is it possible to align the datahrame contents uniformly centre aligned......Please help me with this
The code and relevant data are
###Required packages###
library(grid)
library(forestploter)
library(rmeta)
library(gridExtra)
#Data entered#
df <- data.frame(Study=c("A","B","C","D","Summary"),
nA = c(24,187,36,26,273),
median_A = c(4.9,5.69,8.866995074,8.5,NA),
Q1A =c(3,2.86,4.495073892,2,NA),
Q3A =c(8.5,9.78,14.96305419,32,NA),
nP = c(23,193,36,26,278),
median_P = c(7.2,6.79,8.990147783,12.5,NA),
Q1P =c(3.4,3.59,4.002463054,2,NA),
Q3P =c(10.9,10.12,12.06896552,43,NA),
W = c("10.6%","80.8%","8.0%","0.70%",NA),
E=c(-2.3,-1.1,-0.123152709,-4,-1.16881587),
UL=c(1.161473203,0.156288294,3.881699516,10.02689306,-0.039791047),
LL=c(-5.761473203,-2.356288294,-4.128004935,-18.02689306,-2.297840692))
#Calculate SE for box size#
df$SE <- (df$UL-df$E)/1.96
#Column for Confidence intervals for Drug A and Placebo, with 2 significant digit#
df$IQRA <- sprintf("%.2f (%.2f to %.2f)",df$median_A,df$Q1A, df$Q3A)
df$IQRP <- sprintf("%.2f (%.2f to %.2f)",df$median_P,df$Q1P, df$Q3P)
#Column for Confidence intervals for NET EFFECT, with 2 significant digit#
df$MD <- sprintf("%.2f (%.2f to %.2f)", df$E, df$LL, df$UL)
#Create a column with space for forest plot#
df$" "<- paste(rep(" ", 16), collapse = " ")
##Forest plot theme##
#To be modified as needed#
ftn <-forest_theme(
base_size = 16,
base_family = "serif",
ci_pch = 15,
ci_col = "black",
ci_lty = 1,
ci_lwd = 1,
ci_Theight = 0.25,
legend_name = " ",
legend_position = "right",legend_value = "",
xaxis_lwd = 1,
xaxis_cex = 0.7,
refline_lwd = 1,
refline_lty = "dashed",
refline_col = "red",
summary_fill = "blue",
summary_col = "blue",
footnote_cex = 0.4,
footnote_fontface = "plain",
footnote_col = "black",
title_just = c("center"),
title_cex = 1.1,
title_fontface = "bold",
title_col = "black",
show.rownames = FALSE)
##Table in Order for Forest plot##
#First get Column names#
colnames(df)
df2 <-df[,c(1,2,15,6,16,18,17)]
#Make NA cells empty
df2[5,3] <-c(" ")
df2[5,5] <-c(" ")
##Forestplot##
plot<-forest(df2,
est = df$E,
lower = df$LL,
upper = df$UL,
sizes = (df$SE/10),
ci_column = 6,
ref_line = 0,
arrow_lab = c("Drug A Better", "Placebo Better"),
xlim = c(-7, 6),
is_summary = c(FALSE,FALSE,FALSE,FALSE,TRUE),
xlog = FALSE,
ticks_digits = 0,ticks_at = c(-6,0,6),
theme = ftn)
##Show plot
print(plot, autofit = FALSE)

For-loop to dynamically change plot title

I have 5 variables which want to plot and export in one pdf. However, I have some trouble wiht the for-loop I am running,
parC <-list(unit = 100,labelx = "Time",labely = "Time",cols = "black",
pcex = .01, pch = 1,las = 1,
labax = seq(0,nrow(RP),100),
labay = seq(0,nrow(RP),100))
pdf("filename.pdf", onefile=TRUE)
for (i in RP_values){ # the values that are plotted
for (j in name) { # name is a list of names, so that the title changes dynamically
plotting(i, parC, j)
}
}
dev.off()
RP_values = list of values that is plotted
name = list of names to dynamically change the plotting title
plotting = an adjusted version from the plotRP() function of the crqa package. Here I added a main title to the plot.
The code for the plotting() function:
plotting <- function(RP, par, x){
if (exists("par") == FALSE){ # we use some defaults
## default values
unit = 2; labelx = "Time"; labely = "Time"
cols = "black"; pcex = .3; pch = 1; las = 0;
labax = seq(0, nrow(RP), unit); labay = seq(0, nrow(RP), unit);
} else { # we load the values that we desire
for (v in 1:length(par)) assign(names(par)[v], par[[v]])
}
xdim = nrow(RP)
ydim = ncol(RP)
RP = matrix(as.numeric(RP), nrow = xdim, ncol = ydim) # transform it for plotting
ind = which(RP == 1, arr.ind = T)
tstamp = seq(0, xdim, unit)
par(mar = c(5,5, 1, 3), font.axis = 2, cex.axis = 1,
font.lab = 2, cex.lab = 1.2)
plot(tstamp, tstamp, type = "n", xlab = "", ylab = "", xaxt = "n", yaxt = "n", main = x)
matpoints(ind[,1], ind[,2], cex = pcex, col = cols, pch = pch)
mtext(labelx, at = mean(tstamp), side = 1, line = 2.2, cex = 1.2, font = 2)
mtext(labely, at = mean(tstamp), side = 2, line = 2.2, cex = 1.2, font = 2)
# if (is.numeric(labax)){ ## it means there is some default
# mtext(labax, at = seq(1, nrow(RP), nrow(RP)/10), side = 1, line = .5, cex = 1, font = 2)
# mtext(labay, at = seq(1, nrow(RP), nrow(RP)/10), side = 2, line = .5, cex = 1, font = 2)
# } else{
mtext(labax, at = tstamp, side = 1, line = .5, cex = .8, font = 2, las = las)
mtext(labay, at = tstamp, side = 2, line = .5, cex = .8, font = 2, las = las)
# }
}
My problem is instead of 5 plots I get 25, where each plot appears 5 times, but with a different title. If I do not include the "j" part everything works fine, but of course do not have any main title for each plot.
I appreciate any help.
Best,
Johnson
From your description and comments, it appears you need an elementwise loop and not a nested loop. Consider retrieving all pairwise combinations of names and RP_values with expand.grid and iterate through them with mapply. Also, since parC depends on nrows of corresponding RP, have parC defined inside function for only two parameters (with more informative names like title instead of x):
plotting <- function(RP, title) {
parC <- list(unit=100, labelx="Time", labely="Time",
cols="black", pcex=.01, pch=1, las=1,
labax=seq(0, nrow(RP), 100),
labay=seq(0, nrow(RP), 100))
...
plot(tstamp, tstamp, type="n", xlab="", ylab="",
xaxt="n", yaxt="n", main=title)
...
}
params <- expand.grid(RP_values=RP_values, name=name)
out <- mapply(plotting, RP=params$RP_values, title=params$name)

How to fit logarithmic curve over the points in r?

I want to fit my points with logarithmic curve. Here is my data which contains x and y. I desire to plot x and y and the add a logarithmic fitting curve.
x<-structure(list(X2.y = c(39.99724745, 29.55541525, 23.39578201,
15.46797044, 10.52063652, 7.296161198, 6.232038434, 4.811851132,
4.641281547, 4.198523289, 3.325515839, 2.596563723, 1.894902523,
1.556380314), X5.y = c(62.76037622, 48.54726084, 37.71302646,
24.93942365, 17.71060023, 13.31130267, 10.36341862, 7.706914722,
7.170517624, 6.294292013, 4.917428837, 3.767836298, 2.891519878,
2.280974128), X10.y = c(77.83154815, 61.12151516, 47.19228808,
31.21034981, 22.47098182, 17.29384973, 13.09875178, 9.623698726,
8.845091983, 7.681873268, 5.971413758, 4.543320659, 3.551367285,
2.760718282), X25.y = c(96.87401383, 77.00911883, 59.16936025,
39.13368164, 28.48573658, 22.32580849, 16.55485248, 12.0455604,
10.96092113, 9.435085861, 7.303126501, 5.523147205, 4.385086234,
3.366876291), X50.y = c(111.0008027, 88.79545082, 68.05463659,
45.01166182, 32.94782526, 26.05880295, 19.11878542, 13.84223574,
12.53056405, 10.73571912, 8.291067088, 6.25003851, 5.003586577,
3.81655893), X100.y = c(125.0232816, 100.4947544, 76.87430545,
50.84623991, 37.37696657, 29.76423356, 21.66378667, 15.6256447,
14.08861698, 12.0267487, 9.271712877, 6.971562563, 5.61752001,
4.262921183)), class = "data.frame", row.names = c(NA, -14L))
I tried this:
single_idf<-function(x) {
idf<-x
durations = c(5/60, 10/60, 15/60, 30/60, 1, 2, 3, 4, 5, 6, 8, 12, 18, 24)
nd = length(durations)
Tp = c(2, 5, 10, 25, 50, 100)
nTp = length(Tp)
psym = seq(1, nTp)
# open new window for this graph, set plotting parameters for a single graph panel
windows()
par(mfrow = c(1,1), mar = c(5, 5, 5, 5), cex = 1)
# set up custom axis labels and grid line locations
ytick = c(1,2,3,4,5,6,7,8,9,10,20,30,40,50,60,70,80,90,100,
200,300,400,500,600,700,800,900,1000,1100,1200,1300,1400)
yticklab = as.character(ytick)
xgrid = c(5,6,7,8,9,10,15,20,30,40,50,60,120,180,240,300,360,
420,480,540,600,660,720,840,960,1080,1200,1320,1440)
xtick = c(5,10,15,20,30,60,120,180,240,300,360,480,720,1080,1440)
xticklab = c("5","10","15","20","30","60","2","3","4","5","6","8","12","18","24")
ymax1 = max(idf)
durations = durations*60
plot(durations, col=c("#FF00FF") ,lwd=c(1), idf[, 1],
xaxt="n",yaxt="n",
pch = psym[1], log = "xy",
xlim = c(4, 24*60), ylim = range(c(1,idf+150)),
xlab = "(min) Duration (hr)",
ylab = "Intensity (mm/hr)"
)
for (iT in 2:nTp) {
points(durations, idf[, iT], pch = psym[iT], col="#FF00FF",lwd=1)
}
for (iT in 1:nTp) {
mod.lm = lm(log10(idf[, iT]) ~ log10(durations))
b0 = mod.lm$coef[1]
b1 = mod.lm$coef[2]
yfit = log(10^(b0 + b1*log10(durations)))
lines(durations,col=c("#FF00FF"),yfit, lty = psym[iT],lwd=1)
}
}
But when I run this, the curves stands far away from the points. I want to see curves over the points. How can I arrange this?
single_idf(x)
Consider this as an option for you using ggplot2 and dplyr. Also added method='lm' to match OP expected output (Many thanks and credits to #AllanCameron for his magnificent advice):
library(ggplot2)
library(dplyr)
#Data
df <- data.frame(x,y)
#Plot
df %>%
pivot_longer(-y) %>%
ggplot(aes(x=log(y),y=log(value),color=name,group=name))+
geom_point()+
stat_smooth(geom = 'line',method = 'lm')
Output:
The main problem is that you were plotting the natural log of the fit rather than the fit itself.
If you change the line
yfit = log(10^(b0 + b1*log10(durations)))
To
yfit = 10^(b0 + b1*log10(durations))
And rerun your code, you get

R: How to plot multiple ARIMA forecasts on the same time-series

I would like to plot several forecasts on the same plot in different colours, however, the scale is off.
I'm open to any other methods.
reproducible example:
require(forecast)
# MAKING DATA
data <- c(3.86000, 19.55810, 19.51091, 20.74048, 20.71333, 29.04191, 30.28864, 25.64300, 23.33368, 23.70870 , 26.16600 ,27.61286 , 27.88409 , 28.41400 , 24.81957 , 24.60952, 27.49857, 32.08000 , 29.98000, 27.49000 , 237.26150, 266.35478, 338.30000, 377.69476, 528.65905, 780.00000 )
a.ts <- ts(data,start=c(2005,1),frequency=12)
# FORECASTS
arima011_css =stats::arima(x = a.ts, order = c(0, 1, 1), method = "CSS") # css estimate
arima011_forecast = forecast(arima011_css, h=10, level=c(99.5))
arima321_css =stats::arima(x = a.ts, order = c(3, 2, 1), method = "CSS") # css estimate
arima321_forecast = forecast(arima321_css, h=10, level=c(99.5))
# MY ATTEMPT AT PLOTS
plot(arima321_forecast)
par(new=T)
plot(arima011_forecast)
Here is something similar to #jay.sf but using ggplot2.
library(ggplot2)
autoplot(a.ts) +
autolayer(arima011_forecast, series = "ARIMA(0,1,1)", alpha = 0.5) +
autolayer(arima321_forecast, series = "ARIMA(3,2,1)", alpha = 0.5) +
guides(colour = guide_legend("Model"))
Created on 2020-05-19 by the reprex package (v0.3.0)
You could do a manual plot using a sequence of dates.
rn <- format(seq.Date(as.Date("2005-01-01"), by="months", length.out=12*3), "%Y.%m")
Your ARIMAs you'll need as.matrix form.
arima321_mat <- as.matrix(as.data.frame(arima321_forecast))
arima011_mat <- as.matrix(as.data.frame(arima011_forecast))
Some colors with different alpha=.
col.1 <- rainbow(2, ,.7)
col.2 <- rainbow(2, ,.7, alpha=.2)
For the CIs use polygon.
plot(data, type="l", xlim=c(1, length(rn)), ylim=c(0, 3500), xaxt="n", main="Forecasts")
axis(1, axTicks(1), labels=F)
mtext(rn[(seq(rn)-1) %% 5 == 0], 1, 1, at=axTicks(1))
lines((length(data)+1):length(rn), arima321_mat[,1], col=col.1[1], lwd=2)
polygon(c(27:36, 36:27), c(arima321_mat[,2], rev(arima321_mat[,3])), col=col.2[1],
border=NA)
lines((length(data)+1):length(rn), arima011_mat[,1], col=col.1[2], lwd=3)
polygon(c(27:36, 36:27), c(arima011_mat[,2], rev(arima011_mat[,3])), col=col.2[2],
border=NA)
legend("topleft", legend=c("ARIMA(3,2,1)", "ARIMA(0,1,1)"), col=col.1, lwd=2, cex=.9)
Edit: To avoid the repetition of lines and polygon calls, you may unite them using Map.
mats <- list(arima321_mat, arima011_mat) ## put matrices into list
plot(.)
axis(.)
mtext(.)
Map(function(i) {
lines((length(data)+1):length(rn), mats[[i]][,1], col=col.1[i], lwd=2)
polygon(c(27:36, 36:27), c(mats[[i]][,2], rev(mats[[i]][,3])), col=col.2[i], border=NA)
}, 1:2)
legend(.)
require(forecast)
data <- c(3.86000, 19.55810, 19.51091, 20.74048, 20.71333, 29.04191, 30.28864, 25.64300, 23.33368, 23.70870 , 26.16600 ,27.61286 , 27.88409 , 28.41400 , 24.81957 , 24.60952, 27.49857, 32.08000 , 29.98000, 27.49000 , 237.26150, 266.35478, 338.30000, 377.69476, 528.65905, 780.00000 )
a.ts <- ts(data,start=c(2005,1),frequency=12)
arima011_css =stats::arima(x = a.ts, order = c(0, 1, 1), method = "CSS") # css estimate
arima011_forecast = predict(arima011_css, n.ahead = 2)$pred
arima321_css =stats::arima(x = a.ts, order = c(3, 2, 1), method = "CSS") # css estimate
arima321_forecast = predict(arima321_css, n.ahead = 2)$pred
plot(a.ts, type = "o", xlim = c(2005, 2007.5) , ylim = c(-1, 1200) , ylab = "price" ,main = "2 month Forecast")
range = c(2007+(3/12), 2007+(4/12)) # adding the dates for the prediction
lines(y = arima011_forecast , x = range , type = "o", col = "red")
lines(y = arima321_forecast, x = range , type = "o", col = "blue")

labeling points on an archetype archmap

How might one add labels to an archmap from the archetypes package? Or alternatively, would it be possible to recreate the archmap output in ggplot?
Using code from the SportsAnalytics demo (I hope this isn't bad form)
library("SportsAnalytics")
library("archetypes")
data("NBAPlayerStatistics0910")
dat <- subset(NBAPlayerStatistics0910,
select = c(Team, Name, Position,
TotalMinutesPlayed, FieldGoalsMade))
mat <- as.matrix(subset(dat, select = c(TotalMinutesPlayed, FieldGoalsMade)))
a3 <- archetypes(mat, 3)
archmap(a3)
I'd like the player names ( NBAPlayerStatistics0910$Name ) over the points on the chart. Something like below but more readable.
If you don't mind tweaking things a bit, you can start with the archmap() function base, toss in an extra parameter and add a text() call:
amap2 <- function (object, a.names, projection = simplex_projection, projection_args = list(),
rotate = 0, cex = 1.5, col = 1, pch = 1, xlab = "", ylab = "",
axes = FALSE, asp = TRUE, ...)
{
stopifnot("archetypes" %in% class(object))
stopifnot(is.function(projection))
k <- object$k
if (k < 3) {
stop("Need at least 3 archetypes.\n")
}
cmds <- do.call(projection, c(list(parameters(object)), projection_args))
if (rotate != 0) {
a <- pi * rotate/180
A <- matrix(c(cos(a), -sin(a), sin(a), cos(a)), ncol = 2)
cmds <- cmds %*% A
}
hmds <- chull(cmds)
active <- 1:k %in% hmds
plot(cmds, type = "n", xlab = xlab, ylab = ylab, axes = axes,
asp = asp, ...)
points(coef(object) %*% cmds, col = col, pch = pch)
######################
# PLAY WITH THIS BIT #
######################
text(coef(object) %*% cmds, a.names, pos=4)
######################
rad <- ceiling(log10(k)) + 1.5
polygon(cmds[hmds, ])
points(cmds[active, ], pch = 21, cex = rad * cex, bg = "grey")
text(cmds[active, ], labels = (1:k)[active], cex = cex)
if (any(!active)) {
points(cmds[!active, , drop = FALSE], pch = 21, cex = rad *
cex, bg = "white", fg = "grey")
text(cmds[!active, , drop = FALSE], labels = (1:k)[!active],
cex = cex, col = "grey20")
}
invisible(cmds)
}
amap2(a3, dat$Name)
Obviously, my completely quick stab is not the end result you're looking for, but it should help you get on your way (if I read what you want to do correctly).

Resources