Complete beginner at R here trying to perform nonmetric multidimensional scaling on a 95x95 matrix of similarities where 8 corresponds to very similar and 1 corresponds to very dissimilar. I also have an additional column (96th) signifying type and ranging from 0 to 1.
First I load the data:
dsimilarity <- read.table("d95x95matrix.txt",
header = T,
row.names = c("Y1", "Y2", "Y3", "Y4", "Y5", "Y6", "Y7", "Y8", "Y9", "Y10", "Y11", "Y12", "Y13", "Y14", "Y15", "Y16", "Y17", "Y18", "Y19", "Y20",
"Y21", "Y22", "Y23", "Y24", "Y25", "Y26", "Y27", "Y28", "Y29", "Y30", "Y31", "Y32", "Y33", "Y34", "Y35", "Y36", "Y37", "Y38", "Y39", "Y40",
"Y41", "Y42", "Y43", "Y44", "Y45", "Y46", "Y47", "Y48", "Y49", "Y50", "Y51", "Y52", "Y53", "Y54", "Y55", "Y56", "Y57", "Y58", "Y59", "Y60",
"Y61", "Y62", "Y63", "Y64", "Y65", "Y66", "Y67", "Y68", "Y69", "Y70", "Y71", "Y72", "Y73", "Y74", "Y75", "Y76", "Y77", "Y78", "Y79", "Y80",
"Y81", "Y82", "Y83", "Y84", "Y85", "Y86", "Y87", "Y88", "Y89", "Y90", "Y91", "Y92", "Y93", "Y94", "Y95"))
I convert the matrix of similarities into a matrix of dissimilarities, and exclude the 96th column:
ddissimilarity <- dsimilarity; ddissimilarity[1:95, 1:95] = 8 - ddissimilarity[1:95, 1:95]
Then I perform the nonmetric MDS using the Smacof function:
ordinal.mds.results <- smacofSym(ddissimilarity[1:95, 1:95],
type = c("ordinal"),
ndim = 2,
ties = "primary",
verbose = T )
I create a new data frame (I'm following a guide and don't really know what's going on here):
mds.config <- as.data.frame(ordinal.mds.results$conf)
All well and good thus far (to my knowledge). However at this point I will try to create an xyplot of the data and get a good result using this code:
xyplot(D2 ~ D1, data = mds.config,
aspect = 1,
main = "Figure 1. MDS solution",
panel = function (x, y) {
panel.xyplot(x, y, col = "black")
panel.text(x, y-.03, labels = rownames(mds.config),
cex = .75)
},
xlab = "MDS Axis 1",
ylab = "MDS Axis 2",
xlim = c(-1.1, 1.1),
ylim = c(-1.1, 1.1))
Now I want to create a figure that incorporates the type in column 96th and assigns different colors to observations of the two different types. However, can't quite figure out how to do so. Does anyone have any ideas of where I'm going wrong here?
xyplot(D2 ~ D1, data = mds.config ~ ddissimilarity[96:96, 96:96],
aspect = 1,
main = "Figure 1. MDS solution",
panel = function (x, y) {
panel.xyplot(x, y, col = "black")
panel.text(x, y-.03, labels = rownames(mds.config),
cex = .75)
},
xlab = "MDS Axis 1",
ylab = "MDS Axis 2",
xlim = c(-1.1, 1.1),
ylim = c(-1.1, 1.1),
group = "Type")
Related
To expand upon visualize a list of colors/palette in R I am trying to display a series of custom colour palettes in R in a single figure. Is there a way that I can expand on one of the methods listed in the link to display the list of palettes below:
convert_coolers <- function(coolers_string){
strsplit(coolers_string, split = ", ")[[1]]
}
# diverging
storm_panels <- convert_coolers("#001219, #005f73, #0a9396, #94d2bd, #e9d8a6, #ee9b00, #ca6702, #bb3e03, #ae2012, #9b2226")
harry_tipper <- convert_coolers("#f72585, #b5179e, #7209b7, #560bad, #480ca8, #3a0ca3, #3f37c9, #4361ee, #4895ef, #4cc9f0")
firepit <- convert_coolers("#03071e, #370617, #6a040f, #9d0208, #d00000, #dc2f02, #e85d04, #f48c06, #faa307, #ffba08")
# sequences
the_deep <- convert_coolers("#03045e, #023e8a, #0077b6, #0096c7, #00b4d8, #48cae4, #90e0ef, #ade8f4, #caf0f8")
earth <- convert_coolers("#ede0d4, #e6ccb2, #ddb892, #b08968, #7f5539, #9c6644")
# categorical
pastal_rainbow <- convert_coolers("#ff595e, #ffca3a, #8ac926, #1982c4, #6a4c93")
fisherman <- convert_coolers("#353535, #3c6e71, #ffffff, #d9d9d9, #284b63")
in a figure resembling that displayed by RColorBrewer::display.brewer.all()? i.e. with palettes stacked as horizontal bars labelled to the left with the palette title.
I have been trying to dissect the method out from the RColorBrewer function but am finding that it depends too much on internal variables for me to understand what is going on.
I achieved what I set out to do by modifying RColorBrewer::display.brewer.all
Following directly on from the code in the question:
display_custom_palettes <- function(palette_list, palette_names){
nr <- length(palette_list)
nc <- max(lengths(palette_list))
ylim <- c(0, nr)
oldpar <- par(mgp = c(2, 0.25, 0))
on.exit(par(oldpar))
plot(1, 1, xlim = c(0, nc), ylim = ylim, type = "n", axes = FALSE,
bty = "n", xlab = "", ylab = "")
for (i in 1:nr) {
nj <- length(palette_list[[i]])
shadi <- palette_list[[i]]
rect(xleft = 0:(nj - 1), ybottom = i - 1, xright = 1:nj,
ytop = i - 0.2, col = shadi, border = "light grey")
}
text(rep(-0.1, nr), (1:nr) - 0.6, labels = palette_names, xpd = TRUE,
adj = 1)
}
plot.new()
palette_list <- list(storm_panels, harry_tipper, firepit, the_deep, earth, pastal_rainbow, fisherman)
palette_names <- c("storm panels", "harry tipper", "firepit", "the deep", "earth", "rainbow", "fisherman")
display_custom_palettes(palette_list, palette_names)
I am trying to create an heatmap with a row annotation inclusive of p-values as reported in the example in the guide for the use of the ComplexHeatmap package (https://jokergoo.github.io/ComplexHeatmap-reference/book/heatmap-annotations.html#simple-annotation).
I tried to reproduce the example:
library(ComplexHeatmap)
library(circlize) # colorRamp2 function
set.seed(123)
pvalue = 10^-runif(10, min = 0, max = 3)
is_sig = pvalue < 0.01
pch = rep("*", 10)
pch[!is_sig] = NA
# color mapping for -log10(pvalue)
pvalue_col_fun = colorRamp2(c(0, 2, 3), c("green", "white", "red"))
ha = HeatmapAnnotation(
pvalue = anno_simple(-log10(pvalue), col = pvalue_col_fun, pch = pch),
annotation_name_side = "left")
ht = Heatmap(matrix(rnorm(100), 10), name = "mat", top_annotation = ha)
# now we generate two legends, one for the p-value
# see how we define the legend for pvalue
lgd_pvalue = Legend(title = "p-value", col = pvalue_col_fun, at = c(0, 1, 2, 3),
labels = c("1", "0.1", "0.01", "0.001"))
# and one for the significant p-values
lgd_sig = Legend(pch = "*", type = "points", labels = "< 0.01")
# these two self-defined legends are added to the plot by `annotation_legend_list`
draw(ht, annotation_legend_list = list(lgd_pvalue, lgd_sig))
but when I am creating the annotation ha I get the error
Error in anno_simple(-log10(pvalue), col = pvalue_col_fun, pch = pch) :
could not find function "anno_simple"
likely showing a possible problem with the package.
The version of the ComplexHeatmap package I am running is 1.20.0.
The R version is 3.5.1.
Could you please help me solving this problem?
Thanks
> dput(head(inputData))
structure(list(Date = c("2018:07:00", "2018:06:00", "2018:05:00",
"2018:04:00", "2018:03:00", "2018:02:00"), IIP = c(125.8, 127.5,
129.7, 122.6, 140.3, 127.4), CPI = c(139.8, 138.5, 137.8, 137.1,
136.5, 136.4), `Term Spread` = c(1.580025, 1.89438, 2.020112,
1.899074, 1.470544, 1.776862), RealMoney = c(142713.9916, 140728.6495,
140032.2762, 139845.5215, 139816.4682, 139625.865), NSE50 = c(10991.15682,
10742.97381, 10664.44773, 10472.93333, 10232.61842, 10533.10526
), CallMoneyRate = c(6.161175, 6.10112, 5.912088, 5.902226, 5.949956,
5.925538), STCreditSpread = c(-0.4977, -0.3619, 0.4923, 0.1592,
0.3819, -0.1363)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
I want to make my autoregressive plot like this plot:
#------> importing all libraries
library(readr)
install.packages("lubridtae")
library("lubridate")
install.packages("forecast")
library('ggplot2')
library('fpp')
library('forecast')
library('tseries')
#--------->reading data
inputData <- read_csv("C:/Users/sanat/Downloads/exercise_1.csv")
#--------->calculating the lag=1 for NSE50
diff_NSE50<-(diff(inputData$NSE50, lag = 1, differences = 1)/lag(inputData$NSE50))
diff_RealM2<-(diff(inputData$RealMoney, lag = 1, differences = 1)/lag(inputData$RealMoney))
plot.ts(diff_NSE50)
#--------->
lm_fit = dynlm(IIP ~ CallMoneyRate + STCreditSpread + diff_NSE50 + diff_RealM2, data = inputData)
summary(lm_fit)
#--------->
inputData_ts = ts(inputData, frequency = 12, start = 2012)
#--------->area of my doubt is here
VAR_data <- window(ts.union(ts(inputData$IIP), ts(inputData$CallMoneyRate)))
VAR_est <- VAR(y = VAR_data, p = 12)
plot(VAR_est)
I want to my plots to get plotted together in same plot. How do I serparate the var() plots to two separate ones.
Current plot:
My dataset :
dataset
Okay, so this still needs some work, but it should set the right framework for you. I would look more into working with the ggplot2 for future.
Few extra packages needed, namely library(vars) and library(dynlm).
Starting from,
VAR_est <- VAR(y = VAR_data, p = 12)
Now we extract the values we want from the VAR_est object.
y <- as.numeric(VAR_est$y[,1])
z <- as.numeric(VAR_est$y[,2])
x <- 1:length(y)
## second data set on a very different scale
par(mar = c(5, 4, 4, 4) + 0.3) # Leave space for z axis
plot(x, y, type = "l") # first plot
par(new = TRUE)
plot(x, z, type = "l", axes = FALSE, bty = "n", xlab = "", ylab = "")
axis(side=4, at = pretty(range(z)))
mtext("z", side=4, line=3)
I will leave you to add the dotted lines on etc...
Hint: Decompose the VAR_est object, for example, VAR_est$datamat, then see which bit of data corresponds to the part of the plot you want.
Used some of this
I am plotting a time series with the timePlot function of the open air package of R. The graph has grey grid lines in the background that I would like to turn off but I do not find a way to do it. I would expect something simple such as grid = FALSE, but that is not the case. It appears to be rather complex, requiring the use of extra arguments which are passed to xyplot of the library lattice. I believe the answer lies some where in the par.settings function but all attempts have failed. Does anyone have any suggestions to this issue?
Here is by script:
timeozone <- import(i, date="date", date.format = "%m/%d/%Y", header=TRUE, na.strings="")
ROMO = timePlot(timeozone, pollutant = c("C7", "C9", "C10"), group = TRUE, stack = FALSE,y.relation = "same", date.breaks = 9, lty = c(1,2,3), lwd = c(2, 3, 3), fontsize = 15, cols = c("black", "black"), ylab = "Ozone (ppbv)")
panel = function(x, y) {
panel.grid(h = 0, v = 0)
panel.xyplot(x,y)
}
This question already has answers here:
Shading a kernel density plot between two points.
(5 answers)
Closed 7 years ago.
I've written code to plot density data for variations of an A/B test. I'd like to improve the visual by shading (with the fill being slightly transparent) the area below each curve. I'm currently using matplot, but understand ggplot might be a better option.
Any ideas? Thanks.
# Setup data frame - these are results from an A/B experiment
conv_data = data.frame(
VarNames = c("Variation 1", "Variation 2", "Variation 3") # Set variation names
,NumSuccess = c(1,90,899) # Set number of successes / conversions
,NumTrials = c(10,100,1070) # Set number of trials
)
conv_data$NumFailures = conv_data$NumTrials - conv_data$NumSuccess # Set number of failures [no conversions]
num_var = NROW(conv_data) # Set total number of variations
plot_col = rainbow(num_var) # Set plot colors
get_density_data <- function(n_var, s, f) {
x = seq(0,1,length.out=100) # 0.01,0.02,0.03...1
dens_data = matrix(data = NA, nrow=length(x), ncol=(n_var+1))
dens_data[,1] = x
# set density data
for(j in 1:n_var) {
# +1 to s[], f[] to ensure uniform prior
dens_data[,j+1] = dbeta(x, s[j]+1, f[j]+1)
}
return(dens_data)
}
density_data = get_density_data(num_var, conv_data$NumSuccess, conv_data$NumFailures)
matplot(density_data[,1]*100, density_data[,-1], type = "l", lty = 1, col = plot_col, ylab = "Probability Density", xlab = "Conversion Rate %", yaxt = "n")
legend("topleft", col=plot_col, legend = conv_data$VarNames, lwd = 1)
This produces the following plot:
# Setup data frame - these are results from an A/B experiment
conv_data = data.frame(
VarNames = c("Variation 1", "Variation 2", "Variation 3") # Set variation names
,NumSuccess = c(1,90,899) # Set number of successes / conversions
,NumTrials = c(10,100,1070) # Set number of trials
)
conv_data$NumFailures = conv_data$NumTrials - conv_data$NumSuccess # Set number of failures [no conversions]
num_var = NROW(conv_data) # Set total number of variations
plot_col = rainbow(num_var) # Set plot colors
get_density_data <- function(n_var, s, f) {
x = seq(0,1,length.out=100) # 0.01,0.02,0.03...1
dens_data = matrix(data = NA, nrow=length(x), ncol=(n_var+1))
dens_data[,1] = x
# set density data
for(j in 1:n_var) {
# +1 to s[], f[] to ensure uniform prior
dens_data[,j+1] = dbeta(x, s[j]+1, f[j]+1)
}
return(dens_data)
}
density_data = get_density_data(num_var, conv_data$NumSuccess, conv_data$NumFailures)
matplot(density_data[,1]*100, density_data[,-1], type = "l",
lty = 1, col = plot_col, ylab = "Probability Density",
xlab = "Conversion Rate %", yaxt = "n")
legend("topleft", col=plot_col, legend = conv_data$VarNames, lwd = 1)
## and add this part
for (ii in seq_along(plot_col))
polygon(c(density_data[, 1] * 100, rev(density_data[, 1] * 100)),
c(density_data[, ii + 1], rep(0, nrow(density_data))),
col = adjustcolor(plot_col[ii], alpha.f = .25))
Was able to answer own question with:
df = as.data.frame(t(conversion_data))
dfs = stack(df)
ggplot(dfs, aes(x=values)) + geom_density(aes(group=ind, colour=ind, fill=ind), alpha=0.3)