Obtain Specific Column From Correlation Heatmap - r
I have a dataset called allDataNoNAs which has 19 columns for different variables.
First, using the packages:
library(corrplot)
library(corrgram)
library(GGally)
From dput(cor(allDataNoNAs)) - my sample correlation matrix:
structure(c(1, 0.116349634765185, 0.547691763989625, 0.291991636906379,
0.52347996305183, 0.497643100595069, 0.0129815335193983, 0.418358158731718,
0.471373794854162, 0.505419557447448, 0.276128001065287, 0.114921357444725,
0.483335903285957, 0.0322484793148408, 0.360658177617753, 0.163989166178892,
0.145358618474009, 0.549222657694447, 0.0283182668409127, 0.116349634765185,
1, 0.542678597132992, 0.228195095236888, 0.341733815370385, 0.449234592784623,
0.040928188236085, 0.306532564182676, 0.246214540314882, 0.368735099181333,
0.0974107116463065, 0.118633970020044, 0.0663374870504325, 0.00324065971750887,
0.429993810524071, 0.0660128392326907, -0.208834964557656, 0.517351517191311,
0.00340750071414792, 0.547691763989625, 0.542678597132992, 1,
0.503509567685111, 0.834074832294578, 0.87458120333133, 0.11646402536793,
0.709723789822138, 0.545685105436571, 0.691116703644981, 0.251055925294139,
0.137145560677364, 0.677547477041307, 0.0138408591129587, 0.574449939471671,
0.289088705565296, -0.0151310469001056, 0.995636799856898, 0.00806307965229721,
0.291991636906379, 0.228195095236888, 0.503509567685111, 1, 0.5928306942291,
0.419860437848609, 0.202947501799892, 0.600369342626932, 0.3036531414462,
0.31218278418869, 0.0665676462597262, 0.0706549436236251, 0.463190217918095,
0.017439704947323, 0.20361820902537, 0.563054610829996, 0.367022482937022,
0.539278002253207, 0.0146950545295136, 0.52347996305183, 0.341733815370385,
0.834074832294578, 0.5928306942291, 1, 0.877884027429435, 0.249913906532112,
0.770346073267575, 0.581478562237408, 0.62684315599784, 0.158950811299692,
0.0709795609883571, 0.707727230043996, 0.0374999988906861, 0.36979003972634,
0.532230871495189, 0.237891979696682, 0.868052149324532, 0.0301272383779361,
0.497643100595069, 0.449234592784623, 0.87458120333133, 0.419860437848609,
0.877884027429435, 1, 0.0578337272432955, 0.625271696806798,
0.642882384190134, 0.742158234646655, 0.18412573265697, 0.0846354163480033,
0.636899685921357, 0.00136017420567482, 0.442530075276962, 0.166101818463978,
-0.122330359121607, 0.870582759035652, -0.00536057317986459,
0.0129815335193983, 0.040928188236085, 0.11646402536793, 0.202947501799892,
0.249913906532112, 0.0578337272432955, 1, 0.168170227241747,
0.0103942343836554, 0.0146416101891029, 0.0274638568337838, 0.0232209281980358,
0.438976017479895, 0.00664290788845518, 0.0558346558356874, 0.576321333713829,
0.205483416691572, 0.160939456560856, 0.00633413505889225, 0.418358158731718,
0.306532564182676, 0.709723789822138, 0.600369342626932, 0.770346073267575,
0.625271696806798, 0.168170227241747, 1, 0.421695218774506, 0.481156860252289,
0.109952341757847, 0.0400601095104961, 0.560225169205313, 0.0470119529030615,
0.311744196849895, 0.445382213345548, 0.237447342653341, 0.743416109744227,
0.0437634515476897, 0.471373794854162, 0.246214540314882, 0.545685105436571,
0.3036531414462, 0.581478562237408, 0.642882384190134, 0.0103942343836554,
0.421695218774506, 1, 0.809375500184827, 0.201944501698817, 0.098871956246993,
0.46496436444905, -0.00410066612855966, 0.34093890132072, 0.0955588133868073,
-0.0561387410393148, 0.542950578488189, -0.00611403179202383,
0.505419557447448, 0.368735099181333, 0.691116703644981, 0.31218278418869,
0.62684315599784, 0.742158234646655, 0.0146416101891029, 0.481156860252289,
0.809375500184827, 1, 0.166272569833104, 0.0642480288154233,
0.493094322495752, -0.0143825404077684, 0.420509020130084, 0.0763222806834054,
-0.137267266981321, 0.675599964220607, -0.0155210421858565, 0.276128001065287,
0.0974107116463065, 0.251055925294139, 0.0665676462597262, 0.158950811299692,
0.18412573265697, 0.0274638568337838, 0.109952341757847, 0.201944501698817,
0.166272569833104, 1, 0.803405447808051, 0.209386276142885, 0.019611871344881,
0.698294870666248, 0.024793538949468, 0.00921044459805193, 0.243573446480239,
0.0182042685108301, 0.114921357444725, 0.118633970020044, 0.137145560677364,
0.0706549436236251, 0.0709795609883571, 0.0846354163480033, 0.0232209281980358,
0.0400601095104961, 0.098871956246993, 0.0642480288154233, 0.803405447808051,
1, 0.0518698024423593, 0.0195654257050434, 0.534756730460756,
0.00851489725348713, -0.00157091125920201, 0.131294046914676,
0.0196406046872536, 0.483335903285957, 0.0663374870504325, 0.677547477041307,
0.463190217918095, 0.707727230043996, 0.636899685921357, 0.438976017479895,
0.560225169205313, 0.46496436444905, 0.493094322495752, 0.209386276142885,
0.0518698024423593, 1, 0.00595760440442105, 0.332127234258051,
0.402991372365854, 0.130619402830307, 0.702714128886842, 0.000759081836999778,
0.0322484793148408, 0.00324065971750887, 0.0138408591129587,
0.017439704947323, 0.0374999988906861, 0.00136017420567482, 0.00664290788845518,
0.0470119529030615, -0.00410066612855966, -0.0143825404077684,
0.019611871344881, 0.0195654257050434, 0.00595760440442105, 1,
0.0240839070381978, 0.0543455541899934, 0.121224926189405, 0.0181415673103803,
0.999560527964641, 0.360658177617753, 0.429993810524071, 0.574449939471671,
0.20361820902537, 0.36979003972634, 0.442530075276962, 0.0558346558356874,
0.311744196849895, 0.34093890132072, 0.420509020130084, 0.698294870666248,
0.534756730460756, 0.332127234258051, 0.0240839070381978, 1,
0.101917219961389, -0.0673808764564209, 0.55786516587572, 0.0226512629105265,
0.163989166178892, 0.0660128392326907, 0.289088705565296, 0.563054610829996,
0.532230871495189, 0.166101818463978, 0.576321333713829, 0.445382213345548,
0.0955588133868073, 0.0763222806834054, 0.024793538949468, 0.00851489725348713,
0.402991372365854, 0.0543455541899934, 0.101917219961389, 1,
0.562085375561417, 0.360237027957389, 0.0519977244267395, 0.145358618474009,
-0.208834964557656, -0.0151310469001056, 0.367022482937022, 0.237891979696682,
-0.122330359121607, 0.205483416691572, 0.237447342653341, -0.0561387410393148,
-0.137267266981321, 0.00921044459805193, -0.00157091125920201,
0.130619402830307, 0.121224926189405, -0.0673808764564209, 0.562085375561417,
1, 0.041068964081757, 0.119487910165712, 0.549222657694447, 0.517351517191311,
0.995636799856898, 0.539278002253207, 0.868052149324532, 0.870582759035652,
0.160939456560856, 0.743416109744227, 0.542950578488189, 0.675599964220607,
0.243573446480239, 0.131294046914676, 0.702714128886842, 0.0181415673103803,
0.55786516587572, 0.360237027957389, 0.041068964081757, 1, 0.0121897372730556,
0.0283182668409127, 0.00340750071414792, 0.00806307965229721,
0.0146950545295136, 0.0301272383779361, -0.00536057317986459,
0.00633413505889225, 0.0437634515476897, -0.00611403179202383,
-0.0155210421858565, 0.0182042685108301, 0.0196406046872536,
0.000759081836999778, 0.999560527964641, 0.0226512629105265,
0.0519977244267395, 0.119487910165712, 0.0121897372730556, 1), .Dim = c(19L,
19L), .Dimnames = list(c("RPE", "Duration", "Distance", "Max Speed",
"HML Distance", "HML Efforts", "Sprint Distance", "Sprints",
"Accelerations", "Decelerations", "Average Heart Rate", "Max Heart Rate",
"Average Metabolic Power", "Dynamic Stress Load", "Heart Rate Exertion",
"High Speed Running (Relative)", "HML Density", "Speed Intensity",
"Impacts"), c("RPE", "Duration", "Distance", "Max Speed", "HML Distance",
"HML Efforts", "Sprint Distance", "Sprints", "Accelerations",
"Decelerations", "Average Heart Rate", "Max Heart Rate", "Average Metabolic Power",
"Dynamic Stress Load", "Heart Rate Exertion", "High Speed Running (Relative)",
"HML Density", "Speed Intensity", "Impacts")))
Using the correlation data from above, I am trying to obtain just the first column, which shows the correlation between RPE and the other 18 variables. I can get the numbers with cor(allDataNoNAs)[,1], but when I try to plot that as a correlogram using corrplot(corrgram(allDataNoNAs))[,1] it plots all 19x19 correlations and is a mess, when I just need the RPE correlation column.
Using ggcorr() as such:
ggcorr(allDataNoNAs, method = c("everything"), label = TRUE,label_size = 2, label_round = 4)
I obtain the cleaner looking heatmap that I want. But, switching the data parameter to allDataNoNAs[,1] or cor(allDataNoNAs)[,1] does not do the trick to only obtain that one RPE correlation column.
Is it possible to only return one column of a correlation heatmap?
I was able to figure out an answer to my own question. It is not exactly what I wanted (I wanted it from ggcorr()), but this version suffices:
With my same variable names as before
# Custom diverging palette (dark blue -> white -> dark red) used in place of
# corrplot's default colours.
col2 <- colorRampPalette(
  c(
    "#00007F", "blue", "#007FFF", "cyan",
    "white",
    "yellow", "#FF7F00", "red", "#7F0000"
  )
)

# Correlate RPE (x) with each of columns 2 to 19 (y). Because x is a single
# vector, cor() returns a one-row matrix instead of the full 19 x 19 matrix.
myCorDF <- cor(
  x = allDataNoNAs[["RPE"]],
  y = allDataNoNAs[2:19],
  use = "everything"
)

# Draw that single row of RPE correlations as a coloured strip, printing each
# coefficient in black on its tile.
corrplot(
  myCorDF,
  method = "color",
  col = col2(50),
  addCoef.col = "black",
  tl.srt = 45,
  cl.cex = 0.56
)
A generic code removing my colors would look like this:
# Generic template: correlate one reference column (x) with columns 2 to 5 of
# DF, then plot the resulting one-row correlation matrix with default colours.
corDF <- cor(
  x = DF$x,
  y = DF[2:5],
  use = "everything"
)
corrplot(
  corDF,
  method = "color",
  addCoef.col = "black",
  tl.srt = 45,
  cl.cex = 0.56
)
Related
In R language with (reactable) How to align all bars for different values when using data_bars() and setting text_position = "outside-base"
I want all bars to line up neatly and not look messy when I use data_bars() for a column inside reactable(). When the value is less than 10, the bar starts to the left of the bars for values > 10, because each bar starts right after its number when I position the text outside-base. I still want the numbers positioned outside-base, but with the bars aligned consistently. Here is a simple R example: data <- data.frame( Group = c("Red Group 1","Red Group 2","Red Group 3","Red Group 4","Red Group 5", "Blue Group 1","Blue Group 2","Blue Group 3","Blue Group 4","Blue Group 5", "Green Group 1","Green Group 2","Green Group 3","Green Group 4","Green Group 5"), Pct1 = c(.27, .82, .044, .68, .78, .74, .66, .33, .23, .02, .50, .055, .40, .70, .60), Pct2 = c(.33, .17, .87, .54, .037, .84, .72, .061, .48, .077, .01, .39, .60, .55, .81) ) reactable( data, pagination = FALSE, defaultColDef = colDef( cell = data_bars(data, text_position = "outside-base", number_fmt = scales::percent) ) )
text_position = 'inside-base' would make it a bit less messy, might want to use a different text color ...
svyboxplot results change when using different categories versus subsets
I am new to the survey package and have a mysterious problem. I created data weights using the anesrake package and then created a survey design. I have a problem when using svyboxplot with a grouping variable: it draws similar boxplots for each grouping category, which is not correct. When I investigated the problem by subsetting each of my categories (15 of them), the values are different for each area, giving different boxplots for each area. Can anyone help me? I am desperate! Here's a sample to test: library(tidyverse) col <- tibble( name = c("seura 1", "seura 2", "seura 3", "seura 4", "seura 5", "seura 6", "seura 7", "seura 8", "seura 9" , "seura 10", "seura 11", "seura 12"), riistakeskus = c("Keski-Suomi","Keski-Suomi","Keski-Suomi","Keski-Suomi","Keski-Suomi","Satakunta","Satakunta", "Satakunta","Uusimaa", "Uusimaa","Uusimaa","Uusimaa"), hirvi_sarvisuositus = c(1,4,5,3,7,5,3,4,6,5,8,9), weights = c(1.1461438,1.1461438,1.1461438,1.1461438,1.1461438,0.5107815,0.5107815,0.5107815,2.0461937, 2.0461937,2.0461937,2.0461937) ) library(survey) my_des1 <- svydesign(data = col, weights = ~weights, ids = ~1) b <- svyboxplot(hirvi_sarvisuositus~factor(riistakeskus), my_des1, all.outliers = F, ylim = c(0,10)) svyboxplot(hirvi_sarvisuositus~1, subset(my_des1, riistakeskus == "Keski-Suomi"), ylim = c(0,10)) svyboxplot(hirvi_sarvisuositus~1, subset(my_des1, riistakeskus == "Satakunta"), ylim = c(0,10)) svyboxplot(hirvi_sarvisuositus~1, subset(my_des1, riistakeskus == "Uusimaa"), ylim = c(0,10))
I had the same problem and would like to add to Anthonys answer, but I cannot comment yet. There is an error in survey:::svyboxplot.default as Anthony indicates but it does not seem to have anything to do with data points. If you use keep.var = FALSE with FUN=svyquantile it does return the overall quantiles instead of the group specific quantiles. Compare svyby(~hirvi_sarvisuositus, ~riistakeskus, my_des1, svyquantile, ci = FALSE, keep.var = FALSE, quantiles = c(0, 0.25, 0.5, 0.75, 1), na.rm = TRUE) with svyquantile(~hirvi_sarvisuositus, my_des1, quantiles = c(0, 0.25, 0.5, 0.75, 1), na.rm = TRUE) Note that svyquantile can not compute the SE for some quantiles. If you use keep.var=TRUE instead and try to extract the CIs, you get quantiles by group. svyby(~hirvi_sarvisuositus, ~riistakeskus, my_des1, svyquantile, quantiles = c(0, 0.25, 0.5, 0.75, 1), ci=TRUE, na.rm = TRUE, keep.var = TRUE, vartype = "ci") However, you can't change the svyquantile function options when calling svyboxplot. This needs to be fixed in the package. You could built your boxplots yourself instead. A simple base R solution: q <- svyby(~hirvi_sarvisuositus, ~riistakeskus, my_des1, svyquantile, quantiles = c(0, 0.25, 0.5, 0.75, 1), na.rm = TRUE, ci=TRUE, keep.var = TRUE, vartype = "ci") boxstats <- q[,2:6] bxp(list(stats=t(as.matrix(boxstats)), n = c(100,100,100), names = rownames(boxstats))) To prevent whiskers inside the box, you can change qrule to use a different way to calculate quantiles (e.g. qrule="hf7" for the quantile() default). An alternative solution would be to use a weighted boxplot from ggplot2: library(ggplot2) ggplot(data=col, aes(y=hirvi_sarvisuositus, x=factor(riistakeskus), weight=weights)) + geom_boxplot() Please note that ggplot2 uses a slightly different estimation of the hinges, see help(geom_boxplot), which influences the results for low N.
Great reproducible example, thank you! This result especially looks silly: svyboxplot(hirvi_sarvisuositus~riistakeskus,my_des1,ylim=c(0,10)) I think this largely happens because svyquantile just needs more data points to get reasonable estimates. If you look at the code inside survey:::svyboxplot.default, you can find the line that produces the same quantile results for every group: svyby(~hirvi_sarvisuositus, ~riistakeskus, my_des1, svyquantile, ci = FALSE, keep.var = FALSE, quantiles = c(0, 0.25, 0.5, 0.75, 1), na.rm = TRUE) I am not sure whether this is really a bug that the survey package author would want to fix. Perhaps consider using the ?bxp function if your use case has a dataset that small?
Why Forest plot is not showing the confidence interval bars?
Hi, I am generating a forest plot with the following code, but the graph does not show the confidence interval bars on the boxes. How can I improve this graphical representation? mydf <- data.frame( Variables=c('Variables','Neuroticism_2','Neuroticism_3','Neuroticism_4'), HazardRatio=c(NA,1.109,1.296,1.363), HazardLower=c(NA,1.041,1.206,1.274), HazardUpper=c(NA,1.182,1.393,1.458), Pvalue=c(NA,"0.001","<0.001","<0.001"), stringsAsFactors=FALSE ) #png('temp.png', width=8, height=4, units='in', res=400) rowseq <- seq(nrow(mydf),1) par(mai=c(1,0,0,0)) plot(mydf$HazardRatio, rowseq, pch=15, xlim=c(-10,12), ylim=c(0,7), xlab='', ylab='', yaxt='n', xaxt='n', bty='n') axis(1, seq(0,5,by=.5), cex.axis=.5) segments(1,-1,1,6.25, lty=3) segments(mydf$HazardLower, rowseq, mydf$HazardUpper, rowseq) text(-8,6.5, "Variables", cex=.75, font=2, pos=4) t1h <- ifelse(!is.na(mydf$Variables), mydf$Variables, '') text(-8,rowseq, t1h, cex=.75, pos=4, font=3) text(-1,6.5, "Hazard Ratio (95%)", cex=.75, font=2, pos=4) t3 <- ifelse(!is.na(mydf$HazardRatio), with(mydf, paste(HazardRatio,' (',HazardLower,'-',HazardUpper,')',sep='')), '') text(3,rowseq, t3, cex=.75, pos=4) text(7.5,6.5, "P Value", cex=.75, font=2, pos=4) t4 <- ifelse(!is.na(mydf$Pvalue), mydf$Pvalue, '') text(7.5,rowseq, t4, cex=.75, pos=4) #dev.off() Edit: I also tried to do this with the forestplot package, but I am not getting the confidence intervals on the graph either, and I want the presentation to look like the graph above.
test_data <- data.frame(coef=c(1.109,1.296,1.363), low=c(1.041,1.206,1.274), high=c(1.182,1.393,1.458), boxsize=c(0.1, 0.1, 0.1)) row_names <- cbind(c("Variable", "N_Quartile 1", "N_Quartile 2", "N_Quartile 3"), c("HR", test_data$coef), c("CI -95%", test_data$low), c("CI +95%", test_data$high) ) test_data <- rbind(NA, test_data) forestplot(labeltext = row_names, mean = test_data$coef, upper = test_data$high, lower = test_data$low, clip =c(0.1, 25), is.summary=c(TRUE, FALSE, FALSE, FALSE), boxsize = test_data$boxsize, zero = 1,colgap = unit(3, "mm"), txt_gp=fpTxtGp(label= gpar(cex = 0.7), title = gpar(cex = 1) ), xlog = TRUE, xlab = "HR (95% CI)", col = fpColors(lines="black", box="black"), ci.vertices = TRUE, xticks = c(0.1, 1, 2.5,5,7.5))
Your intervals are quite small, so if you do it manually on plot it will take a while to refine the correct settings, and putting text together with it is not trivial. Right now your first code is not even 50% there. My suggestion is to build up the plot slowly using forestplot, and identify the problem, for example if you just plot your data.frame, you see it works, that is the c.i is there, just that it's very narrow, and that's your problem at hand, adjusting the size using lwd.ci so that it is visible: forestplot(test_data[,1:3],lwd.ci=3) Now if we add in the text: forestplot( labeltext =row_names, mean = test_data$coef, upper = test_data$high, lower = test_data$low, txt_gp=fpTxtGp(cex=0.8), is.summary=c(TRUE, FALSE, FALSE, FALSE), boxsize = test_data$boxsize,lwd.ci=3) So the text is taking up a bit too much space, i think one way is to use the conventional est[ll - ul] way of representing estimate and confidence interval, you can see examples here. One way I can try below is to wrap the values for the CI into 1 string, and have just two columns for text: library(stringr) test_data <- data.frame(coef=c(1.109,1.296,1.363), low=c(1.041,1.206,1.274), high=c(1.182,1.393,1.458), boxsize=c(0.1, 0.1, 0.1)) column1 = c("Variable", "N_Quartile 1", "N_Quartile 2", "N_Quartile 3") column2 = cbind(c("HR", test_data$coef), c("CI -95%", test_data$low), c("CI +95%", test_data$high)) L = max(nchar(column2)) padded_text =apply(column2,1, function(i)paste(str_pad(i,L),collapse=" ")) test_data <- rbind(NA, test_data) pdf("test.pdf",width=8,height=4) forestplot( labeltext =cbind(column1,padded_text), mean = test_data$coef, upper = test_data$high, lower = test_data$low, txt_gp=fpTxtGp(cex=0.8),align="c", is.summary=c(TRUE, FALSE, FALSE, FALSE), boxsize = test_data$boxsize,lwd.ci=3, graphwidth=unit(100,'mm')) dev.off()
Forestplot R - expanding plot, edit a variable name
I prepared a code to visualize my data: library(forestplot) test_data <- data.frame(coef=c(1.14, 0.31, 10.70), low=c(1.01, 0.12, 1.14), high=c(1.30, 0.83, 100.16), boxsize=c(0.2, 0.2, 0.2)) row_names <- cbind(c("Variable", "Variable 1", "Variable 2", "So looooooong and nasty name of the variable"), c("OR", test_data$coef), c("CI -95%", test_data$low), c("CI +95%", test_data$high) ) test_data <- rbind(rep(NA, 4), test_data) forestplot(labeltext = row_names, mean = test_data$coef, upper = test_data$high, lower = test_data$low, is.summary=c(TRUE, FALSE, FALSE, FALSE), boxsize = test_data$boxsize, zero = 1, xlog = TRUE, xlab = "OR (95% CI)", col = fpColors(lines="black", box="black"), title="My Happy Happy Title \n o happy happy title...\n", ci.vertices = TRUE, xticks = c(0.1, 1, 10, 100)) It gives a following forestplot: I would like to: 1) expand the plot and diminish font of the plot details on the left for better visualization 2) edit "So looooooong and nasty name of the variable" to move part "name..." below the row like: " So looooooong and nasty name of the variable " However, when I write as "/nSo.../n" it gives another row of number from columns "OR" and "CIs". How correct it?
Three possibilities (one more than you asked for): 1) change text of row labels with txt_gp. 2) cut column spacing from 6 mm default to half that value by passing colgap a grid call to unit. Fully understanding the options for forestplot requires understanding the grid system of plotting. 3) add a "\n" to the loooong label. (I'm puzzled you didn't see that possibility, since you already had a "\n" in the title.) row_names <- cbind(c("Variable", "Variable 1", "Variable 2", "So looooooong and \nnasty name of the variable"), c("OR", test_data$coef), c("CI -95%", test_data$low), c("CI +95%", test_data$high) ) forestplot(labeltext = row_names, mean = test_data$coef, upper = test_data$high, lower = test_data$low, is.summary=c(TRUE, FALSE, FALSE, FALSE), boxsize = test_data$boxsize, zero = 1, colgap = unit(3, "mm"), txt_gp=fpTxtGp(label= gpar(cex = 0.7), title = gpar(cex = 1) ), xlog = TRUE, xlab = "OR (95% CI)", col = fpColors(lines="black", box="black"), title="My Happy Happy Title \n o happy happy title...\n", ci.vertices = TRUE, xticks = c(0.1, 1, 10, 100)) If I only used a cex of 0.7 in the call to gpar passed to 'label', it also affected the size of the title, so I needed to "reset" the 'cex' of the 'title' back to 1.
Assigning type to xyplot
Complete beginner at R here trying to perform nonmetric multidimensional scaling on a 95x95 matrix of similarities where 8 corresponds to very similar and 1 corresponds to very dissimilar. I also have an additional column (96th) signifying type and ranging from 0 to 1. First I load the data: dsimilarity <- read.table("d95x95matrix.txt", header = T, row.names = c("Y1", "Y2", "Y3", "Y4", "Y5", "Y6", "Y7", "Y8", "Y9", "Y10", "Y11", "Y12", "Y13", "Y14", "Y15", "Y16", "Y17", "Y18", "Y19", "Y20", "Y21", "Y22", "Y23", "Y24", "Y25", "Y26", "Y27", "Y28", "Y29", "Y30", "Y31", "Y32", "Y33", "Y34", "Y35", "Y36", "Y37", "Y38", "Y39", "Y40", "Y41", "Y42", "Y43", "Y44", "Y45", "Y46", "Y47", "Y48", "Y49", "Y50", "Y51", "Y52", "Y53", "Y54", "Y55", "Y56", "Y57", "Y58", "Y59", "Y60", "Y61", "Y62", "Y63", "Y64", "Y65", "Y66", "Y67", "Y68", "Y69", "Y70", "Y71", "Y72", "Y73", "Y74", "Y75", "Y76", "Y77", "Y78", "Y79", "Y80", "Y81", "Y82", "Y83", "Y84", "Y85", "Y86", "Y87", "Y88", "Y89", "Y90", "Y91", "Y92", "Y93", "Y94", "Y95")) I convert the matrix of similarities into a matrix of dissimilarities, and exclude the 96th column: ddissimilarity <- dsimilarity; ddissimilarity[1:95, 1:95] = 8 - ddissimilarity[1:95, 1:95] Then I perform the nonmetric MDS using the Smacof function: ordinal.mds.results <- smacofSym(ddissimilarity[1:95, 1:95], type = c("ordinal"), ndim = 2, ties = "primary", verbose = T ) I create a new data frame (I'm following a guide and don't really know what's going on here): mds.config <- as.data.frame(ordinal.mds.results$conf) All well and good thus far (to my knowledge). However at this point I will try to create an xyplot of the data and get a good result using this code: xyplot(D2 ~ D1, data = mds.config, aspect = 1, main = "Figure 1. 
MDS solution", panel = function (x, y) { panel.xyplot(x, y, col = "black") panel.text(x, y-.03, labels = rownames(mds.config), cex = .75) }, xlab = "MDS Axis 1", ylab = "MDS Axis 2", xlim = c(-1.1, 1.1), ylim = c(-1.1, 1.1)) Now I want to create a figure that incorporates the type in column 96th and assigns different colors to observations of the two different types. However, can't quite figure out how to do so. Does anyone have any ideas of where I'm going wrong here? xyplot(D2 ~ D1, data = mds.config ~ ddissimilarity[96:96, 96:96], aspect = 1, main = "Figure 1. MDS solution", panel = function (x, y) { panel.xyplot(x, y, col = "black") panel.text(x, y-.03, labels = rownames(mds.config), cex = .75) }, xlab = "MDS Axis 1", ylab = "MDS Axis 2", xlim = c(-1.1, 1.1), ylim = c(-1.1, 1.1), group = "Type")