Plotting Clusters using clusplot with coordinates centered around 0 - r

I am trying to plot GIS coordinates, specifically UK National Grid coordinates, whose eastings and northings resemble:
194630000 562220000
I can plot these using clusplot in the Cluster library:
clusplot (df2,k.means.fit$cluster,main=i,color=TRUE,shade=FALSE,labels=0,lines=0,bty="7")
where df2 is my data frame and k.means.fit is the result of the K means analysis on df2.
Note that the coordinates of the centers after the k means analysis have not been normalised:
k.means.fit$centers
# Grid.Ref.Northing Grid.Ref.Easting
#1 206228234 581240726
But when I plot the clusters, all the points are translated such that they are centered around the origin.
I want to show a map in the background to give the plots some context, but unless I can stop the translation, or at least find out the offsets the function used, I cannot align the two properly.
I understand that clusplot is designed to do a lot automatically, which limits customisation, but I have not been able to find another package that creates similar cluster plots.
Intended plot
(this was done at a random placement and is inaccurate)
Actual cluster diagram

Here is a way to produce something close to what you are asking for.
Because of the need to translate between (lat, lon) and the graphics
coordinates (x,y) I did not use clusplot. Instead, I am using RgoogleMaps to get the background map and do the coordinate translations. I use car to plot the ellipses.
library(RgoogleMaps)
library(car)
## Map setup for the Chelmsford area: the map is centred on the midpoint of
## the latitude and longitude ranges below and fetched as a road map.
lat <- c(51.7, 51.8)
lon <- c(0.4, 0.5)
center <- c(mean(lat), mean(lon))
zoom <- 10
Chelmsford <- GetMap(
  center = center,
  zoom = zoom,
  maptype = "roadmap",
  destfile = "Chelsford.png"
)
You did not provide any points to test on, so I made up a few. I realize that my points are more separable than yours, but that only affects the clustering algorithm, not the mapping.
## Some Test Data
MC = structure(c(51.7965309028563, 51.794104389723, 51.7908688357699,
51.7787334409852, 51.7633572542762, 51.7674041270742, 51.7479758289189,
51.7649760469292, 51.7447369665147, 51.7576910228736, 51.7487855082363,
51.7601194948316, 51.754452857092, 51.7309692105151, 51.7107148897781,
51.6977473627376, 51.7139561908073, 51.7366387945275, 51.7325891642372,
51.7050420540348, 51.7050420540348, 51.7285391710661, 51.6677457194661,
51.6571998818184, 51.6466515895592, 51.6377241941241, 51.6377241941241,
51.645028557487, 51.6636899185361, 51.6580111872422, 51.6385358481586,
51.63528914486, 51.8789546795942, 51.8571513038925, 51.8531124817854,
51.8514968514399, 51.8676505449041, 51.8805693240155, 51.862805045846,
51.8506890145161, 51.8345292307446, 51.8337210892835, 51.8256388769982,
51.812704320496, 51.8232139304917, 51.8312965778826, 51.8240222604979,
51.8135128390641, 51.8094701011681, 51.807044284361, 51.7973397115523,
51.7803516822409, 51.7803516822409, 51.7949132419417, 51.7949132419417,
51.7811607811046, 51.7763059702794, 51.7787334409852, 51.9007474867743,
51.8781473356377, 51.8910630993239, 51.8757252167833, 51.8821839104485,
51.8821839104485, 51.8595744231562, 51.8821839104485, 51.8741103983922,
51.8660354365472, 51.8797620090535, 51.8765326042323, 51.8652278606205,
51.8934843918728, 51.8829911819196, 0.0895846775599907, 0.109172466823018,
0.153571455819268, 0.144430487496514, 0.140512929643877, 0.115701729910693,
0.109172466823018, 0.0882788249424316, 0.124842698233447, 0.171853392464776,
0.423882947649248, 0.447388294764912, 0.477422904968252, 0.45130585261751,
0.442164884294756, 0.468281936645498, 0.502234104701436, 0.504845809936514,
0.487869725908525, 0.430412210736963, 0.399071747916064, 0.395154190063467,
0.520516041346943, 0.527045304434619, 0.523127746582022, 0.511375073024189,
0.517904336111865, 0.54010383061001, 0.550550651550283, 0.55577406202044,
0.572750146048389, 0.508763367789111, 0.513986778259268, 0.504845809936514,
0.515292630876787, 0.537492125374932, 0.549244798932764, 0.588420377458818,
0.587114524841299, 0.550550651550283, 0.508763367789111, 0.493093136378682,
0.515292630876787, 0.485258020673487, 0.508763367789111, 0.504845809936514,
0.652407155718095, 0.669383239746084, 0.668077387128565, 0.644572040012901,
0.640654482160303, 0.640654482160303, 0.643266187395342, 0.606702314104326,
0.608008166721885, 0.619760840279717, 0.626290103367393, 0.594949640546534,
0.162712424142022, 0.156183161054346, 0.194052886962881, 0.182300213405049,
0.212334823608389, 0.217558234078545, 0.220169939313624, 0.238451875959131,
0.25542795998708, 0.259345517839678, 0.27109819139751, 0.28546257019042,
0.284156717572901, 0.295909391130693, 0.30113280160085), .Dim = c(73L,
2L), .Dimnames = list(NULL, c("lat", "lon")))
Plot the map and points just to get oriented.
## Draw the base map first so the points are overlaid on it.
PlotOnStaticMap(Chelmsford)
## Convert the (lat, lon) columns of MC into the plotting coordinates of the
## map; 10 is the same value used for `zoom` when the map was fetched.
P1 <- LatLon2XY.centered(Chelmsford, MC[, 1], MC[, 2], 10)
## Rename the two components to x / y so points() can consume the result.
P1 <- setNames(P1, c("x", "y"))
points(P1, pch = 16)
Now we need to find and plot the clusters.
set.seed(42) ## For reproducibility
## Partition the (lat, lon) points into 7 clusters.
Clust <- kmeans(MC, 7)
## Convert to graphics coordinates
Points <- LatLon2XY.centered(Chelmsford, MC[, 1], MC[, 2], 10)
names(Points) <- c("x", "y")
Points <- data.frame(Points)
## Replot noting clusters
PlotOnStaticMap(Chelmsford)
points(Points, pch = 21, bg = Clust$cluster)
## Add ellipses: one 90% data ellipse per cluster, coloured by cluster index.
## seq_along() avoids the 1:0 trap of 1:length(), and the call no longer ends
## with a stray comma (which passed an empty trailing argument).
for (i in seq_along(unique(Clust$cluster))) {
  in_cluster <- Clust$cluster == i
  dataEllipse(Points[in_cluster, 1], Points[in_cluster, 2],
    center.pch = 10, levels = 0.90, fill = TRUE, fill.alpha = 0.1,
    plot.points = FALSE, col = i, lwd = 1
  )
}
Et voila!

Related

How to generate an ordination plot from a distance matrix in R

Here I have another 'graphical' problem:
I have obtained from MOTHUR the following distance matrix (coming from a weighted unifrac analysis):
20
F3D0
F3D1 0.222664
F3D141 0.157368 0.293308
F3D142 0.180278 0.319198 0.0944511
F3D143 0.157659 0.290975 0.0545202 0.0761392
F3D144 0.199909 0.34045 0.104358 0.086418 0.089473
F3D145 0.207946 0.348532 0.107841 0.076302 0.0940067 0.051632
F3D146 0.117877 0.253996 0.0891617 0.130867 0.0882064 0.134407 0.138415
F3D147 0.197256 0.336583 0.102114 0.0764106 0.0890669 0.0514887 0.0479297 0.135324
F3D148 0.173824 0.311951 0.0606815 0.0648557 0.056463 0.074914 0.0811015 0.111996 0.0709027
F3D149 0.145614 0.276632 0.0462779 0.105512 0.0628737 0.10902 0.114584 0.0739466 0.107123 0.0690412
F3D150 0.129557 0.277624 0.0840909 0.128305 0.0863231 0.140256 0.145381 0.0744572 0.13672 0.113564 0.0659831
F3D2 0.133531 0.216587 0.160832 0.186833 0.176061 0.214934 0.215261 0.152591 0.205629 0.188325 0.156313 0.153841
F3D3 0.213102 0.305651 0.123818 0.113021 0.139376 0.148558 0.13853 0.174377 0.139851 0.126329 0.131294 0.166738 0.137784
F3D5 0.128668 0.185235 0.167733 0.205183 0.176585 0.224806 0.230984 0.14497 0.223492 0.18933 0.153624 0.148617 0.127574 0.192433
F3D6 0.139411 0.236633 0.135418 0.124848 0.134198 0.175098 0.166205 0.118905 0.166144 0.151842 0.120964 0.12724 0.0950943 0.119852 0.129523
F3D7 0.198884 0.315888 0.130385 0.0989168 0.131945 0.14625 0.126203 0.173689 0.128993 0.121373 0.140199 0.152123 0.152893 0.0906675 0.186674 0.111134
F3D8 0.178656 0.18783 0.205737 0.22104 0.219858 0.268701 0.2644 0.184943 0.268051 0.229503 0.1979 0.20035 0.164427 0.203089 0.119084 0.142398 0.185551
F3D9 0.153265 0.186706 0.196143 0.21504 0.20728 0.262127 0.255558 0.174563 0.2607 0.221969 0.192437 0.185154 0.13976 0.195538 0.0973901 0.127619 0.177605 0.0558726
Mock 0.653789 0.645344 0.633297 0.623553 0.633903 0.633135 0.63394 0.635815 0.645332 0.636453 0.629143 0.646918 0.663222 0.639517 0.649722 0.64073 0.654882 0.63988 0.646155
As this distance matrix comes from a PCoA, what I want to do is plot these distances in an ordination plot with R.
Any idea how to do this?
Thanks a lot
The vegan library has a metaMDS function that generates coordinates for each sample using such a distance matrix as the input.
Let's call your matrix M; you then need to run this code:
# Load the library
library(vegan)
# Use metaMDS function for 2D - plot.
# The dissimilarities are passed as the first argument (`comm`), which accepts
# a `dist` object or a symmetric square matrix. The `distance` argument is the
# name of a dissimilarity index and is only used when raw community data is
# supplied, so the matrix must not be passed there.
NMDS <- metaMDS(M, k = 2)
# Plot your individuals
plot(NMDS$points[, 1], NMDS$points[, 2])
In NMDS$points you have the coordinates for each of the samples. I suggest to colour the individuals according to a factor of interest such as cases and controls for example in biomedical analyses.
Thanks to #R18, finally I could manage with this issue.
For the distance table I uploaded, the solution that I reached was to use the following code:
library(phyloseq)
library(vegan)
# Read the mothur distance file into M.
# NOTE(review): presumably import_mothur_dist() returns a `dist` object -- confirm.
M <- import_mothur_dist("pcoa_UFdistance_matrix.dist")
# Ordinate M in two dimensions with up to 100 random starts (trymax).
# NOTE(review): M is passed both as the first argument and as `distance`;
# the second use looks redundant -- confirm against the metaMDS documentation.
unifrac <- metaMDS(M, distance = M, k = 2, trymax=100)
# Scatter plot of the first two columns of the ordination coordinates.
# NOTE(review): the title and axis labels say PCoA although the coordinates
# come from metaMDS() -- confirm the labels are what is intended.
plot(unifrac$points[,1], unifrac$points[,2], main="Principal Coordinates Analysis", col.main="red", font.main=4, xlab="PCoA 1", ylab="PCoA 2")
# Add text labels from the ordination object; pos = 3 places them above the points.
text(unifrac, pos=3)
Hope it will help someone!!

How to determine if a point is above or below a line connecting points in R?

Objective:
Given a set of data, how can I determine if a new data point is above or below the line connecting the points.
For example, how can I determine if the red point shown is above or below the line (without visual inspection)?
I'd like to fit an exact line to the points. Essentially joining the points, and need a fit to be able to use any point on the line for a comparator.
Current attempts:
So far I've tried fitting various splines to the data, but it is still a bit too smooth. I'm really looking for an exact fit to the data, sharp corners and all.
I've tried a natural spline (as well as smooth.splines), but can't quite get the fit exact/sharp enough:
# Plot the data as points joined by straight segments.
plot(df$lowerx, df$lowery, type = "b", xlab = "x", ylab = "y", pch = 21, bg = "steel blue")
# Natural spline through the data points.
myspline <- splinefun(df$lowerx, df$lowery, method = "natural")
# Overlay the spline on the existing plot. `TRUE` is spelled out because `T`
# is an ordinary variable that can be reassigned.
curve(myspline, add = TRUE, from = 0, to = 140, n = 100, col = "green")
I think once I get the fit right it will be straightforward use it to figure out if points are above or below the line (e.g. using predict or the function), but I need help with the fit.
Also would welcome another approach entirely.
Data:
df <- structure(list(lowerx = c(11.791, 18.073, 23.833, 35.875, 39.638, 44.153, 59.206, 71.498, 83.289, 95.091, 119.676, 131.467, 143.76), lowery = c(5.205, 5.89, 6.233, 9.041, 10, 10.342, 12.603, 13.493, 14.658, 15.274, 15.89, 15.616, 15.342)), .Names = c("lowerx", "lowery"), class = "data.frame", row.names = c(NA, -13L))
The R function approxfun will create a function that does a linear interpolation.
> F <- approxfun(x=df$lowerx, y=df$lowery)
> F(80) > 13
[1] TRUE
I used the data you offered and tested my best guess at the coordinates of the "red point", (80, 13). The result says that (80, 13) is below the interpolated line, whereas (80, 15) is above it:
> F(80) > 15
[1] FALSE
this post shows how to do the trick: How to tell whether a point is to the right or left side of a line
if position is +1 the point is above, if it's -1 the point is below, if 0 it is directly on the line, no fitting required, you just need to know which two points to refer to span the line...
applied to your example:
# Reference polyline: 13 (x, y) vertices with x strictly increasing.
# The compact row names must declare 13 rows to match the column length
# (the earlier value of -15L made nrow(df) report 15).
df <- structure(
  list(
    lowerx = c(11.791, 18.073, 23.833, 35.875, 39.638, 44.153, 59.206,
               71.498, 83.289, 95.091, 119.676, 131.467, 143.76),
    lowery = c(5.205, 5.89, 6.233, 9.041, 10, 10.342, 12.603, 13.493,
               14.658, 15.274, 15.89, 15.616, 15.342)
  ),
  .Names = c("lowerx", "lowery"),
  class = "data.frame",
  row.names = c(NA, -13L)
)
# Query point to classify.
X <- 79
Y <- 13
# Locate the segment A -> B that brackets X: B is the first vertex whose x is
# greater than X and A is the vertex just before it. This only yields a valid
# segment when min(df$lowerx) <= X < max(df$lowerx).
xIndex2 <- which(df$lowerx > X)[1]
xIndex1 <- xIndex2 - 1
Ax <- df$lowerx[xIndex1]
Ay <- df$lowery[xIndex1]
Bx <- df$lowerx[xIndex2]
By <- df$lowery[xIndex2]
# Sign of the 2-D cross product (B - A) x (P - A):
# +1 = point above the line, -1 = below, 0 = exactly on it.
position <- sign((Bx - Ax) * (Y - Ay) - (By - Ay) * (X - Ax))

R spplot diagrams on the map

Hello everyone, can anybody help me extend my code so that I can insert additional data into my map? This is the code that draws a map showing the intensity of migration, and I am trying to add ethnic information for every region (many small pie charts).
to draw a map
# Open a connection to the remote .RData file and load it; load() returns the
# names of the objects it created, which print() displays. The code below
# expects the file to provide an object called `gadm`.
con <- url("http://biogeo.ucdavis.edu/data/gadm2/R/UKR_adm1.RData")
print(load(con))
close(con)
# Keep a copy of the original VARNAME_1 values before they are overwritten.
name<-gadm$VARNAME_1
# One score from 1 to 5 per region (27 values).
value<-c(4,2,5,2,1,2,4,2,2,4,1,1,1,4,3,3,1,1,3,1,2,4,5,3,4,2,1)
# Store the scores in VARNAME_1 as a factor.
gadm$VARNAME_1<-as.factor(value)
# 260-step colour ramp interpolated through five anchor colours.
col<- colorRampPalette(c('cadetblue4','cadetblue1','mediumseagreen','tan2','tomato3'))(260)
# Draw the regions coloured by VARNAME_1, with axis scales shown.
spplot(gadm, "VARNAME_1", main="Ukraine", scales = list(draw = TRUE), col.regions=col)
# Build an "sp.text" layout item: the literal "sp.text", the coordinates of
# `x`, and the label text to draw there.
sp.label <- function(x, label) {
  anchor_points <- coordinates(x)
  list("sp.text", anchor_points, label)
}

# Layout item that labels `x` with its own NAME_1 values.
NAME.sp.label <- function(x) {
  sp.label(x, label = x$NAME_1)
}

# Return the NAME_1 label item as a plain list, for use as `sp.layout`.
draw.sp.label <- function(x) {
  layout_item <- NAME.sp.label(x)
  do.call(list, layout_item)
}
# Redraw the map with the region-name labels supplied through sp.layout and a
# colour key whose five entries are labelled "Very low" through "Very high".
spplot(gadm, 'VARNAME_1', sp.layout = draw.sp.label(gadm), col.regions=col,
colorkey = list(labels = list( labels = c("Very low","Low", "Average",
"High","Very high"),
width = 1, cex = 1)))
and this is a part of the df that I am trying to add to that map as pie charts or bar charts, using each region's latitude (lat) and longitude (long) to position my bar or pie charts.
# Two sample regions with their rus / ukr / mold values and the coordinates
# at which each chart should be drawn.
# NOTE(review): the `long` values (50.43, 48) look like latitudes and the
# `lat` values (30.52, 37.82) like longitudes for these regions -- confirm the
# two columns are not swapped.
df<-data.frame(region=c('Kiev oblast', 'Donezk oblast'),
rus=c(45,35), ukr=c(65,76), mold=c(11,44),long=c(50.43,48),
lat=c(30.52, 37.82))
I found one example and another, but I can't figure out how to use them in my case.
Hope for your help, thank you.
This is the only solution I have found so far, but it does not build on my existing code:
# Draw pies from the rus / ukr / mold columns of df.
# Note that the `lat` column is passed as the x coordinate and `long` as the
# y coordinate, and the x/y limits are chosen accordingly.
mapPies( df,nameX="lat", nameY="long", nameZs=c('rus','ukr','mold'),
xlim=c(30,33), ylim=c(44,53), symbolSize = 2)
perhaps this will help:
pieSP: this function produces a SpatialPolygonsDataFrame of pie charts built from a few attributes, ready to use with plotGoogleMaps or spplot.
library(plotGoogleMaps)
# Load the meuse example data and turn it into a spatial object, using its
# x / y columns as coordinates and assigning the EPSG:28992 reference system.
data(meuse)
coordinates(meuse) <- ~ x + y
proj4string(meuse) <- CRS('+init=epsg:28992')
# Build the pie polygons from the three metal columns.
metals <- c('zinc', 'lead', 'copper')
pies <- pieSP(meuse, zcol = metals, max.radius = 120)
# Tag the polygons with the metal name and a numeric code, repeating the
# three-value pattern 155 times.
pies$pie <- rep(metals, times = 155)
pies$pie2 <- rep(seq_len(3), times = 155)
# Plot the pies coloured by the numeric code.
spplot(pies, 'pie2')

R maps plotting longitude and latitude points

I have a map of the USA and a list of long, lat that I want to plot. Once I get this working, I also want to switch over to the "world" map. The map is generated, but no points appear on the map. The first line of the TSV file contains this header: LONG{tab}LAT R appears to be reading in the 'traffic' table OK. What am I doing wrong?
library("maps")
# Read the tab-separated coordinate table; the first line is the header.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
traffic <- read.table("C:/temp/traffic_10.40.tsv", header = TRUE, sep = "\t")
# Render to a PNG file; dev.off() below closes the device and writes the file.
png(filename = "C:/temp/usa.png", width = 850, height = 600, bg = "white")
map("state", plot = TRUE, fill = FALSE, col = palette())
title("Destinations")
# x is taken from LONG and y from LAT, so each column must really hold that
# coordinate for the points to land on the map.
points(x = traffic$LONG, y = traffic$LAT, col = "red", cex = 0.75)
dev.off()
EDIT
> dput(traffic)
structure(list(LONG = c(47.6218, 32.7942, 34.1121, 40.0068, 47.6218,
33.9553, 33.7629, 40.0068, 39.05, 38.1075, 33.7629, 32.769, 37.3857,
29.4576, 34.1674, 38.8147, 32.7942, 31.1429, 40.3254, 30.3059,
38.2248, 47.6218, 33.9553, 38.1075, 27.1943, 29.4576, 30.5175,
38.5668, 42.6657, 40.2982, 32.7539, 40.6698, 47.6742, 32.7942,
47.6218, 35.8219), LAT = c(-122.35, -96.7653, -118.411, -75.1347,
-122.35, -83.3937, -84.4226, -75.1347, -77.4833, -122.264, -84.4226,
-96.5998, -122.026, -98.5054, -84.8014, -77.0647, -96.7653, -81.471,
-78.9195, -97.7505, -85.7412, -122.35, -83.3937, -122.264, -80.2432,
-98.5054, -97.6721, -121.467, -73.799, -111.698, -97.3363, -73.9438,
-122.115, -96.7653, -122.35, -78.6588)), .Names = c("LONG", "LAT"
), class = "data.frame", row.names = c(NA, -36L))
Also, I am a R newbie and have tried finding this on google with limited success because I am not sure what to search for exactly.
The problem is in your data set rather than your later code.
The first point has one co-ordinate 47.6218 and the other -122.35. Latitudes cannot be outside the range [-90,90] degrees so the longitude must be -122.35 and latitude 47.6218, the opposite of your data set. This is slightly north of the Seattle Space needle
x (horizontal) is traditionally longitude or easting; y (vertical) is traditionally latitude or northing
One option would be to use googleVis for this. You could plot out your points Google-Maps style.
Fixed. I swapped $LAT and $LONG. Now it works perfectly.
# In this data set the column named LAT holds the longitudes and LONG holds
# the latitudes, so LAT is passed as x and LONG as y.
points(x=traffic$LAT,y=traffic$LONG,col='red',cex=0.75)

ggplot2 and maps: geom_point and annotation_raster position mismatch

Good day everyone,
Using the code below I can successfully retrieve a raster from Google using ggmap, plot an annotation_raster using ggplot2, and plot site localities as red dots on top of the raster layer. On the plot the positions don't quite match (they should follow the coastline). I know my sites' positions are correct because they plot where they should be when I upload the data onto Google Earth as a KML file.
Suggestions will be appreciated.
This code will run as is... Note that you need a development version of ggplot2, which is available on github. To install:
# Install devtools first if it is not already available:
# install.packages("devtools")
library(devtools)
# Install the development version of ggplot2 from GitHub.
# NOTE(review): recent devtools versions expect a "user/repo" specification
# here -- confirm against the installed version.
install_github("ggplot2")
and for the code:
library(ggplot2)
library(ggmap)
library(grDevices)
theme_set(theme_bw())
# Some coordinates of points to plot:
siteLat = c(-22.94414, -22.67119, -29.25241, -30.31181, -32.80670, -33.01054, -32.75833, - 33.36068, -31.81708, -32.09185, -32.31667, -34.13667, -34.05016, -33.91847, -34.13525, -34.12811, -34.10399, -34.16342, -34.41459, -34.58786, -34.83353, -34.37150, -34.40278, -34.17091, -34.08565, -34.04896, -33.98066, -34.02448, -34.20667, -34.05889, -33.97362, -33.99125, -33.28611, -33.02407, -33.01798, -32.99316, -31.09704, -31.05000, -30.91622, -30.70735, -30.28722, -30.27389, -29.86476, -29.54501, -29.49660, -29.28056, -28.80467, -27.42472)
siteLon = c(14.50175, 14.52134, 16.86710, 17.26951, 17.88522, 17.95063, 18.02778, 18.15731, 18.23065, 18.30262, 18.32222, 18.32674, 18.34971, 18.38217, 18.43592, 18.45077, 18.48364, 18.85908, 19.25493, 19.33971, 20.00439, 21.43518, 21.73972, 22.12749, 23.05532, 23.37925, 23.64567, 23.89933, 24.77944, 25.58889, 25.64724, 25.67788, 27.48889, 27.91626, 27.92182, 27.95036, 30.18395, 30.21666, 30.32982, 30.48474, 30.76026, 30.83556, 31.04479, 31.21662, 31.24665, 31.44403, 32.07567, 32.73333)
# Number the sites 1..n. seq_along() is also correct for an empty vector,
# where seq(1:length(x)) would produce 1:2.
siteName <- seq_along(siteLon)
# Build the data frame directly so each column keeps its own type instead of
# being coerced through a matrix by cbind().
sites <- data.frame(siteLat, siteLon, siteName)
# Approximate bounding box requested for the satellite tile.
lats <- c(-35, -20)
lons <- c(10, 35)
SAMap <- GetMap.bbox(lons, lats, maptype = "satellite")

# Bounding box reported by the returned map: element 2 of each corner is used
# as the longitude and element 1 as the latitude.
bbox <- SAMap$BBOX
lonr <- c(bbox$ll[2], bbox$ur[2])
latr <- c(bbox$ll[1], bbox$ur[1])

# Convert the tile to a raster for use as a plot background.
h_raster <- as.raster(SAMap$myTile)

# Draw the raster across the reported extent, overlay the sites in red, and
# limit both axes to that same extent.
g <- ggplot(sites, aes(siteLon, siteLat))
g +
  annotation_raster(
    h_raster,
    xmin = lonr[1], xmax = lonr[2],
    ymin = latr[1], ymax = latr[2]
  ) +
  geom_point(aes(x = siteLon, y = siteLat), colour = "red", data = sites) +
  scale_x_continuous(limits = lonr) +
  scale_y_continuous(limits = latr)
(Sorry, I cannot post an image as I am new here).
Okay, the problem has been resolved thanks to David Kahle. See this post:
https://groups.google.com/forum/?hl=en&fromgroups#!topic/ggplot2/ABffHL3WTpY
AJ

Resources