R: strange map format - r

I got a file containing the following data:
str(dat)
List of 2
$ x: Named num [1:28643] 2714769 2728569 NA 2728569 2740425 ...
..- attr(*, "names")= chr [1:28643] "h" "h" "" "h" ...
$ y: Named num [1:28643] 925000 925000 NA 925000 925000 ...
..- attr(*, "names")= chr [1:28643] "h" "h" "" "h" ...
- attr(*, "class")= chr [1:2] "bor" "list"
dat$x[1:10]
h h h h h h h
2714769 2728569 NA 2728569 2740425 NA 2740425 2751585 NA 2751585
dat$y[1:10]
h h h h h h h
925000 925000 NA 925000 925000 NA 925000 925000 NA 925000
class(dat)
"bor" "list"
table(names(dat$x))
h
479 28164
table(names(dat$y))
h
479 28164
plot(dat, type='l') results in a nice map.
I read about an old/simple form of line-'objects' used in S in "Applied Spatial Data Analysis with R" (Bivand, Pebesma, Gomez-Rubio; Springer 2008) on Page 38, which seem to have similarities to my file. This format defines a line as "start-point; end-point; NA" triplet.
Do you know this format?
How can I convert it to an sp-object?
Thanks in advance

Based on your information, here is one possilbe way to go:
Assuming that your data represent lines and that the NA values indicate the end of each line, you can convert your data to spatial lines doing the following:
# Creating artificial data for the example
dat <- list()
dat$x <- rnorm(1000) + rep(c(rep(0, 99), NA), 10)
dat$y <- dat$x + rnorm(1000)
# For simplicity, convert to data frame
# (this would be the first step for you to do with your data)
mydat <- data.frame(x = dat$x, y = dat$y)
# Convert each part to a line, using the NA values as breaks
mylines <- list()
last <- 1
for(i in 1:nrow(mydat)){
if(is.na(mydat$x[i])){
print(i)
mylines[[as.character(i)]] <- Lines(Line(mydat[last:(i-1),]), ID = as.character(i))
last <- i+1
}
}
# Convert to spatial lines object
mylines <- SpatialLines(mylines)
# Plot to see if it worked
plot(mylines)

Related

Adding point locations to a 3D DEM plot in R

I have some point locations which include UTMs and Elevation as a data frame
I also have a DEM layer.
I have figured out how to plot the DEM in 3D using plot3D in rgl.
I can also plot the points in 3D using points3d.
I have been able to put them in the same plot using points3d with add=TRUE
however the points and DEM are radically far away from each other.
In the code below I also tried to change this to a spatial data frame but rgl doesn't seem to like that.
Is it possible to plot them together with the points laying over the DEM?
I have searched and searched for a solution to this.
Here is the R code I have used so far:
> library(raster)
> library(rgdal)
> library(maptools)
> library(rgeos)
> library(lattice)
> library(latticeExtra)
> library(sp)
> library(rasterVis)
> library(rgl)
>
> # taking data read from a .csv of UTM and elevation values
>
> Points.Sp <- data.frame(Points=Rawdata$PointName, UTM.N=Rawdata$UTM.N, UTM.W=Rawdata$UTM.W, Elevation=Rawdata$Elevation)
> Points.Sp <- unique(Points.Sp) #weeding out duplicates
> Points.Sp <- Points.Sp[,c(3,2,4)] #getting rid of point names # I realize this looks messy but it gets what I want
> head(Points.Sp)
UTM.W UTM.N Elevation
1 275815 3879223 1340
8 274813 3879727 1325
29 275312 3879727 1258
45 275812 3879724 1169
66 276313 3879727 1067
75 276813 3879727 1208
>
> dem.in <- raster("D:/Thesis/SouthernApps/Coweeta/Coweeta/DEM_30m_wgs84.img") # reading in DEM
> plot(dem.in) # check in 2D # takes a long time very large, need to crop
>
> dem.crop <- crop(dem.in, c(272000, 282000, 3878000, 3884000))
> plot(dem.crop) # check in 2D, looks good.
>
> plot3D(dem.crop) # plot in 3D looks like exactly what I want
>
> points3d(Points.Sp, pch=19, cex=2, col="black", add=TRUE) # adds the points to plot but in wrong place
>
> #attempting to set a CRS in case this is the problem.
> coordinates(Points.Sp)=c(1,2)
> proj4string(Points.Sp)=CRS("++proj=utm +zone=17") # set CRS
> str(Points.Sp)
Formal class 'SpatialPointsDataFrame' [package "sp"] with 5 slots
..# data :'data.frame': 71 obs. of 1 variable:
.. ..$ Elevation: int [1:71] 1340 1325 1258 1169 1067 1208 1256 1089 1031 959 ...
..# coords.nrs : num [1:2] 1 2
..# coords : num [1:71, 1:2] 275815 274813 275312 275812 276313 ...
.. ..- attr(*, "dimnames")=List of 2
.. .. ..$ : chr [1:71] "1" "8" "29" "45" ...
.. .. ..$ : chr [1:2] "UTM.W" "UTM.N"
..# bbox : num [1:2, 1:2] 274309 3878440 279876 3883732
.. ..- attr(*, "dimnames")=List of 2
.. .. ..$ : chr [1:2] "UTM.W" "UTM.N"
.. .. ..$ : chr [1:2] "min" "max"
..# proj4string:Formal class 'CRS' [package "sp"] with 1 slot
.. .. ..# projargs: chr "+proj=utm +zone=17 +ellps=WGS84"
>
> # trying this a different way after setting CRS
> x <- Points.Sp#coords[1:71,1]
> y <- Points.Sp#coords[1:71,2]
> z <- Points.Sp#data$Elevation
> m <- data.frame(x=x,y=y,z=z)
>
> plot3D(dem.crop) #again, plot in 3D looks like exactly what I want
> points3d(m, pch=19, cex=2, col="black", add=TRUE) # still adds the points to plot but in wrong place
This code reproduces the problem.
## define a Raster object
data(volcano)
r <- raster(volcano)
extent(r) <- c(0, 610, 0, 870)
## extract sample points
xy <- sampleRandom(r1, 100, xy = TRUE)
r1<-data.frame(x=seq(0, 500, length=(71)), y=seq(0, 500, length=(71)), z=seq(0,500, length=(71)))
## display them
plot3D(r, adjust = FALSE)
points3d(r1, add=TRUE)
As documented in the help page, both the x-axis and y-axis are adjusted with the z values. You can disable this default setting with adjust = FALSE:
library(rgl)
library(rasterVis)
## define a Raster object
data(volcano)
r <- raster(volcano)
extent(r) <- c(0, 610, 0, 870)
## extract sample points
xy <- sampleRandom(r, 100, xy = TRUE)
## display them
plot3D(r, adjust = FALSE)
points3d(xy)
## define a Raster object
data(volcano)
r <- raster(volcano)
extent(r) <- c(0, 610, 0, 870)
## extract sample points
xy <- sampleRandom(r1, 100, xy = TRUE)
#must extract the data from the raster and recombine with the xy data.
#I don't know why this is different than simply using the raw values but it
#provides the desired effect.
r1<-data.frame(x=seq(0, 500, length=(71)), y=seq(0, 500, length=(71)))
z<-extract(r, r1)
r1$z<-z
## display them
plot3D(r, adjust = FALSE)
points3d(r1, add=TRUE)
#points now lie flat on 3d image.
Points flush to 3d Image
Image for original problem

Markov chains in R

I'm using the markovchain package in R and the function
mc<-markovchainFit(data)
I have a propablity matrix mc$estimate and I want to round the propabilities. How do I do that?
Another question: How I can write that matrix to text file or Excel?
I have matrix like this:
mc$estimate
MLE Fit
A 22 - dimensional discrete Markov Chain with following states
A B C D E F G H I J K L M N O P Q R S T Y Z
The transition matrix (by rows) is defined as follows
A B C D E F
A 0.468053492 0.008172363 0.028974740 0.014858841 0.023031204 0.063150074
B 0.003191489 0.590425532 0.020212766 0.019148936 0.011702128 0.102127660
C 0.004054198 0.001707031 0.817134322 0.015896725 0.004374267 0.017497066
D 0.004519774 0.006214689 0.052824859 0.505367232 0.024011299 0.035310734
E 0.005132930 0.001710977 0.005396157 0.010002632 0.698078442 0.068570676
F 0.001155435 0.001386522 0.002195326 0.001675381 0.007683642 0.903347873
G 0.004933473 0.002690985 0.014800419 0.012856929 0.020032890 0.073105098
H 0.005486028 0.004114521 0.016629522 0.022458426 0.035487742 0.053317332
I 0.007445734 0.002271580 0.020570419 0.021327612 0.031423523 0.028899546
J 0.011885111 0.003796633 0.024430505 0.021294156 0.015351601 0.056949488
K 0.008743754 0.001784440 0.022127052 0.026945039 0.021234832 0.070663812
L 0.003227759 0.003026024 0.012507565 0.014726649 0.016743998 0.052854549
M 0.007148954 0.002560819 0.013551003 0.014511310 0.015258216 0.067008109
N 0.010998878 0.002918070 0.018406285 0.025140292 0.029405163 0.073400673
O 0.003787879 0.001578283 0.003787879 0.008207071 0.006313131 0.067866162
P 0.000000000 0.000000000 0.000000000 0.007518797 0.000000000 0.007518797
Q 0.005144695 0.004501608 0.003215434 0.012861736 0.013504823 0.052733119
R 0.009460298 0.003566998 0.022797767 0.024193548 0.015973945 0.095068238
I would round that whit 2 desimals and then write to Excel or text file. How it is possible?
The mc$estimate is a S4 class of the following type,
# [1] "markovchain"
# attr(,"package")
# [1] "markovchain"
by using str you can see what slots that object has that you could print/round,
str(mc)
# Formal class 'markovchain' [package "markovchain"] with 4 slots
# ..# states : chr -----
# ..# byrow : logi TRUE
# ..# transitionMatrix: num -----
# .. ..- attr(*, "dimnames")=List of 2
# .. .. ..$ : chr [1:x] ----
# .. .. ..$ : chr [1:x] ----
# ..# name : chr "MLE Fit"
Your output will be a little different since you did not provide the data. But you might see that the slot transitionMatrix looks like a matrix. Let us check,
class(mc$estimate#transitionMatrix)
# [1] "matrix"
Voila! We can easily round a matrix,
print(round(mc$estimate#transitionMatrix, digits=2))
and to store it,
write.csv(mc$esimate#transitionMatrix, file = "transition_matrix.csv", row.names = FALSE)
Hope this helps
It's a typo in the write.csv line for estimate after mc$
write.csv(mc$estimate#transitionMatrix, file = "transition_matrix.csv", row.names = FALSE)
once fixed it works.

Convert text to date/timestamp in a matrix in R for plot.ts

I have a matrix with value and timestamps as strings.
> m
value time
[1,] 0 "2014-10-20T01:48:00.019+02:00"
[2,] 0 "2014-10-20T01:48:30.019+02:00"
[3,] 0 "2014-10-20T01:49:00.019+02:00"
[4,] 0 "2014-10-20T01:49:30.020+02:00"
[5,] 0 "2014-10-20T01:50:00.020+02:00"
...
I would like to convert the strings to timestamps or so, to plot them on a timeseries chart (I suggest to use plot.ts!?). I knew that I can use
strptime(data, "%Y-%m-%dT%H:%M:%OS")
to convert the string, but I don't knew how to apply to the matrix.
Background:
I loaded data from a JSON file:
{
measurements:
0: {
id: "87144000"
self: "http://xxxyyyzzz.com/measurement/measurements/87144000"
source: {...}
time: "2014-10-20T01:48:00.019+02:00"
type: "LightSensor"
LightSensor: {
light: {
unit: "LUX"
value: 0
}
}
}
...
}
I loaded and transform:
> l <- fromJSON(file = "./dev/learning-r/data/c8y-measurement-light.json")
> m <- lapply (l$measurements, function(x) c(x$LightSensor$light['value'], x['time']))
> m <- do.call(rbind, m)
> str(m)
List of 2000
$ : num 0
...
$ : num 0
[list output truncated]
- attr(*, "dim")= int [1:2] 1000 2
- attr(*, "dimnames")=List of 2
..$ : NULL
..$ : chr [1:2] "value" "time"
Thanks!
You might use:
strptime(x['time'], "%Y-%m-%dT%H:%M:%OS");
directly in your lapply function. This ignores the timezone for now, but should work. Look at the %z parameter if you interested in the timezone (some regex might be required).
Then convert the matrix to a dataframe:
df <- data.frame(m)
Now you should be able to plot the data directly (please check that not all values are zero with summary(df)) with plot(df).
I found out, that the structure m is a list. I converted the list to a data.frame
dat <- data.frame(m)
Now I can apply #agtudy slightly changed command
dat$time <- as.POSIXct(strptime(dat$time, "%Y-%m-%dT%H:%M:%OS"))
Results in
> dat
value time
1 0 2014-10-20 01:48:00

passing column names to data.table programmatically

I would like to be able to write a function that runs regressions in a data.table by groups and then nicely organizes the results. Here is a sample of what I would like to do:
require(data.table)
dtb = data.table(y=1:10, x=10:1, z=sample(1:10), weights=1:10, thedate=1:2)
models = c("y ~ x", "y ~ z")
res = lapply(models, function(f) {dtb[,as.list(coef(lm(f, weights=weights, data=.SD))),by=thedate]})
#do more stuff with res
I would like to wrap all this into a function since the #doe more stuff might be long. The issue I face is how to pass the various names of things to data.table? For example, how do I pass the column name weights? how do I pass thedate? I envision a prototype that looks like this:
myfun = function(dtb, models, weights, dates)
Let me be clear: passing the formulas to my function is NOT the problem. If the weights I wanted to use and the column name describing the date, thedate were known then my function could simply look like this:
myfun = function(dtb, models) {
res = lapply(models, function(f) {dtb[,as.list(coef(lm(f, weights=weights, data=.SD))),by=thedate]})
#do more stuff with res
}
However the column names corresponding to thedate and to the weights are unknown in advance. I would like to pass them to my function as so:
#this will not work
myfun = function(dtb, models, w, d) {
res = lapply(models, function(f) {dtb[,as.list(coef(lm(f, weights=w, data=.SD))),by=d]})
#do more stuff with res
}
Thanks
Here is a solution that relies on having the data in long format (which makes more sense to me, in this cas
library(reshape2)
dtlong <- data.table(melt(dtb, measure.var = c('x','z')))
foo <- function(f, d, by, w ){
# get the name of the w argument (weights)
w.char <- deparse(substitute(w))
# convert `list(a,b)` to `c('a','b')`
# obviously, this would have to change depending on how `by` was defined
by <- unlist(lapply(as.list(as.list(match.call())[['by']])[-1], as.character))
# create the call substituting the names as required
.c <- substitute(as.list(coef(lm(f, data = .SD, weights = w), list(w = as.name(w.char)))))
# actually perform the calculations
d[,eval(.c), by = by]
}
foo(f= y~value, d= dtlong, by = list(variable, thedate), w = weights)
variable thedate (Intercept) value
1: x 1 11.000000 -1.00000000
2: x 2 11.000000 -1.00000000
3: z 1 1.009595 0.89019190
4: z 2 7.538462 -0.03846154
one possible solution:
fun = function(dtb, models, w_col_name, date_name) {
res = lapply(models, function(f) {dtb[,as.list(coef(lm(f, weights=eval(parse(text=w_col_name)), data=.SD))),by=eval(parse(text=paste0("list(",date_name,")")))]})
}
Can't you just add (inside that anonymous function call):
f <- as.formula(f)
... as a separate line before the dtb[,as.list(coef(lm(f, ...)? That's the usual way of turning a character element into a formula object.
> res = lapply(models, function(f) {f <- as.formula(f)
dtb[,as.list(coef(lm(f, weights=weights, data=.SD))),by=thedate]})
>
> str(res)
List of 2
$ :Classes ‘data.table’ and 'data.frame': 2 obs. of 3 variables:
..$ thedate : int [1:2] 1 2
..$ (Intercept): num [1:2] 11 11
..$ x : num [1:2] -1 -1
..- attr(*, ".internal.selfref")=<externalptr>
$ :Classes ‘data.table’ and 'data.frame': 2 obs. of 3 variables:
..$ thedate : int [1:2] 1 2
..$ (Intercept): num [1:2] 6.27 11.7
..$ z : num [1:2] 0.0633 -0.7995
..- attr(*, ".internal.selfref")=<externalptr>
If you need to build character versions of formulas from component names, just use paste or paste0 and pass to the models character vector. Tested code supplied with receipt of testable examples.

Filtering values out of two dimensional data in R, comparing themselves

I'm analyzing USA election data, candidates contribution, etc. So I got raw data from internet and trying to learn some R executing some exercises in it. This is a CSV file that I successfully loaded and analyzed with ?summary.
I also used ?tapply successfully, to separate candidates money contribution by state:
data_amt_st = tapply(data$contb_receipt_amt, data[c('cand_nm', 'contbr_st')], sum)
?str (for a small sample) tells me the format of this data:
> str(data_amt_st)
num [1:3, 1:21] NA NA 451 NA NA 201 NA NA 200 NA ...
- attr(*, "dimnames")=List of 2
..$ cand_nm : chr [1:3] "Bachmann, Michele" "Obama, Barack" "Romney, Mitt"
..$ contbr_st: chr [1:21] "33" "46" "48" "7" ...
Now I need to filter out values from data_amt_st. I need states that "Obama, Barack" had more contributions than other candidates, but don't know how to do. Something with ?subset?
Thank you very much.
EDIT 1: Attending what guys told me, about making a more concrete question: I need a list of the states where Barack Obama achieved a higher contribution level (more money) than other candidates.
EDIT 2: Trying to give you a reproducible example (is it correct?):
x = c("a", "b", "c")
y0 = c(3, 5, 1)
y1 = c(2, 1, 6)
y2 = c(4, 2, 3)
m = cbind(x, y0, y1, y2)
m
# x y0 y1 y2
# [1,] "a" "3" "2" "4"
# [2,] "b" "5" "1" "2"
# [3,] "c" "1" "6" "3"
Now, I need to know, for what y values, a is higher than b and c.
Maybe
## max by column (MARGIN=2)
max_amt <- apply(data_amt_st,MARGIN=2,max,na.rm=TRUE)
data_amt_st[,max_amt==data_amt_st["Obama",]]
?
(Not sure how this will work with NA values in the Obama row: using dput to give us a reproducible example ( http://tinyurl.com/reproducible-000 ) would be useful ...)
x <- letters[1:3]
y0 <- c(3, 5, 1)
y1 <- c(2, 1, 6)
y2 <- c(4, 2, 3)
m <- data.frame(y0, y1, y2)
rownames(m) <- x
maxvals <- apply(m,2,max,na.rm=TRUE)
which(m["a",]==maxvals) ## or
names(m)[m["a",]==maxvals]

Resources