Colours plot vector based - r

I have a vector z like this
z <- as.numeric(as.factor(c("A","B","C","D","E","F","G","H")))
and for different days a data frame df like this
df[[1]]
ID LON LAT
A 1 1
B 10 14
C 12 13
df[[2]]
ID LON LAT
A 2 3
B 11 18
D 12 13
df[[3]]
ID LON LAT
A 13 1
E 10 14
D 12 13
where the IDs are the ones in z but can be different for every day.
I have assigned a colour to each element of the vector
range01 <- function(x)(x-min(x))/diff(range(x))
rainbow(7)
cRamp <- function(x){
cols <- colorRamp(rainbow(7))(range01(x))
apply(cols, 1, function(xt)rgb(xt[1], xt[2], xt[3], maxColorValue=255))
}
and what I would like to do is to plot for every day my df with the colours cRamp(z) but I am no able to link the ID value in every df with the one in z
Here is my code
for (i in 1:length(myfiles)){
plot(df[[i]]$LON,df[[i]]$LAT, col = cRamp(z))
map(add=T,col="saddlebrown",interior = FALSE)
legend("topleft", legend=c(unique(df[[i]]$ID)), col=cRamp(z))
}
but the colour for e.g. ID A are not the same for every day!
Many thanks

Maybe something like this:
z <- LETTERS[1:7]
df <- list(
data.frame(ID=LETTERS[1:3],
LON=c(1,10,12),
LAT=c(1,14,13)),
data.frame(ID=LETTERS[3:5],
LON=c(2,11,18),
LAT=c(2,9,20))
)
layout(t(1:2))
for (i in 1:2){
plot(df[[i]]$LON, df[[i]]$LAT,
col = rainbow(length(z))[match(df[[i]]$ID,z)],
pch=16)
legend("topleft",
legend=z,
col=rainbow(length(z)),
pch=16)
}

Related

How can I join elements (columns from dataframes) from two lists by row names using R?

I need help please. I have two lists: the first contains ndvi time series for distinct points, the second contains precipitation time series for the same plots (plots are in the same order in the two lists).
I need to combine the two lists. I want to add the column called precipitation from one list to the corresponding ndvi column from the other list respecting the dates (represented here by letters in the row names) to a posterior analises of correlation between columns. However, both time series of ndvi and precipitation have distinct lenghts and distinct dates.
I created the two lists to be used as example of my dataset. However, in my actual dataset the row names are monthly dates in the format "%Y-%m-%d".
library(tidyverse)
set.seed(100)
# First variable is ndvi.mon1 (monthly ndvi)
ndvi.mon1 <- vector("list", length = 3)
for (i in seq_along(ndvi.mon1)) {
aux <- data.frame(ndvi = sample(randu$x,
sample(c(seq(1,20, 1)),1),
replace = T))
ndvi.mon1[i] <- aux
ndvi.mon1 <- ndvi.mon1 %>% map(data.frame)
rownames(ndvi.mon1[[i]]) <- sample(letters, size=seq(letters[1:as.numeric(aux %>% map(length))]) %>% length)
}
# Second variable is precipitation
precipitation <- vector("list", length = 3)
for (i in seq_along(ndvi.mon1)){
prec_aux <- data.frame(precipitation = sample(randu$x*500,
26,
replace = T))
row.names(prec_aux) <- seq(letters[1:as.numeric(prec_aux %>% map(length))])
precipitation[i] <- prec_aux
precipitation <- precipitation %>% map(data.frame)
rownames(precipitation[[i]]) <- letters[1:(as.numeric(precipitation[i] %>% map(dim) %>% map(first)))]
}
Can someone help me please?
Thank you!!!
Marcio.
Maybe like this?
library(dplyr)
library(purrr)
precipitation2 <- precipitation %>%
map(rownames_to_column) %>%
map(rename, precipitation = 2)
ndvi.mon2 <- ndvi.mon1 %>%
map(rownames_to_column) %>%
map(rename, ndvi = 2)
purrr::map2(ndvi.mon2, precipitation2, left_join, by = "rowname")
[[1]]
rowname ndvi precipitation
1 k 0.354886 209.7415
2 x 0.596309 103.3700
3 r 0.978769 403.8775
4 l 0.322291 354.2630
5 c 0.831722 348.9390
6 s 0.973205 273.6030
7 h 0.949827 218.6430
8 y 0.443353 61.9310
9 b 0.826368 8.3290
10 d 0.337308 291.2110
The below will return a list of data.frames, that have been merged, using rownames:
lapply(seq_along(ndvi.mon1), function(i) {
merge(
x = data.frame(date = rownames(ndvi.mon1[[i]]), ndvi = ndvi.mon1[[i]][,1]),
y = data.frame(date = rownames(precipitation[[i]]), precip = precipitation[[i]][,1]),
by="date"
)
})
Output:
[[1]]
date ndvi precip
1 b 0.826368 8.3290
2 c 0.831722 348.9390
3 d 0.337308 291.2110
4 h 0.949827 218.6430
5 k 0.354886 209.7415
6 l 0.322291 354.2630
7 r 0.978769 403.8775
8 s 0.973205 273.6030
9 x 0.596309 103.3700
10 y 0.443353 61.9310
[[2]]
date ndvi precip
1 g 0.415824 283.9335
2 k 0.573737 311.8785
3 p 0.582422 354.2630
4 y 0.952495 495.4340
[[3]]
date ndvi precip
1 b 0.656463 332.5700
2 c 0.347482 94.7870
3 d 0.215425 431.3770
4 e 0.063100 499.2245
5 f 0.419460 304.5190
6 g 0.712057 226.7125
7 h 0.666700 284.9645
8 i 0.778547 182.0295
9 k 0.902520 82.5515
10 l 0.593219 430.6630
11 m 0.788715 443.5345
12 n 0.347482 132.3950
13 q 0.719538 79.1835
14 r 0.911370 100.7025
15 s 0.258743 309.3575
16 t 0.940644 142.3725
17 u 0.626980 335.4360
18 v 0.167640 390.4915
19 w 0.826368 63.3760
20 x 0.937211 439.8685

Calculating the distance between coordinates R

We have a set of 50 csv files from participants, currently being read into a list as
file_paths <- fs::dir_ls("data")
file_paths
file_contents <- list ()
for (i in seq_along (file_paths)) {
file_contents[[i]] <- read_csv(
file = file_paths[[i]]
)
}
dt <- set_names(file_contents, file_paths)
My data looks like this:
level time X Y Type
1 1 355. -10.6 22.36 P
1 1 371. -33 24.85 O
1 2 389. -10.58 17.23 P
1 2 402. -16.7 30.46 O
1 3 419. -29.41 17.32 P
1 4 429. -10.28 26.36 O
2 5 438. -26.86 32.98 P
2 6 451. -21 17.06 O
2 7 463. -21 32.98 P
2 8 474. -19.9 17.06 O
We have 70 sets of coordinates per csv.
Time does not matter for this, but I would like to split up by the level column at some stage.
For every 'P' I want to compare it to 'O' and get the distance between coordinates.The first P will always match with the first O and so on.
For now, I have them split into two different lists, though this may be the complete wrong way to do it! I'm having trouble figuring out how to take all of these csv files and get the distances for all of them, the list seems to cause issues with most functions (like dist)
Here is how I've pulled the right information so far
for (i in seq_along (dt)) {
pLoc[[i]] <- dplyr::filter(dt[[i]], grepl("P", type))
oLoc[[i]] <- dplyr::filter(dt[[i]], grepl("o", type))
pX[[i]] <- pLoc[[i]] %>% pull(as.numeric(headX))
pY[[i]] <- pLoc[[i]] %>% pull(as.numeric(headY))
pCoordinates[[i]] <- cbind(pX[[i]], pY[[i]])
}
[EDITED] Following comments, here is how you can do it with the raster library:
library(raster)
library(dplyr)
df = data.frame(
x = c(10, 20 ,15,9),
y = c(45,34,54,24),
type = c("P","O","P","O")
)
df = cbind(df[df$type=="P",] %>%
dplyr::select(-type) %>%
dplyr::rename(xP = x,
yP = y),
df[df$type=="O",] %>%
dplyr::select(-type) %>%
dplyr::rename(xO = x,
yO = y))
The following could probably be achieved more efficiently with some form of the apply() function:
v = c()
for(i in 1:nrow(df)){
dist = raster::pointDistance(lonlat = F,
p1 = c(df$xP[i],df$yP[i]),
p2 = c(df$xO[i],df$yO[i]))
v = c(v,dist)
}
df$dist = v
print(df)
xP yP xO yO dist
1 10 45 20 34 14.86607
3 15 54 9 24 30.59412

Selecting 10 names based on 10 highest numbers of other column

I want to select the top 10 voted restaurants, and plot them together.
So i want to create a plot that shows the restaurant names and their votes.
I used:
topTenVotes <- top_n(dataSet, 10, Votes)
and it showed me data of the columns in dataset based on the top 10 highest votes, however i want just the number of votes and restaurant names.
My Question is how to select only the top 10 highest votes and their restaurant names, and plotting them together?
expected output:
Restaurant Names Votes
A 300
B 250
C 230
D 220
E 210
F 205
G 200
H 194
I 160
J 120
K 34
And then a bar plot that shows these restaurant names and their votes
Another simple approach with base functions creating another variable:
df <- data.frame(Names = LETTERS, Votes = sample(40:400, length(LETTERS)))
x <- df$Votes
names(x) <- df$Names # x <- setNames(df$Votes, df$Names) is another approach
barplot(sort(x, decreasing = TRUE)[1:10], xlab = "Restaurant Name", ylab = "Votes")
Or a one-line solution with base functions:
barplot(sort(xtabs(Votes ~ Names, df), decreasing = TRUE)[1:10], xlab = "Restaurant Names")
I'm not seeing a data set to use, so here's a minimal example to show how it might work:
library(tidyverse)
df <-
tibble(
restaurant = c("res1", "res2", "res3", "res4"),
votes = c(2, 5, 8, 6)
)
df %>%
arrange(-votes) %>%
head(3) %>%
ggplot(aes(x = reorder(restaurant, votes), y = votes)) +
geom_col() +
coord_flip()
The top_n command also works in this case but is designed for grouped data.
Its more efficient, though less readable, to use base functions:
#toy data
d <- data.frame(list(Names = sample(LETTERS, size = 15), value = rnorm(25, 10, n = 15)))
head(d)
Names value
1 D 25.592749
2 B 28.362303
3 H 1.576343
4 L 28.718517
5 S 27.648078
6 Y 29.364797
#reorder by, and retain, the top 10
newdata <- data.frame()
for (i in 1:10) {
newdata <- rbind(newdata,d[which(d$value == sort(d$value, decreasing = T)[1:10][i]),])
}
newdata
Names value
8 W 45.11330
13 K 36.50623
14 P 31.33122
15 T 30.28397
6 Y 29.36480
7 Q 29.29337
4 L 28.71852
10 Z 28.62501
2 B 28.36230
5 S 27.64808

Create data frame by iteratively adding rows

I am trying to create a data frame (BOS.df) in order to explore the structure of a future analysis I will perform prior to receiving the actual data. In this scenario, lets say that there are 4 restaurants looking to run ad campaigns (the "Restaurant" variable). The total number of days that the campaign will last is cmp.lngth. I want random numbers for how much they are billing for the ads (ra.num). The ad campaigns start on StartDate. ultimately, I want to create a data frame the cycles through each restaurant, and adds a random billing number for each day of the ad campaign by adding rows.
#Create Data Placeholders
set.seed(123)
Restaurant <- c('B1', 'B2', 'B3', 'B4')
cmp.lngth <- 42
ra.num <- rnorm(cmp.lngth, mean = 100, sd = 10)
StartDate <- as.Date("2017-07-14")
BOS.df <- data.frame(matrix(NA, nrow =0, ncol = 3))
colnames(BOS.df) <- c("Restaurant", "Billings", "Date")
for(i in 1:length(Restaurant)){
for(z in 1:cmp.lngth){
BOS.row <- c(as.character(Restaurant[i]),ra.num[z],StartDate +
cmp.lngth[z]-1)
BOS.df <- rbind(BOS.df, BOS.row)
}
}
My code is not functioning correctly right now. The column names are incorrect, and the data is not being placed correctly if at all. The output comes through as follows:
X.B1. X.94.3952435344779. X.17402.
1 B1 94.3952435344779 17402
2 B1 <NA> <NA>
3 B1 <NA> <NA>
4 B1 <NA> <NA>
5 B1 <NA> <NA>
6 B1 <NA> <NA>
How can I obtain the correct output? Is there a more efficient way than using a for loop?
Using expand.grid:
cmp.lngth <- 2
StartDate <- as.Date("2017-07-14")
set.seed(1)
df1 <- data.frame(expand.grid(Restaurant, seq(cmp.lngth) + StartDate))
colnames(df1) <- c("Restaurant", "Date")
df1$Billings <- rnorm(nrow(df1), mean = 100, sd = 10)
df1 <- df1[ order(df1$Restaurant, df1$Date), ]
df1
# Restaurant Date Billings
# 1 B1 2017-07-15 93.73546
# 5 B1 2017-07-16 103.29508
# 2 B2 2017-07-15 101.83643
# 6 B2 2017-07-16 91.79532
# 3 B3 2017-07-15 91.64371
# 7 B3 2017-07-16 104.87429
# 4 B4 2017-07-15 115.95281
# 8 B4 2017-07-16 107.38325
You can use rbind, but this would be another way to do it.
Also, the length of the data frame should be cmp.lngth*length(Restaurant), not cmp.lngth.
#Create Data Placeholders
set.seed(123)
Restaurant <- c('B1', 'B2', 'B3', 'B4')
cmp.lngth <- 42
ra.num <- rnorm(cmp.lngth, mean = 100, sd = 10)
StartDate <- as.Date("2017-07-14")
BOS.df <- data.frame(matrix(NA, nrow = cmp.lngth*length(Restaurant), ncol = 3))
colnames(BOS.df) <- c("Restaurant", "Billings", "Date")
count <- 1
for(name in Restaurant){
for(z in 1:cmp.lngth){
BOS.row <- c(name, ra.num[z], as.character(StartDate + z - 1))
BOS.df[count,] <- BOS.row
count <- count + 1
}
}
I would also recommend you to look at the package called tidyverse and use add_row with tibble instead of data frame. Here is a sample code:
library(tidyverse)
BOS.tb <- tibble(Restaurant = character(),
Billings = numeric(),
Date = character())
for(name in Restaurant){
for(z in 1:cmp.lngth){
BOS.row <- c(name, ra.num[z], as.character(StartDate + z - 1))
BOS.tb <- add_row(BOS.tb,
Restaurant = name,
Billings = ra.num[z],
Date = as.character(StartDate + z - 1))
}
}

Identify nearest neighbor in grid in R (spatial)

I would like to create a square grid and identify the grid cells that border a set of other grid cell for which a binary variable takes a 1. In the following example, I would like to generate a vector of cell ids that border id g13 and g24:
require(sp)
grid <- GridTopology(c(0,0), c(1,1), c(5,5))
polys <- as(grid, "SpatialPolygons")
centroids <- coordinates(polys)
id <- names(polys)
tr <- ifelse(id == "g13" | id == "g24", 1, 0)
ex <- SpatialPolygonsDataFrame(polys, data = data.frame(id = id, tr = tr, row.names = row.names(polys)))
plot(ex)
text(coordinates(polys), labels = row.names(polys))
Such that it outputs a vector for all matching g13 as (g7, g8, g9, g12, g14, g17, g18, g19) and one matching g24 as (g18, g19, g20, g23, g24, g25). Any and all thoughts greatly appreciated.
rgeos::gTouches is perfect for this:
library(rgeos)
adj <- gTouches(polys, polys[which(ex$tr==1)], byid=TRUE)
apply(adj, 1, which)
# $g13
# g7 g8 g9 g12 g14 g17 g18 g19
# 7 8 9 12 14 17 18 19
#
# $g24
# g18 g19 g20 g23 g25
# 18 19 20 23 25
And, because everyone loves pictures:
plot(ex, col=ifelse(seq_along(ex) %in% c(unlist(adj), which(ex$tr==1)), 'gray', NA))
text(coordinates(polys), labels=row.names(polys))

Resources