Collapse Character Vector to Long Format in R - r

I want to convert my dataframe from wide to long but based on the character vector in one column (based on residents number from the following dput.)
From the following dput, the outcome should have a total of three rows showing all the 3 residents. Is there a way to do it? I tried using seperate rows but the output is not what I desire.
Tried using
Building_Details_Trial_50 %>% tidyr::separate_rows(residents)
dput
structure(list(time = "Mar 22", buildingId = "50", region = "Central",
geometry = structure(list(structure(list(structure(c(-447.361154068258,
-447.557850744738, -533.811390293442, -536.961556093902,
-443.736917153567, -447.361154068258, 5919.51770006977, 5906.87385860642,
5908.2156806004, 5958.8966109417, 5959.54382538916, 5919.51770006977
), dim = c(6L, 2L))), class = c("XY", "POLYGON", "sfg"))), class = c("sfc_POLYGON",
"sfc"), precision = 0, bbox = structure(c(xmin = -536.961556093902,
ymin = 5906.87385860642, xmax = -443.736917153567, ymax = 5959.54382538916
), class = "bbox"), crs = structure(list(input = NA_character_,
wkt = NA_character_), class = "crs"), n_empty = 0L),
count = 3L, geom_points = structure(list(structure(c(-490.403818453599,
5933.7360887923), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = -490.403818453599,
ymin = 5933.7360887923, xmax = -490.403818453599, ymax = 5933.7360887923
), class = "bbox"), crs = structure(list(input = NA_character_,
wkt = NA_character_), class = "crs"), n_empty = 0L),
long = -490.403818453599, lat = 5933.7360887923, residents = list(
c("556", "155", "143"))), row.names = 1L, sf_column = "geometry", agr = structure(c(time = NA_integer_,
buildingId = NA_integer_, region = NA_integer_, count = NA_integer_,
long = NA_integer_, lat = NA_integer_, residents = NA_integer_
), levels = c("constant", "aggregate", "identity"), class = "factor"), class = c("sf",
"tbl_df", "tbl", "data.frame"))
Ideal output
Residents
buildingId
Region
556
50
Central
155
50
Central
143
50
Central

Related

R sf calculate average value of each point

I have a forty year monthly timeseries dataset. I would like to create 3 new columns (AvgTMean, AvgTMin, AvgTMax) that would be the average of tmean, Tmin and TMax for each point in the data (each point will have its own unique average value) respectively. I will then map these average values from any of the 3 average value columns on an interactive.
The purpose is to create a map that shows the 40 year average temperature values.
How can I calculate the average of each point?
Sample data (sf):
structure(list(Info = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), tmean = c(22.2395992279053, 22.7657985687256,
24.4260005950928, 19.601001739502, 21, 24, 26, 21.45, 27.6), Variable = c("tmean", "tmean",
"tmean", "tmean", "tmax", "tmax", "tmax", "tmax", "tmax"),
year = c(2021L, 2021L, 1980L, 1980L, 2021L, 2021L,
2021L, 2021L, 2021L), month = c(11L, 12L, 0L, 1L, 6L, 7L,
8L, 9L, 10L), TMin = c(15, 15.23, 16.12, 13.45, 16.46, 12.11, 11.55, 9.78, 10.56), TMax = c(0,
39.69, 40.001, 43.2, 40.6976985931396, 41.7550983428955, 42.1988983154297,
41.6512985229492, 40.2621994018555), geometry = structure(list(
structure(c(-80.2083333327448, 26.2083333333333), class = c("XY",
"POINT", "sfg")), structure(c(-80.2083333327448, 26.2083333333333
), class = c("XY", "POINT", "sfg")), structure(c(-80.2083333327448,
26.2083333333333), class = c("XY", "POINT", "sfg")),
structure(c(-80.2083333327448, 26.2083333333333), class = c("XY",
"POINT", "sfg")), structure(c(-80.2083333327448, 26.0416666666667
), class = c("XY", "POINT", "sfg")), structure(c(-80.2083333327448,
26.0416666666667), class = c("XY", "POINT", "sfg")),
structure(c(-80.2083333327448, 26.0416666666667), class = c("XY",
"POINT", "sfg")), structure(c(-80.2083333327448, 26.0416666666667
), class = c("XY", "POINT", "sfg")), structure(c(-80.2083333327448,
26.0416666666667), class = c("XY", "POINT", "sfg"))), precision = 0, bbox = structure(c(xmin = -80.2083333327448,
ymin = 26.0416666666667, xmax = -80.2083333327448, ymax = 26.2083333333333
), class = "bbox"), crs = structure(list(input = "WGS 84",
wkt = "GEOGCRS[\"WGS 84\",\n DATUM[\"World Geodetic System 1984\",\n ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n LENGTHUNIT[\"metre\",1]]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n CS[ellipsoidal,2],\n AXIS[\"latitude\",north,\n ORDER[1],\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n AXIS[\"longitude\",east,\n ORDER[2],\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n ID[\"EPSG\",4326]]"), class = "crs"), n_empty = 0L, class = c("sfc_POINT",
"sfc"))), row.names = c(NA, 9L), sf_column = "geometry", agr = structure(c(Info = NA_integer_,
tmean = NA_integer_, CITYNAME = NA_integer_, Model = NA_integer_,
Variable = NA_integer_, Datatype = NA_integer_, Resolution = NA_integer_,
year = NA_integer_, month = NA_integer_, TMin = NA_integer_,
TMax = NA_integer_), class = "factor", .Label = c("constant",
"aggregate", "identity")), class = c("sf", "data.frame"))
Code:
library(tidyverse)
library(sf)
sf_avg = sf %>%
summarise(AvgTMean = mean(tmean),
AvgTMin = mean(TMin),
AvgTMax = mean(TMax)) # Returns the same average value for both the points
# Test static map
qtm(df_df, fill ="AvgTMean", legend = TRUE) +
tm_add_legend(labels = dff$AvgTMean)
I had trouble with your example so I make a small reproducible example. You will probably need to adapt it for your needs.
# loading lib
library(sf)
library(dplyr)
# data is in tidy format so you have a replicate of 3 points
pnt_attribute <- data.frame(year = rep(seq(from = 2022 - 40, to = 2021, by = 1),3)
, temp1 = c(rnorm(40, mean = 20, sd = 5)
, rnorm(40, mean = 20, sd = 10)
, rnorm(40, mean = 15, sd = 5))
)
# this create a table with 3 points replicate 40 times
pnts <- data.frame(
id = c(rep(1, 40),
rep(2, 40),
rep(3,40))
, x = c(rep(5, 40),
rep(1, 40),
rep(0, 40))
, y = c(rep(2, 40),
rep(3, 40),
rep(1, 40)))
# combine the two df then transorm into sf
rep_example <- st_as_sf(cbind(pnts, pnt_attribute), coords = c("x", "y"))
Then you create your summary:
rep_example_agg <- rep_example |>
group_by(id) |>
summarize(AvgTMean = mean(temp1, na.rm = TRUE),
AvgTMin = min(temp1, na.rm = TRUE),
AvgTMax = max(temp1, na.rm = TRUE))
# the result
# Simple feature collection with 3 features and 4 fields
# Geometry type: POINT
# Dimension: XY
# Bounding box: xmin: 0 ymin: 1 xmax: 5 ymax: 3
# CRS: NA
# # A tibble: 3 × 5
# id AvgTMean AvgTMin AvgTMax geometry
# <dbl> <dbl> <dbl> <dbl> <POINT>
# 1 1 19.4 5.92 29.2 (5 2)
# 2 2 20.0 -1.34 40.8 (1 3)
# 3 3 14.6 3.37 26.1 (0 1)
I think you either needed the group_by of your data are in an other structure.
Good resource: https://geocompr.robinlovelace.net/attr.html#vector-attribute-aggregation

R create column in dataframe value name of dataframe

I have a list of dataframes (these are spatial dataframes) named for example "map_g1_r1_airport", "map_g1_r1_hotel", "map_g1_r2_bank", "map_g1_r2_market"
These are elements that were digitized from several maps. The maps were originally called "map_g1_r1", "map_g1_r2".
I am trying to add a column to each dataframe with the name of the original map using a loop.
Here is what I am trying to do:
map_g1_r1_airport$mapid<-map_g1_r1
With the loop (Unfortunately this does not do what I intend to do. Instead it simply creates a "content" field in the Values board.):
list_df<-c("map_g1_r1_airport", "map_g1_r1_hotel", "map_g1_r2_bank", "map_g1_r2_market")
for (df in 1:length(list_df)){
paste(list_df[df],"$mapid<-",
print(content<-gsub("(.*)_.*","\\1",
c(paste(list_df[df]))),sep=""),
quote=FALSE)}
Any help is most welcome!
Here is one example of the data before change:
structure(list(id = c(1, 2, 3), Name = structure(c(1L, 3L, 4L
), .Label = c("A", "B", "C", "D", "E"
), class = "factor"), Year = structure(c(NA_integer_, NA_integer_,
NA_integer_), .Label = character(0), class = "factor"), geometry = structure(list(
structure(c(41.4086152370865, 2.44718243982123), class = c("XY",
"POINT", "sfg")), structure(c(45.3852740543083, -4.31103098867136
), class = c("XY", "POINT", "sfg")), structure(c(38.4200314592624,
-6.96113884231683), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 41.4086152370865,
ymin = 2.31103098867136, xmax = 45.4200314592624, ymax = -4.44718243982123
), class = "bbox"), crs = structure(list(epsg = NA_integer_,
proj4string = NA_character_), class = "crs"), n_empty = 0L)), sf_column = "geometry", agr = structure(c(id = NA_integer_,
Name = NA_integer_, Year = NA_integer_), .Label = c("constant",
"aggregate", "identity"), class = "factor"), row.names = c(NA,
3L), class = c("sf", "data.frame"))
This is what I would like to get (with the mapid map_g1_r1):
structure(list(id = c(1, 2, 3), Name = structure(c(1L, 3L, 4L
), .Label = c("A", "B", "C", "D", "E"
), class = "factor"), Year = structure(c(NA_integer_, NA_integer_,
NA_integer_), .Label = character(0), class = "factor"), geometry = structure(list(
structure(c(41.4086152370865, 2.44718243982123), class = c("XY",
"POINT", "sfg")), structure(c(45.3852740543083, -4.31103098867136
), class = c("XY", "POINT", "sfg")), structure(c(38.4200314592624,
-6.96113884231683), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 41.4086152370865,
ymin = 2.31103098867136, xmax = 45.4200314592624, ymax = -4.44718243982123
), class = "bbox"), crs = structure(list(epsg = NA_integer_,
proj4string = NA_character_), class = "crs"), n_empty = 0L),
mapid = c("map_g1_r1", "map_g1_r1", "map_g1_r1")), sf_column = "geometry", agr = structure(c(id = NA_integer_,
Name = NA_integer_, Year = NA_integer_, mapid = NA_integer_), .Label = c("constant",
"aggregate", "identity"), class = "factor"), row.names = c(NA,
3L), class = c("sf", "data.frame"))
You can achieve that even without a loop.
I would first start by creating a list with the names you want to see in each spatial data.frame. I assume they are derived from the names of the list.
mapid = names(list_df)
following that you can employ mapply to use a function that takes
the first element of a list (or vector) and the first element of another list/vector. Them it moves on and apply the same function to the second elements of each vector. It is essentially a multiple input version of lapply.
The function we will give to mapply is cbind which creates takes takes two data.frames and joins them by column. In this case one data.frame will be your spatial object and the other will be a vector with one single element: the current map name. cbind will naturally convert this name to a 1-column data.frame and repeat the name to match the number of rows in the spatial object.
final = mapply(cbind, list_df, mapid)
I haven't tested it, but it should work.
You can get all the individual dataframes in a list using mget and add a new column with their name using mutate.
Using tidyverse functions you can do this as :
library(dplyr)
library(purrr)
list_df<-c("map_g1_r1_airport", "map_g1_r1_hotel", "map_g1_r2_bank", "map_g1_r2_market")
tmp <- mget(list_df)
result <- imap(tmp, ~.x %>% mutate(map_id = .y))
result will have all changed dataframes in a list, if you want these changes to reflect in the original object you can use list2env.
list2env(result, .GlobalEnv)

merge datasets with loop R

I have a list of 72 datasets loaded in the Global Environment. Each dataset contains a column called uniqueID filled with unique identifiers.
I want to merge each dataset with another dataset based on these unique identifiers (i.e. uniqueID).
I can do the following on a dataset by dataset basis:
dataset1<-merge(dataset1,tomerge,by="uniqueID",all.x=TRUE)
However as I have many datasets I would like to do this using a loop.
Here is what I have tried:
dflist<-Filter(is.data.frame, as.list(.GlobalEnv))
dflist<-function(df,x){df<-merge(df,tomerge,all.x=TRUE)}
Here is an example of dataset1 and tomerge that I am trying to merge:
dput(dataset1[1:2, ])
structure(list(id = c(1, 2), geometry = structure(list(structure(c(12.7709873378252,
-2.34780379794057), class = c("XY", "POINT", "sfg")), structure(c(13.7404727250738,
-3.08397066598979), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 12.7709873378252,
ymin = -2.08397066598979, xmax = 13.7404727250738, ymax = -1.34780379794057
), class = "bbox"), crs = structure(list(epsg = NA_integer_,
proj4string = NA_character_), class = "crs"), n_empty = 0L),
uniqueID = c("id_1_v12_3", "id_2_v13_3")), sf_column = "geometry", agr = structure(c(id = NA_integer_,
shapefile = NA_integer_), .Label = c("constant", "aggregate",
"identity"), class = "factor"), row.names = 1:2, class = c("sf",
"data.frame"))
dput(tomerge[1:2, 2:3 ])
structure(list(uniqueID = c("id_1_v12_3", "id_2_v13_2"), todigit = c("y",
"y")), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame"
)) ```
We can use lapply to loop over the list of data.frame and merge with the 'tomerge' data
dfmergelist <- lapply(dflist, function(x)
merge(x, tomerge, by = "uniqueID", all.x = TRUE))
where
dflist <- Filter(is.data.frame, mget(ls()))

r: How can I use data.table package with sf geometry column?

I want to use data.table to join two dataframes that each have a sf geometry column, and I want to do a cross join in data.table. Reproducible data below and my code is as follows:
library(data.table)
#convert data from data frames to data table
#add the dummy key 'k' and remove it at the end
setkey(as.data.table(data1)[,c(k=1,.SD)],k)[as.data.table(data2).
[,c(k=1,.SD)],allow.cartesian=TRUE][,k:=NULL]
This gives the following result
Using data.table changes the sf geometry column. How can I use data.table while preserving the original format of the geometry column?
For instance, the orginial dataframe geometry column for data1 looks like this:
Reproducible data:
data1:
structure(list(shape = c("polygon 1", "polygon 2"), geometry = structure(list(
structure(list(structure(c(-4e-04, -4e-04, -3e-05, -3e-05,
-4e-04, 51.199, 51.1975, 51.1975, 51.199, 51.199), .Dim = c(5L,
2L))), class = c("XY", "POLYGON", "sfg"), precision = 0, bbox = structure(c(xmin = -4e-04,
ymin = 51.1975, xmax = -3e-05, ymax = 51.199), class = "bbox"), crs = structure(list(
input = NA_character_, wkt = NA_character_), class = "crs"), n_empty = 0L),
structure(list(structure(c(5e-05, 5e-05, 0.003, 0.003, 5e-05,
51.1972, 51.1967, 51.1967, 51.1972, 51.1972), .Dim = c(5L,
2L))), class = c("XY", "POLYGON", "sfg"), precision = 0, bbox = structure(c(xmin = 5e-05,
ymin = 51.1967, xmax = 0.003, ymax = 51.1972), class = "bbox"), crs = structure(list(
input = NA_character_, wkt = NA_character_), class = "crs"), n_empty = 0L)), class =
c("sfc_POLYGON",
"sfc"), precision = 0, bbox = structure(c(xmin = -4e-04, ymin = 51.1967,
xmax = 0.003, ymax = 51.199), class = "bbox"), crs = structure(list(
input = NA_character_, wkt = NA_character_), class = "crs"), n_empty = 0L)), row.names = c(NA,
-2L), sf_column = "geometry", agr = structure(c(shape = NA_integer_), .Label = c("constant",
"aggregate", "identity"), class = "factor"), class = c("sf",
"tbl_df", "tbl", "data.frame"))
data2:
structure(list(shape = c("polygon 1", "polygon 2"), geometry = structure(list(
structure(list(structure(c(0.0095, 0.0085, 0.0075, 0.0075,
0.01055, 0.01055, 0.012, 0.0115, 0.0095, 51.21, 51.199, 51.199,
51.197, 51.196, 51.198, 51.198, 51.21, 51.21), .Dim = c(9L,
2L))), class = c("XY", "POLYGON", "sfg"), precision = 0, bbox = structure(c(xmin = 0.0075,
ymin = 51.196, xmax = 0.012, ymax = 51.21), class = "bbox"), crs = structure(list(
input = NA_character_, wkt = NA_character_), class = "crs"), n_empty = 0L),
structure(list(structure(c(0.0205, 0.019, 0.019, 0.02, 0.021,
0.0205, 51.196, 51.1955, 51.194, 51.193, 51.194, 51.196), .Dim = c(6L,
2L))), class = c("XY", "POLYGON", "sfg"), precision = 0, bbox = structure(c(xmin = 0.019,
ymin = 51.193, xmax = 0.021, ymax = 51.196), class = "bbox"), crs = structure(list(
input = NA_character_, wkt = NA_character_), class = "crs"), n_empty = 0L)), class =
c("sfc_POLYGON",
"sfc"), precision = 0, bbox = structure(c(xmin = 0.0075, ymin = 51.193,
xmax = 0.021, ymax = 51.21), class = "bbox"), crs = structure(list(
input = NA_character_, wkt = NA_character_), class = "crs"), n_empty = 0L)), row.names = c(NA,
-2L), sf_column = "geometry", agr = structure(c(shape = NA_integer_), .Label = c("constant",
"aggregate", "identity"), class = "factor"), class = c("sf",
"tbl_df", "tbl", "data.frame"))

gganimate: identical(classes, col_classes(to)) is not TRUE

I have a fairly simple problem where I want to create a gif which loops through departure_hour and colors the lines based on link volumes. One caveat is the number of rows between states (i.e. departure_hour) may be different.
Here is the code I am trying:
vol <- ggplot() +
geom_sf(data = test, aes(color=link_volume)) +
scale_color_distiller(palette = "OrRd", direction = 1) +
ggtitle("{frame_time}") +
transition_time(departure_hour) +
ease_aes("linear") +
enter_fade() +
exit_fade()
animate(vol, fps = 10, width = 750, height = 450)
However, when I do this I am getting the error:
Error in tween_state(as.data.frame(full_set$from), as.data.frame(full_set$to),:
identical(classes, col_classes(to)) is not TRUE
First, I do not understand if the error is referring to column classes or color classes? If it is color classes am I correct in assuming that the color scales between each plot may be different and that is the reason for this error?
Second, how do I fix this error? There seems to be just one more question on this issue and it has no solution.
Sample data:
> dput(head(test,5))
structure(list(linkid = c(12698L, 26221L, 36429L, 36430L, 47315L
), departure_hour = c(14, 19, 11, 0, 18), link_volume = c(500L,
1550L, 350L, 100L, 550L), geometry = structure(list(structure(c(1065088.71736072,
1065084.18813218, 1253892.13487564, 1253935.59094818), .Dim = c(2L,
2L), class = c("XY", "LINESTRING", "sfg")), structure(c(1060907.62521458,
1060984.50834787, 1237578.71728528, 1237818.59111698), .Dim = c(2L,
2L), class = c("XY", "LINESTRING", "sfg")), structure(c(1063031.34624456,
1062955.36965935, 1241210.04281066, 1241498.76584417), .Dim = c(2L,
2L), class = c("XY", "LINESTRING", "sfg")), structure(c(1063031.34624456,
1063034.73081084, 1241210.04281066, 1241198.98905491), .Dim = c(2L,
2L), class = c("XY", "LINESTRING", "sfg")), structure(c(1058112.52771678,
1058131.02887377, 1236388.96345761, 1236342.13157851), .Dim = c(2L,
2L), class = c("XY", "LINESTRING", "sfg"))), class = c("sfc_LINESTRING",
"sfc"), precision = 0, bbox = structure(c(xmin = 1058112.52771678,
ymin = 1236342.13157851, xmax = 1065088.71736072, ymax = 1253935.59094818
), class = "bbox"), crs = structure(list(epsg = 5070L, proj4string = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs"), class = "crs"), n_empty = 0L)), sf_column = "geometry", agr = structure(c(linkid = NA_integer_,
departure_hour = NA_integer_, link_volume = NA_integer_), .Label = c("constant",
"aggregate", "identity"), class = "factor"), row.names = c(NA,
5L), class = c("sf", "data.table", "data.frame"))

Resources