random generator for multiple character vectors - r

I'm new to R and want to write a code that generates random workouts. I have 4 character vectors that look like this
Compound_movements <- c('Hip thrust', 'Squat', 'Deadlift')
Abduction <- c('cable aduction', 'lying plated aduction')
Upper <- c('good mornings', 'kneeling squat')
Maxiums <- c('smith machine kick backs', 'cable kickbacks', 'single leg hipthrusts' )
G_H_tie_in <- c('stomp downs' )
I want to code that will pick out and then print 1 or 2 exercises from each vector. What are the best functions for this?

sample is what I think you need.
set.seed(42) # R-4.0.2
sample(Compound_movements, size = 1)
# [1] "Hip thrust"
sample(Compound_movements, size = 1)
# [1] "Hip thrust"
sample(Compound_movements, size = 1)
# [1] "Hip thrust"
sample(Compound_movements, size = 1)
# [1] "Hip thrust"
sample(Compound_movements, size = 1)
# [1] "Squat"
(Yes, apparently you'll do hip thrusts four times in a row ... random is random.)
If those are stored in a list of exercises, you can pick one from each with:
lst_of_exercises <- list(
Compound_movements = c('Hip thrust', 'Squat', 'Deadlift'),
Abduction = c('cable aduction', 'lying plated aduction'),
Upper = c('good mornings', 'kneeling squat'),
Maxiums = c('smith machine kick backs', 'cable kickbacks', 'single leg hipthrusts' ),
G_H_tie_in = c('stomp downs' )
)
sapply(lst_of_exercises, sample, size = 1)
# Compound_movements Abduction Upper Maxiums G_H_tie_in
# "Squat" "lying plated aduction" "good mornings" "single leg hipthrusts" "stomp downs"
And if you need to do other than the same number for all (e.g., 2 of one group, 1 of the others), then perhaps
Map(sample, lst_of_exercises, size = c(1,1,1,2,1))
# $Compound_movements
# [1] "Deadlift"
# $Abduction
# [1] "lying plated aduction"
# $Upper
# [1] "good mornings"
# $Maxiums
# [1] "smith machine kick backs" "cable kickbacks"
# $G_H_tie_in
# [1] "stomp downs"

Related

Sequence of numbers by hyphen without hyphenating single occurrences

I want to generate readable number sequences (e.g. 1, 2, 3, 4 = 1-4), but for a set of data where each number in the sequence must have four digits (e.g. 99 = 0099 or 1 = 0001 or 1022 = 1022) AND where there are different letters in front of each number.
I was looking at the answer to this question, which managed to do almost exactly as I want with two caveats:
If there is a stand-alone number that does not appear in a sequence, it will appear twice with a hyphen in between
If there are several stand-alone numbers that do no appear in a sequence, they won't be included in the result
### Create Data Set ====
## Create the data for different tags. I'm only using two unique levels here, but in my dataset I've got
## 400+ unique levels.
FM <- paste0('FM', c('0001', '0016', '0017', '0018', '0019', '0021', '0024', '0026', '0028'))
SC <- paste0('SC', c('0002', '0003', '0004', '0010', '0012', '0014', '0033', '0036', '0039'))
## Combine data
my.seq1 <- c(FM, SC)
## Sort data by number in sequence
my.seq1 <- my.seq1[order(substr(my.seq1, 3, 7))]
### Attempt Number Sequencing ====
## Get the letters
sp.tags <- substr(my.seq1, 1, 2)
## Get the readable number sequence
lapply(split(my.seq1, sp.tags), ## Split data by the tag ID
function(x){
## Get the run lengths as per [previous answer][1]
rl <- rle(c(1, pmin(diff(as.numeric(substr(x, 3, 7))), 2)))
## Generate number sequence by separator as per [previous answer][1]
seq2 <- paste0(x[c(1, cumsum(rl$lengths))], c("-", ",")[rl$values], collapse="")
return(substr(seq2, 1, nchar(seq2)-1))
})
## Combine lists and sort elements
my.seq2 <- unlist(strsplit(do.call(c, my.seq2), ","))
my.seq2 <- my.seq2[order(substr(my.seq2, 3, 7))]
names(my.seq2) <- NULL
my.seq2
[1] "FM0001-FM0001" "SC0002-SC0004" "FM0016-FM0019" "FM0028" "SC0039"
my.seq1
[1] "FM0001" "SC0002" "SC0003" "SC0004" "SC0010" "SC0012" "SC0014" "FM0016" "FM0017" "FM0018" "FM0019" "FM0021"
[13] "FM0024" "FM0026" "FM0028" "SC0033" "SC0036" "SC0039"
The major problems with this are:
Some values are completely missing from the data set (e.g. FM0021, FM0024, FM0026)
The first number in the sequence (FM0001) appears with a hyphen in between
I feel like I'm getting warmer by using A5C1D2H2I1M1N2O1R2T1's answer to utilize seqToHumanReadable because it's quite elegant AND solves both problems. Two more problems are that I'm not able to tag the ID before each number and can't force the number of digits to four (e.g. 0004 becomes 4).
library(R.utils)
lapply(split(my.seq1, sp.tags), function(x){
return(unlist(strsplit(seqToHumanReadable(substr(x, 3, 7)), ',')))
})
$FM
[1] "1" " 16-19" " 21" " 24" " 26" " 28"
$SC
[1] "2-4" " 10" " 12" " 14" " 33" " 36" " 39"
Ideally the result would be:
"FM0001, SC002-SC004, SC0012, SC0014, FM0017-FM0019, FM0021, FM0024, FM0026, FM0028, SC0033, SC0036, SC0039"
Any ideas? It's one of those things that's really simple to do by hand but would take blinking ages, and you'd think a function would exist for it but I haven't found it yet or it doesn't exist :(
This should do?
# get the prefix/tag and number
tag <- gsub("(^[A-z]+)(.+)", "\\1", my.seq1)
num <- gsub("([A-z]+)(\\d+$)", "\\2", my.seq1)
# get a sequence id
n <- length(tag)
do_match <- c(FALSE, diff(as.numeric(num)) == 1 & tag[-1] == tag[-n])
seq_id <- cumsum(!do_match) # a sequence id
# tapply to combine the result
res <- setNames(tapply(my.seq1, seq_id, function(x)
if(length(x) < 2)
return(x)
else
paste(x[1], x[length(x)], sep = "-")), NULL)
# show the result
res
#R> [1] "FM0001" "SC0002-SC0004" "SC0010" "SC0012" "SC0014" "FM0016-FM0019" "FM0021"
#R> [8] "FM0024" "FM0026" "FM0028" "SC0033" "SC0036" "SC0039"
# compare with
my.seq1
#R> [1] "FM0001" "SC0002" "SC0003" "SC0004" "SC0010" "SC0012" "SC0014" "FM0016" "FM0017" "FM0018" "FM0019" "FM0021" "FM0024"
#R> [14] "FM0026" "FM0028" "SC0033" "SC0036" "SC0039"
Data
FM <- paste0('FM', c('0001', '0016', '0017', '0018', '0019', '0021', '0024', '0026', '0028'))
SC <- paste0('SC', c('0002', '0003', '0004', '0010', '0012', '0014', '0033', '0036', '0039'))
my.seq1 <- c(FM, SC)
my.seq1 <- my.seq1[order(substr(my.seq1, 3, 7))]

Still some randomness with sp::spsample(..., ..., type='regular')

I am building a sp::SpatialLines using spsample. In the doc, it is written for spsample(x, n, type, ...):
type: character; "random" for completely spatial random; "regular" for regular (systematically aligned) sampling; [...]
Yet, I just realized that successively created lines with spsample and type='regular' between the very same two points were not identical:
library(sp)
set.seed(12)
for (i in 1:10) {
p1 = c(400000, 401000)
p2 = c(5600000, 5601000)
l1 = as.data.frame(spsample(SpatialLines(list(Lines(Line(cbind(p1, p2)), ID="a"))),
10000, "regular"))
l2 = as.data.frame(spsample(SpatialLines(list(Lines(Line(cbind(p1, p2)), ID="a"))),
10000, "regular"))
print(all.equal(l1, l2))
}
# [1] "Component “p1”: Mean relative difference: 1.8687e-07"
# [1] "Component “p1”: Mean relative difference: 1.680998e-07"
# [1] "Component “p1”: Mean relative difference: 3.382085e-08"
# [1] "Component “p1”: Mean relative difference: 1.155756e-07"
# [1] TRUE
# [1] "Component “p1”: Mean relative difference: 1.051644e-07"
# [1] TRUE
# [1] "Component “p1”: Mean relative difference: 4.354955e-08"
# [1] "Component “p1”: Mean relative difference: 2.074916e-08"
# [1] "Component “p1”: Mean relative difference: 1.380726e-07"
I have been fighting hard in my code to understand why measures of distances between (what should be) two same points and (what should be) two same lines did not give strictly identical results.
Any idea why is this so, and how to ensure consistent results between successive runs? (or: any alternative to build two identical lines in a similar spirit as above?)
That's some weird behavior. Although, if you put the seed before both samples, it will have no differences. Hence, it is probably due to the origin of the regular sampling varying slightly in the different runs.
....
set.seed(12)
l1 = as.data.frame(spsample(SpatialLines(list(Lines(Line(cbind(p1, p2)), ID="a"))),
10000, "regular"))
set.seed(12)
l2 = as.data.frame(spsample(SpatialLines(list(Lines(Line(cbind(p1, p2)), ID="a"))),
10000, "regular"))
....
# [1] TRUE
# [1] TRUE
# [1] TRUE
....
sf as an alternative to sp
As I have become a big fan of the sf Package I tested whether this would have the same issues. Turns out it doesn't:
(don't get confused, there are some conversions between sf and sp objects, in order to stick as close to the code given in OP)
library(sf)
library(dplyr)
library(sp)
set.seed(12)
for (i in 1:10) {
p1 <- c(400000, 401000)
p2 <- c(5600000, 5601000)
l1 <- as.data.frame(
st_as_sf(SpatialLines(list(Lines(Line(cbind(p1, p2)), ID="a"))) %>%
st_make_grid(n=100, what = "centers") %>%
as("Spatial")
)
l2 <- as.data.frame(
st_as_sf(SpatialLines(list(Lines(Line(cbind(p1, p2)), ID="a"))) %>%
st_make_grid(n=100, what = "centers") %>%
as("Spatial")
)
print(all.equal(l1, l2))
}
# [1] TRUE
# [1] TRUE
# [1] TRUE
# [1] TRUE
# [1] TRUE
# [1] TRUE
# [1] TRUE
# [1] TRUE
# [1] TRUE
# [1] TRUE

Can't iterate output as a list inside the loop in R

The below code works fine outside the loop but didn't produce iterating results inside the loop.
elvalue <- function(x)
{
height= list(
zmean = mean(x),
zmax = max(x),
zsd = sd(x),
zmin = min(x)
)
return(height)
}
rawlist <- list.files(path = "./data/lidar/lidar_16/clipraw", pattern = ".las$", full.names = T, recursive = FALSE)
for(i in 1:length(rawlist)){
readlas <- readLAS(rawlist[i])
lasdf <- data.frame(readlas#data)
cls2 <- subset(lasdf, lasdf$Classification==2)
ht2 <- elvalue(cls2$Z)
cls3 <- subset(lasdf, lasdf$Classification==3)
ht3 <- elvalue(cls3$Z)
cls4 <- subset(lasdf, lasdf$Classification==4)
ht4 <- elvalue(cls4$Z)
cls5 <- subset(lasdf, lasdf$Classification==5)
ht5 <- elvalue(cls5$Z)
allht <- list(ht2, ht3, ht4, ht5)
It gives the output only for first file which is as follows: The output is in the list format so how can I iterate for all files simultaneously and save it as a separate data frame.
[[1]]
[[1]]$zmean
[1] 434.1715
[[1]]$zmax
[1] 501.41
[[1]]$zsd
[1] 27.11985
[[1]]$zmin
[1] 388.27
[[2]]
[[2]]$zmean
[1] 430.5257
[[2]]$zmax
[1] 501.58
[[2]]$zsd
[1] 26.41594
[[2]]$zmin
[1] 388.33
[[3]]
[[3]]$zmean
[1] 428.083
[[3]]$zmax
[1] 503.12
[[3]]$zsd
[1] 29.06311
[[3]]$zmin
[1] 388.66
[[4]]
[[4]]$zmean
[1] 454.2379
[[4]]$zmax
[1] 520.27
[[4]]$zsd
[1] 26.27641
[[4]]$zmin
[1] 390.61
Now I want to iterate the results and save it in a data.frame for all of the 17 files stored in my folder.
Thanks and Regards,
Yogendra

Understanding vectorisation

I was looking for a way to format large numbers in R as 2.3K or 5.6M. I found this solution on SO. Turns out, it shows some strange behaviour for some input vectors.
Here is what I am trying to understand -
# Test vector with weird behaviour
x <- c(302.456500093388, 32553.3619756151, 3323.71232001074, 12065.4076372462,
0, 6270.87962956305, 383.337515655172, 402.20778095643, 19466.0204345063,
1779.05474064539, 1467.09928489114, 3786.27112222457, 2080.08078309959,
51114.7097545816, 51188.7710104291, 59713.9414049798)
# Formatting function for large numbers
comprss <- function(tx) {
div <- findInterval(as.numeric(gsub("\\,", "", tx)),
c(1, 1e3, 1e6, 1e9, 1e12) )
paste(round( as.numeric(gsub("\\,","",tx))/10^(3*(div-1)), 1),
c('','K','M','B','T')[div], sep = '')
}
# Compare outputs for the following three commands
x
comprss(x)
sapply(x, comprss)
We can see that comprss(x) produces 0k as the 5th element which is weird, but comprss(x[5]) gives us the expected results. The 6th element is even weirder.
As far as I know, all the functions used in the body of comprss are vectorised. Then why do I still need to sapply my way out of this?
Here's a vectorized version adapted from pryr:::print.bytes:
format_for_humans <- function(x, digits = 3){
grouping <- pmax(floor(log(abs(x), 1000)), 0)
paste0(signif(x / (1000 ^ grouping), digits = digits),
c('', 'K', 'M', 'B', 'T')[grouping + 1])
}
format_for_humans(10 ^ seq(0, 12, 2))
#> [1] "1" "100" "10K" "1M" "100M" "10B" "1T"
x <- c(302.456500093388, 32553.3619756151, 3323.71232001074, 12065.4076372462,
0, 6270.87962956305, 383.337515655172, 402.20778095643, 19466.0204345063,
1779.05474064539, 1467.09928489114, 3786.27112222457, 2080.08078309959,
51114.7097545816, 51188.7710104291, 59713.9414049798)
format_for_humans(x)
#> [1] "302" "32.6K" "3.32K" "12.1K" "0" "6.27K" "383" "402"
#> [9] "19.5K" "1.78K" "1.47K" "3.79K" "2.08K" "51.1K" "51.2K" "59.7K"
format_for_humans(x, digits = 1)
#> [1] "300" "30K" "3K" "10K" "0" "6K" "400" "400" "20K" "2K" "1K"
#> [12] "4K" "2K" "50K" "50K" "60K"

Get the second value in the row if the dates match in R

I am in the trouble of getting the values which have the same dates from two different data sources in R. The code is
#Monthly data
month_data <- c(580.11, 618.25, 641.24, 604.85, 580.86, 580.07, 632.97,
685.09, 754.50, 680.30, 698.37, 707.38, 480.11, 528.25,
541.24, 614.85, 680.86)
month_dates <- seq(as.Date("2001/06/01"), by = "1 months", length = 17)
month_data <- data.frame(month_dates, month_data)
#the dates_for_match is a list:
dates_for_match<-list(c( "2001-08-01","2001-09-01", "2001-10-01"),c("2001-11-01","2001-12-01","2002-01-01"),c("2002-02-01","2002-03-01","2002-04-01"),c("2002-05-01","2002-06-01","2002-07-01"),c( "2002-08-01","2002-09-01", "2002-10-01"))
Example:
> dates_for_match
[[1]]
[1] "2001-08-01" "2001-09-01" "2001-10-01"
[[2]]
[1] "2001-11-01" "2001-12-01" "2002-01-01"
[[3]]
[1] "2002-02-01" "2002-03-01" "2002-04-01"
[[4]]
[1] "2002-05-01" "2002-06-01" "2002-07-01"
[[5]]
[1] "2002-08-01" "2002-09-01" "2002-10-01"
I want to use the dates_for_match list to get the values from month_data that have the same dates.
You need %in%...
month_data[ month_dates %in% unlist( dates_for_match ) , 2 ]

Resources