Sampling a vector by equal split - r

If I have a vector of strings of any length say;
vec <- c("a","b","c","d","e","f","g","h","i")
I am looking for a function to sample n number of values from this vector using a strategy shown below. Showing figures since it's hard to explain.
function call:
result:
schematic diagram:
fn(vector=vec,n=1)
"a"
|
a b c d e f g h i
fn(vector=vec,n=2)
"a" "i"
_______________
| |
a b c d e f g h i
fn(vector=vec,n=3)
"a" "e" "i"
_______________
| | |
a b c d e f g h i
fn(vector=vec,n=4)
"a" "c" "g" "i"
_______________
| | | |
a b c d e f g h i
fn(vector=vec,n=5)
"a" "c" "e" "g" "i"
_______________
| | | | |
a b c d e f g h i
The sampling does not have to be accurate. The values can be from roughly the correct region but must be consistent. The string vector can be even or odd.

One way would be to use seq(), taking advantage of it's length.out= argument to get the evenly spaced indices that you seek:
fn <- function(x, n) {
x[round(seq(1,length(x), length.out=n))]
}
## Check that it works
fn(vec, 1)
# [1] "a"
fn(vec, 2)
# [1] "a" "i"
fn(vec, 4)
# [1] "a" "d" "f" "i"
fn(vec, 8)
# [1] "a" "b" "c" "d" "f" "g" "h" "i"

This should do what you are looking for:
fn <- function(myVector, n) {
# check for valid n
if(n > length(myVector)) stop("invalid n")
if(n == 1) return(myVector[1])
if(n == 2) return(myVector[c(1, length(myVector))])
middleSpots <- ceiling(length(vec) * (1:(n-2) / (n-1)))
return(myVector[c(1, middleSpots, length(myVector))])
}

Related

Extract vectors from nested loops in R as vectors

I'm learning to use R and I'm trying to extract cont, p0 and pf variables from nested loop as 3 different vectors from this code.
v<-c("a","b","c","d","e","f","g","h")
n<-length(v)
mv<-5
a<-n-(mv-1)
cont<-0
for (num in (a-1):0){
for (i in 0:num){
cont<-cont+1
p0<-v[a-num]
pf<-v[n-i]
}
}
The expected result should be:
> print(cont)
[1] 1 2 3 4 5 6 7 8 9 10
> print (p0)
[1] "a" "a" "a" "a" "b" "b" "b" "c" "c" "d"
> print (pf)
[1] "h" "g" "f" "e" "h" "g" "f" "h" "g" "h"
I would keep cont as an index variable and store the other variables in vectors.
v<-c("a","b","c","d","e","f","g","h")
n<-length(v)
mv<-5
a<-n-(mv-1)
cont = 0
cont_stored = vector();
p0 = vector();
pf = vector();
for (num in (a-1):0){
for (i in 0:num){
cont <- cont+1
cat("cont = ", cont, "\n"); ## useful function for printing stuff out in loops
cont_stored[cont] = cont;
p0[cont] = v[a-num]
pf[cont] = v[n-i]
}
}
cont_stored
p0
pf
You can do this without explicit for loop :
v <- c("a","b","c","d","e","f","g","h")
n <- length(v)
mv <- 5
a <- n-(mv-1)
cont <- 0
p0 <- rep(v[1:a], a:1)
pf <- v[unlist(sapply((n-a + 1):n, function(x) n:x))]
p0
# [1] "a" "a" "a" "a" "b" "b" "b" "c" "c" "d"
pf
# [1] "h" "g" "f" "e" "h" "g" "f" "h" "g" "h"
If you need cont you could use p0 or pf with seq_along.
cont <- seq_along(p0)
cont
#[1] 1 2 3 4 5 6 7 8 9 10

Substitution Encryption/Decryption in R

Using the variables alpha and key, encrypt ptext into a variable named ctext. Using substitution cipher
So I have a text file separated in a vector
ptext <- strsplit(ptext,split = "", fixed = TRUE)
ptext <- unlist(ptext)
I also created a key for this cipher
key <- "ZGYHXIWJVKULTMSARBQCPDOENF"
key <- unlist(strsplit(key,""))
and an Alphabet vector for the key
alpha <- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
alpha <= toupper(alpha)
alpha <- unlist(strsplit(alpha,""))
Now my goal is to try to replace all the character in the ptext vector corresponding to the letters in the key in relation to alpha (Example: A in alpha in relation to Z in the key. So all A's in the text would be replaced by a Z)
I know I am supposed to match the alpha in key
cipher <- match(key,alpha)
Now my issue is, the ptext file is over 1000 characters in it. How would I be able to replace all the letters in that vector?
You could use chartr which will avoid splitting the string and pasting back.
ptext <- 'REQWDSFFFSLK'
alpha <- 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
key <- 'ZGYHXIWJVKULTMSARBQCPDOENF'
chartr(alpha, key, ptext)
#[1] "BXROHQIIIQLU"
Here, all R is replaced with B, E with X and so on for every character value.
Basically, you need to do what you are doing with the cipher but apply that to each letter of ptext. You can either leave as a vector of single strings or put back together as desired, for example using paste0 below.
set.seed(123)
ptext <- strsplit(LETTERS[sample(26, 100, T)],split = "", fixed = TRUE)
ptext <- unlist(ptext)
key <- "ZGYHXIWJVKULTMSARBQCPDOENF"
key <- unlist(strsplit(key,""))
alpha <- unlist(strsplit(LETTERS,""))
encoded <- sapply(ptext, function(x) key[match(x, alpha)])
encoded
#> O S N C J R V K E T N V Y Z E S Y Y I C
#> "S" "Q" "M" "Y" "K" "B" "D" "U" "X" "C" "M" "D" "N" "F" "X" "Q" "N" "N" "V" "Y"
#> H Z G J I S D N Q K G U L O J M G I I J
#> "J" "F" "W" "K" "V" "Q" "H" "M" "R" "U" "W" "P" "L" "S" "K" "T" "W" "V" "V" "K"
#> W U G U F Y B E H L M R A Y Y F U O I O
#> "O" "P" "W" "P" "I" "N" "G" "X" "J" "L" "T" "B" "Z" "N" "N" "I" "P" "S" "V" "S"
#> Z P T F K H V V G P Q V R Q B D M E V S
#> "F" "A" "C" "I" "U" "J" "D" "D" "W" "A" "R" "D" "B" "R" "G" "H" "T" "X" "D" "Q"
#> Y T V Y N Y W C H P L Y N C N G C W V Z
#> "N" "C" "D" "N" "M" "N" "O" "Y" "J" "A" "L" "N" "M" "Y" "M" "W" "Y" "O" "D" "F"
paste0(encoded, collapse = "")
#> [1] "SQMYKBDUXCMDNFXQNNVYJFWKVQHMRUWPLSKTWVVKOPWPINGXJLTBZNNIPSVSFACIUJDDWARDBRGHTXDQNCDNMNOYJALNMYMWYODF"

Creating a loop for segments within a vector that give return values for each corresponding segment

I've created a vector "numGrades" of 100 random numbers to represent values within a grading system. I need to write a "for" loop that takes segments of numerical grades and returns a vector of corresponding letter grades i.e.: 90+ = "A", 80-89 = "B", 70-79 = "C", 60-69 = "D", 0-59 = "F". I want to be able to run numGrades to return the corresponding letter grade for example: numGrades = [72, 65, 93] returning = ["C", "D", "A"] with the loop handling vectors of any length. This is what I have tried so far individually. All of these loops have returned warnings:
set.seed(43)
numGrades <- sample(0:100, 100, replace=FALSE)
for (i in numGrades )
if(91 <= numGrades[i]) {
numGrades[i] <- "A"
} else if (80 <= numGrades[i] & numGrades[i] <= 90) {
numGrades[i] =="B"
} else if (70 <= numGrades[i] & numGrades[i] <= 79) {
numGrades[i] =="C"
} else if (60 <= numGrades[i] & numGrades[i] <= 69) {
numGrades[i] =="D"
} else if (0 <= numGrades[i] & numGrades[i] <= 59) {
numGrades[i] =="F"
}
It's saying:
Error in if (91 <= numGrades[i]) { :
missing value where TRUE/FALSE needed
New Edit (Returns for grades >= 91 only):
numGrades <- (0:100)
for (i in 1:length(numGrades ))
if(91 <= numGrades[i]) {
numGrades[i] <- "A"
} else if (80 <= numGrades[i] & numGrades[i] <= 90) {
numGrades[i] =="B"
} else if (70 <= numGrades[i] & numGrades[i] <= 79) {
numGrades[i] =="C"
} else if (60 <= numGrades[i] & numGrades[i] <= 69) {
numGrades[i] =="D"
} else if (0 <= numGrades[i] & numGrades[i] <= 59) {
numGrades[i] =="F"
}
Working Rough Draft
ltrGrades <- (0:100)
numGrades <- character(length(ltrGrades))
for (i in 1:length(ltrGrades ))
if(any(ltrGrades[i] == 91:100)) {
numGrades[i] <- "A"
} else if (any(ltrGrades[i] == 80:90)) {
numGrades[i] <- "B"
} else if (any(ltrGrades[i] == 70:79)) {
numGrades[i] <- "C"
} else if (any(ltrGrades[i] == 60:69)) {
numGrades[i] <- "D"
} else if (any(ltrGrades[i] == 0:59)) {
numGrades[i] <- "F"
}
There are some fundamental R syntax problems with your code.
(numGrades[i]) c(80:90))
(numGrades[i]) >=80 && <=89)
(numGrades[i]) ==80:89 )
Several alternatives, most inefficient:
(80 <= numGrades[i] & numGrades[i] < 90) # the most basic
(dplyr::between(numGrades[i], 80, 90)) # if dplyr is loaded
(data.table::between(numGrades[i], 80, 90)) # if data.table is available
(numGrades[i] %in% 80:89) # works only if all grades are perfectly integral
(any(numGrades[i] == 80:89)) # ditto
Why are they wrong?
(numGrades[i]) c(80:90)), because there is not operator
(numGrades[i]) >=80 && <=89), R does not infer them as you suggest, every time you do an (in)equality test you need to specify both the LHS and RHS for each one; similarly, unlikely many languages, R does not "chain" them, so (80 <= numGrades[i] <= 89) will not work
(numGrades[i]) ==80:89 ) is getting closer, but if/else statements require a single comparison; in this case, you are comparing one number with a sequence (range) of 10, so the reply from this is length 10. It must be length 1.
Bottom line, though, is that you do not need a loop.
# set.seed(43)
numGrades <- sample(0:100, 100, replace=FALSE)
cut(numGrades, c(-1, 60, 70, 80, 90, 101), labels=c("F","D","C","B","A"))
# [1] F A F D F F D F F C F F F F F B F A F F C A F F F F A F A F C C C B F
# [36] F F F B A F F F A B B C D F F F B D F F B D D B F F F F F F D F A F F
# [71] F F F F B F D F A F F F F F A B F F F C F F F D D C C F F F
# Levels: F D C B A
or if you don't like or understand what a factor is, then
as.character(cut(numGrades, c(-1, 60, 70, 80, 90, 101), labels=c("F","D","C","B","A")))
# [1] "F" "A" "F" "D" "F" "F" "D" "F" "F" "C" "F" "F" "F" "F" "F" "B" "F"
# [18] "A" "F" "F" "C" "A" "F" "F" "F" "F" "A" "F" "A" "F" "C" "C" "C" "B"
# [35] "F" "F" "F" "F" "B" "A" "F" "F" "F" "A" "B" "B" "C" "D" "F" "F" "F"
# [52] "B" "D" "F" "F" "B" "D" "D" "B" "F" "F" "F" "F" "F" "F" "D" "F" "A"
# [69] "F" "F" "F" "F" "F" "F" "B" "F" "D" "F" "A" "F" "F" "F" "F" "F" "A"
# [86] "B" "F" "F" "F" "C" "F" "F" "F" "D" "D" "C" "C" "F" "F" "F"
EDIT
I just noticed something else about your code.
When you start, numGrades is either numeric or integer. However, since it is a vector, the first time you assign a letter to one of its elements, the entire vector is converted to a character vector. The second pass through the for loop will try to compare a number with a string, which will not do a numeric comparison, try 8 < "75" for why this will fail.
As a workaround for this:
ltrGrades <- character(length(numGrades))
for (i in 1:length(numGrades ))
if(91 <= numGrades[i]) {
ltrGrades[i] <- "A"
} else if (80 <= numGrades[i] & numGrades[i] <= 90) {
ltrGrades[i] <- "B"
} else if (70 <= numGrades[i] & numGrades[i] <= 79) {
ltrGrades[i] <- "C"
} else if (60 <= numGrades[i] & numGrades[i] <= 69) {
ltrGrades[i] <- "D"
} else if (0 <= numGrades[i] & numGrades[i] <= 59) {
ltrGrades[i] <- "F"
}

Define names of objects in multiple lists within a list (using lapply?)

My apologies for the somewhat confusing title (any suggestion for improvement are welcome)..
Suppose I have a list which contains several (e.g. four) lists in which I would like to store 20 objects later on:
mylist <- vector(mode="list",length=4)
names(mylist) <- c("One","Two","Three","Four")
mylist$One <- mylist$Two <- mylist$Three <- mylist$Four <- vector(mode="list",
length=20)
I would like to define the names of those objects beforehand. Of course, I can do that as following:
names(mylist$One) <- c("A","B","C","D","E","F","G","H","I","J",
"K","L","M","N","O","P","Q","R","S","T")
names(mylist$Two) <- names(mylist$Three) <- names(mylist$Four) <- names(mylist$One)
But if the number of the lists would increase (as is the case in my actual data), this becomes rather cumbersome, so I was trying to do this with a function such as lapply :
mylist <- lapply(mylist,FUN=function(x) {names(x) <-
c("A","B","C","D","E","F","G","H","I","J",
"K","L","M","N","O","P","Q","R","S","T")})
This, however, does not give me the same result, but I can not seem to figure out what I am overlooking here. Any suggestions?
Thanks!
You need to return a value in your lapply call:
mylist <- lapply(mylist,FUN=function(x) {names(x) <-
c("A","B","C","D","E","F","G","H","I","J",
"K","L","M","N","O","P","Q","R","S","T")
x ## <- note the x here; you could also use return(x)
})
mylist
# $One
# A B C D E F G H I J K L M N O P Q R S T
# "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S" "T"
#
# $Two
# A B C D E F G H I J K L M N O P Q R S T
# "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S" "T"
#
# $Three
# A B C D E F G H I J K L M N O P Q R S T
# "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S" "T"
#
# $Four
# A B C D E F G H I J K L M N O P Q R S T
# "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S" "T"
This is my implementation, which I think it produces the results you are expecting
mylist <- vector(mode="list",length=4)
names(mylist) <- c("One","Two","Three","Four")
mylist$One <- mylist$Two <- mylist$Three <- mylist$Four <- vector(mode="list",length=20)
renameList <- function(mylist,k){
names(mylist) <- LETTERS[1:k]
return(mylist)
}
mylist2 <- lapply(mylist, function(x) renameList(x,20))
# > str(mylist2)
# List of 4
# $ One :List of 20
# ..$ A: NULL
# ..$ B: NULL
# ..$ C: NULL

Contract vertices the graph and combine attributes

I came across a problem.
I have the scenery following:
# Create a graph
g1 <- graph.full(5)
V(g1)$name <- letters[1:vcount(g1)]
V(g)
Vertex sequence:
[1] "a" "b" "c" "d" "e"
# Contract vertex "a" and "b"
vec = c(1,1,2,3,4)
contract_1 <- contract.vertices(g1, vec, vertex.attr.comb=toString)
V(contract_1)
Vertex sequence:
[1] "a, b" "c" "d" "e"
# Contract vertex "a, b" and "c"
vec = c(1,1,2,3)
contract_2 <- contract.vertices(contract_1, vec, vertex.attr.comb=toString)
V(contract_2)
Vertex sequence:
[1] "a, b, c" "d" "e"
And so on ... (contract "a, b, c" and "d", creating the vertex "a, b, c, d")
I need to differentiate the vertexs of the previous level.
Eg.:
By contracting the vertices "a, b​​" and "c", I need using a extra markup as "|" or ";". In this case, the result would be "a, b | c" or "a, b- c" or "a, b; c".
By contracting the vertices "a, b, c" and "d" the result would be "a, b, c | d" or "a, b, c; d"
I tried a few things ...
Eg.:
g <- contract.vertices(g, matching,
vertex.attr.comb=list(name=function(x) paste(toString(x,"",sep=";"))))
However, not work
paste also has a collapse argument:
contract.vertices(
contract_1,
c(1,1,2,3),
vertex.attr.comb = list( name = function(x) paste(x, collapse=";") )
)
You could also use nested parentheses:
library(igraph)
g <- list()
k <- 5
g[[1]] <- graph.full(k)
V(g[[1]])$name <- letters[1:vcount(g1)]
for(i in 2:k) {
g[[i]] <- contract.vertices(
g[[i-1]],
c(1,1,2:k)[1:(k-i+2)],
vertex.attr.comb = list( name = function(x)
if( length(x) > 1 ) paste0( "(", paste0(x,collapse=","), ")" ) else x
)
)
}
lapply(g, V)
# [[1]]
# Vertex sequence:
# [1] "a" "b" "c" "d" "e"
# [[2]]
# Vertex sequence:
# [1] "(a,b)" "c" "d" "e"
# [[3]]
# Vertex sequence:
# [1] "((a,b),c)" "d" "e"
# [[4]]
# Vertex sequence:
# [1] "(((a,b),c),d)" "e"
# [[5]]
# Vertex sequence:
# [1] "((((a,b),c),d),e)"

Resources