Error creating a vector from a self-made function - r

Trying to make my problem reproducible, I have the following vector:
trialvector <- as.vector(c("K", "K", "m", "m", "K"))
And this function to try to convert this vector into one which transforms "K" into a numeric 3 and "m" into a numeric 6, I want to assign this vector to a variable called multiplier:
Expcalc <- function(vector) {
multiplier <<- vector(mode = "numeric", length = length(vector))
for (i in seq_along(vector)) {
if (vector[i] == "K") {
multiplier[i] <- 3
} else if (vector[i] == "M" | i == "m") {
multiplier[i] <- 6
} else {
multiplier[i] <- 0
}
}
}
Instead of getting the output I want (a Vector of 6 and/or 3 depending on which character was in trialvector, I get a vector full of zeros. and this error:
Warning messages:
1: In Expcalc(trialvector) : NAs introduced by coercion
2: In Expcalc(trialvector) : NAs introduced by coercion
What am I doing wrong?

trialvector <- as.vector(c("K", "K", "m", "m", "K"))
trailvector
[1] "K" "K" "m" "m" "K"
Expcalc <- function(vector) {
multiplier <- as.vector(x = c(), mode = "numeric")
for (i in vector) {
if (i == "K") {
multiplier <- append(multiplier, 3)
} else if (i == "M" | i == "m") {
multiplier <- append(multiplier, 6)
} else {
multiplier <- append(multiplier, 0)
}
}
return(multiplier)
}
trailvector <- Expcalc(trialvector)
trailvector
[1] 3 3 6 6 3
I switched the for loop and then just appended the new values into the new vector. Output matches what you are looking for.

Related

My R function is returning a vector that is larger than expected

I have an assignment for class that tasks me with creating a function in R that takes in a vector of values and converts them between Fahrenheit, Celsius and Kelvin. The example that I try to run through my code should produce a return vector that contains only 3 values, yet it is returning a vector that holds 26 values.
Here is my code below:
convTemp <- function(x, from = "C", to = "F") {
newX <- vector("double", length(x))
if (from == to) {
warning("Your 'from' parameter is the same as your 'to' parameter!") # From and To are same temperature scale
}
if (from == "C" && to == "F") { # Celsius to Fahrenheit
for (i in x) {
newX[i] <- ((9/5)*x[i]+32)
}
}
if (from == "C" && to == "K") { # Celsius to Kelvin
for (i in x) {
newX[i] <- (x[i]+273.15)
}
}
if (from == "F" && to == "C") { # Fahrenheit to Celsius
for (i in x) {
newX[i] <- ((x[i]-32)*(5/9))
}
}
if (from == "K" && to == "C") { # Kelvin to Celsius
for (i in x) {
newX[i] <- (x[i]-273.15)
}
}
if (from == "F" && to == "K") { # Fahrenheit to Kelvin
for (i in x) {
newX[i] <- ((((x[i]-32)*5)/9)+273.15)
}
}
if (from == "K" && to == "F") { # Kelvin to Fahrenheit
for (i in x) {
newX[i] <- ((((x[i]-273.15)*9)/5)+32)
}
}
return(newX)
}
convTemp(c(35,40,45), from="F", to="K")
And here is the output I'm receiving:
[1] 0 0 0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[26] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
So I'm not sure why the function is returning such a large vector of missing values when it should be returning a vector of 3 Kelvin values.
Since R is vectorized, you do not need the for-loops. Check the code below to see how to better write your function:
convTemp <- function(x, from = "C", to = "F") {
stopifnot(c(from, to) %in% c('C', 'F', 'K'))# IN CASE YOU INPUT A DIFFERENT ARGUMENT OTHER THAN C,F, AND K
if (from == to) {
warning("Your 'from' parameter is the same as your 'to' parameter!") # From and To are same temperature scale
}
if (from == "C"){
if (to == "F") 9/5 * x + 32 # Celsius to Fahrenheit
else (x + 273.15) # Celsius to Kelvin
}
else if (from == "F") {
if(to == "C") (x -32) * 5/9 # Fahrenheit to Celsius
else (x - 32) * 5/9 + 273.15 # Fahrenheit to Kelvin
}
else {
if(to == 'C')(x - 273.15) # Kelvin to Celsius
else (x-273.15) * 9/5 + 32 # Kelvin to Fahrenheit
}
}
convTemp(c(35,40,45), from="F", to="K")
[1] 274.8167 277.5944 280.3722
In the code (assuming it is a coding exercise), we need not loop over the values (for(i in x)) but loop over the sequence (for(i in seq_along(x)))
convTemp <- function(x, from = "C", to = "F") {
newX <- vector("double", length(x))
if (from == to) {
warning("Your 'from' parameter is the same as your 'to' parameter!") # From and To are same temperature scale
}
if (from == "C" && to == "F") { # Celsius to Fahrenheit
for (i in seq_along(x)) {
newX[i] <- ((9/5)*x[i]+32)
}
}
if (from == "C" && to == "K") { # Celsius to Kelvin
for (i in seq_along(x)) {
newX[i] <- (x[i]+273.15)
}
}
if (from == "F" && to == "C") { # Fahrenheit to Celsius
for (i in seq_along(x)) {
newX[i] <- ((x[i]-32)*(5/9))
}
}
if (from == "K" && to == "C") { # Kelvin to Celsius
for (i in seq_along(x)) {
newX[i] <- (x[i]-273.15)
}
}
if (from == "F" && to == "K") { # Fahrenheit to Kelvin
for (i in seq_along(x)) {
newX[i] <- ((((x[i]-32)*5)/9)+273.15)
}
}
if (from == "K" && to == "F") { # Kelvin to Fahrenheit
for (i in seq_along(x)) {
newX[i] <- ((((x[i]-273.15)*9)/5)+32)
}
}
return(newX)
}
-testing
> convTemp(c(35,40,45), from="F", to="K")
[1] 274.8167 277.5944 280.3722
Not that you asked, but since #onyambu helped eliminate the for loops, I though I might help eliminate the if statements. I'm not sure if this is more efficient, but I find a bunch of if statements difficult to follow sometimes.
convTemp <- function(x, from = "C", to = "F") {
stopifnot(c(from, to) %in% c('C', 'F', 'K'))
from. <- c(0,-32,-273.15)[which(c('C', 'F', 'K') %in% from)]
to. <- c(0,32,273.15)[which( c('C', 'F', 'K') %in% to)]
mult. <- c(1, 5/9, 9/5 )[sum(which(c(from, to) %in% 'F'))+1]
(x + from.)*mult. + to.
}
convTemp(c(35,40,45), from="F", to="K")
#> [1] 274.8167 277.5944 280.3722
convTemp(c(35,40,45), from="C", to="F")
#> [1] 95 104 113

R replace list elements from within a function

In R I'd like to replace some elements in a list using the $ notation:
# functions
replaceNonNull <- function(x, value) {
if(!is.null(x)){
thisx <- deparse(substitute(x))
print(paste0("replacing ", thisx, " with '",value,"'"))
#x <<- value
assign(thisx, value, envir = .GlobalEnv)
}
}
mylist = list("a"=1:3)
replaceNonNull(mylist$a,"456");mylist$a
However after running replaceNonNull, a new variable is created with name 'mylist$a'. How can I change the a value in the list instead?
Maybe you want something like this:
replaceNonNull <- function(x, el, value, env = globalenv()) {
if (!is.null(x[[el]])) {
nx <- deparse(substitute(x))
nv <- deparse(substitute(value))
cat("replacing value of", sQuote(el), "in", sQuote(nx), "with", sQuote(nv), "\n")
env[[nx]][[el]] <- value
}
}
mylist <- list(a = 1:3)
replaceNonNull(mylist, "a", 4:6)
## replacing value of ‘a’ in ‘mylist’ with ‘4:6’
mylist$a
## [1] 4 5 6
replaceNonNull(mylist, "b", 4:6)
mylist$b
## NULL
Nonstandard evaluation is a dangerous game, so you should be aware of the limitations. Here, x must be the name of a variable bound in env (hence not a call to the $ operator). Otherwise, you will continue to see unexpected behaviour:
mylist <- list(zzz = list(a = 1:3))
replaceNonNull(mylist$zzz, "a", 4:6)
## replacing value of ‘a’ in ‘mylist$zzz’ with ‘4:6’
mylist$zzz$a
## [1] 1 2 3
`mylist$zzz`
## $a
## [1] 4 5 6
You can avoid unintended assignments by adding a test:
replaceNonNull <- function(x, el, value, env = globalenv()) {
nx <- deparse(substitute(x))
if (!exists(nx, env, mode = "list")) {
stop("There is no list in ", sQuote("env"), " named ", sQuote(nx), ".")
}
if (!is.null(x[[el]])) {
nv <- deparse(substitute(value))
cat("replacing value of", sQuote(el), "in", sQuote(nx), "with", sQuote(nv), "\n")
env[[nx]][[el]] <- value
}
}
rm(`mylist$zzz`) # clean up after last example
replaceNonNull(mylist$zzz, "a", 4:6)
## Error in replaceNonNull(mylist$zzz, "a", 4:6) :
## There is no list in ‘env’ named ‘mylist$zzz’.
The problem you're having is that the first argument of assign is:
x - a variable name, given as a character string.
But even outside the function, this doesn't work.
assign(mylist$a,0)
#Error in assign(mylist$a, 0) : invalid first argument
assign("mylist$a",0)
mylist
#$a
#[1] 1 2 3
However, you can use $<-, like this:
> mylist$a <- 0
> mylist$a
[1] 0
One approach, then is to create that expression and evaluate it:
mylist = list("a"=1:3)
myexpression <- deparse(substitute(mylist$a))
myexpression
#[1] "mylist$a"
library(rlang)
expr(!!parse_expr(myexpression) <- 0)
#mylist$a <- 0
eval(expr(!!parse_expr(myexpression) <- 0))
mylist$a
#[1] 0
Obviously use <<- inside the function.

Why do my function return character(0) even though there is no error in the code?

I am seeking help on this function I have created. The purpose of this function: First, I want to extract a column from a data frame and arrange it in descending order. Then, I rank each element by "H", "M" and "L". I want to rank them such as the first 33% of the items should have the tag "H" and the last 33% of the items are tagged as "L". The rest should be tagged as "M".
This is the code:
ranking_prod <- function(data, column) {
data <- arrange(data, desc(column))
size <- length(data$column)
first_third <- data$column[round(size / 3)]
last_third <- data$column[round(size - (size / 3))]
case_when(data$column > first_third ~ "H",
data$column < last_third ~ "L",
TRUE ~ "M")
}
However, when I apply this function to the following data frame:
> one <- c("a", "b", "c", "d", "e")
> two <- c(1, 2, 2, 1, 5)
> three <- c(2, 2, 2, 4, 5)
> dataframe <- data.frame(one, two, three)
It returns:
> rank_volume(dataframe, two)
character(0)
Where is the error in the code? Why is it displaying character(0) as the results?
We can use [[ instead of $ and also as we are passing unquoted argument, convert to string. As we are converting to symbol with ensym, the input can either be unquoted or quoted
ranking_prod <- function(data, column) {
column <- rlang::ensym(column)
colstr <- rlang::as_string(column)
data <- dplyr::arrange(data, desc(!!column))
size <- length(data[[colstr]])
first_third <- data[[colstr]][round(size / 3)]
last_third <- data[[colstr]][round(size - (size / 3))]
dplyr::case_when(data[[colstr]] > first_third ~ "H",
data[[colstr]] < last_third ~ "L",
TRUE ~ "M")
}
-testing
ranking_prod(dataframe, two)
#[1] "H" "M" "M" "L" "L"
ranking_prod(dataframe, 'two')
#[1] "H" "M" "M" "L" "L"

How do you repeat sampling from a vector of characters until a certain sequence appears?

There are 10 cards written as "S", "T", "A", "T", "I", "S", "T", "I", "C", "S".
After picking ONE card randomly, you put the card back to the original place and mix it. Repeat this until "S","A","T" come out in order.
x<-c("S","T","A","T","I","S","T","I","C","S")
repeat{
print(sample(x,1,replace=TRUE))}
I don't know how to stop when "S","A","T" comes out.
Here is one way
tmp <- c(NA, sample(x, 2, replace = TRUE))
k <- 0
while (!identical(tmp, c("S", "A", "T"))) {
tmp <- c(tmp[-1], sample(x, 1))
k <- k + 1
}
You can use this:
x<-c("S","T","A","T","I","S","T","I","C","S")
pre_last <- NULL
last <- NULL
curr <- NULL
repeat{
curr <- sample(x,1,replace=TRUE)
if(curr == "T")
if(last == "A" && pre_last == "S")
break
pre_last <- last
last <- curr
}
# Result
pre_last
last
curr
Another solution:
s = sapply(1:3,function(i) {sample(x,1,replace=TRUE)})
print(s)
repeat{
if(s[1]=="S" & s[2]=="A" & s[3]=="T")
break
else {
s=s[-1]
s=c(s,(sample(x,1,replace=TRUE)))
print(s)
}
}
You could do this:
set.seed(1)
i <- 1
wanted <- c("S","A","T")
res <- c()
while(TRUE){
r <- sample(x,1,replace = TRUE)
res <- c(res, r)
if(r==wanted[i])
i <- i+1
else i <- 1
if(i==(length(wanted)+1)) break
}
> res
[1] "A" "T" "S" "S" "A" "C" "S" "T" "T" "S" "A" "T"

Comparing strings in multiple R data frames to retrieve the first characters that are in agreement

I have the following data frames in R.
df1<-as.data.frame(cbind(Site=c(1,2,3,4,5),Nucs=c("ACTG","ACT","GTAC","GTC","GACT")))
df2<-as.data.frame(cbind(Site=c(1,2,3,4,5),Nucs=c("AC","ATC","GTCA","GC","GAC")))
I am trying to determine what the longest possible string that is consistent between the two Nucs columns.
So far, I have tried this:
x1 <- strsplit(as.character(df1$Nucs),"")
x2 <- strsplit(as.character(df2$Nucs),"")
x <- Map(intersect, x1, x2)
sapply( x, paste0, collapse="")
This gives me the following:
[1] "AC" "ACT" "GTAC" "GC" "GAC"
which is not quite what I want because in the case of Site 3 I have GTAC and GTCA so I only want the first two characters that are consistent in the string, i.e. GT.
Does anybody have any ideas on how I can go about this?
I also find a solution that you can try:
CompareVectors <- function(x, y){
comp_length <- min(length(y), length(x))
x <- x[1 : comp_length]
y <- y[1 : comp_length]
compare <- x == y
id <- which(compare == F)[1]
if(!is.na(id)){
x <- x[which(compare[1: (id - 1)])]
}
return(paste(x, collapse = ""))
}
OUTPUT:
sapply(1 : length(x1), function(i) CompareVectors(x1[[i]], x2[[i]]))
[1] "AC" "A" "GT" "G" "GAC"
So here's my not very efficient solution:
df1 <- as.data.frame(cbind(Site=c(1,2,3,4,5),Nucs=c("ACTG","ACT","GTAC","GTC","GACT")))
df2 <- as.data.frame(cbind(Site=c(1,2,3,4,5),Nucs=c("AC","ATC","GTCA","GC","GAC")))
x1 <- strsplit(as.character(df1$Nucs),"")
x2 <- strsplit(as.character(df2$Nucs),"")
for(i in 1:nrow(df1)){
a = ""
for(j in 1:min(length(x1[[i]]),length(x2[[i]]))){
a= paste(a,x1[[i]][j] == x2[[i]][j],sep=",")
}
print(head(x1[[i]],sum(as.logical(strsplit(a,",")[[1]][-1]))))
}
Output:
[1] "A" "C"
[1] "A"
[1] "G" "T"
[1] "G"
[1] "G" "A" "C"
Would you like me to comment the code?
You can try this, although a bit lengthy:
sapply(1:nrow(df1), function(x) {
s1 <- unlist(strsplit(as.character(df1$Nucs[x]), split = ''))
s2 <- unlist(strsplit(as.character(df2$Nucs[x]), split = ''))
n <- min(length(s1), length(s2))
i <- 1
while(i <= n) {
if (s1[i] != s2[i]) {
break
}
i <- i + 1
}
if (i > 0)
paste(s1[1:(i-1)], collapse ='')
else
''
})
# [1] "AC" "A" "GT" "G" "GAC"
Here's another solution. It may not be covering all possible cases but that's probably easy to extend.
df1<-as.data.frame(cbind(Site=c(1,2,3,4,5),Nucs=c("ACTG","ACT","GTAC","GTC","GACT")))
df2<-as.data.frame(cbind(Site=c(1,2,3,4,5),Nucs=c("AC","ATC","GTCA","GC","GAC")))
mapply(x = as.list(df1$Nucs), y = as.list(df2$Nucs), FUN = function(x, y) {
x <- as.character(x); y <- as.character(y) # doesn't work with factors
# To keep everything in one easy to debug chunk, just switch in case
# x is shorter than y.
if (!(nchar(x) >= nchar(y))) {
xp <- y
yp <- x
} else {
xp <- x
yp <- y
}
# create elements to work on and vector for storage
to.glue <- strsplit(xp, "")[[1]]
out <- rep(NA, times = length(to.glue)) # used as output
# If one string is shorter than the other, extract one element
# at a time and see if there's a match in y. If yes, then pro-
# ceed to the second element, concatenate it with the first
# one and see if this pattern is present anywhere in y...
for (i in 1:length(to.glue)) {
glued <- paste(to.glue[1:i], collapse = "")
fm <- pmatch(x = glued, table = yp)
if (is.na(fm)) {
return(out[i-1])
} else {
out[i] <- glued
}
}
})
[1] "AC" "A" "GT" "G" "GAC"

Resources