Related
I have an assignment for class that tasks me with creating a function in R that takes in a vector of values and converts them between Fahrenheit, Celsius and Kelvin. The example that I try to run through my code should produce a return vector that contains only 3 values, yet it is returning a vector that holds 26 values.
Here is my code below:
convTemp <- function(x, from = "C", to = "F") {
newX <- vector("double", length(x))
if (from == to) {
warning("Your 'from' parameter is the same as your 'to' parameter!") # From and To are same temperature scale
}
if (from == "C" && to == "F") { # Celsius to Fahrenheit
for (i in x) {
newX[i] <- ((9/5)*x[i]+32)
}
}
if (from == "C" && to == "K") { # Celsius to Kelvin
for (i in x) {
newX[i] <- (x[i]+273.15)
}
}
if (from == "F" && to == "C") { # Fahrenheit to Celsius
for (i in x) {
newX[i] <- ((x[i]-32)*(5/9))
}
}
if (from == "K" && to == "C") { # Kelvin to Celsius
for (i in x) {
newX[i] <- (x[i]-273.15)
}
}
if (from == "F" && to == "K") { # Fahrenheit to Kelvin
for (i in x) {
newX[i] <- ((((x[i]-32)*5)/9)+273.15)
}
}
if (from == "K" && to == "F") { # Kelvin to Fahrenheit
for (i in x) {
newX[i] <- ((((x[i]-273.15)*9)/5)+32)
}
}
return(newX)
}
convTemp(c(35,40,45), from="F", to="K")
And here is the output I'm receiving:
[1] 0 0 0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[26] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
So I'm not sure why the function is returning such a large vector of missing values when it should be returning a vector of 3 Kelvin values.
Since R is vectorized, you do not need the for-loops. Check the code below to see how to better write your function:
convTemp <- function(x, from = "C", to = "F") {
stopifnot(c(from, to) %in% c('C', 'F', 'K'))# IN CASE YOU INPUT A DIFFERENT ARGUMENT OTHER THAN C,F, AND K
if (from == to) {
warning("Your 'from' parameter is the same as your 'to' parameter!") # From and To are same temperature scale
}
if (from == "C"){
if (to == "F") 9/5 * x + 32 # Celsius to Fahrenheit
else (x + 273.15) # Celsius to Kelvin
}
else if (from == "F") {
if(to == "C") (x -32) * 5/9 # Fahrenheit to Celsius
else (x - 32) * 5/9 + 273.15 # Fahrenheit to Kelvin
}
else {
if(to == 'C')(x - 273.15) # Kelvin to Celsius
else (x-273.15) * 9/5 + 32 # Kelvin to Fahrenheit
}
}
convTemp(c(35,40,45), from="F", to="K")
[1] 274.8167 277.5944 280.3722
In the code (assuming it is a coding exercise), we need not loop over the values (for(i in x)) but loop over the sequence (for(i in seq_along(x)))
convTemp <- function(x, from = "C", to = "F") {
newX <- vector("double", length(x))
if (from == to) {
warning("Your 'from' parameter is the same as your 'to' parameter!") # From and To are same temperature scale
}
if (from == "C" && to == "F") { # Celsius to Fahrenheit
for (i in seq_along(x)) {
newX[i] <- ((9/5)*x[i]+32)
}
}
if (from == "C" && to == "K") { # Celsius to Kelvin
for (i in seq_along(x)) {
newX[i] <- (x[i]+273.15)
}
}
if (from == "F" && to == "C") { # Fahrenheit to Celsius
for (i in seq_along(x)) {
newX[i] <- ((x[i]-32)*(5/9))
}
}
if (from == "K" && to == "C") { # Kelvin to Celsius
for (i in seq_along(x)) {
newX[i] <- (x[i]-273.15)
}
}
if (from == "F" && to == "K") { # Fahrenheit to Kelvin
for (i in seq_along(x)) {
newX[i] <- ((((x[i]-32)*5)/9)+273.15)
}
}
if (from == "K" && to == "F") { # Kelvin to Fahrenheit
for (i in seq_along(x)) {
newX[i] <- ((((x[i]-273.15)*9)/5)+32)
}
}
return(newX)
}
-testing
> convTemp(c(35,40,45), from="F", to="K")
[1] 274.8167 277.5944 280.3722
Not that you asked, but since #onyambu helped eliminate the for loops, I though I might help eliminate the if statements. I'm not sure if this is more efficient, but I find a bunch of if statements difficult to follow sometimes.
convTemp <- function(x, from = "C", to = "F") {
stopifnot(c(from, to) %in% c('C', 'F', 'K'))
from. <- c(0,-32,-273.15)[which(c('C', 'F', 'K') %in% from)]
to. <- c(0,32,273.15)[which( c('C', 'F', 'K') %in% to)]
mult. <- c(1, 5/9, 9/5 )[sum(which(c(from, to) %in% 'F'))+1]
(x + from.)*mult. + to.
}
convTemp(c(35,40,45), from="F", to="K")
#> [1] 274.8167 277.5944 280.3722
convTemp(c(35,40,45), from="C", to="F")
#> [1] 95 104 113
In R I'd like to replace some elements in a list using the $ notation:
# functions
replaceNonNull <- function(x, value) {
if(!is.null(x)){
thisx <- deparse(substitute(x))
print(paste0("replacing ", thisx, " with '",value,"'"))
#x <<- value
assign(thisx, value, envir = .GlobalEnv)
}
}
mylist = list("a"=1:3)
replaceNonNull(mylist$a,"456");mylist$a
However after running replaceNonNull, a new variable is created with name 'mylist$a'. How can I change the a value in the list instead?
Maybe you want something like this:
replaceNonNull <- function(x, el, value, env = globalenv()) {
if (!is.null(x[[el]])) {
nx <- deparse(substitute(x))
nv <- deparse(substitute(value))
cat("replacing value of", sQuote(el), "in", sQuote(nx), "with", sQuote(nv), "\n")
env[[nx]][[el]] <- value
}
}
mylist <- list(a = 1:3)
replaceNonNull(mylist, "a", 4:6)
## replacing value of ‘a’ in ‘mylist’ with ‘4:6’
mylist$a
## [1] 4 5 6
replaceNonNull(mylist, "b", 4:6)
mylist$b
## NULL
Nonstandard evaluation is a dangerous game, so you should be aware of the limitations. Here, x must be the name of a variable bound in env (hence not a call to the $ operator). Otherwise, you will continue to see unexpected behaviour:
mylist <- list(zzz = list(a = 1:3))
replaceNonNull(mylist$zzz, "a", 4:6)
## replacing value of ‘a’ in ‘mylist$zzz’ with ‘4:6’
mylist$zzz$a
## [1] 1 2 3
`mylist$zzz`
## $a
## [1] 4 5 6
You can avoid unintended assignments by adding a test:
replaceNonNull <- function(x, el, value, env = globalenv()) {
nx <- deparse(substitute(x))
if (!exists(nx, env, mode = "list")) {
stop("There is no list in ", sQuote("env"), " named ", sQuote(nx), ".")
}
if (!is.null(x[[el]])) {
nv <- deparse(substitute(value))
cat("replacing value of", sQuote(el), "in", sQuote(nx), "with", sQuote(nv), "\n")
env[[nx]][[el]] <- value
}
}
rm(`mylist$zzz`) # clean up after last example
replaceNonNull(mylist$zzz, "a", 4:6)
## Error in replaceNonNull(mylist$zzz, "a", 4:6) :
## There is no list in ‘env’ named ‘mylist$zzz’.
The problem you're having is that the first argument of assign is:
x - a variable name, given as a character string.
But even outside the function, this doesn't work.
assign(mylist$a,0)
#Error in assign(mylist$a, 0) : invalid first argument
assign("mylist$a",0)
mylist
#$a
#[1] 1 2 3
However, you can use $<-, like this:
> mylist$a <- 0
> mylist$a
[1] 0
One approach, then is to create that expression and evaluate it:
mylist = list("a"=1:3)
myexpression <- deparse(substitute(mylist$a))
myexpression
#[1] "mylist$a"
library(rlang)
expr(!!parse_expr(myexpression) <- 0)
#mylist$a <- 0
eval(expr(!!parse_expr(myexpression) <- 0))
mylist$a
#[1] 0
Obviously use <<- inside the function.
I am seeking help on this function I have created. The purpose of this function: First, I want to extract a column from a data frame and arrange it in descending order. Then, I rank each element by "H", "M" and "L". I want to rank them such as the first 33% of the items should have the tag "H" and the last 33% of the items are tagged as "L". The rest should be tagged as "M".
This is the code:
ranking_prod <- function(data, column) {
data <- arrange(data, desc(column))
size <- length(data$column)
first_third <- data$column[round(size / 3)]
last_third <- data$column[round(size - (size / 3))]
case_when(data$column > first_third ~ "H",
data$column < last_third ~ "L",
TRUE ~ "M")
}
However, when I apply this function to the following data frame:
> one <- c("a", "b", "c", "d", "e")
> two <- c(1, 2, 2, 1, 5)
> three <- c(2, 2, 2, 4, 5)
> dataframe <- data.frame(one, two, three)
It returns:
> rank_volume(dataframe, two)
character(0)
Where is the error in the code? Why is it displaying character(0) as the results?
We can use [[ instead of $ and also as we are passing unquoted argument, convert to string. As we are converting to symbol with ensym, the input can either be unquoted or quoted
ranking_prod <- function(data, column) {
column <- rlang::ensym(column)
colstr <- rlang::as_string(column)
data <- dplyr::arrange(data, desc(!!column))
size <- length(data[[colstr]])
first_third <- data[[colstr]][round(size / 3)]
last_third <- data[[colstr]][round(size - (size / 3))]
dplyr::case_when(data[[colstr]] > first_third ~ "H",
data[[colstr]] < last_third ~ "L",
TRUE ~ "M")
}
-testing
ranking_prod(dataframe, two)
#[1] "H" "M" "M" "L" "L"
ranking_prod(dataframe, 'two')
#[1] "H" "M" "M" "L" "L"
There are 10 cards written as "S", "T", "A", "T", "I", "S", "T", "I", "C", "S".
After picking ONE card randomly, you put the card back to the original place and mix it. Repeat this until "S","A","T" come out in order.
x<-c("S","T","A","T","I","S","T","I","C","S")
repeat{
print(sample(x,1,replace=TRUE))}
I don't know how to stop when "S","A","T" comes out.
Here is one way
tmp <- c(NA, sample(x, 2, replace = TRUE))
k <- 0
while (!identical(tmp, c("S", "A", "T"))) {
tmp <- c(tmp[-1], sample(x, 1))
k <- k + 1
}
You can use this:
x<-c("S","T","A","T","I","S","T","I","C","S")
pre_last <- NULL
last <- NULL
curr <- NULL
repeat{
curr <- sample(x,1,replace=TRUE)
if(curr == "T")
if(last == "A" && pre_last == "S")
break
pre_last <- last
last <- curr
}
# Result
pre_last
last
curr
Another solution:
s = sapply(1:3,function(i) {sample(x,1,replace=TRUE)})
print(s)
repeat{
if(s[1]=="S" & s[2]=="A" & s[3]=="T")
break
else {
s=s[-1]
s=c(s,(sample(x,1,replace=TRUE)))
print(s)
}
}
You could do this:
set.seed(1)
i <- 1
wanted <- c("S","A","T")
res <- c()
while(TRUE){
r <- sample(x,1,replace = TRUE)
res <- c(res, r)
if(r==wanted[i])
i <- i+1
else i <- 1
if(i==(length(wanted)+1)) break
}
> res
[1] "A" "T" "S" "S" "A" "C" "S" "T" "T" "S" "A" "T"
I have the following data frames in R.
df1<-as.data.frame(cbind(Site=c(1,2,3,4,5),Nucs=c("ACTG","ACT","GTAC","GTC","GACT")))
df2<-as.data.frame(cbind(Site=c(1,2,3,4,5),Nucs=c("AC","ATC","GTCA","GC","GAC")))
I am trying to determine what the longest possible string that is consistent between the two Nucs columns.
So far, I have tried this:
x1 <- strsplit(as.character(df1$Nucs),"")
x2 <- strsplit(as.character(df2$Nucs),"")
x <- Map(intersect, x1, x2)
sapply( x, paste0, collapse="")
This gives me the following:
[1] "AC" "ACT" "GTAC" "GC" "GAC"
which is not quite what I want because in the case of Site 3 I have GTAC and GTCA so I only want the first two characters that are consistent in the string, i.e. GT.
Does anybody have any ideas on how I can go about this?
I also find a solution that you can try:
CompareVectors <- function(x, y){
comp_length <- min(length(y), length(x))
x <- x[1 : comp_length]
y <- y[1 : comp_length]
compare <- x == y
id <- which(compare == F)[1]
if(!is.na(id)){
x <- x[which(compare[1: (id - 1)])]
}
return(paste(x, collapse = ""))
}
OUTPUT:
sapply(1 : length(x1), function(i) CompareVectors(x1[[i]], x2[[i]]))
[1] "AC" "A" "GT" "G" "GAC"
So here's my not very efficient solution:
df1 <- as.data.frame(cbind(Site=c(1,2,3,4,5),Nucs=c("ACTG","ACT","GTAC","GTC","GACT")))
df2 <- as.data.frame(cbind(Site=c(1,2,3,4,5),Nucs=c("AC","ATC","GTCA","GC","GAC")))
x1 <- strsplit(as.character(df1$Nucs),"")
x2 <- strsplit(as.character(df2$Nucs),"")
for(i in 1:nrow(df1)){
a = ""
for(j in 1:min(length(x1[[i]]),length(x2[[i]]))){
a= paste(a,x1[[i]][j] == x2[[i]][j],sep=",")
}
print(head(x1[[i]],sum(as.logical(strsplit(a,",")[[1]][-1]))))
}
Output:
[1] "A" "C"
[1] "A"
[1] "G" "T"
[1] "G"
[1] "G" "A" "C"
Would you like me to comment the code?
You can try this, although a bit lengthy:
sapply(1:nrow(df1), function(x) {
s1 <- unlist(strsplit(as.character(df1$Nucs[x]), split = ''))
s2 <- unlist(strsplit(as.character(df2$Nucs[x]), split = ''))
n <- min(length(s1), length(s2))
i <- 1
while(i <= n) {
if (s1[i] != s2[i]) {
break
}
i <- i + 1
}
if (i > 0)
paste(s1[1:(i-1)], collapse ='')
else
''
})
# [1] "AC" "A" "GT" "G" "GAC"
Here's another solution. It may not be covering all possible cases but that's probably easy to extend.
df1<-as.data.frame(cbind(Site=c(1,2,3,4,5),Nucs=c("ACTG","ACT","GTAC","GTC","GACT")))
df2<-as.data.frame(cbind(Site=c(1,2,3,4,5),Nucs=c("AC","ATC","GTCA","GC","GAC")))
mapply(x = as.list(df1$Nucs), y = as.list(df2$Nucs), FUN = function(x, y) {
x <- as.character(x); y <- as.character(y) # doesn't work with factors
# To keep everything in one easy to debug chunk, just switch in case
# x is shorter than y.
if (!(nchar(x) >= nchar(y))) {
xp <- y
yp <- x
} else {
xp <- x
yp <- y
}
# create elements to work on and vector for storage
to.glue <- strsplit(xp, "")[[1]]
out <- rep(NA, times = length(to.glue)) # used as output
# If one string is shorter than the other, extract one element
# at a time and see if there's a match in y. If yes, then pro-
# ceed to the second element, concatenate it with the first
# one and see if this pattern is present anywhere in y...
for (i in 1:length(to.glue)) {
glued <- paste(to.glue[1:i], collapse = "")
fm <- pmatch(x = glued, table = yp)
if (is.na(fm)) {
return(out[i-1])
} else {
out[i] <- glued
}
}
})
[1] "AC" "A" "GT" "G" "GAC"