Extract integer value from a character string - r

I have a character in the form "RADIANCE_MULT_BAND_1 = 1.2412E-02" saved in a variable. I want to extract the integer value as 0.012412. Please suggest the syntax to do so in R.

# function to extract number from a string
f = function(x) {
y = unlist(strsplit(x, " "))
y = as.numeric(y)
y = y[!is.na(y)]
y }
# having one value
x = "RADIANCE_MULT_BAND_1 = 1.2412E-02"
f(x)
# [1] 0.012412
# having multiple values
dt = data.frame(x = c("RADIANCE_MULT_BAND_1 = 1.2412E-02",
"RADIANCE_MULT_BAND_2 = 1.5412E-02",
"RADIANCE_MULT_BAND_3 = 2.2412E-02"), stringsAsFactors = F)
dt$value = f(dt$x)
dt
# x value
# 1 RADIANCE_MULT_BAND_1 = 1.2412E-02 0.012412
# 2 RADIANCE_MULT_BAND_2 = 1.5412E-02 0.015412
# 3 RADIANCE_MULT_BAND_3 = 2.2412E-02 0.022412

Related

Trying to create an R function which finds the input value in column 1 of a dataframe and returns column 2 value of the same row

New to R functions, I have a dataframe which looks like this except about 10,000 rows long:
Gene.name
Ortho.name
abc
DEF
qrs
TUV
wx
YZ
I'm trying to create a really simple function in r which when I input qrs, returns TUV. If someone could help I would really appreciate it.
fun <- function(vec, data) data$Ortho.name[ match(vec, data$Gene.name) ]
Z <- structure(list(Gene.name = c("abc", "qrs", "wx"), Ortho.name = c("DEF", "TUV", "YZ")), class = "data.frame", row.names = c(NA, -3L))
fun("qrs", data = Z)
# [1] "TUV"
fun("nothing", data = Z)
# [1] NA
fun(c("qrs", "abc", "not found"), data = Z)
# [1] "TUV" "DEF" NA
In case anyone is using seurat for plotting orthologs in a cross-species comparison, this is how I implemented the above using orthologs from BioMart:
chick_fish_ortho <- read.csv("chick_orthos.csv")
mac_fish_ortho <- read.csv('mac_orthos.csv')
macfun('glula', mac_fish_ortho)
chickfun('glula', chick_fish_ortho)
chickfun <- function(vec, data) data$Chicken.gene.name[ match(vec, data$Gene.name) ]
macfun <- function(vec, data) data$Macaque.gene.name[ match(vec, data$Gene.name) ]
fish_chick_mac <- function(gene, chickdata, macdata) {
p1 = FeaturePlot(object = fish_MG, reduction = "umap", label = TRUE, min.cutoff = 0, features = gene)
p2 = FeaturePlot(object = chick_MG, reduction = "umap", label = TRUE, min.cutoff = 0, features = chickfun(gene, chickdata))
p3 = FeaturePlot(object = mac_MG, reduction = "umap", label = TRUE, min.cutoff = 0, features = macfun(gene, macdata))
p1 + p2 + p3
}
fish_chick_mac('glula', chick_fish_ortho, mac_fish_ortho)

Extract values from list of named lists in R

Based on the names of sublists with xyz values of a list, I would like to extract a sample of the xyz values from a sublist. Note: the lists do not start at 1.
Example data
set.seed(123)
data <- list('4' = list(x = rnorm(5), y = rnorm(5), z = rnorm(5)),
'5' = list(x = rnorm(5), y = rnorm(5), z = rnorm(5)),
'6' = list(x = rnorm(5), y = rnorm(5), z = rnorm(5)),
'7' = list(x = rnorm(5), y = rnorm(5), z = rnorm(5)),
'8' = list(x = rnorm(5), y = rnorm(5), z = rnorm(5)))
Function to extract random values (derived from here)
I have the following function to sample random xyz values from the list:
get_elements <- function(data, i) {
#select the main list
tmp <- data[[i]]
#Check the length of each sublist, select minimum value
#and sample 1 number from 1 to that number
rand_int <- sample(min(lengths(tmp)), 1)
#select that element from each sub-list
sapply(tmp, `[[`, rand_int)
}
Example of function
# Show list number 8
data[['8']]
#> $x
#> [1] 0.3796395 -0.5023235 -0.3332074 -1.0185754 -1.0717912
#> $y
#> [1] 0.30352864 0.44820978 0.05300423 0.92226747 2.05008469
#> $z
#> [1] -0.4910312 -2.3091689 1.0057385 -0.7092008 -0.6880086
# Extract random combination from list 8
get_elements(data, '8')
#> x y z
#> -0.33320738 0.05300423 1.00573852
Rewrite function
Using the same function as above, I replaced i with 'i':
get_elements <- function(data, i) {
tmp <- data[['i']] # <-- changed i to 'i'
rand_int <- sample(min(lengths(tmp)), 1)
sapply(tmp, `[[`, rand_int)
}
Error & Question
get_elements(data, 8)
Warning in min(lengths(tmp)): no non-missing arguments to min;
returning Inf list()
Suddenly the function breaks, and I do not understand why? What is the reason for this error?
Try this:
get_elements <- function(data, i)
{
tmp <- data[[paste(i)]]
rand_int <- sample(min(lengths(tmp)), 1)
sapply(tmp, `[[`, rand_int)
}
The reason your initial code failed is because "i" is understood as "character i", not as "convert variable i into character". See:
i = 1
print("i") # i
print(i) # 1

Populating a Data Frame with Characters in a For Loop R

Currently I have a loop that is adding rows from one data frame into another master data frame. Unfortunately, it converts the characters into numbers, but I don't want that. How can I get the following for loop to add the rows from one data frame into the master data frame while keeping the characters?
AnnotationsD <- data.frame(x = vector(mode = "numeric",
length = length(x)), type = 0, label = 0, lesion = 0)
x = c(1,2)
for(i in length(x)){
D = data.frame(x = i, type = c("Distance"),
label = c("*"), lesion = c("Wild"))
AnnotationsD[[i,]] <- D[[i]]
}
So what I would like to come out of this is:
x type label lesion
1 1 Distance * Wild
2 2 Distance * Wild
This should work:
x = c(1,2)
AnnotationsD <- data.frame(x = as.character(NA), type = as.character(NA),
label = as.character(NA), lesion = as.character(NA),
stringsAsFactors =F)
for(i in 1:length(x)){
D = c(x = as.character(i), type = as.character("Distance"),
label = as.character("*"), lesion = as.character("Wild"))
AnnotationsD[i,] <- D
}

Apply a function based on column name in data.tables R

I'm looking to apply a user define function based on the name given to a column
dt <- data.table(gr_id = 1, id = seq(1,10),min_c = runif(10,10,30),
ml_c = runif(10,30,50),mx_c = runif(10,50,100),
min_t = runif(10,10,20),ml_t = runif(10,20,25),
mx_t = runif(10,25,30))
I would like to apply a function which calculates (min(min)+min(ml))/mx for both "c" columns and "t" columns. Currently, I've done as follows. However, becomes hard when I want to add more columns (lets say, "a")
dt[,{
temp1 = min(min_c)
temp2 = min(ml_c)
temp3 = min(mx_c)
score_c = (temp1+temp2)/temp3
temp4 = min(min_t)
temp5 = min(ml_t)
temp6 = min(mx_t)
score_t = (temp4+temp5)/temp6
list(score_c = score_c,
score_t = score_t)
},by = gr_id
]
I think this will work. the basic idea is using get.
# the original code could be simplified to:
dt[, .(
score_c = (min(min_c) + min(ml_c)) / min(mx_c),
score_t = (min(min_t) + min(ml_t)) / min(mx_t)
), by = gr_id]
#
# gr_id score_c score_t
# 1: 1 0.9051556 1.28054
# using `get`
cols <- c('c', 't')
dt[, {
res <- lapply(cols, function(i){
vars <- paste(c('min', 'ml', 'mx'), i, sep = '_')
(min(get(vars[1])) + min(get(vars[2]))) / min(get(vars[3]))
})
names(res) <- paste('score', cols, sep = '_')
res
}, by = gr_id]
# gr_id score_c score_t
# 1: 1 0.9051556 1.28054

Faster alternative to nested loops

I have written the below function, which contains a nested loop. In short, it calculates differences in emissions between i (28) pairs alternative technologies for j (48) countries. For a single combination and a single country, it takes 0.32 sec, which should give a total time of 0.32*28*48 = around 7 min. The function actually takes about 50 min, which makes me think there may be some unnecessary computing going on. Is a nested loop the most efficient approach here?
Any help is greatly appreciated!
alt.comb.p <- function(Fmat){
y.empty = matrix(data = 0,ncol = 2,nrow = nrow(FD)-1)
row.names(y.empty) <- paste(FD$V1[2:nrow(FD)],FD$V2[2:nrow(FD)],sep = " ")
country.list = unique(FD$V1)
for (j in 1:length(country.list)){ # for every country
for (i in 1:ncol(alt.comb)){ # for every possible combination
# the final demand of the first item of the combination is calculated
first = alt.comb[,i][1]
first.name = row.names(Eprice.Exio)[first]
loc1 = grep(pattern = first.name,x = row.names(y.empty))
country.first = substr(x = row.names(y.empty)[loc1[j]],start = 0,stop = 2)
y.empty[,1][loc1[j]] <- Eprice.Exio[first.name,country.first]
# the final demand of the second item of the combination is calculated
second = alt.comb[,i][2]
second.name = row.names(Eprice.Exio)[second]
loc2 = grep(pattern = second.name,x = row.names(y.empty))
country.second = substr(x = row.names(y.empty)[loc2[j]],start = 0,stop = 2)
y.empty[,2][loc2[j]] <- Eprice.Exio[second.name,country.second]
# calculates the difference between the total pressures from item 1 and item 2
r.1 = sum(Fmat%*%as.vector(y.empty[,1]))
r.2 = sum(Fmat%*%as.vector(y.empty[,2]))
r.dif = r.1-r.2 # negative means alternative 1 is better
alt.comb[2+j,i] <- r.dif
row.names(alt.comb)[2+j] <- country.first
y.empty = matrix(data = 0,ncol = 2,nrow = nrow(FD)-1)
row.names(y.empty) <- paste(FD$V1[2:nrow(FD)],FD$V2[2:nrow(FD)],sep = " ")
}
}
return(alt.comb)
}
Edit:
A simplified example would be:
Fmat = matrix(data = runif(1:9600), ncol=9600, nrow=9600)
alt.comb.p <- function(Fmat){
y.empty = matrix(data = 0,ncol = 2,nrow = 9600)
country.list = runif(n = 10)
alt.comb = matrix(data=0,ncol=5,nrow=10)
for (j in 1:10){ # for every country
for (i in 1:5){ # for every possible combination
y.empty[50,1] <- runif(1)
y.empty[60,2] <- runif(1)
# calculates the difference between the total pressures from item 1 and item 2
r.1 = sum(Fmat%*%as.vector(y.empty[,1]))
r.2 = sum(Fmat%*%as.vector(y.empty[,2]))
r.dif = r.1-r.2 # negative means alternative 1 is better
alt.comb[j,i] <- r.dif
y.empty = matrix(data = 0,ncol = 2,nrow = 9600)
}
}
return(alt.comb)
}

Resources