Splitting strings from integers in R

I've recently come across an interesting problem while trying to
create a custom database.
My rows are of the form:
183746IGH
105928759UBS
and so on (basically an integer concatenated with a string, both of fairly arbitrary length). What I'm trying to do is somehow separate the whole number into column 1 and everything else (the letters) into column 2. How can this be done? I've been trying strsplit, but it doesn't seem to offer this kind of functionality.
Thank you for any help.

Other options include tstrsplit from data.table (v1.9.5+; at the time, the development version):
library(data.table)  # v1.9.5+
setDT(df)[, tstrsplit(V1, '(?<=\\d)(?=\\D)', perl = TRUE, type.convert = TRUE)]
# V1 V2
#1: 131341 adad
#2: 45365 adadar
#3: 425 cavsbsb
#4: 46567567 daadvsv
If there are elements where the 'non-numeric' part appears first and the 'numeric' part last, we can use a slightly more generalized regex pattern:
setDT(df)[, tstrsplit(V1, "(?<=\\d)(?=\\D)|(?<=\\D)(?=\\d)",
                      perl = TRUE)]
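For instance, a minimal illustration with a hypothetical two-element vector that mixes both orders (plain strsplit is used here just to show where the pattern splits):
v <- c("131341adad", "adadar45365")
strsplit(v, "(?<=\\d)(?=\\D)|(?<=\\D)(?=\\d)", perl = TRUE)
# [[1]]
# [1] "131341" "adad"
#
# [[2]]
# [1] "adadar" "45365"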
Or using extract from tidyr
library(tidyr)
extract(df, V1, into=c('V1', 'V2'), '(\\d+)(\\D+)', convert=TRUE)
# V1 V2
#1 131341 adad
#2 45365 adadar
#3 425 cavsbsb
#4 46567567 daadvsv
If you need the original column as well,
extract(df, V1, into=c('V2', 'V3'), '(\\d+)(\\D+)',
convert=TRUE, remove=FALSE)
# V1 V2 V3
#1 131341adad 131341 adad
#2 45365adadar 45365 adadar
#3 425cavsbsb 425 cavsbsb
#4 46567567daadvsv 46567567 daadvsv
For the data.table, we can use := to create the new columns so that the existing columns remain in the output, i.e.
setDT(df)[,paste0('V',2:3):=tstrsplit(V1,'(?<=\\d)(?=\\D)',
perl=TRUE, type.convert=TRUE)]
# V1 V2 V3
#1: 131341adad 131341 adad
#2: 45365adadar 45365 adadar
#3: 425cavsbsb 425 cavsbsb
#4: 46567567daadvsv 46567567 daadvsv
NOTE: Both solutions offer an option to convert the class of the split columns (type.convert and convert, respectively).
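As a quick check of that conversion (a minimal sketch using the df defined under "data" below; not part of the original answer):
out <- setDT(df)[, tstrsplit(V1, '(?<=\\d)(?=\\D)', perl = TRUE, type.convert = TRUE)]
class(out$V1)
# [1] "integer"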
data
df <- data.frame(V1 = c("131341adad", "45365adadar", "425cavsbsb",
"46567567daadvsv"))

And another way with base-R and regular expressions:
all <- c('183746IGH', '105928759UBS')
numeric <- sapply(all, function(x) sub('[[:alpha:]]+', '', x))
alphabetic <- sapply(all, function(x) sub('[[:digit:]]+', '', x))
> data.frame(all, alphabetic, numeric)
                       all alphabetic   numeric
183746IGH        183746IGH        IGH    183746
105928759UBS 105928759UBS        UBS 105928759
Or as per #rawr's comment below:
> read.table(text = gsub('(\\d)(\\D)', '\\1 \\2', all))
V1 V2
1 183746 IGH
2 105928759 UBS
Or a vectorised version of the above with a function:
get_alphanum <- function(x, type) {
  type <- switch(type,
                 alpha = '[[:digit:]]+',
                 digit = '[[:alpha:]]+')
  sub(type, '', x)
}
get_alphanum <- Vectorize(get_alphanum)
This can then be applied directly to a vector:
> get_alphanum(all, type='alpha')
183746IGH 105928759UBS 
    "IGH"         "UBS" 
> get_alphanum(all, type='digit')
183746IGH 105928759UBS 
 "183746"  "105928759" 
which can also be used to create a data.frame:
> data.frame(all,
             alpha = get_alphanum(all, type='alpha'),
             numeric = get_alphanum(all, type='digit'))
                       all alpha   numeric
183746IGH        183746IGH   IGH    183746
105928759UBS 105928759UBS   UBS 105928759

You could do:
df <- data.frame(V1 = c("adad131341", "adadar45365", "cavsbsb425", "daadvsv46567567"))
library(dplyr)
library(stringr)
df %>% mutate(V2 = str_extract(V1, "[0-9]+"),
              V3 = str_extract(V1, "[a-zA-Z]+"))
Which gives:
# V1 V2 V3
#1 adad131341 131341 adad
#2 adadar45365 45365 adadar
#3 cavsbsb425 425 cavsbsb
#4 daadvsv46567567 46567567 daadvsv

strsplit does work if you provide the correct regex to split on.
In this case, you would want something like:
strsplit(String, split = "(?<=[a-zA-Z])(?=[0-9])", perl = TRUE)
Here it is applied to #Steven's sample data:
strsplit(as.character(df$V1), split = "(?<=[a-zA-Z])(?=[0-9])", perl = TRUE)
# [[1]]
# [1] "adad" "131341"
#
# [[2]]
# [1] "adadar" "45365"
#
# [[3]]
# [1] "cavsbsb" "425"
#
# [[4]]
# [1] "daadvsv" "46567567"
Some time in the past I've written a function to do this since my mind honestly doesn't think in regex very often. The function looks like:
SplitMe <- function(string, alphaFirst = TRUE, bind = FALSE) {
  if (!is.character(string)) string <- as.character(string)
  Pattern <- ifelse(isTRUE(alphaFirst),
                    "(?<=[a-zA-Z])(?=[0-9])",
                    "(?<=[0-9])(?=[a-zA-Z])")
  out <- strsplit(string, split = Pattern, perl = TRUE)
  if (isTRUE(bind)) {
    require(data.table)
    as.data.table(do.call(rbind, out))
  } else {
    out
  }
}
The intended usage was something like:
library(data.table)
as.data.table(df)[, c("char", "num") := SplitMe(V1, bind = TRUE)][]
# V1 char num
# 1: adad131341 adad 131341
# 2: adadar45365 adadar 45365
# 3: cavsbsb425 cavsbsb 425
# 4: daadvsv46567567 daadvsv 46567567
Once you know that pattern, you can use it in other places that make use of strsplit, like separate from "tidyr", which conveniently separates values into columns:
library(dplyr)
library(tidyr)
df %>%
  separate(V1, into = c("char", "num"),
           sep = "(?<=[a-zA-Z])(?=[0-9])")
# char num
# 1 adad 131341
# 2 adadar 45365
# 3 cavsbsb 425
# 4 daadvsv 46567567

read.pattern in the gsubfn package can do that. Each parenthesized part of the regular expression given in the pattern argument will be read into a separate column:
x <- c("183746IGH", "105928759UBS")
library(gsubfn)
read.pattern(text = x, pattern = "(\\d+)(\\D+)")
giving:
V1 V2
1 183746 IGH
2 105928759 UBS

Related

Separate Million and Billion Data from one column

I am trying the code below to separate the "M" and "B" labels, with their values, into two different columns.
I want output like this:
level 1   level 2
M 3.2     B 3.6
M 4       B 2.8
          B 3.5
Input:
reve=c("M 3.2","B 3.6","B 2.8","B 3.5","M 4")
#class(reve)
data=data.frame(reve)
Here is what I have tried.
index=which(grepl("M ",data$reve))
data$reve=gsub("M ","",data$reve)
data$reve=gsub("B ","",data$reve)
data$reve=as.numeric(data$reve)
If you have a data frame, you can do that with separate() from tidyr.
I give you an example of this:
library(dplyr)
library(tidyr)
df <- tibble(coupe = c("M 2.3", "M 4.5", "B 1"))
df %>% separate(coupe, c("MorB", "Quant"), " ")
OUTPUT
#   MorB  Quant
#   <chr> <chr>
# 1 M     2.3
# 2 M     4.5
# 3 B     1
Hope it helps!
For counting the number of "M" rows:
df %>% separate(YourColumn, c("MorB", "Quant"), " ") %>%
  filter(MorB == "M") %>% nrow()
Here is a base R approach.
lst <- split(reve, substr(reve, 1, 1))
df1 <- as.data.frame(lapply(lst, `length<-`, max(lengths(lst))))
df1
# B M
#1 B 3.6 M 3.2
#2 B 2.8 M 4
#3 B 3.5 <NA>
split the vector in two by the first letter. This gives you a list with entries of unequal length. Use lapply to make the entries the same length, i.e. pad the shorter one with NAs, then call as.data.frame.
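To see the intermediate steps (a small illustration with the reve vector from above, not part of the original answer):
lst <- split(reve, substr(reve, 1, 1))
lst
# $B
# [1] "B 3.6" "B 2.8" "B 3.5"
#
# $M
# [1] "M 3.2" "M 4"
lengths(lst)
# B M
# 3 2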
If you want to change the names, you can use setNames
setNames(df1, c("level_2", "level_1"))
In case I misunderstood your desired output, try
df1 <- data.frame(do.call(rbind, (strsplit(reve, " "))), stringsAsFactors = FALSE)
df1[] <- lapply(df1, type.convert, as.is = TRUE)
df1
# X1 X2
#1 M 3.2
#2 B 3.6
#3 B 2.8
#4 B 3.5
#5 M 4.0
I think options rooted in regex may also be helpful for these types of problems
reve=c("M 3.2","B 3.6","B 2.8","B 3.5","M 4")
data=data.frame(reve, stringsAsFactors = F) # handle your data as strings, not factors
# regex to extract M vals and B vals (extract from the reve column, not the whole data frame)
txt <- paste(data$reve, collapse = " ")
mvals <- stringi::stri_extract_all_regex(txt, "M+\\s[0-9]\\.[0-9]|M+\\s[0-9]")[[1]]
bvals <- stringi::stri_extract_all_regex(txt, "B+\\s[0-9]\\.[0-9]|B+\\s[0-9]")[[1]]
# gluing things together into a single df
len <- max(length(mvals), length(bvals))  # find the common length
data.frame(M = c(mvals, rep(NA, len - length(mvals))),  # pad both vectors to the same length
           B = c(bvals, rep(NA, len - length(bvals))))
In case regex is unfamiliar, the first expression searches for "M" followed by a space, then a digit 0 through 9, then a period, then another digit 0 through 9. The vertical pipe is an "or" operator, so the expression also matches "M" followed by a space and a single digit; that second half accounts for cases like "M 4". The second expression does the same thing, just for entries that contain "B" in lieu of "M".
These are quick and dirty regex statements. I'm sure cleaner formulations are possible to get the same results.
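For instance, a minimal check of the first pattern on its own (assuming the stringi package is installed):
stringi::stri_extract_all_regex(c("M 3.2", "M 4"), "M+\\s[0-9]\\.[0-9]|M+\\s[0-9]")
# [[1]]
# [1] "M 3.2"
#
# [[2]]
# [1] "M 4"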
We can count Millions or Billions as follows:
Input dataset:
reve=c("M 3.2","B 3.6","B 2.8","B 3.5","M 4")
data=data.frame(reve)
Code
library(dplyr)
library(tidyr)
data %>%
  separate(reve, c("Label", "Value"), extra = "merge") %>%
  group_by(Label) %>%
  summarise(n = n())
Output
# A tibble: 2 x 2
Label n
<chr> <int>
1 B 3
2 M 2

Multiply several columns of a dataframe by a factor (scalar)

I have a very basic problem and can't find a solution, so sorry in advance for the beginner question.
I have a data frame with several ID columns and 30 numerical columns. I want to multiply all values of those 30 columns by the same factor, keeping the rest of the data frame unchanged. I figured that dplyr and transmute_all or transmute_at are my friends, but I can't find a way to express the function Column1:Column30 * factor. All the examples given use simple functions like mean, and that doesn't help me with this expression.
I would use mutate_at. For example:
library(dplyr)
mtcars %>%
  mutate_at(vars(mpg:qsec),
            .funs = funs(. * 3))
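On newer dplyr versions (0.8+) funs() is deprecated in favour of the lambda syntax; a minimal equivalent sketch:
library(dplyr)
mtcars %>%
  mutate_at(vars(mpg:qsec), ~ .x * 3)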
I'll give a solution with data.table, the dplyr version should be close to identical.
library(data.table)
# convert to data.table format to use data.table syntax
setDT(my_df)
# .SD refers to all the columns mentioned in the .SDcols argument
# (all columns by default when this argument is not specified)
# - instead of using backticks around *, you could use quotes: "*"
my_df[ , lapply(.SD, `*`, factor), .SDcols = Column1:Column30]
On some made-up data
set.seed(0123498)
# create fake data
DT = setDT(replicate(8, rnorm(5), simplify = FALSE))
DT
# V1 V2 V3 V4 V5 V6 V7 V8
# 1: -0.2685077 -1.06491111 0.7307661 0.09880937 0.2791274 -0.5589676 1.5320685 0.4730013
# 2: 1.0783236 -0.17810929 -0.2578453 0.95940860 1.0990367 -0.6983235 0.9530062 -1.3800769
# 3: 1.1730611 -0.48828441 -1.6314077 -0.76117268 -0.5753245 -0.7370099 0.3982160 -0.8088035
# 4: 0.2060451 -0.07105785 -1.1878591 -0.83464592 2.1872117 -0.4390479 0.1428239 1.2634280
# 5: 1.6142695 0.46381602 0.5315299 2.34790945 -1.2977851 1.0428450 1.9292390 0.5337248
scalar = 3
DT[ , lapply(.SD, "*", scalar), .SDcols = V4:V6]
# V4 V5 V6
# 1: 0.2964281 0.8373822 -1.676903
# 2: 2.8782258 3.2971101 -2.094970
# 3: -2.2835180 -1.7259734 -2.211030
# 4: -2.5039378 6.5616352 -1.317144
# 5: 7.0437283 -3.8933554 3.128535
If it's all numeric columns you want to multiply (or if you can easily write a test for the relevant columns), I'd use lapply with an is.numeric test:
Calling the data frame dd (and using iris to demonstrate):
dd = iris
dd[] = lapply(dd, FUN = function(x) if (is.numeric(x)) return(x * 2) else return(x))
This is equivalent to a simple for loop, which also works just fine.
for (i in 1:ncol(dd)) {
if (is.numeric(dd[[i]])) dd[[i]] = dd[[i]] * 2
}
Another way is to use lapply only on the relevant columns, e.g.:
dd[1:4] = lapply(dd[1:4], "*", 2)  # 1:4 for iris; dd[1:30] for the question's 30 numeric columns
Since dplyr version 1.0, you can use across():
dd = iris
dd = dd %>%
mutate(across(where(is.numeric), function(x) x * 2))
Maybe this will help you, using just base R:
> set.seed(100)
> df = data.frame(id=rep(1:5), val1=rnorm(5), val2=rnorm(5), val3=rnorm(5))
> df
id val1 val2 val3
1 1 -0.50219235 0.3186301 0.08988614
2 2 0.13153117 -0.5817907 0.09627446
3 3 -0.07891709 0.7145327 -0.20163395
4 4 0.88678481 -0.8252594 0.73984050
5 5 0.11697127 -0.3598621 0.12337950
# Multiply by 2 all columns except id column
> df[, !colnames(df) %in% c("id")] <- df[, !colnames(df) %in% c("id")] * 2
> df
id val1 val2 val3
1 1 -1.0043847 0.6372602 0.1797723
2 2 0.2630623 -1.1635814 0.1925489
3 3 -0.1578342 1.4290654 -0.4032679
4 4 1.7735696 -1.6505189 1.4796810
5 5 0.2339425 -0.7197243 0.2467590
You could just use apply:
my_df <- data.frame( ... )  # some data
# note that apply() coerces the data frame to a matrix and returns a matrix
my_scaled_df <- apply(my_df, 2, transformation_logic)
For this you can try:
y <- xx[-(1:2)] * 100
Here xx[-(1:2)] drops the first two (non-numeric) columns, so only the remaining numeric columns enter the calculation.
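A minimal self-contained sketch of the same idea (the column layout here is an assumption, with two non-numeric ID columns in front):
xx <- data.frame(id = 1:3, group = c("a", "b", "c"),
                 val1 = c(1.5, 2.5, 3.5), val2 = c(10, 20, 30))
y <- xx[-(1:2)] * 100   # drop the two ID columns, scale the remaining numeric ones
y
#   val1 val2
# 1  150 1000
# 2  250 2000
# 3  350 3000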

How to flatten R data frame that contains lists?

I want to find the best "R way" to flatten a dataframe that looks like this:
CAT   COUNT   TREAT
A     1,2,3   Treat-a, Treat-b
B     4,5     Treat-c, Treat-d, Treat-e
So it will be structured like this:
CAT  COUNT1  COUNT2  COUNT3  TREAT1   TREAT2   TREAT3
A    1       2       3       Treat-a  Treat-b  NA
B    4       5       NA      Treat-c  Treat-d  Treat-e
Example code to generate the source dataframe:
df<-data.frame(CAT=c("A","B"))
df$COUNT <-list(1:3,4:5)
df$TREAT <-list(paste("Treat-", letters[1:2],sep=""),paste("Treat-", letters[3:5],sep=""))
I believe I need a combination of rbind and unlist? Any help would be greatly appreciated.
Here is a solution using base R that accepts vectors of any length inside your list columns, with no need to specify which columns of the data frame you want to collapse. Part of the solution was generated using this answer.
df2 <- do.call(cbind, lapply(df, function(x) {
  # check if it is a list, otherwise just return as is
  if (is.list(x)) {
    return(data.frame(t(sapply(x, '[', seq(max(sapply(x, length)))))))
  } else {
    return(x)
  }
}))
As of R 3.2, lengths() can replace sapply(x, length) as well:
df3 <- do.call(cbind.data.frame, lapply(df, function(x) {
  # check if it is a list, otherwise just return as is
  if (is.list(x)) {
    data.frame(t(sapply(x, '[', seq(max(lengths(x))))))
  } else {
    x
  }
}))
data used:
df <- structure(list(CAT = structure(1:2, .Label = c("A", "B"), class = "factor"),
COUNT = list(1:3, 4:5), TREAT = list(c("Treat-a", "Treat-b"
), c("Treat-c", "Treat-d", "Treat-e"))), .Names = c("CAT",
"COUNT", "TREAT"), row.names = c(NA, -2L), class = "data.frame")
Here is another way in base R:
df<-data.frame(CAT=c("A","B"))
df$COUNT <-list(1:3,4:5)
df$TREAT <-list(paste("Treat-", letters[1:2],sep=""),paste("Treat-", letters[3:5],sep=""))
Create a helper function to do the work
f <- function(l) {
  if (!is.list(l)) return(l)
  do.call('rbind', lapply(l, function(x) `length<-`(x, max(lengths(l)))))
}
Always test your code
f(df$TREAT)
# [,1] [,2] [,3]
# [1,] "Treat-a" "Treat-b" NA
# [2,] "Treat-c" "Treat-d" "Treat-e"
Apply it
df[] <- lapply(df, f)
df
# CAT COUNT.1 COUNT.2 COUNT.3 TREAT.1 TREAT.2 TREAT.3
# 1 A 1 2 3 Treat-a Treat-b <NA>
# 2 B 4 5 NA Treat-c Treat-d Treat-e
There's a deleted answer here that indicates that "splitstackshape" could be used for this. It can, but the deleted answer used the wrong function. Instead, it should use the listCol_w function. Unfortunately, in its present form, this function is not vectorized across columns, so you would need to nest the calls to listCol_w for each column that needs to be flattened.
Here's the approach:
library(splitstackshape)
listCol_w(listCol_w(df, "COUNT", fill = NA), "TREAT", fill = NA)
## CAT COUNT_fl_1 COUNT_fl_2 COUNT_fl_3 TREAT_fl_1 TREAT_fl_2 TREAT_fl_3
## 1: A 1 2 3 Treat-a Treat-b NA
## 2: B 4 5 NA Treat-c Treat-d Treat-e
Note that fill = NA has been specified because it defaults to fill = NA_character_, which would otherwise coerce all the values to character.
Another alternative would be to use transpose from "data.table". Here's a possible implementation (looks scary, but using the function is easy). Benefits are that (1) you can specify the columns to flatten, (2) you can decide whether you want to drop the original column or not, and (3) it's fast.
flatten <- function(indt, cols, drop = FALSE) {
  require(data.table)
  if (!is.data.table(indt)) indt <- as.data.table(indt)
  x <- unlist(indt[, lapply(.SD, function(x) max(lengths(x))), .SDcols = cols])
  nams <- paste(rep(cols, x), sequence(x), sep = "_")
  indt[, (nams) := unlist(lapply(.SD, transpose), recursive = FALSE), .SDcols = cols]
  if (isTRUE(drop)) {
    indt[, (nams) := unlist(lapply(.SD, transpose), recursive = FALSE),
         .SDcols = cols][, (cols) := NULL]
  }
  indt[]
}
Usage would be...
Keeping original columns:
flatten(df, c("COUNT", "TREAT"))
# CAT COUNT TREAT COUNT_1 COUNT_2 COUNT_3 TREAT_1 TREAT_2 TREAT_3
# 1: A 1,2,3 Treat-a,Treat-b 1 2 3 Treat-a Treat-b NA
# 2: B 4,5 Treat-c,Treat-d,Treat-e 4 5 NA Treat-c Treat-d Treat-e
Dropping original columns:
flatten(df, c("COUNT", "TREAT"), TRUE)
# CAT COUNT_1 COUNT_2 COUNT_3 TREAT_1 TREAT_2 TREAT_3
# 1: A 1 2 3 Treat-a Treat-b NA
# 2: B 4 5 NA Treat-c Treat-d Treat-e
See this gist for a comparison with the other solutions proposed.

How to rank rows by two columns at once in R?

Here is the code to rank based on column v2:
x <- data.frame(v1 = c(2,1,1,2), v2 = c(1,1,3,2))
x$rank1 <- rank(x$v2, ties.method='first')
But I really want to rank based on both v2 and/then v1 since there are ties in v2. How can I do that without using RPostgreSQL?
How about:
within(x, rank2 <- order(order(v2, v1)))
# v1 v2 rank1 rank2
# 1 2 1 1 2
# 2 1 1 2 1
# 3 1 3 4 4
# 4 2 2 3 3
order works, but for manipulating data frames, also check out the plyr and dplyr packages.
> arranged_x <- arrange(x, v2, v1)
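If you want an actual rank column in the original row order rather than a sorted copy, a small dplyr sketch (this addition is not part of the original answer):
library(dplyr)
x %>% mutate(rank2 = order(order(v2, v1)))
#   v1 v2 rank1 rank2
# 1  2  1     1     2
# 2  1  1     2     1
# 3  1  3     4     4
# 4  2  2     3     3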
Here we create a sequence of row numbers and then pick out, for each row, its position in the ordered data:
x$rank <- seq.int(nrow(x))[match(rownames(x),rownames(x[order(x$v2,x$v1),]))]
Or:
x$rank <- (1:nrow(x))[order(order(x$v2,x$v1))]
Or even:
x$rank <- rank(order(order(x$v2,x$v1)))
Try this:
x <- data.frame(v1 = c(2,1,1,2), v2 = c(1,1,3,2))
# The order function returns the index (address) of the desired order
# of the examined object rows
orderlist<- order(x$v2, x$v1)
# So to get the position of each row in the index, you can do a grep
x$rank<-sapply(1:nrow(x), function(x) grep(paste0("^",x,"$"), orderlist ) )
x
# For a little bit more general case
# With one tie
x <- data.frame(v1 = c(2,1,1,2,2), v2 = c(1,1,3,2,2))
x$rankv2<-rank(x$v2)
x$rankv1<-rank(x$v1)
orderlist<- order(x$rankv2, x$rankv1)
orderlist
#This rank would not be appropriate
x$rank<-sapply(1:nrow(x), function(x) grep(paste0("^",x,"$"), orderlist ) )
#there are ties
grep(T,duplicated(x$rankv2,x$rankv1) )
# Example for only one tie
makeTieRank<-mean(x[which(x[,"rankv2"] %in% x[grep(T,duplicated(x$rankv2,x$rankv1) ),][,c("rankv2")] &
x[,"rankv1"] %in% x[grep(T,duplicated(x$rankv2,x$rankv1) ),][,c("rankv1")]),]$rank)
x[which(x[,"rankv2"] %in% x[grep(T,duplicated(x$rankv2,x$rankv1) ),][,c("rankv2")] &
x[,"rankv1"] %in% x[grep(T,duplicated(x$rankv2,x$rankv1) ),][,c("rankv1")]),]$rank<-makeTieRank
x

How to transform a vector using a dataframe (association table) in R?

For example, I have a dataframe like this (the content of V1 is not the same as line number):
V1  V2     V3
1   cat    animal
3   dog    animal
4   apple  fruit
And a vector like this:
c(4,1,3)
Is there an easy way to get a vector like this in R?
c("fruit:apple", "animal:cat", "animal:dog")
I tried == (my_frame$V1 == my_vector) but found that it can't be used to match up two vectors like this.
A slightly more concise version:
my_charvec <- as.character(my_vector)
rownames(my_frame) <- my_frame$V1
apply(my_frame[my_charvec,c(3,2)],1,paste,collapse=":")
Here's the output:
            4            1            3 
"fruit:apple" "animal:cat" "animal:dog" 
The 4,1,3 on the output are just names; you can ignore them if you want to.
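If you'd rather drop those names, one small addition (not in the original answer) is to wrap the call in unname():
unname(apply(my_frame[my_charvec, c(3, 2)], 1, paste, collapse = ":"))
# [1] "fruit:apple" "animal:cat"  "animal:dog"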
Something like this works:
## my_dat <- read.table(text="V1 V2 V3
## 1 cat animal
## 2 dog animal
## 3 apple fruit", header=T)
##
## my_vect <- c(3,1,2)
library(qdap) #for paste2 function
paste2(my_dat[sapply(my_vect, function(x) which(x == my_dat[, 1])), 3:2], sep=":")
## [1] "fruit:apple" "animal:cat" "animal:dog"
First I match my_vect against column 1 of my_dat using sapply, which, and ==. This gives the order to grab the rows in:
sapply(my_vect, function(x) which(x == my_dat[, 1]))
## [1] 3 1 2
Then I index and grab only the last two columns in the order you requested (3rd col. then 2nd)
my_dat[sapply(my_vect, function(x) which(x == my_dat[, 1])), 3:2]
## V3 V2
## 3 fruit apple
## 1 animal cat
## 2 animal dog
Then I use paste2 from the qdap package to bind the columns together without specifying the specific columns (just being lazy; you could accomplish this with base paste by explicitly stating the vectors).
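For reference, the base paste equivalent hinted at above could look like this (a sketch, not part of the original answer):
idx <- sapply(my_vect, function(x) which(x == my_dat[, 1]))
paste(my_dat$V3[idx], my_dat$V2[idx], sep = ":")
## [1] "fruit:apple" "animal:cat" "animal:dog"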
@Firegun, how about using merge, like so:
#original data frames
df1=data.frame(V1=c(1,3,4),V2=c("cat","dog","apple"),V3=c("animal","animal","fruit"))
df2=data.frame(V1=c(4,1,3))
# merge, and don't sort the result (sort = TRUE is merge's default)
df3=merge(df2,df1,by.x="V1",sort=FALSE)
vec=as.vector(paste0(df3$V3,":",df3$V2))
> vec
[1] "fruit:apple" "animal:cat" "animal:dog"
Another base R solution, using match:
d <- read.table(text='V1 V2 V3
1 cat animal
3 dog animal
4 apple fruit', header=TRUE)
v <- c(4,1,3)
with(d[match(v, d$V1), ], paste(V3, V2, sep=':'))
# [1] "fruit:apple" "animal:cat" "animal:dog"
Maybe something like this?
> df<-data.frame(V1=c(1,2,3), V2=c("a","b","c"), V3=c("v", "nv", "nv"))
> v <- c(3,1,2)
> df[v,]
## V1 V2 V3
## 3 3 c nv
## 1 1 a v
## 2 2 b nv
> res <- as.character(apply(df[v,], 1, function(r) paste(r[3],r[2],sep=":")))
> res
## [1] "nv:c" "v:a" "nv:b"
Here is a base solution
a <- data.frame(c("cat", "dog", "apple"), c("animal", "animal", "fruit"))
v <- c(3,1,2)
apply(a[v, 2:1], 1, paste0, collapse = ":")  # reverse the columns so the category comes first
