Refining GUI developed by RGTK2 - r

I am developing a GUI using R and GTK2 and created something as shown in the photo.
Here's the script:
library(RGtk2)
rm(list=ls())
window.master <- gtkWindow("toplevel",show=FALSE)
window.master$setDefaultSize(1024,768)
window.master$set(title="Window Name")
forms.notebook <- gtkNotebook()
forms.notebook$setTabPos("top")
form1.notebook <- gtkNotebook()
form1.notebook$setTabPos("top")
form1.boxp1.y3 <- gtkVBox(FALSE,3)
form1.framep1y1 <- gtkFrame("AAA")
form1.boxp1y1.2y <- gtkVBox(FALSE,2)
form1.boxp1y1.y1.4x <- gtkHBox(FALSE,4)
form1.boxp1y1.y1.4x$packStart(gtkLabel("BBB"),FALSE,FALSE)
form1.boxp1y1.y1.4x$packStart(gtkEntry(),TRUE,TRUE)
form1.boxp1y1.y1.4x$packStart(gtkLabel("CCC"),FALSE,FALSE)
form1.boxp1y1.y1.4x$packStart(gtkEntry(),TRUE,TRUE)
form1.boxp1y1.y2.4x <- gtkHBox(FALSE,4)
form1.boxp1y1.y2.4x$packStart(gtkLabel("DDD"),FALSE,FALSE)
form1.boxp1y1.y2.4x$packStart(gtkEntry(),TRUE,TRUE)
form1.boxp1y1.y2.4x$packStart(gtkLabel("EEE"),FALSE,FALSE)
form1.boxp1y1.y2.4x$packStart(gtkEntry(),TRUE,TRUE)
form1.boxp1y1.2y$add(form1.boxp1y1.y1.4x)
form1.boxp1y1.2y$add(form1.boxp1y1.y2.4x)
form1.framep1y1$add(form1.boxp1y1.2y)
form1.framep1y2 <- gtkFrame("FFF")
form1.boxp1y2.2y <- gtkVBox(FALSE,2)
### 1nd row ###
form1.boxp1y1.y1.6x <- gtkHBox(FALSE,6)
form1.boxp1y1.y1.6x$packStart(gtkLabel("GGG"),FALSE,FALSE)
form1.boxp1y1.y1.6x$packStart(gtkEntry(),TRUE,TRUE)
form1.boxp1y1.y1.6x$packStart(gtkLabel("HHH"),FALSE,FALSE)
form1.boxp1y1.y1.6x$packStart(gtkEntry(),FALSE,FALSE)
form1.boxp1y1.y1.6x$packStart(gtkLabel("III"),FALSE,FALSE)
form1.boxp1y1.y1.6x$packStart(gtkEntry(),FALSE,FALSE)
### 2nd row ###
form1.boxp1y1.y2.4x <- gtkHBox(FALSE,4)
form1.boxp1y1.y2.4x$packStart(gtkLabel("JJJ"),FALSE,FALSE)
form1.boxp1y1.y2.4x$packStart(gtkEntry(),TRUE,TRUE)
form1.boxp1y1.y2.4x$packStart(gtkLabel("KKK"),FALSE,FALSE)
form1.boxp1y1.y2.4x$packStart(gtkEntry(),TRUE,TRUE)
### 3nd row ###
form1.boxp1y1.y3.4x <- gtkHBox(FALSE,4)
form1.boxp1y1.y3.4x$packStart(gtkLabel("LLL"),FALSE,FALSE)
form1.boxp1y1.y3.4x$packStart(gtkEntry(),TRUE,TRUE)
form1.boxp1y1.y3.4x$packStart(gtkLabel("MMM"),FALSE,FALSE)
form1.boxp1y1.y3.4x$packStart(gtkEntry(),TRUE,TRUE)
form1.boxp1y2.2y$add(form1.boxp1y1.y1.6x)
form1.boxp1y2.2y$add(form1.boxp1y1.y2.4x)
form1.boxp1y2.2y$add(form1.boxp1y1.y3.4x)
form1.framep1y2$add(form1.boxp1y2.2y)
form1.framep1y3 <- gtkFrame("NNN")
form1.boxp1y3.2y <- gtkVBox(FALSE,2)
### 1nd row ###
form1.boxp1y3.y1.6x <- gtkHBox(FALSE,6)
form1.boxp1y3.y1.6x$packStart(gtkLabel("OOO"),FALSE,FALSE)
form1.boxp1y3.y1.6x$packStart(gtkEntry(),TRUE,TRUE)
form1.boxp1y3.y1.6x$packStart(gtkLabel("PPP"),FALSE,FALSE)
form1.boxp1y3.y1.6x$packStart(gtkEntry(),FALSE,FALSE)
form1.boxp1y3.y1.6x$packStart(gtkLabel("QQQ"),FALSE,FALSE)
form1.boxp1y3.y1.6x$packStart(gtkEntry(),FALSE,FALSE)
### 2nd row ###
form1.boxp1y3.y2.2x <- gtkHBox(FALSE,2)
form1.boxp1y3.y2.2x$packStart(gtkCheckButton("RRR"),TRUE,TRUE)
form1.boxp1y3.y2.2x$packStart(gtkCheckButton("SSS"),TRUE,TRUE)
form1.boxp1y3.2y$add(form1.boxp1y3.y1.6x)
form1.boxp1y3.2y$add(form1.boxp1y3.y2.2x)
form1.framep1y3$add(form1.boxp1y3.2y)
form1.boxp1.y3$packStart(form1.framep1y1,FALSE,FALSE)
form1.boxp1.y3$packStart(form1.framep1y2,FALSE,FALSE)
form1.boxp1.y3$packStart(form1.framep1y3,FALSE,FALSE)
form1.boxp2.ud <- gtkVBox(FALSE,2)
form1.framep2f1 <- gtkFrame("TTT")
form1.framep2f2 <- gtkFrame("UUU")
form1.boxp2.ud$add(form1.framep2f1)
form1.boxp2.ud$add(form1.framep2f2)
form1.boxp3.ud <- gtkVBox(FALSE,1)
form1.framep3f1 <- gtkFrame("VVV")
form1.boxp3.ud$add(form1.framep3f1)
form1.boxp4.ud <- gtkVBox(FALSE,1)
form1.framep4f1 <- gtkFrame("WWW")
form1.boxp4.ud$add(form1.framep4f1)
form1.boxp5.ud <- gtkVBox(FALSE,1)
form1.framep5f1 <- gtkFrame("XXX")
form1.boxp5.ud$add(form1.framep5f1)
form1.notebook$add(form1.boxp1.y3)
form1.notebook$add(form1.boxp2.ud)
form1.notebook$add(form1.boxp3.ud)
form1.notebook$add(form1.boxp4.ud)
form1.notebook$add(form1.boxp5.ud)
forms.notebook$add(form1.notebook)
window.master$add(forms.notebook)
window.master$show()
I have a few questions:
How to change the tab name? (I can't use Page1,Page2,Page3 everywhere)
How to add spacer between each row/frame?
I want to display information from the dataframe into the gtkEntry field, I think I need to give label of the gtkEntry field, how can I do that?
Thanks.

A few answers:
How to change the tab name? (I can't use Page1,Page2,Page3 everywhere):
Instead of form1.notebook$add(form1.boxp1.y3) use form1$appendPage() with the tab.label option. See ?gtkNotebookAppendPage
How to add spacer between each row/frame?
For a layout like this you should be using gtkTable. For gtkVBox you can use setSpacing (?gtkBoxSetSpacing and/or padding when packing.
I want to display information from the dataframe into the gtkEntry field, I think I need to give label of the gtkEntry field, how can I do that?
Not sure what you want to do here. I'm guessing you want to a) name your gtkEntry objects and then set their text. If you coordinate your names with your data frame this can be done quickly. For example, using a list to store the entries along the lines of (from your BBB label):
l <- list()
form1.boxp1y1.y1.4x$packStart(l$BBB <- gtkEntry(),TRUE,TRUE) ## BBB from your label
etc.
Then supposing your data frame has a column BBB (along with others) you can do something like
sapply(names(l), function(i) l[[i]]$setText(your_dataframe[1,i]))
Or if you don't like so many ['s you could use:
mapply(gtkEntrySetText(l, your_dataframe[1, names(l)])

Related

Creating a for loop in R from a list

I'm trying to create a for loop in R to iterate through a list of genetic variants, labeled with rsID's, and filter the results by patient ID.
ace2_snps <- c("rs4646121", "rs4646127", "rs1996225", "rs2158082", "rs4830974", "rs148271868", "rs113539251", "rs4646135", "rs4646179", "rs2301693", "rs16980031", "rs12689012", "rs4646141", "rs142049267", "rs16979971", "rs12007623", "rs4646182", "rs147214574", "rs6632677", "rs139469582", "rs149000434", "rs148805807", "rs112032651", "rs144314464", "rs147077778", "rs182259051", "rs112621533", "rs35803318", "rs35304868", "rs113848176", "rs145345877", "rs12009805", "rs233570", "rs73635824", "rs73635823", "rs4646142", "rs4646157", "rs2074192", "rs79878075", "rs144239059", "rs67635467", "rs183583165", "rs137910448", "rs116419580", "rs2097723", "rs4646170")
for (snps in ace2_snps) {
genotype_snps <- as.data.frame(bgen_ACE2$data[snps,,])
idfromcsv <- read.csv("/Users/keeseyyyyy/Desktop/Walley/pospatid.csv")
id <- as.character(idfromcsv[[1]])
filtered_snps <- genotype_snps[id,] }
I need to run genotype_rs146217251 <- as.data.frame(bgen_ACE2$data["rs146217251",,]) for each rsID, and then I'd like to label the variable filtered_snps according to its rsID in the place of "snps" in the variable name for each variant.
I'm not very familiar with R syntax. Can anyone give me some tips?
For one variant, the process would go like this:
genotype_rs146217251 <- as.data.frame(bgen_ACE2$data["rs146217251",,])
idfromcsv <- read.csv("/Users/keeseyyyyy/Desktop/Walley/pospatid.csv")
id <- as.character(idfromcsv[[1]])
filtered <- genotype_rs146217251[id,]

How to create a loop that changes part of a column name in a data frame

I am trying to find Cronbach's Alpha for survey data containing a series of multi-item measures. Rather than have to manually write out every single multi-item measure, it looks like something a loop should be able to manage far more effectively, but it needs to change only part of the column name, according to the question number.
The basic idea as it currently sits in my head would be...
for (N in 4:22) {
ytqN <- data.frame(YT_Data$QNa, YT_Data$QNb, YT_Data$QNc)
alpha(ytqN)
}
The loop would then create new data frames for each multi item measure and run Cronbach's Alpha as it goes.
This doesn't work though. :(
ytq4 <- data.frame(YT_Data$Q4a, YT_Data$Q4b, YT_Data$Q4c)
alpha(ytq4)
ytq5 <- data.frame(YT_Data$Q5a, YT_Data$Q5b, YT_Data$Q5c)
alpha(ytq5)
ytq6 <- data.frame(YT_Data$Q6a, YT_Data$Q6b, YT_Data$Q6c)
alpha(ytq6)
ytq7 <- data.frame(YT_Data$Q7a, YT_Data$Q7b, YT_Data$Q7c)
alpha(ytq7)
ytq8 <- data.frame(YT_Data$Q8a, YT_Data$Q8b, YT_Data$Q8c)
alpha(ytq8)
ytq9 <- data.frame(YT_Data$Q9a, YT_Data$Q9b, YT_Data$Q9c)
alpha(ytq9)
ytq10 <- data.frame(YT_Data$Q10a, YT_Data$Q10b, YT_Data$Q10c)
alpha(ytq10)
ytq11 <- data.frame(YT_Data$Q11a, YT_Data$Q11b, YT_Data$Q11c)
alpha(ytq11)
ytq12 <- data.frame(YT_Data$Q12a, YT_Data$Q12b, YT_Data$Q12c)
alpha(ytq12)
ytq13 <- data.frame(YT_Data$Q13a, YT_Data$Q13b, YT_Data$Q13c)
alpha(ytq13)
ytq14 <- data.frame(YT_Data$Q14a, YT_Data$Q14b, YT_Data$Q14c)
alpha(ytq14)
ytq15 <- data.frame(YT_Data$Q15a, YT_Data$Q15b, YT_Data$Q15c)
alpha(ytq15)
ytq16 <- data.frame(YT_Data$Q16a, YT_Data$Q16b, YT_Data$Q16c)
alpha(ytq16)
ytq17 <- data.frame(YT_Data$Q17a, YT_Data$Q17b, YT_Data$Q17c)
alpha(ytq17)
ytq18 <- data.frame(YT_Data$Q18a, YT_Data$Q18b, YT_Data$Q18c)
alpha(ytq18)
ytq19 <- data.frame(8 - YT_Data$Q19a, YT_Data$Q19b, YT_Data$Q19c)
# Reverse code Q19a
alpha(ytq19)
ytq20 <- data.frame(YT_Data$Q20a, YT_Data$Q20b, YT_Data$Q20c)
alpha(ytq20)
ytq21 <- data.frame(YT_Data$Q21a, YT_Data$Q21b, YT_Data$Q21c)
alpha(ytq21)
ytq22 <- data.frame(YT_Data$Q22a, YT_Data$Q22b, YT_Data$Q22c)
alpha(ytq22)
The desired results would be a single output containing all the Cronbach's Alphas for the multi item measures for questions 4-22 in the data set I am currently working on executed via a single piece of code, rather than have to go question by question.
It's easier to help if you include your data, but I guess this should work:
alpha_list = list()
for(N in 4:22){
ytq = data.frame(YT_Data[paste0("Q",N,"a")],
YT_Data[paste0("Q",N,"b")],
YT_Data[paste0("Q",N,"c")])
alpha_list[[N]] = alpha(ytq)
}
We are using paste0() to create the column names while looping on N. alpha_list will be a list with the results given by alpha()

Create and combine 2 grid.tables

I have created a grid.table object to display a dataframe in PowerBi, below there is my code:
dataset <- data.frame(BDS_ID = c("001","002"),
PRIORITY = c("high","medium"),
STATUS = c("onair","onair"),
COMPANY = c("airfr","fly"))
my.result <- melt(dataset, id = c("BDS_ID"))
mytheme <- ttheme_default(base_size = 10,
core=list(fg_params=list(hjust=0, x=0.01),
bg_params=list(fill=c("white", "grey90"))))
for (i in 1:nrow(tg)) {
tg$grobs[[i]] <- editGrob(tg$grobs[[i]], gp=gpar(fontface="bold"))
}
grid.draw(tg)
and this is my output:
I would like to improve my output in the following way: I would like that the row headers to be unique and have a different column for each different value of each variable repeating the column with the row headers each time.
I tried to do this using the statement t(dataset), but I do not get the desired result because the row headers are not repeated.
I would like to get an output (always classy grob) similar to this:
**PRIORITY** high **PRIORITY** medium
**STATUS** onair **STATUS** onair
**COMPANY** airfr **COMPANY** fly
Does anyone knows how to achive this?
Thanks
I'm unable to reproduce the grob format you've shown based on the code you've provided, but I've got something similar:
dataset <- data.frame(BDS_ID = c("001","002"),
PRIORITY = c("high","medium"),
STATUS = c("onair","onair"),
COMPANY = c("airfr","fly"))
dataset <- data.frame(t(dataset))
dataset$label1 <- rownames(dataset)
dataset$label2 <- rownames(dataset)
colnames(dataset) <- c("status1", "status2", "label1", "label2")
dataset <- dataset[c(2:nrow(dataset)), c(3, 1, 4, 2)]
rownames(dataset) <- NULL
test <- grid.draw(tableGrob(dataset))
The above code produces the following object. It doesn't look exactly like yours, but it's in the general structure you're looking for:

r - getting all NA in ordered factor column

Instead of showing more2 or less2 in the column, it only shows up as NA. Why aren't the character names appearing instead?
careermore2 <- vector(mode="character",length=length(mlb$careeryrs))
"less2" <- careermore2[mlb$careeryrs<=2]
"more2" <- careermore2[mlb$careeryrs>=2]
No.seasons <- factor(careermore2,levels=c("more2","less2"),exclude=NA,ordered=TRUE)
mlb2 <- cbind(mlb,No.seasons)
str(mlb2$No.seasons)
head(mlb2$No.seasons)
mlb2[mlb2$No.seasons=="more2",]
Looking at careermore2 I would say you've got these the wrong way round:
"less2" <- careermore2[mlb$careeryrs<=2]
"more2" <- careermore2[mlb$careeryrs>=2]
That creates two objects. You really meant:
careermore2[mlb$careeryrs<=2] = "less2"
careermore2[mlb$careeryrs>=2] = "more2"
ie set the corresponding values in careermore2. And you probably want <2 or >2 rather than have = in both...

How to improve this code for getting pairwise?

It is a question build upon the previous question (http://stackoverflow.com/questions/6538448/r-how-to-write-a-loop-to-get-a-matrix).
It is different from the previous one, as more details is provided, and libraries and example file is provided according to comments from DWin. So, I submitted it as a new question. Could you mind to teach me how to modify this code further?
To load the necessary libraries:
source("http://bioconductor.org/biocLite.R")
biocLite()
My protseq.fasta file has the following contents:
>drugbank_target|1 Peptidoglycan synthetase ftsI (DB00303)
MVKFNSSRKSGKSKKTIRKLTAPETVKQNKPQKVFEKCFMRGRYMLSTVLILLGLCALVARAAYVQSINADTLSNEADKR
SLRKDEVLSVRGSILDRNGQLLSVSVPMSAIVADPKTMLKENSLADKERIAALAEELGMTENDLVKKIEKNSKSGYLYLA
RQVELSKANYIRRLKIKGIILETEHRRFYPRVEEAAHVVGYTDIDGNGIEGIEKSFNSLLVGKDGSRTVRKDKRGNIVAH
ISDEKKYDAQDVTLSIDEKLQSMVYREIKKAVSENNAESGTAVLVDVRTGEVLAMATAPSYNPNNRVGVKSELMRNRAIT
DTFEPGSTVKPFVVLTALQRGVVKRDEIIDTTSFKLSGKEIVDVAPRAQQTLDEILMNSSNRGVSRLALRMPPSALMETY
QNAGLSKPTDLGLIGEQVGILNANRKRWADIERATVAYGYGITATPLQIARAYATLGSFGVYRPLSITKVDPPVIGKRVF
SEKITKDIVGILEKVAIKNKRAMVEGYRVGVKTGTARKIENGHYVNKYVAFTAGIAPISDPRYALVVLINDPKAGEYYGG
AVSAPVFSNIMGYALRANAIPQDAEAAENTTTKSAKRIVYIGEHKNQKVN
>drugbank_target|3 Histidine decarboxylase (DB00114; DB00117)
MMEPEEYRERGREMVDYICQYLSTVRERRVTPDVQPGYLRAQLPESAPEDPDSWDSIFGDIERIIMPGVVHWQSPHMHAY
YPALTSWPSLLGDMLADAINCLGFTWASSPACTELEMNVMDWLAKMLGLPEHFLHHHPSSQGGGVLQSTVSESTLIALLA
ARKNKILEMKTSEPDADESCLNARLVAYASDQAHSSVEKAGLISLVKMKFLPVDDNFSLRGEALQKAIEEDKQRGLVPVF
VCATLGTTGVCAFDCLSELGPICAREGLWLHIDAAYAGTAFLCPEFRGFLKGIEYADSFTFNPSKWMMVHFDCTGFWVKD
KYKLQQTFSVNPIYLRHANSGVATDFMHWQIPLSRRFRSVKLWFVIRSFGVKNLQAHVRHGTEMAKYFESLVRNDPSFEI
PAKRHLGLVVFRLKGPNCLTENVLKEIAKAGRLFLIPATIQDKLIIRFTVTSQFTTRDDILRDWNLIRDAATLILSQHCT
SQPSPRVGNLISQIRGARAWACGTSLQSVSGAGDDPVQARKIIKQPQRVGAGPMKRENGLHLETLLDPVDDCFSEEAPDA
TKHKLSSFLFSYLSVQTKKKTVRSLSCNSVPVSAQKPLPTEASVKNGGSSRVRIFSRFPEDMMMLKKSAFKKLIKFYSVP
SFPECSSQCGLQLPCCPLQAMV
>drugbank_target|5 Glutaminase liver isoform, mitochondrial (DB00130; DB00142)
MRSMKALQKALSRAGSHCGRGGWGHPSRSPLLGGGVRHHLSEAAAQGRETPHSHQPQHQDHDSSESGMLSRLGDLLFYTI
AEGQERTPIHKFTTALKATGLQTSDPRLRDCMSEMHRVVQESSSGGLLDRDLFRKCVSSSIVLLTQAFRKKFVIPDFEEF
TGHVDRIFEDVKELTGGKVAAYIPQLAKSNPDLWGVSLCTVDGQRHSVGHTKIPFCLQSCVKPLTYAISISTLGTDYVHK
FVGKEPSGLRYNKLSLDEEGIPHNPMVNAGAIVVSSLIKMDCNKAEKFDFVLQYLNKMAGNEYMGFSNATFQSEKETGDR
NYAIGYYHEEKKCFPKGVDMMAALDLYFQLCSVEVTCESGSVMAATLANGGICPITGESVLSAEAVRNTLSLMHSCGMYD
FSGQFAFHVGLPAKSAVSGAILLVVPNVMGMMCLSPPLDKLGNSHRGTSFCQKLVSLFNFHNYDNLRHCARKLDPRREGA
EIRNKTVVNLLFAAYSGDVSALRRFALSAMDMEQKDYDSRTALHVAAAEGHIEVVKFLIEACKVNPFAKDRWGNIPLDDA
VQFNHLEVVKLLQDYQDSYTLSETQAEAAAEALSKENLESMV
>drugbank_target|6 Coagulation factor XIII A chain (DB00130; DB01839; DB02340)
SETSRTAFGGRRAVPPNNSNAAEDDLPTVELQGVVPRGVNLQEFLNVTSVHLFKERWDTNKVDHHTDKYENNKLIVRRGQ
SFYVQIDFSRPYDPRRDLFRVEYVIGRYPQENKGTYIPVPIVSELQSGKWGAKIVMREDRSVRLSIQSSPKCIVGKFRMY
VAVWTPYGVLRTSRNPETDTYILFNPWCEDDAVYLDNEKEREEYVLNDIGVIFYGEVNDIKTRSWSYGQFEDGILDTCLY
VMDRAQMDLSGRGNPIKVSRVGSAMVNAKDDEGVLVGSWDNIYAYGVPPSAWTGSVDILLEYRSSENPVRYGQCWVFAGV
FNTFLRCLGIPARIVTNYFSAHDNDANLQMDIFLEEDGNVNSKLTKDSVWNYHCWNEAWMTRPDLPVGFGGWQAVDSTPQ
ENSDGMYRCGPASVQAIKHGHVCFQFDAPFVFAEVNSDLIYITAKKDGTHVVENVDATHIGKLIVTKQIGGDGMMDITDT
YKFQEGQEEERLALETALMYGAKKPLNTEGVMKSRSNVDMDFEVENAVLGKDFKLSITFRNNSHNRYTITAYLSANITFY
TGVPKAEFKKETFDVTLEPLSFKKEAVLIQAGEYMGQLLEQASLHFFVTARINETRDVLAKQKSTVLTIPEIIIKVRGTQ
VVGSDMTVTVQFTNPLKETLRNVWVHLDGPGVTRPMKKMFREIRPNSTVQWEEVCRPWVSGHRKLIASMSSDSLRHVYGE
LDVQIQRRPSM
To load the data to R for the analysis, I have done:
require("Biostrings")
data(BLOSUM100)
seqs <- readFASTA("./protseq.fasta", strip.descs=TRUE)
To get the the pairwise numbers, as there are a total of 4 sequences, I have done:
number <-c(1:4); dat <- expand.grid(number,number, stringsAsFactors=FALSE)
datr <- dat[dat[,1] > dat[,2] , ]
In order to calculate the score one by one, I can do this:
score(pairwiseAlignment(seqs[[x]]$seq, seqs[[y]]$seq, substitutionMatrix=BLOSUM100, gapOpening=0, gapExtension=-5))
However, I have problem to add a new column as "score" to include all the score for each pairs of the proteins. I tried to do this, but did not work.
datr$score <- lapply(datr, 1, function(i) { x <- datr[i,1]; y<- datr[i,2]; score(pairwiseAlignment(seqs[[x]]$seq, seqs[[y]]$seq, substitutionMatrix=BLOSUM100, gapOpening=0, gapExtension=-5))})
Could you mind to comments how to further improve it? Thanks DWin and diliop for wonderful solutions to my previous question.
Try:
datr$score <- sapply(1:nrow(datr), function(i) {
x <- datr[i,1]
y <- datr[i,2]
score(pairwiseAlignment(seqs[[x]]$seq, seqs[[y]]$seq, substitutionMatrix=BLOSUM100,gapOpening=0, gapExtension=-5))
})
To be able to reference your sequences better using their names, you might want to tidy up datr by doing the following:
colnames(datr) <- c("seq1id", "seq2id", "score")
datr$seq1name <- sapply(datr$seq1id, function(i) seqs[[i]]$desc)
datr$seq2name <- sapply(datr$seq2id, function(i) seqs[[i]]$desc)
Or if you just want to extract the accession IDs i.e. the contents of your parentheses, you could use stringr as such:
library(stringr)
datr$seq1name <- sapply(datr$seq2id, function(i) str_extract(seqs[[i]]$desc, "DB[0-9\\ ;DB]+"))
Hope this helps!

Resources