I have created a dictionary, and I want to add the values from each item in the dictionary. Here is what I have so far:
import sys
import re
import collections
import numpy as np
dict = {'x':['x',0], 'y':['y',0], 'z':['z',0]}
I am then using an input file to count the instances of x, y, and z and add them to the dictionary:
with open(input_file, 'r', encoding='utf-8') as f:
for line in f:
words = line.split()
count_lines += 1
num_lines += 1
num_words += len(words)
title = line
for key in dict:
title = line
if re.search(key, title):
trusted[key][1]+=1
title = re.sub(key,dict[key][0],title)
while re.search(key, title):
dict[key][1]+=1
title = re.sub(key,dict[key][0],title)
dict_values = dict.values()
output_file.write('Dict:', sum(dict_values[1:-1]))
Then my dictionary would be, for example: dict = {'x':['x',6], 'y':['y',10], 'z':['z',8]}, and I want to add 6, 10, and 8 together.
I have tried this with and without the string split, I have tried assigning the sum equation to a variable and writing the variable, etc. I am continuously getting "TypeError: unsupported operand type(s) for +: 'int' and 'list'" and "TypeError: 'dict_values' object is not subscriptable" error messages.
Related
I'm getting error messages when identifying ColumnDataSource as my source, want to do it right.
Let me show the errors.
First, I generate some random data in a DataFrame and put it into the ColumnDataSource:
col_list = ['ob1','ob2','ob3','ob4','ob5']
df = pd.DataFrame(np.random.uniform(73.965,74.03,size=(25, 5)).astype(float), columns=col_list)
df.reset_index(inplace=True)
df = df.rename(columns = {'index':'order'})
df['order'] = df['order'] + 1
cds = ColumnDataSource(data=df)
So far so good.
I try to generate a graph:
p = figure(title = 'ColumnDataSource test', sizing_mode = 'stretch_both')
p.line(x=cds.data['order'], y = cds.data['ob1'], source = cds.data[['order', 'ob1']])
show(p)
and get the following error:
Traceback (most recent call last):
File "e:\Black_Belt_Six_Sigma\first_take.py", line 57, in <module>
p.line(x=cds.data['order'], y = cds.data['ob1'], source = cds.data[['order', 'ob1']])
TypeError: unhashable type: 'list'
Fair enough, I won't give the source parameter a list and try again:
p = figure(title = 'ColumnDataSource test', sizing_mode = 'stretch_both')
p.line(x=cds.data['order'], y = cds.data['ob1'], source = cds.data)
show(p)
I get no graph but only the following error:
RuntimeError:
Expected x and y to reference fields in the supplied data source.
When a 'source' argument is passed to a glyph method, values that are sequences
(like lists or arrays) must come from references to data columns in the source.
For instance, as an example:
source = ColumnDataSource(data=dict(x=a_list, y=an_array))
p.circle(x='x', y='y', source=source, ...) # pass column names and a source
Alternatively, *all* data sequences may be provided as literals as long as a
source is *not* provided:
p.circle(x=a_list, y=an_array, ...) # pass actual sequences and no source
Based on this error message I've tried the following:
cds = ColumnDataSource(data=dict(order = df['order'].to_list(), ob1 = df['ob1'].to_list()))
p = figure(title = 'ColumnDataSource test', sizing_mode = 'stretch_both')
p.line(x=cds.data['order'], y = cds.data['ob1'], source = cds)
show(p)
And
cds = ColumnDataSource(data=dict(order = df['order'], ob1 = df['ob1']))
p = figure(title = 'ColumnDataSource test', sizing_mode = 'stretch_both')
p.line(x=cds.data['order'], y = cds.data['ob1'], source = cds)
show(p)
Both keep returning the same error message.
I can get a graph/plot if I don't specify the source parameter, so maybe that's the right course of action? Seems odd, I imagine it's important if the developers made it a parameter.
You should pass your dictionary keys order and ob1 directly to the arguments x and y. And your ColumDataSource cds to the argument source (see more examples here):
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
col_list = ['ob1','ob2','ob3','ob4','ob5']
df = pd.DataFrame(np.random.uniform(73.965,74.03,size=(25, 5)).astype(float), columns=col_list)
df.reset_index(inplace=True)
df = df.rename(columns = {'index':'order'})
df['order'] = df['order'] + 1
cds = ColumnDataSource(data=df)
p = figure(title = 'ColumnDataSource test', sizing_mode = 'stretch_both')
p.line(x='order',y='ob1',source=cds)
show(p)
The problem I am faced with regards to llvmlite is producing a simple hello world example.
I am unable to display the string global variable value in the function I created.
It appears to always print out the number one.
I have already tried to return the stringtype which produced a error.
from llvmlite import ir
i64 = ir.IntType(64)
i8 = ir.IntType(16)
hellostr = 'hello, world!'
stringtype = ir.ArrayType(i64, len(hellostr))
module = ir.Module( name="m_hello_example" )
hello = ir.GlobalVariable(module, stringtype, '.str4')
fn_int_to_int_type = ir.FunctionType(i64, [stringtype.as_pointer()] )
fn_hel = ir.Function( module, fn_int_to_int_type, name="fn_hel" )
fn_hel_block = fn_hel.append_basic_block( name="fn_hel_entry" )
builder = ir.IRBuilder(fn_hel_block )
# zero = builder.constant(i64, 0)
# const_1 = ir.Constant(stringtype,1);
# builder.ret(const_1)
const_1 = ir.Constant(i64,1);
# print(const_1)
builder.ret(const_1)
print( module )
I was expecting the output to print out the string 'hello, world!'.
Any help would be much appreciated.
Thanks.
It ended up that I was able to solve my problem with the following code:
import llvmlite.ir as ir
import llvmlite.binding as llvm
from ctypes import CFUNCTYPE
def main():
m = ir.Module()
func_ty = ir.FunctionType(ir.VoidType(), []) #defining printer function as type void
func = ir.Function(m, func_ty, name="printer") #define function as printer
builder = ir.IRBuilder(func.append_basic_block('entry')) #defining the entry point of the function printer
fmt = "%s\n\0" #in function printf allows for inserting arg in, next global_fmt statements allow for creating #"fstr" assignment
c_fmt = ir.Constant(ir.ArrayType(ir.IntType(8), len(fmt)),
bytearray(fmt.encode("utf8")))
global_fmt = ir.GlobalVariable(m, c_fmt.type, name="fstr")
global_fmt.linkage = 'internal'
global_fmt.global_constant = True
global_fmt.initializer = c_fmt
arg = "Hello, World!\0" #args will be passed into printf function.
c_str_val = ir.Constant(ir.ArrayType(ir.IntType(8), len(arg)),
bytearray(arg.encode("utf8"))) #creates the c_str_value as a constant
printf_ty = ir.FunctionType(ir.IntType(32), [], var_arg=True) #creation of the printf function begins here and specifies the passing of a argument
printf = ir.Function(m, printf_ty, name="printf")
c_str = builder.alloca(c_str_val.type) #creation of the allocation of the %".2" variable
builder.store(c_str_val, c_str) #store as defined on the next line below %".2"
voidptr_ty = ir.IntType(8).as_pointer()
fmt_arg = builder.bitcast(global_fmt, voidptr_ty) #creates the %".4" variable with the point pointing to the fstr
builder.call(printf, [fmt_arg, c_str]) #We are calling the prinf function with the fmt and arg and returning the value as defiend on the next line
builder.ret_void()
#Next lines are for calling llvm and returning the assembly.
llvm.initialize()
llvm.initialize_native_target()
llvm.initialize_native_asmprinter()
print(str(m)) #PRINTING OUT THE ASSEMBLY
llvm_module = llvm.parse_assembly(str(m)) #Parsing teh assembly
tm = llvm.Target.from_default_triple().create_target_machine() #creating the target machine
with llvm.create_mcjit_compiler(llvm_module, tm) as ee:
ee.finalize_object() #Making sure all modules owned by the execution engine are fully processed and usable for execution
fptr = ee.get_function_address("printer") #fptr will reference the printer function
py_func = CFUNCTYPE(None)(fptr)
py_func() #run the function printer
if __name__ == "__main__":
main()
It appears that I didn't correctly assign the variable and hence why I wasn't returning anything.
On a simple grammar I am in the bad situation that one of my ParseActions is not called.
For me this is strange as parseActions of a base symbol ("logic_oper") and a derived symbol ("cmd_line") are called correctly. Just "pa_logic_cmd" is not called. You can see this on the output which is included at the end of the code.
As there is no exception on parsing the input string, I am assuming that the grammar is (basically) correct.
import io, sys
import pyparsing as pp
def diag(msg, t):
print("%s: %s" % (msg , str(t)) )
def pa_logic_oper(t): diag('logic_oper', t)
def pa_operand(t): diag('operand', t)
def pa_ident(t): diag('ident', t)
def pa_logic_cmd(t): diag('>>>>>> logic_cmd', t)
def pa_cmd_line(t): diag('cmd_line', t)
def make_grammar():
semi = pp.Literal(';')
ident = pp.Word(pp.alphas, pp.alphanums).setParseAction(pa_ident)
operand = (ident).setParseAction(pa_operand)
op_and = pp.Keyword('A')
op_or = pp.Keyword('O')
logic_oper = (( op_and | op_or) + pp.Optional(operand))
logic_oper.setParseAction(pa_logic_oper)
logic_cmd = logic_oper + pp.Suppress(semi)
logic_cmd.setParseAction(pa_logic_cmd)
cmd_line = (logic_cmd)
cmd_line.setParseAction(pa_cmd_line)
grammar = pp.OneOrMore(cmd_line) + pp.StringEnd()
return grammar
if __name__ == "__main__":
inp_str = '''
A param1;
O param2;
A ;
'''
grammar = make_grammar()
print( "pp-version:" + pp.__version__)
parse_res = grammar.parseString( inp_str )
'''USAGE/Output: python test_4.py
pp-version:2.0.3
operand: ['param1']
logic_oper: ['A', 'param1']
cmd_line: ['A', 'param1']
operand: ['param2']
logic_oper: ['O', 'param2']
cmd_line: ['O', 'param2']
logic_oper: ['A']
cmd_line: ['A']
'''
Can anybody give me a hint on this parseAction problem?
Thanks,
The problem is here:
cmd_line = (logic_cmd)
cmd_line.setParseAction(pa_cmd_line)
The first line assigns cmd_line to be the same expression as logic_cmd. You can verify by adding this line:
print("???", cmd_line is logic_cmd)
Then the second line calls setParseAction, which overwrites the parse action of logic_cmd, so the pa_logic_cmd will never get called.
Remove the second line, since you are already testing the calling of the parse action with pa_logic_cmd. You could change to using the addParseAction method instead, but to my mind that is an invalid test (adding 2 parse actions to the same pyparsing expression object).
Or, change the definition of cmd_line to:
cmd_line = pp.Group(logic_cmd)
Now you will have wrapped logic_cmd inside another expression, and you can then independently set and test the running of parse actions on the two different expressions.
I'm building an ExpressionSet class using rpy2, following the relevant tutorial as a guide. One of the most common things I do with the Eset object is subsetting, which in native R is as straightforward as
eset2<-eset1[1:10,1:5] # first ten features, first five samples
which returns a new ExpressionSet object with subsets of both the expression and phenotype data, using the given indices. Rpy2's RS4 object doesn't seem to allow direct subsetting, or have rx/rx2 attributes unlike e.g. RS3 vectors. I tried, with ~50% success, adding a '_subset' function (below) that creates subsets of these two datasets separately and assigns them back to Eset, but is there a more straightforward way that I'm missing?
from rpy2 import (robjects, rinterface)
from rpy2.robjects import (r, pandas2ri, Formula)
from rpy2.robjects.packages import (importr,)
from rpy2.robjects.methods import (RS4,)
class ExpressionSet(RS4):
# funcs to get the attributes
def _assay_get(self): # returns an environment, use ['exprs'] key to access
return self.slots["assayData"]
def _pdata_get(self): # returns an RS4 object, use .slots("data") to access
return self.slots["phenoData"]
def _feats_get(self): # returns an RS4 object, use .slots("data") to access
return self.slots["featureData"]
def _annot_get(self): # slots returns a tuple, just pick 1st (only) element
return self.slots["annotation"][0]
def _class_get(self): # slots returns a tuple, just pick 1st (only) element
return self.slots["class"][0]
# funcs to set the attributes
def _assay_set(self, value):
self.slots["assayData"] = value
def _pdata_set(self, value):
self.slots["phenoData"] = value
def _feats_set(self,value):
self.slots["featureData"] = value
def _annot_set(self, value):
self.slots["annotation"] = value
def _class_set(self, value):
self.slots["class"] = value
# funcs to work with the above to get/set the data
def _exprs_get(self):
return self.assay["exprs"]
def _pheno_get(self):
pdata = self.pData
return pdata.slots["data"]
def _exprs_set(self, value):
assay = self.assay
assay["exprs"] = value
def _pheno_set(self, value):
pdata = self.pData
pdata.slots["data"] = value
assay = property(_assay_get, _assay_set, None, "R attribute 'assayData'")
pData = property(_pdata_get, _pdata_set, None, "R attribute 'phenoData'")
fData = property(_feats_get, _feats_set, None, "R attribute 'featureData'")
annot = property(_annot_get, _annot_set, None, "R attribute 'annotation'")
exprs = property(_exprs_get, _exprs_set, None, "R attribute 'exprs'")
pheno = property(_pheno_get, _pheno_set, None, "R attribute 'pheno")
def _subset(self, features=None, samples=None):
features = features if features else self.exprs.rownames
samples = samples if samples else self.exprs.colnames
fx = robjects.BoolVector([f in features for f in self.exprs.rownames])
sx = robjects.BoolVector([s in samples for s in self.exprs.colnames])
self.pheno = self.pheno.rx(sx, self.pheno.colnames)
self.exprs = self.exprs.rx(fx,sx) # can't assign back to exprs this way
When doing
eset2<-eset1[1:10,1:5]
in R, the R S4 method "[" with the signature ("ExpressionSet") is fetched and run using the parameter values you provided.
The documentation is suggesting the use of getmethod (see http://rpy2.readthedocs.org/en/version_2.7.x/generated_rst/s4class.html#methods ) to facilitate the task of fetching the relevant S4 method, but its behaviour seems to have changed after the documentation was written (resolution of the dispatch through inheritance is no longer done).
The following should do it though:
from rpy2.robjects.packages import importr
methods = importr('methods')
r_subset_expressionset = methods.selectMethod("[", "ExpressionSet")
with thanks to #lgautier's answer, here's a snippet of my above code, modified to allow subsetting of the RS4 object:
from multipledispatch import dispatch
#dispatch(RS4)
def eset_subset(eset, features=None, samples=None):
"""
subset an RS4 eset object
"""
features = features if features else eset.exprs.rownames
samples = samples if samples else eset.exprs.colnames
fx = robjects.BoolVector([f in features for f in eset.exprs.rownames])
sx = robjects.BoolVector([s in samples for s in eset.exprs.colnames])
esub=methods.selectMethod("[", signature="ExpressionSet")(eset, fx,sx)
return esub
I want to do pairwise alignment with uniprot and pdb sequences. I have an input file containing uniprot and pdb IDs like this.
pdb id uniprot id
1dbh Q07889
1e43 P00692
1f1s Q53591
first, I need to read each line in an input file
2) retrieve the pdb and uniprot sequences from pdb.fasta and uniprot.fasta files
3) Do alignment and calculate sequence identity.
Usually, I use the following program for pairwise alignment and seq.identity calculation.
library("seqinr")
seq1 <- "MDEKRRAQHNEVERRRRDKINNWIVQLSKIIPDSSMESTKSGQSKGGILSKASDYIQELRQSNHR"
seq2<- "MKGQQKTAETEEGTVQIQEGAVATGEDPTSVAIASIQSAATFPDPNVKYVFRTENGGQVM"
library(Biostrings)
globalAlign<- pairwiseAlignment(seq1, seq2)
pid(globalAlign, type = "PID3")
I need to print the output like this
pdbid uniprotid seq.identity
1dbh Q07889 99
1e43 P00692 80
1f1s Q53591 56
How can I change the above code ? your help would be appreciated!
'
This code is hopefully what your looking for:
class test():
def get_seq(self, pdb,fasta_file): # Get sequences
from Bio.PDB.PDBParser import PDBParser
from Bio import SeqIO
aa = {'ARG':'R','HIS':'H','LYS':'K','ASP':'D','GLU':'E','SER':'S','THR':'T','ASN':'N','GLN':'Q','CYS':'C','SEC':'U','GLY':'G','PRO':'P','ALA':'A','ILE':'I','LEU':'L','MET':'M','PHE':'F','TRP':'W','TYR':'Y','VAL':'V'}
p=PDBParser(PERMISSIVE=1)
structure_id="%s" % pdb[:-4]
structure=p.get_structure(structure_id, pdb)
residues = structure.get_residues()
seq_pdb = ''
for res in residues:
res = res.get_resname()
if res in aa:
seq_pdb = seq_pdb+aa[res]
handle = open(fasta_file, "rU")
for record in SeqIO.parse(handle, "fasta") :
seq_fasta = record.seq
handle.close()
self.seq_aln(seq_pdb,seq_fasta)
def seq_aln(self,seq1,seq2): # Align the sequences
from Bio import pairwise2
from Bio.SubsMat import MatrixInfo as matlist
matrix = matlist.blosum62
gap_open = -10
gap_extend = -0.5
alns = pairwise2.align.globalds(seq1, seq2, matrix, gap_open, gap_extend)
top_aln = alns[0]
aln_seq1, aln_seq2, score, begin, end = top_aln
with open('aln.fasta', 'w') as outfile:
outfile.write('> PDB_seq\n'+str(aln_seq1)+'\n> Uniprot_seq\n'+str(aln_seq2))
print aln_seq1+'\n'+aln_seq2
self.seq_id('aln.fasta')
def seq_id(self,aln_fasta): # Get sequence ID
import string
from Bio import AlignIO
input_handle = open("aln.fasta", "rU")
alignment = AlignIO.read(input_handle, "fasta")
j=0 # counts positions in first sequence
i=0 # counts identity hits
for record in alignment:
#print record
for amino_acid in record.seq:
if amino_acid == '-':
pass
else:
if amino_acid == alignment[0].seq[j]:
i += 1
j += 1
j = 0
seq = str(record.seq)
gap_strip = seq.replace('-', '')
percent = 100*i/len(gap_strip)
print record.id+' '+str(percent)
i=0
a = test()
a.get_seq('1DBH.pdb','Q07889.fasta')
This outputs:
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------EQTYYDLVKAF-AEIRQYIRELNLIIKVFREPFVSNSKLFSANDVENIFSRIVDIHELSVKLLGHIEDTVE-TDEGSPHPLVGSCFEDLAEELAFDPYESYARDILRPGFHDRFLSQLSKPGAALYLQSIGEGFKEAVQYVLPRLLLAPVYHCLHYFELLKQLEEKSEDQEDKECLKQAITALLNVQSG-EKICSKSLAKRRLSESA-------------AIKK-NEIQKNIDGWEGKDIGQCCNEFI-EGTLTRVGAKHERHIFLFDGL-ICCKSNHGQPRLPGASNAEYRLKEKFF-RKVQINDKDDTNEYKHAFEIILKDENSVIFSAKSAEEKNNW-AALISLQYRSTL---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
MQAQQLPYEFFSEENAPKWRGLLVPALKKVQGQVHPTLESNDDALQYVEELILQLLNMLCQAQPRSASDVEERVQKSFPHPIDKWAIADAQSAIEKRKRRNPLSLPVEKIHPLLKEVLGYKIDHQVSVYIVAVLEYISADILKLVGNYVRNIRHYEITKQDIKVAMCADKVLMDMFHQDVEDINILSLTDEEPSTSGEQTYYDLVKAFMAEIRQYIRELNLIIKVFREPFVSNSKLFSANDVENIFSRIVDIHELSVKLLGHIEDTVEMTDEGSPHPLVGSCFEDLAEELAFDPYESYARDILRPGFHDRFLSQLSKPGAALYLQSIGEGFKEAVQYVLPRLLLAPVYHCLHYFELLKQLEEKSEDQEDKECLKQAITALLNVQSGMEKICSKSLAKRRLSESACRFYSQQMKGKQLAIKKMNEIQKNIDGWEGKDIGQCCNEFIMEGTLTRVGAKHERHIFLFDGLMICCKSNHGQPRLPGASNAEYRLKEKFFMRKVQINDKDDTNEYKHAFEIILKDENSVIFSAKSAEEKNNWMAALISLQYRSTLERMLDVTMLQEEKEEQMRLPSADVYRFAEPDSEENIIFEENMQPKAGIPIIKAGTVIKLIERLTYHMYADPNFVRTFLTTYRSFCKPQELLSLIIERFEIPEPEPTEADRIAIENGDQPLSAELKRFRKEYIQPVQLRVLNVCRHWVEHHFYDFERDAYLLQRMEEFIGTVRGKAMKKWVESITKIIQRKKIARDNGPGHNITFQSSPPTVEWHISRPGHIETFDLLTLHPIEIARQLTLLESDLYRAVQPSELVGSVWTKEDKEINSPNLLKMIRHTTNLTLWFEKCIVETENLEERVAVVSRIIEILQVFQELNNFNGVLEVVSAMNSSPVYRLDHTFEQIPSRQKKILEEAHELSEDHYKKYLAKLRSINPPCVPFFGIYLTNILKTEEGNPEVLKRHGKELINFSKRRKVAEITGEIQQYQNQPYCLRVESDIKRFFENLNPMGNSMEKEFTDYLFNKSLEIEPRNPKPLPRFPKKYSYPLKSPGVRPSNPRPGTMRHPTPLQQEPRKISYSRIPESETESTASAPNSPRTPLTPPPASGASSTTDVCSVFDSDHSSPFHSSNDTVFIQVTLPHGPRSASVSSISLTKGTDEVPVPPPVPPRRRPESAPAESSPSKIMSKHLDSPPAIPPRQPTSKAYSPRYSISDRTSISDPPESPPLLPPREPVRTPDVFSSSPLHLQPPPLGKKSDHGNAFFPNSPSPFTPPPPQTPSPHGTRRHLPSPPLTQEVDLHSIAGPPVPPRQSTSQHIPKLPPKTYKREHTHPSMHRDGPPLLENAHSS
PDB_seq 100 # pdb to itself would obviously have 100% identity
Uniprot_seq 24 # pdb sequence has 24% identity to the uniprot sequence
For this to work on you input file, you need to put my a.get_seq() in a for loop with the inputs from your text file.
EDIT:
Replace the seq_id function with this one:
def seq_id(self,aln_fasta):
import string
from Bio import AlignIO
from Bio import SeqIO
record_iterator = SeqIO.parse(aln_fasta, "fasta")
first_record = record_iterator.next()
print '%s has a length of %d' % (first_record.id, len(str(first_record.seq).replace('-','')))
second_record = record_iterator.next()
print '%s has a length of %d' % (second_record.id, len(str(second_record.seq).replace('-','')))
lengths = [len(str(first_record.seq).replace('-','')), len(str(second_record.seq).replace('-',''))]
if lengths.index(min(lengths)) == 0: # If both sequences have the same length the PDB sequence will be taken as the shortest
print 'PDB sequence has the shortest length'
else:
print 'Uniport sequence has the shortes length'
idenities = 0
for i,v in enumerate(first_record.seq):
if v == '-':
pass
#print i,v, second_record.seq[i]
if v == second_record.seq[i]:
idenities +=1
#print i,v, second_record.seq[i], idenities
print 'Sequence Idenity = %.2f percent' % (100.0*(idenities/min(lengths)))
to pass the arguments to the class use:
with open('input_file.txt', 'r') as infile:
next(infile)
next(infile) # Going by your input file
for line in infile:
line = line.split()
a.get_seq(segs[0]+'.pdb',segs[1]+'.fasta')
It might be something like this; a repeatable example (e.g., with short files posted on-line) would help...
library(Biostrings)
pdb = readAAStringSet("pdb.fasta")
uniprot = readAAStringSet("uniprot.fasta")
to input all sequences into two objects. pairwiseAlignment accepts a vector as first (query) argument, so if you were wanting to align all pdb against all uniprot pre-allocate a result matrix
pids = matrix(numeric(), length(uniprot), length(pdb),
dimnames=list(names(uniprot), names(pdb)))
and then do the calculations
for (i in seq_along(uniprot)) {
globalAlignment = pairwiseAlignment(pdb, uniprot[i])
pids[i,] = pid(globalAlignment)
}