from hashlib import md5
from Crypto.Cipher import AES
from Crypto.Random import get_random_bytes
from Crypto.Util.Padding import pad, unpad
class AESCipher:
def __init__(self, key):
password = key.encode('utf-8')
self.key = md5(password).digest()
def encrypt(self, data):
vector = b'0123456789ABCDEF'
# vector = get_random_bytes(AES.block_size)
# .encrypt(pad(message, BLOCK_SIZE))
encryption_cipher = AES.new(self.key, AES.MODE_CBC, vector)
return vector + encryption_cipher.encrypt(pad(data, AES.block_size))
def decrypt(self, data):
file_vector = data[:AES.block_size]
decryption_cipher = AES.new(self.key, AES.MODE_CBC, file_vector)
return unpad(decryption_cipher.decrypt(data[AES.block_size:]), AES.block_size)
i have this block of code for AES encryption,if i change the length of key does that mean if i take key length of 16 bytes it will automatically perform AES128 and vice-versa?
Related
I am trying to fine-tune a Huggingface bert-large-uncased-whole-word-masking model and i get a type error like this when training:
"TypeError: only integer tensors of a single element can be converted to an index"
Here is the code:
train_inputs = tokenizer(text_list[0:457], return_tensors='pt', max_length=512, truncation=True, padding='max_length')
train_inputs['labels']= train_inputs.input_ids.detach().clone()
Then i mask randomly about 15% of the words in the input-ids,
and define a class for the dataset, and then the mistake happens in the training loop:
class MeditationsDataset(torch.utils.data.Dataset):
def __init__(self, encodings):
self.encodings= encodings
def __getitem__(self, idx):
return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
def __len__(self):
return self.encodings.input_ids
train_dataset = MeditationsDataset(train_inputs)
train_dataloader = torch.utils.data.DataLoader(dataset= train_dataset, batch_size=8, shuffle=False)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
from transformers import BertModel, AdamW
model = BertModel.from_pretrained("bert-large-uncased-whole-word-masking")
model.to(device)
model.train()
optim = AdamW(model.parameters(), lr=1e-5)
num_epochs = 2
from tqdm.auto import tqdm
for epoch in range(num_epochs):
loop = tqdm(train_dataloader, leave=True)
for batch in loop:
batch = {k: v.to(device) for k, v in batch.items()}
outputs = model(**batch)
loss = outputs.loss
loss.backward()
optimizer.step()
optimizer.zero_grad()
The mistake happens in "for batch in loop"
Does anybody understand it and know how to solve this? Thanks in advance for your help
In the class MeditationsDataset in function __getitem__ torch.tensor(val[idx]) is deprecated by PyTorch you should use instead val[idx].clone().detach()
from Cryptodome.Cipher import AES
from Cryptodome.Random import get_random_bytes
import hashlib
import base64
def decrypt(enc, key_hash): # To decrypt data
print("\nIn Decryption method\n")
unpad = lambda s: s[:-ord(s[-1:])]
enc = base64.b64decode(enc)
iv = enc[:AES.block_size]
cipher = AES.new(key_hash, AES.MODE_CFB, iv)
ciper_text = cipher.decrypt(enc[AES.block_size:])
ciper_text = ciper_text.decode('utf-16')
ciper_text = unpad(ciper_text)
return ciper_text
def encrypt(ID, temperature, key_hash): # To encrypt data
print("\nIn Encryption method\n")
BS = AES.block_size
pad = lambda s: s + (BS - len(s) % BS) * chr(BS - len(s) % BS)
ID = pad(ID)
ID = ID.encode('utf-16')
temperature = pad(temperature)
temperature = temperature.encode('utf-16')
iv = get_random_bytes(AES.block_size)
cipher = AES.new(key= key_hash, mode= AES.MODE_CFB, iv= iv)
ID_cipher = base64.b64encode(iv + cipher.encrypt(ID))
temperature_cipher = base64.b64encode(iv + cipher.encrypt(temperature))
print("Id cipher is '{0}'".format(ID_cipher))
print("temp cipher is '{0}'".format(temperature_cipher))
return (ID_cipher, temperature_cipher)
no = int(input("enter no of records"))
key_hash = hashlib.sha256(b"charaka").digest() # Creating key for cipher
for i in range(no):
ID = input("enter ID\n")
temperature = input("enter temperature\n")
(ID_cipher, temperature_cipher) = encrypt(ID, temperature, key_hash)
print("Decyrpted ID is '{0}'".format((decrypt(ID_cipher, key_hash))))
print("Decyrpted temp is '{0}'".format((decrypt(temperature_cipher, key_hash))))
When I want to enter a record i.e "ID, temperature" and trying to decrypt both ID is decrypting fine but temperature is not decrypting. Sometimes it produces utf-16 error i.e
ciper_text = ciper_text.decode('utf-16')
UnicodeDecodeError: 'utf-16-le' codec can't decode bytes in position 4-5: illegal UTF-16 surrogate
sometimes, output is not displayed properly
In Decryption method
Decyrpted temp is '勼⼋'
My doubt is when ID is decrypting well why am I getting problem in decrypting temperature value. I have explored other tutorials about encoding techniques but the problem is same.
I used pycryptodome library to encrypt strings using AES.
Thank you
I have created a dictionary, and I want to add the values from each item in the dictionary. Here is what I have so far:
import sys
import re
import collections
import numpy as np
dict = {'x':['x',0], 'y':['y',0], 'z':['z',0]}
I am then using an input file to count the instances of x, y, and z and add them to the dictionary:
with open(input_file, 'r', encoding='utf-8') as f:
for line in f:
words = line.split()
count_lines += 1
num_lines += 1
num_words += len(words)
title = line
for key in dict:
title = line
if re.search(key, title):
trusted[key][1]+=1
title = re.sub(key,dict[key][0],title)
while re.search(key, title):
dict[key][1]+=1
title = re.sub(key,dict[key][0],title)
dict_values = dict.values()
output_file.write('Dict:', sum(dict_values[1:-1]))
Then my dictionary would be, for example: dict = {'x':['x',6], 'y':['y',10], 'z':['z',8]}, and I want to add 6, 10, and 8 together.
I have tried this with and without the string split, I have tried assigning the sum equation to a variable and writing the variable, etc. I am continuously getting "TypeError: unsupported operand type(s) for +: 'int' and 'list'" and "TypeError: 'dict_values' object is not subscriptable" error messages.
Is it possible to call Cognitive Services API in Azure ML studio when build model?” any document our sample experiment can be reference?
Thanks in advance.
Here is the sample code you can try:
import urllib2
import urllib
import sys
import base64
import json
import numpy as np
import pandas as pd
# The entry point function can contain up to two input arguments:
# Param<dataframe1>: a pandas.DataFrame
# Param<dataframe2>: a pandas.DataFrame
def azureml_main(dataframe1 = None, dataframe2 = None):
# Execution logic goes here
#print('Input pandas.DataFrame #1:\r\n\r\n{0}'.format(dataframe1))
# Account key is for Ted Way
account_key = str(dataframe2['Col1'][0])
#account_key = 'api_key'
#base_url = 'https://api.datamarket.azure.com/data.ashx/amla/text-analytics/v1'
#base_url = str(dataframe2['Col2'][0])
base_url = 'https://westus.api.cognitive.microsoft.com/'
headers = {'Content-Type':'application/json', 'Ocp-Apim-Subscription-Key':account_key}
#input_text = sys.argv[2]
sentiment_scores = []
num_examples = len(dataframe1.index)
input_texts = '{"documents":['
#for each record
for i in range(0,num_examples):
input_text = str(dataframe1['Text'][i])
input_text = input_text.replace("\"", "'")
#params = { 'Text': input_text}
input_texts = input_texts + '{"id":"' + str(i) + '","text":"'+ input_text + '"},'
input_texts = input_texts + ']}'
print input_texts
# Detect sentiment.
batch_sentiment_url = base_url + 'text/analytics/v2.0/sentiment'
req = urllib2.Request(batch_sentiment_url, input_texts, headers)
response = urllib2.urlopen(req)
result = response.read()
obj = json.loads(result)
for sentiment_analysis in obj['documents']:
sentiment_scores.append( str(sentiment_analysis['score']))
#print('Sentiment score: ' + str(obj['Score']))
sentiment_scores = pd.Series(np.array(sentiment_scores))
df1 = pd.DataFrame({'SentimentScore':sentiment_scores})
# Don't return the original text'
#frames = [dataframe1, df1]
#dataframe1 = pd.concat(frames, axis=1)
# Return value must be of a sequence of pandas.DataFrame
return df1
It is possible to execute Python snippets inside Azure ML. From there, you may call the Microsoft Cognitive Services API using a Python interface (take a look at the example for the Face API from Python).
I'm building an ExpressionSet class using rpy2, following the relevant tutorial as a guide. One of the most common things I do with the Eset object is subsetting, which in native R is as straightforward as
eset2<-eset1[1:10,1:5] # first ten features, first five samples
which returns a new ExpressionSet object with subsets of both the expression and phenotype data, using the given indices. Rpy2's RS4 object doesn't seem to allow direct subsetting, or have rx/rx2 attributes unlike e.g. RS3 vectors. I tried, with ~50% success, adding a '_subset' function (below) that creates subsets of these two datasets separately and assigns them back to Eset, but is there a more straightforward way that I'm missing?
from rpy2 import (robjects, rinterface)
from rpy2.robjects import (r, pandas2ri, Formula)
from rpy2.robjects.packages import (importr,)
from rpy2.robjects.methods import (RS4,)
class ExpressionSet(RS4):
# funcs to get the attributes
def _assay_get(self): # returns an environment, use ['exprs'] key to access
return self.slots["assayData"]
def _pdata_get(self): # returns an RS4 object, use .slots("data") to access
return self.slots["phenoData"]
def _feats_get(self): # returns an RS4 object, use .slots("data") to access
return self.slots["featureData"]
def _annot_get(self): # slots returns a tuple, just pick 1st (only) element
return self.slots["annotation"][0]
def _class_get(self): # slots returns a tuple, just pick 1st (only) element
return self.slots["class"][0]
# funcs to set the attributes
def _assay_set(self, value):
self.slots["assayData"] = value
def _pdata_set(self, value):
self.slots["phenoData"] = value
def _feats_set(self,value):
self.slots["featureData"] = value
def _annot_set(self, value):
self.slots["annotation"] = value
def _class_set(self, value):
self.slots["class"] = value
# funcs to work with the above to get/set the data
def _exprs_get(self):
return self.assay["exprs"]
def _pheno_get(self):
pdata = self.pData
return pdata.slots["data"]
def _exprs_set(self, value):
assay = self.assay
assay["exprs"] = value
def _pheno_set(self, value):
pdata = self.pData
pdata.slots["data"] = value
assay = property(_assay_get, _assay_set, None, "R attribute 'assayData'")
pData = property(_pdata_get, _pdata_set, None, "R attribute 'phenoData'")
fData = property(_feats_get, _feats_set, None, "R attribute 'featureData'")
annot = property(_annot_get, _annot_set, None, "R attribute 'annotation'")
exprs = property(_exprs_get, _exprs_set, None, "R attribute 'exprs'")
pheno = property(_pheno_get, _pheno_set, None, "R attribute 'pheno")
def _subset(self, features=None, samples=None):
features = features if features else self.exprs.rownames
samples = samples if samples else self.exprs.colnames
fx = robjects.BoolVector([f in features for f in self.exprs.rownames])
sx = robjects.BoolVector([s in samples for s in self.exprs.colnames])
self.pheno = self.pheno.rx(sx, self.pheno.colnames)
self.exprs = self.exprs.rx(fx,sx) # can't assign back to exprs this way
When doing
eset2<-eset1[1:10,1:5]
in R, the R S4 method "[" with the signature ("ExpressionSet") is fetched and run using the parameter values you provided.
The documentation is suggesting the use of getmethod (see http://rpy2.readthedocs.org/en/version_2.7.x/generated_rst/s4class.html#methods ) to facilitate the task of fetching the relevant S4 method, but its behaviour seems to have changed after the documentation was written (resolution of the dispatch through inheritance is no longer done).
The following should do it though:
from rpy2.robjects.packages import importr
methods = importr('methods')
r_subset_expressionset = methods.selectMethod("[", "ExpressionSet")
with thanks to #lgautier's answer, here's a snippet of my above code, modified to allow subsetting of the RS4 object:
from multipledispatch import dispatch
#dispatch(RS4)
def eset_subset(eset, features=None, samples=None):
"""
subset an RS4 eset object
"""
features = features if features else eset.exprs.rownames
samples = samples if samples else eset.exprs.colnames
fx = robjects.BoolVector([f in features for f in eset.exprs.rownames])
sx = robjects.BoolVector([s in samples for s in eset.exprs.colnames])
esub=methods.selectMethod("[", signature="ExpressionSet")(eset, fx,sx)
return esub