Unable to convert string in to number in Jupyter - jupyter-notebook

Here is my code:
def str2number(amount):
if type(amount) == int:
return amount
if amount[-1] == 'M': d = float(amount[:-1])
return d*1000000
elif amount[-1] == 'K': d = float(amount[:-1])
return d*1000
else: d = float(amount[:])
return d
FIFA18['ValueNum'] = FIFA18['Value'].apply(lambda x: str2number(x))
FIFA18['WageNum'] = FIFA18['Wage'].apply(lambda x: str2number(x))
Here is the error code:
ValueError Traceback (most recent call
last) in
15 return d
16
---> 17 FIFA18['ValueNum'] = FIFA18['Value'].apply(lambda x: str2number(x))
18 FIFA18['WageNum'] = FIFA18['Wage'].apply(lambda x: str2number(x))
c:\users\brain\appdata\local\programs\python\python39\lib\site-packages\pandas\core\series.py
in apply(self, func, convert_dtype, args, **kwds) 4136
else: 4137 values = self.astype(object)._values
-> 4138 mapped = lib.map_infer(values, f, convert=convert_dtype) 4139 4140 if len(mapped) and
isinstance(mapped[0], Series):
pandas_libs\lib.pyx in pandas._libs.lib.map_infer()
in (x)
15 return d
16
---> 17 FIFA18['ValueNum'] = FIFA18['Value'].apply(lambda x: str2number(x))
18 FIFA18['WageNum'] = FIFA18['Wage'].apply(lambda x: str2number(x))
in str2number(amount)
12
13 else:
---> 14 d = float(amount[:])
15 return d
16
ValueError: could not convert string to float: '� 0.00'

Are you sure that this part is correct?
else:
d = float(amount[:])
return d
You are assuming that everything else is just a number but your dataset contains some other characters. You need to clean your dataset first or change the function to account for those cases too.

Related

ValueError: shapes (2,1000) and (2,2,1000) not aligned: 1000 (dim 1) != 2 (dim 1)

I'm implementing a MLP to test a simple NN architecture, hoping to scale up to a bigger network with a larger dataset. My end goal is making a working phone recognizer for TIMIT data, as part of my internship.
To build the MLP, I used the suggestions of this video: https://www.youtube.com/watch?v=Z97XGNUUx9o.
And the proposal of my teacher to use the following inputs:
X = np.random.rand(5,1000)
y = X[4:5,:]
The error message is the following:
ValueError Traceback (most recent call last)
Cell In [63], line 7
5 build_model()
6 mlp = MLP(1000, [1000], 1000)
----> 7 mlp.train(inputs,targets, 50, 0.1)
8 output = mlp.forward_propagate(input)
Cell In [62], line 117, in MLP.train(self, inputs, targets, epochs, learning_rate)
115 output = self.forward_propagate(input)
116 error = target - output
--> 117 self.back_propagate(error)
118 self.gradient_descent(learning_rate=1)
119 sum_error += self._mse(target,output)
Cell In [62], line 96, in MLP.back_propagate(self, error)
94 current_activations = self.activations[i]
95 current_activations_reshaped = current_activations.reshape(current_activations.shape[0], -1)
---> 96 self.derivatives[i] = np.dot(current_activations, delta)
97 error = np.dot(delta, self.weights[i].T)
98 return error
File <__array_function__ internals>:180, in dot(*args, **kwargs)
ValueError: shapes (2,1000) and (2,2,1000) not aligned: 1000 (dim 1) != 2 (dim 1)
This is the relevant code:
class MLP(object):
def __init__(self, num_inputs=3, hidden_layers=[3,3], num_outputs=2):
self.num_inputs = num_inputs
self.hidden_layers = hidden_layers
self.num_outputs = num_outputs
layers = [num_inputs] + hidden_layers + [num_outputs]
weights = []
for i in range(len(layers) - 1):
w = np.random.rand(layers[i], layers[i + 1])
weights.append(w)
self.weights = weights
activations = []
for i in range(len(layers)):
a = np.zeros(layers[i])
activations.append(a)
self.activations = activations
derivatives = []
for i in range(len(layers) - 1):
d = np.zeros((layers[i], layers[i+1]))
derivatives.append(d)
self.derivatives = derivatives
def forward_propagate(self,inputs):
activations = inputs
self.activations[0] = inputs
for i in range(len(self.weights)):
net_inputs = np.dot(activations,self.weights)
activations = self._sigmoid(net_inputs)
self.activations[i+1] = activations
return activations
def back_propagate(self, error):
for i in reversed(range(len(self.derivatives))):
activations = self.activations[i+1]
delta = error * self._sigmoid_derivative(activations)
delta_reshaped = delta.reshape(delta.shape[0], -1).T
current_activations = self.activations[i]
current_activations_reshaped = current_activations.reshape(current_activations.shape[0], -1)
self.derivatives[i] = np.dot(current_activations, delta)
error = np.dot(delta, self.weights[i].T)
return error
def _sigmoid_derivative(self,x):
return x * (1.0 - x)
def _sigmoid(self,x):
y = 1.0 / (1+np.exp(-x))
return y
def gradient_descent(self, learning_rate):
for i in range(len(self.weights)):
weights = self.weights[i]
derivatives = self.derivatives[i]
weights += derivatives + learning_rate
def _mse(self,target,output):
return np.average((target-output)**2)
def train(self,inputs,targets,epochs,learning_rate):
for i in range(epochs):
sum_error = 0
for input,target in zip(inputs,targets):
output = self.forward_propagate(input)
error = target - output
self.back_propagate(error)
self.gradient_descent(learning_rate=1)
sum_error += self._mse(target,output)
print("Error: {} at epoch {}".format(sum_error/len(inputs), i))
And this is how I ran it:
if __name__ == "__main__":
X, y = load_dataset()
inputs = X
targets = y
build_model()
mlp = MLP(1000, [1000], 1000)
mlp.train(inputs,targets, 50, 0.1)
output = mlp.forward_propagate(input)
Thanks in advance!
I tried to do what the video said, to set up an MLP, as was the suggestion of the teacher, but I don't know how to solve the shape error.

index 43280 is out of bounds for dimension 0 with size 32

def train(epoch):
model.train()
loss_all = 0
for data in train_loader:
data = data.to(device)
optimizer.zero_grad()
output = model(data.x.float(), data.edge_index, data.batch)
loss = F.nll_loss(output, data.y)
loss.backward()
loss_all += data.num_graphs * loss.item()
optimizer.step()
return loss_all / len(train_dataset)
def test(loader):
model.eval()
correct = 0
for data in loader:
data = data.to(device)
#for i in data.batch:
pred = model(data.x.float(), data.edge_index, data.batch).max(dim=1)[1]
correct += pred.eq(data.y).sum().item()
return correct / len(loader.dataset)
train_loader = DataLoader(train_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)
for epoch in range(1, 201):
loss = train(epoch)
train_acc = test(train_loader)
test_acc = test(test_loader)
print('Epoch: {:03d}, Loss: {:.5f}, Train Acc: {:.5f}, Test Acc: {:.5f}'.
format(epoch, loss, train_acc, test_acc))
Error message:
It gives a runtime error: 'index 43280 is out of bounds for dimension 0 with size 32'.
/usr/local/lib/python3.7/dist-packages/torch_geometric/deprecation.py:12: UserWarning: 'data.DataLoader' is deprecated, use 'loader.DataLoader' instead
warnings.warn(out)
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-36-5c532377865e> in <module>()
36
37 for epoch in range(1, 201):
---> 38 loss = train(epoch)
39 train_acc = test(train_loader)
40 test_acc = test(test_loader)
7 frames
/usr/local/lib/python3.7/dist-packages/torch_scatter/scatter.py in scatter_sum(src, index, dim, out, dim_size)
19 size[dim] = int(index.max()) + 1
20 out = torch.zeros(size, dtype=src.dtype, device=src.device)
---> 21 return out.scatter_add_(dim, index, src)
22 else:
23 return out.scatter_add_(dim, index, src)
RuntimeError: index 43280 is out of bounds for dimension 0 with size 32
What can be done to solve this issue, thanks for your help.

ERROR: vars() argument must have __dict__ attribute when trying to use trainer.train() on custom HF dataset?

I have the following model that I am trying to fine-tune (CLIP_ViT + classification head). Here’s my model definition:
class CLIPNN(nn.Module):
def __init__(self, num_labels, pretrained_name="openai/clip-vit-base-patch32", dropout=0.1):
super().__init__()
self.num_labels = num_labels
# load pre-trained transformer & processor
self.transformer = CLIPVisionModel.from_pretrained(pretrained_name)
self.processor = CLIPProcessor.from_pretrained(pretrained_name)
# initialize other layers (head after the transformer body)
self.classifier = nn.Sequential(
nn.Linear(512, 128, bias=True),
nn.ReLU(inplace=True),
nn.Dropout(p=dropout, inplace=False),
nn.Linear(128, self.num_labels, bias=True))
def forward(self, inputs, labels=None, **kwargs):
logits = self.classifier(inputs)
loss = None
if labels is not None:
loss_fct = nn.CrossEntropyLoss()
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
return SequenceClassifierOutput(
loss=loss,
logits=logits,
)
I also have the following definition for a dataset:
class CLIPDataset(nn.utils.data.Dataset):
def __init__(self, embeddings, labels):
self.embeddings = embeddings
self.labels = labels
def __getitem__(self, idx):
item = {"embeddings": nn.Tensor(self.embeddings[idx])}
item['labels'] = nn.LongTensor([self.labels[idx]])
return item
def __len__(self):
return len(self.labels)
Note: here I am assuming that the model is fed pre-computed embeddings and does not compute embeddings, I know this is not the right logic if I want to fine-tune the CLIP base model, I am just trying to get my code to work.
Something like this throws an error:
model = CLIPNN(num_labels=2)
train_data = CLIPDataset(train_data, y_train)
test_data = CLIPDataset(test_data, y_test)
trainer = Trainer(
model=model, args=training_args, train_dataset=train_data, eval_dataset=test_data
)
trainer.train()
TypeError Traceback (most recent call last) in
----> 1 trainer.train()
~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/trainer.py
in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval,
**kwargs) 1256 self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
1257 → 1258 for step, inputs in enumerate(epoch_iterator): 1259 1260 #
Skip past any already trained steps if resuming training
~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/utils/data/dataloader.py in next(self) 515 if self._sampler_iter is None: 516 self._reset() →
517 data = self._next_data() 518 self._num_yielded += 1 519 if
self._dataset_kind == _DatasetKind.Iterable and \
~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self) 555 def _next_data(self): 556 index =
self._next_index() # may raise StopIteration → 557 data =
self._dataset_fetcher.fetch(index) # may raise StopIteration 558 if
self._pin_memory: 559 data = _utils.pin_memory.pin_memory(data)
~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py
in fetch(self, possibly_batched_index) 45 else: 46 data =
self.dataset[possibly_batched_index] —> 47 return
self.collate_fn(data)
~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/data/data_collator.py
in default_data_collator(features, return_tensors) 64 65 if
return_tensors == “pt”: —> 66 return
torch_default_data_collator(features) 67 elif return_tensors == “tf”:
68 return tf_default_data_collator(features)
~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/data/data_collator.py
in torch_default_data_collator(features) 80 81 if not
isinstance(features[0], (dict, BatchEncoding)): —> 82 features =
[vars(f) for f in features] 83 first = features[0] 84 batch = {}
~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/data/data_collator.py
in (.0) 80 81 if not isinstance(features[0], (dict, BatchEncoding)):
—> 82 features = [vars(f) for f in features] 83 first = features[0] 84
batch = {}
TypeError: vars() argument must have dict attribute
any idea what I'm doing wrong?

how to remove 'builtin_function_or_method' object cannot be interpreted as an integer error

I am getting invalid literal error in line 6 and 8 how to i deal with it
num_of_test_cases = int(input())
num_of_lines = input
num_of_columns =input
while(True):
num_of_test_cases = num_of_test_cases -1
for i in range(num_of_lines):
i+=1
for j in range(num_of_columns):
j+=1
if((i+j)%2==0):
print("*")
else:
print(".")
print("\n")
return 0

Julia #code_warntype reveals hidden temporary variable #temp#

I ran a type-stability check on my code recently. When I call #code_warntype on it, I get the following output:
Variables:
#unused#::IHT.#kw##L0_reg
#temp##_2::Array{Any,1}
::IHT.#L0_reg
x::Array{Float64,2}
y::Array{Float64,1}
k::Int64
#temp##_7::Bool
#temp##_8::Bool
max_iter::Int64
max_step::Int64
quiet::Bool
v::IHT.IHTVariables{Float64,Array{Float64,1}}
tol::Float64
#temp##_14::Int64
#temp##_15::Int64
#temp##_16::Int64
#temp##_17::Any
###...###
#temp##_17::Any = (Core.arrayref(#temp##_2::Array{Any,1},#temp##_16::Int64)::Any
###...###
v::IHT.IHTVariables{Float64,Array{Float64,1}} = (Core.typeassert)((Core.arrayref)(#temp##_2::Array{Any,1},(Base.box)(Int64,(Base.add_int)(#temp##_16::Int64,1)))::Any,IHT.IHTVariables{Float64,Array{Float64,1}})::IHT.IHTVariables{Float64,Array{Float64,1}}
Minimal working example, using my IHT.jl package:
Pkg.clone("https://github.com/klkeys/IHT.jl")
n = 100; p = 250; k = 2;
x = randn(n,p)
b = zeros(p); b[1:k] = randn(k); shuffle!(b)
y = x*b + randn(n)
#code_warntype L0_reg(x, y, k, quiet=true)
It would seem like the compiler is using #temp# to read the arguments to the function L0_reg. The function arguments are completely specified. From where does this evil little #temp##_2 variable arise? Am I able to tell the compiler what its type is? (hopefully not Array{Any,1}...)
You can use #code_lowered to view where the #temp# variables are coming from:
julia> #code_lowered L0_reg(x, y, k, quiet=true)
LambdaInfo template for (::IHT.#kw##L0_reg){T<:Union{Float32,Float64}}(::Array{Any,1}, ::IHT.#L0_reg, x::DenseArray{T,2}, y::DenseArray{T,1}, k::Int64)
:(begin
NewvarNode(:(temp))
NewvarNode(:(tol))
#temp##_7 = true
#temp##_8 = true
max_iter = 100
max_step = 50
quiet = true
SSAValue(0) = (IHT.colon)(1,(Base.length)(#temp##_2) >> 1)
#temp##_14 = (Base.start)(SSAValue(0))
10:
unless !((Base.done)(SSAValue(0),#temp##_14)) goto 42
SSAValue(1) = (Base.next)(SSAValue(0),#temp##_14)
#temp##_15 = (Core.getfield)(SSAValue(1),1)
#temp##_14 = (Core.getfield)(SSAValue(1),2)
#temp##_16 = #temp##_15 * 2 - 1
#temp##_17 = (Core.arrayref)(#temp##_2,#temp##_16)
unless #temp##_17 === :quiet goto 20
quiet = (Core.typeassert)((Core.arrayref)(#temp##_2,#temp##_16 + 1),IHT.Bool)
goto 40
20:
unless #temp##_17 === :max_step goto 24
max_step = (Core.typeassert)((Core.arrayref)(#temp##_2,#temp##_16 + 1),IHT.Int)
goto 40
24:
unless #temp##_17 === :max_iter goto 28
max_iter = (Core.typeassert)((Core.arrayref)(#temp##_2,#temp##_16 + 1),IHT.Int)
goto 40
28:
unless #temp##_17 === :tol goto 33
tol = (Core.typeassert)((Core.arrayref)(#temp##_2,#temp##_16 + 1),IHT.Float)
#temp##_8 = false
goto 40
33:
unless #temp##_17 === :temp goto 38
temp = (Core.typeassert)((Core.arrayref)(#temp##_2,#temp##_16 + 1),(Core.apply_type)(IHT.IHTVariables,$(Expr(:static_parameter, 1))))
#temp##_7 = false
goto 40
38:
(Base.kwerr)(#temp##_2,,x,y,k)
40:
goto 10
42:
unless #temp##_7 goto 45
temp = (IHT.IHTVariables)(x,y,k)
45:
unless #temp##_8 goto 48
tol = (IHT.convert)($(Expr(:static_parameter, 1)),0.0001)
48:
return (IHT.#L0_reg#75)(temp,tol,max_iter,max_step,quiet,,x,y,k)
end)
In this case, the temps (in particular #temp##_2) are coming from the keyword arguments. This is quite normal for keyword arguments.
julia> f(; x=1) = x
f (generic function with 1 method)
julia> #code_warntype f(x=1)
Variables:
#unused#::#kw##f
#temp##_2::Array{Any,1}
::#f
x::Any
#temp##_5::Int64
#temp##_6::Int64
#temp##_7::Int64
#temp##_8::Any
Body:
begin
x::Any = 1
SSAValue(2) = (Base.arraylen)(#temp##_2::Array{Any,1})::Int64
SSAValue(3) = (Base.select_value)((Base.sle_int)(0,1)::Bool,(Base.box)(Int64,(Base.ashr_int)(SSAValue(2),(Base.box)(UInt64,1))),(Base.box)(Int64,(Base.shl_int)(SSAValue(2),(Base.box)(UInt64,(Base.box)(Int64,(Base.neg_int)(1))))))::Int64
SSAValue(4) = (Base.select_value)((Base.sle_int)(1,SSAValue(3))::Bool,SSAValue(3),(Base.box)(Int64,(Base.sub_int)(1,1)))::Int64
#temp##_5::Int64 = 1
6:
unless (Base.box)(Base.Bool,(Base.not_int)((#temp##_5::Int64 === (Base.box)(Int64,(Base.add_int)(SSAValue(4),1)))::Bool)) goto 21
SSAValue(5) = #temp##_5::Int64
SSAValue(6) = (Base.box)(Int64,(Base.add_int)(#temp##_5::Int64,1))
#temp##_6::Int64 = SSAValue(5)
#temp##_5::Int64 = SSAValue(6)
#temp##_7::Int64 = (Base.box)(Int64,(Base.sub_int)((Base.box)(Int64,(Base.mul_int)(#temp##_6::Int64,2)),1))
#temp##_8::Any = (Core.arrayref)(#temp##_2::Array{Any,1},#temp##_7::Int64)::Any
unless (#temp##_8::Any === :x)::Bool goto 17
x::Any = (Core.arrayref)(#temp##_2::Array{Any,1},(Base.box)(Int64,(Base.add_int)(#temp##_7::Int64,1)))::Any
goto 19
17:
(Base.throw)($(Expr(:new, :(Base.MethodError), :((Core.getfield)((Core.getfield)((Core.getfield)(#f,:name)::TypeName,:mt),:kwsorter)), :((Core.tuple)(#temp##_2,)::Tuple{Array{Any,1},#f}))))::Union{}
19:
goto 6
21:
return x::Any
end::Any
Keyword arguments are known to have callsite overhead that can be somewhat worked around by declaring types. Note that unless your function does very little, it's unlikely the sorting of keyword arguments is actually a huge bottleneck (despite the nasty #code_warntype output).
When you do #code_warntype on a keyword argument call, you're actually viewing the type instabilities of the keyword argument sorter, an autogenerated wrapper around the real function. As you can see, the code ends up calling (IHT.#L0_reg#75)(temp,tol,max_iter,max_step,quiet,,x,y,k), which is a plain function that takes positional arguments. So the output of #code_warntype is almost useless in this case.

Resources