Fastai v2 dataset has no show_batch method - fast-ai

I am having trouble with my DataBlock not having a show_batch method when customising it for my own use case.
I am trying to port some of my code from fastai v1 to v2, working through the DataBlock tutorial: https://docs.fast.ai/tutorial.datablock.html
My DataBlock & Datasets:
dblock = DataBlock(get_items=get_image_files,
                   get_y=parent_label,
                   splitter=RandomSplitter())
dsets = dblock.datasets("PlantVillage-Dataset/raw/color/")
dsets.train[0] # this works
The error I get when I try dsets.show_batch():
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-56-5a2f74730596> in <module>
----> 1 dsets.show_batch()
~/.pyenv/versions/3.7.8/envs/fastai/lib/python3.7/site-packages/fastai/data/core.py in __getattr__(self, k)
315 return res if is_indexer(it) else list(zip(*res))
316
--> 317 def __getattr__(self,k): return gather_attrs(self, k, 'tls')
318 def __dir__(self): return super().__dir__() + gather_attr_names(self, 'tls')
319 def __len__(self): return len(self.tls[0])
~/.pyenv/versions/3.7.8/envs/fastai/lib/python3.7/site-packages/fastcore/transform.py in gather_attrs(o, k, nm)
163 att = getattr(o,nm)
164 res = [t for t in att.attrgot(k) if t is not None]
--> 165 if not res: raise AttributeError(k)
166 return res[0] if len(res)==1 else L(res)
167
AttributeError: show_batch

dls = dblock.dataloaders(path)
dls.show_batch()
After initialising the DataBlock I needed to construct a DataLoaders object for batching; show_batch is defined on DataLoaders, not on Datasets.
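A minimal end-to-end sketch of the fix, assuming the same PlantVillage directory layout; the blocks argument and the batch size are my additions (not in the original question) so that the batch actually renders as images:

from fastai.vision.all import *

path = "PlantVillage-Dataset/raw/color/"

dblock = DataBlock(blocks=(ImageBlock, CategoryBlock),   # assumed block types for an image classifier
                   get_items=get_image_files,
                   get_y=parent_label,
                   splitter=RandomSplitter())

# Datasets only index and transform single items; DataLoaders add batching and plotting helpers.
dls = dblock.dataloaders(path, bs=64)
dls.show_batch(max_n=9)   # works here, because show_batch lives on DataLoaders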

Related

ERROR: vars() argument must have __dict__ attribute when trying to use trainer.train() on custom HF dataset?

I have the following model that I am trying to fine-tune (CLIP_ViT + classification head). Here’s my model definition:
import torch.nn as nn
from transformers import CLIPProcessor, CLIPVisionModel
from transformers.modeling_outputs import SequenceClassifierOutput

class CLIPNN(nn.Module):

    def __init__(self, num_labels, pretrained_name="openai/clip-vit-base-patch32", dropout=0.1):
        super().__init__()
        self.num_labels = num_labels
        # load pre-trained transformer & processor
        self.transformer = CLIPVisionModel.from_pretrained(pretrained_name)
        self.processor = CLIPProcessor.from_pretrained(pretrained_name)
        # initialize other layers (head after the transformer body)
        self.classifier = nn.Sequential(
            nn.Linear(512, 128, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout, inplace=False),
            nn.Linear(128, self.num_labels, bias=True))

    def forward(self, inputs, labels=None, **kwargs):
        logits = self.classifier(inputs)
        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
        )
I also have the following definition for a dataset:
import torch
from torch.utils.data import Dataset

class CLIPDataset(Dataset):

    def __init__(self, embeddings, labels):
        self.embeddings = embeddings
        self.labels = labels

    def __getitem__(self, idx):
        item = {"embeddings": torch.tensor(self.embeddings[idx])}
        item['labels'] = torch.LongTensor([self.labels[idx]])
        return item

    def __len__(self):
        return len(self.labels)
Note: here I am assuming that the model is fed pre-computed embeddings and does not compute embeddings, I know this is not the right logic if I want to fine-tune the CLIP base model, I am just trying to get my code to work.
Something like this throws an error:
model = CLIPNN(num_labels=2)

train_data = CLIPDataset(train_data, y_train)
test_data = CLIPDataset(test_data, y_test)

trainer = Trainer(
    model=model, args=training_args, train_dataset=train_data, eval_dataset=test_data
)
trainer.train()
TypeError                                 Traceback (most recent call last)
in <module>
----> 1 trainer.train()

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1256         self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
   1257 
-> 1258         for step, inputs in enumerate(epoch_iterator):
   1259 
   1260             # Skip past any already trained steps if resuming training

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
    515         if self._sampler_iter is None:
    516             self._reset()
--> 517         data = self._next_data()
    518         self._num_yielded += 1
    519         if self._dataset_kind == _DatasetKind.Iterable and \

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
    555     def _next_data(self):
    556         index = self._next_index()  # may raise StopIteration
--> 557         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    558         if self._pin_memory:
    559             data = _utils.pin_memory.pin_memory(data)

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
     45         else:
     46             data = self.dataset[possibly_batched_index]
---> 47         return self.collate_fn(data)

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/data/data_collator.py in default_data_collator(features, return_tensors)
     64 
     65     if return_tensors == "pt":
---> 66         return torch_default_data_collator(features)
     67     elif return_tensors == "tf":
     68         return tf_default_data_collator(features)

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/data/data_collator.py in torch_default_data_collator(features)
     80 
     81     if not isinstance(features[0], (dict, BatchEncoding)):
---> 82         features = [vars(f) for f in features]
     83     first = features[0]
     84     batch = {}

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/data/data_collator.py in <listcomp>(.0)
     80 
     81     if not isinstance(features[0], (dict, BatchEncoding)):
---> 82         features = [vars(f) for f in features]
     83     first = features[0]
     84     batch = {}

TypeError: vars() argument must have __dict__ attribute
Any idea what I'm doing wrong?
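There is no accepted fix above, but as a hedged debugging sketch: the traceback shows that torch_default_data_collator only falls back to vars(f) when the batch elements are not dict/BatchEncoding instances, so it can help to check what the Dataset items actually look like and to collate a few of them by hand (train_data here is assumed to be the CLIPDataset defined above):

from transformers.data.data_collator import default_data_collator

# Hypothetical debugging snippet, not a confirmed fix.
sample = train_data[0]
print(type(sample))                              # the default collator expects a dict here
print({k: type(v) for k, v in sample.items()})   # with tensor-like values inside it

# Collate a few items exactly the way Trainer's default collator would.
batch = default_data_collator([train_data[i] for i in range(4)])
print({k: v.shape for k, v in batch.items()})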

Can't run "Multivariate forecasting" in Pyro tutorial

I am just trying to run the sample program at https://pyro.ai/examples/forecast_simple.html.
It runs until it hits "RuntimeError: torch.linalg.cholesky: For batch 4284: U(2,2) is zero, singular U." Every time I run the code, it stops at the same location, "batch 4284".
Can anyone teach me how to fix it?
I am using the versions below.
Python 3.9.1
pyro-api 0.1.2
pyro-ppl 1.7.0
torch 1.19.0
Windows10Pro 64bit 20H2
VScode 1.60.0
INFO step 0 loss = 7.356
INFO step 50 loss = 1.87751
INFO step 100 loss = 1.55338
INFO step 150 loss = 1.40953
INFO step 200 loss = 1.31982
INFO step 250 loss = 1.2017
INFO step 300 loss = 1.1389
INFO step 350 loss = 1.10407
INFO step 400 loss = 1.07474
INFO step 450 loss = 1.06728
INFO step 500 loss = 1.0285
DEBUG crps = 0.59017
DEBUG mae = 0.866027
DEBUG num_samples = 100
DEBUG rmse = 1.02721
DEBUG seed = 1.23457e+09
DEBUG t0 = 0
DEBUG t1 = 2160
DEBUG t2 = 2496
DEBUG test_walltime = 0.411458
DEBUG train_walltime = 28.8177
DEBUG AutoNormal.locs.obs_corr = -1.62159
DEBUG AutoNormal.locs.trans_corr = 2.49729
DEBUG AutoNormal.locs.trans_loc = 0.904184
DEBUG AutoNormal.scales.obs_corr = 0.207397
DEBUG AutoNormal.scales.trans_corr = 0.0915508
DEBUG AutoNormal.scales.trans_loc = 0.0111603
INFO Training on window [168:2328], testing on window [2328:2664]
INFO step 0 loss = 7.37245
INFO step 50 loss = 1.87162
:
:
:
DEBUG crps = 0.62036
DEBUG mae = 0.907584
DEBUG num_samples = 100
DEBUG rmse = 1.08631
DEBUG seed = 1.23457e+09
DEBUG t0 = 1512
DEBUG t1 = 3672
DEBUG t2 = 4008
DEBUG test_walltime = 0.404958
DEBUG train_walltime = 26.7937
DEBUG AutoNormal.locs.obs_corr = -0.889496
DEBUG AutoNormal.locs.trans_corr = 1.85566
DEBUG AutoNormal.locs.trans_loc = 0.903074
DEBUG AutoNormal.scales.obs_corr = 0.247679
DEBUG AutoNormal.scales.trans_corr = 0.0577488
DEBUG AutoNormal.scales.trans_loc = 0.012068
INFO Training on window [1680:3840], testing on window [3840:4176]
INFO step 0 loss = 7.48406
INFO step 50 loss = 1.92277
INFO step 100 loss = 1.58563
INFO step 150 loss = 1.52081
INFO step 200 loss = 1.44076
INFO step 250 loss = 1.38033
INFO step 300 loss = 1.29202
INFO step 350 loss = 1.26101
INFO step 400 loss = 1.23141
INFO step 450 loss = 1.23901
INFO step 500 loss = 1.21247
RuntimeError: torch.linalg.cholesky: For batch 4284: U(2,2) is zero, singular U.
RuntimeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_16928/3907557438.py in <module>
15
16 args = parser.parse_args()
---> 17 main(args)
~\AppData\Local\Temp/ipykernel_16928/4270697941.py in main(args)
24 }
25
---> 26 metrics = backtest(
27 data,
28 covariates,
c:\Users\9033113\venv\lib\site-packages\pyro\contrib\forecast\evaluate.py in backtest(data, covariates, model_fn, forecaster_fn, metrics, transform, train_window, min_train_window, test_window, min_test_window, stride, seed, num_samples, batch_size, forecaster_options)
199 while True:
200 try:
--> 201 pred = forecaster(
202 train_data,
203 test_covariates,
c:\Users\9033113\venv\lib\site-packages\pyro\contrib\forecast\forecaster.py in __call__(self, data, covariates, num_samples, batch_size)
359 :rtype: ~torch.Tensor
360 """
--> 361 return super().__call__(data, covariates, num_samples, batch_size)
362
363 def forward(self, data, covariates, num_samples, batch_size=None):
c:\Users\9033113\venv\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or
_global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
c:\Users\9033113\venv\lib\site-packages\pyro\contrib\forecast\forecaster.py in forward(self, data, covariates, num_samples, batch_size)
388 stack.enter_context(poutine.replay(trace=tr.trace))
389 with pyro.plate("particles", num_samples, dim=dim):
--> 390 return self.model(data, covariates)
391
392
c:\Users\9033113\venv\lib\site-packages\pyro\nn\module.py in __call__(self, *args, **kwargs)
424 def __call__(self, *args, **kwargs):
425 with self._pyro_context:
--> 426 return super().__call__(*args, **kwargs)
427
428 def __getattr__(self, name):
c:\Users\9033113\venv\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or
_global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
c:\Users\9033113\venv\lib\site-packages\pyro\contrib\forecast\forecaster.py in forward(self, data, covariates)
183 self._forecast = None
184
--> 185 self.model(zero_data, covariates)
186
187 assert self._forecast is not None, ".predict() was not called by .model()"
~\AppData\Local\Temp/ipykernel_16928/1541431941.py in model(self, zero_data, covariates)
76
77 # The final statement registers our noise model and prediction.
---> 78 self.predict(noise_model, prediction)
c:\Users\9033113\venv\lib\site-packages\pyro\contrib\forecast\forecaster.py in predict(self, noise_dist, prediction)
155 # PrefixConditionMessenger is handled outside of the .model() call.
156 self._prefix_condition_data["residual"] = data - left_pred
--> 157 noise = pyro.sample("residual", noise_dist)
158 del self._prefix_condition_data["residual"]
159
c:\Users\9033113\venv\lib\site-packages\pyro\primitives.py in sample(name, fn, *args, **kwargs)
162 }
163 # apply the stack and return its return value
--> 164 apply_stack(msg)
165 return msg["value"]
166
c:\Users\9033113\venv\lib\site-packages\pyro\poutine\runtime.py in apply_stack(initial_msg)
215 break
216
--> 217 default_process_message(msg)
218
219 for frame in stack[-pointer:]:
c:\Users\9033113\venv\lib\site-packages\pyro\poutine\runtime.py in default_process_message(msg)
176 return msg
177
--> 178 msg["value"] = msg["fn"](*msg["args"], **msg["kwargs"])
179
180 # after fn has been called, update msg to prevent it from being called again.
c:\Users\9033113\venv\lib\site-packages\pyro\distributions\torch_distribution.py in __call__(self, sample_shape)
46 """
47 return (
---> 48 self.rsample(sample_shape)
49 if self.has_rsample
50 else self.sample(sample_shape)
c:\Users\9033113\venv\lib\site-packages\pyro\distributions\hmm.py in rsample(self, sample_shape)
582 )
583 trans = trans.expand(trans.batch_shape[:-1] + (self.duration,))
--> 584 z = _sequential_gaussian_filter_sample(self._init, trans, sample_shape)
585 x = self._obs.left_condition(z).rsample()
586 return x
c:\Users\9033113\venv\lib\site-packages\pyro\distributions\hmm.py in _sequential_gaussian_filter_sample(init, trans, sample_shape)
142 joint = (x + y).event_permute(perm)
143 tape.append(joint)
--> 144 contracted = joint.marginalize(left=state_dim)
145 if time > even_time:
146 contracted = Gaussian.cat((contracted, gaussian[..., -1:]), dim=-1)
c:\Users\9033113\venv\lib\site-packages\pyro\ops\gaussian.py in marginalize(self, left, right)
242 P_ba = self.precision[..., b, a]
243 P_bb = self.precision[..., b, b]
--> 244 P_b = cholesky(P_bb)
245 P_a = triangular_solve(P_ba, P_b, upper=False)
246 P_at = P_a.transpose(-1, -2)
c:\Users\9033113\venv\lib\site-packages\pyro\ops\tensor_utils.py in cholesky(x)
398 if x.size(-1) == 1:
399 return x.sqrt()
--> 400 return torch.linalg.cholesky(x)
401
402
RuntimeError: torch.linalg.cholesky: For batch 4284: U(2,2) is zero, singular U.

Pystan, Runtime error - Initialization failed

I'm trying to develop a Bayesian model using Pystan. I'm able to compile the model successfully, but when I sample I get a runtime error. Refer to the code below:
my_code = '''
data {
    int N;
    int K1;
    int K2;
    real max_intercept;
    matrix[N, K1] X1;
    matrix[N, K2] X2;
    vector[N] y;
}
parameters {
    vector<lower=0>[K1] beta1;
    vector[K2] beta2;
    real<lower=0, upper=max_intercept> alpha;
    real<lower=0> noise_var;
}
model {
    beta1 ~ normal(0, 1);
    beta2 ~ normal(0, 1);
    noise_var ~ inv_gamma(0.05, 0.05 * 0.01);
    y ~ normal(X1*beta1 + X2*beta2 + alpha, sqrt(noise_var));
}
'''
fit1 = sm1.sampling(data=input_data, iter=2000, chains=4, init=0.5, n_jobs=-1)  # Getting an error here
I have checked all the data points (no missing data, and no column with the same value throughout) and their data types (all are float64). I also scaled the data using MinMaxScaler.
input_data = {
    'N': len(data_scaled),       # 836
    'K1': len(pos_var),          # 17
    'K2': len(pos_neg_var),      # 29
    'X1': X1,                    # (836, 17)
    'X2': X2,                    # (836, 17)
    'y': data['orders'].values,
    'max_intercept': min(data['orders'])  # 0
}
Below is the error I'm getting.
RemoteTraceback Traceback (most recent call last)
RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Users\abc\.conda\envs\stan_env\lib\multiprocessing\pool.py", line 121, in worker
result = (True, func(*args, **kwds))
File "C:\Users\abc\.conda\envs\stan_env\lib\multiprocessing\pool.py", line 44, in mapstar
return list(map(*args))
File "stanfit4anon_model_a396b59aabedfaa132f3a814776a219f_7619586994410633893.pyx", line 371, in stanfit4anon_model_a396b59aabedfaa132f3a814776a219f_7619586994410633893._call_sampler_star
File "stanfit4anon_model_a396b59aabedfaa132f3a814776a219f_7619586994410633893.pyx", line 404, in stanfit4anon_model_a396b59aabedfaa132f3a814776a219f_7619586994410633893._call_sampler
RuntimeError: Initialization failed.
"""
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
<timed exec> in <module>
~\.conda\envs\stan_env\lib\site-packages\pystan\model.py in sampling(self, data, pars, chains, iter, warmup, thin, seed, init, sample_file, diagnostic_file, verbose, algorithm, control, n_jobs, **kwargs)
776 call_sampler_args = izip(itertools.repeat(data), args_list, itertools.repeat(pars))
777 call_sampler_star = self.module._call_sampler_star
--> 778 ret_and_samples = _map_parallel(call_sampler_star, call_sampler_args, n_jobs)
779 samples = [smpl for _, smpl in ret_and_samples]
780
~\.conda\envs\stan_env\lib\site-packages\pystan\model.py in _map_parallel(function, args, n_jobs)
83 try:
84 pool = multiprocessing.Pool(processes=n_jobs)
---> 85 map_result = pool.map(function, args)
86 finally:
87 pool.close()
~\.conda\envs\stan_env\lib\multiprocessing\pool.py in map(self, func, iterable, chunksize)
266 in a list that is returned.
267 '''
--> 268 return self._map_async(func, iterable, mapstar, chunksize).get()
269
270 def starmap(self, func, iterable, chunksize=None):
~\.conda\envs\stan_env\lib\multiprocessing\pool.py in get(self, timeout)
655 return self._value
656 else:
--> 657 raise self._value
658
659 def _set(self, i, obj):
RuntimeError: Initialization failed.
I'm relatively new to Pystan. I appreciate any guidance I get here.
I fixed the issue! This runtime error generally occurs when the data does not meet the constraints defined in the model, for instance X containing some negative values when the model declares the constraint X > 0.
Another very common mistake: make sure the y values are not off. In my data a few y values were 0; they passed the missing-value and positive-value checks, but imputing them with the mean of y resolved the problem.
Happy learning!
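As a minimal sketch of the kind of pre-sampling check described above (the variable names come from the question's input_data dict, and the specific checks simply mirror the answer's points, so treat them as assumptions):

import numpy as np

# Hypothetical sanity checks before calling sm1.sampling(), mirroring the answer above.
X1 = np.asarray(input_data['X1'])
X2 = np.asarray(input_data['X2'])
y = np.asarray(input_data['y'])

assert not np.isnan(X1).any() and not np.isnan(X2).any() and not np.isnan(y).any(), "missing values"
assert (y > 0).all(), "zero or negative y values can make initialization fail for this model"
assert input_data['max_intercept'] > 0, "alpha is bounded by (0, max_intercept); a bound of 0 leaves no room"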

Custom Evaluation Function based on F1 for use in xgboost - Python API

I have written the following custom evaluation function to use with xgboost, in order to optimize F1. Unfortunately it raises an exception when run with xgboost.
The evaluation function is the following:
def F1_eval(preds, labels):
    t = np.arange(0, 1, 0.005)
    f = np.repeat(0, 200)
    Results = np.vstack([t, f]).T
    P = sum(labels == 1)

    for i in range(200):
        m = (preds >= Results[i, 0])
        TP = sum(labels[m] == 1)
        FP = sum(labels[m] == 0)
        if (FP + TP) > 0:
            Precision = TP/(FP + TP)
            Recall = TP/P
            if (Precision + Recall > 0):
                F1 = 2 * Precision * Recall / (Precision + Recall)
            else:
                F1 = 0
            Results[i, 1] = F1
    return(max(Results[:, 1]))
Below I provide a reproducible example along with the error message:
import numpy as np
import xgboost as xgb
from sklearn import datasets
from sklearn.model_selection import train_test_split

Wine = datasets.load_wine()
X_wine = Wine.data
y_wine = Wine.target
y_wine[y_wine == 2] = 1

X_wine_train, X_wine_test, y_wine_train, y_wine_test = train_test_split(X_wine, y_wine, test_size=0.2)

clf_wine = xgb.XGBClassifier(max_depth=6, learning_rate=0.1, silent=False, objective='binary:logistic',
                             booster='gbtree', n_jobs=8, nthread=None, gamma=0, min_child_weight=1, max_delta_step=0,
                             subsample=0.8, colsample_bytree=0.8, colsample_bylevel=1, reg_alpha=0, reg_lambda=1)

clf_wine.fit(X_wine_train, y_wine_train,
             eval_set=[(X_wine_train, y_wine_train), (X_wine_test, y_wine_test)], eval_metric=F1_eval, early_stopping_rounds=10, verbose=True)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-453-452852658dd8> in <module>()
12 clf_wine = xgb.XGBClassifier(max_depth=6, learning_rate=0.1,silent=False, objective='binary:logistic', booster='gbtree', n_jobs=8, nthread=None, gamma=0, min_child_weight=1, max_delta_step=0, subsample=0.8, colsample_bytree=0.8, colsample_bylevel=1, reg_alpha=0, reg_lambda=1)
13
---> 14 clf_wine.fit(X_wine_train, y_wine_train,eval_set=[(X_wine_train, y_wine_train), (X_wine_test, y_wine_test)], eval_metric=F1_eval, early_stopping_rounds=10, verbose=True)
15
C:\ProgramData\Anaconda3\lib\site-packages\xgboost\sklearn.py in fit(self, X, y, sample_weight, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set)
519 early_stopping_rounds=early_stopping_rounds,
520 evals_result=evals_result, obj=obj, feval=feval,
--> 521 verbose_eval=verbose, xgb_model=None)
522
523 self.objective = xgb_options["objective"]
C:\ProgramData\Anaconda3\lib\site-packages\xgboost\training.py in train(params, dtrain, num_boost_round, evals, obj, feval, maximize, early_stopping_rounds, evals_result, verbose_eval, xgb_model, callbacks, learning_rates)
202 evals=evals,
203 obj=obj, feval=feval,
--> 204 xgb_model=xgb_model, callbacks=callbacks)
205
206
C:\ProgramData\Anaconda3\lib\site-packages\xgboost\training.py in _train_internal(params, dtrain, num_boost_round, evals, obj, feval, xgb_model, callbacks)
82 # check evaluation result.
83 if len(evals) != 0:
---> 84 bst_eval_set = bst.eval_set(evals, i, feval)
85 if isinstance(bst_eval_set, STRING_TYPES):
86 msg = bst_eval_set
C:\ProgramData\Anaconda3\lib\site-packages\xgboost\core.py in eval_set(self, evals, iteration, feval)
957 if feval is not None:
958 for dmat, evname in evals:
--> 959 feval_ret = feval(self.predict(dmat), dmat)
960 if isinstance(feval_ret, list):
961 for name, val in feval_ret:
<ipython-input-383-dfb8d5181b18> in F1_eval(preds, labels)
11
12
---> 13 P = sum(labels == 1)
14
15
TypeError: 'bool' object is not iterable
I do not understand why the function is not working. I have followed the examples here: https://github.com/dmlc/xgboost/blob/master/demo/guide-python/custom_objective.py
I would like to understand where I err.
When doing sum(labels == 1), Python evaluates labels == 1 as a single Boolean object (the second argument xgboost passes to your feval is a DMatrix, not an array of labels), thus you get TypeError: 'bool' object is not iterable.
The built-in sum expects an iterable object, like a list. Here's an example of your error:
In[32]: sum(True)
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2963, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-32-6eb8f80b7f2e>", line 1, in <module>
sum(True)
TypeError: 'bool' object is not iterable
If you want to use the f1_score of scikit-learn you can implement the following wrapper:
from sklearn.metrics import f1_score
import numpy as np

def f1_eval(y_pred, dtrain):
    y_true = dtrain.get_label()
    err = 1 - f1_score(y_true, np.round(y_pred))
    return 'f1_err', err
The parameters of the wrapper are a list (of predictions) and a DMatrix, and it returns a (string, float) pair.
# Setting your classifier
clf_wine = xgb.XGBClassifier(max_depth=6, learning_rate=0.1, silent=False, objective='binary:logistic',
                             booster='gbtree', n_jobs=8, nthread=None, gamma=0, min_child_weight=1, max_delta_step=0,
                             subsample=0.8, colsample_bytree=0.8, colsample_bylevel=1, reg_alpha=0, reg_lambda=1)

# When you fit, add eval_metric=f1_eval
# Please don't forget to insert all the .fit arguments required
clf_wine.fit(eval_metric=f1_eval)
Here you can see an example of how to implement custom objective function and custom evaluation metric
Example containing the following code:
# user defined evaluation function, return a pair metric_name, result
# NOTE: when you do customized loss function, the default prediction value is margin
# this may make builtin evaluation metric not function properly
# for example, we are doing logistic loss, the prediction is score before logistic transformation
# the builtin evaluation error assumes input is after logistic transformation
# Take this in mind when you use the customization, and maybe you need write customized evaluation function
def evalerror(preds, dtrain):
    labels = dtrain.get_label()
    # return a pair metric_name, result
    # since preds are margin(before logistic transformation, cutoff at 0)
    return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
which specifies that an evaluation function gets (predictions, dtrain) as arguments, where dtrain is of type DMatrix, and returns a (string, float) pair: the name of the metric and the error.
Here is a working Python code example:
import numpy as np


def _F1_eval(preds, labels):
    t = np.arange(0, 1, 0.005)
    f = np.repeat(0, 200)
    results = np.vstack([t, f]).T

    # assuming labels only containing 0's and 1's
    n_pos_examples = sum(labels)
    if n_pos_examples == 0:
        raise ValueError("labels not containing positive examples")

    for i in range(200):
        pred_indexes = (preds >= results[i, 0])
        TP = sum(labels[pred_indexes])
        FP = len(labels[pred_indexes]) - TP
        precision = 0
        recall = TP / n_pos_examples
        if (FP + TP) > 0:
            precision = TP / (FP + TP)

        if (precision + recall > 0):
            F1 = 2 * precision * recall / (precision + recall)
        else:
            F1 = 0

        results[i, 1] = F1
    return (max(results[:, 1]))


if __name__ == '__main__':
    labels = np.random.binomial(1, 0.75, 100)
    preds = np.random.random_sample(100)
    print(_F1_eval(preds, labels))
And if you want _F1_eval to work specifically with xgboost's evaluation mechanism, add this:
def F1_eval(preds, dtrain):
    res = _F1_eval(preds, dtrain.get_label())
    return 'f1_err', 1 - res
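A short usage sketch under the question's own setup (it assumes the wine data split and the clf_wine classifier from the reproducible example above are already defined):

# Hedged usage example: plug the DMatrix-aware wrapper into the sklearn API.
clf_wine.fit(X_wine_train, y_wine_train,
             eval_set=[(X_wine_train, y_wine_train), (X_wine_test, y_wine_test)],
             eval_metric=F1_eval,          # the wrapper defined above, returning ('f1_err', 1 - F1)
             early_stopping_rounds=10,     # lower f1_err is better, so early stopping works as expected
             verbose=True)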

1D convolution sequence in keras

I am a noob trying to build a network to classify 2 sequences of floats into one of 16450 different integers. I have 70408 samples and I have padded each sample to have 1400 values, so one sample has 2 column vectors, e.g. [104.243, 120.12, ...] and [125.25, 14.556, ...]. Both of my x_train arrays have size (70408, 1400). I am trying to use Keras' functional API but can't seem to figure out the right input shape. Any help would be appreciated.
from keras.layers import Input
from keras import layers
from keras.models import Model

samples = 70408
mass_size = 1400
intensity_size = 1400
output_size = 16450
mass_input = Input(shape=(samples,mass_size), dtype='float32')
mass_net = layers.Conv1D(32,5,activation='relu')(mass_input)
mass_net = layers.AveragePooling1D(3)(mass_net)
mass_net = layers.Conv1D(16,5,activation='relu')(mass_net)
mass_net = layers.GlobalAveragePooling1D()(mass_net)
intensity_input = Input(shape=(samples,intensity_size), dtype='float32')
intensity_net = layers.Conv1D(32,5,activation='relu')(intensity_input)
intensity_net = layers.AveragePooling1D(3)(intensity_net)
intensity_net = layers.Conv1D(16,5,activation='relu')(intensity_net)
intensity_net = layers.GlobalAveragePooling1D()(intensity_net)
concatenated = layers.concatenate([mass_net,intensity_net],axis=-1)
output = layers.Dense(output_size,activation='softmax')(concatenated)
print(mass_data.shape, intensity_data.shape)
model = Model([mass_data,intensity_data],output)
model.compile(optimizer='rmsprop',loss='categorical_crossentropy',metrics=['acc'])
model.fit([mass_data,intensity_data],y_train,epochs=10,batch_size=128)
The error I keep getting is:
TypeError Traceback (most recent call last)
<ipython-input-18-aab93c439dd0> in <module>()
28
29 print(mass_data.shape, intensity_data.shape)
---> 30 model = Model([mass_data,intensity_data],output)
31 model.compile(optimizer='rmsprop',loss='categorical_crossentropy',metrics=['acc'])
32
~\Anaconda3\envs\deeplearning\lib\site-packages\keras\legacy\interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your `' + object_name +
90 '` call to the Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper
~\Anaconda3\envs\deeplearning\lib\site-packages\keras\engine\topology.py in __init__(self, inputs, outputs, name)
1528
1529 # Check for redundancy in inputs.
-> 1530 if len(set(self.inputs)) != len(self.inputs):
1531 raise ValueError('The list of inputs passed to the model '
1532 'is redundant. '
TypeError: unhashable type: 'numpy.ndarray'
The problem seems to be here:
model = Model([mass_data,intensity_data],output)
You should use the input tensors you created, not numpy data:
model = Model([mass_input, intensity_input],output)
Another problem, related to my earlier comment, is the input_shape.
Since you now have your data as (samples, length, features), you need input_shape=(length, features).
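A hedged sketch putting both fixes together; the expand_dims reshape that adds a trailing feature axis is my assumption, since the arrays described in the question are 2-D, and mass_data, intensity_data and y_train are assumed to exist as in the question:

import numpy as np
from keras import layers
from keras.layers import Input
from keras.models import Model

mass_size = 1400
output_size = 16450

# Assumption: give each (samples, 1400) array an explicit feature axis -> (samples, 1400, 1)
mass_data = np.expand_dims(mass_data, axis=-1)
intensity_data = np.expand_dims(intensity_data, axis=-1)

# shape excludes the batch dimension: (length, features)
mass_input = Input(shape=(mass_size, 1), dtype='float32')
mass_net = layers.Conv1D(32, 5, activation='relu')(mass_input)
mass_net = layers.AveragePooling1D(3)(mass_net)
mass_net = layers.Conv1D(16, 5, activation='relu')(mass_net)
mass_net = layers.GlobalAveragePooling1D()(mass_net)

intensity_input = Input(shape=(mass_size, 1), dtype='float32')
intensity_net = layers.Conv1D(32, 5, activation='relu')(intensity_input)
intensity_net = layers.AveragePooling1D(3)(intensity_net)
intensity_net = layers.Conv1D(16, 5, activation='relu')(intensity_net)
intensity_net = layers.GlobalAveragePooling1D()(intensity_net)

concatenated = layers.concatenate([mass_net, intensity_net], axis=-1)
output = layers.Dense(output_size, activation='softmax')(concatenated)

# Build the Model from the Input tensors, not from the numpy arrays.
model = Model([mass_input, intensity_input], output)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['acc'])
model.fit([mass_data, intensity_data], y_train, epochs=10, batch_size=128)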
