Hello Guys, i found several Posts about WER Running out of Memory and most the Posts suggest ist fixed.
Im using the latest WER Version in combination with Huggingface and i still Run out of Memory?
wer_metric = load_metric("wer")
def compute_metrics(processor):
def __call__(pred):
pred_logits = pred.predictions
pred_ids = np.argmax(pred_logits, axis=-1)
pred.label_ids[pred.label_ids == -100] = processor.tokenizer.pad_token_id
pred_str = processor.batch_decode(pred_ids)
# we do not want to group tokens when computing the metrics
label_str = processor.batch_decode(pred.label_ids, group_tokens=False)
wer = wer_metric.compute(predictions=pred_str, references=label_str)
return {"wer": wer}
return __call__
...
trainer = Trainer(
model=model,
data_collator=data_collator,
args=args,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
tokenizer=processor.feature_extractor,
train_seq_lengths=train_dataset.input_seq_lengths,
compute_metrics=compute_metrics(processor),
)
***** Running Evaluation *****
Num examples = 15588
Batch size = 4
100%|███████████████████████████████████████| 3897/3897 [21:27<00:00, 3.07it/s]Traceback (most recent call last):
File "/tmp/pycharm_project_263/audioengine/model/finetuning/wav2vec2/finetune_parquet.py", line 151, in <module>
File "/tmp/pycharm_project_263/audioengine/model/finetuning/wav2vec2/finetune_parquet.py", line 128, in main
max_val_samples = data_args.max_val_samples if data_args.max_val_samples is not None else len(eval_dataset)
File "/usr/local/lib/python3.8/dist-packages/transformers/trainer.py", line 1757, in evaluate
output = self.prediction_loop(
File "/usr/local/lib/python3.8/dist-packages/transformers/trainer.py", line 1930, in prediction_loop
metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
File "/tmp/pycharm_project_263/audioengine/model/finetuning/wav2vec2/wav2vec2_trainer.py", line 191, in __call__
wer = wer_metric.compute(predictions=pred_str, references=label_str)
File "/usr/local/lib/python3.8/dist-packages/datasets/metric.py", line 403, in compute
output = self._compute(predictions=predictions, references=references, **kwargs)
File "/home/warmachine/.cache/huggingface/modules/datasets_modules/metrics/wer/73b2d32b723b7fb8f204d785c00980ae4d937f12a65466f8fdf78706e2951281/wer.py", line 94, in _compute
return wer(references, predictions)
File "/usr/local/lib/python3.8/dist-packages/jiwer/measures.py", line 80, in wer
measures = compute_measures(
File "/usr/local/lib/python3.8/dist-packages/jiwer/measures.py", line 192, in compute_measures
H, S, D, I = _get_operation_counts(truth, hypothesis)
File "/usr/local/lib/python3.8/dist-packages/jiwer/measures.py", line 273, in _get_operation_counts
editops = Levenshtein.editops(source_string, destination_string)
MemoryError
100%|███████████████████████████████████████| 3897/3897 [21:40<00:00, 3.00it/s]
Process finished with exit code 1