First of all, your work is very impressive. 😀
I encountered some problems while trying to reproduce your results and would like to ask for some help. Training fails with the following error:
Traceback (most recent call last):
File "/cdaAlign/cdalign-main/train.py", line 337, in <module>
cli_main()
File "/cdaAlign/cdalign-main/train.py", line 333, in cli_main
main(args)
File "/cdaAlign/cdalign-main/train.py", line 93, in main
train(args, trainer, task, epoch_itr)
File "/cdaAlign/cdalign-main/train.py", line 132, in train
for i, samples in enumerate(progress, start=epoch_itr.iterations_in_epoch):
File "/cdaAlign/cdalign-main/fairseq/progress_bar.py", line 181, in __iter__
for i, obj in enumerate(self.iterable, start=self.offset):
File "/cdaAlign/cdalign-main/fairseq/data/iterators.py", line 314, in __next__
chunk.append(next(self.itr))
File "/cdaAlign/cdalign-main/fairseq/data/iterators.py", line 43, in __next__
return next(self.itr)
File "/cdaAlign/cdalign-main/fairseq/data/iterators.py", line 36, in __iter__
for x in self.iterable:
File "/envs/cda-align/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 530, in __next__
data = self._next_data()
File "/envs/cda-align/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1224, in _next_data
return self._process_data(data)
File "/envs/cda-align/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1250, in _process_data
data.reraise()
File "/envs/cda-align/lib/python3.8/site-packages/torch/_utils.py", line 457, in reraise
raise exception
KeyError: Caught KeyError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/envs/cda-align/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
data = fetcher.fetch(index)
File "/envs/cda-align/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/envs/cda-align/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/cdaAlign/cdalign-main/fairseq/data/language_pair_dataset.py", line 215, in __getitem__
example['alignment'] = self.align_dataset[index]
File "/cdaAlign/cdalign-main/fairseq/data/indexed_dataset.py", line 222, in __getitem__
ptx = self.cache_index[i]
KeyError: 2206735
My implementation, in simplified form, is as follows. I also got a CSR score lower than the one obtained when decoding without constraints. I would really appreciate your help in locating my mistake.