We recently wanted to carve out a validation set from HICO-DET. We used the following code:
![image](https://private-user-images.githubusercontent.com/112799241/241395804-8641f730-59ee-4a26-a065-d18038ef87c0.png?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3MTgxMjQ5NzMsIm5iZiI6MTcxODEyNDY3MywicGF0aCI6Ii8xMTI3OTkyNDEvMjQxMzk1ODA0LTg2NDFmNzMwLTU5ZWUtNGEyNi1hMDY1LWQxODAzOGVmODdjMC5wbmc_WC1BbXotQWxnb3JpdGhtPUFXUzQtSE1BQy1TSEEyNTYmWC1BbXotQ3JlZGVudGlhbD1BS0lBVkNPRFlMU0E1M1BRSzRaQSUyRjIwMjQwNjExJTJGdXMtZWFzdC0xJTJGczMlMkZhd3M0X3JlcXVlc3QmWC1BbXotRGF0ZT0yMDI0MDYxMVQxNjUxMTNaJlgtQW16LUV4cGlyZXM9MzAwJlgtQW16LVNpZ25hdHVyZT04MzcxOWJmZjUxNmFhNDhiNDI1ZjMzZGE2MzI5ZjZiNTI3NDIzMGI0YWQ2NTE1M2MxNjg4NzdlMjBmNzk2ODI2JlgtQW16LVNpZ25lZEhlYWRlcnM9aG9zdCZhY3Rvcl9pZD0wJmtleV9pZD0wJnJlcG9faWQ9MCJ9.Fp-VIjuM6dCCBNiKYbxXxhdKwAW20AixWiMahzr6rxs)
Namespace(alpha=0.5, aux_loss=True, backbone='resnet50', batch_size=16, bbox_loss_coef=5, box_score_thresh=0.2, cache=False, clip_max_norm=0.1, data_root='./hicodet', dataset='hicodet', dec_layers=6, device='cuda', dilation=False, dim_feedforward=2048, dropout=0.1, enc_layers=6, eos_coef=0.1, epochs=20, eval=True, fg_iou_thresh=0.5, gamma=0.2, giou_loss_coef=2, hidden_dim=256, lr_drop=10, lr_head=0.0001, max_instances=15, min_instances=3, nheads=8, num_queries=100, num_workers=2, output_dir='checkpoints', partitions=['train2015', 'test2015'], port='1234', position_embedding='sine', pre_norm=False, pretrained='', print_interval=500, repr_dim=512, resume='/home/quan107552101247/upt/checkpoints/jokex2/ckpt_47040_20.pt', sanity=False, seed=66, set_cost_bbox=5, set_cost_class=1, set_cost_giou=2, weight_decay=0.0001, world_size=1)
=> Rank 0: continue from saved checkpoint /home/quan107552101247/upt/checkpoints/jokex2/ckpt_47040_20.pt
0%| | 0/7527 [00:00<?, ?it/s]Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/multiprocessing/spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
File "/home/quan107552101247/upt/pocket/data/base.py", line 167, in __getattr__
if hasattr(self.dataset, key):
File "/home/quan107552101247/upt/pocket/data/base.py", line 167, in __getattr__
if hasattr(self.dataset, key):
File "/home/quan107552101247/upt/pocket/data/base.py", line 167, in __getattr__
if hasattr(self.dataset, key):
[Previous line repeated 993 more times]
RecursionError: maximum recursion depth exceeded
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/multiprocessing/spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
File "/home/upt/pocket/data/base.py", line 167, in __getattr__
if hasattr(self.dataset, key):
File "/home/upt/pocket/data/base.py", line 167, in __getattr__
if hasattr(self.dataset, key):
File "/home/upt/pocket/data/base.py", line 167, in __getattr__
if hasattr(self.dataset, key):
[Previous line repeated 993 more times]
RecursionError: maximum recursion depth exceeded
0%| | 0/7527 [00:05<?, ?it/s]
Traceback (most recent call last):
......
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 199, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 157, in start_processes
while not context.join():
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 118, in join
raise Exception(msg)
Exception:
-- Process 0 terminated with the following error:
Traceback (most recent call last):
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 872, in _try_get_data
data = self._data_queue.get(timeout=timeout)
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/queue.py", line 179, in get
self.not_empty.wait(remaining)
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/threading.py", line 306, in wait
gotit = waiter.acquire(True, timeout)
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/site-packages/torch/utils/data/_utils/signal_handling.py", line 66, in handler
_error_if_any_worker_fails()
RuntimeError: DataLoader worker (pid 3874908) exited unexpectedly with exit code 1. Details are lost due to multiprocessing. Rerunning with num_workers=0 may give better error trace.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
......
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/site-packages/tqdm/std.py", line 1195, in __iter__
for obj in iterable:
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 435, in __next__
data = self._next_data()
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1068, in _next_data
idx, data = self._get_data()
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1024, in _get_data
success, data = self._try_get_data()
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 885, in _try_get_data
raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str)) from e
RuntimeError: DataLoader worker (pid(s) 3874908, 3875008) exited unexpectedly
Traceback (most recent call last):
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 19, in _wrap
fn(i, *args)
File "/home/upt/main.py", line 107, in main
ap = engine.test_hico(test_loader)
File "/usr/local/anaconda3/envs/pocket/lib/python3.8/site-packages/torch/autograd/grad_mode.py", line 26, in decorate_context
return func(*args, **kwargs)
File "/home/upt/utils.py", line 170, in test_hico
inputs = pocket.ops.relocate_to_cuda(batch[0])
File "/home/upt/pocket/ops/relocate.py", line 63, in relocate_to_cuda
return [relocate_to_cuda(item, ignore, device, **kwargs) for item in x]
File "/home/upt/pocket/ops/relocate.py", line 63, in <listcomp>
return [relocate_to_cuda(item, ignore, device, **kwargs) for item in x]
File "/home/upt/pocket/ops/relocate.py", line 71, in relocate_to_cuda
raise TypeError('Unsupported type of data {}'.format(type(x)))
TypeError: Unsupported type of data <class 'PIL.Image.Image'>
···
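How can we resolve these errors (the `RecursionError` raised in `__getattr__` when the DataLoader workers unpickle the dataset, and the `TypeError: Unsupported type of data <class 'PIL.Image.Image'>` raised in `relocate_to_cuda`)?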
Thank you so much!
yaoyaosanqi.