File "E:/code/SparK-main/SparK-main-latest/pretrain/vis_reconstruction.py", line 55, in build_spark
assert len(missing) == 0, f'load_state_dict missing keys: {missing}'
AssertionError: load_state_dict missing keys: ['imn_m', 'imn_s', 'norm_black', 'sparse_encoder.sp_cnn.conv1.weight', 'sparse_encoder.sp_cnn.bn1.weight', 'sparse_encoder.sp_cnn.bn1.bias', 'sparse_encoder.sp_cnn.bn1.running_mean', 'sparse_encoder.sp_cnn.bn1.running_var', 'sparse_encoder.sp_cnn.layer1.0.conv1.weight', 'sparse_encoder.sp_cnn.layer1.0.bn1.weight', 'sparse_encoder.sp_cnn.layer1.0.bn1.bias', 'sparse_encoder.sp_cnn.layer1.0.bn1.running_mean', 'sparse_encoder.sp_cnn.layer1.0.bn1.running_var', 'sparse_encoder.sp_cnn.layer1.0.conv2.weight', 'sparse_encoder.sp_cnn.layer1.0.bn2.weight', 'sparse_encoder.sp_cnn.layer1.0.bn2.bias', 'sparse_encoder.sp_cnn.layer1.0.bn2.running_mean', 'sparse_encoder.sp_cnn.layer1.0.bn2.running_var', 'sparse_encoder.sp_cnn.layer1.0.conv3.weight', 'sparse_encoder.sp_cnn.layer1.0.bn3.weight', 'sparse_encoder.sp_cnn.layer1.0.bn3.bias', 'sparse_encoder.sp_cnn.layer1.0.bn3.running_mean', 'sparse_encoder.sp_cnn.layer1.0.bn3.running_var', 'sparse_encoder.sp_cnn.layer1.0.downsample.0.weight', 'sparse_encoder.sp_cnn.layer1.0.downsample.1.weight', 'sparse_encoder.sp_cnn.layer1.0.downsample.1.bias', 'sparse_encoder.sp_cnn.layer1.0.downsample.1.running_mean', 'sparse_encoder.sp_cnn.layer1.0.downsample.1.running_var', 'sparse_encoder.sp_cnn.layer1.1.conv1.weight', 'sparse_encoder.sp_cnn.layer1.1.bn1.weight', 'sparse_encoder.sp_cnn.layer1.1.bn1.bias', 'sparse_encoder.sp_cnn.layer1.1.bn1.running_mean', 'sparse_encoder.sp_cnn.layer1.1.bn1.running_var', 'sparse_encoder.sp_cnn.layer1.1.conv2.weight', 'sparse_encoder.sp_cnn.layer1.1.bn2.weight', 'sparse_encoder.sp_cnn.layer1.1.bn2.bias', 'sparse_encoder.sp_cnn.layer1.1.bn2.running_mean', 'sparse_encoder.sp_cnn.layer1.1.bn2.running_var', 'sparse_encoder.sp_cnn.layer1.1.conv3.weight', 'sparse_encoder.sp_cnn.layer1.1.bn3.weight', 'sparse_encoder.sp_cnn.layer1.1.bn3.bias', 'sparse_encoder.sp_cnn.layer1.1.bn3.running_mean', 'sparse_encoder.sp_cnn.layer1.1.bn3.running_var', 'sparse_encoder.sp_cnn.layer1.2.conv1.weight', 'sparse_encoder.sp_cnn.layer1.2.bn1.weight', 'sparse_encoder.sp_cnn.layer1.2.bn1.bias', 'sparse_encoder.sp_cnn.layer1.2.bn1.running_mean', 'sparse_encoder.sp_cnn.layer1.2.bn1.running_var', 'sparse_encoder.sp_cnn.layer1.2.conv2.weight', 'sparse_encoder.sp_cnn.layer1.2.bn2.weight', 'sparse_encoder.sp_cnn.layer1.2.bn2.bias', 'sparse_encoder.sp_cnn.layer1.2.bn2.running_mean', 'sparse_encoder.sp_cnn.layer1.2.bn2.running_var', 'sparse_encoder.sp_cnn.layer1.2.conv3.weight', 'sparse_encoder.sp_cnn.layer1.2.bn3.weight', 'sparse_encoder.sp_cnn.layer1.2.bn3.bias', 'sparse_encoder.sp_cnn.layer1.2.bn3.running_mean', 'sparse_encoder.sp_cnn.layer1.2.bn3.running_var', 'sparse_encoder.sp_cnn.layer2.0.conv1.weight', 'sparse_encoder.sp_cnn.layer2.0.bn1.weight', 'sparse_encoder.sp_cnn.layer2.0.bn1.bias', 'sparse_encoder.sp_cnn.layer2.0.bn1.running_mean', 'sparse_encoder.sp_cnn.layer2.0.bn1.running_var', 'sparse_encoder.sp_cnn.layer2.0.conv2.weight', 'sparse_encoder.sp_cnn.layer2.0.bn2.weight', 'sparse_encoder.sp_cnn.layer2.0.bn2.bias', 'sparse_encoder.sp_cnn.layer2.0.bn2.running_mean', 'sparse_encoder.sp_cnn.layer2.0.bn2.running_var', 'sparse_encoder.sp_cnn.layer2.0.conv3.weight', 'sparse_encoder.sp_cnn.layer2.0.bn3.weight', 'sparse_encoder.sp_cnn.layer2.0.bn3.bias', 'sparse_encoder.sp_cnn.layer2.0.bn3.running_mean', 'sparse_encoder.sp_cnn.layer2.0.bn3.running_var', 'sparse_encoder.sp_cnn.layer2.0.downsample.0.weight', 'sparse_encoder.sp_cnn.layer2.0.downsample.1.weight', 'sparse_encoder.sp_cnn.layer2.0.downsample.1.bias', 'sparse_encoder.sp_cnn.layer2.0.downsample.1.running_mean', 'sparse_encoder.sp_cnn.layer2.0.downsample.1.running_var', 'sparse_encoder.sp_cnn.layer2.1.conv1.weight', 'sparse_encoder.sp_cnn.layer2.1.bn1.weight', 'sparse_encoder.sp_cnn.layer2.1.bn1.bias', 'sparse_encoder.sp_cnn.layer2.1.bn1.running_mean', 'sparse_encoder.sp_cnn.layer2.1.bn1.running_var', 'sparse_encoder.sp_cnn.layer2.1.conv2.weight', 'sparse_encoder.sp_cnn.layer2.1.bn2.weight', 'sparse_encoder.sp_cnn.layer2.1.bn2.bias', 'sparse_encoder.sp_cnn.layer2.1.bn2.running_mean', 'sparse_encoder.sp_cnn.layer2.1.bn2.running_var', 'sparse_encoder.sp_cnn.layer2.1.conv3.weight', 'sparse_encoder.sp_cnn.layer2.1.bn3.weight', 'sparse_encoder.sp_cnn.layer2.1.bn3.bias', 'sparse_encoder.sp_cnn.layer2.1.bn3.running_mean', 'sparse_encoder.sp_cnn.layer2.1.bn3.running_var', 'sparse_encoder.sp_cnn.layer2.2.conv1.weight', 'sparse_encoder.sp_cnn.layer2.2.bn1.weight', 'sparse_encoder.sp_cnn.layer2.2.bn1.bias', 'sparse_encoder.sp_cnn.layer2.2.bn1.running_mean', 'sparse_encoder.sp_cnn.layer2.2.bn1.running_var', 'sparse_encoder.sp_cnn.layer2.2.conv2.weight', 'sparse_encoder.sp_cnn.layer2.2.bn2.weight', 'sparse_encoder.sp_cnn.layer2.2.bn2.bias', 'sparse_encoder.sp_cnn.layer2.2.bn2.running_mean', 'sparse_encoder.sp_cnn.layer2.2.bn2.running_var', 'sparse_encoder.sp_cnn.layer2.2.conv3.weight', 'sparse_encoder.sp_cnn.layer2.2.bn3.weight', 'sparse_encoder.sp_cnn.layer2.2.bn3.bias', 'sparse_encoder.sp_cnn.layer2.2.bn3.running_mean', 'sparse_encoder.sp_cnn.layer2.2.bn3.running_var', 'sparse_encoder.sp_cnn.layer2.3.conv1.weight', 'sparse_encoder.sp_cnn.layer2.3.bn1.weight', 'sparse_encoder.sp_cnn.layer2.3.bn1.bias', 'sparse_encoder.sp_cnn.layer2.3.bn1.running_mean', 'sparse_encoder.sp_cnn.layer2.3.bn1.running_var', 'sparse_encoder.sp_cnn.layer2.3.conv2.weight', 'sparse_encoder.sp_cnn.layer2.3.bn2.weight', 'sparse_encoder.sp_cnn.layer2.3.bn2.bias', 'sparse_encoder.sp_cnn.layer2.3.bn2.running_mean', 'sparse_encoder.sp_cnn.layer2.3.bn2.running_var', 'sparse_encoder.sp_cnn.layer2.3.conv3.weight', 'sparse_encoder.sp_cnn.layer2.3.bn3.weight', 'sparse_encoder.sp_cnn.layer2.3.bn3.bias', 'sparse_encoder.sp_cnn.layer2.3.bn3.running_mean', 'sparse_encoder.sp_cnn.layer2.3.bn3.running_var', 'sparse_encoder.sp_cnn.layer3.0.conv1.weight', 'sparse_encoder.sp_cnn.layer3.0.bn1.weight', 'sparse_encoder.sp_cnn.layer3.0.bn1.bias', 'sparse_encoder.sp_cnn.layer3.0.bn1.running_mean', 'sparse_encoder.sp_cnn.layer3.0.bn1.running_var', 'sparse_encoder.sp_cnn.layer3.0.conv2.weight', 'sparse_encoder.sp_cnn.layer3.0.bn2.weight', 'sparse_encoder.sp_cnn.layer3.0.bn2.bias', 'sparse_encoder.sp_cnn.layer3.0.bn2.running_mean', 'sparse_encoder.sp_cnn.layer3.0.bn2.running_var', 'sparse_encoder.sp_cnn.layer3.0.conv3.weight', 'sparse_encoder.sp_cnn.layer3.0.bn3.weight', 'sparse_encoder.sp_cnn.layer3.0.bn3.bias', 'sparse_encoder.sp_cnn.layer3.0.bn3.running_mean', 'sparse_encoder.sp_cnn.layer3.0.bn3.running_var', 'sparse_encoder.sp_cnn.layer3.0.downsample.0.weight', 'sparse_encoder.sp_cnn.layer3.0.downsample.1.weight', 'sparse_encoder.sp_cnn.layer3.0.downsample.1.bias', 'sparse_encoder.sp_cnn.layer3.0.downsample.1.running_mean', 'sparse_encoder.sp_cnn.layer3.0.downsample.1.running_var', 'sparse_encoder.sp_cnn.layer3.1.conv1.weight', 'sparse_encoder.sp_cnn.layer3.1.bn1.weight', 'sparse_encoder.sp_cnn.layer3.1.bn1.bias', 'sparse_encoder.sp_cnn.layer3.1.bn1.running_mean', 'sparse_encoder.sp_cnn.layer3.1.bn1.running_var', 'sparse_encoder.sp_cnn.layer3.1.conv2.weight', 'sparse_encoder.sp_cnn.layer3.1.bn2.weight', 'sparse_encoder.sp_cnn.layer3.1.bn2.bias', 'sparse_encoder.sp_cnn.layer3.1.bn2.running_mean', 'sparse_encoder.sp_cnn.layer3.1.bn2.running_var', 'sparse_encoder.sp_cnn.layer3.1.conv3.weight', 'sparse_encoder.sp_cnn.layer3.1.bn3.weight', 'sparse_encoder.sp_cnn.layer3.1.bn3.bias', 'sparse_encoder.sp_cnn.layer3.1.bn3.running_mean', 'sparse_encoder.sp_cnn.layer3.1.bn3.running_var', 'sparse_encoder.sp_cnn.layer3.2.conv1.weight', 'sparse_encoder.sp_cnn.layer3.2.bn1.weight', 'sparse_encoder.sp_cnn.layer3.2.bn1.bias', 'sparse_encoder.sp_cnn.layer3.2.bn1.running_mean', 'sparse_encoder.sp_cnn.layer3.2.bn1.running_var', 'sparse_encoder.sp_cnn.layer3.2.conv2.weight', 'sparse_encoder.sp_cnn.layer3.2.bn2.weight', 'sparse_encoder.sp_cnn.layer3.2.bn2.bias', 'sparse_encoder.sp_cnn.layer3.2.bn2.running_mean', 'sparse_encoder.sp_cnn.layer3.2.bn2.running_var', 'sparse_encoder.sp_cnn.layer3.2.conv3.weight', 'sparse_encoder.sp_cnn.layer3.2.bn3.weight', 'sparse_encoder.sp_cnn.layer3.2.bn3.bias', 'sparse_encoder.sp_cnn.layer3.2.bn3.running_mean', 'sparse_encoder.sp_cnn.layer3.2.bn3.running_var', 'sparse_encoder.sp_cnn.layer3.3.conv1.weight', 'sparse_encoder.sp_cnn.layer3.3.bn1.weight', 'sparse_encoder.sp_cnn.layer3.3.bn1.bias', 'sparse_encoder.sp_cnn.layer3.3.bn1.running_mean', 'sparse_encoder.sp_cnn.layer3.3.bn1.running_var', 'sparse_encoder.sp_cnn.layer3.3.conv2.weight', 'sparse_encoder.sp_cnn.layer3.3.bn2.weight', 'sparse_encoder.sp_cnn.layer3.3.bn2.bias', 'sparse_encoder.sp_cnn.layer3.3.bn2.running_mean', 'sparse_encoder.sp_cnn.layer3.3.bn2.running_var', 'sparse_encoder.sp_cnn.layer3.3.conv3.weight', 'sparse_encoder.sp_cnn.layer3.3.bn3.weight', 'sparse_encoder.sp_cnn.layer3.3.bn3.bias', 'sparse_encoder.sp_cnn.layer3.3.bn3.running_mean', 'sparse_encoder.sp_cnn.layer3.3.bn3.running_var', 'sparse_encoder.sp_cnn.layer3.4.conv1.weight', 'sparse_encoder.sp_cnn.layer3.4.bn1.weight', 'sparse_encoder.sp_cnn.layer3.4.bn1.bias', 'sparse_encoder.sp_cnn.layer3.4.bn1.running_mean', 'sparse_encoder.sp_cnn.layer3.4.bn1.running_var', 'sparse_encoder.sp_cnn.layer3.4.conv2.weight', 'sparse_encoder.sp_cnn.layer3.4.bn2.weight', 'sparse_encoder.sp_cnn.layer3.4.bn2.bias', 'sparse_encoder.sp_cnn.layer3.4.bn2.running_mean', 'sparse_encoder.sp_cnn.layer3.4.bn2.running_var', 'sparse_encoder.sp_cnn.layer3.4.conv3.weight', 'sparse_encoder.sp_cnn.layer3.4.bn3.weight', 'sparse_encoder.sp_cnn.layer3.4.bn3.bias', 'sparse_encoder.sp_cnn.layer3.4.bn3.running_mean', 'sparse_encoder.sp_cnn.layer3.4.bn3.running_var', 'sparse_encoder.sp_cnn.layer3.5.conv1.weight', 'sparse_encoder.sp_cnn.layer3.5.bn1.weight', 'sparse_encoder.sp_cnn.layer3.5.bn1.bias', 'sparse_encoder.sp_cnn.layer3.5.bn1.running_mean', 'sparse_encoder.sp_cnn.layer3.5.bn1.running_var', 'sparse_encoder.sp_cnn.layer3.5.conv2.weight', 'sparse_encoder.sp_cnn.layer3.5.bn2.weight', 'sparse_encoder.sp_cnn.layer3.5.bn2.bias', 'sparse_encoder.sp_cnn.layer3.5.bn2.running_mean', 'sparse_encoder.sp_cnn.layer3.5.bn2.running_var', 'sparse_encoder.sp_cnn.layer3.5.conv3.weight', 'sparse_encoder.sp_cnn.layer3.5.bn3.weight', 'sparse_encoder.sp_cnn.layer3.5.bn3.bias', 'sparse_encoder.sp_cnn.layer3.5.bn3.running_mean', 'sparse_encoder.sp_cnn.layer3.5.bn3.running_var', 'sparse_encoder.sp_cnn.layer4.0.conv1.weight', 'sparse_encoder.sp_cnn.layer4.0.bn1.weight', 'sparse_encoder.sp_cnn.layer4.0.bn1.bias', 'sparse_encoder.sp_cnn.layer4.0.bn1.running_mean', 'sparse_encoder.sp_cnn.layer4.0.bn1.running_var', 'sparse_encoder.sp_cnn.layer4.0.conv2.weight', 'sparse_encoder.sp_cnn.layer4.0.bn2.weight', 'sparse_encoder.sp_cnn.layer4.0.bn2.bias', 'sparse_encoder.sp_cnn.layer4.0.bn2.running_mean', 'sparse_encoder.sp_cnn.layer4.0.bn2.running_var', 'sparse_encoder.sp_cnn.layer4.0.conv3.weight', 'sparse_encoder.sp_cnn.layer4.0.bn3.weight', 'sparse_encoder.sp_cnn.layer4.0.bn3.bias', 'sparse_encoder.sp_cnn.layer4.0.bn3.running_mean', 'sparse_encoder.sp_cnn.layer4.0.bn3.running_var', 'sparse_encoder.sp_cnn.layer4.0.downsample.0.weight', 'sparse_encoder.sp_cnn.layer4.0.downsample.1.weight', 'sparse_encoder.sp_cnn.layer4.0.downsample.1.bias', 'sparse_encoder.sp_cnn.layer4.0.downsample.1.running_mean', 'sparse_encoder.sp_cnn.layer4.0.downsample.1.running_var', 'sparse_encoder.sp_cnn.layer4.1.conv1.weight', 'sparse_encoder.sp_cnn.layer4.1.bn1.weight', 'sparse_encoder.sp_cnn.layer4.1.bn1.bias', 'sparse_encoder.sp_cnn.layer4.1.bn1.running_mean', 'sparse_encoder.sp_cnn.layer4.1.bn1.running_var', 'sparse_encoder.sp_cnn.layer4.1.conv2.weight', 'sparse_encoder.sp_cnn.layer4.1.bn2.weight', 'sparse_encoder.sp_cnn.layer4.1.bn2.bias', 'sparse_encoder.sp_cnn.layer4.1.bn2.running_mean', 'sparse_encoder.sp_cnn.layer4.1.bn2.running_var', 'sparse_encoder.sp_cnn.layer4.1.conv3.weight', 'sparse_encoder.sp_cnn.layer4.1.bn3.weight', 'sparse_encoder.sp_cnn.layer4.1.bn3.bias', 'sparse_encoder.sp_cnn.layer4.1.bn3.running_mean', 'sparse_encoder.sp_cnn.layer4.1.bn3.running_var', 'sparse_encoder.sp_cnn.layer4.2.conv1.weight', 'sparse_encoder.sp_cnn.layer4.2.bn1.weight', 'sparse_encoder.sp_cnn.layer4.2.bn1.bias', 'sparse_encoder.sp_cnn.layer4.2.bn1.running_mean', 'sparse_encoder.sp_cnn.layer4.2.bn1.running_var', 'sparse_encoder.sp_cnn.layer4.2.conv2.weight', 'sparse_encoder.sp_cnn.layer4.2.bn2.weight', 'sparse_encoder.sp_cnn.layer4.2.bn2.bias', 'sparse_encoder.sp_cnn.layer4.2.bn2.running_mean', 'sparse_encoder.sp_cnn.layer4.2.bn2.running_var', 'sparse_encoder.sp_cnn.layer4.2.conv3.weight', 'sparse_encoder.sp_cnn.layer4.2.bn3.weight', 'sparse_encoder.sp_cnn.layer4.2.bn3.bias', 'sparse_encoder.sp_cnn.layer4.2.bn3.running_mean', 'sparse_encoder.sp_cnn.layer4.2.bn3.running_var', 'dense_decoder.dec.0.up_sample.weight', 'dense_decoder.dec.0.up_sample.bias', 'dense_decoder.dec.0.conv.0.weight', 'dense_decoder.dec.0.conv.1.weight', 'dense_decoder.dec.0.conv.1.bias', 'dense_decoder.dec.0.conv.1.running_mean', 'dense_decoder.dec.0.conv.1.running_var', 'dense_decoder.dec.0.conv.3.weight', 'dense_decoder.dec.0.conv.4.weight', 'dense_decoder.dec.0.conv.4.bias', 'dense_decoder.dec.0.conv.4.running_mean', 'dense_decoder.dec.0.conv.4.running_var', 'dense_decoder.dec.1.up_sample.weight', 'dense_decoder.dec.1.up_sample.bias', 'dense_decoder.dec.1.conv.0.weight', 'dense_decoder.dec.1.conv.1.weight', 'dense_decoder.dec.1.conv.1.bias', 'dense_decoder.dec.1.conv.1.running_mean', 'dense_decoder.dec.1.conv.1.running_var', 'dense_decoder.dec.1.conv.3.weight', 'dense_decoder.dec.1.conv.4.weight', 'dense_decoder.dec.1.conv.4.bias', 'dense_decoder.dec.1.conv.4.running_mean', 'dense_decoder.dec.1.conv.4.running_var', 'dense_decoder.dec.2.up_sample.weight', 'dense_decoder.dec.2.up_sample.bias', 'dense_decoder.dec.2.conv.0.weight', 'dense_decoder.dec.2.conv.1.weight', 'dense_decoder.dec.2.conv.1.bias', 'dense_decoder.dec.2.conv.1.running_mean', 'dense_decoder.dec.2.conv.1.running_var', 'dense_decoder.dec.2.conv.3.weight', 'dense_decoder.dec.2.conv.4.weight', 'dense_decoder.dec.2.conv.4.bias', 'dense_decoder.dec.2.conv.4.running_mean', 'dense_decoder.dec.2.conv.4.running_var', 'dense_decoder.dec.3.up_sample.weight', 'dense_decoder.dec.3.up_sample.bias', 'dense_decoder.dec.3.conv.0.weight', 'dense_decoder.dec.3.conv.1.weight', 'dense_decoder.dec.3.conv.1.bias', 'dense_decoder.dec.3.conv.1.running_mean', 'dense_decoder.dec.3.conv.1.running_var', 'dense_decoder.dec.3.conv.3.weight', 'dense_decoder.dec.3.conv.4.weight', 'dense_decoder.dec.3.conv.4.bias', 'dense_decoder.dec.3.conv.4.running_mean', 'dense_decoder.dec.3.conv.4.running_var', 'dense_decoder.dec.4.up_sample.weight', 'dense_decoder.dec.4.up_sample.bias', 'dense_decoder.dec.4.conv.0.weight', 'dense_decoder.dec.4.conv.1.weight', 'dense_decoder.dec.4.conv.1.bias', 'dense_decoder.dec.4.conv.1.running_mean', 'dense_decoder.dec.4.conv.1.running_var', 'dense_decoder.dec.4.conv.3.weight', 'dense_decoder.dec.4.conv.4.weight', 'dense_decoder.dec.4.conv.4.bias', 'dense_decoder.dec.4.conv.4.running_mean', 'dense_decoder.dec.4.conv.4.running_var', 'dense_decoder.proj.weight', 'dense_decoder.proj.bias', 'densify_norms.0.weight', 'densify_norms.0.bias', 'densify_norms.1.weight', 'densify_norms.1.bias', 'densify_norms.2.weight', 'densify_norms.2.bias', 'densify_norms.3.weight', 'densify_norms.3.bias', 'densify_projs.0.weight', 'densify_projs.0.bias', 'densify_projs.1.weight', 'densify_projs.1.bias', 'densify_projs.2.weight', 'densify_projs.2.bias', 'densify_projs.3.weight', 'densify_projs.3.bias', 'mask_tokens.0', 'mask_tokens.1', 'mask_tokens.2', 'mask_tokens.3']