Hi, I was trying to train the model on the Logo Dataset that you provided, without resizing the images.
Instead of a hard resize to 256x256, which distorts the aspect ratio, I decided to use random cropping; however, after a few epochs I run into a CUDA error.
Here is the error:
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [0,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [1,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [2,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [3,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [4,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [5,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [6,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [7,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [8,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [9,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [10,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [11,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [12,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [13,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [14,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [15,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [16,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [17,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [18,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [19,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [20,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [21,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [22,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [23,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [24,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [25,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [26,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [27,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [28,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [29,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [30,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [228,0,0], thread: [31,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [32,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [33,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [34,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [35,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [36,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [37,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [38,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [39,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [40,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [41,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [42,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [43,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [44,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [45,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [46,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [47,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [48,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [49,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [50,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [51,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [52,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [53,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [54,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [55,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [56,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [57,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [58,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [59,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [60,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [61,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [62,0,0] Assertion input_val >= zero && input_val <= one
failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:102: operator(): block: [197,0,0], thread: [63,0,0] Assertion input_val >= zero && input_val <= one
failed.
Traceback (most recent call last):
File "/home/qblocks/shashank/Development/Oct_21/Watermark_removal/deep-blind-watermark-removal_patch/main.py", line 71, in
main(args)
File "/home/qblocks/shashank/Development/Oct_21/Watermark_removal/deep-blind-watermark-removal_patch/main.py", line 41, in main
Machine.train(epoch)
File "/home/qblocks/shashank/Development/Oct_21/Watermark_removal/deep-blind-watermark-removal_patch/scripts/machines/VX.py", line 129, in train
l2_loss,att_loss,wm_loss,style_loss,ssim_loss = self.loss(outputs[0],self.norm(target),outputs[1],mask,outputs[2],self.norm(wm))
File "/home/qblocks/shashank/Development/Oct_21/Watermark_removal/wm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/qblocks/shashank/Development/Oct_21/Watermark_removal/deep-blind-watermark-removal_patch/scripts/machines/VX.py", line 85, in forward
att_loss = self.attLoss(pred_ms, mask)
File "/home/qblocks/shashank/Development/Oct_21/Watermark_removal/wm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/qblocks/shashank/Development/Oct_21/Watermark_removal/wm/lib/python3.8/site-packages/torch/nn/modules/loss.py", line 530, in forward
return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)
File "/home/qblocks/shashank/Development/Oct_21/Watermark_removal/wm/lib/python3.8/site-packages/torch/nn/functional.py", line 2525, in binary_cross_entropy
return torch._C._nn.binary_cross_entropy(
RuntimeError: CUDA error: device-side assert triggered
Initially I thought that this might be due to errors in the input mask, since BCE expects the values to be between 0 and 1. However, upon printing the values of the mask and pred_ms (the prediction), I found that the model's prediction tensor was entirely NaN.
mask tensor([[[[1., 1., 1., ..., 0., 0., 0.],
[1., 1., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[1., 1., 1., ..., 0., 0., 0.],
[1., 1., 1., ..., 0., 0., 0.],
[1., 1., 1., ..., 0., 0., 0.]]],
[[[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]],
[[[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]],
[[[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[1., 0., 0., ..., 1., 1., 1.],
[1., 1., 0., ..., 1., 1., 0.],
[1., 1., 1., ..., 1., 0., 0.]]]], device='cuda:0')
pred_mask tensor([[[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]]],
[[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]]],
[[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]]],
[[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]]]], device='cuda:0',
grad_fn=<SigmoidBackward>)
The following is the code that I am using to crop the patch:
from __future__ import print_function, absolute_import
import os
import csv
import numpy as np
import json
import random
import math
import matplotlib.pyplot as plt
from collections import namedtuple
from os import listdir
from os.path import isfile, join
import torch
# torch.manual_seed(17)
import torch.utils.data as data
from scripts.utils.osutils import *
from scripts.utils.imutils import *
from scripts.utils.transforms import *
import torchvision.transforms as transforms
from PIL import Image
from PIL import ImageEnhance
from PIL import ImageFilter
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import glob
class COCO(data.Dataset):
    """Paired watermark-removal dataset.

    Each item bundles a watermarked image, its clean target, the binary
    watermark mask, and the watermark layer itself, all cropped from the
    same random ``input_size`` x ``input_size`` window so the four tensors
    stay pixel-aligned.
    """

    def __init__(self, train, config=None, sample=None, gan_norm=False):
        """Index the dataset from ``config.base_dir``.

        Args:
            train: split prefix (e.g. 'train'); concatenated with
                ``config.data`` to form the split directory name.
            config: namespace providing ``input_size``, ``normalized_input``,
                ``base_dir``, ``data`` and ``data_augumentation``. Required.
            sample: optional list of indices used to subsample the dataset.
            gan_norm: flag stored for GAN-style normalization downstream.
        """
        # BUG FIX: the original checked `config == None` only *after*
        # dereferencing config.input_size, so that branch was unreachable
        # dead code. Fail fast with a clear message instead.
        if config is None:
            raise ValueError("COCO dataset requires a config object")

        self.train = []
        self.anno = []
        self.mask = []
        self.wm = []
        self.input_size = config.input_size
        self.normalized_input = config.normalized_input
        self.base_folder = config.base_dir
        self.dataset = train + config.data
        self.data_augumentation = config.data_augumentation
        self.istrain = self.dataset.find('train') != -1
        # Avoid the mutable-default-argument pitfall (`sample=[]`).
        self.sample = [] if sample is None else sample
        self.gan_norm = gan_norm

        file_paths2 = sorted(glob.glob(
            join(self.base_folder, 'wm_DIV2K', 'full_*', self.dataset, 'image/*')))
        for fl2 in file_paths2:
            file_name2 = fl2.split('/')[-1]
            self.train.append(fl2)
            self.mask.append(fl2.replace('/image/', '/mask/'))
            self.wm.append(fl2.replace('/image/', '/wm/'))
            self.anno.append(os.path.join(
                self.base_folder, 'wm_DIV2K', 'natural', self.dataset,
                file_name2.split('-')[0] + '.' + file_name2.split('.')[-1]))

        if len(self.sample) > 0:
            self.train = [self.train[i] for i in self.sample]
            self.mask = [self.mask[i] for i in self.sample]
            self.anno = [self.anno[i] for i in self.sample]
            # BUG FIX: the original forgot to subsample self.wm here,
            # leaving the watermark paths misaligned with image/mask/target.
            self.wm = [self.wm[i] for i in self.sample]

        self.trans = transforms.Compose([
            transforms.ToTensor(),
        ])
        print('total Dataset of ' + self.dataset + ' is : ', len(self.train))

    def __getitem__(self, index):
        """Load the quadruple at ``index`` and random-crop a shared patch."""
        img = Image.open(self.train[index]).convert('RGB')
        mask = Image.open(self.mask[index]).convert('L')
        anno = Image.open(self.anno[index]).convert('RGB')
        wm = Image.open(self.wm[index]).convert('RGB')

        W, H = img.size
        if W < self.input_size or H < self.input_size:
            # BUG FIX: the original resized straight to a hard square
            # (input_size, input_size), re-introducing the aspect-ratio
            # distortion that random cropping was adopted to avoid.
            # Instead, scale so the short side reaches input_size.
            scale = self.input_size / min(W, H)
            new_size = (max(self.input_size, int(round(W * scale))),
                        max(self.input_size, int(round(H * scale))))
            img = img.resize(new_size)
            # NEAREST keeps the mask strictly binary — bilinear resampling
            # would introduce intermediate values into the BCE target.
            mask = mask.resize(new_size, Image.NEAREST)
            anno = anno.resize(new_size)
            wm = wm.resize(new_size)

        # One crop window shared by all four images keeps them pixel-aligned.
        i, j, h, w = transforms.RandomCrop.get_params(
            img, output_size=(self.input_size, self.input_size))
        img = transforms.functional.crop(img, i, j, h, w)
        mask = transforms.functional.crop(mask, i, j, h, w)
        anno = transforms.functional.crop(anno, i, j, h, w)
        wm = transforms.functional.crop(wm, i, j, h, w)

        img = self.trans(img)
        anno = self.trans(anno)
        mask = self.trans(mask)
        wm = self.trans(wm)

        return {"image": img,
                "target": anno,
                "mask": mask,
                "wm": wm,
                "name": self.train[index].split('/')[-1],
                "imgurl": self.train[index],
                "maskurl": self.mask[index],
                "targeturl": self.anno[index],
                "wmurl": self.wm[index]
                }

    def __len__(self):
        """Number of indexed samples."""
        return len(self.train)
Any Help would be appreciated!!