I currently have my own code for generating batches of points and labels as below:
def load_lasfile(filename):
inFile = laspy.file.File(filename)
data = np.vstack([inFile.x, inFile.y, inFile.z, inFile.classification]).transpose()
points = data.astype(np.float32)
# labels_seg = data.loc[:, 'class'].to_numpy().astype(np.int64)
return points # labels_seg
def minMax(x):
return pd.Series(index=['min', 'max'], data=[x.min(), x.max()])
class Dataset(IterableDataset):
"""
"""
def __init__(self, file_in, path='../Data/DALES', batch_size=32, npoints=2048):
super(Dataset).__init__()
self.file_list = [os.path.join(path, line.strip()) for line in open(os.path.join(path, file_in))]
self.batch_size = batch_size
self.npoints = npoints
self.index = 0
self.first_batch = True
self.sq_size = 0
self.label_to_names = {0: 'unknown',
1: 'buildings',
2: 'cars',
3: 'trucks',
4: 'poles',
5: 'power lines',
6: 'fences',
7: 'ground',
8: 'chair',
9: 'vegetation'}
self.init_labels()
self.ignored_labels = np.array([0])
self.dataset_task = 'segmentation'
def next_batch(self):
# __file = self.file_list[self.index]
for each_file in self.file_list:
# Find sq_size which gives value of x and y coordinates skip considering uniformity in data files
batch_accumulation = 0
data_ = load_lasfile(each_file)
print("-----------{}-----------".format(each_file))
if self.first_batch:
data_min = data_.min(axis=0)
data_max = data_.max(axis=0)
x_range = data_max[0] - data_min[0]
y_range = data_max[1] - data_min[1]
x_unq = np.sort(np.unique(data_[:, 0]))
y_unq = np.sort(np.unique(data_[:, 1]))
size = 0
while size < self.npoints and self.first_batch:
self.sq_size += 1
pos_x = np.logical_and(x_unq[self.sq_size] > data_[:, 0],
data_[:, 0] >= x_unq[0])
pos_y = np.logical_and(y_unq[self.sq_size] > data_[:, 1],
data_[:, 1] >= y_unq[0])
pos = np.logical_and(pos_x, pos_y)
size = len(data_[pos])
self.sq_size -= 1 # to include not more than self.npoints
self.first_batch = False
self.y_ll = 0
while (self.y_ll + self.sq_size) < len(y_unq):
self.y_ul = self.y_ll + self.sq_size
self.x_ll = 0
while (self.x_ll + self.sq_size) < len(x_unq):
self.x_ul = self.x_ll + self.sq_size
pos_x = np.logical_and(x_unq[self.x_ul] > data_[:, 0],
data_[:, 0] >= x_unq[self.x_ll])
pos_y = np.logical_and(y_unq[self.y_ul] > data_[:, 1],
data_[:, 1] >= y_unq[self.y_ll])
pos = np.logical_and(pos_x, pos_y)
self.x_ll += self.sq_size
data = data_[pos]
data = np.resize(data, (self.npoints, data_.shape[1]))
if labels_seperate:
if batch_accumulation < self.batch_size:
if batch_accumulation == 0:
data_batch = np.reshape(data[:, :-1], (1, data.shape[0], data.shape[1] - 1))
labels_batch = np.reshape(data[:, -1], (1, -1))
else:
data_batch = np.vstack(
(data_batch, data[:, :-1].reshape((1, data.shape[0], data.shape[1] - 1)))
)
labels_batch = np.vstack(
(labels_batch, data[:, -1].reshape((1, -1)))
)
batch_accumulation += 1
else:
batch_accumulation = 0
yield data_batch, labels_batch
else:
if batch_accumulation < self.batch_size:
if batch_accumulation == 0:
data_batch = np.reshape(data, (1, data.shape[0], data.shape[1]))
else:
data_batch = np.vstack(
(data_batch, data.reshape((1, data.shape[0], data.shape[1])))
)
batch_accumulation += 1
else:
batch_accumulation = 0
yield data_batch
self.y_ll += self.sq_size
self.y_ul = -1
self.x_ul = -1
pos_x = np.logical_and(x_unq[self.x_ul] > data_[:, 0],
data_[:, 0] >= x_unq[self.x_ll])
pos_y = np.logical_and(y_unq[self.y_ul] > data_[:, 1],
data_[:, 1] >= y_unq[self.y_ll])
pos = np.logical_and(pos_x, pos_y)
data = data_[pos]
data = np.resize(data, (self.npoints, data_.shape[1]))
if labels_seperate:
if batch_accumulation < self.batch_size:
if batch_accumulation == 0:
data_batch = np.reshape(data[:, :-1], (1, data.shape[0], data.shape[1] - 1))
labels_batch = np.reshape(data[:, -1], (1, -1))
else:
data_batch = np.vstack(
(data_batch, data[:, :-1].reshape((1, data.shape[0], data.shape[1] - 1)))
)
labels_batch = np.vstack(
(labels_batch, data[:, -1].reshape((1, -1)))
)
yield data_batch, labels_batch
else:
if batch_accumulation < self.batch_size:
if batch_accumulation == 0:
data_batch = np.reshape(data, (1, data.shape[0], data.shape[1]))
else:
data_batch = np.vstack(
(data_batch, data.reshape((1, data.shape[0], data.shape[1])))
)
yield data_batch
def init_labels(self):
# Initialize all label parameters given the label_to_names dict
self.num_classes = len(self.label_to_names)
self.label_values = np.sort([k for k, v in self.label_to_names.items()])
self.label_names = [self.label_to_names[k] for k in self.label_values]
self.label_to_idx = {l: i for i, l in enumerate(self.label_values)}
self.name_to_label = {v: k for k, v in self.label_to_names.items()}
def __iter__(self):
return self.next_batch()
def __len__(self):
return 2000
class Dataset was imported as DalesDataset and used in train file as below.
training_dataset = DalesDataset(config.train_filelist, path='../Data/DALES/train', batch_size=1, npoints=2048)
test_dataset = DalesDataset(config.test_filelist, path='../Data/DALES/test', batch_size=1, npoints=2048)
training_loader = DataLoader(training_dataset,
batch_size=1,
num_workers=0,
pin_memory=True)
test_loader = DataLoader(test_dataset,
batch_size=None,
num_workers=0,
pin_memory=True)
but I run into many errors. How should the data look to feed into your model?
I did not understand the length for the generator. so I added a random 2000 for checking how it runs.
But I find it hard to integrate this into your code. Is it possible to use this code or should I make changes?
I just need an idea or a link from where I can make it proper.