I get the error below. Could you please let me know what I am doing wrong?
Accelerator(
amp : bool = False
)
train(
seed : int = 0
save_path : str = runs/baseline/
num_iters : int = 250000
save_iters : list = [10000, 50000, 100000, 200000]
sample_freq : int = 10000
valid_freq : int = 1000
batch_size : int = 12
val_batch_size : int = 12
num_workers : int = 4
val_idx : list = [0, 1, 2, 3, 4, 5, 6, 7]
lambdas : dict = {'mel/loss': 15.0, 'adv/feat_loss': 2.0, 'adv/gen_loss': 1.0, 'vq/commitment_loss': 0.25, 'vq/codebook_loss': 1.0}
)
load(
resume : bool = False
tag : str = latest
load_weights : bool = False
)
DAC(
encoder_dim : int = 64
encoder_rates : list = [2, 4, 8, 8]
decoder_dim : int = 1536
decoder_rates : list = [8, 8, 4, 2]
n_codebooks : int = 9
codebook_size : int = 1024
codebook_dim : int = 8
quantizer_dropout : float = 1.0
sample_rate : int = 44100
)
Discriminator(
rates : list = []
periods : list = [2, 3, 5, 7, 11]
fft_sizes : list = [2048, 1024, 512]
sample_rate : int = 44100
bands : list = [[0.0, 0.1], [0.1, 0.25], [0.25, 0.5], [0.5, 0.75], [0.75, 1.0]]
)
AdamW(
# scope = generator
lr : float = 0.0001
betas : list = [0.8, 0.99]
eps : float = 1e-08
weight_decay : float = 0.01
amsgrad : bool = False
maximize : bool = False
capturable : bool = False
differentiable : bool = False
)
ExponentialLR(
# scope = generator
gamma : float = 0.999996
)
AdamW(
# scope = discriminator
lr : float = 0.0001
betas : list = [0.8, 0.99]
eps : float = 1e-08
weight_decay : float = 0.01
amsgrad : bool = False
maximize : bool = False
capturable : bool = False
differentiable : bool = False
)
ExponentialLR(
# scope = discriminator
gamma : float = 0.999996
)
build_dataset(
# scope = train
folders : dict = {'speech_fb': ['/data/testset44p1/'], 'speech_hq': ['/data/testset44p1/'], 'speech_uq': ['/data/testset44p1/'], 'music_hq': ['/data/testset44p1/'], 'music_uq': ['/data/testset44p1/'], 'general': ['/data/testset44p1/']}
)
AudioLoader(
# scope = train
sources : list = ['/data/testset44p1/']
weights : NoneType = None
relative_path : str =
ext : list = ['.wav', '.flac', '.mp3', '.mp4']
shuffle : bool = True
shuffle_state : int = 0
)
build_transform(
# scope = train
augment_prob : float = 0.0
preprocess : list = ['Identity']
augment : list = ['Identity']
postprocess : list = ['VolumeNorm', 'RescaleAudio', 'ShiftPhase']
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : float = 1.0
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : float = 1.0
)
VolumeNorm(
# scope = train
db : list = ['const', -16]
name : NoneType = None
prob : float = 1.0
)
RescaleAudio(
# scope = train
val : float = 1.0
name : NoneType = None
prob : int = 1
)
ShiftPhase(
# scope = train
shift : tuple = ('uniform', -3.141592653589793, 3.141592653589793)
name : NoneType = None
prob : int = 1
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : int = 1
)
AudioDataset(
# scope = train
n_examples : int = 10000000
duration : float = 0.38
offset : NoneType = None
loudness_cutoff : int = -40
num_channels : int = 1
transform : Compose = <audiotools.data.transforms.Compose object at 0x7f20bc0cc520>
aligned : bool = False
shuffle_loaders : bool = False
without_replacement : bool = True
)
AudioLoader(
# scope = train
sources : list = ['/data/testset44p1/']
weights : NoneType = None
relative_path : str =
ext : list = ['.wav', '.flac', '.mp3', '.mp4']
shuffle : bool = True
shuffle_state : int = 0
)
build_transform(
# scope = train
augment_prob : float = 0.0
preprocess : list = ['Identity']
augment : list = ['Identity']
postprocess : list = ['VolumeNorm', 'RescaleAudio', 'ShiftPhase']
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : float = 1.0
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : float = 1.0
)
VolumeNorm(
# scope = train
db : list = ['const', -16]
name : NoneType = None
prob : float = 1.0
)
RescaleAudio(
# scope = train
val : float = 1.0
name : NoneType = None
prob : int = 1
)
ShiftPhase(
# scope = train
shift : tuple = ('uniform', -3.141592653589793, 3.141592653589793)
name : NoneType = None
prob : int = 1
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : int = 1
)
AudioDataset(
# scope = train
n_examples : int = 10000000
duration : float = 0.38
offset : NoneType = None
loudness_cutoff : int = -40
num_channels : int = 1
transform : Compose = <audiotools.data.transforms.Compose object at 0x7f20bc0cc970>
aligned : bool = False
shuffle_loaders : bool = False
without_replacement : bool = True
)
AudioLoader(
# scope = train
sources : list = ['/data/testset44p1/']
weights : NoneType = None
relative_path : str =
ext : list = ['.wav', '.flac', '.mp3', '.mp4']
shuffle : bool = True
shuffle_state : int = 0
)
build_transform(
# scope = train
augment_prob : float = 0.0
preprocess : list = ['Identity']
augment : list = ['Identity']
postprocess : list = ['VolumeNorm', 'RescaleAudio', 'ShiftPhase']
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : float = 1.0
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : float = 1.0
)
VolumeNorm(
# scope = train
db : list = ['const', -16]
name : NoneType = None
prob : float = 1.0
)
RescaleAudio(
# scope = train
val : float = 1.0
name : NoneType = None
prob : int = 1
)
ShiftPhase(
# scope = train
shift : tuple = ('uniform', -3.141592653589793, 3.141592653589793)
name : NoneType = None
prob : int = 1
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : int = 1
)
AudioDataset(
# scope = train
n_examples : int = 10000000
duration : float = 0.38
offset : NoneType = None
loudness_cutoff : int = -40
num_channels : int = 1
transform : Compose = <audiotools.data.transforms.Compose object at 0x7f20bc0ccdc0>
aligned : bool = False
shuffle_loaders : bool = False
without_replacement : bool = True
)
AudioLoader(
# scope = train
sources : list = ['/data/testset44p1/']
weights : NoneType = None
relative_path : str =
ext : list = ['.wav', '.flac', '.mp3', '.mp4']
shuffle : bool = True
shuffle_state : int = 0
)
build_transform(
# scope = train
augment_prob : float = 0.0
preprocess : list = ['Identity']
augment : list = ['Identity']
postprocess : list = ['VolumeNorm', 'RescaleAudio', 'ShiftPhase']
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : float = 1.0
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : float = 1.0
)
VolumeNorm(
# scope = train
db : list = ['const', -16]
name : NoneType = None
prob : float = 1.0
)
RescaleAudio(
# scope = train
val : float = 1.0
name : NoneType = None
prob : int = 1
)
ShiftPhase(
# scope = train
shift : tuple = ('uniform', -3.141592653589793, 3.141592653589793)
name : NoneType = None
prob : int = 1
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : int = 1
)
AudioDataset(
# scope = train
n_examples : int = 10000000
duration : float = 0.38
offset : NoneType = None
loudness_cutoff : int = -40
num_channels : int = 1
transform : Compose = <audiotools.data.transforms.Compose object at 0x7f20bc0cd210>
aligned : bool = False
shuffle_loaders : bool = False
without_replacement : bool = True
)
AudioLoader(
# scope = train
sources : list = ['/data/testset44p1/']
weights : NoneType = None
relative_path : str =
ext : list = ['.wav', '.flac', '.mp3', '.mp4']
shuffle : bool = True
shuffle_state : int = 0
)
build_transform(
# scope = train
augment_prob : float = 0.0
preprocess : list = ['Identity']
augment : list = ['Identity']
postprocess : list = ['VolumeNorm', 'RescaleAudio', 'ShiftPhase']
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : float = 1.0
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : float = 1.0
)
VolumeNorm(
# scope = train
db : list = ['const', -16]
name : NoneType = None
prob : float = 1.0
)
RescaleAudio(
# scope = train
val : float = 1.0
name : NoneType = None
prob : int = 1
)
ShiftPhase(
# scope = train
shift : tuple = ('uniform', -3.141592653589793, 3.141592653589793)
name : NoneType = None
prob : int = 1
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : int = 1
)
AudioDataset(
# scope = train
n_examples : int = 10000000
duration : float = 0.38
offset : NoneType = None
loudness_cutoff : int = -40
num_channels : int = 1
transform : Compose = <audiotools.data.transforms.Compose object at 0x7f20bc0cd660>
aligned : bool = False
shuffle_loaders : bool = False
without_replacement : bool = True
)
AudioLoader(
# scope = train
sources : list = ['/data/testset44p1/']
weights : NoneType = None
relative_path : str =
ext : list = ['.wav', '.flac', '.mp3', '.mp4']
shuffle : bool = True
shuffle_state : int = 0
)
build_transform(
# scope = train
augment_prob : float = 0.0
preprocess : list = ['Identity']
augment : list = ['Identity']
postprocess : list = ['VolumeNorm', 'RescaleAudio', 'ShiftPhase']
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : float = 1.0
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : float = 1.0
)
VolumeNorm(
# scope = train
db : list = ['const', -16]
name : NoneType = None
prob : float = 1.0
)
RescaleAudio(
# scope = train
val : float = 1.0
name : NoneType = None
prob : int = 1
)
ShiftPhase(
# scope = train
shift : tuple = ('uniform', -3.141592653589793, 3.141592653589793)
name : NoneType = None
prob : int = 1
)
BaseTransform(
# scope = train
keys : list = []
name : NoneType = None
prob : int = 1
)
AudioDataset(
# scope = train
n_examples : int = 10000000
duration : float = 0.38
offset : NoneType = None
loudness_cutoff : int = -40
num_channels : int = 1
transform : Compose = <audiotools.data.transforms.Compose object at 0x7f20bc0cdab0>
aligned : bool = False
shuffle_loaders : bool = False
without_replacement : bool = True
)
build_dataset(
# scope = val
folders : dict = {'speech_hq': ['/data/testset44p1/'], 'music_hq': ['/data/testset44p1/'], 'general': ['/data/testset44p1/']}
)
AudioLoader(
# scope = val
sources : list = ['/data/testset44p1/']
weights : NoneType = None
relative_path : str =
ext : list = ['.wav', '.flac', '.mp3', '.mp4']
shuffle : bool = True
shuffle_state : int = 0
)
build_transform(
# scope = val
augment_prob : float = 1.0
preprocess : list = ['Identity']
augment : list = ['Identity']
postprocess : list = ['VolumeNorm', 'RescaleAudio', 'ShiftPhase']
)
BaseTransform(
# scope = val
keys : list = []
name : NoneType = None
prob : float = 1.0
)
BaseTransform(
# scope = val
keys : list = []
name : NoneType = None
prob : float = 1.0
)
VolumeNorm(
# scope = val
db : list = ['const', -16]
name : NoneType = None
prob : float = 1.0
)
RescaleAudio(
# scope = val
val : float = 1.0
name : NoneType = None
prob : int = 1
)
ShiftPhase(
# scope = val
shift : tuple = ('uniform', -3.141592653589793, 3.141592653589793)
name : NoneType = None
prob : int = 1
)
BaseTransform(
# scope = val
keys : list = []
name : NoneType = None
prob : int = 1
)
AudioDataset(
# scope = val
n_examples : int = 250
duration : float = 5.0
offset : NoneType = None
loudness_cutoff : int = -40
num_channels : int = 1
transform : Compose = <audiotools.data.transforms.Compose object at 0x7f20bc0cdf60>
aligned : bool = False
shuffle_loaders : bool = False
without_replacement : bool = True
)
AudioLoader(
# scope = val
sources : list = ['/data/testset44p1/']
weights : NoneType = None
relative_path : str =
ext : list = ['.wav', '.flac', '.mp3', '.mp4']
shuffle : bool = True
shuffle_state : int = 0
)
build_transform(
# scope = val
augment_prob : float = 1.0
preprocess : list = ['Identity']
augment : list = ['Identity']
postprocess : list = ['VolumeNorm', 'RescaleAudio', 'ShiftPhase']
)
BaseTransform(
# scope = val
keys : list = []
name : NoneType = None
prob : float = 1.0
)
BaseTransform(
# scope = val
keys : list = []
name : NoneType = None
prob : float = 1.0
)
VolumeNorm(
# scope = val
db : list = ['const', -16]
name : NoneType = None
prob : float = 1.0
)
RescaleAudio(
# scope = val
val : float = 1.0
name : NoneType = None
prob : int = 1
)
ShiftPhase(
# scope = val
shift : tuple = ('uniform', -3.141592653589793, 3.141592653589793)
name : NoneType = None
prob : int = 1
)
BaseTransform(
# scope = val
keys : list = []
name : NoneType = None
prob : int = 1
)
AudioDataset(
# scope = val
n_examples : int = 250
duration : float = 5.0
offset : NoneType = None
loudness_cutoff : int = -40
num_channels : int = 1
transform : Compose = <audiotools.data.transforms.Compose object at 0x7f20bc0ce3b0>
aligned : bool = False
shuffle_loaders : bool = False
without_replacement : bool = True
)
AudioLoader(
# scope = val
sources : list = ['/data/testset44p1/']
weights : NoneType = None
relative_path : str =
ext : list = ['.wav', '.flac', '.mp3', '.mp4']
shuffle : bool = True
shuffle_state : int = 0
)
build_transform(
# scope = val
augment_prob : float = 1.0
preprocess : list = ['Identity']
augment : list = ['Identity']
postprocess : list = ['VolumeNorm', 'RescaleAudio', 'ShiftPhase']
)
BaseTransform(
# scope = val
keys : list = []
name : NoneType = None
prob : float = 1.0
)
BaseTransform(
# scope = val
keys : list = []
name : NoneType = None
prob : float = 1.0
)
VolumeNorm(
# scope = val
db : list = ['const', -16]
name : NoneType = None
prob : float = 1.0
)
RescaleAudio(
# scope = val
val : float = 1.0
name : NoneType = None
prob : int = 1
)
ShiftPhase(
# scope = val
shift : tuple = ('uniform', -3.141592653589793, 3.141592653589793)
name : NoneType = None
prob : int = 1
)
BaseTransform(
# scope = val
keys : list = []
name : NoneType = None
prob : int = 1
)
AudioDataset(
# scope = val
n_examples : int = 250
duration : float = 5.0
offset : NoneType = None
loudness_cutoff : int = -40
num_channels : int = 1
transform : Compose = <audiotools.data.transforms.Compose object at 0x7f20bc0ce800>
aligned : bool = False
shuffle_loaders : bool = False
without_replacement : bool = True
)
L1Loss(
attribute : str = audio_data
weight : float = 1.0
)
MultiScaleSTFTLoss(
window_lengths : list = [2048, 512]
clamp_eps : float = 1e-05
mag_weight : float = 1.0
log_weight : float = 1.0
pow : float = 2.0
weight : float = 1.0
match_stride : bool = False
window_type : NoneType = None
)
MelSpectrogramLoss(
n_mels : list = [5, 10, 20, 40, 80, 160, 320]
window_lengths : list = [32, 64, 128, 256, 512, 1024, 2048]
clamp_eps : float = 1e-05
mag_weight : float = 0.0
log_weight : float = 1.0
pow : float = 1.0
weight : float = 1.0
match_stride : bool = False
mel_fmin : list = [0, 0, 0, 0, 0, 0, 0]
mel_fmax : list = [None, None, None, None, None, None, None]
window_type : NoneType = None
)
GANLoss(
)
Traceback (most recent call last):
File "/home/user/descript-audio-codec/scripts/train.py", line 436, in <module>
train(args, accel)
File "/home/user/anaconda3/envs/dac/lib/python3.10/site-packages/argbind/argbind.py", line 159, in cmd_func
return func(*cmd_args, **kwargs)
File "/home/user/descript-audio-codec/scripts/train.py", line 410, in train
for tracker.step, batch in enumerate(train_dataloader, start=tracker.step):
File "/home/user/descript-audio-codec/scripts/train.py", line 63, in get_infinite_loader
for batch in dataloader:
File "/home/user/anaconda3/envs/dac/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 633, in __next__
data = self._next_data()
File "/home/user/anaconda3/envs/dac/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1345, in _next_data
return self._process_data(data)
File "/home/user/anaconda3/envs/dac/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1371, in _process_data
data.reraise()
File "/home/user/anaconda3/envs/dac/lib/python3.10/site-packages/torch/_utils.py", line 644, in reraise
raise exception
ZeroDivisionError: Caught ZeroDivisionError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/user/anaconda3/envs/dac/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
data = fetcher.fetch(index)
File "/home/user/anaconda3/envs/dac/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/user/anaconda3/envs/dac/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/user/anaconda3/envs/dac/lib/python3.10/site-packages/audiotools/data/datasets.py", line 487, in __getitem__
return dataset[idx // len(self.datasets)]
File "/home/user/anaconda3/envs/dac/lib/python3.10/site-packages/audiotools/data/datasets.py", line 419, in __getitem__
item[keys[0]] = loader(**loader_kwargs)
File "/home/user/anaconda3/envs/dac/lib/python3.10/site-packages/audiotools/data/datasets.py", line 90, in __call__
global_idx % len(self.audio_indices)
ZeroDivisionError: integer division or modulo by zero