How can I change this to see voice conversion? I only see "gt" and "generated", but where is …

A simple example (not tested yet):

In Tensorboard, validation no see audio of source and target speakers about freevc HOT 3 CLOSED

olawod commented on June 25, 2024

In TensorBoard, the validation logs do not show audio of the source and target speakers.

from freevc.

Comments (3)

OlaWod commented on June 25, 2024

I did not log voice conversion in validation.
If you want that, just modify the evaluate function (from line 221) in train.py.

from freevc.

OlaWod commented on June 25, 2024

a simple example (not tested yet):

def evaluate(hps, generator, eval_loader, writer_eval):
  """Log one voice-conversion example from the first validation batch.

  The last item of the batch supplies the source content features and the
  source waveform; the first item supplies the target spectrogram, the
  target waveform and (when ``hps.model.use_spk`` is set) the target speaker
  input. The converted audio, the source and target audio, and the
  generated/target mel-spectrogram images are passed to ``utils.summarize``.

  Args:
    hps: Hyper-parameter object; ``hps.model.use_spk`` and the ``hps.data``
      audio settings are read here.
    generator: Model wrapper exposing ``.module.infer`` (presumably a
      DistributedDataParallel-wrapped synthesizer -- confirm against caller).
    eval_loader: Iterable yielding ``(c, spec, y[, spk])`` batches.
    writer_eval: Summary writer forwarded to ``utils.summarize``.

  Note:
    Reads the module-level ``global_step`` for the logging step. If the
    batch holds a single item, source and target are the same utterance.
  """
  generator.eval()
  with torch.no_grad():
    # Only the first validation batch is used.
    items = next(iter(eval_loader), None)
    if items is None:
      # Nothing to evaluate; still hand the model back in training mode.
      generator.train()
      return

    if hps.model.use_spk:
      c, spec, y, spk = items
      g = spk[:1].cuda(0)
    else:
      c, spec, y = items
      g = None

    # Source = last item of the batch, target = first item. Slicing with
    # [-1:] / [:1] keeps a batch dimension of 1 on every tensor.
    y_source = y[-1:]
    c = c[-1:].cuda(0)
    spec, y = spec[:1].cuda(0), y[:1].cuda(0)

    # Mel-spectrogram of the target utterance, fed to the generator.
    mel = spec_to_mel_torch(
      spec,
      hps.data.filter_length,
      hps.data.n_mel_channels,
      hps.data.sampling_rate,
      hps.data.mel_fmin,
      hps.data.mel_fmax)
    y_hat = generator.module.infer(c, g=g, mel=mel)

    y_hat_mel = mel_spectrogram_torch(
      y_hat.squeeze(1).float(),
      hps.data.filter_length,
      hps.data.n_mel_channels,
      hps.data.sampling_rate,
      hps.data.hop_length,
      hps.data.win_length,
      hps.data.mel_fmin,
      hps.data.mel_fmax
    )

  image_dict = {
    "gen/mel": utils.plot_spectrogram_to_numpy(y_hat_mel[0].cpu().numpy()),
    "target/mel": utils.plot_spectrogram_to_numpy(mel[0].cpu().numpy())
  }
  audio_dict = {
    "gen/audio": y_hat[0],
    "source/audio": y_source[0],
    "target/audio": y[0]
  }
  utils.summarize(
    writer=writer_eval,
    global_step=global_step,
    images=image_dict,
    audios=audio_dict,
    audio_sampling_rate=hps.data.sampling_rate
  )
  # Restore training mode for the caller's training loop.
  generator.train()