# Web file-viewer residue captured with this script (not part of the program):
# akhaliq3 | spaces demo | 5019931 | raw | history blame | 5.06 kB
import argparse
import os
import soundfile
from typing import NoReturn
import musdb
import numpy as np
from bytesep.utils import load_audio
def create_evaluation(args) -> NoReturn:
    r"""Randomly mix speech and music and write out audios for evaluation.

    For every evaluation segment a speech file is picked at random from the
    VCTK test split, an excerpt of the same number of samples is cut at a
    random offset from a randomly picked MUSDB18 test track, and the speech,
    the music and their sum are written to
    ``<evaluation_audios_dir>/test/{speech,music,mixture}/NNNN.wav``.

    The random generator is seeded with a fixed value, so repeated runs on the
    same datasets select the same files and offsets.

    Args:
        args: argument namespace with the following attributes:
            vctk_dataset_dir: str, the directory of the VCTK dataset; speech
                is read from ``<vctk_dataset_dir>/wav48/test/<speaker>/``
            musdb18_dataset_dir: str, the directory of the MUSDB18 dataset
            evaluation_audios_dir: str, the directory to write out randomly
                selected and mixed audio segments
            sample_rate: int, rate used to load speech and to write all files
            channels: int, e.g., 1 | 2
            evaluation_segments_num: int, number of segments to create

    Returns:
        None. Results are written to disk.
    """

    # arguments & parameters
    vctk_dataset_dir = args.vctk_dataset_dir
    musdb18_dataset_dir = args.musdb18_dataset_dir
    evaluation_audios_dir = args.evaluation_audios_dir
    sample_rate = args.sample_rate
    channels = args.channels
    evaluation_segments_num = args.evaluation_segments_num
    mono = channels == 1

    split = 'test'
    random_state = np.random.RandomState(1234)

    # paths
    audios_dir = os.path.join(vctk_dataset_dir, "wav48", split)

    for source_type in ['speech', 'music', 'mixture']:
        output_dir = os.path.join(evaluation_audios_dir, split, source_type)
        os.makedirs(output_dir, exist_ok=True)

    # Get VCTK audio paths. Directory listings are sorted so that the seeded
    # random selection below does not depend on file system ordering.
    speech_audio_paths = []

    for speaker_id in sorted(os.listdir(audios_dir)):
        speaker_audios_dir = os.path.join(audios_dir, speaker_id)
        speech_audio_paths.extend(
            os.path.join(speaker_audios_dir, audio_name)
            for audio_name in sorted(os.listdir(speaker_audios_dir))
        )

    # Get Musdb18 tracks.
    mus = musdb.DB(root=musdb18_dataset_dir, subsets=[split])
    track_indexes = np.arange(len(mus.tracks))

    for n in range(evaluation_segments_num):

        print('{} / {}'.format(n, evaluation_segments_num))

        # Randomly select and write out a clean speech segment.
        speech_audio_path = random_state.choice(speech_audio_paths)

        speech_audio = load_audio(
            audio_path=speech_audio_path, mono=mono, sample_rate=sample_rate
        )
        # (channels_num, audio_samples)

        if channels == 2:
            # Duplicate the speech channel to get a two-channel signal.
            # NOTE(review): assumes load_audio returned a single channel
            # here — confirm for non-mono speech files.
            speech_audio = np.tile(speech_audio, (2, 1))
        # (channels_num, audio_samples)

        output_speech_path = os.path.join(
            evaluation_audios_dir, split, 'speech', '{:04d}.wav'.format(n)
        )
        soundfile.write(
            file=output_speech_path, data=speech_audio.T, samplerate=sample_rate
        )
        print("Write out to {}".format(output_speech_path))

        # Randomly select and write out a clean music segment.
        track_index = random_state.choice(track_indexes)
        track = mus[track_index]

        # Read the track once; both its length and a slice of it are needed.
        # NOTE(review): the excerpt is used at the track's own sample rate and
        # is not resampled to `sample_rate` — confirm that the two agree.
        track_audio = track.audio
        # (track_samples, track_channels)

        segment_samples = speech_audio.shape[1]

        # Latest offset at which a full-length excerpt still fits in the
        # track; clamped to 0 for tracks shorter than the speech segment.
        max_start_sample = max(0, track_audio.shape[0] - segment_samples)
        start_sample = int(random_state.uniform(0.0, max_start_sample))

        music_audio = track_audio[start_sample : start_sample + segment_samples, :].T
        # (track_channels, <= segment_samples)

        if music_audio.shape[1] < segment_samples:
            # Track is shorter than the speech: zero-pad the tail so that the
            # music and speech can be summed sample by sample.
            pad_samples = segment_samples - music_audio.shape[1]
            music_audio = np.pad(
                music_audio, ((0, 0), (0, pad_samples)), mode='constant'
            )

        if mono and music_audio.shape[0] > 1:
            # Down-mix to one channel so that the music and the mixture have
            # the requested channel count instead of the track's.
            music_audio = np.mean(music_audio, axis=0, keepdims=True)
        # (channels_num, audio_samples)

        output_music_path = os.path.join(
            evaluation_audios_dir, split, 'music', '{:04d}.wav'.format(n)
        )
        soundfile.write(
            file=output_music_path, data=music_audio.T, samplerate=sample_rate
        )
        print("Write out to {}".format(output_music_path))

        # Mix speech and music segments and write out a mixture segment.
        mixture_audio = speech_audio + music_audio
        # (channels_num, audio_samples)

        output_mixture_path = os.path.join(
            evaluation_audios_dir, split, 'mixture', '{:04d}.wav'.format(n)
        )
        soundfile.write(
            file=output_mixture_path, data=mixture_audio.T, samplerate=sample_rate
        )
        print("Write out to {}".format(output_mixture_path))
if __name__ == "__main__":
    # Command-line interface: every option is mandatory, so the options are
    # described once as (flag, type, help) rows and registered in a loop.
    option_table = (
        ("--vctk_dataset_dir", str, "The directory of the VCTK dataset."),
        ("--musdb18_dataset_dir", str, "The directory of the MUSDB18 dataset."),
        (
            "--evaluation_audios_dir",
            str,
            "The directory to write out randomly selected and mixed audio segments.",
        ),
        ("--sample_rate", int, "Sample rate"),
        ("--channels", int, "Audio channels, e.g, 1 or 2."),
        (
            "--evaluation_segments_num",
            int,
            "The number of segments to create for evaluation.",
        ),
    )

    parser = argparse.ArgumentParser()

    for option_flag, option_type, option_help in option_table:
        parser.add_argument(
            option_flag, type=option_type, required=True, help=option_help
        )

    # Parse arguments and create the evaluation audios.
    args = parser.parse_args()
    create_evaluation(args)