Spaces:

akhaliq
/

Music_Source_Separation

Music_Source_Separation/bytesep/data/batch_data_preprocessors.py

akhaliq3

spaces demo

5019931 about 3 years ago

4.76 kB

	from typing import Dict, List

	import torch


	class BasicBatchDataPreprocessor:
	    def __init__(self, target_source_types: List[str]):
	        r"""Preprocessor that turns a batch of separated sources into the
	        (input, target) pair used for training. When several target source
	        types are requested, their waveforms are joined along the channel
	        dimension (dim=1).

	        Args:
	            target_source_types: List[str], e.g., ['vocals', 'bass', ...]
	        """
	        self.target_source_types = target_source_types

	    def __call__(self, batch_data_dict: Dict) -> List[Dict]:
	        r"""Build the model input and the training target from one batch.

	        Args:
	            batch_data_dict: dict, e.g., {
	                'mixture': (batch_size, channels_num, segment_samples),
	                'vocals': (batch_size, channels_num, segment_samples),
	                'bass': (batch_size, channels_num, segment_samples),
	                ...,
	            }

	        Returns:
	            A 2-tuple ``(input_dict, target_dict)``:

	            input_dict: dict, e.g., {
	                'waveform': (batch_size, channels_num, segment_samples),
	            }
	            target_dict: dict, e.g., {
	                'waveform': (batch_size, target_sources_num * channels_num, segment_samples)
	            }
	        """
	        mixture_batch = batch_data_dict['mixture']
	        # mixture_batch: (batch_size, channels_num, segment_samples)

	        # Gather the requested sources in the configured order; that order
	        # determines the channel layout of the concatenated target.
	        per_source_waveforms = []
	        for source_name in self.target_source_types:
	            per_source_waveforms.append(batch_data_dict[source_name])

	        stacked_targets = torch.cat(per_source_waveforms, dim=1)
	        # stacked_targets: (batch_size, target_sources_num * channels_num, segment_samples)

	        return {'waveform': mixture_batch}, {'waveform': stacked_targets}


	class ConditionalSisoBatchDataPreprocessor:
	    def __init__(self, target_source_types: List[str]):
	        r"""Conditional single input single output (SISO) batch data
	        preprocessor. Select one target source from several target sources as
	        training target and prepare the corresponding conditional vector.

	        Args:
	            target_source_types: List[str], e.g., ['vocals', 'bass', ...]
	        """
	        self.target_source_types = target_source_types

	    def __call__(self, batch_data_dict: Dict) -> List[Dict]:
	        r"""Format waveforms, one-hot conditions and targets for training.

	        Sample ``n`` of the batch is assigned the source type at index
	        ``n % target_sources_num``, so the source types cycle through the
	        batch in the order given by ``target_source_types``.

	        Args:
	            batch_data_dict: dict, e.g., {
	                'mixture': (batch_size, channels_num, segment_samples),
	                'vocals': (batch_size, channels_num, segment_samples),
	                'bass': (batch_size, channels_num, segment_samples),
	                ...,
	            }

	        Returns:
	            A 2-tuple ``(input_dict, target_dict)``:

	            input_dict: dict, e.g., {
	                'waveform': (batch_size, channels_num, segment_samples),
	                'condition': (batch_size, target_sources_num),
	            }
	            target_dict: dict, e.g., {
	                'waveform': (batch_size, channels_num, segment_samples)
	            }

	        Raises:
	            AssertionError: if the batch size is not a multiple of the number
	                of target source types.
	        """
	        mixtures = batch_data_dict['mixture']
	        # mixtures: (batch_size, channels_num, segment_samples)

	        batch_size = len(mixtures)
	        target_sources_num = len(self.target_source_types)

	        # Raised explicitly instead of via `assert` so the check is not
	        # stripped under `python -O`; the exception type is kept as
	        # AssertionError for backward compatibility.
	        if batch_size % target_sources_num != 0:
	            raise AssertionError(
	                "Batch size should be evenly divided by target sources number."
	            )

	        # Source class index of every sample: 0, 1, ..., K-1, 0, 1, ...
	        sample_indices = torch.arange(batch_size, device=mixtures.device)
	        class_indices = sample_indices % target_sources_num

	        # Allocate directly on the mixtures' device and set the whole one-hot
	        # pattern with a single indexed assignment.
	        conditions = torch.zeros(
	            batch_size, target_sources_num, device=mixtures.device
	        )
	        conditions[sample_indices, class_indices] = 1
	        # conditions: (batch_size, target_sources_num), which looks like:
	        # [[1, 0, 0, 0],
	        #  [0, 1, 0, 0],
	        #  [0, 0, 1, 0],
	        #  [0, 0, 0, 1],
	        #  [1, 0, 0, 0],
	        #  [0, 1, 0, 0],
	        #  ...,
	        # ]

	        # Pick, for each sample, the waveform of its assigned source type.
	        assigned_types = [
	            self.target_source_types[n % target_sources_num]
	            for n in range(batch_size)
	        ]
	        targets = torch.stack(
	            [
	                batch_data_dict[source_type][n]
	                for n, source_type in enumerate(assigned_types)
	            ],
	            dim=0,
	        )
	        # targets: (batch_size, channels_num, segment_samples)

	        input_dict = {
	            'waveform': mixtures,
	            'condition': conditions,
	        }

	        target_dict = {'waveform': targets}

	        return input_dict, target_dict


	def get_batch_data_preprocessor_class(batch_data_preprocessor_type: str) -> object:
	    r"""Look up a batch data preprocessor class by its name.

	    Args:
	        batch_data_preprocessor_type: str, either
	            'BasicBatchDataPreprocessor' or
	            'ConditionalSisoBatchDataPreprocessor'

	    Returns:
	        The matching class itself (not an instance).

	    Raises:
	        NotImplementedError: if the name matches no known preprocessor.
	    """
	    known_preprocessors = (
	        ('BasicBatchDataPreprocessor', BasicBatchDataPreprocessor),
	        (
	            'ConditionalSisoBatchDataPreprocessor',
	            ConditionalSisoBatchDataPreprocessor,
	        ),
	    )

	    # Compare with `==` in declaration order and return the first match.
	    for type_name, preprocessor_class in known_preprocessors:
	        if batch_data_preprocessor_type == type_name:
	            return preprocessor_class

	    raise NotImplementedError