# Example rows: one inner array per example, six string fields per row.
# NOTE(review): the field meanings are not stated in this file. Judging only
# by the values, they look like: embedding-model framework, embedding model
# file name with its size after "|", segmentation model, speaker count
# (it matches the count named in the wave file), an unidentified numeric
# setting, and the input wave file. Confirm against the code that reads this.
# Numeric-looking fields are strings in the original data and are left as
# strings here, since the consumer's expected types are not visible.
examples = [
    [
        "3D-Speaker",
        "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx|37.8MB",
        "pyannote/segmentation-3.0",
        "4",
        "0",
        "0-four-speakers-zh.wav",
    ],
    [
        "3D-Speaker",
        "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx|37.8MB",
        "pyannote/segmentation-3.0",
        "2",
        "0",
        "1-two-speakers-en.wav",
    ],
    [
        "3D-Speaker",
        "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx|37.8MB",
        "pyannote/segmentation-3.0",
        "2",
        "0",
        "2-two-speakers-en.wav",
    ],
    [
        "3D-Speaker",
        "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx|37.8MB",
        "pyannote/segmentation-3.0",
        "2",
        "0",
        "3-two-speakers-en.wav",
    ],
]