Spaces:

clr
/

phonalign

clr committed on Mar 24, 2023

Commit

f691099

•

1 Parent(s): 94f9cb0

Update ctcalign.py

Files changed (1) hide show

ctcalign.py CHANGED Viewed

@@ -239,21 +239,20 @@ def prep_transcript(xcp,lang):
 def langsalign(wav_path,transcript_string,lang):
-	norm_txt, rec_label_ids = prep_transcript(transcript_string, lang)
-	emit = get_frame_probs(wav_path, lang)
     print('ALL LABELS',label_ids)
     print(norm_txt)
     print(rec_label_ids)
     print('SIZE:', emit.size())
-	trellis = get_trellis(emit, rec_label_ids, d[lang]['blank_id'])
-	path = backtrack(trellis, emit, rec_label_ids, d[lang]['blank_id'])
-	segments = merge_repeats(path,norm_txt)
-	words = merge_words(segments, d[lang]['model_word_separator'])
-	#segments = [s for s in segments if s[0] != model_word_separator]
-	print(segments)
-	return mfalike(segments,words)

 def langsalign(wav_path,transcript_string,lang):
+    norm_txt, rec_label_ids = prep_transcript(transcript_string, lang)
+    emit = get_frame_probs(wav_path, lang)
     print('ALL LABELS',label_ids)
     print(norm_txt)
     print(rec_label_ids)
     print('SIZE:', emit.size())
+    trellis = get_trellis(emit, rec_label_ids, d[lang]['blank_id'])
+    path = backtrack(trellis, emit, rec_label_ids, d[lang]['blank_id'])
+    segments = merge_repeats(path,norm_txt)
+    words = merge_words(segments, d[lang]['model_word_separator'])
+    #segments = [s for s in segments if s[0] != model_word_separator]
+    print(segments)
+    return mfalike(segments,words)