clr committed on
Commit
f691099
1 Parent(s): 94f9cb0

Update ctcalign.py

Browse files
Files changed (1) hide show
  1. ctcalign.py +9 -10
ctcalign.py CHANGED
@@ -239,21 +239,20 @@ def prep_transcript(xcp,lang):
239
 
240
 
241
  def langsalign(wav_path,transcript_string,lang):
242
-
243
- norm_txt, rec_label_ids = prep_transcript(transcript_string, lang)
244
- emit = get_frame_probs(wav_path, lang)
245
 
246
  print('ALL LABELS',label_ids)
247
  print(norm_txt)
248
  print(rec_label_ids)
249
  print('SIZE:', emit.size())
250
 
251
- trellis = get_trellis(emit, rec_label_ids, d[lang]['blank_id'])
252
- path = backtrack(trellis, emit, rec_label_ids, d[lang]['blank_id'])
253
- segments = merge_repeats(path,norm_txt)
254
- words = merge_words(segments, d[lang]['model_word_separator'])
255
 
256
- #segments = [s for s in segments if s[0] != model_word_separator]
257
- print(segments)
258
- return mfalike(segments,words)
259
 
 
239
 
240
 
241
  def langsalign(wav_path,transcript_string,lang):
242
+ norm_txt, rec_label_ids = prep_transcript(transcript_string, lang)
243
+ emit = get_frame_probs(wav_path, lang)
 
244
 
245
  print('ALL LABELS',label_ids)
246
  print(norm_txt)
247
  print(rec_label_ids)
248
  print('SIZE:', emit.size())
249
 
250
+ trellis = get_trellis(emit, rec_label_ids, d[lang]['blank_id'])
251
+ path = backtrack(trellis, emit, rec_label_ids, d[lang]['blank_id'])
252
+ segments = merge_repeats(path,norm_txt)
253
+ words = merge_words(segments, d[lang]['model_word_separator'])
254
 
255
+ #segments = [s for s in segments if s[0] != model_word_separator]
256
+ print(segments)
257
+ return mfalike(segments,words)
258